r87895 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r87894‎ | r87895 | r87896 >
Date:22:53, 11 May 2011
Author:platonides
Status:deferred
Tags:
Comment:
Unfinished changes for performing expansion.
Modified paths:
  • /trunk/extensions/NativePreprocessor/config.m4 (modified) (history)
  • /trunk/extensions/NativePreprocessor/expand.c (added) (history)
  • /trunk/extensions/NativePreprocessor/nodes.h (modified) (history)

Diff [purge]

Index: trunk/extensions/NativePreprocessor/expand.c
@@ -0,0 +1,209 @@
 2+
 3+#include <string.h>
 4+#include <stdbool.h>
 5+
 6+#include "php.h"
 7+#include "ext/standard/php_string.h"
 8+
 9+#undef NDEBUG
 10+#include <assert.h>
 11+
 12+#include "nodes.h"
 13+
/*
 * PPFrame flags.
 * Every flag owns a distinct bit, so several of them can be combined
 * with bitwise OR and tested with bitwise AND.
 */
#define NO_ARGS          0x01
#define NO_TEMPLATES     0x02
#define STRIP_COMMENTS   0x04
#define NO_IGNORE        0x08
#define RECOVER_COMMENTS 0x10
 20+
/* Output type of the parser; stored in PPFrame_Native.parserOT. */
enum ParserOutputTypes {
	OT_HTML       = 0,
	OT_WIKI       = 1,
	OT_PREPROCESS = 2,
	OT_PLAIN      = 3
};
 27+
 28+struct PPFrame_Native {
 29+ void* parser;
 30+ enum ParserOutputTypes parserOT;
 31+ bool removeCommentsOption;
 32+
 33+ /* if this PPFrame is also a PPTemplateFrame */
 34+ void *parent;
 35+};
 36+
 37+/* Call insertStripItem() in the parser object, and return the given
 38+ * zval, which shall be a string (or null)
 39+ */
 40+zval* insertStripItem(void* parser, const char* text, int len) {
 41+ /* CODE ME */
 42+}
 43+
 44+struct node* unserializeNode(const char* nodeString, struct node* parentNode) {
 45+ struct node* node = alloc_node();
 46+ node->type = nodeString[0];
 47+ node->flags = nodeString[1] - '0';
 48+ node->nextSibling = getNextSibling( nodeString );
 49+ node->contentLength = getContentLength( nodeString );
 50+ node->index = 0;
 51+ node->parent = parentNode;
 52+
 53+ return node;
 54+}
 55+
/*
 * Add len bytes starting at val to the `expanded` accumulator, which must
 * be a variable in the scope where the macro is used.
 *
 * Only the first piece is recorded: appending to a non-empty accumulator
 * is not implemented yet, so later pieces are silently dropped.
 *
 * Wrapped in do { } while ( 0 ) so it behaves as a single statement
 * (safe in an unbraced if/else); arguments are parenthesized.
 */
#define addText(val, len) \
	do { \
		if ( !expanded.string ) { \
			expanded.string = (val); \
			expanded.length = (len); \
		} else { \
			/* TODO: concatenate with the text already accumulated */ \
		} \
	} while ( 0 )
 57+
 58+int expand(struct PPFrame_Native* frame, const char* nodeString, int nodeStringLen, const char* text, int text_len, int flags) {
 59+ struct node *curNode = NULL;
 60+ struct str_ref expanded = empty_str;
 61+
 62+ /* TODO: Check the parser nodeCount and expansionDepth */
 63+
 64+ if ( nodeStringLen % NODE_LEN ) {
 65+ return -1;
 66+ }
 67+ if ( nodeStringLen < NODE_LEN ) {
 68+ return -2;
 69+ }
 70+
 71+ char const* textPos = text;
 72+
 73+ for ( ; nodeStringLen > 0; ) {
 74+ curNode = unserializeNode( nodeString, curNode );
 75+ nodeString += NODE_LEN; nodeStringLen -= NODE_LEN;
 76+
 77+ /* Entering in such node */
 78+ switch (curNode->type) {
 79+ case literal_node:
 80+ addText( textPos, curNode->contentLength );
 81+ textPos += curNode->contentLength;
 82+ break;
 83+
 84+ case template_node:
 85+ //TODO
 86+ //ParserOT
 87+
 88+ case tplarg_node:
 89+ //TODO
 90+ break;
 91+
 92+ case comment_node:
 93+ // HTML-style comment
 94+ // Remove it in HTML, pre+remove and STRIP_COMMENTS modes
 95+ if ( frame->parserOT == OT_HTML
 96+ || ( frame->parserOT == OT_PREPROCESS && frame->removeCommentsOption )
 97+ || ( flags & STRIP_COMMENTS ) )
 98+ {
 99+ /* Add nothing */
 100+ }
 101+
 102+ // Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
 103+ // Not in RECOVER_COMMENTS mode (extractSections) though
 104+ else if ( frame->parserOT == OT_WIKI && ! ( flags & RECOVER_COMMENTS ) ) {
 105+ zval * stripItem;
 106+ stripItem = insertStripItem( frame->parser, textPos, curNode->contentLength );
 107+ if ( stripItem ) {
 108+ if ( Z_TYPE_P( stripItem ) == IS_STRING )
 109+ addText( Z_STRVAL_P( stripItem ), Z_STRLEN_P( stripItem ) );
 110+ Z_DELREF_P( stripItem );
 111+ }
 112+ }
 113+ // Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
 114+ else {
 115+ addText( textPos, curNode->contentLength );
 116+ }
 117+ textPos += curNode->contentLength;
 118+ break;
 119+ case ignore_node:
 120+ // Output suppression used by <includeonly> etc.
 121+ // OT_WIKI will only respect <ignore> in substed templates.
 122+ // The other output types respect it unless NO_IGNORE is set.
 123+ // extractSections() sets NO_IGNORE and so never respects it.
 124+ if ( ( !frame->parent && frame->parserOT == OT_WIKI ) || ( flags & NO_IGNORE ) ) {
 125+ addText( textPos, curNode->contentLength );
 126+ } else {
 127+ /* Add nothing */
 128+ }
 129+ textPos += curNode->contentLength;
 130+ break;
 131+ case ext_node:
 132+ curNode->ext_data.expanded = expanded;
 133+ expanded = empty_str;
 134+ curNode->ext_data.name = curNode->ext_data.attr = empty_str;
 135+ break;
 136+ case name_node:
 137+ if ( curNode->parent->type == ext_node ) {
 138+ curNode->ext_data.name.string = textPos;
 139+ curNode->ext_data.name.length = curNode->contentLength;
 140+ textPos += curNode->contentLength;
 141+ }
 142+ break;
 143+ case attr_node:
 144+ weak_assert( curNode->parent->type == ext_node );
 145+ weak_assert( !curNode->parent->ext_data.attr.string );
 146+ curNode->parent->ext_data.attr.string = textPos;
 147+ curNode->parent->ext_data.attr.length = curNode->contentLength;
 148+ textPos += curNode->contentLength;
 149+ break;
 150+ case inner_node:
 151+ weak_assert( curNode->parent->type == ext_node );
 152+ weak_assert( !curNode->parent->ext_data.inner.string );
 153+ curNode->parent->ext_data.inner.string = textPos;
 154+ curNode->parent->ext_data.inner.length = curNode->contentLength;
 155+ textPos += curNode->contentLength;
 156+ case close_node:
 157+ weak_assert( curNode->parent->type == ext_node );
 158+ weak_assert( !curNode->parent->ext_data.close.string );
 159+ curNode->parent->ext_data.close.string = textPos;
 160+ curNode->parent->ext_data.close.length = curNode->contentLength;
 161+ textPos += curNode->contentLength;
 162+ break;
 163+
 164+
 165+ curNode->nextSibling;
 166+ //TODO
 167+ break;
 168+ }
 169+ curNode->index += NODE_LEN;
 170+
 171+ while ( curNode && curNode->index >= curNode->nextSibling ) {
 172+ struct node* tmp;
 173+ tmp = curNode->parent;
 174+
 175+ if ( tmp ) {
 176+ zval* z;
 177+ tmp->index += curNode->index;
 178+
 179+ /* Run curNode destructor */
 180+ switch ( curNode->type ) {
 181+ case ext_node:
 182+ z = extensionSubstitutionInternal();
 183+ if ( z ) {
 184+ if ( Z_TYPE_P( z ) == IS_STRING )
 185+ addText( Z_STRVAL_P( z ), Z_STRLEN_P( z ) );
 186+ Z_DELREF_P( z );
 187+ }
 188+ break;
 189+ case heading_node:
 190+ case h1_node...h6_node:
 191+ if ( curNode->parent && curNode->parent->type == root_node
 192+ && frame->parserOT == OT_WIKI )
 193+ {
 194+ z = getMarker( curNode->flags );
 195+ if ( z ) {
 196+ if ( Z_TYPE_P( z ) == IS_STRING )
 197+ addText( Z_STRVAL_P( z ), Z_STRLEN_P( z ) );
 198+ Z_DELREF_P( z );
 199+ }
 200+ }
 201+ }
 202+ }
 203+ free_node( curNode );
 204+ curNode = tmp;
 205+ }
 206+ }
 207+
 208+failure:
 209+ ;
 210+}
Property changes on: trunk/extensions/NativePreprocessor/expand.c
___________________________________________________________________
Added: svn:eol-style
1211 + native
Index: trunk/extensions/NativePreprocessor/config.m4
@@ -5,6 +5,7 @@
66
77 if test "$PHP_MEDIAWIKIPREPROCESSOR" != "no"; then
88 dnl Enable the extension
9 - PHP_NEW_EXTENSION(mediawiki_preprocessor, mediawiki_preprocessor.c tag_util.c preprocesstoobj.c, $ext_shared)
 9+ PHP_NEW_EXTENSION(mediawiki_preprocessor, mediawiki_preprocessor.c tag_util.c preprocesstoobj.c expand.c, $ext_shared)
1010 PHP_SUBST(MEDIAWIKI_PREPROCESSOR_SHARED_LIBADD)
1111 fi
 12+
Index: trunk/extensions/NativePreprocessor/nodes.h
@@ -30,6 +30,20 @@
3131 closebrace_node = '}',
3232 };
3333
 34+/* May contain children: root_node, ext_node, name_node, heading_node (h?_node), template_node, tplarg_node, title_node, part_node, value_node */
 35+
/*
 * Reference to a piece of text: a pointer, a length, and a flag that
 * str_ref_free() consults to decide whether the pointer must be freed.
 */
struct str_ref {
	char const* string;
	int length;
	bool allocated;
};

/*
 * The empty reference. Declared static so that each translation unit
 * including this header gets its own copy; a non-static definition in a
 * header would be defined once per including file and clash at link time.
 */
static const struct str_ref empty_str = { NULL, 0, false };
 41+
 42+extern inline void str_ref_free(struct str_ref* str) {
 43+ if ( str->allocated ) {
 44+ efree( (char*)str->string);
 45+ }
 46+}
 47+
3448 struct node {
3549 enum nodeTypes type;
3650 char flags;
@@ -37,7 +51,7 @@
3852 int contentLength;
3953
4054 /* Relevant only for nodes with childs */
41 - int index; /* index inside nodeString */
 55+ int index; /* index inside nodeString (preprocess) / space of children read (expand) */
4256 struct node* parent;
4357
4458 /* Used for headings */
@@ -51,6 +65,14 @@
5266 int eqpos; /* Name nodes */
5367 int argIndex; /* Brace nodes */
5468 /* Compact me: Move the last three blocks into an union */
 69+
 70+ union {
 71+ struct {
 72+ struct str_ref expanded;
 73+ struct str_ref name, attr, inner, close;
 74+ } ext_data;
 75+ };
 76+
5577 };
5678
5779 struct literalNode {
@@ -224,6 +246,14 @@
225247 }
226248
/**
 * Get the contentLength value from a node serialized at pointer.
 * The contentLength is a hexadecimal value in bytes 8-15
 * (eight digits, most significant first).
 */
static inline int getContentLength(const char* pointer) {
	unsigned int value = 0;
	int i;

	/*
	 * Accumulate in an unsigned variable: with eight digits the first one
	 * ends up in the top four bits, and shifting a signed int into its
	 * sign bit is undefined behavior.
	 */
	for ( i = 8; i <= 15; i++ ) {
		value = ( value << 4 ) | (unsigned int) hex2dec( pointer[i] );
	}

	return (int) value;
}
 256+
 257+/**
228258 * Get the output string that would result if the close is not found.
229259 *
230260 * TODO: Reduce space by collapsing nodes here.

Status & tagging log