Index: trunk/extensions/NativePreprocessor/expand.c |
— | — | @@ -0,0 +1,209 @@ |
| 2 | + |
| 3 | +#include <string.h> |
| 4 | +#include <stdbool.h> |
| 5 | + |
| 6 | +#include "php.h" |
| 7 | +#include "ext/standard/php_string.h" |
| 8 | + |
| 9 | +#undef NDEBUG |
| 10 | +#include <assert.h> |
| 11 | + |
| 12 | +#include "nodes.h" |
| 13 | + |
| 14 | +/* PPFRAME flags: bit values tested against the `flags` argument of expand() */
| 15 | +#define NO_ARGS 1 /* not referenced by the code visible in this file */
| 16 | +#define NO_TEMPLATES 2 /* not referenced by the code visible in this file */
| 17 | +#define STRIP_COMMENTS 4 /* expand(): comment nodes add nothing to the output */
| 18 | +#define NO_IGNORE 8 /* expand(): the text of ignore nodes is added to the output */
| 19 | +#define RECOVER_COMMENTS 16 /* expand(): with OT_WIKI, keep the literal comment instead of inserting a strip marker */
| 20 | + |
| 21 | +enum ParserOutputTypes { /* output type stored in PPFrame_Native.parserOT; selects comment/ignore handling in expand() */
| 22 | + OT_HTML, /* comment nodes add nothing to the output */
| 23 | + OT_WIKI, /* comments become strip markers unless RECOVER_COMMENTS; ignore nodes are kept when the frame has no parent */
| 24 | + OT_PREPROCESS, /* comment nodes are dropped only when removeCommentsOption is set */
| 25 | + OT_PLAIN /* not referenced by the code visible in this file */
| 26 | +};
| 27 | + |
| 28 | +struct PPFrame_Native { /* frame state read by expand() */
| 29 | + void* parser; /* opaque parser handle; expand() passes it to insertStripItem() */
| 30 | + enum ParserOutputTypes parserOT; /* output type; drives comment and ignore node handling */
| 31 | + bool removeCommentsOption; /* with OT_PREPROCESS, makes expand() drop comment nodes */
| 32 | +
| 33 | + /* if this PPFrame is also a PPTemplateFrame */
| 34 | + void *parent; /* expand() only tests it for NULL (ignore node handling) */
| 35 | +};
| 36 | + |
| 37 | +/* Call insertStripItem() in the parser object, and return the resulting
| 38 | + * zval, which shall be a string (or null). Not implemented yet: the stub returns NULL, which expand()
| 39 | + * handles as "no strip marker", instead of falling off the end of a non-void function (undefined behaviour). */
| 40 | +zval* insertStripItem(void* parser, const char* text, int len) {
| 41 | +    return NULL; /* CODE ME */
| 42 | +}
| 43 | +/* Allocate a node describing the serialized record at nodeString and link it to parentNode. */
| 44 | +struct node* unserializeNode(const char* nodeString, struct node* parentNode) {
| 45 | +    struct node* fresh = alloc_node();
| 46 | +    fresh->parent = parentNode;
| 47 | +    fresh->index = 0; /* nothing read below this node yet */
| 48 | +    fresh->type = nodeString[0]; /* byte 0 holds the node type */
| 49 | +    fresh->flags = nodeString[1] - '0'; /* byte 1: presumably an ASCII digit, converted to its value */
| 50 | +    fresh->nextSibling = getNextSibling( nodeString );
| 51 | +    fresh->contentLength = getContentLength( nodeString );
| 52 | +
| 53 | +    return fresh;
| 54 | +}
| 55 | + |
| 56 | +#define addText(val,len) if ( !expanded.string ) { expanded.string = val; expanded.length = len; } else { } |
| 57 | +/* Walk the serialized nodes in nodeString (NODE_LEN bytes each) over text, collecting output with addText(). Returns 0 once every node was visited, -1 if nodeStringLen is not a multiple of NODE_LEN, -2 if it holds no node. Work in progress: text_len is not checked and the collected text is not returned yet. */
| 58 | +int expand(struct PPFrame_Native* frame, const char* nodeString, int nodeStringLen, const char* text, int text_len, int flags) {
| 59 | +    struct node *curNode = NULL;
| 60 | +    struct str_ref expanded = empty_str;
| 61 | +
| 62 | +    /* TODO: Check the parser nodeCount and expansionDepth */
| 63 | +
| 64 | +    if ( nodeStringLen % NODE_LEN ) {
| 65 | +        return -1;
| 66 | +    }
| 67 | +    if ( nodeStringLen < NODE_LEN ) {
| 68 | +        return -2;
| 69 | +    }
| 70 | +
| 71 | +    char const* textPos = text; /* next unconsumed byte of text */
| 72 | +
| 73 | +    for ( ; nodeStringLen > 0; ) {
| 74 | +        curNode = unserializeNode( nodeString, curNode );
| 75 | +        nodeString += NODE_LEN; nodeStringLen -= NODE_LEN;
| 76 | +
| 77 | +        /* Entering the node: act on its type */
| 78 | +        switch (curNode->type) {
| 79 | +        case literal_node:
| 80 | +            addText( textPos, curNode->contentLength );
| 81 | +            textPos += curNode->contentLength;
| 82 | +            break;
| 83 | +
| 84 | +        case template_node:
| 85 | +            //TODO
| 86 | +            //ParserOT
| 87 | +            /* fallthrough: neither templates nor template arguments are expanded yet */
| 88 | +        case tplarg_node:
| 89 | +            //TODO
| 90 | +            break;
| 91 | +
| 92 | +        case comment_node:
| 93 | +            // HTML-style comment
| 94 | +            // Remove it in HTML, pre+remove and STRIP_COMMENTS modes
| 95 | +            if ( frame->parserOT == OT_HTML
| 96 | +                || ( frame->parserOT == OT_PREPROCESS && frame->removeCommentsOption )
| 97 | +                || ( flags & STRIP_COMMENTS ) )
| 98 | +            {
| 99 | +                /* Add nothing */
| 100 | +            }
| 101 | +
| 102 | +            // Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
| 103 | +            // Not in RECOVER_COMMENTS mode (extractSections) though
| 104 | +            else if ( frame->parserOT == OT_WIKI && ! ( flags & RECOVER_COMMENTS ) ) {
| 105 | +                zval * stripItem;
| 106 | +                stripItem = insertStripItem( frame->parser, textPos, curNode->contentLength );
| 107 | +                if ( stripItem ) {
| 108 | +                    if ( Z_TYPE_P( stripItem ) == IS_STRING )
| 109 | +                        addText( Z_STRVAL_P( stripItem ), Z_STRLEN_P( stripItem ) );
| 110 | +                    Z_DELREF_P( stripItem ); /* NOTE(review): expanded borrows the zval's buffer; confirm it outlives this release */
| 111 | +                }
| 112 | +            }
| 113 | +            // Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
| 114 | +            else {
| 115 | +                addText( textPos, curNode->contentLength );
| 116 | +            }
| 117 | +            textPos += curNode->contentLength;
| 118 | +            break;
| 119 | +        case ignore_node:
| 120 | +            // Output suppression used by <includeonly> etc.
| 121 | +            // OT_WIKI will only respect <ignore> in substed templates.
| 122 | +            // The other output types respect it unless NO_IGNORE is set.
| 123 | +            // extractSections() sets NO_IGNORE and so never respects it.
| 124 | +            if ( ( !frame->parent && frame->parserOT == OT_WIKI ) || ( flags & NO_IGNORE ) ) {
| 125 | +                addText( textPos, curNode->contentLength );
| 126 | +            } else {
| 127 | +                /* Add nothing */
| 128 | +            }
| 129 | +            textPos += curNode->contentLength;
| 130 | +            break;
| 131 | +        case ext_node:
| 132 | +            curNode->ext_data.expanded = expanded; /* park the text collected so far in the ext node */
| 133 | +            expanded = empty_str;
| 134 | +            curNode->ext_data.name = curNode->ext_data.attr = curNode->ext_data.inner = curNode->ext_data.close = empty_str; /* inner/close are tested by the asserts below, so reset them too */
| 135 | +            break;
| 136 | +        case name_node:
| 137 | +            if ( curNode->parent && curNode->parent->type == ext_node ) { /* a name node may have no parent: check before dereferencing */
| 138 | +                curNode->parent->ext_data.name.string = textPos; /* stored in the parent ext node, like attr/inner/close */
| 139 | +                curNode->parent->ext_data.name.length = curNode->contentLength;
| 140 | +                textPos += curNode->contentLength;
| 141 | +            }
| 142 | +            break;
| 143 | +        case attr_node:
| 144 | +            weak_assert( curNode->parent->type == ext_node );
| 145 | +            weak_assert( !curNode->parent->ext_data.attr.string );
| 146 | +            curNode->parent->ext_data.attr.string = textPos;
| 147 | +            curNode->parent->ext_data.attr.length = curNode->contentLength;
| 148 | +            textPos += curNode->contentLength;
| 149 | +            break;
| 150 | +        case inner_node:
| 151 | +            weak_assert( curNode->parent->type == ext_node );
| 152 | +            weak_assert( !curNode->parent->ext_data.inner.string );
| 153 | +            curNode->parent->ext_data.inner.string = textPos;
| 154 | +            curNode->parent->ext_data.inner.length = curNode->contentLength;
| 155 | +            textPos += curNode->contentLength;
| 156 | +            break; /* was missing: inner nodes also ran the close_node code and consumed their text twice */
| 157 | +        case close_node:
| 158 | +            weak_assert( curNode->parent->type == ext_node );
| 159 | +            weak_assert( !curNode->parent->ext_data.close.string );
| 160 | +            curNode->parent->ext_data.close.string = textPos;
| 161 | +            curNode->parent->ext_data.close.length = curNode->contentLength;
| 162 | +            textPos += curNode->contentLength;
| 163 | +            break;
| 164 | +
| 165 | +        default: /* every other node type: nothing to do on entry yet */
| 166 | +            //TODO
| 167 | +            break;
| 168 | +        }
| 169 | +        curNode->index += NODE_LEN;
| 170 | +
| 171 | +        while ( curNode && curNode->index >= curNode->nextSibling ) { /* leave every node whose span has been fully read */
| 172 | +            struct node* tmp;
| 173 | +            tmp = curNode->parent;
| 174 | +
| 175 | +            if ( tmp ) {
| 176 | +                zval* z;
| 177 | +                tmp->index += curNode->index; /* the parent has now read this child's span as well */
| 178 | +
| 179 | +                /* Run curNode destructor */
| 180 | +                switch ( curNode->type ) {
| 181 | +                case ext_node:
| 182 | +                    z = extensionSubstitutionInternal();
| 183 | +                    if ( z ) {
| 184 | +                        if ( Z_TYPE_P( z ) == IS_STRING )
| 185 | +                            addText( Z_STRVAL_P( z ), Z_STRLEN_P( z ) );
| 186 | +                        Z_DELREF_P( z );
| 187 | +                    }
| 188 | +                    break;
| 189 | +                case heading_node:
| 190 | +                case h1_node ... h6_node: /* GNU case-range extension */
| 191 | +                    if ( curNode->parent && curNode->parent->type == root_node
| 192 | +                        && frame->parserOT == OT_WIKI )
| 193 | +                    {
| 194 | +                        z = getMarker( curNode->flags );
| 195 | +                        if ( z ) {
| 196 | +                            if ( Z_TYPE_P( z ) == IS_STRING )
| 197 | +                                addText( Z_STRVAL_P( z ), Z_STRLEN_P( z ) );
| 198 | +                            Z_DELREF_P( z );
| 199 | +                        }
| 200 | +                    }
| 201 | +                }
| 202 | +            }
| 203 | +            free_node( curNode );
| 204 | +            curNode = tmp;
| 205 | +        }
| 206 | +    }
| 207 | +
| 208 | +    /* All nodes visited. The unused failure label is gone; NOTE(review): 0 is assumed to mean success, confirm with the callers. */
| 209 | +    return 0;
| 210 | +}
Property changes on: trunk/extensions/NativePreprocessor/expand.c |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 211 | + native |
Index: trunk/extensions/NativePreprocessor/config.m4 |
— | — | @@ -5,6 +5,7 @@ |
6 | 6 | |
7 | 7 | if test "$PHP_MEDIAWIKIPREPROCESSOR" != "no"; then |
8 | 8 | dnl Enable the extension |
9 | | - PHP_NEW_EXTENSION(mediawiki_preprocessor, mediawiki_preprocessor.c tag_util.c preprocesstoobj.c, $ext_shared) |
| 9 | + PHP_NEW_EXTENSION(mediawiki_preprocessor, mediawiki_preprocessor.c tag_util.c preprocesstoobj.c expand.c, $ext_shared) |
10 | 10 | PHP_SUBST(MEDIAWIKI_PREPROCESSOR_SHARED_LIBADD) |
11 | 11 | fi |
| 12 | + |
Index: trunk/extensions/NativePreprocessor/nodes.h |
— | — | @@ -30,6 +30,20 @@ |
31 | 31 | closebrace_node = '}', |
32 | 32 | }; |
33 | 33 | |
| 34 | +/* Node types that may contain children: root_node, ext_node, name_node, heading_node (h?_node), template_node, tplarg_node, title_node, part_node, value_node */
| 35 | +/* Counted string reference: `length` bytes starting at `string`. empty_str is static so that each source file including this header gets a private copy instead of clashing definitions at link time. */
| 36 | +static const struct str_ref {
| 37 | +    char const* string; /* start of the text; NULL in empty_str */
| 38 | +    int length; /* number of bytes at string */
| 39 | +    bool allocated; /* when true, str_ref_free() releases string with efree() */
| 40 | +} empty_str = { NULL, 0, false };
| 41 | +/* Release the text referenced by str when it owns it (allocated is set); the fields themselves are left unchanged. static inline, like getContentLength(), so the header can be included from several source files. */
| 42 | +static inline void str_ref_free(struct str_ref* str) {
| 43 | +    if ( str->allocated ) {
| 44 | +        efree( (char*)str->string );
| 45 | +    }
| 46 | +}
| 47 | + |
34 | 48 | struct node { |
35 | 49 | enum nodeTypes type; |
36 | 50 | char flags; |
— | — | @@ -37,7 +51,7 @@ |
38 | 52 | int contentLength; |
39 | 53 | |
40 | 54 | /* Relevant only for nodes with childs */ |
41 | | - int index; /* index inside nodeString */ |
| 55 | + int index; /* index inside nodeString (preprocess) / space of children read (expand) */ |
42 | 56 | struct node* parent; |
43 | 57 | |
44 | 58 | /* Used for headings */ |
— | — | @@ -51,6 +65,14 @@ |
52 | 66 | int eqpos; /* Name nodes */ |
53 | 67 | int argIndex; /* Brace nodes */ |
54 | 68 | /* Compact me: Move the last three blocks into an union */ |
| 69 | + |
| 70 | + union { |
| 71 | + struct { |
| 72 | + struct str_ref expanded; |
| 73 | + struct str_ref name, attr, inner, close; |
| 74 | + } ext_data; |
| 75 | + }; |
| 76 | + |
55 | 77 | }; |
56 | 78 | |
57 | 79 | struct literalNode { |
— | — | @@ -224,6 +246,14 @@ |
225 | 247 | } |
226 | 248 | |
227 | 249 | /** |
| 250 | + * Decode the contentLength of the node serialized at pointer: the hexadecimal digits in bytes 8-15, most significant digit first. */
| 251 | +static inline int getContentLength(const char* pointer) {
| 252 | +    int length = 0;
| 253 | +    for ( int pos = 8; pos <= 15; pos++ ) { length = ( length << 4 ) | hex2dec( pointer[pos] ); } /* shift in one hex digit per byte */
| 254 | +    return length;
| 255 | +}
| 256 | + |
| 257 | +/** |
228 | 258 | * Get the output string that would result if the close is not found. |
229 | 259 | * |
230 | 260 | * TODO: Reduce space by collapsing nodes here. |