Index: trunk/extensions/NativePreprocessor/Preprocessor_Native.php |
— | — | @@ -0,0 +1,115 @@ |
| 2 | +<?php |
| 3 | + |
// Select Preprocessor_Native as the parser's preprocessor class, but only
// when the MediaWikiPreprocessor class (supplied by the native extension)
// is actually available; otherwise the configuration is left untouched.
if ( class_exists( 'MediaWikiPreprocessor' ) ) {
	$GLOBALS['wgParserConf']['preprocessorClass'] = 'Preprocessor_Native';
}
| 8 | + |
/**
 * Preprocessor that delegates the parsing work to the MediaWikiPreprocessor
 * class and can turn its compact node serialization into the XML form.
 *
 * Serialized node layout, as read by unserializeNode(): every node starts
 * with a NODE_LEN-byte header
 *   byte  0     node type character
 *   byte  1     flags, stored as chr( 48 + flags )
 *   bytes 2-7   hexadecimal length of the serialized children
 *   bytes 8-15  hexadecimal length of the source text owned by the node
 * followed by the serialized children.
 */
class Preprocessor_Native implements Preprocessor {
	/** Size in bytes of a serialized node header. */
	const NODE_LEN = 16;

	/** Parser whose strip list is handed to the native preprocessor. */
	public $parser;

	/**
	 * @param $parser Parser the preprocessor works for
	 */
	function __construct( $parser ) {
		$this->parser = $parser;
	}

	/**
	 * Preprocess some wikitext.
	 *
	 * @param $text String: wikitext to preprocess
	 * @param $flags Integer: bit field passed through to the native preprocessor
	 * @return Array with the original 'text' and the serialized 'nodes'
	 */
	function preprocessToObj( $text, $flags = 0 ) {
		$ntobj = $this->preprocessToObjInternal( $text, $flags );

		return array( 'text' => $text, 'nodes' => $ntobj );
	}

	/**
	 * Run the native preprocessor and return its raw serialized node string.
	 *
	 * @param $text String: wikitext to preprocess
	 * @param $flags Integer: bit field passed through to the native preprocessor
	 * @return String: serialized nodes
	 */
	function preprocessToObjInternal( $text, $flags = 0 ) {
		$nativePP = new MediaWikiPreprocessor();

		return $nativePP->preprocessToObjInternal( $text, $flags, $this->parser->getStripList() );
	}

	/**
	 * Completely inefficient function to transform into the xml serialization.
	 *
	 * @param $text String: wikitext to preprocess
	 * @param $flags Integer: bit field passed through to the native preprocessor
	 * @return String: XML serialization of the node tree
	 */
	function preprocessToXml( $text, $flags = 0 ) {
		$ser = $this->preprocessToObjInternal( $text, $flags );

		return $this->unserializeNode(
			substr( $ser, 0, self::NODE_LEN ),
			substr( $ser, self::NODE_LEN ),
			$text
		);
	}

	/**
	 * Convert one serialized node, and recursively its children, to XML.
	 *
	 * @param $node String: the NODE_LEN-byte header of the node
	 * @param $children String: serialized data following the header; only the
	 *        first "children length" bytes belong to this node
	 * @param &$text String: source text not yet consumed; the text owned by
	 *        this node and by its children is removed from its front
	 * @return String: XML fragment
	 * @throws MWException on a truncated header, a text length larger than
	 *         the remaining text, or an unknown node type
	 */
	function unserializeNode( $node, $children, &$text ) {
		// substr() may hand us false/'' when the serialization is cut short
		$node = (string)$node;
		if ( strlen( $node ) < self::NODE_LEN ) {
			throw new MWException( 'Truncated node header' );
		}

		$type = $node[0];
		$flags = ord( $node[1] ) - 48;
		$childrenLen = hexdec( substr( $node, 2, 6 ) );
		$textLen = hexdec( substr( $node, 8, 8 ) );

		if ( strlen( $text ) < $textLen ) {
			throw new MWException( 'Bad length in node' );
		}
		$result = htmlspecialchars( substr( $text, 0, $textLen ) );
		// Cast: older PHP versions return false when nothing is left
		$text = (string)substr( $text, $textLen );

		// These node types consume their own text without emitting it
		if ( strpos( '<et|p', $type ) !== false ) {
			$result = ''; // Not present in Preprocessor_DOM
		}

		while ( $childrenLen > 0 ) {
			$childHeader = substr( $children, 0, self::NODE_LEN );
			$result .= $this->unserializeNode( $childHeader, substr( $children, self::NODE_LEN ), $text );
			// Skip over the child just handled: its header plus its own children
			$consumed = self::NODE_LEN + hexdec( substr( $childHeader, 2, 6 ) );
			$children = (string)substr( $children, $consumed );
			$childrenLen -= $consumed;
		}

		switch ( $type ) {
			case '/':
				return "<root>$result</root>";
			case 'L':
				return $result;
			case 'I':
				return "<ignore>$result</ignore>";
			case '-':
				return "<comment>$result</comment>";
			case '<':
				return "<ext>$result</ext>";
			case 'N':
				if ( $flags ) {
					return "<name index=\"$flags\" />";
				}
				return "<name>$result</name>";
			case 'a':
				return "<attr>$result</attr>";
			case 'e':
				return $result;
			case '.':
				return "<inner>$result</inner>";
			case '>':
				return "<close>$result</close>";
			case 'i':
			case 'j':
			case 'k':
			case 'l':
			case 'm':
			case 'n':
				// 'i'..'n' encode heading levels 1..6; the flags byte holds the heading index
				$level = ord( $type ) - ord( 'h' );
				return "<h level=\"" . $level . "\" i=\"" . $flags . "\">$result</h>";
			case 't':
				$lineStart = $flags ? " lineStart=\"1\"" : "";
				return "<template$lineStart>$result</template>";
			case 'p':
				return "<tplarg>$result</tplarg>";
			case 'T':
				return "<title>$result</title>";
			case '|':
				return "<part>$result</part>";
			case 'v':
				return "<value>$result</value>";
			case '}':
				return '';
			default:
				throw new MWException( "Unknown node of type '" . $type . "'" );
		}
	}

	/**
	 * Not implemented by this preprocessor.
	 * @throws MWException always
	 */
	function newFrame() {
		throw new MWException( __METHOD__ . ' unimplemented' );
	}

	/**
	 * Not implemented by this preprocessor.
	 * @throws MWException always
	 */
	function newCustomFrame( $args ) {
		throw new MWException( __METHOD__ . ' unimplemented' );
	}

	/**
	 * Not implemented by this preprocessor.
	 * @throws MWException always
	 */
	function newPartNodeArray( $values ) {
		throw new MWException( __METHOD__ . ' unimplemented' );
	}
}
Property changes on: trunk/extensions/NativePreprocessor/Preprocessor_Native.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 117 | + native |
Index: trunk/extensions/NativePreprocessor/config.m4 |
— | — | @@ -0,0 +1,10 @@ |
dnl The fourth argument of PHP_ARG_ENABLE ('no') is the value used when the
dnl option is not given to configure; change it to 'yes' to build the
dnl extension by default.

PHP_ARG_ENABLE(mediawiki-preprocessor, mediawiki preprocessor support,
[  --enable-mediawiki-preprocessor   Include MediaWiki preprocessor extension], no, yes)

dnl PHP_ARG_ENABLE stores its result in PHP_<NAME>, where <NAME> is the option
dnl name upper-cased with dashes replaced by underscores.
if test "$PHP_MEDIAWIKI_PREPROCESSOR" != "no"; then
  dnl Enable the extension
  PHP_NEW_EXTENSION(mediawiki_preprocessor, mediawiki_preprocessor.c in_array.c preprocesstoobj.c, $ext_shared)
  PHP_SUBST(MEDIAWIKI_PREPROCESSOR_SHARED_LIBADD)
fi
Index: trunk/extensions/NativePreprocessor/preprocesstoobj.c |
— | — | @@ -0,0 +1,830 @@ |
| 2 | +#include <string.h> |
| 3 | +#include <stdbool.h> |
| 4 | + |
| 5 | +#include "php.h" |
| 6 | +#include "ext/standard/php_string.h" |
| 7 | + |
| 8 | +#undef NDEBUG |
| 9 | +#include <assert.h> |
| 10 | + |
| 11 | +#include "in_array.h" |
| 12 | +#include "nodes.h" |
| 13 | + |
| 14 | +#define PTD_FOR_INCLUSION 1 /* Matches Parser::PTD_FOR_INCLUSION */ |
| 15 | + |
// FIXME: Do not rely on the terminating \0

/*
 * True (non-zero) when needle occurs anywhere in haystack, false otherwise.
 * strpos() reports "not found" as -1 and a match at the very start as 0, so
 * its raw result cannot be used as a truth value; compare it explicitly.
 */
#define STRSTR(haystack, needle) (strpos(haystack, needle, 0) != -1)

/**
 * Finds the first occurrence of needle in haystack at or after offset.
 * Both strings must be NUL-terminated and offset must not point past the
 * terminator of haystack.
 *
 * @return Position of the match counted from the start of haystack,
 *         or -1 if needle does not occur.
 */
int strpos(const char* haystack, const char* needle, int offset) {
	const char* hit = strstr(haystack + offset, needle);
	if (hit == NULL) {
		return -1;
	}
	return (int)(hit - haystack);
}

/* Length of a string literal, not counting its terminating NUL */
#define strsize(x) (sizeof(x)-1)
/* Smaller of two values; note that the arguments are evaluated twice */
#define min(x,y) (((x) < (y)) ? (x) : (y))
| 26 | + |
/*
 * Tags handled by the preprocessor itself.
 * The order matters: the isIgnoredTag() macro in preprocessToObj() tests
 * "> includeonly", and the values index internalTagNames[].
 */
enum internalTags {
	None,
	includeonly,
	onlyinclude,
	noinclude
};
const char* internalTagNames[] = { NULL, "includeonly", "onlyinclude", "noinclude" };

/**
 * Identifies an internal tag by name, ignoring case.
 * A single leading '/' (closing tag) is skipped before comparing.
 *
 * @param name     Tag name; only the first name_len bytes are examined
 * @param name_len Length of name
 * @return The matching internalTags value, or None when the name is empty
 *         or is not an internal tag.
 */
enum internalTags getInternalTag(const char* name, int name_len) {
	int tag;

	/* Nothing to look at: do not touch name[0] */
	if (name == NULL || name_len <= 0) {
		return None;
	}
	if (name[0] == '/') {
		name++;
		name_len--;
	}
	for (tag = includeonly; tag <= noinclude; tag++) {
		const char* candidate = internalTagNames[tag];
		if ((int)strlen(candidate) == name_len && !strncasecmp(name, candidate, (size_t)name_len)) {
			return (enum internalTags)tag;
		}
	}
	return None;
}
| 46 | + |
#define pipe foundPipe /* Avoid conflicts with pipe(2) */

/*
 * What the scanner found at the current position. The two line events are
 * synthetic; every other member has the value of the character it stands
 * for, so a character read from the text can be stored directly.
 * Each closing member is its opening member plus 2 ('[' + 2 == ']',
 * '{' + 2 == '}').
 */
enum foundTypes {
	lineStart    = 0,
	lineEnd      = 1,
	openBracket  = '[',
	closeBracket = ']',
	openBrace    = '{',
	closeBrace   = '}',
	pipe         = '|',
	equals       = '=',
	angle        = '<'
};
| 60 | + |
/* Both macros operate on a local char array named `search` */
#define searchReset() strcpy(search, "[{<\n") // $search = $searchBase;
#define addSearch(x) addToSearch(search, sizeof(search), x) // $search .= 'x';
/* Every character that may ever be searched for; also sizes the buffer */
#define MAX_SEARCH_CHARS "[{<\n|=}]"

/**
 * Appends one character to the NUL-terminated search set.
 * Aborts (assert) if x is not one of MAX_SEARCH_CHARS or if the buffer
 * lacks room. Passing '\0' leaves the set unchanged.
 *
 * @param search     Buffer holding the current search set
 * @param search_len Total size of the buffer in bytes
 * @param x          Character to append
 */
void addToSearch(char* search, int search_len, char x) {
	int used;

	assert(strchr(MAX_SEARCH_CHARS, x));
	used = strlen(search);
	assert(used + 2 < search_len);
	search[used++] = x;
	search[used] = '\0';
}
| 72 | + |
/**
 * Length of the run of text, starting at offset, that contains none of the
 * characters of search. Thin wrapper around php_strcspn(), which receives
 * the region [text + offset, text + text_len) and the NUL-terminated search
 * set as begin/end pointer pairs.
 */
size_t mwpp_strcspn(const char* text, int text_len, const char* search, int offset) {
	/* Optimize me */
	const char* region_begin = text + offset;
	const char* region_end = text + text_len;
	const char* search_end = search + strlen(search);

	return php_strcspn( region_begin, search, region_end, search_end );
}
| 78 | + |
/**
 * Length of the run of consecutive characters equal to c that begins at
 * text[start]. At most length characters are examined.
 */
int chrspn( const char* text, int c, int start, int length ) {
	int run = 0;
	while ( run < length && text[start + run] == c ) {
		run++;
	}
	return run;
}
| 91 | + |
/**
 * Returns the first index in text holding either a closing angle bracket
 * '>' or one of the whitespace characters space, \t, \n, \f, \r.
 * The vertical tab (0x0B) is deliberately not in the set, as the function
 * mirrors a Perl-compatible \s.
 * Returns -1 if none of them occurs within the first text_len bytes.
 */
int findSpaceOrAngle(const char* text, int text_len) {
	static const char terminators[] = { ' ', '\t', '\n', '\f', '\r', '>' };
	int pos = 0;

	while ( pos < text_len ) {
		if ( memchr( terminators, text[pos], sizeof(terminators) ) != NULL ) {
			return pos;
		}
		pos++;
	}
	return -1;
}
| 112 | + |
/* Whitespace allowed between the name of an end tag and its '>' (same set as findSpaceOrAngle) */
static int isEndTagSpace( char ch ) {
	return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r';
}

/**
 * Locates an end tag for the given tag name.
 * Matches the regex "/<\/$name\s*>/i"
 * Doesn't (completely) support tag names which contain '<'
 *
 * @param text String: Text in which to find the tag
 * @param text_len int: Length of text
 * @param from int: Offset from which to begin the search
 * @param name String: lowercase name of the tag to close
 * @param name_len int: length of name
 * @param endTagLen int*: length of the found tag (output value); only written when a tag is found
 * @return int: The position from text where the end tag begins or -1 if not found
 */
static int findEndTag( const char* text, int text_len, int from, const char* name, int name_len, int* endTagLen ) {
	int i, j;
	/* The bound leaves room for "</", the name and at least one more byte */
	for (i = from; i < text_len - 2 - name_len; i++) {
		if ( text[i] != '<' || text[i+1] != '/' ) {
			continue;
		}

		/* Case-insensitive name comparison; ctype functions need unsigned char values */
		for (j = 0; j < name_len; j++) {
			if ( (unsigned char) name[j] != tolower( (unsigned char) text[i+2+j] ) ) {
				break;
			}
		}
		if ( j < name_len ) {
			/* The bytes that did match cannot start another "</" */
			i += j;
			continue;
		}

		/* \s* -- never look beyond text_len */
		while ( i + 2 + j < text_len && isEndTagSpace( text[i+2+j] ) ) {
			j++;
		}
		if ( i + 2 + j < text_len && text[i+2+j] == '>' ) {
			*endTagLen = j + (int)( sizeof("</>") - 1 );
			return i;
		}
		i += j;
	}
	return -1;
}
| 148 | + |
/**
 * Length of the run of consecutive characters equal to c that ends at
 * text[start], scanning backwards and never going below index 0.
 */
int chrrspn( const char* text, int c, int start ) {
	int pos = start;
	while ( pos >= 0 && text[pos] == c ) {
		pos--;
	}
	return start - pos;
}
| 159 | + |
| 160 | +char* preprocessToObj( const char* text, int text_len, int flags, HashTable* parserStripList, int* preprocessed_len ) { |
| 161 | + DEFINE_NODE_STRING() |
| 162 | + |
| 163 | + /* The php preprocessors have an array of rules to use, |
| 164 | + * Those are hardcoded here. Places relying on it are |
| 165 | + * marked with a 'Known rules' comment. |
| 166 | + */ |
| 167 | + #define BraceRuleMin 2 |
| 168 | + #define BraceRuleMax 3 |
| 169 | + #define BracketRuleMin 2 |
| 170 | + #define BracketRuleMax 2 |
| 171 | + |
| 172 | + bool forInclusion = flags & PTD_FOR_INCLUSION; |
| 173 | + |
| 174 | + bool enableOnlyinclude = false; |
| 175 | + enum internalTags ignoredElement; /* Act as this tag isn't there */ |
| 176 | + |
| 177 | + HashTable* xmlishElements = parserStripList; |
| 178 | + /* Instead of $xmlishRegex, we use directly the stripList. |
| 179 | + * As it is shared with Parser, includeonly/onlyinclude/noinclude are handled separatedly. |
| 180 | + * Per Parser::set{FunctionTag,}Hook(), the items are all strings and lowercase. |
| 181 | + */ |
| 182 | + |
| 183 | + if ( forInclusion ) { |
| 184 | + /* $ignoredTags = array( 'includeonly', '/includeonly' ); */ |
| 185 | + ignoredElement = noinclude; |
| 186 | + if ( STRSTR( text, "<onlyinclude>" ) && STRSTR( text, "</onlyinclude>" ) ) { |
| 187 | + enableOnlyinclude = true; |
| 188 | + } |
| 189 | + } else { |
| 190 | + /* $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ); */ |
| 191 | + ignoredElement = includeonly; |
| 192 | + } |
| 193 | + #define isIgnoredTag(internalTag) (forInclusion ? ((internalTag) == includeonly) : ((internalTag) > includeonly) ) |
| 194 | + |
| 195 | + int i = 0; |
| 196 | + bool findEquals = false; // True to find equals signs in arguments |
| 197 | + bool findPipe = false; // True to take notice of pipe characters |
| 198 | + int headingIndex = 1; |
| 199 | + bool inHeading = false; // True if $i is inside a possible heading |
| 200 | + bool noMoreGT = false; // True if there are no more greater-than (>) signs right of $i |
| 201 | + bool findOnlyinclude = enableOnlyinclude; // True to ignore all input up to the next <onlyinclude> |
| 202 | + bool fakeLineStart = true; // Do a line-start run without outputting an LF character |
| 203 | + bool fakePipeFound = false; |
| 204 | + char currentClosing = '\0'; |
| 205 | + int lineStartPos = -1; |
| 206 | + char search[sizeof(MAX_SEARCH_CHARS)]; |
| 207 | + |
| 208 | + #define getFlags() \ |
| 209 | + inHeading = (parentNode->type == heading_node); \ |
| 210 | + findPipe = (parentNode->type != heading_node) && (parentNode->type != bracket_node); \ |
| 211 | + findEquals = findPipe && ( parentNode->nextSibling > 0 ) && ( parentNode->type != value_node ); |
| 212 | + |
| 213 | + while ( true ) { |
| 214 | + |
| 215 | + if ( findOnlyinclude ) { |
| 216 | + // Ignore all input up to the next <onlyinclude> |
| 217 | + int startPos = strpos( text, "<onlyinclude>", i ); |
| 218 | + if ( startPos == -1 ) { |
| 219 | + // Ignored section runs to the end |
| 220 | + addNodeWithText(ignore_node, text, i, -1); |
| 221 | + break; |
| 222 | + } |
| 223 | + int tagEndPos = startPos + strsize( "<onlyinclude>" ); // past-the-end |
| 224 | + addNodeWithText(ignore_node, text, i, tagEndPos - i); |
| 225 | + i = tagEndPos; |
| 226 | + findOnlyinclude = false; |
| 227 | + } |
| 228 | + |
| 229 | + enum foundTypes found; |
| 230 | + if ( fakeLineStart ) { |
| 231 | + found = lineStart; |
| 232 | + } else if ( fakePipeFound ) { |
| 233 | + found = pipe; |
| 234 | + } else { |
| 235 | + // Find next opening brace, closing brace or pipe |
| 236 | + searchReset(); |
| 237 | + if ( parentNode->type == root_node ) { |
| 238 | + currentClosing = 0; |
| 239 | + } else { |
| 240 | + /* This is too ugly */ |
| 241 | + if ( parentNode->type == heading_node ) { |
| 242 | + currentClosing = '\n'; |
| 243 | + } else if ( parentNode->type == '[' ) { |
| 244 | + currentClosing = ']'; /* Known rules */ |
| 245 | + } else if ( parentNode->parent && ( parentNode->parent->type == '{' |
| 246 | + || ( parentNode->parent->parent && parentNode->parent->parent->type == '{' ) ) ) { |
| 247 | + currentClosing = '}'; /* Known rules */ |
| 248 | + } else { |
| 249 | + currentClosing = 0; |
| 250 | + } |
| 251 | + addSearch( currentClosing ); |
| 252 | + } |
| 253 | + if ( findPipe ) { |
| 254 | + addSearch( '|' ); |
| 255 | + } |
| 256 | + if ( findEquals ) { |
| 257 | + // First equals will be for the template |
| 258 | + addSearch( '=' ); |
| 259 | + } |
| 260 | + |
| 261 | + // Output literal section, advance input counter |
| 262 | + size_t literalLength = mwpp_strcspn( text, text_len, search, i ); |
| 263 | + if ( literalLength > 0 ) { |
| 264 | + addLiteral( text, i, literalLength ); |
| 265 | + i += literalLength; |
| 266 | + } |
| 267 | + if ( i >= text_len ) { |
| 268 | + if ( currentClosing == '\n' ) { |
| 269 | + // Do a past-the-end run to finish off the heading |
| 270 | + found = lineEnd; |
| 271 | + } else if ( parentNode->type == name_node && parentNode->parent && parentNode->parent->type == part_node && findEquals ) { |
| 272 | + // Convert this part\name into a value and add the name |
| 273 | + fakePipeFound = true; |
| 274 | + found = pipe; |
| 275 | + } else { |
| 276 | + // All done |
| 277 | + break; |
| 278 | + } |
| 279 | + } else { |
| 280 | + switch ( text[i] ) { |
| 281 | + case '|': |
| 282 | + case '=': |
| 283 | + case '<': |
| 284 | + found = text[i]; |
| 285 | + break; |
| 286 | + case '\n': |
| 287 | + if ( inHeading ) { |
| 288 | + found = lineEnd; |
| 289 | + } else { |
| 290 | + found = lineStart; |
| 291 | + } |
| 292 | + break; |
| 293 | + case '}': /* Known rules */ |
| 294 | + case ']': |
| 295 | + if ( text[i] == currentClosing ) { |
| 296 | + found = currentClosing; |
| 297 | + } |
| 298 | + break; |
| 299 | + case '{': /* Known rules */ |
| 300 | + case '[': |
| 301 | + found = text[i]; |
| 302 | + break; |
| 303 | + |
| 304 | + default: |
| 305 | + // Some versions of PHP have a strcspn which stops on null characters {{refneeded}} |
| 306 | + // Ignore and continue |
| 307 | + ++i; |
| 308 | + continue; |
| 309 | + } |
| 310 | + } |
| 311 | + } |
| 312 | + |
| 313 | + if ( found == angle ) { |
| 314 | + // Determine which tag is this |
| 315 | + if ( enableOnlyinclude && strncasecmp( text + i, "</onlyinclude>", strsize( "</onlyinclude>" ) ) ) { |
| 316 | + findOnlyinclude = true; |
| 317 | + continue; |
| 318 | + } |
| 319 | + |
| 320 | + // Handle comments |
| 321 | + if ( !strncmp( text + i, "<!--", 4 ) ) { |
| 322 | + // To avoid leaving blank lines, when a comment is both preceded |
| 323 | + // and followed by a newline (ignoring spaces), trim leading and |
| 324 | + // trailing spaces and one of the newlines. |
| 325 | + |
| 326 | + // Find the end |
| 327 | + int endPos = strpos( text, "-->", i + 4 ); |
| 328 | + if ( endPos == -1 ) { |
| 329 | + // Unclosed comment in input, runs to end |
| 330 | + addNodeWithText(comment_node, text, i, -1); |
| 331 | + i = text_len; |
| 332 | + } else { |
| 333 | + // Search backwards for leading whitespace |
| 334 | + |
| 335 | + int wsStart; |
| 336 | + for (wsStart = i - 1; wsStart > 0; wsStart--) { |
| 337 | + if ( text[wsStart] != ' ') { /* It can't go over wikitext_len because the php string has a \0 terminator, too */ |
| 338 | + wsStart++; |
| 339 | + break; |
| 340 | + } |
| 341 | + } |
| 342 | + |
| 343 | + // Search forwards for trailing whitespace |
| 344 | + // wsEnd will be the position of the last space (or the > if there's none) |
| 345 | + int startPos, wsEnd = endPos + 3; |
| 346 | + while (text[wsEnd] == ' ') { wsEnd++; } |
| 347 | + wsEnd--; // A bit silly since we will be using wsEnd+1 everywhere, but we want to keep this the same as $wsEnd |
| 348 | + |
| 349 | + // Eat the line if possible |
| 350 | + // This could theoretically be done if $wsStart == 0, i.e. for comments at |
| 351 | + // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but |
| 352 | + // it's a possible beneficial b/c break. |
| 353 | + if ( wsStart > 0 && text[wsStart - 1] == '\n' && text[wsEnd + 1] == '\n' ) |
| 354 | + { |
| 355 | + startPos = wsStart; |
| 356 | + endPos = wsEnd + 1; |
| 357 | + // Remove leading whitespace from the end of the accumulator |
| 358 | + // Sanity check first though |
| 359 | + int wsLength = i - wsStart; |
| 360 | + if ( wsLength > 0 && currentLiteral.len >= wsLength ) { |
| 361 | + if ( strspn( text + currentLiteral.from + currentLiteral.len - wsLength, " " ) != wsLength ) { |
| 362 | + // Can this ever be false? |
| 363 | + assert(0); |
| 364 | + } |
| 365 | + currentLiteral.len -= wsLength; |
| 366 | + } |
| 367 | + // Do a line-start run next time to look for headings after the comment |
| 368 | + fakeLineStart = true; |
| 369 | + } else { |
| 370 | + // No line to eat, just take the comment itself |
| 371 | + startPos = i; |
| 372 | + endPos += 2; |
| 373 | + } |
| 374 | + |
| 375 | + if ( parentNode ) { |
| 376 | + if ( parentNode->commentEnd != -1 && parentNode->commentEnd == wsStart - 1 ) { |
| 377 | + // Comments abutting, no change in visual end |
| 378 | + parentNode->commentEnd = wsEnd; |
| 379 | + } else { |
| 380 | + parentNode->visualEnd = wsStart - 1; |
| 381 | + parentNode->commentEnd = endPos; |
| 382 | + } |
| 383 | + } |
| 384 | + i = endPos + 1; |
| 385 | + addNodeWithText(comment_node, text, startPos, endPos - startPos + 1); |
| 386 | + } |
| 387 | + continue; |
| 388 | + } |
| 389 | + |
| 390 | + if ( noMoreGT ) { |
| 391 | + addLiteral( text, i, 1 ); |
| 392 | + ++i; |
| 393 | + continue; |
| 394 | + } |
| 395 | + |
| 396 | + /** |
| 397 | + * We differ here from the $xmlishRegex approach |
| 398 | + * The regex ends the tag name with a \s character, /> or > |
| 399 | + * so we start seeking for them, then look which name is it. |
| 400 | + */ |
| 401 | + assert(text[i] == '<'); |
| 402 | + const char* name = text + i + 1; |
| 403 | + int name_len; |
| 404 | + /* TODO: optimize this search by not going further than |
| 405 | + * max( strlen( getParserStripList() + internalTags() ) ) |
| 406 | + * while not setting noMoreGT in such case. |
| 407 | + */ |
| 408 | + name_len = findSpaceOrAngle(name, text_len - i - 1); |
| 409 | + if ( name_len > 0 && name[name_len] == '>' && name[name_len - 1] == '/' ) { |
| 410 | + name_len--; |
| 411 | + } |
| 412 | + int attrStart = i + name_len + 1; |
| 413 | + |
| 414 | + int tagEndPos = -1; |
| 415 | + if ( name_len != -1 ) { |
| 416 | + // Find end of tag |
| 417 | + char* end = memchr(name + name_len, '>', text_len - i - 1); |
| 418 | + |
| 419 | + tagEndPos = end ? end - text : -1; |
| 420 | + } |
| 421 | + if ( tagEndPos == -1 ) { |
| 422 | + // Infinite backtrack |
| 423 | + // Disable tag search to prevent worst-case O(N^2) performance |
| 424 | + noMoreGT = true; |
| 425 | + addLiteral( text, i, 1 ); |
| 426 | + ++i; |
| 427 | + continue; |
| 428 | + } |
| 429 | + assert(text[tagEndPos] == '>'); |
| 430 | + |
| 431 | + enum internalTags internalTag; |
| 432 | + internalTag = getInternalTag(name, name_len); |
| 433 | + |
| 434 | + // Handle ignored tags |
| 435 | + if ( isIgnoredTag( internalTag ) ) { |
| 436 | + addNodeWithText( ignore_node, text, i, tagEndPos - i + 1 ); |
| 437 | + i = tagEndPos + 1; |
| 438 | + continue; |
| 439 | + } |
| 440 | + |
| 441 | + char * lowername; |
| 442 | + if ( internalTag == None ) { |
| 443 | + int j; |
| 444 | + // Verify that it's not just tag-looking text |
| 445 | + lowername = alloca( name_len + 1 ); /* FIXME */ |
| 446 | + for (j = 0; j < name_len; j++) { |
| 447 | + lowername[j] = tolower(name[j]); |
| 448 | + } |
| 449 | + lowername[j] = '\0'; |
| 450 | + if ( !str_in_array(lowername, name_len, xmlishElements, true) ) { |
| 451 | + addLiteral( text, i, 1 ); |
| 452 | + ++i; |
| 453 | + continue; |
| 454 | + } |
| 455 | + } else { |
| 456 | + lowername = (char*)internalTagNames[internalTag]; |
| 457 | + } |
| 458 | + |
| 459 | + int tagStartPos, attrEnd, endTagBegin, endTagLen; |
| 460 | + int innerTextBegin, innerTextLen; |
| 461 | + tagStartPos = i; endTagLen = 0; |
| 462 | + innerTextBegin = -1; innerTextLen = -1; |
| 463 | + |
| 464 | + if ( text[tagEndPos-1] == '/' ) { |
| 465 | + attrEnd = tagEndPos - 1; |
| 466 | + i = tagEndPos + 1; |
| 467 | + } else { |
| 468 | + attrEnd = tagEndPos; |
| 469 | + // Find closing tag |
| 470 | + |
| 471 | + endTagBegin = findEndTag( text, text_len, tagEndPos + 1, lowername, name_len, &endTagLen ); |
| 472 | + |
| 473 | + if ( endTagBegin != -1 ) |
| 474 | + { |
| 475 | + innerTextBegin = tagEndPos + 1; |
| 476 | + innerTextLen = endTagBegin - tagEndPos - 1; |
| 477 | + i = endTagBegin + endTagLen; |
| 478 | + } else { |
| 479 | + // No end tag -- let it run out to the end of the text. |
| 480 | + innerTextBegin = tagEndPos + 1; |
| 481 | + i = text_len; |
| 482 | + } |
| 483 | + } |
| 484 | + |
| 485 | + if ( isIgnoredTag( internalTag ) ) { |
| 486 | + addNodeWithText(ignore_node, text, tagStartPos, i - tagStartPos ); |
| 487 | + continue; |
| 488 | + } |
| 489 | + |
| 490 | + addNodeWithTags( ext_node, 1 ); /* The '<' is implicit in Preprocessor_DOM */ |
| 491 | + addNodeWithText( name_node, text, tagStartPos + 1, name_len ); |
| 492 | + |
| 493 | + // Note that the attr element contains the whitespace between name and attribute, |
| 494 | + // this is necessary for precise reconstruction during pre-save transform. |
| 495 | + assert(attrEnd >= attrStart); |
| 496 | + addNodeWithText( attr_node, text, attrStart, attrEnd - attrStart ); |
| 497 | + addNodeWithText( end_name_node, text, attrEnd, tagEndPos - attrEnd + 1 ); |
| 498 | + |
| 499 | + if ( innerTextBegin != -1 ) { |
| 500 | + addNodeWithText( inner_node, text, innerTextBegin, innerTextLen ); |
| 501 | + } |
| 502 | + if ( endTagLen ) { |
| 503 | + addNodeWithText( close_node, text, endTagBegin, endTagLen ); |
| 504 | + } |
| 505 | + closeNode( ext_node ); |
| 506 | + } |
| 507 | + else if ( found == lineStart ) { |
| 508 | + // Is this the start of a heading? |
| 509 | + // Line break belongs before the heading element in any case |
| 510 | + if ( fakeLineStart ) { |
| 511 | + fakeLineStart = false; |
| 512 | + } else { |
| 513 | + addLiteral( text, i, 1 ); |
| 514 | + i++; |
| 515 | + } |
| 516 | + |
| 517 | + int count = chrspn( text, '=', i, 6 ); |
| 518 | + if ( count == 1 && findEquals ) { |
| 519 | + // DWIM: This looks kind of like a name/value separator |
| 520 | + // Let's let the equals handler have it and break the potential heading |
| 521 | + // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex. |
| 522 | + } else if ( count > 0 ) { |
| 523 | + /* |
| 524 | + piece = array( |
| 525 | + 'open' => "\n", |
| 526 | + 'close' => "\n", |
| 527 | + 'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ), |
| 528 | + 'startPos' => $i, |
| 529 | + 'count' => $count ); |
| 530 | + */ |
| 531 | + lineStartPos = i; /* This lived in the stack in php, but there can't be two open header pieces */ |
| 532 | + addNodeWithTags(heading_node, count); |
| 533 | + currentClosing = '\n'; |
| 534 | + /* extract( $stack->getFlags(); ) */ |
| 535 | + getFlags() |
| 536 | + i += count; |
| 537 | + } |
| 538 | + } else if ( found == lineEnd ) { |
| 539 | + |
| 540 | + // A heading must be open, otherwise \n wouldn't have been in the search list |
| 541 | + assert( parentNode->type == heading_node ); |
| 542 | + assert( lineStartPos != -1 ); |
| 543 | + |
| 544 | + // Search back through the input to see if it has a proper close |
| 545 | + // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient |
| 546 | + int searchStart; |
| 547 | + for (searchStart = i - 1; searchStart > 0; --searchStart) { |
| 548 | + if ( ( text[searchStart] != ' ' ) && ( text[searchStart] != '\t' ) ) { |
| 549 | + break; |
| 550 | + } |
| 551 | + } |
| 552 | + |
| 553 | + if ( parentNode->commentEnd != -1 && searchStart == parentNode->commentEnd ) { |
| 554 | + // Comment found at line end |
| 555 | + // Search for equals signs before the comment |
| 556 | + for (searchStart = parentNode->visualEnd; searchStart > 0; --searchStart) { |
| 557 | + if (text[i] != ' ' && text[i] != '\t') |
| 558 | + break; |
| 559 | + } |
| 560 | + } |
| 561 | + searchStart++; |
| 562 | + |
| 563 | + int count = parentNode->contentLength; |
| 564 | + int equalsLength = chrrspn( text, '=', searchStart - 1 ); |
| 565 | + |
| 566 | + if ( equalsLength > 0 ) { |
| 567 | + if ( searchStart - equalsLength == lineStartPos ) { |
| 568 | + // This is just a single string of equals signs on its own line |
| 569 | + // Replicate the doHeadings behaviour /={count}(.+)={count}/ |
| 570 | + // First find out how many equals signs there really are (don't stop at 6) |
| 571 | + count = equalsLength; |
| 572 | + if ( count < 3 ) { |
| 573 | + count = 0; |
| 574 | + } else { |
| 575 | + count = min( 6, ( count - 1 ) / 2 ); |
| 576 | + } |
| 577 | + } else { |
| 578 | + count = min( equalsLength, count ); |
| 579 | + } |
| 580 | + if ( count > 0 ) { |
| 581 | + // Normal match, output <h> |
| 582 | + assert( count < 7 ); |
| 583 | + parentNode->type = heading_node + count; |
| 584 | + parentNode->flags = headingIndex; |
| 585 | + headingIndex++; |
| 586 | + } else { |
| 587 | + // Single equals sign on its own line, count=0 |
| 588 | + parentNode->type = literal_node; |
| 589 | + } |
| 590 | + } else { |
| 591 | + // No match, no <h>, just pass down the inner text |
| 592 | + parentNode->type = literal_node; |
| 593 | + } |
| 594 | + // Unwind the stack |
| 595 | + closeNode( parentNode->type ); |
| 596 | + /* extract( getFlags() ); */ |
| 597 | + getFlags(); |
| 598 | + |
| 599 | + // Note that we do NOT increment the input pointer. |
| 600 | + // This is because the closing linebreak could be the opening linebreak of |
| 601 | + // another heading. Infinite loops are avoided because the next iteration MUST |
| 602 | + // hit the heading open case above, which unconditionally increments the |
| 603 | + // input pointer. |
| 604 | + assert( inHeading == false ); |
| 605 | + } else if ( found == openBrace || found == openBracket ) { |
| 606 | + // count opening brace characters |
| 607 | + int count = chrspn( text, text[i], i, text_len - i ); |
| 608 | + |
| 609 | + // we need to add to stack only if opening brace count is enough for one of the rules |
| 610 | + int rulemin = 2; /* Known rules */ |
| 611 | + |
| 612 | + if ( count >= rulemin ) { |
| 613 | + // Add it to the stack |
| 614 | + addNodeWithTags( found, count ); |
| 615 | + parentNode->flags = (i > 0 && text[i-1] == '\n') /* lineStart boolean */; |
| 616 | + /* close char does not need to be stored per Known rules */ |
| 617 | + parentNode->count = count; |
| 618 | + parentNode->argIndex = 0; |
| 619 | + if ( found == openBrace ) { |
| 620 | + addNodeWithTags( title_node, 0 ); |
| 621 | + } |
| 622 | + getFlags(); |
| 623 | + } else { |
| 624 | + // Add literal brace(s) |
| 625 | + addLiteral( text, i, count ); |
| 626 | + } |
| 627 | + i += count; |
| 628 | + } else if ( found == closeBrace || found == closeBracket ) { |
| 629 | + // lets check if there are enough characters for closing brace |
| 630 | + |
| 631 | + if ( parentNode->type == name_node ) { |
| 632 | + /* Go to close it */ |
| 633 | + fakePipeFound = true; |
| 634 | + continue; |
| 635 | + } |
| 636 | + if ( parentNode->type == value_node ) { |
| 637 | + closeNode( parentNode->type ); |
| 638 | + assert( parentNode->type == part_node ); |
| 639 | + } |
| 640 | + assert( ( parentNode->type == found - 2 ) || ( parentNode->parent && ( parentNode->parent->type == found - 2 ) ) ); |
| 641 | + |
| 642 | + int maxCount = found == closeBracket ? parentNode->count : parentNode->parent->count; |
| 643 | + int count = chrspn( text, found, i, maxCount ); |
| 644 | + |
| 645 | + // check for maximum matching characters (if there are 5 closing |
| 646 | + // characters, we will probably need only 3 - depending on the rules) |
| 647 | + int ruleMax = ( found == closeBrace ) ? 3 : 2; /* Known rules */ |
| 648 | + int matchingCount = 0; |
| 649 | + if ( count > ruleMax ) { |
| 650 | + // The specified maximum exists in the callback array, unless the caller |
| 651 | + // has made an error |
| 652 | + matchingCount = ruleMax; |
| 653 | + } else { |
| 654 | + // Count is less than the maximum |
| 655 | + // Skip any gaps in the callback array to find the true largest match |
| 656 | + matchingCount = count; |
| 657 | + /* Known rules: If we have three opening braces but only two closing ones, we want the two. |
| 658 | + * With less than the minimum, matchingCount = 0. |
| 659 | + */ |
| 660 | + if ( count >= 2 /* min */ ) { /* Known rules */ |
| 661 | + matchingCount = count; |
| 662 | + } |
| 663 | + } |
| 664 | + |
| 665 | + if ( matchingCount <= 0 ) { |
| 666 | + // No matching element found in callback array |
| 667 | + // Output a literal closing brace and continue |
| 668 | + assert( count == 1 ); |
| 669 | + addLiteral( text, i, count ); |
| 670 | + i += count; |
| 671 | + continue; |
| 672 | + } |
| 673 | + |
| 674 | + |
| 675 | + if ( found == closeBracket ) { /* Known rules */ |
| 676 | + // No element, just literal text |
| 677 | + parentNode->count -= matchingCount; |
| 678 | + |
| 679 | + /* The preprocessor DOM adds a new literal here, then goes |
| 680 | + * backwards and readds another node before if there are |
| 681 | + * brackets left. |
| 682 | + * We leave the same bracket node open (with decreasing counts) |
| 683 | + * until closing time, since we know that all brackets |
| 684 | + * will end up being literals. |
| 685 | + */ |
| 686 | + |
| 687 | + if ( parentNode->count < 2 ) { /* Known rules */ |
| 688 | + parentNode = breakSyntax( parentNode, nodeString, &nodeStringLen ); |
| 689 | + } |
| 690 | + |
| 691 | + addLiteral( text, i, matchingCount ); |
| 692 | + i += matchingCount; |
| 693 | + continue; |
| 694 | + } |
| 695 | + assert( ( parentNode->parent && ( parentNode->parent->type == brace_node ) ) ); |
| 696 | + |
| 697 | + assert( parentNode->type == title_node || parentNode->type == part_node ); |
| 698 | + closeNode( parentNode->type ); |
| 699 | + |
| 700 | + addNodeWithText( closebrace_node, text, i, matchingCount ); // should be on next line? |
| 701 | + // Advance input pointer |
| 702 | + i += matchingCount; |
| 703 | + |
| 704 | + parentNode->count -= matchingCount; |
| 705 | + |
| 706 | + if ( matchingCount == 2 ) { |
| 707 | + parentNode->type = template_node; |
| 708 | + } else if ( matchingCount == 3 ) { |
| 709 | + parentNode->type = tplarg_node; |
| 710 | + } else { |
| 711 | + assert( 0 ); |
| 712 | + } |
| 713 | + parentNode->contentLength = matchingCount; |
| 714 | + |
| 715 | + // Re-add the old stack element if it still has unmatched opening characters remaining |
| 716 | + if ( parentNode->count > 0 ) { |
| 717 | + int oldindex = parentNode->index; |
| 718 | + |
| 719 | + // do we still qualify for any callback with remaining count? |
| 720 | + if ( parentNode->count >= 2 ) { /* Known rules */ |
| 721 | + /* Prepend a { and a title node */ |
| 722 | + int oldcount = parentNode->count; |
| 723 | + int oldflags = parentNode->flags; |
| 724 | + |
| 725 | + parentNode->flags = 0; /* We don't begin a line since there is markup before us */ |
| 726 | + |
| 727 | + closeNode( parentNode->type ); |
| 728 | + storedLength -= oldcount; |
| 729 | + |
| 730 | + addNodeWithTags( brace_node, oldcount ); |
| 731 | + addNodeWithTags( title_node, 0 ); |
| 732 | + |
| 733 | + /* But they must be placed *before* the tag we just closed: */ |
| 734 | + |
| 735 | + /* Move all our childs two positions right */ |
| 736 | + memmove( nodeString + oldindex + NODE_LEN * 2, nodeString + oldindex, nodeStringLen - oldindex - 2 * NODE_LEN ); |
| 737 | + |
| 738 | + /* And the new tags into the positions left */ |
| 739 | + parentNode->index = oldindex + NODE_LEN; |
| 740 | + parentNode->parent->index = oldindex; |
| 741 | + parentNode->parent->flags = oldflags; |
| 742 | + } else { |
| 743 | + /* Prepend a literal node with the skipped braces */ |
| 744 | + int skippedBraces = 1 /* = parentNode->count */; |
| 745 | + closeNode( parentNode->type ); |
| 746 | + |
| 747 | + struct node tmpnode; |
| 748 | + tmpnode.type = literal_node; |
| 749 | + tmpnode.flags = 0; |
| 750 | + tmpnode.nextSibling = 0; |
| 751 | + tmpnode.contentLength = skippedBraces; |
| 752 | + |
| 753 | + ALLOC_NODESTRING(); |
| 754 | + memmove( nodeString + oldindex + NODE_LEN, nodeString + oldindex, nodeStringLen - oldindex ); |
| 755 | + nodeStringLen += NODE_LEN; |
| 756 | + |
| 757 | + serializeNode(nodeString + oldindex, &tmpnode); |
| 758 | + } |
| 759 | + } else { |
| 760 | + closeNode( parentNode->type ); |
| 761 | + } |
| 762 | + |
| 763 | + getFlags(); |
| 764 | + } else if ( found == pipe ) { |
| 765 | + findEquals = true; // shortcut for getFlags() |
| 766 | + if ( parentNode->type == title_node ) { |
| 767 | + closeNode( title_node ); |
| 768 | + } else if ( parentNode->type == name_node ) { |
| 769 | + assert( ( parentNode->parent && ( parentNode->parent->type == part_node ) ) ); |
| 770 | + assert( ( parentNode->parent->parent && ( parentNode->parent->parent->type == brace_node ) ) ); |
| 771 | + |
| 772 | + /* This was a value node, the name is empty */ |
| 773 | + parentNode->type = value_node; |
| 774 | + int len = parentNode->contentLength; |
| 775 | + parentNode->contentLength = 0; |
| 776 | + int oldindex = parentNode->index; |
| 777 | + |
| 778 | + /* Relocate the children one position right */ |
| 779 | + ALLOC_NODESTRING(); |
| 780 | + memmove( nodeString + oldindex + NODE_LEN * 2, nodeString + oldindex + NODE_LEN, nodeStringLen - oldindex - NODE_LEN ); /* (nodeStringLen - oldindex) will often be 0 */ |
| 781 | + nodeStringLen += NODE_LEN; |
| 782 | + /* And the father, too */ |
| 783 | + parentNode->index += NODE_LEN; |
| 784 | + closeNode( value_node ); |
| 785 | + |
| 786 | + /* Place the name */ |
| 787 | + struct node tmpnode; |
| 788 | + tmpnode.type = name_node; |
| 789 | + tmpnode.flags = parentNode->flags = ++parentNode->parent->argIndex; |
| 790 | + tmpnode.nextSibling = 0; |
| 791 | + tmpnode.contentLength = len; |
| 792 | + assert( len == 0 ); |
| 793 | + |
| 794 | + serializeNode(nodeString + oldindex, &tmpnode); |
| 795 | + if ( !fakePipeFound ) closeNode( part_node ); |
| 796 | + } else { |
| 797 | + closeNode( value_node ); |
| 798 | + closeNode( part_node ); |
| 799 | + } |
| 800 | + if ( fakePipeFound ) { |
| 801 | + fakePipeFound = false; |
| 802 | + continue; |
| 803 | + } |
| 804 | + addNodeWithTags( part_node, 1 ); |
| 805 | + addNodeWithTags( name_node, 0 ); |
| 806 | + ++i; |
| 807 | + } else if ( found == equals ) { |
| 808 | + findEquals = false; // shortcut for getFlags() |
| 809 | + assert( parentNode->type == name_node ); /* If we are searching for an equal we are inside parts\name */ |
| 810 | + closeNode( name_node ); |
| 811 | + addLiteral( text, i, 1 ); |
| 812 | + addNodeWithTags( value_node, 0 ); /* We could piggyback some literals on value_nodes */ |
| 813 | + |
| 814 | + //parentNode->eqpos = i; // we could remove eqpost member |
| 815 | + ++i; |
| 816 | + } else { |
| 817 | + assert( 2 + 2 == 5 ); |
| 818 | + } |
| 819 | + } |
| 820 | + while ( parentNode ) { |
| 821 | + if ( parentNode->type == brace_node || parentNode->type == bracket_node ) { |
| 822 | + parentNode = breakSyntax( parentNode, nodeString, &nodeStringLen ); |
| 823 | + } else { |
| 824 | + closeNode( parentNode->type ); |
| 825 | + } |
| 826 | + } |
| 827 | + |
| 828 | + nodeString[nodeStringLen] = '\0'; |
| 829 | + *preprocessed_len = nodeStringLen; |
| 830 | + return nodeString; |
| 831 | +} |
Property changes on: trunk/extensions/NativePreprocessor/preprocesstoobj.c |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 832 | + native |
Index: trunk/extensions/NativePreprocessor/in_array.c |
— | — | @@ -0,0 +1,41 @@ |
| 2 | + |
| 3 | +#include <stdbool.h> |
| 4 | +#include "php.h" |
| 5 | +#define const |
| 6 | +#include "in_array.h" |
| 7 | + |
| 8 | +/**
| 9 | + * C-level counterpart of PHP's in_array(): true when value matches some element of array.
| 10 | + * strict selects the identity comparison instead of the loose one; closely follows php_search_array().
| 11 | + */
| 12 | +bool zval_in_array(const zval* value, const HashTable* array, bool strict) {
| 13 | +	int (*compare)(zval *, zval *, zval * TSRMLS_DC); /* stores its verdict in the first argument */
| 14 | +	HashPosition cursor; /* hash iterator */
| 15 | +	zval **element; /* pointer to the current array entry */
| 16 | +	zval verdict; /* comparison result */
| 17 | +
| 18 | +	TSRMLS_FETCH(); /* Useless for simple arrays, since it's only needed when comparing array values */
| 19 | +
| 20 | +	compare = is_equal_function;
| 21 | +	if ( strict ) compare = is_identical_function;
| 22 | +	for ( zend_hash_internal_pointer_reset_ex(array, &cursor);
| 23 | +	      zend_hash_get_current_data_ex(array, (void **)&element, &cursor) == SUCCESS;
| 24 | +	      zend_hash_move_forward_ex(array, &cursor) ) {
| 25 | +		compare(&verdict, value, *element TSRMLS_CC);
| 26 | +		if (Z_LVAL(verdict)) { /* non-zero means the comparison succeeded */
| 27 | +			return true;
| 28 | +		}
| 29 | +	}
| 30 | +	return false;
| 31 | +}
| 32 | + |
| 33 | +bool str_in_array(const char* string, int string_len, const HashTable* array, bool strict) {
| 34 | +	/* Wrap the caller's buffer in a stack zval (the bytes are not copied) and reuse zval_in_array() */
| 35 | +	zval needle;
| 36 | +	INIT_ZVAL(needle);
| 37 | +	Z_TYPE(needle) = IS_STRING;
| 38 | +	Z_STRVAL(needle) = string;
| 39 | +	Z_STRLEN(needle) = string_len;
| 40 | +	return zval_in_array(&needle, array, strict);
| 41 | +}
| 42 | + |
Property changes on: trunk/extensions/NativePreprocessor/in_array.c |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 43 | + native |
Index: trunk/extensions/NativePreprocessor/php_mediawiki_preprocessor.h |
— | — | @@ -0,0 +1,28 @@ |
| 2 | +#ifndef PHP_MEDIAWIKI_PREPROCESSOR_H |
| 3 | +#define PHP_MEDIAWIKI_PREPROCESSOR_H 1 |
| 4 | + |
| 5 | +#ifdef ZTS |
| 6 | +#include "TSRM.h" |
| 7 | +#endif |
| 8 | + |
| 9 | +ZEND_BEGIN_MODULE_GLOBALS(mediawiki_preprocessor) |
| 10 | + |
| 11 | +ZEND_END_MODULE_GLOBALS(mediawiki_preprocessor) |
| 12 | + |
| 13 | +#ifdef ZTS /* MWPP_G(v): access field v of this extension's module globals */
| 14 | +#define MWPP_G(v) TSRMG(mediawiki_preprocessor_globals_id, zend_mediawiki_preprocessor_globals *, v) /* cast must name the struct declared by ZEND_BEGIN_MODULE_GLOBALS above */
| 15 | +#else
| 16 | +#define MWPP_G(v) (mediawiki_preprocessor_globals.v)
| 17 | +#endif
| 18 | + |
| 19 | +#define PHP_MEDIAWIKI_PREPROCESSOR_VERSION "0.1" |
| 20 | +#define PHP_MEDIAWIKI_PREPROCESSOR_EXTNAME "MediaWiki Preprocessor" |
| 21 | + |
| 22 | +PHP_MINIT_FUNCTION(mediawiki_preprocessor); |
| 23 | +PHP_MSHUTDOWN_FUNCTION(mediawiki_preprocessor); |
| 24 | +PHP_RINIT_FUNCTION(mediawiki_preprocessor); |
| 25 | + |
| 26 | +extern zend_module_entry mediawiki_preprocessor_module_entry; |
| 27 | +#define phpext_mediawiki_preprocessor_ptr &mediawiki_preprocessor_module_entry |
| 28 | + |
| 29 | +#endif |
Property changes on: trunk/extensions/NativePreprocessor/php_mediawiki_preprocessor.h |
___________________________________________________________________ |
Added: svn:keywords |
1 | 30 | + Author Date Id Rev URL |
Added: svn:eol-style |
2 | 31 | + native |
Index: trunk/extensions/NativePreprocessor/in_array.h |
— | — | @@ -0,0 +1,5 @@ |
| 2 | +#include <stdbool.h> |
| 3 | + |
| 4 | +bool zval_in_array(const zval* value, const HashTable* array, bool strict); /* true if value matches an element of array; strict selects the identity comparison */
| 5 | +bool str_in_array(const char* string, int string_len, const HashTable* array, bool strict); /* same search for a raw (string, length) pair */
| 6 | + |
Property changes on: trunk/extensions/NativePreprocessor/in_array.h |
___________________________________________________________________ |
Added: svn:keywords |
1 | 7 | + Author Date Id Rev URL |
Added: svn:eol-style |
2 | 8 | + native |
Index: trunk/extensions/NativePreprocessor/mediawiki_preprocessor.c |
— | — | @@ -0,0 +1,141 @@ |
| 2 | +#ifdef HAVE_CONFIG_H |
| 3 | +#include "config.h" |
| 4 | +#endif |
| 5 | + |
| 6 | + |
| 7 | +#include "php.h" |
| 8 | +#include "php_ini.h" |
| 9 | +#include "php_mediawiki_preprocessor.h" |
| 10 | + |
| 11 | + |
| 12 | +#if ZEND_DEBUG || 1 /* NOTE(review): the "|| 1" selects the printing variant in every build */
| 13 | +#define DEBUG(x,...) php_printf("[MWPP] "x"\n", __VA_ARGS__) /* x must be a string literal and at least one further argument is required */
| 14 | +#else
| 15 | +#define DEBUG(x,...) /* expands to nothing */
| 16 | +#endif
| 17 | + |
| 18 | +typedef struct _mediawiki_preprocessor { /* Storage allocated by create_mwppobj() for each MediaWikiPreprocessor instance */
| 19 | + zend_object std; /* Inherit from a standard php object; there are no extension-specific fields yet */
| 20 | +
| 21 | +} mwppobj;
| 22 | + |
| 23 | +ZEND_DECLARE_MODULE_GLOBALS(mediawiki_preprocessor) |
| 24 | + |
| 25 | +zend_module_entry mediawiki_preprocessor_module_entry = { /* Module descriptor; exported through ZEND_GET_MODULE when built as a dynamic module */
| 26 | +#if ZEND_MODULE_API_NO >= 20010901
| 27 | + STANDARD_MODULE_HEADER,
| 28 | +#endif
| 29 | + PHP_MEDIAWIKI_PREPROCESSOR_EXTNAME,
| 30 | + NULL, /* No procedural functions; the class is registered in MINIT instead */
| 31 | + PHP_MINIT(mediawiki_preprocessor), /* module_startup_func */
| 32 | + PHP_MSHUTDOWN(mediawiki_preprocessor), /* module_shutdown_func */
| 33 | + PHP_RINIT(mediawiki_preprocessor), /* request_startup_func */
| 34 | + NULL, /* request_shutdown_func */
| 35 | + NULL, /* info_func */
| 36 | +#if ZEND_MODULE_API_NO >= 20010901
| 37 | + PHP_MEDIAWIKI_PREPROCESSOR_VERSION,
| 38 | +#endif
| 39 | + STANDARD_MODULE_PROPERTIES
| 40 | +};
| 41 | + |
| 42 | +#ifdef COMPILE_DL_MEDIAWIKI_PREPROCESSOR |
| 43 | +ZEND_GET_MODULE(mediawiki_preprocessor) |
| 44 | +#endif |
| 45 | + |
| 46 | +PHP_RINIT_FUNCTION(mediawiki_preprocessor)
| 47 | +{
| 48 | + /* Request init: nothing to prepare per request at the moment */
| 49 | +
| 50 | + return SUCCESS;
| 51 | +}
| 52 | + |
| 53 | +PHP_MSHUTDOWN_FUNCTION(mediawiki_preprocessor)
| 54 | +{
| 55 | + /* Module shutdown: nothing to release at the moment */
| 56 | +
| 57 | + return SUCCESS;
| 58 | +}
| 59 | + |
| 60 | +PHP_METHOD(WikiTextPreprocessor,preprocessToObj); |
| 61 | +/* arginfo for preprocessToObj( text, flags, stripList ): three required by-value arguments. Names are bare identifiers because ZEND_ARG_INFO stringifies them. */
| 62 | +ZEND_BEGIN_ARG_INFO_EX(/*name*/ arginfopreprocessToObj, /*pass_rest_by_reference*/ 0, /*return_reference*/ 0, /*required_num_args*/ 3)
| 63 | +	ZEND_ARG_INFO(/*pass_by_ref*/ 0, /*name*/ text)
| 64 | +	ZEND_ARG_INFO(/*pass_by_ref*/ 0, /*name*/ flags)
| 65 | +	ZEND_ARG_INFO(/*pass_by_ref*/ 0, /*name*/ stripList)
| 66 | +ZEND_END_ARG_INFO()
| 67 | +static const zend_function_entry mwpp_methods[] = { /* Method table passed to INIT_CLASS_ENTRY for MediaWikiPreprocessor */
| 68 | + PHP_ME(WikiTextPreprocessor, preprocessToObj, arginfopreprocessToObj, ZEND_ACC_PUBLIC) /* first argument must match the name used in PHP_METHOD() */
| 69 | + {NULL, NULL, NULL} /* end-of-table marker */
| 70 | +};
| 71 | + |
| 72 | +static void free_mwppobj(void *object TSRMLS_DC); |
| 73 | +static zend_object_value create_mwppobj (zend_class_entry *class_type TSRMLS_DC); |
| 74 | + |
| 75 | +static void php_mwpp_init_globals(zend_mediawiki_preprocessor_globals *mwpp_globals) /* callback given to ZEND_INIT_MODULE_GLOBALS in MINIT */
| 76 | +{
| 77 | + /* No globals to init */
| 78 | +}
| 79 | + |
| 80 | +PHP_MINIT_FUNCTION(mediawiki_preprocessor)
| 81 | +{
| 82 | + /* Module init: set up the module globals and register the MediaWikiPreprocessor class */
| 83 | + zend_class_entry ce;
| 84 | + zend_class_entry* registered_class; /* NOTE(review): assigned below but never read */
| 85 | +
| 86 | + ZEND_INIT_MODULE_GLOBALS(mediawiki_preprocessor, php_mwpp_init_globals, NULL);
| 87 | +
| 88 | + INIT_CLASS_ENTRY(ce, "MediaWikiPreprocessor", mwpp_methods); /* Define class MediaWikiPreprocessor */
| 89 | +
| 90 | + ce.create_object = create_mwppobj; /* instances are allocated by create_mwppobj() */
| 91 | + registered_class = zend_register_internal_class(&ce TSRMLS_CC); /* Bring it to existence */
| 92 | +
| 93 | + return SUCCESS;
| 94 | +}
| 95 | + |
| 96 | +static zend_object_value create_mwppobj (zend_class_entry *class_type TSRMLS_DC) /* create_object handler installed in MINIT */
| 97 | +{
| 98 | + zend_object_value retval;
| 99 | + mwppobj *intern;
| 100 | + zval *tmp; /* scratch slot handed to zend_hash_copy() */
| 101 | +
| 102 | + intern = emalloc(sizeof(mwppobj));
| 103 | +
| 104 | + DEBUG("Creating MediaWikiPreprocessor %p", intern);
| 105 | +
| 106 | + zend_object_std_init(&intern->std, class_type TSRMLS_CC);
| 107 | + zend_hash_copy(intern->std.properties, &class_type->default_properties, (copy_ctor_func_t) zval_add_ref, (void *) &tmp, sizeof(zval *)); /* start from the class' default properties */
| 108 | +
| 109 | + retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)NULL, (zend_objects_free_object_storage_t) free_mwppobj, NULL TSRMLS_CC); /* no destructor callback; free_mwppobj() releases the storage */
| 110 | + retval.handlers = zend_get_std_object_handlers(); /* Default handlers */
| 111 | +
| 112 | + return retval;
| 113 | +}
| 114 | + |
| 115 | +static void free_mwppobj(void *object TSRMLS_DC) /* free-storage callback registered by create_mwppobj() */
| 116 | +{
| 117 | + mwppobj *intern = (mwppobj *)object;
| 118 | +
| 119 | + zend_object_std_dtor(&intern->std TSRMLS_CC);
| 120 | + efree(object);
| 121 | +
| 122 | + DEBUG("MediaWikiPreprocessor %p destroyed", object); /* only the pointer value is printed; the memory is already released */
| 123 | +}
| 124 | + |
| 125 | +char* preprocessToObj( const char* text, int text_len, int flags, HashTable* parserStripList, int* preprocessed_len );
| 126 | +PHP_METHOD(WikiTextPreprocessor, preprocessToObj)
| 127 | +{
| 128 | +	/* preprocessToObj( string text, int flags, array stripList ): returns the serialized node string */
| 129 | +	char *wikitext, *preprocessed;
| 130 | +	int wikitext_len, preprocessed_len;
| 131 | +	long flags; /* "l" stores a long; the former "d" stored a double through an int pointer */
| 132 | +	zval *array;
| 133 | +
| 134 | +	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sla", &wikitext, &wikitext_len, &flags, &array) == FAILURE) {
| 135 | +		return;
| 136 | +	}
| 137 | +	wikitext_len = strlen(wikitext); /* NOTE(review): overrides the length from "s", so text is cut at the first NUL byte -- confirm this is intended */
| 138 | +	DEBUG("Constructed with text «%s» of length %d, flags %d", wikitext, wikitext_len, (int) flags );
| 139 | +	preprocessed = preprocessToObj( wikitext, wikitext_len, (int) flags, Z_ARRVAL_P(array), &preprocessed_len );
| 140 | +
| 141 | +	RETURN_STRINGL( preprocessed, preprocessed_len, 0 ); /* 0: the buffer is handed over, not duplicated */
| 142 | +}
Property changes on: trunk/extensions/NativePreprocessor/mediawiki_preprocessor.c |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 143 | + native |
Index: trunk/extensions/NativePreprocessor/config.h |
— | — | @@ -0,0 +1,59 @@ |
| 2 | +/* config.h. Generated from config.h.in by configure. */ |
| 3 | +/* config.h.in. Generated from configure.in by autoheader. */ |
| 4 | + |
| 5 | +/* Whether to build mediawiki_preprocessor as dynamic module */ |
| 6 | +#define COMPILE_DL_MEDIAWIKI_PREPROCESSOR 1 |
| 7 | + |
| 8 | +/* Define to 1 if you have the <dlfcn.h> header file. */ |
| 9 | +#define HAVE_DLFCN_H 1 |
| 10 | + |
| 11 | +/* Define to 1 if you have the <inttypes.h> header file. */ |
| 12 | +#define HAVE_INTTYPES_H 1 |
| 13 | + |
| 14 | +/* Define to 1 if you have the <memory.h> header file. */ |
| 15 | +#define HAVE_MEMORY_H 1 |
| 16 | + |
| 17 | +/* Define to 1 if you have the <stdint.h> header file. */ |
| 18 | +#define HAVE_STDINT_H 1 |
| 19 | + |
| 20 | +/* Define to 1 if you have the <stdlib.h> header file. */ |
| 21 | +#define HAVE_STDLIB_H 1 |
| 22 | + |
| 23 | +/* Define to 1 if you have the <strings.h> header file. */ |
| 24 | +#define HAVE_STRINGS_H 1 |
| 25 | + |
| 26 | +/* Define to 1 if you have the <string.h> header file. */ |
| 27 | +#define HAVE_STRING_H 1 |
| 28 | + |
| 29 | +/* Define to 1 if you have the <sys/stat.h> header file. */ |
| 30 | +#define HAVE_SYS_STAT_H 1 |
| 31 | + |
| 32 | +/* Define to 1 if you have the <sys/types.h> header file. */ |
| 33 | +#define HAVE_SYS_TYPES_H 1 |
| 34 | + |
| 35 | +/* Define to 1 if you have the <unistd.h> header file. */ |
| 36 | +#define HAVE_UNISTD_H 1 |
| 37 | + |
| 38 | +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ |
| 39 | +/* #undef NO_MINUS_C_MINUS_O */ |
| 40 | + |
| 41 | +/* Define to the address where bug reports for this package should be sent. */ |
| 42 | +#define PACKAGE_BUGREPORT "" |
| 43 | + |
| 44 | +/* Define to the full name of this package. */ |
| 45 | +#define PACKAGE_NAME "" |
| 46 | + |
| 47 | +/* Define to the full name and version of this package. */ |
| 48 | +#define PACKAGE_STRING "" |
| 49 | + |
| 50 | +/* Define to the one symbol short name of this package. */ |
| 51 | +#define PACKAGE_TARNAME "" |
| 52 | + |
| 53 | +/* Define to the home page for this package. */ |
| 54 | +#define PACKAGE_URL "" |
| 55 | + |
| 56 | +/* Define to the version of this package. */ |
| 57 | +#define PACKAGE_VERSION "" |
| 58 | + |
| 59 | +/* Define to 1 if you have the ANSI C header files. */ |
| 60 | +#define STDC_HEADERS 1 |
Property changes on: trunk/extensions/NativePreprocessor/config.h |
___________________________________________________________________ |
Added: svn:keywords |
1 | 61 | + Author Date Id Rev URL |
Added: svn:eol-style |
2 | 62 | + native |
Index: trunk/extensions/NativePreprocessor/nodes.h |
— | — | @@ -0,0 +1,287 @@ |
| 2 | + |
| 3 | +enum nodeTypes { /* Each value is the character serializeNode() writes as the first byte of a record */
| 4 | + root_node = '/', /* Outermost node, opened by DEFINE_NODE_STRING() */
| 5 | + literal_node = 'L', /* Plain text */
| 6 | + ignore_node = 'I',
| 7 | + comment_node = '-',
| 8 | +
| 9 | + ext_node = '<', /* Encloses an extension tag */
| 10 | + name_node = 'N', /* Tag name or part name */
| 11 | + attr_node = 'a', /* Tag attributes */
| 12 | + inner_node = '.', /* Tag contents, optional */
| 13 | + end_name_node = 'e', /* > or /> closing a name node. Missing in Preprocessor_DOM */
| 14 | + close_node = '>', /* Closing tag, optional */
| 15 | +
| 16 | + heading_node = 'h', /* Used when working with a heading candidate */
| 17 | + h1_node = 'i', /* presumably heading levels 1 to 6, one letter each -- confirm against the heading code */
| 18 | + h2_node = 'j',
| 19 | + h3_node = 'k',
| 20 | + h4_node = 'l',
| 21 | + h5_node = 'm',
| 22 | + h6_node = 'n',
| 23 | +
| 24 | + brace_node = '{', /* Used when we still don't know its identity (template/tplarg) */
| 25 | + bracket_node = '[', /* Turned into a literal by breakSyntax() */
| 26 | + template_node = 't', /* A brace node closed by two braces */
| 27 | + tplarg_node = 'p', /* A brace node closed by three braces */
| 28 | + title_node = 'T',
| 29 | + part_node = '|', /* One pipe-separated part; holds a name node and possibly a value node */
| 30 | + value_node = 'v',
| 31 | + closebrace_node = '}', /* The run of closing braces */
| 32 | +};
| 33 | + |
| 34 | +struct node { /* An open node; serializeNode() writes type, flags, nextSibling and contentLength into nodeString */
| 35 | + enum nodeTypes type;
| 36 | + char flags; /* serialized as the single character '0' + flags */
| 37 | + int nextSibling; /* bytes of serialized children after this record; UNKNOWN_NODE_LEN while the node is open */
| 38 | + int contentLength; /* characters of the source text accounted to the node itself */
| 39 | +
| 40 | + /* Relevant only for nodes with childs */
| 41 | + int index; /* index inside nodeString */
| 42 | + struct node* parent; /* enclosing open node; closeNode() makes it the current node again */
| 43 | +
| 44 | + /* Used for headings */
| 45 | + int commentEnd; /* set to -1 by addNodeWithTags() */
| 46 | + int visualEnd; /* Point where the last text ends (ie. without spaces, comments...) */
| 47 | +
| 48 | + /* Used for brace and bracket nodes */
| 49 | + int count; /* opening characters not matched yet */
| 50 | +
| 51 | + /* Used for template parts */
| 52 | + int eqpos; /* Name nodes; set to -1 by addNodeWithTags() */
| 53 | + int argIndex; /* Brace nodes; incremented for each part that has no name */
| 54 | + /* Compact me: Move the last three blocks into an union */
| 55 | +};
| 56 | + |
| 57 | +struct literalNode { /* Run of literal text collected by addLiteral() but not yet written to nodeString */
| 58 | + int from; /* offset in text where the run starts */
| 59 | + int len; /* length of the run; 0 when nothing is pending */
| 60 | +};
| 61 | + |
| 62 | +#define UNKNOWN_NODE_LEN -1 |
| 63 | + |
| 64 | +#define DEFINE_NODE_STRING() char* nodeString = NULL; \
| 65 | + int nodeStringLen = 0; /* Bytes of nodeString in use. The final terminator is not counted; ALLOC_NODESTRING() reserves room for it */ \
| 66 | + struct literalNode currentLiteral = { 0, 0 }; \
| 67 | + int storedLength = 0; /* Length of text already stored in the nodes */ \
| 68 | + struct node* parentNode = NULL; \
| 69 | + addNodeWithTags(root_node, 0);
| 70 | + |
| 71 | +#define NODE_LEN 16 /* Length of a serialized node */ |
| 72 | + |
| 73 | +/**
| 74 | + * Adds a node of the specified type to the nodeString, committing any pending literal first
| 75 | + * @param nodeType enum nodeTypes: Type of the node to add.
| 76 | + * @param txt char*: Text pointer. Must be 'text'
| 77 | + * @param offset int: Offset inside txt where the node's text starts
| 78 | + * @param length int: Number of characters covered, starting at offset. -1 to cover until the end of the string.
| 79 | + */
| 80 | +#define addNodeWithText(nodeType,txt,offset,length) \
| 81 | + do { \
| 82 | + int mylen = length; \
| 83 | + assert( txt == text ); \
| 84 | + if ( currentLiteral.len && nodeType != literal_node ) { \
| 85 | + storeNodeWithText(literal_node, currentLiteral.from, currentLiteral.len); \
| 86 | + currentLiteral.len = 0; \
| 87 | + } \
| 88 | + if ( -1 == mylen ) { \
| 89 | + mylen = text_len - offset; \
| 90 | + } \
| 91 | + \
| 92 | + storeNodeWithText(nodeType,offset,mylen); \
| 93 | + } while (0)
| 94 | + |
| 95 | +/**
| 96 | + * Like addNodeWithText() but doesn't commit the literals: appends one childless record
| 97 | + * covering `length` characters at `offset`. Expands to a single statement; the caller adds the ';'. */
| 98 | +#define storeNodeWithText(nodeType,offset,length) \
| 99 | +	do { \
| 100 | +		assert(storedLength == (offset)); /* records must be stored in text order, without gaps */ \
| 101 | +		struct node tmpnode; \
| 102 | +		tmpnode.type = nodeType; \
| 103 | +		tmpnode.flags = 0; \
| 104 | +		tmpnode.nextSibling = 0; \
| 105 | +		tmpnode.contentLength = (length); \
| 106 | +		\
| 107 | +		ALLOC_NODESTRING(); \
| 108 | +		serializeNode(nodeString + nodeStringLen, &tmpnode); \
| 109 | +		nodeStringLen += NODE_LEN; \
| 110 | +		storedLength += (length); printf("storedLength: %d @%d\n", storedLength, __LINE__);\
| 111 | +	} while (0)
| 112 | + |
| 113 | +/**
| 114 | + * Records the passed literal inside currentLiteral; adjacent literals end up in a single node.
| 115 | + * A length of -1 means "until the end of text". The literal must start where the pending one ends.
| 116 | + */
| 117 | +#define addLiteral(literalText,offset,length) \
| 118 | +	do { printf("Addliteral '%.*s'\n", length, literalText+offset);\
| 119 | +		int my_len = length; \
| 120 | +		assert( literalText == text ); \
| 121 | +		if ( my_len == -1 ) { \
| 122 | +			my_len = text_len - (offset); \
| 123 | +		} \
| 124 | +		if ( currentLiteral.len ) { \
| 125 | +			assert( currentLiteral.from + currentLiteral.len == (offset) ); \
| 126 | +		} else { \
| 127 | +			currentLiteral.from = (offset); \
| 128 | +		} \
| 129 | +		currentLiteral.len += my_len; \
| 130 | +		assert( my_len >= 0 ); /* check the resolved length: the raw argument may legitimately be -1 */ \
| 131 | +	} while (0)
| 132 | + |
| 133 | +/**
| 134 | + * Adds a node which contains other tags and makes it the current parent (pending literals are committed first)
| 135 | + * @param nodeType enum nodeTypes: Type of the node.
| 136 | + * @param charsToSkip int: Number of characters that 'belong' to this node. Used to skip characters
| 137 | + */
| 138 | +#define addNodeWithTags(nodeType, charsToSkip) \
| 139 | +	do { \
| 140 | +		struct node* tmpnode; \
| 141 | +		if ( currentLiteral.len ) { \
| 142 | +			storeNodeWithText(literal_node, currentLiteral.from, currentLiteral.len); \
| 143 | +			currentLiteral.len = 0; printf("currentLiteral committed\n"); \
| 144 | +		} \
| 145 | +		\
| 146 | +		tmpnode = alloc_node(); \
| 147 | +		tmpnode->type = nodeType; \
| 148 | +		tmpnode->flags = 0; \
| 149 | +		tmpnode->nextSibling = UNKNOWN_NODE_LEN; /* not known until closeNode() */ \
| 150 | +		tmpnode->contentLength = charsToSkip; \
| 151 | +		tmpnode->index = nodeStringLen; \
| 152 | +		tmpnode->parent = parentNode; \
| 153 | +		tmpnode->commentEnd = -1; \
| 154 | +		tmpnode->eqpos = -1; \
| 155 | +		tmpnode->count = charsToSkip; tmpnode->argIndex = 0; /* emalloc() leaves these unset; a re-added brace node needs its brace count, and argIndex is pre-incremented */ \
| 156 | +		parentNode = tmpnode; \
| 157 | +		ALLOC_NODESTRING(); \
| 158 | +		serializeNode(nodeString + nodeStringLen, tmpnode); \
| 159 | +		nodeStringLen += NODE_LEN; \
| 160 | +		storedLength += charsToSkip; printf("storedLength: %d @%d\n", storedLength, __LINE__);\
| 161 | +	} while(0)
| 162 | + |
| 163 | +#define closeNode(nodeType) /* Close the innermost open node, which must be of type nodeType */ \
| 164 | + do { \
| 165 | + struct node* tmpnode = parentNode; \
| 166 | + assert( nodeType == tmpnode->type ); \
| 167 | + if ( currentLiteral.len ) { /* the pending literal becomes the last child */ \
| 168 | + storeNodeWithText(literal_node, currentLiteral.from, currentLiteral.len); \
| 169 | + printf("adding literal of %d with parent %c\n", currentLiteral.len, tmpnode->type); \
| 170 | + currentLiteral.len = 0; \
| 171 | + } printf("closing node %c at %d with len of %d\n", tmpnode->type, tmpnode->index, tmpnode->contentLength); \
| 172 | + tmpnode->nextSibling = nodeStringLen - tmpnode->index - NODE_LEN; /* bytes taken by the serialized children */ \
| 173 | + serializeNode( nodeString + tmpnode->index, tmpnode ); /* rewrite the record now that the size is known */ \
| 174 | + parentNode = parentNode->parent; \
| 175 | + free_node( tmpnode ); \
| 176 | + } while (0)
| 177 | + |
| 178 | +#define alloc_node() emalloc( sizeof(struct node) ) /* the fields are not initialised here */
| 179 | +#define free_node(x) efree(x)
| 180 | +
| 181 | +#define ALLOC_NODESTRING() /* Grow nodeString so one more record plus a terminating byte fits after nodeStringLen */ \
| 182 | + do { \
| 183 | + nodeString = erealloc(nodeString, nodeStringLen + NODE_LEN + 1); \
| 184 | + assert( nodeString ); \
| 185 | + } while(0)
| 186 | + |
| 187 | +/**
| 188 | + * Serializes a node into a NODE_LEN-byte record: type character, '0' + flags,
| 189 | + * nextSibling as 6 hex digits (or "??????" while unknown), contentLength as 8 hex digits.
| 190 | + * The caller must provide NODE_LEN bytes at pointer, and NODE_LEN + 1 writable.
| 191 | + */
| 192 | +static void serializeNode(char* pointer, struct node* node) {
| 193 | +	char saved; /* byte following the record, hit by snprintf's terminator */
| 194 | +	pointer[0] = node->type;
| 195 | +	pointer[1] = '0' + node->flags;
| 196 | +	assert( node->nextSibling < (1 << 24) );
| 197 | +	if ( node->nextSibling != UNKNOWN_NODE_LEN ) {
| 198 | +		sprintf(pointer + 2, "%06x", node->nextSibling);
| 199 | +	} else {
| 200 | +		memset(pointer + 2, '?', 6);
| 201 | +	}
| 202 | +	saved = pointer[NODE_LEN];
| 203 | +	snprintf(pointer + 8, 9, "%08x", node->contentLength);
| 204 | +	pointer[NODE_LEN] = saved;
| 205 | +}
| 206 | + |
| 207 | +static inline int hex2dec(char val) {
| 208 | +	/* Value of one lowercase hexadecimal digit, as written by serializeNode() with %x */
| 209 | +	if ( val >= '0' && val <= '9' ) return val - '0';
| 210 | +	if ( val >= 'a' && val <= 'f' ) return val - 'a' + 10;
| 211 | +
| 212 | +	/* Anything else means a corrupted record */
| 213 | +	assert(0);
| 214 | +	return 0; /* defined result when assertions are compiled out */
| 215 | +}
| 216 | + |
| 217 | +/**
| 218 | + * Get the nextSibling value from a node serialized at pointer: six hex digits at pointer[2..7],
| 219 | + * most significant first. Must not be used on a node that is still open ("??????"). */
| 220 | +static inline int getNextSibling(const char* pointer) {
| 221 | +	assert( pointer[2] != '?' );
| 222 | +	return ( ( ( ( ( ( ( ( ( hex2dec(pointer[2]) << 4 ) | hex2dec(pointer[3]) ) << 4 ) | hex2dec(pointer[4]) ) << 4 ) | hex2dec(pointer[5]) ) << 4 ) | hex2dec(pointer[6]) ) << 4 ) | hex2dec(pointer[7]); /* every digit is merged before the next shift */
| 223 | +}
| 224 | + |
| 225 | +/**
| 226 | + * Get the output string that would result if the close is not found.
| 227 | + * Turns an open bracket or brace node into a literal, frees it and returns its parent.
| 228 | + * TODO: Reduce space by collapsing nodes here.
| 229 | + */
| 230 | +static struct node* breakSyntax( struct node* node, char * const nodeString, int *nodeStringLen ) {
| 231 | + struct node* parent;
| 232 | +
| 233 | + /* Note we cannot coalesce with a previous literal node since it
| 234 | + * may be our nephew, instead of our sister (we could ask our
| 235 | + * parent, though)
| 236 | + */
| 237 | +printf("breakSyntax(%.*s)\n", *nodeStringLen, nodeString);
| 238 | + if ( node->type == bracket_node ) {
| 239 | + node->type = literal_node; /* the record is rewritten in place; later records are left where they are */
| 240 | + node->nextSibling = 0;
| 241 | + serializeNode( nodeString + node->index, node );
| 242 | + parent = node->parent;
| 243 | + free_node( node );
| 244 | + return parent;
| 245 | + } else if ( node->type == brace_node ) {
| 246 | + /* Literalize this node and its children (title, part, part\name, part\value) */
| 247 | + node->type = literal_node;
| 248 | + node->nextSibling = 0;
| 249 | + serializeNode( nodeString + node->index, node );
| 250 | + int writepos = node->index + NODE_LEN; /* where the next kept record goes */
| 251 | + int readpos = node->index + NODE_LEN; /* record being examined; the first one is expected to be the title */
| 252 | + int nextSibling = getNextSibling( nodeString + readpos ); /* asserts if that node was never closed */
| 253 | + readpos += NODE_LEN;
| 254 | +
| 255 | + /* Move up the title contents */
| 256 | + if ( nextSibling ) {
| 257 | + memmove( nodeString + writepos, nodeString + readpos, nextSibling );
| 258 | + readpos += nextSibling;
| 259 | + writepos += nextSibling;
| 260 | + }
| 261 | +
| 262 | + /* Go for part nodes */
| 263 | + while ( readpos < *nodeStringLen ) {
| 264 | + readpos += NODE_LEN; /* <part> */
| 265 | + assert( readpos < *nodeStringLen ); /* All part nodes contain one name node inside */
| 266 | + int nameChildren = getNextSibling( nodeString + readpos ); /* <name> */
| 267 | + readpos += NODE_LEN;
| 268 | + if ( nameChildren ) {
| 269 | + memmove( nodeString + writepos, nodeString + readpos, nameChildren );
| 270 | + readpos += nameChildren;
| 271 | + writepos += nameChildren;
| 272 | + }
| 273 | + if (readpos >= *nodeStringLen) break; /* It may be the case for eg. {{Foo|Bar */
| 274 | + int valueChildren = getNextSibling( nodeString + readpos ); /* <value> NOTE(review): readpos is not advanced past this record, unlike <name> above */
| 275 | + if ( valueChildren ) {
| 276 | + memmove( nodeString + writepos, nodeString + readpos, nameChildren ); /* NOTE(review): moves nameChildren bytes, not valueChildren -- looks like a copy/paste slip, confirm */
| 277 | + readpos += nameChildren; /* NOTE(review): same here */
| 278 | + writepos += nameChildren; /* NOTE(review): same here */
| 279 | + }
| 280 | + }
| 281 | + *nodeStringLen = writepos; /* the skipped records are dropped from the string */
| 282 | + parent = node->parent;
| 283 | + free_node( node );
| 284 | + return parent;
| 285 | + } else {
| 286 | + assert( 0 ); /* NOTE(review): no return follows, so the result is undefined when assertions are disabled */
| 287 | + }
| 288 | +}
Property changes on: trunk/extensions/NativePreprocessor/nodes.h |
___________________________________________________________________ |
Added: svn:keywords |
1 | 289 | + Author Date Id Rev URL |
Added: svn:eol-style |
2 | 290 | + native |