Index: trunk/phase3/maintenance/parserTests.inc |
— | — | @@ -257,6 +257,7 @@ |
258 | 258 | * @return bool |
259 | 259 | */ |
260 | 260 | private function runTest( $desc, $input, $result, $opts ) { |
| 261 | + global $wgParserConf; |
261 | 262 | if( $this->showProgress ) { |
262 | 263 | $this->showTesting( $desc ); |
263 | 264 | } |
— | — | @@ -281,7 +282,8 @@ |
282 | 283 | |
283 | 284 | $noxml = (bool)preg_match( '~\\b noxml \\b~x', $opts ); |
284 | 285 | |
285 | | - $parser = new Parser; |
| 286 | + $class = $wgParserConf['class']; |
| 287 | + $parser = new $class( $wgParserConf ); |
286 | 288 | foreach( $this->hooks as $tag => $callback ) { |
287 | 289 | $parser->setHook( $tag, $callback ); |
288 | 290 | } |
Index: trunk/phase3/maintenance/preprocessorFuzzTest.php |
— | — | @@ -8,17 +8,21 @@ |
9 | 9 | var $hairs = array( |
10 | 10 | '[[', ']]', '{{', '}}', '{{{', '}}}', |
11 | 11 | '<', '>', '<nowiki', '<gallery', '</nowiki>', '</gallery>', '<nOwIkI>', '</NoWiKi>', |
12 | | - //'<!--' , '-->', |
13 | | - //'<ref>', '</ref>', '<references/>', |
| 12 | + '<!--' , '-->', |
14 | 13 | "\n==", "==\n", |
15 | 14 | '|', '=', "\n", ' ', "\t", "\x7f", |
| 15 | + '~~', '~~~', '~~~~', 'subst:', |
16 | 16 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', |
17 | 17 | 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', |
| 18 | + |
| 19 | + // extensions |
| 20 | + //'<ref>', '</ref>', '<references/>', |
18 | 21 | ); |
19 | 22 | var $minLength = 0; |
20 | 23 | var $maxLength = 20; |
21 | 24 | var $maxTemplates = 5; |
22 | | - var $outputTypes = array( 'OT_HTML', 'OT_WIKI', 'OT_MSG', 'OT_PREPROCESS' ); |
| 25 | + //var $outputTypes = array( 'OT_HTML', 'OT_WIKI', 'OT_PREPROCESS' ); |
| 26 | + var $entryPoints = array( 'testSrvus', 'testPst', 'testPreprocess' ); |
23 | 27 | static $currentTest = false; |
24 | 28 | |
25 | 29 | function execute() { |
— | — | @@ -71,26 +75,35 @@ |
72 | 76 | return Title::newFromText( mt_rand( 0, 1000000 ), mt_rand( 0, 10 ) ); |
73 | 77 | } |
74 | 78 | |
| 79 | + /* |
75 | 80 | function pickOutputType() { |
76 | 81 | $count = count( $this->outputTypes ); |
77 | 82 | return $this->outputTypes[ mt_rand( 0, $count - 1 ) ]; |
| 83 | + }*/ |
| 84 | + |
| 85 | + function pickEntryPoint() { |
| 86 | + $count = count( $this->entryPoints ); |
| 87 | + return $this->entryPoints[ mt_rand( 0, $count - 1 ) ]; |
78 | 88 | } |
79 | 89 | } |
80 | 90 | |
81 | 91 | class PPFuzzTest { |
82 | | - var $templates, $mainText, $title; |
| 92 | + var $templates, $mainText, $title, $entryPoint; |
83 | 93 | |
84 | 94 | function __construct( $tester ) { |
85 | 95 | $this->parent = $tester; |
86 | 96 | $this->mainText = $tester->makeInputText(); |
87 | 97 | $this->title = $tester->makeTitle(); |
88 | | - $this->outputType = $tester->pickOutputType(); |
| 98 | + //$this->outputType = $tester->pickOutputType(); |
| 99 | + $this->entryPoint = $tester->pickEntryPoint(); |
| 100 | + $this->nickname = $tester->makeInputText(); |
| 101 | + $this->fancySig = (bool)mt_rand( 0, 1 ); |
89 | 102 | $this->templates = array(); |
90 | 103 | } |
91 | 104 | |
92 | 105 | function templateHook( $title ) { |
93 | 106 | $titleText = $title->getPrefixedDBkey(); |
94 | | - |
| 107 | + |
95 | 108 | if ( !isset( $this->templates[$titleText] ) ) { |
96 | 109 | $finalTitle = $title; |
97 | 110 | if ( count( $this->templates ) >= $this->parent->maxTemplates ) { |
— | — | @@ -116,16 +129,24 @@ |
117 | 130 | } |
118 | 131 | |
119 | 132 | function execute() { |
120 | | - global $wgParser; |
| 133 | + global $wgParser, $wgUser; |
| 134 | + |
| 135 | + $wgUser = new PPFuzzUser; |
| 136 | + $wgUser->mName = 'Fuzz'; |
| 137 | + $wgUser->mFrom = 'name'; |
| 138 | + $wgUser->ppfz_test = $this; |
| 139 | + |
121 | 140 | $options = new ParserOptions; |
122 | 141 | $options->setTemplateCallback( array( $this, 'templateHook' ) ); |
123 | | - $wgParser->startExternalParse( $this->title, $options, constant( $this->outputType ) ); |
124 | | - return $wgParser->srvus( $this->mainText ); |
| 142 | + //$wgParser->startExternalParse( $this->title, $options, constant( $this->outputType ) ); |
| 143 | + return call_user_func( array( $wgParser, $this->entryPoint ), $this->mainText, $this->title, $options ); |
125 | 144 | } |
126 | 145 | |
127 | 146 | function getReport() { |
128 | 147 | $s = "Title: " . $this->title->getPrefixedDBkey() . "\n" . |
129 | | - "Output type: {$this->outputType}\n" . |
| 148 | +// "Output type: {$this->outputType}\n" . |
| 149 | + "Entry point: {$this->entryPoint}\n" . |
| 150 | + "User: " . ( $this->fancySig ? 'fancy' : 'no-fancy' ) . ' ' . var_export( $this->nickname, true ) . "\n" . |
130 | 151 | "Main text: " . var_export( $this->mainText, true ) . "\n"; |
131 | 152 | foreach ( $this->templates as $titleText => $template ) { |
132 | 153 | $finalTitle = $template['finalTitle']; |
— | — | @@ -139,6 +160,20 @@ |
140 | 161 | } |
141 | 162 | } |
142 | 163 | |
| 164 | +class PPFuzzUser extends User { |
| 165 | + var $ppfz_test; |
| 166 | + |
| 167 | + function getOption( $option, $defaultOverride = '' ) { |
| 168 | + if ( $option === 'fancysig' ) { |
| 169 | + return $this->ppfz_test->fancySig; |
| 170 | + } elseif ( $option === 'nickname' ) { |
| 171 | + return $this->ppfz_test->nickname; |
| 172 | + } else { |
| 173 | + return parent::getOption( $option, $defaultOverride ); |
| 174 | + } |
| 175 | + } |
| 176 | +} |
| 177 | + |
143 | 178 | ini_set( 'memory_limit', '50M' ); |
144 | 179 | if ( isset( $args[0] ) ) { |
145 | 180 | $testText = file_get_contents( $args[0] ); |
Index: trunk/phase3/includes/Preprocessor_DOM.php |
— | — | @@ -0,0 +1,1243 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +class Preprocessor_DOM implements Preprocessor { |
| 5 | + var $parser; |
| 6 | + |
| 7 | + function __construct( $parser ) { |
| 8 | + $this->parser = $parser; |
| 9 | + } |
| 10 | + |
| 11 | + function newFrame() { |
| 12 | + return new PPFrame_DOM( $this ); |
| 13 | + } |
| 14 | + |
| 15 | + /** |
| 16 | + * Preprocess some wikitext and return the document tree. |
| 17 | + * This is the ghost of Parser::replace_variables(). |
| 18 | + * |
| 19 | + * @param string $text The text to parse |
| 20 | + * @param integer $flags Bitwise combination of: |
| 21 | + * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being |
| 22 | + * included. Default is to assume a direct page view. |
| 23 | + * |
| 24 | + * The generated DOM tree must depend only on the input text and the flags. |
| 25 | + * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. |
| 26 | + * |
| 27 | + * Any flag added to the $flags parameter here, or any other parameter liable to cause a |
| 28 | + * change in the DOM tree for a given text, must be passed through the section identifier |
| 29 | + * in the section edit link and thus back to extractSections(). |
| 30 | + * |
| 31 | + * The output of this function is currently only cached in process memory, but a persistent |
| 32 | + * cache may be implemented at a later date which takes further advantage of these strict |
| 33 | + * dependency requirements. |
| 34 | + * |
| 35 | + * @private |
| 36 | + */ |
| 37 | + function preprocessToObj( $text, $flags = 0 ) { |
| 38 | + wfProfileIn( __METHOD__ ); |
| 39 | + wfProfileIn( __METHOD__.'-makexml' ); |
| 40 | + |
| 41 | + $rules = array( |
| 42 | + '{' => array( |
| 43 | + 'end' => '}', |
| 44 | + 'names' => array( |
| 45 | + 2 => 'template', |
| 46 | + 3 => 'tplarg', |
| 47 | + ), |
| 48 | + 'min' => 2, |
| 49 | + 'max' => 3, |
| 50 | + ), |
| 51 | + '[' => array( |
| 52 | + 'end' => ']', |
| 53 | + 'names' => array( 2 => null ), |
| 54 | + 'min' => 2, |
| 55 | + 'max' => 2, |
| 56 | + ) |
| 57 | + ); |
| 58 | + |
| 59 | + $forInclusion = $flags & Parser::PTD_FOR_INCLUSION; |
| 60 | + |
| 61 | + $xmlishElements = $this->parser->getStripList(); |
| 62 | + $enableOnlyinclude = false; |
| 63 | + if ( $forInclusion ) { |
| 64 | + $ignoredTags = array( 'includeonly', '/includeonly' ); |
| 65 | + $ignoredElements = array( 'noinclude' ); |
| 66 | + $xmlishElements[] = 'noinclude'; |
| 67 | + if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) { |
| 68 | + $enableOnlyinclude = true; |
| 69 | + } |
| 70 | + } else { |
| 71 | + $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ); |
| 72 | + $ignoredElements = array( 'includeonly' ); |
| 73 | + $xmlishElements[] = 'includeonly'; |
| 74 | + } |
| 75 | + $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); |
| 76 | + |
| 77 | + // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset |
| 78 | + $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; |
| 79 | + |
| 80 | + $stack = new PPDStack; |
| 81 | + |
| 82 | + $searchBase = '[{<'; |
| 83 | + $revText = strrev( $text ); // For fast reverse searches |
| 84 | + |
| 85 | + $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start |
| 86 | + $accum =& $stack->getAccum(); # Current text accumulator |
| 87 | + $accum = '<root>'; |
| 88 | + $findEquals = false; # True to find equals signs in arguments |
| 89 | + $findPipe = false; # True to take notice of pipe characters |
| 90 | + $headingIndex = 1; |
| 91 | + $inHeading = false; # True if $i is inside a possible heading |
| 92 | + $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i |
| 93 | + $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude> |
| 94 | + $fakeLineStart = true; # Do a line-start run without outputting an LF character |
| 95 | + |
| 96 | + while ( true ) { |
| 97 | + if ( $findOnlyinclude ) { |
| 98 | + // Ignore all input up to the next <onlyinclude> |
| 99 | + $startPos = strpos( $text, '<onlyinclude>', $i ); |
| 100 | + if ( $startPos === false ) { |
| 101 | + // Ignored section runs to the end |
| 102 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>'; |
| 103 | + break; |
| 104 | + } |
| 105 | + $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end |
| 106 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>'; |
| 107 | + $i = $tagEndPos; |
| 108 | + $findOnlyinclude = false; |
| 109 | + } |
| 110 | + |
| 111 | + if ( $fakeLineStart ) { |
| 112 | + $found = 'line-start'; |
| 113 | + $curChar = ''; |
| 114 | + } else { |
| 115 | + # Find next opening brace, closing brace or pipe |
| 116 | + $search = $searchBase; |
| 117 | + if ( $stack->top === false ) { |
| 118 | + $currentClosing = ''; |
| 119 | + } else { |
| 120 | + $currentClosing = $stack->top->close; |
| 121 | + $search .= $currentClosing; |
| 122 | + } |
| 123 | + if ( $findPipe ) { |
| 124 | + $search .= '|'; |
| 125 | + } |
| 126 | + if ( $findEquals ) { |
| 127 | + // First equals will be for the template |
| 128 | + $search .= '='; |
| 129 | + } else { |
| 130 | + // Look for headings |
| 131 | + // We can't look for headings when $findEquals is true, because the ambiguity |
| 132 | + // between template name/value separators and heading starts would be unresolved |
| 133 | + // until the closing double-brace is found. This would mean either infinite |
| 134 | + // backtrack, or creating and updating two separate tree structures until the |
| 135 | + // end of the ambiguity -- one tree structure assuming a heading, and the other |
| 136 | + // assuming a template argument. |
| 137 | + // |
| 138 | + // Easier to just break some section edit links. |
| 139 | + $search .= "\n"; |
| 140 | + } |
| 141 | + $rule = null; |
| 142 | + # Output literal section, advance input counter |
| 143 | + $literalLength = strcspn( $text, $search, $i ); |
| 144 | + if ( $literalLength > 0 ) { |
| 145 | + $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) ); |
| 146 | + $i += $literalLength; |
| 147 | + } |
| 148 | + if ( $i >= strlen( $text ) ) { |
| 149 | + if ( $currentClosing == "\n" ) { |
| 150 | + // Do a past-the-end run to finish off the heading |
| 151 | + $curChar = ''; |
| 152 | + $found = 'line-end'; |
| 153 | + } else { |
| 154 | + # All done |
| 155 | + break; |
| 156 | + } |
| 157 | + } else { |
| 158 | + $curChar = $text[$i]; |
| 159 | + if ( $curChar == '|' ) { |
| 160 | + $found = 'pipe'; |
| 161 | + } elseif ( $curChar == '=' ) { |
| 162 | + $found = 'equals'; |
| 163 | + } elseif ( $curChar == '<' ) { |
| 164 | + $found = 'angle'; |
| 165 | + } elseif ( $curChar == "\n" ) { |
| 166 | + if ( $inHeading ) { |
| 167 | + $found = 'line-end'; |
| 168 | + } else { |
| 169 | + $found = 'line-start'; |
| 170 | + } |
| 171 | + } elseif ( $curChar == $currentClosing ) { |
| 172 | + $found = 'close'; |
| 173 | + } elseif ( isset( $rules[$curChar] ) ) { |
| 174 | + $found = 'open'; |
| 175 | + $rule = $rules[$curChar]; |
| 176 | + } else { |
| 177 | + # Some versions of PHP have a strcspn which stops on null characters |
| 178 | + # Ignore and continue |
| 179 | + ++$i; |
| 180 | + continue; |
| 181 | + } |
| 182 | + } |
| 183 | + } |
| 184 | + |
| 185 | + if ( $found == 'angle' ) { |
| 186 | + $matches = false; |
| 187 | + // Handle </onlyinclude> |
| 188 | + if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) { |
| 189 | + $findOnlyinclude = true; |
| 190 | + continue; |
| 191 | + } |
| 192 | + |
| 193 | + // Determine element name |
| 194 | + if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) { |
| 195 | + // Element name missing or not listed |
| 196 | + $accum .= '<'; |
| 197 | + ++$i; |
| 198 | + continue; |
| 199 | + } |
| 200 | + // Handle comments |
| 201 | + if ( isset( $matches[2] ) && $matches[2] == '!--' ) { |
| 202 | + // To avoid leaving blank lines, when a comment is both preceded |
| 203 | + // and followed by a newline (ignoring spaces), trim leading and |
| 204 | + // trailing spaces and one of the newlines. |
| 205 | + |
| 206 | + // Find the end |
| 207 | + $endPos = strpos( $text, '-->', $i + 4 ); |
| 208 | + if ( $endPos === false ) { |
| 209 | + // Unclosed comment in input, runs to end |
| 210 | + $inner = substr( $text, $i ); |
| 211 | + $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; |
| 212 | + $i = strlen( $text ); |
| 213 | + } else { |
| 214 | + // Search backwards for leading whitespace |
| 215 | + $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0; |
| 216 | + // Search forwards for trailing whitespace |
| 217 | + // $wsEnd will be the position of the last space |
| 218 | + $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); |
| 219 | + // Eat the line if possible |
| 220 | + // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at |
| 221 | + // the overall start. That's not how Sanitizer::removeHTMLcomments() does it, but |
| 222 | + // it's a possible beneficial b/c break. |
| 223 | + if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" |
| 224 | + && substr( $text, $wsEnd + 1, 1 ) == "\n" ) |
| 225 | + { |
| 226 | + $startPos = $wsStart; |
| 227 | + $endPos = $wsEnd + 1; |
| 228 | + // Remove leading whitespace from the end of the accumulator |
| 229 | + // Sanity check first though |
| 230 | + $wsLength = $i - $wsStart; |
| 231 | + if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) { |
| 232 | + $accum = substr( $accum, 0, -$wsLength ); |
| 233 | + } |
| 234 | + // Do a line-start run next time to look for headings after the comment, |
| 235 | + // but only if stack->top===false, because headings don't exist at deeper levels. |
| 236 | + if ( $stack->top === false ) { |
| 237 | + $fakeLineStart = true; |
| 238 | + } |
| 239 | + } else { |
| 240 | + // No line to eat, just take the comment itself |
| 241 | + $startPos = $i; |
| 242 | + $endPos += 2; |
| 243 | + } |
| 244 | + |
| 245 | + $i = $endPos + 1; |
| 246 | + $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); |
| 247 | + $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; |
| 248 | + } |
| 249 | + continue; |
| 250 | + } |
| 251 | + $name = $matches[1]; |
| 252 | + $attrStart = $i + strlen( $name ) + 1; |
| 253 | + |
| 254 | + // Find end of tag |
| 255 | + $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart ); |
| 256 | + if ( $tagEndPos === false ) { |
| 257 | + // Infinite backtrack |
| 258 | + // Disable tag search to prevent worst-case O(N^2) performance |
| 259 | + $noMoreGT = true; |
| 260 | + $accum .= '<'; |
| 261 | + ++$i; |
| 262 | + continue; |
| 263 | + } |
| 264 | + |
| 265 | + // Handle ignored tags |
| 266 | + if ( in_array( $name, $ignoredTags ) ) { |
| 267 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>'; |
| 268 | + $i = $tagEndPos + 1; |
| 269 | + continue; |
| 270 | + } |
| 271 | + |
| 272 | + $tagStartPos = $i; |
| 273 | + if ( $text[$tagEndPos-1] == '/' ) { |
| 274 | + $attrEnd = $tagEndPos - 1; |
| 275 | + $inner = null; |
| 276 | + $i = $tagEndPos + 1; |
| 277 | + $close = ''; |
| 278 | + } else { |
| 279 | + $attrEnd = $tagEndPos; |
| 280 | + // Find closing tag |
| 281 | + if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) { |
| 282 | + $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); |
| 283 | + $i = $matches[0][1] + strlen( $matches[0][0] ); |
| 284 | + $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>'; |
| 285 | + } else { |
| 286 | + // No end tag -- let it run out to the end of the text. |
| 287 | + $inner = substr( $text, $tagEndPos + 1 ); |
| 288 | + $i = strlen( $text ); |
| 289 | + $close = ''; |
| 290 | + } |
| 291 | + } |
| 292 | + // <includeonly> and <noinclude> just become <ignore> tags |
| 293 | + if ( in_array( $name, $ignoredElements ) ) { |
| 294 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) ) |
| 295 | + . '</ignore>'; |
| 296 | + continue; |
| 297 | + } |
| 298 | + |
| 299 | + $accum .= '<ext>'; |
| 300 | + if ( $attrEnd <= $attrStart ) { |
| 301 | + $attr = ''; |
| 302 | + } else { |
| 303 | + $attr = substr( $text, $attrStart, $attrEnd - $attrStart ); |
| 304 | + } |
| 305 | + $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' . |
| 306 | + // Note that the attr element contains the whitespace between name and attribute, |
| 307 | + // this is necessary for precise reconstruction during pre-save transform. |
| 308 | + '<attr>' . htmlspecialchars( $attr ) . '</attr>'; |
| 309 | + if ( $inner !== null ) { |
| 310 | + $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>'; |
| 311 | + } |
| 312 | + $accum .= $close . '</ext>'; |
| 313 | + } |
| 314 | + |
| 315 | + elseif ( $found == 'line-start' ) { |
| 316 | + // Is this the start of a heading? |
| 317 | + // Line break belongs before the heading element in any case |
| 318 | + if ( $fakeLineStart ) { |
| 319 | + $fakeLineStart = false; |
| 320 | + } else { |
| 321 | + $accum .= $curChar; |
| 322 | + $i++; |
| 323 | + } |
| 324 | + |
| 325 | + $count = strspn( $text, '=', $i, 6 ); |
| 326 | + if ( $count > 0 ) { |
| 327 | + $piece = array( |
| 328 | + 'open' => "\n", |
| 329 | + 'close' => "\n", |
| 330 | + 'parts' => array( str_repeat( '=', $count ) ), |
| 331 | + 'startPos' => $i, |
| 332 | + 'count' => $count ); |
| 333 | + $stack->push( $piece ); |
| 334 | + $accum =& $stack->getAccum(); |
| 335 | + extract( $stack->getFlags() ); |
| 336 | + $i += $count; |
| 337 | + } |
| 338 | + } |
| 339 | + |
| 340 | + elseif ( $found == 'line-end' ) { |
| 341 | + $piece = $stack->top; |
| 342 | + // A heading must be open, otherwise \n wouldn't have been in the search list |
| 343 | + assert( $piece->open == "\n" ); |
| 344 | + // Search back through the input to see if it has a proper close |
| 345 | + // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient |
| 346 | + $m = false; |
| 347 | + $count = $piece->count; |
| 348 | + if ( preg_match( "/\s*(=+)/A", $revText, $m, 0, strlen( $text ) - $i ) ) { |
| 349 | + if ( $i - strlen( $m[0] ) == $piece->startPos ) { |
| 350 | + // This is just a single string of equals signs on its own line |
| 351 | + // Replicate the doHeadings behaviour /={count}(.+)={count}/ |
| 352 | + // First find out how many equals signs there really are (don't stop at 6) |
| 353 | + $count = strlen( $m[1] ); |
| 354 | + if ( $count < 3 ) { |
| 355 | + $count = 0; |
| 356 | + } else { |
| 357 | + $count = min( 6, intval( ( $count - 1 ) / 2 ) ); |
| 358 | + } |
| 359 | + } else { |
| 360 | + $count = min( strlen( $m[1] ), $count ); |
| 361 | + } |
| 362 | + if ( $count > 0 ) { |
| 363 | + // Normal match, output <h> |
| 364 | + $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>"; |
| 365 | + $headingIndex++; |
| 366 | + } else { |
| 367 | + // One or two equals signs on their own line, count=0 |
| 368 | + $element = $accum; |
| 369 | + } |
| 370 | + } else { |
| 371 | + // No match, no <h>, just pass down the inner text |
| 372 | + $element = $accum; |
| 373 | + } |
| 374 | + // Unwind the stack |
| 375 | + $stack->pop(); |
| 376 | + $accum =& $stack->getAccum(); |
| 377 | + extract( $stack->getFlags() ); |
| 378 | + |
| 379 | + // Append the result to the enclosing accumulator |
| 380 | + $accum .= $element; |
| 381 | + // Note that we do NOT increment the input pointer. |
| 382 | + // This is because the closing linebreak could be the opening linebreak of |
| 383 | + // another heading. Infinite loops are avoided because the next iteration MUST |
| 384 | + // hit the heading open case above, which unconditionally increments the |
| 385 | + // input pointer. |
| 386 | + } |
| 387 | + |
| 388 | + elseif ( $found == 'open' ) { |
| 389 | + # count opening brace characters |
| 390 | + $count = strspn( $text, $curChar, $i ); |
| 391 | + |
| 392 | + # we need to add to stack only if opening brace count is enough for one of the rules |
| 393 | + if ( $count >= $rule['min'] ) { |
| 394 | + # Add it to the stack |
| 395 | + $piece = array( |
| 396 | + 'open' => $curChar, |
| 397 | + 'close' => $rule['end'], |
| 398 | + 'count' => $count, |
| 399 | + 'parts' => array( '' ), |
| 400 | + 'eqpos' => array(), |
| 401 | + 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), |
| 402 | + ); |
| 403 | + |
| 404 | + $stack->push( $piece ); |
| 405 | + $accum =& $stack->getAccum(); |
| 406 | + extract( $stack->getFlags() ); |
| 407 | + } else { |
| 408 | + # Add literal brace(s) |
| 409 | + $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
| 410 | + } |
| 411 | + $i += $count; |
| 412 | + } |
| 413 | + |
| 414 | + elseif ( $found == 'close' ) { |
| 415 | + $piece = $stack->top; |
| 416 | + # let's check if there are enough characters for the closing brace |
| 417 | + $maxCount = $piece->count; |
| 418 | + $count = strspn( $text, $curChar, $i, $maxCount ); |
| 419 | + |
| 420 | + # check for maximum matching characters (if there are 5 closing |
| 421 | + # characters, we will probably need only 3 - depending on the rules) |
| 422 | + $matchingCount = 0; |
| 423 | + $rule = $rules[$piece->open]; |
| 424 | + if ( $count > $rule['max'] ) { |
| 425 | + # The specified maximum exists in the callback array, unless the caller |
| 426 | + # has made an error |
| 427 | + $matchingCount = $rule['max']; |
| 428 | + } else { |
| 429 | + # Count is less than the maximum |
| 430 | + # Skip any gaps in the callback array to find the true largest match |
| 431 | + # Need to use array_key_exists not isset because the callback can be null |
| 432 | + $matchingCount = $count; |
| 433 | + while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { |
| 434 | + --$matchingCount; |
| 435 | + } |
| 436 | + } |
| 437 | + |
| 438 | + if ($matchingCount <= 0) { |
| 439 | + # No matching element found in callback array |
| 440 | + # Output a literal closing brace and continue |
| 441 | + $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
| 442 | + $i += $count; |
| 443 | + continue; |
| 444 | + } |
| 445 | + $name = $rule['names'][$matchingCount]; |
| 446 | + if ( $name === null ) { |
| 447 | + // No element, just literal text |
| 448 | + $element = str_repeat( $piece->open, $matchingCount ) . |
| 449 | + implode( '|', $piece->parts ) . |
| 450 | + str_repeat( $rule['end'], $matchingCount ); |
| 451 | + } else { |
| 452 | + # Create XML element |
| 453 | + # Note: $parts is already XML, does not need to be encoded further |
| 454 | + $parts = $piece->parts; |
| 455 | + $title = $parts[0]; |
| 456 | + unset( $parts[0] ); |
| 457 | + |
| 458 | + # The invocation is at the start of the line if lineStart is set in |
| 459 | + # the stack, and all opening brackets are used up. |
| 460 | + if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) { |
| 461 | + $attr = ' lineStart="1"'; |
| 462 | + } else { |
| 463 | + $attr = ''; |
| 464 | + } |
| 465 | + |
| 466 | + $element = "<$name$attr>"; |
| 467 | + $element .= "<title>$title</title>"; |
| 468 | + $argIndex = 1; |
| 469 | + foreach ( $parts as $partIndex => $part ) { |
| 470 | + if ( isset( $piece->eqpos[$partIndex] ) ) { |
| 471 | + $eqpos = $piece->eqpos[$partIndex]; |
| 472 | + $argName = substr( $part, 0, $eqpos ); |
| 473 | + $argValue = substr( $part, $eqpos + 1 ); |
| 474 | + $element .= "<part><name>$argName</name>=<value>$argValue</value></part>"; |
| 475 | + } else { |
| 476 | + $element .= "<part><name index=\"$argIndex\" /><value>$part</value></part>"; |
| 477 | + $argIndex++; |
| 478 | + } |
| 479 | + } |
| 480 | + $element .= "</$name>"; |
| 481 | + } |
| 482 | + |
| 483 | + # Advance input pointer |
| 484 | + $i += $matchingCount; |
| 485 | + |
| 486 | + # Unwind the stack |
| 487 | + $stack->pop(); |
| 488 | + $accum =& $stack->getAccum(); |
| 489 | + |
| 490 | + # Re-add the old stack element if it still has unmatched opening characters remaining |
| 491 | + if ($matchingCount < $piece->count) { |
| 492 | + $piece->parts = array( '' ); |
| 493 | + $piece->count -= $matchingCount; |
| 494 | + $piece->eqpos = array(); |
| 495 | + # do we still qualify for any callback with remaining count? |
| 496 | + $names = $rules[$piece->open]['names']; |
| 497 | + $skippedBraces = 0; |
| 498 | + $enclosingAccum =& $accum; |
| 499 | + while ( $piece->count ) { |
| 500 | + if ( array_key_exists( $piece->count, $names ) ) { |
| 501 | + $stack->push( $piece ); |
| 502 | + $accum =& $stack->getAccum(); |
| 503 | + break; |
| 504 | + } |
| 505 | + --$piece->count; |
| 506 | + $skippedBraces ++; |
| 507 | + } |
| 508 | + $enclosingAccum .= str_repeat( $piece->open, $skippedBraces ); |
| 509 | + } |
| 510 | + |
| 511 | + extract( $stack->getFlags() ); |
| 512 | + |
| 513 | + # Add XML element to the enclosing accumulator |
| 514 | + $accum .= $element; |
| 515 | + } |
| 516 | + |
| 517 | + elseif ( $found == 'pipe' ) { |
| 518 | + $findEquals = true; // shortcut for getFlags() |
| 519 | + $stack->top->addPart(); |
| 520 | + $accum =& $stack->getAccum(); |
| 521 | + ++$i; |
| 522 | + } |
| 523 | + |
| 524 | + elseif ( $found == 'equals' ) { |
| 525 | + $findEquals = false; // shortcut for getFlags() |
| 526 | + $partsCount = count( $stack->top->parts ); |
| 527 | + $stack->top->eqpos[$partsCount - 1] = strlen( $accum ); |
| 528 | + $accum .= '='; |
| 529 | + ++$i; |
| 530 | + } |
| 531 | + } |
| 532 | + |
| 533 | + # Output any remaining unclosed brackets |
| 534 | + foreach ( $stack->stack as $piece ) { |
| 535 | + if ( $piece->open == "\n" ) { |
| 536 | + $stack->topAccum .= $piece->parts[0]; |
| 537 | + } else { |
| 538 | + $stack->topAccum .= str_repeat( $piece->open, $piece->count ) . implode( '|', $piece->parts ); |
| 539 | + } |
| 540 | + } |
| 541 | + $stack->topAccum .= '</root>'; |
| 542 | + $xml = $stack->topAccum; |
| 543 | + |
| 544 | + wfProfileOut( __METHOD__.'-makexml' ); |
| 545 | + wfProfileIn( __METHOD__.'-loadXML' ); |
| 546 | + $dom = new DOMDocument; |
| 547 | + wfSuppressWarnings(); |
| 548 | + $result = $dom->loadXML( $xml ); |
| 549 | + wfRestoreWarnings(); |
| 550 | + if ( !$result ) { |
| 551 | + // Try running the XML through UtfNormal to get rid of invalid characters |
| 552 | + $xml = UtfNormal::cleanUp( $xml ); |
| 553 | + $result = $dom->loadXML( $xml ); |
| 554 | + if ( !$result ) { |
| 555 | + throw new MWException( __METHOD__.' generated invalid XML' ); |
| 556 | + } |
| 557 | + } |
| 558 | + $obj = new PPNode_DOM( $dom->documentElement ); |
| 559 | + wfProfileOut( __METHOD__.'-loadXML' ); |
| 560 | + wfProfileOut( __METHOD__ ); |
| 561 | + return $obj; |
| 562 | + } |
| 563 | +} |
| 564 | + |
| 565 | +/** |
| 566 | + * An expansion frame, used as a context to expand the result of preprocessToObj() |
| 567 | + */ |
| 568 | +class PPFrame_DOM implements PPFrame { |
| 569 | + var $preprocessor, $parser, $title; |
| 570 | + var $titleCache; |
| 571 | + |
| 572 | + /** |
| 573 | + * Hashtable listing templates which are disallowed for expansion in this frame, |
| 574 | + * having been encountered previously in parent frames. |
| 575 | + */ |
| 576 | + var $loopCheckHash; |
| 577 | + |
| 578 | + /** |
| 579 | + * Recursion depth of this frame, top = 0 |
| 580 | + */ |
| 581 | + var $depth; |
| 582 | + |
| 583 | + |
| 584 | + /** |
| 585 | + * Construct a new preprocessor frame. |
| 586 | + * @param Preprocessor $preprocessor The parent preprocessor |
| 587 | + */ |
| 588 | + function __construct( $preprocessor ) { |
| 589 | + $this->preprocessor = $preprocessor; |
| 590 | + $this->parser = $preprocessor->parser; |
| 591 | + $this->title = $this->parser->mTitle; |
| 592 | + $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false ); |
| 593 | + $this->loopCheckHash = array(); |
| 594 | + $this->depth = 0; |
| 595 | + } |
| 596 | + |
| 597 | + /** |
| 598 | + * Create a new child frame |
| 599 | + * $args is optionally a multi-root PPNode or array containing the template arguments |
| 600 | + */ |
| 601 | + function newChild( $args = false, $title = false ) { |
| 602 | + $namedArgs = array(); |
| 603 | + $numberedArgs = array(); |
| 604 | + if ( $title === false ) { |
| 605 | + $title = $this->title; |
| 606 | + } |
| 607 | + if ( $args !== false ) { |
| 608 | + $xpath = false; |
| 609 | + if ( $args instanceof PPNode ) { |
| 610 | + $args = $args->node; |
| 611 | + } |
| 612 | + foreach ( $args as $arg ) { |
| 613 | + if ( !$xpath ) { |
| 614 | + $xpath = new DOMXPath( $arg->ownerDocument ); |
| 615 | + } |
| 616 | + |
| 617 | + $nameNodes = $xpath->query( 'name', $arg ); |
| 618 | + $value = $xpath->query( 'value', $arg ); |
| 619 | + if ( $nameNodes->item( 0 )->hasAttributes() ) { |
| 620 | + // Numbered parameter |
| 621 | + $index = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent; |
| 622 | + $numberedArgs[$index] = $value->item( 0 ); |
| 623 | + unset( $namedArgs[$index] ); |
| 624 | + } else { |
| 625 | + // Named parameter |
| 626 | + $name = trim( $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ) ); |
| 627 | + $namedArgs[$name] = $value->item( 0 ); |
| 628 | + unset( $numberedArgs[$name] ); |
| 629 | + } |
| 630 | + } |
| 631 | + } |
| 632 | + return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title ); |
| 633 | + } |
| 634 | + |
/**
 * Expand a node (or collection of nodes) to a string in the context of this frame.
 *
 * The tree is walked iteratively rather than recursively. Three parallel stacks hold,
 * for every nesting level, the output accumulated so far, the thing being iterated
 * (an array, a DOMNodeList, or a single node that is visited exactly once) and the
 * current position within it. Slot 0 of each stack is a sentinel that receives the
 * final result.
 *
 * @param string|PPNode_DOM|DOMDocument|DOMNode|DOMNodeList|array $root Thing to expand;
 *   a string is returned unchanged
 * @param int $flags Bitfield tested against the NO_TEMPLATES, NO_ARGS, STRIP_COMMENTS
 *   and NO_IGNORE class constants
 * @return string Expanded text, or an error <span> if the parser's node-count limit
 *   has been exceeded
 * @throws MWException if an item of an unsupported type is encountered
 */
function expand( $root, $flags = 0 ) {
	if ( is_string( $root ) ) {
		return $root;
	}

	if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount ) {
		return '<span class="error">Node-count limit exceeded</span>';
	}

	if ( $root instanceof PPNode_DOM ) {
		$root = $root->node;
	}
	if ( $root instanceof DOMDocument ) {
		$root = $root->documentElement;
	}

	$outStack = array( '', '' );
	$iteratorStack = array( false, $root );
	$indexStack = array( 0, 0 );

	while ( count( $iteratorStack ) > 1 ) {
		$level = count( $outStack ) - 1;
		$iteratorNode =& $iteratorStack[$level];
		$out =& $outStack[$level];
		$index =& $indexStack[$level];

		if ( $iteratorNode instanceof PPNode_DOM ) {
			$iteratorNode = $iteratorNode->node;
		}

		if ( is_array( $iteratorNode ) ) {
			if ( $index >= count( $iteratorNode ) ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode[$index];
				$index++;
			}
		} elseif ( $iteratorNode instanceof DOMNodeList ) {
			if ( $index >= $iteratorNode->length ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode->item( $index );
				$index++;
			}
		} else {
			// Copy to $contextNode and then delete from iterator stack,
			// because this is not an iterator but we do have to execute it once
			$contextNode = $iteratorStack[$level];
			$iteratorStack[$level] = false;
		}

		if ( $contextNode instanceof PPNode_DOM ) {
			$contextNode = $contextNode->node;
		}

		// Set to an array/node list when the current node must be descended into
		$newIterator = false;

		if ( $contextNode === false ) {
			// nothing to do
		} elseif ( is_string( $contextNode ) ) {
			$out .= $contextNode;
		} elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
			$newIterator = $contextNode;
		} elseif ( $contextNode instanceof DOMNode ) {
			if ( $contextNode->nodeType == XML_TEXT_NODE ) {
				$out .= $contextNode->nodeValue;
			} elseif ( $contextNode->nodeName == 'template' ) {
				# Double-brace expansion
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$titles = $xpath->query( 'title', $contextNode );
				$title = $titles->item( 0 );
				$parts = $xpath->query( 'part', $contextNode );
				if ( $flags & self::NO_TEMPLATES ) {
					$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
				} else {
					$lineStart = $contextNode->getAttribute( 'lineStart' );
					$params = array(
						'title' => new PPNode_DOM( $title ),
						'parts' => new PPNode_DOM( $parts ),
						'lineStart' => $lineStart );
					$ret = $this->parser->braceSubstitution( $params, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->nodeName == 'tplarg' ) {
				# Triple-brace expansion
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$titles = $xpath->query( 'title', $contextNode );
				$title = $titles->item( 0 );
				$parts = $xpath->query( 'part', $contextNode );
				if ( $flags & self::NO_ARGS ) {
					$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
				} else {
					$params = array(
						'title' => new PPNode_DOM( $title ),
						'parts' => new PPNode_DOM( $parts ) );
					$ret = $this->parser->argSubstitution( $params, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->nodeName == 'comment' ) {
				# HTML-style comment: dropped for HTML output, for preprocess output
				# when the options ask for comment removal, or on explicit request
				$stripComment = $this->parser->ot['html']
					|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
					|| ( $flags & self::STRIP_COMMENTS );
				if ( !$stripComment ) {
					$out .= $contextNode->textContent;
				}
			} elseif ( $contextNode->nodeName == 'ignore' ) {
				# Output suppression used by <includeonly> etc.
				# OT_WIKI will only respect <ignore> in substed templates.
				# The other output types respect it unless NO_IGNORE is set.
				# extractSections() sets NO_IGNORE and so never respects it.
				if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
					$out .= $contextNode->textContent;
				}
			} elseif ( $contextNode->nodeName == 'ext' ) {
				# Extension tag
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$names = $xpath->query( 'name', $contextNode );
				$attrs = $xpath->query( 'attr', $contextNode );
				$inners = $xpath->query( 'inner', $contextNode );
				$closes = $xpath->query( 'close', $contextNode );
				$params = array(
					'name' => new PPNode_DOM( $names->item( 0 ) ),
					'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
					'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
					'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
				);
				$out .= $this->parser->extensionSubstitution( $params, $this );
			} elseif ( $contextNode->nodeName == 'h' ) {
				# Heading
				$s = $this->expand( $contextNode->childNodes, $flags );

				if ( $this->parser->ot['html'] ) {
					# Insert heading index marker
					$headingIndex = $contextNode->getAttribute( 'i' );
					$titleText = $this->title->getPrefixedDBkey();
					$this->parser->mHeadings[] = array( $titleText, $headingIndex );
					$serial = count( $this->parser->mHeadings ) - 1;
					$marker = "{$this->parser->mUniqPrefix}-h-$serial-{$this->parser->mMarkerSuffix}";
					# The marker goes in after the first "level" characters of the heading text
					$count = $contextNode->getAttribute( 'level' );
					$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
					$this->parser->mStripState->general->setPair( $marker, '' );
				}
				$out .= $s;
			} else {
				# Generic recursive expansion
				$newIterator = $contextNode->childNodes;
			}
		} else {
			throw new MWException( __METHOD__.': Invalid parameter type' );
		}

		if ( $newIterator !== false ) {
			if ( $newIterator instanceof PPNode_DOM ) {
				$newIterator = $newIterator->node;
			}
			$outStack[] = '';
			$iteratorStack[] = $newIterator;
			$indexStack[] = 0;
		} elseif ( $iteratorStack[$level] === false ) {
			// Return accumulated value to parent
			// With tail recursion
			while ( $iteratorStack[$level] === false && $level > 0 ) {
				// Read the buffer of the level being closed straight from the stack:
				// $out stays bound to the level where unwinding started, so it must
				// not be used once $level has been decremented
				$outStack[$level - 1] .= $outStack[$level];
				array_pop( $outStack );
				array_pop( $iteratorStack );
				array_pop( $indexStack );
				$level--;
			}
		}
	}
	return $outStack[0];
}
| 828 | + |
/**
 * Expand every node found in the trailing arguments with the given flags and join
 * the results with a separator.
 *
 * Each trailing argument may be a single node, an array of nodes, a DOMNodeList or a
 * PPNode_DOM wrapping any of those.
 *
 * @param string $sep Separator placed between consecutive expansions
 * @param int $flags Flags passed through to expand()
 * @return string
 */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
	$joined = '';
	$emitted = 0;
	foreach ( array_slice( func_get_args(), 2 ) as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		// Treat a lone node as a one-element list
		$isList = is_array( $root ) || ( $root instanceof DOMNodeList );
		$nodes = $isList ? $root : array( $root );
		foreach ( $nodes as $node ) {
			if ( $emitted++ > 0 ) {
				$joined .= $sep;
			}
			$joined .= $this->expand( $node, $flags );
		}
	}
	return $joined;
}
| 850 | + |
/**
 * Expand every node found in the trailing arguments with default flags and join the
 * results with a separator.
 *
 * This duplicates implodeWithFlags() instead of calling it, to reduce stack depth.
 *
 * @param string $sep Separator placed between consecutive expansions
 * @return string
 */
function implode( $sep /*, ... */ ) {
	$joined = '';
	$emitted = 0;
	foreach ( array_slice( func_get_args(), 1 ) as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		// Treat a lone node as a one-element list
		$isList = is_array( $root ) || ( $root instanceof DOMNodeList );
		$nodes = $isList ? $root : array( $root );
		foreach ( $nodes as $node ) {
			if ( $emitted++ > 0 ) {
				$joined .= $sep;
			}
			$joined .= $this->expand( $node );
		}
	}
	return $joined;
}
| 876 | + |
/**
 * Makes an object that, when expand()ed, will be the same as one obtained
 * with implode()
 *
 * Nothing is expanded here: the nodes from the trailing arguments are collected into a
 * flat array, with the separator inserted between consecutive nodes.
 *
 * @param string $sep Separator inserted between consecutive nodes
 * @return array Interleaved nodes and separators
 */
function virtualImplode( $sep /*, ... */ ) {
	$args = array_slice( func_get_args(), 1 );
	$out = array();
	$first = true;

	foreach ( $args as $root ) {
		// Unwrap each argument in turn, as the other implode variants do
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
			$root = array( $root );
		}
		foreach ( $root as $node ) {
			if ( $first ) {
				$first = false;
			} else {
				$out[] = $sep;
			}
			$out[] = $node;
		}
	}
	return $out;
}
| 902 | + |
/**
 * Virtual implode with brackets
 *
 * Builds, without expanding anything, an array made of the opening bracket, the nodes
 * from the trailing arguments interleaved with the separator, and the closing bracket.
 *
 * @param string $start Opening bracket, stored as the first element
 * @param string $sep Separator inserted between consecutive nodes
 * @param string $end Closing bracket, stored as the last element
 * @return array
 */
function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
	$pieces = array( $start );
	$needSeparator = false;

	foreach ( array_slice( func_get_args(), 3 ) as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		// Treat a lone node as a one-element list
		$isList = is_array( $root ) || ( $root instanceof DOMNodeList );
		$nodes = $isList ? $root : array( $root );
		foreach ( $nodes as $node ) {
			if ( $needSeparator ) {
				$pieces[] = $sep;
			}
			$needSeparator = true;
			$pieces[] = $node;
		}
	}

	$pieces[] = $end;
	return $pieces;
}
| 928 | + |
| 929 | + |
/**
 * Debugging representation of this frame; it is always the fixed string below.
 *
 * @return string
 */
function __toString() {
	$description = 'frame{}';
	return $description;
}
| 933 | + |
/**
 * Get a prefixed DB key.
 *
 * @param int|bool $level Index into the title cache, or false to ask this frame's
 *   own title for its key
 * @return string|bool The key, or false when the title cache has no entry at $level
 */
function getPDBK( $level = false ) {
	if ( $level !== false ) {
		if ( isset( $this->titleCache[$level] ) ) {
			return $this->titleCache[$level];
		}
		return false;
	}
	return $this->title->getPrefixedDBkey();
}
| 941 | + |
/**
 * Returns true if there are no arguments in this frame.
 * This implementation reports true unconditionally.
 *
 * @return bool
 */
function isEmpty() {
	$hasNoArguments = true;
	return $hasNoArguments;
}
| 948 | + |
/**
 * Look up an argument by name.
 * This implementation reports "not found" (false) for every name.
 *
 * @param string $name Argument name (ignored here)
 * @return bool Always false
 */
function getArgument( $name ) {
	$notFound = false;
	return $notFound;
}
| 952 | + |
/**
 * Returns true if the infinite loop check is OK, false if a loop is detected.
 *
 * A loop is reported when the prefixed DB key of the title is already present in this
 * frame's loop-check hash.
 *
 * @param object $title Object providing getPrefixedDBkey()
 * @return bool
 */
function loopCheck( $title ) {
	$key = $title->getPrefixedDBkey();
	if ( isset( $this->loopCheckHash[$key] ) ) {
		return false;
	}
	return true;
}
| 959 | +} |
| 960 | + |
/**
 * Expansion frame with template arguments
 *
 * Argument values are kept unexpanded; each is expanded in the parent frame the first
 * time it is requested, and the resulting text is cached.
 */
class PPTemplateFrame_DOM extends PPFrame_DOM {
	var $numberedArgs, $namedArgs, $parent;
	var $numberedExpansionCache, $namedExpansionCache;

	/**
	 * @param object $preprocessor Preprocessor; its parser property is copied to this frame
	 * @param object|bool $parent Parent frame, whose title cache, loop-check hash and
	 *   depth are read here
	 * @param array $numberedArgs Unexpanded value nodes keyed by index
	 * @param array $namedArgs Unexpanded value nodes keyed by name
	 * @param object|bool $title Title of this frame, or false for none
	 */
	function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
		$this->preprocessor = $preprocessor;
		$this->parser = $preprocessor->parser;
		$this->parent = $parent;
		$this->numberedArgs = $numberedArgs;
		$this->namedArgs = $namedArgs;
		$this->title = $title;

		if ( $title ) {
			$pdbk = $title->getPrefixedDBkey();
		} else {
			$pdbk = false;
		}

		// The title cache gains one entry per frame, even when there is no title
		$this->titleCache = $parent->titleCache;
		$this->titleCache[] = $pdbk;

		// Copy of the parent's hash, extended with this frame's own key
		$this->loopCheckHash = /*clone*/ $parent->loopCheckHash;
		if ( $pdbk !== false ) {
			$this->loopCheckHash[$pdbk] = true;
		}

		$this->depth = $parent->depth + 1;
		$this->numberedExpansionCache = array();
		$this->namedExpansionCache = array();
	}

	/**
	 * Debugging representation listing each argument with the XML of its value node.
	 *
	 * @return string
	 */
	function __toString() {
		$pairs = array();
		foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
			$xml = $value->ownerDocument->saveXML( $value );
			$pairs[] = "\"$name\":\"" . str_replace( '"', '\\"', $xml ) . '"';
		}
		return 'tplframe{' . implode( ', ', $pairs ) . '}';
	}

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool
	 */
	function isEmpty() {
		return !( count( $this->numberedArgs ) || count( $this->namedArgs ) );
	}

	/**
	 * Get the expanded text of a numbered argument.
	 *
	 * @param int|string $index
	 * @return string|bool Expanded text, or false if there is no such argument
	 */
	function getNumberedArgument( $index ) {
		if ( isset( $this->numberedArgs[$index] ) ) {
			if ( !isset( $this->numberedExpansionCache[$index] ) ) {
				# No trimming for unnamed arguments
				$expanded = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS );
				$this->numberedExpansionCache[$index] = $expanded;
			}
			return $this->numberedExpansionCache[$index];
		}
		return false;
	}

	/**
	 * Get the expanded, trimmed text of a named argument.
	 *
	 * @param string $name
	 * @return string|bool Expanded text, or false if there is no such argument
	 */
	function getNamedArgument( $name ) {
		if ( isset( $this->namedArgs[$name] ) ) {
			if ( !isset( $this->namedExpansionCache[$name] ) ) {
				# Trim named arguments post-expand, for backwards compatibility
				$expanded = $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS );
				$this->namedExpansionCache[$name] = trim( $expanded );
			}
			return $this->namedExpansionCache[$name];
		}
		return false;
	}

	/**
	 * Get an argument by name, trying the numbered arguments before the named ones.
	 *
	 * @param int|string $name
	 * @return string|bool Expanded text, or false if neither kind has the argument
	 */
	function getArgument( $name ) {
		$text = $this->getNumberedArgument( $name );
		return $text === false ? $this->getNamedArgument( $name ) : $text;
	}
}
| 1040 | + |
/**
 * Stack class to help Parser::preprocessToDom()
 *
 * Holds a list of PPDStackElement objects plus a separate accumulator that is used
 * while the stack is empty. $top refers to the last element, or is false when the
 * stack is empty.
 */
class PPDStack {
	var $stack, $topAccum, $top;

	function __construct() {
		$this->stack = array();
		$this->topAccum = '';
		$this->top = false;
	}

	/**
	 * Get a reference to the current text accumulator: that of the top element, or the
	 * stack's own accumulator when no element is present.
	 *
	 * @return string Returned by reference
	 */
	function &getAccum() {
		if ( count( $this->stack ) ) {
			return $this->top->getAccum();
		} else {
			return $this->topAccum;
		}
	}

	/**
	 * Push an element. Anything that is not already a PPDStackElement is passed to the
	 * PPDStackElement constructor first.
	 *
	 * @param PPDStackElement|array $data
	 */
	function push( $data ) {
		if ( $data instanceof PPDStackElement ) {
			$this->stack[] = $data;
		} else {
			$this->stack[] = new PPDStackElement( $data );
		}
		$this->top =& $this->stack[ count( $this->stack ) - 1 ];
	}

	/**
	 * Remove the top element and return it.
	 *
	 * @return PPDStackElement The element that was removed
	 * @throws MWException if the stack is already empty
	 */
	function pop() {
		if ( !count( $this->stack ) ) {
			throw new MWException( __METHOD__.': no elements remaining' );
		}
		$temp = array_pop( $this->stack );
		if ( count( $this->stack ) ) {
			$this->top =& $this->stack[ count( $this->stack ) - 1 ];
		} else {
			$this->top = false;
		}
		return $temp;
	}

	/**
	 * Get the flags of the top element, or all-false flags when the stack is empty.
	 *
	 * @return array With keys findEquals, findPipe and inHeading
	 */
	function getFlags() {
		if ( !count( $this->stack ) ) {
			return array(
				'findEquals' => false,
				'findPipe' => false,
				'inHeading' => false,
			);
		} else {
			return $this->top->getFlags();
		}
	}
}
| 1094 | + |
/**
 * One entry of a PPDStack: an open construct together with the text parts
 * accumulated for it so far.
 */
class PPDStackElement {
	var $open, $close, $count, $parts, $eqpos, $lineStart;

	/**
	 * @param array $data Property name => value pairs copied onto the object after the
	 *   defaults for parts and eqpos have been set
	 */
	function __construct( $data = array() ) {
		$this->parts = array( '' );
		$this->eqpos = array();

		foreach ( $data as $property => $setting ) {
			$this->$property = $setting;
		}
	}

	/**
	 * Get a reference to the last part, which is the one currently being accumulated.
	 *
	 * @return string Returned by reference
	 */
	function &getAccum() {
		$last = count( $this->parts ) - 1;
		return $this->parts[$last];
	}

	/**
	 * Start a new part.
	 *
	 * @param string $s Initial content of the new part
	 */
	function addPart( $s = '' ) {
		$this->parts[] = $s;
	}

	/**
	 * Work out which special characters matter inside this element.
	 *
	 * Pipes are looked for unless the opening character is a newline or "[". Equals
	 * signs are looked for only where pipes are, once there is more than one part, and
	 * while no eqpos entry exists for the last part.
	 *
	 * @return array With keys findPipe, findEquals and inHeading
	 */
	function getFlags() {
		$inHeading = ( $this->open == "\n" );
		$findPipe = !$inHeading && $this->open != '[';

		$findEquals = false;
		if ( $findPipe ) {
			$partCount = count( $this->parts );
			$findEquals = $partCount > 1 && !isset( $this->eqpos[$partCount - 1] );
		}

		return array(
			'findPipe' => $findPipe,
			'findEquals' => $findEquals,
			'inHeading' => $inHeading,
		);
	}
}
| 1125 | + |
/**
 * PPNode implementation that wraps either a single DOMNode or a DOMNodeList.
 */
class PPNode_DOM implements PPNode {
	var $node;

	/**
	 * @param DOMNode|DOMNodeList $node Node or node list to wrap
	 * @param mixed $xpath Accepted but not used by this constructor
	 */
	function __construct( $node, $xpath = false ) {
		$this->node = $node;
	}

	/**
	 * Lazily create the "xpath" property on first access.
	 *
	 * @param string $name Name of the inaccessible property being read
	 * @return DOMXPath|null The XPath object for "xpath"; null for any other name
	 */
	function __get( $name ) {
		if ( $name != 'xpath' ) {
			// Only "xpath" is synthesized; do not hand a DOMXPath back for arbitrary names
			return null;
		}
		$this->xpath = new DOMXPath( $this->node->ownerDocument );
		return $this->xpath;
	}

	/**
	 * Serialize the wrapped node, or every node of the wrapped list, as XML.
	 *
	 * @return string
	 */
	function __toString() {
		if ( $this->node instanceof DOMNodeList ) {
			$s = '';
			foreach ( $this->node as $node ) {
				$s .= $node->ownerDocument->saveXML( $node );
			}
		} else {
			$s = $this->node->ownerDocument->saveXML( $this->node );
		}
		return $s;
	}

	/**
	 * @return PPNode_DOM|bool Wrapper around the child node list, or false if there is none
	 */
	function getChildren() {
		return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
	}

	/**
	 * @return PPNode_DOM|bool Wrapper around the first child, or false if there is none
	 */
	function getFirstChild() {
		return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
	}

	/**
	 * @return PPNode_DOM|bool Wrapper around the next sibling, or false if there is none
	 */
	function getNextSibling() {
		return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
	}

	/**
	 * Get the children matching an XPath expression evaluated relative to this node.
	 *
	 * @param string $type XPath expression, for example an element name
	 * @return PPNode_DOM Wrapper around the resulting node list
	 */
	function getChildrenOfType( $type ) {
		return new self( $this->xpath->query( $type, $this->node ) );
	}

	/**
	 * @return int|bool Number of nodes when wrapping a node list, false otherwise
	 */
	function getLength() {
		if ( $this->node instanceof DOMNodeList ) {
			return $this->node->length;
		} else {
			return false;
		}
	}

	/**
	 * Get one entry of the wrapped node list.
	 *
	 * @param int $i Zero-based position
	 * @return PPNode_DOM|bool Wrapper around the entry, or false if there is none at $i
	 */
	function item( $i ) {
		$item = $this->node->item( $i );
		return $item ? new self( $item ) : false;
	}

	/**
	 * @return string '#nodelist' when wrapping a node list, otherwise the node's name
	 */
	function getName() {
		if ( $this->node instanceof DOMNodeList ) {
			return '#nodelist';
		} else {
			return $this->node->nodeName;
		}
	}

	/**
	 * Split an <arg> node into a three-element array:
	 *    PPNode name, string index and PPNode value
	 *
	 * @return array With keys name, index and value
	 * @throws MWException if the node has no <name> or no <value> child
	 */
	function splitArg() {
		$names = $this->xpath->query( 'name', $this->node );
		$values = $this->xpath->query( 'value', $this->node );
		if ( !$names->length || !$values->length ) {
			throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
		}
		$name = $names->item( 0 );
		$index = $name->getAttribute( 'index' );
		return array(
			'name' => new self( $name ),
			'index' => $index,
			'value' => new self( $values->item( 0 ) ) );
	}

	/**
	 * Split an <ext> node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 *
	 * @return array
	 * @throws MWException if the node has no <name> or no <attr> child
	 */
	function splitExt() {
		$names = $this->xpath->query( 'name', $this->node );
		$attrs = $this->xpath->query( 'attr', $this->node );
		$inners = $this->xpath->query( 'inner', $this->node );
		$closes = $this->xpath->query( 'close', $this->node );
		if ( !$names->length || !$attrs->length ) {
			throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
		}
		$parts = array(
			'name' => new self( $names->item( 0 ) ),
			'attr' => new self( $attrs->item( 0 ) ) );
		if ( $inners->length ) {
			$parts['inner'] = new self( $inners->item( 0 ) );
		}
		if ( $closes->length ) {
			$parts['close'] = new self( $closes->item( 0 ) );
		}
		return $parts;
	}

	/**
	 * Split a <h> node
	 *
	 * @return array With keys i and level (attribute values) and contents (child nodes)
	 * @throws MWException if the wrapped node is not an <h> element
	 */
	function splitHeading() {
		if ( $this->getName() != 'h' ) {
			throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
		}
		return array(
			'i' => $this->node->getAttribute( 'i' ),
			'level' => $this->node->getAttribute( 'level' ),
			'contents' => $this->getChildren()
		);
	}
}
Property changes on: trunk/phase3/includes/Preprocessor_DOM.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 1245 | + native |
Index: trunk/phase3/includes/Parser.php |
— | — | @@ -82,7 +82,7 @@ |
83 | 83 | # Persistent: |
84 | 84 | var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, |
85 | 85 | $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix, |
86 | | - $mExtLinkBracketedRegex; |
| 86 | + $mExtLinkBracketedRegex, $mPreprocessor; |
87 | 87 | |
88 | 88 | # Cleared with clearState(): |
89 | 89 | var $mOutput, $mAutonumber, $mDTopen, $mStripState; |
— | — | @@ -118,6 +118,11 @@ |
119 | 119 | $this->mMarkerSuffix = "-QINU\x7f"; |
120 | 120 | $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. |
121 | 121 | '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; |
| 122 | + if ( isset( $conf['preprocessorClass'] ) ) { |
| 123 | + $this->mPreprocessorClass = $conf['preprocessorClass']; |
| 124 | + } else { |
| 125 | + $this->mPreprocessorClass = 'Preprocessor_DOM'; |
| 126 | + } |
122 | 127 | $this->mFirstCall = true; |
123 | 128 | } |
124 | 129 | |
— | — | @@ -481,6 +486,17 @@ |
482 | 487 | } |
483 | 488 | |
484 | 489 | /** |
| 490 | + * Get a preprocessor object |
| 491 | + */ |
| 492 | + function getPreprocessor() { |
| 493 | + if ( !isset( $this->mPreprocessor ) ) { |
| 494 | + $class = $this->mPreprocessorClass; |
| 495 | + $this->mPreprocessor = new $class( $this ); |
| 496 | + } |
| 497 | + return $this->mPreprocessor; |
| 498 | + } |
| 499 | + |
| 500 | + /** |
485 | 501 | * Replaces all occurrences of HTML-style comments and the given tags |
486 | 502 | * in the text with a random marker and returns the next text. The output |
487 | 503 | * parameter $matches will be an associative array filled with data in |
— | — | @@ -2596,528 +2612,7 @@ |
2597 | 2613 | * @private |
2598 | 2614 | */ |
2599 | 2615 | function preprocessToDom ( $text, $flags = 0 ) { |
2600 | | - wfProfileIn( __METHOD__ ); |
2601 | | - wfProfileIn( __METHOD__.'-makexml' ); |
2602 | | - |
2603 | | - $rules = array( |
2604 | | - '{' => array( |
2605 | | - 'end' => '}', |
2606 | | - 'names' => array( |
2607 | | - 2 => 'template', |
2608 | | - 3 => 'tplarg', |
2609 | | - ), |
2610 | | - 'min' => 2, |
2611 | | - 'max' => 3, |
2612 | | - ), |
2613 | | - '[' => array( |
2614 | | - 'end' => ']', |
2615 | | - 'names' => array( 2 => null ), |
2616 | | - 'min' => 2, |
2617 | | - 'max' => 2, |
2618 | | - ) |
2619 | | - ); |
2620 | | - |
2621 | | - $forInclusion = $flags & self::PTD_FOR_INCLUSION; |
2622 | | - |
2623 | | - $xmlishElements = $this->getStripList(); |
2624 | | - $enableOnlyinclude = false; |
2625 | | - if ( $forInclusion ) { |
2626 | | - $ignoredTags = array( 'includeonly', '/includeonly' ); |
2627 | | - $ignoredElements = array( 'noinclude' ); |
2628 | | - $xmlishElements[] = 'noinclude'; |
2629 | | - if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) { |
2630 | | - $enableOnlyinclude = true; |
2631 | | - } |
2632 | | - } else { |
2633 | | - $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ); |
2634 | | - $ignoredElements = array( 'includeonly' ); |
2635 | | - $xmlishElements[] = 'includeonly'; |
2636 | | - } |
2637 | | - $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); |
2638 | | - |
2639 | | - // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset |
2640 | | - $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; |
2641 | | - |
2642 | | - $stack = new PPDStack; |
2643 | | - |
2644 | | - $searchBase = '[{<'; |
2645 | | - $revText = strrev( $text ); // For fast reverse searches |
2646 | | - |
2647 | | - $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start |
2648 | | - $accum =& $stack->getAccum(); # Current text accumulator |
2649 | | - $accum = '<root>'; |
2650 | | - $findEquals = false; # True to find equals signs in arguments |
2651 | | - $findPipe = false; # True to take notice of pipe characters |
2652 | | - $headingIndex = 1; |
2653 | | - $inHeading = false; # True if $i is inside a possible heading |
2654 | | - $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i |
2655 | | - $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude> |
2656 | | - $fakeLineStart = true; # Do a line-start run without outputting an LF character |
2657 | | - |
2658 | | - while ( true ) { |
2659 | | - if ( $findOnlyinclude ) { |
2660 | | - // Ignore all input up to the next <onlyinclude> |
2661 | | - $startPos = strpos( $text, '<onlyinclude>', $i ); |
2662 | | - if ( $startPos === false ) { |
2663 | | - // Ignored section runs to the end |
2664 | | - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>'; |
2665 | | - break; |
2666 | | - } |
2667 | | - $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end |
2668 | | - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>'; |
2669 | | - $i = $tagEndPos; |
2670 | | - $findOnlyinclude = false; |
2671 | | - } |
2672 | | - |
2673 | | - if ( $fakeLineStart ) { |
2674 | | - $found = 'line-start'; |
2675 | | - $curChar = ''; |
2676 | | - } else { |
2677 | | - # Find next opening brace, closing brace or pipe |
2678 | | - $search = $searchBase; |
2679 | | - if ( $stack->top === false ) { |
2680 | | - $currentClosing = ''; |
2681 | | - } else { |
2682 | | - $currentClosing = $stack->top->close; |
2683 | | - $search .= $currentClosing; |
2684 | | - } |
2685 | | - if ( $findPipe ) { |
2686 | | - $search .= '|'; |
2687 | | - } |
2688 | | - if ( $findEquals ) { |
2689 | | - // First equals will be for the template |
2690 | | - $search .= '='; |
2691 | | - } else { |
2692 | | - // Look for headings |
2693 | | - // We can't look for headings when $findEquals is true, because the ambiguity |
2694 | | - // between template name/value separators and heading starts would be unresolved |
2695 | | - // until the closing double-brace is found. This would mean either infinite |
2696 | | - // backtrack, or creating and updating two separate tree structures until the |
2697 | | - // end of the ambiguity -- one tree structure assuming a heading, and the other |
2698 | | - // assuming a template argument. |
2699 | | - // |
2700 | | - // Easier to just break some section edit links. |
2701 | | - $search .= "\n"; |
2702 | | - } |
2703 | | - $rule = null; |
2704 | | - # Output literal section, advance input counter |
2705 | | - $literalLength = strcspn( $text, $search, $i ); |
2706 | | - if ( $literalLength > 0 ) { |
2707 | | - $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) ); |
2708 | | - $i += $literalLength; |
2709 | | - } |
2710 | | - if ( $i >= strlen( $text ) ) { |
2711 | | - if ( $currentClosing == "\n" ) { |
2712 | | - // Do a past-the-end run to finish off the heading |
2713 | | - $curChar = ''; |
2714 | | - $found = 'line-end'; |
2715 | | - } else { |
2716 | | - # All done |
2717 | | - break; |
2718 | | - } |
2719 | | - } else { |
2720 | | - $curChar = $text[$i]; |
2721 | | - if ( $curChar == '|' ) { |
2722 | | - $found = 'pipe'; |
2723 | | - } elseif ( $curChar == '=' ) { |
2724 | | - $found = 'equals'; |
2725 | | - } elseif ( $curChar == '<' ) { |
2726 | | - $found = 'angle'; |
2727 | | - } elseif ( $curChar == "\n" ) { |
2728 | | - if ( $inHeading ) { |
2729 | | - $found = 'line-end'; |
2730 | | - } else { |
2731 | | - $found = 'line-start'; |
2732 | | - } |
2733 | | - } elseif ( $curChar == $currentClosing ) { |
2734 | | - $found = 'close'; |
2735 | | - } elseif ( isset( $rules[$curChar] ) ) { |
2736 | | - $found = 'open'; |
2737 | | - $rule = $rules[$curChar]; |
2738 | | - } else { |
2739 | | - # Some versions of PHP have a strcspn which stops on null characters |
2740 | | - # Ignore and continue |
2741 | | - ++$i; |
2742 | | - continue; |
2743 | | - } |
2744 | | - } |
2745 | | - } |
2746 | | - |
2747 | | - if ( $found == 'angle' ) { |
2748 | | - $matches = false; |
2749 | | - // Handle </onlyinclude> |
2750 | | - if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) { |
2751 | | - $findOnlyinclude = true; |
2752 | | - continue; |
2753 | | - } |
2754 | | - |
2755 | | - // Determine element name |
2756 | | - if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) { |
2757 | | - // Element name missing or not listed |
2758 | | - $accum .= '<'; |
2759 | | - ++$i; |
2760 | | - continue; |
2761 | | - } |
2762 | | - // Handle comments |
2763 | | - if ( isset( $matches[2] ) && $matches[2] == '!--' ) { |
2764 | | - // To avoid leaving blank lines, when a comment is both preceded |
2765 | | - // and followed by a newline (ignoring spaces), trim leading and |
2766 | | - // trailing spaces and one of the newlines. |
2767 | | - |
2768 | | - // Find the end |
2769 | | - $endPos = strpos( $text, '-->', $i + 4 ); |
2770 | | - if ( $endPos === false ) { |
2771 | | - // Unclosed comment in input, runs to end |
2772 | | - $inner = substr( $text, $i ); |
2773 | | - $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; |
2774 | | - $i = strlen( $text ); |
2775 | | - } else { |
2776 | | - // Search backwards for leading whitespace |
2777 | | - $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0; |
2778 | | - // Search forwards for trailing whitespace |
2779 | | - // $wsEnd will be the position of the last space |
2780 | | - $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); |
2781 | | - // Eat the line if possible |
2782 | | - // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at |
2783 | | - // the overall start. That's not how Sanitizer::removeHTMLcomments() does it, but |
2784 | | - // it's a possible beneficial b/c break. |
2785 | | - if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" |
2786 | | - && substr( $text, $wsEnd + 1, 1 ) == "\n" ) |
2787 | | - { |
2788 | | - $startPos = $wsStart; |
2789 | | - $endPos = $wsEnd + 1; |
2790 | | - // Remove leading whitespace from the end of the accumulator |
2791 | | - // Sanity check first though |
2792 | | - $wsLength = $i - $wsStart; |
2793 | | - if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) { |
2794 | | - $accum = substr( $accum, 0, -$wsLength ); |
2795 | | - } |
2796 | | - // Do a line-start run next time to look for headings after the comment, |
2797 | | - // but only if stack->top===false, because headings don't exist at deeper levels. |
2798 | | - if ( $stack->top === false ) { |
2799 | | - $fakeLineStart = true; |
2800 | | - } |
2801 | | - } else { |
2802 | | - // No line to eat, just take the comment itself |
2803 | | - $startPos = $i; |
2804 | | - $endPos += 2; |
2805 | | - } |
2806 | | - |
2807 | | - $i = $endPos + 1; |
2808 | | - $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); |
2809 | | - $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; |
2810 | | - } |
2811 | | - continue; |
2812 | | - } |
2813 | | - $name = $matches[1]; |
2814 | | - $attrStart = $i + strlen( $name ) + 1; |
2815 | | - |
2816 | | - // Find end of tag |
2817 | | - $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart ); |
2818 | | - if ( $tagEndPos === false ) { |
2819 | | - // Infinite backtrack |
2820 | | - // Disable tag search to prevent worst-case O(N^2) performance |
2821 | | - $noMoreGT = true; |
2822 | | - $accum .= '<'; |
2823 | | - ++$i; |
2824 | | - continue; |
2825 | | - } |
2826 | | - |
2827 | | - // Handle ignored tags |
2828 | | - if ( in_array( $name, $ignoredTags ) ) { |
2829 | | - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>'; |
2830 | | - $i = $tagEndPos + 1; |
2831 | | - continue; |
2832 | | - } |
2833 | | - |
2834 | | - $tagStartPos = $i; |
2835 | | - if ( $text[$tagEndPos-1] == '/' ) { |
2836 | | - $attrEnd = $tagEndPos - 1; |
2837 | | - $inner = null; |
2838 | | - $i = $tagEndPos + 1; |
2839 | | - $close = ''; |
2840 | | - } else { |
2841 | | - $attrEnd = $tagEndPos; |
2842 | | - // Find closing tag |
2843 | | - if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) { |
2844 | | - $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); |
2845 | | - $i = $matches[0][1] + strlen( $matches[0][0] ); |
2846 | | - $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>'; |
2847 | | - } else { |
2848 | | - // No end tag -- let it run out to the end of the text. |
2849 | | - $inner = substr( $text, $tagEndPos + 1 ); |
2850 | | - $i = strlen( $text ); |
2851 | | - $close = ''; |
2852 | | - } |
2853 | | - } |
2854 | | - // <includeonly> and <noinclude> just become <ignore> tags |
2855 | | - if ( in_array( $name, $ignoredElements ) ) { |
2856 | | - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) ) |
2857 | | - . '</ignore>'; |
2858 | | - continue; |
2859 | | - } |
2860 | | - |
2861 | | - $accum .= '<ext>'; |
2862 | | - if ( $attrEnd <= $attrStart ) { |
2863 | | - $attr = ''; |
2864 | | - } else { |
2865 | | - $attr = substr( $text, $attrStart, $attrEnd - $attrStart ); |
2866 | | - } |
2867 | | - $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' . |
2868 | | - // Note that the attr element contains the whitespace between name and attribute, |
2869 | | - // this is necessary for precise reconstruction during pre-save transform. |
2870 | | - '<attr>' . htmlspecialchars( $attr ) . '</attr>'; |
2871 | | - if ( $inner !== null ) { |
2872 | | - $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>'; |
2873 | | - } |
2874 | | - $accum .= $close . '</ext>'; |
2875 | | - } |
2876 | | - |
2877 | | - elseif ( $found == 'line-start' ) { |
2878 | | - // Is this the start of a heading? |
2879 | | - // Line break belongs before the heading element in any case |
2880 | | - if ( $fakeLineStart ) { |
2881 | | - $fakeLineStart = false; |
2882 | | - } else { |
2883 | | - $accum .= $curChar; |
2884 | | - $i++; |
2885 | | - } |
2886 | | - |
2887 | | - $count = strspn( $text, '=', $i, 6 ); |
2888 | | - if ( $count > 0 ) { |
2889 | | - $piece = array( |
2890 | | - 'open' => "\n", |
2891 | | - 'close' => "\n", |
2892 | | - 'parts' => array( str_repeat( '=', $count ) ), |
2893 | | - 'startPos' => $i, |
2894 | | - 'count' => $count ); |
2895 | | - $stack->push( $piece ); |
2896 | | - $accum =& $stack->getAccum(); |
2897 | | - extract( $stack->getFlags() ); |
2898 | | - $i += $count; |
2899 | | - } |
2900 | | - } |
2901 | | - |
2902 | | - elseif ( $found == 'line-end' ) { |
2903 | | - $piece = $stack->top; |
2904 | | - // A heading must be open, otherwise \n wouldn't have been in the search list |
2905 | | - assert( $piece->open == "\n" ); |
2906 | | - // Search back through the input to see if it has a proper close |
2907 | | - // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient |
2908 | | - $m = false; |
2909 | | - $count = $piece->count; |
2910 | | - if ( preg_match( "/\s*(=+)/A", $revText, $m, 0, strlen( $text ) - $i ) ) { |
2911 | | - if ( $i - strlen( $m[0] ) == $piece->startPos ) { |
2912 | | - // This is just a single string of equals signs on its own line |
2913 | | - // Replicate the doHeadings behaviour /={count}(.+)={count}/ |
2914 | | - // First find out how many equals signs there really are (don't stop at 6) |
2915 | | - $count = strlen( $m[1] ); |
2916 | | - if ( $count < 3 ) { |
2917 | | - $count = 0; |
2918 | | - } else { |
2919 | | - $count = min( 6, intval( ( $count - 1 ) / 2 ) ); |
2920 | | - } |
2921 | | - } else { |
2922 | | - $count = min( strlen( $m[1] ), $count ); |
2923 | | - } |
2924 | | - if ( $count > 0 ) { |
2925 | | - // Normal match, output <h> |
2926 | | - $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>"; |
2927 | | - $headingIndex++; |
2928 | | - } else { |
2929 | | - // Single equals sign on its own line, count=0 |
2930 | | - $element = $accum; |
2931 | | - } |
2932 | | - } else { |
2933 | | - // No match, no <h>, just pass down the inner text |
2934 | | - $element = $accum; |
2935 | | - } |
2936 | | - // Unwind the stack |
2937 | | - $stack->pop(); |
2938 | | - $accum =& $stack->getAccum(); |
2939 | | - extract( $stack->getFlags() ); |
2940 | | - |
2941 | | - // Append the result to the enclosing accumulator |
2942 | | - $accum .= $element; |
2943 | | - // Note that we do NOT increment the input pointer. |
2944 | | - // This is because the closing linebreak could be the opening linebreak of |
2945 | | - // another heading. Infinite loops are avoided because the next iteration MUST |
2946 | | - // hit the heading open case above, which unconditionally increments the |
2947 | | - // input pointer. |
2948 | | - } |
2949 | | - |
2950 | | - elseif ( $found == 'open' ) { |
2951 | | - # count opening brace characters |
2952 | | - $count = strspn( $text, $curChar, $i ); |
2953 | | - |
2954 | | - # we need to add to stack only if opening brace count is enough for one of the rules |
2955 | | - if ( $count >= $rule['min'] ) { |
2956 | | - # Add it to the stack |
2957 | | - $piece = array( |
2958 | | - 'open' => $curChar, |
2959 | | - 'close' => $rule['end'], |
2960 | | - 'count' => $count, |
2961 | | - 'parts' => array( '' ), |
2962 | | - 'eqpos' => array(), |
2963 | | - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), |
2964 | | - ); |
2965 | | - |
2966 | | - $stack->push( $piece ); |
2967 | | - $accum =& $stack->getAccum(); |
2968 | | - extract( $stack->getFlags() ); |
2969 | | - } else { |
2970 | | - # Add literal brace(s) |
2971 | | - $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
2972 | | - } |
2973 | | - $i += $count; |
2974 | | - } |
2975 | | - |
2976 | | - elseif ( $found == 'close' ) { |
2977 | | - $piece = $stack->top; |
2978 | | - # lets check if there are enough characters for closing brace |
2979 | | - $maxCount = $piece->count; |
2980 | | - $count = strspn( $text, $curChar, $i, $maxCount ); |
2981 | | - |
2982 | | - # check for maximum matching characters (if there are 5 closing |
2983 | | - # characters, we will probably need only 3 - depending on the rules) |
2984 | | - $matchingCount = 0; |
2985 | | - $rule = $rules[$piece->open]; |
2986 | | - if ( $count > $rule['max'] ) { |
2987 | | - # The specified maximum exists in the callback array, unless the caller |
2988 | | - # has made an error |
2989 | | - $matchingCount = $rule['max']; |
2990 | | - } else { |
2991 | | - # Count is less than the maximum |
2992 | | - # Skip any gaps in the callback array to find the true largest match |
2993 | | - # Need to use array_key_exists not isset because the callback can be null |
2994 | | - $matchingCount = $count; |
2995 | | - while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { |
2996 | | - --$matchingCount; |
2997 | | - } |
2998 | | - } |
2999 | | - |
3000 | | - if ($matchingCount <= 0) { |
3001 | | - # No matching element found in callback array |
3002 | | - # Output a literal closing brace and continue |
3003 | | - $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
3004 | | - $i += $count; |
3005 | | - continue; |
3006 | | - } |
3007 | | - $name = $rule['names'][$matchingCount]; |
3008 | | - if ( $name === null ) { |
3009 | | - // No element, just literal text |
3010 | | - $element = str_repeat( $piece->open, $matchingCount ) . |
3011 | | - implode( '|', $piece->parts ) . |
3012 | | - str_repeat( $rule['end'], $matchingCount ); |
3013 | | - } else { |
3014 | | - # Create XML element |
3015 | | - # Note: $parts is already XML, does not need to be encoded further |
3016 | | - $parts = $piece->parts; |
3017 | | - $title = $parts[0]; |
3018 | | - unset( $parts[0] ); |
3019 | | - |
3020 | | - # The invocation is at the start of the line if lineStart is set in |
3021 | | - # the stack, and all opening brackets are used up. |
3022 | | - if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) { |
3023 | | - $attr = ' lineStart="1"'; |
3024 | | - } else { |
3025 | | - $attr = ''; |
3026 | | - } |
3027 | | - |
3028 | | - $element = "<$name$attr>"; |
3029 | | - $element .= "<title>$title</title>"; |
3030 | | - $argIndex = 1; |
3031 | | - foreach ( $parts as $partIndex => $part ) { |
3032 | | - if ( isset( $piece->eqpos[$partIndex] ) ) { |
3033 | | - $eqpos = $piece->eqpos[$partIndex]; |
3034 | | - $argName = substr( $part, 0, $eqpos ); |
3035 | | - $argValue = substr( $part, $eqpos + 1 ); |
3036 | | - $element .= "<part><name>$argName</name>=<value>$argValue</value></part>"; |
3037 | | - } else { |
3038 | | - $element .= "<part><name index=\"$argIndex\" /><value>$part</value></part>"; |
3039 | | - $argIndex++; |
3040 | | - } |
3041 | | - } |
3042 | | - $element .= "</$name>"; |
3043 | | - } |
3044 | | - |
3045 | | - # Advance input pointer |
3046 | | - $i += $matchingCount; |
3047 | | - |
3048 | | - # Unwind the stack |
3049 | | - $stack->pop(); |
3050 | | - $accum =& $stack->getAccum(); |
3051 | | - |
3052 | | - # Re-add the old stack element if it still has unmatched opening characters remaining |
3053 | | - if ($matchingCount < $piece->count) { |
3054 | | - $piece->parts = array( '' ); |
3055 | | - $piece->count -= $matchingCount; |
3056 | | - $piece->eqpos = array(); |
3057 | | - # do we still qualify for any callback with remaining count? |
3058 | | - $names = $rules[$piece->open]['names']; |
3059 | | - $skippedBraces = 0; |
3060 | | - $enclosingAccum =& $accum; |
3061 | | - while ( $piece->count ) { |
3062 | | - if ( array_key_exists( $piece->count, $names ) ) { |
3063 | | - $stack->push( $piece ); |
3064 | | - $accum =& $stack->getAccum(); |
3065 | | - break; |
3066 | | - } |
3067 | | - --$piece->count; |
3068 | | - $skippedBraces ++; |
3069 | | - } |
3070 | | - $enclosingAccum .= str_repeat( $piece->open, $skippedBraces ); |
3071 | | - } |
3072 | | - |
3073 | | - extract( $stack->getFlags() ); |
3074 | | - |
3075 | | - # Add XML element to the enclosing accumulator |
3076 | | - $accum .= $element; |
3077 | | - } |
3078 | | - |
3079 | | - elseif ( $found == 'pipe' ) { |
3080 | | - $findEquals = true; // shortcut for getFlags() |
3081 | | - $stack->top->addPart(); |
3082 | | - $accum =& $stack->getAccum(); |
3083 | | - ++$i; |
3084 | | - } |
3085 | | - |
3086 | | - elseif ( $found == 'equals' ) { |
3087 | | - $findEquals = false; // shortcut for getFlags() |
3088 | | - $partsCount = count( $stack->top->parts ); |
3089 | | - $stack->top->eqpos[$partsCount - 1] = strlen( $accum ); |
3090 | | - $accum .= '='; |
3091 | | - ++$i; |
3092 | | - } |
3093 | | - } |
3094 | | - |
3095 | | - # Output any remaining unclosed brackets |
3096 | | - foreach ( $stack->stack as $piece ) { |
3097 | | - if ( $piece->open == "\n" ) { |
3098 | | - $stack->topAccum .= $piece->parts[0]; |
3099 | | - } else { |
3100 | | - $stack->topAccum .= str_repeat( $piece->open, $piece->count ) . implode( '|', $piece->parts ); |
3101 | | - } |
3102 | | - } |
3103 | | - $stack->topAccum .= '</root>'; |
3104 | | - $xml = $stack->topAccum; |
3105 | | - |
3106 | | - wfProfileOut( __METHOD__.'-makexml' ); |
3107 | | - wfProfileIn( __METHOD__.'-loadXML' ); |
3108 | | - $dom = new DOMDocument; |
3109 | | - wfSuppressWarnings(); |
3110 | | - $result = $dom->loadXML( $xml ); |
3111 | | - wfRestoreWarnings(); |
3112 | | - if ( !$result ) { |
3113 | | - // Try running the XML through UtfNormal to get rid of invalid characters |
3114 | | - $xml = UtfNormal::cleanUp( $xml ); |
3115 | | - $result = $dom->loadXML( $xml ); |
3116 | | - if ( !$result ) { |
3117 | | - throw new MWException( __METHOD__.' generated invalid XML' ); |
3118 | | - } |
3119 | | - } |
3120 | | - wfProfileOut( __METHOD__.'-loadXML' ); |
3121 | | - wfProfileOut( __METHOD__ ); |
| 2616 | + $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags ); |
3122 | 2617 | return $dom; |
3123 | 2618 | } |
3124 | 2619 | |
— | — | @@ -3162,7 +2657,7 @@ |
3163 | 2658 | wfProfileIn( $fname ); |
3164 | 2659 | |
3165 | 2660 | if ( $frame === false ) { |
3166 | | - $frame = new PPFrame( $this ); |
| 2661 | + $frame = $this->getPreprocessor()->newFrame(); |
3167 | 2662 | } elseif ( !( $frame instanceof PPFrame ) ) { |
3168 | 2663 | throw new MWException( __METHOD__ . ' called using the old argument format' ); |
3169 | 2664 | } |
— | — | @@ -3203,9 +2698,9 @@ |
3204 | 2699 | * replacing any variables or templates within the template. |
3205 | 2700 | * |
3206 | 2701 | * @param array $piece The parts of the template |
3207 | | - * $piece['text']: matched text |
3208 | 2702 | * $piece['title']: the title, i.e. the part before the | |
3209 | 2703 | * $piece['parts']: the parameter array |
| 2704 | + * $piece['lineStart']: whether the brace was at the start of a line |
3210 | 2705 | * @param PPFrame The current frame, contains template arguments |
3211 | 2706 | * @return string the text of the template |
3212 | 2707 | * @private |
— | — | @@ -3221,7 +2716,8 @@ |
3222 | 2717 | $nowiki = false; # wiki markup in $text should be escaped |
3223 | 2718 | $isHTML = false; # $text is HTML, armour it against wikitext transformation |
3224 | 2719 | $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered |
3225 | | - $isDOM = false; # $text is a DOM node needing expansion |
| 2720 | + $isChildObj = false; # $text is a DOM node needing expansion in a child frame |
| 2721 | + $isLocalObj = false; # $text is a DOM node needing expansion in the current frame |
3226 | 2722 | |
3227 | 2723 | # Title object, where $text came from |
3228 | 2724 | $title = NULL; |
— | — | @@ -3248,13 +2744,14 @@ |
3249 | 2745 | # 1) Found SUBST but not in the PST phase |
3250 | 2746 | # 2) Didn't find SUBST and in the PST phase |
3251 | 2747 | # In either case, return without further processing |
3252 | | - $text = '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}'; |
| 2748 | + $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
| 2749 | + $isLocalObj = true; |
3253 | 2750 | $found = true; |
3254 | 2751 | } |
3255 | 2752 | } |
3256 | 2753 | |
3257 | 2754 | # Variables |
3258 | | - if ( !$found && $args->length == 0 ) { |
| 2755 | + if ( !$found && $args->getLength() == 0 ) { |
3259 | 2756 | $id = $this->mVariables->matchStartToEnd( $part1 ); |
3260 | 2757 | if ( $id !== false ) { |
3261 | 2758 | $text = $this->getVariableValue( $id ); |
— | — | @@ -3311,14 +2808,14 @@ |
3312 | 2809 | # Add a frame parameter, and pass the arguments as an array |
3313 | 2810 | $allArgs = $initialArgs; |
3314 | 2811 | $allArgs[] = $frame; |
3315 | | - foreach ( $args as $arg ) { |
3316 | | - $funcArgs[] = $arg; |
| 2812 | + for ( $i = 0; $i < $args->getLength(); $i++ ) { |
| 2813 | + $funcArgs[] = $args->item( $i ); |
3317 | 2814 | } |
3318 | 2815 | $allArgs[] = $funcArgs; |
3319 | 2816 | } else { |
3320 | 2817 | # Convert arguments to plain text |
3321 | | - foreach ( $args as $arg ) { |
3322 | | - $funcArgs[] = trim( $frame->expand( $arg ) ); |
| 2818 | + for ( $i = 0; $i < $args->getLength(); $i++ ) { |
| 2819 | + $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) ); |
3323 | 2820 | } |
3324 | 2821 | $allArgs = array_merge( $initialArgs, $funcArgs ); |
3325 | 2822 | } |
— | — | @@ -3393,7 +2890,7 @@ |
3394 | 2891 | list( $text, $title ) = $this->getTemplateDom( $title ); |
3395 | 2892 | if ( $text !== false ) { |
3396 | 2893 | $found = true; |
3397 | | - $isDOM = true; |
| 2894 | + $isChildObj = true; |
3398 | 2895 | } |
3399 | 2896 | } |
3400 | 2897 | |
— | — | @@ -3411,7 +2908,7 @@ |
3412 | 2909 | $text = $this->interwikiTransclude( $title, 'raw' ); |
3413 | 2910 | // Preprocess it like a template |
3414 | 2911 | $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); |
3415 | | - $isDOM = true; |
| 2912 | + $isChildObj = true; |
3416 | 2913 | } |
3417 | 2914 | $found = true; |
3418 | 2915 | } |
— | — | @@ -3421,13 +2918,13 @@ |
3422 | 2919 | # If we haven't found text to substitute by now, we're done |
3423 | 2920 | # Recover the source wikitext and return it |
3424 | 2921 | if ( !$found ) { |
3425 | | - $text = '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}'; |
| 2922 | + $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
3426 | 2923 | wfProfileOut( $fname ); |
3427 | | - return $text; |
| 2924 | + return array( 'object' => $text ); |
3428 | 2925 | } |
3429 | 2926 | |
3430 | 2927 | # Expand DOM-style return values in a child frame |
3431 | | - if ( $isDOM ) { |
| 2928 | + if ( $isChildObj ) { |
3432 | 2929 | # Clean up argument array |
3433 | 2930 | $newFrame = $frame->newChild( $args, $title ); |
3434 | 2931 | |
— | — | @@ -3458,18 +2955,24 @@ |
3459 | 2956 | # Bug 529: if the template begins with a table or block-level |
3460 | 2957 | # element, it should be treated as beginning a new line. |
3461 | 2958 | # This behaviour is somewhat controversial. |
3462 | | - elseif ( !$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{ |
| 2959 | + elseif ( is_string( $text ) && !$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{ |
3463 | 2960 | $text = "\n" . $text; |
3464 | 2961 | } |
3465 | 2962 | |
3466 | | - if ( !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { |
| 2963 | + if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { |
3467 | 2964 | # Error, oversize inclusion |
3468 | 2965 | $text = "[[$originalTitle]]" . |
3469 | 2966 | $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' ); |
3470 | 2967 | } |
3471 | 2968 | |
| 2969 | + if ( $isLocalObj ) { |
| 2970 | + $ret = array( 'object' => $text ); |
| 2971 | + } else { |
| 2972 | + $ret = array( 'text' => $text ); |
| 2973 | + } |
| 2974 | + |
3472 | 2975 | wfProfileOut( $fname ); |
3473 | | - return $text; |
| 2976 | + return $ret; |
3474 | 2977 | } |
3475 | 2978 | |
3476 | 2979 | /** |
— | — | @@ -3639,26 +3142,31 @@ |
3640 | 3143 | $parts = $piece['parts']; |
3641 | 3144 | $nameWithSpaces = $frame->expand( $piece['title'] ); |
3642 | 3145 | $argName = trim( $nameWithSpaces ); |
3643 | | - |
| 3146 | + $object = false; |
3644 | 3147 | $text = $frame->getArgument( $argName ); |
3645 | | - if ( $text === false && ( $this->ot['html'] || $this->ot['pre'] ) && $parts->length > 0 ) { |
| 3148 | + if ( $text === false && ( $this->ot['html'] || $this->ot['pre'] ) && $parts->getLength() > 0 ) { |
3646 | 3149 | # No match in frame, use the supplied default |
3647 | | - $text = $frame->expand( $parts->item( 0 ) ); |
| 3150 | + $object = $parts->item( 0 )->getChildren(); |
3648 | 3151 | } |
3649 | 3152 | if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { |
3650 | 3153 | $error = '<!-- WARNING: argument omitted, expansion size too large -->'; |
3651 | 3154 | } |
3652 | 3155 | |
3653 | | - if ( $text === false ) { |
| 3156 | + if ( $text === false && $object === false ) { |
3654 | 3157 | # No match anywhere |
3655 | | - $text = '{{{' . $frame->implode( '|', $nameWithSpaces, $parts ) . '}}}'; |
| 3158 | + $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts ); |
3656 | 3159 | } |
3657 | 3160 | if ( $error !== false ) { |
3658 | 3161 | $text .= $error; |
3659 | 3162 | } |
| 3163 | + if ( $object !== false ) { |
| 3164 | + $ret = array( 'object' => $object ); |
| 3165 | + } else { |
| 3166 | + $ret = array( 'text' => $text ); |
| 3167 | + } |
3660 | 3168 | |
3661 | 3169 | wfProfileOut( __METHOD__ ); |
3662 | | - return $text; |
| 3170 | + return $ret; |
3663 | 3171 | } |
3664 | 3172 | |
3665 | 3173 | /** |
— | — | @@ -3666,8 +3174,8 @@ |
3667 | 3175 | * This is the ghost of strip(). |
3668 | 3176 | * |
3669 | 3177 | * @param array $params Associative array of parameters: |
3670 | | - * name DOMNode for the tag name |
3671 | | - * attr DOMNode for unparsed text where tag attributes are thought to be |
| 3178 | + * name PPNode for the tag name |
| 3179 | + * attr PPNode for unparsed text where tag attributes are thought to be |
3672 | 3180 | * attributes Optional associative array of parsed attributes |
3673 | 3181 | * inner Contents of extension element |
3674 | 3182 | * noClose Original text did not have a close tag |
— | — | @@ -4252,8 +3760,8 @@ |
4253 | 3761 | $text = preg_replace( $substRegex, $substText, $text ); |
4254 | 3762 | $text = $this->cleanSigInSig( $text ); |
4255 | 3763 | $dom = $this->preprocessToDom( $text ); |
4256 | | - $frame = new PPFrame( $this ); |
4257 | | - $text = $frame->expand( $dom->documentElement ); |
| 3764 | + $frame = $this->getPreprocessor()->newFrame(); |
| 3765 | + $text = $frame->expand( $dom ); |
4258 | 3766 | |
4259 | 3767 | if ( !$parsing ) { |
4260 | 3768 | $text = $this->mStripState->unstripBoth( $text ); |
— | — | @@ -5026,7 +4534,7 @@ |
5027 | 4535 | $this->setOutputType( OT_WIKI ); |
5028 | 4536 | $curIndex = 0; |
5029 | 4537 | $outText = ''; |
5030 | | - $frame = new PPFrame( $this ); |
| 4538 | + $frame = $this->getPreprocessor()->newFrame(); |
5031 | 4539 | |
5032 | 4540 | // Process section extraction flags |
5033 | 4541 | $flags = 0; |
— | — | @@ -5038,12 +4546,11 @@ |
5039 | 4547 | } |
5040 | 4548 | } |
5041 | 4549 | // Preprocess the text |
5042 | | - $dom = $this->preprocessToDom( $text, $flags ); |
5043 | | - $root = $dom->documentElement; |
| 4550 | + $root = $this->preprocessToDom( $text, $flags ); |
5044 | 4551 | |
5045 | 4552 | // <h> nodes indicate section breaks |
5046 | 4553 | // They can only occur at the top level, so we can find them by iterating the root's children |
5047 | | - $node = $root->firstChild; |
| 4554 | + $node = $root->getFirstChild(); |
5048 | 4555 | |
5049 | 4556 | // Find the target section |
5050 | 4557 | if ( $sectionIndex == 0 ) { |
— | — | @@ -5051,7 +4558,7 @@ |
5052 | 4559 | $targetLevel = 1000; |
5053 | 4560 | } else { |
5054 | 4561 | while ( $node ) { |
5055 | | - if ( $node->nodeName == 'h' ) { |
| 4562 | + if ( $node->getName() == 'h' ) { |
5056 | 4563 | if ( $curIndex + 1 == $sectionIndex ) { |
5057 | 4564 | break; |
5058 | 4565 | } |
— | — | @@ -5060,10 +4567,11 @@ |
5061 | 4568 | if ( $mode == 'replace' ) { |
5062 | 4569 | $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
5063 | 4570 | } |
5064 | | - $node = $node->nextSibling; |
| 4571 | + $node = $node->getNextSibling(); |
5065 | 4572 | } |
5066 | 4573 | if ( $node ) { |
5067 | | - $targetLevel = $node->getAttribute( 'level' ); |
| 4574 | + $bits = $node->splitHeading(); |
| 4575 | + $targetLevel = $bits['level']; |
5068 | 4576 | } |
5069 | 4577 | } |
5070 | 4578 | |
— | — | @@ -5078,9 +4586,10 @@ |
5079 | 4587 | |
5080 | 4588 | // Find the end of the section, including nested sections |
5081 | 4589 | do { |
5082 | | - if ( $node->nodeName == 'h' ) { |
| 4590 | + if ( $node->getName() == 'h' ) { |
5083 | 4591 | $curIndex++; |
5084 | | - $curLevel = $node->getAttribute( 'level' ); |
| 4592 | + $bits = $node->splitHeading(); |
| 4593 | + $curLevel = $bits['level']; |
5085 | 4594 | if ( $curIndex != $sectionIndex && $curLevel <= $targetLevel ) { |
5086 | 4595 | break; |
5087 | 4596 | } |
— | — | @@ -5088,7 +4597,7 @@ |
5089 | 4598 | if ( $mode == 'get' ) { |
5090 | 4599 | $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
5091 | 4600 | } |
5092 | | - $node = $node->nextSibling; |
| 4601 | + $node = $node->getNextSibling(); |
5093 | 4602 | } while ( $node ); |
5094 | 4603 | |
5095 | 4604 | // Write out the remainder (in replace mode only) |
— | — | @@ -5099,7 +4608,7 @@ |
5100 | 4609 | $outText .= $newText . "\n\n"; |
5101 | 4610 | while ( $node ) { |
5102 | 4611 | $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
5103 | | - $node = $node->nextSibling; |
| 4612 | + $node = $node->getNextSibling(); |
5104 | 4613 | } |
5105 | 4614 | } |
5106 | 4615 | |
— | — | @@ -5242,15 +4751,32 @@ |
5243 | 4752 | return $text; |
5244 | 4753 | } |
5245 | 4754 | |
| 4755 | + function srvus( $text ) { |
| 4756 | + return $this->testSrvus( $text, $this->mOutputType ); |
| 4757 | + } |
| 4758 | + |
5246 | 4759 | /** |
5247 | 4760 | * strip/replaceVariables/unstrip for preprocessor regression testing |
5248 | 4761 | */ |
5249 | | - function srvus( $text ) { |
| 4762 | + function testSrvus( $text, $title, $options, $outputType = OT_HTML ) { |
| 4763 | + $this->clearState(); |
| 4764 | + $this->mTitle = $title; |
| 4765 | + $this->mOptions = $options; |
| 4766 | + $this->setOutputType( $outputType ); |
5250 | 4767 | $text = $this->replaceVariables( $text ); |
5251 | 4768 | $text = $this->mStripState->unstripBoth( $text ); |
5252 | 4769 | $text = Sanitizer::removeHTMLtags( $text ); |
5253 | 4770 | return $text; |
5254 | 4771 | } |
| 4772 | + |
| 4773 | + function testPst( $text, $title, $options ) { |
| 4774 | + global $wgUser; |
| 4775 | + return $this->preSaveTransform( $text, $title, $wgUser, $options ); |
| 4776 | + } |
| 4777 | + |
| 4778 | + function testPreprocess( $text, $title, $options ) { |
| 4779 | + return $this->testSrvus( $text, $title, $options, OT_PREPROCESS ); |
| 4780 | + } |
5255 | 4781 | } |
5256 | 4782 | |
5257 | 4783 | /** |
— | — | @@ -5313,456 +4839,3 @@ |
5314 | 4840 | } |
5315 | 4841 | } |
5316 | 4842 | |
5317 | | -/** |
5318 | | - * An expansion frame, used as a context to expand the result of preprocessToDom() |
5319 | | - */ |
5320 | | -class PPFrame { |
5321 | | - var $parser, $title; |
5322 | | - var $titleCache; |
5323 | | - |
5324 | | - /** |
5325 | | - * Hashtable listing templates which are disallowed for expansion in this frame, |
5326 | | - * having been encountered previously in parent frames. |
5327 | | - */ |
5328 | | - var $loopCheckHash; |
5329 | | - |
5330 | | - /** |
5331 | | - * Recursion depth of this frame, top = 0 |
5332 | | - */ |
5333 | | - var $depth; |
5334 | | - |
5335 | | - const NO_ARGS = 1; |
5336 | | - const NO_TEMPLATES = 2; |
5337 | | - const STRIP_COMMENTS = 4; |
5338 | | - const NO_IGNORE = 8; |
5339 | | - |
5340 | | - const RECOVER_ORIG = 11; |
5341 | | - |
5342 | | - /** |
5343 | | - * Construct a new preprocessor frame. |
5344 | | - * @param Parser $parser The parent parser |
5345 | | - * @param Title $title The context title, or false if there isn't one |
5346 | | - */ |
5347 | | - function __construct( $parser ) { |
5348 | | - $this->parser = $parser; |
5349 | | - $this->title = $parser->mTitle; |
5350 | | - $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false ); |
5351 | | - $this->loopCheckHash = array(); |
5352 | | - $this->depth = 0; |
5353 | | - } |
5354 | | - |
5355 | | - /** |
5356 | | - * Create a new child frame |
5357 | | - * $args is optionally a DOMNodeList containing the template arguments |
5358 | | - */ |
5359 | | - function newChild( $args = false, $title = false ) { |
5360 | | - $namedArgs = array(); |
5361 | | - $numberedArgs = array(); |
5362 | | - if ( $title === false ) { |
5363 | | - $title = $this->title; |
5364 | | - } |
5365 | | - if ( $args !== false ) { |
5366 | | - $xpath = false; |
5367 | | - foreach ( $args as $arg ) { |
5368 | | - if ( !$xpath ) { |
5369 | | - $xpath = new DOMXPath( $arg->ownerDocument ); |
5370 | | - } |
5371 | | - |
5372 | | - $nameNodes = $xpath->query( 'name', $arg ); |
5373 | | - $value = $xpath->query( 'value', $arg ); |
5374 | | - if ( $nameNodes->item( 0 )->hasAttributes() ) { |
5375 | | - // Numbered parameter |
5376 | | - $index = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent; |
5377 | | - $numberedArgs[$index] = $value->item( 0 ); |
5378 | | - unset( $namedArgs[$index] ); |
5379 | | - } else { |
5380 | | - // Named parameter |
5381 | | - $name = trim( $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ) ); |
5382 | | - $namedArgs[$name] = $value->item( 0 ); |
5383 | | - unset( $numberedArgs[$name] ); |
5384 | | - } |
5385 | | - } |
5386 | | - } |
5387 | | - return new PPTemplateFrame( $this->parser, $this, $numberedArgs, $namedArgs, $title ); |
5388 | | - } |
5389 | | - |
5390 | | - /** |
5391 | | - * Expand a DOMNode describing a preprocessed document into plain wikitext, |
5392 | | - * using the current context |
5393 | | - * @param $root the node |
5394 | | - */ |
5395 | | - function expand( $root, $flags = 0 ) { |
5396 | | - if ( is_string( $root ) ) { |
5397 | | - return $root; |
5398 | | - } |
5399 | | - |
5400 | | - if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount ) |
5401 | | - { |
5402 | | - return '<span class="error">Node-count limit exceeded</span>'; |
5403 | | - } |
5404 | | - |
5405 | | - if ( is_array( $root ) || $root instanceof DOMNodeList ) { |
5406 | | - $s = ''; |
5407 | | - foreach ( $root as $node ) { |
5408 | | - $s .= $this->expand( $node, $flags ); |
5409 | | - } |
5410 | | - } elseif ( $root instanceof DOMNode ) { |
5411 | | - if ( $root->nodeType == XML_TEXT_NODE ) { |
5412 | | - $s = $root->nodeValue; |
5413 | | - } elseif ( $root->nodeName == 'template' ) { |
5414 | | - # Double-brace expansion |
5415 | | - $xpath = new DOMXPath( $root->ownerDocument ); |
5416 | | - $titles = $xpath->query( 'title', $root ); |
5417 | | - $title = $titles->item( 0 ); |
5418 | | - $parts = $xpath->query( 'part', $root ); |
5419 | | - if ( $flags & self::NO_TEMPLATES ) { |
5420 | | - $s = '{{' . $this->implodeWithFlags( '|', $flags, $title, $parts ) . '}}'; |
5421 | | - } else { |
5422 | | - $lineStart = $root->getAttribute( 'lineStart' ); |
5423 | | - $params = array( |
5424 | | - 'title' => $title, |
5425 | | - 'parts' => $parts, |
5426 | | - 'lineStart' => $lineStart, |
5427 | | - 'text' => 'FIXME' ); |
5428 | | - $s = $this->parser->braceSubstitution( $params, $this ); |
5429 | | - } |
5430 | | - } elseif ( $root->nodeName == 'tplarg' ) { |
5431 | | - # Triple-brace expansion |
5432 | | - $xpath = new DOMXPath( $root->ownerDocument ); |
5433 | | - $titles = $xpath->query( 'title', $root ); |
5434 | | - $title = $titles->item( 0 ); |
5435 | | - $parts = $xpath->query( 'part', $root ); |
5436 | | - if ( $flags & self::NO_ARGS ) { |
5437 | | - $s = '{{{' . $this->implodeWithFlags( '|', $flags, $title, $parts ) . '}}}'; |
5438 | | - } else { |
5439 | | - $params = array( 'title' => $title, 'parts' => $parts, 'text' => 'FIXME' ); |
5440 | | - $s = $this->parser->argSubstitution( $params, $this ); |
5441 | | - } |
5442 | | - } elseif ( $root->nodeName == 'comment' ) { |
5443 | | - # HTML-style comment |
5444 | | - if ( $this->parser->ot['html'] |
5445 | | - || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() ) |
5446 | | - || ( $flags & self::STRIP_COMMENTS ) ) |
5447 | | - { |
5448 | | - $s = ''; |
5449 | | - } else { |
5450 | | - $s = $root->textContent; |
5451 | | - } |
5452 | | - } elseif ( $root->nodeName == 'ignore' ) { |
5453 | | - # Output suppression used by <includeonly> etc. |
5454 | | - # OT_WIKI will only respect <ignore> in substed templates. |
5455 | | - # The other output types respect it unless NO_IGNORE is set. |
5456 | | - # extractSections() sets NO_IGNORE and so never respects it. |
5457 | | - if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) { |
5458 | | - $s = $root->textContent; |
5459 | | - } else { |
5460 | | - $s = ''; |
5461 | | - } |
5462 | | - } elseif ( $root->nodeName == 'ext' ) { |
5463 | | - # Extension tag |
5464 | | - $xpath = new DOMXPath( $root->ownerDocument ); |
5465 | | - $names = $xpath->query( 'name', $root ); |
5466 | | - $attrs = $xpath->query( 'attr', $root ); |
5467 | | - $inners = $xpath->query( 'inner', $root ); |
5468 | | - $closes = $xpath->query( 'close', $root ); |
5469 | | - $params = array( |
5470 | | - 'name' => $names->item( 0 ), |
5471 | | - 'attr' => $attrs->length > 0 ? $attrs->item( 0 ) : null, |
5472 | | - 'inner' => $inners->length > 0 ? $inners->item( 0 ) : null, |
5473 | | - 'close' => $closes->length > 0 ? $closes->item( 0 ) : null, |
5474 | | - ); |
5475 | | - $s = $this->parser->extensionSubstitution( $params, $this ); |
5476 | | - } elseif ( $root->nodeName == 'h' ) { |
5477 | | - # Heading |
5478 | | - $s = $this->expand( $root->childNodes, $flags ); |
5479 | | - |
5480 | | - if ( $this->parser->ot['html'] ) { |
5481 | | - # Insert heading index marker |
5482 | | - $headingIndex = $root->getAttribute( 'i' ); |
5483 | | - $titleText = $this->title->getPrefixedDBkey(); |
5484 | | - $this->parser->mHeadings[] = array( $titleText, $headingIndex ); |
5485 | | - $serial = count( $this->parser->mHeadings ) - 1; |
5486 | | - $marker = "{$this->parser->mUniqPrefix}-h-$serial-{$this->parser->mMarkerSuffix}"; |
5487 | | - $count = $root->getAttribute( 'level' ); |
5488 | | - $s = substr( $s, 0, $count ) . $marker . substr( $s, $count ); |
5489 | | - $this->parser->mStripState->general->setPair( $marker, '' ); |
5490 | | - } |
5491 | | - } else { |
5492 | | - # Generic recursive expansion |
5493 | | - $s = ''; |
5494 | | - for ( $node = $root->firstChild; $node; $node = $node->nextSibling ) { |
5495 | | - if ( $node->nodeType == XML_TEXT_NODE ) { |
5496 | | - $s .= $node->nodeValue; |
5497 | | - } elseif ( $node->nodeType == XML_ELEMENT_NODE ) { |
5498 | | - $s .= $this->expand( $node, $flags ); |
5499 | | - } |
5500 | | - } |
5501 | | - } |
5502 | | - } else { |
5503 | | - throw new MWException( __METHOD__.': Invalid parameter type' ); |
5504 | | - } |
5505 | | - return $s; |
5506 | | - } |
5507 | | - |
5508 | | - function implodeWithFlags( $sep, $flags /*, ... */ ) { |
5509 | | - $args = array_slice( func_get_args(), 2 ); |
5510 | | - |
5511 | | - $first = true; |
5512 | | - $s = ''; |
5513 | | - foreach ( $args as $root ) { |
5514 | | - if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) { |
5515 | | - $root = array( $root ); |
5516 | | - } |
5517 | | - foreach ( $root as $node ) { |
5518 | | - if ( $first ) { |
5519 | | - $first = false; |
5520 | | - } else { |
5521 | | - $s .= $sep; |
5522 | | - } |
5523 | | - $s .= $this->expand( $node, $flags ); |
5524 | | - } |
5525 | | - } |
5526 | | - return $s; |
5527 | | - } |
5528 | | - |
5529 | | - function implode( $sep /*, ... */ ) { |
5530 | | - $args = func_get_args(); |
5531 | | - $args = array_merge( array_slice( $args, 0, 1 ), array( 0 ), array_slice( $args, 1 ) ); |
5532 | | - return call_user_func_array( array( $this, 'implodeWithFlags' ), $args ); |
5533 | | - } |
5534 | | - |
5535 | | - /** |
5536 | | - * Split an <arg> or <template> node into a three-element array: |
5537 | | - * DOMNode name, string index and DOMNode value |
5538 | | - */ |
5539 | | - function splitBraceNode( $node ) { |
5540 | | - $xpath = new DOMXPath( $node->ownerDocument ); |
5541 | | - $names = $xpath->query( 'name', $node ); |
5542 | | - $values = $xpath->query( 'value', $node ); |
5543 | | - if ( !$names->length || !$values->length ) { |
5544 | | - throw new MWException( 'Invalid brace node passed to ' . __METHOD__ ); |
5545 | | - } |
5546 | | - $name = $names->item( 0 ); |
5547 | | - $index = $name->getAttribute( 'index' ); |
5548 | | - return array( $name, $index, $values->item( 0 ) ); |
5549 | | - } |
5550 | | - |
5551 | | - /** |
5552 | | - * Split an <ext> node into an associative array containing name, attr, inner and close |
5553 | | - * All values in the resulting array are DOMNodes. Inner and close are optional. |
5554 | | - */ |
5555 | | - function splitExtNode( $node ) { |
5556 | | - $xpath = new DOMXPath( $node->ownerDocument ); |
5557 | | - $names = $xpath->query( 'name', $node ); |
5558 | | - $attrs = $xpath->query( 'attr', $node ); |
5559 | | - $inners = $xpath->query( 'inner', $node ); |
5560 | | - $closes = $xpath->query( 'close', $node ); |
5561 | | - if ( !$names->length || !$attrs->length ) { |
5562 | | - throw new MWException( 'Invalid ext node passed to ' . __METHOD__ ); |
5563 | | - } |
5564 | | - $parts = array( |
5565 | | - 'name' => $names->item( 0 ), |
5566 | | - 'attr' => $attrs->item( 0 ) ); |
5567 | | - if ( $inners->length ) { |
5568 | | - $parts['inner'] = $inners->item( 0 ); |
5569 | | - } |
5570 | | - if ( $closes->length ) { |
5571 | | - $parts['close'] = $closes->item( 0 ); |
5572 | | - } |
5573 | | - return $parts; |
5574 | | - } |
5575 | | - |
5576 | | - function __toString() { |
5577 | | - return 'frame{}'; |
5578 | | - } |
5579 | | - |
5580 | | - function getPDBK( $level = false ) { |
5581 | | - if ( $level === false ) { |
5582 | | - return $this->title->getPrefixedDBkey(); |
5583 | | - } else { |
5584 | | - return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false; |
5585 | | - } |
5586 | | - } |
5587 | | - |
5588 | | - /** |
5589 | | - * Returns true if there are no arguments in this frame |
5590 | | - */ |
5591 | | - function isEmpty() { |
5592 | | - return true; |
5593 | | - } |
5594 | | - |
5595 | | - function getArgument( $name ) { |
5596 | | - return false; |
5597 | | - } |
5598 | | - |
5599 | | - /** |
5600 | | - * Returns true if the infinite loop check is OK, false if a loop is detected |
5601 | | - */ |
5602 | | - function loopCheck( $title ) { |
5603 | | - return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] ); |
5604 | | - } |
5605 | | -} |
5606 | | - |
5607 | | -/** |
5608 | | - * Expansion frame with template arguments |
5609 | | - */ |
5610 | | -class PPTemplateFrame extends PPFrame { |
5611 | | - var $numberedArgs, $namedArgs, $parent; |
5612 | | - var $numberedExpansionCache, $namedExpansionCache; |
5613 | | - |
5614 | | - function __construct( $parser, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) { |
5615 | | - $this->parser = $parser; |
5616 | | - $this->parent = $parent; |
5617 | | - $this->numberedArgs = $numberedArgs; |
5618 | | - $this->namedArgs = $namedArgs; |
5619 | | - $this->title = $title; |
5620 | | - $pdbk = $title ? $title->getPrefixedDBkey() : false; |
5621 | | - $this->titleCache = $parent->titleCache; |
5622 | | - $this->titleCache[] = $pdbk; |
5623 | | - $this->loopCheckHash = /*clone*/ $parent->loopCheckHash; |
5624 | | - if ( $pdbk !== false ) { |
5625 | | - $this->loopCheckHash[$pdbk] = true; |
5626 | | - } |
5627 | | - $this->depth = $parent->depth + 1; |
5628 | | - $this->numberedExpansionCache = $this->namedExpansionCache = array(); |
5629 | | - } |
5630 | | - |
5631 | | - function __toString() { |
5632 | | - $s = 'tplframe{'; |
5633 | | - $first = true; |
5634 | | - $args = $this->numberedArgs + $this->namedArgs; |
5635 | | - foreach ( $args as $name => $value ) { |
5636 | | - if ( $first ) { |
5637 | | - $first = false; |
5638 | | - } else { |
5639 | | - $s .= ', '; |
5640 | | - } |
5641 | | - $s .= "\"$name\":\"" . |
5642 | | - str_replace( '"', '\\"', $value->ownerDocument->saveXML( $value ) ) . '"'; |
5643 | | - } |
5644 | | - $s .= '}'; |
5645 | | - return $s; |
5646 | | - } |
5647 | | - /** |
5648 | | - * Returns true if there are no arguments in this frame |
5649 | | - */ |
5650 | | - function isEmpty() { |
5651 | | - return !count( $this->numberedArgs ) && !count( $this->namedArgs ); |
5652 | | - } |
5653 | | - |
5654 | | - function getNumberedArgument( $index ) { |
5655 | | - if ( !isset( $this->numberedArgs[$index] ) ) { |
5656 | | - return false; |
5657 | | - } |
5658 | | - if ( !isset( $this->numberedExpansionCache[$index] ) ) { |
5659 | | - # No trimming for unnamed arguments |
5660 | | - $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS ); |
5661 | | - } |
5662 | | - return $this->numberedExpansionCache[$index]; |
5663 | | - } |
5664 | | - |
5665 | | - function getNamedArgument( $name ) { |
5666 | | - if ( !isset( $this->namedArgs[$name] ) ) { |
5667 | | - return false; |
5668 | | - } |
5669 | | - if ( !isset( $this->namedExpansionCache[$name] ) ) { |
5670 | | - # Trim named arguments post-expand, for backwards compatibility |
5671 | | - $this->namedExpansionCache[$name] = trim( |
5672 | | - $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) ); |
5673 | | - } |
5674 | | - return $this->namedExpansionCache[$name]; |
5675 | | - } |
5676 | | - |
5677 | | - function getArgument( $name ) { |
5678 | | - $text = $this->getNumberedArgument( $name ); |
5679 | | - if ( $text === false ) { |
5680 | | - $text = $this->getNamedArgument( $name ); |
5681 | | - } |
5682 | | - return $text; |
5683 | | - } |
5684 | | -} |
5685 | | - |
5686 | | -/** |
5687 | | - * Stack class to help Parser::preprocessToDom() |
5688 | | - */ |
5689 | | -class PPDStack { |
5690 | | - var $stack, $topAccum, $top; |
5691 | | - |
5692 | | - function __construct() { |
5693 | | - $this->stack = array(); |
5694 | | - $this->topAccum = ''; |
5695 | | - $this->top = false; |
5696 | | - } |
5697 | | - |
5698 | | - function &getAccum() { |
5699 | | - if ( count( $this->stack ) ) { |
5700 | | - return $this->top->getAccum(); |
5701 | | - } else { |
5702 | | - return $this->topAccum; |
5703 | | - } |
5704 | | - } |
5705 | | - |
5706 | | - function push( $data ) { |
5707 | | - if ( $data instanceof PPDStackElement ) { |
5708 | | - $this->stack[] = $data; |
5709 | | - } else { |
5710 | | - $this->stack[] = new PPDStackElement( $data ); |
5711 | | - } |
5712 | | - $this->top =& $this->stack[ count( $this->stack ) - 1 ]; |
5713 | | - } |
5714 | | - |
5715 | | - function pop() { |
5716 | | - if ( !count( $this->stack ) ) { |
5717 | | - throw new MWException( __METHOD__.': no elements remaining' ); |
5718 | | - } |
5719 | | - $temp = array_pop( $this->stack ); |
5720 | | - if ( count( $this->stack ) ) { |
5721 | | - $this->top =& $this->stack[ count( $this->stack ) - 1 ]; |
5722 | | - } else { |
5723 | | - $this->top = false; |
5724 | | - } |
5725 | | - } |
5726 | | - |
5727 | | - function getFlags() { |
5728 | | - if ( !count( $this->stack ) ) { |
5729 | | - return array( |
5730 | | - 'findEquals' => false, |
5731 | | - 'findPipe' => false, |
5732 | | - 'inHeading' => false, |
5733 | | - ); |
5734 | | - } else { |
5735 | | - return $this->top->getFlags(); |
5736 | | - } |
5737 | | - } |
5738 | | -} |
5739 | | - |
5740 | | -class PPDStackElement { |
5741 | | - var $open, $close, $count, $parts, $eqpos, $lineStart; |
5742 | | - |
5743 | | - function __construct( $data = array() ) { |
5744 | | - $this->parts = array( '' ); |
5745 | | - $this->eqpos = array(); |
5746 | | - |
5747 | | - foreach ( $data as $name => $value ) { |
5748 | | - $this->$name = $value; |
5749 | | - } |
5750 | | - } |
5751 | | - |
5752 | | - function &getAccum() { |
5753 | | - return $this->parts[count($this->parts) - 1]; |
5754 | | - } |
5755 | | - |
5756 | | - function addPart( $s = '' ) { |
5757 | | - $this->parts[] = $s; |
5758 | | - } |
5759 | | - |
5760 | | - function getFlags() { |
5761 | | - $partCount = count( $this->parts ); |
5762 | | - $findPipe = $this->open != "\n" && $this->open != '['; |
5763 | | - return array( |
5764 | | - 'findPipe' => $findPipe, |
5765 | | - 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->eqpos[$partCount - 1] ), |
5766 | | - 'inHeading' => $this->open == "\n", |
5767 | | - ); |
5768 | | - } |
5769 | | -} |
Index: trunk/phase3/includes/Preprocessor.php |
— | — | @@ -0,0 +1,74 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +interface Preprocessor { |
| 5 | + function __construct( $parser ); |
| 6 | + function newFrame(); |
| 7 | + function preprocessToObj( $text, $flags = 0 ); |
| 8 | +} |
| 9 | + |
| 10 | +interface PPFrame { |
| 11 | + const NO_ARGS = 1; |
| 12 | + const NO_TEMPLATES = 2; |
| 13 | + const STRIP_COMMENTS = 4; |
| 14 | + const NO_IGNORE = 8; |
| 15 | + |
| 16 | + const RECOVER_ORIG = 11; |
| 17 | + |
| 18 | + /** |
| 19 | + * Create a child frame |
| 20 | + */ |
| 21 | + function newChild( $args = false, $title = false ); |
| 22 | + |
| 23 | + /** |
| 24 | + * Expand a document tree node |
| 25 | + */ |
| 26 | + function expand( $root, $flags = 0 ); |
| 27 | + |
| 28 | + /** |
| 29 | + * Implode with flags for expand() |
| 30 | + */ |
| 31 | + function implodeWithFlags( $sep, $flags /*, ... */ ); |
| 32 | + |
| 33 | + /** |
| 34 | + * Implode with no flags specified |
| 35 | + */ |
| 36 | + function implode( $sep /*, ... */ ); |
| 37 | + |
| 38 | + /** |
| 39 | + * Makes an object that, when expand()ed, will be the same as one obtained |
| 40 | + * with implode() |
| 41 | + */ |
| 42 | + function virtualImplode( $sep /*, ... */ ); |
| 43 | + |
| 44 | + /** |
| 45 | + * Virtual implode with brackets |
| 46 | + */ |
| 47 | + function virtualBracketedImplode( $start, $sep, $end /*, ... */ ); |
| 48 | + |
| 49 | + /** |
| 50 | + * Returns true if there are no arguments in this frame |
| 51 | + */ |
| 52 | + function isEmpty(); |
| 53 | + |
| 54 | + function getArgument( $name ); |
| 55 | + |
| 56 | + /** |
| 57 | + * Returns true if the infinite loop check is OK, false if a loop is detected |
| 58 | + */ |
| 59 | + function loopCheck( $title ); |
| 60 | +} |
| 61 | + |
| 62 | +interface PPNode { |
| 63 | + function getChildren(); |
| 64 | + function getFirstChild(); |
| 65 | + function getNextSibling(); |
| 66 | + function getChildrenOfType( $type ); |
| 67 | + function getLength(); |
| 68 | + function item( $i ); |
| 69 | + function getName(); |
| 70 | + |
| 71 | + function splitArg(); |
| 72 | + function splitExt(); |
| 73 | + function splitHeading(); |
| 74 | +} |
| 75 | + |
Property changes on: trunk/phase3/includes/Preprocessor.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 76 | + native |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -143,6 +143,15 @@ |
144 | 144 | 'ParserOutput' => 'includes/ParserOutput.php', |
145 | 145 | 'ParserOptions' => 'includes/ParserOptions.php', |
146 | 146 | 'PatrolLog' => 'includes/PatrolLog.php', |
| 147 | + 'Preprocessor' => 'includes/Preprocessor.php', |
| 148 | + 'PPFrame' => 'includes/Preprocessor.php', |
| 149 | + 'PPNode' => 'includes/Preprocessor.php', |
| 150 | + 'Preprocessor_DOM' => 'includes/Preprocessor_DOM.php', |
| 151 | + 'PPFrame_DOM' => 'includes/Preprocessor_DOM.php', |
| 152 | + 'PPTemplateFrame_DOM' => 'includes/Preprocessor_DOM.php', |
| 153 | + 'PPDStack' => 'includes/Preprocessor_DOM.php', |
| 154 | + 'PPDStackElement' => 'includes/Preprocessor_DOM.php', |
| 155 | + 'PPNode_DOM' => 'includes/Preprocessor_DOM.php', |
147 | 156 | 'ProfilerSimple' => 'includes/ProfilerSimple.php', |
148 | 157 | 'ProfilerSimpleUDP' => 'includes/ProfilerSimpleUDP.php', |
149 | 158 | 'Profiler' => 'includes/Profiler.php', |
Index: trunk/extensions/ParserFunctions/ParserFunctions.php |
— | — | @@ -206,7 +206,11 @@ |
207 | 207 | $lastItemHadNoEquals = false; |
208 | 208 | $mwDefault =& MagicWord::get( 'default' ); |
209 | 209 | foreach ( $args as $arg ) { |
210 | | - list( $nameNode, $index, $valueNode ) = $frame->splitBraceNode( $arg ); |
| 210 | + $bits = $arg->splitArg(); |
| 211 | + $nameNode = $bits['name']; |
| 212 | + $index = $bits['index']; |
| 213 | + $valueNode = $bits['value']; |
| 214 | + |
211 | 215 | if ( $index === '' ) { |
212 | 216 | # Found "=" |
213 | 217 | $lastItemHadNoEquals = false; |
Index: trunk/extensions/LabeledSectionTransclusion/lst.php |
— | — | @@ -339,14 +339,13 @@ |
340 | 340 | "<!-- WARNING: LST loop detected -->"; |
341 | 341 | } |
342 | 342 | |
343 | | - list( $dom, $finalTitle ) = $parser->getTemplateDom( $title ); |
| 343 | + list( $root, $finalTitle ) = $parser->getTemplateDom( $title ); |
344 | 344 | |
345 | 345 | // if article doesn't exist, return a red link. |
346 | | - if ($dom === false) { |
| 346 | + if ($root === false) { |
347 | 347 | return "[[" . $title->getPrefixedText() . "]]"; |
348 | 348 | } |
349 | 349 | |
350 | | - $root = $dom->documentElement; |
351 | 350 | $newFrame = $frame->newChild( false, $finalTitle ); |
352 | 351 | if ( !count( $args ) ) { |
353 | 352 | return $newFrame->expand( $root ); |
— | — | @@ -408,15 +407,15 @@ |
409 | 408 | extract( $setup ); |
410 | 409 | |
411 | 410 | $text = ''; |
412 | | - $node = $root->firstChild; |
| 411 | + $node = $root->getFirstChild(); |
413 | 412 | while ( $node ) { |
414 | 413 | // Find the begin node |
415 | 414 | $found = false; |
416 | | - for ( ; $node; $node = $node->nextSibling ) { |
417 | | - if ( $node->nodeName != 'ext' ) { |
| 415 | + for ( ; $node; $node = $node->getNextSibling() ) { |
| 416 | + if ( $node->getName() != 'ext' ) { |
418 | 417 | continue; |
419 | 418 | } |
420 | | - $parts = $newFrame->splitExtNode( $node ); |
| 419 | + $parts = $node->splitExt(); |
421 | 420 | $parts = array_map( array( $newFrame, 'expand' ), $parts ); |
422 | 421 | if ( self::isSection( $parts['name'] ) ) { |
423 | 422 | if ( preg_match( $beginRegex, $parts['attr'] ) ) { |
— | — | @@ -431,9 +430,9 @@ |
432 | 431 | |
433 | 432 | // Write the text out while looking for the end node |
434 | 433 | $found = false; |
435 | | - for ( ; $node; $node = $node->nextSibling ) { |
436 | | - if ( $node->nodeName === 'ext' ) { |
437 | | - $parts = $newFrame->splitExtNode( $node ); |
| 434 | + for ( ; $node; $node = $node->getNextSibling() ) { |
| 435 | + if ( $node->getName() === 'ext' ) { |
| 436 | + $parts = $node->splitExt(); |
438 | 437 | $parts = array_map( array( $newFrame, 'expand' ), $parts ); |
439 | 438 | if ( self::isSection( $parts['name'] ) ) { |
440 | 439 | if ( preg_match( $endRegex, $parts['attr'] ) ) { |
— | — | @@ -451,7 +450,7 @@ |
452 | 451 | if ( !$found ) { |
453 | 452 | break; |
454 | 453 | } |
455 | | - $node = $node->nextSibling; |
| 454 | + $node = $node->getNextSibling(); |
456 | 455 | } |
457 | 456 | return $text; |
458 | 457 | } |
— | — | @@ -487,12 +486,12 @@ |
488 | 487 | extract( $setup ); |
489 | 488 | |
490 | 489 | $text = ''; |
491 | | - for ( $node = $root->firstChild; $node; $node = $node ? $node->nextSibling : false ) { |
| 490 | + for ( $node = $root->getFirstChild(); $node; $node = $node ? $node->getNextSibling() : false ) { |
492 | 491 | // Search for the start tag |
493 | 492 | $found = false; |
494 | | - for ( ; $node; $node = $node->nextSibling ) { |
495 | | - if ( $node->nodeName == 'ext' ) { |
496 | | - $parts = $newFrame->splitExtNode( $node ); |
| 493 | + for ( ; $node; $node = $node->getNextSibling() ) { |
| 494 | + if ( $node->getName() == 'ext' ) { |
| 495 | + $parts = $node->splitExt(); |
497 | 496 | $parts = array_map( array( $newFrame, 'expand' ), $parts ); |
498 | 497 | if ( self::isSection( $parts['name'] ) ) { |
499 | 498 | if ( preg_match( $beginRegex, $parts['attr'] ) ) { |
— | — | @@ -516,9 +515,9 @@ |
517 | 516 | $text .= $repl; |
518 | 517 | |
519 | 518 | // Search for the end tag |
520 | | - for ( ; $node; $node = $node->nextSibling ) { |
521 | | - if ( $node->nodeName == 'ext' ) { |
522 | | - $parts = $newFrame->splitExtNode( $node ); |
| 519 | + for ( ; $node; $node = $node->getNextSibling() ) { |
| 520 | + if ( $node->getName() == 'ext' ) { |
| 521 | + $parts = $node->splitExt(); |
523 | 522 | $parts = array_map( array( $newFrame, 'expand' ), $parts ); |
524 | 523 | if ( self::isSection( $parts['name'] ) ) { |
525 | 524 | if ( preg_match( $endRegex, $parts['attr'] ) ) { |