r30022 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r30021‎ | r30022 | r30023 >
Date:16:36, 21 January 2008
Author:tstarling
Status:old (Comments)
Tags:
Comment:
Postcard from linuxland.
* Reduced stack depth by using an internal stack in expand(), and by having some common code paths (e.g. non-subst double-brace during PST) return objects which can be expanded in that internal stack instead of the PHP stack. This is friendly to xdebug but slightly slower than the original version. Also it probably helps robustness when you don't add 7 stack levels per pair of double braces.
* Profiling indicates that expand and PPD are now good targets for porting to C. Abstracted and refactored the relevant code to allow for a drop-in replacement. A factor of 2 reduction in average-case replaceVariables() time may be possible.
* Verified with preprocessorFuzzTest.php against r29950; the fuzz test itself was updated to allow better PST (pre-save transform) tests.
* Made parserTests.php respect $wgParserConf
* The LST (LabeledSectionTransclusion) and ParserFunctions extensions need to be updated simultaneously with the core because of changed interfaces: DOM objects are now wrapped rather than directly exposed.
Modified paths:
  • /trunk/extensions/LabeledSectionTransclusion/lst.php (modified) (history)
  • /trunk/extensions/ParserFunctions/ParserFunctions.php (modified) (history)
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/Parser.php (modified) (history)
  • /trunk/phase3/includes/Preprocessor.php (added) (history)
  • /trunk/phase3/includes/Preprocessor_DOM.php (added) (history)
  • /trunk/phase3/maintenance/parserTests.inc (modified) (history)
  • /trunk/phase3/maintenance/preprocessorFuzzTest.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.inc
@@ -257,6 +257,7 @@
258258 * @return bool
259259 */
260260 private function runTest( $desc, $input, $result, $opts ) {
 261+ global $wgParserConf;
261262 if( $this->showProgress ) {
262263 $this->showTesting( $desc );
263264 }
@@ -281,7 +282,8 @@
282283
283284 $noxml = (bool)preg_match( '~\\b noxml \\b~x', $opts );
284285
285 - $parser = new Parser;
 286+ $class = $wgParserConf['class'];
 287+ $parser = new $class( $wgParserConf );
286288 foreach( $this->hooks as $tag => $callback ) {
287289 $parser->setHook( $tag, $callback );
288290 }
Index: trunk/phase3/maintenance/preprocessorFuzzTest.php
@@ -8,17 +8,21 @@
99 var $hairs = array(
1010 '[[', ']]', '{{', '}}', '{{{', '}}}',
1111 '<', '>', '<nowiki', '<gallery', '</nowiki>', '</gallery>', '<nOwIkI>', '</NoWiKi>',
12 - //'<!--' , '-->',
13 - //'<ref>', '</ref>', '<references/>',
 12+ '<!--' , '-->',
1413 "\n==", "==\n",
1514 '|', '=', "\n", ' ', "\t", "\x7f",
 15+ '~~', '~~~', '~~~~', 'subst:',
1616 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
1717 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
 18+
 19+ // extensions
 20+ //'<ref>', '</ref>', '<references/>',
1821 );
1922 var $minLength = 0;
2023 var $maxLength = 20;
2124 var $maxTemplates = 5;
22 - var $outputTypes = array( 'OT_HTML', 'OT_WIKI', 'OT_MSG', 'OT_PREPROCESS' );
 25+ //var $outputTypes = array( 'OT_HTML', 'OT_WIKI', 'OT_PREPROCESS' );
 26+ var $entryPoints = array( 'testSrvus', 'testPst', 'testPreprocess' );
2327 static $currentTest = false;
2428
2529 function execute() {
@@ -71,26 +75,35 @@
7276 return Title::newFromText( mt_rand( 0, 1000000 ), mt_rand( 0, 10 ) );
7377 }
7478
 79+ /*
7580 function pickOutputType() {
7681 $count = count( $this->outputTypes );
7782 return $this->outputTypes[ mt_rand( 0, $count - 1 ) ];
 83+ }*/
 84+
 85+ function pickEntryPoint() {
 86+ $count = count( $this->entryPoints );
 87+ return $this->entryPoints[ mt_rand( 0, $count - 1 ) ];
7888 }
7989 }
8090
8191 class PPFuzzTest {
82 - var $templates, $mainText, $title;
 92+ var $templates, $mainText, $title, $entryPoint;
8393
8494 function __construct( $tester ) {
8595 $this->parent = $tester;
8696 $this->mainText = $tester->makeInputText();
8797 $this->title = $tester->makeTitle();
88 - $this->outputType = $tester->pickOutputType();
 98+ //$this->outputType = $tester->pickOutputType();
 99+ $this->entryPoint = $tester->pickEntryPoint();
 100+ $this->nickname = $tester->makeInputText();
 101+ $this->fancySig = (bool)mt_rand( 0, 1 );
89102 $this->templates = array();
90103 }
91104
92105 function templateHook( $title ) {
93106 $titleText = $title->getPrefixedDBkey();
94 -
 107+
95108 if ( !isset( $this->templates[$titleText] ) ) {
96109 $finalTitle = $title;
97110 if ( count( $this->templates ) >= $this->parent->maxTemplates ) {
@@ -116,16 +129,24 @@
117130 }
118131
119132 function execute() {
120 - global $wgParser;
 133+ global $wgParser, $wgUser;
 134+
 135+ $wgUser = new PPFuzzUser;
 136+ $wgUser->mName = 'Fuzz';
 137+ $wgUser->mFrom = 'name';
 138+ $wgUser->ppfz_test = $this;
 139+
121140 $options = new ParserOptions;
122141 $options->setTemplateCallback( array( $this, 'templateHook' ) );
123 - $wgParser->startExternalParse( $this->title, $options, constant( $this->outputType ) );
124 - return $wgParser->srvus( $this->mainText );
 142+ //$wgParser->startExternalParse( $this->title, $options, constant( $this->outputType ) );
 143+ return call_user_func( array( $wgParser, $this->entryPoint ), $this->mainText, $this->title, $options );
125144 }
126145
127146 function getReport() {
128147 $s = "Title: " . $this->title->getPrefixedDBkey() . "\n" .
129 - "Output type: {$this->outputType}\n" .
 148+// "Output type: {$this->outputType}\n" .
 149+ "Entry point: {$this->entryPoint}\n" .
 150+ "User: " . ( $this->fancySig ? 'fancy' : 'no-fancy' ) . ' ' . var_export( $this->nickname, true ) . "\n" .
130151 "Main text: " . var_export( $this->mainText, true ) . "\n";
131152 foreach ( $this->templates as $titleText => $template ) {
132153 $finalTitle = $template['finalTitle'];
@@ -139,6 +160,20 @@
140161 }
141162 }
142163
 164+class PPFuzzUser extends User {
 165+ var $ppfz_test;
 166+
 167+ function getOption( $option, $defaultOverride = '' ) {
 168+ if ( $option === 'fancysig' ) {
 169+ return $this->ppfz_test->fancySig;
 170+ } elseif ( $option === 'nickname' ) {
 171+ return $this->ppfz_test->nickname;
 172+ } else {
 173+ return parent::getOption( $option, $defaultOverride );
 174+ }
 175+ }
 176+}
 177+
143178 ini_set( 'memory_limit', '50M' );
144179 if ( isset( $args[0] ) ) {
145180 $testText = file_get_contents( $args[0] );
Index: trunk/phase3/includes/Preprocessor_DOM.php
@@ -0,0 +1,1243 @@
 2+<?php
 3+
 4+class Preprocessor_DOM implements Preprocessor {
 5+ var $parser;
 6+
 7+ function __construct( $parser ) {
 8+ $this->parser = $parser;
 9+ }
 10+
 11+ function newFrame() {
 12+ return new PPFrame_DOM( $this );
 13+ }
 14+
 15+ /**
 16+ * Preprocess some wikitext and return the document tree.
 17+ * This is the ghost of Parser::replace_variables().
 18+ *
 19+ * @param string $text The text to parse
 20+ * @param integer $flags Bitwise combination of:
 21+ * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
 22+ * included. Default is to assume a direct page view.
 23+ *
 24+ * The generated DOM tree must depend only on the input text and the flags.
 25+ * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
 26+ *
 27+ * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 28+ * change in the DOM tree for a given text, must be passed through the section identifier
 29+ * in the section edit link and thus back to extractSections().
 30+ *
 31+ * The output of this function is currently only cached in process memory, but a persistent
 32+ * cache may be implemented at a later date which takes further advantage of these strict
 33+ * dependency requirements.
 34+ *
 35+ * @private
 36+ */
 37+ function preprocessToObj( $text, $flags = 0 ) {
 38+ wfProfileIn( __METHOD__ );
 39+ wfProfileIn( __METHOD__.'-makexml' );
 40+
 41+ $rules = array(
 42+ '{' => array(
 43+ 'end' => '}',
 44+ 'names' => array(
 45+ 2 => 'template',
 46+ 3 => 'tplarg',
 47+ ),
 48+ 'min' => 2,
 49+ 'max' => 3,
 50+ ),
 51+ '[' => array(
 52+ 'end' => ']',
 53+ 'names' => array( 2 => null ),
 54+ 'min' => 2,
 55+ 'max' => 2,
 56+ )
 57+ );
 58+
 59+ $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
 60+
 61+ $xmlishElements = $this->parser->getStripList();
 62+ $enableOnlyinclude = false;
 63+ if ( $forInclusion ) {
 64+ $ignoredTags = array( 'includeonly', '/includeonly' );
 65+ $ignoredElements = array( 'noinclude' );
 66+ $xmlishElements[] = 'noinclude';
 67+ if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
 68+ $enableOnlyinclude = true;
 69+ }
 70+ } else {
 71+ $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
 72+ $ignoredElements = array( 'includeonly' );
 73+ $xmlishElements[] = 'includeonly';
 74+ }
 75+ $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
 76+
 77+ // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
 78+ $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
 79+
 80+ $stack = new PPDStack;
 81+
 82+ $searchBase = '[{<';
 83+ $revText = strrev( $text ); // For fast reverse searches
 84+
 85+ $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start
 86+ $accum =& $stack->getAccum(); # Current text accumulator
 87+ $accum = '<root>';
 88+ $findEquals = false; # True to find equals signs in arguments
 89+ $findPipe = false; # True to take notice of pipe characters
 90+ $headingIndex = 1;
 91+ $inHeading = false; # True if $i is inside a possible heading
 92+ $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i
 93+ $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
 94+ $fakeLineStart = true; # Do a line-start run without outputting an LF character
 95+
 96+ while ( true ) {
 97+ if ( $findOnlyinclude ) {
 98+ // Ignore all input up to the next <onlyinclude>
 99+ $startPos = strpos( $text, '<onlyinclude>', $i );
 100+ if ( $startPos === false ) {
 101+ // Ignored section runs to the end
 102+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
 103+ break;
 104+ }
 105+ $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
 106+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
 107+ $i = $tagEndPos;
 108+ $findOnlyinclude = false;
 109+ }
 110+
 111+ if ( $fakeLineStart ) {
 112+ $found = 'line-start';
 113+ $curChar = '';
 114+ } else {
 115+ # Find next opening brace, closing brace or pipe
 116+ $search = $searchBase;
 117+ if ( $stack->top === false ) {
 118+ $currentClosing = '';
 119+ } else {
 120+ $currentClosing = $stack->top->close;
 121+ $search .= $currentClosing;
 122+ }
 123+ if ( $findPipe ) {
 124+ $search .= '|';
 125+ }
 126+ if ( $findEquals ) {
 127+ // First equals will be for the template
 128+ $search .= '=';
 129+ } else {
 130+ // Look for headings
 131+ // We can't look for headings when $findEquals is true, because the ambiguity
 132+ // between template name/value separators and heading starts would be unresolved
 133+ // until the closing double-brace is found. This would mean either infinite
 134+ // backtrack, or creating and updating two separate tree structures until the
 135+ // end of the ambiguity -- one tree structure assuming a heading, and the other
 136+ // assuming a template argument.
 137+ //
 138+ // Easier to just break some section edit links.
 139+ $search .= "\n";
 140+ }
 141+ $rule = null;
 142+ # Output literal section, advance input counter
 143+ $literalLength = strcspn( $text, $search, $i );
 144+ if ( $literalLength > 0 ) {
 145+ $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
 146+ $i += $literalLength;
 147+ }
 148+ if ( $i >= strlen( $text ) ) {
 149+ if ( $currentClosing == "\n" ) {
 150+ // Do a past-the-end run to finish off the heading
 151+ $curChar = '';
 152+ $found = 'line-end';
 153+ } else {
 154+ # All done
 155+ break;
 156+ }
 157+ } else {
 158+ $curChar = $text[$i];
 159+ if ( $curChar == '|' ) {
 160+ $found = 'pipe';
 161+ } elseif ( $curChar == '=' ) {
 162+ $found = 'equals';
 163+ } elseif ( $curChar == '<' ) {
 164+ $found = 'angle';
 165+ } elseif ( $curChar == "\n" ) {
 166+ if ( $inHeading ) {
 167+ $found = 'line-end';
 168+ } else {
 169+ $found = 'line-start';
 170+ }
 171+ } elseif ( $curChar == $currentClosing ) {
 172+ $found = 'close';
 173+ } elseif ( isset( $rules[$curChar] ) ) {
 174+ $found = 'open';
 175+ $rule = $rules[$curChar];
 176+ } else {
 177+ # Some versions of PHP have a strcspn which stops on null characters
 178+ # Ignore and continue
 179+ ++$i;
 180+ continue;
 181+ }
 182+ }
 183+ }
 184+
 185+ if ( $found == 'angle' ) {
 186+ $matches = false;
 187+ // Handle </onlyinclude>
 188+ if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
 189+ $findOnlyinclude = true;
 190+ continue;
 191+ }
 192+
 193+ // Determine element name
 194+ if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
 195+ // Element name missing or not listed
 196+ $accum .= '&lt;';
 197+ ++$i;
 198+ continue;
 199+ }
 200+ // Handle comments
 201+ if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
 202+ // To avoid leaving blank lines, when a comment is both preceded
 203+ // and followed by a newline (ignoring spaces), trim leading and
 204+ // trailing spaces and one of the newlines.
 205+
 206+ // Find the end
 207+ $endPos = strpos( $text, '-->', $i + 4 );
 208+ if ( $endPos === false ) {
 209+ // Unclosed comment in input, runs to end
 210+ $inner = substr( $text, $i );
 211+ $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
 212+ $i = strlen( $text );
 213+ } else {
 214+ // Search backwards for leading whitespace
 215+ $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
 216+ // Search forwards for trailing whitespace
 217+ // $wsEnd will be the position of the last space
 218+ $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
 219+ // Eat the line if possible
 220+ // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
 221+ // the overall start. That's not how Sanitizer::removeHTMLcomments() does it, but
 222+ // it's a possible beneficial b/c break.
 223+ if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
 224+ && substr( $text, $wsEnd + 1, 1 ) == "\n" )
 225+ {
 226+ $startPos = $wsStart;
 227+ $endPos = $wsEnd + 1;
 228+ // Remove leading whitespace from the end of the accumulator
 229+ // Sanity check first though
 230+ $wsLength = $i - $wsStart;
 231+ if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
 232+ $accum = substr( $accum, 0, -$wsLength );
 233+ }
 234+ // Do a line-start run next time to look for headings after the comment,
 235+ // but only if stack->top===false, because headings don't exist at deeper levels.
 236+ if ( $stack->top === false ) {
 237+ $fakeLineStart = true;
 238+ }
 239+ } else {
 240+ // No line to eat, just take the comment itself
 241+ $startPos = $i;
 242+ $endPos += 2;
 243+ }
 244+
 245+ $i = $endPos + 1;
 246+ $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
 247+ $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
 248+ }
 249+ continue;
 250+ }
 251+ $name = $matches[1];
 252+ $attrStart = $i + strlen( $name ) + 1;
 253+
 254+ // Find end of tag
 255+ $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
 256+ if ( $tagEndPos === false ) {
 257+ // Infinite backtrack
 258+ // Disable tag search to prevent worst-case O(N^2) performance
 259+ $noMoreGT = true;
 260+ $accum .= '&lt;';
 261+ ++$i;
 262+ continue;
 263+ }
 264+
 265+ // Handle ignored tags
 266+ if ( in_array( $name, $ignoredTags ) ) {
 267+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>';
 268+ $i = $tagEndPos + 1;
 269+ continue;
 270+ }
 271+
 272+ $tagStartPos = $i;
 273+ if ( $text[$tagEndPos-1] == '/' ) {
 274+ $attrEnd = $tagEndPos - 1;
 275+ $inner = null;
 276+ $i = $tagEndPos + 1;
 277+ $close = '';
 278+ } else {
 279+ $attrEnd = $tagEndPos;
 280+ // Find closing tag
 281+ if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) {
 282+ $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
 283+ $i = $matches[0][1] + strlen( $matches[0][0] );
 284+ $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
 285+ } else {
 286+ // No end tag -- let it run out to the end of the text.
 287+ $inner = substr( $text, $tagEndPos + 1 );
 288+ $i = strlen( $text );
 289+ $close = '';
 290+ }
 291+ }
 292+ // <includeonly> and <noinclude> just become <ignore> tags
 293+ if ( in_array( $name, $ignoredElements ) ) {
 294+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
 295+ . '</ignore>';
 296+ continue;
 297+ }
 298+
 299+ $accum .= '<ext>';
 300+ if ( $attrEnd <= $attrStart ) {
 301+ $attr = '';
 302+ } else {
 303+ $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
 304+ }
 305+ $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
 306+ // Note that the attr element contains the whitespace between name and attribute,
 307+ // this is necessary for precise reconstruction during pre-save transform.
 308+ '<attr>' . htmlspecialchars( $attr ) . '</attr>';
 309+ if ( $inner !== null ) {
 310+ $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
 311+ }
 312+ $accum .= $close . '</ext>';
 313+ }
 314+
 315+ elseif ( $found == 'line-start' ) {
 316+ // Is this the start of a heading?
 317+ // Line break belongs before the heading element in any case
 318+ if ( $fakeLineStart ) {
 319+ $fakeLineStart = false;
 320+ } else {
 321+ $accum .= $curChar;
 322+ $i++;
 323+ }
 324+
 325+ $count = strspn( $text, '=', $i, 6 );
 326+ if ( $count > 0 ) {
 327+ $piece = array(
 328+ 'open' => "\n",
 329+ 'close' => "\n",
 330+ 'parts' => array( str_repeat( '=', $count ) ),
 331+ 'startPos' => $i,
 332+ 'count' => $count );
 333+ $stack->push( $piece );
 334+ $accum =& $stack->getAccum();
 335+ extract( $stack->getFlags() );
 336+ $i += $count;
 337+ }
 338+ }
 339+
 340+ elseif ( $found == 'line-end' ) {
 341+ $piece = $stack->top;
 342+ // A heading must be open, otherwise \n wouldn't have been in the search list
 343+ assert( $piece->open == "\n" );
 344+ // Search back through the input to see if it has a proper close
 345+ // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
 346+ $m = false;
 347+ $count = $piece->count;
 348+ if ( preg_match( "/\s*(=+)/A", $revText, $m, 0, strlen( $text ) - $i ) ) {
 349+ if ( $i - strlen( $m[0] ) == $piece->startPos ) {
 350+ // This is just a single string of equals signs on its own line
 351+ // Replicate the doHeadings behaviour /={count}(.+)={count}/
 352+ // First find out how many equals signs there really are (don't stop at 6)
 353+ $count = strlen( $m[1] );
 354+ if ( $count < 3 ) {
 355+ $count = 0;
 356+ } else {
 357+ $count = min( 6, intval( ( $count - 1 ) / 2 ) );
 358+ }
 359+ } else {
 360+ $count = min( strlen( $m[1] ), $count );
 361+ }
 362+ if ( $count > 0 ) {
 363+ // Normal match, output <h>
 364+ $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
 365+ $headingIndex++;
 366+ } else {
 367+ // Single equals sign on its own line, count=0
 368+ $element = $accum;
 369+ }
 370+ } else {
 371+ // No match, no <h>, just pass down the inner text
 372+ $element = $accum;
 373+ }
 374+ // Unwind the stack
 375+ $stack->pop();
 376+ $accum =& $stack->getAccum();
 377+ extract( $stack->getFlags() );
 378+
 379+ // Append the result to the enclosing accumulator
 380+ $accum .= $element;
 381+ // Note that we do NOT increment the input pointer.
 382+ // This is because the closing linebreak could be the opening linebreak of
 383+ // another heading. Infinite loops are avoided because the next iteration MUST
 384+ // hit the heading open case above, which unconditionally increments the
 385+ // input pointer.
 386+ }
 387+
 388+ elseif ( $found == 'open' ) {
 389+ # count opening brace characters
 390+ $count = strspn( $text, $curChar, $i );
 391+
 392+ # we need to add to stack only if opening brace count is enough for one of the rules
 393+ if ( $count >= $rule['min'] ) {
 394+ # Add it to the stack
 395+ $piece = array(
 396+ 'open' => $curChar,
 397+ 'close' => $rule['end'],
 398+ 'count' => $count,
 399+ 'parts' => array( '' ),
 400+ 'eqpos' => array(),
 401+ 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
 402+ );
 403+
 404+ $stack->push( $piece );
 405+ $accum =& $stack->getAccum();
 406+ extract( $stack->getFlags() );
 407+ } else {
 408+ # Add literal brace(s)
 409+ $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
 410+ }
 411+ $i += $count;
 412+ }
 413+
 414+ elseif ( $found == 'close' ) {
 415+ $piece = $stack->top;
 416+ # lets check if there are enough characters for closing brace
 417+ $maxCount = $piece->count;
 418+ $count = strspn( $text, $curChar, $i, $maxCount );
 419+
 420+ # check for maximum matching characters (if there are 5 closing
 421+ # characters, we will probably need only 3 - depending on the rules)
 422+ $matchingCount = 0;
 423+ $rule = $rules[$piece->open];
 424+ if ( $count > $rule['max'] ) {
 425+ # The specified maximum exists in the callback array, unless the caller
 426+ # has made an error
 427+ $matchingCount = $rule['max'];
 428+ } else {
 429+ # Count is less than the maximum
 430+ # Skip any gaps in the callback array to find the true largest match
 431+ # Need to use array_key_exists not isset because the callback can be null
 432+ $matchingCount = $count;
 433+ while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
 434+ --$matchingCount;
 435+ }
 436+ }
 437+
 438+ if ($matchingCount <= 0) {
 439+ # No matching element found in callback array
 440+ # Output a literal closing brace and continue
 441+ $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
 442+ $i += $count;
 443+ continue;
 444+ }
 445+ $name = $rule['names'][$matchingCount];
 446+ if ( $name === null ) {
 447+ // No element, just literal text
 448+ $element = str_repeat( $piece->open, $matchingCount ) .
 449+ implode( '|', $piece->parts ) .
 450+ str_repeat( $rule['end'], $matchingCount );
 451+ } else {
 452+ # Create XML element
 453+ # Note: $parts is already XML, does not need to be encoded further
 454+ $parts = $piece->parts;
 455+ $title = $parts[0];
 456+ unset( $parts[0] );
 457+
 458+ # The invocation is at the start of the line if lineStart is set in
 459+ # the stack, and all opening brackets are used up.
 460+ if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
 461+ $attr = ' lineStart="1"';
 462+ } else {
 463+ $attr = '';
 464+ }
 465+
 466+ $element = "<$name$attr>";
 467+ $element .= "<title>$title</title>";
 468+ $argIndex = 1;
 469+ foreach ( $parts as $partIndex => $part ) {
 470+ if ( isset( $piece->eqpos[$partIndex] ) ) {
 471+ $eqpos = $piece->eqpos[$partIndex];
 472+ $argName = substr( $part, 0, $eqpos );
 473+ $argValue = substr( $part, $eqpos + 1 );
 474+ $element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
 475+ } else {
 476+ $element .= "<part><name index=\"$argIndex\" /><value>$part</value></part>";
 477+ $argIndex++;
 478+ }
 479+ }
 480+ $element .= "</$name>";
 481+ }
 482+
 483+ # Advance input pointer
 484+ $i += $matchingCount;
 485+
 486+ # Unwind the stack
 487+ $stack->pop();
 488+ $accum =& $stack->getAccum();
 489+
 490+ # Re-add the old stack element if it still has unmatched opening characters remaining
 491+ if ($matchingCount < $piece->count) {
 492+ $piece->parts = array( '' );
 493+ $piece->count -= $matchingCount;
 494+ $piece->eqpos = array();
 495+ # do we still qualify for any callback with remaining count?
 496+ $names = $rules[$piece->open]['names'];
 497+ $skippedBraces = 0;
 498+ $enclosingAccum =& $accum;
 499+ while ( $piece->count ) {
 500+ if ( array_key_exists( $piece->count, $names ) ) {
 501+ $stack->push( $piece );
 502+ $accum =& $stack->getAccum();
 503+ break;
 504+ }
 505+ --$piece->count;
 506+ $skippedBraces ++;
 507+ }
 508+ $enclosingAccum .= str_repeat( $piece->open, $skippedBraces );
 509+ }
 510+
 511+ extract( $stack->getFlags() );
 512+
 513+ # Add XML element to the enclosing accumulator
 514+ $accum .= $element;
 515+ }
 516+
 517+ elseif ( $found == 'pipe' ) {
 518+ $findEquals = true; // shortcut for getFlags()
 519+ $stack->top->addPart();
 520+ $accum =& $stack->getAccum();
 521+ ++$i;
 522+ }
 523+
 524+ elseif ( $found == 'equals' ) {
 525+ $findEquals = false; // shortcut for getFlags()
 526+ $partsCount = count( $stack->top->parts );
 527+ $stack->top->eqpos[$partsCount - 1] = strlen( $accum );
 528+ $accum .= '=';
 529+ ++$i;
 530+ }
 531+ }
 532+
 533+ # Output any remaining unclosed brackets
 534+ foreach ( $stack->stack as $piece ) {
 535+ if ( $piece->open == "\n" ) {
 536+ $stack->topAccum .= $piece->parts[0];
 537+ } else {
 538+ $stack->topAccum .= str_repeat( $piece->open, $piece->count ) . implode( '|', $piece->parts );
 539+ }
 540+ }
 541+ $stack->topAccum .= '</root>';
 542+ $xml = $stack->topAccum;
 543+
 544+ wfProfileOut( __METHOD__.'-makexml' );
 545+ wfProfileIn( __METHOD__.'-loadXML' );
 546+ $dom = new DOMDocument;
 547+ wfSuppressWarnings();
 548+ $result = $dom->loadXML( $xml );
 549+ wfRestoreWarnings();
 550+ if ( !$result ) {
 551+ // Try running the XML through UtfNormal to get rid of invalid characters
 552+ $xml = UtfNormal::cleanUp( $xml );
 553+ $result = $dom->loadXML( $xml );
 554+ if ( !$result ) {
 555+ throw new MWException( __METHOD__.' generated invalid XML' );
 556+ }
 557+ }
 558+ $obj = new PPNode_DOM( $dom->documentElement );
 559+ wfProfileOut( __METHOD__.'-loadXML' );
 560+ wfProfileOut( __METHOD__ );
 561+ return $obj;
 562+ }
 563+}
 564+
 565+/**
 566+ * An expansion frame, used as a context to expand the result of preprocessToDom()
 567+ */
 568+class PPFrame_DOM implements PPFrame {
 569+ var $preprocessor, $parser, $title;
 570+ var $titleCache;
 571+
 572+ /**
 573+ * Hashtable listing templates which are disallowed for expansion in this frame,
 574+ * having been encountered previously in parent frames.
 575+ */
 576+ var $loopCheckHash;
 577+
 578+ /**
 579+ * Recursion depth of this frame, top = 0
 580+ */
 581+ var $depth;
 582+
 583+
 584+ /**
 585+ * Construct a new preprocessor frame.
 586+ * @param Preprocessor $preprocessor The parent preprocessor
 587+ */
 588+ function __construct( $preprocessor ) {
 589+ $this->preprocessor = $preprocessor;
 590+ $this->parser = $preprocessor->parser;
 591+ $this->title = $this->parser->mTitle;
 592+ $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false );
 593+ $this->loopCheckHash = array();
 594+ $this->depth = 0;
 595+ }
 596+
 597+ /**
 598+ * Create a new child frame
 599+ * $args is optionally a multi-root PPNode or array containing the template arguments
 600+ */
 601+ function newChild( $args = false, $title = false ) {
 602+ $namedArgs = array();
 603+ $numberedArgs = array();
 604+ if ( $title === false ) {
 605+ $title = $this->title;
 606+ }
 607+ if ( $args !== false ) {
 608+ $xpath = false;
 609+ if ( $args instanceof PPNode ) {
 610+ $args = $args->node;
 611+ }
 612+ foreach ( $args as $arg ) {
 613+ if ( !$xpath ) {
 614+ $xpath = new DOMXPath( $arg->ownerDocument );
 615+ }
 616+
 617+ $nameNodes = $xpath->query( 'name', $arg );
 618+ $value = $xpath->query( 'value', $arg );
 619+ if ( $nameNodes->item( 0 )->hasAttributes() ) {
 620+ // Numbered parameter
 621+ $index = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent;
 622+ $numberedArgs[$index] = $value->item( 0 );
 623+ unset( $namedArgs[$index] );
 624+ } else {
 625+ // Named parameter
 626+ $name = trim( $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ) );
 627+ $namedArgs[$name] = $value->item( 0 );
 628+ unset( $numberedArgs[$name] );
 629+ }
 630+ }
 631+ }
 632+ return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
 633+ }
 634+
 635+ function expand( $root, $flags = 0 ) {
 636+ if ( is_string( $root ) ) {
 637+ return $root;
 638+ }
 639+
 640+ if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
 641+ {
 642+ return '<span class="error">Node-count limit exceeded</span>';
 643+ }
 644+
 645+ if ( $root instanceof PPNode_DOM ) {
 646+ $root = $root->node;
 647+ }
 648+ if ( $root instanceof DOMDocument ) {
 649+ $root = $root->documentElement;
 650+ }
 651+
 652+ $outStack = array( '', '' );
 653+ $iteratorStack = array( false, $root );
 654+ $indexStack = array( 0, 0 );
 655+
 656+ while ( count( $iteratorStack ) > 1 ) {
 657+ $level = count( $outStack ) - 1;
 658+ $iteratorNode =& $iteratorStack[ $level ];
 659+ $out =& $outStack[$level];
 660+ $index =& $indexStack[$level];
 661+
 662+ if ( $iteratorNode instanceof PPNode_DOM ) $iteratorNode = $iteratorNode->node;
 663+
 664+ if ( is_array( $iteratorNode ) ) {
 665+ if ( $index >= count( $iteratorNode ) ) {
 666+ // All done with this iterator
 667+ $iteratorStack[$level] = false;
 668+ $contextNode = false;
 669+ } else {
 670+ $contextNode = $iteratorNode[$index];
 671+ $index++;
 672+ }
 673+ } elseif ( $iteratorNode instanceof DOMNodeList ) {
 674+ if ( $index >= $iteratorNode->length ) {
 675+ // All done with this iterator
 676+ $iteratorStack[$level] = false;
 677+ $contextNode = false;
 678+ } else {
 679+ $contextNode = $iteratorNode->item( $index );
 680+ $index++;
 681+ }
 682+ } else {
 683+ // Copy to $contextNode and then delete from iterator stack,
 684+ // because this is not an iterator but we do have to execute it once
 685+ $contextNode = $iteratorStack[$level];
 686+ $iteratorStack[$level] = false;
 687+ }
 688+
 689+ if ( $contextNode instanceof PPNode_DOM ) $contextNode = $contextNode->node;
 690+
 691+ $newIterator = false;
 692+
 693+ if ( $contextNode === false ) {
 694+ // nothing to do
 695+ } elseif ( is_string( $contextNode ) ) {
 696+ $out .= $contextNode;
 697+ } elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
 698+ $newIterator = $contextNode;
 699+ } elseif ( $contextNode instanceof DOMNode ) {
 700+ /*
 701+ print str_repeat( '&nbsp;', count( debug_backtrace() ) ) . $contextNode->nodeName;
 702+ if ( $contextNode->nodeName == 'title' ) {
 703+ print ' = ' . $contextNode->textContent;
 704+ }
 705+ print "<br/>\n";
 706+ */
 707+ if ( $contextNode->nodeType == XML_TEXT_NODE ) {
 708+ $out .= $contextNode->nodeValue;
 709+ } elseif ( $contextNode->nodeName == 'template' ) {
 710+ # Double-brace expansion
 711+ $xpath = new DOMXPath( $contextNode->ownerDocument );
 712+ $titles = $xpath->query( 'title', $contextNode );
 713+ $title = $titles->item( 0 );
 714+ $parts = $xpath->query( 'part', $contextNode );
 715+ if ( $flags & self::NO_TEMPLATES ) {
 716+ $newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
 717+ } else {
 718+ $lineStart = $contextNode->getAttribute( 'lineStart' );
 719+ $params = array(
 720+ 'title' => new PPNode_DOM( $title ),
 721+ 'parts' => new PPNode_DOM( $parts ),
 722+ 'lineStart' => $lineStart );
 723+ $ret = $this->parser->braceSubstitution( $params, $this );
 724+ if ( isset( $ret['object'] ) ) {
 725+ $newIterator = $ret['object'];
 726+ } else {
 727+ $out .= $ret['text'];
 728+ }
 729+ }
 730+ } elseif ( $contextNode->nodeName == 'tplarg' ) {
 731+ # Triple-brace expansion
 732+ $xpath = new DOMXPath( $contextNode->ownerDocument );
 733+ $titles = $xpath->query( 'title', $contextNode );
 734+ $title = $titles->item( 0 );
 735+ $parts = $xpath->query( 'part', $contextNode );
 736+ if ( $flags & self::NO_ARGS ) {
 737+ $newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
 738+ } else {
 739+ $params = array(
 740+ 'title' => new PPNode_DOM( $title ),
 741+ 'parts' => new PPNode_DOM( $parts ) );
 742+ $ret = $this->parser->argSubstitution( $params, $this );
 743+ if ( isset( $ret['object'] ) ) {
 744+ $newIterator = $ret['object'];
 745+ } else {
 746+ $out .= $ret['text'];
 747+ }
 748+ }
 749+ } elseif ( $contextNode->nodeName == 'comment' ) {
 750+ # HTML-style comment
 751+ if ( $this->parser->ot['html']
 752+ || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
 753+ || ( $flags & self::STRIP_COMMENTS ) )
 754+ {
 755+ $out .= '';
 756+ } else {
 757+ $out .= $contextNode->textContent;
 758+ }
 759+ } elseif ( $contextNode->nodeName == 'ignore' ) {
 760+ # Output suppression used by <includeonly> etc.
 761+ # OT_WIKI will only respect <ignore> in substed templates.
 762+ # The other output types respect it unless NO_IGNORE is set.
 763+ # extractSections() sets NO_IGNORE and so never respects it.
 764+ if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
 765+ $out .= $contextNode->textContent;
 766+ } else {
 767+ $out .= '';
 768+ }
 769+ } elseif ( $contextNode->nodeName == 'ext' ) {
 770+ # Extension tag
 771+ $xpath = new DOMXPath( $contextNode->ownerDocument );
 772+ $names = $xpath->query( 'name', $contextNode );
 773+ $attrs = $xpath->query( 'attr', $contextNode );
 774+ $inners = $xpath->query( 'inner', $contextNode );
 775+ $closes = $xpath->query( 'close', $contextNode );
 776+ $params = array(
 777+ 'name' => new PPNode_DOM( $names->item( 0 ) ),
 778+ 'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
 779+ 'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
 780+ 'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
 781+ );
 782+ $out .= $this->parser->extensionSubstitution( $params, $this );
 783+ } elseif ( $contextNode->nodeName == 'h' ) {
 784+ # Heading
 785+ $s = $this->expand( $contextNode->childNodes, $flags );
 786+
 787+ if ( $this->parser->ot['html'] ) {
 788+ # Insert heading index marker
 789+ $headingIndex = $contextNode->getAttribute( 'i' );
 790+ $titleText = $this->title->getPrefixedDBkey();
 791+ $this->parser->mHeadings[] = array( $titleText, $headingIndex );
 792+ $serial = count( $this->parser->mHeadings ) - 1;
 793+ $marker = "{$this->parser->mUniqPrefix}-h-$serial-{$this->parser->mMarkerSuffix}";
 794+ $count = $contextNode->getAttribute( 'level' );
 795+ $s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
 796+ $this->parser->mStripState->general->setPair( $marker, '' );
 797+ }
 798+ $out .= $s;
 799+ } else {
 800+ # Generic recursive expansion
 801+ $newIterator = $contextNode->childNodes;
 802+ }
 803+ } else {
 804+ throw new MWException( __METHOD__.': Invalid parameter type' );
 805+ }
 806+
 807+ if ( $newIterator !== false ) {
 808+ if ( $newIterator instanceof PPNode_DOM ) {
 809+ $newIterator = $newIterator->node;
 810+ }
 811+ $outStack[] = '';
 812+ $iteratorStack[] = $newIterator;
 813+ $indexStack[] = 0;
 814+ } elseif ( $iteratorStack[$level] === false ) {
 815+ // Return accumulated value to parent
 816+ // With tail recursion
 817+ while ( $iteratorStack[$level] === false && $level > 0 ) {
 818+ $outStack[$level - 1] .= $out;
 819+ array_pop( $outStack );
 820+ array_pop( $iteratorStack );
 821+ array_pop( $indexStack );
 822+ $level--;
 823+ }
 824+ }
 825+ }
 826+ return $outStack[0];
 827+ }
 828+
 829+ function implodeWithFlags( $sep, $flags /*, ... */ ) {
 830+ $args = array_slice( func_get_args(), 2 );
 831+
 832+ $first = true;
 833+ $s = '';
 834+ foreach ( $args as $root ) {
 835+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 836+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 837+ $root = array( $root );
 838+ }
 839+ foreach ( $root as $node ) {
 840+ if ( $first ) {
 841+ $first = false;
 842+ } else {
 843+ $s .= $sep;
 844+ }
 845+ $s .= $this->expand( $node, $flags );
 846+ }
 847+ }
 848+ return $s;
 849+ }
 850+
 851+ /**
 852+ * Implode with no flags specified
 853+ * This previously called implodeWithFlags but has now been inlined to reduce stack depth
 854+ */
 855+ function implode( $sep /*, ... */ ) {
 856+ $args = array_slice( func_get_args(), 1 );
 857+
 858+ $first = true;
 859+ $s = '';
 860+ foreach ( $args as $root ) {
 861+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 862+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 863+ $root = array( $root );
 864+ }
 865+ foreach ( $root as $node ) {
 866+ if ( $first ) {
 867+ $first = false;
 868+ } else {
 869+ $s .= $sep;
 870+ }
 871+ $s .= $this->expand( $node );
 872+ }
 873+ }
 874+ return $s;
 875+ }
 876+
 877+ /**
 878+ * Makes an object that, when expand()ed, will be the same as one obtained
 879+ * with implode()
 880+ */
 881+ function virtualImplode( $sep /*, ... */ ) {
 882+ $args = array_slice( func_get_args(), 1 );
 883+ $out = array();
 884+ $first = true;
 885+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 886+
 887+ foreach ( $args as $root ) {
 888+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 889+ $root = array( $root );
 890+ }
 891+ foreach ( $root as $node ) {
 892+ if ( $first ) {
 893+ $first = false;
 894+ } else {
 895+ $out[] = $sep;
 896+ }
 897+ $out[] = $node;
 898+ }
 899+ }
 900+ return $out;
 901+ }
 902+
 903+ /**
 904+ * Virtual implode with brackets
 905+ */
 906+ function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
 907+ $args = array_slice( func_get_args(), 3 );
 908+ $out = array( $start );
 909+ $first = true;
 910+
 911+ foreach ( $args as $root ) {
 912+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 913+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 914+ $root = array( $root );
 915+ }
 916+ foreach ( $root as $node ) {
 917+ if ( $first ) {
 918+ $first = false;
 919+ } else {
 920+ $out[] = $sep;
 921+ }
 922+ $out[] = $node;
 923+ }
 924+ }
 925+ $out[] = $end;
 926+ return $out;
 927+ }
 928+
 929+
 930+ function __toString() {
 931+ return 'frame{}';
 932+ }
 933+
 934+ function getPDBK( $level = false ) {
 935+ if ( $level === false ) {
 936+ return $this->title->getPrefixedDBkey();
 937+ } else {
 938+ return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false;
 939+ }
 940+ }
 941+
 942+ /**
 943+ * Returns true if there are no arguments in this frame
 944+ */
 945+ function isEmpty() {
 946+ return true;
 947+ }
 948+
 949+ function getArgument( $name ) {
 950+ return false;
 951+ }
 952+
 953+ /**
 954+ * Returns true if the infinite loop check is OK, false if a loop is detected
 955+ */
 956+ function loopCheck( $title ) {
 957+ return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
 958+ }
 959+}
 960+
 961+/**
 962+ * Expansion frame with template arguments
 963+ */
 964+class PPTemplateFrame_DOM extends PPFrame_DOM {
 965+ var $numberedArgs, $namedArgs, $parent;
 966+ var $numberedExpansionCache, $namedExpansionCache;
 967+
 968+ function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
 969+ $this->preprocessor = $preprocessor;
 970+ $this->parser = $preprocessor->parser;
 971+ $this->parent = $parent;
 972+ $this->numberedArgs = $numberedArgs;
 973+ $this->namedArgs = $namedArgs;
 974+ $this->title = $title;
 975+ $pdbk = $title ? $title->getPrefixedDBkey() : false;
 976+ $this->titleCache = $parent->titleCache;
 977+ $this->titleCache[] = $pdbk;
 978+ $this->loopCheckHash = /*clone*/ $parent->loopCheckHash;
 979+ if ( $pdbk !== false ) {
 980+ $this->loopCheckHash[$pdbk] = true;
 981+ }
 982+ $this->depth = $parent->depth + 1;
 983+ $this->numberedExpansionCache = $this->namedExpansionCache = array();
 984+ }
 985+
 986+ function __toString() {
 987+ $s = 'tplframe{';
 988+ $first = true;
 989+ $args = $this->numberedArgs + $this->namedArgs;
 990+ foreach ( $args as $name => $value ) {
 991+ if ( $first ) {
 992+ $first = false;
 993+ } else {
 994+ $s .= ', ';
 995+ }
 996+ $s .= "\"$name\":\"" .
 997+ str_replace( '"', '\\"', $value->ownerDocument->saveXML( $value ) ) . '"';
 998+ }
 999+ $s .= '}';
 1000+ return $s;
 1001+ }
 1002+ /**
 1003+ * Returns true if there are no arguments in this frame
 1004+ */
 1005+ function isEmpty() {
 1006+ return !count( $this->numberedArgs ) && !count( $this->namedArgs );
 1007+ }
 1008+
 1009+ function getNumberedArgument( $index ) {
 1010+ if ( !isset( $this->numberedArgs[$index] ) ) {
 1011+ return false;
 1012+ }
 1013+ if ( !isset( $this->numberedExpansionCache[$index] ) ) {
 1014+ # No trimming for unnamed arguments
 1015+ $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS );
 1016+ }
 1017+ return $this->numberedExpansionCache[$index];
 1018+ }
 1019+
 1020+ function getNamedArgument( $name ) {
 1021+ if ( !isset( $this->namedArgs[$name] ) ) {
 1022+ return false;
 1023+ }
 1024+ if ( !isset( $this->namedExpansionCache[$name] ) ) {
 1025+ # Trim named arguments post-expand, for backwards compatibility
 1026+ $this->namedExpansionCache[$name] = trim(
 1027+ $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) );
 1028+ }
 1029+ return $this->namedExpansionCache[$name];
 1030+ }
 1031+
 1032+ function getArgument( $name ) {
 1033+ $text = $this->getNumberedArgument( $name );
 1034+ if ( $text === false ) {
 1035+ $text = $this->getNamedArgument( $name );
 1036+ }
 1037+ return $text;
 1038+ }
 1039+}
 1040+
 1041+/**
 1042+ * Stack class to help Parser::preprocessToDom()
 1043+ */
 1044+class PPDStack {
 1045+ var $stack, $topAccum, $top;
 1046+
 1047+ function __construct() {
 1048+ $this->stack = array();
 1049+ $this->topAccum = '';
 1050+ $this->top = false;
 1051+ }
 1052+
 1053+ function &getAccum() {
 1054+ if ( count( $this->stack ) ) {
 1055+ return $this->top->getAccum();
 1056+ } else {
 1057+ return $this->topAccum;
 1058+ }
 1059+ }
 1060+
 1061+ function push( $data ) {
 1062+ if ( $data instanceof PPDStackElement ) {
 1063+ $this->stack[] = $data;
 1064+ } else {
 1065+ $this->stack[] = new PPDStackElement( $data );
 1066+ }
 1067+ $this->top =& $this->stack[ count( $this->stack ) - 1 ];
 1068+ }
 1069+
 1070+ function pop() {
 1071+ if ( !count( $this->stack ) ) {
 1072+ throw new MWException( __METHOD__.': no elements remaining' );
 1073+ }
 1074+ $temp = array_pop( $this->stack );
 1075+ if ( count( $this->stack ) ) {
 1076+ $this->top =& $this->stack[ count( $this->stack ) - 1 ];
 1077+ } else {
 1078+ $this->top = false;
 1079+ }
 1080+ }
 1081+
 1082+ function getFlags() {
 1083+ if ( !count( $this->stack ) ) {
 1084+ return array(
 1085+ 'findEquals' => false,
 1086+ 'findPipe' => false,
 1087+ 'inHeading' => false,
 1088+ );
 1089+ } else {
 1090+ return $this->top->getFlags();
 1091+ }
 1092+ }
 1093+}
 1094+
 1095+class PPDStackElement {
 1096+ var $open, $close, $count, $parts, $eqpos, $lineStart;
 1097+
 1098+ function __construct( $data = array() ) {
 1099+ $this->parts = array( '' );
 1100+ $this->eqpos = array();
 1101+
 1102+ foreach ( $data as $name => $value ) {
 1103+ $this->$name = $value;
 1104+ }
 1105+ }
 1106+
 1107+ function &getAccum() {
 1108+ return $this->parts[count($this->parts) - 1];
 1109+ }
 1110+
 1111+ function addPart( $s = '' ) {
 1112+ $this->parts[] = $s;
 1113+ }
 1114+
 1115+ function getFlags() {
 1116+ $partCount = count( $this->parts );
 1117+ $findPipe = $this->open != "\n" && $this->open != '[';
 1118+ return array(
 1119+ 'findPipe' => $findPipe,
 1120+ 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->eqpos[$partCount - 1] ),
 1121+ 'inHeading' => $this->open == "\n",
 1122+ );
 1123+ }
 1124+}
 1125+
 1126+class PPNode_DOM implements PPNode {
 1127+ var $node;
 1128+
 1129+ function __construct( $node, $xpath = false ) {
 1130+ $this->node = $node;
 1131+ }
 1132+
 1133+ function __get( $name ) {
 1134+ if ( $name == 'xpath' ) {
 1135+ $this->xpath = new DOMXPath( $this->node->ownerDocument );
 1136+ }
 1137+ return $this->xpath;
 1138+ }
 1139+
 1140+ function __toString() {
 1141+ if ( $this->node instanceof DOMNodeList ) {
 1142+ $s = '';
 1143+ foreach ( $this->node as $node ) {
 1144+ $s .= $node->ownerDocument->saveXML( $node );
 1145+ }
 1146+ } else {
 1147+ $s = $this->node->ownerDocument->saveXML( $node );
 1148+ }
 1149+ return $s;
 1150+ }
 1151+
 1152+ function getChildren() {
 1153+ return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
 1154+ }
 1155+
 1156+ function getFirstChild() {
 1157+ return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
 1158+ }
 1159+
 1160+ function getNextSibling() {
 1161+ return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
 1162+ }
 1163+
 1164+ function getChildrenOfType( $type ) {
 1165+ return new self( $this->xpath->query( $type, $this->node ) );
 1166+ }
 1167+
 1168+ function getLength() {
 1169+ if ( $this->node instanceof DOMNodeList ) {
 1170+ return $this->node->length;
 1171+ } else {
 1172+ return false;
 1173+ }
 1174+ }
 1175+
 1176+ function item( $i ) {
 1177+ $item = $this->node->item( $i );
 1178+ return $item ? new self( $item ) : false;
 1179+ }
 1180+
 1181+ function getName() {
 1182+ if ( $this->node instanceof DOMNodeList ) {
 1183+ return '#nodelist';
 1184+ } else {
 1185+ return $this->node->nodeName;
 1186+ }
 1187+ }
 1188+
 1189+ /**
 1190+ * Split an <arg> node into a three-element array:
 1191+ * PPNode name, string index and PPNode value
 1192+ */
 1193+ function splitArg() {
 1194+ $names = $this->xpath->query( 'name', $this->node );
 1195+ $values = $this->xpath->query( 'value', $this->node );
 1196+ if ( !$names->length || !$values->length ) {
 1197+ throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
 1198+ }
 1199+ $name = $names->item( 0 );
 1200+ $index = $name->getAttribute( 'index' );
 1201+ return array(
 1202+ 'name' => new self( $name ),
 1203+ 'index' => $index,
 1204+ 'value' => new self( $values->item( 0 ) ) );
 1205+ }
 1206+
 1207+ /**
 1208+ * Split an <ext> node into an associative array containing name, attr, inner and close
 1209+ * All values in the resulting array are PPNodes. Inner and close are optional.
 1210+ */
 1211+ function splitExt() {
 1212+ $names = $this->xpath->query( 'name', $this->node );
 1213+ $attrs = $this->xpath->query( 'attr', $this->node );
 1214+ $inners = $this->xpath->query( 'inner', $this->node );
 1215+ $closes = $this->xpath->query( 'close', $this->node );
 1216+ if ( !$names->length || !$attrs->length ) {
 1217+ throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
 1218+ }
 1219+ $parts = array(
 1220+ 'name' => new self( $names->item( 0 ) ),
 1221+ 'attr' => new self( $attrs->item( 0 ) ) );
 1222+ if ( $inners->length ) {
 1223+ $parts['inner'] = new self( $inners->item( 0 ) );
 1224+ }
 1225+ if ( $closes->length ) {
 1226+ $parts['close'] = new self( $closes->item( 0 ) );
 1227+ }
 1228+ return $parts;
 1229+ }
 1230+
 1231+ /**
 1232+ * Split a <h> node
 1233+ */
 1234+ function splitHeading() {
 1235+ if ( !$this->nodeName == 'h' ) {
 1236+ throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
 1237+ }
 1238+ return array(
 1239+ 'i' => $this->node->getAttribute( 'i' ),
 1240+ 'level' => $this->node->getAttribute( 'level' ),
 1241+ 'contents' => $this->getChildren()
 1242+ );
 1243+ }
 1244+}
Property changes on: trunk/phase3/includes/Preprocessor_DOM.php
___________________________________________________________________
Name: svn:eol-style
11245 + native
Index: trunk/phase3/includes/Parser.php
@@ -82,7 +82,7 @@
8383 # Persistent:
8484 var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
8585 $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix,
86 - $mExtLinkBracketedRegex;
 86+ $mExtLinkBracketedRegex, $mPreprocessor;
8787
8888 # Cleared with clearState():
8989 var $mOutput, $mAutonumber, $mDTopen, $mStripState;
@@ -118,6 +118,11 @@
119119 $this->mMarkerSuffix = "-QINU\x7f";
120120 $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
121121 '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
 122+ if ( isset( $conf['preprocessorClass'] ) ) {
 123+ $this->mPreprocessorClass = $conf['preprocessorClass'];
 124+ } else {
 125+ $this->mPreprocessorClass = 'Preprocessor_DOM';
 126+ }
122127 $this->mFirstCall = true;
123128 }
124129
@@ -481,6 +486,17 @@
482487 }
483488
484489 /**
 490+ * Get a preprocessor object
 491+ */
 492+ function getPreprocessor() {
 493+ if ( !isset( $this->mPreprocessor ) ) {
 494+ $class = $this->mPreprocessorClass;
 495+ $this->mPreprocessor = new $class( $this );
 496+ }
 497+ return $this->mPreprocessor;
 498+ }
 499+
 500+ /**
485501 * Replaces all occurrences of HTML-style comments and the given tags
486502 * in the text with a random marker and returns the next text. The output
487503 * parameter $matches will be an associative array filled with data in
@@ -2596,528 +2612,7 @@
25972613 * @private
25982614 */
25992615 function preprocessToDom ( $text, $flags = 0 ) {
2600 - wfProfileIn( __METHOD__ );
2601 - wfProfileIn( __METHOD__.'-makexml' );
2602 -
2603 - $rules = array(
2604 - '{' => array(
2605 - 'end' => '}',
2606 - 'names' => array(
2607 - 2 => 'template',
2608 - 3 => 'tplarg',
2609 - ),
2610 - 'min' => 2,
2611 - 'max' => 3,
2612 - ),
2613 - '[' => array(
2614 - 'end' => ']',
2615 - 'names' => array( 2 => null ),
2616 - 'min' => 2,
2617 - 'max' => 2,
2618 - )
2619 - );
2620 -
2621 - $forInclusion = $flags & self::PTD_FOR_INCLUSION;
2622 -
2623 - $xmlishElements = $this->getStripList();
2624 - $enableOnlyinclude = false;
2625 - if ( $forInclusion ) {
2626 - $ignoredTags = array( 'includeonly', '/includeonly' );
2627 - $ignoredElements = array( 'noinclude' );
2628 - $xmlishElements[] = 'noinclude';
2629 - if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
2630 - $enableOnlyinclude = true;
2631 - }
2632 - } else {
2633 - $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
2634 - $ignoredElements = array( 'includeonly' );
2635 - $xmlishElements[] = 'includeonly';
2636 - }
2637 - $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
2638 -
2639 - // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
2640 - $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
2641 -
2642 - $stack = new PPDStack;
2643 -
2644 - $searchBase = '[{<';
2645 - $revText = strrev( $text ); // For fast reverse searches
2646 -
2647 - $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start
2648 - $accum =& $stack->getAccum(); # Current text accumulator
2649 - $accum = '<root>';
2650 - $findEquals = false; # True to find equals signs in arguments
2651 - $findPipe = false; # True to take notice of pipe characters
2652 - $headingIndex = 1;
2653 - $inHeading = false; # True if $i is inside a possible heading
2654 - $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i
2655 - $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
2656 - $fakeLineStart = true; # Do a line-start run without outputting an LF character
2657 -
2658 - while ( true ) {
2659 - if ( $findOnlyinclude ) {
2660 - // Ignore all input up to the next <onlyinclude>
2661 - $startPos = strpos( $text, '<onlyinclude>', $i );
2662 - if ( $startPos === false ) {
2663 - // Ignored section runs to the end
2664 - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
2665 - break;
2666 - }
2667 - $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
2668 - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
2669 - $i = $tagEndPos;
2670 - $findOnlyinclude = false;
2671 - }
2672 -
2673 - if ( $fakeLineStart ) {
2674 - $found = 'line-start';
2675 - $curChar = '';
2676 - } else {
2677 - # Find next opening brace, closing brace or pipe
2678 - $search = $searchBase;
2679 - if ( $stack->top === false ) {
2680 - $currentClosing = '';
2681 - } else {
2682 - $currentClosing = $stack->top->close;
2683 - $search .= $currentClosing;
2684 - }
2685 - if ( $findPipe ) {
2686 - $search .= '|';
2687 - }
2688 - if ( $findEquals ) {
2689 - // First equals will be for the template
2690 - $search .= '=';
2691 - } else {
2692 - // Look for headings
2693 - // We can't look for headings when $findEquals is true, because the ambiguity
2694 - // between template name/value separators and heading starts would be unresolved
2695 - // until the closing double-brace is found. This would mean either infinite
2696 - // backtrack, or creating and updating two separate tree structures until the
2697 - // end of the ambiguity -- one tree structure assuming a heading, and the other
2698 - // assuming a template argument.
2699 - //
2700 - // Easier to just break some section edit links.
2701 - $search .= "\n";
2702 - }
2703 - $rule = null;
2704 - # Output literal section, advance input counter
2705 - $literalLength = strcspn( $text, $search, $i );
2706 - if ( $literalLength > 0 ) {
2707 - $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
2708 - $i += $literalLength;
2709 - }
2710 - if ( $i >= strlen( $text ) ) {
2711 - if ( $currentClosing == "\n" ) {
2712 - // Do a past-the-end run to finish off the heading
2713 - $curChar = '';
2714 - $found = 'line-end';
2715 - } else {
2716 - # All done
2717 - break;
2718 - }
2719 - } else {
2720 - $curChar = $text[$i];
2721 - if ( $curChar == '|' ) {
2722 - $found = 'pipe';
2723 - } elseif ( $curChar == '=' ) {
2724 - $found = 'equals';
2725 - } elseif ( $curChar == '<' ) {
2726 - $found = 'angle';
2727 - } elseif ( $curChar == "\n" ) {
2728 - if ( $inHeading ) {
2729 - $found = 'line-end';
2730 - } else {
2731 - $found = 'line-start';
2732 - }
2733 - } elseif ( $curChar == $currentClosing ) {
2734 - $found = 'close';
2735 - } elseif ( isset( $rules[$curChar] ) ) {
2736 - $found = 'open';
2737 - $rule = $rules[$curChar];
2738 - } else {
2739 - # Some versions of PHP have a strcspn which stops on null characters
2740 - # Ignore and continue
2741 - ++$i;
2742 - continue;
2743 - }
2744 - }
2745 - }
2746 -
2747 - if ( $found == 'angle' ) {
2748 - $matches = false;
2749 - // Handle </onlyinclude>
2750 - if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
2751 - $findOnlyinclude = true;
2752 - continue;
2753 - }
2754 -
2755 - // Determine element name
2756 - if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
2757 - // Element name missing or not listed
2758 - $accum .= '&lt;';
2759 - ++$i;
2760 - continue;
2761 - }
2762 - // Handle comments
2763 - if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
2764 - // To avoid leaving blank lines, when a comment is both preceded
2765 - // and followed by a newline (ignoring spaces), trim leading and
2766 - // trailing spaces and one of the newlines.
2767 -
2768 - // Find the end
2769 - $endPos = strpos( $text, '-->', $i + 4 );
2770 - if ( $endPos === false ) {
2771 - // Unclosed comment in input, runs to end
2772 - $inner = substr( $text, $i );
2773 - $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
2774 - $i = strlen( $text );
2775 - } else {
2776 - // Search backwards for leading whitespace
2777 - $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
2778 - // Search forwards for trailing whitespace
2779 - // $wsEnd will be the position of the last space
2780 - $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
2781 - // Eat the line if possible
2782 - // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
2783 - // the overall start. That's not how Sanitizer::removeHTMLcomments() does it, but
2784 - // it's a possible beneficial b/c break.
2785 - if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
2786 - && substr( $text, $wsEnd + 1, 1 ) == "\n" )
2787 - {
2788 - $startPos = $wsStart;
2789 - $endPos = $wsEnd + 1;
2790 - // Remove leading whitespace from the end of the accumulator
2791 - // Sanity check first though
2792 - $wsLength = $i - $wsStart;
2793 - if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
2794 - $accum = substr( $accum, 0, -$wsLength );
2795 - }
2796 - // Do a line-start run next time to look for headings after the comment,
2797 - // but only if stack->top===false, because headings don't exist at deeper levels.
2798 - if ( $stack->top === false ) {
2799 - $fakeLineStart = true;
2800 - }
2801 - } else {
2802 - // No line to eat, just take the comment itself
2803 - $startPos = $i;
2804 - $endPos += 2;
2805 - }
2806 -
2807 - $i = $endPos + 1;
2808 - $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
2809 - $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
2810 - }
2811 - continue;
2812 - }
2813 - $name = $matches[1];
2814 - $attrStart = $i + strlen( $name ) + 1;
2815 -
2816 - // Find end of tag
2817 - $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
2818 - if ( $tagEndPos === false ) {
2819 - // Infinite backtrack
2820 - // Disable tag search to prevent worst-case O(N^2) performance
2821 - $noMoreGT = true;
2822 - $accum .= '&lt;';
2823 - ++$i;
2824 - continue;
2825 - }
2826 -
2827 - // Handle ignored tags
2828 - if ( in_array( $name, $ignoredTags ) ) {
2829 - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>';
2830 - $i = $tagEndPos + 1;
2831 - continue;
2832 - }
2833 -
2834 - $tagStartPos = $i;
2835 - if ( $text[$tagEndPos-1] == '/' ) {
2836 - $attrEnd = $tagEndPos - 1;
2837 - $inner = null;
2838 - $i = $tagEndPos + 1;
2839 - $close = '';
2840 - } else {
2841 - $attrEnd = $tagEndPos;
2842 - // Find closing tag
2843 - if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) {
2844 - $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
2845 - $i = $matches[0][1] + strlen( $matches[0][0] );
2846 - $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
2847 - } else {
2848 - // No end tag -- let it run out to the end of the text.
2849 - $inner = substr( $text, $tagEndPos + 1 );
2850 - $i = strlen( $text );
2851 - $close = '';
2852 - }
2853 - }
2854 - // <includeonly> and <noinclude> just become <ignore> tags
2855 - if ( in_array( $name, $ignoredElements ) ) {
2856 - $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
2857 - . '</ignore>';
2858 - continue;
2859 - }
2860 -
2861 - $accum .= '<ext>';
2862 - if ( $attrEnd <= $attrStart ) {
2863 - $attr = '';
2864 - } else {
2865 - $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
2866 - }
2867 - $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
2868 - // Note that the attr element contains the whitespace between name and attribute,
2869 - // this is necessary for precise reconstruction during pre-save transform.
2870 - '<attr>' . htmlspecialchars( $attr ) . '</attr>';
2871 - if ( $inner !== null ) {
2872 - $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
2873 - }
2874 - $accum .= $close . '</ext>';
2875 - }
2876 -
2877 - elseif ( $found == 'line-start' ) {
2878 - // Is this the start of a heading?
2879 - // Line break belongs before the heading element in any case
2880 - if ( $fakeLineStart ) {
2881 - $fakeLineStart = false;
2882 - } else {
2883 - $accum .= $curChar;
2884 - $i++;
2885 - }
2886 -
2887 - $count = strspn( $text, '=', $i, 6 );
2888 - if ( $count > 0 ) {
2889 - $piece = array(
2890 - 'open' => "\n",
2891 - 'close' => "\n",
2892 - 'parts' => array( str_repeat( '=', $count ) ),
2893 - 'startPos' => $i,
2894 - 'count' => $count );
2895 - $stack->push( $piece );
2896 - $accum =& $stack->getAccum();
2897 - extract( $stack->getFlags() );
2898 - $i += $count;
2899 - }
2900 - }
2901 -
2902 - elseif ( $found == 'line-end' ) {
2903 - $piece = $stack->top;
2904 - // A heading must be open, otherwise \n wouldn't have been in the search list
2905 - assert( $piece->open == "\n" );
2906 - // Search back through the input to see if it has a proper close
2907 - // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
2908 - $m = false;
2909 - $count = $piece->count;
2910 - if ( preg_match( "/\s*(=+)/A", $revText, $m, 0, strlen( $text ) - $i ) ) {
2911 - if ( $i - strlen( $m[0] ) == $piece->startPos ) {
2912 - // This is just a single string of equals signs on its own line
2913 - // Replicate the doHeadings behaviour /={count}(.+)={count}/
2914 - // First find out how many equals signs there really are (don't stop at 6)
2915 - $count = strlen( $m[1] );
2916 - if ( $count < 3 ) {
2917 - $count = 0;
2918 - } else {
2919 - $count = min( 6, intval( ( $count - 1 ) / 2 ) );
2920 - }
2921 - } else {
2922 - $count = min( strlen( $m[1] ), $count );
2923 - }
2924 - if ( $count > 0 ) {
2925 - // Normal match, output <h>
2926 - $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
2927 - $headingIndex++;
2928 - } else {
2929 - // Single equals sign on its own line, count=0
2930 - $element = $accum;
2931 - }
2932 - } else {
2933 - // No match, no <h>, just pass down the inner text
2934 - $element = $accum;
2935 - }
2936 - // Unwind the stack
2937 - $stack->pop();
2938 - $accum =& $stack->getAccum();
2939 - extract( $stack->getFlags() );
2940 -
2941 - // Append the result to the enclosing accumulator
2942 - $accum .= $element;
2943 - // Note that we do NOT increment the input pointer.
2944 - // This is because the closing linebreak could be the opening linebreak of
2945 - // another heading. Infinite loops are avoided because the next iteration MUST
2946 - // hit the heading open case above, which unconditionally increments the
2947 - // input pointer.
2948 - }
2949 -
2950 - elseif ( $found == 'open' ) {
2951 - # count opening brace characters
2952 - $count = strspn( $text, $curChar, $i );
2953 -
2954 - # we need to add to stack only if opening brace count is enough for one of the rules
2955 - if ( $count >= $rule['min'] ) {
2956 - # Add it to the stack
2957 - $piece = array(
2958 - 'open' => $curChar,
2959 - 'close' => $rule['end'],
2960 - 'count' => $count,
2961 - 'parts' => array( '' ),
2962 - 'eqpos' => array(),
2963 - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
2964 - );
2965 -
2966 - $stack->push( $piece );
2967 - $accum =& $stack->getAccum();
2968 - extract( $stack->getFlags() );
2969 - } else {
2970 - # Add literal brace(s)
2971 - $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
2972 - }
2973 - $i += $count;
2974 - }
2975 -
2976 - elseif ( $found == 'close' ) {
2977 - $piece = $stack->top;
2978 - # lets check if there are enough characters for closing brace
2979 - $maxCount = $piece->count;
2980 - $count = strspn( $text, $curChar, $i, $maxCount );
2981 -
2982 - # check for maximum matching characters (if there are 5 closing
2983 - # characters, we will probably need only 3 - depending on the rules)
2984 - $matchingCount = 0;
2985 - $rule = $rules[$piece->open];
2986 - if ( $count > $rule['max'] ) {
2987 - # The specified maximum exists in the callback array, unless the caller
2988 - # has made an error
2989 - $matchingCount = $rule['max'];
2990 - } else {
2991 - # Count is less than the maximum
2992 - # Skip any gaps in the callback array to find the true largest match
2993 - # Need to use array_key_exists not isset because the callback can be null
2994 - $matchingCount = $count;
2995 - while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
2996 - --$matchingCount;
2997 - }
2998 - }
2999 -
3000 - if ($matchingCount <= 0) {
3001 - # No matching element found in callback array
3002 - # Output a literal closing brace and continue
3003 - $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
3004 - $i += $count;
3005 - continue;
3006 - }
3007 - $name = $rule['names'][$matchingCount];
3008 - if ( $name === null ) {
3009 - // No element, just literal text
3010 - $element = str_repeat( $piece->open, $matchingCount ) .
3011 - implode( '|', $piece->parts ) .
3012 - str_repeat( $rule['end'], $matchingCount );
3013 - } else {
3014 - # Create XML element
3015 - # Note: $parts is already XML, does not need to be encoded further
3016 - $parts = $piece->parts;
3017 - $title = $parts[0];
3018 - unset( $parts[0] );
3019 -
3020 - # The invocation is at the start of the line if lineStart is set in
3021 - # the stack, and all opening brackets are used up.
3022 - if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
3023 - $attr = ' lineStart="1"';
3024 - } else {
3025 - $attr = '';
3026 - }
3027 -
3028 - $element = "<$name$attr>";
3029 - $element .= "<title>$title</title>";
3030 - $argIndex = 1;
3031 - foreach ( $parts as $partIndex => $part ) {
3032 - if ( isset( $piece->eqpos[$partIndex] ) ) {
3033 - $eqpos = $piece->eqpos[$partIndex];
3034 - $argName = substr( $part, 0, $eqpos );
3035 - $argValue = substr( $part, $eqpos + 1 );
3036 - $element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
3037 - } else {
3038 - $element .= "<part><name index=\"$argIndex\" /><value>$part</value></part>";
3039 - $argIndex++;
3040 - }
3041 - }
3042 - $element .= "</$name>";
3043 - }
3044 -
3045 - # Advance input pointer
3046 - $i += $matchingCount;
3047 -
3048 - # Unwind the stack
3049 - $stack->pop();
3050 - $accum =& $stack->getAccum();
3051 -
3052 - # Re-add the old stack element if it still has unmatched opening characters remaining
3053 - if ($matchingCount < $piece->count) {
3054 - $piece->parts = array( '' );
3055 - $piece->count -= $matchingCount;
3056 - $piece->eqpos = array();
3057 - # do we still qualify for any callback with remaining count?
3058 - $names = $rules[$piece->open]['names'];
3059 - $skippedBraces = 0;
3060 - $enclosingAccum =& $accum;
3061 - while ( $piece->count ) {
3062 - if ( array_key_exists( $piece->count, $names ) ) {
3063 - $stack->push( $piece );
3064 - $accum =& $stack->getAccum();
3065 - break;
3066 - }
3067 - --$piece->count;
3068 - $skippedBraces ++;
3069 - }
3070 - $enclosingAccum .= str_repeat( $piece->open, $skippedBraces );
3071 - }
3072 -
3073 - extract( $stack->getFlags() );
3074 -
3075 - # Add XML element to the enclosing accumulator
3076 - $accum .= $element;
3077 - }
3078 -
3079 - elseif ( $found == 'pipe' ) {
3080 - $findEquals = true; // shortcut for getFlags()
3081 - $stack->top->addPart();
3082 - $accum =& $stack->getAccum();
3083 - ++$i;
3084 - }
3085 -
3086 - elseif ( $found == 'equals' ) {
3087 - $findEquals = false; // shortcut for getFlags()
3088 - $partsCount = count( $stack->top->parts );
3089 - $stack->top->eqpos[$partsCount - 1] = strlen( $accum );
3090 - $accum .= '=';
3091 - ++$i;
3092 - }
3093 - }
3094 -
3095 - # Output any remaining unclosed brackets
3096 - foreach ( $stack->stack as $piece ) {
3097 - if ( $piece->open == "\n" ) {
3098 - $stack->topAccum .= $piece->parts[0];
3099 - } else {
3100 - $stack->topAccum .= str_repeat( $piece->open, $piece->count ) . implode( '|', $piece->parts );
3101 - }
3102 - }
3103 - $stack->topAccum .= '</root>';
3104 - $xml = $stack->topAccum;
3105 -
3106 - wfProfileOut( __METHOD__.'-makexml' );
3107 - wfProfileIn( __METHOD__.'-loadXML' );
3108 - $dom = new DOMDocument;
3109 - wfSuppressWarnings();
3110 - $result = $dom->loadXML( $xml );
3111 - wfRestoreWarnings();
3112 - if ( !$result ) {
3113 - // Try running the XML through UtfNormal to get rid of invalid characters
3114 - $xml = UtfNormal::cleanUp( $xml );
3115 - $result = $dom->loadXML( $xml );
3116 - if ( !$result ) {
3117 - throw new MWException( __METHOD__.' generated invalid XML' );
3118 - }
3119 - }
3120 - wfProfileOut( __METHOD__.'-loadXML' );
3121 - wfProfileOut( __METHOD__ );
 2616+ $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
31222617 return $dom;
31232618 }
31242619
@@ -3162,7 +2657,7 @@
31632658 wfProfileIn( $fname );
31642659
31652660 if ( $frame === false ) {
3166 - $frame = new PPFrame( $this );
 2661+ $frame = $this->getPreprocessor()->newFrame();
31672662 } elseif ( !( $frame instanceof PPFrame ) ) {
31682663 throw new MWException( __METHOD__ . ' called using the old argument format' );
31692664 }
@@ -3203,9 +2698,9 @@
32042699 * replacing any variables or templates within the template.
32052700 *
32062701 * @param array $piece The parts of the template
3207 - * $piece['text']: matched text
32082702 * $piece['title']: the title, i.e. the part before the |
32092703 * $piece['parts']: the parameter array
 2704+ * $piece['lineStart']: whether the brace was at the start of a line
32102705 * @param PPFrame The current frame, contains template arguments
32112706 * @return string the text of the template
32122707 * @private
@@ -3221,7 +2716,8 @@
32222717 $nowiki = false; # wiki markup in $text should be escaped
32232718 $isHTML = false; # $text is HTML, armour it against wikitext transformation
32242719 $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered
3225 - $isDOM = false; # $text is a DOM node needing expansion
 2720+ $isChildObj = false; # $text is a DOM node needing expansion in a child frame
 2721+ $isLocalObj = false; # $text is a DOM node needing expansion in the current frame
32262722
32272723 # Title object, where $text came from
32282724 $title = NULL;
@@ -3248,13 +2744,14 @@
32492745 # 1) Found SUBST but not in the PST phase
32502746 # 2) Didn't find SUBST and in the PST phase
32512747 # In either case, return without further processing
3252 - $text = '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}';
 2748+ $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
 2749+ $isLocalObj = true;
32532750 $found = true;
32542751 }
32552752 }
32562753
32572754 # Variables
3258 - if ( !$found && $args->length == 0 ) {
 2755+ if ( !$found && $args->getLength() == 0 ) {
32592756 $id = $this->mVariables->matchStartToEnd( $part1 );
32602757 if ( $id !== false ) {
32612758 $text = $this->getVariableValue( $id );
@@ -3311,14 +2808,14 @@
33122809 # Add a frame parameter, and pass the arguments as an array
33132810 $allArgs = $initialArgs;
33142811 $allArgs[] = $frame;
3315 - foreach ( $args as $arg ) {
3316 - $funcArgs[] = $arg;
 2812+ for ( $i = 0; $i < $args->getLength(); $i++ ) {
 2813+ $funcArgs[] = $args->item( $i );
33172814 }
33182815 $allArgs[] = $funcArgs;
33192816 } else {
33202817 # Convert arguments to plain text
3321 - foreach ( $args as $arg ) {
3322 - $funcArgs[] = trim( $frame->expand( $arg ) );
 2818+ for ( $i = 0; $i < $args->getLength(); $i++ ) {
 2819+ $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) );
33232820 }
33242821 $allArgs = array_merge( $initialArgs, $funcArgs );
33252822 }
@@ -3393,7 +2890,7 @@
33942891 list( $text, $title ) = $this->getTemplateDom( $title );
33952892 if ( $text !== false ) {
33962893 $found = true;
3397 - $isDOM = true;
 2894+ $isChildObj = true;
33982895 }
33992896 }
34002897
@@ -3411,7 +2908,7 @@
34122909 $text = $this->interwikiTransclude( $title, 'raw' );
34132910 // Preprocess it like a template
34142911 $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3415 - $isDOM = true;
 2912+ $isChildObj = true;
34162913 }
34172914 $found = true;
34182915 }
@@ -3421,13 +2918,13 @@
34222919 # If we haven't found text to substitute by now, we're done
34232920 # Recover the source wikitext and return it
34242921 if ( !$found ) {
3425 - $text = '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}';
 2922+ $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
34262923 wfProfileOut( $fname );
3427 - return $text;
 2924+ return array( 'object' => $text );
34282925 }
34292926
34302927 # Expand DOM-style return values in a child frame
3431 - if ( $isDOM ) {
 2928+ if ( $isChildObj ) {
34322929 # Clean up argument array
34332930 $newFrame = $frame->newChild( $args, $title );
34342931
@@ -3458,18 +2955,24 @@
34592956 # Bug 529: if the template begins with a table or block-level
34602957 # element, it should be treated as beginning a new line.
34612958 # This behaviour is somewhat controversial.
3462 - elseif ( !$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{
 2959+ elseif ( is_string( $text ) && !$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{
34632960 $text = "\n" . $text;
34642961 }
34652962
3466 - if ( !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
 2963+ if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
34672964 # Error, oversize inclusion
34682965 $text = "[[$originalTitle]]" .
34692966 $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' );
34702967 }
34712968
 2969+ if ( $isLocalObj ) {
 2970+ $ret = array( 'object' => $text );
 2971+ } else {
 2972+ $ret = array( 'text' => $text );
 2973+ }
 2974+
34722975 wfProfileOut( $fname );
3473 - return $text;
 2976+ return $ret;
34742977 }
34752978
34762979 /**
@@ -3639,26 +3142,31 @@
36403143 $parts = $piece['parts'];
36413144 $nameWithSpaces = $frame->expand( $piece['title'] );
36423145 $argName = trim( $nameWithSpaces );
3643 -
 3146+ $object = false;
36443147 $text = $frame->getArgument( $argName );
3645 - if ( $text === false && ( $this->ot['html'] || $this->ot['pre'] ) && $parts->length > 0 ) {
 3148+ if ( $text === false && ( $this->ot['html'] || $this->ot['pre'] ) && $parts->getLength() > 0 ) {
36463149 # No match in frame, use the supplied default
3647 - $text = $frame->expand( $parts->item( 0 ) );
 3150+ $object = $parts->item( 0 )->getChildren();
36483151 }
36493152 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
36503153 $error = '<!-- WARNING: argument omitted, expansion size too large -->';
36513154 }
36523155
3653 - if ( $text === false ) {
 3156+ if ( $text === false && $object === false ) {
36543157 # No match anywhere
3655 - $text = '{{{' . $frame->implode( '|', $nameWithSpaces, $parts ) . '}}}';
 3158+ $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
36563159 }
36573160 if ( $error !== false ) {
36583161 $text .= $error;
36593162 }
 3163+ if ( $object !== false ) {
 3164+ $ret = array( 'object' => $object );
 3165+ } else {
 3166+ $ret = array( 'text' => $text );
 3167+ }
36603168
36613169 wfProfileOut( __METHOD__ );
3662 - return $text;
 3170+ return $ret;
36633171 }
36643172
36653173 /**
@@ -3666,8 +3174,8 @@
36673175 * This is the ghost of strip().
36683176 *
36693177 * @param array $params Associative array of parameters:
3670 - * name DOMNode for the tag name
3671 - * attr DOMNode for unparsed text where tag attributes are thought to be
 3178+ * name PPNode for the tag name
 3179+ * attr PPNode for unparsed text where tag attributes are thought to be
36723180 * attributes Optional associative array of parsed attributes
36733181 * inner Contents of extension element
36743182 * noClose Original text did not have a close tag
@@ -4252,8 +3760,8 @@
42533761 $text = preg_replace( $substRegex, $substText, $text );
42543762 $text = $this->cleanSigInSig( $text );
42553763 $dom = $this->preprocessToDom( $text );
4256 - $frame = new PPFrame( $this );
4257 - $text = $frame->expand( $dom->documentElement );
 3764+ $frame = $this->getPreprocessor()->newFrame();
 3765+ $text = $frame->expand( $dom );
42583766
42593767 if ( !$parsing ) {
42603768 $text = $this->mStripState->unstripBoth( $text );
@@ -5026,7 +4534,7 @@
50274535 $this->setOutputType( OT_WIKI );
50284536 $curIndex = 0;
50294537 $outText = '';
5030 - $frame = new PPFrame( $this );
 4538+ $frame = $this->getPreprocessor()->newFrame();
50314539
50324540 // Process section extraction flags
50334541 $flags = 0;
@@ -5038,12 +4546,11 @@
50394547 }
50404548 }
50414549 // Preprocess the text
5042 - $dom = $this->preprocessToDom( $text, $flags );
5043 - $root = $dom->documentElement;
 4550+ $root = $this->preprocessToDom( $text, $flags );
50444551
50454552 // <h> nodes indicate section breaks
50464553 // They can only occur at the top level, so we can find them by iterating the root's children
5047 - $node = $root->firstChild;
 4554+ $node = $root->getFirstChild();
50484555
50494556 // Find the target section
50504557 if ( $sectionIndex == 0 ) {
@@ -5051,7 +4558,7 @@
50524559 $targetLevel = 1000;
50534560 } else {
50544561 while ( $node ) {
5055 - if ( $node->nodeName == 'h' ) {
 4562+ if ( $node->getName() == 'h' ) {
50564563 if ( $curIndex + 1 == $sectionIndex ) {
50574564 break;
50584565 }
@@ -5060,10 +4567,11 @@
50614568 if ( $mode == 'replace' ) {
50624569 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
50634570 }
5064 - $node = $node->nextSibling;
 4571+ $node = $node->getNextSibling();
50654572 }
50664573 if ( $node ) {
5067 - $targetLevel = $node->getAttribute( 'level' );
 4574+ $bits = $node->splitHeading();
 4575+ $targetLevel = $bits['level'];
50684576 }
50694577 }
50704578
@@ -5078,9 +4586,10 @@
50794587
50804588 // Find the end of the section, including nested sections
50814589 do {
5082 - if ( $node->nodeName == 'h' ) {
 4590+ if ( $node->getName() == 'h' ) {
50834591 $curIndex++;
5084 - $curLevel = $node->getAttribute( 'level' );
 4592+ $bits = $node->splitHeading();
 4593+ $curLevel = $bits['level'];
50854594 if ( $curIndex != $sectionIndex && $curLevel <= $targetLevel ) {
50864595 break;
50874596 }
@@ -5088,7 +4597,7 @@
50894598 if ( $mode == 'get' ) {
50904599 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
50914600 }
5092 - $node = $node->nextSibling;
 4601+ $node = $node->getNextSibling();
50934602 } while ( $node );
50944603
50954604 // Write out the remainder (in replace mode only)
@@ -5099,7 +4608,7 @@
51004609 $outText .= $newText . "\n\n";
51014610 while ( $node ) {
51024611 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5103 - $node = $node->nextSibling;
 4612+ $node = $node->getNextSibling();
51044613 }
51054614 }
51064615
@@ -5242,15 +4751,32 @@
52434752 return $text;
52444753 }
52454754
 4755+ function srvus( $text ) {
 4756+ return $this->testSrvus( $text, $this->mOutputType );
 4757+ }
 4758+
52464759 /**
52474760 * strip/replaceVariables/unstrip for preprocessor regression testing
52484761 */
5249 - function srvus( $text ) {
 4762+ function testSrvus( $text, $title, $options, $outputType = OT_HTML ) {
 4763+ $this->clearState();
 4764+ $this->mTitle = $title;
 4765+ $this->mOptions = $options;
 4766+ $this->setOutputType( $outputType );
52504767 $text = $this->replaceVariables( $text );
52514768 $text = $this->mStripState->unstripBoth( $text );
52524769 $text = Sanitizer::removeHTMLtags( $text );
52534770 return $text;
52544771 }
 4772+
 4773+ function testPst( $text, $title, $options ) {
 4774+ global $wgUser;
 4775+ return $this->preSaveTransform( $text, $title, $wgUser, $options );
 4776+ }
 4777+
 4778+ function testPreprocess( $text, $title, $options ) {
 4779+ return $this->testSrvus( $text, $title, $options, OT_PREPROCESS );
 4780+ }
52554781 }
52564782
52574783 /**
@@ -5313,456 +4839,3 @@
53144840 }
53154841 }
53164842
5317 -/**
5318 - * An expansion frame, used as a context to expand the result of preprocessToDom()
5319 - */
5320 -class PPFrame {
5321 - var $parser, $title;
5322 - var $titleCache;
5323 -
5324 - /**
5325 - * Hashtable listing templates which are disallowed for expansion in this frame,
5326 - * having been encountered previously in parent frames.
5327 - */
5328 - var $loopCheckHash;
5329 -
5330 - /**
5331 - * Recursion depth of this frame, top = 0
5332 - */
5333 - var $depth;
5334 -
5335 - const NO_ARGS = 1;
5336 - const NO_TEMPLATES = 2;
5337 - const STRIP_COMMENTS = 4;
5338 - const NO_IGNORE = 8;
5339 -
5340 - const RECOVER_ORIG = 11;
5341 -
5342 - /**
5343 - * Construct a new preprocessor frame.
5344 - * @param Parser $parser The parent parser
5345 - * @param Title $title The context title, or false if there isn't one
5346 - */
5347 - function __construct( $parser ) {
5348 - $this->parser = $parser;
5349 - $this->title = $parser->mTitle;
5350 - $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false );
5351 - $this->loopCheckHash = array();
5352 - $this->depth = 0;
5353 - }
5354 -
5355 - /**
5356 - * Create a new child frame
5357 - * $args is optionally a DOMNodeList containing the template arguments
5358 - */
5359 - function newChild( $args = false, $title = false ) {
5360 - $namedArgs = array();
5361 - $numberedArgs = array();
5362 - if ( $title === false ) {
5363 - $title = $this->title;
5364 - }
5365 - if ( $args !== false ) {
5366 - $xpath = false;
5367 - foreach ( $args as $arg ) {
5368 - if ( !$xpath ) {
5369 - $xpath = new DOMXPath( $arg->ownerDocument );
5370 - }
5371 -
5372 - $nameNodes = $xpath->query( 'name', $arg );
5373 - $value = $xpath->query( 'value', $arg );
5374 - if ( $nameNodes->item( 0 )->hasAttributes() ) {
5375 - // Numbered parameter
5376 - $index = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent;
5377 - $numberedArgs[$index] = $value->item( 0 );
5378 - unset( $namedArgs[$index] );
5379 - } else {
5380 - // Named parameter
5381 - $name = trim( $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ) );
5382 - $namedArgs[$name] = $value->item( 0 );
5383 - unset( $numberedArgs[$name] );
5384 - }
5385 - }
5386 - }
5387 - return new PPTemplateFrame( $this->parser, $this, $numberedArgs, $namedArgs, $title );
5388 - }
5389 -
5390 - /**
5391 - * Expand a DOMNode describing a preprocessed document into plain wikitext,
5392 - * using the current context
5393 - * @param $root the node
5394 - */
5395 - function expand( $root, $flags = 0 ) {
5396 - if ( is_string( $root ) ) {
5397 - return $root;
5398 - }
5399 -
5400 - if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
5401 - {
5402 - return '<span class="error">Node-count limit exceeded</span>';
5403 - }
5404 -
5405 - if ( is_array( $root ) || $root instanceof DOMNodeList ) {
5406 - $s = '';
5407 - foreach ( $root as $node ) {
5408 - $s .= $this->expand( $node, $flags );
5409 - }
5410 - } elseif ( $root instanceof DOMNode ) {
5411 - if ( $root->nodeType == XML_TEXT_NODE ) {
5412 - $s = $root->nodeValue;
5413 - } elseif ( $root->nodeName == 'template' ) {
5414 - # Double-brace expansion
5415 - $xpath = new DOMXPath( $root->ownerDocument );
5416 - $titles = $xpath->query( 'title', $root );
5417 - $title = $titles->item( 0 );
5418 - $parts = $xpath->query( 'part', $root );
5419 - if ( $flags & self::NO_TEMPLATES ) {
5420 - $s = '{{' . $this->implodeWithFlags( '|', $flags, $title, $parts ) . '}}';
5421 - } else {
5422 - $lineStart = $root->getAttribute( 'lineStart' );
5423 - $params = array(
5424 - 'title' => $title,
5425 - 'parts' => $parts,
5426 - 'lineStart' => $lineStart,
5427 - 'text' => 'FIXME' );
5428 - $s = $this->parser->braceSubstitution( $params, $this );
5429 - }
5430 - } elseif ( $root->nodeName == 'tplarg' ) {
5431 - # Triple-brace expansion
5432 - $xpath = new DOMXPath( $root->ownerDocument );
5433 - $titles = $xpath->query( 'title', $root );
5434 - $title = $titles->item( 0 );
5435 - $parts = $xpath->query( 'part', $root );
5436 - if ( $flags & self::NO_ARGS ) {
5437 - $s = '{{{' . $this->implodeWithFlags( '|', $flags, $title, $parts ) . '}}}';
5438 - } else {
5439 - $params = array( 'title' => $title, 'parts' => $parts, 'text' => 'FIXME' );
5440 - $s = $this->parser->argSubstitution( $params, $this );
5441 - }
5442 - } elseif ( $root->nodeName == 'comment' ) {
5443 - # HTML-style comment
5444 - if ( $this->parser->ot['html']
5445 - || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
5446 - || ( $flags & self::STRIP_COMMENTS ) )
5447 - {
5448 - $s = '';
5449 - } else {
5450 - $s = $root->textContent;
5451 - }
5452 - } elseif ( $root->nodeName == 'ignore' ) {
5453 - # Output suppression used by <includeonly> etc.
5454 - # OT_WIKI will only respect <ignore> in substed templates.
5455 - # The other output types respect it unless NO_IGNORE is set.
5456 - # extractSections() sets NO_IGNORE and so never respects it.
5457 - if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
5458 - $s = $root->textContent;
5459 - } else {
5460 - $s = '';
5461 - }
5462 - } elseif ( $root->nodeName == 'ext' ) {
5463 - # Extension tag
5464 - $xpath = new DOMXPath( $root->ownerDocument );
5465 - $names = $xpath->query( 'name', $root );
5466 - $attrs = $xpath->query( 'attr', $root );
5467 - $inners = $xpath->query( 'inner', $root );
5468 - $closes = $xpath->query( 'close', $root );
5469 - $params = array(
5470 - 'name' => $names->item( 0 ),
5471 - 'attr' => $attrs->length > 0 ? $attrs->item( 0 ) : null,
5472 - 'inner' => $inners->length > 0 ? $inners->item( 0 ) : null,
5473 - 'close' => $closes->length > 0 ? $closes->item( 0 ) : null,
5474 - );
5475 - $s = $this->parser->extensionSubstitution( $params, $this );
5476 - } elseif ( $root->nodeName == 'h' ) {
5477 - # Heading
5478 - $s = $this->expand( $root->childNodes, $flags );
5479 -
5480 - if ( $this->parser->ot['html'] ) {
5481 - # Insert heading index marker
5482 - $headingIndex = $root->getAttribute( 'i' );
5483 - $titleText = $this->title->getPrefixedDBkey();
5484 - $this->parser->mHeadings[] = array( $titleText, $headingIndex );
5485 - $serial = count( $this->parser->mHeadings ) - 1;
5486 - $marker = "{$this->parser->mUniqPrefix}-h-$serial-{$this->parser->mMarkerSuffix}";
5487 - $count = $root->getAttribute( 'level' );
5488 - $s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
5489 - $this->parser->mStripState->general->setPair( $marker, '' );
5490 - }
5491 - } else {
5492 - # Generic recursive expansion
5493 - $s = '';
5494 - for ( $node = $root->firstChild; $node; $node = $node->nextSibling ) {
5495 - if ( $node->nodeType == XML_TEXT_NODE ) {
5496 - $s .= $node->nodeValue;
5497 - } elseif ( $node->nodeType == XML_ELEMENT_NODE ) {
5498 - $s .= $this->expand( $node, $flags );
5499 - }
5500 - }
5501 - }
5502 - } else {
5503 - throw new MWException( __METHOD__.': Invalid parameter type' );
5504 - }
5505 - return $s;
5506 - }
5507 -
5508 - function implodeWithFlags( $sep, $flags /*, ... */ ) {
5509 - $args = array_slice( func_get_args(), 2 );
5510 -
5511 - $first = true;
5512 - $s = '';
5513 - foreach ( $args as $root ) {
5514 - if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
5515 - $root = array( $root );
5516 - }
5517 - foreach ( $root as $node ) {
5518 - if ( $first ) {
5519 - $first = false;
5520 - } else {
5521 - $s .= $sep;
5522 - }
5523 - $s .= $this->expand( $node, $flags );
5524 - }
5525 - }
5526 - return $s;
5527 - }
5528 -
5529 - function implode( $sep /*, ... */ ) {
5530 - $args = func_get_args();
5531 - $args = array_merge( array_slice( $args, 0, 1 ), array( 0 ), array_slice( $args, 1 ) );
5532 - return call_user_func_array( array( $this, 'implodeWithFlags' ), $args );
5533 - }
5534 -
5535 - /**
5536 - * Split an <arg> or <template> node into a three-element array:
5537 - * DOMNode name, string index and DOMNode value
5538 - */
5539 - function splitBraceNode( $node ) {
5540 - $xpath = new DOMXPath( $node->ownerDocument );
5541 - $names = $xpath->query( 'name', $node );
5542 - $values = $xpath->query( 'value', $node );
5543 - if ( !$names->length || !$values->length ) {
5544 - throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
5545 - }
5546 - $name = $names->item( 0 );
5547 - $index = $name->getAttribute( 'index' );
5548 - return array( $name, $index, $values->item( 0 ) );
5549 - }
5550 -
5551 - /**
5552 - * Split an <ext> node into an associative array containing name, attr, inner and close
5553 - * All values in the resulting array are DOMNodes. Inner and close are optional.
5554 - */
5555 - function splitExtNode( $node ) {
5556 - $xpath = new DOMXPath( $node->ownerDocument );
5557 - $names = $xpath->query( 'name', $node );
5558 - $attrs = $xpath->query( 'attr', $node );
5559 - $inners = $xpath->query( 'inner', $node );
5560 - $closes = $xpath->query( 'close', $node );
5561 - if ( !$names->length || !$attrs->length ) {
5562 - throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
5563 - }
5564 - $parts = array(
5565 - 'name' => $names->item( 0 ),
5566 - 'attr' => $attrs->item( 0 ) );
5567 - if ( $inners->length ) {
5568 - $parts['inner'] = $inners->item( 0 );
5569 - }
5570 - if ( $closes->length ) {
5571 - $parts['close'] = $closes->item( 0 );
5572 - }
5573 - return $parts;
5574 - }
5575 -
5576 - function __toString() {
5577 - return 'frame{}';
5578 - }
5579 -
5580 - function getPDBK( $level = false ) {
5581 - if ( $level === false ) {
5582 - return $this->title->getPrefixedDBkey();
5583 - } else {
5584 - return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false;
5585 - }
5586 - }
5587 -
5588 - /**
5589 - * Returns true if there are no arguments in this frame
5590 - */
5591 - function isEmpty() {
5592 - return true;
5593 - }
5594 -
5595 - function getArgument( $name ) {
5596 - return false;
5597 - }
5598 -
5599 - /**
5600 - * Returns true if the infinite loop check is OK, false if a loop is detected
5601 - */
5602 - function loopCheck( $title ) {
5603 - return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
5604 - }
5605 -}
5606 -
5607 -/**
5608 - * Expansion frame with template arguments
5609 - */
5610 -class PPTemplateFrame extends PPFrame {
5611 - var $numberedArgs, $namedArgs, $parent;
5612 - var $numberedExpansionCache, $namedExpansionCache;
5613 -
5614 - function __construct( $parser, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
5615 - $this->parser = $parser;
5616 - $this->parent = $parent;
5617 - $this->numberedArgs = $numberedArgs;
5618 - $this->namedArgs = $namedArgs;
5619 - $this->title = $title;
5620 - $pdbk = $title ? $title->getPrefixedDBkey() : false;
5621 - $this->titleCache = $parent->titleCache;
5622 - $this->titleCache[] = $pdbk;
5623 - $this->loopCheckHash = /*clone*/ $parent->loopCheckHash;
5624 - if ( $pdbk !== false ) {
5625 - $this->loopCheckHash[$pdbk] = true;
5626 - }
5627 - $this->depth = $parent->depth + 1;
5628 - $this->numberedExpansionCache = $this->namedExpansionCache = array();
5629 - }
5630 -
5631 - function __toString() {
5632 - $s = 'tplframe{';
5633 - $first = true;
5634 - $args = $this->numberedArgs + $this->namedArgs;
5635 - foreach ( $args as $name => $value ) {
5636 - if ( $first ) {
5637 - $first = false;
5638 - } else {
5639 - $s .= ', ';
5640 - }
5641 - $s .= "\"$name\":\"" .
5642 - str_replace( '"', '\\"', $value->ownerDocument->saveXML( $value ) ) . '"';
5643 - }
5644 - $s .= '}';
5645 - return $s;
5646 - }
5647 - /**
5648 - * Returns true if there are no arguments in this frame
5649 - */
5650 - function isEmpty() {
5651 - return !count( $this->numberedArgs ) && !count( $this->namedArgs );
5652 - }
5653 -
5654 - function getNumberedArgument( $index ) {
5655 - if ( !isset( $this->numberedArgs[$index] ) ) {
5656 - return false;
5657 - }
5658 - if ( !isset( $this->numberedExpansionCache[$index] ) ) {
5659 - # No trimming for unnamed arguments
5660 - $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS );
5661 - }
5662 - return $this->numberedExpansionCache[$index];
5663 - }
5664 -
5665 - function getNamedArgument( $name ) {
5666 - if ( !isset( $this->namedArgs[$name] ) ) {
5667 - return false;
5668 - }
5669 - if ( !isset( $this->namedExpansionCache[$name] ) ) {
5670 - # Trim named arguments post-expand, for backwards compatibility
5671 - $this->namedExpansionCache[$name] = trim(
5672 - $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) );
5673 - }
5674 - return $this->namedExpansionCache[$name];
5675 - }
5676 -
5677 - function getArgument( $name ) {
5678 - $text = $this->getNumberedArgument( $name );
5679 - if ( $text === false ) {
5680 - $text = $this->getNamedArgument( $name );
5681 - }
5682 - return $text;
5683 - }
5684 -}
5685 -
5686 -/**
5687 - * Stack class to help Parser::preprocessToDom()
5688 - */
5689 -class PPDStack {
5690 - var $stack, $topAccum, $top;
5691 -
5692 - function __construct() {
5693 - $this->stack = array();
5694 - $this->topAccum = '';
5695 - $this->top = false;
5696 - }
5697 -
5698 - function &getAccum() {
5699 - if ( count( $this->stack ) ) {
5700 - return $this->top->getAccum();
5701 - } else {
5702 - return $this->topAccum;
5703 - }
5704 - }
5705 -
5706 - function push( $data ) {
5707 - if ( $data instanceof PPDStackElement ) {
5708 - $this->stack[] = $data;
5709 - } else {
5710 - $this->stack[] = new PPDStackElement( $data );
5711 - }
5712 - $this->top =& $this->stack[ count( $this->stack ) - 1 ];
5713 - }
5714 -
5715 - function pop() {
5716 - if ( !count( $this->stack ) ) {
5717 - throw new MWException( __METHOD__.': no elements remaining' );
5718 - }
5719 - $temp = array_pop( $this->stack );
5720 - if ( count( $this->stack ) ) {
5721 - $this->top =& $this->stack[ count( $this->stack ) - 1 ];
5722 - } else {
5723 - $this->top = false;
5724 - }
5725 - }
5726 -
5727 - function getFlags() {
5728 - if ( !count( $this->stack ) ) {
5729 - return array(
5730 - 'findEquals' => false,
5731 - 'findPipe' => false,
5732 - 'inHeading' => false,
5733 - );
5734 - } else {
5735 - return $this->top->getFlags();
5736 - }
5737 - }
5738 -}
5739 -
5740 -class PPDStackElement {
5741 - var $open, $close, $count, $parts, $eqpos, $lineStart;
5742 -
5743 - function __construct( $data = array() ) {
5744 - $this->parts = array( '' );
5745 - $this->eqpos = array();
5746 -
5747 - foreach ( $data as $name => $value ) {
5748 - $this->$name = $value;
5749 - }
5750 - }
5751 -
5752 - function &getAccum() {
5753 - return $this->parts[count($this->parts) - 1];
5754 - }
5755 -
5756 - function addPart( $s = '' ) {
5757 - $this->parts[] = $s;
5758 - }
5759 -
5760 - function getFlags() {
5761 - $partCount = count( $this->parts );
5762 - $findPipe = $this->open != "\n" && $this->open != '[';
5763 - return array(
5764 - 'findPipe' => $findPipe,
5765 - 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->eqpos[$partCount - 1] ),
5766 - 'inHeading' => $this->open == "\n",
5767 - );
5768 - }
5769 -}
Index: trunk/phase3/includes/Preprocessor.php
@@ -0,0 +1,74 @@
 2+<?php
 3+
 4+interface Preprocessor {
 5+ function __construct( $parser );
 6+ function newFrame();
 7+ function preprocessToObj( $text, $flags = 0 );
 8+}
 9+
 10+interface PPFrame {
 11+ const NO_ARGS = 1;
 12+ const NO_TEMPLATES = 2;
 13+ const STRIP_COMMENTS = 4;
 14+ const NO_IGNORE = 8;
 15+
 16+ const RECOVER_ORIG = 11;
 17+
 18+ /**
 19+ * Create a child frame
 20+ */
 21+ function newChild( $args = false, $title = false );
 22+
 23+ /**
 24+ * Expand a document tree node
 25+ */
 26+ function expand( $root, $flags = 0 );
 27+
 28+ /**
 29+ * Implode with flags for expand()
 30+ */
 31+ function implodeWithFlags( $sep, $flags /*, ... */ );
 32+
 33+ /**
 34+ * Implode with no flags specified
 35+ */
 36+ function implode( $sep /*, ... */ );
 37+
 38+ /**
 39+ * Makes an object that, when expand()ed, will be the same as one obtained
 40+ * with implode()
 41+ */
 42+ function virtualImplode( $sep /*, ... */ );
 43+
 44+ /**
 45+ * Virtual implode with brackets
 46+ */
 47+ function virtualBracketedImplode( $start, $sep, $end /*, ... */ );
 48+
 49+ /**
 50+ * Returns true if there are no arguments in this frame
 51+ */
 52+ function isEmpty();
 53+
 54+ function getArgument( $name );
 55+
 56+ /**
 57+ * Returns true if the infinite loop check is OK, false if a loop is detected
 58+ */
 59+ function loopCheck( $title );
 60+}
 61+
 62+interface PPNode {
 63+ function getChildren();
 64+ function getFirstChild();
 65+ function getNextSibling();
 66+ function getChildrenOfType( $type );
 67+ function getLength();
 68+ function item( $i );
 69+ function getName();
 70+
 71+ function splitArg();
 72+ function splitExt();
 73+ function splitHeading();
 74+}
 75+
Property changes on: trunk/phase3/includes/Preprocessor.php
___________________________________________________________________
Name: svn:eol-style
176 + native
Index: trunk/phase3/includes/AutoLoader.php
@@ -143,6 +143,15 @@
144144 'ParserOutput' => 'includes/ParserOutput.php',
145145 'ParserOptions' => 'includes/ParserOptions.php',
146146 'PatrolLog' => 'includes/PatrolLog.php',
 147+ 'Preprocessor' => 'includes/Preprocessor.php',
 148+ 'PPFrame' => 'includes/Preprocessor.php',
 149+ 'PPNode' => 'includes/Preprocessor.php',
 150+ 'Preprocessor_DOM' => 'includes/Preprocessor_DOM.php',
 151+ 'PPFrame_DOM' => 'includes/Preprocessor_DOM.php',
 152+ 'PPTemplateFrame_DOM' => 'includes/Preprocessor_DOM.php',
 153+ 'PPDStack' => 'includes/Preprocessor_DOM.php',
 154+ 'PPDStackElement' => 'includes/Preprocessor_DOM.php',
 155+ 'PPNode_DOM' => 'includes/Preprocessor_DOM.php',
147156 'ProfilerSimple' => 'includes/ProfilerSimple.php',
148157 'ProfilerSimpleUDP' => 'includes/ProfilerSimpleUDP.php',
149158 'Profiler' => 'includes/Profiler.php',
Index: trunk/extensions/ParserFunctions/ParserFunctions.php
@@ -206,7 +206,11 @@
207207 $lastItemHadNoEquals = false;
208208 $mwDefault =& MagicWord::get( 'default' );
209209 foreach ( $args as $arg ) {
210 - list( $nameNode, $index, $valueNode ) = $frame->splitBraceNode( $arg );
 210+ $bits = $arg->splitArg();
 211+ $nameNode = $bits['name'];
 212+ $index = $bits['index'];
 213+ $valueNode = $bits['value'];
 214+
211215 if ( $index === '' ) {
212216 # Found "="
213217 $lastItemHadNoEquals = false;
Index: trunk/extensions/LabeledSectionTransclusion/lst.php
@@ -339,14 +339,13 @@
340340 "<!-- WARNING: LST loop detected -->";
341341 }
342342
343 - list( $dom, $finalTitle ) = $parser->getTemplateDom( $title );
 343+ list( $root, $finalTitle ) = $parser->getTemplateDom( $title );
344344
345345 // if article doesn't exist, return a red link.
346 - if ($dom === false) {
 346+ if ($root === false) {
347347 return "[[" . $title->getPrefixedText() . "]]";
348348 }
349349
350 - $root = $dom->documentElement;
351350 $newFrame = $frame->newChild( false, $finalTitle );
352351 if ( !count( $args ) ) {
353352 return $newFrame->expand( $root );
@@ -408,15 +407,15 @@
409408 extract( $setup );
410409
411410 $text = '';
412 - $node = $root->firstChild;
 411+ $node = $root->getFirstChild();
413412 while ( $node ) {
414413 // Find the begin node
415414 $found = false;
416 - for ( ; $node; $node = $node->nextSibling ) {
417 - if ( $node->nodeName != 'ext' ) {
 415+ for ( ; $node; $node = $node->getNextSibling() ) {
 416+ if ( $node->getName() != 'ext' ) {
418417 continue;
419418 }
420 - $parts = $newFrame->splitExtNode( $node );
 419+ $parts = $node->splitExt();
421420 $parts = array_map( array( $newFrame, 'expand' ), $parts );
422421 if ( self::isSection( $parts['name'] ) ) {
423422 if ( preg_match( $beginRegex, $parts['attr'] ) ) {
@@ -431,9 +430,9 @@
432431
433432 // Write the text out while looking for the end node
434433 $found = false;
435 - for ( ; $node; $node = $node->nextSibling ) {
436 - if ( $node->nodeName === 'ext' ) {
437 - $parts = $newFrame->splitExtNode( $node );
 434+ for ( ; $node; $node = $node->getNextSibling() ) {
 435+ if ( $node->getName() === 'ext' ) {
 436+ $parts = $node->splitExt();
438437 $parts = array_map( array( $newFrame, 'expand' ), $parts );
439438 if ( self::isSection( $parts['name'] ) ) {
440439 if ( preg_match( $endRegex, $parts['attr'] ) ) {
@@ -451,7 +450,7 @@
452451 if ( !$found ) {
453452 break;
454453 }
455 - $node = $node->nextSibling;
 454+ $node = $node->getNextSibling();
456455 }
457456 return $text;
458457 }
@@ -487,12 +486,12 @@
488487 extract( $setup );
489488
490489 $text = '';
491 - for ( $node = $root->firstChild; $node; $node = $node ? $node->nextSibling : false ) {
 490+ for ( $node = $root->getFirstChild(); $node; $node = $node ? $node->getNextSibling() : false ) {
492491 // Search for the start tag
493492 $found = false;
494 - for ( ; $node; $node = $node->nextSibling ) {
495 - if ( $node->nodeName == 'ext' ) {
496 - $parts = $newFrame->splitExtNode( $node );
 493+ for ( ; $node; $node = $node->getNextSibling() ) {
 494+ if ( $node->getName() == 'ext' ) {
 495+ $parts = $node->splitExt();
497496 $parts = array_map( array( $newFrame, 'expand' ), $parts );
498497 if ( self::isSection( $parts['name'] ) ) {
499498 if ( preg_match( $beginRegex, $parts['attr'] ) ) {
@@ -516,9 +515,9 @@
517516 $text .= $repl;
518517
519518 // Search for the end tag
520 - for ( ; $node; $node = $node->nextSibling ) {
521 - if ( $node->nodeName == 'ext' ) {
522 - $parts = $newFrame->splitExtNode( $node );
 519+ for ( ; $node; $node = $node->getNextSibling() ) {
 520+ if ( $node->getName() == 'ext' ) {
 521+ $parts = $node->splitExt( $node );
523522 $parts = array_map( array( $newFrame, 'expand' ), $parts );
524523 if ( self::isSection( $parts['name'] ) ) {
525524 if ( preg_match( $endRegex, $parts['attr'] ) ) {

Follow-up revisions

Revision | Commit summary | Author | Date
r70812 | Fix fixme placed on r30022 by me (fixing per comment)... | reedy | 12:55, 10 August 2010

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r29950 | Fix for more than 6 equals signs on their own line. | tstarling | 13:53, 19 January 2008

Comments

#Comment by Reedy (talk | contribs)   07:23, 29 July 2010

Line 1163 in Preprocessor_DOM.php, in virtualImplode()

+		if ( $root instanceof PPNode_DOM ) $root = $root->node;

$root at that point is undefined.

Per implode() above, is that line supposed to be in the loop?

#Comment by Reedy (talk | contribs)   12:56, 10 August 2010

Marking as resolved per r70812

Status & tagging log