r64669 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r64668‎ | r64669 | r64670 >
Date:20:55, 6 April 2010
Author:than4213
Status:deferred
Tags:
Comment:
Changing approach to hooking up MediaWiki with the new parser.
1 - undo previous work
2 - merge changes from trunk
3 - hook up new parser from the bottom up. starting with preSaveTransform
Modified paths:
  • /branches/parser-work/phase3/includes/AutoLoader.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Parser.php (modified) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor.php (added) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php (added) (history)
  • /branches/parser-work/phase3/includes/parser/Preprocessor_Hash.php (added) (history)
  • /branches/parser-work/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: branches/parser-work/phase3/maintenance/parserTests.txt
@@ -78,8 +78,9 @@
7979 * Item 1
8080 * Item 2
8181 !! result
82 -<ul><li> Item 1</li>
83 -<li> Item 2</li></ul>
 82+<ul><li> Item 1
 83+</li><li> Item 2
 84+</li></ul>
8485
8586 !! end
8687
@@ -102,21 +103,22 @@
103104 * plain l'''italic''plain
104105 * plain l''''bold''' plain
105106 !! result
106 -<ul><li> plain</li>
107 -<li> plain<i>italic</i>plain</li>
108 -<li> plain<i>italic</i>plain<i>italic</i>plain</li>
109 -<li> plain<b>bold</b>plain</li>
110 -<li> plain<b>bold</b>plain<b>bold</b>plain</li>
111 -<li> plain<i>italic</i>plain<b>bold</b>plain</li>
112 -<li> plain<b>bold</b>plain<i>italic</i>plain</li>
113 -<li> plain<i>italic<b>bold-italic</b>italic</i>plain</li>
114 -<li> plain<b>bold<i>bold-italic</i>bold</b>plain</li>
115 -<li> plain<i><b>bold-italic</b>italic</i>plain</li>
116 -<li> plain<b><i>bold-italic</i>bold</b>plain</li>
117 -<li> plain<i>italic<b>bold-italic</b></i>plain</li>
118 -<li> plain<b>bold<i>bold-italic</i></b>plain</li>
119 -<li> plain l'<i>italic</i>plain</li>
120 -<li> plain l'<b>bold</b> plain</li></ul>
 107+<ul><li> plain
 108+</li><li> plain<i>italic</i>plain
 109+</li><li> plain<i>italic</i>plain<i>italic</i>plain
 110+</li><li> plain<b>bold</b>plain
 111+</li><li> plain<b>bold</b>plain<b>bold</b>plain
 112+</li><li> plain<i>italic</i>plain<b>bold</b>plain
 113+</li><li> plain<b>bold</b>plain<i>italic</i>plain
 114+</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain
 115+</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain
 116+</li><li> plain<i><b>bold-italic</b>italic</i>plain
 117+</li><li> plain<b><i>bold-italic</i>bold</b>plain
 118+</li><li> plain<i>italic<b>bold-italic</b></i>plain
 119+</li><li> plain<b>bold<i>bold-italic</i></b>plain
 120+</li><li> plain l'<i>italic</i>plain
 121+</li><li> plain l'<b>bold</b> plain
 122+</li></ul>
121123
122124 !! end
123125
@@ -1793,19 +1795,29 @@
17941796 *#number level 2
17951797 *Level 1
17961798 !! result
1797 -<ul><li>Mixed list</li>
1798 -<ol><li> with numbers</li></ol>
1799 -<ul><li> and bullets</li></ul>
1800 -<ol><li> and numbers</li></ol>
1801 -<li>bullets again</li>
1802 -<ul><li>bullet level 2</li>
1803 -<ul><li>bullet level 3</li>
1804 -<ol><li>Number on level 4</li></ol></ul>
1805 -<li>bullet level 2</li>
1806 -<ol><li>Number on level 3</li>
1807 -<li>Number on level 3</li></ol></ul>
1808 -<ol><li>number level 2</li></ol>
1809 -<li>Level 1</li></ul>
 1799+<ul><li>Mixed list
 1800+<ol><li> with numbers
 1801+</li></ol>
 1802+<ul><li> and bullets
 1803+</li></ul>
 1804+<ol><li> and numbers
 1805+</li></ol>
 1806+</li><li>bullets again
 1807+<ul><li>bullet level 2
 1808+<ul><li>bullet level 3
 1809+<ol><li>Number on level 4
 1810+</li></ol>
 1811+</li></ul>
 1812+</li><li>bullet level 2
 1813+<ol><li>Number on level 3
 1814+</li><li>Number on level 3
 1815+</li></ol>
 1816+</li></ul>
 1817+<ol><li>number level 2
 1818+</li></ol>
 1819+</li><li>Level 1
 1820+</li></ul>
 1821+
18101822 !! end
18111823
18121824 !! test
@@ -7253,18 +7265,30 @@
72547266 !! end
72557267
72567268 !! test
7257 -HHP3.1: Heuristics for headings in preprocessor parenthetical structures
 7269+HHP2.1: Heuristics for headings in preprocessor parenthetical structures
72587270 !! input
72597271 {{foo|
72607272 =heading=
72617273 !! result
72627274 <p>{{foo|
72637275 </p>
7264 -<h1><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h1>
 7276+<h1> <span class="mw-headline" id="heading">heading</span></h1>
72657277
72667278 !! end
72677279
72687280 !! test
 7281+HHP2.2: Heuristics for headings in preprocessor parenthetical structures
 7282+!! input
 7283+{{foo|
 7284+==heading==
 7285+!! result
 7286+<p>{{foo|
 7287+</p>
 7288+<h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h2>
 7289+
 7290+!! end
 7291+
 7292+!! test
72697293 Tildes in comments
72707294 !! options
72717295 pst
Index: branches/parser-work/phase3/includes/parser/Preprocessor_Hash.php
@@ -0,0 +1,1619 @@
 2+<?php
 3+
 4+/**
 5+ * Differences from DOM schema:
 6+ * * attribute nodes are children
 7+ * * <h> nodes that aren't at the top are replaced with <possible-h>
 8+ * @ingroup Parser
 9+ */
 10+class Preprocessor_Hash implements Preprocessor {
 11+ var $parser;
 12+
 13+ const CACHE_VERSION = 1;
 14+
 15+ function __construct( $parser ) {
 16+ $this->parser = $parser;
 17+ }
 18+
 19+ function newFrame() {
 20+ return new PPFrame_Hash( $this );
 21+ }
 22+
 23+ function newCustomFrame( $args ) {
 24+ return new PPCustomFrame_Hash( $this, $args );
 25+ }
 26+
 27+ /**
 28+ * Preprocess some wikitext and return the document tree.
 29+ * This is the ghost of Parser::replace_variables().
 30+ *
 31+ * @param string $text The text to parse
 32+ * @param integer $flags Bitwise combination of:
 33+ * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
 34+ * included. Default is to assume a direct page view.
 35+ *
 36+ * The generated DOM tree must depend only on the input text and the flags.
 37+ * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
 38+ *
 39+ * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 40+ * change in the DOM tree for a given text, must be passed through the section identifier
 41+ * in the section edit link and thus back to extractSections().
 42+ *
 43+ * The output of this function is currently only cached in process memory, but a persistent
 44+ * cache may be implemented at a later date which takes further advantage of these strict
 45+ * dependency requirements.
 46+ *
 47+ * @private
 48+ */
 49+ function preprocessToObj( $text, $flags = 0 ) {
 50+ wfProfileIn( __METHOD__ );
 51+
 52+
 53+ // Check cache.
 54+ global $wgMemc, $wgPreprocessorCacheThreshold;
 55+
 56+ $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
 57+ if ( $cacheable ) {
 58+ wfProfileIn( __METHOD__.'-cacheable' );
 59+
 60+ $cacheKey = wfMemcKey( 'preprocess-hash', md5($text), $flags );
 61+ $cacheValue = $wgMemc->get( $cacheKey );
 62+ if ( $cacheValue ) {
 63+ $version = substr( $cacheValue, 0, 8 );
 64+ if ( intval( $version ) == self::CACHE_VERSION ) {
 65+ $hash = unserialize( substr( $cacheValue, 8 ) );
 66+ // From the cache
 67+ wfDebugLog( "Preprocessor",
 68+ "Loaded preprocessor hash from memcached (key $cacheKey)" );
 69+ wfProfileOut( __METHOD__.'-cacheable' );
 70+ wfProfileOut( __METHOD__ );
 71+ return $hash;
 72+ }
 73+ }
 74+ wfProfileIn( __METHOD__.'-cache-miss' );
 75+ }
 76+
 77+ $rules = array(
 78+ '{' => array(
 79+ 'end' => '}',
 80+ 'names' => array(
 81+ 2 => 'template',
 82+ 3 => 'tplarg',
 83+ ),
 84+ 'min' => 2,
 85+ 'max' => 3,
 86+ ),
 87+ '[' => array(
 88+ 'end' => ']',
 89+ 'names' => array( 2 => null ),
 90+ 'min' => 2,
 91+ 'max' => 2,
 92+ )
 93+ );
 94+
 95+ $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
 96+
 97+ $xmlishElements = $this->parser->getStripList();
 98+ $enableOnlyinclude = false;
 99+ if ( $forInclusion ) {
 100+ $ignoredTags = array( 'includeonly', '/includeonly' );
 101+ $ignoredElements = array( 'noinclude' );
 102+ $xmlishElements[] = 'noinclude';
 103+ if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
 104+ $enableOnlyinclude = true;
 105+ }
 106+ } else {
 107+ $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
 108+ $ignoredElements = array( 'includeonly' );
 109+ $xmlishElements[] = 'includeonly';
 110+ }
 111+ $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
 112+
 113+ // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
 114+ $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
 115+
 116+ $stack = new PPDStack_Hash;
 117+
 118+ $searchBase = "[{<\n";
 119+ $revText = strrev( $text ); // For fast reverse searches
 120+
 121+ $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start
 122+ $accum =& $stack->getAccum(); # Current accumulator
 123+ $findEquals = false; # True to find equals signs in arguments
 124+ $findPipe = false; # True to take notice of pipe characters
 125+ $headingIndex = 1;
 126+ $inHeading = false; # True if $i is inside a possible heading
 127+ $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i
 128+ $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
 129+ $fakeLineStart = true; # Do a line-start run without outputting an LF character
 130+
 131+ while ( true ) {
 132+ //$this->memCheck();
 133+
 134+ if ( $findOnlyinclude ) {
 135+ // Ignore all input up to the next <onlyinclude>
 136+ $startPos = strpos( $text, '<onlyinclude>', $i );
 137+ if ( $startPos === false ) {
 138+ // Ignored section runs to the end
 139+ $accum->addNodeWithText( 'ignore', substr( $text, $i ) );
 140+ break;
 141+ }
 142+ $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
 143+ $accum->addNodeWithText( 'ignore', substr( $text, $i, $tagEndPos - $i ) );
 144+ $i = $tagEndPos;
 145+ $findOnlyinclude = false;
 146+ }
 147+
 148+ if ( $fakeLineStart ) {
 149+ $found = 'line-start';
 150+ $curChar = '';
 151+ } else {
 152+ # Find next opening brace, closing brace or pipe
 153+ $search = $searchBase;
 154+ if ( $stack->top === false ) {
 155+ $currentClosing = '';
 156+ } else {
 157+ $currentClosing = $stack->top->close;
 158+ $search .= $currentClosing;
 159+ }
 160+ if ( $findPipe ) {
 161+ $search .= '|';
 162+ }
 163+ if ( $findEquals ) {
 164+ // First equals will be for the template
 165+ $search .= '=';
 166+ }
 167+ $rule = null;
 168+ # Output literal section, advance input counter
 169+ $literalLength = strcspn( $text, $search, $i );
 170+ if ( $literalLength > 0 ) {
 171+ $accum->addLiteral( substr( $text, $i, $literalLength ) );
 172+ $i += $literalLength;
 173+ }
 174+ if ( $i >= strlen( $text ) ) {
 175+ if ( $currentClosing == "\n" ) {
 176+ // Do a past-the-end run to finish off the heading
 177+ $curChar = '';
 178+ $found = 'line-end';
 179+ } else {
 180+ # All done
 181+ break;
 182+ }
 183+ } else {
 184+ $curChar = $text[$i];
 185+ if ( $curChar == '|' ) {
 186+ $found = 'pipe';
 187+ } elseif ( $curChar == '=' ) {
 188+ $found = 'equals';
 189+ } elseif ( $curChar == '<' ) {
 190+ $found = 'angle';
 191+ } elseif ( $curChar == "\n" ) {
 192+ if ( $inHeading ) {
 193+ $found = 'line-end';
 194+ } else {
 195+ $found = 'line-start';
 196+ }
 197+ } elseif ( $curChar == $currentClosing ) {
 198+ $found = 'close';
 199+ } elseif ( isset( $rules[$curChar] ) ) {
 200+ $found = 'open';
 201+ $rule = $rules[$curChar];
 202+ } else {
 203+ # Some versions of PHP have a strcspn which stops on null characters
 204+ # Ignore and continue
 205+ ++$i;
 206+ continue;
 207+ }
 208+ }
 209+ }
 210+
 211+ if ( $found == 'angle' ) {
 212+ $matches = false;
 213+ // Handle </onlyinclude>
 214+ if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
 215+ $findOnlyinclude = true;
 216+ continue;
 217+ }
 218+
 219+ // Determine element name
 220+ if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
 221+ // Element name missing or not listed
 222+ $accum->addLiteral( '<' );
 223+ ++$i;
 224+ continue;
 225+ }
 226+ // Handle comments
 227+ if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
 228+ // To avoid leaving blank lines, when a comment is both preceded
 229+ // and followed by a newline (ignoring spaces), trim leading and
 230+ // trailing spaces and one of the newlines.
 231+
 232+ // Find the end
 233+ $endPos = strpos( $text, '-->', $i + 4 );
 234+ if ( $endPos === false ) {
 235+ // Unclosed comment in input, runs to end
 236+ $inner = substr( $text, $i );
 237+ $accum->addNodeWithText( 'comment', $inner );
 238+ $i = strlen( $text );
 239+ } else {
 240+ // Search backwards for leading whitespace
 241+ $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
 242+ // Search forwards for trailing whitespace
 243+ // $wsEnd will be the position of the last space
 244+ $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
 245+ // Eat the line if possible
 246+ // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
 247+ // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
 248+ // it's a possible beneficial b/c break.
 249+ if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
 250+ && substr( $text, $wsEnd + 1, 1 ) == "\n" )
 251+ {
 252+ $startPos = $wsStart;
 253+ $endPos = $wsEnd + 1;
 254+ // Remove leading whitespace from the end of the accumulator
 255+ // Sanity check first though
 256+ $wsLength = $i - $wsStart;
 257+ if ( $wsLength > 0
 258+ && $accum->lastNode instanceof PPNode_Hash_Text
 259+ && substr( $accum->lastNode->value, -$wsLength ) === str_repeat( ' ', $wsLength ) )
 260+ {
 261+ $accum->lastNode->value = substr( $accum->lastNode->value, 0, -$wsLength );
 262+ }
 263+ // Do a line-start run next time to look for headings after the comment
 264+ $fakeLineStart = true;
 265+ } else {
 266+ // No line to eat, just take the comment itself
 267+ $startPos = $i;
 268+ $endPos += 2;
 269+ }
 270+
 271+ if ( $stack->top ) {
 272+ $part = $stack->top->getCurrentPart();
 273+ if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
 274+ // Comments abutting, no change in visual end
 275+ $part->commentEnd = $wsEnd;
 276+ } else {
 277+ $part->visualEnd = $wsStart;
 278+ $part->commentEnd = $endPos;
 279+ }
 280+ }
 281+ $i = $endPos + 1;
 282+ $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
 283+ $accum->addNodeWithText( 'comment', $inner );
 284+ }
 285+ continue;
 286+ }
 287+ $name = $matches[1];
 288+ $lowerName = strtolower( $name );
 289+ $attrStart = $i + strlen( $name ) + 1;
 290+
 291+ // Find end of tag
 292+ $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
 293+ if ( $tagEndPos === false ) {
 294+ // Infinite backtrack
 295+ // Disable tag search to prevent worst-case O(N^2) performance
 296+ $noMoreGT = true;
 297+ $accum->addLiteral( '<' );
 298+ ++$i;
 299+ continue;
 300+ }
 301+
 302+ // Handle ignored tags
 303+ if ( in_array( $lowerName, $ignoredTags ) ) {
 304+ $accum->addNodeWithText( 'ignore', substr( $text, $i, $tagEndPos - $i + 1 ) );
 305+ $i = $tagEndPos + 1;
 306+ continue;
 307+ }
 308+
 309+ $tagStartPos = $i;
 310+ if ( $text[$tagEndPos-1] == '/' ) {
 311+ // Short end tag
 312+ $attrEnd = $tagEndPos - 1;
 313+ $inner = null;
 314+ $i = $tagEndPos + 1;
 315+ $close = null;
 316+ } else {
 317+ $attrEnd = $tagEndPos;
 318+ // Find closing tag
 319+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
 320+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
 321+ {
 322+ $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
 323+ $i = $matches[0][1] + strlen( $matches[0][0] );
 324+ $close = $matches[0][0];
 325+ } else {
 326+ // No end tag -- let it run out to the end of the text.
 327+ $inner = substr( $text, $tagEndPos + 1 );
 328+ $i = strlen( $text );
 329+ $close = null;
 330+ }
 331+ }
 332+ // <includeonly> and <noinclude> just become <ignore> tags
 333+ if ( in_array( $lowerName, $ignoredElements ) ) {
 334+ $accum->addNodeWithText( 'ignore', substr( $text, $tagStartPos, $i - $tagStartPos ) );
 335+ continue;
 336+ }
 337+
 338+ if ( $attrEnd <= $attrStart ) {
 339+ $attr = '';
 340+ } else {
 341+ // Note that the attr element contains the whitespace between name and attribute,
 342+ // this is necessary for precise reconstruction during pre-save transform.
 343+ $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
 344+ }
 345+
 346+ $extNode = new PPNode_Hash_Tree( 'ext' );
 347+ $extNode->addChild( PPNode_Hash_Tree::newWithText( 'name', $name ) );
 348+ $extNode->addChild( PPNode_Hash_Tree::newWithText( 'attr', $attr ) );
 349+ if ( $inner !== null ) {
 350+ $extNode->addChild( PPNode_Hash_Tree::newWithText( 'inner', $inner ) );
 351+ }
 352+ if ( $close !== null ) {
 353+ $extNode->addChild( PPNode_Hash_Tree::newWithText( 'close', $close ) );
 354+ }
 355+ $accum->addNode( $extNode );
 356+ }
 357+
 358+ elseif ( $found == 'line-start' ) {
 359+ // Is this the start of a heading?
 360+ // Line break belongs before the heading element in any case
 361+ if ( $fakeLineStart ) {
 362+ $fakeLineStart = false;
 363+ } else {
 364+ $accum->addLiteral( $curChar );
 365+ $i++;
 366+ }
 367+
 368+ $count = strspn( $text, '=', $i, 6 );
 369+ if ( $count == 1 && $findEquals ) {
 370+ // DWIM: This looks kind of like a name/value separator
 371+ // Let's let the equals handler have it and break the potential heading
 372+ // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
 373+ } elseif ( $count > 0 ) {
 374+ $piece = array(
 375+ 'open' => "\n",
 376+ 'close' => "\n",
 377+ 'parts' => array( new PPDPart_Hash( str_repeat( '=', $count ) ) ),
 378+ 'startPos' => $i,
 379+ 'count' => $count );
 380+ $stack->push( $piece );
 381+ $accum =& $stack->getAccum();
 382+ extract( $stack->getFlags() );
 383+ $i += $count;
 384+ }
 385+ }
 386+
 387+ elseif ( $found == 'line-end' ) {
 388+ $piece = $stack->top;
 389+ // A heading must be open, otherwise \n wouldn't have been in the search list
 390+ assert( $piece->open == "\n" );
 391+ $part = $piece->getCurrentPart();
 392+ // Search back through the input to see if it has a proper close
 393+ // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
 394+ $wsLength = strspn( $revText, " \t", strlen( $text ) - $i );
 395+ $searchStart = $i - $wsLength;
 396+ if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
 397+ // Comment found at line end
 398+ // Search for equals signs before the comment
 399+ $searchStart = $part->visualEnd;
 400+ $searchStart -= strspn( $revText, " \t", strlen( $text ) - $searchStart );
 401+ }
 402+ $count = $piece->count;
 403+ $equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart );
 404+ if ( $equalsLength > 0 ) {
 405+ if ( $i - $equalsLength == $piece->startPos ) {
 406+ // This is just a single string of equals signs on its own line
 407+ // Replicate the doHeadings behaviour /={count}(.+)={count}/
 408+ // First find out how many equals signs there really are (don't stop at 6)
 409+ $count = $equalsLength;
 410+ if ( $count < 3 ) {
 411+ $count = 0;
 412+ } else {
 413+ $count = min( 6, intval( ( $count - 1 ) / 2 ) );
 414+ }
 415+ } else {
 416+ $count = min( $equalsLength, $count );
 417+ }
 418+ if ( $count > 0 ) {
 419+ // Normal match, output <possible-h> (renamed to <h> below if it ends up at the top level)
 420+ $element = new PPNode_Hash_Tree( 'possible-h' );
 421+ $element->addChild( new PPNode_Hash_Attr( 'level', $count ) );
 422+ $element->addChild( new PPNode_Hash_Attr( 'i', $headingIndex++ ) );
 423+ $element->lastChild->nextSibling = $accum->firstNode;
 424+ $element->lastChild = $accum->lastNode;
 425+ } else {
 426+ // Single equals sign on its own line, count=0
 427+ $element = $accum;
 428+ }
 429+ } else {
 430+ // No match, no <h>, just pass down the inner text
 431+ $element = $accum;
 432+ }
 433+ // Unwind the stack
 434+ $stack->pop();
 435+ $accum =& $stack->getAccum();
 436+ extract( $stack->getFlags() );
 437+
 438+ // Append the result to the enclosing accumulator
 439+ if ( $element instanceof PPNode ) {
 440+ $accum->addNode( $element );
 441+ } else {
 442+ $accum->addAccum( $element );
 443+ }
 444+ // Note that we do NOT increment the input pointer.
 445+ // This is because the closing linebreak could be the opening linebreak of
 446+ // another heading. Infinite loops are avoided because the next iteration MUST
 447+ // hit the heading open case above, which unconditionally increments the
 448+ // input pointer.
 449+ }
 450+
 451+ elseif ( $found == 'open' ) {
 452+ # count opening brace characters
 453+ $count = strspn( $text, $curChar, $i );
 454+
 455+ # we need to add to stack only if opening brace count is enough for one of the rules
 456+ if ( $count >= $rule['min'] ) {
 457+ # Add it to the stack
 458+ $piece = array(
 459+ 'open' => $curChar,
 460+ 'close' => $rule['end'],
 461+ 'count' => $count,
 462+ 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
 463+ );
 464+
 465+ $stack->push( $piece );
 466+ $accum =& $stack->getAccum();
 467+ extract( $stack->getFlags() );
 468+ } else {
 469+ # Add literal brace(s)
 470+ $accum->addLiteral( str_repeat( $curChar, $count ) );
 471+ }
 472+ $i += $count;
 473+ }
 474+
 475+ elseif ( $found == 'close' ) {
 476+ $piece = $stack->top;
 477+ # let's check if there are enough characters for closing brace
 478+ $maxCount = $piece->count;
 479+ $count = strspn( $text, $curChar, $i, $maxCount );
 480+
 481+ # check for maximum matching characters (if there are 5 closing
 482+ # characters, we will probably need only 3 - depending on the rules)
 483+ $matchingCount = 0;
 484+ $rule = $rules[$piece->open];
 485+ if ( $count > $rule['max'] ) {
 486+ # The specified maximum exists in the callback array, unless the caller
 487+ # has made an error
 488+ $matchingCount = $rule['max'];
 489+ } else {
 490+ # Count is less than the maximum
 491+ # Skip any gaps in the callback array to find the true largest match
 492+ # Need to use array_key_exists not isset because the callback can be null
 493+ $matchingCount = $count;
 494+ while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
 495+ --$matchingCount;
 496+ }
 497+ }
 498+
 499+ if ($matchingCount <= 0) {
 500+ # No matching element found in callback array
 501+ # Output a literal closing brace and continue
 502+ $accum->addLiteral( str_repeat( $curChar, $count ) );
 503+ $i += $count;
 504+ continue;
 505+ }
 506+ $name = $rule['names'][$matchingCount];
 507+ if ( $name === null ) {
 508+ // No element, just literal text
 509+ $element = $piece->breakSyntax( $matchingCount );
 510+ $element->addLiteral( str_repeat( $rule['end'], $matchingCount ) );
 511+ } else {
 512+ # Create tree node element
 513+ # Note: $parts already holds node accumulators, so nothing needs to be encoded further
 514+ $parts = $piece->parts;
 515+ $titleAccum = $parts[0]->out;
 516+ unset( $parts[0] );
 517+
 518+ $element = new PPNode_Hash_Tree( $name );
 519+
 520+ # The invocation is at the start of the line if lineStart is set in
 521+ # the stack, and all opening brackets are used up.
 522+ if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
 523+ $element->addChild( new PPNode_Hash_Attr( 'lineStart', 1 ) );
 524+ }
 525+ $titleNode = new PPNode_Hash_Tree( 'title' );
 526+ $titleNode->firstChild = $titleAccum->firstNode;
 527+ $titleNode->lastChild = $titleAccum->lastNode;
 528+ $element->addChild( $titleNode );
 529+ $argIndex = 1;
 530+ foreach ( $parts as $partIndex => $part ) {
 531+ if ( isset( $part->eqpos ) ) {
 532+ // Find equals
 533+ $lastNode = false;
 534+ for ( $node = $part->out->firstNode; $node; $node = $node->nextSibling ) {
 535+ if ( $node === $part->eqpos ) {
 536+ break;
 537+ }
 538+ $lastNode = $node;
 539+ }
 540+ if ( !$node ) {
 541+ throw new MWException( __METHOD__. ': eqpos not found' );
 542+ }
 543+ if ( $node->name !== 'equals' ) {
 544+ throw new MWException( __METHOD__ .': eqpos is not equals' );
 545+ }
 546+ $equalsNode = $node;
 547+
 548+ // Construct name node
 549+ $nameNode = new PPNode_Hash_Tree( 'name' );
 550+ if ( $lastNode !== false ) {
 551+ $lastNode->nextSibling = false;
 552+ $nameNode->firstChild = $part->out->firstNode;
 553+ $nameNode->lastChild = $lastNode;
 554+ }
 555+
 556+ // Construct value node
 557+ $valueNode = new PPNode_Hash_Tree( 'value' );
 558+ if ( $equalsNode->nextSibling !== false ) {
 559+ $valueNode->firstChild = $equalsNode->nextSibling;
 560+ $valueNode->lastChild = $part->out->lastNode;
 561+ }
 562+ $partNode = new PPNode_Hash_Tree( 'part' );
 563+ $partNode->addChild( $nameNode );
 564+ $partNode->addChild( $equalsNode->firstChild );
 565+ $partNode->addChild( $valueNode );
 566+ $element->addChild( $partNode );
 567+ } else {
 568+ $partNode = new PPNode_Hash_Tree( 'part' );
 569+ $nameNode = new PPNode_Hash_Tree( 'name' );
 570+ $nameNode->addChild( new PPNode_Hash_Attr( 'index', $argIndex++ ) );
 571+ $valueNode = new PPNode_Hash_Tree( 'value' );
 572+ $valueNode->firstChild = $part->out->firstNode;
 573+ $valueNode->lastChild = $part->out->lastNode;
 574+ $partNode->addChild( $nameNode );
 575+ $partNode->addChild( $valueNode );
 576+ $element->addChild( $partNode );
 577+ }
 578+ }
 579+ }
 580+
 581+ # Advance input pointer
 582+ $i += $matchingCount;
 583+
 584+ # Unwind the stack
 585+ $stack->pop();
 586+ $accum =& $stack->getAccum();
 587+
 588+ # Re-add the old stack element if it still has unmatched opening characters remaining
 589+ if ($matchingCount < $piece->count) {
 590+ $piece->parts = array( new PPDPart_Hash );
 591+ $piece->count -= $matchingCount;
 592+ # do we still qualify for any callback with remaining count?
 593+ $names = $rules[$piece->open]['names'];
 594+ $skippedBraces = 0;
 595+ $enclosingAccum =& $accum;
 596+ while ( $piece->count ) {
 597+ if ( array_key_exists( $piece->count, $names ) ) {
 598+ $stack->push( $piece );
 599+ $accum =& $stack->getAccum();
 600+ break;
 601+ }
 602+ --$piece->count;
 603+ $skippedBraces ++;
 604+ }
 605+ $enclosingAccum->addLiteral( str_repeat( $piece->open, $skippedBraces ) );
 606+ }
 607+
 608+ extract( $stack->getFlags() );
 609+
 610+ # Add the resulting element (node or accumulator) to the enclosing accumulator
 611+ if ( $element instanceof PPNode ) {
 612+ $accum->addNode( $element );
 613+ } else {
 614+ $accum->addAccum( $element );
 615+ }
 616+ }
 617+
 618+ elseif ( $found == 'pipe' ) {
 619+ $findEquals = true; // shortcut for getFlags()
 620+ $stack->addPart();
 621+ $accum =& $stack->getAccum();
 622+ ++$i;
 623+ }
 624+
 625+ elseif ( $found == 'equals' ) {
 626+ $findEquals = false; // shortcut for getFlags()
 627+ $accum->addNodeWithText( 'equals', '=' );
 628+ $stack->getCurrentPart()->eqpos = $accum->lastNode;
 629+ ++$i;
 630+ }
 631+ }
 632+
 633+ # Output any remaining unclosed brackets
 634+ foreach ( $stack->stack as $piece ) {
 635+ $stack->rootAccum->addAccum( $piece->breakSyntax() );
 636+ }
 637+
 638+ # Enable top-level headings
 639+ for ( $node = $stack->rootAccum->firstNode; $node; $node = $node->nextSibling ) {
 640+ if ( isset( $node->name ) && $node->name === 'possible-h' ) {
 641+ $node->name = 'h';
 642+ }
 643+ }
 644+
 645+ $rootNode = new PPNode_Hash_Tree( 'root' );
 646+ $rootNode->firstChild = $stack->rootAccum->firstNode;
 647+ $rootNode->lastChild = $stack->rootAccum->lastNode;
 648+
 649+ // Cache
 650+ if ($cacheable) {
 651+ $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );;
 652+ $wgMemc->set( $cacheKey, $cacheValue, 86400 );
 653+ wfProfileOut( __METHOD__.'-cache-miss' );
 654+ wfProfileOut( __METHOD__.'-cacheable' );
 655+ wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" );
 656+ }
 657+
 658+ wfProfileOut( __METHOD__ );
 659+ return $rootNode;
 660+ }
 661+}
 662+
 663+/**
 664+ * Stack class to help Preprocessor::preprocessToObj()
 665+ * @ingroup Parser
 666+ */
 667+class PPDStack_Hash extends PPDStack {
 668+ function __construct() {
 669+ $this->elementClass = 'PPDStackElement_Hash';
 670+ parent::__construct();
 671+ $this->rootAccum = new PPDAccum_Hash;
 672+ }
 673+}
 674+
 675+/**
 676+ * @ingroup Parser
 677+ */
 678+class PPDStackElement_Hash extends PPDStackElement {
 679+ function __construct( $data = array() ) {
 680+ $this->partClass = 'PPDPart_Hash';
 681+ parent::__construct( $data );
 682+ }
 683+
 684+ /**
 685+ * Get the accumulator that would result if the close is not found.
 686+ */
 687+ function breakSyntax( $openingCount = false ) {
 688+ if ( $this->open == "\n" ) {
 689+ $accum = $this->parts[0]->out;
 690+ } else {
 691+ if ( $openingCount === false ) {
 692+ $openingCount = $this->count;
 693+ }
 694+ $accum = new PPDAccum_Hash;
 695+ $accum->addLiteral( str_repeat( $this->open, $openingCount ) );
 696+ $first = true;
 697+ foreach ( $this->parts as $part ) {
 698+ if ( $first ) {
 699+ $first = false;
 700+ } else {
 701+ $accum->addLiteral( '|' );
 702+ }
 703+ $accum->addAccum( $part->out );
 704+ }
 705+ }
 706+ return $accum;
 707+ }
 708+}
 709+
 710+/**
 711+ * @ingroup Parser
 712+ */
 713+class PPDPart_Hash extends PPDPart {
 714+ function __construct( $out = '' ) {
 715+ $accum = new PPDAccum_Hash;
 716+ if ( $out !== '' ) {
 717+ $accum->addLiteral( $out );
 718+ }
 719+ parent::__construct( $accum );
 720+ }
 721+}
 722+
 723+/**
 724+ * @ingroup Parser
 725+ */
 726+class PPDAccum_Hash {
 727+ var $firstNode, $lastNode;
 728+
 729+ function __construct() {
 730+ $this->firstNode = $this->lastNode = false;
 731+ }
 732+
 733+ /**
 734+ * Append a string literal
 735+ */
 736+ function addLiteral( $s ) {
 737+ if ( $this->lastNode === false ) {
 738+ $this->firstNode = $this->lastNode = new PPNode_Hash_Text( $s );
 739+ } elseif ( $this->lastNode instanceof PPNode_Hash_Text ) {
 740+ $this->lastNode->value .= $s;
 741+ } else {
 742+ $this->lastNode->nextSibling = new PPNode_Hash_Text( $s );
 743+ $this->lastNode = $this->lastNode->nextSibling;
 744+ }
 745+ }
 746+
 747+ /**
 748+ * Append a PPNode
 749+ */
 750+ function addNode( PPNode $node ) {
 751+ if ( $this->lastNode === false ) {
 752+ $this->firstNode = $this->lastNode = $node;
 753+ } else {
 754+ $this->lastNode->nextSibling = $node;
 755+ $this->lastNode = $node;
 756+ }
 757+ }
 758+
 759+ /**
 760+ * Append a tree node with text contents
 761+ */
 762+ function addNodeWithText( $name, $value ) {
 763+ $node = PPNode_Hash_Tree::newWithText( $name, $value );
 764+ $this->addNode( $node );
 765+ }
 766+
 767+ /**
 768+ * Append a PPDAccum_Hash
 769+ * Takes over ownership of the nodes in the source argument. These nodes may
 770+ * subsequently be modified, especially nextSibling.
 771+ */
 772+ function addAccum( $accum ) {
 773+ if ( $accum->lastNode === false ) {
 774+ // nothing to add
 775+ } elseif ( $this->lastNode === false ) {
 776+ $this->firstNode = $accum->firstNode;
 777+ $this->lastNode = $accum->lastNode;
 778+ } else {
 779+ $this->lastNode->nextSibling = $accum->firstNode;
 780+ $this->lastNode = $accum->lastNode;
 781+ }
 782+ }
 783+}
 784+
/**
 * An expansion frame, used as a context to expand the result of preprocessToObj()
 * @ingroup Parser
 */
class PPFrame_Hash implements PPFrame {
	/**
	 * The preprocessor this frame was created for, the parser taken from
	 * $preprocessor->parser, and the title being expanded.
	 */
	public $preprocessor, $parser, $title;

	/**
	 * Prefixed DB keys (false where there is no title) of the root frame's
	 * title followed by the title of each descendant frame down to this one.
	 * getPDBK( $level ) indexes into this list.
	 */
	public $titleCache;

	/**
	 * Hashtable listing templates which are disallowed for expansion in this frame,
	 * having been encountered previously in parent frames.
	 */
	public $loopCheckHash;

	/**
	 * Recursion depth of this frame, top = 0
	 * Note that this is NOT the same as expansion depth in expand()
	 */
	public $depth;


	/**
	 * Construct a new preprocessor frame.
	 * @param Preprocessor $preprocessor The parent preprocessor
	 */
	public function __construct( $preprocessor ) {
		$this->preprocessor = $preprocessor;
		$this->parser = $preprocessor->parser;
		$this->title = $this->parser->mTitle;
		$this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false );
		$this->loopCheckHash = array();
		$this->depth = 0;
	}

	/**
	 * Create a new child frame
	 * $args is optionally a multi-root PPNode or array containing the template arguments
	 *
	 * Each argument is split with splitArg(). Arguments carrying an index are
	 * stored as numbered arguments, the rest are stored under their expanded,
	 * trimmed name. A later argument replaces an earlier one with the same key,
	 * whichever of the two lists the earlier one was in.
	 *
	 * @param $args Mixed: false, an array of nodes, or a PPNode_Hash_Array
	 * @param $title Mixed: title for the child frame; false reuses this frame's title
	 * @return PPTemplateFrame_Hash
	 * @throws MWException if $args is neither false, an array nor a PPNode_Hash_Array
	 */
	public function newChild( $args = false, $title = false ) {
		$namedArgs = array();
		$numberedArgs = array();
		if ( $title === false ) {
			$title = $this->title;
		}
		if ( $args !== false ) {
			if ( $args instanceof PPNode_Hash_Array ) {
				$args = $args->value;
			} elseif ( !is_array( $args ) ) {
				throw new MWException( __METHOD__ . ': $args must be array or PPNode_Hash_Array' );
			}
			foreach ( $args as $arg ) {
				$bits = $arg->splitArg();
				if ( $bits['index'] !== '' ) {
					// Numbered parameter
					$numberedArgs[$bits['index']] = $bits['value'];
					unset( $namedArgs[$bits['index']] );
				} else {
					// Named parameter
					$name = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
					$namedArgs[$name] = $bits['value'];
					unset( $numberedArgs[$name] );
				}
			}
		}
		return new PPTemplateFrame_Hash( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
	}

	/**
	 * Expand a node (or array of nodes) to text in the context of this frame.
	 *
	 * Strings are returned unchanged. Otherwise the tree is walked without PHP
	 * recursion, using three parallel stacks (output text, iterator, index)
	 * with one entry per nesting level. The exception is headings in HTML
	 * output mode, whose children are expanded by recursive calls.
	 *
	 * @param $root Mixed: string, node, array of nodes, or PPNode_Hash_Array
	 * @param $flags Integer: bitfield of the PPFrame flag constants tested below
	 *   (NO_TEMPLATES, NO_ARGS, STRIP_COMMENTS, RECOVER_COMMENTS, NO_IGNORE)
	 * @return String: the expanded text, or an error <span> when the parser's
	 *   node-count or expansion-depth limit is exceeded
	 * @throws MWException when a node of an unsupported type is encountered
	 */
	public function expand( $root, $flags = 0 ) {
		// Depth of nested expand() calls; static, so shared by every frame
		static $expansionDepth = 0;
		if ( is_string( $root ) ) {
			return $root;
		}

		if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
		{
			return '<span class="error">Node-count limit exceeded</span>';
		}
		if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
			return '<span class="error">Expansion depth limit exceeded</span>';
		}
		++$expansionDepth;

		// Entry 0 of each stack is a sentinel that collects the final result;
		// entry 1 holds the root.
		$outStack = array( '', '' );
		$iteratorStack = array( false, $root );
		$indexStack = array( 0, 0 );

		while ( count( $iteratorStack ) > 1 ) {
			$level = count( $outStack ) - 1;
			$iteratorNode =& $iteratorStack[ $level ];
			$out =& $outStack[$level];
			$index =& $indexStack[$level];

			// Fetch the next node to process at this level
			if ( is_array( $iteratorNode ) ) {
				if ( $index >= count( $iteratorNode ) ) {
					// All done with this iterator
					$iteratorStack[$level] = false;
					$contextNode = false;
				} else {
					$contextNode = $iteratorNode[$index];
					$index++;
				}
			} elseif ( $iteratorNode instanceof PPNode_Hash_Array ) {
				if ( $index >= $iteratorNode->getLength() ) {
					// All done with this iterator
					$iteratorStack[$level] = false;
					$contextNode = false;
				} else {
					$contextNode = $iteratorNode->item( $index );
					$index++;
				}
			} else {
				// Copy to $contextNode and then delete from iterator stack,
				// because this is not an iterator but we do have to execute it once
				$contextNode = $iteratorStack[$level];
				$iteratorStack[$level] = false;
			}

			// Set to a node list when $contextNode must be descended into
			$newIterator = false;

			if ( $contextNode === false ) {
				// nothing to do
			} elseif ( is_string( $contextNode ) ) {
				$out .= $contextNode;
			} elseif ( is_array( $contextNode ) || $contextNode instanceof PPNode_Hash_Array ) {
				$newIterator = $contextNode;
			} elseif ( $contextNode instanceof PPNode_Hash_Attr ) {
				// No output
			} elseif ( $contextNode instanceof PPNode_Hash_Text ) {
				$out .= $contextNode->value;
			} elseif ( $contextNode instanceof PPNode_Hash_Tree ) {
				if ( $contextNode->name == 'template' ) {
					# Double-brace expansion
					$bits = $contextNode->splitTemplate();
					if ( $flags & self::NO_TEMPLATES ) {
						# Rebuild the literal {{...}} syntax around the expanded parts
						$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $bits['title'], $bits['parts'] );
					} else {
						$ret = $this->parser->braceSubstitution( $bits, $this );
						if ( isset( $ret['object'] ) ) {
							$newIterator = $ret['object'];
						} else {
							$out .= $ret['text'];
						}
					}
				} elseif ( $contextNode->name == 'tplarg' ) {
					# Triple-brace expansion
					$bits = $contextNode->splitTemplate();
					if ( $flags & self::NO_ARGS ) {
						# Rebuild the literal {{{...}}} syntax around the expanded parts
						$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $bits['title'], $bits['parts'] );
					} else {
						$ret = $this->parser->argSubstitution( $bits, $this );
						if ( isset( $ret['object'] ) ) {
							$newIterator = $ret['object'];
						} else {
							$out .= $ret['text'];
						}
					}
				} elseif ( $contextNode->name == 'comment' ) {
					# HTML-style comment
					# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
					if ( $this->parser->ot['html']
						|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
						|| ( $flags & self::STRIP_COMMENTS ) )
					{
						# Comment dropped: nothing is appended to the output
					}
					# Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
					# Not in RECOVER_COMMENTS mode (extractSections) though
					elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
						$out .= $this->parser->insertStripItem( $contextNode->firstChild->value );
					}
					# Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
					else {
						$out .= $contextNode->firstChild->value;
					}
				} elseif ( $contextNode->name == 'ignore' ) {
					# Output suppression used by <includeonly> etc.
					# OT_WIKI will only respect <ignore> in substed templates.
					# The other output types respect it unless NO_IGNORE is set.
					# extractSections() sets NO_IGNORE and so never respects it.
					if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
						$out .= $contextNode->firstChild->value;
					}
					# Otherwise the ignored text is suppressed
				} elseif ( $contextNode->name == 'ext' ) {
					# Extension tag
					# attr, inner and close are optional in splitExt(); default them to null
					$bits = $contextNode->splitExt() + array( 'attr' => null, 'inner' => null, 'close' => null );
					$out .= $this->parser->extensionSubstitution( $bits, $this );
				} elseif ( $contextNode->name == 'h' ) {
					# Heading
					if ( $this->parser->ot['html'] ) {
						# Expand immediately and insert heading index marker
						$s = '';
						for ( $node = $contextNode->firstChild; $node; $node = $node->nextSibling ) {
							$s .= $this->expand( $node, $flags );
						}

						$bits = $contextNode->splitHeading();
						$titleText = $this->title->getPrefixedDBkey();
						$this->parser->mHeadings[] = array( $titleText, $bits['i'] );
						$serial = count( $this->parser->mHeadings ) - 1;
						$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
						# The marker goes $bits['level'] characters into the expanded heading text
						$s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] );
						$this->parser->mStripState->general->setPair( $marker, '' );
						$out .= $s;
					} else {
						# Expand in virtual stack
						$newIterator = $contextNode->getChildren();
					}
				} else {
					# Generic recursive expansion
					$newIterator = $contextNode->getChildren();
				}
			} else {
				throw new MWException( __METHOD__.': Invalid parameter type' );
			}

			if ( $newIterator !== false ) {
				// Descend: open a new level with an empty output buffer
				$outStack[] = '';
				$iteratorStack[] = $newIterator;
				$indexStack[] = 0;
			} elseif ( $iteratorStack[$level] === false ) {
				// Return accumulated value to parent
				// With tail recursion
				// (keeps popping while the parent entry is itself already spent)
				while ( $iteratorStack[$level] === false && $level > 0 ) {
					$outStack[$level - 1] .= $out;
					array_pop( $outStack );
					array_pop( $iteratorStack );
					array_pop( $indexStack );
					$level--;
				}
			}
		}
		--$expansionDepth;
		return $outStack[0];
	}

	/**
	 * Expand every node of the trailing arguments with the given flags and
	 * join the results with $sep.
	 *
	 * Each trailing argument may be a single node, an array of nodes or a
	 * PPNode_Hash_Array; all of them are flattened into one sequence.
	 *
	 * @param $sep String: separator placed between consecutive expansions
	 * @param $flags Integer: flags passed through to expand()
	 * @return String
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ ) {
		$args = array_slice( func_get_args(), 2 );

		$first = true;
		$s = '';
		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_Hash_Array ) {
				$root = $root->value;
			}
			if ( !is_array( $root ) ) {
				$root = array( $root );
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$s .= $sep;
				}
				$s .= $this->expand( $node, $flags );
			}
		}
		return $s;
	}

	/**
	 * Implode with no flags specified
	 * This previously called implodeWithFlags but has now been inlined to reduce stack depth
	 *
	 * @param $sep String: separator placed between consecutive expansions
	 * @return String
	 */
	public function implode( $sep /*, ... */ ) {
		$args = array_slice( func_get_args(), 1 );

		$first = true;
		$s = '';
		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_Hash_Array ) {
				$root = $root->value;
			}
			if ( !is_array( $root ) ) {
				$root = array( $root );
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$s .= $sep;
				}
				$s .= $this->expand( $node );
			}
		}
		return $s;
	}

	/**
	 * Makes an object that, when expand()ed, will be the same as one obtained
	 * with implode()
	 *
	 * @param $sep String: separator inserted as a literal between the nodes
	 * @return PPNode_Hash_Array
	 */
	public function virtualImplode( $sep /*, ... */ ) {
		$args = array_slice( func_get_args(), 1 );
		$out = array();
		$first = true;

		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_Hash_Array ) {
				$root = $root->value;
			}
			if ( !is_array( $root ) ) {
				$root = array( $root );
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$out[] = $sep;
				}
				$out[] = $node;
			}
		}
		return new PPNode_Hash_Array( $out );
	}

	/**
	 * Virtual implode with brackets
	 *
	 * Same as virtualImplode(), with $start placed before and $end placed
	 * after the joined nodes.
	 *
	 * @param $start String: opening literal
	 * @param $sep String: separator inserted as a literal between the nodes
	 * @param $end String: closing literal
	 * @return PPNode_Hash_Array
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
		$args = array_slice( func_get_args(), 3 );
		$out = array( $start );
		$first = true;

		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_Hash_Array ) {
				$root = $root->value;
			}
			if ( !is_array( $root ) ) {
				$root = array( $root );
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$out[] = $sep;
				}
				$out[] = $node;
			}
		}
		$out[] = $end;
		return new PPNode_Hash_Array( $out );
	}

	/**
	 * Debugging representation; a plain frame has no arguments to show.
	 */
	public function __toString() {
		return 'frame{}';
	}

	/**
	 * Get a prefixed DB key.
	 *
	 * @param $level Mixed: false for this frame's own title, otherwise an
	 *   index into $titleCache
	 * @return Mixed: the key, or false if $titleCache has no entry for $level
	 */
	public function getPDBK( $level = false ) {
		if ( $level === false ) {
			return $this->title->getPrefixedDBkey();
		} else {
			return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false;
		}
	}

	/**
	 * A plain frame carries no arguments, so this is always an empty array.
	 */
	public function getArguments() {
		return array();
	}

	/**
	 * A plain frame carries no arguments, so this is always an empty array.
	 */
	public function getNumberedArguments() {
		return array();
	}

	/**
	 * A plain frame carries no arguments, so this is always an empty array.
	 */
	public function getNamedArguments() {
		return array();
	}

	/**
	 * Returns true if there are no arguments in this frame
	 */
	public function isEmpty() {
		return true;
	}

	/**
	 * A plain frame carries no arguments, so every lookup yields false.
	 */
	public function getArgument( $name ) {
		return false;
	}

	/**
	 * Returns true if the infinite loop check is OK, false if a loop is detected
	 */
	public function loopCheck( $title ) {
		return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
	}

	/**
	 * Return true if the frame is a template frame
	 */
	public function isTemplate() {
		return false;
	}
}
 1180+
/**
 * Expansion frame with template arguments
 * @ingroup Parser
 */
class PPTemplateFrame_Hash extends PPFrame_Hash {
	/** Unexpanded argument nodes keyed by index / by name, and the frame this one was created from */
	public $numberedArgs, $namedArgs, $parent;
	/** Expanded argument text, filled lazily by getNumberedArgument() / getNamedArgument() */
	public $numberedExpansionCache, $namedExpansionCache;

	/**
	 * @param $preprocessor Preprocessor: the parent preprocessor
	 * @param $parent PPFrame_Hash: frame whose title cache, loop-check hash and
	 *   depth this frame extends
	 * @param $numberedArgs Array: unexpanded argument values keyed by index
	 * @param $namedArgs Array: unexpanded argument values keyed by name
	 * @param $title Mixed: title of this frame, or false
	 */
	public function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
		parent::__construct( $preprocessor );

		if ( $title ) {
			$pdbk = $title->getPrefixedDBkey();
		} else {
			$pdbk = false;
		}

		$this->title = $title;
		$this->parent = $parent;
		$this->namedArgs = $namedArgs;
		$this->numberedArgs = $numberedArgs;
		$this->namedExpansionCache = array();
		$this->numberedExpansionCache = array();
		$this->depth = $parent->depth + 1;

		// Arrays are copied by value, so the parent's lists are left untouched
		$this->titleCache = $parent->titleCache;
		$this->titleCache[] = $pdbk;
		$this->loopCheckHash = $parent->loopCheckHash;
		if ( $pdbk !== false ) {
			$this->loopCheckHash[$pdbk] = true;
		}
	}

	/**
	 * Debugging representation listing every argument with its unexpanded value.
	 */
	public function __toString() {
		$pairs = array();
		foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
			$escaped = str_replace( '"', '\\"', $value->__toString() );
			$pairs[] = "\"$name\":\"" . $escaped . '"';
		}
		return 'tplframe{' . implode( ', ', $pairs ) . '}';
	}

	/**
	 * Returns true if there are no arguments in this frame
	 */
	public function isEmpty() {
		return count( $this->numberedArgs ) == 0 && count( $this->namedArgs ) == 0;
	}

	/**
	 * Get all arguments, numbered ones first, as expanded text keyed by
	 * index or name.
	 */
	public function getArguments() {
		$expanded = array();
		foreach ( $this->numberedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		foreach ( $this->namedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get the numbered arguments as expanded text keyed by index.
	 */
	public function getNumberedArguments() {
		$expanded = array();
		foreach ( $this->numberedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get the named arguments as expanded text keyed by name.
	 */
	public function getNamedArguments() {
		$expanded = array();
		foreach ( $this->namedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Expand (once) and return a numbered argument.
	 *
	 * @param $index Mixed: argument index
	 * @return Mixed: expanded text, or false if there is no such argument
	 */
	public function getNumberedArgument( $index ) {
		if ( isset( $this->numberedArgs[$index] ) ) {
			if ( !isset( $this->numberedExpansionCache[$index] ) ) {
				# No trimming for unnamed arguments
				$node = $this->numberedArgs[$index];
				$this->numberedExpansionCache[$index] = $this->parent->expand( $node, self::STRIP_COMMENTS );
			}
			return $this->numberedExpansionCache[$index];
		}
		return false;
	}

	/**
	 * Expand (once) and return a named argument.
	 *
	 * @param $name String: argument name
	 * @return Mixed: expanded and trimmed text, or false if there is no such argument
	 */
	public function getNamedArgument( $name ) {
		if ( isset( $this->namedArgs[$name] ) ) {
			if ( !isset( $this->namedExpansionCache[$name] ) ) {
				# Trim named arguments post-expand, for backwards compatibility
				$text = $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS );
				$this->namedExpansionCache[$name] = trim( $text );
			}
			return $this->namedExpansionCache[$name];
		}
		return false;
	}

	/**
	 * Look an argument up among the numbered arguments first, then the named ones.
	 *
	 * @param $name Mixed: argument index or name
	 * @return Mixed: expanded text, or false if neither list has it
	 */
	public function getArgument( $name ) {
		$text = $this->getNumberedArgument( $name );
		return $text === false ? $this->getNamedArgument( $name ) : $text;
	}

	/**
	 * Return true if the frame is a template frame
	 */
	public function isTemplate() {
		return true;
	}
}
 1293+
/**
 * Expansion frame with custom arguments
 * @ingroup Parser
 */
class PPCustomFrame_Hash extends PPFrame_Hash {
	/** Argument values keyed by name/index; returned as-is by getArgument() */
	public $args;

	/**
	 * @param $preprocessor Preprocessor: the parent preprocessor
	 * @param $args Array: argument values keyed by name or index
	 */
	public function __construct( $preprocessor, $args ) {
		parent::__construct( $preprocessor );
		$this->args = $args;
	}

	/**
	 * Debugging representation listing every argument.
	 *
	 * getArgument() hands the stored values straight back to callers, so
	 * they are presumably plain strings rather than node objects (confirm
	 * against callers). Calling ->__toString() on a string is a fatal
	 * error, so objects are stringified through their own __toString() and
	 * anything else is cast.
	 */
	public function __toString() {
		$s = 'cstmframe{';
		$first = true;
		foreach ( $this->args as $name => $value ) {
			if ( $first ) {
				$first = false;
			} else {
				$s .= ', ';
			}
			$text = is_object( $value ) ? $value->__toString() : (string)$value;
			$s .= "\"$name\":\"" .
				str_replace( '"', '\\"', $text ) . '"';
		}
		$s .= '}';
		return $s;
	}

	/**
	 * Returns true if there are no arguments in this frame
	 */
	public function isEmpty() {
		return !count( $this->args );
	}

	/**
	 * @param $index Mixed: argument name or index
	 * @return Mixed: the stored value, or false if it is not set
	 */
	public function getArgument( $index ) {
		if ( !isset( $this->args[$index] ) ) {
			return false;
		}
		return $this->args[$index];
	}
}
 1333+
/**
 * A named node with an ordered list of children, kept as a singly linked
 * list (firstChild ... lastChild, chained through nextSibling).
 * @ingroup Parser
 */
class PPNode_Hash_Tree implements PPNode {
	public $name, $firstChild, $lastChild, $nextSibling;

	/**
	 * @param $name String: node name
	 */
	public function __construct( $name ) {
		$this->name = $name;
		$this->firstChild = $this->lastChild = $this->nextSibling = false;
	}

	/**
	 * Serialise the subtree in an XML-like form. PPNode_Hash_Attr children
	 * become attributes of the opening tag, all other children are
	 * serialised inside the element.
	 */
	public function __toString() {
		$inner = '';
		$attribs = '';
		for ( $node = $this->firstChild; $node; $node = $node->nextSibling ) {
			if ( $node instanceof PPNode_Hash_Attr ) {
				$attribs .= ' ' . $node->name . '="' . htmlspecialchars( $node->value ) . '"';
			} else {
				$inner .= $node->__toString();
			}
		}
		if ( $inner === '' ) {
			return "<{$this->name}$attribs/>";
		} else {
			return "<{$this->name}$attribs>$inner</{$this->name}>";
		}
	}

	/**
	 * Create a tree node whose only child is a text node.
	 *
	 * @param $name String: node name
	 * @param $text String: contents of the text child
	 * @return PPNode_Hash_Tree
	 */
	public static function newWithText( $name, $text ) {
		$obj = new self( $name );
		$obj->addChild( new PPNode_Hash_Text( $text ) );
		return $obj;
	}

	/**
	 * Append a child node.
	 */
	public function addChild( $node ) {
		if ( $this->lastChild === false ) {
			$this->firstChild = $this->lastChild = $node;
		} else {
			$this->lastChild->nextSibling = $node;
			$this->lastChild = $node;
		}
	}

	/**
	 * @return PPNode_Hash_Array holding all children in order
	 */
	public function getChildren() {
		$children = array();
		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
			$children[] = $child;
		}
		return new PPNode_Hash_Array( $children );
	}

	public function getFirstChild() {
		return $this->firstChild;
	}

	public function getNextSibling() {
		return $this->nextSibling;
	}

	/**
	 * Get the direct children whose name is exactly $name.
	 *
	 * @param $name String: child name to match
	 * @return Array of matching child nodes, in document order
	 */
	public function getChildrenOfType( $name ) {
		$children = array();
		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
			if ( isset( $child->name ) && $child->name === $name ) {
				// Collect the node itself; the previous code collected the
				// search string, yielding copies of $name instead of nodes
				$children[] = $child;
			}
		}
		return $children;
	}

	// A tree node is not a node list
	public function getLength() { return false; }
	public function item( $i ) { return false; }

	public function getName() {
		return $this->name;
	}

	/**
	 * Split a <part> node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 *
	 * index is '' unless the name child starts with an "index" attribute.
	 *
	 * @throws MWException if the node has no name child
	 */
	public function splitArg() {
		$bits = array();
		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
			if ( !isset( $child->name ) ) {
				// Children without a name (text nodes) are skipped
				continue;
			}
			if ( $child->name === 'name' ) {
				$bits['name'] = $child;
				if ( $child->firstChild instanceof PPNode_Hash_Attr
					&& $child->firstChild->name === 'index' )
				{
					$bits['index'] = $child->firstChild->value;
				}
			} elseif ( $child->name === 'value' ) {
				$bits['value'] = $child;
			}
		}

		if ( !isset( $bits['name'] ) ) {
			throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
		}
		if ( !isset( $bits['index'] ) ) {
			$bits['index'] = '';
		}
		return $bits;
	}

	/**
	 * Split an <ext> node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 *
	 * @throws MWException if the node has no name child
	 */
	public function splitExt() {
		$bits = array();
		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
			if ( !isset( $child->name ) ) {
				continue;
			}
			if ( $child->name == 'name' ) {
				$bits['name'] = $child;
			} elseif ( $child->name == 'attr' ) {
				$bits['attr'] = $child;
			} elseif ( $child->name == 'inner' ) {
				$bits['inner'] = $child;
			} elseif ( $child->name == 'close' ) {
				$bits['close'] = $child;
			}
		}
		if ( !isset( $bits['name'] ) ) {
			throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
		}
		return $bits;
	}

	/**
	 * Split an <h> node
	 *
	 * Returns an array with the ->value of the "i" and "level" children.
	 * Only the presence of "i" is checked.
	 *
	 * @throws MWException if this is not an <h> node or it has no "i" child
	 */
	public function splitHeading() {
		if ( $this->name !== 'h' ) {
			throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
		}
		$bits = array();
		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
			if ( !isset( $child->name ) ) {
				continue;
			}
			if ( $child->name == 'i' ) {
				$bits['i'] = $child->value;
			} elseif ( $child->name == 'level' ) {
				$bits['level'] = $child->value;
			}
		}
		if ( !isset( $bits['i'] ) ) {
			throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
		}
		return $bits;
	}

	/**
	 * Split a <template> or <tplarg> node
	 *
	 * Returns an array with:
	 *    title         the title child node
	 *    parts         PPNode_Hash_Array of the part children, in order
	 *    lineStart     '1' if a lineStart child is present, otherwise ''
	 *
	 * @throws MWException if the node has no title child
	 */
	public function splitTemplate() {
		$parts = array();
		$bits = array( 'lineStart' => '' );
		for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
			if ( !isset( $child->name ) ) {
				continue;
			}
			if ( $child->name == 'title' ) {
				$bits['title'] = $child;
			}
			if ( $child->name == 'part' ) {
				$parts[] = $child;
			}
			if ( $child->name == 'lineStart' ) {
				$bits['lineStart'] = '1';
			}
		}
		if ( !isset( $bits['title'] ) ) {
			throw new MWException( 'Invalid node passed to ' . __METHOD__ );
		}
		$bits['parts'] = new PPNode_Hash_Array( $parts );
		return $bits;
	}
}
 1520+
/**
 * A leaf node holding literal text.
 * @ingroup Parser
 */
class PPNode_Hash_Text implements PPNode {
	public $value, $nextSibling;

	/**
	 * @param $value String: the literal text
	 * @throws MWException if an object is passed instead
	 */
	public function __construct( $value ) {
		if ( !is_object( $value ) ) {
			$this->value = $value;
			return;
		}
		throw new MWException( __CLASS__ . ' given object instead of string' );
	}

	/**
	 * The text with HTML special characters escaped.
	 */
	public function __toString() {
		return htmlspecialchars( $this->value );
	}

	public function getNextSibling() {
		return $this->nextSibling;
	}

	// A text node has no children and is not a node list

	public function getChildren() {
		return false;
	}

	public function getFirstChild() {
		return false;
	}

	public function getChildrenOfType( $name ) {
		return false;
	}

	public function getLength() {
		return false;
	}

	public function item( $i ) {
		return false;
	}

	public function getName() {
		return '#text';
	}

	// The split operations only make sense on tree nodes

	public function splitArg() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitExt() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitHeading() {
		throw new MWException( __METHOD__ . ': not supported' );
	}
}
 1552+
/**
 * A node list wrapping a plain PHP array.
 * @ingroup Parser
 */
class PPNode_Hash_Array implements PPNode {
	public $value, $nextSibling;

	/**
	 * @param $value Array: the wrapped list
	 */
	public function __construct( $value ) {
		$this->value = $value;
	}

	/**
	 * Debugging dump of the whole object.
	 */
	public function __toString() {
		return var_export( $this, true );
	}

	/**
	 * Number of entries in the list.
	 */
	public function getLength() {
		return count( $this->value );
	}

	/**
	 * Entry at position $i.
	 */
	public function item( $i ) {
		return $this->value[$i];
	}

	public function getName() {
		return '#nodelist';
	}

	public function getNextSibling() {
		return $this->nextSibling;
	}

	// A node list has no children of its own

	public function getChildren() {
		return false;
	}

	public function getFirstChild() {
		return false;
	}

	public function getChildrenOfType( $name ) {
		return false;
	}

	// The split operations only make sense on tree nodes

	public function splitArg() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitExt() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitHeading() {
		throw new MWException( __METHOD__ . ': not supported' );
	}
}
 1588+
/**
 * A leaf node holding a name/value attribute pair.
 * @ingroup Parser
 */
class PPNode_Hash_Attr implements PPNode {
	public $name, $value, $nextSibling;

	/**
	 * @param $name String: attribute name
	 * @param $value String: attribute value
	 */
	public function __construct( $name, $value ) {
		$this->value = $value;
		$this->name = $name;
	}

	/**
	 * Serialise as <@name>value</@name>, with the value HTML-escaped.
	 */
	public function __toString() {
		$open = '<@' . $this->name . '>';
		$close = '</@' . $this->name . '>';
		return $open . htmlspecialchars( $this->value ) . $close;
	}

	public function getName() {
		return $this->name;
	}

	public function getNextSibling() {
		return $this->nextSibling;
	}

	// An attribute has no children and is not a node list

	public function getChildren() {
		return false;
	}

	public function getFirstChild() {
		return false;
	}

	public function getChildrenOfType( $name ) {
		return false;
	}

	public function getLength() {
		return false;
	}

	public function item( $i ) {
		return false;
	}

	// The split operations only make sense on tree nodes

	public function splitArg() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitExt() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitHeading() {
		throw new MWException( __METHOD__ . ': not supported' );
	}
}
Property changes on: branches/parser-work/phase3/includes/parser/Preprocessor_Hash.php
___________________________________________________________________
Name: svn:eol-style
11621 + native
Index: branches/parser-work/phase3/includes/parser/Parser.php
@@ -75,14 +75,8 @@
7676 const COLON_STATE_COMMENTDASH = 6;
7777 const COLON_STATE_COMMENTDASHDASH = 7;
7878
79 - // State flags for DOM expansion
80 - const NO_ARGS = 1;
81 - const NO_TEMPLATES = 2;
82 - const STRIP_COMMENTS = 4;
83 - const NO_IGNORE = 8;
84 - const RECOVER_COMMENTS = 16;
85 - const PTD_FOR_INCLUSION = 32;
86 - const RECOVER_ORIG = 27; // = 1|2|8|16 no constant expression support in PHP yet
 79+ // Flags for preprocessToDom
 80+ const PTD_FOR_INCLUSION = 1;
8781
8882 // Allowed values for $this->mOutputType
8983 // Parameter to startExternalParse().
@@ -101,7 +95,7 @@
10296 # Persistent:
10397 var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
10498 $mSubsts, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex,
105 - $mParseEngine, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList,
 99+ $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList,
106100 $mVarCache, $mConf, $mFunctionTagHooks;
107101
108102
@@ -143,10 +137,19 @@
144138 $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
145139 '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
146140 $this->mVarCache = array();
 141+ if ( isset( $conf['preprocessorClass'] ) ) {
 142+ $this->mPreprocessorClass = $conf['preprocessorClass'];
 143+ } elseif ( extension_loaded( 'domxml' ) ) {
 144+ // PECL extension that conflicts with the core DOM extension (bug 13770)
 145+ wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
 146+ $this->mPreprocessorClass = 'Preprocessor_Hash';
 147+ } elseif ( extension_loaded( 'dom' ) ) {
 148+ $this->mPreprocessorClass = 'Preprocessor_DOM';
 149+ } else {
 150+ $this->mPreprocessorClass = 'Preprocessor_Hash';
 151+ }
147152 $this->mMarkerIndex = 0;
148153 $this->mFirstCall = true;
149 -
150 - $this->mParseEngine = new ParseEngine("includes/parser/WikiTextGrammar.xml");
151154 }
152155
153156 /**
@@ -233,6 +236,11 @@
234237 $this->mDoubleUnderscores = array();
235238 $this->mExpensiveFunctionCount = 0;
236239
 240+ # Fix cloning
 241+ if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
 242+ $this->mPreprocessor = null;
 243+ }
 244+
237245 wfRunHooks( 'ParserClearState', array( &$this ) );
238246 wfProfileOut( __METHOD__ );
239247 }
@@ -455,12 +463,13 @@
456464 * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded
457465 *
458466 * @param $text String: text extension wants to have parsed
 467+ * @param PPFrame $frame: The frame to use for expanding any template variables
459468 */
460 - function recursiveTagParse( $text ) {
 469+ function recursiveTagParse( $text, $frame=false ) {
461470 wfProfileIn( __METHOD__ );
462471 wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
463472 wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
464 - $text = $this->internalParse( $text, false );
 473+ $text = $this->internalParse( $text, false, $frame );
465474 wfProfileOut( __METHOD__ );
466475 return $text;
467476 }
@@ -502,8 +511,8 @@
503512 $this->setTitle( new FakeTitle );
504513
505514 list( $text, $title ) = $this->getTemplateDom( $title );
506 - $flags = self::NO_ARGS | self::NO_TEMPLATES;
507 - return ParseEngine::expand($text->childNodes, $this, $flags);
 515+ $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
 516+ return $this->getPreprocessor()->newFrame()->expand( $text, $flags );
508517 }
509518
510519 /**
@@ -534,6 +543,17 @@
535544 }
536545
537546 /**
 547+ * Get a preprocessor object
 548+ */
 549+ function getPreprocessor() {
 550+ if ( !isset( $this->mPreprocessor ) ) {
 551+ $class = $this->mPreprocessorClass;
 552+ $this->mPreprocessor = new $class( $this );
 553+ }
 554+ return $this->mPreprocessor;
 555+ }
 556+
 557+ /**
538558 * Replaces all occurrences of HTML-style comments and the given tags
539559 * in the text with a random marker and returns the next text. The output
540560 * parameter $matches will be an associative array filled with data in
@@ -901,8 +921,8 @@
902922 $flag = 0;
903923 else
904924 $flag = Parser::PTD_FOR_INCLUSION;
905 - $dom = $this->mParseEngine->parse($text);
906 - $text = ParseEngine::expand( $dom, $this, $flag );
 925+ $dom = $this->preprocessToDom( $text, $flag );
 926+ $text = $frame->expand( $dom );
907927 }
908928 // if $frame is not provided, then use old-style replaceVariables
909929 else {
@@ -2050,9 +2070,11 @@
20512071 #
20522072 $textLines = StringUtils::explode( "\n", $text );
20532073
2054 - $output = '';
 2074+ $lastPrefix = $output = '';
20552075 $this->mDTopen = $inBlockElem = false;
 2076+ $prefixLength = 0;
20562077 $paragraphStack = false;
 2078+
20572079 foreach ( $textLines as $oLine ) {
20582080 # Fix up $linestart
20592081 if ( !$linestart ) {
@@ -2060,76 +2082,158 @@
20612083 $linestart = true;
20622084 continue;
20632085 }
 2086+ // * = ul
 2087+ // # = ol
20642088 // ; = dt
20652089 // : = dd
20662090
2067 - wfProfileIn( __METHOD__."-paragraph" );
2068 - // XXX: use a stack for nestable elements like span, table and div
 2091+ $lastPrefixLength = strlen( $lastPrefix );
20692092 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
20702093 $preOpenMatch = preg_match('/<pre/i', $oLine );
2071 - $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $oLine );
2072 - $closematch = preg_match(
2073 - '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
2074 - '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $oLine );
2075 - if ( $openmatch or $closematch ) {
 2094+ // If not in a <pre> element, scan for and figure out what prefixes are there.
 2095+ if ( !$this->mInPre ) {
 2096+ # Multiple prefixes may abut each other for nested lists.
 2097+ $prefixLength = strspn( $oLine, '*#:;' );
 2098+ $prefix = substr( $oLine, 0, $prefixLength );
 2099+
 2100+ # eh?
 2101+ // ; and : are both from definition-lists, so they're equivalent
 2102+ // for the purposes of determining whether or not we need to open/close
 2103+ // elements.
 2104+ $prefix2 = str_replace( ';', ':', $prefix );
 2105+ $t = substr( $oLine, $prefixLength );
 2106+ $this->mInPre = (bool)$preOpenMatch;
 2107+ } else {
 2108+ # Don't interpret any other prefixes in preformatted text
 2109+ $prefixLength = 0;
 2110+ $prefix = $prefix2 = '';
 2111+ $t = $oLine;
 2112+ }
 2113+
 2114+ # List generation
 2115+ if( $prefixLength && $lastPrefix === $prefix2 ) {
 2116+ # Same as the last item, so no need to deal with nesting or opening stuff
 2117+ $output .= $this->nextItem( substr( $prefix, -1 ) );
20762118 $paragraphStack = false;
2077 - # TODO bug 5718: paragraph closed
2078 - $output .= $this->closeParagraph();
2079 - if ( $preOpenMatch and !$preCloseMatch ) {
2080 - $this->mInPre = true;
 2119+
 2120+ if ( substr( $prefix, -1 ) === ';') {
 2121+ # The one nasty exception: definition lists work like this:
 2122+ # ; title : definition text
 2123+ # So we check for : in the remainder text to split up the
 2124+ # title and definition, without b0rking links.
 2125+ $term = $t2 = '';
 2126+ if ($this->findColonNoLinks($t, $term, $t2) !== false) {
 2127+ $t = $t2;
 2128+ $output .= $term . $this->nextItem( ':' );
 2129+ }
20812130 }
2082 - if ( $closematch ) {
2083 - $inBlockElem = false;
2084 - } else {
2085 - $inBlockElem = true;
 2131+ } elseif( $prefixLength || $lastPrefixLength ) {
 2132+ // We need to open or close prefixes, or both.
 2133+
 2134+ # Either open or close a level...
 2135+ $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
 2136+ $paragraphStack = false;
 2137+
 2138+ // Close all the prefixes which aren't shared.
 2139+ while( $commonPrefixLength < $lastPrefixLength ) {
 2140+ $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
 2141+ --$lastPrefixLength;
20862142 }
2087 - } else if ( !$inBlockElem && !$this->mInPre ) {
2088 - if ( ' ' == substr( $oLine, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($oLine) != '' ) ) {
2089 - // pre
2090 - if ($this->mLastSection !== 'pre') {
2091 - $paragraphStack = false;
2092 - $output .= $this->closeParagraph().'<pre>';
2093 - $this->mLastSection = 'pre';
 2143+
 2144+ // Continue the current prefix if appropriate.
 2145+ if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
 2146+ $output .= $this->nextItem( $prefix[$commonPrefixLength-1] );
 2147+ }
 2148+
 2149+ // Open prefixes where appropriate.
 2150+ while ( $prefixLength > $commonPrefixLength ) {
 2151+ $char = substr( $prefix, $commonPrefixLength, 1 );
 2152+ $output .= $this->openList( $char );
 2153+
 2154+ if ( ';' === $char ) {
 2155+ # FIXME: This is dupe of code above
 2156+ if ($this->findColonNoLinks($t, $term, $t2) !== false) {
 2157+ $t = $t2;
 2158+ $output .= $term . $this->nextItem( ':' );
 2159+ }
20942160 }
2095 - $oLine = substr( $oLine, 1 );
2096 - } else {
2097 - // paragraph
2098 - if ( trim($oLine) == '' ) {
2099 - if ( $paragraphStack ) {
2100 - $output .= $paragraphStack.'<br />';
 2161+ ++$commonPrefixLength;
 2162+ }
 2163+ $lastPrefix = $prefix2;
 2164+ }
 2165+
 2166+ // If we have no prefixes, go to paragraph mode.
 2167+ if( 0 == $prefixLength ) {
 2168+ wfProfileIn( __METHOD__."-paragraph" );
 2169+ # No prefix (not in list)--go to paragraph mode
 2170+ // XXX: use a stack for nestable elements like span, table and div
 2171+ $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
 2172+ $closematch = preg_match(
 2173+ '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
 2174+ '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
 2175+ if ( $openmatch or $closematch ) {
 2176+ $paragraphStack = false;
 2177+ # TODO bug 5718: paragraph closed
 2178+ $output .= $this->closeParagraph();
 2179+ if ( $preOpenMatch and !$preCloseMatch ) {
 2180+ $this->mInPre = true;
 2181+ }
 2182+ if ( $closematch ) {
 2183+ $inBlockElem = false;
 2184+ } else {
 2185+ $inBlockElem = true;
 2186+ }
 2187+ } else if ( !$inBlockElem && !$this->mInPre ) {
 2188+ if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
 2189+ // pre
 2190+ if ($this->mLastSection !== 'pre') {
21012191 $paragraphStack = false;
2102 - $this->mLastSection = 'p';
2103 - } else {
2104 - if ($this->mLastSection !== 'p' ) {
2105 - $output .= $this->closeParagraph();
2106 - $this->mLastSection = '';
2107 - $paragraphStack = '<p>';
 2192+ $output .= $this->closeParagraph().'<pre>';
 2193+ $this->mLastSection = 'pre';
 2194+ }
 2195+ $t = substr( $t, 1 );
 2196+ } else {
 2197+ // paragraph
 2198+ if ( trim($t) == '' ) {
 2199+ if ( $paragraphStack ) {
 2200+ $output .= $paragraphStack.'<br />';
 2201+ $paragraphStack = false;
 2202+ $this->mLastSection = 'p';
21082203 } else {
2109 - $paragraphStack = '</p><p>';
 2204+ if ($this->mLastSection !== 'p' ) {
 2205+ $output .= $this->closeParagraph();
 2206+ $this->mLastSection = '';
 2207+ $paragraphStack = '<p>';
 2208+ } else {
 2209+ $paragraphStack = '</p><p>';
 2210+ }
21102211 }
 2212+ } else {
 2213+ if ( $paragraphStack ) {
 2214+ $output .= $paragraphStack;
 2215+ $paragraphStack = false;
 2216+ $this->mLastSection = 'p';
 2217+ } else if ($this->mLastSection !== 'p') {
 2218+ $output .= $this->closeParagraph().'<p>';
 2219+ $this->mLastSection = 'p';
 2220+ }
21112221 }
2112 - } else {
2113 - if ( $paragraphStack ) {
2114 - $output .= $paragraphStack;
2115 - $paragraphStack = false;
2116 - $this->mLastSection = 'p';
2117 - } else if ($this->mLastSection !== 'p') {
2118 - $output .= $this->closeParagraph().'<p>';
2119 - $this->mLastSection = 'p';
2120 - }
21212222 }
21222223 }
 2224+ wfProfileOut( __METHOD__."-paragraph" );
21232225 }
2124 - wfProfileOut( __METHOD__."-paragraph" );
2125 -
21262226 // somewhere above we forget to get out of pre block (bug 785)
21272227 if($preCloseMatch && $this->mInPre) {
21282228 $this->mInPre = false;
21292229 }
21302230 if ($paragraphStack === false) {
2131 - $output .= $oLine."\n";
 2231+ $output .= $t."\n";
21322232 }
21332233 }
 2234+ while ( $prefixLength ) {
 2235+ $output .= $this->closeList( $prefix2[$prefixLength-1] );
 2236+ --$prefixLength;
 2237+ }
21342238 if ( $this->mLastSection != '' ) {
21352239 $output .= '</' . $this->mLastSection . '>';
21362240 $this->mLastSection = '';
@@ -2620,6 +2724,33 @@
26212725 wfProfileOut( __METHOD__ );
26222726 }
26232727
 2728+ /**
 2729+ * Preprocess some wikitext and return the document tree.
 2730+ * This is the ghost of replace_variables().
 2731+ *
 2732+ * @param string $text The text to parse
 2733+ * @param integer $flags Bitwise combination of:
 2734+ * self::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
 2735+ * included. Default is to assume a direct page view.
 2736+ *
 2737+ * The generated DOM tree must depend only on the input text and the flags.
 2738+ * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
 2739+ *
 2740+ * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 2741+ * change in the DOM tree for a given text, must be passed through the section identifier
 2742+ * in the section edit link and thus back to extractSections().
 2743+ *
 2744+ * The output of this function is currently only cached in process memory, but a persistent
 2745+ * cache may be implemented at a later date which takes further advantage of these strict
 2746+ * dependency requirements.
 2747+ *
 2748+ * @private
 2749+ */
 2750+ function preprocessToDom ( $text, $flags = 0 ) {
 2751+ $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
 2752+ return $dom;
 2753+ }
 2754+
26242755 /*
26252756 * Return a three-element array: leading whitespace, string contents, trailing whitespace
26262757 */
@@ -2647,18 +2778,30 @@
26482779 * self::OT_HTML: all templates and extension tags
26492780 *
26502781 * @param string $text The text to transform
 2782+ * @param PPFrame $frame Object describing the arguments passed to the template.
 2783+ * Arguments may also be provided as an associative array, as was the usual case before MW1.12.
 2784+ * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly.
 2785+ * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion
26512786 * @private
26522787 */
2653 - function replaceVariables( $text ) {
 2788+ function replaceVariables( $text, $frame = false, $argsOnly = false ) {
26542789 # Is there any text? Also, Prevent too big inclusions!
26552790 if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
26562791 return $text;
26572792 }
26582793 wfProfileIn( __METHOD__ );
26592794
2660 - $dom = $this->mParseEngine->parse($text);
2661 - $text = ParseEngine::expand($dom->childNodes, $this);
 2795+ if ( $frame === false ) {
 2796+ $frame = $this->getPreprocessor()->newFrame();
 2797+ } elseif ( !( $frame instanceof PPFrame ) ) {
 2798+ wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" );
 2799+ $frame = $this->getPreprocessor()->newCustomFrame($frame);
 2800+ }
26622801
 2802+ $dom = $this->preprocessToDom( $text );
 2803+ $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
 2804+ $text = $frame->expand( $dom, $flags );
 2805+
26632806 wfProfileOut( __METHOD__ );
26642807 return $text;
26652808 }
@@ -2712,11 +2855,11 @@
27132856 * $piece['title']: the title, i.e. the part before the |
27142857 * $piece['parts']: the parameter array
27152858 * $piece['lineStart']: whether the brace was at the start of a line
 2859+ * @param PPFrame $frame The current frame, contains template arguments
27162860 * @return string the text of the template
27172861 * @private
27182862 */
2719 - function templateSubstitution($inNode, &$outText, $flags = 0) {
2720 - return FALSE;
 2863+ function braceSubstitution( $piece, $frame ) {
27212864 global $wgContLang, $wgNonincludableNamespaces;
27222865 wfProfileIn( __METHOD__ );
27232866 wfProfileIn( __METHOD__.'-setup' );
@@ -2730,12 +2873,11 @@
27312874 $isLocalObj = false; # $text is a DOM node needing expansion in the current frame
27322875
27332876 # Title object, where $text came from
2734 - $xpath = new DOMXPath($template->ownerDocument);
27352877 $title = null;
27362878
27372879 # $part1 is the bit before the first |, and must contain only title characters.
27382880 # Various prefixes will be stripped from it later.
2739 - $titleWithSpaces = $xpath->query("title", $template)->item(0)->textContent;
 2881+ $titleWithSpaces = $frame->expand( $piece['title'] );
27402882 $part1 = trim( $titleWithSpaces );
27412883 $titleText = false;
27422884
@@ -2743,10 +2885,7 @@
27442886 $originalTitle = $part1;
27452887
27462888 # $args is a list of argument nodes, starting from index 0, not including $part1
2747 - $args = array();
2748 - foreach ($xpath->query("part", $template) as $part) {
2749 - $args[] = $part;
2750 - }
 2889+ $args = (null == $piece['parts']) ? array() : $piece['parts'];
27512890 wfProfileOut( __METHOD__.'-setup' );
27522891
27532892 # SUBST
@@ -2761,15 +2900,14 @@
27622901 # safesubst || (subst && PST) || (false && !PST) => transclude (skip the if)
27632902 # (false && PST) || (subst && !PST) => return input (handled by if)
27642903 if ( $substMatch != 'safesubst' && ($substMatch == 'subst' xor $this->ot['wiki']) ) {
2765 - $outText = ParseEngine::unparse($template);
2766 - $template->parentNode->replaceChild($template->ownerDocument->createTextNode($outText), $template);
 2904+ $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
27672905 $isLocalObj = true;
27682906 $found = true;
27692907 }
27702908 }
27712909
27722910 # Variables
2773 - if ( !$found && $args->length == 0 ) {
 2911+ if ( !$found && $args->getLength() == 0 ) {
27742912 $id = $this->mVariables->matchStartToEnd( $part1 );
27752913 if ( $id !== false ) {
27762914 $text = $this->getVariableValue( $id, $frame );
@@ -2826,12 +2964,14 @@
28272965 # Add a frame parameter, and pass the arguments as an array
28282966 $allArgs = $initialArgs;
28292967 $allArgs[] = $frame;
2830 - $funcArgs = array_merge( $funcArgs, $args );
 2968+ for ( $i = 0; $i < $args->getLength(); $i++ ) {
 2969+ $funcArgs[] = $args->item( $i );
 2970+ }
28312971 $allArgs[] = $funcArgs;
28322972 } else {
28332973 # Convert arguments to plain text
2834 - foreach ($args as $arg) {
2835 - $funcArgs[] = substr(ParseEngine::unparse($arg), 1);
 2974+ for ( $i = 0; $i < $args->getLength(); $i++ ) {
 2975+ $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) );
28362976 }
28372977 $allArgs = array_merge( $initialArgs, $funcArgs );
28382978 }
@@ -2860,7 +3000,7 @@
28613001 $text = $result;
28623002 }
28633003 if ( !$noparse ) {
2864 - $text = $this->mParseEngine->parse($text);
 3004+ $text = $this->preprocessToDom( $text, $preprocessFlags );
28653005 $isChildObj = true;
28663006 }
28673007 }
@@ -2929,7 +3069,7 @@
29303070 } else {
29313071 $text = $this->interwikiTransclude( $title, 'raw' );
29323072 // Preprocess it like a template
2933 - $text = $this->mParseEngine->parse($text);
 3073+ $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
29343074 $isChildObj = true;
29353075 }
29363076 $found = true;
@@ -2948,9 +3088,9 @@
29493089 # If we haven't found text to substitute by now, we're done
29503090 # Recover the source wikitext and return it
29513091 if ( !$found ) {
2952 - $outText = ParseEngine::unparse($template);
2953 - $template->parentNode->replaceChild($template->ownerDocument->createTextNode($outText), $template);
 3092+ $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
29543093 wfProfileOut( __METHOD__ );
 3094+ return array( 'object' => $text );
29553095 }
29563096
29573097 # Expand DOM-style return values in a child frame
@@ -2959,22 +3099,22 @@
29603100 $newFrame = $frame->newChild( $args, $title );
29613101
29623102 if ( $nowiki ) {
2963 - $text = ParseEngine::expand( $text, self::RECOVER_ORIG );
 3103+ $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
29643104 } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
29653105 # Expansion is eligible for the empty-frame cache
29663106 if ( isset( $this->mTplExpandCache[$titleText] ) ) {
29673107 $text = $this->mTplExpandCache[$titleText];
29683108 } else {
2969 - $text = ParseEngine::expand( $text, self::PTD_FOR_INCLUSION );
 3109+ $text = $newFrame->expand( $text );
29703110 $this->mTplExpandCache[$titleText] = $text;
29713111 }
29723112 } else {
29733113 # Uncached expansion
2974 - $text = ParseEngine::expand( $text );
 3114+ $text = $newFrame->expand( $text );
29753115 }
29763116 }
29773117 if ( $isLocalObj && $nowiki ) {
2978 - $text = ParseEngine::expand( $text, self::RECOVER_ORIG );
 3118+ $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
29793119 $isLocalObj = false;
29803120 }
29813121
@@ -3001,11 +3141,15 @@
30023142 $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' );
30033143 $this->limitationWarn( 'post-expand-template-inclusion' );
30043144 }
3005 - if ($template->parentNode != NULL) {
3006 - $template->parentNode->replaceChild($template->ownerDocument->createTextNode($text), $template);
 3145+
 3146+ if ( $isLocalObj ) {
 3147+ $ret = array( 'object' => $text );
 3148+ } else {
 3149+ $ret = array( 'text' => $text );
30073150 }
30083151
30093152 wfProfileOut( __METHOD__ );
 3153+ return $ret;
30103154 }
30113155
30123156 /**
@@ -3033,7 +3177,7 @@
30343178 return array( false, $title );
30353179 }
30363180
3037 - $dom = $this->mParseEngine->parse($text);
 3181+ $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
30383182 $this->mTplDomCache[ $titleText ] = $dom;
30393183
30403184 if (! $title->equals($cacheTitle)) {
@@ -3170,16 +3314,16 @@
31713315 * Triple brace replacement -- used for template arguments
31723316 * @private
31733317 */
3174 - function tplargSubstitution($inNode, &$outText, $flags = 0) {
 3318+ function argSubstitution( $piece, $frame ) {
31753319 wfProfileIn( __METHOD__ );
31763320
3177 - $xpath = new DOMXPath($tplArg->ownerDocument);
3178 - $parts = $xpath->query("part", $tplArg);
3179 - $nameWithSpaces = $xpath->query("title", $tplArg)->item(0)->textContent;
 3321+ $error = false;
 3322+ $parts = $piece['parts'];
 3323+ $nameWithSpaces = $frame->expand( $piece['title'] );
31803324 $argName = trim( $nameWithSpaces );
31813325 $object = false;
31823326 $text = $frame->getArgument( $argName );
3183 - if ( $text === false && $parts->length > 0
 3327+ if ( $text === false && $parts->getLength() > 0
31843328 && (
31853329 $this->ot['html']
31863330 || $this->ot['pre']
@@ -3187,18 +3331,28 @@
31883332 )
31893333 ) {
31903334 # No match in frame, use the supplied default
3191 - $text = $parts->item( 0 )->firstChild->textContent;
 3335+ $object = $parts->item( 0 )->getChildren();
31923336 }
31933337 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
3194 - $text .= '<!-- WARNING: argument omitted, expansion size too large -->';
 3338+ $error = '<!-- WARNING: argument omitted, expansion size too large -->';
31953339 $this->limitationWarn( 'post-expand-template-argument' );
31963340 }
3197 - if ($text == NULL) {
3198 - $text = ParseEngine::unparse($tplArg);
 3341+
 3342+ if ( $text === false && $object === false ) {
 3343+ # No match anywhere
 3344+ $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
31993345 }
3200 - $tplArg->parentNode->replaceChild($tplArg->ownerDocument->createTextNode($text), $tplArg);
 3346+ if ( $error !== false ) {
 3347+ $text .= $error;
 3348+ }
 3349+ if ( $object !== false ) {
 3350+ $ret = array( 'object' => $object );
 3351+ } else {
 3352+ $ret = array( 'text' => $text );
 3353+ }
32013354
32023355 wfProfileOut( __METHOD__ );
 3356+ return $ret;
32033357 }
32043358
32053359 /**
@@ -3211,110 +3365,86 @@
32123366 * attributes Optional associative array of parsed attributes
32133367 * inner Contents of extension element
32143368 * noClose Original text did not have a close tag
 3369+ * @param PPFrame $frame
32153370 */
3216 - function xmltagSubstitution($inNode, &$outText, $flags = 0) {
 3371+ function extensionSubstitution( $params, $frame ) {
32173372 global $wgRawHtml, $wgContLang;
32183373
3219 - $xpath = new DOMXPath($inNode->ownerDocument);
3220 - $name = $xpath->query("name", $inNode)->item(0)->getAttribute("tag");
3221 - $name = strtolower( $name );
3222 - $isFunctionTag = isset( $this->mFunctionTagHooks[$name] ) && ( $this->ot['html'] || $this->ot['pre'] );
3223 - $retCode = $this->ot['html'] || $isFunctionTag;
3224 - if ($retCode) {
3225 - $inner = $xpath->query("inner", $inNode);
3226 - $content = $inner->length == 0 ? NULL : ParseEngine::expand($inner->item(0)->childNodes, $this);
3227 - $attributes = $xpath->query("attribute", $inNode);
 3374+ $name = $frame->expand( $params['name'] );
 3375+ $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
 3376+ $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
 3377+ $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;
32283378
 3379+ $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower($name)] ) &&
 3380+ ( $this->ot['html'] || $this->ot['pre'] );
 3381+ if ( $isFunctionTag ) {
 3382+ $markerType = 'none';
 3383+ } else {
 3384+ $markerType = 'general';
 3385+ }
 3386+ if ( $this->ot['html'] || $isFunctionTag ) {
 3387+ $name = strtolower( $name );
 3388+ $attributes = Sanitizer::decodeTagAttributes( $attrText );
 3389+ if ( isset( $params['attributes'] ) ) {
 3390+ $attributes = $attributes + $params['attributes'];
 3391+ }
 3392+
32293393 if( isset( $this->mTagHooks[$name] ) ) {
32303394 # Workaround for PHP bug 35229 and similar
32313395 if ( !is_callable( $this->mTagHooks[$name] ) ) {
32323396 throw new MWException( "Tag hook for $name is not callable\n" );
32333397 }
3234 - $outText = call_user_func_array( $this->mTagHooks[$name],
3235 - array($content, $attributes, $this));
 3398+ $output = call_user_func_array( $this->mTagHooks[$name],
 3399+ array( $content, $attributes, $this, $frame ) );
32363400 } elseif( isset( $this->mFunctionTagHooks[$name] ) ) {
32373401 list( $callback, $flags ) = $this->mFunctionTagHooks[$name];
32383402 if( !is_callable( $callback ) )
32393403 throw new MWException( "Tag hook for $name is not callable\n" );
32403404
3241 - $outText = call_user_func_array( $callback,
 3405+ $output = call_user_func_array( $callback,
32423406 array( &$this, $frame, $content, $attributes ) );
32433407 } else {
3244 - $outText = '<span class="error">Invalid tag extension name: ' .
 3408+ $output = '<span class="error">Invalid tag extension name: ' .
32453409 htmlspecialchars( $name ) . '</span>';
32463410 }
32473411
3248 - if ( is_array( $outText ) ) {
3249 - $outText = $outText[0];
 3412+ if ( is_array( $output ) ) {
 3413+ // Extract flags to local scope (to override $markerType)
 3414+ $flags = $output;
 3415+ $output = $flags[0];
 3416+ unset( $flags[0] );
 3417+ extract( $flags );
32503418 }
3251 - }
3252 -
3253 - return $retCode;
3254 - }
3255 -
3256 - function onlyincludeSubstitution($inNode, &$outText, $flags = 0) {
3257 - return FALSE;
3258 - }
3259 -
3260 - function commentSubstitution($inNode, &$outText, $flags = 0) {
3261 - $comment = $contextNode->getAttribute("startTag");
3262 - # HTML-style comment
3263 - # Remove it in HTML, pre+remove and STRIP_COMMENTS modes
3264 - if ( $this->parser->ot['html']
3265 - || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
3266 - || ( $flags & self::STRIP_COMMENTS ) ) {
3267 - if ($comment[0] == "\n" || $comment[strlen($comment) - 1] == "\n") {
3268 - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode("\n"), $contextNode);
 3419+ } else {
 3420+ if ( is_null( $attrText ) ) {
 3421+ $attrText = '';
 3422+ }
 3423+ if ( isset( $params['attributes'] ) ) {
 3424+ foreach ( $params['attributes'] as $attrName => $attrValue ) {
 3425+ $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
 3426+ htmlspecialchars( $attrValue ) . '"';
 3427+ }
 3428+ }
 3429+ if ( $content === null ) {
 3430+ $output = "<$name$attrText/>";
32693431 } else {
3270 - $contextNode->parentNode->removeChild($contextNode);
 3432+ $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
 3433+ $output = "<$name$attrText>$content$close";
32713434 }
32723435 }
3273 - # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
3274 - # Not in RECOVER_COMMENTS mode (extractSections) though
3275 - elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
3276 - $outText = $this->parser->insertStripItem($contextNode->getAttribute("startTag"));
3277 - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
 3436+
 3437+ if( $markerType === 'none' ) {
 3438+ return $output;
 3439+ } elseif ( $markerType === 'nowiki' ) {
 3440+ $this->mStripState->nowiki->setPair( $marker, $output );
 3441+ } elseif ( $markerType === 'general' ) {
 3442+ $this->mStripState->general->setPair( $marker, $output );
 3443+ } else {
 3444+ throw new MWException( __METHOD__.': invalid marker type' );
32783445 }
3279 - # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
3280 - else {
3281 - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($comment), $contextNode);
3282 - }
 3446+ return $marker;
32833447 }
32843448
3285 - function newlineSubstitution($inNode, &$outText, $flags = 0) {
3286 - return FALSE;
3287 - }
3288 -
3289 - function hSubstitution($inNode, &$outText, $flags = 0) {
3290 - # Insert a heading marker only for <h> children of <root>
3291 - # This is to stop extractSections from going over multiple tree levels
3292 - # Insert heading index marker
3293 - $this->expandRec($contextNode->childNodes, $flags, $headingIndex);
3294 - $titleText = $this->title->getPrefixedDBkey();
3295 - $this->parser->mHeadings[] = array( $titleText, $headingIndex );
3296 - $serial = count( $this->parser->mHeadings ) - 1;
3297 - $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
3298 - $this->parser->mStripState->general->setPair( $marker, '' );
3299 - $outText = $contextNode->getAttribute("startTag") . $marker . $contextNode->firstChild->wholeText . $contextNode->getAttribute("endTag");
3300 - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode);
3301 - $headingIndex ++;
3302 - }
3303 -
3304 - function orderedListSubstitution($inNode, &$outText, $flags = 0) {
3305 - $outText = "<ol>" . ParseEngine::expand($inNode->childNodes, $this, $flags) . "</ol>";
3306 - return TRUE;
3307 - }
3308 -
3309 - function unorderedListSubstitution($inNode, &$outText, $flags = 0) {
3310 - $outText = "<ul>" . ParseEngine::expand($inNode->childNodes, $this, $flags) . "</ul>";
3311 - return TRUE;
3312 - }
3313 -
3314 - function listItemSubstitution($inNode, &$outText, $flags = 0) {
3315 - $outText = "<li>" . ParseEngine::expand($inNode->childNodes, $this, $flags) . "</li>";
3316 - return TRUE;
3317 - }
3318 -
33193449 /**
33203450 * Increment an include size counter
33213451 *
@@ -3501,8 +3631,9 @@
35023632 $baseTitleText = $this->mTitle->getPrefixedDBkey();
35033633 $oldType = $this->mOutputType;
35043634 $this->setOutputType( self::OT_WIKI );
3505 - $root = $this->mParseEngine->parse($origText);
3506 - $node = $root->firstChild;
 3635+ $frame = $this->getPreprocessor()->newFrame();
 3636+ $root = $this->preprocessToDom( $origText );
 3637+ $node = $root->getFirstChild();
35073638 $byteOffset = 0;
35083639 $tocraw = array();
35093640
@@ -3679,14 +3810,14 @@
36803811 # Add the section to the section tree
36813812 # Find the DOM node for this header
36823813 while ( $node && !$isTemplate ) {
3683 - if ( $node->nodeName === 'h' ) {
 3814+ if ( $node->getName() === 'h' ) {
36843815 $bits = $node->splitHeading();
36853816 if ( $bits['i'] == $sectionIndex )
36863817 break;
36873818 }
36883819 $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
3689 - ParseEngine::expand( $node, $this, self::RECOVER_ORIG ) ) );
3690 - $node = $node->nextSibling;
 3820+ $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
 3821+ $node = $node->getNextSibling();
36913822 }
36923823 $tocraw[] = array(
36933824 'toclevel' => $toclevel,
@@ -4064,8 +4195,9 @@
40654196
40664197 $text = preg_replace( $substRegex, $substText, $text );
40674198 $text = $this->cleanSigInSig( $text );
4068 - $dom = $this->mParseEngine->parse($text);
4069 - $text = ParseEngine::expand( $dom, $this );
 4199+ $dom = $this->preprocessToDom( $text );
 4200+ $frame = $this->getPreprocessor()->newFrame();
 4201+ $text = $frame->expand( $dom );
40704202
40714203 if ( !$parsing ) {
40724204 $text = $this->mStripState->unstripBoth( $text );
@@ -4194,6 +4326,10 @@
41954327 * branches and thus speed up parsing. It is also possible to analyse the parse tree of
41964328 * the arguments, and to control the way they are expanded.
41974329 *
 4330+ * The $frame parameter is a PPFrame. This can be used to produce expanded text from the
 4331+ * arguments, for instance:
 4332+ * $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
 4333+ *
41984334 * For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
41994335 * future versions. Please call $frame->expand() on it anyway so that your code keeps
42004336 * working if/when this is changed.
@@ -4201,6 +4337,9 @@
42024338 * If you want whitespace to be trimmed from $args, you need to do it yourself, post-
42034339 * expansion.
42044340 *
 4341+ * Please read the documentation in includes/parser/Preprocessor.php for more information
 4342+ * about the methods available in PPFrame and PPNode.
 4343+ *
42054344 * @return The old callback function for this name, if any
42064345 */
42074346 function setFunctionHook( $id, $callback, $flags = 0 ) {
@@ -4636,11 +4775,12 @@
46374776 * Callback from the Sanitizer for expanding items found in HTML attribute
46384777 * values, so they can be safely tested and escaped.
46394778 * @param string $text
 4779+ * @param PPFrame $frame
46404780 * @return string
46414781 * @private
46424782 */
4643 - function attributeStripCallback( &$text ) {
4644 - $text = $this->replaceVariables( $text );
 4783+ function attributeStripCallback( &$text, $frame = false ) {
 4784+ $text = $this->replaceVariables( $text, $frame );
46454785 $text = $this->mStripState->unstripBoth( $text );
46464786 return $text;
46474787 }
@@ -4694,6 +4834,7 @@
46954835 $this->mOptions = new ParserOptions;
46964836 $this->setOutputType( self::OT_WIKI );
46974837 $outText = '';
 4838+ $frame = $this->getPreprocessor()->newFrame();
46984839
46994840 // Process section extraction flags
47004841 $flags = 0;
@@ -4705,30 +4846,29 @@
47064847 }
47074848 }
47084849 // Preprocess the text
4709 - $root = $this->mParseEngine->parse($text);
 4850+ $root = $this->preprocessToDom( $text, $flags );
47104851
47114852 // <h> nodes indicate section breaks
47124853 // They can only occur at the top level, so we can find them by iterating the root's children
4713 - $node = $root->firstChild->firstChild;
 4854+ $node = $root->getFirstChild();
47144855
47154856 // Find the target section
4716 - $ind = 1;
47174857 if ( $sectionIndex == 0 ) {
47184858 // Section zero doesn't nest, level=big
47194859 $targetLevel = 1000;
47204860 } else {
4721 - while ( $node ) {
4722 - if ( $node->nodeName === 'h' ) {
4723 - if ( $ind == $sectionIndex ) {
4724 - $targetLevel = strlen($node->getAttribute("endTag"));
 4861+ while ( $node ) {
 4862+ if ( $node->getName() === 'h' ) {
 4863+ $bits = $node->splitHeading();
 4864+ if ( $bits['i'] == $sectionIndex ) {
 4865+ $targetLevel = $bits['level'];
47254866 break;
47264867 }
4727 - $ind ++;
47284868 }
47294869 if ( $mode === 'replace' ) {
4730 - $outText .= ParseEngine::unparse($node);
 4870+ $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
47314871 }
4732 - $node = $node->nextSibling;
 4872+ $node = $node->getNextSibling();
47334873 }
47344874 }
47354875
@@ -4743,17 +4883,17 @@
47444884
47454885 // Find the end of the section, including nested sections
47464886 do {
4747 - if ( $node->nodeName === 'h' ) {
4748 - $curLevel = strlen($node->getAttribute("endTag"));
4749 - if ( $ind != $sectionIndex && $curLevel <= $targetLevel ) {
 4887+ if ( $node->getName() === 'h' ) {
 4888+ $bits = $node->splitHeading();
 4889+ $curLevel = $bits['level'];
 4890+ if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
47504891 break;
47514892 }
4752 - $ind ++;
47534893 }
47544894 if ( $mode === 'get' ) {
4755 - $outText .= ParseEngine::unparse($node);
 4895+ $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
47564896 }
4757 - $node = $node->nextSibling;
 4897+ $node = $node->getNextSibling();
47584898 } while ( $node );
47594899
47604900 // Write out the remainder (in replace mode only)
@@ -4767,8 +4907,8 @@
47684908 }
47694909
47704910 while ( $node ) {
4771 - $outText .= ParseEngine::expand( $node, $this, self::RECOVER_ORIG );
4772 - $node = $node->nextSibling;
 4911+ $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
 4912+ $node = $node->getNextSibling();
47734913 }
47744914 }
47754915
@@ -4776,9 +4916,6 @@
47774917 // Re-insert stripped tags
47784918 $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
47794919 }
4780 - if ($outText[0] == "\n") {
4781 - $outText = substr($outText, 1);
4782 - }
47834920
47844921 return $outText;
47854922 }
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
@@ -0,0 +1,1509 @@
 2+<?php
 3+
 4+/**
 5+ * @ingroup Parser
 6+ */
 7+class Preprocessor_DOM implements Preprocessor {
 8+ var $parser, $memoryLimit;
 9+
 10+ const CACHE_VERSION = 1;
 11+
 12+ function __construct( $parser ) {
 13+ $this->parser = $parser;
 14+ $mem = ini_get( 'memory_limit' );
 15+ $this->memoryLimit = false;
 16+ if ( strval( $mem ) !== '' && $mem != -1 ) {
 17+ if ( preg_match( '/^\d+$/', $mem ) ) {
 18+ $this->memoryLimit = $mem;
 19+ } elseif ( preg_match( '/^(\d+)M$/i', $mem, $m ) ) {
 20+ $this->memoryLimit = $m[1] * 1048576;
 21+ }
 22+ }
 23+ }
 24+
 25+ function newFrame() {
 26+ return new PPFrame_DOM( $this );
 27+ }
 28+
 29+ function newCustomFrame( $args ) {
 30+ return new PPCustomFrame_DOM( $this, $args );
 31+ }
 32+
 33+ function memCheck() {
 34+ if ( $this->memoryLimit === false ) {
 35+ return;
 36+ }
 37+ $usage = memory_get_usage();
 38+ if ( $usage > $this->memoryLimit * 0.9 ) {
 39+ $limit = intval( $this->memoryLimit * 0.9 / 1048576 + 0.5 );
 40+ throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
 41+ }
 42+ return $usage <= $this->memoryLimit * 0.8;
 43+ }
 44+
 45+ /**
 46+ * Preprocess some wikitext and return the document tree.
 47+ * This is the ghost of Parser::replace_variables().
 48+ *
 49+ * @param string $text The text to parse
 50+ * @param integer $flags Bitwise combination of:
 51+ * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being
 52+ * included. Default is to assume a direct page view.
 53+ *
 54+ * The generated DOM tree must depend only on the input text and the flags.
 55+ * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
 56+ *
 57+ * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 58+ * change in the DOM tree for a given text, must be passed through the section identifier
 59+ * in the section edit link and thus back to extractSections().
 60+ *
 61+ * For texts longer than $wgPreprocessorCacheThreshold, the generated XML is cached in
 62+ * $wgMemc under a key built from the text's MD5 and $flags; that cache relies on these
 63+ * strict dependency requirements.
 64+ *
 65+ * @private
 66+ */
 67+ function preprocessToObj( $text, $flags = 0 ) {
 68+ wfProfileIn( __METHOD__ );
 69+ global $wgMemc, $wgPreprocessorCacheThreshold;
 70+
 71+ $xml = false;
 72+ $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
 73+ if ( $cacheable ) {
 74+ wfProfileIn( __METHOD__.'-cacheable' );
 75+
 76+ $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
 77+ $cacheValue = $wgMemc->get( $cacheKey );
 78+ if ( $cacheValue ) {
 79+ $version = substr( $cacheValue, 0, 8 );
 80+ if ( intval( $version ) == self::CACHE_VERSION ) {
 81+ $xml = substr( $cacheValue, 8 );
 82+ // From the cache
 83+ wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
 84+ }
 85+ }
 86+ }
 87+ if ( $xml === false ) {
 88+ if ( $cacheable ) {
 89+ wfProfileIn( __METHOD__.'-cache-miss' );
 90+ $xml = $this->preprocessToXml( $text, $flags );
 91+ $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
 92+ $wgMemc->set( $cacheKey, $cacheValue, 86400 );
 93+ wfProfileOut( __METHOD__.'-cache-miss' );
 94+ wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
 95+ } else {
 96+ $xml = $this->preprocessToXml( $text, $flags );
 97+ }
 98+
 99+ }
 100+ wfProfileIn( __METHOD__.'-loadXML' );
 101+ $dom = new DOMDocument;
 102+ wfSuppressWarnings();
 103+ $result = $dom->loadXML( $xml );
 104+ wfRestoreWarnings();
 105+ if ( !$result ) {
 106+ // Try running the XML through UtfNormal to get rid of invalid characters
 107+ $xml = UtfNormal::cleanUp( $xml );
 108+ $result = $dom->loadXML( $xml );
 109+ if ( !$result ) {
 110+ throw new MWException( __METHOD__.' generated invalid XML' );
 111+ }
 112+ }
 113+ $obj = new PPNode_DOM( $dom->documentElement );
 114+ wfProfileOut( __METHOD__.'-loadXML' );
 115+ if ( $cacheable ) {
 116+ wfProfileOut( __METHOD__.'-cacheable' );
 117+ }
 118+ wfProfileOut( __METHOD__ );
 119+ return $obj;
 120+ }
 121+
 122+ function preprocessToXml( $text, $flags = 0 ) {
 123+ wfProfileIn( __METHOD__ );
 124+ $rules = array(
 125+ '{' => array(
 126+ 'end' => '}',
 127+ 'names' => array(
 128+ 2 => 'template',
 129+ 3 => 'tplarg',
 130+ ),
 131+ 'min' => 2,
 132+ 'max' => 3,
 133+ ),
 134+ '[' => array(
 135+ 'end' => ']',
 136+ 'names' => array( 2 => null ),
 137+ 'min' => 2,
 138+ 'max' => 2,
 139+ )
 140+ );
 141+
 142+ $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
 143+
 144+ $xmlishElements = $this->parser->getStripList();
 145+ $enableOnlyinclude = false;
 146+ if ( $forInclusion ) {
 147+ $ignoredTags = array( 'includeonly', '/includeonly' );
 148+ $ignoredElements = array( 'noinclude' );
 149+ $xmlishElements[] = 'noinclude';
 150+ if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
 151+ $enableOnlyinclude = true;
 152+ }
 153+ } else {
 154+ $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
 155+ $ignoredElements = array( 'includeonly' );
 156+ $xmlishElements[] = 'includeonly';
 157+ }
 158+ $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
 159+
 160+ // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
 161+ $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
 162+
 163+ $stack = new PPDStack;
 164+
 165+ $searchBase = "[{<\n"; #}
 166+ $revText = strrev( $text ); // For fast reverse searches
 167+
 168+ $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start
 169+ $accum =& $stack->getAccum(); # Current accumulator
 170+ $accum = '<root>';
 171+ $findEquals = false; # True to find equals signs in arguments
 172+ $findPipe = false; # True to take notice of pipe characters
 173+ $headingIndex = 1;
 174+ $inHeading = false; # True if $i is inside a possible heading
 175+ $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i
 176+ $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
 177+ $fakeLineStart = true; # Do a line-start run without outputting an LF character
 178+
 179+ while ( true ) {
 180+ //$this->memCheck();
 181+
 182+ if ( $findOnlyinclude ) {
 183+ // Ignore all input up to the next <onlyinclude>
 184+ $startPos = strpos( $text, '<onlyinclude>', $i );
 185+ if ( $startPos === false ) {
 186+ // Ignored section runs to the end
 187+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
 188+ break;
 189+ }
 190+ $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
 191+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
 192+ $i = $tagEndPos;
 193+ $findOnlyinclude = false;
 194+ }
 195+
 196+ if ( $fakeLineStart ) {
 197+ $found = 'line-start';
 198+ $curChar = '';
 199+ } else {
 200+ # Find next opening brace, closing brace or pipe
 201+ $search = $searchBase;
 202+ if ( $stack->top === false ) {
 203+ $currentClosing = '';
 204+ } else {
 205+ $currentClosing = $stack->top->close;
 206+ $search .= $currentClosing;
 207+ }
 208+ if ( $findPipe ) {
 209+ $search .= '|';
 210+ }
 211+ if ( $findEquals ) {
 212+ // First equals will be for the template
 213+ $search .= '=';
 214+ }
 215+ $rule = null;
 216+ # Output literal section, advance input counter
 217+ $literalLength = strcspn( $text, $search, $i );
 218+ if ( $literalLength > 0 ) {
 219+ $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
 220+ $i += $literalLength;
 221+ }
 222+ if ( $i >= strlen( $text ) ) {
 223+ if ( $currentClosing == "\n" ) {
 224+ // Do a past-the-end run to finish off the heading
 225+ $curChar = '';
 226+ $found = 'line-end';
 227+ } else {
 228+ # All done
 229+ break;
 230+ }
 231+ } else {
 232+ $curChar = $text[$i];
 233+ if ( $curChar == '|' ) {
 234+ $found = 'pipe';
 235+ } elseif ( $curChar == '=' ) {
 236+ $found = 'equals';
 237+ } elseif ( $curChar == '<' ) {
 238+ $found = 'angle';
 239+ } elseif ( $curChar == "\n" ) {
 240+ if ( $inHeading ) {
 241+ $found = 'line-end';
 242+ } else {
 243+ $found = 'line-start';
 244+ }
 245+ } elseif ( $curChar == $currentClosing ) {
 246+ $found = 'close';
 247+ } elseif ( isset( $rules[$curChar] ) ) {
 248+ $found = 'open';
 249+ $rule = $rules[$curChar];
 250+ } else {
 251+ # Some versions of PHP have a strcspn which stops on null characters
 252+ # Ignore and continue
 253+ ++$i;
 254+ continue;
 255+ }
 256+ }
 257+ }
 258+
 259+ if ( $found == 'angle' ) {
 260+ $matches = false;
 261+ // Handle </onlyinclude>
 262+ if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
 263+ $findOnlyinclude = true;
 264+ continue;
 265+ }
 266+
 267+ // Determine element name
 268+ if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
 269+ // Element name missing or not listed
 270+ $accum .= '&lt;';
 271+ ++$i;
 272+ continue;
 273+ }
 274+ // Handle comments
 275+ if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
 276+ // To avoid leaving blank lines, when a comment is both preceded
 277+ // and followed by a newline (ignoring spaces), trim leading and
 278+ // trailing spaces and one of the newlines.
 279+
 280+ // Find the end
 281+ $endPos = strpos( $text, '-->', $i + 4 );
 282+ if ( $endPos === false ) {
 283+ // Unclosed comment in input, runs to end
 284+ $inner = substr( $text, $i );
 285+ $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
 286+ $i = strlen( $text );
 287+ } else {
 288+ // Search backwards for leading whitespace
 289+ $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
 290+ // Search forwards for trailing whitespace
 291+ // $wsEnd will be the position of the last space
 292+ $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
 293+ // Eat the line if possible
 294+ // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
 295+ // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
 296+ // it's a possible beneficial b/c break.
 297+ if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
 298+ && substr( $text, $wsEnd + 1, 1 ) == "\n" )
 299+ {
 300+ $startPos = $wsStart;
 301+ $endPos = $wsEnd + 1;
 302+ // Remove leading whitespace from the end of the accumulator
 303+ // Sanity check first though
 304+ $wsLength = $i - $wsStart;
 305+ if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
 306+ $accum = substr( $accum, 0, -$wsLength );
 307+ }
 308+ // Do a line-start run next time to look for headings after the comment
 309+ $fakeLineStart = true;
 310+ } else {
 311+ // No line to eat, just take the comment itself
 312+ $startPos = $i;
 313+ $endPos += 2;
 314+ }
 315+
 316+ if ( $stack->top ) {
 317+ $part = $stack->top->getCurrentPart();
 318+ if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
 319+ // Comments abutting, no change in visual end
 320+ $part->commentEnd = $wsEnd;
 321+ } else {
 322+ $part->visualEnd = $wsStart;
 323+ $part->commentEnd = $endPos;
 324+ }
 325+ }
 326+ $i = $endPos + 1;
 327+ $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
 328+ $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
 329+ }
 330+ continue;
 331+ }
 332+ $name = $matches[1];
 333+ $lowerName = strtolower( $name );
 334+ $attrStart = $i + strlen( $name ) + 1;
 335+
 336+ // Find end of tag
 337+ $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
 338+ if ( $tagEndPos === false ) {
 339+ // Infinite backtrack
 340+ // Disable tag search to prevent worst-case O(N^2) performance
 341+ $noMoreGT = true;
 342+ $accum .= '&lt;';
 343+ ++$i;
 344+ continue;
 345+ }
 346+
 347+ // Handle ignored tags
 348+ if ( in_array( $lowerName, $ignoredTags ) ) {
 349+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>';
 350+ $i = $tagEndPos + 1;
 351+ continue;
 352+ }
 353+
 354+ $tagStartPos = $i;
 355+ if ( $text[$tagEndPos-1] == '/' ) {
 356+ $attrEnd = $tagEndPos - 1;
 357+ $inner = null;
 358+ $i = $tagEndPos + 1;
 359+ $close = '';
 360+ } else {
 361+ $attrEnd = $tagEndPos;
 362+ // Find closing tag
 363+ if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
 364+ $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
 365+ {
 366+ $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
 367+ $i = $matches[0][1] + strlen( $matches[0][0] );
 368+ $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
 369+ } else {
 370+ // No end tag -- let it run out to the end of the text.
 371+ $inner = substr( $text, $tagEndPos + 1 );
 372+ $i = strlen( $text );
 373+ $close = '';
 374+ }
 375+ }
 376+ // <includeonly> and <noinclude> just become <ignore> tags
 377+ if ( in_array( $lowerName, $ignoredElements ) ) {
 378+ $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
 379+ . '</ignore>';
 380+ continue;
 381+ }
 382+
 383+ $accum .= '<ext>';
 384+ if ( $attrEnd <= $attrStart ) {
 385+ $attr = '';
 386+ } else {
 387+ $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
 388+ }
 389+ $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
 390+ // Note that the attr element contains the whitespace between name and attribute,
 391+ // this is necessary for precise reconstruction during pre-save transform.
 392+ '<attr>' . htmlspecialchars( $attr ) . '</attr>';
 393+ if ( $inner !== null ) {
 394+ $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
 395+ }
 396+ $accum .= $close . '</ext>';
 397+ }
 398+
 399+ elseif ( $found == 'line-start' ) {
 400+ // Is this the start of a heading?
 401+ // Line break belongs before the heading element in any case
 402+ if ( $fakeLineStart ) {
 403+ $fakeLineStart = false;
 404+ } else {
 405+ $accum .= $curChar;
 406+ $i++;
 407+ }
 408+
 409+ $count = strspn( $text, '=', $i, 6 );
 410+ if ( $count == 1 && $findEquals ) {
 411+ // DWIM: This looks kind of like a name/value separator
 412+ // Let's let the equals handler have it and break the potential heading
 413+ // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
 414+ } elseif ( $count > 0 ) {
 415+ $piece = array(
 416+ 'open' => "\n",
 417+ 'close' => "\n",
 418+ 'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ),
 419+ 'startPos' => $i,
 420+ 'count' => $count );
 421+ $stack->push( $piece );
 422+ $accum =& $stack->getAccum();
 423+ $flags = $stack->getFlags();
 424+ extract( $flags );
 425+ $i += $count;
 426+ }
 427+ }
 428+
 429+ elseif ( $found == 'line-end' ) {
 430+ $piece = $stack->top;
 431+ // A heading must be open, otherwise \n wouldn't have been in the search list
 432+ assert( $piece->open == "\n" );
 433+ $part = $piece->getCurrentPart();
 434+ // Search back through the input to see if it has a proper close
 435+ // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
 436+ $wsLength = strspn( $revText, " \t", strlen( $text ) - $i );
 437+ $searchStart = $i - $wsLength;
 438+ if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
 439+ // Comment found at line end
 440+ // Search for equals signs before the comment
 441+ $searchStart = $part->visualEnd;
 442+ $searchStart -= strspn( $revText, " \t", strlen( $text ) - $searchStart );
 443+ }
 444+ $count = $piece->count;
 445+ $equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart );
 446+ if ( $equalsLength > 0 ) {
 447+ if ( $i - $equalsLength == $piece->startPos ) {
 448+ // This is just a single string of equals signs on its own line
 449+ // Replicate the doHeadings behaviour /={count}(.+)={count}/
 450+ // First find out how many equals signs there really are (don't stop at 6)
 451+ $count = $equalsLength;
 452+ if ( $count < 3 ) {
 453+ $count = 0;
 454+ } else {
 455+ $count = min( 6, intval( ( $count - 1 ) / 2 ) );
 456+ }
 457+ } else {
 458+ $count = min( $equalsLength, $count );
 459+ }
 460+ if ( $count > 0 ) {
 461+ // Normal match, output <h>
 462+ $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
 463+ $headingIndex++;
 464+ } else {
 465+ // Only one or two equals signs on their own line, count=0
 466+ $element = $accum;
 467+ }
 468+ } else {
 469+ // No match, no <h>, just pass down the inner text
 470+ $element = $accum;
 471+ }
 472+ // Unwind the stack
 473+ $stack->pop();
 474+ $accum =& $stack->getAccum();
 475+ $flags = $stack->getFlags();
 476+ extract( $flags );
 477+
 478+ // Append the result to the enclosing accumulator
 479+ $accum .= $element;
 480+ // Note that we do NOT increment the input pointer.
 481+ // This is because the closing linebreak could be the opening linebreak of
 482+ // another heading. Infinite loops are avoided because the next iteration MUST
 483+ // hit the heading open case above, which unconditionally increments the
 484+ // input pointer.
 485+ }
 486+
 487+ elseif ( $found == 'open' ) {
 488+ # count opening brace characters
 489+ $count = strspn( $text, $curChar, $i );
 490+
 491+ # we need to add to stack only if opening brace count is enough for one of the rules
 492+ if ( $count >= $rule['min'] ) {
 493+ # Add it to the stack
 494+ $piece = array(
 495+ 'open' => $curChar,
 496+ 'close' => $rule['end'],
 497+ 'count' => $count,
 498+ 'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
 499+ );
 500+
 501+ $stack->push( $piece );
 502+ $accum =& $stack->getAccum();
 503+ $flags = $stack->getFlags();
 504+ extract( $flags );
 505+ } else {
 506+ # Add literal brace(s)
 507+ $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
 508+ }
 509+ $i += $count;
 510+ }
 511+
 512+ elseif ( $found == 'close' ) {
 513+ $piece = $stack->top;
 514+ # let's check if there are enough characters for the closing brace
 515+ $maxCount = $piece->count;
 516+ $count = strspn( $text, $curChar, $i, $maxCount );
 517+
 518+ # check for maximum matching characters (if there are 5 closing
 519+ # characters, we will probably need only 3 - depending on the rules)
 520+ $matchingCount = 0;
 521+ $rule = $rules[$piece->open];
 522+ if ( $count > $rule['max'] ) {
 523+ # The specified maximum exists in the callback array, unless the caller
 524+ # has made an error
 525+ $matchingCount = $rule['max'];
 526+ } else {
 527+ # Count is less than the maximum
 528+ # Skip any gaps in the callback array to find the true largest match
 529+ # Need to use array_key_exists not isset because the callback can be null
 530+ $matchingCount = $count;
 531+ while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
 532+ --$matchingCount;
 533+ }
 534+ }
 535+
 536+ if ($matchingCount <= 0) {
 537+ # No matching element found in callback array
 538+ # Output a literal closing brace and continue
 539+ $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
 540+ $i += $count;
 541+ continue;
 542+ }
 543+ $name = $rule['names'][$matchingCount];
 544+ if ( $name === null ) {
 545+ // No element, just literal text
 546+ $element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount );
 547+ } else {
 548+ # Create XML element
 549+ # Note: $parts is already XML, does not need to be encoded further
 550+ $parts = $piece->parts;
 551+ $title = $parts[0]->out;
 552+ unset( $parts[0] );
 553+
 554+ # The invocation is at the start of the line if lineStart is set in
 555+ # the stack, and all opening brackets are used up.
 556+ if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
 557+ $attr = ' lineStart="1"';
 558+ } else {
 559+ $attr = '';
 560+ }
 561+
 562+ $element = "<$name$attr>";
 563+ $element .= "<title>$title</title>";
 564+ $argIndex = 1;
 565+ foreach ( $parts as $partIndex => $part ) {
 566+ if ( isset( $part->eqpos ) ) {
 567+ $argName = substr( $part->out, 0, $part->eqpos );
 568+ $argValue = substr( $part->out, $part->eqpos + 1 );
 569+ $element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
 570+ } else {
 571+ $element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
 572+ $argIndex++;
 573+ }
 574+ }
 575+ $element .= "</$name>";
 576+ }
 577+
 578+ # Advance input pointer
 579+ $i += $matchingCount;
 580+
 581+ # Unwind the stack
 582+ $stack->pop();
 583+ $accum =& $stack->getAccum();
 584+
 585+ # Re-add the old stack element if it still has unmatched opening characters remaining
 586+ if ($matchingCount < $piece->count) {
 587+ $piece->parts = array( new PPDPart );
 588+ $piece->count -= $matchingCount;
 589+ # do we still qualify for any callback with remaining count?
 590+ $names = $rules[$piece->open]['names'];
 591+ $skippedBraces = 0;
 592+ $enclosingAccum =& $accum;
 593+ while ( $piece->count ) {
 594+ if ( array_key_exists( $piece->count, $names ) ) {
 595+ $stack->push( $piece );
 596+ $accum =& $stack->getAccum();
 597+ break;
 598+ }
 599+ --$piece->count;
 600+ $skippedBraces ++;
 601+ }
 602+ $enclosingAccum .= str_repeat( $piece->open, $skippedBraces );
 603+ }
 604+ $flags = $stack->getFlags();
 605+ extract( $flags );
 606+
 607+ # Add XML element to the enclosing accumulator
 608+ $accum .= $element;
 609+ }
 610+
 611+ elseif ( $found == 'pipe' ) {
 612+ $findEquals = true; // shortcut for getFlags()
 613+ $stack->addPart();
 614+ $accum =& $stack->getAccum();
 615+ ++$i;
 616+ }
 617+
 618+ elseif ( $found == 'equals' ) {
 619+ $findEquals = false; // shortcut for getFlags()
 620+ $stack->getCurrentPart()->eqpos = strlen( $accum );
 621+ $accum .= '=';
 622+ ++$i;
 623+ }
 624+ }
 625+
 626+ # Output any remaining unclosed brackets
 627+ foreach ( $stack->stack as $piece ) {
 628+ $stack->rootAccum .= $piece->breakSyntax();
 629+ }
 630+ $stack->rootAccum .= '</root>';
 631+ $xml = $stack->rootAccum;
 632+
 633+ wfProfileOut( __METHOD__ );
 634+
 635+ return $xml;
 636+ }
 637+}
 638+
 639+/**
 640+ * Stack class to help Preprocessor_DOM::preprocessToXml()
 641+ * @ingroup Parser
 642+ */
 643+class PPDStack {
 644+ var $stack, $rootAccum, $top;
 645+ var $out;
 646+ var $elementClass = 'PPDStackElement';
 647+
 648+ static $false = false;
 649+
 650+ function __construct() {
 651+ $this->stack = array();
 652+ $this->top = false;
 653+ $this->rootAccum = '';
 654+ $this->accum =& $this->rootAccum;
 655+ }
 656+
 657+ function count() {
 658+ return count( $this->stack );
 659+ }
 660+
 661+ function &getAccum() {
 662+ return $this->accum;
 663+ }
 664+
 665+ function getCurrentPart() {
 666+ if ( $this->top === false ) {
 667+ return false;
 668+ } else {
 669+ return $this->top->getCurrentPart();
 670+ }
 671+ }
 672+
 673+ function push( $data ) {
 674+ if ( $data instanceof $this->elementClass ) {
 675+ $this->stack[] = $data;
 676+ } else {
 677+ $class = $this->elementClass;
 678+ $this->stack[] = new $class( $data );
 679+ }
 680+ $this->top = $this->stack[ count( $this->stack ) - 1 ];
 681+ $this->accum =& $this->top->getAccum();
 682+ }
 683+
 684+ function pop() {
 685+ if ( !count( $this->stack ) ) {
 686+ throw new MWException( __METHOD__.': no elements remaining' );
 687+ }
 688+ $temp = array_pop( $this->stack );
 689+
 690+ if ( count( $this->stack ) ) {
 691+ $this->top = $this->stack[ count( $this->stack ) - 1 ];
 692+ $this->accum =& $this->top->getAccum();
 693+ } else {
 694+ $this->top = self::$false;
 695+ $this->accum =& $this->rootAccum;
 696+ }
 697+ return $temp;
 698+ }
 699+
 700+ function addPart( $s = '' ) {
 701+ $this->top->addPart( $s );
 702+ $this->accum =& $this->top->getAccum();
 703+ }
 704+
 705+ function getFlags() {
 706+ if ( !count( $this->stack ) ) {
 707+ return array(
 708+ 'findEquals' => false,
 709+ 'findPipe' => false,
 710+ 'inHeading' => false,
 711+ );
 712+ } else {
 713+ return $this->top->getFlags();
 714+ }
 715+ }
 716+}
 717+
 718+/**
 719+ * @ingroup Parser
 720+ */
 721+class PPDStackElement {
 722+ var $open, // Opening character (\n for heading)
 723+ $close, // Matching closing character
 724+ $count, // Number of opening characters found (number of "=" for heading)
 725+ $parts, // Array of PPDPart objects describing pipe-separated parts.
 726+ $lineStart; // True if the open char appeared at the start of the input line. Not set for headings.
 727+
 728+ var $partClass = 'PPDPart';
 729+
 730+ function __construct( $data = array() ) {
 731+ $class = $this->partClass;
 732+ $this->parts = array( new $class );
 733+
 734+ foreach ( $data as $name => $value ) {
 735+ $this->$name = $value;
 736+ }
 737+ }
 738+
 739+ function &getAccum() {
 740+ return $this->parts[count($this->parts) - 1]->out;
 741+ }
 742+
 743+ function addPart( $s = '' ) {
 744+ $class = $this->partClass;
 745+ $this->parts[] = new $class( $s );
 746+ }
 747+
 748+ function getCurrentPart() {
 749+ return $this->parts[count($this->parts) - 1];
 750+ }
 751+
 752+ function getFlags() {
 753+ $partCount = count( $this->parts );
 754+ $findPipe = $this->open != "\n" && $this->open != '[';
 755+ return array(
 756+ 'findPipe' => $findPipe,
 757+ 'findEquals' => $findPipe && $partCount > 1 && !isset( $this->parts[$partCount - 1]->eqpos ),
 758+ 'inHeading' => $this->open == "\n",
 759+ );
 760+ }
 761+
 762+ /**
 763+ * Get the output string that would result if the close is not found.
 764+ */
 765+ function breakSyntax( $openingCount = false ) {
 766+ if ( $this->open == "\n" ) {
 767+ $s = $this->parts[0]->out;
 768+ } else {
 769+ if ( $openingCount === false ) {
 770+ $openingCount = $this->count;
 771+ }
 772+ $s = str_repeat( $this->open, $openingCount );
 773+ $first = true;
 774+ foreach ( $this->parts as $part ) {
 775+ if ( $first ) {
 776+ $first = false;
 777+ } else {
 778+ $s .= '|';
 779+ }
 780+ $s .= $part->out;
 781+ }
 782+ }
 783+ return $s;
 784+ }
 785+}
 786+
 787+/**
 788+ * @ingroup Parser
 789+ */
 790+class PPDPart {
 791+ var $out; // Output accumulator string
 792+
 793+ // Optional member variables:
 794+ // eqpos Position of equals sign in output accumulator
 795+ // commentEnd Input position of the last character consumed by the last comment encountered (inclusive)
 796+ // visualEnd Past-the-end input pointer for the end of the accumulator minus comments
 797+
 798+ function __construct( $out = '' ) {
 799+ $this->out = $out;
 800+ }
 801+}
 802+
 803+/**
 804+ * An expansion frame, used as a context to expand the result of preprocessToObj()
 805+ * @ingroup Parser
 806+ */
 807+class PPFrame_DOM implements PPFrame {
 808+ var $preprocessor, $parser, $title;
 809+ var $titleCache;
 810+
 811+ /**
 812+ * Hashtable listing templates which are disallowed for expansion in this frame,
 813+ * having been encountered previously in parent frames.
 814+ */
 815+ var $loopCheckHash;
 816+
 817+ /**
 818+ * Recursion depth of this frame, top = 0
 819+ * Note that this is NOT the same as expansion depth in expand()
 820+ */
 821+ var $depth;
 822+
 823+
 824+ /**
 825+ * Construct a new preprocessor frame.
 826+ * @param Preprocessor $preprocessor The parent preprocessor
 827+ */
 828+ function __construct( $preprocessor ) {
 829+ $this->preprocessor = $preprocessor;
 830+ $this->parser = $preprocessor->parser;
 831+ $this->title = $this->parser->mTitle;
 832+ $this->titleCache = array( $this->title ? $this->title->getPrefixedDBkey() : false );
 833+ $this->loopCheckHash = array();
 834+ $this->depth = 0;
 835+ }
 836+
 837+ /**
 838+ * Create a new child frame
 839+ * $args is optionally a multi-root PPNode or array containing the template arguments
 840+ */
 841+ function newChild( $args = false, $title = false ) {
 842+ $namedArgs = array();
 843+ $numberedArgs = array();
 844+ if ( $title === false ) {
 845+ $title = $this->title;
 846+ }
 847+ if ( $args !== false ) {
 848+ $xpath = false;
 849+ if ( $args instanceof PPNode ) {
 850+ $args = $args->node;
 851+ }
 852+ foreach ( $args as $arg ) {
 853+ if ( !$xpath ) {
 854+ $xpath = new DOMXPath( $arg->ownerDocument );
 855+ }
 856+
 857+ $nameNodes = $xpath->query( 'name', $arg );
 858+ $value = $xpath->query( 'value', $arg );
 859+ if ( $nameNodes->item( 0 )->hasAttributes() ) {
 860+ // Numbered parameter
 861+ $index = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent;
 862+ $numberedArgs[$index] = $value->item( 0 );
 863+ unset( $namedArgs[$index] );
 864+ } else {
 865+ // Named parameter
 866+ $name = trim( $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS ) );
 867+ $namedArgs[$name] = $value->item( 0 );
 868+ unset( $numberedArgs[$name] );
 869+ }
 870+ }
 871+ }
 872+ return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
 873+ }
 874+
 875+ function expand( $root, $flags = 0 ) {
 876+ static $expansionDepth = 0;
 877+ if ( is_string( $root ) ) {
 878+ return $root;
 879+ }
 880+
 881+ if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
 882+ {
 883+ return '<span class="error">Node-count limit exceeded</span>';
 884+ }
 885+
 886+ if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
 887+ return '<span class="error">Expansion depth limit exceeded</span>';
 888+ }
 889+ wfProfileIn( __METHOD__ );
 890+ ++$expansionDepth;
 891+
 892+ if ( $root instanceof PPNode_DOM ) {
 893+ $root = $root->node;
 894+ }
 895+ if ( $root instanceof DOMDocument ) {
 896+ $root = $root->documentElement;
 897+ }
 898+
 899+ $outStack = array( '', '' );
 900+ $iteratorStack = array( false, $root );
 901+ $indexStack = array( 0, 0 );
 902+
 903+ while ( count( $iteratorStack ) > 1 ) {
 904+ $level = count( $outStack ) - 1;
 905+ $iteratorNode =& $iteratorStack[ $level ];
 906+ $out =& $outStack[$level];
 907+ $index =& $indexStack[$level];
 908+
 909+ if ( $iteratorNode instanceof PPNode_DOM ) $iteratorNode = $iteratorNode->node;
 910+
 911+ if ( is_array( $iteratorNode ) ) {
 912+ if ( $index >= count( $iteratorNode ) ) {
 913+ // All done with this iterator
 914+ $iteratorStack[$level] = false;
 915+ $contextNode = false;
 916+ } else {
 917+ $contextNode = $iteratorNode[$index];
 918+ $index++;
 919+ }
 920+ } elseif ( $iteratorNode instanceof DOMNodeList ) {
 921+ if ( $index >= $iteratorNode->length ) {
 922+ // All done with this iterator
 923+ $iteratorStack[$level] = false;
 924+ $contextNode = false;
 925+ } else {
 926+ $contextNode = $iteratorNode->item( $index );
 927+ $index++;
 928+ }
 929+ } else {
 930+ // Copy to $contextNode and then delete from iterator stack,
 931+ // because this is not an iterator but we do have to execute it once
 932+ $contextNode = $iteratorStack[$level];
 933+ $iteratorStack[$level] = false;
 934+ }
 935+
 936+ if ( $contextNode instanceof PPNode_DOM ) $contextNode = $contextNode->node;
 937+
 938+ $newIterator = false;
 939+
 940+ if ( $contextNode === false ) {
 941+ // nothing to do
 942+ } elseif ( is_string( $contextNode ) ) {
 943+ $out .= $contextNode;
 944+ } elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
 945+ $newIterator = $contextNode;
 946+ } elseif ( $contextNode instanceof DOMNode ) {
 947+ if ( $contextNode->nodeType == XML_TEXT_NODE ) {
 948+ $out .= $contextNode->nodeValue;
 949+ } elseif ( $contextNode->nodeName == 'template' ) {
 950+ # Double-brace expansion
 951+ $xpath = new DOMXPath( $contextNode->ownerDocument );
 952+ $titles = $xpath->query( 'title', $contextNode );
 953+ $title = $titles->item( 0 );
 954+ $parts = $xpath->query( 'part', $contextNode );
 955+ if ( $flags & self::NO_TEMPLATES ) {
 956+ $newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
 957+ } else {
 958+ $lineStart = $contextNode->getAttribute( 'lineStart' );
 959+ $params = array(
 960+ 'title' => new PPNode_DOM( $title ),
 961+ 'parts' => new PPNode_DOM( $parts ),
 962+ 'lineStart' => $lineStart );
 963+ $ret = $this->parser->braceSubstitution( $params, $this );
 964+ if ( isset( $ret['object'] ) ) {
 965+ $newIterator = $ret['object'];
 966+ } else {
 967+ $out .= $ret['text'];
 968+ }
 969+ }
 970+ } elseif ( $contextNode->nodeName == 'tplarg' ) {
 971+ # Triple-brace expansion
 972+ $xpath = new DOMXPath( $contextNode->ownerDocument );
 973+ $titles = $xpath->query( 'title', $contextNode );
 974+ $title = $titles->item( 0 );
 975+ $parts = $xpath->query( 'part', $contextNode );
 976+ if ( $flags & self::NO_ARGS ) {
 977+ $newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
 978+ } else {
 979+ $params = array(
 980+ 'title' => new PPNode_DOM( $title ),
 981+ 'parts' => new PPNode_DOM( $parts ) );
 982+ $ret = $this->parser->argSubstitution( $params, $this );
 983+ if ( isset( $ret['object'] ) ) {
 984+ $newIterator = $ret['object'];
 985+ } else {
 986+ $out .= $ret['text'];
 987+ }
 988+ }
 989+ } elseif ( $contextNode->nodeName == 'comment' ) {
 990+ # HTML-style comment
 991+ # Remove it in HTML, pre+remove and STRIP_COMMENTS modes
 992+ if ( $this->parser->ot['html']
 993+ || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
 994+ || ( $flags & self::STRIP_COMMENTS ) )
 995+ {
 996+ $out .= '';
 997+ }
 998+ # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
 999+ # Not in RECOVER_COMMENTS mode (extractSections) though
 1000+ elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
 1001+ $out .= $this->parser->insertStripItem( $contextNode->textContent );
 1002+ }
 1003+ # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
 1004+ else {
 1005+ $out .= $contextNode->textContent;
 1006+ }
 1007+ } elseif ( $contextNode->nodeName == 'ignore' ) {
 1008+ # Output suppression used by <includeonly> etc.
 1009+ # OT_WIKI will only respect <ignore> in substed templates.
 1010+ # The other output types respect it unless NO_IGNORE is set.
 1011+ # extractSections() sets NO_IGNORE and so never respects it.
 1012+ if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
 1013+ $out .= $contextNode->textContent;
 1014+ } else {
 1015+ $out .= '';
 1016+ }
 1017+ } elseif ( $contextNode->nodeName == 'ext' ) {
 1018+ # Extension tag
 1019+ $xpath = new DOMXPath( $contextNode->ownerDocument );
 1020+ $names = $xpath->query( 'name', $contextNode );
 1021+ $attrs = $xpath->query( 'attr', $contextNode );
 1022+ $inners = $xpath->query( 'inner', $contextNode );
 1023+ $closes = $xpath->query( 'close', $contextNode );
 1024+ $params = array(
 1025+ 'name' => new PPNode_DOM( $names->item( 0 ) ),
 1026+ 'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
 1027+ 'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
 1028+ 'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
 1029+ );
 1030+ $out .= $this->parser->extensionSubstitution( $params, $this );
 1031+ } elseif ( $contextNode->nodeName == 'h' ) {
 1032+ # Heading
 1033+ $s = $this->expand( $contextNode->childNodes, $flags );
 1034+
 1035+ # Insert a heading marker only for <h> children of <root>
 1036+ # This is to stop extractSections from going over multiple tree levels
 1037+ if ( $contextNode->parentNode->nodeName == 'root'
 1038+ && $this->parser->ot['html'] )
 1039+ {
 1040+ # Insert heading index marker
 1041+ $headingIndex = $contextNode->getAttribute( 'i' );
 1042+ $titleText = $this->title->getPrefixedDBkey();
 1043+ $this->parser->mHeadings[] = array( $titleText, $headingIndex );
 1044+ $serial = count( $this->parser->mHeadings ) - 1;
 1045+ $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
 1046+ $count = $contextNode->getAttribute( 'level' );
 1047+ $s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
 1048+ $this->parser->mStripState->general->setPair( $marker, '' );
 1049+ }
 1050+ $out .= $s;
 1051+ } else {
 1052+ # Generic recursive expansion
 1053+ $newIterator = $contextNode->childNodes;
 1054+ }
 1055+ } else {
 1056+ wfProfileOut( __METHOD__ );
 1057+ throw new MWException( __METHOD__.': Invalid parameter type' );
 1058+ }
 1059+
 1060+ if ( $newIterator !== false ) {
 1061+ if ( $newIterator instanceof PPNode_DOM ) {
 1062+ $newIterator = $newIterator->node;
 1063+ }
 1064+ $outStack[] = '';
 1065+ $iteratorStack[] = $newIterator;
 1066+ $indexStack[] = 0;
 1067+ } elseif ( $iteratorStack[$level] === false ) {
 1068+ // Return accumulated value to parent
 1069+ // With tail recursion
 1070+ while ( $iteratorStack[$level] === false && $level > 0 ) {
 1071+ $outStack[$level - 1] .= $out;
 1072+ array_pop( $outStack );
 1073+ array_pop( $iteratorStack );
 1074+ array_pop( $indexStack );
 1075+ $level--;
 1076+ }
 1077+ }
 1078+ }
 1079+ --$expansionDepth;
 1080+ wfProfileOut( __METHOD__ );
 1081+ return $outStack[0];
 1082+ }
 1083+
 1084+ function implodeWithFlags( $sep, $flags /*, ... */ ) {
 1085+ $args = array_slice( func_get_args(), 2 );
 1086+
 1087+ $first = true;
 1088+ $s = '';
 1089+ foreach ( $args as $root ) {
 1090+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 1091+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 1092+ $root = array( $root );
 1093+ }
 1094+ foreach ( $root as $node ) {
 1095+ if ( $first ) {
 1096+ $first = false;
 1097+ } else {
 1098+ $s .= $sep;
 1099+ }
 1100+ $s .= $this->expand( $node, $flags );
 1101+ }
 1102+ }
 1103+ return $s;
 1104+ }
 1105+
 1106+ /**
 1107+ * Implode with no flags specified
 1108+ * This previously called implodeWithFlags but has now been inlined to reduce stack depth
 1109+ */
 1110+ function implode( $sep /*, ... */ ) {
 1111+ $args = array_slice( func_get_args(), 1 );
 1112+
 1113+ $first = true;
 1114+ $s = '';
 1115+ foreach ( $args as $root ) {
 1116+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 1117+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 1118+ $root = array( $root );
 1119+ }
 1120+ foreach ( $root as $node ) {
 1121+ if ( $first ) {
 1122+ $first = false;
 1123+ } else {
 1124+ $s .= $sep;
 1125+ }
 1126+ $s .= $this->expand( $node );
 1127+ }
 1128+ }
 1129+ return $s;
 1130+ }
 1131+
 1132+ /**
 1133+ * Makes an object that, when expand()ed, will be the same as one obtained
 1134+ * with implode()
 1135+ */
 1136+ function virtualImplode( $sep /*, ... */ ) {
 1137+ $args = array_slice( func_get_args(), 1 );
 1138+ $out = array();
 1139+ $first = true;
 1140+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 1141+
 1142+ foreach ( $args as $root ) {
 1143+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 1144+ $root = array( $root );
 1145+ }
 1146+ foreach ( $root as $node ) {
 1147+ if ( $first ) {
 1148+ $first = false;
 1149+ } else {
 1150+ $out[] = $sep;
 1151+ }
 1152+ $out[] = $node;
 1153+ }
 1154+ }
 1155+ return $out;
 1156+ }
 1157+
 1158+ /**
 1159+ * Virtual implode with brackets
 1160+ */
 1161+ function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
 1162+ $args = array_slice( func_get_args(), 3 );
 1163+ $out = array( $start );
 1164+ $first = true;
 1165+
 1166+ foreach ( $args as $root ) {
 1167+ if ( $root instanceof PPNode_DOM ) $root = $root->node;
 1168+ if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
 1169+ $root = array( $root );
 1170+ }
 1171+ foreach ( $root as $node ) {
 1172+ if ( $first ) {
 1173+ $first = false;
 1174+ } else {
 1175+ $out[] = $sep;
 1176+ }
 1177+ $out[] = $node;
 1178+ }
 1179+ }
 1180+ $out[] = $end;
 1181+ return $out;
 1182+ }
 1183+
 1184+ function __toString() {
 1185+ return 'frame{}';
 1186+ }
 1187+
 1188+ function getPDBK( $level = false ) {
 1189+ if ( $level === false ) {
 1190+ return $this->title->getPrefixedDBkey();
 1191+ } else {
 1192+ return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false;
 1193+ }
 1194+ }
 1195+
 1196+ function getArguments() {
 1197+ return array();
 1198+ }
 1199+
 1200+ function getNumberedArguments() {
 1201+ return array();
 1202+ }
 1203+
 1204+ function getNamedArguments() {
 1205+ return array();
 1206+ }
 1207+
 1208+ /**
 1209+ * Returns true if there are no arguments in this frame
 1210+ */
 1211+ function isEmpty() {
 1212+ return true;
 1213+ }
 1214+
 1215+ function getArgument( $name ) {
 1216+ return false;
 1217+ }
 1218+
 1219+ /**
 1220+ * Returns true if the infinite loop check is OK, false if a loop is detected
 1221+ */
 1222+ function loopCheck( $title ) {
 1223+ return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
 1224+ }
 1225+
 1226+ /**
 1227+ * Return true if the frame is a template frame
 1228+ */
 1229+ function isTemplate() {
 1230+ return false;
 1231+ }
 1232+}
 1233+
/**
 * Expansion frame with template arguments.
 *
 * Holds the unexpanded numbered and named argument nodes and expands them
 * lazily, in the parent frame, caching each result.
 * @ingroup Parser
 */
class PPTemplateFrame_DOM extends PPFrame_DOM {
	var $numberedArgs, $namedArgs, $parent;
	var $numberedExpansionCache, $namedExpansionCache;

	/**
	 * @param $preprocessor Passed through to the PPFrame_DOM constructor
	 * @param $parent Frame whose titleCache, loopCheckHash and depth this frame extends
	 * @param $numberedArgs Array: unexpanded argument nodes keyed by index
	 * @param $namedArgs Array: unexpanded argument nodes keyed by name
	 * @param $title Title object of this frame, or false
	 */
	function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
		parent::__construct( $preprocessor );
		$this->parent = $parent;
		$this->numberedArgs = $numberedArgs;
		$this->namedArgs = $namedArgs;
		$this->title = $title;

		// Prefixed DB key of this frame's title, false when there is no title
		$pdbk = false;
		if ( $title ) {
			$pdbk = $title->getPrefixedDBkey();
		}

		// Inherit the parent's title chain and append our own entry
		$this->titleCache = $parent->titleCache;
		$this->titleCache[] = $pdbk;

		// Inherit the parent's loop-check hash and mark our own title as seen
		$this->loopCheckHash = $parent->loopCheckHash;
		if ( $pdbk !== false ) {
			$this->loopCheckHash[$pdbk] = true;
		}

		$this->depth = $parent->depth + 1;
		$this->numberedExpansionCache = array();
		$this->namedExpansionCache = array();
	}

	/**
	 * Debug representation listing every argument with its serialized XML.
	 */
	function __toString() {
		$pairs = array();
		foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
			$xml = $value->ownerDocument->saveXML( $value );
			$pairs[] = "\"$name\":\"" . str_replace( '"', '\\"', $xml ) . '"';
		}
		// Global implode(), not the frame method of the same name
		return 'tplframe{' . implode( ', ', $pairs ) . '}';
	}

	/**
	 * Returns true if there are no arguments in this frame
	 */
	function isEmpty() {
		return !( count( $this->numberedArgs ) || count( $this->namedArgs ) );
	}

	/**
	 * Returns all arguments (numbered first, then named), expanded
	 */
	function getArguments() {
		$expanded = array();
		foreach ( array_keys( $this->numberedArgs ) as $key ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		foreach ( array_keys( $this->namedArgs ) as $key ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Returns all numbered arguments, expanded
	 */
	function getNumberedArguments() {
		$expanded = array();
		foreach ( $this->numberedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Returns all named arguments, expanded
	 */
	function getNamedArguments() {
		$expanded = array();
		foreach ( $this->namedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Expanded text of a numbered argument, or false if it is not set.
	 * The expansion happens once, in the parent frame, and is then cached.
	 */
	function getNumberedArgument( $index ) {
		if ( isset( $this->numberedArgs[$index] ) ) {
			if ( !isset( $this->numberedExpansionCache[$index] ) ) {
				# No trimming for unnamed arguments
				$node = $this->numberedArgs[$index];
				$this->numberedExpansionCache[$index] = $this->parent->expand( $node, self::STRIP_COMMENTS );
			}
			return $this->numberedExpansionCache[$index];
		}
		return false;
	}

	/**
	 * Expanded and trimmed text of a named argument, or false if it is not set.
	 * The expansion happens once, in the parent frame, and is then cached.
	 */
	function getNamedArgument( $name ) {
		if ( isset( $this->namedArgs[$name] ) ) {
			if ( !isset( $this->namedExpansionCache[$name] ) ) {
				# Trim named arguments post-expand, for backwards compatibility
				$text = $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS );
				$this->namedExpansionCache[$name] = trim( $text );
			}
			return $this->namedExpansionCache[$name];
		}
		return false;
	}

	/**
	 * Look an argument up as a numbered argument first, then as a named one
	 */
	function getArgument( $name ) {
		$numbered = $this->getNumberedArgument( $name );
		return $numbered !== false ? $numbered : $this->getNamedArgument( $name );
	}

	/**
	 * Return true if the frame is a template frame
	 */
	function isTemplate() {
		return true;
	}
}
 1346+
/**
 * Expansion frame with custom arguments.
 *
 * The arguments are stored as given and returned unmodified by getArgument().
 * @ingroup Parser
 */
class PPCustomFrame_DOM extends PPFrame_DOM {
	var $args;

	/**
	 * @param $preprocessor Passed through to the PPFrame_DOM constructor
	 * @param $args Array: argument values keyed by name or index
	 */
	function __construct( $preprocessor, $args ) {
		PPFrame_DOM::__construct( $preprocessor );
		$this->args = $args;
	}

	/**
	 * Debug representation listing every argument.
	 */
	function __toString() {
		$s = 'cstmframe{';
		$first = true;
		foreach ( $this->args as $name => $value ) {
			if ( $first ) {
				$first = false;
			} else {
				$s .= ', ';
			}
			// Cast instead of calling __toString() directly: identical for
			// objects that define __toString(), and plain strings no longer fatal
			$s .= "\"$name\":\"" .
				str_replace( '"', '\\"', (string)$value ) . '"';
		}
		$s .= '}';
		return $s;
	}

	/**
	 * Returns true if there are no arguments in this frame
	 */
	function isEmpty() {
		return !count( $this->args );
	}

	/**
	 * Get an argument by key.
	 *
	 * @param $index Mixed: key into the argument array
	 * @return Mixed: the stored value, or false if the key is not set
	 */
	function getArgument( $index ) {
		if ( !isset( $this->args[$index] ) ) {
			return false;
		}
		return $this->args[$index];
	}
}
 1386+
/**
 * PPNode implementation wrapping either a single DOMNode or a DOMNodeList.
 * @ingroup Parser
 */
class PPNode_DOM implements PPNode {
	var $node;

	/**
	 * @param $node DOMNode or DOMNodeList to wrap
	 * @param $xpath Unused; kept so existing callers keep working
	 */
	function __construct( $node, $xpath = false ) {
		$this->node = $node;
	}

	/**
	 * Lazily create the DOMXPath object the first time $this->xpath is read.
	 * Any other undefined property yields null rather than the XPath object.
	 */
	function __get( $name ) {
		if ( $name == 'xpath' ) {
			$this->xpath = new DOMXPath( $this->node->ownerDocument );
			return $this->xpath;
		}
		return null;
	}

	/**
	 * Serialize the wrapped node, or every node of the wrapped list, as XML
	 */
	function __toString() {
		if ( $this->node instanceof DOMNodeList ) {
			$s = '';
			foreach ( $this->node as $node ) {
				$s .= $node->ownerDocument->saveXML( $node );
			}
		} else {
			$s = $this->node->ownerDocument->saveXML( $this->node );
		}
		return $s;
	}

	/**
	 * Get the wrapped children of this node, or false if it has no childNodes
	 */
	function getChildren() {
		return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
	}

	/**
	 * Get the wrapped first child, or false if there isn't one
	 */
	function getFirstChild() {
		return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
	}

	/**
	 * Get the wrapped next sibling, or false if there isn't one
	 */
	function getNextSibling() {
		return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
	}

	/**
	 * Get the result of running the XPath query $type relative to this node,
	 * wrapped as a node list
	 */
	function getChildrenOfType( $type ) {
		return new self( $this->xpath->query( $type, $this->node ) );
	}

	/**
	 * Returns the length of the wrapped list, or false if a single node is wrapped
	 */
	function getLength() {
		if ( $this->node instanceof DOMNodeList ) {
			return $this->node->length;
		} else {
			return false;
		}
	}

	/**
	 * Returns the wrapped item at position $i of the list, or false if there is none
	 */
	function item( $i ) {
		$item = $this->node->item( $i );
		return $item ? new self( $item ) : false;
	}

	/**
	 * Get the node name, or '#nodelist' when a DOMNodeList is wrapped
	 */
	function getName() {
		if ( $this->node instanceof DOMNodeList ) {
			return '#nodelist';
		} else {
			return $this->node->nodeName;
		}
	}

	/**
	 * Split a <part> node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 *
	 * @throws MWException if the node has no <name> or no <value> child
	 */
	function splitArg() {
		$names = $this->xpath->query( 'name', $this->node );
		$values = $this->xpath->query( 'value', $this->node );
		if ( !$names->length || !$values->length ) {
			throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
		}
		$name = $names->item( 0 );
		$index = $name->getAttribute( 'index' );
		return array(
			'name' => new self( $name ),
			'index' => $index,
			'value' => new self( $values->item( 0 ) ) );
	}

	/**
	 * Split an <ext> node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 *
	 * @throws MWException if the node has no <name> or no <attr> child
	 */
	function splitExt() {
		$names = $this->xpath->query( 'name', $this->node );
		$attrs = $this->xpath->query( 'attr', $this->node );
		$inners = $this->xpath->query( 'inner', $this->node );
		$closes = $this->xpath->query( 'close', $this->node );
		if ( !$names->length || !$attrs->length ) {
			throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
		}
		$parts = array(
			'name' => new self( $names->item( 0 ) ),
			'attr' => new self( $attrs->item( 0 ) ) );
		if ( $inners->length ) {
			$parts['inner'] = new self( $inners->item( 0 ) );
		}
		if ( $closes->length ) {
			$parts['close'] = new self( $closes->item( 0 ) );
		}
		return $parts;
	}

	/**
	 * Split a <h> node into an associative array containing the 'i' and
	 * 'level' attributes and the node's children as 'contents'.
	 *
	 * @throws MWException if this is not an <h> node
	 */
	function splitHeading() {
		// Compare the actual node name; the former "!$this->nodeName == 'h'"
		// negated first and read a non-existent property, so it never fired
		if ( $this->getName() !== 'h' ) {
			throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
		}
		return array(
			'i' => $this->node->getAttribute( 'i' ),
			'level' => $this->node->getAttribute( 'level' ),
			'contents' => $this->getChildren()
		);
	}
}
Property changes on: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
___________________________________________________________________
Name: svn:eol-style
11511 + native
Index: branches/parser-work/phase3/includes/parser/Preprocessor.php
@@ -0,0 +1,178 @@
 2+<?php
 3+
/**
 * Contract for preprocessor implementations: construction from a parser,
 * creation of expansion frames, and conversion of text to a node tree.
 * @ingroup Parser
 */
interface Preprocessor {
	/**
	 * Create a new preprocessor object based on an initialised Parser object
	 */
	function __construct( $parser );

	/**
	 * Create a new top-level frame for expansion of a page
	 */
	function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter
	 * replacement as used in some extensions
	 */
	function newCustomFrame( $args );

	/**
	 * Preprocess text to a PPNode
	 */
	function preprocessToObj( $text, $flags = 0 );
}
 20+
/**
 * Contract for expansion frames, plus the flag constants accepted by expand().
 * @ingroup Parser
 */
interface PPFrame {
	// Bit flags; combine with bitwise OR
	const NO_ARGS = 1;
	const NO_TEMPLATES = 2;
	const STRIP_COMMENTS = 4;
	const NO_IGNORE = 8;
	const RECOVER_COMMENTS = 16;

	// NO_ARGS | NO_TEMPLATES | NO_IGNORE | RECOVER_COMMENTS, written as a
	// literal because constant expressions are not supported here
	const RECOVER_ORIG = 27;

	/**
	 * Create a child frame
	 */
	function newChild( $args = false, $title = false );

	/**
	 * Expand a document tree node
	 */
	function expand( $root, $flags = 0 );

	/**
	 * Implode with flags for expand()
	 */
	function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode with no flags specified
	 */
	function implode( $sep /*, ... */ );

	/**
	 * Makes an object that, when expand()ed, will be the same as one obtained
	 * with implode()
	 */
	function virtualImplode( $sep /*, ... */ );

	/**
	 * Virtual implode with brackets
	 */
	function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Returns true if there are no arguments in this frame
	 */
	function isEmpty();

	/**
	 * Returns all arguments of this frame
	 */
	function getArguments();

	/**
	 * Returns all numbered arguments of this frame
	 */
	function getNumberedArguments();

	/**
	 * Returns all named arguments of this frame
	 */
	function getNamedArguments();

	/**
	 * Get an argument to this frame by name
	 */
	function getArgument( $name );

	/**
	 * Returns true if the infinite loop check is OK, false if a loop is detected
	 */
	function loopCheck( $title );

	/**
	 * Return true if the frame is a template frame
	 */
	function isTemplate();
}
 99+
/**
 * There are three types of nodes:
 *     * Tree nodes, which have a name and contain other nodes as children
 *     * Array nodes, which also contain other nodes but aren't considered part of a tree
 *     * Leaf nodes, which contain the actual data
 *
 * This interface gives access to the tree structure and to the contents of
 * array nodes. It does not expose the internal structure of leaf nodes; leaf
 * data is reached in two ways:
 *     * PPFrame::expand(), which provides expanded text
 *     * The PPNode::split*() functions, which provide metadata about certain types of tree node
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get an array-type node containing the children of this node.
	 * Returns false if this is not a tree node.
	 */
	function getChildren();

	/**
	 * Get the first child of a tree node. False if there isn't one.
	 */
	function getFirstChild();

	/**
	 * Get the next sibling of any node. False if there isn't one.
	 */
	function getNextSibling();

	/**
	 * Get all children of this tree node which have a given name.
	 * Returns an array-type node, or false if this is not a tree node.
	 */
	function getChildrenOfType( $type );

	/**
	 * Returns the length of the array, or false if this is not an array-type node
	 */
	function getLength();

	/**
	 * Returns an item of an array-type node
	 */
	function item( $i );

	/**
	 * Get the name of this node. The following names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * The subclass may define various other names for tree and leaf nodes.
	 */
	function getName();

	/**
	 * Split a <part> node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 */
	function splitArg();

	/**
	 * Split an <ext> node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 */
	function splitExt();

	/**
	 * Split an <h> node
	 */
	function splitHeading();
}
Property changes on: branches/parser-work/phase3/includes/parser/Preprocessor.php
___________________________________________________________________
Name: svn:eol-style
1180 + native
Index: branches/parser-work/phase3/includes/AutoLoader.php
@@ -443,15 +443,35 @@
444444 'LinkHolderArray' => 'includes/parser/LinkHolderArray.php',
445445 'LinkMarkerReplacer' => 'includes/parser/Parser_LinkHooks.php',
446446 'OnlyIncludeReplacer' => 'includes/parser/Parser.php',
 447+ 'PPCustomFrame_Hash' => 'includes/parser/Preprocessor_Hash.php',
 448+ 'PPCustomFrame_DOM' => 'includes/parser/Preprocessor_DOM.php',
 449+ 'PPDAccum_Hash' => 'includes/parser/Preprocessor_Hash.php',
 450+ 'PPDPart' => 'includes/parser/Preprocessor_DOM.php',
 451+ 'PPDPart_Hash' => 'includes/parser/Preprocessor_Hash.php',
 452+ 'PPDStack' => 'includes/parser/Preprocessor_DOM.php',
 453+ 'PPDStackElement' => 'includes/parser/Preprocessor_DOM.php',
 454+ 'PPDStackElement_Hash' => 'includes/parser/Preprocessor_Hash.php',
 455+ 'PPDStack_Hash' => 'includes/parser/Preprocessor_Hash.php',
447456 'PPFrame' => 'includes/parser/Preprocessor.php',
448 - 'PPTemplateFrame' => 'includes/parser/Preprocessor.php',
449 - 'ParseEngine' => 'includes/parser/ParseEngine.php',
 457+ 'PPFrame_DOM' => 'includes/parser/Preprocessor_DOM.php',
 458+ 'PPFrame_Hash' => 'includes/parser/Preprocessor_Hash.php',
 459+ 'PPNode' => 'includes/parser/Preprocessor.php',
 460+ 'PPNode_DOM' => 'includes/parser/Preprocessor_DOM.php',
 461+ 'PPNode_Hash_Array' => 'includes/parser/Preprocessor_Hash.php',
 462+ 'PPNode_Hash_Attr' => 'includes/parser/Preprocessor_Hash.php',
 463+ 'PPNode_Hash_Text' => 'includes/parser/Preprocessor_Hash.php',
 464+ 'PPNode_Hash_Tree' => 'includes/parser/Preprocessor_Hash.php',
 465+ 'PPTemplateFrame_DOM' => 'includes/parser/Preprocessor_DOM.php',
 466+ 'PPTemplateFrame_Hash' => 'includes/parser/Preprocessor_Hash.php',
450467 'Parser' => 'includes/parser/Parser.php',
451468 'ParserCache' => 'includes/parser/ParserCache.php',
452469 'ParserOptions' => 'includes/parser/ParserOptions.php',
453470 'ParserOutput' => 'includes/parser/ParserOutput.php',
454471 'Parser_DiffTest' => 'includes/parser/Parser_DiffTest.php',
455472 'Parser_LinkHooks' => 'includes/parser/Parser_LinkHooks.php',
 473+ 'Preprocessor' => 'includes/parser/Preprocessor.php',
 474+ 'Preprocessor_DOM' => 'includes/parser/Preprocessor_DOM.php',
 475+ 'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php',
456476 'StripState' => 'includes/parser/Parser.php',
457477 'MWTidy' => 'includes/parser/Tidy.php',
458478

Status & tagging log