Index: branches/parser-work/phase3/maintenance/parserTests.txt |
— | — | @@ -78,8 +78,9 @@ |
79 | 79 | * Item 1 |
80 | 80 | * Item 2 |
81 | 81 | !! result |
82 | | -<ul><li> Item 1</li> |
83 | | -<li> Item 2</li></ul> |
| 82 | +<ul><li> Item 1 |
| 83 | +</li><li> Item 2 |
| 84 | +</li></ul> |
84 | 85 | |
85 | 86 | !! end |
86 | 87 | |
— | — | @@ -102,21 +103,22 @@ |
103 | 104 | * plain l'''italic''plain |
104 | 105 | * plain l''''bold''' plain |
105 | 106 | !! result |
106 | | -<ul><li> plain</li> |
107 | | -<li> plain<i>italic</i>plain</li> |
108 | | -<li> plain<i>italic</i>plain<i>italic</i>plain</li> |
109 | | -<li> plain<b>bold</b>plain</li> |
110 | | -<li> plain<b>bold</b>plain<b>bold</b>plain</li> |
111 | | -<li> plain<i>italic</i>plain<b>bold</b>plain</li> |
112 | | -<li> plain<b>bold</b>plain<i>italic</i>plain</li> |
113 | | -<li> plain<i>italic<b>bold-italic</b>italic</i>plain</li> |
114 | | -<li> plain<b>bold<i>bold-italic</i>bold</b>plain</li> |
115 | | -<li> plain<i><b>bold-italic</b>italic</i>plain</li> |
116 | | -<li> plain<b><i>bold-italic</i>bold</b>plain</li> |
117 | | -<li> plain<i>italic<b>bold-italic</b></i>plain</li> |
118 | | -<li> plain<b>bold<i>bold-italic</i></b>plain</li> |
119 | | -<li> plain l'<i>italic</i>plain</li> |
120 | | -<li> plain l'<b>bold</b> plain</li></ul> |
| 107 | +<ul><li> plain |
| 108 | +</li><li> plain<i>italic</i>plain |
| 109 | +</li><li> plain<i>italic</i>plain<i>italic</i>plain |
| 110 | +</li><li> plain<b>bold</b>plain |
| 111 | +</li><li> plain<b>bold</b>plain<b>bold</b>plain |
| 112 | +</li><li> plain<i>italic</i>plain<b>bold</b>plain |
| 113 | +</li><li> plain<b>bold</b>plain<i>italic</i>plain |
| 114 | +</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain |
| 115 | +</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain |
| 116 | +</li><li> plain<i><b>bold-italic</b>italic</i>plain |
| 117 | +</li><li> plain<b><i>bold-italic</i>bold</b>plain |
| 118 | +</li><li> plain<i>italic<b>bold-italic</b></i>plain |
| 119 | +</li><li> plain<b>bold<i>bold-italic</i></b>plain |
| 120 | +</li><li> plain l'<i>italic</i>plain |
| 121 | +</li><li> plain l'<b>bold</b> plain |
| 122 | +</li></ul> |
121 | 123 | |
122 | 124 | !! end |
123 | 125 | |
— | — | @@ -1793,19 +1795,29 @@ |
1794 | 1796 | *#number level 2 |
1795 | 1797 | *Level 1 |
1796 | 1798 | !! result |
1797 | | -<ul><li>Mixed list</li> |
1798 | | -<ol><li> with numbers</li></ol> |
1799 | | -<ul><li> and bullets</li></ul> |
1800 | | -<ol><li> and numbers</li></ol> |
1801 | | -<li>bullets again</li> |
1802 | | -<ul><li>bullet level 2</li> |
1803 | | -<ul><li>bullet level 3</li> |
1804 | | -<ol><li>Number on level 4</li></ol></ul> |
1805 | | -<li>bullet level 2</li> |
1806 | | -<ol><li>Number on level 3</li> |
1807 | | -<li>Number on level 3</li></ol></ul> |
1808 | | -<ol><li>number level 2</li></ol> |
1809 | | -<li>Level 1</li></ul> |
| 1799 | +<ul><li>Mixed list |
| 1800 | +<ol><li> with numbers |
| 1801 | +</li></ol> |
| 1802 | +<ul><li> and bullets |
| 1803 | +</li></ul> |
| 1804 | +<ol><li> and numbers |
| 1805 | +</li></ol> |
| 1806 | +</li><li>bullets again |
| 1807 | +<ul><li>bullet level 2 |
| 1808 | +<ul><li>bullet level 3 |
| 1809 | +<ol><li>Number on level 4 |
| 1810 | +</li></ol> |
| 1811 | +</li></ul> |
| 1812 | +</li><li>bullet level 2 |
| 1813 | +<ol><li>Number on level 3 |
| 1814 | +</li><li>Number on level 3 |
| 1815 | +</li></ol> |
| 1816 | +</li></ul> |
| 1817 | +<ol><li>number level 2 |
| 1818 | +</li></ol> |
| 1819 | +</li><li>Level 1 |
| 1820 | +</li></ul> |
| 1821 | + |
1810 | 1822 | !! end |
1811 | 1823 | |
1812 | 1824 | !! test |
— | — | @@ -7253,18 +7265,30 @@ |
7254 | 7266 | !! end |
7255 | 7267 | |
7256 | 7268 | !! test |
7257 | | -HHP3.1: Heuristics for headings in preprocessor parenthetical structures |
| 7269 | +HHP2.1: Heuristics for headings in preprocessor parenthetical structures |
7258 | 7270 | !! input |
7259 | 7271 | {{foo| |
7260 | 7272 | =heading= |
7261 | 7273 | !! result |
7262 | 7274 | <p>{{foo| |
7263 | 7275 | </p> |
7264 | | -<h1><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h1> |
| 7276 | +<h1> <span class="mw-headline" id="heading">heading</span></h1> |
7265 | 7277 | |
7266 | 7278 | !! end |
7267 | 7279 | |
7268 | 7280 | !! test |
| 7281 | +HHP2.2: Heuristics for headings in preprocessor parenthetical structures |
| 7282 | +!! input |
| 7283 | +{{foo| |
| 7284 | +==heading== |
| 7285 | +!! result |
| 7286 | +<p>{{foo| |
| 7287 | +</p> |
| 7288 | +<h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline" id="heading">heading</span></h2> |
| 7289 | + |
| 7290 | +!! end |
| 7291 | + |
| 7292 | +!! test |
7269 | 7293 | Tildes in comments |
7270 | 7294 | !! options |
7271 | 7295 | pst |
Index: branches/parser-work/phase3/includes/parser/Preprocessor_Hash.php |
— | — | @@ -0,0 +1,1619 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * Differences from DOM schema: |
| 6 | + * * attribute nodes are children |
| 7 | + * * <h> nodes that aren't at the top are replaced with <possible-h> |
| 8 | + * @ingroup Parser |
| 9 | + */ |
| 10 | +class Preprocessor_Hash implements Preprocessor { |
| 11 | + var $parser; |
| 12 | + |
| 13 | + const CACHE_VERSION = 1; |
| 14 | + |
| 15 | + function __construct( $parser ) { |
| 16 | + $this->parser = $parser; |
| 17 | + } |
| 18 | + |
| 19 | + function newFrame() { |
| 20 | + return new PPFrame_Hash( $this ); |
| 21 | + } |
| 22 | + |
| 23 | + function newCustomFrame( $args ) { |
| 24 | + return new PPCustomFrame_Hash( $this, $args ); |
| 25 | + } |
| 26 | + |
| 27 | + /** |
| 28 | + * Preprocess some wikitext and return the document tree. |
| 29 | + * This is the ghost of Parser::replace_variables(). |
| 30 | + * |
| 31 | + * @param string $text The text to parse |
| 32 | + * @param integer $flags Bitwise combination of: |
| 33 | + * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being |
| 34 | + * included. Default is to assume a direct page view. |
| 35 | + * |
| 36 | + * The generated DOM tree must depend only on the input text and the flags. |
| 37 | + * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. |
| 38 | + * |
| 39 | + * Any flag added to the $flags parameter here, or any other parameter liable to cause a |
| 40 | + * change in the DOM tree for a given text, must be passed through the section identifier |
| 41 | + * in the section edit link and thus back to extractSections(). |
| 42 | + * |
| 43 | + * The output of this function is currently only cached in process memory, but a persistent |
| 44 | + * cache may be implemented at a later date which takes further advantage of these strict |
| 45 | + * dependency requirements. |
| 46 | + * |
| 47 | + * @private |
| 48 | + */ |
| 49 | + function preprocessToObj( $text, $flags = 0 ) { |
| 50 | + wfProfileIn( __METHOD__ ); |
| 51 | + |
| 52 | + |
| 53 | + // Check cache. |
| 54 | + global $wgMemc, $wgPreprocessorCacheThreshold; |
| 55 | + |
| 56 | + $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold; // only text longer than the threshold goes through the cache |
| 57 | + if ( $cacheable ) { |
| 58 | + wfProfileIn( __METHOD__.'-cacheable' ); |
| 59 | + |
| 60 | + $cacheKey = wfMemcKey( 'preprocess-hash', md5($text), $flags ); |
| 61 | + $cacheValue = $wgMemc->get( $cacheKey ); |
| 62 | + if ( $cacheValue ) { |
| 63 | + $version = substr( $cacheValue, 0, 8 ); // 8-character version prefix, written below with sprintf( "%08d", ... ) |
| 64 | + if ( intval( $version ) == self::CACHE_VERSION ) { |
| 65 | + $hash = unserialize( substr( $cacheValue, 8 ) ); |
| 66 | + // From the cache |
| 67 | + wfDebugLog( "Preprocessor", |
| 68 | + "Loaded preprocessor hash from memcached (key $cacheKey)" ); |
| 69 | + wfProfileOut( __METHOD__.'-cacheable' ); |
| 70 | + wfProfileOut( __METHOD__ ); |
| 71 | + return $hash; |
| 72 | + } |
| 73 | + } |
| 74 | + wfProfileIn( __METHOD__.'-cache-miss' ); |
| 75 | + } |
| 76 | + |
| 77 | + $rules = array( |
| 78 | + '{' => array( |
| 79 | + 'end' => '}', |
| 80 | + 'names' => array( |
| 81 | + 2 => 'template', |
| 82 | + 3 => 'tplarg', |
| 83 | + ), |
| 84 | + 'min' => 2, |
| 85 | + 'max' => 3, |
| 86 | + ), |
| 87 | + '[' => array( |
| 88 | + 'end' => ']', |
| 89 | + 'names' => array( 2 => null ), |
| 90 | + 'min' => 2, |
| 91 | + 'max' => 2, |
| 92 | + ) |
| 93 | + ); |
| 94 | + |
| 95 | + $forInclusion = $flags & Parser::PTD_FOR_INCLUSION; |
| 96 | + |
| 97 | + $xmlishElements = $this->parser->getStripList(); |
| 98 | + $enableOnlyinclude = false; |
| 99 | + if ( $forInclusion ) { |
| 100 | + $ignoredTags = array( 'includeonly', '/includeonly' ); |
| 101 | + $ignoredElements = array( 'noinclude' ); |
| 102 | + $xmlishElements[] = 'noinclude'; |
| 103 | + if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) { |
| 104 | + $enableOnlyinclude = true; |
| 105 | + } |
| 106 | + } else { |
| 107 | + $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ); |
| 108 | + $ignoredElements = array( 'includeonly' ); |
| 109 | + $xmlishElements[] = 'includeonly'; |
| 110 | + } |
| 111 | + $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); |
| 112 | + |
| 113 | + // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset |
| 114 | + $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; |
| 115 | + |
| 116 | + $stack = new PPDStack_Hash; |
| 117 | + |
| 118 | + $searchBase = "[{<\n"; |
| 119 | + $revText = strrev( $text ); // For fast reverse searches |
| 120 | + |
| 121 | + $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start |
| 122 | + $accum =& $stack->getAccum(); # Current accumulator |
| 123 | + $findEquals = false; # True to find equals signs in arguments |
| 124 | + $findPipe = false; # True to take notice of pipe characters |
| 125 | + $headingIndex = 1; |
| 126 | + $inHeading = false; # True if $i is inside a possible heading |
| 127 | + $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i |
| 128 | + $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude> |
| 129 | + $fakeLineStart = true; # Do a line-start run without outputting an LF character |
| 130 | + |
| 131 | + while ( true ) { |
| 132 | + //$this->memCheck(); |
| 133 | + |
| 134 | + if ( $findOnlyinclude ) { |
| 135 | + // Ignore all input up to the next <onlyinclude> |
| 136 | + $startPos = strpos( $text, '<onlyinclude>', $i ); |
| 137 | + if ( $startPos === false ) { |
| 138 | + // Ignored section runs to the end |
| 139 | + $accum->addNodeWithText( 'ignore', substr( $text, $i ) ); |
| 140 | + break; |
| 141 | + } |
| 142 | + $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end |
| 143 | + $accum->addNodeWithText( 'ignore', substr( $text, $i, $tagEndPos - $i ) ); |
| 144 | + $i = $tagEndPos; |
| 145 | + $findOnlyinclude = false; |
| 146 | + } |
| 147 | + |
| 148 | + if ( $fakeLineStart ) { |
| 149 | + $found = 'line-start'; |
| 150 | + $curChar = ''; |
| 151 | + } else { |
| 152 | + # Find next opening brace, closing brace or pipe |
| 153 | + $search = $searchBase; |
| 154 | + if ( $stack->top === false ) { |
| 155 | + $currentClosing = ''; |
| 156 | + } else { |
| 157 | + $currentClosing = $stack->top->close; |
| 158 | + $search .= $currentClosing; |
| 159 | + } |
| 160 | + if ( $findPipe ) { |
| 161 | + $search .= '|'; |
| 162 | + } |
| 163 | + if ( $findEquals ) { |
| 164 | + // First equals will be for the template |
| 165 | + $search .= '='; |
| 166 | + } |
| 167 | + $rule = null; |
| 168 | + # Output literal section, advance input counter |
| 169 | + $literalLength = strcspn( $text, $search, $i ); |
| 170 | + if ( $literalLength > 0 ) { |
| 171 | + $accum->addLiteral( substr( $text, $i, $literalLength ) ); |
| 172 | + $i += $literalLength; |
| 173 | + } |
| 174 | + if ( $i >= strlen( $text ) ) { |
| 175 | + if ( $currentClosing == "\n" ) { |
| 176 | + // Do a past-the-end run to finish off the heading |
| 177 | + $curChar = ''; |
| 178 | + $found = 'line-end'; |
| 179 | + } else { |
| 180 | + # All done |
| 181 | + break; |
| 182 | + } |
| 183 | + } else { |
| 184 | + $curChar = $text[$i]; |
| 185 | + if ( $curChar == '|' ) { |
| 186 | + $found = 'pipe'; |
| 187 | + } elseif ( $curChar == '=' ) { |
| 188 | + $found = 'equals'; |
| 189 | + } elseif ( $curChar == '<' ) { |
| 190 | + $found = 'angle'; |
| 191 | + } elseif ( $curChar == "\n" ) { |
| 192 | + if ( $inHeading ) { |
| 193 | + $found = 'line-end'; |
| 194 | + } else { |
| 195 | + $found = 'line-start'; |
| 196 | + } |
| 197 | + } elseif ( $curChar == $currentClosing ) { |
| 198 | + $found = 'close'; |
| 199 | + } elseif ( isset( $rules[$curChar] ) ) { |
| 200 | + $found = 'open'; |
| 201 | + $rule = $rules[$curChar]; |
| 202 | + } else { |
| 203 | + # Some versions of PHP have a strcspn which stops on null characters |
| 204 | + # Ignore and continue |
| 205 | + ++$i; |
| 206 | + continue; |
| 207 | + } |
| 208 | + } |
| 209 | + } |
| 210 | + |
| 211 | + if ( $found == 'angle' ) { |
| 212 | + $matches = false; |
| 213 | + // Handle </onlyinclude> |
| 214 | + if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) { |
| 215 | + $findOnlyinclude = true; |
| 216 | + continue; |
| 217 | + } |
| 218 | + |
| 219 | + // Determine element name |
| 220 | + if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) { |
| 221 | + // Element name missing or not listed |
| 222 | + $accum->addLiteral( '<' ); |
| 223 | + ++$i; |
| 224 | + continue; |
| 225 | + } |
| 226 | + // Handle comments |
| 227 | + if ( isset( $matches[2] ) && $matches[2] == '!--' ) { |
| 228 | + // To avoid leaving blank lines, when a comment is both preceded |
| 229 | + // and followed by a newline (ignoring spaces), trim leading and |
| 230 | + // trailing spaces and one of the newlines. |
| 231 | + |
| 232 | + // Find the end |
| 233 | + $endPos = strpos( $text, '-->', $i + 4 ); |
| 234 | + if ( $endPos === false ) { |
| 235 | + // Unclosed comment in input, runs to end |
| 236 | + $inner = substr( $text, $i ); |
| 237 | + $accum->addNodeWithText( 'comment', $inner ); |
| 238 | + $i = strlen( $text ); |
| 239 | + } else { |
| 240 | + // Search backwards for leading whitespace |
| 241 | + $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0; |
| 242 | + // Search forwards for trailing whitespace |
| 243 | + // $wsEnd will be the position of the last space |
| 244 | + $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); |
| 245 | + // Eat the line if possible |
| 246 | + // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at |
| 247 | + // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but |
| 248 | + // it's a possible beneficial b/c break. |
| 249 | + if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" |
| 250 | + && substr( $text, $wsEnd + 1, 1 ) == "\n" ) |
| 251 | + { |
| 252 | + $startPos = $wsStart; |
| 253 | + $endPos = $wsEnd + 1; |
| 254 | + // Remove leading whitespace from the end of the accumulator |
| 255 | + // Sanity check first though |
| 256 | + $wsLength = $i - $wsStart; |
| 257 | + if ( $wsLength > 0 |
| 258 | + && $accum->lastNode instanceof PPNode_Hash_Text |
| 259 | + && substr( $accum->lastNode->value, -$wsLength ) === str_repeat( ' ', $wsLength ) ) |
| 260 | + { |
| 261 | + $accum->lastNode->value = substr( $accum->lastNode->value, 0, -$wsLength ); |
| 262 | + } |
| 263 | + // Do a line-start run next time to look for headings after the comment |
| 264 | + $fakeLineStart = true; |
| 265 | + } else { |
| 266 | + // No line to eat, just take the comment itself |
| 267 | + $startPos = $i; |
| 268 | + $endPos += 2; |
| 269 | + } |
| 270 | + |
| 271 | + if ( $stack->top ) { |
| 272 | + $part = $stack->top->getCurrentPart(); |
| 273 | + if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) { |
| 274 | + // Comments abutting, no change in visual end |
| 275 | + $part->commentEnd = $wsEnd; |
| 276 | + } else { |
| 277 | + $part->visualEnd = $wsStart; |
| 278 | + $part->commentEnd = $endPos; |
| 279 | + } |
| 280 | + } |
| 281 | + $i = $endPos + 1; |
| 282 | + $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); |
| 283 | + $accum->addNodeWithText( 'comment', $inner ); |
| 284 | + } |
| 285 | + continue; |
| 286 | + } |
| 287 | + $name = $matches[1]; |
| 288 | + $lowerName = strtolower( $name ); |
| 289 | + $attrStart = $i + strlen( $name ) + 1; |
| 290 | + |
| 291 | + // Find end of tag |
| 292 | + $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart ); |
| 293 | + if ( $tagEndPos === false ) { |
| 294 | + // Infinite backtrack |
| 295 | + // Disable tag search to prevent worst-case O(N^2) performance |
| 296 | + $noMoreGT = true; |
| 297 | + $accum->addLiteral( '<' ); |
| 298 | + ++$i; |
| 299 | + continue; |
| 300 | + } |
| 301 | + |
| 302 | + // Handle ignored tags |
| 303 | + if ( in_array( $lowerName, $ignoredTags ) ) { |
| 304 | + $accum->addNodeWithText( 'ignore', substr( $text, $i, $tagEndPos - $i + 1 ) ); |
| 305 | + $i = $tagEndPos + 1; |
| 306 | + continue; |
| 307 | + } |
| 308 | + |
| 309 | + $tagStartPos = $i; |
| 310 | + if ( $text[$tagEndPos-1] == '/' ) { |
| 311 | + // Short end tag |
| 312 | + $attrEnd = $tagEndPos - 1; |
| 313 | + $inner = null; |
| 314 | + $i = $tagEndPos + 1; |
| 315 | + $close = null; |
| 316 | + } else { |
| 317 | + $attrEnd = $tagEndPos; |
| 318 | + // Find closing tag |
| 319 | + if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", |
| 320 | + $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) |
| 321 | + { |
| 322 | + $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); |
| 323 | + $i = $matches[0][1] + strlen( $matches[0][0] ); |
| 324 | + $close = $matches[0][0]; |
| 325 | + } else { |
| 326 | + // No end tag -- let it run out to the end of the text. |
| 327 | + $inner = substr( $text, $tagEndPos + 1 ); |
| 328 | + $i = strlen( $text ); |
| 329 | + $close = null; |
| 330 | + } |
| 331 | + } |
| 332 | + // <includeonly> and <noinclude> just become <ignore> tags |
| 333 | + if ( in_array( $lowerName, $ignoredElements ) ) { |
| 334 | + $accum->addNodeWithText( 'ignore', substr( $text, $tagStartPos, $i - $tagStartPos ) ); |
| 335 | + continue; |
| 336 | + } |
| 337 | + |
| 338 | + if ( $attrEnd <= $attrStart ) { |
| 339 | + $attr = ''; |
| 340 | + } else { |
| 341 | + // Note that the attr element contains the whitespace between name and attribute, |
| 342 | + // this is necessary for precise reconstruction during pre-save transform. |
| 343 | + $attr = substr( $text, $attrStart, $attrEnd - $attrStart ); |
| 344 | + } |
| 345 | + |
| 346 | + $extNode = new PPNode_Hash_Tree( 'ext' ); |
| 347 | + $extNode->addChild( PPNode_Hash_Tree::newWithText( 'name', $name ) ); |
| 348 | + $extNode->addChild( PPNode_Hash_Tree::newWithText( 'attr', $attr ) ); |
| 349 | + if ( $inner !== null ) { |
| 350 | + $extNode->addChild( PPNode_Hash_Tree::newWithText( 'inner', $inner ) ); |
| 351 | + } |
| 352 | + if ( $close !== null ) { |
| 353 | + $extNode->addChild( PPNode_Hash_Tree::newWithText( 'close', $close ) ); |
| 354 | + } |
| 355 | + $accum->addNode( $extNode ); |
| 356 | + } |
| 357 | + |
| 358 | + elseif ( $found == 'line-start' ) { |
| 359 | + // Is this the start of a heading? |
| 360 | + // Line break belongs before the heading element in any case |
| 361 | + if ( $fakeLineStart ) { |
| 362 | + $fakeLineStart = false; |
| 363 | + } else { |
| 364 | + $accum->addLiteral( $curChar ); |
| 365 | + $i++; |
| 366 | + } |
| 367 | + |
| 368 | + $count = strspn( $text, '=', $i, 6 ); |
| 369 | + if ( $count == 1 && $findEquals ) { |
| 370 | + // DWIM: This looks kind of like a name/value separator |
| 371 | + // Let's let the equals handler have it and break the potential heading |
| 372 | + // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex. |
| 373 | + } elseif ( $count > 0 ) { |
| 374 | + $piece = array( |
| 375 | + 'open' => "\n", |
| 376 | + 'close' => "\n", |
| 377 | + 'parts' => array( new PPDPart_Hash( str_repeat( '=', $count ) ) ), |
| 378 | + 'startPos' => $i, |
| 379 | + 'count' => $count ); |
| 380 | + $stack->push( $piece ); |
| 381 | + $accum =& $stack->getAccum(); |
| 382 | + extract( $stack->getFlags() ); |
| 383 | + $i += $count; |
| 384 | + } |
| 385 | + } |
| 386 | + |
| 387 | + elseif ( $found == 'line-end' ) { |
| 388 | + $piece = $stack->top; |
| 389 | + // A heading must be open: line-end is only reported when $inHeading is set or the current closing character is \n |
| 390 | + assert( $piece->open == "\n" ); |
| 391 | + $part = $piece->getCurrentPart(); |
| 392 | + // Search back through the input to see if it has a proper close |
| 393 | + // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient |
| 394 | + $wsLength = strspn( $revText, " \t", strlen( $text ) - $i ); |
| 395 | + $searchStart = $i - $wsLength; |
| 396 | + if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) { |
| 397 | + // Comment found at line end |
| 398 | + // Search for equals signs before the comment |
| 399 | + $searchStart = $part->visualEnd; |
| 400 | + $searchStart -= strspn( $revText, " \t", strlen( $text ) - $searchStart ); |
| 401 | + } |
| 402 | + $count = $piece->count; |
| 403 | + $equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart ); |
| 404 | + if ( $equalsLength > 0 ) { |
| 405 | + if ( $i - $equalsLength == $piece->startPos ) { // NOTE(review): $equalsLength is measured back from $searchStart, which differs from $i after trailing whitespace or a comment; confirm $i is intended here |
| 406 | + // This is just a single string of equals signs on its own line |
| 407 | + // Replicate the doHeadings behaviour /={count}(.+)={count}/ |
| 408 | + // First find out how many equals signs there really are (don't stop at 6) |
| 409 | + $count = $equalsLength; |
| 410 | + if ( $count < 3 ) { |
| 411 | + $count = 0; |
| 412 | + } else { |
| 413 | + $count = min( 6, intval( ( $count - 1 ) / 2 ) ); |
| 414 | + } |
| 415 | + } else { |
| 416 | + $count = min( $equalsLength, $count ); |
| 417 | + } |
| 418 | + if ( $count > 0 ) { |
| 419 | + // Normal match, output <possible-h>; root-level ones are renamed to <h> after the main loop |
| 420 | + $element = new PPNode_Hash_Tree( 'possible-h' ); |
| 421 | + $element->addChild( new PPNode_Hash_Attr( 'level', $count ) ); |
| 422 | + $element->addChild( new PPNode_Hash_Attr( 'i', $headingIndex++ ) ); |
| 423 | + $element->lastChild->nextSibling = $accum->firstNode; |
| 424 | + $element->lastChild = $accum->lastNode; |
| 425 | + } else { |
| 426 | + // Single equals sign on its own line, count=0 |
| 427 | + $element = $accum; |
| 428 | + } |
| 429 | + } else { |
| 430 | + // No match, no <h>, just pass down the inner text |
| 431 | + $element = $accum; |
| 432 | + } |
| 433 | + // Unwind the stack |
| 434 | + $stack->pop(); |
| 435 | + $accum =& $stack->getAccum(); |
| 436 | + extract( $stack->getFlags() ); |
| 437 | + |
| 438 | + // Append the result to the enclosing accumulator |
| 439 | + if ( $element instanceof PPNode ) { |
| 440 | + $accum->addNode( $element ); |
| 441 | + } else { |
| 442 | + $accum->addAccum( $element ); |
| 443 | + } |
| 444 | + // Note that we do NOT increment the input pointer. |
| 445 | + // This is because the closing linebreak could be the opening linebreak of |
| 446 | + // another heading. Infinite loops are avoided because the next iteration MUST |
| 447 | + // hit the heading open case above, which unconditionally increments the |
| 448 | + // input pointer. |
| 449 | + } |
| 450 | + |
| 451 | + elseif ( $found == 'open' ) { |
| 452 | + # count opening brace characters |
| 453 | + $count = strspn( $text, $curChar, $i ); |
| 454 | + |
| 455 | + # we need to add to stack only if opening brace count is enough for one of the rules |
| 456 | + if ( $count >= $rule['min'] ) { |
| 457 | + # Add it to the stack |
| 458 | + $piece = array( |
| 459 | + 'open' => $curChar, |
| 460 | + 'close' => $rule['end'], |
| 461 | + 'count' => $count, |
| 462 | + 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), |
| 463 | + ); |
| 464 | + |
| 465 | + $stack->push( $piece ); |
| 466 | + $accum =& $stack->getAccum(); |
| 467 | + extract( $stack->getFlags() ); |
| 468 | + } else { |
| 469 | + # Add literal brace(s) |
| 470 | + $accum->addLiteral( str_repeat( $curChar, $count ) ); |
| 471 | + } |
| 472 | + $i += $count; |
| 473 | + } |
| 474 | + |
| 475 | + elseif ( $found == 'close' ) { |
| 476 | + $piece = $stack->top; |
| 477 | + # lets check if there are enough characters for closing brace |
| 478 | + $maxCount = $piece->count; |
| 479 | + $count = strspn( $text, $curChar, $i, $maxCount ); |
| 480 | + |
| 481 | + # check for maximum matching characters (if there are 5 closing |
| 482 | + # characters, we will probably need only 3 - depending on the rules) |
| 483 | + $matchingCount = 0; |
| 484 | + $rule = $rules[$piece->open]; |
| 485 | + if ( $count > $rule['max'] ) { |
| 486 | + # The specified maximum exists in the callback array, unless the caller |
| 487 | + # has made an error |
| 488 | + $matchingCount = $rule['max']; |
| 489 | + } else { |
| 490 | + # Count is less than the maximum |
| 491 | + # Skip any gaps in the callback array to find the true largest match |
| 492 | + # Need to use array_key_exists not isset because the callback can be null |
| 493 | + $matchingCount = $count; |
| 494 | + while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { |
| 495 | + --$matchingCount; |
| 496 | + } |
| 497 | + } |
| 498 | + |
| 499 | + if ($matchingCount <= 0) { |
| 500 | + # No matching element found in callback array |
| 501 | + # Output a literal closing brace and continue |
| 502 | + $accum->addLiteral( str_repeat( $curChar, $count ) ); |
| 503 | + $i += $count; |
| 504 | + continue; |
| 505 | + } |
| 506 | + $name = $rule['names'][$matchingCount]; |
| 507 | + if ( $name === null ) { |
| 508 | + // No element, just literal text |
| 509 | + $element = $piece->breakSyntax( $matchingCount ); |
| 510 | + $element->addLiteral( str_repeat( $rule['end'], $matchingCount ) ); |
| 511 | + } else { |
| 512 | + # Create XML element |
| 513 | + # Note: $parts is already XML, does not need to be encoded further |
| 514 | + $parts = $piece->parts; |
| 515 | + $titleAccum = $parts[0]->out; |
| 516 | + unset( $parts[0] ); |
| 517 | + |
| 518 | + $element = new PPNode_Hash_Tree( $name ); |
| 519 | + |
| 520 | + # The invocation is at the start of the line if lineStart is set in |
| 521 | + # the stack, and all opening brackets are used up. |
| 522 | + if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) { |
| 523 | + $element->addChild( new PPNode_Hash_Attr( 'lineStart', 1 ) ); |
| 524 | + } |
| 525 | + $titleNode = new PPNode_Hash_Tree( 'title' ); |
| 526 | + $titleNode->firstChild = $titleAccum->firstNode; |
| 527 | + $titleNode->lastChild = $titleAccum->lastNode; |
| 528 | + $element->addChild( $titleNode ); |
| 529 | + $argIndex = 1; |
| 530 | + foreach ( $parts as $partIndex => $part ) { |
| 531 | + if ( isset( $part->eqpos ) ) { |
| 532 | + // Find equals |
| 533 | + $lastNode = false; |
| 534 | + for ( $node = $part->out->firstNode; $node; $node = $node->nextSibling ) { |
| 535 | + if ( $node === $part->eqpos ) { |
| 536 | + break; |
| 537 | + } |
| 538 | + $lastNode = $node; |
| 539 | + } |
| 540 | + if ( !$node ) { |
| 541 | + throw new MWException( __METHOD__. ': eqpos not found' ); |
| 542 | + } |
| 543 | + if ( $node->name !== 'equals' ) { |
| 544 | + throw new MWException( __METHOD__ .': eqpos is not equals' ); |
| 545 | + } |
| 546 | + $equalsNode = $node; |
| 547 | + |
| 548 | + // Construct name node |
| 549 | + $nameNode = new PPNode_Hash_Tree( 'name' ); |
| 550 | + if ( $lastNode !== false ) { |
| 551 | + $lastNode->nextSibling = false; |
| 552 | + $nameNode->firstChild = $part->out->firstNode; |
| 553 | + $nameNode->lastChild = $lastNode; |
| 554 | + } |
| 555 | + |
| 556 | + // Construct value node |
| 557 | + $valueNode = new PPNode_Hash_Tree( 'value' ); |
| 558 | + if ( $equalsNode->nextSibling !== false ) { |
| 559 | + $valueNode->firstChild = $equalsNode->nextSibling; |
| 560 | + $valueNode->lastChild = $part->out->lastNode; |
| 561 | + } |
| 562 | + $partNode = new PPNode_Hash_Tree( 'part' ); |
| 563 | + $partNode->addChild( $nameNode ); |
| 564 | + $partNode->addChild( $equalsNode->firstChild ); |
| 565 | + $partNode->addChild( $valueNode ); |
| 566 | + $element->addChild( $partNode ); |
| 567 | + } else { |
| 568 | + $partNode = new PPNode_Hash_Tree( 'part' ); |
| 569 | + $nameNode = new PPNode_Hash_Tree( 'name' ); |
| 570 | + $nameNode->addChild( new PPNode_Hash_Attr( 'index', $argIndex++ ) ); |
| 571 | + $valueNode = new PPNode_Hash_Tree( 'value' ); |
| 572 | + $valueNode->firstChild = $part->out->firstNode; |
| 573 | + $valueNode->lastChild = $part->out->lastNode; |
| 574 | + $partNode->addChild( $nameNode ); |
| 575 | + $partNode->addChild( $valueNode ); |
| 576 | + $element->addChild( $partNode ); |
| 577 | + } |
| 578 | + } |
| 579 | + } |
| 580 | + |
| 581 | + # Advance input pointer |
| 582 | + $i += $matchingCount; |
| 583 | + |
| 584 | + # Unwind the stack |
| 585 | + $stack->pop(); |
| 586 | + $accum =& $stack->getAccum(); |
| 587 | + |
| 588 | + # Re-add the old stack element if it still has unmatched opening characters remaining |
| 589 | + if ($matchingCount < $piece->count) { |
| 590 | + $piece->parts = array( new PPDPart_Hash ); |
| 591 | + $piece->count -= $matchingCount; |
| 592 | + # do we still qualify for any callback with remaining count? |
| 593 | + $names = $rules[$piece->open]['names']; |
| 594 | + $skippedBraces = 0; |
| 595 | + $enclosingAccum =& $accum; |
| 596 | + while ( $piece->count ) { |
| 597 | + if ( array_key_exists( $piece->count, $names ) ) { |
| 598 | + $stack->push( $piece ); |
| 599 | + $accum =& $stack->getAccum(); |
| 600 | + break; |
| 601 | + } |
| 602 | + --$piece->count; |
| 603 | + $skippedBraces ++; |
| 604 | + } |
| 605 | + $enclosingAccum->addLiteral( str_repeat( $piece->open, $skippedBraces ) ); |
| 606 | + } |
| 607 | + |
| 608 | + extract( $stack->getFlags() ); |
| 609 | + |
| 610 | + # Add XML element to the enclosing accumulator |
| 611 | + if ( $element instanceof PPNode ) { |
| 612 | + $accum->addNode( $element ); |
| 613 | + } else { |
| 614 | + $accum->addAccum( $element ); |
| 615 | + } |
| 616 | + } |
| 617 | + |
| 618 | + elseif ( $found == 'pipe' ) { |
| 619 | + $findEquals = true; // shortcut for getFlags() |
| 620 | + $stack->addPart(); |
| 621 | + $accum =& $stack->getAccum(); |
| 622 | + ++$i; |
| 623 | + } |
| 624 | + |
| 625 | + elseif ( $found == 'equals' ) { |
| 626 | + $findEquals = false; // shortcut for getFlags() |
| 627 | + $accum->addNodeWithText( 'equals', '=' ); |
| 628 | + $stack->getCurrentPart()->eqpos = $accum->lastNode; |
| 629 | + ++$i; |
| 630 | + } |
| 631 | + } |
| 632 | + |
| 633 | + # Output any remaining unclosed brackets |
| 634 | + foreach ( $stack->stack as $piece ) { |
| 635 | + $stack->rootAccum->addAccum( $piece->breakSyntax() ); |
| 636 | + } |
| 637 | + |
| 638 | + # Enable top-level headings |
| 639 | + for ( $node = $stack->rootAccum->firstNode; $node; $node = $node->nextSibling ) { |
| 640 | + if ( isset( $node->name ) && $node->name === 'possible-h' ) { |
| 641 | + $node->name = 'h'; |
| 642 | + } |
| 643 | + } |
| 644 | + |
| 645 | + $rootNode = new PPNode_Hash_Tree( 'root' ); |
| 646 | + $rootNode->firstChild = $stack->rootAccum->firstNode; |
| 647 | + $rootNode->lastChild = $stack->rootAccum->lastNode; |
| 648 | + |
| 649 | + // Cache |
| 650 | + if ($cacheable) { |
| 651 | + $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . serialize( $rootNode );; |
| 652 | + $wgMemc->set( $cacheKey, $cacheValue, 86400 ); // 86400 is presumably the expiry in seconds (one day); confirm against the cache API |
| 653 | + wfProfileOut( __METHOD__.'-cache-miss' ); |
| 654 | + wfProfileOut( __METHOD__.'-cacheable' ); |
| 655 | + wfDebugLog( "Preprocessor", "Saved preprocessor Hash to memcached (key $cacheKey)" ); |
| 656 | + } |
| 657 | + |
| 658 | + wfProfileOut( __METHOD__ ); |
| 659 | + return $rootNode; |
| 660 | + } |
| 661 | +} |
| 662 | + |
/**
 * Stack helper for Preprocessor::preprocessToObj().
 *
 * Configures the parent stack to create PPDStackElement_Hash elements and
 * installs an empty PPDAccum_Hash as the root accumulator.
 * @ingroup Parser
 */
class PPDStack_Hash extends PPDStack {
	public function __construct() {
		// Set before the parent constructor runs, in case it reads the class name
		$this->elementClass = 'PPDStackElement_Hash';
		parent::__construct();
		$this->rootAccum = new PPDAccum_Hash();
	}
}
| 674 | + |
/**
 * Stack element whose parts are PPDPart_Hash objects.
 * @ingroup Parser
 */
class PPDStackElement_Hash extends PPDStackElement {
	/**
	 * @param $data Array passed straight through to the parent constructor
	 */
	public function __construct( $data = array() ) {
		$this->partClass = 'PPDPart_Hash';
		parent::__construct( $data );
	}

	/**
	 * Get the accumulator that would result if the close is not found.
	 *
	 * @param $openingCount Number of opening characters to emit as literal
	 *   text; false (the default) means use $this->count.
	 * @return The first part's accumulator for a newline-opened element,
	 *   otherwise a new PPDAccum_Hash holding the opening characters followed
	 *   by the parts joined with '|'.
	 */
	public function breakSyntax( $openingCount = false ) {
		// Newline-opened element: hand back the first part's output as it is
		if ( $this->open == "\n" ) {
			return $this->parts[0]->out;
		}

		$repeat = ( $openingCount === false ) ? $this->count : $openingCount;
		$result = new PPDAccum_Hash;
		$result->addLiteral( str_repeat( $this->open, $repeat ) );

		// Re-insert the pipe separators between consecutive parts
		$needsPipe = false;
		foreach ( $this->parts as $part ) {
			if ( $needsPipe ) {
				$result->addLiteral( '|' );
			}
			$needsPipe = true;
			$result->addAccum( $part->out );
		}
		return $result;
	}
}
| 709 | + |
/**
 * Part of a stack element whose output is collected in a PPDAccum_Hash.
 * @ingroup Parser
 */
class PPDPart_Hash extends PPDPart {
	/**
	 * @param $out Initial literal text for the part; the empty string leaves
	 *   the accumulator empty.
	 */
	public function __construct( $out = '' ) {
		$initial = new PPDAccum_Hash;
		if ( $out === '' ) {
			// Nothing to seed the accumulator with
		} else {
			$initial->addLiteral( $out );
		}
		parent::__construct( $initial );
	}
}
| 722 | + |
/**
 * Accumulator holding a singly linked list of nodes, chained through each
 * node's nextSibling property. firstNode and lastNode are false while the
 * list is empty.
 * @ingroup Parser
 */
class PPDAccum_Hash {
	public $firstNode, $lastNode;

	public function __construct() {
		$this->firstNode = false;
		$this->lastNode = false;
	}

	/**
	 * Append a string literal.
	 * If the list already ends in a text node the string is merged into it,
	 * otherwise a new PPNode_Hash_Text is appended.
	 */
	public function addLiteral( $s ) {
		$tail = $this->lastNode;
		if ( $tail instanceof PPNode_Hash_Text ) {
			$tail->value .= $s;
			return;
		}

		$textNode = new PPNode_Hash_Text( $s );
		if ( $tail === false ) {
			$this->firstNode = $textNode;
		} else {
			$tail->nextSibling = $textNode;
		}
		$this->lastNode = $textNode;
	}

	/**
	 * Append a PPNode
	 */
	public function addNode( PPNode $node ) {
		if ( $this->lastNode === false ) {
			$this->firstNode = $node;
		} else {
			$this->lastNode->nextSibling = $node;
		}
		$this->lastNode = $node;
	}

	/**
	 * Append a tree node with text contents
	 */
	public function addNodeWithText( $name, $value ) {
		$this->addNode( PPNode_Hash_Tree::newWithText( $name, $value ) );
	}

	/**
	 * Append a PPDAccum_Hash
	 * Takes over ownership of the nodes in the source argument. These nodes may
	 * subsequently be modified, especially nextSibling.
	 */
	public function addAccum( $accum ) {
		if ( $accum->lastNode === false ) {
			// Source is empty: nothing to add
			return;
		}

		if ( $this->lastNode === false ) {
			$this->firstNode = $accum->firstNode;
		} else {
			$this->lastNode->nextSibling = $accum->firstNode;
		}
		$this->lastNode = $accum->lastNode;
	}
}
| 784 | + |
| 785 | +/** |
| 786 | + * An expansion frame, used as a context to expand the result of preprocessToObj() |
| 787 | + * @ingroup Parser |
| 788 | + */ |
| 789 | +class PPFrame_Hash implements PPFrame { |
| 790 | + var $preprocessor, $parser, $title; |
| 791 | + var $titleCache; |
| 792 | + |
| 793 | + /** |
| 794 | + * Hashtable listing templates which are disallowed for expansion in this frame, |
| 795 | + * having been encountered previously in parent frames. |
| 796 | + */ |
| 797 | + var $loopCheckHash; |
| 798 | + |
| 799 | + /** |
| 800 | + * Recursion depth of this frame, top = 0 |
| 801 | + * Note that this is NOT the same as expansion depth in expand() |
| 802 | + */ |
| 803 | + var $depth; |
| 804 | + |
| 805 | + |
/**
 * Construct a new preprocessor frame.
 * The frame takes its parser from the preprocessor and its title from the
 * parser; it starts at depth 0 with an empty loop-check table.
 * @param Preprocessor $preprocessor The parent preprocessor
 */
function __construct( $preprocessor ) {
	$parser = $preprocessor->parser;
	$title = $parser->mTitle;

	$this->preprocessor = $preprocessor;
	$this->parser = $parser;
	$this->title = $title;

	// Level 0 of the title cache is this frame's own title key, or false without a title
	if ( $title ) {
		$rootKey = $title->getPrefixedDBkey();
	} else {
		$rootKey = false;
	}
	$this->titleCache = array( $rootKey );
	$this->loopCheckHash = array();
	$this->depth = 0;
}
| 818 | + |
/**
 * Create a new child frame
 * $args is optionally a multi-root PPNode or array containing the template arguments
 *
 * Every argument is split with splitArg(). An argument with a non-empty index
 * is stored as a numbered argument; otherwise its name is expanded (comments
 * stripped) and trimmed, and it is stored as a named argument. A later
 * argument replaces an earlier one with the same key in either table.
 *
 * @param $args PPNode_Hash_Array, array, or false for no arguments
 * @param $title Title for the child frame; false reuses this frame's title
 * @return PPTemplateFrame_Hash
 */
function newChild( $args = false, $title = false ) {
	$named = array();
	$numbered = array();

	if ( $title === false ) {
		$title = $this->title;
	}

	if ( $args !== false ) {
		if ( $args instanceof PPNode_Hash_Array ) {
			$args = $args->value;
		} elseif ( !is_array( $args ) ) {
			throw new MWException( __METHOD__ . ': $args must be array or PPNode_Hash_Array' );
		}

		foreach ( $args as $arg ) {
			$bits = $arg->splitArg();
			$index = $bits['index'];
			if ( $index === '' ) {
				// Named parameter
				$key = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
				$named[$key] = $bits['value'];
				unset( $numbered[$key] );
			} else {
				// Numbered parameter
				$numbered[$index] = $bits['value'];
				unset( $named[$index] );
			}
		}
	}

	return new PPTemplateFrame_Hash( $this->preprocessor, $this, $numbered, $named, $title );
}
| 852 | + |
/**
 * Expand a node, node list, array or string into text.
 *
 * A string is returned unchanged. Otherwise the input is walked with three
 * parallel stacks (output, iterator, index) instead of PHP recursion for
 * generic children. Headings in HTML output mode are the exception and call
 * expand() recursively.
 *
 * @param $root String, array, PPNode_Hash_Array or node to expand
 * @param $flags Bit field tested against self::NO_TEMPLATES, self::NO_ARGS,
 *   self::STRIP_COMMENTS, self::RECOVER_COMMENTS and self::NO_IGNORE
 * @return String; an error <span> if the node-count or expansion-depth limit
 *   is exceeded
 * @throws MWException if a context node has an unsupported type
 */
function expand( $root, $flags = 0 ) {
	// Shared by every frame and every call: counts nested expand() invocations
	static $expansionDepth = 0;
	if ( is_string( $root ) ) {
		return $root;
	}

	// The node counter is incremented even when the limit check then fails
	if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
	{
		return '<span class="error">Node-count limit exceeded</span>';
	}
	if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
		return '<span class="error">Expansion depth limit exceeded</span>';
	}
	++$expansionDepth;

	// Level 0 is a sentinel that collects the final output; level 1 holds $root
	$outStack = array( '', '' );
	$iteratorStack = array( false, $root );
	$indexStack = array( 0, 0 );

	while ( count( $iteratorStack ) > 1 ) {
		// References into the top stack level, so updates below write through
		$level = count( $outStack ) - 1;
		$iteratorNode =& $iteratorStack[ $level ];
		$out =& $outStack[$level];
		$index =& $indexStack[$level];

		// Fetch the next node to process from the iterator at this level
		if ( is_array( $iteratorNode ) ) {
			if ( $index >= count( $iteratorNode ) ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode[$index];
				$index++;
			}
		} elseif ( $iteratorNode instanceof PPNode_Hash_Array ) {
			if ( $index >= $iteratorNode->getLength() ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode->item( $index );
				$index++;
			}
		} else {
			// Copy to $contextNode and then delete from iterator stack,
			// because this is not an iterator but we do have to execute it once
			$contextNode = $iteratorStack[$level];
			$iteratorStack[$level] = false;
		}

		// Set by a branch below when the node's children need their own stack level
		$newIterator = false;

		if ( $contextNode === false ) {
			// nothing to do
		} elseif ( is_string( $contextNode ) ) {
			$out .= $contextNode;
		} elseif ( is_array( $contextNode ) || $contextNode instanceof PPNode_Hash_Array ) {
			$newIterator = $contextNode;
		} elseif ( $contextNode instanceof PPNode_Hash_Attr ) {
			// No output
		} elseif ( $contextNode instanceof PPNode_Hash_Text ) {
			$out .= $contextNode->value;
		} elseif ( $contextNode instanceof PPNode_Hash_Tree ) {
			if ( $contextNode->name == 'template' ) {
				# Double-brace expansion
				$bits = $contextNode->splitTemplate();
				if ( $flags & self::NO_TEMPLATES ) {
					// Rebuild the literal {{...}} syntax; its pieces are still expanded
					$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $bits['title'], $bits['parts'] );
				} else {
					// The parser answers with either an 'object' to expand further or final 'text'
					$ret = $this->parser->braceSubstitution( $bits, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->name == 'tplarg' ) {
				# Triple-brace expansion
				$bits = $contextNode->splitTemplate();
				if ( $flags & self::NO_ARGS ) {
					$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $bits['title'], $bits['parts'] );
				} else {
					$ret = $this->parser->argSubstitution( $bits, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->name == 'comment' ) {
				# HTML-style comment
				# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
				if ( $this->parser->ot['html']
					|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
					|| ( $flags & self::STRIP_COMMENTS ) )
				{
					// Appending the empty string: the comment contributes no output
					$out .= '';
				}
				# Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
				# Not in RECOVER_COMMENTS mode (extractSections) though
				elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
					$out .= $this->parser->insertStripItem( $contextNode->firstChild->value );
				}
				# Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
				else {
					$out .= $contextNode->firstChild->value;
				}
			} elseif ( $contextNode->name == 'ignore' ) {
				# Output suppression used by <includeonly> etc.
				# OT_WIKI will only respect <ignore> in substed templates.
				# The other output types respect it unless NO_IGNORE is set.
				# extractSections() sets NO_IGNORE and so never respects it.
				if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
					$out .= $contextNode->firstChild->value;
				} else {
					//$out .= '';
				}
			} elseif ( $contextNode->name == 'ext' ) {
				# Extension tag
				// Default the optional pieces to null so the parser always sees all keys
				$bits = $contextNode->splitExt() + array( 'attr' => null, 'inner' => null, 'close' => null );
				$out .= $this->parser->extensionSubstitution( $bits, $this );
			} elseif ( $contextNode->name == 'h' ) {
				# Heading
				if ( $this->parser->ot['html'] ) {
					# Expand immediately and insert heading index marker
					$s = '';
					for ( $node = $contextNode->firstChild; $node; $node = $node->nextSibling ) {
						$s .= $this->expand( $node, $flags );
					}

					$bits = $contextNode->splitHeading();
					$titleText = $this->title->getPrefixedDBkey();
					// Record (title, heading index); the array position becomes the marker serial
					$this->parser->mHeadings[] = array( $titleText, $bits['i'] );
					$serial = count( $this->parser->mHeadings ) - 1;
					$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
					// Insert the marker $bits['level'] characters into the expanded heading text
					$s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] );
					// The marker is registered with an empty replacement
					$this->parser->mStripState->general->setPair( $marker, '' );
					$out .= $s;
				} else {
					# Expand in virtual stack
					$newIterator = $contextNode->getChildren();
				}
			} else {
				# Generic recursive expansion
				$newIterator = $contextNode->getChildren();
			}
		} else {
			throw new MWException( __METHOD__.': Invalid parameter type' );
		}

		if ( $newIterator !== false ) {
			// Descend: push a fresh level for the children
			$outStack[] = '';
			$iteratorStack[] = $newIterator;
			$indexStack[] = 0;
		} elseif ( $iteratorStack[$level] === false ) {
			// Return accumulated value to parent
			// With tail recursion
			while ( $iteratorStack[$level] === false && $level > 0 ) {
				$outStack[$level - 1] .= $out;
				array_pop( $outStack );
				array_pop( $iteratorStack );
				array_pop( $indexStack );
				$level--;
			}
		}
	}
	--$expansionDepth;
	return $outStack[0];
}
| 1022 | + |
/**
 * Expand every node of the trailing arguments with the given flags and join
 * the results with a separator.
 *
 * @param $sep Separator placed between consecutive expanded nodes
 * @param $flags Flags passed on to expand()
 * @param ... Any number of nodes, arrays of nodes or PPNode_Hash_Array objects
 * @return String
 */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
	$roots = array_slice( func_get_args(), 2 );

	$expanded = array();
	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_Hash_Array ) {
			$root = $root->value;
		}
		// A single node is treated as a one-element list
		$nodes = is_array( $root ) ? $root : array( $root );
		foreach ( $nodes as $node ) {
			$expanded[] = $this->expand( $node, $flags );
		}
	}
	return implode( $sep, $expanded );
}
| 1046 | + |
/**
 * Implode with no flags specified
 * This previously called implodeWithFlags but has now been inlined to reduce stack depth
 *
 * @param $sep Separator placed between consecutive expanded nodes
 * @param ... Any number of nodes, arrays of nodes or PPNode_Hash_Array objects
 * @return String
 */
function implode( $sep /*, ... */ ) {
	$roots = array_slice( func_get_args(), 1 );

	$expanded = array();
	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_Hash_Array ) {
			$root = $root->value;
		}
		// A single node is treated as a one-element list
		$nodes = is_array( $root ) ? $root : array( $root );
		foreach ( $nodes as $node ) {
			$expanded[] = $this->expand( $node );
		}
	}
	return implode( $sep, $expanded );
}
| 1074 | + |
/**
 * Makes an object that, when expand()ed, will be the same as one obtained
 * with implode()
 *
 * Nothing is expanded here: the nodes and the separators between them are
 * only collected into a list.
 *
 * @param $sep Separator placed between consecutive nodes
 * @param ... Any number of nodes, arrays of nodes or PPNode_Hash_Array objects
 * @return PPNode_Hash_Array
 */
function virtualImplode( $sep /*, ... */ ) {
	$roots = array_slice( func_get_args(), 1 );
	$list = array();

	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_Hash_Array ) {
			$root = $root->value;
		}
		$nodes = is_array( $root ) ? $root : array( $root );
		foreach ( $nodes as $node ) {
			// Every node except the very first is preceded by the separator
			if ( count( $list ) > 0 ) {
				$list[] = $sep;
			}
			$list[] = $node;
		}
	}
	return new PPNode_Hash_Array( $list );
}
| 1102 | + |
/**
 * Virtual implode with brackets
 *
 * Like virtualImplode(), but the resulting list begins with $start and ends
 * with $end.
 *
 * @param $start Opening bracket placed first in the list
 * @param $sep Separator placed between consecutive nodes
 * @param $end Closing bracket placed last in the list
 * @param ... Any number of nodes, arrays of nodes or PPNode_Hash_Array objects
 * @return PPNode_Hash_Array
 */
function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
	$roots = array_slice( func_get_args(), 3 );
	$list = array( $start );

	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_Hash_Array ) {
			$root = $root->value;
		}
		$nodes = is_array( $root ) ? $root : array( $root );
		foreach ( $nodes as $node ) {
			// The list already holds $start, so a separator is due once it has grown beyond that
			if ( count( $list ) > 1 ) {
				$list[] = $sep;
			}
			$list[] = $node;
		}
	}
	$list[] = $end;
	return new PPNode_Hash_Array( $list );
}
| 1130 | + |
/**
 * String form of a frame without arguments.
 * @return String Always the literal 'frame{}'
 */
function __toString() {
	$description = 'frame{}';
	return $description;
}
| 1134 | + |
/**
 * Get a prefixed DB key.
 *
 * @param $level Index into the title cache; false (the default) asks for
 *   this frame's own title instead
 * @return The title's prefixed DB key when $level is false; otherwise the
 *   cached entry for $level, or false if there is none
 */
function getPDBK( $level = false ) {
	if ( $level !== false ) {
		if ( isset( $this->titleCache[$level] ) ) {
			return $this->titleCache[$level];
		}
		return false;
	}
	return $this->title->getPrefixedDBkey();
}
| 1142 | + |
/**
 * Get all arguments of this frame.
 * @return Array Always empty in this class
 */
function getArguments() {
	$none = array();
	return $none;
}
| 1146 | + |
/**
 * Get the numbered arguments of this frame.
 * @return Array Always empty in this class
 */
function getNumberedArguments() {
	$none = array();
	return $none;
}
| 1150 | + |
/**
 * Get the named arguments of this frame.
 * @return Array Always empty in this class
 */
function getNamedArguments() {
	$none = array();
	return $none;
}
| 1154 | + |
/**
 * Returns true if there are no arguments in this frame.
 * This class carries no arguments, so the answer is always true.
 * @return Boolean
 */
function isEmpty() {
	$hasNoArguments = true;
	return $hasNoArguments;
}
| 1161 | + |
/**
 * Look up an argument by name.
 * @param $name Argument name (not used by this class)
 * @return Boolean Always false in this class
 */
function getArgument( $name ) {
	$notFound = false;
	return $notFound;
}
| 1165 | + |
/**
 * Returns true if the infinite loop check is OK, false if a loop is detected
 *
 * A loop is detected when the title's prefixed DB key is already present in
 * $this->loopCheckHash.
 * @param $title Object providing getPrefixedDBkey()
 * @return Boolean
 */
function loopCheck( $title ) {
	$key = $title->getPrefixedDBkey();
	$alreadySeen = isset( $this->loopCheckHash[$key] );
	return !$alreadySeen;
}
| 1172 | + |
/**
 * Return true if the frame is a template frame.
 * @return Boolean Always false in this class
 */
function isTemplate() {
	$isTemplateFrame = false;
	return $isTemplateFrame;
}
| 1179 | +} |
| 1180 | + |
/**
 * Expansion frame with template arguments
 *
 * Arguments are kept unexpanded in $numberedArgs and $namedArgs. They are
 * expanded in the parent frame on first access and the result is cached.
 * @ingroup Parser
 */
class PPTemplateFrame_Hash extends PPFrame_Hash {
	public $numberedArgs, $namedArgs, $parent;
	public $numberedExpansionCache, $namedExpansionCache;

	/**
	 * @param $preprocessor The parent preprocessor
	 * @param $parent Parent frame; its title cache, loop-check table and depth are read
	 *   NOTE(review): the default of false would fail on those reads - confirm callers always pass a frame
	 * @param $numberedArgs Array of unexpanded numbered arguments
	 * @param $namedArgs Array of unexpanded named arguments
	 * @param $title Title of the template, or false
	 */
	public function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
		parent::__construct( $preprocessor );

		$this->parent = $parent;
		$this->numberedArgs = $numberedArgs;
		$this->namedArgs = $namedArgs;
		$this->title = $title;

		if ( $title ) {
			$pdbk = $title->getPrefixedDBkey();
		} else {
			$pdbk = false;
		}

		// Extend the parent's title cache by one level for this frame
		$this->titleCache = $parent->titleCache;
		$this->titleCache[] = $pdbk;

		// Arrays are copied on assignment, so the parent's table is not altered
		$this->loopCheckHash = /*clone*/ $parent->loopCheckHash;
		if ( $pdbk !== false ) {
			$this->loopCheckHash[$pdbk] = true;
		}

		$this->depth = $parent->depth + 1;
		$this->numberedExpansionCache = array();
		$this->namedExpansionCache = array();
	}

	/**
	 * Debug representation listing every argument as "name":"value".
	 */
	public function __toString() {
		$pairs = array();
		// With the + operator, numbered arguments win over named ones on equal keys
		foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
			$escaped = str_replace( '"', '\\"', $value->__toString() );
			$pairs[] = "\"$name\":\"" . $escaped . '"';
		}
		return 'tplframe{' . implode( ', ', $pairs ) . '}';
	}

	/**
	 * Returns true if there are no arguments in this frame
	 */
	public function isEmpty() {
		return !( count( $this->numberedArgs ) || count( $this->namedArgs ) );
	}

	/**
	 * Get all arguments, expanded, keyed by index or name.
	 */
	public function getArguments() {
		$keys = array_merge(
			array_keys( $this->numberedArgs ),
			array_keys( $this->namedArgs ) );

		$expanded = array();
		foreach ( $keys as $key ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get the numbered arguments, expanded, keyed by index.
	 */
	public function getNumberedArguments() {
		$expanded = array();
		foreach ( $this->numberedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get the named arguments, expanded, keyed by name.
	 */
	public function getNamedArguments() {
		$expanded = array();
		foreach ( $this->namedArgs as $key => $unused ) {
			$expanded[$key] = $this->getArgument( $key );
		}
		return $expanded;
	}

	/**
	 * Get one numbered argument, expanded in the parent frame and cached.
	 * @return String, or false if there is no such argument
	 */
	public function getNumberedArgument( $index ) {
		if ( !isset( $this->numberedArgs[$index] ) ) {
			return false;
		}
		if ( isset( $this->numberedExpansionCache[$index] ) ) {
			return $this->numberedExpansionCache[$index];
		}
		# No trimming for unnamed arguments
		$text = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS );
		$this->numberedExpansionCache[$index] = $text;
		return $this->numberedExpansionCache[$index];
	}

	/**
	 * Get one named argument, expanded in the parent frame, trimmed and cached.
	 * @return String, or false if there is no such argument
	 */
	public function getNamedArgument( $name ) {
		if ( !isset( $this->namedArgs[$name] ) ) {
			return false;
		}
		if ( isset( $this->namedExpansionCache[$name] ) ) {
			return $this->namedExpansionCache[$name];
		}
		# Trim named arguments post-expand, for backwards compatibility
		$text = trim( $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) );
		$this->namedExpansionCache[$name] = $text;
		return $this->namedExpansionCache[$name];
	}

	/**
	 * Get an argument by index or name; the numbered table is tried first.
	 * @return String, or false if neither table has the argument
	 */
	public function getArgument( $name ) {
		$text = $this->getNumberedArgument( $name );
		return ( $text === false ) ? $this->getNamedArgument( $name ) : $text;
	}

	/**
	 * Return true if the frame is a template frame
	 */
	public function isTemplate() {
		return true;
	}
}
| 1293 | + |
/**
 * Expansion frame with custom arguments
 *
 * The arguments are handed to the constructor as an array and returned by
 * getArgument() exactly as stored.
 * @ingroup Parser
 */
class PPCustomFrame_Hash extends PPFrame_Hash {
	public $args;

	/**
	 * @param $preprocessor The parent preprocessor
	 * @param $args Array of arguments keyed by index or name
	 */
	public function __construct( $preprocessor, $args ) {
		parent::__construct( $preprocessor );
		$this->args = $args;
	}

	/**
	 * Debug representation listing every argument as "name":"value".
	 */
	public function __toString() {
		$pairs = array();
		foreach ( $this->args as $name => $value ) {
			$escaped = str_replace( '"', '\\"', $value->__toString() );
			$pairs[] = "\"$name\":\"" . $escaped . '"';
		}
		return 'cstmframe{' . implode( ', ', $pairs ) . '}';
	}

	/**
	 * Returns true if there are no arguments in this frame
	 */
	public function isEmpty() {
		$argCount = count( $this->args );
		return !$argCount;
	}

	/**
	 * Get an argument by index.
	 * @return The stored value, or false if it is not set
	 */
	public function getArgument( $index ) {
		if ( isset( $this->args[$index] ) ) {
			return $this->args[$index];
		}
		return false;
	}
}
| 1333 | + |
| 1334 | +/** |
| 1335 | + * @ingroup Parser |
| 1336 | + */ |
| 1337 | +class PPNode_Hash_Tree implements PPNode { |
| 1338 | + var $name, $firstChild, $lastChild, $nextSibling; |
| 1339 | + |
/**
 * Create a tree node without children or siblings.
 * @param $name Node name
 */
function __construct( $name ) {
	$this->name = $name;
	// false marks an absent link
	$this->firstChild = false;
	$this->lastChild = false;
	$this->nextSibling = false;
}
| 1344 | + |
/**
 * Serialise this node and its children to an XML-like string.
 * Attribute children become name="value" attributes on the tag, with the
 * value passed through htmlspecialchars(). All other children are
 * concatenated as inner content. A node without inner content is written
 * as a self-closing tag.
 * @return String
 */
function __toString() {
	$attribs = '';
	$inner = '';

	$node = $this->firstChild;
	while ( $node ) {
		if ( $node instanceof PPNode_Hash_Attr ) {
			$attribs .= ' ' . $node->name . '="' . htmlspecialchars( $node->value ) . '"';
		} else {
			$inner .= $node->__toString();
		}
		$node = $node->nextSibling;
	}

	if ( $inner !== '' ) {
		return "<{$this->name}$attribs>$inner</{$this->name}>";
	}
	return "<{$this->name}$attribs/>";
}
| 1361 | + |
/**
 * Create a tree node whose only child is a text node.
 * @param $name Name of the new tree node
 * @param $text Text for the child PPNode_Hash_Text
 * @return The new tree node
 */
static function newWithText( $name, $text ) {
	$textNode = new PPNode_Hash_Text( $text );
	$tree = new self( $name );
	$tree->addChild( $textNode );
	return $tree;
}
| 1367 | + |
/**
 * Append a node to the end of this node's child list.
 * @param $node Node to append; its nextSibling is not reset here
 */
function addChild( $node ) {
	if ( $this->lastChild === false ) {
		// First child
		$this->firstChild = $node;
	} else {
		$this->lastChild->nextSibling = $node;
	}
	$this->lastChild = $node;
}
| 1376 | + |
/**
 * Get all children of this node, in order.
 * @return PPNode_Hash_Array
 */
function getChildren() {
	$list = array();
	$child = $this->firstChild;
	while ( $child ) {
		$list[] = $child;
		$child = $child->nextSibling;
	}
	return new PPNode_Hash_Array( $list );
}
| 1384 | + |
/**
 * Get the first child of this node.
 * @return The value of $this->firstChild (the constructor sets it to false when there is none)
 */
function getFirstChild() {
	$first = $this->firstChild;
	return $first;
}
| 1388 | + |
/**
 * Get the node that follows this one in its parent's child list.
 * @return The value of $this->nextSibling (the constructor sets it to false when there is none)
 */
function getNextSibling() {
	$next = $this->nextSibling;
	return $next;
}
| 1392 | + |
/**
 * Get the direct children of this node that have a given name.
 *
 * Children without a name property (text nodes, for example) never match.
 *
 * @param $name Node name to look for, compared with ===
 * @return Array of the matching child nodes, in document order
 */
function getChildrenOfType( $name ) {
	$children = array();
	for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
		if ( isset( $child->name ) && $child->name === $name ) {
			// Collect the node itself. The previous code stored $name here,
			// so callers got copies of the search string instead of nodes.
			$children[] = $child;
		}
	}
	return $children;
}
| 1402 | + |
/**
 * List length; a tree node is not a list.
 * @return Boolean Always false
 */
function getLength() {
	return false;
}
/**
 * List item access; a tree node is not a list.
 * @param $i Index (not used)
 * @return Boolean Always false
 */
function item( $i ) {
	return false;
}
| 1405 | + |
/**
 * Get the name this node was constructed with.
 * @return The value of $this->name
 */
function getName() {
	$nodeName = $this->name;
	return $nodeName;
}
| 1409 | + |
/**
 * Split a <part> node into an associative array containing:
 *    name          PPNode name
 *    index         String index
 *    value         PPNode value
 *
 * 'index' is taken from an 'index' attribute found as the first child of
 * the <name> node, and is the empty string if there is none. 'value' is
 * only set when a <value> child exists.
 *
 * @throws MWException if there is no <name> child
 */
function splitArg() {
	$bits = array();
	for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
		// Children without a name property carry nothing of interest here
		if ( isset( $child->name ) ) {
			if ( $child->name === 'value' ) {
				$bits['value'] = $child;
			} elseif ( $child->name === 'name' ) {
				$bits['name'] = $child;
				$first = $child->firstChild;
				if ( $first instanceof PPNode_Hash_Attr && $first->name === 'index' ) {
					$bits['index'] = $first->value;
				}
			}
		}
	}

	if ( !isset( $bits['name'] ) ) {
		throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
	}
	if ( !isset( $bits['index'] ) ) {
		$bits['index'] = '';
	}
	return $bits;
}
| 1442 | + |
/**
 * Split an <ext> node into an associative array containing name, attr, inner and close
 * All values in the resulting array are PPNodes. Inner and close are optional.
 *
 * If a child name occurs more than once, the last occurrence is kept.
 *
 * @throws MWException if there is no <name> child
 */
function splitExt() {
	$bits = array();
	for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
		if ( isset( $child->name ) ) {
			switch ( $child->name ) {
				case 'name':
					$bits['name'] = $child;
					break;
				case 'attr':
					$bits['attr'] = $child;
					break;
				case 'inner':
					$bits['inner'] = $child;
					break;
				case 'close':
					$bits['close'] = $child;
					break;
			}
		}
	}
	if ( isset( $bits['name'] ) ) {
		return $bits;
	}
	throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
}
| 1468 | + |
/**
 * Split an <h> node
 *
 * Returns an associative array with the values of the 'i' and 'level'
 * children. 'level' is only set if such a child exists.
 *
 * @throws MWException if this is not an <h> node or it has no 'i' child
 */
function splitHeading() {
	if ( $this->name !== 'h' ) {
		throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
	}

	$bits = array();
	for ( $child = $this->firstChild; $child; $child = $child->nextSibling ) {
		if ( isset( $child->name ) ) {
			switch ( $child->name ) {
				case 'i':
					$bits['i'] = $child->value;
					break;
				case 'level':
					$bits['level'] = $child->value;
					break;
			}
		}
	}

	if ( isset( $bits['i'] ) ) {
		return $bits;
	}
	throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
}
| 1492 | + |
/**
 * Split a <template> or <tplarg> node
 *
 * Returns an associative array with:
 *    title         the <title> child (the last one, if several)
 *    parts         PPNode_Hash_Array of all <part> children, in order
 *    lineStart     '1' if a 'lineStart' child exists, otherwise ''
 *
 * @throws MWException if there is no <title> child
 */
function splitTemplate() {
	$bits = array( 'lineStart' => '' );
	$partNodes = array();

	$child = $this->firstChild;
	while ( $child ) {
		if ( isset( $child->name ) ) {
			$kind = $child->name;
			if ( $kind == 'title' ) {
				$bits['title'] = $child;
			}
			if ( $kind == 'part' ) {
				$partNodes[] = $child;
			}
			if ( $kind == 'lineStart' ) {
				$bits['lineStart'] = '1';
			}
		}
		$child = $child->nextSibling;
	}

	if ( !isset( $bits['title'] ) ) {
		throw new MWException( 'Invalid node passed to ' . __METHOD__ );
	}
	$bits['parts'] = new PPNode_Hash_Array( $partNodes );
	return $bits;
}
| 1519 | +} |
| 1520 | + |
/**
 * Text leaf node. It has no children and cannot be split.
 * @ingroup Parser
 */
class PPNode_Hash_Text implements PPNode {
	public $value, $nextSibling;

	/**
	 * @param $value Text content
	 * @throws MWException if an object is given
	 */
	public function __construct( $value ) {
		if ( is_object( $value ) ) {
			throw new MWException( __CLASS__ . ' given object instead of string' );
		}
		$this->value = $value;
	}

	/**
	 * @return String The text passed through htmlspecialchars()
	 */
	public function __toString() {
		$escaped = htmlspecialchars( $this->value );
		return $escaped;
	}

	public function getNextSibling() {
		return $this->nextSibling;
	}

	// Tree and list accessors: not applicable to a text node, so always false

	public function getChildren() {
		return false;
	}

	public function getFirstChild() {
		return false;
	}

	public function getChildrenOfType( $name ) {
		return false;
	}

	public function getLength() {
		return false;
	}

	public function item( $i ) {
		return false;
	}

	public function getName() {
		return '#text';
	}

	// Splitting is only defined for tree nodes

	public function splitArg() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitExt() {
		throw new MWException( __METHOD__ . ': not supported' );
	}

	public function splitHeading() {
		throw new MWException( __METHOD__ . ': not supported' );
	}
}
| 1552 | + |
| 1553 | +/** |
| 1554 | + * Node list wrapper: holds the value given to the constructor and exposes it via getLength() and item() |
| 1555 | + * @ingroup Parser |
| 1556 | + */ |
| 1557 | +class PPNode_Hash_Array implements PPNode { |
| 1558 | + var $value, $nextSibling; |
| 1559 | + |
| 1560 | + function __construct( $value ) { $this->value = $value; } |
| 1561 | + |
| 1562 | + # The string form is a var_export() dump of the whole object |
| 1563 | + function __toString() { |
| 1564 | + $dump = var_export( $this, true ); |
| 1565 | + return $dump; |
| 1566 | + } |
| 1567 | + |
| 1568 | + function getName() { return '#nodelist'; } |
| 1569 | + function getNextSibling() { return $this->nextSibling; } |
| 1570 | + |
| 1571 | + # List access: count of the wrapped entries, and the entry stored under index $i |
| 1572 | + function getLength() { return count( $this->value ); } |
| 1573 | + function item( $i ) { |
| 1574 | + $entries = $this->value; |
| 1575 | + return $entries[$i]; |
| 1576 | + } |
| 1577 | + |
| 1578 | + # Child accessors all report false for this node type |
| 1579 | + function getChildren() { return false; } |
| 1580 | + function getFirstChild() { return false; } |
| 1581 | + function getChildrenOfType( $name ) { return false; } |
| 1582 | + |
| 1583 | + # The split operations are unsupported here and always throw |
| 1584 | + function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } |
| 1585 | + function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } |
| 1586 | + function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } |
| 1587 | +} |
| 1588 | + |
| 1589 | +/** |
| 1590 | + * Attribute node: a name paired with a value |
| 1591 | + * @ingroup Parser |
| 1592 | + */ |
| 1593 | +class PPNode_Hash_Attr implements PPNode { |
| 1594 | + var $name, $value, $nextSibling; |
| 1595 | + |
| 1596 | + function __construct( $name, $value ) { |
| 1597 | + $this->value = $value; |
| 1598 | + $this->name = $name; |
| 1599 | + } |
| 1600 | + |
| 1601 | + # Serialised as <@name>...</@name> wrapped around the HTML-escaped value |
| 1602 | + function __toString() { |
| 1603 | + $escaped = htmlspecialchars( $this->value ); |
| 1604 | + return "<@{$this->name}>" . $escaped . "</@{$this->name}>"; |
| 1605 | + } |
| 1606 | + |
| 1607 | + function getName() { return $this->name; } |
| 1608 | + function getNextSibling() { return $this->nextSibling; } |
| 1609 | + |
| 1610 | + # Child and list accessors all report false for this node type |
| 1611 | + function getChildren() { return false; } |
| 1612 | + function getFirstChild() { return false; } |
| 1613 | + function getChildrenOfType( $name ) { return false; } |
| 1614 | + function getLength() { return false; } |
| 1615 | + function item( $i ) { return false; } |
| 1616 | + # The split operations are unsupported here and always throw |
| 1617 | + function splitArg() { throw new MWException( __METHOD__ . ': not supported' ); } |
| 1618 | + function splitExt() { throw new MWException( __METHOD__ . ': not supported' ); } |
| 1619 | + function splitHeading() { throw new MWException( __METHOD__ . ': not supported' ); } |
| 1620 | +} |
Property changes on: branches/parser-work/phase3/includes/parser/Preprocessor_Hash.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 1621 | + native |
Index: branches/parser-work/phase3/includes/parser/Parser.php |
— | — | @@ -75,14 +75,8 @@ |
76 | 76 | const COLON_STATE_COMMENTDASH = 6; |
77 | 77 | const COLON_STATE_COMMENTDASHDASH = 7; |
78 | 78 | |
79 | | - // State flags for DOM expansion |
80 | | - const NO_ARGS = 1; |
81 | | - const NO_TEMPLATES = 2; |
82 | | - const STRIP_COMMENTS = 4; |
83 | | - const NO_IGNORE = 8; |
84 | | - const RECOVER_COMMENTS = 16; |
85 | | - const PTD_FOR_INCLUSION = 32; |
86 | | - const RECOVER_ORIG = 27; // = 1|2|8|16 no constant expression support in PHP yet |
| 79 | + // Flags for preprocessToDom |
| 80 | + const PTD_FOR_INCLUSION = 1; |
87 | 81 | |
88 | 82 | // Allowed values for $this->mOutputType |
89 | 83 | // Parameter to startExternalParse(). |
— | — | @@ -101,7 +95,7 @@ |
102 | 96 | # Persistent: |
103 | 97 | var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, |
104 | 98 | $mSubsts, $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, |
105 | | - $mParseEngine, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, |
| 99 | + $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, |
106 | 100 | $mVarCache, $mConf, $mFunctionTagHooks; |
107 | 101 | |
108 | 102 | |
— | — | @@ -143,10 +137,19 @@ |
144 | 138 | $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. |
145 | 139 | '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; |
146 | 140 | $this->mVarCache = array(); |
| 141 | + if ( isset( $conf['preprocessorClass'] ) ) { |
| 142 | + $this->mPreprocessorClass = $conf['preprocessorClass']; |
| 143 | + } elseif ( extension_loaded( 'domxml' ) ) { |
| 144 | + // PECL extension that conflicts with the core DOM extension (bug 13770) |
| 145 | + wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); |
| 146 | + $this->mPreprocessorClass = 'Preprocessor_Hash'; |
| 147 | + } elseif ( extension_loaded( 'dom' ) ) { |
| 148 | + $this->mPreprocessorClass = 'Preprocessor_DOM'; |
| 149 | + } else { |
| 150 | + $this->mPreprocessorClass = 'Preprocessor_Hash'; |
| 151 | + } |
147 | 152 | $this->mMarkerIndex = 0; |
148 | 153 | $this->mFirstCall = true; |
149 | | - |
150 | | - $this->mParseEngine = new ParseEngine("includes/parser/WikiTextGrammar.xml"); |
151 | 154 | } |
152 | 155 | |
153 | 156 | /** |
— | — | @@ -233,6 +236,11 @@ |
234 | 237 | $this->mDoubleUnderscores = array(); |
235 | 238 | $this->mExpensiveFunctionCount = 0; |
236 | 239 | |
| 240 | + # Fix cloning |
| 241 | + if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) { |
| 242 | + $this->mPreprocessor = null; |
| 243 | + } |
| 244 | + |
237 | 245 | wfRunHooks( 'ParserClearState', array( &$this ) ); |
238 | 246 | wfProfileOut( __METHOD__ ); |
239 | 247 | } |
— | — | @@ -455,12 +463,13 @@ |
456 | 464 | * If $frame is not provided, then template variables (e.g., {{{1}}}) within $text are not expanded |
457 | 465 | * |
458 | 466 | * @param $text String: text extension wants to have parsed |
| 467 | + * @param PPFrame $frame: The frame to use for expanding any template variables |
459 | 468 | */ |
460 | | - function recursiveTagParse( $text ) { |
| 469 | + function recursiveTagParse( $text, $frame=false ) { |
461 | 470 | wfProfileIn( __METHOD__ ); |
462 | 471 | wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); |
463 | 472 | wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); |
464 | | - $text = $this->internalParse( $text, false ); |
| 473 | + $text = $this->internalParse( $text, false, $frame ); |
465 | 474 | wfProfileOut( __METHOD__ ); |
466 | 475 | return $text; |
467 | 476 | } |
— | — | @@ -502,8 +511,8 @@ |
503 | 512 | $this->setTitle( new FakeTitle ); |
504 | 513 | |
505 | 514 | list( $text, $title ) = $this->getTemplateDom( $title ); |
506 | | - $flags = self::NO_ARGS | self::NO_TEMPLATES; |
507 | | - return ParseEngine::expand($text->childNodes, $this, $flags); |
| 515 | + $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES; |
| 516 | + return $this->getPreprocessor()->newFrame()->expand( $text, $flags ); |
508 | 517 | } |
509 | 518 | |
510 | 519 | /** |
— | — | @@ -534,6 +543,17 @@ |
535 | 544 | } |
536 | 545 | |
537 | 546 | /** |
| 547 | + * Return this parser's preprocessor, creating it from mPreprocessorClass on first use |
| 548 | + */ |
| 549 | + function getPreprocessor() { |
| 550 | + if ( isset( $this->mPreprocessor ) ) { |
| 551 | + return $this->mPreprocessor; |
| 552 | + } |
| 553 | + $preprocessorClass = $this->mPreprocessorClass; |
| 554 | + return $this->mPreprocessor = new $preprocessorClass( $this ); |
| 555 | + } |
| 556 | + |
| 557 | + /** |
538 | 558 | * Replaces all occurrences of HTML-style comments and the given tags |
539 | 559 | * in the text with a random marker and returns the next text. The output |
540 | 560 | * parameter $matches will be an associative array filled with data in |
— | — | @@ -901,8 +921,8 @@ |
902 | 922 | $flag = 0; |
903 | 923 | else |
904 | 924 | $flag = Parser::PTD_FOR_INCLUSION; |
905 | | - $dom = $this->mParseEngine->parse($text); |
906 | | - $text = ParseEngine::expand( $dom, $this, $flag ); |
| 925 | + $dom = $this->preprocessToDom( $text, $flag ); |
| 926 | + $text = $frame->expand( $dom ); |
907 | 927 | } |
908 | 928 | // if $frame is not provided, then use old-style replaceVariables |
909 | 929 | else { |
— | — | @@ -2050,9 +2070,11 @@ |
2051 | 2071 | # |
2052 | 2072 | $textLines = StringUtils::explode( "\n", $text ); |
2053 | 2073 | |
2054 | | - $output = ''; |
| 2074 | + $lastPrefix = $output = ''; |
2055 | 2075 | $this->mDTopen = $inBlockElem = false; |
| 2076 | + $prefixLength = 0; |
2056 | 2077 | $paragraphStack = false; |
| 2078 | + |
2057 | 2079 | foreach ( $textLines as $oLine ) { |
2058 | 2080 | # Fix up $linestart |
2059 | 2081 | if ( !$linestart ) { |
— | — | @@ -2060,76 +2082,158 @@ |
2061 | 2083 | $linestart = true; |
2062 | 2084 | continue; |
2063 | 2085 | } |
| 2086 | + // * = ul |
| 2087 | + // # = ol |
2064 | 2088 | // ; = dt |
2065 | 2089 | // : = dd |
2066 | 2090 | |
2067 | | - wfProfileIn( __METHOD__."-paragraph" ); |
2068 | | - // XXX: use a stack for nestable elements like span, table and div |
| 2091 | + $lastPrefixLength = strlen( $lastPrefix ); |
2069 | 2092 | $preCloseMatch = preg_match('/<\\/pre/i', $oLine ); |
2070 | 2093 | $preOpenMatch = preg_match('/<pre/i', $oLine ); |
2071 | | - $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $oLine ); |
2072 | | - $closematch = preg_match( |
2073 | | - '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. |
2074 | | - '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $oLine ); |
2075 | | - if ( $openmatch or $closematch ) { |
| 2094 | + // If not in a <pre> element, scan for and figure out what prefixes are there. |
| 2095 | + if ( !$this->mInPre ) { |
| 2096 | + # Multiple prefixes may abut each other for nested lists. |
| 2097 | + $prefixLength = strspn( $oLine, '*#:;' ); |
| 2098 | + $prefix = substr( $oLine, 0, $prefixLength ); |
| 2099 | + |
| 2100 | + # eh? |
| 2101 | + // ; and : are both from definition-lists, so they're equivalent |
| 2102 | + // for the purposes of determining whether or not we need to open/close |
| 2103 | + // elements. |
| 2104 | + $prefix2 = str_replace( ';', ':', $prefix ); |
| 2105 | + $t = substr( $oLine, $prefixLength ); |
| 2106 | + $this->mInPre = (bool)$preOpenMatch; |
| 2107 | + } else { |
| 2108 | + # Don't interpret any other prefixes in preformatted text |
| 2109 | + $prefixLength = 0; |
| 2110 | + $prefix = $prefix2 = ''; |
| 2111 | + $t = $oLine; |
| 2112 | + } |
| 2113 | + |
| 2114 | + # List generation |
| 2115 | + if( $prefixLength && $lastPrefix === $prefix2 ) { |
| 2116 | + # Same as the last item, so no need to deal with nesting or opening stuff |
| 2117 | + $output .= $this->nextItem( substr( $prefix, -1 ) ); |
2076 | 2118 | $paragraphStack = false; |
2077 | | - # TODO bug 5718: paragraph closed |
2078 | | - $output .= $this->closeParagraph(); |
2079 | | - if ( $preOpenMatch and !$preCloseMatch ) { |
2080 | | - $this->mInPre = true; |
| 2119 | + |
| 2120 | + if ( substr( $prefix, -1 ) === ';') { |
| 2121 | + # The one nasty exception: definition lists work like this: |
| 2122 | + # ; title : definition text |
| 2123 | + # So we check for : in the remainder text to split up the |
| 2124 | + # title and definition, without b0rking links. |
| 2125 | + $term = $t2 = ''; |
| 2126 | + if ($this->findColonNoLinks($t, $term, $t2) !== false) { |
| 2127 | + $t = $t2; |
| 2128 | + $output .= $term . $this->nextItem( ':' ); |
| 2129 | + } |
2081 | 2130 | } |
2082 | | - if ( $closematch ) { |
2083 | | - $inBlockElem = false; |
2084 | | - } else { |
2085 | | - $inBlockElem = true; |
| 2131 | + } elseif( $prefixLength || $lastPrefixLength ) { |
| 2132 | + // We need to open or close prefixes, or both. |
| 2133 | + |
| 2134 | + # Either open or close a level... |
| 2135 | + $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix ); |
| 2136 | + $paragraphStack = false; |
| 2137 | + |
| 2138 | + // Close all the prefixes which aren't shared. |
| 2139 | + while( $commonPrefixLength < $lastPrefixLength ) { |
| 2140 | + $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] ); |
| 2141 | + --$lastPrefixLength; |
2086 | 2142 | } |
2087 | | - } else if ( !$inBlockElem && !$this->mInPre ) { |
2088 | | - if ( ' ' == substr( $oLine, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($oLine) != '' ) ) { |
2089 | | - // pre |
2090 | | - if ($this->mLastSection !== 'pre') { |
2091 | | - $paragraphStack = false; |
2092 | | - $output .= $this->closeParagraph().'<pre>'; |
2093 | | - $this->mLastSection = 'pre'; |
| 2143 | + |
| 2144 | + // Continue the current prefix if appropriate. |
| 2145 | + if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { |
| 2146 | + $output .= $this->nextItem( $prefix[$commonPrefixLength-1] ); |
| 2147 | + } |
| 2148 | + |
| 2149 | + // Open prefixes where appropriate. |
| 2150 | + while ( $prefixLength > $commonPrefixLength ) { |
| 2151 | + $char = substr( $prefix, $commonPrefixLength, 1 ); |
| 2152 | + $output .= $this->openList( $char ); |
| 2153 | + |
| 2154 | + if ( ';' === $char ) { |
| 2155 | + # FIXME: This is dupe of code above |
| 2156 | + if ($this->findColonNoLinks($t, $term, $t2) !== false) { |
| 2157 | + $t = $t2; |
| 2158 | + $output .= $term . $this->nextItem( ':' ); |
| 2159 | + } |
2094 | 2160 | } |
2095 | | - $oLine = substr( $oLine, 1 ); |
2096 | | - } else { |
2097 | | - // paragraph |
2098 | | - if ( trim($oLine) == '' ) { |
2099 | | - if ( $paragraphStack ) { |
2100 | | - $output .= $paragraphStack.'<br />'; |
| 2161 | + ++$commonPrefixLength; |
| 2162 | + } |
| 2163 | + $lastPrefix = $prefix2; |
| 2164 | + } |
| 2165 | + |
| 2166 | + // If we have no prefixes, go to paragraph mode. |
| 2167 | + if( 0 == $prefixLength ) { |
| 2168 | + wfProfileIn( __METHOD__."-paragraph" ); |
| 2169 | + # No prefix (not in list)--go to paragraph mode |
| 2170 | + // XXX: use a stack for nestable elements like span, table and div |
| 2171 | + $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); |
| 2172 | + $closematch = preg_match( |
| 2173 | + '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. |
| 2174 | + '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t ); |
| 2175 | + if ( $openmatch or $closematch ) { |
| 2176 | + $paragraphStack = false; |
| 2177 | + # TODO bug 5718: paragraph closed |
| 2178 | + $output .= $this->closeParagraph(); |
| 2179 | + if ( $preOpenMatch and !$preCloseMatch ) { |
| 2180 | + $this->mInPre = true; |
| 2181 | + } |
| 2182 | + if ( $closematch ) { |
| 2183 | + $inBlockElem = false; |
| 2184 | + } else { |
| 2185 | + $inBlockElem = true; |
| 2186 | + } |
| 2187 | + } else if ( !$inBlockElem && !$this->mInPre ) { |
| 2188 | + if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) { |
| 2189 | + // pre |
| 2190 | + if ($this->mLastSection !== 'pre') { |
2101 | 2191 | $paragraphStack = false; |
2102 | | - $this->mLastSection = 'p'; |
2103 | | - } else { |
2104 | | - if ($this->mLastSection !== 'p' ) { |
2105 | | - $output .= $this->closeParagraph(); |
2106 | | - $this->mLastSection = ''; |
2107 | | - $paragraphStack = '<p>'; |
| 2192 | + $output .= $this->closeParagraph().'<pre>'; |
| 2193 | + $this->mLastSection = 'pre'; |
| 2194 | + } |
| 2195 | + $t = substr( $t, 1 ); |
| 2196 | + } else { |
| 2197 | + // paragraph |
| 2198 | + if ( trim($t) == '' ) { |
| 2199 | + if ( $paragraphStack ) { |
| 2200 | + $output .= $paragraphStack.'<br />'; |
| 2201 | + $paragraphStack = false; |
| 2202 | + $this->mLastSection = 'p'; |
2108 | 2203 | } else { |
2109 | | - $paragraphStack = '</p><p>'; |
| 2204 | + if ($this->mLastSection !== 'p' ) { |
| 2205 | + $output .= $this->closeParagraph(); |
| 2206 | + $this->mLastSection = ''; |
| 2207 | + $paragraphStack = '<p>'; |
| 2208 | + } else { |
| 2209 | + $paragraphStack = '</p><p>'; |
| 2210 | + } |
2110 | 2211 | } |
| 2212 | + } else { |
| 2213 | + if ( $paragraphStack ) { |
| 2214 | + $output .= $paragraphStack; |
| 2215 | + $paragraphStack = false; |
| 2216 | + $this->mLastSection = 'p'; |
| 2217 | + } else if ($this->mLastSection !== 'p') { |
| 2218 | + $output .= $this->closeParagraph().'<p>'; |
| 2219 | + $this->mLastSection = 'p'; |
| 2220 | + } |
2111 | 2221 | } |
2112 | | - } else { |
2113 | | - if ( $paragraphStack ) { |
2114 | | - $output .= $paragraphStack; |
2115 | | - $paragraphStack = false; |
2116 | | - $this->mLastSection = 'p'; |
2117 | | - } else if ($this->mLastSection !== 'p') { |
2118 | | - $output .= $this->closeParagraph().'<p>'; |
2119 | | - $this->mLastSection = 'p'; |
2120 | | - } |
2121 | 2222 | } |
2122 | 2223 | } |
| 2224 | + wfProfileOut( __METHOD__."-paragraph" ); |
2123 | 2225 | } |
2124 | | - wfProfileOut( __METHOD__."-paragraph" ); |
2125 | | - |
2126 | 2226 | // somewhere above we forget to get out of pre block (bug 785) |
2127 | 2227 | if($preCloseMatch && $this->mInPre) { |
2128 | 2228 | $this->mInPre = false; |
2129 | 2229 | } |
2130 | 2230 | if ($paragraphStack === false) { |
2131 | | - $output .= $oLine."\n"; |
| 2231 | + $output .= $t."\n"; |
2132 | 2232 | } |
2133 | 2233 | } |
| 2234 | + while ( $prefixLength ) { |
| 2235 | + $output .= $this->closeList( $prefix2[$prefixLength-1] ); |
| 2236 | + --$prefixLength; |
| 2237 | + } |
2134 | 2238 | if ( $this->mLastSection != '' ) { |
2135 | 2239 | $output .= '</' . $this->mLastSection . '>'; |
2136 | 2240 | $this->mLastSection = ''; |
— | — | @@ -2620,6 +2724,33 @@ |
2621 | 2725 | wfProfileOut( __METHOD__ ); |
2622 | 2726 | } |
2623 | 2727 | |
| 2728 | + /** |
| 2729 | + * Turn a piece of wikitext into its preprocessor document tree. |
| 2730 | + * This is the ghost of replace_variables(). |
| 2731 | + * |
| 2732 | + * @param string $text The wikitext to preprocess |
| 2733 | + * @param integer $flags Bitwise combination of: |
| 2734 | + * self::PTD_FOR_INCLUSION Treat <noinclude>/<includeonly> as if the text is being |
| 2735 | + * included; without it a direct page view is assumed. |
| 2736 | + * @return The tree returned by the preprocessor's preprocessToObj() |
| 2737 | + * |
| 2738 | + * The tree must be a function of the input text and the flags only, and it must not |
| 2739 | + * differ between OT_HTML and OT_WIKI mode, otherwise bug 4899 would regress. |
| 2740 | + * |
| 2741 | + * Every flag added to $flags, and any other parameter that can change the tree built |
| 2742 | + * for a given text, has to be carried through the section identifier in the section |
| 2743 | + * edit link and so back to extractSections(). |
| 2744 | + * |
| 2745 | + * At present the result is cached in process memory only; a persistent cache relying |
| 2746 | + * on these strict dependency requirements may be implemented at a later date. |
| 2747 | + * |
| 2748 | + * @private |
| 2749 | + */ |
| 2750 | + function preprocessToDom ( $text, $flags = 0 ) { |
| 2751 | + $preprocessor = $this->getPreprocessor(); |
| 2752 | + return $preprocessor->preprocessToObj( $text, $flags ); |
| 2753 | + } |
| 2754 | + |
2624 | 2755 | /* |
2625 | 2756 | * Return a three-element array: leading whitespace, string contents, trailing whitespace |
2626 | 2757 | */ |
— | — | @@ -2647,18 +2778,30 @@ |
2648 | 2779 | * self::OT_HTML: all templates and extension tags |
2649 | 2780 | * |
2650 | 2781 | * @param string $tex The text to transform |
| 2782 | + * @param PPFrame $frame Object describing the arguments passed to the template. |
| 2783 | + * Arguments may also be provided as an associative array, as was the usual case before MW1.12. |
| 2784 | + * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. |
| 2785 | + * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion |
2651 | 2786 | * @private |
2652 | 2787 | */ |
2653 | | - function replaceVariables( $text ) { |
| 2788 | + function replaceVariables( $text, $frame = false, $argsOnly = false ) { |
2654 | 2789 | # Is there any text? Also, Prevent too big inclusions! |
2655 | 2790 | if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { |
2656 | 2791 | return $text; |
2657 | 2792 | } |
2658 | 2793 | wfProfileIn( __METHOD__ ); |
2659 | 2794 | |
2660 | | - $dom = $this->mParseEngine->parse($text); |
2661 | | - $text = ParseEngine::expand($dom->childNodes, $this); |
| 2795 | + if ( $frame === false ) { |
| 2796 | + $frame = $this->getPreprocessor()->newFrame(); |
| 2797 | + } elseif ( !( $frame instanceof PPFrame ) ) { |
| 2798 | + wfDebug( __METHOD__." called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); |
| 2799 | + $frame = $this->getPreprocessor()->newCustomFrame($frame); |
| 2800 | + } |
2662 | 2801 | |
| 2802 | + $dom = $this->preprocessToDom( $text ); |
| 2803 | + $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; |
| 2804 | + $text = $frame->expand( $dom, $flags ); |
| 2805 | + |
2663 | 2806 | wfProfileOut( __METHOD__ ); |
2664 | 2807 | return $text; |
2665 | 2808 | } |
— | — | @@ -2712,11 +2855,11 @@ |
2713 | 2856 | * $piece['title']: the title, i.e. the part before the | |
2714 | 2857 | * $piece['parts']: the parameter array |
2715 | 2858 | * $piece['lineStart']: whether the brace was at the start of a line |
| 2859 | + * @param PPFrame The current frame, contains template arguments |
2716 | 2860 | * @return string the text of the template |
2717 | 2861 | * @private |
2718 | 2862 | */ |
2719 | | - function templateSubstitution($inNode, &$outText, $flags = 0) { |
2720 | | - return FALSE; |
| 2863 | + function braceSubstitution( $piece, $frame ) { |
2721 | 2864 | global $wgContLang, $wgNonincludableNamespaces; |
2722 | 2865 | wfProfileIn( __METHOD__ ); |
2723 | 2866 | wfProfileIn( __METHOD__.'-setup' ); |
— | — | @@ -2730,12 +2873,11 @@ |
2731 | 2874 | $isLocalObj = false; # $text is a DOM node needing expansion in the current frame |
2732 | 2875 | |
2733 | 2876 | # Title object, where $text came from |
2734 | | - $xpath = new DOMXPath($template->ownerDocument); |
2735 | 2877 | $title = null; |
2736 | 2878 | |
2737 | 2879 | # $part1 is the bit before the first |, and must contain only title characters. |
2738 | 2880 | # Various prefixes will be stripped from it later. |
2739 | | - $titleWithSpaces = $xpath->query("title", $template)->item(0)->textContent; |
| 2881 | + $titleWithSpaces = $frame->expand( $piece['title'] ); |
2740 | 2882 | $part1 = trim( $titleWithSpaces ); |
2741 | 2883 | $titleText = false; |
2742 | 2884 | |
— | — | @@ -2743,10 +2885,7 @@ |
2744 | 2886 | $originalTitle = $part1; |
2745 | 2887 | |
2746 | 2888 | # $args is a list of argument nodes, starting from index 0, not including $part1 |
2747 | | - $args = array(); |
2748 | | - foreach ($xpath->query("part", $template) as $part) { |
2749 | | - $args[] = $part; |
2750 | | - } |
| 2889 | + $args = (null == $piece['parts']) ? array() : $piece['parts']; |
2751 | 2890 | wfProfileOut( __METHOD__.'-setup' ); |
2752 | 2891 | |
2753 | 2892 | # SUBST |
— | — | @@ -2761,15 +2900,14 @@ |
2762 | 2901 | # safesubst || (subst && PST) || (false && !PST) => transclude (skip the if) |
2763 | 2902 | # (false && PST) || (subst && !PST) => return input (handled by if) |
2764 | 2903 | if ( $substMatch != 'safesubst' && ($substMatch == 'subst' xor $this->ot['wiki']) ) { |
2765 | | - $outText = ParseEngine::unparse($template); |
2766 | | - $template->parentNode->replaceChild($template->ownerDocument->createTextNode($outText), $template); |
| 2904 | + $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
2767 | 2905 | $isLocalObj = true; |
2768 | 2906 | $found = true; |
2769 | 2907 | } |
2770 | 2908 | } |
2771 | 2909 | |
2772 | 2910 | # Variables |
2773 | | - if ( !$found && $args->length == 0 ) { |
| 2911 | + if ( !$found && $args->getLength() == 0 ) { |
2774 | 2912 | $id = $this->mVariables->matchStartToEnd( $part1 ); |
2775 | 2913 | if ( $id !== false ) { |
2776 | 2914 | $text = $this->getVariableValue( $id, $frame ); |
— | — | @@ -2826,12 +2964,14 @@ |
2827 | 2965 | # Add a frame parameter, and pass the arguments as an array |
2828 | 2966 | $allArgs = $initialArgs; |
2829 | 2967 | $allArgs[] = $frame; |
2830 | | - $funcArgs = array_merge( $funcArgs, $args ); |
| 2968 | + for ( $i = 0; $i < $args->getLength(); $i++ ) { |
| 2969 | + $funcArgs[] = $args->item( $i ); |
| 2970 | + } |
2831 | 2971 | $allArgs[] = $funcArgs; |
2832 | 2972 | } else { |
2833 | 2973 | # Convert arguments to plain text |
2834 | | - foreach ($args as $arg) { |
2835 | | - $funcArgs[] = substr(ParseEngine::unparse($arg), 1); |
| 2974 | + for ( $i = 0; $i < $args->getLength(); $i++ ) { |
| 2975 | + $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) ); |
2836 | 2976 | } |
2837 | 2977 | $allArgs = array_merge( $initialArgs, $funcArgs ); |
2838 | 2978 | } |
— | — | @@ -2860,7 +3000,7 @@ |
2861 | 3001 | $text = $result; |
2862 | 3002 | } |
2863 | 3003 | if ( !$noparse ) { |
2864 | | - $text = $this->mParseEngine->parse($text); |
| 3004 | + $text = $this->preprocessToDom( $text, $preprocessFlags ); |
2865 | 3005 | $isChildObj = true; |
2866 | 3006 | } |
2867 | 3007 | } |
— | — | @@ -2929,7 +3069,7 @@ |
2930 | 3070 | } else { |
2931 | 3071 | $text = $this->interwikiTransclude( $title, 'raw' ); |
2932 | 3072 | // Preprocess it like a template |
2933 | | - $text = $this->mParseEngine->parse($text); |
| 3073 | + $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); |
2934 | 3074 | $isChildObj = true; |
2935 | 3075 | } |
2936 | 3076 | $found = true; |
— | — | @@ -2948,9 +3088,9 @@ |
2949 | 3089 | # If we haven't found text to substitute by now, we're done |
2950 | 3090 | # Recover the source wikitext and return it |
2951 | 3091 | if ( !$found ) { |
2952 | | - $outText = ParseEngine::unparse($template); |
2953 | | - $template->parentNode->replaceChild($template->ownerDocument->createTextNode($outText), $template); |
| 3092 | + $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
2954 | 3093 | wfProfileOut( __METHOD__ ); |
| 3094 | + return array( 'object' => $text ); |
2955 | 3095 | } |
2956 | 3096 | |
2957 | 3097 | # Expand DOM-style return values in a child frame |
— | — | @@ -2959,22 +3099,22 @@ |
2960 | 3100 | $newFrame = $frame->newChild( $args, $title ); |
2961 | 3101 | |
2962 | 3102 | if ( $nowiki ) { |
2963 | | - $text = ParseEngine::expand( $text, self::RECOVER_ORIG ); |
| 3103 | + $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG ); |
2964 | 3104 | } elseif ( $titleText !== false && $newFrame->isEmpty() ) { |
2965 | 3105 | # Expansion is eligible for the empty-frame cache |
2966 | 3106 | if ( isset( $this->mTplExpandCache[$titleText] ) ) { |
2967 | 3107 | $text = $this->mTplExpandCache[$titleText]; |
2968 | 3108 | } else { |
2969 | | - $text = ParseEngine::expand( $text, self::PTD_FOR_INCLUSION ); |
| 3109 | + $text = $newFrame->expand( $text ); |
2970 | 3110 | $this->mTplExpandCache[$titleText] = $text; |
2971 | 3111 | } |
2972 | 3112 | } else { |
2973 | 3113 | # Uncached expansion |
2974 | | - $text = ParseEngine::expand( $text ); |
| 3114 | + $text = $newFrame->expand( $text ); |
2975 | 3115 | } |
2976 | 3116 | } |
2977 | 3117 | if ( $isLocalObj && $nowiki ) { |
2978 | | - $text = ParseEngine::expand( $text, self::RECOVER_ORIG ); |
| 3118 | + $text = $frame->expand( $text, PPFrame::RECOVER_ORIG ); |
2979 | 3119 | $isLocalObj = false; |
2980 | 3120 | } |
2981 | 3121 | |
— | — | @@ -3001,11 +3141,15 @@ |
3002 | 3142 | $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' ); |
3003 | 3143 | $this->limitationWarn( 'post-expand-template-inclusion' ); |
3004 | 3144 | } |
3005 | | - if ($template->parentNode != NULL) { |
3006 | | - $template->parentNode->replaceChild($template->ownerDocument->createTextNode($text), $template); |
| 3145 | + |
| 3146 | + if ( $isLocalObj ) { |
| 3147 | + $ret = array( 'object' => $text ); |
| 3148 | + } else { |
| 3149 | + $ret = array( 'text' => $text ); |
3007 | 3150 | } |
3008 | 3151 | |
3009 | 3152 | wfProfileOut( __METHOD__ ); |
| 3153 | + return $ret; |
3010 | 3154 | } |
3011 | 3155 | |
3012 | 3156 | /** |
— | — | @@ -3033,7 +3177,7 @@ |
3034 | 3178 | return array( false, $title ); |
3035 | 3179 | } |
3036 | 3180 | |
3037 | | - $dom = $this->mParseEngine->parse($text); |
| 3181 | + $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); |
3038 | 3182 | $this->mTplDomCache[ $titleText ] = $dom; |
3039 | 3183 | |
3040 | 3184 | if (! $title->equals($cacheTitle)) { |
— | — | @@ -3170,16 +3314,16 @@ |
3171 | 3315 | * Triple brace replacement -- used for template arguments |
3172 | 3316 | * @private |
3173 | 3317 | */ |
3174 | | - function tplargSubstitution($inNode, &$outText, $flags = 0) { |
| 3318 | + function argSubstitution( $piece, $frame ) { |
3175 | 3319 | wfProfileIn( __METHOD__ ); |
3176 | 3320 | |
3177 | | - $xpath = new DOMXPath($tplArg->ownerDocument); |
3178 | | - $parts = $xpath->query("part", $tplArg); |
3179 | | - $nameWithSpaces = $xpath->query("title", $tplArg)->item(0)->textContent; |
| 3321 | + $error = false; |
| 3322 | + $parts = $piece['parts']; |
| 3323 | + $nameWithSpaces = $frame->expand( $piece['title'] ); |
3180 | 3324 | $argName = trim( $nameWithSpaces ); |
3181 | 3325 | $object = false; |
3182 | 3326 | $text = $frame->getArgument( $argName ); |
3183 | | - if ( $text === false && $parts->length > 0 |
| 3327 | + if ( $text === false && $parts->getLength() > 0 |
3184 | 3328 | && ( |
3185 | 3329 | $this->ot['html'] |
3186 | 3330 | || $this->ot['pre'] |
— | — | @@ -3187,18 +3331,28 @@ |
3188 | 3332 | ) |
3189 | 3333 | ) { |
3190 | 3334 | # No match in frame, use the supplied default |
3191 | | - $text = $parts->item( 0 )->firstChild->textContent; |
| 3335 | + $object = $parts->item( 0 )->getChildren(); |
3192 | 3336 | } |
3193 | 3337 | if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { |
3194 | | - $text .= '<!-- WARNING: argument omitted, expansion size too large -->'; |
| 3338 | + $error = '<!-- WARNING: argument omitted, expansion size too large -->'; |
3195 | 3339 | $this->limitationWarn( 'post-expand-template-argument' ); |
3196 | 3340 | } |
3197 | | - if ($text == NULL) { |
3198 | | - $text = ParseEngine::unparse($tplArg); |
| 3341 | + |
| 3342 | + if ( $text === false && $object === false ) { |
| 3343 | + # No match anywhere |
| 3344 | + $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts ); |
3199 | 3345 | } |
3200 | | - $tplArg->parentNode->replaceChild($tplArg->ownerDocument->createTextNode($text), $tplArg); |
| 3346 | + if ( $error !== false ) { |
| 3347 | + $text .= $error; |
| 3348 | + } |
| 3349 | + if ( $object !== false ) { |
| 3350 | + $ret = array( 'object' => $object ); |
| 3351 | + } else { |
| 3352 | + $ret = array( 'text' => $text ); |
| 3353 | + } |
3201 | 3354 | |
3202 | 3355 | wfProfileOut( __METHOD__ ); |
| 3356 | + return $ret; |
3203 | 3357 | } |
3204 | 3358 | |
3205 | 3359 | /** |
— | — | @@ -3211,110 +3365,86 @@ |
3212 | 3366 | * attributes Optional associative array of parsed attributes |
3213 | 3367 | * inner Contents of extension element |
3214 | 3368 | * noClose Original text did not have a close tag |
| 3369 | + * @param PPFrame $frame |
3215 | 3370 | */ |
3216 | | - function xmltagSubstitution($inNode, &$outText, $flags = 0) { |
| 3371 | + function extensionSubstitution( $params, $frame ) { |
3217 | 3372 | global $wgRawHtml, $wgContLang; |
3218 | 3373 | |
3219 | | - $xpath = new DOMXPath($inNode->ownerDocument); |
3220 | | - $name = $xpath->query("name", $inNode)->item(0)->getAttribute("tag"); |
3221 | | - $name = strtolower( $name ); |
3222 | | - $isFunctionTag = isset( $this->mFunctionTagHooks[$name] ) && ( $this->ot['html'] || $this->ot['pre'] ); |
3223 | | - $retCode = $this->ot['html'] || $isFunctionTag; |
3224 | | - if ($retCode) { |
3225 | | - $inner = $xpath->query("inner", $inNode); |
3226 | | - $content = $inner->length == 0 ? NULL : ParseEngine::expand($inner->item(0)->childNodes, $this); |
3227 | | - $attributes = $xpath->query("attribute", $inNode); |
| 3374 | + $name = $frame->expand( $params['name'] ); |
| 3375 | + $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); |
| 3376 | + $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); |
| 3377 | + $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX; |
3228 | 3378 | |
| 3379 | + $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower($name)] ) && |
| 3380 | + ( $this->ot['html'] || $this->ot['pre'] ); |
| 3381 | + if ( $isFunctionTag ) { |
| 3382 | + $markerType = 'none'; |
| 3383 | + } else { |
| 3384 | + $markerType = 'general'; |
| 3385 | + } |
| 3386 | + if ( $this->ot['html'] || $isFunctionTag ) { |
| 3387 | + $name = strtolower( $name ); |
| 3388 | + $attributes = Sanitizer::decodeTagAttributes( $attrText ); |
| 3389 | + if ( isset( $params['attributes'] ) ) { |
| 3390 | + $attributes = $attributes + $params['attributes']; |
| 3391 | + } |
| 3392 | + |
3229 | 3393 | if( isset( $this->mTagHooks[$name] ) ) { |
3230 | 3394 | # Workaround for PHP bug 35229 and similar |
3231 | 3395 | if ( !is_callable( $this->mTagHooks[$name] ) ) { |
3232 | 3396 | throw new MWException( "Tag hook for $name is not callable\n" ); |
3233 | 3397 | } |
3234 | | - $outText = call_user_func_array( $this->mTagHooks[$name], |
3235 | | - array($content, $attributes, $this)); |
| 3398 | + $output = call_user_func_array( $this->mTagHooks[$name], |
| 3399 | + array( $content, $attributes, $this, $frame ) ); |
3236 | 3400 | } elseif( isset( $this->mFunctionTagHooks[$name] ) ) { |
3237 | 3401 | list( $callback, $flags ) = $this->mFunctionTagHooks[$name]; |
3238 | 3402 | if( !is_callable( $callback ) ) |
3239 | 3403 | throw new MWException( "Tag hook for $name is not callable\n" ); |
3240 | 3404 | |
3241 | | - $outText = call_user_func_array( $callback, |
| 3405 | + $output = call_user_func_array( $callback, |
3242 | 3406 | array( &$this, $frame, $content, $attributes ) ); |
3243 | 3407 | } else { |
3244 | | - $outText = '<span class="error">Invalid tag extension name: ' . |
| 3408 | + $output = '<span class="error">Invalid tag extension name: ' . |
3245 | 3409 | htmlspecialchars( $name ) . '</span>'; |
3246 | 3410 | } |
3247 | 3411 | |
3248 | | - if ( is_array( $outText ) ) { |
3249 | | - $outText = $outText[0]; |
| 3412 | + if ( is_array( $output ) ) { |
| 3413 | + // Extract flags to local scope (to override $markerType) |
| 3414 | + $flags = $output; |
| 3415 | + $output = $flags[0]; |
| 3416 | + unset( $flags[0] ); |
| 3417 | + extract( $flags ); |
3250 | 3418 | } |
3251 | | - } |
3252 | | - |
3253 | | - return $retCode; |
3254 | | - } |
3255 | | - |
3256 | | - function onlyincludeSubstitution($inNode, &$outText, $flags = 0) { |
3257 | | - return FALSE; |
3258 | | - } |
3259 | | - |
3260 | | - function commentSubstitution($inNode, &$outText, $flags = 0) { |
3261 | | - $comment = $contextNode->getAttribute("startTag"); |
3262 | | - # HTML-style comment |
3263 | | - # Remove it in HTML, pre+remove and STRIP_COMMENTS modes |
3264 | | - if ( $this->parser->ot['html'] |
3265 | | - || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() ) |
3266 | | - || ( $flags & self::STRIP_COMMENTS ) ) { |
3267 | | - if ($comment[0] == "\n" || $comment[strlen($comment) - 1] == "\n") { |
3268 | | - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode("\n"), $contextNode); |
| 3419 | + } else { |
| 3420 | + if ( is_null( $attrText ) ) { |
| 3421 | + $attrText = ''; |
| 3422 | + } |
| 3423 | + if ( isset( $params['attributes'] ) ) { |
| 3424 | + foreach ( $params['attributes'] as $attrName => $attrValue ) { |
| 3425 | + $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' . |
| 3426 | + htmlspecialchars( $attrValue ) . '"'; |
| 3427 | + } |
| 3428 | + } |
| 3429 | + if ( $content === null ) { |
| 3430 | + $output = "<$name$attrText/>"; |
3269 | 3431 | } else { |
3270 | | - $contextNode->parentNode->removeChild($contextNode); |
| 3432 | + $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] ); |
| 3433 | + $output = "<$name$attrText>$content$close"; |
3271 | 3434 | } |
3272 | 3435 | } |
3273 | | - # Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result |
3274 | | - # Not in RECOVER_COMMENTS mode (extractSections) though |
3275 | | - elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) { |
3276 | | - $outText = $this->parser->insertStripItem($contextNode->getAttribute("startTag")); |
3277 | | - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode); |
| 3436 | + |
| 3437 | + if( $markerType === 'none' ) { |
| 3438 | + return $output; |
| 3439 | + } elseif ( $markerType === 'nowiki' ) { |
| 3440 | + $this->mStripState->nowiki->setPair( $marker, $output ); |
| 3441 | + } elseif ( $markerType === 'general' ) { |
| 3442 | + $this->mStripState->general->setPair( $marker, $output ); |
| 3443 | + } else { |
| 3444 | + throw new MWException( __METHOD__.': invalid marker type' ); |
3278 | 3445 | } |
3279 | | - # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove |
3280 | | - else { |
3281 | | - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($comment), $contextNode); |
3282 | | - } |
| 3446 | + return $marker; |
3283 | 3447 | } |
3284 | 3448 | |
3285 | | - function newlineSubstitution($inNode, &$outText, $flags = 0) { |
3286 | | - return FALSE; |
3287 | | - } |
3288 | | - |
3289 | | - function hSubstitution($inNode, &$outText, $flags = 0) { |
3290 | | - # Insert a heading marker only for <h> children of <root> |
3291 | | - # This is to stop extractSections from going over multiple tree levels |
3292 | | - # Insert heading index marker |
3293 | | - $this->expandRec($contextNode->childNodes, $flags, $headingIndex); |
3294 | | - $titleText = $this->title->getPrefixedDBkey(); |
3295 | | - $this->parser->mHeadings[] = array( $titleText, $headingIndex ); |
3296 | | - $serial = count( $this->parser->mHeadings ) - 1; |
3297 | | - $marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX; |
3298 | | - $this->parser->mStripState->general->setPair( $marker, '' ); |
3299 | | - $outText = $contextNode->getAttribute("startTag") . $marker . $contextNode->firstChild->wholeText . $contextNode->getAttribute("endTag"); |
3300 | | - $contextNode->parentNode->replaceChild($contextNode->ownerDocument->createTextNode($outText), $contextNode); |
3301 | | - $headingIndex ++; |
3302 | | - } |
3303 | | - |
3304 | | - function orderedListSubstitution($inNode, &$outText, $flags = 0) { |
3305 | | - $outText = "<ol>" . ParseEngine::expand($inNode->childNodes, $this, $flags) . "</ol>"; |
3306 | | - return TRUE; |
3307 | | - } |
3308 | | - |
3309 | | - function unorderedListSubstitution($inNode, &$outText, $flags = 0) { |
3310 | | - $outText = "<ul>" . ParseEngine::expand($inNode->childNodes, $this, $flags) . "</ul>"; |
3311 | | - return TRUE; |
3312 | | - } |
3313 | | - |
3314 | | - function listItemSubstitution($inNode, &$outText, $flags = 0) { |
3315 | | - $outText = "<li>" . ParseEngine::expand($inNode->childNodes, $this, $flags) . "</li>"; |
3316 | | - return TRUE; |
3317 | | - } |
3318 | | - |
3319 | 3449 | /** |
3320 | 3450 | * Increment an include size counter |
3321 | 3451 | * |
— | — | @@ -3501,8 +3631,9 @@ |
3502 | 3632 | $baseTitleText = $this->mTitle->getPrefixedDBkey(); |
3503 | 3633 | $oldType = $this->mOutputType; |
3504 | 3634 | $this->setOutputType( self::OT_WIKI ); |
3505 | | - $root = $this->mParseEngine->parse($origText); |
3506 | | - $node = $root->firstChild; |
| 3635 | + $frame = $this->getPreprocessor()->newFrame(); |
| 3636 | + $root = $this->preprocessToDom( $origText ); |
| 3637 | + $node = $root->getFirstChild(); |
3507 | 3638 | $byteOffset = 0; |
3508 | 3639 | $tocraw = array(); |
3509 | 3640 | |
— | — | @@ -3679,14 +3810,14 @@ |
3680 | 3811 | # Add the section to the section tree |
3681 | 3812 | # Find the DOM node for this header |
3682 | 3813 | while ( $node && !$isTemplate ) { |
3683 | | - if ( $node->nodeName === 'h' ) { |
| 3814 | + if ( $node->getName() === 'h' ) { |
3684 | 3815 | $bits = $node->splitHeading(); |
3685 | 3816 | if ( $bits['i'] == $sectionIndex ) |
3686 | 3817 | break; |
3687 | 3818 | } |
3688 | 3819 | $byteOffset += mb_strlen( $this->mStripState->unstripBoth( |
3689 | | - ParseEngine::expand( $node, $this, self::RECOVER_ORIG ) ) ); |
3690 | | - $node = $node->nextSibling; |
| 3820 | + $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) ); |
| 3821 | + $node = $node->getNextSibling(); |
3691 | 3822 | } |
3692 | 3823 | $tocraw[] = array( |
3693 | 3824 | 'toclevel' => $toclevel, |
— | — | @@ -4064,8 +4195,9 @@ |
4065 | 4196 | |
4066 | 4197 | $text = preg_replace( $substRegex, $substText, $text ); |
4067 | 4198 | $text = $this->cleanSigInSig( $text ); |
4068 | | - $dom = $this->mParseEngine->parse($text); |
4069 | | - $text = ParseEngine::expand( $dom, $this ); |
| 4199 | + $dom = $this->preprocessToDom( $text ); |
| 4200 | + $frame = $this->getPreprocessor()->newFrame(); |
| 4201 | + $text = $frame->expand( $dom ); |
4070 | 4202 | |
4071 | 4203 | if ( !$parsing ) { |
4072 | 4204 | $text = $this->mStripState->unstripBoth( $text ); |
— | — | @@ -4194,6 +4326,10 @@ |
4195 | 4327 | * branches and thus speed up parsing. It is also possible to analyse the parse tree of |
4196 | 4328 | * the arguments, and to control the way they are expanded. |
4197 | 4329 | * |
| 4330 | + * The $frame parameter is a PPFrame. This can be used to produce expanded text from the |
| 4331 | + * arguments, for instance: |
| 4332 | + * $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : ''; |
| 4333 | + * |
4198 | 4334 | * For technical reasons, $args[0] is pre-expanded and will be a string. This may change in |
4199 | 4335 | * future versions. Please call $frame->expand() on it anyway so that your code keeps |
4200 | 4336 | * working if/when this is changed. |
— | — | @@ -4201,6 +4337,9 @@ |
4202 | 4338 | * If you want whitespace to be trimmed from $args, you need to do it yourself, post- |
4203 | 4339 | * expansion. |
4204 | 4340 | * |
| 4341 | + * Please read the documentation in includes/parser/Preprocessor.php for more information |
| 4342 | + * about the methods available in PPFrame and PPNode. |
| 4343 | + * |
4205 | 4344 | * @return The old callback function for this name, if any |
4206 | 4345 | */ |
4207 | 4346 | function setFunctionHook( $id, $callback, $flags = 0 ) { |
— | — | @@ -4636,11 +4775,12 @@ |
4637 | 4776 | * Callback from the Sanitizer for expanding items found in HTML attribute |
4638 | 4777 | * values, so they can be safely tested and escaped. |
4639 | 4778 | * @param string $text |
| 4779 | + * @param PPFrame $frame |
4640 | 4780 | * @return string |
4641 | 4781 | * @private |
4642 | 4782 | */ |
4643 | | - function attributeStripCallback( &$text ) { |
4644 | | - $text = $this->replaceVariables( $text ); |
| 4783 | + function attributeStripCallback( &$text, $frame = false ) { |
| 4784 | + $text = $this->replaceVariables( $text, $frame ); |
4645 | 4785 | $text = $this->mStripState->unstripBoth( $text ); |
4646 | 4786 | return $text; |
4647 | 4787 | } |
— | — | @@ -4694,6 +4834,7 @@ |
4695 | 4835 | $this->mOptions = new ParserOptions; |
4696 | 4836 | $this->setOutputType( self::OT_WIKI ); |
4697 | 4837 | $outText = ''; |
| 4838 | + $frame = $this->getPreprocessor()->newFrame(); |
4698 | 4839 | |
4699 | 4840 | // Process section extraction flags |
4700 | 4841 | $flags = 0; |
— | — | @@ -4705,30 +4846,29 @@ |
4706 | 4847 | } |
4707 | 4848 | } |
4708 | 4849 | // Preprocess the text |
4709 | | - $root = $this->mParseEngine->parse($text); |
| 4850 | + $root = $this->preprocessToDom( $text, $flags ); |
4710 | 4851 | |
4711 | 4852 | // <h> nodes indicate section breaks |
4712 | 4853 | // They can only occur at the top level, so we can find them by iterating the root's children |
4713 | | - $node = $root->firstChild->firstChild; |
| 4854 | + $node = $root->getFirstChild(); |
4714 | 4855 | |
4715 | 4856 | // Find the target section |
4716 | | - $ind = 1; |
4717 | 4857 | if ( $sectionIndex == 0 ) { |
4718 | 4858 | // Section zero doesn't nest, level=big |
4719 | 4859 | $targetLevel = 1000; |
4720 | 4860 | } else { |
4721 | | - while ( $node ) { |
4722 | | - if ( $node->nodeName === 'h' ) { |
4723 | | - if ( $ind == $sectionIndex ) { |
4724 | | - $targetLevel = strlen($node->getAttribute("endTag")); |
| 4861 | + while ( $node ) { |
| 4862 | + if ( $node->getName() === 'h' ) { |
| 4863 | + $bits = $node->splitHeading(); |
| 4864 | + if ( $bits['i'] == $sectionIndex ) { |
| 4865 | + $targetLevel = $bits['level']; |
4725 | 4866 | break; |
4726 | 4867 | } |
4727 | | - $ind ++; |
4728 | 4868 | } |
4729 | 4869 | if ( $mode === 'replace' ) { |
4730 | | - $outText .= ParseEngine::unparse($node); |
| 4870 | + $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
4731 | 4871 | } |
4732 | | - $node = $node->nextSibling; |
| 4872 | + $node = $node->getNextSibling(); |
4733 | 4873 | } |
4734 | 4874 | } |
4735 | 4875 | |
— | — | @@ -4743,17 +4883,17 @@ |
4744 | 4884 | |
4745 | 4885 | // Find the end of the section, including nested sections |
4746 | 4886 | do { |
4747 | | - if ( $node->nodeName === 'h' ) { |
4748 | | - $curLevel = strlen($node->getAttribute("endTag")); |
4749 | | - if ( $ind != $sectionIndex && $curLevel <= $targetLevel ) { |
| 4887 | + if ( $node->getName() === 'h' ) { |
| 4888 | + $bits = $node->splitHeading(); |
| 4889 | + $curLevel = $bits['level']; |
| 4890 | + if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { |
4750 | 4891 | break; |
4751 | 4892 | } |
4752 | | - $ind ++; |
4753 | 4893 | } |
4754 | 4894 | if ( $mode === 'get' ) { |
4755 | | - $outText .= ParseEngine::unparse($node); |
| 4895 | + $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
4756 | 4896 | } |
4757 | | - $node = $node->nextSibling; |
| 4897 | + $node = $node->getNextSibling(); |
4758 | 4898 | } while ( $node ); |
4759 | 4899 | |
4760 | 4900 | // Write out the remainder (in replace mode only) |
— | — | @@ -4767,8 +4907,8 @@ |
4768 | 4908 | } |
4769 | 4909 | |
4770 | 4910 | while ( $node ) { |
4771 | | - $outText .= ParseEngine::expand( $node, $this, self::RECOVER_ORIG ); |
4772 | | - $node = $node->nextSibling; |
| 4911 | + $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
| 4912 | + $node = $node->getNextSibling(); |
4773 | 4913 | } |
4774 | 4914 | } |
4775 | 4915 | |
— | — | @@ -4776,9 +4916,6 @@ |
4777 | 4917 | // Re-insert stripped tags |
4778 | 4918 | $outText = rtrim( $this->mStripState->unstripBoth( $outText ) ); |
4779 | 4919 | } |
4780 | | - if ($outText[0] == "\n") { |
4781 | | - $outText = substr($outText, 1); |
4782 | | - } |
4783 | 4920 | |
4784 | 4921 | return $outText; |
4785 | 4922 | } |
Index: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php |
— | — | @@ -0,0 +1,1509 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * @ingroup Parser |
| 6 | + */ |
| 7 | +class Preprocessor_DOM implements Preprocessor { |
| 8 | + var $parser, $memoryLimit; |
| 9 | + |
| 10 | + const CACHE_VERSION = 1; |
| 11 | + |
| 12 | + function __construct( $parser ) { |
| 13 | + $this->parser = $parser; |
| 14 | + $mem = ini_get( 'memory_limit' ); |
| 15 | + $this->memoryLimit = false; |
| 16 | + if ( strval( $mem ) !== '' && $mem != -1 ) { |
| 17 | + if ( preg_match( '/^\d+$/', $mem ) ) { |
| 18 | + $this->memoryLimit = $mem; |
| 19 | + } elseif ( preg_match( '/^(\d+)M$/i', $mem, $m ) ) { |
| 20 | + $this->memoryLimit = $m[1] * 1048576; |
| 21 | + } |
| 22 | + } |
| 23 | + } |
| 24 | + |
| 25 | + function newFrame() { |
| 26 | + return new PPFrame_DOM( $this ); |
| 27 | + } |
| 28 | + |
| 29 | + function newCustomFrame( $args ) { |
| 30 | + return new PPCustomFrame_DOM( $this, $args ); |
| 31 | + } |
| 32 | + |
| 33 | + function memCheck() { |
| 34 | + if ( $this->memoryLimit === false ) { |
| 35 | + return; |
| 36 | + } |
| 37 | + $usage = memory_get_usage(); |
| 38 | + if ( $usage > $this->memoryLimit * 0.9 ) { |
| 39 | + $limit = intval( $this->memoryLimit * 0.9 / 1048576 + 0.5 ); |
| 40 | + throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" ); |
| 41 | + } |
| 42 | + return $usage <= $this->memoryLimit * 0.8; |
| 43 | + } |
| 44 | + |
| 45 | + /** |
| 46 | + * Preprocess some wikitext and return the document tree. |
| 47 | + * This is the ghost of Parser::replace_variables(). |
| 48 | + * |
| 49 | + * @param string $text The text to parse |
| 50 | + * @param integer $flags Bitwise combination of: |
| 51 | + * Parser::PTD_FOR_INCLUSION Handle <noinclude>/<includeonly> as if the text is being |
| 52 | + * included. Default is to assume a direct page view. |
| 53 | + * |
| 54 | + * The generated DOM tree must depend only on the input text and the flags. |
| 55 | + * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. |
| 56 | + * |
| 57 | + * Any flag added to the $flags parameter here, or any other parameter liable to cause a |
| 58 | + * change in the DOM tree for a given text, must be passed through the section identifier |
| 59 | + * in the section edit link and thus back to extractSections(). |
| 60 | + * |
| 61 | + * When the text is longer than $wgPreprocessorCacheThreshold, the generated XML is cached in |
| 62 | + * $wgMemc for 86400 seconds under a key built from md5( $text ) and $flags, which relies on |
| 63 | + * these strict dependency requirements. |
| 64 | + * |
| 65 | + * @private |
| 66 | + */ |
| 67 | + function preprocessToObj( $text, $flags = 0 ) { |
| 68 | + wfProfileIn( __METHOD__ ); |
| 69 | + global $wgMemc, $wgPreprocessorCacheThreshold; |
| 70 | + |
| 71 | + $xml = false; |
| 72 | + $cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold; |
| 73 | + if ( $cacheable ) { |
| 74 | + wfProfileIn( __METHOD__.'-cacheable' ); |
| 75 | + |
| 76 | + $cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags ); |
| 77 | + $cacheValue = $wgMemc->get( $cacheKey ); |
| 78 | + if ( $cacheValue ) { |
| 79 | + $version = substr( $cacheValue, 0, 8 ); |
| 80 | + if ( intval( $version ) == self::CACHE_VERSION ) { |
| 81 | + $xml = substr( $cacheValue, 8 ); |
| 82 | + // From the cache |
| 83 | + wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" ); |
| 84 | + } |
| 85 | + } |
| 86 | + } |
| 87 | + if ( $xml === false ) { |
| 88 | + if ( $cacheable ) { |
| 89 | + wfProfileIn( __METHOD__.'-cache-miss' ); |
| 90 | + $xml = $this->preprocessToXml( $text, $flags ); |
| 91 | + $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml; |
| 92 | + $wgMemc->set( $cacheKey, $cacheValue, 86400 ); |
| 93 | + wfProfileOut( __METHOD__.'-cache-miss' ); |
| 94 | + wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" ); |
| 95 | + } else { |
| 96 | + $xml = $this->preprocessToXml( $text, $flags ); |
| 97 | + } |
| 98 | + |
| 99 | + } |
| 100 | + wfProfileIn( __METHOD__.'-loadXML' ); |
| 101 | + $dom = new DOMDocument; |
| 102 | + wfSuppressWarnings(); |
| 103 | + $result = $dom->loadXML( $xml ); |
| 104 | + wfRestoreWarnings(); |
| 105 | + if ( !$result ) { |
| 106 | + // Try running the XML through UtfNormal to get rid of invalid characters |
| 107 | + $xml = UtfNormal::cleanUp( $xml ); |
| 108 | + $result = $dom->loadXML( $xml ); |
| 109 | + if ( !$result ) { |
| 110 | + throw new MWException( __METHOD__.' generated invalid XML' ); |
| 111 | + } |
| 112 | + } |
| 113 | + $obj = new PPNode_DOM( $dom->documentElement ); |
| 114 | + wfProfileOut( __METHOD__.'-loadXML' ); |
| 115 | + if ( $cacheable ) { |
| 116 | + wfProfileOut( __METHOD__.'-cacheable' ); |
| 117 | + } |
| 118 | + wfProfileOut( __METHOD__ ); |
| 119 | + return $obj; |
| 120 | + } |
| 121 | + |
| 122 | + function preprocessToXml( $text, $flags = 0 ) { |
| 123 | + wfProfileIn( __METHOD__ ); |
| 124 | + $rules = array( |
| 125 | + '{' => array( |
| 126 | + 'end' => '}', |
| 127 | + 'names' => array( |
| 128 | + 2 => 'template', |
| 129 | + 3 => 'tplarg', |
| 130 | + ), |
| 131 | + 'min' => 2, |
| 132 | + 'max' => 3, |
| 133 | + ), |
| 134 | + '[' => array( |
| 135 | + 'end' => ']', |
| 136 | + 'names' => array( 2 => null ), |
| 137 | + 'min' => 2, |
| 138 | + 'max' => 2, |
| 139 | + ) |
| 140 | + ); |
| 141 | + |
| 142 | + $forInclusion = $flags & Parser::PTD_FOR_INCLUSION; |
| 143 | + |
| 144 | + $xmlishElements = $this->parser->getStripList(); |
| 145 | + $enableOnlyinclude = false; |
| 146 | + if ( $forInclusion ) { |
| 147 | + $ignoredTags = array( 'includeonly', '/includeonly' ); |
| 148 | + $ignoredElements = array( 'noinclude' ); |
| 149 | + $xmlishElements[] = 'noinclude'; |
| 150 | + if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) { |
| 151 | + $enableOnlyinclude = true; |
| 152 | + } |
| 153 | + } else { |
| 154 | + $ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ); |
| 155 | + $ignoredElements = array( 'includeonly' ); |
| 156 | + $xmlishElements[] = 'includeonly'; |
| 157 | + } |
| 158 | + $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); |
| 159 | + |
| 160 | + // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset |
| 161 | + $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; |
| 162 | + |
| 163 | + $stack = new PPDStack; |
| 164 | + |
| 165 | + $searchBase = "[{<\n"; #} |
| 166 | + $revText = strrev( $text ); // For fast reverse searches |
| 167 | + |
| 168 | + $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start |
| 169 | + $accum =& $stack->getAccum(); # Current accumulator |
| 170 | + $accum = '<root>'; |
| 171 | + $findEquals = false; # True to find equals signs in arguments |
| 172 | + $findPipe = false; # True to take notice of pipe characters |
| 173 | + $headingIndex = 1; |
| 174 | + $inHeading = false; # True if $i is inside a possible heading |
| 175 | + $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i |
| 176 | + $findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude> |
| 177 | + $fakeLineStart = true; # Do a line-start run without outputting an LF character |
| 178 | + |
| 179 | + while ( true ) { |
| 180 | + //$this->memCheck(); |
| 181 | + |
| 182 | + if ( $findOnlyinclude ) { |
| 183 | + // Ignore all input up to the next <onlyinclude> |
| 184 | + $startPos = strpos( $text, '<onlyinclude>', $i ); |
| 185 | + if ( $startPos === false ) { |
| 186 | + // Ignored section runs to the end |
| 187 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>'; |
| 188 | + break; |
| 189 | + } |
| 190 | + $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end |
| 191 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>'; |
| 192 | + $i = $tagEndPos; |
| 193 | + $findOnlyinclude = false; |
| 194 | + } |
| 195 | + |
| 196 | + if ( $fakeLineStart ) { |
| 197 | + $found = 'line-start'; |
| 198 | + $curChar = ''; |
| 199 | + } else { |
| 200 | + # Find the next opening brace or bracket, "<", newline, current closing character, pipe or equals sign |
| 201 | + $search = $searchBase; |
| 202 | + if ( $stack->top === false ) { |
| 203 | + $currentClosing = ''; |
| 204 | + } else { |
| 205 | + $currentClosing = $stack->top->close; |
| 206 | + $search .= $currentClosing; |
| 207 | + } |
| 208 | + if ( $findPipe ) { |
| 209 | + $search .= '|'; |
| 210 | + } |
| 211 | + if ( $findEquals ) { |
| 212 | + // First equals will be for the template |
| 213 | + $search .= '='; |
| 214 | + } |
| 215 | + $rule = null; |
| 216 | + # Output literal section, advance input counter |
| 217 | + $literalLength = strcspn( $text, $search, $i ); |
| 218 | + if ( $literalLength > 0 ) { |
| 219 | + $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) ); |
| 220 | + $i += $literalLength; |
| 221 | + } |
| 222 | + if ( $i >= strlen( $text ) ) { |
| 223 | + if ( $currentClosing == "\n" ) { |
| 224 | + // Do a past-the-end run to finish off the heading |
| 225 | + $curChar = ''; |
| 226 | + $found = 'line-end'; |
| 227 | + } else { |
| 228 | + # All done |
| 229 | + break; |
| 230 | + } |
| 231 | + } else { |
| 232 | + $curChar = $text[$i]; |
| 233 | + if ( $curChar == '|' ) { |
| 234 | + $found = 'pipe'; |
| 235 | + } elseif ( $curChar == '=' ) { |
| 236 | + $found = 'equals'; |
| 237 | + } elseif ( $curChar == '<' ) { |
| 238 | + $found = 'angle'; |
| 239 | + } elseif ( $curChar == "\n" ) { |
| 240 | + if ( $inHeading ) { |
| 241 | + $found = 'line-end'; |
| 242 | + } else { |
| 243 | + $found = 'line-start'; |
| 244 | + } |
| 245 | + } elseif ( $curChar == $currentClosing ) { |
| 246 | + $found = 'close'; |
| 247 | + } elseif ( isset( $rules[$curChar] ) ) { |
| 248 | + $found = 'open'; |
| 249 | + $rule = $rules[$curChar]; |
| 250 | + } else { |
| 251 | + # Some versions of PHP have a strcspn which stops on null characters |
| 252 | + # Ignore and continue |
| 253 | + ++$i; |
| 254 | + continue; |
| 255 | + } |
| 256 | + } |
| 257 | + } |
| 258 | + |
| 259 | + if ( $found == 'angle' ) { |
| 260 | + $matches = false; |
| 261 | + // Handle </onlyinclude> |
| 262 | + if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) { |
| 263 | + $findOnlyinclude = true; |
| 264 | + continue; |
| 265 | + } |
| 266 | + |
| 267 | + // Determine element name |
| 268 | + if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) { |
| 269 | + // Element name missing or not listed |
| 270 | + $accum .= '<'; |
| 271 | + ++$i; |
| 272 | + continue; |
| 273 | + } |
| 274 | + // Handle comments |
| 275 | + if ( isset( $matches[2] ) && $matches[2] == '!--' ) { |
| 276 | + // To avoid leaving blank lines, when a comment is both preceded |
| 277 | + // and followed by a newline (ignoring spaces), trim leading and |
| 278 | + // trailing spaces and one of the newlines. |
| 279 | + |
| 280 | + // Find the end |
| 281 | + $endPos = strpos( $text, '-->', $i + 4 ); |
| 282 | + if ( $endPos === false ) { |
| 283 | + // Unclosed comment in input, runs to end |
| 284 | + $inner = substr( $text, $i ); |
| 285 | + $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; |
| 286 | + $i = strlen( $text ); |
| 287 | + } else { |
| 288 | + // Search backwards for leading whitespace |
| 289 | + $wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0; |
| 290 | + // Search forwards for trailing whitespace |
| 291 | + // $wsEnd will be the position of the last space |
| 292 | + $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); |
| 293 | + // Eat the line if possible |
| 294 | + // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at |
| 295 | + // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but |
| 296 | + // it's a possibly beneficial backwards-compatibility break. |
| 297 | + if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" |
| 298 | + && substr( $text, $wsEnd + 1, 1 ) == "\n" ) |
| 299 | + { |
| 300 | + $startPos = $wsStart; |
| 301 | + $endPos = $wsEnd + 1; |
| 302 | + // Remove leading whitespace from the end of the accumulator |
| 303 | + // Sanity check first though |
| 304 | + $wsLength = $i - $wsStart; |
| 305 | + if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) { |
| 306 | + $accum = substr( $accum, 0, -$wsLength ); |
| 307 | + } |
| 308 | + // Do a line-start run next time to look for headings after the comment |
| 309 | + $fakeLineStart = true; |
| 310 | + } else { |
| 311 | + // No line to eat, just take the comment itself |
| 312 | + $startPos = $i; |
| 313 | + $endPos += 2; |
| 314 | + } |
| 315 | + |
| 316 | + if ( $stack->top ) { |
| 317 | + $part = $stack->top->getCurrentPart(); |
| 318 | + if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) { |
| 319 | + // Comments abutting, no change in visual end |
| 320 | + $part->commentEnd = $wsEnd; |
| 321 | + } else { |
| 322 | + $part->visualEnd = $wsStart; |
| 323 | + $part->commentEnd = $endPos; |
| 324 | + } |
| 325 | + } |
| 326 | + $i = $endPos + 1; |
| 327 | + $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); |
| 328 | + $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; |
| 329 | + } |
| 330 | + continue; |
| 331 | + } |
| 332 | + $name = $matches[1]; |
| 333 | + $lowerName = strtolower( $name ); |
| 334 | + $attrStart = $i + strlen( $name ) + 1; |
| 335 | + |
| 336 | + // Find end of tag |
| 337 | + $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart ); |
| 338 | + if ( $tagEndPos === false ) { |
| 339 | + // Infinite backtrack |
| 340 | + // Disable tag search to prevent worst-case O(N^2) performance |
| 341 | + $noMoreGT = true; |
| 342 | + $accum .= '<'; |
| 343 | + ++$i; |
| 344 | + continue; |
| 345 | + } |
| 346 | + |
| 347 | + // Handle ignored tags |
| 348 | + if ( in_array( $lowerName, $ignoredTags ) ) { |
| 349 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>'; |
| 350 | + $i = $tagEndPos + 1; |
| 351 | + continue; |
| 352 | + } |
| 353 | + |
| 354 | + $tagStartPos = $i; |
| 355 | + if ( $text[$tagEndPos-1] == '/' ) { |
| 356 | + $attrEnd = $tagEndPos - 1; |
| 357 | + $inner = null; |
| 358 | + $i = $tagEndPos + 1; |
| 359 | + $close = ''; |
| 360 | + } else { |
| 361 | + $attrEnd = $tagEndPos; |
| 362 | + // Find closing tag |
| 363 | + if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", |
| 364 | + $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) |
| 365 | + { |
| 366 | + $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); |
| 367 | + $i = $matches[0][1] + strlen( $matches[0][0] ); |
| 368 | + $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>'; |
| 369 | + } else { |
| 370 | + // No end tag -- let it run out to the end of the text. |
| 371 | + $inner = substr( $text, $tagEndPos + 1 ); |
| 372 | + $i = strlen( $text ); |
| 373 | + $close = ''; |
| 374 | + } |
| 375 | + } |
| 376 | + // <includeonly> and <noinclude> just become <ignore> tags |
| 377 | + if ( in_array( $lowerName, $ignoredElements ) ) { |
| 378 | + $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) ) |
| 379 | + . '</ignore>'; |
| 380 | + continue; |
| 381 | + } |
| 382 | + |
| 383 | + $accum .= '<ext>'; |
| 384 | + if ( $attrEnd <= $attrStart ) { |
| 385 | + $attr = ''; |
| 386 | + } else { |
| 387 | + $attr = substr( $text, $attrStart, $attrEnd - $attrStart ); |
| 388 | + } |
| 389 | + $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' . |
| 390 | + // Note that the attr element contains the whitespace between name and attribute, |
| 391 | + // this is necessary for precise reconstruction during pre-save transform. |
| 392 | + '<attr>' . htmlspecialchars( $attr ) . '</attr>'; |
| 393 | + if ( $inner !== null ) { |
| 394 | + $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>'; |
| 395 | + } |
| 396 | + $accum .= $close . '</ext>'; |
| 397 | + } |
| 398 | + |
| 399 | + elseif ( $found == 'line-start' ) { |
| 400 | + // Is this the start of a heading? |
| 401 | + // Line break belongs before the heading element in any case |
| 402 | + if ( $fakeLineStart ) { |
| 403 | + $fakeLineStart = false; |
| 404 | + } else { |
| 405 | + $accum .= $curChar; |
| 406 | + $i++; |
| 407 | + } |
| 408 | + |
| 409 | + $count = strspn( $text, '=', $i, 6 ); |
| 410 | + if ( $count == 1 && $findEquals ) { |
| 411 | + // DWIM: This looks kind of like a name/value separator |
| 412 | + // Let's let the equals handler have it and break the potential heading |
| 413 | + // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex. |
| 414 | + } elseif ( $count > 0 ) { |
| 415 | + $piece = array( |
| 416 | + 'open' => "\n", |
| 417 | + 'close' => "\n", |
| 418 | + 'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ), |
| 419 | + 'startPos' => $i, |
| 420 | + 'count' => $count ); |
| 421 | + $stack->push( $piece ); |
| 422 | + $accum =& $stack->getAccum(); |
| 423 | + $flags = $stack->getFlags(); |
| 424 | + extract( $flags ); |
| 425 | + $i += $count; |
| 426 | + } |
| 427 | + } |
| 428 | + |
| 429 | + elseif ( $found == 'line-end' ) { |
| 430 | + $piece = $stack->top; |
| 431 | + // A heading must be open, otherwise \n wouldn't have been in the search list |
| 432 | + assert( $piece->open == "\n" ); |
| 433 | + $part = $piece->getCurrentPart(); |
| 434 | + // Search back through the input to see if it has a proper close |
| 435 | + // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient |
| 436 | + $wsLength = strspn( $revText, " \t", strlen( $text ) - $i ); |
| 437 | + $searchStart = $i - $wsLength; |
| 438 | + if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) { |
| 439 | + // Comment found at line end |
| 440 | + // Search for equals signs before the comment |
| 441 | + $searchStart = $part->visualEnd; |
| 442 | + $searchStart -= strspn( $revText, " \t", strlen( $text ) - $searchStart ); |
| 443 | + } |
| 444 | + $count = $piece->count; |
| 445 | + $equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart ); |
| 446 | + if ( $equalsLength > 0 ) { |
| 447 | + if ( $i - $equalsLength == $piece->startPos ) { |
| 448 | + // This is just a single string of equals signs on its own line |
| 449 | + // Replicate the doHeadings behaviour /={count}(.+)={count}/ |
| 450 | + // First find out how many equals signs there really are (don't stop at 6) |
| 451 | + $count = $equalsLength; |
| 452 | + if ( $count < 3 ) { |
| 453 | + $count = 0; |
| 454 | + } else { |
| 455 | + $count = min( 6, intval( ( $count - 1 ) / 2 ) ); |
| 456 | + } |
| 457 | + } else { |
| 458 | + $count = min( $equalsLength, $count ); |
| 459 | + } |
| 460 | + if ( $count > 0 ) { |
| 461 | + // Normal match, output <h> |
| 462 | + $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>"; |
| 463 | + $headingIndex++; |
| 464 | + } else { |
| 465 | + // Single equals sign on its own line, count=0 |
| 466 | + $element = $accum; |
| 467 | + } |
| 468 | + } else { |
| 469 | + // No match, no <h>, just pass down the inner text |
| 470 | + $element = $accum; |
| 471 | + } |
| 472 | + // Unwind the stack |
| 473 | + $stack->pop(); |
| 474 | + $accum =& $stack->getAccum(); |
| 475 | + $flags = $stack->getFlags(); |
| 476 | + extract( $flags ); |
| 477 | + |
| 478 | + // Append the result to the enclosing accumulator |
| 479 | + $accum .= $element; |
| 480 | + // Note that we do NOT increment the input pointer. |
| 481 | + // This is because the closing linebreak could be the opening linebreak of |
| 482 | + // another heading. Infinite loops are avoided because the next iteration MUST |
| 483 | + // hit the heading open case above, which unconditionally increments the |
| 484 | + // input pointer. |
| 485 | + } |
| 486 | + |
| 487 | + elseif ( $found == 'open' ) { |
| 488 | + # count opening brace characters |
| 489 | + $count = strspn( $text, $curChar, $i ); |
| 490 | + |
| 491 | + # we need to add to stack only if opening brace count is enough for one of the rules |
| 492 | + if ( $count >= $rule['min'] ) { |
| 493 | + # Add it to the stack |
| 494 | + $piece = array( |
| 495 | + 'open' => $curChar, |
| 496 | + 'close' => $rule['end'], |
| 497 | + 'count' => $count, |
| 498 | + 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), |
| 499 | + ); |
| 500 | + |
| 501 | + $stack->push( $piece ); |
| 502 | + $accum =& $stack->getAccum(); |
| 503 | + $flags = $stack->getFlags(); |
| 504 | + extract( $flags ); |
| 505 | + } else { |
| 506 | + # Add literal brace(s) |
| 507 | + $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
| 508 | + } |
| 509 | + $i += $count; |
| 510 | + } |
| 511 | + |
| 512 | + elseif ( $found == 'close' ) { |
| 513 | + $piece = $stack->top; |
| 514 | + # lets check if there are enough characters for closing brace |
| 515 | + $maxCount = $piece->count; |
| 516 | + $count = strspn( $text, $curChar, $i, $maxCount ); |
| 517 | + |
| 518 | + # check for maximum matching characters (if there are 5 closing |
| 519 | + # characters, we will probably need only 3 - depending on the rules) |
| 520 | + $matchingCount = 0; |
| 521 | + $rule = $rules[$piece->open]; |
| 522 | + if ( $count > $rule['max'] ) { |
| 523 | + # The specified maximum exists in the callback array, unless the caller |
| 524 | + # has made an error |
| 525 | + $matchingCount = $rule['max']; |
| 526 | + } else { |
| 527 | + # Count is less than the maximum |
| 528 | + # Skip any gaps in the callback array to find the true largest match |
| 529 | + # Need to use array_key_exists not isset because the callback can be null |
| 530 | + $matchingCount = $count; |
| 531 | + while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { |
| 532 | + --$matchingCount; |
| 533 | + } |
| 534 | + } |
| 535 | + |
| 536 | + if ($matchingCount <= 0) { |
| 537 | + # No matching element found in callback array |
| 538 | + # Output a literal closing brace and continue |
| 539 | + $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); |
| 540 | + $i += $count; |
| 541 | + continue; |
| 542 | + } |
| 543 | + $name = $rule['names'][$matchingCount]; |
| 544 | + if ( $name === null ) { |
| 545 | + // No element, just literal text |
| 546 | + $element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount ); |
| 547 | + } else { |
| 548 | + # Create XML element |
| 549 | + # Note: $parts is already XML, does not need to be encoded further |
| 550 | + $parts = $piece->parts; |
| 551 | + $title = $parts[0]->out; |
| 552 | + unset( $parts[0] ); |
| 553 | + |
| 554 | + # The invocation is at the start of the line if lineStart is set in |
| 555 | + # the stack, and all opening brackets are used up. |
| 556 | + if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) { |
| 557 | + $attr = ' lineStart="1"'; |
| 558 | + } else { |
| 559 | + $attr = ''; |
| 560 | + } |
| 561 | + |
| 562 | + $element = "<$name$attr>"; |
| 563 | + $element .= "<title>$title</title>"; |
| 564 | + $argIndex = 1; |
| 565 | + foreach ( $parts as $partIndex => $part ) { |
| 566 | + if ( isset( $part->eqpos ) ) { |
| 567 | + $argName = substr( $part->out, 0, $part->eqpos ); |
| 568 | + $argValue = substr( $part->out, $part->eqpos + 1 ); |
| 569 | + $element .= "<part><name>$argName</name>=<value>$argValue</value></part>"; |
| 570 | + } else { |
| 571 | + $element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>"; |
| 572 | + $argIndex++; |
| 573 | + } |
| 574 | + } |
| 575 | + $element .= "</$name>"; |
| 576 | + } |
| 577 | + |
| 578 | + # Advance input pointer |
| 579 | + $i += $matchingCount; |
| 580 | + |
| 581 | + # Unwind the stack |
| 582 | + $stack->pop(); |
| 583 | + $accum =& $stack->getAccum(); |
| 584 | + |
| 585 | + # Re-add the old stack element if it still has unmatched opening characters remaining |
| 586 | + if ($matchingCount < $piece->count) { |
| 587 | + $piece->parts = array( new PPDPart ); |
| 588 | + $piece->count -= $matchingCount; |
| 589 | + # do we still qualify for any callback with remaining count? |
| 590 | + $names = $rules[$piece->open]['names']; |
| 591 | + $skippedBraces = 0; |
| 592 | + $enclosingAccum =& $accum; |
| 593 | + while ( $piece->count ) { |
| 594 | + if ( array_key_exists( $piece->count, $names ) ) { |
| 595 | + $stack->push( $piece ); |
| 596 | + $accum =& $stack->getAccum(); |
| 597 | + break; |
| 598 | + } |
| 599 | + --$piece->count; |
| 600 | + $skippedBraces ++; |
| 601 | + } |
| 602 | + $enclosingAccum .= str_repeat( $piece->open, $skippedBraces ); |
| 603 | + } |
| 604 | + $flags = $stack->getFlags(); |
| 605 | + extract( $flags ); |
| 606 | + |
| 607 | + # Add XML element to the enclosing accumulator |
| 608 | + $accum .= $element; |
| 609 | + } |
| 610 | + |
| 611 | + elseif ( $found == 'pipe' ) { |
| 612 | + $findEquals = true; // shortcut for getFlags() |
| 613 | + $stack->addPart(); |
| 614 | + $accum =& $stack->getAccum(); |
| 615 | + ++$i; |
| 616 | + } |
| 617 | + |
| 618 | + elseif ( $found == 'equals' ) { |
| 619 | + $findEquals = false; // shortcut for getFlags() |
| 620 | + $stack->getCurrentPart()->eqpos = strlen( $accum ); |
| 621 | + $accum .= '='; |
| 622 | + ++$i; |
| 623 | + } |
| 624 | + } |
| 625 | + |
| 626 | + # Output any remaining unclosed brackets |
| 627 | + foreach ( $stack->stack as $piece ) { |
| 628 | + $stack->rootAccum .= $piece->breakSyntax(); |
| 629 | + } |
| 630 | + $stack->rootAccum .= '</root>'; |
| 631 | + $xml = $stack->rootAccum; |
| 632 | + |
| 633 | + wfProfileOut( __METHOD__ ); |
| 634 | + |
| 635 | + return $xml; |
| 636 | + } |
| 637 | +} |
| 638 | + |
/**
 * Stack class to help Preprocessor::preprocessToObj()
 *
 * Keeps the list of currently open syntax elements and a reference to the
 * string that output is currently being appended to: either the output of the
 * current part of the top element, or $rootAccum when the stack is empty.
 * @ingroup Parser
 */
class PPDStack {
	var $stack, $rootAccum, $top;
	var $out; // Not used by any method of this class
	// Reference to the active accumulator string. Declared explicitly so that
	// binding it in the constructor does not create a dynamic property.
	var $accum;
	var $elementClass = 'PPDStackElement';

	static $false = false;

	/**
	 * Start with an empty stack; output accumulates into $rootAccum.
	 */
	function __construct() {
		$this->stack = array();
		$this->top = false;
		$this->rootAccum = '';
		$this->accum =& $this->rootAccum;
	}

	/**
	 * @return int Number of elements currently on the stack
	 */
	function count() {
		return count( $this->stack );
	}

	/**
	 * @return string Reference to the active accumulator; appending to it
	 *   appends to the current part of the top element (or to the root output)
	 */
	function &getAccum() {
		return $this->accum;
	}

	/**
	 * @return mixed Current part of the top element, or false if the stack is empty
	 */
	function getCurrentPart() {
		if ( $this->top === false ) {
			return false;
		} else {
			return $this->top->getCurrentPart();
		}
	}

	/**
	 * Push a new element and make its accumulator the active one.
	 * @param mixed $data Either an instance of $elementClass, or data that is
	 *   passed to the $elementClass constructor
	 */
	function push( $data ) {
		if ( $data instanceof $this->elementClass ) {
			$this->stack[] = $data;
		} else {
			$class = $this->elementClass;
			$this->stack[] = new $class( $data );
		}
		$this->top = $this->stack[ count( $this->stack ) - 1 ];
		$this->accum =& $this->top->getAccum();
	}

	/**
	 * Remove the top element and re-point the accumulator at the new top
	 * (or at $rootAccum if the stack became empty).
	 * @return mixed The removed element
	 * @throws MWException if the stack is already empty
	 */
	function pop() {
		if ( !count( $this->stack ) ) {
			throw new MWException( __METHOD__.': no elements remaining' );
		}
		$temp = array_pop( $this->stack );

		if ( count( $this->stack ) ) {
			$this->top = $this->stack[ count( $this->stack ) - 1 ];
			$this->accum =& $this->top->getAccum();
		} else {
			$this->top = self::$false;
			$this->accum =& $this->rootAccum;
		}
		return $temp;
	}

	/**
	 * Start a new part on the top element and switch the accumulator to it.
	 * @param string $s Initial content of the new part
	 */
	function addPart( $s = '' ) {
		$this->top->addPart( $s );
		$this->accum =& $this->top->getAccum();
	}

	/**
	 * @return array Search flags (findEquals, findPipe, inHeading) of the top
	 *   element; all false when the stack is empty
	 */
	function getFlags() {
		if ( !count( $this->stack ) ) {
			return array(
				'findEquals' => false,
				'findPipe' => false,
				'inHeading' => false,
			);
		} else {
			return $this->top->getFlags();
		}
	}
}
| 717 | + |
/**
 * A single open syntax element held on a PPDStack.
 * @ingroup Parser
 */
class PPDStackElement {
	var $open,			// Opening character (\n for heading)
		$close,			// Matching closing character
		$count,			// Number of opening characters found (number of "=" for heading)
		$parts,			// Array of PPDPart objects describing pipe-separated parts.
		$lineStart;		// True if the open char appeared at the start of the input line. Not set for headings.

	var $partClass = 'PPDPart';

	/**
	 * Create the element with one empty part, then copy every key of $data
	 * onto the object as a property (so $data may override 'parts').
	 * @param array $data Property name => value
	 */
	function __construct( $data = array() ) {
		$partClass = $this->partClass;
		$this->parts = array( new $partClass );

		foreach ( $data as $property => $setting ) {
			$this->$property = $setting;
		}
	}

	/**
	 * @return string Reference to the output string of the last part
	 */
	function &getAccum() {
		$last = count( $this->parts ) - 1;
		return $this->parts[$last]->out;
	}

	/**
	 * Append a new part.
	 * @param string $s Initial output of the new part
	 */
	function addPart( $s = '' ) {
		$partClass = $this->partClass;
		$this->parts[] = new $partClass( $s );
	}

	/**
	 * @return object The last part
	 */
	function getCurrentPart() {
		$last = count( $this->parts ) - 1;
		return $this->parts[$last];
	}

	/**
	 * Work out what the scanner should look for while this element is on top.
	 * @return array Flags findPipe, findEquals and inHeading
	 */
	function getFlags() {
		$isHeading = ( $this->open == "\n" );
		// Pipes are not searched for inside headings or "[" elements
		$wantPipe = !$isHeading && $this->open != '[';

		// An equals sign is only of interest after the first pipe, and only
		// while the current part has not recorded one yet
		$wantEquals = false;
		if ( $wantPipe ) {
			$total = count( $this->parts );
			$wantEquals = $total > 1 && !isset( $this->parts[$total - 1]->eqpos );
		}

		return array(
			'findPipe' => $wantPipe,
			'findEquals' => $wantEquals,
			'inHeading' => $isHeading,
		);
	}

	/**
	 * Get the output string that would result if the close is not found.
	 * @param int|bool $openingCount Number of opening characters to emit,
	 *   or false to use $this->count
	 * @return string
	 */
	function breakSyntax( $openingCount = false ) {
		if ( $this->open == "\n" ) {
			// Heading: just the accumulated text of the first part
			return $this->parts[0]->out;
		}

		if ( $openingCount === false ) {
			$openingCount = $this->count;
		}

		$outputs = array();
		foreach ( $this->parts as $part ) {
			$outputs[] = $part->out;
		}
		return str_repeat( $this->open, $openingCount ) . implode( '|', $outputs );
	}
}
| 786 | + |
/**
 * One pipe-separated part of an open syntax element.
 * @ingroup Parser
 */
class PPDPart {
	var $out; // Output accumulator string

	// Optional member variables. They are declared here, defaulting to null,
	// so that assigning them does not create dynamic properties; isset() on
	// them still reports false until a value has been stored.
	var $eqpos;			// Position of equals sign in output accumulator
	var $commentEnd;	// Past-the-end input pointer for the last comment encountered
	var $visualEnd;		// Past-the-end input pointer for the end of the accumulator minus comments

	/**
	 * @param string $out Initial content of the output accumulator
	 */
	function __construct( $out = '' ) {
		$this->out = $out;
	}
}
| 802 | + |
| 803 | +/** |
| 804 | + * An expansion frame, used as a context to expand the result of preprocessToObj() |
| 805 | + * @ingroup Parser |
| 806 | + */ |
| 807 | +class PPFrame_DOM implements PPFrame { |
| 808 | + var $preprocessor, $parser, $title; |
| 809 | + var $titleCache; |
| 810 | + |
| 811 | + /** |
| 812 | + * Hashtable listing templates which are disallowed for expansion in this frame, |
| 813 | + * having been encountered previously in parent frames. |
| 814 | + */ |
| 815 | + var $loopCheckHash; |
| 816 | + |
| 817 | + /** |
| 818 | + * Recursion depth of this frame, top = 0 |
| 819 | + * Note that this is NOT the same as expansion depth in expand() |
| 820 | + */ |
| 821 | + var $depth; |
| 822 | + |
| 823 | + |
/**
 * Construct a new preprocessor frame.
 *
 * The frame takes its parser from the preprocessor and its title from the
 * parser; it starts at depth 0 with an empty loop-check table.
 * @param Preprocessor $preprocessor The parent preprocessor
 */
function __construct( $preprocessor ) {
	$parser = $preprocessor->parser;
	$title = $parser->mTitle;

	// First titleCache entry: prefixed DB key of this frame's title, or false without one
	if ( $title ) {
		$rootKey = $title->getPrefixedDBkey();
	} else {
		$rootKey = false;
	}

	$this->preprocessor = $preprocessor;
	$this->parser = $parser;
	$this->title = $title;
	$this->titleCache = array( $rootKey );
	$this->loopCheckHash = array();
	$this->depth = 0;
}
| 836 | + |
/**
 * Create a new child frame
 * $args is optionally a multi-root PPNode or array containing the template arguments
 *
 * Each argument is sorted into the numbered or the named list depending on
 * whether its <name> element carries attributes. A later argument replaces an
 * earlier one with the same key, regardless of which list the earlier one was in.
 * @param mixed $args Argument nodes, or false for none
 * @param mixed $title Title for the child frame, or false to reuse this frame's title
 * @return PPTemplateFrame_DOM
 */
function newChild( $args = false, $title = false ) {
	if ( $title === false ) {
		$title = $this->title;
	}

	$numbered = array();
	$named = array();

	if ( $args !== false ) {
		if ( $args instanceof PPNode ) {
			$args = $args->node;
		}

		// Built from the first argument's document and reused for the rest
		$xpath = null;
		foreach ( $args as $arg ) {
			if ( $xpath === null ) {
				$xpath = new DOMXPath( $arg->ownerDocument );
			}

			$nameNode = $xpath->query( 'name', $arg )->item( 0 );
			$valueList = $xpath->query( 'value', $arg );

			if ( $nameNode->hasAttributes() ) {
				// Numbered parameter
				$key = $nameNode->attributes->getNamedItem( 'index' )->textContent;
				$numbered[$key] = $valueList->item( 0 );
				unset( $named[$key] );
			} else {
				// Named parameter
				$key = trim( $this->expand( $nameNode, PPFrame::STRIP_COMMENTS ) );
				$named[$key] = $valueList->item( 0 );
				unset( $numbered[$key] );
			}
		}
	}

	return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numbered, $named, $title );
}
| 874 | + |
/**
 * Expand a node, or a list of nodes, to a string.
 *
 * Strings are returned unchanged. Otherwise the tree is walked with explicit
 * stacks (one output string, one iterator and one index per level) rather
 * than by recursing for every child; only headings call expand() recursively.
 * The parser's node-count limit and maximum expansion depth are enforced on
 * entry, and an error <span> is returned when either is exceeded.
 *
 * @param mixed $root String, PPNode_DOM, DOMDocument, DOMNode, DOMNodeList,
 *   or array of any of these
 * @param int $flags Bitfield of the NO_TEMPLATES, NO_ARGS, STRIP_COMMENTS,
 *   RECOVER_COMMENTS and NO_IGNORE constants
 * @return string
 * @throws MWException if a node of an unsupported type is encountered
 */
function expand( $root, $flags = 0 ) {
	static $expansionDepth = 0;
	if ( is_string( $root ) ) {
		return $root;
	}

	if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
	{
		return '<span class="error">Node-count limit exceeded</span>';
	}

	if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
		return '<span class="error">Expansion depth limit exceeded</span>';
	}
	wfProfileIn( __METHOD__ );
	++$expansionDepth;

	if ( $root instanceof PPNode_DOM ) {
		$root = $root->node;
	}
	if ( $root instanceof DOMDocument ) {
		$root = $root->documentElement;
	}

	// Level 0 is a sentinel that only collects the final output
	$outStack = array( '', '' );
	$iteratorStack = array( false, $root );
	$indexStack = array( 0, 0 );

	while ( count( $iteratorStack ) > 1 ) {
		$level = count( $outStack ) - 1;
		$iteratorNode =& $iteratorStack[ $level ];
		$out =& $outStack[$level];
		$index =& $indexStack[$level];

		if ( $iteratorNode instanceof PPNode_DOM ) {
			$iteratorNode = $iteratorNode->node;
		}

		if ( is_array( $iteratorNode ) ) {
			if ( $index >= count( $iteratorNode ) ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode[$index];
				$index++;
			}
		} elseif ( $iteratorNode instanceof DOMNodeList ) {
			if ( $index >= $iteratorNode->length ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode->item( $index );
				$index++;
			}
		} else {
			// Copy to $contextNode and then delete from iterator stack,
			// because this is not an iterator but we do have to execute it once
			$contextNode = $iteratorStack[$level];
			$iteratorStack[$level] = false;
		}

		if ( $contextNode instanceof PPNode_DOM ) {
			$contextNode = $contextNode->node;
		}

		$newIterator = false;

		if ( $contextNode === false ) {
			// nothing to do
		} elseif ( is_string( $contextNode ) ) {
			$out .= $contextNode;
		} elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
			$newIterator = $contextNode;
		} elseif ( $contextNode instanceof DOMNode ) {
			if ( $contextNode->nodeType == XML_TEXT_NODE ) {
				$out .= $contextNode->nodeValue;
			} elseif ( $contextNode->nodeName == 'template' ) {
				# Double-brace expansion
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$titles = $xpath->query( 'title', $contextNode );
				$title = $titles->item( 0 );
				$parts = $xpath->query( 'part', $contextNode );
				if ( $flags & self::NO_TEMPLATES ) {
					$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
				} else {
					$lineStart = $contextNode->getAttribute( 'lineStart' );
					$params = array(
						'title' => new PPNode_DOM( $title ),
						'parts' => new PPNode_DOM( $parts ),
						'lineStart' => $lineStart );
					$ret = $this->parser->braceSubstitution( $params, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->nodeName == 'tplarg' ) {
				# Triple-brace expansion
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$titles = $xpath->query( 'title', $contextNode );
				$title = $titles->item( 0 );
				$parts = $xpath->query( 'part', $contextNode );
				if ( $flags & self::NO_ARGS ) {
					$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
				} else {
					$params = array(
						'title' => new PPNode_DOM( $title ),
						'parts' => new PPNode_DOM( $parts ) );
					$ret = $this->parser->argSubstitution( $params, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->nodeName == 'comment' ) {
				# HTML-style comment
				# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
				if ( $this->parser->ot['html']
					|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
					|| ( $flags & self::STRIP_COMMENTS ) )
				{
					$out .= '';
				}
				# Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
				# Not in RECOVER_COMMENTS mode (extractSections) though
				elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
					$out .= $this->parser->insertStripItem( $contextNode->textContent );
				}
				# Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
				else {
					$out .= $contextNode->textContent;
				}
			} elseif ( $contextNode->nodeName == 'ignore' ) {
				# Output suppression used by <includeonly> etc.
				# OT_WIKI will only respect <ignore> in substed templates.
				# The other output types respect it unless NO_IGNORE is set.
				# extractSections() sets NO_IGNORE and so never respects it.
				if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
					$out .= $contextNode->textContent;
				} else {
					$out .= '';
				}
			} elseif ( $contextNode->nodeName == 'ext' ) {
				# Extension tag
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$names = $xpath->query( 'name', $contextNode );
				$attrs = $xpath->query( 'attr', $contextNode );
				$inners = $xpath->query( 'inner', $contextNode );
				$closes = $xpath->query( 'close', $contextNode );
				$params = array(
					'name' => new PPNode_DOM( $names->item( 0 ) ),
					'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
					'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
					'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
				);
				$out .= $this->parser->extensionSubstitution( $params, $this );
			} elseif ( $contextNode->nodeName == 'h' ) {
				# Heading
				$s = $this->expand( $contextNode->childNodes, $flags );

				# Insert a heading marker only for <h> children of <root>
				# This is to stop extractSections from going over multiple tree levels
				if ( $contextNode->parentNode->nodeName == 'root'
					&& $this->parser->ot['html'] )
				{
					# Insert heading index marker
					$headingIndex = $contextNode->getAttribute( 'i' );
					$titleText = $this->title->getPrefixedDBkey();
					$this->parser->mHeadings[] = array( $titleText, $headingIndex );
					$serial = count( $this->parser->mHeadings ) - 1;
					$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
					$count = $contextNode->getAttribute( 'level' );
					# The marker goes after the first $count characters of the heading text
					$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
					$this->parser->mStripState->general->setPair( $marker, '' );
				}
				$out .= $s;
			} else {
				# Generic recursive expansion
				$newIterator = $contextNode->childNodes;
			}
		} else {
			// $expansionDepth is static, so it must be restored before leaving
			// by exception; otherwise every later expand() call would start
			// from a depth that is one too high.
			--$expansionDepth;
			wfProfileOut( __METHOD__ );
			throw new MWException( __METHOD__.': Invalid parameter type' );
		}

		if ( $newIterator !== false ) {
			if ( $newIterator instanceof PPNode_DOM ) {
				$newIterator = $newIterator->node;
			}
			$outStack[] = '';
			$iteratorStack[] = $newIterator;
			$indexStack[] = 0;
		} elseif ( $iteratorStack[$level] === false ) {
			// Return accumulated value to parent
			// With tail recursion
			while ( $iteratorStack[$level] === false && $level > 0 ) {
				$outStack[$level - 1] .= $out;
				array_pop( $outStack );
				array_pop( $iteratorStack );
				array_pop( $indexStack );
				$level--;
			}
		}
	}
	--$expansionDepth;
	wfProfileOut( __METHOD__ );
	return $outStack[0];
}
| 1083 | + |
/**
 * Expand every node of every root with the given flags and join the results
 * with $sep.
 *
 * @param string $sep Separator inserted between consecutive expanded nodes
 * @param int $flags Flags passed through to expand()
 * @param mixed ... Any number of roots: a single node, an array, a
 *   DOMNodeList, or a PPNode_DOM wrapping one of those
 * @return string
 */
function implodeWithFlags( $sep, $flags /*, ... */ ) {
	$roots = array_slice( func_get_args(), 2 );

	$pieces = array();
	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		// A single node is treated as a one-element list
		$isList = is_array( $root ) || $root instanceof DOMNodeList;
		if ( !$isList ) {
			$root = array( $root );
		}
		foreach ( $root as $node ) {
			$pieces[] = $this->expand( $node, $flags );
		}
	}
	return implode( $sep, $pieces );
}
| 1105 | + |
/**
 * Implode with no flags specified
 * This previously called implodeWithFlags but has now been inlined to reduce stack depth
 *
 * @param string $sep Separator inserted between consecutive expanded nodes
 * @param mixed ... Any number of roots: a single node, an array, a
 *   DOMNodeList, or a PPNode_DOM wrapping one of those
 * @return string
 */
function implode( $sep /*, ... */ ) {
	$roots = array_slice( func_get_args(), 1 );

	$result = '';
	$emitted = 0;
	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		// A single node is treated as a one-element list
		if ( !( is_array( $root ) || $root instanceof DOMNodeList ) ) {
			$root = array( $root );
		}
		foreach ( $root as $node ) {
			if ( $emitted > 0 ) {
				$result .= $sep;
			}
			$result .= $this->expand( $node );
			$emitted++;
		}
	}
	return $result;
}
| 1131 | + |
/**
 * Makes an object that, when expand()ed, will be the same as one obtained
 * with implode()
 *
 * @param string $sep Separator placed between consecutive nodes
 * @param mixed ... Any number of roots: a single node, an array, a
 *   DOMNodeList, or a PPNode_DOM wrapping one of those
 * @return array Flat list of the nodes with $sep between each pair
 */
function virtualImplode( $sep /*, ... */ ) {
	$args = array_slice( func_get_args(), 1 );
	$out = array();
	$first = true;

	foreach ( $args as $root ) {
		// Unwrap each argument in turn. This has to happen inside the loop:
		// $root does not exist before it, and a wrapped node list must be
		// flattened so that its members are separated by $sep.
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
			$root = array( $root );
		}
		foreach ( $root as $node ) {
			if ( $first ) {
				$first = false;
			} else {
				$out[] = $sep;
			}
			$out[] = $node;
		}
	}
	return $out;
}
| 1157 | + |
/**
 * Virtual implode with brackets
 *
 * Builds a flat list that starts with $start, ends with $end, and holds every
 * node of every root in between, with $sep between consecutive nodes.
 *
 * @param mixed $start First element of the returned list
 * @param mixed $sep Separator placed between consecutive nodes
 * @param mixed $end Last element of the returned list
 * @param mixed ... Any number of roots: a single node, an array, a
 *   DOMNodeList, or a PPNode_DOM wrapping one of those
 * @return array
 */
function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
	$roots = array_slice( func_get_args(), 3 );

	// Pass 1: flatten all roots into a single list of nodes
	$nodes = array();
	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		if ( !( is_array( $root ) || $root instanceof DOMNodeList ) ) {
			$root = array( $root );
		}
		foreach ( $root as $node ) {
			$nodes[] = $node;
		}
	}

	// Pass 2: interleave the separator and add the brackets
	$result = array( $start );
	foreach ( $nodes as $position => $node ) {
		if ( $position > 0 ) {
			$result[] = $sep;
		}
		$result[] = $node;
	}
	$result[] = $end;
	return $result;
}
| 1183 | + |
/**
 * @return string Fixed placeholder text; this frame type has no arguments to show
 */
function __toString() {
	$description = 'frame{}';
	return $description;
}
| 1187 | + |
/**
 * Get a prefixed DB key.
 *
 * @param int|bool $level Index into the title cache, or false for this
 *   frame's own title
 * @return string|bool The key, or false if nothing is cached at $level
 */
function getPDBK( $level = false ) {
	if ( $level !== false ) {
		if ( isset( $this->titleCache[$level] ) ) {
			return $this->titleCache[$level];
		}
		return false;
	}
	return $this->title->getPrefixedDBkey();
}
| 1195 | + |
/**
 * @return array Always empty for this frame type
 */
function getArguments() {
	$none = array();
	return $none;
}
| 1199 | + |
/**
 * @return array Always empty for this frame type
 */
function getNumberedArguments() {
	$none = array();
	return $none;
}
| 1203 | + |
/**
 * @return array Always empty for this frame type
 */
function getNamedArguments() {
	$none = array();
	return $none;
}
| 1207 | + |
/**
 * Returns true if there are no arguments in this frame.
 * This frame type never has any, so the answer is always true.
 * @return bool
 */
function isEmpty() {
	$hasNoArguments = true;
	return $hasNoArguments;
}
| 1214 | + |
/**
 * @param mixed $name Argument name (ignored here)
 * @return bool Always false: this frame type has no arguments
 */
function getArgument( $name ) {
	$notFound = false;
	return $notFound;
}
| 1218 | + |
/**
 * Returns true if the infinite loop check is OK, false if a loop is detected
 *
 * A loop is reported when the title's prefixed DB key is already present in
 * this frame's loopCheckHash.
 * @param Title $title
 * @return bool
 */
function loopCheck( $title ) {
	$key = $title->getPrefixedDBkey();
	$alreadySeen = isset( $this->loopCheckHash[$key] );
	return !$alreadySeen;
}
| 1225 | + |
/**
 * Return true if the frame is a template frame.
 * This frame type always answers false.
 * @return bool
 */
function isTemplate() {
	$isTemplateFrame = false;
	return $isTemplateFrame;
}
| 1232 | +} |
| 1233 | + |
| 1234 | +/** |
| 1235 | + * Expansion frame with template arguments |
| 1236 | + * @ingroup Parser |
| 1237 | + */ |
| 1238 | +class PPTemplateFrame_DOM extends PPFrame_DOM { |
| 1239 | + var $numberedArgs, $namedArgs, $parent; |
| 1240 | + var $numberedExpansionCache, $namedExpansionCache; |
| 1241 | + |
| 1242 | + function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) { /* Builds a child frame: stores the arguments and title, and extends the parent's titleCache, loopCheckHash and depth */ |
| 1243 | + PPFrame_DOM::__construct( $preprocessor ); |
| 1244 | + $this->parent = $parent; |
| 1245 | + $this->numberedArgs = $numberedArgs; |
| 1246 | + $this->namedArgs = $namedArgs; |
| 1247 | + $this->title = $title; |
| 1248 | + $pdbk = $title ? $title->getPrefixedDBkey() : false; /* false when no title was supplied */ |
| 1249 | + $this->titleCache = $parent->titleCache; /* NOTE(review): $parent defaults to false but is dereferenced here and below - confirm callers always pass a frame */ |
| 1250 | + $this->titleCache[] = $pdbk; /* appended even when $pdbk is false */ |
| 1251 | + $this->loopCheckHash = /*clone*/ $parent->loopCheckHash; |
| 1252 | + if ( $pdbk !== false ) { |
| 1253 | + $this->loopCheckHash[$pdbk] = true; /* mark this title so loopCheck() reports it as already on the stack */ |
| 1254 | + } |
| 1255 | + $this->depth = $parent->depth + 1; |
| 1256 | + $this->numberedExpansionCache = $this->namedExpansionCache = array(); /* expansion caches start empty and are filled on demand by the getters */ |
| 1257 | + } |
| 1258 | + |
| 1259 | + function __toString() { |
| 1260 | + $s = 'tplframe{'; |
| 1261 | + $first = true; |
| 1262 | + $args = $this->numberedArgs + $this->namedArgs; |
| 1263 | + foreach ( $args as $name => $value ) { |
| 1264 | + if ( $first ) { |
| 1265 | + $first = false; |
| 1266 | + } else { |
| 1267 | + $s .= ', '; |
| 1268 | + } |
| 1269 | + $s .= "\"$name\":\"" . |
| 1270 | + str_replace( '"', '\\"', $value->ownerDocument->saveXML( $value ) ) . '"'; |
| 1271 | + } |
| 1272 | + $s .= '}'; |
| 1273 | + return $s; |
| 1274 | + } |
| 1275 | + /** |
| 1276 | + * Returns true if there are no arguments in this frame |
| 1277 | + */ |
| 1278 | + function isEmpty() { |
| 1279 | + return !count( $this->numberedArgs ) && !count( $this->namedArgs ); |
| 1280 | + } |
| 1281 | + |
| 1282 | + function getArguments() { |
| 1283 | + $arguments = array(); |
| 1284 | + foreach ( array_merge( |
| 1285 | + array_keys($this->numberedArgs), |
| 1286 | + array_keys($this->namedArgs)) as $key ) { |
| 1287 | + $arguments[$key] = $this->getArgument($key); |
| 1288 | + } |
| 1289 | + return $arguments; |
| 1290 | + } |
| 1291 | + |
| 1292 | + function getNumberedArguments() { |
| 1293 | + $arguments = array(); |
| 1294 | + foreach ( array_keys($this->numberedArgs) as $key ) { |
| 1295 | + $arguments[$key] = $this->getArgument($key); |
| 1296 | + } |
| 1297 | + return $arguments; |
| 1298 | + } |
| 1299 | + |
| 1300 | + function getNamedArguments() { |
| 1301 | + $arguments = array(); |
| 1302 | + foreach ( array_keys($this->namedArgs) as $key ) { |
| 1303 | + $arguments[$key] = $this->getArgument($key); |
| 1304 | + } |
| 1305 | + return $arguments; |
| 1306 | + } |
| 1307 | + |
| 1308 | + function getNumberedArgument( $index ) { |
| 1309 | + if ( !isset( $this->numberedArgs[$index] ) ) { |
| 1310 | + return false; |
| 1311 | + } |
| 1312 | + if ( !isset( $this->numberedExpansionCache[$index] ) ) { |
| 1313 | + # No trimming for unnamed arguments |
| 1314 | + $this->numberedExpansionCache[$index] = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS ); |
| 1315 | + } |
| 1316 | + return $this->numberedExpansionCache[$index]; |
| 1317 | + } |
| 1318 | + |
| 1319 | + function getNamedArgument( $name ) { |
| 1320 | + if ( !isset( $this->namedArgs[$name] ) ) { |
| 1321 | + return false; |
| 1322 | + } |
| 1323 | + if ( !isset( $this->namedExpansionCache[$name] ) ) { |
| 1324 | + # Trim named arguments post-expand, for backwards compatibility |
| 1325 | + $this->namedExpansionCache[$name] = trim( |
| 1326 | + $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) ); |
| 1327 | + } |
| 1328 | + return $this->namedExpansionCache[$name]; |
| 1329 | + } |
| 1330 | + |
| 1331 | + function getArgument( $name ) { /* Looks $name up as a numbered argument first, then as a named one; returns false if neither is set */ |
| 1332 | + $text = $this->getNumberedArgument( $name ); |
| 1333 | + if ( $text === false ) { |
| 1334 | + $text = $this->getNamedArgument( $name ); |
| 1335 | + } |
| 1336 | + return $text; |
| 1337 | + } |
| 1338 | + |
| 1339 | + /** |
| 1340 | + * Return true if the frame is a template frame |
| 1341 | + */ |
| 1342 | + function isTemplate() { |
| 1343 | + return true; |
| 1344 | + } |
| 1345 | +} |
| 1346 | + |
| 1347 | +/** |
| 1348 | + * Expansion frame with custom arguments |
| 1349 | + * @ingroup Parser |
| 1350 | + */ |
| 1351 | +class PPCustomFrame_DOM extends PPFrame_DOM { |
| 1352 | + var $args; |
| 1353 | + |
| 1354 | + function __construct( $preprocessor, $args ) { |
| 1355 | + PPFrame_DOM::__construct( $preprocessor ); |
| 1356 | + $this->args = $args; |
| 1357 | + } |
| 1358 | + |
| 1359 | + function __toString() { /* Debug representation: cstmframe{"name":"value", ...} with double quotes inside values backslash-escaped */ |
| 1360 | + $s = 'cstmframe{'; |
| 1361 | + $first = true; |
| 1362 | + foreach ( $this->args as $name => $value ) { |
| 1363 | + if ( $first ) { |
| 1364 | + $first = false; |
| 1365 | + } else { |
| 1366 | + $s .= ', '; |
| 1367 | + } |
| 1368 | + $s .= "\"$name\":\"" . |
| 1369 | + str_replace( '"', '\\"', (string)$value ) . '"'; /* cast rather than call ->__toString(): same result for objects, and plain-string arguments no longer cause a fatal error */ |
| 1370 | + } |
| 1371 | + $s .= '}'; |
| 1372 | + return $s; |
| 1373 | + } |
| 1374 | + |
| 1375 | + function isEmpty() { |
| 1376 | + return !count( $this->args ); |
| 1377 | + } |
| 1378 | + |
| 1379 | + function getArgument( $index ) { |
| 1380 | + if ( !isset( $this->args[$index] ) ) { |
| 1381 | + return false; |
| 1382 | + } |
| 1383 | + return $this->args[$index]; |
| 1384 | + } |
| 1385 | +} |
| 1386 | + |
| 1387 | +/** |
| 1388 | + * @ingroup Parser |
| 1389 | + */ |
| 1390 | +class PPNode_DOM implements PPNode { |
| 1391 | + var $node; |
| 1392 | + |
| 1393 | + function __construct( $node, $xpath = false ) { |
| 1394 | + $this->node = $node; |
| 1395 | + } |
| 1396 | + |
| 1397 | + function __get( $name ) { /* Lazily creates the DOMXPath for this node's owner document when ->xpath is read before it has been set */ |
| 1398 | + if ( $name == 'xpath' ) { |
| 1399 | + $this->xpath = new DOMXPath( $this->node->ownerDocument ); |
| 1400 | + } |
| 1401 | + return $this->xpath; /* NOTE(review): this return is unconditional, so any other undefined property name also yields the xpath object - confirm this is intended */ |
| 1402 | + } |
| 1403 | + |
| 1404 | + function __toString() { |
| 1405 | + if ( $this->node instanceof DOMNodeList ) { |
| 1406 | + $s = ''; |
| 1407 | + foreach ( $this->node as $node ) { |
| 1408 | + $s .= $node->ownerDocument->saveXML( $node ); |
| 1409 | + } |
| 1410 | + } else { |
| 1411 | + $s = $this->node->ownerDocument->saveXML( $this->node ); |
| 1412 | + } |
| 1413 | + return $s; |
| 1414 | + } |
| 1415 | + |
| 1416 | + function getChildren() { |
| 1417 | + return $this->node->childNodes ? new self( $this->node->childNodes ) : false; |
| 1418 | + } |
| 1419 | + |
| 1420 | + function getFirstChild() { |
| 1421 | + return $this->node->firstChild ? new self( $this->node->firstChild ) : false; |
| 1422 | + } |
| 1423 | + |
| 1424 | + function getNextSibling() { |
| 1425 | + return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false; |
| 1426 | + } |
| 1427 | + |
| 1428 | + function getChildrenOfType( $type ) { |
| 1429 | + return new self( $this->xpath->query( $type, $this->node ) ); |
| 1430 | + } |
| 1431 | + |
| 1432 | + function getLength() { |
| 1433 | + if ( $this->node instanceof DOMNodeList ) { |
| 1434 | + return $this->node->length; |
| 1435 | + } else { |
| 1436 | + return false; |
| 1437 | + } |
| 1438 | + } |
| 1439 | + |
| 1440 | + function item( $i ) { |
| 1441 | + $item = $this->node->item( $i ); |
| 1442 | + return $item ? new self( $item ) : false; |
| 1443 | + } |
| 1444 | + |
| 1445 | + function getName() { |
| 1446 | + if ( $this->node instanceof DOMNodeList ) { |
| 1447 | + return '#nodelist'; |
| 1448 | + } else { |
| 1449 | + return $this->node->nodeName; |
| 1450 | + } |
| 1451 | + } |
| 1452 | + |
| 1453 | + /** |
| 1454 | + * Split a <part> node into an associative array containing: |
| 1455 | + * name PPNode name |
| 1456 | + * index String index |
| 1457 | + * value PPNode value |
| 1458 | + */ |
| 1459 | + function splitArg() { |
| 1460 | + $names = $this->xpath->query( 'name', $this->node ); |
| 1461 | + $values = $this->xpath->query( 'value', $this->node ); |
| 1462 | + if ( !$names->length || !$values->length ) { |
| 1463 | + throw new MWException( 'Invalid brace node passed to ' . __METHOD__ ); |
| 1464 | + } |
| 1465 | + $name = $names->item( 0 ); |
| 1466 | + $index = $name->getAttribute( 'index' ); |
| 1467 | + return array( |
| 1468 | + 'name' => new self( $name ), |
| 1469 | + 'index' => $index, |
| 1470 | + 'value' => new self( $values->item( 0 ) ) ); |
| 1471 | + } |
| 1472 | + |
| 1473 | + /** |
| 1474 | + * Split an <ext> node into an associative array containing name, attr, inner and close |
| 1475 | + * All values in the resulting array are PPNodes. Inner and close are optional. |
| 1476 | + */ |
| 1477 | + function splitExt() { |
| 1478 | + $names = $this->xpath->query( 'name', $this->node ); |
| 1479 | + $attrs = $this->xpath->query( 'attr', $this->node ); |
| 1480 | + $inners = $this->xpath->query( 'inner', $this->node ); |
| 1481 | + $closes = $this->xpath->query( 'close', $this->node ); |
| 1482 | + if ( !$names->length || !$attrs->length ) { |
| 1483 | + throw new MWException( 'Invalid ext node passed to ' . __METHOD__ ); |
| 1484 | + } |
| 1485 | + $parts = array( |
| 1486 | + 'name' => new self( $names->item( 0 ) ), |
| 1487 | + 'attr' => new self( $attrs->item( 0 ) ) ); |
| 1488 | + if ( $inners->length ) { |
| 1489 | + $parts['inner'] = new self( $inners->item( 0 ) ); |
| 1490 | + } |
| 1491 | + if ( $closes->length ) { |
| 1492 | + $parts['close'] = new self( $closes->item( 0 ) ); |
| 1493 | + } |
| 1494 | + return $parts; |
| 1495 | + } |
| 1496 | + |
| 1497 | + /** |
| 1498 | + * Split an <h> node into an array with keys 'i' and 'level' (read from the node's attributes) and 'contents' (its children). Throws MWException if this node is not an <h> node. |
| 1499 | + */ |
| 1500 | + function splitHeading() { |
| 1501 | + if ( $this->getName() !== 'h' ) { /* compare the wrapped node's name itself; negating a property before comparing it with 'h' can never match */ |
| 1502 | + throw new MWException( 'Invalid h node passed to ' . __METHOD__ ); |
| 1503 | + } |
| 1504 | + return array( |
| 1505 | + 'i' => $this->node->getAttribute( 'i' ), |
| 1506 | + 'level' => $this->node->getAttribute( 'level' ), |
| 1507 | + 'contents' => $this->getChildren() |
| 1508 | + ); |
| 1509 | + } |
| 1510 | +} |
Property changes on: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 1511 | + native |
Index: branches/parser-work/phase3/includes/parser/Preprocessor.php |
— | — | @@ -0,0 +1,178 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * @ingroup Parser |
| 6 | + */ |
| 7 | +interface Preprocessor { |
| 8 | + /** Create a new preprocessor object based on an initialised Parser object */ |
| 9 | + function __construct( $parser ); |
| 10 | + |
| 11 | + /** Create a new top-level frame for expansion of a page */ |
| 12 | + function newFrame(); |
| 13 | + |
| 14 | + /** Create a new custom frame for programmatic use of parameter replacement as used in some extensions */ |
| 15 | + function newCustomFrame( $args ); |
| 16 | + |
| 17 | + /** Preprocess text to a PPNode */ |
| 18 | + function preprocessToObj( $text, $flags = 0 ); |
| 19 | +} |
| 20 | + |
| 21 | +/** |
| 22 | + * @ingroup Parser |
| 23 | + */ |
| 24 | +interface PPFrame { |
| 25 | + const NO_ARGS = 1; |
| 26 | + const NO_TEMPLATES = 2; |
| 27 | + const STRIP_COMMENTS = 4; |
| 28 | + const NO_IGNORE = 8; |
| 29 | + const RECOVER_COMMENTS = 16; |
| 30 | + |
| 31 | + const RECOVER_ORIG = 27; // = 1|2|8|16 no constant expression support in PHP yet |
| 32 | + |
| 33 | + /** |
| 34 | + * Create a child frame |
| 35 | + */ |
| 36 | + function newChild( $args = false, $title = false ); |
| 37 | + |
| 38 | + /** |
| 39 | + * Expand a document tree node |
| 40 | + */ |
| 41 | + function expand( $root, $flags = 0 ); |
| 42 | + |
| 43 | + /** |
| 44 | + * Implode with flags for expand() |
| 45 | + */ |
| 46 | + function implodeWithFlags( $sep, $flags /*, ... */ ); |
| 47 | + |
| 48 | + /** |
| 49 | + * Implode with no flags specified |
| 50 | + */ |
| 51 | + function implode( $sep /*, ... */ ); |
| 52 | + |
| 53 | + /** |
| 54 | + * Makes an object that, when expand()ed, will be the same as one obtained |
| 55 | + * with implode() |
| 56 | + */ |
| 57 | + function virtualImplode( $sep /*, ... */ ); |
| 58 | + |
| 59 | + /** |
| 60 | + * Virtual implode with brackets |
| 61 | + */ |
| 62 | + function virtualBracketedImplode( $start, $sep, $end /*, ... */ ); |
| 63 | + |
| 64 | + /** |
| 65 | + * Returns true if there are no arguments in this frame |
| 66 | + */ |
| 67 | + function isEmpty(); |
| 68 | + |
| 69 | + /** |
| 70 | + * Returns all arguments of this frame |
| 71 | + */ |
| 72 | + function getArguments(); |
| 73 | + |
| 74 | + /** |
| 75 | + * Returns all numbered arguments of this frame |
| 76 | + */ |
| 77 | + function getNumberedArguments(); |
| 78 | + |
| 79 | + /** |
| 80 | + * Returns all named arguments of this frame |
| 81 | + */ |
| 82 | + function getNamedArguments(); |
| 83 | + |
| 84 | + /** |
| 85 | + * Get an argument to this frame by name |
| 86 | + */ |
| 87 | + function getArgument( $name ); |
| 88 | + |
| 89 | + /** |
| 90 | + * Returns true if the infinite loop check is OK, false if a loop is detected |
| 91 | + */ |
| 92 | + function loopCheck( $title ); |
| 93 | + |
| 94 | + /** |
| 95 | + * Return true if the frame is a template frame |
| 96 | + */ |
| 97 | + function isTemplate(); |
| 98 | +} |
| 99 | + |
| 100 | +/** |
| 101 | + * There are three types of nodes: |
| 102 | + * * Tree nodes, which have a name and contain other nodes as children |
| 103 | + * * Array nodes, which also contain other nodes but aren't considered part of a tree |
| 104 | + * * Leaf nodes, which contain the actual data |
| 105 | + * |
| 106 | + * This interface provides access to the tree structure and to the contents of array nodes, |
| 107 | + * but it does not provide access to the internal structure of leaf nodes. Access to leaf |
| 108 | + * data is provided via two means: |
| 109 | + * * PPFrame::expand(), which provides expanded text |
| 110 | + * * The PPNode::split*() functions, which provide metadata about certain types of tree node |
| 111 | + * @ingroup Parser |
| 112 | + */ |
| 113 | +interface PPNode { |
| 114 | + /** |
| 115 | + * Get an array-type node containing the children of this node. |
| 116 | + * Returns false if this is not a tree node. |
| 117 | + */ |
| 118 | + function getChildren(); |
| 119 | + |
| 120 | + /** |
| 121 | + * Get the first child of a tree node. False if there isn't one. |
| 122 | + */ |
| 123 | + function getFirstChild(); |
| 124 | + |
| 125 | + /** |
| 126 | + * Get the next sibling of any node. False if there isn't one |
| 127 | + */ |
| 128 | + function getNextSibling(); |
| 129 | + |
| 130 | + /** |
| 131 | + * Get all children of this tree node which have a given name. |
| 132 | + * Returns an array-type node, or false if this is not a tree node. |
| 133 | + */ |
| 134 | + function getChildrenOfType( $type ); |
| 135 | + |
| 136 | + |
| 137 | + /** |
| 138 | + * Returns the length of the array, or false if this is not an array-type node |
| 139 | + */ |
| 140 | + function getLength(); |
| 141 | + |
| 142 | + /** |
| 143 | + * Returns an item of an array-type node |
| 144 | + */ |
| 145 | + function item( $i ); |
| 146 | + |
| 147 | + /** |
| 148 | + * Get the name of this node. The following names are defined here: |
| 149 | + * |
| 150 | + * h A heading node. |
| 151 | + * template A double-brace node. |
| 152 | + * tplarg A triple-brace node. |
| 153 | + * title The first argument to a template or tplarg node. |
| 154 | + * part Subsequent arguments to a template or tplarg node. |
| 155 | + * #nodelist An array-type node |
| 156 | + * |
| 157 | + * The subclass may define various other names for tree and leaf nodes. |
| 158 | + */ |
| 159 | + function getName(); |
| 160 | + |
| 161 | + /** |
| 162 | + * Split a <part> node into an associative array containing: |
| 163 | + * name PPNode name |
| 164 | + * index String index |
| 165 | + * value PPNode value |
| 166 | + */ |
| 167 | + function splitArg(); |
| 168 | + |
| 169 | + /** |
| 170 | + * Split an <ext> node into an associative array containing name, attr, inner and close |
| 171 | + * All values in the resulting array are PPNodes. Inner and close are optional. |
| 172 | + */ |
| 173 | + function splitExt(); |
| 174 | + |
| 175 | + /** |
| 176 | + * Split an <h> node |
| 177 | + */ |
| 178 | + function splitHeading(); |
| 179 | +} |
Property changes on: branches/parser-work/phase3/includes/parser/Preprocessor.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 180 | + native |
Index: branches/parser-work/phase3/includes/AutoLoader.php |
— | — | @@ -443,15 +443,35 @@ |
444 | 444 | 'LinkHolderArray' => 'includes/parser/LinkHolderArray.php', |
445 | 445 | 'LinkMarkerReplacer' => 'includes/parser/Parser_LinkHooks.php', |
446 | 446 | 'OnlyIncludeReplacer' => 'includes/parser/Parser.php', |
| 447 | + 'PPCustomFrame_Hash' => 'includes/parser/Preprocessor_Hash.php', |
| 448 | + 'PPCustomFrame_DOM' => 'includes/parser/Preprocessor_DOM.php', |
| 449 | + 'PPDAccum_Hash' => 'includes/parser/Preprocessor_Hash.php', |
| 450 | + 'PPDPart' => 'includes/parser/Preprocessor_DOM.php', |
| 451 | + 'PPDPart_Hash' => 'includes/parser/Preprocessor_Hash.php', |
| 452 | + 'PPDStack' => 'includes/parser/Preprocessor_DOM.php', |
| 453 | + 'PPDStackElement' => 'includes/parser/Preprocessor_DOM.php', |
| 454 | + 'PPDStackElement_Hash' => 'includes/parser/Preprocessor_Hash.php', |
| 455 | + 'PPDStack_Hash' => 'includes/parser/Preprocessor_Hash.php', |
447 | 456 | 'PPFrame' => 'includes/parser/Preprocessor.php', |
448 | | - 'PPTemplateFrame' => 'includes/parser/Preprocessor.php', |
449 | | - 'ParseEngine' => 'includes/parser/ParseEngine.php', |
| 457 | + 'PPFrame_DOM' => 'includes/parser/Preprocessor_DOM.php', |
| 458 | + 'PPFrame_Hash' => 'includes/parser/Preprocessor_Hash.php', |
| 459 | + 'PPNode' => 'includes/parser/Preprocessor.php', |
| 460 | + 'PPNode_DOM' => 'includes/parser/Preprocessor_DOM.php', |
| 461 | + 'PPNode_Hash_Array' => 'includes/parser/Preprocessor_Hash.php', |
| 462 | + 'PPNode_Hash_Attr' => 'includes/parser/Preprocessor_Hash.php', |
| 463 | + 'PPNode_Hash_Text' => 'includes/parser/Preprocessor_Hash.php', |
| 464 | + 'PPNode_Hash_Tree' => 'includes/parser/Preprocessor_Hash.php', |
| 465 | + 'PPTemplateFrame_DOM' => 'includes/parser/Preprocessor_DOM.php', |
| 466 | + 'PPTemplateFrame_Hash' => 'includes/parser/Preprocessor_Hash.php', |
450 | 467 | 'Parser' => 'includes/parser/Parser.php', |
451 | 468 | 'ParserCache' => 'includes/parser/ParserCache.php', |
452 | 469 | 'ParserOptions' => 'includes/parser/ParserOptions.php', |
453 | 470 | 'ParserOutput' => 'includes/parser/ParserOutput.php', |
454 | 471 | 'Parser_DiffTest' => 'includes/parser/Parser_DiffTest.php', |
455 | 472 | 'Parser_LinkHooks' => 'includes/parser/Parser_LinkHooks.php', |
| 473 | + 'Preprocessor' => 'includes/parser/Preprocessor.php', |
| 474 | + 'Preprocessor_DOM' => 'includes/parser/Preprocessor_DOM.php', |
| 475 | + 'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php', |
456 | 476 | 'StripState' => 'includes/parser/Parser.php', |
457 | 477 | 'MWTidy' => 'includes/parser/Tidy.php', |
458 | 478 | |