Index: trunk/phase3/maintenance/parserTests.txt |
— | — | @@ -5348,11 +5348,10 @@ |
5349 | 5349 | section=1 |
5350 | 5350 | !! input |
5351 | 5351 | ==a== |
5352 | | -==unmarked== <!-- an unmarked section --> |
5353 | | -==b== |
| 5352 | +==b== <!-- --> |
| 5353 | +==c== |
5354 | 5354 | !! result |
5355 | 5355 | ==a== |
5356 | | -==unmarked== <!-- an unmarked section --> |
5357 | 5356 | !! end |
5358 | 5357 | |
5359 | 5358 | !! test |
— | — | @@ -5361,10 +5360,10 @@ |
5362 | 5361 | section=2 |
5363 | 5362 | !! input |
5364 | 5363 | ==a== |
5365 | | -==unmarked== <!-- an unmarked section --> |
5366 | | -==b== |
| 5364 | +==b== <!-- --> |
| 5365 | +==c== |
5367 | 5366 | !! result |
5368 | | -==b== |
| 5367 | +==b== <!-- --> |
5369 | 5368 | !! end |
5370 | 5369 | |
5371 | 5370 | !! test |
— | — | @@ -6712,59 +6711,29 @@ |
6713 | 6712 | !! end |
6714 | 6713 | |
6715 | 6714 | !! test |
6716 | | -HHP1: Heuristics for headings in preprocessor parenthetical structures |
| 6715 | +HHP2.1: Heuristics for headings in preprocessor parenthetical structures |
6717 | 6716 | !! input |
6718 | | -{{foo |
6719 | | -==heading== |
6720 | | -!! result |
6721 | | -<p>{{foo |
6722 | | -</p> |
6723 | | -<a name="heading"></a><h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline">heading</span></h2> |
6724 | | - |
6725 | | -!! end |
6726 | | - |
6727 | | -!! test |
6728 | | -HHP2: Heuristics for headings in preprocessor parenthetical structures |
6729 | | -!! input |
6730 | 6717 | {{foo| |
6731 | | -==heading== |
| 6718 | +=heading= |
6732 | 6719 | !! result |
6733 | 6720 | <p>{{foo| |
6734 | 6721 | </p> |
6735 | | -<a name="heading"></a><h2> <span class="mw-headline">heading</span></h2> |
| 6722 | +<a name="heading"></a><h1> <span class="mw-headline">heading</span></h1> |
6736 | 6723 | |
6737 | 6724 | !! end |
6738 | 6725 | |
6739 | 6726 | !! test |
6740 | | -HHP3: Heuristics for headings in preprocessor parenthetical structures |
| 6727 | +HHP2.2: Heuristics for headings in preprocessor parenthetical structures |
6741 | 6728 | !! input |
6742 | 6729 | {{foo| |
6743 | | -==heading 1== |
6744 | | -==heading 2== |
| 6730 | +==heading== |
6745 | 6731 | !! result |
6746 | 6732 | <p>{{foo| |
6747 | 6733 | </p> |
6748 | | -<a name="heading_1"></a><h2> <span class="mw-headline">heading 1</span></h2> |
6749 | | -<a name="heading_2"></a><h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=1" title="Edit section: heading 2">edit</a>]</span> <span class="mw-headline">heading 2</span></h2> |
| 6734 | +<a name="heading"></a><h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=1" title="Edit section: heading">edit</a>]</span> <span class="mw-headline">heading</span></h2> |
6750 | 6735 | |
6751 | 6736 | !! end |
6752 | 6737 | |
6753 | | -# Note that heading 2 is counted, so heading 3 gets section=2 not section=1 |
6754 | | -!! test |
6755 | | -HHP4: Heuristics for headings in preprocessor parenthetical structures |
6756 | | -!! input |
6757 | | -{{foo| |
6758 | | -==heading 1== |
6759 | | -==heading 2== |
6760 | | -}} |
6761 | | -==heading 3== |
6762 | | -!! result |
6763 | | -<p>FOO |
6764 | | -</p> |
6765 | | -<a name="heading_3"></a><h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&action=edit&section=2" title="Edit section: heading 3">edit</a>]</span> <span class="mw-headline">heading 3</span></h2> |
6766 | | - |
6767 | | -!! end |
6768 | | - |
6769 | 6738 | # |
6770 | 6739 | # |
6771 | 6740 | # |
Index: trunk/phase3/includes/Preprocessor_DOM.php |
— | — | @@ -99,7 +99,7 @@ |
100 | 100 | |
101 | 101 | $stack = new PPDStack; |
102 | 102 | |
103 | | - $searchBase = '[{<'; #} |
| 103 | + $searchBase = "[{<\n"; #} |
104 | 104 | $revText = strrev( $text ); // For fast reverse searches |
105 | 105 | |
106 | 106 | $i = 0; # Input pointer, starts out pointing to a pseudo-newline before the start |
— | — | @@ -148,17 +148,6 @@ |
149 | 149 | if ( $findEquals ) { |
150 | 150 | // First equals will be for the template |
151 | 151 | $search .= '='; |
152 | | - } else { |
153 | | - // Look for headings |
154 | | - // We can't look for headings when $findEquals is true, because the ambiguity |
155 | | - // between template name/value separators and heading starts would be unresolved |
156 | | - // until the closing double-brace is found. This would mean either infinite |
157 | | - // backtrack, or creating and updating two separate tree structures until the |
158 | | - // end of the ambiguity -- one tree structure assuming a heading, and the other |
159 | | - // assuming a template argument. |
160 | | - // |
161 | | - // Easier to just break some section edit links. |
162 | | - $search .= "\n"; |
163 | 152 | } |
164 | 153 | $rule = null; |
165 | 154 | # Output literal section, advance input counter |
— | — | @@ -240,7 +229,7 @@ |
241 | 230 | $wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 ); |
242 | 231 | // Eat the line if possible |
243 | 232 | // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at |
244 | | - // the overall start. That's not how Sanitizer::removeHTMLcomments() does it, but |
| 233 | + // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but |
245 | 234 | // it's a possible beneficial b/c break. |
246 | 235 | if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n" |
247 | 236 | && substr( $text, $wsEnd + 1, 1 ) == "\n" ) |
— | — | @@ -253,28 +242,24 @@ |
254 | 243 | if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) { |
255 | 244 | $accum = substr( $accum, 0, -$wsLength ); |
256 | 245 | } |
257 | | - // Do a line-start run next time to look for headings after the comment, |
258 | | - // but only if stack->top===false, because headings don't exist at deeper levels. |
259 | | - if ( $stack->top === false ) { |
260 | | - $fakeLineStart = true; |
261 | | - } |
| 246 | + // Do a line-start run next time to look for headings after the comment |
| 247 | + $fakeLineStart = true; |
262 | 248 | } else { |
263 | 249 | // No line to eat, just take the comment itself |
264 | 250 | $startPos = $i; |
265 | 251 | $endPos += 2; |
266 | 252 | } |
267 | 253 | |
268 | | - /* |
269 | 254 | if ( $stack->top ) { |
270 | | - if ( $stack->top->commentEndPos !== false && $stack->top->commentEndPos == $wsStart ) { |
| 255 | + $part = $stack->top->getCurrentPart(); |
| 256 | + if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) { |
271 | 257 | // Comments abutting, no change in visual end |
272 | | - $stack->top->commentEndPos = $wsEnd; |
| 258 | + $part->commentEnd = $wsEnd; |
273 | 259 | } else { |
274 | | - $stack->top->visualEndPos = $wsStart; |
275 | | - $stack->top->commentEndPos = $wsEnd; |
| 260 | + $part->visualEnd = $wsStart; |
| 261 | + $part->commentEnd = $endPos; |
276 | 262 | } |
277 | 263 | } |
278 | | - */ |
279 | 264 | $i = $endPos + 1; |
280 | 265 | $inner = substr( $text, $startPos, $endPos - $startPos + 1 ); |
281 | 266 | $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>'; |
— | — | @@ -356,7 +341,11 @@ |
357 | 342 | } |
358 | 343 | |
359 | 344 | $count = strspn( $text, '=', $i, 6 ); |
360 | | - if ( $count > 0 ) { |
| 345 | + if ( $count == 1 && $findEquals ) { |
| 346 | + // DWIM: This looks kind of like a name/value separator |
| 347 | + // Let's let the equals handler have it and break the potential heading |
| 348 | + // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex. |
| 349 | + } elseif ( $count > 0 ) { |
361 | 350 | $piece = array( |
362 | 351 | 'open' => "\n", |
363 | 352 | 'close' => "\n", |
— | — | @@ -374,23 +363,32 @@ |
375 | 364 | $piece = $stack->top; |
376 | 365 | // A heading must be open, otherwise \n wouldn't have been in the search list |
377 | 366 | assert( $piece->open == "\n" ); |
| 367 | + $part = $piece->getCurrentPart(); |
378 | 368 | // Search back through the input to see if it has a proper close |
379 | 369 | // Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient |
380 | | - $m = false; |
| 370 | + $wsLength = strspn( $revText, " \t", strlen( $text ) - $i ); |
| 371 | + $searchStart = $i - $wsLength; |
| 372 | + if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) { |
| 373 | + // Comment found at line end |
| 374 | + // Search for equals signs before the comment |
| 375 | + $searchStart = $part->visualEnd; |
| 376 | + $searchStart -= strspn( $revText, " \t", strlen( $text ) - $searchStart ); |
| 377 | + } |
381 | 378 | $count = $piece->count; |
382 | | - if ( preg_match( "/\s*(=+)/A", $revText, $m, 0, strlen( $text ) - $i ) ) { |
383 | | - if ( $i - strlen( $m[0] ) == $piece->startPos ) { |
| 379 | + $equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart ); |
| 380 | + if ( $equalsLength > 0 ) { |
| 381 | + if ( $i - $equalsLength == $piece->startPos ) { |
384 | 382 | // This is just a single string of equals signs on its own line |
385 | 383 | // Replicate the doHeadings behaviour /={count}(.+)={count}/ |
386 | 384 | // First find out how many equals signs there really are (don't stop at 6) |
387 | | - $count = strlen( $m[1] ); |
| 385 | + $count = $equalsLength; |
388 | 386 | if ( $count < 3 ) { |
389 | 387 | $count = 0; |
390 | 388 | } else { |
391 | 389 | $count = min( 6, intval( ( $count - 1 ) / 2 ) ); |
392 | 390 | } |
393 | 391 | } else { |
394 | | - $count = min( strlen( $m[1] ), $count ); |
| 392 | + $count = min( $equalsLength, $count ); |
395 | 393 | } |
396 | 394 | if ( $count > 0 ) { |
397 | 395 | // Normal match, output <h> |
— | — | @@ -869,13 +867,6 @@ |
870 | 868 | } elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) { |
871 | 869 | $newIterator = $contextNode; |
872 | 870 | } elseif ( $contextNode instanceof DOMNode ) { |
873 | | - /* |
874 | | - print str_repeat( ' ', count( debug_backtrace() ) ) . $contextNode->nodeName; |
875 | | - if ( $contextNode->nodeName == 'title' ) { |
876 | | - print ' = ' . $contextNode->textContent; |
877 | | - } |
878 | | - print "<br/>\n"; |
879 | | - */ |
880 | 871 | if ( $contextNode->nodeType == XML_TEXT_NODE ) { |
881 | 872 | $out .= $contextNode->nodeValue; |
882 | 873 | } elseif ( $contextNode->nodeName == 'template' ) { |
Index: trunk/phase3/includes/Parser.php |
— | — | @@ -4815,6 +4815,30 @@ |
4816 | 4816 | } |
4817 | 4817 | return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); |
4818 | 4818 | } |
| 4819 | + |
| 4820 | + function markerSkipCallback( $s, $callback ) { |
| 4821 | + $i = 0; |
| 4822 | + $out = ''; |
| 4823 | + while ( $i < strlen( $s ) ) { |
| 4824 | + $markerStart = strpos( $s, $this->mUniqPrefix, $i ); |
| 4825 | + if ( $markerStart === false ) { |
| 4826 | + $out .= call_user_func( $callback, substr( $s, $i ) ); |
| 4827 | + break; |
| 4828 | + } else { |
| 4829 | + $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) ); |
| 4830 | + $markerEnd = strpos( $s, $this->mMarkerSuffix, $markerStart ); |
| 4831 | + if ( $markerEnd === false ) { |
| 4832 | + $out .= substr( $s, $markerStart ); |
| 4833 | + break; |
| 4834 | + } else { |
| 4835 | + $markerEnd += strlen( $this->mMarkerSuffix ); |
| 4836 | + $out .= substr( $s, $markerStart, $markerEnd - $markerStart ); |
| 4837 | + $i = $markerEnd; |
| 4838 | + } |
| 4839 | + } |
| 4840 | + } |
| 4841 | + return $out; |
| 4842 | + } |
4819 | 4843 | } |
4820 | 4844 | |
4821 | 4845 | /** |
Index: trunk/phase3/includes/CoreParserFunctions.php |
— | — | @@ -51,12 +51,20 @@ |
52 | 52 | |
53 | 53 | static function lc( $parser, $s = '' ) { |
54 | 54 | global $wgContLang; |
55 | | - return $wgContLang->lc( $s ); |
| 55 | + if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) { |
| 56 | + return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) ); |
| 57 | + } else { |
| 58 | + return $wgContLang->lc( $s ); |
| 59 | + } |
56 | 60 | } |
57 | 61 | |
58 | 62 | static function uc( $parser, $s = '' ) { |
59 | 63 | global $wgContLang; |
60 | | - return $wgContLang->uc( $s ); |
| 64 | + if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) { |
| 65 | + return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) ); |
| 66 | + } else { |
| 67 | + return $wgContLang->uc( $s ); |
| 68 | + } |
61 | 69 | } |
62 | 70 | |
63 | 71 | static function localurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getLocalURL', $s, $arg ); } |