r28588 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r28587‎ | r28588 | r28589 >
Date:15:07, 17 December 2007
Author:tstarling
Status:old
Tags:
Comment:
* Strip comments early, before template expansion. This mimics the behaviour in the old parser. Added parser tests demonstrating the regression this fixes. The syntactic effect is fairly elegant, with comments taking effect at source level, as expected. The removeHTMLcomments() and preprocessToDom() passes could be merged at a later date.
* No need for comment stripping in Expr.php anymore
* Updated srvus() to roughly account for these changes
* Gave comment handling its own preprocessor tag, and split off comment handling from extensionSubstitution(). This only applies for the non-HTML modes, since in HTML mode, comments are stripped early.
* Strip comments from template argument names (PPFrame::newChild).
Modified paths:
  • /trunk/extensions/ParserFunctions/Expr.php (modified) (history)
  • /trunk/phase3/includes/Parser.php (modified) (history)
  • /trunk/phase3/includes/Parser_OldPP.php (modified) (history)
  • /trunk/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.txt
@@ -33,6 +33,18 @@
3434 blah blah
3535 !! endarticle
3636
 37+!!article
 38+Template:Foo
 39+!!text
 40+FOO
 41+!!endarticle
 42+
 43+!! article
 44+Template:Blank
 45+!! text
 46+!! endarticle
 47+
 48+
3749 ###
3850 ### Basic tests
3951 ###
@@ -278,7 +290,27 @@
279291
280292 !! end
281293
 294+!! test
 295+Comment in template title
 296+!! input
 297+{{f<!---->oo}}
 298+!! result
 299+<p>FOO
 300+</p>
 301+!! end
282302
 303+!! test
 304+Comment on its own line post-expand
 305+!! input
 306+a
 307+{{blank}}<!---->
 308+b
 309+!! result
 310+<p>a
 311+</p><p>b
 312+</p>
 313+!! end
 314+
283315 ###
284316 ### Preformatted text
285317 ###
Index: trunk/phase3/includes/Parser.php
@@ -2693,18 +2693,18 @@
26942694 $endpos = strpos( $text, '-->', $i + 4 );
26952695 if ( $endpos === false ) {
26962696 // Unclosed comment in input, runs to end
2697 - $accum .= htmlspecialchars( substr( $text, $i ) );
 2697+ $inner = substr( $text, $i );
26982698 if ( $this->ot['html'] ) {
26992699 // Close it so later stripping can remove it
2700 - $accum .= htmlspecialchars( '-->' );
 2700+ $inner .= '-->';
27012701 }
 2702+ $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
27022703 $i = strlen( $text );
2703 - continue;
 2704+ } else {
 2705+ $inner = substr( $text, $i, $endpos - $i + 3 );
 2706+ $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
 2707+ $i = $endpos + 3;
27042708 }
2705 - $accum .= htmlspecialchars( substr( $text, $i, $endpos - $i + 3 ) );
2706 - #$inner = substr( $text, $i + 4, $endpos - $i - 4 );
2707 - #$accum .= '<ext><name>!--</name><inner>' . htmlspecialchars( $inner ) . '</inner></ext>';
2708 - $i = $endpos + 3;
27092709 continue;
27102710 }
27112711 $name = $matches[1];
@@ -3047,6 +3047,12 @@
30483048 throw new MWException( __METHOD__ . ' called using the old argument format' );
30493049 }
30503050
 3051+ # Remove comments
 3052+ # This could theoretically be merged into preprocessToDom()
 3053+ if ( $this->ot['html'] || ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) ) {
 3054+ $text = Sanitizer::removeHTMLcomments( $text );
 3055+ }
 3056+
30513057 $dom = $this->preprocessToDom( $text );
30523058 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
30533059 $text = $frame->expand( $dom, $flags );
@@ -3439,6 +3445,12 @@
34403446 $text = strtr( $text, array( '<includeonly>' => '' , '</includeonly>' => '' ) );
34413447 }
34423448
 3449+ # Remove comments
 3450+ # This could theoretically be merged into preprocessToDom()
 3451+ if ( $this->ot['html'] || ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) ) {
 3452+ $text = Sanitizer::removeHTMLcomments( $text );
 3453+ }
 3454+
34433455 $dom = $this->preprocessToDom( $text );
34443456
34453457 $this->mTplDomCache[ $titleText ] = $dom;
@@ -3626,9 +3638,6 @@
36273639 $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $n++) . $this->mMarkerSuffix;
36283640
36293641 if ( $this->ot['html'] ) {
3630 - if ( $name == '!--' ) {
3631 - return '';
3632 - }
36333642 $name = strtolower( $name );
36343643
36353644 $params = Sanitizer::decodeTagAttributes( $attrText );
@@ -3659,15 +3668,11 @@
36603669 }
36613670 }
36623671 } else {
3663 - if ( $name == '!--' ) {
3664 - $output = '<!--' . $content . '-->';
 3672+ if ( $content === null ) {
 3673+ $output = "<$name$attrText/>";
36653674 } else {
3666 - if ( $content === null ) {
3667 - $output = "<$name$attrText/>";
3668 - } else {
3669 - $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
3670 - $output = "<$name$attrText>$content$close";
3671 - }
 3675+ $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
 3676+ $output = "<$name$attrText>$content$close";
36723677 }
36733678 }
36743679
@@ -4961,7 +4966,9 @@
49624967 * for "replace", the whole page with the section replaced.
49634968 */
49644969 private function extractSections( $text, $section, $mode, $newText='' ) {
 4970+ global $wgTitle;
49654971 $this->clearState();
 4972+ $this->mTitle = $wgTitle; // not generally used but removes an ugly failure mode
49664973 $this->mOptions = new ParserOptions;
49674974 $this->setOutputType( OT_WIKI );
49684975 $curIndex = 0;
@@ -5179,6 +5186,7 @@
51805187 function srvus( $text ) {
51815188 $text = $this->replaceVariables( $text );
51825189 $text = $this->mStripState->unstripBoth( $text );
 5190+ $text = Sanitizer::removeHTMLtags( $text );
51835191 return $text;
51845192 }
51855193 }
@@ -5252,6 +5260,8 @@
52535261
52545262 const NO_ARGS = 1;
52555263 const NO_TEMPLATES = 2;
 5264+ const STRIP_COMMENTS = 4;
 5265+
52565266 const RECOVER_ORIG = 3;
52575267
52585268 /**
@@ -5287,7 +5297,7 @@
52885298 $name = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent;
52895299 } else {
52905300 // Named parameter
5291 - $name = $this->expand( $nameNodes->item( 0 ) );
 5301+ $name = $this->expand( $nameNodes->item( 0 ), PPFrame::STRIP_COMMENTS );
52925302 }
52935303
52945304 $value = $xpath->query( 'value', $arg );
@@ -5355,6 +5365,13 @@
53565366 $params = array( 'title' => $title, 'parts' => $parts, 'text' => 'FIXME' );
53575367 $s = $this->parser->argSubstitution( $params, $this );
53585368 }
 5369+ } elseif ( $root->nodeName == 'comment' ) {
 5370+ # HTML-style comment
 5371+ if ( $flags & self::STRIP_COMMENTS ) {
 5372+ $s = '';
 5373+ } else {
 5374+ $s = $root->textContent;
 5375+ }
53595376 } elseif ( $root->nodeName == 'ext' ) {
53605377 # Extension tag
53615378 $xpath = new DOMXPath( $root->ownerDocument );
Index: trunk/phase3/includes/Parser_OldPP.php
@@ -4918,6 +4918,7 @@
49194919 */
49204920 function srvus( $text ) {
49214921 $text = $this->strip( $text, $this->mStripState );
 4922+ $text = Sanitizer::removeHTMLtags( $text );
49224923 $text = $this->replaceVariables( $text );
49234924 $text = preg_replace( '/<!--MWTEMPLATESECTION.*?-->/', '', $text );
49244925 $text = $this->mStripState->unstripBoth( $text );
Index: trunk/extensions/ParserFunctions/Expr.php
@@ -103,21 +103,13 @@
104104 $operands = array();
105105 $operators = array();
106106
107 - # Remove HTML-style comments
108 - # Not necessary in versions where StringUtils::delimiterReplace didn't exist
109 - if ( is_callable( array( 'StringUtils', 'delimiterReplace' ) ) ) {
110 - $expr = StringUtils::delimiterReplace( '<!--', '-->', '', $expr );
111 - }
112 -
113107 # Unescape inequality operators
114108 $expr = strtr( $expr, array( '&lt;' => '<', '&gt;' => '>' ) );
115109
116 -
117110 $p = 0;
118111 $end = strlen( $expr );
119112 $expecting = 'expression';
120113
121 -
122114 while ( $p < $end ) {
123115 if ( count( $operands ) > $this->maxStackSize || count( $operators ) > $this->maxStackSize ) {
124116 throw new ExprError('stack_exhausted');

Follow-up revisions

RevisionCommit summaryAuthorDate
r29292* Merged comment handling with the main loop of preprocessToDom(). This fixes...tstarling12:39, 5 January 2008

Status & tagging log