r114231 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r114230‎ | r114231 | r114232 >
Date:04:39, 20 March 2012
Author:tstarling
Status:ok (Comments)
Tags:
Comment:
Fixed a few "strip tag exposed" bugs.
* Introduced Parser::killMarkers() based on the concept from StringFunctions. Used it in cases where markerStripCallback() doesn't make sense semantically, namely grammar, padleft, padright and anchorencode. Used markerStripCallback() in other cases.
* Changed headline unstrip order as suggested by P.Copp on bug 18295
* In CPF::lc() and CPF::uc(), removed the is_callable(). This was a temporary testing hack committed by me in r30109, which allowed me to do differential testing against a copy of the parser from before that revision.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES-1.19 (modified) (history)
  • /trunk/phase3/includes/parser/CoreParserFunctions.php (modified) (history)
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)
  • /trunk/phase3/includes/parser/StripState.php (modified) (history)
  • /trunk/phase3/tests/parser/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/RELEASE-NOTES-1.19
@@ -27,6 +27,10 @@
2828 * (bug 35303) Proxy and DNS blacklist blocking works again
2929 * (bug 35294) jquery.byteLimit shouldn't set element specific variables outside
3030 the "return this.each" loop.
 31+* (bug 21054) Remove or skip strip markers from tag hooks like <nowiki> in
 32+ core parser functions which operate on strings, such as formatnum.
 33+* (bug 18295) Don't expose strip markers when a tag appears inside a link
 34+ inside a heading.
3135
3236 === Configuration changes in 1.19 ===
3337 * Removed SkinTemplateSetupPageCss hook; use BeforePageDisplay instead.
Index: trunk/phase3/tests/parser/parserTests.txt
@@ -9096,6 +9096,96 @@
90979097
90989098 !! end
90999099
 9100+!! test
 9101+Strip marker in urlencode
 9102+!! input
 9103+{{urlencode:x<nowiki/>y}}
 9104+{{urlencode:x<nowiki/>y|wiki}}
 9105+{{urlencode:x<nowiki/>y|path}}
 9106+!! result
 9107+<p>xy
 9108+xy
 9109+xy
 9110+</p>
 9111+!! end
 9112+
 9113+!! test
 9114+Strip marker in lc
 9115+!! input
 9116+{{lc:x<nowiki/>y}}
 9117+!! result
 9118+<p>xy
 9119+</p>
 9120+!! end
 9121+
 9122+!! test
 9123+Strip marker in uc
 9124+!! input
 9125+{{uc:x<nowiki/>y}}
 9126+!! result
 9127+<p>XY
 9128+</p>
 9129+!! end
 9130+
 9131+!! test
 9132+Strip marker in formatNum
 9133+!! input
 9134+{{formatnum:1<nowiki/>2}}
 9135+{{formatnum:1<nowiki/>2|R}}
 9136+!! result
 9137+<p>12
 9138+12
 9139+</p>
 9140+!! end
 9141+
 9142+!! test
 9143+Strip marker in grammar
 9144+!! options
 9145+language=fi
 9146+!! input
 9147+{{grammar:elative|foo<nowiki/>bar}}
 9148+!! result
 9149+<p>foobarista
 9150+</p>
 9151+!! end
 9152+
 9153+!! test
 9154+Strip marker in padleft
 9155+!! input
 9156+{{padleft:|2|x<nowiki/>y}}
 9157+!! result
 9158+<p>xy
 9159+</p>
 9160+!! end
 9161+
 9162+!! test
 9163+Strip marker in padright
 9164+!! input
 9165+{{padright:|2|x<nowiki/>y}}
 9166+!! result
 9167+<p>xy
 9168+</p>
 9169+!! end
 9170+
 9171+!! test
 9172+Strip marker in anchorencode
 9173+!! input
 9174+{{anchorencode:x<nowiki/>y}}
 9175+!! result
 9176+<p>xy
 9177+</p>
 9178+!! end
 9179+
 9180+!! test
 9181+nowiki inside link inside heading (bug 18295)
 9182+!! input
 9183+==[[foo|x<nowiki>y</nowiki>z]]==
 9184+!! result
 9185+<h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: xyz">edit</a>]</span> <span class="mw-headline" id="xyz"><a href="https://www.mediawiki.org/index.php?title=Foo&amp;action=edit&amp;redlink=1" class="new" title="Foo (page does not exist)">xyz</a></span></h2>
 9186+
 9187+!! end
 9188+
 9189+
91009190 TODO:
91019191 more images
91029192 more tables
Index: trunk/phase3/includes/parser/Parser.php
@@ -4069,15 +4069,17 @@
40704070 }
40714071
40724072 # The safe header is a version of the header text safe to use for links
4073 - # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4074 - $safeHeadline = $this->mStripState->unstripBoth( $headline );
40754073
40764074 # Remove link placeholders by the link text.
40774075 # <!--LINK number-->
40784076 # turns into
40794077 # link text with suffix
4080 - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
 4078+ # Do this before unstrip since link text can contain strip markers
 4079+ $safeHeadline = $this->replaceLinkHoldersText( $headline );
40814080
 4081+ # Avoid insertion of weird stuff like <math> by expanding the relevant sections
 4082+ $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
 4083+
40824084 # Strip out HTML (first regex removes any tag not allowed)
40834085 # Allowed tags are <sup> and <sub> (bug 8393), <i> (bug 26375) and <b> (r105284)
40844086 # We strip any parameter from accepted tags (second regex)
@@ -5647,6 +5649,16 @@
56485650 }
56495651
56505652 /**
 5653+ * Remove any strip markers found in the given text.
 5654+ *
 5655+ * @param $text Input string
 5656+ * @return string
 5657+ */
 5658+ function killMarkers( $text ) {
 5659+ return $this->mStripState->killMarkers( $text );
 5660+ }
 5661+
 5662+ /**
56515663 * Save the parser state required to convert the given half-parsed text to
56525664 * HTML. "Half-parsed" in this context means the output of
56535665 * recursiveTagParse() or internalParse(). This output has strip markers
Index: trunk/phase3/includes/parser/CoreParserFunctions.php
@@ -165,17 +165,21 @@
166166
167167 // Encode as though it's a wiki page, '_' for ' '.
168168 case 'url_wiki':
169 - return wfUrlencode( str_replace( ' ', '_', $s ) );
 169+ $func = 'wfUrlencode';
 170+ $s = str_replace( ' ', '_', $s );
 171+ break;
170172
171173 // Encode for an HTTP Path, '%20' for ' '.
172174 case 'url_path':
173 - return rawurlencode( $s );
 175+ $func = 'rawurlencode';
 176+ break;
174177
175178 // Encode for HTTP query, '+' for ' '.
176179 case 'url_query':
177180 default:
178 - return urlencode( $s );
 181+ $func = 'urlencode';
179182 }
 183+ return $parser->markerSkipCallback( $s, $func );
180184 }
181185
182186 static function lcfirst( $parser, $s = '' ) {
@@ -195,11 +199,7 @@
196200 */
197201 static function lc( $parser, $s = '' ) {
198202 global $wgContLang;
199 - if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
200 - return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) );
201 - } else {
202 - return $wgContLang->lc( $s );
203 - }
 203+ return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) );
204204 }
205205
206206 /**
@@ -209,11 +209,7 @@
210210 */
211211 static function uc( $parser, $s = '' ) {
212212 global $wgContLang;
213 - if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
214 - return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) );
215 - } else {
216 - return $wgContLang->uc( $s );
217 - }
 213+ return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) );
218214 }
219215
220216 static function localurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getLocalURL', $s, $arg ); }
@@ -253,12 +249,13 @@
254250 * @param null $raw
255251 * @return
256252 */
257 - static function formatNum( $parser, $num = '', $raw = null) {
258 - if ( self::israw( $raw ) ) {
259 - return $parser->getFunctionLang()->parseFormattedNumber( $num );
 253+ static function formatnum( $parser, $num = '', $raw = null) {
 254+ if ( self::isRaw( $raw ) ) {
 255+ $func = array( $parser->getFunctionLang(), 'parseFormattedNumber' );
260256 } else {
261 - return $parser->getFunctionLang()->formatNum( $num );
 257+ $func = array( $parser->getFunctionLang(), 'formatNum' );
262258 }
 259+ return $parser->markerSkipCallback( $num, $func );
263260 }
264261
265262 /**
@@ -268,6 +265,7 @@
269266 * @return
270267 */
271268 static function grammar( $parser, $case = '', $word = '' ) {
 269+ $word = $parser->killMarkers( $word );
272270 return $parser->getFunctionLang()->convertGrammar( $word, $case );
273271 }
274272
@@ -637,7 +635,8 @@
638636 * Unicode-safe str_pad with the restriction that $length is forced to be <= 500
639637 * @return string
640638 */
641 - static function pad( $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) {
 639+ static function pad( $parser, $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) {
 640+ $padding = $parser->killMarkers( $padding );
642641 $lengthOfPadding = mb_strlen( $padding );
643642 if ( $lengthOfPadding == 0 ) return $string;
644643
@@ -661,11 +660,11 @@
662661 }
663662
664663 static function padleft( $parser, $string = '', $length = 0, $padding = '0' ) {
665 - return self::pad( $string, $length, $padding, STR_PAD_LEFT );
 664+ return self::pad( $parser, $string, $length, $padding, STR_PAD_LEFT );
666665 }
667666
668667 static function padright( $parser, $string = '', $length = 0, $padding = '0' ) {
669 - return self::pad( $string, $length, $padding );
 668+ return self::pad( $parser, $string, $length, $padding );
670669 }
671670
672671 /**
@@ -674,6 +673,7 @@
675674 * @return string
676675 */
677676 static function anchorencode( $parser, $text ) {
 677+ $text = $parser->killMarkers( $text );
678678 return substr( $parser->guessSectionNameFromWikiText( $text ), 1);
679679 }
680680
Index: trunk/phase3/includes/parser/StripState.php
@@ -181,5 +181,15 @@
182182 $key = $m[1];
183183 return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
184184 }
 185+
 186+ /**
 187+ * Remove any strip markers found in the given text.
 188+ *
 189+ * @param $text Input string
 190+ * @return string
 191+ */
 192+ function killMarkers( $text ) {
 193+ return preg_replace( $this->regex, '', $text );
 194+ }
185195 }
186196

Follow-up revisions

RevisionCommit summaryAuthorDate
r114232Better bug reference for r114231.tstarling04:52, 20 March 2012
r114338Backported the bug 22555 part of r114232 and cleaned up RELEASE-NOTES-1.18tstarling00:31, 21 March 2012
r114340Merge r114338 from 1.18: fix for bug 22555: strip markers in padleft etc.tstarling01:02, 21 March 2012
r114346MFT r114231: fix "strip tag exposed" bugststarling05:14, 21 March 2012
r114347MFT r114231: strip marker exposedtstarling05:19, 21 March 2012

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r30109* Make lc and uc parser functions skip strip markers...tstarling09:07, 24 January 2008

Comments

#Comment by MZMcBride (talk | contribs)   19:44, 22 March 2012

This is related to bug 35315.

Status & tagging log