r114346 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r114345 | r114346 | r114347 >
Date:05:14, 21 March 2012
Author:tstarling
Status:ok
Tags:
Comment:
MFT r114231: fix "strip tag exposed" bugs
Modified paths:
  • /branches/REL1_19/phase3/RELEASE-NOTES-1.19 (modified) (history)
  • /branches/REL1_19/phase3/includes/parser/CoreParserFunctions.php (modified) (history)
  • /branches/REL1_19/phase3/includes/parser/Parser.php (modified) (history)
  • /branches/REL1_19/phase3/includes/parser/StripState.php (modified) (history)
  • /branches/REL1_19/phase3/tests/parser/parserTests.txt (modified) (history)

Diff [purge]

Index: branches/REL1_19/phase3/RELEASE-NOTES-1.19
@@ -25,6 +25,10 @@
2626 * (bug 31417) New ID mw-content-text around the actual page text, without categories,
2727 contentSub, ... The same div often also contains the class mw-content-ltr/rtl.
2828 * (bug 35303) Proxy and DNS blacklist blocking works again
 29+* (bug 22555) Remove or skip strip markers from tag hooks like <nowiki> in
 30+ core parser functions which operate on strings, such as padleft.
 31+* (bug 18295) Don't expose strip markers when a tag appears inside a link
 32+ inside a heading.
2933
3034 === Configuration changes in 1.19 ===
3135 * Removed SkinTemplateSetupPageCss hook; use BeforePageDisplay instead.
Index: branches/REL1_19/phase3/tests/parser/parserTests.txt
@@ -9086,6 +9086,96 @@
90879087
90889088 !! end
90899089
 9090+!! test
 9091+Strip marker in urlencode
 9092+!! input
 9093+{{urlencode:x<nowiki/>y}}
 9094+{{urlencode:x<nowiki/>y|wiki}}
 9095+{{urlencode:x<nowiki/>y|path}}
 9096+!! result
 9097+<p>xy
 9098+xy
 9099+xy
 9100+</p>
 9101+!! end
 9102+
 9103+!! test
 9104+Strip marker in lc
 9105+!! input
 9106+{{lc:x<nowiki/>y}}
 9107+!! result
 9108+<p>xy
 9109+</p>
 9110+!! end
 9111+
 9112+!! test
 9113+Strip marker in uc
 9114+!! input
 9115+{{uc:x<nowiki/>y}}
 9116+!! result
 9117+<p>XY
 9118+</p>
 9119+!! end
 9120+
 9121+!! test
 9122+Strip marker in formatNum
 9123+!! input
 9124+{{formatnum:1<nowiki/>2}}
 9125+{{formatnum:1<nowiki/>2|R}}
 9126+!! result
 9127+<p>12
 9128+12
 9129+</p>
 9130+!! end
 9131+
 9132+!! test
 9133+Strip marker in grammar
 9134+!! options
 9135+language=fi
 9136+!! input
 9137+{{grammar:elative|foo<nowiki/>bar}}
 9138+!! result
 9139+<p>foobarista
 9140+</p>
 9141+!! end
 9142+
 9143+!! test
 9144+Strip marker in padleft
 9145+!! input
 9146+{{padleft:|2|x<nowiki/>y}}
 9147+!! result
 9148+<p>xy
 9149+</p>
 9150+!! end
 9151+
 9152+!! test
 9153+Strip marker in padright
 9154+!! input
 9155+{{padright:|2|x<nowiki/>y}}
 9156+!! result
 9157+<p>xy
 9158+</p>
 9159+!! end
 9160+
 9161+!! test
 9162+Strip marker in anchorencode
 9163+!! input
 9164+{{anchorencode:x<nowiki/>y}}
 9165+!! result
 9166+<p>xy
 9167+</p>
 9168+!! end
 9169+
 9170+!! test
 9171+nowiki inside link inside heading (bug 18295)
 9172+!! input
 9173+==[[foo|x<nowiki>y</nowiki>z]]==
 9174+!! result
 9175+<h2><span class="editsection">[<a href="https://www.mediawiki.org/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: xyz">edit</a>]</span> <span class="mw-headline" id="xyz"><a href="https://www.mediawiki.org/index.php?title=Foo&amp;action=edit&amp;redlink=1" class="new" title="Foo (page does not exist)">xyz</a></span></h2>
 9176+
 9177+!! end
 9178+
 9179+
90909180 TODO:
90919181 more images
90929182 more tables
Index: branches/REL1_19/phase3/includes/parser/Parser.php
@@ -4065,15 +4065,17 @@
40664066 }
40674067
40684068 # The safe header is a version of the header text safe to use for links
4069 - # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4070 - $safeHeadline = $this->mStripState->unstripBoth( $headline );
40714069
40724070 # Remove link placeholders by the link text.
40734071 # <!--LINK number-->
40744072 # turns into
40754073 # link text with suffix
4076 - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
 4074+ # Do this before unstrip since link text can contain strip markers
 4075+ $safeHeadline = $this->replaceLinkHoldersText( $headline );
40774076
 4077+ # Avoid insertion of weird stuff like <math> by expanding the relevant sections
 4078+ $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
 4079+
40784080 # Strip out HTML (first regex removes any tag not allowed)
40794081 # Allowed tags are <sup> and <sub> (bug 8393), <i> (bug 26375) and <b> (r105284)
40804082 # We strip any parameter from accepted tags (second regex)
@@ -5638,6 +5640,16 @@
56395641 }
56405642
56415643 /**
 5644+ * Remove any strip markers found in the given text.
 5645+ *
 5646+ * @param $text Input string
 5647+ * @return string
 5648+ */
 5649+ function killMarkers( $text ) {
 5650+ return $this->mStripState->killMarkers( $text );
 5651+ }
 5652+
 5653+ /**
56425654 * Save the parser state required to convert the given half-parsed text to
56435655 * HTML. "Half-parsed" in this context means the output of
56445656 * recursiveTagParse() or internalParse(). This output has strip markers
Index: branches/REL1_19/phase3/includes/parser/CoreParserFunctions.php
@@ -164,17 +164,21 @@
165165
166166 // Encode as though it's a wiki page, '_' for ' '.
167167 case 'url_wiki':
168 - return wfUrlencode( str_replace( ' ', '_', $s ) );
 168+ $func = 'wfUrlencode';
 169+ $s = str_replace( ' ', '_', $s );
 170+ break;
169171
170172 // Encode for an HTTP Path, '%20' for ' '.
171173 case 'url_path':
172 - return rawurlencode( $s );
 174+ $func = 'rawurlencode';
 175+ break;
173176
174177 // Encode for HTTP query, '+' for ' '.
175178 case 'url_query':
176179 default:
177 - return urlencode( $s );
 180+ $func = 'urlencode';
178181 }
 182+ return $parser->markerSkipCallback( $s, $func );
179183 }
180184
181185 static function lcfirst( $parser, $s = '' ) {
@@ -194,11 +198,7 @@
195199 */
196200 static function lc( $parser, $s = '' ) {
197201 global $wgContLang;
198 - if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
199 - return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) );
200 - } else {
201 - return $wgContLang->lc( $s );
202 - }
 202+ return $parser->markerSkipCallback( $s, array( $wgContLang, 'lc' ) );
203203 }
204204
205205 /**
@@ -208,11 +208,7 @@
209209 */
210210 static function uc( $parser, $s = '' ) {
211211 global $wgContLang;
212 - if ( is_callable( array( $parser, 'markerSkipCallback' ) ) ) {
213 - return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) );
214 - } else {
215 - return $wgContLang->uc( $s );
216 - }
 212+ return $parser->markerSkipCallback( $s, array( $wgContLang, 'uc' ) );
217213 }
218214
219215 static function localurl( $parser, $s = '', $arg = null ) { return self::urlFunction( 'getLocalURL', $s, $arg ); }
@@ -252,12 +248,13 @@
253249 * @param null $raw
254250 * @return
255251 */
256 - static function formatNum( $parser, $num = '', $raw = null) {
257 - if ( self::israw( $raw ) ) {
258 - return $parser->getFunctionLang()->parseFormattedNumber( $num );
 252+ static function formatnum( $parser, $num = '', $raw = null) {
 253+ if ( self::isRaw( $raw ) ) {
 254+ $func = array( $parser->getFunctionLang(), 'parseFormattedNumber' );
259255 } else {
260 - return $parser->getFunctionLang()->formatNum( $num );
 256+ $func = array( $parser->getFunctionLang(), 'formatNum' );
261257 }
 258+ return $parser->markerSkipCallback( $num, $func );
262259 }
263260
264261 /**
@@ -267,6 +264,7 @@
268265 * @return
269266 */
270267 static function grammar( $parser, $case = '', $word = '' ) {
 268+ $word = $parser->killMarkers( $word );
271269 return $parser->getFunctionLang()->convertGrammar( $word, $case );
272270 }
273271
@@ -635,7 +633,8 @@
636634 /**
637635 * Unicode-safe str_pad with the restriction that $length is forced to be <= 500
638636 */
639 - static function pad( $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) {
 637+ static function pad( $parser, $string, $length, $padding = '0', $direction = STR_PAD_RIGHT ) {
 638+ $padding = $parser->killMarkers( $padding );
640639 $lengthOfPadding = mb_strlen( $padding );
641640 if ( $lengthOfPadding == 0 ) return $string;
642641
@@ -659,11 +658,11 @@
660659 }
661660
662661 static function padleft( $parser, $string = '', $length = 0, $padding = '0' ) {
663 - return self::pad( $string, $length, $padding, STR_PAD_LEFT );
 662+ return self::pad( $parser, $string, $length, $padding, STR_PAD_LEFT );
664663 }
665664
666665 static function padright( $parser, $string = '', $length = 0, $padding = '0' ) {
667 - return self::pad( $string, $length, $padding );
 666+ return self::pad( $parser, $string, $length, $padding );
668667 }
669668
670669 /**
@@ -672,6 +671,7 @@
673672 * @return string
674673 */
675674 static function anchorencode( $parser, $text ) {
 675+ $text = $parser->killMarkers( $text );
676676 return substr( $parser->guessSectionNameFromWikiText( $text ), 1);
677677 }
678678
Index: branches/REL1_19/phase3/includes/parser/StripState.php
@@ -181,5 +181,15 @@
182182 $key = $m[1];
183183 return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
184184 }
 185+
 186+ /**
 187+ * Remove any strip markers found in the given text.
 188+ *
 189+ * @param $text Input string
 190+ * @return string
 191+ */
 192+ function killMarkers( $text ) {
 193+ return preg_replace( $this->regex, '', $text );
 194+ }
185195 }
186196

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r114231 | Fixed a few "strip tag exposed" bugs.... | tstarling | 04:39, 20 March 2012

Status & tagging log