r77128 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r77127‎ | r77128 | r77129 >
Date:02:16, 23 November 2010
Author:tstarling
Status:ok
Tags:
Comment:
* Avoid XSS via malicious RSS on wikis with $wgRawHtml = true. Mostly reverts r77031.
* Avoid formatting errors due to RSS descriptions which contain "}}", which was ending the item template prematurely.
Modified paths:
  • /trunk/extensions/RSS/RSSParser.php (modified) (history)

Diff [purge]

Index: trunk/extensions/RSS/RSSParser.php
@@ -13,7 +13,6 @@
1414 protected $xml;
1515 protected $error;
1616 protected $displayFields = array( 'author', 'title', 'encodedContent', 'description' );
17 - protected $validScheme = array( 'http', 'https', 'ftp' );
1817
1918 public $client;
2019
@@ -243,8 +242,6 @@
244243 $output = "";
245244 if ( isset( $parser ) && isset( $frame ) ) {
246245 $rendered = $this->itemTemplate;
247 - $validScheme = array_flip( $this->validScheme );
248 -
249246 // $info will only be an XML element name, so we're safe
250247 // using it. $item[$info] is handled by the XML parser --
251248 // and that means bad RSS with stuff like
@@ -252,15 +249,9 @@
253250 // rogue <script> tags neutered.
254251 foreach ( array_keys( $item ) as $info ) {
255252 if ( $info != 'link' ) {
256 - $txt = $this->highlightTerms( wfEscapeWikiText( $item[ $info ] ) );
 253+ $txt = $this->highlightTerms( $this->escapeTemplateParameter( $item[ $info ] ) );
257254 } else {
258 - $url = $item[ $info ];
259 - $scheme = parse_url( $url, PHP_URL_SCHEME );
260 - if( isset( $validScheme[$scheme] ) ) {
261 - $txt = $url;
262 - } else {
263 - $txt = wfEscapeWikiText( $url );
264 - }
 255+ $txt = $this->sanitizeUrl( $item[ $info ] );
265256 }
266257 $rendered = str_replace( '{{{' . $info . '}}}', $txt, $rendered );
267258 }
@@ -271,6 +262,48 @@
272263 }
273264
274265 /**
 266+ * Sanitize a URL for inclusion in wikitext. Escapes characters that have
 267+ * a special meaning in wikitext, replacing them with URL escape codes, so
 268+ * that arbitrary input can be included as a free or bracketed external
 269+ * link and both work and be safe.
 270+ */
 271+ protected function sanitizeUrl( $url ) {
 272+ # Remove control characters
 273+ $url = preg_replace( '/[\000-\037\177]/', '', $url );
 274+ # Escape other problematic characters
 275+ $i = 0;
 276+ $out = '';
 277+ for ( $i = 0; $i < strlen( $url ); $i++ ) {
 278+ $boringLength = strcspn( $url, '<>"[|]\ {', $i );
 279+ if ( $boringLength ) {
 280+ $out .= substr( $url, $i, $boringLength );
 281+ $i += $boringLength;
 282+ }
 283+ if ( $i < strlen( $url ) ) {
 284+ $out .= rawurlencode( $url[$i] );
 285+ }
 286+ }
 287+ return $out;
 288+ }
 289+
 290+ /**
 291+ * Sanitize user input for inclusion as a template parameter.
 292+ * Unlike in wfEscapeWikiText() as of r77127, this escapes }} in addition
 293+ * to the other kinds of markup, to avoid user input ending a template
 294+ * invocation.
 295+ */
 296+ protected function escapeTemplateParameter( $text ) {
 297+ $text = str_replace(
 298+ array( '[', '|', ']', '\'', 'ISBN ',
 299+ 'RFC ', '://', "\n=", '{{', '}}' ),
 300+ array( '&#91;', '&#124;', '&#93;', '&#39;', 'ISBN&#32;',
 301+ 'RFC&#32;', '&#58;//', "\n&#61;", '&#123;&#123;', '&#125;&#125;' ),
 302+ htmlspecialchars( $text )
 303+ );
 304+ return $text;
 305+ }
 306+
 307+ /**
275308 * Parse an HTTP response object into an array of relevant RSS data
276309 *
277310 * @param $key String: the key to use to store the parsed response in the cache

Follow-up revisions

RevisionCommit summaryAuthorDate
r77129In wfEscapeWikiText(), add "}}" to the list of things to escape, for callers ...tstarling02:39, 23 November 2010

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r77031followup r76848 by using wfEscapeWikiText() on all bits except links. Escape...mah21:02, 19 November 2010

Status & tagging log