Index: trunk/extensions/RSS/RSSParser.php |
— | — | @@ -13,7 +13,6 @@ |
14 | 14 | protected $xml; |
15 | 15 | protected $error; |
16 | 16 | protected $displayFields = array( 'author', 'title', 'encodedContent', 'description' ); |
17 | | - protected $validScheme = array( 'http', 'https', 'ftp' ); |
18 | 17 | |
19 | 18 | public $client; |
20 | 19 | |
— | — | @@ -243,8 +242,6 @@ |
244 | 243 | $output = ""; |
245 | 244 | if ( isset( $parser ) && isset( $frame ) ) { |
246 | 245 | $rendered = $this->itemTemplate; |
247 | | - $validScheme = array_flip( $this->validScheme ); |
248 | | - |
249 | 246 | // $info will only be an XML element name, so we're safe |
250 | 247 | // using it. $item[$info] is handled by the XML parser -- |
251 | 248 | // and that means bad RSS with stuff like |
— | — | @@ -252,15 +249,9 @@ |
253 | 250 | // rogue <script> tags neutered. |
254 | 251 | foreach ( array_keys( $item ) as $info ) { |
255 | 252 | if ( $info != 'link' ) { |
256 | | - $txt = $this->highlightTerms( wfEscapeWikiText( $item[ $info ] ) ); |
| 253 | + $txt = $this->highlightTerms( $this->escapeTemplateParameter( $item[ $info ] ) ); |
257 | 254 | } else { |
258 | | - $url = $item[ $info ]; |
259 | | - $scheme = parse_url( $url, PHP_URL_SCHEME ); |
260 | | - if( isset( $validScheme[$scheme] ) ) { |
261 | | - $txt = $url; |
262 | | - } else { |
263 | | - $txt = wfEscapeWikiText( $url ); |
264 | | - } |
| 255 | + $txt = $this->sanitizeUrl( $item[ $info ] ); |
265 | 256 | } |
266 | 257 | $rendered = str_replace( '{{{' . $info . '}}}', $txt, $rendered ); |
267 | 258 | } |
— | — | @@ -271,6 +262,48 @@ |
272 | 263 | } |
273 | 264 | |
/**
 * Sanitize a URL for inclusion in wikitext.
 *
 * ASCII control characters (0x00-0x1F and 0x7F) are removed. The
 * characters < > " [ | ] \ { and the space character are then replaced
 * by their percent-encoded form, so that the result can be used as a
 * free or as a bracketed external link.
 *
 * No scheme validation is performed here, and "}" is passed through
 * unchanged.
 *
 * @param $url String: the raw URL taken from the feed
 * @return String: the URL with problematic characters removed or
 *   percent-encoded
 */
protected function sanitizeUrl( $url ) {
    # Remove control characters
    $url = preg_replace( '/[\000-\037\177]/', '', $url );

    # Escape other problematic characters
    $length = strlen( $url );
    $out = '';
    $pos = 0;
    while ( $pos < $length ) {
        # Copy the run of harmless bytes starting at $pos in one go
        $boringLength = strcspn( $url, '<>"[|]\ {', $pos );
        if ( $boringLength ) {
            $out .= substr( $url, $pos, $boringLength );
            $pos += $boringLength;
        }
        # Unless the end was reached, $pos now sits on a special
        # character: percent-encode it and step over it
        if ( $pos < $length ) {
            $out .= rawurlencode( $url[$pos] );
            $pos++;
        }
    }
    return $out;
}
| 289 | + |
/**
 * Sanitize user input for inclusion as a template parameter.
 *
 * The text is first passed through htmlspecialchars(); after that,
 * sequences that have a special meaning in wikitext are replaced by
 * numeric character references. Unlike wfEscapeWikiText() as of r77127,
 * this escapes }} in addition to the other kinds of markup, to avoid
 * user input ending a template invocation.
 *
 * @param $text String: the raw text taken from the feed
 * @return String: the escaped text
 */
protected function escapeTemplateParameter( $text ) {
    # Each wikitext-significant sequence mapped to its escaped form.
    # The replacements must be character references, not the literal
    # characters, otherwise nothing would be escaped.
    $escapes = array(
        '[' => '&#91;',
        '|' => '&#124;',
        ']' => '&#93;',
        '\'' => '&#39;',
        'ISBN ' => 'ISBN&#32;',
        'RFC ' => 'RFC&#32;',
        '://' => '&#58;//',
        "\n=" => "\n&#61;",
        '{{' => '&#123;&#123;',
        '}}' => '&#125;&#125;',
    );

    return str_replace(
        array_keys( $escapes ),
        array_values( $escapes ),
        htmlspecialchars( $text )
    );
}
| 306 | + |
| 307 | + /** |
275 | 308 | * Parse an HTTP response object into an array of relevant RSS data |
276 | 309 | * |
277 | 310 | * @param $key String: the key to use to store the parsed response in the cache |