r112465 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r112464‎ | r112465 | r112466 >
Date:01:10, 27 February 2012
Author:wikinaut
Status:reverted
Tags:gerritmigration 
Comment:
Fix for ultra bug 30028. The RSS extension can now parse RSS and ATOM feeds of different flavours. The PHP XML DOM XPath query now uses a namespace-safe method to find all elements such as item (RSS, RDF) or entry (ATOM). Also fixed a hidden problem that occurred when the feed URL was redirecting: this threw the "Cannot parse RSS for XML" error, which is now history. Introduced a new parameter $wgRSSUrlNumberOfAllowedRedirects, which defaults to zero, i.e. no redirects are allowed by default. See the manual page.
Modified paths:
  • /trunk/extensions/RSS/RELEASE-NOTES (modified) (history)
  • /trunk/extensions/RSS/RSS.i18n.php (modified) (history)
  • /trunk/extensions/RSS/RSS.php (modified) (history)
  • /trunk/extensions/RSS/RSSData.php (modified) (history)
  • /trunk/extensions/RSS/RSSHooks.php (modified) (history)
  • /trunk/extensions/RSS/RSSParser.php (modified) (history)

Diff [purge]

Index: trunk/extensions/RSS/RSSHooks.php
@@ -21,19 +21,24 @@
2222 * @param $frame PPFrame parser context
2323 */
2424 static function renderRss( $input, $args, $parser, $frame ) {
25 - global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces, $wgRSSUrlWhitelist;
 25+ global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces,
 26+ $wgRSSUrlWhitelist,$wgRSSAllowedFeeds;
2627
2728 if ( is_array( $wgRSSNamespaces ) && count( $wgRSSNamespaces ) ) {
2829 $ns = $parser->getTitle()->getNamespace();
2930 $checkNS = array_flip( $wgRSSNamespaces );
3031
3132 if( !isset( $checkNS[$ns] ) ) {
32 - return wfMsg( 'rss-ns-permission' );
 33+ return RSSUtils::RSSError( 'rss-ns-permission' );
3334 }
3435 }
3536
3637 switch ( true ) {
3738
 39+ case ( isset( $wgRSSAllowedFeeds ) ):
 40+ return RSSUtils::RSSError( 'rss-deprecated-wgrssallowedfeeds-found' );
 41+ break;
 42+
3843 # disallow because there is no whitelist or empty whitelist
3944 case ( !isset( $wgRSSUrlWhitelist )
4045 || !is_array( $wgRSSUrlWhitelist )
@@ -59,7 +64,7 @@
6065 }
6166
6267 if ( !Http::isValidURI( $input ) ) {
63 - return wfMsg( 'rss-invalid-url', htmlspecialchars( $input ) );
 68+ return RSSutils::RSSError( 'rss-invalid-url', htmlspecialchars( $input ) );
6469 }
6570 if ( $wgRSSCacheCompare ) {
6671 $timeout = $wgRSSCacheCompare;
@@ -79,7 +84,7 @@
8085 }
8186
8287 if ( !is_object( $rss->rss ) || !is_array( $rss->rss->items ) ) {
83 - return wfMsg( 'rss-empty', htmlspecialchars( $input ) );
 88+ return RSSUtils::RSSError( 'rss-empty', htmlspecialchars( $input ) );
8489 }
8590
8691 return $rss->renderFeed( $parser, $frame );
Index: trunk/extensions/RSS/RELEASE-NOTES
@@ -10,13 +10,20 @@
1111 coming in. Then you could abort cleanly once it's gotten too much
1212 (otherwise using the defaults - PHP will abort the entire program when your
1313 memory usage gets too high)
14 -* bug 30028 "Error parsing XML for RSS" - improve and harden Extension:RSS when
15 - parsing differently flavoured RSS feeds and ATOM feeds
1614
 15+=== Version 2.10 2012-02-27 ===
 16+* final solution of bug 30028 "Error parsing XML for RSS" - improve and harden
 17+ Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds
 18+* new parameter $wgRSSUrlNumberOfAllowedRedirects (default = 0)
 19+ Some feed urls redirect. The new RSS version can deal with redirects,
 20+ but it must be expressly enabled. For example, you can set
 21+ $wgRSSUrlNumberOfAllowedRedirects = 1;
 22+
1723 === Version 2.01 2012-02-24 ===
1824 * "summary" element of ATOM feed items are shown
1925 which is handled like "description" element of RSS
2026 * handling of basic HTML layout tags <p> <br> <b> <i> <u> <s> in item description
 27+
2128 === Version 2.00 2012-02-24 ===
2229 * first version which can parse RSS and at least some ATOM feeds
2330 partial solution of bug 30028 "Error parsing XML for RSS" - improve and harden
Index: trunk/extensions/RSS/RSSData.php
@@ -16,26 +16,13 @@
1717 }
1818 $xpath = new DOMXPath( $xml );
1919
20 - // register namespace as below, and apply a regex to the expression
21 - // http://de3.php.net/manual/en/domxpath.query.php#103461
22 - $namespaceURI = $xml->lookupnamespaceURI( NULL );
 20+ // namespace-safe method to find all elements
 21+ $items = $xpath->query( "//*[local-name() = 'item']" );
2322
24 - if ( ( null !== $namespaceURI ) ) {
25 - $defaultNS = "defaultNS";
26 - $xpath->registerNamespace( $defaultNS, $namespaceURI );
27 - $defaultNS = "defaultNS:";
28 - } else {
29 - $defaultNS = "";
 23+ if ( $items->length == 0 ) {
 24+ $items = $xpath->query( "//*[local-name() = 'entry']" );
3025 }
3126
32 - // is it an RSS feed ?
33 - $items = $xpath->query( $this->namespacePrefixedQuery( "/rss/channel/item", $defaultNS ) );
34 -
35 - if ( $items->length === 0 ) {
36 - // or is it an ATOM feed ?
37 - $items = $xpath->query( $this->namespacePrefixedQuery( "/feed/entry", $defaultNS ) );
38 - }
39 -
4027 if( $items->length !== 0 ) {
4128 foreach ( $items as $item ) {
4229 $bit = array();
@@ -61,14 +48,6 @@
6249 }
6350 }
6451
65 - protected function namespacePrefixedQuery( $query, $namespace = "" ) {
66 - if ( $namespace !== "" ) {
67 - $ret = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $namespace . '$2', $query );
68 - } else {
69 - $ret = $query;
70 - }
71 - return $ret;
72 - }
7352 /**
7453 * Return a string that will be used to map RSS elements that
7554 * contain similar data (e.g. dc:date, date, and pubDate) to the
Index: trunk/extensions/RSS/RSSParser.php
@@ -218,7 +218,8 @@
219219 * @return Status object
220220 */
221221 protected function fetchRemote( $key, array $headers = array()) {
222 - global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy;
 222+ global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
 223+ $wgRSSUrlNumberOfAllowedRedirects;
223224
224225 if ( $this->etag ) {
225226 wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
@@ -244,16 +245,54 @@
245246 */
246247
247248 $url = $this->url;
248 - $noProxy = false;
 249+ $noProxy = !isset( $wgRSSProxy );
249250
250251 // Example for disabling proxy use for certain urls
251252 // $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );
252 -
 253+
 254+ /**
 255+ * Copied from HttpFunctions.php
 256+ * Perform an HTTP request
 257+ *
 258+ * @param $method String: HTTP method. Usually GET/POST
 259+ * @param $url String: full URL to act on. If protocol-relative, will be expanded to an http:// URL
 260+ * @param $options Array: options to pass to MWHttpRequest object.
 261+ * Possible keys for the array:
 262+ * - timeout Timeout length in seconds
 263+ * - postData An array of key-value pairs or a url-encoded form data
 264+ * - proxy The proxy to use.
 265+ * Otherwise it will use $wgHTTPProxy (if set)
 266+ * Otherwise it will use the environment variable "http_proxy" (if set)
 267+ * - noProxy Don't use any proxy at all. Takes precedence over proxy value(s).
 268+ * - sslVerifyHost (curl only) Verify hostname against certificate
 269+ * - sslVerifyCert (curl only) Verify SSL certificate
 270+ * - caInfo (curl only) Provide CA information
 271+ * - maxRedirects Maximum number of redirects to follow (defaults to 5)
 272+ * - followRedirects Whether to follow redirects (defaults to false).
 273+ * Note: this should only be used when the target URL is trusted,
 274+ * to avoid attacks on intranet services accessible by HTTP.
 275+ * - userAgent A user agent, if you want to override the default
 276+ * MediaWiki/$wgVersion
 277+ * @return Mixed: (bool)false on failure or a string on success
 278+ */
 279+
 280+ if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
 281+ && is_numeric( $wgRSSUrlNumberOfAllowedRedirects ) ) {
 282+ $maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
 283+ } else {
 284+ $maxRedirects = 0;
 285+ }
 286+
 287+ // we set followRedirects intentionally to true to see error messages
 288+ // in cases where the maximum number of redirects is reached
253289 $client = HttpRequest::factory( $url,
254290 array(
255 - 'timeout' => $wgRSSFetchTimeout,
256 - 'proxy' => $wgRSSProxy,
257 - 'noProxy' => $noProxy,
 291+ 'timeout' => $wgRSSFetchTimeout,
 292+ 'followRedirects' => true,
 293+ 'maxRedirects' => $maxRedirects,
 294+ 'proxy' => $wgRSSProxy,
 295+ 'noProxy' => $noProxy,
 296+ 'userAgent' => $wgRSSUserAgent,
258297 )
259298 );
260299
@@ -506,8 +545,8 @@
507546 *
508547 * @param $text String: the text to examine
509548 * @param $filterType String: "filterOut" to check for matches in the
510 - * filterOut member list.
511 - * Otherwise, uses the filter member list.
 549+ * filterOut member list.
 550+ * Otherwise, uses the filter member list.
512551 * @return Boolean: decision to filter or not.
513552 */
514553 protected function filter( $text, $filterType ) {
@@ -591,7 +630,7 @@
592631 * @param String|Array $param Error parameter (or parameters)
593632 * @return String Html that is the error.
594633 */
595 - public static function RSSError( $errorMessageName, $param ) {
 634+ public static function RSSError( $errorMessageName, $param = false ) {
596635
597636 // Anything from a parser tag should use Content lang for message,
598637 // since the cache doesn't vary by user language: do not use wfMsg but wfMsgForContent
Index: trunk/extensions/RSS/RSS.i18n.php
@@ -22,6 +22,7 @@
2323 'rss-ns-permission' => 'RSS is not allowed in this namespace',
2424 'rss-url-is-not-whitelisted' => '"$1" is not in the whitelist of allowed feeds. {{PLURAL:$3|$2 is the only allowed feed|The allowed feeds are as follows: $2}}.',
2525 'rss-empty-whitelist' => '"$1" is not in the whitelist of allowed feeds. There are no allowed feed URLs in the whitelist.',
 26+ 'rss-deprecated-wgrssallowedfeeds-found' => 'The deprecated variable $wgRSSAllowedFeeds has been detected. Since RSS version 2.0 this variable has to be replaced by $wgRSSUrlWhitelist as described in the manual page Extension:RSS.',
2627 'rss-item' => '{{$1 | title = {{{title}}} | link = {{{link}}} | date = {{{date}}} | author = {{{author}}} | description = {{{description}}} }}',
2728 'rss-feed' => "<!-- the following are two alternative templates. The first is the basic default template for feeds -->; '''<span class='plainlinks'>[{{{link}}} {{{title}}}]</span>'''
2829 : {{{description}}}
Index: trunk/extensions/RSS/RSS.php
@@ -4,7 +4,7 @@
55 *
66 * @file
77 * @ingroup Extensions
8 - * @version 2.01
 8+ * @version 2.10
99 * @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris Reigrut, K001
1010 * @author Kellan Elliott-McCrea <kellan@protest.net> -- author of MagpieRSS
1111 * @author Jeroen De Dauw
@@ -14,7 +14,7 @@
1515 * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
1616 */
1717
18 -define( "EXTENSION_RSS_VERSION", "2.01 20120224" );
 18+define( "EXTENSION_RSS_VERSION", "2.10 20120227" );
1919
2020 if ( !defined( 'MEDIAWIKI' ) ) {
2121 die( "This is not a valid entry point.\n" );
@@ -52,7 +52,7 @@
5353 // for debugging set $wgRSSCacheCompare = 1;
5454 $wgRSSCacheCompare = false;
5555
56 -// 5 second timeout
 56+// 15 second timeout
5757 $wgRSSFetchTimeout = 15;
5858
5959 // Ignore the RSS tag in all but the namespaces listed here.
@@ -77,6 +77,11 @@
7878 // include "*" if you expressly want to allow all urls (you should not do this)
7979 // $wgRSSUrlWhitelist = array( "*" );
8080
 81+// Maximum number of redirects to follow (defaults to 0)
 82+// Note: this should only be used when the target URLs are trusted,
 83+// to avoid attacks on intranet services accessible by HTTP.
 84+$wgRSSUrlNumberOfAllowedRedirects = 0;
 85+
8186 // Agent to use for fetching feeds
8287 $wgRSSUserAgent = "MediaWikiRSS/" . strtok( EXTENSION_RSS_VERSION, " " ) . " (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension";
8388
@@ -89,4 +94,4 @@
9095 // limit the number of characters in the item description
9196 // or set to false for unlimited length.
9297 // $wgRSSItemMaxLength = false;
93 -// $wgRSSItemMaxLength = 100;
 98+$wgRSSItemMaxLength = 200;

Follow-up revisions

Revision | Commit summary | Author | Date
r114390 | Revert r111347, r111348, r111350, r111351, r111515, r111816, r112243, r112251... | catrope | 18:40, 21 March 2012

Status & tagging log