r76053 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r76052‎ | r76053 | r76054 >
Date:23:19, 4 November 2010
Author:mah
Status:ok
Tags:
Comment:
Re Roan's new review on IRC yesterday:
* Add function documentation that I should have written before.
* Create separate RSSParser class to clean up RSS.php
* Create separate RSSHooks class to handle parser initialization and
public interface for parser to use.
* Move User Agent definition to $wgRSSUserAgent from a define.
* Eliminate references to $wgRSSUseGzip (since we're using MW's
internal HTTP client, it didn't do anything anyway, ATM).

Re Tim's emailed review (discovered yesterday):
* Switch to DOMXPath::query() from DOMXPath::evaluate() at Tim's
suggestion.
* Move highlighting callback to its own class, RSSHighlighter, so that
it works as a callback without create_function() and other mess.
Modified paths:
  • /trunk/extensions/RSS/RSS.php (modified) (history)
  • /trunk/extensions/RSS/RSSData.php (modified) (history)
  • /trunk/extensions/RSS/RSSHooks.php (added) (history)
  • /trunk/extensions/RSS/RSSParser.php (added) (history)

Diff [purge]

Index: trunk/extensions/RSS/RSSHooks.php
@@ -0,0 +1,46 @@
 2+<?php
 3+class RSSHooks {
 4+ /**
 5+ * Tell the parser how to handle <rss> elements
 6+ * @param Parser Object
 7+ */
 8+ static function parserInit( $parser ) {
 9+ # Install parser hook for <rss> tags
 10+ $parser->setHook( 'rss', array( __CLASS__, 'renderRss' ) );
 11+ return true;
 12+ }
 13+
 14+ /**
 15+ * Static function wrapping RSSParser to handle rendering of RSS elements
 16+ * @param String Text inside the tags.
 17+ * @param Array value associative list of the element attributes and their values.
 18+ * @param Frame parser context
 19+ */
 20+ static function renderRss( $input, $args, $parser, $frame ) {
 21+ global $wgRSSCacheAge, $wgRSSCacheCompare;
 22+
 23+ if ( !HTTP::isValidURI( $input ) ) {
 24+ return wfMsg( 'rss-invalid-url', htmlspecialchars( $input ) );
 25+ }
 26+ if ( $wgRSSCacheCompare ) {
 27+ $timeout = $wgRSSCacheCompare;
 28+ } else {
 29+ $timeout = $wgRSSCacheAge;
 30+ }
 31+ $parser->disableCache();
 32+ /* $parser->getOutput()->updateCacheExpiry( $timeout ); */
 33+
 34+ $rss = new RSSParser( $input, $args );
 35+
 36+ $status = $rss->fetch();
 37+
 38+ # Check for errors.
 39+ if ( !$status->isGood() )
 40+ return wfMsg( 'rss-error', htmlspecialchars( $input), $status->getWikiText() );
 41+
 42+ if ( !$status->isGood() || !is_object( $rss->rss ) || !is_array( $rss->rss->items ) )
 43+ return wfMsg( 'rss-empty', htmlspecialchars( $input ) );
 44+
 45+ return $rss->renderFeed( $parser, $frame );
 46+ }
 47+}
Property changes on: trunk/extensions/RSS/RSSHooks.php
___________________________________________________________________
Added: svn:eol-syle
148 + native
Index: trunk/extensions/RSS/RSSData.php
@@ -4,32 +4,52 @@
55 public $ERROR;
66 public $items;
77
 8+ /**
 9+ * Constructor, takes a DOMDocument and returns an array of parsed items.
 10+ * @param DOMDocument The pre-parsed XML Document
 11+ * @returns Object RSSData object with a member items that is an array of parsed items,
 12+ */
813 function __construct( $xml ) {
914 if ( !( $xml instanceOf DOMDocument ) ) {
10 - return null;
 15+ $this->ERROR = "Not passed DOMDocument object.";
 16+ return;
1117 }
1218 $xpath = new DOMXPath( $xml );
13 - $items = $xpath->evaluate( '/rss/channel/item' );
 19+ $items = $xpath->query( '/rss/channel/item' );
1420
15 - foreach ( $items as $item ) {
16 - $bit = array();
17 - foreach ( $item->childNodes as $n ) {
18 - $name = $this->rssTokenToName( $n->nodeName );
19 - if ( $name != null ) {
20 - /* Because for DOMElements the nodeValue is just
21 - * the text of the containing element, without any
22 - * tags, it makes this a safe, if unattractive,
23 - * value to use. If you want to allow people to
24 - * mark up their RSS, some more precautions are
25 - * needed. */
26 - $bit[$name] = $n->nodeValue;
 21+ if($items->length !== 0) {
 22+ foreach ( $items as $item ) {
 23+ $bit = array();
 24+ foreach ( $item->childNodes as $n ) {
 25+ $name = $this->rssTokenToName( $n->nodeName );
 26+ if ( $name != null ) {
 27+ /* Because for DOMElements the nodeValue is just
 28+ * the text of the containing element, without any
 29+ * tags, it makes this a safe, if unattractive,
 30+ * value to use. If you want to allow people to
 31+ * mark up their RSS, some more precautions are
 32+ * needed. */
 33+ $bit[$name] = $n->nodeValue;
 34+ }
2735 }
 36+ $this->items[] = $bit;
2837 }
29 - $this->items[] = $bit;
 38+ } else {
 39+ $this->ERROR = "No RSS items found.";
 40+ return;
3041 }
3142 }
3243
33 - function rssTokenToName( $n ) {
 44+ /**
 45+ * Return a string that will be used to map RSS elements that
 46+ * contain similar data (e.g. dc:date, date, and pubDate) to the
 47+ * same array key. This works on WordPress feeds as-is, but it
 48+ * probably needs a way to convert dc:date format dates to be the
 49+ * same as pubDate.
 50+ * @param String $elementName Name of the element we have
 51+ * @returns String Name to map it to
 52+ */
 53+ protected function rssTokenToName( $n ) {
3454 switch( $n ) {
3555 case 'dc:date':
3656 return 'date';
Index: trunk/extensions/RSS/RSSParser.php
@@ -0,0 +1,428 @@
 2+<?php
 3+
 4+class RSSParser {
 5+ protected $charset;
 6+ protected $maxheads = 32;
 7+ protected $reversed = false;
 8+ protected $highlight = array();
 9+ protected $filter = array();
 10+ protected $filterOut = array();
 11+ protected $itemTemplate;
 12+ protected $url;
 13+ protected $etag;
 14+ protected $lastModified;
 15+ protected $xml;
 16+ protected $ERROR;
 17+ protected $displayFields = array( 'author', 'title', 'encodedContent', 'description' );
 18+
 19+ public $client;
 20+
 21+ /**
 22+ * Convenience function that takes a space-separated string and returns an array of words
 23+ * @param String list of words
 24+ * @returns Array words found
 25+ */
 26+ private static function explodeOnSpaces( $str ) {
 27+ $found = preg_split( '# +#', $str );
 28+ return is_array( $found ) ? $found : array();
 29+ }
 30+
 31+ /**
 32+ * Take a bit of WikiText that looks like
 33+ * <rss max=5>http://example.com/</rss>
 34+ * and return an object that can produce rendered output.
 35+ */
 36+ function __construct( $url, $args ) {
 37+
 38+ $this->url = $url;
 39+
 40+ # Get charset from argument array
 41+ # FIXME: not used yet
 42+ if ( isset( $args['charset'] ) ) {
 43+ $this->charset = $args['charset'];
 44+ } else {
 45+ global $wgOutputEncoding;
 46+ $this->charset = $wgOutputEncoding;
 47+ }
 48+
 49+ # Get max number of headlines from argument-array
 50+ if ( isset( $args['max'] ) ) {
 51+ $this->maxheads = $args['max'];
 52+ }
 53+
 54+ # Get reverse flag from argument array
 55+ if ( isset( $args['reverse'] ) ) {
 56+ $this->reversed = true;
 57+ }
 58+
 59+ # Get date format from argument array
 60+ # FIXME: not used yet
 61+ if ( isset( $args['date'] ) ) {
 62+ $this->date = $args['date'];
 63+ }
 64+
 65+ # Get highlight terms from argument array
 66+ if ( isset( $args['highlight'] ) ) {
 67+ # mapping to lowercase here so the regex can be case insensitive below.
 68+ $this->highlight = self::explodeOnSpaces( $args['highlight'] );
 69+ }
 70+
 71+ # Get filter terms from argument array
 72+ if ( isset( $args['filter'] ) ) {
 73+ $this->filter = self::explodeOnSpaces( $args['filter'] );
 74+ }
 75+
 76+ if ( isset( $args['filterout'] ) ) {
 77+ $this->filterOut = self::explodeOnSpaces( $args['filterout'] );
 78+
 79+ }
 80+
 81+ if ( isset( $args['template'] ) ) {
 82+ $titleObject = Title::newFromText( $args['template'], NS_TEMPLATE );
 83+ $article = new Article( $titleObject, 0 );
 84+ $this->itemTemplate = $article->fetchContent( );
 85+ } else {
 86+ $this->itemTemplate = wfMsgNoTrans( 'rss-item' );
 87+ }
 88+ }
 89+
 90+ /**
 91+ * Return RSS object for the given URL, maintaining caching.
 92+ *
 93+ * NOTES ON RETRIEVING REMOTE FILES:
 94+ * No attempt will be made to fetch remote files if there is something in cache.
 95+ *
 96+ * NOTES ON FAILED REQUESTS:
 97+ * If there is an HTTP error while fetching an RSS object, the cached version
 98+ * will be returned, if it exists (and if $wgRSSCacheFreshOnly is false)
 99+ *
 100+ * @param $url String: URL of RSS file
 101+ * @return boolean Status object
 102+ */
 103+ function fetch( ) {
 104+ global $wgRSSCacheAge, $wgRSSCacheFreshOnly;
 105+ global $wgRSSCacheDirectory, $wgRSSFetchTimeout;
 106+ global $wgRSSOutputEncoding, $wgRSSInputEncoding;
 107+ global $wgRSSDetectEncoding;
 108+
 109+ if ( !isset( $this->url ) ) {
 110+ return Status::newFatal( 'rss-fetch-nourl' );
 111+ }
 112+
 113+ // Flow
 114+ // 1. check cache
 115+ // 2. if there is a hit, make sure its fresh
 116+ // 3. if cached obj fails freshness check, fetch remote
 117+ // 4. if remote fails, return stale object, or error
 118+ $key = wfMemcKey( $this->url );
 119+ $cachedFeed = $this->loadFromCache( $key );
 120+ if ( $cachedFeed !== false ) {
 121+ wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
 122+ return Status::newGood();
 123+ }
 124+ wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url. ' from remote.' );
 125+
 126+ $status = $this->fetchRemote( $key );
 127+ return $status;
 128+ }
 129+
 130+ /**
 131+ * Retrieve the URL from the cache
 132+ * @param string $key lookup key to associate with this item
 133+ * @returns boolean
 134+ */
 135+ protected function loadFromCache( $key ) {
 136+ global $wgMemc, $wgRSSCacheCompare;
 137+
 138+ $data = $wgMemc->get( $key );
 139+ if ( $data === false ) {
 140+ return false;
 141+ }
 142+
 143+ list( $etag, $lastModified, $rss ) =
 144+ $data;
 145+
 146+ if ( !isset( $rss->items ) ) {
 147+ return false;
 148+ }
 149+
 150+ wfDebugLog( 'RSS', "Got '$key' from cache" );
 151+
 152+ # Now that we've verified that we got useful data, keep it around.
 153+ $this->rss = $rss;
 154+ $this->etag = $etag;
 155+ $this->lastModified = $lastModified;
 156+
 157+ // We only care if $wgRSSCacheCompare is > 0
 158+ if ( $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $lastModified ) {
 159+ wfDebugLog( 'RSS', "Content is old enough that we need to check cached content");
 160+ return false;
 161+ }
 162+
 163+ return true;
 164+ }
 165+
 166+ /**
 167+ * Store this object's data (e.g. etag, lastModified, and RSS) in the cache.
 168+ * @param string $key lookup key to associate with this item
 169+ * @returns boolean
 170+ */
 171+ protected function storeInCache( $key ) {
 172+ global $wgMemc, $wgRSSCacheAge;
 173+
 174+ if ( !isset( $this->rss ) ) {
 175+ return false;
 176+ }
 177+ $r = $wgMemc->set( $key,
 178+ array( $this->etag, $this->lastModified, $this->rss ),
 179+ $wgRSSCacheAge );
 180+
 181+ wfDebugLog( 'RSS', "Stored '$key' as in cache? $r");
 182+ return true;
 183+ }
 184+
 185+ /**
 186+ * Retrieve a feed.
 187+ * @param $url String: URL of the feed.
 188+ * @param $headers Array: headers to send along with the request
 189+ * @return Status object
 190+ */
 191+ protected function fetchRemote( $key, array $headers = array()) {
 192+ global $wgRSSFetchTimeout;
 193+ global $wgRSSUserAgent;
 194+
 195+ if ( $this->etag ) {
 196+ wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
 197+ $headers['If-None-Match'] = $this->etag;
 198+ }
 199+ if ( $this->lastModified ) {
 200+ $lm = gmdate('r', $this->lastModified);
 201+ wfDebugLog( 'RSS', "Used last modified: $lm" );
 202+ $headers['If-Modified-Since'] = $lm;
 203+ }
 204+
 205+ $client =
 206+ HttpRequest::factory( $this->url, array( 'timeout' => $wgRSSFetchTimeout ) );
 207+ $client->setUserAgent( $wgRSSUserAgent );
 208+ foreach ( $headers as $header => $value ) {
 209+ $client->setHeader( $header, $value );
 210+ }
 211+
 212+ $fetch = $client->execute();
 213+ $this->client = $client;
 214+
 215+ if ( !$fetch->isGood() ) {
 216+ wfDebug( 'RSS', 'Request Failed: ' . $fetch->getWikiText() );
 217+ return $fetch;
 218+ }
 219+
 220+ $ret = $this->responseToXML( $key );
 221+ return $ret;
 222+ }
 223+
 224+ /**
 225+ * Render the entire feed so that each item is passed to the
 226+ * template which the MediaWiki then displays.
 227+ *
 228+ * @param $parser the parser param to pass to recursiveTagParse()
 229+ * @param $frame the frame param to pass to recursiveTagParse()
 230+ */
 231+ function renderFeed( $parser, $frame ) {
 232+ $output = "";
 233+ if ( $this->itemTemplate ) {
 234+ $headcnt = 0;
 235+ if ( $this->reversed ) {
 236+ $this->rss->items = array_reverse( $this->rss->items );
 237+ }
 238+
 239+ foreach ( $this->rss->items as $item ) {
 240+ if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
 241+ continue;
 242+ }
 243+
 244+ if ( $this->canDisplay( $item ) ) {
 245+ $output .= $this->renderItem( $item, $parser, $frame );
 246+ $headcnt++;
 247+ }
 248+ }
 249+ }
 250+ return $output;
 251+ }
 252+
 253+ /**
 254+ * Render each item, filtering it out if necessary, applying any highlighting,
 255+ * @param $item an array produced by RSSData where keys are the names of the RSS elements
 256+ * @param $parser the parser param to pass to recursiveTagParse()
 257+ * @param $frame the frame param to pass to recursiveTagParse()
 258+ */
 259+ protected function renderItem( $item, $parser, $frame ) {
 260+ $parts = explode( '|', $this->itemTemplate );
 261+
 262+ $output = "";
 263+ if ( count( $parts ) > 1 && isset( $parser ) && isset( $frame ) ) {
 264+ $rendered = array();
 265+ foreach ( $this->displayFields as $field ) {
 266+ if ( isset($item[$field] ) ) {
 267+ $item[$field] = $this->highlightTerms( wfEscapeWikiText( $item[$field] ) );
 268+ }
 269+ }
 270+
 271+ foreach ( $parts as $part ) {
 272+ $bits = explode( '=', $part );
 273+ $left = null;
 274+
 275+ if ( count( $bits ) == 2 ) {
 276+ $left = trim( $bits[0] );
 277+ }
 278+
 279+ if ( isset( $item[$left] ) ) {
 280+ $leftValue = str_replace( '{{{' . $left . '}}}', $item[$left], $bits[1] );
 281+ $rendered[] = "$left = $leftValue";
 282+ } else {
 283+ $rendered[] = $part;
 284+ }
 285+ }
 286+ $output .= $parser->recursiveTagParse( implode( " | ", $rendered ), $frame );
 287+ }
 288+ return $output;
 289+ }
 290+
 291+ /**
 292+ * Parse an HTTP response object into an array of relevant RSS data
 293+ * @param $key the key to use to store the parsed response in the cache
 294+ * @return parsed RSS object (see RSSParse) or false
 295+ */
 296+ protected function responseToXML( $key ) {
 297+ wfDebugLog( 'RSS', "Got '".$this->client->getStatus()."', updating cache for $key" );
 298+ if ( $this->client->getStatus() === 304 ) {
 299+ # Not modified, update cache
 300+ wfDebugLog( 'RSS', "Got 304, updating cache for $key" );
 301+ $this->storeInCache( $key );
 302+ } else {
 303+ $this->xml = new DOMDocument;
 304+ $raw_xml = $this->client->getContent();
 305+
 306+ if( $raw_xml == "" ) {
 307+ return Status::newFatal( 'rss-parse-error', "No XML content" );
 308+ }
 309+
 310+ wfSuppressWarnings();
 311+ $this->xml->loadXML( $raw_xml );
 312+ wfRestoreWarnings();
 313+
 314+ $this->rss = new RSSData( $this->xml );
 315+
 316+ // if RSS parsed successfully
 317+ if ( $this->rss && !$this->rss->ERROR ) {
 318+ $this->etag = $this->client->getResponseHeader( 'Etag' );
 319+ $this->lastModified =
 320+ strtotime( $this->client->getResponseHeader( 'Last-Modified' ) );
 321+
 322+ wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' .
 323+ $this->client->getResponseHeader( 'Last-Modified' ) . ') and items (' .
 324+ count( $this->rss->items ) . ')!' );
 325+ $this->storeInCache( $key );
 326+ } else {
 327+ return Status::newFatal( 'rss-parse-error', $this->rss->ERROR );
 328+ }
 329+ }
 330+ return Status::newGood();
 331+ }
 332+
 333+ /**
 334+ * Determine if a given item should or should not be displayed
 335+ * @param associative array that RSSData produced for an <item>
 336+ * @returns boolean
 337+ */
 338+ protected function canDisplay( array $item ) {
 339+ $check = "";
 340+
 341+ /* We're only going to check the displayable fields */
 342+ foreach ( $this->displayFields as $field ) {
 343+ if ( isset( $item[$field] ) ) {
 344+ $check .= $item[$field];
 345+ }
 346+ }
 347+
 348+ if ( $this->filter( $check, 'filterOut' ) ) {
 349+ return false;
 350+ }
 351+ if ( $this->filter( $check, 'filter' ) ) {
 352+ return true;
 353+ }
 354+ return false;
 355+ }
 356+
 357+ /**
 358+ * Filters items in or out if they match a string we're looking for.
 359+ * @param String the text to examine
 360+ * @param String "filterOut" to check for matches in the filterOut member list. Otherwise, uses the filter member list.
 361+ * @returns boolean decision to filter or not.
 362+ */
 363+ protected function filter( $text, $filterType ) {
 364+ if ( $filterType === 'filterOut' ) {
 365+ $filter = $this->filterOut;
 366+ } else {
 367+ $filter = $this->filter;
 368+ }
 369+
 370+ if ( count( $filter ) == 0 ) return $filterType !== 'filterOut';
 371+
 372+ /* Using : for delimiter here since it'll be quoted automatically. */
 373+ $match = preg_match( ':(' . implode( "|", array_map('preg_quote', $filter ) ) . '):i', $text ) ;
 374+ if ( $match ) {
 375+ return true;
 376+ }
 377+ return false;
 378+ }
 379+
 380+ /**
 381+ * Highlight the words we're supposed to be looking for
 382+ * @param String the text to look in.
 383+ * @returns String with matched text highlighted in a <span> element
 384+ */
 385+ protected function highlightTerms( $text ) {
 386+ if ( count( $this->highlight ) === 0 ) {
 387+ return $text;
 388+ }
 389+
 390+ RSSHighlighter::setTerms( $this->highlight );
 391+ $highlight = ':'. implode( "|", array_map( 'preg_quote', array_values( $this->highlight ) ) ) . ':i';
 392+ return preg_replace_callback( $highlight, 'RSSHighlighter::highlightThis', $text );
 393+ }
 394+}
 395+
 396+
 397+class RSSHighlighter {
 398+ static $terms = array();
 399+
 400+ /**
 401+ * Set the list of terms to match for the next highlighting session
 402+ * @param List of words to match.
 403+ */
 404+ static function setTerms( array $terms ) {
 405+ self::$terms = array_flip( array_map( 'strtolower', $terms ) );
 406+ }
 407+
 408+ /**
 409+ * Actually replace the supplied list of words with HTML code to highlight the words.
 410+ * @param List of matched words to highlight. The words are assigned colors based upon the order they were supplied in setTerms()
 411+ * @returns String word wrapped in HTML code.
 412+ */
 413+ static function highlightThis( $match ) {
 414+ $styleStart = "<span style='font-weight: bold; background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>";
 415+ $styleEnd = "</span>";
 416+
 417+ # bg colors cribbed from Google's highlighting of search terms
 418+ $bgcolor = array( '255, 255, 102', '160, 255, 255', '153, 255, 153',
 419+ '255, 153, 153', '255, 102, 255', '136, 0, 0', '0, 170, 0', '136, 104, 0',
 420+ '0, 70, 153', '153, 0, 153' );
 421+ # Spelling out the fg colors instead of using processing time to create this list
 422+ $color = array("black", "black", "black", "black", "black",
 423+ "white", "white", "white", "white", "white" );
 424+
 425+ $index = self::$terms[strtolower($match[0])] % count( $bgcolor );
 426+
 427+ return sprintf($styleStart, $bgcolor[$index], $color[$index]). $match[0] .$styleEnd;
 428+ }
 429+}
Property changes on: trunk/extensions/RSS/RSSParser.php
___________________________________________________________________
Added: svn:eol-syle
1430 + native
Index: trunk/extensions/RSS/RSS.php
@@ -18,7 +18,8 @@
1919 die( "This is not a valid entry point.\n" );
2020 }
2121
22 -define( 'RSS_USER_AGENT', 'MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' );
 22+// Agent to use for fetching feeds
 23+$wgRSSUserAgent='MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension';
2324
2425 // Extension credits that will show up on Special:Version
2526 $wgExtensionCredits['parserhook'][] = array(
@@ -45,383 +46,19 @@
4647 // Internationalization file and autoloadable classes
4748 $dir = dirname( __FILE__ ) . '/';
4849 $wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php';
 50+$wgAutoloadClasses['RSSHooks'] = $dir . 'RSSHooks.php';
 51+$wgAutoloadClasses['RSSParser'] = $dir . 'RSSParser.php';
4952 $wgAutoloadClasses['RSSData'] = $dir . 'RSSData.php';
5053
51 -$wgHooks['ParserFirstCallInit'][] = 'RSS::parserInit';
 54+$wgHooks['ParserFirstCallInit'][] = 'RSSHooks::parserInit';
5255
5356 $wgRSSCacheAge = 3600; // one hour
5457 $wgRSSCacheFreshOnly = false;
 58+$wgRSSCacheCompare = false; // Check cached content, if available, against remote.
 59+ // $wgRSSCacheCompare should be set to false or a timeout
 60+ // (less than $wgRSSCacheAge) after which a comparison will
 61+ // be made.
5562 $wgRSSOutputEncoding = 'ISO-8859-1';
5663 $wgRSSInputEncoding = null;
5764 $wgRSSDetectEncoding = true;
5865 $wgRSSFetchTimeout = 5; // 5 second timeout
59 -$wgRSSUseGzip = true;
60 -
61 -class RSS {
62 - protected $charset;
63 - protected $maxheads = 32;
64 - protected $reversed = false;
65 - protected $highlight = array();
66 - protected $filter = array();
67 - protected $filterOut = array();
68 - protected $itemTemplate;
69 - protected $url;
70 - protected $etag;
71 - protected $lastModified;
72 - protected $xml;
73 - protected $ERROR;
74 - protected $displayFields = array( 'author', 'title', 'encodedContent', 'description' );
75 -
76 - public $client;
77 -
78 - static function parserInit( $parser ) {
79 - # Install parser hook for <rss> tags
80 - $parser->setHook( 'rss', array( __CLASS__, 'renderRss' ) );
81 - return true;
82 - }
83 -
84 - # Parser hook callback function
85 - static function renderRss( $input, $args, $parser, $frame ) {
86 - if ( !$input ) {
87 - return ''; # if <rss>-section is empty, return nothing
88 - }
89 - $parser->disableCache();
90 -
91 - $rss = new RSS( $input, $args );
92 -
93 - $status = $rss->fetch();
94 -
95 - # Check for errors.
96 - if ( $status === false || !is_object( $rss->rss ) || !is_array( $rss->rss->items ) )
97 - return wfMsg( 'rss-empty', $input );
98 -
99 - if ( isset( $rss->ERROR ) )
100 - return wfMsg( 'rss-error', $rss->ERROR );
101 -
102 - return $rss->renderFeed( $parser, $frame );
103 - }
104 -
105 - static function explodeOnSpaces( $str ) {
106 - $found = preg_split( '# +#', $str );
107 - return is_array( $found ) ? $found : array();
108 - }
109 -
110 - function __construct( $url, $args ) {
111 -
112 - if ( isset( $url ) ) {
113 - $this->url = $url;
114 - }
115 -
116 - # Get charset from argument array
117 - if ( isset( $args['charset'] ) ) {
118 - $this->charset = $args['charset'];
119 - } else {
120 - global $wgOutputEncoding;
121 - $args['charset'] = $wgOutputEncoding;
122 - }
123 -
124 - # Get max number of headlines from argument-array
125 - if ( isset( $args['max'] ) ) {
126 - $this->maxheads = $args['max'];
127 - }
128 -
129 - # Get reverse flag from argument array
130 - if ( isset( $args['reverse'] ) ) {
131 - $this->reversed = true;
132 - }
133 -
134 - # Get date format from argument array
135 - # FIXME: not used yet
136 - if ( isset( $args['date'] ) ) {
137 - $this->date = $args['date'];
138 - }
139 -
140 - # Get highlight terms from argument array
141 - if ( isset( $args['highlight'] ) ) {
142 - # mapping to lowercase here so the regex can be case insensitive below.
143 - $this->highlight = array_flip( array_map( 'strtolower', self::explodeOnSpaces( $args['highlight'] ) ) );
144 - }
145 -
146 - # Get filter terms from argument array
147 - if ( isset( $args['filter'] ) ) {
148 - $this->filter = self::explodeOnSpaces( $args['filter'] );
149 - }
150 -
151 - if ( isset( $args['filterout'] ) ) {
152 - $this->filterOut = self::explodeOnSpaces( $args['filterout'] );
153 -
154 - }
155 -
156 - if ( isset( $args['template'] ) ) {
157 - $titleObject = Title::newFromText( $args['template'], NS_TEMPLATE );
158 - $article = new Article( $titleObject, 0 );
159 - $this->itemTemplate = $article->fetchContent( 0 );
160 - } else {
161 - $this->itemTemplate = wfMsgNoTrans( 'rss-item' );
162 - }
163 - }
164 -
165 - /**
166 - * Return RSS object for the given URL, maintaining caching.
167 - *
168 - * NOTES ON RETRIEVING REMOTE FILES:
169 - * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
170 - * return a cached object, and touch the cache object upon recieving a 304.
171 - *
172 - * NOTES ON FAILED REQUESTS:
173 - * If there is an HTTP error while fetching an RSS object, the cached version
174 - * will be returned, if it exists (and if $wgRSSCacheFreshOnly is off
175 - *
176 - * @param $url String: URL of RSS file
177 - * @return boolean true if the fetch worked.
178 - */
179 - function fetch( ) {
180 - global $wgRSSCacheAge, $wgRSSCacheFreshOnly;
181 - global $wgRSSCacheDirectory, $wgRSSFetchTimeout;
182 - global $wgRSSOutputEncoding, $wgRSSInputEncoding;
183 - global $wgRSSDetectEncoding, $wgRSSUseGzip;
184 -
185 - if ( !isset( $this->url ) ) {
186 - wfDebugLog( 'RSS', 'Fetch called without a URL!' );
187 - return false;
188 - }
189 -
190 - // Flow
191 - // 1. check cache
192 - // 2. if there is a hit, make sure its fresh
193 - // 3. if cached obj fails freshness check, fetch remote
194 - // 4. if remote fails, return stale object, or error
195 - $key = wfMemcKey( $this->url );
196 - $cachedFeed = $this->loadFromCache( $key );
197 - if ( $cachedFeed !== false ) {
198 - wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
199 - return true;
200 - }
201 - wfDebugLog( 'RSS', 'Cache Failed ' . $this->url );
202 -
203 - $status = $this->fetchRemote( $key );
204 - return $status;
205 - }
206 -
207 - function loadFromCache( $key ) {
208 - global $wgMemc;
209 -
210 - $data = $wgMemc->get( $key );
211 - if ( $data === false ) {
212 - return false;
213 - }
214 -
215 - list( $etag, $lastModified, $rss ) =
216 - unserialize( $data );
217 -
218 - if ( !isset( $rss->items ) ) {
219 - return false;
220 - }
221 -
222 - wfDebugLog( 'RSS', "Got '$key' from cache" );
223 -
224 - # Now that we've verified that we got useful data, keep it around.
225 - $this->rss = $rss;
226 - $this->etag = $etag;
227 - $this->lastModified = $lastModified;
228 -
229 - return true;
230 - }
231 -
232 - function storeInCache( $key ) {
233 - global $wgMemc, $wgRSSCacheAge;
234 -
235 - if ( !isset( $this->rss ) ) {
236 - return false;
237 - }
238 - $wgMemc->set( $key,
239 - serialize( array( $this->etag, $this->lastModified, $this->rss ) ),
240 - $wgRSSCacheAge );
241 -
242 - wfDebugLog( 'RSS', "Stored '$key' in cache" );
243 - return true;
244 - }
245 -
246 - /**
247 - * Retrieve a feed.
248 - * @param $url String: URL of the feed.
249 - * @param $headers Array: headers to send along with the request
250 - * @return Status object
251 - */
252 - protected function fetchRemote( $key, $headers = '' ) {
253 - global $wgRSSFetchTimeout, $wgRSSUseGzip;
254 -
255 - if ( $this->etag ) {
256 - wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
257 - $headers['If-None-Match'] = $this->etag;
258 - }
259 - if ( $this->lastModified ) {
260 - wfDebugLog( 'RSS', 'Used last modified: ' . $this->lastModified );
261 - $headers['If-Last-Modified'] = $this->lastModified;
262 - }
263 -
264 - $client =
265 - HttpRequest::factory( $this->url, array( 'timeout' => $wgRSSFetchTimeout ) );
266 - $client->setUserAgent( RSS_USER_AGENT );
267 - /* $client->use_gzip = $wgRSSUseGzip; */
268 - if ( is_array( $headers ) && count( $headers ) > 0 ) {
269 - foreach ( $headers as $h ) {
270 - if ( count( $h ) > 1 ) {
271 - $client->setHeader( $h[0], $h[1] );
272 - }
273 - }
274 - }
275 -
276 - $fetch = $client->execute();
277 - $this->client = $client;
278 -
279 - if ( !$fetch->isGood() ) {
280 - wfDebug( 'RSS', 'Request Failed: ' . $fetch->getWikiText() );
281 - return $fetch;
282 - }
283 -
284 - $ret = $this->responseToXML( $key );
285 - return $ret;
286 - }
287 -
288 - function renderFeed( $parser, $frame ) {
289 - $output = "";
290 - if ( $this->itemTemplate ) {
291 - $headcnt = 0;
292 - if ( $this->reversed ) {
293 - $this->rss->items = array_reverse( $this->rss->items );
294 - }
295 -
296 - foreach ( $this->rss->items as $item ) {
297 - if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
298 - continue;
299 - }
300 -
301 - if ( $this->canDisplay( $item ) ) {
302 - $output .= $this->renderItem( $item, $parser, $frame );
303 - $headcnt++;
304 - }
305 - }
306 - }
307 - return $output;
308 - }
309 -
310 - function renderItem( $item, $parser, $frame ) {
311 - $parts = explode( '|', $this->itemTemplate );
312 -
313 - $output = "";
314 - if ( count( $parts ) > 1 && isset( $parser ) && isset( $frame ) ) {
315 - $rendered = array();
316 - foreach ( $this->displayFields as $field ) {
317 - if ( isset( $item[$field] ) ) {
318 - $item[$field] = $this->highlightTerms( $item[$field] );
319 - }
320 - }
321 -
322 - foreach ( $parts as $part ) {
323 - $bits = explode( '=', $part );
324 - $left = null;
325 -
326 - if ( count( $bits ) == 2 ) {
327 - $left = trim( $bits[0] );
328 - }
329 -
330 - if ( isset( $item[$left] ) ) {
331 - $leftValue = preg_replace( '/{{{' . preg_quote( $left, '/' ) . '}}}/',
332 - $item[$left], $bits[1] );
333 - $rendered[] = implode( '=', array( $left, $leftValue ) );
334 - } else {
335 - $rendered[] = $part;
336 - }
337 - }
338 - $output .= $parser->recursiveTagParse( implode( " | ", $rendered ), $frame );
339 - }
340 - return $output;
341 - }
342 -
343 - /**
344 - * Parse an HTTP response object into an RSS object.
345 - * @param $resp Object: an HTTP response object (see Snoopy)
346 - * @return parsed RSS object (see RSSParse) or false
347 - */
348 - function responseToXML( $key ) {
349 - $this->xml = new DOMDocument;
350 - $this->xml->loadXML( $this->client->getContent() );
351 - $this->rss = new RSSData( $this->xml );
352 -
353 - // if RSS parsed successfully
354 - if ( $this->rss && !$this->rss->ERROR ) {
355 - $this->etag = $this->client->getResponseHeader( 'Etag' );
356 - $this->lastModified = $this->client->getResponseHeader( 'Last-Modified' );
357 - wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' .
358 - $this->lastModified . ') and items (' . count( $this->rss->items ) . ')!' );
359 - $this->storeInCache( $key );
360 -
361 - return Status::newGood();
362 - } else {
363 - return Status::newfatal( 'rss-parse-error', $this->rss->ERROR );
364 - }
365 - }
366 -
367 - function canDisplay( $item ) {
368 - $check = "";
369 - foreach ( $this->displayFields as $field ) {
370 - if ( isset( $item[$field] ) ) {
371 - $check .= $item[$field];
372 - }
373 - }
374 -
375 - if ( $this->filter( $check, 'filterOut' ) ) {
376 - return false;
377 - }
378 - if ( $this->filter( $check, 'filter' ) ) {
379 - return true;
380 - }
381 - return false;
382 - }
383 -
384 - function filter( $text, $filterType ) {
385 - if ( $filterType === 'filterOut' ) {
386 - $filter = $this->filterOut;
387 - } else {
388 - $filter = $this->filter;
389 - }
390 -
391 - if ( count( $filter ) == 0 ) return $filterType !== 'filterOut';
392 -
393 - /* Using : for delimiter here since it'll be quoted automatically. */
394 - $match = preg_match( ':(' . implode( "|", array_map('preg_quote', $filter ) ) . '):i', $text ) ;
395 - if ( $match ) {
396 - return true;
397 - }
398 - return false;
399 - }
400 -
401 - static function highlightThis( $term, $match ) {
402 - $styleStart = "<span style='font-weight: bold; background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>";
403 - $styleEnd = "</span>";
404 -
405 - # bg colors cribbed from Google's highlighting of search teerms
406 - $bgcolor = array( '255, 255, 102', '160, 255, 255', '153, 255, 153',
407 - '255, 153, 153', '255, 102, 255', '136, 0, 0', '0, 170, 0', '136, 104, 0',
408 - '0, 70, 153', '153, 0, 153' );
409 - # Spelling out the fg colors instead of using processing time to create this list
410 - $color = array("black", "black", "black", "black", "black",
411 - "white", "white", "white", "white", "white" );
412 -
413 - $index = $term[strtolower($match[0])] % count( $bgcolor );
414 -
415 - return sprintf($styleStart, $bgcolor[$index], $color[$index]). $match[0] .$styleEnd;
416 - }
417 -
418 - function highlightTerms( $text ) {
419 - if ( count( $this->highlight ) === 0 ) {
420 - return $text;
421 - }
422 - # SIGH ... anonymous functions are not available until 5.3
423 - $f = create_function('$match', '$term = '.var_export($this->highlight, true).'; return RSS::highlightThis($term, $match);');
424 -
425 - $highlight = '/'. implode( "|", array_map( 'preg_quote', array_keys( $this->highlight ) ) ) . '/i';
426 - return preg_replace_callback( $highlight, $f, $text );
427 - }
428 -}
\ No newline at end of file

Follow-up revisions

RevisionCommit summaryAuthorDate
r76054followup r76053 — add missed i18n messagesmah23:20, 4 November 2010

Status & tagging log