r75799 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r75798‎ | r75799 | r75800 >
Date:16:25, 1 November 2010
Author:mah
Status:resolved (Comments)
Tags:
Comment:
* Remove RSSCache.php and use $parserMemc to store parsed RSS feeds.
* Remove RSSFetch.php and move functionality to main RSS class
* Simplify how RSSData is constructed so we can pass it a DOMDocument
object instead of an HTTPResponse.
* Move wf functions into RSS class since they shouldn't really be
globally available.
Modified paths:
  • /trunk/extensions/RSS/RSS.php (modified) (history)
  • /trunk/extensions/RSS/RSSCache.php (deleted) (history)
  • /trunk/extensions/RSS/RSSData.php (modified) (history)
  • /trunk/extensions/RSS/RSSFetch.php (deleted) (history)

Diff [purge]

Index: trunk/extensions/RSS/RSSCache.php
@@ -1,159 +0,0 @@
2 -<?php
3 -/**
4 - * A simple, rolling (no GC) cache for RSS objects, keyed on URL.
5 - *
6 - * @file
7 - */
8 -
9 -class RSSCache {
10 - public $BASE_CACHE = './cache'; // where the cache files are stored
11 - public $MAX_AGE = 3600; // when are files stale, default one hour
12 - public $ERROR = ''; // accumulate error messages
13 -
14 - function __construct( $base = '', $age = '' ) {
15 - if ( $base ) {
16 - $this->BASE_CACHE = $base;
17 - }
18 - if ( $age ) {
19 - $this->MAX_AGE = $age;
20 - }
21 -
22 - // attempt to make the cache directory
23 - if ( !file_exists( $this->BASE_CACHE ) ) {
24 - $status = @mkdir( $this->BASE_CACHE, 0755 );
25 -
26 - // if make failed
27 - if ( !$status ) {
28 - wfDebugLog(
29 - 'RSS',
30 - "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
31 - );
32 - }
33 - }
34 -
35 - // check if it is writable.
36 - if ( !is_writable( $this->BASE_CACHE ) ) {
37 - wfDebugLog(
38 - 'RSS',
39 - "Cache dir '" . $this->BASE_CACHE . "' is not writable."
40 - );
41 - }
42 - }
43 -
44 - /**
45 - * Add an item to the cache, keyed on URL.
46 - * @param $url String: URL from which the RSS file was fetched
47 - * @param $rss Mixed: data to serialize
48 - */
49 - function set( $url, $rss ) {
50 - $this->ERROR = '';
51 - $cache_file = $this->file_name( $url );
52 - $fp = @fopen( $cache_file, 'w' );
53 -
54 - if ( !$fp ) {
55 - wfDebugLog(
56 - 'RSS',
57 - "Cache unable to open file for writing: $cache_file"
58 - );
59 - return 0;
60 - }
61 -
62 - $data = serialize( $rss );
63 - fwrite( $fp, $data );
64 - fclose( $fp );
65 -
66 - return $cache_file;
67 - }
68 -
69 - /**
70 - * Fetch an item from the cache.
71 - * @param $url String: URL from which the RSS file was fetched
72 - * @return Object or false: cached object on HIT, false on MISS
73 - */
74 - function get( $url ) {
75 - $this->ERROR = '';
76 - $cache_file = $this->file_name( $url );
77 -
78 - if ( !file_exists( $cache_file ) ) {
79 - wfDebugLog(
80 - 'RSS',
81 - "Cache doesn't contain: $url (cache file: $cache_file)"
82 - );
83 - return 0;
84 - }
85 -
86 - $fp = @fopen( $cache_file, 'r' );
87 - if ( !$fp ) {
88 - wfDebugLog(
89 - 'RSS',
90 - "Failed to open cache file for reading: $cache_file"
91 - );
92 - return 0;
93 - }
94 -
95 - $filesize = filesize( $cache_file );
96 - if ( $filesize ) {
97 - $data = fread( $fp, filesize( $cache_file ) );
98 - $rss = unserialize( $data );
99 -
100 - return $rss;
101 - }
102 -
103 - return 0;
104 - }
105 -
106 - /**
107 - * Check a URL for membership in the cache and whether the object is older
108 - * then MAX_AGE (ie. STALE)
109 - *
110 - * @param $url String: URL from which the RSS file was fetched
111 - * @return String: indicates whether there was a cache hit or not
112 - */
113 - function check_cache( $url ) {
114 - $this->ERROR = '';
115 - $filename = $this->file_name( $url );
116 -
117 - if ( file_exists( $filename ) ) {
118 - // find how long ago the file was added to the cache
119 - // and whether that is longer then MAX_AGE
120 - $mtime = filemtime( $filename );
121 - $age = time() - $mtime;
122 - if ( $this->MAX_AGE > $age ) {
123 - // object exists and is current
124 - return 'HIT';
125 - } else {
126 - // object exists but is old
127 - return 'STALE';
128 - }
129 - } else {
130 - // object does not exist
131 - return 'MISS';
132 - }
133 - }
134 -
135 - /**
136 - * @param $cache_key String: cache key, consisting of the URL + output enc.
137 - * @return Integer
138 - */
139 - function cache_age( $cache_key ) {
140 - $filename = $this->file_name( $url );
141 - if ( file_exists( $filename ) ) {
142 - $mtime = filemtime( $filename );
143 - $age = time() - $mtime;
144 - return $age;
145 - } else {
146 - return -1;
147 - }
148 - }
149 -
150 - /**
151 - * Map URL to location in cache.
152 - * @param $url String: URL from which the RSS file was fetched
153 - * @return String: file name
154 - */
155 - function file_name( $url ) {
156 - $filename = md5( $url );
157 - return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
158 - }
159 -
160 -}
\ No newline at end of file
Index: trunk/extensions/RSS/RSSFetch.php
@@ -1,246 +0,0 @@
2 -<?php
3 -/**
4 - * A simple functional interface to fetching and parsing RSS files, via the
5 - * function fetch_rss().
6 - *
7 - * @file
8 - */
9 -
10 -/**
11 - * Globals - redefine these in your script to change the
12 - * behaviour of fetch_rss() currently, most options effect the cache
13 - *
14 - * $wgRSSCache - Should we cache parsed RSS objects?
15 - *
16 - * $wgRSSCacheDirectory - Where should we cache parsed RSS objects?
17 - * This should be a location that the webserver can write to. If this
18 - * directory does not already exist, We will try to be smart and create it.
19 - * This will often fail for permissions reasons.
20 - *
21 - * $wgRSSCacheAge - How long to store cached RSS objects (in seconds)?.
22 - *
23 - * $wgRSSCacheFreshOnly - If remote fetch fails, throw an error
24 - * instead of returning stale object?
25 - */
26 -
27 -$RSS_FETCH_ERROR = '';
28 -
29 -/**
30 - * Return RSS object for the given URL, maintaining caching.
31 - *
32 - * NOTES ON CACHING:
33 - * If caching is on ($wgRSSCache) fetch_rss will first check the cache.
34 - *
35 - * NOTES ON RETRIEVING REMOTE FILES:
36 - * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
37 - * return a cached object, and touch the cache object upon recieving a 304.
38 - *
39 - * NOTES ON FAILED REQUESTS:
40 - * If there is an HTTP error while fetching an RSS object, the cached version
41 - * will be returned, if it exists (and if $wgRSSCacheFreshOnly is off)
42 - *
43 - * @param $url String: URL of RSS file
44 - * @return parsed RSS object (see RSSParse)
45 - */
46 -function fetch_rss( $url ) {
47 - global $wgRSSCache, $wgRSSCacheAge, $wgRSSCacheFreshOnly;
48 - global $wgRSSCacheDirectory, $wgRSSFetchTimeout;
49 - global $wgRSSOutputEncoding, $wgRSSInputEncoding;
50 - global $wgRSSDetectEncoding, $wgRSSUseGzip;
51 -
52 - $nameValue = array(
53 - 'wgRSSCache' => true,
54 - 'wgRSSCacheAge' => 60 * 60, // one hour
55 - 'wgRSSCacheFreshOnly' => false,
56 - 'wgRSSCacheDirectory' => '/extensions/RSS/cache',
57 - 'wgRSSOutputEncoding' => 'ISO-8859-1',
58 - 'wgRSSInputEncoding' => null,
59 - 'wgRSSDetectEncoding' => true,
60 - 'wgRSSFetchTimeout' => 5, // 5 second timeout
61 - 'wgRSSUseGzip' => true
62 - );
63 -
64 - foreach ( $nameValue as $n => $v ) {
65 - if ( !isset( $GLOBALS[$n] ) ) {
66 - $GLOBALS[$n] = $v;
67 - }
68 - }
69 -
70 - if ( !isset( $url ) ) {
71 - wfDebugLog( 'RSS', 'fetch_rss (RSSFetch.php) called without a URL!' );
72 - return false;
73 - }
74 -
75 - // if cache is disabled
76 - if ( !$wgRSSCache ) {
77 - // fetch file, and parse it
78 - $resp = _fetch_remote_file( $url );
79 - $errs = $resp->getErrorsArray();
80 - if ( count( $errs ) == 0 ) {
81 - return _response_to_rss( $resp );
82 - } else {
83 - wfDebugLog( 'RSS', "Failed to fetch $url and cache is off" );
84 - return false;
85 - }
86 - } else { // else cache is ON
87 - // Flow
88 - // 1. check cache
89 - // 2. if there is a hit, make sure its fresh
90 - // 3. if cached obj fails freshness check, fetch remote
91 - // 4. if remote fails, return stale object, or error
92 - $cache = new RSSCache( $wgRSSCacheDirectory, $wgRSSCacheAge );
93 -
94 - if ( $cache->ERROR ) {
95 - wfDebugLog(
96 - 'RSS',
97 - 'cache error on RSSFetch.php! Error msg: ' .
98 - $cache->ERROR
99 - );
100 - }
101 -
102 - $cache_status = 0; // response of check_cache
103 - $request_headers = array(); // HTTP headers to send with fetch
104 - $rss = 0; // parsed RSS object
105 - $errormsg = 0; // errors, if any
106 -
107 - // store parsed XML by desired output encoding
108 - // as character munging happens at parse time
109 - $cache_key = $url . $wgRSSOutputEncoding;
110 -
111 - if ( !$cache->ERROR ) {
112 - // return cache HIT, MISS, or STALE
113 - $cache_status = $cache->check_cache( $cache_key );
114 - }
115 -
116 - // if object cached, and cache is fresh, return cached obj
117 - if ( $cache_status == 'HIT' ) {
118 - $rss = $cache->get( $cache_key );
119 - if ( isset( $rss ) && $rss ) {
120 - // should be cache age
121 - $rss->from_cache = 1;
122 - wfDebugLog( 'RSS', 'Cache HIT' );
123 - return $rss;
124 - }
125 - }
126 -
127 - // else attempt a conditional get
128 -
129 - // setup headers
130 - if ( $cache_status == 'STALE' ) {
131 - $rss = $cache->get( $cache_key );
132 - if ( $rss && $rss->etag && $rss->last_modified ) {
133 - $request_headers['If-None-Match'] = $rss->etag;
134 - $request_headers['If-Last-Modified'] = $rss->last_modified;
135 - }
136 - }
137 -
138 - $resp = _fetch_remote_file( $url, $request_headers );
139 -
140 - if ( isset( $resp ) && $resp ) {
141 - if ( $resp->getStatus() === 304 ) {
142 - // we have the most current copy
143 - wfDebugLog( 'RSS', "Got 304 for $url" );
144 - // reset cache on 304 (at minutillo insistent prodding)
145 - $cache->set( $cache_key, $rss );
146 - return $rss;
147 - } elseif ( $resp->getStatus() >= 200 && $resp->getStatus() < 300 ) {
148 - $rss = _response_to_rss( $resp );
149 - if ( $rss ) {
150 - wfDebugLog( 'RSS', 'Fetch successful' );
151 - // add object to cache
152 - $cache->set( $cache_key, $rss );
153 - return $rss;
154 - }
155 - } else {
156 - $errormsg = "Failed to fetch $url ";
157 - if ( $resp->getStatus() === -100 ) {
158 - global $wgRSSFetchTimeout;
159 - $errormsg .= '(Request timed out after ' . $wgRSSFetchTimeout . ' seconds)';
160 - } elseif ( $resp->error ) {
161 - $http_error = substr( $resp->error, 0, -2 );
162 - $errormsg .= "(HTTP Error: $http_error)";
163 - } else {
164 - $errormsg .= '(HTTP Response: ' . $resp->response_code . ')';
165 - }
166 - }
167 - } else {
168 - $errormsg = 'Unable to retrieve RSS file for unknown reasons.';
169 - }
170 -
171 - // else fetch failed
172 -
173 - // attempt to return cached object
174 - if ( $rss ) {
175 - wfDebugLog( 'RSS', "Returning STALE object for $url" );
176 - return $rss;
177 - }
178 -
179 - // else we totally failed
180 - $RSS_FETCH_ERROR = $errormsg;
181 - wfDebugLog( 'RSS',
182 - 'RSSFetch: we totally failed :-( Error message:' .
183 - $errormsg
184 - );
185 -
186 - return false;
187 - } // end if ( !$wgRSSCache ) {
188 -} // end fetch_rss()
189 -
190 -/**
191 - * Retrieve an arbitrary remote file.
192 - * @param $url String: URL of the remote file
193 - * @param $headers Array: headers to send along with the request
194 - * @return an HTTP response object
195 - */
196 -function _fetch_remote_file( $url, $headers = '' ) {
197 - global $wgRSSFetchTimeout, $wgRSSUseGzip;
198 -
199 - $client =
200 - HttpRequest::factory( $url, array( 'timeout' => $wgRSSFetchTimeout ) );
201 - $client->setUserAgent( 'MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' );
202 - /* $client->use_gzip = $wgRSSUseGzip; */
203 - if ( is_array( $headers ) && count( $headers ) > 0 ) {
204 - foreach ( $headers as $h ) {
205 - if ( count( $h ) > 1 ) {
206 - $client->setHeader( $h[0], $h[1] );
207 - }
208 - }
209 - }
210 -
211 - $fetch = $client->execute();
212 -
213 - /* @$client->fetch( $url ); */
214 - if ( $fetch->isGood() ) {
215 - return $client;
216 - } else {
217 - wfDebugLog( 'RSS', 'error fetching $url: ' . $fetch->getWikiText() );
218 - }
219 -}
220 -
221 -/**
222 - * Parse an HTTP response object into an RSS object.
223 - * @param $resp Object: an HTTP response object (see Snoopy)
224 - * @return parsed RSS object (see RSSParse) or false
225 - */
226 -function _response_to_rss( $resp ) {
227 - global $wgRSSOutputEncoding, $wgRSSInputEncoding, $wgRSSDetectEncoding;
228 - $rss = new RSSData( $resp );
229 -
230 - // if RSS parsed successfully
231 - if ( $rss && !$rss->ERROR ) {
232 - // find Etag and Last-Modified
233 -
234 - return $rss;
235 - } else { // else construct error message
236 - $errormsg = 'Failed to parsex RSS file.';
237 -
238 - if ( $rss ) {
239 - $errormsg .= ' (' . $rss->ERROR . ')';
240 - }
241 - $RSS_FETCH_ERROR = $errormsg;
242 - wfDebugLog( 'RSS', 'error!' . $errormsg );
243 -
244 - return false;
245 - } // end if ( $rss && !$rss->ERROR )
246 -}
247 -
Index: trunk/extensions/RSS/RSSData.php
@@ -1,21 +1,14 @@
22 <?php
33
44 class RSSData {
5 - public $etag;
6 - public $last_modified;
75 public $ERROR;
8 - public $xml;
96 public $items;
107
11 - function __construct( $resp ) {
12 - $this->xml = new DOMDocument;
13 - $this->xml->loadXML( $resp->getContent() );
14 - $h = $resp->getResponseHeader( 'ETag' );
15 - $this->etag = $h;
16 - $h = $resp->getResponseHeader( 'Last-Modified' );
17 - $this->last_modified = $h;
18 -
19 - $xpath = new DOMXPath( $this->xml );
 8+ function __construct( $xml ) {
 9+ if( !( $xml instanceOf DOMDocument ) ) {
 10+ return null;
 11+ }
 12+ $xpath = new DOMXPath( $xml );
2013 $items = $xpath->evaluate( '/rss/channel/item' );
2114
2215 foreach ( $items as $item ) {
Index: trunk/extensions/RSS/RSS.php
@@ -44,220 +44,353 @@
4545 $dir = dirname( __FILE__ ) . '/';
4646 $wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php';
4747 $wgAutoloadClasses['RSSData'] = $dir . 'RSSData.php';
48 -$wgAutoloadClasses['RSSCache'] = $dir . 'RSSCache.php';
4948
50 -$wgHooks['ParserFirstCallInit'][] = 'wfRssExtension';
 49+$wgHooks['ParserFirstCallInit'][] = 'RSS::parserInit';
5150
52 -# Extension hook callback function
53 -function wfRssExtension( &$parser ) {
54 - # Install parser hook for <rss> tags
55 - $parser->setHook( 'rss', 'renderRss' );
56 - return true;
57 -}
 51+$wgRSSCacheAge = 3600; // one hour
 52+$wgRSSCacheFreshOnly = false;
 53+$wgRSSOutputEncoding = 'ISO-8859-1';
 54+$wgRSSInputEncoding = null;
 55+$wgRSSDetectEncoding = true;
 56+$wgRSSFetchTimeout = 5; // 5 second timeout
 57+$wgRSSUseGzip = true;
5858
59 -# Parser hook callback function
60 -function renderRss( $input, $args, $parser, $frame ) {
61 - global $wgOutputEncoding;
 59+class RSS {
 60+ protected $charset;
 61+ protected $maxheads = 32;
 62+ protected $reversed = false;
 63+ protected $highlight = array();
 64+ protected $filter = array();
 65+ protected $filterOut = array();
 66+ protected $itemTemplate;
 67+ protected $url;
 68+ protected $etag;
 69+ protected $last_modified;
 70+ protected $xml;
 71+ protected $ERROR;
6272
63 - // Kill parser cache
64 - $parser->disableCache();
 73+ public $client;
6574
66 - if ( !$input ) {
67 - return ''; # if <rss>-section is empty, return nothing
 75+ static function parserInit( $parser ) {
 76+ # Install parser hook for <rss> tags
 77+ $parser->setHook( 'rss', array( __CLASS__, 'renderRss' ) );
 78+ return true;
6879 }
6980
70 - # Parse fields in rss section
71 - $url = $input;
 81+ # Parser hook callback function
 82+ static function renderRss( $input, $args, $parser, $frame ) {
 83+ if ( !$input ) {
 84+ return ''; # if <rss>-section is empty, return nothing
 85+ }
 86+ $parser->disableCache();
7287
73 - # Get charset from argument array
74 - if ( isset( $args['charset'] ) ) {
75 - $charset = $args['charset'];
76 - } else {
77 - $charset = $wgOutputEncoding;
78 - }
 88+ $rss = new RSS($input, $args);
7989
80 - # Get max number of headlines from argument-array
81 - if ( isset( $args['max'] ) ) {
82 - $maxheads = $args['max'];
83 - } else {
84 - $maxheads = 32;
85 - }
 90+ $status = $rss->fetch();
8691
87 - # Get short flag from argument array
88 - # If short is set, no description text is printed
89 - if ( isset( $args['short'] ) ) {
90 - $short = true;
91 - } else {
92 - $short = false;
93 - }
 92+ # Check for errors.
 93+ if ( $status === false || !is_array( $rss->rss->items ) )
 94+ return wfMsg( 'rss-empty', $input );
9495
95 - # Get reverse flag from argument array
96 - if ( isset( $args['reverse'] ) ) {
97 - $rss->items = array_reverse( $rss->items );
98 - }
 96+ if ( isset( $rss->ERROR ) )
 97+ return wfMsg( 'rss-error', $rss->ERROR );
9998
100 - # Get date format from argument array
101 - if ( isset( $args['date'] ) ) {
102 - $date = $args['date'];
103 - } else {
104 - $date = 'd M Y H:i';
 99+ return $rss->renderFeed($parser, $frame);
105100 }
106101
107 - # Get highlight terms from argument array
108 - if ( isset( $args['highlight'] ) ) {
109 - $rssHighlight = $args['highlight'];
110 - $rssHighlight = str_replace( ' ', ' ', $rssHighlight );
111 - $rssHighlight = explode( ' ', trim( $rssHighlight ) );
112 - } else {
113 - $rssHighlight = false;
 102+ static function explodeOnSpaces( $str ) {
 103+ $found = preg_split( '# +#', $str );
 104+ return is_array( $found ) ? $found : array();
114105 }
115106
116 - # Get filter terms from argument array
117 - if ( isset( $args['filter'] ) ) {
118 - $rssFilter = $args['filter'];
119 - $rssFilter = str_replace( ' ', ' ', $rssFilter );
120 - $rssFilter = explode( ' ', trim( $rssFilter ) );
121 - } else {
122 - $rssFilter = false;
123 - }
 107+ function __construct($url, $args) {
124108
125 - # Filterout terms
126 - if ( isset( $args['filterout'] ) ) {
127 - $rssFilterout = $args['filterout'];
128 - $rssFilterout = str_replace( ' ', ' ', $rssFilterout );
129 - $rssFilterout = explode( ' ', trim( $rssFilterout ) );
130 - } else {
131 - $rssFilterout = false;
132 - }
 109+ if( isset($url) ) {
 110+ $this->url = $url;
 111+ }
133112
134 - if ( isset( $args['template'] ) ) {
135 - $template = 'Template:' . $args['template'];
136 - } else {
137 - $template = wfMsgNoTrans( 'rss-item' );
138 - }
 113+ # Get charset from argument array
 114+ if ( isset( $args['charset'] ) ) {
 115+ $this->charset = $args['charset'];
 116+ } else {
 117+ global $wgOutputEncoding;
 118+ $args['charset'] = $wgOutputEncoding;
 119+ }
139120
140 - $headcnt = 0;
 121+ # Get max number of headlines from argument-array
 122+ if ( isset( $args['max'] ) ) {
 123+ $this->maxheads = $args['max'];
 124+ }
141125
142 - # Fetch RSS. May be cached locally.
143 - # Refer to the documentation of MagpieRSS for details.
144 - if ( !function_exists( 'fetch_rss' ) ) {
145 - include( dirname( __FILE__ ) . '/RSSFetch.php' ); // provides fetch_rss() function
 126+ # Get reverse flag from argument array
 127+ if ( isset( $args['reverse'] ) ) {
 128+ $this->reversed = true;
 129+ }
 130+
 131+ # Get date format from argument array
 132+ # FIXME: not used yet
 133+ if ( isset( $args['date'] ) ) {
 134+ $this->date = $args['date'];
 135+ }
 136+
 137+ # Get highlight terms from argument array
 138+ if ( isset( $args['highlight'] ) ) {
 139+ $this->highlight = self::explodeOnSpaces( $args['highlight'] );
 140+ }
 141+
 142+ # Get filter terms from argument array
 143+ if ( isset( $args['filter'] ) ) {
 144+ $this->filter = self::explodeOnSpaces( $args['filter'] );
 145+ }
 146+
 147+ if ( isset( $args['filterout'] ) ) {
 148+ $this->filterOut = self::explodeOnSpaces( $args['filterout'] );
 149+
 150+ }
 151+
 152+ if ( isset( $args['template'] ) ) {
 153+ $titleObject = Title::newFromText($args['template'], NS_TEMPLATE);
 154+ $article = new Article($titleObject, 0);
 155+ $this->itemTemplate = $article->fetchContent(0);
 156+ } else {
 157+ $this->itemTemplate = wfMsgNoTrans( 'rss-item' );
 158+ }
146159 }
147 - $rss = fetch_rss( $url );
148160
149 - # Check for errors.
150 - if ( empty( $rss ) ) {
151 - return wfMsg( 'rss-empty', $url );
 161+ /**
 162+ * Return RSS object for the given URL, maintaining caching.
 163+ *
 164+ * NOTES ON RETRIEVING REMOTE FILES:
 165+ * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
 166+ * return a cached object, and touch the cache object upon recieving a 304.
 167+ *
 168+ * NOTES ON FAILED REQUESTS:
 169+ * If there is an HTTP error while fetching an RSS object, the cached version
 170+ * will be returned, if it exists (and if $wgRSSCacheFreshOnly is off
 171+ *
 172+ * @param $url String: URL of RSS file
 173+ * @return boolean true if the fetch worked.
 174+ */
 175+ function fetch( ) {
 176+ global $wgRSSCacheAge, $wgRSSCacheFreshOnly;
 177+ global $wgRSSCacheDirectory, $wgRSSFetchTimeout;
 178+ global $wgRSSOutputEncoding, $wgRSSInputEncoding;
 179+ global $wgRSSDetectEncoding, $wgRSSUseGzip;
 180+
 181+ if ( !isset( $this->url ) ) {
 182+ wfDebugLog( 'RSS: fetch called without a URL!' );
 183+ return false;
 184+ }
 185+
 186+ // Flow
 187+ // 1. check cache
 188+ // 2. if there is a hit, make sure its fresh
 189+ // 3. if cached obj fails freshness check, fetch remote
 190+ // 4. if remote fails, return stale object, or error
 191+ $key = wfMemcKey( $this->url );
 192+ $cachedFeed = $this->loadFromCache($key);
 193+ if( $cachedFeed !== false ) {
 194+ wfDebugLog( 'RSS', 'Outputting cached feed for '.$this->url );
 195+ return true;
 196+ }
 197+ wfDebugLog( 'RSS', 'Cache Failed '.$this->url );
 198+
 199+ $status = $this->fetchRemote($key);
 200+ return $status;
152201 }
153202
154 - if ( $rss->ERROR ) {
155 - return '<div>' . wfMsg( 'rss-error', $url, $rss->ERROR ) . '</div>';
 203+ function loadFromCache( $key ) {
 204+ global $parserMemc;
 205+
 206+ $data = $parserMemc->get($key);
 207+ if ($data === false) {
 208+ return false;
 209+ }
 210+
 211+ list($etag, $last_modified, $rss) =
 212+ unserialize($data);
 213+
 214+ if( !isset( $rss->items ) ) {
 215+ return false;
 216+ }
 217+
 218+ # Now that we've verified that we got useful data, keep it around.
 219+ $this->rss = $rss;
 220+ $this->etag = $etag;
 221+ $this->last_modified = $last_modified;
 222+
 223+ return true;
156224 }
157225
158 - if ( !is_array( $rss->items ) ) {
159 - return '<div>' . wfMsg( 'rss-empty', $url ) . '</div>';
 226+ function storeInCache( $key ) {
 227+ global $parserMemc, $wgRSSCacheAge;
 228+
 229+ if( isset( $this->rss ) ) {
 230+ return $parserMemc->set($key,
 231+ serialize( array($this->etag, $this->last_modified,
 232+ $this->rss) ), $wgRSSCacheAge);
 233+ }
160234 }
161235
162 - $output = '';
163 -
164236 /**
165 - * This would be better served by preg_replace_callback, but
166 - * I can't create a callback that carries $item in PHP < 5.3
 237+ * Retrieve a feed.
 238+ * @param $url String: URL of the feed.
 239+ * @param $headers Array: headers to send along with the request
 240+ * @return Status object
167241 */
168 - if ( $template ) {
169 - $headcnt = 0;
170 - foreach ( $rss->items as $item ) {
171 - if ( $maxheads > 0 && $headcnt >= $maxheads ) {
172 - continue;
 242+ protected function fetchRemote( $key, $headers = '' ) {
 243+ global $wgRSSFetchTimeout, $wgRSSUseGzip;
 244+
 245+ if ( $this->etag ) {
 246+ wfDebugLog( 'RSS', 'Used etag: '.$this->etag );
 247+ $headers['If-None-Match'] = $this->etag;
 248+ }
 249+ if ( $this->last_modified ) {
 250+ wfDebugLog( 'RSS', 'Used last modified: '.$this->last_modified );
 251+ $headers['If-Last-Modified'] = $this->last_modified;
 252+ }
 253+
 254+ $client =
 255+ HttpRequest::factory( $this->url, array( 'timeout' => $wgRSSFetchTimeout ) );
 256+ $client->setUserAgent( 'MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' );
 257+ /* $client->use_gzip = $wgRSSUseGzip; */
 258+ if ( is_array( $headers ) && count( $headers ) > 0 ) {
 259+ foreach ( $headers as $h ) {
 260+ if ( count( $h ) > 1 ) {
 261+ $client->setHeader( $h[0], $h[1] );
 262+ }
173263 }
 264+ }
174265
175 - $decision = true;
176 - foreach ( array( 'title', 'author', 'description', 'category' ) as $check ) {
177 - if ( isset( $item[$check] ) ) {
178 - $decision &= wfRssFilter( $item[$check], $rssFilter ) & wfRssFilterout( $item[$check], $rssFilterout );
179 - if ( !$decision ) {
180 - continue 2;
181 - }
 266+ $fetch = $client->execute();
 267+ $this->client = $client;
182268
183 - $item[$check] = wfRssHighlight( $item[$check], $rssHighlight );
 269+ if ( !$fetch->isGood() ) {
 270+ wfDebug( 'RSS', 'Request Failed: '.$fetch->getWikiText() );
 271+ return $fetch;
 272+ }
 273+
 274+ $ret = $this->responseToXML($key);
 275+ return $ret;
 276+ }
 277+
 278+ function renderFeed( $parser, $frame ) {
 279+ $output = "";
 280+ if ( $this->itemTemplate ) {
 281+ $headcnt = 0;
 282+ if ($this->reversed) {
 283+ $this->rss->items = array_reverse( $this->rss->items );
 284+ }
 285+
 286+ foreach ( $this->rss->items as $item ) {
 287+ if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
 288+ continue;
184289 }
185290
 291+ if ( $this->canDisplay( $item ) ) {
 292+ $output .= $this->renderItem( $item, $parser, $frame );
 293+ $headcnt++;
 294+ }
186295 }
 296+ }
 297+ return $output;
 298+ }
187299
188 - $rssTemp = '';
 300+ function renderItem( $item, $parser, $frame ) {
 301+ $parts = explode( '|', $this->itemTemplate );
189302
190 - foreach ( explode( '|', $template ) as $bit ) {
191 - $bits = explode( '=', $bit );
 303+ $output = "";
 304+ if( count( $parts ) > 1 && isset( $parser ) && isset( $frame ) ) {
 305+ $rendered = array();
 306+ foreach( $parts as $part ) {
 307+ $bits = explode( '=', $part );
 308+ $left = null;
 309+
192310 if ( count( $bits ) == 2 ) {
193311 $left = trim( $bits[0] );
 312+ }
194313
195 - if ( isset( $item[$left] ) ) {
196 - $right = $item[$left];
197 - }
198 -
199 - $rssTemp .= implode( ' = ', array( $left, $right ) );
 314+ if ( isset( $item[$left] ) ) {
 315+ $leftValue = preg_replace( '#{{{'.$left.'}}}#', $item[$left], $bits[1] );
 316+ $rendered[] = implode( '=', array( $left, $leftValue ) );
200317 } else {
201 - $rssTemp .= $bit;
 318+ $rendered[] = $part;
202319 }
203 - $rssTemp .= '|';
204320 }
205 - $rssTemp .= '}}';
206 -
 321+ $rssTemp = implode(" | ", $rendered);
207322 $output .= $parser->recursiveTagParse( $rssTemp, $frame );
208 - $headcnt++;
209323 }
 324+ return $output;
210325 }
211 - return $output;
212 -}
213326
214 -function wfRssFilter( $text, $rssFilter ) {
215 - $display = true;
216 - if ( is_array( $rssFilter ) ) {
217 - foreach ( $rssFilter as $term ) {
218 - if ( $term ) {
219 - $display = false;
220 - if ( preg_match( "|$term|i", $text, $a ) ) {
221 - $display = true;
222 - return $display;
223 - }
224 - }
225 - if ( $display ) {
226 - break;
227 - }
 327+ /**
 328+ * Parse an HTTP response object into an RSS object.
 329+ * @param $resp Object: an HTTP response object (see Snoopy)
 330+ * @return parsed RSS object (see RSSParse) or false
 331+ */
 332+ function responseToXML( $key ) {
 333+ $this->xml = new DOMDocument;
 334+ $this->xml->loadXML( $this->client->getContent() );
 335+ $this->rss = new RSSData( $this->xml );
 336+
 337+ // if RSS parsed successfully
 338+ if ( $this->rss && !$this->rss->ERROR ) {
 339+ $this->etag = $this->client->getResponseHeader('Etag');
 340+ $this->last_modified = $this->client->getResponseHeader('Last-Modified');
 341+ wfDebugLog( 'RSS', 'Stored etag ('.$this->etag.') and Last-Modified ('.$this->last_modified.') and items ('.count($this->rss->items).')!' );
 342+ $this->storeInCache( $key );
 343+
 344+ return Status::newGood();
 345+ } else {
 346+ return Status::newfatal( 'rss-parse-error', $this->rss->ERROR );
228347 }
229348 }
230 - return $display;
231 -}
232349
233 -function wfRssFilterout( $text, $rssFilterout ) {
234 - $display = true;
235 - if ( is_array( $rssFilterout ) ) {
236 - foreach ( $rssFilterout as $term ) {
 350+ function canDisplay( $item ) {
 351+ if($this->filter($item['description'], 'filterOut')) {
 352+ error_log($item['description']);
 353+ return true;
 354+ }
 355+ return false;
 356+ }
 357+
 358+ function filter( $text, $filterType ) {
 359+ if($filterType === 'filterOut') {
 360+ $keep = false;
 361+ $filter = $this->filterOut;
 362+ } else {
 363+ $keep = true;
 364+ $filter = $this->filter;
 365+ }
 366+
 367+ if( count($filter) == 0 ) return !$keep;
 368+
 369+ foreach( $filter as $term ) {
237370 if ( $term ) {
238 - if ( preg_match( "|$term|i", $text, $a ) ) {
239 - $display = false;
240 - return $display;
 371+ $match = preg_match( "|$term|i", $text );
 372+ if ( $match ) {
 373+ return $keep;
241374 }
242375 }
 376+ return !$keep;
243377 }
 378+
244379 }
245 - return $display;
246 -}
247380
248 -function wfRssHighlight( $text, $rssHighlight ) {
249 - $i = 0;
250 - $starttag = 'v8x5u3t3u8h';
251 - $endtag = 'q8n4f6n4n4x';
252381
253 - $color[] = 'coral';
254 - $color[] = 'greenyellow';
255 - $color[] = 'lightskyblue';
256 - $color[] = 'gold';
257 - $color[] = 'violet';
258 - $count_color = count( $color );
 382+ function highlightTerms( $text ) {
 383+ $i = 0;
 384+ $starttag = 'v8x5u3t3u8h';
 385+ $endtag = 'q8n4f6n4n4x';
259386
260 - if ( is_array( $rssHighlight ) ) {
261 - foreach ( $rssHighlight as $term ) {
 387+ $color[] = 'coral';
 388+ $color[] = 'greenyellow';
 389+ $color[] = 'lightskyblue';
 390+ $color[] = 'gold';
 391+ $color[] = 'violet';
 392+ $count_color = count( $color );
 393+
 394+ foreach ( $this->highlight as $term ) {
262395 if ( $term ) {
263396 $text = preg_replace( "|\b(\w*?" . $term . "\w*?)\b|i", "$starttag" . "_" . $i . "\\1$endtag", $text );
264397 $i++;
@@ -266,13 +399,13 @@
267400 }
268401 }
269402 }
270 - }
271403
272 - # To avoid trouble should someone wants to highlight the terms "span", "style", …
273 - for ( $i = 0; $i < 5; $i++ ) {
274 - $text = preg_replace( "|$starttag" . "_" . $i . "|", "<span style=\"background-color:" . $color[$i] . "; font-weight: bold;\">", $text );
275 - $text = preg_replace( "|$endtag|", '</span>', $text );
 404+ # To avoid trouble should someone wants to highlight the terms "span", "style", …
 405+ for ( $i = 0; $i < 5; $i++ ) {
 406+ $text = preg_replace( "|$starttag" . "_" . $i . "|", "<span style=\"background-color:" . $color[$i] . "; font-weight: bold;\">", $text );
 407+ $text = preg_replace( "|$endtag|", '</span>', $text );
 408+ }
 409+
 410+ return $text;
276411 }
277 -
278 - return $text;
279 -}
 412+}
\ No newline at end of file

Comments

#Comment by 😂 (talk | contribs)   16:29, 1 November 2010

Should probably go in $wgMemc, not $parserMemc. The latter is only for parsed wikitext, not an extension's cached data.

#Comment by MarkAHershberger (talk | contribs)   17:35, 1 November 2010

I thought that's what the documentation said, but I think I recall seeing one other instance where $parserMemc was used in an extension. If I find it again, I'll ping you ;)

#Comment by MarkAHershberger (talk | contribs)   21:18, 1 November 2010

see r75812

#Comment by Jack Phoenix (talk | contribs)   16:59, 1 November 2010
+	function canDisplay( $item ) {
+		if($this->filter($item['description'], 'filterOut')) {
+			error_log($item['description']);
+			return true;
+		}
+		return false;
+	}
+

That error_log() should be removed. I'm also seeing some spacing (such as the above) that doesn't follow our coding conventions and missing braces in some places.

#Comment by MarkAHershberger (talk | contribs)   17:33, 1 November 2010

Thanks for pointing out the error_log problem. This is a work in progress, so I wasn't done, but I certainly didn't mean to commit that. I'll check on the spacing issues.

#Comment by MarkAHershberger (talk | contribs)   18:51, 1 November 2010

I think you'll find that your concerns were addressed in r75811.

Status & tagging log