Index: trunk/extensions/RSS/RSSCache.php |
— | — | @@ -1,159 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * A simple, rolling (no GC) cache for RSS objects, keyed on URL. |
5 | | - * |
6 | | - * @file |
7 | | - */ |
8 | | - |
9 | | -class RSSCache { |
10 | | - public $BASE_CACHE = './cache'; // where the cache files are stored |
11 | | - public $MAX_AGE = 3600; // when are files stale, default one hour |
12 | | - public $ERROR = ''; // accumulate error messages |
13 | | - |
14 | | - function __construct( $base = '', $age = '' ) { |
15 | | - if ( $base ) { |
16 | | - $this->BASE_CACHE = $base; |
17 | | - } |
18 | | - if ( $age ) { |
19 | | - $this->MAX_AGE = $age; |
20 | | - } |
21 | | - |
22 | | - // attempt to make the cache directory |
23 | | - if ( !file_exists( $this->BASE_CACHE ) ) { |
24 | | - $status = @mkdir( $this->BASE_CACHE, 0755 ); |
25 | | - |
26 | | - // if make failed |
27 | | - if ( !$status ) { |
28 | | - wfDebugLog( |
29 | | - 'RSS', |
30 | | - "Cache couldn't make dir '" . $this->BASE_CACHE . "'." |
31 | | - ); |
32 | | - } |
33 | | - } |
34 | | - |
35 | | - // check if it is writable. |
36 | | - if ( !is_writable( $this->BASE_CACHE ) ) { |
37 | | - wfDebugLog( |
38 | | - 'RSS', |
39 | | - "Cache dir '" . $this->BASE_CACHE . "' is not writable." |
40 | | - ); |
41 | | - } |
42 | | - } |
43 | | - |
44 | | - /** |
45 | | - * Add an item to the cache, keyed on URL. |
46 | | - * @param $url String: URL from which the RSS file was fetched |
47 | | - * @param $rss Mixed: data to serialize |
48 | | - */ |
49 | | - function set( $url, $rss ) { |
50 | | - $this->ERROR = ''; |
51 | | - $cache_file = $this->file_name( $url ); |
52 | | - $fp = @fopen( $cache_file, 'w' ); |
53 | | - |
54 | | - if ( !$fp ) { |
55 | | - wfDebugLog( |
56 | | - 'RSS', |
57 | | - "Cache unable to open file for writing: $cache_file" |
58 | | - ); |
59 | | - return 0; |
60 | | - } |
61 | | - |
62 | | - $data = serialize( $rss ); |
63 | | - fwrite( $fp, $data ); |
64 | | - fclose( $fp ); |
65 | | - |
66 | | - return $cache_file; |
67 | | - } |
68 | | - |
69 | | - /** |
70 | | - * Fetch an item from the cache. |
71 | | - * @param $url String: URL from which the RSS file was fetched |
72 | | - * @return Object or false: cached object on HIT, false on MISS |
73 | | - */ |
74 | | - function get( $url ) { |
75 | | - $this->ERROR = ''; |
76 | | - $cache_file = $this->file_name( $url ); |
77 | | - |
78 | | - if ( !file_exists( $cache_file ) ) { |
79 | | - wfDebugLog( |
80 | | - 'RSS', |
81 | | - "Cache doesn't contain: $url (cache file: $cache_file)" |
82 | | - ); |
83 | | - return 0; |
84 | | - } |
85 | | - |
86 | | - $fp = @fopen( $cache_file, 'r' ); |
87 | | - if ( !$fp ) { |
88 | | - wfDebugLog( |
89 | | - 'RSS', |
90 | | - "Failed to open cache file for reading: $cache_file" |
91 | | - ); |
92 | | - return 0; |
93 | | - } |
94 | | - |
95 | | - $filesize = filesize( $cache_file ); |
96 | | - if ( $filesize ) { |
97 | | - $data = fread( $fp, filesize( $cache_file ) ); |
98 | | - $rss = unserialize( $data ); |
99 | | - |
100 | | - return $rss; |
101 | | - } |
102 | | - |
103 | | - return 0; |
104 | | - } |
105 | | - |
106 | | - /** |
107 | | - * Check a URL for membership in the cache and whether the object is older |
108 | | - * then MAX_AGE (ie. STALE) |
109 | | - * |
110 | | - * @param $url String: URL from which the RSS file was fetched |
111 | | - * @return String: indicates whether there was a cache hit or not |
112 | | - */ |
113 | | - function check_cache( $url ) { |
114 | | - $this->ERROR = ''; |
115 | | - $filename = $this->file_name( $url ); |
116 | | - |
117 | | - if ( file_exists( $filename ) ) { |
118 | | - // find how long ago the file was added to the cache |
119 | | - // and whether that is longer then MAX_AGE |
120 | | - $mtime = filemtime( $filename ); |
121 | | - $age = time() - $mtime; |
122 | | - if ( $this->MAX_AGE > $age ) { |
123 | | - // object exists and is current |
124 | | - return 'HIT'; |
125 | | - } else { |
126 | | - // object exists but is old |
127 | | - return 'STALE'; |
128 | | - } |
129 | | - } else { |
130 | | - // object does not exist |
131 | | - return 'MISS'; |
132 | | - } |
133 | | - } |
134 | | - |
135 | | - /** |
136 | | - * @param $cache_key String: cache key, consisting of the URL + output enc. |
137 | | - * @return Integer |
138 | | - */ |
139 | | - function cache_age( $cache_key ) { |
140 | | - $filename = $this->file_name( $url ); |
141 | | - if ( file_exists( $filename ) ) { |
142 | | - $mtime = filemtime( $filename ); |
143 | | - $age = time() - $mtime; |
144 | | - return $age; |
145 | | - } else { |
146 | | - return -1; |
147 | | - } |
148 | | - } |
149 | | - |
150 | | - /** |
151 | | - * Map URL to location in cache. |
152 | | - * @param $url String: URL from which the RSS file was fetched |
153 | | - * @return String: file name |
154 | | - */ |
155 | | - function file_name( $url ) { |
156 | | - $filename = md5( $url ); |
157 | | - return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) ); |
158 | | - } |
159 | | - |
160 | | -} |
\ No newline at end of file |
Index: trunk/extensions/RSS/RSSFetch.php |
— | — | @@ -1,246 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * A simple functional interface to fetching and parsing RSS files, via the |
5 | | - * function fetch_rss(). |
6 | | - * |
7 | | - * @file |
8 | | - */ |
9 | | - |
10 | | -/** |
11 | | - * Globals - redefine these in your script to change the |
12 | | - * behaviour of fetch_rss() currently, most options effect the cache |
13 | | - * |
14 | | - * $wgRSSCache - Should we cache parsed RSS objects? |
15 | | - * |
16 | | - * $wgRSSCacheDirectory - Where should we cache parsed RSS objects? |
17 | | - * This should be a location that the webserver can write to. If this |
18 | | - * directory does not already exist, We will try to be smart and create it. |
19 | | - * This will often fail for permissions reasons. |
20 | | - * |
21 | | - * $wgRSSCacheAge - How long to store cached RSS objects (in seconds)?. |
22 | | - * |
23 | | - * $wgRSSCacheFreshOnly - If remote fetch fails, throw an error |
24 | | - * instead of returning stale object? |
25 | | - */ |
26 | | - |
27 | | -$RSS_FETCH_ERROR = ''; |
28 | | - |
29 | | -/** |
30 | | - * Return RSS object for the given URL, maintaining caching. |
31 | | - * |
32 | | - * NOTES ON CACHING: |
33 | | - * If caching is on ($wgRSSCache) fetch_rss will first check the cache. |
34 | | - * |
35 | | - * NOTES ON RETRIEVING REMOTE FILES: |
36 | | - * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
37 | | - * return a cached object, and touch the cache object upon recieving a 304. |
38 | | - * |
39 | | - * NOTES ON FAILED REQUESTS: |
40 | | - * If there is an HTTP error while fetching an RSS object, the cached version |
41 | | - * will be returned, if it exists (and if $wgRSSCacheFreshOnly is off) |
42 | | - * |
43 | | - * @param $url String: URL of RSS file |
44 | | - * @return parsed RSS object (see RSSParse) |
45 | | - */ |
46 | | -function fetch_rss( $url ) { |
47 | | - global $wgRSSCache, $wgRSSCacheAge, $wgRSSCacheFreshOnly; |
48 | | - global $wgRSSCacheDirectory, $wgRSSFetchTimeout; |
49 | | - global $wgRSSOutputEncoding, $wgRSSInputEncoding; |
50 | | - global $wgRSSDetectEncoding, $wgRSSUseGzip; |
51 | | - |
52 | | - $nameValue = array( |
53 | | - 'wgRSSCache' => true, |
54 | | - 'wgRSSCacheAge' => 60 * 60, // one hour |
55 | | - 'wgRSSCacheFreshOnly' => false, |
56 | | - 'wgRSSCacheDirectory' => '/extensions/RSS/cache', |
57 | | - 'wgRSSOutputEncoding' => 'ISO-8859-1', |
58 | | - 'wgRSSInputEncoding' => null, |
59 | | - 'wgRSSDetectEncoding' => true, |
60 | | - 'wgRSSFetchTimeout' => 5, // 5 second timeout |
61 | | - 'wgRSSUseGzip' => true |
62 | | - ); |
63 | | - |
64 | | - foreach ( $nameValue as $n => $v ) { |
65 | | - if ( !isset( $GLOBALS[$n] ) ) { |
66 | | - $GLOBALS[$n] = $v; |
67 | | - } |
68 | | - } |
69 | | - |
70 | | - if ( !isset( $url ) ) { |
71 | | - wfDebugLog( 'RSS', 'fetch_rss (RSSFetch.php) called without a URL!' ); |
72 | | - return false; |
73 | | - } |
74 | | - |
75 | | - // if cache is disabled |
76 | | - if ( !$wgRSSCache ) { |
77 | | - // fetch file, and parse it |
78 | | - $resp = _fetch_remote_file( $url ); |
79 | | - $errs = $resp->getErrorsArray(); |
80 | | - if ( count( $errs ) == 0 ) { |
81 | | - return _response_to_rss( $resp ); |
82 | | - } else { |
83 | | - wfDebugLog( 'RSS', "Failed to fetch $url and cache is off" ); |
84 | | - return false; |
85 | | - } |
86 | | - } else { // else cache is ON |
87 | | - // Flow |
88 | | - // 1. check cache |
89 | | - // 2. if there is a hit, make sure its fresh |
90 | | - // 3. if cached obj fails freshness check, fetch remote |
91 | | - // 4. if remote fails, return stale object, or error |
92 | | - $cache = new RSSCache( $wgRSSCacheDirectory, $wgRSSCacheAge ); |
93 | | - |
94 | | - if ( $cache->ERROR ) { |
95 | | - wfDebugLog( |
96 | | - 'RSS', |
97 | | - 'cache error on RSSFetch.php! Error msg: ' . |
98 | | - $cache->ERROR |
99 | | - ); |
100 | | - } |
101 | | - |
102 | | - $cache_status = 0; // response of check_cache |
103 | | - $request_headers = array(); // HTTP headers to send with fetch |
104 | | - $rss = 0; // parsed RSS object |
105 | | - $errormsg = 0; // errors, if any |
106 | | - |
107 | | - // store parsed XML by desired output encoding |
108 | | - // as character munging happens at parse time |
109 | | - $cache_key = $url . $wgRSSOutputEncoding; |
110 | | - |
111 | | - if ( !$cache->ERROR ) { |
112 | | - // return cache HIT, MISS, or STALE |
113 | | - $cache_status = $cache->check_cache( $cache_key ); |
114 | | - } |
115 | | - |
116 | | - // if object cached, and cache is fresh, return cached obj |
117 | | - if ( $cache_status == 'HIT' ) { |
118 | | - $rss = $cache->get( $cache_key ); |
119 | | - if ( isset( $rss ) && $rss ) { |
120 | | - // should be cache age |
121 | | - $rss->from_cache = 1; |
122 | | - wfDebugLog( 'RSS', 'Cache HIT' ); |
123 | | - return $rss; |
124 | | - } |
125 | | - } |
126 | | - |
127 | | - // else attempt a conditional get |
128 | | - |
129 | | - // setup headers |
130 | | - if ( $cache_status == 'STALE' ) { |
131 | | - $rss = $cache->get( $cache_key ); |
132 | | - if ( $rss && $rss->etag && $rss->last_modified ) { |
133 | | - $request_headers['If-None-Match'] = $rss->etag; |
134 | | - $request_headers['If-Last-Modified'] = $rss->last_modified; |
135 | | - } |
136 | | - } |
137 | | - |
138 | | - $resp = _fetch_remote_file( $url, $request_headers ); |
139 | | - |
140 | | - if ( isset( $resp ) && $resp ) { |
141 | | - if ( $resp->getStatus() === 304 ) { |
142 | | - // we have the most current copy |
143 | | - wfDebugLog( 'RSS', "Got 304 for $url" ); |
144 | | - // reset cache on 304 (at minutillo insistent prodding) |
145 | | - $cache->set( $cache_key, $rss ); |
146 | | - return $rss; |
147 | | - } elseif ( $resp->getStatus() >= 200 && $resp->getStatus() < 300 ) { |
148 | | - $rss = _response_to_rss( $resp ); |
149 | | - if ( $rss ) { |
150 | | - wfDebugLog( 'RSS', 'Fetch successful' ); |
151 | | - // add object to cache |
152 | | - $cache->set( $cache_key, $rss ); |
153 | | - return $rss; |
154 | | - } |
155 | | - } else { |
156 | | - $errormsg = "Failed to fetch $url "; |
157 | | - if ( $resp->getStatus() === -100 ) { |
158 | | - global $wgRSSFetchTimeout; |
159 | | - $errormsg .= '(Request timed out after ' . $wgRSSFetchTimeout . ' seconds)'; |
160 | | - } elseif ( $resp->error ) { |
161 | | - $http_error = substr( $resp->error, 0, -2 ); |
162 | | - $errormsg .= "(HTTP Error: $http_error)"; |
163 | | - } else { |
164 | | - $errormsg .= '(HTTP Response: ' . $resp->response_code . ')'; |
165 | | - } |
166 | | - } |
167 | | - } else { |
168 | | - $errormsg = 'Unable to retrieve RSS file for unknown reasons.'; |
169 | | - } |
170 | | - |
171 | | - // else fetch failed |
172 | | - |
173 | | - // attempt to return cached object |
174 | | - if ( $rss ) { |
175 | | - wfDebugLog( 'RSS', "Returning STALE object for $url" ); |
176 | | - return $rss; |
177 | | - } |
178 | | - |
179 | | - // else we totally failed |
180 | | - $RSS_FETCH_ERROR = $errormsg; |
181 | | - wfDebugLog( 'RSS', |
182 | | - 'RSSFetch: we totally failed :-( Error message:' . |
183 | | - $errormsg |
184 | | - ); |
185 | | - |
186 | | - return false; |
187 | | - } // end if ( !$wgRSSCache ) { |
188 | | -} // end fetch_rss() |
189 | | - |
190 | | -/** |
191 | | - * Retrieve an arbitrary remote file. |
192 | | - * @param $url String: URL of the remote file |
193 | | - * @param $headers Array: headers to send along with the request |
194 | | - * @return an HTTP response object |
195 | | - */ |
196 | | -function _fetch_remote_file( $url, $headers = '' ) { |
197 | | - global $wgRSSFetchTimeout, $wgRSSUseGzip; |
198 | | - |
199 | | - $client = |
200 | | - HttpRequest::factory( $url, array( 'timeout' => $wgRSSFetchTimeout ) ); |
201 | | - $client->setUserAgent( 'MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' ); |
202 | | - /* $client->use_gzip = $wgRSSUseGzip; */ |
203 | | - if ( is_array( $headers ) && count( $headers ) > 0 ) { |
204 | | - foreach ( $headers as $h ) { |
205 | | - if ( count( $h ) > 1 ) { |
206 | | - $client->setHeader( $h[0], $h[1] ); |
207 | | - } |
208 | | - } |
209 | | - } |
210 | | - |
211 | | - $fetch = $client->execute(); |
212 | | - |
213 | | - /* @$client->fetch( $url ); */ |
214 | | - if ( $fetch->isGood() ) { |
215 | | - return $client; |
216 | | - } else { |
217 | | - wfDebugLog( 'RSS', 'error fetching $url: ' . $fetch->getWikiText() ); |
218 | | - } |
219 | | -} |
220 | | - |
221 | | -/** |
222 | | - * Parse an HTTP response object into an RSS object. |
223 | | - * @param $resp Object: an HTTP response object (see Snoopy) |
224 | | - * @return parsed RSS object (see RSSParse) or false |
225 | | - */ |
226 | | -function _response_to_rss( $resp ) { |
227 | | - global $wgRSSOutputEncoding, $wgRSSInputEncoding, $wgRSSDetectEncoding; |
228 | | - $rss = new RSSData( $resp ); |
229 | | - |
230 | | - // if RSS parsed successfully |
231 | | - if ( $rss && !$rss->ERROR ) { |
232 | | - // find Etag and Last-Modified |
233 | | - |
234 | | - return $rss; |
235 | | - } else { // else construct error message |
236 | | - $errormsg = 'Failed to parsex RSS file.'; |
237 | | - |
238 | | - if ( $rss ) { |
239 | | - $errormsg .= ' (' . $rss->ERROR . ')'; |
240 | | - } |
241 | | - $RSS_FETCH_ERROR = $errormsg; |
242 | | - wfDebugLog( 'RSS', 'error!' . $errormsg ); |
243 | | - |
244 | | - return false; |
245 | | - } // end if ( $rss && !$rss->ERROR ) |
246 | | -} |
247 | | - |
Index: trunk/extensions/RSS/RSSData.php |
— | — | @@ -1,21 +1,14 @@ |
2 | 2 | <?php |
3 | 3 | |
4 | 4 | class RSSData { |
5 | | - public $etag; |
6 | | - public $last_modified; |
7 | 5 | public $ERROR; |
8 | | - public $xml; |
9 | 6 | public $items; |
10 | 7 | |
11 | | - function __construct( $resp ) { |
12 | | - $this->xml = new DOMDocument; |
13 | | - $this->xml->loadXML( $resp->getContent() ); |
14 | | - $h = $resp->getResponseHeader( 'ETag' ); |
15 | | - $this->etag = $h; |
16 | | - $h = $resp->getResponseHeader( 'Last-Modified' ); |
17 | | - $this->last_modified = $h; |
18 | | - |
19 | | - $xpath = new DOMXPath( $this->xml ); |
| 8 | + function __construct( $xml ) { |
| 9 | + if( !( $xml instanceOf DOMDocument ) ) { |
| 10 | + return null; |
| 11 | + } |
| 12 | + $xpath = new DOMXPath( $xml ); |
20 | 13 | $items = $xpath->evaluate( '/rss/channel/item' ); |
21 | 14 | |
22 | 15 | foreach ( $items as $item ) { |
Index: trunk/extensions/RSS/RSS.php |
— | — | @@ -44,220 +44,353 @@ |
45 | 45 | $dir = dirname( __FILE__ ) . '/'; |
46 | 46 | $wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php'; |
47 | 47 | $wgAutoloadClasses['RSSData'] = $dir . 'RSSData.php'; |
48 | | -$wgAutoloadClasses['RSSCache'] = $dir . 'RSSCache.php'; |
49 | 48 | |
50 | | -$wgHooks['ParserFirstCallInit'][] = 'wfRssExtension'; |
| 49 | +$wgHooks['ParserFirstCallInit'][] = 'RSS::parserInit'; |
51 | 50 | |
52 | | -# Extension hook callback function |
53 | | -function wfRssExtension( &$parser ) { |
54 | | - # Install parser hook for <rss> tags |
55 | | - $parser->setHook( 'rss', 'renderRss' ); |
56 | | - return true; |
57 | | -} |
| 51 | +$wgRSSCacheAge = 3600; // one hour |
| 52 | +$wgRSSCacheFreshOnly = false; |
| 53 | +$wgRSSOutputEncoding = 'ISO-8859-1'; |
| 54 | +$wgRSSInputEncoding = null; |
| 55 | +$wgRSSDetectEncoding = true; |
| 56 | +$wgRSSFetchTimeout = 5; // 5 second timeout |
| 57 | +$wgRSSUseGzip = true; |
58 | 58 | |
59 | | -# Parser hook callback function |
60 | | -function renderRss( $input, $args, $parser, $frame ) { |
61 | | - global $wgOutputEncoding; |
| 59 | +class RSS { |
| 60 | + protected $charset; |
| 61 | + protected $maxheads = 32; |
| 62 | + protected $reversed = false; |
| 63 | + protected $highlight = array(); |
| 64 | + protected $filter = array(); |
| 65 | + protected $filterOut = array(); |
| 66 | + protected $itemTemplate; |
| 67 | + protected $url; |
| 68 | + protected $etag; |
| 69 | + protected $last_modified; |
| 70 | + protected $xml; |
| 71 | + protected $ERROR; |
62 | 72 | |
63 | | - // Kill parser cache |
64 | | - $parser->disableCache(); |
| 73 | + public $client; |
65 | 74 | |
66 | | - if ( !$input ) { |
67 | | - return ''; # if <rss>-section is empty, return nothing |
| 75 | + static function parserInit( $parser ) { |
| 76 | + # Install parser hook for <rss> tags |
| 77 | + $parser->setHook( 'rss', array( __CLASS__, 'renderRss' ) ); |
| 78 | + return true; |
68 | 79 | } |
69 | 80 | |
70 | | - # Parse fields in rss section |
71 | | - $url = $input; |
| 81 | + # Parser hook callback function |
| 82 | + static function renderRss( $input, $args, $parser, $frame ) { |
| 83 | + if ( !$input ) { |
| 84 | + return ''; # if <rss>-section is empty, return nothing |
| 85 | + } |
| 86 | + $parser->disableCache(); |
72 | 87 | |
73 | | - # Get charset from argument array |
74 | | - if ( isset( $args['charset'] ) ) { |
75 | | - $charset = $args['charset']; |
76 | | - } else { |
77 | | - $charset = $wgOutputEncoding; |
78 | | - } |
| 88 | + $rss = new RSS($input, $args); |
79 | 89 | |
80 | | - # Get max number of headlines from argument-array |
81 | | - if ( isset( $args['max'] ) ) { |
82 | | - $maxheads = $args['max']; |
83 | | - } else { |
84 | | - $maxheads = 32; |
85 | | - } |
| 90 | + $status = $rss->fetch(); |
86 | 91 | |
87 | | - # Get short flag from argument array |
88 | | - # If short is set, no description text is printed |
89 | | - if ( isset( $args['short'] ) ) { |
90 | | - $short = true; |
91 | | - } else { |
92 | | - $short = false; |
93 | | - } |
| 92 | + # Check for errors. |
| 93 | + if ( $status === false || !is_array( $rss->rss->items ) ) |
| 94 | + return wfMsg( 'rss-empty', $input ); |
94 | 95 | |
95 | | - # Get reverse flag from argument array |
96 | | - if ( isset( $args['reverse'] ) ) { |
97 | | - $rss->items = array_reverse( $rss->items ); |
98 | | - } |
| 96 | + if ( isset( $rss->ERROR ) ) |
| 97 | + return wfMsg( 'rss-error', $rss->ERROR ); |
99 | 98 | |
100 | | - # Get date format from argument array |
101 | | - if ( isset( $args['date'] ) ) { |
102 | | - $date = $args['date']; |
103 | | - } else { |
104 | | - $date = 'd M Y H:i'; |
| 99 | + return $rss->renderFeed($parser, $frame); |
105 | 100 | } |
106 | 101 | |
107 | | - # Get highlight terms from argument array |
108 | | - if ( isset( $args['highlight'] ) ) { |
109 | | - $rssHighlight = $args['highlight']; |
110 | | - $rssHighlight = str_replace( ' ', ' ', $rssHighlight ); |
111 | | - $rssHighlight = explode( ' ', trim( $rssHighlight ) ); |
112 | | - } else { |
113 | | - $rssHighlight = false; |
| 102 | + static function explodeOnSpaces( $str ) { |
| 103 | + $found = preg_split( '# +#', $str ); |
| 104 | + return is_array( $found ) ? $found : array(); |
114 | 105 | } |
115 | 106 | |
116 | | - # Get filter terms from argument array |
117 | | - if ( isset( $args['filter'] ) ) { |
118 | | - $rssFilter = $args['filter']; |
119 | | - $rssFilter = str_replace( ' ', ' ', $rssFilter ); |
120 | | - $rssFilter = explode( ' ', trim( $rssFilter ) ); |
121 | | - } else { |
122 | | - $rssFilter = false; |
123 | | - } |
| 107 | + function __construct($url, $args) { |
124 | 108 | |
125 | | - # Filterout terms |
126 | | - if ( isset( $args['filterout'] ) ) { |
127 | | - $rssFilterout = $args['filterout']; |
128 | | - $rssFilterout = str_replace( ' ', ' ', $rssFilterout ); |
129 | | - $rssFilterout = explode( ' ', trim( $rssFilterout ) ); |
130 | | - } else { |
131 | | - $rssFilterout = false; |
132 | | - } |
| 109 | + if( isset($url) ) { |
| 110 | + $this->url = $url; |
| 111 | + } |
133 | 112 | |
134 | | - if ( isset( $args['template'] ) ) { |
135 | | - $template = 'Template:' . $args['template']; |
136 | | - } else { |
137 | | - $template = wfMsgNoTrans( 'rss-item' ); |
138 | | - } |
| 113 | + # Get charset from argument array |
| 114 | + if ( isset( $args['charset'] ) ) { |
| 115 | + $this->charset = $args['charset']; |
| 116 | + } else { |
| 117 | + global $wgOutputEncoding; |
| 118 | + $args['charset'] = $wgOutputEncoding; |
| 119 | + } |
139 | 120 | |
140 | | - $headcnt = 0; |
| 121 | + # Get max number of headlines from argument-array |
| 122 | + if ( isset( $args['max'] ) ) { |
| 123 | + $this->maxheads = $args['max']; |
| 124 | + } |
141 | 125 | |
142 | | - # Fetch RSS. May be cached locally. |
143 | | - # Refer to the documentation of MagpieRSS for details. |
144 | | - if ( !function_exists( 'fetch_rss' ) ) { |
145 | | - include( dirname( __FILE__ ) . '/RSSFetch.php' ); // provides fetch_rss() function |
| 126 | + # Get reverse flag from argument array |
| 127 | + if ( isset( $args['reverse'] ) ) { |
| 128 | + $this->reversed = true; |
| 129 | + } |
| 130 | + |
| 131 | + # Get date format from argument array |
| 132 | + # FIXME: not used yet |
| 133 | + if ( isset( $args['date'] ) ) { |
| 134 | + $this->date = $args['date']; |
| 135 | + } |
| 136 | + |
| 137 | + # Get highlight terms from argument array |
| 138 | + if ( isset( $args['highlight'] ) ) { |
| 139 | + $this->highlight = self::explodeOnSpaces( $args['highlight'] ); |
| 140 | + } |
| 141 | + |
| 142 | + # Get filter terms from argument array |
| 143 | + if ( isset( $args['filter'] ) ) { |
| 144 | + $this->filter = self::explodeOnSpaces( $args['filter'] ); |
| 145 | + } |
| 146 | + |
| 147 | + if ( isset( $args['filterout'] ) ) { |
| 148 | + $this->filterOut = self::explodeOnSpaces( $args['filterout'] ); |
| 149 | + |
| 150 | + } |
| 151 | + |
| 152 | + if ( isset( $args['template'] ) ) { |
| 153 | + $titleObject = Title::newFromText($args['template'], NS_TEMPLATE); |
| 154 | + $article = new Article($titleObject, 0); |
| 155 | + $this->itemTemplate = $article->fetchContent(0); |
| 156 | + } else { |
| 157 | + $this->itemTemplate = wfMsgNoTrans( 'rss-item' ); |
| 158 | + } |
146 | 159 | } |
147 | | - $rss = fetch_rss( $url ); |
148 | 160 | |
149 | | - # Check for errors. |
150 | | - if ( empty( $rss ) ) { |
151 | | - return wfMsg( 'rss-empty', $url ); |
| 161 | + /** |
| 162 | + * Return RSS object for the given URL, maintaining caching. |
| 163 | + * |
| 164 | + * NOTES ON RETRIEVING REMOTE FILES: |
| 165 | + * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
 | 166 | + * return a cached object, and touch the cache object upon receiving a 304. |
| 167 | + * |
| 168 | + * NOTES ON FAILED REQUESTS: |
| 169 | + * If there is an HTTP error while fetching an RSS object, the cached version |
 | 170 | + * will be returned, if it exists (and if $wgRSSCacheFreshOnly is off) |
| 171 | + * |
| 172 | + * @param $url String: URL of RSS file |
| 173 | + * @return boolean true if the fetch worked. |
| 174 | + */ |
| 175 | + function fetch( ) { |
| 176 | + global $wgRSSCacheAge, $wgRSSCacheFreshOnly; |
| 177 | + global $wgRSSCacheDirectory, $wgRSSFetchTimeout; |
| 178 | + global $wgRSSOutputEncoding, $wgRSSInputEncoding; |
| 179 | + global $wgRSSDetectEncoding, $wgRSSUseGzip; |
| 180 | + |
| 181 | + if ( !isset( $this->url ) ) { |
| 182 | + wfDebugLog( 'RSS: fetch called without a URL!' ); |
| 183 | + return false; |
| 184 | + } |
| 185 | + |
| 186 | + // Flow |
| 187 | + // 1. check cache |
| 188 | + // 2. if there is a hit, make sure its fresh |
| 189 | + // 3. if cached obj fails freshness check, fetch remote |
| 190 | + // 4. if remote fails, return stale object, or error |
| 191 | + $key = wfMemcKey( $this->url ); |
| 192 | + $cachedFeed = $this->loadFromCache($key); |
| 193 | + if( $cachedFeed !== false ) { |
| 194 | + wfDebugLog( 'RSS', 'Outputting cached feed for '.$this->url ); |
| 195 | + return true; |
| 196 | + } |
| 197 | + wfDebugLog( 'RSS', 'Cache Failed '.$this->url ); |
| 198 | + |
| 199 | + $status = $this->fetchRemote($key); |
| 200 | + return $status; |
152 | 201 | } |
153 | 202 | |
154 | | - if ( $rss->ERROR ) { |
155 | | - return '<div>' . wfMsg( 'rss-error', $url, $rss->ERROR ) . '</div>'; |
| 203 | + function loadFromCache( $key ) { |
| 204 | + global $parserMemc; |
| 205 | + |
| 206 | + $data = $parserMemc->get($key); |
| 207 | + if ($data === false) { |
| 208 | + return false; |
| 209 | + } |
| 210 | + |
| 211 | + list($etag, $last_modified, $rss) = |
| 212 | + unserialize($data); |
| 213 | + |
| 214 | + if( !isset( $rss->items ) ) { |
| 215 | + return false; |
| 216 | + } |
| 217 | + |
| 218 | + # Now that we've verified that we got useful data, keep it around. |
| 219 | + $this->rss = $rss; |
| 220 | + $this->etag = $etag; |
| 221 | + $this->last_modified = $last_modified; |
| 222 | + |
| 223 | + return true; |
156 | 224 | } |
157 | 225 | |
158 | | - if ( !is_array( $rss->items ) ) { |
159 | | - return '<div>' . wfMsg( 'rss-empty', $url ) . '</div>'; |
| 226 | + function storeInCache( $key ) { |
| 227 | + global $parserMemc, $wgRSSCacheAge; |
| 228 | + |
| 229 | + if( isset( $this->rss ) ) { |
| 230 | + return $parserMemc->set($key, |
| 231 | + serialize( array($this->etag, $this->last_modified, |
| 232 | + $this->rss) ), $wgRSSCacheAge); |
| 233 | + } |
160 | 234 | } |
161 | 235 | |
162 | | - $output = ''; |
163 | | - |
164 | 236 | /** |
165 | | - * This would be better served by preg_replace_callback, but |
166 | | - * I can't create a callback that carries $item in PHP < 5.3 |
| 237 | + * Retrieve a feed. |
 | 238 | + * @param $key String: cache key passed on to responseToXML() for storing the parsed feed |
| 239 | + * @param $headers Array: headers to send along with the request |
| 240 | + * @return Status object |
167 | 241 | */ |
168 | | - if ( $template ) { |
169 | | - $headcnt = 0; |
170 | | - foreach ( $rss->items as $item ) { |
171 | | - if ( $maxheads > 0 && $headcnt >= $maxheads ) { |
172 | | - continue; |
| 242 | + protected function fetchRemote( $key, $headers = '' ) { |
| 243 | + global $wgRSSFetchTimeout, $wgRSSUseGzip; |
| 244 | + |
| 245 | + if ( $this->etag ) { |
| 246 | + wfDebugLog( 'RSS', 'Used etag: '.$this->etag ); |
| 247 | + $headers['If-None-Match'] = $this->etag; |
| 248 | + } |
| 249 | + if ( $this->last_modified ) { |
| 250 | + wfDebugLog( 'RSS', 'Used last modified: '.$this->last_modified ); |
| 251 | + $headers['If-Last-Modified'] = $this->last_modified; |
| 252 | + } |
| 253 | + |
| 254 | + $client = |
| 255 | + HttpRequest::factory( $this->url, array( 'timeout' => $wgRSSFetchTimeout ) ); |
| 256 | + $client->setUserAgent( 'MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' ); |
| 257 | + /* $client->use_gzip = $wgRSSUseGzip; */ |
| 258 | + if ( is_array( $headers ) && count( $headers ) > 0 ) { |
| 259 | + foreach ( $headers as $h ) { |
| 260 | + if ( count( $h ) > 1 ) { |
| 261 | + $client->setHeader( $h[0], $h[1] ); |
| 262 | + } |
173 | 263 | } |
| 264 | + } |
174 | 265 | |
175 | | - $decision = true; |
176 | | - foreach ( array( 'title', 'author', 'description', 'category' ) as $check ) { |
177 | | - if ( isset( $item[$check] ) ) { |
178 | | - $decision &= wfRssFilter( $item[$check], $rssFilter ) & wfRssFilterout( $item[$check], $rssFilterout ); |
179 | | - if ( !$decision ) { |
180 | | - continue 2; |
181 | | - } |
| 266 | + $fetch = $client->execute(); |
| 267 | + $this->client = $client; |
182 | 268 | |
183 | | - $item[$check] = wfRssHighlight( $item[$check], $rssHighlight ); |
| 269 | + if ( !$fetch->isGood() ) { |
| 270 | + wfDebug( 'RSS', 'Request Failed: '.$fetch->getWikiText() ); |
| 271 | + return $fetch; |
| 272 | + } |
| 273 | + |
| 274 | + $ret = $this->responseToXML($key); |
| 275 | + return $ret; |
| 276 | + } |
| 277 | + |
| 278 | + function renderFeed( $parser, $frame ) { |
| 279 | + $output = ""; |
| 280 | + if ( $this->itemTemplate ) { |
| 281 | + $headcnt = 0; |
| 282 | + if ($this->reversed) { |
| 283 | + $this->rss->items = array_reverse( $this->rss->items ); |
| 284 | + } |
| 285 | + |
| 286 | + foreach ( $this->rss->items as $item ) { |
| 287 | + if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) { |
| 288 | + continue; |
184 | 289 | } |
185 | 290 | |
| 291 | + if ( $this->canDisplay( $item ) ) { |
| 292 | + $output .= $this->renderItem( $item, $parser, $frame ); |
| 293 | + $headcnt++; |
| 294 | + } |
186 | 295 | } |
| 296 | + } |
| 297 | + return $output; |
| 298 | + } |
187 | 299 | |
188 | | - $rssTemp = ''; |
| 300 | + function renderItem( $item, $parser, $frame ) { |
| 301 | + $parts = explode( '|', $this->itemTemplate ); |
189 | 302 | |
190 | | - foreach ( explode( '|', $template ) as $bit ) { |
191 | | - $bits = explode( '=', $bit ); |
| 303 | + $output = ""; |
| 304 | + if( count( $parts ) > 1 && isset( $parser ) && isset( $frame ) ) { |
| 305 | + $rendered = array(); |
| 306 | + foreach( $parts as $part ) { |
| 307 | + $bits = explode( '=', $part ); |
| 308 | + $left = null; |
| 309 | + |
192 | 310 | if ( count( $bits ) == 2 ) { |
193 | 311 | $left = trim( $bits[0] ); |
| 312 | + } |
194 | 313 | |
195 | | - if ( isset( $item[$left] ) ) { |
196 | | - $right = $item[$left]; |
197 | | - } |
198 | | - |
199 | | - $rssTemp .= implode( ' = ', array( $left, $right ) ); |
| 314 | + if ( isset( $item[$left] ) ) { |
| 315 | + $leftValue = preg_replace( '#{{{'.$left.'}}}#', $item[$left], $bits[1] ); |
| 316 | + $rendered[] = implode( '=', array( $left, $leftValue ) ); |
200 | 317 | } else { |
201 | | - $rssTemp .= $bit; |
| 318 | + $rendered[] = $part; |
202 | 319 | } |
203 | | - $rssTemp .= '|'; |
204 | 320 | } |
205 | | - $rssTemp .= '}}'; |
206 | | - |
| 321 | + $rssTemp = implode(" | ", $rendered); |
207 | 322 | $output .= $parser->recursiveTagParse( $rssTemp, $frame ); |
208 | | - $headcnt++; |
209 | 323 | } |
| 324 | + return $output; |
210 | 325 | } |
211 | | - return $output; |
212 | | -} |
213 | 326 | |
214 | | -function wfRssFilter( $text, $rssFilter ) { |
215 | | - $display = true; |
216 | | - if ( is_array( $rssFilter ) ) { |
217 | | - foreach ( $rssFilter as $term ) { |
218 | | - if ( $term ) { |
219 | | - $display = false; |
220 | | - if ( preg_match( "|$term|i", $text, $a ) ) { |
221 | | - $display = true; |
222 | | - return $display; |
223 | | - } |
224 | | - } |
225 | | - if ( $display ) { |
226 | | - break; |
227 | | - } |
| 327 | + /** |
| 328 | + * Parse an HTTP response object into an RSS object. |
 | 329 | + * @param $key String: cache key under which the parsed feed is stored |
 | 330 | + * @return Status: good if the feed parsed without error, fatal ('rss-parse-error') otherwise |
| 331 | + */ |
| 332 | + function responseToXML( $key ) { |
| 333 | + $this->xml = new DOMDocument; |
| 334 | + $this->xml->loadXML( $this->client->getContent() ); |
| 335 | + $this->rss = new RSSData( $this->xml ); |
| 336 | + |
| 337 | + // if RSS parsed successfully |
| 338 | + if ( $this->rss && !$this->rss->ERROR ) { |
| 339 | + $this->etag = $this->client->getResponseHeader('Etag'); |
| 340 | + $this->last_modified = $this->client->getResponseHeader('Last-Modified'); |
| 341 | + wfDebugLog( 'RSS', 'Stored etag ('.$this->etag.') and Last-Modified ('.$this->last_modified.') and items ('.count($this->rss->items).')!' ); |
| 342 | + $this->storeInCache( $key ); |
| 343 | + |
| 344 | + return Status::newGood(); |
| 345 | + } else { |
| 346 | + return Status::newfatal( 'rss-parse-error', $this->rss->ERROR ); |
228 | 347 | } |
229 | 348 | } |
230 | | - return $display; |
231 | | -} |
232 | 349 | |
233 | | -function wfRssFilterout( $text, $rssFilterout ) { |
234 | | - $display = true; |
235 | | - if ( is_array( $rssFilterout ) ) { |
236 | | - foreach ( $rssFilterout as $term ) { |
| 350 | + function canDisplay( $item ) { |
| 351 | + if($this->filter($item['description'], 'filterOut')) { |
| 352 | + error_log($item['description']); |
| 353 | + return true; |
| 354 | + } |
| 355 | + return false; |
| 356 | + } |
| 357 | + |
| 358 | + function filter( $text, $filterType ) { |
| 359 | + if($filterType === 'filterOut') { |
| 360 | + $keep = false; |
| 361 | + $filter = $this->filterOut; |
| 362 | + } else { |
| 363 | + $keep = true; |
| 364 | + $filter = $this->filter; |
| 365 | + } |
| 366 | + |
| 367 | + if( count($filter) == 0 ) return !$keep; |
| 368 | + |
| 369 | + foreach( $filter as $term ) { |
237 | 370 | if ( $term ) { |
238 | | - if ( preg_match( "|$term|i", $text, $a ) ) { |
239 | | - $display = false; |
240 | | - return $display; |
| 371 | + $match = preg_match( "|$term|i", $text ); |
| 372 | + if ( $match ) { |
| 373 | + return $keep; |
241 | 374 | } |
242 | 375 | } |
| 376 | + return !$keep; |
243 | 377 | } |
| 378 | + |
244 | 379 | } |
245 | | - return $display; |
246 | | -} |
247 | 380 | |
248 | | -function wfRssHighlight( $text, $rssHighlight ) { |
249 | | - $i = 0; |
250 | | - $starttag = 'v8x5u3t3u8h'; |
251 | | - $endtag = 'q8n4f6n4n4x'; |
252 | 381 | |
253 | | - $color[] = 'coral'; |
254 | | - $color[] = 'greenyellow'; |
255 | | - $color[] = 'lightskyblue'; |
256 | | - $color[] = 'gold'; |
257 | | - $color[] = 'violet'; |
258 | | - $count_color = count( $color ); |
| 382 | + function highlightTerms( $text ) { |
| 383 | + $i = 0; |
| 384 | + $starttag = 'v8x5u3t3u8h'; |
| 385 | + $endtag = 'q8n4f6n4n4x'; |
259 | 386 | |
260 | | - if ( is_array( $rssHighlight ) ) { |
261 | | - foreach ( $rssHighlight as $term ) { |
| 387 | + $color[] = 'coral'; |
| 388 | + $color[] = 'greenyellow'; |
| 389 | + $color[] = 'lightskyblue'; |
| 390 | + $color[] = 'gold'; |
| 391 | + $color[] = 'violet'; |
| 392 | + $count_color = count( $color ); |
| 393 | + |
| 394 | + foreach ( $this->highlight as $term ) { |
262 | 395 | if ( $term ) { |
263 | 396 | $text = preg_replace( "|\b(\w*?" . $term . "\w*?)\b|i", "$starttag" . "_" . $i . "\\1$endtag", $text ); |
264 | 397 | $i++; |
— | — | @@ -266,13 +399,13 @@ |
267 | 400 | } |
268 | 401 | } |
269 | 402 | } |
270 | | - } |
271 | 403 | |
272 | | - # To avoid trouble should someone wants to highlight the terms "span", "style", … |
273 | | - for ( $i = 0; $i < 5; $i++ ) { |
274 | | - $text = preg_replace( "|$starttag" . "_" . $i . "|", "<span style=\"background-color:" . $color[$i] . "; font-weight: bold;\">", $text ); |
275 | | - $text = preg_replace( "|$endtag|", '</span>', $text ); |
| 404 | + # To avoid trouble should someone wants to highlight the terms "span", "style", … |
| 405 | + for ( $i = 0; $i < 5; $i++ ) { |
| 406 | + $text = preg_replace( "|$starttag" . "_" . $i . "|", "<span style=\"background-color:" . $color[$i] . "; font-weight: bold;\">", $text ); |
| 407 | + $text = preg_replace( "|$endtag|", '</span>', $text ); |
| 408 | + } |
| 409 | + |
| 410 | + return $text; |
276 | 411 | } |
277 | | - |
278 | | - return $text; |
279 | | -} |
| 412 | +} |
\ No newline at end of file |