Index: trunk/extensions/RSS/RSSHooks.php |
— | — | @@ -0,0 +1,46 @@ |
| 2 | +<?php |
| 3 | +class RSSHooks { |
| 4 | + /** |
| 5 | + * Tell the parser how to handle <rss> elements; always returns true |
| 6 | + * @param Parser $parser parser on which the <rss> tag hook is registered |
| 7 | + */ |
| 8 | + static function parserInit( $parser ) { |
| 9 | + # Install parser hook for <rss> tags |
| 10 | + $parser->setHook( 'rss', array( __CLASS__, 'renderRss' ) ); |
| 11 | + return true; |
| 12 | + } |
| 13 | + |
| 14 | + /** |
| 15 | + * Parser hook for <rss>: validate the URL, fetch the feed via RSSParser and render it. |
| 16 | + * @param String $input text inside the tags (the feed URL) |
| 17 | + * @param Array $args associative list of the element attributes and their values |
| 18 | + * @param Parser $parser and Frame $frame: parser context, passed on to RSSParser::renderFeed() |
| 19 | + */ |
| 20 | + static function renderRss( $input, $args, $parser, $frame ) { |
| 21 | + global $wgRSSCacheAge, $wgRSSCacheCompare; |
| 22 | + |
| 23 | + if ( !Http::isValidURI( $input ) ) { |
| 24 | + return wfMsg( 'rss-invalid-url', htmlspecialchars( $input ) ); |
| 25 | + } |
| 26 | + if ( $wgRSSCacheCompare ) { |
| 27 | + $timeout = $wgRSSCacheCompare; |
| 28 | + } else { |
| 29 | + $timeout = $wgRSSCacheAge; |
| 30 | + } |
| 31 | + $parser->disableCache(); |
| 32 | + /* TODO: $parser->getOutput()->updateCacheExpiry( $timeout ); would put $timeout above to use */ |
| 33 | + |
| 34 | + $rss = new RSSParser( $input, $args ); |
| 35 | + |
| 36 | + $status = $rss->fetch(); |
| 37 | + |
| 38 | + # Check for errors. |
| 39 | + if ( !$status->isGood() ) |
| 40 | + return wfMsg( 'rss-error', htmlspecialchars( $input), $status->getWikiText() ); |
| 41 | + # A failed status has already returned above, so only the parsed data is checked here. |
| 42 | + if ( !is_object( $rss->rss ) || !is_array( $rss->rss->items ) ) |
| 43 | + return wfMsg( 'rss-empty', htmlspecialchars( $input ) ); |
| 44 | + |
| 45 | + return $rss->renderFeed( $parser, $frame ); |
| 46 | + } |
| 47 | +} |
Property changes on: trunk/extensions/RSS/RSSHooks.php |
___________________________________________________________________ |
Added: svn:eol-syle |
1 | 48 | + native |
Index: trunk/extensions/RSS/RSSData.php |
— | — | @@ -4,32 +4,52 @@ |
5 | 5 | public $ERROR; |
6 | 6 | public $items; |
7 | 7 | |
| 8 | + /** |
| 9 | + * Constructor, takes a DOMDocument and stores the parsed items in $this->items. |
| 10 | + * @param DOMDocument The pre-parsed XML Document |
| 11 | + * The resulting RSSData object has a member items that is an array of parsed items; on failure ERROR is set instead. |
| 12 | + */ |
8 | 13 | function __construct( $xml ) { |
9 | 14 | if ( !( $xml instanceOf DOMDocument ) ) { |
10 | | - return null; |
| 15 | + $this->ERROR = "Not passed DOMDocument object."; |
| 16 | + return; |
11 | 17 | } |
12 | 18 | $xpath = new DOMXPath( $xml ); |
13 | | - $items = $xpath->evaluate( '/rss/channel/item' ); |
| 19 | + $items = $xpath->query( '/rss/channel/item' ); |
14 | 20 | |
15 | | - foreach ( $items as $item ) { |
16 | | - $bit = array(); |
17 | | - foreach ( $item->childNodes as $n ) { |
18 | | - $name = $this->rssTokenToName( $n->nodeName ); |
19 | | - if ( $name != null ) { |
20 | | - /* Because for DOMElements the nodeValue is just |
21 | | - * the text of the containing element, without any |
22 | | - * tags, it makes this a safe, if unattractive, |
23 | | - * value to use. If you want to allow people to |
24 | | - * mark up their RSS, some more precautions are |
25 | | - * needed. */ |
26 | | - $bit[$name] = $n->nodeValue; |
| 21 | + if($items->length !== 0) { |
| 22 | + foreach ( $items as $item ) { |
| 23 | + $bit = array(); |
| 24 | + foreach ( $item->childNodes as $n ) { |
| 25 | + $name = $this->rssTokenToName( $n->nodeName ); |
| 26 | + if ( $name != null ) { |
| 27 | + /* Because for DOMElements the nodeValue is just |
| 28 | + * the text of the containing element, without any |
| 29 | + * tags, it makes this a safe, if unattractive, |
| 30 | + * value to use. If you want to allow people to |
| 31 | + * mark up their RSS, some more precautions are |
| 32 | + * needed. */ |
| 33 | + $bit[$name] = $n->nodeValue; |
| 34 | + } |
27 | 35 | } |
| 36 | + $this->items[] = $bit; |
28 | 37 | } |
29 | | - $this->items[] = $bit; |
| 38 | + } else { |
| 39 | + $this->ERROR = "No RSS items found."; |
| 40 | + return; |
30 | 41 | } |
31 | 42 | } |
32 | 43 | |
33 | | - function rssTokenToName( $n ) { |
| 44 | + /** |
| 45 | + * Return a string that will be used to map RSS elements that |
| 46 | + * contain similar data (e.g. dc:date, date, and pubDate) to the |
| 47 | + * same array key. This works on WordPress feeds as-is, but it |
| 48 | + * probably needs a way to convert dc:date format dates to be the |
| 49 | + * same as pubDate. |
| 50 | + * @param String $elementName Name of the element we have |
| 51 | + * @returns String Name to map it to |
| 52 | + */ |
| 53 | + protected function rssTokenToName( $n ) { |
34 | 54 | switch( $n ) { |
35 | 55 | case 'dc:date': |
36 | 56 | return 'date'; |
Index: trunk/extensions/RSS/RSSParser.php |
— | — | @@ -0,0 +1,428 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +class RSSParser { |
| 5 | + protected $charset; |
| 6 | + protected $maxheads = 32; |
| 7 | + protected $reversed = false; |
| 8 | + protected $highlight = array(); |
| 9 | + protected $filter = array(); |
| 10 | + protected $filterOut = array(); |
| 11 | + protected $itemTemplate; |
| 12 | + protected $url; |
| 13 | + protected $etag; |
| 14 | + protected $lastModified; |
| 15 | + protected $xml; |
| 16 | + protected $ERROR; |
| 17 | + protected $displayFields = array( 'author', 'title', 'encodedContent', 'description' ); |
| 18 | + |
| 19 | + public $client; |
| 20 | + |
| 21 | + /** |
| 22 | + * Convenience function that splits a string on runs of one or more spaces |
| 23 | + * @param String $str space-separated list of words |
| 24 | + * @return Array pieces found; an empty array if preg_split() does not return an array |
| 25 | + */ |
| 26 | + private static function explodeOnSpaces( $str ) { |
| 27 | + $found = preg_split( '# +#', $str ); |
| 28 | + return is_array( $found ) ? $found : array(); |
| 29 | + } |
| 30 | + |
| 31 | + /** |
| 32 | + * Build a parser for one tag such as <rss max=5>http://example.com/</rss>. |
| 33 | + * @param String $url feed URL (the text inside the tag) |
| 34 | + * @param Array $args tag attributes: charset, max, reverse, date, highlight, filter, filterout, template |
| 35 | + */ |
| 36 | + function __construct( $url, $args ) { |
| 37 | + |
| 38 | + $this->url = $url; |
| 39 | + |
| 40 | + # Get charset from argument array |
| 41 | + # FIXME: not used yet |
| 42 | + if ( isset( $args['charset'] ) ) { |
| 43 | + $this->charset = $args['charset']; |
| 44 | + } else { |
| 45 | + global $wgOutputEncoding; |
| 46 | + $this->charset = $wgOutputEncoding; |
| 47 | + } |
| 48 | + |
| 49 | + # Get max number of headlines from argument-array |
| 50 | + if ( isset( $args['max'] ) ) { |
| 51 | + $this->maxheads = $args['max']; |
| 52 | + } |
| 53 | + |
| 54 | + # Get reverse flag from argument array |
| 55 | + if ( isset( $args['reverse'] ) ) { |
| 56 | + $this->reversed = true; |
| 57 | + } |
| 58 | + |
| 59 | + # Get date format from argument array |
| 60 | + # FIXME: not used yet |
| 61 | + if ( isset( $args['date'] ) ) { |
| 62 | + $this->date = $args['date']; |
| 63 | + } |
| 64 | + |
| 65 | + # Get highlight terms from argument array |
| 66 | + if ( isset( $args['highlight'] ) ) { |
| 67 | + # terms are kept as typed here; RSSHighlighter::setTerms() lowercases them for lookup. |
| 68 | + $this->highlight = self::explodeOnSpaces( $args['highlight'] ); |
| 69 | + } |
| 70 | + |
| 71 | + # Get filter terms from argument array |
| 72 | + if ( isset( $args['filter'] ) ) { |
| 73 | + $this->filter = self::explodeOnSpaces( $args['filter'] ); |
| 74 | + } |
| 75 | + |
| 76 | + if ( isset( $args['filterout'] ) ) { |
| 77 | + $this->filterOut = self::explodeOnSpaces( $args['filterout'] ); |
| 78 | + |
| 79 | + } |
| 80 | + # Use the named template only if it is a valid title; otherwise fall back to the 'rss-item' message. |
| 81 | + $titleObject = isset( $args['template'] ) ? Title::newFromText( $args['template'], NS_TEMPLATE ) : null; |
| 82 | + if ( $titleObject instanceof Title ) { |
| 83 | + $article = new Article( $titleObject, 0 ); |
| 84 | + $this->itemTemplate = $article->fetchContent( ); |
| 85 | + } else { |
| 86 | + $this->itemTemplate = wfMsgNoTrans( 'rss-item' ); |
| 87 | + } |
| 88 | + } |
| 89 | + |
| 90 | + /** |
| 91 | + * Load the feed for $this->url, from cache when possible, otherwise remotely. |
| 92 | + * |
| 93 | + * NOTES ON RETRIEVING REMOTE FILES: |
| 94 | + * No remote fetch is attempted when loadFromCache() reports a usable cached copy. |
| 95 | + * |
| 96 | + * NOTES ON FAILED REQUESTS: |
| 97 | + * If the remote request fails, the Status from fetchRemote() is returned as-is; |
| 98 | + * this method itself does not fall back to a stale cached copy. |
| 99 | + * |
| 100 | + * Takes no parameters; the URL comes from the constructor. |
| 101 | + * @return Status good on a cache hit, fatal if no URL is set, else the fetchRemote() result |
| 102 | + */ |
| 103 | + function fetch( ) { |
| 104 | + global $wgRSSCacheAge, $wgRSSCacheFreshOnly; |
| 105 | + global $wgRSSCacheDirectory, $wgRSSFetchTimeout; |
| 106 | + global $wgRSSOutputEncoding, $wgRSSInputEncoding; |
| 107 | + global $wgRSSDetectEncoding; |
| 108 | + |
| 109 | + if ( !isset( $this->url ) ) { |
| 110 | + return Status::newFatal( 'rss-fetch-nourl' ); |
| 111 | + } |
| 112 | + |
| 113 | + // Flow |
| 114 | + // 1. check cache |
| 115 | + // 2. if there is a hit, make sure it's fresh |
| 116 | + // 3. if cached obj fails freshness check, fetch remote |
| 117 | + // 4. return the Status of the remote fetch (good or error) |
| 118 | + $key = wfMemcKey( $this->url ); |
| 119 | + $cachedFeed = $this->loadFromCache( $key ); |
| 120 | + if ( $cachedFeed !== false ) { |
| 121 | + wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url ); |
| 122 | + return Status::newGood(); |
| 123 | + } |
| 124 | + wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url. ' from remote.' ); |
| 125 | + |
| 126 | + $status = $this->fetchRemote( $key ); |
| 127 | + return $status; |
| 128 | + } |
| 129 | + |
| 130 | + /** |
| 131 | + * Load the cached etag, Last-Modified time and feed data for $key into this object |
| 132 | + * @param string $key lookup key to associate with this item |
| 133 | + * @return boolean true if the cached copy can be used as-is; false on a miss or when it must be re-checked |
| 134 | + */ |
| 135 | + protected function loadFromCache( $key ) { |
| 136 | + global $wgMemc, $wgRSSCacheCompare; |
| 137 | + |
| 138 | + $data = $wgMemc->get( $key ); |
| 139 | + if ( $data === false ) { |
| 140 | + return false; |
| 141 | + } |
| 142 | + |
| 143 | + list( $etag, $lastModified, $rss ) = |
| 144 | + $data; |
| 145 | + |
| 146 | + if ( !isset( $rss->items ) ) { |
| 147 | + return false; |
| 148 | + } |
| 149 | + |
| 150 | + wfDebugLog( 'RSS', "Got '$key' from cache" ); |
| 151 | + |
| 152 | + # Keep the cached values even if we return false below: fetchRemote() reads etag and lastModified. |
| 153 | + $this->rss = $rss; |
| 154 | + $this->etag = $etag; |
| 155 | + $this->lastModified = $lastModified; |
| 156 | + |
| 157 | + // Only applies when $wgRSSCacheCompare is set: stale once Last-Modified is older than that many seconds |
| 158 | + if ( $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $lastModified ) { |
| 159 | + wfDebugLog( 'RSS', "Content is old enough that we need to check cached content"); |
| 160 | + return false; |
| 161 | + } |
| 162 | + |
| 163 | + return true; |
| 164 | + } |
| 165 | + |
| 166 | + /** |
| 167 | + * Store this object's etag, lastModified and parsed feed in the cache, with $wgRSSCacheAge as the expiry. |
| 168 | + * @param string $key lookup key to associate with this item |
| 169 | + * @return boolean false if there is no feed to store, true otherwise (the cache write result is only logged) |
| 170 | + */ |
| 171 | + protected function storeInCache( $key ) { |
| 172 | + global $wgMemc, $wgRSSCacheAge; |
| 173 | + |
| 174 | + if ( !isset( $this->rss ) ) { |
| 175 | + return false; |
| 176 | + } |
| 177 | + $r = $wgMemc->set( $key, |
| 178 | + array( $this->etag, $this->lastModified, $this->rss ), |
| 179 | + $wgRSSCacheAge ); |
| 180 | + |
| 181 | + wfDebugLog( 'RSS', "Stored '$key' as in cache? $r"); |
| 182 | + return true; |
| 183 | + } |
| 184 | + |
| 185 | + /** |
| 186 | + * Retrieve the feed at $this->url, sending If-None-Match / If-Modified-Since (HTTP-date, GMT) when known. |
| 187 | + * @param $key String: cache key under which responseToXML() stores the parsed feed |
| 188 | + * @param $headers Array: headers (name => value) to send along with the request |
| 189 | + * @return Status object: the failed request status, or the result of responseToXML() |
| 190 | + */ |
| 191 | + protected function fetchRemote( $key, array $headers = array()) { |
| 192 | + global $wgRSSFetchTimeout; |
| 193 | + global $wgRSSUserAgent; |
| 194 | + |
| 195 | + if ( $this->etag ) { |
| 196 | + wfDebugLog( 'RSS', 'Used etag: ' . $this->etag ); |
| 197 | + $headers['If-None-Match'] = $this->etag; |
| 198 | + } |
| 199 | + if ( $this->lastModified ) { |
| 200 | + $lm = gmdate( 'D, d M Y H:i:s', $this->lastModified ) . ' GMT'; |
| 201 | + wfDebugLog( 'RSS', "Used last modified: $lm" ); |
| 202 | + $headers['If-Modified-Since'] = $lm; |
| 203 | + } |
| 204 | + |
| 205 | + $client = |
| 206 | + HttpRequest::factory( $this->url, array( 'timeout' => $wgRSSFetchTimeout ) ); |
| 207 | + $client->setUserAgent( $wgRSSUserAgent ); |
| 208 | + foreach ( $headers as $header => $value ) { |
| 209 | + $client->setHeader( $header, $value ); |
| 210 | + } |
| 211 | + |
| 212 | + $fetch = $client->execute(); |
| 213 | + $this->client = $client; |
| 214 | + |
| 215 | + if ( !$fetch->isGood() ) { |
| 216 | + wfDebugLog( 'RSS', 'Request Failed: ' . $fetch->getWikiText() ); |
| 217 | + return $fetch; |
| 218 | + } |
| 219 | + |
| 220 | + $ret = $this->responseToXML( $key ); |
| 221 | + return $ret; |
| 222 | + } |
| 223 | + |
| 224 | + /** |
| 225 | + * Render up to $this->maxheads displayable items through the item template. |
| 226 | + * A maxheads value of zero or less means no limit. |
| 227 | + * @param $parser the parser param to pass to recursiveTagParse() |
| 228 | + * @param $frame the frame param to pass to recursiveTagParse() |
| 229 | + * @return String concatenated rendered items; empty when there is no item template |
| 230 | + */ |
| 231 | + function renderFeed( $parser, $frame ) { |
| 232 | + $output = ""; |
| 233 | + if ( $this->itemTemplate ) { |
| 234 | + $headcnt = 0; |
| 235 | + if ( $this->reversed ) { |
| 236 | + $this->rss->items = array_reverse( $this->rss->items ); |
| 237 | + } |
| 238 | + |
| 239 | + foreach ( $this->rss->items as $item ) { |
| 240 | + if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) { |
| 241 | + break; |
| 242 | + } |
| 243 | + |
| 244 | + if ( $this->canDisplay( $item ) ) { |
| 245 | + $output .= $this->renderItem( $item, $parser, $frame ); |
| 246 | + $headcnt++; |
| 247 | + } |
| 248 | + } |
| 249 | + } |
| 250 | + return $output; |
| 251 | + } |
| 252 | + |
| 253 | + /** |
| 254 | + * Render one item: escape and highlight its display fields, fill in the item template, return the parsed result |
| 255 | + * @param $item an array produced by RSSData where keys are the names of the RSS elements |
| 256 | + * @param $parser the parser param to pass to recursiveTagParse() |
| 257 | + * @param $frame the frame param to pass to recursiveTagParse() |
| 258 | + */ |
| 259 | + protected function renderItem( $item, $parser, $frame ) { |
| 260 | + $parts = explode( '|', $this->itemTemplate ); |
| 261 | + |
| 262 | + $output = ""; |
| 263 | + if ( count( $parts ) > 1 && isset( $parser ) && isset( $frame ) ) { |
| 264 | + $rendered = array(); |
| 265 | + foreach ( $this->displayFields as $field ) { |
| 266 | + if ( isset($item[$field] ) ) { |
| 267 | + $item[$field] = $this->highlightTerms( wfEscapeWikiText( $item[$field] ) ); |
| 268 | + } |
| 269 | + } |
| 270 | + |
| 271 | + foreach ( $parts as $part ) { |
| 272 | + $bits = explode( '=', $part ); |
| 273 | + $left = null; |
| 274 | + |
| 275 | + if ( count( $bits ) == 2 ) { |
| 276 | + $left = trim( $bits[0] ); |
| 277 | + } |
| 278 | + |
| 279 | + if ( isset( $item[$left] ) ) { |
| 280 | + $leftValue = str_replace( '{{{' . $left . '}}}', $item[$left], $bits[1] ); |
| 281 | + $rendered[] = "$left = $leftValue"; |
| 282 | + } else { |
| 283 | + $rendered[] = $part; |
| 284 | + } |
| 285 | + } |
| 286 | + $output .= $parser->recursiveTagParse( implode( " | ", $rendered ), $frame ); |
| 287 | + } |
| 288 | + return $output; |
| 289 | + } |
| 290 | + |
| 291 | + /** |
| 292 | + * Turn the response held by $this->client into $this->rss and store it in the cache |
| 293 | + * @param $key the key to use to store the parsed response in the cache |
| 294 | + * @return Status good on a 304 or a successful parse, fatal 'rss-parse-error' otherwise |
| 295 | + */ |
| 296 | + protected function responseToXML( $key ) { |
| 297 | + wfDebugLog( 'RSS', "Got '".$this->client->getStatus()."', updating cache for $key" ); |
| 298 | + if ( $this->client->getStatus() === 304 ) { |
| 299 | + # Not modified: store the feed this object already holds again |
| 300 | + wfDebugLog( 'RSS', "Got 304, updating cache for $key" ); |
| 301 | + $this->storeInCache( $key ); |
| 302 | + } else { |
| 303 | + $this->xml = new DOMDocument; |
| 304 | + $raw_xml = $this->client->getContent(); |
| 305 | + |
| 306 | + if( $raw_xml == "" ) { |
| 307 | + return Status::newFatal( 'rss-parse-error', "No XML content" ); |
| 308 | + } |
| 309 | + |
| 310 | + wfSuppressWarnings(); |
| 311 | + $this->xml->loadXML( $raw_xml ); |
| 312 | + wfRestoreWarnings(); |
| 313 | + |
| 314 | + $this->rss = new RSSData( $this->xml ); |
| 315 | + |
| 316 | + // RSSData sets ERROR when it is not given a DOMDocument or when no items were found |
| 317 | + if ( $this->rss && !$this->rss->ERROR ) { |
| 318 | + $this->etag = $this->client->getResponseHeader( 'Etag' ); |
| 319 | + $this->lastModified = |
| 320 | + strtotime( $this->client->getResponseHeader( 'Last-Modified' ) ); |
| 321 | + |
| 322 | + wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' . |
| 323 | + $this->client->getResponseHeader( 'Last-Modified' ) . ') and items (' . |
| 324 | + count( $this->rss->items ) . ')!' ); |
| 325 | + $this->storeInCache( $key ); |
| 326 | + } else { |
| 327 | + return Status::newFatal( 'rss-parse-error', $this->rss->ERROR ); |
| 328 | + } |
| 329 | + } |
| 330 | + return Status::newGood(); |
| 331 | + } |
| 332 | + |
| 333 | + /** |
| 334 | + * Determine if a given item should or should not be displayed |
| 335 | + * @param Array $item associative array that RSSData produced for an <item> |
| 336 | + * @return boolean false if a filterOut term matches; otherwise whether the filter list accepts the item |
| 337 | + */ |
| 338 | + protected function canDisplay( array $item ) { |
| 339 | + $check = ""; |
| 340 | + |
| 341 | + /* Only the displayable fields are checked; they are concatenated with no separator */ |
| 342 | + foreach ( $this->displayFields as $field ) { |
| 343 | + if ( isset( $item[$field] ) ) { |
| 344 | + $check .= $item[$field]; |
| 345 | + } |
| 346 | + } |
| 347 | + |
| 348 | + if ( $this->filter( $check, 'filterOut' ) ) { |
| 349 | + return false; |
| 350 | + } |
| 351 | + if ( $this->filter( $check, 'filter' ) ) { |
| 352 | + return true; |
| 353 | + } |
| 354 | + return false; |
| 355 | + } |
| 356 | + |
| 357 | + /** |
| 358 | + * Report whether the text matches any term in the chosen filter list (case-insensitive). |
| 359 | + * @param String $text the text to examine |
| 360 | + * @param String $filterType "filterOut" to check for matches in the filterOut member list. Otherwise, uses the filter member list. |
| 361 | + * @return boolean true on a match; for an empty list, true for the filter list and false for filterOut. |
| 362 | + */ |
| 363 | + protected function filter( $text, $filterType ) { |
| 364 | + if ( $filterType === 'filterOut' ) { |
| 365 | + $filter = $this->filterOut; |
| 366 | + } else { |
| 367 | + $filter = $this->filter; |
| 368 | + } |
| 369 | + |
| 370 | + if ( count( $filter ) == 0 ) return $filterType !== 'filterOut'; |
| 371 | + |
| 372 | + /* Using : for delimiter here since preg_quote() escapes it automatically. */ |
| 373 | + $match = preg_match( ':(' . implode( "|", array_map('preg_quote', $filter ) ) . '):i', $text ) ; |
| 374 | + if ( $match ) { |
| 375 | + return true; |
| 376 | + } |
| 377 | + return false; |
| 378 | + } |
| 379 | + |
| 380 | + /** |
| 381 | + * Wrap every case-insensitive occurrence of a highlight term in a styled <span> |
| 382 | + * @param String $text the text to look in. |
| 383 | + * @return String $text unchanged if there are no highlight terms, else with matches wrapped by RSSHighlighter::highlightThis() |
| 384 | + */ |
| 385 | + protected function highlightTerms( $text ) { |
| 386 | + if ( count( $this->highlight ) === 0 ) { |
| 387 | + return $text; |
| 388 | + } |
| 389 | + |
| 390 | + RSSHighlighter::setTerms( $this->highlight ); |
| 391 | + $highlight = ':'. implode( "|", array_map( 'preg_quote', array_values( $this->highlight ) ) ) . ':i'; |
| 392 | + return preg_replace_callback( $highlight, 'RSSHighlighter::highlightThis', $text ); |
| 393 | + } |
| 394 | +} |
| 395 | + |
| 396 | + |
| 397 | +class RSSHighlighter { |
| 398 | + static $terms = array(); |
| 399 | + |
| 400 | + /** |
| 401 | + * Set the list of terms to match for the next highlighting session |
| 402 | + * @param Array $terms words to match; stored lowercased and mapped to their position in the list |
| 403 | + */ |
| 404 | + static function setTerms( array $terms ) { |
| 405 | + self::$terms = array_flip( array_map( 'strtolower', $terms ) ); |
| 406 | + } |
| 407 | + |
| 408 | + /** |
| 409 | + * preg_replace_callback() callback that wraps one matched word in HTML code to highlight it. |
| 410 | + * @param Array $match match array; $match[0] is the matched text. Colors are chosen by the term's position in setTerms(), cycling through the palette |
| 411 | + * @return String word wrapped in HTML code. |
| 412 | + */ |
| 413 | + static function highlightThis( $match ) { |
| 414 | + $styleStart = "<span style='font-weight: bold; background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>"; |
| 415 | + $styleEnd = "</span>"; |
| 416 | + |
| 417 | + # bg colors cribbed from Google's highlighting of search terms |
| 418 | + $bgcolor = array( '255, 255, 102', '160, 255, 255', '153, 255, 153', |
| 419 | + '255, 153, 153', '255, 102, 255', '136, 0, 0', '0, 170, 0', '136, 104, 0', |
| 420 | + '0, 70, 153', '153, 0, 153' ); |
| 421 | + # Foreground colors are spelled out, one per background color, rather than computed |
| 422 | + $color = array("black", "black", "black", "black", "black", |
| 423 | + "white", "white", "white", "white", "white" ); |
| 424 | + |
| 425 | + $index = self::$terms[strtolower($match[0])] % count( $bgcolor ); |
| 426 | + |
| 427 | + return sprintf($styleStart, $bgcolor[$index], $color[$index]). $match[0] .$styleEnd; |
| 428 | + } |
| 429 | +} |
Property changes on: trunk/extensions/RSS/RSSParser.php |
___________________________________________________________________ |
Added: svn:eol-syle |
1 | 430 | + native |
Index: trunk/extensions/RSS/RSS.php |
— | — | @@ -18,7 +18,8 @@ |
19 | 19 | die( "This is not a valid entry point.\n" ); |
20 | 20 | } |
21 | 21 | |
22 | | -define( 'RSS_USER_AGENT', 'MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' ); |
| 22 | +// Agent to use for fetching feeds |
| 23 | +$wgRSSUserAgent='MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension'; |
23 | 24 | |
24 | 25 | // Extension credits that will show up on Special:Version |
25 | 26 | $wgExtensionCredits['parserhook'][] = array( |
— | — | @@ -45,383 +46,19 @@ |
46 | 47 | // Internationalization file and autoloadable classes |
47 | 48 | $dir = dirname( __FILE__ ) . '/'; |
48 | 49 | $wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php'; |
| 50 | +$wgAutoloadClasses['RSSHooks'] = $dir . 'RSSHooks.php'; |
| 51 | +$wgAutoloadClasses['RSSParser'] = $dir . 'RSSParser.php'; |
49 | 52 | $wgAutoloadClasses['RSSData'] = $dir . 'RSSData.php'; |
50 | 53 | |
51 | | -$wgHooks['ParserFirstCallInit'][] = 'RSS::parserInit'; |
| 54 | +$wgHooks['ParserFirstCallInit'][] = 'RSSHooks::parserInit'; |
52 | 55 | |
53 | 56 | $wgRSSCacheAge = 3600; // one hour |
54 | 57 | $wgRSSCacheFreshOnly = false; |
| 58 | +$wgRSSCacheCompare = false; // Check cached content, if available, against remote. |
| 59 | + // $wgRSSCacheCompare should be set to false or a timeout |
| 60 | + // (less than $wgRSSCacheAge) after which a comparison will |
| 61 | + // be made. |
55 | 62 | $wgRSSOutputEncoding = 'ISO-8859-1'; |
56 | 63 | $wgRSSInputEncoding = null; |
57 | 64 | $wgRSSDetectEncoding = true; |
58 | 65 | $wgRSSFetchTimeout = 5; // 5 second timeout |
59 | | -$wgRSSUseGzip = true; |
60 | | - |
61 | | -class RSS { |
62 | | - protected $charset; |
63 | | - protected $maxheads = 32; |
64 | | - protected $reversed = false; |
65 | | - protected $highlight = array(); |
66 | | - protected $filter = array(); |
67 | | - protected $filterOut = array(); |
68 | | - protected $itemTemplate; |
69 | | - protected $url; |
70 | | - protected $etag; |
71 | | - protected $lastModified; |
72 | | - protected $xml; |
73 | | - protected $ERROR; |
74 | | - protected $displayFields = array( 'author', 'title', 'encodedContent', 'description' ); |
75 | | - |
76 | | - public $client; |
77 | | - |
78 | | - static function parserInit( $parser ) { |
79 | | - # Install parser hook for <rss> tags |
80 | | - $parser->setHook( 'rss', array( __CLASS__, 'renderRss' ) ); |
81 | | - return true; |
82 | | - } |
83 | | - |
84 | | - # Parser hook callback function |
85 | | - static function renderRss( $input, $args, $parser, $frame ) { |
86 | | - if ( !$input ) { |
87 | | - return ''; # if <rss>-section is empty, return nothing |
88 | | - } |
89 | | - $parser->disableCache(); |
90 | | - |
91 | | - $rss = new RSS( $input, $args ); |
92 | | - |
93 | | - $status = $rss->fetch(); |
94 | | - |
95 | | - # Check for errors. |
96 | | - if ( $status === false || !is_object( $rss->rss ) || !is_array( $rss->rss->items ) ) |
97 | | - return wfMsg( 'rss-empty', $input ); |
98 | | - |
99 | | - if ( isset( $rss->ERROR ) ) |
100 | | - return wfMsg( 'rss-error', $rss->ERROR ); |
101 | | - |
102 | | - return $rss->renderFeed( $parser, $frame ); |
103 | | - } |
104 | | - |
105 | | - static function explodeOnSpaces( $str ) { |
106 | | - $found = preg_split( '# +#', $str ); |
107 | | - return is_array( $found ) ? $found : array(); |
108 | | - } |
109 | | - |
110 | | - function __construct( $url, $args ) { |
111 | | - |
112 | | - if ( isset( $url ) ) { |
113 | | - $this->url = $url; |
114 | | - } |
115 | | - |
116 | | - # Get charset from argument array |
117 | | - if ( isset( $args['charset'] ) ) { |
118 | | - $this->charset = $args['charset']; |
119 | | - } else { |
120 | | - global $wgOutputEncoding; |
121 | | - $args['charset'] = $wgOutputEncoding; |
122 | | - } |
123 | | - |
124 | | - # Get max number of headlines from argument-array |
125 | | - if ( isset( $args['max'] ) ) { |
126 | | - $this->maxheads = $args['max']; |
127 | | - } |
128 | | - |
129 | | - # Get reverse flag from argument array |
130 | | - if ( isset( $args['reverse'] ) ) { |
131 | | - $this->reversed = true; |
132 | | - } |
133 | | - |
134 | | - # Get date format from argument array |
135 | | - # FIXME: not used yet |
136 | | - if ( isset( $args['date'] ) ) { |
137 | | - $this->date = $args['date']; |
138 | | - } |
139 | | - |
140 | | - # Get highlight terms from argument array |
141 | | - if ( isset( $args['highlight'] ) ) { |
142 | | - # mapping to lowercase here so the regex can be case insensitive below. |
143 | | - $this->highlight = array_flip( array_map( 'strtolower', self::explodeOnSpaces( $args['highlight'] ) ) ); |
144 | | - } |
145 | | - |
146 | | - # Get filter terms from argument array |
147 | | - if ( isset( $args['filter'] ) ) { |
148 | | - $this->filter = self::explodeOnSpaces( $args['filter'] ); |
149 | | - } |
150 | | - |
151 | | - if ( isset( $args['filterout'] ) ) { |
152 | | - $this->filterOut = self::explodeOnSpaces( $args['filterout'] ); |
153 | | - |
154 | | - } |
155 | | - |
156 | | - if ( isset( $args['template'] ) ) { |
157 | | - $titleObject = Title::newFromText( $args['template'], NS_TEMPLATE ); |
158 | | - $article = new Article( $titleObject, 0 ); |
159 | | - $this->itemTemplate = $article->fetchContent( 0 ); |
160 | | - } else { |
161 | | - $this->itemTemplate = wfMsgNoTrans( 'rss-item' ); |
162 | | - } |
163 | | - } |
164 | | - |
165 | | - /** |
166 | | - * Return RSS object for the given URL, maintaining caching. |
167 | | - * |
168 | | - * NOTES ON RETRIEVING REMOTE FILES: |
169 | | - * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
170 | | - * return a cached object, and touch the cache object upon recieving a 304. |
171 | | - * |
172 | | - * NOTES ON FAILED REQUESTS: |
173 | | - * If there is an HTTP error while fetching an RSS object, the cached version |
174 | | - * will be returned, if it exists (and if $wgRSSCacheFreshOnly is off |
175 | | - * |
176 | | - * @param $url String: URL of RSS file |
177 | | - * @return boolean true if the fetch worked. |
178 | | - */ |
179 | | - function fetch( ) { |
180 | | - global $wgRSSCacheAge, $wgRSSCacheFreshOnly; |
181 | | - global $wgRSSCacheDirectory, $wgRSSFetchTimeout; |
182 | | - global $wgRSSOutputEncoding, $wgRSSInputEncoding; |
183 | | - global $wgRSSDetectEncoding, $wgRSSUseGzip; |
184 | | - |
185 | | - if ( !isset( $this->url ) ) { |
186 | | - wfDebugLog( 'RSS', 'Fetch called without a URL!' ); |
187 | | - return false; |
188 | | - } |
189 | | - |
190 | | - // Flow |
191 | | - // 1. check cache |
192 | | - // 2. if there is a hit, make sure its fresh |
193 | | - // 3. if cached obj fails freshness check, fetch remote |
194 | | - // 4. if remote fails, return stale object, or error |
195 | | - $key = wfMemcKey( $this->url ); |
196 | | - $cachedFeed = $this->loadFromCache( $key ); |
197 | | - if ( $cachedFeed !== false ) { |
198 | | - wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url ); |
199 | | - return true; |
200 | | - } |
201 | | - wfDebugLog( 'RSS', 'Cache Failed ' . $this->url ); |
202 | | - |
203 | | - $status = $this->fetchRemote( $key ); |
204 | | - return $status; |
205 | | - } |
206 | | - |
207 | | - function loadFromCache( $key ) { |
208 | | - global $wgMemc; |
209 | | - |
210 | | - $data = $wgMemc->get( $key ); |
211 | | - if ( $data === false ) { |
212 | | - return false; |
213 | | - } |
214 | | - |
215 | | - list( $etag, $lastModified, $rss ) = |
216 | | - unserialize( $data ); |
217 | | - |
218 | | - if ( !isset( $rss->items ) ) { |
219 | | - return false; |
220 | | - } |
221 | | - |
222 | | - wfDebugLog( 'RSS', "Got '$key' from cache" ); |
223 | | - |
224 | | - # Now that we've verified that we got useful data, keep it around. |
225 | | - $this->rss = $rss; |
226 | | - $this->etag = $etag; |
227 | | - $this->lastModified = $lastModified; |
228 | | - |
229 | | - return true; |
230 | | - } |
231 | | - |
232 | | - function storeInCache( $key ) { |
233 | | - global $wgMemc, $wgRSSCacheAge; |
234 | | - |
235 | | - if ( !isset( $this->rss ) ) { |
236 | | - return false; |
237 | | - } |
238 | | - $wgMemc->set( $key, |
239 | | - serialize( array( $this->etag, $this->lastModified, $this->rss ) ), |
240 | | - $wgRSSCacheAge ); |
241 | | - |
242 | | - wfDebugLog( 'RSS', "Stored '$key' in cache" ); |
243 | | - return true; |
244 | | - } |
245 | | - |
246 | | - /** |
247 | | - * Retrieve a feed. |
248 | | - * @param $url String: URL of the feed. |
249 | | - * @param $headers Array: headers to send along with the request |
250 | | - * @return Status object |
251 | | - */ |
252 | | - protected function fetchRemote( $key, $headers = '' ) { |
253 | | - global $wgRSSFetchTimeout, $wgRSSUseGzip; |
254 | | - |
255 | | - if ( $this->etag ) { |
256 | | - wfDebugLog( 'RSS', 'Used etag: ' . $this->etag ); |
257 | | - $headers['If-None-Match'] = $this->etag; |
258 | | - } |
259 | | - if ( $this->lastModified ) { |
260 | | - wfDebugLog( 'RSS', 'Used last modified: ' . $this->lastModified ); |
261 | | - $headers['If-Last-Modified'] = $this->lastModified; |
262 | | - } |
263 | | - |
264 | | - $client = |
265 | | - HttpRequest::factory( $this->url, array( 'timeout' => $wgRSSFetchTimeout ) ); |
266 | | - $client->setUserAgent( RSS_USER_AGENT ); |
267 | | - /* $client->use_gzip = $wgRSSUseGzip; */ |
268 | | - if ( is_array( $headers ) && count( $headers ) > 0 ) { |
269 | | - foreach ( $headers as $h ) { |
270 | | - if ( count( $h ) > 1 ) { |
271 | | - $client->setHeader( $h[0], $h[1] ); |
272 | | - } |
273 | | - } |
274 | | - } |
275 | | - |
276 | | - $fetch = $client->execute(); |
277 | | - $this->client = $client; |
278 | | - |
279 | | - if ( !$fetch->isGood() ) { |
280 | | - wfDebug( 'RSS', 'Request Failed: ' . $fetch->getWikiText() ); |
281 | | - return $fetch; |
282 | | - } |
283 | | - |
284 | | - $ret = $this->responseToXML( $key ); |
285 | | - return $ret; |
286 | | - } |
287 | | - |
/**
 * Render the items of the loaded feed through the item template.
 *
 * Items are taken from $this->rss->items (in reverse order when
 * $this->reversed is set), filtered with canDisplay() and rendered with
 * renderItem(). When $this->maxheads is greater than zero, at most that
 * many displayable items are rendered.
 *
 * @param $parser Parser: passed through to renderItem()
 * @param $frame PPFrame: passed through to renderItem()
 * @return String: concatenated output of all rendered items; the empty
 *   string when no item template is set
 */
function renderFeed( $parser, $frame ) {
	$output = "";
	if ( !$this->itemTemplate ) {
		return $output;
	}

	# Work on a local copy: reversing $this->rss->items in place would flip
	# the stored order again on every further call.
	$items = $this->rss->items;
	if ( $this->reversed ) {
		$items = array_reverse( $items );
	}

	$headcnt = 0;
	foreach ( $items as $item ) {
		# Once the limit is reached no later item can be shown, so stop
		# instead of walking the rest of the feed.
		if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
			break;
		}

		if ( $this->canDisplay( $item ) ) {
			$output .= $this->renderItem( $item, $parser, $frame );
			$headcnt++;
		}
	}
	return $output;
}
309 | | - |
/**
 * Render a single feed item through the item template.
 *
 * The template ($this->itemTemplate) is split on '|'. For every
 * "name = value" part whose name is a key of $item, the {{{name}}}
 * placeholder inside the value is replaced by the item's value; all other
 * parts are kept unchanged. The parts are then joined with " | " and run
 * through the parser. Fields listed in $this->displayFields are passed
 * through highlightTerms() first.
 *
 * NOTE(review): the item values come from the remote feed and end up in
 * text handed to recursiveTagParse(), i.e. they are parsed as wikitext.
 * Confirm that this is intended or that the values are sanitised upstream.
 *
 * @param $item Array: feed item, field name => value
 * @param $parser Parser
 * @param $frame PPFrame
 * @return String: parsed output; the empty string when the template has no
 *   '|' separated parts or no parser/frame is given
 */
function renderItem( $item, $parser, $frame ) {
	$parts = explode( '|', $this->itemTemplate );

	$output = "";
	if ( count( $parts ) > 1 && isset( $parser ) && isset( $frame ) ) {
		foreach ( $this->displayFields as $field ) {
			if ( isset( $item[$field] ) ) {
				$item[$field] = $this->highlightTerms( $item[$field] );
			}
		}

		$rendered = array();
		foreach ( $parts as $part ) {
			# Split on the first '=' only, so a value that itself contains
			# '=' (an HTML attribute, a URL query string) is still
			# recognised as "name = value".
			$bits = explode( '=', $part, 2 );
			$left = null;

			if ( count( $bits ) == 2 ) {
				$left = trim( $bits[0] );
			}

			if ( $left !== null && isset( $item[$left] ) ) {
				# Plain string replacement: with preg_replace() any "$1" or
				# "\1" in the feed text would be read as a backreference and
				# silently dropped.
				$leftValue = str_replace( '{{{' . $left . '}}}', $item[$left], $bits[1] );
				$rendered[] = implode( '=', array( $left, $leftValue ) );
			} else {
				$rendered[] = $part;
			}
		}
		$output .= $parser->recursiveTagParse( implode( " | ", $rendered ), $frame );
	}
	return $output;
}
342 | | - |
/**
 * Parse the body of the last HTTP response ($this->client) into
 * $this->xml (DOMDocument) and $this->rss (RSSData).
 *
 * On success the response's Etag and Last-Modified headers are remembered
 * in $this->etag / $this->lastModified and the result is stored in the
 * cache under $key.
 *
 * @param $key String: cache key, passed to storeInCache()
 * @return Status object: good when the feed was parsed without error,
 *   fatal 'rss-parse-error' otherwise
 */
function responseToXML( $key ) {
	$content = $this->client->getContent();

	$this->xml = new DOMDocument;
	# An empty body is not handed to loadXML() at all (it raises a warning,
	# or a ValueError on PHP 8); the document simply stays empty and the
	# RSSData error check below decides the outcome, as it does for
	# malformed XML.
	if ( is_string( $content ) && $content !== '' ) {
		# Collect libxml errors internally so that a malformed remote feed
		# does not emit PHP warnings, then restore the previous setting.
		$previous = libxml_use_internal_errors( true );
		# LIBXML_NONET: never fetch anything over the network while
		# parsing content that comes from a remote server.
		$this->xml->loadXML( $content, LIBXML_NONET );
		libxml_clear_errors();
		libxml_use_internal_errors( $previous );
	}
	$this->rss = new RSSData( $this->xml );

	// if RSS parsed successfully
	if ( $this->rss && !$this->rss->ERROR ) {
		$this->etag = $this->client->getResponseHeader( 'Etag' );
		$this->lastModified = $this->client->getResponseHeader( 'Last-Modified' );
		wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' .
			$this->lastModified . ') and items (' . count( $this->rss->items ) . ')!' );
		$this->storeInCache( $key );

		return Status::newGood();
	}

	return Status::newFatal( 'rss-parse-error', $this->rss->ERROR );
}
366 | | - |
/**
 * Decide whether a feed item should be shown.
 *
 * The values of all display fields present in the item are concatenated
 * and tested against the two filters: a 'filterOut' match rejects the
 * item, otherwise the item is shown only when the 'filter' test passes.
 *
 * @param $item Array: feed item, field name => value
 * @return Boolean
 */
function canDisplay( $item ) {
	$haystack = "";
	foreach ( $this->displayFields as $name ) {
		$haystack .= isset( $item[$name] ) ? $item[$name] : "";
	}

	# The exclusion filter wins; the inclusion filter is only consulted
	# when the item was not excluded.
	$excluded = $this->filter( $haystack, 'filterOut' );
	return !$excluded && (bool)$this->filter( $haystack, 'filter' );
}
383 | | - |
/**
 * Test a text against one of the two term lists.
 *
 * @param $text String: text to test
 * @param $filterType String: 'filterOut' selects $this->filterOut, any
 *   other value selects $this->filter
 * @return Boolean: true when any term of the selected list occurs in
 *   $text (case-insensitive). With an empty list the result is false for
 *   'filterOut' and true otherwise.
 */
function filter( $text, $filterType ) {
	$excluding = ( $filterType === 'filterOut' );
	$terms = $excluding ? $this->filterOut : $this->filter;

	if ( count( $terms ) == 0 ) {
		return !$excluding;
	}

	# ':' is used as the delimiter because preg_quote() escapes it by
	# default, so no explicit delimiter argument is needed.
	$quoted = array();
	foreach ( $terms as $term ) {
		$quoted[] = preg_quote( $term );
	}
	$pattern = ':(' . implode( "|", $quoted ) . '):i';

	return (bool)preg_match( $pattern, $text );
}
400 | | - |
/**
 * Wrap one matched term in a coloured <span>.
 *
 * @param $term Array: map of term => number; the number (modulo the
 *   palette size) picks the colour pair. The lookup uses the lower-cased
 *   match, so keys are expected to be lower case.
 * @param $match Array: match array as passed to a preg_replace_callback()
 *   callback; only element 0 (the full match) is used
 * @return String: the matched text wrapped in a styled span
 */
static function highlightThis( $term, $match ) {
	# Background colours cribbed from Google's highlighting of search terms
	$backgrounds = array(
		'255, 255, 102', '160, 255, 255', '153, 255, 153', '255, 153, 153',
		'255, 102, 255', '136, 0, 0', '0, 170, 0', '136, 104, 0',
		'0, 70, 153', '153, 0, 153'
	);
	# Matching foreground colours, spelled out rather than computed:
	# black on the five light backgrounds, white on the five dark ones
	$foregrounds = array(
		"black", "black", "black", "black", "black",
		"white", "white", "white", "white", "white"
	);

	$hit = $match[0];
	$slot = $term[strtolower( $hit )] % count( $backgrounds );

	$open = sprintf(
		"<span style='font-weight: bold; background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>",
		$backgrounds[$slot],
		$foregrounds[$slot]
	);
	return $open . $hit . "</span>";
}
417 | | - |
/**
 * Highlight every occurrence of a configured term in a text.
 *
 * The keys of $this->highlight are the terms; matching is
 * case-insensitive and each match is replaced by the markup produced by
 * highlightThis().
 *
 * @param $text String: text to decorate
 * @return String: $text with all matches wrapped; $text unchanged when no
 *   highlight terms are configured
 */
function highlightTerms( $text ) {
	if ( count( $this->highlight ) === 0 ) {
		return $text;
	}

	$quoted = array();
	foreach ( array_keys( $this->highlight ) as $term ) {
		# Quote the '/' delimiter as well, otherwise a term containing a
		# slash would terminate the pattern early.
		$quoted[] = preg_quote( $term, '/' );
	}
	$pattern = '/' . implode( "|", $quoted ) . '/i';

	# A method callback works without closures (PHP 5.3+) and avoids
	# create_function(), which evaluates generated code and defines a new
	# global function on every call.
	return preg_replace_callback( $pattern, array( $this, 'highlightTermsCallback' ), $text );
}

/**
 * preg_replace_callback() callback for highlightTerms(): forwards one
 * match, together with the configured terms, to highlightThis().
 *
 * @param $match Array: match array supplied by preg_replace_callback()
 * @return String: replacement markup for the match
 */
function highlightTermsCallback( $match ) {
	return self::highlightThis( $this->highlight, $match );
}
428 | | -} |
\ No newline at end of file |