r69793 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r69792‎ | r69793 | r69794 >
Date:15:26, 23 July 2010
Author:ashley
Status:resolved (Comments)
Tags:
Comment:
committing my work on RSS extension. Now this extension should be totally self-contained. RSSCache.php, RSSFetch.php and RSSParse.php are MagpieRSS extension files by Kellan Elliott-McCrea, with some MediaWikification by me. Snoopy.class.php is Snoopy, with some coding style changes and whatnot by me.

to-do: use memcached for caching instead of RSSCache.php
Modified paths:
  • /trunk/extensions/RSS/RSS.i18n.php (modified) (history)
  • /trunk/extensions/RSS/RSS.php (modified) (history)
  • /trunk/extensions/RSS/RSSCache.php (added) (history)
  • /trunk/extensions/RSS/RSSFetch.php (added) (history)
  • /trunk/extensions/RSS/RSSParse.php (added) (history)
  • /trunk/extensions/RSS/Snoopy.class.php (added) (history)
  • /trunk/extensions/RSS/magpierss (deleted) (history)

Diff [purge]

Index: trunk/extensions/RSS/RSSCache.php
@@ -0,0 +1,151 @@
 2+<?php
 3+/**
 4+ * A simple, rolling (no GC) cache for RSS objects, keyed on URL.
 5+ *
 6+ * @file
 7+ */
 8+
/**
 * A simple file-backed cache for RSS objects, keyed on URL.
 *
 * Every entry is stored as a serialized blob in a file whose name is the MD5
 * hash of the key, inside $BASE_CACHE. Entries are never garbage-collected;
 * freshness is judged from the cache file's modification time.
 */
class RSSCache {
	public $BASE_CACHE = './cache'; // where the cache files are stored
	public $MAX_AGE = 3600; // when are files stale, default one hour
	public $ERROR = ''; // accumulate error messages

	/**
	 * Set up the cache and try to create the cache directory if it is missing.
	 * A failure to create the directory is only logged, not raised.
	 *
	 * @param $base String: cache directory; an empty value keeps the default
	 * @param $age Integer: maximum entry age in seconds; an empty value keeps
	 *                      the default
	 */
	public function __construct( $base = '', $age = '' ) {
		if ( $base ) {
			$this->BASE_CACHE = $base;
		}
		if ( $age ) {
			$this->MAX_AGE = $age;
		}

		// attempt to make the cache directory
		if ( !file_exists( $this->BASE_CACHE ) ) {
			$status = @mkdir( $this->BASE_CACHE, 0755 );

			// if make failed
			if ( !$status ) {
				wfDebugLog(
					'RSS',
					"Cache couldn't make dir '" . $this->BASE_CACHE . "'."
				);
			}
		}
	}

	/**
	 * Add an item to the cache, keyed on URL.
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @param $rss Mixed: data to serialize
	 * @return String|Integer: path of the cache file on success, 0 on failure
	 */
	public function set( $url, $rss ) {
		$this->ERROR = '';
		$cache_file = $this->file_name( $url );
		$fp = @fopen( $cache_file, 'w' );

		if ( !$fp ) {
			wfDebugLog(
				'RSS',
				"Cache unable to open file for writing: $cache_file"
			);
			return 0;
		}

		$data = serialize( $rss );
		$written = fwrite( $fp, $data );
		fclose( $fp );

		// A short or failed write would leave a truncated entry behind that
		// get() could not unserialize, so drop it and report the failure.
		if ( $written === false || $written < strlen( $data ) ) {
			@unlink( $cache_file );
			wfDebugLog(
				'RSS',
				"Cache unable to write file: $cache_file"
			);
			return 0;
		}

		return $cache_file;
	}

	/**
	 * Fetch an item from the cache.
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @return Mixed: the unserialized value on HIT; 0 when the entry is
	 *                missing, unreadable or empty
	 */
	public function get( $url ) {
		$this->ERROR = '';
		$cache_file = $this->file_name( $url );

		if ( !file_exists( $cache_file ) ) {
			wfDebugLog(
				'RSS',
				"Cache doesn't contain: $url (cache file: $cache_file)"
			);
			return 0;
		}

		$fp = @fopen( $cache_file, 'r' );
		if ( !$fp ) {
			wfDebugLog(
				'RSS',
				"Failed to open cache file for reading: $cache_file"
			);
			return 0;
		}

		// Read through the handle instead of sizing the read with filesize(),
		// whose result may come from PHP's stat cache, and always release the
		// handle afterwards.
		$data = stream_get_contents( $fp );
		fclose( $fp );

		if ( $data === false || $data === '' ) {
			return 0;
		}

		// NOTE(review): unserialize() is only safe while the cache directory
		// is writable by nobody but this code; it must never be fed
		// attacker-controlled data.
		return unserialize( $data );
	}

	/**
	 * Check a URL for membership in the cache and whether the object is older
	 * than MAX_AGE (ie. STALE)
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @return String: 'HIT' (present and current), 'STALE' (present but too
	 *                 old) or 'MISS' (not present)
	 */
	public function check_cache( $url ) {
		$this->ERROR = '';
		$filename = $this->file_name( $url );

		if ( !file_exists( $filename ) ) {
			// object does not exist
			return 'MISS';
		}

		// find how long ago the file was added to the cache
		// and whether that is longer than MAX_AGE
		$age = time() - filemtime( $filename );

		return ( $this->MAX_AGE > $age ) ? 'HIT' : 'STALE';
	}

	/**
	 * Get the age of a cache entry.
	 *
	 * @param $cache_key String: cache key, consisting of the URL + output enc.
	 * @return Integer: age of the entry in seconds, or -1 if it does not exist
	 */
	public function cache_age( $cache_key ) {
		// The lookup has to use the key that was passed in; the previous code
		// read an undefined $url here and so always inspected md5( '' ).
		$filename = $this->file_name( $cache_key );
		if ( file_exists( $filename ) ) {
			return time() - filemtime( $filename );
		}

		return -1;
	}

	/**
	 * Map URL to location in cache.
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @return String: file name (cache directory + MD5 hash of the URL)
	 */
	public function file_name( $url ) {
		return $this->BASE_CACHE . DIRECTORY_SEPARATOR . md5( $url );
	}

}
\ No newline at end of file
Property changes on: trunk/extensions/RSS/RSSCache.php
___________________________________________________________________
Added: svn:eol-style
1153 + native
Index: trunk/extensions/RSS/Snoopy.class.php
@@ -0,0 +1,1241 @@
 2+<?php
 3+
 4+/*************************************************
 5+
 6+Snoopy - the PHP net client
 7+Author: Monte Ohrt <monte@ispi.net>
 8+Copyright (c): 1999-2008 New Digital Group, all rights reserved
 9+Version: 1.2.5-dev (revision 1.27)
 10+Note: some coding style changes by Jack Phoenix <jack@countervandalism.net>
 11+ var -> public, added some braces, double quotes -> single quotes, etc.
 12+ also added the gzip support stuff from MagpieRSS' Snoopy to this ver
 13+
 14+ * This library is free software; you can redistribute it and/or
 15+ * modify it under the terms of the GNU Lesser General Public
 16+ * License as published by the Free Software Foundation; either
 17+ * version 2.1 of the License, or (at your option) any later version.
 18+ *
 19+ * This library is distributed in the hope that it will be useful,
 20+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 21+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 22+ * Lesser General Public License for more details.
 23+ *
 24+ * You should have received a copy of the GNU Lesser General Public
 25+ * License along with this library; if not, write to the Free Software
 26+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 27+
 28+You may contact the author of Snoopy by e-mail at:
 29+monte@ohrt.com
 30+
 31+The latest version of Snoopy can be obtained from:
 32+http://snoopy.sourceforge.net/
 33+
 34+*************************************************/
 35+
 36+class Snoopy {
 37+ /**** Public variables ****/
 38+
 39+ /* user definable vars */
 40+ public $host = 'www.php.net'; // host name we are connecting to
 41+ public $port = 80; // port we are connecting to
 42+ public $proxy_host = ''; // proxy host to use
 43+ public $proxy_port = ''; // proxy port to use
 44+ public $proxy_user = ''; // proxy user to use
 45+ public $proxy_pass = ''; // proxy password to use
 46+
 47+ public $agent = 'Snoopy v1.2.5-dev'; // agent we masquerade as
 48+ public $referer = ''; // referer info to pass
 49+ public $cookies = array(); // array of cookies to pass
 50+ // $cookies['username'] = 'joe';
 51+ public $rawheaders = array(); // array of raw headers to send
 52+ // $rawheaders['Content-type'] = 'text/html';
 53+
 54+ public $maxredirs = 5; // http redirection depth maximum. 0 = disallow
 55+ public $lastredirectaddr = ''; // contains address of last redirected address
 56+ public $offsiteok = true; // allows redirection off-site
 57+ public $maxframes = 0; // frame content depth maximum. 0 = disallow
 58+ public $expandlinks = true; // expand links to fully qualified URLs.
 59+ // this only applies to fetchlinks()
 60+ // submitlinks(), and submittext()
 61+ public $passcookies = true; // pass set cookies back through redirects
 62+ // NOTE: this currently does not respect
 63+ // dates, domains or paths.
 64+
 65+ public $user = ''; // user for http authentication
 66+ public $pass = ''; // password for http authentication
 67+
 68+ // http accept types
 69+ public $accept = 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*';
 70+
 71+ public $results = ''; // where the content is put
 72+
 73+ public $error = ''; // error messages sent here
 74+ public $response_code = ''; // response code returned from server
 75+ public $headers = array(); // headers returned from server sent here
 76+ public $maxlength = 500000; // max return data length (body)
 77+ public $read_timeout = 0; // timeout on read operations, in seconds
 78+ // supported only since PHP 4 Beta 4
 79+ // set to 0 to disallow timeouts
 80+ public $timed_out = false; // if a read operation timed out
 81+ public $status = 0; // http request status
 82+
 83+ public $temp_dir = '/tmp'; // temporary directory that the webserver
 84+ // has permission to write to.
 85+ // under Windows, this should be C:\temp
 86+
 87+ public $curl_path = '/usr/local/bin/curl';
 88+ // Snoopy will use cURL for fetching
 89+ // SSL content if a full system path to
 90+ // the cURL binary is supplied here.
 91+ // set to false if you do not have
 92+ // cURL installed. See http://curl.haxx.se
 93+ // for details on installing cURL.
 94+ // Snoopy does *not* use the cURL
 95+ // library functions built into php,
 96+ // as these functions are not stable
 97+ // as of this Snoopy release.
 98+
 99+ // send Accept-encoding: gzip?
 100+ public $use_gzip = true;
 101+
 102+ /**** Private variables ****/
 103+ var $_maxlinelen = 4096; // max line length (headers)
 104+
 105+ var $_httpmethod = 'GET'; // default http request method
 106+ var $_httpversion = 'HTTP/1.0'; // default http request version
 107+ var $_submit_method = 'POST'; // default submit method
 108+ var $_submit_type = 'application/x-www-form-urlencoded'; // default submit type
 109+ var $_mime_boundary = ''; // MIME boundary for multipart/form-data submit type
 110+ var $_redirectaddr = false; // will be set if page fetched is a redirect
 111+ var $_redirectdepth = 0; // increments on an http redirect
 112+ var $_frameurls = array(); // frame src urls
 113+ var $_framedepth = 0; // increments on frame depth
 114+
 115+ var $_isproxy = false; // set if using a proxy server
 116+ var $_fp_timeout = 30; // timeout for socket connection
 117+
 118+ /*======================================================================*\
 119+ Function: fetch
 120+ Purpose: fetch the contents of a web page
 121+ (and possibly other protocols in the
 122+ future like ftp, nntp, gopher, etc.)
 123+ Input: $URI the location of the page to fetch
 124+ Output: $this->results the output text from the fetch
 125+ \*======================================================================*/
 126+ function fetch( $URI ) {
 127+ //preg_match( "|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|", $URI, $URI_PARTS );
 128+ $URI_PARTS = parse_url( $URI );
 129+ if ( !empty( $URI_PARTS['user'] ) ) {
 130+ $this->user = $URI_PARTS['user'];
 131+ }
 132+ if ( !empty( $URI_PARTS['pass'] ) ) {
 133+ $this->pass = $URI_PARTS['pass'];
 134+ }
 135+ if ( empty( $URI_PARTS['query'] ) ) {
 136+ $URI_PARTS['query'] = '';
 137+ }
 138+ if ( empty( $URI_PARTS['path'] ) ) {
 139+ $URI_PARTS['path'] = '';
 140+ }
 141+
 142+ switch( strtolower( $URI_PARTS['scheme'] ) ) {
 143+ case 'http':
 144+ $this->host = $URI_PARTS['host'];
 145+ if( !empty( $URI_PARTS['port'] ) ) {
 146+ $this->port = $URI_PARTS['port'];
 147+ }
 148+ if( $this->_connect( $fp ) ) {
 149+ if( $this->_isproxy ) {
 150+ // using proxy, send entire URI
 151+ $this->_httprequest( $URI, $fp, $URI, $this->_httpmethod );
 152+ } else {
 153+ $path = $URI_PARTS['path'] . ( isset( $URI_PARTS['query'] ) ? '?' . $URI_PARTS['query'] : '' );
 154+ // no proxy, send only the path
 155+ $this->_httprequest( $path, $fp, $URI, $this->_httpmethod );
 156+ }
 157+
 158+ $this->_disconnect( $fp );
 159+
 160+ if( $this->_redirectaddr ) {
 161+ /* url was redirected, check if we've hit the max depth */
 162+ if( $this->maxredirs > $this->_redirectdepth ) {
 163+ // only follow redirect if it's on this site, or offsiteok is true
 164+ if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok )
 165+ {
 166+ /* follow the redirect */
 167+ $this->_redirectdepth++;
 168+ $this->lastredirectaddr = $this->_redirectaddr;
 169+ $this->fetch( $this->_redirectaddr );
 170+ }
 171+ }
 172+ }
 173+
 174+ if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 )
 175+ {
 176+ $frameurls = $this->_frameurls;
 177+ $this->_frameurls = array();
 178+
 179+ while( list( , $frameurl ) = each( $frameurls ) ) {
 180+ if( $this->_framedepth < $this->maxframes ) {
 181+ $this->fetch( $frameurl );
 182+ $this->_framedepth++;
 183+ } else {
 184+ break;
 185+ }
 186+ }
 187+ }
 188+ } else {
 189+ return false;
 190+ }
 191+ return true;
 192+ break;
 193+ case 'https':
 194+ if( !$this->curl_path ) {
 195+ return false;
 196+ }
 197+ if( function_exists( 'is_executable' ) ) {
 198+ if ( !is_executable( $this->curl_path ) ) {
 199+ $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
 200+ return false;
 201+ }
 202+ }
 203+ $this->host = $URI_PARTS['host'];
 204+ if( !empty( $URI_PARTS['port'] ) ) {
 205+ $this->port = $URI_PARTS['port'];
 206+ }
 207+ if( $this->_isproxy ) {
 208+ // using proxy, send entire URI
 209+ $this->_httpsrequest( $URI, $URI, $this->_httpmethod );
 210+ } else {
 211+ $path = $URI_PARTS['path'] . ( $URI_PARTS['query'] ? '?' . $URI_PARTS['query'] : '' );
 212+ // no proxy, send only the path
 213+ $this->_httpsrequest( $path, $URI, $this->_httpmethod );
 214+ }
 215+
 216+ if( $this->_redirectaddr ) {
 217+ /* url was redirected, check if we've hit the max depth */
 218+ if( $this->maxredirs > $this->_redirectdepth ) {
 219+ // only follow redirect if it's on this site, or offsiteok is true
 220+ if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok )
 221+ {
 222+ /* follow the redirect */
 223+ $this->_redirectdepth++;
 224+ $this->lastredirectaddr = $this->_redirectaddr;
 225+ $this->fetch( $this->_redirectaddr );
 226+ }
 227+ }
 228+ }
 229+
 230+ if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 )
 231+ {
 232+ $frameurls = $this->_frameurls;
 233+ $this->_frameurls = array();
 234+
 235+ while( list( , $frameurl ) = each( $frameurls ) ) {
 236+ if( $this->_framedepth < $this->maxframes ) {
 237+ $this->fetch( $frameurl );
 238+ $this->_framedepth++;
 239+ } else {
 240+ break;
 241+ }
 242+ }
 243+ }
 244+ return true;
 245+ break;
 246+ default:
 247+ // not a valid protocol
 248+ $this->error = 'Invalid protocol "' . $URI_PARTS['scheme'] . '"\n';
 249+ return false;
 250+ break;
 251+ }
 252+ return true;
 253+ }
 254+
 255+ /*======================================================================*\
 256+ Function: submit
 257+ Purpose: submit an HTTP form
 258+ Input: $URI the location to post the data
 259+ $formvars the formvars to use.
 260+ format: $formvars['var'] = 'val';
 261+ $formfiles an array of files to submit
 262+ format: $formfiles['var'] = '/dir/filename.ext';
 263+ Output: $this->results the text output from the post
 264+ \*======================================================================*/
 265+ function submit( $URI, $formvars = '', $formfiles = '' ) {
 266+ unset( $postdata );
 267+
 268+ $postdata = $this->_prepare_post_body( $formvars, $formfiles );
 269+
 270+ $URI_PARTS = parse_url( $URI );
 271+ if ( !empty( $URI_PARTS['user'] ) ) {
 272+ $this->user = $URI_PARTS['user'];
 273+ }
 274+ if ( !empty( $URI_PARTS['pass'] ) ) {
 275+ $this->pass = $URI_PARTS['pass'];
 276+ }
 277+ if ( empty( $URI_PARTS['query'] ) ) {
 278+ $URI_PARTS['query'] = '';
 279+ }
 280+ if ( empty( $URI_PARTS['path'] ) ) {
 281+ $URI_PARTS['path'] = '';
 282+ }
 283+
 284+ switch( strtolower( $URI_PARTS['scheme'] ) ) {
 285+ case 'http':
 286+ $this->host = $URI_PARTS['host'];
 287+ if( !empty( $URI_PARTS['port'] ) ) {
 288+ $this->port = $URI_PARTS['port'];
 289+ }
 290+ if( $this->_connect( $fp ) ) {
 291+ if( $this->_isproxy ) {
 292+ // using proxy, send entire URI
 293+ $this->_httprequest( $URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata );
 294+ } else {
 295+ $path = $URI_PARTS['path'] . ( $URI_PARTS['query'] ? '?' . $URI_PARTS['query'] : '' );
 296+ // no proxy, send only the path
 297+ $this->_httprequest(
 298+ $path, $fp, $URI,
 299+ $this->_submit_method,
 300+ $this->_submit_type,
 301+ $postdata
 302+ );
 303+ }
 304+
 305+ $this->_disconnect( $fp );
 306+
 307+ if( $this->_redirectaddr ) {
 308+ /* url was redirected, check if we've hit the max depth */
 309+ if( $this->maxredirs > $this->_redirectdepth ) {
 310+ if( !preg_match( "|^" . $URI_PARTS['scheme'] . "://|", $this->_redirectaddr ) ) {
 311+ $this->_redirectaddr = $this->_expandlinks( $this->_redirectaddr, $URI_PARTS['scheme'] . '://' . $URI_PARTS['host'] );
 312+ }
 313+
 314+ // only follow redirect if it's on this site, or offsiteok is true
 315+ if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok )
 316+ {
 317+ /* follow the redirect */
 318+ $this->_redirectdepth++;
 319+ $this->lastredirectaddr = $this->_redirectaddr;
 320+ if( strpos( $this->_redirectaddr, '?' ) > 0 ) {
 321+ $this->fetch( $this->_redirectaddr ); // the redirect has changed the request method from post to get
 322+ } else {
 323+ $this->submit( $this->_redirectaddr, $formvars, $formfiles );
 324+ }
 325+ }
 326+ }
 327+ }
 328+
 329+ if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 )
 330+ {
 331+ $frameurls = $this->_frameurls;
 332+ $this->_frameurls = array();
 333+
 334+ while( list( , $frameurl ) = each( $frameurls ) ) {
 335+ if( $this->_framedepth < $this->maxframes ) {
 336+ $this->fetch( $frameurl );
 337+ $this->_framedepth++;
 338+ } else {
 339+ break;
 340+ }
 341+ }
 342+ }
 343+ } else {
 344+ return false;
 345+ }
 346+ return true;
 347+ break;
 348+ case 'https':
 349+ if( !$this->curl_path ) {
 350+ return false;
 351+ }
 352+ if( function_exists( 'is_executable' ) ) {
 353+ if ( !is_executable( $this->curl_path ) ) {
 354+ return false;
 355+ }
 356+ }
 357+ $this->host = $URI_PARTS['host'];
 358+ if( !empty( $URI_PARTS['port'] ) ) {
 359+ $this->port = $URI_PARTS['port'];
 360+ }
 361+ if( $this->_isproxy ) {
 362+ // using proxy, send entire URI
 363+ $this->_httpsrequest(
 364+ $URI,
 365+ $URI,
 366+ $this->_submit_method,
 367+ $this->_submit_type,
 368+ $postdata
 369+ );
 370+ } else {
 371+ $path = $URI_PARTS['path'] . ( $URI_PARTS['query'] ? '?' . $URI_PARTS['query'] : '' );
 372+ // no proxy, send only the path
 373+ $this->_httpsrequest(
 374+ $path,
 375+ $URI,
 376+ $this->_submit_method,
 377+ $this->_submit_type,
 378+ $postdata
 379+ );
 380+ }
 381+
 382+ if( $this->_redirectaddr ) {
 383+ /* url was redirected, check if we've hit the max depth */
 384+ if( $this->maxredirs > $this->_redirectdepth ) {
 385+ if( !preg_match( "|^" . $URI_PARTS['scheme'] . "://|", $this->_redirectaddr ) ) {
 386+ $this->_redirectaddr = $this->_expandlinks(
 387+ $this->_redirectaddr,
 388+ $URI_PARTS['scheme'] . '://' . $URI_PARTS['host']
 389+ );
 390+ }
 391+
 392+ // only follow redirect if it's on this site, or offsiteok is true
 393+ if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok )
 394+ {
 395+ /* follow the redirect */
 396+ $this->_redirectdepth++;
 397+ $this->lastredirectaddr = $this->_redirectaddr;
 398+ if( strpos( $this->_redirectaddr, '?' ) > 0 ) {
 399+ $this->fetch( $this->_redirectaddr ); // the redirect has changed the request method from post to get
 400+ } else {
 401+ $this->submit( $this->_redirectaddr, $formvars, $formfiles );
 402+ }
 403+ }
 404+ }
 405+ }
 406+
 407+ if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 )
 408+ {
 409+ $frameurls = $this->_frameurls;
 410+ $this->_frameurls = array();
 411+
 412+ while( list( , $frameurl ) = each( $frameurls ) ) {
 413+ if( $this->_framedepth < $this->maxframes ) {
 414+ $this->fetch( $frameurl );
 415+ $this->_framedepth++;
 416+ } else {
 417+ break;
 418+ }
 419+ }
 420+ }
 421+ return true;
 422+ break;
 423+
 424+ default:
 425+ // not a valid protocol
 426+ $this->error = 'Invalid protocol "' . $URI_PARTS['scheme'] . '"\n';
 427+ return false;
 428+ break;
 429+ }
 430+ return true;
 431+ }
 432+
 433+ /*======================================================================*\
 434+ Function: fetchlinks
 435+ Purpose: fetch the links from a web page
 436+ Input: $URI where you are fetching from
 437+ Output: $this->results an array of the URLs
 438+ \*======================================================================*/
 439+ function fetchlinks( $URI ) {
 440+ if ( $this->fetch( $URI ) ) {
 441+ if( $this->lastredirectaddr ) {
 442+ $URI = $this->lastredirectaddr;
 443+ }
 444+ if( is_array( $this->results ) ) {
 445+ for( $x = 0; $x < count( $this->results ); $x++ ) {
 446+ $this->results[$x] = $this->_striplinks( $this->results[$x] );
 447+ }
 448+ } else {
 449+ $this->results = $this->_striplinks( $this->results );
 450+ }
 451+
 452+ if( $this->expandlinks ) {
 453+ $this->results = $this->_expandlinks( $this->results, $URI );
 454+ }
 455+ return true;
 456+ } else {
 457+ return false;
 458+ }
 459+ }
 460+
 461+ /*======================================================================*\
 462+ Function: fetchform
 463+ Purpose: fetch the form elements from a web page
 464+ Input: $URI where you are fetching from
 465+ Output: $this->results the resulting html form
 466+ \*======================================================================*/
 467+ function fetchform( $URI ) {
 468+ if ( $this->fetch( $URI ) ) {
 469+ if( is_array( $this->results ) ) {
 470+ for( $x = 0; $x < count( $this->results ); $x++ ) {
 471+ $this->results[$x] = $this->_stripform( $this->results[$x] );
 472+ }
 473+ } else {
 474+ $this->results = $this->_stripform( $this->results );
 475+ }
 476+
 477+ return true;
 478+ } else {
 479+ return false;
 480+ }
 481+ }
 482+
 483+
 484+ /*======================================================================*\
 485+ Function: fetchtext
 486+ Purpose: fetch the text from a web page, stripping the links
 487+ Input: $URI where you are fetching from
 488+ Output: $this->results the text from the web page
 489+ \*======================================================================*/
 490+ function fetchtext( $URI ) {
 491+ if( $this->fetch( $URI ) ) {
 492+ if( is_array( $this->results ) ) {
 493+ for( $x = 0; $x < count( $this->results ); $x++ ) {
 494+ $this->results[$x] = $this->_striptext( $this->results[$x] );
 495+ }
 496+ } else {
 497+ $this->results = $this->_striptext( $this->results );
 498+ }
 499+ return true;
 500+ } else {
 501+ return false;
 502+ }
 503+ }
 504+
 505+ /*======================================================================*\
 506+ Function: submitlinks
 507+ Purpose: grab links from a form submission
 508+ Input: $URI where you are submitting from
 509+ Output: $this->results an array of the links from the post
 510+ \*======================================================================*/
 511+ function submitlinks( $URI, $formvars = '', $formfiles = '' ) {
 512+ if( $this->submit( $URI, $formvars, $formfiles ) ) {
 513+ if( $this->lastredirectaddr ) {
 514+ $URI = $this->lastredirectaddr;
 515+ }
 516+ if( is_array( $this->results ) ) {
 517+ for( $x = 0; $x < count( $this->results ); $x++ ) {
 518+ $this->results[$x] = $this->_striplinks( $this->results[$x] );
 519+ if( $this->expandlinks ) {
 520+ $this->results[$x] = $this->_expandlinks( $this->results[$x], $URI );
 521+ }
 522+ }
 523+ } else {
 524+ $this->results = $this->_striplinks( $this->results );
 525+ if( $this->expandlinks ) {
 526+ $this->results = $this->_expandlinks( $this->results, $URI );
 527+ }
 528+ }
 529+ return true;
 530+ } else {
 531+ return false;
 532+ }
 533+ }
 534+
 535+ /*======================================================================*\
 536+ Function: submittext
 537+ Purpose: grab text from a form submission
 538+ Input: $URI where you are submitting from
 539+ Output: $this->results the text from the web page
 540+ \*======================================================================*/
 541+ function submittext( $URI, $formvars = '', $formfiles = '' ) {
 542+ if( $this->submit( $URI, $formvars, $formfiles ) ) {
 543+ if( $this->lastredirectaddr ) {
 544+ $URI = $this->lastredirectaddr;
 545+ }
 546+ if( is_array( $this->results ) ) {
 547+ for( $x = 0; $x < count( $this->results ); $x++ ) {
 548+ $this->results[$x] = $this->_striptext( $this->results[$x] );
 549+ if( $this->expandlinks ) {
 550+ $this->results[$x] = $this->_expandlinks( $this->results[$x], $URI );
 551+ }
 552+ }
 553+ } else {
 554+ $this->results = $this->_striptext( $this->results );
 555+ if( $this->expandlinks ) {
 556+ $this->results = $this->_expandlinks( $this->results, $URI );
 557+ }
 558+ }
 559+ return true;
 560+ } else {
 561+ return false;
 562+ }
 563+ }
 564+
 565+ /*======================================================================*\
 566+ Function: set_submit_multipart
 567+ Purpose: Set the form submission content type to
 568+ multipart/form-data
 569+ \*======================================================================*/
 570+ function set_submit_multipart() {
 571+ $this->_submit_type = 'multipart/form-data';
 572+ }
 573+
 574+ /*======================================================================*\
 575+ Function: set_submit_normal
 576+ Purpose: Set the form submission content type to
 577+ application/x-www-form-urlencoded
 578+ \*======================================================================*/
 579+ function set_submit_normal() {
 580+ $this->_submit_type = 'application/x-www-form-urlencoded';
 581+ }
 582+
 583+ /*======================================================================*\
 584+ Private functions
 585+ \*======================================================================*/
 586+
 587+ /*======================================================================*\
 588+ Function: _striplinks
 589+ Purpose: strip the hyperlinks from an html document
 590+ Input: $document document to strip.
 591+ Output: $match an array of the links
 592+ \*======================================================================*/
 593+ function _striplinks( $document ) {
 594+ preg_match_all("'<\s*a\s.*?href\s*=\s* # find <a href=
 595+ ([\"\'])? # find single or double quote
 596+ (?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching
 597+ # quote, otherwise match up to next space
 598+ 'isx", $document, $links
 599+ );
 600+
 601+ // catenate the non-empty matches from the conditional subpattern
 602+ while( list( $key, $val ) = each( $links[2] ) ) {
 603+ if( !empty( $val ) ) {
 604+ $match[] = $val;
 605+ }
 606+ }
 607+
 608+ while( list( $key, $val ) = each( $links[3] ) ) {
 609+ if( !empty( $val ) ) {
 610+ $match[] = $val;
 611+ }
 612+ }
 613+
 614+ // return the links
 615+ return $match;
 616+ }
 617+
 618+ /*======================================================================*\
 619+ Function: _stripform
 620+ Purpose: strip the form elements from an HTML document
 621+ Input: $document document to strip.
 622+ Output: $match an array of the links
 623+ \*======================================================================*/
 624+ function _stripform( $document ) {
 625+ preg_match_all(
 626+ "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",
 627+ $document,
 628+ $elements
 629+ );
 630+
 631+ // catenate the matches
 632+ $match = implode( "\r\n", $elements[0] );
 633+
 634+ // return the links
 635+ return $match;
 636+ }
 637+
 638+ /*======================================================================*\
 639+ Function: _striptext
 640+ Purpose: strip the text from an html document
 641+ Input: $document document to strip.
 642+ Output: $text the resulting text
 643+ \*======================================================================*/
 644+ function _striptext( $document ) {
 645+ // I didn't use preg eval (//e) since that is only available in PHP 4.0.
 646+ // so, list your entities one by one here. I included some of the
 647+ // more common ones.
 648+ $search = array(
 649+ "'<script[^>]*?>.*?</script>'si", // strip out JavaScript
 650+ "'<[\/\!]*?[^<>]*?>'si", // strip out HTML tags
 651+ "'([\r\n])[\s]+'", // strip out white space
 652+ "'&(quot|#34|#034|#x22);'i", // replace HTML entities
 653+ "'&(amp|#38|#038|#x26);'i", // added hexadecimal values
 654+ "'&(lt|#60|#060|#x3c);'i",
 655+ "'&(gt|#62|#062|#x3e);'i",
 656+ "'&(nbsp|#160|#xa0);'i",
 657+ "'&(iexcl|#161);'i",
 658+ "'&(cent|#162);'i",
 659+ "'&(pound|#163);'i",
 660+ "'&(copy|#169);'i",
 661+ "'&(reg|#174);'i",
 662+ "'&(deg|#176);'i",
 663+ "'&(#39|#039|#x27);'",
 664+ "'&(euro|#8364);'i", // Europe
 665+ "'&a(uml|UML);'", // German
 666+ "'&o(uml|UML);'",
 667+ "'&u(uml|UML);'",
 668+ "'&A(uml|UML);'",
 669+ "'&O(uml|UML);'",
 670+ "'&U(uml|UML);'",
 671+ "'&szlig;'i",
 672+ );
 673+ $replace = array(
 674+ '',
 675+ '',
 676+ "\\1",
 677+ "\"",
 678+ '&',
 679+ '<',
 680+ '>',
 681+ ' ',
 682+ chr( 161 ),
 683+ chr( 162 ),
 684+ chr( 163 ),
 685+ chr( 169 ),
 686+ chr( 174 ),
 687+ chr( 176 ),
 688+ chr( 39 ),
 689+ chr( 128 ),
 690+ 'ä',
 691+ 'ö',
 692+ 'ü',
 693+ 'Ä',
 694+ 'Ö',
 695+ 'Ü',
 696+ 'ß',
 697+ );
 698+
 699+ $text = preg_replace( $search, $replace, $document );
 700+
 701+ return $text;
 702+ }
 703+
 704+ /*======================================================================*\
 705+ Function: _expandlinks
 706+ Purpose: expand each link into a fully qualified URL
 707+ Input: $links the links to qualify
 708+ $URI the full URI to get the base from
 709+ Output: $expandedLinks the expanded links
 710+ \*======================================================================*/
 711+ function _expandlinks( $links, $URI ) {
 712+ preg_match( "/^[^\?]+/", $URI, $match );
 713+
 714+ $match = preg_replace( "|/[^\/\.]+\.[^\/\.]+$|", '', $match[0] );
 715+ $match = preg_replace( "|/$|", '', $match );
 716+ $match_part = parse_url( $match );
 717+ $match_root = $match_part['scheme'] . '://' . $match_part['host'];
 718+
 719+ $search = array(
 720+ "|^http://" . preg_quote( $this->host ) . "|i",
 721+ "|^(\/)|i",
 722+ "|^(?!http://)(?!mailto:)|i",
 723+ "|/\./|",
 724+ "|/[^\/]+/\.\./|"
 725+ );
 726+
 727+ $replace = array(
 728+ '',
 729+ $match_root . '/',
 730+ $match . '/',
 731+ '/',
 732+ '/'
 733+ );
 734+
 735+ $expandedLinks = preg_replace( $search, $replace, $links );
 736+
 737+ return $expandedLinks;
 738+ }
 739+
 740+ /*======================================================================*\
 741+ Function: _httprequest
 742+ Purpose: go get the http data from the server
 743+ Input: $url the url to fetch
 744+ $fp the current open file pointer
 745+ $URI the full URI
 746+ $body body contents to send if any (POST)
 747+ Output:
 748+ \*======================================================================*/
 749+ function _httprequest( $url, $fp, $URI, $http_method, $content_type = '', $body = '' ) {
 750+ $cookie_headers = '';
 751+ if( $this->passcookies && $this->_redirectaddr ) {
 752+ $this->setcookies();
 753+ }
 754+
 755+ $URI_PARTS = parse_url( $URI );
 756+ if( empty( $url ) ) {
 757+ $url = '/';
 758+ }
 759+ $headers = $http_method . ' ' . $url . ' ' . $this->_httpversion . "\r\n";
 760+ if( !empty( $this->agent ) ) {
 761+ $headers .= 'User-Agent: ' . $this->agent . "\r\n";
 762+ }
 763+ if( !empty( $this->host ) && !isset( $this->rawheaders['Host'] ) ) {
 764+ $headers .= 'Host: ' . $this->host;
 765+ if( !empty( $this->port ) ) {
 766+ $headers .= ':' . $this->port;
 767+ }
 768+ $headers .= "\r\n";
 769+ }
 770+ if( !empty( $this->accept ) ) {
 771+ $headers .= 'Accept: ' . $this->accept . "\r\n";
 772+ }
 773+
 774+ if( $this->use_gzip ) {
 775+ // make sure PHP was built with --with-zlib
 776+ // and we can handle gzipp'ed data
 777+ if ( function_exists( 'gzinflate' ) ) {
 778+ $headers .= "Accept-encoding: gzip\r\n";
 779+ } else {
 780+ trigger_error(
 781+ 'use_gzip is on, but PHP was built without zlib support.' .
 782+ ' Requesting file(s) without gzip encoding.',
 783+ E_USER_NOTICE
 784+ );
 785+ }
 786+ }
 787+
 788+ if( !empty( $this->referer ) ) {
 789+ $headers .= 'Referer: ' . $this->referer . "\r\n";
 790+ }
 791+ if( !empty( $this->cookies ) ) {
 792+ if( !is_array( $this->cookies ) ) {
 793+ $this->cookies = (array)$this->cookies;
 794+ }
 795+
 796+ reset( $this->cookies );
 797+ if ( count( $this->cookies ) > 0 ) {
 798+ $cookie_headers .= 'Cookie: ';
 799+ foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 800+ $cookie_headers .= $cookieKey . '=' . urlencode( $cookieVal ) . '; ';
 801+ }
 802+ $headers .= substr( $cookie_headers, 0, -2 ) . "\r\n";
 803+ }
 804+ }
 805+ if( !empty( $this->rawheaders ) ) {
 806+ if( !is_array( $this->rawheaders ) ) {
 807+ $this->rawheaders = (array)$this->rawheaders;
 808+ }
 809+ while( list( $headerKey, $headerVal ) = each( $this->rawheaders ) ) {
 810+ $headers .= $headerKey . ': ' . $headerVal . "\r\n";
 811+ }
 812+ }
 813+ if( !empty( $content_type ) ) {
 814+ $headers .= "Content-type: $content_type";
 815+ if ( $content_type == 'multipart/form-data' ) {
 816+ $headers .= '; boundary=' . $this->_mime_boundary;
 817+ }
 818+ $headers .= "\r\n";
 819+ }
 820+ if( !empty( $body ) ) {
 821+ $headers .= 'Content-length: ' . strlen( $body ) . "\r\n";
 822+ }
 823+ if( !empty( $this->user ) || !empty( $this->pass ) ) {
 824+ $headers .= 'Authorization: Basic ' . base64_encode( $this->user . ':' . $this->pass ) . "\r\n";
 825+ }
 826+
 827+ // add proxy auth headers
 828+ if( !empty( $this->proxy_user ) ) {
 829+ $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode( $this->proxy_user . ':' . $this->proxy_pass ) . "\r\n";
 830+ }
 831+
 832+ $headers .= "\r\n";
 833+
 834+ // set the read timeout if needed
 835+ if ( $this->read_timeout > 0 ) {
 836+ socket_set_timeout( $fp, $this->read_timeout );
 837+ }
 838+ $this->timed_out = false;
 839+
 840+ fwrite( $fp, $headers . $body, strlen( $headers . $body ) );
 841+
 842+ $this->_redirectaddr = false;
 843+ unset( $this->headers );
 844+
 845+ // content was returned gzip encoded?
 846+ $is_gzipped = false;
 847+
 848+ while( $currentHeader = fgets( $fp, $this->_maxlinelen ) ) {
 849+ if ( $this->read_timeout > 0 && $this->_check_timeout( $fp ) ) {
 850+ $this->status = -100;
 851+ return false;
 852+ }
 853+
 854+ //if( $currentHeader == "\r\n" ) {
 855+ if( preg_match( "/^\r?\n$/", $currentHeader ) ) {
 856+ break;
 857+ }
 858+
 859+ // if a header begins with Location: or URI:, set the redirect
 860+ if( preg_match( "/^(Location:|URI:)/i", $currentHeader ) ) {
 861+ // get URL portion of the redirect
 862+ preg_match( "/^(Location:|URI:)[ ]+(.*)/i", chop( $currentHeader ), $matches );
 863+ // look for :// in the Location header to see if hostname is included
 864+ if( !preg_match( "|\:\/\/|", $matches[2] ) ) {
 865+ // no host in the path, so prepend
 866+ $this->_redirectaddr = $URI_PARTS['scheme'] . '://' . $this->host . ':' . $this->port;
 867+ // eliminate double slash
 868+ if( !preg_match( "|^/|", $matches[2] ) ) {
 869+ $this->_redirectaddr .= '/' . $matches[2];
 870+ } else {
 871+ $this->_redirectaddr .= $matches[2];
 872+ }
 873+ } else {
 874+ $this->_redirectaddr = $matches[2];
 875+ }
 876+ }
 877+
 878+ if( preg_match( "|^HTTP/|", $currentHeader ) ) {
 879+ if( preg_match( "|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status ) ) {
 880+ $this->status = $status[1];
 881+ }
 882+ $this->response_code = $currentHeader;
 883+ }
 884+
 885+ if ( preg_match( "/Content-Encoding: gzip/", $currentHeader ) ) {
 886+ $is_gzipped = true;
 887+ }
 888+
 889+ $this->headers[] = $currentHeader;
 890+ }
 891+
 892+ $results = '';
 893+ do {
 894+ $_data = fread( $fp, $this->maxlength );
 895+ if ( strlen( $_data ) == 0 || strlen( $results ) > $this->maxlength ) {
 896+ break;
 897+ }
 898+ $results .= $_data;
 899+ } while( true );
 900+
 901+ // gunzip
 902+ if ( $is_gzipped ) {
 903+ // per http://www.php.net/manual/en/function.gzencode.php
 904+ $results = substr( $results, 10 );
 905+ $results = gzinflate( $results );
 906+ }
 907+
 908+ if ( $this->read_timeout > 0 && $this->_check_timeout( $fp ) ) {
 909+ $this->status = -100;
 910+ return false;
 911+ }
 912+
 913+ // check if there is a a redirect meta tag
 914+ if( preg_match( "'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match ) )
 915+ {
 916+ $this->_redirectaddr = $this->_expandlinks( $match[1], $URI );
 917+ }
 918+
 919+ // have we hit our frame depth and is there frame src to fetch?
 920+ if( ( $this->_framedepth < $this->maxframes ) && preg_match_all( "'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match ) )
 921+ {
 922+ $this->results[] = $results;
 923+ for( $x = 0; $x < count( $match[1] ); $x++ ) {
 924+ $this->_frameurls[] = $this->_expandlinks( $match[1][$x], $URI_PARTS['scheme'] . '://' . $this->host );
 925+ }
 926+ } elseif( is_array( $this->results ) ) { // have we already fetched framed content?
 927+ $this->results[] = $results;
 928+ } else { // no framed content
 929+ $this->results = $results;
 930+ }
 931+
 932+ return true;
 933+ }
 934+
 935+ /*======================================================================*\
 936+ Function: _httpsrequest
 937+ Purpose: go get the https data from the server using curl
 938+ Input: $url the url to fetch
 939+ $URI the full URI
 940+ $body body contents to send if any (POST)
 941+ Output:
 942+ \*======================================================================*/
 943+ function _httpsrequest( $url, $URI, $http_method, $content_type = '', $body = '' ) {
 944+ if( $this->passcookies && $this->_redirectaddr ) {
 945+ $this->setcookies();
 946+ }
 947+
 948+ $headers = array();
 949+
 950+ $URI_PARTS = parse_url( $URI );
 951+ if( empty( $url ) ) {
 952+ $url = '/';
 953+ }
 954+ // GET ... header not needed for curl
 955+ //$headers[] = $http_method." ".$url." ".$this->_httpversion;
 956+ if( !empty( $this->agent ) ) {
 957+ $headers[] = 'User-Agent: ' . $this->agent;
 958+ }
 959+ if( !empty( $this->host ) ) {
 960+ if( !empty( $this->port ) ) {
 961+ $headers[] = 'Host: ' . $this->host . ':' . $this->port;
 962+ } else {
 963+ $headers[] = 'Host: ' . $this->host;
 964+ }
 965+ }
 966+ if( !empty( $this->accept ) ) {
 967+ $headers[] = 'Accept: ' . $this->accept;
 968+ }
 969+ if( !empty( $this->referer ) ) {
 970+ $headers[] = 'Referer: ' . $this->referer;
 971+ }
 972+ if( !empty( $this->cookies ) ) {
 973+ if( !is_array( $this->cookies ) ) {
 974+ $this->cookies = (array)$this->cookies;
 975+ }
 976+
 977+ reset( $this->cookies );
 978+ if ( count( $this->cookies ) > 0 ) {
 979+ $cookie_str = 'Cookie: ';
 980+ foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 981+ $cookie_str .= $cookieKey . '=' . urlencode( $cookieVal ) . '; ';
 982+ }
 983+ $headers[] = substr( $cookie_str, 0, -2 );
 984+ }
 985+ }
 986+ if( !empty( $this->rawheaders ) ) {
 987+ if( !is_array( $this->rawheaders ) ) {
 988+ $this->rawheaders = (array)$this->rawheaders;
 989+ }
 990+ while( list( $headerKey, $headerVal ) = each( $this->rawheaders ) ) {
 991+ $headers[] = $headerKey . ': ' . $headerVal;
 992+ }
 993+ }
 994+ if( !empty( $content_type ) ) {
 995+ if ( $content_type == 'multipart/form-data' ) {
 996+ $headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
 997+ } else {
 998+ $headers[] = "Content-type: $content_type";
 999+ }
 1000+ }
 1001+ if( !empty( $body ) ) {
 1002+ $headers[] = 'Content-length: ' . strlen( $body );
 1003+ }
 1004+ if( !empty( $this->user ) || !empty( $this->pass ) ) {
 1005+ $headers[] = 'Authorization: BASIC ' . base64_encode( $this->user . ':' . $this->pass );
 1006+ }
 1007+
 1008+ for( $curr_header = 0; $curr_header < count( $headers ); $curr_header++ ) {
 1009+ $cmdline_params .= " -H \"" . escapeshellcmd( $headers[$curr_header] ) . "\"";
 1010+ }
 1011+
 1012+ if( !empty( $body ) ) {
 1013+ $cmdline_params .= " -d \"" . escapeshellcmd( $body ) . "\"";
 1014+ }
 1015+
 1016+ if( $this->read_timeout > 0 ) {
 1017+ $cmdline_params .= ' -m ' . $this->read_timeout;
 1018+ }
 1019+
 1020+ $headerfile = tempnam( $temp_dir, 'sno' );
 1021+
 1022+ exec(
 1023+ $this->curl_path . " -k -D \"$headerfile\"" . $cmdline_params . " \"" . escapeshellcmd( $URI ) . "\"",
 1024+ $results,
 1025+ $return
 1026+ );
 1027+
 1028+ if( $return ) {
 1029+ $this->error = "Error: cURL could not retrieve the document, error $return.";
 1030+ return false;
 1031+ }
 1032+
 1033+ $results = implode( "\r\n", $results );
 1034+
 1035+ $result_headers = file( "$headerfile" );
 1036+
 1037+ $this->_redirectaddr = false;
 1038+ unset( $this->headers );
 1039+
 1040+ for( $currentHeader = 0; $currentHeader < count( $result_headers ); $currentHeader++ ) {
 1041+ // if a header begins with Location: or URI:, set the redirect
 1042+ if( preg_match( "/^(Location: |URI: )/i", $result_headers[$currentHeader] ) ) {
 1043+ // get URL portion of the redirect
 1044+ preg_match( "/^(Location: |URI:)\s+(.*)/", chop( $result_headers[$currentHeader] ), $matches );
 1045+ // look for :// in the Location header to see if hostname is included
 1046+ if( !preg_match( "|\:\/\/|", $matches[2] ) ) {
 1047+ // no host in the path, so prepend
 1048+ $this->_redirectaddr = $URI_PARTS['scheme'] . '://' . $this->host . ':' . $this->port;
 1049+ // eliminate double slash
 1050+ if( !preg_match( "|^/|", $matches[2] ) ) {
 1051+ $this->_redirectaddr .= '/' . $matches[2];
 1052+ } else {
 1053+ $this->_redirectaddr .= $matches[2];
 1054+ }
 1055+ } else {
 1056+ $this->_redirectaddr = $matches[2];
 1057+ }
 1058+ }
 1059+
 1060+ if( preg_match( "|^HTTP/|", $result_headers[$currentHeader] ) ) {
 1061+ $this->response_code = $result_headers[$currentHeader];
 1062+ if( preg_match( "|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match ) ) {
 1063+ $this->status = $match[1];
 1064+ }
 1065+ }
 1066+
 1067+ $this->headers[] = $result_headers[$currentHeader];
 1068+ }
 1069+
 1070+ // check if there is a a redirect meta tag
 1071+ if( preg_match( "'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match ) )
 1072+ {
 1073+ $this->_redirectaddr = $this->_expandlinks( $match[1], $URI );
 1074+ }
 1075+
 1076+ // have we hit our frame depth and is there frame src to fetch?
 1077+ if( ( $this->_framedepth < $this->maxframes ) && preg_match_all( "'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match ) )
 1078+ {
 1079+ $this->results[] = $results;
 1080+ for( $x = 0; $x < count( $match[1] ); $x++ ) {
 1081+ $this->_frameurls[] = $this->_expandlinks( $match[1][$x], $URI_PARTS['scheme'] . '://' . $this->host );
 1082+ }
 1083+ } elseif( is_array( $this->results ) ) { // have we already fetched framed content?
 1084+ $this->results[] = $results;
 1085+ } else { // no framed content
 1086+ $this->results = $results;
 1087+ }
 1088+
 1089+ unlink( "$headerfile" );
 1090+
 1091+ return true;
 1092+ }
 1093+
 1094+ /*======================================================================*\
 1095+ Function: setcookies()
 1096+ Purpose: set cookies for a redirection
 1097+ \*======================================================================*/
 1098+ function setcookies() {
 1099+ for( $x = 0; $x < count( $this->headers ); $x++ ) {
 1100+ if( preg_match( '/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x], $match ) ) {
 1101+ $this->cookies[$match[1]] = urldecode( $match[2] );
 1102+ }
 1103+ }
 1104+ }
 1105+
 1106+ /*======================================================================*\
 1107+ Function: _check_timeout
 1108+ Purpose: checks whether timeout has occurred
 1109+ Input: $fp file pointer
 1110+ \*======================================================================*/
 1111+ function _check_timeout( $fp ) {
 1112+ if ( $this->read_timeout > 0 ) {
 1113+ $fp_status = socket_get_status( $fp );
 1114+ if ( $fp_status['timed_out'] ) {
 1115+ $this->timed_out = true;
 1116+ return true;
 1117+ }
 1118+ }
 1119+ return false;
 1120+ }
 1121+
 1122+ /*======================================================================*\
 1123+ Function: _connect
 1124+ Purpose: make a socket connection
 1125+ Input: $fp file pointer
 1126+ \*======================================================================*/
 1127+ function _connect( &$fp ) {
 1128+ if( !empty( $this->proxy_host ) && !empty( $this->proxy_port ) ) {
 1129+ $this->_isproxy = true;
 1130+ $host = $this->proxy_host;
 1131+ $port = $this->proxy_port;
 1132+ } else {
 1133+ $host = $this->host;
 1134+ $port = $this->port;
 1135+ }
 1136+
 1137+ $this->status = 0;
 1138+
 1139+ $fp = fsockopen( $host, $port, $errno, $errstr, $this->_fp_timeout );
 1140+
 1141+ if ( $fp ) {
 1142+ // socket connection succeeded
 1143+ return true;
 1144+ } else {
 1145+ // socket connection failed
 1146+ $this->status = $errno;
 1147+ switch( $errno ) {
 1148+ case -3:
 1149+ $this->error = 'socket creation failed (-3)';
 1150+ case -4:
 1151+ $this->error = 'dns lookup failure (-4)';
 1152+ case -5:
 1153+ $this->error = 'connection refused or timed out (-5)';
 1154+ default:
 1155+ $this->error = 'connection failed (' . $errno . ')';
 1156+ }
 1157+ return false;
 1158+ }
 1159+ }
 1160+
 1161+ /*======================================================================*\
 1162+ Function: _disconnect
 1163+ Purpose: disconnect a socket connection
 1164+ Input: $fp file pointer
 1165+ \*======================================================================*/
 1166+ function _disconnect( $fp ) {
 1167+ return( fclose( $fp ) );
 1168+ }
 1169+
 1170+ /*======================================================================*\
 1171+ Function: _prepare_post_body
 1172+ Purpose: Prepare post body according to encoding type
 1173+ Input: $formvars - form variables
 1174+ $formfiles - form upload files
 1175+ Output: post body
 1176+ \*======================================================================*/
 1177+ function _prepare_post_body( $formvars, $formfiles ) {
 1178+ settype( $formvars, 'array' );
 1179+ settype( $formfiles, 'array' );
 1180+ $postdata = '';
 1181+
 1182+ if ( count( $formvars ) == 0 && count( $formfiles ) == 0 ) {
 1183+ return;
 1184+ }
 1185+
 1186+ switch ( $this->_submit_type ) {
 1187+ case 'application/x-www-form-urlencoded':
 1188+ reset( $formvars );
 1189+ while( list( $key, $val ) = each( $formvars ) ) {
 1190+ if ( is_array( $val ) || is_object( $val ) ) {
 1191+ while ( list( $cur_key, $cur_val ) = each( $val ) ) {
 1192+ $postdata .= urlencode( $key ) . '[]=' . urlencode( $cur_val ) . '&';
 1193+ }
 1194+ } else {
 1195+ $postdata .= urlencode( $key ) . '=' . urlencode( $val ) . '&';
 1196+ }
 1197+ }
 1198+ break;
 1199+
 1200+ case 'multipart/form-data':
 1201+ $this->_mime_boundary = 'Snoopy' . md5( uniqid( microtime() ) );
 1202+
 1203+ reset( $formvars );
 1204+ while( list( $key, $val ) = each( $formvars ) ) {
 1205+ if ( is_array( $val ) || is_object( $val ) ) {
 1206+ while ( list( $cur_key, $cur_val ) = each( $val ) ) {
 1207+ $postdata .= '--' . $this->_mime_boundary . "\r\n";
 1208+ $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
 1209+ $postdata .= "$cur_val\r\n";
 1210+ }
 1211+ } else {
 1212+ $postdata .= '--' . $this->_mime_boundary . "\r\n";
 1213+ $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
 1214+ $postdata .= "$val\r\n";
 1215+ }
 1216+ }
 1217+
 1218+ reset( $formfiles );
 1219+ while ( list( $field_name, $file_names ) = each( $formfiles ) ) {
 1220+ settype( $file_names, 'array' );
 1221+ while ( list( , $file_name ) = each( $file_names ) ) {
 1222+ if ( !is_readable( $file_name ) ) {
 1223+ continue;
 1224+ }
 1225+
 1226+ $fp = fopen( $file_name, 'r' );
 1227+ $file_content = fread( $fp, filesize( $file_name ) );
 1228+ fclose( $fp );
 1229+ $base_name = basename( $file_name );
 1230+
 1231+ $postdata .= '--' . $this->_mime_boundary . "\r\n";
 1232+ $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
 1233+ $postdata .= "$file_content\r\n";
 1234+ }
 1235+ }
 1236+ $postdata .= '--' . $this->_mime_boundary . "--\r\n";
 1237+ break;
 1238+ }
 1239+
 1240+ return $postdata;
 1241+ }
 1242+}
\ No newline at end of file
Property changes on: trunk/extensions/RSS/Snoopy.class.php
___________________________________________________________________
Added: svn:eol-style
11243 + native
Index: trunk/extensions/RSS/RSSFetch.php
@@ -0,0 +1,256 @@
 2+<?php
 3+/**
 4+ * A simple functional interface to fetching and parsing RSS files, via the
 5+ * function fetch_rss().
 6+ *
 7+ * @file
 8+ */
 9+
 10+/**
 11+ * Globals - redefine these in your script to change the
 12+ * behaviour of fetch_rss(). Currently, most options affect the cache.
 13+ *
 14+ * $wgMagpieRSSCache - Should Magpie cache parsed RSS objects?
 15+ *
 16+ * $wgMagpieRSSCacheDirectory - Where should Magpie cache parsed RSS objects?
 17+ * This should be a location that the webserver can write to. If this
 18+ * directory does not already exist, Magpie will try to be smart and create it.
 19+ * This will often fail for permissions reasons.
 20+ *
 21+ * $wgMagpieRSSCacheAge - How long to store cached RSS objects (in seconds)?.
 22+ *
 23+ * $wgMagpieRSSCacheFreshOnly - If remote fetch fails, throw an error
 24+ * instead of returning stale object?
 25+ */
 26+
 27+$MAGPIE_ERROR = '';
 28+
 29+/**
 30+ * Return RSS object for the given URL, maintaining caching.
 31+ *
 32+ * NOTES ON CACHING:
 33+ * If caching is on ($wgMagpieRSSCache) fetch_rss will first check the cache.
 34+ *
 35+ * NOTES ON RETRIEVING REMOTE FILES:
 36+ * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
 37+ * return a cached object, and touch the cache object upon recieving a 304.
 38+ *
 39+ * NOTES ON FAILED REQUESTS:
 40+ * If there is an HTTP error while fetching an RSS object, the cached version
 41+ * will be returned, if it exists (and if $wgMagpieRSSCacheFreshOnly is off)
 42+ *
 43+ * @param $url String: URL of RSS file
 44+ * @return parsed RSS object (see RSSParse)
 45+ */
 46+function fetch_rss( $url ) {
 47+ global $wgMagpieRSSCache, $wgMagpieRSSCacheAge, $wgMagpieRSSCacheFreshOnly;
 48+ global $wgMagpieRSSCacheDirectory, $wgMagpieRSSFetchTimeout;
 49+ global $wgMagpieRSSOutputEncoding, $wgMagpieRSSInputEncoding;
 50+ global $wgMagpieRSSDetectEncoding, $wgMagpieRSSUseGzip;
 51+
 52+ $wgMagpieRSSCache = true;
 53+ $wgMagpieRSSCacheAge = 60 * 60; // one hour
 54+ $wgMagpieRSSCacheFreshOnly = false;
 55+ $wgMagpieRSSCacheDirectory = '/extensions/RSS/cache';
 56+ $wgMagpieRSSOutputEncoding = 'ISO-8859-1';
 57+ $wgMagpieRSSInputEncoding = null;
 58+ $wgMagpieRSSDetectEncoding = true;
 59+
 60+ $wgMagpieRSSFetchTimeout = 5; // 5 second timeout
 61+
 62+ // use gzip encoding to fetch RSS files if supported?
 63+ $wgMagpieRSSUseGzip = true;
 64+
 65+ if ( !isset( $url ) ) {
 66+ wfDebugLog( 'RSS', 'fetch_rss (RSSFetch.php) called without a URL!' );
 67+ return false;
 68+ }
 69+
 70+ // if cache is disabled
 71+ if ( !$wgMagpieRSSCache ) {
 72+ // fetch file, and parse it
 73+ $resp = _fetch_remote_file( $url );
 74+ if ( $resp->status >= 200 && $resp->status < 300 ) {
 75+ return _response_to_rss( $resp );
 76+ } else {
 77+ wfDebugLog( 'RSS', "Failed to fetch $url and cache is off" );
 78+ return false;
 79+ }
 80+ } else { // else cache is ON
 81+ // Flow
 82+ // 1. check cache
 83+ // 2. if there is a hit, make sure its fresh
 84+ // 3. if cached obj fails freshness check, fetch remote
 85+ // 4. if remote fails, return stale object, or error
 86+ $cache = new RSSCache( $wgMagpieRSSCacheDirectory, $wgMagpieRSSCacheAge );
 87+
 88+ if ( $cache->ERROR ) {
 89+ wfDebugLog(
 90+ 'RSS',
 91+ 'MagpieRSS: cache error on RSSFetch.php! Error msg: ' .
 92+ $cache->ERROR
 93+ );
 94+ }
 95+
 96+ $cache_status = 0; // response of check_cache
 97+ $request_headers = array(); // HTTP headers to send with fetch
 98+ $rss = 0; // parsed RSS object
 99+ $errormsg = 0; // errors, if any
 100+
 101+ // store parsed XML by desired output encoding
 102+ // as character munging happens at parse time
 103+ $cache_key = $url . $wgMagpieRSSOutputEncoding;
 104+
 105+ if ( !$cache->ERROR ) {
 106+ // return cache HIT, MISS, or STALE
 107+ $cache_status = $cache->check_cache( $cache_key );
 108+ }
 109+
 110+ // if object cached, and cache is fresh, return cached obj
 111+ if ( $cache_status == 'HIT' ) {
 112+ $rss = $cache->get( $cache_key );
 113+ if ( isset( $rss ) && $rss ) {
 114+ // should be cache age
 115+ $rss->from_cache = 1;
 116+ wfDebugLog( 'RSS', 'MagpieRSS: Cache HIT' );
 117+ return $rss;
 118+ }
 119+ }
 120+
 121+ // else attempt a conditional get
 122+
 123+ // setup headers
 124+ if ( $cache_status == 'STALE' ) {
 125+ $rss = $cache->get( $cache_key );
 126+ if ( $rss && $rss->etag && $rss->last_modified ) {
 127+ $request_headers['If-None-Match'] = $rss->etag;
 128+ $request_headers['If-Last-Modified'] = $rss->last_modified;
 129+ }
 130+ }
 131+
 132+ $resp = _fetch_remote_file( $url, $request_headers );
 133+
 134+ if ( isset( $resp ) && $resp ) {
 135+ if ( $resp->status == '304' ) {
 136+ // we have the most current copy
 137+ wfDebugLog( 'RSS', "Got 304 for $url" );
 138+ // reset cache on 304 (at minutillo insistent prodding)
 139+ $cache->set( $cache_key, $rss );
 140+ return $rss;
 141+ } elseif ( $resp->status >= 200 && $resp->status < 300 ) {
 142+ $rss = _response_to_rss( $resp );
 143+ if ( $rss ) {
 144+ wfDebugLog( 'RSS', 'Fetch successful' );
 145+ // add object to cache
 146+ $cache->set( $cache_key, $rss );
 147+ return $rss;
 148+ }
 149+ } else {
 150+ $errormsg = "Failed to fetch $url ";
 151+ if ( $resp->status == '-100' ) {
 152+ global $wgMagpieRSSFetchTimeout;
 153+ $errormsg .= '(Request timed out after ' . $wgMagpieRSSFetchTimeout . ' seconds)';
 154+ } elseif ( $resp->error ) {
 155+ // compensate for Snoopy's annoying habbit to tacking
 156+ // on '\n'
 157+ $http_error = substr( $resp->error, 0, -2 );
 158+ $errormsg .= "(HTTP Error: $http_error)";
 159+ } else {
 160+ $errormsg .= '(HTTP Response: ' . $resp->response_code . ')';
 161+ }
 162+ }
 163+ } else {
 164+ $errormsg = 'Unable to retrieve RSS file for unknown reasons.';
 165+ }
 166+
 167+ // else fetch failed
 168+
 169+ // attempt to return cached object
 170+ if ( $rss ) {
 171+ wfDebugLog( 'RSS', "Returning STALE object for $url" );
 172+ return $rss;
 173+ }
 174+
 175+ // else we totally failed
 176+ $MAGPIE_ERROR = $errormsg;
 177+ wfDebugLog(
 178+ 'MagpieRSS (RSSFetch): we totally failed :-( Error message:' .
 179+ $errormsg
 180+ );
 181+
 182+ return false;
 183+ } // end if ( !$wgMagpieRSSCache ) {
 184+} // end fetch_rss()
 185+
 186+/**
 187+ * Retrieve an arbitrary remote file.
 188+ * @param $url String: URL of the remote file
 189+ * @param $headers Array: headers to send along with the request
 190+ * @return an HTTP response object (see Snoopy.class.php)
 191+ */
 192+function _fetch_remote_file( $url, $headers = '' ) {
 193+ global $wgMagpieRSSFetchTimeout, $wgMagpieRSSUseGzip;
 194+ // Snoopy is an HTTP client in PHP
 195+ if ( !class_exists( 'Snoopy', false ) ) {
 196+ require_once( dirname( __FILE__ ) . '/Snoopy.class.php' );
 197+ }
 198+ $client = new Snoopy();
 199+ $client->agent = 'MagpieRSS/0.72 (+http://magpierss.sourceforge.net) / MediaWiki RSS extension';
 200+ $client->read_timeout = $wgMagpieRSSFetchTimeout;
 201+ $client->use_gzip = $wgMagpieRSSUseGzip;
 202+ if ( is_array( $headers ) ) {
 203+ $client->rawheaders = $headers;
 204+ }
 205+
 206+ @$client->fetch( $url );
 207+ return $client;
 208+}
 209+
 210+/**
 211+ * Parse an HTTP response object into an RSS object.
 212+ * @param $resp Object: an HTTP response object (see Snoopy)
 213+ * @return parsed RSS object (see RSSParse) or false
 214+ */
 215+function _response_to_rss( $resp ) {
 216+ global $wgMagpieRSSOutputEncoding, $wgMagpieRSSInputEncoding, $wgMagpieRSSDetectEncoding;
 217+ $rss = new MagpieRSS(
 218+ $resp->results,
 219+ $wgMagpieRSSOutputEncoding,
 220+ $wgMagpieRSSInputEncoding,
 221+ $wgMagpieRSSDetectEncoding
 222+ );
 223+
 224+ // if RSS parsed successfully
 225+ if ( $rss && !$rss->ERROR ) {
 226+ // find Etag and Last-Modified
 227+ foreach( $resp->headers as $h ) {
 228+ // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
 229+ if ( strpos( $h, ': ' ) ) {
 230+ list( $field, $val ) = explode( ': ', $h, 2 );
 231+ } else {
 232+ $field = $h;
 233+ $val = '';
 234+ }
 235+
 236+ if ( $field == 'ETag' ) {
 237+ $rss->etag = $val;
 238+ }
 239+
 240+ if ( $field == 'Last-Modified' ) {
 241+ $rss->last_modified = $val;
 242+ }
 243+ }
 244+
 245+ return $rss;
 246+ } else { // else construct error message
 247+ $errormsg = 'Failed to parse RSS file.';
 248+
 249+ if ( $rss ) {
 250+ $errormsg .= ' (' . $rss->ERROR . ')';
 251+ }
 252+ $MAGPIE_ERROR = $errormsg;
 253+ wfDebugLog( 'RSS', 'error!' . $errormsg );
 254+
 255+ return false;
 256+ } // end if ( $rss && !$rss->ERROR )
 257+}
\ No newline at end of file
Property changes on: trunk/extensions/RSS/RSSFetch.php
___________________________________________________________________
Added: svn:eol-style
1258 + native
Index: trunk/extensions/RSS/RSSParse.php
@@ -0,0 +1,494 @@
 2+<?php
 3+/**
 4+ * Hybrid parser, and object, takes RSS or Atom feed as a string and returns a
 5+ * simple object.
 6+ * Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3
 7+ *
 8+ * @file
 9+ * @see RSSFetch.php for a simpler interface with integrated caching support
 10+ */
 11+class MagpieRSS {
 12+ public $parser;
 13+
 14+ public $current_item = array(); // item currently being parsed
 15+ public $items = array(); // collection of parsed items
 16+ public $channel = array(); // hash of channel fields
 17+ public $textinput = array();
 18+ public $image = array();
 19+ public $feed_type;
 20+ public $feed_version;
 21+ public $encoding = ''; // output encoding of parsed rss
 22+
 23+ public $_source_encoding = ''; // only set if we have to parse xml prolog
 24+
 25+ public $ERROR = '';
 26+ public $WARNING = '';
 27+
 28+ // define some constants
 29+ public $_CONTENT_CONSTRUCTS = array( 'content', 'summary', 'info', 'title', 'tagline', 'copyright' );
 30+ public $_KNOWN_ENCODINGS = array( 'UTF-8', 'US-ASCII', 'ISO-8859-1' );
 31+
 32+ // parser variables, useless if you're not a parser, treat as private
 33+ public $stack = array(); // parser stack
 34+ public $inchannel = false;
 35+ public $initem = false;
 36+ public $incontent = false; // if in Atom <content mode="xml"> field
 37+ public $intextinput = false;
 38+ public $inimage = false;
 39+ public $current_namespace = false;
 40+
 41+ /**
 42+ * Set up XML parser, parse source, and return populated RSS object..
 43+ *
 44+ * @param $source String: string containing the RSS to be parsed
 45+ *
 46+ * NOTE: Probably a good idea to leave the encoding options alone unless
 47+ * you know what you're doing as PHP's character set support is
 48+ * a little weird.
 49+ *
 50+ * NOTE: A lot of this is unnecessary but harmless with PHP5
 51+ *
 52+ *
 53+ * @param $output_encoding String: output the parsed RSS in this character
 54+ * set defaults to ISO-8859-1 as this is PHP's
 55+ * default.
 56+ *
 57+ * NOTE: might be changed to UTF-8 in future
 58+ * versions.
 59+ *
 60+ * @param $input_encoding String: the character set of the incoming RSS source.
 61+ * Leave blank and Magpie will try to figure it
 62+ * out.
 63+ *
 64+ * @param $detect_encoding Boolean: if false, Magpie won't attempt to
 65+ * detect source encoding. (caveat emptor)
 66+ */
 67+ function __construct( $source, $output_encoding = 'ISO-8859-1',
 68+ $input_encoding = null, $detect_encoding = true )
 69+ {
 70+ # if PHP xml isn't compiled in, die
 71+ if ( !function_exists( 'xml_parser_create' ) ) {
 72+ $this->error(
 73+ "Failed to load PHP's XML Extension. " .
 74+ 'http://www.php.net/manual/en/ref.xml.php',
 75+ E_USER_ERROR
 76+ );
 77+ }
 78+
 79+ list( $parser, $source ) = $this->create_parser(
 80+ $source,
 81+ $output_encoding,
 82+ $input_encoding,
 83+ $detect_encoding
 84+ );
 85+
 86+ if ( !is_resource( $parser ) ) {
 87+ $this->error(
 88+ "Failed to create an instance of PHP's XML parser. " .
 89+ 'http://www.php.net/manual/en/ref.xml.php',
 90+ E_USER_ERROR
 91+ );
 92+ }
 93+
 94+ $this->parser = $parser;
 95+
 96+ # pass in parser, and a reference to this object
 97+ # setup handlers
 98+ xml_set_object( $this->parser, $this );
 99+ xml_set_element_handler(
 100+ $this->parser,
 101+ 'feed_start_element',
 102+ 'feed_end_element'
 103+ );
 104+
 105+ xml_set_character_data_handler( $this->parser, 'feed_cdata' );
 106+
 107+ $status = xml_parse( $this->parser, $source );
 108+
 109+ if ( !$status ) {
 110+ $errorcode = xml_get_error_code( $this->parser );
 111+ if ( $errorcode != XML_ERROR_NONE ) {
 112+ $xml_error = xml_error_string( $errorcode );
 113+ $error_line = xml_get_current_line_number( $this->parser );
 114+ $error_col = xml_get_current_column_number( $this->parser );
 115+ $errormsg = "$xml_error at line $error_line, column $error_col";
 116+
 117+ $this->error( $errormsg );
 118+ }
 119+ }
 120+
 121+ xml_parser_free( $this->parser );
 122+
 123+ $this->normalize();
 124+ }
 125+
 126+ function feed_start_element( $p, $element, &$attrs ) {
 127+ $el = $element = strtolower( $element );
 128+ $attrs = array_change_key_case( $attrs, CASE_LOWER );
 129+
 130+ // check for a namespace, and split if found
 131+ $ns = false;
 132+ if ( strpos( $element, ':' ) ) {
 133+ list( $ns, $el ) = explode( ':', $element, 2 );
 134+ }
 135+ if ( $ns && $ns != 'rdf' ) {
 136+ $this->current_namespace = $ns;
 137+ }
 138+
 139+ // if feed type isn't set, then this is first element of feed
 140+ // identify feed from root element
 141+ if ( !isset( $this->feed_type ) ) {
 142+ if ( $el == 'rdf' ) {
 143+ $this->feed_type = 'RSS';
 144+ $this->feed_version = '1.0';
 145+ } elseif ( $el == 'rss' ) {
 146+ $this->feed_type = 'RSS';
 147+ $this->feed_version = $attrs['version'];
 148+ } elseif ( $el == 'feed' ) {
 149+ $this->feed_type = 'Atom';
 150+ $this->feed_version = $attrs['version'];
 151+ $this->inchannel = true;
 152+ }
 153+ return;
 154+ }
 155+
 156+ if ( $el == 'channel' ) {
 157+ $this->inchannel = true;
 158+ } elseif ( $el == 'item' || $el == 'entry' ) {
 159+ $this->initem = true;
 160+ if ( isset( $attrs['rdf:about'] ) ) {
 161+ $this->current_item['about'] = $attrs['rdf:about'];
 162+ }
 163+ }
 164+
 165+ // if we're in the default namespace of an RSS feed,
 166+ // record textinput or image fields
 167+ elseif (
 168+ $this->feed_type == 'RSS' &&
 169+ $this->current_namespace == '' &&
 170+ $el == 'textinput' )
 171+ {
 172+ $this->intextinput = true;
 173+ } elseif (
 174+ $this->feed_type == 'RSS' &&
 175+ $this->current_namespace == '' &&
 176+ $el == 'image' )
 177+ {
 178+ $this->inimage = true;
 179+ }
 180+
 181+ // handle Atom content constructs
 182+ elseif ( $this->feed_type == 'Atom' && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
 183+ // avoid clashing w/ RSS mod_content
 184+ if ( $el == 'content' ) {
 185+ $el = 'atom_content';
 186+ }
 187+
 188+ $this->incontent = $el;
 189+ }
 190+
 191+ // if inside an Atom content construct (e.g. content or summary) field treat tags as text
 192+ elseif ( $this->feed_type == 'Atom' && $this->incontent ) {
 193+ // if tags are inlined, then flatten
 194+ $attrs_str = join(
 195+ ' ',
 196+ array_map(
 197+ array( 'MagpieRSS', 'mapAttributes' ),
 198+ array_keys( $attrs ),
 199+ array_values( $attrs )
 200+ )
 201+ );
 202+
 203+ $this->append_content( "<$element $attrs_str>" );
 204+
 205+ array_unshift( $this->stack, $el );
 206+ }
 207+
 208+ // Atom support many links per containging element.
 209+ // Magpie treats link elements of type rel='alternate'
 210+ // as being equivalent to RSS's simple link element.
 211+ elseif ( $this->feed_type == 'Atom' && $el == 'link' ) {
 212+ if ( isset( $attrs['rel'] ) && $attrs['rel'] == 'alternate' ) {
 213+ $link_el = 'link';
 214+ } else {
 215+ $link_el = 'link_' . $attrs['rel'];
 216+ }
 217+
 218+ $this->append( $link_el, $attrs['href'] );
 219+ } else { // set stack[0] to current element
 220+ array_unshift( $this->stack, $el );
 221+ }
 222+ }
 223+
 224+ function feed_cdata( $p, $text ) {
 225+ if ( $this->feed_type == 'Atom' && $this->incontent ) {
 226+ $this->append_content( $text );
 227+ } else {
 228+ $current_el = join( '_', array_reverse( $this->stack ) );
 229+ $this->append( $current_el, $text );
 230+ }
 231+ }
 232+
 233+ function feed_end_element( $p, $el ) {
 234+ $el = strtolower( $el );
 235+
 236+ if ( $el == 'item' || $el == 'entry' ) {
 237+ $this->items[] = $this->current_item;
 238+ $this->current_item = array();
 239+ $this->initem = false;
 240+ } elseif ( $this->feed_type == 'RSS' && $this->current_namespace == '' && $el == 'textinput' ) {
 241+ $this->intextinput = false;
 242+ } elseif ( $this->feed_type == 'RSS' && $this->current_namespace == '' && $el == 'image' ) {
 243+ $this->inimage = false;
 244+ } elseif ( $this->feed_type == 'Atom' && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
 245+ $this->incontent = false;
 246+ } elseif ( $el == 'channel' || $el == 'feed' ) {
 247+ $this->inchannel = false;
 248+ } elseif ( $this->feed_type == 'Atom' && $this->incontent ) {
 249+ // balance tags properly
 250+ // note: I don't think this is actually neccessary
 251+ if ( $this->stack[0] == $el ) {
 252+ $this->append_content( "</$el>" );
 253+ } else {
 254+ $this->append_content( "<$el />" );
 255+ }
 256+
 257+ array_shift( $this->stack );
 258+ } else {
 259+ array_shift( $this->stack );
 260+ }
 261+
 262+ $this->current_namespace = false;
 263+ }
 264+
 265+ function concat( &$str1, $str2 = '' ) {
 266+ if ( !isset( $str1 ) ) {
 267+ $str1 = '';
 268+ }
 269+ $str1 .= $str2;
 270+ }
 271+
 272+ function append_content( $text ) {
 273+ if ( $this->initem ) {
 274+ $this->concat( $this->current_item[$this->incontent], $text );
 275+ } elseif ( $this->inchannel ) {
 276+ $this->concat( $this->channel[$this->incontent], $text );
 277+ }
 278+ }
 279+
 280+ // smart append - field and namespace aware
 281+ function append( $el, $text ) {
 282+ if ( !$el ) {
 283+ return;
 284+ }
 285+ if ( $this->current_namespace ) {
 286+ if ( $this->initem ) {
 287+ $this->concat(
 288+ $this->current_item[$this->current_namespace][$el], $text
 289+ );
 290+ } elseif ( $this->inchannel ) {
 291+ $this->concat(
 292+ $this->channel[$this->current_namespace][$el], $text
 293+ );
 294+ } elseif ( $this->intextinput ) {
 295+ $this->concat(
 296+ $this->textinput[$this->current_namespace][$el], $text
 297+ );
 298+ } elseif ( $this->inimage ) {
 299+ $this->concat(
 300+ $this->image[$this->current_namespace][$el], $text
 301+ );
 302+ }
 303+ } else {
 304+ if ( $this->initem ) {
 305+ $this->concat(
 306+ $this->current_item[$el], $text
 307+ );
 308+ } elseif ( $this->intextinput ) {
 309+ $this->concat(
 310+ $this->textinput[$el], $text
 311+ );
 312+ } elseif ( $this->inimage ) {
 313+ $this->concat(
 314+ $this->image[$el], $text
 315+ );
 316+ } elseif ( $this->inchannel ) {
 317+ $this->concat(
 318+ $this->channel[$el], $text
 319+ );
 320+ }
 321+ }
 322+ }
 323+
 324+ function normalize() {
 325+ // if atom populate rss fields
 326+ if ( $this->is_atom() ) {
 327+ $this->channel['description'] = $this->channel['tagline'];
 328+ for ( $i = 0; $i < count( $this->items ); $i++ ) {
 329+ $item = $this->items[$i];
 330+ if ( isset( $item['summary'] ) ) {
 331+ $item['description'] = $item['summary'];
 332+ }
 333+ if ( isset( $item['atom_content'] ) ) {
 334+ $item['content']['encoded'] = $item['atom_content'];
 335+ }
 336+
 337+ $atom_date = ( isset( $item['issued'] ) ) ? $item['issued'] : $item['modified'];
 338+ if ( $atom_date ) {
 339+ $epoch = @$this->parse_w3cdtf( $atom_date );
 340+ if ( $epoch && $epoch > 0 ) {
 341+ $item['date_timestamp'] = $epoch;
 342+ }
 343+ }
 344+
 345+ $this->items[$i] = $item;
 346+ }
 347+ } elseif ( $this->is_rss() ) {
 348+ $this->channel['tagline'] = $this->channel['description'];
 349+ for ( $i = 0; $i < count( $this->items ); $i++ ) {
 350+ $item = $this->items[$i];
 351+ if ( isset( $item['description'] ) ) {
 352+ $item['summary'] = $item['description'];
 353+ }
 354+ if ( isset( $item['content']['encoded'] ) ) {
 355+ $item['atom_content'] = $item['content']['encoded'];
 356+ }
 357+
 358+ if ( $this->is_rss() == '1.0' && isset( $item['dc']['date'] ) ) {
 359+ $epoch = @$this->parse_w3cdtf( $item['dc']['date'] );
 360+ if ( $epoch && $epoch > 0 ) {
 361+ $item['date_timestamp'] = $epoch;
 362+ }
 363+ } elseif ( isset( $item['pubdate'] ) ) {
 364+ $epoch = @strtotime( $item['pubdate'] );
 365+ if ( $epoch > 0 ) {
 366+ $item['date_timestamp'] = $epoch;
 367+ }
 368+ }
 369+
 370+ $this->items[$i] = $item;
 371+ }
 372+ }
 373+ }
 374+
 375+ function is_rss() {
 376+ if ( $this->feed_type == 'RSS' ) {
 377+ return $this->feed_version;
 378+ } else {
 379+ return false;
 380+ }
 381+ }
 382+
 383+ function is_atom() {
 384+ if ( $this->feed_type == 'Atom' ) {
 385+ return $this->feed_version;
 386+ } else {
 387+ return false;
 388+ }
 389+ }
 390+
 391+ /**
 392+ * Instantiate an XML parser.
 393+ * @return XML parser, and possibly re-encoded source
 394+ */
 395+ function create_parser( $source, $out_enc, $in_enc, $detect ) {
 396+ // by default PHP5 does a fine job of detecting input encodings
 397+ if( !$detect && $in_enc ) {
 398+ $parser = xml_parser_create( $in_enc );
 399+ } else {
 400+ $parser = xml_parser_create( '' );
 401+ }
 402+ if ( $out_enc ) {
 403+ $this->encoding = $out_enc;
 404+ xml_parser_set_option(
 405+ $parser,
 406+ XML_OPTION_TARGET_ENCODING,
 407+ $out_enc
 408+ );
 409+ }
 410+
 411+ return array( $parser, $source );
 412+ }
 413+
 414+ /**
 415+ * Checks if $enc is an encoding type supported by MagpieRSS.
 416+ * @param $enc String: encoding name
 417+ * @return String or false
 418+ */
 419+ function known_encoding( $enc ) {
 420+ $enc = strtoupper( $enc );
 421+ if ( in_array( $enc, $this->_KNOWN_ENCODINGS ) ) {
 422+ return $enc;
 423+ } else {
 424+ return false;
 425+ }
 426+ }
 427+
 428+ function error( $errormsg, $lvl = E_USER_WARNING ) {
 429+ // append PHP's error message if track_errors is enabled
 430+ if ( isset( $php_errormsg ) ) {
 431+ $errormsg .= " ($php_errormsg)";
 432+ }
 433+
 434+ $notices = E_USER_NOTICE|E_NOTICE;
 435+ if ( $lvl&$notices ) {
 436+ $this->WARNING = $errormsg;
 437+ } else {
 438+ $this->ERROR = $errormsg;
 439+ }
 440+ }
 441+
 442+ /**
 443+ * Parse a W3CDTF date into unix epoch.
 444+ * This used to be in its own file.
 445+ * @note http://www.w3.org/TR/NOTE-datetime
 446+ * @param $date_str String: date string to parse
 447+ * @return Integer
 448+ */
 449+ public static function parse_w3cdtf( $date_str ) {
 450+ // regex to match wc3dtf
 451+ $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";
 452+
 453+ if ( preg_match( $pat, $date_str, $match ) ) {
 454+ list( $year, $month, $day, $hours, $minutes, $seconds ) =
 455+ array( $match[1], $match[2], $match[3], $match[4], $match[5], $match[6] );
 456+
 457+ // calculate epoch for current date assuming GMT
 458+ $epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year );
 459+
 460+ $offset = 0;
 461+ if ( $match[10] == 'Z' ) {
 462+ // zulu time, aka GMT
 463+ } else {
 464+ list( $tz_mod, $tz_hour, $tz_min ) =
 465+ array( $match[8], $match[9], $match[10] );
 466+
 467+ // zero out the variables
 468+ if ( !$tz_hour ) {
 469+ $tz_hour = 0;
 470+ }
 471+ if ( !$tz_min ) {
 472+ $tz_min = 0;
 473+ }
 474+
 475+ $offset_secs = ( ( $tz_hour * 60 ) + $tz_min ) * 60;
 476+
 477+ // is timezone ahead of GMT? then subtract offset
 478+ if ( $tz_mod == '+' ) {
 479+ $offset_secs = $offset_secs * -1;
 480+ }
 481+
 482+ $offset = $offset_secs;
 483+ }
 484+ $epoch = $epoch + $offset;
 485+ return $epoch;
 486+ } else {
 487+ return -1;
 488+ }
 489+ }
 490+
 491+ public static function mapAttributes( $k, $v ) {
 492+ return "$k=\"$v\"";
 493+ }
 494+
 495+} // end class MagpieRSS
\ No newline at end of file
Property changes on: trunk/extensions/RSS/RSSParse.php
___________________________________________________________________
Added: svn:eol-style
1496 + native
Index: trunk/extensions/RSS/RSS.i18n.php
@@ -1,6 +1,6 @@
22 <?php
33 /**
4 - * Internationalisation file for extension RSS.
 4+ * Internationalization file for RSS extension.
55 *
66 * @file
77 * @ingroup Extensions
@@ -8,7 +8,35 @@
99
1010 $messages = array();
1111
 12+/** English
 13+ * @author Łukasz Garczewski (TOR) <tor@wikia-inc.com>
 14+ */
1215 $messages['en'] = array(
1316 'rss-desc' => 'Displays an RSS feed on a wiki page',
 17+ 'rss-error' => 'Failed to load RSS feed from $1: $2',
 18+ 'rss-empty' => 'Failed to load RSS feed from $1!',
1419 );
1520
 21+/** Finnish (Suomi)
 22+ * @author Jack Phoenix <jack@countervandalism.net>
 23+ */
 24+$messages['fi'] = array(
 25+ 'rss-error' => 'RSS-syötteen lataaminen osoitteesta $1 epäonnistui: $2',
 26+ 'rss-empty' => 'RSS-syötteen lataaminen osoitteesta $1 epäonnistui!',
 27+);
 28+
 29+/** Dutch (Nederlands)
 30+ * @author Mitchel Corstjens
 31+ */
 32+$messages['nl'] = array(
 33+ 'rss-error' => 'Kon RSS feed van $1 niet laden, fout: $2',
 34+ 'rss-empty' => 'Kon RSS feed van $1 niet laden!',
 35+);
 36+
 37+/** Polish (Polski)
 38+ * @author Łukasz Garczewski (TOR) <tor@wikia-inc.com>
 39+ */
 40+$messages['pl'] = array(
 41+ 'rss-error' => 'Nie udało się odczytać kanału $1: $2',
 42+ 'rss-empty' => 'Nie udało się odczytać kanału $1!',
 43+);
\ No newline at end of file
Index: trunk/extensions/RSS/RSS.php
@@ -1,61 +1,57 @@
22 <?php
3 -
43 /**
5 - * RSS-Feed MediaWiki extension.
6 - * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
 4+ * RSS-Feed MediaWiki extension
75 *
8 - * @file RSS.php
 6+ * @file
97 * @ingroup Extensions
10 - *
11 - * TODO: replace all @ by wfSurpressWarnings and wfResumeWarnings
 8+ * @version 1.7
 9+ * @author mutante, Daniel Kinzler, Rdb, Mafs, Alxndr, Chris Reigrut, K001
 10+ * @author Kellan Elliott-McCrea <kellan@protest.net> -- author of MagpieRSS
 11+ * @author Jeroen De Dauw
 12+ * @author Jack Phoenix <jack@countervandalism.net>
 13+ * @copyright © Kellan Elliott-McCrea <kellan@protest.net>
 14+ * @copyright © mutante, Daniel Kinzler, Rdb, Mafs, Alxndr, Chris Reigrut, K001
 15+ * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
1216 */
1317
14 -if ( !defined( 'MEDIAWIKI' ) ) {
 18+if( !defined( 'MEDIAWIKI' ) ) {
1519 die( "This is not a valid entry point.\n" );
1620 }
1721
18 -define( 'RSS_VERSION', '1.7' );
19 -
 22+// Extension credits that will show up on Special:Version
2023 $wgExtensionCredits['parserhook'][] = array(
21 - 'path' => __FILE__,
2224 'name' => 'RSS feed',
2325 'author' => array(
 26+ 'Kellan Elliott-McCrea',
2427 'mutante',
25 - 'Duesentrieb',
 28+ 'Daniel Kinzler',
2629 'Rdb',
2730 'Mafs',
2831 'Alxndr',
2932 'Wikinaut',
30 - 'Cmreigrut',
 33+ 'Chris Reigrut',
3134 'K001',
32 - '[http://www.mediawiki.org/wiki/User:Jeroen_De_Dauw Jeroen De Dauw]'
 35+ 'Jack Phoenix',
 36+ 'Jeroen De Dauw'
3337 ),
34 - 'version' => RSS_VERSION,
 38+ 'version' => '1.7',
3539 'url' => 'http://www.mediawiki.org/wiki/Extension:RSS',
 40+ 'description' => 'Displays an RSS feed on a wiki page',
3641 'descriptionmsg' => 'rss-desc',
3742 );
3843
39 -$dir = dirname( __FILE__ );
40 -$wgExtensionMessagesFiles['RSS'] = "$dir/RSS.i18n.php";
 44+// Internationalization file and autoloadable classes
 45+$dir = dirname( __FILE__ ) . '/';
 46+$wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php';
 47+$wgAutoloadClasses['MagpieRSS'] = $dir . 'RSSParse.php';
 48+$wgAutoloadClasses['RSSCache'] = $dir . 'RSSCache.php';
4149
42 -define( 'MAGPIE_OUTPUT_ENCODING', 'UTF-8' );
 50+$wgHooks['ParserFirstCallInit'][] = 'wfRssExtension';
4351
44 -# change this according to your magpie installation!
45 -require_once( dirname( __FILE__ ) . '/magpierss/rss_fetch.inc' );
46 -
47 -// Avoid unstubbing $wgParser too early on modern (1.12+) MW versions, as per r35980
48 -if ( defined( 'MW_SUPPORTS_PARSERFIRSTCALLINIT' ) ) {
49 - $wgHooks['ParserFirstCallInit'][] = 'wfRssExtension';
50 -} else {
51 - $wgExtensionFunctions[] = 'wfRssExtension';
52 -}
53 -
5452 # Extension hook callback function
55 -function wfRssExtension() {
56 - global $wgParser;
57 -
 53+function wfRssExtension( &$parser ) {
5854 # Install parser hook for <rss> tags
59 - $wgParser->setHook( 'rss', 'renderRss' );
 55+ $parser->setHook( 'rss', 'renderRss' );
6056 return true;
6157 }
6258
@@ -66,90 +62,137 @@
6763 // Kill parser cache
6864 $wgParser->disableCache();
6965
70 - if ( !$input ) return ''; # if <rss>-section is empty, return nothing
 66+ if ( !$input ) {
 67+ return ''; # if <rss>-section is empty, return nothing
 68+ }
7169
72 - # Parse fields in rss-section
 70+ # Parse fields in rss section
7371 $fields = explode( '|', $input );
74 - $url = @$fields[0];
 72+ wfSuppressWarnings();
 73+ $url = $fields[0];
 74+ wfRestoreWarnings();
7575
7676 $args = array();
7777 for ( $i = 1; $i < sizeof( $fields ); $i++ ) {
7878 $f = $fields[$i];
7979
80 - if ( strpos( $f, '=' ) === false ) $args[strtolower( trim( $f ) )] = false;
81 - else {
 80+ if ( strpos( $f, '=' ) === false ) {
 81+ $args[strtolower( trim( $f ) )] = false;
 82+ } else {
8283 list( $k, $v ) = explode( '=', $f, 2 );
83 - if ( trim( $v ) == false ) $args[strtolower( trim( $k ) )] = false;
84 - else $args[strtolower( trim( $k ) )] = trim( $v );
 84+ if ( trim( $v ) == false ) {
 85+ $args[strtolower( trim( $k ) )] = false;
 86+ } else {
 87+ $args[strtolower( trim( $k ) )] = trim( $v );
 88+ }
8589 }
8690 }
8791
88 - # Get charset from argument-array
89 - $charset = @$args['charset'];
90 - if ( !$charset ) $charset = $wgOutputEncoding;
 92+ # Get charset from argument array
 93+ wfSuppressWarnings();
 94+ $charset = $args['charset'];
 95+ wfRestoreWarnings();
 96+ if( !$charset ) {
 97+ $charset = $wgOutputEncoding;
 98+ }
 99+
91100 # Get max number of headlines from argument-array
92 - $maxheads = @$args['max'];
 101+ wfSuppressWarnings();
 102+ $maxheads = $args['max'];
 103+ wfRestoreWarnings();
93104 $headcnt = 0;
94105
95 - # Get short-flag from argument-array
 106+ # Get short flag from argument array
96107 # If short is set, no description text is printed
97 - if ( isset( $args['short'] ) ) $short = true; else $short = false;
98 - # Get reverse-flag from argument-array
99 - if ( isset( $args['reverse'] ) ) $reverse = true; else $reverse = false;
 108+ if( isset( $args['short'] ) ) {
 109+ $short = true;
 110+ } else {
 111+ $short = false;
 112+ }
100113
101 - # Get date format from argument-array
102 - if ( isset( $args["date"] ) ) {
103 - $date = @$args["date"];
104 - if ( $date == '' )
 114+ # Get reverse flag from argument array
 115+ if( isset( $args['reverse'] ) ) {
 116+ $reverse = true;
 117+ } else {
 118+ $reverse = false;
 119+ }
 120+
 121+ # Get date format from argument array
 122+ if ( isset( $args['date'] ) ) {
 123+ wfSuppressWarnings();
 124+ $date = $args['date'];
 125+ wfRestoreWarnings();
 126+ if ( $date == '' ) {
105127 $date = 'd M Y H:i';
 128+ }
 129+ } else {
 130+ $date = false;
106131 }
107 - else
108 - $date = false;
109132
110133 # Get highlight terms from argument array
111 - $rssHighlight = @$args['highlight'];
 134+ wfSuppressWarnings();
 135+ $rssHighlight = $args['highlight'];
 136+ wfRestoreWarnings();
112137 $rssHighlight = str_replace( ' ', ' ', $rssHighlight );
113138 $rssHighlight = explode( ' ', trim( $rssHighlight ) );
114139
115 - # Get filter terms from argument-array
116 - $rssFilter = @$args['filter'];
 140+ # Get filter terms from argument array
 141+ wfSuppressWarnings();
 142+ $rssFilter = $args['filter'];
 143+ wfRestoreWarnings();
117144 $rssFilter = str_replace( ' ', ' ', $rssFilter );
118145 $rssFilter = explode( ' ', trim( $rssFilter ) );
119146
120147 # Filterout terms
121 - $rssFilterout = @$args['filterout'];
 148+ wfSuppressWarnings();
 149+ $rssFilterout = $args['filterout'];
 150+ wfRestoreWarnings();
122151 $rssFilterout = str_replace( ' ', ' ', $rssFilterout );
123152 $rssFilterout = explode( ' ', trim( $rssFilterout ) );
124153
125154 # Fetch RSS. May be cached locally.
126 - # Refer to the documentation of magpie for details.
127 - $rss = @fetch_rss( $url );
 155+ # Refer to the documentation of MagpieRSS for details.
 156+ if ( !function_exists( 'fetch_rss' ) ) {
 157+ include( dirname( __FILE__ ) . '/RSSFetch.php' ); // provides fetch_rss() function
 158+ }
 159+ wfSuppressWarnings();
 160+ $rss = fetch_rss( $url );
 161+ wfRestoreWarnings();
128162
129163 # Check for errors.
 164+ if ( empty( $rss ) ) {
 165+ wfLoadExtensionMessages( 'RSS' );
 166+ return wfMsg( 'rss-empty', $url );
 167+ }
 168+
130169 if ( $rss->ERROR ) {
131 - return "<div>Failed to load RSS feed from $url: " . $rss->ERROR . "</div>"; # localize…
 170+ wfLoadExtensionMessages( 'RSS' );
 171+ return '<div>' . wfMsg( 'rss-error', $url, $rss->ERROR ) . '</div>';
132172 }
133173
134174 if ( !is_array( $rss->items ) ) {
135 - return "<div>Failed to load RSS feed from $url!</div>"; # localize…
 175+ wfLoadExtensionMessages( 'RSS' );
 176+ return '<div>' . wfMsg( 'rss-empty', $url ) . '</div>';
136177 }
137178
138179 # Build title line
139 - # $title = iconv($charset, $wgOutputEncoding, $rss->channel['title']);
140 - # if( $rss->channel['link'] ) $title = "<a href='".$rss->channel['link']."'>$title</a>";
 180+ #$title = iconv( $charset, $wgOutputEncoding, $rss->channel['title'] );
 181+ #if( $rss->channel['link'] ) $title = "<a href='" . $rss->channel['link'] . "'>$title</a>";
141182
142183 $output = '';
143 - if ( $reverse ) $rss->items = array_reverse( $rss->items );
 184+ if( $reverse ) {
 185+ $rss->items = array_reverse( $rss->items );
 186+ }
144187 $description = false;
145188 foreach ( $rss->items as $item ) {
146 - if ( $item['description'] ) {
 189+ if ( isset( $item['description'] ) && $item['description'] ) {
147190 $description = true;
148191 break;
149192 }
150193 }
151194
152195 # Build items
153 - if ( !$short and $description ) { # full item list
 196+ if ( !$short && $description ) { # full item list
154197 $output .= '<dl>';
155198
156199 foreach ( $rss->items as $item ) {
@@ -171,7 +214,7 @@
172215 # Build description text if desired
173216 if ( $item['description'] ) {
174217 $text = trim( iconv( $charset, $wgOutputEncoding, $item['description'] ) );
175 - # Avoid pre-tags
 218+ # Avoid <pre> tags
176219 $text = str_replace( "\r", ' ', $text );
177220 $text = str_replace( "\n", ' ', $text );
178221 $text = str_replace( "\t", ' ', $text );
@@ -180,23 +223,29 @@
181224 $d_text = wfRssFilter( $text, $rssFilter );
182225 $d_text = wfRssFilterout( $text, $rssFilterout );
183226 $text = wfRssHighlight( $text, $rssHighlight );
184 - $display = $d_text or $d_title;
 227+ $display = $d_text || $d_title;
185228 } else {
186229 $text = '';
187230 $display = $d_title;
188231 }
189232 if ( $display ) {
190 - $output .= "<dt><a href='$href'><b>$title</b></a></dt>";
191 - if ( $date ) $output .= " ($pubdate)";
192 - if ( $text ) $output .= "<dd>$text <b>[<a href='$href'>?</a>]</b></dd>";
 233+ $output.= "<dt><a href='$href'><b>$title</b></a></dt>";
 234+ if ( $date ) {
 235+ $output .= " ($pubdate)";
 236+ }
 237+ if ( $text ) {
 238+ $output .= "<dd>$text <b>[<a href='$href'>?</a>]</b></dd>";
 239+ }
193240 }
194241 # Cut off output when maxheads is reached:
195 - if ( ++$headcnt == $maxheads ) break;
 242+ if ( ++$headcnt == $maxheads ) {
 243+ break;
 244+ }
196245 }
197246
198247 $output .= '</dl>';
199248 } else { # short item list
200 - # # HACKY HACKY HACKY
 249+ ## HACKY HACKY HACKY
201250 $output .= '<ul>';
202251 $displayed = array();
203252 foreach ( $rss->items as $item ) {
@@ -205,26 +254,29 @@
206255 $d_title = wfRssFilter( $title, $rssFilter ) && wfRssFilterout( $title, $rssFilterout );
207256 $title = wfRssHighlight( $title, $rssHighlight );
208257 if ( $date ) {
209 - $pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) );
 258+ $pubdate = isset( $item['pubdate'] ) ? trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) ) : '';
210259 if ( $pubdate == '' ) {
211260 $pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['dc']['date'] ) );
212 - }
 261+ }
213262 $pubdate = date( $date, strtotime( $pubdate ) );
214263 }
 264+
215265 if ( $d_title && !in_array( $title, $displayed ) ) {
216266 // Add date to output if specified
217267 $output .= '<li><a href="' . $href . '" title="' . $title . '">' . $title . '</a>';
218 - if ( $date ) {
 268+ if( $date ) {
219269 $output .= " ($pubdate)";
220270 }
221271 $output .= '</li>';
222272
223273 $displayed[] = $title;
224274 # Cut off output when maxheads is reached:
225 - if ( ++$headcnt == $maxheads ) break;
 275+ if ( ++$headcnt == $maxheads ) {
 276+ break;
 277+ }
226278 }
227279 }
228 - $output .= '</ul>';
 280+ $output.= '</ul>';
229281 }
230282
231283 return $output;
@@ -233,7 +285,7 @@
234286 function wfRssFilter( $text, $rssFilter ) {
235287 $display = true;
236288 if ( is_array( $rssFilter ) ) {
237 - foreach ( $rssFilter as $term ) {
 289+ foreach( $rssFilter as $term ) {
238290 if ( $term ) {
239291 $display = false;
240292 if ( preg_match( "|$term|i", $text, $a ) ) {
@@ -241,7 +293,9 @@
242294 return $display;
243295 }
244296 }
245 - if ( $display ) break;
 297+ if ( $display ) {
 298+ break;
 299+ }
246300 }
247301 }
248302 return $display;
@@ -275,11 +329,13 @@
276330 $count_color = count( $color );
277331
278332 if ( is_array( $rssHighlight ) ) {
279 - foreach ( $rssHighlight as $term ) {
 333+ foreach( $rssHighlight as $term ) {
280334 if ( $term ) {
281335 $text = preg_replace( "|\b(\w*?" . $term . "\w*?)\b|i", "$starttag" . "_" . $i . "\\1$endtag", $text );
282336 $i++;
283 - if ( $i == $count_color ) $i = 0;
 337+ if ( $i == $count_color ) {
 338+ $i = 0;
 339+ }
284340 }
285341 }
286342 }
@@ -291,5 +347,4 @@
292348 }
293349
294350 return $text;
295 -}
296 -# PHP closing tag intentionally left blank
\ No newline at end of file
 351+}
\ No newline at end of file

Comments

#Comment by 😂 (talk | contribs)   00:30, 22 January 2011

Full review was on r75046, which resolved some issues since this commit.

Status & tagging log