Index: trunk/extensions/RSS/RSSCache.php |
— | — | @@ -0,0 +1,151 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * A simple, rolling (no GC) cache for RSS objects, keyed on URL. |
| 5 | + * |
| 6 | + * @file |
| 7 | + */ |
| 8 | + |
class RSSCache {
	public $BASE_CACHE = './cache'; // directory in which the cache files are stored
	public $MAX_AGE = 3600; // age in seconds after which an entry is stale (default: one hour)
	public $ERROR = ''; // error message slot; reset to '' by set(), get() and check_cache()

	/**
	 * Set up the cache and try to create the cache directory if it is missing.
	 *
	 * @param $base String: cache directory; an empty value keeps the default
	 * @param $age Integer: maximum age in seconds; an empty value keeps the default
	 */
	public function __construct( $base = '', $age = '' ) {
		if ( $base ) {
			$this->BASE_CACHE = $base;
		}
		if ( $age ) {
			$this->MAX_AGE = $age;
		}

		// attempt to make the cache directory
		if ( !file_exists( $this->BASE_CACHE ) ) {
			$status = @mkdir( $this->BASE_CACHE, 0755 );

			// Only report a failure if the directory really is not there;
			// another request may have created it in the meantime.
			if ( !$status && !is_dir( $this->BASE_CACHE ) ) {
				wfDebugLog(
					'RSS',
					"Cache couldn't make dir '" . $this->BASE_CACHE . "'."
				);
			}
		}
	}

	/**
	 * Add an item to the cache, keyed on URL.
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @param $rss Mixed: data to serialize
	 * @return String or Integer: path of the cache file on success, 0 on failure
	 */
	public function set( $url, $rss ) {
		$this->ERROR = '';
		$cache_file = $this->file_name( $url );
		$fp = @fopen( $cache_file, 'w' );

		if ( !$fp ) {
			wfDebugLog(
				'RSS',
				"Cache unable to open file for writing: $cache_file"
			);
			return 0;
		}

		$data = serialize( $rss );
		$written = fwrite( $fp, $data );
		fclose( $fp );

		if ( $written === false || $written < strlen( $data ) ) {
			// A truncated entry could never be unserialized; drop it so the
			// next lookup is a clean MISS instead of a corrupt HIT.
			@unlink( $cache_file );
			wfDebugLog(
				'RSS',
				"Cache unable to write file: $cache_file"
			);
			return 0;
		}

		return $cache_file;
	}

	/**
	 * Fetch an item from the cache.
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @return Mixed: the unserialized value on HIT; 0 when the entry is
	 *  missing, unreadable or empty
	 */
	public function get( $url ) {
		$this->ERROR = '';
		$cache_file = $this->file_name( $url );

		if ( !file_exists( $cache_file ) ) {
			wfDebugLog(
				'RSS',
				"Cache doesn't contain: $url (cache file: $cache_file)"
			);
			return 0;
		}

		$fp = @fopen( $cache_file, 'r' );
		if ( !$fp ) {
			wfDebugLog(
				'RSS',
				"Failed to open cache file for reading: $cache_file"
			);
			return 0;
		}

		// Read the whole stream rather than filesize() bytes: filesize() is
		// served from PHP's stat cache and can be stale right after set().
		$data = stream_get_contents( $fp );
		fclose( $fp );

		if ( $data === false || $data === '' ) {
			return 0;
		}

		return unserialize( $data );
	}

	/**
	 * Check a URL for membership in the cache and whether the object is older
	 * than MAX_AGE (i.e. STALE).
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @return String: 'HIT' (present and current), 'STALE' (present but old)
	 *  or 'MISS' (not present)
	 */
	public function check_cache( $url ) {
		$this->ERROR = '';
		$filename = $this->file_name( $url );

		if ( !file_exists( $filename ) ) {
			// object does not exist
			return 'MISS';
		}

		// find how long ago the file was added to the cache
		// and whether that is longer than MAX_AGE
		$age = time() - filemtime( $filename );

		return ( $this->MAX_AGE > $age ) ? 'HIT' : 'STALE';
	}

	/**
	 * Get the age of a cache entry.
	 *
	 * @param $cache_key String: cache key, consisting of the URL + output enc.
	 * @return Integer: age of the entry in seconds, or -1 if it does not exist
	 */
	public function cache_age( $cache_key ) {
		// The key passed in is what identifies the entry (this previously
		// used an undefined variable and so never found the file).
		$filename = $this->file_name( $cache_key );
		if ( file_exists( $filename ) ) {
			return time() - filemtime( $filename );
		}

		return -1;
	}

	/**
	 * Map URL to location in cache.
	 *
	 * @param $url String: URL from which the RSS file was fetched
	 * @return String: path of the cache file (BASE_CACHE plus the MD5 of $url)
	 */
	public function file_name( $url ) {
		$filename = md5( $url );
		return implode( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
	}

}
\ No newline at end of file |
Property changes on: trunk/extensions/RSS/RSSCache.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 153 | + native |
Index: trunk/extensions/RSS/Snoopy.class.php |
— | — | @@ -0,0 +1,1241 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/************************************************* |
| 5 | + |
| 6 | +Snoopy - the PHP net client |
| 7 | +Author: Monte Ohrt <monte@ispi.net> |
| 8 | +Copyright (c): 1999-2008 New Digital Group, all rights reserved |
| 9 | +Version: 1.2.5-dev (revision 1.27) |
| 10 | +Note: some coding style changes by Jack Phoenix <jack@countervandalism.net> |
| 11 | + var -> public, added some braces, double quotes -> single quotes, etc. |
| 12 | + also added the gzip support stuff from MagpieRSS' Snoopy to this ver |
| 13 | + |
| 14 | + * This library is free software; you can redistribute it and/or |
| 15 | + * modify it under the terms of the GNU Lesser General Public |
| 16 | + * License as published by the Free Software Foundation; either |
| 17 | + * version 2.1 of the License, or (at your option) any later version. |
| 18 | + * |
| 19 | + * This library is distributed in the hope that it will be useful, |
| 20 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 21 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 22 | + * Lesser General Public License for more details. |
| 23 | + * |
| 24 | + * You should have received a copy of the GNU Lesser General Public |
| 25 | + * License along with this library; if not, write to the Free Software |
| 26 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 27 | + |
| 28 | +You may contact the author of Snoopy by e-mail at: |
| 29 | +monte@ohrt.com |
| 30 | + |
| 31 | +The latest version of Snoopy can be obtained from: |
| 32 | +http://snoopy.sourceforge.net/ |
| 33 | + |
| 34 | +*************************************************/ |
| 35 | + |
| 36 | +class Snoopy { |
| 37 | + /**** Public variables ****/ |
| 38 | + |
| 39 | + /* user definable vars */ |
| 40 | + public $host = 'www.php.net'; // host name we are connecting to |
| 41 | + public $port = 80; // port we are connecting to |
| 42 | + public $proxy_host = ''; // proxy host to use |
| 43 | + public $proxy_port = ''; // proxy port to use |
| 44 | + public $proxy_user = ''; // proxy user to use |
| 45 | + public $proxy_pass = ''; // proxy password to use |
| 46 | + |
| 47 | + public $agent = 'Snoopy v1.2.5-dev'; // agent we masquerade as |
| 48 | + public $referer = ''; // referer info to pass |
| 49 | + public $cookies = array(); // array of cookies to pass |
| 50 | + // $cookies['username'] = 'joe'; |
| 51 | + public $rawheaders = array(); // array of raw headers to send |
| 52 | + // $rawheaders['Content-type'] = 'text/html'; |
| 53 | + |
| 54 | + public $maxredirs = 5; // http redirection depth maximum. 0 = disallow |
| 55 | + public $lastredirectaddr = ''; // contains address of last redirected address |
| 56 | + public $offsiteok = true; // allows redirection off-site |
| 57 | + public $maxframes = 0; // frame content depth maximum. 0 = disallow |
| 58 | + public $expandlinks = true; // expand links to fully qualified URLs. |
| 59 | + // this only applies to fetchlinks() |
| 60 | + // submitlinks(), and submittext() |
| 61 | + public $passcookies = true; // pass set cookies back through redirects |
| 62 | + // NOTE: this currently does not respect |
| 63 | + // dates, domains or paths. |
| 64 | + |
| 65 | + public $user = ''; // user for http authentication |
| 66 | + public $pass = ''; // password for http authentication |
| 67 | + |
| 68 | + // http accept types |
| 69 | + public $accept = 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*'; |
| 70 | + |
| 71 | + public $results = ''; // where the content is put |
| 72 | + |
| 73 | + public $error = ''; // error messages sent here |
| 74 | + public $response_code = ''; // response code returned from server |
| 75 | + public $headers = array(); // headers returned from server sent here |
| 76 | + public $maxlength = 500000; // max return data length (body) |
| 77 | + public $read_timeout = 0; // timeout on read operations, in seconds |
| 78 | + // supported only since PHP 4 Beta 4 |
| 79 | + // set to 0 to disallow timeouts |
| 80 | + public $timed_out = false; // if a read operation timed out |
| 81 | + public $status = 0; // http request status |
| 82 | + |
| 83 | + public $temp_dir = '/tmp'; // temporary directory that the webserver |
| 84 | + // has permission to write to. |
| 85 | + // under Windows, this should be C:\temp |
| 86 | + |
| 87 | + public $curl_path = '/usr/local/bin/curl'; |
| 88 | + // Snoopy will use cURL for fetching |
| 89 | + // SSL content if a full system path to |
| 90 | + // the cURL binary is supplied here. |
| 91 | + // set to false if you do not have |
| 92 | + // cURL installed. See http://curl.haxx.se |
| 93 | + // for details on installing cURL. |
| 94 | + // Snoopy does *not* use the cURL |
| 95 | + // library functions built into php, |
| 96 | + // as these functions are not stable |
| 97 | + // as of this Snoopy release. |
| 98 | + |
| 99 | + // send Accept-encoding: gzip? |
| 100 | + public $use_gzip = true; |
| 101 | + |
| 102 | + /**** Private variables ****/ |
| 103 | + var $_maxlinelen = 4096; // max line length (headers) |
| 104 | + |
| 105 | + var $_httpmethod = 'GET'; // default http request method |
| 106 | + var $_httpversion = 'HTTP/1.0'; // default http request version |
| 107 | + var $_submit_method = 'POST'; // default submit method |
| 108 | + var $_submit_type = 'application/x-www-form-urlencoded'; // default submit type |
| 109 | + var $_mime_boundary = ''; // MIME boundary for multipart/form-data submit type |
| 110 | + var $_redirectaddr = false; // will be set if page fetched is a redirect |
| 111 | + var $_redirectdepth = 0; // increments on an http redirect |
| 112 | + var $_frameurls = array(); // frame src urls |
| 113 | + var $_framedepth = 0; // increments on frame depth |
| 114 | + |
| 115 | + var $_isproxy = false; // set if using a proxy server |
| 116 | + var $_fp_timeout = 30; // timeout for socket connection |
| 117 | + |
| 118 | + /*======================================================================*\ |
| 119 | + Function: fetch |
| 120 | + Purpose: fetch the contents of a web page |
| 121 | + (and possibly other protocols in the |
| 122 | + future like ftp, nntp, gopher, etc.) |
| 123 | + Input: $URI the location of the page to fetch |
| 124 | + Output: $this->results the output text from the fetch |
| 125 | + \*======================================================================*/ |
| 126 | + function fetch( $URI ) { |
| 127 | + //preg_match( "|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|", $URI, $URI_PARTS ); |
| 128 | + $URI_PARTS = parse_url( $URI ); |
| 129 | + if ( !empty( $URI_PARTS['user'] ) ) { |
| 130 | + $this->user = $URI_PARTS['user']; |
| 131 | + } |
| 132 | + if ( !empty( $URI_PARTS['pass'] ) ) { |
| 133 | + $this->pass = $URI_PARTS['pass']; |
| 134 | + } |
| 135 | + if ( empty( $URI_PARTS['query'] ) ) { |
| 136 | + $URI_PARTS['query'] = ''; |
| 137 | + } |
| 138 | + if ( empty( $URI_PARTS['path'] ) ) { |
| 139 | + $URI_PARTS['path'] = ''; |
| 140 | + } |
| 141 | + |
| 142 | + switch( strtolower( $URI_PARTS['scheme'] ) ) { |
| 143 | + case 'http': |
| 144 | + $this->host = $URI_PARTS['host']; |
| 145 | + if( !empty( $URI_PARTS['port'] ) ) { |
| 146 | + $this->port = $URI_PARTS['port']; |
| 147 | + } |
| 148 | + if( $this->_connect( $fp ) ) { |
| 149 | + if( $this->_isproxy ) { |
| 150 | + // using proxy, send entire URI |
| 151 | + $this->_httprequest( $URI, $fp, $URI, $this->_httpmethod ); |
| 152 | + } else { |
| 153 | + $path = $URI_PARTS['path'] . ( isset( $URI_PARTS['query'] ) ? '?' . $URI_PARTS['query'] : '' ); |
| 154 | + // no proxy, send only the path |
| 155 | + $this->_httprequest( $path, $fp, $URI, $this->_httpmethod ); |
| 156 | + } |
| 157 | + |
| 158 | + $this->_disconnect( $fp ); |
| 159 | + |
| 160 | + if( $this->_redirectaddr ) { |
| 161 | + /* url was redirected, check if we've hit the max depth */ |
| 162 | + if( $this->maxredirs > $this->_redirectdepth ) { |
| 163 | + // only follow redirect if it's on this site, or offsiteok is true |
| 164 | + if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok ) |
| 165 | + { |
| 166 | + /* follow the redirect */ |
| 167 | + $this->_redirectdepth++; |
| 168 | + $this->lastredirectaddr = $this->_redirectaddr; |
| 169 | + $this->fetch( $this->_redirectaddr ); |
| 170 | + } |
| 171 | + } |
| 172 | + } |
| 173 | + |
| 174 | + if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 ) |
| 175 | + { |
| 176 | + $frameurls = $this->_frameurls; |
| 177 | + $this->_frameurls = array(); |
| 178 | + |
| 179 | + while( list( , $frameurl ) = each( $frameurls ) ) { |
| 180 | + if( $this->_framedepth < $this->maxframes ) { |
| 181 | + $this->fetch( $frameurl ); |
| 182 | + $this->_framedepth++; |
| 183 | + } else { |
| 184 | + break; |
| 185 | + } |
| 186 | + } |
| 187 | + } |
| 188 | + } else { |
| 189 | + return false; |
| 190 | + } |
| 191 | + return true; |
| 192 | + break; |
| 193 | + case 'https': |
| 194 | + if( !$this->curl_path ) { |
| 195 | + return false; |
| 196 | + } |
| 197 | + if( function_exists( 'is_executable' ) ) { |
| 198 | + if ( !is_executable( $this->curl_path ) ) { |
| 199 | + $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n"; |
| 200 | + return false; |
| 201 | + } |
| 202 | + } |
| 203 | + $this->host = $URI_PARTS['host']; |
| 204 | + if( !empty( $URI_PARTS['port'] ) ) { |
| 205 | + $this->port = $URI_PARTS['port']; |
| 206 | + } |
| 207 | + if( $this->_isproxy ) { |
| 208 | + // using proxy, send entire URI |
| 209 | + $this->_httpsrequest( $URI, $URI, $this->_httpmethod ); |
| 210 | + } else { |
| 211 | + $path = $URI_PARTS['path'] . ( $URI_PARTS['query'] ? '?' . $URI_PARTS['query'] : '' ); |
| 212 | + // no proxy, send only the path |
| 213 | + $this->_httpsrequest( $path, $URI, $this->_httpmethod ); |
| 214 | + } |
| 215 | + |
| 216 | + if( $this->_redirectaddr ) { |
| 217 | + /* url was redirected, check if we've hit the max depth */ |
| 218 | + if( $this->maxredirs > $this->_redirectdepth ) { |
| 219 | + // only follow redirect if it's on this site, or offsiteok is true |
| 220 | + if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok ) |
| 221 | + { |
| 222 | + /* follow the redirect */ |
| 223 | + $this->_redirectdepth++; |
| 224 | + $this->lastredirectaddr = $this->_redirectaddr; |
| 225 | + $this->fetch( $this->_redirectaddr ); |
| 226 | + } |
| 227 | + } |
| 228 | + } |
| 229 | + |
| 230 | + if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 ) |
| 231 | + { |
| 232 | + $frameurls = $this->_frameurls; |
| 233 | + $this->_frameurls = array(); |
| 234 | + |
| 235 | + while( list( , $frameurl ) = each( $frameurls ) ) { |
| 236 | + if( $this->_framedepth < $this->maxframes ) { |
| 237 | + $this->fetch( $frameurl ); |
| 238 | + $this->_framedepth++; |
| 239 | + } else { |
| 240 | + break; |
| 241 | + } |
| 242 | + } |
| 243 | + } |
| 244 | + return true; |
| 245 | + break; |
| 246 | + default: |
| 247 | + // not a valid protocol |
| 248 | + $this->error = 'Invalid protocol "' . $URI_PARTS['scheme'] . '"\n'; |
| 249 | + return false; |
| 250 | + break; |
| 251 | + } |
| 252 | + return true; |
| 253 | + } |
| 254 | + |
| 255 | + /*======================================================================*\ |
| 256 | + Function: submit |
| 257 | + Purpose: submit an HTTP form |
| 258 | + Input: $URI the location to post the data |
| 259 | + $formvars the formvars to use. |
| 260 | + format: $formvars['var'] = 'val'; |
| 261 | + $formfiles an array of files to submit |
| 262 | + format: $formfiles['var'] = '/dir/filename.ext'; |
| 263 | + Output: $this->results the text output from the post |
| 264 | + \*======================================================================*/ |
| 265 | + function submit( $URI, $formvars = '', $formfiles = '' ) { |
| 266 | + unset( $postdata ); |
| 267 | + |
| 268 | + $postdata = $this->_prepare_post_body( $formvars, $formfiles ); |
| 269 | + |
| 270 | + $URI_PARTS = parse_url( $URI ); |
| 271 | + if ( !empty( $URI_PARTS['user'] ) ) { |
| 272 | + $this->user = $URI_PARTS['user']; |
| 273 | + } |
| 274 | + if ( !empty( $URI_PARTS['pass'] ) ) { |
| 275 | + $this->pass = $URI_PARTS['pass']; |
| 276 | + } |
| 277 | + if ( empty( $URI_PARTS['query'] ) ) { |
| 278 | + $URI_PARTS['query'] = ''; |
| 279 | + } |
| 280 | + if ( empty( $URI_PARTS['path'] ) ) { |
| 281 | + $URI_PARTS['path'] = ''; |
| 282 | + } |
| 283 | + |
| 284 | + switch( strtolower( $URI_PARTS['scheme'] ) ) { |
| 285 | + case 'http': |
| 286 | + $this->host = $URI_PARTS['host']; |
| 287 | + if( !empty( $URI_PARTS['port'] ) ) { |
| 288 | + $this->port = $URI_PARTS['port']; |
| 289 | + } |
| 290 | + if( $this->_connect( $fp ) ) { |
| 291 | + if( $this->_isproxy ) { |
| 292 | + // using proxy, send entire URI |
| 293 | + $this->_httprequest( $URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata ); |
| 294 | + } else { |
| 295 | + $path = $URI_PARTS['path'] . ( $URI_PARTS['query'] ? '?' . $URI_PARTS['query'] : '' ); |
| 296 | + // no proxy, send only the path |
| 297 | + $this->_httprequest( |
| 298 | + $path, $fp, $URI, |
| 299 | + $this->_submit_method, |
| 300 | + $this->_submit_type, |
| 301 | + $postdata |
| 302 | + ); |
| 303 | + } |
| 304 | + |
| 305 | + $this->_disconnect( $fp ); |
| 306 | + |
| 307 | + if( $this->_redirectaddr ) { |
| 308 | + /* url was redirected, check if we've hit the max depth */ |
| 309 | + if( $this->maxredirs > $this->_redirectdepth ) { |
| 310 | + if( !preg_match( "|^" . $URI_PARTS['scheme'] . "://|", $this->_redirectaddr ) ) { |
| 311 | + $this->_redirectaddr = $this->_expandlinks( $this->_redirectaddr, $URI_PARTS['scheme'] . '://' . $URI_PARTS['host'] ); |
| 312 | + } |
| 313 | + |
| 314 | + // only follow redirect if it's on this site, or offsiteok is true |
| 315 | + if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok ) |
| 316 | + { |
| 317 | + /* follow the redirect */ |
| 318 | + $this->_redirectdepth++; |
| 319 | + $this->lastredirectaddr = $this->_redirectaddr; |
| 320 | + if( strpos( $this->_redirectaddr, '?' ) > 0 ) { |
| 321 | + $this->fetch( $this->_redirectaddr ); // the redirect has changed the request method from post to get |
| 322 | + } else { |
| 323 | + $this->submit( $this->_redirectaddr, $formvars, $formfiles ); |
| 324 | + } |
| 325 | + } |
| 326 | + } |
| 327 | + } |
| 328 | + |
| 329 | + if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 ) |
| 330 | + { |
| 331 | + $frameurls = $this->_frameurls; |
| 332 | + $this->_frameurls = array(); |
| 333 | + |
| 334 | + while( list( , $frameurl ) = each( $frameurls ) ) { |
| 335 | + if( $this->_framedepth < $this->maxframes ) { |
| 336 | + $this->fetch( $frameurl ); |
| 337 | + $this->_framedepth++; |
| 338 | + } else { |
| 339 | + break; |
| 340 | + } |
| 341 | + } |
| 342 | + } |
| 343 | + } else { |
| 344 | + return false; |
| 345 | + } |
| 346 | + return true; |
| 347 | + break; |
| 348 | + case 'https': |
| 349 | + if( !$this->curl_path ) { |
| 350 | + return false; |
| 351 | + } |
| 352 | + if( function_exists( 'is_executable' ) ) { |
| 353 | + if ( !is_executable( $this->curl_path ) ) { |
| 354 | + return false; |
| 355 | + } |
| 356 | + } |
| 357 | + $this->host = $URI_PARTS['host']; |
| 358 | + if( !empty( $URI_PARTS['port'] ) ) { |
| 359 | + $this->port = $URI_PARTS['port']; |
| 360 | + } |
| 361 | + if( $this->_isproxy ) { |
| 362 | + // using proxy, send entire URI |
| 363 | + $this->_httpsrequest( |
| 364 | + $URI, |
| 365 | + $URI, |
| 366 | + $this->_submit_method, |
| 367 | + $this->_submit_type, |
| 368 | + $postdata |
| 369 | + ); |
| 370 | + } else { |
| 371 | + $path = $URI_PARTS['path'] . ( $URI_PARTS['query'] ? '?' . $URI_PARTS['query'] : '' ); |
| 372 | + // no proxy, send only the path |
| 373 | + $this->_httpsrequest( |
| 374 | + $path, |
| 375 | + $URI, |
| 376 | + $this->_submit_method, |
| 377 | + $this->_submit_type, |
| 378 | + $postdata |
| 379 | + ); |
| 380 | + } |
| 381 | + |
| 382 | + if( $this->_redirectaddr ) { |
| 383 | + /* url was redirected, check if we've hit the max depth */ |
| 384 | + if( $this->maxredirs > $this->_redirectdepth ) { |
| 385 | + if( !preg_match( "|^" . $URI_PARTS['scheme'] . "://|", $this->_redirectaddr ) ) { |
| 386 | + $this->_redirectaddr = $this->_expandlinks( |
| 387 | + $this->_redirectaddr, |
| 388 | + $URI_PARTS['scheme'] . '://' . $URI_PARTS['host'] |
| 389 | + ); |
| 390 | + } |
| 391 | + |
| 392 | + // only follow redirect if it's on this site, or offsiteok is true |
| 393 | + if( preg_match( "|^http://" . preg_quote( $this->host ) . "|i", $this->_redirectaddr ) || $this->offsiteok ) |
| 394 | + { |
| 395 | + /* follow the redirect */ |
| 396 | + $this->_redirectdepth++; |
| 397 | + $this->lastredirectaddr = $this->_redirectaddr; |
| 398 | + if( strpos( $this->_redirectaddr, '?' ) > 0 ) { |
| 399 | + $this->fetch( $this->_redirectaddr ); // the redirect has changed the request method from post to get |
| 400 | + } else { |
| 401 | + $this->submit( $this->_redirectaddr, $formvars, $formfiles ); |
| 402 | + } |
| 403 | + } |
| 404 | + } |
| 405 | + } |
| 406 | + |
| 407 | + if( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 ) |
| 408 | + { |
| 409 | + $frameurls = $this->_frameurls; |
| 410 | + $this->_frameurls = array(); |
| 411 | + |
| 412 | + while( list( , $frameurl ) = each( $frameurls ) ) { |
| 413 | + if( $this->_framedepth < $this->maxframes ) { |
| 414 | + $this->fetch( $frameurl ); |
| 415 | + $this->_framedepth++; |
| 416 | + } else { |
| 417 | + break; |
| 418 | + } |
| 419 | + } |
| 420 | + } |
| 421 | + return true; |
| 422 | + break; |
| 423 | + |
| 424 | + default: |
| 425 | + // not a valid protocol |
| 426 | + $this->error = 'Invalid protocol "' . $URI_PARTS['scheme'] . '"\n'; |
| 427 | + return false; |
| 428 | + break; |
| 429 | + } |
| 430 | + return true; |
| 431 | + } |
| 432 | + |
| 433 | + /*======================================================================*\ |
| 434 | + Function: fetchlinks |
| 435 | + Purpose: fetch the links from a web page |
| 436 | + Input: $URI where you are fetching from |
| 437 | + Output: $this->results an array of the URLs |
| 438 | + \*======================================================================*/ |
| 439 | + function fetchlinks( $URI ) { |
| 440 | + if ( $this->fetch( $URI ) ) { |
| 441 | + if( $this->lastredirectaddr ) { |
| 442 | + $URI = $this->lastredirectaddr; |
| 443 | + } |
| 444 | + if( is_array( $this->results ) ) { |
| 445 | + for( $x = 0; $x < count( $this->results ); $x++ ) { |
| 446 | + $this->results[$x] = $this->_striplinks( $this->results[$x] ); |
| 447 | + } |
| 448 | + } else { |
| 449 | + $this->results = $this->_striplinks( $this->results ); |
| 450 | + } |
| 451 | + |
| 452 | + if( $this->expandlinks ) { |
| 453 | + $this->results = $this->_expandlinks( $this->results, $URI ); |
| 454 | + } |
| 455 | + return true; |
| 456 | + } else { |
| 457 | + return false; |
| 458 | + } |
| 459 | + } |
| 460 | + |
| 461 | + /*======================================================================*\ |
| 462 | + Function: fetchform |
| 463 | + Purpose: fetch the form elements from a web page |
| 464 | + Input: $URI where you are fetching from |
| 465 | + Output: $this->results the resulting html form |
| 466 | + \*======================================================================*/ |
| 467 | + function fetchform( $URI ) { |
| 468 | + if ( $this->fetch( $URI ) ) { |
| 469 | + if( is_array( $this->results ) ) { |
| 470 | + for( $x = 0; $x < count( $this->results ); $x++ ) { |
| 471 | + $this->results[$x] = $this->_stripform( $this->results[$x] ); |
| 472 | + } |
| 473 | + } else { |
| 474 | + $this->results = $this->_stripform( $this->results ); |
| 475 | + } |
| 476 | + |
| 477 | + return true; |
| 478 | + } else { |
| 479 | + return false; |
| 480 | + } |
| 481 | + } |
| 482 | + |
| 483 | + |
| 484 | + /*======================================================================*\ |
| 485 | + Function: fetchtext |
| 486 | + Purpose: fetch the text from a web page, stripping the links |
| 487 | + Input: $URI where you are fetching from |
| 488 | + Output: $this->results the text from the web page |
| 489 | + \*======================================================================*/ |
| 490 | + function fetchtext( $URI ) { |
| 491 | + if( $this->fetch( $URI ) ) { |
| 492 | + if( is_array( $this->results ) ) { |
| 493 | + for( $x = 0; $x < count( $this->results ); $x++ ) { |
| 494 | + $this->results[$x] = $this->_striptext( $this->results[$x] ); |
| 495 | + } |
| 496 | + } else { |
| 497 | + $this->results = $this->_striptext( $this->results ); |
| 498 | + } |
| 499 | + return true; |
| 500 | + } else { |
| 501 | + return false; |
| 502 | + } |
| 503 | + } |
| 504 | + |
| 505 | + /*======================================================================*\ |
| 506 | + Function: submitlinks |
| 507 | + Purpose: grab links from a form submission |
| 508 | + Input: $URI where you are submitting from |
| 509 | + Output: $this->results an array of the links from the post |
| 510 | + \*======================================================================*/ |
| 511 | + function submitlinks( $URI, $formvars = '', $formfiles = '' ) { |
| 512 | + if( $this->submit( $URI, $formvars, $formfiles ) ) { |
| 513 | + if( $this->lastredirectaddr ) { |
| 514 | + $URI = $this->lastredirectaddr; |
| 515 | + } |
| 516 | + if( is_array( $this->results ) ) { |
| 517 | + for( $x = 0; $x < count( $this->results ); $x++ ) { |
| 518 | + $this->results[$x] = $this->_striplinks( $this->results[$x] ); |
| 519 | + if( $this->expandlinks ) { |
| 520 | + $this->results[$x] = $this->_expandlinks( $this->results[$x], $URI ); |
| 521 | + } |
| 522 | + } |
| 523 | + } else { |
| 524 | + $this->results = $this->_striplinks( $this->results ); |
| 525 | + if( $this->expandlinks ) { |
| 526 | + $this->results = $this->_expandlinks( $this->results, $URI ); |
| 527 | + } |
| 528 | + } |
| 529 | + return true; |
| 530 | + } else { |
| 531 | + return false; |
| 532 | + } |
| 533 | + } |
| 534 | + |
| 535 | + /*======================================================================*\ |
| 536 | + Function: submittext |
| 537 | + Purpose: grab text from a form submission |
| 538 | + Input: $URI where you are submitting from |
| 539 | + Output: $this->results the text from the web page |
| 540 | + \*======================================================================*/ |
| 541 | + function submittext( $URI, $formvars = '', $formfiles = '' ) { |
| 542 | + if( $this->submit( $URI, $formvars, $formfiles ) ) { |
| 543 | + if( $this->lastredirectaddr ) { |
| 544 | + $URI = $this->lastredirectaddr; |
| 545 | + } |
| 546 | + if( is_array( $this->results ) ) { |
| 547 | + for( $x = 0; $x < count( $this->results ); $x++ ) { |
| 548 | + $this->results[$x] = $this->_striptext( $this->results[$x] ); |
| 549 | + if( $this->expandlinks ) { |
| 550 | + $this->results[$x] = $this->_expandlinks( $this->results[$x], $URI ); |
| 551 | + } |
| 552 | + } |
| 553 | + } else { |
| 554 | + $this->results = $this->_striptext( $this->results ); |
| 555 | + if( $this->expandlinks ) { |
| 556 | + $this->results = $this->_expandlinks( $this->results, $URI ); |
| 557 | + } |
| 558 | + } |
| 559 | + return true; |
| 560 | + } else { |
| 561 | + return false; |
| 562 | + } |
| 563 | + } |
| 564 | + |
| 565 | + /*======================================================================*\ |
| 566 | + Function: set_submit_multipart |
| 567 | + Purpose: Set the form submission content type to |
| 568 | + multipart/form-data |
| 569 | + \*======================================================================*/ |
| 570 | + function set_submit_multipart() { |
| 571 | + $this->_submit_type = 'multipart/form-data'; |
| 572 | + } |
| 573 | + |
| 574 | + /*======================================================================*\ |
| 575 | + Function: set_submit_normal |
| 576 | + Purpose: Set the form submission content type to |
| 577 | + application/x-www-form-urlencoded |
| 578 | + \*======================================================================*/ |
| 579 | + function set_submit_normal() { |
| 580 | + $this->_submit_type = 'application/x-www-form-urlencoded'; |
| 581 | + } |
| 582 | + |
| 583 | + /*======================================================================*\ |
| 584 | + Private functions |
| 585 | + \*======================================================================*/ |
| 586 | + |
| 587 | + /*======================================================================*\ |
| 588 | + Function: _striplinks |
| 589 | + Purpose: strip the hyperlinks from an html document |
| 590 | + Input: $document document to strip. |
| 591 | + Output: $match an array of the links |
| 592 | + \*======================================================================*/ |
| 593 | + function _striplinks( $document ) { |
| 594 | + preg_match_all("'<\s*a\s.*?href\s*=\s* # find <a href= |
| 595 | + ([\"\'])? # find single or double quote |
| 596 | + (?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching |
| 597 | + # quote, otherwise match up to next space |
| 598 | + 'isx", $document, $links |
| 599 | + ); |
| 600 | + |
| 601 | + // catenate the non-empty matches from the conditional subpattern |
| 602 | + while( list( $key, $val ) = each( $links[2] ) ) { |
| 603 | + if( !empty( $val ) ) { |
| 604 | + $match[] = $val; |
| 605 | + } |
| 606 | + } |
| 607 | + |
| 608 | + while( list( $key, $val ) = each( $links[3] ) ) { |
| 609 | + if( !empty( $val ) ) { |
| 610 | + $match[] = $val; |
| 611 | + } |
| 612 | + } |
| 613 | + |
| 614 | + // return the links |
| 615 | + return $match; |
| 616 | + } |
| 617 | + |
| 618 | + /*======================================================================*\ |
| 619 | + Function: _stripform |
| 620 | + Purpose: strip the form elements from an HTML document |
| 621 | + Input: $document document to strip. |
| 622 | + Output: $match an array of the links |
| 623 | + \*======================================================================*/ |
| 624 | + function _stripform( $document ) { |
| 625 | + preg_match_all( |
| 626 | + "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", |
| 627 | + $document, |
| 628 | + $elements |
| 629 | + ); |
| 630 | + |
| 631 | + // catenate the matches |
| 632 | + $match = implode( "\r\n", $elements[0] ); |
| 633 | + |
| 634 | + // return the links |
| 635 | + return $match; |
| 636 | + } |
| 637 | + |
| 638 | + /*======================================================================*\ |
| 639 | + Function: _striptext |
| 640 | + Purpose: strip the text from an html document |
| 641 | + Input: $document document to strip. |
| 642 | + Output: $text the resulting text |
| 643 | + \*======================================================================*/ |
/**
 * Reduce an HTML document to plain text: remove scripts and tags, collapse
 * whitespace that follows a line break, and decode a fixed list of entities.
 *
 * The patterns are applied in order, so tags are removed before entities
 * are decoded.
 *
 * @param $document String: HTML to strip
 * @return String: the resulting text
 */
function _striptext( $document ) {
	// I didn't use preg eval (//e) since that is only available in PHP 4.0.
	// so, list your entities one by one here. I included some of the
	// more common ones.
	$search = array(
		"'<script[^>]*?>.*?</script>'si",	// strip out JavaScript
		"'<[\/\!]*?[^<>]*?>'si",		// strip out HTML tags
		"'([\r\n])[\s]+'",			// strip out white space
		"'&(quot|#34|#034|#x22);'i",		// replace HTML entities
		"'&(amp|#38|#038|#x26);'i",		// added hexadecimal values
		"'&(lt|#60|#060|#x3c);'i",
		"'&(gt|#62|#062|#x3e);'i",
		"'&(nbsp|#160|#xa0);'i",
		"'&(iexcl|#161);'i",
		"'&(cent|#162);'i",
		"'&(pound|#163);'i",
		"'&(copy|#169);'i",
		"'&(reg|#174);'i",
		"'&(deg|#176);'i",
		"'&(#39|#039|#x27);'",
		"'&(euro|#8364);'i",			// Europe
		"'&a(uml|UML);'",			// German
		"'&o(uml|UML);'",
		"'&u(uml|UML);'",
		"'&A(uml|UML);'",
		"'&O(uml|UML);'",
		"'&U(uml|UML);'",
		// match the entity, not the already-decoded character
		"'&szlig;'i",
	);
	// NOTE(review): the chr() values below are single bytes, while the
	// umlaut literals take whatever encoding this file is saved in --
	// confirm both agree with the encoding callers expect.
	$replace = array(
		'',
		'',
		"\\1",
		"\"",
		'&',
		'<',
		'>',
		' ',
		chr( 161 ),
		chr( 162 ),
		chr( 163 ),
		chr( 169 ),
		chr( 174 ),
		chr( 176 ),
		chr( 39 ),
		chr( 128 ),
		'ä',
		'ö',
		'ü',
		'Ä',
		'Ö',
		'Ü',
		'ß',
	);

	return preg_replace( $search, $replace, $document );
}
| 703 | + |
| 704 | + /*======================================================================*\ |
| 705 | + Function: _expandlinks |
| 706 | + Purpose: expand each link into a fully qualified URL |
| 707 | + Input: $links the links to qualify |
| 708 | + $URI the full URI to get the base from |
| 709 | + Output: $expandedLinks the expanded links |
| 710 | + \*======================================================================*/ |
/**
 * Expand each link into a fully qualified URL.
 *
 * @param $links String|Array: the link(s) to qualify; passed straight to
 *                             preg_replace(), so either form is accepted
 * @param $URI String: the full URI to take the base from
 * @return String|Array: the expanded link(s), same shape as $links
 */
function _expandlinks( $links, $URI ) {
	// work on the URI without its query string
	preg_match( "/^[^\?]+/", $URI, $match );
	$match = $match[0];

	// Strip a trailing "name.ext" segment to get the base directory. Skip
	// this for a bare scheme://host URI, where the only segment with a dot
	// is the host name itself (e.g. http://example.com).
	if ( !preg_match( "|^[^:/]+://[^/]+$|", $match ) ) {
		$match = preg_replace( "|/[^\/\.]+\.[^\/\.]+$|", '', $match );
	}
	$match = preg_replace( "|/$|", '', $match );
	$match_part = parse_url( $match );
	$match_root = $match_part['scheme'] . '://' . $match_part['host'];

	$search = array(
		// links pointing at our own host become root-relative
		"|^http://" . preg_quote( $this->host, '|' ) . "|i",
		// root-relative links
		"|^(\/)|i",
		// everything that is not already absolute (http, https) or mailto
		"|^(?!https?://)(?!mailto:)|i",
		// collapse "/./" and "/dir/../"
		"|/\./|",
		"|/[^\/]+/\.\./|"
	);

	$replace = array(
		'',
		$match_root . '/',
		$match . '/',
		'/',
		'/'
	);

	return preg_replace( $search, $replace, $links );
}
| 739 | + |
| 740 | + /*======================================================================*\ |
| 741 | + Function: _httprequest |
| 742 | + Purpose: go get the http data from the server |
| 743 | + Input: $url the url to fetch |
| 744 | + $fp the current open file pointer |
| 745 | + $URI the full URI |
| 746 | + $body body contents to send if any (POST) |
| 747 | + Output: |
| 748 | + \*======================================================================*/ |
/**
 * Send an HTTP request over an already-open socket and read the response.
 *
 * As a side effect this fills in $this->headers, $this->status,
 * $this->response_code, $this->results, $this->_redirectaddr and
 * $this->_frameurls.
 *
 * @param $url String: request target for the request line ('/' when empty)
 * @param $fp Resource: the current open file pointer
 * @param $URI String: the full URI, used to resolve relative redirects
 * @param $http_method String: request method placed on the request line
 * @param $content_type String: Content-type header value, if any
 * @param $body String: body contents to send, if any (POST)
 * @return Boolean: true once the response was read, false on read timeout
 *                  (in which case $this->status is set to -100)
 */
function _httprequest( $url, $fp, $URI, $http_method, $content_type = '', $body = '' ) {
	$cookie_headers = '';
	if ( $this->passcookies && $this->_redirectaddr ) {
		$this->setcookies();
	}

	$URI_PARTS = parse_url( $URI );
	if ( empty( $url ) ) {
		$url = '/';
	}

	// request line, then the standard headers
	$headers = $http_method . ' ' . $url . ' ' . $this->_httpversion . "\r\n";
	if ( !empty( $this->agent ) ) {
		$headers .= 'User-Agent: ' . $this->agent . "\r\n";
	}
	if ( !empty( $this->host ) && !isset( $this->rawheaders['Host'] ) ) {
		$headers .= 'Host: ' . $this->host;
		if ( !empty( $this->port ) ) {
			$headers .= ':' . $this->port;
		}
		$headers .= "\r\n";
	}
	if ( !empty( $this->accept ) ) {
		$headers .= 'Accept: ' . $this->accept . "\r\n";
	}

	if ( $this->use_gzip ) {
		// make sure PHP was built with --with-zlib
		// and we can handle gzipp'ed data
		if ( function_exists( 'gzinflate' ) ) {
			$headers .= "Accept-encoding: gzip\r\n";
		} else {
			trigger_error(
				'use_gzip is on, but PHP was built without zlib support.' .
				' Requesting file(s) without gzip encoding.',
				E_USER_NOTICE
			);
		}
	}

	if ( !empty( $this->referer ) ) {
		$headers .= 'Referer: ' . $this->referer . "\r\n";
	}
	if ( !empty( $this->cookies ) ) {
		if ( !is_array( $this->cookies ) ) {
			$this->cookies = (array)$this->cookies;
		}

		reset( $this->cookies );
		if ( count( $this->cookies ) > 0 ) {
			$cookie_headers .= 'Cookie: ';
			foreach ( $this->cookies as $cookieKey => $cookieVal ) {
				$cookie_headers .= $cookieKey . '=' . urlencode( $cookieVal ) . '; ';
			}
			// drop the trailing "; "
			$headers .= substr( $cookie_headers, 0, -2 ) . "\r\n";
		}
	}
	if ( !empty( $this->rawheaders ) ) {
		if ( !is_array( $this->rawheaders ) ) {
			$this->rawheaders = (array)$this->rawheaders;
		}
		// foreach always starts at the first element, so the raw headers
		// are sent again on every follow-up request (redirects, frames)
		foreach ( $this->rawheaders as $headerKey => $headerVal ) {
			$headers .= $headerKey . ': ' . $headerVal . "\r\n";
		}
	}
	if ( !empty( $content_type ) ) {
		$headers .= "Content-type: $content_type";
		if ( $content_type == 'multipart/form-data' ) {
			$headers .= '; boundary=' . $this->_mime_boundary;
		}
		$headers .= "\r\n";
	}
	if ( !empty( $body ) ) {
		$headers .= 'Content-length: ' . strlen( $body ) . "\r\n";
	}
	if ( !empty( $this->user ) || !empty( $this->pass ) ) {
		$headers .= 'Authorization: Basic ' . base64_encode( $this->user . ':' . $this->pass ) . "\r\n";
	}

	// add proxy auth headers
	if ( !empty( $this->proxy_user ) ) {
		$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode( $this->proxy_user . ':' . $this->proxy_pass ) . "\r\n";
	}

	// blank line terminates the header section
	$headers .= "\r\n";

	// set the read timeout if needed
	if ( $this->read_timeout > 0 ) {
		socket_set_timeout( $fp, $this->read_timeout );
	}
	$this->timed_out = false;

	fwrite( $fp, $headers . $body, strlen( $headers . $body ) );

	$this->_redirectaddr = false;
	unset( $this->headers );

	// content was returned gzip encoded?
	$is_gzipped = false;

	while ( $currentHeader = fgets( $fp, $this->_maxlinelen ) ) {
		if ( $this->read_timeout > 0 && $this->_check_timeout( $fp ) ) {
			$this->status = -100;
			return false;
		}

		// an empty line marks the end of the response headers
		if ( preg_match( "/^\r?\n$/", $currentHeader ) ) {
			break;
		}

		// if a header begins with Location: or URI:, set the redirect;
		// whitespace after the colon is optional
		if ( preg_match( "/^(Location:|URI:)[ \t]*(.*)/i", chop( $currentHeader ), $matches ) ) {
			// look for :// in the Location header to see if hostname is included
			if ( !preg_match( "|\:\/\/|", $matches[2] ) ) {
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS['scheme'] . '://' . $this->host . ':' . $this->port;
				// eliminate double slash
				if ( !preg_match( "|^/|", $matches[2] ) ) {
					$this->_redirectaddr .= '/' . $matches[2];
				} else {
					$this->_redirectaddr .= $matches[2];
				}
			} else {
				$this->_redirectaddr = $matches[2];
			}
		}

		// status line
		if ( preg_match( "|^HTTP/|", $currentHeader ) ) {
			if ( preg_match( "|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status ) ) {
				$this->status = $status[1];
			}
			$this->response_code = $currentHeader;
		}

		// header names are case-insensitive
		if ( preg_match( "/^Content-Encoding:\s*gzip/i", $currentHeader ) ) {
			$is_gzipped = true;
		}

		$this->headers[] = $currentHeader;
	}

	// read the body until EOF or until more than maxlength bytes are held
	$results = '';
	do {
		$_data = fread( $fp, $this->maxlength );
		if ( strlen( $_data ) == 0 || strlen( $results ) > $this->maxlength ) {
			break;
		}
		$results .= $_data;
	} while ( true );

	// gunzip
	if ( $is_gzipped ) {
		// per http://www.php.net/manual/en/function.gzencode.php
		// skip the 10-byte gzip header before inflating
		$results = substr( $results, 10 );
		$results = gzinflate( $results );
	}

	if ( $this->read_timeout > 0 && $this->_check_timeout( $fp ) ) {
		$this->status = -100;
		return false;
	}

	// check if there is a redirect meta tag
	if ( preg_match( "'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match ) ) {
		$this->_redirectaddr = $this->_expandlinks( $match[1], $URI );
	}

	// have we hit our frame depth and is there frame src to fetch?
	if ( ( $this->_framedepth < $this->maxframes ) && preg_match_all( "'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match ) ) {
		$this->results[] = $results;
		for ( $x = 0; $x < count( $match[1] ); $x++ ) {
			$this->_frameurls[] = $this->_expandlinks( $match[1][$x], $URI_PARTS['scheme'] . '://' . $this->host );
		}
	} elseif ( is_array( $this->results ) ) { // have we already fetched framed content?
		$this->results[] = $results;
	} else { // no framed content
		$this->results = $results;
	}

	return true;
}
| 934 | + |
| 935 | + /*======================================================================*\ |
| 936 | + Function: _httpsrequest |
| 937 | + Purpose: go get the https data from the server using curl |
| 938 | + Input: $url the url to fetch |
| 939 | + $URI the full URI |
| 940 | + $body body contents to send if any (POST) |
| 941 | + Output: |
| 942 | + \*======================================================================*/ |
/**
 * Fetch an HTTPS resource by shelling out to the curl binary.
 *
 * As a side effect this fills in $this->headers, $this->status,
 * $this->response_code, $this->results, $this->_redirectaddr and
 * $this->_frameurls; $this->error is set when curl fails.
 *
 * @param $url String: the url to fetch (only defaulted to '/', curl is
 *                     given $URI)
 * @param $URI String: the full URI
 * @param $http_method String: request method (not passed on to curl)
 * @param $content_type String: Content-type header value, if any
 * @param $body String: body contents to send, if any (POST)
 * @return Boolean: true on success, false when curl exits non-zero
 */
function _httpsrequest( $url, $URI, $http_method, $content_type = '', $body = '' ) {
	if ( $this->passcookies && $this->_redirectaddr ) {
		$this->setcookies();
	}

	$headers = array();

	$URI_PARTS = parse_url( $URI );
	if ( empty( $url ) ) {
		$url = '/';
	}
	// GET ... header not needed for curl
	if ( !empty( $this->agent ) ) {
		$headers[] = 'User-Agent: ' . $this->agent;
	}
	if ( !empty( $this->host ) ) {
		if ( !empty( $this->port ) ) {
			$headers[] = 'Host: ' . $this->host . ':' . $this->port;
		} else {
			$headers[] = 'Host: ' . $this->host;
		}
	}
	if ( !empty( $this->accept ) ) {
		$headers[] = 'Accept: ' . $this->accept;
	}
	if ( !empty( $this->referer ) ) {
		$headers[] = 'Referer: ' . $this->referer;
	}
	if ( !empty( $this->cookies ) ) {
		if ( !is_array( $this->cookies ) ) {
			$this->cookies = (array)$this->cookies;
		}

		reset( $this->cookies );
		if ( count( $this->cookies ) > 0 ) {
			$cookie_str = 'Cookie: ';
			foreach ( $this->cookies as $cookieKey => $cookieVal ) {
				$cookie_str .= $cookieKey . '=' . urlencode( $cookieVal ) . '; ';
			}
			// drop the trailing "; "
			$headers[] = substr( $cookie_str, 0, -2 );
		}
	}
	if ( !empty( $this->rawheaders ) ) {
		if ( !is_array( $this->rawheaders ) ) {
			$this->rawheaders = (array)$this->rawheaders;
		}
		foreach ( $this->rawheaders as $headerKey => $headerVal ) {
			$headers[] = $headerKey . ': ' . $headerVal;
		}
	}
	if ( !empty( $content_type ) ) {
		if ( $content_type == 'multipart/form-data' ) {
			$headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
		} else {
			$headers[] = "Content-type: $content_type";
		}
	}
	if ( !empty( $body ) ) {
		$headers[] = 'Content-length: ' . strlen( $body );
	}
	if ( !empty( $this->user ) || !empty( $this->pass ) ) {
		$headers[] = 'Authorization: BASIC ' . base64_encode( $this->user . ':' . $this->pass );
	}

	// Build the curl command line. Every value goes through
	// escapeshellarg() so that it reaches curl as exactly one argument,
	// whatever characters the URL, headers or body contain.
	$cmdline_params = '';
	foreach ( $headers as $header ) {
		$cmdline_params .= ' -H ' . escapeshellarg( $header );
	}

	if ( !empty( $body ) ) {
		$cmdline_params .= ' -d ' . escapeshellarg( $body );
	}

	if ( $this->read_timeout > 0 ) {
		$cmdline_params .= ' -m ' . (int)$this->read_timeout;
	}

	// curl dumps the response headers into this temporary file
	$temp_dir = !empty( $this->temp_dir ) ? $this->temp_dir : sys_get_temp_dir();
	$headerfile = tempnam( $temp_dir, 'sno' );

	// NOTE(review): -k turns off certificate verification in curl --
	// confirm this is really wanted.
	exec(
		$this->curl_path . ' -k -D ' . escapeshellarg( $headerfile ) . $cmdline_params . ' ' . escapeshellarg( $URI ),
		$results,
		$return
	);

	if ( $return ) {
		$this->error = "Error: cURL could not retrieve the document, error $return.";
		// do not leave the temporary header file behind
		@unlink( $headerfile );
		return false;
	}

	$results = implode( "\r\n", $results );

	$result_headers = file( $headerfile );
	if ( !is_array( $result_headers ) ) {
		$result_headers = array();
	}

	$this->_redirectaddr = false;
	unset( $this->headers );

	foreach ( $result_headers as $headerLine ) {
		// if a header begins with Location: or URI:, set the redirect;
		// whitespace after the colon is optional
		if ( preg_match( "/^(Location:|URI:)\s*(.*)/i", chop( $headerLine ), $matches ) ) {
			// look for :// in the Location header to see if hostname is included
			if ( !preg_match( "|\:\/\/|", $matches[2] ) ) {
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS['scheme'] . '://' . $this->host . ':' . $this->port;
				// eliminate double slash
				if ( !preg_match( "|^/|", $matches[2] ) ) {
					$this->_redirectaddr .= '/' . $matches[2];
				} else {
					$this->_redirectaddr .= $matches[2];
				}
			} else {
				$this->_redirectaddr = $matches[2];
			}
		}

		// status line
		if ( preg_match( "|^HTTP/|", $headerLine ) ) {
			$this->response_code = $headerLine;
			if ( preg_match( "|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match ) ) {
				$this->status = $match[1];
			}
		}

		$this->headers[] = $headerLine;
	}

	// check if there is a redirect meta tag
	if ( preg_match( "'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match ) ) {
		$this->_redirectaddr = $this->_expandlinks( $match[1], $URI );
	}

	// have we hit our frame depth and is there frame src to fetch?
	if ( ( $this->_framedepth < $this->maxframes ) && preg_match_all( "'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match ) ) {
		$this->results[] = $results;
		for ( $x = 0; $x < count( $match[1] ); $x++ ) {
			$this->_frameurls[] = $this->_expandlinks( $match[1][$x], $URI_PARTS['scheme'] . '://' . $this->host );
		}
	} elseif ( is_array( $this->results ) ) { // have we already fetched framed content?
		$this->results[] = $results;
	} else { // no framed content
		$this->results = $results;
	}

	unlink( $headerfile );

	return true;
}
| 1093 | + |
| 1094 | + /*======================================================================*\ |
| 1095 | + Function: setcookies() |
| 1096 | + Purpose: set cookies for a redirection |
| 1097 | + \*======================================================================*/ |
/**
 * Copy every Set-Cookie name/value pair found in the stored response
 * headers into $this->cookies, URL-decoding the value.
 */
function setcookies() {
	$total = count( $this->headers );
	$idx = 0;

	while ( $idx < $total ) {
		$found = preg_match(
			'/^set-cookie:[\s]+([^=]+)=([^;]+)/i',
			$this->headers[$idx],
			$match
		);
		if ( $found ) {
			// group 1 is the cookie name, group 2 its value up to the first ';'
			$this->cookies[$match[1]] = urldecode( $match[2] );
		}
		$idx++;
	}
}
| 1105 | + |
| 1106 | + /*======================================================================*\ |
| 1107 | + Function: _check_timeout |
| 1108 | + Purpose: checks whether timeout has occurred |
| 1109 | + Input: $fp file pointer |
| 1110 | + \*======================================================================*/ |
/**
 * Check whether the stream has timed out, recording a positive result in
 * $this->timed_out.
 *
 * @param $fp Resource: file pointer to inspect
 * @return Boolean: true when a timeout occurred, false otherwise (always
 *                  false when no read timeout is configured)
 */
function _check_timeout( $fp ) {
	// no read timeout configured: nothing to check
	if ( !( $this->read_timeout > 0 ) ) {
		return false;
	}

	$meta = socket_get_status( $fp );
	if ( !$meta['timed_out'] ) {
		return false;
	}

	$this->timed_out = true;
	return true;
}
| 1121 | + |
| 1122 | + /*======================================================================*\ |
| 1123 | + Function: _connect |
| 1124 | + Purpose: make a socket connection |
| 1125 | + Input: $fp file pointer |
| 1126 | + \*======================================================================*/ |
/**
 * Open a socket connection to the target host, or to the proxy when both
 * proxy_host and proxy_port are set.
 *
 * On failure $this->status holds the error number reported by fsockopen()
 * and $this->error a readable message.
 *
 * @param $fp Resource: receives the opened file pointer (by reference)
 * @return Boolean: true when the connection succeeded, false otherwise
 */
function _connect( &$fp ) {
	if ( !empty( $this->proxy_host ) && !empty( $this->proxy_port ) ) {
		$this->_isproxy = true;
		$host = $this->proxy_host;
		$port = $this->proxy_port;
	} else {
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen( $host, $port, $errno, $errstr, $this->_fp_timeout );

	if ( $fp ) {
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	// each case needs its own break, otherwise every error ends up
	// with the generic default message
	switch ( $errno ) {
		case -3:
			$this->error = 'socket creation failed (-3)';
			break;
		case -4:
			$this->error = 'dns lookup failure (-4)';
			break;
		case -5:
			$this->error = 'connection refused or timed out (-5)';
			break;
		default:
			$this->error = 'connection failed (' . $errno . ')';
	}
	return false;
}
| 1160 | + |
| 1161 | + /*======================================================================*\ |
| 1162 | + Function: _disconnect |
| 1163 | + Purpose: disconnect a socket connection |
| 1164 | + Input: $fp file pointer |
| 1165 | + \*======================================================================*/ |
/**
 * Close a socket connection.
 *
 * @param $fp Resource: file pointer to close
 * @return Boolean: the result of fclose()
 */
function _disconnect( $fp ) {
	$closed = fclose( $fp );
	return $closed;
}
| 1169 | + |
| 1170 | + /*======================================================================*\ |
| 1171 | + Function: _prepare_post_body |
| 1172 | + Purpose: Prepare post body according to encoding type |
| 1173 | + Input: $formvars - form variables |
| 1174 | + $formfiles - form upload files |
| 1175 | + Output: post body |
| 1176 | + \*======================================================================*/ |
/**
 * Build the request body for a form submission, encoded according to
 * $this->_submit_type.
 *
 * For 'multipart/form-data' a fresh boundary is generated and stored in
 * $this->_mime_boundary.
 *
 * @param $formvars Array: form variables (name => value, or name => list)
 * @param $formfiles Array: form upload files (field name => file name(s))
 * @return String|null: the post body; null when there is nothing to send;
 *                      an empty string for an unrecognised submit type
 */
function _prepare_post_body( $formvars, $formfiles ) {
	settype( $formvars, 'array' );
	settype( $formfiles, 'array' );
	$postdata = '';

	if ( count( $formvars ) == 0 && count( $formfiles ) == 0 ) {
		return;
	}

	switch ( $this->_submit_type ) {
		case 'application/x-www-form-urlencoded':
			foreach ( $formvars as $key => $val ) {
				if ( is_array( $val ) || is_object( $val ) ) {
					// multi-valued field: repeat the name with a [] suffix
					foreach ( $val as $cur_val ) {
						$postdata .= urlencode( $key ) . '[]=' . urlencode( $cur_val ) . '&';
					}
				} else {
					$postdata .= urlencode( $key ) . '=' . urlencode( $val ) . '&';
				}
			}
			break;

		case 'multipart/form-data':
			$this->_mime_boundary = 'Snoopy' . md5( uniqid( microtime() ) );

			foreach ( $formvars as $key => $val ) {
				if ( is_array( $val ) || is_object( $val ) ) {
					foreach ( $val as $cur_val ) {
						$postdata .= '--' . $this->_mime_boundary . "\r\n";
						// {$key}[] yields a plain "name[]" without stray backslashes
						$postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= '--' . $this->_mime_boundary . "\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ( $formfiles as $field_name => $file_names ) {
				settype( $file_names, 'array' );
				foreach ( $file_names as $file_name ) {
					if ( !is_readable( $file_name ) ) {
						continue;
					}

					// file_get_contents() also copes with empty files
					$file_content = file_get_contents( $file_name );
					if ( $file_content === false ) {
						// could not be read after all: skip it like an unreadable file
						continue;
					}
					$base_name = basename( $file_name );

					$postdata .= '--' . $this->_mime_boundary . "\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			// closing boundary
			$postdata .= '--' . $this->_mime_boundary . "--\r\n";
			break;
	}

	return $postdata;
}
| 1242 | +} |
\ No newline at end of file |
Property changes on: trunk/extensions/RSS/Snoopy.class.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 1243 | + native |
Index: trunk/extensions/RSS/RSSFetch.php |
— | — | @@ -0,0 +1,256 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * A simple functional interface to fetching and parsing RSS files, via the |
| 5 | + * function fetch_rss(). |
| 6 | + * |
| 7 | + * @file |
| 8 | + */ |
| 9 | + |
| 10 | +/** |
| 11 | + * Globals - redefine these in your script to change the |
| 12 | + * behaviour of fetch_rss(). Currently, most options affect the cache. |
| 13 | + * |
| 14 | + * $wgMagpieRSSCache - Should Magpie cache parsed RSS objects? |
| 15 | + * |
| 16 | + * $wgMagpieRSSCacheDirectory - Where should Magpie cache parsed RSS objects? |
| 17 | + * This should be a location that the webserver can write to. If this |
| 18 | + * directory does not already exist, Magpie will try to be smart and create it. |
| 19 | + * This will often fail for permissions reasons. |
| 20 | + * |
| 21 | + * $wgMagpieRSSCacheAge - How long to store cached RSS objects (in seconds)?. |
| 22 | + * |
| 23 | + * $wgMagpieRSSCacheFreshOnly - If remote fetch fails, throw an error |
| 24 | + * instead of returning stale object? |
| 25 | + */ |
| 26 | + |
// File-level error message variable, initialised to the empty string.
// NOTE(review): fetch_rss() assigns to a variable of this name -- confirm
// that it is declared global there so the assignment reaches this one.
$MAGPIE_ERROR = '';
| 28 | + |
| 29 | +/** |
| 30 | + * Return RSS object for the given URL, maintaining caching. |
| 31 | + * |
| 32 | + * NOTES ON CACHING: |
| 33 | + * If caching is on ($wgMagpieRSSCache) fetch_rss will first check the cache. |
| 34 | + * |
| 35 | + * NOTES ON RETRIEVING REMOTE FILES: |
| 36 | + * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
| 37 | + * return a cached object, and touch the cache object upon receiving a 304. |
| 38 | + * |
| 39 | + * NOTES ON FAILED REQUESTS: |
| 40 | + * If there is an HTTP error while fetching an RSS object, the cached version |
| 41 | + * will be returned, if it exists (and if $wgMagpieRSSCacheFreshOnly is off) |
| 42 | + * |
| 43 | + * @param $url String: URL of RSS file |
| 44 | + * @return parsed RSS object (see RSSParse) |
| 45 | + */ |
function fetch_rss( $url ) {
	global $wgMagpieRSSCache, $wgMagpieRSSCacheAge, $wgMagpieRSSCacheFreshOnly;
	global $wgMagpieRSSCacheDirectory, $wgMagpieRSSFetchTimeout;
	global $wgMagpieRSSOutputEncoding, $wgMagpieRSSInputEncoding;
	global $wgMagpieRSSDetectEncoding, $wgMagpieRSSUseGzip;
	// needed so that the error message set below is visible to callers
	global $MAGPIE_ERROR;

	// Fill in defaults only for settings that have not been configured,
	// so that values set by the site are respected.
	if ( !isset( $wgMagpieRSSCache ) ) {
		$wgMagpieRSSCache = true;
	}
	if ( !isset( $wgMagpieRSSCacheAge ) ) {
		$wgMagpieRSSCacheAge = 60 * 60; // one hour
	}
	if ( !isset( $wgMagpieRSSCacheFreshOnly ) ) {
		$wgMagpieRSSCacheFreshOnly = false;
	}
	if ( !isset( $wgMagpieRSSCacheDirectory ) ) {
		$wgMagpieRSSCacheDirectory = '/extensions/RSS/cache';
	}
	if ( !isset( $wgMagpieRSSOutputEncoding ) ) {
		$wgMagpieRSSOutputEncoding = 'ISO-8859-1';
	}
	if ( !isset( $wgMagpieRSSInputEncoding ) ) {
		$wgMagpieRSSInputEncoding = null;
	}
	if ( !isset( $wgMagpieRSSDetectEncoding ) ) {
		$wgMagpieRSSDetectEncoding = true;
	}
	if ( !isset( $wgMagpieRSSFetchTimeout ) ) {
		$wgMagpieRSSFetchTimeout = 5; // 5 second timeout
	}
	// use gzip encoding to fetch RSS files if supported?
	if ( !isset( $wgMagpieRSSUseGzip ) ) {
		$wgMagpieRSSUseGzip = true;
	}

	if ( !isset( $url ) ) {
		wfDebugLog( 'RSS', 'fetch_rss (RSSFetch.php) called without a URL!' );
		return false;
	}

	// if cache is disabled
	if ( !$wgMagpieRSSCache ) {
		// fetch file, and parse it
		$resp = _fetch_remote_file( $url );
		if ( $resp->status >= 200 && $resp->status < 300 ) {
			return _response_to_rss( $resp );
		}
		wfDebugLog( 'RSS', "Failed to fetch $url and cache is off" );
		return false;
	}

	// else cache is ON
	// Flow
	// 1. check cache
	// 2. if there is a hit, make sure its fresh
	// 3. if cached obj fails freshness check, fetch remote
	// 4. if remote fails, return stale object, or error
	$cache = new RSSCache( $wgMagpieRSSCacheDirectory, $wgMagpieRSSCacheAge );

	if ( $cache->ERROR ) {
		wfDebugLog(
			'RSS',
			'MagpieRSS: cache error on RSSFetch.php! Error msg: ' .
			$cache->ERROR
		);
	}

	$cache_status = 0; // response of check_cache
	$request_headers = array(); // HTTP headers to send with fetch
	$rss = 0; // parsed RSS object
	$errormsg = 0; // errors, if any

	// store parsed XML by desired output encoding
	// as character munging happens at parse time
	$cache_key = $url . $wgMagpieRSSOutputEncoding;

	if ( !$cache->ERROR ) {
		// return cache HIT, MISS, or STALE
		$cache_status = $cache->check_cache( $cache_key );
	}

	// if object cached, and cache is fresh, return cached obj
	// (strict comparison: the initial 0 must not compare equal to 'HIT')
	if ( $cache_status === 'HIT' ) {
		$rss = $cache->get( $cache_key );
		if ( isset( $rss ) && $rss ) {
			// should be cache age
			$rss->from_cache = 1;
			wfDebugLog( 'RSS', 'MagpieRSS: Cache HIT' );
			return $rss;
		}
	}

	// else attempt a conditional get

	// setup headers
	if ( $cache_status === 'STALE' ) {
		$rss = $cache->get( $cache_key );
		if ( $rss && $rss->etag && $rss->last_modified ) {
			$request_headers['If-None-Match'] = $rss->etag;
			// the conditional-request header that carries a date
			$request_headers['If-Modified-Since'] = $rss->last_modified;
		}
	}

	$resp = _fetch_remote_file( $url, $request_headers );

	if ( isset( $resp ) && $resp ) {
		if ( $resp->status == '304' ) {
			// we have the most current copy
			wfDebugLog( 'RSS', "Got 304 for $url" );
			// reset cache on 304 (at minutillo insistent prodding)
			$cache->set( $cache_key, $rss );
			return $rss;
		} elseif ( $resp->status >= 200 && $resp->status < 300 ) {
			$rss = _response_to_rss( $resp );
			if ( $rss ) {
				wfDebugLog( 'RSS', 'Fetch successful' );
				// add object to cache
				$cache->set( $cache_key, $rss );
				return $rss;
			}
		} else {
			$errormsg = "Failed to fetch $url ";
			if ( $resp->status == '-100' ) {
				$errormsg .= '(Request timed out after ' . $wgMagpieRSSFetchTimeout . ' seconds)';
			} elseif ( $resp->error ) {
				// strip trailing whitespace only, so that messages
				// without a trailing newline stay intact
				$http_error = rtrim( $resp->error );
				$errormsg .= "(HTTP Error: $http_error)";
			} else {
				$errormsg .= '(HTTP Response: ' . $resp->response_code . ')';
			}
		}
	} else {
		$errormsg = 'Unable to retrieve RSS file for unknown reasons.';
	}

	// else fetch failed

	// attempt to return cached object
	if ( $rss ) {
		wfDebugLog( 'RSS', "Returning STALE object for $url" );
		return $rss;
	}

	// else we totally failed
	$MAGPIE_ERROR = $errormsg;
	wfDebugLog(
		'RSS',
		'MagpieRSS (RSSFetch): we totally failed :-( Error message:' .
		$errormsg
	);

	return false;
} // end fetch_rss()
| 185 | + |
/**
 * Download a remote file over HTTP with the bundled Snoopy client.
 *
 * The request itself is best-effort: warnings raised by the fetch are
 * suppressed, and the caller is expected to inspect the returned client
 * to find out whether it worked.
 *
 * @param $url String: URL of the remote file
 * @param $headers Array: raw headers to send along with the request;
 *                 anything that is not an array (e.g. the default '') is ignored
 * @return the Snoopy client used for the request, which doubles as the
 *         HTTP response object (see Snoopy.class.php)
 */
function _fetch_remote_file( $url, $headers = '' ) {
	global $wgMagpieRSSFetchTimeout, $wgMagpieRSSUseGzip;

	// Snoopy is an HTTP client in PHP; only pull in the bundled copy when
	// no class of that name has been loaded yet (autoloading is not triggered)
	$snoopyAvailable = class_exists( 'Snoopy', false );
	if ( !$snoopyAvailable ) {
		require_once( dirname( __FILE__ ) . '/Snoopy.class.php' );
	}

	$snoopy = new Snoopy();

	// Extra request headers are optional
	if ( is_array( $headers ) ) {
		$snoopy->rawheaders = $headers;
	}

	// Transport settings come from the extension's configuration globals
	$snoopy->use_gzip = $wgMagpieRSSUseGzip;
	$snoopy->read_timeout = $wgMagpieRSSFetchTimeout;
	$snoopy->agent = 'MagpieRSS/0.72 (+http://magpierss.sourceforge.net) / MediaWiki RSS extension';

	@$snoopy->fetch( $url );

	return $snoopy;
}
| 209 | + |
/**
 * Parse an HTTP response object into an RSS object.
 *
 * The response body is handed to MagpieRSS using the configured
 * output/input/detect encoding globals. On success the ETag and
 * Last-Modified response headers (if any) are copied onto the parsed
 * object as $rss->etag and $rss->last_modified. On failure the parser's
 * error message is written to the 'RSS' debug log.
 *
 * @param $resp Object: an HTTP response object (see Snoopy); its
 *              ->results and ->headers members are read
 * @return parsed RSS object (see RSSParse) or false
 */
function _response_to_rss( $resp ) {
	global $wgMagpieRSSOutputEncoding, $wgMagpieRSSInputEncoding, $wgMagpieRSSDetectEncoding;

	$rss = new MagpieRSS(
		$resp->results,
		$wgMagpieRSSOutputEncoding,
		$wgMagpieRSSInputEncoding,
		$wgMagpieRSSDetectEncoding
	);

	// Parsing failed: log why and bail out
	if ( !$rss || $rss->ERROR ) {
		$errormsg = 'Failed to parse RSS file.';
		if ( $rss ) {
			$errormsg .= ' (' . $rss->ERROR . ')';
		}
		wfDebugLog( 'RSS', 'error!' . $errormsg );

		return false;
	}

	// A response without a usable header list simply yields no ETag /
	// Last-Modified information
	$headers = ( isset( $resp->headers ) && is_array( $resp->headers ) )
		? $resp->headers
		: array();

	// find Etag and Last-Modified
	foreach ( $headers as $h ) {
		// 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
		if ( strpos( $h, ': ' ) ) {
			list( $field, $val ) = explode( ': ', $h, 2 );
		} else {
			$field = $h;
			$val = '';
		}

		// HTTP field names are case-insensitive, so "Etag:" or "etag:"
		// must be recognized as well as "ETag:"
		if ( strcasecmp( $field, 'ETag' ) == 0 ) {
			$rss->etag = $val;
		}

		if ( strcasecmp( $field, 'Last-Modified' ) == 0 ) {
			$rss->last_modified = $val;
		}
	}

	return $rss;
}
\ No newline at end of file |
Property changes on: trunk/extensions/RSS/RSSFetch.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 258 | + native |
Index: trunk/extensions/RSS/RSSParse.php |
— | — | @@ -0,0 +1,494 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Hybrid parser, and object, takes RSS or Atom feed as a string and returns a |
| 5 | + * simple object. |
| 6 | + * Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 |
| 7 | + * |
| 8 | + * @file |
| 9 | + * @see RSSFetch.php for a simpler interface with integrated caching support |
| 10 | + */ |
class MagpieRSS {
	public $parser; // XML parser handle; only set while parsing, null afterwards

	public $current_item = array(); // item currently being parsed
	public $items = array(); // collection of parsed items
	public $channel = array(); // hash of channel fields
	public $textinput = array();
	public $image = array();
	public $feed_type;
	public $feed_version;
	public $encoding = ''; // output encoding of parsed rss

	public $_source_encoding = ''; // only set if we have to parse xml prolog

	public $ERROR = '';
	public $WARNING = '';

	// HTTP cache validators; filled in from the outside by _response_to_rss()
	public $etag;
	public $last_modified;

	// define some constants
	public $_CONTENT_CONSTRUCTS = array( 'content', 'summary', 'info', 'title', 'tagline', 'copyright' );
	public $_KNOWN_ENCODINGS = array( 'UTF-8', 'US-ASCII', 'ISO-8859-1' );

	// parser variables, useless if you're not a parser, treat as private
	public $stack = array(); // parser stack
	public $inchannel = false;
	public $initem = false;
	public $incontent = false; // if in Atom <content mode="xml"> field
	public $intextinput = false;
	public $inimage = false;
	public $current_namespace = false;

	/**
	 * Set up XML parser, parse source, and return populated RSS object.
	 *
	 * Failures are not thrown; they are recorded in $this->ERROR (and
	 * $this->WARNING), which callers are expected to check.
	 *
	 * @param $source String: string containing the RSS to be parsed
	 *
	 * NOTE: Probably a good idea to leave the encoding options alone unless
	 *       you know what you're doing as PHP's character set support is
	 *       a little weird.
	 *
	 * NOTE: A lot of this is unnecessary but harmless with PHP5
	 *
	 * @param $output_encoding String: output the parsed RSS in this character
	 *                         set defaults to ISO-8859-1 as this is PHP's
	 *                         default.
	 *
	 *                         NOTE: might be changed to UTF-8 in future
	 *                         versions.
	 *
	 * @param $input_encoding String: the character set of the incoming RSS source.
	 *                        Leave blank and Magpie will try to figure it
	 *                        out.
	 *
	 * @param $detect_encoding Boolean: if false, Magpie won't attempt to
	 *                         detect source encoding. (caveat emptor)
	 */
	function __construct( $source, $output_encoding = 'ISO-8859-1',
		$input_encoding = null, $detect_encoding = true )
	{
		# if PHP xml isn't compiled in, nothing below can work: record the
		# error and stop instead of calling undefined functions
		if ( !function_exists( 'xml_parser_create' ) ) {
			$this->error(
				"Failed to load PHP's XML Extension. " .
				'http://www.php.net/manual/en/ref.xml.php',
				E_USER_ERROR
			);
			return;
		}

		list( $parser, $source ) = $this->create_parser(
			$source,
			$output_encoding,
			$input_encoding,
			$detect_encoding
		);

		# xml_parser_create() hands back a resource on older PHP versions and
		# an XMLParser object on PHP 8+; both are valid parser handles
		if ( !is_resource( $parser ) && !is_object( $parser ) ) {
			$this->error(
				"Failed to create an instance of PHP's XML parser. " .
				'http://www.php.net/manual/en/ref.xml.php',
				E_USER_ERROR
			);
			return;
		}

		$this->parser = $parser;

		# setup handlers; binding them as array( object, method ) callables
		# makes a separate xml_set_object() call unnecessary
		xml_set_element_handler(
			$this->parser,
			array( $this, 'feed_start_element' ),
			array( $this, 'feed_end_element' )
		);

		xml_set_character_data_handler( $this->parser, array( $this, 'feed_cdata' ) );

		$status = xml_parse( $this->parser, $source );

		if ( !$status ) {
			$errorcode = xml_get_error_code( $this->parser );
			if ( $errorcode != XML_ERROR_NONE ) {
				$xml_error = xml_error_string( $errorcode );
				$error_line = xml_get_current_line_number( $this->parser );
				$error_col = xml_get_current_column_number( $this->parser );
				$errormsg = "$xml_error at line $error_line, column $error_col";

				$this->error( $errormsg );
			}
		}

		# only resource handles need to be freed explicitly
		if ( is_resource( $this->parser ) ) {
			xml_parser_free( $this->parser );
		}
		# drop the handle: it is of no use after parsing, and a parser object
		# left on $this would make the parsed feed impossible to serialize()
		$this->parser = null;

		$this->normalize();
	}

	/**
	 * XML parser callback: an element was opened.
	 *
	 * Identifies the feed type from the root element, tracks which
	 * container (channel, item, textinput, image, Atom content construct)
	 * the parser is in and pushes ordinary elements onto the parser stack.
	 *
	 * @param $p Mixed: the XML parser handle (unused)
	 * @param $element String: element name as reported by the parser
	 * @param $attrs Array: attribute name => value map; taken by value
	 *               because the XML extension does not pass a reference
	 */
	function feed_start_element( $p, $element, $attrs ) {
		$el = $element = strtolower( $element );
		$attrs = array_change_key_case( $attrs, CASE_LOWER );

		// check for a namespace, and split if found
		$ns = false;
		if ( strpos( $element, ':' ) ) {
			list( $ns, $el ) = explode( ':', $element, 2 );
		}
		if ( $ns && $ns != 'rdf' ) {
			$this->current_namespace = $ns;
		}

		// if feed type isn't set, then this is first element of feed
		// identify feed from root element
		if ( !isset( $this->feed_type ) ) {
			// the version attribute is optional in practice; absent means null
			$version = isset( $attrs['version'] ) ? $attrs['version'] : null;

			if ( $el == 'rdf' ) {
				$this->feed_type = 'RSS';
				$this->feed_version = '1.0';
			} elseif ( $el == 'rss' ) {
				$this->feed_type = 'RSS';
				$this->feed_version = $version;
			} elseif ( $el == 'feed' ) {
				$this->feed_type = 'Atom';
				$this->feed_version = $version;
				$this->inchannel = true;
			}
			return;
		}

		if ( $el == 'channel' ) {
			$this->inchannel = true;
		} elseif ( $el == 'item' || $el == 'entry' ) {
			$this->initem = true;
			if ( isset( $attrs['rdf:about'] ) ) {
				$this->current_item['about'] = $attrs['rdf:about'];
			}
		}

		// if we're in the default namespace of an RSS feed,
		// record textinput or image fields
		elseif (
			$this->feed_type == 'RSS' &&
			$this->current_namespace == '' &&
			$el == 'textinput' )
		{
			$this->intextinput = true;
		} elseif (
			$this->feed_type == 'RSS' &&
			$this->current_namespace == '' &&
			$el == 'image' )
		{
			$this->inimage = true;
		}

		// handle Atom content constructs
		elseif ( $this->feed_type == 'Atom' && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
			// avoid clashing w/ RSS mod_content
			if ( $el == 'content' ) {
				$el = 'atom_content';
			}

			$this->incontent = $el;
		}

		// if inside an Atom content construct (e.g. content or summary) field treat tags as text
		elseif ( $this->feed_type == 'Atom' && $this->incontent ) {
			// if tags are inlined, then flatten
			$attrs_str = join(
				' ',
				array_map(
					array( 'MagpieRSS', 'mapAttributes' ),
					array_keys( $attrs ),
					array_values( $attrs )
				)
			);

			$this->append_content( "<$element $attrs_str>" );

			array_unshift( $this->stack, $el );
		}

		// Atom supports many links per containing element.
		// Magpie treats link elements of type rel='alternate'
		// as being equivalent to RSS's simple link element.
		elseif ( $this->feed_type == 'Atom' && $el == 'link' ) {
			// a missing rel/href counts as empty instead of raising a notice
			$rel = isset( $attrs['rel'] ) ? $attrs['rel'] : '';
			$href = isset( $attrs['href'] ) ? $attrs['href'] : '';

			if ( $rel == 'alternate' ) {
				$link_el = 'link';
			} else {
				$link_el = 'link_' . $rel;
			}

			$this->append( $link_el, $href );
		} else { // set stack[0] to current element
			array_unshift( $this->stack, $el );
		}
	}

	/**
	 * XML parser callback: character data was read.
	 *
	 * Inside an Atom content construct the text is appended to that
	 * construct; otherwise it is stored under a key built from the parser
	 * stack (outermost element first, joined with '_').
	 *
	 * @param $p Mixed: the XML parser handle (unused)
	 * @param $text String: the character data
	 */
	function feed_cdata( $p, $text ) {
		if ( $this->feed_type == 'Atom' && $this->incontent ) {
			$this->append_content( $text );
		} else {
			$current_el = join( '_', array_reverse( $this->stack ) );
			$this->append( $current_el, $text );
		}
	}

	/**
	 * XML parser callback: an element was closed.
	 *
	 * Finishes the current item, leaves the container the element
	 * represents, or pops the element from the parser stack. The current
	 * namespace is reset in every case.
	 *
	 * @param $p Mixed: the XML parser handle (unused)
	 * @param $el String: element name as reported by the parser
	 */
	function feed_end_element( $p, $el ) {
		$el = strtolower( $el );

		if ( $el == 'item' || $el == 'entry' ) {
			$this->items[] = $this->current_item;
			$this->current_item = array();
			$this->initem = false;
		} elseif ( $this->feed_type == 'RSS' && $this->current_namespace == '' && $el == 'textinput' ) {
			$this->intextinput = false;
		} elseif ( $this->feed_type == 'RSS' && $this->current_namespace == '' && $el == 'image' ) {
			$this->inimage = false;
		} elseif ( $this->feed_type == 'Atom' && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
			$this->incontent = false;
		} elseif ( $el == 'channel' || $el == 'feed' ) {
			$this->inchannel = false;
		} elseif ( $this->feed_type == 'Atom' && $this->incontent ) {
			// balance tags properly
			// note: I don't think this is actually necessary
			// (an empty stack is treated like a mismatch)
			if ( isset( $this->stack[0] ) && $this->stack[0] == $el ) {
				$this->append_content( "</$el>" );
			} else {
				$this->append_content( "<$el />" );
			}

			array_shift( $this->stack );
		} else {
			array_shift( $this->stack );
		}

		$this->current_namespace = false;
	}

	/**
	 * Append $str2 to $str1 in place, initializing $str1 to '' first if
	 * it is not set yet (e.g. an array slot that doesn't exist so far).
	 *
	 * @param $str1 String: target, passed by reference
	 * @param $str2 String: text to append
	 */
	function concat( &$str1, $str2 = '' ) {
		if ( !isset( $str1 ) ) {
			$str1 = '';
		}
		$str1 .= $str2;
	}

	/**
	 * Append text to the Atom content construct currently being read,
	 * on the current item if inside one, else on the channel.
	 *
	 * @param $text String: text (or flattened markup) to append
	 */
	function append_content( $text ) {
		if ( $this->initem ) {
			$this->concat( $this->current_item[$this->incontent], $text );
		} elseif ( $this->inchannel ) {
			$this->concat( $this->channel[$this->incontent], $text );
		}
	}

	/**
	 * Smart append - field and namespace aware.
	 *
	 * Appends $text to field $el of whichever container the parser is
	 * currently in; namespaced elements are stored one level deeper, under
	 * their namespace prefix. Note that the container precedence differs
	 * between the namespaced and the default-namespace case.
	 *
	 * @param $el String: field name; nothing happens when empty
	 * @param $text String: text to append
	 */
	function append( $el, $text ) {
		if ( !$el ) {
			return;
		}

		$ns = $this->current_namespace;
		if ( $ns ) {
			if ( $this->initem ) {
				$this->concat( $this->current_item[$ns][$el], $text );
			} elseif ( $this->inchannel ) {
				$this->concat( $this->channel[$ns][$el], $text );
			} elseif ( $this->intextinput ) {
				$this->concat( $this->textinput[$ns][$el], $text );
			} elseif ( $this->inimage ) {
				$this->concat( $this->image[$ns][$el], $text );
			}
			return;
		}

		if ( $this->initem ) {
			$this->concat( $this->current_item[$el], $text );
		} elseif ( $this->intextinput ) {
			$this->concat( $this->textinput[$el], $text );
		} elseif ( $this->inimage ) {
			$this->concat( $this->image[$el], $text );
		} elseif ( $this->inchannel ) {
			$this->concat( $this->channel[$el], $text );
		}
	}

	/**
	 * Mirror Atom fields into their RSS counterparts (or the other way
	 * round) so that callers can use one set of keys for both feed types,
	 * and add a 'date_timestamp' (Unix epoch) to items whose date parses.
	 */
	function normalize() {
		// if atom populate rss fields
		if ( $this->is_atom() ) {
			// a feed without tagline gets an explicit null description
			$this->channel['description'] = isset( $this->channel['tagline'] )
				? $this->channel['tagline']
				: null;

			foreach ( $this->items as $i => $item ) {
				if ( isset( $item['summary'] ) ) {
					$item['description'] = $item['summary'];
				}
				if ( isset( $item['atom_content'] ) ) {
					$item['content']['encoded'] = $item['atom_content'];
				}

				// prefer the issue date, fall back to the modification date
				$atom_date = null;
				if ( isset( $item['issued'] ) ) {
					$atom_date = $item['issued'];
				} elseif ( isset( $item['modified'] ) ) {
					$atom_date = $item['modified'];
				}

				if ( $atom_date ) {
					$epoch = self::parse_w3cdtf( $atom_date );
					if ( $epoch && $epoch > 0 ) {
						$item['date_timestamp'] = $epoch;
					}
				}

				$this->items[$i] = $item;
			}
		} elseif ( $this->is_rss() ) {
			// a channel without description gets an explicit null tagline
			$this->channel['tagline'] = isset( $this->channel['description'] )
				? $this->channel['description']
				: null;

			foreach ( $this->items as $i => $item ) {
				if ( isset( $item['description'] ) ) {
					$item['summary'] = $item['description'];
				}
				if ( isset( $item['content']['encoded'] ) ) {
					$item['atom_content'] = $item['content']['encoded'];
				}

				if ( $this->is_rss() == '1.0' && isset( $item['dc']['date'] ) ) {
					$epoch = self::parse_w3cdtf( $item['dc']['date'] );
					if ( $epoch && $epoch > 0 ) {
						$item['date_timestamp'] = $epoch;
					}
				} elseif ( isset( $item['pubdate'] ) ) {
					// warnings from the date functions are deliberately muted
					$epoch = @strtotime( $item['pubdate'] );
					if ( $epoch > 0 ) {
						$item['date_timestamp'] = $epoch;
					}
				}

				$this->items[$i] = $item;
			}
		}
	}

	/**
	 * @return the feed version if this is an RSS feed, false otherwise
	 */
	function is_rss() {
		if ( $this->feed_type == 'RSS' ) {
			return $this->feed_version;
		} else {
			return false;
		}
	}

	/**
	 * @return the feed version if this is an Atom feed, false otherwise
	 */
	function is_atom() {
		if ( $this->feed_type == 'Atom' ) {
			return $this->feed_version;
		} else {
			return false;
		}
	}

	/**
	 * Instantiate an XML parser.
	 *
	 * @param $source String: the XML source; passed through unchanged
	 * @param $out_enc String: target (output) encoding; also stored in
	 *                 $this->encoding when non-empty
	 * @param $in_enc String: source encoding, only used when $detect is false
	 * @param $detect Boolean: let the parser detect the source encoding
	 * @return Array: XML parser, and possibly re-encoded source
	 */
	function create_parser( $source, $out_enc, $in_enc, $detect ) {
		// by default PHP5 does a fine job of detecting input encodings
		if( !$detect && $in_enc ) {
			$parser = xml_parser_create( $in_enc );
		} else {
			$parser = xml_parser_create( '' );
		}
		if ( $out_enc ) {
			$this->encoding = $out_enc;
			xml_parser_set_option(
				$parser,
				XML_OPTION_TARGET_ENCODING,
				$out_enc
			);
		}

		return array( $parser, $source );
	}

	/**
	 * Checks if $enc is an encoding type supported by MagpieRSS.
	 * @param $enc String: encoding name (case-insensitive)
	 * @return String or false: the upper-cased name, false if unsupported
	 */
	function known_encoding( $enc ) {
		$enc = strtoupper( $enc );
		if ( in_array( $enc, $this->_KNOWN_ENCODINGS ) ) {
			return $enc;
		} else {
			return false;
		}
	}

	/**
	 * Record an error or warning message on this object.
	 *
	 * Notice-level messages go to $this->WARNING, everything else to
	 * $this->ERROR; a previous message in that slot is overwritten.
	 *
	 * @param $errormsg String: the message
	 * @param $lvl Integer: one of PHP's E_* error level constants
	 */
	function error( $errormsg, $lvl = E_USER_WARNING ) {
		// append PHP's error message if track_errors is enabled
		if ( isset( $php_errormsg ) ) {
			$errormsg .= " ($php_errormsg)";
		}

		$notices = E_USER_NOTICE|E_NOTICE;
		if ( $lvl&$notices ) {
			$this->WARNING = $errormsg;
		} else {
			$this->ERROR = $errormsg;
		}
	}

	/**
	 * Parse a W3CDTF date into unix epoch.
	 * This used to be in its own file.
	 *
	 * Seconds and the timezone designator are optional; a date without
	 * timezone is taken to be GMT.
	 *
	 * @note http://www.w3.org/TR/NOTE-datetime
	 * @param $date_str String: date string to parse
	 * @return Integer: Unix timestamp, or -1 if $date_str doesn't match
	 */
	public static function parse_w3cdtf( $date_str ) {
		// regex to match wc3dtf
		// groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":SS",
		//         7 seconds, 8 tz sign, 9 tz hours, 10 tz minutes, 11 "Z"
		$pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

		if ( !preg_match( $pat, $date_str, $match ) ) {
			return -1;
		}

		// group 7 holds the bare seconds (group 6 includes the colon);
		// trailing optional groups are absent from $match when unmatched
		$seconds = ( isset( $match[7] ) && $match[7] !== '' ) ? (int)$match[7] : 0;

		// calculate epoch for current date assuming GMT
		$epoch = gmmktime(
			(int)$match[4],
			(int)$match[5],
			$seconds,
			(int)$match[2],
			(int)$match[3],
			(int)$match[1]
		);

		// "Z" (zulu time, aka GMT) or no designator at all: no offset
		$offset = 0;
		if ( isset( $match[8] ) && $match[8] !== '' ) {
			$offset_secs = ( ( (int)$match[9] * 60 ) + (int)$match[10] ) * 60;

			// is timezone ahead of GMT? then subtract offset
			if ( $match[8] == '+' ) {
				$offset_secs = $offset_secs * -1;
			}

			$offset = $offset_secs;
		}

		return $epoch + $offset;
	}

	/**
	 * Render one attribute as name="value"; used to flatten inline markup
	 * found inside Atom content constructs.
	 *
	 * NOTE(review): the value is inserted verbatim, without any HTML
	 * escaping - confirm that consumers sanitize the flattened markup.
	 *
	 * @param $k String: attribute name
	 * @param $v String: attribute value
	 * @return String
	 */
	public static function mapAttributes( $k, $v ) {
		return "$k=\"$v\"";
	}

} // end class MagpieRSS
\ No newline at end of file |
Property changes on: trunk/extensions/RSS/RSSParse.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 496 | + native |
Index: trunk/extensions/RSS/RSS.i18n.php |
— | — | @@ -1,6 +1,6 @@ |
2 | 2 | <?php |
3 | 3 | /** |
4 | | - * Internationalisation file for extension RSS. |
| 4 | + * Internationalization file for RSS extension. |
5 | 5 | * |
6 | 6 | * @file |
7 | 7 | * @ingroup Extensions |
— | — | @@ -8,7 +8,35 @@ |
9 | 9 | |
10 | 10 | $messages = array(); |
11 | 11 | |
| 12 | +/** English |
| 13 | + * @author Łukasz Garczewski (TOR) <tor@wikia-inc.com> |
| 14 | + */ |
12 | 15 | $messages['en'] = array( |
13 | 16 | 'rss-desc' => 'Displays an RSS feed on a wiki page', |
| 17 | + 'rss-error' => 'Failed to load RSS feed from $1: $2', |
| 18 | + 'rss-empty' => 'Failed to load RSS feed from $1!', |
14 | 19 | ); |
15 | 20 | |
| 21 | +/** Finnish (Suomi) |
| 22 | + * @author Jack Phoenix <jack@countervandalism.net> |
| 23 | + */ |
| 24 | +$messages['fi'] = array( |
| 25 | + 'rss-error' => 'RSS-syötteen lataaminen osoitteesta $1 epäonnistui: $2', |
| 26 | + 'rss-empty' => 'RSS-syötteen lataaminen osoitteesta $1 epäonnistui!', |
| 27 | +); |
| 28 | + |
| 29 | +/** Dutch (Nederlands) |
| 30 | + * @author Mitchel Corstjens |
| 31 | + */ |
| 32 | +$messages['nl'] = array( |
| 33 | + 'rss-error' => 'Kon RSS feed van $1 niet laden, fout: $2', |
| 34 | + 'rss-empty' => 'Kon RSS feed van $1 niet laden!', |
| 35 | +); |
| 36 | + |
| 37 | +/** Polish (Polski) |
| 38 | + * @author Łukasz Garczewski (TOR) <tor@wikia-inc.com> |
| 39 | + */ |
| 40 | +$messages['pl'] = array( |
| 41 | + 'rss-error' => 'Nie udało się odczytać kanału $1: $2', |
| 42 | + 'rss-empty' => 'Nie udało się odczytać kanału $1!', |
| 43 | +); |
\ No newline at end of file |
Index: trunk/extensions/RSS/RSS.php |
— | — | @@ -1,61 +1,57 @@ |
2 | 2 | <?php |
3 | | - |
4 | 3 | /** |
5 | | - * RSS-Feed MediaWiki extension. |
6 | | - * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation |
| 4 | + * RSS-Feed MediaWiki extension |
7 | 5 | * |
8 | | - * @file RSS.php |
| 6 | + * @file |
9 | 7 | * @ingroup Extensions |
10 | | - * |
11 | | - * TODO: replace all @ by wfSurpressWarnings and wfResumeWarnings |
| 8 | + * @version 1.7 |
| 9 | + * @author mutante, Daniel Kinzler, Rdb, Mafs, Alxndr, Chris Reigrut, K001 |
| 10 | + * @author Kellan Elliott-McCrea <kellan@protest.net> -- author of MagpieRSS |
| 11 | + * @author Jeroen De Dauw |
| 12 | + * @author Jack Phoenix <jack@countervandalism.net> |
| 13 | + * @copyright © Kellan Elliott-McCrea <kellan@protest.net> |
| 14 | + * @copyright © mutante, Daniel Kinzler, Rdb, Mafs, Alxndr, Chris Reigrut, K001 |
| 15 | + * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation |
12 | 16 | */ |
13 | 17 | |
14 | | -if ( !defined( 'MEDIAWIKI' ) ) { |
| 18 | +if( !defined( 'MEDIAWIKI' ) ) { |
15 | 19 | die( "This is not a valid entry point.\n" ); |
16 | 20 | } |
17 | 21 | |
18 | | -define( 'RSS_VERSION', '1.7' ); |
19 | | - |
| 22 | +// Extension credits that will show up on Special:Version |
20 | 23 | $wgExtensionCredits['parserhook'][] = array( |
21 | | - 'path' => __FILE__, |
22 | 24 | 'name' => 'RSS feed', |
23 | 25 | 'author' => array( |
| 26 | + 'Kellan Elliott-McCrea', |
24 | 27 | 'mutante', |
25 | | - 'Duesentrieb', |
| 28 | + 'Daniel Kinzler', |
26 | 29 | 'Rdb', |
27 | 30 | 'Mafs', |
28 | 31 | 'Alxndr', |
29 | 32 | 'Wikinaut', |
30 | | - 'Cmreigrut', |
| 33 | + 'Chris Reigrut', |
31 | 34 | 'K001', |
32 | | - '[http://www.mediawiki.org/wiki/User:Jeroen_De_Dauw Jeroen De Dauw]' |
| 35 | + 'Jack Phoenix', |
| 36 | + 'Jeroen De Dauw' |
33 | 37 | ), |
34 | | - 'version' => RSS_VERSION, |
| 38 | + 'version' => '1.7', |
35 | 39 | 'url' => 'http://www.mediawiki.org/wiki/Extension:RSS', |
| 40 | + 'description' => 'Displays an RSS feed on a wiki page', |
36 | 41 | 'descriptionmsg' => 'rss-desc', |
37 | 42 | ); |
38 | 43 | |
39 | | -$dir = dirname( __FILE__ ); |
40 | | -$wgExtensionMessagesFiles['RSS'] = "$dir/RSS.i18n.php"; |
| 44 | +// Internationalization file and autoloadable classes |
| 45 | +$dir = dirname( __FILE__ ) . '/'; |
| 46 | +$wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php'; |
| 47 | +$wgAutoloadClasses['MagpieRSS'] = $dir . 'RSSParse.php'; |
| 48 | +$wgAutoloadClasses['RSSCache'] = $dir . 'RSSCache.php'; |
41 | 49 | |
42 | | -define( 'MAGPIE_OUTPUT_ENCODING', 'UTF-8' ); |
| 50 | +$wgHooks['ParserFirstCallInit'][] = 'wfRssExtension'; |
43 | 51 | |
44 | | -# change this according to your magpie installation! |
45 | | -require_once( dirname( __FILE__ ) . '/magpierss/rss_fetch.inc' ); |
46 | | - |
47 | | -// Avoid unstubbing $wgParser too early on modern (1.12+) MW versions, as per r35980 |
48 | | -if ( defined( 'MW_SUPPORTS_PARSERFIRSTCALLINIT' ) ) { |
49 | | - $wgHooks['ParserFirstCallInit'][] = 'wfRssExtension'; |
50 | | -} else { |
51 | | - $wgExtensionFunctions[] = 'wfRssExtension'; |
52 | | -} |
53 | | - |
54 | 52 | # Extension hook callback function |
55 | | -function wfRssExtension() { |
56 | | - global $wgParser; |
57 | | - |
| 53 | +function wfRssExtension( &$parser ) { |
58 | 54 | # Install parser hook for <rss> tags |
59 | | - $wgParser->setHook( 'rss', 'renderRss' ); |
| 55 | + $parser->setHook( 'rss', 'renderRss' ); |
60 | 56 | return true; |
61 | 57 | } |
62 | 58 | |
— | — | @@ -66,90 +62,137 @@ |
67 | 63 | // Kill parser cache |
68 | 64 | $wgParser->disableCache(); |
69 | 65 | |
70 | | - if ( !$input ) return ''; # if <rss>-section is empty, return nothing |
| 66 | + if ( !$input ) { |
| 67 | + return ''; # if <rss>-section is empty, return nothing |
| 68 | + } |
71 | 69 | |
72 | | - # Parse fields in rss-section |
| 70 | + # Parse fields in rss section |
73 | 71 | $fields = explode( '|', $input ); |
74 | | - $url = @$fields[0]; |
| 72 | + wfSuppressWarnings(); |
| 73 | + $url = $fields[0]; |
| 74 | + wfRestoreWarnings(); |
75 | 75 | |
76 | 76 | $args = array(); |
77 | 77 | for ( $i = 1; $i < sizeof( $fields ); $i++ ) { |
78 | 78 | $f = $fields[$i]; |
79 | 79 | |
80 | | - if ( strpos( $f, '=' ) === false ) $args[strtolower( trim( $f ) )] = false; |
81 | | - else { |
| 80 | + if ( strpos( $f, '=' ) === false ) { |
| 81 | + $args[strtolower( trim( $f ) )] = false; |
| 82 | + } else { |
82 | 83 | list( $k, $v ) = explode( '=', $f, 2 ); |
83 | | - if ( trim( $v ) == false ) $args[strtolower( trim( $k ) )] = false; |
84 | | - else $args[strtolower( trim( $k ) )] = trim( $v ); |
| 84 | + if ( trim( $v ) == false ) { |
| 85 | + $args[strtolower( trim( $k ) )] = false; |
| 86 | + } else { |
| 87 | + $args[strtolower( trim( $k ) )] = trim( $v ); |
| 88 | + } |
85 | 89 | } |
86 | 90 | } |
87 | 91 | |
88 | | - # Get charset from argument-array |
89 | | - $charset = @$args['charset']; |
90 | | - if ( !$charset ) $charset = $wgOutputEncoding; |
| 92 | + # Get charset from argument array |
| 93 | + wfSuppressWarnings(); |
| 94 | + $charset = $args['charset']; |
| 95 | + wfRestoreWarnings(); |
| 96 | + if( !$charset ) { |
| 97 | + $charset = $wgOutputEncoding; |
| 98 | + } |
| 99 | + |
91 | 100 | # Get max number of headlines from argument-array |
92 | | - $maxheads = @$args['max']; |
| 101 | + wfSuppressWarnings(); |
| 102 | + $maxheads = $args['max']; |
| 103 | + wfRestoreWarnings(); |
93 | 104 | $headcnt = 0; |
94 | 105 | |
95 | | - # Get short-flag from argument-array |
| 106 | + # Get short flag from argument array |
96 | 107 | # If short is set, no description text is printed |
97 | | - if ( isset( $args['short'] ) ) $short = true; else $short = false; |
98 | | - # Get reverse-flag from argument-array |
99 | | - if ( isset( $args['reverse'] ) ) $reverse = true; else $reverse = false; |
| 108 | + if( isset( $args['short'] ) ) { |
| 109 | + $short = true; |
| 110 | + } else { |
| 111 | + $short = false; |
| 112 | + } |
100 | 113 | |
101 | | - # Get date format from argument-array |
102 | | - if ( isset( $args["date"] ) ) { |
103 | | - $date = @$args["date"]; |
104 | | - if ( $date == '' ) |
| 114 | + # Get reverse flag from argument array |
| 115 | + if( isset( $args['reverse'] ) ) { |
| 116 | + $reverse = true; |
| 117 | + } else { |
| 118 | + $reverse = false; |
| 119 | + } |
| 120 | + |
| 121 | + # Get date format from argument array |
| 122 | + if ( isset( $args['date'] ) ) { |
| 123 | + wfSuppressWarnings(); |
| 124 | + $date = $args['date']; |
| 125 | + wfRestoreWarnings(); |
| 126 | + if ( $date == '' ) { |
105 | 127 | $date = 'd M Y H:i'; |
| 128 | + } |
| 129 | + } else { |
| 130 | + $date = false; |
106 | 131 | } |
107 | | - else |
108 | | - $date = false; |
109 | 132 | |
110 | 133 | # Get highlight terms from argument array |
111 | | - $rssHighlight = @$args['highlight']; |
| 134 | + wfSuppressWarnings(); |
| 135 | + $rssHighlight = $args['highlight']; |
| 136 | + wfRestoreWarnings(); |
112 | 137 | $rssHighlight = str_replace( ' ', ' ', $rssHighlight ); |
113 | 138 | $rssHighlight = explode( ' ', trim( $rssHighlight ) ); |
114 | 139 | |
115 | | - # Get filter terms from argument-array |
116 | | - $rssFilter = @$args['filter']; |
| 140 | + # Get filter terms from argument array |
| 141 | + wfSuppressWarnings(); |
| 142 | + $rssFilter = $args['filter']; |
| 143 | + wfRestoreWarnings(); |
117 | 144 | $rssFilter = str_replace( ' ', ' ', $rssFilter ); |
118 | 145 | $rssFilter = explode( ' ', trim( $rssFilter ) ); |
119 | 146 | |
120 | 147 | # Filterout terms |
121 | | - $rssFilterout = @$args['filterout']; |
| 148 | + wfSuppressWarnings(); |
| 149 | + $rssFilterout = $args['filterout']; |
| 150 | + wfRestoreWarnings(); |
122 | 151 | $rssFilterout = str_replace( ' ', ' ', $rssFilterout ); |
123 | 152 | $rssFilterout = explode( ' ', trim( $rssFilterout ) ); |
124 | 153 | |
125 | 154 | # Fetch RSS. May be cached locally. |
126 | | - # Refer to the documentation of magpie for details. |
127 | | - $rss = @fetch_rss( $url ); |
| 155 | + # Refer to the documentation of MagpieRSS for details. |
| 156 | + if ( !function_exists( 'fetch_rss' ) ) { |
| 157 | + include( dirname( __FILE__ ) . '/RSSFetch.php' ); // provides fetch_rss() function |
| 158 | + } |
| 159 | + wfSuppressWarnings(); |
| 160 | + $rss = fetch_rss( $url ); |
| 161 | + wfRestoreWarnings(); |
128 | 162 | |
129 | 163 | # Check for errors. |
| 164 | + if ( empty( $rss ) ) { |
| 165 | + wfLoadExtensionMessages( 'RSS' ); |
| 166 | + return wfMsg( 'rss-empty', $url ); |
| 167 | + } |
| 168 | + |
130 | 169 | if ( $rss->ERROR ) { |
131 | | - return "<div>Failed to load RSS feed from $url: " . $rss->ERROR . "</div>"; # localize… |
| 170 | + wfLoadExtensionMessages( 'RSS' ); |
| 171 | + return '<div>' . wfMsg( 'rss-error', $url, $rss->ERROR ) . '</div>'; |
132 | 172 | } |
133 | 173 | |
134 | 174 | if ( !is_array( $rss->items ) ) { |
135 | | - return "<div>Failed to load RSS feed from $url!</div>"; # localize… |
| 175 | + wfLoadExtensionMessages( 'RSS' ); |
| 176 | + return '<div>' . wfMsg( 'rss-empty', $url ) . '</div>'; |
136 | 177 | } |
137 | 178 | |
138 | 179 | # Build title line |
139 | | - # $title = iconv($charset, $wgOutputEncoding, $rss->channel['title']); |
140 | | - # if( $rss->channel['link'] ) $title = "<a href='".$rss->channel['link']."'>$title</a>"; |
| 180 | + #$title = iconv( $charset, $wgOutputEncoding, $rss->channel['title'] ); |
| 181 | + #if( $rss->channel['link'] ) $title = "<a href='" . $rss->channel['link'] . "'>$title</a>"; |
141 | 182 | |
142 | 183 | $output = ''; |
143 | | - if ( $reverse ) $rss->items = array_reverse( $rss->items ); |
| 184 | + if( $reverse ) { |
| 185 | + $rss->items = array_reverse( $rss->items ); |
| 186 | + } |
144 | 187 | $description = false; |
145 | 188 | foreach ( $rss->items as $item ) { |
146 | | - if ( $item['description'] ) { |
| 189 | + if ( isset( $item['description'] ) && $item['description'] ) { |
147 | 190 | $description = true; |
148 | 191 | break; |
149 | 192 | } |
150 | 193 | } |
151 | 194 | |
152 | 195 | # Build items |
153 | | - if ( !$short and $description ) { # full item list |
| 196 | + if ( !$short && $description ) { # full item list |
154 | 197 | $output .= '<dl>'; |
155 | 198 | |
156 | 199 | foreach ( $rss->items as $item ) { |
— | — | @@ -171,7 +214,7 @@ |
172 | 215 | # Build description text if desired |
173 | 216 | if ( $item['description'] ) { |
174 | 217 | $text = trim( iconv( $charset, $wgOutputEncoding, $item['description'] ) ); |
175 | | - # Avoid pre-tags |
| 218 | + # Avoid <pre> tags |
176 | 219 | $text = str_replace( "\r", ' ', $text ); |
177 | 220 | $text = str_replace( "\n", ' ', $text ); |
178 | 221 | $text = str_replace( "\t", ' ', $text ); |
— | — | @@ -180,23 +223,29 @@ |
181 | 224 | $d_text = wfRssFilter( $text, $rssFilter ); |
182 | 225 | $d_text = wfRssFilterout( $text, $rssFilterout ); |
183 | 226 | $text = wfRssHighlight( $text, $rssHighlight ); |
184 | | - $display = $d_text or $d_title; |
| 227 | + $display = $d_text || $d_title; |
185 | 228 | } else { |
186 | 229 | $text = ''; |
187 | 230 | $display = $d_title; |
188 | 231 | } |
189 | 232 | if ( $display ) { |
190 | | - $output .= "<dt><a href='$href'><b>$title</b></a></dt>"; |
191 | | - if ( $date ) $output .= " ($pubdate)"; |
192 | | - if ( $text ) $output .= "<dd>$text <b>[<a href='$href'>?</a>]</b></dd>"; |
| 233 | + $output.= "<dt><a href='$href'><b>$title</b></a></dt>"; |
| 234 | + if ( $date ) { |
| 235 | + $output .= " ($pubdate)"; |
| 236 | + } |
| 237 | + if ( $text ) { |
| 238 | + $output .= "<dd>$text <b>[<a href='$href'>?</a>]</b></dd>"; |
| 239 | + } |
193 | 240 | } |
194 | 241 | # Cut off output when maxheads is reached: |
195 | | - if ( ++$headcnt == $maxheads ) break; |
| 242 | + if ( ++$headcnt == $maxheads ) { |
| 243 | + break; |
| 244 | + } |
196 | 245 | } |
197 | 246 | |
198 | 247 | $output .= '</dl>'; |
199 | 248 | } else { # short item list |
200 | | - # # HACKY HACKY HACKY |
| 249 | + ## HACKY HACKY HACKY |
201 | 250 | $output .= '<ul>'; |
202 | 251 | $displayed = array(); |
203 | 252 | foreach ( $rss->items as $item ) { |
— | — | @@ -205,26 +254,29 @@ |
206 | 255 | $d_title = wfRssFilter( $title, $rssFilter ) && wfRssFilterout( $title, $rssFilterout ); |
207 | 256 | $title = wfRssHighlight( $title, $rssHighlight ); |
208 | 257 | if ( $date ) { |
209 | | - $pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) ); |
| 258 | + $pubdate = isset( $item['pubdate'] ) ? trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) ) : ''; |
210 | 259 | if ( $pubdate == '' ) { |
211 | 260 | $pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['dc']['date'] ) ); |
212 | | - } |
| 261 | + } |
213 | 262 | $pubdate = date( $date, strtotime( $pubdate ) ); |
214 | 263 | } |
| 264 | + |
215 | 265 | if ( $d_title && !in_array( $title, $displayed ) ) { |
216 | 266 | // Add date to ouput if specified |
217 | 267 | $output .= '<li><a href="' . $href . '" title="' . $title . '">' . $title . '</a>'; |
218 | | - if ( $date ) { |
| 268 | + if( $date ) { |
219 | 269 | $output .= " ($pubdate)"; |
220 | 270 | } |
221 | 271 | $output .= '</li>'; |
222 | 272 | |
223 | 273 | $displayed[] = $title; |
224 | 274 | # Cut off output when maxheads is reached: |
225 | | - if ( ++$headcnt == $maxheads ) break; |
| 275 | + if ( ++$headcnt == $maxheads ) { |
| 276 | + break; |
| 277 | + } |
226 | 278 | } |
227 | 279 | } |
228 | | - $output .= '</ul>'; |
| 280 | + $output.= '</ul>'; |
229 | 281 | } |
230 | 282 | |
231 | 283 | return $output; |
— | — | @@ -233,7 +285,7 @@ |
234 | 286 | function wfRssFilter( $text, $rssFilter ) { |
235 | 287 | $display = true; |
236 | 288 | if ( is_array( $rssFilter ) ) { |
237 | | - foreach ( $rssFilter as $term ) { |
| 289 | + foreach( $rssFilter as $term ) { |
238 | 290 | if ( $term ) { |
239 | 291 | $display = false; |
240 | 292 | if ( preg_match( "|$term|i", $text, $a ) ) { |
— | — | @@ -241,7 +293,9 @@ |
242 | 294 | return $display; |
243 | 295 | } |
244 | 296 | } |
245 | | - if ( $display ) break; |
| 297 | + if ( $display ) { |
| 298 | + break; |
| 299 | + } |
246 | 300 | } |
247 | 301 | } |
248 | 302 | return $display; |
— | — | @@ -275,11 +329,13 @@ |
276 | 330 | $count_color = count( $color ); |
277 | 331 | |
278 | 332 | if ( is_array( $rssHighlight ) ) { |
279 | | - foreach ( $rssHighlight as $term ) { |
| 333 | + foreach( $rssHighlight as $term ) { |
280 | 334 | if ( $term ) { |
281 | 335 | $text = preg_replace( "|\b(\w*?" . $term . "\w*?)\b|i", "$starttag" . "_" . $i . "\\1$endtag", $text ); |
282 | 336 | $i++; |
283 | | - if ( $i == $count_color ) $i = 0; |
| 337 | + if ( $i == $count_color ) { |
| 338 | + $i = 0; |
| 339 | + } |
284 | 340 | } |
285 | 341 | } |
286 | 342 | } |
— | — | @@ -291,5 +347,4 @@ |
292 | 348 | } |
293 | 349 | |
294 | 350 | return $text; |
295 | | -} |
296 | | -# PHP closing tag intentionally left blank |
\ No newline at end of file |
| 351 | +} |
\ No newline at end of file |