Index: trunk/extensions/RSS/Snoopy.class.php |
— | — | @@ -1,1241 +0,0 @@ |
2 | | -<?php |
3 | | - |
4 | | -/************************************************* |
5 | | - |
6 | | -Snoopy - the PHP net client |
7 | | -Author: Monte Ohrt <monte@ispi.net> |
8 | | -Copyright (c): 1999-2008 New Digital Group, all rights reserved |
9 | | -Version: 1.2.5-dev (revision 1.27) |
10 | | -Note: some coding style changes by Jack Phoenix <jack@countervandalism.net> |
11 | | - var -> public, added some braces, double quotes -> single quotes, etc. |
12 | | - also added the gzip support stuff from MagpieRSS' Snoopy to this ver |
13 | | - |
14 | | - * This library is free software; you can redistribute it and/or |
15 | | - * modify it under the terms of the GNU Lesser General Public |
16 | | - * License as published by the Free Software Foundation; either |
17 | | - * version 2.1 of the License, or (at your option) any later version. |
18 | | - * |
19 | | - * This library is distributed in the hope that it will be useful, |
20 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
22 | | - * Lesser General Public License for more details. |
23 | | - * |
24 | | - * You should have received a copy of the GNU Lesser General Public |
25 | | - * License along with this library; if not, write to the Free Software |
26 | | - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
27 | | - |
28 | | -You may contact the author of Snoopy by e-mail at: |
29 | | -monte@ohrt.com |
30 | | - |
31 | | -The latest version of Snoopy can be obtained from: |
32 | | -http://snoopy.sourceforge.net/ |
33 | | - |
34 | | -*************************************************/ |
35 | | - |
36 | | -class Snoopy { |
/**** Public variables ****/

/* user definable vars */

// host name we are connecting to
public $host = 'www.php.net';
// port we are connecting to
public $port = 80;
// proxy host to use
public $proxy_host = '';
// proxy port to use
public $proxy_port = '';
// proxy user to use
public $proxy_user = '';
// proxy password to use
public $proxy_pass = '';

// agent we masquerade as
public $agent = 'Snoopy v1.2.5-dev';
// referer info to pass
public $referer = '';
// array of cookies to pass, e.g. $cookies['username'] = 'joe';
public $cookies = array();
// array of raw headers to send, e.g. $rawheaders['Content-type'] = 'text/html';
public $rawheaders = array();

// http redirection depth maximum. 0 = disallow
public $maxredirs = 5;
// contains address of last redirected address
public $lastredirectaddr = '';
// allows redirection off-site
public $offsiteok = true;
// frame content depth maximum. 0 = disallow
public $maxframes = 0;
// expand links to fully qualified URLs.
// this only applies to fetchlinks(), submitlinks(), and submittext()
public $expandlinks = true;
// pass set cookies back through redirects
// NOTE: this currently does not respect dates, domains or paths.
public $passcookies = true;

// user for http authentication
public $user = '';
// password for http authentication
public $pass = '';

// http accept types
public $accept = 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*';

// where the content is put
public $results = '';

// error messages sent here
public $error = '';
// response code returned from server
public $response_code = '';
// headers returned from server sent here
public $headers = array();
// max return data length (body)
public $maxlength = 500000;
// timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disallow timeouts
public $read_timeout = 0;
// if a read operation timed out
public $timed_out = false;
// http request status
public $status = 0;

// temporary directory that the webserver has permission to write to.
// under Windows, this should be C:\temp
public $temp_dir = '/tmp';

// Snoopy will use cURL for fetching SSL content if a full system path to
// the cURL binary is supplied here. Set to false if you do not have cURL
// installed. See http://curl.haxx.se for details on installing cURL.
// Snoopy does *not* use the cURL library functions built into php, as
// these functions are not stable as of this Snoopy release.
public $curl_path = '/usr/local/bin/curl';

// send Accept-encoding: gzip?
public $use_gzip = true;

/**** Private variables ****/
// These are internal by convention only: they keep the public visibility
// that the former "var" keyword implied.

// max line length (headers)
public $_maxlinelen = 4096;

// default http request method
public $_httpmethod = 'GET';
// default http request version
public $_httpversion = 'HTTP/1.0';
// default submit method
public $_submit_method = 'POST';
// default submit type
public $_submit_type = 'application/x-www-form-urlencoded';
// MIME boundary for multipart/form-data submit type
public $_mime_boundary = '';
// will be set if page fetched is a redirect
public $_redirectaddr = false;
// increments on an http redirect
public $_redirectdepth = 0;
// frame src urls
public $_frameurls = array();
// increments on frame depth
public $_framedepth = 0;

// set if using a proxy server
public $_isproxy = false;
// timeout for socket connection
public $_fp_timeout = 30;
117 | | - |
118 | | - /*======================================================================*\ |
119 | | - Function: fetch |
120 | | - Purpose: fetch the contents of a web page |
121 | | - (and possibly other protocols in the |
122 | | - future like ftp, nntp, gopher, etc.) |
123 | | - Input: $URI the location of the page to fetch |
124 | | - Output: $this->results the output text from the fetch |
125 | | - \*======================================================================*/ |
/**
 * Fetch a URI over plain HTTP (socket) or HTTPS (external cURL binary),
 * then follow a pending redirect and any collected frame URLs.
 *
 * Redirects are followed while $maxredirs > $_redirectdepth; frames are
 * fetched while $_framedepth < $maxframes. Both recurse into fetch().
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the scheme is not http/https, the connection
 *              could not be opened, or the cURL binary is unusable;
 *              true otherwise
 */
function fetch( $URI ) {
	$URI_PARTS = parse_url( $URI );
	if ( !is_array( $URI_PARTS ) ) {
		// parse_url() returns false for seriously malformed URIs
		$URI_PARTS = array();
	}
	// Default every component read below so that a partial URI cannot
	// trigger undefined-index errors.
	$URI_PARTS += array( 'scheme' => '', 'host' => '', 'path' => '', 'query' => '' );

	if ( !empty( $URI_PARTS['user'] ) ) {
		$this->user = $URI_PARTS['user'];
	}
	if ( !empty( $URI_PARTS['pass'] ) ) {
		$this->pass = $URI_PARTS['pass'];
	}

	// Request target for a direct (non-proxy) request. The "?" is only
	// appended when there really is a query string.
	$path = $URI_PARTS['path'];
	if ( $URI_PARTS['query'] !== '' ) {
		$path .= '?' . $URI_PARTS['query'];
	}
	// A proxy needs the entire URI, an origin server only the path.
	$target = $this->_isproxy ? $URI : $path;

	switch ( strtolower( $URI_PARTS['scheme'] ) ) {
		case 'http':
			$this->host = $URI_PARTS['host'];
			if ( !empty( $URI_PARTS['port'] ) ) {
				$this->port = $URI_PARTS['port'];
			}
			if ( !$this->_connect( $fp ) ) {
				return false;
			}
			$this->_httprequest( $target, $fp, $URI, $this->_httpmethod );
			$this->_disconnect( $fp );
			break;

		case 'https':
			if ( !$this->curl_path ) {
				return false;
			}
			if ( function_exists( 'is_executable' ) && !is_executable( $this->curl_path ) ) {
				$this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
				return false;
			}
			$this->host = $URI_PARTS['host'];
			if ( !empty( $URI_PARTS['port'] ) ) {
				$this->port = $URI_PARTS['port'];
			}
			$this->_httpsrequest( $target, $URI, $this->_httpmethod );
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "' . $URI_PARTS['scheme'] . "\"\n";
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ( $this->_redirectaddr && $this->maxredirs > $this->_redirectdepth ) {
		// only follow redirect if it's on this site, or offsiteok is true
		// NOTE(review): the same-site test only recognises http:// targets.
		$sameSite = preg_match(
			'|^http://' . preg_quote( $this->host, '|' ) . '|i',
			$this->_redirectaddr
		);
		if ( $sameSite || $this->offsiteok ) {
			/* follow the redirect */
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			$this->fetch( $this->_redirectaddr );
		}
	}

	if ( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 ) {
		// Work on a copy: the nested fetch() calls refill $_frameurls.
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ( $frameurls as $frameurl ) {
			if ( $this->_framedepth >= $this->maxframes ) {
				break;
			}
			$this->fetch( $frameurl );
			$this->_framedepth++;
		}
	}

	return true;
}
254 | | - |
255 | | - /*======================================================================*\ |
256 | | - Function: submit |
257 | | - Purpose: submit an HTTP form |
258 | | - Input: $URI the location to post the data |
259 | | - $formvars the formvars to use. |
260 | | - format: $formvars['var'] = 'val'; |
261 | | - $formfiles an array of files to submit |
262 | | - format: $formfiles['var'] = '/dir/filename.ext'; |
263 | | - Output: $this->results the text output from the post |
264 | | - \*======================================================================*/ |
/**
 * Submit an HTTP form over plain HTTP (socket) or HTTPS (external cURL
 * binary), then follow a pending redirect and any collected frame URLs.
 *
 * A redirect target containing a "?" is re-requested with fetch(); any
 * other target is re-submitted with the same form data.
 *
 * @param string $URI the location to post the data
 * @param array|string $formvars form variables, $formvars['var'] = 'val'
 * @param array|string $formfiles files to submit,
 *                                $formfiles['var'] = '/dir/filename.ext'
 * @return bool false when the scheme is not http/https, the connection
 *              could not be opened, or the cURL binary is unusable;
 *              true otherwise
 */
function submit( $URI, $formvars = '', $formfiles = '' ) {
	$postdata = $this->_prepare_post_body( $formvars, $formfiles );

	$URI_PARTS = parse_url( $URI );
	if ( !is_array( $URI_PARTS ) ) {
		// parse_url() returns false for seriously malformed URIs
		$URI_PARTS = array();
	}
	// Default every component read below so that a partial URI cannot
	// trigger undefined-index errors.
	$URI_PARTS += array( 'scheme' => '', 'host' => '', 'path' => '', 'query' => '' );

	if ( !empty( $URI_PARTS['user'] ) ) {
		$this->user = $URI_PARTS['user'];
	}
	if ( !empty( $URI_PARTS['pass'] ) ) {
		$this->pass = $URI_PARTS['pass'];
	}

	// Request target for a direct (non-proxy) request.
	$path = $URI_PARTS['path'];
	if ( $URI_PARTS['query'] !== '' ) {
		$path .= '?' . $URI_PARTS['query'];
	}
	// A proxy needs the entire URI, an origin server only the path.
	$target = $this->_isproxy ? $URI : $path;

	switch ( strtolower( $URI_PARTS['scheme'] ) ) {
		case 'http':
			$this->host = $URI_PARTS['host'];
			if ( !empty( $URI_PARTS['port'] ) ) {
				$this->port = $URI_PARTS['port'];
			}
			if ( !$this->_connect( $fp ) ) {
				return false;
			}
			$this->_httprequest(
				$target, $fp, $URI,
				$this->_submit_method,
				$this->_submit_type,
				$postdata
			);
			$this->_disconnect( $fp );
			break;

		case 'https':
			if ( !$this->curl_path ) {
				return false;
			}
			if ( function_exists( 'is_executable' ) && !is_executable( $this->curl_path ) ) {
				// Report the reason, as fetch() does for the same failure.
				$this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
				return false;
			}
			$this->host = $URI_PARTS['host'];
			if ( !empty( $URI_PARTS['port'] ) ) {
				$this->port = $URI_PARTS['port'];
			}
			$this->_httpsrequest(
				$target,
				$URI,
				$this->_submit_method,
				$this->_submit_type,
				$postdata
			);
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "' . $URI_PARTS['scheme'] . "\"\n";
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ( $this->_redirectaddr && $this->maxredirs > $this->_redirectdepth ) {
		// A redirect target that does not start with the request's own
		// scheme is treated as relative and expanded against the host.
		$scheme = $URI_PARTS['scheme'];
		if ( !preg_match( '|^' . preg_quote( $scheme, '|' ) . '://|', $this->_redirectaddr ) ) {
			$this->_redirectaddr = $this->_expandlinks(
				$this->_redirectaddr,
				$scheme . '://' . $URI_PARTS['host']
			);
		}

		// only follow redirect if it's on this site, or offsiteok is true
		// NOTE(review): the same-site test only recognises http:// targets.
		$sameSite = preg_match(
			'|^http://' . preg_quote( $this->host, '|' ) . '|i',
			$this->_redirectaddr
		);
		if ( $sameSite || $this->offsiteok ) {
			/* follow the redirect */
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			if ( strpos( $this->_redirectaddr, '?' ) > 0 ) {
				// the redirect has changed the request method from post to get
				$this->fetch( $this->_redirectaddr );
			} else {
				$this->submit( $this->_redirectaddr, $formvars, $formfiles );
			}
		}
	}

	if ( $this->_framedepth < $this->maxframes && count( $this->_frameurls ) > 0 ) {
		// Work on a copy: the nested fetch() calls refill $_frameurls.
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ( $frameurls as $frameurl ) {
			if ( $this->_framedepth >= $this->maxframes ) {
				break;
			}
			$this->fetch( $frameurl );
			$this->_framedepth++;
		}
	}

	return true;
}
432 | | - |
433 | | - /*======================================================================*\ |
434 | | - Function: fetchlinks |
435 | | - Purpose: fetch the links from a web page |
436 | | - Input: $URI where you are fetching from |
437 | | - Output: $this->results an array of the URLs |
438 | | - \*======================================================================*/ |
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * When $this->results is an array, every element is stripped and expanded
 * on its own, exactly as submitlinks() does, so _expandlinks() is never
 * handed an array of arrays.
 *
 * @param string $URI where you are fetching from
 * @return bool whatever fetch() returned
 */
function fetchlinks( $URI ) {
	if ( !$this->fetch( $URI ) ) {
		return false;
	}

	// Relative links must be resolved against the final location.
	if ( $this->lastredirectaddr ) {
		$URI = $this->lastredirectaddr;
	}

	if ( is_array( $this->results ) ) {
		for ( $x = 0; $x < count( $this->results ); $x++ ) {
			$this->results[$x] = $this->_striplinks( $this->results[$x] );
			if ( $this->expandlinks ) {
				$this->results[$x] = $this->_expandlinks( $this->results[$x], $URI );
			}
		}
	} else {
		$this->results = $this->_striplinks( $this->results );
		if ( $this->expandlinks ) {
			$this->results = $this->_expandlinks( $this->results, $URI );
		}
	}

	return true;
}
460 | | - |
461 | | - /*======================================================================*\ |
462 | | - Function: fetchform |
463 | | - Purpose: fetch the form elements from a web page |
464 | | - Input: $URI where you are fetching from |
465 | | - Output: $this->results the resulting html form |
466 | | - \*======================================================================*/ |
/**
 * Fetch a page and reduce $this->results to its form markup.
 *
 * @param string $URI where you are fetching from
 * @return bool whatever fetch() returned
 */
function fetchform( $URI ) {
	if ( !$this->fetch( $URI ) ) {
		return false;
	}

	// Single document: strip it in place and finish.
	if ( !is_array( $this->results ) ) {
		$this->results = $this->_stripform( $this->results );
		return true;
	}

	// Several documents: strip each one in place.
	$i = 0;
	while ( $i < count( $this->results ) ) {
		$this->results[$i] = $this->_stripform( $this->results[$i] );
		$i++;
	}

	return true;
}
482 | | - |
483 | | - |
484 | | - /*======================================================================*\ |
485 | | - Function: fetchtext |
486 | | - Purpose: fetch the text from a web page, stripping the links |
487 | | - Input: $URI where you are fetching from |
488 | | - Output: $this->results the text from the web page |
489 | | - \*======================================================================*/ |
/**
 * Fetch a page and reduce $this->results to its plain text.
 *
 * @param string $URI where you are fetching from
 * @return bool whatever fetch() returned
 */
function fetchtext( $URI ) {
	if ( !$this->fetch( $URI ) ) {
		return false;
	}

	// Single document: strip it in place and finish.
	if ( !is_array( $this->results ) ) {
		$this->results = $this->_striptext( $this->results );
		return true;
	}

	// Several documents: strip each one in place.
	$i = 0;
	while ( $i < count( $this->results ) ) {
		$this->results[$i] = $this->_striptext( $this->results[$i] );
		$i++;
	}

	return true;
}
504 | | - |
505 | | - /*======================================================================*\ |
506 | | - Function: submitlinks |
507 | | - Purpose: grab links from a form submission |
508 | | - Input: $URI where you are submitting from |
509 | | - Output: $this->results an array of the links from the post |
510 | | - \*======================================================================*/ |
/**
 * Submit a form and replace $this->results with the links found in the
 * response, expanded to full URLs when $this->expandlinks is set.
 *
 * @param string $URI where you are submitting from
 * @param array|string $formvars form variables, passed through to submit()
 * @param array|string $formfiles files to upload, passed through to submit()
 * @return bool whatever submit() returned
 */
function submitlinks( $URI, $formvars = '', $formfiles = '' ) {
	if ( !$this->submit( $URI, $formvars, $formfiles ) ) {
		return false;
	}

	// Relative links must be resolved against the final location.
	if ( $this->lastredirectaddr ) {
		$URI = $this->lastredirectaddr;
	}

	if ( !is_array( $this->results ) ) {
		$this->results = $this->_striplinks( $this->results );
		if ( $this->expandlinks ) {
			$this->results = $this->_expandlinks( $this->results, $URI );
		}
		return true;
	}

	$i = 0;
	while ( $i < count( $this->results ) ) {
		$this->results[$i] = $this->_striplinks( $this->results[$i] );
		if ( $this->expandlinks ) {
			$this->results[$i] = $this->_expandlinks( $this->results[$i], $URI );
		}
		$i++;
	}

	return true;
}
534 | | - |
535 | | - /*======================================================================*\ |
536 | | - Function: submittext |
537 | | - Purpose: grab text from a form submission |
538 | | - Input: $URI where you are submitting from |
539 | | - Output: $this->results the text from the web page |
540 | | - \*======================================================================*/ |
/**
 * Submit a form and replace $this->results with the plain text of the
 * response, run through _expandlinks() when $this->expandlinks is set.
 *
 * @param string $URI where you are submitting from
 * @param array|string $formvars form variables, passed through to submit()
 * @param array|string $formfiles files to upload, passed through to submit()
 * @return bool whatever submit() returned
 */
function submittext( $URI, $formvars = '', $formfiles = '' ) {
	if ( !$this->submit( $URI, $formvars, $formfiles ) ) {
		return false;
	}

	// Expansion is done against the final location.
	if ( $this->lastredirectaddr ) {
		$URI = $this->lastredirectaddr;
	}

	if ( !is_array( $this->results ) ) {
		$this->results = $this->_striptext( $this->results );
		if ( $this->expandlinks ) {
			$this->results = $this->_expandlinks( $this->results, $URI );
		}
		return true;
	}

	$i = 0;
	while ( $i < count( $this->results ) ) {
		$this->results[$i] = $this->_striptext( $this->results[$i] );
		if ( $this->expandlinks ) {
			$this->results[$i] = $this->_expandlinks( $this->results[$i], $URI );
		}
		$i++;
	}

	return true;
}
564 | | - |
565 | | - /*======================================================================*\ |
566 | | - Function: set_submit_multipart |
567 | | - Purpose: Set the form submission content type to |
568 | | - multipart/form-data |
569 | | - \*======================================================================*/ |
/**
 * Make later submissions use the multipart/form-data content type.
 * _httprequest() appends the MIME boundary to the Content-type header
 * for this type.
 */
function set_submit_multipart() {
	$this->_submit_type = 'multipart/form-data';
}
573 | | - |
574 | | - /*======================================================================*\ |
575 | | - Function: set_submit_normal |
576 | | - Purpose: Set the form submission content type to |
577 | | - application/x-www-form-urlencoded |
578 | | - \*======================================================================*/ |
/**
 * Make later submissions use the application/x-www-form-urlencoded
 * content type, which is also the class default.
 */
function set_submit_normal() {
	$this->_submit_type = 'application/x-www-form-urlencoded';
}
582 | | - |
583 | | - /*======================================================================*\ |
584 | | - Private functions |
585 | | - \*======================================================================*/ |
586 | | - |
587 | | - /*======================================================================*\ |
588 | | - Function: _striplinks |
589 | | - Purpose: strip the hyperlinks from an html document |
590 | | - Input: $document document to strip. |
591 | | - Output: $match an array of the links |
592 | | - \*======================================================================*/ |
/**
 * Extract the href targets of all <a> tags in an HTML document.
 *
 * Quoted hrefs are returned first, in document order, followed by the
 * unquoted ones, because the two forms land in different capture groups.
 *
 * @param string $document document to strip
 * @return array list of link targets; empty when the document has none
 */
function _striplinks( $document ) {
	preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
					([\"\'])?					# find single or double quote
					(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
												# quote, otherwise match up to next space
					'isx", $document, $links
	);

	// Start from an empty list so a document without links yields array()
	// instead of an undefined variable.
	$match = array();

	// catenate the non-empty matches from the conditional subpattern:
	// group 2 holds quoted targets, group 3 unquoted ones
	foreach ( array( 2, 3 ) as $group ) {
		foreach ( $links[$group] as $val ) {
			if ( !empty( $val ) ) {
				$match[] = $val;
			}
		}
	}

	// return the links
	return $match;
}
617 | | - |
618 | | - /*======================================================================*\ |
619 | | - Function: _stripform |
620 | | - Purpose: strip the form elements from an HTML document |
621 | | - Input: $document document to strip. |
622 | | - Output: $match an array of the links |
623 | | - \*======================================================================*/ |
/**
 * Reduce an HTML document to its form-related tags (form, input, select,
 * textarea and option, the latter together with its text).
 *
 * @param string $document document to strip
 * @return string the matched elements joined with CRLF
 */
function _stripform( $document ) {
	$pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
	preg_match_all( $pattern, $document, $found );

	// Index 0 holds the full text of every match; one element per line.
	return implode( "\r\n", $found[0] );
}
637 | | - |
638 | | - /*======================================================================*\ |
639 | | - Function: _striptext |
640 | | - Purpose: strip the text from an html document |
641 | | - Input: $document document to strip. |
642 | | - Output: $text the resulting text |
643 | | - \*======================================================================*/ |
/**
 * Reduce an HTML document to text: scripts and tags are removed,
 * whitespace after a line break is collapsed, and a fixed list of
 * character entities is decoded.
 *
 * @param string|array $document document(s) to strip
 * @return string|array the resulting text, same shape as the input
 */
function _striptext( $document ) {
	// Pattern => replacement, applied in this order. Entities are listed
	// one by one; only the more common ones are covered.
	// NOTE(review): the chr() replacements are single bytes, which looks
	// like a Latin-1/Windows-1252 assumption - confirm before using on
	// UTF-8 output.
	$conversions = array(
		"'<script[^>]*?>.*?</script>'si" => '', // strip out JavaScript
		"'<[\/\!]*?[^<>]*?>'si" => '', // strip out HTML tags
		"'([\r\n])[\s]+'" => "\\1", // strip out white space
		"'&(quot|#34|#034|#x22);'i" => "\"", // replace HTML entities
		"'&(amp|#38|#038|#x26);'i" => '&', // added hexadecimal values
		"'&(lt|#60|#060|#x3c);'i" => '<',
		"'&(gt|#62|#062|#x3e);'i" => '>',
		"'&(nbsp|#160|#xa0);'i" => ' ',
		"'&(iexcl|#161);'i" => chr( 161 ),
		"'&(cent|#162);'i" => chr( 162 ),
		"'&(pound|#163);'i" => chr( 163 ),
		"'&(copy|#169);'i" => chr( 169 ),
		"'&(reg|#174);'i" => chr( 174 ),
		"'&(deg|#176);'i" => chr( 176 ),
		"'&(#39|#039|#x27);'" => chr( 39 ),
		"'&(euro|#8364);'i" => chr( 128 ), // Europe
		"'&a(uml|UML);'" => 'ä', // German
		"'&o(uml|UML);'" => 'ö',
		"'&u(uml|UML);'" => 'ü',
		"'&A(uml|UML);'" => 'Ä',
		"'&O(uml|UML);'" => 'Ö',
		"'&U(uml|UML);'" => 'Ü',
		// Match the entity itself; matching a literal sharp s and
		// replacing it with the same character did nothing.
		"'&szlig;'i" => 'ß',
	);

	return preg_replace(
		array_keys( $conversions ),
		array_values( $conversions ),
		$document
	);
}
703 | | - |
704 | | - /*======================================================================*\ |
705 | | - Function: _expandlinks |
706 | | - Purpose: expand each link into a fully qualified URL |
707 | | - Input: $links the links to qualify |
708 | | - $URI the full URI to get the base from |
709 | | - Output: $expandedLinks the expanded links |
710 | | - \*======================================================================*/ |
/**
 * Expand each link into a fully qualified URL.
 *
 * Links starting with "/" are resolved against scheme://host of $URI,
 * other relative links against its directory. Links that already start
 * with http://, https:// or mailto: are left absolute.
 *
 * @param string|array $links the links to qualify
 * @param string $URI the full URI to get the base from
 * @return string|array the expanded links, same shape as $links
 */
function _expandlinks( $links, $URI ) {
	// Base directory: the URI without its query string, without a trailing
	// "name.ext" segment and without a trailing slash.
	preg_match( "/^[^\?]+/", $URI, $match );
	$base = isset( $match[0] ) ? $match[0] : '';
	$base = preg_replace( "|/[^\/\.]+\.[^\/\.]+$|", '', $base );
	$base = preg_replace( "|/$|", '', $base );

	// parse_url() may return false or omit components for a degenerate base.
	$base_part = parse_url( $base );
	if ( !is_array( $base_part ) ) {
		$base_part = array();
	}
	$base_root = ( isset( $base_part['scheme'] ) ? $base_part['scheme'] : '' )
		. '://'
		. ( isset( $base_part['host'] ) ? $base_part['host'] : '' );

	$search = array(
		// drop an explicit reference to the current host ...
		"|^http://" . preg_quote( $this->host, '|' ) . "|i",
		// ... so that it is handled like any root-relative link
		"|^(\/)|i",
		// anything still not absolute is relative to the base directory;
		// https:// counts as absolute as well
		"|^(?!https?://)(?!mailto:)|i",
		// collapse "/./" and one "/dir/../" occurrence
		"|/\./|",
		"|/[^\/]+/\.\./|"
	);

	$replace = array(
		'',
		$base_root . '/',
		$base . '/',
		'/',
		'/'
	);

	return preg_replace( $search, $replace, $links );
}
739 | | - |
/*======================================================================*\
	Function:	_httprequest
	Purpose:	write an HTTP request to an already-open socket, then
				read and parse the response headers and body
	Input:		$url			the url to fetch (used in the request line)
				$fp				the current open file pointer
				$URI			the full URI
				$http_method	request method placed in the request line
				$content_type	Content-type header value, if any
				$body			body contents to send if any (POST)
	Output:		true once the response has been stored in $this->results,
				false (with $this->status = -100) when a read timed out
\*======================================================================*/
function _httprequest( $url, $fp, $URI, $http_method, $content_type = '', $body = '' ) {
	if ( $this->passcookies && $this->_redirectaddr ) {
		$this->setcookies();
	}

	$URI_PARTS = parse_url( $URI );
	if ( empty( $url ) ) {
		$url = '/';
	}

	// ---- build the request head ----
	$headers = $http_method . ' ' . $url . ' ' . $this->_httpversion . "\r\n";
	if ( !empty( $this->agent ) ) {
		$headers .= 'User-Agent: ' . $this->agent . "\r\n";
	}
	if ( !empty( $this->host ) && !isset( $this->rawheaders['Host'] ) ) {
		$headers .= 'Host: ' . $this->host;
		if ( !empty( $this->port ) ) {
			$headers .= ':' . $this->port;
		}
		$headers .= "\r\n";
	}
	if ( !empty( $this->accept ) ) {
		$headers .= 'Accept: ' . $this->accept . "\r\n";
	}

	if ( $this->use_gzip ) {
		// only advertise gzip when PHP was built with zlib support
		if ( function_exists( 'gzinflate' ) ) {
			$headers .= "Accept-encoding: gzip\r\n";
		} else {
			trigger_error(
				'use_gzip is on, but PHP was built without zlib support.' .
				' Requesting file(s) without gzip encoding.',
				E_USER_NOTICE
			);
		}
	}

	if ( !empty( $this->referer ) ) {
		$headers .= 'Referer: ' . $this->referer . "\r\n";
	}
	if ( !empty( $this->cookies ) ) {
		if ( !is_array( $this->cookies ) ) {
			$this->cookies = (array)$this->cookies;
		}

		$cookiePairs = array();
		foreach ( $this->cookies as $cookieKey => $cookieVal ) {
			$cookiePairs[] = $cookieKey . '=' . urlencode( $cookieVal );
		}
		if ( count( $cookiePairs ) > 0 ) {
			$headers .= 'Cookie: ' . implode( '; ', $cookiePairs ) . "\r\n";
		}
	}
	if ( !empty( $this->rawheaders ) ) {
		if ( !is_array( $this->rawheaders ) ) {
			$this->rawheaders = (array)$this->rawheaders;
		}
		// foreach instead of each(): each() was removed in PHP 8 and, because
		// the array pointer was never reset, it sent nothing on repeat calls
		foreach ( $this->rawheaders as $headerKey => $headerVal ) {
			$headers .= $headerKey . ': ' . $headerVal . "\r\n";
		}
	}
	if ( !empty( $content_type ) ) {
		$headers .= "Content-type: $content_type";
		if ( $content_type == 'multipart/form-data' ) {
			$headers .= '; boundary=' . $this->_mime_boundary;
		}
		$headers .= "\r\n";
	}
	if ( !empty( $body ) ) {
		$headers .= 'Content-length: ' . strlen( $body ) . "\r\n";
	}
	if ( !empty( $this->user ) || !empty( $this->pass ) ) {
		$headers .= 'Authorization: Basic ' . base64_encode( $this->user . ':' . $this->pass ) . "\r\n";
	}

	// add proxy auth headers
	if ( !empty( $this->proxy_user ) ) {
		$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode( $this->proxy_user . ':' . $this->proxy_pass ) . "\r\n";
	}

	// blank line ends the request head
	$headers .= "\r\n";

	// set the read timeout if needed
	if ( $this->read_timeout > 0 ) {
		stream_set_timeout( $fp, $this->read_timeout );
	}
	$this->timed_out = false;

	$request = $headers . $body;
	fwrite( $fp, $request, strlen( $request ) );

	// ---- read the response head ----
	$this->_redirectaddr = false;
	// keep headers an array even when nothing is read, so later code can
	// iterate over it safely
	$this->headers = array();

	// content was returned gzip encoded?
	$is_gzipped = false;

	while ( ( $currentHeader = fgets( $fp, $this->_maxlinelen ) ) !== false ) {
		if ( $this->read_timeout > 0 && $this->_check_timeout( $fp ) ) {
			$this->status = -100;
			return false;
		}

		// a blank line terminates the header section
		if ( preg_match( "/^\r?\n$/", $currentHeader ) ) {
			break;
		}

		// if a header begins with Location: or URI:, set the redirect.
		// Whitespace after the colon is optional; a header with no value
		// is ignored instead of producing a bogus redirect.
		if ( preg_match( '/^(?:Location|URI):\s*(\S.*)$/i', chop( $currentHeader ), $matches ) ) {
			$target = $matches[1];
			// look for :// in the Location header to see if hostname is included
			if ( !preg_match( '|://|', $target ) ) {
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS['scheme'] . '://' . $this->host . ':' . $this->port;
				// eliminate double slash
				if ( !preg_match( '|^/|', $target ) ) {
					$this->_redirectaddr .= '/' . $target;
				} else {
					$this->_redirectaddr .= $target;
				}
			} else {
				$this->_redirectaddr = $target;
			}
		}

		if ( preg_match( "|^HTTP/|", $currentHeader ) ) {
			if ( preg_match( "|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status ) ) {
				$this->status = $status[1];
			}
			$this->response_code = $currentHeader;
		}

		// header names and the coding token are case-insensitive
		if ( preg_match( '/^Content-Encoding:\s*gzip/i', $currentHeader ) ) {
			$is_gzipped = true;
		}

		$this->headers[] = $currentHeader;
	}

	// ---- read the response body ----
	$results = '';
	do {
		$_data = fread( $fp, $this->maxlength );
		if ( $_data === false || $_data === '' || strlen( $results ) > $this->maxlength ) {
			break;
		}
		$results .= $_data;
	} while ( true );

	// gunzip
	if ( $is_gzipped && $results !== '' ) {
		if ( function_exists( 'gzdecode' ) ) {
			// parses the gzip header properly, including optional fields
			$decoded = gzdecode( $results );
		} elseif ( function_exists( 'gzinflate' ) ) {
			// legacy fallback per http://www.php.net/manual/en/function.gzencode.php:
			// skip the minimal 10-byte gzip header and inflate the rest
			$decoded = gzinflate( substr( $results, 10 ) );
		} else {
			$decoded = false;
		}

		if ( $decoded === false ) {
			$this->error = 'could not decompress gzip encoded response';
			$results = '';
		} else {
			$results = $decoded;
		}
	}

	if ( $this->read_timeout > 0 && $this->_check_timeout( $fp ) ) {
		$this->status = -100;
		return false;
	}

	// check if there is a redirect meta tag
	if ( preg_match( "'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match ) ) {
		$this->_redirectaddr = $this->_expandlinks( $match[1], $URI );
	}

	// have we hit our frame depth and is there frame src to fetch?
	if ( ( $this->_framedepth < $this->maxframes ) && preg_match_all( "'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match ) ) {
		$this->results[] = $results;
		foreach ( $match[1] as $frameSrc ) {
			$this->_frameurls[] = $this->_expandlinks( $frameSrc, $URI_PARTS['scheme'] . '://' . $this->host );
		}
	} elseif ( is_array( $this->results ) ) { // have we already fetched framed content?
		$this->results[] = $results;
	} else { // no framed content
		$this->results = $results;
	}

	return true;
}
934 | | - |
/*======================================================================*\
	Function:	_httpsrequest
	Purpose:	go get the https data from the server using the
				external curl binary at $this->curl_path
	Input:		$url			the url to fetch (not used: curl is given $URI)
				$URI			the full URI
				$http_method	request method (not used: curl chooses it,
								sending a POST when a body is supplied via -d)
				$content_type	Content-type header value, if any
				$body			body contents to send if any (POST)
	Output:		true on success; false with $this->error set when the
				temporary header file cannot be created or curl exits
				with a non-zero status
\*======================================================================*/
function _httpsrequest( $url, $URI, $http_method, $content_type = '', $body = '' ) {
	if ( $this->passcookies && $this->_redirectaddr ) {
		$this->setcookies();
	}

	$headers = array();
	$URI_PARTS = parse_url( $URI );

	// GET ... header not needed for curl
	if ( !empty( $this->agent ) ) {
		$headers[] = 'User-Agent: ' . $this->agent;
	}
	if ( !empty( $this->host ) ) {
		if ( !empty( $this->port ) ) {
			$headers[] = 'Host: ' . $this->host . ':' . $this->port;
		} else {
			$headers[] = 'Host: ' . $this->host;
		}
	}
	if ( !empty( $this->accept ) ) {
		$headers[] = 'Accept: ' . $this->accept;
	}
	if ( !empty( $this->referer ) ) {
		$headers[] = 'Referer: ' . $this->referer;
	}
	if ( !empty( $this->cookies ) ) {
		if ( !is_array( $this->cookies ) ) {
			$this->cookies = (array)$this->cookies;
		}

		$cookiePairs = array();
		foreach ( $this->cookies as $cookieKey => $cookieVal ) {
			$cookiePairs[] = $cookieKey . '=' . urlencode( $cookieVal );
		}
		if ( count( $cookiePairs ) > 0 ) {
			$headers[] = 'Cookie: ' . implode( '; ', $cookiePairs );
		}
	}
	if ( !empty( $this->rawheaders ) ) {
		if ( !is_array( $this->rawheaders ) ) {
			$this->rawheaders = (array)$this->rawheaders;
		}
		// foreach instead of each(): each() was removed in PHP 8 and relied
		// on an array pointer that was never reset between requests
		foreach ( $this->rawheaders as $headerKey => $headerVal ) {
			$headers[] = $headerKey . ': ' . $headerVal;
		}
	}
	if ( !empty( $content_type ) ) {
		if ( $content_type == 'multipart/form-data' ) {
			$headers[] = "Content-type: $content_type; boundary=" . $this->_mime_boundary;
		} else {
			$headers[] = "Content-type: $content_type";
		}
	}
	if ( !empty( $body ) ) {
		$headers[] = 'Content-length: ' . strlen( $body );
	}
	if ( !empty( $this->user ) || !empty( $this->pass ) ) {
		// same scheme spelling as the plain-HTTP request path
		$headers[] = 'Authorization: Basic ' . base64_encode( $this->user . ':' . $this->pass );
	}

	// Every value is passed through escapeshellarg(): escapeshellcmd() inside
	// hand-written double quotes does not stop shell expansion, so a crafted
	// URL, header or body could previously run arbitrary commands.
	$cmdline_params = '';
	foreach ( $headers as $header ) {
		$cmdline_params .= ' -H ' . escapeshellarg( $header );
	}

	if ( !empty( $body ) ) {
		$cmdline_params .= ' -d ' . escapeshellarg( $body );
	}

	if ( $this->read_timeout > 0 ) {
		$cmdline_params .= ' -m ' . escapeshellarg( $this->read_timeout );
	}

	// NOTE(review): a temp_dir property is not visible in this part of the
	// class; use it when present and non-empty, else the system temp dir.
	$tempDir = !empty( $this->temp_dir ) ? $this->temp_dir : sys_get_temp_dir();
	$headerfile = tempnam( $tempDir, 'sno' );
	if ( $headerfile === false ) {
		$this->error = 'Error: could not create a temporary file for the cURL response headers.';
		return false;
	}

	$results = array();
	$return = 0;
	exec(
		$this->curl_path . ' -k -D ' . escapeshellarg( $headerfile ) . $cmdline_params . ' ' . escapeshellarg( $URI ),
		$results,
		$return
	);

	if ( $return ) {
		// do not leave the temporary header file behind on failure
		unlink( $headerfile );
		$this->error = "Error: cURL could not retrieve the document, error $return.";
		return false;
	}

	$results = implode( "\r\n", $results );

	$result_headers = file( $headerfile );
	unlink( $headerfile );
	if ( $result_headers === false ) {
		$result_headers = array();
	}

	$this->_redirectaddr = false;
	// keep headers an array even when curl wrote none
	$this->headers = array();

	foreach ( $result_headers as $headerLine ) {
		// if a header begins with Location: or URI:, set the redirect.
		// Matched case-insensitively, with optional whitespace after the colon.
		if ( preg_match( '/^(?:Location|URI):\s*(\S.*)$/i', chop( $headerLine ), $matches ) ) {
			$target = $matches[1];
			// look for :// in the Location header to see if hostname is included
			if ( !preg_match( '|://|', $target ) ) {
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS['scheme'] . '://' . $this->host . ':' . $this->port;
				// eliminate double slash
				if ( !preg_match( '|^/|', $target ) ) {
					$this->_redirectaddr .= '/' . $target;
				} else {
					$this->_redirectaddr .= $target;
				}
			} else {
				$this->_redirectaddr = $target;
			}
		}

		if ( preg_match( "|^HTTP/|", $headerLine ) ) {
			$this->response_code = $headerLine;
			if ( preg_match( "|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match ) ) {
				$this->status = $match[1];
			}
		}

		$this->headers[] = $headerLine;
	}

	// check if there is a redirect meta tag
	if ( preg_match( "'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match ) ) {
		$this->_redirectaddr = $this->_expandlinks( $match[1], $URI );
	}

	// have we hit our frame depth and is there frame src to fetch?
	if ( ( $this->_framedepth < $this->maxframes ) && preg_match_all( "'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match ) ) {
		$this->results[] = $results;
		foreach ( $match[1] as $frameSrc ) {
			$this->_frameurls[] = $this->_expandlinks( $frameSrc, $URI_PARTS['scheme'] . '://' . $this->host );
		}
	} elseif ( is_array( $this->results ) ) { // have we already fetched framed content?
		$this->results[] = $results;
	} else { // no framed content
		$this->results = $results;
	}

	return true;
}
1093 | | - |
/*======================================================================*\
	Function:	setcookies()
	Purpose:	set cookies for a redirection: copy each name=value
				pair found in a Set-Cookie response header stored in
				$this->headers into $this->cookies
\*======================================================================*/
function setcookies() {
	// headers may be unset or empty when no response has been read yet;
	// count() on a non-array is a TypeError on PHP 8
	if ( empty( $this->headers ) || !is_array( $this->headers ) ) {
		return;
	}

	foreach ( $this->headers as $header ) {
		if ( preg_match( '/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match ) ) {
			$this->cookies[$match[1]] = urldecode( $match[2] );
		}
	}
}
1105 | | - |
/*======================================================================*\
	Function:	_check_timeout
	Purpose:	report whether the stream has timed out; records the
				fact in $this->timed_out when it has
	Input:		$fp	file pointer
	Output:		true if a read timeout is configured and the stream
				reports timed_out, false otherwise
\*======================================================================*/
function _check_timeout( $fp ) {
	// no read timeout configured: nothing to check
	if ( !( $this->read_timeout > 0 ) ) {
		return false;
	}

	$streamInfo = socket_get_status( $fp );
	if ( !$streamInfo['timed_out'] ) {
		return false;
	}

	$this->timed_out = true;
	return true;
}
1121 | | - |
/*======================================================================*\
	Function:	_connect
	Purpose:	make a socket connection, to the proxy when both
				proxy_host and proxy_port are set, otherwise to
				host:port
	Input:		$fp	file pointer (by reference; receives the socket)
	Output:		true on success; false on failure, with $this->status
				set to the fsockopen error number and $this->error to
				a description
\*======================================================================*/
function _connect( &$fp ) {
	if ( !empty( $this->proxy_host ) && !empty( $this->proxy_port ) ) {
		$this->_isproxy = true;
		$host = $this->proxy_host;
		$port = $this->proxy_port;
	} else {
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen( $host, $port, $errno, $errstr, $this->_fp_timeout );

	if ( $fp ) {
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	// each case needs its own break: without it every case fell through
	// to default and the specific message was always overwritten
	switch ( $errno ) {
		case -3:
			$this->error = 'socket creation failed (-3)';
			break;
		case -4:
			$this->error = 'dns lookup failure (-4)';
			break;
		case -5:
			$this->error = 'connection refused or timed out (-5)';
			break;
		default:
			$this->error = 'connection failed (' . $errno . ')';
			break;
	}

	return false;
}
1160 | | - |
/*======================================================================*\
	Function:	_disconnect
	Purpose:	close a socket connection
	Input:		$fp	file pointer
	Output:		the result of fclose() on $fp
\*======================================================================*/
function _disconnect( $fp ) {
	$closed = fclose( $fp );

	return $closed;
}
1169 | | - |
/*======================================================================*\
	Function:	_prepare_post_body
	Purpose:	Prepare post body according to encoding type
				($this->_submit_type). For multipart/form-data a new
				$this->_mime_boundary is generated as a side effect.
	Input:		$formvars	- form variables (name => value, where a
							  value may itself be an array or object
							  of values)
				$formfiles	- form upload files (field name => file
							  path or array of file paths); unreadable
							  files are skipped
	Output:		post body as a string; empty string when there is
				nothing to send or the submit type is not recognised
\*======================================================================*/
function _prepare_post_body( $formvars, $formfiles ) {
	settype( $formvars, 'array' );
	settype( $formfiles, 'array' );
	$postdata = '';

	if ( count( $formvars ) == 0 && count( $formfiles ) == 0 ) {
		// always hand back a string so callers can strlen() it safely
		return $postdata;
	}

	switch ( $this->_submit_type ) {
		case 'application/x-www-form-urlencoded':
			// foreach replaces each(), which was removed in PHP 8
			foreach ( $formvars as $key => $val ) {
				if ( is_array( $val ) || is_object( $val ) ) {
					foreach ( $val as $cur_val ) {
						$postdata .= urlencode( $key ) . '[]=' . urlencode( $cur_val ) . '&';
					}
				} else {
					$postdata .= urlencode( $key ) . '=' . urlencode( $val ) . '&';
				}
			}
			break;

		case 'multipart/form-data':
			$this->_mime_boundary = 'Snoopy' . md5( uniqid( microtime() ) );

			foreach ( $formvars as $key => $val ) {
				if ( is_array( $val ) || is_object( $val ) ) {
					foreach ( $val as $cur_val ) {
						$postdata .= '--' . $this->_mime_boundary . "\r\n";
						// {$key}[] rather than $key\[\]: inside double quotes
						// the backslashes were sent literally in the field name
						$postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= '--' . $this->_mime_boundary . "\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ( $formfiles as $field_name => $file_names ) {
				settype( $file_names, 'array' );
				foreach ( $file_names as $file_name ) {
					if ( !is_readable( $file_name ) ) {
						continue;
					}

					// file_get_contents copes with empty files and reports
					// failure, unlike fread( $fp, filesize() ) on an unchecked handle
					$file_content = file_get_contents( $file_name );
					if ( $file_content === false ) {
						continue;
					}
					$base_name = basename( $file_name );

					$postdata .= '--' . $this->_mime_boundary . "\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			// closing boundary
			$postdata .= '--' . $this->_mime_boundary . "--\r\n";
			break;
	}

	return $postdata;
}
1242 | | -} |
\ No newline at end of file |
Index: trunk/extensions/RSS/RSSParse.php |
— | — | @@ -1,494 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * Hybrid parser, and object, takes RSS or Atom feed as a string and returns a |
5 | | - * simple object. |
6 | | - * Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 |
7 | | - * |
8 | | - * @file |
9 | | - * @see RSSFetch.php for a simpler interface with integrated caching support |
10 | | - */ |
11 | | -class MagpieRSS { |
12 | | - public $parser; |
13 | | - |
14 | | - public $current_item = array(); // item currently being parsed |
15 | | - public $items = array(); // collection of parsed items |
16 | | - public $channel = array(); // hash of channel fields |
17 | | - public $textinput = array(); |
18 | | - public $image = array(); |
19 | | - public $feed_type; |
20 | | - public $feed_version; |
21 | | - public $encoding = ''; // output encoding of parsed rss |
22 | | - |
23 | | - public $_source_encoding = ''; // only set if we have to parse xml prolog |
24 | | - |
25 | | - public $ERROR = ''; |
26 | | - public $WARNING = ''; |
27 | | - |
28 | | - // define some constants |
29 | | - public $_CONTENT_CONSTRUCTS = array( 'content', 'summary', 'info', 'title', 'tagline', 'copyright' ); |
30 | | - public $_KNOWN_ENCODINGS = array( 'UTF-8', 'US-ASCII', 'ISO-8859-1' ); |
31 | | - |
32 | | - // parser variables, useless if you're not a parser, treat as private |
33 | | - public $stack = array(); // parser stack |
34 | | - public $inchannel = false; |
35 | | - public $initem = false; |
36 | | - public $incontent = false; // if in Atom <content mode="xml"> field |
37 | | - public $intextinput = false; |
38 | | - public $inimage = false; |
39 | | - public $current_namespace = false; |
40 | | - |
/**
 * Set up the XML parser, parse the source, and populate this object.
 *
 * NOTE: Probably a good idea to leave the encoding options alone unless
 *       you know what you're doing as PHP's character set support is
 *       a little weird.
 *
 * NOTE: A lot of this is unnecessary but harmless with PHP5
 *
 * Parse errors are reported through $this->error(); the parser is freed
 * and normalize() is run whether or not parsing succeeded.
 *
 * @param $source String: string containing the RSS to be parsed
 * @param $output_encoding String: output the parsed RSS in this character
 *                         set; defaults to ISO-8859-1 as this is PHP's
 *                         default. NOTE: might be changed to UTF-8 in
 *                         future versions.
 * @param $input_encoding String: the character set of the incoming RSS
 *                        source. Leave blank and Magpie will try to figure
 *                        it out.
 * @param $detect_encoding Boolean: if false, Magpie won't attempt to
 *                         detect source encoding. (caveat emptor)
 */
function __construct( $source, $output_encoding = 'ISO-8859-1',
	$input_encoding = null, $detect_encoding = true )
{
	# if PHP xml isn't compiled in, report it and stop: nothing below can work
	if ( !function_exists( 'xml_parser_create' ) ) {
		$this->error(
			"Failed to load PHP's XML Extension. " .
			'http://www.php.net/manual/en/ref.xml.php',
			E_USER_ERROR
		);
		return;
	}

	list( $parser, $source ) = $this->create_parser(
		$source,
		$output_encoding,
		$input_encoding,
		$detect_encoding
	);

	# PHP < 8 hands back a resource, PHP >= 8 an XMLParser object;
	# accept either, since is_resource() alone rejects the object
	if ( !is_resource( $parser ) && !is_object( $parser ) ) {
		$this->error(
			"Failed to create an instance of PHP's XML parser. " .
			'http://www.php.net/manual/en/ref.xml.php',
			E_USER_ERROR
		);
		return;
	}

	$this->parser = $parser;

	# pass in parser, and a reference to this object
	# setup handlers
	xml_set_object( $this->parser, $this );
	xml_set_element_handler(
		$this->parser,
		'feed_start_element',
		'feed_end_element'
	);

	xml_set_character_data_handler( $this->parser, 'feed_cdata' );

	$status = xml_parse( $this->parser, $source );

	if ( !$status ) {
		$errorcode = xml_get_error_code( $this->parser );
		if ( $errorcode != XML_ERROR_NONE ) {
			$xml_error = xml_error_string( $errorcode );
			$error_line = xml_get_current_line_number( $this->parser );
			$error_col = xml_get_current_column_number( $this->parser );
			$errormsg = "$xml_error at line $error_line, column $error_col";

			$this->error( $errormsg );
		}
	}

	xml_parser_free( $this->parser );

	$this->normalize();
}
125 | | - |
/**
 * XML start-element handler.
 *
 * The first element seen identifies the feed type and version; later
 * elements update the in-channel / in-item / in-textinput / in-image /
 * in-content state, record Atom links, or are pushed on the element stack.
 *
 * @param $p XML parser (unused)
 * @param $element String: element name, possibly prefixed with "ns:"
 * @param $attrs Array: element attributes; keys are lower-cased in place
 */
function feed_start_element( $p, $element, &$attrs ) {
	$el = $element = strtolower( $element );
	$attrs = array_change_key_case( $attrs, CASE_LOWER );

	// check for a namespace, and split if found
	// (a colon at position 0 is deliberately not treated as a prefix)
	$ns = false;
	if ( strpos( $element, ':' ) ) {
		list( $ns, $el ) = explode( ':', $element, 2 );
	}
	if ( $ns && $ns != 'rdf' ) {
		$this->current_namespace = $ns;
	}

	// if feed type isn't set, then this is first element of feed
	// identify feed from root element
	if ( !isset( $this->feed_type ) ) {
		// the version attribute is optional; avoid an undefined-index read
		$version = isset( $attrs['version'] ) ? $attrs['version'] : null;

		if ( $el == 'rdf' ) {
			$this->feed_type = 'RSS';
			$this->feed_version = '1.0';
		} elseif ( $el == 'rss' ) {
			$this->feed_type = 'RSS';
			$this->feed_version = $version;
		} elseif ( $el == 'feed' ) {
			$this->feed_type = 'Atom';
			$this->feed_version = $version;
			$this->inchannel = true;
		}
		return;
	}

	if ( $el == 'channel' ) {
		$this->inchannel = true;
	} elseif ( $el == 'item' || $el == 'entry' ) {
		$this->initem = true;
		if ( isset( $attrs['rdf:about'] ) ) {
			$this->current_item['about'] = $attrs['rdf:about'];
		}
	}

	// if we're in the default namespace of an RSS feed,
	// record textinput or image fields
	elseif (
		$this->feed_type == 'RSS' &&
		$this->current_namespace == '' &&
		$el == 'textinput' )
	{
		$this->intextinput = true;
	} elseif (
		$this->feed_type == 'RSS' &&
		$this->current_namespace == '' &&
		$el == 'image' )
	{
		$this->inimage = true;
	}

	// handle Atom content constructs
	elseif ( $this->feed_type == 'Atom' && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
		// avoid clashing w/ RSS mod_content
		if ( $el == 'content' ) {
			$el = 'atom_content';
		}

		$this->incontent = $el;
	}

	// if inside an Atom content construct (e.g. content or summary) field treat tags as text
	elseif ( $this->feed_type == 'Atom' && $this->incontent ) {
		// if tags are inlined, then flatten
		$attrs_str = join(
			' ',
			array_map(
				array( 'MagpieRSS', 'mapAttributes' ),
				array_keys( $attrs ),
				array_values( $attrs )
			)
		);

		$this->append_content( "<$element $attrs_str>" );

		array_unshift( $this->stack, $el );
	}

	// Atom supports many links per containing element.
	// Magpie treats link elements of type rel='alternate'
	// as being equivalent to RSS's simple link element.
	elseif ( $this->feed_type == 'Atom' && $el == 'link' ) {
		// rel and href may be absent; read them without undefined-index
		// notices while keeping the resulting field names unchanged
		$rel = isset( $attrs['rel'] ) ? $attrs['rel'] : '';
		$href = isset( $attrs['href'] ) ? $attrs['href'] : '';

		if ( $rel == 'alternate' ) {
			$link_el = 'link';
		} else {
			$link_el = 'link_' . $rel;
		}

		$this->append( $link_el, $href );
	} else { // set stack[0] to current element
		array_unshift( $this->stack, $el );
	}
}
223 | | - |
/**
 * XML character-data handler.
 *
 * Text inside an Atom content construct is appended to that construct;
 * any other text is appended to the field named by the element stack,
 * outermost element first, joined with underscores.
 *
 * @param $p XML parser (unused)
 * @param $text String: character data
 */
function feed_cdata( $p, $text ) {
	$insideAtomContent = ( $this->feed_type == 'Atom' && $this->incontent );

	if ( $insideAtomContent ) {
		$this->append_content( $text );
		return;
	}

	$outermostFirst = array_reverse( $this->stack );
	$this->append( implode( '_', $outermostFirst ), $text );
}
232 | | - |
/**
 * XML end-element handler.
 *
 * Closes whichever state the element opened (item, textinput, image,
 * Atom content construct, channel/feed); any other element is popped off
 * the element stack, emitting a closing tag first when inside Atom
 * content. The current namespace is always reset afterwards.
 *
 * @param $p XML parser (unused)
 * @param $el String: element name
 */
function feed_end_element( $p, $el ) {
	$el = strtolower( $el );

	$isAtom = ( $this->feed_type == 'Atom' );
	// RSS feed and no namespace prefix currently recorded
	$plainRss = ( $this->feed_type == 'RSS' && $this->current_namespace == '' );

	if ( $el == 'item' || $el == 'entry' ) {
		// item finished: store it and start afresh
		$this->items[] = $this->current_item;
		$this->current_item = array();
		$this->initem = false;
	} elseif ( $plainRss && $el == 'textinput' ) {
		$this->intextinput = false;
	} elseif ( $plainRss && $el == 'image' ) {
		$this->inimage = false;
	} elseif ( $isAtom && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
		$this->incontent = false;
	} elseif ( $el == 'channel' || $el == 'feed' ) {
		$this->inchannel = false;
	} else {
		if ( $isAtom && $this->incontent ) {
			// balance tags properly
			// note: I don't think this is actually necessary
			$this->append_content( $this->stack[0] == $el ? "</$el>" : "<$el />" );
		}

		array_shift( $this->stack );
	}

	$this->current_namespace = false;
}
264 | | - |
/**
 * Append $str2 to $str1 in place, treating an unset or null $str1 as
 * the empty string.
 *
 * @param $str1 String: target, passed by reference
 * @param $str2 String: text to append
 */
function concat( &$str1, $str2 = '' ) {
	$str1 = ( isset( $str1 ) ? $str1 : '' ) . $str2;
}
271 | | - |
/**
 * Append text to the Atom content construct currently being read
 * (keyed by $this->incontent) on the current item, or on the channel
 * when not inside an item. Does nothing outside both.
 *
 * @param $text String: text to append
 */
function append_content( $text ) {
	if ( $this->initem ) {
		$holder = &$this->current_item;
	} elseif ( $this->inchannel ) {
		$holder = &$this->channel;
	} else {
		return;
	}

	$this->concat( $holder[$this->incontent], $text );
}
279 | | - |
/**
 * Smart append - field and namespace aware.
 *
 * Appends $text to field $el of whichever container is currently open.
 * With a current namespace the field lives under [$namespace][$el] and
 * the containers are tried in the order item, channel, textinput, image;
 * without one it lives under [$el] and the order is item, textinput,
 * image, channel. Nothing happens when $el is empty or no container is
 * open.
 *
 * @param $el String: field name
 * @param $text String: text to append
 */
function append( $el, $text ) {
	if ( !$el ) {
		return;
	}

	$ns = $this->current_namespace;

	if ( $ns ) {
		if ( $this->initem ) {
			$holder = &$this->current_item;
		} elseif ( $this->inchannel ) {
			$holder = &$this->channel;
		} elseif ( $this->intextinput ) {
			$holder = &$this->textinput;
		} elseif ( $this->inimage ) {
			$holder = &$this->image;
		} else {
			return;
		}

		$this->concat( $holder[$ns][$el], $text );
		return;
	}

	if ( $this->initem ) {
		$holder = &$this->current_item;
	} elseif ( $this->intextinput ) {
		$holder = &$this->textinput;
	} elseif ( $this->inimage ) {
		$holder = &$this->image;
	} elseif ( $this->inchannel ) {
		$holder = &$this->channel;
	} else {
		return;
	}

	$this->concat( $holder[$el], $text );
}
323 | | - |
/**
 * Mirror fields between the Atom and RSS vocabularies so callers can use
 * either set of names, and add a 'date_timestamp' to items whose date
 * can be parsed.
 *
 * Atom feeds: channel tagline -> description, item summary -> description,
 * item atom_content -> content/encoded, date from issued or modified.
 * RSS feeds: channel description -> tagline, item description -> summary,
 * item content/encoded -> atom_content, date from dc:date (RSS 1.0) or
 * pubdate.
 */
function normalize() {
	// if atom populate rss fields
	if ( $this->is_atom() ) {
		// missing source fields yield null rather than an undefined-index notice
		$this->channel['description'] = isset( $this->channel['tagline'] )
			? $this->channel['tagline']
			: null;

		foreach ( $this->items as $i => $item ) {
			if ( isset( $item['summary'] ) ) {
				$item['description'] = $item['summary'];
			}
			if ( isset( $item['atom_content'] ) ) {
				$item['content']['encoded'] = $item['atom_content'];
			}

			// prefer the issued date, fall back to modified when present
			$atom_date = null;
			if ( isset( $item['issued'] ) ) {
				$atom_date = $item['issued'];
			} elseif ( isset( $item['modified'] ) ) {
				$atom_date = $item['modified'];
			}

			if ( $atom_date ) {
				// parse_w3cdtf() is defined outside this view; its errors
				// stay suppressed exactly as before
				$epoch = @$this->parse_w3cdtf( $atom_date );
				if ( $epoch && $epoch > 0 ) {
					$item['date_timestamp'] = $epoch;
				}
			}

			$this->items[$i] = $item;
		}
	} elseif ( $this->is_rss() ) {
		$this->channel['tagline'] = isset( $this->channel['description'] )
			? $this->channel['description']
			: null;

		// the feed version does not change while iterating
		$isRss10 = ( $this->is_rss() == '1.0' );

		foreach ( $this->items as $i => $item ) {
			if ( isset( $item['description'] ) ) {
				$item['summary'] = $item['description'];
			}
			if ( isset( $item['content']['encoded'] ) ) {
				$item['atom_content'] = $item['content']['encoded'];
			}

			if ( $isRss10 && isset( $item['dc']['date'] ) ) {
				$epoch = @$this->parse_w3cdtf( $item['dc']['date'] );
				if ( $epoch && $epoch > 0 ) {
					$item['date_timestamp'] = $epoch;
				}
			} elseif ( isset( $item['pubdate'] ) ) {
				$epoch = @strtotime( $item['pubdate'] );
				if ( $epoch > 0 ) {
					$item['date_timestamp'] = $epoch;
				}
			}

			$this->items[$i] = $item;
		}
	}
}
374 | | - |
/**
 * Tell whether the parsed feed is RSS.
 *
 * @return the feed version when the feed type is 'RSS', false otherwise
 */
function is_rss() {
	return ( $this->feed_type == 'RSS' ) ? $this->feed_version : false;
}
382 | | - |
/**
 * Tell whether the parsed feed is Atom.
 *
 * @return the feed version when the feed type is 'Atom', false otherwise
 */
function is_atom() {
	return ( $this->feed_type == 'Atom' ) ? $this->feed_version : false;
}
390 | | - |
/**
 * Instantiate an XML parser.
 *
 * @param $source String: XML source; handed back unchanged
 * @param $out_enc String: target encoding; when set it is also stored
 *  in $this->encoding
 * @param $in_enc String: input encoding, used only when $detect is off
 * @param $detect Boolean: let the parser detect the input encoding
 * @return Array: array( XML parser, $source )
 */
function create_parser( $source, $out_enc, $in_enc, $detect ) {
	// by default PHP5 does a fine job of detecting input encodings,
	// so an explicit input encoding is only used when detection is off
	$forced_enc = ( !$detect && $in_enc ) ? $in_enc : '';
	$parser = xml_parser_create( $forced_enc );

	if ( $out_enc ) {
		$this->encoding = $out_enc;
		xml_parser_set_option( $parser, XML_OPTION_TARGET_ENCODING, $out_enc );
	}

	return array( $parser, $source );
}
413 | | - |
/**
 * Checks if $enc is an encoding type supported by MagpieRSS.
 *
 * The name is upper-cased before it is looked up in
 * $this->_KNOWN_ENCODINGS.
 *
 * @param $enc String: encoding name
 * @return String or false: the upper-cased name when known, else false
 */
function known_encoding( $enc ) {
	$candidate = strtoupper( $enc );

	return in_array( $candidate, $this->_KNOWN_ENCODINGS ) ? $candidate : false;
}
427 | | - |
/**
 * Record an error or warning message on the object.
 *
 * Notice-level messages (E_USER_NOTICE, E_NOTICE) are stored in
 * $this->WARNING; every other level is stored in $this->ERROR.
 *
 * @param $errormsg String: message to record
 * @param $lvl Integer: PHP error level constant
 */
function error( $errormsg, $lvl = E_USER_WARNING ) {
	// append PHP's error message if track_errors is enabled
	// NOTE(review): $php_errormsg is looked up in this method's own
	// scope, so this branch looks unreachable - confirm before relying
	// on it.
	if ( isset( $php_errormsg ) ) {
		$errormsg .= " ($php_errormsg)";
	}

	$isNotice = $lvl & ( E_USER_NOTICE | E_NOTICE );
	if ( !$isNotice ) {
		$this->ERROR = $errormsg;
		return;
	}

	$this->WARNING = $errormsg;
}
441 | | - |
/**
 * Parse a W3CDTF date into unix epoch.
 * This used to be in its own file.
 *
 * Seconds and the timezone designator are optional; a missing zone is
 * treated as GMT, the same as "Z".
 *
 * @note http://www.w3.org/TR/NOTE-datetime
 * @param $date_str String: date string to parse
 * @return Integer: unix epoch, or -1 when the string is not W3CDTF
 */
public static function parse_w3cdtf( $date_str ) {
	// Capture groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes,
	// 6 ":ss" (with the colon), 7 seconds, 8 zone sign, 9 zone hours,
	// 10 zone minutes, 11 "Z".
	$pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

	if ( !preg_match( $pat, $date_str, $match ) ) {
		return -1;
	}

	// Group 7 holds the bare seconds; group 6 still has the leading
	// colon. Trailing groups that did not take part in the match are
	// missing from $match, so every optional group is checked first.
	$seconds = ( isset( $match[7] ) && $match[7] !== '' ) ? (int)$match[7] : 0;

	// calculate epoch for current date assuming GMT
	$epoch = gmmktime(
		(int)$match[4], (int)$match[5], $seconds,
		(int)$match[2], (int)$match[3], (int)$match[1]
	);

	$hasOffset = isset( $match[8] ) && $match[8] !== '';
	if ( !$hasOffset ) {
		// zulu time ("Z", group 11) or no designator at all: already GMT
		return $epoch;
	}

	$offset_secs = ( ( (int)$match[9] * 60 ) + (int)$match[10] ) * 60;

	// is timezone ahead of GMT? then subtract offset
	if ( $match[8] == '+' ) {
		$offset_secs = $offset_secs * -1;
	}

	return $epoch + $offset_secs;
}
490 | | - |
/**
 * Format one attribute as name="value".
 *
 * The value is inserted as-is; no escaping is applied.
 *
 * @param $k String: attribute name
 * @param $v String: attribute value
 * @return String
 */
public static function mapAttributes( $k, $v ) {
	return $k . '="' . $v . '"';
}
494 | | - |
495 | | -} // end class MagpieRSS |
\ No newline at end of file |
Index: trunk/extensions/RSS/RSSCache.php |
— | — | @@ -30,6 +30,14 @@ |
31 | 31 | ); |
32 | 32 | } |
33 | 33 | } |
| 34 | + |
| 35 | + // check if it is writable. |
| 36 | + if ( !is_writable( $this->BASE_CACHE ) ) { |
| 37 | + wfDebugLog( |
| 38 | + 'RSS', |
| 39 | + "Cache dir '" . $this->BASE_CACHE . "' is not writable." |
| 40 | + ); |
| 41 | + } |
34 | 42 | } |
35 | 43 | |
36 | 44 | /** |
Index: trunk/extensions/RSS/RSSData.php |
— | — | @@ -0,0 +1,61 @@ |
| 2 | +<?php |
/**
 * Parsed representation of an RSS response.
 *
 * Holds the response's ETag and Last-Modified headers, the DOM of the
 * feed, and one associative array per /rss/channel/item element.
 * Only that RSS 2.0-style layout is looked at.
 */
class RSSData {
	/** ETag response header, as returned by the response object */
	public $etag;
	/** Last-Modified response header, as returned by the response object */
	public $last_modified;
	/** Error message when the body could not be parsed, otherwise null */
	public $ERROR;
	/** DOMDocument built from the response body */
	public $xml;
	/** Array of items; each item maps a field name to the node text */
	public $items;

	/**
	 * @param $resp Object: response offering getContent() and
	 *  getResponseHeader( $name )
	 */
	function __construct( $resp ) {
		// Always an array, so callers can iterate or reverse it safely.
		$this->items = array();
		$this->etag = $resp->getResponseHeader( 'ETag' );
		$this->last_modified = $resp->getResponseHeader( 'Last-Modified' );

		$this->xml = new DOMDocument;
		$content = $resp->getContent();
		if ( !is_string( $content ) || $content === '' ) {
			$this->ERROR = 'Empty response body';
			return;
		}

		// Collect libxml errors instead of emitting PHP warnings, and
		// report a parse failure through $this->ERROR.
		$previous = libxml_use_internal_errors( true );
		$loaded = $this->xml->loadXML( $content );
		$lastError = libxml_get_last_error();
		libxml_clear_errors();
		libxml_use_internal_errors( $previous );

		if ( !$loaded ) {
			$this->ERROR = $lastError ? trim( $lastError->message ) : 'Invalid XML';
			return;
		}

		$xpath = new DOMXPath( $this->xml );
		$items = $xpath->evaluate( "/rss/channel/item" );

		foreach ( $items as $item ) {
			$bit = array();
			foreach ( $item->childNodes as $n ) {
				// Whitespace between elements shows up as text nodes;
				// only real child elements are fields.
				if ( $n->nodeType !== XML_ELEMENT_NODE ) {
					continue;
				}
				$name = $this->rssTokenToName( $n->nodeName );
				if ( $name !== null ) {
					$bit[$name] = $n->nodeValue;
				}
			}
			$this->items[] = $bit;
		}
	}

	/**
	 * Map a feed element name to the key used in an item array.
	 *
	 * @param $n String: element name, including any namespace prefix
	 * @return String or null: key to store the value under, or null
	 *  when the element is to be dropped
	 */
	function rssTokenToName( $n ) {
		switch ( $n ) {
			case "dc:date":
				# TODO: parse "2010-10-18T18:07:00Z"
				return "date";
			case "pubDate":
				# TODO: parse RFC date
				return "date";
			case "dc:creator":
				return "author";
			case "title":
				return "title";
			case "content:encoded":
				return "encodedContent";

			// Elements that are dropped on purpose
			case "slash:comments":
			case "slash:department":
			case "slash:section":
			case "slash:hit_parade":
			case "feedburner:origLink":
			case "wfw:commentRss":
			case "comments":
			case "category":
				return null;

			default:
				return $n;
		}
	}
}
\ No newline at end of file |
Property changes on: trunk/extensions/RSS/RSSData.php |
___________________________________________________________________ |
Added: svn:eol-syle |
1 | 63 | + native |
Index: trunk/extensions/RSS/RSSFetch.php |
— | — | @@ -10,26 +10,26 @@ |
11 | 11 | * Globals - redefine these in your script to change the |
12 | 12 | * behaviour of fetch_rss() currently, most options affect the cache
13 | 13 | * |
14 | | - * $wgMagpieRSSCache - Should Magpie cache parsed RSS objects? |
| 14 | + * $wgRSSCache - Should we cache parsed RSS objects? |
15 | 15 | * |
16 | | - * $wgMagpieRSSCacheDirectory - Where should Magpie cache parsed RSS objects? |
| 16 | + * $wgRSSCacheDirectory - Where should we cache parsed RSS objects? |
17 | 17 | * This should be a location that the webserver can write to. If this |
18 | | - * directory does not already exist, Magpie will try to be smart and create it. |
| 18 | + * directory does not already exist, we will try to be smart and create it.
19 | 19 | * This will often fail for permissions reasons. |
20 | 20 | * |
21 | | - * $wgMagpieRSSCacheAge - How long to store cached RSS objects (in seconds)?. |
| 21 | + * $wgRSSCacheAge - How long to store cached RSS objects (in seconds)?. |
22 | 22 | * |
23 | | - * $wgMagpieRSSCacheFreshOnly - If remote fetch fails, throw an error |
| 23 | + * $wgRSSCacheFreshOnly - If remote fetch fails, throw an error |
24 | 24 | * instead of returning stale object? |
25 | 25 | */ |
26 | 26 | |
27 | | -$MAGPIE_ERROR = ''; |
| 27 | +$RSS_FETCH_ERROR = ''; |
28 | 28 | |
29 | 29 | /** |
30 | 30 | * Return RSS object for the given URL, maintaining caching. |
31 | 31 | * |
32 | 32 | * NOTES ON CACHING: |
33 | | - * If caching is on ($wgMagpieRSSCache) fetch_rss will first check the cache. |
| 33 | + * If caching is on ($wgRSSCache) fetch_rss will first check the cache. |
34 | 34 | * |
35 | 35 | * NOTES ON RETRIEVING REMOTE FILES: |
36 | 36 | * If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
— | — | @@ -37,40 +37,44 @@ |
38 | 38 | * |
39 | 39 | * NOTES ON FAILED REQUESTS: |
40 | 40 | * If there is an HTTP error while fetching an RSS object, the cached version |
41 | | - * will be returned, if it exists (and if $wgMagpieRSSCacheFreshOnly is off) |
| 41 | + * will be returned, if it exists (and if $wgRSSCacheFreshOnly is off) |
42 | 42 | * |
43 | 43 | * @param $url String: URL of RSS file |
44 | 44 | * @return parsed RSS object (see RSSParse) |
45 | 45 | */ |
46 | 46 | function fetch_rss( $url ) { |
47 | | - global $wgMagpieRSSCache, $wgMagpieRSSCacheAge, $wgMagpieRSSCacheFreshOnly; |
48 | | - global $wgMagpieRSSCacheDirectory, $wgMagpieRSSFetchTimeout; |
49 | | - global $wgMagpieRSSOutputEncoding, $wgMagpieRSSInputEncoding; |
50 | | - global $wgMagpieRSSDetectEncoding, $wgMagpieRSSUseGzip; |
| 47 | + global $wgRSSCache, $wgRSSCacheAge, $wgRSSCacheFreshOnly; |
| 48 | + global $wgRSSCacheDirectory, $wgRSSFetchTimeout; |
| 49 | + global $wgRSSOutputEncoding, $wgRSSInputEncoding; |
| 50 | + global $wgRSSDetectEncoding, $wgRSSUseGzip; |
51 | 51 | |
52 | | - $wgMagpieRSSCache = true; |
53 | | - $wgMagpieRSSCacheAge = 60 * 60; // one hour |
54 | | - $wgMagpieRSSCacheFreshOnly = false; |
55 | | - $wgMagpieRSSCacheDirectory = '/extensions/RSS/cache'; |
56 | | - $wgMagpieRSSOutputEncoding = 'ISO-8859-1'; |
57 | | - $wgMagpieRSSInputEncoding = null; |
58 | | - $wgMagpieRSSDetectEncoding = true; |
| 52 | + $nameValue = array('wgRSSCache' => true, |
| 53 | + 'wgRSSCacheAge' => 60 * 60, // one hour |
| 54 | + 'wgRSSCacheFreshOnly' => false, |
| 55 | + 'wgRSSCacheDirectory' => '/extensions/RSS/cache', |
| 56 | + 'wgRSSOutputEncoding' => 'ISO-8859-1', |
| 57 | + 'wgRSSInputEncoding' => null, |
| 58 | + 'wgRSSDetectEncoding' => true, |
| 59 | + 'wgRSSFetchTimeout' => 5, // 5 second timeout |
| 60 | + 'wgRSSUseGzip' => true); |
59 | 61 | |
60 | | - $wgMagpieRSSFetchTimeout = 5; // 5 second timeout |
| 62 | + foreach($nameValue as $n => $v) { |
| 63 | + if( !isset( $GLOBALS[$n] ) ) { |
| 64 | + $GLOBALS[$n] = $v; |
| 65 | + } |
| 66 | + } |
61 | 67 | |
62 | | - // use gzip encoding to fetch RSS files if supported? |
63 | | - $wgMagpieRSSUseGzip = true; |
64 | | - |
65 | 68 | if ( !isset( $url ) ) { |
66 | 69 | wfDebugLog( 'RSS', 'fetch_rss (RSSFetch.php) called without a URL!' ); |
67 | 70 | return false; |
68 | 71 | } |
69 | 72 | |
70 | 73 | // if cache is disabled |
71 | | - if ( !$wgMagpieRSSCache ) { |
| 74 | + if ( !$wgRSSCache ) { |
72 | 75 | // fetch file, and parse it |
73 | 76 | $resp = _fetch_remote_file( $url ); |
74 | | - if ( $resp->status >= 200 && $resp->status < 300 ) { |
| 77 | + $errs = $resp->getErrorsArray(); |
| 78 | + if ( count( $errs ) == 0 ) { |
75 | 79 | return _response_to_rss( $resp ); |
76 | 80 | } else { |
77 | 81 | wfDebugLog( 'RSS', "Failed to fetch $url and cache is off" ); |
— | — | @@ -82,12 +86,12 @@ |
83 | 87 | // 2. if there is a hit, make sure its fresh |
84 | 88 | // 3. if cached obj fails freshness check, fetch remote |
85 | 89 | // 4. if remote fails, return stale object, or error |
86 | | - $cache = new RSSCache( $wgMagpieRSSCacheDirectory, $wgMagpieRSSCacheAge ); |
| 90 | + $cache = new RSSCache( $wgRSSCacheDirectory, $wgRSSCacheAge ); |
87 | 91 | |
88 | 92 | if ( $cache->ERROR ) { |
89 | 93 | wfDebugLog( |
90 | 94 | 'RSS', |
91 | | - 'MagpieRSS: cache error on RSSFetch.php! Error msg: ' . |
| 95 | + 'cache error on RSSFetch.php! Error msg: ' . |
92 | 96 | $cache->ERROR |
93 | 97 | ); |
94 | 98 | } |
— | — | @@ -99,7 +103,7 @@ |
100 | 104 | |
101 | 105 | // store parsed XML by desired output encoding |
102 | 106 | // as character munging happens at parse time |
103 | | - $cache_key = $url . $wgMagpieRSSOutputEncoding; |
| 107 | + $cache_key = $url . $wgRSSOutputEncoding; |
104 | 108 | |
105 | 109 | if ( !$cache->ERROR ) { |
106 | 110 | // return cache HIT, MISS, or STALE |
— | — | @@ -112,7 +116,7 @@ |
113 | 117 | if ( isset( $rss ) && $rss ) { |
114 | 118 | // should be cache age |
115 | 119 | $rss->from_cache = 1; |
116 | | - wfDebugLog( 'RSS', 'MagpieRSS: Cache HIT' ); |
| 120 | + wfDebugLog( 'RSS', 'Cache HIT' ); |
117 | 121 | return $rss; |
118 | 122 | } |
119 | 123 | } |
— | — | @@ -128,16 +132,17 @@ |
129 | 133 | } |
130 | 134 | } |
131 | 135 | |
| 136 | + var_dump($request_headers); |
132 | 137 | $resp = _fetch_remote_file( $url, $request_headers ); |
133 | 138 | |
134 | 139 | if ( isset( $resp ) && $resp ) { |
135 | | - if ( $resp->status == '304' ) { |
| 140 | + if ( $resp->getStatus() === 304 ) { |
136 | 141 | // we have the most current copy |
137 | 142 | wfDebugLog( 'RSS', "Got 304 for $url" ); |
138 | 143 | // reset cache on 304 (at minutillo insistent prodding) |
139 | 144 | $cache->set( $cache_key, $rss ); |
140 | 145 | return $rss; |
141 | | - } elseif ( $resp->status >= 200 && $resp->status < 300 ) { |
| 146 | + } elseif ( $resp->getStatus() >= 200 && $resp->getStatus() < 300 ) { |
142 | 147 | $rss = _response_to_rss( $resp ); |
143 | 148 | if ( $rss ) { |
144 | 149 | wfDebugLog( 'RSS', 'Fetch successful' ); |
— | — | @@ -147,12 +152,10 @@ |
148 | 153 | } |
149 | 154 | } else { |
150 | 155 | $errormsg = "Failed to fetch $url "; |
151 | | - if ( $resp->status == '-100' ) { |
152 | | - global $wgMagpieRSSFetchTimeout; |
153 | | - $errormsg .= '(Request timed out after ' . $wgMagpieRSSFetchTimeout . ' seconds)'; |
| 156 | + if ( $resp->getStatus() === -100 ) { |
| 157 | + global $wgRSSFetchTimeout; |
| 158 | + $errormsg .= '(Request timed out after ' . $wgRSSFetchTimeout . ' seconds)'; |
154 | 159 | } elseif ( $resp->error ) { |
155 | | - // compensate for Snoopy's annoying habbit to tacking |
156 | | - // on '\n' |
157 | 160 | $http_error = substr( $resp->error, 0, -2 ); |
158 | 161 | $errormsg .= "(HTTP Error: $http_error)"; |
159 | 162 | } else { |
— | — | @@ -172,38 +175,47 @@ |
173 | 176 | } |
174 | 177 | |
175 | 178 | // else we totally failed |
176 | | - $MAGPIE_ERROR = $errormsg; |
| 179 | + $RSS_FETCH_ERROR = $errormsg; |
177 | 180 | wfDebugLog( |
178 | | - 'MagpieRSS (RSSFetch): we totally failed :-( Error message:' . |
| 181 | + 'RSSFetch: we totally failed :-( Error message:' . |
179 | 182 | $errormsg |
180 | 183 | ); |
181 | 184 | |
182 | 185 | return false; |
183 | | - } // end if ( !$wgMagpieRSSCache ) { |
| 186 | + } // end if ( !$wgRSSCache ) { |
184 | 187 | } // end fetch_rss() |
185 | 188 | |
/**
 * Retrieve an arbitrary remote file.
 *
 * @param $url String: URL of the remote file
 * @param $headers Array: headers to send along with the request, either
 *  a list of array( name, value ) pairs or a name => value map
 * @return the executed HTTP request object, or false when the request
 *  failed (the failure is written to the 'RSS' debug log)
 */
function _fetch_remote_file( $url, $headers = '' ) {
	global $wgRSSFetchTimeout;

	$client = HttpRequest::factory( $url, array( 'timeout' => $wgRSSFetchTimeout ) );
	$client->setUserAgent( 'MediawikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' );
	// NOTE(review): $wgRSSUseGzip is not forwarded to the HTTP client;
	// the old Snoopy "use_gzip" switch has no counterpart here yet.

	if ( is_array( $headers ) ) {
		foreach ( $headers as $name => $h ) {
			if ( is_array( $h ) && count( $h ) > 1 ) {
				// array( name, value ) pair
				$client->setHeader( $h[0], $h[1] );
			} elseif ( is_string( $name ) && !is_array( $h ) ) {
				// name => value entry; presumably how the conditional-GET
				// headers are passed in - verify against the caller
				$client->setHeader( $name, $h );
			} else {
				wfDebugLog(
					'RSS',
					'Ignoring malformed request header: ' . var_export( $h, true )
				);
			}
		}
	}

	$fetch = $client->execute();

	if ( $fetch->isGood() ) {
		return $client;
	}

	// Double quotes so that the URL really ends up in the log line.
	wfDebugLog( 'RSS', "error fetching $url: " . $fetch->getMessage() );

	return false;
}
209 | 221 | |
210 | 222 | /** |
— | — | @@ -212,45 +224,24 @@ |
213 | 225 | * @return parsed RSS object (see RSSParse) or false |
214 | 226 | */ |
function _response_to_rss( $resp ) {
	// Without this declaration the assignment below only creates a
	// local variable and the message is lost.
	// NOTE(review): confirm that callers read the global of this name.
	global $RSS_FETCH_ERROR;

	// RSSData parses the body and picks up the ETag and Last-Modified
	// headers in its constructor.
	$rss = new RSSData( $resp );

	// if RSS parsed successfully
	if ( $rss && !$rss->ERROR ) {
		return $rss;
	}

	// else construct error message
	$errormsg = 'Failed to parse RSS file.';
	if ( $rss ) {
		$errormsg .= ' (' . $rss->ERROR . ')';
	}

	$RSS_FETCH_ERROR = $errormsg;
	wfDebugLog( 'RSS', 'error!' . $errormsg );

	return false;
}
| 248 | + |
Index: trunk/extensions/RSS/RSS.i18n.php |
— | — | @@ -15,8 +15,14 @@ |
16 | 16 | 'rss-desc' => 'Displays an RSS feed on a wiki page', |
17 | 17 | 'rss-error' => 'Failed to load RSS feed from $1: $2', |
18 | 18 | 'rss-empty' => 'Failed to load RSS feed from $1!', |
| 19 | + 'rss-item' => '{{RSSPost | title = {{{title}}} | link = {{{link}}} | date = {{{date}}} | author = {{{author}}} }}', |
19 | 20 | ); |
20 | 21 | |
| 22 | +$messages['qqq'] = array( |
| 23 | + 'rss-item' => 'Do not translate', |
| 24 | +); |
| 25 | + |
| 26 | + |
21 | 27 | /** Afrikaans (Afrikaans) |
22 | 28 | * @author Naudefj |
23 | 29 | */ |
Index: trunk/extensions/RSS/RSS.php |
— | — | @@ -42,7 +42,7 @@ |
43 | 43 | // Internationalization file and autoloadable classes |
44 | 44 | $dir = dirname( __FILE__ ) . '/'; |
45 | 45 | $wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php'; |
46 | | -$wgAutoloadClasses['MagpieRSS'] = $dir . 'RSSParse.php'; |
| 46 | +$wgAutoloadClasses['RSSData'] = $dir . 'RSSData.php'; |
47 | 47 | $wgAutoloadClasses['RSSCache'] = $dir . 'RSSCache.php'; |
48 | 48 | |
49 | 49 | $wgHooks['ParserFirstCallInit'][] = 'wfRssExtension'; |
— | — | @@ -55,51 +55,32 @@ |
56 | 56 | } |
57 | 57 | |
58 | 58 | # Parser hook callback function |
59 | | -function renderRss( $input ) { |
60 | | - global $wgOutputEncoding, $wgParser; |
| 59 | +function renderRss( $input, $args, $parser, $frame ) { |
| 60 | + global $wgOutputEncoding; |
61 | 61 | |
62 | 62 | // Kill parser cache |
63 | | - $wgParser->disableCache(); |
| 63 | + $parser->disableCache(); |
64 | 64 | |
65 | 65 | if ( !$input ) { |
66 | 66 | return ''; # if <rss>-section is empty, return nothing |
67 | 67 | } |
68 | 68 | |
69 | 69 | # Parse fields in rss section |
70 | | - $fields = explode( '|', $input ); |
71 | | - wfSuppressWarnings(); |
72 | | - $url = $fields[0]; |
73 | | - wfRestoreWarnings(); |
| 70 | + $url = $input; |
74 | 71 | |
75 | | - $args = array(); |
76 | | - for ( $i = 1; $i < sizeof( $fields ); $i++ ) { |
77 | | - $f = $fields[$i]; |
78 | | - |
79 | | - if ( strpos( $f, '=' ) === false ) { |
80 | | - $args[strtolower( trim( $f ) )] = false; |
81 | | - } else { |
82 | | - list( $k, $v ) = explode( '=', $f, 2 ); |
83 | | - if ( trim( $v ) == false ) { |
84 | | - $args[strtolower( trim( $k ) )] = false; |
85 | | - } else { |
86 | | - $args[strtolower( trim( $k ) )] = trim( $v ); |
87 | | - } |
88 | | - } |
89 | | - } |
90 | | - |
91 | 72 | # Get charset from argument array |
92 | | - wfSuppressWarnings(); |
93 | | - $charset = $args['charset']; |
94 | | - wfRestoreWarnings(); |
95 | | - if( !$charset ) { |
| 73 | + if( isset( $args['charset'] ) ) { |
| 74 | + $charset = $args['charset']; |
| 75 | + } else { |
96 | 76 | $charset = $wgOutputEncoding; |
97 | 77 | } |
98 | 78 | |
99 | 79 | # Get max number of headlines from argument-array |
100 | | - wfSuppressWarnings(); |
101 | | - $maxheads = $args['max']; |
102 | | - wfRestoreWarnings(); |
103 | | - $headcnt = 0; |
| 80 | + if( isset( $args['max'] ) ) { |
| 81 | + $maxheads = $args['max']; |
| 82 | + } else { |
| 83 | + $maxheads = 32; |
| 84 | + } |
104 | 85 | |
105 | 86 | # Get short flag from argument array |
106 | 87 | # If short is set, no description text is printed |
— | — | @@ -111,52 +92,57 @@ |
112 | 93 | |
113 | 94 | # Get reverse flag from argument array |
114 | 95 | if( isset( $args['reverse'] ) ) { |
115 | | - $reverse = true; |
116 | | - } else { |
117 | | - $reverse = false; |
| 96 | + $rss->items = array_reverse( $rss->items ); |
118 | 97 | } |
119 | 98 | |
120 | 99 | # Get date format from argument array |
121 | 100 | if ( isset( $args['date'] ) ) { |
122 | | - wfSuppressWarnings(); |
123 | 101 | $date = $args['date']; |
124 | | - wfRestoreWarnings(); |
125 | | - if ( $date == '' ) { |
126 | | - $date = 'd M Y H:i'; |
127 | | - } |
128 | 102 | } else { |
129 | | - $date = false; |
| 103 | + $date = 'd M Y H:i'; |
130 | 104 | } |
131 | 105 | |
132 | 106 | # Get highlight terms from argument array |
133 | | - wfSuppressWarnings(); |
134 | | - $rssHighlight = $args['highlight']; |
135 | | - wfRestoreWarnings(); |
136 | | - $rssHighlight = str_replace( ' ', ' ', $rssHighlight ); |
137 | | - $rssHighlight = explode( ' ', trim( $rssHighlight ) ); |
| 107 | + if( isset( $args['highlight'] ) ) { |
| 108 | + $rssHighlight = $args['highlight']; |
| 109 | + $rssHighlight = str_replace( ' ', ' ', $rssHighlight ); |
| 110 | + $rssHighlight = explode( ' ', trim( $rssHighlight ) ); |
| 111 | + } else { |
| 112 | + $rssHighlight = false; |
| 113 | + } |
138 | 114 | |
139 | 115 | # Get filter terms from argument array |
140 | | - wfSuppressWarnings(); |
141 | | - $rssFilter = $args['filter']; |
142 | | - wfRestoreWarnings(); |
143 | | - $rssFilter = str_replace( ' ', ' ', $rssFilter ); |
144 | | - $rssFilter = explode( ' ', trim( $rssFilter ) ); |
| 116 | + if( isset( $args['filter'] ) ) { |
| 117 | + $rssFilter = $args['filter']; |
| 118 | + $rssFilter = str_replace( ' ', ' ', $rssFilter ); |
| 119 | + $rssFilter = explode( ' ', trim( $rssFilter ) ); |
| 120 | + } else { |
| 121 | + $rssFilter = false; |
| 122 | + } |
145 | 123 | |
146 | 124 | # Filterout terms |
147 | | - wfSuppressWarnings(); |
148 | | - $rssFilterout = $args['filterout']; |
149 | | - wfRestoreWarnings(); |
150 | | - $rssFilterout = str_replace( ' ', ' ', $rssFilterout ); |
151 | | - $rssFilterout = explode( ' ', trim( $rssFilterout ) ); |
| 125 | + if( isset( $args['filterout'] ) ) { |
| 126 | + $rssFilterout = $args['filterout']; |
| 127 | + $rssFilterout = str_replace( ' ', ' ', $rssFilterout ); |
| 128 | + $rssFilterout = explode( ' ', trim( $rssFilterout ) ); |
| 129 | + } else { |
| 130 | + $rssFilterout = false; |
| 131 | + } |
152 | 132 | |
| 133 | + if( isset( $args['template'] ) ) { |
| 134 | + $template = 'Template:'.$args['template']; |
| 135 | + } else { |
| 136 | + $template = wfMsgNoTrans("rss-item"); |
| 137 | + } |
| 138 | + |
| 139 | + $headcnt = 0; |
| 140 | + |
153 | 141 | # Fetch RSS. May be cached locally. |
154 | 142 | # Refer to the documentation of MagpieRSS for details. |
155 | 143 | if ( !function_exists( 'fetch_rss' ) ) { |
156 | 144 | include( dirname( __FILE__ ) . '/RSSFetch.php' ); // provides fetch_rss() function |
157 | 145 | } |
158 | | - wfSuppressWarnings(); |
159 | 146 | $rss = fetch_rss( $url ); |
160 | | - wfRestoreWarnings(); |
161 | 147 | |
162 | 148 | # Check for errors. |
163 | 149 | if ( empty( $rss ) ) { |
— | — | @@ -171,110 +157,50 @@ |
172 | 158 | return '<div>' . wfMsg( 'rss-empty', $url ) . '</div>'; |
173 | 159 | } |
174 | 160 | |
175 | | - # Build title line |
176 | | - #$title = iconv( $charset, $wgOutputEncoding, $rss->channel['title'] ); |
177 | | - #if( $rss->channel['link'] ) $title = "<a href='" . $rss->channel['link'] . "'>$title</a>"; |
178 | | - |
179 | 161 | $output = ''; |
180 | | - if( $reverse ) { |
181 | | - $rss->items = array_reverse( $rss->items ); |
182 | | - } |
183 | | - $description = false; |
184 | | - foreach ( $rss->items as $item ) { |
185 | | - if ( isset( $item['description'] ) && $item['description'] ) { |
186 | | - $description = true; |
187 | | - break; |
188 | | - } |
189 | | - } |
190 | 162 | |
191 | | - # Build items |
192 | | - if ( !$short && $description ) { # full item list |
193 | | - $output .= '<dl>'; |
| 163 | + /* This would be better served by preg_replace_callback, but |
| 164 | + * I can't create a callback that carries $item in PHP < 5.3 |
| 165 | + */ |
| 166 | + if ( $template ) { |
| 167 | + $headcnt = 0; |
| 168 | + foreach( $rss->items as $item ) { |
| 169 | + if($maxheads > 0 && $headcnt >= $maxheads) continue; |
194 | 170 | |
195 | | - foreach ( $rss->items as $item ) { |
196 | | - $d_text = true; |
197 | | - $d_title = true; |
| 171 | + $decision = true; |
| 172 | + foreach(array('title', 'author', 'description', 'category') as $check) { |
| 173 | + if( isset( $item[$check] ) ) { |
| 174 | + $decision &= wfRssFilter($item[$check], $rssFilter) & wfRssFilterout($item[$check], $rssFilterout); |
| 175 | + if( !$decision ) continue 2; |
198 | 176 | |
199 | | - $href = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['link'] ) ) ); |
200 | | - $title = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['title'] ) ) ); |
| 177 | + $item[$check] = wfRssHighlight( $item[$check], $rssHighlight ); |
| 178 | + } |
201 | 179 | |
202 | | - if ( $date ) { |
203 | | - $pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) ); |
204 | | - $pubdate = date( $date, strtotime( $pubdate ) ); |
205 | 180 | } |
206 | 181 | |
207 | | - $d_title = wfRssFilter( $title, $rssFilter ); |
208 | | - $d_title = wfRssFilterout( $title, $rssFilterout ); |
209 | | - $title = wfRssHighlight( $title, $rssHighlight ); |
| 182 | + $rssTemp = ""; |
210 | 183 | |
211 | | - # Build description text if desired |
212 | | - if ( $item['description'] ) { |
213 | | - $text = trim( iconv( $charset, $wgOutputEncoding, $item['description'] ) ); |
214 | | - # Avoid <pre> tags |
215 | | - $text = str_replace( "\r", ' ', $text ); |
216 | | - $text = str_replace( "\n", ' ', $text ); |
217 | | - $text = str_replace( "\t", ' ', $text ); |
218 | | - $text = str_replace( '<br>', '', $text ); |
| 184 | + foreach(explode("|", $template) as $bit) { |
| 185 | + $bits = explode("=", $bit); |
| 186 | + if( count($bits) == 2 ) { |
| 187 | + $left = trim($bits[0]); |
219 | 188 | |
220 | | - $d_text = wfRssFilter( $text, $rssFilter ); |
221 | | - $d_text = wfRssFilterout( $text, $rssFilterout ); |
222 | | - $text = wfRssHighlight( $text, $rssHighlight ); |
223 | | - $display = $d_text || $d_title; |
224 | | - } else { |
225 | | - $text = ''; |
226 | | - $display = $d_title; |
227 | | - } |
228 | | - if ( $display ) { |
229 | | - $output.= "<dt><a href='$href'><b>$title</b></a></dt>"; |
230 | | - if ( $date ) { |
231 | | - $output .= " ($pubdate)"; |
232 | | - } |
233 | | - if ( $text ) { |
234 | | - $output .= "<dd>$text <b>[<a href='$href'>?</a>]</b></dd>"; |
235 | | - } |
236 | | - } |
237 | | - # Cut off output when maxheads is reached: |
238 | | - if ( ++$headcnt == $maxheads ) { |
239 | | - break; |
240 | | - } |
241 | | - } |
| 189 | + if( isset( $item[$left] ) ) { |
| 190 | + $right = $item[$left]; |
| 191 | + } |
242 | 192 | |
243 | | - $output .= '</dl>'; |
244 | | - } else { # short item list |
245 | | - ## HACKY HACKY HACKY |
246 | | - $output .= '<ul>'; |
247 | | - $displayed = array(); |
248 | | - foreach ( $rss->items as $item ) { |
249 | | - $href = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['link'] ) ) ); |
250 | | - $title = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['title'] ) ) ); |
251 | | - $d_title = wfRssFilter( $title, $rssFilter ) && wfRssFilterout( $title, $rssFilterout ); |
252 | | - $title = wfRssHighlight( $title, $rssHighlight ); |
253 | | - if ( $date ) { |
254 | | - $pubdate = isset( $item['pubdate'] ) ? trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) ) : ''; |
255 | | - if ( $pubdate == '' ) { |
256 | | - $pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['dc']['date'] ) ); |
| 193 | + $rssTemp .= implode( " = ", array($left, $right) ); |
| 194 | + } else { |
| 195 | + $rssTemp .= $bit; |
257 | 196 | } |
258 | | - $pubdate = date( $date, strtotime( $pubdate ) ); |
| 197 | + $rssTemp .= "|"; |
259 | 198 | } |
| 199 | + $rssTemp .= "}}"; |
260 | 200 | |
261 | | - if ( $d_title && !in_array( $title, $displayed ) ) { |
262 | | - // Add date to ouput if specified |
263 | | - $output .= '<li><a href="' . $href . '" title="' . $title . '">' . $title . '</a>'; |
264 | | - if( $date ) { |
265 | | - $output .= " ($pubdate)"; |
266 | | - } |
267 | | - $output .= '</li>'; |
268 | | - |
269 | | - $displayed[] = $title; |
270 | | - # Cut off output when maxheads is reached: |
271 | | - if ( ++$headcnt == $maxheads ) { |
272 | | - break; |
273 | | - } |
274 | | - } |
| 201 | + $output .= $parser->recursiveTagParse( $rssTemp, $frame ); |
| 202 | + $headcnt++; |
275 | 203 | } |
276 | | - $output.= '</ul>'; |
277 | 204 | } |
278 | | - |
279 | 205 | return $output; |
280 | 206 | } |
281 | 207 | |
— | — | @@ -343,4 +269,4 @@ |
344 | 270 | } |
345 | 271 | |
346 | 272 | return $text; |
347 | | -} |
\ No newline at end of file |
| 273 | +} |