Index: trunk/extensions/RSS/magpierss/rss_utils.inc |
— | — | @@ -0,0 +1,67 @@ |
| 2 | +<?php |
| 3 | +/* |
| 4 | + * Project: MagpieRSS: a simple RSS integration tool |
| 5 | + * File: rss_utils.inc, utility methods for working with RSS |
| 6 | + * Author: Kellan Elliott-McCrea <kellan@protest.net> |
| 7 | + * Version: 0.51 |
| 8 | + * License: GPL |
| 9 | + * |
| 10 | + * The latest version of MagpieRSS can be obtained from: |
| 11 | + * http://magpierss.sourceforge.net |
| 12 | + * |
| 13 | + * For questions, help, comments, discussion, etc., please join the |
| 14 | + * Magpie mailing list: |
| 15 | + * magpierss-general@lists.sourceforge.net |
| 16 | + */ |
| 17 | + |
| 18 | + |
/*======================================================================*\
    Function:   parse_w3cdtf
    Purpose:    parse a W3CDTF date into unix epoch
    Input:      a date-time string such as '2002-06-01T11:00:00+02:00',
                '2002-06-01T11:00Z' or '2002-06-01T11:00:00.45-05:00'.
                Seconds, a fractional-second part and the timezone
                designator are all optional; a value without a timezone
                designator is treated as GMT.
    Output:     unix epoch seconds (int), or -1 if the string does not
                contain a date with a time component.

    NOTE: http://www.w3.org/TR/NOTE-datetime
\*======================================================================*/

function parse_w3cdtf ( $date_str ) {

    # regex to match w3cdtf; capture groups are:
    #   1 year   2 month   3 day   4 hours   5 minutes
    #   6 seconds (optional; any fractional part is matched but discarded)
    #   7 tz sign   8 tz hours   9 tz minutes   10 literal 'Z'
    $pat = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';

    if ( ! preg_match( $pat, (string) $date_str, $match ) ) {
        return -1;
    }

    # preg_match() omits trailing groups that did not take part in the
    # match, so pad the result to make every index safe to read.
    $match = array_pad( $match, 11, '' );

    list( , $year, $month, $day, $hours, $minutes, $seconds,
        $tz_mod, $tz_hour, $tz_min, $zulu ) = $match;

    # calc epoch for the given date assuming GMT; the casts turn an
    # absent seconds field ('') into 0 instead of passing a non-numeric
    # string to gmmktime()
    $epoch = gmmktime(
        (int) $hours, (int) $minutes, (int) $seconds,
        (int) $month, (int) $day, (int) $year
    );

    if ( $zulu === 'Z' || $tz_mod === '' ) {
        # zulu time, aka GMT (or no timezone designator at all)
        return $epoch;
    }

    $offset_secs = ( ( (int) $tz_hour * 60 ) + (int) $tz_min ) * 60;

    # is timezone ahead of GMT?  then subtract offset to get back to GMT,
    # otherwise the local time is behind GMT and the offset is added
    if ( $tz_mod == '+' ) {
        return $epoch - $offset_secs;
    }

    return $epoch + $offset_secs;
}
| 67 | + |
| 68 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/rss_utils.inc |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 69 | + native |
Index: trunk/extensions/RSS/magpierss/rss_cache.inc |
— | — | @@ -0,0 +1,200 @@ |
| 2 | +<?php |
| 3 | +/* |
| 4 | + * Project: MagpieRSS: a simple RSS integration tool |
| 5 | + * File: rss_cache.inc, a simple, rolling(no GC), cache |
| 6 | + * for RSS objects, keyed on URL. |
| 7 | + * Author: Kellan Elliott-McCrea <kellan@protest.net> |
| 8 | + * Version: 0.51 |
| 9 | + * License: GPL |
| 10 | + * |
| 11 | + * The latest version of MagpieRSS can be obtained from: |
| 12 | + * http://magpierss.sourceforge.net |
| 13 | + * |
| 14 | + * For questions, help, comments, discussion, etc., please join the |
| 15 | + * Magpie mailing list: |
| 16 | + * http://lists.sourceforge.net/lists/listinfo/magpierss-general |
| 17 | + * |
| 18 | + */ |
| 19 | + |
/*
 * RSSCache: a simple file-backed cache for parsed RSS objects.
 *
 * Each entry is stored as one file inside $BASE_CACHE; the file name is
 * the md5 hash of the URL the feed was fetched from, and the file body
 * is the PHP-serialized RSS object.  Entries are never deleted by this
 * class (no garbage collection); staleness is judged from the file's
 * modification time against $MAX_AGE.
 *
 * After any call, $ERROR holds the last error message ("" if none).
 */
class RSSCache {
    var $BASE_CACHE = './cache';    // where the cache files are stored
    var $MAX_AGE    = 3600;         // when are files stale, default one hour
    var $ERROR      = "";           // accumulate error messages

/*=======================================================================*\
    Function:   __construct
    Purpose:    configure the cache and create the cache directory if it
                does not exist yet
    Input:      base  - cache directory (empty keeps the default)
                age   - max age in seconds (empty keeps the default)
\*=======================================================================*/
    function __construct ($base='', $age='') {
        if ( $base ) {
            $this->BASE_CACHE = $base;
        }
        if ( $age ) {
            $this->MAX_AGE = $age;
        }

        // attempt to make the cache directory
        if ( ! file_exists( $this->BASE_CACHE ) ) {
            $status = @mkdir( $this->BASE_CACHE, 0755 );

            // if make failed
            if ( ! $status ) {
                $this->error(
                    "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                );
            }
        }
    }

    // PHP 4 style constructor, kept so existing code that calls
    // $cache->RSSCache(...) or parent::RSSCache(...) keeps working.
    // PHP 8 no longer treats a class-named method as the constructor,
    // which is why the real work lives in __construct().
    function RSSCache ($base='', $age='') {
        $this->__construct( $base, $age );
    }

/*=======================================================================*\
    Function:   set
    Purpose:    add an item to the cache, keyed on url
    Input:      url from which the rss file was fetched, and the rss
                object to store
    Output:     path of the cache file on success, 0 on failure
\*=======================================================================*/
    function set ($url, $rss) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );
        $fp = @fopen( $cache_file, 'w' );

        if ( ! $fp ) {
            $this->error(
                "Cache unable to open file for writing: $cache_file"
            );
            return 0;
        }

        $data = $this->serialize( $rss );
        $written = fwrite( $fp, $data );
        fclose( $fp );

        // a failed write leaves a truncated entry behind; report it
        // instead of claiming success
        if ( $written === false ) {
            $this->error(
                "Cache unable to write to file: $cache_file"
            );
            return 0;
        }

        return $cache_file;
    }

/*=======================================================================*\
    Function:   get
    Purpose:    fetch an item from the cache
    Input:      url from which the rss file was fetched
    Output:     cached object on HIT, 0 on MISS (missing, unreadable or
                empty cache file)
\*=======================================================================*/
    function get ($url) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );

        if ( ! file_exists( $cache_file ) ) {
            $this->debug(
                "Cache doesn't contain: $url (cache file: $cache_file)"
            );
            return 0;
        }

        $fp = @fopen($cache_file, 'r');
        if ( ! $fp ) {
            $this->error(
                "Failed to open cache file for reading: $cache_file"
            );
            return 0;
        }

        $filesize = filesize( $cache_file );
        $data = $filesize ? fread( $fp, $filesize ) : '';

        // always release the handle, whether or not anything was read
        fclose( $fp );

        if ( ! $filesize ) {
            return 0;
        }

        return $this->unserialize( $data );
    }

/*=======================================================================*\
    Function:   check_cache
    Purpose:    check a url for membership in the cache
                and whether the object is older than MAX_AGE (ie. STALE)
    Input:      url from which the rss file was fetched
    Output:     'HIT' if cached and current, 'STALE' if cached but older
                than MAX_AGE, 'MISS' if not cached
\*=======================================================================*/
    function check_cache ( $url ) {
        $this->ERROR = "";
        $filename = $this->file_name( $url );

        if ( ! file_exists( $filename ) ) {
            // object does not exist
            return 'MISS';
        }

        // find how long ago the file was added to the cache
        // and whether that is longer than MAX_AGE
        $age = time() - filemtime( $filename );
        if ( $this->MAX_AGE > $age ) {
            // object exists and is current
            return 'HIT';
        }

        // object exists but is old
        return 'STALE';
    }

/*=======================================================================*\
    Function:   cache_age
    Purpose:    report how long ago an item was written to the cache
    Input:      cache key, i.e. the url from which the rss file was fetched
    Output:     age in seconds, or -1 if the item is not cached
\*=======================================================================*/
    function cache_age( $cache_key ) {
        $filename = $this->file_name( $cache_key );
        if ( file_exists( $filename ) ) {
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            return $age;
        }
        else {
            return -1;
        }
    }

/*=======================================================================*\
    Function:   serialize
\*=======================================================================*/
    function serialize ( $rss ) {
        return serialize( $rss );
    }

/*=======================================================================*\
    Function:   unserialize
    NOTE:       unserialize() must only ever see data this class wrote;
                do not point BASE_CACHE at a directory that untrusted
                users can write to.
\*=======================================================================*/
    function unserialize ( $data ) {
        return unserialize( $data );
    }

/*=======================================================================*\
    Function:   file_name
    Purpose:    map url to location in cache
    Input:      url from which the rss file was fetched
    Output:     a file name
\*=======================================================================*/
    function file_name ($url) {
        $filename = md5( $url );
        return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
    }

/*=======================================================================*\
    Function:   error
    Purpose:    register error
    Input:      error message, and the level to raise it at when
                MAGPIE_DEBUG is on
\*=======================================================================*/
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;

        // MAGPIE_DEBUG is defined outside this file; treat "not defined"
        // as "off" rather than failing on an undefined constant
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }
    }

/*=======================================================================*\
    Function:   debug
    Purpose:    report a debug message, only when MAGPIE_DEBUG is on
\*=======================================================================*/
    function debug ($debugmsg, $lvl=E_USER_NOTICE) {
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            $this->error("MagpieRSS [debug] $debugmsg", $lvl);
        }
    }

}
| 200 | + |
| 201 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/rss_cache.inc |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 202 | + native |
Index: trunk/extensions/RSS/magpierss/htdocs/cookbook.html |
— | — | @@ -0,0 +1,237 @@ |
| 2 | +<html> |
| 3 | + <head> |
| 4 | + <title>Magpie RSS Recipes: Simple PHP RSS How To</title> |
| 5 | + <style> |
| 6 | + body { |
| 7 | + font-family:trebuchet MS, trebuchet, verdana, arial, sans-serif; |
| 8 | + font-size: 11px; |
| 9 | + |
| 10 | + } |
| 11 | + |
| 12 | + pre { font-family: "Courier New", monospace; |
| 13 | + padding: 1em; |
| 14 | + margin: 0.2em 2.5em 0.2em 3em; |
| 15 | + background-color: #efeff5; |
| 16 | + border: 1px solid #cfcfcf; |
| 17 | + white-space: pre; |
| 18 | + } |
| 19 | + |
| 20 | + </style> |
| 21 | + </head> |
| 22 | + <body> |
| 23 | +<p> |
| 24 | +<h1>MagpieRSS Recipes: Cooking with Corbies</h1> |
| 25 | + |
| 26 | +<div align="center"><h3><em>"Four and twenty blackbirds baked in a |
| 27 | +pie."</em></h3></div> |
| 28 | +</p> |
| 29 | +<p> |
| 30 | +<ol> |
| 31 | +<li><a href="#limit">Limit the Number of Headlines(aka Items) Returned</a></li> |
| 32 | +<li><a href="#error_message">Display a Custom Error Message if Something Goes |
| 33 | +Wrong</a></li> |
| 34 | +<li><a href="#write_rss">Generate a New RSS Feed</a></li> |
| 35 | +<li><a href="#by_date">Display Headlines More Recent than X Date</a></li> |
| 36 | +<li><a href="#from_file">Parse a Local File Containing RSS</a></li> |
| 37 | + |
| 38 | +</ol> |
| 39 | +</p> |
| 40 | + |
| 41 | +<a name="limit"></a><h2>1. Limit the Number of Headlines(aka Items) Returned.</h2> |
| 42 | + |
| 43 | +<h3>Problem:</h3> |
| 44 | + |
| 45 | +You want to display the 10 (or 3 or whatever) most recent headlines, but the RSS feed |
| 46 | +contains 15. |
| 47 | + |
| 48 | +<h3>Solution:</h3> |
| 49 | + |
| 50 | +<pre> |
| 51 | +$num_items = 10; |
| 52 | +$rss = fetch_rss($url); |
| 53 | + |
| 54 | +$items = array_slice($rss->items, 0, $num_items); |
| 55 | + |
| 56 | +foreach ( $items as $item ) { |
| 57 | +</pre> |
| 58 | +<h3>Discussion:</h3> |
| 59 | + |
| 60 | +Rather than trying to limit the number of items Magpie parses, a much simpler, |
| 61 | +and more flexible approach is to take a "slice" of the array of items. And |
| 62 | +array_slice() is smart enough to do the right thing if the feed has fewer items |
| 63 | +than $num_items. |
| 64 | + |
| 65 | +<h3>See:</h3> <a href="http://www.php.net/array_slice">http://www.php.net/array_slice</a> |
| 66 | +</p> |
| 67 | + |
| 68 | +<a name="error_message"></a><h2>2. Display a Custom Error Message if Something Goes Wrong</h2> |
| 69 | + |
| 70 | +<h3>Problem:</h3> |
| 71 | + |
| 72 | +You don't want Magpie's error messages showing up if something goes wrong. |
| 73 | + |
| 74 | +<h3>Solution:</h3> |
| 75 | +<pre> |
| 76 | +# Magpie throws USER_WARNINGS only |
| 77 | +# so you can cloak these, by only showing ERRORs |
| 78 | +error_reporting(E_ERROR); |
| 79 | + |
| 80 | +# check the return value of fetch_rss() |
| 81 | + |
| 82 | +$rss = fetch_rss($url); |
| 83 | + |
| 84 | +if ( $rss ) { |
| 85 | +...display rss feed... |
| 86 | +} |
| 87 | +else { |
| 88 | + echo "An error occurred! " . |
| 89 | + "Consider donating more $$$ for restoration of services." . |
| 90 | + "<br>Error Message: " . magpie_error(); |
| 91 | +} |
| 92 | +</pre> |
| 93 | +<h3>Discussion:</h3> |
| 94 | + |
| 95 | +MagpieRSS triggers a warning in a number of circumstances. The 2 most common |
| 96 | +circumstances are: if the specified RSS file isn't properly formed (usually |
| 97 | +because it includes illegal HTML), or if Magpie can't download the remote RSS |
| 98 | +file, and there is no cached version. |
| 99 | + |
| 100 | +If you don't want your users to see these warnings change your error_reporting |
| 101 | +settings to only display ERRORs.<br /> |
| 102 | +Another option is to turn off display_errors, |
| 103 | +so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. |
| 104 | + |
| 105 | +You can do this with: |
| 106 | + |
| 107 | +<pre> |
| 108 | +# you can also do this in your php.ini file |
| 109 | +ini_set('display_errors', 0); |
| 110 | +</pre> |
| 111 | + |
| 112 | +<h3>See:</h3> |
| 113 | +<a |
| 114 | +href="http://www.php.net/error_reporting">http://www.php.net/error_reporting</a>,<br |
| 115 | +/> |
| 116 | +<a href="http://www.php.net/ini_set">http://www.php.net/ini_set</a>, <br /> |
| 117 | +<a |
| 118 | +href="http://www.php.net/manual/en/ref.errorfunc.php">http://www.php.net/manual/en/ref.errorfunc.php</a><br |
| 119 | +/> |
| 120 | + |
| 121 | +<a name="write_rss"></a><h2>3. Generate a New RSS Feed</h2> |
| 122 | + |
| 123 | +<h3>Problem:</h3> |
| 124 | + |
| 125 | +Create an RSS feed for other people to use. |
| 126 | + |
| 127 | +<h3>Solution:</h3> |
| 128 | + |
| 129 | +Use Useful Inc's <a href="http://usefulinc.com/rss/rsswriter/">RSSWriter</a>. |
| 130 | + |
| 131 | +<h3>Discussion:</h3> |
| 132 | + |
| 133 | +An example of turning a Magpie parsed RSS object back into an RSS file is |
| 134 | +forthcoming. In the meantime RSSWriter is well documented. |
| 135 | + |
| 136 | +<a name="by_date"></a><h2>4. Display Headlines More Recent than X Date</h2> |
| 137 | + |
| 138 | +<h3>Problem:</h3> |
| 139 | + |
| 140 | +You only want to display headlines that were published on, or after a certain |
| 141 | +date. |
| 142 | + |
| 143 | + |
| 144 | +<h3>Solution:</h3> |
| 145 | +<pre> |
| 146 | +require_once('rss_utils.inc'); |
| 147 | + |
| 148 | +# get all headlines published today |
| 149 | +$today = getdate(); |
| 150 | + |
| 151 | +# today, 12AM |
| 152 | +$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); |
| 153 | + |
| 154 | +$rss = fetch_rss($url); |
| 155 | + |
| 156 | +foreach ( $rss->items as $item ) { |
| 157 | + $published = parse_w3cdtf($item['dc']['date']); |
| 158 | + if ( $published >= $date ) { |
| 159 | + echo "Title: " . $item['title']; |
| 160 | + echo "Published: " . date("h:i:s A", $published); |
| 161 | + echo "<p>"; |
| 162 | + } |
| 163 | +} |
| 164 | +</pre> |
| 165 | +<h3>Discussion:</h3> |
| 166 | + |
| 167 | +This recipe only works for RSS 1.0 feeds that include the &lt;dc:date&gt; field. |
| 168 | +(which is very good RSS style) <br /> |
| 169 | +<code>parse_w3cdtf()</code> is defined in |
| 170 | +<code>rss_utils.inc</code>, and parses RSS style dates into Unix epoch |
| 171 | +seconds. |
| 172 | + |
| 173 | +<h3>See: </h3> |
| 174 | +<a |
| 175 | +href="http://www.php.net/manual/en/ref.datetime.php">http://www.php.net/manual/en/ref.datetime.php</a> |
| 176 | + |
| 177 | +<a name="from_file"></a> |
| 178 | +<h2>5. Parse a Local File Containing RSS</h2> |
| 179 | +<h3>Problem:</h3> |
| 180 | +MagpieRSS provides <code>fetch_rss()</code> which takes a URL and returns a |
| 181 | +parsed RSS object, but what if you want to parse a file stored locally that |
| 182 | +doesn't have a URL? |
| 183 | + |
| 184 | +<h3>Solution</h3> |
| 185 | +<pre> |
| 186 | +require_once('rss_parse.inc'); |
| 187 | + |
| 188 | +$rss_file = 'some_rss_file.rdf'; |
| 189 | +$rss_string = read_file($rss_file); |
| 190 | +$rss = new MagpieRSS( $rss_string ); |
| 191 | + |
| 192 | +if ( $rss and !$rss->ERROR) { |
| 193 | +...display rss... |
| 194 | +} |
| 195 | +else { |
| 196 | + echo "Error: " . $rss->ERROR; |
| 197 | +} |
| 198 | + |
| 199 | +# efficiently read a file into a string |
| 200 | +# in php >= 4.3.0 you can simply use file_get_contents() |
| 201 | +# |
| 202 | +function read_file($filename) { |
| 203 | + $fh = fopen($filename, 'r') or die($php_errormsg); |
| 204 | + $rss_string = fread($fh, filesize($filename) ); |
| 205 | + fclose($fh); |
| 206 | + return $rss_string; |
| 207 | +} |
| 208 | +</pre> |
| 209 | + |
| 210 | +<h3>Discussion</h3> |
| 211 | +Here we are using MagpieRSS's RSS parser directly without the convenience wrapper |
| 212 | +of <code>fetch_rss()</code>. We read the contents of the RSS file into a |
| 213 | +string, and pass it to the parser constructor. Notice also that error handling |
| 214 | +is subtly different. |
| 215 | + |
| 216 | +<h3>See: </h3> |
| 217 | +<a |
| 218 | +href="http://www.php.net/manual/en/ref.filesystem.php">http://www.php.net/manual/en/ref.filesystem.php</a>,<br |
| 219 | +/> |
| 220 | +<a |
| 221 | +href="http://www.php.net/manual/en/language.oop.php">http://www.php.net/manual/en/language.oop.php</a> |
| 222 | + |
| 223 | +<!-- |
| 224 | +<a name="link"></a><h2>#. Recipe</h2> |
| 225 | +<h3>Problem:</h3> |
| 226 | +Problem description |
| 227 | +<h3>Solution</h3> |
| 228 | +<pre> |
| 229 | +code |
| 230 | +</pre> |
| 231 | +<h3>Discussion</h3> |
| 232 | +Discuss code |
| 233 | +<h3>See: </h3> |
| 234 | +Documentation links: |
| 235 | +--> |
| 236 | + |
| 237 | +</body> |
| 238 | +</html> |
Index: trunk/extensions/RSS/magpierss/htdocs/index.html |
— | — | @@ -0,0 +1,419 @@ |
| 2 | +<html> |
| 3 | + <head> |
| 4 | + <title>Magpie RSS - PHP RSS Parser</title> |
| 5 | + <link rel="alternate" type="application/rss+xml" title="RSS" |
| 6 | + href="http://laughingmeme.org/magpierss.rdf" /> |
| 7 | + <style> |
| 8 | + body { |
| 9 | + font-family:trebuchet MS, trebuchet, verdana, arial, sans-serif; |
| 10 | + font-size: 11px; |
| 11 | + |
| 12 | + } |
| 13 | + |
| 14 | + pre { font-family: "Courier New", monospace; |
| 15 | + padding: 1em; |
| 16 | + margin: 0.2em 2.5em 0.2em 3em; |
| 17 | + background-color: #efeff5; |
| 18 | + border: 1px solid #cfcfcf; |
| 19 | + white-space: pre; |
| 20 | + } |
| 21 | + |
| 22 | + li.news { |
| 23 | + padding-bottom:15px; |
| 24 | + } |
| 25 | + |
| 26 | + a.nav { color: #FFFFFF; } |
| 27 | + |
| 28 | + div.nav { |
| 29 | + width: 2in; |
| 30 | + float: right; |
| 31 | + border: 2px solid #cfcfcf; |
| 32 | + padding: 5px; |
| 33 | + background-color: #996699; |
| 34 | + } |
| 35 | + |
| 36 | + </style> |
| 37 | + </head> |
| 38 | + <body> |
| 39 | + <img src="magpie-photo.jpg"> |
| 40 | + <h1>MagpieRSS</h1> |
| 41 | + <p> |
| 42 | + <h2>MagpieRSS provides an XML-based (expat) RSS parser in PHP.</h2> |
| 43 | + <p> |
| 44 | + MagpieRSS is compatible with RSS .9 through RSS 1.0, and supports the |
| 45 | + RSS 1.0's modules. (with a few exceptions) |
| 46 | + <p> |
| 47 | + <div class="nav"> |
| 48 | + <center><h3>Project Info</h3></center> |
| 49 | + <ul> |
| 50 | + <li><a class="nav" |
| 51 | +href="http://sourceforge.net/project/showfiles.php?group_id=55691">Download |
| 52 | +Magpie</a></li> |
| 53 | + <li><a class="nav" |
| 54 | +href="http://sourceforge.net/mail/?group_id=55691">Mailing |
| 55 | +Lists</a></li> |
| 56 | + <li><a class="nav" href="#news">News!</a></li> |
| 57 | + <li><a class="nav" href="#why">Why?</a></li> |
| 58 | + <li><a class="nav" href="#features">Features</a></li> |
| 59 | + <li><a class="nav" href="#philosophy">Philosophy</a></li> |
| 60 | + <li><a class="nav" href="#usage">Usage Examples</a></li> |
| 61 | + <li><a class="nav" href="cookbook.html">Cookbook</a></li> |
| 62 | + <li><a class="nav" href="#todo">Todo</a></li> |
| 63 | +<li style="list-style: none; padding-top: 5px;"><a title="Keep up on MagpieRSS news via RSS" href="http://laughingmeme.org/magpierss.rdf"><img |
| 64 | +src="http://magpierss.sf.net/black_grey_magpie_news.gif" border="0"></a></li> |
| 65 | +</ul> |
| 66 | +</div> |
| 67 | + <a name="news"></a> |
| 68 | + <h3>News!</h3> |
| 69 | + <ul> |
| 70 | + |
| 71 | +<li class="news"> |
| 72 | + <a |
| 73 | +href="http://sourceforge.net/project/showfiles.php?group_id=55691">MagpieRSS |
| 74 | +0.51 Released</a> |
| 75 | +<ul> |
| 76 | +<li> important bugfix!</li> |
| 77 | +<li> fix <a href="http://laughingmeme.org/archives/000811.html |
| 78 | +">"silent failure"</a> when PHP doesn't have zlib</li> |
| 79 | +</ul> |
| 80 | + |
| 81 | +</li> |
| 82 | + |
| 83 | +<li class="news"> |
| 84 | + <a href="http://minutillo.com/steve/feedonfeeds/">Feed On Feeds Uses Magpie</a> |
| 85 | +<ul> |
| 86 | +<li> server based PHP RSS aggregator built with MagpieRSS</li> |
| 87 | +<li> easy to install, easy to use.</li> |
| 88 | +</ul> |
| 89 | + |
| 90 | +</li> |
| 91 | + |
| 92 | + |
| 93 | +<li class="news"> |
| 94 | + <a |
| 95 | +href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=158897">MagpieRSS |
| 96 | +0.5 Released</a> |
| 97 | +<ul> |
| 98 | +<li> supports transparent HTTP gzip content negotiation for reduced bandwidth usage</li> |
| 99 | +<li> quashed some undefined index notices</li> |
| 100 | +</ul> |
| 101 | + |
| 102 | +</li> |
| 103 | + |
| 104 | + |
| 105 | +<li class="news"> |
| 106 | + <a |
| 107 | +href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=139643">MagpieRSS |
| 108 | +0.46 Released</a> |
| 109 | +<ul> |
| 110 | +<li> minor release, more error handling clean up</li> |
| 111 | +<li> documentation fixes, simpler example</li> |
| 112 | +<li> new <a href="TROUBLESHOOTING">troubleshooting</a> guide for installation and usage problems</li> |
| 113 | +</ul> |
| 114 | + |
| 115 | +</li> |
| 116 | + |
| 117 | +<li class="news"> |
| 118 | + <a |
| 119 | +href="http://laughingmeme.org/magpierss.rdf">Magpie News as RSS</a> |
| 120 | +<ul> |
| 121 | +<li> releases, bug fixes, related stories as an RSS feed</li> |
| 122 | +</ul> |
| 123 | + |
| 124 | +</li> |
| 125 | + |
| 126 | + |
| 127 | +<li class="news"> |
| 128 | + <a |
| 129 | +href="http://magpierss.sourceforge.net/cookbook.html">MagpieRSS |
| 130 | +Cookbook: Simple PHP RSS How Tos</a> |
| 131 | +<ul> |
| 132 | +<li> answers some of the most frequently asked Magpie questions</li> |
| 133 | +<li> feedback, suggestions, requests, recipes welcome</li> |
| 134 | +</ul> |
| 135 | + |
| 136 | +</li> |
| 137 | + |
| 138 | +<li class="news"> |
| 139 | + <a href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=134850">MagpieRSS 0.4 Released!</a> |
| 140 | +<ul> |
| 141 | +<li> improved error handling, more flexibility for script authors, |
| 142 | +backwards compatible</li> |
| 143 | +<li> new and better examples! including using MagpieRSS and <a |
| 144 | +href="http://smarty.php.net">Smarty</a></li> |
| 145 | +<li> new Smarty plugin for RSS date parsing</li> |
| 146 | +</ul> |
| 147 | +<br /> |
| 148 | +</li> |
| 149 | +<!-- |
| 150 | +<li class="news"> |
| 151 | +<a href="http://www.infinitepenguins.net/rss/">Infinite Penguin now |
| 152 | +supports Magpie 0.3</a> |
| 153 | +<ul> |
| 154 | +<li> simple, sophisticated RSS viewer</li> |
| 155 | +<li> includes auto-generated javascript ticker from RSS feed</li> |
| 156 | +</ul> |
| 157 | + |
| 158 | +</li> |
| 159 | + |
| 160 | + |
| 161 | +<li class="news"> |
| 162 | +<a |
| 163 | +href="http://traumwind.tierpfad.de/blog/magpie/magpie_alike.php">Traumwind |
| 164 | +releases REX backend for MagpieRSS</a> |
| 165 | +<ul> |
| 166 | +<li>drop in support using regex based XML parser</li> |
| 167 | +<li>parses improperly formed XML that chokes expat</li> |
| 168 | +</ul> |
| 169 | + |
| 170 | +</li> |
| 171 | + |
| 172 | +<li class="news"> |
| 173 | + <a |
| 174 | +href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=118652"> |
| 175 | + MagpieRSS 0.3 Released!</a> |
| 176 | + <ul> |
| 177 | + <li>Support added for |
| 178 | + <a href="http://fishbowl.pastiche.org/archives/001132.html">HTTP |
| 179 | + Conditional GETs</a>.</li> |
| 180 | + <li>See <a href="http://sourceforge.net/project/shownotes.php?group_id=55691&release_id=118652">ChangeLog</a> |
| 181 | + for more info.</li> |
| 182 | + </ul> |
| 183 | + </li> |
| 184 | + <li class="news">MagpieRSS 0.2!</a> |
| 185 | + <ul> |
| 186 | + <li>Major clean up of the code. Easier to use.</li> |
| 187 | + <li>Simpler install on shared hosts.</li> |
| 188 | + <li>Better documentation and comments.</li> |
| 189 | + </ul> |
| 190 | + </li> |
| 191 | + <li class="news">We've <a href="http://sourceforge.net/projects/magpierss/">moved to |
| 192 | + Sourceforge!</a></li> |
| 193 | + --> |
| 194 | + </ul> |
| 195 | + </p> |
| 196 | + <p> |
| 197 | + <a name="why"></a> |
| 198 | + <h3>Why?</h3> |
| 199 | + I wrote MagpieRSS out of a frustration with the limitations of existing |
| 200 | + solutions. In particular many of the existing PHP solutions seemed to: |
| 201 | + <ul> |
| 202 | + <li>use a parser based on regular expressions, making for an inherently |
| 203 | + fragile solution |
| 204 | + <li>only support early versions of RSS |
| 205 | + <li>discard all the interesting information besides item title, description, |
| 206 | + and link. |
| 207 | + <li>not build proper separation between parsing the RSS and displaying it. |
| 208 | + </ul> |
| 209 | + In particular I failed to find any PHP RSS parsers that could sufficiently |
| 210 | + parse RSS 1.0 feeds, to be useful on the RSS based event feeds we generate |
| 211 | + at <a href="http://protest.net">Protest.net</a>. |
| 212 | + </p> |
| 213 | + <p> |
| 214 | + <a name="features"></a> |
| 215 | + <h3>Features</h3> |
| 216 | + |
| 217 | +<ul> |
| 218 | + <li class="toplevel"> |
| 219 | + <h4>Easy to Use</h4> |
| 220 | + As simple as: |
| 221 | +<pre> |
| 222 | +require('rss_fetch.inc'); |
| 223 | +$rss = fetch_rss($url); |
| 224 | +</pre> |
| 225 | + |
| 226 | + </li> |
| 227 | + <li class="toplevel"> |
| 228 | + <h4>Parses RSS 0.9 - RSS 1.0</h4> |
| 229 | + |
| 230 | + Parses most RSS formats, including support for |
| 231 | + <a href="http://www.purl.org/rss/1.0/modules/">1.0 modules</a> and limited |
| 232 | + namespace support. RSS is packed into convenient data structures; easy to |
| 233 | + use in PHP, and appropriate for passing to a templating system, like |
| 234 | + <a href="http://smarty.php.net">Smarty</a>. |
| 235 | + </li> |
| 236 | + <li> |
| 237 | + <h4>Integrated Object Cache</h4> |
| 238 | + |
| 239 | + Caching the parsed RSS means that the 2nd request is fast, and that |
| 240 | +including the rss_fetch call in your PHP page won't destroy your performance, |
| 241 | +and force you to rely on an external cron job. And it happens transparently. |
| 242 | + |
| 243 | + </li> |
| 244 | + <li> |
| 245 | + <h4>HTTP Conditional GETs</h4> |
| 246 | + |
| 247 | + Save bandwidth and speed up download times with intelligent use of |
| 248 | + Last-Modified and ETag.<br /> See <a |
| 249 | + href="http://fishbowl.pastiche.org/archives/001132.html">HTTP Conditional Get for RSS Hackers</a> |
| 250 | + </li> |
| 251 | + |
| 252 | + <li><h4>Configurable</h4> |
| 253 | + |
| 254 | + Makes extensive use of constants to allow overriding default behaviour, and |
| 255 | + installation on shared hosts. |
| 256 | + </li> |
| 257 | + <li><h4>Modular</h4> |
| 258 | + <ul> |
| 259 | + <li>rss_fetch.inc - wraps a simple interface (<code>fetch_rss()</code>) |
| 260 | + around the library. |
| 261 | + <li>rss_parse.inc - provides the RSS parser, and the RSS object |
| 262 | + <li>rss_cache.inc - a simple (no GC) object cache, optimized for RSS objects |
| 263 | + <li>rss_utils.inc - utility functions for working with RSS. currently |
| 264 | + provides <code>parse_w3cdtf()</code>, for parsing <a |
| 265 | + href="http://www.w3.org/TR/NOTE-datetime">W3CDTF</a> into epoch seconds. |
| 266 | + </ul> |
| 267 | +</ul> |
| 268 | + |
| 269 | + |
| 270 | + </p> |
| 271 | +<p> |
| 272 | + <a name="philosophy"></a> |
| 273 | + <h3>Magpie's approach to parsing RSS</h3> |
| 274 | + |
| 275 | + Magpie takes a naive, and inclusive approach. Absolutely |
| 276 | + non-validating, as long as the RSS feed is well formed, Magpie will |
| 277 | + cheerfully parse new, and never before seen tags in your RSS feeds. |
| 278 | + </p> |
| 279 | + <p> |
| 280 | + This makes it very simple to support the varied versions of RSS, but |
| 281 | + forces the consumer of a RSS feed to be cognizant of how it is |
| 282 | + structured.(at least if you want to do something fancy) |
| 283 | + </p> |
| 284 | + <p> |
| 285 | + Magpie parses a RSS feed into a simple object, with 4 fields: |
| 286 | + <code>channel</code>, <code>items</code>, <code>image</code>, and |
| 287 | + <code>textinput</code>. |
| 288 | + </p> |
| 289 | + <p> |
| 290 | + <h4>channel</h4> |
| 291 | + <code>$rss->channel</code> contains key-value pairs of all tags, without |
| 292 | + nested tags, found between the root tag (&lt;rdf:RDF&gt;, or &lt;rss&gt;) |
| 293 | + and the end of the document. |
| 294 | + </p> |
| 295 | + <p> |
| 296 | + <h4>items</h4> |
| 297 | + <code>$rss->items</code> is an array of associative arrays, each one |
| 298 | + describing a single item. An example that looks like: |
| 299 | + <pre> |
| 300 | +&lt;item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&amp;ID=210257"&gt; |
| 301 | +&lt;title&gt;Weekly Peace Vigil&lt;/title&gt; |
| 302 | +&lt;link&gt;http://protest.net/NorthEast/calendrome.cgi?span=event&amp;ID=210257&lt;/link&gt; |
| 303 | +&lt;description&gt;Wear a white ribbon&lt;/description&gt; |
| 304 | +&lt;dc:subject&gt;Peace&lt;/dc:subject&gt; |
| 305 | +&lt;ev:startdate&gt;2002-06-01T11:00:00&lt;/ev:startdate&gt; |
| 306 | +&lt;ev:location&gt;Northampton, MA&lt;/ev:location&gt; |
| 307 | +&lt;ev:enddate&gt;2002-06-01T12:00:00&lt;/ev:enddate&gt; |
| 308 | +&lt;ev:type&gt;Protest&lt;/ev:type&gt; |
| 309 | +&lt;/item&gt; |
| 310 | + </pre><p> |
| 311 | + Is parsed, and pushed on the <code>$rss->items</code> array as: |
| 312 | + <p><pre> |
| 313 | +array( |
| 314 | + title => 'Weekly Peace Vigil', |
| 315 | + link => 'http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257', |
| 316 | + description => 'Wear a white ribbon', |
| 317 | + dc => array ( |
| 318 | + subject => 'Peace' |
| 319 | + ), |
| 320 | + ev => array ( |
| 321 | + startdate => '2002-06-01T11:00:00', |
| 322 | + enddate => '2002-06-01T12:00:00', |
| 323 | + type => 'Protest', |
| 324 | + location => 'Northampton, MA' |
| 325 | + ) |
| 326 | +); |
| 327 | +</pre> |
| 328 | +</p> |
| 329 | +<p> |
| 330 | +<h4>image and textinput</h4> |
| 331 | +<code>$rss->image</code> and <code>$rss->textinput</code> are associative arrays |
| 332 | +including name-value pairs for anything found between the respective parent |
| 333 | +tags. |
| 334 | +</p> |
| 335 | +<p> |
| 336 | +<a name="usage"></a> |
| 337 | +<h3>Usage Examples:</h3> |
| 338 | + |
| 339 | +A very simple example would be: |
| 340 | +<pre> |
| 341 | +require_once 'rss_fetch.inc'; |
| 342 | + |
| 343 | +$url = 'http://magpie.sf.net/samples/imc.1-0.rdf'; |
| 344 | +$rss = fetch_rss($url); |
| 345 | + |
| 346 | +echo "Site: ", $rss->channel['title'], "<br>\n"; |
| 347 | +foreach ($rss->items as $item ) { |
| 348 | + $title = $item[title]; |
| 349 | + $url = $item[link]; |
| 350 | + echo "<a href=$url>$title</a><br>\n"; |
| 351 | +} |
| 352 | +</pre> |
| 353 | +More soon....in the meantime you can check out a |
| 354 | +<a href="http://www.infinitepenguins.net/rss/">cool tool built with |
| 355 | +MagpieRSS</a>, version 0.1. |
| 356 | +</p> |
| 357 | +<p> |
| 358 | +<a name="todo"></a> |
| 359 | +<h3>Todos</h3> |
| 360 | + <h4>RSS Parser</h4> |
| 361 | + <ul> |
| 362 | + <li>Swap in a smarter parser that includes optional |
| 363 | + support for validation, and required fields.</li> |
| 364 | + |
| 365 | + <li>Support RSS 2.0 (as much as I'm annoyed by it)</li> |
| 366 | + |
| 367 | + <li>Improve support for modules that rely on attributes</li> |
| 368 | + </ul> |
| 369 | + |
| 370 | + <h4>RSS Cache</h4> |
| 371 | + <ul> |
| 372 | + <li>Light-weight garbage collection</li> |
| 373 | + </ul> |
| 374 | + |
| 375 | + <h4>Fetch RSS</h4> |
| 376 | + <ul> |
| 377 | + <li>Attempt to <a |
| 378 | + href="http://diveintomark.org/archives/2002/08/15.html">auto-detect an |
| 379 | + RSS feed</a>, given a URL, much like <a |
| 380 | + href="http://diveintomark.org/projects/misc/rssfinder.py.txt">rssfinder.py</a> does. |
| 381 | + </li> |
| 382 | + </ul> |
| 383 | + <h4>Misc</h4> |
| 384 | + <ul> |
| 385 | + <li>More examples</li> |
| 386 | + <li>A test suite</li> |
| 387 | + <li>RSS generation, perhaps with <a |
| 388 | + href="http://usefulinc.com/rss/rsswriter/">RSSwriter</a>? |
| 389 | + </li> |
| 390 | + </ul> |
| 391 | + |
| 392 | +</p> |
| 393 | +<p> |
| 394 | +<h3>RSS Resources</h3> |
| 395 | + <ul> |
| 396 | + <li><a href="http://mnot.net/rss/tutorial/">RSS Tutorial for Content Publishers |
| 397 | + and Webmasters</a> is a great place to start.</li> |
| 398 | + <li><a href="http://gils.utah.gov/rss/">RSS Workshop: Publish and Syndicate |
| 399 | + Your News to the Web</a> is also a good introduction.</li> |
| 400 | + <li><a href="http://www.disobey.com/amphetadesk/finding_more.html">Finding |
| 401 | + More Channels</a> on how to find RSS feeds.</li> |
| 402 | + <li>Hammersley's <a href="http://rss.benhammersley.com/">Content Syndication |
| 403 | + with XML and RSS</a> is a blog covering RSS current events.</li> |
| 404 | + <li><a href="http://groups.yahoo.com/group/rss-dev/">RSS-DEV mailing |
| 405 | + list</a> is generally a very helpful, informative space, with the occasional |
| 406 | + heated debate.</li> |
| 407 | + <li><a href="http://feeds.archive.org/validator/">RSS Validator</a></li> |
| 408 | + </ul> |
| 409 | +</p> |
| 410 | +<h3>License and Contact Info</h3> |
| 411 | +Magpie is distributed under the GPL license... |
| 412 | +<p> |
| 413 | +coded by: kellan (at) protest.net, feedback is always appreciated. |
| 414 | +<p> |
| 415 | +<a href="http://sourceforge.net"><img |
| 416 | +src="http://sourceforge.net/sflogo.php?group_id=55691&type=3" |
| 417 | +width="125" height="37" border="0" alt="SourceForge.net Logo"></a> |
| 418 | +<img src="http://laughingmeme.org/magpie_views.gif"> |
| 419 | +</body> |
| 420 | +</html> |
Index: trunk/extensions/RSS/magpierss/AUTHORS |
— | — | @@ -0,0 +1 @@ |
| 2 | +kellan <kellan@protest.net> |
Index: trunk/extensions/RSS/magpierss/INSTALL |
— | — | @@ -0,0 +1,143 @@ |
| 2 | +REQUIREMENTS |
| 3 | + |
| 4 | + MagpieRSS requires a recent PHP 4+ (developed with 4.2.0) |
| 5 | + with xml (expat) support. |
| 6 | + |
| 7 | + Optionally: |
| 8 | + * PHP5 with libxml2 support. |
| 9 | + * cURL for SSL support |
| 10 | + * iconv (preferred) or mb_string for expanded character set support |
| 11 | + |
| 12 | +QUICK START |
| 13 | + |
| 14 | + Magpie consists of 4 files (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
| 15 | + and rss_utils.inc), and the directory extlib (which contains a modified |
| 16 | + version of the Snoopy HTTP client) |
| 17 | + |
| 18 | + Copy these 5 resources to a directory named 'magpierss' in the same |
| 19 | + directory as your PHP script. |
| 20 | + |
| 21 | + At the top of your script add the following line: |
| 22 | + |
| 23 | + require_once('magpierss/rss_fetch.inc'); |
| 24 | + |
| 25 | + Now you can use the fetch_rss() method: |
| 26 | + |
| 27 | + $rss = fetch_rss($url); |
| 28 | + |
| 29 | + Done. That's it. See README for more details on using MagpieRSS. |
| 30 | + |
| 31 | +NEXT STEPS |
| 32 | + |
| 33 | + Important: you'll probably want to get the cache directory working in |
| 34 | + order to speed up your application, and not abuse the webserver you're |
| 35 | + downloading the RSS from. |
| 36 | + |
| 37 | + Optionally you can install MagpieRSS in your PHP include path in order to |
| 38 | + make it available server wide. |
| 39 | + |
| 40 | + Lastly you might want to look through the constants in rss_fetch.inc to see if |
| 41 | + there is anything you want to override (the defaults are pretty good) |
| 42 | + |
| 43 | + For more info, or if you have trouble, see TROUBLESHOOTING |
| 44 | + |
| 45 | +SETTING UP CACHING |
| 46 | + |
| 47 | + Magpie has built-in transparent caching. With caching Magpie will only |
| 48 | + fetch and parse RSS feeds when there is new content. Without this feature |
| 49 | + your pages will be slow, and the sites serving the RSS feed will be annoyed |
| 50 | + with you. |
| 51 | + |
| 52 | +** Simple and Automatic ** |
| 53 | + |
| 54 | + By default Magpie will try to create a cache directory named 'cache' in the |
| 55 | + same directory as your PHP script. |
| 56 | + |
| 57 | +** Creating a Local Cache Directory ** |
| 58 | + |
| 59 | + Often this will fail, because your webserver doesn't have sufficient |
| 60 | + permissions to create the directory. |
| 61 | + |
| 62 | + Exact instructions for how to do this will vary from install to install and |
| 63 | + platform to platform. The steps are: |
| 64 | + |
| 65 | + 1. Make a directory named 'cache' |
| 66 | + 2. Give the web server write access to that directory. |
| 67 | + |
| 68 | + An example of how to do this on Debian would be: |
| 69 | + |
| 70 | + 1. mkdir /path/to/script/cache |
| 71 | + 2. chgrp www-data /path/to/script/cache |
| 72 | + 3. chmod 775 /path/to/script/cache |
| 73 | + |
| 74 | + On other Unixes you'll need to change 'www-data' to what ever user Apache |
| 75 | + runs as. (on MacOS X the user would be 'www') |
| 76 | + |
| 77 | +** Cache in /tmp ** |
| 78 | + |
| 79 | + Sometimes you won't be able to create a local cache directory. Some reasons |
| 80 | + might be: |
| 81 | + |
| 82 | + 1. No shell account |
| 83 | + 2. Insufficient permissions to change ownership of a directory |
| 84 | + 3. Webserver runs as 'nobody' |
| 85 | + |
| 86 | + In these situations using a cache directory in /tmp can often be a good |
| 87 | + option. |
| 88 | + |
| 89 | + The drawback is /tmp is public, so anyone on the box can read the cache |
| 90 | + files. Usually RSS feeds are public information, so you'll have to decide |
| 91 | + how much of an issue that is. |
| 92 | + |
| 93 | + To use /tmp as your cache directory you need to add the following line to |
| 94 | + your script: |
| 95 | + |
| 96 | + define('MAGPIE_CACHE_DIR', '/tmp/magpie_cache'); |
| 97 | + |
| 98 | +** Global Cache ** |
| 99 | + |
| 100 | + If you have several applications using Magpie, you can create a single |
| 101 | + shared cache directory, either using the /tmp cache, or somewhere else on |
| 102 | + the system. |
| 103 | + |
| 104 | + The upside is that you'll distribute fetching and parsing feeds across |
| 105 | + several applications. |
| 106 | + |
| 107 | +INSTALLING MAGPIE SERVER WIDE |
| 108 | + |
| 109 | + Rather than following the Quick Start instructions, which require you to have |
| 110 | + a copy of Magpie per application, you can alternatively place it in some |
| 111 | + shared location. |
| 112 | + |
| 113 | +** Adding Magpie to Your Include Path ** |
| 114 | + |
| 115 | + Copy the 5 resources (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
| 116 | + rss_utils.inc, and extlib) to a directory named 'magpierss' in your include |
| 117 | + path. Now any PHP file on your system can use Magpie with: |
| 118 | + |
| 119 | + require_once('magpierss/rss_fetch.inc'); |
| 120 | + |
| 121 | + Different installs have different include paths, and you'll have to figure |
| 122 | + out what your include_path is. |
| 123 | + |
| 124 | + From shell you can try: |
| 125 | + |
| 126 | + php -i | grep 'include_path' |
| 127 | + |
| 128 | + Alternately you can create a phpinfo.php file which contains: |
| 129 | + |
| 130 | + <?php phpinfo(); ?> |
| 131 | + |
| 132 | + Debian's default is: |
| 133 | + |
| 134 | + /usr/share/php |
| 135 | + |
| 136 | + (though a more ideologically pure location would be /usr/local/share/php) |
| 137 | + |
| 138 | + Apple's default include path is: |
| 139 | + |
| 140 | + /usr/lib/php |
| 141 | + |
| 142 | + While the Entropy PHP build seems to use: |
| 143 | + |
| 144 | + /usr/local/php/lib/php |
\ No newline at end of file |
Index: trunk/extensions/RSS/magpierss/cookbook |
— | — | @@ -0,0 +1,125 @@ |
| 2 | +MAGPIERSS RECIPES: Cooking with Corbies |
| 3 | + |
| 4 | + "Four and twenty blackbirds baked in a pie." |
| 5 | + |
| 6 | +1. LIMIT THE NUMBER OF HEADLINES(AKA ITEMS) RETURNED. |
| 7 | + |
| 8 | +PROBLEM: |
| 9 | + |
| 10 | +You want to display the 10 (or 3) most recent headlines, but the RSS feed |
| 11 | +contains 15. |
| 12 | + |
| 13 | +SOLUTION: |
| 14 | + |
| 15 | +$num_items = 10; |
| 16 | +$rss = fetch_rss($url); |
| 17 | + |
| 18 | +$items = array_slice($rss->items, 0, $num_items); |
| 19 | + |
| 20 | +DISCUSSION: |
| 21 | + |
| 22 | +Rather than trying to limit the number of items Magpie parses, a much simpler, |
| 23 | +and more flexible approach is to take a "slice" of the array of items. And |
| 24 | +array_slice() is smart enough to do the right thing if the feed has fewer items |
| 25 | +than $num_items. |
| 26 | + |
| 27 | +See: http://www.php.net/array_slice |
| 28 | + |
| 29 | + |
| 30 | +2. DISPLAY A CUSTOM ERROR MESSAGE IF SOMETHING GOES WRONG |
| 31 | + |
| 32 | +PROBLEM: |
| 33 | + |
| 34 | +You don't want Magpie's error messages showing up if something goes wrong. |
| 35 | + |
| 36 | +SOLUTION: |
| 37 | + |
| 38 | +# Magpie throws USER_WARNINGS only |
| 39 | +# so you can cloak these, by only showing ERRORs |
| 40 | +error_reporting(E_ERROR); |
| 41 | + |
| 42 | +# check the return value of fetch_rss() |
| 43 | + |
| 44 | +$rss = fetch_rss($url); |
| 45 | + |
| 46 | +if ( $rss ) { |
| 47 | +...display rss feed... |
| 48 | +} |
| 49 | +else { |
| 50 | + echo "An error occurred! " . |
| 51 | + "Consider donating more $$$ for restoration of services." . |
| 52 | + "<br>Error Message: " . magpie_error(); |
| 53 | +} |
| 54 | + |
| 55 | +DISCUSSION: |
| 56 | + |
| 57 | +MagpieRSS triggers a warning in a number of circumstances. The 2 most common |
| 58 | +circumstances are: if the specified RSS file isn't properly formed (usually |
| 59 | +because it includes illegal HTML), or if Magpie can't download the remote RSS |
| 60 | +file, and there is no cached version. |
| 61 | + |
| 62 | +If you don't want your users to see these warnings, change your error_reporting |
| 63 | +settings to only display ERRORs. Another option is to turn off display_errors, |
| 64 | +so that WARNINGs and NOTICEs still go to the error_log but not to the webpages. |
| 65 | + |
| 66 | +You can do this with: |
| 67 | + |
| 68 | +ini_set('display_errors', 0); |
| 69 | + |
| 70 | +See: http://www.php.net/error_reporting, |
| 71 | + http://www.php.net/ini_set, |
| 72 | + http://www.php.net/manual/en/ref.errorfunc.php |
| 73 | + |
| 74 | +3. GENERATE A NEW RSS FEED |
| 75 | + |
| 76 | +PROBLEM: |
| 77 | + |
| 78 | +Create an RSS feed for other people to use. |
| 79 | + |
| 80 | +SOLUTION: |
| 81 | + |
| 82 | +Use Useful Inc's RSSWriter (http://usefulinc.com/rss/rsswriter/) |
| 83 | + |
| 84 | +DISCUSSION: |
| 85 | + |
| 86 | +An example of turning a Magpie parsed RSS object back into an RSS file is |
| 87 | +forthcoming. In the meantime RSSWriter has great documentation. |
| 88 | + |
| 89 | +4. DISPLAY HEADLINES MORE RECENT THAN X DATE |
| 90 | + |
| 91 | +PROBLEM: |
| 92 | + |
| 93 | +You only want to display headlines that were published on, or after a certain |
| 94 | +date. |
| 95 | + |
| 96 | + |
| 97 | +SOLUTION: |
| 98 | + |
| 99 | +require 'rss_utils.inc'; |
| 100 | + |
| 101 | +# get all headlines published today |
| 102 | +$today = getdate(); |
| 103 | + |
| 104 | +# today, 12AM |
| 105 | +$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); |
| 106 | + |
| 107 | +$rss = fetch_rss($url); |
| 108 | + |
| 109 | +foreach ( $rss->items as $item ) { |
| 110 | + $published = parse_w3cdtf($item['dc']['date']); |
| 111 | + if ( $published >= $date ) { |
| 112 | + echo "Title: " . $item['title']; |
| 113 | + echo "Published: " . date("h:i:s A", $published); |
| 114 | + echo "<p>"; |
| 115 | + } |
| 116 | +} |
| 117 | + |
| 118 | +DISCUSSION: |
| 119 | + |
| 120 | +This recipe only works for RSS 1.0 feeds that include the <dc:date> field. |
| 121 | +(which is very good RSS style) |
| 122 | + |
| 123 | +parse_w3cdtf is defined in rss_utils.inc, and parses RSS style dates into Unix |
| 124 | +epoch seconds. |
| 125 | + |
| 126 | +See: http://www.php.net/manual/en/ref.datetime.php |
Index: trunk/extensions/RSS/magpierss/ChangeLog |
— | — | @@ -0,0 +1,405 @@ |
| 2 | +2005-10-28 14:11 kellan |
| 3 | + |
| 4 | + * extlib/Snoopy.class.inc: a better solution |
| 5 | + |
| 6 | +2005-10-28 11:51 kellan |
| 7 | + |
| 8 | + * extlib/Snoopy.class.inc: fix arbtriary code execution |
| 9 | + vulnerability when using curl+ssl |
| 10 | + |
| 11 | + http://www.sec-consult.com/216.html |
| 12 | + |
| 13 | +2005-03-08 10:46 kellan |
| 14 | + |
| 15 | + * rss_parse.inc: fix bug w/ atom and date normalization |
| 16 | + |
| 17 | +2005-02-09 14:59 kellan |
| 18 | + |
| 19 | + * rss_fetch.inc: fix stale cache bug |
| 20 | + |
| 21 | +2005-01-28 02:27 kellan |
| 22 | + |
| 23 | + * rss_parse.inc: support php w/o array_change_case |
| 24 | + |
| 25 | +2005-01-23 20:02 kellan |
| 26 | + |
| 27 | + * rss_fetch.inc: fix cache bug introduced by charset encoding |
| 28 | + |
| 29 | +2005-01-12 09:14 kellan |
| 30 | + |
| 31 | + * rss_cache.inc, rss_fetch.inc: more sanity checks for when things |
| 32 | + go wrong |
| 33 | + |
| 34 | +2004-12-12 13:44 kellan |
| 35 | + |
| 36 | + * INSTALL, rss_cache.inc, rss_utils.inc: detab |
| 37 | + |
| 38 | +2004-11-23 20:15 kellan |
| 39 | + |
| 40 | + * rss_parse.inc: fix calling iconv instead of mb_convert_encoding |
| 41 | + |
| 42 | +2004-11-22 02:11 kellan |
| 43 | + |
| 44 | + * CHANGES, ChangeLog, rss_parse.inc, scripts/magpie_debug.php: last |
| 45 | + bit of tidying |
| 46 | + |
| 47 | +2004-11-22 01:45 kellan |
| 48 | + |
| 49 | + * rss_fetch.inc: detab, bump version |
| 50 | + |
| 51 | +2004-11-22 01:43 kellan |
| 52 | + |
| 53 | + * rss_parse.inc: was filtering too much |
| 54 | + |
| 55 | +2004-11-22 00:03 kellan |
| 56 | + |
| 57 | + * rss_fetch.inc, rss_parse.inc: cache on $url . $output_encoding |
| 58 | + otherwise we can get munged output |
| 59 | + |
| 60 | +2004-11-21 23:52 kellan |
| 61 | + |
| 62 | + * rss_parse.inc: add WARNING |
| 63 | + |
| 64 | +2004-11-21 23:45 kellan |
| 65 | + |
| 66 | + * rss_parse.inc: don't set ERROR on notice or warning (rss_fetch |
| 67 | + dies on parse errors) |
| 68 | + |
| 69 | +2004-11-21 23:44 kellan |
| 70 | + |
| 71 | + * rss_fetch.inc: add encoding defines (fix timeout error reporting) |
| 72 | + |
| 73 | +2004-11-21 20:21 kellan |
| 74 | + |
| 75 | + * rss_parse.inc: incorporate steve's patch |
| 76 | + |
| 77 | +2004-11-21 19:26 kellan |
| 78 | + |
| 79 | + * rss_parse.inc: remove old debugging functions, totally |
| 80 | + arbitrarily. might break stuff. can't really explain why i'm |
| 81 | + doing this. |
| 82 | + |
| 83 | +2004-10-28 15:52 kellan |
| 84 | + |
| 85 | + * rss_parse.inc: fixed '=' instead of '==' |
| 86 | + |
| 87 | +2004-10-26 00:48 kellan |
| 88 | + |
| 89 | + * rss_parse.inc: chance epoch to timestamp to conform w/ php naming |
| 90 | + conventions |
| 91 | + |
| 92 | +2004-06-15 12:00 kellan |
| 93 | + |
| 94 | + * rss_parse.inc: [no log message] |
| 95 | + |
| 96 | +2004-04-26 14:16 kellan |
| 97 | + |
| 98 | + * rss_fetch.inc: bump version |
| 99 | + |
| 100 | +2004-04-26 12:36 kellan |
| 101 | + |
| 102 | + * rss_parse.inc: fix field doubling |
| 103 | + |
| 104 | +2004-04-24 17:47 kellan |
| 105 | + |
| 106 | + * CHANGES, ChangeLog: updated |
| 107 | + |
| 108 | +2004-04-24 17:35 kellan |
| 109 | + |
| 110 | + * rss_fetch.inc: bumped version |
| 111 | + |
| 112 | +2004-04-24 16:52 kellan |
| 113 | + |
| 114 | + * rss_parse.inc: support arbitrary atom content constructs |
| 115 | + |
| 116 | + some refactoring |
| 117 | + |
| 118 | +2004-04-24 16:15 kellan |
| 119 | + |
| 120 | + * rss_parse.inc: support summary content contstruct. add normalize |
| 121 | + function |
| 122 | + |
| 123 | +2004-03-27 16:29 kellan |
| 124 | + |
| 125 | + * extlib/Snoopy.class.inc: accept self-signed certs |
| 126 | + |
| 127 | +2004-03-27 12:53 kellan |
| 128 | + |
| 129 | + * extlib/Snoopy.class.inc: fixed SSL support * set status * set |
| 130 | + error on bad curl |
| 131 | + |
| 132 | + (also ripped out big chunks of dead weight (submit_form) which |
| 133 | + were getting in my way |
| 134 | + |
| 135 | +2004-01-25 02:25 kellan |
| 136 | + |
| 137 | + * rss_parse.inc: make RSS 1.0's rdf:about available |
| 138 | + |
| 139 | +2004-01-25 02:07 kellan |
| 140 | + |
| 141 | + * rss_parse.inc: clean up text, and line formats. add support item |
| 142 | + rdf:about |
| 143 | + |
| 144 | +2004-01-24 23:40 kellan |
| 145 | + |
| 146 | + * CHANGES, ChangeLog: update changes |
| 147 | + |
| 148 | +2004-01-24 23:37 kellan |
| 149 | + |
| 150 | + * rss_fetch.inc: updated version |
| 151 | + |
| 152 | +2004-01-24 23:35 kellan |
| 153 | + |
| 154 | + * rss_parse.inc: whitespace |
| 155 | + |
| 156 | +2004-01-24 23:23 kellan |
| 157 | + |
| 158 | + * extlib/Snoopy.class.inc: support badly formatted http headers |
| 159 | + |
| 160 | +2004-01-24 23:20 kellan |
| 161 | + |
| 162 | + * rss_parse.inc: added alpha atom parsing support |
| 163 | + |
| 164 | +2003-06-25 22:34 kellan |
| 165 | + |
| 166 | + * extlib/Snoopy.class.inc: fixed fread 4.3.2 compatibility problems |
| 167 | + |
| 168 | +2003-06-13 11:31 kellan |
| 169 | + |
| 170 | + * rss_fetch.inc: reset cache on 304 |
| 171 | + |
| 172 | +2003-06-12 21:37 kellan |
| 173 | + |
| 174 | + * rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
| 175 | + bumped up version numbers |
| 176 | + |
| 177 | +2003-06-12 21:32 kellan |
| 178 | + |
| 179 | + * htdocs/index.html: updated news |
| 180 | + |
| 181 | +2003-06-12 21:27 kellan |
| 182 | + |
| 183 | + * NEWS: a manual blog :) |
| 184 | + |
| 185 | +2003-06-12 21:22 kellan |
| 186 | + |
| 187 | + * htdocs/index.html: fully qualified img |
| 188 | + |
| 189 | +2003-06-12 21:20 kellan |
| 190 | + |
| 191 | + * htdocs/index.html: clean up. added badge. |
| 192 | + |
| 193 | +2003-06-12 21:04 kellan |
| 194 | + |
| 195 | + * rss_utils.inc: clean up regex |
| 196 | + |
| 197 | +2003-06-12 21:02 kellan |
| 198 | + |
| 199 | + * rss_cache.inc: suppress some warnings |
| 200 | + |
| 201 | +2003-05-30 20:44 kellan |
| 202 | + |
| 203 | + * extlib/Snoopy.class.inc: more comments, cleaned up notice |
| 204 | + |
| 205 | +2003-05-30 15:14 kellan |
| 206 | + |
| 207 | + * extlib/Snoopy.class.inc: don't advertise gzip support if the user |
| 208 | + hasn't built php with gzinflate support |
| 209 | + |
| 210 | +2003-05-12 22:32 kellan |
| 211 | + |
| 212 | + * ChangeLog: changes |
| 213 | + |
| 214 | +2003-05-12 22:11 kellan |
| 215 | + |
| 216 | + * htdocs/index.html: announce 0.5 |
| 217 | + |
| 218 | +2003-05-12 21:42 kellan |
| 219 | + |
| 220 | + * htdocs/index.html: change |
| 221 | + |
| 222 | +2003-05-12 21:39 kellan |
| 223 | + |
| 224 | + * rss_fetch.inc: use gzip |
| 225 | + |
| 226 | +2003-05-12 21:37 kellan |
| 227 | + |
| 228 | + * extlib/Snoopy.class.inc: added support gzip encoded content |
| 229 | + negoiation |
| 230 | + |
| 231 | +2003-05-12 21:32 kellan |
| 232 | + |
| 233 | + * rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: fixed |
| 234 | + typoes |
| 235 | + |
| 236 | +2003-04-26 21:44 kellan |
| 237 | + |
| 238 | + * rss_parse.inc: fix minor typo |
| 239 | + |
| 240 | +2003-04-18 08:19 kellan |
| 241 | + |
| 242 | + * htdocs/cookbook.html: updated cookbook to show more code for |
| 243 | + limiting items |
| 244 | + |
| 245 | +2003-03-03 16:02 kellan |
| 246 | + |
| 247 | + * rss_parse.inc, scripts/magpie_slashbox.php: committed (or |
| 248 | + adpated) patch from Nicola (www.technick.com) to quell 'Undefined |
| 249 | + Indexes' notices |
| 250 | + |
| 251 | +2003-03-03 15:59 kellan |
| 252 | + |
| 253 | + * rss_fetch.inc: commited patch from nicola (www.technick.com) to |
| 254 | + quell 'undefined indexes' notices. |
| 255 | + |
| 256 | + * Magpie now automatically includes its version in the |
| 257 | + user-agent, & whether cacheing is turned on. |
| 258 | + |
| 259 | +2003-02-12 01:22 kellan |
| 260 | + |
| 261 | + * CHANGES, ChangeLog: ChangeLog now auto-generated by cvs2cl |
| 262 | + |
| 263 | +2003-02-12 00:21 kellan |
| 264 | + |
| 265 | + * rss_fetch.inc: better errors, hopefully stomped on pesky notices |
| 266 | + |
| 267 | +2003-02-12 00:19 kellan |
| 268 | + |
| 269 | + * rss_parse.inc: check to see is xml is supported, if not die |
| 270 | + |
| 271 | + also throw better xml errors |
| 272 | + |
| 273 | +2003-02-12 00:18 kellan |
| 274 | + |
| 275 | + * rss_cache.inc: hopefully cleared up some notices that were being |
| 276 | + thrown into the log |
| 277 | + |
| 278 | + fixed a debug statement that was being called as an error |
| 279 | + |
| 280 | +2003-02-12 00:15 kellan |
| 281 | + |
| 282 | + * scripts/: magpie_simple.php, magpie_slashbox.php: moved |
| 283 | + magpie_simple to magpie_slashbox, and replaced it with a simpler |
| 284 | + demo. |
| 285 | + |
| 286 | +2003-02-12 00:02 kellan |
| 287 | + |
| 288 | + * INSTALL, README, TROUBLESHOOTING: Improved documentation. Better |
| 289 | + install instructions. |
| 290 | + |
| 291 | + TROUBLESHOOTING cover common installation and usage problems |
| 292 | + |
| 293 | +2003-01-22 14:40 kellan |
| 294 | + |
| 295 | + * htdocs/cookbook.html: added cookbook.html |
| 296 | + |
| 297 | +2003-01-21 23:47 kellan |
| 298 | + |
| 299 | + * cookbook: a magpie cookbook |
| 300 | + |
| 301 | +2003-01-20 10:09 kellan |
| 302 | + |
| 303 | + * ChangeLog: updated |
| 304 | + |
| 305 | +2003-01-20 09:23 kellan |
| 306 | + |
| 307 | + * scripts/simple_smarty.php: minor clean up |
| 308 | + |
| 309 | +2003-01-20 09:15 kellan |
| 310 | + |
| 311 | + * scripts/README: added smarty url |
| 312 | + |
| 313 | +2003-01-20 09:14 kellan |
| 314 | + |
| 315 | + * magpie_simple.php, htdocs/index.html, scripts/README, |
| 316 | + scripts/magpie_debug.php, scripts/magpie_simple.php, |
| 317 | + scripts/simple_smarty.php, |
| 318 | + scripts/smarty_plugin/modifier.rss_date_parse.php, |
| 319 | + scripts/templates/simple.smarty: Added scripts directory for |
| 320 | + examples on how to use MagpieRSS |
| 321 | + |
| 322 | + magpie_simple - is a simple example magpie_debug - spew all the |
| 323 | + information from a parsed RSS feed simple_smary - example of |
| 324 | + using magpie with Smarty template system |
| 325 | + smarty_plugin/modifier.rss_date_parse.php - support file for the |
| 326 | + smarty demo templates/simple.smary - template for the smarty demo |
| 327 | + |
| 328 | +2003-01-20 09:11 kellan |
| 329 | + |
| 330 | + * rss_fetch.inc, rss_parse.inc: changes to error handling to give |
| 331 | + script authors more access to magpie's errors. |
| 332 | + |
| 333 | + added method magpie_error() to retrieve global MAGPIE_ERROR |
| 334 | + variable for when fetch_rss() returns false |
| 335 | + |
| 336 | +2002-10-26 19:02 kellan |
| 337 | + |
| 338 | + * htdocs/index.html: putting the website under source control |
| 339 | + |
| 340 | +2002-10-26 18:43 kellan |
| 341 | + |
| 342 | + * AUTHORS, ChangeLog, INSTALL, README: some documentation to make |
| 343 | + it all look official :) |
| 344 | + |
| 345 | +2002-10-25 23:04 kellan |
| 346 | + |
| 347 | + * magpie_simple.php: quxx |
| 348 | + |
| 349 | +2002-10-25 23:04 kellan |
| 350 | + |
| 351 | + * rss_parse.inc: added support for textinput and image |
| 352 | + |
| 353 | +2002-10-25 19:23 kellan |
| 354 | + |
| 355 | + * magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc, |
| 356 | + rss_utils.inc: switched to using Snoopy for fetching remote RSS |
| 357 | + files. |
| 358 | + |
| 359 | + added support for conditional gets |
| 360 | + |
| 361 | +2002-10-25 19:22 kellan |
| 362 | + |
| 363 | + * rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
| 364 | + Change comment style to slavishly imitate the phpinsider style |
| 365 | + found in Smarty and Snoopy :) |
| 366 | + |
| 367 | +2002-10-25 19:18 kellan |
| 368 | + |
| 369 | + * extlib/Snoopy.class.inc: added Snoopy in order to support |
| 370 | + conditional gets |
| 371 | + |
| 372 | +2002-10-23 23:19 kellan |
| 373 | + |
| 374 | + * magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc: |
| 375 | + MAJOR CLEANUP! |
| 376 | + |
| 377 | + * rss_fetch got rid of the options array, replaced it with a more |
| 378 | + PHP-like solution of using defines. constants are setup, with |
| 379 | + defaults, in the function init() |
| 380 | + |
| 381 | + got rid of the idiom of passing back an array, its was awkward to |
| 382 | + deal with in PHP, and unusual (and consquently confusing to |
| 383 | + people). now i return true/false values, and try to setup error |
| 384 | + string where appropiate (rss_cache has the most complete example |
| 385 | + of this) |
| 386 | + |
| 387 | + change the logic for interacting with the cache |
| 388 | + |
| 389 | + * rss_cache major re-working of how error are handled. tried to |
| 390 | + make the code more resillient. the cache is now much more aware |
| 391 | + of MAX_AGE, where before this was being driven out of rss_fetch |
| 392 | + (which was silly) |
| 393 | + |
| 394 | + * rss_parse properly handles xml parse errors. used to sail |
| 395 | + along blithely unaware. |
| 396 | + |
| 397 | +2002-09-11 11:11 kellan |
| 398 | + |
| 399 | + * rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
| 400 | + rss_utils.inc: Initial revision |
| 401 | + |
| 402 | +2002-09-11 11:11 kellan |
| 403 | + |
| 404 | + * rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
| 405 | + rss_utils.inc: initial import |
| 406 | + |
Index: trunk/extensions/RSS/magpierss/TROUBLESHOOTING |
— | — | @@ -0,0 +1,152 @@ |
| 2 | +TROUBLESHOOTING |
| 3 | + |
| 4 | + |
| 5 | +Trouble Installing MagpieRSS: |
| 6 | + |
| 7 | +1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
| 8 | + (include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
| 9 | + |
| 10 | +2. Cache couldn't make dir './cache'. |
| 11 | + |
| 12 | +3. Fatal error: Failed to load PHP's XML Extension. |
| 13 | + http://www.php.net/manual/en/ref.xml.php |
| 14 | + |
| 15 | +Trouble Using MagpieRSS |
| 16 | + |
| 17 | +4. Warning: MagpieRSS: Failed to fetch example.com/index.rdf. |
| 18 | + (HTTP Error: Invalid protocol "") |
| 19 | + |
| 20 | +5. Warning: MagpieRSS: Failed to parse RSS file. |
| 21 | + (not well-formed (invalid token) at line 19, column 98) |
| 22 | + |
| 23 | +6. Warning: MagpieRSS: Failed to fetch http://localhost/rss/features.1-0.rss. |
| 24 | + (HTTP Response: HTTP/1.1 404 Not Found) |
| 25 | + |
| 26 | +If you would rather provide a custom error, see the COOKBOOK |
| 27 | +(http://magpierss.sf.net/cookbook.html) recipe 2. |
| 28 | + |
| 29 | +************************************************************************* |
| 30 | +1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
| 31 | + (include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
| 32 | + |
| 33 | + This could mean that: |
| 34 | + |
| 35 | + a) PHP can't find the MagpieRSS files. |
| 36 | + b) PHP found the MagpieRSS files, but can't read them. |
| 37 | + |
| 38 | + a. Telling PHP where to look for MagpieRSS files. |
| 39 | + |
| 40 | + This might mean your PHP program can't find the MagpieRSS libraries. |
| 41 | + Magpie relies on 4 include files, rss_fetch.inc, rss_parse.inc, |
| 42 | + rss_cache.inc, rss_utils.inc, and for normal use you'll need all 4 (see the |
| 43 | + cookbook for exceptions). |
| 44 | + |
| 45 | + This can be fixed by making sure the MagpieRSS files are in your include |
| 46 | + path. |
| 47 | + |
| 48 | + If you can't edit your include path (for example you're on a shared host) then |
| 49 | + you need to replace: |
| 50 | + |
| 51 | + require_once('rss_fetch.inc'); |
| 52 | + |
| 53 | + -with- |
| 54 | + |
| 55 | + define('MAGPIE_DIR', '/path/to/magpierss/'); |
| 56 | + require_once(MAGPIE_DIR.'rss_fetch.inc'); |
| 57 | + |
| 58 | + b. PHP can't read the MagpieRSS files |
| 59 | + |
| 60 | + All PHP libraries need to be readable by your webserver. |
| 61 | + |
| 62 | + On Unix you can accomplish this with: |
| 63 | + |
| 64 | + chmod 755 rss_fetch.inc rss_parse.inc rss_cache.inc rss_utils.inc |
| 65 | + |
| 66 | +************************************************************************* |
| 67 | +2. Cache couldn't make dir './cache'. |
| 68 | + |
| 69 | + MagpieRSS caches the results of fetched and parsed RSS to reduce the load on |
| 70 | + both your server, and the remote server providing the RSS. It does this by |
| 71 | + writing files to a cache directory. |
| 72 | + |
| 73 | + This error means the webserver doesn't have write access to the current |
| 74 | + directory. |
| 75 | + |
| 76 | + a. Make a webserver writeable cache directory |
| 77 | + |
| 78 | + Find the webserver's group. (on my system it is 'www') |
| 79 | + |
| 80 | + mkdir ./cache |
| 81 | + chgrp www directory_name |
| 82 | + chmod g+w directory_name |
| 83 | + |
| 84 | + (this is the best, and desired solution) |
| 85 | + |
| 86 | + b. Tell MagpieRSS to create the cache directory somewhere the webserver can |
| 87 | + write to. |
| 88 | + |
| 89 | + define('MAGPIE_CACHE_DIR', '/tmp/magpierss'); |
| 90 | + |
| 91 | + (this is not a great solution, and might have security considerations) |
| 92 | + |
| 93 | + c. Turn off cacheing. |
| 94 | + |
| 95 | + Magpie can work fine without caching, but it will be slower, and you might |
| 96 | + become a nuisance to the RSS provider, but it is an option. |
| 97 | + |
| 98 | + define('MAGPIE_CACHE_ON', 0); |
| 99 | + |
| 100 | + d. And lastly, do NOT |
| 101 | + |
| 102 | + chmod 777 ./cache |
| 103 | + |
| 104 | + Any of the above solutions are better than this. |
| 105 | + |
| 106 | + NOTE: If none of this works for you, let me know. I've got root, and a |
| 107 | + custom compiled Apache on almost any box I ever touch, so I can be a little |
| 108 | + out of touch with reality. But I won't know that if I don't get feedback. |
| 109 | + |
| 110 | +************************************************************************* |
| 111 | +3. Fatal error: Failed to load PHP's XML Extension. |
| 112 | + http://www.php.net/manual/en/ref.xml.php |
| 113 | + |
| 114 | + -or- |
| 115 | + |
| 116 | + Fatal error: Failed to create an instance of PHP's XML parser. |
| 117 | + http://www.php.net/manual/en/ref.xml.php |
| 118 | + |
| 119 | + Make sure your PHP was built with --with-xml |
| 120 | + |
| 121 | + This has been turned on by default for several versions of PHP, but it might |
| 122 | + be turned off in your build. |
| 123 | + |
| 124 | + See php.net for details on building and configuring PHP. |
| 125 | + |
| 126 | + |
| 127 | +************************************************************************* |
| 128 | +4. Warning: MagpieRSS: Failed to fetch index.rdf. |
| 129 | + (HTTP Error: Invalid protocol "") |
| 130 | + |
| 131 | + You need to put http:// in front of the URL of your RSS feed. |
| 132 | + |
| 133 | +************************************************************************* |
| 134 | +5. Warning: MagpieRSS: Failed to parse RSS file. |
| 135 | + (not well-formed (invalid token) at line 19, column 98) |
| 136 | + |
| 137 | + There is a problem with the RSS feed you are trying to read. |
| 138 | + MagpieRSS is an XML parser, and therefore can't parse RSS feeds with invalid |
| 139 | + characters. Some RSS parsers are based on regular expressions, and can |
| 140 | + parse invalid RSS, but they have their own problems. |
| 141 | + |
| 142 | + You could try contacting the author of the RSS feed, and pointing them to |
| 143 | + the online RSS validator at: |
| 144 | + |
| 145 | + http://feeds.archive.org/validator/ |
| 146 | + |
| 147 | +************************************************************************* |
| 148 | +6. Warning: MagpieRSS: Failed to fetch http://example.com/index.rdf |
| 149 | + (HTTP Response: HTTP/1.1 404 Not Found) |
| 150 | + |
| 151 | + It's a 404! The RSS file ain't there. |
| 152 | + |
| 153 | + |
Index: trunk/extensions/RSS/magpierss/extlib/Snoopy.class.inc |
— | — | @@ -0,0 +1,900 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/************************************************* |
| 5 | + |
| 6 | +Snoopy - the PHP net client |
| 7 | +Author: Monte Ohrt <monte@ispi.net> |
| 8 | +Copyright (c): 1999-2000 ispi, all rights reserved |
| 9 | +Version: 1.0 |
| 10 | + |
| 11 | + * This library is free software; you can redistribute it and/or |
| 12 | + * modify it under the terms of the GNU Lesser General Public |
| 13 | + * License as published by the Free Software Foundation; either |
| 14 | + * version 2.1 of the License, or (at your option) any later version. |
| 15 | + * |
| 16 | + * This library is distributed in the hope that it will be useful, |
| 17 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 19 | + * Lesser General Public License for more details. |
| 20 | + * |
| 21 | + * You should have received a copy of the GNU Lesser General Public |
| 22 | + * License along with this library; if not, write to the Free Software |
| 23 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 24 | + |
| 25 | +You may contact the author of Snoopy by e-mail at: |
| 26 | +monte@ispi.net |
| 27 | + |
| 28 | +Or, write to: |
| 29 | +Monte Ohrt |
| 30 | +CTO, ispi |
| 31 | +237 S. 70th suite 220 |
| 32 | +Lincoln, NE 68510 |
| 33 | + |
| 34 | +The latest version of Snoopy can be obtained from: |
| 35 | +http://snoopy.sourceforge.com |
| 36 | + |
| 37 | +*************************************************/ |
| 38 | + |
| 39 | +class Snoopy |
| 40 | +{ |
| 41 | + /**** Public variables ****/ |
| 42 | + |
| 43 | + /* user definable vars */ |
| 44 | + |
| 45 | + var $host = "www.php.net"; // host name we are connecting to |
| 46 | + var $port = 80; // port we are connecting to |
| 47 | + var $proxy_host = ""; // proxy host to use |
| 48 | + var $proxy_port = ""; // proxy port to use |
| 49 | + var $agent = "Snoopy v1.0"; // agent we masquerade as |
| 50 | + var $referer = ""; // referer info to pass |
| 51 | + var $cookies = array(); // array of cookies to pass |
| 52 | + // $cookies["username"]="joe"; |
| 53 | + var $rawheaders = array(); // array of raw headers to send |
| 54 | + // $rawheaders["Content-type"]="text/html"; |
| 55 | + |
| 56 | + var $maxredirs = 5; // http redirection depth maximum. 0 = disallow |
| 57 | + var $lastredirectaddr = ""; // contains address of last redirected address |
| 58 | + var $offsiteok = true; // allows redirection off-site |
| 59 | + var $maxframes = 0; // frame content depth maximum. 0 = disallow |
| 60 | + var $expandlinks = true; // expand links to fully qualified URLs. |
| 61 | + // this only applies to fetchlinks() |
| 62 | + // or submitlinks() |
| 63 | + var $passcookies = true; // pass set cookies back through redirects |
| 64 | + // NOTE: this currently does not respect |
| 65 | + // dates, domains or paths. |
| 66 | + |
| 67 | + var $user = ""; // user for http authentication |
| 68 | + var $pass = ""; // password for http authentication |
| 69 | + |
| 70 | + // http accept types |
| 71 | + var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; |
| 72 | + |
| 73 | + var $results = ""; // where the content is put |
| 74 | + |
| 75 | + var $error = ""; // error messages sent here |
| 76 | + var $response_code = ""; // response code returned from server |
| 77 | + var $headers = array(); // headers returned from server sent here |
| 78 | + var $maxlength = 500000; // max return data length (body) |
| 79 | + var $read_timeout = 0; // timeout on read operations, in seconds |
| 80 | + // supported only since PHP 4 Beta 4 |
| 81 | + // set to 0 to disallow timeouts |
| 82 | + var $timed_out = false; // if a read operation timed out |
| 83 | + var $status = 0; // http request status |
| 84 | + |
| 85 | + var $curl_path = "/usr/bin/curl"; |
| 86 | + // Snoopy will use cURL for fetching |
| 87 | + // SSL content if a full system path to |
| 88 | + // the cURL binary is supplied here. |
| 89 | + // set to false if you do not have |
| 90 | + // cURL installed. See http://curl.haxx.se |
| 91 | + // for details on installing cURL. |
| 92 | + // Snoopy does *not* use the cURL |
| 93 | + // library functions built into php, |
| 94 | + // as these functions are not stable |
| 95 | + // as of this Snoopy release. |
| 96 | + |
| 97 | + // send Accept-encoding: gzip? |
| 98 | + var $use_gzip = true; |
| 99 | + |
| 100 | + /**** Private variables ****/ |
| 101 | + |
| 102 | + var $_maxlinelen = 4096; // max line length (headers) |
| 103 | + |
| 104 | + var $_httpmethod = "GET"; // default http request method |
| 105 | + var $_httpversion = "HTTP/1.0"; // default http request version |
| 106 | + var $_submit_method = "POST"; // default submit method |
| 107 | + var $_submit_type = "application/x-www-form-urlencoded"; // default submit type |
| 108 | + var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type |
| 109 | + var $_redirectaddr = false; // will be set if page fetched is a redirect |
| 110 | + var $_redirectdepth = 0; // increments on an http redirect |
| 111 | + var $_frameurls = array(); // frame src urls |
| 112 | + var $_framedepth = 0; // increments on frame depth |
| 113 | + |
| 114 | + var $_isproxy = false; // set if using a proxy server |
| 115 | + var $_fp_timeout = 30; // timeout for socket connection |
| 116 | + |
| 117 | +/*======================================================================*\ |
| 118 | + Function: fetch |
| 119 | + Purpose: fetch the contents of a web page |
| 120 | + (and possibly other protocols in the |
| 121 | + future like ftp, nntp, gopher, etc.) |
| 122 | + Input: $URI the location of the page to fetch |
| 123 | + Output: $this->results the output text from the fetch |
| 124 | +\*======================================================================*/ |
| 125 | + |
| 126 | + function fetch($URI) |
| 127 | + { |
| 128 | + |
| 129 | + //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS); |
| 130 | + $URI_PARTS = parse_url($URI); |
| 131 | + if (!empty($URI_PARTS["user"])) |
| 132 | + $this->user = $URI_PARTS["user"]; |
| 133 | + if (!empty($URI_PARTS["pass"])) |
| 134 | + $this->pass = $URI_PARTS["pass"]; |
| 135 | + |
| 136 | + switch($URI_PARTS["scheme"]) |
| 137 | + { |
| 138 | + case "http": |
| 139 | + $this->host = $URI_PARTS["host"]; |
| 140 | + if(!empty($URI_PARTS["port"])) |
| 141 | + $this->port = $URI_PARTS["port"]; |
| 142 | + if($this->_connect($fp)) |
| 143 | + { |
| 144 | + if($this->_isproxy) |
| 145 | + { |
| 146 | + // using proxy, send entire URI |
| 147 | + $this->_httprequest($URI,$fp,$URI,$this->_httpmethod); |
| 148 | + } |
| 149 | + else |
| 150 | + { |
| 151 | + $path = $URI_PARTS["path"].(isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : ""); |
| 152 | + // no proxy, send only the path |
| 153 | + $this->_httprequest($path, $fp, $URI, $this->_httpmethod); |
| 154 | + } |
| 155 | + |
| 156 | + $this->_disconnect($fp); |
| 157 | + |
| 158 | + if($this->_redirectaddr) |
| 159 | + { |
| 160 | + /* url was redirected, check if we've hit the max depth */ |
| 161 | + if($this->maxredirs > $this->_redirectdepth) |
| 162 | + { |
| 163 | + // only follow redirect if it's on this site, or offsiteok is true |
| 164 | + if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) |
| 165 | + { |
| 166 | + /* follow the redirect */ |
| 167 | + $this->_redirectdepth++; |
| 168 | + $this->lastredirectaddr=$this->_redirectaddr; |
| 169 | + $this->fetch($this->_redirectaddr); |
| 170 | + } |
| 171 | + } |
| 172 | + } |
| 173 | + |
| 174 | + if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) |
| 175 | + { |
| 176 | + $frameurls = $this->_frameurls; |
| 177 | + $this->_frameurls = array(); |
| 178 | + |
| 179 | + while(list(,$frameurl) = each($frameurls)) |
| 180 | + { |
| 181 | + if($this->_framedepth < $this->maxframes) |
| 182 | + { |
| 183 | + $this->fetch($frameurl); |
| 184 | + $this->_framedepth++; |
| 185 | + } |
| 186 | + else |
| 187 | + break; |
| 188 | + } |
| 189 | + } |
| 190 | + } |
| 191 | + else |
| 192 | + { |
| 193 | + return false; |
| 194 | + } |
| 195 | + return true; |
| 196 | + break; |
| 197 | + case "https": |
| 198 | + if(!$this->curl_path || (!is_executable($this->curl_path))) { |
| 199 | + $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n"; |
| 200 | + return false; |
| 201 | + } |
| 202 | + $this->host = $URI_PARTS["host"]; |
| 203 | + if(!empty($URI_PARTS["port"])) |
| 204 | + $this->port = $URI_PARTS["port"]; |
| 205 | + if($this->_isproxy) |
| 206 | + { |
| 207 | + // using proxy, send entire URI |
| 208 | + $this->_httpsrequest($URI,$URI,$this->_httpmethod); |
| 209 | + } |
| 210 | + else |
| 211 | + { |
| 212 | + $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : ""); |
| 213 | + // no proxy, send only the path |
| 214 | + $this->_httpsrequest($path, $URI, $this->_httpmethod); |
| 215 | + } |
| 216 | + |
| 217 | + if($this->_redirectaddr) |
| 218 | + { |
| 219 | + /* url was redirected, check if we've hit the max depth */ |
| 220 | + if($this->maxredirs > $this->_redirectdepth) |
| 221 | + { |
| 222 | + // only follow redirect if it's on this site, or offsiteok is true |
| 223 | + if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) |
| 224 | + { |
| 225 | + /* follow the redirect */ |
| 226 | + $this->_redirectdepth++; |
| 227 | + $this->lastredirectaddr=$this->_redirectaddr; |
| 228 | + $this->fetch($this->_redirectaddr); |
| 229 | + } |
| 230 | + } |
| 231 | + } |
| 232 | + |
| 233 | + if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) |
| 234 | + { |
| 235 | + $frameurls = $this->_frameurls; |
| 236 | + $this->_frameurls = array(); |
| 237 | + |
| 238 | + while(list(,$frameurl) = each($frameurls)) |
| 239 | + { |
| 240 | + if($this->_framedepth < $this->maxframes) |
| 241 | + { |
| 242 | + $this->fetch($frameurl); |
| 243 | + $this->_framedepth++; |
| 244 | + } |
| 245 | + else |
| 246 | + break; |
| 247 | + } |
| 248 | + } |
| 249 | + return true; |
| 250 | + break; |
| 251 | + default: |
| 252 | + // not a valid protocol |
| 253 | + $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n'; |
| 254 | + return false; |
| 255 | + break; |
| 256 | + } |
| 257 | + return true; |
| 258 | + } |
| 259 | + |
| 260 | + |
| 261 | + |
| 262 | +/*======================================================================*\ |
| 263 | + Private functions |
| 264 | +\*======================================================================*/ |
| 265 | + |
| 266 | + |
| 267 | +/*======================================================================*\ |
| 268 | + Function: _striplinks |
| 269 | + Purpose: strip the hyperlinks from an html document |
| 270 | + Input: $document document to strip. |
| 271 | + Output: $match an array of the links |
| 272 | +\*======================================================================*/ |
| 273 | + |
| 274 | + function _striplinks($document) |
| 275 | + { |
| 276 | + preg_match_all("'<\s*a\s+.*href\s*=\s* # find <a href= |
| 277 | + ([\"\'])? # find single or double quote |
| 278 | + (?(1) (.*?)\\1 | ([^\s\>]+)) # if quote found, match up to next matching |
| 279 | + # quote, otherwise match up to next space |
| 280 | + 'isx",$document,$links); |
| 281 | + |
| 282 | + |
| 283 | + // catenate the non-empty matches from the conditional subpattern |
| 284 | + |
| 285 | + while(list($key,$val) = each($links[2])) |
| 286 | + { |
| 287 | + if(!empty($val)) |
| 288 | + $match[] = $val; |
| 289 | + } |
| 290 | + |
| 291 | + while(list($key,$val) = each($links[3])) |
| 292 | + { |
| 293 | + if(!empty($val)) |
| 294 | + $match[] = $val; |
| 295 | + } |
| 296 | + |
| 297 | + // return the links |
| 298 | + return $match; |
| 299 | + } |
| 300 | + |
| 301 | +/*======================================================================*\ |
| 302 | + Function: _stripform |
| 303 | + Purpose: strip the form elements from an html document |
| 304 | + Input: $document document to strip. |
| 305 | + Output: $match an array of the links |
| 306 | +\*======================================================================*/ |
| 307 | + |
| 308 | + function _stripform($document) |
| 309 | + { |
| 310 | + preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements); |
| 311 | + |
| 312 | + // catenate the matches |
| 313 | + $match = implode("\r\n",$elements[0]); |
| 314 | + |
| 315 | + // return the links |
| 316 | + return $match; |
| 317 | + } |
| 318 | + |
| 319 | + |
| 320 | + |
| 321 | +/*======================================================================*\ |
| 322 | + Function: _striptext |
| 323 | + Purpose: strip the text from an html document |
| 324 | + Input: $document document to strip. |
| 325 | + Output: $text the resulting text |
| 326 | +\*======================================================================*/ |
| 327 | + |
| 328 | + function _striptext($document) |
| 329 | + { |
| 330 | + |
| 331 | + // I didn't use preg eval (//e) since that is only available in PHP 4.0. |
| 332 | + // so, list your entities one by one here. I included some of the |
| 333 | + // more common ones. |
| 334 | + |
| 335 | + $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript |
| 336 | + "'<[\/\!]*?[^<>]*?>'si", // strip out html tags |
| 337 | + "'([\r\n])[\s]+'", // strip out white space |
| 338 | + "'&(quote|#34);'i", // replace html entities |
| 339 | + "'&(amp|#38);'i", |
| 340 | + "'&(lt|#60);'i", |
| 341 | + "'&(gt|#62);'i", |
| 342 | + "'&(nbsp|#160);'i", |
| 343 | + "'&(iexcl|#161);'i", |
| 344 | + "'&(cent|#162);'i", |
| 345 | + "'&(pound|#163);'i", |
| 346 | + "'&(copy|#169);'i" |
| 347 | + ); |
| 348 | + $replace = array( "", |
| 349 | + "", |
| 350 | + "\\1", |
| 351 | + "\"", |
| 352 | + "&", |
| 353 | + "<", |
| 354 | + ">", |
| 355 | + " ", |
| 356 | + chr(161), |
| 357 | + chr(162), |
| 358 | + chr(163), |
| 359 | + chr(169)); |
| 360 | + |
| 361 | + $text = preg_replace($search,$replace,$document); |
| 362 | + |
| 363 | + return $text; |
| 364 | + } |
| 365 | + |
| 366 | +/*======================================================================*\ |
| 367 | + Function: _expandlinks |
| 368 | + Purpose: expand each link into a fully qualified URL |
| 369 | + Input: $links the links to qualify |
| 370 | + $URI the full URI to get the base from |
| 371 | + Output: $expandedLinks the expanded links |
| 372 | +\*======================================================================*/ |
| 373 | + |
| 374 | + function _expandlinks($links,$URI) |
| 375 | + { |
| 376 | + |
| 377 | + preg_match("/^[^\?]+/",$URI,$match); |
| 378 | + |
| 379 | + $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]); |
| 380 | + |
| 381 | + $search = array( "|^http://".preg_quote($this->host)."|i", |
| 382 | + "|^(?!http://)(\/)?(?!mailto:)|i", |
| 383 | + "|/\./|", |
| 384 | + "|/[^\/]+/\.\./|" |
| 385 | + ); |
| 386 | + |
| 387 | + $replace = array( "", |
| 388 | + $match."/", |
| 389 | + "/", |
| 390 | + "/" |
| 391 | + ); |
| 392 | + |
| 393 | + $expandedLinks = preg_replace($search,$replace,$links); |
| 394 | + |
| 395 | + return $expandedLinks; |
| 396 | + } |
| 397 | + |
| 398 | +/*======================================================================*\ |
| 399 | + Function: _httprequest |
| 400 | + Purpose: go get the http data from the server |
| 401 | + Input: $url the url to fetch |
| 402 | + $fp the current open file pointer |
| 403 | + $URI the full URI |
| 404 | + $body body contents to send if any (POST) |
| 405 | + Output: |
| 406 | +\*======================================================================*/ |
| 407 | + |
| 408 | + function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="") |
| 409 | + { |
| 410 | + if($this->passcookies && $this->_redirectaddr) |
| 411 | + $this->setcookies(); |
| 412 | + |
| 413 | + $URI_PARTS = parse_url($URI); |
| 414 | + if(empty($url)) |
| 415 | + $url = "/"; |
| 416 | + $headers = $http_method." ".$url." ".$this->_httpversion."\r\n"; |
| 417 | + if(!empty($this->agent)) |
| 418 | + $headers .= "User-Agent: ".$this->agent."\r\n"; |
| 419 | + if(!empty($this->host) && !isset($this->rawheaders['Host'])) |
| 420 | + $headers .= "Host: ".$this->host."\r\n"; |
| 421 | + if(!empty($this->accept)) |
| 422 | + $headers .= "Accept: ".$this->accept."\r\n"; |
| 423 | + |
| 424 | + if($this->use_gzip) { |
| 425 | + // make sure PHP was built with --with-zlib |
| 426 | + // and we can handle gzipp'ed data |
| 427 | + if ( function_exists(gzinflate) ) { |
| 428 | + $headers .= "Accept-encoding: gzip\r\n"; |
| 429 | + } |
| 430 | + else { |
| 431 | + trigger_error( |
| 432 | + "use_gzip is on, but PHP was built without zlib support.". |
| 433 | + " Requesting file(s) without gzip encoding.", |
| 434 | + E_USER_NOTICE); |
| 435 | + } |
| 436 | + } |
| 437 | + |
| 438 | + if(!empty($this->referer)) |
| 439 | + $headers .= "Referer: ".$this->referer."\r\n"; |
| 440 | + if(!empty($this->cookies)) |
| 441 | + { |
| 442 | + if(!is_array($this->cookies)) |
| 443 | + $this->cookies = (array)$this->cookies; |
| 444 | + |
| 445 | + reset($this->cookies); |
| 446 | + if ( count($this->cookies) > 0 ) { |
| 447 | + $cookie_headers .= 'Cookie: '; |
| 448 | + foreach ( $this->cookies as $cookieKey => $cookieVal ) { |
| 449 | + $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; "; |
| 450 | + } |
| 451 | + $headers .= substr($cookie_headers,0,-2) . "\r\n"; |
| 452 | + } |
| 453 | + } |
| 454 | + if(!empty($this->rawheaders)) |
| 455 | + { |
| 456 | + if(!is_array($this->rawheaders)) |
| 457 | + $this->rawheaders = (array)$this->rawheaders; |
| 458 | + while(list($headerKey,$headerVal) = each($this->rawheaders)) |
| 459 | + $headers .= $headerKey.": ".$headerVal."\r\n"; |
| 460 | + } |
| 461 | + if(!empty($content_type)) { |
| 462 | + $headers .= "Content-type: $content_type"; |
| 463 | + if ($content_type == "multipart/form-data") |
| 464 | + $headers .= "; boundary=".$this->_mime_boundary; |
| 465 | + $headers .= "\r\n"; |
| 466 | + } |
| 467 | + if(!empty($body)) |
| 468 | + $headers .= "Content-length: ".strlen($body)."\r\n"; |
| 469 | + if(!empty($this->user) || !empty($this->pass)) |
| 470 | + $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n"; |
| 471 | + |
| 472 | + $headers .= "\r\n"; |
| 473 | + |
| 474 | + // set the read timeout if needed |
| 475 | + if ($this->read_timeout > 0) |
| 476 | + socket_set_timeout($fp, $this->read_timeout); |
| 477 | + $this->timed_out = false; |
| 478 | + |
| 479 | + fwrite($fp,$headers.$body,strlen($headers.$body)); |
| 480 | + |
| 481 | + $this->_redirectaddr = false; |
| 482 | + unset($this->headers); |
| 483 | + |
| 484 | + // content was returned gzip encoded? |
| 485 | + $is_gzipped = false; |
| 486 | + |
| 487 | + while($currentHeader = fgets($fp,$this->_maxlinelen)) |
| 488 | + { |
| 489 | + if ($this->read_timeout > 0 && $this->_check_timeout($fp)) |
| 490 | + { |
| 491 | + $this->status=-100; |
| 492 | + return false; |
| 493 | + } |
| 494 | + |
| 495 | + // if($currentHeader == "\r\n") |
| 496 | + if(preg_match("/^\r?\n$/", $currentHeader) ) |
| 497 | + break; |
| 498 | + |
| 499 | + // if a header begins with Location: or URI:, set the redirect |
| 500 | + if(preg_match("/^(Location:|URI:)/i",$currentHeader)) |
| 501 | + { |
| 502 | + // get URL portion of the redirect |
| 503 | + preg_match("/^(Location:|URI:)\s+(.*)/",chop($currentHeader),$matches); |
| 504 | + // look for :// in the Location header to see if hostname is included |
| 505 | + if(!preg_match("|\:\/\/|",$matches[2])) |
| 506 | + { |
| 507 | + // no host in the path, so prepend |
| 508 | + $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port; |
| 509 | + // eliminate double slash |
| 510 | + if(!preg_match("|^/|",$matches[2])) |
| 511 | + $this->_redirectaddr .= "/".$matches[2]; |
| 512 | + else |
| 513 | + $this->_redirectaddr .= $matches[2]; |
| 514 | + } |
| 515 | + else |
| 516 | + $this->_redirectaddr = $matches[2]; |
| 517 | + } |
| 518 | + |
| 519 | + if(preg_match("|^HTTP/|",$currentHeader)) |
| 520 | + { |
| 521 | + if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status)) |
| 522 | + { |
| 523 | + $this->status= $status[1]; |
| 524 | + } |
| 525 | + $this->response_code = $currentHeader; |
| 526 | + } |
| 527 | + |
| 528 | + if (preg_match("/Content-Encoding: gzip/", $currentHeader) ) { |
| 529 | + $is_gzipped = true; |
| 530 | + } |
| 531 | + |
| 532 | + $this->headers[] = $currentHeader; |
| 533 | + } |
| 534 | + |
| 535 | + # $results = fread($fp, $this->maxlength); |
| 536 | + $results = ""; |
| 537 | + while ( $data = fread($fp, $this->maxlength) ) { |
| 538 | + $results .= $data; |
| 539 | + if ( |
| 540 | + strlen($results) > $this->maxlength ) { |
| 541 | + break; |
| 542 | + } |
| 543 | + } |
| 544 | + |
| 545 | + // gunzip |
| 546 | + if ( $is_gzipped ) { |
| 547 | + // per http://www.php.net/manual/en/function.gzencode.php |
| 548 | + $results = substr($results, 10); |
| 549 | + $results = gzinflate($results); |
| 550 | + } |
| 551 | + |
| 552 | + if ($this->read_timeout > 0 && $this->_check_timeout($fp)) |
| 553 | + { |
| 554 | + $this->status=-100; |
| 555 | + return false; |
| 556 | + } |
| 557 | + |
| 558 | + // check if there is a a redirect meta tag |
| 559 | + |
| 560 | + if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) |
| 561 | + { |
| 562 | + $this->_redirectaddr = $this->_expandlinks($match[1],$URI); |
| 563 | + } |
| 564 | + |
| 565 | + // have we hit our frame depth and is there frame src to fetch? |
| 566 | + if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match)) |
| 567 | + { |
| 568 | + $this->results[] = $results; |
| 569 | + for($x=0; $x<count($match[1]); $x++) |
| 570 | + $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host); |
| 571 | + } |
| 572 | + // have we already fetched framed content? |
| 573 | + elseif(is_array($this->results)) |
| 574 | + $this->results[] = $results; |
| 575 | + // no framed content |
| 576 | + else |
| 577 | + $this->results = $results; |
| 578 | + |
| 579 | + return true; |
| 580 | + } |
| 581 | + |
| 582 | +/*======================================================================*\ |
| 583 | + Function: _httpsrequest |
| 584 | + Purpose: go get the https data from the server using curl |
| 585 | + Input: $url the url to fetch |
| 586 | + $URI the full URI |
| 587 | + $body body contents to send if any (POST) |
| 588 | + Output: |
| 589 | +\*======================================================================*/ |
| 590 | + |
| 591 | + function _httpsrequest($url,$URI,$http_method,$content_type="",$body="") |
| 592 | + { |
| 593 | + if($this->passcookies && $this->_redirectaddr) |
| 594 | + $this->setcookies(); |
| 595 | + |
| 596 | + $headers = array(); |
| 597 | + |
| 598 | + $URI_PARTS = parse_url($URI); |
| 599 | + if(empty($url)) |
| 600 | + $url = "/"; |
| 601 | + // GET ... header not needed for curl |
| 602 | + //$headers[] = $http_method." ".$url." ".$this->_httpversion; |
| 603 | + if(!empty($this->agent)) |
| 604 | + $headers[] = "User-Agent: ".$this->agent; |
| 605 | + if(!empty($this->host)) |
| 606 | + $headers[] = "Host: ".$this->host; |
| 607 | + if(!empty($this->accept)) |
| 608 | + $headers[] = "Accept: ".$this->accept; |
| 609 | + if(!empty($this->referer)) |
| 610 | + $headers[] = "Referer: ".$this->referer; |
| 611 | + if(!empty($this->cookies)) |
| 612 | + { |
| 613 | + if(!is_array($this->cookies)) |
| 614 | + $this->cookies = (array)$this->cookies; |
| 615 | + |
| 616 | + reset($this->cookies); |
| 617 | + if ( count($this->cookies) > 0 ) { |
| 618 | + $cookie_str = 'Cookie: '; |
| 619 | + foreach ( $this->cookies as $cookieKey => $cookieVal ) { |
| 620 | + $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; "; |
| 621 | + } |
| 622 | + $headers[] = substr($cookie_str,0,-2); |
| 623 | + } |
| 624 | + } |
| 625 | + if(!empty($this->rawheaders)) |
| 626 | + { |
| 627 | + if(!is_array($this->rawheaders)) |
| 628 | + $this->rawheaders = (array)$this->rawheaders; |
| 629 | + while(list($headerKey,$headerVal) = each($this->rawheaders)) |
| 630 | + $headers[] = $headerKey.": ".$headerVal; |
| 631 | + } |
| 632 | + if(!empty($content_type)) { |
| 633 | + if ($content_type == "multipart/form-data") |
| 634 | + $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary; |
| 635 | + else |
| 636 | + $headers[] = "Content-type: $content_type"; |
| 637 | + } |
| 638 | + if(!empty($body)) |
| 639 | + $headers[] = "Content-length: ".strlen($body); |
| 640 | + if(!empty($this->user) || !empty($this->pass)) |
| 641 | + $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass); |
| 642 | + |
| 643 | + for($curr_header = 0; $curr_header < count($headers); $curr_header++) { |
| 644 | + $cmdline_params .= " -H \"".$headers[$curr_header]."\""; |
| 645 | + } |
| 646 | + |
| 647 | + if(!empty($body)) |
| 648 | + $cmdline_params .= " -d \"$body\""; |
| 649 | + |
| 650 | + if($this->read_timeout > 0) |
| 651 | + $cmdline_params .= " -m ".$this->read_timeout; |
| 652 | + |
| 653 | + $headerfile = uniqid(time()); |
| 654 | + |
| 655 | + # accept self-signed certs |
| 656 | + $cmdline_params .= " -k"; |
| 657 | + exec($this->curl_path." -D \"/tmp/$headerfile\"".escapeshellcmd($cmdline_params)." ".escapeshellcmd($URI),$results,$return); |
| 658 | + |
| 659 | + if($return) |
| 660 | + { |
| 661 | + $this->error = "Error: cURL could not retrieve the document, error $return."; |
| 662 | + return false; |
| 663 | + } |
| 664 | + |
| 665 | + |
| 666 | + $results = implode("\r\n",$results); |
| 667 | + |
| 668 | + $result_headers = file("/tmp/$headerfile"); |
| 669 | + |
| 670 | + $this->_redirectaddr = false; |
| 671 | + unset($this->headers); |
| 672 | + |
| 673 | + for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++) |
| 674 | + { |
| 675 | + |
| 676 | + // if a header begins with Location: or URI:, set the redirect |
| 677 | + if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader])) |
| 678 | + { |
| 679 | + // get URL portion of the redirect |
| 680 | + preg_match("/^(Location: |URI:)(.*)/",chop($result_headers[$currentHeader]),$matches); |
| 681 | + // look for :// in the Location header to see if hostname is included |
| 682 | + if(!preg_match("|\:\/\/|",$matches[2])) |
| 683 | + { |
| 684 | + // no host in the path, so prepend |
| 685 | + $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port; |
| 686 | + // eliminate double slash |
| 687 | + if(!preg_match("|^/|",$matches[2])) |
| 688 | + $this->_redirectaddr .= "/".$matches[2]; |
| 689 | + else |
| 690 | + $this->_redirectaddr .= $matches[2]; |
| 691 | + } |
| 692 | + else |
| 693 | + $this->_redirectaddr = $matches[2]; |
| 694 | + } |
| 695 | + |
| 696 | + if(preg_match("|^HTTP/|",$result_headers[$currentHeader])) |
| 697 | + { |
| 698 | + $this->response_code = $result_headers[$currentHeader]; |
| 699 | + if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match)) |
| 700 | + { |
| 701 | + $this->status= $match[1]; |
| 702 | + } |
| 703 | + } |
| 704 | + $this->headers[] = $result_headers[$currentHeader]; |
| 705 | + } |
| 706 | + |
| 707 | + // check if there is a a redirect meta tag |
| 708 | + |
| 709 | + if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) |
| 710 | + { |
| 711 | + $this->_redirectaddr = $this->_expandlinks($match[1],$URI); |
| 712 | + } |
| 713 | + |
| 714 | + // have we hit our frame depth and is there frame src to fetch? |
| 715 | + if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match)) |
| 716 | + { |
| 717 | + $this->results[] = $results; |
| 718 | + for($x=0; $x<count($match[1]); $x++) |
| 719 | + $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host); |
| 720 | + } |
| 721 | + // have we already fetched framed content? |
| 722 | + elseif(is_array($this->results)) |
| 723 | + $this->results[] = $results; |
| 724 | + // no framed content |
| 725 | + else |
| 726 | + $this->results = $results; |
| 727 | + |
| 728 | + unlink("/tmp/$headerfile"); |
| 729 | + |
| 730 | + return true; |
| 731 | + } |
| 732 | + |
| 733 | +/*======================================================================*\ |
| 734 | + Function: setcookies() |
| 735 | + Purpose: set cookies for a redirection |
| 736 | +\*======================================================================*/ |
| 737 | + |
| 738 | + function setcookies() |
| 739 | + { |
| 740 | + for($x=0; $x<count($this->headers); $x++) |
| 741 | + { |
| 742 | + if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $this->headers[$x],$match)) |
| 743 | + $this->cookies[$match[1]] = $match[2]; |
| 744 | + } |
| 745 | + } |
| 746 | + |
| 747 | + |
| 748 | +/*======================================================================*\ |
| 749 | + Function: _check_timeout |
| 750 | + Purpose: checks whether timeout has occurred |
| 751 | + Input: $fp file pointer |
| 752 | +\*======================================================================*/ |
| 753 | + |
| 754 | + function _check_timeout($fp) |
| 755 | + { |
| 756 | + if ($this->read_timeout > 0) { |
| 757 | + $fp_status = socket_get_status($fp); |
| 758 | + if ($fp_status["timed_out"]) { |
| 759 | + $this->timed_out = true; |
| 760 | + return true; |
| 761 | + } |
| 762 | + } |
| 763 | + return false; |
| 764 | + } |
| 765 | + |
| 766 | +/*======================================================================*\ |
| 767 | + Function: _connect |
| 768 | + Purpose: make a socket connection |
| 769 | + Input: $fp file pointer |
| 770 | +\*======================================================================*/ |
| 771 | + |
/**
 * Open a socket to the target host, or to the proxy when one is configured.
 *
 * @param resource $fp set by reference to the opened socket on success
 * @return bool true on success; on failure $this->status holds the error
 *              number and $this->error a description
 */
function _connect(&$fp)
{
    // go through the proxy only when both its host and port are set
    if (!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno)
    {
        // each case needs its own break: without it every branch fell
        // through to the default and the specific message was overwritten
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (".$errno.")";
    }
    return false;
}
| 818 | +/*======================================================================*\ |
| 819 | + Function: _disconnect |
| 820 | + Purpose: disconnect a socket connection |
| 821 | + Input: $fp file pointer |
| 822 | +\*======================================================================*/ |
| 823 | + |
/**
 * Close a socket connection.
 *
 * @param resource $fp file pointer to close
 * @return bool the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);
    return $closed;
}
| 828 | + |
| 829 | + |
| 830 | +/*======================================================================*\ |
| 831 | + Function: _prepare_post_body |
| 832 | + Purpose: Prepare post body according to encoding type |
| 833 | + Input: $formvars - form variables |
| 834 | + $formfiles - form upload files |
| 835 | + Output: post body |
| 836 | +\*======================================================================*/ |
| 837 | + |
/**
 * Build the request body for a form submission according to
 * $this->_submit_type.
 *
 * @param mixed $formvars  form variables (name => value, or name => list of values)
 * @param mixed $formfiles upload files (field name => file name or list of file names)
 * @return string|null the encoded body; null when there is nothing to send
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    // start from an empty string so that appending never touches an
    // undefined variable
    $postdata = '';

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            // foreach replaces the each()/list() idiom, which no longer
            // exists in PHP 8
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // concatenate the brackets: "\[" inside double quotes
                        // would put literal backslashes into the field name
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // file_get_contents copes with empty files, where
                    // fread() with a zero length fails
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
| 899 | +} |
| 900 | + |
| 901 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/extlib/Snoopy.class.inc |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 902 | + native |
Index: trunk/extensions/RSS/magpierss/scripts/magpie_simple.php |
— | — | @@ -0,0 +1,29 @@ |
<?php

define('MAGPIE_DIR', '../');
require_once(MAGPIE_DIR.'rss_fetch.inc');

// the feed address is optional: the first page view has no query string
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';

if ( $url ) {
    $rss = fetch_rss( $url );

    if ( $rss ) {
        // everything printed below comes from the request or from a remote
        // feed, so it is escaped before being written into the page
        $channel_title = isset($rss->channel['title']) ? $rss->channel['title'] : '';
        echo "Channel: " . htmlspecialchars($channel_title, ENT_QUOTES) . "<p>";
        echo "<ul>";
        foreach ($rss->items as $item) {
            $href = htmlspecialchars(isset($item['link']) ? $item['link'] : '', ENT_QUOTES);
            $title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES);
            echo "<li><a href=\"$href\">$title</a></li>";
        }
        echo "</ul>";
    }
    else {
        // fetch_rss() returns false on failure; show why instead of
        // dereferencing it
        echo "Error: " . htmlspecialchars(magpie_error(), ENT_QUOTES);
    }
}
?>

<form>
    RSS URL: <input type="text" size="30" name="url" value="<?php echo htmlspecialchars($url, ENT_QUOTES) ?>"><br />
    <input type="submit" value="Parse RSS">
</form>

<p>
<h2>Security Note:</h2>
This is a simple <b>example</b> script. If this was a <b>real</b> script we probably wouldn't allow strangers to submit random URLs, and we certainly wouldn't simply echo anything passed in the URL. Additionally its a bad idea to leave this example script lying around.
</p>
\ No newline at end of file |
Property changes on: trunk/extensions/RSS/magpierss/scripts/magpie_simple.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 31 | + native |
Index: trunk/extensions/RSS/magpierss/scripts/magpie_slashbox.php |
— | — | @@ -0,0 +1,66 @@ |
<?php

define('MAGPIE_DIR', '../');
require_once(MAGPIE_DIR.'rss_fetch.inc');

// the feed address is optional: the first page view has no query string
$url = (isset($_GET['rss_url']) && is_string($_GET['rss_url'])) ? $_GET['rss_url'] : '';

// stays false until a feed has been fetched successfully
$rss = false;

?>

<html>
<body LINK="#999999" VLINK="#000000">

<form>
<input type="text" name="rss_url" size="40" value="<?php echo htmlspecialchars($url, ENT_QUOTES) ?>"><input type="Submit">
</form>

<?php

if ( $url ) {
    echo "displaying: " . htmlspecialchars($url, ENT_QUOTES) . "<p>";
    $rss = fetch_rss( $url );
    if ( $rss ) {
        // slashbox() prints the table itself and returns nothing
        slashbox ($rss);
    }
    else {
        echo "Error: " . htmlspecialchars(magpie_error(), ENT_QUOTES);
    }
}

// dump the parsed object as text, so markup inside the feed is not rendered
echo "<pre>";
echo htmlspecialchars(print_r($rss, true), ENT_QUOTES);
echo "</pre>";
?>

</body>
</html>
| 33 | + |
| 34 | +<?php |
| 35 | + |
| 36 | +# just some quick and ugly php to generate html |
| 37 | +# |
| 38 | +# |
function slashbox ($rss) {
    echo "<table cellpadding=2 cellspacing=0><tr>";
    echo "<td bgcolor=#006666>";

    # get the channel title and link properties off of the rss object.
    # Feed content is untrusted: escape it for HTML, and quote the href so
    # a value containing spaces cannot add attributes to the tag.
    $title = htmlspecialchars(isset($rss->channel['title']) ? $rss->channel['title'] : '', ENT_QUOTES);
    $link = htmlspecialchars(isset($rss->channel['link']) ? $rss->channel['link'] : '', ENT_QUOTES);

    echo "<a href=\"$link\"><font color=#FFFFFF><b>$title</b></font></a>";
    echo "</td></tr>";

    # foreach over each item in the array.
    # displaying simple links
    #
    # we could be doing all sorts of neat things with the dublin core
    # info, or the event info, or what not, but keeping it simple for now.
    #
    foreach ($rss->items as $item ) {
        $item_link = htmlspecialchars(isset($item['link']) ? $item['link'] : '', ENT_QUOTES);
        $item_title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES);

        echo "<tr><td bgcolor=#cccccc>";
        echo "<a href=\"$item_link\">";
        echo $item_title;
        echo "</a></td></tr>";
    }

    echo "</table>";
}
| 66 | + |
| 67 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/scripts/magpie_slashbox.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 68 | + native |
Index: trunk/extensions/RSS/magpierss/scripts/README |
— | — | @@ -0,0 +1,27 @@ |
| 2 | +Some examples of how to use Magpie:
| 3 | + |
| 4 | +* magpie_simple.php * |
| 5 | + Simple example of fetching and parsing an RSS file. Expects to be |
| 6 | + called with a query param 'url=http://<some rss file>'
| 7 | + |
| 8 | +* simple_smarty.php * |
| 9 | + Similar to magpie_simple, but using the Smarty template engine to do
| 10 | + display. Also demonstrates using rss_utils.inc and a smarty plugin to
| 11 | + parse and display when each RSS item was published. |
| 12 | + |
| 13 | +* magpie_debug.php * |
| 14 | + Displays all the information available from a parsed feed. |
| 15 | + |
| 16 | +* smarty_plugin/modifier.rss_date_parse.php * |
| 17 | + |
| 18 | + A Smarty plugin for parsing RSS style dates. You must include rss_utils.inc |
| 19 | + for this plugin to work. It also must be installed in the Smarty plugin |
| 20 | + directory, see the Smarty docs for details. |
| 21 | + |
| 22 | +* templates/simple.smarty |
| 23 | + A Smarty template used by simple_smarty.php which demostrates |
| 24 | + displaying an RSS feed and using the date parse plugin. |
| 25 | + |
| 26 | + |
| 27 | +The Smarty template engine and documentation on how to use it are available from |
| 28 | +http://smarty.php.net |
Index: trunk/extensions/RSS/magpierss/scripts/smarty_plugin/modifier.rss_date_parse.php |
— | — | @@ -0,0 +1,31 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/* |
| 5 | + * Smarty plugin |
| 6 | + * ------------------------------------------------------------- |
| 7 | + * Type: modifier |
| 8 | + * Name: rss_date_parse |
| 9 | + * Purpose: parse rss date into unix epoch |
| 10 | + * Input: string: rss date |
| 11 | + * default_date: default date if $rss_date is empty |
| 12 | + * |
| 13 | + * NOTE!!! parse_w3cdtf provided by MagpieRSS's rss_utils.inc |
| 14 | + * this file needs to be included somewhere in your script |
| 15 | + * ------------------------------------------------------------- |
| 16 | + */ |
| 17 | + |
/**
 * Smarty modifier: turn an RSS date string into the value returned by
 * parse_w3cdtf(), using $default_date when $rss_date is empty.
 *
 * Returns nothing (null) when neither argument holds a date.
 */
function smarty_modifier_rss_date_parse ($rss_date, $default_date=null)
{
    // the item's own date wins whenever it is present
    if ($rss_date != '') {
        return parse_w3cdtf( $rss_date );
    }

    // otherwise fall back to the caller-supplied default, if any
    $has_default = isset($default_date) && $default_date != '';
    if ($has_default) {
        return parse_w3cdtf( $default_date );
    }

    return;
}
| 28 | + |
| 29 | + |
| 30 | + |
| 31 | + |
| 32 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/scripts/smarty_plugin/modifier.rss_date_parse.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 33 | + native |
Index: trunk/extensions/RSS/magpierss/scripts/magpie_debug.php |
— | — | @@ -0,0 +1,80 @@ |
<?php

ini_set('display_errors', 1);
ini_set('error_reporting', E_ALL);
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8');
define('MAGPIE_DIR', '../');
define('MAGPIE_DEBUG', 1);

// flush cache quickly for debugging purposes, don't do this on a live site
define('MAGPIE_CACHE_AGE', 10);

require_once(MAGPIE_DIR.'rss_fetch.inc');


// fall back to the sample feed unless a single url string was supplied
if ( isset($_GET['url']) && is_string($_GET['url']) ) {
    $url = $_GET['url'];
}
else {
    $url = 'http://magpierss.sf.net/test.rss';
}


test_library_support();

$rss = fetch_rss( $url );

if ($rss) {
    // feed content is untrusted, so it is escaped before being printed
    $channel_title = isset($rss->channel['title']) ? $rss->channel['title'] : '';
    echo "<h3>Example Output</h3>";
    echo "Channel: " . htmlspecialchars($channel_title, ENT_QUOTES) . "<p>";
    echo "<ul>";
    foreach ($rss->items as $item) {
        $href = htmlspecialchars(isset($item['link']) ? $item['link'] : '', ENT_QUOTES);
        $title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES);
        echo "<li><a href=\"$href\">$title</a></li>";
    }
    echo "</ul>";
}
else {
    echo "Error: " . htmlspecialchars(magpie_error(), ENT_QUOTES);
}
?>

<form>
    RSS URL: <input type="text" size="30" name="url" value="<?php echo htmlspecialchars($url, ENT_QUOTES) ?>"><br />
    <input type="submit" value="Parse RSS">
</form>

<h3>Parsed Results (var_dump'ed)</h3>
<pre>
<?php
// capture the dump so that markup inside the feed is shown as text
ob_start();
var_dump($rss);
echo htmlspecialchars(ob_get_clean(), ENT_QUOTES);
?>
</pre>
| 53 | + |
| 54 | +<?php |
| 55 | + |
/**
 * Print one OK/Warning/Error line for each PHP feature Magpie relies on.
 * Stops the script when no XML parser is available.
 */
function test_library_support() {
    if (!function_exists('xml_parser_create')) {
        echo "<b>Error:</b> PHP compiled without XML support (--with-xml), Mapgie won't work without PHP support for XML.<br />\n";
        exit;
    }
    else {
        echo "<b>OK:</b> Found an XML parser. <br />\n";
    }

    if ( ! function_exists('gzinflate') ) {
        echo "<b>Warning:</b> PHP compiled without Zlib support (--with-zlib). No support for GZIP encoding.<br />\n";
    }
    else {
        echo "<b>OK:</b> Support for GZIP encoding.<br />\n";
    }

    // Either iconv or mbstring is enough (the 0.7 change log says Magpie
    // "uses iconv or mbstring if available"), so warn only when both are
    // missing rather than when just one of them is.
    if ( ! (function_exists('iconv') or function_exists('mb_convert_encoding') ) ) {
        echo "<b>Warning:</b> No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring). " .
        "No support character set munging.<br />\n";
    }
    else {
        echo "<b>OK:</b> Support for character munging.<br />\n";
    }
}
| 80 | + |
| 81 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/scripts/magpie_debug.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 82 | + native |
Index: trunk/extensions/RSS/magpierss/scripts/simple_smarty.php |
— | — | @@ -0,0 +1,58 @@ |
<?php

// Define path to Smarty files (don't forget trailing slash)
// and load library. (you'll want to change this value)
//
// NOTE: you can also simply add Smarty to your include path
define('SMARTY_DIR', '/home/kellan/projs/magpierss/scripts/Smarty/');
require_once(SMARTY_DIR.'Smarty.class.php');

// define path to Magpie files and load library
// (you'll want to change this value)
//
// NOTE: you can also simply add MagpieRSS to your include path
define('MAGPIE_DIR', '/home/kellan/projs/magpierss/');
require_once(MAGPIE_DIR.'rss_fetch.inc');
require_once(MAGPIE_DIR.'rss_utils.inc');


// optionally show lots of debugging info
# define('MAGPIE_DEBUG', 2);

// optionally flush cache quickly for debugging purposes,
// don't do this on a live site
# define('MAGPIE_CACHE_AGE', 10);

// use cache? default is yes. see rss_fetch for other Magpie options
# define('MAGPIE_CACHE_ON', 1);

// setup template object
$smarty = new Smarty;
$smarty->compile_check = true;

// url of an rss file (absent on the first page view)
$url = (isset($_GET['rss_url']) && is_string($_GET['rss_url'])) ? $_GET['rss_url'] : '';


if ( $url ) {
    // assign a variable to smarty for use in the template
    $smarty->assign('rss_url', $url);

    // use MagpieRSS to fetch remote RSS file, and parse it
    $rss = fetch_rss( $url );

    // if fetch_rss returned false, we encountered an error
    if ( !$rss ) {
        $smarty->assign( 'error', magpie_error() );
    }
    $smarty->assign('rss', $rss );

    // only parse a date when the fetch succeeded and the first item
    // actually carries a dc:date; otherwise there is nothing to read
    if ( $rss && isset($rss->items[0]['dc']['date']) ) {
        $date = parse_w3cdtf( $rss->items[0]['dc']['date'] );
        $smarty->assign( 'date', $date );
    }
}

// parse smarty template, and display using the variables we assigned
$smarty->display('simple.smarty');

?>
Property changes on: trunk/extensions/RSS/magpierss/scripts/simple_smarty.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 60 | + native |
Index: trunk/extensions/RSS/magpierss/scripts/templates/simple.smarty |
— | — | @@ -0,0 +1,46 @@ |
| 2 | +<html> |
| 3 | +<head> |
| 4 | +<title>A Simple RSS Box: I'm not a designer</title> |
| 5 | +</head> |
| 6 | +<body> |
| 7 | + |
| 8 | +<form> |
| 9 | +<b>RSS File:</b> |
| 10 | +<input type="text" name="rss_url" value="{$rss_url}" size="50">
| 11 | +<input type="submit"> |
| 12 | +</form> |
| 13 | + |
| 14 | +<b>Displaying:</b> {$rss_url} |
| 15 | +<p> |
| 16 | + |
| 17 | +{* if $error display the error |
| 18 | + elseif parsed RSS object display the RSS |
| 19 | + else solicit user for a URL |
| 20 | +*} |
| 21 | + |
| 22 | +{if $error } |
| 23 | +<b>Error:</b> {$error} |
| 24 | +{elseif $rss} |
| 25 | +<table border=1> |
| 26 | + <tr> |
| 27 | + <th colspan=2> |
| 28 | + <a href="{$rss->channel.link}">{$rss->channel.title}</a> |
| 29 | + </th> |
| 30 | + </tr> |
| 31 | + {foreach from=$rss->items item=item} |
| 32 | + <tr> |
| 33 | + <td> |
| 34 | + <a href="{$item.link}">{$item.title}</a> |
| 35 | + </td> |
| 36 | + <td> |
| 37 | + {$item.dc.date|rss_date_parse|date_format:"%A, %B %e, %Y"} |
| 38 | + </td> |
| 39 | + </tr> |
| 40 | + {/foreach} |
| 41 | +</table> |
| 42 | +{else} |
| 43 | + Enter the URL of an RSS file to display. |
| 44 | +{/if} |
| 45 | + |
| 46 | +</body> |
| 47 | +</html> |
Index: trunk/extensions/RSS/magpierss/CHANGES |
— | — | @@ -0,0 +1,41 @@ |
| 2 | +Version 0.72 |
| 3 | +----------- |
| 4 | + - fix security exploit: http://www.sec-consult.com/216.html |
| 5 | + |
| 6 | +Version 0.7 |
| 7 | +----------- |
| 8 | + - support for input and output charset encoding |
| 9 | + based on the work in FoF, uses iconv or mbstring if available |
| 10 | + - |
| 11 | + |
| 12 | +Version 0.6 |
| 13 | +----------- |
| 14 | + - basic support for Atom syndication format |
| 15 | + including support for Atom content constructs |
| 16 | + - fixed support for private feeds (HTTP Auth and SSL) |
| 17 | + (thanks to silverorange.com for providing test feeds) |
| 18 | + - support for some broken webservers |
| 19 | + |
| 20 | +Version 0.52 |
| 21 | +----------- |
| 22 | + - support GZIP content negotiation
| 23 | + - PHP 4.3.2 support |
| 24 | + |
| 25 | +Version 0.4 |
| 26 | +----------- |
| 27 | + - improved error handling, better access for script authors |
| 28 | + - included example scripts of working with MagpieRSS |
| 29 | + - new Smarty plugin for RSS date parsing |
| 30 | + |
| 31 | +Version 0.3 |
| 32 | +----------- |
| 33 | + - added support for conditional gets (Last-Modified, ETag) |
| 34 | + - now use Snoopy to handle fetching RSS files |
| 35 | + |
| 36 | +Version 0.2 |
| 37 | +----------- |
| 38 | + - MAJOR CLEAN UP |
| 39 | + - removed kludgy $options array in favour of constants |
| 40 | + - phased out returning arrays |
| 41 | + - added better error handling |
| 42 | + - re-worked comments |
Index: trunk/extensions/RSS/magpierss/rss_fetch.inc |
— | — | @@ -0,0 +1,458 @@ |
| 2 | +<?php |
| 3 | +/* |
| 4 | + * Project: MagpieRSS: a simple RSS integration tool |
| 5 | + * File: rss_fetch.inc, a simple functional interface |
| 6 | + to fetching and parsing RSS files, via the |
| 7 | + function fetch_rss() |
| 8 | + * Author: Kellan Elliott-McCrea <kellan@protest.net> |
| 9 | + * License: GPL |
| 10 | + * |
| 11 | + * The latest version of MagpieRSS can be obtained from:
| 12 | + * http://magpierss.sourceforge.net |
| 13 | + * |
| 14 | + * For questions, help, comments, discussion, etc., please join the |
| 15 | + * Magpie mailing list: |
| 16 | + * magpierss-general@lists.sourceforge.net |
| 17 | + * |
| 18 | + */ |
| 19 | + |
// Make sure MAGPIE_DIR is set up for hosts that don't include
// the current path in include_path; values defined by the including
// script are left alone.
// with thanks to rajiv and smarty
defined('DIR_SEP') or define('DIR_SEP', DIRECTORY_SEPARATOR);
defined('MAGPIE_DIR') or define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);

// the rest of Magpie lives next to this file
require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// for including 3rd party libraries
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
| 37 | + |
| 38 | + |
| 39 | +/* |
| 40 | + * CONSTANTS - redefine these in your script to change the |
| 41 | + * behaviour of fetch_rss() currently, most options effect the cache |
| 42 | + * |
| 43 | + * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? |
| 44 | + * For me a built in cache was essential to creating a "PHP-like" |
| 45 | + * feel to Magpie, see rss_cache.inc for rationale |
| 46 | + * |
| 47 | + * |
| 48 | + * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects? |
| 49 | + * This should be a location that the webserver can write to. If this |
| 50 | + * directory does not already exist Mapie will try to be smart and create |
| 51 | + * it. This will often fail for permissions reasons. |
| 52 | + * |
| 53 | + * |
| 54 | + * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds. |
| 55 | + * |
| 56 | + * |
| 57 | + * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error |
| 58 | + * instead of returning stale object? |
| 59 | + * |
| 60 | + * MAGPIE_DEBUG - Display debugging notices? |
| 61 | + * |
| 62 | +*/ |
| 63 | + |
| 64 | + |
| 65 | +/*=======================================================================*\ |
| 66 | + Function: fetch_rss: |
| 67 | + Purpose: return RSS object for the give url |
| 68 | + maintain the cache |
| 69 | + Input: url of RSS file |
| 70 | + Output: parsed RSS object (see rss_parse.inc) |
| 71 | + |
| 72 | + NOTES ON CACHEING: |
| 73 | + If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. |
| 74 | + |
| 75 | + NOTES ON RETRIEVING REMOTE FILES: |
| 76 | + If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
| 77 | + return a cached object, and touch the cache object upon receiving a
| 78 | + 304. |
| 79 | + |
| 80 | + NOTES ON FAILED REQUESTS: |
| 81 | + If there is an HTTP error while fetching an RSS object, the cached |
| 82 | + version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) |
| 83 | +\*=======================================================================*/ |
| 84 | + |
// version string, used by init() when building the default user agent
define('MAGPIE_VERSION', '0.72');

// most recent error message; written by error() and read via magpie_error()
$MAGPIE_ERROR = '';
| 88 | + |
/**
 * Return the parsed RSS object for $url, maintaining the cache.
 *
 * With the cache off the feed is fetched and parsed on every call. With
 * the cache on, a fresh cached object is returned directly; otherwise a
 * (conditional) fetch is attempted and, if it fails, the stale cached
 * object is returned unless MAGPIE_CACHE_FRESH_ONLY is set.
 *
 * @param string $url url of the RSS file
 * @return mixed parsed RSS object, or false on failure (see magpie_error())
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }

        error("Failed to fetch $url and cache is off");
        return false;
    }

    // else cache is ON
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    // An empty string, not 0, marks "cache not consulted": before PHP 8
    // the loose comparison 0 == 'HIT' is true.
    $cache_status    = '';        // response of check_cache
    $request_headers = array();   // HTTP headers to send with fetch
    $rss             = false;     // cached RSS object, if any
    $errormsg        = '';        // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get

    // setup headers
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss ) {
            // Each validator is sent on its own when available. The
            // request header that pairs with Last-Modified is
            // If-Modified-Since; "If-Last-Modified" does not exist.
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = trim($rss->etag);
            }
            if ( !empty($rss->last_modified) ) {
                $request_headers['If-Modified-Since'] = trim($rss->last_modified);
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        if ( $resp->status == '304' and $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            // parse into a separate variable so that a parse failure does
            // not throw away the stale copy held in $rss
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $fresh );
                return $fresh;
            }
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habit of tacking
                # on '\n': strip trailing whitespace only, instead of
                # blindly cutting the last two characters
                $http_error = rtrim($resp->error);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .= "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object, unless only fresh objects are wanted
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end fetch_rss()
| 223 | + |
| 224 | +/*=======================================================================*\ |
| 225 | + Function: error |
| 226 | + Purpose: set MAGPIE_ERROR, and trigger error |
| 227 | +\*=======================================================================*/ |
| 228 | + |
/**
 * Record an error message in $MAGPIE_ERROR and raise it via trigger_error().
 * An empty message is ignored.
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    // nothing to report
    if ( !$errormsg ) {
        return;
    }

    $report = "MagpieRSS: $errormsg";
    $MAGPIE_ERROR = $report;
    trigger_error( $report, $lvl );
}
| 242 | + |
/**
 * Raise a debugging notice, tagged so it is recognisable as Magpie output.
 */
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $tagged = "MagpieRSS [debug] " . $debugmsg;
    trigger_error($tagged, $lvl);
}
| 246 | + |
| 247 | +/*=======================================================================*\ |
| 248 | + Function: magpie_error |
| 249 | + Purpose: accessor for the magpie error variable |
| 250 | +\*=======================================================================*/ |
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    // a non-empty argument replaces the stored message;
    // without one the call only reads it
    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
| 260 | + |
| 261 | +/*=======================================================================*\ |
| 262 | + Function: _fetch_remote_file |
| 263 | + Purpose: retrieve an arbitrary remote file |
| 264 | + Input: url of the remote file |
| 265 | + headers to send along with the request (optional) |
| 266 | + Output: an HTTP response object (see Snoopy.class.inc) |
| 267 | +\*=======================================================================*/ |
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is an HTTP client in PHP; configure it from the Magpie constants
    $snoopy = new Snoopy();
    $snoopy->agent        = MAGPIE_USER_AGENT;
    $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $snoopy->use_gzip     = MAGPIE_USE_GZIP;

    // extra request headers are only passed on when given as an array
    if ( is_array($headers) ) {
        $snoopy->rawheaders = $headers;
    }

    // errors raised during the fetch are suppressed; the client object
    // itself is handed back for the caller to inspect
    @$snoopy->fetch($url);

    return $snoopy;
}
| 282 | + |
| 283 | +/*=======================================================================*\ |
| 284 | + Function: _response_to_rss |
| 285 | + Purpose: parse an HTTP response object into an RSS object |
| 286 | + Input: an HTTP response object (see Snoopy) |
| 287 | + Output: parsed RSS object (see rss_parse) |
| 288 | +\*=======================================================================*/ |
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // if RSS parsed successfully
    if ( $rss and !$rss->ERROR) {

        // find Etag, and Last-Modified
        foreach ($resp->headers as $h) {
            // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
            // lines without a colon (such as the status line) carry no field value
            $colon = strpos($h, ":");
            if ($colon === false) {
                continue;
            }

            // trim both parts: the value may still end in the line break
            // of the raw header line
            $field = trim(substr($h, 0, $colon));
            $val   = trim(substr($h, $colon + 1));

            // HTTP field names are case-insensitive ("ETag", "Etag", "etag")
            if ( strcasecmp($field, 'ETag') == 0 ) {
                $rss->etag = $val;
            }
            elseif ( strcasecmp($field, 'Last-Modified') == 0 ) {
                $rss->last_modified = $val;
            }
        }

        return $rss;
    }

    // else construct error message
    $errormsg = "Failed to parse RSS file.";

    if ($rss) {
        $errormsg .= " (" . $rss->ERROR . ")";
    }
    error($errormsg);

    return false;
}
| 328 | + |
| 329 | +/*=======================================================================*\ |
| 330 | + Function: init |
| 331 | + Purpose: setup constants with default values |
| 332 | + check for user overrides |
| 333 | +\*=======================================================================*/ |
function init () {
    // the marker constant turns every call after the first into a no-op
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // defaults that apply only where the script has not defined the
    // constant itself
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,         // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'ISO-8859-1',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_FETCH_TIME_OUT'   => 5,             // 5 second timeout
        'MAGPIE_USE_GZIP'         => true,          // use gzip encoding to fetch rss files if supported?
    );

    foreach ( $defaults as $name => $value ) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }

    // the default user agent mentions when the cache is switched off,
    // so it is built after MAGPIE_CACHE_ON has been settled
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $closing = MAGPIE_CACHE_ON ? ')' : '; No cache)';
        define('MAGPIE_USER_AGENT', 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $closing);
    }
}
| 396 | + |
| 397 | +// NOTE: the following code should really be in Snoopy, or at least |
| 398 | +// somewhere other than rss_fetch! |
| 399 | + |
| 400 | +/*=======================================================================*\ |
| 401 | + HTTP STATUS CODE PREDICATES |
| 402 | + These functions attempt to classify an HTTP status code |
| 403 | + based on RFC 2616 and RFC 2518. |
| 404 | + |
| 405 | + All of them take an HTTP status code as input, and return true or false |
| 406 | + |
| 407 | + All this code is adapted from LWP's HTTP::Status. |
| 408 | +\*=======================================================================*/ |
| 409 | + |
| 410 | + |
| 411 | +/*=======================================================================*\ |
| 412 | + Function: is_info |
| 413 | + Purpose: return true if Informational status code |
| 414 | +\*=======================================================================*/ |
function is_info ($sc) {
    // Informational codes are the 1xx range: 100 inclusive to 200 exclusive.
    if ( !($sc >= 100) ) {
        return false;
    }
    return $sc < 200;
}
| 418 | + |
| 419 | +/*=======================================================================*\ |
| 420 | + Function: is_success |
| 421 | + Purpose: return true if Successful status code |
| 422 | +\*=======================================================================*/ |
function is_success ($sc) {
    // Successful codes are the 2xx range: 200 inclusive to 300 exclusive.
    if ( !($sc >= 200) ) {
        return false;
    }
    return $sc < 300;
}
| 426 | + |
| 427 | +/*=======================================================================*\ |
| 428 | + Function: is_redirect |
| 429 | + Purpose: return true if Redirection status code |
| 430 | +\*=======================================================================*/ |
function is_redirect ($sc) {
    // Redirection codes are the 3xx range: 300 inclusive to 400 exclusive.
    if ( !($sc >= 300) ) {
        return false;
    }
    return $sc < 400;
}
| 434 | + |
| 435 | +/*=======================================================================*\ |
| 436 | + Function: is_error |
| 437 | + Purpose: return true if Error status code |
| 438 | +\*=======================================================================*/ |
function is_error ($sc) {
    // Error codes cover both 4xx and 5xx: 400 inclusive to 600 exclusive.
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 600;
}
| 442 | + |
| 443 | +/*=======================================================================*\ |
| 444 | + Function: is_client_error |
| 445 | + Purpose: return true if Error status code, and its a client error |
| 446 | +\*=======================================================================*/ |
function is_client_error ($sc) {
    // Client errors are the 4xx range: 400 inclusive to 500 exclusive.
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 500;
}
| 450 | + |
| 451 | +/*=======================================================================*\ |
| 452 | + Function: is_server_error |
| 453 | + Purpose: return true if Error status code, and it's a server error |
| 454 | +\*=======================================================================*/ |
function is_server_error ($sc) {
    // Server errors are the 5xx range: 500 inclusive to 600 exclusive.
    if ( !($sc >= 500) ) {
        return false;
    }
    return $sc < 600;
}
| 458 | + |
| 459 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/rss_fetch.inc |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 460 | + native |
Index: trunk/extensions/RSS/magpierss/rss_parse.inc |
— | — | @@ -0,0 +1,605 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | +* Project: MagpieRSS: a simple RSS integration tool |
| 6 | +* File: rss_parse.inc - parse an RSS or Atom feed |
| 7 | +* return as a simple object. |
| 8 | +* |
| 9 | +* Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 |
| 10 | +* |
| 11 | +* The latest version of MagpieRSS can be obtained from: |
| 12 | +* http://magpierss.sourceforge.net |
| 13 | +* |
| 14 | +* For questions, help, comments, discussion, etc., please join the |
| 15 | +* Magpie mailing list: |
| 16 | +* magpierss-general@lists.sourceforge.net |
| 17 | +* |
| 18 | +* @author Kellan Elliott-McCrea <kellan@protest.net> |
| 19 | +* @version 0.7a |
| 20 | +* @license GPL |
| 21 | +* |
| 22 | +*/ |
| 23 | + |
| 24 | +define('RSS', 'RSS'); |
| 25 | +define('ATOM', 'Atom'); |
| 26 | + |
| 27 | +require_once (MAGPIE_DIR . 'rss_utils.inc'); |
| 28 | + |
| 29 | +/** |
| 30 | +* Hybrid parser, and object, takes RSS as a string and returns a simple object. |
| 31 | +* |
| 32 | +* see: rss_fetch.inc for a simpler interface with integrated caching support |
| 33 | +* |
| 34 | +*/ |
class MagpieRSS {
    var $parser;

    var $current_item   = array();  // item currently being parsed
    var $items          = array();  // collection of parsed items
    var $channel        = array();  // hash of channel fields
    var $textinput      = array();
    var $image          = array();
    var $feed_type;
    var $feed_version;
    var $encoding       = '';       // output encoding of parsed rss

    var $_source_encoding = '';     // only set if we have to parse xml prolog
    var $source_encoding  = '';     // legacy name written by older versions; mirrors $_source_encoding

    var $ERROR = "";
    var $WARNING = "";

    // define some constants

    var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
    var $_KNOWN_ENCODINGS    = array('UTF-8', 'US-ASCII', 'ISO-8859-1');

    // parser variables, useless if you're not a parser, treat as private
    var $stack              = array(); // parser stack
    var $inchannel          = false;
    var $initem             = false;
    var $incontent          = false; // if in Atom <content mode="xml"> field
    var $intextinput        = false;
    var $inimage            = false;
    var $current_namespace  = false;


    /**
     * Set up XML parser, parse source, and return populated RSS object.
     *
     * On failure the message is stored in $this->ERROR (or $this->WARNING for
     * notices) and the object is left unpopulated.
     *
     * @param string $source           string containing the RSS to be parsed
     *
     * NOTE:  Probably a good idea to leave the encoding options alone unless
     *        you know what you're doing as PHP's character set support is
     *        a little weird.
     *
     * NOTE:  A lot of this is unnecessary but harmless with PHP5
     *
     * @param string $output_encoding  output the parsed RSS in this character
     *                                 set; defaults to ISO-8859-1.
     *
     * @param string $input_encoding   the character set of the incoming RSS source.
     *                                 Leave blank and Magpie will try to figure it
     *                                 out.
     *
     * @param bool   $detect_encoding  if false Magpie won't attempt to detect
     *                                 source encoding. (caveat emptor)
     */
    function __construct ($source, $output_encoding='ISO-8859-1',
                          $input_encoding=null, $detect_encoding=true)
    {
        # if PHP xml isn't compiled in, record the error and stop:
        # every following step needs the extension.
        #
        if (!function_exists('xml_parser_create')) {
            $this->error( "Failed to load PHP's XML Extension. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                           E_USER_ERROR );
            return;
        }

        list($parser, $source) = $this->create_parser($source,
                $output_encoding, $input_encoding, $detect_encoding);

        # xml_parser_create() yields a resource on PHP < 8 and an
        # XMLParser object on PHP >= 8; accept either.
        if (!is_resource($parser) && !is_object($parser)) {
            $this->error( "Failed to create an instance of PHP's XML parser. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        $this->parser = $parser;

        # pass in parser, and a reference to this object
        # setup handlers
        #
        xml_set_object( $this->parser, $this );
        xml_set_element_handler($this->parser,
                'feed_start_element', 'feed_end_element' );

        xml_set_character_data_handler( $this->parser, 'feed_cdata' );

        $status = xml_parse( $this->parser, $source );

        if (! $status ) {
            $errorcode = xml_get_error_code( $this->parser );
            if ( $errorcode != XML_ERROR_NONE ) {
                $xml_error = xml_error_string( $errorcode );
                $error_line = xml_get_current_line_number($this->parser);
                $error_col = xml_get_current_column_number($this->parser);
                $errormsg = "$xml_error at line $error_line, column $error_col";

                $this->error( $errormsg );
            }
        }

        xml_parser_free( $this->parser );

        $this->normalize();
    }

    /**
     * PHP4-style constructor, kept so that legacy code calling it keeps
     * working; simply delegates to __construct().
     */
    function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                        $input_encoding=null, $detect_encoding=true)
    {
        $this->__construct($source, $output_encoding, $input_encoding, $detect_encoding);
    }

    /**
     * XML parser callback: an element was opened.
     *
     * @param mixed  $p        the XML parser (unused)
     * @param string $element  element name, possibly "ns:name"
     * @param array  $attrs    attribute name => value map
     */
    function feed_start_element($p, $element, $attrs) {
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            // explode() rather than split(): split() was removed in PHP 7
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }

        # if feed type isn't set, then this is first element of feed
        # identify feed from root element
        #
        if (!isset($this->feed_type) ) {
            // the version attribute is optional; fall back to null
            $version = isset($attrs['version']) ? $attrs['version'] : null;

            if ( $el == 'rdf' ) {
                $this->feed_type = RSS;
                $this->feed_version = '1.0';
            }
            elseif ( $el == 'rss' ) {
                $this->feed_type = RSS;
                $this->feed_version = $version;
            }
            elseif ( $el == 'feed' ) {
                $this->feed_type = ATOM;
                $this->feed_version = $version;
                $this->inchannel = true;
            }
            return;
        }

        if ( $el == 'channel' )
        {
            $this->inchannel = true;
        }
        elseif ($el == 'item' or $el == 'entry' )
        {
            $this->initem = true;
            if ( isset($attrs['rdf:about']) ) {
                $this->current_item['about'] = $attrs['rdf:about'];
            }
        }

        // if we're in the default namespace of an RSS feed,
        //  record textinput or image fields
        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'textinput' )
        {
            $this->intextinput = true;
        }

        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'image' )
        {
            $this->inimage = true;
        }

        # handle atom content constructs
        elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            $this->incontent = $el;
        }

        // if inside an Atom content construct (e.g. content or summary) field treat tags as text
        elseif ($this->feed_type == ATOM and $this->incontent )
        {
            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                    array_map('map_attrs',
                    array_keys($attrs),
                    array_values($attrs) ) );

            $this->append_content( "<$element $attrs_str>"  );

            array_unshift( $this->stack, $el );
        }

        // Atom supports many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.
        //
        elseif ($this->feed_type == ATOM and $el == 'link' )
        {
            // rel and href are both optional in the markup; avoid
            // undefined-index notices when a feed omits them
            $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
            $href = isset($attrs['href']) ? $attrs['href'] : '';

            if ( $rel == 'alternate' ) {
                $link_el = 'link';
            }
            else {
                $link_el = 'link_' . $rel;
            }

            $this->append($link_el, $href);
        }
        // set stack[0] to current element
        else {
            array_unshift($this->stack, $el);
        }
    }


    /**
     * XML parser callback: character data inside the current element.
     */
    function feed_cdata ($p, $text) {
        if ($this->feed_type == ATOM and $this->incontent)
        {
            $this->append_content( $text );
        }
        else {
            // field name is the element stack joined outermost-first
            $current_el = join('_', array_reverse($this->stack));
            $this->append($current_el, $text);
        }
    }

    /**
     * XML parser callback: an element was closed.
     */
    function feed_end_element ($p, $el) {
        $el = strtolower($el);

        if ( $el == 'item' or $el == 'entry' )
        {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
        {
            $this->intextinput = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
        {
            $this->inimage = false;
        }
        elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            $this->incontent = false;
        }
        elseif ($el == 'channel' or $el == 'feed' )
        {
            $this->inchannel = false;
        }
        elseif ($this->feed_type == ATOM and $this->incontent  ) {
            // balance tags properly
            // note:  i don't think this is actually necessary
            if ( isset($this->stack[0]) and $this->stack[0] == $el )
            {
                $this->append_content("</$el>");
            }
            else {
                $this->append_content("<$el />");
            }

            array_shift( $this->stack );
        }
        else {
            array_shift( $this->stack );
        }

        $this->current_namespace = false;
    }

    /**
     * Append $str2 to $str1 in place, initialising $str1 to "" when unset.
     */
    function concat (&$str1, $str2="") {
        if (!isset($str1) ) {
            $str1="";
        }
        $str1 .= $str2;
    }


    /**
     * Append text to the Atom content construct currently being parsed,
     * on the current item if inside one, otherwise on the channel.
     */
    function append_content($text) {
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $this->incontent ], $text );
        }
        elseif ( $this->inchannel ) {
            $this->concat( $this->channel[ $this->incontent ], $text );
        }
    }

    // smart append - field and namespace aware
    function append($el, $text) {
        if (!$el) {
            return;
        }
        if ( $this->current_namespace )
        {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $this->current_namespace ][ $el ], $text);
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $this->current_namespace ][ $el ], $text );
            }
        }
        else {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $el ], $text);
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $el ], $text );
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $el ], $text );
            }

        }
    }

    /**
     * Cross-populate RSS and Atom field names so callers can read either,
     * and add a 'date_timestamp' to each item whose date could be parsed.
     */
    function normalize () {
        // if atom populate rss fields
        if ( $this->is_atom() ) {
            $this->channel['description'] =
                isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

            for ( $i = 0; $i < count($this->items); $i++) {
                $item = $this->items[$i];
                if ( isset($item['summary']) )
                    $item['description'] = $item['summary'];
                if ( isset($item['atom_content']))
                    $item['content']['encoded'] = $item['atom_content'];

                // prefer <issued>, fall back to <modified>; either may be absent
                $atom_date = null;
                if ( isset($item['issued']) ) {
                    $atom_date = $item['issued'];
                }
                elseif ( isset($item['modified']) ) {
                    $atom_date = $item['modified'];
                }

                if ( $atom_date ) {
                    $epoch = @parse_w3cdtf($atom_date);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
        elseif ( $this->is_rss() ) {
            $this->channel['tagline'] =
                isset($this->channel['description']) ? $this->channel['description'] : null;

            for ( $i = 0; $i < count($this->items); $i++) {
                $item = $this->items[$i];
                if ( isset($item['description']))
                    $item['summary'] = $item['description'];
                if ( isset($item['content']['encoded'] ) )
                    $item['atom_content'] = $item['content']['encoded'];

                if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                    $epoch = @parse_w3cdtf($item['dc']['date']);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }
                elseif ( isset($item['pubdate']) ) {
                    $epoch = @strtotime($item['pubdate']);
                    if ($epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
    }


    /**
     * @return mixed the feed version if this is an RSS feed, false otherwise
     */
    function is_rss () {
        if ( $this->feed_type == RSS ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
     * @return mixed the feed version if this is an Atom feed, false otherwise
     */
    function is_atom() {
        if ( $this->feed_type == ATOM ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
     * return XML parser, and possibly re-encoded source
     *
     * @return array  array($parser, $source)
     */
    function create_parser($source, $out_enc, $in_enc, $detect) {
        // compare the full version: checking only the first digit for "5"
        // would send PHP 7 and later down the PHP4 code path
        if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
            $parser = $this->php5_create_parser($in_enc, $detect);
        }
        else {
            list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
        }
        if ($out_enc) {
            $this->encoding = $out_enc;
            xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
        }

        return array($parser, $source);
    }

    /**
     * Instantiate an XML parser under PHP5 (and later)
     *
     * PHP5 will do a fine job of detecting input encoding
     * if passed an empty string as the encoding.
     *
     * All hail libxml2!
     *
     */
    function php5_create_parser($in_enc, $detect) {
        // by default php5 does a fine job of detecting input encodings
        if(!$detect && $in_enc) {
            return xml_parser_create($in_enc);
        }
        else {
            return xml_parser_create('');
        }
    }

    /**
     * Instantiate an XML parser under PHP4
     *
     * Unfortunately PHP4's support for character encodings
     * and especially XML and character encodings sucks.  As
     * long as the documents you parse only contain characters
     * from the ISO-8859-1 character set (a superset of ASCII,
     * and a subset of UTF-8) you're fine.  However once you
     * step out of that comfy little world things get mad, bad,
     * and dangerous to know.
     *
     * The following code is based on SJM's work with FoF
     * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
     *
     * @return array  array($parser, $source), $source possibly re-encoded to UTF-8
     */
    function php4_create_parser($source, $in_enc, $detect) {
        if ( !$detect ) {
            return array(xml_parser_create($in_enc), $source);
        }

        if (!$in_enc) {
            // "\?" so the question mark of "<?xml" is matched literally
            // instead of making the "<" optional
            if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
                $in_enc = strtoupper($m[1]);
                $this->_source_encoding = $in_enc;
                $this->source_encoding  = $in_enc;  // legacy name, kept in sync
            }
            else {
                $in_enc = 'UTF-8';
            }
        }

        if ($this->known_encoding($in_enc)) {
            return array(xml_parser_create($in_enc), $source);
        }

        // the detected encoding is not one of the simple encodings PHP knows

        // attempt to use the iconv extension to
        // cast the XML to a known encoding
        // @see http://php.net/iconv

        if (function_exists('iconv'))  {
            $encoded_source = iconv($in_enc,'UTF-8', $source);
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // iconv didn't work, try mb_convert_encoding
        // @see http://php.net/mbstring
        if(function_exists('mb_convert_encoding')) {
            $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // else
        $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                     "You may see strange artifacts, and mangled characters.",
                     E_USER_NOTICE);

        return array(xml_parser_create(), $source);
    }

    /**
     * @return mixed the upper-cased encoding name if it is one of
     *               $_KNOWN_ENCODINGS, false otherwise
     */
    function known_encoding($enc) {
        $enc = strtoupper($enc);
        if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) {
            return $enc;
        }
        else {
            return false;
        }
    }

    /**
     * Report an error and remember it on the object.
     *
     * Notice-level messages are stored in $this->WARNING, everything else
     * in $this->ERROR. The message is passed to trigger_error() when
     * MAGPIE_DEBUG is on, and to error_log() otherwise.
     */
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        // the class may be used without rss_fetch.inc's init(), in which
        // case MAGPIE_DEBUG is not defined; treat that as "debug off"
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }

        $notices = E_USER_NOTICE|E_NOTICE;
        if ( $lvl&$notices ) {
            $this->WARNING = $errormsg;
        } else {
            $this->ERROR = $errormsg;
        }
    }


} // end class RSS
| 582 | + |
/**
 * Serialise one attribute as name="value" for inlined Atom content markup.
 *
 * The XML parser hands over attribute values with entities already decoded,
 * so markup characters are re-escaped here; otherwise a value containing a
 * double quote would terminate the attribute early and produce broken markup.
 * Plain byte-wise replacement is used so the result does not depend on the
 * character encoding of the parsed feed.
 *
 * @param string $k  attribute name
 * @param string $v  attribute value
 * @return string
 */
function map_attrs($k, $v) {
    $escaped = str_replace(
        array('&',     '"',      '<',    '>'),
        array('&amp;', '&quot;', '&lt;', '&gt;'),
        $v
    );
    return "$k=\"$escaped\"";
}
| 586 | + |
| 587 | +// patch to support medieval versions of PHP4.1.x, |
| 588 | +// courtesy, Ryan Currie, ryan@digibliss.com |
| 589 | + |
if (!function_exists('array_change_key_case')) {
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER",1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER",0);
    }

    /**
     * Fallback for PHP versions lacking the native array_change_key_case().
     *
     * @param array $array  input array
     * @param int   $case   CASE_LOWER (default) or CASE_UPPER
     * @return array        copy of $array with every key case-converted
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        // compare, don't assign: "if ($case=CASE_LOWER)" always evaluated
        // to 0 and fell through to the upper-case branch
        $cmd = ($case == CASE_UPPER) ? 'strtoupper' : 'strtolower';

        // start from an empty array so empty input returns array(), not null
        $output = array();
        foreach($array as $key=>$value) {
            $output[$cmd($key)]=$value;
        }
        return $output;
    }

}
| 605 | + |
| 606 | +?> |
Property changes on: trunk/extensions/RSS/magpierss/rss_parse.inc |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 607 | + native |
Index: trunk/extensions/RSS/magpierss/NEWS |
— | — | @@ -0,0 +1,53 @@ |
| 2 | +MagpieRSS News |
| 3 | + |
| 4 | +MAGPIERSS 0.51 RELEASED |
| 5 | + * important bugfix! |
| 6 | + * fix "silent failure" when PHP doesn't have zlib |
| 7 | + |
| 8 | +FEED ON FEEDS USES MAGPIE |
| 9 | + * web-based RSS aggregator built with Magpie |
| 10 | + * easy to install, easy to use. |
| 11 | + http://minutillo.com/steve/feedonfeeds/ |
| 12 | + |
| 13 | +MAGPIERSS 0.5 RELEASED |
| 14 | + * supports transparent HTTP gzip content negotiation for reduced bandwidth usage |
| 15 | + * quashed some undefined index notices |
| 16 | + |
| 17 | +MAGPIERSS 0.46 RELEASED |
| 18 | + * minor release, more error handling clean up |
| 19 | + * documentation fixes, simpler example |
| 20 | + * new trouble shooting guide for installation and usage problems |
| 21 | + http://magpierss.sourceforge.net/TROUBLESHOOTING |
| 22 | + |
| 23 | +MAGPIE NEWS AS RSS |
| 24 | + * releases, bug fixes, related stories in RSS |
| 25 | + |
| 26 | +MAGPIERSS COOKBOOK: SIMPLE PHP RSS HOW TOS |
| 27 | + * answers some of the most frequently asked Magpie questions |
| 28 | + * feedback, suggestions, requests, recipes welcome |
| 29 | + http://magpierss.sourceforge.net/cookbook.html |
| 30 | + |
| 31 | +MAGPIERSS 0.4 RELEASED! |
| 32 | + * improved error handling, more flexibility for script authors, backwards compatible |
| 33 | + * new and better examples! including using MagpieRSS and Smarty |
| 34 | + * new Smarty plugin for RSS date parsing |
| 35 | + http://smarty.php.net |
| 36 | + |
| 37 | +INFINITE PENGUIN NOW SUPPORTS MAGPIE 0.3 |
| 38 | + * simple, sophisticated RSS viewer |
| 39 | + * includes auto-generated javascript ticker from RSS feed |
| 40 | + http://www.infinitepenguins.net/rss/ |
| 41 | + |
| 42 | +TRAUMWIND RELEASES REX BACKEND FOR MAGPIERSS |
| 43 | + * drop in support using regex based XML parser |
| 44 | + * parses improperly formed XML that chokes expat |
| 45 | + http://traumwind.de/blog/magpie/magpie_alike.php |
| 46 | + |
| 47 | +MAGPIERSS 0.3 RELEASED! |
| 48 | + * Support added for HTTP Conditional GETs. |
| 49 | + http://fishbowl.pastiche.org/archives/001132.html |
| 50 | + |
| 51 | +MAGPIERSS 0.2! |
| 52 | + * Major clean up of the code. Easier to use. |
| 53 | + * Simpler install on shared hosts. |
| 54 | + * Better documentation and comments. |
Index: trunk/extensions/RSS/magpierss/README |
— | — | @@ -0,0 +1,48 @@ |
| 2 | +NAME |
| 3 | + |
| 4 | + MagpieRSS - a simple RSS integration tool |
| 5 | + |
| 6 | +SYNOPSIS |
| 7 | + |
| 8 | + require_once(rss_fetch.inc); |
| 9 | + $url = $_GET['url']; |
| 10 | + $rss = fetch_rss( $url ); |
| 11 | + |
| 12 | + echo "Channel Title: " . $rss->channel['title'] . "<p>"; |
| 13 | + echo "<ul>"; |
| 14 | + foreach ($rss->items as $item) { |
| 15 | + $href = $item['link']; |
| 16 | + $title = $item['title']; |
| 17 | + echo "<li><a href=$href>$title</a></li>"; |
| 18 | + } |
| 19 | + echo "</ul>"; |
| 20 | + |
| 21 | +DESCRIPTION |
| 22 | + |
| 23 | + MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like", |
| 24 | + and simple to use. |
| 25 | + |
| 26 | + Some features include: |
| 27 | + |
| 28 | + * supports RSS 0.9 - 1.0, with limited RSS 2.0 support |
| 29 | + * supports namespaces, and modules, including mod_content and mod_event |
| 30 | + * open minded [1] |
| 31 | + * simple, functional interface, to object oriented backend parser |
| 32 | + * automatic caching of parsed RSS objects makes it easy to integrate |
| 33 | + * supports conditional GET with Last-Modified, and ETag |
| 34 | + * uses constants for easy override of default behaviour |
| 35 | + * heavily commented |
| 36 | + |
| 37 | + |
| 38 | +1. By open minded I mean Magpie will accept any tag it finds in good faith that |
| 39 | + it was supposed to be here. For strict validation, look elsewhere. |
| 40 | + |
| 41 | + |
| 42 | +GETTING STARTED |
| 43 | + |
| 44 | + |
| 45 | + |
| 46 | +COPYRIGHT: |
| 47 | + Copyright(c) 2002 kellan@protest.net. All rights reserved. |
| 48 | + This software is released under the GNU General Public License. |
| 49 | + Please read the disclaimer at the top of the Snoopy.class.inc file. |