Index: trunk/extensions/GoogleNewsSitemap/FeedSMItem.php |
— | — | @@ -6,48 +6,70 @@ |
7 | 7 | ** |
8 | 8 | * Base class for basic SiteMap support, for building url containers. |
9 | 9 | **/ |
10 | | -class FeedSMItem { |
11 | | - /** |
12 | | - * Var string |
13 | | - **/ |
14 | | - var $url = '', $pubDate = '', $keywords = '', $lastMod = '', $priority = ''; |
| 10 | +class FeedSMItem extends FeedItem { |
15 | 11 | |
16 | | - function __construct( $url, $pubDate, $keywords = '', $lastMod = '', $priority = '' ) { |
17 | | - $this->url = $url; |
18 | | - $this->pubDate = $pubDate; |
| 12 | + private $keywords = ''; |
| 13 | + private $title; // Title object, not string. |
| 14 | + |
| 15 | + function __construct( $title, $pubDate, $keywords = '' ) { |
| 16 | + parent::__construct( $title->getText(), '' /* description */, $title->getFullUrl(), $pubDate ); |
| 17 | + $this->title = $title; |
19 | 18 | $this->keywords = $keywords; |
20 | | - $this->lastMod = $lastMod; |
21 | | - $this->priority = $priority; |
22 | 19 | } |
23 | 20 | |
24 | | - public function xmlEncode( $string ) { |
25 | | - $string = str_replace( "\r\n", "\n", $string ); |
26 | | - $string = preg_replace( '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', $string ); |
27 | | - return htmlspecialchars( $string ); |
| 21 | + /** |
| 22 | + * Convert a FeedItem to a FeedSMItem. |
| 23 | + * This is to make sitemap feed get along with normal MediaWiki feeds. |
| 24 | + * @param FeedItem $item Original item. |
| 25 | + * @return FeedSMItem Converted item. |
| 26 | + */ |
| 27 | + static function newFromFeedItem( FeedItem $item ) { |
| 28 | + // FIXME: This is broken (esp. on history), but better than a fatal error (not by much). |
| 29 | + // Maybe try to get the title from the URL instead? |
| 30 | + $title = Title::newFromText( $item->getTitle() ); |
| 31 | + if ( !$title ) { |
| 32 | + throw new MWException( "Error getting title object from string in FeedItem." ); |
| 33 | + } |
| 34 | + $date = $item->getDate(); |
| 35 | + return new FeedSMItem( $title, $date ); |
28 | 36 | } |
29 | 37 | |
30 | | - public function getUrl() { |
31 | | - return $this->url; |
32 | | - } |
33 | | - |
34 | | - public function getPriority() { |
35 | | - return $this->priority; |
36 | | - } |
37 | | - |
38 | 38 | public function getLastMod() { |
39 | | - return $this->lastMod; |
| 39 | + return $this->title->getTouched(); |
40 | 40 | } |
41 | 41 | |
42 | | - public function getKeywords () { |
| 42 | + public function getKeywords() { |
43 | 43 | return $this->xmlEncode( $this->keywords ); |
44 | 44 | } |
45 | 45 | |
46 | | - public function getPubDate() { |
47 | | - return $this->pubDate; |
| 46 | + /** |
| 47 | + * Overrides parent class. Meant to be used in rss feed. |
| 48 | + * Currently returns the article; it's debatable whether that's a good idea |
| 49 | + * or not, but it's unclear what would be better. Could regex the wikitext |
| 50 | + * and try to return the first paragraph, but that's iffy. |
| 51 | + * |
| 52 | + * Note, this is only called by the atom/rss feed output, not by |
| 53 | + * the sitemap output. |
| 54 | + * @return String |
| 55 | + */ |
| 56 | + public function getDescription() { |
| 57 | + // This is probably rather inefficient to do for several pages |
| 58 | + // but not much worse than the rest of this extension. |
| 59 | + $req = new FauxRequest( array( |
| 60 | + 'action' => 'parse', |
| 61 | + 'page' => $this->title->getPrefixedDBKey(), |
| 62 | + 'prop' => 'text', |
| 63 | + ) ); |
| 64 | + $main = new ApiMain( $req ); |
| 65 | + $main->execute(); |
| 66 | + $data = $main->getResultData(); |
| 67 | + if ( isset( $data['parse']['text']['*'] ) ) { |
| 68 | + return $this->xmlEncode( |
| 69 | + $data['parse']['text']['*'] |
| 70 | + ); |
| 71 | + } else { |
| 72 | + return ''; |
| 73 | + } |
48 | 74 | } |
| 75 | +} |
49 | 76 | |
50 | | - function formatTime( $ts ) { |
51 | | - // need to use RFC 822 time format at least for rss2.0 |
52 | | - return gmdate( 'Y-m-d\TH:i:s', wfTimestamp( TS_UNIX, $ts ) ); |
53 | | - } |
54 | | -} |
\ No newline at end of file |
Index: trunk/extensions/GoogleNewsSitemap/SitemapFeed.php |
— | — | @@ -1,36 +1,29 @@ |
2 | 2 | <?php |
3 | 3 | if ( !defined( 'MEDIAWIKI' ) ) die(); |
4 | 4 | |
5 | | -class SitemapFeed extends FeedSMItem { |
| 5 | +class SitemapFeed extends ChannelFeed { |
6 | 6 | private $writer; |
7 | 7 | |
8 | 8 | function __construct() { |
9 | | - global $wgOut; |
10 | 9 | $this->writer = new XMLWriter(); |
11 | | - $wgOut->disable(); |
12 | 10 | } |
13 | 11 | |
| 12 | + function contentType() { |
| 13 | + return 'application/xml'; |
| 14 | + } |
| 15 | + |
14 | 16 | /** |
15 | | - * Output feed headers |
| 17 | + * Output feed headers. |
16 | 18 | */ |
17 | 19 | function outHeader() { |
18 | | - global $wgOut, $wgRequest; |
| 20 | + $this->httpHeaders(); |
19 | 21 | |
20 | | - // FIXME: Why can't we just pick one mime type and always send that? |
21 | | - $ctype = $wgRequest->getVal( 'ctype', 'application/xml' ); |
22 | | - $allowedctypes = array( 'application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml' ); |
23 | | - $mimetype = in_array( $ctype, $allowedctypes ) ? $ctype : 'application/xml'; |
24 | | - |
25 | | - header( "Content-type: $mimetype; charset=UTF-8" ); |
26 | | - $wgOut->sendCacheControl(); |
27 | | - |
28 | 22 | $this->writer->openURI( 'php://output' ); |
29 | 23 | $this->writer->setIndent( true ); |
30 | 24 | $this->writer->startDocument( "1.0", "UTF-8" ); |
31 | 25 | $this->writer->startElement( "urlset" ); |
32 | 26 | $this->writer->writeAttribute( "xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9" ); |
33 | 27 | $this->writer->writeAttribute( "xmlns:news", "http://www.google.com/schemas/sitemap-news/0.9" ); |
34 | | - $this->writer->flush(); |
35 | 28 | } |
36 | 29 | |
37 | 30 | /** |
— | — | @@ -38,30 +31,42 @@ |
39 | 32 | * @param FeedSMItem $item to be output |
40 | 33 | */ |
41 | 34 | function outItem( $item ) { |
| 35 | + |
| 36 | + if ( !( $item instanceof FeedItem ) ) { |
| 37 | + throw new MWException( "Requires a FeedItem or subclass." ); |
| 38 | + } |
| 39 | + if ( !( $item instanceof FeedSMItem ) ) { |
| 40 | + $item = FeedSMItem::newFromFeedItem( $item ); |
| 41 | + } |
| 42 | + |
42 | 43 | $this->writer->startElement( "url" ); |
| 44 | + |
43 | 45 | $this->writer->startElement( "loc" ); |
44 | 46 | $this->writer->text( $item->getUrl() ); |
45 | 47 | $this->writer->endElement(); |
| 48 | + |
46 | 49 | $this->writer->startElement( "news:news" ); |
| 50 | + |
47 | 51 | $this->writer->startElement( "news:publication_date" ); |
48 | | - $this->writer->text( $item->getPubDate() ); |
| 52 | + $this->writer->text( wfTimestamp( TS_ISO_8601, $item->getDate() ) ); |
49 | 53 | $this->writer->endElement(); |
| 54 | + |
| 55 | + $this->writer->startElement( "news:title" ); |
| 56 | + $this->writer->text( $item->getTitle() ); |
| 57 | + $this->writer->endElement(); |
| 58 | + |
50 | 59 | if ( $item->getKeywords() ) { |
51 | 60 | $this->writer->startElement( "news:keywords" ); |
52 | 61 | $this->writer->text( $item->getKeywords() ); |
53 | 62 | $this->writer->endElement(); |
54 | 63 | } |
| 64 | + |
55 | 65 | $this->writer->endElement(); // end news:news |
56 | 66 | if ( $item->getLastMod() ) { |
57 | 67 | $this->writer->startElement( "lastmod" ); |
58 | | - $this->writer->text( $item->getLastMod() ); |
| 68 | + $this->writer->text( wfTimestamp( TS_ISO_8601, $item->getLastMod() ) ); |
59 | 69 | $this->writer->endElement(); |
60 | 70 | } |
61 | | - if ( $item->getPriority() ) { |
62 | | - $this->writer->startElement( "priority" ); |
63 | | - $this->writer->text( $item->getPriority() ); |
64 | | - $this->writer->endElement(); |
65 | | - } |
66 | 71 | $this->writer->endElement(); // end url |
67 | 72 | } |
68 | 73 | |
Index: trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php |
— | — | @@ -20,26 +20,10 @@ |
21 | 21 | * * redirects = string ; default = exclude |
22 | 22 | * * stablepages = string ; default = null |
23 | 23 | * * qualitypages = string ; default = null |
24 | | - * * feed = string ; default = atom |
25 | | - * usenamespace = bool ; default = false |
26 | | - * usecurid = bool ; default = false |
27 | | - * suppresserrors = bool ; default = false |
| 24 | + * * feed = string ; default = sitemap |
28 | 25 | **/ |
29 | 26 | |
30 | 27 | class GoogleNewsSitemap extends SpecialPage { |
31 | | - /** |
32 | | - * FIXME: Some of this might need a config eventually |
33 | | - * @var string |
34 | | - **/ |
35 | | - var $Title = ''; |
36 | | - var $Description = ''; |
37 | | - var $Url = ''; |
38 | | - var $Date = ''; |
39 | | - var $Author = ''; |
40 | | - var $pubDate = ''; |
41 | | - var $keywords = ''; |
42 | | - var $lastMod = ''; |
43 | | - var $priority = ''; |
44 | 28 | |
45 | 29 | /** |
46 | 30 | * Script default values - correctly spelt, naming standard. |
— | — | @@ -66,37 +50,29 @@ |
67 | 51 | * main() |
68 | 52 | **/ |
69 | 53 | public function execute( $par ) { |
70 | | - global $wgUser, $wgLang, $wgContLang, $wgRequest, $wgOut, |
71 | | - $wgSitename, $wgServer, $wgScriptPath, $wgFeedClasses, |
72 | | - $wgLocaltimezone; |
| 54 | + global $wgContLang, $wgSitename, $wgFeedClasses, $wgLanguageCode; |
73 | 55 | |
74 | | - // Not sure how clean $wgLocaltimezone is |
75 | | - // In fact, it's default setting is null... |
76 | | - if ( null == $wgLocaltimezone ) { |
77 | | - $wgLocaltimezone = date_default_timezone_get(); |
78 | | - } |
79 | | - date_default_timezone_set( $wgLocaltimezone ); |
80 | | - // $url = __FILE__; |
81 | | - |
82 | 56 | $this->unload_params(); // populates this->params as a side effect |
83 | 57 | |
84 | 58 | // if there's an error parsing the params, bail out and return |
85 | 59 | if ( isset( $this->params['error'] ) ) { |
86 | | - if ( false == $this->params['suppressErrors'] ) { |
87 | | - $wgOut->disable(); |
88 | | - echo $this->params['error']; |
89 | | - } |
| 60 | + wfHttpError( 500, "Internal Server Error", $this->params['error'] ); |
90 | 61 | return; |
91 | 62 | } |
92 | 63 | |
93 | | - |
94 | | - $feed = new $wgFeedClasses[ $this->params['feed'] ]( |
95 | | - $wgSitename, |
96 | | - $wgSitename . ' ' . $this->params['feed'] . ' feed', |
97 | | - $wgServer . $wgScriptPath, |
98 | | - date( DATE_ATOM ), |
99 | | - $wgSitename |
| 64 | + // Check to make sure that feed type is supported. |
| 65 | + if ( FeedUtils::checkFeedOutput( $this->params['feed'] ) ) { |
| 66 | + // TODO: should the feed title be a message? |
| 67 | + $feed = new $wgFeedClasses[ $this->params['feed'] ]( |
| 68 | + $wgSitename . " [$wgLanguageCode] " |
| 69 | + . $wgContLang->uc( $this->params['feed'] ) . ' feed', |
| 70 | + wfMsgExt( 'tagline', 'parsemag' ), |
| 71 | + Title::newMainPage()->getFullUrl() |
100 | 72 | ); |
| 73 | + } else { |
| 74 | + // Can't really do anything if wrong feed type. |
| 75 | + return; |
| 76 | + } |
101 | 77 | |
102 | 78 | $res = $this->doQuery(); |
103 | 79 | |
— | — | @@ -109,37 +85,15 @@ |
110 | 86 | return; |
111 | 87 | } |
112 | 88 | |
113 | | - if ( 'sitemap' == $this->params['feed'] ) { |
| 89 | + // FIXME: Under what circumstances would cl_timestamp not be set? |
| 90 | + // Possibly worth throwing an exception if that happens. |
| 91 | + $this->pubDate = isset( $row->cl_timestamp ) ? $row->cl_timestamp : wfTimestampNow(); |
114 | 92 | |
115 | | - $this->pubDate = isset( $row->cl_timestamp ) ? $row->cl_timestamp : date( DATE_ATOM ); |
116 | | - $feedArticle = new Article( $title ); |
117 | | - |
118 | | - $feedItem = new FeedSMItem( |
119 | | - trim( $title->getFullURL() ), |
120 | | - wfTimeStamp( TS_ISO_8601, $this->pubDate ), |
121 | | - $this->getKeywords( $title ), |
122 | | - wfTimeStamp( TS_ISO_8601, $feedArticle->getTouched() ), |
123 | | - $feed->getPriority( $this->priority ) |
124 | | - ); |
125 | | - |
126 | | - } elseif ( ( 'atom' == $this->params['feed'] ) || ( 'rss' == $this->params['feed'] ) ) { |
127 | | - |
128 | | - $this->Date = isset( $row->cl_timestamp ) ? $row->cl_timestamp : date( DATE_ATOM ); |
129 | | - if ( isset( $row->comment ) ) { |
130 | | - $comments = htmlspecialchars( $row->comment ); |
131 | | - } else { |
132 | | - $talkpage = $title->getTalkPage(); |
133 | | - $comments = $talkpage->getFullURL(); |
134 | | - } |
135 | | - $titleText = ( true === $this->params['nameSpace'] ) ? $title->getPrefixedText() : $title->getText(); |
136 | | - $feedItem = new FeedItem( |
137 | | - $titleText, |
138 | | - $this->feedItemDesc( $row ), |
139 | | - $title->getFullURL(), |
140 | | - $this->Date, |
141 | | - $this->feedItemAuthor( $row ), |
142 | | - $comments ); |
143 | | - } |
| 93 | + $feedItem = new FeedSMItem( |
| 94 | + $title, |
| 95 | + $this->pubDate, |
| 96 | + $this->getKeywords( $title ) |
| 97 | + ); |
144 | 98 | $feed->outItem( $feedItem ); |
145 | 99 | |
146 | 100 | } // end while fetchobject |
— | — | @@ -285,9 +239,6 @@ |
286 | 240 | $this->params['redirects'] = $wgRequest->getVal( 'redirects', 'exclude' ); |
287 | 241 | $this->params['stable'] = $wgRequest->getVal( 'stable', 'only' ); |
288 | 242 | $this->params['quality'] = $wgRequest->getVal( 'qualitypages', 'only' ); |
289 | | - $this->params['suppressErrors'] = $wgRequest->getBool( 'supresserrors', false ); |
290 | | - $this->params['useNameSpace'] = $wgRequest->getBool( 'usenamespace', false ); |
291 | | - $this->params['useCurId'] = $wgRequest->getBool( 'usecurid', false ); |
292 | 243 | $this->params['feed'] = $wgRequest->getVal( 'feed', 'sitemap' ); |
293 | 244 | |
294 | 245 | $this->params['catCount'] = count( $this->categories ); |
— | — | @@ -317,14 +268,6 @@ |
318 | 269 | |
319 | 270 | } |
320 | 271 | |
321 | | - function feedItemAuthor( $row ) { |
322 | | - return isset( $row->user_text ) ? $row->user_text : 'Wikinews'; |
323 | | - } |
324 | | - |
325 | | - function feedItemDesc( $row ) { |
326 | | - return isset( $row->comment ) ? htmlspecialchars( $row->comment ) : ''; |
327 | | - } |
328 | | - |
329 | 272 | /** |
330 | 273 | * @param Title $title |
331 | 274 | * @return string |