r83808 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r83807‎ | r83808 | r83809 >
Date:09:09, 13 March 2011
Author:bawolff
Status:ok
Tags:
Comment:
Cleanup parts of GNSM, especially parts relating to the SitemapFeed class and FeedSMItem.
*Make it so fatal errors cannot be generated by using an invalid feed name
*Make it so no more fatal if feed=sitemap on some other page (This part is
somewhat borked though, as it assumes a feed item title is a page title, which
is true on say Special:Recentchanges, but not ?action=history. Its not expected
that anyone would want to do that, maybe could output (500) error if not given a FeedSMItem(?)
*Remove usenamespace, usecurid, and supresserrors parameters. They're carried over from DPL, and useless here.
*Remove weird stuff with $wgLocalTimezone (Don't know what the intention with it was).
*Have sensible errors with 500 status code, instead of random text. Make having no results not be an error
since it could happen, and saying Error no results is not machine readable, like an xml document with
no entries (which might not be valid according to the schema, but certainly better than the alternative)
*Generally fix up which parameters are needed for the feed. Reduce code duplication with ChannelFeed class.
and unify the Sitemap and other feed types somewhat.
*Remove feedItemAuthors() and feedItemDesc that appearently are dead code
*Don't make the talk page a comment page advertized by atom. On en wikinews, the apropriate page is the comments namespace.
Look into how that parameter is actually used, and potentially re-add later.
*Make the atom feed component return the article as the description. Might be slightly inefficient (?) but so is the rest
of the extension ;). Certainly better than returning nothing
*Remove priority feature. Doesn't really make sense since we have no way of figuring out what has a high priority.
Modified paths:
  • /trunk/extensions/GoogleNewsSitemap/FeedSMItem.php (modified) (history)
  • /trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php (modified) (history)
  • /trunk/extensions/GoogleNewsSitemap/SitemapFeed.php (modified) (history)

Diff [purge]

Index: trunk/extensions/GoogleNewsSitemap/FeedSMItem.php
@@ -6,48 +6,70 @@
77 **
88 * Base class for basic SiteMap support, for building url containers.
99 **/
10 -class FeedSMItem {
11 - /**
12 - * Var string
13 - **/
14 - var $url = '', $pubDate = '', $keywords = '', $lastMod = '', $priority = '';
 10+class FeedSMItem extends FeedItem {
1511
16 - function __construct( $url, $pubDate, $keywords = '', $lastMod = '', $priority = '' ) {
17 - $this->url = $url;
18 - $this->pubDate = $pubDate;
 12+ private $keywords = '';
 13+ private $title; // Title object, not string.
 14+
 15+ function __construct( $title, $pubDate, $keywords = '' ) {
 16+ parent::__construct( $title->getText(), '' /* description */, $title->getFullUrl(), $pubDate );
 17+ $this->title = $title;
1918 $this->keywords = $keywords;
20 - $this->lastMod = $lastMod;
21 - $this->priority = $priority;
2219 }
2320
24 - public function xmlEncode( $string ) {
25 - $string = str_replace( "\r\n", "\n", $string );
26 - $string = preg_replace( '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', $string );
27 - return htmlspecialchars( $string );
 21+ /**
 22+ * Convert a FeedItem to an FeedSMItem.
 23+ * This is to make sitemap feed get along with normal MediaWiki feeds.
 24+ * @param FeedItem Original item.
 25+ * @return FeedSMItem Converted item.
 26+ */
 27+ static function newFromFeedItem( FeedItem $item ) {
 28+ // FIXME: This is borked (esp. on history), but better than a fatal (not by much).
 29+ // maybe try and get title from url?
 30+ $title = Title::newFromText( $item->getTitle() );
 31+ if ( !$title ) {
 32+ throw new MWException( "Error getting title object from string in FeedItem." );
 33+ }
 34+ $date = $item->getDate();
 35+ return new FeedSMItem( $title, $date );
2836 }
2937
30 - public function getUrl() {
31 - return $this->url;
32 - }
33 -
34 - public function getPriority() {
35 - return $this->priority;
36 - }
37 -
3838 public function getLastMod() {
39 - return $this->lastMod;
 39+ return $this->title->getTouched();
4040 }
4141
42 - public function getKeywords () {
 42+ public function getKeywords() {
4343 return $this->xmlEncode( $this->keywords );
4444 }
4545
46 - public function getPubDate() {
47 - return $this->pubDate;
 46+ /**
 47+ * Overrides parent class. Meant to be used in rss feed.
 48+ * Currently return the article, its debatable if thats a good idea
 49+ * or not, but not sure of what better to do. Could regex the wikitext
 50+ * and try to return the first paragraph, but thats iffy.
 51+ *
 52+ * Note, this is only called by the atom/rss feed output, not by
 53+ * the sitemap output.
 54+ * @return String
 55+ */
 56+ public function getDescription() {
 57+ // This is probably rather inefficient to do for several pages
 58+ // but not much worse than the rest of this extension.
 59+ $req = new FauxRequest( array(
 60+ 'action' => 'parse',
 61+ 'page' => $this->title->getPrefixedDBKey(),
 62+ 'prop' => 'text',
 63+ ) );
 64+ $main = new ApiMain( $req );
 65+ $main->execute();
 66+ $data = $main->getResultData();
 67+ if ( isset( $data['parse']['text']['*'] ) ) {
 68+ return $this->xmlEncode(
 69+ $data['parse']['text']['*']
 70+ );
 71+ } else {
 72+ return '';
 73+ }
4874 }
 75+}
4976
50 - function formatTime( $ts ) {
51 - // need to use RFC 822 time format at least for rss2.0
52 - return gmdate( 'Y-m-d\TH:i:s', wfTimestamp( TS_UNIX, $ts ) );
53 - }
54 -}
\ No newline at end of file
Index: trunk/extensions/GoogleNewsSitemap/SitemapFeed.php
@@ -1,36 +1,29 @@
22 <?php
33 if ( !defined( 'MEDIAWIKI' ) ) die();
44
5 -class SitemapFeed extends FeedSMItem {
 5+class SitemapFeed extends ChannelFeed {
66 private $writer;
77
88 function __construct() {
9 - global $wgOut;
109 $this->writer = new XMLWriter();
11 - $wgOut->disable();
1210 }
1311
 12+ function contentType() {
 13+ return 'application/xml';
 14+ }
 15+
1416 /**
15 - * Output feed headers
 17+ * Output feed headers.
1618 */
1719 function outHeader() {
18 - global $wgOut, $wgRequest;
 20+ $this->httpHeaders();
1921
20 - // FIXME: Why can't we just pick one mime type and always send that?
21 - $ctype = $wgRequest->getVal( 'ctype', 'application/xml' );
22 - $allowedctypes = array( 'application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml' );
23 - $mimetype = in_array( $ctype, $allowedctypes ) ? $ctype : 'application/xml';
24 -
25 - header( "Content-type: $mimetype; charset=UTF-8" );
26 - $wgOut->sendCacheControl();
27 -
2822 $this->writer->openURI( 'php://output' );
2923 $this->writer->setIndent( true );
3024 $this->writer->startDocument( "1.0", "UTF-8" );
3125 $this->writer->startElement( "urlset" );
3226 $this->writer->writeAttribute( "xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9" );
3327 $this->writer->writeAttribute( "xmlns:news", "http://www.google.com/schemas/sitemap-news/0.9" );
34 - $this->writer->flush();
3528 }
3629
3730 /**
@@ -38,30 +31,42 @@
3932 * @param FeedSMItem $item to be output
4033 */
4134 function outItem( $item ) {
 35+
 36+ if ( !( $item instanceof FeedItem ) ) {
 37+ throw new MWException( "Requires a FeedItem or subclass." );
 38+ }
 39+ if ( !( $item instanceof FeedSMItem ) ) {
 40+ $item = FeedSMItem::newFromFeedItem( $item );
 41+ }
 42+
4243 $this->writer->startElement( "url" );
 44+
4345 $this->writer->startElement( "loc" );
4446 $this->writer->text( $item->getUrl() );
4547 $this->writer->endElement();
 48+
4649 $this->writer->startElement( "news:news" );
 50+
4751 $this->writer->startElement( "news:publication_date" );
48 - $this->writer->text( $item->getPubDate() );
 52+ $this->writer->text( wfTimestamp( TS_ISO_8601, $item->getDate() ) );
4953 $this->writer->endElement();
 54+
 55+ $this->writer->startElement( "news:title" );
 56+ $this->writer->text( $item->getTitle() );
 57+ $this->writer->endElement();
 58+
5059 if ( $item->getKeywords() ) {
5160 $this->writer->startElement( "news:keywords" );
5261 $this->writer->text( $item->getKeywords() );
5362 $this->writer->endElement();
5463 }
 64+
5565 $this->writer->endElement(); // end news:news
5666 if ( $item->getLastMod() ) {
5767 $this->writer->startElement( "lastmod" );
58 - $this->writer->text( $item->getLastMod() );
 68+ $this->writer->text( wfTimestamp( TS_ISO_8601, $item->getLastMod() ) );
5969 $this->writer->endElement();
6070 }
61 - if ( $item->getPriority() ) {
62 - $this->writer->startElement( "priority" );
63 - $this->writer->text( $item->getPriority() );
64 - $this->writer->endElement();
65 - }
6671 $this->writer->endElement(); // end url
6772 }
6873
Index: trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php
@@ -20,26 +20,10 @@
2121 * * redirects = string ; default = exclude
2222 * * stablepages = string ; default = null
2323 * * qualitypages = string ; default = null
24 - * * feed = string ; default = atom
25 - * usenamespace = bool ; default = false
26 - * usecurid = bool ; default = false
27 - * suppresserrors = bool ; default = false
 24+ * * feed = string ; default = sitemap
2825 **/
2926
3027 class GoogleNewsSitemap extends SpecialPage {
31 - /**
32 - * FIXME: Some of this might need a config eventually
33 - * @var string
34 - **/
35 - var $Title = '';
36 - var $Description = '';
37 - var $Url = '';
38 - var $Date = '';
39 - var $Author = '';
40 - var $pubDate = '';
41 - var $keywords = '';
42 - var $lastMod = '';
43 - var $priority = '';
4428
4529 /**
4630 * Script default values - correctly spelt, naming standard.
@@ -66,37 +50,29 @@
6751 * main()
6852 **/
6953 public function execute( $par ) {
70 - global $wgUser, $wgLang, $wgContLang, $wgRequest, $wgOut,
71 - $wgSitename, $wgServer, $wgScriptPath, $wgFeedClasses,
72 - $wgLocaltimezone;
 54+ global $wgContLang, $wgSitename, $wgFeedClasses, $wgLanguageCode;
7355
74 - // Not sure how clean $wgLocaltimezone is
75 - // In fact, it's default setting is null...
76 - if ( null == $wgLocaltimezone ) {
77 - $wgLocaltimezone = date_default_timezone_get();
78 - }
79 - date_default_timezone_set( $wgLocaltimezone );
80 - // $url = __FILE__;
81 -
8256 $this->unload_params(); // populates this->params as a side effect
8357
8458 // if there's an error parsing the params, bail out and return
8559 if ( isset( $this->params['error'] ) ) {
86 - if ( false == $this->params['suppressErrors'] ) {
87 - $wgOut->disable();
88 - echo $this->params['error'];
89 - }
 60+ wfHttpError( 500, "Internal Server Error", $this->params['error'] );
9061 return;
9162 }
9263
93 -
94 - $feed = new $wgFeedClasses[ $this->params['feed'] ](
95 - $wgSitename,
96 - $wgSitename . ' ' . $this->params['feed'] . ' feed',
97 - $wgServer . $wgScriptPath,
98 - date( DATE_ATOM ),
99 - $wgSitename
 64+ // Check to make sure that feed type is supported.
 65+ if ( FeedUtils::checkFeedOutput( $this->params['feed'] ) ) {
 66+ // TODO: should feed title be a message.
 67+ $feed = new $wgFeedClasses[ $this->params['feed'] ](
 68+ $wgSitename . " [$wgLanguageCode] "
 69+ . $wgContLang->uc( $this->params['feed'] ) . ' feed',
 70+ wfMsgExt( 'tagline', 'parsemag' ),
 71+ Title::newMainPage()->getFullUrl()
10072 );
 73+ } else {
 74+ // Can't really do anything if wrong feed type.
 75+ return;
 76+ }
10177
10278 $res = $this->doQuery();
10379
@@ -109,37 +85,15 @@
11086 return;
11187 }
11288
113 - if ( 'sitemap' == $this->params['feed'] ) {
 89+ // Fixme: Under what circumstance would cl_timestamp not be set?
 90+ // possibly worth an exception if that happens.
 91+ $this->pubDate = isset( $row->cl_timestamp ) ? $row->cl_timestamp : wfTimestampNow();
11492
115 - $this->pubDate = isset( $row->cl_timestamp ) ? $row->cl_timestamp : date( DATE_ATOM );
116 - $feedArticle = new Article( $title );
117 -
118 - $feedItem = new FeedSMItem(
119 - trim( $title->getFullURL() ),
120 - wfTimeStamp( TS_ISO_8601, $this->pubDate ),
121 - $this->getKeywords( $title ),
122 - wfTimeStamp( TS_ISO_8601, $feedArticle->getTouched() ),
123 - $feed->getPriority( $this->priority )
124 - );
125 -
126 - } elseif ( ( 'atom' == $this->params['feed'] ) || ( 'rss' == $this->params['feed'] ) ) {
127 -
128 - $this->Date = isset( $row->cl_timestamp ) ? $row->cl_timestamp : date( DATE_ATOM );
129 - if ( isset( $row->comment ) ) {
130 - $comments = htmlspecialchars( $row->comment );
131 - } else {
132 - $talkpage = $title->getTalkPage();
133 - $comments = $talkpage->getFullURL();
134 - }
135 - $titleText = ( true === $this->params['nameSpace'] ) ? $title->getPrefixedText() : $title->getText();
136 - $feedItem = new FeedItem(
137 - $titleText,
138 - $this->feedItemDesc( $row ),
139 - $title->getFullURL(),
140 - $this->Date,
141 - $this->feedItemAuthor( $row ),
142 - $comments );
143 - }
 93+ $feedItem = new FeedSMItem(
 94+ $title,
 95+ $this->pubDate,
 96+ $this->getKeywords( $title )
 97+ );
14498 $feed->outItem( $feedItem );
14599
146100 } // end while fetchobject
@@ -285,9 +239,6 @@
286240 $this->params['redirects'] = $wgRequest->getVal( 'redirects', 'exclude' );
287241 $this->params['stable'] = $wgRequest->getVal( 'stable', 'only' );
288242 $this->params['quality'] = $wgRequest->getVal( 'qualitypages', 'only' );
289 - $this->params['suppressErrors'] = $wgRequest->getBool( 'supresserrors', false );
290 - $this->params['useNameSpace'] = $wgRequest->getBool( 'usenamespace', false );
291 - $this->params['useCurId'] = $wgRequest->getBool( 'usecurid', false );
292243 $this->params['feed'] = $wgRequest->getVal( 'feed', 'sitemap' );
293244
294245 $this->params['catCount'] = count( $this->categories );
@@ -317,14 +268,6 @@
318269
319270 }
320271
321 - function feedItemAuthor( $row ) {
322 - return isset( $row->user_text ) ? $row->user_text : 'Wikinews';
323 - }
324 -
325 - function feedItemDesc( $row ) {
326 - return isset( $row->comment ) ? htmlspecialchars( $row->comment ) : '';
327 - }
328 -
329272 /**
330273 * @param Title $title
331274 * @return string

Status & tagging log