r84132 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r84131 | r84132 | r84133 >
Date:22:43, 16 March 2011
Author:bawolff
Status:ok
Tags:
Comment:
This extension does lots of queries, so try to cache the result.

Will also try to check the validity of the cache based on the timestamp of the
most recent addition to each category, and the number of pages in each category.

Also changes the tagline message to use the content language, since the output will be cached now.
Modified paths:
  • /trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php (modified) (history)

Diff [purge]

Index: trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php
@@ -35,6 +35,7 @@
3636 var $wgDPLmaxResultCount = 50; // Maximum number of results to allow
3737
3838 var $fallbackCategory = 'Published';
 39+ var $maxCacheTime = 43200; // 12 hours. Chosen rather arbitrarily for now. Might want to tweak.
3940
4041 /**
4142 * @var array Parameters array
@@ -54,7 +55,7 @@
5556 * main()
5657 **/
5758 public function execute( $par ) {
58 - global $wgContLang, $wgSitename, $wgFeedClasses, $wgLanguageCode;
 59+ global $wgContLang, $wgSitename, $wgFeedClasses, $wgLanguageCode, $wgMemc;
5960
6061 $this->unload_params(); // populates this->params as a side effect
6162
@@ -73,7 +74,7 @@
7475 $wgContLang->uc( $this->params['feed'] ),
7576 $wgLanguageCode
7677 ),
77 - wfMsgExt( 'tagline', 'parsemag' ),
 78+ wfMsgExt( 'tagline', array( 'parsemag', 'content') ),
7879 Title::newMainPage()->getFullUrl()
7980 );
8081 } else {
@@ -82,8 +83,78 @@
8384 return;
8485 }
8586
86 - $res = $this->doQuery();
 87+ $cacheInvalidationInfo = $this->getCacheInvalidationInfo();
8788
 89+ $cacheKey = $this->getCacheKey();
 90+
 91+ // The way this does caching is based on ChangesFeed::execute.
 92+ $cached = $this->getCachedVersion( $cacheKey, $cacheInvalidationInfo );
 93+ if ( $cached !== false ) {
 94+ $feed->httpHeaders();
 95+ echo $cached;
 96+ echo "<!-- From cache: $cacheKey -->";
 97+ } else {
 98+ $res = $this->doQuery();
 99+ ob_start();
 100+ $this->makeFeed( $feed, $res );
 101+ $output = ob_get_contents();
 102+ ob_end_flush();
 103+ echo "<!-- Not cached. Saved as: $cacheKey -->";
 104+ $wgMemc->set( $cacheKey,
 105+ array( $cacheInvalidationInfo, $output ),
 106+ $this->maxCacheTime
 107+ );
 108+ }
 109+
 110+ }
 111+
 112+ /**
 113+ * Get the cache key to cache this request.
 114+ * @return String the key.
 115+ */
 116+ private function getCacheKey() {
 117+ global $wgRenderHashAppend;
 118+ // Note, the implode relies on Title::__toString, which needs php > 5.2
 119+ // Which I think is above the minimum we support.
 120+ $sum = md5( serialize( $this->params )
 121+ . implode( "|", $this->categories ) . "||"
 122+ . implode( "|", $this->notCategories )
 123+ );
 124+ return wfMemcKey( "GNSM", $sum, $wgRenderHashAppend );
 125+ }
 126+
 127+ /**
 128+ * Get the cached version of the feed if possible.
 129+ * Checks to see if the cached version is still valid.
 130+ * @param $key String Cache key
 131+ * @param $invalidInfo String String to check if cache is clean from getCacheInvalidationInfo.
 132+ * @return Mixed String or Boolean: The cached feed, or false.
 133+ */
 134+ private function getCachedVersion ( $key, $invalidInfo ) {
 135+ global $wgMemc, $wgRequest;
 136+ $action = $wgRequest->getVal( 'action', 'view' );
 137+ if ( $action === 'purge' ) {
 138+ return false;
 139+ }
 140+
 141+ $cached = $wgMemc->get( $key );
 142+
 143+ if ( !$cached
 144+ || ( count( $cached ) !== 2 )
 145+ || ( $cached[0] !== $invalidInfo ) )
 146+ {
 147+ // Cache is dirty or doesn't exist.
 148+ return false;
 149+ }
 150+ return $cached[1];
 151+ }
 152+ /**
 153+ * Actually output a feed.
 154+ * @param ChannelFeed $feed Feed object.
 155+ * @param $res Result of sql query
 156+ */
 157+
 158+ private function makeFeed( $feed, $res ) {
88159 $feed->outHeader();
89160 foreach ( $res as $row ) {
90161 $title = Title::makeTitle( $row->page_namespace, $row->page_title );
@@ -103,13 +174,85 @@
104175 $this->getKeywords( $title )
105176 );
106177 $feed->outItem( $feedItem );
 178+ }
 179+ $feed->outFooter();
 180+ }
107181
108 - } // end while fetchobject
 182+ /**
 183+ * Tries to determine if the cached version of the feed is still
 184+ * good. Does this by checking the cl_timestamp of the latest article
 185+ * in each category we're using (Which will be different if category added)
 186+ * and the total pages in Category (Protect against an article being removed)
 187+ * The first check (cl_timestamp) is needed to protect against someone removing
 188+ * one article and adding another article (the page count would stay the same).
 189+ *
 190+ * When we save to cache, we save a two element array with this value and the feed.
 191+ * If the value from this function doesn't match the value from the cache, we throw
 192+ * out the cache.
 193+ *
 194+ * @return String All the above info concatenated.
 195+ */
 196+ private function getCacheInvalidationInfo () {
 197+ $dbr = wfGetDB( DB_SLAVE );
 198+ $cacheInfo = '';
 199+ $categories = array();
 200+ $tsQueries = array();
109201
110 - $feed->outFooter();
 202+ // This would perhaps be nicer just using a Category object,
 203+ // but this way can do all at once.
111204
112 - } // end public function execute
 205+ // Add each category and notcategory to the query.
 206+ for ( $i = 0; $i < $this->params['catCount']; $i++ ) {
 207+ $key = $this->categories[$i]->getDBkey();
 208+ $categories[] = $key;
 209+ $tsQueries[] = $dbr->selectSQLText(
 210+ 'categorylinks',
 211+ 'MAX(cl_timestamp) as ts',
 212+ array( 'cl_to' => $key ),
 213+ __METHOD__
 214+ );
 215+ }
 216+ for ( $i = 0; $i < $this->params['notCatCount']; $i++ ) {
 217+ $key = $this->notCategories[$i]->getDBkey();
 218+ $categories[] = $key;
 219+ $tsQueries[] = $dbr->selectSQLText(
 220+ 'categorylinks',
 221+ 'MAX(cl_timestamp) AS ts',
 222+ array( 'cl_to' => $key ),
 223+ __METHOD__
 224+ );
 225+ }
113226
 227+ // phase 1: How many pages in each cat.
 228+ // cat_pages includes all pages (even images/subcats).
 229+ $res = $dbr->select( 'category', 'cat_pages',
 230+ array( 'cat_title' => $categories ),
 231+ __METHOD__,
 232+ array( 'ORDER BY' => 'cat_title' )
 233+ );
 234+
 235+ foreach ( $res as $row ) {
 236+ $cacheInfo .= $row->cat_pages . '!';
 237+ }
 238+
 239+ $cacheInfo .= '|';
 240+
 241+ // Part 2: cl_timestamp:
 242+ // TODO: Double check that the order of the result of union queries
 243+ // is one after another from the order you specified the queries in.
 244+ $res2 = $dbr->query($dbr->unionQueries( $tsQueries, true ), __METHOD__);
 245+
 246+ foreach ( $res2 as $row ) {
 247+ if ( is_null($row->ts) ) {
 248+ $ts = "empty";
 249+ } else {
 250+ $ts = wfTimestamp( TS_MW, $row->ts );
 251+ }
 252+ $cacheInfo .= $ts . '!';
 253+ }
 254+
 255+ return $cacheInfo;
 256+ }
114257 /**
115258 * Build sql
116259 **/

Status & tagging log