Index: trunk/extensions/GoogleNewsSitemap/GoogleNewsSitemap_body.php |
— | — | @@ -35,6 +35,7 @@ |
36 | 36 | var $wgDPLmaxResultCount = 50; // Maximum number of results to allow |
37 | 37 | |
38 | 38 | var $fallbackCategory = 'Published'; |
| 39 | + var $maxCacheTime = 43200; // 12 hours, in seconds. Chosen rather arbitrarily for now; might want to tweak. |
39 | 40 | |
40 | 41 | /** |
41 | 42 | * @var array Parameters array |
— | — | @@ -54,7 +55,7 @@ |
55 | 56 | * main() |
56 | 57 | **/ |
57 | 58 | public function execute( $par ) { |
58 | | - global $wgContLang, $wgSitename, $wgFeedClasses, $wgLanguageCode; |
| 59 | + global $wgContLang, $wgSitename, $wgFeedClasses, $wgLanguageCode, $wgMemc; |
59 | 60 | |
60 | 61 | $this->unload_params(); // populates this->params as a side effect |
61 | 62 | |
— | — | @@ -73,7 +74,7 @@ |
74 | 75 | $wgContLang->uc( $this->params['feed'] ), |
75 | 76 | $wgLanguageCode |
76 | 77 | ), |
77 | | - wfMsgExt( 'tagline', 'parsemag' ), |
| 78 | + wfMsgExt( 'tagline', array( 'parsemag', 'content') ), |
78 | 79 | Title::newMainPage()->getFullUrl() |
79 | 80 | ); |
80 | 81 | } else { |
— | — | @@ -82,8 +83,78 @@ |
83 | 84 | return; |
84 | 85 | } |
85 | 86 | |
86 | | - $res = $this->doQuery(); |
| 87 | + $cacheInvalidationInfo = $this->getCacheInvalidationInfo(); |
87 | 88 | |
| 89 | + $cacheKey = $this->getCacheKey(); |
| 90 | + |
| 91 | + // The caching approach used here is modelled on ChangesFeed::execute. |
| 92 | + $cached = $this->getCachedVersion( $cacheKey, $cacheInvalidationInfo ); |
| 93 | + if ( $cached !== false ) { |
| 94 | + $feed->httpHeaders(); |
| 95 | + echo $cached; |
| 96 | + echo "<!-- From cache: $cacheKey -->"; |
| 97 | + } else { |
| 98 | + $res = $this->doQuery(); |
| 99 | + ob_start(); |
| 100 | + $this->makeFeed( $feed, $res ); |
| 101 | + $output = ob_get_contents(); |
| 102 | + ob_end_flush(); |
| 103 | + echo "<!-- Not cached. Saved as: $cacheKey -->"; |
| 104 | + $wgMemc->set( $cacheKey, |
| 105 | + array( $cacheInvalidationInfo, $output ), |
| 106 | + $this->maxCacheTime |
| 107 | + ); |
| 108 | + } |
| 109 | + |
| 110 | + } |
| 111 | + |
| 112 | + /** |
| 113 | + * Get the cache key to cache this request. |
| 114 | + * @return String the key. |
| 115 | + */ |
| 116 | + private function getCacheKey() { |
| 117 | + global $wgRenderHashAppend; |
| 118 | + // Note: the implode() calls rely on Title::__toString being invoked in string context, which needs PHP >= 5.2.0. |
| 119 | + // That should be no newer than the minimum PHP version we support (TODO: confirm). |
| 120 | + $sum = md5( serialize( $this->params ) |
| 121 | + . implode( "|", $this->categories ) . "||" |
| 122 | + . implode( "|", $this->notCategories ) |
| 123 | + ); |
| 124 | + return wfMemcKey( "GNSM", $sum, $wgRenderHashAppend ); |
| 125 | + } |
| 126 | + |
| 127 | + /** |
| 128 | + * Get the cached version of the feed if possible. |
| 129 | + * Checks to see if the cached version is still valid. |
| 130 | + * @param $key String Cache key |
| 131 | + * @param $invalidInfo String Invalidation string from getCacheInvalidationInfo(), compared against the cached copy to check that it is still clean. |
| 132 | + * @return Mixed String or Boolean: The cached feed, or false. |
| 133 | + */ |
| 134 | + private function getCachedVersion ( $key, $invalidInfo ) { |
| 135 | + global $wgMemc, $wgRequest; |
| 136 | + $action = $wgRequest->getVal( 'action', 'view' ); |
| 137 | + if ( $action === 'purge' ) { |
| 138 | + return false; |
| 139 | + } |
| 140 | + |
| 141 | + $cached = $wgMemc->get( $key ); |
| 142 | + |
| 143 | + if ( !$cached |
| 144 | + || ( count( $cached ) !== 2 ) |
| 145 | + || ( $cached[0] !== $invalidInfo ) ) |
| 146 | + { |
| 147 | + // Cache is dirty or doesn't exist. |
| 148 | + return false; |
| 149 | + } |
| 150 | + return $cached[1]; |
| 151 | + } |
| 152 | + /** |
| 153 | + * Actually output a feed. |
| 154 | + * @param ChannelFeed $feed Feed object. |
| 155 | + * @param $res Result of the SQL query (as returned by doQuery()) |
| 156 | + */ |
| 157 | + |
| 158 | + private function makeFeed( $feed, $res ) { |
88 | 159 | $feed->outHeader(); |
89 | 160 | foreach ( $res as $row ) { |
90 | 161 | $title = Title::makeTitle( $row->page_namespace, $row->page_title ); |
— | — | @@ -103,13 +174,85 @@ |
104 | 175 | $this->getKeywords( $title ) |
105 | 176 | ); |
106 | 177 | $feed->outItem( $feedItem ); |
| 178 | + } |
| 179 | + $feed->outFooter(); |
| 180 | + } |
107 | 181 | |
108 | | - } // end while fetchobject |
| 182 | + /** |
| 183 | + * Tries to determine whether the cached version of the feed is still |
| 184 | + * good. It does this by checking the cl_timestamp of the latest article |
| 185 | + * in each category we're using (which changes when a page is added to the category) |
| 186 | + * and the total number of pages in each category (which protects against an article being removed). |
| 187 | + * The first check (cl_timestamp) is needed to protect against someone removing |
| 188 | + * one article and adding another article (the page count would stay the same). |
| 189 | + * |
| 190 | + * When we save to cache, we save a two element array with this value and the feed. |
| 191 | + * If the value from this function doesn't match the value from the cache, we throw |
| 192 | + * out the cache. |
| 193 | + * |
| 194 | + * @return String All the above info concatenated. |
| 195 | + */ |
| 196 | + private function getCacheInvalidationInfo () { |
| 197 | + $dbr = wfGetDB( DB_SLAVE ); |
| 198 | + $cacheInfo = ''; |
| 199 | + $categories = array(); |
| 200 | + $tsQueries = array(); |
109 | 201 | |
110 | | - $feed->outFooter(); |
| 202 | + // This would perhaps be nicer using a Category object for each category, |
| 203 | + // but this way we can fetch everything at once. |
111 | 204 | |
112 | | - } // end public function execute |
| 205 | + // Add each category and notcategory to the query. |
| 206 | + for ( $i = 0; $i < $this->params['catCount']; $i++ ) { |
| 207 | + $key = $this->categories[$i]->getDBkey(); |
| 208 | + $categories[] = $key; |
| 209 | + $tsQueries[] = $dbr->selectSQLText( |
| 210 | + 'categorylinks', |
| 211 | + 'MAX(cl_timestamp) as ts', |
| 212 | + array( 'cl_to' => $key ), |
| 213 | + __METHOD__ |
| 214 | + ); |
| 215 | + } |
| 216 | + for ( $i = 0; $i < $this->params['notCatCount']; $i++ ) { |
| 217 | + $key = $this->notCategories[$i]->getDBkey(); |
| 218 | + $categories[] = $key; |
| 219 | + $tsQueries[] = $dbr->selectSQLText( |
| 220 | + 'categorylinks', |
| 221 | + 'MAX(cl_timestamp) AS ts', |
| 222 | + array( 'cl_to' => $key ), |
| 223 | + __METHOD__ |
| 224 | + ); |
| 225 | + } |
113 | 226 | |
| 227 | + // Part 1: How many pages are in each category. |
| 228 | + // cat_pages includes all pages (even images/subcats). |
| 229 | + $res = $dbr->select( 'category', 'cat_pages', |
| 230 | + array( 'cat_title' => $categories ), |
| 231 | + __METHOD__, |
| 232 | + array( 'ORDER BY' => 'cat_title' ) |
| 233 | + ); |
| 234 | + |
| 235 | + foreach ( $res as $row ) { |
| 236 | + $cacheInfo .= $row->cat_pages . '!'; |
| 237 | + } |
| 238 | + |
| 239 | + $cacheInfo .= '|'; |
| 240 | + |
| 241 | + // Part 2: cl_timestamp: |
| 242 | + // TODO: Double check that the rows of the union query come back in the same order |
| 243 | + // as the sub-queries were specified; SQL does not guarantee row order without an ORDER BY. |
| 244 | + $res2 = $dbr->query($dbr->unionQueries( $tsQueries, true ), __METHOD__); |
| 245 | + |
| 246 | + foreach ( $res2 as $row ) { |
| 247 | + if ( is_null($row->ts) ) { |
| 248 | + $ts = "empty"; |
| 249 | + } else { |
| 250 | + $ts = wfTimestamp( TS_MW, $row->ts ); |
| 251 | + } |
| 252 | + $cacheInfo .= $ts . '!'; |
| 253 | + } |
| 254 | + |
| 255 | + return $cacheInfo; |
| 256 | + } |
114 | 257 | /** |
115 | 258 | * Build sql |
116 | 259 | **/ |