Index: trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php |
— | — | @@ -43,8 +43,8 @@ |
44 | 44 | |
45 | 45 | /* |
46 | 46 | This functionality does not exist yet. |
47 | | - $this->renderWeeklyMostChanged(); |
48 | | - $this->renderRecentLows();*/ |
| 47 | + $this->renderWeeklyMostChanged();*/ |
| 48 | + $this->renderProblems(); |
49 | 49 | } else { |
50 | 50 | $wgOut->addWikiText( 'This page has been disabled.' ); |
51 | 51 | } |
— | — | @@ -187,15 +187,15 @@ |
188 | 188 | } |
189 | 189 | |
190 | 190 | /** |
191 | | - * Renders recent lows |
| 191 | + * Renders problem articles table |
192 | 192 | * |
193 | 193 | * @return String: HTML table of recent lows |
194 | 194 | */ |
195 | | - protected function renderRecentLows() { |
| 195 | + protected function renderProblems() { |
196 | 196 | global $wgOut, $wgUser, $wgArticleFeedbackRatings; |
197 | 197 | |
198 | 198 | $rows = array(); |
199 | | - foreach ( $this->getRecentLows() as $page ) { |
| 199 | + foreach ( $this->getProblems() as $page ) { |
200 | 200 | $row = array(); |
201 | 201 | $pageTitle = Title::newFromText( $page['page'] ); |
202 | 202 | $row['page'] = $wgUser->getSkin()->link( $pageTitle, $pageTitle->getPrefixedText() ); |
— | — | @@ -227,6 +227,54 @@ |
228 | 228 | } |
229 | 229 | |
230 | 230 | /** |
| 231 | + * Gets a list of articles which were rated exceptionally low |
| 232 | + */ |
| 233 | + protected function getProblems() { |
| 234 | + global $wgMemc; |
| 235 | + // check if we've got results in the cache |
| 236 | + $key = wfMemcKey( 'article_feedback_stats_problems' ); |
| 237 | + $cache = $wgMemc->get( $key ); |
| 238 | + if ( is_array( $cache )) { |
| 239 | + $highs_lows = $cache; |
| 240 | + } else { |
| 241 | + $dbr = wfGetDB( DB_SLAVE ); |
| 242 | + // first find the freshest timestamp |
| 243 | + $row = $dbr->selectRow( |
| 244 | + 'article_feedback_stats', |
| 245 | + array( 'afs_ts' ), |
| 246 | + "", |
| 247 | + __METHOD__, |
| 248 | + array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 1 ) |
| 249 | + ); |
| 250 | + |
| 251 | + // if we have no results, just return |
| 252 | + if ( !$row || !$row->afs_ts ) { |
| 253 | + return array(); |
| 254 | + } |
| 255 | + |
| 256 | + // select ratings with that ts |
| 257 | + $result = $dbr->select( |
| 258 | + 'article_feedback_stats', |
| 259 | + array( |
| 260 | + 'afs_page_id', |
| 261 | + 'afs_orderable_data', |
| 262 | + 'afs_data' |
| 263 | + ), |
| 264 | + array( |
| 265 | + 'afs_ts' => $row->afs_ts, |
| 266 | + 'afs_stats_type_id' => self::getStatsTypeId( 'problems' ) |
| 267 | + ), |
| 268 | + __METHOD__, |
| 269 | + array( "ORDER BY" => "afs_orderable_data" ) |
| 270 | + ); |
| 271 | + $problems = $this->buildProblems( $result ); |
| 272 | + $wgMemc->set( $key, $problems, 86400 ); |
| 273 | + } |
| 274 | + |
| 275 | + return $problems; |
| 276 | + } |
| 277 | + |
| 278 | + /** |
231 | 279 | * Gets a list of articles which were rated exceptionally high or low. |
232 | 280 | * |
233 | 281 | * - Based on average of all rating categories |
— | — | @@ -237,7 +285,6 @@ |
238 | 286 | */ |
239 | 287 | protected function getDailyHighsAndLows() { |
240 | 288 | global $wgMemc; |
241 | | - |
242 | 289 | // check if we've got results in the cache |
243 | 290 | $key = wfMemcKey( 'article_feedback_stats_highs_lows' ); |
244 | 291 | $cache = $wgMemc->get( $key ); |
— | — | @@ -247,29 +294,32 @@ |
248 | 295 | $dbr = wfGetDB( DB_SLAVE ); |
249 | 296 | // first find the freshest timestamp |
250 | 297 | $row = $dbr->selectRow( |
251 | | - 'article_feedback_stats_highs_lows', |
252 | | - array( 'afshl_ts' ), |
| 298 | + 'article_feedback_stats', |
| 299 | + array( 'afs_ts' ), |
253 | 300 | "", |
254 | 301 | __METHOD__, |
255 | | - array( "ORDER BY" => "afshl_ts DESC", "LIMIT" => 1 ) |
| 302 | + array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 1 ) |
256 | 303 | ); |
257 | 304 | |
258 | 305 | // if we have no results, just return |
259 | | - if ( !$row || !$row->afshl_ts ) { |
| 306 | + if ( !$row || !$row->afs_ts ) { |
260 | 307 | return array(); |
261 | 308 | } |
262 | 309 | |
263 | 310 | // select ratings with that ts |
264 | 311 | $result = $dbr->select( |
265 | | - 'article_feedback_stats_highs_lows', |
| 312 | + 'article_feedback_stats', |
266 | 313 | array( |
267 | | - 'afshl_page_id', |
268 | | - 'afshl_avg_overall', |
269 | | - 'afshl_avg_ratings' |
| 314 | + 'afs_page_id', |
| 315 | + 'afs_orderable_data', |
| 316 | + 'afs_data' |
270 | 317 | ), |
271 | | - array( 'afshl_ts' => $row->afshl_ts ), |
| 318 | + array( |
| 319 | + 'afs_ts' => $row->afs_ts, |
| 320 | + 'afs_stats_type_id' => self::getStatsTypeId( 'highs_and_lows' ) |
| 321 | + ), |
272 | 322 | __METHOD__, |
273 | | - array( "ORDER BY" => "afshl_avg_overall" ) |
| 323 | + array( "ORDER BY" => "afs_orderable_data" ) |
274 | 324 | ); |
275 | 325 | $highs_lows = $this->buildHighsAndLows( $result ); |
276 | 326 | $wgMemc->set( $key, $highs_lows, 86400 ); |
— | — | @@ -331,15 +381,56 @@ |
332 | 382 | $highs_lows = array(); |
333 | 383 | foreach ( $result as $row ) { |
334 | 384 | $highs_lows[] = array( |
335 | | - 'page' => $row->afshl_page_id, |
336 | | - 'ratings' => FormatJson::decode( $row->afshl_avg_ratings ), |
337 | | - 'average' => $row->afshl_avg_overall |
| 385 | + 'page' => $row->afs_page_id, |
| 386 | + 'ratings' => FormatJson::decode( $row->afs_data ), |
| 387 | + 'average' => $row->afs_orderable_data |
338 | 388 | ); |
339 | 389 | } |
340 | 390 | return $highs_lows; |
341 | 391 | } |
342 | 392 | |
343 | 393 | /** |
| 394 | + * Build data store of problems for use when rendering table |
| 395 | + * @param object Database result |
| 396 | + * @return array |
| 397 | + */ |
| 398 | + public static function buildProblems( $result ) { |
| 399 | + $problems = array(); |
| 400 | + foreach( $result as $row ) { |
| 401 | + $problems[] = array( |
| 402 | + 'page' => $row->afs_page_id, |
| 403 | + 'ratings' => FormatJson::decode( $row->afs_data ), |
| 404 | + 'average' => $row->afs_orderable_data |
| 405 | + ); |
| 406 | + } |
| 407 | + return $problems; |
| 408 | + } |
| 409 | + |
| 410 | + /** |
| 411 | + * Get the stats type id for a given stat type |
| 412 | + * @param string $stats_type |
| 413 | + */ |
| 414 | + public static function getStatsTypeId( $stats_type ) { |
| 415 | + global $wgMemc; |
| 416 | + $key = wfMemcKey( 'article_feedback_stats_type_' . $stats_type ); |
| 417 | + $cache = $wgMemc->get( $key ); |
| 418 | + if ( $cache ) { |
| 419 | + return $cache; |
| 420 | + } |
| 421 | + |
| 422 | + $dbr = wfGetDB( DB_SLAVE ); |
| 423 | + $row = $dbr->selectRow( |
| 424 | + 'article_feedback_stats_types', |
| 425 | + array( 'afst_id' ), |
| 426 | + array( 'afst_type' => $stats_type ), |
| 427 | + __METHOD__, |
| 428 | + array( ) |
| 429 | + ); |
| 430 | + $wgMemc->set( $key, $row->afst_id ); |
| 431 | + return $row->afst_id; |
| 432 | + } |
| 433 | + |
| 434 | + /** |
344 | 435 | * Gets a list of articles which have quickly changing ratings. |
345 | 436 | * |
346 | 437 | * - Based on any rating category |
Index: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql |
— | — | @@ -0,0 +1,13 @@ |
| 2 | +DROP TABLE IF EXISTS article_feedback_stats; |
| 3 | +CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats ( |
| 4 | + afs_page_id integer unsigned NOT NULL, |
| 5 | + -- data point to be used for ordering this data |
| 6 | + afs_orderable_data double unsigned NOT NULL, |
| 7 | + -- json object of stat data |
| 8 | + afs_data varbinary(255) NOT NULL, |
| 9 | + afs_stats_type_id integer unsigned NOT NULL, |
| 10 | + -- timestamp of insertion job |
| 11 | + afs_ts binary(14) NOT NULL |
| 12 | +) /*$wgDBTableOptions*/; |
| 13 | +CREATE UNIQUE INDEX /*i*/ afs_page_ts_type ON /*_*/ article_feedback_stats( afs_page_id, afs_ts, afs_stats_type_id ); |
| 14 | +CREATE INDEX /*i*/ afs_ts_avg_overall ON /*_*/article_feedback_stats (afs_ts, afs_orderable_data); |
Index: trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql |
— | — | @@ -0,0 +1,22 @@ |
| 2 | +-- migrate data from article_feedback_stats_highs_lows into article_feedback_stats |
| 3 | +INSERT INTO /*_*/article_feedback_stats ( |
| 4 | + afs_page_id, |
| 5 | + afs_orderable_data, |
| 6 | + afs_data, |
| 7 | + afs_ts, |
| 8 | + afs_stats_type_id |
| 9 | +) |
| 10 | +SELECT |
| 11 | + afshl_page_id, |
| 12 | + afshl_avg_overall, |
| 13 | + afshl_avg_ratings, |
| 14 | + afshl_ts, |
| 15 | + afst_id |
| 16 | +FROM |
| 17 | + /*_*/article_feedback_stats_highs_lows, |
| 18 | + /*_*/article_feedback_stats_types |
| 19 | +WHERE |
| 20 | + /*_*/article_feedback_stats_types.afst_type='highs_and_lows'; |
| 21 | + |
| 22 | +-- get rid of article_feedback_stats_highs_lows as it is no longer necessary |
| 23 | +DROP TABLE /*_*/article_feedback_stats_highs_lows; |
Index: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql |
— | — | @@ -0,0 +1,9 @@ |
| 2 | +CREATE TABLE IF NOT EXISTS /*_*/ article_feedback_stats_types ( |
| 3 | + afst_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, |
| 4 | + afst_type varbinary(255) NOT NULL |
| 5 | +) /*$wgDBTableOptions*/; |
| 6 | +CREATE UNIQUE INDEX /*i*/ afst_type ON /*_*/ article_feedback_stats_types( afst_type ); |
| 7 | + |
| 8 | +-- Pre-populate table with stat types |
| 9 | +INSERT INTO article_feedback_stats_types ( afst_type ) VALUES ( 'highs_and_lows' ); |
| 10 | +INSERT INTO article_feedback_stats_types ( afst_type ) VALUES ( 'problems' ); |
\ No newline at end of file |
Index: trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php |
— | — | @@ -174,13 +174,53 @@ |
175 | 175 | $dir . '/sql/AddRevisionsTable.sql', |
176 | 176 | true |
177 | 177 | ) ); |
| 178 | + |
| 179 | + if ( $db->tableExists( 'article_feedback_stats_highs_lows') ) { |
| 180 | + if ( !$db->tableExists( 'article_feedback_stats_types' )) { |
| 181 | + // add article_feedback_stats_type if necessaray |
| 182 | + $updater->addExtensionUpdate( array( |
| 183 | + 'addTable', |
| 184 | + 'article_feedback_stats_types', |
| 185 | + $dir . '/sql/AddArticleFeedbackStatsTypesTable.sql', |
| 186 | + true |
| 187 | + ) ); |
| 188 | + } |
| 189 | + |
| 190 | + $updater->addExtensionUpdate( array( |
| 191 | + 'addTable', |
| 192 | + 'article_feedback_stats', |
| 193 | + $dir . '/sql/AddArticleFeedbackStatsTable.sql', |
| 194 | + true |
| 195 | + ) ); |
| 196 | + |
| 197 | + // migrate article_feedback_stats_highs_lows to article_feedback_stats |
| 198 | + $updater->addExtensionUpdate( array( |
| 199 | + 'applyPatch', |
| 200 | + $dir . '/sql/MigrateArticleFeedbackStatsHighsLows.sql', |
| 201 | + true |
| 202 | + ) ); |
| 203 | + } else { |
| 204 | + // add article_feedback_stats and article_feedback_stats_type |
| 205 | + if ( !$db->tableExists( 'article_feedback_stats_type' )) { |
| 206 | + $updater->addExtensionUpdate( array( |
| 207 | + 'addTable', |
| 208 | + 'article_feedback_stats_types', |
| 209 | + $dir . '/sql/AddArticleFeedbackStatsTypesTable.sql', |
| 210 | + true |
| 211 | + ) ); |
| 212 | + } |
| 213 | + |
| 214 | + if ( !$db->tableExists( 'article_feedback_stats' )) { |
| 215 | + $updater->addExtensionUpdate( array( |
| 216 | + 'addTable', |
| 217 | + 'article_feedback_stats', |
| 218 | + $dir . '/sql/AddArticleFeedbackStatsTable.sql', |
| 219 | + true |
| 220 | + ) ); |
| 221 | + } |
| 222 | + } |
| 223 | + |
178 | 224 | $updater->addExtensionUpdate( array( |
179 | | - 'addTable', |
180 | | - 'article_feedback_stats_highs_lows', |
181 | | - $dir . '/sql/AddStatsHighsLowsTable.sql', |
182 | | - true |
183 | | - ) ); |
184 | | - $updater->addExtensionUpdate( array( |
185 | 225 | 'addIndex', |
186 | 226 | 'article_feedback', |
187 | 227 | 'article_feedback_timestamp', |
Index: trunk/extensions/ArticleFeedback/populateAFStatistics.php |
— | — | @@ -37,9 +37,38 @@ |
38 | 38 | */ |
39 | 39 | protected $dbw; |
40 | 40 | |
| 41 | + /** |
| 42 | + * Valid operations and their execution methods for this script to perform |
| 43 | + * |
| 44 | + * Operations are passed in as options during run-time - only valid options, |
| 45 | + * which are defined here, can be executed. Valid operations are mapped here |
| 46 | + * to a corresponding method ( array( 'operation' => 'method' )) |
| 47 | + * @var array |
| 48 | + */ |
| 49 | + protected $operation_map = array( |
| 50 | + 'highslows' => 'populateHighsLows', |
| 51 | + 'problems' => 'populateProblems', |
| 52 | + ); |
| 53 | + |
| 54 | + /** |
| 55 | + * Operations to execute |
| 56 | + * @var array |
| 57 | + */ |
| 58 | + public $operations = array(); |
| 59 | + |
| 60 | + /** |
| 61 | + * The minimum number of rating sets required before taking some action |
| 62 | + * @var int |
| 63 | + */ |
| 64 | + public $rating_set_threshold = 10; |
| 65 | + |
41 | 66 | public function __construct() { |
42 | 67 | parent::__construct(); |
43 | 68 | $this->mDescription = "Populates the article feedback stats tables"; |
| 69 | + |
| 70 | + $this->addOption( 'op', 'The ArticleFeedback stats gathering operation to run (eg "highslows"). Can specify multiple operations, separated by comma.', true, true ); |
| 71 | + $this->addOption( 'rating_sets', 'The minimum number of rating sets before taking an action.', false, true ); |
| 72 | + $this->addOption( 'poll_period', 'The polling period for fetching data, in seconds.', false, true ); |
44 | 73 | } |
45 | 74 | |
46 | 75 | public function syncDBs() { |
— | — | @@ -54,72 +83,179 @@ |
55 | 84 | } |
56 | 85 | } |
57 | 86 | |
58 | | - public function execute() { |
59 | | - global $wgMemc; |
| 87 | + /** |
| 88 | + * Bootstrap this maintenance script |
| 89 | + * |
| 90 | + * Performs operations necessary for this maintenance script to run which |
| 91 | + * cannot or do not make sense to run in the constructor. |
| 92 | + */ |
| 93 | + public function bootstrap() { |
| 94 | + /** |
| 95 | + * Set user-specified operations to perform |
| 96 | + */ |
| 97 | + $operations = explode( ',', $this->getOption( 'op' )); |
| 98 | + // check sanity of specified operations |
| 99 | + if ( !$this->checkOperations( $operations )) { |
| 100 | + $this->error( 'Invalid operation specified.', true ); |
| 101 | + } else { |
| 102 | + $this->operations = $operations; |
| 103 | + } |
| 104 | + |
| 105 | + /** |
| 106 | + * Set user-specified rating set threshold |
| 107 | + */ |
| 108 | + $rating_set_threshold = $this->getOption( 'rating_sets', $this->rating_set_threshold ); |
| 109 | + if ( !is_numeric( $rating_set_threshold )) { |
| 110 | + $this->error( 'Rating sets must be numeric.', true ); |
| 111 | + } else { |
| 112 | + $this->rating_set_threshold = $rating_set_threshold; |
| 113 | + } |
| 114 | + |
| 115 | + /** |
| 116 | + * Set user-specified polling period |
| 117 | + */ |
| 118 | + $polling_period = $this->getOption( 'poll_period', $this->polling_period ); |
| 119 | + if ( !is_numeric( $polling_period )) { |
| 120 | + $this->error( 'Poll period must be numeric.', true ); |
| 121 | + } else { |
| 122 | + $this->polling_period = $polling_period; |
| 123 | + } |
| 124 | + |
| 125 | + // set db objects |
60 | 126 | $this->dbr = wfGetDB( DB_SLAVE ); |
61 | 127 | $this->dbw = wfGetDB( DB_MASTER ); |
| 128 | + } |
| 129 | + |
| 130 | + /** |
| 131 | + * Check whether or not specified operations are valid. |
| 132 | + * |
| 133 | + * A specified operation is considered valid if it exists |
| 134 | + * as a key in the operation map. |
| 135 | + * |
| 136 | + * @param array $ops An array of operations to check |
| 137 | + * @return bool |
| 138 | + */ |
| 139 | + public function checkOperations( array $ops ) { |
| 140 | + foreach ( $ops as $operation ) { |
| 141 | + if ( !isset( $this->operation_map[ $operation ] )) { |
| 142 | + return false; |
| 143 | + } |
| 144 | + } |
| 145 | + return true; |
| 146 | + } |
| 147 | + |
| 148 | + public function execute() { |
| 149 | + // finish bootstrapping the script |
| 150 | + $this->bootstrap(); |
62 | 151 | |
63 | | - // the data structure to store ratings for a given page |
64 | | - $ratings = array(); // stores rating-specific info |
65 | | - $rating_set_count = array(); // keep track of rating sets |
66 | | - $highs_and_lows = array(); // store highest/lowest rated page stats |
67 | | - $averages = array(); // store overall averages for a given page |
| 152 | + // execute requested operations |
| 153 | + foreach ( $this->operations as $operation ) { |
| 154 | + $method = $this->operation_map[ $operation ]; |
| 155 | + $this->$method(); |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + public function populateProblems() { |
| 160 | + global $wgMemc; |
68 | 161 | |
69 | | - // fetch the ratings since the lower bound timestamp |
70 | | - $this->output( 'Fetching page ratings between now and ' . date('Y-m-d H:i:s', strtotime( $this->getLowerBoundTimestamp())) . "...\n"); |
71 | | - $res = $this->dbr->select( |
72 | | - 'article_feedback', |
| 162 | + /** |
| 163 | + * Chck to see if we already have a collection of pages to operate on. |
| 164 | + * If not, generate the collection of pages and their associated ratings. |
| 165 | + */ |
| 166 | + if ( !isset( $this->pages )) { |
| 167 | + $ts = $this->getLowerBoundTimestamp(); |
| 168 | + $this->pages = $this->populatePageRatingsSince( $ts ); |
| 169 | + } |
| 170 | + $problems = array(); |
| 171 | + // iterate through pages, look for pages that meet criteria for problem articles |
| 172 | + $this->output( "Finding problem articles ...\n" ); |
| 173 | + foreach ( $this->pages as $page ) { |
| 174 | + // make sure that we have more rating sets than the req'd threshold for this page in order to qualify for calculating |
| 175 | + if ( $page->rating_set_count < $this->rating_set_threshold ) { |
| 176 | + continue; |
| 177 | + } |
| 178 | + |
| 179 | + if ( $page->isProblematic() ) { |
| 180 | + array_push( $problems, $page->page_id ); |
| 181 | + } |
| 182 | + } |
| 183 | + |
| 184 | + // populate stats table with problem articles & associated data |
| 185 | + // fetch stats type id - add stat type if it's non-existant |
| 186 | + $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'problems' ); |
| 187 | + if ( !$stats_type_id ) { |
| 188 | + $stats_type_id = $this->addStatType( 'problems' ); |
| 189 | + } |
| 190 | + foreach( $problems as $page_id ) { |
| 191 | + $page = $this->pages->getPage( $page_id ); |
| 192 | + $rows[] = array( |
| 193 | + 'afs_page_id' => $page_id, |
| 194 | + 'afs_orderable_data' => $page->overall_average, |
| 195 | + 'afs_data' => FormatJson::encode( $page->rating_averages ), |
| 196 | + 'afs_ts' => $cur_ts, |
| 197 | + 'afs_stats_type_id' => $stats_type_id, |
| 198 | + ); |
| 199 | + } |
| 200 | + $this->output( "Done.\n" ); |
| 201 | + |
| 202 | + // populate cache with current problem articles |
| 203 | + // loading data into cache |
| 204 | + $this->output( "Caching latest problems (if cache present).\n" ); |
| 205 | + $key = wfMemcKey( 'article_feedback_stats_problems' ); |
| 206 | + $result = $this->dbr->select( |
| 207 | + 'article_feedback_stats', |
| 208 | + array( |
| 209 | + 'afs_page_id', |
| 210 | + 'afs_orderable_data', |
| 211 | + 'afs_data' |
| 212 | + ), |
73 | 213 | array( |
74 | | - 'aa_revision', |
75 | | - 'aa_user_text', |
76 | | - 'aa_rating_id', |
77 | | - 'aa_user_anon_token', |
78 | | - 'aa_page_id', |
79 | | - 'aa_rating_value', |
80 | | - ), |
81 | | - array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $this->getLowerBoundTimestamp() ) ), |
| 214 | + 'afs_ts' => $cur_ts, |
| 215 | + 'afs_stats_type_id' => $stats_type_id |
| 216 | + ), |
82 | 217 | __METHOD__, |
83 | | - array() |
| 218 | + array( "ORDER BY" => "afs_orderable_data" ) |
84 | 219 | ); |
| 220 | + // grab the article feedback special page so we can reuse the data structure building code |
| 221 | + // FIXME this logic should not be in the special page class |
| 222 | + $problems = SpecialArticleFeedback::buildProblems( $result ); |
| 223 | + // stash the data structure in the cache |
| 224 | + $wgMemc->set( $key, $problems, 86400 ); |
| 225 | + $this->output( "Done.\n" ); |
| 226 | + } |
| 227 | + |
| 228 | + /** |
| 229 | + * Populate stats about highest/lowest rated articles |
| 230 | + */ |
| 231 | + public function populateHighsLows() { |
| 232 | + global $wgMemc; |
85 | 233 | |
86 | | - // assign the rating data to our data structure |
87 | | - foreach ( $res as $row ) { |
88 | | - // determine the unique hash for a given rating set (page rev + user identifying info) |
89 | | - $rating_hash = md5( $row->aa_revision . $row->aa_user_text . $row->aa_user_anon_token ); |
90 | | - |
91 | | - // keep track of how many rating sets a particular page has |
92 | | - if ( !isset( $rating_count[ $row->aa_page_id ][ $rating_hash ] )) { |
93 | | - // we store the rating hash as a key rather than value as checking isset( $arr[$hash] ) is way faster |
94 | | - // than doing something like array_search( $hash, $arr ) when dealing with large arrays |
95 | | - $rating_set_count[ $row->aa_page_id ][ $rating_hash ] = 1; |
96 | | - } |
97 | | - |
98 | | - $ratings[ $row->aa_page_id ][ $row->aa_rating_id ][] = $row->aa_rating_value; |
| 234 | + $averages = array(); // store overall averages for a given page |
| 235 | + |
| 236 | + /** |
| 237 | + * Chck to see if we already have a collection of pages to operate on. |
| 238 | + * If not, generate the collection of pages and their associated ratings. |
| 239 | + */ |
| 240 | + if ( !isset( $this->pages )) { |
| 241 | + $ts = $this->getLowerBoundTimestamp(); |
| 242 | + $this->pages = $this->populatePageRatingsSince( $ts ); |
99 | 243 | } |
100 | | - $this->output( "Done\n" ); |
101 | 244 | |
102 | 245 | // determine the average ratings for a given page |
103 | 246 | $this->output( "Determining average ratings for articles ...\n" ); |
104 | | - foreach ( $ratings as $page_id => $data ) { |
105 | | - // make sure that we have at least 10 rating sets for this page in order to qualify for ranking |
106 | | - if ( count( array_keys( $rating_set_count[ $page_id ] )) < 10 ) { |
| 247 | + foreach ( $this->pages as $page ) { |
| 248 | + // make sure that we have more rating sets than the req'd threshold for this page in order to qualify for ranking |
| 249 | + if ( $page->rating_set_count < $this->rating_set_threshold ) { |
107 | 250 | continue; |
108 | 251 | } |
109 | 252 | |
110 | | - // calculate the rating averages for a given page |
111 | | - foreach( $data as $rating_id => $rating ) { |
112 | | - $rating_sum = array_sum( $rating ); |
113 | | - $rating_avg = $rating_sum / count( $rating ); |
114 | | - $highs_and_lows[ $page_id ][ 'avg_ratings' ][ $rating_id ] = $rating_avg; |
| 253 | + // calculate the rating averages if they haven't already been calculated |
| 254 | + if ( !count( $page->rating_averages )) { |
| 255 | + $page->calculateRatingAverages(); |
115 | 256 | } |
116 | 257 | |
117 | | - // calculate the overall average for a page |
118 | | - $overall_rating_sum = array_sum( $highs_and_lows[ $page_id ][ 'avg_ratings' ] ); |
119 | | - $overall_rating_average = $overall_rating_sum / count( $highs_and_lows[ $page_id ][ 'avg_ratings' ] ); |
120 | | - $highs_and_lows[ $page_id ][ 'average' ] = $overall_rating_average; |
121 | | - |
122 | 258 | // store overall average rating seperately so we can easily sort |
123 | | - $averages[ $page_id ] = $overall_rating_average; |
| 259 | + $averages[ $page->page_id ] = $page->overall_average; |
124 | 260 | } |
125 | 261 | $this->output( "Done.\n" ); |
126 | 262 | |
— | — | @@ -129,35 +265,42 @@ |
130 | 266 | // take lowest 50 and highest 50 |
131 | 267 | $highest_and_lowest_page_ids = array_slice( $averages, 0, 50, true ); |
132 | 268 | if ( count( $averages ) > 50 ) { |
| 269 | + // in the event that we have < 100 $averages total, this will still |
| 270 | + // work nicely - it will select duplicate averages, but the += |
| 271 | + // will cause items with the same keys to essentially be ignored |
133 | 272 | $highest_and_lowest_page_ids += array_slice( $averages, -50, 50, true ); |
134 | 273 | } |
135 | 274 | $this->output( "Done\n" ); |
136 | 275 | |
| 276 | + // fetch stats type id - add stat type if it's non-existant |
| 277 | + $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'highs_and_lows' ); |
| 278 | + if ( !$stats_type_id ) { |
| 279 | + $stats_type_id = $this->addStatType( 'highs_and_lows' ); |
| 280 | + } |
| 281 | + |
137 | 282 | // prepare data for insert into db |
138 | 283 | $this->output( "Preparing data for db insertion ...\n"); |
139 | 284 | $cur_ts = $this->dbw->timestamp(); |
140 | 285 | $rows = array(); |
141 | | - foreach( $highs_and_lows as $page_id => $data ) { |
142 | | - // make sure this is one of the highest/lowest average ratings |
143 | | - if ( !isset( $highest_and_lowest_page_ids[ $page_id ] )) { |
144 | | - continue; |
145 | | - } |
| 286 | + foreach( $highest_and_lowest_page_ids as $page_id => $overall_average ) { |
| 287 | + $page = $this->pages->getPage( $page_id ); |
146 | 288 | $rows[] = array( |
147 | | - 'afshl_page_id' => $page_id, |
148 | | - 'afshl_avg_overall' => $data[ 'average' ], |
149 | | - 'afshl_avg_ratings' => FormatJson::encode( $data[ 'avg_ratings' ] ), |
150 | | - 'afshl_ts' => $cur_ts, |
| 289 | + 'afs_page_id' => $page_id, |
| 290 | + 'afs_orderable_data' => $page->overall_average, |
| 291 | + 'afs_data' => FormatJson::encode( $page->rating_averages ), |
| 292 | + 'afs_ts' => $cur_ts, |
| 293 | + 'afs_stats_type_id' => $stats_type_id, |
151 | 294 | ); |
152 | 295 | } |
153 | 296 | $this->output( "Done.\n" ); |
154 | 297 | |
155 | 298 | // insert data to db |
156 | | - $this->output( "Writing data to article_feedback_stats_highs_lows ...\n" ); |
| 299 | + $this->output( "Writing data to article_feedback_stats ...\n" ); |
157 | 300 | $rowsInserted = 0; |
158 | 301 | while( $rows ) { |
159 | 302 | $batch = array_splice( $rows, 0, $this->insert_batch_size ); |
160 | 303 | $this->dbw->insert( |
161 | | - 'article_feedback_stats_highs_lows', |
| 304 | + 'article_feedback_stats', |
162 | 305 | $batch, |
163 | 306 | __METHOD__ |
164 | 307 | ); |
— | — | @@ -167,30 +310,96 @@ |
168 | 311 | } |
169 | 312 | $this->output( "Done.\n" ); |
170 | 313 | |
171 | | - // loading data into caching |
| 314 | + // loading data into cache |
172 | 315 | $this->output( "Caching latest highs/lows (if cache present).\n" ); |
173 | 316 | $key = wfMemcKey( 'article_feedback_stats_highs_lows' ); |
174 | 317 | $result = $this->dbr->select( |
175 | | - 'article_feedback_stats_highs_lows', |
| 318 | + 'article_feedback_stats', |
176 | 319 | array( |
177 | | - 'afshl_page_id', |
178 | | - 'afshl_avg_overall', |
179 | | - 'afshl_avg_ratings' |
| 320 | + 'afs_page_id', |
| 321 | + 'afs_orderable_data', |
| 322 | + 'afs_data' |
180 | 323 | ), |
181 | | - array( 'afshl_ts' => $cur_ts ), |
| 324 | + array( |
| 325 | + 'afs_ts' => $cur_ts, |
| 326 | + 'afs_stats_type_id' => $stats_type_id |
| 327 | + ), |
182 | 328 | __METHOD__, |
183 | | - array( "ORDER BY" => "afshl_avg_overall" ) |
| 329 | + array( "ORDER BY" => "afs_orderable_data" ) |
184 | 330 | ); |
185 | 331 | // grab the article feedback special page so we can reuse the data structure building code |
186 | 332 | // FIXME this logic should not be in the special page class |
187 | 333 | $highs_lows = SpecialArticleFeedback::buildHighsAndLows( $result ); |
188 | 334 | // stash the data structure in the cache |
189 | 335 | $wgMemc->set( $key, $highs_lows, 86400 ); |
190 | | - $this->output( "Done\n" ); |
| 336 | + $this->output( "Done\n" ); |
191 | 337 | } |
192 | 338 | |
| 339 | + /** |
| 340 | + * Fetch ratings newer than a given time stamp. |
| 341 | + * |
| 342 | + * If no timestamp is provided, relies on $this->lowerBoundTimestamp |
| 343 | + * @param numeric $ts |
| 344 | + * @return database result object |
| 345 | + */ |
| 346 | + public function fetchRatingsNewerThanTs( $ts=null ) { |
| 347 | + if ( !$ts ) { |
| 348 | + $ts = $this->getLowerBoundTimestamp(); |
| 349 | + } |
| 350 | + |
| 351 | + if ( !is_numeric( $ts )) { |
| 352 | + throw new InvalidArgumentException( 'Timestamp expected to be numeric.' ); |
| 353 | + } |
| 354 | + |
| 355 | + $res = $this->dbr->select( |
| 356 | + 'article_feedback', |
| 357 | + array( |
| 358 | + 'aa_revision', |
| 359 | + 'aa_user_text', |
| 360 | + 'aa_rating_id', |
| 361 | + 'aa_user_anon_token', |
| 362 | + 'aa_page_id', |
| 363 | + 'aa_rating_value', |
| 364 | + ), |
| 365 | + array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $ts )), |
| 366 | + __METHOD__, |
| 367 | + array() |
| 368 | + ); |
| 369 | + |
| 370 | + return $res; |
| 371 | + } |
193 | 372 | |
194 | 373 | /** |
| 374 | + * Construct collection of pages and their ratings since a given time stamp |
| 375 | + * @param $ts |
| 376 | + * @return object The colelction of pages |
| 377 | + */ |
| 378 | + public function populatePageRatingsSince( $ts ) { |
| 379 | + $pages = new Pages(); |
| 380 | + // fetch the ratings since the lower bound timestamp |
| 381 | + $this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts )) . "...\n" ); |
| 382 | + $res = $this->fetchRatingsNewerThanTs( $ts ); |
| 383 | + $this->output( "Done.\n" ); |
| 384 | + |
| 385 | + // assign the rating data to our data structure |
| 386 | + $this->output( "Assigning fetched ratings to internal data structure ...\n" ); |
| 387 | + foreach ( $res as $row ) { |
| 388 | + // fetch the page from the page store referentially so we can |
| 389 | + // perform actions on it that will automagically be saved in the |
| 390 | + // object for easy access later |
| 391 | + $page =& $pages->getPage( $row->aa_page_id ); |
| 392 | + |
| 393 | + // determine the unique hash for a given rating set (page rev + user identifying info) |
| 394 | + $rating_hash = $row->aa_revision . "|" . $row->aa_user_text . "|" . $row->aa_user_anon_token; |
| 395 | + |
| 396 | + // add rating data for this page |
| 397 | + $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash ); |
| 398 | + } |
| 399 | + $this->output( "Done.\n" ); |
| 400 | + return $pages; |
| 401 | + } |
| 402 | + |
| 403 | + /** |
195 | 404 | * Set $this->timestamp |
196 | 405 | * @param int $ts |
197 | 406 | */ |
— | — | @@ -216,7 +425,177 @@ |
217 | 426 | } |
218 | 427 | return $this->lowerBoundTimestamp; |
219 | 428 | } |
| 429 | + |
| 430 | + /** |
| 431 | + * Add stat type record to article_feedbak_stats_types |
| 432 | + * @param string $stat_type The identifying name of the stat type (eg 'highs_lows') |
| 433 | + */ |
| 434 | + public function addStatType( $stat_type ) { |
| 435 | + $this->dbw->insert( |
| 436 | + 'article_feedback_stats', |
| 437 | + array( 'afst_type' => $stat_type ), |
| 438 | + __METHOD__ |
| 439 | + ); |
| 440 | + return $this->dbw->insertId(); |
| 441 | + } |
220 | 442 | } |
221 | 443 | |
| 444 | +/** |
| 445 | + * A class to represent a page and data about its ratings |
| 446 | + */ |
| 447 | +class Page { |
| 448 | + public $page_id; |
| 449 | + |
| 450 | + /** |
| 451 | + * The number of rating sets recorded for this page |
| 452 | + * @var int |
| 453 | + */ |
| 454 | + public $rating_set_count = 0; |
| 455 | + |
| 456 | + /** |
| 457 | + * An array of ratings for this page |
| 458 | + * @var array |
| 459 | + */ |
| 460 | + public $ratings = array(); |
| 461 | + |
| 462 | + /** |
| 463 | + * An array to hold mean ratings by rating type id |
| 464 | + * @var array |
| 465 | + */ |
| 466 | + public $rating_averages = array(); |
| 467 | + |
| 468 | + /** |
| 469 | + * Mean of all ratings for this page |
| 470 | + * @var float |
| 471 | + */ |
| 472 | + public $overall_average; |
| 473 | + |
| 474 | + /** |
| 475 | + * An array of rating set hashes, which are used to identify unique sets of |
| 476 | + * ratings |
| 477 | + * @var array |
| 478 | + */ |
| 479 | + protected $rating_set_hashes = array(); |
| 480 | + |
| 481 | + public function __construct( $page_id ) { |
| 482 | + if ( !is_numeric( $page_id )) { |
| 483 | + throw new Exception( 'Page id must be numeric.' ); |
| 484 | + } |
| 485 | + $this->page_id = $page_id; |
| 486 | + } |
| 487 | + |
| 488 | + /** |
| 489 | + * Add a new rating for this particular page |
| 490 | + * @param int $rating_id |
| 491 | + * @param int $rating_value |
| 492 | + * @param string $rating_set_hash |
| 493 | + */ |
| 494 | + public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) { |
| 495 | + $this->ratings[ $rating_id ][] = $rating_value; |
| 496 | + |
| 497 | + if ( $rating_set_hash ) { |
| 498 | + $this->trackRatingSet( $rating_set_hash ); |
| 499 | + } |
| 500 | + } |
| 501 | + |
| 502 | + /** |
| 503 | + * Keep track of rating sets |
| 504 | + * |
| 505 | + * Record when we see a new rating set and increment the set count |
| 506 | + * @param string $rating_set_hash |
| 507 | + */ |
| 508 | + protected function trackRatingSet( $rating_set_hash ) { |
| 509 | + if ( isset( $this->rating_set_hashes[ $rating_set_hash ] )) { |
| 510 | + return; |
| 511 | + } |
| 512 | + |
| 513 | + $this->rating_set_hashes[ $rating_set_hash ] = 1; |
| 514 | + $this->rating_set_count += 1; |
| 515 | + } |
| 516 | + |
| 517 | + public function calculateRatingAverages() { |
| 518 | + // determine averages for each rating type |
| 519 | + foreach( $this->ratings as $rating_id => $rating ) { |
| 520 | + $rating_sum = array_sum( $rating ); |
| 521 | + $rating_avg = $rating_sum / count( $rating ); |
| 522 | + $this->rating_averages[ $rating_id ] = $rating_avg; |
| 523 | + } |
| 524 | + |
| 525 | + // determine overall rating average for this page |
| 526 | + if ( count( $this->rating_averages )) { |
| 527 | + $overall_rating_sum = array_sum( $this->rating_averages ); |
| 528 | + $overall_rating_average = $overall_rating_sum / count( $this->rating_averages ); |
| 529 | + } else { |
| 530 | + $overall_rating_average = 0; |
| 531 | + } |
| 532 | + $this->overall_average = $overall_rating_average; |
| 533 | + } |
| 534 | + |
| 535 | + /** |
| 536 | + * Returns whether or not this page is considered problematic |
| 537 | + * @return bool |
| 538 | + */ |
| 539 | + public function isProblematic() { |
| 540 | + if ( !isset( $this->problematic )) { |
| 541 | + $this->determineProblematicStatus(); |
| 542 | + } |
| 543 | + return $this->probematic; |
| 544 | + } |
| 545 | + |
| 546 | + /** |
| 547 | + * Determine whether this article is 'problematic' |
| 548 | + * |
| 549 | + * If a page has one more rating categories where 70% of the ratings are |
| 550 | + * <= 2, it is considered problematic. |
| 551 | + */ |
| 552 | + public function determineProblematicStatus() { |
| 553 | + foreach( $this->ratings as $rating_id => $ratings ) { |
| 554 | + $count = 0; |
| 555 | + foreach ( $ratings as $rating ) { |
| 556 | + if ( $rating <= 2 ) { |
| 557 | + $count += 1; |
| 558 | + } |
| 559 | + } |
| 560 | + |
| 561 | + $threshold = round( 0.7 * count( $ratings )); |
| 562 | + if ( $count >= $threshold ) { |
| 563 | + $this->problematic = true; |
| 564 | + return; |
| 565 | + } |
| 566 | + } |
| 567 | + |
| 568 | + $this->problematic = false; |
| 569 | + return; |
| 570 | + } |
| 571 | +} |
| 572 | + |
| 573 | +/** |
| 574 | + * A storage class to keep track of PageRatings object by page |
| 575 | + * |
| 576 | + * Iterable on array of pages. |
| 577 | + */ |
| 578 | +class Pages implements IteratorAggregate { |
| 579 | + /** |
| 580 | + * An array of page rating objects |
| 581 | + * @var array |
| 582 | + */ |
| 583 | + public $pages = array(); |
| 584 | + |
| 585 | + public function getPage( $page_id ) { |
| 586 | + if ( !isset( $this->pages[ $page_id ] )) { |
| 587 | + $this->addPage( $page_id ); |
| 588 | + } |
| 589 | + return $this->pages[ $page_id ]; |
| 590 | + } |
| 591 | + |
| 592 | + public function addPage( $page_id ) { |
| 593 | + $this->pages[ $page_id ] = new Page( $page_id ); |
| 594 | + } |
| 595 | + |
| 596 | + public function getIterator() { |
| 597 | + return new ArrayIterator( $this->pages ); |
| 598 | + } |
| 599 | +} |
| 600 | + |
222 | 601 | $maintClass = "PopulateAFStatistics"; |
223 | 602 | require_once( DO_MAINTENANCE ); |
Index: trunk/extensions/ArticleFeedback/ArticleFeedback.php |
— | — | @@ -11,7 +11,7 @@ |
12 | 12 | */ |
13 | 13 | |
14 | 14 | /* XXX: Survey setup */ |
15 | | -require_once( dirname( dirname( __FILE__ ) ) . '/SimpleSurvey/SimpleSurvey.php' ); |
| 15 | +require_once( $IP . '/extensions/SimpleSurvey/SimpleSurvey.php' ); |
16 | 16 | |
17 | 17 | /* Configuration */ |
18 | 18 | |