| Index: trunk/extensions/ArticleFeedbackv5/populateAFStatistics.php |
| — | — | @@ -1,613 +0,0 @@ |
| 2 | | -<?php |
| 3 | | - |
| 4 | | -$IP = getenv( 'MW_INSTALL_PATH' ); |
| 5 | | -if ( $IP === false ) { |
| 6 | | - $IP = dirname( __FILE__ ) . '/../..'; |
| 7 | | -} |
| 8 | | -require( "$IP/maintenance/Maintenance.php" ); |
| 9 | | - |
| 10 | | -class PopulateAFStatistics extends Maintenance { |
| 11 | | - /** |
| 12 | | - * The number of records to attempt to insert at any given time. |
| 13 | | - * @var int |
| 14 | | - */ |
| 15 | | - public $insert_batch_size = 100; |
| 16 | | - |
| 17 | | - /** |
| 18 | | - * The period (in seconds) before now for which to gather stats |
| 19 | | - * @var int |
| 20 | | - */ |
| 21 | | - public $polling_period = 86400; |
| 22 | | - |
| 23 | | - /** |
| 24 | | - * The formatted timestamp from which to determine stats |
| 25 | | - * @var int |
| 26 | | - */ |
| 27 | | - protected $lowerBoundTimestamp; |
| 28 | | - |
| 29 | | - /** |
| 30 | | - * DB slave |
| 31 | | - * @var object |
| 32 | | - */ |
| 33 | | - protected $dbr; |
| 34 | | - |
| 35 | | - /** |
| 36 | | - * DB master |
| 37 | | - * @var object |
| 38 | | - */ |
| 39 | | - protected $dbw; |
| 40 | | - |
| 41 | | - /** |
| 42 | | - * Valid operations and their execution methods for this script to perform |
| 43 | | - * |
| 44 | | - * Operations are passed in as options during run-time - only valid options, |
| 45 | | - * which are defined here, can be executed. Valid operations are mapped here |
| 46 | | - * to a corresponding method ( array( 'operation' => 'method' )) |
| 47 | | - * @var array |
| 48 | | - */ |
| 49 | | - protected $operation_map = array( |
| 50 | | - 'highslows' => 'populateHighsLows', |
| 51 | | - 'problems' => 'populateProblems', |
| 52 | | - ); |
| 53 | | - |
| 54 | | - /** |
| 55 | | - * Operations to execute |
| 56 | | - * @var array |
| 57 | | - */ |
| 58 | | - public $operations = array(); |
| 59 | | - |
| 60 | | - /** |
| 61 | | - * The minimum number of rating sets required before taking some action |
| 62 | | - * @var int |
| 63 | | - */ |
| 64 | | - public $rating_set_threshold = 10; |
| 65 | | - |
| 66 | | - public function __construct() { |
| 67 | | - parent::__construct(); |
| 68 | | - $this->mDescription = "Populates the article feedback stats tables"; |
| 69 | | - |
| 70 | | - $this->addOption( 'op', 'The ArticleFeedback stats gathering operation to run (eg "highslows"). Can specify multiple operations, separated by comma.', true, true ); |
| 71 | | - $this->addOption( 'rating_sets', 'The minimum number of rating sets before taking an action.', false, true ); |
| 72 | | - $this->addOption( 'poll_period', 'The polling period for fetching data, in seconds.', false, true ); |
| 73 | | - } |
| 74 | | - |
| 75 | | - public function syncDBs() { |
| 76 | | - // FIXME: Copied from populateAFRevisions.php, which copied from updateCollation.php, should be centralized somewhere |
| 77 | | - $lb = wfGetLB(); |
| 78 | | - // bug 27975 - Don't try to wait for slaves if there are none |
| 79 | | - // Prevents permission error when getting master position |
| 80 | | - if ( $lb->getServerCount() > 1 ) { |
| 81 | | - $dbw = $lb->getConnection( DB_MASTER ); |
| 82 | | - $pos = $dbw->getMasterPos(); |
| 83 | | - $lb->waitForAll( $pos ); |
| 84 | | - } |
| 85 | | - } |
| 86 | | - |
| 87 | | - /** |
| 88 | | - * Bootstrap this maintenance script |
| 89 | | - * |
| 90 | | - * Performs operations necessary for this maintenance script to run which |
| 91 | | - * cannot or do not make sense to run in the constructor. |
| 92 | | - */ |
| 93 | | - public function bootstrap() { |
| 94 | | - /** |
| 95 | | - * Set user-specified operations to perform |
| 96 | | - */ |
| 97 | | - $operations = explode( ',', $this->getOption( 'op' )); |
| 98 | | - // check sanity of specified operations |
| 99 | | - if ( !$this->checkOperations( $operations )) { |
| 100 | | - $this->error( 'Invalid operation specified.', true ); |
| 101 | | - } else { |
| 102 | | - $this->operations = $operations; |
| 103 | | - } |
| 104 | | - |
| 105 | | - /** |
| 106 | | - * Set user-specified rating set threshold |
| 107 | | - */ |
| 108 | | - $rating_set_threshold = $this->getOption( 'rating_sets', $this->rating_set_threshold ); |
| 109 | | - if ( !is_numeric( $rating_set_threshold )) { |
| 110 | | - $this->error( 'Rating sets must be numeric.', true ); |
| 111 | | - } else { |
| 112 | | - $this->rating_set_threshold = $rating_set_threshold; |
| 113 | | - } |
| 114 | | - |
| 115 | | - /** |
| 116 | | - * Set user-specified polling period |
| 117 | | - */ |
| 118 | | - $polling_period = $this->getOption( 'poll_period', $this->polling_period ); |
| 119 | | - if ( !is_numeric( $polling_period )) { |
| 120 | | - $this->error( 'Poll period must be numeric.', true ); |
| 121 | | - } else { |
| 122 | | - $this->polling_period = $polling_period; |
| 123 | | - } |
| 124 | | - |
| 125 | | - // set db objects |
| 126 | | - $this->dbr = wfGetDB( DB_SLAVE ); |
| 127 | | - $this->dbw = wfGetDB( DB_MASTER ); |
| 128 | | - } |
| 129 | | - |
| 130 | | - /** |
| 131 | | - * Check whether or not specified operations are valid. |
| 132 | | - * |
| 133 | | - * A specified operation is considered valid if it exists |
| 134 | | - * as a key in the operation map. |
| 135 | | - * |
| 136 | | - * @param array $ops An array of operations to check |
| 137 | | - * @return bool |
| 138 | | - */ |
| 139 | | - public function checkOperations( array $ops ) { |
| 140 | | - foreach ( $ops as $operation ) { |
| 141 | | - if ( !isset( $this->operation_map[ $operation ] )) { |
| 142 | | - return false; |
| 143 | | - } |
| 144 | | - } |
| 145 | | - return true; |
| 146 | | - } |
| 147 | | - |
| 148 | | - public function execute() { |
| 149 | | - // finish bootstrapping the script |
| 150 | | - $this->bootstrap(); |
| 151 | | - |
| 152 | | - // execute requested operations |
| 153 | | - foreach ( $this->operations as $operation ) { |
| 154 | | - $method = $this->operation_map[ $operation ]; |
| 155 | | - $this->$method(); |
| 156 | | - } |
| 157 | | - } |
| 158 | | - |
| 159 | | - public function populateProblems() { |
| 160 | | - global $wgMemc; |
| 161 | | - |
| 162 | | - /** |
| 163 | | - * Check to see if we already have a collection of pages to operate on. |
| 164 | | - * If not, generate the collection of pages and their associated ratings. |
| 165 | | - */ |
| 166 | | - if ( !isset( $this->pages )) { |
| 167 | | - $ts = $this->getLowerBoundTimestamp(); |
| 168 | | - $this->pages = $this->populatePageRatingsSince( $ts ); |
| 169 | | - } |
| 170 | | - $problems = array(); |
| 171 | | - // iterate through pages, look for pages that meet criteria for problem articles |
| 172 | | - $this->output( "Finding problem articles ...\n" ); |
| 173 | | - foreach ( $this->pages as $page ) { |
| 174 | | - // make sure that we have more rating sets than the req'd threshold for this page in order to qualify for calculating |
| 175 | | - if ( $page->rating_set_count < $this->rating_set_threshold ) { |
| 176 | | - continue; |
| 177 | | - } |
| 178 | | - |
| 179 | | - if ( $page->isProblematic() ) { |
| 180 | | - $problems[] = $page->page_id; |
| 181 | | - } |
| 182 | | - } |
| 183 | | - |
| 184 | | - // populate stats table with problem articles & associated data |
| 185 | | - // fetch stats type id - add stat type if it's non-existent |
| 186 | | - $stats_type_id = SpecialArticleFeedbackv5::getStatsTypeId( 'problems' ); |
| 187 | | - if ( !$stats_type_id ) { |
| 188 | | - $stats_type_id = $this->addStatType( 'problems' ); |
| 189 | | - } |
| 190 | | - |
| 191 | | - $rows = array(); |
| 192 | | - $cur_ts = $this->dbw->timestamp(); |
| 193 | | - $count = 0; |
| 194 | | - foreach( $problems as $page_id ) { |
| 195 | | - $page = $this->pages->getPage( $page_id ); |
| 196 | | - // calculate the rating averages if they haven't already been calculated |
| 197 | | - if ( !count( $page->rating_averages )) { |
| 198 | | - $page->calculateRatingAverages(); |
| 199 | | - } |
| 200 | | - $rows[] = array( |
| 201 | | - 'afs_page_id' => $page_id, |
| 202 | | - 'afs_orderable_data' => $page->overall_average, |
| 203 | | - 'afs_data' => FormatJson::encode( $page->rating_averages ), |
| 204 | | - 'afs_ts' => $cur_ts, |
| 205 | | - 'afs_stats_type_id' => $stats_type_id, |
| 206 | | - ); |
| 207 | | - |
| 208 | | - $count++; |
| 209 | | - if ( $count >= 50 ) { |
| 210 | | - // No more than 50 |
| 211 | | - // TODO: Get the 50 most problematic articles rather than 50 random problematic ones |
| 212 | | - break; |
| 213 | | - } |
| 214 | | - } |
| 215 | | - $this->output( "Done.\n" ); |
| 216 | | - |
| 217 | | - // Insert the problem rows into the database |
| 218 | | - $this->output( "Writing data to article_feedback_stats ...\n" ); |
| 219 | | - $rowsInserted = 0; |
| 220 | | - // $rows is gonna be modified by array_splice(), so make a copy for later use |
| 221 | | - $rowsCopy = $rows; |
| 222 | | - while( $rows ) { |
| 223 | | - $batch = array_splice( $rows, 0, $this->insert_batch_size ); |
| 224 | | - $this->dbw->insert( |
| 225 | | - 'article_feedback_stats', |
| 226 | | - $batch, |
| 227 | | - __METHOD__ |
| 228 | | - ); |
| 229 | | - $rowsInserted += count( $batch ); |
| 230 | | - $this->syncDBs(); |
| 231 | | - $this->output( "Inserted " . $rowsInserted . " rows\n" ); |
| 232 | | - } |
| 233 | | - $this->output( "Done.\n" ); |
| 234 | | - |
| 235 | | - // populate cache with current problem articles |
| 236 | | - $this->output( "Caching latest problems (if cache present).\n" ); |
| 237 | | - // grab the article feedback special page so we can reuse the data structure building code |
| 238 | | - // FIXME this logic should not be in the special page class |
| 239 | | - $problems = SpecialArticleFeedbackv5::buildProblems( $rowsCopy ); |
| 240 | | - // stash the data structure in the cache |
| 241 | | - $key = wfMemcKey( 'article_feedback_stats_problems' ); |
| 242 | | - $wgMemc->set( $key, $problems, 86400 ); |
| 243 | | - $this->output( "Done.\n" ); |
| 244 | | - } |
| 245 | | - |
| 246 | | - /** |
| 247 | | - * Populate stats about highest/lowest rated articles |
| 248 | | - */ |
| 249 | | - public function populateHighsLows() { |
| 250 | | - global $wgMemc; |
| 251 | | - |
| 252 | | - $averages = array(); // store overall averages for a given page |
| 253 | | - |
| 254 | | - /** |
| 255 | | - * Check to see if we already have a collection of pages to operate on. |
| 256 | | - * If not, generate the collection of pages and their associated ratings. |
| 257 | | - */ |
| 258 | | - if ( !isset( $this->pages )) { |
| 259 | | - $ts = $this->getLowerBoundTimestamp(); |
| 260 | | - $this->pages = $this->populatePageRatingsSince( $ts ); |
| 261 | | - } |
| 262 | | - |
| 263 | | - // determine the average ratings for a given page |
| 264 | | - $this->output( "Determining average ratings for articles ...\n" ); |
| 265 | | - foreach ( $this->pages as $page ) { |
| 266 | | - // make sure that we have more rating sets than the req'd threshold for this page in order to qualify for ranking |
| 267 | | - if ( $page->rating_set_count < $this->rating_set_threshold ) { |
| 268 | | - continue; |
| 269 | | - } |
| 270 | | - |
| 271 | | - // calculate the rating averages if they haven't already been calculated |
| 272 | | - if ( !count( $page->rating_averages )) { |
| 273 | | - $page->calculateRatingAverages(); |
| 274 | | - } |
| 275 | | - |
| 276 | | - // store overall average rating separately so we can easily sort |
| 277 | | - $averages[ $page->page_id ] = $page->overall_average; |
| 278 | | - } |
| 279 | | - $this->output( "Done.\n" ); |
| 280 | | - |
| 281 | | - // determine highest 50 and lowest 50 |
| 282 | | - $this->output( "Determining 50 highest and 50 lowest rated articles...\n" ); |
| 283 | | - asort( $averages ); |
| 284 | | - // take lowest 50 and highest 50 |
| 285 | | - $highest_and_lowest_page_ids = array_slice( $averages, 0, 50, true ); |
| 286 | | - if ( count( $averages ) > 50 ) { |
| 287 | | - // in the event that we have < 100 $averages total, this will still |
| 288 | | - // work nicely - it will select duplicate averages, but the += |
| 289 | | - // will cause items with the same keys to essentially be ignored |
| 290 | | - $highest_and_lowest_page_ids += array_slice( $averages, -50, 50, true ); |
| 291 | | - } |
| 292 | | - $this->output( "Done\n" ); |
| 293 | | - |
| 294 | | - // fetch stats type id - add stat type if it's non-existent |
| 295 | | - $stats_type_id = SpecialArticleFeedbackv5::getStatsTypeId( 'highs_and_lows' ); |
| 296 | | - if ( !$stats_type_id ) { |
| 297 | | - $stats_type_id = $this->addStatType( 'highs_and_lows' ); |
| 298 | | - } |
| 299 | | - |
| 300 | | - // prepare data for insert into db |
| 301 | | - $this->output( "Preparing data for db insertion ...\n"); |
| 302 | | - $cur_ts = $this->dbw->timestamp(); |
| 303 | | - $rows = array(); |
| 304 | | - foreach( $highest_and_lowest_page_ids as $page_id => $overall_average ) { |
| 305 | | - $page = $this->pages->getPage( $page_id ); |
| 306 | | - $rows[] = array( |
| 307 | | - 'afs_page_id' => $page_id, |
| 308 | | - 'afs_orderable_data' => $page->overall_average, |
| 309 | | - 'afs_data' => FormatJson::encode( $page->rating_averages ), |
| 310 | | - 'afs_ts' => $cur_ts, |
| 311 | | - 'afs_stats_type_id' => $stats_type_id, |
| 312 | | - ); |
| 313 | | - } |
| 314 | | - $this->output( "Done.\n" ); |
| 315 | | - |
| 316 | | - // insert data to db |
| 317 | | - $this->output( "Writing data to article_feedback_stats ...\n" ); |
| 318 | | - $rowsInserted = 0; |
| 319 | | - // $rows is gonna be modified by array_splice(), so make a copy for later use |
| 320 | | - $rowsCopy = $rows; |
| 321 | | - while( $rows ) { |
| 322 | | - $batch = array_splice( $rows, 0, $this->insert_batch_size ); |
| 323 | | - $this->dbw->insert( |
| 324 | | - 'article_feedback_stats', |
| 325 | | - $batch, |
| 326 | | - __METHOD__ |
| 327 | | - ); |
| 328 | | - $rowsInserted += count( $batch ); |
| 329 | | - $this->syncDBs(); |
| 330 | | - $this->output( "Inserted " . $rowsInserted . " rows\n" ); |
| 331 | | - } |
| 332 | | - $this->output( "Done.\n" ); |
| 333 | | - |
| 334 | | - // loading data into cache |
| 335 | | - $this->output( "Caching latest highs/lows (if cache present).\n" ); |
| 336 | | - $key = wfMemcKey( 'article_feedback_stats_highs_lows' ); |
| 337 | | - // grab the article feedback special page so we can reuse the data structure building code |
| 338 | | - // FIXME this logic should not be in the special page class |
| 339 | | - $highs_lows = SpecialArticleFeedbackv5::buildHighsAndLows( $rowsCopy ); |
| 340 | | - // stash the data structure in the cache |
| 341 | | - $wgMemc->set( $key, $highs_lows, 86400 ); |
| 342 | | - $this->output( "Done\n" ); |
| 343 | | - } |
| 344 | | - |
| 345 | | - /** |
| 346 | | - * Fetch ratings newer than a given time stamp. |
| 347 | | - * |
| 348 | | - * If no timestamp is provided, relies on $this->lowerBoundTimestamp |
| 349 | | - * @param numeric $ts |
| 350 | | - * @return database result object |
| 351 | | - */ |
| 352 | | - public function fetchRatingsNewerThanTs( $ts=null ) { |
| 353 | | - if ( !$ts ) { |
| 354 | | - $ts = $this->getLowerBoundTimestamp(); |
| 355 | | - } |
| 356 | | - |
| 357 | | - if ( !is_numeric( $ts )) { |
| 358 | | - throw new InvalidArgumentException( 'Timestamp expected to be numeric.' ); |
| 359 | | - } |
| 360 | | - |
| 361 | | - $res = $this->dbr->select( |
| 362 | | - 'article_feedback', |
| 363 | | - array( |
| 364 | | - 'aa_revision', |
| 365 | | - 'aa_user_text', |
| 366 | | - 'aa_rating_id', |
| 367 | | - 'aa_user_anon_token', |
| 368 | | - 'aa_page_id', |
| 369 | | - 'aa_rating_value', |
| 370 | | - ), |
| 371 | | - array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $this->dbr->timestamp( $ts ) ) ), |
| 372 | | - __METHOD__, |
| 373 | | - array() |
| 374 | | - ); |
| 375 | | - |
| 376 | | - return $res; |
| 377 | | - } |
| 378 | | - |
| 379 | | - /** |
| 380 | | - * Construct collection of pages and their ratings since a given time stamp |
| 381 | | - * @param $ts |
| 382 | | - * @return object The collection of pages |
| 383 | | - */ |
| 384 | | - public function populatePageRatingsSince( $ts ) { |
| 385 | | - $pages = new AFPages(); |
| 386 | | - // fetch the ratings since the lower bound timestamp |
| 387 | | - $this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts )) . "...\n" ); |
| 388 | | - $res = $this->fetchRatingsNewerThanTs( $ts ); |
| 389 | | - $this->output( "Done.\n" ); |
| 390 | | - |
| 391 | | - // assign the rating data to our data structure |
| 392 | | - $this->output( "Assigning fetched ratings to internal data structure ...\n" ); |
| 393 | | - foreach ( $res as $row ) { |
| 394 | | - // fetch the page from the page store referentially so we can |
| 395 | | - // perform actions on it that will automagically be saved in the |
| 396 | | - // object for easy access later |
| 397 | | - |
| 398 | | - $page =& $pages->getPage( $row->aa_page_id ); |
| 399 | | - |
| 400 | | - // determine the unique hash for a given rating set (page rev + user identifying info) |
| 401 | | - $rating_hash = $row->aa_revision . "|" . $row->aa_user_text . "|" . $row->aa_user_anon_token; |
| 402 | | - |
| 403 | | - // add rating data for this page |
| 404 | | - $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash ); |
| 405 | | - } |
| 406 | | - $this->output( "Done.\n" ); |
| 407 | | - return $pages; |
| 408 | | - } |
| 409 | | - |
| 410 | | - /** |
| 411 | | - * Set $this->lowerBoundTimestamp |
| 412 | | - * @param int $ts |
| 413 | | - */ |
| 414 | | - public function setLowerBoundTimestamp( $ts ) { |
| 415 | | - if ( !is_numeric( $ts )) { |
| 416 | | - throw new InvalidArgumentException( 'Timestamp must be numeric.' ); |
| 417 | | - } |
| 418 | | - $this->lowerBoundTimestamp = $ts; |
| 419 | | - } |
| 420 | | - |
| 421 | | - |
| 422 | | - /** |
| 423 | | - * Get $this->lowerBoundTimestamp |
| 424 | | - * |
| 425 | | - * If it hasn't been set yet, set it based on the defined polling period. |
| 426 | | - * |
| 427 | | - * @return int |
| 428 | | - */ |
| 429 | | - public function getLowerBoundTimestamp() { |
| 430 | | - if ( !$this->lowerBoundTimestamp ) { |
| 431 | | - $timestamp = $this->dbw->timestamp( strtotime( $this->polling_period . ' seconds ago' )); |
| 432 | | - $this->setLowerBoundTimestamp( $timestamp ); |
| 433 | | - } |
| 434 | | - return $this->lowerBoundTimestamp; |
| 435 | | - } |
| 436 | | - |
| 437 | | - /** |
| 438 | | - * Add stat type record to article_feedback_stats_types and return its insert id |
| 439 | | - * @param string $stat_type The identifying name of the stat type (eg 'highs_and_lows') |
| 440 | | - */ |
| 441 | | - public function addStatType( $stat_type ) { |
| 442 | | - $this->dbw->insert( |
| 443 | | - 'article_feedback_stats_types', |
| 444 | | - array( 'afst_type' => $stat_type ), |
| 445 | | - __METHOD__ |
| 446 | | - ); |
| 447 | | - return $this->dbw->insertId(); |
| 448 | | - } |
| 449 | | -} |
| 450 | | - |
| 451 | | -/** |
| 452 | | - * A class to represent a page and data about its ratings |
| 453 | | - */ |
| 454 | | -class AFPage { |
| 455 | | - public $page_id; |
| 456 | | - |
| 457 | | - /** |
| 458 | | - * The number of rating sets recorded for this page |
| 459 | | - * @var int |
| 460 | | - */ |
| 461 | | - public $rating_set_count = 0; |
| 462 | | - |
| 463 | | - /** |
| 464 | | - * An array of ratings for this page |
| 465 | | - * @var array |
| 466 | | - */ |
| 467 | | - public $ratings = array(); |
| 468 | | - |
| 469 | | - /** |
| 470 | | - * An array to hold mean ratings by rating type id |
| 471 | | - * @var array |
| 472 | | - */ |
| 473 | | - public $rating_averages = array(); |
| 474 | | - |
| 475 | | - /** |
| 476 | | - * Mean of all ratings for this page |
| 477 | | - * @var float |
| 478 | | - */ |
| 479 | | - public $overall_average; |
| 480 | | - |
| 481 | | - /** |
| 482 | | - * An array of rating set hashes, which are used to identify unique sets of |
| 483 | | - * ratings |
| 484 | | - * @var array |
| 485 | | - */ |
| 486 | | - protected $rating_set_hashes = array(); |
| 487 | | - |
| 488 | | - public function __construct( $page_id ) { |
| 489 | | - if ( !is_numeric( $page_id )) { |
| 490 | | - throw new Exception( 'Page id must be numeric.' ); |
| 491 | | - } |
| 492 | | - $this->page_id = $page_id; |
| 493 | | - } |
| 494 | | - |
| 495 | | - /** |
| 496 | | - * Add a new rating for this particular page |
| 497 | | - * @param int $rating_id |
| 498 | | - * @param int $rating_value |
| 499 | | - * @param string $rating_set_hash |
| 500 | | - */ |
| 501 | | - public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) { |
| 502 | | - if ( intval( $rating_value ) == 0 ) { |
| 503 | | - // Ignore zero ratings |
| 504 | | - return; |
| 505 | | - } |
| 506 | | - |
| 507 | | - $this->ratings[ $rating_id ][] = $rating_value; |
| 508 | | - |
| 509 | | - if ( $rating_set_hash ) { |
| 510 | | - $this->trackRatingSet( $rating_set_hash ); |
| 511 | | - } |
| 512 | | - } |
| 513 | | - |
| 514 | | - /** |
| 515 | | - * Keep track of rating sets |
| 516 | | - * |
| 517 | | - * Record when we see a new rating set and increment the set count |
| 518 | | - * @param string $rating_set_hash |
| 519 | | - */ |
| 520 | | - protected function trackRatingSet( $rating_set_hash ) { |
| 521 | | - if ( isset( $this->rating_set_hashes[ $rating_set_hash ] )) { |
| 522 | | - return; |
| 523 | | - } |
| 524 | | - |
| 525 | | - $this->rating_set_hashes[ $rating_set_hash ] = 1; |
| 526 | | - $this->rating_set_count += 1; |
| 527 | | - } |
| 528 | | - |
| 529 | | - public function calculateRatingAverages() { |
| 530 | | - // determine averages for each rating type |
| 531 | | - foreach( $this->ratings as $rating_id => $rating ) { |
| 532 | | - $rating_sum = array_sum( $rating ); |
| 533 | | - $rating_avg = $rating_sum / count( $rating ); |
| 534 | | - $this->rating_averages[ $rating_id ] = $rating_avg; |
| 535 | | - } |
| 536 | | - |
| 537 | | - // determine overall rating average for this page |
| 538 | | - if ( count( $this->rating_averages )) { |
| 539 | | - $overall_rating_sum = array_sum( $this->rating_averages ); |
| 540 | | - $overall_rating_average = $overall_rating_sum / count( $this->rating_averages ); |
| 541 | | - } else { |
| 542 | | - $overall_rating_average = 0; |
| 543 | | - } |
| 544 | | - $this->overall_average = $overall_rating_average; |
| 545 | | - } |
| 546 | | - |
| 547 | | - /** |
| 548 | | - * Returns whether or not this page is considered problematic |
| 549 | | - * @return bool |
| 550 | | - */ |
| 551 | | - public function isProblematic() { |
| 552 | | - if ( !isset( $this->problematic )) { |
| 553 | | - $this->determineProblematicStatus(); |
| 554 | | - } |
| 555 | | - return $this->problematic; |
| 556 | | - } |
| 557 | | - |
| 558 | | - /** |
| 559 | | - * Determine whether this article is 'problematic' |
| 560 | | - * |
| 561 | | - * If a page has one or more rating categories where at least 70% of the |
| 562 | | - * ratings are <= 2, it is considered problematic. |
| 563 | | - */ |
| 564 | | - public function determineProblematicStatus() { |
| 565 | | - foreach( $this->ratings as $rating_id => $ratings ) { |
| 566 | | - $count = 0; |
| 567 | | - foreach ( $ratings as $rating ) { |
| 568 | | - if ( $rating <= 2 ) { |
| 569 | | - $count += 1; |
| 570 | | - } |
| 571 | | - } |
| 572 | | - // compare against the exact 70% mark; rounding it let e.g. 8 of 12 (66.7%) qualify |
| 573 | | - $threshold = 0.7 * count( $ratings ); |
| 574 | | - if ( $count >= $threshold ) { |
| 575 | | - $this->problematic = true; |
| 576 | | - return; |
| 577 | | - } |
| 578 | | - } |
| 579 | | - |
| 580 | | - $this->problematic = false; |
| 581 | | - return; |
| 582 | | - } |
| 583 | | -} |
| 584 | | - |
| 585 | | -/** |
| 586 | | - * A storage class to keep track of AFPage objects by page id |
| 587 | | - * |
| 588 | | - * Iterable on array of pages. |
| 589 | | - */ |
| 590 | | -class AFPages implements IteratorAggregate { |
| 591 | | - /** |
| 592 | | - * An array of page rating objects |
| 593 | | - * @var array |
| 594 | | - */ |
| 595 | | - public $pages = array(); |
| 596 | | - |
| 597 | | - public function &getPage( $page_id ) { |
| 598 | | - if ( !isset( $this->pages[ $page_id ] )) { |
| 599 | | - $this->addPage( $page_id ); |
| 600 | | - } |
| 601 | | - return $this->pages[ $page_id ]; |
| 602 | | - } |
| 603 | | - |
| 604 | | - public function addPage( $page_id ) { |
| 605 | | - $this->pages[ $page_id ] = new AFPage( $page_id ); |
| 606 | | - } |
| 607 | | - |
| 608 | | - public function getIterator() { |
| 609 | | - return new ArrayIterator( $this->pages ); |
| 610 | | - } |
| 611 | | -} |
| 612 | | - |
| 613 | | -$maintClass = "PopulateAFStatistics"; |
| 614 | | -require_once( DO_MAINTENANCE ); |
| Index: trunk/extensions/ArticleFeedbackv5/populateAFRevisions.php |
| — | — | @@ -1,144 +0,0 @@ |
| 2 | | -<?php |
| 3 | | - |
| 4 | | -$IP = getenv( 'MW_INSTALL_PATH' ); |
| 5 | | -if ( $IP === false ) { |
| 6 | | - $IP = dirname( __FILE__ ) . '/../..'; |
| 7 | | -} |
| 8 | | -require( "$IP/maintenance/Maintenance.php" ); |
| 9 | | - |
| 10 | | -class PopulateAFRevisions extends Maintenance { |
| 11 | | - const REPORTING_INTERVAL = 100; |
| 12 | | - const BATCH_SIZE = 100; |
| 13 | | - |
| 14 | | - public function __construct() { |
| 15 | | - parent::__construct(); |
| 16 | | - $this->mDescription = "Populates the article_feedback_revisions table"; |
| 17 | | - } |
| 18 | | - |
| 19 | | - public function syncDBs() { |
| 20 | | - // FIXME: Copied from updateCollation.php, should be centralized somewhere |
| 21 | | - $lb = wfGetLB(); |
| 22 | | - // bug 27975 - Don't try to wait for slaves if there are none |
| 23 | | - // Prevents permission error when getting master position |
| 24 | | - if ( $lb->getServerCount() > 1 ) { |
| 25 | | - $dbw = $lb->getConnection( DB_MASTER ); |
| 26 | | - $pos = $dbw->getMasterPos(); |
| 27 | | - $lb->waitForAll( $pos ); |
| 28 | | - } |
| 29 | | - } |
| 30 | | - |
| 31 | | - public function execute() { |
| 32 | | - global $wgArticleFeedbackRatingTypes; |
| 33 | | - |
| 34 | | - $this->output( "Populating article_feedback_revisions table ...\n" ); |
| 35 | | - |
| 36 | | - // Data structure where we accumulate the data |
| 37 | | - // We need this because more recent ratings of the same user to the same page |
| 38 | | - // need to overwrite older ratings |
| 39 | | - // array( pageid => array( 'userid|anontoken' => array( 'revid' => revid, 'ratings' => array( id => value ) ) ) ) |
| 40 | | - $data = array(); |
| 41 | | - |
| 42 | | - $lastRevID = 0; |
| 43 | | - $i = 0; |
| 44 | | - $dbw = wfGetDB( DB_MASTER ); |
| 45 | | - $this->output( "Reading data from article_feedback ...\n" ); |
| 46 | | - while ( true ) { |
| 47 | | - // Get the next revision ID |
| 48 | | - $row = $dbw->selectRow( 'article_feedback', array( 'aa_revision', 'aa_page_id' ), |
| 49 | | - "aa_revision > $lastRevID", __METHOD__, |
| 50 | | - array( 'ORDER BY' => 'aa_revision', 'LIMIT' => 1 ) |
| 51 | | - ); |
| 52 | | - if ( $row === false ) { |
| 53 | | - // No next revision, we're done |
| 54 | | - break; |
| 55 | | - } |
| 56 | | - $revid = intval( $row->aa_revision ); |
| 57 | | - $pageid = intval( $row->aa_page_id ); |
| 58 | | - |
| 59 | | - // Get all article_feedback rows for this revision |
| 60 | | - $res = $dbw->select( 'article_feedback', |
| 61 | | - array( 'aa_rating_id', 'aa_rating_value', 'aa_user_id', 'aa_user_anon_token' ), |
| 62 | | - array( 'aa_revision' => $revid ), |
| 63 | | - __METHOD__ |
| 64 | | - ); |
| 65 | | - |
| 66 | | - // Initialize counts and sums for each rating |
| 67 | | - // If array_keys( $wgArticleFeedbackRatingTypes ) = array( 1, 2, 3, 4 ) this initializes them |
| 68 | | - // to array( 1 => 0, 2 => 0, 3 => 0, 4 => 0 ) |
| 69 | | - $counts = $sums = array_combine( array_keys( $wgArticleFeedbackRatingTypes ), |
| 70 | | - array_fill( 0, count( $wgArticleFeedbackRatingTypes ), 0 ) |
| 71 | | - ); |
| 72 | | - |
| 73 | | - // Process each of the queried rows and update $data |
| 74 | | - foreach ( $res as $row ) { |
| 75 | | - $u = "{$row->aa_user_id}|{$row->aa_user_anon_token}"; |
| 76 | | - // Add entry if not present |
| 77 | | - if ( !isset( $data[$pageid][$u] ) ) { |
| 78 | | - $data[$pageid][$u] = array( 'revid' => $revid ); |
| 79 | | - } |
| 80 | | - // Update the entry if this row belongs to the same or a more recent revision |
| 81 | | - // for the specific user |
| 82 | | - if ( $data[$pageid][$u]['revid'] <= $revid ) { |
| 83 | | - $data[$pageid][$u]['ratings'][$row->aa_rating_id] = $row->aa_rating_value; |
| 84 | | - $data[$pageid][$u]['revid'] = $revid; |
| 85 | | - } |
| 86 | | - } |
| 87 | | - |
| 88 | | - $lastRevID = $revid; |
| 89 | | - // Report progress once every REPORTING_INTERVAL revisions, not on every other iteration |
| 90 | | - $i++; |
| 91 | | - if ( $i % self::REPORTING_INTERVAL === 0 ) { |
| 92 | | - $this->output( "$lastRevID\n" ); |
| 93 | | - } |
| 94 | | - } |
| 95 | | - $this->output( "done\n" ); |
| 96 | | - |
| 97 | | - // Reorganize the data into per-revision counts and totals |
| 98 | | - $data2 = array(); // array( revid => array( 'pageid' => pageid, 'ratings' => array( ratingid => array( 'count' => count, 'total' => total ) ) |
| 99 | | - foreach ( $data as $pageid => $pageData ) { |
| 100 | | - foreach ( $pageData as $user => $userData ) { |
| 101 | | - $data2[$userData['revid']]['pageid'] = $pageid; |
| 102 | | - foreach ( $userData['ratings'] as $id => $value ) { |
| 103 | | - if ( !isset( $data2[$userData['revid']]['ratings'][$id] ) ) { |
| 104 | | - $data2[$userData['revid']]['ratings'][$id] = array( 'count' => 0, 'total' => 0 ); |
| 105 | | - } |
| 106 | | - if ( $value > 0 ) { |
| 107 | | - $data2[$userData['revid']]['ratings'][$id]['count']++; |
| 108 | | - } |
| 109 | | - $data2[$userData['revid']]['ratings'][$id]['total'] += $value; |
| 110 | | - } |
| 111 | | - } |
| 112 | | - } |
| 113 | | - // Reorganize the data again, into DB rows this time |
| 114 | | - $rows = array(); |
| 115 | | - foreach ( $data2 as $revid => $revData ) { |
| 116 | | - foreach ( $revData['ratings'] as $ratingID => $ratingData ) { |
| 117 | | - $rows[] = array( |
| 118 | | - 'afr_page_id' => $revData['pageid'], |
| 119 | | - 'afr_revision' => $revid, |
| 120 | | - 'afr_rating_id' => $ratingID, |
| 121 | | - 'afr_total' => $ratingData['total'], |
| 122 | | - 'afr_count' => $ratingData['count'] |
| 123 | | - ); |
| 124 | | - } |
| 125 | | - } |
| 126 | | - |
| 127 | | - $this->output( "Writing data to article_feedback_revisions ...\n" ); |
| 128 | | - $rowsInserted = 0; |
| 129 | | - while ( $rows ) { |
| 130 | | - $batch = array_splice( $rows, 0, self::BATCH_SIZE ); |
| 131 | | - $dbw->replace( 'article_feedback_revisions', |
| 132 | | - array( array( 'afr_page_id', 'afr_rating_id', 'afr_revision' ) ), |
| 133 | | - $batch, __METHOD__ |
| 134 | | - ); |
| 135 | | - $rowsInserted += count( $batch ); |
| 136 | | - $this->syncDBs(); |
| 137 | | - $this->output( "$rowsInserted rows\n" ); |
| 138 | | - } |
| 139 | | - $this->output( "done\n" ); |
| 140 | | - |
| 141 | | - } |
| 142 | | -} |
| 143 | | - |
| 144 | | -$maintClass = "PopulateAFRevisions"; |
| 145 | | -require_once( DO_MAINTENANCE ); |
| \ No newline at end of file |
| Index: trunk/extensions/ArticleFeedbackv5/sql/FixPropertiesAnonTokenSchema.sql |
| — | — | @@ -1 +0,0 @@ |
| 2 | | -ALTER TABLE /*_*/article_feedback_properties MODIFY afp_user_anon_token varbinary(32) NOT NULL DEFAULT ''; |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddRevisionsTable.sql |
| — | — | @@ -1,15 +0,0 @@ |
| | | --- Aggregated rating totals, stored per page, per revision and per rating type. |
| 2 | | -CREATE TABLE IF NOT EXISTS /*_*/article_feedback_revisions ( |
| 3 | | - -- Foreign key to page.page_id |
| 4 | | - afr_page_id integer unsigned NOT NULL, |
| 5 | | - -- Revision that totals are relevant to |
| 6 | | - afr_revision integer unsigned NOT NULL, |
| 7 | | - -- Rating ID, mapped to a name in $wgArticleFeedbackRatingTypes |
| 8 | | - afr_rating_id integer unsigned NOT NULL, |
| 9 | | - -- Sum (total) of all the ratings for this article revision |
| 10 | | - afr_total integer unsigned NOT NULL, |
| 11 | | - -- Number of ratings |
| 12 | | - afr_count integer unsigned NOT NULL, |
| 13 | | - -- One row per (page, rating type, revision) combination |
| 14 | | - PRIMARY KEY (afr_page_id, afr_rating_id, afr_revision) |
| 15 | | -) /*$wgDBTableOptions*/; |
| Index: trunk/extensions/ArticleFeedbackv5/sql/RenameTables.sql |
| — | — | @@ -1,3 +0,0 @@ |
| | | --- Rename the three article_assessment* tables to their article_feedback* names in one RENAME TABLE statement. |
| 2 | | -RENAME TABLE /*_*/article_assessment_ratings TO /*_*/article_feedback_ratings, |
| 3 | | - /*_*/article_assessment TO /*_*/article_feedback, |
| 4 | | - /*_*/article_assessment_pages TO /*_*/article_feedback_pages; |
| \ No newline at end of file |
| Index: trunk/extensions/ArticleFeedbackv5/sql/FixAnonTokenSchema.sql |
| — | — | @@ -1 +0,0 @@ |
| | | --- Redefine aa_user_anon_token as varbinary(32) NOT NULL with an empty-string default. |
| | | --- MODIFY restates the whole column definition, so every attribute the column should keep is listed here. |
| 2 | | -ALTER TABLE /*_*/article_feedback MODIFY aa_user_anon_token varbinary(32) NOT NULL DEFAULT ''; |
| Index: trunk/extensions/ArticleFeedbackv5/sql/RecreatePK.sql |
| — | — | @@ -1,38 +0,0 @@ |
| 2 | | - |
| 3 | | - |
| | | --- Rebuild article_feedback with an auto-increment primary key (aa_id): |
| | | --- create a copy with the new layout, copy every row into it, drop the original, then rename the copy into place. |
| 4 | | -CREATE TABLE /*_*/article_feedback2 ( |
| 5 | | - aa_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, |
| 6 | | - aa_page_id integer unsigned NOT NULL, |
| 7 | | - aa_user_id integer NOT NULL, |
| 8 | | - aa_user_text varbinary(255) NOT NULL, |
| 9 | | - aa_user_anon_token varbinary(32) NOT NULL DEFAULT '', |
| 10 | | - aa_revision integer unsigned NOT NULL, |
| 11 | | - aa_timestamp binary(14) NOT NULL DEFAULT '', |
| 12 | | - aa_rating_id int unsigned NOT NULL, |
| 13 | | - aa_rating_value int unsigned NOT NULL, |
| 14 | | - aa_design_bucket int unsigned NOT NULL DEFAULT 0 |
| 15 | | -) /*$wgDBTableOptions*/; |
| | | --- Secondary indexes on the new table; the last column of the first index is the new aa_id. |
| 16 | | -CREATE INDEX /*i*/aa_page_user_token_id ON /*_*/article_feedback2 (aa_page_id, aa_user_text, aa_user_anon_token, aa_id); |
| 17 | | -CREATE INDEX /*i*/aa_revision ON /*_*/article_feedback2 (aa_revision); |
| 18 | | -CREATE INDEX /*i*/article_feedback_timestamp ON /*_*/article_feedback2 (aa_timestamp); |
| 19 | | - |
| | | --- Copy all existing rows. aa_id is left out of the column list so AUTO_INCREMENT assigns it, |
| | | --- in the order given by ORDER BY below. |
| 20 | | -INSERT INTO /*_*/article_feedback2 |
| 21 | | - (aa_page_id, aa_user_id, aa_user_text, aa_user_anon_token, aa_revision, aa_timestamp, aa_rating_id, aa_rating_value, aa_design_bucket) |
| 22 | | - SELECT aa_page_id, aa_user_id, aa_user_text, aa_user_anon_token, aa_revision, aa_timestamp, aa_rating_id, aa_rating_value, aa_design_bucket |
| 23 | | - FROM /*_*/article_feedback |
| 24 | | - ORDER BY aa_revision, aa_user_text, aa_rating_id, aa_user_anon_token; |
| 25 | | - |
| | | --- Swap the rebuilt table into place. |
| | | --- NOTE(review): DROP and RENAME are two separate statements; if the patch stops between them no article_feedback table exists. |
| 26 | | -DROP TABLE /*_*/article_feedback; |
| 27 | | -ALTER TABLE /*_*/article_feedback2 RENAME TO /*_*/article_feedback; |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackPageIndex.sql |
| — | — | @@ -1 +0,0 @@ |
| | | --- Composite index on article_feedback covering (aa_page_id, aa_timestamp). |
| 2 | | -CREATE INDEX /*i*/aa_page_id ON /*_*/article_feedback (aa_page_id, aa_timestamp); |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddPropertiesValueText.sql |
| — | — | @@ -1,2 +0,0 @@ |
| | | --- Add a text-valued column to article_feedback_properties; it is NOT NULL and defaults to the empty string. |
| 2 | | -ALTER TABLE /*_*/article_feedback_properties |
| 3 | | - ADD afp_value_text varbinary(255) DEFAULT '' NOT NULL; |
| \ No newline at end of file |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackStatsTable.sql |
| — | — | @@ -1,13 +0,0 @@ |
| | | --- Per-page statistics table: one row per (stat type, insertion-job timestamp, page). |
| | | --- Any existing copy is dropped first, so rows stored by an earlier version of this table are discarded. |
| | | --- The /*_*/ placeholder is required on the DROP so it targets the same (prefixed) table that is created below. |
| 2 | | -DROP TABLE IF EXISTS /*_*/article_feedback_stats; |
| 3 | | -CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats ( |
| | | - -- page the stat row belongs to (presumably page.page_id - confirm) |
| 4 | | - afs_page_id integer unsigned NOT NULL, |
| 5 | | - -- data point to be used for ordering this data |
| 6 | | - afs_orderable_data double unsigned NOT NULL, |
| 7 | | - -- json object of stat data |
| 8 | | - afs_data varbinary(255) NOT NULL, |
| | | - -- stat type; MigrateArticleFeedbackStatsHighsLows.sql fills this from article_feedback_stats_types.afst_id |
| 9 | | - afs_stats_type_id integer unsigned NOT NULL, |
| 10 | | - -- timestamp of insertion job |
| 11 | | - afs_ts binary(14) NOT NULL |
| 12 | | -) /*$wgDBTableOptions*/; |
| | | --- At most one row per page for a given stat type and insertion job. |
| 13 | | -CREATE UNIQUE INDEX /*i*/afs_type_ts_page ON /*_*/article_feedback_stats(afs_stats_type_id, afs_ts, afs_page_id); |
| | | --- Index on (type, job timestamp, orderable data point). The index name must follow /*i*/ with no space in between. |
| 14 | | -CREATE INDEX /*i*/afs_type_ts_orderable ON /*_*/article_feedback_stats (afs_stats_type_id, afs_ts, afs_orderable_data); |
| Index: trunk/extensions/ArticleFeedbackv5/sql/MigrateArticleFeedbackStatsHighsLows.sql |
| — | — | @@ -1,22 +0,0 @@ |
| | | --- Copy every row of article_feedback_stats_highs_lows into the generic article_feedback_stats table, |
| | | --- tagging each copied row with the id of the 'highs_and_lows' stat type, then drop the old table. |
| 2 | | -INSERT INTO /*_*/article_feedback_stats ( |
| 3 | | - afs_page_id, |
| 4 | | - afs_orderable_data, |
| 5 | | - afs_data, |
| 6 | | - afs_ts, |
| 7 | | - afs_stats_type_id |
| 8 | | -) |
| | | --- The SELECT list is positional: it must stay in the same order as the column list above. |
| 9 | | -SELECT |
| 10 | | - afshl_page_id, |
| 11 | | - afshl_avg_overall, |
| 12 | | - afshl_avg_ratings, |
| 13 | | - afshl_ts, |
| 14 | | - afst_id |
| | | --- The two tables are joined without a join condition; the WHERE clause restricts the types table |
| | | --- to the 'highs_and_lows' row(s), so each highs/lows row is paired with that type's afst_id. |
| | | --- NOTE(review): this relies on afst_type being unique, otherwise rows would be duplicated - confirm. |
| 15 | | -FROM |
| 16 | | - /*_*/article_feedback_stats_highs_lows, |
| 17 | | - /*_*/article_feedback_stats_types |
| 18 | | -WHERE |
| 19 | | - /*_*/article_feedback_stats_types.afst_type='highs_and_lows'; |
| 20 | | - |
| | | --- The source table is removed once its rows have been copied. |
| 21 | | -DROP TABLE /*_*/article_feedback_stats_highs_lows; |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackStatsTypeTable.sql |
| — | — | @@ -1,9 +0,0 @@ |
| | | --- Lookup table of statistic types: an auto-increment id (afst_id) per unique type name (afst_type). |
| 2 | | -CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats_types ( |
| 3 | | - afst_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, |
| 4 | | - afst_type varbinary(255) NOT NULL |
| 5 | | -) /*$wgDBTableOptions*/; |
| | | --- Each type name may appear only once. |
| 6 | | -CREATE UNIQUE INDEX /*i*/afst_type ON /*_*/article_feedback_stats_types( afst_type ); |
| 7 | | - |
| | | --- Seed the two known stat types. The /*_*/ placeholder is required so the rows go into the |
| | | --- (possibly prefixed) table created above rather than an unprefixed table of the same name. |
| 8 | | -INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'highs_and_lows' ); |
| 9 | | -INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'problems' ); |
| \ No newline at end of file |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddRatingBucket.sql |
| — | — | @@ -1,2 +0,0 @@ |
| | | --- Add the aa_design_bucket column to article_feedback; existing rows receive the default value 0. |
| 2 | | -ALTER TABLE /*_*/article_feedback |
| 3 | | - ADD aa_design_bucket int unsigned NOT NULL DEFAULT 0; |
| \ No newline at end of file |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackTimestampIndex.sql |
| — | — | @@ -1,2 +0,0 @@ |
| | | --- Single-column index on article_feedback.aa_timestamp. |
| 2 | | -CREATE INDEX /*i*/article_feedback_timestamp ON /*_*/article_feedback (aa_timestamp); |
| \ No newline at end of file |
| Index: trunk/extensions/ArticleFeedbackv5/sql/AddPropertiesTable.sql |
| — | — | @@ -1,15 +0,0 @@ |
| | | --- Key/value properties attached to a set of ratings; each property carries an integer value and a text value. |
| 2 | | -CREATE TABLE /*_*/article_feedback_properties ( |
| 3 | | - -- These three columns mirror the identifying fields of article_feedback, except aa_rating_id. |
| 4 | | - -- They are repeated here because article_feedback has no convenient primary key to reference. |
| 5 | | - afp_revision integer unsigned NOT NULL, |
| 6 | | - afp_user_text varbinary(255) NOT NULL, |
| 7 | | - afp_user_anon_token varbinary(32) NOT NULL DEFAULT '', |
| 8 | | - |
| 9 | | - -- Key/value pairs |
| 10 | | - afp_key varbinary(255) NOT NULL, |
| 11 | | - -- Integer value |
| 12 | | - afp_value integer signed NOT NULL, |
| 13 | | - -- Text value |
| 14 | | - afp_value_text varbinary(255) DEFAULT '' NOT NULL |
| 15 | | -) /*$wgDBTableOptions*/; |
| | | --- At most one row per property key for a given (revision, user text, anon token) combination. |
| 16 | | -CREATE UNIQUE INDEX /*i*/afp_rating_key ON /*_*/article_feedback_properties (afp_revision, afp_user_text, afp_user_anon_token, afp_key); |