Index: trunk/extensions/ArticleFeedback/populateAFStatistics.php |
— | — | @@ -12,65 +12,65 @@ |
13 | 13 | * @var int |
14 | 14 | */ |
15 | 15 | public $insert_batch_size = 100; |
16 | | - |
| 16 | + |
17 | 17 | /** |
18 | 18 | * The period (in seconds) before now for which to gather stats |
19 | 19 | * @var int |
20 | 20 | */ |
21 | 21 | public $polling_period = 86400; |
22 | | - |
| 22 | + |
23 | 23 | /** |
24 | 24 | * The formatted timestamp from which to determine stats |
25 | 25 | * @var int |
26 | 26 | */ |
27 | 27 | protected $lowerBoundTimestamp; |
28 | | - |
| 28 | + |
29 | 29 | /** |
30 | 30 | * DB slave |
31 | 31 | * @var object |
32 | 32 | */ |
33 | 33 | protected $dbr; |
34 | | - |
| 34 | + |
35 | 35 | /** |
36 | 36 | * DB master |
37 | 37 | * @var object |
38 | 38 | */ |
39 | 39 | protected $dbw; |
40 | | - |
| 40 | + |
41 | 41 | /** |
42 | 42 | * Valid operations and their execution methods for this script to perform |
43 | | - * |
| 43 | + * |
44 | 44 | * Operations are passed in as options during run-time - only valid options, |
45 | 45 | * which are defined here, can be executed. Valid operations are mapped here |
46 | 46 | * to a corresponding method ( array( 'operation' => 'method' )) |
47 | 47 | * @var array |
48 | 48 | */ |
49 | | - protected $operation_map = array( |
50 | | - 'highslows' => 'populateHighsLows', |
| 49 | + protected $operation_map = array( |
| 50 | + 'highslows' => 'populateHighsLows', |
51 | 51 | 'problems' => 'populateProblems', |
52 | 52 | ); |
53 | | - |
| 53 | + |
54 | 54 | /** |
55 | 55 | * Operations to execute |
56 | 56 | * @var array |
57 | 57 | */ |
58 | 58 | public $operations = array(); |
59 | | - |
| 59 | + |
60 | 60 | /** |
61 | 61 | * The minimum number of rating sets required before taking some action |
62 | 62 | * @var int |
63 | 63 | */ |
64 | 64 | public $rating_set_threshold = 10; |
65 | | - |
| 65 | + |
66 | 66 | public function __construct() { |
67 | 67 | parent::__construct(); |
68 | 68 | $this->mDescription = "Populates the article feedback stats tables"; |
69 | | - |
| 69 | + |
70 | 70 | $this->addOption( 'op', 'The ArticleFeedback stats gathering operation to run (eg "highslows"). Can specify multiple operations, separated by comma.', true, true ); |
71 | 71 | $this->addOption( 'rating_sets', 'The minimum number of rating sets before taking an action.', false, true ); |
72 | 72 | $this->addOption( 'poll_period', 'The polling period for fetching data, in seconds.', false, true ); |
73 | 73 | } |
74 | | - |
| 74 | + |
75 | 75 | public function syncDBs() { |
76 | 76 | // FIXME: Copied from populateAFRevisions.php, which was copied from updateCollation.php; should be centralized somewhere |
77 | 77 | $lb = wfGetLB(); |
— | — | @@ -82,11 +82,11 @@ |
83 | 83 | $lb->waitForAll( $pos ); |
84 | 84 | } |
85 | 85 | } |
86 | | - |
| 86 | + |
87 | 87 | /** |
88 | 88 | * Bootstrap this maintenance script |
89 | | - * |
90 | | - * Performs operations necessary for this maintenance script to run which |
| 89 | + * |
| 90 | + * Performs operations necessary for this maintenance script to run which |
91 | 91 | * cannot or do not make sense to run in the constructor. |
92 | 92 | */ |
93 | 93 | public function bootstrap() { |
— | — | @@ -120,18 +120,18 @@ |
121 | 121 | } else { |
122 | 122 | $this->polling_period = $polling_period; |
123 | 123 | } |
124 | | - |
| 124 | + |
125 | 125 | // set db objects |
126 | 126 | $this->dbr = wfGetDB( DB_SLAVE ); |
127 | 127 | $this->dbw = wfGetDB( DB_MASTER ); |
128 | 128 | } |
129 | | - |
| 129 | + |
130 | 130 | /** |
131 | 131 | * Check whether or not specified operations are valid. |
132 | | - * |
| 132 | + * |
133 | 133 | * A specified operation is considered valid if it exists |
134 | 134 | * as a key in the operation map. |
135 | | - * |
| 135 | + * |
136 | 136 | * @param array $ops An array of operations to check |
137 | 137 | * @return bool |
138 | 138 | */ |
— | — | @@ -143,21 +143,21 @@ |
144 | 144 | } |
145 | 145 | return true; |
146 | 146 | } |
147 | | - |
| 147 | + |
148 | 148 | public function execute() { |
149 | 149 | // finish bootstrapping the script |
150 | 150 | $this->bootstrap(); |
151 | | - |
| 151 | + |
152 | 152 | // execute requested operations |
153 | 153 | foreach ( $this->operations as $operation ) { |
154 | 154 | $method = $this->operation_map[ $operation ]; |
155 | 155 | $this->$method(); |
156 | 156 | } |
157 | 157 | } |
158 | | - |
| 158 | + |
159 | 159 | public function populateProblems() { |
160 | 160 | global $wgMemc; |
161 | | - |
| 161 | + |
162 | 162 | /** |
163 | 163 | * Check to see if we already have a collection of pages to operate on. |
164 | 164 | * If not, generate the collection of pages and their associated ratings. |
— | — | @@ -174,19 +174,19 @@ |
175 | 175 | if ( $page->rating_set_count < $this->rating_set_threshold ) { |
176 | 176 | continue; |
177 | 177 | } |
178 | | - |
| 178 | + |
179 | 179 | if ( $page->isProblematic() ) { |
180 | 180 | $problems[] = $page->page_id; |
181 | 181 | } |
182 | 182 | } |
183 | | - |
| 183 | + |
184 | 184 | // populate stats table with problem articles & associated data |
185 | 185 | // fetch stats type id - add stat type if it's non-existent |
186 | 186 | $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'problems' ); |
187 | 187 | if ( !$stats_type_id ) { |
188 | 188 | $stats_type_id = $this->addStatType( 'problems' ); |
189 | 189 | } |
190 | | - |
| 190 | + |
191 | 191 | $rows = array(); |
192 | 192 | $cur_ts = $this->dbw->timestamp(); |
193 | 193 | $count = 0; |
— | — | @@ -199,7 +199,7 @@ |
200 | 200 | 'afs_ts' => $cur_ts, |
201 | 201 | 'afs_stats_type_id' => $stats_type_id, |
202 | 202 | ); |
203 | | - |
| 203 | + |
204 | 204 | $count++; |
205 | 205 | if ( $count >= 50 ) { |
206 | 206 | // No more than 50 |
— | — | @@ -208,7 +208,7 @@ |
209 | 209 | } |
210 | 210 | } |
211 | 211 | $this->output( "Done.\n" ); |
212 | | - |
| 212 | + |
213 | 213 | // Insert the problem rows into the database |
214 | 214 | $this->output( "Writing data to article_feedback_stats ...\n" ); |
215 | 215 | $rowsInserted = 0; |
— | — | @@ -216,7 +216,7 @@ |
217 | 217 | $rowsCopy = $rows; |
218 | 218 | while( $rows ) { |
219 | 219 | $batch = array_splice( $rows, 0, $this->insert_batch_size ); |
220 | | - $this->dbw->insert( |
| 220 | + $this->dbw->insert( |
221 | 221 | 'article_feedback_stats', |
222 | 222 | $batch, |
223 | 223 | __METHOD__ |
— | — | @@ -226,7 +226,7 @@ |
227 | 227 | $this->output( "Inserted " . $rowsInserted . " rows\n" ); |
228 | 228 | } |
229 | 229 | $this->output( "Done.\n" ); |
230 | | - |
| 230 | + |
231 | 231 | // populate cache with current problem articles |
232 | 232 | $this->output( "Caching latest problems (if cache present).\n" ); |
233 | 233 | // grab the article feedback special page so we can reuse the data structure building code |
— | — | @@ -237,15 +237,15 @@ |
238 | 238 | $wgMemc->set( $key, $problems, 86400 ); |
239 | 239 | $this->output( "Done.\n" ); |
240 | 240 | } |
241 | | - |
| 241 | + |
242 | 242 | /** |
243 | 243 | * Populate stats about highest/lowest rated articles |
244 | 244 | */ |
245 | 245 | public function populateHighsLows() { |
246 | 246 | global $wgMemc; |
247 | | - |
| 247 | + |
248 | 248 | $averages = array(); // store overall averages for a given page |
249 | | - |
| 249 | + |
250 | 250 | /** |
251 | 251 | * Check to see if we already have a collection of pages to operate on. |
252 | 252 | * If not, generate the collection of pages and their associated ratings. |
— | — | @@ -262,12 +262,12 @@ |
263 | 263 | if ( $page->rating_set_count < $this->rating_set_threshold ) { |
264 | 264 | continue; |
265 | 265 | } |
266 | | - |
| 266 | + |
267 | 267 | // calculate the rating averages if they haven't already been calculated |
268 | 268 | if ( !count( $page->rating_averages )) { |
269 | 269 | $page->calculateRatingAverages(); |
270 | 270 | } |
271 | | - |
| 271 | + |
272 | 272 | // store overall average rating separately so we can easily sort |
273 | 273 | $averages[ $page->page_id ] = $page->overall_average; |
274 | 274 | } |
— | — | @@ -285,13 +285,13 @@ |
286 | 286 | $highest_and_lowest_page_ids += array_slice( $averages, -50, 50, true ); |
287 | 287 | } |
288 | 288 | $this->output( "Done\n" ); |
289 | | - |
| 289 | + |
290 | 290 | // fetch stats type id - add stat type if it's non-existent |
291 | 291 | $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'highs_and_lows' ); |
292 | 292 | if ( !$stats_type_id ) { |
293 | 293 | $stats_type_id = $this->addStatType( 'highs_and_lows' ); |
294 | 294 | } |
295 | | - |
| 295 | + |
296 | 296 | // prepare data for insert into db |
297 | 297 | $this->output( "Preparing data for db insertion ...\n"); |
298 | 298 | $cur_ts = $this->dbw->timestamp(); |
— | — | @@ -315,7 +315,7 @@ |
316 | 316 | $rowsCopy = $rows; |
317 | 317 | while( $rows ) { |
318 | 318 | $batch = array_splice( $rows, 0, $this->insert_batch_size ); |
319 | | - $this->dbw->insert( |
| 319 | + $this->dbw->insert( |
320 | 320 | 'article_feedback_stats', |
321 | 321 | $batch, |
322 | 322 | __METHOD__ |
— | — | @@ -325,7 +325,7 @@ |
326 | 326 | $this->output( "Inserted " . $rowsInserted . " rows\n" ); |
327 | 327 | } |
328 | 328 | $this->output( "Done.\n" ); |
329 | | - |
| 329 | + |
330 | 330 | // loading data into cache |
331 | 331 | $this->output( "Caching latest highs/lows (if cache present).\n" ); |
332 | 332 | $key = wfMemcKey( 'article_feedback_stats_highs_lows' ); |
— | — | @@ -336,10 +336,10 @@ |
337 | 337 | $wgMemc->set( $key, $highs_lows, 86400 ); |
338 | 338 | $this->output( "Done\n" ); |
339 | 339 | } |
340 | | - |
| 340 | + |
341 | 341 | /** |
342 | 342 | * Fetch ratings newer than a given time stamp. |
343 | | - * |
| 343 | + * |
344 | 344 | * If no timestamp is provided, relies on $this->lowerBoundTimestamp |
345 | 345 | * @param numeric $ts |
346 | 346 | * @return database result object |
— | — | @@ -348,60 +348,60 @@ |
349 | 349 | if ( !$ts ) { |
350 | 350 | $ts = $this->getLowerBoundTimestamp(); |
351 | 351 | } |
352 | | - |
| 352 | + |
353 | 353 | if ( !is_numeric( $ts )) { |
354 | 354 | throw new InvalidArgumentException( 'Timestamp expected to be numeric.' ); |
355 | 355 | } |
356 | | - |
| 356 | + |
357 | 357 | $res = $this->dbr->select( |
358 | | - 'article_feedback', |
359 | | - array( |
| 358 | + 'article_feedback', |
| 359 | + array( |
360 | 360 | 'aa_revision', |
361 | 361 | 'aa_user_text', |
362 | 362 | 'aa_rating_id', |
363 | 363 | 'aa_user_anon_token', |
364 | | - 'aa_page_id', |
| 364 | + 'aa_page_id', |
365 | 365 | 'aa_rating_value', |
366 | | - ), |
| 366 | + ), |
367 | 367 | array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $this->dbr->timestamp( $ts ) ) ), |
368 | 368 | __METHOD__, |
369 | 369 | array() |
370 | 370 | ); |
371 | | - |
| 371 | + |
372 | 372 | return $res; |
373 | 373 | } |
374 | | - |
| 374 | + |
375 | 375 | /** |
376 | 376 | * Construct collection of pages and their ratings since a given time stamp |
377 | 377 | * @param $ts |
378 | 378 | * @return object The collection of pages |
379 | 379 | */ |
380 | 380 | public function populatePageRatingsSince( $ts ) { |
381 | | - $pages = new Pages(); |
| 381 | + $pages = new AFPages(); |
382 | 382 | // fetch the ratings since the lower bound timestamp |
383 | 383 | $this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts )) . "...\n" ); |
384 | 384 | $res = $this->fetchRatingsNewerThanTs( $ts ); |
385 | 385 | $this->output( "Done.\n" ); |
386 | | - |
| 386 | + |
387 | 387 | // assign the rating data to our data structure |
388 | 388 | $this->output( "Assigning fetched ratings to internal data structure ...\n" ); |
389 | 389 | foreach ( $res as $row ) { |
390 | 390 | // fetch the page from the page store referentially so we can |
391 | 391 | // perform actions on it that will automagically be saved in the |
392 | 392 | // object for easy access later |
393 | | - |
| 393 | + |
394 | 394 | $page =& $pages->getPage( $row->aa_page_id ); |
395 | | - |
| 395 | + |
396 | 396 | // determine the unique hash for a given rating set (page rev + user identifying info) |
397 | 397 | $rating_hash = $row->aa_revision . "|" . $row->aa_user_text . "|" . $row->aa_user_anon_token; |
398 | | - |
| 398 | + |
399 | 399 | // add rating data for this page |
400 | | - $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash ); |
| 400 | + $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash ); |
401 | 401 | } |
402 | 402 | $this->output( "Done.\n" ); |
403 | 403 | return $pages; |
404 | 404 | } |
405 | | - |
| 405 | + |
406 | 406 | /** |
407 | 407 | * Set $this->lowerBoundTimestamp |
408 | 408 | * @param int $ts |
— | — | @@ -412,13 +412,13 @@ |
413 | 413 | } |
414 | 414 | $this->lowerBoundTimestamp = $ts; |
415 | 415 | } |
416 | | - |
417 | 416 | |
| 417 | + |
418 | 418 | /** |
419 | 419 | * Get $this->lowerBoundTimestamp |
420 | | - * |
| 420 | + * |
421 | 421 | * If it hasn't been set yet, set it based on the defined polling period. |
422 | | - * |
| 422 | + * |
423 | 423 | * @return int |
424 | 424 | */ |
425 | 425 | public function getLowerBoundTimestamp() { |
— | — | @@ -428,13 +428,13 @@ |
429 | 429 | } |
430 | 430 | return $this->lowerBoundTimestamp; |
431 | 431 | } |
432 | | - |
| 432 | + |
433 | 433 | /** |
434 | 434 | * Add stat type record to article_feedback_stats_types |
435 | 435 | * @param string $stat_type The identifying name of the stat type (eg 'highs_lows') |
436 | 436 | */ |
437 | 437 | public function addStatType( $stat_type ) { |
438 | | - $this->dbw->insert( |
| 438 | + $this->dbw->insert( |
439 | 439 | 'article_feedback_stats', |
440 | 440 | array( 'afst_type' => $stat_type ), |
441 | 441 | __METHOD__ |
— | — | @@ -446,15 +446,15 @@ |
447 | 447 | /** |
448 | 448 | * A class to represent a page and data about its ratings |
449 | 449 | */ |
450 | | -class Page { |
| 450 | +class AFPage { |
451 | 451 | public $page_id; |
452 | | - |
| 452 | + |
453 | 453 | /** |
454 | 454 | * The number of rating sets recorded for this page |
455 | 455 | * @var int |
456 | 456 | */ |
457 | 457 | public $rating_set_count = 0; |
458 | | - |
| 458 | + |
459 | 459 | /** |
460 | 460 | * An array of ratings for this page |
461 | 461 | * @var array |
— | — | @@ -466,27 +466,27 @@ |
467 | 467 | * @var array |
468 | 468 | */ |
469 | 469 | public $rating_averages = array(); |
470 | | - |
| 470 | + |
471 | 471 | /** |
472 | 472 | * Mean of all ratings for this page |
473 | 473 | * @var float |
474 | 474 | */ |
475 | 475 | public $overall_average; |
476 | | - |
| 476 | + |
477 | 477 | /** |
478 | 478 | * An array of rating set hashes, which are used to identify unique sets of |
479 | 479 | * ratings |
480 | 480 | * @var array |
481 | 481 | */ |
482 | 482 | protected $rating_set_hashes = array(); |
483 | | - |
| 483 | + |
484 | 484 | public function __construct( $page_id ) { |
485 | 485 | if ( !is_numeric( $page_id )) { |
486 | 486 | throw new Exception( 'Page id must be numeric.' ); |
487 | 487 | } |
488 | 488 | $this->page_id = $page_id; |
489 | 489 | } |
490 | | - |
| 490 | + |
491 | 491 | /** |
492 | 492 | * Add a new rating for this particular page |
493 | 493 | * @param int $rating_id |
— | — | @@ -495,15 +495,15 @@ |
496 | 496 | */ |
497 | 497 | public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) { |
498 | 498 | $this->ratings[ $rating_id ][] = $rating_value; |
499 | | - |
| 499 | + |
500 | 500 | if ( $rating_set_hash ) { |
501 | | - $this->trackRatingSet( $rating_set_hash ); |
| 501 | + $this->trackRatingSet( $rating_set_hash ); |
502 | 502 | } |
503 | 503 | } |
504 | | - |
| 504 | + |
505 | 505 | /** |
506 | 506 | * Keep track of rating sets |
507 | | - * |
| 507 | + * |
508 | 508 | * Record when we see a new rating set and increment the set count |
509 | 509 | * @param string $rating_set_hash |
510 | 510 | */ |
— | — | @@ -511,11 +511,11 @@ |
512 | 512 | if ( isset( $this->rating_set_hashes[ $rating_set_hash ] )) { |
513 | 513 | return; |
514 | 514 | } |
515 | | - |
| 515 | + |
516 | 516 | $this->rating_set_hashes[ $rating_set_hash ] = 1; |
517 | 517 | $this->rating_set_count += 1; |
518 | 518 | } |
519 | | - |
| 519 | + |
520 | 520 | public function calculateRatingAverages() { |
521 | 521 | // determine averages for each rating type |
522 | 522 | foreach( $this->ratings as $rating_id => $rating ) { |
— | — | @@ -523,8 +523,8 @@ |
524 | 524 | $rating_avg = $rating_sum / count( $rating ); |
525 | 525 | $this->rating_averages[ $rating_id ] = $rating_avg; |
526 | 526 | } |
527 | | - |
528 | | - // determine overall rating average for this page |
| 527 | + |
| 528 | + // determine overall rating average for this page |
529 | 529 | if ( count( $this->rating_averages )) { |
530 | 530 | $overall_rating_sum = array_sum( $this->rating_averages ); |
531 | 531 | $overall_rating_average = $overall_rating_sum / count( $this->rating_averages ); |
— | — | @@ -533,7 +533,7 @@ |
534 | 534 | } |
535 | 535 | $this->overall_average = $overall_rating_average; |
536 | 536 | } |
537 | | - |
| 537 | + |
538 | 538 | /** |
539 | 539 | * Returns whether or not this page is considered problematic |
540 | 540 | * @return bool |
— | — | @@ -544,11 +544,11 @@ |
545 | 545 | } |
546 | 546 | return $this->problematic; |
547 | 547 | } |
548 | | - |
| 548 | + |
549 | 549 | /** |
550 | 550 | * Determine whether this article is 'problematic' |
551 | 551 | * |
552 | | - * If a page has one or more rating categories where 70% of the ratings are |
| 552 | + * If a page has one or more rating categories where 70% of the ratings are |
553 | 553 | * <= 2, it is considered problematic. |
554 | 554 | */ |
555 | 555 | public function determineProblematicStatus() { |
— | — | @@ -559,14 +559,14 @@ |
560 | 560 | $count += 1; |
561 | 561 | } |
562 | 562 | } |
563 | | - |
| 563 | + |
564 | 564 | $threshold = round( 0.7 * count( $ratings )); |
565 | 565 | if ( $count >= $threshold ) { |
566 | 566 | $this->problematic = true; |
567 | 567 | return; |
568 | 568 | } |
569 | 569 | } |
570 | | - |
| 570 | + |
571 | 571 | $this->problematic = false; |
572 | 572 | return; |
573 | 573 | } |
— | — | @@ -574,27 +574,27 @@ |
575 | 575 | |
576 | 576 | /** |
577 | 577 | * A storage class to keep track of PageRatings object by page |
578 | | - * |
| 578 | + * |
579 | 579 | * Iterable on array of pages. |
580 | 580 | */ |
581 | | -class Pages implements IteratorAggregate { |
| 581 | +class AFPages implements IteratorAggregate { |
582 | 582 | /** |
583 | 583 | * An array of page rating objects |
584 | 584 | * @var array |
585 | 585 | */ |
586 | 586 | public $pages = array(); |
587 | | - |
| 587 | + |
588 | 588 | public function &getPage( $page_id ) { |
589 | 589 | if ( !isset( $this->pages[ $page_id ] )) { |
590 | 590 | $this->addPage( $page_id ); |
591 | 591 | } |
592 | 592 | return $this->pages[ $page_id ]; |
593 | 593 | } |
594 | | - |
| 594 | + |
595 | 595 | public function addPage( $page_id ) { |
596 | | - $this->pages[ $page_id ] = new Page( $page_id ); |
| 596 | + $this->pages[ $page_id ] = new AFPage( $page_id ); |
597 | 597 | } |
598 | | - |
| 598 | + |
599 | 599 | public function getIterator() { |
600 | 600 | return new ArrayIterator( $this->pages ); |
601 | 601 | } |