r91245 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r91244‎ | r91245 | r91246 >
Date:02:10, 1 July 2011
Author:reedy
Status:ok
Tags:
Comment:
Fix name collision with Page class in core
Modified paths:
  • /trunk/extensions/ArticleFeedback/populateAFStatistics.php (modified) (history)

Diff [purge]

Index: trunk/extensions/ArticleFeedback/populateAFStatistics.php
@@ -12,65 +12,65 @@
1313 * @var int
1414 */
1515 public $insert_batch_size = 100;
16 -
 16+
1717 /**
1818 * The period (in seconds) before now for which to gather stats
1919 * @var int
2020 */
2121 public $polling_period = 86400;
22 -
 22+
2323 /**
2424 * The formatted timestamp from which to determine stats
2525 * @var int
2626 */
2727 protected $lowerBoundTimestamp;
28 -
 28+
2929 /**
3030 * DB slave
3131 * @var object
3232 */
3333 protected $dbr;
34 -
 34+
3535 /**
3636 * DB master
3737 * @var object
3838 */
3939 protected $dbw;
40 -
 40+
4141 /**
4242 * Valid operations and their execution methods for this script to perform
43 - *
 43+ *
4444 * Operations are passed in as options during run-time - only valid options,
4545 * which are defined here, can be executed. Valid operations are mapped here
4646 * to a corresponding method ( array( 'operation' => 'method' ))
4747 * @var array
4848 */
49 - protected $operation_map = array(
50 - 'highslows' => 'populateHighsLows',
 49+ protected $operation_map = array(
 50+ 'highslows' => 'populateHighsLows',
5151 'problems' => 'populateProblems',
5252 );
53 -
 53+
5454 /**
5555 * Operations to execute
5656 * @var array
5757 */
5858 public $operations = array();
59 -
 59+
6060 /**
6161 * The minimum number of rating sets required before taking some action
6262 * @var int
6363 */
6464 public $rating_set_threshold = 10;
65 -
 65+
6666 public function __construct() {
6767 parent::__construct();
6868 $this->mDescription = "Populates the article feedback stats tables";
69 -
 69+
7070 $this->addOption( 'op', 'The ArticleFeedback stats gathering operation to run (eg "highslows"). Can specify multiple operations, separated by comma.', true, true );
7171 $this->addOption( 'rating_sets', 'The minimum number of rating sets before taking an action.', false, true );
7272 $this->addOption( 'poll_period', 'The polling period for fetching data, in seconds.', false, true );
7373 }
74 -
 74+
7575 public function syncDBs() {
7676 // FIXME: Copied from populateAFRevisions.php, which was copied from updateCollation.php, should be centralized somewhere
7777 $lb = wfGetLB();
@@ -82,11 +82,11 @@
8383 $lb->waitForAll( $pos );
8484 }
8585 }
86 -
 86+
8787 /**
8888 * Bootstrap this maintenance script
89 - *
90 - * Performs operations necessary for this maintenance script to run which
 89+ *
 90+ * Performs operations necessary for this maintenance script to run which
9191 * cannot or do not make sense to run in the constructor.
9292 */
9393 public function bootstrap() {
@@ -120,18 +120,18 @@
121121 } else {
122122 $this->polling_period = $polling_period;
123123 }
124 -
 124+
125125 // set db objects
126126 $this->dbr = wfGetDB( DB_SLAVE );
127127 $this->dbw = wfGetDB( DB_MASTER );
128128 }
129 -
 129+
130130 /**
131131 * Check whether or not specified operations are valid.
132 - *
 132+ *
133133 * A specified operation is considered valid if it exists
134134 * as a key in the operation map.
135 - *
 135+ *
136136 * @param array $ops An array of operations to check
137137 * @return bool
138138 */
@@ -143,21 +143,21 @@
144144 }
145145 return true;
146146 }
147 -
 147+
148148 public function execute() {
149149 // finish bootstrapping the script
150150 $this->bootstrap();
151 -
 151+
152152 // execute requested operations
153153 foreach ( $this->operations as $operation ) {
154154 $method = $this->operation_map[ $operation ];
155155 $this->$method();
156156 }
157157 }
158 -
 158+
159159 public function populateProblems() {
160160 global $wgMemc;
161 -
 161+
162162 /**
163163 * Check to see if we already have a collection of pages to operate on.
164164 * If not, generate the collection of pages and their associated ratings.
@@ -174,19 +174,19 @@
175175 if ( $page->rating_set_count < $this->rating_set_threshold ) {
176176 continue;
177177 }
178 -
 178+
179179 if ( $page->isProblematic() ) {
180180 $problems[] = $page->page_id;
181181 }
182182 }
183 -
 183+
184184 // populate stats table with problem articles & associated data
185185 // fetch stats type id - add stat type if it's non-existent
186186 $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'problems' );
187187 if ( !$stats_type_id ) {
188188 $stats_type_id = $this->addStatType( 'problems' );
189189 }
190 -
 190+
191191 $rows = array();
192192 $cur_ts = $this->dbw->timestamp();
193193 $count = 0;
@@ -199,7 +199,7 @@
200200 'afs_ts' => $cur_ts,
201201 'afs_stats_type_id' => $stats_type_id,
202202 );
203 -
 203+
204204 $count++;
205205 if ( $count >= 50 ) {
206206 // No more than 50
@@ -208,7 +208,7 @@
209209 }
210210 }
211211 $this->output( "Done.\n" );
212 -
 212+
213213 // Insert the problem rows into the database
214214 $this->output( "Writing data to article_feedback_stats ...\n" );
215215 $rowsInserted = 0;
@@ -216,7 +216,7 @@
217217 $rowsCopy = $rows;
218218 while( $rows ) {
219219 $batch = array_splice( $rows, 0, $this->insert_batch_size );
220 - $this->dbw->insert(
 220+ $this->dbw->insert(
221221 'article_feedback_stats',
222222 $batch,
223223 __METHOD__
@@ -226,7 +226,7 @@
227227 $this->output( "Inserted " . $rowsInserted . " rows\n" );
228228 }
229229 $this->output( "Done.\n" );
230 -
 230+
231231 // populate cache with current problem articles
232232 $this->output( "Caching latest problems (if cache present).\n" );
233233 // grab the article feedback special page so we can reuse the data structure building code
@@ -237,15 +237,15 @@
238238 $wgMemc->set( $key, $problems, 86400 );
239239 $this->output( "Done.\n" );
240240 }
241 -
 241+
242242 /**
243243 * Populate stats about highest/lowest rated articles
244244 */
245245 public function populateHighsLows() {
246246 global $wgMemc;
247 -
 247+
248248 $averages = array(); // store overall averages for a given page
249 -
 249+
250250 /**
251251 * Check to see if we already have a collection of pages to operate on.
252252 * If not, generate the collection of pages and their associated ratings.
@@ -262,12 +262,12 @@
263263 if ( $page->rating_set_count < $this->rating_set_threshold ) {
264264 continue;
265265 }
266 -
 266+
267267 // calculate the rating averages if they haven't already been calculated
268268 if ( !count( $page->rating_averages )) {
269269 $page->calculateRatingAverages();
270270 }
271 -
 271+
272272 // store overall average rating separately so we can easily sort
273273 $averages[ $page->page_id ] = $page->overall_average;
274274 }
@@ -285,13 +285,13 @@
286286 $highest_and_lowest_page_ids += array_slice( $averages, -50, 50, true );
287287 }
288288 $this->output( "Done\n" );
289 -
 289+
290290 // fetch stats type id - add stat type if it's non-existent
291291 $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'highs_and_lows' );
292292 if ( !$stats_type_id ) {
293293 $stats_type_id = $this->addStatType( 'highs_and_lows' );
294294 }
295 -
 295+
296296 // prepare data for insert into db
297297 $this->output( "Preparing data for db insertion ...\n");
298298 $cur_ts = $this->dbw->timestamp();
@@ -315,7 +315,7 @@
316316 $rowsCopy = $rows;
317317 while( $rows ) {
318318 $batch = array_splice( $rows, 0, $this->insert_batch_size );
319 - $this->dbw->insert(
 319+ $this->dbw->insert(
320320 'article_feedback_stats',
321321 $batch,
322322 __METHOD__
@@ -325,7 +325,7 @@
326326 $this->output( "Inserted " . $rowsInserted . " rows\n" );
327327 }
328328 $this->output( "Done.\n" );
329 -
 329+
330330 // loading data into cache
331331 $this->output( "Caching latest highs/lows (if cache present).\n" );
332332 $key = wfMemcKey( 'article_feedback_stats_highs_lows' );
@@ -336,10 +336,10 @@
337337 $wgMemc->set( $key, $highs_lows, 86400 );
338338 $this->output( "Done\n" );
339339 }
340 -
 340+
341341 /**
342342 * Fetch ratings newer than a given time stamp.
343 - *
 343+ *
344344 * If no timestamp is provided, relies on $this->lowerBoundTimestamp
345345 * @param numeric $ts
346346 * @return database result object
@@ -348,60 +348,60 @@
349349 if ( !$ts ) {
350350 $ts = $this->getLowerBoundTimestamp();
351351 }
352 -
 352+
353353 if ( !is_numeric( $ts )) {
354354 throw new InvalidArgumentException( 'Timestamp expected to be numeric.' );
355355 }
356 -
 356+
357357 $res = $this->dbr->select(
358 - 'article_feedback',
359 - array(
 358+ 'article_feedback',
 359+ array(
360360 'aa_revision',
361361 'aa_user_text',
362362 'aa_rating_id',
363363 'aa_user_anon_token',
364 - 'aa_page_id',
 364+ 'aa_page_id',
365365 'aa_rating_value',
366 - ),
 366+ ),
367367 array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $this->dbr->timestamp( $ts ) ) ),
368368 __METHOD__,
369369 array()
370370 );
371 -
 371+
372372 return $res;
373373 }
374 -
 374+
375375 /**
376376 * Construct collection of pages and their ratings since a given time stamp
377377 * @param $ts
378378 * @return object The collection of pages
379379 */
380380 public function populatePageRatingsSince( $ts ) {
381 - $pages = new Pages();
 381+ $pages = new AFPages();
382382 // fetch the ratings since the lower bound timestamp
383383 $this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts )) . "...\n" );
384384 $res = $this->fetchRatingsNewerThanTs( $ts );
385385 $this->output( "Done.\n" );
386 -
 386+
387387 // assign the rating data to our data structure
388388 $this->output( "Assigning fetched ratings to internal data structure ...\n" );
389389 foreach ( $res as $row ) {
390390 // fetch the page from the page store referentially so we can
391391 // perform actions on it that will automagically be saved in the
392392 // object for easy access later
393 -
 393+
394394 $page =& $pages->getPage( $row->aa_page_id );
395 -
 395+
396396 // determine the unique hash for a given rating set (page rev + user identifying info)
397397 $rating_hash = $row->aa_revision . "|" . $row->aa_user_text . "|" . $row->aa_user_anon_token;
398 -
 398+
399399 // add rating data for this page
400 - $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash );
 400+ $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash );
401401 }
402402 $this->output( "Done.\n" );
403403 return $pages;
404404 }
405 -
 405+
406406 /**
407407 * Set $this->lowerBoundTimestamp
408408 * @param int $ts
@@ -412,13 +412,13 @@
413413 }
414414 $this->lowerBoundTimestamp = $ts;
415415 }
416 -
417416
 417+
418418 /**
419419 * Get $this->lowerBoundTimestamp
420 - *
 420+ *
421421 * If it hasn't been set yet, set it based on the defined polling period.
422 - *
 422+ *
423423 * @return int
424424 */
425425 public function getLowerBoundTimestamp() {
@@ -428,13 +428,13 @@
429429 }
430430 return $this->lowerBoundTimestamp;
431431 }
432 -
 432+
433433 /**
434434 * Add stat type record to article_feedback_stats_types
435435 * @param string $stat_type The identifying name of the stat type (eg 'highs_lows')
436436 */
437437 public function addStatType( $stat_type ) {
438 - $this->dbw->insert(
 438+ $this->dbw->insert(
439439 'article_feedback_stats',
440440 array( 'afst_type' => $stat_type ),
441441 __METHOD__
@@ -446,15 +446,15 @@
447447 /**
448448 * A class to represent a page and data about its ratings
449449 */
450 -class Page {
 450+class AFPage {
451451 public $page_id;
452 -
 452+
453453 /**
454454 * The number of rating sets recorded for this page
455455 * @var int
456456 */
457457 public $rating_set_count = 0;
458 -
 458+
459459 /**
460460 * An array of ratings for this page
461461 * @var array
@@ -466,27 +466,27 @@
467467 * @var array
468468 */
469469 public $rating_averages = array();
470 -
 470+
471471 /**
472472 * Mean of all ratings for this page
473473 * @var float
474474 */
475475 public $overall_average;
476 -
 476+
477477 /**
478478 * An array of rating set hashes, which are used to identify unique sets of
479479 * ratings
480480 * @var array
481481 */
482482 protected $rating_set_hashes = array();
483 -
 483+
484484 public function __construct( $page_id ) {
485485 if ( !is_numeric( $page_id )) {
486486 throw new Exception( 'Page id must be numeric.' );
487487 }
488488 $this->page_id = $page_id;
489489 }
490 -
 490+
491491 /**
492492 * Add a new rating for this particular page
493493 * @param int $rating_id
@@ -495,15 +495,15 @@
496496 */
497497 public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) {
498498 $this->ratings[ $rating_id ][] = $rating_value;
499 -
 499+
500500 if ( $rating_set_hash ) {
501 - $this->trackRatingSet( $rating_set_hash );
 501+ $this->trackRatingSet( $rating_set_hash );
502502 }
503503 }
504 -
 504+
505505 /**
506506 * Keep track of rating sets
507 - *
 507+ *
508508 * Record when we see a new rating set and increment the set count
509509 * @param string $rating_set_hash
510510 */
@@ -511,11 +511,11 @@
512512 if ( isset( $this->rating_set_hashes[ $rating_set_hash ] )) {
513513 return;
514514 }
515 -
 515+
516516 $this->rating_set_hashes[ $rating_set_hash ] = 1;
517517 $this->rating_set_count += 1;
518518 }
519 -
 519+
520520 public function calculateRatingAverages() {
521521 // determine averages for each rating type
522522 foreach( $this->ratings as $rating_id => $rating ) {
@@ -523,8 +523,8 @@
524524 $rating_avg = $rating_sum / count( $rating );
525525 $this->rating_averages[ $rating_id ] = $rating_avg;
526526 }
527 -
528 - // determine overall rating average for this page
 527+
 528+ // determine overall rating average for this page
529529 if ( count( $this->rating_averages )) {
530530 $overall_rating_sum = array_sum( $this->rating_averages );
531531 $overall_rating_average = $overall_rating_sum / count( $this->rating_averages );
@@ -533,7 +533,7 @@
534534 }
535535 $this->overall_average = $overall_rating_average;
536536 }
537 -
 537+
538538 /**
539539 * Returns whether or not this page is considered problematic
540540 * @return bool
@@ -544,11 +544,11 @@
545545 }
546546 return $this->problematic;
547547 }
548 -
 548+
549549 /**
550550 * Determine whether this article is 'problematic'
551551 *
552 - * If a page has one or more rating categories where 70% of the ratings are
 552+ * If a page has one or more rating categories where 70% of the ratings are
553553 * <= 2, it is considered problematic.
554554 */
555555 public function determineProblematicStatus() {
@@ -559,14 +559,14 @@
560560 $count += 1;
561561 }
562562 }
563 -
 563+
564564 $threshold = round( 0.7 * count( $ratings ));
565565 if ( $count >= $threshold ) {
566566 $this->problematic = true;
567567 return;
568568 }
569569 }
570 -
 570+
571571 $this->problematic = false;
572572 return;
573573 }
@@ -574,27 +574,27 @@
575575
576576 /**
577577 * A storage class to keep track of PageRatings object by page
578 - *
 578+ *
579579 * Iterable on array of pages.
580580 */
581 -class Pages implements IteratorAggregate {
 581+class AFPages implements IteratorAggregate {
582582 /**
583583 * An array of page rating objects
584584 * @var array
585585 */
586586 public $pages = array();
587 -
 587+
588588 public function &getPage( $page_id ) {
589589 if ( !isset( $this->pages[ $page_id ] )) {
590590 $this->addPage( $page_id );
591591 }
592592 return $this->pages[ $page_id ];
593593 }
594 -
 594+
595595 public function addPage( $page_id ) {
596 - $this->pages[ $page_id ] = new Page( $page_id );
 596+ $this->pages[ $page_id ] = new AFPage( $page_id );
597597 }
598 -
 598+
599599 public function getIterator() {
600600 return new ArrayIterator( $this->pages );
601601 }

Status & tagging log