r89764 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r89763‎ | r89764 | r89765 >
Date:10:14, 9 June 2011
Author:catrope
Status:deferred
Tags:
Comment:
Fix r89277, there were tons of things wrong with it:

* Fix indexing on the stats table
* Use $foo[] instead of array_push()
* Actually insert the rows for problem articles into the DB
* Use the data we just inserted to build the cache entry, instead of re-querying the data
** Re-querying is generally redundant
** Re-querying from the slave is guaranteed to fail in a replicated environment with transactions
** Tweaked SpecialArticleFeedback::buildProblems() and buildHighsAndLows() to accept arrays as well as database results
* Run potentially user-supplied timestamp through Database::timestamp()
* Declare getPage() to return a reference. The $page =& $pages->getPage() assignment fails with a notice otherwise
* Remove $page['category'] stuff in problems view and make it more like the highs/lows view instead. We simply don't have that data and it was spewing notices
* $page['page'] is an ID, not a title
* Cache the results of Title::newFromID() in SpecialArticleFeedback (there's no built-in caching in Title for this function) and bulk-request arrays of page IDs
* Fix variable name in getProblems(); copypaste mistake was causing it to always return null when there was a cache entry
* Get the latest timestamp per statistic type, not globally. Queries will fail for all but the last-populated statistic otherwise
* Kill getRecentLows(), unused, not even referenced in commented-out code
* Make formatNumber() and getCategories(), which claim to be static and are called statically, actually static
Modified paths:
  • /trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php (modified) (history)
  • /trunk/extensions/ArticleFeedback/populateAFStatistics.php (modified) (history)
  • /trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql (modified) (history)

Diff [purge]

Index: trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php
@@ -113,12 +113,19 @@
114114 */
115115 protected function renderDailyHighsAndLows( $pages, $caption ) {
116116 global $wgOut, $wgUser;
 117+
 118+ // Pre-fill page ID cache
 119+ $ids = array();
 120+ foreach ( $pages as $page ) {
 121+ $ids[] = $page['page'];
 122+ }
 123+ self::populateTitleCache( $ids );
117124
118125 $rows = array();
119126 if ( $pages ) {
120127 foreach ( $pages as $page ) {
121128 $row = array();
122 - $pageTitle = Title::newFromId( $page['page'] );
 129+ $pageTitle = self::getTitleFromID( $page['page'] );
123130 $row['page'] = $wgUser->getSkin()->link( $pageTitle, $pageTitle->getPrefixedText() );
124131 foreach ( $page['ratings'] as $id => $value ) {
125132 $row[] = array(
@@ -194,32 +201,45 @@
195202 protected function renderProblems() {
196203 global $wgOut, $wgUser, $wgArticleFeedbackRatings;
197204
 205+
 206+ $problems = $this->getProblems();
 207+
 208+ // Pre-fill page ID cache
 209+ $ids = array();
 210+ foreach ( $problems as $page ) {
 211+ $ids[] = $page['page'];
 212+ }
 213+ self::populateTitleCache( $ids );
 214+
198215 $rows = array();
199 - foreach ( $this->getProblems() as $page ) {
 216+ foreach ( $problems as $page ) {
200217 $row = array();
201 - $pageTitle = Title::newFromText( $page['page'] );
 218+ $pageTitle = self::getTitleFromID( $page['page'] );
202219 $row['page'] = $wgUser->getSkin()->link( $pageTitle, $pageTitle->getPrefixedText() );
203 - foreach ( $wgArticleFeedbackRatings as $category ) {
 220+ foreach ( $page['ratings'] as $id => $value ) {
204221 $row[] = array(
205 - 'attr' => in_array( $category, $page['categories'] )
206 - ? array(
207 - 'class' => 'articleFeedback-table-column-bad',
208 - 'data-sort-value' => 0
209 - )
210 - : array(
211 - 'class' => 'articleFeedback-table-column-good',
212 - 'data-sort-value' => 1
213 - ),
214 - 'html' => ' '
 222+ 'text' => $this->formatNumber( $value ),
 223+ 'attr' => array(
 224+ 'class' => 'articleFeedback-table-column-rating ' .
 225+ 'articleFeedback-table-column-score-' . round( $value )
 226+ )
215227 );
216228 }
 229+ $row[] = array(
 230+ 'text' => $this->formatNumber( $page['average'] ),
 231+ 'attr' => array(
 232+ 'class' => 'articleFeedback-table-column-average ' .
 233+ 'articleFeedback-table-column-score-' . round( $page['average'] )
 234+ )
 235+ );
217236 $rows[] = $row;
218237 }
219238 $this->renderTable(
220239 wfMsg( 'articleFeedback-table-caption-recentlows' ),
221240 array_merge(
222241 array( wfMsg( 'articleFeedback-table-heading-page' ) ),
223 - self::getCategories()
 242+ self::getCategories(),
 243+ array( wfMsg( 'articleFeedback-table-heading-average' ) )
224244 ),
225245 $rows,
226246 'articleFeedback-table-recentlows'
@@ -235,14 +255,15 @@
236256 $key = wfMemcKey( 'article_feedback_stats_problems' );
237257 $cache = $wgMemc->get( $key );
238258 if ( is_array( $cache )) {
239 - $highs_lows = $cache;
 259+ $problems = $cache;
240260 } else {
241261 $dbr = wfGetDB( DB_SLAVE );
 262+ $typeID = self::getStatsTypeId( 'problems' );
242263 // first find the freshest timestamp
243264 $row = $dbr->selectRow(
244265 'article_feedback_stats',
245266 array( 'afs_ts' ),
246 - "",
 267+ array( 'afs_stats_type_id' => $typeID ),
247268 __METHOD__,
248269 array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 1 )
249270 );
@@ -262,7 +283,7 @@
263284 ),
264285 array(
265286 'afs_ts' => $row->afs_ts,
266 - 'afs_stats_type_id' => self::getStatsTypeId( 'problems' )
 287+ 'afs_stats_type_id' => $typeID
267288 ),
268289 __METHOD__,
269290 array( "ORDER BY" => "afs_orderable_data" )
@@ -292,11 +313,12 @@
293314 $highs_lows = $cache;
294315 } else {
295316 $dbr = wfGetDB( DB_SLAVE );
 317+ $typeID = self::getStatsTypeId( 'highs_and_lows' );
296318 // first find the freshest timestamp
297319 $row = $dbr->selectRow(
298320 'article_feedback_stats',
299321 array( 'afs_ts' ),
300 - "",
 322+ array( 'afs_stats_type_id' => $typeID ),
301323 __METHOD__,
302324 array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 1 )
303325 );
@@ -316,7 +338,7 @@
317339 ),
318340 array(
319341 'afs_ts' => $row->afs_ts,
320 - 'afs_stats_type_id' => self::getStatsTypeId( 'highs_and_lows' )
 342+ 'afs_stats_type_id' => $typeID,
321343 ),
322344 __METHOD__,
323345 array( "ORDER BY" => "afs_orderable_data" )
@@ -374,16 +396,19 @@
375397
376398 /**
377399 * Build data store of highs/lows for use when rendering table
378 - * @param object Database result
 400+ * @param object Database result or array of rows
379401 * @return array
380402 */
381403 public static function buildHighsAndLows( $result ) {
382404 $highs_lows = array();
383405 foreach ( $result as $row ) {
 406+ if ( is_array( $row ) ) {
 407+ $row = (object)$row;
 408+ }
384409 $highs_lows[] = array(
385410 'page' => $row->afs_page_id,
386411 'ratings' => FormatJson::decode( $row->afs_data ),
387 - 'average' => $row->afs_orderable_data
 412+ 'average' => $row->afs_orderable_data
388413 );
389414 }
390415 return $highs_lows;
@@ -391,16 +416,19 @@
392417
393418 /**
394419 * Build data store of problems for use when rendering table
395 - * @param object Database result
 420+ * @param object Database result or array of rows
396421 * @return array
397422 */
398423 public static function buildProblems( $result ) {
399424 $problems = array();
400425 foreach( $result as $row ) {
 426+ if ( is_array( $row ) ) {
 427+ $row = (object)$row;
 428+ }
401429 $problems[] = array(
402430 'page' => $row->afs_page_id,
403431 'ratings' => FormatJson::decode( $row->afs_data ),
404 - 'average' => $row->afs_orderable_data
 432+ 'average' => $row->afs_orderable_data
405433 );
406434 }
407435 return $problems;
@@ -463,54 +491,20 @@
464492 );
465493 }
466494
467 - /**
468 - * Gets a list of articles which have recently recieved exceptionally low ratings.
469 - *
470 - * - Based on any rating category
471 - * - Gets up to 100 most recently poorly rated articles
472 - * - Only consider articles which were rated lower than 3 for 7 out of the last 10 ratings
473 - *
474 - * This data should be updated whenever article ratings are changed, ideally through a hook
475 - */
476 - protected function getRecentLows() {
477 - return array(
478 - array(
479 - 'page' => 'Main Page',
480 - // List of rating IDs that qualify as recent lows
481 - 'categories' => array( 1, 4 ),
482 - ),
483 - array(
484 - 'page' => 'Test Article 1',
485 - 'categories' => array( 1, 3 ),
486 - ),
487 - array(
488 - 'page' => 'Test Article 2',
489 - 'categories' => array( 2, 3 ),
490 - ),
491 - array(
492 - 'page' => 'Test Article 3',
493 - 'categories' => array( 3, 4 ),
494 - ),
495 - array(
496 - 'page' => 'Test Article 4',
497 - 'categories' => array( 1, 2 ),
498 - )
499 - );
500 - }
501 -
502495 /* Protected Static Members */
503496
504497 protected static $categories;
 498+ protected static $titleCache = array();
505499
506500 /* Protected Static Methods */
507501
508 - protected function formatNumber( $number ) {
 502+ protected static function formatNumber( $number ) {
509503 global $wgLang;
510504
511505 return $wgLang->formatNum( number_format( $number, 2 ) );
512506 }
513507
514 - protected function getCategories() {
 508+ protected static function getCategories() {
515509 global $wgArticleFeedbackRatings;
516510
517511 if ( !isset( self::$categories ) ) {
@@ -518,7 +512,8 @@
519513 $res = $dbr->select(
520514 'article_feedback_ratings',
521515 array( 'aar_id', 'aar_rating' ),
522 - array( 'aar_id' => $wgArticleFeedbackRatings )
 516+ array( 'aar_id' => $wgArticleFeedbackRatings ),
 517+ __METHOD__
523518 );
524519 self::$categories = array();
525520 foreach ( $res as $row ) {
@@ -527,4 +522,20 @@
528523 }
529524 return self::$categories;
530525 }
 526+
 527+ protected static function getTitleFromID( $id ) {
 528+ // There's no caching in Title::newFromId() so we hack our own around it
 529+ if ( !isset( self::$titleCache[$id] ) ) {
 530+ self::$titleCache[$id] = Title::newFromId( $id );
 531+ }
 532+ return self::$titleCache[$id];
 533+ }
 534+
 535+ protected static function populateTitleCache( $ids ) {
 536+ $toQuery = array_diff( $ids, array_keys( self::$titleCache ) );
 537+ $titles = Title::newFromIds( $toQuery );
 538+ foreach ( $titles as $title ) {
 539+ self::$titleCache[$title->getArticleID()] = $title;
 540+ }
 541+ }
531542 }
Index: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
@@ -9,5 +9,5 @@
1010 -- timestamp of insertion job
1111 afs_ts binary(14) NOT NULL
1212 ) /*$wgDBTableOptions*/;
13 -CREATE UNIQUE INDEX /*i*/ afs_page_ts_type ON /*_*/article_feedback_stats( afs_page_id, afs_ts, afs_stats_type_id );
14 -CREATE INDEX /*i*/ afs_ts_avg_overall ON /*_*/article_feedback_stats (afs_ts, afs_orderable_data);
 13+CREATE UNIQUE INDEX /*i*/afs_type_ts_page ON /*_*/article_feedback_stats(afs_stats_type_id, afs_ts, afs_page_id);
 14+CREATE INDEX /*i*/ afs_type_ts_orderable ON /*_*/article_feedback_stats (afs_stats_type_id, afs_ts, afs_orderable_data);
Index: trunk/extensions/ArticleFeedback/populateAFStatistics.php
@@ -176,12 +176,12 @@
177177 }
178178
179179 if ( $page->isProblematic() ) {
180 - array_push( $problems, $page->page_id );
 180+ $problems[] = $page->page_id;
181181 }
182182 }
183183
184184 // populate stats table with problem articles & associated data
185 - // fetch stats type id - add stat type if it's non-existant
 185+ // fetch stats type id - add stat type if it's non-existent
186186 $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'problems' );
187187 if ( !$stats_type_id ) {
188188 $stats_type_id = $this->addStatType( 'problems' );
@@ -201,28 +201,31 @@
202202 }
203203 $this->output( "Done.\n" );
204204
 205+ // Insert the problem rows into the database
 206+ $this->output( "Writing data to article_feedback_stats ...\n" );
 207+ $rowsInserted = 0;
 208+ // $rows is gonna be modified by array_splice(), so make a copy for later use
 209+ $rowsCopy = $rows;
 210+ while( $rows ) {
 211+ $batch = array_splice( $rows, 0, $this->insert_batch_size );
 212+ $this->dbw->insert(
 213+ 'article_feedback_stats',
 214+ $batch,
 215+ __METHOD__
 216+ );
 217+ $rowsInserted += count( $batch );
 218+ $this->syncDBs();
 219+ $this->output( "Inserted " . $rowsInserted . " rows\n" );
 220+ }
 221+ $this->output( "Done.\n" );
 222+
205223 // populate cache with current problem articles
206 - // loading data into cache
207224 $this->output( "Caching latest problems (if cache present).\n" );
208 - $key = wfMemcKey( 'article_feedback_stats_problems' );
209 - $result = $this->dbr->select(
210 - 'article_feedback_stats',
211 - array(
212 - 'afs_page_id',
213 - 'afs_orderable_data',
214 - 'afs_data'
215 - ),
216 - array(
217 - 'afs_ts' => $cur_ts,
218 - 'afs_stats_type_id' => $stats_type_id
219 - ),
220 - __METHOD__,
221 - array( "ORDER BY" => "afs_orderable_data" )
222 - );
223225 // grab the article feedback special page so we can reuse the data structure building code
224226 // FIXME this logic should not be in the special page class
225 - $problems = SpecialArticleFeedback::buildProblems( $result );
 227+ $problems = SpecialArticleFeedback::buildProblems( $rowsCopy );
226228 // stash the data structure in the cache
 229+ $key = wfMemcKey( 'article_feedback_stats_problems' );
227230 $wgMemc->set( $key, $problems, 86400 );
228231 $this->output( "Done.\n" );
229232 }
@@ -300,6 +303,8 @@
301304 // insert data to db
302305 $this->output( "Writing data to article_feedback_stats ...\n" );
303306 $rowsInserted = 0;
 307+ // $rows is gonna be modified by array_splice(), so make a copy for later use
 308+ $rowsCopy = $rows;
304309 while( $rows ) {
305310 $batch = array_splice( $rows, 0, $this->insert_batch_size );
306311 $this->dbw->insert(
@@ -316,26 +321,12 @@
317322 // loading data into cache
318323 $this->output( "Caching latest highs/lows (if cache present).\n" );
319324 $key = wfMemcKey( 'article_feedback_stats_highs_lows' );
320 - $result = $this->dbr->select(
321 - 'article_feedback_stats',
322 - array(
323 - 'afs_page_id',
324 - 'afs_orderable_data',
325 - 'afs_data'
326 - ),
327 - array(
328 - 'afs_ts' => $cur_ts,
329 - 'afs_stats_type_id' => $stats_type_id
330 - ),
331 - __METHOD__,
332 - array( "ORDER BY" => "afs_orderable_data" )
333 - );
334325 // grab the article feedback special page so we can reuse the data structure building code
335326 // FIXME this logic should not be in the special page class
336 - $highs_lows = SpecialArticleFeedback::buildHighsAndLows( $result );
 327+ $highs_lows = SpecialArticleFeedback::buildHighsAndLows( $rowsCopy );
337328 // stash the data structure in the cache
338329 $wgMemc->set( $key, $highs_lows, 86400 );
339 - $this->output( "Done\n" );
 330+ $this->output( "Done\n" );
340331 }
341332
342333 /**
@@ -364,7 +355,7 @@
365356 'aa_page_id',
366357 'aa_rating_value',
367358 ),
368 - array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $ts )),
 359+ array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $this->dbr->timestamp( $ts ) ) ),
369360 __METHOD__,
370361 array()
371362 );
@@ -390,6 +381,7 @@
391382 // fetch the page from the page store referentially so we can
392383 // perform actions on it that will automagically be saved in the
393384 // object for easy access later
 385+
394386 $page =& $pages->getPage( $row->aa_page_id );
395387
396388 // determine the unique hash for a given rating set (page rev + user identifying info)
@@ -584,7 +576,7 @@
585577 */
586578 public $pages = array();
587579
588 - public function getPage( $page_id ) {
 580+ public function &getPage( $page_id ) {
589581 if ( !isset( $this->pages[ $page_id ] )) {
590582 $this->addPage( $page_id );
591583 }

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r89277Added 'problem articles' view to dashboard; refactored dashboard code (popula...awjrichards18:32, 1 June 2011

Status & tagging log