r89277 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r89276‎ | r89277 | r89278 >
Date:18:32, 1 June 2011
Author:awjrichards
Status:resolved (Comments)
Tags:
Comment:
Added 'problem articles' view to dashboard; refactored dashboard code (populateAFStatistics.php, primarily); Added new schema sql scripts as well as sql migration script
Modified paths:
  • /trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php (modified) (history)
  • /trunk/extensions/ArticleFeedback/ArticleFeedback.php (modified) (history)
  • /trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php (modified) (history)
  • /trunk/extensions/ArticleFeedback/populateAFStatistics.php (modified) (history)
  • /trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql (added) (history)
  • /trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql (added) (history)
  • /trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql (added) (history)

Diff [purge]

Index: trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php
@@ -43,8 +43,8 @@
4444
4545 /*
4646 This functionality does not exist yet.
47 - $this->renderWeeklyMostChanged();
48 - $this->renderRecentLows();*/
 47+ $this->renderWeeklyMostChanged();*/
 48+ $this->renderProblems();
4949 } else {
5050 $wgOut->addWikiText( 'This page has been disabled.' );
5151 }
@@ -187,15 +187,15 @@
188188 }
189189
190190 /**
191 - * Renders recent lows
 191+ * Renders problem articles table
192192 *
193193 * @return String: HTML table of recent lows
194194 */
195 - protected function renderRecentLows() {
 195+ protected function renderProblems() {
196196 global $wgOut, $wgUser, $wgArticleFeedbackRatings;
197197
198198 $rows = array();
199 - foreach ( $this->getRecentLows() as $page ) {
 199+ foreach ( $this->getProblems() as $page ) {
200200 $row = array();
201201 $pageTitle = Title::newFromText( $page['page'] );
202202 $row['page'] = $wgUser->getSkin()->link( $pageTitle, $pageTitle->getPrefixedText() );
@@ -227,6 +227,54 @@
228228 }
229229
230230 /**
 231+ * Gets a list of articles which were rated exceptionally low
 232+ */
 233+ protected function getProblems() {
 234+ global $wgMemc;
 235+ // check if we've got results in the cache
 236+ $key = wfMemcKey( 'article_feedback_stats_problems' );
 237+ $cache = $wgMemc->get( $key );
 238+ if ( is_array( $cache )) {
 239+ $highs_lows = $cache;
 240+ } else {
 241+ $dbr = wfGetDB( DB_SLAVE );
 242+ // first find the freshest timestamp
 243+ $row = $dbr->selectRow(
 244+ 'article_feedback_stats',
 245+ array( 'afs_ts' ),
 246+ "",
 247+ __METHOD__,
 248+ array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 1 )
 249+ );
 250+
 251+ // if we have no results, just return
 252+ if ( !$row || !$row->afs_ts ) {
 253+ return array();
 254+ }
 255+
 256+ // select ratings with that ts
 257+ $result = $dbr->select(
 258+ 'article_feedback_stats',
 259+ array(
 260+ 'afs_page_id',
 261+ 'afs_orderable_data',
 262+ 'afs_data'
 263+ ),
 264+ array(
 265+ 'afs_ts' => $row->afs_ts,
 266+ 'afs_stats_type_id' => self::getStatsTypeId( 'problems' )
 267+ ),
 268+ __METHOD__,
 269+ array( "ORDER BY" => "afs_orderable_data" )
 270+ );
 271+ $problems = $this->buildProblems( $result );
 272+ $wgMemc->set( $key, $problems, 86400 );
 273+ }
 274+
 275+ return $problems;
 276+ }
 277+
 278+ /**
231279 * Gets a list of articles which were rated exceptionally high or low.
232280 *
233281 * - Based on average of all rating categories
@@ -237,7 +285,6 @@
238286 */
239287 protected function getDailyHighsAndLows() {
240288 global $wgMemc;
241 -
242289 // check if we've got results in the cache
243290 $key = wfMemcKey( 'article_feedback_stats_highs_lows' );
244291 $cache = $wgMemc->get( $key );
@@ -247,29 +294,32 @@
248295 $dbr = wfGetDB( DB_SLAVE );
249296 // first find the freshest timestamp
250297 $row = $dbr->selectRow(
251 - 'article_feedback_stats_highs_lows',
252 - array( 'afshl_ts' ),
 298+ 'article_feedback_stats',
 299+ array( 'afs_ts' ),
253300 "",
254301 __METHOD__,
255 - array( "ORDER BY" => "afshl_ts DESC", "LIMIT" => 1 )
 302+ array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 1 )
256303 );
257304
258305 // if we have no results, just return
259 - if ( !$row || !$row->afshl_ts ) {
 306+ if ( !$row || !$row->afs_ts ) {
260307 return array();
261308 }
262309
263310 // select ratings with that ts
264311 $result = $dbr->select(
265 - 'article_feedback_stats_highs_lows',
 312+ 'article_feedback_stats',
266313 array(
267 - 'afshl_page_id',
268 - 'afshl_avg_overall',
269 - 'afshl_avg_ratings'
 314+ 'afs_page_id',
 315+ 'afs_orderable_data',
 316+ 'afs_data'
270317 ),
271 - array( 'afshl_ts' => $row->afshl_ts ),
 318+ array(
 319+ 'afs_ts' => $row->afs_ts,
 320+ 'afs_stats_type_id' => self::getStatsTypeId( 'highs_and_lows' )
 321+ ),
272322 __METHOD__,
273 - array( "ORDER BY" => "afshl_avg_overall" )
 323+ array( "ORDER BY" => "afs_orderable_data" )
274324 );
275325 $highs_lows = $this->buildHighsAndLows( $result );
276326 $wgMemc->set( $key, $highs_lows, 86400 );
@@ -331,15 +381,56 @@
332382 $highs_lows = array();
333383 foreach ( $result as $row ) {
334384 $highs_lows[] = array(
335 - 'page' => $row->afshl_page_id,
336 - 'ratings' => FormatJson::decode( $row->afshl_avg_ratings ),
337 - 'average' => $row->afshl_avg_overall
 385+ 'page' => $row->afs_page_id,
 386+ 'ratings' => FormatJson::decode( $row->afs_data ),
 387+ 'average' => $row->afs_orderable_data
338388 );
339389 }
340390 return $highs_lows;
341391 }
342392
343393 /**
 394+ * Build data store of problems for use when rendering table
 395+ * @param object Database result
 396+ * @return array
 397+ */
 398+ public static function buildProblems( $result ) {
 399+ $problems = array();
 400+ foreach( $result as $row ) {
 401+ $problems[] = array(
 402+ 'page' => $row->afs_page_id,
 403+ 'ratings' => FormatJson::decode( $row->afs_data ),
 404+ 'average' => $row->afs_orderable_data
 405+ );
 406+ }
 407+ return $problems;
 408+ }
 409+
 410+ /**
 411+ * Get the stats type id for a given stat type
 412+ * @param string $stats_type
 413+ */
 414+ public static function getStatsTypeId( $stats_type ) {
 415+ global $wgMemc;
 416+ $key = wfMemcKey( 'article_feedback_stats_type_' . $stats_type );
 417+ $cache = $wgMemc->get( $key );
 418+ if ( $cache ) {
 419+ return $cache;
 420+ }
 421+
 422+ $dbr = wfGetDB( DB_SLAVE );
 423+ $row = $dbr->selectRow(
 424+ 'article_feedback_stats_types',
 425+ array( 'afst_id' ),
 426+ array( 'afst_type' => $stats_type ),
 427+ __METHOD__,
 428+ array( )
 429+ );
 430+ $wgMemc->set( $key, $row->afst_id );
 431+ return $row->afst_id;
 432+ }
 433+
 434+ /**
344435 * Gets a list of articles which have quickly changing ratings.
345436 *
346437 * - Based on any rating category
Index: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
@@ -0,0 +1,13 @@
 2+DROP TABLE IF EXISTS article_feedback_stats;
 3+CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats (
 4+ afs_page_id integer unsigned NOT NULL,
 5+ -- data point to be used for ordering this data
 6+ afs_orderable_data double unsigned NOT NULL,
 7+ -- json object of stat data
 8+ afs_data varbinary(255) NOT NULL,
 9+ afs_stats_type_id integer unsigned NOT NULL,
 10+ -- timestamp of insertion job
 11+ afs_ts binary(14) NOT NULL
 12+) /*$wgDBTableOptions*/;
 13+CREATE UNIQUE INDEX /*i*/ afs_page_ts_type ON /*_*/ article_feedback_stats( afs_page_id, afs_ts, afs_stats_type_id );
 14+CREATE INDEX /*i*/ afs_ts_avg_overall ON /*_*/article_feedback_stats (afs_ts, afs_orderable_data);
Index: trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql
@@ -0,0 +1,22 @@
 2+-- migrate data from article_feedback_stats_highs_lows into article_feedback_stats
 3+INSERT INTO /*_*/article_feedback_stats (
 4+ afs_page_id,
 5+ afs_orderable_data,
 6+ afs_data,
 7+ afs_ts,
 8+ afs_stats_type_id
 9+)
 10+SELECT
 11+ afshl_page_id,
 12+ afshl_avg_overall,
 13+ afshl_avg_ratings,
 14+ afshl_ts,
 15+ afst_id
 16+FROM
 17+ /*_*/article_feedback_stats_highs_lows,
 18+ /*_*/article_feedback_stats_types
 19+WHERE
 20+ /*_*/article_feedback_stats_types.afst_type='highs_and_lows';
 21+
 22+-- get rid of article_feedback_stats_highs_lows as it is no longer necessary
 23+DROP TABLE /*_*/article_feedback_stats_highs_lows;
Index: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql
@@ -0,0 +1,9 @@
 2+CREATE TABLE IF NOT EXISTS /*_*/ article_feedback_stats_types (
 3+ afst_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
 4+ afst_type varbinary(255) NOT NULL
 5+) /*$wgDBTableOptions*/;
 6+CREATE UNIQUE INDEX /*i*/ afst_type ON /*_*/ article_feedback_stats_types( afst_type );
 7+
 8+-- Pre-populate table with stat types
 9+INSERT INTO article_feedback_stats_types ( afst_type ) VALUES ( 'highs_and_lows' );
 10+INSERT INTO article_feedback_stats_types ( afst_type ) VALUES ( 'problems' );
\ No newline at end of file
Index: trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php
@@ -174,13 +174,53 @@
175175 $dir . '/sql/AddRevisionsTable.sql',
176176 true
177177 ) );
 178+
 179+ if ( $db->tableExists( 'article_feedback_stats_highs_lows') ) {
 180+ if ( !$db->tableExists( 'article_feedback_stats_types' )) {
 181+ // add article_feedback_stats_type if necessaray
 182+ $updater->addExtensionUpdate( array(
 183+ 'addTable',
 184+ 'article_feedback_stats_types',
 185+ $dir . '/sql/AddArticleFeedbackStatsTypesTable.sql',
 186+ true
 187+ ) );
 188+ }
 189+
 190+ $updater->addExtensionUpdate( array(
 191+ 'addTable',
 192+ 'article_feedback_stats',
 193+ $dir . '/sql/AddArticleFeedbackStatsTable.sql',
 194+ true
 195+ ) );
 196+
 197+ // migrate article_feedback_stats_highs_lows to article_feedback_stats
 198+ $updater->addExtensionUpdate( array(
 199+ 'applyPatch',
 200+ $dir . '/sql/MigrateArticleFeedbackStatsHighsLows.sql',
 201+ true
 202+ ) );
 203+ } else {
 204+ // add article_feedback_stats and article_feedback_stats_type
 205+ if ( !$db->tableExists( 'article_feedback_stats_type' )) {
 206+ $updater->addExtensionUpdate( array(
 207+ 'addTable',
 208+ 'article_feedback_stats_types',
 209+ $dir . '/sql/AddArticleFeedbackStatsTypesTable.sql',
 210+ true
 211+ ) );
 212+ }
 213+
 214+ if ( !$db->tableExists( 'article_feedback_stats' )) {
 215+ $updater->addExtensionUpdate( array(
 216+ 'addTable',
 217+ 'article_feedback_stats',
 218+ $dir . '/sql/AddArticleFeedbackStatsTable.sql',
 219+ true
 220+ ) );
 221+ }
 222+ }
 223+
178224 $updater->addExtensionUpdate( array(
179 - 'addTable',
180 - 'article_feedback_stats_highs_lows',
181 - $dir . '/sql/AddStatsHighsLowsTable.sql',
182 - true
183 - ) );
184 - $updater->addExtensionUpdate( array(
185225 'addIndex',
186226 'article_feedback',
187227 'article_feedback_timestamp',
Index: trunk/extensions/ArticleFeedback/populateAFStatistics.php
@@ -37,9 +37,38 @@
3838 */
3939 protected $dbw;
4040
 41+ /**
 42+ * Valid operations and their execution methods for this script to perform
 43+ *
 44+ * Operations are passed in as options during run-time - only valid options,
 45+ * which are defined here, can be executed. Valid operations are mapped here
 46+ * to a corresponding method ( array( 'operation' => 'method' ))
 47+ * @var array
 48+ */
 49+ protected $operation_map = array(
 50+ 'highslows' => 'populateHighsLows',
 51+ 'problems' => 'populateProblems',
 52+ );
 53+
 54+ /**
 55+ * Operations to execute
 56+ * @var array
 57+ */
 58+ public $operations = array();
 59+
 60+ /**
 61+ * The minimum number of rating sets required before taking some action
 62+ * @var int
 63+ */
 64+ public $rating_set_threshold = 10;
 65+
4166 public function __construct() {
4267 parent::__construct();
4368 $this->mDescription = "Populates the article feedback stats tables";
 69+
 70+ $this->addOption( 'op', 'The ArticleFeedback stats gathering operation to run (eg "highslows"). Can specify multiple operations, separated by comma.', true, true );
 71+ $this->addOption( 'rating_sets', 'The minimum number of rating sets before taking an action.', false, true );
 72+ $this->addOption( 'poll_period', 'The polling period for fetching data, in seconds.', false, true );
4473 }
4574
4675 public function syncDBs() {
@@ -54,72 +83,179 @@
5584 }
5685 }
5786
58 - public function execute() {
59 - global $wgMemc;
 87+ /**
 88+ * Bootstrap this maintenance script
 89+ *
 90+ * Performs operations necessary for this maintenance script to run which
 91+ * cannot or do not make sense to run in the constructor.
 92+ */
 93+ public function bootstrap() {
 94+ /**
 95+ * Set user-specified operations to perform
 96+ */
 97+ $operations = explode( ',', $this->getOption( 'op' ));
 98+ // check sanity of specified operations
 99+ if ( !$this->checkOperations( $operations )) {
 100+ $this->error( 'Invalid operation specified.', true );
 101+ } else {
 102+ $this->operations = $operations;
 103+ }
 104+
 105+ /**
 106+ * Set user-specified rating set threshold
 107+ */
 108+ $rating_set_threshold = $this->getOption( 'rating_sets', $this->rating_set_threshold );
 109+ if ( !is_numeric( $rating_set_threshold )) {
 110+ $this->error( 'Rating sets must be numeric.', true );
 111+ } else {
 112+ $this->rating_set_threshold = $rating_set_threshold;
 113+ }
 114+
 115+ /**
 116+ * Set user-specified polling period
 117+ */
 118+ $polling_period = $this->getOption( 'poll_period', $this->polling_period );
 119+ if ( !is_numeric( $polling_period )) {
 120+ $this->error( 'Poll period must be numeric.', true );
 121+ } else {
 122+ $this->polling_period = $polling_period;
 123+ }
 124+
 125+ // set db objects
60126 $this->dbr = wfGetDB( DB_SLAVE );
61127 $this->dbw = wfGetDB( DB_MASTER );
 128+ }
 129+
 130+ /**
 131+ * Check whether or not specified operations are valid.
 132+ *
 133+ * A specified operation is considered valid if it exists
 134+ * as a key in the operation map.
 135+ *
 136+ * @param array $ops An array of operations to check
 137+ * @return bool
 138+ */
 139+ public function checkOperations( array $ops ) {
 140+ foreach ( $ops as $operation ) {
 141+ if ( !isset( $this->operation_map[ $operation ] )) {
 142+ return false;
 143+ }
 144+ }
 145+ return true;
 146+ }
 147+
 148+ public function execute() {
 149+ // finish bootstrapping the script
 150+ $this->bootstrap();
62151
63 - // the data structure to store ratings for a given page
64 - $ratings = array(); // stores rating-specific info
65 - $rating_set_count = array(); // keep track of rating sets
66 - $highs_and_lows = array(); // store highest/lowest rated page stats
67 - $averages = array(); // store overall averages for a given page
 152+ // execute requested operations
 153+ foreach ( $this->operations as $operation ) {
 154+ $method = $this->operation_map[ $operation ];
 155+ $this->$method();
 156+ }
 157+ }
 158+
 159+ public function populateProblems() {
 160+ global $wgMemc;
68161
69 - // fetch the ratings since the lower bound timestamp
70 - $this->output( 'Fetching page ratings between now and ' . date('Y-m-d H:i:s', strtotime( $this->getLowerBoundTimestamp())) . "...\n");
71 - $res = $this->dbr->select(
72 - 'article_feedback',
 162+ /**
 163+ * Chck to see if we already have a collection of pages to operate on.
 164+ * If not, generate the collection of pages and their associated ratings.
 165+ */
 166+ if ( !isset( $this->pages )) {
 167+ $ts = $this->getLowerBoundTimestamp();
 168+ $this->pages = $this->populatePageRatingsSince( $ts );
 169+ }
 170+ $problems = array();
 171+ // iterate through pages, look for pages that meet criteria for problem articles
 172+ $this->output( "Finding problem articles ...\n" );
 173+ foreach ( $this->pages as $page ) {
 174+ // make sure that we have more rating sets than the req'd threshold for this page in order to qualify for calculating
 175+ if ( $page->rating_set_count < $this->rating_set_threshold ) {
 176+ continue;
 177+ }
 178+
 179+ if ( $page->isProblematic() ) {
 180+ array_push( $problems, $page->page_id );
 181+ }
 182+ }
 183+
 184+ // populate stats table with problem articles & associated data
 185+ // fetch stats type id - add stat type if it's non-existant
 186+ $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'problems' );
 187+ if ( !$stats_type_id ) {
 188+ $stats_type_id = $this->addStatType( 'problems' );
 189+ }
 190+ foreach( $problems as $page_id ) {
 191+ $page = $this->pages->getPage( $page_id );
 192+ $rows[] = array(
 193+ 'afs_page_id' => $page_id,
 194+ 'afs_orderable_data' => $page->overall_average,
 195+ 'afs_data' => FormatJson::encode( $page->rating_averages ),
 196+ 'afs_ts' => $cur_ts,
 197+ 'afs_stats_type_id' => $stats_type_id,
 198+ );
 199+ }
 200+ $this->output( "Done.\n" );
 201+
 202+ // populate cache with current problem articles
 203+ // loading data into cache
 204+ $this->output( "Caching latest problems (if cache present).\n" );
 205+ $key = wfMemcKey( 'article_feedback_stats_problems' );
 206+ $result = $this->dbr->select(
 207+ 'article_feedback_stats',
 208+ array(
 209+ 'afs_page_id',
 210+ 'afs_orderable_data',
 211+ 'afs_data'
 212+ ),
73213 array(
74 - 'aa_revision',
75 - 'aa_user_text',
76 - 'aa_rating_id',
77 - 'aa_user_anon_token',
78 - 'aa_page_id',
79 - 'aa_rating_value',
80 - ),
81 - array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $this->getLowerBoundTimestamp() ) ),
 214+ 'afs_ts' => $cur_ts,
 215+ 'afs_stats_type_id' => $stats_type_id
 216+ ),
82217 __METHOD__,
83 - array()
 218+ array( "ORDER BY" => "afs_orderable_data" )
84219 );
 220+ // grab the article feedback special page so we can reuse the data structure building code
 221+ // FIXME this logic should not be in the special page class
 222+ $problems = SpecialArticleFeedback::buildProblems( $result );
 223+ // stash the data structure in the cache
 224+ $wgMemc->set( $key, $problems, 86400 );
 225+ $this->output( "Done.\n" );
 226+ }
 227+
 228+ /**
 229+ * Populate stats about highest/lowest rated articles
 230+ */
 231+ public function populateHighsLows() {
 232+ global $wgMemc;
85233
86 - // assign the rating data to our data structure
87 - foreach ( $res as $row ) {
88 - // determine the unique hash for a given rating set (page rev + user identifying info)
89 - $rating_hash = md5( $row->aa_revision . $row->aa_user_text . $row->aa_user_anon_token );
90 -
91 - // keep track of how many rating sets a particular page has
92 - if ( !isset( $rating_count[ $row->aa_page_id ][ $rating_hash ] )) {
93 - // we store the rating hash as a key rather than value as checking isset( $arr[$hash] ) is way faster
94 - // than doing something like array_search( $hash, $arr ) when dealing with large arrays
95 - $rating_set_count[ $row->aa_page_id ][ $rating_hash ] = 1;
96 - }
97 -
98 - $ratings[ $row->aa_page_id ][ $row->aa_rating_id ][] = $row->aa_rating_value;
 234+ $averages = array(); // store overall averages for a given page
 235+
 236+ /**
 237+ * Chck to see if we already have a collection of pages to operate on.
 238+ * If not, generate the collection of pages and their associated ratings.
 239+ */
 240+ if ( !isset( $this->pages )) {
 241+ $ts = $this->getLowerBoundTimestamp();
 242+ $this->pages = $this->populatePageRatingsSince( $ts );
99243 }
100 - $this->output( "Done\n" );
101244
102245 // determine the average ratings for a given page
103246 $this->output( "Determining average ratings for articles ...\n" );
104 - foreach ( $ratings as $page_id => $data ) {
105 - // make sure that we have at least 10 rating sets for this page in order to qualify for ranking
106 - if ( count( array_keys( $rating_set_count[ $page_id ] )) < 10 ) {
 247+ foreach ( $this->pages as $page ) {
 248+ // make sure that we have more rating sets than the req'd threshold for this page in order to qualify for ranking
 249+ if ( $page->rating_set_count < $this->rating_set_threshold ) {
107250 continue;
108251 }
109252
110 - // calculate the rating averages for a given page
111 - foreach( $data as $rating_id => $rating ) {
112 - $rating_sum = array_sum( $rating );
113 - $rating_avg = $rating_sum / count( $rating );
114 - $highs_and_lows[ $page_id ][ 'avg_ratings' ][ $rating_id ] = $rating_avg;
 253+ // calculate the rating averages if they haven't already been calculated
 254+ if ( !count( $page->rating_averages )) {
 255+ $page->calculateRatingAverages();
115256 }
116257
117 - // calculate the overall average for a page
118 - $overall_rating_sum = array_sum( $highs_and_lows[ $page_id ][ 'avg_ratings' ] );
119 - $overall_rating_average = $overall_rating_sum / count( $highs_and_lows[ $page_id ][ 'avg_ratings' ] );
120 - $highs_and_lows[ $page_id ][ 'average' ] = $overall_rating_average;
121 -
122258 // store overall average rating seperately so we can easily sort
123 - $averages[ $page_id ] = $overall_rating_average;
 259+ $averages[ $page->page_id ] = $page->overall_average;
124260 }
125261 $this->output( "Done.\n" );
126262
@@ -129,35 +265,42 @@
130266 // take lowest 50 and highest 50
131267 $highest_and_lowest_page_ids = array_slice( $averages, 0, 50, true );
132268 if ( count( $averages ) > 50 ) {
 269+ // in the event that we have < 100 $averages total, this will still
 270+ // work nicely - it will select duplicate averages, but the +=
 271+ // will cause items with the same keys to essentially be ignored
133272 $highest_and_lowest_page_ids += array_slice( $averages, -50, 50, true );
134273 }
135274 $this->output( "Done\n" );
136275
 276+ // fetch stats type id - add stat type if it's non-existant
 277+ $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'highs_and_lows' );
 278+ if ( !$stats_type_id ) {
 279+ $stats_type_id = $this->addStatType( 'highs_and_lows' );
 280+ }
 281+
137282 // prepare data for insert into db
138283 $this->output( "Preparing data for db insertion ...\n");
139284 $cur_ts = $this->dbw->timestamp();
140285 $rows = array();
141 - foreach( $highs_and_lows as $page_id => $data ) {
142 - // make sure this is one of the highest/lowest average ratings
143 - if ( !isset( $highest_and_lowest_page_ids[ $page_id ] )) {
144 - continue;
145 - }
 286+ foreach( $highest_and_lowest_page_ids as $page_id => $overall_average ) {
 287+ $page = $this->pages->getPage( $page_id );
146288 $rows[] = array(
147 - 'afshl_page_id' => $page_id,
148 - 'afshl_avg_overall' => $data[ 'average' ],
149 - 'afshl_avg_ratings' => FormatJson::encode( $data[ 'avg_ratings' ] ),
150 - 'afshl_ts' => $cur_ts,
 289+ 'afs_page_id' => $page_id,
 290+ 'afs_orderable_data' => $page->overall_average,
 291+ 'afs_data' => FormatJson::encode( $page->rating_averages ),
 292+ 'afs_ts' => $cur_ts,
 293+ 'afs_stats_type_id' => $stats_type_id,
151294 );
152295 }
153296 $this->output( "Done.\n" );
154297
155298 // insert data to db
156 - $this->output( "Writing data to article_feedback_stats_highs_lows ...\n" );
 299+ $this->output( "Writing data to article_feedback_stats ...\n" );
157300 $rowsInserted = 0;
158301 while( $rows ) {
159302 $batch = array_splice( $rows, 0, $this->insert_batch_size );
160303 $this->dbw->insert(
161 - 'article_feedback_stats_highs_lows',
 304+ 'article_feedback_stats',
162305 $batch,
163306 __METHOD__
164307 );
@@ -167,30 +310,96 @@
168311 }
169312 $this->output( "Done.\n" );
170313
171 - // loading data into caching
 314+ // loading data into cache
172315 $this->output( "Caching latest highs/lows (if cache present).\n" );
173316 $key = wfMemcKey( 'article_feedback_stats_highs_lows' );
174317 $result = $this->dbr->select(
175 - 'article_feedback_stats_highs_lows',
 318+ 'article_feedback_stats',
176319 array(
177 - 'afshl_page_id',
178 - 'afshl_avg_overall',
179 - 'afshl_avg_ratings'
 320+ 'afs_page_id',
 321+ 'afs_orderable_data',
 322+ 'afs_data'
180323 ),
181 - array( 'afshl_ts' => $cur_ts ),
 324+ array(
 325+ 'afs_ts' => $cur_ts,
 326+ 'afs_stats_type_id' => $stats_type_id
 327+ ),
182328 __METHOD__,
183 - array( "ORDER BY" => "afshl_avg_overall" )
 329+ array( "ORDER BY" => "afs_orderable_data" )
184330 );
185331 // grab the article feedback special page so we can reuse the data structure building code
186332 // FIXME this logic should not be in the special page class
187333 $highs_lows = SpecialArticleFeedback::buildHighsAndLows( $result );
188334 // stash the data structure in the cache
189335 $wgMemc->set( $key, $highs_lows, 86400 );
190 - $this->output( "Done\n" );
 336+ $this->output( "Done\n" );
191337 }
192338
 339+ /**
 340+ * Fetch ratings newer than a given time stamp.
 341+ *
 342+ * If no timestamp is provided, relies on $this->lowerBoundTimestamp
 343+ * @param numeric $ts
 344+ * @return database result object
 345+ */
 346+ public function fetchRatingsNewerThanTs( $ts=null ) {
 347+ if ( !$ts ) {
 348+ $ts = $this->getLowerBoundTimestamp();
 349+ }
 350+
 351+ if ( !is_numeric( $ts )) {
 352+ throw new InvalidArgumentException( 'Timestamp expected to be numeric.' );
 353+ }
 354+
 355+ $res = $this->dbr->select(
 356+ 'article_feedback',
 357+ array(
 358+ 'aa_revision',
 359+ 'aa_user_text',
 360+ 'aa_rating_id',
 361+ 'aa_user_anon_token',
 362+ 'aa_page_id',
 363+ 'aa_rating_value',
 364+ ),
 365+ array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $ts )),
 366+ __METHOD__,
 367+ array()
 368+ );
 369+
 370+ return $res;
 371+ }
193372
194373 /**
 374+ * Construct collection of pages and their ratings since a given time stamp
 375+ * @param $ts
 376+ * @return object The colelction of pages
 377+ */
 378+ public function populatePageRatingsSince( $ts ) {
 379+ $pages = new Pages();
 380+ // fetch the ratings since the lower bound timestamp
 381+ $this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts )) . "...\n" );
 382+ $res = $this->fetchRatingsNewerThanTs( $ts );
 383+ $this->output( "Done.\n" );
 384+
 385+ // assign the rating data to our data structure
 386+ $this->output( "Assigning fetched ratings to internal data structure ...\n" );
 387+ foreach ( $res as $row ) {
 388+ // fetch the page from the page store referentially so we can
 389+ // perform actions on it that will automagically be saved in the
 390+ // object for easy access later
 391+ $page =& $pages->getPage( $row->aa_page_id );
 392+
 393+ // determine the unique hash for a given rating set (page rev + user identifying info)
 394+ $rating_hash = $row->aa_revision . "|" . $row->aa_user_text . "|" . $row->aa_user_anon_token;
 395+
 396+ // add rating data for this page
 397+ $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash );
 398+ }
 399+ $this->output( "Done.\n" );
 400+ return $pages;
 401+ }
 402+
 403+ /**
195404 * Set $this->timestamp
196405 * @param int $ts
197406 */
@@ -216,7 +425,177 @@
217426 }
218427 return $this->lowerBoundTimestamp;
219428 }
 429+
 430+ /**
 431+ * Add stat type record to article_feedbak_stats_types
 432+ * @param string $stat_type The identifying name of the stat type (eg 'highs_lows')
 433+ */
 434+ public function addStatType( $stat_type ) {
 435+ $this->dbw->insert(
 436+ 'article_feedback_stats',
 437+ array( 'afst_type' => $stat_type ),
 438+ __METHOD__
 439+ );
 440+ return $this->dbw->insertId();
 441+ }
220442 }
221443
 444+/**
 445+ * A class to represent a page and data about its ratings
 446+ */
 447+class Page {
 448+ public $page_id;
 449+
 450+ /**
 451+ * The number of rating sets recorded for this page
 452+ * @var int
 453+ */
 454+ public $rating_set_count = 0;
 455+
 456+ /**
 457+ * An array of ratings for this page
 458+ * @var array
 459+ */
 460+ public $ratings = array();
 461+
 462+ /**
 463+ * An array to hold mean ratings by rating type id
 464+ * @var array
 465+ */
 466+ public $rating_averages = array();
 467+
 468+ /**
 469+ * Mean of all ratings for this page
 470+ * @var float
 471+ */
 472+ public $overall_average;
 473+
 474+ /**
 475+ * An array of rating set hashes, which are used to identify unique sets of
 476+ * ratings
 477+ * @var array
 478+ */
 479+ protected $rating_set_hashes = array();
 480+
 481+ public function __construct( $page_id ) {
 482+ if ( !is_numeric( $page_id )) {
 483+ throw new Exception( 'Page id must be numeric.' );
 484+ }
 485+ $this->page_id = $page_id;
 486+ }
 487+
 488+ /**
 489+ * Add a new rating for this particular page
 490+ * @param int $rating_id
 491+ * @param int $rating_value
 492+ * @param string $rating_set_hash
 493+ */
 494+ public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) {
 495+ $this->ratings[ $rating_id ][] = $rating_value;
 496+
 497+ if ( $rating_set_hash ) {
 498+ $this->trackRatingSet( $rating_set_hash );
 499+ }
 500+ }
 501+
 502+ /**
 503+ * Keep track of rating sets
 504+ *
 505+ * Record when we see a new rating set and increment the set count
 506+ * @param string $rating_set_hash
 507+ */
 508+ protected function trackRatingSet( $rating_set_hash ) {
 509+ if ( isset( $this->rating_set_hashes[ $rating_set_hash ] )) {
 510+ return;
 511+ }
 512+
 513+ $this->rating_set_hashes[ $rating_set_hash ] = 1;
 514+ $this->rating_set_count += 1;
 515+ }
 516+
 517+ public function calculateRatingAverages() {
 518+ // determine averages for each rating type
 519+ foreach( $this->ratings as $rating_id => $rating ) {
 520+ $rating_sum = array_sum( $rating );
 521+ $rating_avg = $rating_sum / count( $rating );
 522+ $this->rating_averages[ $rating_id ] = $rating_avg;
 523+ }
 524+
 525+ // determine overall rating average for this page
 526+ if ( count( $this->rating_averages )) {
 527+ $overall_rating_sum = array_sum( $this->rating_averages );
 528+ $overall_rating_average = $overall_rating_sum / count( $this->rating_averages );
 529+ } else {
 530+ $overall_rating_average = 0;
 531+ }
 532+ $this->overall_average = $overall_rating_average;
 533+ }
 534+
 535+ /**
 536+ * Returns whether or not this page is considered problematic
 537+ * @return bool
 538+ */
 539+ public function isProblematic() {
 540+ if ( !isset( $this->problematic )) {
 541+ $this->determineProblematicStatus();
 542+ }
 543+ return $this->probematic;
 544+ }
 545+
 546+ /**
 547+ * Determine whether this article is 'problematic'
 548+ *
 549+ * If a page has one more rating categories where 70% of the ratings are
 550+ * <= 2, it is considered problematic.
 551+ */
 552+ public function determineProblematicStatus() {
 553+ foreach( $this->ratings as $rating_id => $ratings ) {
 554+ $count = 0;
 555+ foreach ( $ratings as $rating ) {
 556+ if ( $rating <= 2 ) {
 557+ $count += 1;
 558+ }
 559+ }
 560+
 561+ $threshold = round( 0.7 * count( $ratings ));
 562+ if ( $count >= $threshold ) {
 563+ $this->problematic = true;
 564+ return;
 565+ }
 566+ }
 567+
 568+ $this->problematic = false;
 569+ return;
 570+ }
 571+}
 572+
 573+/**
 574+ * A storage class to keep track of PageRatings object by page
 575+ *
 576+ * Iterable on array of pages.
 577+ */
 578+class Pages implements IteratorAggregate {
 579+ /**
 580+ * An array of page rating objects
 581+ * @var array
 582+ */
 583+ public $pages = array();
 584+
 585+ public function getPage( $page_id ) {
 586+ if ( !isset( $this->pages[ $page_id ] )) {
 587+ $this->addPage( $page_id );
 588+ }
 589+ return $this->pages[ $page_id ];
 590+ }
 591+
 592+ public function addPage( $page_id ) {
 593+ $this->pages[ $page_id ] = new Page( $page_id );
 594+ }
 595+
 596+ public function getIterator() {
 597+ return new ArrayIterator( $this->pages );
 598+ }
 599+}
 600+
222601 $maintClass = "PopulateAFStatistics";
223602 require_once( DO_MAINTENANCE );
Index: trunk/extensions/ArticleFeedback/ArticleFeedback.php
@@ -11,7 +11,7 @@
1212 */
1313
1414 /* XXX: Survey setup */
15 -require_once( dirname( dirname( __FILE__ ) ) . '/SimpleSurvey/SimpleSurvey.php' );
 15+require_once( $IP . '/extensions/SimpleSurvey/SimpleSurvey.php' );
1616
1717 /* Configuration */
1818

Follow-up revisions

RevisionCommit summaryAuthorDate
r89281Follow up r89277, fixing a change that should've been local to me onlyawjrichards19:09, 1 June 2011
r89366Reverting r89281, and and part of r89277, restoring the path to SimpleSurvey ...tparscal21:10, 2 June 2011
r89493Fix filepath (typo), crashes updater (follow-up r89277)krinkle10:58, 5 June 2011
r89494Fix syntax in .sql (spaces), crashed updater (follow-up r89277)krinkle10:59, 5 June 2011
r89760Fix some typos in r89277catrope08:32, 9 June 2011
r89762Followup r89277, r89494: another space breaking stuffcatrope09:16, 9 June 2011
r89764Fix r89277, there were tons of things wrong with it:...catrope10:14, 9 June 2011

Comments

#Comment by Trevor Parscal (WMF) (talk | contribs)   21:11, 2 June 2011

The modifications to trunk/extensions/ArticleFeedback/ArticleFeedback.php were reverted in r89366.

#Comment by Krinkle (talk | contribs)   10:53, 5 June 2011

Marking FIXME. The widget on-wiki says "An error has occured. Please try again later." (since the API returns error:internal_api_error_DBQueryError due to the missing tables).

Upon running update.php I get the below output

KrinkleMac:maintenance krinklemac$ php update.php
[snip]
Creating click_tracking table...ok
Creating click_tracking_events table...ok
Creating click_tracking_user_properties table...ok
...click_tracking_action_time key already set on click_tracking table.
...have additional_info field in click_tracking table.
Creating email_capture table...ok
Creating prefswitch_survey table...ok
Creating article_feedback table...ok
...have aa_design_bucket field in article_feedback table.
...have afp_value_text field in article_feedback_properties table.
...article_feedback_properties table already exists.
...article_feedback_revisions table already exists.
Creating article_feedback_stats_types table...Could not open "/Applications/MAMP/htdocs/SVN/mediawiki/trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypesTable.sql".

Backtrace:
#0 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(386): DatabaseBase->sourceFile('/Applications/M...')
#1 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(403): DatabaseUpdater->applyPatch('/Applications/M...', true)
#2 [internal function]: DatabaseUpdater->addTable('article_feedbac...', '/Applications/M...', true)
#3 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(261): call_user_func_array(Array, Array)
#4 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(229): DatabaseUpdater->runUpdates(Array, true)
#5 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/maintenance/update.php(112): DatabaseUpdater->doUpdates(Array)
#6 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/maintenance/doMaintenance.php(106): UpdateMediaWiki->execute()
#7 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/maintenance/update.php(138): require_once('/Applications/M...')
#8 {main}

Something went wrong ?

#Comment by Krinkle (talk | contribs)   10:58, 5 June 2011

Ah, there's a little typo in the filename. AddArticleFeedbackStatsTypesTable.sql vs. AddArticleFeedbackStatsTypesTable.sql.

After fixing that (r89493) though, there's still issues:

update.php

[snip]


...article_feedback_properties table already exists.
...article_feedback_revisions table already exists.
Creating article_feedback_stats_types table...
Notice: Uninitialized string offset: 0 in /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/db/DatabaseMysql.php on line 333

Notice: Uninitialized string offset: 0 in /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/db/DatabaseMysql.php on line 333
A database query syntax error has occurred.
The last attempted database query was:
"CREATE TABLE IF NOT EXISTS `` article_feedback_stats_types (
 afst_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
 afst_type varbinary(255) NOT NULL
 ) ENGINE=InnoDB, DEFAULT CHARSET=binary
"
from within function "DatabaseBase::sourceFile( /Applications/MAMP/htdocs/SVN/mediawiki/trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql )".
Database returned error "1103: Incorrect table name '' (localhost)"
Backtrace:
#0 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/db/Database.php(769): DatabaseBase->reportQueryError('Incorrect table...', 1103, 'CREATE TABLE IF...', 'DatabaseBase::s...', false)
#1 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/db/Database.php(2680): DatabaseBase->query('CREATE TABLE IF...', 'DatabaseBase::s...')
#2 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/db/Database.php(2583): DatabaseBase->sourceStream(Resource id #281, false, false, 'DatabaseBase::s...')
#3 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(386): DatabaseBase->sourceFile('/Applications/M...')
#4 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(403): DatabaseUpdater->applyPatch('/Applications/M...', true)
#5 [internal function]: DatabaseUpdater->addTable('article_feedbac...', '/Applications/M...', true)
#6 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(261): call_user_func_array(Array, Array)
#7 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/includes/installer/DatabaseUpdater.php(229): DatabaseUpdater->runUpdates(Array, true)
#8 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/maintenance/update.php(112): DatabaseUpdater->doUpdates(Array)
#9 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/maintenance/doMaintenance.php(106): UpdateMediaWiki->execute()
#10 /Applications/MAMP/htdocs/SVN/mediawiki/trunk/phase3/maintenance/update.php(138): require_once('/Applications/M...')
#11 {main}
#Comment by Krinkle (talk | contribs)   11:01, 5 June 2011

It was trying to create a table by an empty name (TABLE IF NOT EXISTS `` article_feedback_stats_types) due to a simple spacing problem. A similar thing was going on with the indices.

Fixed in r89494, setting status back to new.

#Comment by Catrope (talk | contribs)   21:43, 8 June 2011

There's quite a bit wrong with this rev. I'll fix it because Arthur is in fundraising land now.

Krinkle reports:

Notice: Undefined variable: cur_ts in /htdocs/SVN/mediawiki/trunk/extensions/ArticleFeedback/populateAFStatistics.php on line 213
Notice: Undefined property: Page::$probematic in /htdocs/SVN/mediawiki/trunk/extensions/ArticleFeedback/populateAFStatistics.php on line 543
+CREATE UNIQUE INDEX /*i*/ afs_page_ts_type ON /*_*/ article_feedback_stats( afs_page_id, afs_ts, afs_stats_type_id );
+CREATE INDEX /*i*/ afs_ts_avg_overall ON /*_*/article_feedback_stats (afs_ts, afs_orderable_data);

These indexes are insufficient. Making a note for myself to add proper indexing.

+				if ( !$db->tableExists( 'article_feedback_stats_type' )) {

Typo in table name

+				array_push( $problems, $page->page_id );

Per the PHP docs, just use $problems[] = $page->page_id;

Problem articles data is built but never inserted.

Re-fetches data it just inserted, and does so from the slave, which is guaranteed to fail in a replicated environment.

+			array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $ts )),

Needs $this->dbr->timestamp() too.

#Comment by Awjrichards (talk | contribs)   21:54, 8 June 2011

Thanks for catching these and taking care of them - I never had the chance to fully test before I had to hand this stuff over :(

Status & tagging log