Index: trunk/extensions/ArticleFeedbackv5/populateAFStatistics.php |
— | — | @@ -1,613 +0,0 @@ |
2 | | -<?php |
3 | | - |
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set,
// otherwise fall back to the directory two levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? dirname( __FILE__ ) . '/../..' : $IP;
require( $IP . '/maintenance/Maintenance.php' );
9 | | - |
10 | | -class PopulateAFStatistics extends Maintenance { |
/**
 * The number of records to attempt to insert at any given time.
 * @var int
 */
public $insert_batch_size = 100;

/**
 * The period (in seconds) before now for which to gather stats
 * @var int
 */
public $polling_period = 86400;

/**
 * The formatted (database-style, numeric) timestamp from which to
 * determine stats. Lazily initialised by getLowerBoundTimestamp().
 * @var string|null
 */
protected $lowerBoundTimestamp;

/**
 * DB slave
 * @var object
 */
protected $dbr;

/**
 * DB master
 * @var object
 */
protected $dbw;

/**
 * Collection of pages and their ratings gathered for this run.
 *
 * Stays null until one of the populate*() operations builds it, so that
 * several operations in the same run share a single collection. Declared
 * explicitly (it used to be created as a dynamic property on first use).
 * @var AFPages|null
 */
public $pages;

/**
 * Valid operations and their execution methods for this script to perform
 *
 * Operations are passed in as options during run-time - only valid options,
 * which are defined here, can be executed. Valid operations are mapped here
 * to a corresponding method ( array( 'operation' => 'method' ))
 * @var array
 */
protected $operation_map = array(
	'highslows' => 'populateHighsLows',
	'problems' => 'populateProblems',
);

/**
 * Operations to execute
 * @var array
 */
public $operations = array();

/**
 * The minimum number of rating sets required before taking some action
 * @var int
 */
public $rating_set_threshold = 10;
65 | | - |
/**
 * Set the script description and register its command-line options:
 * the mandatory 'op' list plus the optional 'rating_sets' and 'poll_period'.
 */
public function __construct() {
	parent::__construct();
	$this->mDescription = "Populates the article feedback stats tables";

	// Each entry: option name, help text, third addOption() argument
	$optionSpecs = array(
		array( 'op', 'The ArticleFeedback stats gathering operation to run (eg "highslows"). Can specify multiple operations, separated by comma.', true ),
		array( 'rating_sets', 'The minimum number of rating sets before taking an action.', false ),
		array( 'poll_period', 'The polling period for fetching data, in seconds.', false ),
	);
	foreach ( $optionSpecs as $spec ) {
		$this->addOption( $spec[0], $spec[1], $spec[2], true );
	}
}
74 | | - |
/**
 * Block until every server known to the load balancer has reached the
 * master's current position.
 *
 * FIXME: Copied from populateAFRevisions.php, which copied it from
 * updateCollation.php; should be centralized somewhere.
 */
public function syncDBs() {
	$loadBalancer = wfGetLB();

	// bug 27975 - Don't try to wait for slaves if there are none
	// Prevents permission error when getting master position
	if ( $loadBalancer->getServerCount() <= 1 ) {
		return;
	}

	$masterPosition = $loadBalancer->getConnection( DB_MASTER )->getMasterPos();
	$loadBalancer->waitForAll( $masterPosition );
}
86 | | - |
/**
 * Bootstrap this maintenance script
 *
 * Performs operations necessary for this maintenance script to run which
 * cannot or do not make sense to run in the constructor: reads and
 * validates the 'op', 'rating_sets' and 'poll_period' options and opens
 * the database connections.
 */
public function bootstrap() {
	/**
	 * Set user-specified operations to perform
	 *
	 * The list is comma-separated; surrounding whitespace is trimmed so
	 * that "highslows, problems" is accepted like "highslows,problems".
	 */
	$operations = array_map( 'trim', explode( ',', $this->getOption( 'op' ) ) );
	// check sanity of specified operations
	if ( !$this->checkOperations( $operations ) ) {
		$this->error( 'Invalid operation specified.', true );
	} else {
		$this->operations = $operations;
	}

	/**
	 * Set user-specified rating set threshold
	 * (falls back to the current property value when not given)
	 */
	$rating_set_threshold = $this->getOption( 'rating_sets', $this->rating_set_threshold );
	if ( !is_numeric( $rating_set_threshold ) ) {
		$this->error( 'Rating sets must be numeric.', true );
	} else {
		$this->rating_set_threshold = $rating_set_threshold;
	}

	/**
	 * Set user-specified polling period
	 * (falls back to the current property value when not given)
	 */
	$polling_period = $this->getOption( 'poll_period', $this->polling_period );
	if ( !is_numeric( $polling_period ) ) {
		$this->error( 'Poll period must be numeric.', true );
	} else {
		$this->polling_period = $polling_period;
	}

	// set db objects
	$this->dbr = wfGetDB( DB_SLAVE );
	$this->dbw = wfGetDB( DB_MASTER );
}
129 | | - |
/**
 * Tell whether every requested operation is known to this script.
 *
 * An operation is known when it is a key of $this->operation_map.
 * An empty list is considered valid.
 *
 * @param array $ops Operation names to validate
 * @return bool True when all names are valid, false as soon as one is not
 */
public function checkOperations( array $ops ) {
	$allValid = true;
	foreach ( $ops as $requested ) {
		if ( !isset( $this->operation_map[ $requested ] ) ) {
			$allValid = false;
			break;
		}
	}
	return $allValid;
}
147 | | - |
/**
 * Script entry point: finish bootstrapping, then run each requested
 * operation, in the order given, through its mapped method.
 */
public function execute() {
	$this->bootstrap();

	foreach ( $this->operations as $requested ) {
		$this->{ $this->operation_map[ $requested ] }();
	}
}
158 | | - |
/**
 * Populate stats about problematic articles.
 *
 * Finds pages with enough rating sets that AFPage::isProblematic() flags,
 * writes up to 50 of them to article_feedback_stats and caches the
 * structure built from those rows for a day.
 */
public function populateProblems() {
	global $wgMemc;

	// Reuse the page collection if an earlier operation already built it
	if ( !isset( $this->pages ) ) {
		$this->pages = $this->populatePageRatingsSince( $this->getLowerBoundTimestamp() );
	}

	// Collect the ids of qualifying pages that are considered problematic
	$this->output( "Finding problem articles ...\n" );
	$problemPageIds = array();
	foreach ( $this->pages as $candidate ) {
		// Only pages with at least the required number of rating sets qualify
		$hasEnoughRatingSets = $candidate->rating_set_count >= $this->rating_set_threshold;
		if ( $hasEnoughRatingSets && $candidate->isProblematic() ) {
			$problemPageIds[] = $candidate->page_id;
		}
	}

	// Look up the stats type id, registering the type if it is missing
	$statsTypeId = SpecialArticleFeedbackv5::getStatsTypeId( 'problems' );
	if ( !$statsTypeId ) {
		$statsTypeId = $this->addStatType( 'problems' );
	}

	// Build one row per problem page, capped at the first 50 found
	// TODO: Get the 50 most problematic articles rather than 50 random problematic ones
	$now = $this->dbw->timestamp();
	$rows = array();
	foreach ( array_slice( $problemPageIds, 0, 50 ) as $pageId ) {
		$problemPage = $this->pages->getPage( $pageId );
		// Averages are computed lazily, only once per page
		if ( !count( $problemPage->rating_averages ) ) {
			$problemPage->calculateRatingAverages();
		}
		$rows[] = array(
			'afs_page_id' => $pageId,
			'afs_orderable_data' => $problemPage->overall_average,
			'afs_data' => FormatJson::encode( $problemPage->rating_averages ),
			'afs_ts' => $now,
			'afs_stats_type_id' => $statsTypeId,
		);
	}
	$this->output( "Done.\n" );

	// Write the rows in batches, waiting for the slaves after each batch
	$this->output( "Writing data to article_feedback_stats ...\n" );
	$rowsInserted = 0;
	$totalRows = count( $rows );
	for ( $offset = 0; $offset < $totalRows; $offset += $this->insert_batch_size ) {
		$batch = array_slice( $rows, $offset, $this->insert_batch_size );
		$this->dbw->insert( 'article_feedback_stats', $batch, __METHOD__ );
		$rowsInserted += count( $batch );
		$this->syncDBs();
		$this->output( "Inserted " . $rowsInserted . " rows\n" );
	}
	$this->output( "Done.\n" );

	// Cache the structure the special page builds from these rows
	// FIXME this logic should not be in the special page class
	$this->output( "Caching latest problems (if cache present).\n" );
	$cachedProblems = SpecialArticleFeedbackv5::buildProblems( $rows );
	$cacheKey = wfMemcKey( 'article_feedback_stats_problems' );
	$wgMemc->set( $cacheKey, $cachedProblems, 86400 );
	$this->output( "Done.\n" );
}
245 | | - |
/**
 * Populate stats about highest/lowest rated articles
 *
 * Ranks qualifying pages by overall average rating, writes the 50 lowest
 * and 50 highest to article_feedback_stats and caches the structure built
 * from those rows for a day.
 */
public function populateHighsLows() {
	global $wgMemc;

	// Reuse the page collection if an earlier operation already built it
	if ( !isset( $this->pages ) ) {
		$this->pages = $this->populatePageRatingsSince( $this->getLowerBoundTimestamp() );
	}

	// Map page id => overall average for every page with enough rating sets
	$this->output( "Determining average ratings for articles ...\n" );
	$averagesByPageId = array();
	foreach ( $this->pages as $ratedPage ) {
		if ( $ratedPage->rating_set_count >= $this->rating_set_threshold ) {
			// Averages are computed lazily, only once per page
			if ( !count( $ratedPage->rating_averages ) ) {
				$ratedPage->calculateRatingAverages();
			}
			$averagesByPageId[ $ratedPage->page_id ] = $ratedPage->overall_average;
		}
	}
	$this->output( "Done.\n" );

	// Sort ascending, then take both ends of the list
	$this->output( "Determining 50 highest and 50 lowest rated articles...\n" );
	asort( $averagesByPageId );
	$selected = array_slice( $averagesByPageId, 0, 50, true );
	if ( count( $averagesByPageId ) > 50 ) {
		// With fewer than 100 pages the two slices overlap; the array
		// union keeps the existing entry for any key present in both
		$selected += array_slice( $averagesByPageId, -50, 50, true );
	}
	$this->output( "Done\n" );

	// Look up the stats type id, registering the type if it is missing
	$statsTypeId = SpecialArticleFeedbackv5::getStatsTypeId( 'highs_and_lows' );
	if ( !$statsTypeId ) {
		$statsTypeId = $this->addStatType( 'highs_and_lows' );
	}

	// Build one row per selected page
	$this->output( "Preparing data for db insertion ...\n");
	$now = $this->dbw->timestamp();
	$rows = array();
	foreach ( array_keys( $selected ) as $pageId ) {
		$selectedPage = $this->pages->getPage( $pageId );
		$rows[] = array(
			'afs_page_id' => $pageId,
			'afs_orderable_data' => $selectedPage->overall_average,
			'afs_data' => FormatJson::encode( $selectedPage->rating_averages ),
			'afs_ts' => $now,
			'afs_stats_type_id' => $statsTypeId,
		);
	}
	$this->output( "Done.\n" );

	// Write the rows in batches, waiting for the slaves after each batch
	$this->output( "Writing data to article_feedback_stats ...\n" );
	$rowsInserted = 0;
	$totalRows = count( $rows );
	for ( $offset = 0; $offset < $totalRows; $offset += $this->insert_batch_size ) {
		$batch = array_slice( $rows, $offset, $this->insert_batch_size );
		$this->dbw->insert( 'article_feedback_stats', $batch, __METHOD__ );
		$rowsInserted += count( $batch );
		$this->syncDBs();
		$this->output( "Inserted " . $rowsInserted . " rows\n" );
	}
	$this->output( "Done.\n" );

	// Cache the structure the special page builds from these rows
	// FIXME this logic should not be in the special page class
	$this->output( "Caching latest highs/lows (if cache present).\n" );
	$cacheKey = wfMemcKey( 'article_feedback_stats_highs_lows' );
	$cachedHighsLows = SpecialArticleFeedbackv5::buildHighsAndLows( $rows );
	$wgMemc->set( $cacheKey, $cachedHighsLows, 86400 );
	$this->output( "Done\n" );
}
344 | | - |
/**
 * Select the article_feedback rows whose aa_timestamp is at or after a
 * given time stamp.
 *
 * @param numeric $ts Lower bound; when empty, getLowerBoundTimestamp() is used
 * @return database result object
 * @throws InvalidArgumentException When the time stamp is not numeric
 */
public function fetchRatingsNewerThanTs( $ts=null ) {
	$lowerBound = $ts ? $ts : $this->getLowerBoundTimestamp();

	if ( !is_numeric( $lowerBound ) ) {
		throw new InvalidArgumentException( 'Timestamp expected to be numeric.' );
	}

	$fields = array(
		'aa_revision',
		'aa_user_text',
		'aa_rating_id',
		'aa_user_anon_token',
		'aa_page_id',
		'aa_rating_value',
	);
	// Quote the formatted time stamp before placing it in the condition
	$quotedLowerBound = $this->dbr->addQuotes( $this->dbr->timestamp( $lowerBound ) );
	$conditions = array( 'aa_timestamp >= ' . $quotedLowerBound );

	return $this->dbr->select( 'article_feedback', $fields, $conditions, __METHOD__, array() );
}
378 | | - |
/**
 * Build the collection of pages, with their ratings, from every rating
 * recorded since a given time stamp.
 *
 * @param $ts Lower bound time stamp handed to fetchRatingsNewerThanTs()
 * @return object The collection of pages (AFPages)
 */
public function populatePageRatingsSince( $ts ) {
	$collection = new AFPages();

	// fetch the ratings since the lower bound timestamp
	$this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts )) . "...\n" );
	$ratingRows = $this->fetchRatingsNewerThanTs( $ts );
	$this->output( "Done.\n" );

	$this->output( "Assigning fetched ratings to internal data structure ...\n" );
	foreach ( $ratingRows as $ratingRow ) {
		// The collection creates the page entry on first access; the
		// returned object is the stored one, so changes persist in it
		$ratedPage = $collection->getPage( $ratingRow->aa_page_id );

		// A rating set is identified by page revision + user identifying info
		$ratingSetHash = implode( '|', array(
			$ratingRow->aa_revision,
			$ratingRow->aa_user_text,
			$ratingRow->aa_user_anon_token,
		) );

		$ratedPage->addRating( $ratingRow->aa_rating_id, $ratingRow->aa_rating_value, $ratingSetHash );
	}
	$this->output( "Done.\n" );

	return $collection;
}
409 | | - |
/**
 * Set $this->lowerBoundTimestamp
 *
 * @param int $ts Numeric time stamp to use as the lower bound
 * @throws InvalidArgumentException When the value is not numeric
 */
public function setLowerBoundTimestamp( $ts ) {
	if ( is_numeric( $ts ) ) {
		$this->lowerBoundTimestamp = $ts;
		return;
	}
	throw new InvalidArgumentException( 'Timestamp must be numeric.' );
}
420 | | - |
421 | | - |
/**
 * Get $this->lowerBoundTimestamp
 *
 * On first use the value is derived from the polling period: "now minus
 * polling_period seconds", formatted by the master DB connection.
 *
 * @return int
 */
public function getLowerBoundTimestamp() {
	if ( $this->lowerBoundTimestamp ) {
		return $this->lowerBoundTimestamp;
	}

	$periodStart = strtotime( $this->polling_period . ' seconds ago' );
	$this->setLowerBoundTimestamp( $this->dbw->timestamp( $periodStart ) );

	return $this->lowerBoundTimestamp;
}
436 | | - |
/**
 * Add stat type record to article_feedback_stats_types
 *
 * The afst_type column belongs to the stats *types* table; the row must
 * not go into article_feedback_stats, which holds the afs_* stat rows.
 *
 * @param string $stat_type The identifying name of the stat type (eg 'highs_lows')
 * @return int The insert id reported by the master connection for the new row
 */
public function addStatType( $stat_type ) {
	$this->dbw->insert(
		'article_feedback_stats_types',
		array( 'afst_type' => $stat_type ),
		__METHOD__
	);
	return $this->dbw->insertId();
}
449 | | -} |
450 | | - |
/**
 * A class to represent a page and data about its ratings
 */
class AFPage {
	/**
	 * Id of the page these ratings belong to
	 * @var int
	 */
	public $page_id;

	/**
	 * The number of rating sets recorded for this page
	 * @var int
	 */
	public $rating_set_count = 0;

	/**
	 * Rating values for this page, grouped by rating type id
	 * ( array( rating_id => array( value, ... ) ) )
	 * @var array
	 */
	public $ratings = array();

	/**
	 * An array to hold mean ratings by rating type id
	 * @var array
	 */
	public $rating_averages = array();

	/**
	 * Mean of the per-type rating averages for this page
	 * @var float
	 */
	public $overall_average;

	/**
	 * Cached result of determineProblematicStatus(); null until computed.
	 * Declared explicitly (it used to be created as a dynamic property).
	 * @var bool|null
	 */
	public $problematic;

	/**
	 * An array of rating set hashes, which are used to identify unique sets of
	 * ratings
	 * @var array
	 */
	protected $rating_set_hashes = array();

	/**
	 * @param int $page_id
	 * @throws Exception When the page id is not numeric
	 */
	public function __construct( $page_id ) {
		if ( !is_numeric( $page_id ) ) {
			throw new Exception( 'Page id must be numeric.' );
		}
		$this->page_id = $page_id;
	}

	/**
	 * Add a new rating for this particular page
	 *
	 * Ratings whose integer value is zero are ignored entirely: they are
	 * neither stored nor counted towards a rating set.
	 *
	 * @param int $rating_id
	 * @param int $rating_value
	 * @param string $rating_set_hash
	 */
	public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) {
		if ( intval( $rating_value ) == 0 ) {
			// Ignore zero ratings
			return;
		}

		$this->ratings[ $rating_id ][] = $rating_value;

		if ( $rating_set_hash ) {
			$this->trackRatingSet( $rating_set_hash );
		}
	}

	/**
	 * Keep track of rating sets
	 *
	 * Record when we see a new rating set and increment the set count
	 * @param string $rating_set_hash
	 */
	protected function trackRatingSet( $rating_set_hash ) {
		if ( isset( $this->rating_set_hashes[ $rating_set_hash ] ) ) {
			return;
		}

		$this->rating_set_hashes[ $rating_set_hash ] = 1;
		$this->rating_set_count += 1;
	}

	/**
	 * Compute the mean rating per rating type and the overall average,
	 * which is the mean of those per-type means (0 when there are none).
	 */
	public function calculateRatingAverages() {
		// determine averages for each rating type
		foreach ( $this->ratings as $rating_id => $rating ) {
			$this->rating_averages[ $rating_id ] = array_sum( $rating ) / count( $rating );
		}

		// determine overall rating average for this page
		if ( count( $this->rating_averages ) ) {
			$this->overall_average = array_sum( $this->rating_averages ) / count( $this->rating_averages );
		} else {
			$this->overall_average = 0;
		}
	}

	/**
	 * Returns whether or not this page is considered problematic
	 *
	 * The status is computed on first call and cached afterwards.
	 * @return bool
	 */
	public function isProblematic() {
		if ( !isset( $this->problematic ) ) {
			$this->determineProblematicStatus();
		}
		return $this->problematic;
	}

	/**
	 * Determine whether this article is 'problematic'
	 *
	 * If a page has one or more rating categories where at least 70% of the
	 * ratings are <= 2, it is considered problematic.
	 */
	public function determineProblematicStatus() {
		foreach ( $this->ratings as $ratings ) {
			if ( !$ratings ) {
				// A category without ratings cannot be problematic
				continue;
			}

			$low_count = 0;
			foreach ( $ratings as $rating ) {
				if ( $rating <= 2 ) {
					$low_count += 1;
				}
			}

			// Exact integer form of "low_count / total >= 0.7". Rounding
			// 0.7 * total instead would let e.g. 1 of 2 or 2 of 3 low
			// ratings (50% / 67%) pass as problematic.
			if ( $low_count * 10 >= 7 * count( $ratings ) ) {
				$this->problematic = true;
				return;
			}
		}

		$this->problematic = false;
	}
}
584 | | - |
/**
 * Keyed store of AFPage objects, one per page id.
 *
 * Iterating the store walks the stored pages.
 */
class AFPages implements IteratorAggregate {
	/**
	 * Stored page objects, indexed by page id
	 * @var array
	 */
	public $pages = array();

	/**
	 * Return (by reference) the page stored for an id, creating it first
	 * when the id has not been seen yet.
	 *
	 * @param int $page_id
	 * @return AFPage
	 */
	public function &getPage( $page_id ) {
		$alreadyStored = isset( $this->pages[ $page_id ] );
		if ( !$alreadyStored ) {
			$this->addPage( $page_id );
		}
		return $this->pages[ $page_id ];
	}

	/**
	 * Store a fresh AFPage for an id, replacing any existing entry.
	 *
	 * @param int $page_id
	 */
	public function addPage( $page_id ) {
		$freshPage = new AFPage( $page_id );
		$this->pages[ $page_id ] = $freshPage;
	}

	/**
	 * @return ArrayIterator Iterator over the stored pages
	 */
	public function getIterator() {
		return new ArrayIterator( $this->pages );
	}
}
612 | | - |
// Name the maintenance class defined in this file, then load the file that
// DO_MAINTENANCE points to.
$maintClass = 'PopulateAFStatistics';
require_once( DO_MAINTENANCE );
Index: trunk/extensions/ArticleFeedbackv5/populateAFRevisions.php |
— | — | @@ -1,144 +0,0 @@ |
2 | | -<?php |
3 | | - |
// Locate the MediaWiki installation: use MW_INSTALL_PATH when it is set,
// otherwise fall back to the directory two levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? dirname( __FILE__ ) . '/../..' : $IP;
require( $IP . '/maintenance/Maintenance.php' );
9 | | - |
/**
 * Maintenance script that rebuilds article_feedback_revisions (per-revision
 * rating totals and counts) from the raw rows in article_feedback.
 */
class PopulateAFRevisions extends Maintenance {
	/** Print a progress line once per this many revisions read */
	const REPORTING_INTERVAL = 100;
	/** Number of rows written per REPLACE batch */
	const BATCH_SIZE = 100;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the article_feedback_revisions table";
	}

	/**
	 * Block until every server known to the load balancer has reached the
	 * master's current position.
	 *
	 * FIXME: Copied from updateCollation.php, should be centralized somewhere
	 */
	public function syncDBs() {
		$lb = wfGetLB();
		// bug 27975 - Don't try to wait for slaves if there are none
		// Prevents permission error when getting master position
		if ( $lb->getServerCount() > 1 ) {
			$dbw = $lb->getConnection( DB_MASTER );
			$pos = $dbw->getMasterPos();
			$lb->waitForAll( $pos );
		}
	}

	/**
	 * Read article_feedback one revision at a time, keep each user's most
	 * recent ratings per page, aggregate them per revision and write the
	 * result to article_feedback_revisions.
	 */
	public function execute() {
		$this->output( "Populating article_feedback_revisions table ...\n" );

		// Data structure where we accumulate the data
		// We need this because more recent ratings of the same user to the same page
		// need to overwrite older ratings
		// array( pageid => array( 'userid|anontoken' => array( 'revid' => revid, 'ratings' => array( id => value ) ) ) )
		$data = array();

		$lastRevID = 0;
		$i = 0;
		$dbw = wfGetDB( DB_MASTER );
		$this->output( "Reading data from article_feedback ...\n" );
		while ( true ) {
			// Get the next revision ID (revisions are visited in ascending order)
			$row = $dbw->selectRow( 'article_feedback', array( 'aa_revision', 'aa_page_id' ),
				'aa_revision > ' . intval( $lastRevID ), __METHOD__,
				array( 'ORDER BY' => 'aa_revision', 'LIMIT' => 1 )
			);
			if ( $row === false ) {
				// No next revision, we're done
				break;
			}
			$revid = intval( $row->aa_revision );
			$pageid = intval( $row->aa_page_id );

			// Get all article_feedback rows for this revision
			$res = $dbw->select( 'article_feedback',
				array( 'aa_rating_id', 'aa_rating_value', 'aa_user_id', 'aa_user_anon_token' ),
				array( 'aa_revision' => $revid ),
				__METHOD__
			);

			// Process each of the queried rows and update $data
			foreach ( $res as $row ) {
				$u = "{$row->aa_user_id}|{$row->aa_user_anon_token}";
				// Add entry if not present
				if ( !isset( $data[$pageid][$u] ) ) {
					$data[$pageid][$u] = array( 'revid' => $revid );
				}
				// Update the entry if this row belongs to the same or a more recent revision
				// for the specific user
				if ( $data[$pageid][$u]['revid'] <= $revid ) {
					$data[$pageid][$u]['ratings'][$row->aa_rating_id] = $row->aa_rating_value;
					$data[$pageid][$u]['revid'] = $revid;
				}
			}

			$lastRevID = $revid;

			$i++;
			// Report progress once every REPORTING_INTERVAL revisions. The
			// previous truthiness test on the remainder printed on every
			// iteration *except* multiples of the interval.
			if ( $i % self::REPORTING_INTERVAL == 0 ) {
				$this->output( "$lastRevID\n" );
			}
		}
		$this->output( "done\n" );

		// Reorganize the data into per-revision counts and totals
		// array( revid => array( 'pageid' => pageid, 'ratings' => array( ratingid => array( 'count' => count, 'total' => total ) ) ) )
		$data2 = array();
		foreach ( $data as $pageid => $pageData ) {
			foreach ( $pageData as $user => $userData ) {
				$data2[$userData['revid']]['pageid'] = $pageid;
				foreach ( $userData['ratings'] as $id => $value ) {
					if ( !isset( $data2[$userData['revid']]['ratings'][$id] ) ) {
						$data2[$userData['revid']]['ratings'][$id] = array( 'count' => 0, 'total' => 0 );
					}
					// Zero-valued ratings add nothing to the total and are not counted
					if ( $value > 0 ) {
						$data2[$userData['revid']]['ratings'][$id]['count']++;
					}
					$data2[$userData['revid']]['ratings'][$id]['total'] += $value;
				}
			}
		}
		// Reorganize the data again, into DB rows this time
		$rows = array();
		foreach ( $data2 as $revid => $revData ) {
			foreach ( $revData['ratings'] as $ratingID => $ratingData ) {
				$rows[] = array(
					'afr_page_id' => $revData['pageid'],
					'afr_revision' => $revid,
					'afr_rating_id' => $ratingID,
					'afr_total' => $ratingData['total'],
					'afr_count' => $ratingData['count']
				);
			}
		}

		$this->output( "Writing data to article_feedback_revisions ...\n" );
		$rowsInserted = 0;
		while ( $rows ) {
			// array_splice() removes the batch from $rows, so the loop ends
			// once every row has been written
			$batch = array_splice( $rows, 0, self::BATCH_SIZE );
			$dbw->replace( 'article_feedback_revisions',
				array( array( 'afr_page_id', 'afr_rating_id', 'afr_revision' ) ),
				$batch, __METHOD__
			);
			$rowsInserted += count( $batch );
			$this->syncDBs();
			$this->output( "$rowsInserted rows\n" );
		}
		$this->output( "done\n" );
	}
}
143 | | - |
// Name the maintenance class defined in this file, then load the file that
// DO_MAINTENANCE points to.
$maintClass = 'PopulateAFRevisions';
require_once( DO_MAINTENANCE );
\ No newline at end of file |
Index: trunk/extensions/ArticleFeedbackv5/sql/FixPropertiesAnonTokenSchema.sql |
— | — | @@ -1 +0,0 @@ |
-- Redefine article_feedback_properties.afp_user_anon_token as a NOT NULL
-- varbinary(32) column that defaults to the empty string
ALTER TABLE /*_*/article_feedback_properties
	MODIFY afp_user_anon_token varbinary(32) NOT NULL DEFAULT '';
Index: trunk/extensions/ArticleFeedbackv5/sql/AddRevisionsTable.sql |
— | — | @@ -1,15 +0,0 @@ |
-- Per-revision rating totals, filled by populateAFRevisions.php
CREATE TABLE IF NOT EXISTS /*_*/article_feedback_revisions (
	-- Foreign key to page.page_id
	afr_page_id integer unsigned NOT NULL,
	-- Revision that totals are relevant to
	afr_revision integer unsigned NOT NULL,
	-- Rating ID, mapped to a name in $wgArticleFeedbackRatingTypes
	afr_rating_id integer unsigned NOT NULL,
	-- Sum (total) of all the ratings for this article revision
	afr_total integer unsigned NOT NULL,
	-- Number of ratings
	afr_count integer unsigned NOT NULL,
	-- One row per combination of page, rating type and revision
	PRIMARY KEY (afr_page_id, afr_rating_id, afr_revision)
) /*$wgDBTableOptions*/;
Index: trunk/extensions/ArticleFeedbackv5/sql/RenameTables.sql |
— | — | @@ -1,3 +0,0 @@ |
-- Rename the three article_assessment* tables to their article_feedback*
-- names in a single statement
RENAME TABLE
	/*_*/article_assessment_ratings TO /*_*/article_feedback_ratings,
	/*_*/article_assessment TO /*_*/article_feedback,
	/*_*/article_assessment_pages TO /*_*/article_feedback_pages;
\ No newline at end of file |
Index: trunk/extensions/ArticleFeedbackv5/sql/FixAnonTokenSchema.sql |
— | — | @@ -1 +0,0 @@ |
-- Redefine article_feedback.aa_user_anon_token as a NOT NULL varbinary(32)
-- column that defaults to the empty string
ALTER TABLE /*_*/article_feedback
	MODIFY aa_user_anon_token varbinary(32) NOT NULL DEFAULT '';
Index: trunk/extensions/ArticleFeedbackv5/sql/RecreatePK.sql |
— | — | @@ -1,38 +0,0 @@ |
2 | | - |
3 | | - |
-- Step 1: create a replacement table that has an auto-increment surrogate
-- primary key (aa_id) in front of the existing columns
CREATE TABLE /*_*/article_feedback2 (
	aa_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
	aa_page_id integer unsigned NOT NULL,
	aa_user_id integer NOT NULL,
	aa_user_text varbinary(255) NOT NULL,
	aa_user_anon_token varbinary(32) NOT NULL DEFAULT '',
	aa_revision integer unsigned NOT NULL,
	aa_timestamp binary(14) NOT NULL DEFAULT '',
	aa_rating_id int unsigned NOT NULL,
	aa_rating_value int unsigned NOT NULL,
	aa_design_bucket int unsigned NOT NULL DEFAULT 0
) /*$wgDBTableOptions*/;

-- Step 2: indexes on the replacement table
CREATE INDEX /*i*/aa_page_user_token_id
	ON /*_*/article_feedback2 (aa_page_id, aa_user_text, aa_user_anon_token, aa_id);
CREATE INDEX /*i*/aa_revision
	ON /*_*/article_feedback2 (aa_revision);
CREATE INDEX /*i*/article_feedback_timestamp
	ON /*_*/article_feedback2 (aa_timestamp);

-- Step 3: copy every existing row across in a fixed order
-- aa_id is not listed, so it is generated for each copied row
INSERT INTO /*_*/article_feedback2
	(aa_page_id, aa_user_id, aa_user_text, aa_user_anon_token, aa_revision, aa_timestamp, aa_rating_id, aa_rating_value, aa_design_bucket)
	SELECT aa_page_id, aa_user_id, aa_user_text, aa_user_anon_token, aa_revision, aa_timestamp, aa_rating_id, aa_rating_value, aa_design_bucket
	FROM /*_*/article_feedback
	ORDER BY aa_revision, aa_user_text, aa_rating_id, aa_user_anon_token;

-- Step 4: swap the replacement in under the original name
DROP TABLE /*_*/article_feedback;
ALTER TABLE /*_*/article_feedback2 RENAME TO /*_*/article_feedback;
Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackPageIndex.sql |
— | — | @@ -1 +0,0 @@ |
2 | | -/* Composite index on (aa_page_id, aa_timestamp). Presumably serves per-page lookups filtered or ordered by time (confirm against the queries that use it) */ CREATE INDEX /*i*/aa_page_id ON /*_*/article_feedback (aa_page_id, aa_timestamp); |
Index: trunk/extensions/ArticleFeedbackv5/sql/AddPropertiesValueText.sql |
— | — | @@ -1,2 +0,0 @@ |
2 | | -/* Add the afp_value_text column (varbinary(255), NOT NULL, defaulting to the empty string) */ ALTER TABLE /*_*/article_feedback_properties |
3 | | - ADD afp_value_text varbinary(255) DEFAULT '' NOT NULL; |
\ No newline at end of file |
Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackStatsTable.sql |
— | — | @@ -1,13 +0,0 @@ |
2 | | -/* Start clean by removing any existing stats table. The name carries the table-prefix placeholder like every other table reference in this patch */ DROP TABLE IF EXISTS /*_*/article_feedback_stats; |
3 | | -CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats ( |
4 | | - afs_page_id integer unsigned NOT NULL, |
5 | | - -- Numeric data point used for ordering this data |
6 | | - afs_orderable_data double unsigned NOT NULL, |
7 | | - -- JSON object of stat data (the column holds at most 255 bytes) |
8 | | - afs_data varbinary(255) NOT NULL, |
9 | | - afs_stats_type_id integer unsigned NOT NULL, |
10 | | - -- Timestamp of the insertion job |
11 | | - afs_ts binary(14) NOT NULL |
12 | | -) /*$wgDBTableOptions*/; |
13 | | -CREATE UNIQUE INDEX /*i*/afs_type_ts_page ON /*_*/article_feedback_stats(afs_stats_type_id, afs_ts, afs_page_id); |
14 | | -CREATE INDEX /*i*/afs_type_ts_orderable ON /*_*/article_feedback_stats (afs_stats_type_id, afs_ts, afs_orderable_data); |
Index: trunk/extensions/ArticleFeedbackv5/sql/MigrateArticleFeedbackStatsHighsLows.sql |
— | — | @@ -1,22 +0,0 @@ |
2 | | -/* Copy every row of article_feedback_stats_highs_lows into article_feedback_stats, tagging each with the id of the highs_and_lows stats type */ INSERT INTO /*_*/article_feedback_stats ( |
3 | | - afs_page_id, |
4 | | - afs_orderable_data, |
5 | | - afs_data, |
6 | | - afs_ts, |
7 | | - afs_stats_type_id |
8 | | -) |
9 | | -SELECT |
10 | | - afshl_page_id, |
11 | | - afshl_avg_overall, |
12 | | - afshl_avg_ratings, |
13 | | - afshl_ts, |
14 | | - afst_id |
15 | | -FROM |
16 | | - /*_*/article_feedback_stats_highs_lows, |
17 | | - /*_*/article_feedback_stats_types |
18 | | -WHERE |
19 | | - /*_*/article_feedback_stats_types.afst_type='highs_and_lows'; |
20 | | - |
21 | | -/* Remove the legacy table. NOTE(review): this runs even when the SELECT above matched no highs_and_lows type row and therefore copied nothing - confirm the type row exists before running */ DROP TABLE /*_*/article_feedback_stats_highs_lows; |
Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackStatsTypeTable.sql |
— | — | @@ -1,9 +0,0 @@ |
2 | | -/* Lookup table that gives each stats type name a numeric id */ CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats_types ( |
3 | | - afst_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, |
4 | | - afst_type varbinary(255) NOT NULL |
5 | | -) /*$wgDBTableOptions*/; |
6 | | -CREATE UNIQUE INDEX /*i*/afst_type ON /*_*/article_feedback_stats_types( afst_type ); |
7 | | - |
8 | | -/* Seed the known stats types. The table name carries the prefix placeholder so the rows land in the table created above */ INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'highs_and_lows' ); |
9 | | -INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'problems' ); |
\ No newline at end of file |
Index: trunk/extensions/ArticleFeedbackv5/sql/AddRatingBucket.sql |
— | — | @@ -1,2 +0,0 @@ |
2 | | -/* Add the aa_design_bucket column (unsigned int, NOT NULL, default 0) to article_feedback */ ALTER TABLE /*_*/article_feedback |
3 | | - ADD aa_design_bucket int unsigned NOT NULL DEFAULT 0; |
\ No newline at end of file |
Index: trunk/extensions/ArticleFeedbackv5/sql/AddArticleFeedbackTimestampIndex.sql |
— | — | @@ -1,2 +0,0 @@ |
2 | | -/* Index article_feedback rows by aa_timestamp */ CREATE INDEX /*i*/article_feedback_timestamp ON /*_*/article_feedback (aa_timestamp); |
\ No newline at end of file |
Index: trunk/extensions/ArticleFeedbackv5/sql/AddPropertiesTable.sql |
— | — | @@ -1,15 +0,0 @@ |
2 | | -CREATE TABLE /*_*/article_feedback_properties ( |
3 | | - -- Keys matching the identifying fields of article_feedback, except aa_rating_id |
4 | | - -- (used because article_feedback had no single-column primary key to reference - TODO confirm whether aa_id can be used now) |
5 | | - afp_revision integer unsigned NOT NULL, |
6 | | - afp_user_text varbinary(255) NOT NULL, |
7 | | - afp_user_anon_token varbinary(32) NOT NULL DEFAULT '', |
8 | | - |
9 | | - -- Property name (the key of the key/value pair) |
10 | | - afp_key varbinary(255) NOT NULL, |
11 | | - -- Integer value of the property |
12 | | - afp_value integer signed NOT NULL, |
13 | | - -- Text value of the property, defaults to the empty string |
14 | | - afp_value_text varbinary(255) DEFAULT '' NOT NULL |
15 | | -) /*$wgDBTableOptions*/; |
16 | | -CREATE UNIQUE INDEX /*i*/afp_rating_key ON /*_*/article_feedback_properties (afp_revision, afp_user_text, afp_user_anon_token, afp_key); |