r105601 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r105600‎ | r105601 | r105602 >
Date:22:34, 8 December 2011
Author:gregchiasson
Status:resolved (Comments)
Tags:
Comment:
Rework the way AFTv5 rollups tables work, which should alleviate performance concerns over huge group-by queries on the answers table.
Modified paths:
  • /trunk/extensions/ArticleFeedbackv5/api/ApiArticleFeedbackv5.php (modified) (history)
  • /trunk/extensions/ArticleFeedbackv5/sql/ArticleFeedbackv5.sql (modified) (history)
  • /trunk/extensions/ArticleFeedbackv5/sql/alter.sql (modified) (history)

Diff [purge]

Index: trunk/extensions/ArticleFeedbackv5/sql/ArticleFeedbackv5.sql
@@ -104,7 +104,7 @@
105105 */
106106 CREATE TABLE IF NOT EXISTS /*_*/aft_article_feedback_ratings_rollup (
107107 arr_page_id integer unsigned NOT NULL,
108 - arr_rating_id integer unsigned NOT NULL,
 108+ arr_field_id integer unsigned NOT NULL,
109109 arr_total integer unsigned NOT NULL,
110110 arr_count integer unsigned NOT NULL,
111111 PRIMARY KEY (arr_page_id, arr_rating_id)
@@ -113,7 +113,7 @@
114114 CREATE TABLE IF NOT EXISTS /*_*/aft_article_revision_feedback_ratings_rollup (
115115 afrr_page_id integer unsigned NOT NULL,
116116 afrr_revision_id integer unsigned NOT NULL,
117 - afrr_rating_id integer unsigned NOT NULL,
 117+ afrr_field_id integer unsigned NOT NULL,
118118 afrr_total integer unsigned NOT NULL,
119119 afrr_count integer unsigned NOT NULL,
120120 PRIMARY KEY (afrr_page_id, afrr_rating_id, afrr_revision_id)
@@ -122,6 +122,7 @@
123123 CREATE TABLE IF NOT EXISTS /*_*/aft_article_feedback_select_rollup (
124124 afsr_page_id integer unsigned NOT NULL,
125125 afsr_option_id integer unsigned NOT NULL,
 126+ afsr_field_id integer unsigned NOT NULL,
126127 afsr_total integer unsigned NOT NULL,
127128 afsr_count integer unsigned NOT NULL,
128129 PRIMARY KEY (afsr_page_id, afsr_option_id)
@@ -131,6 +132,7 @@
132133 arfsr_page_id integer unsigned NOT NULL,
133134 arfsr_revision_id integer unsigned NOT NULL,
134135 arfsr_option_id integer unsigned NOT NULL,
 136+ arfsr_field_id integer unsigned NOT NULL,
135137 arfsr_total integer unsigned NOT NULL,
136138 arfsr_count integer unsigned NOT NULL,
137139 PRIMARY KEY (arfsr_revision_id, arfsr_option_id)
Index: trunk/extensions/ArticleFeedbackv5/sql/alter.sql
@@ -30,3 +30,9 @@
3131 ALTER TABLE aft_article_feedback DROP COLUMN af_user_text;
3232 ALTER TABLE aft_article_feedback DROP COLUMN af_modified;
3333 ALTER TABLE aft_article_feedback MODIFY COLUMN af_created binary(14) NOT NULL DEFAULT '';
 34+
 35+-- added 12/8 (greg)
 36+ALTER TABLE aft_article_revision_feedback_select_rollup ADD COLUMN arfsr_field_id int NOT NULL;
 37+ALTER TABLE aft_article_revision_feedback_ratings_rollup CHANGE COLUMN afrr_rating_id afrr_field_id integer unsigned NOT NULL;
 38+ALTER TABLE aft_article_feedback_ratings_rollup CHANGE COLUMN arr_rating_id arr_field_id integer unsigned NOT NULL;
 39+ALTER TABLE aft_article_feedback_select_rollup ADD COLUMN afsr_field_id int NOT NULL;
Index: trunk/extensions/ArticleFeedbackv5/api/ApiArticleFeedbackv5.php
@@ -89,7 +89,7 @@
9090 }
9191
9292 $ctaId = $this->saveUserRatings( $user_answers, $feedbackId, $bucket );
93 - $this->updateRollupTables( $pageId, $revisionId );
 93+ $this->updateRollupTables( $pageId, $revisionId, $user_answers );
9494
9595 if( $params['email'] ) {
9696 $this->captureEmail ( $params['email'], json_encode(
@@ -181,12 +181,13 @@
182182 /**
183183 * Update the rollup tables
184184 *
185 - * @param $page int the page id
186 - * @param $revision int the revision id
 185+ * @param $page int the page id
 186+ * @param $revision int the revision id
 187+ * @param $data array the user's validated feedback answers
187188 */
188 - public function updateRollupTables( $page, $revision ) {
 189+ public function updateRollupTables( $page, $revision, $data ) {
189190 foreach( array( 'rating', 'boolean', 'option_id' ) as $type ) {
190 - $this->updateRollup( $page, $revision, $type );
 191+ $this->updateRollup( $page, $revision, $type, $data );
191192 }
192193 }
193194
@@ -210,130 +211,228 @@
211212 * @param $page int the page id
212213 * @param $revision int the revision id
213214 * @param $type string the type (rating, select, or boolean)
 215+ * @param $raw_data array the user's validated feedback answers
214216
215 - * Selects feedback across the last $wgArticleFeedbackv5RatingLifetime
216 - * revisions of this page, grouped by revisionId. Each revision row
217 - * is replaced, as well as the page-level one in that rollup, which is
218 - * the sum of all revision-level rollups. We don't know if the revision
219 - * window has changed, so every time we're checking for the relevent
220 - * revisions and replacing the page value, as old revisions fall out of
221 - * the window.
 217+ * This should:
 218+ * 0. Attempt to insert a blank revision rollup row for each $data of type $type, based on revId, fieldId.
 219+ * 1. Increment said revision rollup for each $data of type $type, based on revId, fieldId, and value
 220+ * 2. Re-calculate the page value, across the last [X] revisions (an old revision, or more, may have moved outside of the wgArticleFeedbackv5RatingLifetime window, so we can't just increment the page level rollups - revision-level, absolutely)
222221
223222 */
224 - private function updateRollup( $pageId, $revId, $type ) {
 223+ private function updateRollup( $pageId, $revId, $type, $raw_data ) {
225224 # sanity check
226225 if ( $type != 'rating' && $type != 'option_id' && $type != 'boolean' ) {
227226 return 0;
228227 }
 228+
 229+ // Strip out the data not of this type.
 230+ foreach ( $raw_data as $row ) {
 231+ if ( $row["aa_response_$type"] != null ) {
 232+ $this->updateRollupRow( $pageId, $revId, $type, $row );
 233+ }
 234+ }
229235
230 - global $wgArticleFeedbackv5RatingLifetime;
231 - $dbr = wfGetDB( DB_SLAVE );
232 - $dbw = wfGetDB( DB_MASTER );
233 - $limit = $this->getRevisionLimit( $pageId );
234 - $page_data = array();
235 - $rev_data = array();
236 - $rev_table = 'aft_article_revision_feedback_'
237 - . ( $type == 'option_id' ? 'select' : 'ratings' )
238 - . '_rollup';
239 - $page_table = 'aft_article_feedback_'
240 - . ( $type == 'option_id' ? 'select' : 'ratings' )
241 - . '_rollup';
 236+ }
242237
243 - if ( $type == 'option_id' ) {
244 - $page_prefix = 'afsr_';
245 - $rev_prefix = 'arfsr_';
246 - $select = array( 'aa_field_id', 'aa_response_option_id', 'COUNT(aa_response_option_id) AS earned', '0 AS submits' );
247 - $group = array( 'GROUP BY' => 'aa_response_option_id' );
248 - } else {
249 - $page_prefix = 'arr_';
250 - $rev_prefix = 'afrr_';
251 - $select = array( 'aa_field_id', "SUM(aa_response_$type) AS earned", 'COUNT(*) AS submits' );
252 - $group = array( 'GROUP BY' => 'aa_field_id' );
 238+ /**
 239+ * Update the rollup tables
 240+ *
 241+ * @param $page int the page id
 242+ * @param $revision int the revision id
 243+ * @param $type string the type (rating, select, or boolean)
 244+ * @param $row array a user's validated feedback answer
253245
254 - }
 246+ * This should:
 247+ * 0. Attempt to insert a blank revision rollup row, based on revId, fieldId.
 248+ * 1. Increment said revision rollup, based on revId, fieldId, and value
 249+ * 2. Re-calculate the page rolup value, across the last [X] revisions (an old revision, or more, may have moved outside of the wgArticleFeedbackv5RatingLifetime window, so we can't just increment the page level rollups - revision-level, absolutely)
255250
256 - $rows = $dbr->select(
257 - array(
258 - 'aft_article_answer',
259 - 'aft_article_feedback',
260 - 'aft_article_field'
261 - ),
262 - $select,
263 - array(
264 - 'afi_data_type' => $type,
265 - 'af_page_id' => $pageId,
266 - 'aa_feedback_id = af_id',
267 - 'afi_id = aa_field_id',
268 - "af_revision_id >= $limit",
269 - ),
270 - __METHOD__,
271 - $group
272 - );
 251+ */
 252+ private function updateRollupRow( $pageId, $revId, $type, $row ) {
 253+ $dbr = wfGetDB( DB_SLAVE );
 254+ $dbw = wfGetDB( DB_MASTER );
 255+ $limit = $this->getRevisionLimit( $pageId );
 256+ $field = $row['aa_field_id'];
 257+ $value = $row["aa_response_$type"];
273258
274 - // We've already grouped by option_id, so in order to get
275 - // counts grouped by field_id, we need to sum them up here.
276 - // Necessary for select rollups, unused on ratings/booleans.
277 - $totals = array();
278259 if( $type == 'option_id' ) {
279 - foreach ( $rows as $row ) {
280 - if( !array_key_exists( $row->aa_field_id, $totals ) ) {
281 - $totals[$row->aa_field_id] = 0;
282 - }
283 - $totals[$row->aa_field_id] += $row->earned;
284 - }
285 - }
 260+ // Selects are kind of a odd bird. We store one row
 261+ // per option per field, and each one has the number
 262+ // of times that option was chosen, and the number of
 263+ // times the question was shown in total. So, you'd
 264+ // have 1/10, 2/10, 7/10, eg. We increment the times
 265+ // chosen on the one that was chosen, and the times
 266+ // shown on all of them.
286267
287 - foreach ( $rows as $row ) {
288 - if( $type == 'option_id' ) {
289 - $key = $row->aa_response_option_id;
290 - $field = 'option_id';
291 - $value = $row->aa_response_option_id;
292 - $count = $totals[$row->aa_field_id];
293 - } else {
294 - $key = $row->aa_field_id;
295 - $field = 'rating_id';
296 - $value = $row->aa_field_id;
297 - $count = $row->submits;
298 - }
 268+ // Fetch all the options for this field.
 269+ $options = $dbr->select(
 270+ 'aft_article_field_option',
 271+ array( 'afo_option_id' ),
 272+ array( 'afo_field_id' => $field ),
 273+ __METHOD__
 274+ );
299275
300 - if ( !array_key_exists( $key, $page_data ) ) {
301 - $page_data[$key] = array(
302 - $page_prefix . 'page_id' => $pageId,
303 - $page_prefix . 'total' => 0,
304 - $page_prefix . 'count' => 0,
305 - $page_prefix . $field => $value
 276+ // For each option this field has, make sure we have
 277+ // a row by inserting one - will fail silently if the
 278+ // row already exists.
 279+ foreach( $options as $option ) {
 280+ // These inserts could possibly fail or succeed
 281+ // individually, so we can't use the multiple-
 282+ // insert functionality of the insert class.
 283+ $dbw->insert(
 284+ 'aft_article_revision_feedback_select_rollup',
 285+ array(
 286+ 'arfsr_page_id' => $pageId,
 287+ 'arfsr_revision_id' => $revId,
 288+ 'arfsr_field_id' => $field,
 289+ 'arfsr_option_id' => $option->afo_option_id,
 290+ 'arfsr_total' => 0,
 291+ 'arfsr_count' => 0,
 292+ ),
 293+ __METHOD__,
 294+ array( 'IGNORE' )
306295 );
307296 }
308297
309 - $rev_data[] = array(
310 - $rev_prefix . 'page_id' => $pageId,
311 - $rev_prefix . 'revision_id' => $revId,
312 - $rev_prefix . 'total' => $row->earned,
313 - $rev_prefix . 'count' => $count,
314 - $rev_prefix . $field => $value
 298+ // Increment number of picks for this option.
 299+ $dbw->update(
 300+ 'aft_article_revision_feedback_select_rollup',
 301+ array(
 302+ 'arfsr_total = arfsr_total + 1',
 303+ ),
 304+ array(
 305+ 'arfsr_page_id' => $pageId,
 306+ 'arfsr_revision_id' => $revId,
 307+ 'arfsr_field_id' => $field,
 308+ 'arfsr_option_id' => $value,
 309+ ),
 310+ __METHOD__
315311 );
 312+ // Increment count for ALL options on this field.
 313+ $dbw->update(
 314+ 'aft_article_revision_feedback_select_rollup',
 315+ array(
 316+ 'arfsr_count = arfsr_count + 1',
 317+ ),
 318+ array(
 319+ 'arfsr_page_id' => $pageId,
 320+ 'arfsr_revision_id' => $revId,
 321+ 'arfsr_field_id' => $field,
 322+ ),
 323+ __METHOD__
 324+ );
 325+ } else {
 326+ // Make sure we have a row by inserting one - will fail
 327+ // silently if the row already exists.
 328+ $dbw->insert(
 329+ 'aft_article_revision_feedback_ratings_rollup',
 330+ array(
 331+ 'afrr_page_id' => $pageId,
 332+ 'afrr_revision_id' => $revId,
 333+ 'afrr_field_id' => $field,
 334+ 'afrr_total' => 0,
 335+ 'afrr_count' => 0,
 336+ ),
 337+ __METHOD__,
 338+ array( 'IGNORE' )
 339+ );
316340
317 - $page_data[$key][$page_prefix . 'total'] += $row->earned;
318 - $page_data[$key][$page_prefix . 'count'] += $count;
 341+ // Update total rating, and increment count.
 342+ $dbw->update(
 343+ 'aft_article_revision_feedback_ratings_rollup',
 344+ array(
 345+ "afrr_total = afrr_total + $value",
 346+ "afrr_total = afrr_count + 1",
 347+ ),
 348+ array(
 349+ 'afrr_page_id' => $pageId,
 350+ 'afrr_revision_id' => $revId,
 351+ 'afrr_field_id' => $field,
 352+ ),
 353+ __METHOD__
 354+ );
319355 }
320356
321 - if ( count( $page_data ) < 1 ) {
322 - return;
 357+ // Revision rollups being done, we update the page rollups.
 358+ // These are built off of the revision rollups, and only
 359+ // count revisions back to the user-specified limit, so
 360+ // they need to be recalculated every time, since we don't
 361+ // know what revision we're dealing with, or how many times
 362+ // a page has changed since the last feedback.
 363+
 364+ // Select rollup data for revisions, grouped up by field, so we
 365+ // can drop it into the page rollups.
 366+ if( $type == 'option_id' ) {
 367+ $table = 'aft_article_feedback_select_rollup';
 368+ $prefix = 'afsr_';
 369+ $rows = $dbr->select(
 370+ 'aft_article_revision_feedback_select_rollup',
 371+ array(
 372+ 'arfsr_option_id',
 373+ 'SUM(arfsr_total) AS total',
 374+ 'SUM(arfsr_count) AS count'
 375+ ),
 376+ array(
 377+ 'arfsr_page_id' => $pageId,
 378+ "arfsr_revision_id > $limit",
 379+ 'arfsr_field_id' => $field
 380+ ),
 381+ __METHOD__,
 382+ array( 'GROUP BY' => 'arfsr_option_id' )
 383+ );
 384+
 385+ $page_data = array();
 386+ foreach( $rows as $row ) {
 387+ $page_data[] = array(
 388+ 'afsr_page_id' => $pageId,
 389+ 'afsr_field_id' => $field,
 390+ 'afsr_option_id' => $row->arfsr_option_id,
 391+ 'afsr_total' => $row->total,
 392+ 'afsr_count' => $row->count
 393+ );
 394+ }
 395+ } else {
 396+ $table = 'aft_article_feedback_ratings_rollup';
 397+ $prefix = 'arr_';
 398+ $row = $dbr->selectRow(
 399+ 'aft_article_revision_feedback_ratings_rollup',
 400+ array(
 401+ 'SUM(afrr_total) AS total',
 402+ 'SUM(afrr_count) AS count'
 403+ ),
 404+ array(
 405+ 'afrr_page_id' => $pageId,
 406+ "afrr_revision_id > $limit",
 407+ 'afrr_field_id' => $field
 408+ ),
 409+ __METHOD__,
 410+ array( 'GROUP BY' => 'afrr_field_id' )
 411+ );
 412+
 413+ $page_data = array(
 414+ 'arr_page_id' => $pageId,
 415+ 'arr_field_id' => $field,
 416+ 'arr_total' => $row->total,
 417+ 'arr_count' => $row->count
 418+ );
323419 }
 420+
 421+ $dbw->begin();
 422+ // Delete the existing page rollup rows.
 423+ $dbw->delete( $table, array(
 424+ $prefix . 'page_id' => $pageId,
 425+ $prefix . 'field_id' => $field
 426+ ), __METHOD__ );
324427
325 - $dbw->begin();
326 - $dbw->delete( $rev_table, array(
327 - $rev_prefix . 'page_id' => $pageId,
328 - $rev_prefix . 'revision_id' => $revId,
329 - $rev_prefix . $field => array_keys( $page_data ),
330 - ) );
331 - $dbw->insert( $rev_table, $rev_data );
332 - $dbw->delete( $page_table, array(
333 - $page_prefix . 'page_id' => $pageId,
334 - $page_prefix . $field => array_keys( $page_data ),
335 - ) );
336 - $dbw->insert( $page_table, array_values ( $page_data ) );
 428+ // Insert the new ones.
 429+ $dbw->insert( $table, $page_data, __METHOD__ );
337430 $dbw->commit();
 431+
 432+ // One way to speed this up would be to purge old rows from
 433+ // the revision_rollup tables, as soon as they're out of the
 434+ // window in which we count them. 30 revisions per page is still
 435+ // a lot, but it'd be better than this, which has no limit and
 436+ // will only get larger over time.
338437 }
339438
340439 /**

Follow-up revisions

RevisionCommit summaryAuthorDate
r106334AFTv5 - Remove extraneous group by and pass variables through intval() to clo...gregchiasson16:46, 15 December 2011

Comments

#Comment by Catrope (talk | contribs)   19:18, 12 December 2011
+					'arfsr_page_id'     => $pageId,
+					'arfsr_revision_id' => $revId,
+					'arfsr_field_id'    => $field,
+					'arfsr_option_id'   => $value,

If you're doing this, then all these four fields need to be in the primary key for this table. Right now, only two of them are. The best order would probably be page, field, revision, option (not because that makes sense but because of the queries issued). Ditto for the other table (afrr_): there's a PK with three fields but it's missing field_id. Order should be page, field, revision.

+					"afrr_total = afrr_total + $value",
[...]
+					"arfsr_revision_id > $limit",

I would feel safer if $value and $limit were run through intval() before being injected into SQL here.

+				array( 'GROUP BY' => 'afrr_field_id' )

??? You've got a WHERE afrr_field_id = N so this GROUP BY seems useless to me.

OK otherwise.

Status & tagging log