r97146 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r97145 | r97146 | r97147 >
Date:12:17, 15 September 2011
Author:catrope
Status:resolved
Tags:
Comment:
Merge live hacks from r83992 to trunk, after cleaning some things up.

* Wait for slaves after every thousand rows rather than after processing every batch. r83992 had 1000 hard-coded; I put it in SYNC_INTERVAL instead (20 batches of BATCH_SIZE = 50 rows each)
* Set $lb->waitTimeout(100000). I have no idea why, but it was in the live hack. Maybe Tim or Domas could enlighten me
* Use a STRAIGHT_JOIN for the query on categorylinks and page because MySQL appears to want to join the tables the wrong way around
* Use cl_collation='previousValue' rather than cl_collation!='newValue' if possible. This was originally a dirty live hack, but I re-implemented it nicely with a --previous-collation command line option
* Print a status update both before and after the SELECT query. This allows the user to notice when the SELECT queries are getting progressively slower, which is an indication that you may want to set --previous-collation
Modified paths:
  • /trunk/phase3/maintenance/updateCollation.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/updateCollation.php
@@ -29,7 +29,8 @@
3030 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
3131
3232 class UpdateCollation extends Maintenance {
33 - const BATCH_SIZE = 50;
 33+ const BATCH_SIZE = 50; // Number of rows to process in one batch
 34+ const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
3435
3536 public function __construct() {
3637 parent::__construct();
@@ -44,10 +45,17 @@
4546
4647 $this->addOption( 'force', 'Run on all rows, even if the collation is ' .
4748 'supposed to be up-to-date.' );
 49+ $this->addOption( 'previous-collation', 'Set the previous value of ' .
 50+ '$wgCategoryCollation here to speed up this script, especially if your ' .
 51+ 'categorylinks table is large. This will only update rows with that ' .
 52+ 'collation, though, so it may miss out-of-date rows with a different, ' .
 53+ 'even older collation.', false, true );
4854 }
4955
5056 public function syncDBs() {
 57+ // TODO: Most of this is duplicated from wfWaitForSlaves(), except for the waitTimeout() call
5158 $lb = wfGetLB();
 59+ $lb->waitTimeout(100000);
5260 // bug 27975 - Don't try to wait for slaves if there are none
5361 // Prevents permission error when getting master position
5462 if ( $lb->getServerCount() > 1 ) {
@@ -63,14 +71,19 @@
6472 $dbw = $this->getDB( DB_MASTER );
6573 $force = $this->getOption( 'force' );
6674
67 - $options = array( 'LIMIT' => self::BATCH_SIZE );
 75+ $options = array( 'LIMIT' => self::BATCH_SIZE, 'STRAIGHT_JOIN' );
6876
6977 if ( $force ) {
7078 $options['ORDER BY'] = 'cl_from, cl_to';
7179 $collationConds = array();
7280 } else {
73 - $collationConds = array( 0 =>
74 - 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation ) );
 81+ if ( $this->hasOption( 'previous-collation' ) ) {
 82+ $collationConds['cl_collation'] = $this->getOption( 'previous-collation' );
 83+ } else {
 84+ $collationConds = array( 0 =>
 85+ 'cl_collation != ' . $dbw->addQuotes( $wgCategoryCollation )
 86+ );
 87+ }
7588
7689 if ( !$wgMiserMode ) {
7790 $count = $dbw->selectField(
@@ -89,9 +102,10 @@
90103 }
91104
92105 $count = 0;
 106+ $batchCount = 0;
93107 $batchConds = array();
94108 do {
95 - $this->output( 'Processing next ' . self::BATCH_SIZE . ' rows... ');
 109+ $this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
96110 $res = $dbw->select(
97111 array( 'categorylinks', 'page' ),
98112 array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
@@ -101,6 +115,7 @@
102116 __METHOD__,
103117 $options
104118 );
 119+ $this->output( " processing..." );
105120
106121 $dbw->begin();
107122 foreach ( $res as $row ) {
@@ -154,7 +169,11 @@
155170 $count += $res->numRows();
156171 $this->output( "$count done.\n" );
157172
158 - $this->syncDBs();
 173+ if ( ++$batchCount % self::SYNC_INTERVAL == 0 ) {
 174+ $this->output( "Waiting for slaves ... " );
 175+ $this->syncDBs();
 176+ $this->output( "done\n" );
 177+ }
159178 } while ( $res->numRows() == self::BATCH_SIZE );
160179 }
161180 }
Property changes on: trunk/phase3/maintenance/updateCollation.php
___________________________________________________________________
Added: svn:mergeinfo
162181 Merged /branches/REL1_17/phase3/maintenance/updateCollation.php:r81445,81448
163182 Merged /branches/sqlite/maintenance/updateCollation.php:r58211-58321
164183 Merged /branches/new-installer/phase3/maintenance/updateCollation.php:r43664-66004
165184 Merged /branches/wmf/1.17wmf1/maintenance/updateCollation.php:r83992
166185 Merged /branches/REL1_15/phase3/maintenance/updateCollation.php:r51646

Follow-up revisions

Revision | Commit summary | Author | Date
r97148 | Followup r97146: drop the $lb->waitTimeout() call per Tim. Was used so Tim co... | catrope | 12:42, 15 September 2011
r97173 | Merged revisions 97087,97091-97092,97094,97096-97098,97100-97101,97103,97136,... | dantman | 16:19, 15 September 2011
r97276 | REL1_18 MFT r97144, r97146, r97192 | reedy | 14:41, 16 September 2011

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r83992 | Committing some live patches by Roan | tstarling | 02:39, 15 March 2011

Status & tagging log