r43151 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r43150‎ | r43151 | r43152 >
Date:21:06, 3 November 2008
Author:tstarling
Status:old (Comments)
Tags:
Comment:
Assorted tweaks on further review and local testing:
* Default 1 child process
* Introduce a log system so that critical errors can be filtered out of the progress messages
* Warn on abnormal child termination
* Use the actual row count for progress instead of the end ID. I think we can afford to count rows.
* Check for bt_moved=0 in doOrphanList() after finishIncompleteMoves(), to avoid attempted double-move and resulting spurious "conflict detected" warning
Modified paths:
  • /trunk/phase3/maintenance/storage/recompressTracked.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/storage/recompressTracked.php
@@ -8,8 +8,11 @@
99 Moves blobs indexed by trackBlobs.php to a specified list of destination clusters, and recompresses them in the process. Restartable.
1010
1111 Options:
12 - --procs <procs> Set the number of child processes (default 8)
13 - --copy-only Copy only, do not update the text table. Restart without this option to complete.
 12+ --procs <procs> Set the number of child processes (default 1)
 13+ --copy-only Copy only, do not update the text table. Restart without this option to complete.
 14+ --debug-log <file> Log debugging data to the specified file
 15+ --info-log <file> Log progress messages to the specified file
 16+ --critical-log <file> Log error messages to the specified file
1417 ";
1518 exit( 1 );
1619 }
@@ -20,21 +23,26 @@
2124 class RecompressTracked {
2225 var $destClusters;
2326 var $batchSize = 1000;
 27+ var $orphanBatchSize = 1000;
2428 var $reportingInterval = 10;
25 - var $numProcs = 8;
 29+ var $numProcs = 1;
2630 var $useDiff, $pageBlobClass, $orphanBlobClass;
2731 var $slavePipes, $slaveProcs, $prevSlaveId;
2832 var $copyOnly = false;
2933 var $isChild = false;
3034 var $slaveId = false;
 35+ var $debugLog, $infoLog, $criticalLog;
3136 var $store;
3237
33 - static $optionsWithArgs = array( 'procs', 'slave-id' );
 38+ static $optionsWithArgs = array( 'procs', 'slave-id', 'debug-log', 'info-log', 'critical-log' );
3439 static $cmdLineOptionMap = array(
3540 'procs' => 'numProcs',
3641 'copy-only' => 'copyOnly',
3742 'child' => 'isChild',
3843 'slave-id' => 'slaveId',
 44+ 'debug-log' => 'debugLog',
 45+ 'info-log' => 'infoLog',
 46+ 'critical-log' => 'criticalLog',
3947 );
4048
4149 static function getOptionsWithArgs() {
@@ -68,8 +76,35 @@
6977
7078 function debug( $msg ) {
7179 wfDebug( "$msg\n" );
 80+ if ( $this->debugLog ) {
 81+ $this->logToFile( $msg, $this->debugLog );
 82+ }
 83+
7284 }
7385
 86+ function info( $msg ) {
 87+ echo "$msg\n";
 88+ if ( $this->infoLog ) {
 89+ $this->logToFile( $msg, $this->infoLog );
 90+ }
 91+ }
 92+
 93+ function critical( $msg ) {
 94+ echo "$msg\n";
 95+ if ( $this->criticalLog ) {
 96+ $this->logToFile( $msg, $this->criticalLog );
 97+ }
 98+ }
 99+
 100+ function logToFile( $msg, $file ) {
 101+ $header = '[' . date('d\TH:i:s') . '] ' . wfHostname() . ' ' . posix_getpid();
 102+ if ( $this->slaveId !== false ) {
 103+ $header .= "({$this->slaveId})";
 104+ }
 105+ $header .= ' ' . wfWikiID();
 106+ wfErrorLog( sprintf( "%-50s %s\n", $header, $msg ), $file );
 107+ }
 108+
74109 /**
75110 * Wait until the selected slave has caught up to the master.
76111 * This allows us to use the slave for things that were committed in a
@@ -114,12 +149,12 @@
115150 function checkTrackingTable() {
116151 $dbr = wfGetDB( DB_SLAVE );
117152 if ( !$dbr->tableExists( 'blob_tracking' ) ) {
118 - echo "Error: blob_tracking table does not exist\n";
 153+ $this->critical( "Error: blob_tracking table does not exist" );
119154 return false;
120155 }
121156 $row = $dbr->selectRow( 'blob_tracking', '*', false, __METHOD__ );
122157 if ( !$row ) {
123 - echo "Warning: blob_tracking table contains no rows, skipping this wiki.\n";
 158+ $this->info( "Warning: blob_tracking table contains no rows, skipping this wiki." );
124159 return false;
125160 }
126161 return true;
@@ -156,7 +191,7 @@
157192 $proc = proc_open( "$cmd --slave-id $i", $spec, $pipes );
158193 wfRestoreWarnings();
159194 if ( !$proc ) {
160 - echo "Error opening slave process\n";
 195+ $this->critical( "Error opening slave process" );
161196 exit( 1 );
162197 }
163198 $this->slaveProcs[$i] = $proc;
@@ -169,12 +204,17 @@
170205 * Gracefully terminate the child processes
171206 */
172207 function killSlaveProcs() {
 208+ $this->info( "Waiting for slave processes to finish..." );
173209 for ( $i = 0; $i < $this->numProcs; $i++ ) {
174210 $this->dispatchToSlave( $i, 'quit' );
175211 }
176212 for ( $i = 0; $i < $this->numProcs; $i++ ) {
177 - proc_close( $this->slaveProcs[$i] );
 213+ $status = proc_close( $this->slaveProcs[$i] );
 214+ if ( $status ) {
 215+ $this->critical( "Warning: child #$i exited with status $status" );
 216+ }
178217 }
 218+ $this->info( "Done." );
179219 }
180220
181221 /**
@@ -186,7 +226,7 @@
187227 $pipes = $this->slavePipes;
188228 $numPipes = stream_select( $x=array(), $pipes, $y=array(), 3600 );
189229 if ( !$numPipes ) {
190 - echo "Error waiting to write to slaves. Aborting\n";
 230+ $this->critical( "Error waiting to write to slaves. Aborting" );
191231 exit( 1 );
192232 }
193233 for ( $i = 0; $i < $this->numProcs; $i++ ) {
@@ -197,7 +237,7 @@
198238 return;
199239 }
200240 }
201 - echo "Unreachable\n";
 241+ $this->critical( "Unreachable" );
202242 exit( 1 );
203243 }
204244
@@ -215,12 +255,19 @@
216256 */
217257 function doAllPages() {
218258 $dbr = wfGetDB( DB_SLAVE );
 259+ $i = 0;
219260 $startId = 0;
220 - $endId = $dbr->selectField( 'blob_tracking', 'MAX(bt_page)',
 261+ $numPages = $dbr->selectField( 'blob_tracking',
 262+ 'COUNT(DISTINCT bt_page)',
221263 # A condition is required so that this query uses the index
222264 array( 'bt_moved' => 0 ),
223 - __METHOD__ );
224 - echo "Moving pages...\n";
 265+ __METHOD__
 266+ );
 267+ if ( $this->copyOnly ) {
 268+ $this->info( "Copying pages..." );
 269+ } else {
 270+ $this->info( "Moving pages..." );
 271+ }
225272 while ( true ) {
226273 $res = $dbr->select( 'blob_tracking',
227274 array( 'bt_page' ),
@@ -240,21 +287,27 @@
241288 }
242289 foreach ( $res as $row ) {
243290 $this->dispatch( 'doPage', $row->bt_page );
 291+ $i++;
244292 }
245293 $startId = $row->bt_page;
246 - $this->report( $startId, $endId );
 294+ $this->report( 'pages', $i, $numPages );
247295 }
248 - echo "Done moving pages.\n";
 296+ $this->report( 'pages', $i, $numPages );
 297+ if ( $this->copyOnly ) {
 298+ $this->info( "All page copies queued." );
 299+ } else {
 300+ $this->info( "All page moves queued." );
 301+ }
249302 }
250303
251304 /**
252305 * Display a progress report
253306 */
254 - function report( $start, $end ) {
 307+ function report( $label, $current, $end ) {
255308 $this->numBatches++;
256 - if ( $this->numBatches >= $this->reportingInterval ) {
 309+ if ( $current == $end || $this->numBatches >= $this->reportingInterval ) {
257310 $this->numBatches = 0;
258 - echo "$start / $end\n";
 311+ $this->info( "$label: $current / $end" );
259312 wfWaitForSlaves( 5 );
260313 }
261314 }
@@ -265,13 +318,20 @@
266319 function doAllOrphans() {
267320 $dbr = wfGetDB( DB_SLAVE );
268321 $startId = 0;
269 - $endId = $dbr->selectField( 'blob_tracking', 'MAX(bt_text_id)',
 322+ $i = 0;
 323+ $numOrphans = $dbr->selectField( 'blob_tracking',
 324+ 'COUNT(DISTINCT bt_text_id)',
270325 array( 'bt_moved' => 0, 'bt_page' => 0 ),
271326 __METHOD__ );
272 - if ( !$endId ) {
 327+ if ( !$numOrphans ) {
273328 return;
274329 }
275 - echo "Moving orphans...\n";
 330+ if ( $this->copyOnly ) {
 331+ $this->info( "Copying orphans..." );
 332+ } else {
 333+ $this->info( "Moving orphans..." );
 334+ }
 335+ $ids = array();
276336
277337 while ( true ) {
278338 $res = $dbr->select( 'blob_tracking',
@@ -291,15 +351,24 @@
292352 if ( !$res->numRows() ) {
293353 break;
294354 }
295 - $args = array( 'doOrphanList' );
296355 foreach ( $res as $row ) {
297 - $args[] = $row->bt_text_id;
 356+ $ids[] = $row->bt_text_id;
 357+ $i++;
298358 }
299 - call_user_func_array( array( $this, 'dispatch' ), $args );
 359+ // Need to send enough orphan IDs to the child at a time to fill a blob,
 360+ // so orphanBatchSize needs to be at least ~100.
 361+ // batchSize can be smaller or larger.
 362+ while ( count( $ids ) > $this->orphanBatchSize ) {
 363+ $args = array_slice( $ids, 0, $this->orphanBatchSize );
 364+ $ids = array_slice( $ids, $this->orphanBatchSize );
 365+ array_unshift( $args, 'doOrphanList' );
 366+ call_user_func_array( array( $this, 'dispatch' ), $args );
 367+ }
300368 $startId = $row->bt_text_id;
301 - $this->report( $startId, $endId );
 369+ $this->report( 'orphans', $i, $numOrphans );
302370 }
303 - echo "Done moving orphans.\n";
 371+ $this->report( 'orphans', $i, $numOrphans );
 372+ $this->info( "All orphans queued." );
304373 }
305374
306375 /**
@@ -345,6 +414,7 @@
346415 // Finish any incomplete transactions
347416 if ( !$this->copyOnly ) {
348417 $this->finishIncompleteMoves( array( 'bt_page' => $pageId ) );
 418+ $this->syncDBs();
349419 }
350420
351421 $startId = 0;
@@ -381,7 +451,7 @@
382452 // Load the text
383453 $text = Revision::getRevisionText( $row );
384454 if ( $text === false ) {
385 - echo "Error loading {$row->bt_rev_id}/{$row->bt_text_id}\n";
 455+ $this->critical( "Error loading {$row->bt_rev_id}/{$row->bt_text_id}" );
386456 continue;
387457 }
388458
@@ -410,6 +480,10 @@
411481 * The transaction is kept short to reduce locking.
412482 */
413483 function moveTextRow( $textId, $url ) {
 484+ if ( $this->copyOnly ) {
 485+ $this->critical( "Internal error: can't call moveTextRow() in --copy-only mode" );
 486+ exit( 1 );
 487+ }
414488 $dbw = wfGetDB( DB_MASTER );
415489 $dbw->begin();
416490 $dbw->update( 'text',
@@ -491,19 +565,33 @@
492566 */
493567 function doOrphanList( $textIds ) {
494568 // Finish incomplete moves
495 - $this->finishIncompleteMoves( array( 'bt_text_id' => $textIds ) );
 569+ if ( !$this->copyOnly ) {
 570+ $this->finishIncompleteMoves( array( 'bt_text_id' => $textIds ) );
 571+ $this->syncDBs();
 572+ }
496573
497574 $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass );
498 - foreach ( $textIds as $textId ) {
499 - $row = wfGetDB( DB_SLAVE )->selectRow( 'text', array( 'old_text', 'old_flags' ),
500 - array( 'old_id' => $textId ), __METHOD__ );
 575+
 576+ $res = wfGetDB( DB_SLAVE )->select(
 577+ array( 'text', 'blob_tracking' ),
 578+ array( 'old_id', 'old_text', 'old_flags' ),
 579+ array(
 580+ 'old_id' => $textIds,
 581+ 'bt_text_id=old_id',
 582+ 'bt_moved' => 0,
 583+ ),
 584+ __METHOD__,
 585+ array( 'DISTINCT' )
 586+ );
 587+
 588+ foreach ( $res as $row ) {
501589 $text = Revision::getRevisionText( $row );
502590 if ( $text === false ) {
503 - echo "Error: cannot load revision text for $textId\n";
 591+ $this->critical( "Error: cannot load revision text for old_id=$textId" );
504592 continue;
505593 }
506594
507 - if ( !$trx->addItem( $text, $textId ) ) {
 595+ if ( !$trx->addItem( $text, $row->old_id ) ) {
508596 $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" );
509597 $trx->commit();
510598 $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass );
@@ -605,8 +693,8 @@
606694 // All have been moved already
607695 if ( $originalCount > 1 ) {
608696 // This is suspcious, make noise
609 - echo "Warning: concurrent operation detected, are there two conflicting " .
610 - "processes running, doing the same job?\n";
 697+ $this->critical( "Warning: concurrent operation detected, are there two conflicting " .
 698+ "processes running, doing the same job?" );
611699 }
612700 return;
613701 }

Comments

#Comment by Brion VIBBER (talk | contribs)   01:35, 14 November 2008

Assuming OK per tim.... test at leisure :D

Status & tagging log