Index: trunk/phase3/maintenance/storage/recompressTracked.php |
— | — | @@ -4,9 +4,13 @@ |
5 | 5 | require( dirname( __FILE__ ) .'/../commandLine.inc' ); |
6 | 6 | |
7 | 7 | if ( count( $args ) < 1 ) { |
8 | | - echo "Usage: php recompressTracked.php <cluster> [... <cluster>...]\n"; |
9 | | - echo "Moves blobs indexed by trackBlobs.php to a specified list of destination |
10 | | -clusters, and recompresses them in the process. Restartable.\n"; |
| 8 | + echo "Usage: php recompressTracked.php [options] <cluster> [... <cluster>...] |
| 9 | +Moves blobs indexed by trackBlobs.php to a specified list of destination clusters, and recompresses them in the process. Restartable. |
| 10 | + |
| 11 | +Options: |
| 12 | + --procs <procs> Set the number of child processes (default 8) |
| 13 | + --copy-only Copy only, do not update the text table. Restart without this option to complete. |
| 14 | +"; |
11 | 15 | exit( 1 ); |
12 | 16 | } |
13 | 17 | |
— | — | @@ -18,17 +22,16 @@ |
19 | 23 | var $batchSize = 1000; |
20 | 24 | var $reportingInterval = 10; |
21 | 25 | var $numProcs = 8; |
| 26 | + var $useDiff, $pageBlobClass, $orphanBlobClass; |
22 | 27 | var $slavePipes, $slaveProcs, $prevSlaveId; |
23 | | - var $blobClass = 'DiffHistoryBlob'; |
24 | 28 | var $copyOnly = false; |
25 | 29 | var $isChild = false; |
26 | 30 | var $slaveId = false; |
27 | 31 | var $store; |
28 | 32 | |
29 | | - static $optionsWithArgs = array( 'procs', 'class' ); |
| 33 | + static $optionsWithArgs = array( 'procs', 'slave-id' ); |
30 | 34 | static $cmdLineOptionMap = array( |
31 | 35 | 'procs' => 'numProcs', |
32 | | - 'class' => 'blobClass', |
33 | 36 | 'copy-only' => 'copyOnly', |
34 | 37 | 'child' => 'isChild', |
35 | 38 | 'slave-id' => 'slaveId', |
— | — | @@ -53,14 +56,18 @@ |
54 | 57 | $this->$name = $value; |
55 | 58 | } |
56 | 59 | $this->store = new ExternalStoreDB; |
| 60 | + if ( !$this->isChild ) { |
| 61 | + $GLOBALS['wgDebugLogPrefix'] = "RCT M: "; |
| 62 | + } elseif ( $this->slaveId !== false ) { |
| 63 | + $GLOBALS['wgDebugLogPrefix'] = "RCT {$this->slaveId}: "; |
| 64 | + } |
| 65 | + $this->useDiff = function_exists( 'xdiff_string_bdiff' ); |
| 66 | + $this->pageBlobClass = $this->useDiff ? 'DiffHistoryBlob' : 'ConcatenatedGzipHistoryBlob'; |
| 67 | + $this->orphanBlobClass = 'ConcatenatedGzipHistoryBlob'; |
57 | 68 | } |
58 | 69 | |
59 | 70 | function debug( $msg ) { |
60 | | - if ( $this->slaveId !== false ) { |
61 | | - $msg = "{$this->slaveId}: $msg"; |
62 | | - } |
63 | | - $msg .= "\n"; |
64 | | - wfDebug( $msg ); |
| 71 | + wfDebug( "$msg\n" ); |
65 | 72 | } |
66 | 73 | |
67 | 74 | /** |
— | — | @@ -146,7 +153,7 @@ |
147 | 154 | array( 'file', '/dev/stderr', 'w' ) |
148 | 155 | ); |
149 | 156 | wfSuppressWarnings(); |
150 | | - $proc = proc_open( $cmd, $spec, $pipes ); |
| 157 | + $proc = proc_open( "$cmd --slave-id $i", $spec, $pipes ); |
151 | 158 | wfRestoreWarnings(); |
152 | 159 | if ( !$proc ) { |
153 | 160 | echo "Error opening slave process\n"; |
— | — | @@ -299,6 +306,7 @@ |
300 | 307 | * Main entry point for worker processes |
301 | 308 | */ |
302 | 309 | function executeChild() { |
| 310 | + $this->debug( 'starting' ); |
303 | 311 | $this->syncDBs(); |
304 | 312 | |
305 | 313 | while ( !feof( STDIN ) ) { |
— | — | @@ -306,6 +314,7 @@ |
307 | 315 | if ( $line == '' ) { |
308 | 316 | continue; |
309 | 317 | } |
| 318 | + $this->debug( $line ); |
310 | 319 | $args = explode( ' ', $line ); |
311 | 320 | $cmd = array_shift( $args ); |
312 | 321 | switch ( $cmd ) { |
— | — | @@ -325,15 +334,21 @@ |
326 | 335 | * Move tracked text in a given page |
327 | 336 | */ |
328 | 337 | function doPage( $pageId ) { |
| 338 | + $title = Title::newFromId( $pageId ); |
| 339 | + if ( $title ) { |
| 340 | + $titleText = $title->getPrefixedText(); |
| 341 | + } else { |
| 342 | + $titleText = '[deleted]'; |
| 343 | + } |
329 | 344 | $dbr = wfGetDB( DB_SLAVE ); |
330 | 345 | |
331 | 346 | // Finish any incomplete transactions |
332 | 347 | if ( !$this->copyOnly ) { |
333 | | - $this->finishIncompleteMoves(); |
| 348 | + $this->finishIncompleteMoves( array( 'bt_page' => $pageId ) ); |
334 | 349 | } |
335 | 350 | |
336 | 351 | $startId = 0; |
337 | | - $trx = new CgzCopyTransaction( $this ); |
| 352 | + $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); |
338 | 353 | |
339 | 354 | while ( true ) { |
340 | 355 | $res = $dbr->select( |
— | — | @@ -343,7 +358,7 @@ |
344 | 359 | 'bt_page' => $pageId, |
345 | 360 | 'bt_text_id > ' . $dbr->addQuotes( $startId ), |
346 | 361 | 'bt_moved' => 0, |
347 | | - 'bt_new_url' => '', |
| 362 | + 'bt_new_url IS NULL', |
348 | 363 | 'bt_text_id=old_id', |
349 | 364 | ), |
350 | 365 | __METHOD__, |
— | — | @@ -372,12 +387,15 @@ |
373 | 388 | |
374 | 389 | // Queue it |
375 | 390 | if ( !$trx->addItem( $text, $row->bt_text_id ) ) { |
| 391 | + $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); |
376 | 392 | $trx->commit(); |
377 | | - $trx = new CgzCopyTransaction( $this ); |
| 393 | + $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); |
378 | 394 | } |
379 | 395 | } |
380 | 396 | $startId = $row->bt_text_id; |
381 | 397 | } |
| 398 | + |
| 399 | + $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); |
382 | 400 | $trx->commit(); |
383 | 401 | } |
384 | 402 | |
— | — | @@ -420,18 +438,18 @@ |
421 | 439 | * This function completes any moves that only have done bt_new_url. This |
422 | 440 | * can happen when the script is interrupted, or when --copy-only is used. |
423 | 441 | */ |
424 | | - function finishIncompleteMoves() { |
| 442 | + function finishIncompleteMoves( $conds ) { |
425 | 443 | $dbr = wfGetDB( DB_SLAVE ); |
426 | 444 | |
427 | 445 | $startId = 0; |
| 446 | + $conds = array_merge( $conds, array( |
| 447 | + 'bt_moved' => 0, |
| 448 | + 'bt_new_url IS NOT NULL' |
| 449 | + )); |
428 | 450 | while ( true ) { |
429 | 451 | $res = $dbr->select( 'blob_tracking', |
430 | 452 | '*', |
431 | | - array( |
432 | | - 'bt_text_id > ' . $dbr->addQuotes( $startId ), |
433 | | - 'bt_moved' => 0, |
434 | | - "bt_new_url <> ''", |
435 | | - ), |
| 453 | + array_merge( $conds, array( 'bt_text_id > ' . $dbr->addQuotes( $startId ) ) ), |
436 | 454 | __METHOD__, |
437 | 455 | array( |
438 | 456 | 'ORDER BY' => 'bt_text_id', |
— | — | @@ -441,6 +459,7 @@ |
442 | 460 | if ( !$res->numRows() ) { |
443 | 461 | break; |
444 | 462 | } |
| 463 | + $this->debug( 'Incomplete: ' . $res->numRows() . ' rows' ); // count comes from the batch result $res; $row is not assigned until the foreach below |
445 | 464 | foreach ( $res as $row ) { |
446 | 465 | $this->moveTextRow( $row->bt_text_id, $row->bt_new_url ); |
447 | 466 | } |
— | — | @@ -471,7 +490,10 @@ |
472 | 491 | * Move a list of orphan text_ids to the new cluster |
473 | 492 | */ |
474 | 493 | function doOrphanList( $textIds ) { |
475 | | - $trx = new CgzCopyTransaction( $this ); |
| 494 | + // Finish incomplete moves |
| 495 | + $this->finishIncompleteMoves( array( 'bt_text_id' => $textIds ) ); |
| 496 | + |
| 497 | + $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); |
476 | 498 | foreach ( $textIds as $textId ) { |
477 | 499 | $row = wfGetDB( DB_SLAVE )->selectRow( 'text', array( 'old_text', 'old_flags' ), |
478 | 500 | array( 'old_id' => $textId ), __METHOD__ ); |
— | — | @@ -482,10 +504,13 @@ |
483 | 505 | } |
484 | 506 | |
485 | 507 | if ( !$trx->addItem( $text, $textId ) ) { |
| 508 | + $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); |
486 | 509 | $trx->commit(); |
487 | | - $trx = new CgzCopyTransaction( $this ); |
| 510 | + $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); |
488 | 511 | } |
489 | 512 | } |
| 513 | + $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); |
| 514 | + $trx->commit(); |
490 | 515 | } |
491 | 516 | } |
492 | 517 | |
— | — | @@ -493,6 +518,7 @@ |
494 | 519 | * Class to represent a recompression operation for a single CGZ blob |
495 | 520 | */ |
496 | 521 | class CgzCopyTransaction { |
| 522 | + var $parent; |
497 | 523 | var $blobClass; |
498 | 524 | var $cgz; |
499 | 525 | var $referrers; |
— | — | @@ -500,10 +526,11 @@ |
501 | 527 | /** |
502 | 528 | * Create a transaction from a RecompressTracked object |
503 | 529 | */ |
504 | | - function __construct( $parent ) { |
505 | | - $this->blobClass = $parent->blobClass; |
| 530 | + function __construct( $parent, $blobClass ) { |
| 531 | + $this->blobClass = $blobClass; |
506 | 532 | $this->cgz = false; |
507 | 533 | $this->texts = array(); |
| 534 | + $this->parent = $parent; |
508 | 535 | } |
509 | 536 | |
510 | 537 | /** |
— | — | @@ -521,6 +548,10 @@ |
522 | 549 | return $this->cgz->isHappy(); |
523 | 550 | } |
524 | 551 | |
| 552 | + function getSize() { |
| 553 | + return count( $this->texts ); |
| 554 | + } |
| 555 | + |
525 | 556 | /** |
526 | 557 | * Recompress text after some aberrant modification |
527 | 558 | */ |
— | — | @@ -554,16 +585,16 @@ |
555 | 586 | // We do a locking read to prevent closer-run race conditions. |
556 | 587 | $dbw = wfGetDB( DB_MASTER ); |
557 | 588 | $dbw->begin(); |
| 589 | + $res = $dbw->select( 'blob_tracking', |
| 590 | + array( 'bt_text_id', 'bt_moved' ), |
| 591 | + array( 'bt_text_id' => array_keys( $this->referrers ) ), |
| 592 | + __METHOD__, array( 'FOR UPDATE' ) ); |
558 | 593 | $dirty = false; |
559 | | - foreach ( $this->referrers as $textId => $hash ) { |
560 | | - $moved = $dbw->selectField( 'blob_tracking', 'bt_moved', |
561 | | - array( 'bt_text_id' => $textId ), |
562 | | - __METHOD__, |
563 | | - array( 'FOR UPDATE' ) |
564 | | - ); |
565 | | - if ( !$moved ) { |
| 594 | + foreach ( $res as $row ) { |
| 595 | + if ( $row->bt_moved ) { |
566 | 596 | # This row has already been moved, remove it |
567 | | - unset( $this->texts[$textId] ); |
| 597 | + $this->parent->debug( "TRX: conflict detected in old_id={$row->bt_text_id}" ); |
| 598 | + unset( $this->texts[$row->bt_text_id] ); |
568 | 599 | $dirty = true; |
569 | 600 | } |
570 | 601 | } |
— | — | @@ -574,7 +605,7 @@ |
575 | 606 | // All have been moved already |
576 | 607 | if ( $originalCount > 1 ) { |
577 | 608 | // This is suspicious, make noise |
578 | | - echo "Warning: concurrent operation detected, are there two conflicting\n" . |
| 609 | + echo "Warning: concurrent operation detected, are there two conflicting " . |
579 | 610 | "processes running, doing the same job?\n"; |
580 | 611 | } |
581 | 612 | return; |
— | — | @@ -616,9 +647,5 @@ |
617 | 648 | } |
618 | 649 | } |
619 | 650 | } |
620 | | - |
621 | | - function signalHandler() { |
622 | | - $this->signalled = true; |
623 | | - } |
624 | 651 | } |
625 | 652 | |
Index: trunk/phase3/maintenance/storage/testCompression.php |
— | — | @@ -15,7 +15,13 @@ |
16 | 16 | } else { |
17 | 17 | $start = '19700101000000'; |
18 | 18 | } |
19 | | -$limit = isset( $options['limit'] ) ? $options['limit'] : 10; |
| 19 | +if ( isset( $options['limit'] ) ) { |
| 20 | + $limit = $options['limit']; |
| 21 | + $untilHappy = false; |
| 22 | +} else { |
| 23 | + $limit = 1000; |
| 24 | + $untilHappy = true; |
| 25 | +} |
20 | 26 | $type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob'; |
21 | 27 | |
22 | 28 | |
— | — | @@ -43,16 +49,21 @@ |
44 | 50 | $uncompressedSize += strlen( $text ); |
45 | 51 | $hashes[$row->rev_id] = md5( $text ); |
46 | 52 | $keys[$row->rev_id] = $blob->addItem( $text ); |
| 53 | + if ( $untilHappy && !$blob->isHappy() ) { |
| 54 | + break; |
| 55 | + } |
47 | 56 | } |
48 | 57 | |
49 | 58 | $serialized = serialize( $blob ); |
50 | 59 | $t += microtime( true ); |
| 60 | +#print_r( $blob->mDiffMap ); |
51 | 61 | |
52 | | -printf( "Compression ratio for %d revisions: %5.2f, %s -> %s\n", |
53 | | - $res->numRows(), |
| 62 | +printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n", |
| 63 | + $type, |
| 64 | + count( $hashes ), |
54 | 65 | $uncompressedSize / strlen( $serialized ), |
55 | 66 | $wgLang->formatSize( $uncompressedSize ), |
56 | | - $wgLang->formatSize( strlen( $serialized ) ) |
| 67 | + strlen( $serialized ) |
57 | 68 | ); |
58 | 69 | printf( "Compression time: %5.2f ms\n", $t * 1000 ); |
59 | 70 | |
Index: trunk/phase3/maintenance/storage/blob_tracking.sql |
— | — | @@ -4,10 +4,14 @@ |
5 | 5 | CREATE TABLE /*$wgDBprefix*/blob_tracking ( |
6 | 6 | -- page.page_id |
7 | 7 | -- This may be zero for orphan or deleted text |
| 8 | + -- Note that this is for compression grouping only -- it doesn't need to be |
| 9 | + -- accurate at the time recompressTracked is run. Operations such as a |
| 10 | + -- delete/undelete cycle may make it inaccurate. |
8 | 11 | bt_page integer not null, |
9 | 12 | |
10 | 13 | -- revision.rev_id |
11 | 14 | -- This may be zero for orphan or deleted text |
| 15 | + -- Like bt_page, it does not need to be accurate when recompressTracked is run. |
12 | 16 | bt_rev_id integer not null, |
13 | 17 | |
14 | 18 | -- text.old_id |
Index: trunk/phase3/includes/GlobalFunctions.php |
— | — | @@ -195,6 +195,7 @@ |
196 | 196 | */ |
197 | 197 | function wfDebug( $text, $logonly = false ) { |
198 | 198 | global $wgOut, $wgDebugLogFile, $wgDebugComments, $wgProfileOnly, $wgDebugRawPage; |
| 199 | + global $wgDebugLogPrefix; |
199 | 200 | static $recursion = 0; |
200 | 201 | |
201 | 202 | static $cache = array(); // Cache of unoutputted messages |
— | — | @@ -227,6 +228,7 @@ |
228 | 229 | # Strip unprintables; they can switch terminal modes when binary data |
229 | 230 | # gets dumped, which is pretty annoying. |
230 | 231 | $text = preg_replace( '![\x00-\x08\x0b\x0c\x0e-\x1f]!', ' ', $text ); |
| 232 | + $text = $wgDebugLogPrefix . $text; |
231 | 233 | wfErrorLog( $text, $wgDebugLogFile ); |
232 | 234 | } |
233 | 235 | } |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -838,7 +838,6 @@ |
839 | 839 | |
840 | 840 | /** |
841 | 841 | * Translation using MediaWiki: namespace. |
842 | | - * This will increase load times by 25-60% unless memcached is installed. |
843 | 842 | * Interface messages will be loaded from the database. |
844 | 843 | */ |
845 | 844 | $wgUseDatabaseMessages = true; |
— | — | @@ -952,6 +951,16 @@ |
953 | 952 | $wgExtraSubtitle = ''; |
954 | 953 | $wgSiteSupportPage = ''; # A page where your users can receive donations |
955 | 954 | |
| 955 | +/** |
| 956 | + * Set this to a string to put the wiki into read-only mode. The text will be |
| 957 | + * used as an explanation to users. |
| 958 | + * |
| 959 | + * This prevents most write operations via the web interface. Cache updates may |
| 960 | + * still be possible. To prevent database writes completely, use the read_only |
| 961 | + * option in MySQL. |
| 962 | + */ |
| 963 | +$wgReadOnly = null; |
| 964 | + |
956 | 965 | /*** |
957 | 966 | * If this lock file exists, the wiki will be forced into read-only mode. |
958 | 967 | * Its contents will be shown to users as part of the read-only warning |
— | — | @@ -960,15 +969,42 @@ |
961 | 970 | $wgReadOnlyFile = false; ///< defaults to "{$wgUploadDirectory}/lock_yBgMBwiR"; |
962 | 971 | |
963 | 972 | /** |
| 973 | + * Filename for debug logging. |
964 | 974 | * The debug log file should not be publicly accessible if it is used, as it |
965 | | - * may contain private data. */ |
| 975 | + * may contain private data. |
| 976 | + */ |
966 | 977 | $wgDebugLogFile = ''; |
967 | 978 | |
| 979 | +/** |
| 980 | + * Prefix for debug log lines |
| 981 | + */ |
| 982 | +$wgDebugLogPrefix = ''; |
| 983 | + |
| 984 | +/** |
| 985 | + * If true, instead of redirecting, show a page with a link to the redirect |
| 986 | + * destination. This allows for the inspection of PHP error messages, and easy |
| 987 | + * resubmission of form data. For developer use only. |
| 988 | + */ |
968 | 989 | $wgDebugRedirects = false; |
969 | | -$wgDebugRawPage = false; # Avoid overlapping debug entries by leaving out CSS |
970 | 990 | |
| 991 | +/** |
| 992 | + * If true, log debugging data from action=raw. |
| 993 | + * This is normally false to avoid overlapping debug entries due to gen=css and |
| 994 | + * gen=js requests. |
| 995 | + */ |
| 996 | +$wgDebugRawPage = false; |
| 997 | + |
| 998 | +/** |
| 999 | + * Send debug data to an HTML comment in the output. |
| 1000 | + * |
| 1001 | + * This may occasionally be useful when supporting a non-technical end-user. It's |
| 1002 | + * more secure than exposing the debug log file to the web, since the output only |
| 1003 | + * contains private data for the current user. But it's not ideal for development |
| 1004 | + * use since data is lost on fatal errors and redirects. |
| 1005 | + */ |
971 | 1006 | $wgDebugComments = false; |
972 | | -$wgReadOnly = null; |
| 1007 | + |
| 1008 | +/** Does nothing. Obsolete? */ |
973 | 1009 | $wgLogQueries = false; |
974 | 1010 | |
975 | 1011 | /** |
— | — | @@ -1027,7 +1063,8 @@ |
1028 | 1064 | * same options. |
1029 | 1065 | * |
1030 | 1066 | * This can provide a significant speedup for medium to large pages, |
1031 | | - * so you probably want to keep it on. |
| 1067 | + * so you probably want to keep it on. Extensions that conflict with the |
| 1068 | + * parser cache should disable the cache on a per-page basis instead. |
1032 | 1069 | */ |
1033 | 1070 | $wgEnableParserCache = true; |
1034 | 1071 | |