Index: trunk/phase3/maintenance/storage/compressOld.php |
— | — | @@ -45,7 +45,7 @@ |
46 | 46 | 't' => 'concat', |
47 | 47 | 'c' => 20, |
48 | 48 | 's' => 0, |
49 | | - 'f' => 3, |
| 49 | + 'f' => 5, |
50 | 50 | 'h' => 100, |
51 | 51 | 'b' => '', |
52 | 52 | 'e' => '', |
Index: trunk/phase3/maintenance/storage/compressOld.inc |
— | — | @@ -103,10 +103,13 @@ |
104 | 104 | */ |
105 | 105 | |
106 | 106 | # For each article, get a list of revisions which fit the criteria |
| 107 | + |
107 | 108 | # No recompression, use a condition on old_flags |
| 109 | + # Skip rows flagged 'object', because recompressing them might cause data loss when |
| 110 | + # overwriting bulk storage concat rows. Skip rows flagged 'external' too, because |
| 111 | + # this script doesn't yet delete rows from external storage. |
108 | 112 | $conds = array( |
109 | | - "old_flags NOT LIKE '%object%' " . |
110 | | - " AND (old_flags NOT LIKE '%external%' OR old_text NOT LIKE 'DB://%/%/%')"); |
| 113 | + "old_flags NOT LIKE '%object%' AND old_flags NOT LIKE '%external%'"); |
111 | 114 | |
112 | 115 | if ( $beginDate ) { |
113 | 116 | $conds[] = "rev_timestamp>'" . $beginDate . "'"; |
— | — | @@ -133,8 +136,12 @@ |
134 | 137 | $totalMatchingRevisions = 0; |
135 | 138 | $masterPos = false; |
136 | 139 | for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) { |
137 | | - wfWaitForSlaves( 10 ); |
| 140 | + wfWaitForSlaves( 5 ); |
138 | 141 | |
| 142 | + # Wake up the $dbr connection with a ping before querying it |
| 143 | + $dbr->ping(); |
| 144 | + |
| 145 | + # Get the page row |
139 | 146 | $pageRes = $dbr->select( 'page', array('page_id', 'page_namespace', 'page_title'), |
140 | 147 | $pageConds + array('page_id' => $pageId), $fname ); |
141 | 148 | if ( $dbr->numRows( $pageRes ) == 0 ) { |
— | — | @@ -213,7 +220,6 @@ |
214 | 221 | $stub = $chunk->addItem( $text ); |
215 | 222 | $stub->setLocation( $primaryOldid ); |
216 | 223 | $stub->setReferrer( $oldid ); |
217 | | - $hash = $stub->getHash(); |
218 | 224 | print '.'; |
219 | 225 | $usedChunk = true; |
220 | 226 | } |
— | — | @@ -277,6 +283,7 @@ |
278 | 284 | print "/"; |
279 | 285 | $dbw->commit(); |
280 | 286 | $i += $thisChunkSize; |
| 287 | + wfWaitForSlaves( 5 ); |
281 | 288 | } |
282 | 289 | print "\n"; |
283 | 290 | } |
Index: trunk/phase3/maintenance/storage/moveToExternal.php |
— | — | @@ -1,7 +1,6 @@ |
2 | 2 | <?php |
3 | 3 | |
4 | 4 | define( 'REPORTING_INTERVAL', 100 ); |
5 | | -define( 'STUB_HEADER', 'O:15:"historyblobstub"' ); |
6 | 5 | |
7 | 6 | if ( !defined( 'MEDIAWIKI' ) ) { |
8 | 7 | $optionsWithArgs = array( 'm' ); |
— | — | @@ -53,24 +52,42 @@ |
54 | 53 | } |
55 | 54 | |
56 | 55 | # Resolve stubs |
57 | | - $flags = explode( ',', $row->old_flags ); |
58 | | - if ( in_array( 'object', $flags ) |
59 | | - && substr( $row->old_text, 0, strlen( STUB_HEADER ) ) === STUB_HEADER ) |
60 | | - { |
61 | | - resolveStub( $id, $row->old_text, $row->old_flags ); |
| 56 | + $text = $row->old_text; |
| 57 | + if ( $row->old_flags === '' ) { |
| 58 | + $flags = 'external'; |
| 59 | + } else { |
| 60 | + $flags = "{$row->old_flags},external"; |
| 61 | + } |
| 62 | + |
| 63 | + if ( strpos( $flags, 'object' ) !== false ) { |
| 64 | + $obj = unserialize( $text ); |
| 65 | + $className = strtolower( get_class( $obj ) ); |
| 66 | + if ( $className == 'historyblobstub' ) { |
| 67 | + resolveStub( $id, $row->old_text, $row->old_flags ); |
| 68 | + continue; |
| 69 | + } elseif ( $className == 'historyblobcurstub' ) { |
| 70 | + $text = gzdeflate( $obj->getText() ); |
| 71 | + $flags = 'utf-8,gzip,external'; |
| 72 | + } elseif ( $className == 'concatenatedgziphistoryblob' ) { |
| 73 | + // Do nothing: the serialized text is stored unchanged, with 'external' appended to its flags |
| 74 | + } else { |
| 75 | + print "Warning: unrecognised object class \"$className\"\n"; |
| 76 | + continue; |
| 77 | + } |
| 78 | + } |
| 79 | + |
| 80 | + if ( strlen( $text ) < 100 ) { |
| 81 | + // Don't move tiny revisions (text shorter than 100 bytes) |
62 | 82 | continue; |
63 | 83 | } |
64 | 84 | |
65 | | - $url = $ext->store( $cluster, $row->old_text ); |
| 85 | + #print "Storing " . strlen( $text ) . " bytes to $url\n"; |
| 86 | + |
| 87 | + $url = $ext->store( $cluster, $text ); |
66 | 88 | if ( !$url ) { |
67 | 89 | print "Error writing to external storage\n"; |
68 | 90 | exit; |
69 | 91 | } |
70 | | - if ( $row->old_flags === '' ) { |
71 | | - $flags = 'external'; |
72 | | - } else { |
73 | | - $flags = "{$row->old_flags},external"; |
74 | | - } |
75 | 92 | $dbw->update( 'text', |
76 | 93 | array( 'old_flags' => $flags, 'old_text' => $url ), |
77 | 94 | array( 'old_id' => $id ), $fname ); |
Index: trunk/phase3/maintenance/storage/resolveStubs.php |
— | — | @@ -18,45 +18,37 @@ |
19 | 19 | function resolveStubs() { |
20 | 20 | $fname = 'resolveStubs'; |
21 | 21 | |
22 | | - print "Retrieving stub rows...\n"; |
23 | 22 | $dbr =& wfGetDB( DB_SLAVE ); |
| 23 | + $dbw =& wfGetDB( DB_MASTER ); |
24 | 24 | $maxID = $dbr->selectField( 'text', 'MAX(old_id)', false, $fname ); |
25 | | - $stubs = array(); |
26 | | - $flagsArray = array(); |
| 25 | + $blockSize = 10000; |
| 26 | + $numBlocks = intval( $maxID / $blockSize ) + 1; |
27 | 27 | |
28 | | - # Do it in 100 blocks |
29 | | - for ( $b = 0; $b < 100; $b++ ) { |
30 | | - print "$b%\r"; |
31 | | - $start = intval($maxID / 100) * $b + 1; |
32 | | - $end = intval($maxID / 100) * ($b + 1); |
| 28 | + for ( $b = 0; $b < $numBlocks; $b++ ) { |
| 29 | + wfWaitForSlaves( 5 ); |
| 30 | + |
| 31 | + printf( "%5.2f%%\n", $b / $numBlocks * 100 ); |
| 32 | + $start = intval($maxID / $numBlocks) * $b + 1; |
| 33 | + $end = intval($maxID / $numBlocks) * ($b + 1); |
| 34 | + $stubs = array(); |
| 35 | + $flagsArray = array(); |
33 | 36 | |
| 37 | + |
34 | 38 | $res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ), |
35 | | - "old_id>=$start AND old_id<=$end AND old_flags like '%object%' ". |
| 39 | + "old_id>=$start AND old_id<=$end " . |
| 40 | + # Using a more restrictive flag set for now, until I do some more analysis -- TS |
| 41 | + #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". |
| 42 | + |
| 43 | + "AND old_flags='object' " . |
36 | 44 | "AND old_text LIKE 'O:15:\"historyblobstub\"%'", $fname ); |
37 | 45 | while ( $row = $dbr->fetchObject( $res ) ) { |
38 | | - $stubs[$row->old_id] = $row->old_text; |
39 | | - $flagsArray[$row->old_id] = $row->old_flags; |
| 46 | + resolveStub( $row->old_id, $row->old_text, $row->old_flags ); |
40 | 47 | } |
41 | 48 | $dbr->freeResult( $res ); |
42 | | - } |
43 | | - print "100%\n"; |
44 | 49 | |
45 | | - print "\nConverting " . count( $stubs ) . " rows ...\n"; |
46 | | - |
47 | | - # Get master database, no transactions |
48 | | - $dbw =& wfGetDB( DB_MASTER ); |
49 | | - $dbw->clearFlag( DBO_TRX ); |
50 | | - $dbw->immediateCommit(); |
51 | | - |
52 | | - $i = 0; |
53 | | - foreach( $stubs as $id => $stub ) { |
54 | | - if ( !(++$i % REPORTING_INTERVAL) ) { |
55 | | - print "$i\n"; |
56 | | - wfWaitForSlaves( 5 ); |
57 | | - } |
58 | | - |
59 | | - resolveStub( $id, $stub, $flagsArray[$id] ); |
| 50 | + |
60 | 51 | } |
| 52 | + print "100%\n"; |
61 | 53 | } |
62 | 54 | |
63 | 55 | /** |
— | — | @@ -71,8 +63,8 @@ |
72 | 64 | $dbr =& wfGetDB( DB_SLAVE ); |
73 | 65 | $dbw =& wfGetDB( DB_MASTER ); |
74 | 66 | |
75 | | - if ( get_class( $stub ) !== 'historyblobstub' ) { |
76 | | - print "Error, invalid stub object\n"; |
| 67 | + if ( strtolower( get_class( $stub ) ) !== 'historyblobstub' ) { |
| 68 | + print "Error found object of class " . get_class( $stub ) . ", expecting historyblobstub\n"; |
77 | 69 | return; |
78 | 70 | } |
79 | 71 | |
— | — | @@ -84,7 +76,7 @@ |
85 | 77 | |
86 | 78 | if ( !$externalRow ) { |
87 | 79 | # Object wasn't external |
88 | | - continue; |
| 80 | + return; |
89 | 81 | } |
90 | 82 | |
91 | 83 | # Preserve the legacy encoding flag, but switch from object to external |