r13837 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r13836 | r13837 | r13838 >
Date:08:49, 24 April 2006
Author:tstarling
Status:old
Tags:
Comment:
Added experimental support for restoration
Modified paths:
  • /trunk/phase3/maintenance/storage/checkStorage.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/storage/checkStorage.php
@@ -8,316 +8,461 @@
99
1010 if ( !defined( 'MEDIAWIKI' ) ) {
1111 require_once( dirname(__FILE__) . '/../commandLine.inc' );
12 - require_once('ExternalStore.php');
 12+ require_once( 'ExternalStore.php' );
1313 require_once( 'ExternalStoreDB.php' );
 14+ require_once( 'SpecialImport.php' );
1415
15 - checkStorage();
 16+ $cs = new CheckStorage;
 17+ $fix = isset( $options['fix'] );
 18+ if ( isset( $args[0] ) ) {
 19+ $xml = $args[0];
 20+ } else {
 21+ $xml = false;
 22+ }
 23+ $cs->check( $fix, $xml );
1624 }
1725
1826
1927 //----------------------------------------------------------------------------------
2028
21 -function checkStorage() {
22 - global $oldIdMap, $brokenRevisions;
23 -
24 - $fname = 'checkStorage';
25 - $dbr =& wfGetDB( DB_SLAVE );
26 - $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, $fname );
27 - $chunkSize = 1000;
28 - $flagStats = array();
29 - $objectStats = array();
30 - $knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );
31 - $dbStore = null;
32 - $brokenRevisions = array();
 29+class CheckStorage
 30+{
 31+ var $oldIdMap, $errors;
 32+ var $dbStore = null;
3333
34 - for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
35 - $chunkEnd = $chunkStart + $chunkSize - 1;
36 - //print "$chunkStart of $maxRevId\n";
 34+ var $errorDescriptions = array(
 35+ 'restore text' => 'Damaged text, need to be restored from a backup',
 36+ 'restore revision' => 'Damaged revision row, need to be restored from a backup',
 37+ 'unfixable' => 'Unexpected errors with no automated fixing method',
 38+ 'fixed' => 'Errors already fixed',
 39+ 'fixable' => 'Errors which would already be fixed if --fix was specified',
 40+ );
3741
38 - // Fetch revision rows
39 - $oldIdMap = array();
40 - $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
41 - array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname );
42 - while ( $row = $dbr->fetchObject( $res ) ) {
43 - $oldIdMap[$row->rev_id] = $row->rev_text_id;
 42+ function check( $fix = false, $xml = '' ) {
 43+ $fname = 'checkStorage';
 44+ $dbr =& wfGetDB( DB_SLAVE );
 45+ if ( $fix ) {
 46+ $dbw =& wfGetDB( DB_MASTER );
 47+ print "Checking, will fix errors if possible...\n";
 48+ } else {
 49+ print "Checking...\n";
4450 }
45 - $dbr->freeResult( $res );
 51+ $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, $fname );
 52+ $chunkSize = 1000;
 53+ $flagStats = array();
 54+ $objectStats = array();
 55+ $knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );
 56+ $this->errors = array(
 57+ 'restore text' => array(),
 58+ 'restore revision' => array(),
 59+ 'unfixable' => array(),
 60+ 'fixed' => array(),
 61+ 'fixable' => array(),
 62+ );
4663
47 - if ( !count( $oldIdMap ) ) {
48 - continue;
49 - }
 64+ for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
 65+ $chunkEnd = $chunkStart + $chunkSize - 1;
 66+ //print "$chunkStart of $maxRevId\n";
5067
51 - // Fetch old_flags
52 - $missingTextRows = array_flip( $oldIdMap );
53 - $externalRevs = array();
54 - $objectRevs = array();
55 - $flagsFields = array();
56 - $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
57 - 'old_id IN (' . implode( ',', $oldIdMap ) . ')', $fname );
58 - while ( $row = $dbr->fetchObject( $res ) ) {
59 - $flags = $row->old_flags;
60 - $id = $row->old_id;
 68+ // Fetch revision rows
 69+ $this->oldIdMap = array();
 70+ $dbr->ping();
 71+ $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
 72+ array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname );
 73+ while ( $row = $dbr->fetchObject( $res ) ) {
 74+ $this->oldIdMap[$row->rev_id] = $row->rev_text_id;
 75+ }
 76+ $dbr->freeResult( $res );
6177
62 - // Create flagStats row if it doesn't exist
63 - $flagStats = $flagStats + array( $flags => 0 );
64 - // Increment counter
65 - $flagStats[$flags]++;
 78+ if ( !count( $this->oldIdMap ) ) {
 79+ continue;
 80+ }
6681
67 - // Not missing
68 - unset( $missingTextRows[$row->old_id] );
 82+ // Fetch old_flags
 83+ $missingTextRows = array_flip( $this->oldIdMap );
 84+ $externalRevs = array();
 85+ $objectRevs = array();
 86+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
 87+ 'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', $fname );
 88+ while ( $row = $dbr->fetchObject( $res ) ) {
 89+ $flags = $row->old_flags;
 90+ $id = $row->old_id;
6991
70 - // Check for external or object
71 - if ( $flags == '' ) {
72 - $flagArray = array();
73 - } else {
74 - $flagArray = explode( ',', $flags );
75 - }
76 - if ( in_array( 'external', $flagArray ) ) {
77 - $flagsFields[$id] = $flags; // is this needed?
78 - $externalRevs[] = $id;
79 - } elseif ( in_array( 'object', $flagArray ) ) {
80 - $flagsFields[$id] = $flags; // is this needed?
81 - $objectRevs[] = $id;
82 - }
 92+ // Create flagStats row if it doesn't exist
 93+ $flagStats = $flagStats + array( $flags => 0 );
 94+ // Increment counter
 95+ $flagStats[$flags]++;
8396
84 - // Check for unrecognised flags
85 - if ( count( array_diff( $flagArray, $knownFlags ) ) ) {
86 - checkError( "Warning: invalid flags field \"$flags\"", $id );
87 - }
88 - }
89 - $dbr->freeResult( $res );
 97+ // Not missing
 98+ unset( $missingTextRows[$row->old_id] );
9099
91 - // Output errors for any missing text rows
92 - foreach ( $missingTextRows as $oldId => $revId ) {
93 - print "Error: missing text row $oldId for revision $revId\n";
94 - }
95 -
96 - // Verify external revisions
97 - $externalConcatBlobs = array();
98 - $externalNormalBlobs = array();
99 - if ( count( $externalRevs ) ) {
100 - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
101 - array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname );
102 - while ( $row = $dbr->fetchObject( $res ) ) {
103 - $urlParts = explode( '://', $row->old_text, 2 );
104 - if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
105 - checkError( "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
106 - continue;
 100+ // Check for external or object
 101+ if ( $flags == '' ) {
 102+ $flagArray = array();
 103+ } else {
 104+ $flagArray = explode( ',', $flags );
107105 }
108 - list( $proto, $path ) = $urlParts;
109 - if ( $proto != 'DB' ) {
110 - checkError( "Error: invalid external protocol \"$proto\"", $row->old_id );
111 - continue;
 106+ if ( in_array( 'external', $flagArray ) ) {
 107+ $externalRevs[] = $id;
 108+ } elseif ( in_array( 'object', $flagArray ) ) {
 109+ $objectRevs[] = $id;
112110 }
113 - $path = explode( '/', $row->old_text );
114 - $cluster = $path[2];
115 - $id = $path[3];
116 - if ( isset( $path[4] ) ) {
117 - $externalConcatBlobs[$cluster][$id][] = $row->old_id;
118 - } else {
119 - $externalNormalBlobs[$cluster][$id][] = $row->old_id;
 111+
 112+ // Check for unrecognised flags
 113+ if ( $flags == '0' ) {
 114+ // This is a known bug from 2004
 115+ // It's safe to just erase the old_flags field
 116+ if ( $fix ) {
 117+ $this->error( 'fixed', "Warning: old_flags set to 0", $id );
 118+ $dbw->ping();
 119+ $dbw->update( 'text', array( 'old_flags' => '' ),
 120+ array( 'old_id' => $id ), $fname );
 121+ echo "Fixed\n";
 122+ } else {
 123+ $this->error( 'fixable', "Warning: old_flags set to 0", $id );
 124+ }
 125+ } elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
 126+ $this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
120127 }
121128 }
122129 $dbr->freeResult( $res );
123 - }
124130
125 - // Check external concat blobs for the right header
126 - checkExternalConcatBlobs( $externalConcatBlobs );
127 -
 131+ // Output errors for any missing text rows
 132+ foreach ( $missingTextRows as $oldId => $revId ) {
 133+ $this->error( 'restore revision', "Error: missing text row", $oldId );
 134+ }
128135
129 - // Check external normal blobs for existence
130 - if ( count( $externalNormalBlobs ) ) {
131 - if ( is_null( $dbStore ) ) {
132 - $dbStore = new ExternalStoreDB;
 136+ // Verify external revisions
 137+ $externalConcatBlobs = array();
 138+ $externalNormalBlobs = array();
 139+ if ( count( $externalRevs ) ) {
 140+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
 141+ array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname );
 142+ while ( $row = $dbr->fetchObject( $res ) ) {
 143+ $urlParts = explode( '://', $row->old_text, 2 );
 144+ if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
 145+ $this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
 146+ continue;
 147+ }
 148+ list( $proto, $path ) = $urlParts;
 149+ if ( $proto != 'DB' ) {
 150+ $this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
 151+ continue;
 152+ }
 153+ $path = explode( '/', $row->old_text );
 154+ $cluster = $path[2];
 155+ $id = $path[3];
 156+ if ( isset( $path[4] ) ) {
 157+ $externalConcatBlobs[$cluster][$id][] = $row->old_id;
 158+ } else {
 159+ $externalNormalBlobs[$cluster][$id][] = $row->old_id;
 160+ }
 161+ }
 162+ $dbr->freeResult( $res );
133163 }
134 - foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) {
135 - $blobIds = array_keys( $xBlobIds );
136 - $extDb =& $dbStore->getSlave( $cluster );
137 - $blobsTable = $dbStore->getTable( $extDb );
138 - $res = $extDb->select( $blobsTable,
139 - array( 'blob_id' ),
140 - array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
141 - while ( $row = $extDb->fetchObject( $res ) ) {
142 - unset( $xBlobIds[$row->blob_id] );
 164+
 165+ // Check external concat blobs for the right header
 166+ $this->checkExternalConcatBlobs( $externalConcatBlobs );
 167+
 168+ // Check external normal blobs for existence
 169+ if ( count( $externalNormalBlobs ) ) {
 170+ if ( is_null( $this->dbStore ) ) {
 171+ $this->dbStore = new ExternalStoreDB;
143172 }
144 - $extDb->freeResult( $res );
145 - // Print errors for missing blobs rows
146 - foreach ( $xBlobIds as $blobId => $oldId ) {
147 - checkError( "Error: missing target $blobId for one-part ES URL", $oldId );
 173+ foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) {
 174+ $blobIds = array_keys( $xBlobIds );
 175+ $extDb =& $this->dbStore->getSlave( $cluster );
 176+ $blobsTable = $this->dbStore->getTable( $extDb );
 177+ $res = $extDb->select( $blobsTable,
 178+ array( 'blob_id' ),
 179+ array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
 180+ while ( $row = $extDb->fetchObject( $res ) ) {
 181+ unset( $xBlobIds[$row->blob_id] );
 182+ }
 183+ $extDb->freeResult( $res );
 184+ // Print errors for missing blobs rows
 185+ foreach ( $xBlobIds as $blobId => $oldId ) {
 186+ $this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId );
 187+ }
148188 }
149189 }
150 - }
151190
152 - // Check local objects
153 - $dbr->ping();
154 - $concatBlobs = array();
155 - $curIds = array();
156 - if ( count( $objectRevs ) ) {
157 - $headerLength = 300;
158 - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
159 - array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname );
160 - while ( $row = $dbr->fetchObject( $res ) ) {
161 - $oldId = $row->old_id;
162 - if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
163 - checkError( "Error: invalid object header", $oldId );
164 - continue;
165 - }
 191+ // Check local objects
 192+ $dbr->ping();
 193+ $concatBlobs = array();
 194+ $curIds = array();
 195+ if ( count( $objectRevs ) ) {
 196+ $headerLength = 300;
 197+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
 198+ array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname );
 199+ while ( $row = $dbr->fetchObject( $res ) ) {
 200+ $oldId = $row->old_id;
 201+ if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
 202+ $this->error( 'restore text', "Error: invalid object header", $oldId );
 203+ continue;
 204+ }
166205
167 - $className = strtolower( $matches[2] );
168 - if ( strlen( $className ) != $matches[1] ) {
169 - checkError( "Error: invalid object header, wrong class name length", $oldId );
170 - continue;
171 - }
 206+ $className = strtolower( $matches[2] );
 207+ if ( strlen( $className ) != $matches[1] ) {
 208+ $this->error( 'restore text', "Error: invalid object header, wrong class name length", $oldId );
 209+ continue;
 210+ }
172211
173 - $objectStats = $objectStats + array( $className => 0 );
174 - $objectStats[$className]++;
 212+ $objectStats = $objectStats + array( $className => 0 );
 213+ $objectStats[$className]++;
175214
176 - switch ( $className ) {
177 - case 'concatenatedgziphistoryblob':
178 - // Good
179 - break;
180 - case 'historyblobstub':
181 - case 'historyblobcurstub':
182 - if ( strlen( $row->header ) == $headerLength ) {
183 - checkError( "Error: overlong stub header", $oldId );
184 - continue;
185 - }
186 - $stubObj = unserialize( $row->header );
187 - if ( !is_object( $stubObj ) ) {
188 - checkError( "Error: unable to unserialize stub object", $oldId );
189 - continue;
190 - }
191 - if ( $className == 'historyblobstub' ) {
192 - $concatBlobs[$stubObj->mOldId][] = $oldId;
193 - } else {
194 - $curIds[$stubObj->mCurId][] = $oldId;
195 - }
196 - break;
197 - default:
198 - checkError( "Error: unrecognised object class \"$className\"", $oldId );
 215+ switch ( $className ) {
 216+ case 'concatenatedgziphistoryblob':
 217+ // Good
 218+ break;
 219+ case 'historyblobstub':
 220+ case 'historyblobcurstub':
 221+ if ( strlen( $row->header ) == $headerLength ) {
 222+ $this->error( 'unfixable', "Error: overlong stub header", $oldId );
 223+ continue;
 224+ }
 225+ $stubObj = unserialize( $row->header );
 226+ if ( !is_object( $stubObj ) ) {
 227+ $this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
 228+ continue;
 229+ }
 230+ if ( $className == 'historyblobstub' ) {
 231+ $concatBlobs[$stubObj->mOldId][] = $oldId;
 232+ } else {
 233+ $curIds[$stubObj->mCurId][] = $oldId;
 234+ }
 235+ break;
 236+ default:
 237+ $this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
 238+ }
199239 }
 240+ $dbr->freeResult( $res );
200241 }
201 - $dbr->freeResult( $res );
202 - }
203242
204 - // Check local concat blob validity
205 - $externalConcatBlobs = array();
206 - if ( count( $concatBlobs ) ) {
207 - $headerLength = 300;
208 - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
209 - array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname );
210 - while ( $row = $dbr->fetchObject( $res ) ) {
211 - $flags = explode( ',', $row->old_flags );
212 - if ( in_array( 'external', $flags ) ) {
213 - // Concat blob is in external storage?
214 - if ( in_array( 'object', $flags ) ) {
215 - $urlParts = explode( '/', $row->header );
216 - if ( $urlParts[0] != 'DB:' ) {
217 - checkError( "Error: unrecognised external storage type \"{$urlParts[0]}", $row->old_id );
 243+ // Check local concat blob validity
 244+ $externalConcatBlobs = array();
 245+ if ( count( $concatBlobs ) ) {
 246+ $headerLength = 300;
 247+ $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
 248+ array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname );
 249+ while ( $row = $dbr->fetchObject( $res ) ) {
 250+ $flags = explode( ',', $row->old_flags );
 251+ if ( in_array( 'external', $flags ) ) {
 252+ // Concat blob is in external storage?
 253+ if ( in_array( 'object', $flags ) ) {
 254+ $urlParts = explode( '/', $row->header );
 255+ if ( $urlParts[0] != 'DB:' ) {
 256+ $this->error( 'unfixable', "Error: unrecognised external storage type \"{$urlParts[0]}", $row->old_id );
 257+ } else {
 258+ $cluster = $urlParts[2];
 259+ $id = $urlParts[3];
 260+ if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
 261+ $externalConcatBlobs[$cluster][$id] = array();
 262+ }
 263+ $externalConcatBlobs[$cluster][$id] = array_merge(
 264+ $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
 265+ );
 266+ }
218267 } else {
219 - $cluster = $urlParts[2];
220 - $id = $urlParts[3];
221 - if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
222 - $externalConcatBlobs[$cluster][$id] = array();
223 - }
224 - $externalConcatBlobs[$cluster][$id] = array_merge(
225 - $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
226 - );
 268+ $this->error( 'unfixable', "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
 269+ $concatBlobs[$row->old_id] );
227270 }
228 - } else {
229 - checkError( "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
 271+ } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) {
 272+ $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}",
230273 $concatBlobs[$row->old_id] );
231 - }
232 - } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) {
233 - checkError( "Error: Incorrect object header for concat bulk row {$row->old_id}",
234 - $concatBlobs[$row->old_id] );
235 - } # else good
 274+ } # else good
236275
237 - unset( $concatBlobs[$row->old_id] );
 276+ unset( $concatBlobs[$row->old_id] );
 277+ }
 278+ $dbr->freeResult( $res );
238279 }
239 - $dbr->freeResult( $res );
 280+
 281+ // Check targets of unresolved stubs
 282+ $this->checkExternalConcatBlobs( $externalConcatBlobs );
 283+
 284+ // next chunk
240285 }
241286
242 - // Check targets of unresolved stubs
243 - checkExternalConcatBlobs( $externalConcatBlobs );
244 - $dbr->ping();
 287+ print "\n\nErrors:\n";
 288+ foreach( $this->errors as $name => $errors ) {
 289+ if ( count( $errors ) ) {
 290+ $description = $this->errorDescriptions[$name];
 291+ echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
 292+ }
 293+ }
245294
246 - // next chunk
247 - }
 295+ if ( count( $this->errors['restore text'] ) && $fix ) {
 296+ if ( (string)$xml !== '' ) {
 297+ $this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
 298+ } else {
 299+ echo "Can't fix text, no XML backup specified\n";
 300+ }
 301+ }
248302
249 - print "\n\n" . count( $brokenRevisions ) . " broken revisions\n";
250 -
251 - print "\nFlag statistics:\n";
252 - $total = array_sum( $flagStats );
253 - foreach ( $flagStats as $flag => $count ) {
254 - printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
 303+ print "\nFlag statistics:\n";
 304+ $total = array_sum( $flagStats );
 305+ foreach ( $flagStats as $flag => $count ) {
 306+ printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
 307+ }
 308+ print "\nLocal object statistics:\n";
 309+ $total = array_sum( $objectStats );
 310+ foreach ( $objectStats as $className => $count ) {
 311+ printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
 312+ }
255313 }
256 - print "\nObject statistics:\n";
257 - $total = array_sum( $objectStats );
258 - foreach ( $objectStats as $className => $count ) {
259 - printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
260 - }
261 -}
262314
263315
264 -function checkError( $msg, $ids ) {
265 - global $oldIdMap, $brokenRevisions;
266 - if ( is_array( $ids ) && count( $ids ) == 1 ) {
267 - $ids = reset( $ids );
268 - }
269 - if ( is_array( $ids ) ) {
270 - $revIds = array();
271 - foreach ( $ids as $id ) {
272 - $revIds = array_merge( $revIds, array_keys( $oldIdMap, $id ) );
 316+ function error( $type, $msg, $ids ) {
 317+ if ( is_array( $ids ) && count( $ids ) == 1 ) {
 318+ $ids = reset( $ids );
273319 }
274 - print "$msg in text rows " . implode( ', ', $ids ) .
275 - ", revisions " . implode( ', ', $revIds ) . "\n";
276 - } else {
277 - $id = $ids;
278 - $revIds = array_keys( $oldIdMap, $id );
279 - if ( count( $revIds ) == 1 ) {
280 - print "$msg in old_id $id, rev_id {$revIds[0]}\n";
 320+ if ( is_array( $ids ) ) {
 321+ $revIds = array();
 322+ foreach ( $ids as $id ) {
 323+ $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
 324+ }
 325+ print "$msg in text rows " . implode( ', ', $ids ) .
 326+ ", revisions " . implode( ', ', $revIds ) . "\n";
281327 } else {
282 - print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
 328+ $id = $ids;
 329+ $revIds = array_keys( $this->oldIdMap, $id );
 330+ if ( count( $revIds ) == 1 ) {
 331+ print "$msg in old_id $id, rev_id {$revIds[0]}\n";
 332+ } else {
 333+ print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
 334+ }
283335 }
 336+ $this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
284337 }
285 - $brokenRevisions = $brokenRevisions + array_flip( $revIds );
286 -}
287338
288 -function checkExternalConcatBlobs( $externalConcatBlobs ) {
289 - static $dbStore = null;
290 - $fname = 'checkExternalConcatBlobs';
291 - if ( !count( $externalConcatBlobs ) ) {
292 - return;
 339+ function checkExternalConcatBlobs( $externalConcatBlobs ) {
 340+ $fname = 'CheckStorage::checkExternalConcatBlobs';
 341+ if ( !count( $externalConcatBlobs ) ) {
 342+ return;
 343+ }
 344+
 345+ if ( is_null( $this->dbStore ) ) {
 346+ $this->dbStore = new ExternalStoreDB;
 347+ }
 348+
 349+ foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
 350+ $blobIds = array_keys( $oldIds );
 351+ $extDb =& $this->dbStore->getSlave( $cluster );
 352+ $blobsTable = $this->dbStore->getTable( $extDb );
 353+ $headerLength = strlen( CONCAT_HEADER );
 354+ $res = $extDb->select( $blobsTable,
 355+ array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
 356+ array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
 357+ while ( $row = $extDb->fetchObject( $res ) ) {
 358+ if ( strcasecmp( $row->header, CONCAT_HEADER ) ) {
 359+ $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
 360+ $oldIds[$row->blob_id] );
 361+ }
 362+ unset( $oldIds[$row->blob_id] );
 363+
 364+ }
 365+ $extDb->freeResult( $res );
 366+
 367+ // Print errors for missing blobs rows
 368+ foreach ( $oldIds as $blobId => $oldIds ) {
 369+ $this->error( 'restore text', "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds );
 370+ }
 371+ }
293372 }
294373
295 - if ( is_null( $dbStore ) ) {
296 - $dbStore = new ExternalStoreDB;
 374+ function restoreText( $revIds, $xml ) {
 375+ global $wgTmpDirectory, $wgDBname;
 376+
 377+ if ( !count( $revIds ) ) {
 378+ return;
 379+ }
 380+
 381+ print "Restoring text from XML backup...\n";
 382+
 383+ $revFileName = "$wgTmpDirectory/broken-revlist-$wgDBname";
 384+ $filteredXmlFileName = "$wgTmpDirectory/filtered-$wgDBname.xml";
 385+
 386+ // Write revision list
 387+ if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
 388+ echo "Error writing revision list, can't restore text\n";
 389+ return;
 390+ }
 391+
 392+ // Run mwdumper
 393+ echo "Filtering XML dump...\n";
 394+ $exitStatus = 0;
 395+ passthru( 'mwdumper ' .
 396+ wfEscapeShellArg(
 397+ "--output=file:$filteredXmlFileName",
 398+ "--filter=revlist:$revFileName",
 399+ $xml
 400+ ), $exitStatus
 401+ );
 402+
 403+ if ( $exitStatus ) {
 404+ echo "mwdumper died with exit status $exitStatus\n";
 405+ return;
 406+ }
 407+
 408+ $file = fopen( $filteredXmlFileName, 'r' );
 409+ if ( !$file ) {
 410+ echo "Unable to open filtered XML file\n";
 411+ return;
 412+ }
 413+
 414+ $dbr =& wfGetDB( DB_SLAVE );
 415+ $dbw =& wfGetDB( DB_MASTER );
 416+ $dbr->ping();
 417+ $dbw->ping();
 418+
 419+ $source = new ImportStreamSource( $file );
 420+ $importer = new WikiImporter( $source );
 421+ $importer->setRevisionCallback( array( &$this, 'importRevision' ) );
 422+ $importer->doImport();
297423 }
298 -
299 - foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
300 - $blobIds = array_keys( $oldIds );
301 - $extDb =& $dbStore->getSlave( $cluster );
302 - $blobsTable = $dbStore->getTable( $extDb );
303 - $headerLength = strlen( CONCAT_HEADER );
304 - $res = $extDb->select( $blobsTable,
305 - array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
306 - array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
307 - while ( $row = $extDb->fetchObject( $res ) ) {
308 - if ( strcasecmp( $row->header, CONCAT_HEADER ) ) {
309 - checkError( "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
310 - $oldIds[$row->blob_id] );
311 - }
312 - unset( $oldIds[$row->blob_id] );
313424
 425+ function importRevision( &$revision, &$importer ) {
 426+ $fname = 'CheckStorage::importRevision';
 427+
 428+ $id = $revision->getID();
 429+ $text = $revision->getText();
 430+ if ( $text === '' ) {
 431+ // This is what happens if the revision was broken at the time the
 432+ // dump was made. Unfortunately, it also happens if the revision was
 433+ // legitimately blank, so there's no way to tell the difference. To
 434+ // be safe, we'll skip it and leave it broken
 435+ $id = $id ? $id : '';
 436+ echo "Revision $id is blank in the dump, may have been broken before export\n";
 437+ return;
314438 }
315 - $extDb->freeResult( $res );
316439
317 - // Print errors for missing blobs rows
318 - foreach ( $oldIds as $blobId => $oldIds ) {
319 - checkError( "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds );
 440+ if ( !$id ) {
 441+ // No ID, can't import
 442+ echo "No id tag in revision, can't import\n";
 443+ return;
320444 }
 445+
 446+ // Find text row again
 447+ $dbr =& wfGetDB( DB_SLAVE );
 448+ $oldId = $dbr->selectField( 'revision', 'rev_text_id', array( 'rev_id' => $id ), $fname );
 449+ if ( !$oldId ) {
 450+ echo "Missing revision row for rev_id $id\n";
 451+ return;
 452+ }
 453+
 454+ // Compress the text
 455+ $flags = Revision::compressRevisionText( $text );
 456+
 457+ // Update the text row
 458+ $dbw->update( 'text',
 459+ array( 'old_flags' => $flags, 'old_text' => $text ),
 460+ array( 'old_id' => $oldId ),
 461+ $fname, array( 'LIMIT' => 1 )
 462+ );
 463+
 464+ // Remove it from the unfixed list and add it to the fixed list
 465+ unset( $this->errors['restore text'][$id] );
 466+ $this->errors['fixed'][$id] = true;
321467 }
322468 }
323 -
324469 ?>

Status & tagging log