Index: trunk/phase3/maintenance/storage/checkStorage.php |
— | — | @@ -8,316 +8,461 @@ |
9 | 9 |
|
10 | 10 | if ( !defined( 'MEDIAWIKI' ) ) {
|
11 | 11 | require_once( dirname(__FILE__) . '/../commandLine.inc' );
|
12 | | - require_once('ExternalStore.php');
|
| 12 | + require_once( 'ExternalStore.php' );
|
13 | 13 | require_once( 'ExternalStoreDB.php' );
|
| 14 | + require_once( 'SpecialImport.php' );
|
14 | 15 |
|
15 | | - checkStorage();
|
| 16 | + $cs = new CheckStorage;
|
| 17 | + $fix = isset( $options['fix'] );
|
| 18 | + if ( isset( $args[0] ) ) {
|
| 19 | + $xml = $args[0];
|
| 20 | + } else {
|
| 21 | + $xml = false;
|
| 22 | + }
|
| 23 | + $cs->check( $fix, $xml );
|
16 | 24 | }
|
17 | 25 |
|
18 | 26 |
|
19 | 27 | //----------------------------------------------------------------------------------
|
20 | 28 |
|
21 | | -function checkStorage() {
|
22 | | - global $oldIdMap, $brokenRevisions;
|
23 | | -
|
24 | | - $fname = 'checkStorage';
|
25 | | - $dbr =& wfGetDB( DB_SLAVE );
|
26 | | - $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, $fname );
|
27 | | - $chunkSize = 1000;
|
28 | | - $flagStats = array();
|
29 | | - $objectStats = array();
|
30 | | - $knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );
|
31 | | - $dbStore = null;
|
32 | | - $brokenRevisions = array();
|
| 29 | +class CheckStorage
|
| 30 | +{
|
| 31 | + var $oldIdMap, $errors;
|
| 32 | + var $dbStore = null;
|
33 | 33 |
|
34 | | - for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
|
35 | | - $chunkEnd = $chunkStart + $chunkSize - 1;
|
36 | | - //print "$chunkStart of $maxRevId\n";
|
| 34 | + var $errorDescriptions = array(
|
| 35 | + 'restore text' => 'Damaged text, need to be restored from a backup',
|
| 36 | + 'restore revision' => 'Damaged revision row, need to be restored from a backup',
|
| 37 | + 'unfixable' => 'Unexpected errors with no automated fixing method',
|
| 38 | + 'fixed' => 'Errors already fixed',
|
| 39 | + 'fixable' => 'Errors which would already be fixed if --fix was specified',
|
| 40 | + );
|
37 | 41 |
|
38 | | - // Fetch revision rows
|
39 | | - $oldIdMap = array();
|
40 | | - $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
|
41 | | - array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname );
|
42 | | - while ( $row = $dbr->fetchObject( $res ) ) {
|
43 | | - $oldIdMap[$row->rev_id] = $row->rev_text_id;
|
| 42 | + function check( $fix = false, $xml = '' ) {
|
| 43 | + $fname = 'checkStorage';
|
| 44 | + $dbr =& wfGetDB( DB_SLAVE );
|
| 45 | + if ( $fix ) {
|
| 46 | + $dbw =& wfGetDB( DB_MASTER );
|
| 47 | + print "Checking, will fix errors if possible...\n";
|
| 48 | + } else {
|
| 49 | + print "Checking...\n";
|
44 | 50 | }
|
45 | | - $dbr->freeResult( $res );
|
| 51 | + $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, $fname );
|
| 52 | + $chunkSize = 1000;
|
| 53 | + $flagStats = array();
|
| 54 | + $objectStats = array();
|
| 55 | + $knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );
|
| 56 | + $this->errors = array(
|
| 57 | + 'restore text' => array(),
|
| 58 | + 'restore revision' => array(),
|
| 59 | + 'unfixable' => array(),
|
| 60 | + 'fixed' => array(),
|
| 61 | + 'fixable' => array(),
|
| 62 | + );
|
46 | 63 |
|
47 | | - if ( !count( $oldIdMap ) ) {
|
48 | | - continue;
|
49 | | - }
|
| 64 | + for ( $chunkStart = 1 ; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
|
| 65 | + $chunkEnd = $chunkStart + $chunkSize - 1;
|
| 66 | + //print "$chunkStart of $maxRevId\n";
|
50 | 67 |
|
51 | | - // Fetch old_flags
|
52 | | - $missingTextRows = array_flip( $oldIdMap );
|
53 | | - $externalRevs = array();
|
54 | | - $objectRevs = array();
|
55 | | - $flagsFields = array();
|
56 | | - $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
|
57 | | - 'old_id IN (' . implode( ',', $oldIdMap ) . ')', $fname );
|
58 | | - while ( $row = $dbr->fetchObject( $res ) ) {
|
59 | | - $flags = $row->old_flags;
|
60 | | - $id = $row->old_id;
|
| 68 | + // Fetch revision rows
|
| 69 | + $this->oldIdMap = array();
|
| 70 | + $dbr->ping();
|
| 71 | + $res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
|
| 72 | + array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname );
|
| 73 | + while ( $row = $dbr->fetchObject( $res ) ) {
|
| 74 | + $this->oldIdMap[$row->rev_id] = $row->rev_text_id;
|
| 75 | + }
|
| 76 | + $dbr->freeResult( $res );
|
61 | 77 |
|
62 | | - // Create flagStats row if it doesn't exist
|
63 | | - $flagStats = $flagStats + array( $flags => 0 );
|
64 | | - // Increment counter
|
65 | | - $flagStats[$flags]++;
|
| 78 | + if ( !count( $this->oldIdMap ) ) {
|
| 79 | + continue;
|
| 80 | + }
|
66 | 81 |
|
67 | | - // Not missing
|
68 | | - unset( $missingTextRows[$row->old_id] );
|
| 82 | + // Fetch old_flags
|
| 83 | + $missingTextRows = array_flip( $this->oldIdMap );
|
| 84 | + $externalRevs = array();
|
| 85 | + $objectRevs = array();
|
| 86 | + $res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
|
| 87 | + 'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', $fname );
|
| 88 | + while ( $row = $dbr->fetchObject( $res ) ) {
|
| 89 | + $flags = $row->old_flags;
|
| 90 | + $id = $row->old_id;
|
69 | 91 |
|
70 | | - // Check for external or object
|
71 | | - if ( $flags == '' ) {
|
72 | | - $flagArray = array();
|
73 | | - } else {
|
74 | | - $flagArray = explode( ',', $flags );
|
75 | | - }
|
76 | | - if ( in_array( 'external', $flagArray ) ) {
|
77 | | - $flagsFields[$id] = $flags; // is this needed?
|
78 | | - $externalRevs[] = $id;
|
79 | | - } elseif ( in_array( 'object', $flagArray ) ) {
|
80 | | - $flagsFields[$id] = $flags; // is this needed?
|
81 | | - $objectRevs[] = $id;
|
82 | | - }
|
| 92 | + // Create flagStats row if it doesn't exist
|
| 93 | + $flagStats = $flagStats + array( $flags => 0 );
|
| 94 | + // Increment counter
|
| 95 | + $flagStats[$flags]++;
|
83 | 96 |
|
84 | | - // Check for unrecognised flags
|
85 | | - if ( count( array_diff( $flagArray, $knownFlags ) ) ) {
|
86 | | - checkError( "Warning: invalid flags field \"$flags\"", $id );
|
87 | | - }
|
88 | | - }
|
89 | | - $dbr->freeResult( $res );
|
| 97 | + // Not missing
|
| 98 | + unset( $missingTextRows[$row->old_id] );
|
90 | 99 |
|
91 | | - // Output errors for any missing text rows
|
92 | | - foreach ( $missingTextRows as $oldId => $revId ) {
|
93 | | - print "Error: missing text row $oldId for revision $revId\n";
|
94 | | - }
|
95 | | -
|
96 | | - // Verify external revisions
|
97 | | - $externalConcatBlobs = array();
|
98 | | - $externalNormalBlobs = array();
|
99 | | - if ( count( $externalRevs ) ) {
|
100 | | - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
|
101 | | - array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname );
|
102 | | - while ( $row = $dbr->fetchObject( $res ) ) {
|
103 | | - $urlParts = explode( '://', $row->old_text, 2 );
|
104 | | - if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
|
105 | | - checkError( "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
|
106 | | - continue;
|
| 100 | + // Check for external or object
|
| 101 | + if ( $flags == '' ) {
|
| 102 | + $flagArray = array();
|
| 103 | + } else {
|
| 104 | + $flagArray = explode( ',', $flags );
|
107 | 105 | }
|
108 | | - list( $proto, $path ) = $urlParts;
|
109 | | - if ( $proto != 'DB' ) {
|
110 | | - checkError( "Error: invalid external protocol \"$proto\"", $row->old_id );
|
111 | | - continue;
|
| 106 | + if ( in_array( 'external', $flagArray ) ) {
|
| 107 | + $externalRevs[] = $id;
|
| 108 | + } elseif ( in_array( 'object', $flagArray ) ) {
|
| 109 | + $objectRevs[] = $id;
|
112 | 110 | }
|
113 | | - $path = explode( '/', $row->old_text );
|
114 | | - $cluster = $path[2];
|
115 | | - $id = $path[3];
|
116 | | - if ( isset( $path[4] ) ) {
|
117 | | - $externalConcatBlobs[$cluster][$id][] = $row->old_id;
|
118 | | - } else {
|
119 | | - $externalNormalBlobs[$cluster][$id][] = $row->old_id;
|
| 111 | +
|
| 112 | + // Check for unrecognised flags
|
| 113 | + if ( $flags == '0' ) {
|
| 114 | + // This is a known bug from 2004
|
| 115 | + // It's safe to just erase the old_flags field
|
| 116 | + if ( $fix ) {
|
| 117 | + $this->error( 'fixed', "Warning: old_flags set to 0", $id );
|
| 118 | + $dbw->ping();
|
| 119 | + $dbw->update( 'text', array( 'old_flags' => '' ),
|
| 120 | + array( 'old_id' => $id ), $fname );
|
| 121 | + echo "Fixed\n";
|
| 122 | + } else {
|
| 123 | + $this->error( 'fixable', "Warning: old_flags set to 0", $id );
|
| 124 | + }
|
| 125 | + } elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
|
| 126 | + $this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
|
120 | 127 | }
|
121 | 128 | }
|
122 | 129 | $dbr->freeResult( $res );
|
123 | | - }
|
124 | 130 |
|
125 | | - // Check external concat blobs for the right header
|
126 | | - checkExternalConcatBlobs( $externalConcatBlobs );
|
127 | | -
|
| 131 | + // Output errors for any missing text rows
|
| 132 | + foreach ( $missingTextRows as $oldId => $revId ) {
|
| 133 | + $this->error( 'restore revision', "Error: missing text row", $oldId );
|
| 134 | + }
|
128 | 135 |
|
129 | | - // Check external normal blobs for existence
|
130 | | - if ( count( $externalNormalBlobs ) ) {
|
131 | | - if ( is_null( $dbStore ) ) {
|
132 | | - $dbStore = new ExternalStoreDB;
|
| 136 | + // Verify external revisions
|
| 137 | + $externalConcatBlobs = array();
|
| 138 | + $externalNormalBlobs = array();
|
| 139 | + if ( count( $externalRevs ) ) {
|
| 140 | + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
|
| 141 | + array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname );
|
| 142 | + while ( $row = $dbr->fetchObject( $res ) ) {
|
| 143 | + $urlParts = explode( '://', $row->old_text, 2 );
|
| 144 | + if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
|
| 145 | + $this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
|
| 146 | + continue;
|
| 147 | + }
|
| 148 | + list( $proto, $path ) = $urlParts;
|
| 149 | + if ( $proto != 'DB' ) {
|
| 150 | + $this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
|
| 151 | + continue;
|
| 152 | + }
|
| 153 | + $path = explode( '/', $row->old_text );
|
| 154 | + $cluster = $path[2];
|
| 155 | + $id = $path[3];
|
| 156 | + if ( isset( $path[4] ) ) {
|
| 157 | + $externalConcatBlobs[$cluster][$id][] = $row->old_id;
|
| 158 | + } else {
|
| 159 | + $externalNormalBlobs[$cluster][$id][] = $row->old_id;
|
| 160 | + }
|
| 161 | + }
|
| 162 | + $dbr->freeResult( $res );
|
133 | 163 | }
|
134 | | - foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) {
|
135 | | - $blobIds = array_keys( $xBlobIds );
|
136 | | - $extDb =& $dbStore->getSlave( $cluster );
|
137 | | - $blobsTable = $dbStore->getTable( $extDb );
|
138 | | - $res = $extDb->select( $blobsTable,
|
139 | | - array( 'blob_id' ),
|
140 | | - array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
|
141 | | - while ( $row = $extDb->fetchObject( $res ) ) {
|
142 | | - unset( $xBlobIds[$row->blob_id] );
|
| 164 | +
|
| 165 | + // Check external concat blobs for the right header
|
| 166 | + $this->checkExternalConcatBlobs( $externalConcatBlobs );
|
| 167 | +
|
| 168 | + // Check external normal blobs for existence
|
| 169 | + if ( count( $externalNormalBlobs ) ) {
|
| 170 | + if ( is_null( $this->dbStore ) ) {
|
| 171 | + $this->dbStore = new ExternalStoreDB;
|
143 | 172 | }
|
144 | | - $extDb->freeResult( $res );
|
145 | | - // Print errors for missing blobs rows
|
146 | | - foreach ( $xBlobIds as $blobId => $oldId ) {
|
147 | | - checkError( "Error: missing target $blobId for one-part ES URL", $oldId );
|
| 173 | + foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) {
|
| 174 | + $blobIds = array_keys( $xBlobIds );
|
| 175 | + $extDb =& $this->dbStore->getSlave( $cluster );
|
| 176 | + $blobsTable = $this->dbStore->getTable( $extDb );
|
| 177 | + $res = $extDb->select( $blobsTable,
|
| 178 | + array( 'blob_id' ),
|
| 179 | + array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
|
| 180 | + while ( $row = $extDb->fetchObject( $res ) ) {
|
| 181 | + unset( $xBlobIds[$row->blob_id] );
|
| 182 | + }
|
| 183 | + $extDb->freeResult( $res );
|
| 184 | + // Print errors for missing blobs rows
|
| 185 | + foreach ( $xBlobIds as $blobId => $oldId ) {
|
| 186 | + $this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId );
|
| 187 | + }
|
148 | 188 | }
|
149 | 189 | }
|
150 | | - }
|
151 | 190 |
|
152 | | - // Check local objects
|
153 | | - $dbr->ping();
|
154 | | - $concatBlobs = array();
|
155 | | - $curIds = array();
|
156 | | - if ( count( $objectRevs ) ) {
|
157 | | - $headerLength = 300;
|
158 | | - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
|
159 | | - array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname );
|
160 | | - while ( $row = $dbr->fetchObject( $res ) ) {
|
161 | | - $oldId = $row->old_id;
|
162 | | - if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
|
163 | | - checkError( "Error: invalid object header", $oldId );
|
164 | | - continue;
|
165 | | - }
|
| 191 | + // Check local objects
|
| 192 | + $dbr->ping();
|
| 193 | + $concatBlobs = array();
|
| 194 | + $curIds = array();
|
| 195 | + if ( count( $objectRevs ) ) {
|
| 196 | + $headerLength = 300;
|
| 197 | + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
|
| 198 | + array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname );
|
| 199 | + while ( $row = $dbr->fetchObject( $res ) ) {
|
| 200 | + $oldId = $row->old_id;
|
| 201 | + if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
|
| 202 | + $this->error( 'restore text', "Error: invalid object header", $oldId );
|
| 203 | + continue;
|
| 204 | + }
|
166 | 205 |
|
167 | | - $className = strtolower( $matches[2] );
|
168 | | - if ( strlen( $className ) != $matches[1] ) {
|
169 | | - checkError( "Error: invalid object header, wrong class name length", $oldId );
|
170 | | - continue;
|
171 | | - }
|
| 206 | + $className = strtolower( $matches[2] );
|
| 207 | + if ( strlen( $className ) != $matches[1] ) {
|
| 208 | + $this->error( 'restore text', "Error: invalid object header, wrong class name length", $oldId );
|
| 209 | + continue;
|
| 210 | + }
|
172 | 211 |
|
173 | | - $objectStats = $objectStats + array( $className => 0 );
|
174 | | - $objectStats[$className]++;
|
| 212 | + $objectStats = $objectStats + array( $className => 0 );
|
| 213 | + $objectStats[$className]++;
|
175 | 214 |
|
176 | | - switch ( $className ) {
|
177 | | - case 'concatenatedgziphistoryblob':
|
178 | | - // Good
|
179 | | - break;
|
180 | | - case 'historyblobstub':
|
181 | | - case 'historyblobcurstub':
|
182 | | - if ( strlen( $row->header ) == $headerLength ) {
|
183 | | - checkError( "Error: overlong stub header", $oldId );
|
184 | | - continue;
|
185 | | - }
|
186 | | - $stubObj = unserialize( $row->header );
|
187 | | - if ( !is_object( $stubObj ) ) {
|
188 | | - checkError( "Error: unable to unserialize stub object", $oldId );
|
189 | | - continue;
|
190 | | - }
|
191 | | - if ( $className == 'historyblobstub' ) {
|
192 | | - $concatBlobs[$stubObj->mOldId][] = $oldId;
|
193 | | - } else {
|
194 | | - $curIds[$stubObj->mCurId][] = $oldId;
|
195 | | - }
|
196 | | - break;
|
197 | | - default:
|
198 | | - checkError( "Error: unrecognised object class \"$className\"", $oldId );
|
| 215 | + switch ( $className ) {
|
| 216 | + case 'concatenatedgziphistoryblob':
|
| 217 | + // Good
|
| 218 | + break;
|
| 219 | + case 'historyblobstub':
|
| 220 | + case 'historyblobcurstub':
|
| 221 | + if ( strlen( $row->header ) == $headerLength ) {
|
| 222 | + $this->error( 'unfixable', "Error: overlong stub header", $oldId );
|
| 223 | + continue;
|
| 224 | + }
|
| 225 | + $stubObj = unserialize( $row->header );
|
| 226 | + if ( !is_object( $stubObj ) ) {
|
| 227 | + $this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
|
| 228 | + continue;
|
| 229 | + }
|
| 230 | + if ( $className == 'historyblobstub' ) {
|
| 231 | + $concatBlobs[$stubObj->mOldId][] = $oldId;
|
| 232 | + } else {
|
| 233 | + $curIds[$stubObj->mCurId][] = $oldId;
|
| 234 | + }
|
| 235 | + break;
|
| 236 | + default:
|
| 237 | + $this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
|
| 238 | + }
|
199 | 239 | }
|
| 240 | + $dbr->freeResult( $res );
|
200 | 241 | }
|
201 | | - $dbr->freeResult( $res );
|
202 | | - }
|
203 | 242 |
|
204 | | - // Check local concat blob validity
|
205 | | - $externalConcatBlobs = array();
|
206 | | - if ( count( $concatBlobs ) ) {
|
207 | | - $headerLength = 300;
|
208 | | - $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
|
209 | | - array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname );
|
210 | | - while ( $row = $dbr->fetchObject( $res ) ) {
|
211 | | - $flags = explode( ',', $row->old_flags );
|
212 | | - if ( in_array( 'external', $flags ) ) {
|
213 | | - // Concat blob is in external storage?
|
214 | | - if ( in_array( 'object', $flags ) ) {
|
215 | | - $urlParts = explode( '/', $row->header );
|
216 | | - if ( $urlParts[0] != 'DB:' ) {
|
217 | | - checkError( "Error: unrecognised external storage type \"{$urlParts[0]}", $row->old_id );
|
| 243 | + // Check local concat blob validity
|
| 244 | + $externalConcatBlobs = array();
|
| 245 | + if ( count( $concatBlobs ) ) {
|
| 246 | + $headerLength = 300;
|
| 247 | + $res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
|
| 248 | + array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname );
|
| 249 | + while ( $row = $dbr->fetchObject( $res ) ) {
|
| 250 | + $flags = explode( ',', $row->old_flags );
|
| 251 | + if ( in_array( 'external', $flags ) ) {
|
| 252 | + // Concat blob is in external storage?
|
| 253 | + if ( in_array( 'object', $flags ) ) {
|
| 254 | + $urlParts = explode( '/', $row->header );
|
| 255 | + if ( $urlParts[0] != 'DB:' ) {
|
| 256 | + $this->error( 'unfixable', "Error: unrecognised external storage type \"{$urlParts[0]}", $row->old_id );
|
| 257 | + } else {
|
| 258 | + $cluster = $urlParts[2];
|
| 259 | + $id = $urlParts[3];
|
| 260 | + if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
|
| 261 | + $externalConcatBlobs[$cluster][$id] = array();
|
| 262 | + }
|
| 263 | + $externalConcatBlobs[$cluster][$id] = array_merge(
|
| 264 | + $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
|
| 265 | + );
|
| 266 | + }
|
218 | 267 | } else {
|
219 | | - $cluster = $urlParts[2];
|
220 | | - $id = $urlParts[3];
|
221 | | - if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
|
222 | | - $externalConcatBlobs[$cluster][$id] = array();
|
223 | | - }
|
224 | | - $externalConcatBlobs[$cluster][$id] = array_merge(
|
225 | | - $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
|
226 | | - );
|
| 268 | + $this->error( 'unfixable', "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
|
| 269 | + $concatBlobs[$row->old_id] );
|
227 | 270 | }
|
228 | | - } else {
|
229 | | - checkError( "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
|
| 271 | + } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) {
|
| 272 | + $this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}",
|
230 | 273 | $concatBlobs[$row->old_id] );
|
231 | | - }
|
232 | | - } elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) {
|
233 | | - checkError( "Error: Incorrect object header for concat bulk row {$row->old_id}",
|
234 | | - $concatBlobs[$row->old_id] );
|
235 | | - } # else good
|
| 274 | + } # else good
|
236 | 275 |
|
237 | | - unset( $concatBlobs[$row->old_id] );
|
| 276 | + unset( $concatBlobs[$row->old_id] );
|
| 277 | + }
|
| 278 | + $dbr->freeResult( $res );
|
238 | 279 | }
|
239 | | - $dbr->freeResult( $res );
|
| 280 | +
|
| 281 | + // Check targets of unresolved stubs
|
| 282 | + $this->checkExternalConcatBlobs( $externalConcatBlobs );
|
| 283 | +
|
| 284 | + // next chunk
|
240 | 285 | }
|
241 | 286 |
|
242 | | - // Check targets of unresolved stubs
|
243 | | - checkExternalConcatBlobs( $externalConcatBlobs );
|
244 | | - $dbr->ping();
|
| 287 | + print "\n\nErrors:\n";
|
| 288 | + foreach( $this->errors as $name => $errors ) {
|
| 289 | + if ( count( $errors ) ) {
|
| 290 | + $description = $this->errorDescriptions[$name];
|
| 291 | + echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
|
| 292 | + }
|
| 293 | + }
|
245 | 294 |
|
246 | | - // next chunk
|
247 | | - }
|
| 295 | + if ( count( $this->errors['restore text'] ) && $fix ) {
|
| 296 | + if ( (string)$xml !== '' ) {
|
| 297 | + $this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
|
| 298 | + } else {
|
| 299 | + echo "Can't fix text, no XML backup specified\n";
|
| 300 | + }
|
| 301 | + }
|
248 | 302 |
|
249 | | - print "\n\n" . count( $brokenRevisions ) . " broken revisions\n";
|
250 | | -
|
251 | | - print "\nFlag statistics:\n";
|
252 | | - $total = array_sum( $flagStats );
|
253 | | - foreach ( $flagStats as $flag => $count ) {
|
254 | | - printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
|
| 303 | + print "\nFlag statistics:\n";
|
| 304 | + $total = array_sum( $flagStats );
|
| 305 | + foreach ( $flagStats as $flag => $count ) {
|
| 306 | + printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
|
| 307 | + }
|
| 308 | + print "\nLocal object statistics:\n";
|
| 309 | + $total = array_sum( $objectStats );
|
| 310 | + foreach ( $objectStats as $className => $count ) {
|
| 311 | + printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
|
| 312 | + }
|
255 | 313 | }
|
256 | | - print "\nObject statistics:\n";
|
257 | | - $total = array_sum( $objectStats );
|
258 | | - foreach ( $objectStats as $className => $count ) {
|
259 | | - printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
|
260 | | - }
|
261 | | -}
|
262 | 314 |
|
263 | 315 |
|
264 | | -function checkError( $msg, $ids ) {
|
265 | | - global $oldIdMap, $brokenRevisions;
|
266 | | - if ( is_array( $ids ) && count( $ids ) == 1 ) {
|
267 | | - $ids = reset( $ids );
|
268 | | - }
|
269 | | - if ( is_array( $ids ) ) {
|
270 | | - $revIds = array();
|
271 | | - foreach ( $ids as $id ) {
|
272 | | - $revIds = array_merge( $revIds, array_keys( $oldIdMap, $id ) );
|
| 316 | + function error( $type, $msg, $ids ) {
|
| 317 | + if ( is_array( $ids ) && count( $ids ) == 1 ) {
|
| 318 | + $ids = reset( $ids );
|
273 | 319 | }
|
274 | | - print "$msg in text rows " . implode( ', ', $ids ) .
|
275 | | - ", revisions " . implode( ', ', $revIds ) . "\n";
|
276 | | - } else {
|
277 | | - $id = $ids;
|
278 | | - $revIds = array_keys( $oldIdMap, $id );
|
279 | | - if ( count( $revIds ) == 1 ) {
|
280 | | - print "$msg in old_id $id, rev_id {$revIds[0]}\n";
|
| 320 | + if ( is_array( $ids ) ) {
|
| 321 | + $revIds = array();
|
| 322 | + foreach ( $ids as $id ) {
|
| 323 | + $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
|
| 324 | + }
|
| 325 | + print "$msg in text rows " . implode( ', ', $ids ) .
|
| 326 | + ", revisions " . implode( ', ', $revIds ) . "\n";
|
281 | 327 | } else {
|
282 | | - print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
|
| 328 | + $id = $ids;
|
| 329 | + $revIds = array_keys( $this->oldIdMap, $id );
|
| 330 | + if ( count( $revIds ) == 1 ) {
|
| 331 | + print "$msg in old_id $id, rev_id {$revIds[0]}\n";
|
| 332 | + } else {
|
| 333 | + print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
|
| 334 | + }
|
283 | 335 | }
|
| 336 | + $this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
|
284 | 337 | }
|
285 | | - $brokenRevisions = $brokenRevisions + array_flip( $revIds );
|
286 | | -}
|
287 | 338 |
|
288 | | -function checkExternalConcatBlobs( $externalConcatBlobs ) {
|
289 | | - static $dbStore = null;
|
290 | | - $fname = 'checkExternalConcatBlobs';
|
291 | | - if ( !count( $externalConcatBlobs ) ) {
|
292 | | - return;
|
| 339 | + function checkExternalConcatBlobs( $externalConcatBlobs ) {
|
| 340 | + $fname = 'CheckStorage::checkExternalConcatBlobs';
|
| 341 | + if ( !count( $externalConcatBlobs ) ) {
|
| 342 | + return;
|
| 343 | + }
|
| 344 | +
|
| 345 | + if ( is_null( $this->dbStore ) ) {
|
| 346 | + $this->dbStore = new ExternalStoreDB;
|
| 347 | + }
|
| 348 | +
|
| 349 | + foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
|
| 350 | + $blobIds = array_keys( $oldIds );
|
| 351 | + $extDb =& $this->dbStore->getSlave( $cluster );
|
| 352 | + $blobsTable = $this->dbStore->getTable( $extDb );
|
| 353 | + $headerLength = strlen( CONCAT_HEADER );
|
| 354 | + $res = $extDb->select( $blobsTable,
|
| 355 | + array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
|
| 356 | + array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
|
| 357 | + while ( $row = $extDb->fetchObject( $res ) ) {
|
| 358 | + if ( strcasecmp( $row->header, CONCAT_HEADER ) ) {
|
| 359 | + $this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
|
| 360 | + $oldIds[$row->blob_id] );
|
| 361 | + }
|
| 362 | + unset( $oldIds[$row->blob_id] );
|
| 363 | +
|
| 364 | + }
|
| 365 | + $extDb->freeResult( $res );
|
| 366 | +
|
| 367 | + // Print errors for missing blobs rows
|
| 368 | + foreach ( $oldIds as $blobId => $oldIds ) {
|
| 369 | + $this->error( 'restore text', "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds );
|
| 370 | + }
|
| 371 | + }
|
293 | 372 | }
|
294 | 373 |
|
295 | | - if ( is_null( $dbStore ) ) {
|
296 | | - $dbStore = new ExternalStoreDB;
|
| 374 | + function restoreText( $revIds, $xml ) {
|
| 375 | + global $wgTmpDirectory, $wgDBname;
|
| 376 | +
|
| 377 | + if ( !count( $revIds ) ) {
|
| 378 | + return;
|
| 379 | + }
|
| 380 | +
|
| 381 | + print "Restoring text from XML backup...\n";
|
| 382 | +
|
| 383 | + $revFileName = "$wgTmpDirectory/broken-revlist-$wgDBname";
|
| 384 | + $filteredXmlFileName = "$wgTmpDirectory/filtered-$wgDBname.xml";
|
| 385 | +
|
| 386 | + // Write revision list
|
| 387 | + if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
|
| 388 | + echo "Error writing revision list, can't restore text\n";
|
| 389 | + return;
|
| 390 | + }
|
| 391 | +
|
| 392 | + // Run mwdumper
|
| 393 | + echo "Filtering XML dump...\n";
|
| 394 | + $exitStatus = 0;
|
| 395 | + passthru( 'mwdumper ' .
|
| 396 | + wfEscapeShellArg(
|
| 397 | + "--output=file:$filteredXmlFileName",
|
| 398 | + "--filter=revlist:$revFileName",
|
| 399 | + $xml
|
| 400 | + ), $exitStatus
|
| 401 | + );
|
| 402 | +
|
| 403 | + if ( $exitStatus ) {
|
| 404 | + echo "mwdumper died with exit status $exitStatus\n";
|
| 405 | + return;
|
| 406 | + }
|
| 407 | +
|
| 408 | + $file = fopen( $filteredXmlFileName, 'r' );
|
| 409 | + if ( !$file ) {
|
| 410 | + echo "Unable to open filtered XML file\n";
|
| 411 | + return;
|
| 412 | + }
|
| 413 | +
|
| 414 | + $dbr =& wfGetDB( DB_SLAVE );
|
| 415 | + $dbw =& wfGetDB( DB_MASTER );
|
| 416 | + $dbr->ping();
|
| 417 | + $dbw->ping();
|
| 418 | +
|
| 419 | + $source = new ImportStreamSource( $file );
|
| 420 | + $importer = new WikiImporter( $source );
|
| 421 | + $importer->setRevisionCallback( array( &$this, 'importRevision' ) );
|
| 422 | + $importer->doImport();
|
297 | 423 | }
|
298 | | -
|
299 | | - foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
|
300 | | - $blobIds = array_keys( $oldIds );
|
301 | | - $extDb =& $dbStore->getSlave( $cluster );
|
302 | | - $blobsTable = $dbStore->getTable( $extDb );
|
303 | | - $headerLength = strlen( CONCAT_HEADER );
|
304 | | - $res = $extDb->select( $blobsTable,
|
305 | | - array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
|
306 | | - array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
|
307 | | - while ( $row = $extDb->fetchObject( $res ) ) {
|
308 | | - if ( strcasecmp( $row->header, CONCAT_HEADER ) ) {
|
309 | | - checkError( "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
|
310 | | - $oldIds[$row->blob_id] );
|
311 | | - }
|
312 | | - unset( $oldIds[$row->blob_id] );
|
313 | 424 |
|
| 425 | + function importRevision( &$revision, &$importer ) {
|
| 426 | + $fname = 'CheckStorage::importRevision';
|
| 427 | +
|
| 428 | + $id = $revision->getID();
|
| 429 | + $text = $revision->getText();
|
| 430 | + if ( $text === '' ) {
|
| 431 | + // This is what happens if the revision was broken at the time the
|
| 432 | + // dump was made. Unfortunately, it also happens if the revision was
|
| 433 | + // legitimately blank, so there's no way to tell the difference. To
|
| 434 | + // be safe, we'll skip it and leave it broken
|
| 435 | + $id = $id ? $id : '';
|
| 436 | + echo "Revision $id is blank in the dump, may have been broken before export\n";
|
| 437 | + return;
|
314 | 438 | }
|
315 | | - $extDb->freeResult( $res );
|
316 | 439 |
|
317 | | - // Print errors for missing blobs rows
|
318 | | - foreach ( $oldIds as $blobId => $oldIds ) {
|
319 | | - checkError( "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds );
|
| 440 | + if ( !$id ) {
|
| 441 | + // No ID, can't import
|
| 442 | + echo "No id tag in revision, can't import\n";
|
| 443 | + return;
|
320 | 444 | }
|
| 445 | +
|
| 446 | + // Find text row again
|
| 447 | + $dbr =& wfGetDB( DB_SLAVE );
|
| 448 | + $oldId = $dbr->selectField( 'revision', 'rev_text_id', array( 'rev_id' => $id ), $fname );
|
| 449 | + if ( !$oldId ) {
|
| 450 | + echo "Missing revision row for rev_id $id\n";
|
| 451 | + return;
|
| 452 | + }
|
| 453 | +
|
| 454 | + // Compress the text
|
| 455 | + $flags = Revision::compressRevisionText( $text );
|
| 456 | +
|
| 457 | + // Update the text row
|
| 458 | + $dbw->update( 'text',
|
| 459 | + array( 'old_flags' => $flags, 'old_text' => $text ),
|
| 460 | + array( 'old_id' => $oldId ),
|
| 461 | + $fname, array( 'LIMIT' => 1 )
|
| 462 | + );
|
| 463 | +
|
| 464 | + // Remove it from the unfixed list and add it to the fixed list
|
| 465 | + unset( $this->errors['restore text'][$id] );
|
| 466 | + $this->errors['fixed'][$id] = true;
|
321 | 467 | }
|
322 | 468 | }
|
323 | | -
|
324 | 469 | ?>
|