r62807 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r62806 | r62807 | r62808 >
Date:02:33, 22 February 2010
Author:tstarling
Status:ok
Tags:
Comment:

In fixBug20757.php:
* Don't use buildLike() just yet, for easy backporting to wmf-deployment.
* Don't try to fetch old_flags=object,external rows
* Skip rows with the wrong class using a MySQL condition since we really don't care about them
* Propagate old_flags to the new pointer row. This could be necessary if moveToExternal.php was run after compressOld.php from MW>1.5. This never actually happened at Wikimedia.
* Don't skip rows with bt_moved=0; we need to resolve these stubs if RCT is to continue.
* Fix isUnbrokenStub() so that it does not overwrite its input; this fixes a minor progress message issue.
* Tested locally.

In resolveStubs.php:
* Fixed two bugs in the condition intended to limit the rows acted on to HistoryBlobStub: the string we compared against was not all in lower case, and the character set was binary, making LOWER() do nothing.
* Resolve stubs with old_flags='object,utf-8'; analysis showed that we have some of these on enwiki.
* Tested locally.

In trackBlobs.php:
* Fixed a bug causing incorrect values to be inserted into bt_cgz_hash. There was no impact on RCT or any other script since bt_cgz_hash is unused at present. Tested locally.
Modified paths:
  • /trunk/phase3/maintenance/storage/fixBug20757.php (modified) (history)
  • /trunk/phase3/maintenance/storage/resolveStubs.php (modified) (history)
  • /trunk/phase3/maintenance/storage/trackBlobs.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/storage/fixBug20757.php
@@ -39,7 +39,8 @@
4040 array( 'old_id', 'old_flags', 'old_text' ),
4141 array(
4242 'old_id > ' . intval( $startId ),
43 - 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'object', $dbr->anyString )
 43+ 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
 44+ 'LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
4445 ),
4546 __METHOD__,
4647 array(
@@ -80,10 +81,19 @@
8182 continue;
8283 }
8384
 85+ // Process flags
 86+ $flags = explode( ',', $row->old_flags );
 87+ if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
 88+ $legacyEncoding = false;
 89+ } else {
 90+ $legacyEncoding = true;
 91+ }
 92+
8493 // Queue the stub for future batch processing
8594 $id = intval( $obj->mOldId );
8695 $secondaryIds[] = $id;
8796 $stubs[$row->old_id] = array(
 97+ 'legacyEncoding' => $legacyEncoding,
8898 'secondaryId' => $id,
8999 'hash' => $obj->mHash,
90100 );
@@ -101,7 +111,6 @@
102112 '*',
103113 array(
104114 'bt_text_id' => $secondaryIds,
105 - 'bt_moved' => 1,
106115 ),
107116 __METHOD__
108117 );
@@ -170,6 +179,8 @@
171180 }
172181 }
173182
 183+ $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
 184+
174185 if ( !$dryRun ) {
175186 // Reset the text row to point to the original copy
176187 $dbw->begin();
@@ -177,7 +188,7 @@
178189 'text',
179190 // SET
180191 array(
181 - 'old_flags' => 'external', // use legacy encoding
 192+ 'old_flags' => $newFlags,
182193 'old_text' => $url
183194 ),
184195 // WHERE
@@ -264,22 +275,23 @@
265276 */
266277 function isUnbrokenStub( $stub, $secondaryRow ) {
267278 $flags = explode( ',', $secondaryRow->old_flags );
 279+ $text = $secondaryRow->old_text;
268280 if( in_array( 'external', $flags ) ) {
269 - $url = $secondaryRow->old_text;
 281+ $url = $text;
270282 @list( /* $proto */ , $path ) = explode( '://', $url, 2 );
271283 if ( $path == "" ) {
272284 return false;
273285 }
274 - $secondaryRow->old_text = ExternalStore::fetchFromUrl( $url );
 286+ $text = ExternalStore::fetchFromUrl( $url );
275287 }
276288 if( !in_array( 'object', $flags ) ) {
277289 return false;
278290 }
279291
280292 if( in_array( 'gzip', $flags ) ) {
281 - $obj = unserialize( gzinflate( $secondaryRow->old_text ) );
 293+ $obj = unserialize( gzinflate( $text ) );
282294 } else {
283 - $obj = unserialize( $secondaryRow->old_text );
 295+ $obj = unserialize( $text );
284296 }
285297
286298 if( !is_object( $obj ) ) {
Index: trunk/phase3/maintenance/storage/resolveStubs.php
@@ -35,11 +35,9 @@
3636
3737 $res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ),
3838 "old_id>=$start AND old_id<=$end " .
39 - # Using a more restrictive flag set for now, until I do some more analysis -- TS
40 - #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
41 -
42 - "AND old_flags='object' " .
43 - "AND LOWER(LEFT(old_text,22)) = 'O:15:\"historyblobstub\"'", $fname );
 39+ "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ".
 40+ 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
 41+ $fname );
4442 while ( $row = $dbr->fetchObject( $res ) ) {
4543 resolveStub( $row->old_id, $row->old_text, $row->old_flags );
4644 }
Index: trunk/phase3/maintenance/storage/trackBlobs.php
@@ -73,7 +73,7 @@
7474 return array(
7575 'cluster' => $m[1],
7676 'id' => intval( $m[2] ),
77 - 'hash' => isset( $m[3] ) ? $m[2] : null
 77+ 'hash' => isset( $m[3] ) ? $m[3] : null
7878 );
7979 }
8080

Status & tagging log