Index: trunk/phase3/maintenance/archives/patch-ar_sha1.sql |
— | — | @@ -0,0 +1,3 @@ |
| 2 | +-- Add ar_sha1 to archive: base-36 SHA-1 hash of the text content ('' until populated) |
| 3 | +ALTER TABLE /*$wgDBprefix*/archive |
| 4 | + ADD ar_sha1 varbinary(32) NOT NULL default ''; |
Property changes on: trunk/phase3/maintenance/archives/patch-ar_sha1.sql |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 5 | + native |
Index: trunk/phase3/maintenance/archives/patch-rev_sha1.sql |
— | — | @@ -0,0 +1,3 @@ |
| 2 | +-- Add rev_sha1 to revision: base-36 SHA-1 hash of the text content ('' until populated) |
| 3 | +ALTER TABLE /*$wgDBprefix*/revision |
| 4 | + ADD rev_sha1 varbinary(32) NOT NULL default ''; |
Property changes on: trunk/phase3/maintenance/archives/patch-rev_sha1.sql |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 5 | + native |
Index: trunk/phase3/maintenance/populateRevisionSha1.php |
— | — | @@ -0,0 +1,108 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Fills the rev_sha1 and ar_sha1 columns of revision |
| 5 | + * and archive tables for revisions created before MW 1.19. |
| 6 | + * |
| 7 | + * This program is free software; you can redistribute it and/or modify |
| 8 | + * it under the terms of the GNU General Public License as published by |
| 9 | + * the Free Software Foundation; either version 2 of the License, or |
| 10 | + * (at your option) any later version. |
| 11 | + * |
| 12 | + * This program is distributed in the hope that it will be useful, |
| 13 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | + * GNU General Public License for more details. |
| 16 | + * |
| 17 | + * You should have received a copy of the GNU General Public License along |
| 18 | + * with this program; if not, write to the Free Software Foundation, Inc., |
| 19 | + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 20 | + * http://www.gnu.org/copyleft/gpl.html |
| 21 | + * |
| 22 | + * @ingroup Maintenance |
| 23 | + */ |
| 24 | + |
| 25 | +require_once( dirname( __FILE__ ) . '/Maintenance.php' ); |
| 26 | + |
| 27 | +class PopulateRevisionSha1 extends LoggedUpdateMaintenance { |
| 28 | +	public function __construct() { |
| 29 | +		parent::__construct(); |
| 30 | +		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields"; |
| 31 | +		$this->setBatchSize( 200 ); |
| 32 | +	} |
| 33 | + |
| 34 | +	protected function getUpdateKey() { |
| 35 | +		return 'populate rev_sha1'; |
| 36 | +	} |
| 37 | + |
| 38 | +	/** |
| 39 | +	 * Populate rev_sha1 for the revision table, then ar_sha1 for the archive table. |
| 40 | +	 * @return Boolean true once both tables have been processed |
| 41 | +	 */ |
| 42 | +	protected function doDBUpdates() { |
| 43 | +		$db = $this->getDB( DB_MASTER ); |
| 44 | +		foreach ( array( 'revision', 'archive' ) as $table ) { |
| 45 | +			if ( !$db->tableExists( $table ) ) { |
| 46 | +				$this->error( "$table table does not exist", true ); |
| 47 | +			} |
| 48 | +		} |
| 49 | +		$this->output( "Populating rev_sha1 column\n" ); |
| 50 | +		$rc = $this->doSha1Updates( $db, 'revision', 'rev_id', 'rev' ); |
| 51 | +		$this->output( "Populating ar_sha1 column\n" ); |
| 52 | +		$ac = $this->doSha1Updates( $db, 'archive', 'ar_rev_id', 'ar' ); |
| 53 | +		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" ); |
| 54 | +		return true; |
| 55 | +	} |
| 56 | + |
| 57 | +	/** |
| 58 | +	 * Walk $table in $idCol ranges of mBatchSize and store the base-36 SHA-1 of |
| 59 | +	 * each revision's raw text in {$prefix}_sha1 wherever that column is still ''. |
| 60 | +	 * |
| 61 | +	 * @param $db Object database connection obtained from getDB( DB_MASTER ) |
| 62 | +	 * @param $table String 'revision' or 'archive' |
| 63 | +	 * @param $idCol String id column used for batching and as the update key |
| 64 | +	 * @param $prefix String column prefix ('rev' or 'ar') |
| 65 | +	 * @return Integer Rows changed (0 if the table is empty) |
| 66 | +	 */ |
| 67 | +	protected function doSha1Updates( $db, $table, $idCol, $prefix ) { |
| 68 | +		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ ); |
| 69 | +		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ ); |
| 70 | +		if ( !$start || !$end ) { |
| 71 | +			$this->output( "...$table table seems to be empty.\n" ); |
| 72 | +			return 0; // a count, not true: the caller prints this value as a number of rows |
| 73 | +		} |
| 74 | + |
| 75 | +		$count = 0; |
| 76 | +		# Pad $end so the final, possibly partial, block still satisfies the loop test |
| 77 | +		$end += $this->mBatchSize - 1; |
| 78 | +		$blockStart = $start; |
| 79 | +		$blockEnd = $start + $this->mBatchSize - 1; |
| 80 | +		while ( $blockEnd <= $end ) { |
| 81 | +			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" ); |
| 82 | +			$cond = "$idCol BETWEEN $blockStart AND $blockEnd |
| 83 | +				AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''"; |
| 84 | +			$res = $db->select( $table, '*', $cond, __METHOD__ ); |
| 85 | +			$db->begin(); |
| 86 | +			foreach ( $res as $row ) { |
| 87 | +				$rev = ( $table === 'archive' ) |
| 88 | +					? Revision::newFromArchiveRow( $row ) : new Revision( $row ); |
| 89 | +				$text = $rev->getRawText(); |
| 90 | +				if ( !is_string( $text ) ) { |
| 91 | +					# This should not happen, but sometimes does (bug 20757) |
| 92 | +					$this->output( "Text of revision {$row->$idCol} unavailable!\n" ); |
| 93 | +				} else { |
| 94 | +					$db->update( $table, array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ), |
| 95 | +						array( $idCol => $row->$idCol ), __METHOD__ ); |
| 96 | +					$count++; |
| 97 | +				} |
| 98 | +			} |
| 99 | +			$db->commit(); |
| 100 | +			$blockStart += $this->mBatchSize; |
| 101 | +			$blockEnd += $this->mBatchSize; |
| 102 | +			wfWaitForSlaves(); |
| 103 | +		} |
| 104 | +		return $count; |
| 105 | +	} |
| 106 | +} |
| 107 | + |
| 108 | +$maintClass = "PopulateRevisionSha1"; |
| 109 | +require_once( RUN_MAINTENANCE_IF_MAIN ); |
Property changes on: trunk/phase3/maintenance/populateRevisionSha1.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 110 | + native |
Index: trunk/phase3/maintenance/tables.sql |
— | — | @@ -311,8 +311,11 @@ |
312 | 312 | |
313 | 313 | -- Key to revision.rev_id |
314 | 314 | -- This field is used to add support for a tree structure (The Adjacency List Model) |
315 | | - rev_parent_id int unsigned default NULL |
| 315 | + rev_parent_id int unsigned default NULL, |
316 | 316 | |
| 317 | + -- SHA-1 text content hash in base-36 |
| 318 | + rev_sha1 varbinary(32) NOT NULL default '' |
| 319 | + |
317 | 320 | ) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024; |
318 | 321 | -- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit |
319 | 322 | |
— | — | @@ -418,7 +421,10 @@ |
419 | 422 | ar_page_id int unsigned, |
420 | 423 | |
421 | 424 | -- Original previous revision |
422 | | - ar_parent_id int unsigned default NULL |
| 425 | + ar_parent_id int unsigned default NULL, |
| 426 | + |
| 427 | + -- SHA-1 text content hash in base-36 |
| 428 | + ar_sha1 varbinary(32) NOT NULL default '' |
423 | 429 | ) /*$wgDBTableOptions*/; |
424 | 430 | |
425 | 431 | CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp); |
Index: trunk/phase3/includes/installer/DatabaseUpdater.php |
— | — | @@ -41,7 +41,9 @@ |
42 | 42 | |
43 | 43 | protected $postDatabaseUpdateMaintenance = array( |
44 | 44 | 'DeleteDefaultMessages', |
45 | | - 'PopulateRevisionLength' |
| 45 | + 'PopulateRevisionLength', |
| 46 | + 'PopulateRevisionSha1', |
| 47 | + 'PopulateImageSha1' |
46 | 48 | ); |
47 | 49 | |
48 | 50 | /** |
Index: trunk/phase3/includes/installer/MysqlUpdater.php |
— | — | @@ -187,7 +187,8 @@ |
188 | 188 | array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'), |
189 | 189 | array( 'doMigrateUserOptions' ), |
190 | 190 | array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ), |
191 | | - |
| 191 | + array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ), |
| 192 | + array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' ) |
192 | 193 | ); |
193 | 194 | } |
194 | 195 | |
Index: trunk/phase3/includes/installer/SqliteUpdater.php |
— | — | @@ -65,6 +65,8 @@ |
66 | 66 | array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'), |
67 | 67 | array( 'doMigrateUserOptions' ), |
68 | 68 | array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ), |
| 69 | + array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ), |
| 70 | + array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' ) |
69 | 71 | ); |
70 | 72 | } |
71 | 73 | |
Index: trunk/phase3/includes/Revision.php |
— | — | @@ -13,6 +13,7 @@ |
14 | 14 | protected $mTimestamp; |
15 | 15 | protected $mDeleted; |
16 | 16 | protected $mSize; |
| 17 | + protected $mSha1; |
17 | 18 | protected $mParentId; |
18 | 19 | protected $mComment; |
19 | 20 | protected $mText; |
— | — | @@ -122,7 +123,8 @@ |
123 | 124 | 'minor_edit' => $row->ar_minor_edit, |
124 | 125 | 'text_id' => isset( $row->ar_text_id ) ? $row->ar_text_id : null, |
125 | 126 | 'deleted' => $row->ar_deleted, |
126 | | - 'len' => $row->ar_len |
| 127 | + 'len' => $row->ar_len, |
| 128 | + 'sha1' => $row->ar_sha1 |
127 | 129 | ); |
128 | 130 | if ( isset( $row->ar_text ) && !$row->ar_text_id ) { |
129 | 131 | // Pre-1.5 ar_text row |
— | — | @@ -313,7 +315,8 @@ |
314 | 316 | 'rev_minor_edit', |
315 | 317 | 'rev_deleted', |
316 | 318 | 'rev_len', |
317 | | - 'rev_parent_id' |
| 319 | + 'rev_parent_id', |
| 320 | + 'rev_sha1' |
318 | 321 | ); |
319 | 322 | } |
320 | 323 | |
— | — | @@ -375,6 +378,12 @@ |
376 | 379 | $this->mSize = intval( $row->rev_len ); |
377 | 380 | } |
378 | 381 | |
| 382 | + if ( !isset( $row->rev_sha1 ) ) { |
| 383 | + $this->mSha1 = null; |
| 384 | + } else { |
| 385 | + $this->mSha1 = $row->rev_sha1; |
| 386 | + } |
| 387 | + |
379 | 388 | if( isset( $row->page_latest ) ) { |
380 | 389 | $this->mCurrent = ( $row->rev_id == $row->page_latest ); |
381 | 390 | $this->mTitle = Title::newFromRow( $row ); |
— | — | @@ -402,7 +411,7 @@ |
403 | 412 | $this->mOrigUserText = $row->rev_user_text; |
404 | 413 | } elseif( is_array( $row ) ) { |
405 | 414 | // Build a new revision to be saved... |
406 | | - global $wgUser; |
| 415 | + global $wgUser; // ugh |
407 | 416 | |
408 | 417 | $this->mId = isset( $row['id'] ) ? intval( $row['id'] ) : null; |
409 | 418 | $this->mPage = isset( $row['page'] ) ? intval( $row['page'] ) : null; |
— | — | @@ -414,6 +423,7 @@ |
415 | 424 | $this->mDeleted = isset( $row['deleted'] ) ? intval( $row['deleted'] ) : 0; |
416 | 425 | $this->mSize = isset( $row['len'] ) ? intval( $row['len'] ) : null; |
417 | 426 | $this->mParentId = isset( $row['parent_id'] ) ? intval( $row['parent_id'] ) : null; |
| 427 | + $this->mSha1 = isset( $row['sha1'] ) ? strval( $row['sha1'] ) : null; |
418 | 428 | |
419 | 429 | // Enforce spacing trimming on supplied text |
420 | 430 | $this->mComment = isset( $row['comment'] ) ? trim( strval( $row['comment'] ) ) : null; |
— | — | @@ -422,10 +432,14 @@ |
423 | 433 | |
424 | 434 | $this->mTitle = null; # Load on demand if needed |
425 | 435 | $this->mCurrent = false; |
426 | | - # If we still have no len_size, see it we have the text to figure it out |
| 436 | + # If we still have no length, see if we have the text to figure it out |
427 | 437 | if ( !$this->mSize ) { |
428 | | - $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText ); |
| 438 | + $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText ); |
429 | 439 | } |
| 440 | + # Same for sha1 |
| 441 | + if ( $this->mSha1 === null ) { |
| 442 | + $this->mSha1 = is_null( $this->mText ) ? null : self::base36Sha1( $this->mText ); |
| 443 | + } |
430 | 444 | } else { |
431 | 445 | throw new MWException( 'Revision constructor passed invalid row format.' ); |
432 | 446 | } |
— | — | @@ -469,6 +483,15 @@ |
470 | 484 | } |
471 | 485 | |
472 | 486 | /** |
| 487 | + * Returns the stored base-36 SHA-1 of the text in this revision, or null if unknown. |
| 488 | + * May be '' for a row loaded from the database whose rev_sha1 has not been populated yet. |
| 489 | + * @return String|null |
| 490 | + */ |
| 491 | + public function getSha1() { |
| 492 | + return $this->mSha1; |
| 493 | + } |
| 494 | + |
| 495 | + /** |
473 | 496 | * Returns the title of the page associated with this entry. |
474 | 497 | * |
475 | 498 | * @return Title |
— | — | @@ -938,8 +961,12 @@ |
939 | 962 | 'rev_timestamp' => $dbw->timestamp( $this->mTimestamp ), |
940 | 963 | 'rev_deleted' => $this->mDeleted, |
941 | 964 | 'rev_len' => $this->mSize, |
942 | | - 'rev_parent_id' => is_null($this->mParentId) ? |
943 | | - $this->getPreviousRevisionId( $dbw ) : $this->mParentId |
| 965 | + 'rev_parent_id' => is_null( $this->mParentId ) |
| 966 | + ? $this->getPreviousRevisionId( $dbw ) |
| 967 | + : $this->mParentId, |
| 968 | + 'rev_sha1' => is_null( $this->mSha1 ) |
| 969 | + ? Revision::base36Sha1( $this->mText ) |
| 970 | + : $this->mSha1 |
944 | 971 | ), __METHOD__ |
945 | 972 | ); |
946 | 973 | |
— | — | @@ -952,6 +979,15 @@ |
953 | 980 | } |
954 | 981 | |
955 | 982 | /** |
| 983 | + * Get the base 36 SHA-1 value for a string of text: the hex sha1() digest run through wfBaseConvert() |
| 984 | + * @param $text String text to hash |
| 985 | + * @return String base-36 digest (final argument 31 is presumably the padded length; confirm in wfBaseConvert) |
| 986 | + */ |
| 987 | + public static function base36Sha1( $text ) { |
| 988 | + return wfBaseConvert( sha1( $text ), 16, 36, 31 ); |
| 989 | + } |
| 990 | + |
| 991 | + /** |
956 | 992 | * Lazy-load the revision's text. |
957 | 993 | * Currently hardcoded to the 'text' table storage engine. |
958 | 994 | * |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -865,6 +865,7 @@ |
866 | 866 | 'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php', |
867 | 867 | 'PopulateParentId' => 'maintenance/populateParentId.php', |
868 | 868 | 'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php', |
| 869 | + 'PopulateRevisionSha1' => 'maintenance/populateRevisionSha1.php', |
869 | 870 | 'SevenZipStream' => 'maintenance/7zip.inc', |
870 | 871 | 'Sqlite' => 'maintenance/sqlite.inc', |
871 | 872 | 'UpdateCollation' => 'maintenance/updateCollation.php', |
Index: trunk/phase3/includes/WikiPage.php |
— | — | @@ -1667,7 +1667,8 @@ |
1668 | 1668 | 'ar_flags' => '\'\'', // MySQL's "strict mode"... |
1669 | 1669 | 'ar_len' => 'rev_len', |
1670 | 1670 | 'ar_page_id' => 'page_id', |
1671 | | - 'ar_deleted' => $bitfield |
| 1671 | + 'ar_deleted' => $bitfield, |
| 1672 | + 'ar_sha1' => 'rev_sha1' |
1672 | 1673 | ), array( |
1673 | 1674 | 'page_id' => $id, |
1674 | 1675 | 'page_id = rev_page' |
Index: trunk/phase3/includes/specials/SpecialUndelete.php |
— | — | @@ -116,7 +116,7 @@ |
117 | 117 | $res = $dbr->select( 'archive', |
118 | 118 | array( |
119 | 119 | 'ar_minor_edit', 'ar_timestamp', 'ar_user', 'ar_user_text', |
120 | | - 'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id' |
| 120 | + 'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id', 'ar_sha1' |
121 | 121 | ), |
122 | 122 | array( 'ar_namespace' => $this->title->getNamespace(), |
123 | 123 | 'ar_title' => $this->title->getDBkey() ), |
— | — | @@ -460,7 +460,8 @@ |
461 | 461 | 'ar_text_id', |
462 | 462 | 'ar_deleted', |
463 | 463 | 'ar_page_id', |
464 | | - 'ar_len' ), |
| 464 | + 'ar_len', |
| 465 | + 'ar_sha1' ), |
465 | 466 | /* WHERE */ array( |
466 | 467 | 'ar_namespace' => $this->title->getNamespace(), |
467 | 468 | 'ar_title' => $this->title->getDBkey(), |