r101021 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r101020‎ | r101021 | r101022 >
Date:18:44, 27 October 2011
Author:aaron
Status:resolved (Comments)
Tags:scaptrap 
Comment:
Reinstated r94289 et al. - rev_sha1/ar_sha1 field for bug 21860
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/Revision.php (modified) (history)
  • /trunk/phase3/includes/WikiPage.php (modified) (history)
  • /trunk/phase3/includes/installer/DatabaseUpdater.php (modified) (history)
  • /trunk/phase3/includes/installer/MysqlUpdater.php (modified) (history)
  • /trunk/phase3/includes/installer/SqliteUpdater.php (modified) (history)
  • /trunk/phase3/includes/specials/SpecialUndelete.php (modified) (history)
  • /trunk/phase3/maintenance/archives/patch-ar_sha1.sql (added) (history)
  • /trunk/phase3/maintenance/archives/patch-rev_sha1.sql (added) (history)
  • /trunk/phase3/maintenance/populateRevisionSha1.php (added) (history)
  • /trunk/phase3/maintenance/tables.sql (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/archives/patch-ar_sha1.sql
@@ -0,0 +1,3 @@
 2+-- Adding ar_sha1 field
 3+ALTER TABLE /*$wgDBprefix*/archive
 4+ ADD ar_sha1 varbinary(32) NOT NULL default '';
Property changes on: trunk/phase3/maintenance/archives/patch-ar_sha1.sql
___________________________________________________________________
Added: svn:eol-style
15 + native
Index: trunk/phase3/maintenance/archives/patch-rev_sha1.sql
@@ -0,0 +1,3 @@
 2+-- Adding rev_sha1 field
 3+ALTER TABLE /*$wgDBprefix*/revision
 4+ ADD rev_sha1 varbinary(32) NOT NULL default '';
Property changes on: trunk/phase3/maintenance/archives/patch-rev_sha1.sql
___________________________________________________________________
Added: svn:eol-style
15 + native
Index: trunk/phase3/maintenance/populateRevisionSha1.php
@@ -0,0 +1,108 @@
 2+<?php
 3+/**
 4+ * Fills the rev_sha1 and ar_sha1 columns of revision
 5+ * and archive tables for revisions created before MW 1.19.
 6+ *
 7+ * This program is free software; you can redistribute it and/or modify
 8+ * it under the terms of the GNU General Public License as published by
 9+ * the Free Software Foundation; either version 2 of the License, or
 10+ * (at your option) any later version.
 11+ *
 12+ * This program is distributed in the hope that it will be useful,
 13+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 15+ * GNU General Public License for more details.
 16+ *
 17+ * You should have received a copy of the GNU General Public License along
 18+ * with this program; if not, write to the Free Software Foundation, Inc.,
 19+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 20+ * http://www.gnu.org/copyleft/gpl.html
 21+ *
 22+ * @ingroup Maintenance
 23+ */
 24+
 25+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 26+
 27+class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
 28+ public function __construct() {
 29+ parent::__construct();
 30+ $this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
 31+ $this->setBatchSize( 200 );
 32+ }
 33+
 34+ protected function getUpdateKey() {
 35+ return 'populate rev_sha1';
 36+ }
 37+
 38+ protected function doDBUpdates() {
 39+ $db = $this->getDB( DB_MASTER );
 40+ if ( !$db->tableExists( 'revision' ) ) {
 41+ $this->error( "revision table does not exist", true );
 42+ }
 43+ if ( !$db->tableExists( 'archive' ) ) {
 44+ $this->error( "archive table does not exist", true );
 45+ }
 46+
 47+ $this->output( "Populating rev_sha1 column\n" );
 48+ $rc = $this->doSha1Updates( $db, 'revision', 'rev_id', 'rev' );
 49+
 50+ $this->output( "Populating ar_sha1 column\n" );
 51+ $ac = $this->doSha1Updates( $db, 'archive', 'ar_rev_id', 'ar' );
 52+
 53+ $this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
 54+ return true;
 55+ }
 56+
 57+ /**
 58+ * @return Integer Rows changed
 59+ */
 60+ protected function doSha1Updates( $db, $table, $idCol, $prefix ) {
 61+ $start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
 62+ $end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
 63+ if ( !$start || !$end ) {
 64+ $this->output( "...$table table seems to be empty.\n" );
 65+ return true;
 66+ }
 67+
 68+ $count = 0;
 69+ # Do remaining chunk
 70+ $end += $this->mBatchSize - 1;
 71+ $blockStart = $start;
 72+ $blockEnd = $start + $this->mBatchSize - 1;
 73+ while ( $blockEnd <= $end ) {
 74+ $this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
 75+ $cond = "$idCol BETWEEN $blockStart AND $blockEnd
 76+ AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
 77+ $res = $db->select( $table, '*', $cond, __METHOD__ );
 78+
 79+ $db->begin();
 80+ foreach ( $res as $row ) {
 81+ if ( $table === 'archive' ) {
 82+ $rev = Revision::newFromArchiveRow( $row );
 83+ } else {
 84+ $rev = new Revision( $row );
 85+ }
 86+ $text = $rev->getRawText();
 87+ if ( !is_string( $text ) ) {
 88+ # This should not happen, but sometimes does (bug 20757)
 89+ $this->output( "Text of revision {$row->$idCol} unavailable!\n" );
 90+ } else {
 91+ $db->update( $table,
 92+ array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
 93+ array( $idCol => $row->$idCol ),
 94+ __METHOD__ );
 95+ $count++;
 96+ }
 97+ }
 98+ $db->commit();
 99+
 100+ $blockStart += $this->mBatchSize;
 101+ $blockEnd += $this->mBatchSize;
 102+ wfWaitForSlaves();
 103+ }
 104+ return $count;
 105+ }
 106+}
 107+
 108+$maintClass = "PopulateRevisionSha1";
 109+require_once( RUN_MAINTENANCE_IF_MAIN );
Property changes on: trunk/phase3/maintenance/populateRevisionSha1.php
___________________________________________________________________
Added: svn:eol-style
1110 + native
Index: trunk/phase3/maintenance/tables.sql
@@ -311,8 +311,11 @@
312312
313313 -- Key to revision.rev_id
314314 -- This field is used to add support for a tree structure (The Adjacency List Model)
315 - rev_parent_id int unsigned default NULL
 315+ rev_parent_id int unsigned default NULL,
316316
 317+ -- SHA-1 text content hash in base-36
 318+ rev_sha1 varbinary(32) NOT NULL default ''
 319+
317320 ) /*$wgDBTableOptions*/ MAX_ROWS=10000000 AVG_ROW_LENGTH=1024;
318321 -- In case tables are created as MyISAM, use row hints for MySQL <5.0 to avoid 4GB limit
319322
@@ -418,7 +421,10 @@
419422 ar_page_id int unsigned,
420423
421424 -- Original previous revision
422 - ar_parent_id int unsigned default NULL
 425+ ar_parent_id int unsigned default NULL,
 426+
 427+ -- SHA-1 text content hash in base-36
 428+ ar_sha1 varbinary(32) NOT NULL default ''
423429 ) /*$wgDBTableOptions*/;
424430
425431 CREATE INDEX /*i*/name_title_timestamp ON /*_*/archive (ar_namespace,ar_title,ar_timestamp);
Index: trunk/phase3/includes/installer/DatabaseUpdater.php
@@ -41,7 +41,9 @@
4242
4343 protected $postDatabaseUpdateMaintenance = array(
4444 'DeleteDefaultMessages',
45 - 'PopulateRevisionLength'
 45+ 'PopulateRevisionLength',
 46+ 'PopulateRevisionSha1',
 47+ 'PopulateImageSha1'
4648 );
4749
4850 /**
Index: trunk/phase3/includes/installer/MysqlUpdater.php
@@ -187,7 +187,8 @@
188188 array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'),
189189 array( 'doMigrateUserOptions' ),
190190 array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ),
191 -
 191+ array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ),
 192+ array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' )
192193 );
193194 }
194195
Index: trunk/phase3/includes/installer/SqliteUpdater.php
@@ -65,6 +65,8 @@
6666 array( 'addIndex', 'logging', 'type_action', 'patch-logging-type-action-index.sql'),
6767 array( 'doMigrateUserOptions' ),
6868 array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ),
 69+ array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ),
 70+ array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' )
6971 );
7072 }
7173
Index: trunk/phase3/includes/Revision.php
@@ -13,6 +13,7 @@
1414 protected $mTimestamp;
1515 protected $mDeleted;
1616 protected $mSize;
 17+ protected $mSha1;
1718 protected $mParentId;
1819 protected $mComment;
1920 protected $mText;
@@ -122,7 +123,8 @@
123124 'minor_edit' => $row->ar_minor_edit,
124125 'text_id' => isset( $row->ar_text_id ) ? $row->ar_text_id : null,
125126 'deleted' => $row->ar_deleted,
126 - 'len' => $row->ar_len
 127+ 'len' => $row->ar_len,
 128+ 'sha1' => $row->ar_sha1
127129 );
128130 if ( isset( $row->ar_text ) && !$row->ar_text_id ) {
129131 // Pre-1.5 ar_text row
@@ -313,7 +315,8 @@
314316 'rev_minor_edit',
315317 'rev_deleted',
316318 'rev_len',
317 - 'rev_parent_id'
 319+ 'rev_parent_id',
 320+ 'rev_sha1'
318321 );
319322 }
320323
@@ -375,6 +378,12 @@
376379 $this->mSize = intval( $row->rev_len );
377380 }
378381
 382+ if ( !isset( $row->rev_sha1 ) ) {
 383+ $this->mSha1 = null;
 384+ } else {
 385+ $this->mSha1 = $row->rev_sha1;
 386+ }
 387+
379388 if( isset( $row->page_latest ) ) {
380389 $this->mCurrent = ( $row->rev_id == $row->page_latest );
381390 $this->mTitle = Title::newFromRow( $row );
@@ -402,7 +411,7 @@
403412 $this->mOrigUserText = $row->rev_user_text;
404413 } elseif( is_array( $row ) ) {
405414 // Build a new revision to be saved...
406 - global $wgUser;
 415+ global $wgUser; // ugh
407416
408417 $this->mId = isset( $row['id'] ) ? intval( $row['id'] ) : null;
409418 $this->mPage = isset( $row['page'] ) ? intval( $row['page'] ) : null;
@@ -414,6 +423,7 @@
415424 $this->mDeleted = isset( $row['deleted'] ) ? intval( $row['deleted'] ) : 0;
416425 $this->mSize = isset( $row['len'] ) ? intval( $row['len'] ) : null;
417426 $this->mParentId = isset( $row['parent_id'] ) ? intval( $row['parent_id'] ) : null;
 427+ $this->mSha1 = isset( $row['sha1'] ) ? strval( $row['sha1'] ) : null;
418428
419429 // Enforce spacing trimming on supplied text
420430 $this->mComment = isset( $row['comment'] ) ? trim( strval( $row['comment'] ) ) : null;
@@ -422,10 +432,14 @@
423433
424434 $this->mTitle = null; # Load on demand if needed
425435 $this->mCurrent = false;
426 - # If we still have no len_size, see it we have the text to figure it out
 436+ # If we still have no length, see it we have the text to figure it out
427437 if ( !$this->mSize ) {
428 - $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText );
 438+ $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText );
429439 }
 440+ # Same for sha1
 441+ if ( $this->mSha1 === null ) {
 442+ $this->mSha1 = is_null( $this->mText ) ? null : self::base36Sha1( $this->mText );
 443+ }
430444 } else {
431445 throw new MWException( 'Revision constructor passed invalid row format.' );
432446 }
@@ -469,6 +483,15 @@
470484 }
471485
472486 /**
 487+ * Returns the base36 sha1 of the text in this revision, or null if unknown.
 488+ *
 489+ * @return String
 490+ */
 491+ public function getSha1() {
 492+ return $this->mSha1;
 493+ }
 494+
 495+ /**
473496 * Returns the title of the page associated with this entry.
474497 *
475498 * @return Title
@@ -938,8 +961,12 @@
939962 'rev_timestamp' => $dbw->timestamp( $this->mTimestamp ),
940963 'rev_deleted' => $this->mDeleted,
941964 'rev_len' => $this->mSize,
942 - 'rev_parent_id' => is_null($this->mParentId) ?
943 - $this->getPreviousRevisionId( $dbw ) : $this->mParentId
 965+ 'rev_parent_id' => is_null( $this->mParentId )
 966+ ? $this->getPreviousRevisionId( $dbw )
 967+ : $this->mParentId,
 968+ 'rev_sha1' => is_null( $this->mSha1 )
 969+ ? Revision::base36Sha1( $this->mText )
 970+ : $this->mSha1
944971 ), __METHOD__
945972 );
946973
@@ -952,6 +979,15 @@
953980 }
954981
955982 /**
 983+ * Get the base 36 SHA-1 value for a string of text
 984+ * @param $text String
 985+ * @return String
 986+ */
 987+ public static function base36Sha1( $text ) {
 988+ return wfBaseConvert( sha1( $text ), 16, 36, 31 );
 989+ }
 990+
 991+ /**
956992 * Lazy-load the revision's text.
957993 * Currently hardcoded to the 'text' table storage engine.
958994 *
Index: trunk/phase3/includes/AutoLoader.php
@@ -865,6 +865,7 @@
866866 'PopulateLogUsertext' => 'maintenance/populateLogUsertext.php',
867867 'PopulateParentId' => 'maintenance/populateParentId.php',
868868 'PopulateRevisionLength' => 'maintenance/populateRevisionLength.php',
 869+ 'PopulateRevisionSha1' => 'maintenance/populateRevisionSha1.php',
869870 'SevenZipStream' => 'maintenance/7zip.inc',
870871 'Sqlite' => 'maintenance/sqlite.inc',
871872 'UpdateCollation' => 'maintenance/updateCollation.php',
Index: trunk/phase3/includes/WikiPage.php
@@ -1667,7 +1667,8 @@
16681668 'ar_flags' => '\'\'', // MySQL's "strict mode"...
16691669 'ar_len' => 'rev_len',
16701670 'ar_page_id' => 'page_id',
1671 - 'ar_deleted' => $bitfield
 1671+ 'ar_deleted' => $bitfield,
 1672+ 'ar_sha1' => 'rev_sha1'
16721673 ), array(
16731674 'page_id' => $id,
16741675 'page_id = rev_page'
Index: trunk/phase3/includes/specials/SpecialUndelete.php
@@ -116,7 +116,7 @@
117117 $res = $dbr->select( 'archive',
118118 array(
119119 'ar_minor_edit', 'ar_timestamp', 'ar_user', 'ar_user_text',
120 - 'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id'
 120+ 'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id', 'ar_sha1'
121121 ),
122122 array( 'ar_namespace' => $this->title->getNamespace(),
123123 'ar_title' => $this->title->getDBkey() ),
@@ -460,7 +460,8 @@
461461 'ar_text_id',
462462 'ar_deleted',
463463 'ar_page_id',
464 - 'ar_len' ),
 464+ 'ar_len',
 465+ 'ar_sha1' ),
465466 /* WHERE */ array(
466467 'ar_namespace' => $this->title->getNamespace(),
467468 'ar_title' => $this->title->getDBkey(),

Follow-up revisions

Revision | Commit summary | Author | Date
r101294 | FU r101021: made newNullRevision() re-use the sha1 of the reference revision | aaron | 00:55, 30 October 2011
r101397 | Followup to r101021, add back to Pg schema so we can install mediawiki | overlordq | 20:03, 31 October 2011
r106514 | Expose rev_sha1/ar_sha1 to API (bug 21860) | aaron | 18:27, 17 December 2011

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r94289 | * Added rev_sha1 and ar_sha1 columns to revision/archive tables (useful for b...... | aaron | 21:52, 11 August 2011

Comments

#Comment by Duplicatebug (talk | contribs)   23:09, 28 October 2011

Revision::newNullRevision should reuse the sha1 value from the selected revision.

Status & tagging log