r113704 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r113703 | r113704 | r113705 >
Date: 01:46, 13 March 2012
Author: aaron
Status: reverted
Tags: gerritmigration
Comment:
[FileBackend]
* Added FileJournal class to log file changes for file backends. This can be used for migrations (like moving to Swift), syncing mirror repos, consistency checks, finishing/reverting operation batches, and such. The default journal is the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal (MySQL, SQLite).
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/filerepo/backend/FileBackend.php (modified) (history)
  • /trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php (modified) (history)
  • /trunk/phase3/includes/filerepo/backend/FileBackendStore.php (modified) (history)
  • /trunk/phase3/includes/filerepo/backend/FileOp.php (modified) (history)
  • /trunk/phase3/includes/filerepo/backend/filejournal (added) (history)
  • /trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php (added) (history)
  • /trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php (added) (history)
  • /trunk/phase3/languages/messages/MessagesEn.php (modified) (history)
  • /trunk/phase3/maintenance/archives/patch-filejournal.sql (added) (history)
  • /trunk/phase3/maintenance/language/messages.inc (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/archives/patch-filejournal.sql
@@ -0,0 +1,24 @@
 2+-- File backend operation journal
 3+CREATE TABLE /*_*/filejournal (
 4+ -- Unique ID for each file operation
 5+ fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
 6+ -- UUID of the batch this operation belongs to
 7+ fj_batch_uuid varbinary(32) NOT NULL,
 8+ -- The registered file backend name
 9+ fj_backend varchar(255) NOT NULL,
 10+ -- The storage path that was affected (may be internal paths)
 11+ fj_path blob NOT NULL,
 12+ -- SHA-1 file path hash in base-36
 13+ fj_path_sha1 varbinary(32) NOT NULL default '',
 14+ -- Primitive operation description (null/create/update/delete)
 15+ fj_op varchar(16) NOT NULL default '',
 16+ -- SHA-1 file content hash in base-36
 17+ fj_new_sha1 varbinary(32) NOT NULL default '',
 18+ -- Timestamp of the batch operation
 19+ fj_timestamp varbinary(14) NOT NULL default ''
 20+);
 21+
 22+CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
 23+CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
 24+CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
 25+CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);
Property changes on: trunk/phase3/maintenance/archives/patch-filejournal.sql
___________________________________________________________________
Added: svn:eol-style
126 + native
Index: trunk/phase3/maintenance/language/messages.inc
@@ -1377,6 +1377,11 @@
13781378 'backend-fail-batchsize'
13791379 ),
13801380
 1381+ 'filejournal-errors' => array(
 1382+ 'filejournal-fail-dbconnect',
 1383+ 'filejournal-fail-dbquery'
 1384+ ),
 1385+
13811386 'lockmanager-errors' => array(
13821387 'lockmanager-notlocked',
13831388 'lockmanager-fail-closelock',
Index: trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
@@ -133,7 +133,7 @@
134134 }
135135
136136 // Actually attempt the operation batch...
137 - $subStatus = FileOp::attemptBatch( $performOps, $opts );
 137+ $subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
138138
139139 $success = array();
140140 $failCount = 0;
Index: trunk/phase3/includes/filerepo/backend/FileOp.php
@@ -24,6 +24,7 @@
2525 protected $state = self::STATE_NEW; // integer
2626 protected $failed = false; // boolean
2727 protected $useLatest = true; // boolean
 28+ protected $batchId; // string
2829
2930 protected $sourceSha1; // string
3031 protected $destSameAsSource; // boolean
@@ -63,6 +64,16 @@
6465 }
6566
6667 /**
 68+ * Set the batch UUID this operation belongs to
 69+ *
 70+ * @param $batchId string
 71+ * @return void
 72+ */
 73+ final protected function setBatchId( $batchId ) {
 74+ $this->batchId = $batchId;
 75+ }
 76+
 77+ /**
6778 * Whether to allow stale data for file reads and stat checks
6879 *
6980 * @param $allowStale bool
@@ -73,43 +84,57 @@
7485 }
7586
7687 /**
77 - * Attempt a series of file operations.
 88+ * Attempt to perform a series of file operations.
7889 * Callers are responsible for handling file locking.
7990 *
8091 * $opts is an array of options, including:
81 - * 'force' : Errors that would normally cause a rollback do not.
82 - * The remaining operations are still attempted if any fail.
83 - * 'allowStale' : Don't require the latest available data.
84 - * This can increase performance for non-critical writes.
85 - * This has no effect unless the 'force' flag is set.
86 - *
 92+ * 'force' : Errors that would normally cause a rollback do not.
 93+ * The remaining operations are still attempted if any fail.
 94+ * 'allowStale' : Don't require the latest available data.
 95+ * This can increase performance for non-critical writes.
 96+ * This has no effect unless the 'force' flag is set.
 97+ * 'nonJournaled' : Don't log this operation batch in the file journal.
 98+ *
8799 * The resulting Status will be "OK" unless:
88100 * a) unexpected operation errors occurred (network partitions, disk full...)
88100 * b) significant operation errors occurred and 'force' was not set
90102 *
91103 * @param $performOps Array List of FileOp operations
92104 * @param $opts Array Batch operation options
 105+ * @param $journal FileJournal Journal to log operations to
93106 * @return Status
94107 */
95 - final public static function attemptBatch( array $performOps, array $opts ) {
 108+ final public static function attemptBatch(
 109+ array $performOps, array $opts, FileJournal $journal
 110+ ) {
96111 $status = Status::newGood();
97112
98 - $allowStale = !empty( $opts['allowStale'] );
99 - $ignoreErrors = !empty( $opts['force'] );
100 -
101113 $n = count( $performOps );
102114 if ( $n > self::MAX_BATCH_SIZE ) {
103115 $status->fatal( 'backend-fail-batchsize', $n, self::MAX_BATCH_SIZE );
104116 return $status;
105117 }
106118
 119+ $batchId = $journal->getTimestampedUUID();
 120+ $allowStale = !empty( $opts['allowStale'] );
 121+ $ignoreErrors = !empty( $opts['force'] );
 122+ $journaled = empty( $opts['nonJournaled'] );
 123+
 124+ $entries = array(); // file journal entries
107125 $predicates = FileOp::newPredicates(); // account for previous op in prechecks
108126 // Do pre-checks for each operation; abort on failure...
109127 foreach ( $performOps as $index => $fileOp ) {
 128+ $fileOp->setBatchId( $batchId );
110129 $fileOp->allowStaleReads( $allowStale );
111 - $subStatus = $fileOp->precheck( $predicates );
 130+ $oldPredicates = $predicates;
 131+ $subStatus = $fileOp->precheck( $predicates ); // updates $predicates
112132 $status->merge( $subStatus );
113 - if ( !$subStatus->isOK() ) { // operation failed?
 133+ if ( $subStatus->isOK() ) {
 134+ if ( $journaled ) { // journal log entry
 135+ $entries = array_merge( $entries,
 136+ self::getJournalEntries( $fileOp, $oldPredicates, $predicates ) );
 137+ }
 138+ } else { // operation failed?
114139 $status->success[$index] = false;
115140 ++$status->failCount;
116141 if ( !$ignoreErrors ) {
@@ -118,8 +143,15 @@
119144 }
120145 }
121146
122 - if ( $ignoreErrors ) {
123 - # Treat all precheck() fatals as merely warnings
 147+ // Log the operations in file journal...
 148+ if ( count( $entries ) ) {
 149+ $subStatus = $journal->logChangeBatch( $entries, $batchId );
 150+ if ( !$subStatus->isOK() ) {
 151+ return $subStatus; // abort
 152+ }
 153+ }
 154+
 155+ if ( $ignoreErrors ) { // treat precheck() fatals as mere warnings
124156 $status->setResult( true, $status->value );
125157 }
126158
@@ -155,6 +187,46 @@
156188 }
157189
158190 /**
 191+ * Get the file journal entries for a single file operation
 192+ *
 193+ * @param $fileOp FileOp
 194+ * @param $oPredicates Array Pre-op information about files
 195+ * @param $nPredicates Array Post-op information about files
 196+ * @return Array
 197+ */
 198+ final protected static function getJournalEntries(
 199+ FileOp $fileOp, array $oPredicates, array $nPredicates
 200+ ) {
 201+ $nullEntries = array();
 202+ $updateEntries = array();
 203+ $deleteEntries = array();
 204+ $pathsUsed = array_merge( $fileOp->storagePathsRead(), $fileOp->storagePathsChanged() );
 205+ foreach ( $pathsUsed as $path ) {
 206+ $nullEntries[] = array( // assertion for recovery
 207+ 'op' => 'null',
 208+ 'path' => $path,
 209+ 'newSha1' => $fileOp->fileSha1( $path, $oPredicates )
 210+ );
 211+ }
 212+ foreach ( $fileOp->storagePathsChanged() as $path ) {
 213+ if ( $nPredicates['sha1'][$path] === false ) { // deleted
 214+ $deleteEntries[] = array(
 215+ 'op' => 'delete',
 216+ 'path' => $path,
 217+ 'newSha1' => ''
 218+ );
 219+ } else { // created/updated
 220+ $updateEntries[] = array(
 221+ 'op' => $fileOp->fileExists( $path, $oPredicates ) ? 'update' : 'create',
 222+ 'path' => $path,
 223+ 'newSha1' => $nPredicates['sha1'][$path]
 224+ );
 225+ }
 226+ }
 227+ return array_merge( $nullEntries, $updateEntries, $deleteEntries );
 228+ }
 229+
 230+ /**
159231 * Get the value of the parameter with the given name
160232 *
161233 * @param $name string
@@ -352,8 +424,8 @@
353425 $params = $this->params;
354426 $params['failedAction'] = $action;
355427 try {
356 - wfDebugLog( 'FileOperation',
357 - get_class( $this ) . ' failed: ' . FormatJson::encode( $params ) );
 428+ wfDebugLog( 'FileOperation', get_class( $this ) .
 429+ " failed (batch #{$this->batchId}): " . FormatJson::encode( $params ) );
358430 } catch ( Exception $e ) {
359431 // bad config? debug log error?
360432 }
Index: trunk/phase3/includes/filerepo/backend/FileBackendStore.php
@@ -708,7 +708,7 @@
709709 $this->clearCache();
710710
711711 // Actually attempt the operation batch...
712 - $subStatus = FileOp::attemptBatch( $performOps, $opts );
 712+ $subStatus = FileOp::attemptBatch( $performOps, $opts, $this->fileJournal );
713713
714714 // Merge errors into status fields
715715 $status->merge( $subStatus );
Index: trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
@@ -0,0 +1,131 @@
 2+<?php
 3+/**
 4+ * @defgroup FileJournal File journal
 5+ * @ingroup FileBackend
 6+ */
 7+
 8+/**
 9+ * @file
 10+ * @ingroup FileJournal
 11+ * @author Aaron Schulz
 12+ */
 13+
 14+/**
 15+ * @brief Class for handling file operation journaling.
 16+ *
 17+ * Subclasses should avoid throwing exceptions at all costs.
 18+ *
 19+ * @ingroup FileJournal
 20+ * @since 1.20
 21+ */
 22+abstract class FileJournal {
 23+ protected $backend; // string
 24+ protected $ttlDays; // integer
 25+
 26+ /**
 27+ * Construct a new instance from configuration.
 28+ * $config includes:
 29+ * 'ttlDays' : days to keep log entries around (false means "forever")
 30+ *
 31+ * @param $config Array
 32+ */
 33+ protected function __construct( array $config ) {
 34+ $this->ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false;
 35+ }
 36+
 37+ /**
 38+ * Create an appropriate FileJournal object from config
 39+ *
 40+ * @param $config Array
 41+ * @param $backend string A registered file backend name
 42+ * @return FileJournal
 43+ */
 44+ final public static function factory( array $config, $backend ) {
 45+ $class = $config['class'];
 46+ $jrn = new $class( $config );
 47+ if ( !$jrn instanceof self ) {
 48+ throw new MWException( "Class given is not an instance of FileJournal." );
 49+ }
 50+ $jrn->backend = $backend;
 51+ return $jrn;
 52+ }
 53+
 54+ /**
 55+ * Get a statistically unique ID string
 56+ *
 57+ * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars>
 58+ */
 59+ final public function getTimestampedUUID() {
 60+ $s = '';
 61+ for ( $i = 0; $i < 5; $i++ ) {
 62+ $s .= mt_rand( 0, 2147483647 );
 63+ }
 64+ $s = wfBaseConvert( sha1( $s ), 16, 36, 31 );
 65+ return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 );
 66+ }
 67+
 68+ /**
 69+ * Log changes made by a batch file operation.
 70+ * $entries is an array of log entries, each of which contains:
 71+ * op : Basic operation name (null, create, update, delete)
 72+ * path : The storage path of the file
 73+ * newSha1 : The final base 36 SHA-1 of the file
 74+ * Note that 'false' should be used as the SHA-1 for non-existing files.
 75+ *
 76+ * @param $entries Array List of file operations (each an array of parameters)
 77+ * @param $batchId string UUID string that identifies the operation batch
 78+ * @return Status
 79+ */
 80+ final public function logChangeBatch( array $entries, $batchId ) {
 81+ if ( !count( $entries ) ) {
 82+ return Status::newGood();
 83+ }
 84+ return $this->doLogChangeBatch( $entries, $batchId );
 85+ }
 86+
 87+ /**
 88+ * @see FileJournal::logChangeBatch()
 89+ *
 90+ * @param $entries Array List of file operations (each an array of parameters)
 91+ * @param $batchId string UUID string that identifies the operation batch
 92+ * @return Status
 93+ */
 94+ abstract protected function doLogChangeBatch( array $entries, $batchId );
 95+
 96+ /**
 97+ * Purge any old log entries
 98+ *
 99+ * @return Status
 100+ */
 101+ final public function purgeOldLogs() {
 102+ return $this->doPurgeOldLogs();
 103+ }
 104+
 105+ /**
 106+ * @see FileJournal::purgeOldLogs()
 107+ * @return Status
 108+ */
 109+ abstract protected function doPurgeOldLogs();
 110+}
 111+
 112+/**
 113+ * Simple version of FileJournal that does nothing
 114+ * @since 1.20
 115+ */
 116+class NullFileJournal extends FileJournal {
 117+ /**
 118+ * @see FileJournal::logChangeBatch()
 119+ * @return Status
 120+ */
 121+ protected function doLogChangeBatch( array $entries, $batchId ) {
 122+ return Status::newGood();
 123+ }
 124+
 125+ /**
 126+ * @see FileJournal::purgeOldLogs()
 127+ * @return Status
 128+ */
 129+ protected function doPurgeOldLogs() {
 130+ return Status::newGood();
 131+ }
 132+}
Property changes on: trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
___________________________________________________________________
Added: svn:eol-style
1133 + native
Index: trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
@@ -0,0 +1,112 @@
 2+<?php
 3+/**
 4+ * @file
 5+ * @ingroup FileJournal
 6+ * @author Aaron Schulz
 7+ */
 8+
 9+/**
 10+ * Version of FileJournal that logs to a DB table
 11+ * @since 1.20
 12+ */
 13+class DBFileJournal extends FileJournal {
 14+ protected $wiki = false; // string; wiki DB name
 15+
 16+ /**
 17+ * Construct a new instance from configuration.
 18+ * $config includes:
 19+ * 'wiki' : wiki name to use for LoadBalancer
 20+ *
 21+ * @param $config Array
 22+ */
 23+ protected function __construct( array $config ) {
 24+ parent::__construct( $config );
 25+
 26+ $this->wiki = $config['wiki'];
 27+ }
 28+
 29+ /**
 30+ * @see FileJournal::logChangeBatch()
 31+ * @return Status
 32+ */
 33+ protected function doLogChangeBatch( array $entries, $batchId ) {
 34+ $status = Status::newGood();
 35+
 36+ $dbw = $this->getMasterDB();
 37+ if ( !$dbw ) {
 38+ $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
 39+ return $status;
 40+ }
 41+ $now = wfTimestamp( TS_UNIX );
 42+
 43+ $data = array();
 44+ foreach ( $entries as $entry ) {
 45+ $data[] = array(
 46+ 'fj_batch_uuid' => $batchId,
 47+ 'fj_backend' => $this->backend,
 48+ 'fj_op' => $entry['op'],
 49+ 'fj_path' => $entry['path'],
 50+ 'fj_path_sha1' => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
 51+ 'fj_new_sha1' => $entry['newSha1'],
 52+ 'fj_timestamp' => $dbw->timestamp( $now )
 53+ );
 54+ }
 55+
 56+ try {
 57+ $dbw->begin();
 58+ $dbw->insert( 'filejournal', $data, __METHOD__ );
 59+ $dbw->commit();
 60+ } catch ( DBError $e ) {
 61+ $status->fatal( 'filejournal-fail-dbquery', $this->backend );
 62+ return $status;
 63+ }
 64+
 65+ return $status;
 66+ }
 67+
 68+ /**
 69+ * @see FileJournal::purgeOldLogs()
 70+ * @return Status
 71+ */
 72+ protected function doPurgeOldLogs() {
 73+ $status = Status::newGood();
 74+ if ( $this->ttlDays <= 0 ) {
 75+ return $status; // nothing to do
 76+ }
 77+
 78+ $dbw = $this->getMasterDB();
 79+ if ( !$dbw ) {
 80+ $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
 81+ return $status;
 82+ }
 83+ $dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays );
 84+
 85+ try {
 86+ $dbw->begin();
 87+ $dbw->delete( 'filejournal',
 88+ array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ),
 89+ __METHOD__
 90+ );
 91+ $dbw->commit();
 92+ } catch ( DBError $e ) {
 93+ $status->fatal( 'filejournal-fail-dbquery', $this->backend );
 94+ return $status;
 95+ }
 96+
 97+ return $status;
 98+ }
 99+
 100+ /**
 101+ * Get a master connection to the logging DB
 102+ *
 103+ * @return DatabaseBase|null
 104+ */
 105+ protected function getMasterDB() {
 106+ try {
 107+ $lb = wfGetLBFactory()->newMainLB();
 108+ return $lb->getConnection( DB_MASTER, array(), $this->wiki );
 109+ } catch ( DBConnectionError $e ) {
 110+ return null;
 111+ }
 112+ }
 113+}
Property changes on: trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
___________________________________________________________________
Added: svn:eol-style
1114 + native
Index: trunk/phase3/includes/filerepo/backend/FileBackend.php
@@ -45,6 +45,8 @@
4646 protected $readOnly; // string; read-only explanation message
4747 /** @var LockManager */
4848 protected $lockManager;
 49+ /** @var FileJournal */
 50+ protected $fileJournal;
4951
5052 /**
5153 * Create a new backend instance from configuration.
@@ -73,6 +75,9 @@
7476 $this->lockManager = ( $config['lockManager'] instanceof LockManager )
7577 ? $config['lockManager']
7678 : LockManagerGroup::singleton()->get( $config['lockManager'] );
 79+ $this->fileJournal = isset( $config['fileJournal'] )
 80+ ? FileJournal::factory( $config['fileJournal'], $this->name )
 81+ : FileJournal::factory( array( 'class' => 'NullFileJournal' ), $this->name );
7782 $this->readOnly = isset( $config['readOnly'] )
7883 ? (string)$config['readOnly']
7984 : '';
@@ -177,6 +182,8 @@
178183 * 'allowStale' : Don't require the latest available data.
179184 * This can increase performance for non-critical writes.
180185 * This has no effect unless the 'force' flag is set.
 186+ * 'nonJournaled' : Don't log this operation batch in the file journal.
 187+ * This limits the ability of recovery scripts.
181188 *
182189 * Remarks on locking:
183190 * File system paths given to operations should refer to files that are
Index: trunk/phase3/includes/AutoLoader.php
@@ -507,6 +507,9 @@
508508 'FSFileBackendFileList' => 'includes/filerepo/backend/FSFileBackend.php',
509509 'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
510510 'SwiftFileBackendFileList' => 'includes/filerepo/backend/SwiftFileBackend.php',
 511+ 'FileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
 512+ 'DBFileJournal' => 'includes/filerepo/backend/filejournal/DBFileJournal.php',
 513+ 'NullFileJournal' => 'includes/filerepo/backend/filejournal/FileJournal.php',
511514 'LockManagerGroup' => 'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
512515 'LockManager' => 'includes/filerepo/backend/lockmanager/LockManager.php',
513516 'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',
Index: trunk/phase3/languages/messages/MessagesEn.php
@@ -2274,6 +2274,10 @@
22752275 'backend-fail-contenttype' => 'Could not determine the content type of the file to store at "$1".',
22762276 'backend-fail-batchsize' => 'Storage backend given a batch of $1 file {{PLURAL:$1|operation|operations}}; the limit is $2 {{PLURAL:$2|operation|operations}}.',
22772277
 2278+# File journal
 2279+'filejournal-fail-dbconnect' => 'Could not connect to the journal database for storage backend "$1".',
 2280+'filejournal-fail-dbquery' => 'Could not update the journal database for storage backend "$1".',
 2281+
22782282 # Lock manager
22792283 'lockmanager-notlocked' => 'Could not unlock "$1"; it is not locked.',
22802284 'lockmanager-fail-closelock' => 'Could not close lock file for "$1".',

Follow-up revisions

Revision | Commit summary | Author | Date
r113742 | [FileBackend] r113704: updated FileBackend constructor documentation | aaron | 18:30, 13 March 2012
r114335 | Revert r107309, r113601, r113704, r113742, r113792, r113838, r113859, r113893...... | catrope | 00:16, 21 March 2012

Status & tagging log