r106236 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r106235‎ | r106236 | r106237 >
Date:19:59, 14 December 2011
Author:aaron
Status:deferred
Tags:
Comment:
* Replaced getHash() function with getSha1Base36(). This standardizes the hashes and makes store/create ops able to use them (since the source is not on storage). In some cases ETags might just be a concatenation of ETags rather than an md5 or such (segmented objects). Also, we have to calculate it anyway for img_sha1. Sha1 should be calculated on insertion and stored as metadata for quick use later.
* Updated hash-related FileOp code accordingly.
* Made FSFileBackend use the default getFileProps() function.
* Added error suppression to some FS functions as needed.
* Removed TempFSFile::purge() calls in FileOp::finish(), useless since r106162.
Modified paths:
  • /branches/FileBackend/phase3/includes/filerepo/backend/FSFileBackend.php (modified) (history)
  • /branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php (modified) (history)
  • /branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php (modified) (history)
  • /branches/FileBackend/phase3/includes/filerepo/backend/FileOp.php (modified) (history)

Diff [purge]

Index: branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
@@ -197,16 +197,17 @@
198198 return $this->backends[$this->masterIndex]->getFileTimestamp( $params );
199199 }
200200
201 - function getFileHash( array $params ) {
202 - // Skip non-master for consistent hash formats
203 - return $this->backends[$this->masterIndex]->getFileHash( $params );
 201+ function getSha1Base36(array $params) {
 202+ # Hit all backends in case of failed operations (out of sync)
 203+ foreach ( $this->backends as $backend ) {
 204+ $hash = $backend->getSha1Base36( $params );
 205+ if ( $hash !== false ) {
 206+ return $hash;
 207+ }
 208+ }
 209+ return false;
204210 }
205211
206 - function getHashType() {
207 - // Skip non-master for consistent hash formats
208 - return $this->backends[$this->masterIndex]->getHashType();
209 - }
210 -
211212 function getFileProps( array $params ) {
212213 # Hit all backends in case of failed operations (out of sync)
213214 foreach ( $this->backends as $backend ) {
Index: branches/FileBackend/phase3/includes/filerepo/backend/FileOp.php
@@ -136,13 +136,6 @@
137137 if ( $this->state !== self::STATE_ATTEMPTED ) {
138138 return Status::newFatal( 'fileop-fail-state', self::STATE_ATTEMPTED, $this->state );
139139 }
140 - // Kill any backup files (useful for background scripts)
141 - if ( isset( $this->tmpDestFile ) ) {
142 - $this->tmpDestFile->purge();
143 - }
144 - if ( isset( $this->tmpSourceFile ) ) {
145 - $this->tmpSourceFile->purge();
146 - }
147140 $this->state = self::STATE_DONE;
148141 if ( $this->failed ) {
149142 $status = Status::newGood(); // nothing to finish
@@ -280,12 +273,12 @@
281274 return $status;
282275 }
283276 } elseif ( $this->getParam( 'overwriteSame' ) ) {
284 - $shash = $this->getSourceMD5();
 277+ $shash = $this->getSourceSha1Base36();
285278 // If there is a single source, then we can do some checks already.
286279 // For things like concatenate(), we would need to build a temp file
287280 // first and thus don't support 'overwriteSame' ($shash is null).
288281 if ( $shash !== null ) {
289 - $dhash = $this->getFileMD5( $this->params['dst'] );
 282+ $dhash = $this->getFileSha1Base36( $this->params['dst'] );
290283 if ( !strlen( $shash ) || !strlen( $dhash ) ) {
291284 $status->fatal( 'backend-fail-hashes' );
292285 } elseif ( $shash !== $dhash ) {
@@ -305,37 +298,34 @@
306299 }
307300
308301 /**
309 - * checkAndBackupDest() helper function to get the source file MD5.
 302+ * checkAndBackupDest() helper function to get the source file Sha1.
310303 * Returns false on failure and null if there is no single source.
311304 *
312305 * @return string|false|null
313306 */
314 - protected function getSourceMD5() {
 307+ protected function getSourceSha1Base36() {
315308 return null; // N/A
316309 }
317310
318311 /**
319 - * checkAndBackupDest() helper function to get the MD5 of a file.
 312+ * checkAndBackupDest() helper function to get the Sha1 of a file.
320313 *
321314 * @return string|false False on failure
322315 */
323 - protected function getFileMD5( $path ) {
 316+ protected function getFileSha1Base36( $path ) {
324317 // Source file is in backend
325318 if ( FileBackend::isStoragePath( $path ) ) {
326319 // For some backends (e.g. Swift, Azure) we can get
327320 // standard hashes to use for this types of comparisons.
328 - if ( $this->backend->getHashType() === 'md5' ) {
329 - $hash = $this->backend->getFileHash( array( 'src' => $path ) );
330 - } else {
331 - $tmp = $this->backend->getLocalCopy( array( 'src' => $path ) );
332 - if ( !$tmp ) {
333 - return false; // error
334 - }
335 - $hash = md5_file( $tmp->getPath() );
336 - }
 321+ $hash = $this->backend->getSha1Base36( array( 'src' => $path ) );
337322 // Source file is on file system
338323 } else {
339 - $hash = md5_file( $path );
 324+ wfSuppressWarnings();
 325+ $hash = sha1_file( $path );
 326+ wfRestoreWarnings();
 327+ if ( $hash !== false ) {
 328+ $hash = wfBaseConvert( $hash, 16, 36, 31 );
 329+ }
340330 }
341331 return $hash;
342332 }
@@ -459,8 +449,8 @@
460450 return $status;
461451 }
462452
463 - protected function getSourceMD5() {
464 - return md5_file( $this->params['src'] );
 453+ protected function getSourceSha1Base36() {
 454+ return $this->getFileSha1Base36( $this->params['src'] );
465455 }
466456
467457 public function storagePathsChanged() {
@@ -524,8 +514,8 @@
525515 return $status;
526516 }
527517
528 - protected function getSourceMD5() {
529 - return md5( $this->params['content'] );
 518+ protected function getSourceSha1Base36() {
 519+ return wfBaseConvert( sha1( $this->params['content'] ), 16, 36, 31 );
530520 }
531521
532522 public function storagePathsChanged() {
@@ -594,8 +584,8 @@
595585 return $status;
596586 }
597587
598 - protected function getSourceMD5() {
599 - return $this->getFileMD5( $this->params['src'] );
 588+ protected function getSourceSha1Base36() {
 589+ return $this->getFileSha1Base36( $this->params['src'] );
600590 }
601591
602592 public function storagePathsRead() {
@@ -726,8 +716,8 @@
727717 return $status;
728718 }
729719
730 - protected function getSourceMD5() {
731 - return $this->getFileMD5( $this->params['src'] );
 720+ protected function getSourceSha1Base36() {
 721+ return $this->getFileSha1Base36( $this->params['src'] );
732722 }
733723
734724 public function storagePathsRead() {
@@ -796,7 +786,7 @@
797787 return $status;
798788 }
799789
800 - protected function getSourceMD5() {
 790+ protected function getSourceSha1Base36() {
801791 return null; // defer this until we finish building the new file
802792 }
803793
Index: branches/FileBackend/phase3/includes/filerepo/backend/FSFileBackend.php
@@ -420,21 +420,12 @@
421421 if ( $source === null ) {
422422 return false; // invalid storage path
423423 }
424 - return is_file( $source );
 424+ wfSuppressWarnings();
 425+ $exists = is_file( $source );
 426+ wfRestoreWarnings();
 427+ return $exists;
425428 }
426429
427 - function getHashType() {
428 - return 'md5';
429 - }
430 -
431 - function getFileHash( array $params ) {
432 - list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
433 - if ( $source === null ) {
434 - return false; // invalid storage path
435 - }
436 - return md5_file( $source );
437 - }
438 -
439430 function getFileTimestamp( array $params ) {
440431 list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
441432 if ( $source === null ) {
@@ -444,15 +435,6 @@
445436 return $fsFile->getTimestamp();
446437 }
447438
448 - function getFileProps( array $params ) {
449 - list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
450 - if ( $source === null ) {
451 - return FSFile::placeholderProps(); // invalid storage path
452 - }
453 - $fsFile = new FSFile( $source );
454 - return $fsFile->getProps();
455 - }
456 -
457439 function getFileList( array $params ) {
458440 list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] );
459441 if ( $dir === null ) { // invalid storage path
Index: branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php
@@ -202,8 +202,7 @@
203203 abstract public function fileExists( array $params );
204204
205205 /**
206 - * Get a hash of the file at a storage path in the backend.
207 - * Typically this will be a SHA-1 hash, MD5 hash, or something similar.
 206+ * Get a SHA-1 hash of the file at a storage path in the backend.
208207 *
209208 * $params include:
210209 * src : source storage path
@@ -211,16 +210,9 @@
212211 * @param $params Array
213212 * @return string|false Hash string or false on failure
214213 */
215 - abstract public function getFileHash( array $params );
 214+ abstract public function getSha1Base36( array $params );
216215
217216 /**
218 - * Get the format of the hash that getFileHash() uses
219 - *
220 - * @return string (md5, sha1, internal, ...)
221 - */
222 - abstract public function getHashType();
223 -
224 - /**
225217 * Get the last-modified timestamp of the file at a storage path.
226218 *
227219 * $params include:
@@ -464,6 +456,15 @@
465457 return false; // not implemented
466458 }
467459
 460+ public function getSha1Base36( array $params ) {
 461+ $fsFile = $this->getLocalReference( array( 'src' => $params['src'] ) );
 462+ if ( !$fsFile ) {
 463+ return false;
 464+ } else {
 465+ return $fsFile->sha1Base36();
 466+ }
 467+ }
 468+
468469 public function getFileProps( array $params ) {
469470 $fsFile = $this->getLocalReference( array( 'src' => $params['src'] ) );
470471 if ( !$fsFile ) {
@@ -473,10 +474,6 @@
474475 }
475476 }
476477
477 - public function getHashType() {
478 - return 'internal';
479 - }
480 -
481478 public function getLocalReference( array $params ) {
482479 return $this->getLocalCopy( $params );
483480 }

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r106162* Altered TempFSFile purging to only be deferred for non-cli scripts, avoids ...aaron01:58, 14 December 2011

Status & tagging log