r88145 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r88144‎ | r88145 | r88146 >
Date:10:39, 15 May 2011
Author:btongminh
Status:resolved (Comments)
Tags:
Comment:
Follow-up r87176: Make importDump.php import files
* Fixes for Import.php: Check sha1 of the file; only delete source files if they are temporary
* importDump.php now imports embedded files if --uploads is set; if they are not present it will try to get them from --image-base-path
Modified paths:
  • /trunk/phase3/RELEASE-NOTES-1.19 (modified) (history)
  • /trunk/phase3/includes/Import.php (modified) (history)
  • /trunk/phase3/maintenance/importDump.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/importDump.php
@@ -38,6 +38,7 @@
3939 var $dryRun = false;
4040 var $debug = false;
4141 var $uploads = false;
 42+ var $imageBasePath = false;
4243 var $nsFilter = false;
4344
4445 function __construct() {
@@ -201,6 +202,12 @@
202203 array( &$this, 'handleUpload' ) );
203204 $this->logItemCallback = $importer->setLogItemCallback(
204205 array( &$this, 'handleLogItem' ) );
 206+ if ( $this->uploads ) {
 207+ $importer->setImportUploads( true );
 208+ }
 209+ if ( $this->imageBasePath ) {
 210+ $importer->setImageBasePath( $this->imageBasePath );
 211+ }
205212
206213 if ( $this->dryRun ) {
207214 $importer->setPageOutCallback( null );
@@ -230,6 +237,7 @@
231238 echo " --dry-run Parse dump without actually importing pages.\n";
232239 echo " --debug Output extra verbose debug information\n";
233240 echo " --uploads Process file upload data if included (experimental)\n";
 241+ echo " --image-base-path=path Import files from a specified path\n";
234242 echo "\n";
235243 echo "Compressed XML files may be read directly:\n";
236244 echo " .gz $gz\n";
@@ -259,6 +267,9 @@
260268 if ( isset( $options['uploads'] ) ) {
261269 $reader->uploads = true; // experimental!
262270 }
 271+if ( isset( $options['image-base-path'] ) ) {
 272+ $reader->imageBasePath = $options['image-base-path'];
 273+}
263274 if ( isset( $options['namespaces'] ) ) {
264275 $reader->setNsfilter( explode( '|', $options['namespaces'] ) );
265276 }
Index: trunk/phase3/RELEASE-NOTES-1.19
@@ -25,6 +25,9 @@
2626 * (bug 28503) Support for ircs:// URL protocols
2727 * (bug 26033) It is now possible to count all non-redirect pages in content
2828 namespaces as articles
 29+* Images can now be embedded in an XML dump stream using backupDump.php
 30+ --include-files and can be imported using importDump.php --uploads;
 31+ furthermore, it can import files from the filesystem using --image-base-path
2932
3033 === Bug fixes in 1.19 ===
3134 * (bug 10154) Don't allow user to specify days beyond $wgRCMaxAge.
Index: trunk/phase3/includes/Import.php
@@ -177,6 +177,9 @@
178178 public function setImageBasePath( $dir ) {
179179 $this->mImageBasePath = $dir;
180180 }
 181+ public function setImportUploads( $import ) {
 182+ $this->mImportUploads = $import;
 183+ }
181184
182185 /**
183186 * Default per-revision callback, performs the import.
@@ -612,6 +615,7 @@
613616 $encoding = $this->reader->getAttribute( 'encoding' );
614617 if ( $encoding === 'base64' ) {
615618 $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
 619+ $uploadInfo['isTempSrc'] = true;
616620 }
617621 } elseif ( $tag != '#text' ) {
618622 $this->warn( "Unhandled upload XML tag $tag" );
@@ -623,6 +627,7 @@
624628 $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
625629 if ( file_exists( $path ) ) {
626630 $uploadInfo['fileSrc'] = $path;
 631+ $uploadInfo['isTempSrc'] = false;
627632 }
628633 }
629634
@@ -652,8 +657,12 @@
653658 }
654659 $revision->setSrc( $uploadInfo['src'] );
655660 if ( isset( $uploadInfo['fileSrc'] ) ) {
656 - $revision->setFileSrc( $uploadInfo['fileSrc'] );
 661+ $revision->setFileSrc( $uploadInfo['fileSrc'],
 662+ !empty( $uploadInfo['isTempSrc'] ) );
657663 }
 664+ if ( isset( $uploadInfo['sha1base36'] ) ) {
 665+ $revision->setSha1Base36( $uploadInfo['sha1base36'] );
 666+ }
658667 $revision->setSize( intval( $uploadInfo['size'] ) );
659668 $revision->setComment( $uploadInfo['comment'] );
660669
@@ -836,6 +845,8 @@
837846 var $action = "";
838847 var $params = "";
839848 var $fileSrc = '';
 849+ var $sha1base36 = false;
 850+ var $isTemp = false;
840851 var $archiveName = '';
841852
842853 function setTitle( $title ) {
@@ -880,9 +891,13 @@
881892 function setSrc( $src ) {
882893 $this->src = $src;
883894 }
884 - function setFileSrc( $src ) {
 895+ function setFileSrc( $src, $isTemp ) {
885896 $this->fileSrc = $src;
 897+ $this->fileIsTemp = $isTemp;
886898 }
 899+ function setSha1Base36( $sha1base36 ) {
 900+ $this->sha1base36 = $sha1base36;
 901+ }
887902
888903 function setFilename( $filename ) {
889904 $this->filename = $filename;
@@ -941,9 +956,18 @@
942957 function getSrc() {
943958 return $this->src;
944959 }
 960+ function getSha1() {
 961+ if ( $this->sha1base36 ) {
 962+ return wfBaseConvert( $this->sha1base36, 36, 16 );
 963+ }
 964+ return false;
 965+ }
945966 function getFileSrc() {
946967 return $this->fileSrc;
947968 }
 969+ function isTempSrc() {
 970+ return $this->isTemp;
 971+ }
948972
949973 function getFilename() {
950974 return $this->filename;
@@ -1118,23 +1142,30 @@
11191143
11201144 # Get the file source or download if necessary
11211145 $source = $this->getFileSrc();
 1146+ $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
11221147 if ( !$source ) {
11231148 $source = $this->downloadSource();
 1149+ $flags |= File::DELETE_SOURCE;
11241150 }
11251151 if( !$source ) {
11261152 wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
11271153 return false;
11281154 }
 1155+ $sha1 = $this->getSha1();
 1156+ if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
 1157+ wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
 1158+ return false;
 1159+ }
11291160
11301161 $user = User::newFromName( $this->user_text );
11311162
11321163 # Do the actual upload
11331164 if ( $archiveName ) {
11341165 $status = $file->uploadOld( $source, $archiveName,
1135 - $this->getTimestamp(), $this->getComment(), $user, File::DELETE_SOURCE );
 1166+ $this->getTimestamp(), $this->getComment(), $user, $flags );
11361167 } else {
11371168 $status = $file->upload( $source, $this->getComment(), $this->getComment(),
1138 - File::DELETE_SOURCE, false, $this->getTimestamp(), $user );
 1169+ $flags, false, $this->getTimestamp(), $user );
11391170 }
11401171
11411172 if ( $status->isGood() ) {

Follow-up revisions

Revision | Commit summary | Author | Date
r90294 | Per comments on r88145: unlink file if it is broken | btongminh | 16:48, 17 June 2011

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r87176 | Add --include-files option to dumpBackup.php to include the uploaded files in... | btongminh | 21:35, 30 April 2011

Comments

#Comment by Brion VIBBER (talk | contribs)   23:45, 7 June 2011

If we have a SHA-1 mismatch, does the temp file get deleted?

Hmm, looks like the sequence for a mismatched file is:

  • extract and save to temp file / download and save to temp file / find permanent source file in an alt directory
  • set $flags for File::upload / File::uploadOld to remove the source file if it was our temporary file
  • calc the sha1 sum of the file
    • if mismatch, exit
  • call File::upload / File::uploadOld to import the file
    • removes the temporary file if needed

(For files retrieved via URL, mismatches due to the source file having been updated since the export are possible.)

#Comment by Bryan (talk | contribs)   16:49, 17 June 2011

Fixed in r90294.

Status & tagging log