Index: branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php |
— | — | @@ -0,0 +1,148 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * This program is free software; you can redistribute it and/or modify |
| 5 | + * it under the terms of the GNU General Public License as published by |
| 6 | + * the Free Software Foundation; either version 2 of the License, or |
| 7 | + * (at your option) any later version. |
| 8 | + * |
| 9 | + * This program is distributed in the hope that it will be useful, |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | + * GNU General Public License for more details. |
| 13 | + * |
| 14 | + * You should have received a copy of the GNU General Public License along |
| 15 | + * with this program; if not, write to the Free Software Foundation, Inc., |
| 16 | + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 17 | + * http://www.gnu.org/copyleft/gpl.html |
| 18 | + * |
| 19 | + * @file |
| 20 | + * @ingroup Maintenance |
| 21 | + */ |
| 22 | + |
| 23 | +require_once( dirname( __FILE__ ) . '/Maintenance.php' ); |
| 24 | + |
/**
 * Maintenance script that repairs files whose archived version on disk has
 * no matching oldimage row.
 *
 * It reads "<indir>/<wiki DB name>", a newline-separated list of archive
 * names of the form "<timestamp>!<file name>", and for each entry:
 *  - swaps the current and archived files on disk when the image row's sha1
 *    matches the orphaned archive file and the newest oldimage row matches
 *    the file currently stored under the current name;
 *  - fills in empty oi_archive_name values on oldimage rows whose sha1
 *    matches the orphaned archive file.
 *
 * Every file move that actually happened is appended to
 * "<outdir>/<wiki DB name>" as "<from> <to>".
 */
class FixBug35048Files extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Fix the image/oldimage DB records for a file.";
		$this->addOption( 'indir', "Dir containing orphaned archive file lists", true, true );
		$this->addOption( 'outdir', "Log changes to files under this dir", true, true );
	}

	/**
	 * Process every archive name listed for this wiki.
	 *
	 * Aborts with exit status 1 when the input list is missing, when the
	 * output log already exists, or when a required directory cannot be
	 * created. Problems with a single entry are reported and that entry is
	 * skipped.
	 */
	public function execute() {
		global $wgUploadDirectory, $wgDBname; // assumes FS

		$inDir = $this->getOption( 'indir' );
		$listPath = "$inDir/$wgDBname";
		if ( !is_file( $listPath ) ) {
			$this->error( "$listPath does not exist", 1 );
		}

		$outDir = $this->getOption( 'outdir' );
		$logPath = "$outDir/$wgDBname";
		if ( file_exists( $logPath ) ) {
			$this->error( "$logPath already exists", 1 );
		}

		// Fail loudly rather than returning silently with a success status.
		$lostAndFound = "$wgUploadDirectory/lost+found";
		if ( !wfMkdirParents( $lostAndFound ) ) {
			$this->error( "Could not create directory '$lostAndFound'", 1 );
		}
		if ( !wfMkdirParents( $outDir ) ) {
			$this->error( "Could not create directory '$outDir'", 1 );
		}

		$dbw = wfGetDB( DB_MASTER );
		$repo = RepoGroup::singleton()->getLocalRepo();

		$list = array_filter( explode( "\n", file_get_contents( $listPath ) ) );
		foreach ( $list as $archiveName ) {
			// Archive names look like "<timestamp>!<file name>".
			$parts = explode( '!', $archiveName, 2 );
			if ( count( $parts ) < 2 || $parts[1] === '' ) {
				$this->error( "Skipping malformed archive name '$archiveName'." );
				continue;
			}
			$name = $parts[1];
			$title = Title::makeTitle( NS_FILE, $name );

			$oaFile = $repo->newFromArchiveName( $title, $archiveName );
			if ( $oaFile && $oaFile->exists() ) {
				continue; // fixed already
			}

			$file = $repo->findFile( $title );
			if ( !$file || !$file->exists() ) {
				continue; // no current version to compare against
			}

			$currentFilePath = "$wgUploadDirectory/" . $file->getRel();
			$archivedFilePath = "$wgUploadDirectory/" . $file->getArchiveRel( $archiveName );
			if ( !is_file( $currentFilePath ) || !is_file( $archivedFilePath ) ) {
				$this->error( "Failed sanity check for file existence for '$name'." );
				continue;
			}
			$archivedFileSha1 = wfBaseConvert( sha1_file( $archivedFilePath ), 16, 36, 31 );
			$currentFileSha1 = $repo->getFileSha1( $file->getPath() ); // base 36

			$history = $file->getHistory(); // old file versions
			$rowSha1 = $file->getSha1(); // sha1 recorded for the current version

			#$this->output( "Checking '$archiveName', sha1 $archivedFileSha1.\n" );
			if ( $rowSha1 !== $currentFileSha1 ) { // mismatch?
				$this->output( "'$name' gives sha1 {$rowSha1}, not $currentFileSha1.\n" );

				// The current file row is pointing to the wrong file; only act
				// when that row matches this orphaned file.
				if ( $rowSha1 === $archivedFileSha1 ) {
					// We often have two rows where the current row points to the wrong file
					// and the old row points to none at all. Often, from the sha1s, the orphan
					// and current files should have their names swapped and the old row should
					// point to the current file (which we move to an archive name).
					if ( count( $history ) && $history[0]->getArchiveName() === ''
						&& $history[0]->getSha1() === $currentFileSha1 )
					{
						// Switch the current version and the orphaned version on the FS.
						$this->output( "Switching '$currentFilePath' with '$archivedFilePath'.\n" );
						if ( !is_writable( $currentFilePath ) || !is_writable( $archivedFilePath ) ) {
							$this->error( "Cannot switch files for '$name': not writable." );
							continue;
						}
						$tmpFilePath = "$lostAndFound/" . md5( $currentFilePath );
						if ( !$this->swapFiles( $currentFilePath, $archivedFilePath,
							$tmpFilePath, $logPath ) )
						{
							// Files are in an unexpected state; leave the DB rows
							// for this entry alone.
							continue;
						}
						// Update DB to point to former current version next run (in $history loop)...
					} else {
						// Evict the current version to lost+found so it can be properly
						// re-uploaded later, with the username, comment, and log entry.
						// The moves below are disabled (commented out), so only the
						// messages are printed and nothing is moved or logged.
						$this->output( "Evicting '$currentFilePath' to '$wgUploadDirectory/lost+found/$name'.\n" );
						#rename( $currentFilePath, "$wgUploadDirectory/lost+found/" . $file->getName() );
						// Restore the orphaned archived version to current version file
						// name so that it matches up with the current version metadata.
						$this->output( "Moving '$archivedFilePath' back to '$currentFilePath'.\n" );
						#rename( $archivedFilePath, $currentFilePath );
						#file_put_contents( "$outDir/$wgDBname", "$currentFilePath $wgUploadDirectory/lost+found/$name\n", FILE_APPEND );
						#file_put_contents( "$outDir/$wgDBname", "$archivedFilePath $currentFilePath\n", FILE_APPEND );
					}
				}
			}

			// While at it, fix files with empty oi_archive_name but with oi_sha1 pointing
			// to this file. This can happen if is_file() fails in the File:publish() function.
			foreach ( $history as $oldFile ) {
				if ( $oldFile->getArchiveName() === '' && // broken row
					$oldFile->getSha1() === $archivedFileSha1 ) // should have this name
				{
					$this->output( "Fixed empty oi_archive_name via sha1 $archivedFileSha1.\n" );
					$dbw->update( 'oldimage',
						array( 'oi_archive_name' => $archiveName ),
						array(
							'oi_name' => $name,
							'oi_sha1' => $archivedFileSha1,
							'oi_timestamp' => $dbw->timestamp( $oldFile->getTimestamp() ),
							'oi_archive_name' => '' // sanity
						),
						__METHOD__
					);
				}
			}
		}
	}

	/**
	 * Exchange two files on disk via a temporary path.
	 *
	 * Each move is appended to the change log only after it has succeeded, so
	 * the log always reflects what really happened on disk. The sequence stops
	 * at the first failed move.
	 *
	 * @param $currentFilePath String: path of the current version
	 * @param $archivedFilePath String: path of the orphaned archived version
	 * @param $tmpFilePath String: scratch path used while swapping
	 * @param $logPath String: file that successful moves are appended to
	 * @return Boolean: true when all three moves succeeded
	 */
	private function swapFiles( $currentFilePath, $archivedFilePath, $tmpFilePath, $logPath ) {
		$moves = array(
			array( $currentFilePath, $tmpFilePath ), // temp
			array( $archivedFilePath, $currentFilePath ),
			array( $tmpFilePath, $archivedFilePath ),
		);
		foreach ( $moves as $move ) {
			list( $from, $to ) = $move;
			if ( !rename( $from, $to ) ) {
				$this->error( "Could not move '$from' to '$to'; see '$logPath' for completed moves." );
				return false;
			}
			file_put_contents( $logPath, "$from $to\n", FILE_APPEND ); // log changes
		}
		return true;
	}
}
| 147 | + |
// Record this script's class name in $maintClass, then include the file named
// by RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates and
// executes that class; it is defined outside this file).
| 148 | +$maintClass = "FixBug35048Files"; |
| 149 | +require_once( RUN_MAINTENANCE_IF_MAIN ); |
Index: branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php |
— | — | @@ -0,0 +1,78 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * This program is free software; you can redistribute it and/or modify |
| 5 | + * it under the terms of the GNU General Public License as published by |
| 6 | + * the Free Software Foundation; either version 2 of the License, or |
| 7 | + * (at your option) any later version. |
| 8 | + * |
| 9 | + * This program is distributed in the hope that it will be useful, |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | + * GNU General Public License for more details. |
| 13 | + * |
| 14 | + * You should have received a copy of the GNU General Public License along |
| 15 | + * with this program; if not, write to the Free Software Foundation, Inc., |
| 16 | + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 17 | + * http://www.gnu.org/copyleft/gpl.html |
| 18 | + * |
| 19 | + * @file |
| 20 | + * @ingroup Maintenance |
| 21 | + */ |
| 22 | + |
| 23 | +require_once( dirname( __FILE__ ) . '/Maintenance.php' ); |
| 24 | + |
/**
 * Maintenance script that lists archived files on disk which have no
 * matching oldimage row.
 *
 * It walks the 256 hashed shard directories under "<upload dir>/archive",
 * collects the files whose names start with "2012", and reports every name
 * that is not present in oldimage.oi_archive_name. With --outdir, the names
 * are also appended to "<outdir>/<wiki DB name>", one per line.
 */
class FindFilesMissingDBRows extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Find archived files that have no matching oldimage DB row.";
		$this->addOption( 'outdir', "List bad files under this dir.", false, true );
	}

	/**
	 * Scan the archive shards and report files missing from the DB.
	 *
	 * Does nothing when the wiki has no archive directory. Aborts with exit
	 * status 1 when the output list already exists or the output directory
	 * cannot be created.
	 */
	public function execute() {
		global $wgUploadDirectory, $wgDBname; // assumes FS

		if ( !is_dir( "{$wgUploadDirectory}/archive" ) ) {
			return;
		}

		$outDir = $this->getOption( 'outdir' );
		$outPath = "$outDir/$wgDBname";
		if ( $outDir && file_exists( $outPath ) ) {
			$this->error( "$outPath already exists", 1 );
		}
		// Create the output directory up front so the appends below cannot
		// fail just because it is missing.
		if ( $outDir && !wfMkdirParents( $outDir ) ) {
			$this->error( "Could not create directory '$outDir'", 1 );
		}

		$dbr = wfGetDB( DB_SLAVE );
		for ( $i = 0; $i < 256; $i++ ) { // 16*16=256 shards
			$shard = wfBaseConvert( $i, 10, 16, 2 );
			$shardDir = "{$wgUploadDirectory}/archive/{$shard[0]}/{$shard}";
			if ( !is_dir( $shardDir ) ) {
				continue;
			}
			$this->output( "Doing shard $shard.\n" );

			// Files only from 2012 (we really only need feb-march).
			// The directory is escaped so that paths containing spaces or
			// shell metacharacters cannot break the command.
			$findCmd = 'find ' . wfEscapeShellArg( $shardDir ) . ' -name "2012*"';
			$fsList = array_filter( array_map( 'wfBaseName',
				explode( "\n", shell_exec( $findCmd ) )
			) );
			if ( !$fsList ) {
				continue;
			}

			$res = $dbr->select( 'oldimage', array( 'oi_archive_name' ),
				array( 'oi_archive_name' => $fsList ),
				__METHOD__
			);
			// Index the known archive names for O(1) membership checks.
			$dbList = array();
			foreach ( $res as $row ) {
				$dbList[$row->oi_archive_name] = 1;
			}

			foreach ( $fsList as $archiveName ) {
				if ( isset( $dbList[$archiveName] ) ) {
					continue;
				}
				$this->output( "No DB record for file $archiveName.\n" );
				if ( $outDir ) {
					file_put_contents( $outPath, "$archiveName\n", FILE_APPEND );
				}
			}
		}
	}
}
| 77 | + |
// Record this script's class name in $maintClass, then include the file named
// by RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates and
// executes that class; it is defined outside this file).
| 78 | +$maintClass = "FindFilesMissingDBRows"; |
| 79 | +require_once( RUN_MAINTENANCE_IF_MAIN ); |