r114411 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r114410‎ | r114411 | r114412 >
Date:21:42, 21 March 2012
Author:aaron
Status:deferred
Tags:
Comment:
Added some live cleanup scripts
Modified paths:
  • /branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php (added) (history)
  • /branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php (added) (history)

Diff [purge]

Index: branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php
@@ -0,0 +1,148 @@
 2+<?php
 3+/**
 4+ * This program is free software; you can redistribute it and/or modify
 5+ * it under the terms of the GNU General Public License as published by
 6+ * the Free Software Foundation; either version 2 of the License, or
 7+ * (at your option) any later version.
 8+ *
 9+ * This program is distributed in the hope that it will be useful,
 10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12+ * GNU General Public License for more details.
 13+ *
 14+ * You should have received a copy of the GNU General Public License along
 15+ * with this program; if not, write to the Free Software Foundation, Inc.,
 16+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 17+ * http://www.gnu.org/copyleft/gpl.html
 18+ *
 19+ * @file
 20+ * @ingroup Maintenance
 21+ */
 22+
 23+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 24+
 25+class FixBug35048Files extends Maintenance {
 26+ public function __construct() {
 27+ parent::__construct();
 28+ $this->mDescription = "Fix the image/oldimage DB records for a file.";
 29+ $this->addOption( 'indir', "Dir containing orphaned archive file lists", true, true );
 30+ $this->addOption( 'outdir', "Log changes to files under this dir", true, true );
 31+ }
 32+
 33+ public function execute() {
 34+ global $wgUploadDirectory, $wgDBname; // assumes FS
 35+
 36+ $inDir = $this->getOption( 'indir' );
 37+ if ( !is_file( "$inDir/$wgDBname" ) ) {
 38+ $this->error( "$inDir/$wgDBname does not exists", 1 );
 39+ }
 40+
 41+ $outDir = $this->getOption( 'outdir' );
 42+ if ( file_exists( "$outDir/$wgDBname" ) ) {
 43+ $this->error( "$outDir/$wgDBname already exists", 1 );
 44+ }
 45+
 46+ if ( !wfMkdirParents( "$wgUploadDirectory/lost+found" ) || !wfMkdirParents( $outDir ) ) {
 47+ return;
 48+ }
 49+
 50+ $dbw = wfGetDB( DB_MASTER );
 51+ $repo = RepoGroup::singleton()->getLocalRepo();
 52+
 53+ $list = array_filter( explode( "\n", file_get_contents( "$inDir/$wgDBname" ) ) );
 54+ foreach ( $list as $archiveName ) {
 55+ list( $archTimestamp, $name ) = explode( '!', $archiveName, 2 );
 56+ $title = Title::makeTitle( NS_FILE, $name );
 57+
 58+ $oaFile = $repo->newFromArchiveName( $title, $archiveName );
 59+ if ( $oaFile && $oaFile->exists() ) {
 60+ continue; // fixed already
 61+ }
 62+
 63+ $file = $repo->findFile( $title );
 64+ if ( $file && $file->exists() ) {
 65+ $currentFilePath = "$wgUploadDirectory/" . $file->getRel();
 66+ $archivedFilePath = "$wgUploadDirectory/" . $file->getArchiveRel( $archiveName );
 67+ if ( !is_file( $currentFilePath ) || !is_file( $archivedFilePath ) ) {
 68+ $this->error( "Failed sanity check for file existence for '$name'." );
 69+ continue;
 70+ }
 71+ $archivedFileSha1 = wfBaseConvert( sha1_file( $archivedFilePath ), 16, 36, 31 );
 72+ $currentFileSha1 = $repo->getFileSha1( $file->getPath() ); // base 36
 73+
 74+ $history = $file->getHistory(); // old file versions
 75+
 76+ #$this->output( "Checking '$archiveName', sha1 $archivedFileSha1.\n" );
 77+ if ( $file->getSha1() !== $currentFileSha1 ) { // mismatch?
 78+ $this->output( "'$name' gives sha1 {$file->getSha1()}, not $currentFileSha1.\n" );
 79+ }
 80+ if (
 81+ // Current file row is pointing to the wrong file
 82+ $file->getSha1() !== $currentFileSha1 &&
 83+ // Current file row matches this orphaned file
 84+ $file->getSha1() === $archivedFileSha1
 85+ ) {
 86+ // We often have two rows where the current row points to the wrong file
 87+ // and the old row points to none at all. Often, from the sha1s, the orphan
 88+ // and current files should have their names swapped and the old row should
 89+ // point to the current file (which we move to an archive name).
 90+ if ( count( $history ) && $history[0]->getArchiveName() === ''
 91+ && $history[0]->getSha1() === $currentFileSha1 )
 92+ {
 93+ // Switch the current version and the orphaned version on the FS.
 94+ $this->output( "Switching '$currentFilePath' with '$archivedFilePath'.\n" );
 95+ $tmpFilePath = "$wgUploadDirectory/lost+found/" . md5( $currentFilePath );
 96+ if ( !is_writable( $currentFilePath ) || !is_writable( $archivedFilePath ) ) {
 97+ continue;
 98+ }
 99+ $ok = rename( $currentFilePath, $tmpFilePath ) // temp
 100+ && rename( $archivedFilePath, $currentFilePath )
 101+ && rename( $tmpFilePath, $archivedFilePath );
 102+ if ( $outDir ) { // log changes
 103+ file_put_contents( "$outDir/$wgDBname",
 104+ "$currentFilePath $tmpFilePath\n", FILE_APPEND );
 105+ file_put_contents( "$outDir/$wgDBname",
 106+ "$archivedFilePath $currentFilePath\n", FILE_APPEND );
 107+ file_put_contents( "$outDir/$wgDBname",
 108+ "$tmpFilePath $archivedFilePath\n", FILE_APPEND );
 109+ }
 110+ // Update DB to point to former current version next run (in $history loop)...
 111+ } else {
 112+ // Evict the current version to lost+found so it can be properly
 113+ // re-uploaded later, with the username, comment, and log entry.
 114+ $this->output( "Evicting '$currentFilePath' to '$wgUploadDirectory/lost+found/$name'.\n" );
 115+ #rename( $currentFilePath, "$wgUploadDirectory/lost+found/" . $file->getName() );
 116+ // Restore the orphaned archived version to current version file
 117+ // name so that it matches up with the current version metadata.
 118+ $this->output( "Moving '$archivedFilePath' back to '$currentFilePath'.\n" );
 119+ #rename( $archivedFilePath, $currentFilePath );
 120+ if ( $outDir ) { // log changes
 121+ #file_put_contents( "$outDir/$wgDBname", "$currentFilePath $wgUploadDirectory/lost+found/$name\n", FILE_APPEND );
 122+ #file_put_contents( "$outDir/$wgDBname", "$archivedFilePath $currentFilePath\n", FILE_APPEND );
 123+ }
 124+ }
 125+ }
 126+ // While at it, fix files with empty oi_archive_name but with oi_sha1 pointing
 127+ // to this file. This can happen if is_file() fails in the File:publish() function.
 128+ foreach ( $history as $oldFile ) {
 129+ if ( $oldFile->getArchiveName() === '' && // broken row
 130+ $oldFile->getSha1() === $archivedFileSha1 ) // should have this name
 131+ {
 132+ $this->output( "Fixed empty oi_archive_name via sha1 $archivedFileSha1.\n" );
 133+ $dbw->update( 'oldimage', array( 'oi_archive_name' => $archiveName ),
 134+ array(
 135+ 'oi_name' => $name,
 136+ 'oi_sha1' => $archivedFileSha1,
 137+ 'oi_timestamp' => $dbw->timestamp( $oldFile->getTimestamp() ),
 138+ 'oi_archive_name' => '' // sanity
 139+ )
 140+ );
 141+ }
 142+ }
 143+ }
 144+ }
 145+ }
 146+}
 147+
 148+$maintClass = "FixBug35048Files";
 149+require_once( RUN_MAINTENANCE_IF_MAIN );
Index: branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php
@@ -0,0 +1,78 @@
 2+<?php
 3+/**
 4+ * This program is free software; you can redistribute it and/or modify
 5+ * it under the terms of the GNU General Public License as published by
 6+ * the Free Software Foundation; either version 2 of the License, or
 7+ * (at your option) any later version.
 8+ *
 9+ * This program is distributed in the hope that it will be useful,
 10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12+ * GNU General Public License for more details.
 13+ *
 14+ * You should have received a copy of the GNU General Public License along
 15+ * with this program; if not, write to the Free Software Foundation, Inc.,
 16+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 17+ * http://www.gnu.org/copyleft/gpl.html
 18+ *
 19+ * @file
 20+ * @ingroup Maintenance
 21+ */
 22+
 23+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 24+
 25+class FindFilesMissingDBRows extends Maintenance {
 26+ public function __construct() {
 27+ parent::__construct();
 28+ $this->mDescription = "Fix the image/oldimage DB records for a file.";
 29+ $this->addOption( 'outdir', "List bad files under this dir.", false, true );
 30+ }
 31+
 32+ public function execute() {
 33+ global $wgUploadDirectory, $wgDBname; // assumes FS
 34+
 35+ if ( !is_dir( "{$wgUploadDirectory}/archive" ) ) {
 36+ return;
 37+ }
 38+
 39+ $outDir = $this->getOption( 'outdir' );
 40+ if ( $outDir && file_exists( "$outDir/$wgDBname" ) ) {
 41+ $this->error( "$outDir/$wgDBname already exists", 1 );
 42+ }
 43+
 44+ $dbr = wfGetDB( DB_SLAVE );
 45+ for ( $i=0; $i<256; $i++ ) { // 16*16=256 shards
 46+ $shard = wfBaseConvert( $i, 10, 16, 2 );
 47+ $shardDir = "{$wgUploadDirectory}/archive/{$shard[0]}/{$shard}";
 48+ if ( !is_dir( $shardDir ) ) {
 49+ continue;
 50+ }
 51+ $this->output( "Doing shard $shard.\n" );
 52+
 53+ // Files only from 2012 (we really only need feb-march)
 54+ $fsList = array_filter( array_map( 'wfBaseName', explode( "\n",
 55+ shell_exec( "find $shardDir -name \"2012*\"" )
 56+ ) ) );
 57+ if ( $fsList ) {
 58+ $res = $dbr->select( 'oldimage', array( 'oi_archive_name' ),
 59+ array( 'oi_archive_name' => $fsList )
 60+ );
 61+ $dbList = array();
 62+ foreach ( $res as $row ) {
 63+ $dbList[$row->oi_archive_name] = 1;
 64+ }
 65+ foreach ( $fsList as $archiveName ) {
 66+ if ( !isset( $dbList[$archiveName] ) ) {
 67+ $this->output( "No DB record for file $archiveName.\n" );
 68+ if ( $outDir ) {
 69+ file_put_contents( "$outDir/$wgDBname", "$archiveName\n", FILE_APPEND );
 70+ }
 71+ }
 72+ }
 73+ }
 74+ }
 75+ }
 76+}
 77+
 78+$maintClass = "FindFilesMissingDBRows";
 79+require_once( RUN_MAINTENANCE_IF_MAIN );

Status & tagging log