r95458 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r95457‎ | r95458 | r95459 >
Date:05:33, 25 August 2011
Author:bawolff
Status:ok (Comments)
Tags:
Comment:
New maintenance script for refreshing image metadata (refreshImageMetadata.php)

This is very similar to rebuildImages.php, except that it is specific to the img_metadata field
and processes the images in batches instead of all at once.

Also, while I'm here, I added a line to Maintenance.php to make sure it casts
$this->mBatchSize to an integer when the value is taken from the command line (I thought
it was weird that it didn't do that)

(I'm going to tag this revision 1.18 because I think it'd be nice to have this script
in 1.18 given new image metadata stuff added in 1.18, but not super-important
because rebuildImages.php does already work to refresh image metadata)
Modified paths:
  • /trunk/phase3/RELEASE-NOTES-1.18 (modified) (history)
  • /trunk/phase3/maintenance/Maintenance.php (modified) (history)
  • /trunk/phase3/maintenance/refreshImageMetadata.php (added) (history)

Diff [purge]

Index: trunk/phase3/RELEASE-NOTES-1.18
@@ -205,6 +205,7 @@
206206 targets and $wgServer.
207207 * Introduced $wgVaryOnXFPForAPI which will cause the API to send
208208 Vary: X-Forwarded-Proto headers.
 209+* New maintenance script to refresh image metadata (maintenance/refreshImageMetadata.php)
209210
210211 === Bug fixes in 1.18 ===
211212 * mw.util.getScript has been implemented (like wfScript in GlobalFunctions.php)
Index: trunk/phase3/maintenance/refreshImageMetadata.php
@@ -0,0 +1,199 @@
 2+<?php
 3+/**
 4+ * Script to refresh image metadata fields. See also rebuildImages.php
 5+ *
 6+ * Usage: php refreshImageMetadata.php
 7+ *
 8+ * Copyright © 2011 Brian Wolff
 9+ * http://www.mediawiki.org/
 10+ *
 11+ * This program is free software; you can redistribute it and/or modify
 12+ * it under the terms of the GNU General Public License as published by
 13+ * the Free Software Foundation; either version 2 of the License, or
 14+ * (at your option) any later version.
 15+ *
 16+ * This program is distributed in the hope that it will be useful,
 17+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 18+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19+ * GNU General Public License for more details.
 20+ *
 21+ * You should have received a copy of the GNU General Public License along
 22+ * with this program; if not, write to the Free Software Foundation, Inc.,
 23+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 24+ * http://www.gnu.org/copyleft/gpl.html
 25+ *
 26+ * @file
 27+ * @author Brian Wolff
 28+ * @ingroup maintenance
 29+ */
 30+
 31+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 32+
 33+class RefreshImageMetadata extends Maintenance {
 34+
 35+ /**
 36+ * @var DatabaseBase
 37+ */
 38+ protected $dbw;
 39+
 40+ function __construct() {
 41+ parent::__construct();
 42+
 43+ $this->mDescription = 'Script to update image metadata records';
 44+ $this->setBatchSize( 200 );
 45+
 46+ $this->addOption( 'force', 'Reload metadata from file even if the metadata looks ok', false, false, 'f' );
 47+ $this->addOption( 'broken-only', 'Only fix really broken records, leave old but still compatible records alone.' );
 48+ $this->addOption( 'verbose', 'Output extra information about each upgraded/non-upgraded file.', false, false, 'v' );
 49+ $this->addOption( 'start', 'Name of file to start with', false, true );
 50+ $this->addOption( 'end', 'Name of file to end with', false, true );
 51+
 52+ $this->addOption( 'mime', '(Inefficient!) Only refresh files with this mime type. Can accept wild-card image/*' , false, true );
 53+ $this->addOption( 'metadata-contains', '(Inefficient!) Only refresh files where the img_metadata field contains this string. Can be used if its known a specific property was being extracted incorrectly.', false, true );
 54+
 55+ }
 56+
 57+ public function execute() {
 58+ $force = $this->hasOption( 'force' );
 59+ $brokenOnly = $this->hasOption( 'broken-only' );
 60+ $verbose = $this->hasOption( 'verbose' );
 61+ $start = $this->getOption( 'start', false );
 62+ $this->setupParameters( $force, $brokenOnly );
 63+
 64+ $upgraded = 0;
 65+ $leftAlone = 0;
 66+ $error = 0;
 67+
 68+ $dbw = wfGetDB( DB_MASTER );
 69+ if ( $this->mBatchSize <= 0 ) {
 70+ $this->error( "Batch size is too low...", 12 );
 71+ }
 72+
 73+ $repo = RepoGroup::singleton()->getLocalRepo();
 74+ $conds = $this->getConditions( $dbw );
 75+
 76+ // For the WHERE img_name > 'foo' condition that comes after doing a batch
 77+ $conds2 = array();
 78+ if ( $start !== false ) {
 79+ $conds2[] = 'img_name >= ' . $dbw->addQuotes( $start );
 80+ }
 81+
 82+ $options = array(
 83+ 'LIMIT' => $this->mBatchSize,
 84+ 'ORDER BY' => 'img_name ASC',
 85+ );
 86+
 87+ do {
 88+ $res = $dbw->select(
 89+ 'image',
 90+ '*',
 91+ array_merge( $conds, $conds2 ),
 92+ __METHOD__,
 93+ $options
 94+ );
 95+
 96+ if ( $res->numRows() > 0 ) {
 97+ $row1 = $res->current();
 98+ $this->output( "Processing next {$this->mBatchSize} rows starting with {$row1->img_name}.\n");
 99+ $res->rewind();
 100+ } else {
 101+ $this->error( "No images to process.", 4 );
 102+ }
 103+
 104+ foreach ( $res as $row ) {
 105+ $file = $repo->newFileFromRow( $row );
 106+ if ( $file->getUpgraded() ) {
 107+ // File was upgraded.
 108+ $upgraded++;
 109+ $newLength = strlen( $file->getMetadata() );
 110+ $oldLength = strlen( $row->img_metadata );
 111+ if ( $newLength < $oldLength - 5 ) {
 112+ // If after updating, the metadata is smaller then
 113+ // what it was before, that's probably not a good thing
 114+ // because we extract more data with time, not less.
 115+ // Thus this probably indicates an error of some sort,
 116+ // or at the very least is suspicious. Have the - 5 just
 117+ // to weed out any inconsequential changes.
 118+ $error++;
 119+ $this->output( "Warning: File:{$row->img_name} used to have " .
 120+ "$oldLength bytes of metadata but now has $newLength bytes.\n" );
 121+ } elseif ( $verbose ) {
 122+ $this->output("Refreshed File:{$row->img_name}.\n" );
 123+ }
 124+ } else {
 125+ $leftAlone++;
 126+ if ( $force ) {
 127+ $file->upgradeRow();
 128+ $newLength = strlen( $file->getMetadata() );
 129+ $oldLength = strlen( $row->img_metadata );
 130+ if ( $newLength < $oldLength - 5 ) {
 131+ $error++;
 132+ $this->output( "Warning: File:{$row->img_name} used to have " .
 133+ "$oldLength bytes of metadata but now has $newLength bytes. (forced)\n" );
 134+
 135+ }
 136+ if ( $verbose ) {
 137+ $this->output("Forcibly refreshed File:{$row->img_name}.\n" );
 138+ }
 139+ }
 140+ else {
 141+ if ( $verbose ) {
 142+ $this->output( "Skipping File:{$row->img_name}.\n" );
 143+ }
 144+ }
 145+ }
 146+
 147+ }
 148+ $conds2 = array( 'img_name > ' . $dbw->addQuotes( $row->img_name ) );
 149+ wfWaitForSlaves();
 150+ } while( $res->numRows() === $this->mBatchSize );
 151+
 152+ $total = $upgraded + $leftAlone;
 153+ if ( $force ) {
 154+ $this->output( "\nFinished refreshing file metadata for $total files. $upgraded needed to be refreshed, $leftAlone did not need to be but were refreshed anyways, and $error refreshes were suspicious.\n" );
 155+ } else {
 156+ $this->output( "\nFinished refreshing file metadata for $total files. $upgraded were refreshed, $leftAlone were already up to date, and $error refreshes were suspicious.\n" );
 157+ }
 158+ }
 159+
 160+ function getConditions( $dbw ) {
 161+ $conds = array();
 162+
 163+ $end = $this->getOption( 'end', false );
 164+ $mime = $this->getOption( 'mime', false );
 165+ $like = $this->getOption( 'metadata-contains', false );
 166+
 167+ if ( $end !== false ) {
 168+ $conds[] = 'img_name <= ' . $dbw->addQuotes( $end ) ;
 169+ }
 170+ if ( $mime !== false ) {
 171+ list( $major, $minor ) = File::splitMime( $mime );
 172+ $conds['img_major_mime'] = $major;
 173+ if ( $minor !== '*' ) {
 174+ $conds['img_minor_mime'] = $minor;
 175+ }
 176+ }
 177+ if ( $like ) {
 178+ $conds[] = 'img_metadata ' . $dbw->buildLike( $dbw->anyString(), $like, $dbw->anyString() );
 179+ }
 180+ return $conds;
 181+ }
 182+
 183+ function setupParameters( $force, $brokenOnly ) {
 184+ global $wgUpdateCompatibleMetadata, $wgReadOnly;
 185+
 186+ if ( $brokenOnly ) {
 187+ $wgUpdateCompatibleMetadata = false;
 188+ } else {
 189+ $wgUpdateCompatibleMetadata = true;
 190+ }
 191+
 192+ if ( $brokenOnly && $force ) {
 193+ $this->error( 'Cannot use --broken-only and --force together. ', 2 );
 194+ }
 195+ }
 196+}
 197+
 198+
 199+$maintClass = 'RefreshImageMetadata';
 200+require_once( RUN_MAINTENANCE_IF_MAIN );
Property changes on: trunk/phase3/maintenance/refreshImageMetadata.php
___________________________________________________________________
Added: svn:eol-style
1201 + native
Index: trunk/phase3/maintenance/Maintenance.php
@@ -699,7 +699,7 @@
700700 $this->mQuiet = true;
701701 }
702702 if ( $this->hasOption( 'batch-size' ) ) {
703 - $this->mBatchSize = $this->getOption( 'batch-size' );
 703+ $this->mBatchSize = intval( $this->getOption( 'batch-size' ) );
704704 }
705705 }
706706

Sign-offs

UserFlagDate
Reedyinspected15:43, 6 September 2011
Reedytested15:46, 6 September 2011

Follow-up revisions

RevisionCommit summaryAuthorDate
r96345Followup r95458...reedy15:45, 6 September 2011
r964981.18: MFT r95171, r95409, r95436, r95458, r95467, r95470, r95475, r95493, r95...catrope21:04, 7 September 2011

Comments

#Comment by Reedy (talk | contribs)   15:46, 6 September 2011

Looks fine/seems to work fine. Minor followup committed, but no need to bother backporting it

Status & tagging log