r92030 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r92029‎ | r92030 | r92031 >
Date:00:11, 13 July 2011
Author:raindrift
Status:ok (Comments)
Tags:
Comment:
Added maintenance script for cleaning up abandoned uploads
Added methods for working with UploadStash without authenticating, for maintenance script
Modified paths:
  • /trunk/phase3/includes/upload/UploadStash.php (modified) (history)
  • /trunk/phase3/maintenance/cleanupUploadStash.php (added) (history)

Diff [purge]

Index: trunk/phase3/maintenance/cleanupUploadStash.php
@@ -0,0 +1,74 @@
 2+<?php
 3+/**
 4+ * Remove old or broken uploads from temporary uploaded file storage,
 5+ * clean up associated database records
 6+ *
 7+ * Copyright © 2011, Wikimedia Foundation
 8+ *
 9+ * This program is free software; you can redistribute it and/or modify
 10+ * it under the terms of the GNU General Public License as published by
 11+ * the Free Software Foundation; either version 2 of the License, or
 12+ * (at your option) any later version.
 13+ *
 14+ * This program is distributed in the hope that it will be useful,
 15+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 17+ * GNU General Public License for more details.
 18+ *
 19+ * You should have received a copy of the GNU General Public License along
 20+ * with this program; if not, write to the Free Software Foundation, Inc.,
 21+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 22+ * http://www.gnu.org/copyleft/gpl.html
 23+ *
 24+ * @file
 25+ * @author Ian Baker <ibaker@wikimedia.org>
 26+ * @ingroup Maintenance
 27+ */
 28+
 29+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 30+
 31+class UploadStashCleanup extends Maintenance {
 32+
 33+ public function __construct() {
 34+ parent::__construct();
 35+ $this->mDescription = "Clean up abandoned files in temporary uploaded file stash";
 36+ }
 37+
 38+ public function execute() {
 39+ $dbr = wfGetDB( DB_SLAVE );
 40+
 41+ $this->output( "Getting list of files to clean up...\n" );
 42+ $res = $dbr->select(
 43+ 'uploadstash',
 44+ 'us_key',
 45+ 'us_timestamp < ' . wfTimestamp( TS_MW, time() - UploadStash::REPO_AGE * 3600 ),
 46+ __METHOD__
 47+ );
 48+
 49+ if( !is_object( $res ) ) {
 50+ // nothing to do.
 51+ return false;
 52+ }
 53+
 54+ // finish the read before starting writes.
 55+ $keys = array();
 56+ while( $row = $dbr->fetchRow( $res ) ) {
 57+ array_push( $keys, $row['us_key'] );
 58+ }
 59+
 60+ $this->output( 'Removing ' . count($keys) . " file(s)...\n" );
 61+ // this could be done some other, more direct/efficient way, but using
 62+ // UploadStash's own methods means it's less likely to fall accidentally
 63+ // out-of-date someday
 64+ $repo = RepoGroup::singleton()->getLocalRepo();
 65+ $stash = new UploadStash( $repo );
 66+
 67+ foreach( $keys as $key ) {
 68+ $stash->getFile( $key, true );
 69+ $stash->removeFileNoAuth( $key );
 70+ }
 71+ }
 72+}
 73+
 74+$maintClass = "UploadStashCleanup";
 75+require_once( RUN_MAINTENANCE_IF_MAIN );
\ No newline at end of file
Property changes on: trunk/phase3/maintenance/cleanupUploadStash.php
___________________________________________________________________
Added: svn:eol-style
176 + native
Index: trunk/phase3/includes/upload/UploadStash.php
@@ -23,6 +23,9 @@
2424 // behind, throw an exception instead. (at what point is broken better than slow?)
2525 const MAX_LAG = 30;
2626
 27+ // Age of the repository in hours. That is, after how long will files be assumed abandoned and deleted?
 28+ const REPO_AGE = 6;
 29+
2730 /**
2831 * repository that this uses to store temp files
2932 * public because we sometimes need to get a LocalFile within the same repo.
@@ -55,24 +58,27 @@
5659 * Get a file and its metadata from the stash.
5760 *
5861 * @param $key String: key under which file information is stored
 62+ * @param $noauth Boolean (optional) Don't check authentication. Used by maintenance scripts.
5963 * @throws UploadStashFileNotFoundException
6064 * @throws UploadStashNotLoggedInException
6165 * @throws UploadStashWrongOwnerException
6266 * @throws UploadStashBadPathException
6367 * @return UploadStashFile
6468 */
65 - public function getFile( $key ) {
 69+ public function getFile( $key, $noAuth = false ) {
6670 global $wgUser;
6771
6872 if ( ! preg_match( self::KEY_FORMAT_REGEX, $key ) ) {
6973 throw new UploadStashBadPathException( "key '$key' is not in a proper format" );
7074 }
71 -
72 - $userId = $wgUser->getId();
73 - if( !$userId ) {
74 - throw new UploadStashNotLoggedInException( 'No user is logged in, files must belong to users' );
 75+
 76+ if( !$noAuth ) {
 77+ $userId = $wgUser->getId();
 78+ if( !$userId ) {
 79+ throw new UploadStashNotLoggedInException( 'No user is logged in, files must belong to users' );
 80+ }
7581 }
76 -
 82+
7783 if ( !isset( $this->fileMetadata[$key] ) ) {
7884 // try this first. if it fails to find the row, check for lag, wait, try again. if its still missing, throw an exception.
7985 // this more complex solution keeps things moving for page loads with many requests
@@ -108,10 +114,12 @@
109115 $this->fileProps[$key] = File::getPropsFromPath( $path );
110116 }
111117
112 - if( $this->fileMetadata[$key]['us_user'] != $userId ) {
113 - throw new UploadStashWrongOwnerException( "This file ($key) doesn't belong to the current user." );
 118+ if( !$noAuth ) {
 119+ if( $this->fileMetadata[$key]['us_user'] != $userId ) {
 120+ throw new UploadStashWrongOwnerException( "This file ($key) doesn't belong to the current user." );
 121+ }
114122 }
115 -
 123+
116124 return $this->files[$key];
117125 }
118126
@@ -275,14 +283,14 @@
276284 return true;
277285 }
278286
279 -
280287 /**
281288 * Remove a particular file from the stash. Also removes it from the repo.
282289 *
283290 * @throws UploadStashNotLoggedInException
 291+ * @throws UploadStashWrongOwnerException
284292 * @return boolean: success
285293 */
286 - public function removeFile( $key ) {
 294+ public function removeFile( $key ){
287295 global $wgUser;
288296
289297 $userId = $wgUser->getId();
@@ -290,7 +298,6 @@
291299 throw new UploadStashNotLoggedInException( 'No user is logged in, files must belong to users' );
292300 }
293301
294 - wfDebug( __METHOD__ . " clearing row $key for user $userId\n" );
295302 $dbw = wfGetDB( DB_MASTER );
296303
297304 // this is a cheap query. it runs on the master so that this function still works when there's lag.
@@ -306,11 +313,28 @@
307314 throw new UploadStashWrongOwnerException( "Can't delete: the file ($key) doesn't belong to this user." );
308315 }
309316
 317+ return $this->removeFileNoAuth( $key );
 318+ }
 319+
 320+
 321+ /**
 322+ * Remove a file (see removeFile), but doesn't check ownership first.
 323+ *
 324+ * @return boolean: success
 325+ */
 326+ public function removeFileNoAuth( $key ) {
 327+ wfDebug( __METHOD__ . " clearing row $key\n" );
 328+
 329+ $dbw = wfGetDB( DB_MASTER );
 330+
 331+ // this gets its own transaction since it's called serially by the cleanupUploadStash maintenance script
 332+ $dbw->begin();
310333 $dbw->delete(
311334 'uploadstash',
312 - array( 'us_key' => $key, 'us_user' => $userId ),
 335+ array( 'us_key' => $key),
313336 __METHOD__
314337 );
 338+ $dbw->commit();
315339
316340 // TODO: look into UnregisteredLocalFile and find out why the rv here is sometimes wrong (false when file was removed)
317341 // for now, ignore.

Follow-up revisions

RevisionCommit summaryAuthorDate
r93476Changed stash age parameter from a constant to a config option, switched to s...raindrift18:46, 29 July 2011

Comments

#Comment by 😂 (talk | contribs)   00:13, 13 July 2011

Rather than specifying REPO_AGE in hours, how about making it seconds so you can skip the multiplication? Also making it configurable might be nice.

#Comment by NeilK (talk | contribs)   03:29, 23 July 2011

I had the same feeling about REPO_AGE -- we usually (not always) define any time period in seconds. Also, it ought to be configurable so that probably means it's another global. See includes/DefaultSettings.php

Otherwise ok.

Status & tagging log