r96536 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r96535‎ | r96536 | r96537 >
Date:01:21, 8 September 2011
Author:nelson
Status:ok
Tags:
Comment:
copyover: Don't ship with LocalSettings.php; Strip out secrets from proxy.config
rewrite: Get rid of wmf/config.py; Move the 404 handler out into a separate subroutine
rewrite: Attempt to keep the line lengths < 80
SwiftMedia: Fix bug 30192; Copy less code for File::transform()
Modified paths:
  • /trunk/extensions/SwiftMedia/SwiftMedia.body.php (modified) (history)
  • /trunk/extensions/SwiftMedia/copyover (modified) (history)
  • /trunk/extensions/SwiftMedia/smtest.py (modified) (history)
  • /trunk/extensions/SwiftMedia/wmf/rewrite.py (modified) (history)

Diff [purge]

Index: trunk/extensions/SwiftMedia/copyover
@@ -1,8 +1,8 @@
22 #!/bin/sh
33
4 -scp rnelson@ersch.wikimedia.org:/var/www/LocalSettings.php LocalSettings.real
5 -sed '1,/SWIFT/d' <LocalSettings.real >LocalSettings.php
6 -scp rnelson@alsted.wikimedia.org:/etc/swift/proxy-server.conf .
74 scp rnelson@ersch.wikimedia.org:/var/www/extensions/SwiftMedia/{SwiftMedia.body.php,SwiftMedia.i18n.php,SwiftMedia.php,TODO} .
8 -scp rnelson@alsted.wikimedia.org:/usr/local/lib/python2.6/dist-packages/wmf/{client.py,__init__.py,rewrite.py,config.py} wmf/
9 -perl -pi -e "s/'key' => '.*'/'key' => 'secret'/" LocalSettings.php
 5+scp rnelson@alsted.wikimedia.org:/etc/swift/proxy-server.conf proxy-server.sample
 6+scp rnelson@alsted.wikimedia.org:/usr/local/lib/python2.6/dist-packages/wmf/{client.py,__init__.py,rewrite.py} wmf/
 7+
 8+# strip out passwords to prevent funny stuff.
 9+perl -pi -e "s/^super_admin_key = .*/super_admin_key = notreal/;s/^key = .*/key = alsonotreal/;" proxy-server.sample
Index: trunk/extensions/SwiftMedia/smtest.py
@@ -10,7 +10,7 @@
1111 rss_title = "Wikipedia watchlist"
1212 rss_link = "http://en.wikipedia.org"
1313 host = "http://ersch.wikimedia.org/"
14 -host = "http://www.ra-tes.org/phase3/"
 14+#host = "http://127.0.0.1/wiki/"
1515
1616 def login(username, password):
1717 t.add_extra_header("User-Agent", "python-twill-russnelson@gmail.com")
Index: trunk/extensions/SwiftMedia/wmf/rewrite.py
@@ -9,7 +9,6 @@
1010 import re
1111 from eventlet.green import urllib2
1212 import wmf.client
13 -import wmf.config
1413 import time
1514
1615 # the auth system turns our login and key into an account / token pair.
@@ -73,37 +72,82 @@
7473
7574 def __init__(self, app, conf):
7675 self.app = app
77 - self.account = wmf.config.account
 76+ self.account = conf['account'].strip()
7877 self.authurl = conf['url'].strip()
7978 self.login = conf['login'].strip()
8079 self.key = conf['key'].strip()
 80+ self.thumbhost = conf['thumbhost'].strip()
 81+ self.user_agent = conf['user_agent'].strip()
8182
 83+ def handle404(self, reqorig, url, container, obj):
 84+ """ return a webob.Response which reads the request, but from the thumb host.
 85+ """
 86+ # go to the thumb media store for unknown files
 87+ reqorig.host = self.thumbhost
 88+ # upload doesn't like our User-agent, otherwise we could call it using urllib2.url()
 89+ opener = urllib2.build_opener()
 90+ opener.addheaders = [('User-agent', self.user_agent)]
 91+ # At least in theory, we shouldn't be handing out links to files that we don't have
 92+ # (or in the case of thumbs, can't generate). However, someone may have a formerly
 93+ # valid link to a file, so we should do them the favor of giving them a 404.
 94+ try:
 95+ upcopy = opener.open(reqorig.url)
 96+ except urllib2.HTTPError,status:
 97+ if status == 404:
 98+ resp = webob.exc.HTTPNotFound('Expected original file not found')
 99+ return resp
 100+ else:
 101+ resp = webob.exc.HTTPNotFound('Unexpected error %s' % `status`)
 102+ return resp
 103+
 104+ # get the Content-Type.
 105+ uinfo = upcopy.info()
 106+ c_t = uinfo.gettype()
 107+ last_modified = time.mktime(uinfo.getdate('Last-Modified'))
 108+ # Fetch from upload, write into the cluster, and return it
 109+ app_iter = Copy2(upcopy, self.app, url,
 110+ urllib2.quote(container), obj, self.authurl, self.login,
 111+ self.key, content_type=c_t, modified=last_modified)
 112+
 113+ resp = webob.Response(app_iter=app_iter, content_type=c_t)
 114+ resp.headers.add('Things', "%s %s %s %s %s %s %s %s" % (url, urllib2.quote(container), obj, self.authurl, self.login, self.key, c_t, last_modified))
 115+ resp.headers.add('Last-Modified', uinfo.getheader('Last-Modified'))
 116+ return resp
 117+
82118 def __call__(self, env, start_response):
83119 #try:
84120 req = webob.Request(env)
 121+ # PUT requests never need rewriting.
85122 if req.method == 'PUT':
86123 return self.app(env, start_response)
 124+
 125+ # if it already has AUTH, presume that it's good. #07
 126+ if req.path.startswith('/auth') or req.path.find('AUTH') >= 0:
 127+ return self.app(env, start_response)
 128+
 129+ # keep a copy of the original request so we can ask the scalers for it
87130 reqorig = req.copy()
 131+ # match these two URL forms, with the wikipedia, commons, and the rest
 132+ # going into groups.
88133 # http://upload.wikimedia.org/wikipedia/commons/a/aa/000_Finlanda_harta.PNG
89134 # http://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/000_Finlanda_harta.PNG/75px-000_Finlanda_harta.PNG
90135 match = re.match(r'/(.*?)/(.*?)/(.*)', req.path)
91 - if req.path.startswith('/auth') or req.path.find('AUTH') >= 0:
92 - # if it already has AUTH, presume that it's good. #07
93 - return self.app(env, start_response)
94 - elif match:
 136+ if match:
 137+ # Our target URL is as follows:
95138 # https://alsted.wikimedia.org:8080/v1/AUTH_6790933748e741268babd69804c6298b/wikipedia%252Fen/2/25/Machinesmith.png
96 - container = match.group(2) #02
 139+
 140+ # quote slashes in the container name
 141+ container = "%s%%2F%s" % (match.group(1), match.group(2)) #02
97142 obj = match.group(3)
98143 # include the thumb in the container.
99144 if obj.startswith("thumb/"): #03
100145 container += "%2Fthumb"
101146 obj = obj[len("thumb/"):]
102 - # quote slashes in the container name
103 - container = "%s%%2F%s" % (match.group(1), container)
104147
105148 if not obj:
106 - # don't let them list the container (it's really FRICKING huge) #08
107 - return webob.exc.HTTPForbidden('No container listing')(env, start_response)
 149+ # don't let them list the container (it's CRAZY huge) #08
 150+ resp = webob.exc.HTTPForbidden('No container listing')
 151+ return resp(env, start_response)
108152
109153 # save a url with just the account name in it.
110154 req.path_info = "/v1/%s" % (self.account)
@@ -113,7 +157,8 @@
114158 req.path_info = "/v1/%s/%s/%s" % (self.account, container, urllib2.unquote(obj))
115159
116160 controller = ObjectController()
117 - # do_start_response just remembers what it got called with, because we may want to generate a different response.
 161+ # do_start_response just remembers what it got called with,
 162+ # because our 404 handler will generate a different response.
118163 app_iter = self.app(env, controller.do_start_response) #01
119164 status = int(controller.response_args[0].split()[0])
120165 headers = dict(controller.response_args[1])
@@ -126,40 +171,18 @@
127172 return webob.Response(status=status, headers=headers ,
128173 app_iter=app_iter)(env, start_response) #01a
129174 elif status == 404: #4
130 - # go to the thumb media store for unknown files
131 - reqorig.host = wmf.config.thumbhost
132 - # upload doesn't like our User-agent, otherwise we could call it using urllib2.url()
133 - opener = urllib2.build_opener()
134 - opener.addheaders = [('User-agent', wmf.config.user_agent)]
135 - # At least in theory, we shouldn't be handing out links to files that we don't have
136 - # (or in the case of thumbs, can't generate). However, someone may have a formerly
137 - # valid link to a file, so we should do them the favor of giving them a 404.
138 - try:
139 - upcopy = opener.open(reqorig.url)
140 - except urllib2.HTTPError,status:
141 - if status == 404:
142 - return webob.exc.HTTPNotFound('Expected original file not found')(env, start_response)
143 - else:
144 - return webob.exc.HTTPNotFound('Unexpected error %s' % `status`)(env, start_response)
145 -
146 - # get the Content-Type.
147 - uinfo = upcopy.info()
148 - c_t = uinfo.gettype()
149 - last_modified = time.mktime(uinfo.getdate('Last-Modified'))
150 - # Fetch from upload, write into the cluster, and return it to them.
151 - resp = webob.Response(app_iter=Copy2(upcopy, self.app, url,
152 - urllib2.quote(container), obj, self.authurl, self.login,
153 - self.key, content_type=c_t, modified=last_modified), content_type=c_t)
154 - resp.headers.add('Things', "%s %s %s %s %s %s %s %s" % (url, urllib2.quote(container), obj, self.authurl, self.login, self.key, c_t, last_modified))
155 - resp.headers.add('Last-Modified', uinfo.getheader('Last-Modified'))
 175+ resp = self.handle404(reqorig)
156176 return resp(env, start_response)
157177 elif status == 401:
158178 # if the Storage URL is invalid or has expired we'll get this error.
159 - return webob.exc.HTTPUnauthorized('Token may have timed out')(env, start_response) #05
 179+ resp = webob.exc.HTTPUnauthorized('Token may have timed out') #05
 180+ return resp(env, start_response)
160181 else:
161 - return webob.exc.HTTPNotImplemented('Unknown Status: %s' % (status))(env, start_response) #10
 182+ resp = webob.exc.HTTPNotImplemented('Unknown Status: %s' % (status)) #10
 183+ return resp(env, start_response)
162184 else:
163 - return webob.exc.HTTPBadRequest('Regexp failed: "%s"' % (req.path))(env, start_response) #11
 185+ resp = webob.exc.HTTPBadRequest('Regexp failed: "%s"' % (req.path)) #11
 186+ return resp(env, start_response)
164187 #except:
165188 #return webob.exc.HTTPNotFound('Internal error')(env, start_response)
166189
Index: trunk/extensions/SwiftMedia/SwiftMedia.body.php
@@ -133,66 +133,69 @@
134134 /** getThumbnail inherited */
135135
136136 /**
137 - * Transform a media file
138 - *
139 - * @param $params Array: an associative array of handler-specific parameters.
140 - * Typical keys are width, height and page.
141 - * @param $flags Integer: a bitfield, may contain self::RENDER_NOW to force rendering
142 - * @return MediaTransformOutput | false
 137+ * class-specific transform (from an original into a thumb).
143138 */
144 - function transform( $params, $flags = 0 ) {
145 - global $wgUseSquid, $wgIgnoreImageErrors, $wgThumbnailEpoch, $wgServer;
146 - global $wgTmpDirectory;
 139+ function maybeDoTransform( $thumbName, $thumbUrl, $params ) {
 140+ global $wgIgnoreImageErrors, $wgThumbnailEpoch, $wgTmpDirectory;
147141
148 - wfProfileIn( __METHOD__ );
149 - do {
150 - if ( !$this->canRender() ) {
151 - // not a bitmap or renderable image, don't try.
152 - $thumb = $this->iconThumb();
153 - break;
154 - }
 142+ // get a temporary place to put the original.
 143+ $thumbPath = tempnam( $wgTmpDirectory, 'transform_out_' );
155144
156 - // Get the descriptionUrl to embed it as comment into the thumbnail. Bug 19791.
157 - $descriptionUrl = $this->getDescriptionUrl();
158 - if ( $descriptionUrl ) {
159 - $params['descriptionUrl'] = $wgServer . $descriptionUrl;
160 - }
 145+ if ( $this->repo && $this->repo->canTransformVia404() && !($flags & self::RENDER_NOW ) ) {
 146+ return $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params );
 147+ }
161148
162 - // make the thumb name and URL out of the normalized parameters.
163 - // we only use the thumbTemp for a temporary file.
164 - $normalisedParams = $params;
165 - $this->handler->normaliseParams( $this, $normalisedParams );
166 - $thumbName = $this->thumbName( $normalisedParams );
167 - $thumbUrl = $this->getThumbUrl( $thumbName );
 149+ // see if the file exists, and if it exists, is not too old.
 150+ $conn = $this->repo->connect();
 151+ $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb");
 152+ try {
 153+ $pic = $container->get_object($this->getRel() . "/" . $thumbName);
 154+ } catch (NoSuchObjectException $e) {
 155+ $pic = NULL;
 156+ }
 157+ if ( $pic ) {
 158+ $thumbTime = $pic->last_modified;
 159+ $tm = strptime($thumbTime, "%a, %d %b %Y %H:%M:%S GMT");
 160+ $thumbGMT = gmmktime($tm["tm_hour"], $tm["tm_min"], $tm["tm_sec"], $tm["tm_mon"]+1, $tm["tm_mday"], $tm["tm_year"] + 1900);
 161+ wfDebug( __METHOD__.": $thumbName is dated $thumbGMT\n" );
 162+ if ( gmdate( 'YmdHis', $thumbGMT ) >= $wgThumbnailEpoch ) {
168163
169 - // get a temporary place to put the original.
170 - $thumbTemp = tempnam( $wgTmpDirectory, 'transform_out_' );
 164+ return $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params );
 165+ }
 166+ }
 167+ $thumb = $this->handler->doTransform( $this, $thumbPath, $thumbUrl, $params );
171168
172 - $thumb = $this->handler->doTransform( $this, $thumbTemp, $thumbUrl, $params );
 169+ // Ignore errors if requested
 170+ if ( !$thumb ) {
 171+ $thumb = null;
 172+ } elseif ( $thumb->isError() ) {
 173+ $this->lastError = $thumb->toText();
 174+ if ( $wgIgnoreImageErrors && !($flags & self::RENDER_NOW) ) {
 175+ $thumb = $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params );
 176+ }
 177+ }
173178
 179+ // what if they didn't actually write out a thumbnail? Check the file size.
 180+ if ( $thumb && filesize($thumbPath)) {
174181 // Store the thumbnail into Swift, but in the thumb version of the container.
175 - wfDebug( __METHOD__ . "Creating thumb " . $this->getRel() . "/" . $thumbName . "\n");
176 - $conn = $this->repo->connect();
177 - $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb");
178 - $this->repo->write_swift_object( $thumbTemp, $container, $this->getRel() . "/" . $thumbName);
 182+ wfDebug( __METHOD__ . "Creating thumb " . $this->getRel() . "/" . $thumbName . "\n" );
 183+ $this->repo->write_swift_object( $thumbPath, $container, $this->getRel() . "/" . $thumbName );
179184 // php-cloudfiles throws exceptions, so failure never gets here.
 185+ }
180186
181 - // Clean up temporary data.
182 - unlink($thumbTemp);
 187+ // Clean up temporary data.
 188+ unlink( $thumbPath );
183189
184 - } while (false);
185 -
186 - wfProfileOut( __METHOD__ );
187 - return is_object( $thumb ) ? $thumb : false;
 190+ return $thumb;
188191 }
189192
 193+ /** transform inherited */
190194
191195 /**
192 - * Fix thumbnail files from 1.4 or before, with extreme prejudice
193 - * Upgrading directly from 1.4 to 1.8/SwiftMedia is not supported.
 196+ * We have nothing to do here.
194197 */
195198 function migrateThumbFile( $thumbName ) {
196 - throw new MWException( __METHOD__.": not implemented" );
 199+ return;
197200 }
198201 /**
199202 * Get the public root directory of the repository.
@@ -208,45 +211,44 @@
209212
210213 /**
211214 * Get all thumbnail names previously generated for this file
 215+ * @param $archiveName string: the article name for the archived file (if archived).
 216+ * @return a list of files, the first entry of which is the directory name (if applicable).
212217 */
213 - function getThumbnails() {
 218+ function getThumbnails($archiveName = false) {
214219 $this->load();
215220
216 - $prefix = $this->getRel();
 221+ if ($archiveName) {
 222+ $prefix = $this->getArchiveRel($archiveName);
 223+ } else {
 224+ $prefix = $this->getRel();
 225+ }
217226 $conn = $this->repo->connect();
218227 $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb");
219228 $files = $container->list_objects(0, NULL, $prefix);
 229+ array_unshift($files, "unused"); # return an unused $dir.
220230 return $files;
221231 }
222232
223233 /**
224234 * Delete cached transformed files
 235+ * @param $dir string If needed for this repo, the directory prefix.
 236+ * @param $files array of strings listing the thumbs to be deleted.
225237 */
226 - function purgeThumbnails() {
227 - global $wgUseSquid, $wgExcludeFromThumbnailPurge;
 238+ function purgeThumbList($dir, $files) {
 239+ global $wgExcludeFromThumbnailPurge;
228240
229 - // Delete thumbnails
230 - $files = $this->getThumbnails();
231 - $urls = array();
232 -
233241 $conn = $this->repo->connect();
234242 $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb");
235243 foreach ( $files as $file ) {
236 - // I have no idea how to implement this given that we don't have paths in Swift
237244 // Only remove files not in the $wgExcludeFromThumbnailPurge configuration variable
238 - // $ext = pathinfo( "$dir/$file", PATHINFO_EXTENSION );
239 - //if ( in_array( $ext, $wgExcludeFromThumbnailPurge ) ) {
240 - // continue;
241 - //}
 245+ $ext = pathinfo( "$file", PATHINFO_EXTENSION );
 246+ if ( in_array( $ext, $wgExcludeFromThumbnailPurge ) ) {
 247+ continue;
 248+ }
242249
243 - $urls[] = $this->getThumbUrl($file);
 250+ wfDebug( __METHOD__ . " deleting " . $container->name . "/$file\n");
244251 $this->repo->swift_delete($container, $file);
245252 }
246 -
247 - // Purge the squid
248 - if ( $wgUseSquid ) {
249 - SquidUpdate::purge( $urls );
250 - }
251253 }
252254
253255 } // SwiftFile class
@@ -759,7 +761,7 @@
760762
761763 $dbw = $this->getMasterDB();
762764 $status = $this->newGood();
763 - $storageKeys = array_unique($storageKeys);
 765+ $storageKeys = array_unique( $storageKeys );
764766 foreach ( $storageKeys as $key ) {
765767 $hashPath = $this->getDeletedHashPath( $key );
766768 $rel = "$hashPath$key";
@@ -834,6 +836,7 @@
835837 if ( $container === false) {
836838 throw new MWException( __METHOD__.": invalid zone: $zone" );
837839 }
 840+ wfDebug( __METHOD__.": Z$zone C$container R$rel\n" );
838841 return array($container, rawurldecode( $rel ));
839842 }
840843

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r94212(bug 30192) Old thumbnails not properly purged. Unlike the bug suggests, we d...demon23:29, 10 August 2011
r96373(bug 30192) Thumbnails of archived images don't get deleted. Patch by Russ an...demon21:01, 6 September 2011

Status & tagging log