Index: trunk/extensions/SwiftMedia/copyover |
— | — | @@ -1,8 +1,8 @@ |
2 | 2 | #!/bin/sh |
3 | 3 | |
4 | | -scp rnelson@ersch.wikimedia.org:/var/www/LocalSettings.php LocalSettings.real |
5 | | -sed '1,/SWIFT/d' <LocalSettings.real >LocalSettings.php |
6 | | -scp rnelson@alsted.wikimedia.org:/etc/swift/proxy-server.conf . |
7 | 4 | scp rnelson@ersch.wikimedia.org:/var/www/extensions/SwiftMedia/{SwiftMedia.body.php,SwiftMedia.i18n.php,SwiftMedia.php,TODO} . |
8 | | -scp rnelson@alsted.wikimedia.org:/usr/local/lib/python2.6/dist-packages/wmf/{client.py,__init__.py,rewrite.py,config.py} wmf/ |
9 | | -perl -pi -e "s/'key' => '.*'/'key' => 'secret'/" LocalSettings.php |
| 5 | +scp rnelson@alsted.wikimedia.org:/etc/swift/proxy-server.conf proxy-server.sample |
| 6 | +scp rnelson@alsted.wikimedia.org:/usr/local/lib/python2.6/dist-packages/wmf/{client.py,__init__.py,rewrite.py} wmf/ |
| 7 | + |
| 8 | +# strip out passwords to prevent funny stuff. |
| 9 | +perl -pi -e "s/^super_admin_key = .*/super_admin_key = notreal/;s/^key = .*/key = alsonotreal/;" proxy-server.sample |
Index: trunk/extensions/SwiftMedia/smtest.py |
— | — | @@ -10,7 +10,7 @@ |
11 | 11 | rss_title = "Wikipedia watchlist" |
12 | 12 | rss_link = "http://en.wikipedia.org" |
13 | 13 | host = "http://ersch.wikimedia.org/" |
14 | | -host = "http://www.ra-tes.org/phase3/" |
| 14 | +#host = "http://127.0.0.1/wiki/" |
15 | 15 | |
16 | 16 | def login(username, password): |
17 | 17 | t.add_extra_header("User-Agent", "python-twill-russnelson@gmail.com") |
Index: trunk/extensions/SwiftMedia/wmf/rewrite.py |
— | — | @@ -9,7 +9,6 @@ |
10 | 10 | import re |
11 | 11 | from eventlet.green import urllib2 |
12 | 12 | import wmf.client |
13 | | -import wmf.config |
14 | 13 | import time |
15 | 14 | |
16 | 15 | # the auth system turns our login and key into an account / token pair. |
— | — | @@ -73,37 +72,82 @@ |
74 | 73 | |
75 | 74 | def __init__(self, app, conf): |
76 | 75 | self.app = app |
77 | | - self.account = wmf.config.account |
| 76 | + self.account = conf['account'].strip() |
78 | 77 | self.authurl = conf['url'].strip() |
79 | 78 | self.login = conf['login'].strip() |
80 | 79 | self.key = conf['key'].strip() |
| 80 | + self.thumbhost = conf['thumbhost'].strip() |
| 81 | + self.user_agent = conf['user_agent'].strip() |
81 | 82 | |
| 83 | + def handle404(self, reqorig, url, container, obj): |
| 84 | + """ return a webob.Response which reads the request, but from the thumb host. |
| 85 | + """ |
| 86 | + # go to the thumb media store for unknown files |
| 87 | + reqorig.host = self.thumbhost |
| 88 | + # upload doesn't like our User-agent, otherwise we could call it using urllib2.url() |
| 89 | + opener = urllib2.build_opener() |
| 90 | + opener.addheaders = [('User-agent', self.user_agent)] |
| 91 | + # At least in theory, we shouldn't be handing out links to files that we don't have |
| 92 | + # (or in the case of thumbs, can't generate). However, someone may have a formerly |
| 93 | + # valid link to a file, so we should do them the favor of giving them a 404. |
| 94 | + try: |
| 95 | + upcopy = opener.open(reqorig.url) |
| 96 | + except urllib2.HTTPError,status: |
| 97 | + if status == 404: |
| 98 | + resp = webob.exc.HTTPNotFound('Expected original file not found') |
| 99 | + return resp |
| 100 | + else: |
| 101 | + resp = webob.exc.HTTPNotFound('Unexpected error %s' % `status`) |
| 102 | + return resp |
| 103 | + |
| 104 | + # get the Content-Type. |
| 105 | + uinfo = upcopy.info() |
| 106 | + c_t = uinfo.gettype() |
| 107 | + last_modified = time.mktime(uinfo.getdate('Last-Modified')) |
| 108 | + # Fetch from upload, write into the cluster, and return it |
| 109 | + app_iter = Copy2(upcopy, self.app, url, |
| 110 | + urllib2.quote(container), obj, self.authurl, self.login, |
| 111 | + self.key, content_type=c_t, modified=last_modified) |
| 112 | + |
| 113 | + resp = webob.Response(app_iter=app_iter, content_type=c_t) |
| 114 | + resp.headers.add('Things', "%s %s %s %s %s %s %s %s" % (url, urllib2.quote(container), obj, self.authurl, self.login, self.key, c_t, last_modified)) |
| 115 | + resp.headers.add('Last-Modified', uinfo.getheader('Last-Modified')) |
| 116 | + return resp |
| 117 | + |
82 | 118 | def __call__(self, env, start_response): |
83 | 119 | #try: |
84 | 120 | req = webob.Request(env) |
| 121 | + # PUT requests never need rewriting. |
85 | 122 | if req.method == 'PUT': |
86 | 123 | return self.app(env, start_response) |
| 124 | + |
| 125 | + # if it already has AUTH, presume that it's good. #07 |
| 126 | + if req.path.startswith('/auth') or req.path.find('AUTH') >= 0: |
| 127 | + return self.app(env, start_response) |
| 128 | + |
| 129 | + # keep a copy of the original request so we can ask the scalers for it |
87 | 130 | reqorig = req.copy() |
| 131 | + # match these two URL forms, with the wikipedia, commons, and the rest |
| 132 | + # going into groups. |
88 | 133 | # http://upload.wikimedia.org/wikipedia/commons/a/aa/000_Finlanda_harta.PNG |
89 | 134 | # http://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/000_Finlanda_harta.PNG/75px-000_Finlanda_harta.PNG |
90 | 135 | match = re.match(r'/(.*?)/(.*?)/(.*)', req.path) |
91 | | - if req.path.startswith('/auth') or req.path.find('AUTH') >= 0: |
92 | | - # if it already has AUTH, presume that it's good. #07 |
93 | | - return self.app(env, start_response) |
94 | | - elif match: |
| 136 | + if match: |
| 137 | + # Our target URL is as follows: |
95 | 138 | # https://alsted.wikimedia.org:8080/v1/AUTH_6790933748e741268babd69804c6298b/wikipedia%252Fen/2/25/Machinesmith.png |
96 | | - container = match.group(2) #02 |
| 139 | + |
| 140 | + # quote slashes in the container name |
| 141 | + container = "%s%%2F%s" % (match.group(1), match.group(2)) #02 |
97 | 142 | obj = match.group(3) |
98 | 143 | # include the thumb in the container. |
99 | 144 | if obj.startswith("thumb/"): #03 |
100 | 145 | container += "%2Fthumb" |
101 | 146 | obj = obj[len("thumb/"):] |
102 | | - # quote slashes in the container name |
103 | | - container = "%s%%2F%s" % (match.group(1), container) |
104 | 147 | |
105 | 148 | if not obj: |
106 | | - # don't let them list the container (it's really FRICKING huge) #08 |
107 | | - return webob.exc.HTTPForbidden('No container listing')(env, start_response) |
| 149 | + # don't let them list the container (it's CRAZY huge) #08 |
| 150 | + resp = webob.exc.HTTPForbidden('No container listing') |
| 151 | + return resp(env, start_response) |
108 | 152 | |
109 | 153 | # save a url with just the account name in it. |
110 | 154 | req.path_info = "/v1/%s" % (self.account) |
— | — | @@ -113,7 +157,8 @@ |
114 | 158 | req.path_info = "/v1/%s/%s/%s" % (self.account, container, urllib2.unquote(obj)) |
115 | 159 | |
116 | 160 | controller = ObjectController() |
117 | | - # do_start_response just remembers what it got called with, because we may want to generate a different response. |
| 161 | + # do_start_response just remembers what it got called with, |
| 162 | + # because our 404 handler will generate a different response. |
118 | 163 | app_iter = self.app(env, controller.do_start_response) #01 |
119 | 164 | status = int(controller.response_args[0].split()[0]) |
120 | 165 | headers = dict(controller.response_args[1]) |
— | — | @@ -126,40 +171,18 @@ |
127 | 172 | return webob.Response(status=status, headers=headers , |
128 | 173 | app_iter=app_iter)(env, start_response) #01a |
129 | 174 | elif status == 404: #4 |
130 | | - # go to the thumb media store for unknown files |
131 | | - reqorig.host = wmf.config.thumbhost |
132 | | - # upload doesn't like our User-agent, otherwise we could call it using urllib2.url() |
133 | | - opener = urllib2.build_opener() |
134 | | - opener.addheaders = [('User-agent', wmf.config.user_agent)] |
135 | | - # At least in theory, we shouldn't be handing out links to files that we don't have |
136 | | - # (or in the case of thumbs, can't generate). However, someone may have a formerly |
137 | | - # valid link to a file, so we should do them the favor of giving them a 404. |
138 | | - try: |
139 | | - upcopy = opener.open(reqorig.url) |
140 | | - except urllib2.HTTPError,status: |
141 | | - if status == 404: |
142 | | - return webob.exc.HTTPNotFound('Expected original file not found')(env, start_response) |
143 | | - else: |
144 | | - return webob.exc.HTTPNotFound('Unexpected error %s' % `status`)(env, start_response) |
145 | | - |
146 | | - # get the Content-Type. |
147 | | - uinfo = upcopy.info() |
148 | | - c_t = uinfo.gettype() |
149 | | - last_modified = time.mktime(uinfo.getdate('Last-Modified')) |
150 | | - # Fetch from upload, write into the cluster, and return it to them. |
151 | | - resp = webob.Response(app_iter=Copy2(upcopy, self.app, url, |
152 | | - urllib2.quote(container), obj, self.authurl, self.login, |
153 | | - self.key, content_type=c_t, modified=last_modified), content_type=c_t) |
154 | | - resp.headers.add('Things', "%s %s %s %s %s %s %s %s" % (url, urllib2.quote(container), obj, self.authurl, self.login, self.key, c_t, last_modified)) |
155 | | - resp.headers.add('Last-Modified', uinfo.getheader('Last-Modified')) |
| 175 | + resp = self.handle404(reqorig) |
156 | 176 | return resp(env, start_response) |
157 | 177 | elif status == 401: |
158 | 178 | # if the Storage URL is invalid or has expired we'll get this error. |
159 | | - return webob.exc.HTTPUnauthorized('Token may have timed out')(env, start_response) #05 |
| 179 | + resp = webob.exc.HTTPUnauthorized('Token may have timed out') #05 |
| 180 | + return resp(env, start_response) |
160 | 181 | else: |
161 | | - return webob.exc.HTTPNotImplemented('Unknown Status: %s' % (status))(env, start_response) #10 |
| 182 | + resp = webob.exc.HTTPNotImplemented('Unknown Status: %s' % (status)) #10 |
| 183 | + return resp(env, start_response) |
162 | 184 | else: |
163 | | - return webob.exc.HTTPBadRequest('Regexp failed: "%s"' % (req.path))(env, start_response) #11 |
| 185 | + resp = webob.exc.HTTPBadRequest('Regexp failed: "%s"' % (req.path)) #11 |
| 186 | + return resp(env, start_response) |
164 | 187 | #except: |
165 | 188 | #return webob.exc.HTTPNotFound('Internal error')(env, start_response) |
166 | 189 | |
Index: trunk/extensions/SwiftMedia/SwiftMedia.body.php |
— | — | @@ -133,66 +133,69 @@ |
134 | 134 | /** getThumbnail inherited */ |
135 | 135 | |
136 | 136 | /** |
137 | | - * Transform a media file |
138 | | - * |
139 | | - * @param $params Array: an associative array of handler-specific parameters. |
140 | | - * Typical keys are width, height and page. |
141 | | - * @param $flags Integer: a bitfield, may contain self::RENDER_NOW to force rendering |
142 | | - * @return MediaTransformOutput | false |
| 137 | + * class-specific transform (from an original into a thumb). |
143 | 138 | */ |
144 | | - function transform( $params, $flags = 0 ) { |
145 | | - global $wgUseSquid, $wgIgnoreImageErrors, $wgThumbnailEpoch, $wgServer; |
146 | | - global $wgTmpDirectory; |
| 139 | + function maybeDoTransform( $thumbName, $thumbUrl, $params ) { |
| 140 | + global $wgIgnoreImageErrors, $wgThumbnailEpoch, $wgTmpDirectory; |
147 | 141 | |
148 | | - wfProfileIn( __METHOD__ ); |
149 | | - do { |
150 | | - if ( !$this->canRender() ) { |
151 | | - // not a bitmap or renderable image, don't try. |
152 | | - $thumb = $this->iconThumb(); |
153 | | - break; |
154 | | - } |
| 142 | + // get a temporary place to put the original. |
| 143 | + $thumbPath = tempnam( $wgTmpDirectory, 'transform_out_' ); |
155 | 144 | |
156 | | - // Get the descriptionUrl to embed it as comment into the thumbnail. Bug 19791. |
157 | | - $descriptionUrl = $this->getDescriptionUrl(); |
158 | | - if ( $descriptionUrl ) { |
159 | | - $params['descriptionUrl'] = $wgServer . $descriptionUrl; |
160 | | - } |
| 145 | + if ( $this->repo && $this->repo->canTransformVia404() && !($flags & self::RENDER_NOW ) ) { |
| 146 | + return $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); |
| 147 | + } |
161 | 148 | |
162 | | - // make the thumb name and URL out of the normalized parameters. |
163 | | - // we only use the thumbTemp for a temporary file. |
164 | | - $normalisedParams = $params; |
165 | | - $this->handler->normaliseParams( $this, $normalisedParams ); |
166 | | - $thumbName = $this->thumbName( $normalisedParams ); |
167 | | - $thumbUrl = $this->getThumbUrl( $thumbName ); |
| 149 | + // see if the file exists, and if it exists, is not too old. |
| 150 | + $conn = $this->repo->connect(); |
| 151 | + $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb"); |
| 152 | + try { |
| 153 | + $pic = $container->get_object($this->getRel() . "/" . $thumbName); |
| 154 | + } catch (NoSuchObjectException $e) { |
| 155 | + $pic = NULL; |
| 156 | + } |
| 157 | + if ( $pic ) { |
| 158 | + $thumbTime = $pic->last_modified; |
| 159 | + $tm = strptime($thumbTime, "%a, %d %b %Y %H:%M:%S GMT"); |
| 160 | + $thumbGMT = gmmktime($tm["tm_hour"], $tm["tm_min"], $tm["tm_sec"], $tm["tm_mon"]+1, $tm["tm_mday"], $tm["tm_year"] + 1900); |
| 161 | + wfDebug( __METHOD__.": $thumbName is dated $thumbGMT\n" ); |
| 162 | + if ( gmdate( 'YmdHis', $thumbGMT ) >= $wgThumbnailEpoch ) { |
168 | 163 | |
169 | | - // get a temporary place to put the original. |
170 | | - $thumbTemp = tempnam( $wgTmpDirectory, 'transform_out_' ); |
| 164 | + return $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); |
| 165 | + } |
| 166 | + } |
| 167 | + $thumb = $this->handler->doTransform( $this, $thumbPath, $thumbUrl, $params ); |
171 | 168 | |
172 | | - $thumb = $this->handler->doTransform( $this, $thumbTemp, $thumbUrl, $params ); |
| 169 | + // Ignore errors if requested |
| 170 | + if ( !$thumb ) { |
| 171 | + $thumb = null; |
| 172 | + } elseif ( $thumb->isError() ) { |
| 173 | + $this->lastError = $thumb->toText(); |
| 174 | + if ( $wgIgnoreImageErrors && !($flags & self::RENDER_NOW) ) { |
| 175 | + $thumb = $this->handler->getTransform( $this, $thumbPath, $thumbUrl, $params ); |
| 176 | + } |
| 177 | + } |
173 | 178 | |
| 179 | + // what if they didn't actually write out a thumbnail? Check the file size. |
| 180 | + if ( $thumb && filesize($thumbPath)) { |
174 | 181 | // Store the thumbnail into Swift, but in the thumb version of the container. |
175 | | - wfDebug( __METHOD__ . "Creating thumb " . $this->getRel() . "/" . $thumbName . "\n"); |
176 | | - $conn = $this->repo->connect(); |
177 | | - $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb"); |
178 | | - $this->repo->write_swift_object( $thumbTemp, $container, $this->getRel() . "/" . $thumbName); |
| 182 | + wfDebug( __METHOD__ . "Creating thumb " . $this->getRel() . "/" . $thumbName . "\n" ); |
| 183 | + $this->repo->write_swift_object( $thumbPath, $container, $this->getRel() . "/" . $thumbName ); |
179 | 184 | // php-cloudfiles throws exceptions, so failure never gets here. |
| 185 | + } |
180 | 186 | |
181 | | - // Clean up temporary data. |
182 | | - unlink($thumbTemp); |
| 187 | + // Clean up temporary data. |
| 188 | + unlink( $thumbPath ); |
183 | 189 | |
184 | | - } while (false); |
185 | | - |
186 | | - wfProfileOut( __METHOD__ ); |
187 | | - return is_object( $thumb ) ? $thumb : false; |
| 190 | + return $thumb; |
188 | 191 | } |
189 | 192 | |
| 193 | + /** transform inherited */ |
190 | 194 | |
191 | 195 | /** |
192 | | - * Fix thumbnail files from 1.4 or before, with extreme prejudice |
193 | | - * Upgrading directly from 1.4 to 1.8/SwiftMedia is not supported. |
| 196 | + * We have nothing to do here. |
194 | 197 | */ |
195 | 198 | function migrateThumbFile( $thumbName ) { |
196 | | - throw new MWException( __METHOD__.": not implemented" ); |
| 199 | + return; |
197 | 200 | } |
198 | 201 | /** |
199 | 202 | * Get the public root directory of the repository. |
— | — | @@ -208,45 +211,44 @@ |
209 | 212 | |
210 | 213 | /** |
211 | 214 | * Get all thumbnail names previously generated for this file |
| 215 | + * @param $archiveName string: the article name for the archived file (if archived). |
| 216 | + * @return a list of files, the first entry of which is the directory name (if applicable). |
212 | 217 | */ |
213 | | - function getThumbnails() { |
| 218 | + function getThumbnails($archiveName = false) { |
214 | 219 | $this->load(); |
215 | 220 | |
216 | | - $prefix = $this->getRel(); |
| 221 | + if ($archiveName) { |
| 222 | + $prefix = $this->getArchiveRel($archiveName); |
| 223 | + } else { |
| 224 | + $prefix = $this->getRel(); |
| 225 | + } |
217 | 226 | $conn = $this->repo->connect(); |
218 | 227 | $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb"); |
219 | 228 | $files = $container->list_objects(0, NULL, $prefix); |
| 229 | + array_unshift($files, "unused"); # return an unused $dir. |
220 | 230 | return $files; |
221 | 231 | } |
222 | 232 | |
223 | 233 | /** |
224 | 234 | * Delete cached transformed files |
| 235 | + * @param $dir string If needed for this repo, the directory prefix. |
| 236 | + * @param $files array of strings listing the thumbs to be deleted. |
225 | 237 | */ |
226 | | - function purgeThumbnails() { |
227 | | - global $wgUseSquid, $wgExcludeFromThumbnailPurge; |
| 238 | + function purgeThumbList($dir, $files) { |
| 239 | + global $wgExcludeFromThumbnailPurge; |
228 | 240 | |
229 | | - // Delete thumbnails |
230 | | - $files = $this->getThumbnails(); |
231 | | - $urls = array(); |
232 | | - |
233 | 241 | $conn = $this->repo->connect(); |
234 | 242 | $container = $this->repo->get_container($conn,$this->repo->container . "%2Fthumb"); |
235 | 243 | foreach ( $files as $file ) { |
236 | | - // I have no idea how to implement this given that we don't have paths in Swift |
237 | 244 | // Only remove files not in the $wgExcludeFromThumbnailPurge configuration variable |
238 | | - // $ext = pathinfo( "$dir/$file", PATHINFO_EXTENSION ); |
239 | | - //if ( in_array( $ext, $wgExcludeFromThumbnailPurge ) ) { |
240 | | - // continue; |
241 | | - //} |
| 245 | + $ext = pathinfo( "$file", PATHINFO_EXTENSION ); |
| 246 | + if ( in_array( $ext, $wgExcludeFromThumbnailPurge ) ) { |
| 247 | + continue; |
| 248 | + } |
242 | 249 | |
243 | | - $urls[] = $this->getThumbUrl($file); |
| 250 | + wfDebug( __METHOD__ . " deleting " . $container->name . "/$file\n"); |
244 | 251 | $this->repo->swift_delete($container, $file); |
245 | 252 | } |
246 | | - |
247 | | - // Purge the squid |
248 | | - if ( $wgUseSquid ) { |
249 | | - SquidUpdate::purge( $urls ); |
250 | | - } |
251 | 253 | } |
252 | 254 | |
253 | 255 | } // SwiftFile class |
— | — | @@ -759,7 +761,7 @@ |
760 | 762 | |
761 | 763 | $dbw = $this->getMasterDB(); |
762 | 764 | $status = $this->newGood(); |
763 | | - $storageKeys = array_unique($storageKeys); |
| 765 | + $storageKeys = array_unique( $storageKeys ); |
764 | 766 | foreach ( $storageKeys as $key ) { |
765 | 767 | $hashPath = $this->getDeletedHashPath( $key ); |
766 | 768 | $rel = "$hashPath$key"; |
— | — | @@ -834,6 +836,7 @@ |
835 | 837 | if ( $container === false) { |
836 | 838 | throw new MWException( __METHOD__.": invalid zone: $zone" ); |
837 | 839 | } |
| 840 | + wfDebug( __METHOD__.": Z$zone C$container R$rel\n" ); |
838 | 841 | return array($container, rawurldecode( $rel )); |
839 | 842 | } |
840 | 843 | |