Index: trunk/extensions/SwiftMedia/wmf/rewrite.py |
— | — | @@ -173,47 +173,35 @@ |
174 | 174 | # keep a copy of the original request so we can ask the scalers for it |
175 | 175 | reqorig = req.copy() |
176 | 176 | |
177 | | - # match these two URL forms (source files and thumbnails): |
178 | | - # http://upload.wikimedia.org/<proj>/<lang>/.* |
179 | | - # http://upload.wikimedia.org/<proj>/<lang>/thumb/.* |
180 | | - # example: |
181 | | - # http://upload.wikimedia.org/wikipedia/commons/a/aa/000_Finlanda_harta.PNG |
182 | | - # http://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/000_Finlanda_harta.PNG/75px-000_Finlanda_harta.PNG |
183 | | - # http://upload.wikimedia.org/wikipedia/commons/thumb/archive/b/b6/20101108115418!Gilbert_Stuart_Williamstown_Portrait_of_George_Washington.jpg/100px-Gilbert_Stuart_Williamstown_Portrait_of_George_Washington.jpg |
184 | | - match = re.match(r'/(?P<proj>[^/]+?)/(?P<lang>[^/]+?)/(?P<thumb>thumb/)?(?P<archive>(temp|archive)/)?(?P<shard>[0-9a-f]/[0-9a-f]{2}/)?(?P<path>.*)', req.path) |
| 177 | + # Rewrite URLs of these forms (source, temp, and thumbnail files): |
| 178 | + # (a) http://upload.wikimedia.org/<proj>/<lang>/.* |
| 179 | + # => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-public/.* |
| 180 | + # (b) http://upload.wikimedia.org/<proj>/<lang>/archive/.* |
| 181 | + # => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-public/archive/.* |
| 182 | + # (c) http://upload.wikimedia.org/<proj>/<lang>/thumb/.* |
| 183 | + # => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/.* |
| 184 | + # (d) http://upload.wikimedia.org/<proj>/<lang>/thumb/archive/.* |
| 185 | + # => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/archive/.* |
| 186 | + # (e) http://upload.wikimedia.org/<proj>/<lang>/thumb/temp/.* |
| 187 | + # => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-thumb/temp/.* |
| 188 | + # (f) http://upload.wikimedia.org/<proj>/<lang>/temp/.* |
| 189 | + # => http://msfe/v1/AUTH_<hash>/<proj>-<lang>-local-temp/.* |
| 190 | + match = re.match(r'^/(?P<proj>[^/]+)/(?P<lang>[^/]+)/(?P<zone>(thumb|temp)/)?(?P<path>((temp|archive)/)?[0-9a-f]/(?P<shard>[0-9a-f]{2})/.+)$', req.path) |
185 | 191 | if match: |
186 | | - # Our target URL is as follows (example): |
187 | | - # https://alsted.wikimedia.org:8080/v1/AUTH_6790933748e741268babd69804c6298b/wikipedia-en-25/Machinesmith.png |
188 | | - # http://msfe/v1/AUTH_6790933748e741268babd69804c6298b/wikipedia-commons-aa/000_Finlanda_harta.PNG |
189 | | - # http://mfse/v1/AUTH_6790933748e741268babd69804c6298b/wikipedia-commons-thumb-aa/000_Finlanda_harta.PNG/75px-000_Finlanda_harta.PNG |
| 192 | + # Get the repo zone (if not provided that means "public") |
| 193 | + zone = match.group('zone') if match.group('zone') else 'public' |
| 194 | + # Get the object path relative to the zone (and thus container) |
| 195 | + obj = match.group('path') # e.g. "archive/a/ab/..." |
190 | 196 | |
191 | | - # turn slashes in the container name into hyphens |
192 | | - container = "%s-%s" % (match.group('proj'), match.group('lang')) #02 |
193 | | - thumb = match.group('thumb') |
194 | | - arch = match.group('archive') |
195 | | - shard = match.group('shard') |
196 | | - obj = match.group('path') |
197 | | - # include the thumb in the container. |
198 | | - if thumb: #03 |
199 | | - container += "-thumb" |
200 | | - |
201 | | - # only pull out shard if we're supposed to shard this container |
| 197 | + # Get the per-project "conceptual" container name, e.g. "<proj>-<lang>-<repo>-<zone>" |
| 198 | + container = "%s-%s-local-%s" % (match.group('proj'), match.group('lang'), zone) #02/#03 |
| 199 | + # Add 2-digit shard to the container if it is supposed to be sharded. |
| 200 | + # We may thus have an "actual" container name like "<proj>-<lang>-<repo>-<zone>.<shard>" |
202 | 201 | if ( (self.shard_containers == 'all') or \ |
203 | | - ((self.shard_containers == 'some') and (container in self.shard_container_list))): |
204 | | - if shard: |
205 | | - #add only the 2-digit shard to the container name |
206 | | - container += ".%s" % shard[2:4] |
207 | | - if arch: |
208 | | - # for urls that go /wiki/thumb/archive/a/ab/path, the container is wiki-thumb-ab and the obj is archive/path |
209 | | - # aka pull the shard into the container if necessary but the string 'archive' or 'temp' goes into the object. |
210 | | - obj = "%s%s" % (arch, obj) |
| 202 | + ((self.shard_containers == 'some') and (container in self.shard_container_list)) ): |
| 203 | + container += ".%s" % match.group('shard') |
211 | 204 | |
212 | | - if not obj: |
213 | | - # don't let them list the contents of the container (it's CRAZY huge) #08 |
214 | | - resp = webob.exc.HTTPForbidden('No container listing') |
215 | | - return resp(env, start_response) |
216 | | - |
217 | | - # save a url with just the account name in it. |
| 205 | + # Save a url with just the account name in it. |
218 | 206 | req.path_info = "/v1/%s" % (self.account) |
219 | 207 | port = self.bind_port |
220 | 208 | req.host = '127.0.0.1:%s' % port |
— | — | @@ -248,8 +236,6 @@ |
249 | 237 | else: |
250 | 238 | resp = webob.exc.HTTPBadRequest('Regexp failed: "%s"' % (req.path)) #11 |
251 | 239 | return resp(env, start_response) |
252 | | - #except: |
253 | | - #return webob.exc.HTTPNotFound('Internal error')(env, start_response) |
254 | 240 | |
255 | 241 | def filter_factory(global_conf, **local_conf): |
256 | 242 | conf = global_conf.copy() |