Index: trunk/phase3/includes/filerepo/backend/FSFileBackend.php |
— | — | @@ -7,6 +7,10 @@ |
8 | 8 | |
9 | 9 | /** |
10 | 10 | * Class for a file system based file backend. |
| 11 | + * Containers are just directories and container sharding is not supported. |
| 12 | + * Also, for backwards-compatibility, the wiki ID prefix is not used. |
| 13 | + * Users of this class should set wiki-specific container paths as needed. |
| 14 | + * |
11 | 15 | * Status messages should avoid mentioning the internal FS paths. |
12 | 16 | * Likewise, error suppression should be used to avoid path disclosure. |
13 | 17 | * |
— | — | @@ -23,7 +27,7 @@ |
24 | 28 | * containerPaths : Map of container names to absolute file system paths |
25 | 29 | * fileMode : Octal UNIX file permissions to use on files stored |
26 | 30 | */ |
27 | | - function __construct( array $config ) { |
| 31 | + public function __construct( array $config ) { |
28 | 32 | parent::__construct( $config ); |
29 | 33 | $this->containerPaths = (array)$config['containerPaths']; |
30 | 34 | foreach ( $this->containerPaths as &$path ) { |
— | — | @@ -53,7 +57,7 @@ |
54 | 58 | protected function doStoreInternal( array $params ) { |
55 | 59 | $status = Status::newGood(); |
56 | 60 | |
57 | | - list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] ); |
| 61 | + list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] ); |
58 | 62 | if ( $dest === null ) { |
59 | 63 | $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); |
60 | 64 | return $status; |
— | — | @@ -97,13 +101,13 @@ |
98 | 102 | protected function doCopyInternal( array $params ) { |
99 | 103 | $status = Status::newGood(); |
100 | 104 | |
101 | | - list( $c, $source ) = $this->resolveStoragePath( $params['src'] ); |
| 105 | + list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] ); |
102 | 106 | if ( $source === null ) { |
103 | 107 | $status->fatal( 'backend-fail-invalidpath', $params['src'] ); |
104 | 108 | return $status; |
105 | 109 | } |
106 | 110 | |
107 | | - list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] ); |
| 111 | + list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] ); |
108 | 112 | if ( $dest === null ) { |
109 | 113 | $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); |
110 | 114 | return $status; |
— | — | @@ -148,12 +152,12 @@ |
149 | 153 | protected function doMoveInternal( array $params ) { |
150 | 154 | $status = Status::newGood(); |
151 | 155 | |
152 | | - list( $c, $source ) = $this->resolveStoragePath( $params['src'] ); |
| 156 | + list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] ); |
153 | 157 | if ( $source === null ) { |
154 | 158 | $status->fatal( 'backend-fail-invalidpath', $params['src'] ); |
155 | 159 | return $status; |
156 | 160 | } |
157 | | - list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] ); |
| 161 | + list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] ); |
158 | 162 | if ( $dest === null ) { |
159 | 163 | $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); |
160 | 164 | return $status; |
— | — | @@ -200,7 +204,7 @@ |
201 | 205 | protected function doDeleteInternal( array $params ) { |
202 | 206 | $status = Status::newGood(); |
203 | 207 | |
204 | | - list( $c, $source ) = $this->resolveStoragePath( $params['src'] ); |
| 208 | + list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] ); |
205 | 209 | if ( $source === null ) { |
206 | 210 | $status->fatal( 'backend-fail-invalidpath', $params['src'] ); |
207 | 211 | return $status; |
— | — | @@ -230,7 +234,7 @@ |
231 | 235 | protected function doCreateInternal( array $params ) { |
232 | 236 | $status = Status::newGood(); |
233 | 237 | |
234 | | - list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] ); |
| 238 | + list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] ); |
235 | 239 | if ( $dest === null ) { |
236 | 240 | $status->fatal( 'backend-fail-invalidpath', $params['dst'] ); |
237 | 241 | return $status; |
— | — | @@ -270,59 +274,49 @@ |
271 | 275 | } |
272 | 276 | |
273 | 277 | /** |
274 | | - * @see FileBackend::prepare() |
| 278 | + * @see FileBackend::doPrepare() |
275 | 279 | */ |
276 | | - function prepare( array $params ) { |
| 280 | + protected function doPrepare( $container, $dir, array $params ) { |
277 | 281 | $status = Status::newGood(); |
278 | | - list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] ); |
279 | | - if ( $dir === null ) { |
280 | | - $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); |
281 | | - return $status; // invalid storage path |
282 | | - } |
283 | 282 | if ( !wfMkdirParents( $dir ) ) { |
284 | 283 | $status->fatal( 'directorycreateerror', $params['dir'] ); |
285 | | - return $status; |
286 | 284 | } elseif ( !is_writable( $dir ) ) { |
287 | 285 | $status->fatal( 'directoryreadonlyerror', $params['dir'] ); |
288 | | - return $status; |
289 | 286 | } elseif ( !is_readable( $dir ) ) { |
290 | 287 | $status->fatal( 'directorynotreadableerror', $params['dir'] ); |
291 | | - return $status; |
292 | 288 | } |
293 | 289 | return $status; |
294 | 290 | } |
295 | 291 | |
296 | 292 | /** |
297 | | - * @see FileBackend::secure() |
| 293 | + * @see FileBackend::doSecure() |
298 | 294 | */ |
299 | | - function secure( array $params ) { |
| 295 | + protected function doSecure( $container, $dir, array $params ) { |
300 | 296 | $status = Status::newGood(); |
301 | | - list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] ); |
302 | | - if ( $dir === null ) { |
303 | | - $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); |
304 | | - return $status; // invalid storage path |
305 | | - } |
306 | 297 | if ( !wfMkdirParents( $dir ) ) { |
307 | 298 | $status->fatal( 'directorycreateerror', $params['dir'] ); |
308 | 299 | return $status; |
309 | 300 | } |
310 | | - // Add a .htaccess file to the root of the deleted zone |
311 | | - if ( !empty( $params['noAccess'] ) && !file_exists( "{$dir}/.htaccess" ) ) { |
| 301 | + // Seed new directories with a blank index.html, to prevent crawling... |
| 302 | + if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) { |
312 | 303 | wfSuppressWarnings(); |
313 | | - $ok = file_put_contents( "{$dir}/.htaccess", "Deny from all\n" ); |
| 304 | + $ok = file_put_contents( "{$dir}/index.html", '' ); |
314 | 305 | wfRestoreWarnings(); |
315 | 306 | if ( !$ok ) { |
316 | | - $status->fatal( 'backend-fail-create', $params['dir'] . '/.htaccess' ); |
| 307 | + $status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' ); |
317 | 308 | return $status; |
318 | 309 | } |
319 | 310 | } |
320 | | - // Seed new directories with a blank index.html, to prevent crawling |
321 | | - if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) { |
| 311 | + // Add a .htaccess file to the root of the container... |
| 312 | + list( $b, $container, $r ) = FileBackend::splitStoragePath( $params['dir'] ); |
| 313 | + $dirRoot = $this->containerPaths[$container]; // real path |
| 314 | + if ( !empty( $params['noAccess'] ) && !file_exists( "{$dirRoot}/.htaccess" ) ) { |
322 | 315 | wfSuppressWarnings(); |
323 | | - $ok = file_put_contents( "{$dir}/index.html", '' ); |
| 316 | + $ok = file_put_contents( "{$dirRoot}/.htaccess", "Deny from all\n" ); |
324 | 317 | wfRestoreWarnings(); |
325 | 318 | if ( !$ok ) { |
326 | | - $status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' ); |
| 319 | + $storeDir = "mwstore://{$this->name}/{$container}"; |
| 320 | + $status->fatal( 'backend-fail-create', "$storeDir/.htaccess" ); |
327 | 321 | return $status; |
328 | 322 | } |
329 | 323 | } |
— | — | @@ -330,15 +324,10 @@ |
331 | 325 | } |
332 | 326 | |
333 | 327 | /** |
334 | | - * @see FileBackend::clean() |
| 328 | + * @see FileBackend::doClean() |
335 | 329 | */ |
336 | | - function clean( array $params ) { |
| 330 | + protected function doClean( $container, $dir, array $params ) { |
337 | 331 | $status = Status::newGood(); |
338 | | - list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] ); |
339 | | - if ( $dir === null ) { |
340 | | - $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); |
341 | | - return $status; // invalid storage path |
342 | | - } |
343 | 332 | wfSuppressWarnings(); |
344 | 333 | if ( is_dir( $dir ) ) { |
345 | 334 | rmdir( $dir ); // remove directory if empty |
— | — | @@ -350,8 +339,8 @@ |
351 | 340 | /** |
352 | 341 | * @see FileBackend::fileExists() |
353 | 342 | */ |
354 | | - function fileExists( array $params ) { |
355 | | - list( $c, $source ) = $this->resolveStoragePath( $params['src'] ); |
| 343 | + public function fileExists( array $params ) { |
| 344 | + list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] ); |
356 | 345 | if ( $source === null ) { |
357 | 346 | return false; // invalid storage path |
358 | 347 | } |
— | — | @@ -364,8 +353,8 @@ |
365 | 354 | /** |
366 | 355 | * @see FileBackend::getFileTimestamp() |
367 | 356 | */ |
368 | | - function getFileTimestamp( array $params ) { |
369 | | - list( $c, $source ) = $this->resolveStoragePath( $params['src'] ); |
| 357 | + public function getFileTimestamp( array $params ) { |
| 358 | + list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] ); |
370 | 359 | if ( $source === null ) { |
371 | 360 | return false; // invalid storage path |
372 | 361 | } |
— | — | @@ -374,13 +363,9 @@ |
375 | 364 | } |
376 | 365 | |
377 | 366 | /** |
378 | | - * @see FileBackend::getFileList() |
| 367 | + * @see FileBackend::getFileListInternal() |
379 | 368 | */ |
380 | | - function getFileList( array $params ) { |
381 | | - list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] ); |
382 | | - if ( $dir === null ) { // invalid storage path |
383 | | - return null; |
384 | | - } |
| 369 | + public function getFileListInternal( $container, $dir, array $params ) { |
385 | 370 | wfSuppressWarnings(); |
386 | 371 | $exists = is_dir( $dir ); |
387 | 372 | wfRestoreWarnings(); |
— | — | @@ -399,8 +384,8 @@ |
400 | 385 | /** |
401 | 386 | * @see FileBackend::getLocalReference() |
402 | 387 | */ |
403 | | - function getLocalReference( array $params ) { |
404 | | - list( $c, $source ) = $this->resolveStoragePath( $params['src'] ); |
| 388 | + public function getLocalReference( array $params ) { |
| 389 | + list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] ); |
405 | 390 | if ( $source === null ) { |
406 | 391 | return null; |
407 | 392 | } |
— | — | @@ -410,16 +395,14 @@ |
411 | 396 | /** |
412 | 397 | * @see FileBackend::getLocalCopy() |
413 | 398 | */ |
414 | | - function getLocalCopy( array $params ) { |
415 | | - list( $c, $source ) = $this->resolveStoragePath( $params['src'] ); |
| 399 | + public function getLocalCopy( array $params ) { |
| 400 | + list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] ); |
416 | 401 | if ( $source === null ) { |
417 | 402 | return null; |
418 | 403 | } |
419 | 404 | |
420 | | - // Get source file extension |
421 | | - $i = strrpos( $source, '.' ); |
422 | | - $ext = strtolower( $i ? substr( $source, $i + 1 ) : '' ); |
423 | | - // Create a new temporary file... |
| 405 | + // Create a new temporary file with the same extension... |
| 406 | + $ext = FileBackend::extensionFromPath( $params['src'] ); |
424 | 407 | $tmpFile = TempFSFile::factory( wfBaseName( $source ) . '_', $ext ); |
425 | 408 | if ( !$tmpFile ) { |
426 | 409 | return null; |
Index: trunk/phase3/includes/filerepo/backend/FileBackend.php |
— | — | @@ -475,6 +475,8 @@ |
476 | 476 | /** @var Array */ |
477 | 477 | protected $cache = array(); // (storage path => key => value) |
478 | 478 | protected $maxCacheSize = 50; // integer; max paths with entries |
| 479 | + /** @var Array */ |
| 480 | + protected $shardViaHashLevels = array(); // (container name => integer) |
479 | 481 | |
480 | 482 | /** |
481 | 483 | * Create a file in the backend with the given contents. |
— | — | @@ -600,7 +602,7 @@ |
601 | 603 | * Do not call this function from places outside FileBackend and FileOp. |
602 | 604 | * $params include: |
603 | 605 | * srcs : ordered source storage paths (e.g. chunk1, chunk2, ...) |
604 | | - * dst : destination storage path |
| 606 | + * dst : file system path to 0-byte temp file |
605 | 607 | * overwriteDest : overwrite any file that exists at the destination |
606 | 608 | * |
607 | 609 | * @param $params Array |
— | — | @@ -608,7 +610,6 @@ |
609 | 611 | */ |
610 | 612 | final public function concatenateInternal( array $params ) { |
611 | 613 | $status = $this->doConcatenateInternal( $params ); |
612 | | - $this->clearCache( array( $params['dst'] ) ); |
613 | 614 | return $status; |
614 | 615 | } |
615 | 616 | |
— | — | @@ -668,21 +669,87 @@ |
669 | 670 | /** |
670 | 671 | * @see FileBackendBase::prepare() |
671 | 672 | */ |
672 | | - public function prepare( array $params ) { |
| 673 | + final public function prepare( array $params ) { |
| 674 | + $status = Status::newGood(); |
| 675 | + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); |
| 676 | + if ( $dir === null ) { |
| 677 | + $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); |
| 678 | + return $status; // invalid storage path |
| 679 | + } |
| 680 | + if ( $shard !== null ) { // confined to a single container/shard |
| 681 | + $status->merge( $this->doPrepare( $fullCont, $dir, $params ) ); |
| 682 | + } else { // directory is on several shards |
| 683 | + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); |
| 684 | + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); |
| 685 | + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { |
| 686 | + $status->merge( $this->doPrepare( "{$fullCont}{$suffix}", $dir, $params ) ); |
| 687 | + } |
| 688 | + } |
| 689 | + return $status; |
| 690 | + } |
| 691 | + |
| 692 | + /** |
| 693 | + * @see FileBackend::prepare() |
| 694 | + */ |
| 695 | + protected function doPrepare( $container, $dir, array $params ) { |
673 | 696 | return Status::newGood(); |
674 | 697 | } |
675 | 698 | |
676 | 699 | /** |
677 | 700 | * @see FileBackendBase::secure() |
678 | 701 | */ |
679 | | - public function secure( array $params ) { |
| 702 | + final public function secure( array $params ) { |
| 703 | + $status = Status::newGood(); |
| 704 | + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); |
| 705 | + if ( $dir === null ) { |
| 706 | + $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); |
| 707 | + return $status; // invalid storage path |
| 708 | + } |
| 709 | + if ( $shard !== null ) { // confined to a single container/shard |
| 710 | + $status->merge( $this->doSecure( $fullCont, $dir, $params ) ); |
| 711 | + } else { // directory is on several shards |
| 712 | + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); |
| 713 | + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); |
| 714 | + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { |
| 715 | + $status->merge( $this->doSecure( "{$fullCont}{$suffix}", $dir, $params ) ); |
| 716 | + } |
| 717 | + } |
| 718 | + return $status; |
| 719 | + } |
| 720 | + |
| 721 | + /** |
| 722 | + * @see FileBackend::secure() |
| 723 | + */ |
| 724 | + protected function doSecure( $container, $dir, array $params ) { |
680 | 725 | return Status::newGood(); |
681 | 726 | } |
682 | 727 | |
683 | 728 | /** |
684 | 729 | * @see FileBackendBase::clean() |
685 | 730 | */ |
686 | | - public function clean( array $params ) { |
| 731 | + final public function clean( array $params ) { |
| 732 | + $status = Status::newGood(); |
| 733 | + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); |
| 734 | + if ( $dir === null ) { |
| 735 | + $status->fatal( 'backend-fail-invalidpath', $params['dir'] ); |
| 736 | + return $status; // invalid storage path |
| 737 | + } |
| 738 | + if ( $shard !== null ) { // confined to a single container/shard |
| 739 | + $status->merge( $this->doClean( $fullCont, $dir, $params ) ); |
| 740 | + } else { // directory is on several shards |
| 741 | + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); |
| 742 | + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); |
| 743 | + foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) { |
| 744 | + $status->merge( $this->doClean( "{$fullCont}{$suffix}", $dir, $params ) ); |
| 745 | + } |
| 746 | + } |
| 747 | + return $status; |
| 748 | + } |
| 749 | + |
| 750 | + /** |
| 751 | + * @see FileBackend::clean() |
| 752 | + */ |
| 753 | + protected function doClean( $container, $dir, array $params ) { |
687 | 754 | return Status::newGood(); |
688 | 755 | } |
689 | 756 | |
— | — | @@ -752,6 +819,36 @@ |
753 | 820 | } |
754 | 821 | |
755 | 822 | /** |
| 823 | + * @see FileBackendBase::getFileList() |
| 824 | + */ |
| 825 | + final public function getFileList( array $params ) { |
| 826 | + list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] ); |
| 827 | + if ( $dir === null ) { // invalid storage path |
| 828 | + return null; |
| 829 | + } |
| 830 | + if ( $shard !== null ) { |
| 831 | + // File listing is confined to a single container/shard |
| 832 | + return $this->getFileListInternal( $fullCont, $dir, $params ); |
| 833 | + } else { |
| 834 | + wfDebug( __METHOD__ . ": iterating over all container shards.\n" ); |
| 835 | + // File listing spans multiple shards; iterator args are (backend, container, dir, suffixes, params) |
| 836 | + list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] ); |
| 837 | + return new ContainerShardListIterator( $this, |
| 838 | + $fullCont, $dir, $this->getContainerSuffixes( $shortCont ), $params ); |
| 839 | + } |
| 840 | + } |
| 841 | + |
| 842 | + /** |
| 843 | + * Do not call this function from places outside FileBackend and ContainerShardListIterator |
| 844 | + * |
| 845 | + * @param $container string Resolved container name |
| 846 | + * @param $dir string Resolved path relative to container |
| 847 | + * @param $params Array |
| 848 | + * @see FileBackend::getFileList() |
| 849 | + */ |
| 850 | + abstract public function getFileListInternal( $container, $dir, array $params ); |
| 851 | + |
| 852 | + /** |
756 | 853 | * Get the list of supported operations and their corresponding FileOp classes. |
757 | 854 | * |
758 | 855 | * @return Array |
— | — | @@ -906,7 +1003,8 @@ |
907 | 1004 | // This accounts for Swift and S3 restrictions. Also note |
908 | 1005 | // that these urlencode to the same string, which is useful |
909 | 1006 | // since the Swift size limit is *after* URL encoding. |
910 | | - return preg_match( '/^[a-zA-Z0-9._-]{1,256}$/u', $container ); |
| 1007 | + // Limit to 200 to leave room for '.shard-XX' or '.segment'. |
| 1008 | + return preg_match( '/^[a-zA-Z0-9._-]{1,200}$/u', $container ); |
911 | 1009 | } |
912 | 1010 | |
913 | 1011 | /** |
— | — | @@ -937,34 +1035,123 @@ |
938 | 1036 | } |
939 | 1037 | |
940 | 1038 | /** |
941 | | - * Split a storage path (e.g. "mwstore://backend/container/path/to/object") |
942 | | - * into an internal container name and an internal relative object name. |
943 | | - * This also checks that the storage path is valid and is within this backend. |
| 1039 | + * Splits a storage path into an internal container name, |
| 1040 | + * an internal relative object name, and a container shard suffix. |
| 1041 | + * Any shard suffix is already appended to the internal container name. |
| 1042 | + * This also checks that the storage path is valid and within this backend. |
944 | 1043 | * |
| 1044 | + * If the container is sharded but a suffix could not be determined, |
| 1045 | + * this means that the path can only refer to a directory and can only |
| 1046 | + * be scanned by looking in all the container shards. |
| 1047 | + * |
945 | 1048 | * @param $storagePath string |
946 | | - * @return Array (container, object name) or (null, null) if path is invalid |
| 1049 | + * @return Array (container, path, container suffix) or (null, null, null) if invalid |
947 | 1050 | */ |
948 | 1051 | final protected function resolveStoragePath( $storagePath ) { |
949 | 1052 | list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath ); |
950 | 1053 | if ( $backend === $this->name ) { // must be for this backend |
951 | 1054 | $relPath = self::normalizeStoragePath( $relPath ); |
952 | 1055 | if ( $relPath !== null ) { |
| 1056 | + // Get shard for the normalized path if this container is sharded |
| 1057 | + $cShard = $this->getContainerShard( $container, $relPath ); |
| 1058 | + // Validate and sanitize the relative path (backend-specific) |
953 | 1059 | $relPath = $this->resolveContainerPath( $container, $relPath ); |
954 | 1060 | if ( $relPath !== null ) { |
| 1061 | + // Prepend any wiki ID prefix to the container name |
955 | 1062 | $container = $this->fullContainerName( $container ); |
956 | 1063 | if ( self::isValidContainerName( $container ) ) { |
957 | | - $container = $this->resolveContainerName( $container ); |
| 1064 | + // Validate and sanitize the container name (backend-specific) |
| 1065 | + $container = $this->resolveContainerName( "{$container}{$cShard}" ); |
958 | 1066 | if ( $container !== null ) { |
959 | | - return array( $container, $relPath ); |
| 1067 | + return array( $container, $relPath, $cShard ); |
960 | 1068 | } |
961 | 1069 | } |
962 | 1070 | } |
963 | 1071 | } |
964 | 1072 | } |
| 1073 | + return array( null, null, null ); |
| 1074 | + } |
| 1075 | + |
| 1076 | + /** |
| 1077 | + * Like resolveStoragePath() except null values are returned if |
| 1078 | + * the container is sharded and the shard could not be determined. |
| 1079 | + * |
| 1080 | + * @see FileBackend::resolveStoragePath() |
| 1081 | + * |
| 1082 | + * @param $storagePath string |
| 1083 | + * @return Array (container, path) or (null, null) if invalid |
| 1084 | + */ |
| 1085 | + final protected function resolveStoragePathReal( $storagePath ) { |
| 1086 | + list( $container, $relPath, $cShard ) = $this->resolveStoragePath( $storagePath ); |
| 1087 | + if ( $cShard !== null ) { |
| 1088 | + return array( $container, $relPath ); |
| 1089 | + } |
965 | 1090 | return array( null, null ); |
966 | 1091 | } |
967 | 1092 | |
968 | 1093 | /** |
| 1094 | + * Get the container name shard suffix for a given path. |
| 1095 | + * An empty suffix means the container is not sharded. |
| 1096 | + * |
| 1097 | + * @param $container string Container name |
| 1098 | + * @param $relPath string Storage path relative to the container |
| 1099 | + * @return string|null Returns null if shard could not be determined |
| 1100 | + */ |
| 1101 | + final protected function getContainerShard( $container, $relPath ) { |
| 1102 | + $hashLevels = $this->getContainerHashLevels( $container ); |
| 1103 | + if ( $hashLevels === 1 ) { // 16 shards per container |
| 1104 | + $hashDirRegex = '(?P<shard>[0-9a-f])'; |
| 1105 | + } elseif ( $hashLevels === 2 ) { // 256 shards per container |
| 1106 | + $hashDirRegex = '[0-9a-f]/(?P<shard>[0-9a-f]{2})'; |
| 1107 | + } else { |
| 1108 | + return ''; // no sharding |
| 1109 | + } |
| 1110 | + // Allow certain directories to be above the hash dirs so as |
| 1111 | + // to work with FileRepo (e.g. "archive/a/ab" or "temp/a/ab"). |
| 1112 | + // They must be 2+ chars to avoid any hash directory ambiguity. |
| 1113 | + if ( preg_match( "!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $m ) ) { |
| 1114 | + return '.shard-' . str_pad( $m['shard'], $hashLevels, '0', STR_PAD_LEFT ); |
| 1115 | + } |
| 1116 | + return null; // failed to match |
| 1117 | + } |
| 1118 | + |
| 1119 | + /** |
| 1120 | + * Get the number of hash levels for a container. |
| 1121 | + * If greater than 0, then all file storage paths within |
| 1122 | + * the container are required to be hashed accordingly. |
| 1123 | + * |
| 1124 | + * @param $container string |
| 1125 | + * @return integer |
| 1126 | + */ |
| 1127 | + final protected function getContainerHashLevels( $container ) { |
| 1128 | + if ( isset( $this->shardViaHashLevels[$container] ) ) { |
| 1129 | + $hashLevels = (int)$this->shardViaHashLevels[$container]; |
| 1130 | + if ( $hashLevels >= 0 && $hashLevels <= 2 ) { |
| 1131 | + return $hashLevels; |
| 1132 | + } |
| 1133 | + } |
| 1134 | + return 0; // no sharding |
| 1135 | + } |
| 1136 | + |
| 1137 | + /** |
| 1138 | + * Get a list of full container shard suffixes for a container |
| 1139 | + * |
| 1140 | + * @param $container string |
| 1141 | + * @return Array |
| 1142 | + */ |
| 1143 | + final protected function getContainerSuffixes( $container ) { |
| 1144 | + $shards = array(); |
| 1145 | + $digits = $this->getContainerHashLevels( $container ); |
| 1146 | + if ( $digits > 0 ) { |
| 1147 | + $numShards = 1 << ( $digits * 4 ); |
| 1148 | + for ( $index = 0; $index < $numShards; $index++ ) { |
| 1149 | + $shards[] = '.shard-' . str_pad( dechex( $index ), $digits, '0', STR_PAD_LEFT ); |
| 1150 | + } |
| 1151 | + } |
| 1152 | + return $shards; |
| 1153 | + } |
| 1154 | + |
| 1155 | + /** |
969 | 1156 | * Get the full container name, including the wiki ID prefix |
970 | 1157 | * |
971 | 1158 | * @param $container string |
— | — | @@ -996,8 +1183,8 @@ |
997 | 1184 | * getting absolute paths (e.g. FS based backends). Note that the relative path |
998 | 1185 | * may be the empty string (e.g. the path is simply to the container). |
999 | 1186 | * |
1000 | | - * @param $container string Container the path is relative to |
1001 | | - * @param $relStoragePath string Relative storage path |
| 1187 | + * @param $container string Container name |
| 1188 | + * @param $relStoragePath string Storage path relative to the container |
1002 | 1189 | * @return string|null Path or null if not valid |
1003 | 1190 | */ |
1004 | 1191 | protected function resolveContainerPath( $container, $relStoragePath ) { |
— | — | @@ -1015,3 +1202,102 @@ |
1016 | 1203 | return strtolower( $i ? substr( $path, $i + 1 ) : '' ); |
1017 | 1204 | } |
1018 | 1205 | } |
| 1206 | + |
| 1207 | +/** |
| 1208 | + * FileBackend helper class to handle file listings that span container shards. |
| 1209 | + * Do not use this class from places outside of FileBackend. |
| 1210 | + * |
| 1211 | + * @ingroup FileBackend |
| 1212 | + */ |
| 1213 | +class ContainerShardListIterator implements Iterator { |
| 1214 | + /** @var FileBackend */ |
| 1215 | + protected $backend; |
| 1216 | + /** @var Array */ |
| 1217 | + protected $params; |
| 1218 | + /** @var Array */ |
| 1219 | + protected $shardSuffixes; |
| 1220 | + protected $container; // string |
| 1221 | + protected $directory; // string |
| 1222 | + |
| 1223 | + /** @var Traversable */ |
| 1224 | + protected $iter; |
| 1225 | + protected $curShard = 0; // integer |
| 1226 | + protected $pos = 0; // integer |
| 1227 | + |
| 1228 | + /** |
| 1229 | + * @param $backend FileBackend |
| 1230 | + * @param $container string Full storage container name |
| 1231 | + * @param $dir string Storage directory relative to container |
| 1232 | + * @param $suffixes Array List of container shard suffixes |
| 1233 | + * @param $params Array |
| 1234 | + */ |
| 1235 | + public function __construct( |
| 1236 | + FileBackend $backend, $container, $dir, array $suffixes, array $params |
| 1237 | + ) { |
| 1238 | + $this->backend = $backend; |
| 1239 | + $this->container = $container; |
| 1240 | + $this->directory = $dir; |
| 1241 | + $this->shardSuffixes = $suffixes; |
| 1242 | + $this->params = $params; |
| 1243 | + } |
| 1244 | + |
| 1245 | + public function current() { |
| 1246 | + if ( is_array( $this->iter ) ) { |
| 1247 | + return current( $this->iter ); |
| 1248 | + } else { |
| 1249 | + return $this->iter->current(); |
| 1250 | + } |
| 1251 | + } |
| 1252 | + |
| 1253 | + public function key() { |
| 1254 | + return $this->pos; |
| 1255 | + } |
| 1256 | + |
| 1257 | + public function next() { |
| 1258 | + ++$this->pos; |
| 1259 | + if ( is_array( $this->iter ) ) { |
| 1260 | + next( $this->iter ); |
| 1261 | + } else { |
| 1262 | + $this->iter->next(); |
| 1263 | + } |
| 1264 | + // Find the next non-empty shard if no elements are left |
| 1265 | + $this->nextShardIteratorIfNotValid(); |
| 1266 | + } |
| 1267 | + |
| 1268 | + /** |
| 1269 | + * If the iterator for this container shard is out of items, |
| 1270 | + * then move on to the next container that has items. |
| 1271 | + */ |
| 1272 | + protected function nextShardIteratorIfNotValid() { |
| 1273 | + while ( !$this->valid() ) { |
| 1274 | + if ( ++$this->curShard >= count( $this->shardSuffixes ) ) { |
| 1275 | + break; // no more container shards |
| 1276 | + } |
| 1277 | + $this->setIteratorFromCurrentShard(); |
| 1278 | + } |
| 1279 | + } |
| 1280 | + |
| 1281 | + protected function setIteratorFromCurrentShard() { |
| 1282 | + $suffix = $this->shardSuffixes[$this->curShard]; |
| 1283 | + $this->iter = $this->backend->getFileListInternal( |
| 1284 | + "{$this->container}{$suffix}", $this->directory, $this->params ); |
| 1285 | + } |
| 1286 | + |
| 1287 | + public function rewind() { |
| 1288 | + $this->pos = 0; |
| 1289 | + $this->curShard = 0; |
| 1290 | + $this->setIteratorFromCurrentShard(); |
| 1291 | + // Find the next non-empty shard if this one has no elements |
| 1292 | + $this->nextShardIteratorIfNotValid(); |
| 1293 | + } |
| 1294 | + |
| 1295 | + public function valid() { |
| 1296 | + if ( $this->iter == null ) { |
| 1297 | + return false; // some failure? |
| 1298 | + } elseif ( is_array( $this->iter ) ) { |
| 1299 | + return ( current( $this->iter ) !== false ); // no paths can have this value |
| 1300 | + } else { |
| 1301 | + return $this->iter->valid(); |
| 1302 | + } |
| 1303 | + } |
| 1304 | +} |