Index: trunk/phase3/includes/filerepo/backend/SwiftFileBackend.php |
— | — | @@ -39,7 +39,11 @@ |
40 | 40 | * swiftKey : Swift authentication key for the above user |
41 | 41 | * swiftAuthTTL : Swift authentication TTL (seconds) |
42 | 42 | * swiftAnonUser : Swift user used for end-user requests (account:username) |
43 | | - * shardViaHashLevels : Map of container names to the number of hash levels |
| 43 | + * shardViaHashLevels : Map of container names to sharding config with: |
| 44 | + * 'base' : base of hash characters, 16 or 36 |
| 45 | + * 'levels' : the number of hash levels (and digits) |
| 46 | + * 'repeat' : hash subdirectories are prefixed with all the |
| 47 | + * parent hash directory names (e.g. "a/ab/abc") |
44 | 48 | */ |
45 | 49 | public function __construct( array $config ) { |
46 | 50 | parent::__construct( $config ); |
Index: trunk/phase3/includes/filerepo/backend/FileBackend.php |
— | — | @@ -696,8 +696,8 @@ |
697 | 697 | protected $expCache = array(); // (storage path => key => value) |
698 | 698 | protected $maxExpCacheSize = 10; // integer; max paths with entries |
699 | 699 | |
700 | | - /** @var Array */ |
701 | | - protected $shardViaHashLevels = array(); // (container name => integer) |
| 700 | + /** @var Array Map of container names to sharding settings */ |
| 701 | + protected $shardViaHashLevels = array(); // (container name => config array) |
702 | 702 | |
703 | 703 | protected $maxFileSize = 1000000000; // integer bytes (1GB) |
704 | 704 | |
— | — | @@ -1492,40 +1492,53 @@ |
1493 | 1493 | * @return string|null Returns null if shard could not be determined |
1494 | 1494 | */ |
1495 | 1495 | final protected function getContainerShard( $container, $relPath ) { |
1496 | | - $hashLevels = $this->getContainerHashLevels( $container ); |
1497 | | - if ( $hashLevels === 1 ) { // 16 shards per container |
1498 | | - $hashDirRegex = '(?P<shard>[0-9a-f])'; |
1499 | | - } elseif ( $hashLevels === 2 ) { // 256 shards per container |
1500 | | - $hashDirRegex = '[0-9a-f]/(?P<shard>[0-9a-f]{2})'; |
1501 | | - } else { |
1502 | | - return ''; // no sharding |
| 1496 | + list( $levels, $base, $repeat ) = $this->getContainerHashLevels( $container ); |
| 1497 | + if ( $levels == 1 || $levels == 2 ) { |
| 1498 | + // Hash characters are either base 16 or 36 |
| 1499 | + $char = ( $base == 36 ) ? '[0-9a-z]' : '[0-9a-f]'; |
| 1500 | + // Get a regex that represents the shard portion of paths. |
| 1501 | + // The concatenation of the captures gives us the shard. |
| 1502 | + if ( $levels === 1 ) { // 16 or 36 shards per container |
| 1503 | + $hashDirRegex = '(' . $char . ')'; |
| 1504 | + } else { // 256 or 1296 shards per container |
| 1505 | + if ( $repeat ) { // verbose hash dir format (e.g. "a/ab/abc") |
| 1506 | + $hashDirRegex = $char . '/(' . $char . '{2})'; |
| 1507 | + } else { // short hash dir format (e.g. "a/b/c") |
| 1508 | + $hashDirRegex = '(' . $char . ')/(' . $char . ')'; |
| 1509 | + } |
| 1510 | + } |
| 1511 | + // Allow certain directories to be above the hash dirs so as |
| 1512 | + // to work with FileRepo (e.g. "archive/a/ab" or "temp/a/ab"). |
| 1513 | + // They must be 2+ chars to avoid any hash directory ambiguity. |
| 1514 | + $m = array(); |
| 1515 | + if ( preg_match( "!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $m ) ) { |
| 1516 | + return '.' . implode( '', array_slice( $m, 1 ) ); |
| 1517 | + } |
| 1518 | + return null; // failed to match |
1503 | 1519 | } |
1504 | | - // Allow certain directories to be above the hash dirs so as |
1505 | | - // to work with FileRepo (e.g. "archive/a/ab" or "temp/a/ab"). |
1506 | | - // They must be 2+ chars to avoid any hash directory ambiguity. |
1507 | | - $m = array(); |
1508 | | - if ( preg_match( "!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $m ) ) { |
1509 | | - return '.' . $m['shard']; |
1510 | | - } |
1511 | | - return null; // failed to match |
| 1520 | + return ''; // no sharding |
1512 | 1521 | } |
1513 | 1522 | |
1514 | 1523 | /** |
1515 | | - * Get the number of hash levels for a container. |
| 1524 | + * Get the sharding config for a container. |
1516 | 1525 | * If greater than 0, then all file storage paths within |
1517 | 1526 | * the container are required to be hashed accordingly. |
1518 | 1527 | * |
1519 | 1528 | * @param $container string |
1520 | | - * @return integer |
| 1529 | + * @return Array (integer levels, integer base, repeat flag) or (0, 0, false) |
1521 | 1530 | */ |
1522 | 1531 | final protected function getContainerHashLevels( $container ) { |
1523 | 1532 | if ( isset( $this->shardViaHashLevels[$container] ) ) { |
1524 | | - $hashLevels = (int)$this->shardViaHashLevels[$container]; |
1525 | | - if ( $hashLevels >= 0 && $hashLevels <= 2 ) { |
1526 | | - return $hashLevels; |
| 1533 | + $config = $this->shardViaHashLevels[$container]; |
| 1534 | + $hashLevels = (int)$config['levels']; |
| 1535 | + if ( $hashLevels == 1 || $hashLevels == 2 ) { |
| 1536 | + $hashBase = (int)$config['base']; |
| 1537 | + if ( $hashBase == 16 || $hashBase == 36 ) { |
| 1538 | + return array( $hashLevels, $hashBase, $config['repeat'] ); |
| 1539 | + } |
1527 | 1540 | } |
1528 | 1541 | } |
1529 | | - return 0; // no sharding |
| 1542 | + return array( 0, 0, false ); // no sharding |
1530 | 1543 | } |
1531 | 1544 | |
1532 | 1545 | /** |
— | — | @@ -1536,11 +1549,11 @@ |
1537 | 1550 | */ |
1538 | 1551 | final protected function getContainerSuffixes( $container ) { |
1539 | 1552 | $shards = array(); |
1540 | | - $digits = $this->getContainerHashLevels( $container ); |
| 1553 | + list( $digits, $base ) = $this->getContainerHashLevels( $container ); |
1541 | 1554 | if ( $digits > 0 ) { |
1542 | | - $numShards = 1 << ( $digits * 4 ); |
| 1555 | + $numShards = pow( $base, $digits ); |
1543 | 1556 | for ( $index = 0; $index < $numShards; $index++ ) { |
1544 | | - $shards[] = '.' . str_pad( dechex( $index ), $digits, '0', STR_PAD_LEFT ); |
| 1557 | + $shards[] = '.' . wfBaseConvert( $index, 10, $base, $digits ); |
1545 | 1558 | } |
1546 | 1559 | } |
1547 | 1560 | return $shards; |