Index: branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php |
— | — | @@ -79,7 +79,6 @@ |
80 | 80 | foreach ( $performOps as $index => $fileOp ) { |
81 | 81 | $filesToLock = array_merge( $filesToLock, $fileOp->storagePathsToLock() ); |
82 | 82 | } |
83 | | - $filesToLock = array_unique( $filesToLock ); // avoid warnings |
84 | 83 | } |
85 | 84 | } |
86 | 85 | |
Index: branches/FileBackend/phase3/includes/filerepo/backend/FileLockManager.php |
— | — | @@ -28,7 +28,7 @@ |
29 | 29 | * @return Status |
30 | 30 | */ |
31 | 31 | final public function lock( array $paths, $type = self::LOCK_EX ) { |
32 | | - $keys = array_map( 'sha1', $paths ); |
| 32 | + $keys = array_unique( array_map( 'sha1', $paths ) ); |
33 | 33 | return $this->doLock( $keys, $type ); |
34 | 34 | } |
35 | 35 | |
— | — | @@ -39,7 +39,7 @@ |
40 | 40 | * @return Status |
41 | 41 | */ |
42 | 42 | final public function unlock( array $paths ) { |
43 | | - $keys = array_map( 'sha1', $paths ); |
| 43 | + $keys = array_unique( array_map( 'sha1', $paths ) ); |
44 | 44 | return $this->doUnlock( $keys, 0 ); |
45 | 45 | } |
46 | 46 | |
— | — | @@ -80,7 +80,7 @@ |
81 | 81 | $this->lockDir = $config['lockDirectory']; |
82 | 82 | } |
83 | 83 | |
84 | | - function doLock( array $keys, $type ) { |
| 84 | + protected function doLock( array $keys, $type ) { |
85 | 85 | $status = Status::newGood(); |
86 | 86 | |
87 | 87 | $lockedKeys = array(); // files locked in this attempt |
— | — | @@ -103,7 +103,7 @@ |
104 | 104 | return $status; |
105 | 105 | } |
106 | 106 | |
107 | | - function doUnlock( array $keys, $type ) { |
| 107 | + protected function doUnlock( array $keys, $type ) { |
108 | 108 | $status = Status::newGood(); |
109 | 109 | |
110 | 110 | foreach ( $keys as $key ) { |
— | — | @@ -224,9 +224,9 @@ |
225 | 225 | * |
226 | 226 | * All lock requests for a resource, identified by a hash string, will |
227 | 227 | * map to one bucket. Each bucket maps to one or several peer DB servers, |
228 | | - * each of which have a `file_locks` table with row-level locking. |
| 228 | + * each having a `file_locks` table with row-level locking. |
229 | 229 | * |
230 | | - * All peer servers must agree to a lock in order for it to be acquired. |
| 230 | + * A majority of peer servers must agree for a lock to be acquired. |
231 | 231 | * As long as one peer server is up, lock requests will not be blocked |
232 | 232 | * just because another peer server cannot be contacted. A global status |
233 | 233 | * cache can be setup to track servers that recently missed queries; such |
— | — | @@ -242,6 +242,7 @@ |
243 | 243 | /** @var BagOStuff */ |
244 | 244 | protected $statusCache; |
245 | 245 | |
| 246 | + protected $trustCache; // boolean |
246 | 247 | protected $webTimeout; // integer number of seconds |
247 | 248 | protected $cliTimeout; // integer number of seconds |
248 | 249 | protected $safeDelay; // integer number of seconds |
— | — | @@ -254,18 +255,21 @@ |
255 | 256 | /** |
256 | 257 | * Construct a new instance from configuration. |
257 | 258 | * $config paramaters include: |
258 | | - * 'dbsByBucket' : Array of 1-16 consecutive integer keys, starting from 0, |
259 | | - * with a list of database names (peers) as values. |
260 | | - * Each DB should be on its own server. |
261 | | - * 'statusCache' : $wgMemc if set to a global memcached instance. [optional] |
262 | | - * This tracks peer servers that couldn't be queried recently. |
263 | | - * 'webTimeout' : Connection timeout (seconds) for non-CLI scripts. [optional] |
264 | | - * This tells the DB server how long to wait before giving up |
265 | | - * and releasing all the locks made in a session transaction. |
266 | | - * 'cliTimeout' : Connection timeout (seconds) for CLI scripts. [optional] |
| 259 | + * 'dbsByBucket' : Array of 1-16 consecutive integer keys, starting from 0, with |
| 260 | + * a list of DB names (peers) as values. Each list should have |
| 261 | + * an odd number of items and each DB should have its own server. |
| 262 | + * 'webTimeout' : Lock timeout (seconds) for non-CLI scripts. [optional] |
| 263 | + * This tells the DB server how long to wait before assuming |
| 264 | + * connection failure and releasing all the locks for a session. |
| 265 | + * 'cliTimeout' : Lock timeout (seconds) for CLI scripts. [optional] |
| 266 | + * This tells the DB server how long to wait before assuming |
| 267 | + * connection failure and releasing all the locks for a session. |
267 | 268 | * 'safeDelay' : Seconds to mistrust a DB after restart/query loss. [optional] |
268 | 269 | * This should reflect the highest max_execution_time that PHP |
269 | 270 | * scripts might use on a wiki. Locks are lost on server restart. |
| 271 | + * 'cache' : $wgMemc (if set to a global memcached instance). [optional] |
| 272 | + * This tracks peer servers that couldn't be queried recently. |
| 273 | + * 'trustCache' : Assume cache knows all servers missing queries recently. [optional] |
270 | 274 | * |
271 | 275 | * @param Array $config |
272 | 276 | */ |
— | — | @@ -280,25 +284,28 @@ |
281 | 285 | wfWarn( "No valid key for bucket $i in dbsByBucket." ); |
282 | 286 | } |
283 | 287 | } |
284 | | - if ( isset( $config['statusCache'] ) ) { |
285 | | - $this->statusCache = $config['statusCache']; |
286 | | - } |
287 | 288 | if ( isset( $config['webTimeout'] ) ) { |
288 | 289 | $this->webTimeout = $config['webTimeout']; |
289 | 290 | } else { |
290 | | - $this->webTimeout = ini_get( 'max_execution_time' ) // disallow 0 |
291 | | - ? ini_get( 'max_execution_time' ) |
292 | | - : 60; // some sane number |
| 291 | + $met = ini_get( 'max_execution_time' ); |
| 292 | + $this->webTimeout = $met ? $met : 60; // use some same amount if 0 |
293 | 293 | } |
294 | 294 | $this->cliTimeout = isset( $config['cliTimeout'] ) |
295 | 295 | ? $config['cliTimeout'] |
296 | | - : 60; // some sane number |
| 296 | + : 60; // some sane amount |
297 | 297 | $this->safeDelay = isset( $config['safeDelay'] ) |
298 | 298 | ? $config['safeDelay'] |
299 | | - : max( $this->cliTimeout, $this->webTimeout ); |
| 299 | + : max( $this->cliTimeout, $this->webTimeout ); // cover worst case |
| 300 | + if ( isset( $config['cache'] ) && $config['cache'] instanceof BagOStuff ) { |
| 301 | + $this->statusCache = $config['cache']; |
| 302 | + $this->trustCache = ( !empty( $config['trustCache'] ) && $this->safeDelay > 0 ); |
| 303 | + } else { |
| 304 | + $this->statusCache = null; |
| 305 | + $this->trustCache = false; |
| 306 | + } |
300 | 307 | } |
301 | 308 | |
302 | | - function doLock( array $keys, $type ) { |
| 309 | + protected function doLock( array $keys, $type ) { |
303 | 310 | $status = Status::newGood(); |
304 | 311 | |
305 | 312 | $keysToLock = array(); |
— | — | @@ -310,10 +317,6 @@ |
311 | 318 | $status->warning( 'lockmanager-alreadylocked', $key ); |
312 | 319 | } else { |
313 | 320 | $bucket = $this->getBucketFromKey( $key ); |
314 | | - if ( !$bucket ) { // config error? |
315 | | - $status->fatal( 'lockmanager-fail-config', $key ); |
316 | | - return $status; |
317 | | - } |
318 | 321 | $keysToLock[$bucket][] = $key; |
319 | 322 | } |
320 | 323 | } |
— | — | @@ -325,19 +328,19 @@ |
326 | 329 | // (a) First server is up; common case |
327 | 330 | // (b) First server is down but a peer is up |
328 | 331 | // (c) First server is down and no peer are up (or none defined) |
329 | | - $count = $this->doLockingQueryAll( $bucket, $keys, $type ); |
330 | | - if ( $count == -1 ) { |
| 332 | + $res = $this->doLockingQueryAll( $bucket, $keys, $type ); |
| 333 | + if ( $res === 'cantacquire' ) { |
331 | 334 | // Resources already locked by another process. |
332 | 335 | // Abort and unlock everything we just locked. |
333 | 336 | $status->fatal( 'lockmanager-fail-acquirelocks' ); |
334 | 337 | $status->merge( $this->doUnlock( $lockedKeys, $type ) ); |
335 | 338 | return $status; |
336 | | - } elseif ( $count <= 0 ) { |
| 339 | + } elseif ( $res !== true ) { |
337 | 340 | // Couldn't contact any servers for this bucket. |
338 | 341 | // Abort and unlock everything we just locked. |
339 | 342 | $status->fatal( 'lockmanager-fail-db-bucket', $bucket ); |
340 | 343 | $status->merge( $this->doUnlock( $lockedKeys, $type ) ); |
341 | | - return $status; // error |
| 344 | + return $status; |
342 | 345 | } |
343 | 346 | // Record these locks as active |
344 | 347 | foreach ( $keys as $key ) { |
— | — | @@ -350,7 +353,7 @@ |
351 | 354 | return $status; |
352 | 355 | } |
353 | 356 | |
354 | | - function doUnlock( array $keys, $type ) { |
| 357 | + protected function doUnlock( array $keys, $type ) { |
355 | 358 | $status = Status::newGood(); |
356 | 359 | |
357 | 360 | foreach ( $keys as $key ) { |
— | — | @@ -424,39 +427,50 @@ |
425 | 428 | } |
426 | 429 | |
427 | 430 | /** |
428 | | - * Attept to acquire a lock on the primary server as well |
429 | | - * as all peer servers for a bucket. Return value is either: |
430 | | - * a) The number of servers, considered reliable, where the locks were acquired |
431 | | - * b) -1; if any server claimed that a resource was already locked |
| 431 | + * Attempt to acquire locks with the peers for a bucket. |
432 | 432 | * This should avoid throwing any exceptions. |
433 | 433 | * |
434 | 434 | * @param $bucket integer |
435 | 435 | * @param $keys Array List of resource keys to lock |
436 | 436 | * @param $type integer FileLockManager::LOCK_EX or FileLockManager::LOCK_SH |
437 | | - * @return integer |
| 437 | + * @return bool|string One of (true, 'cantacquire', 'dberrors') |
438 | 438 | */ |
439 | 439 | protected function doLockingQueryAll( $bucket, array $keys, $type ) { |
440 | | - $locksMade = 0; // locks made on trustable servers |
| 440 | + $yesVotes = 0; // locks made on trustable servers |
| 441 | + $votesLeft = count( $this->dbsByBucket[$bucket] ); // remaining servers |
| 442 | + $quorum = floor( $votesLeft/2 + 1 ); // simple majority |
441 | 443 | foreach ( $this->dbsByBucket[$bucket] as $server ) { |
442 | | - try { |
443 | | - $this->doLockingQuery( $server, $keys, $type ); |
444 | | - // Servers that have any signs of lock loss are treated as suspect |
445 | | - if ( $this->checkReliable( $server ) ) { |
446 | | - ++$locksMade; // success for this peer |
447 | | - } elseif ( !$this->statusCache ) { |
448 | | - // If we are only checking for restarts, this won't catch |
449 | | - // cases were are only server got a lock and was restarted. |
450 | | - return 0; |
| 444 | + // Check that server is not *known* to have issues |
| 445 | + if ( $this->checkStatusCache( $server ) ) { |
| 446 | + try { |
| 447 | + // Attempt to acquire the lock on this server |
| 448 | + $this->doLockingQuery( $server, $keys, $type ); |
| 449 | + // Check that server has no signs of lock loss |
| 450 | + if ( $this->checkUptime( $server ) ) { |
| 451 | + ++$yesVotes; // success for this peer |
| 452 | + if ( $yesVotes >= $quorum ) { |
| 453 | + return true; // lock obtained |
| 454 | + } |
| 455 | + } |
| 456 | + } catch ( DBError $e ) { |
| 457 | + if ( $this->lastErrorIndicatesLocked( $server ) ) { |
| 458 | + return 'cantacquire'; // vetoed; resource locked |
| 459 | + } else { // can't connect? |
| 460 | + $this->cacheRecordFailure( $server ); |
| 461 | + } |
451 | 462 | } |
452 | | - } catch ( DBError $e ) { |
453 | | - if ( $this->lastErrorIndicatesLocked( $server ) ) { |
454 | | - return -1; // resource locked |
455 | | - } else { // can't connect? |
456 | | - $this->recordFailure( $server ); |
457 | | - } |
458 | 463 | } |
| 464 | + $votesLeft--; |
| 465 | + $votesNeeded = $quorum - $yesVotes; |
| 466 | + if ( $votesNeeded > $votesLeft ) { |
| 467 | + break; // short-circuit |
| 468 | + } |
459 | 469 | } |
460 | | - return $locksMade; |
| 470 | + // At this point, we must not have meet the quorum |
| 471 | + if ( $yesVotes > 0 && $this->trustCache ) { |
| 472 | + return true; // we are trusting the cache; may comprimise correctness |
| 473 | + } |
| 474 | + return 'dberrors'; // not enough votes to ensure correctness |
461 | 475 | } |
462 | 476 | |
463 | 477 | /** |
— | — | @@ -493,29 +507,19 @@ |
494 | 508 | } |
495 | 509 | |
496 | 510 | /** |
497 | | - * Checks if none of the following happened: |
498 | | - * a) The DB server recently restarted. |
499 | | - * This curtails the problem of locks falling off when servers restart. |
500 | | - * b) The DB server has recently missed lock queries. |
501 | | - * This curtails the problem of peers occasionally not getting locks. |
| 511 | + * Checks if the DB server did not recently restart. |
| 512 | + * This curtails the problem of locks falling off when servers restart. |
502 | 513 | * |
503 | 514 | * @param $server string |
504 | 515 | * @return bool |
505 | 516 | */ |
506 | | - protected function checkReliable( $server ) { |
| 517 | + protected function checkUptime( $server ) { |
507 | 518 | if ( isset( $this->activeConns[$server] ) ) { // sanity |
508 | 519 | if ( $this->safeDelay > 0 ) { |
509 | 520 | $db = $this->activeConns[$server]; |
510 | 521 | if ( $db->getServerUptime() < $this->safeDelay ) { |
511 | 522 | return false; |
512 | 523 | } |
513 | | - if ( $this->statusCache ) { |
514 | | - $key = $this->getMissKey( $server ); |
515 | | - $misses = $this->statusCache->get( $key ); |
516 | | - if ( $misses > 0 ) { |
517 | | - return false; |
518 | | - } |
519 | | - } |
520 | 524 | } |
521 | 525 | return true; |
522 | 526 | } |
— | — | @@ -523,7 +527,25 @@ |
524 | 528 | } |
525 | 529 | |
526 | 530 | /** |
527 | | - * Log a lock request failure to the log server. |
| 531 | + * Checks if the DB server has not recently missed lock queries. |
| 532 | + * This curtails the problem of peers occasionally not getting locks. |
| 533 | + * |
| 534 | + * @param $server string |
| 535 | + * @return bool |
| 536 | + */ |
| 537 | + protected function checkStatusCache( $server ) { |
| 538 | + if ( $this->statusCache && $this->safeDelay > 0 ) { |
| 539 | + $key = $this->getMissKey( $server ); |
| 540 | + $misses = $this->statusCache->get( $key ); |
| 541 | + if ( $misses > 0 ) { |
| 542 | + return false; |
| 543 | + } |
| 544 | + } |
| 545 | + return true; |
| 546 | + } |
| 547 | + |
| 548 | + /** |
| 549 | + * Log a lock request failure to the cache. |
528 | 550 | * |
529 | 551 | * Worst case scenario is that a resource lock was only |
530 | 552 | * on one peer and then that peer is restarted or goes down. |
— | — | @@ -532,7 +554,7 @@ |
533 | 555 | * @param $server string |
534 | 556 | * @return bool Success |
535 | 557 | */ |
536 | | - protected function recordFailure( $server ) { |
| 558 | + protected function cacheRecordFailure( $server ) { |
537 | 559 | if ( $this->statusCache && $this->safeDelay > 0 ) { |
538 | 560 | $key = $this->getMissKey( $server ); |
539 | 561 | $misses = $this->statusCache->get( $key ); |
— | — | @@ -574,11 +596,11 @@ |
575 | 597 | class NullFileLockManager extends FileLockManager { |
576 | 598 | function __construct( array $config ) {} |
577 | 599 | |
578 | | - function doLock( array $keys, $type ) { |
| 600 | + protected function doLock( array $keys, $type ) { |
579 | 601 | return Status::newGood(); |
580 | 602 | } |
581 | 603 | |
582 | | - function doUnlock( array $keys, $type ) { |
| 604 | + protected function doUnlock( array $keys, $type ) { |
583 | 605 | return Status::newGood(); |
584 | 606 | } |
585 | 607 | } |
Index: branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php |
— | — | @@ -65,6 +65,8 @@ |
66 | 66 | * ) |
67 | 67 | * ); |
68 | 68 | * </code> |
| 69 | + * If any serious errors occur, the operations will be rolled back. |
| 70 | + * However, the 'ignoreErrors' parameter can be used on any operation to ignore errors. |
69 | 71 | * |
70 | 72 | * @param Array $ops Array of arrays containing N operations to execute IN ORDER |
71 | 73 | * @return Status |
— | — | @@ -321,6 +323,7 @@ |
322 | 324 | // Get params for this operation |
323 | 325 | $params = $operation; |
324 | 326 | unset( $params['operation'] ); // don't need this |
| 327 | + unset( $params['ignoreErrors'] ); // don't need this |
325 | 328 | // Append the FileOp class |
326 | 329 | $performOps[] = new $class( $params ); |
327 | 330 | } else { |
— | — | @@ -342,7 +345,6 @@ |
343 | 346 | foreach ( $performOps as $index => $fileOp ) { |
344 | 347 | $filesToLock = array_merge( $filesToLock, $fileOp->storagePathsToLock() ); |
345 | 348 | } |
346 | | - $filesToLock = array_unique( $filesToLock ); // avoid warnings |
347 | 349 | |
348 | 350 | // Try to lock those files... |
349 | 351 | $status->merge( $this->lockFiles( $filesToLock ) ); |