r104263 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r104262‎ | r104263 | r104264 >
Date:07:07, 26 November 2011
Author:aaron
Status:deferred
Tags:
Comment:
* Optimized DBFileLockManager::doLockingQueryAll with a quorum and better use of cache
* Various fixes after more testing
Modified paths:
  • /branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php (modified) (history)
  • /branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php (modified) (history)
  • /branches/FileBackend/phase3/includes/filerepo/backend/FileLockManager.php (modified) (history)

Diff [purge]

Index: branches/FileBackend/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
@@ -79,7 +79,6 @@
8080 foreach ( $performOps as $index => $fileOp ) {
8181 $filesToLock = array_merge( $filesToLock, $fileOp->storagePathsToLock() );
8282 }
83 - $filesToLock = array_unique( $filesToLock ); // avoid warnings
8483 }
8584 }
8685
Index: branches/FileBackend/phase3/includes/filerepo/backend/FileLockManager.php
@@ -28,7 +28,7 @@
2929 * @return Status
3030 */
3131 final public function lock( array $paths, $type = self::LOCK_EX ) {
32 - $keys = array_map( 'sha1', $paths );
 32+ $keys = array_unique( array_map( 'sha1', $paths ) );
3333 return $this->doLock( $keys, $type );
3434 }
3535
@@ -39,7 +39,7 @@
4040 * @return Status
4141 */
4242 final public function unlock( array $paths ) {
43 - $keys = array_map( 'sha1', $paths );
 43+ $keys = array_unique( array_map( 'sha1', $paths ) );
4444 return $this->doUnlock( $keys, 0 );
4545 }
4646
@@ -80,7 +80,7 @@
8181 $this->lockDir = $config['lockDirectory'];
8282 }
8383
84 - function doLock( array $keys, $type ) {
 84+ protected function doLock( array $keys, $type ) {
8585 $status = Status::newGood();
8686
8787 $lockedKeys = array(); // files locked in this attempt
@@ -103,7 +103,7 @@
104104 return $status;
105105 }
106106
107 - function doUnlock( array $keys, $type ) {
 107+ protected function doUnlock( array $keys, $type ) {
108108 $status = Status::newGood();
109109
110110 foreach ( $keys as $key ) {
@@ -224,9 +224,9 @@
225225 *
226226 * All lock requests for a resource, identified by a hash string, will
227227 * map to one bucket. Each bucket maps to one or several peer DB servers,
228 - * each of which have a `file_locks` table with row-level locking.
 228+ * each having a `file_locks` table with row-level locking.
229229 *
230 - * All peer servers must agree to a lock in order for it to be acquired.
 230+ * A majority of peer servers must agree for a lock to be acquired.
231231 * As long as one peer server is up, lock requests will not be blocked
232232 * just because another peer server cannot be contacted. A global status
233233 * cache can be setup to track servers that recently missed queries; such
@@ -242,6 +242,7 @@
243243 /** @var BagOStuff */
244244 protected $statusCache;
245245
 246+ protected $trustCache; // boolean
246247 protected $webTimeout; // integer number of seconds
247248 protected $cliTimeout; // integer number of seconds
248249 protected $safeDelay; // integer number of seconds
@@ -254,18 +255,21 @@
255256 /**
256257 * Construct a new instance from configuration.
257258 * $config paramaters include:
258 - * 'dbsByBucket' : Array of 1-16 consecutive integer keys, starting from 0,
259 - * with a list of database names (peers) as values.
260 - * Each DB should be on its own server.
261 - * 'statusCache' : $wgMemc if set to a global memcached instance. [optional]
262 - * This tracks peer servers that couldn't be queried recently.
263 - * 'webTimeout' : Connection timeout (seconds) for non-CLI scripts. [optional]
264 - * This tells the DB server how long to wait before giving up
265 - * and releasing all the locks made in a session transaction.
266 - * 'cliTimeout' : Connection timeout (seconds) for CLI scripts. [optional]
 259+ * 'dbsByBucket' : Array of 1-16 consecutive integer keys, starting from 0, with
 260+ * a list of DB names (peers) as values. Each list should have
 261+ * an odd number of items and each DB should have its own server.
 262+ * 'webTimeout' : Lock timeout (seconds) for non-CLI scripts. [optional]
 263+ * This tells the DB server how long to wait before assuming
 264+ * connection failure and releasing all the locks for a session.
 265+ * 'cliTimeout' : Lock timeout (seconds) for CLI scripts. [optional]
 266+ * This tells the DB server how long to wait before assuming
 267+ * connection failure and releasing all the locks for a session.
267268 * 'safeDelay' : Seconds to mistrust a DB after restart/query loss. [optional]
268269 * This should reflect the highest max_execution_time that PHP
269270 * scripts might use on a wiki. Locks are lost on server restart.
 271+ * 'cache' : $wgMemc (if set to a global memcached instance). [optional]
 272+ * This tracks peer servers that couldn't be queried recently.
 273+ * 'trustCache' : Assume cache knows all servers missing queries recently. [optional]
270274 *
271275 * @param Array $config
272276 */
@@ -280,25 +284,28 @@
281285 wfWarn( "No valid key for bucket $i in dbsByBucket." );
282286 }
283287 }
284 - if ( isset( $config['statusCache'] ) ) {
285 - $this->statusCache = $config['statusCache'];
286 - }
287288 if ( isset( $config['webTimeout'] ) ) {
288289 $this->webTimeout = $config['webTimeout'];
289290 } else {
290 - $this->webTimeout = ini_get( 'max_execution_time' ) // disallow 0
291 - ? ini_get( 'max_execution_time' )
292 - : 60; // some sane number
 291+ $met = ini_get( 'max_execution_time' );
 292+ $this->webTimeout = $met ? $met : 60; // use some same amount if 0
293293 }
294294 $this->cliTimeout = isset( $config['cliTimeout'] )
295295 ? $config['cliTimeout']
296 - : 60; // some sane number
 296+ : 60; // some sane amount
297297 $this->safeDelay = isset( $config['safeDelay'] )
298298 ? $config['safeDelay']
299 - : max( $this->cliTimeout, $this->webTimeout );
 299+ : max( $this->cliTimeout, $this->webTimeout ); // cover worst case
 300+ if ( isset( $config['cache'] ) && $config['cache'] instanceof BagOStuff ) {
 301+ $this->statusCache = $config['cache'];
 302+ $this->trustCache = ( !empty( $config['trustCache'] ) && $this->safeDelay > 0 );
 303+ } else {
 304+ $this->statusCache = null;
 305+ $this->trustCache = false;
 306+ }
300307 }
301308
302 - function doLock( array $keys, $type ) {
 309+ protected function doLock( array $keys, $type ) {
303310 $status = Status::newGood();
304311
305312 $keysToLock = array();
@@ -310,10 +317,6 @@
311318 $status->warning( 'lockmanager-alreadylocked', $key );
312319 } else {
313320 $bucket = $this->getBucketFromKey( $key );
314 - if ( !$bucket ) { // config error?
315 - $status->fatal( 'lockmanager-fail-config', $key );
316 - return $status;
317 - }
318321 $keysToLock[$bucket][] = $key;
319322 }
320323 }
@@ -325,19 +328,19 @@
326329 // (a) First server is up; common case
327330 // (b) First server is down but a peer is up
328331 // (c) First server is down and no peer are up (or none defined)
329 - $count = $this->doLockingQueryAll( $bucket, $keys, $type );
330 - if ( $count == -1 ) {
 332+ $res = $this->doLockingQueryAll( $bucket, $keys, $type );
 333+ if ( $res === 'cantacquire' ) {
331334 // Resources already locked by another process.
332335 // Abort and unlock everything we just locked.
333336 $status->fatal( 'lockmanager-fail-acquirelocks' );
334337 $status->merge( $this->doUnlock( $lockedKeys, $type ) );
335338 return $status;
336 - } elseif ( $count <= 0 ) {
 339+ } elseif ( $res !== true ) {
337340 // Couldn't contact any servers for this bucket.
338341 // Abort and unlock everything we just locked.
339342 $status->fatal( 'lockmanager-fail-db-bucket', $bucket );
340343 $status->merge( $this->doUnlock( $lockedKeys, $type ) );
341 - return $status; // error
 344+ return $status;
342345 }
343346 // Record these locks as active
344347 foreach ( $keys as $key ) {
@@ -350,7 +353,7 @@
351354 return $status;
352355 }
353356
354 - function doUnlock( array $keys, $type ) {
 357+ protected function doUnlock( array $keys, $type ) {
355358 $status = Status::newGood();
356359
357360 foreach ( $keys as $key ) {
@@ -424,39 +427,50 @@
425428 }
426429
427430 /**
428 - * Attept to acquire a lock on the primary server as well
429 - * as all peer servers for a bucket. Return value is either:
430 - * a) The number of servers, considered reliable, where the locks were acquired
431 - * b) -1; if any server claimed that a resource was already locked
 431+ * Attempt to acquire locks with the peers for a bucket.
432432 * This should avoid throwing any exceptions.
433433 *
434434 * @param $bucket integer
435435 * @param $keys Array List of resource keys to lock
436436 * @param $type integer FileLockManager::LOCK_EX or FileLockManager::LOCK_SH
437 - * @return integer
 437+ * @return bool|string One of (true, 'cantacquire', 'dberrors')
438438 */
439439 protected function doLockingQueryAll( $bucket, array $keys, $type ) {
440 - $locksMade = 0; // locks made on trustable servers
 440+ $yesVotes = 0; // locks made on trustable servers
 441+ $votesLeft = count( $this->dbsByBucket[$bucket] ); // remaining servers
 442+ $quorum = floor( $votesLeft/2 + 1 ); // simple majority
441443 foreach ( $this->dbsByBucket[$bucket] as $server ) {
442 - try {
443 - $this->doLockingQuery( $server, $keys, $type );
444 - // Servers that have any signs of lock loss are treated as suspect
445 - if ( $this->checkReliable( $server ) ) {
446 - ++$locksMade; // success for this peer
447 - } elseif ( !$this->statusCache ) {
448 - // If we are only checking for restarts, this won't catch
449 - // cases were are only server got a lock and was restarted.
450 - return 0;
 444+ // Check that server is not *known* to have issues
 445+ if ( $this->checkStatusCache( $server ) ) {
 446+ try {
 447+ // Attempt to acquire the lock on this server
 448+ $this->doLockingQuery( $server, $keys, $type );
 449+ // Check that server has no signs of lock loss
 450+ if ( $this->checkUptime( $server ) ) {
 451+ ++$yesVotes; // success for this peer
 452+ if ( $yesVotes >= $quorum ) {
 453+ return true; // lock obtained
 454+ }
 455+ }
 456+ } catch ( DBError $e ) {
 457+ if ( $this->lastErrorIndicatesLocked( $server ) ) {
 458+ return 'cantacquire'; // vetoed; resource locked
 459+ } else { // can't connect?
 460+ $this->cacheRecordFailure( $server );
 461+ }
451462 }
452 - } catch ( DBError $e ) {
453 - if ( $this->lastErrorIndicatesLocked( $server ) ) {
454 - return -1; // resource locked
455 - } else { // can't connect?
456 - $this->recordFailure( $server );
457 - }
458463 }
 464+ $votesLeft--;
 465+ $votesNeeded = $quorum - $yesVotes;
 466+ if ( $votesNeeded > $votesLeft ) {
 467+ break; // short-circuit
 468+ }
459469 }
460 - return $locksMade;
 470+ // At this point, we must not have meet the quorum
 471+ if ( $yesVotes > 0 && $this->trustCache ) {
 472+ return true; // we are trusting the cache; may comprimise correctness
 473+ }
 474+ return 'dberrors'; // not enough votes to ensure correctness
461475 }
462476
463477 /**
@@ -493,29 +507,19 @@
494508 }
495509
496510 /**
497 - * Checks if none of the following happened:
498 - * a) The DB server recently restarted.
499 - * This curtails the problem of locks falling off when servers restart.
500 - * b) The DB server has recently missed lock queries.
501 - * This curtails the problem of peers occasionally not getting locks.
 511+ * Checks if the DB server did not recently restart.
 512+ * This curtails the problem of locks falling off when servers restart.
502513 *
503514 * @param $server string
504515 * @return bool
505516 */
506 - protected function checkReliable( $server ) {
 517+ protected function checkUptime( $server ) {
507518 if ( isset( $this->activeConns[$server] ) ) { // sanity
508519 if ( $this->safeDelay > 0 ) {
509520 $db = $this->activeConns[$server];
510521 if ( $db->getServerUptime() < $this->safeDelay ) {
511522 return false;
512523 }
513 - if ( $this->statusCache ) {
514 - $key = $this->getMissKey( $server );
515 - $misses = $this->statusCache->get( $key );
516 - if ( $misses > 0 ) {
517 - return false;
518 - }
519 - }
520524 }
521525 return true;
522526 }
@@ -523,7 +527,25 @@
524528 }
525529
526530 /**
527 - * Log a lock request failure to the log server.
 531+ * Checks if the DB server has not recently missed lock queries.
 532+ * This curtails the problem of peers occasionally not getting locks.
 533+ *
 534+ * @param $server string
 535+ * @return bool
 536+ */
 537+ protected function checkStatusCache( $server ) {
 538+ if ( $this->statusCache && $this->safeDelay > 0 ) {
 539+ $key = $this->getMissKey( $server );
 540+ $misses = $this->statusCache->get( $key );
 541+ if ( $misses > 0 ) {
 542+ return false;
 543+ }
 544+ }
 545+ return true;
 546+ }
 547+
 548+ /**
 549+ * Log a lock request failure to the cache.
528550 *
529551 * Worst case scenario is that a resource lock was only
530552 * on one peer and then that peer is restarted or goes down.
@@ -532,7 +554,7 @@
533555 * @param $server string
534556 * @return bool Success
535557 */
536 - protected function recordFailure( $server ) {
 558+ protected function cacheRecordFailure( $server ) {
537559 if ( $this->statusCache && $this->safeDelay > 0 ) {
538560 $key = $this->getMissKey( $server );
539561 $misses = $this->statusCache->get( $key );
@@ -574,11 +596,11 @@
575597 class NullFileLockManager extends FileLockManager {
576598 function __construct( array $config ) {}
577599
578 - function doLock( array $keys, $type ) {
 600+ protected function doLock( array $keys, $type ) {
579601 return Status::newGood();
580602 }
581603
582 - function doUnlock( array $keys, $type ) {
 604+ protected function doUnlock( array $keys, $type ) {
583605 return Status::newGood();
584606 }
585607 }
Index: branches/FileBackend/phase3/includes/filerepo/backend/FileBackend.php
@@ -65,6 +65,8 @@
6666 * )
6767 * );
6868 * </code>
 69+ * If any serious errors occur, the operations will be rolled back.
 70+ * However, the 'ignoreErrors' parameter can be used on any operation to ignore errors.
6971 *
7072 * @param Array $ops Array of arrays containing N operations to execute IN ORDER
7173 * @return Status
@@ -321,6 +323,7 @@
322324 // Get params for this operation
323325 $params = $operation;
324326 unset( $params['operation'] ); // don't need this
 327+ unset( $params['ignoreErrors'] ); // don't need this
325328 // Append the FileOp class
326329 $performOps[] = new $class( $params );
327330 } else {
@@ -342,7 +345,6 @@
343346 foreach ( $performOps as $index => $fileOp ) {
344347 $filesToLock = array_merge( $filesToLock, $fileOp->storagePathsToLock() );
345348 }
346 - $filesToLock = array_unique( $filesToLock ); // avoid warnings
347349
348350 // Try to lock those files...
349351 $status->merge( $this->lockFiles( $filesToLock ) );

Status & tagging log