r41086 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r41085‎ | r41086 | r41087 >
Date:06:42, 21 September 2008
Author:tstarling
Status:old
Tags:
Comment:
Revert/rewrite of r40696.
* We used to have parameters to ignore errors, but they're obsolete now that we have exceptions. Implemented ES master failover using exceptions instead.
* Changing the number of DB connection attempts from 3 to 2 for some random getConnection() calls is almost pointless, adds lots of ugly formal parameters all of the place, and misses the big picture. It should be 2 by default, based on the original rationale. Any reasonable implementation of failover should have zero timeouts per request, by storing state. Changed the default to 2, or 1 if a long timeout is set.
Modified paths:
  • /trunk/phase3/includes/ExternalStore.php (modified) (history)
  • /trunk/phase3/includes/ExternalStoreDB.php (modified) (history)
  • /trunk/phase3/includes/db/Database.php (modified) (history)
  • /trunk/phase3/includes/db/LoadBalancer.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/db/Database.php
@@ -257,11 +257,9 @@
258258 * @param failFunction
259259 * @param $flags
260260 * @param $tablePrefix String: database table prefixes. By default use the prefix gave in LocalSettings.php
261 - * @param int $max, max connection attempts
262 - ** After the first retry (second attempt), each retry waits 1 second
263261 */
264262 function __construct( $server = false, $user = false, $password = false, $dbName = false,
265 - $failFunction = false, $flags = 0, $tablePrefix = 'get from global', $max = false ) {
 263+ $failFunction = false, $flags = 0, $tablePrefix = 'get from global' ) {
266264
267265 global $wgOut, $wgDBprefix, $wgCommandLineMode;
268266 # Can't get a reference if it hasn't been set yet
@@ -295,7 +293,7 @@
296294 }
297295
298296 if ( $server ) {
299 - $this->open( $server, $user, $password, $dbName, $max );
 297+ $this->open( $server, $user, $password, $dbName );
300298 }
301299 }
302300
@@ -313,7 +311,7 @@
314312 * Usually aborts on failure
315313 * If the failFunction is set to a non-zero integer, returns success
316314 */
317 - function open( $server, $user, $password, $dbName, $max = false ) {
 315+ function open( $server, $user, $password, $dbName ) {
318316 global $wguname, $wgAllDBsAreLocalhost;
319317 wfProfileIn( __METHOD__ );
320318
@@ -345,13 +343,17 @@
346344
347345 wfProfileIn("dbconnect-$server");
348346
349 - if( !$max ) { $max = 3; }
350 - # Try to connect up to three times (by default)
351347 # The kernel's default SYN retransmission period is far too slow for us,
352 - # so we use a short timeout plus a manual retry.
 348+ # so we use a short timeout plus a manual retry. Retrying means that a small
 349+ # but finite rate of SYN packet loss won't cause user-visible errors.
353350 $this->mConn = false;
 351+ if ( ini_get( 'mysql.connect_timeout' ) <= 3 ) {
 352+ $numAttempts = 2;
 353+ } else {
 354+ $numAttempts = 1;
 355+ }
354356 $this->installErrorHandler();
355 - for ( $i = 0; $i < $max && !$this->mConn; $i++ ) {
 357+ for ( $i = 0; $i < $numAttempts && !$this->mConn; $i++ ) {
356358 if ( $i > 1 ) {
357359 usleep( 1000 );
358360 }
@@ -367,6 +369,7 @@
368370 }
369371 }
370372 $phpError = $this->restoreErrorHandler();
 373+ # Always log connection errors
371374 if ( !$this->mConn ) {
372375 $error = $this->lastError();
373376 if ( !$error ) {
@@ -2507,6 +2510,13 @@
25082511
25092512 $text = str_replace( '$1', $this->error, $noconnect );
25102513
 2514+ global $wgShowExceptionDetails;
 2515+ if ( $GLOBALS['wgShowExceptionDetails'] ) {
 2516+ $text .= '</p><p>Backtrace:</p><p>' .
 2517+ nl2br( htmlspecialchars( $this->getTraceAsString() ) ) .
 2518+ "</p>\n";
 2519+ }
 2520+
25112521 if($wgUseFileCache) {
25122522 if($wgTitle) {
25132523 $t =& $wgTitle;
Index: trunk/phase3/includes/db/LoadBalancer.php
@@ -11,8 +11,6 @@
1212 * @ingroup Database
1313 */
1414 class LoadBalancer {
15 - const GRACEFUL = 1;
16 -
1715 /* private */ var $mServers, $mConns, $mLoads, $mGroupLoads;
1816 /* private */ var $mFailFunction, $mErrorConnection;
1917 /* private */ var $mReadIndex, $mAllowLagged;
@@ -172,7 +170,7 @@
173171 *
174172 * Side effect: opens connections to databases
175173 */
176 - function getReaderIndex( $group = false, $wiki = false, $attempts = false ) {
 174+ function getReaderIndex( $group = false, $wiki = false ) {
177175 global $wgReadOnly, $wgDBClusterTimeout, $wgDBAvgStatusPoll, $wgDBtype;
178176
179177 # FIXME: For now, only go through all this for mysql databases
@@ -249,7 +247,7 @@
250248 }
251249
252250 wfDebugLog( 'connect', __METHOD__.": Using reader #$i: {$this->mServers[$i]['host']}...\n" );
253 - $conn = $this->openConnection( $i, $wiki, $attempts );
 251+ $conn = $this->openConnection( $i, $wiki );
254252
255253 if ( !$conn ) {
256254 wfDebugLog( 'connect', __METHOD__.": Failed connecting to $i/$wiki\n" );
@@ -403,10 +401,8 @@
404402 * @param int $i Database
405403 * @param array $groups Query groups
406404 * @param string $wiki Wiki ID
407 - * @param int $attempts Max DB connect attempts
408 - * @param int $flags
409405 */
410 - public function &getConnection( $i, $groups = array(), $wiki = false, $attempts = false, $flags = 0 ) {
 406+ public function &getConnection( $i, $groups = array(), $wiki = false ) {
411407 global $wgDBtype;
412408 wfProfileIn( __METHOD__ );
413409
@@ -438,21 +434,18 @@
439435
440436 # Operation-based index
441437 if ( $i == DB_SLAVE ) {
442 - $i = $this->getReaderIndex( false, $wiki, $attempts );
 438+ $i = $this->getReaderIndex( false, $wiki );
443439 } elseif ( $i == DB_LAST ) {
444440 throw new MWException( 'Attempt to call ' . __METHOD__ . ' with deprecated server index DB_LAST' );
445441 }
446442 # Couldn't find a working server in getReaderIndex()?
447443 if ( $i === false ) {
448 - if( $flags && self::GRACEFUL ) {
449 - return false;
450 - }
451444 $this->reportConnectionError( $this->mErrorConnection );
452445 }
453446
454447 # Now we have an explicit index into the servers array
455 - $conn = $this->openConnection( $i, $wiki, $attempts );
456 - if ( !$conn && !($flags & self::GRACEFUL) ) {
 448+ $conn = $this->openConnection( $i, $wiki );
 449+ if ( !$conn ) {
457450 $this->reportConnectionError( $this->mErrorConnection );
458451 }
459452
@@ -512,12 +505,11 @@
513506 *
514507 * @param integer $i Server index
515508 * @param string $wiki Wiki ID to open
516 - * @param int $attempts Maximum connection attempts
517509 * @return Database
518510 *
519511 * @access private
520512 */
521 - function openConnection( $i, $wiki = false, $attempts = false ) {
 513+ function openConnection( $i, $wiki = false ) {
522514 wfProfileIn( __METHOD__ );
523515 if ( $wiki !== false ) {
524516 $conn = $this->openForeignConnection( $i, $wiki );
@@ -529,7 +521,7 @@
530522 } else {
531523 $server = $this->mServers[$i];
532524 $server['serverIndex'] = $i;
533 - $conn = $this->reallyOpenConnection( $server, false, $attempts );
 525+ $conn = $this->reallyOpenConnection( $server, false );
534526 if ( $conn->isOpen() ) {
535527 $this->mConns['local'][$i][0] = $conn;
536528 } else {
@@ -631,7 +623,7 @@
632624 * Returns a Database object whether or not the connection was successful.
633625 * @access private
634626 */
635 - function reallyOpenConnection( $server, $dbNameOverride = false, $attempts = false ) {
 627+ function reallyOpenConnection( $server, $dbNameOverride = false ) {
636628 if( !is_array( $server ) ) {
637629 throw new MWException( 'You must update your load-balancing configuration. See DefaultSettings.php entry for $wgDBservers.' );
638630 }
@@ -646,7 +638,7 @@
647639
648640 # Create object
649641 wfDebug( "Connecting to $host $dbname...\n" );
650 - $db = new $class( $host, $user, $password, $dbname, 1, $flags, 'get from global', $attempts );
 642+ $db = new $class( $host, $user, $password, $dbname, 1, $flags );
651643 if ( $db->isOpen() ) {
652644 wfDebug( "Connected\n" );
653645 } else {
Index: trunk/phase3/includes/ExternalStore.php
@@ -77,30 +77,36 @@
7878 */
7979 public static function randomInsert( $data ) {
8080 global $wgDefaultExternalStore;
81 - $tryStorages = (array)$wgDefaultExternalStore;
82 - // Do not wait and do second retry per master if we
83 - // have other active cluster masters to try instead.
84 - $retry = count($tryStorages) > 1 ? false : true;
85 - while ( count($tryStorages) > 0 ) {
86 - $index = mt_rand(0, count( $tryStorages ) - 1);
87 - $storeUrl = $tryStorages[$index];
 81+ $tryStores = (array)$wgDefaultExternalStore;
 82+ $error = false;
 83+ while ( count( $tryStores ) > 0 ) {
 84+ $index = mt_rand(0, count( $tryStores ) - 1);
 85+ $storeUrl = $tryStores[$index];
 86+ wfDebug( __METHOD__.": trying $storeUrl\n" );
8887 list( $proto, $params ) = explode( '://', $storeUrl, 2 );
8988 $store = self::getStoreObject( $proto );
9089 if ( $store === false ) {
9190 throw new MWException( "Invalid external storage protocol - $storeUrl" );
92 - return false;
 91+ }
 92+ try {
 93+ $url = $store->store( $params, $data ); // Try to save the object
 94+ } catch ( DBConnectionError $error ) {
 95+ $url = false;
 96+ unset( $tryStores[$index] ); // Don't try this one again!
 97+ $tryStores = array_values( $tryStores ); // Must have consecutive keys
 98+ }
 99+ if ( $url ) {
 100+ return $url; // Done!
93101 } else {
94 - $url = $store->store( $params, $data, $retry ); // Try to save the object
95 - if ( $url ) {
96 - return $url; // Done!
97 - } else {
98 - unset( $tryStorages[$index] ); // Don't try this one again!
99 - sort( $tryStorages ); // Must have consecutive keys
100 - wfDebugLog( 'ExternalStorage', "Unable to store text to external storage $storeUrl" );
101 - }
 102+ wfDebugLog( 'ExternalStorage', "Unable to store text to external storage $storeUrl" );
102103 }
103104 }
104 - throw new MWException( "Unable to store text to external storage" );
105 - return false; // All cluster masters dead :(
 105+ // All stores failed
 106+ if ( $error ) {
 107+ // Rethrow the last connection error
 108+ throw $error;
 109+ } else {
 110+ throw new MWException( "Unable to store text to external storage" );
 111+ }
106112 }
107113 }
Index: trunk/phase3/includes/ExternalStoreDB.php
@@ -34,14 +34,13 @@
3535 /** @todo Document.*/
3636 function &getSlave( $cluster ) {
3737 $lb =& $this->getLoadBalancer( $cluster );
38 - // Make only two connection attempts, since we still have the master to try
39 - return $lb->getConnection( DB_SLAVE, array(), false, 2 );
 38+ return $lb->getConnection( DB_SLAVE );
4039 }
4140
4241 /** @todo Document.*/
43 - function &getMaster( $cluster, $retry = true ) {
 42+ function &getMaster( $cluster ) {
4443 $lb =& $this->getLoadBalancer( $cluster );
45 - return $lb->getConnection( DB_MASTER, array(), false, false, LoadBalancer::GRACEFUL );
 44+ return $lb->getConnection( DB_MASTER, array() );
4645 }
4746
4847 /** @todo Document.*/
@@ -120,13 +119,11 @@
121120 *
122121 * @param $cluster String: the cluster name
123122 * @param $data String: the data item
124 - * @param $retry bool: allows an extra DB connection retry after 1 second
125123 * @return string URL
126124 */
127 - function store( $cluster, $data, $retry = true ) {
128 - if( !$dbw = $this->getMaster( $cluster, $retry ) ) {
129 - return false; // failed, maybe another cluster is up...
130 - }
 125+ function store( $cluster, $data ) {
 126+ $dbw = $this->getMaster( $cluster );
 127+
131128 $id = $dbw->nextSequenceValue( 'blob_blob_id_seq' );
132129 $dbw->insert( $this->getTable( $dbw ),
133130 array( 'blob_id' => $id, 'blob_text' => $data ),

Follow-up revisions

RevisionCommit summaryAuthorDate
r41229Revert untested code change from r41086. Don't call methods on non-objects, t...aaron16:09, 24 September 2008
r41231Another fix for r41086. Don't keep retrying server if connection fails.aaron17:28, 24 September 2008
r41234Revert some recent ES-related changes -- they made behavior much worse when w...brion18:09, 24 September 2008

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r40696ExternalStore tweaks:...aaron12:07, 10 September 2008