r40696 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r40695‎ | r40696 | r40697 >
Date:12:07, 10 September 2008
Author:aaron
Status:old
Tags:
Comment:
ExternalStore tweaks:
* On read, spend less time checking on dead slaves
* Add randomInsert() to ES. This does the cluster picking for us
* Make revision text use randomInsert(). On write, fails-over to other clusters as needed instead of throwing db errors
Modified paths:
  • /trunk/phase3/includes/ExternalStore.php (modified) (history)
  • /trunk/phase3/includes/ExternalStoreDB.php (modified) (history)
  • /trunk/phase3/includes/Revision.php (modified) (history)
  • /trunk/phase3/includes/db/Database.php (modified) (history)
  • /trunk/phase3/includes/db/LoadBalancer.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/db/Database.php
@@ -257,9 +257,11 @@
258258 * @param failFunction
259259 * @param $flags
260260 * @param $tablePrefix String: database table prefixes. By default use the prefix gave in LocalSettings.php
 261+ * @param int $max, max connection attempts
 262+ ** After the first retry (second attempt), each retry waits 1 second
261263 */
262264 function __construct( $server = false, $user = false, $password = false, $dbName = false,
263 - $failFunction = false, $flags = 0, $tablePrefix = 'get from global' ) {
 265+ $failFunction = false, $flags = 0, $tablePrefix = 'get from global', $max = false ) {
264266
265267 global $wgOut, $wgDBprefix, $wgCommandLineMode;
266268 # Can't get a reference if it hasn't been set yet
@@ -293,7 +295,7 @@
294296 }
295297
296298 if ( $server ) {
297 - $this->open( $server, $user, $password, $dbName );
 299+ $this->open( $server, $user, $password, $dbName, $max );
298300 }
299301 }
300302
@@ -311,7 +313,7 @@
312314 * Usually aborts on failure
313315 * If the failFunction is set to a non-zero integer, returns success
314316 */
315 - function open( $server, $user, $password, $dbName ) {
 317+ function open( $server, $user, $password, $dbName, $max = false ) {
316318 global $wguname, $wgAllDBsAreLocalhost;
317319 wfProfileIn( __METHOD__ );
318320
@@ -343,11 +345,11 @@
344346
345347 wfProfileIn("dbconnect-$server");
346348
347 - # Try to connect up to three times
 349+ if( !$max ) { $max = 3; }
 350+ # Try to connect up to three times (by default)
348351 # The kernel's default SYN retransmission period is far too slow for us,
349352 # so we use a short timeout plus a manual retry.
350353 $this->mConn = false;
351 - $max = 3;
352354 $this->installErrorHandler();
353355 for ( $i = 0; $i < $max && !$this->mConn; $i++ ) {
354356 if ( $i > 1 ) {
Index: trunk/phase3/includes/db/LoadBalancer.php
@@ -11,6 +11,8 @@
1212 * @ingroup Database
1313 */
1414 class LoadBalancer {
 15+ const GRACEFUL = 1;
 16+
1517 /* private */ var $mServers, $mConns, $mLoads, $mGroupLoads;
1618 /* private */ var $mFailFunction, $mErrorConnection;
1719 /* private */ var $mReadIndex, $mLastIndex, $mAllowLagged;
@@ -171,7 +173,7 @@
172174 *
173175 * Side effect: opens connections to databases
174176 */
175 - function getReaderIndex( $group = false, $wiki = false ) {
 177+ function getReaderIndex( $group = false, $wiki = false, $attempts = false ) {
176178 global $wgReadOnly, $wgDBClusterTimeout, $wgDBAvgStatusPoll, $wgDBtype;
177179
178180 # FIXME: For now, only go through all this for mysql databases
@@ -248,7 +250,7 @@
249251 }
250252
251253 wfDebugLog( 'connect', __METHOD__.": Using reader #$i: {$this->mServers[$i]['host']}...\n" );
252 - $conn = $this->openConnection( $i, $wiki );
 254+ $conn = $this->openConnection( $i, $wiki, $attempts );
253255
254256 if ( !$conn ) {
255257 wfDebugLog( 'connect', __METHOD__.": Failed connecting to $i/$wiki\n" );
@@ -399,8 +401,13 @@
400402 /**
401403 * Get a connection by index
402404 * This is the main entry point for this class.
 405+ * @param int $i Database
 406+ * @param array $groups Query groups
 407+ * @param string $wiki Wiki ID
 408+ * @param int $attempts Max DB connect attempts
 409+ * @param int $flags
403410 */
404 - public function &getConnection( $i, $groups = array(), $wiki = false ) {
 411+ public function &getConnection( $i, $groups = array(), $wiki = false, $attempts = false, $flags = 0 ) {
405412 global $wgDBtype;
406413 wfProfileIn( __METHOD__ );
407414
@@ -432,7 +439,7 @@
433440
434441 # Operation-based index
435442 if ( $i == DB_SLAVE ) {
436 - $i = $this->getReaderIndex( false, $wiki );
 443+ $i = $this->getReaderIndex( false, $wiki, $attempts );
437444 } elseif ( $i == DB_LAST ) {
438445 # Just use $this->mLastIndex, which should already be set
439446 $i = $this->mLastIndex;
@@ -444,12 +451,15 @@
445452 }
446453 # Couldn't find a working server in getReaderIndex()?
447454 if ( $i === false ) {
 455+ if( $flags && self::GRACEFUL ) {
 456+ return false;
 457+ }
448458 $this->reportConnectionError( $this->mErrorConnection );
449459 }
450460
451461 # Now we have an explicit index into the servers array
452 - $conn = $this->openConnection( $i, $wiki );
453 - if ( !$conn ) {
 462+ $conn = $this->openConnection( $i, $wiki, $attempts );
 463+ if ( !$conn && !($flags & self::GRACEFUL) ) {
454464 $this->reportConnectionError( $this->mErrorConnection );
455465 }
456466
@@ -509,11 +519,12 @@
510520 *
511521 * @param integer $i Server index
512522 * @param string $wiki Wiki ID to open
 523+ * @param int $attempts Maximum connection attempts
513524 * @return Database
514525 *
515526 * @access private
516527 */
517 - function openConnection( $i, $wiki = false ) {
 528+ function openConnection( $i, $wiki = false, $attempts = false ) {
518529 wfProfileIn( __METHOD__ );
519530 if ( $wiki !== false ) {
520531 $conn = $this->openForeignConnection( $i, $wiki );
@@ -525,7 +536,7 @@
526537 } else {
527538 $server = $this->mServers[$i];
528539 $server['serverIndex'] = $i;
529 - $conn = $this->reallyOpenConnection( $server );
 540+ $conn = $this->reallyOpenConnection( $server, false, $attempts );
530541 if ( $conn->isOpen() ) {
531542 $this->mConns['local'][$i][0] = $conn;
532543 } else {
@@ -628,7 +639,7 @@
629640 * Returns a Database object whether or not the connection was successful.
630641 * @access private
631642 */
632 - function reallyOpenConnection( $server, $dbNameOverride = false ) {
 643+ function reallyOpenConnection( $server, $dbNameOverride = false, $attempts = false ) {
633644 if( !is_array( $server ) ) {
634645 throw new MWException( 'You must update your load-balancing configuration. See DefaultSettings.php entry for $wgDBservers.' );
635646 }
@@ -643,7 +654,7 @@
644655
645656 # Create object
646657 wfDebug( "Connecting to $host $dbname...\n" );
647 - $db = new $class( $host, $user, $password, $dbname, 1, $flags );
 658+ $db = new $class( $host, $user, $password, $dbname, 1, $flags, 'get from global', $attempts );
648659 if ( $db->isOpen() ) {
649660 wfDebug( "Connected\n" );
650661 } else {
Index: trunk/phase3/includes/Revision.php
@@ -719,20 +719,13 @@
720720 $flags = Revision::compressRevisionText( $data );
721721
722722 # Write to external storage if required
723 - if ( $wgDefaultExternalStore ) {
724 - if ( is_array( $wgDefaultExternalStore ) ) {
725 - // Distribute storage across multiple clusters
726 - $store = $wgDefaultExternalStore[mt_rand(0, count( $wgDefaultExternalStore ) - 1)];
727 - } else {
728 - $store = $wgDefaultExternalStore;
729 - }
 723+ if( $wgDefaultExternalStore ) {
730724 // Store and get the URL
731 - $data = ExternalStore::insert( $store, $data );
732 - if ( !$data ) {
733 - # This should only happen in the case of a configuration error, where the external store is not valid
734 - throw new MWException( "Unable to store text to external storage $store" );
 725+ $data = ExternalStore::randomInsert( $data );
 726+ if( !$data ) {
 727+ throw new MWException( "Unable to store text to external storage" );
735728 }
736 - if ( $flags ) {
 729+ if( $flags ) {
737730 $flags .= ',';
738731 }
739732 $flags .= 'external';
Index: trunk/phase3/includes/ExternalStore.php
@@ -14,7 +14,7 @@
1515 */
1616 class ExternalStore {
1717 /* Fetch data from given URL */
18 - static function fetchFromURL($url) {
 18+ static function fetchFromURL( $url ) {
1919 global $wgExternalStores;
2020
2121 if( !$wgExternalStores )
@@ -44,7 +44,7 @@
4545
4646 $class = 'ExternalStore' . ucfirst( $proto );
4747 /* Any custom modules should be added to $wgAutoLoadClasses for on-demand loading */
48 - if( !class_exists( $class ) ){
 48+ if( !class_exists( $class ) ) {
4949 return false;
5050 }
5151
@@ -66,4 +66,41 @@
6767 return $store->store( $params, $data );
6868 }
6969 }
 70+
 71+ /**
 72+ * Like insert() above, but does more of the work for us.
 73+ * This function does not need a url param, it builds it by
 74+ * itself. It also fails-over to the next possible clusters.
 75+ *
 76+ * @param string $data
 77+ * Returns the URL of the stored data item, or false on error
 78+ */
 79+ public static function randomInsert( $data ) {
 80+ global $wgDefaultExternalStore;
 81+ $tryStorages = (array)$wgDefaultExternalStore;
 82+ // Do not wait and do second retry per master if we
 83+ // have other active cluster masters to try instead.
 84+ $retry = count($tryStorages) > 1 ? false : true;
 85+ while ( count($tryStorages) > 0 ) {
 86+ $index = mt_rand(0, count( $tryStorages ) - 1);
 87+ $storeUrl = $tryStorages[$index];
 88+ list( $proto, $params ) = explode( '://', $storeUrl, 2 );
 89+ $store = self::getStoreObject( $proto );
 90+ if ( $store === false ) {
 91+ throw new MWException( "Invalid external storage protocol - $storeUrl" );
 92+ return false;
 93+ } else {
 94+ $url = $store->store( $params, $data, $retry ); // Try to save the object
 95+ if ( $url ) {
 96+ return $url; // Done!
 97+ } else {
 98+ unset( $tryStorages[$index] ); // Don't try this one again!
 99+ sort( $tryStorages ); // Must have consecutive keys
 100+ wfDebugLog( 'ExternalStorage', "Unable to store text to external storage $storeUrl" );
 101+ }
 102+ }
 103+ }
 104+ throw new MWException( "Unable to store text to external storage" );
 105+ return false; // All cluster masters dead :(
 106+ }
70107 }
Index: trunk/phase3/includes/ExternalStoreDB.php
@@ -34,13 +34,16 @@
3535 /** @todo Document.*/
3636 function &getSlave( $cluster ) {
3737 $lb =& $this->getLoadBalancer( $cluster );
38 - return $lb->getConnection( DB_SLAVE );
 38+ // Make only two connection attempts, since we still have the master to try
 39+ return $lb->getConnection( DB_SLAVE, array(), false, 2 );
3940 }
4041
4142 /** @todo Document.*/
42 - function &getMaster( $cluster ) {
 43+ function &getMaster( $cluster, $retry = true ) {
4344 $lb =& $this->getLoadBalancer( $cluster );
44 - return $lb->getConnection( DB_MASTER );
 45+ // Make only two connection attempts if there are other clusters to try
 46+ $attempts = $retry ? false : 2;
 47+ return $lb->getConnection( DB_MASTER, array(), false, $attempts, LoadBalancer::GRACEFUL );
4548 }
4649
4750 /** @todo Document.*/
@@ -56,7 +59,7 @@
5760 * Fetch data from given URL
5861 * @param string $url An url of the form DB://cluster/id or DB://cluster/id/itemid for concatened storage.
5962 */
60 - function fetchFromURL($url) {
 63+ function fetchFromURL( $url ) {
6164 $path = explode( '/', $url );
6265 $cluster = $path[2];
6366 $id = $path[3];
@@ -119,15 +122,17 @@
120123 *
121124 * @param $cluster String: the cluster name
122125 * @param $data String: the data item
 126+ * @param $retry bool: allows an extra DB connection retry after 1 second
123127 * @return string URL
124128 */
125 - function store( $cluster, $data ) {
126 - $fname = 'ExternalStoreDB::store';
127 -
128 - $dbw =& $this->getMaster( $cluster );
129 -
 129+ function store( $cluster, $data, $retry = true ) {
 130+ if( !$dbw = $this->getMaster( $cluster, $retry ) ) {
 131+ return false; // failed, maybe another cluster is up...
 132+ }
130133 $id = $dbw->nextSequenceValue( 'blob_blob_id_seq' );
131 - $dbw->insert( $this->getTable( $dbw ), array( 'blob_id' => $id, 'blob_text' => $data ), $fname );
 134+ $dbw->insert( $this->getTable( $dbw ),
 135+ array( 'blob_id' => $id, 'blob_text' => $data ),
 136+ __METHOD__ );
132137 $id = $dbw->insertId();
133138 if ( $dbw->getFlag( DBO_TRX ) ) {
134139 $dbw->immediateCommit();

Follow-up revisions

RevisionCommit summaryAuthorDate
r41086Revert/rewrite of r40696....tstarling06:42, 21 September 2008
r41234Revert some recent ES-related changes -- they made behavior much worse when w...brion18:09, 24 September 2008