r114252 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r114251‎ | r114252 | r114253 >
Date:11:11, 20 March 2012
Author:qchris
Status:reverted (Comments)
Tags:gerritmigration 
Comment:
Refactoring dumpTextPass's error handling
Modified paths:
  • /trunk/phase3/maintenance/dumpTextPass.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/dumpTextPass.php
@@ -41,9 +41,7 @@
4242 var $prefetchCountLast = 0;
4343 var $fetchCountLast = 0;
4444
45 - var $failures = 0;
4645 var $maxFailures = 5;
47 - var $failedTextRetrievals = 0;
4846 var $maxConsecutiveFailedTextRetrievals = 200;
4947 var $failureTimeout = 5; // Seconds to sleep after db failure
5048
@@ -71,6 +69,51 @@
7270 */
7371 protected $db;
7472
 73+
 74+ /**
 75+ * Drop the database connection $this->db and try to get a new one.
 76+ *
 77+ * This function tries to get a /different/ connection if this is
 78+ * possible. Hence, (if this is possible) it switches to a different
 79+ * failover upon each call.
 80+ *
 81+ * This function resets $this->lb and closes all connections on it.
 82+ *
 83+ * @throws MWException
 84+ */
 85+ function rotateDb() {
 86+ // Cleaning up old connections
 87+ if ( isset( $this->lb ) ) {
 88+ $this->lb->closeAll();
 89+ unset( $this->lb );
 90+ }
 91+ assert( '! isset( $this->db ) || ! $this->db->isOpen() /* DB is either unset, or been closed via LB */' );
 92+
 93+ unset( $this->db );
 94+
 95+ // Trying to set up new connection.
 96+ // We do /not/ retry upon failure, but delegate to encapsulating logic, to avoid
 97+ // individually retrying at different layers of code.
 98+
 99+ // 1. The LoadBalancer.
 100+ try {
 101+ $this->lb = wfGetLBFactory()->newMainLB();
 102+ } catch (Exception $e) {
 103+ throw new MWException( __METHOD__ . " rotating DB failed to obtain new load balancer (" . $e->getMessage() . ")" );
 104+ }
 105+
 106+
 107+ // 2. The Connection, through the load balancer.
 108+ try {
 109+ $this->db = $this->lb->getConnection( DB_SLAVE, 'backup' );
 110+ } catch (Exception $e) {
 111+ throw new MWException( __METHOD__ . " rotating DB failed to obtain new database (" . $e->getMessage() . ")" );
 112+ }
 113+
 114+ assert( 'isset( $this->lb ) && isset( $this->db ) && $this->db->isOpen() /* rotating the DB worked */' );
 115+ }
 116+
 117+
75118 function initProgress( $history ) {
76119 parent::initProgress();
77120 $this->timeOfCheckpoint = $this->startTime;
@@ -87,7 +130,19 @@
88131
89132 $this->initProgress( $this->history );
90133
91 - $this->db = $this->backupDb();
 134+ // We are trying to get an initial database connection to avoid that the
 135+ // first try of this request's first call to getText fails. However, if
 136+ // obtaining a good DB connection fails it's not a serious issue, as
 137+ // getText does retry upon failure and can start without having a working
 138+ // DB connection.
 139+ try {
 140+ $this->rotateDb();
 141+ } catch (Exception $e) {
 142+ // We do not even count this as failure. Just let eventual
 143+ // watchdogs know.
 144+ $this->progress( "Getting initial DB connection failed (" .
 145+ $e->getMessage() . ")" );
 146+ }
92147
93148 $this->egress = new ExportProgressFilter( $this->sink, $this );
94149
@@ -316,98 +371,146 @@
317372 return true;
318373 }
319374
 375+ /**
 376+ * Tries to get the revision text for a revision id.
 377+ *
 378+ * Upon errors, retries (Up to $this->maxFailures tries each call).
 379+ * If still no good revision get could be found even after this retrying, "" is returned.
 380+ * If no good revision text could be returned for
 381+ * $this->maxConsecutiveFailedTextRetrievals consecutive calls to getText, MWException
 382+ * is thrown.
 383+ *
 384+ * @param $id string The revision id to get the text for
 385+ *
 386+ * @return string The revision text for $id, or ""
 387+ * @throws MWException
 388+ */
320389 function getText( $id ) {
 390+ $prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch.
 391+ $text = false; // The candidate for a good text. false if no proper value.
 392+ $failures = 0; // The number of times, this invocation of getText already failed.
 393+
 394+ static $consecutiveFailedTextRetrievals = 0; // The number of times getText failed without
 395+ // yielding a good text in between.
 396+
321397 $this->fetchCount++;
322 - if ( isset( $this->prefetch ) ) {
323 - $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev );
324 - if ( $text !== null ) { // Entry missing from prefetch dump
325 - $dbr = wfGetDB( DB_SLAVE );
 398+
 399+ // To allow to simply return on success and do not have to worry about book keeping,
 400+ // we assume, this fetch works (possible after some retries). Nevertheless, we koop
 401+ // the old value, so we can restore it, if problems occur (See after the while loop).
 402+ $oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals;
 403+ $consecutiveFailedTextRetrievals = 0;
 404+
 405+ while ( $failures < $this->maxFailures ) {
 406+
 407+ // As soon as we found a good text for the $id, we will return immediately.
 408+ // Hence, if we make it past the try catch block, we know that we did not
 409+ // find a good text.
 410+
 411+ try {
 412+ // Step 1: Get some text (or reuse from previous iteratuon if checking
 413+ // for plausibility failed)
 414+
 415+ // Trying to get prefetch, if it has not been tried before
 416+ if ( $text === false && isset( $this->prefetch ) && $prefetchNotTried ) {
 417+ $prefetchNotTried = false;
 418+ $tryIsPrefetch = true;
 419+ $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev );
 420+ if ( $text === null ) {
 421+ $text = false;
 422+ }
 423+ }
 424+
 425+ if ( $text === false ) {
 426+ // Fallback to asking the database
 427+ $tryIsPrefetch = false;
 428+ if ( $this->spawn ) {
 429+ $text = $this->getTextSpawned( $id );
 430+ } else {
 431+ $text = $this->getTextDb( $id );
 432+ }
 433+ }
 434+
 435+ if ( $text === false ) {
 436+ throw new MWException( "Generic error while obtaining text for id " . $id );
 437+ }
 438+
 439+ assert( '$text !== false' );
 440+ // We received a good candidate for the text of $id via some method
 441+
 442+ // Step 2: Checking for plausibility and return the text if it is
 443+ // plausible
326444 $revID = intval( $this->thisRev );
327 - $revLength = $dbr->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) );
328 - // if length of rev text in file doesn't match length in db, we reload
329 - // this avoids carrying forward broken data from previous xml dumps
 445+ if ( ! isset( $this->db ) ) {
 446+ throw new MWException( "No database available" );
 447+ }
 448+ $revLength = $this->db->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) );
330449 if( strlen( $text ) == $revLength ) {
331 - $this->prefetchCount++;
 450+ if ( $tryIsPrefetch ) {
 451+ $this->prefetchCount++;
 452+ }
332453 return $text;
333454 }
 455+
 456+ assert( 'strlen( $text ) != $revLength /* Obtained text unplausible */' );
 457+ $text = false;
 458+ throw new MWException( "Received text is unplausible for id " . $id );
 459+
 460+ assert( 'false /* text is either returned or exception has been thrown */' );
 461+
 462+ } catch (Exception $e) {
 463+ $msg = "getting/checking text " . $id . " failed (".$e->getMessage().")";
 464+ if ( $failures + 1 < $this->maxFailures ) {
 465+ $msg .= " (Will retry " . ( $this->maxFailures - $failures - 1) . " more times)";
 466+ }
 467+ $this->progress( $msg );
334468 }
335 - }
336 - return $this->doGetText( $id );
337 - }
 469+
 470+ // Something went wrong; we did not a text that was plausible :(
 471+ $failures++;
338472
339 - private function doGetText( $id ) {
340 - $id = intval( $id );
341 - $this->failures = 0;
342 - $ex = new MWException( "Graceful storage failure" );
343 - while (true) {
344 - if ( $this->spawn ) {
345 - if ($this->failures) {
346 - // we don't know why it failed, could be the child process
347 - // borked, could be db entry busted, could be db server out to lunch,
348 - // so cover all bases
 473+
 474+ // After backing off for some time, we try to reboot the whole process as
 475+ // much as possible to not carry over failures from one part to the other
 476+ // parts
 477+ sleep( $this->failureTimeout );
 478+ try {
 479+ $this->rotateDb();
 480+ if ( $this->spawn ) {
349481 $this->closeSpawn();
350482 $this->openSpawn();
351483 }
352 - $text = $this->getTextSpawned( $id );
353 - } else {
354 - $text = $this->getTextDbSafe( $id );
 484+ } catch (Exception $e) {
 485+ $this->progress( "Rebooting getText infrastructure failed (".$e->getMessage().")" .
 486+ " Trying to continue anyways" );
355487 }
356 - if ( $text === false ) {
357 - $this->failures++;
358 - if ( $this->failures > $this->maxFailures) {
359 - $this->progress( "Failed to retrieve revision text for text id ".
360 - "$id after $this->maxFailures tries, giving up" );
361 - // were there so many bad retrievals in a row we want to bail?
362 - // at some point we have to declare the dump irretrievably broken
363 - $this->failedTextRetrievals++;
364 - if ($this->failedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals) {
365 - throw $ex;
366 - } else {
367 - // would be nice to return something better to the caller someday,
368 - // log what we know about the failure and about the revision
369 - return "";
370 - }
371 - } else {
372 - $this->progress( "Error $this->failures " .
373 - "of allowed $this->maxFailures retrieving revision text for text id $id! " .
374 - "Pausing $this->failureTimeout seconds before retry..." );
375 - sleep( $this->failureTimeout );
376 - }
377 - } else {
378 - $this->failedTextRetrievals= 0;
379 - return $text;
380 - }
381488 }
382 - return '';
383 - }
384489
385 - /**
386 - * Fetch a text revision from the database, retrying in case of failure.
387 - * This may survive some transitory errors by reconnecting, but
388 - * may not survive a long-term server outage.
389 - *
390 - * FIXME: WTF? Why is it using a loop and then returning unconditionally?
391 - * @param $id int
392 - * @return bool|string
393 - */
394 - private function getTextDbSafe( $id ) {
395 - while ( true ) {
396 - try {
397 - $text = $this->getTextDb( $id );
398 - } catch ( DBQueryError $ex ) {
399 - $text = false;
400 - }
401 - return $text;
 490+ // Retirieving a good text for $id failed (at least) maxFailures times.
 491+ // We abort for this $id.
 492+
 493+ // Restoring the consecutive failures, and maybe aborting, if the dump
 494+ // is too broken.
 495+ $consecutiveFailedTextRetrievals = $oldConsecutiveFailedTextRetrievals + 1;
 496+ if ( $consecutiveFailedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals ) {
 497+ throw new MWException( "Graceful storage failure" );
402498 }
 499+
 500+ return "";
403501 }
404502
 503+
405504 /**
406505 * May throw a database error if, say, the server dies during query.
407506 * @param $id
408507 * @return bool|string
 508+ * @throws MWException
409509 */
410510 private function getTextDb( $id ) {
411511 global $wgContLang;
 512+ if ( ! isset( $this->db ) ) {
 513+ throw new MWException( __METHOD__ . "No database available" );
 514+ }
412515 $row = $this->db->selectRow( 'text',
413516 array( 'old_text', 'old_flags' ),
414517 array( 'old_id' => $id ),

Follow-up revisions

RevisionCommit summaryAuthorDate
r114256Follow up to r114252: Removing assertsqchris11:53, 20 March 2012
r114335Revert r107309, r113601, r113704, r113742, r113792, r113838, r113859, r113893......catrope00:16, 21 March 2012

Comments

#Comment by MaxSem (talk | contribs)   11:28, 20 March 2012

Aaaaaaaaaa... ssert! :P

I thought we convinced you?

#Comment by QChris (WMF) (talk | contribs)   11:39, 20 March 2012

Sorry. Yes. This patch was a few days old, before I got ... 'convinced' ;) I will provide a fix today.

#Comment by QChris (WMF) (talk | contribs)   12:04, 20 March 2012

See r114256, and r114257.

I transformed one assert into an if-guard, but removed the other asserts, as they were a double safety net for future updates.

Status & tagging log