Index: trunk/phase3/maintenance/dumpTextPass.php |
— | — | @@ -41,9 +41,7 @@ |
42 | 42 | var $prefetchCountLast = 0; |
43 | 43 | var $fetchCountLast = 0; |
44 | 44 | |
45 | | - var $failures = 0; |
46 | 45 | var $maxFailures = 5; |
47 | | - var $failedTextRetrievals = 0; |
48 | 46 | var $maxConsecutiveFailedTextRetrievals = 200; |
49 | 47 | var $failureTimeout = 5; // Seconds to sleep after db failure |
50 | 48 | |
— | — | @@ -71,6 +69,51 @@ |
72 | 70 | */ |
73 | 71 | protected $db; |
74 | 72 | |
| 73 | + |
| 74 | + /** |
| 75 | + * Drop the database connection $this->db and try to get a new one. |
| 76 | + * |
| 77 | + * This function tries to get a /different/ connection if this is |
| 78 | + * possible. Hence, (if this is possible) it switches to a different |
| 79 | + * failover upon each call. |
| 80 | + * |
| 81 | + * This function resets $this->lb and closes all connections on it. |
| 82 | + * |
| 83 | + * @throws MWException |
| 84 | + */ |
| 85 | + function rotateDb() { |
| 86 | + // Cleaning up old connections |
| 87 | + if ( isset( $this->lb ) ) { |
| 88 | + $this->lb->closeAll(); |
| 89 | + unset( $this->lb ); |
| 90 | + } |
| 91 | + assert( '! isset( $this->db ) || ! $this->db->isOpen() /* DB is either unset, or been closed via LB */' ); |
| 92 | + |
| 93 | + unset( $this->db ); |
| 94 | + |
| 95 | + // Trying to set up new connection. |
| 96 | + // We do /not/ retry upon failure, but delegate to encapsulating logic, to avoid |
| 97 | + // individually retrying at different layers of code. |
| 98 | + |
| 99 | + // 1. The LoadBalancer. |
| 100 | + try { |
| 101 | + $this->lb = wfGetLBFactory()->newMainLB(); |
| 102 | + } catch (Exception $e) { |
| 103 | + throw new MWException( __METHOD__ . " rotating DB failed to obtain new load balancer (" . $e->getMessage() . ")" ); |
| 104 | + } |
| 105 | + |
| 106 | + |
| 107 | + // 2. The Connection, through the load balancer. |
| 108 | + try { |
| 109 | + $this->db = $this->lb->getConnection( DB_SLAVE, 'backup' ); |
| 110 | + } catch (Exception $e) { |
| 111 | + throw new MWException( __METHOD__ . " rotating DB failed to obtain new database (" . $e->getMessage() . ")" ); |
| 112 | + } |
| 113 | + |
| 114 | + assert( 'isset( $this->lb ) && isset( $this->db ) && $this->db->isOpen() /* rotating the DB worked */' ); |
| 115 | + } |
| 116 | + |
| 117 | + |
75 | 118 | function initProgress( $history ) { |
76 | 119 | parent::initProgress(); |
77 | 120 | $this->timeOfCheckpoint = $this->startTime; |
— | — | @@ -87,7 +130,19 @@ |
88 | 131 | |
89 | 132 | $this->initProgress( $this->history ); |
90 | 133 | |
91 | | - $this->db = $this->backupDb(); |
| 134 | + // We are trying to get an initial database connection to avoid that the |
| 135 | + // first try of this request's first call to getText fails. However, if |
| 136 | + // obtaining a good DB connection fails it's not a serious issue, as |
| 137 | + // getText does retry upon failure and can start without having a working |
| 138 | + // DB connection. |
| 139 | + try { |
| 140 | + $this->rotateDb(); |
| 141 | + } catch (Exception $e) { |
| 142 | + // We do not even count this as failure. Just let eventual |
| 143 | + // watchdogs know. |
| 144 | + $this->progress( "Getting initial DB connection failed (" . |
| 145 | + $e->getMessage() . ")" ); |
| 146 | + } |
92 | 147 | |
93 | 148 | $this->egress = new ExportProgressFilter( $this->sink, $this ); |
94 | 149 | |
— | — | @@ -316,98 +371,146 @@ |
317 | 372 | return true; |
318 | 373 | } |
319 | 374 | |
| 375 | + /** |
| 376 | + * Tries to get the revision text for a revision id. |
| 377 | + * |
| 378 | + * Upon errors, retries (Up to $this->maxFailures tries each call). |
| 379 | + * If still no good revision get could be found even after this retrying, "" is returned. |
| 380 | + * If no good revision text could be returned for |
| 381 | + * $this->maxConsecutiveFailedTextRetrievals consecutive calls to getText, MWException |
| 382 | + * is thrown. |
| 383 | + * |
| 384 | + * @param $id string The revision id to get the text for |
| 385 | + * |
| 386 | + * @return string The revision text for $id, or "" |
| 387 | + * @throws MWException |
| 388 | + */ |
320 | 389 | function getText( $id ) { |
| 390 | + $prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch. |
| 391 | + $text = false; // The candidate for a good text. false if no proper value. |
| 392 | + $failures = 0; // The number of times, this invocation of getText already failed. |
| 393 | + |
| 394 | + static $consecutiveFailedTextRetrievals = 0; // The number of times getText failed without |
| 395 | + // yielding a good text in between. |
| 396 | + |
321 | 397 | $this->fetchCount++; |
322 | | - if ( isset( $this->prefetch ) ) { |
323 | | - $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev ); |
324 | | - if ( $text !== null ) { // Entry missing from prefetch dump |
325 | | - $dbr = wfGetDB( DB_SLAVE ); |
| 398 | + |
| 399 | + // To allow to simply return on success and do not have to worry about book keeping, |
| 400 | + // we assume, this fetch works (possible after some retries). Nevertheless, we koop |
| 401 | + // the old value, so we can restore it, if problems occur (See after the while loop). |
| 402 | + $oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals; |
| 403 | + $consecutiveFailedTextRetrievals = 0; |
| 404 | + |
| 405 | + while ( $failures < $this->maxFailures ) { |
| 406 | + |
| 407 | + // As soon as we found a good text for the $id, we will return immediately. |
| 408 | + // Hence, if we make it past the try catch block, we know that we did not |
| 409 | + // find a good text. |
| 410 | + |
| 411 | + try { |
| 412 | + // Step 1: Get some text (or reuse from previous iteratuon if checking |
| 413 | + // for plausibility failed) |
| 414 | + |
| 415 | + // Trying to get prefetch, if it has not been tried before |
| 416 | + if ( $text === false && isset( $this->prefetch ) && $prefetchNotTried ) { |
| 417 | + $prefetchNotTried = false; |
| 418 | + $tryIsPrefetch = true; |
| 419 | + $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev ); |
| 420 | + if ( $text === null ) { |
| 421 | + $text = false; |
| 422 | + } |
| 423 | + } |
| 424 | + |
| 425 | + if ( $text === false ) { |
| 426 | + // Fallback to asking the database |
| 427 | + $tryIsPrefetch = false; |
| 428 | + if ( $this->spawn ) { |
| 429 | + $text = $this->getTextSpawned( $id ); |
| 430 | + } else { |
| 431 | + $text = $this->getTextDb( $id ); |
| 432 | + } |
| 433 | + } |
| 434 | + |
| 435 | + if ( $text === false ) { |
| 436 | + throw new MWException( "Generic error while obtaining text for id " . $id ); |
| 437 | + } |
| 438 | + |
| 439 | + assert( '$text !== false' ); |
| 440 | + // We received a good candidate for the text of $id via some method |
| 441 | + |
| 442 | + // Step 2: Checking for plausibility and return the text if it is |
| 443 | + // plausible |
326 | 444 | $revID = intval( $this->thisRev ); |
327 | | - $revLength = $dbr->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) ); |
328 | | - // if length of rev text in file doesn't match length in db, we reload |
329 | | - // this avoids carrying forward broken data from previous xml dumps |
| 445 | + if ( ! isset( $this->db ) ) { |
| 446 | + throw new MWException( "No database available" ); |
| 447 | + } |
| 448 | + $revLength = $this->db->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) ); |
330 | 449 | if( strlen( $text ) == $revLength ) { |
331 | | - $this->prefetchCount++; |
| 450 | + if ( $tryIsPrefetch ) { |
| 451 | + $this->prefetchCount++; |
| 452 | + } |
332 | 453 | return $text; |
333 | 454 | } |
| 455 | + |
| 456 | + assert( 'strlen( $text ) != $revLength /* Obtained text unplausible */' ); |
| 457 | + $text = false; |
| 458 | + throw new MWException( "Received text is unplausible for id " . $id ); |
| 459 | + |
| 460 | + assert( 'false /* text is either returned or exception has been thrown */' ); |
| 461 | + |
| 462 | + } catch (Exception $e) { |
| 463 | + $msg = "getting/checking text " . $id . " failed (".$e->getMessage().")"; |
| 464 | + if ( $failures + 1 < $this->maxFailures ) { |
| 465 | + $msg .= " (Will retry " . ( $this->maxFailures - $failures - 1) . " more times)"; |
| 466 | + } |
| 467 | + $this->progress( $msg ); |
334 | 468 | } |
335 | | - } |
336 | | - return $this->doGetText( $id ); |
337 | | - } |
| 469 | + |
| 470 | + // Something went wrong; we did not a text that was plausible :( |
| 471 | + $failures++; |
338 | 472 | |
339 | | - private function doGetText( $id ) { |
340 | | - $id = intval( $id ); |
341 | | - $this->failures = 0; |
342 | | - $ex = new MWException( "Graceful storage failure" ); |
343 | | - while (true) { |
344 | | - if ( $this->spawn ) { |
345 | | - if ($this->failures) { |
346 | | - // we don't know why it failed, could be the child process |
347 | | - // borked, could be db entry busted, could be db server out to lunch, |
348 | | - // so cover all bases |
| 473 | + |
| 474 | + // After backing off for some time, we try to reboot the whole process as |
| 475 | + // much as possible to not carry over failures from one part to the other |
| 476 | + // parts |
| 477 | + sleep( $this->failureTimeout ); |
| 478 | + try { |
| 479 | + $this->rotateDb(); |
| 480 | + if ( $this->spawn ) { |
349 | 481 | $this->closeSpawn(); |
350 | 482 | $this->openSpawn(); |
351 | 483 | } |
352 | | - $text = $this->getTextSpawned( $id ); |
353 | | - } else { |
354 | | - $text = $this->getTextDbSafe( $id ); |
| 484 | + } catch (Exception $e) { |
| 485 | + $this->progress( "Rebooting getText infrastructure failed (".$e->getMessage().")" . |
| 486 | + " Trying to continue anyways" ); |
355 | 487 | } |
356 | | - if ( $text === false ) { |
357 | | - $this->failures++; |
358 | | - if ( $this->failures > $this->maxFailures) { |
359 | | - $this->progress( "Failed to retrieve revision text for text id ". |
360 | | - "$id after $this->maxFailures tries, giving up" ); |
361 | | - // were there so many bad retrievals in a row we want to bail? |
362 | | - // at some point we have to declare the dump irretrievably broken |
363 | | - $this->failedTextRetrievals++; |
364 | | - if ($this->failedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals) { |
365 | | - throw $ex; |
366 | | - } else { |
367 | | - // would be nice to return something better to the caller someday, |
368 | | - // log what we know about the failure and about the revision |
369 | | - return ""; |
370 | | - } |
371 | | - } else { |
372 | | - $this->progress( "Error $this->failures " . |
373 | | - "of allowed $this->maxFailures retrieving revision text for text id $id! " . |
374 | | - "Pausing $this->failureTimeout seconds before retry..." ); |
375 | | - sleep( $this->failureTimeout ); |
376 | | - } |
377 | | - } else { |
378 | | - $this->failedTextRetrievals= 0; |
379 | | - return $text; |
380 | | - } |
381 | 488 | } |
382 | | - return ''; |
383 | | - } |
384 | 489 | |
385 | | - /** |
386 | | - * Fetch a text revision from the database, retrying in case of failure. |
387 | | - * This may survive some transitory errors by reconnecting, but |
388 | | - * may not survive a long-term server outage. |
389 | | - * |
390 | | - * FIXME: WTF? Why is it using a loop and then returning unconditionally? |
391 | | - * @param $id int |
392 | | - * @return bool|string |
393 | | - */ |
394 | | - private function getTextDbSafe( $id ) { |
395 | | - while ( true ) { |
396 | | - try { |
397 | | - $text = $this->getTextDb( $id ); |
398 | | - } catch ( DBQueryError $ex ) { |
399 | | - $text = false; |
400 | | - } |
401 | | - return $text; |
| 490 | + // Retirieving a good text for $id failed (at least) maxFailures times. |
| 491 | + // We abort for this $id. |
| 492 | + |
| 493 | + // Restoring the consecutive failures, and maybe aborting, if the dump |
| 494 | + // is too broken. |
| 495 | + $consecutiveFailedTextRetrievals = $oldConsecutiveFailedTextRetrievals + 1; |
| 496 | + if ( $consecutiveFailedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals ) { |
| 497 | + throw new MWException( "Graceful storage failure" ); |
402 | 498 | } |
| 499 | + |
| 500 | + return ""; |
403 | 501 | } |
404 | 502 | |
| 503 | + |
405 | 504 | /** |
406 | 505 | * May throw a database error if, say, the server dies during query. |
407 | 506 | * @param $id |
408 | 507 | * @return bool|string |
| 508 | + * @throws MWException |
409 | 509 | */ |
410 | 510 | private function getTextDb( $id ) { |
411 | 511 | global $wgContLang; |
| 512 | + if ( ! isset( $this->db ) ) { |
| 513 | + throw new MWException( __METHOD__ . "No database available" ); |
| 514 | + } |
412 | 515 | $row = $this->db->selectRow( 'text', |
413 | 516 | array( 'old_text', 'old_flags' ), |
414 | 517 | array( 'old_id' => $id ), |