Index: trunk/phase3/maintenance/fetchText.php |
— | — | @@ -28,7 +28,17 @@ |
29 | 29 | $this->mDescription = "Fetch the revision text from an old_id"; |
30 | 30 | } |
31 | 31 | |
32 | | - public function execute() { |
| 32 | + /* |
| 33 | + * returns a string containing the following in order: |
| 34 | + * textid |
| 35 | + * \n |
| 36 | + * length of text (-1 on error = failure to retrieve/unserialize/gunzip/etc) |
| 37 | + * \n |
| 38 | + * text (may be empty) |
| 39 | + * |
| 40 | + * note that that the text string itself is *not* followed by newline |
| 41 | + */ |
| 42 | + public function execute() { |
33 | 43 | $db = wfGetDB( DB_SLAVE ); |
34 | 44 | $stdin = $this->getStdin(); |
35 | 45 | while ( !feof( $stdin ) ) { |
— | — | @@ -39,7 +49,14 @@ |
40 | 50 | } |
41 | 51 | $textId = intval( $line ); |
42 | 52 | $text = $this->doGetText( $db, $textId ); |
43 | | - $this->output( $textId . "\n" . strlen( $text ) . "\n" . $text ); |
| 53 | + if ($text === false) { |
| 54 | + # actual error, not zero-length text |
| 55 | + $textLen = "-1"; |
| 56 | + } |
| 57 | + else { |
| 58 | + $textLen = strlen($text); |
| 59 | + } |
| 60 | + $this->output( $textId . "\n" . $textLen . "\n" . $text ); |
44 | 61 | } |
45 | 62 | } |
46 | 63 | |
Index: trunk/phase3/maintenance/dumpTextPass.php |
— | — | @@ -38,7 +38,9 @@ |
39 | 39 | var $prefetchCount = 0; |
40 | 40 | |
41 | 41 | var $failures = 0; |
42 | | - var $maxFailures = 200; |
| 42 | + var $maxFailures = 5; |
| 43 | + var $failedTextRetrievals = 0; |
| 44 | + var $maxConsecutiveFailedTextRetrievals = 200; |
43 | 45 | var $failureTimeout = 5; // Seconds to sleep after db failure |
44 | 46 | |
45 | 47 | var $php = "php"; |
— | — | @@ -203,11 +205,50 @@ |
204 | 206 | } |
205 | 207 | |
206 | 208 | private function doGetText( $id ) { |
207 | | - if ( $this->spawn ) { |
208 | | - return $this->getTextSpawned( $id ); |
209 | | - } else { |
210 | | - return $this->getTextDbSafe( $id ); |
| 209 | + |
| 210 | + $this->failures = 0; |
| 211 | + $ex = new MWException( "Graceful storage failure" ); |
| 212 | + while (true) { |
| 213 | + if ( $this->spawn ) { |
| 214 | + if ($this->failures) { |
| 215 | + // we don't know why it failed, could be the child process |
| 216 | + // borked, could be db entry busted, could be db server out to lunch, |
| 217 | + // so cover all bases |
| 218 | + $this->closeSpawn(); |
| 219 | + $this->openSpawn(); |
| 220 | + } |
| 221 | + $text = $this->getTextSpawned( $id ); |
| 222 | + } else { |
| 223 | + $text = $this->getTextDbSafe( $id ); |
| 224 | + } |
| 225 | + if ( $text === false ) { |
| 226 | + $this->failures++; |
| 227 | + if ( $this->failures > $this->maxFailures) { |
| 228 | + $this->progress( "Failed to retrieve revision text for text id ". |
| 229 | + "$id after $this->maxFailures tries, giving up" ); |
| 230 | + // were there so many bad retrievals in a row we want to bail? |
| 231 | + // at some point we have to declare the dump irretrievably broken |
| 232 | + $this->failedTextRetrievals++; |
| 233 | + if ($this->failedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals) { |
| 234 | + throw $ex; |
| 235 | + } |
| 236 | + else { |
| 237 | + // would be nice to return something better to the caller someday, |
| 238 | + // log what we know about the failure and about the revision |
| 239 | + return(""); |
| 240 | + } |
| 241 | + } else { |
| 242 | + $this->progress( "Error $this->failures " . |
| 243 | + "of allowed $this->maxFailures retrieving revision text for text id $id! " . |
| 244 | + "Pausing $this->failureTimeout seconds before retry..." ); |
| 245 | + sleep( $this->failureTimeout ); |
| 246 | + } |
| 247 | + } else { |
| 248 | + $this->failedTextRetrievals= 0; |
| 249 | + return( $text ); |
| 250 | + } |
211 | 251 | } |
| 252 | + |
212 | 253 | } |
213 | 254 | |
214 | 255 | /** |
— | — | @@ -223,19 +264,7 @@ |
224 | 265 | } catch ( DBQueryError $ex ) { |
225 | 266 | $text = false; |
226 | 267 | } |
227 | | - if ( $text === false ) { |
228 | | - $this->failures++; |
229 | | - if ( $this->failures > $this->maxFailures ) { |
230 | | - throw $ex; |
231 | | - } else { |
232 | | - $this->progress( "Database failure $this->failures " . |
233 | | - "of allowed $this->maxFailures for revision $id! " . |
234 | | - "Pausing $this->failureTimeout seconds..." ); |
235 | | - sleep( $this->failureTimeout ); |
236 | | - } |
237 | | - } else { |
238 | | - return $text; |
239 | | - } |
| 268 | + return $text; |
240 | 269 | } |
241 | 270 | } |
242 | 271 | |
— | — | @@ -264,21 +293,9 @@ |
265 | 294 | // First time? |
266 | 295 | $this->openSpawn(); |
267 | 296 | } |
268 | | - while ( true ) { |
269 | | - |
270 | | - $text = $this->getTextSpawnedOnce( $id ); |
271 | | - if ( !is_string( $text ) ) { |
272 | | - $this->progress( "Database subprocess failed. Respawning..." ); |
273 | | - |
274 | | - $this->closeSpawn(); |
275 | | - sleep( $this->failureTimeout ); |
276 | | - $this->openSpawn(); |
277 | | - |
278 | | - continue; |
279 | | - } |
280 | | - wfRestoreWarnings(); |
281 | | - return $text; |
282 | | - } |
| 297 | + $text = $this->getTextSpawnedOnce( $id ); |
| 298 | + wfRestoreWarnings(); |
| 299 | + return $text; |
283 | 300 | } |
284 | 301 | |
285 | 302 | function openSpawn() { |
— | — | @@ -354,6 +371,9 @@ |
355 | 372 | if ( $len === false ) return false; |
356 | 373 | |
357 | 374 | $nbytes = intval( $len ); |
| 375 | + // actual error, not zero-length text |
| 376 | + if ($nbytes < 0 ) return false; |
| 377 | + |
358 | 378 | $text = ""; |
359 | 379 | |
360 | 380 | // Subprocess may not send everything at once, we have to loop. |