Index: branches/REL1_18/phase3/maintenance/backup.inc |
— | — | @@ -51,6 +51,10 @@ |
52 | 52 | var $stubText = false; // include rev_text_id instead of text; for 2-pass dump |
53 | 53 | var $dumpUploads = false; |
54 | 54 | var $dumpUploadFileContents = false; |
| 55 | + var $lastTime = 0; |
| 56 | + var $pageCountLast = 0; |
| 57 | + var $revCountLast = 0; |
| 58 | + var $ID = 0; |
55 | 59 | |
56 | 60 | function BackupDumper( $args ) { |
57 | 61 | $this->stderr = fopen( "php://stderr", "wt" ); |
— | — | @@ -233,6 +237,8 @@ |
234 | 238 | $dbr = wfGetDB( DB_SLAVE ); |
235 | 239 | $this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ ); |
236 | 240 | $this->startTime = wfTime(); |
| 241 | + $this->lastTime = $this->startTime; |
| 242 | + $this->ID = getmypid(); |
237 | 243 | } |
238 | 244 | |
239 | 245 | /** |
— | — | @@ -281,21 +287,35 @@ |
282 | 288 | |
283 | 289 | function showReport() { |
284 | 290 | if ( $this->reporting ) { |
285 | | - $delta = wfTime() - $this->startTime; |
286 | 291 | $now = wfTimestamp( TS_DB ); |
287 | | - if ( $delta ) { |
288 | | - $rate = $this->pageCount / $delta; |
289 | | - $revrate = $this->revCount / $delta; |
| 292 | + $nowts = wfTime(); // one clock sample: used for both deltas and stored as lastTime below |
| 293 | + $deltaAll = $nowts - $this->startTime; |
| 294 | + $deltaPart = $nowts - $this->lastTime; |
| 295 | + $this->pageCountPart = $this->pageCount - $this->pageCountLast; |
| 296 | + $this->revCountPart = $this->revCount - $this->revCountLast; |
| 297 | + if ( $deltaAll ) { |
290 | 298 | $portion = $this->revCount / $this->maxCount; |
291 | | - $eta = $this->startTime + $delta / $portion; |
| 299 | + $eta = $this->startTime + $deltaAll / $portion; |
292 | 300 | $etats = wfTimestamp( TS_DB, intval( $eta ) ); |
| 301 | + $pageRate = $this->pageCount / $deltaAll; |
| 302 | + $revRate = $this->revCount / $deltaAll; |
293 | 303 | } else { |
294 | | - $rate = '-'; |
295 | | - $revrate = '-'; |
| 304 | + $pageRate = '-'; |
| 305 | + $revRate = '-'; |
296 | 306 | $etats = '-'; |
297 | 307 | } |
298 | | - $this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), ETA %s [max %d]", |
299 | | - $now, wfWikiID(), $this->pageCount, $rate, $this->revCount, $revrate, $etats, $this->maxCount ) ); |
| 308 | + if ( $deltaPart ) { |
| 309 | + $pageRatePart = $this->pageCountPart / $deltaPart; |
| 310 | + $revRatePart = $this->revCountPart / $deltaPart; |
| 311 | + } else { |
| 312 | + $pageRatePart = '-'; |
| 313 | + $revRatePart = '-'; |
| 314 | + } |
| 315 | + $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]", |
| 316 | + $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $etats, $this->maxCount ) ); |
| 317 | + $this->lastTime = $nowts; |
| 318 | + $this->pageCountLast = $this->pageCount; // without this the "curr" page rate counts every page since start |
| 319 | + $this->revCountLast = $this->revCount; |
300 | 320 | } |
301 | 321 | } |
302 | 322 | |
Index: branches/REL1_18/phase3/maintenance/dumpTextPass.php |
— | — | @@ -2,7 +2,7 @@ |
3 | 3 | /** |
4 | 4 | * Script that postprocesses XML dumps from dumpBackup.php to add page text |
5 | 5 | * |
6 | 6 | * Copyright © 2005 Brion Vibber <brion@pobox.com>, 2010 Alexandre Emsenhuber |
7 | 7 | * http://www.mediawiki.org/ |
8 | 8 | * |
9 | 9 | * This program is free software; you can redistribute it and/or modify |
— | — | @@ -35,11 +35,9 @@ |
36 | 36 | class TextPassDumper extends BackupDumper { |
37 | 37 | var $prefetch = null; |
38 | 38 | var $input = "php://stdin"; |
| 39 | + var $history = WikiExporter::FULL; |
39 | 40 | var $fetchCount = 0; |
40 | 41 | var $prefetchCount = 0; |
41 | | - var $lastTime = 0; |
42 | | - var $pageCountLast = 0; |
43 | | - var $revCountLast = 0; |
44 | 42 | var $prefetchCountLast = 0; |
45 | 43 | var $fetchCountLast = 0; |
46 | 44 | |
— | — | @@ -56,12 +54,21 @@ |
57 | 55 | var $spawnRead = false; |
58 | 56 | var $spawnErr = false; |
59 | 57 | |
60 | | - var $ID = 0; |
| 58 | + var $xmlwriterobj = false; |
61 | 59 | |
| 60 | + # when we spend more than maxTimeAllowed seconds on this run, we continue |
| 61 | + # processing until we write out the next complete page, then save output file(s), |
| 62 | + # rename it/them and open new one(s) |
| 63 | + var $maxTimeAllowed = 0; // 0 = no limit |
| 64 | + var $timeExceeded = false; |
| 65 | + var $firstPageWritten = false; |
| 66 | + var $lastPageWritten = false; |
| 67 | + var $checkpointJustWritten = false; |
| 68 | + var $checkpointFiles = array(); |
| 69 | + |
62 | 70 | function initProgress( $history ) { |
63 | | - parent::initProgress(); |
64 | | - $this->ID = getmypid(); |
65 | | - $this->lastTime = $this->startTime; |
| 71 | + parent::initProgress( $history ); // forward the history mode instead of dropping it; NOTE(review): assumes BackupDumper::initProgress() accepts this argument - confirm |
| 72 | + $this->timeOfCheckpoint = $this->startTime; // checkpoint timer starts when the run starts |
66 | 73 | } |
67 | 74 | |
68 | 75 | function dump( $history, $text = WikiExporter::TEXT ) { |
— | — | @@ -73,12 +80,25 @@ |
74 | 81 | if ( ini_get( 'display_errors' ) ) |
75 | 82 | ini_set( 'display_errors', 'stderr' ); |
76 | 83 | |
77 | | - $this->initProgress( $history ); |
| 84 | + $this->initProgress( $this->history ); |
78 | 85 | |
79 | 86 | $this->db = $this->backupDb(); |
80 | 87 | |
81 | | - $this->readDump(); |
| 88 | + $this->egress = new ExportProgressFilter( $this->sink, $this ); |
82 | 89 | |
| 90 | + # it would be nice to do it in the constructor, oh well. need egress set |
| 91 | + $this->finalOptionCheck(); |
| 92 | + |
| 93 | + # we only want this so we know how to close a stream :-P |
| 94 | + $this->xmlwriterobj = new XmlDumpWriter(); |
| 95 | + |
| 96 | + $input = fopen( $this->input, "rt" ); |
| 97 | + $result = $this->readDump( $input ); |
| 98 | + |
| 99 | + if ( WikiError::isError( $result ) ) { |
| 100 | + throw new MWException( $result->getMessage() ); |
| 101 | + } |
| 102 | + |
83 | 103 | if ( $this->spawnProc ) { |
84 | 104 | $this->closeSpawn(); |
85 | 105 | } |
— | — | @@ -98,6 +118,18 @@ |
99 | 119 | case 'stub': |
100 | 120 | $this->input = $url; |
101 | 121 | break; |
| 122 | + case 'maxtime': |
| 123 | + $this->maxTimeAllowed = intval($val)*60; |
| 124 | + break; |
| 125 | + case 'checkpointfile': |
| 126 | + $this->checkpointFiles[] = $val; |
| 127 | + break; |
| 128 | + case 'current': |
| 129 | + $this->history = WikiExporter::CURRENT; |
| 130 | + break; |
| 131 | + case 'full': |
| 132 | + $this->history = WikiExporter::FULL; |
| 133 | + break; |
102 | 134 | case 'spawn': |
103 | 135 | $this->spawn = true; |
104 | 136 | if ( $val ) { |
— | — | @@ -142,6 +174,7 @@ |
143 | 175 | |
144 | 176 | if ( $this->reporting ) { |
145 | 177 | $now = wfTimestamp( TS_DB ); |
| 178 | + $nowts = wfTime(); |
146 | 179 | $deltaAll = wfTime() - $this->startTime; |
147 | 180 | $deltaPart = wfTime() - $this->lastTime; |
148 | 181 | $this->pageCountPart = $this->pageCount - $this->pageCountLast; |
— | — | @@ -180,86 +213,98 @@ |
181 | 214 | $pageRatePart = '-'; |
182 | 215 | $revRatePart = '-'; |
183 | 216 | } |
184 | | - $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",- |
| 217 | + $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]", |
185 | 218 | $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this->maxCount ) ); |
186 | | - $this->lastTime = $now; |
187 | | - $this->partCountLast = $this->partCount; |
| 219 | + $this->lastTime = $nowts; |
188 | 220 | $this->revCountLast = $this->revCount; |
189 | 221 | $this->prefetchCountLast = $this->prefetchCount; |
190 | 222 | $this->fetchCountLast = $this->fetchCount; |
191 | 223 | } |
192 | 224 | } |
193 | 225 | |
194 | | - function readDump() { |
195 | | - $state = ''; |
196 | | - $lastName = ''; |
197 | | - $this->thisPage = 0; |
198 | | - $this->thisRev = 0; |
| 226 | + function setTimeExceeded() { |
| 227 | + $this->timeExceeded = True; |
| 228 | + } |
199 | 229 | |
200 | | - $reader = new XMLReader(); |
201 | | - $reader->open( $this->input ); |
202 | | - $writer = new XMLWriter(); |
203 | | - $writer->openMemory(); |
| 230 | + function checkIfTimeExceeded() { |
| 231 | + if ( $this->maxTimeAllowed && ( $this->lastTime - $this->timeOfCheckpoint > $this->maxTimeAllowed ) ) { |
| 232 | + return True; |
| 233 | + } |
| 234 | + return False; |
| 235 | + } |
204 | 236 | |
| 237 | + function finalOptionCheck() { |
| 238 | + if (($this->checkpointFiles && ! $this->maxTimeAllowed) || |
| 239 | + ($this->maxTimeAllowed && !$this->checkpointFiles)) { |
| 240 | + throw new MWException("Options checkpointfile and maxtime must be specified together.\n"); |
| 241 | + } |
| 242 | + foreach ($this->checkpointFiles as $checkpointFile) { |
| 243 | + $count = substr_count ($checkpointFile,"%s"); |
| 244 | + if (substr_count ($checkpointFile,"%s") != 2) { |
| 245 | + throw new MWException("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n"); |
| 246 | + } |
| 247 | + } |
205 | 248 | |
206 | | - while ( $reader->read() ) { |
207 | | - $tag = $reader->name; |
208 | | - $type = $reader->nodeType; |
| 249 | + if ($this->checkpointFiles) { |
| 250 | + $filenameList = $this->egress->getFilename(); |
| 251 | + if (! is_array($filenameList)) { |
| 252 | + $filenameList = array( $filenameList ); |
| 253 | + } |
| 254 | + if (count($filenameList) != count($this->checkpointFiles)) { |
| 255 | + throw new MWException("One checkpointfile must be specified for each output option, if maxtime is used.\n"); |
| 256 | + } |
| 257 | + } |
| 258 | + } |
209 | 259 | |
210 | | - if ( $type == XmlReader::END_ELEMENT ) { |
211 | | - $writer->endElement(); |
| 260 | + function readDump( $input ) { |
| 261 | + $this->buffer = ""; |
| 262 | + $this->openElement = false; |
| 263 | + $this->atStart = true; |
| 264 | + $this->state = ""; |
| 265 | + $this->lastName = ""; |
| 266 | + $this->thisPage = 0; |
| 267 | + $this->thisRev = 0; |
212 | 268 | |
213 | | - if ( $tag == 'revision' ) { |
214 | | - $this->revCount(); |
215 | | - $this->thisRev = ''; |
216 | | - } elseif ( $tag == 'page' ) { |
217 | | - $this->reportPage(); |
218 | | - $this->thisPage = ''; |
219 | | - } |
220 | | - } elseif ( $type == XmlReader::ELEMENT ) { |
221 | | - $attribs = array(); |
222 | | - if ( $reader->hasAttributes ) { |
223 | | - for ( $i = 0; $reader->moveToAttributeNo( $i ); $i++ ) { |
224 | | - $attribs[$reader->name] = $reader->value; |
225 | | - } |
226 | | - } |
| 269 | + $parser = xml_parser_create( "UTF-8" ); |
| 270 | + xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); |
227 | 271 | |
228 | | - if ( $reader->isEmptyElement && $tag == 'text' && isset( $attribs['id'] ) ) { |
229 | | - $writer->startElement( 'text' ); |
230 | | - $writer->writeAttribute( 'xml:space', 'preserve' ); |
231 | | - $text = $this->getText( $attribs['id'] ); |
232 | | - if ( strlen( $text ) ) { |
233 | | - $writer->text( $text ); |
234 | | - } |
235 | | - $writer->endElement(); |
236 | | - } else { |
237 | | - $writer->startElement( $tag ); |
238 | | - foreach( $attribs as $name => $val ) { |
239 | | - $writer->writeAttribute( $name, $val ); |
240 | | - } |
241 | | - if ( $reader->isEmptyElement ) { |
242 | | - $writer->endElement(); |
243 | | - } |
244 | | - } |
| 272 | + xml_set_element_handler( $parser, array( &$this, 'startElement' ), array( &$this, 'endElement' ) ); |
| 273 | + xml_set_character_data_handler( $parser, array( &$this, 'characterData' ) ); |
245 | 274 | |
246 | | - $lastName = $tag; |
247 | | - if ( $tag == 'revision' ) { |
248 | | - $state = 'revision'; |
249 | | - } elseif ( $tag == 'page' ) { |
250 | | - $state = 'page'; |
| 275 | + $offset = 0; // for context extraction on error reporting |
| 276 | + $bufferSize = 512 * 1024; |
| 277 | + do { |
| 278 | + if ($this->checkIfTimeExceeded()) { |
| 279 | + $this->setTimeExceeded(); |
| 280 | + } |
| 281 | + $chunk = fread( $input, $bufferSize ); |
| 282 | + if ( !xml_parse( $parser, $chunk, feof( $input ) ) ) { |
| 283 | + wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" ); |
| 284 | + return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset ); |
| 285 | + } |
| 286 | + $offset += strlen( $chunk ); |
| 287 | + } while ( $chunk !== false && !feof( $input ) ); |
| 288 | + if ($this->maxTimeAllowed) { |
| 289 | + $filenameList = $this->egress->getFilename(); |
| 290 | + # output written after the last checkpoint still needs to be renamed |
| 291 | + if (! is_array($filenameList)) { |
| 292 | + $filenameList = array( $filenameList ); |
| 293 | + } |
| 294 | + if (file_exists($filenameList[0])) { |
| 295 | + $newFilenames = array(); |
| 296 | + $firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT); |
| 297 | + $lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT); |
| 298 | + for ($i =0; $i < count($filenameList); $i++) { |
| 299 | + $checkpointNameFilledIn = sprintf($this->checkpointFiles[$i], $firstPageID, $lastPageID); |
| 300 | + $fileinfo = pathinfo($filenameList[$i]); |
| 301 | + $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn; // square brackets: curly-brace offsets are removed in PHP 8 |
251 | 302 | } |
252 | | - } elseif ( $type == XMLReader::SIGNIFICANT_WHITESPACE || $type == XMLReader::TEXT ) { |
253 | | - if ( $lastName == 'id' ) { |
254 | | - if ( $state == 'revision' ) { |
255 | | - $this->thisRev .= $reader->value; |
256 | | - } elseif ( $state == 'page' ) { |
257 | | - $this->thisPage .= $reader->value; |
258 | | - } |
259 | | - } |
260 | | - $writer->text( $reader->value ); |
| 303 | + $this->egress->closeAndRename( $newFilenames ); |
261 | 304 | } |
262 | | - $this->sink->write( $writer->outputMemory() ); |
263 | 305 | } |
| 306 | + xml_parser_free( $parser ); |
| 307 | + |
| 308 | + return true; |
264 | 309 | } |
265 | 310 | |
266 | 311 | function getText( $id ) { |
— | — | @@ -282,6 +327,7 @@ |
283 | 328 | } |
284 | 329 | |
285 | 330 | private function doGetText( $id ) { |
| 331 | + |
286 | 332 | $id = intval( $id ); |
287 | 333 | $this->failures = 0; |
288 | 334 | $ex = new MWException( "Graceful storage failure" ); |
— | — | @@ -469,13 +515,133 @@ |
470 | 516 | $normalized = $wgContLang->normalize( $stripped ); |
471 | 517 | return $normalized; |
472 | 518 | } |
| 519 | + |
| 520 | + function startElement( $parser, $name, $attribs ) { |
| 521 | + $this->checkpointJustWritten = false; |
| 522 | + |
| 523 | + $this->clearOpenElement( null ); |
| 524 | + $this->lastName = $name; |
| 525 | + |
| 526 | + if ( $name == 'revision' ) { |
| 527 | + $this->state = $name; |
| 528 | + $this->egress->writeOpenPage( null, $this->buffer ); |
| 529 | + $this->buffer = ""; |
| 530 | + } elseif ( $name == 'page' ) { |
| 531 | + $this->state = $name; |
| 532 | + if ( $this->atStart ) { |
| 533 | + $this->egress->writeOpenStream( $this->buffer ); |
| 534 | + $this->buffer = ""; |
| 535 | + $this->atStart = false; |
| 536 | + } |
| 537 | + } |
| 538 | + |
| 539 | + if ( $name == "text" && isset( $attribs['id'] ) ) { |
| 540 | + $text = $this->getText( $attribs['id'] ); |
| 541 | + $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) ); |
| 542 | + if ( strlen( $text ) > 0 ) { |
| 543 | + $this->characterData( $parser, $text ); |
| 544 | + } |
| 545 | + } else { |
| 546 | + $this->openElement = array( $name, $attribs ); |
| 547 | + } |
| 548 | + } |
| 549 | + |
| 550 | + function endElement( $parser, $name ) { |
| 551 | + $this->checkpointJustWritten = false; |
| 552 | + |
| 553 | + if ( $this->openElement ) { |
| 554 | + $this->clearOpenElement( "" ); |
| 555 | + } else { |
| 556 | + $this->buffer .= "</$name>"; |
| 557 | + } |
| 558 | + |
| 559 | + if ( $name == 'revision' ) { |
| 560 | + $this->egress->writeRevision( null, $this->buffer ); |
| 561 | + $this->buffer = ""; |
| 562 | + $this->thisRev = ""; |
| 563 | + } elseif ( $name == 'page' ) { |
| 564 | + if (! $this->firstPageWritten) { |
| 565 | + $this->firstPageWritten = trim($this->thisPage); |
| 566 | + } |
| 567 | + $this->lastPageWritten = trim($this->thisPage); |
| 568 | + if ($this->timeExceeded) { |
| 569 | + $this->egress->writeClosePage( $this->buffer ); |
| 570 | + # nasty hack, we can't just write the chardata after the |
| 571 | + # page tag, it will include leading blanks from the next line |
| 572 | + $this->egress->sink->write("\n"); |
| 573 | + |
| 574 | + $this->buffer = $this->xmlwriterobj->closeStream(); |
| 575 | + $this->egress->writeCloseStream( $this->buffer ); |
| 576 | + |
| 577 | + $this->buffer = ""; |
| 578 | + $this->thisPage = ""; |
| 579 | + /* this could be more than one file if we had more than one output arg */ |
| 580 | + # getFilename() may hand back one name or an array of names; normalise to an array |
| 581 | + $filenameList = $this->egress->getFilename(); |
| 582 | + |
| 583 | + if (! is_array($filenameList)) { |
| 584 | + $filenameList = array( $filenameList ); |
| 585 | + } |
| 586 | + $newFilenames = array(); |
| 587 | + $firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT); |
| 588 | + $lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT); |
| 589 | + for ($i =0; $i < count($filenameList); $i++) { |
| 590 | + $checkpointNameFilledIn = sprintf($this->checkpointFiles[$i], $firstPageID, $lastPageID); |
| 591 | + $fileinfo = pathinfo($filenameList[$i]); |
| 592 | + $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn; // square brackets: curly-brace offsets are removed in PHP 8 |
| 593 | + } |
| 594 | + $this->egress->closeRenameAndReopen( $newFilenames ); |
| 595 | + $this->buffer = $this->xmlwriterobj->openStream(); |
| 596 | + $this->timeExceeded = false; |
| 597 | + $this->timeOfCheckpoint = $this->lastTime; |
| 598 | + $this->firstPageWritten = false; |
| 599 | + $this->checkpointJustWritten = true; |
| 600 | + } |
| 601 | + else { |
| 602 | + $this->egress->writeClosePage( $this->buffer ); |
| 603 | + $this->buffer = ""; |
| 604 | + $this->thisPage = ""; |
| 605 | + } |
| 606 | + |
| 607 | + } elseif ( $name == 'mediawiki' ) { |
| 608 | + $this->egress->writeCloseStream( $this->buffer ); |
| 609 | + $this->buffer = ""; |
| 610 | + } |
| 611 | + } |
| 612 | + |
| 613 | + function characterData( $parser, $data ) { |
| 614 | + $this->clearOpenElement( null ); |
| 615 | + if ( $this->lastName == "id" ) { |
| 616 | + if ( $this->state == "revision" ) { |
| 617 | + $this->thisRev .= $data; |
| 618 | + } elseif ( $this->state == "page" ) { |
| 619 | + $this->thisPage .= $data; |
| 620 | + } |
| 621 | + } |
| 622 | + # have to skip the newline left over from closepagetag line of |
| 623 | + # end of checkpoint files. nasty hack!! |
| 624 | + if ($this->checkpointJustWritten) { |
| 625 | + if ($data[0] == "\n") { |
| 626 | + $data = substr($data,1); |
| 627 | + } |
| 628 | + $this->checkpointJustWritten = false; |
| 629 | + } |
| 630 | + $this->buffer .= htmlspecialchars( $data ); |
| 631 | + } |
| 632 | + |
| 633 | + function clearOpenElement( $style ) { |
| 634 | + if ( $this->openElement ) { |
| 635 | + $this->buffer .= Xml::element( $this->openElement[0], $this->openElement[1], $style ); |
| 636 | + $this->openElement = false; |
| 637 | + } |
| 638 | + } |
473 | 639 | } |
474 | 640 | |
475 | 641 | |
476 | 642 | $dumper = new TextPassDumper( $argv ); |
477 | 643 | |
478 | 644 | if ( !isset( $options['help'] ) ) { |
479 | | - $dumper->dump( WikiExporter::FULL ); |
| 645 | + $dumper->dump( true ); |
480 | 646 | } else { |
481 | 647 | $dumper->progress( <<<ENDS |
482 | 648 | This script postprocesses XML dumps from dumpBackup.php to add |
— | — | @@ -489,17 +655,20 @@ |
490 | 656 | --stub=<type>:<file> To load a compressed stub dump instead of stdin |
491 | 657 | --prefetch=<type>:<file> Use a prior dump file as a text source, to save |
492 | 658 | pressure on the database. |
| 659 | + (Requires the XMLReader extension) |
| 660 | + --maxtime=<minutes> Write out checkpoint file after this many minutes (writing |
| 661 | + out complete page, closing xml file properly, and opening new one |
| 662 | + with header). This option requires the checkpointfile option. |
| 663 | + --checkpointfile=<filenamepattern> Use this string for checkpoint filenames, |
| 664 | + substituting first pageid written for the first %s (required) and the |
| 665 | + last pageid written for the second %s if it exists. |
493 | 666 | --quiet Don't dump status reports to stderr. |
494 | 667 | --report=n Report position and speed after every n pages processed. |
495 | 668 | (Default: 100) |
496 | 669 | --server=h Force reading from MySQL server h |
497 | | - --output=<type>:<file> Write to a file instead of stdout |
498 | | - <type>s: file, gzip, bzip2, 7zip |
499 | 670 | --current Base ETA on number of pages in database instead of all revisions |
500 | 671 | --spawn Spawn a subprocess for loading text records |
501 | 672 | --help Display this help message |
502 | 673 | ENDS |
503 | 674 | ); |
504 | 675 | } |
505 | | - |
506 | | - |
Index: branches/REL1_18/phase3/includes/Export.php |
— | — | @@ -354,6 +354,9 @@ |
355 | 355 | * @ingroup Dump |
356 | 356 | */ |
357 | 357 | class XmlDumpWriter { |
| 358 | + var $firstPageWritten = 0; |
| 359 | + var $lastPageWritten = 0; |
| 360 | + var $pageInProgress = 0; |
358 | 361 | |
359 | 362 | /** |
360 | 363 | * Returns the export schema version. |
— | — | @@ -458,6 +461,7 @@ |
459 | 462 | $title = Title::makeTitle( $row->page_namespace, $row->page_title ); |
460 | 463 | $out .= ' ' . Xml::elementClean( 'title', array(), $title->getPrefixedText() ) . "\n"; |
461 | 464 | $out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n"; |
| 465 | + $this->pageInProgress = $row->page_id; |
462 | 466 | if ( $row->page_is_redirect ) { |
463 | 467 | $out .= ' ' . Xml::element( 'redirect', array() ) . "\n"; |
464 | 468 | } |
— | — | @@ -478,6 +482,10 @@ |
479 | 483 | */ |
480 | 484 | function closePage() { |
| 485 | + if (! $this->firstPageWritten) { // first page closed since construction |
| 486 | + $this->firstPageWritten = $this->pageInProgress; |
| 487 | + } |
| 488 | + $this->lastPageWritten = $this->pageInProgress; |
481 | 489 | return " </page>\n"; // must come last: returning first made the bookkeeping above unreachable |
482 | 490 | } |
483 | 491 | |
484 | 492 | /** |
— | — | @@ -691,6 +699,22 @@ |
692 | 700 | function write( $string ) { |
693 | 701 | print $string; |
694 | 702 | } |
| 703 | + |
| 704 | + function closeRenameAndReopen( $newname ) { |
| 705 | + return; |
| 706 | + } |
| 707 | + |
| 708 | + function closeAndRename( $newname ) { |
| 709 | + return; |
| 710 | + } |
| 711 | + |
| 712 | + function rename( $newname ) { |
| 713 | + return; |
| 714 | + } |
| 715 | + |
| 716 | + function getFilename() { |
| 717 | + return NULL; |
| 718 | + } |
695 | 719 | } |
696 | 720 | |
697 | 721 | /** |
— | — | @@ -699,14 +723,71 @@ |
700 | 724 | */ |
701 | 725 | class DumpFileOutput extends DumpOutput { |
702 | 726 | var $handle; |
| 727 | + var $filename; |
703 | 728 | |
704 | 729 | function __construct( $file ) { |
705 | 730 | $this->handle = fopen( $file, "wt" ); |
| 731 | + $this->filename = $file; |
706 | 732 | } |
707 | 733 | |
708 | 734 | function write( $string ) { |
709 | 735 | fputs( $this->handle, $string ); |
710 | 736 | } |
| 737 | + |
| 738 | + /** |
| 739 | + * Close the old file, move it to a specified name, |
| 740 | + * and reopen new file with the old name. Use this |
| 741 | + * for writing out a file in multiple pieces |
| 742 | + * at specified checkpoints (e.g. every n hours). |
| 743 | + */ |
| 744 | + function closeRenameAndReopen( $newname ) { |
| 745 | + if ( is_array($newname) ) { |
| 746 | + if (count($newname) > 1) { |
| 747 | + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 748 | + } |
| 749 | + else { |
| 750 | + $newname = $newname[0]; |
| 751 | + } |
| 752 | + } |
| 753 | + if ( $newname ) { |
| 754 | + fclose( $this->handle ); |
| 755 | + rename( $this->filename, $newname ); |
| 756 | + $this->handle = fopen( $this->filename, "wt" ); |
| 757 | + } |
| 758 | + } |
| 759 | + |
| 760 | + function closeAndRename( $newname ) { |
| 761 | + if ( is_array($newname) ) { |
| 762 | + if (count($newname) > 1) { |
| 763 | + throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 764 | + } |
| 765 | + else { |
| 766 | + $newname = $newname[0]; |
| 767 | + } |
| 768 | + } |
| 769 | + if ( $newname ) { |
| 770 | + fclose( $this->handle ); |
| 771 | + rename( $this->filename, $newname ); |
| 772 | + } |
| 773 | + } |
| 774 | + |
| 775 | + function rename( $newname ) { |
| 776 | + if ( is_array($newname) ) { |
| 777 | + if (count($newname) > 1) { |
| 778 | + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 779 | + } |
| 780 | + else { |
| 781 | + $newname = $newname[0]; |
| 782 | + } |
| 783 | + } |
| 784 | + if ( $newname ) { |
| 785 | + rename( $this->filename, $newname ); |
| 786 | + } |
| 787 | + } |
| 788 | + |
| 789 | + function getFilename() { |
| 790 | + return $this->filename; |
| 791 | + } |
711 | 792 | } |
712 | 793 | |
713 | 794 | /** |
— | — | @@ -716,12 +797,80 @@ |
717 | 798 | * @ingroup Dump |
718 | 799 | */ |
719 | 800 | class DumpPipeOutput extends DumpFileOutput { |
| 801 | + var $command; |
| 802 | + |
720 | 803 | function __construct( $command, $file = null ) { |
721 | 804 | if ( !is_null( $file ) ) { |
722 | 805 | $command .= " > " . wfEscapeShellArg( $file ); |
723 | 806 | } |
724 | | - $this->handle = popen( $command, "w" ); |
| 807 | + |
| 808 | + $this->startCommand($command); |
| 809 | + $this->command = $command; |
| 810 | + $this->filename = $file; |
725 | 811 | } |
| 812 | + |
| 813 | + function startCommand($command) { |
| 814 | + $spec = array( |
| 815 | + 0 => array( "pipe", "r" ), |
| 816 | + ); |
| 817 | + $pipes = array(); |
| 818 | + $this->procOpenResource = proc_open( $command, $spec, $pipes ); |
| 819 | + $this->handle = $pipes[0]; |
| 820 | + } |
| 821 | + |
| 822 | + /** |
| 823 | + * Close the old file, move it to a specified name, |
| 824 | + * and reopen new file with the old name. |
| 825 | + */ |
| 826 | + function closeRenameAndReopen( $newname ) { |
| 827 | + if ( is_array($newname) ) { |
| 828 | + if (count($newname) > 1) { |
| 829 | + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 830 | + } |
| 831 | + else { |
| 832 | + $newname = $newname[0]; |
| 833 | + } |
| 834 | + } |
| 835 | + if ( $newname ) { |
| 836 | + fclose( $this->handle ); |
| 837 | + proc_close($this->procOpenResource); |
| 838 | + rename( $this->filename, $newname ); |
| 839 | + $command = $this->command; |
| 840 | + $command .= " > " . wfEscapeShellArg( $this->filename ); |
| 841 | + $this->startCommand($command); |
| 842 | + } |
| 843 | + } |
| 844 | + |
| 845 | + function closeAndRename( $newname ) { |
| 846 | + if ( is_array($newname) ) { |
| 847 | + if (count($newname) > 1) { |
| 848 | + throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 849 | + } |
| 850 | + else { |
| 851 | + $newname = $newname[0]; |
| 852 | + } |
| 853 | + } |
| 854 | + if ( $newname ) { |
| 855 | +# pclose( $this->handle ); |
| 856 | + fclose( $this->handle ); |
| 857 | + proc_close($this->procOpenResource); |
| 858 | + rename( $this->filename, $newname ); |
| 859 | + } |
| 860 | + } |
| 861 | + |
| 862 | + function rename( $newname ) { |
| 863 | + if ( is_array($newname) ) { |
| 864 | + if (count($newname) > 1) { |
| 865 | + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 866 | + } |
| 867 | + else { |
| 868 | + $newname = $newname[0]; |
| 869 | + } |
| 870 | + } |
| 871 | + if ( $newname ) { |
| 872 | + rename( $this->filename, $newname ); |
| 873 | + } |
| 874 | + } |
726 | 875 | } |
727 | 876 | |
728 | 877 | /** |
— | — | @@ -749,13 +898,65 @@ |
750 | 899 | * @ingroup Dump |
751 | 900 | */ |
752 | 901 | class Dump7ZipOutput extends DumpPipeOutput { |
| 902 | + var $filename; |
| 903 | + |
753 | 904 | function __construct( $file ) { |
754 | 905 | $command = "7za a -bd -si " . wfEscapeShellArg( $file ); |
755 | 906 | // Suppress annoying useless crap from p7zip |
756 | 907 | // Unfortunately this could suppress real error messages too |
757 | 908 | $command .= ' >' . wfGetNull() . ' 2>&1'; |
758 | 909 | parent::__construct( $command ); |
| 910 | + $this->filename = $file; |
759 | 911 | } |
| 912 | + |
| 913 | + function closeRenameAndReopen( $newname ) { |
| 914 | + if ( is_array($newname) ) { |
| 915 | + if (count($newname) > 1) { |
| 916 | + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 917 | + } |
| 918 | + else { |
| 919 | + $newname = $newname[0]; |
| 920 | + } |
| 921 | + } |
| 922 | + if ( $newname ) { |
| 923 | + fclose( $this->handle ); |
| 924 | + proc_close($this->procOpenResource); |
| 925 | + rename( $this->filename, $newname ); |
| 926 | + $command = "7za a -bd -si " . wfEscapeShellArg( $this->filename ); // $file only exists in the constructor; reopen the archive under the stored name |
| 927 | + $command .= ' >' . wfGetNull() . ' 2>&1'; |
| 928 | + $this->startCommand($command); |
| 929 | + } |
| 930 | + } |
| 931 | + |
| 932 | + function closeAndRename( $newname ) { |
| 933 | + if ( is_array($newname) ) { |
| 934 | + if (count($newname) > 1) { |
| 935 | + throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 936 | + } |
| 937 | + else { |
| 938 | + $newname = $newname[0]; |
| 939 | + } |
| 940 | + } |
| 941 | + if ( $newname ) { |
| 942 | + fclose( $this->handle ); |
| 943 | + proc_close($this->procOpenResource); |
| 944 | + rename( $this->filename, $newname ); |
| 945 | + } |
| 946 | + } |
| 947 | + |
| 948 | + function rename( $newname ) { |
| 949 | + if ( is_array($newname) ) { |
| 950 | + if (count($newname) > 1) { |
| 951 | + WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n"); |
| 952 | + } |
| 953 | + else { |
| 954 | + $newname = $newname[0]; |
| 955 | + } |
| 956 | + } |
| 957 | + if ( $newname ) { |
| 958 | + rename( $this->filename, $newname ); |
| 959 | + } |
| 960 | + } |
760 | 961 | } |
761 | 962 | |
762 | 963 | |
— | — | @@ -803,6 +1004,22 @@ |
804 | 1005 | $this->sink->writeRevision( $rev, $string ); |
805 | 1006 | } |
806 | 1007 | |
| 1008 | + function closeRenameAndReopen( $newname ) { |
| 1009 | + $this->sink->closeRenameAndReopen( $newname ); |
| 1010 | + } |
| 1011 | + |
| 1012 | + function closeAndRename( $newname ) { |
| 1013 | + $this->sink->closeAndRename( $newname ); |
| 1014 | + } |
| 1015 | + |
| 1016 | + function rename( $newname ) { |
| 1017 | + $this->sink->rename( $newname ); |
| 1018 | + } |
| 1019 | + |
| 1020 | + function getFilename() { |
| 1021 | + return $this->sink->getFilename(); |
| 1022 | + } |
| 1023 | + |
807 | 1024 | /** |
808 | 1025 | * Override for page-based filter types. |
809 | 1026 | * @return bool |
— | — | @@ -950,6 +1167,32 @@ |
951 | 1168 | $this->sinks[$i]->writeRevision( $rev, $string ); |
952 | 1169 | } |
953 | 1170 | } |
| 1171 | + |
| 1172 | + function closeRenameAndReopen( $newnames ) { |
| 1173 | + for( $i = 0; $i < $this->count; $i++ ) { |
| 1174 | + $this->sinks[$i]->closeRenameAndReopen( $newnames[$i] ); |
| 1175 | + } |
| 1176 | + } |
| 1177 | + |
| 1178 | + function closeAndRename( $newname ) { |
| 1179 | + for( $i = 0; $i < $this->count; $i++ ) { |
| 1180 | + $this->sinks[$i]->closeAndRename( $newname[$i] ); // one new name per sink; the body previously read the undefined $newnames |
| 1181 | + } |
| 1182 | + } |
| 1183 | + function rename( $newnames ) { |
| 1184 | + for( $i = 0; $i < $this->count; $i++ ) { |
| 1185 | + $this->sinks[$i]->rename( $newnames[$i] ); |
| 1186 | + } |
| 1187 | + } |
| 1188 | + |
| 1189 | + function getFilename() { |
| 1190 | + $filenames = array(); |
| 1191 | + for( $i = 0; $i < $this->count; $i++ ) { |
| 1192 | + $filenames[] = $this->sinks[$i]->getFilename(); |
| 1193 | + } |
| 1194 | + return $filenames; |
| 1195 | + } |
| 1196 | + |
954 | 1197 | } |
955 | 1198 | |
956 | 1199 | function xmlsafe( $string ) { |