r96556 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r96555 | r96556 | r96557 >
Date: 12:28, 8 September 2011
Author: reedy
Status: ok
Tags:
Comment:
Modified paths:
  • /branches/REL1_18/phase3/includes/Export.php (modified) (history)
  • /branches/REL1_18/phase3/maintenance/backup.inc (modified) (history)
  • /branches/REL1_18/phase3/maintenance/dumpTextPass.php (modified) (history)

Diff [purge]

Index: branches/REL1_18/phase3/maintenance/backup.inc
@@ -51,6 +51,10 @@
5252 var $stubText = false; // include rev_text_id instead of text; for 2-pass dump
5353 var $dumpUploads = false;
5454 var $dumpUploadFileContents = false;
 55+ var $lastTime = 0;
 56+ var $pageCountLast = 0;
 57+ var $revCountLast = 0;
 58+ var $ID = 0;
5559
5660 function BackupDumper( $args ) {
5761 $this->stderr = fopen( "php://stderr", "wt" );
@@ -233,6 +237,8 @@
234238 $dbr = wfGetDB( DB_SLAVE );
235239 $this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
236240 $this->startTime = wfTime();
 241+ $this->lastTime = $this->startTime;
 242+ $this->ID = getmypid();
237243 }
238244
239245 /**
@@ -281,21 +287,35 @@
282288
283289 function showReport() {
284290 if ( $this->reporting ) {
285 - $delta = wfTime() - $this->startTime;
286291 $now = wfTimestamp( TS_DB );
287 - if ( $delta ) {
288 - $rate = $this->pageCount / $delta;
289 - $revrate = $this->revCount / $delta;
 292+ $nowts = wfTime();
 293+ $deltaAll = wfTime() - $this->startTime;
 294+ $deltaPart = wfTime() - $this->lastTime;
 295+ $this->pageCountPart = $this->pageCount - $this->pageCountLast;
 296+ $this->revCountPart = $this->revCount - $this->revCountLast;
 297+
 298+ if ( $deltaAll ) {
290299 $portion = $this->revCount / $this->maxCount;
291 - $eta = $this->startTime + $delta / $portion;
 300+ $eta = $this->startTime + $deltaAll / $portion;
292301 $etats = wfTimestamp( TS_DB, intval( $eta ) );
 302+ $pageRate = $this->pageCount / $deltaAll;
 303+ $revRate = $this->revCount / $deltaAll;
293304 } else {
294 - $rate = '-';
295 - $revrate = '-';
 305+ $pageRate = '-';
 306+ $revRate = '-';
296307 $etats = '-';
297308 }
298 - $this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), ETA %s [max %d]",
299 - $now, wfWikiID(), $this->pageCount, $rate, $this->revCount, $revrate, $etats, $this->maxCount ) );
 309+ if ( $deltaPart ) {
 310+ $pageRatePart = $this->pageCountPart / $deltaPart;
 311+ $revRatePart = $this->revCountPart / $deltaPart;
 312+ } else {
 313+ $pageRatePart = '-';
 314+ $revRatePart = '-';
 315+ }
 316+ $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
 317+ $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $etats, $this->maxCount ) );
 318+ $this->lastTime = $nowts;
 319+ $this->revCountLast = $this->revCount;
300320 }
301321 }
302322
Index: branches/REL1_18/phase3/maintenance/dumpTextPass.php
@@ -2,7 +2,7 @@
33 /**
44 * Script that postprocesses XML dumps from dumpBackup.php to add page text
55 *
6 - * Copyright © 2005 Brion Vibber <brion@pobox.com>, 2010 Alexandre Emsenhuber
 6+ * Copyright � 2005 Brion Vibber <brion@pobox.com>, 2010 Alexandre Emsenhuber
77 * http://www.mediawiki.org/
88 *
99 * This program is free software; you can redistribute it and/or modify
@@ -35,11 +35,9 @@
3636 class TextPassDumper extends BackupDumper {
3737 var $prefetch = null;
3838 var $input = "php://stdin";
 39+ var $history = WikiExporter::FULL;
3940 var $fetchCount = 0;
4041 var $prefetchCount = 0;
41 - var $lastTime = 0;
42 - var $pageCountLast = 0;
43 - var $revCountLast = 0;
4442 var $prefetchCountLast = 0;
4543 var $fetchCountLast = 0;
4644
@@ -56,12 +54,21 @@
5755 var $spawnRead = false;
5856 var $spawnErr = false;
5957
60 - var $ID = 0;
 58+ var $xmlwriterobj = false;
6159
 60+ # when we spend more than maxTimeAllowed seconds on this run, we continue
 61+ # processing until we write out the next complete page, then save output file(s),
 62+ # rename it/them and open new one(s)
 63+ var $maxTimeAllowed = 0; // 0 = no limit
 64+ var $timeExceeded = false;
 65+ var $firstPageWritten = false;
 66+ var $lastPageWritten = false;
 67+ var $checkpointJustWritten = false;
 68+ var $checkpointFiles = array();
 69+
6270 function initProgress( $history ) {
6371 parent::initProgress();
64 - $this->ID = getmypid();
65 - $this->lastTime = $this->startTime;
 72+ $this->timeOfCheckpoint = $this->startTime;
6673 }
6774
6875 function dump( $history, $text = WikiExporter::TEXT ) {
@@ -73,12 +80,25 @@
7481 if ( ini_get( 'display_errors' ) )
7582 ini_set( 'display_errors', 'stderr' );
7683
77 - $this->initProgress( $history );
 84+ $this->initProgress( $this->history );
7885
7986 $this->db = $this->backupDb();
8087
81 - $this->readDump();
 88+ $this->egress = new ExportProgressFilter( $this->sink, $this );
8289
 90+ # it would be nice to do it in the constructor, oh well. need egress set
 91+ $this->finalOptionCheck();
 92+
 93+ # we only want this so we know how to close a stream :-P
 94+ $this->xmlwriterobj = new XmlDumpWriter();
 95+
 96+ $input = fopen( $this->input, "rt" );
 97+ $result = $this->readDump( $input );
 98+
 99+ if ( WikiError::isError( $result ) ) {
 100+ throw new MWException( $result->getMessage() );
 101+ }
 102+
83103 if ( $this->spawnProc ) {
84104 $this->closeSpawn();
85105 }
@@ -98,6 +118,18 @@
99119 case 'stub':
100120 $this->input = $url;
101121 break;
 122+ case 'maxtime':
 123+ $this->maxTimeAllowed = intval($val)*60;
 124+ break;
 125+ case 'checkpointfile':
 126+ $this->checkpointFiles[] = $val;
 127+ break;
 128+ case 'current':
 129+ $this->history = WikiExporter::CURRENT;
 130+ break;
 131+ case 'full':
 132+ $this->history = WikiExporter::FULL;
 133+ break;
102134 case 'spawn':
103135 $this->spawn = true;
104136 if ( $val ) {
@@ -142,6 +174,7 @@
143175
144176 if ( $this->reporting ) {
145177 $now = wfTimestamp( TS_DB );
 178+ $nowts = wfTime();
146179 $deltaAll = wfTime() - $this->startTime;
147180 $deltaPart = wfTime() - $this->lastTime;
148181 $this->pageCountPart = $this->pageCount - $this->pageCountLast;
@@ -180,86 +213,98 @@
181214 $pageRatePart = '-';
182215 $revRatePart = '-';
183216 }
184 - $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",-
 217+ $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",
185218 $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this->maxCount ) );
186 - $this->lastTime = $now;
187 - $this->partCountLast = $this->partCount;
 219+ $this->lastTime = $nowts;
188220 $this->revCountLast = $this->revCount;
189221 $this->prefetchCountLast = $this->prefetchCount;
190222 $this->fetchCountLast = $this->fetchCount;
191223 }
192224 }
193225
194 - function readDump() {
195 - $state = '';
196 - $lastName = '';
197 - $this->thisPage = 0;
198 - $this->thisRev = 0;
 226+ function setTimeExceeded() {
 227+ $this->timeExceeded = True;
 228+ }
199229
200 - $reader = new XMLReader();
201 - $reader->open( $this->input );
202 - $writer = new XMLWriter();
203 - $writer->openMemory();
 230+ function checkIfTimeExceeded() {
 231+ if ( $this->maxTimeAllowed && ( $this->lastTime - $this->timeOfCheckpoint > $this->maxTimeAllowed ) ) {
 232+ return True;
 233+ }
 234+ return False;
 235+ }
204236
 237+ function finalOptionCheck() {
 238+ if (($this->checkpointFiles && ! $this->maxTimeAllowed) ||
 239+ ($this->maxTimeAllowed && !$this->checkpointFiles)) {
 240+ throw new MWException("Options checkpointfile and maxtime must be specified together.\n");
 241+ }
 242+ foreach ($this->checkpointFiles as $checkpointFile) {
 243+ $count = substr_count ($checkpointFile,"%s");
 244+ if (substr_count ($checkpointFile,"%s") != 2) {
 245+ throw new MWException("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n");
 246+ }
 247+ }
205248
206 - while ( $reader->read() ) {
207 - $tag = $reader->name;
208 - $type = $reader->nodeType;
 249+ if ($this->checkpointFiles) {
 250+ $filenameList = $this->egress->getFilename();
 251+ if (! is_array($filenameList)) {
 252+ $filenameList = array( $filenameList );
 253+ }
 254+ if (count($filenameList) != count($this->checkpointFiles)) {
 255+ throw new MWException("One checkpointfile must be specified for each output option, if maxtime is used.\n");
 256+ }
 257+ }
 258+ }
209259
210 - if ( $type == XmlReader::END_ELEMENT ) {
211 - $writer->endElement();
 260+ function readDump( $input ) {
 261+ $this->buffer = "";
 262+ $this->openElement = false;
 263+ $this->atStart = true;
 264+ $this->state = "";
 265+ $this->lastName = "";
 266+ $this->thisPage = 0;
 267+ $this->thisRev = 0;
212268
213 - if ( $tag == 'revision' ) {
214 - $this->revCount();
215 - $this->thisRev = '';
216 - } elseif ( $tag == 'page' ) {
217 - $this->reportPage();
218 - $this->thisPage = '';
219 - }
220 - } elseif ( $type == XmlReader::ELEMENT ) {
221 - $attribs = array();
222 - if ( $reader->hasAttributes ) {
223 - for ( $i = 0; $reader->moveToAttributeNo( $i ); $i++ ) {
224 - $attribs[$reader->name] = $reader->value;
225 - }
226 - }
 269+ $parser = xml_parser_create( "UTF-8" );
 270+ xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
227271
228 - if ( $reader->isEmptyElement && $tag == 'text' && isset( $attribs['id'] ) ) {
229 - $writer->startElement( 'text' );
230 - $writer->writeAttribute( 'xml:space', 'preserve' );
231 - $text = $this->getText( $attribs['id'] );
232 - if ( strlen( $text ) ) {
233 - $writer->text( $text );
234 - }
235 - $writer->endElement();
236 - } else {
237 - $writer->startElement( $tag );
238 - foreach( $attribs as $name => $val ) {
239 - $writer->writeAttribute( $name, $val );
240 - }
241 - if ( $reader->isEmptyElement ) {
242 - $writer->endElement();
243 - }
244 - }
 272+ xml_set_element_handler( $parser, array( &$this, 'startElement' ), array( &$this, 'endElement' ) );
 273+ xml_set_character_data_handler( $parser, array( &$this, 'characterData' ) );
245274
246 - $lastName = $tag;
247 - if ( $tag == 'revision' ) {
248 - $state = 'revision';
249 - } elseif ( $tag == 'page' ) {
250 - $state = 'page';
 275+ $offset = 0; // for context extraction on error reporting
 276+ $bufferSize = 512 * 1024;
 277+ do {
 278+ if ($this->checkIfTimeExceeded()) {
 279+ $this->setTimeExceeded();
 280+ }
 281+ $chunk = fread( $input, $bufferSize );
 282+ if ( !xml_parse( $parser, $chunk, feof( $input ) ) ) {
 283+ wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" );
 284+ return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset );
 285+ }
 286+ $offset += strlen( $chunk );
 287+ } while ( $chunk !== false && !feof( $input ) );
 288+ if ($this->maxTimeAllowed) {
 289+ $filenameList = $this->egress->getFilename();
 290+ # we wrote some stuff after last checkpoint that needs renamed */
 291+ if (! is_array($filenameList)) {
 292+ $filenameList = array( $filenameList );
 293+ }
 294+ if (file_exists($filenameList[0])) {
 295+ $newFilenames = array();
 296+ $firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
 297+ $lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);
 298+ for ($i =0; $i < count($filenameList); $i++) {
 299+ $checkpointNameFilledIn = sprintf($this->checkpointFiles[$i], $firstPageID, $lastPageID);
 300+ $fileinfo = pathinfo($filenameList[$i]);
 301+ $newFilenames[] = $fileinfo{'dirname'} . '/' . $checkpointNameFilledIn;
251302 }
252 - } elseif ( $type == XMLReader::SIGNIFICANT_WHITESPACE || $type == XMLReader::TEXT ) {
253 - if ( $lastName == 'id' ) {
254 - if ( $state == 'revision' ) {
255 - $this->thisRev .= $reader->value;
256 - } elseif ( $state == 'page' ) {
257 - $this->thisPage .= $reader->value;
258 - }
259 - }
260 - $writer->text( $reader->value );
 303+ $this->egress->closeAndRename( $newFilenames );
261304 }
262 - $this->sink->write( $writer->outputMemory() );
263305 }
 306+ xml_parser_free( $parser );
 307+
 308+ return true;
264309 }
265310
266311 function getText( $id ) {
@@ -282,6 +327,7 @@
283328 }
284329
285330 private function doGetText( $id ) {
 331+
286332 $id = intval( $id );
287333 $this->failures = 0;
288334 $ex = new MWException( "Graceful storage failure" );
@@ -469,13 +515,133 @@
470516 $normalized = $wgContLang->normalize( $stripped );
471517 return $normalized;
472518 }
 519+
 520+ function startElement( $parser, $name, $attribs ) {
 521+ $this->checkpointJustWritten = false;
 522+
 523+ $this->clearOpenElement( null );
 524+ $this->lastName = $name;
 525+
 526+ if ( $name == 'revision' ) {
 527+ $this->state = $name;
 528+ $this->egress->writeOpenPage( null, $this->buffer );
 529+ $this->buffer = "";
 530+ } elseif ( $name == 'page' ) {
 531+ $this->state = $name;
 532+ if ( $this->atStart ) {
 533+ $this->egress->writeOpenStream( $this->buffer );
 534+ $this->buffer = "";
 535+ $this->atStart = false;
 536+ }
 537+ }
 538+
 539+ if ( $name == "text" && isset( $attribs['id'] ) ) {
 540+ $text = $this->getText( $attribs['id'] );
 541+ $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) );
 542+ if ( strlen( $text ) > 0 ) {
 543+ $this->characterData( $parser, $text );
 544+ }
 545+ } else {
 546+ $this->openElement = array( $name, $attribs );
 547+ }
 548+ }
 549+
 550+ function endElement( $parser, $name ) {
 551+ $this->checkpointJustWritten = false;
 552+
 553+ if ( $this->openElement ) {
 554+ $this->clearOpenElement( "" );
 555+ } else {
 556+ $this->buffer .= "</$name>";
 557+ }
 558+
 559+ if ( $name == 'revision' ) {
 560+ $this->egress->writeRevision( null, $this->buffer );
 561+ $this->buffer = "";
 562+ $this->thisRev = "";
 563+ } elseif ( $name == 'page' ) {
 564+ if (! $this->firstPageWritten) {
 565+ $this->firstPageWritten = trim($this->thisPage);
 566+ }
 567+ $this->lastPageWritten = trim($this->thisPage);
 568+ if ($this->timeExceeded) {
 569+ $this->egress->writeClosePage( $this->buffer );
 570+ # nasty hack, we can't just write the chardata after the
 571+ # page tag, it will include leading blanks from the next line
 572+ $this->egress->sink->write("\n");
 573+
 574+ $this->buffer = $this->xmlwriterobj->closeStream();
 575+ $this->egress->writeCloseStream( $this->buffer );
 576+
 577+ $this->buffer = "";
 578+ $this->thisPage = "";
 579+ /* this could be more than one file if we had more than one output arg */
 580+ $checkpointFilenames = array();
 581+ $filenameList = $this->egress->getFilename();
 582+
 583+ if (! is_array($filenameList)) {
 584+ $filenameList = array( $filenameList );
 585+ }
 586+ $newFilenames = array();
 587+ $firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
 588+ $lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);
 589+ for ($i =0; $i < count($filenameList); $i++) {
 590+ $checkpointNameFilledIn = sprintf($this->checkpointFiles[$i], $firstPageID, $lastPageID);
 591+ $fileinfo = pathinfo($filenameList[$i]);
 592+ $newFilenames[] = $fileinfo{'dirname'} . '/' . $checkpointNameFilledIn;
 593+ }
 594+ $this->egress->closeRenameAndReopen( $newFilenames );
 595+ $this->buffer = $this->xmlwriterobj->openStream();
 596+ $this->timeExceeded = false;
 597+ $this->timeOfCheckpoint = $this->lastTime;
 598+ $this->firstPageWritten = false;
 599+ $this->checkpointJustWritten = true;
 600+ }
 601+ else {
 602+ $this->egress->writeClosePage( $this->buffer );
 603+ $this->buffer = "";
 604+ $this->thisPage = "";
 605+ }
 606+
 607+ } elseif ( $name == 'mediawiki' ) {
 608+ $this->egress->writeCloseStream( $this->buffer );
 609+ $this->buffer = "";
 610+ }
 611+ }
 612+
 613+ function characterData( $parser, $data ) {
 614+ $this->clearOpenElement( null );
 615+ if ( $this->lastName == "id" ) {
 616+ if ( $this->state == "revision" ) {
 617+ $this->thisRev .= $data;
 618+ } elseif ( $this->state == "page" ) {
 619+ $this->thisPage .= $data;
 620+ }
 621+ }
 622+ # have to skip the newline left over from closepagetag line of
 623+ # end of checkpoint files. nasty hack!!
 624+ if ($this->checkpointJustWritten) {
 625+ if ($data[0] == "\n") {
 626+ $data = substr($data,1);
 627+ }
 628+ $this->checkpointJustWritten = false;
 629+ }
 630+ $this->buffer .= htmlspecialchars( $data );
 631+ }
 632+
 633+ function clearOpenElement( $style ) {
 634+ if ( $this->openElement ) {
 635+ $this->buffer .= Xml::element( $this->openElement[0], $this->openElement[1], $style );
 636+ $this->openElement = false;
 637+ }
 638+ }
473639 }
474640
475641
476642 $dumper = new TextPassDumper( $argv );
477643
478644 if ( !isset( $options['help'] ) ) {
479 - $dumper->dump( WikiExporter::FULL );
 645+ $dumper->dump( true );
480646 } else {
481647 $dumper->progress( <<<ENDS
482648 This script postprocesses XML dumps from dumpBackup.php to add
@@ -489,17 +655,20 @@
490656 --stub=<type>:<file> To load a compressed stub dump instead of stdin
491657 --prefetch=<type>:<file> Use a prior dump file as a text source, to save
492658 pressure on the database.
 659+ (Requires the XMLReader extension)
 660+ --maxtime=<minutes> Write out checkpoint file after this many minutes (writing
 661+ out complete page, closing xml file properly, and opening new one
 662+ with header). This option requires the checkpointfile option.
 663+ --checkpointfile=<filenamepattern> Use this string for checkpoint filenames,
 664+ substituting first pageid written for the first %s (required) and the
 665+ last pageid written for the second %s if it exists.
493666 --quiet Don't dump status reports to stderr.
494667 --report=n Report position and speed after every n pages processed.
495668 (Default: 100)
496669 --server=h Force reading from MySQL server h
497 - --output=<type>:<file> Write to a file instead of stdout
498 - <type>s: file, gzip, bzip2, 7zip
499670 --current Base ETA on number of pages in database instead of all revisions
500671 --spawn Spawn a subprocess for loading text records
501672 --help Display this help message
502673 ENDS
503674 );
504675 }
505 -
506 -
Index: branches/REL1_18/phase3/includes/Export.php
@@ -354,6 +354,9 @@
355355 * @ingroup Dump
356356 */
357357 class XmlDumpWriter {
 358+ var $firstPageWritten = 0;
 359+ var $lastPageWritten = 0;
 360+ var $pageInProgress = 0;
358361
359362 /**
360363 * Returns the export schema version.
@@ -458,6 +461,7 @@
459462 $title = Title::makeTitle( $row->page_namespace, $row->page_title );
460463 $out .= ' ' . Xml::elementClean( 'title', array(), $title->getPrefixedText() ) . "\n";
461464 $out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
 465+ $this->pageInProgress = $row->page_id;
462466 if ( $row->page_is_redirect ) {
463467 $out .= ' ' . Xml::element( 'redirect', array() ) . "\n";
464468 }
@@ -478,6 +482,10 @@
479483 */
480484 function closePage() {
481485 return " </page>\n";
 486+ if (! $this->firstPageWritten) {
 487+ $this->firstPageWritten = $this->pageInProgress;
 488+ }
 489+ $this->lastPageWritten = $this->pageInProgress;
482490 }
483491
484492 /**
@@ -691,6 +699,22 @@
692700 function write( $string ) {
693701 print $string;
694702 }
 703+
 704+ function closeRenameAndReopen( $newname ) {
 705+ return;
 706+ }
 707+
 708+ function closeAndRename( $newname ) {
 709+ return;
 710+ }
 711+
 712+ function rename( $newname ) {
 713+ return;
 714+ }
 715+
 716+ function getFilename() {
 717+ return NULL;
 718+ }
695719 }
696720
697721 /**
@@ -699,14 +723,71 @@
700724 */
701725 class DumpFileOutput extends DumpOutput {
702726 var $handle;
 727+ var $filename;
703728
704729 function __construct( $file ) {
705730 $this->handle = fopen( $file, "wt" );
 731+ $this->filename = $file;
706732 }
707733
708734 function write( $string ) {
709735 fputs( $this->handle, $string );
710736 }
 737+
 738+ /**
 739+ * Close the old file, move it to a specified name,
 740+ * and reopen new file with the old name. Use this
 741+ * for writing out a file in multiple pieces
 742+ * at specified checkpoints (e.g. every n hours).
 743+ */
 744+ function closeRenameAndReopen( $newname ) {
 745+ if ( is_array($newname) ) {
 746+ if (count($newname) > 1) {
 747+ WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 748+ }
 749+ else {
 750+ $newname = $newname[0];
 751+ }
 752+ }
 753+ if ( $newname ) {
 754+ fclose( $this->handle );
 755+ rename( $this->filename, $newname );
 756+ $this->handle = fopen( $this->filename, "wt" );
 757+ }
 758+ }
 759+
 760+ function closeAndRename( $newname ) {
 761+ if ( is_array($newname) ) {
 762+ if (count($newname) > 1) {
 763+ throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 764+ }
 765+ else {
 766+ $newname = $newname[0];
 767+ }
 768+ }
 769+ if ( $newname ) {
 770+ fclose( $this->handle );
 771+ rename( $this->filename, $newname );
 772+ }
 773+ }
 774+
 775+ function rename( $newname ) {
 776+ if ( is_array($newname) ) {
 777+ if (count($newname) > 1) {
 778+ WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 779+ }
 780+ else {
 781+ $newname = $newname[0];
 782+ }
 783+ }
 784+ if ( $newname ) {
 785+ rename( $this->filename, $newname );
 786+ }
 787+ }
 788+
 789+ function getFilename() {
 790+ return $this->filename;
 791+ }
711792 }
712793
713794 /**
@@ -716,12 +797,80 @@
717798 * @ingroup Dump
718799 */
719800 class DumpPipeOutput extends DumpFileOutput {
 801+ var $command;
 802+
720803 function __construct( $command, $file = null ) {
721804 if ( !is_null( $file ) ) {
722805 $command .= " > " . wfEscapeShellArg( $file );
723806 }
724 - $this->handle = popen( $command, "w" );
 807+
 808+ $this->startCommand($command);
 809+ $this->command = $command;
 810+ $this->filename = $file;
725811 }
 812+
 813+ function startCommand($command) {
 814+ $spec = array(
 815+ 0 => array( "pipe", "r" ),
 816+ );
 817+ $pipes = array();
 818+ $this->procOpenResource = proc_open( $command, $spec, $pipes );
 819+ $this->handle = $pipes[0];
 820+ }
 821+
 822+ /**
 823+ * Close the old file, move it to a specified name,
 824+ * and reopen new file with the old name.
 825+ */
 826+ function closeRenameAndReopen( $newname ) {
 827+ if ( is_array($newname) ) {
 828+ if (count($newname) > 1) {
 829+ WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 830+ }
 831+ else {
 832+ $newname = $newname[0];
 833+ }
 834+ }
 835+ if ( $newname ) {
 836+ fclose( $this->handle );
 837+ proc_close($this->procOpenResource);
 838+ rename( $this->filename, $newname );
 839+ $command = $this->command;
 840+ $command .= " > " . wfEscapeShellArg( $this->filename );
 841+ $this->startCommand($command);
 842+ }
 843+ }
 844+
 845+ function closeAndRename( $newname ) {
 846+ if ( is_array($newname) ) {
 847+ if (count($newname) > 1) {
 848+ throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 849+ }
 850+ else {
 851+ $newname = $newname[0];
 852+ }
 853+ }
 854+ if ( $newname ) {
 855+# pclose( $this->handle );
 856+ fclose( $this->handle );
 857+ proc_close($this->procOpenResource);
 858+ rename( $this->filename, $newname );
 859+ }
 860+ }
 861+
 862+ function rename( $newname ) {
 863+ if ( is_array($newname) ) {
 864+ if (count($newname) > 1) {
 865+ WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 866+ }
 867+ else {
 868+ $newname = $newname[0];
 869+ }
 870+ }
 871+ if ( $newname ) {
 872+ rename( $this->filename, $newname );
 873+ }
 874+ }
726875 }
727876
728877 /**
@@ -749,13 +898,65 @@
750899 * @ingroup Dump
751900 */
752901 class Dump7ZipOutput extends DumpPipeOutput {
 902+ var $filename;
 903+
753904 function __construct( $file ) {
754905 $command = "7za a -bd -si " . wfEscapeShellArg( $file );
755906 // Suppress annoying useless crap from p7zip
756907 // Unfortunately this could suppress real error messages too
757908 $command .= ' >' . wfGetNull() . ' 2>&1';
758909 parent::__construct( $command );
 910+ $this->filename = $file;
759911 }
 912+
 913+ function closeRenameAndReopen( $newname ) {
 914+ if ( is_array($newname) ) {
 915+ if (count($newname) > 1) {
 916+ WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 917+ }
 918+ else {
 919+ $newname = $newname[0];
 920+ }
 921+ }
 922+ if ( $newname ) {
 923+ fclose( $this->handle );
 924+ proc_close($this->procOpenResource);
 925+ rename( $this->filename, $newname );
 926+ $command = "7za a -bd -si " . wfEscapeShellArg( $file );
 927+ $command .= ' >' . wfGetNull() . ' 2>&1';
 928+ $this->startCommand($command);
 929+ }
 930+ }
 931+
 932+ function closeAndRename( $newname ) {
 933+ if ( is_array($newname) ) {
 934+ if (count($newname) > 1) {
 935+ throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 936+ }
 937+ else {
 938+ $newname = $newname[0];
 939+ }
 940+ }
 941+ if ( $newname ) {
 942+ fclose( $this->handle );
 943+ proc_close($this->procOpenResource);
 944+ rename( $this->filename, $newname );
 945+ }
 946+ }
 947+
 948+ function rename( $newname ) {
 949+ if ( is_array($newname) ) {
 950+ if (count($newname) > 1) {
 951+ WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 952+ }
 953+ else {
 954+ $newname = $newname[0];
 955+ }
 956+ }
 957+ if ( $newname ) {
 958+ rename( $this->filename, $newname );
 959+ }
 960+ }
760961 }
761962
762963
@@ -803,6 +1004,22 @@
8041005 $this->sink->writeRevision( $rev, $string );
8051006 }
8061007
 1008+ function closeRenameAndReopen( $newname ) {
 1009+ $this->sink->closeRenameAndReopen( $newname );
 1010+ }
 1011+
 1012+ function closeAndRename( $newname ) {
 1013+ $this->sink->closeAndRename( $newname );
 1014+ }
 1015+
 1016+ function rename( $newname ) {
 1017+ $this->sink->rename( $newname );
 1018+ }
 1019+
 1020+ function getFilename() {
 1021+ return $this->sink->getFilename();
 1022+ }
 1023+
8071024 /**
8081025 * Override for page-based filter types.
8091026 * @return bool
@@ -950,6 +1167,32 @@
9511168 $this->sinks[$i]->writeRevision( $rev, $string );
9521169 }
9531170 }
 1171+
 1172+ function closeRenameAndReopen( $newnames ) {
 1173+ for( $i = 0; $i < $this->count; $i++ ) {
 1174+ $this->sinks[$i]->closeRenameAndReopen( $newnames[$i] );
 1175+ }
 1176+ }
 1177+
 1178+ function closeAndRename( $newname ) {
 1179+ for( $i = 0; $i < $this->count; $i++ ) {
 1180+ $this->sinks[$i]->closeAndRename( $newnames[$i] );
 1181+ }
 1182+ }
 1183+ function rename( $newnames ) {
 1184+ for( $i = 0; $i < $this->count; $i++ ) {
 1185+ $this->sinks[$i]->rename( $newnames[$i] );
 1186+ }
 1187+ }
 1188+
 1189+ function getFilename() {
 1190+ $filenames = array();
 1191+ for( $i = 0; $i < $this->count; $i++ ) {
 1192+ $filenames[] = $this->sinks[$i]->getFilename();
 1193+ }
 1194+ return $filenames;
 1195+ }
 1196+
9541197 }
9551198
9561199 function xmlsafe( $string ) {

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r95260 | add functions that support close and rename of output files as they are being... | ariel | 22:01, 22 August 2011
r95272 | add support for writing out checkpoint files of xml dump at regular intervals... | ariel | 22:45, 22 August 2011
r95288 | fix a couple bad lines in previous commit from bad merge attempt | ariel | 00:04, 23 August 2011
r95290 | fix timestamp stuff, more fallout from bad merge attempt | ariel | 00:36, 23 August 2011
r95443 | remove extraneous hyphen that crept in, grr | ariel | 20:43, 24 August 2011
r95601 | replace WfDie() | ariel | 15:50, 27 August 2011
r95604 | define and use closeAndRename() after last write of xml dump file; convert fr... | ariel | 18:31, 27 August 2011
r95634 | move some member vars to parent class since they are needed there now, set la... | ariel | 19:06, 28 August 2011
r95720 | check the checkpoint related options only if we specified checkpoints, duh | ariel | 22:48, 29 August 2011
r95810 | Remove wfDie() that Ariel keeps trying to resurrect :) | demon | 20:45, 30 August 2011

Status & tagging log