r97245 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r97245
Date: 07:40, 16 September 2011
Author: ariel
Status: deferred
Tags:
Comment: redo checkpoint file for history 7z step
Modified paths:
  • /branches/ariel/xmldumps-backup/worker.py (modified)

Diff

Index: branches/ariel/xmldumps-backup/worker.py
@@ -609,7 +609,7 @@
             "metahistory7zdump",
             "All pages with complete edit history (.7z)",
             "These dumps can be *very* large, uncompressing up to 100 times the archive download size. " +
-            "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), self.checkpointFile))
+            "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
         if (self.chunkInfo.chunksEnabled() and self.chunkInfo.recombineHistory()):
             self.dumpItems.append(
                 RecombineXmlRecompressDump("metahistory7zdumprecombine",
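Note on this hunk: under the old constructor signature (see the @@ -3135 hunk below), the trailing self.checkpointFile argument landed in the checkpoints parameter slot; the new call passes the flag and the file separately. A toy sketch of that positional shift, with made-up signatures for illustration only:

    # Toy signatures, not the real XmlRecompressDump constructor.
    def old_init(chunks=False, checkpoints=False):
        return checkpoints

    def new_init(chunks=False, checkpoints=False, checkpointFile=None):
        return checkpoints, checkpointFile

    cp_file = "pages-meta-history1.xml-p10p100.bz2"   # hypothetical filename
    print(old_init(False, cp_file))         # filename lands in the checkpoints slot
    print(new_init(False, True, cp_file))   # flag and file now travel separately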
@@ -1814,9 +1814,7 @@
         # of that very file. meh. how likely is it that we
         # have one? these files are time based and the start/end pageids
         # are going to fluctuate. whatever
-        cf = DumpFilename(self.wiki)
-        cf.newFromFilename(item.checkpointFile)
-        checkpoint = cf.checkpoint
+        checkpoint = item.checkpointFile.checkpoint

         for d in dumpNames:
             self.symLinks.removeSymLinksFromOldRuns(self.wiki.date, d, chunk, checkpoint )
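This hunk suggests item.checkpointFile changed from a bare filename string (which had to be reparsed via DumpFilename.newFromFilename) to a DumpFilename object whose checkpoint attribute can be read directly. A minimal runnable sketch of that before/after; the stub classes and the filename format here are assumptions, not the real worker.py code:

    # Stub classes and filename format are assumptions for illustration;
    # the real DumpFilename lives in worker.py and parses much more.
    class DumpFilename(object):
        def __init__(self, wiki):
            self.wiki = wiki
            self.checkpoint = None

        def newFromFilename(self, filename):
            # Hypothetical parse: pick out a pNNNNpNNNN checkpoint marker.
            for part in filename.split("-"):
                if part[:1] == "p" and "p" in part[1:]:
                    self.checkpoint = part.split(".")[0]

    class Item(object):
        pass

    item = Item()

    # Before r97245: checkpointFile held a filename string, so the caller
    # had to build a DumpFilename and reparse it.
    item.checkpointFile = "enwiki-20110901-pages-meta-history1.xml-p10p100.bz2"
    cf = DumpFilename("enwiki")
    cf.newFromFilename(item.checkpointFile)
    print(cf.checkpoint)                   # p10p100

    # After r97245: checkpointFile is already a DumpFilename, so the
    # attribute is read directly.
    item.checkpointFile = cf
    print(item.checkpointFile.checkpoint)  # p10p100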
@@ -3135,7 +3133,7 @@
 class XmlRecompressDump(Dump):
     """Take a .bz2 and recompress it as 7-Zip."""

-    def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False):
+    def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None):
         self._subset = subset
         self._detail = detail
         self._chunks = chunks
@@ -3146,6 +3144,7 @@
         self.itemForRecompression = itemForRecompression
         if checkpoints:
             self._checkpointsEnabled = True
+        self.checkpointFile = checkpointFile
         Dump.__init__(self, name, desc)

     def getDumpName(self):
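The two hunks above widen the constructor: checkpointFile defaults to None, so the existing call sites that never pass one keep working unchanged, and the value is stored on the instance so buildRunCommand can test it later. A condensed sketch of the resulting __init__; the base class is stubbed and the assignments the diff elides are omitted here too:

    # Condensed sketch; not the full worker.py class.
    class Dump(object):
        def __init__(self, name, desc):
            self.name = name
            self.desc = desc

    class XmlRecompressDump(Dump):
        """Take a .bz2 and recompress it as 7-Zip."""

        def __init__(self, subset, name, desc, detail, itemForRecompression,
                     wiki, chunkToDo, chunks = False, checkpoints = False,
                     checkpointFile = None):
            self._subset = subset
            self._detail = detail
            self._chunks = chunks
            self.itemForRecompression = itemForRecompression
            if checkpoints:
                self._checkpointsEnabled = True
            # New in r97245: remember the single checkpoint file, if any,
            # for buildRunCommand's new branch.
            self.checkpointFile = checkpointFile
            Dump.__init__(self, name, desc)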
@@ -3182,7 +3181,11 @@
         commands = []
         # Remove prior 7zip attempts; 7zip will try to append to an existing archive
         self.cleanupOldFiles(runner.dumpDir)
-        if self._chunksEnabled and not self._chunkToDo:
+        if self.checkpointFile:
+            outputFile = DumpFilename(self.wiki, None, self.checkpointFile.dumpName, self.checkpointFile.fileType, self.fileExt, self.checkpointFile.chunk, self.checkpointFile.checkpoint)
+            series = self.buildCommand(runner, [ outputFile ])
+            commands.append(series)
+        elif self._chunksEnabled and not self._chunkToDo:
             # must set up each parallel job separately, they may have checkpoint files that
             # need to be processed in series, it's a special case
             for i in range(1, len(self._chunks)+1):
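The new branch handles rerunning a single checkpoint file: it derives the 7z output name from the checkpoint file's own parts (dump name, file type, chunk, checkpoint range), substituting this job's extension, and queues exactly one command series. A runnable sketch of that name derivation; the DumpFilename stand-in and the filename layout below are guesses for illustration (the real constructor's second argument is presumably the date, passed as None here as in the diff):

    # Assumed stand-in for DumpFilename, not the real worker.py class.
    class DumpFilename(object):
        def __init__(self, wiki, date, dumpName, fileType, fileExt,
                     chunk, checkpoint):
            self.wiki, self.date = wiki, date
            self.dumpName, self.fileType, self.fileExt = dumpName, fileType, fileExt
            self.chunk, self.checkpoint = chunk, checkpoint

        @property
        def filename(self):
            # Hypothetical layout: <dumpName><chunk>.<fileType>-<checkpoint>.<fileExt>
            return "%s%s.%s-%s.%s" % (self.dumpName, self.chunk,
                                      self.fileType, self.checkpoint,
                                      self.fileExt)

    # The .bz2 checkpoint file being recompressed.
    cp = DumpFilename("enwiki", None, "pages-meta-history", "xml", "bz2",
                      "1", "p10p100")

    # Mirror of the new branch: same parts, but this job's extension (7z).
    outputFile = DumpFilename("enwiki", None, cp.dumpName, cp.fileType,
                              "7z", cp.chunk, cp.checkpoint)
    print(outputFile.filename)   # pages-meta-history1.xml-p10p100.7z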
