r97245 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r97245
Date: 07:40, 16 September 2011
Author: ariel
Status: deferred
Tags:
Comment: redo checkpoint file for history 7z step
Modified paths:
  • /branches/ariel/xmldumps-backup/worker.py (modified)

Diff

Index: branches/ariel/xmldumps-backup/worker.py
@@ -609,7 +609,7 @@
             "metahistory7zdump",
             "All pages with complete edit history (.7z)",
             "These dumps can be *very* large, uncompressing up to 100 times the archive download size. " +
-            "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), self.checkpointFile))
+            "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
         if (self.chunkInfo.chunksEnabled() and self.chunkInfo.recombineHistory()):
             self.dumpItems.append(
                 RecombineXmlRecompressDump("metahistory7zdumprecombine",
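Note on this hunk: under the old constructor signature (see the @@ -3135 hunk below), the trailing self.checkpointFile argument landed in the checkpoints parameter slot; the new call passes the flag and the file separately. A toy sketch of that positional shift, with made-up signatures for illustration only:

    # Toy signatures, not the real XmlRecompressDump constructor.
    def old_init(chunks=False, checkpoints=False):
        return checkpoints

    def new_init(chunks=False, checkpoints=False, checkpointFile=None):
        return checkpoints, checkpointFile

    cp_file = "pages-meta-history1.xml-p10p100.bz2"   # hypothetical filename
    print(old_init(False, cp_file))         # filename lands in the checkpoints slot
    print(new_init(False, True, cp_file))   # flag and file now travel separately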
@@ -1814,9 +1814,7 @@
         # of that very file. meh. how likely is it that we
         # have one? these files are time based and the start/end pageids
         # are going to fluctuate. whatever
-        cf = DumpFilename(self.wiki)
-        cf.newFromFilename(item.checkpointFile)
-        checkpoint = cf.checkpoint
+        checkpoint = item.checkpointFile.checkpoint

         for d in dumpNames:
             self.symLinks.removeSymLinksFromOldRuns(self.wiki.date, d, chunk, checkpoint )
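This hunk suggests item.checkpointFile changed from a bare filename string (which had to be reparsed via DumpFilename.newFromFilename) to a DumpFilename object whose checkpoint attribute can be read directly. A minimal runnable sketch of that before/after; the stub classes and the filename format here are assumptions, not the real worker.py code:

    # Stub classes and filename format are assumptions for illustration;
    # the real DumpFilename lives in worker.py and parses much more.
    class DumpFilename(object):
        def __init__(self, wiki):
            self.wiki = wiki
            self.checkpoint = None

        def newFromFilename(self, filename):
            # Hypothetical parse: pick out a pNNNNpNNNN checkpoint marker.
            for part in filename.split("-"):
                if part[:1] == "p" and "p" in part[1:]:
                    self.checkpoint = part.split(".")[0]

    class Item(object):
        pass

    item = Item()

    # Before r97245: checkpointFile held a filename string, so the caller
    # had to build a DumpFilename and reparse it.
    item.checkpointFile = "enwiki-20110901-pages-meta-history1.xml-p10p100.bz2"
    cf = DumpFilename("enwiki")
    cf.newFromFilename(item.checkpointFile)
    print(cf.checkpoint)                   # p10p100

    # After r97245: checkpointFile is already a DumpFilename, so the
    # attribute is read directly.
    item.checkpointFile = cf
    print(item.checkpointFile.checkpoint)  # p10p100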
@@ -3135,7 +3133,7 @@
 class XmlRecompressDump(Dump):
     """Take a .bz2 and recompress it as 7-Zip."""

-    def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False):
+    def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None):
         self._subset = subset
         self._detail = detail
         self._chunks = chunks
@@ -3146,6 +3144,7 @@
         self.itemForRecompression = itemForRecompression
         if checkpoints:
             self._checkpointsEnabled = True
+        self.checkpointFile = checkpointFile
         Dump.__init__(self, name, desc)

     def getDumpName(self):
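The two hunks above widen the constructor: checkpointFile defaults to None, so the existing call sites that never pass one keep working unchanged, and the value is stored on the instance so buildRunCommand can test it later. A condensed sketch of the resulting __init__; the base class is stubbed and the assignments the diff elides are omitted here too:

    # Condensed sketch; not the full worker.py class.
    class Dump(object):
        def __init__(self, name, desc):
            self.name = name
            self.desc = desc

    class XmlRecompressDump(Dump):
        """Take a .bz2 and recompress it as 7-Zip."""

        def __init__(self, subset, name, desc, detail, itemForRecompression,
                     wiki, chunkToDo, chunks = False, checkpoints = False,
                     checkpointFile = None):
            self._subset = subset
            self._detail = detail
            self._chunks = chunks
            self.itemForRecompression = itemForRecompression
            if checkpoints:
                self._checkpointsEnabled = True
            # New in r97245: remember the single checkpoint file, if any,
            # for buildRunCommand's new branch.
            self.checkpointFile = checkpointFile
            Dump.__init__(self, name, desc)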
@@ -3182,7 +3181,11 @@
         commands = []
         # Remove prior 7zip attempts; 7zip will try to append to an existing archive
         self.cleanupOldFiles(runner.dumpDir)
-        if self._chunksEnabled and not self._chunkToDo:
+        if self.checkpointFile:
+            outputFile = DumpFilename(self.wiki, None, self.checkpointFile.dumpName, self.checkpointFile.fileType, self.fileExt, self.checkpointFile.chunk, self.checkpointFile.checkpoint)
+            series = self.buildCommand(runner, [ outputFile ])
+            commands.append(series)
+        elif self._chunksEnabled and not self._chunkToDo:
             # must set up each parallel job separately, they may have checkpoint files that
             # need to be processed in series, it's a special case
             for i in range(1, len(self._chunks)+1):
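The new branch handles rerunning a single checkpoint file: it derives the 7z output name from the checkpoint file's own parts (dump name, file type, chunk, checkpoint range), substituting this job's extension, and queues exactly one command series. A runnable sketch of that name derivation; the DumpFilename stand-in and the filename layout below are guesses for illustration (the real constructor's second argument is presumably the date, passed as None here as in the diff):

    # Assumed stand-in for DumpFilename, not the real worker.py class.
    class DumpFilename(object):
        def __init__(self, wiki, date, dumpName, fileType, fileExt,
                     chunk, checkpoint):
            self.wiki, self.date = wiki, date
            self.dumpName, self.fileType, self.fileExt = dumpName, fileType, fileExt
            self.chunk, self.checkpoint = chunk, checkpoint

        @property
        def filename(self):
            # Hypothetical layout: <dumpName><chunk>.<fileType>-<checkpoint>.<fileExt>
            return "%s%s.%s-%s.%s" % (self.dumpName, self.chunk,
                                      self.fileType, self.checkpoint,
                                      self.fileExt)

    # The .bz2 checkpoint file being recompressed.
    cp = DumpFilename("enwiki", None, "pages-meta-history", "xml", "bz2",
                      "1", "p10p100")

    # Mirror of the new branch: same parts, but this job's extension (7z).
    outputFile = DumpFilename("enwiki", None, cp.dumpName, cp.fileType,
                              "7z", cp.chunk, cp.checkpoint)
    print(outputFile.filename)   # pages-meta-history1.xml-p10p100.7z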
