r85331 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r85330 | r85331 | r85332 >
Date:14:58, 4 April 2011
Author:ariel
Status:deferred
Tags:
Comment:
write temp md5sum file, move into place at end of job(s); space between date and time in status of job
Modified paths:
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/worker.py
@@ -477,12 +477,13 @@
478478
479479 # format: name:%; updated:%; status:%
480480 def _getOldRunInfoFromLine(self, line):
481 - # get rid of leading/trailing/embedded blanks
482 - line = line.replace(" ","")
 481+ # get rid of leading/trailing/blanks
 482+ line = line.strip(" ")
483483 line = line.replace("\n","")
484484 fields = line.split(';',3)
485485 dumpRunInfo = RunInfo()
486486 for field in fields:
 487+ field = field.strip(" ")
487488 (fieldName, separator, fieldValue) = field.partition(':')
488489 if (fieldName == "name"):
489490 dumpRunInfo.setName(fieldValue)
@@ -612,21 +613,25 @@
613614 return True
614615
615616 class Checksummer(object):
616 -
617617 def __init__(self,wiki,dumpDir):
618618 self.wiki = wiki
619619 self.dumpDir = dumpDir
620 -
 620+ self.timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime())
 621+
621622 def getChecksumFileNameBasename(self):
622623 return ("md5sums.txt")
623624
624625 def getChecksumFileName(self):
625626 return (self.dumpDir.publicPath(self.getChecksumFileNameBasename()))
626627
 628+ def getChecksumFileNameTmp(self):
 629+ return (self.dumpDir.publicPath(self.getChecksumFileNameBasename() + "." + self.timestamp + ".tmp"))
 630+
627631 def prepareChecksums(self):
628 - """Create the md5 checksum file at the start of the run.
629 - This will overwrite a previous run's output, if any."""
630 - checksumFileName = self.getChecksumFileName()
 632+ """Create a temporary md5 checksum file.
 633+ Call this at the start of the dump run, and move the file
 634+ into the final location at the completion of the dump run."""
 635+ checksumFileName = self.getChecksumFileNameTmp()
631636 output = file(checksumFileName, "w")
632637
633638 def md5File(self, filename):
@@ -652,11 +657,16 @@
653658
654659 def checksum(self, filename, runner):
655660 """Run checksum for an output file, and append to the list."""
656 - checksumFileName = self.getChecksumFileName()
 661+ checksumFileName = self.getChecksumFileNameTmp()
657662 output = file(checksumFileName, "a")
658663 self.saveChecksum(filename, output, runner)
659664 output.close()
660 -
 665+
 666+ def moveMd5FileIntoPlace(self):
 667+ tmpFileName = self.getChecksumFileNameTmp()
 668+ realFileName = self.getChecksumFileName()
 669+ os.rename(tmpFileName, realFileName)
 670+
661671 class DumpDir(object):
662672 def __init__(self, wiki, dbName, date):
663673 self._wiki = wiki
@@ -925,6 +935,7 @@
926936 self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName))
927937 else:
928938 self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName))
 939+
929940 else:
930941 self.checksums.prepareChecksums()
931942
@@ -1149,8 +1160,11 @@
11501161 self.debug("SUCCESS: done.")
11511162
11521163 def completeDump(self, files):
1153 - # FIXME: md5sums.txt won't be consistent with mixed data.
1154 - # later comment: which mixed data was meant here?
 1164+ # note that it's possible for links in "latest" to point to
 1165+ # files from different runs, in which case the md5sums file
 1166+ # will have accurate checksums for the run for which it was
 1167+ # produced, but not the other files. FIXME
 1168+ self.checksums.moveMd5FileIntoPlace()
11551169 self.saveSymlink(self.checksums.getChecksumFileNameBasename())
11561170
11571171 def saveSymlink(self, file):

Status & tagging log