Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -477,12 +477,13 @@ |
478 | 478 | |
479 | 479 | # format: name:%; updated:%; status:% |
480 | 480 | def _getOldRunInfoFromLine(self, line): |
481 | | - # get rid of leading/trailing/embedded blanks |
482 | | - line = line.replace(" ","") |
| 481 | + # get rid of leading/trailing blanks |
| 482 | + line = line.strip(" ") |
483 | 483 | line = line.replace("\n","") |
484 | 484 | fields = line.split(';',3) |
485 | 485 | dumpRunInfo = RunInfo() |
486 | 486 | for field in fields: |
| 487 | + field = field.strip(" ") |
487 | 488 | (fieldName, separator, fieldValue) = field.partition(':') |
488 | 489 | if (fieldName == "name"): |
489 | 490 | dumpRunInfo.setName(fieldValue) |
— | — | @@ -612,21 +613,25 @@ |
613 | 614 | return True |
614 | 615 | |
615 | 616 | class Checksummer(object): |
616 | | - |
617 | 617 | def __init__(self,wiki,dumpDir): |
618 | 618 | self.wiki = wiki |
619 | 619 | self.dumpDir = dumpDir |
620 | | - |
| 620 | + self.timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime()) |
| 621 | + |
621 | 622 | def getChecksumFileNameBasename(self): |
622 | 623 | return ("md5sums.txt") |
623 | 624 | |
624 | 625 | def getChecksumFileName(self): |
625 | 626 | return (self.dumpDir.publicPath(self.getChecksumFileNameBasename())) |
626 | 627 | |
| 628 | + def getChecksumFileNameTmp(self): |
| 629 | + return (self.dumpDir.publicPath(self.getChecksumFileNameBasename() + "." + self.timestamp + ".tmp")) |
| 630 | + |
627 | 631 | def prepareChecksums(self): |
628 | | - """Create the md5 checksum file at the start of the run. |
629 | | - This will overwrite a previous run's output, if any.""" |
630 | | - checksumFileName = self.getChecksumFileName() |
| 632 | + """Create a temporary md5 checksum file. |
| 633 | + Call this at the start of the dump run, and move the file |
| 634 | + into the final location at the completion of the dump run.""" |
| 635 | + checksumFileName = self.getChecksumFileNameTmp() |
631 | 636 | output = file(checksumFileName, "w") |
632 | 637 | |
633 | 638 | def md5File(self, filename): |
— | — | @@ -652,11 +657,16 @@ |
653 | 658 | |
654 | 659 | def checksum(self, filename, runner): |
655 | 660 | """Run checksum for an output file, and append to the list.""" |
656 | | - checksumFileName = self.getChecksumFileName() |
| 661 | + checksumFileName = self.getChecksumFileNameTmp() |
657 | 662 | output = file(checksumFileName, "a") |
658 | 663 | self.saveChecksum(filename, output, runner) |
659 | 664 | output.close() |
660 | | - |
| 665 | + |
| 666 | + def moveMd5FileIntoPlace(self): |
| 667 | + tmpFileName = self.getChecksumFileNameTmp() |
| 668 | + realFileName = self.getChecksumFileName() |
| 669 | + os.rename(tmpFileName, realFileName) |
| 670 | + |
661 | 671 | class DumpDir(object): |
662 | 672 | def __init__(self, wiki, dbName, date): |
663 | 673 | self._wiki = wiki |
— | — | @@ -925,6 +935,7 @@ |
926 | 936 | self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName)) |
927 | 937 | else: |
928 | 938 | self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName)) |
| 939 | + |
929 | 940 | else: |
930 | 941 | self.checksums.prepareChecksums() |
931 | 942 | |
— | — | @@ -1149,8 +1160,11 @@ |
1150 | 1161 | self.debug("SUCCESS: done.") |
1151 | 1162 | |
1152 | 1163 | def completeDump(self, files): |
1153 | | - # FIXME: md5sums.txt won't be consistent with mixed data. |
1154 | | - # later comment: which mixed data was meant here? |
| 1164 | + # FIXME: links in "latest" may point to files from different |
| 1165 | + # runs; in that case the md5sums file has accurate checksums |
| 1166 | + # only for the files from the run that produced it, not for |
| 1167 | + # the files from other runs. |
| 1168 | + self.checksums.moveMd5FileIntoPlace() |
1155 | 1169 | self.saveSymlink(self.checksums.getChecksumFileNameBasename()) |
1156 | 1170 | |
1157 | 1171 | def saveSymlink(self, file): |