Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -18,6 +18,7 @@ |
19 | 19 | import CommandManagement |
20 | 20 | import Queue |
21 | 21 | import thread |
| 22 | +import traceback |
22 | 23 | |
23 | 24 | from os.path import exists |
24 | 25 | from subprocess import Popen, PIPE |
— | — | @@ -369,9 +370,10 @@ |
370 | 371 | pass |
371 | 372 | |
372 | 373 | class RunInfoFile(object): |
373 | | - def __init__(self, wiki, enabled): |
| 374 | + def __init__(self, wiki, enabled, verbose = False): |
374 | 375 | self.wiki = wiki |
375 | 376 | self._enabled = enabled |
| 377 | + self.verbose = verbose |
376 | 378 | |
377 | 379 | def saveDumpRunInfoFile(self, text): |
378 | 380 | """Write out a simple text file with the status for this wiki's dump.""" |
— | — | @@ -379,6 +381,9 @@ |
380 | 382 | try: |
381 | 383 | self._writeDumpRunInfoFile(text) |
382 | 384 | except: |
| 385 | + if (self.verbose): |
| 386 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 387 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
383 | 388 | print "Couldn't save dump run info file. Continuing anyways" |
384 | 389 | |
385 | 390 | def statusOfOldDumpIsDone(self, runner, date, jobName, jobDesc): |
— | — | @@ -410,6 +415,9 @@ |
411 | 416 | infile.close |
412 | 417 | return results |
413 | 418 | except: |
| 419 | + if (self.verbose): |
| 420 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 421 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
414 | 422 | return False |
415 | 423 | |
416 | 424 | # |
— | — | @@ -481,6 +489,9 @@ |
482 | 490 | infile.close |
483 | 491 | return None |
484 | 492 | except: |
| 493 | + if (self.verbose): |
| 494 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 495 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
485 | 496 | return None |
486 | 497 | |
487 | 498 | # find desc in there, look for "class='done'" |
— | — | @@ -506,6 +517,9 @@ |
507 | 518 | infile.close |
508 | 519 | return None |
509 | 520 | except: |
| 521 | + if (self.verbose): |
| 522 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 523 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
510 | 524 | return None |
511 | 525 | |
512 | 526 | |
— | — | @@ -591,11 +605,11 @@ |
592 | 606 | #PrivateTable("filearchive", "filearchivetable", "Deleted image data"), |
593 | 607 | |
594 | 608 | PublicTable("site_stats", "sitestatstable", "A few statistics such as the page count."), |
595 | | - PublicTable("image", "imagetable", "Metadata on current versions of uploaded images."), |
596 | | - PublicTable("oldimage", "oldimagetable", "Metadata on prior versions of uploaded images."), |
| 609 | + PublicTable("image", "imagetable", "Metadata on current versions of uploaded media/files."), |
| 610 | + PublicTable("oldimage", "oldimagetable", "Metadata on prior versions of uploaded media/files."), |
597 | 611 | PublicTable("pagelinks", "pagelinkstable", "Wiki page-to-page link records."), |
598 | 612 | PublicTable("categorylinks", "categorylinkstable", "Wiki category membership link records."), |
599 | | - PublicTable("imagelinks", "imagelinkstable", "Wiki image usage records."), |
| 613 | + PublicTable("imagelinks", "imagelinkstable", "Wiki media/files usage records."), |
600 | 614 | PublicTable("templatelinks", "templatelinkstable", "Wiki template inclusion link records."), |
601 | 615 | PublicTable("externallinks", "externallinkstable", "Wiki external URL link records."), |
602 | 616 | PublicTable("langlinks", "langlinkstable", "Wiki interlanguage link records."), |
— | — | @@ -627,10 +641,10 @@ |
628 | 642 | self.dumpItems.append( |
629 | 643 | XmlDump("articles", |
630 | 644 | "articlesdump", |
631 | | - "<big><b>Articles, templates, image descriptions, and primary meta-pages.</b></big>", |
| 645 | + "<big><b>Articles, templates, media/file descriptions, and primary meta-pages.</b></big>", |
632 | 646 | "This contains current versions of article content, and is the archive most mirror sites will probably want.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("articlesdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile, self.pageIDRange)) |
633 | 647 | if (self.chunkInfo.chunksEnabled()): |
634 | | - self.dumpItems.append(RecombineXmlDump("articlesdumprecombine", "<big><b>Recombine articles, templates, image descriptions, and primary meta-pages.</b></big>","This contains current versions of article content, and is the archive most mirror sites will probably want.", self.findItemByName('articlesdump'))) |
| 648 | + self.dumpItems.append(RecombineXmlDump("articlesdumprecombine", "<big><b>Recombine articles, templates, media/file descriptions, and primary meta-pages.</b></big>","This contains current versions of article content, and is the archive most mirror sites will probably want.", self.findItemByName('articlesdump'))) |
635 | 649 | |
636 | 650 | self.dumpItems.append( |
637 | 651 | XmlDump("meta-current", |
— | — | @@ -801,9 +815,10 @@ |
802 | 816 | return "name:%s; status:%s; updated:%s" % (item.name(), item.status(), item.updated()) |
803 | 817 | |
804 | 818 | class Checksummer(object): |
805 | | - def __init__(self,wiki,dumpDir, enabled = True): |
| 819 | + def __init__(self,wiki,dumpDir, enabled = True, verbose = False): |
806 | 820 | self.wiki = wiki |
807 | 821 | self.dumpDir = dumpDir |
| 822 | + self.verbose = verbose |
808 | 823 | self.timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime()) |
809 | 824 | self._enabled = enabled |
810 | 825 | |
— | — | @@ -821,7 +836,7 @@ |
822 | 837 | checksumFileName = self._getChecksumFileNameTmp() |
823 | 838 | output = file(checksumFileName, "a") |
824 | 839 | runner.debug("Checksumming %s" % fileObj.filename) |
825 | | - dumpfile = DumpFile(self.wiki, runner.dumpDir.filenamePublicPath(fileObj)) |
| 840 | + dumpfile = DumpFile(self.wiki, runner.dumpDir.filenamePublicPath(fileObj),None,self.verbose) |
826 | 841 | checksum = dumpfile.md5Sum() |
827 | 842 | if checksum != None: |
828 | 843 | output.write( "%s %s\n" % (checksum, fileObj.filename)) |
— | — | @@ -1196,7 +1211,7 @@ |
1197 | 1212 | looking for page and id tags, wihout other tags in between. (hmm) |
1198 | 1213 | filename full filename with directory |
1199 | 1214 | """ |
1200 | | - def __init__(self, wiki, filename, fileObj = None): |
| 1215 | + def __init__(self, wiki, filename, fileObj = None, verbose = False): |
1201 | 1216 | """takes full filename including path""" |
1202 | 1217 | self._wiki = wiki |
1203 | 1218 | self.filename = filename |
— | — | @@ -1337,6 +1352,9 @@ |
1338 | 1353 | try: |
1339 | 1354 | os.rename(self.filename, os.path.join(self.dirname,newname)) |
1340 | 1355 | except: |
| 1356 | + if (self.verbose): |
| 1357 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 1358 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
1341 | 1359 | raise BackupError("failed to rename file %s" % self.filename) |
1342 | 1360 | |
1343 | 1361 | self.filename = os.path.join(self.dirname,newname) |
— | — | @@ -1344,7 +1362,7 @@ |
1345 | 1363 | # everything that has to do with reporting the status of a piece |
1346 | 1364 | # of a dump is collected here |
1347 | 1365 | class Status(object): |
1348 | | - def __init__(self, wiki, dumpDir, items, checksums, enabled, noticeFile = None, errorCallback=None): |
| 1366 | + def __init__(self, wiki, dumpDir, items, checksums, enabled, noticeFile = None, errorCallback=None, verbose = False): |
1349 | 1367 | self.wiki = wiki |
1350 | 1368 | self.dbName = wiki.dbName |
1351 | 1369 | self.dumpDir = dumpDir |
— | — | @@ -1353,6 +1371,7 @@ |
1354 | 1372 | self.noticeFile = noticeFile |
1355 | 1373 | self.errorCallback = errorCallback |
1356 | 1374 | self.failCount = 0 |
| 1375 | + self.verbose = verbose |
1357 | 1376 | self._enabled = enabled |
1358 | 1377 | |
1359 | 1378 | def updateStatusFiles(self, done=False): |
— | — | @@ -1400,6 +1419,9 @@ |
1401 | 1420 | # Short line for report extraction goes here |
1402 | 1421 | self.wiki.writeStatus(self._reportDatabaseStatusSummary(done)) |
1403 | 1422 | except: |
| 1423 | + if (self.verbose): |
| 1424 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 1425 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
1404 | 1426 | message = "Couldn't update status files. Continuing anyways" |
1405 | 1427 | if self.errorCallback: |
1406 | 1428 | self.errorCallback(message) |
— | — | @@ -1449,6 +1471,9 @@ |
1450 | 1472 | else: |
1451 | 1473 | raise(ValueException) |
1452 | 1474 | except: |
| 1475 | + if (self.verbose): |
| 1476 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 1477 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
1453 | 1478 | return "No prior dumps of this database stored." |
1454 | 1479 | prettyDate = TimeUtils.prettyDate(rawDate) |
1455 | 1480 | if done: |
— | — | @@ -1542,7 +1567,7 @@ |
1543 | 1568 | return os.path.join(self.wiki.publicDir(), self.wiki.date) |
1544 | 1569 | |
1545 | 1570 | class Runner(object): |
1546 | | - def __init__(self, wiki, prefetch=True, spawn=True, job=None, restart=False, notice="", dryrun = False, loggingEnabled=False, chunkToDo = False, checkpointFile = None, pageIDRange = None): |
| 1571 | + def __init__(self, wiki, prefetch=True, spawn=True, job=None, restart=False, notice="", dryrun = False, loggingEnabled=False, chunkToDo = False, checkpointFile = None, pageIDRange = None, verbose = False): |
1547 | 1572 | self.wiki = wiki |
1548 | 1573 | self.dbName = wiki.dbName |
1549 | 1574 | self.prefetch = prefetch |
— | — | @@ -1553,8 +1578,9 @@ |
1554 | 1579 | self.log = None |
1555 | 1580 | self.dryrun = dryrun |
1556 | 1581 | self._chunkToDo = chunkToDo |
1557 | | - self.checkpointFile = None |
| 1582 | + self.checkpointFile = checkpointFile |
1558 | 1583 | self.pageIDRange = pageIDRange |
| 1584 | + self.verbose = verbose |
1559 | 1585 | |
1560 | 1586 | if (self.checkpointFile): |
1561 | 1587 | f = DumpFilename(self.wiki) |
— | — | @@ -1611,8 +1637,7 @@ |
1612 | 1638 | self._feedsEnabled = False |
1613 | 1639 | self._noticeFileEnabled = False |
1614 | 1640 | self._makeDirEnabled = False |
1615 | | - self._cleanOldDumpsEnabled = False |
1616 | | - self._cleanupOldFilesEnabled = False |
| 1641 | + self._cleanupOldFilesEnabled = True |
1617 | 1642 | |
1618 | 1643 | self.jobRequested = job |
1619 | 1644 | |
— | — | @@ -1644,15 +1669,15 @@ |
1645 | 1670 | self.makeDir(os.path.join(self.wiki.publicDir(), self.wiki.date)) |
1646 | 1671 | self.log = Logger(self.logFileName) |
1647 | 1672 | thread.start_new_thread(self.logQueueReader,(self.log,)) |
1648 | | - self.runInfoFile = RunInfoFile(wiki,self._runInfoFileEnabled) |
| 1673 | + self.runInfoFile = RunInfoFile(wiki,self._runInfoFileEnabled, self.verbose) |
1649 | 1674 | self.symLinks = SymLinks(self.wiki, self.dumpDir, self.logAndPrint, self.debug, self._symLinksEnabled) |
1650 | 1675 | self.feeds = Feeds(self.wiki,self.dumpDir, self.dbName, self.debug, self._feedsEnabled) |
1651 | 1676 | self.htmlNoticeFile = NoticeFile(self.wiki, notice, self._noticeFileEnabled) |
1652 | | - self.checksums = Checksummer(self.wiki, self.dumpDir, self._checksummerEnabled) |
| 1677 | + self.checksums = Checksummer(self.wiki, self.dumpDir, self._checksummerEnabled, self.verbose) |
1653 | 1678 | |
1654 | 1679 | # some or all of these dumpItems will be marked to run |
1655 | 1680 | self.dumpItemList = DumpItemList(self.wiki, self.prefetch, self.spawn, self._chunkToDo, self.checkpointFile, self.jobRequested, self.chunkInfo, self.pageIDRange, self.runInfoFile, self.dumpDir) |
1656 | | - self.status = Status(self.wiki, self.dumpDir, self.dumpItemList.dumpItems, self.checksums, self._statusEnabled, self.htmlNoticeFile, self.logAndPrint) |
| 1681 | + self.status = Status(self.wiki, self.dumpDir, self.dumpItemList.dumpItems, self.checksums, self._statusEnabled, self.htmlNoticeFile, self.logAndPrint, self.verbose) |
1657 | 1682 | |
1658 | 1683 | def logQueueReader(self,log): |
1659 | 1684 | if not log: |
— | — | @@ -1806,7 +1831,11 @@ |
1807 | 1832 | try: |
1808 | 1833 | item.dump(self) |
1809 | 1834 | except Exception, ex: |
1810 | | - self.debug("*** exception! " + str(ex)) |
| 1835 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 1836 | + if (self.verbose): |
| 1837 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
| 1838 | + else: |
| 1839 | + self.debug("*** exception! " + str(ex)) |
1811 | 1840 | item.setStatus("failed") |
1812 | 1841 | if item.status() == "failed": |
1813 | 1842 | self.runHandleFailure() |
— | — | @@ -1824,7 +1853,13 @@ |
1825 | 1854 | # if any job succeeds we might as well make the sym link |
1826 | 1855 | if (self.status.failCount < 1): |
1827 | 1856 | self.completeDump() |
1828 | | - |
| 1857 | + |
| 1858 | + # special case... |
| 1859 | + if self.jobRequested == "latestlinks": |
| 1860 | + if (self.dumpItemList.allPossibleJobsDone()): |
| 1861 | + self.symLinks.removeSymLinksFromOldRuns(self.wiki.date) |
| 1862 | + self.feeds.cleanupFeeds() |
| 1863 | + |
1829 | 1864 | if (self.restart): |
1830 | 1865 | self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName)) |
1831 | 1866 | else: |
— | — | @@ -1841,7 +1876,11 @@ |
1842 | 1877 | try: |
1843 | 1878 | item.dump(self) |
1844 | 1879 | except Exception, ex: |
1845 | | - self.debug("*** exception! " + str(ex)) |
| 1880 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 1881 | + if (self.verbose): |
| 1882 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
| 1883 | + else: |
| 1884 | + self.debug("*** exception! " + str(ex)) |
1846 | 1885 | item.setStatus("failed") |
1847 | 1886 | if item.status() == "failed": |
1848 | 1887 | self.runHandleFailure() |
— | — | @@ -2071,8 +2110,9 @@ |
2072 | 2111 | os.remove(os.path.join(latestDir,f)) |
2073 | 2112 | |
2074 | 2113 | class Dump(object): |
2075 | | - def __init__(self, name, desc): |
| 2114 | + def __init__(self, name, desc, verbose = False): |
2076 | 2115 | self._desc = desc |
| 2116 | + self.verbose = verbose |
2077 | 2117 | self.progress = "" |
2078 | 2118 | self.runInfo = RunInfo(name,"waiting","") |
2079 | 2119 | self.dumpName = self.getDumpName() |
— | — | @@ -2151,6 +2191,9 @@ |
2152 | 2192 | try: |
2153 | 2193 | self.run(runner) |
2154 | 2194 | except Exception, ex: |
| 2195 | + if (self.verbose): |
| 2196 | + exc_type, exc_value, exc_traceback = sys.exc_info() |
| 2197 | + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) |
2155 | 2198 | self.setStatus("failed") |
2156 | 2199 | raise ex |
2157 | 2200 | self.setStatus("done") |
— | — | @@ -2860,7 +2903,7 @@ |
2861 | 2904 | |
2862 | 2905 | class XmlDump(Dump): |
2863 | 2906 | """Primary XML dumps, one section at a time.""" |
2864 | | - def __init__(self, subset, name, desc, detail, itemForStubs, prefetch, spawn, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None, pageIDRange = None): |
| 2907 | + def __init__(self, subset, name, desc, detail, itemForStubs, prefetch, spawn, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None, pageIDRange = None, verbose = False): |
2865 | 2908 | self._subset = subset |
2866 | 2909 | self._detail = detail |
2867 | 2910 | self._desc = desc |
— | — | @@ -2941,7 +2984,7 @@ |
2942 | 2985 | else: |
2943 | 2986 | files = self.listRegularFilesPerChunkExisting(runner.dumpDir, self.getChunkList(), [ self.dumpName ]) |
2944 | 2987 | for f in files: |
2945 | | - f = DumpFile(self.wiki,runner.dumpDir.filenamePublicPath(f)) |
| 2988 | + f = DumpFile(self.wiki,runner.dumpDir.filenamePublicPath(f), None, self.verbose) |
2946 | 2989 | if (f.checkIfTruncated()): |
2947 | 2990 | runner.logAndPrint("file %s is truncated, moving out of the way" % f.filename ) |
2948 | 2991 | f.rename( f.filename + ".truncated" ) |
— | — | @@ -3125,7 +3168,7 @@ |
3126 | 3169 | if fileObj.isChunkFile and fileObj.chunkInt > maxchunks: |
3127 | 3170 | maxchunks = fileObj.chunkInt |
3128 | 3171 | if not fileObj.firstPageID: |
3129 | | - f = DumpFile(self.wiki, runner.dumpDir.filenamePublicPath(fileObj, date), fileObj) |
| 3172 | + f = DumpFile(self.wiki, runner.dumpDir.filenamePublicPath(fileObj, date), fileObj, self.verbose) |
3130 | 3173 | fileObj.firstPageID = f.findFirstPageIDInFile() |
3131 | 3174 | |
3132 | 3175 | # get the files that cover our range |
— | — | @@ -3219,6 +3262,28 @@ |
3220 | 3263 | runner.debug("Could not locate a prefetchable dump.") |
3221 | 3264 | return None |
3222 | 3265 | |
| 3266 | + def listOutputFilesForCleanup(self, dumpDir, dumpNames = None): |
| 3267 | + files = Dump.listOutputFilesForCleanup(self, dumpDir, dumpNames) |
| 3268 | + filesToReturn = [] |
| 3269 | + if self.pageIDRange: |
| 3270 | + if (',' in self.pageIDRange): |
| 3271 | + ( firstPageID, lastPageID ) = self.pageIDRange.split(',',2) |
| 3272 | + firstPageID = int(firstPageID) |
| 3273 | + lastPageID = int(lastPageID) |
| 3274 | + else: |
| 3275 | + firstPageID = int(self.pageIDRange) |
| 3276 | + lastPageID = None |
| 3277 | + # filter any checkpoint files, removing from the list any with |
| 3278 | + # page range outside of the page range this job will cover |
| 3279 | + for f in files: |
| 3280 | + if f.isCheckpointFile: |
| 3281 | + if not firstPageID or (f.firstPageID and (int(f.firstPageID) >= firstPageID)): |
| 3282 | + if not lastPageID or (f.lastPageID and (int(f.lastPageID) <= lastPageID)): |
| 3283 | + filesToReturn.append(f) |
| 3284 | + else: |
| 3285 | + filesToReturn.append(f) |
| 3286 | + return filesToReturn |
| 3287 | + |
3223 | 3288 | class RecombineXmlDump(XmlDump): |
3224 | 3289 | def __init__(self, name, desc, detail, itemForXmlDumps): |
3225 | 3290 | # no prefetch, no spawn |
— | — | @@ -3690,7 +3755,11 @@ |
3691 | 3756 | if message: |
3692 | 3757 | print message |
3693 | 3758 | print "Usage: python worker.py [options] [wikidbname]" |
3694 | | - print "Options: --checkpoint, --chunk, --configfile, --date, --job, --addnotice, --delnotice, --force, --noprefetch, --nospawn, --restartfrom, --log" |
| 3759 | + print "Options: --aftercheckpoint, --checkpoint, --chunk, --configfile, --date, --job, --addnotice, --delnotice, --force, --noprefetch, --nospawn, --restartfrom, --log" |
| 3760 | +print "--aftercheckpoint: Restart this job from after the specified checkpoint file, doing the" |
| 3761 | + print " rest of the job for the appropriate chunk if chunks are configured" |
| 3762 | +print "                    or for all the rest of the revisions if no chunks are configured;" |
| 3763 | + print " only for jobs articlesdump, metacurrentdump, metahistorybz2dump." |
3695 | 3764 | print "--checkpoint: Specify the name of the checkpoint file to rerun (requires --job," |
3696 | 3765 | print " depending on the file this may imply --chunk)" |
3697 | 3766 | print "--chunk: Specify the number of the chunk to rerun (use with a specific job" |
— | — | @@ -3721,6 +3790,8 @@ |
3722 | 3791 | print "--restartfrom: Do all jobs after the one specified via --job, including that one" |
3723 | 3792 | print "--log: Log progress messages and other output to logfile in addition to" |
3724 | 3793 | print " the usual console output" |
| 3794 | + print "--verbose: Print lots of stuff (includes printing full backtraces for any exception)" |
| 3795 | + print " This is used primarily for debugging" |
3725 | 3796 | |
3726 | 3797 | sys.exit(1) |
3727 | 3798 | |
— | — | @@ -3738,13 +3809,15 @@ |
3739 | 3810 | htmlNotice = "" |
3740 | 3811 | dryrun = False |
3741 | 3812 | chunkToDo = False |
| 3813 | + afterCheckpoint = False |
3742 | 3814 | checkpointFile = None |
3743 | 3815 | pageIDRange = None |
3744 | 3816 | result = False |
| 3817 | + verbose = False |
3745 | 3818 | |
3746 | 3819 | try: |
3747 | 3820 | (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", |
3748 | | - ['date=', 'job=', 'configfile=', 'addnotice=', 'delnotice', 'force', 'dryrun', 'noprefetch', 'nospawn', 'restartfrom', 'log', 'chunk=', 'checkpoint=', 'pageidrange=' ]) |
| 3821 | + ['date=', 'job=', 'configfile=', 'addnotice=', 'delnotice', 'force', 'dryrun', 'noprefetch', 'nospawn', 'restartfrom', 'aftercheckpoint=', 'log', 'chunk=', 'checkpoint=', 'pageidrange=', 'verbose' ]) |
3749 | 3822 | except: |
3750 | 3823 | usage("Unknown option specified") |
3751 | 3824 | |
— | — | @@ -3759,6 +3832,9 @@ |
3760 | 3833 | chunkToDo = int(val) |
3761 | 3834 | elif opt == "--force": |
3762 | 3835 | forceLock = True |
| 3836 | + elif opt == '--aftercheckpoint': |
| 3837 | + afterCheckpoint = True |
| 3838 | + checkpointFile = val |
3763 | 3839 | elif opt == "--noprefetch": |
3764 | 3840 | prefetch = False |
3765 | 3841 | elif opt == "--nospawn": |
— | — | @@ -3777,6 +3853,8 @@ |
3778 | 3854 | htmlNotice = False |
3779 | 3855 | elif opt == "--pageidrange": |
3780 | 3856 | pageIDRange = val |
| 3857 | + elif opt == "--verbose": |
| 3858 | + verbose = True |
3781 | 3859 | |
3782 | 3860 | if dryrun and (len(remainder) == 0): |
3783 | 3861 | usage("--dryrun requires the name of a wikidb to be specified") |
— | — | @@ -3834,7 +3912,20 @@ |
3835 | 3913 | date = TimeUtils.today() |
3836 | 3914 | wiki.setDate(date) |
3837 | 3915 | |
3838 | | - runner = Runner(wiki, prefetch, spawn, jobRequested, restart, htmlNotice, dryrun, enableLogging, chunkToDo, checkpointFile, pageIDRange) |
| 3916 | + if (afterCheckpoint): |
| 3917 | + f = DumpFilename(wiki) |
| 3918 | + f.newFromFilename(checkpointFile) |
| 3919 | + if not f.isCheckpointFile: |
| 3920 | + usage("--aftercheckpoint option requires the name of a checkpoint file, bad filename provided") |
| 3921 | + pageIDRange = str( int(f.lastPageID) + 1 ) |
| 3922 | + chunkToDo = f.chunkInt |
| 3923 | + # now we don't need this. |
| 3924 | + checkpointFile = None |
| 3925 | + afterCheckpointJobs = [ 'articlesdump', 'metacurrentdump', 'metahistorybz2dump' ] |
| 3926 | + if not jobRequested or not jobRequested in [ 'articlesdump', 'metacurrentdump', 'metahistorybz2dump' ]: |
| 3927 | + usage("--aftercheckpoint option requires --job option with one of %s" % ", ".join(afterCheckpointJobs)) |
| 3928 | + |
| 3929 | + runner = Runner(wiki, prefetch, spawn, jobRequested, restart, htmlNotice, dryrun, enableLogging, chunkToDo, checkpointFile, pageIDRange, verbose) |
3839 | 3930 | if (restart): |
3840 | 3931 | print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested) |
3841 | 3932 | elif (jobRequested): |