Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -428,6 +428,16 @@ |
429 | 429 | print "%s " % item.name() |
430 | 430 | return False |
431 | 431 | |
def markFollowingJobsToRun(self):
    """Mark every job after the first to-be-run job as to-be-run too.

    Walks self.dumpItems in order.  Items before the first one whose
    toBeRun() is true are left untouched, as is that first item itself;
    every item after it gets setToBeRun(True).  If no item is marked to
    run (or the list is empty), nothing is changed.
    """
    foundFirst = False
    for item in self.dumpItems:
        if foundFirst:
            # everything past the first marked job gets run as well
            item.setToBeRun(True)
        elif item.toBeRun():
            # this is the job we restart from; it is already marked
            foundFirst = True
| 441 | + |
432 | 442 | # see whether job needs previous jobs that have not completed successfully |
433 | 443 | |
434 | 444 | def jobDoneSuccessfully(self, job): |
— | — | @@ -544,13 +554,14 @@ |
545 | 555 | |
546 | 556 | class Runner(object): |
547 | 557 | |
548 | | - def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None): |
| 558 | + def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None, restart=False): |
549 | 559 | self.wiki = wiki |
550 | 560 | self.config = wiki.config |
551 | 561 | self.dbName = wiki.dbName |
552 | 562 | self.prefetch = prefetch |
553 | 563 | self.spawn = spawn |
554 | 564 | self.chunkInfo = Chunk(wiki, self.dbName) |
| 565 | + self.restart = restart |
555 | 566 | |
556 | 567 | if date: |
557 | 568 | # Override, continuing a past dump? |
— | — | @@ -756,11 +767,16 @@ |
757 | 768 | # job has dependent steps that weren't already run |
758 | 769 | if (not self.dumpItemList.checkJobDependencies(self.jobRequested)): |
759 | 770 | raise RuntimeError( "Job dependencies not run beforehand, exiting" ) |
| 771 | + if (restart): |
| 772 | + # mark all the following jobs to run as well |
| 773 | + self.dumpItemList.markFollowingJobsToRun() |
760 | 774 | |
761 | 775 | self.makeDir(join(self.wiki.publicDir(), self.date)) |
762 | 776 | self.makeDir(join(self.wiki.privateDir(), self.date)) |
763 | 777 | |
764 | | - if (self.jobRequested): |
| 778 | + if (self.restart): |
| 779 | + print "Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName) |
| 780 | + elif (self.jobRequested): |
765 | 781 | print "Preparing for job %s of %s" % (self.jobRequested, self.dbName) |
766 | 782 | else: |
767 | 783 | self.showRunnerState("Cleaning up old dumps for %s" % self.dbName) |
— | — | @@ -802,7 +818,10 @@ |
803 | 819 | if (self.failCount < 1): |
804 | 820 | self.completeDump(files) |
805 | 821 | |
806 | | - self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName)) |
| 822 | + if (self.restart): |
| 823 | + self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName)) |
| 824 | + else: |
| 825 | + self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName)) |
807 | 826 | else: |
808 | 827 | self.checksums.prepareChecksums() |
809 | 828 | |
— | — | @@ -2063,22 +2082,24 @@ |
2064 | 2083 | if message: |
2065 | 2084 | print message |
2066 | 2085 | print "Usage: python worker.py [options] [wikidbname]" |
2067 | | - print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn" |
| 2086 | + print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn, --restartfrom" |
2068 | 2087 | print "--configfile: Specify an alternative configuration file to read." |
2069 | | - print " Default config file name: wikidump.conf" |
2070 | | - print "--date: Rerun dump of a given date (probably unwise)" |
2071 | | - print "--checkpoint: Run just the specified step (deprecated)" |
2072 | | - print "--job: Run just the specified step or set of steps; for the list," |
2073 | | - print " give the option --job help" |
2074 | | - print " This option requires specifiying a wikidbname on which to run." |
2075 | | - print " This option cannot be specified with --force." |
2076 | | - print "--force: remove a lock file for the specified wiki (dangerous, if there is" |
2077 | | - print " another process running, useful if you want to start a second later" |
2078 | | - print " run while the first dump from a previous date is still going)" |
2079 | | - print " This option cannot be specified with --job." |
2080 | | - print "--noprefetch: Do not use a previous file's contents for speeding up the dumps" |
2081 | | - print " (helpful if the previous files may have corrupt contents)" |
2082 | | - print "--nospawn: Do not spawn a separate process in order to retrieve revision texts" |
| 2088 | + print " Default config file name: wikidump.conf" |
| 2089 | + print "--date: Rerun dump of a given date (probably unwise)" |
| 2090 | + print "--checkpoint: Run just the specified step (deprecated)" |
| 2091 | + print "--job: Run just the specified step or set of steps; for the list," |
| 2092 | + print " give the option --job help" |
| 2093 | + print " This option requires specifiying a wikidbname on which to run." |
| 2094 | + print " This option cannot be specified with --force." |
| 2095 | + print "--force: remove a lock file for the specified wiki (dangerous, if there is" |
| 2096 | + print " another process running, useful if you want to start a second later" |
| 2097 | + print " run while the first dump from a previous date is still going)" |
| 2098 | + print " This option cannot be specified with --job." |
| 2099 | + print "--noprefetch: Do not use a previous file's contents for speeding up the dumps" |
| 2100 | + print " (helpful if the previous files may have corrupt contents)" |
| 2101 | + print "--nospawn: Do not spawn a separate process in order to retrieve revision texts" |
| 2102 | + print "--restartfrom: Do all jobs after the one specified via --job, including that one" |
| 2103 | + |
2083 | 2104 | sys.exit(1) |
2084 | 2105 | |
2085 | 2106 | |
— | — | @@ -2090,11 +2111,12 @@ |
2091 | 2112 | forceLock = False |
2092 | 2113 | prefetch = True |
2093 | 2114 | spawn = True |
| 2115 | + restart = False |
2094 | 2116 | jobRequested = None |
2095 | 2117 | |
2096 | 2118 | try: |
2097 | 2119 | (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", |
2098 | | - ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn']) |
| 2120 | + ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom']) |
2099 | 2121 | except: |
2100 | 2122 | usage("Unknown option specified") |
2101 | 2123 | |
— | — | @@ -2113,11 +2135,15 @@ |
2114 | 2136 | spawn = False |
2115 | 2137 | elif opt == "--job": |
2116 | 2138 | jobRequested = val |
| 2139 | + elif opt == "--restartfrom": |
| 2140 | + restart = True |
2117 | 2141 | |
2118 | 2142 | if jobRequested and (len(remainder) == 0): |
2119 | 2143 | usage("--job option requires the name of a wikidb to be specified") |
2120 | 2144 | if (jobRequested and forceLock): |
2121 | 2145 | usage("--force cannot be used with --job option") |
| 2146 | + if (restart and not jobRequested): |
| 2147 | + usage("--restartfrom requires --job and the job from which to restart") |
2122 | 2148 | |
2123 | 2149 | # allow alternate config file |
2124 | 2150 | if (configFile): |
— | — | @@ -2131,20 +2157,22 @@ |
2132 | 2158 | if forceLock: |
2133 | 2159 | if wiki.isLocked(): |
2134 | 2160 | wiki.unlock() |
2135 | | - if not jobRequested: |
| 2161 | + if restart or not jobRequested: |
2136 | 2162 | wiki.lock() |
2137 | 2163 | else: |
2138 | 2164 | wiki = findAndLockNextWiki(config) |
2139 | 2165 | |
2140 | 2166 | if wiki: |
2141 | | - runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested) |
2142 | | - if (jobRequested): |
| 2167 | + runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested, restart) |
| 2168 | + if (restart): |
| 2169 | + print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested) |
| 2170 | + elif (jobRequested): |
2143 | 2171 | print "Running %s, job %s..." % (wiki.dbName, jobRequested) |
2144 | 2172 | else: |
2145 | 2173 | print "Running %s..." % wiki.dbName |
2146 | 2174 | runner.run() |
2147 | 2175 | # if we are doing one piece only of the dump, we don't unlock either |
2148 | | - if not jobRequested: |
| 2176 | + if restart or not jobRequested: |
2149 | 2177 | wiki.unlock() |
2150 | 2178 | else: |
2151 | 2179 | print "No wikis available to run." |