r81192 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r81191 | r81192 | r81193 >
Date:18:44, 29 January 2011
Author:ariel
Status:deferred
Tags:
Comment:
The restart option allows restarting an XML dump run from a given job (it runs that job and all subsequent stages).
Modified paths:
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/worker.py
@@ -428,6 +428,16 @@
429429 print "%s " % item.name()
430430 return False
431431
 432+ def markFollowingJobsToRun(self):
 433+ # find the first one marked to run, mark the following ones
 434+ i = 0;
 435+ for item in self.dumpItems:
 436+ i = i + 1;
 437+ if item.toBeRun():
 438+ for j in range(i,len(self.dumpItems)):
 439+ self.dumpItems[j].setToBeRun(True)
 440+ break
 441+
432442 # see whether job needs previous jobs that have not completed successfully
433443
434444 def jobDoneSuccessfully(self, job):
@@ -544,13 +554,14 @@
545555
546556 class Runner(object):
547557
548 - def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None):
 558+ def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None, restart=False):
549559 self.wiki = wiki
550560 self.config = wiki.config
551561 self.dbName = wiki.dbName
552562 self.prefetch = prefetch
553563 self.spawn = spawn
554564 self.chunkInfo = Chunk(wiki, self.dbName)
 565+ self.restart = restart
555566
556567 if date:
557568 # Override, continuing a past dump?
@@ -756,11 +767,16 @@
757768 # job has dependent steps that weren't already run
758769 if (not self.dumpItemList.checkJobDependencies(self.jobRequested)):
759770 raise RuntimeError( "Job dependencies not run beforehand, exiting" )
 771+ if (restart):
 772+ # mark all the following jobs to run as well
 773+ self.dumpItemList.markFollowingJobsToRun()
760774
761775 self.makeDir(join(self.wiki.publicDir(), self.date))
762776 self.makeDir(join(self.wiki.privateDir(), self.date))
763777
764 - if (self.jobRequested):
 778+ if (self.restart):
 779+ print "Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName)
 780+ elif (self.jobRequested):
765781 print "Preparing for job %s of %s" % (self.jobRequested, self.dbName)
766782 else:
767783 self.showRunnerState("Cleaning up old dumps for %s" % self.dbName)
@@ -802,7 +818,10 @@
803819 if (self.failCount < 1):
804820 self.completeDump(files)
805821
806 - self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName))
 822+ if (self.restart):
 823+ self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName))
 824+ else:
 825+ self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName))
807826 else:
808827 self.checksums.prepareChecksums()
809828
@@ -2063,22 +2082,24 @@
20642083 if message:
20652084 print message
20662085 print "Usage: python worker.py [options] [wikidbname]"
2067 - print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn"
 2086+ print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn, --restartfrom"
20682087 print "--configfile: Specify an alternative configuration file to read."
2069 - print " Default config file name: wikidump.conf"
2070 - print "--date: Rerun dump of a given date (probably unwise)"
2071 - print "--checkpoint: Run just the specified step (deprecated)"
2072 - print "--job: Run just the specified step or set of steps; for the list,"
2073 - print " give the option --job help"
2074 - print " This option requires specifiying a wikidbname on which to run."
2075 - print " This option cannot be specified with --force."
2076 - print "--force: remove a lock file for the specified wiki (dangerous, if there is"
2077 - print " another process running, useful if you want to start a second later"
2078 - print " run while the first dump from a previous date is still going)"
2079 - print " This option cannot be specified with --job."
2080 - print "--noprefetch: Do not use a previous file's contents for speeding up the dumps"
2081 - print " (helpful if the previous files may have corrupt contents)"
2082 - print "--nospawn: Do not spawn a separate process in order to retrieve revision texts"
 2088+ print " Default config file name: wikidump.conf"
 2089+ print "--date: Rerun dump of a given date (probably unwise)"
 2090+ print "--checkpoint: Run just the specified step (deprecated)"
 2091+ print "--job: Run just the specified step or set of steps; for the list,"
 2092+ print " give the option --job help"
 2093+ print " This option requires specifiying a wikidbname on which to run."
 2094+ print " This option cannot be specified with --force."
 2095+ print "--force: remove a lock file for the specified wiki (dangerous, if there is"
 2096+ print " another process running, useful if you want to start a second later"
 2097+ print " run while the first dump from a previous date is still going)"
 2098+ print " This option cannot be specified with --job."
 2099+ print "--noprefetch: Do not use a previous file's contents for speeding up the dumps"
 2100+ print " (helpful if the previous files may have corrupt contents)"
 2101+ print "--nospawn: Do not spawn a separate process in order to retrieve revision texts"
 2102+ print "--restartfrom: Do all jobs after the one specified via --job, including that one"
 2103+
20832104 sys.exit(1)
20842105
20852106
@@ -2090,11 +2111,12 @@
20912112 forceLock = False
20922113 prefetch = True
20932114 spawn = True
 2115+ restart = False
20942116 jobRequested = None
20952117
20962118 try:
20972119 (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
2098 - ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn'])
 2120+ ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom'])
20992121 except:
21002122 usage("Unknown option specified")
21012123
@@ -2113,11 +2135,15 @@
21142136 spawn = False
21152137 elif opt == "--job":
21162138 jobRequested = val
 2139+ elif opt == "--restartfrom":
 2140+ restart = True
21172141
21182142 if jobRequested and (len(remainder) == 0):
21192143 usage("--job option requires the name of a wikidb to be specified")
21202144 if (jobRequested and forceLock):
21212145 usage("--force cannot be used with --job option")
 2146+ if (restart and not jobRequested):
 2147+ usage("--restartfrom requires --job and the job from which to restart")
21222148
21232149 # allow alternate config file
21242150 if (configFile):
@@ -2131,20 +2157,22 @@
21322158 if forceLock:
21332159 if wiki.isLocked():
21342160 wiki.unlock()
2135 - if not jobRequested:
 2161+ if restart or not jobRequested:
21362162 wiki.lock()
21372163 else:
21382164 wiki = findAndLockNextWiki(config)
21392165
21402166 if wiki:
2141 - runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested)
2142 - if (jobRequested):
 2167+ runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested, restart)
 2168+ if (restart):
 2169+ print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested)
 2170+ elif (jobRequested):
21432171 print "Running %s, job %s..." % (wiki.dbName, jobRequested)
21442172 else:
21452173 print "Running %s..." % wiki.dbName
21462174 runner.run()
21472175 # if we are doing one piece only of the dump, we don't unlock either
2148 - if not jobRequested:
 2176+ if restart or not jobRequested:
21492177 wiki.unlock()
21502178 else:
21512179 print "No wikis available to run."

Status & tagging log