r86462 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r86461 | r86462 | r86463
Date: 10:37, 20 April 2011
Author: ariel
Status: deferred
Tags:
Comment:
add dryrun option, shows the commands it would run, does not remove or update any files
Modified paths:
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)
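
To illustrate the behaviour described in the commit comment and the prettyPrintCommands() method added in the diff below, here is a minimal, self-contained Python sketch (not part of the commit; the function name styling and the sample mysqldump/gzip pipeline are made up for illustration). As the code's own comments explain, a command series list is a list of series, a series is a list of pipelines, a pipeline is a list of commands, and a command is a list of argument strings; in dry-run mode each pipeline is joined into a shell-style string and printed instead of being executed.

  # Illustrative sketch of the dry-run pretty-printing; names and the sample
  # pipeline are hypothetical, not actual worker.py output.
  def pretty_print_commands(command_series_list):
      for series in command_series_list:
          for pipeline in series:
              command_strings = [" ".join(command) for command in pipeline]
              print("Command to run: " + " | ".join(command_strings))

  # One series containing a single two-stage pipeline: mysqldump piped into gzip.
  sample_pipeline = [["mysqldump", "-u", "dumpuser", "somewiki", "site_stats"], ["gzip"]]
  pretty_print_commands([[sample_pipeline]])
  # prints: Command to run: mysqldump -u dumpuser somewiki site_stats | gzip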

Diff

Index: branches/ariel/xmldumps-backup/worker.py
@@ -773,7 +773,6 @@
774774 # why? we might be rerunning a job from an older dumps. we might have two
775775 # runs going at once (think en pedia, one finishing up the history, another
776776 # starting at the beginning to get the new abstracts and stubs).
777 -
778777 try:
779778 dumpsInOrder = self.wiki.latestDump(all=True)
780779 meIndex = dumpsInOrder.index(self.date)
@@ -845,7 +844,7 @@
846845
847846 class Runner(object):
848847
849 - def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice="", loggingEnabled=False):
 848+ def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice="", dryrun = False, loggingEnabled=False):
850849 self.wiki = wiki
851850 self.config = wiki.config
852851 self.dbName = wiki.dbName
@@ -856,6 +855,7 @@
857856 self.loggingEnabled = loggingEnabled
858857 self.htmlNotice = notice
859858 self.log = None
 859+ self.dryrun = dryrun
860860
861861 if date:
862862 # Override, continuing a past dump?
@@ -873,18 +873,20 @@
874874
875875 # this must come after the dumpdir setup so we know which directory we are in
876876 # for the log file.
877 - if (loggingEnabled):
 877+ if (loggingEnabled and not self.dryrun):
878878 self.logFileName = self.dumpDir.publicPath(config.logFile)
879879 self.makeDir(join(self.wiki.publicDir(), self.date))
880880 self.log = Logger(self.logFileName)
881881 thread.start_new_thread(self.logQueueReader,(self.log,))
882882
883 - self.checksums = Checksummer(self.wiki, self.dumpDir)
 883+ if not dryrun:
 884+ self.checksums = Checksummer(self.wiki, self.dumpDir)
884885
885886 # some or all of these dumpItems will be marked to run
886887 self.dumpItemList = DumpItemList(self.wiki, self.prefetch, self.spawn, self.date, self.chunkInfo);
887888
888 - self.status = Status(self.wiki, self.dumpDir, self.date, self.dumpItemList.dumpItems, self.checksums, self.htmlNotice, self.logAndPrint)
 889+ if not self.dryrun:
 890+ self.status = Status(self.wiki, self.dumpDir, self.date, self.dumpItemList.dumpItems, self.checksums, self.htmlNotice, self.logAndPrint)
889891
890892 def logQueueReader(self,log):
891893 if not log:
@@ -894,7 +896,7 @@
895897 done = log.doJobOnLogQueue()
896898
897899 def logAndPrint(self, message):
898 - if hasattr(self,'log') and self.log:
 900+ if hasattr(self,'log') and self.log and not self.dryrun:
899901 self.log.addToLogQueue("%s\n" % message)
900902 print message
901903
@@ -904,6 +906,10 @@
905907 else:
906908 return ""
907909
 910+ def remove(self, filename):
 911+ if not self.dryrun:
 912+ os.remove(filename)
 913+
908914 # returns 0 on success, 1 on error
909915 def saveTable(self, table, outfile):
910916 """Dump a table from the current DB with mysqldump, save to a gzipped sql file."""
@@ -920,8 +926,21 @@
921927 """For one pipeline of commands, redirect output to a given file."""
922928 commands[-1].extend( [ ">" , outfile ] )
923929 series = [ commands ]
924 - return self.runCommand([ series ], callbackTimed = self.status.updateStatusFiles)
 930+ if (self.dryrun):
 931+ self.prettyPrintCommands([ series ])
 932+ return 0
 933+ else:
 934+ return self.runCommand([ series ], callbackTimed = self.status.updateStatusFiles)
925935
 936+ def prettyPrintCommands(self, commandSeriesList):
 937+ for series in commandSeriesList:
 938+ for pipeline in series:
 939+ commandStrings = []
 940+ for command in pipeline:
 941+ commandStrings.append(" ".join(command))
 942+ pipelineString = " | ".join(commandStrings)
 943+ print "Command to run: ", pipelineString
 944+
926945 # command series list: list of (commands plus args) is one pipeline. list of pipelines = 1 series.
927946 # this function wants a list of series.
928947 # be a list (the command name and the various args)
@@ -939,18 +958,23 @@
940959 This function spawns multiple series of pipelines in parallel.
941960
942961 """
943 - commands = CommandsInParallel(commandSeriesList, callbackStderr=callbackStderr, callbackStderrArg=callbackStderrArg, callbackTimed=callbackTimed, callbackTimedArg=callbackTimedArg, shell=shell, callbackInterval=callbackInterval)
944 - commands.runCommands()
945 - if commands.exitedSuccessfully():
 962+ if self.dryrun:
 963+ self.prettyPrintCommands(commandSeriesList)
946964 return 0
 965+
947966 else:
948 - problemCommands = commands.commandsWithErrors()
949 - errorString = "Error from command(s): "
950 - for cmd in problemCommands:
951 - errorString = errorString + "%s " % cmd
952 - self.logAndPrint(errorString)
953 -# raise BackupError(errorString)
954 - return 1
 967+ commands = CommandsInParallel(commandSeriesList, callbackStderr=callbackStderr, callbackStderrArg=callbackStderrArg, callbackTimed=callbackTimed, callbackTimedArg=callbackTimedArg, shell=shell, callbackInterval=callbackInterval)
 968+ commands.runCommands()
 969+ if commands.exitedSuccessfully():
 970+ return 0
 971+ else:
 972+ problemCommands = commands.commandsWithErrors()
 973+ errorString = "Error from command(s): "
 974+ for cmd in problemCommands:
 975+ errorString = errorString + "%s " % cmd
 976+ self.logAndPrint(errorString)
 977+ # raise BackupError(errorString)
 978+ return 1
955979
956980 def debug(self, stuff):
957981 self.logAndPrint("%s: %s %s" % (TimeUtils.prettyTime(), self.dbName, stuff))
@@ -1003,8 +1027,9 @@
10041028 # mark all the following jobs to run as well
10051029 self.dumpItemList.markFollowingJobsToRun()
10061030
1007 - self.makeDir(join(self.wiki.publicDir(), self.date))
1008 - self.makeDir(join(self.wiki.privateDir(), self.date))
 1031+ if not self.dryrun:
 1032+ self.makeDir(join(self.wiki.publicDir(), self.date))
 1033+ self.makeDir(join(self.wiki.privateDir(), self.date))
10091034
10101035 if (self.restart):
10111036 self.logAndPrint("Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName))
@@ -1018,35 +1043,38 @@
10191044 files = self.listFilesFor(self.dumpItemList.dumpItems)
10201045
10211046 if (self.jobRequested):
1022 - self.checksums.prepareChecksums()
 1047+ if not self.dryrun:
 1048+ self.checksums.prepareChecksums()
10231049
10241050 for item in self.dumpItemList.dumpItems:
10251051 if (item.toBeRun()):
10261052 item.start(self)
1027 - self.status.updateStatusFiles()
1028 - self.dumpItemList.saveDumpRunInfoFile()
 1053+ if not self.dryrun:
 1054+ self.status.updateStatusFiles()
 1055+ self.dumpItemList.saveDumpRunInfoFile()
10291056 try:
10301057 item.dump(self)
10311058 except Exception, ex:
10321059 self.debug("*** exception! " + str(ex))
10331060 item.setStatus("failed")
1034 - if item.status() == "failed":
 1061+ if item.status() == "failed" and not self.dryrun:
10351062 self.runHandleFailure()
10361063 else:
10371064 self.lastFailed = False
10381065 # this ensures that, previous run or new one, the old or new md5sums go to the file
1039 - if item.status() == "done":
 1066+ if item.status() == "done" and not self.dryrun:
10401067 self.runUpdateItemFileInfo(item)
10411068
1042 - if (self.dumpItemList.allPossibleJobsDone()):
1043 - self.status.updateStatusFiles("done")
1044 - else:
1045 - self.status.updateStatusFiles("partialdone")
1046 - self.dumpItemList.saveDumpRunInfoFile()
 1069+ if not self.dryrun:
 1070+ if (self.dumpItemList.allPossibleJobsDone()):
 1071+ self.status.updateStatusFiles("done")
 1072+ else:
 1073+ self.status.updateStatusFiles("partialdone")
 1074+ self.dumpItemList.saveDumpRunInfoFile()
10471075
1048 - # if any job succeeds we might as well make the sym link
1049 - if (self.status.failCount < 1):
1050 - self.completeDump(files)
 1076+ # if any job succeeds we might as well make the sym link
 1077+ if (self.status.failCount < 1):
 1078+ self.completeDump(files)
10511079
10521080 if (self.restart):
10531081 self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName))
@@ -1054,28 +1082,31 @@
10551083 self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName))
10561084
10571085 else:
1058 - self.checksums.prepareChecksums()
 1086+ if not self.dryrun:
 1087+ self.checksums.prepareChecksums()
10591088
10601089 for item in self.dumpItemList.dumpItems:
10611090 item.start(self)
1062 - self.status.updateStatusFiles()
1063 - self.dumpItemList.saveDumpRunInfoFile()
 1091+ if not self.dryrun:
 1092+ self.status.updateStatusFiles()
 1093+ self.dumpItemList.saveDumpRunInfoFile()
10641094 try:
10651095 item.dump(self)
10661096 except Exception, ex:
10671097 self.debug("*** exception! " + str(ex))
10681098 item.setStatus("failed")
1069 - if item.status() == "failed":
 1099+ if item.status() == "failed" and not self.dryrun:
10701100 self.runHandleFailure()
10711101 else:
1072 - self.runUpdateItemFileInfo(item)
 1102+ if not self.dryrun:
 1103+ self.runUpdateItemFileInfo(item)
10731104 self.lastFailed = False
10741105
1075 - self.status.updateStatusFiles("done")
1076 - self.dumpItemList.saveDumpRunInfoFile()
1077 -
1078 - if self.status.failCount < 1:
1079 - self.completeDump(files)
 1106+ if not self.dryrun:
 1107+ self.status.updateStatusFiles("done")
 1108+ self.dumpItemList.saveDumpRunInfoFile()
 1109+ if self.status.failCount < 1:
 1110+ self.completeDump(files)
10801111
10811112 self.showRunnerStateComplete()
10821113
@@ -1092,8 +1123,9 @@
10931124 if old:
10941125 for dump in old:
10951126 self.showRunnerState("Purging old dump %s for %s" % (dump, self.dbName))
1096 - base = os.path.join(self.wiki.publicDir(), dump)
1097 - shutil.rmtree("%s" % base)
 1127+ if not self.dryrun:
 1128+ base = os.path.join(self.wiki.publicDir(), dump)
 1129+ shutil.rmtree("%s" % base)
10981130 else:
10991131 self.showRunnerState("No old dumps to purge.")
11001132
@@ -1278,7 +1310,8 @@
12791311 pass
12801312
12811313 def buildRecombineCommandString(self, runner, files, outputFileBasename, compressionCommand, uncompressionCommand, endHeaderMarker="</siteinfo>"):
1282 - outputFilename = runner.dumpDir.publicPath(outputFileBasename)
 1314+# outputFilename = self.buildOutputFilename(runner, outputFileBasename)
 1315+ outputFilename = runner.dumpDir.publicPath(outputFileBasename)
12831316 chunkNum = 0
12841317 recombines = []
12851318 head = runner.config.head
@@ -1311,14 +1344,12 @@
13121345 # warning: we figure any header (<siteinfo>...</siteinfo>) is going to be less than 2000 lines!
13131346 pipeline.append([ head, "-2000"])
13141347 pipeline.append([ grep, "-n", endHeaderMarker ])
1315 - # without sheell
 1348+ # without shell
13161349 p = CommandPipeline(pipeline, quiet=True)
13171350 p.runPipelineAndGetOutput()
13181351 if (p.output()):
13191352 (headerEndNum, junk) = p.output().split(":",1)
13201353 # get headerEndNum
1321 - if exists(outputFilename):
1322 - os.remove(outputFilename)
13231354 recombine = " ".join(uncompressThisFile)
13241355 headerEndNum = int(headerEndNum) + 1
13251356 if (chunkNum == 1):
@@ -1335,7 +1366,14 @@
13361367 recombineCommandString = "(" + ";".join(recombines) + ")" + "|" + "%s %s" % (compressionCommand, outputFilename)
13371368 return(recombineCommandString)
13381369
 1370+ def cleanupOldFiles(self, runner, outputFileBasename):
 1371+ outputFilename = self.buildOutputFilename(runner, outputFileBasename)
 1372+ if exists(outputFilename):
 1373+ runner.remove(outputFilename)
13391374
 1375+ def buildOutputFilename(self, runner, outputFileBasename):
 1376+ return outputFilename
 1377+
13401378 class PublicTable(Dump):
13411379 """Dump of a table using MySQL's mysqldump utility."""
13421380
@@ -1404,51 +1442,83 @@
14051443 "stub-articles.xml.gz"]
14061444
14071445 def buildCommand(self, runner, chunk = 0):
 1446+ history = self.buildHistoryOutputFilename(runner, chunk)
 1447+ current = self.buildCurrentOutputFilename(runner, chunk)
 1448+ articles = self.buildArticlesOutputFilename(runner, chunk)
 1449+
 1450+ command = [ "%s" % runner.config.php,
 1451+ "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir,
 1452+ "--wiki=%s" % runner.dbName,
 1453+ "--full", "--stub", "--report=10000",
 1454+ "%s" % runner.forceNormalOption(),
 1455+ "--output=gzip:%s" % history,
 1456+ "--output=gzip:%s" % current,
 1457+ "--filter=latest", "--output=gzip:%s" % articles,
 1458+ "--filter=latest", "--filter=notalk", "--filter=namespace:!NS_USER" ]
14081459 if (chunk):
 1460+ # set up start end end pageids for this piece
 1461+ # note there is no page id 0 I guess. so we start with 1
 1462+ # start = runner.pagesPerChunk()*(chunk-1) + 1
 1463+ start = sum([ self._chunks[i] for i in range(0,chunk-1)]) + 1
 1464+ startopt = "--start=%s" % start
 1465+ # if we are on the last chunk, we should get up to the last pageid,
 1466+ # whatever that is.
 1467+ command.append(startopt)
 1468+ if chunk < len(self._chunks):
 1469+ # end = start + runner.pagesPerChunk()
 1470+ end = sum([ self._chunks[i] for i in range(0,chunk)]) +1
 1471+ endopt = "--end=%s" % end
 1472+ command.append(endopt)
 1473+
 1474+ pipeline = [ command ]
 1475+ series = [ pipeline ]
 1476+ return(series)
 1477+
 1478+ def cleanupOldFiles(self, runner, chunk = 0):
 1479+ fileList = self.buildOutputFilenames(runner, chunk)
 1480+ for filename in fileList:
 1481+ if exists(filename):
 1482+ runner.remove(filename)
 1483+
 1484+ def buildHistoryOutputFilename(self, runner, chunk = 0):
 1485+ if (chunk):
14091486 chunkinfo = "%s" % chunk
14101487 else:
14111488 chunkinfo = ""
14121489 history = runner.dumpDir.publicPath("stub-meta-history" + chunkinfo + ".xml.gz")
 1490+ return history
 1491+
 1492+ def buildCurrentOutputFilename(self, runner, chunk = 0):
 1493+ if (chunk):
 1494+ chunkinfo = "%s" % chunk
 1495+ else:
 1496+ chunkinfo = ""
14131497 current = runner.dumpDir.publicPath("stub-meta-current" + chunkinfo + ".xml.gz")
 1498+ return current
 1499+
 1500+ def buildArticlesOutputFilename(self, runner, chunk = 0):
 1501+ if (chunk):
 1502+ chunkinfo = "%s" % chunk
 1503+ else:
 1504+ chunkinfo = ""
14141505 articles = runner.dumpDir.publicPath("stub-articles" + chunkinfo + ".xml.gz")
1415 - for filename in (history, current, articles):
1416 - if exists(filename):
1417 - os.remove(filename)
1418 - command = [ "%s" % runner.config.php,
1419 - "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir,
1420 - "--wiki=%s" % runner.dbName,
1421 - "--full", "--stub", "--report=10000",
1422 - "%s" % runner.forceNormalOption(),
1423 - "--output=gzip:%s" % history,
1424 - "--output=gzip:%s" % current,
1425 - "--filter=latest", "--output=gzip:%s" % articles,
1426 - "--filter=latest", "--filter=notalk", "--filter=namespace:!NS_USER" ]
1427 - if (chunk):
1428 - # set up start end end pageids for this piece
1429 - # note there is no page id 0 I guess. so we start with 1
1430 - # start = runner.pagesPerChunk()*(chunk-1) + 1
1431 - start = sum([ self._chunks[i] for i in range(0,chunk-1)]) + 1
1432 - startopt = "--start=%s" % start
1433 - # if we are on the last chunk, we should get up to the last pageid,
1434 - # whatever that is.
1435 - command.append(startopt)
1436 - if chunk < len(self._chunks):
1437 - # end = start + runner.pagesPerChunk()
1438 - end = sum([ self._chunks[i] for i in range(0,chunk)]) +1
1439 - endopt = "--end=%s" % end
1440 - command.append(endopt)
 1506+ return articles
14411507
1442 - pipeline = [ command ]
1443 - series = [ pipeline ]
1444 - return(series)
1445 -
 1508+ def buildOutputFilenames(self, runner, chunk = 0):
 1509+ history = self.buildHistoryOutputFilename(runner, chunk)
 1510+ current = self.buildCurrentOutputFilename(runner, chunk)
 1511+ articles = self.buildArticlesOutputFilename(runner, chunk)
 1512+ return([ history, current, articles ])
 1513+
14461514 def run(self, runner):
14471515 commands = []
14481516 if self._chunks:
14491517 for i in range(1, len(self._chunks)+1):
 1518+ self.cleanupOldFiles(runner,i)
14501519 series = self.buildCommand(runner, i)
14511520 commands.append(series)
14521521 else:
 1522+ self.cleanupOldFiles(runner)
14531523 series = self.buildCommand(runner)
14541524 commands.append(series)
14551525 result = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
@@ -1505,10 +1575,18 @@
15061576 def listFiles(self, runner):
15071577 return ["pages-logging.xml.gz"]
15081578
 1579+ def cleanupOldFiles(self, runner):
 1580+ logging = self.buildOutputFilename(runner)
 1581+ if exists(logging):
 1582+ runner.remove(logging)
 1583+
 1584+ def buildOutputFilename(self, runner):
 1585+ logging = runner.dumpDir.publicPath("pages-logging.xml.gz")
 1586+ return logging
 1587+
15091588 def run(self, runner):
1510 - logging = runner.dumpDir.publicPath("pages-logging.xml.gz")
1511 - if exists(logging):
1512 - os.remove(logging)
 1589+ self.cleanupOldFiles(runner)
 1590+ logging = self.buildOutputFilename(runner)
15131591 command = [ "%s" % runner.config.php,
15141592 "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir,
15151593 "--wiki=%s" % runner.dbName,
@@ -1723,7 +1801,10 @@
17241802 if not self.statusOfOldDumpIsDone(runner, date):
17251803 runner.debug("skipping incomplete or failed dump for prefetch %s" % possible)
17261804 continue
1727 - runner.debug("Prefetchable %s" % possible)
 1805+ if (chunk) and (self.filenameHasChunk(possible, "bz2")):
 1806+ runner.debug("Prefetchable %s etc." % possible)
 1807+ else:
 1808+ runner.debug("Prefetchable %s" % possible)
17281809 # found something workable, now check the chunk situation
17291810 if (chunk):
17301811 if (self.filenameHasChunk(possible, "bz2")):
@@ -1904,7 +1985,7 @@
19051986 def _path(self, runner, ext, chunk=0):
19061987 return runner.dumpDir.publicPath(self._file(ext,chunk))
19071988
1908 - def getOutputFilename(self, runner, chunk=0):
 1989+ def buildOutputFilename(self, runner, chunk=0):
19091990 if (chunk):
19101991 xml7z = self._path(runner, "7z", chunk)
19111992 else:
@@ -1920,25 +2001,31 @@
19212002
19222003 def buildCommand(self, runner, chunk = 0):
19232004 xmlbz2 = self.getInputFilename(runner, chunk)
1924 - xml7z = self.getOutputFilename(runner, chunk)
 2005+ xml7z = self.buildOutputFilename(runner, chunk)
19252006
1926 - # Clear prior 7zip attempts; 7zip will try to append an existing archive
1927 - if exists(xml7z):
1928 - os.remove(xml7z)
19292007 # FIXME need shell escape
19302008 commandPipe = [ [ "%s -dc %s | %s a -si %s" % (runner.config.bzip2, xmlbz2, runner.config.sevenzip, xml7z) ] ]
19312009 commandSeries = [ commandPipe ]
19322010 return(commandSeries)
19332011
 2012+ def cleanupOldFiles(self, runner, chunk = 0):
 2013+ xml7z = self.buildOutputFilename(runner, chunk)
 2014+ if exists(xml7z):
 2015+ runner.remove(xml7z)
 2016+
19342017 def run(self, runner):
19352018 if runner.lastFailed:
19362019 raise BackupError("bz2 dump incomplete, not recompressing")
19372020 commands = []
19382021 if (self._chunks):
19392022 for i in range(1, len(self._chunks)+1):
 2023+ # Clear prior 7zip attempts; 7zip will try to append an existing archive
 2024+ self.cleanupOldFiles(runner, i)
19402025 series = self.buildCommand(runner, i)
19412026 commands.append(series)
19422027 else:
 2028+ # Clear prior 7zip attempts; 7zip will try to append an existing archive
 2029+ self.cleanupOldFiles(runner)
19432030 series = self.buildCommand(runner)
19442031 commands.append(series)
19452032 result = runner.runCommand(commands, callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True)
@@ -1946,11 +2033,11 @@
19472034 # some hacks aren't so temporary - atg 3 sept 2010
19482035 if (self._chunks):
19492036 for i in range(1, len(self._chunks)+1):
1950 - xml7z = self.getOutputFilename(runner,i)
 2037+ xml7z = self.buildOutputFilename(runner,i)
19512038 if exists(xml7z):
19522039 os.chmod(xml7z, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH )
19532040 else:
1954 - xml7z = self.getOutputFilename(runner)
 2041+ xml7z = self.buildOutputFilename(runner)
19552042 if exists(xml7z):
19562043 os.chmod(xml7z, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH )
19572044 return(result)
@@ -1977,9 +2064,19 @@
19782065 def listFiles(self, runner):
19792066 return(XmlRecompressDump.listFiles(self, runner, unnumbered=True))
19802067
 2068+ def cleanupOldFiles(self, runner):
 2069+ files = self.listFiles(runner)
 2070+ print "here is cleanup"
 2071+ for filename in files:
 2072+ filename = runner.dumpDir.publicPath(filename)
 2073+ if exists(filename):
 2074+ runner.remove(filename)
 2075+
19812076 def run(self, runner):
 2077+ print "here we are"
19822078 errorresult = 0
19832079 if (self._chunks):
 2080+ self.cleanupOldFiles(runner)
19842081 files = XmlRecompressDump.listFiles(self,runner)
19852082 outputFileList = self.listFiles(runner)
19862083 for outputFile in outputFileList:
@@ -2175,6 +2272,8 @@
21762273 print " give the option --job help"
21772274 print " This option requires specifiying a wikidbname on which to run."
21782275 print " This option cannot be specified with --force."
 2276+ print "--dryrun: Don't really run the job, just print what would be done (must be used"
 2277+ print " with a specified wikidbname on which to run"
21792278 print "--force: remove a lock file for the specified wiki (dangerous, if there is"
21802279 print " another process running, useful if you want to start a second later"
21812280 print " run while the first dump from a previous date is still going)"
@@ -2200,10 +2299,11 @@
22012300 enableLogging = False
22022301 log = None
22032302 htmlNotice = ""
 2303+ dryrun = False
22042304
22052305 try:
22062306 (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
2207 - ['date=', 'job=', 'configfile=', 'notice=', 'force', 'noprefetch', 'nospawn', 'restartfrom', 'log'])
 2307+ ['date=', 'job=', 'configfile=', 'notice=', 'force', 'dryrun', 'noprefetch', 'nospawn', 'restartfrom', 'log'])
22082308 except:
22092309 usage("Unknown option specified")
22102310
@@ -2218,6 +2318,8 @@
22192319 prefetch = False
22202320 elif opt == "--nospawn":
22212321 spawn = False
 2322+ elif opt == "--dryrun":
 2323+ dryrun = True
22222324 elif opt == "--job":
22232325 jobRequested = val
22242326 elif opt == "--restartfrom":
@@ -2227,6 +2329,8 @@
22282330 elif opt == "--notice":
22292331 htmlNotice = val
22302332
 2333+ if dryrun and (len(remainder) == 0):
 2334+ usage("--dryrun requires the name of a wikidb to be specified")
22312335 if jobRequested and (len(remainder) == 0):
22322336 usage("--job option requires the name of a wikidb to be specified")
22332337 if (jobRequested and forceLock):
@@ -2240,19 +2344,25 @@
22412345 else:
22422346 config = WikiDump.Config()
22432347
 2348+ if dryrun:
 2349+ print "***"
 2350+ print "Dry run only, no files will be updated."
 2351+ print "***"
 2352+
22442353 if len(remainder) > 0:
22452354 wiki = WikiDump.Wiki(config, remainder[0])
2246 - # if we are doing one piece only of the dump, we don't try to grab a lock.
2247 - if forceLock:
2248 - if wiki.isLocked():
 2355+ # if we are doing one piece only of the dump, we don't try to grab a lock
 2356+ # unless told to.
 2357+ if not dryrun:
 2358+ if forceLock and wiki.isLocked():
22492359 wiki.unlock()
2250 - if restart or not jobRequested:
2251 - wiki.lock()
 2360+ if restart or not jobRequested:
 2361+ wiki.lock()
22522362 else:
22532363 wiki = findAndLockNextWiki(config)
22542364
22552365 if wiki:
2256 - runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, enableLogging)
 2366+ runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, dryrun, enableLogging)
22572367 if (restart):
22582368 print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested)
22592369 elif (jobRequested):
@@ -2261,8 +2371,9 @@
22622372 print "Running %s..." % wiki.dbName
22632373 runner.run()
22642374 # if we are doing one piece only of the dump, we don't unlock either
2265 - if restart or not jobRequested:
2266 - wiki.unlock()
 2375+ if not dryrun:
 2376+ if restart or not jobRequested:
 2377+ wiki.unlock()
22672378 else:
22682379 print "No wikis available to run."
22692380 finally:
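
Taken together, the changes follow one pattern: every destructive or state-changing call in Runner (os.remove, shutil.rmtree, checksum preparation, status-file and lock updates) is gated on self.dryrun, while the command-building code is left untouched so a dry run can still print exactly what would execute. A condensed sketch of that guard pattern, with illustrative class and method names rather than the real worker.py ones:

  import os
  import shutil

  class DryRunRunner(object):
      # Toy stand-in for Runner: destructive operations become no-ops when dryrun is set.
      def __init__(self, dryrun=False):
          self.dryrun = dryrun

      def remove(self, filename):
          # analogue of the new Runner.remove(); dump steps call this instead of os.remove()
          if not self.dryrun:
              os.remove(filename)

      def purge_old_dump(self, base):
          # the work is announced either way, but only performed on a real run
          print("Purging old dump " + base)
          if not self.dryrun:
              shutil.rmtree(base)

With dryrun=True only the messages are printed and nothing on disk changes, which is the behaviour the new --dryrun flag gives worker.py.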
