Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -773,7 +773,6 @@ |
774 | 774 | # why? we might be rerunning a job from an older dumps. we might have two |
775 | 775 | # runs going at once (think en pedia, one finishing up the history, another |
776 | 776 | # starting at the beginning to get the new abstracts and stubs). |
777 | | - |
778 | 777 | try: |
779 | 778 | dumpsInOrder = self.wiki.latestDump(all=True) |
780 | 779 | meIndex = dumpsInOrder.index(self.date) |
— | — | @@ -845,7 +844,7 @@ |
846 | 845 | |
847 | 846 | class Runner(object): |
848 | 847 | |
849 | | - def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice="", loggingEnabled=False): |
| 848 | + def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice="", dryrun = False, loggingEnabled=False): |
850 | 849 | self.wiki = wiki |
851 | 850 | self.config = wiki.config |
852 | 851 | self.dbName = wiki.dbName |
— | — | @@ -856,6 +855,7 @@ |
857 | 856 | self.loggingEnabled = loggingEnabled |
858 | 857 | self.htmlNotice = notice |
859 | 858 | self.log = None |
| 859 | + self.dryrun = dryrun |
860 | 860 | |
861 | 861 | if date: |
862 | 862 | # Override, continuing a past dump? |
— | — | @@ -873,18 +873,20 @@ |
874 | 874 | |
875 | 875 | # this must come after the dumpdir setup so we know which directory we are in |
876 | 876 | # for the log file. |
877 | | - if (loggingEnabled): |
| 877 | + if (loggingEnabled and not self.dryrun): |
878 | 878 | self.logFileName = self.dumpDir.publicPath(config.logFile) |
879 | 879 | self.makeDir(join(self.wiki.publicDir(), self.date)) |
880 | 880 | self.log = Logger(self.logFileName) |
881 | 881 | thread.start_new_thread(self.logQueueReader,(self.log,)) |
882 | 882 | |
883 | | - self.checksums = Checksummer(self.wiki, self.dumpDir) |
| 883 | + if not dryrun: |
| 884 | + self.checksums = Checksummer(self.wiki, self.dumpDir) |
884 | 885 | |
885 | 886 | # some or all of these dumpItems will be marked to run |
886 | 887 | self.dumpItemList = DumpItemList(self.wiki, self.prefetch, self.spawn, self.date, self.chunkInfo); |
887 | 888 | |
888 | | - self.status = Status(self.wiki, self.dumpDir, self.date, self.dumpItemList.dumpItems, self.checksums, self.htmlNotice, self.logAndPrint) |
| 889 | + if not self.dryrun: |
| 890 | + self.status = Status(self.wiki, self.dumpDir, self.date, self.dumpItemList.dumpItems, self.checksums, self.htmlNotice, self.logAndPrint) |
889 | 891 | |
890 | 892 | def logQueueReader(self,log): |
891 | 893 | if not log: |
— | — | @@ -894,7 +896,7 @@ |
895 | 897 | done = log.doJobOnLogQueue() |
896 | 898 | |
897 | 899 | def logAndPrint(self, message): |
898 | | - if hasattr(self,'log') and self.log: |
| 900 | + if hasattr(self,'log') and self.log and not self.dryrun: |
899 | 901 | self.log.addToLogQueue("%s\n" % message) |
900 | 902 | print message |
901 | 903 | |
— | — | @@ -904,6 +906,10 @@ |
905 | 907 | else: |
906 | 908 | return "" |
907 | 909 | |
| 910 | + def remove(self, filename): |
| 911 | + if not self.dryrun: |
| 912 | + os.remove(filename) |
| 913 | + |
908 | 914 | # returns 0 on success, 1 on error |
909 | 915 | def saveTable(self, table, outfile): |
910 | 916 | """Dump a table from the current DB with mysqldump, save to a gzipped sql file.""" |
— | — | @@ -920,8 +926,21 @@ |
921 | 927 | """For one pipeline of commands, redirect output to a given file.""" |
922 | 928 | commands[-1].extend( [ ">" , outfile ] ) |
923 | 929 | series = [ commands ] |
924 | | - return self.runCommand([ series ], callbackTimed = self.status.updateStatusFiles) |
| 930 | + if (self.dryrun): |
| 931 | + self.prettyPrintCommands([ series ]) |
| 932 | + return 0 |
| 933 | + else: |
| 934 | + return self.runCommand([ series ], callbackTimed = self.status.updateStatusFiles) |
925 | 935 | |
| 936 | + def prettyPrintCommands(self, commandSeriesList): |
| 937 | + for series in commandSeriesList: |
| 938 | + for pipeline in series: |
| 939 | + commandStrings = [] |
| 940 | + for command in pipeline: |
| 941 | + commandStrings.append(" ".join(command)) |
| 942 | + pipelineString = " | ".join(commandStrings) |
| 943 | + print "Command to run: ", pipelineString |
| 944 | + |
926 | 945 | # command series list: list of (commands plus args) is one pipeline. list of pipelines = 1 series. |
927 | 946 | # this function wants a list of series. |
928 | 947 | # be a list (the command name and the various args) |
— | — | @@ -939,18 +958,23 @@ |
940 | 959 | This function spawns multiple series of pipelines in parallel. |
941 | 960 | |
942 | 961 | """ |
943 | | - commands = CommandsInParallel(commandSeriesList, callbackStderr=callbackStderr, callbackStderrArg=callbackStderrArg, callbackTimed=callbackTimed, callbackTimedArg=callbackTimedArg, shell=shell, callbackInterval=callbackInterval) |
944 | | - commands.runCommands() |
945 | | - if commands.exitedSuccessfully(): |
| 962 | + if self.dryrun: |
| 963 | + self.prettyPrintCommands(commandSeriesList) |
946 | 964 | return 0 |
| 965 | + |
947 | 966 | else: |
948 | | - problemCommands = commands.commandsWithErrors() |
949 | | - errorString = "Error from command(s): " |
950 | | - for cmd in problemCommands: |
951 | | - errorString = errorString + "%s " % cmd |
952 | | - self.logAndPrint(errorString) |
953 | | -# raise BackupError(errorString) |
954 | | - return 1 |
| 967 | + commands = CommandsInParallel(commandSeriesList, callbackStderr=callbackStderr, callbackStderrArg=callbackStderrArg, callbackTimed=callbackTimed, callbackTimedArg=callbackTimedArg, shell=shell, callbackInterval=callbackInterval) |
| 968 | + commands.runCommands() |
| 969 | + if commands.exitedSuccessfully(): |
| 970 | + return 0 |
| 971 | + else: |
| 972 | + problemCommands = commands.commandsWithErrors() |
| 973 | + errorString = "Error from command(s): " |
| 974 | + for cmd in problemCommands: |
| 975 | + errorString = errorString + "%s " % cmd |
| 976 | + self.logAndPrint(errorString) |
| 977 | + # raise BackupError(errorString) |
| 978 | + return 1 |
955 | 979 | |
956 | 980 | def debug(self, stuff): |
957 | 981 | self.logAndPrint("%s: %s %s" % (TimeUtils.prettyTime(), self.dbName, stuff)) |
— | — | @@ -1003,8 +1027,9 @@ |
1004 | 1028 | # mark all the following jobs to run as well |
1005 | 1029 | self.dumpItemList.markFollowingJobsToRun() |
1006 | 1030 | |
1007 | | - self.makeDir(join(self.wiki.publicDir(), self.date)) |
1008 | | - self.makeDir(join(self.wiki.privateDir(), self.date)) |
| 1031 | + if not self.dryrun: |
| 1032 | + self.makeDir(join(self.wiki.publicDir(), self.date)) |
| 1033 | + self.makeDir(join(self.wiki.privateDir(), self.date)) |
1009 | 1034 | |
1010 | 1035 | if (self.restart): |
1011 | 1036 | self.logAndPrint("Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName)) |
— | — | @@ -1018,35 +1043,38 @@ |
1019 | 1044 | files = self.listFilesFor(self.dumpItemList.dumpItems) |
1020 | 1045 | |
1021 | 1046 | if (self.jobRequested): |
1022 | | - self.checksums.prepareChecksums() |
| 1047 | + if not self.dryrun: |
| 1048 | + self.checksums.prepareChecksums() |
1023 | 1049 | |
1024 | 1050 | for item in self.dumpItemList.dumpItems: |
1025 | 1051 | if (item.toBeRun()): |
1026 | 1052 | item.start(self) |
1027 | | - self.status.updateStatusFiles() |
1028 | | - self.dumpItemList.saveDumpRunInfoFile() |
| 1053 | + if not self.dryrun: |
| 1054 | + self.status.updateStatusFiles() |
| 1055 | + self.dumpItemList.saveDumpRunInfoFile() |
1029 | 1056 | try: |
1030 | 1057 | item.dump(self) |
1031 | 1058 | except Exception, ex: |
1032 | 1059 | self.debug("*** exception! " + str(ex)) |
1033 | 1060 | item.setStatus("failed") |
1034 | | - if item.status() == "failed": |
| 1061 | + if item.status() == "failed" and not self.dryrun: |
1035 | 1062 | self.runHandleFailure() |
1036 | 1063 | else: |
1037 | 1064 | self.lastFailed = False |
1038 | 1065 | # this ensures that, previous run or new one, the old or new md5sums go to the file |
1039 | | - if item.status() == "done": |
| 1066 | + if item.status() == "done" and not self.dryrun: |
1040 | 1067 | self.runUpdateItemFileInfo(item) |
1041 | 1068 | |
1042 | | - if (self.dumpItemList.allPossibleJobsDone()): |
1043 | | - self.status.updateStatusFiles("done") |
1044 | | - else: |
1045 | | - self.status.updateStatusFiles("partialdone") |
1046 | | - self.dumpItemList.saveDumpRunInfoFile() |
| 1069 | + if not self.dryrun: |
| 1070 | + if (self.dumpItemList.allPossibleJobsDone()): |
| 1071 | + self.status.updateStatusFiles("done") |
| 1072 | + else: |
| 1073 | + self.status.updateStatusFiles("partialdone") |
| 1074 | + self.dumpItemList.saveDumpRunInfoFile() |
1047 | 1075 | |
1048 | | - # if any job succeeds we might as well make the sym link |
1049 | | - if (self.status.failCount < 1): |
1050 | | - self.completeDump(files) |
| 1076 | + # if any job succeeds we might as well make the sym link |
| 1077 | + if (self.status.failCount < 1): |
| 1078 | + self.completeDump(files) |
1051 | 1079 | |
1052 | 1080 | if (self.restart): |
1053 | 1081 | self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName)) |
— | — | @@ -1054,28 +1082,31 @@ |
1055 | 1083 | self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName)) |
1056 | 1084 | |
1057 | 1085 | else: |
1058 | | - self.checksums.prepareChecksums() |
| 1086 | + if not self.dryrun: |
| 1087 | + self.checksums.prepareChecksums() |
1059 | 1088 | |
1060 | 1089 | for item in self.dumpItemList.dumpItems: |
1061 | 1090 | item.start(self) |
1062 | | - self.status.updateStatusFiles() |
1063 | | - self.dumpItemList.saveDumpRunInfoFile() |
| 1091 | + if not self.dryrun: |
| 1092 | + self.status.updateStatusFiles() |
| 1093 | + self.dumpItemList.saveDumpRunInfoFile() |
1064 | 1094 | try: |
1065 | 1095 | item.dump(self) |
1066 | 1096 | except Exception, ex: |
1067 | 1097 | self.debug("*** exception! " + str(ex)) |
1068 | 1098 | item.setStatus("failed") |
1069 | | - if item.status() == "failed": |
| 1099 | + if item.status() == "failed" and not self.dryrun: |
1070 | 1100 | self.runHandleFailure() |
1071 | 1101 | else: |
1072 | | - self.runUpdateItemFileInfo(item) |
| 1102 | + if not self.dryrun: |
| 1103 | + self.runUpdateItemFileInfo(item) |
1073 | 1104 | self.lastFailed = False |
1074 | 1105 | |
1075 | | - self.status.updateStatusFiles("done") |
1076 | | - self.dumpItemList.saveDumpRunInfoFile() |
1077 | | - |
1078 | | - if self.status.failCount < 1: |
1079 | | - self.completeDump(files) |
| 1106 | + if not self.dryrun: |
| 1107 | + self.status.updateStatusFiles("done") |
| 1108 | + self.dumpItemList.saveDumpRunInfoFile() |
| 1109 | + if self.status.failCount < 1: |
| 1110 | + self.completeDump(files) |
1080 | 1111 | |
1081 | 1112 | self.showRunnerStateComplete() |
1082 | 1113 | |
— | — | @@ -1092,8 +1123,9 @@ |
1093 | 1124 | if old: |
1094 | 1125 | for dump in old: |
1095 | 1126 | self.showRunnerState("Purging old dump %s for %s" % (dump, self.dbName)) |
1096 | | - base = os.path.join(self.wiki.publicDir(), dump) |
1097 | | - shutil.rmtree("%s" % base) |
| 1127 | + if not self.dryrun: |
| 1128 | + base = os.path.join(self.wiki.publicDir(), dump) |
| 1129 | + shutil.rmtree("%s" % base) |
1098 | 1130 | else: |
1099 | 1131 | self.showRunnerState("No old dumps to purge.") |
1100 | 1132 | |
— | — | @@ -1278,7 +1310,8 @@ |
1279 | 1311 | pass |
1280 | 1312 | |
1281 | 1313 | def buildRecombineCommandString(self, runner, files, outputFileBasename, compressionCommand, uncompressionCommand, endHeaderMarker="</siteinfo>"): |
1282 | | - outputFilename = runner.dumpDir.publicPath(outputFileBasename) |
| 1314 | +# outputFilename = self.buildOutputFilename(runner, outputFileBasename) |
| 1315 | + outputFilename = runner.dumpDir.publicPath(outputFileBasename) |
1283 | 1316 | chunkNum = 0 |
1284 | 1317 | recombines = [] |
1285 | 1318 | head = runner.config.head |
— | — | @@ -1311,14 +1344,12 @@ |
1312 | 1345 | # warning: we figure any header (<siteinfo>...</siteinfo>) is going to be less than 2000 lines! |
1313 | 1346 | pipeline.append([ head, "-2000"]) |
1314 | 1347 | pipeline.append([ grep, "-n", endHeaderMarker ]) |
1315 | | - # without sheell |
| 1348 | + # without shell |
1316 | 1349 | p = CommandPipeline(pipeline, quiet=True) |
1317 | 1350 | p.runPipelineAndGetOutput() |
1318 | 1351 | if (p.output()): |
1319 | 1352 | (headerEndNum, junk) = p.output().split(":",1) |
1320 | 1353 | # get headerEndNum |
1321 | | - if exists(outputFilename): |
1322 | | - os.remove(outputFilename) |
1323 | 1354 | recombine = " ".join(uncompressThisFile) |
1324 | 1355 | headerEndNum = int(headerEndNum) + 1 |
1325 | 1356 | if (chunkNum == 1): |
— | — | @@ -1335,7 +1366,14 @@ |
1336 | 1367 | recombineCommandString = "(" + ";".join(recombines) + ")" + "|" + "%s %s" % (compressionCommand, outputFilename) |
1337 | 1368 | return(recombineCommandString) |
1338 | 1369 | |
| 1370 | + def cleanupOldFiles(self, runner, outputFileBasename): |
| 1371 | + outputFilename = self.buildOutputFilename(runner, outputFileBasename) |
| 1372 | + if exists(outputFilename): |
| 1373 | + runner.remove(outputFilename) |
1339 | 1374 | |
| 1375 | + def buildOutputFilename(self, runner, outputFileBasename): |
| 1376 | + return outputFilename |
| 1377 | + |
1340 | 1378 | class PublicTable(Dump): |
1341 | 1379 | """Dump of a table using MySQL's mysqldump utility.""" |
1342 | 1380 | |
— | — | @@ -1404,51 +1442,83 @@ |
1405 | 1443 | "stub-articles.xml.gz"] |
1406 | 1444 | |
1407 | 1445 | def buildCommand(self, runner, chunk = 0): |
| 1446 | + history = self.buildHistoryOutputFilename(runner, chunk) |
| 1447 | + current = self.buildCurrentOutputFilename(runner, chunk) |
| 1448 | + articles = self.buildArticlesOutputFilename(runner, chunk) |
| 1449 | + |
| 1450 | + command = [ "%s" % runner.config.php, |
| 1451 | + "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir, |
| 1452 | + "--wiki=%s" % runner.dbName, |
| 1453 | + "--full", "--stub", "--report=10000", |
| 1454 | + "%s" % runner.forceNormalOption(), |
| 1455 | + "--output=gzip:%s" % history, |
| 1456 | + "--output=gzip:%s" % current, |
| 1457 | + "--filter=latest", "--output=gzip:%s" % articles, |
| 1458 | + "--filter=latest", "--filter=notalk", "--filter=namespace:!NS_USER" ] |
1408 | 1459 | if (chunk): |
| 1460 | + # set up start end end pageids for this piece |
| 1461 | + # note there is no page id 0 I guess. so we start with 1 |
| 1462 | + # start = runner.pagesPerChunk()*(chunk-1) + 1 |
| 1463 | + start = sum([ self._chunks[i] for i in range(0,chunk-1)]) + 1 |
| 1464 | + startopt = "--start=%s" % start |
| 1465 | + # if we are on the last chunk, we should get up to the last pageid, |
| 1466 | + # whatever that is. |
| 1467 | + command.append(startopt) |
| 1468 | + if chunk < len(self._chunks): |
| 1469 | + # end = start + runner.pagesPerChunk() |
| 1470 | + end = sum([ self._chunks[i] for i in range(0,chunk)]) +1 |
| 1471 | + endopt = "--end=%s" % end |
| 1472 | + command.append(endopt) |
| 1473 | + |
| 1474 | + pipeline = [ command ] |
| 1475 | + series = [ pipeline ] |
| 1476 | + return(series) |
| 1477 | + |
| 1478 | + def cleanupOldFiles(self, runner, chunk = 0): |
| 1479 | + fileList = self.buildOutputFilenames(runner, chunk) |
| 1480 | + for filename in fileList: |
| 1481 | + if exists(filename): |
| 1482 | + runner.remove(filename) |
| 1483 | + |
| 1484 | + def buildHistoryOutputFilename(self, runner, chunk = 0): |
| 1485 | + if (chunk): |
1409 | 1486 | chunkinfo = "%s" % chunk |
1410 | 1487 | else: |
1411 | 1488 | chunkinfo = "" |
1412 | 1489 | history = runner.dumpDir.publicPath("stub-meta-history" + chunkinfo + ".xml.gz") |
| 1490 | + return history |
| 1491 | + |
| 1492 | + def buildCurrentOutputFilename(self, runner, chunk = 0): |
| 1493 | + if (chunk): |
| 1494 | + chunkinfo = "%s" % chunk |
| 1495 | + else: |
| 1496 | + chunkinfo = "" |
1413 | 1497 | current = runner.dumpDir.publicPath("stub-meta-current" + chunkinfo + ".xml.gz") |
| 1498 | + return current |
| 1499 | + |
| 1500 | + def buildArticlesOutputFilename(self, runner, chunk = 0): |
| 1501 | + if (chunk): |
| 1502 | + chunkinfo = "%s" % chunk |
| 1503 | + else: |
| 1504 | + chunkinfo = "" |
1414 | 1505 | articles = runner.dumpDir.publicPath("stub-articles" + chunkinfo + ".xml.gz") |
1415 | | - for filename in (history, current, articles): |
1416 | | - if exists(filename): |
1417 | | - os.remove(filename) |
1418 | | - command = [ "%s" % runner.config.php, |
1419 | | - "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir, |
1420 | | - "--wiki=%s" % runner.dbName, |
1421 | | - "--full", "--stub", "--report=10000", |
1422 | | - "%s" % runner.forceNormalOption(), |
1423 | | - "--output=gzip:%s" % history, |
1424 | | - "--output=gzip:%s" % current, |
1425 | | - "--filter=latest", "--output=gzip:%s" % articles, |
1426 | | - "--filter=latest", "--filter=notalk", "--filter=namespace:!NS_USER" ] |
1427 | | - if (chunk): |
1428 | | - # set up start end end pageids for this piece |
1429 | | - # note there is no page id 0 I guess. so we start with 1 |
1430 | | - # start = runner.pagesPerChunk()*(chunk-1) + 1 |
1431 | | - start = sum([ self._chunks[i] for i in range(0,chunk-1)]) + 1 |
1432 | | - startopt = "--start=%s" % start |
1433 | | - # if we are on the last chunk, we should get up to the last pageid, |
1434 | | - # whatever that is. |
1435 | | - command.append(startopt) |
1436 | | - if chunk < len(self._chunks): |
1437 | | - # end = start + runner.pagesPerChunk() |
1438 | | - end = sum([ self._chunks[i] for i in range(0,chunk)]) +1 |
1439 | | - endopt = "--end=%s" % end |
1440 | | - command.append(endopt) |
| 1506 | + return articles |
1441 | 1507 | |
1442 | | - pipeline = [ command ] |
1443 | | - series = [ pipeline ] |
1444 | | - return(series) |
1445 | | - |
| 1508 | + def buildOutputFilenames(self, runner, chunk = 0): |
| 1509 | + history = self.buildHistoryOutputFilename(runner, chunk) |
| 1510 | + current = self.buildCurrentOutputFilename(runner, chunk) |
| 1511 | + articles = self.buildArticlesOutputFilename(runner, chunk) |
| 1512 | + return([ history, current, articles ]) |
| 1513 | + |
1446 | 1514 | def run(self, runner): |
1447 | 1515 | commands = [] |
1448 | 1516 | if self._chunks: |
1449 | 1517 | for i in range(1, len(self._chunks)+1): |
| 1518 | + self.cleanupOldFiles(runner,i) |
1450 | 1519 | series = self.buildCommand(runner, i) |
1451 | 1520 | commands.append(series) |
1452 | 1521 | else: |
| 1522 | + self.cleanupOldFiles(runner) |
1453 | 1523 | series = self.buildCommand(runner) |
1454 | 1524 | commands.append(series) |
1455 | 1525 | result = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner) |
— | — | @@ -1505,10 +1575,18 @@ |
1506 | 1576 | def listFiles(self, runner): |
1507 | 1577 | return ["pages-logging.xml.gz"] |
1508 | 1578 | |
| 1579 | + def cleanupOldFiles(self, runner): |
| 1580 | + logging = self.buildOutputFilename(runner) |
| 1581 | + if exists(logging): |
| 1582 | + runner.remove(logging) |
| 1583 | + |
| 1584 | + def buildOutputFilename(self, runner): |
| 1585 | + logging = runner.dumpDir.publicPath("pages-logging.xml.gz") |
| 1586 | + return logging |
| 1587 | + |
1509 | 1588 | def run(self, runner): |
1510 | | - logging = runner.dumpDir.publicPath("pages-logging.xml.gz") |
1511 | | - if exists(logging): |
1512 | | - os.remove(logging) |
| 1589 | + self.cleanupOldFiles(runner) |
| 1590 | + logging = self.buildOutputFilename(runner) |
1513 | 1591 | command = [ "%s" % runner.config.php, |
1514 | 1592 | "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir, |
1515 | 1593 | "--wiki=%s" % runner.dbName, |
— | — | @@ -1723,7 +1801,10 @@ |
1724 | 1802 | if not self.statusOfOldDumpIsDone(runner, date): |
1725 | 1803 | runner.debug("skipping incomplete or failed dump for prefetch %s" % possible) |
1726 | 1804 | continue |
1727 | | - runner.debug("Prefetchable %s" % possible) |
| 1805 | + if (chunk) and (self.filenameHasChunk(possible, "bz2")): |
| 1806 | + runner.debug("Prefetchable %s etc." % possible) |
| 1807 | + else: |
| 1808 | + runner.debug("Prefetchable %s" % possible) |
1728 | 1809 | # found something workable, now check the chunk situation |
1729 | 1810 | if (chunk): |
1730 | 1811 | if (self.filenameHasChunk(possible, "bz2")): |
— | — | @@ -1904,7 +1985,7 @@ |
1905 | 1986 | def _path(self, runner, ext, chunk=0): |
1906 | 1987 | return runner.dumpDir.publicPath(self._file(ext,chunk)) |
1907 | 1988 | |
1908 | | - def getOutputFilename(self, runner, chunk=0): |
| 1989 | + def buildOutputFilename(self, runner, chunk=0): |
1909 | 1990 | if (chunk): |
1910 | 1991 | xml7z = self._path(runner, "7z", chunk) |
1911 | 1992 | else: |
— | — | @@ -1920,25 +2001,31 @@ |
1921 | 2002 | |
1922 | 2003 | def buildCommand(self, runner, chunk = 0): |
1923 | 2004 | xmlbz2 = self.getInputFilename(runner, chunk) |
1924 | | - xml7z = self.getOutputFilename(runner, chunk) |
| 2005 | + xml7z = self.buildOutputFilename(runner, chunk) |
1925 | 2006 | |
1926 | | - # Clear prior 7zip attempts; 7zip will try to append an existing archive |
1927 | | - if exists(xml7z): |
1928 | | - os.remove(xml7z) |
1929 | 2007 | # FIXME need shell escape |
1930 | 2008 | commandPipe = [ [ "%s -dc %s | %s a -si %s" % (runner.config.bzip2, xmlbz2, runner.config.sevenzip, xml7z) ] ] |
1931 | 2009 | commandSeries = [ commandPipe ] |
1932 | 2010 | return(commandSeries) |
1933 | 2011 | |
| 2012 | + def cleanupOldFiles(self, runner, chunk = 0): |
| 2013 | + xml7z = self.buildOutputFilename(runner, chunk) |
| 2014 | + if exists(xml7z): |
| 2015 | + runner.remove(xml7z) |
| 2016 | + |
1934 | 2017 | def run(self, runner): |
1935 | 2018 | if runner.lastFailed: |
1936 | 2019 | raise BackupError("bz2 dump incomplete, not recompressing") |
1937 | 2020 | commands = [] |
1938 | 2021 | if (self._chunks): |
1939 | 2022 | for i in range(1, len(self._chunks)+1): |
| 2023 | + # Clear prior 7zip attempts; 7zip will try to append an existing archive |
| 2024 | + self.cleanupOldFiles(runner, i) |
1940 | 2025 | series = self.buildCommand(runner, i) |
1941 | 2026 | commands.append(series) |
1942 | 2027 | else: |
| 2028 | + # Clear prior 7zip attempts; 7zip will try to append an existing archive |
| 2029 | + self.cleanupOldFiles(runner) |
1943 | 2030 | series = self.buildCommand(runner) |
1944 | 2031 | commands.append(series) |
1945 | 2032 | result = runner.runCommand(commands, callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True) |
— | — | @@ -1946,11 +2033,11 @@ |
1947 | 2034 | # some hacks aren't so temporary - atg 3 sept 2010 |
1948 | 2035 | if (self._chunks): |
1949 | 2036 | for i in range(1, len(self._chunks)+1): |
1950 | | - xml7z = self.getOutputFilename(runner,i) |
| 2037 | + xml7z = self.buildOutputFilename(runner,i) |
1951 | 2038 | if exists(xml7z): |
1952 | 2039 | os.chmod(xml7z, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH ) |
1953 | 2040 | else: |
1954 | | - xml7z = self.getOutputFilename(runner) |
| 2041 | + xml7z = self.buildOutputFilename(runner) |
1955 | 2042 | if exists(xml7z): |
1956 | 2043 | os.chmod(xml7z, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH ) |
1957 | 2044 | return(result) |
— | — | @@ -1977,9 +2064,19 @@ |
1978 | 2065 | def listFiles(self, runner): |
1979 | 2066 | return(XmlRecompressDump.listFiles(self, runner, unnumbered=True)) |
1980 | 2067 | |
| 2068 | + def cleanupOldFiles(self, runner): |
| 2069 | + files = self.listFiles(runner) |
| 2070 | + print "here is cleanup" |
| 2071 | + for filename in files: |
| 2072 | + filename = runner.dumpDir.publicPath(filename) |
| 2073 | + if exists(filename): |
| 2074 | + runner.remove(filename) |
| 2075 | + |
1981 | 2076 | def run(self, runner): |
| 2077 | + print "here we are" |
1982 | 2078 | errorresult = 0 |
1983 | 2079 | if (self._chunks): |
| 2080 | + self.cleanupOldFiles(runner) |
1984 | 2081 | files = XmlRecompressDump.listFiles(self,runner) |
1985 | 2082 | outputFileList = self.listFiles(runner) |
1986 | 2083 | for outputFile in outputFileList: |
— | — | @@ -2175,6 +2272,8 @@ |
2176 | 2273 | print " give the option --job help" |
2177 | 2274 | print " This option requires specifiying a wikidbname on which to run." |
2178 | 2275 | print " This option cannot be specified with --force." |
| 2276 | + print "--dryrun: Don't really run the job, just print what would be done (must be used" |
| 2277 | + print " with a specified wikidbname on which to run" |
2179 | 2278 | print "--force: remove a lock file for the specified wiki (dangerous, if there is" |
2180 | 2279 | print " another process running, useful if you want to start a second later" |
2181 | 2280 | print " run while the first dump from a previous date is still going)" |
— | — | @@ -2200,10 +2299,11 @@ |
2201 | 2300 | enableLogging = False |
2202 | 2301 | log = None |
2203 | 2302 | htmlNotice = "" |
| 2303 | + dryrun = False |
2204 | 2304 | |
2205 | 2305 | try: |
2206 | 2306 | (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", |
2207 | | - ['date=', 'job=', 'configfile=', 'notice=', 'force', 'noprefetch', 'nospawn', 'restartfrom', 'log']) |
| 2307 | + ['date=', 'job=', 'configfile=', 'notice=', 'force', 'dryrun', 'noprefetch', 'nospawn', 'restartfrom', 'log']) |
2208 | 2308 | except: |
2209 | 2309 | usage("Unknown option specified") |
2210 | 2310 | |
— | — | @@ -2218,6 +2318,8 @@ |
2219 | 2319 | prefetch = False |
2220 | 2320 | elif opt == "--nospawn": |
2221 | 2321 | spawn = False |
| 2322 | + elif opt == "--dryrun": |
| 2323 | + dryrun = True |
2222 | 2324 | elif opt == "--job": |
2223 | 2325 | jobRequested = val |
2224 | 2326 | elif opt == "--restartfrom": |
— | — | @@ -2227,6 +2329,8 @@ |
2228 | 2330 | elif opt == "--notice": |
2229 | 2331 | htmlNotice = val |
2230 | 2332 | |
| 2333 | + if dryrun and (len(remainder) == 0): |
| 2334 | + usage("--dryrun requires the name of a wikidb to be specified") |
2231 | 2335 | if jobRequested and (len(remainder) == 0): |
2232 | 2336 | usage("--job option requires the name of a wikidb to be specified") |
2233 | 2337 | if (jobRequested and forceLock): |
— | — | @@ -2240,19 +2344,25 @@ |
2241 | 2345 | else: |
2242 | 2346 | config = WikiDump.Config() |
2243 | 2347 | |
| 2348 | + if dryrun: |
| 2349 | + print "***" |
| 2350 | + print "Dry run only, no files will be updated." |
| 2351 | + print "***" |
| 2352 | + |
2244 | 2353 | if len(remainder) > 0: |
2245 | 2354 | wiki = WikiDump.Wiki(config, remainder[0]) |
2246 | | - # if we are doing one piece only of the dump, we don't try to grab a lock. |
2247 | | - if forceLock: |
2248 | | - if wiki.isLocked(): |
| 2355 | + # if we are doing one piece only of the dump, we don't try to grab a lock |
| 2356 | + # unless told to. |
| 2357 | + if not dryrun: |
| 2358 | + if forceLock and wiki.isLocked(): |
2249 | 2359 | wiki.unlock() |
2250 | | - if restart or not jobRequested: |
2251 | | - wiki.lock() |
| 2360 | + if restart or not jobRequested: |
| 2361 | + wiki.lock() |
2252 | 2362 | else: |
2253 | 2363 | wiki = findAndLockNextWiki(config) |
2254 | 2364 | |
2255 | 2365 | if wiki: |
2256 | | - runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, enableLogging) |
| 2366 | + runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, dryrun, enableLogging) |
2257 | 2367 | if (restart): |
2258 | 2368 | print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested) |
2259 | 2369 | elif (jobRequested): |
— | — | @@ -2261,8 +2371,9 @@ |
2262 | 2372 | print "Running %s..." % wiki.dbName |
2263 | 2373 | runner.run() |
2264 | 2374 | # if we are doing one piece only of the dump, we don't unlock either |
2265 | | - if restart or not jobRequested: |
2266 | | - wiki.unlock() |
| 2375 | + if not dryrun: |
| 2376 | + if restart or not jobRequested: |
| 2377 | + wiki.unlock() |
2267 | 2378 | else: |
2268 | 2379 | print "No wikis available to run." |
2269 | 2380 | finally: |