r92610 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r92609‎ | r92610 | r92611 >
Date:07:23, 20 July 2011
Author:ariel
Status:deferred
Tags:
Comment:
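Add more "enabled" flags to Runner (_makeDirEnabled, _cleanOldDumpsEnabled, _cleanupOldFilesEnabled, _checkForTruncatedFilesEnabled) and use them to replace several more of the scattered inline checks for dryrun and _chunkToDo.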
Modified paths:
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/worker.py
@@ -1021,7 +1021,6 @@
10221022 return os.path.join(self.wiki.publicDir(), self.date);
10231023
10241024 class Runner(object):
1025 -
10261025 def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice="", dryrun = False, loggingEnabled=False, chunkToDo = False):
10271026 self.wiki = wiki
10281027 self.dbName = wiki.dbName
@@ -1029,17 +1028,22 @@
10301029 self.spawn = spawn
10311030 self.chunkInfo = Chunk(wiki, self.dbName, self.logAndPrint)
10321031 self.restart = restart
1033 - self.loggingEnabled = loggingEnabled
10341032 self.htmlNoticeFile = None
10351033 self.log = None
10361034 self.dryrun = dryrun
10371035 self._chunkToDo = chunkToDo
 1036+
 1037+ self._loggingEnabled = loggingEnabled
10381038 self._statusEnabled = True
10391039 self._checksummerEnabled = True
10401040 self._runInfoFileEnabled = True
10411041 self._symLinksEnabled = True
10421042 self._feedsEnabled = True
10431043 self._noticeFileEnabled = True
 1044+ self._makeDirEnabled = True
 1045+ self._cleanOldDumpsEnabled = True
 1046+ self._cleanupOldFilesEnabled = False
 1047+ self._checkForTruncatedFilesEnabled = True
10441048
10451049 if self.dryrun or self._chunkToDo:
10461050 self._statusEnabled = False
@@ -1048,8 +1052,13 @@
10491053 self._symLinksEnabled = False
10501054 self._feedsEnabled = False
10511055 self._noticeFileEnabled = False
 1056+ self._makeDirEnabled = False
 1057+ self._cleanOldDumpsEnabled = False
 1058+ self._cleanupOldFilesEnables = False
 1059+
10521060 if self.dryrun:
1053 - self.loggingEnabled = False
 1061+ self._loggingEnabled = False
 1062+ self._checkForTruncatedFilesEnabled = False
10541063
10551064 if date:
10561065 # Override, continuing a past dump?
@@ -1065,7 +1074,7 @@
10661075 self.lastFailed = False
10671076
10681077 # these must come after the dumpdir setup so we know which directory we are in
1069 - if (loggingEnabled):
 1078+ if (self._loggingEnabled and self._makeDirEnabled):
10701079 self.logFileName = self.dumpDir.publicPath(self.wiki.config.logFile)
10711080 self.makeDir(join(self.wiki.publicDir(), self.date))
10721081 self.log = Logger(self.logFileName)
@@ -1088,7 +1097,7 @@
10891098 done = log.doJobOnLogQueue()
10901099
10911100 def logAndPrint(self, message):
1092 - if hasattr(self,'log') and self.log and not self.dryrun:
 1101+ if hasattr(self,'log') and self.log and self._loggingEnabled:
10931102 self.log.addToLogQueue("%s\n" % message)
10941103 print message
10951104
@@ -1098,9 +1107,8 @@
10991108 else:
11001109 return ""
11011110
1102 - def remove(self, filename):
1103 - if not self.dryrun:
1104 - os.remove(filename)
 1111+ def removeFile(self, filename):
 1112+ os.remove(filename)
11051113
11061114 # returns 0 on success, 1 on error
11071115 def saveTable(self, table, outfile):
@@ -1224,9 +1232,8 @@
12251233 # mark all the following jobs to run as well
12261234 self.dumpItemList.markFollowingJobsToRun()
12271235
1228 - if not self.dryrun:
1229 - self.makeDir(join(self.wiki.publicDir(), self.date))
1230 - self.makeDir(join(self.wiki.privateDir(), self.date))
 1236+ self.makeDir(join(self.wiki.publicDir(), self.date))
 1237+ self.makeDir(join(self.wiki.privateDir(), self.date))
12311238
12321239 if (self.restart):
12331240 self.logAndPrint("Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName))
@@ -1250,12 +1257,12 @@
12511258 except Exception, ex:
12521259 self.debug("*** exception! " + str(ex))
12531260 item.setStatus("failed")
1254 - if item.status() == "failed" and not self.dryrun and not self._chunkToDo:
 1261+ if item.status() == "failed":
12551262 self.runHandleFailure()
12561263 else:
12571264 self.lastFailed = False
12581265 # this ensures that, previous run or new one, the old or new md5sums go to the file
1259 - if item.status() == "done" and not self.dryrun and not self._chunkToDo:
 1266+ if item.status() == "done":
12601267 self.runUpdateItemFileInfo(item)
12611268
12621269 if (self.dumpItemList.allPossibleJobsDone()):
@@ -1263,10 +1270,9 @@
12641271 else:
12651272 self.status.updateStatusFiles("partialdone")
12661273 self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
1267 - if not self.dryrun and not self._chunkToDo:
1268 - # if any job succeeds we might as well make the sym link
1269 - if (self.status.failCount < 1):
1270 - self.completeDump()
 1274+ # if any job succeeds we might as well make the sym link
 1275+ if (self.status.failCount < 1):
 1276+ self.completeDump()
12711277
12721278 if (self.restart):
12731279 self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName))
@@ -1285,40 +1291,38 @@
12861292 except Exception, ex:
12871293 self.debug("*** exception! " + str(ex))
12881294 item.setStatus("failed")
1289 - if item.status() == "failed" and not self.dryrun and not self._chunkToDo:
 1295+ if item.status() == "failed":
12901296 self.runHandleFailure()
12911297 else:
1292 - if not self.dryrun and not self._chunkToDo:
1293 - self.runUpdateItemFileInfo(item)
1294 - self.checksums.cpMd5TmpFileToPermFile()
 1298+ self.runUpdateItemFileInfo(item)
 1299+ self.checksums.cpMd5TmpFileToPermFile()
12951300 self.lastFailed = False
12961301
12971302 self.status.updateStatusFiles("done")
1298 - if not self.dryrun and not self._chunkToDo:
1299 - self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
1300 - if self.status.failCount < 1:
1301 - self.completeDump()
 1303+ self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
 1304+ if self.status.failCount < 1:
 1305+ self.completeDump()
13021306
13031307 self.showRunnerStateComplete()
13041308
13051309 def cleanOldDumps(self):
1306 - old = self.wiki.dumpDirs()
1307 - if old:
1308 - if old[-1] == self.date:
1309 - # If we're re-running today's (or jobs from a given day's) dump, don't count it as one
1310 - # of the old dumps to keep... or delete it halfway through!
1311 - old = old[:-1]
1312 - if self.wiki.config.keep > 0:
1313 - # Keep the last few
1314 - old = old[:-(self.wiki.config.keep)]
1315 - if old:
1316 - for dump in old:
1317 - self.showRunnerState("Purging old dump %s for %s" % (dump, self.dbName))
1318 - if not self.dryrun and not self._chunkToDo:
 1310+ if self._cleanOldDumpsEnabled:
 1311+ old = self.wiki.dumpDirs()
 1312+ if old:
 1313+ if old[-1] == self.date:
 1314+ # If we're re-running today's (or jobs from a given day's) dump, don't count it as one
 1315+ # of the old dumps to keep... or delete it halfway through!
 1316+ old = old[:-1]
 1317+ if self.wiki.config.keep > 0:
 1318+ # Keep the last few
 1319+ old = old[:-(self.wiki.config.keep)]
 1320+ if old:
 1321+ for dump in old:
 1322+ self.showRunnerState("Purging old dump %s for %s" % (dump, self.dbName))
13191323 base = os.path.join(self.wiki.publicDir(), dump)
13201324 shutil.rmtree("%s" % base)
1321 - else:
1322 - self.showRunnerState("No old dumps to purge.")
 1325+ else:
 1326+ self.showRunnerState("No old dumps to purge.")
13231327
13241328 def showRunnerState(self, message):
13251329 self.debug(message)
@@ -1335,11 +1339,12 @@
13361340 self.symLinks.saveSymlink(self.checksums.getChecksumFileNameBasename())
13371341
13381342 def makeDir(self, dir):
1339 - if exists(dir):
1340 - self.debug("Checkdir dir %s ..." % dir)
1341 - else:
1342 - self.debug("Creating %s ..." % dir)
1343 - os.makedirs(dir)
 1343+ if self._makeDirEnabled:
 1344+ if exists(dir):
 1345+ self.debug("Checkdir dir %s ..." % dir)
 1346+ else:
 1347+ self.debug("Creating %s ..." % dir)
 1348+ os.makedirs(dir)
13441349
13451350 class SymLinks(object):
13461351 def __init__(self, wiki, dumpDir, date, logfn, debugfn, enabled):
@@ -1351,11 +1356,12 @@
13521357 self.debugfn = debugfn
13531358
13541359 def makeDir(self, dir):
1355 - if exists(dir):
1356 - self.debugfn("Checkdir dir %s ..." % dir)
1357 - else:
1358 - self.debugfn("Creating %s ..." % dir)
1359 - os.makedirs(dir)
 1360+ if (self._enabled):
 1361+ if exists(dir):
 1362+ self.debugfn("Checkdir dir %s ..." % dir)
 1363+ else:
 1364+ self.debugfn("Creating %s ..." % dir)
 1365+ os.makedirs(dir)
13601366
13611367 def saveSymlink(self, file):
13621368 if (self._enabled):
@@ -1376,7 +1382,7 @@
13771383 # no file or it's older than ours... *then* remove the link
13781384 if not exists(os.path.realpath(link)) or dateinterval > 0:
13791385 self.debug("Removing old symlink %s" % link)
1380 - os.remove(link)
 1386+ runner.removeFile(link)
13811387 else:
13821388 self.logfn("What the hell dude, %s is not a symlink" % link)
13831389 raise BackupError("What the hell dude, %s is not a symlink" % link)
@@ -1395,30 +1401,31 @@
13961402 self._enabled = enabled
13971403
13981404 def makeDir(self, dir):
1399 - if exists(dir):
1400 - self.debugfn("Checkdir dir %s ..." % dir)
1401 - else:
1402 - self.debugfn("Creating %s ..." % dir)
1403 - os.makedirs(dir)
 1405+ if (self._enabled):
 1406+ if exists(dir):
 1407+ self.debugfn("Checkdir dir %s ..." % dir)
 1408+ else:
 1409+ self.debugfn("Creating %s ..." % dir)
 1410+ os.makedirs(dir)
14041411
14051412 def saveFeed(self, file):
1406 - self.makeDir(join(self.wiki.publicDir(), 'latest'))
1407 - filePath = self.dumpDir.webPath(file)
1408 - fileName = os.path.basename(filePath)
1409 - webPath = os.path.dirname(filePath)
1410 - rssText = self.wiki.config.readTemplate("feed.xml") % {
1411 - "chantitle": file,
1412 - "chanlink": webPath,
1413 - "chandesc": "Wikimedia dump updates for %s" % self.dbName,
1414 - "title": webPath,
1415 - "link": webPath,
1416 - "description": xmlEscape("<a href=\"%s\">%s</a>" % (filePath, fileName)),
1417 - "date": time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())}
1418 - directory = self.dumpDir.latestDir()
1419 - rssPath = self.dumpDir.latestPath(file + "-rss.xml")
1420 - FileUtils.writeFile(directory, rssPath, rssText, self.wiki.config.fileperms)
 1413+ if (self._enabled):
 1414+ self.makeDir(join(self.wiki.publicDir(), 'latest'))
 1415+ filePath = self.dumpDir.webPath(file)
 1416+ fileName = os.path.basename(filePath)
 1417+ webPath = os.path.dirname(filePath)
 1418+ rssText = self.wiki.config.readTemplate("feed.xml") % {
 1419+ "chantitle": file,
 1420+ "chanlink": webPath,
 1421+ "chandesc": "Wikimedia dump updates for %s" % self.dbName,
 1422+ "title": webPath,
 1423+ "link": webPath,
 1424+ "description": xmlEscape("<a href=\"%s\">%s</a>" % (filePath, fileName)),
 1425+ "date": time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())}
 1426+ directory = self.dumpDir.latestDir()
 1427+ rssPath = self.dumpDir.latestPath(file + "-rss.xml")
 1428+ FileUtils.writeFile(directory, rssPath, rssText, self.wiki.config.fileperms)
14211429
1422 -
14231430 class Dump(object):
14241431 def __init__(self, name, desc):
14251432 self._desc = desc
@@ -1565,9 +1572,10 @@
15661573 return(recombineCommandString)
15671574
15681575 def cleanupOldFiles(self, runner, outputFileBasename):
1569 - outputFilename = self.buildOutputFilename(runner, outputFileBasename)
1570 - if exists(outputFilename):
1571 - runner.remove(outputFilename)
 1576+ if (runner._cleanupOldFilesEnabled):
 1577+ outputFilename = self.buildOutputFilename(runner, outputFileBasename)
 1578+ if exists(outputFilename):
 1579+ runner.removeFile(outputFilename)
15721580
15731581 def buildOutputFilename(self, runner, outputFileBasename):
15741582 return outputFilename
@@ -1685,10 +1693,11 @@
16861694 return(series)
16871695
16881696 def cleanupOldFiles(self, runner, chunk = 0):
1689 - fileList = self.buildOutputFilenames(runner, chunk)
1690 - for filename in fileList:
1691 - if exists(filename):
1692 - runner.remove(filename)
 1697+ if (runner._cleanupOldFilesEnabled):
 1698+ fileList = self.buildOutputFilenames(runner, chunk)
 1699+ for filename in fileList:
 1700+ if exists(filename):
 1701+ runner.removeFile(filename)
16931702
16941703 def buildHistoryOutputFilename(self, runner, chunk = 0):
16951704 if (chunk):
@@ -1801,9 +1810,10 @@
18021811 return ["pages-logging.xml.gz"]
18031812
18041813 def cleanupOldFiles(self, runner):
1805 - logging = self.buildOutputFilename(runner)
1806 - if exists(logging):
1807 - runner.remove(logging)
 1814+ if (runner._cleanupOldFilesEnabled):
 1815+ logging = self.buildOutputFilename(runner)
 1816+ if exists(logging):
 1817+ runner.removeFile(logging)
18081818
18091819 def buildOutputFilename(self, runner):
18101820 logging = runner.dumpDir.publicPath("pages-logging.xml.gz")
@@ -1869,33 +1879,39 @@
18701880 commands.append(series)
18711881 error = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
18721882
1873 - if (not exists( runner.wiki.config.checkforbz2footer ) ):
1874 - raise BackupError("checkforbz2footer command %s not found" % runner.wiki.config.checkforbz2footer);
1875 - checkforbz2footer = "%s" % runner.wiki.config.checkforbz2footer
1876 - if exists(checkforbz2footer):
1877 - # check to see if any of the output files are truncated
1878 - files = []
1879 - if (self._chunks):
1880 - if (self._chunkToDo):
1881 - if (self._chunkToDo < 1 or self._chunkToDo > len(self._chunks)):
1882 - raise BackupError("chunk option must be in range of available chunks to rerun, 1 through %s\n" % str(len(self._chunks)))
1883 - files.append( self._path(runner, 'bz2', self._chunkToDo ) )
1884 - else:
1885 - for i in range(1, len(self._chunks)+1):
1886 - files.append( self._path(runner, 'bz2', i ) )
 1883+ truncationError = self.checkForTruncatedFiles(runner)
18871884
1888 - for f in files:
1889 - pipeline = []
1890 - pipeline.append([ checkforbz2footer, f ])
1891 - p = CommandPipeline(pipeline, quiet=True)
1892 - p.runPipelineAndGetOutput()
1893 - if not p.exitedSuccessfully():
1894 - runner.logAndPrint("file %s is truncated, moving out of the way" %f )
1895 - os.rename( f, f + ".truncated" )
1896 - error = 1
1897 - if (error):
 1885+ if (error or truncationError):
18981886 raise BackupError("error producing xml bz2 file(s) %s" % self._subset)
18991887
 1888+ def checkForTruncatedFiles(self, runner):
 1889+ if runner._checkForTruncatedFilesEnabled:
 1890+ if (not exists( runner.wiki.config.checkforbz2footer ) ):
 1891+ raise BackupError("checkforbz2footer command %s not found" % runner.wiki.config.checkforbz2footer);
 1892+ checkforbz2footer = "%s" % runner.wiki.config.checkforbz2footer
 1893+ if exists(checkforbz2footer):
 1894+ # check to see if any of the output files are truncated
 1895+ files = []
 1896+ if (self._chunks):
 1897+ if (self._chunkToDo):
 1898+ if (self._chunkToDo < 1 or self._chunkToDo > len(self._chunks)):
 1899+ raise BackupError("chunk option must be in range of available chunks to rerun, 1 through %s\n" % str(len(self._chunks)))
 1900+ files.append( self._path(runner, 'bz2', self._chunkToDo ) )
 1901+ else:
 1902+ for i in range(1, len(self._chunks)+1):
 1903+ files.append( self._path(runner, 'bz2', i ) )
 1904+
 1905+ for f in files:
 1906+ pipeline = []
 1907+ pipeline.append([ checkforbz2footer, f ])
 1908+ p = CommandPipeline(pipeline, quiet=True)
 1909+ p.runPipelineAndGetOutput()
 1910+ if not p.exitedSuccessfully():
 1911+ runner.logAndPrint("file %s is truncated, moving out of the way" %f )
 1912+ os.renameFile( f, f + ".truncated" )
 1913+ return 1
 1914+ return 0
 1915+
19001916 def buildEta(self, runner):
19011917 """Tell the dumper script whether to make ETA estimate on page or revision count."""
19021918 return "--current"
@@ -2216,9 +2232,10 @@
22172233 return(commandSeries)
22182234
22192235 def cleanupOldFiles(self, runner, chunk = 0):
2220 - xml7z = self.buildOutputFilename(runner, chunk)
2221 - if exists(xml7z):
2222 - runner.remove(xml7z)
 2236+ if (runner._cleanupOldFilesEnabled):
 2237+ xml7z = self.buildOutputFilename(runner, chunk)
 2238+ if exists(xml7z):
 2239+ runner.removeFile(xml7z)
22232240
22242241 def run(self, runner):
22252242 if runner.lastFailed:
@@ -2297,11 +2314,12 @@
22982315 return [ self._file("7z",0) ]
22992316
23002317 def cleanupOldFiles(self, runner):
2301 - files = self.listOutputFiles(runner)
2302 - for filename in files:
2303 - filename = runner.dumpDir.publicPath(filename)
2304 - if exists(filename):
2305 - runner.remove(filename)
 2318+ if (runner._cleanupOldFilesEnabled):
 2319+ files = self.listOutputFiles(runner)
 2320+ for filename in files:
 2321+ filename = runner.dumpDir.publicPath(filename)
 2322+ if exists(filename):
 2323+ runner.removeFile(filename)
23062324
23072325 def run(self, runner):
23082326 error = 0

Status & tagging log