r91158 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r91157 | r91158 | r91159 >
Date:05:59, 30 June 2011
Author:ariel
Status:deferred
Tags:
Comment:
error checking for various dump steps, including bz2 file truncation
Modified paths:
  • /branches/ariel/xmldumps-backup/WikiDump.py (modified) (history)
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/WikiDump.py
@@ -172,6 +172,7 @@
173173 "tail": "/usr/bin/tail",
174174 "cat": "/bin/cat",
175175 "grep": "/bin/grep",
 176+ "checkforbz2footer": "/usr/local/bin/checkforbz2footer",
176177 #"cleanup": {
177178 "keep": "3",
178179 #"chunks": {
@@ -249,6 +250,7 @@
250251 self.tail = conf.get("tools", "tail")
251252 self.cat = conf.get("tools", "cat")
252253 self.grep = conf.get("tools", "grep")
 254+ self.checkforbz2footer = conf.get("tools","checkforbz2footer")
253255
254256 if not conf.has_section('chunks'):
255257 conf.add_section('chunks')
Index: branches/ariel/xmldumps-backup/worker.py
@@ -988,7 +988,7 @@
989989 # callbackinterval: how often we will call callbackTimed (in milliseconds), defaults to every 5 secs
990990 def runCommand(self, commandSeriesList, callbackStderr=None, callbackStderrArg=None, callbackTimed=None, callbackTimedArg=None, shell = False, callbackInterval=5000):
991991 """Nonzero return code from the shell from any command in any pipeline will cause this
992 - function to print an error message and return 1, indictating error.
 992+ function to print an error message and return 1, indicating error.
993993 Returns 0 on success.
994994 If a callback function is passed, it will receive lines of
995995 output from the call. If the callback function takes another argument (which will
@@ -1561,8 +1561,9 @@
15621562 self.cleanupOldFiles(runner)
15631563 series = self.buildCommand(runner)
15641564 commands.append(series)
1565 - result = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
1566 - return result
 1565+ error = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
 1566+ if (error):
 1567+ raise BackupError("error producing stub files" % self._subset)
15671568
15681569 class RecombineXmlStub(XmlStub):
15691570 def __init__(self, name, desc, chunks):
@@ -1576,7 +1577,7 @@
15771578 return(XmlStub.listFiles(self, runner, unnumbered=True))
15781579
15791580 def run(self, runner):
1580 - errorresult=0
 1581+ error=0
15811582 if (self._chunks):
15821583 files = XmlStub.listFiles(self,runner)
15831584 outputFileList = self.listFiles(runner)
@@ -1599,8 +1600,9 @@
16001601 series = [ recombinePipeline ]
16011602 result = runner.runCommand([ series ], callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True)
16021603 if result:
1603 - errorresult = result
1604 - return errorresult
 1604+ error = result
 1605+ if (error):
 1606+ raise BackupError("error recombining stub files")
16051607
16061608 class XmlLogging(Dump):
16071609 """ Create a logging dump of all page activity """
@@ -1635,8 +1637,9 @@
16361638 "--output=gzip:%s" % logging ]
16371639 pipeline = [ command ]
16381640 series = [ pipeline ]
1639 - result = runner.runCommand([ series ], callbackStderr=self.progressCallback, callbackStderrArg=runner)
1640 - return result
 1641+ error = runner.runCommand([ series ], callbackStderr=self.progressCallback, callbackStderrArg=runner)
 1642+ if (error):
 1643+ raise BackupError("error dimping log files")
16411644
16421645 class XmlDump(Dump):
16431646 """Primary XML dumps, one section at a time."""
@@ -1671,9 +1674,29 @@
16721675 else:
16731676 series = self.buildCommand(runner)
16741677 commands.append(series)
1675 - result = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
1676 - return result
 1678+ error = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
16771679
 1680+ checkforbz2footer = "%s" % runner.config.checkforbz2footer
 1681+ if exists(checkforbz2footer):
 1682+ # check to see if any of the output files are truncated
 1683+ files = []
 1684+ if (self._chunks):
 1685+ for i in range(1, len(self._chunks)+1):
 1686+ files.append( self._path(runner, 'bz2', i ) )
 1687+ files.append( self._path(runner, 'bz2', i ) )
 1688+
 1689+ for f in files:
 1690+ pipeline = []
 1691+ pipeline.append([ checkforbz2footer, f ])
 1692+ p = CommandPipeline(pipeline, quiet=True)
 1693+ p.runPipelineAndGetOutput()
 1694+ if not p.exitedSuccessfully():
 1695+ runner.logAndPrint("file %s is truncated, moving out of the way" %f )
 1696+ os.rename( f, f + ".truncated" )
 1697+ error = 1
 1698+ if (error):
 1699+ raise BackupError("error producing xml bz2 file(s) %s" % self._subset)
 1700+
16781701 def buildEta(self, runner):
16791702 """Tell the dumper script whether to make ETA estimate on page or revision count."""
16801703 return "--current"
@@ -1969,7 +1992,7 @@
19701993 return(XmlDump.listFiles(self, runner, unnumbered=True))
19711994
19721995 def run(self, runner):
1973 - errorresult=0
 1996+ error=0
19741997 if (self._chunks):
19751998 files = XmlDump.listFiles(self,runner)
19761999 outputFileList = self.listFiles(runner)
@@ -1992,8 +2015,9 @@
19932016 series = [ recombinePipeline ]
19942017 result = runner.runCommand([ series ], callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True)
19952018 if result:
1996 - errorresult = result
1997 - return errorresult
 2019+ error = result
 2020+ if (error):
 2021+ raise BackupError("error recombining xml bz2 files")
19982022
19992023 class BigXmlDump(XmlDump):
20002024 """XML page dump for something larger, where a 7-Zip compressed copy
@@ -2068,7 +2092,7 @@
20692093 self.cleanupOldFiles(runner)
20702094 series = self.buildCommand(runner)
20712095 commands.append(series)
2072 - result = runner.runCommand(commands, callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True)
 2096+ error = runner.runCommand(commands, callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True)
20732097 # temp hack force 644 permissions until ubuntu bug # 370618 is fixed - tomasz 5/1/2009
20742098 # some hacks aren't so temporary - atg 3 sept 2010
20752099 if (self._chunks):
@@ -2080,7 +2104,8 @@
20812105 xml7z = self.buildOutputFilename(runner)
20822106 if exists(xml7z):
20832107 os.chmod(xml7z, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH )
2084 - return(result)
 2108+ if (error):
 2109+ raise BackupError("error recompressing bz2 file(s)")
20852110
20862111 def listFiles(self, runner, unnumbered = False):
20872112 if (self._chunks) and not unnumbered:
@@ -2113,8 +2138,7 @@
21142139 runner.remove(filename)
21152140
21162141 def run(self, runner):
2117 - print "here we are"
2118 - errorresult = 0
 2142+ error = 0
21192143 if (self._chunks):
21202144 self.cleanupOldFiles(runner)
21212145 files = XmlRecompressDump.listFiles(self,runner)
@@ -2138,8 +2162,9 @@
21392163 series = [ recombinePipeline ]
21402164 result = runner.runCommand([ series ], callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True)
21412165 if result:
2142 - errorresult = result
2143 - return errorresult
 2166+ error = result
 2167+ if (error):
 2168+ raise BackupError("error recombining xml bz2 file(s)")
21442169
21452170 class AbstractDump(Dump):
21462171 """XML dump for Yahoo!'s Active Abstracts thingy"""
@@ -2186,8 +2211,11 @@
21872212 else:
21882213 series = self.buildCommand(runner)
21892214 commands.append(series)
2190 - runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
 2215+ error = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner)
 2216+ if (error):
 2217+ raise BackupError("error producing abstract dump")
21912218
 2219+
21922220 def _variants(self, runner):
21932221 # If the database name looks like it's marked as Chinese language,
21942222 # return a list including Simplified and Traditional versions, so
@@ -2234,7 +2262,7 @@
22352263 return(AbstractDump.listFiles(self,runner, unnumbered = True))
22362264
22372265 def run(self, runner):
2238 - errorresult = 0
 2266+ error = 0
22392267 if (self._chunks):
22402268 files = AbstractDump.listFiles(self,runner)
22412269 outputFileList = self.listFiles(runner)
@@ -2256,8 +2284,9 @@
22572285 series = [ recombinePipeline ]
22582286 result = runner.runCommand([ series ], callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True)
22592287 if result:
2260 - errorresult = result
2261 - return errorresult
 2288+ error = result
 2289+ if (error):
 2290+ raise BackupError("error recombining abstract dump files")
22622291
22632292 class TitleDump(Dump):
22642293 """This is used by "wikiproxy", a program to add Wikipedia links to BBC news online"""
@@ -2271,7 +2300,8 @@
22722301 retries = retries + 1
22732302 time.sleep(5)
22742303 error = runner.saveSql(query, runner.dumpDir.publicPath("all-titles-in-ns0.gz"))
2275 - return error
 2304+ if (error):
 2305+ raise BackupError("error dumping titles list")
22762306
22772307 def listFiles(self, runner):
22782308 return ["all-titles-in-ns0.gz"]

Status & tagging log