r82903 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r82902‎ | r82903 | r82904 >
Date:17:53, 27 February 2011
Author:ariel
Status:deferred
Tags:
Comment:
log progress output and other messages from dump run to a per-wiki-and-date file if specified on command line
Modified paths:
  • /branches/ariel/xmldumps-backup/WikiDump.py (modified) (history)
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/WikiDump.py
@@ -104,6 +104,7 @@
105105 "index": "index.html",
106106 "templatedir": home,
107107 "perdumpindex": "index.html",
 108+ "logfile": "dumplog.txt",
108109 #"reporting": {
109110 "adminmail": "root@localhost",
110111 "mailfrom": "root@localhost",
@@ -179,7 +180,8 @@
180181 self.index = conf.get("output", "index")
181182 self.templateDir = conf.get("output", "templateDir")
182183 self.perDumpIndex = conf.get("output", "perdumpindex")
183 -
 184+ self.logFile = conf.get("output", "logfile")
 185+
184186 self.adminMail = conf.get("reporting", "adminmail")
185187 self.mailFrom = conf.get("reporting", "mailfrom")
186188 self.smtpServer = conf.get("reporting", "smtpserver")
Index: branches/ariel/xmldumps-backup/worker.py
@@ -16,6 +16,8 @@
1717 import glob
1818 import WikiDump
1919 import CommandManagement
 20+import Queue
 21+import thread
2022
2123 from os.path import dirname, exists, getsize, join, realpath
2224 from subprocess import Popen, PIPE
@@ -48,6 +50,43 @@
def xmlEscape(text):
    """Return text with the characters &, < and > replaced by XML entities."""
    # "&" is handled first so the ampersands introduced by the later
    # substitutions are not escaped a second time.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        text = text.replace(raw, entity)
    return text
5153
class Logger(object):
    """Append queued log lines to an optional log file.

    Producers put lines on the queue with addToLogQueue() and signal the end
    of the run with indicateJobsDone().  A consumer calls doJobOnLogQueue()
    repeatedly; each call takes one entry off the queue and either writes it
    to the file or, on the end-of-run marker, closes the file.
    """

    def __init__(self, logFileName=None):
        """Open logFileName for appending; with no name, writes are no-ops."""
        if logFileName:
            self.logFile = open(logFileName, "a")
        else:
            self.logFile = None
        self.queue = Queue.Queue()
        # marker entry that tells the queue consumer to close up and stop
        self.JobsDone = "JOBSDONE"

    def logWrite(self, line=None):
        """Write line to the log file and flush it.

        Does nothing when no log file is open or when line is empty/None.
        """
        if self.logFile and line:
            self.logFile.write(line)
            # flush each time so the file can be followed while the run is going
            self.logFile.flush()

    def logClose(self):
        """Close the log file if one is open; safe to call more than once."""
        # The instance attribute must be tested here: a bare local name is
        # undefined in this scope and would raise NameError.
        if self.logFile:
            self.logFile.close()
            # drop the handle so later logWrite() calls become no-ops
            # instead of writing to a closed file
            self.logFile = None

    def doJobOnLogQueue(self):
        """Process one queue entry, blocking until one is available.

        Returns 1 if the end-of-run marker was received (the log file has
        been closed), 0 if an ordinary line was written.
        """
        line = self.queue.get()
        if line == self.JobsDone:
            self.logClose()
            return 1
        self.logWrite(line)
        return 0

    def addToLogQueue(self, line=None):
        """Queue line for writing; empty or None lines are ignored."""
        if line:
            self.queue.put_nowait(line)

    def indicateJobsDone(self):
        """Queue the end-of-run marker so the consumer cleans up and exits."""
        self.queue.put_nowait(self.JobsDone)
 90+
5291 # so if the pages/revsPerChunkAbstract/History are just one number it means
5392 # use that number for all the chunks, figure out yourself how many.
5493 # otherwise we get passed a list that says "here's how many for each chunk and it's this many chunks".
@@ -554,7 +593,7 @@
555594
556595 class Runner(object):
557596
558 - def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None, restart=False):
 597+ def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None, restart=False, loggingEnabled=False):
559598 self.wiki = wiki
560599 self.config = wiki.config
561600 self.dbName = wiki.dbName
@@ -562,6 +601,8 @@
563602 self.spawn = spawn
564603 self.chunkInfo = Chunk(wiki, self.dbName)
565604 self.restart = restart
 605+ self.loggingEnabled = loggingEnabled
 606+ self.log = None
566607
567608 if date:
568609 # Override, continuing a past dump?
@@ -577,10 +618,32 @@
578619
579620 self.jobRequested = job
580621 self.dumpDir = DumpDir(self.wiki, self.dbName, self.date)
 622+
 623+ # this must come after the dumpdir setup so we know which directory we are in
 624+ # for the log file.
 625+ if (loggingEnabled):
 626+ self.logFileName = self.dumpDir.publicPath(config.logFile)
 627+ self.makeDir(join(self.wiki.publicDir(), self.date))
 628+ self.log = Logger(self.logFileName)
 629+ thread.start_new_thread(self.logQueueReader,(self.log,))
 630+
581631 self.checksums = Checksummer(self.wiki, self.dumpDir)
 632+
582633 # some or all of these dumpItems will be marked to run
583634 self.dumpItemList = DumpItemList(self.wiki, self.prefetch, self.spawn, self.date, self.chunkInfo);
584635
 636+ def logQueueReader(self,log):
 637+ if not log:
 638+ return
 639+ done = False
 640+ while not done:
 641+ done = log.doJobOnLogQueue()
 642+
 643+ def logAndPrint(self, message):
 644+ if (self.log):
 645+ self.log.addToLogQueue("%s\n" % message)
 646+ print message
 647+
585648 def passwordOption(self):
586649 """If you pass '-pfoo' mysql uses the password 'foo',
587650 but if you pass '-p' it prompts. Sigh."""
@@ -676,6 +739,7 @@
677740 errorString = "Error from command(s): "
678741 for cmd in problemCommands:
679742 errorString = errorString + "%s " % cmd
 743+ self.logAndPrint(errorString)
680744 raise BackupError(errorString)
681745 return 1
682746
@@ -702,12 +766,14 @@
703767 output = proc.fromchild.read()
704768 retval = proc.wait()
705769 if retval:
 770+ self.logAndPrint("Non-zero return code from '%s'" % command)
706771 raise BackupError("Non-zero return code from '%s'" % command)
707772 else:
708773 return output
709774
710775 def debug(self, stuff):
711 - print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
 776+ self.logAndPrint("%s: %s %s" % (prettyTime(), self.dbName, stuff))
 777+# print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
712778
713779 def makeDir(self, dir):
714780 if exists(dir):
@@ -775,9 +841,9 @@
776842 self.makeDir(join(self.wiki.privateDir(), self.date))
777843
778844 if (self.restart):
779 - print "Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName)
 845+ self.logAndPrint("Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName))
780846 elif (self.jobRequested):
781 - print "Preparing for job %s of %s" % (self.jobRequested, self.dbName)
 847+ self.logAndPrint("Preparing for job %s of %s" % (self.jobRequested, self.dbName))
782848 else:
783849 self.showRunnerState("Cleaning up old dumps for %s" % self.dbName)
784850 self.cleanOldDumps()
@@ -901,7 +967,7 @@
902968 # Short line for report extraction goes here
903969 self.wiki.writeStatus(self.reportDatabaseStatusSummary(items, done))
904970 except:
905 - print "Couldn't update status files. Continuing anyways"
 971+ self.logAndPrint("Couldn't update status files. Continuing anyways")
906972
907973 def updateStatusFiles(self, done=False):
908974 self.saveStatusSummaryAndDetail(self.dumpItemList.dumpItems, done)
@@ -1059,6 +1125,7 @@
10601126 self.debug("Removing old symlink %s" % link)
10611127 os.remove(link)
10621128 else:
 1129+ self.logAndPrint("What the hell dude, %s is not a symlink" % link)
10631130 raise BackupError("What the hell dude, %s is not a symlink" % link)
10641131 relative = relativePath(real, dirname(link))
10651132 if exists(real):
@@ -1148,6 +1215,8 @@
11491216 """Receive a status line from a shellout and update the status files."""
11501217 # pass through...
11511218 if (line):
 1219+ if (runner.log):
 1220+ runner.log.addToLogQueue(line)
11521221 sys.stderr.write(line)
11531222 self.progress = line.strip()
11541223 runner.updateStatusFiles()
@@ -2083,7 +2152,7 @@
20842153 if message:
20852154 print message
20862155 print "Usage: python worker.py [options] [wikidbname]"
2087 - print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn, --restartfrom"
 2156+ print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn, --restartfrom, --log"
20882157 print "--configfile: Specify an alternative configuration file to read."
20892158 print " Default config file name: wikidump.conf"
20902159 print "--date: Rerun dump of a given date (probably unwise)"
@@ -2100,10 +2169,11 @@
21012170 print " (helpful if the previous files may have corrupt contents)"
21022171 print "--nospawn: Do not spawn a separate process in order to retrieve revision texts"
21032172 print "--restartfrom: Do all jobs after the one specified via --job, including that one"
 2173+ print "--log: Log progress messages and other output to logfile in addition to"
 2174+ print " the usual console output"
21042175
21052176 sys.exit(1)
21062177
2107 -
21082178 if __name__ == "__main__":
21092179 try:
21102180 date = None
@@ -2114,10 +2184,12 @@
21152185 spawn = True
21162186 restart = False
21172187 jobRequested = None
 2188+ enableLogging = False
 2189+ log = None
21182190
21192191 try:
21202192 (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
2121 - ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom'])
 2193+ ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom', 'log'])
21222194 except:
21232195 usage("Unknown option specified")
21242196
@@ -2138,6 +2210,8 @@
21392211 jobRequested = val
21402212 elif opt == "--restartfrom":
21412213 restart = True
 2214+ elif opt == "--log":
 2215+ enableLogging = True
21422216
21432217 if jobRequested and (len(remainder) == 0):
21442218 usage("--job option requires the name of a wikidb to be specified")
@@ -2164,7 +2238,7 @@
21652239 wiki = findAndLockNextWiki(config)
21662240
21672241 if wiki:
2168 - runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested, restart)
 2242+ runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested, restart, enableLogging)
21692243 if (restart):
21702244 print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested)
21712245 elif (jobRequested):

Status & tagging log