Index: branches/ariel/xmldumps-backup/WikiDump.py |
— | — | @@ -104,6 +104,7 @@ |
105 | 105 | "index": "index.html", |
106 | 106 | "templatedir": home, |
107 | 107 | "perdumpindex": "index.html", |
| 108 | + "logfile": "dumplog.txt", |
108 | 109 | #"reporting": { |
109 | 110 | "adminmail": "root@localhost", |
110 | 111 | "mailfrom": "root@localhost", |
— | — | @@ -179,7 +180,8 @@ |
180 | 181 | self.index = conf.get("output", "index") |
181 | 182 | self.templateDir = conf.get("output", "templateDir") |
182 | 183 | self.perDumpIndex = conf.get("output", "perdumpindex") |
183 | | - |
| 184 | + self.logFile = conf.get("output", "logfile") |
| 185 | + |
184 | 186 | self.adminMail = conf.get("reporting", "adminmail") |
185 | 187 | self.mailFrom = conf.get("reporting", "mailfrom") |
186 | 188 | self.smtpServer = conf.get("reporting", "smtpserver") |
Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -16,6 +16,8 @@ |
17 | 17 | import glob |
18 | 18 | import WikiDump |
19 | 19 | import CommandManagement |
| 20 | +import Queue |
| 21 | +import thread |
20 | 22 | |
21 | 23 | from os.path import dirname, exists, getsize, join, realpath |
22 | 24 | from subprocess import Popen, PIPE |
— | — | @@ -48,6 +50,43 @@ |
49 | 51 | def xmlEscape(text):
50 | 52 | return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;") # "&" is replaced first so the entities emitted for "<" and ">" are not escaped again
51 | 53 | |
| 54 | +class Logger(object):
| 55 | + """Append queued lines to an optional log file; a sentinel put on the queue closes the log."""
| 56 | + def __init__(self, logFileName=None):
| 57 | + if (logFileName):
| 58 | + self.logFile = open(logFileName, "a")
| 59 | + else:
| 60 | + self.logFile = None # no file name given: logWrite and logClose become no-ops
| 61 | + self.queue = Queue.Queue()
| 62 | + self.JobsDone = "JOBSDONE" # sentinel queue entry meaning "stop logging"
| 63 | +
| 64 | + def logWrite(self, line=None):
| 65 | + if (self.logFile and line): # skip when there is no file or the line is None/empty
| 66 | + self.logFile.write(line)
| 67 | + self.logFile.flush()
| 68 | +
| 69 | + def logClose(self):
| 70 | + if (self.logFile): # previously tested the undefined name 'logfile', which raised NameError
| 71 | + self.logFile.close()
| 72 | +
| 73 | + # take the next queued entry (blocks until one is available); return 1 if logging terminated, 0 otherwise
| 74 | + def doJobOnLogQueue(self):
| 75 | + line = self.queue.get()
| 76 | + if (line == self.JobsDone):
| 77 | + self.logClose()
| 78 | + return 1
| 79 | + else:
| 80 | + self.logWrite(line)
| 81 | + return 0
| 82 | +
| 83 | + def addToLogQueue(self,line=None):
| 84 | + if (line): # None and empty strings are silently dropped
| 85 | + self.queue.put_nowait(line)
| 86 | +
| 87 | + # set in order to have logging thread clean up and exit
| 88 | + def indicateJobsDone(self):
| 89 | + self.queue.put_nowait(self.JobsDone)
| 90 | + |
52 | 91 | # so if the pages/revsPerChunkAbstract/History are just one number it means |
53 | 92 | # use that number for all the chunks, figure out yourself how many. |
54 | 93 | # otherwise we get passed alist that says "here's now many for each chunk and it's this many chunks. |
— | — | @@ -554,7 +593,7 @@ |
555 | 594 | |
556 | 595 | class Runner(object): |
557 | 596 | |
558 | | - def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None, restart=False): |
| 597 | + def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None, restart=False, loggingEnabled=False): |
559 | 598 | self.wiki = wiki |
560 | 599 | self.config = wiki.config |
561 | 600 | self.dbName = wiki.dbName |
— | — | @@ -562,6 +601,8 @@ |
563 | 602 | self.spawn = spawn |
564 | 603 | self.chunkInfo = Chunk(wiki, self.dbName) |
565 | 604 | self.restart = restart |
| 605 | + self.loggingEnabled = loggingEnabled |
| 606 | + self.log = None |
566 | 607 | |
567 | 608 | if date: |
568 | 609 | # Override, continuing a past dump? |
— | — | @@ -577,10 +618,32 @@ |
578 | 619 | |
579 | 620 | self.jobRequested = job |
580 | 621 | self.dumpDir = DumpDir(self.wiki, self.dbName, self.date) |
| 622 | + |
| 623 | + # this must come after the dumpdir setup so we know which directory we are in |
| 624 | + # for the log file. |
| 625 | + if (loggingEnabled): |
| 626 | + self.logFileName = self.dumpDir.publicPath(config.logFile) |
| 627 | + self.makeDir(join(self.wiki.publicDir(), self.date)) |
| 628 | + self.log = Logger(self.logFileName) |
| 629 | + thread.start_new_thread(self.logQueueReader,(self.log,)) |
| 630 | + |
581 | 631 | self.checksums = Checksummer(self.wiki, self.dumpDir) |
| 632 | + |
582 | 633 | # some or all of these dumpItems will be marked to run |
583 | 634 | self.dumpItemList = DumpItemList(self.wiki, self.prefetch, self.spawn, self.date, self.chunkInfo); |
584 | 635 | |
| 636 | + def logQueueReader(self,log): # loop body for the logging thread: drains the given Logger's queue
| 637 | + if not log:
| 638 | + return
| 639 | + done = False
| 640 | + while not done: # doJobOnLogQueue returns 1 once the jobs-done sentinel has been handled
| 641 | + done = log.doJobOnLogQueue()
| 642 | + |
| 643 | + def logAndPrint(self, message): # print message to the console and, when a Logger is set, queue it for the log
| 644 | + if (self.log):
| 645 | + self.log.addToLogQueue("%s\n" % message) # newline added here; print supplies its own below
| 646 | + print message
| 647 | + |
585 | 648 | def passwordOption(self): |
586 | 649 | """If you pass '-pfoo' mysql uses the password 'foo', |
587 | 650 | but if you pass '-p' it prompts. Sigh.""" |
— | — | @@ -676,6 +739,7 @@ |
677 | 740 | errorString = "Error from command(s): " |
678 | 741 | for cmd in problemCommands: |
679 | 742 | errorString = errorString + "%s " % cmd |
| 743 | + self.logAndPrint(errorString) |
680 | 744 | raise BackupError(errorString) |
681 | 745 | return 1 |
682 | 746 | |
— | — | @@ -702,12 +766,14 @@ |
703 | 767 | output = proc.fromchild.read() |
704 | 768 | retval = proc.wait() |
705 | 769 | if retval: |
| 770 | + self.logAndPrint("Non-zero return code from '%s'" % command) |
706 | 771 | raise BackupError("Non-zero return code from '%s'" % command) |
707 | 772 | else: |
708 | 773 | return output |
709 | 774 | |
710 | 775 | def debug(self, stuff): |
711 | | - print "%s: %s %s" % (prettyTime(), self.dbName, stuff) |
| 776 | + self.logAndPrint("%s: %s %s" % (prettyTime(), self.dbName, stuff)) |
| 777 | +# print "%s: %s %s" % (prettyTime(), self.dbName, stuff) |
712 | 778 | |
713 | 779 | def makeDir(self, dir): |
714 | 780 | if exists(dir): |
— | — | @@ -775,9 +841,9 @@ |
776 | 842 | self.makeDir(join(self.wiki.privateDir(), self.date)) |
777 | 843 | |
778 | 844 | if (self.restart): |
779 | | - print "Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName) |
| 845 | + self.logAndPrint("Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName)) |
780 | 846 | elif (self.jobRequested): |
781 | | - print "Preparing for job %s of %s" % (self.jobRequested, self.dbName) |
| 847 | + self.logAndPrint("Preparing for job %s of %s" % (self.jobRequested, self.dbName)) |
782 | 848 | else: |
783 | 849 | self.showRunnerState("Cleaning up old dumps for %s" % self.dbName) |
784 | 850 | self.cleanOldDumps() |
— | — | @@ -901,7 +967,7 @@ |
902 | 968 | # Short line for report extraction goes here |
903 | 969 | self.wiki.writeStatus(self.reportDatabaseStatusSummary(items, done)) |
904 | 970 | except: |
905 | | - print "Couldn't update status files. Continuing anyways" |
| 971 | + self.logAndPrint("Couldn't update status files. Continuing anyways") |
906 | 972 | |
907 | 973 | def updateStatusFiles(self, done=False): |
908 | 974 | self.saveStatusSummaryAndDetail(self.dumpItemList.dumpItems, done) |
— | — | @@ -1059,6 +1125,7 @@ |
1060 | 1126 | self.debug("Removing old symlink %s" % link) |
1061 | 1127 | os.remove(link) |
1062 | 1128 | else: |
| 1129 | + self.logAndPrint("What the hell dude, %s is not a symlink" % link) |
1063 | 1130 | raise BackupError("What the hell dude, %s is not a symlink" % link) |
1064 | 1131 | relative = relativePath(real, dirname(link)) |
1065 | 1132 | if exists(real): |
— | — | @@ -1148,6 +1215,8 @@ |
1149 | 1216 | """Receive a status line from a shellout and update the status files.""" |
1150 | 1217 | # pass through... |
1151 | 1218 | if (line): |
| 1219 | + if (runner.log): |
| 1220 | + runner.log.addToLogQueue(line) |
1152 | 1221 | sys.stderr.write(line) |
1153 | 1222 | self.progress = line.strip() |
1154 | 1223 | runner.updateStatusFiles() |
— | — | @@ -2083,7 +2152,7 @@ |
2084 | 2153 | if message: |
2085 | 2154 | print message |
2086 | 2155 | print "Usage: python worker.py [options] [wikidbname]" |
2087 | | - print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn, --restartfrom" |
| 2156 | + print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn, --restartfrom, --log" |
2088 | 2157 | print "--configfile: Specify an alternative configuration file to read." |
2089 | 2158 | print " Default config file name: wikidump.conf" |
2090 | 2159 | print "--date: Rerun dump of a given date (probably unwise)" |
— | — | @@ -2100,10 +2169,11 @@ |
2101 | 2170 | print " (helpful if the previous files may have corrupt contents)" |
2102 | 2171 | print "--nospawn: Do not spawn a separate process in order to retrieve revision texts" |
2103 | 2172 | print "--restartfrom: Do all jobs after the one specified via --job, including that one" |
| 2173 | + print "--log: Log progress messages and other output to logfile in addition to" |
| 2174 | + print " the usual console output" |
2104 | 2175 | |
2105 | 2176 | sys.exit(1) |
2106 | 2177 | |
2107 | | - |
2108 | 2178 | if __name__ == "__main__": |
2109 | 2179 | try: |
2110 | 2180 | date = None |
— | — | @@ -2114,10 +2184,12 @@ |
2115 | 2185 | spawn = True |
2116 | 2186 | restart = False |
2117 | 2187 | jobRequested = None |
| 2188 | + enableLogging = False |
| 2189 | + log = None |
2118 | 2190 | |
2119 | 2191 | try: |
2120 | 2192 | (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", |
2121 | | - ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom']) |
| 2193 | + ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom', 'log']) |
2122 | 2194 | except: |
2123 | 2195 | usage("Unknown option specified") |
2124 | 2196 | |
— | — | @@ -2138,6 +2210,8 @@ |
2139 | 2211 | jobRequested = val |
2140 | 2212 | elif opt == "--restartfrom": |
2141 | 2213 | restart = True |
| 2214 | + elif opt == "--log": |
| 2215 | + enableLogging = True |
2142 | 2216 | |
2143 | 2217 | if jobRequested and (len(remainder) == 0): |
2144 | 2218 | usage("--job option requires the name of a wikidb to be specified") |
— | — | @@ -2164,7 +2238,7 @@ |
2165 | 2239 | wiki = findAndLockNextWiki(config) |
2166 | 2240 | |
2167 | 2241 | if wiki: |
2168 | | - runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested, restart) |
| 2242 | + runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested, restart, enableLogging) |
2169 | 2243 | if (restart): |
2170 | 2244 | print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested) |
2171 | 2245 | elif (jobRequested): |