Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -21,13 +21,11 @@ |
22 | 22 | |
23 | 23 | from os.path import dirname, exists, getsize, join, realpath |
24 | 24 | from subprocess import Popen, PIPE |
25 | | -#from WikiDump import FileUtils, DirUtils, MiscUtils, prettyTime, prettySize, shellEscape |
26 | 25 | from WikiDump import FileUtils, MiscUtils, TimeUtils |
27 | 26 | from CommandManagement import CommandPipeline, CommandSeries, CommandsInParallel |
28 | 27 | |
29 | | -# FIXME test this change. |
30 | 28 | def xmlEscape(text): |
31 | | - return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;"); |
| 29 | + return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;"); |
32 | 30 | |
33 | 31 | class Logger(object): |
34 | 32 | |
— | — | @@ -844,7 +842,7 @@ |
845 | 843 | |
846 | 844 | class Runner(object): |
847 | 845 | |
848 | | - def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True, job=None, restart=False, loggingEnabled=False): |
| 846 | + def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, loggingEnabled=False): |
849 | 847 | self.wiki = wiki |
850 | 848 | self.config = wiki.config |
851 | 849 | self.dbName = wiki.dbName |
— | — | @@ -864,8 +862,6 @@ |
865 | 863 | |
866 | 864 | self.lastFailed = False |
867 | 865 | |
868 | | - self.checkpoint = checkpoint |
869 | | - |
870 | 866 | self.jobRequested = job |
871 | 867 | self.dbServerInfo = DbServerInfo(self.wiki, self.dbName, self.logAndPrint) |
872 | 868 | |
— | — | @@ -1060,15 +1056,6 @@ |
1061 | 1057 | item.start(self) |
1062 | 1058 | self.status.updateStatusFiles() |
1063 | 1059 | self.dumpItemList.saveDumpRunInfoFile() |
1064 | | - # FIXME is this checkpoint stuff useful to us now? |
1065 | | - if self.checkpoint and not item.matchCheckpoint(self.checkpoint): |
1066 | | - self.debug("*** Skipping until we reach checkpoint...") |
1067 | | - item.setStatus("done") |
1068 | | - pass |
1069 | | - else: |
1070 | | - if self.checkpoint and item.matchCheckpoint(self.checkpoint): |
1071 | | - self.debug("*** Reached checkpoint!") |
1072 | | - self.checkpoint = None |
1073 | 1060 | try: |
1074 | 1061 | item.dump(self) |
1075 | 1062 | except Exception, ex: |
— | — | @@ -1134,22 +1121,14 @@ |
1135 | 1122 | except: |
1136 | 1123 | # failure? let it die |
1137 | 1124 | pass |
1138 | | - #####date -u > $StatusLockFile |
1139 | 1125 | |
1140 | 1126 | def unlock(self): |
1141 | 1127 | self.showRunnerState("Marking complete.") |
1142 | | - ######date -u > $StatusDoneFile |
1143 | 1128 | |
1144 | | - def dateStamp(self): |
1145 | | - #date -u --iso-8601=seconds |
1146 | | - pass |
1147 | | - |
1148 | 1129 | def showRunnerState(self, message): |
1149 | | - #echo $DatabaseName `dateStamp` OK: "$1" | tee -a $StatusLog | tee -a $GlobalLog |
1150 | 1130 | self.debug(message) |
1151 | 1131 | |
1152 | 1132 | def showRunnerStateComplete(self): |
1153 | | - # echo $DatabaseName `dateStamp` SUCCESS: "done." | tee -a $StatusLog | tee -a $GlobalLog |
1154 | 1133 | self.debug("SUCCESS: done.") |
1155 | 1134 | |
1156 | 1135 | def completeDump(self, files): |
— | — | @@ -1294,9 +1273,6 @@ |
1295 | 1274 | def waitAlarmHandler(self, signum, frame): |
1296 | 1275 | pass |
1297 | 1276 | |
1298 | | - def matchCheckpoint(self, checkpoint): |
1299 | | - return checkpoint == self.__class__.__name__ |
1300 | | - |
1301 | 1277 | def buildRecombineCommandString(self, runner, files, outputFileBasename, compressionCommand, uncompressionCommand, endHeaderMarker="</siteinfo>"): |
1302 | 1278 | outputFilename = runner.dumpDir.publicPath(outputFileBasename) |
1303 | 1279 | chunkNum = 0 |
— | — | @@ -1384,9 +1360,6 @@ |
1385 | 1361 | def listFiles(self, runner): |
1386 | 1362 | return [self._file()] |
1387 | 1363 | |
1388 | | - def matchCheckpoint(self, checkpoint): |
1389 | | - return checkpoint == self.__class__.__name__ + "." + self._table |
1390 | | - |
1391 | 1364 | class PrivateTable(PublicTable): |
1392 | 1365 | """Hidden table dumps for private data.""" |
1393 | 1366 | |
— | — | @@ -1859,9 +1832,6 @@ |
1860 | 1833 | else: |
1861 | 1834 | return [ self._file("bz2",0) ] |
1862 | 1835 | |
1863 | | - def matchCheckpoint(self, checkpoint): |
1864 | | - return checkpoint == self.__class__.__name__ + "." + self._subset |
1865 | | - |
1866 | 1836 | class RecombineXmlDump(XmlDump): |
1867 | 1837 | def __init__(self, subset, name, desc, detail, chunks = False): |
1868 | 1838 | # no prefetch, no spawn |
— | — | @@ -1993,9 +1963,6 @@ |
1994 | 1964 | def getCommandOutputCallback(self, line): |
1995 | 1965 | self._output = line |
1996 | 1966 | |
1997 | | - def matchCheckpoint(self, checkpoint): |
1998 | | - return checkpoint == self.__class__.__name__ + "." + self._subset |
1999 | | - |
2000 | 1967 | class RecombineXmlRecompressDump(XmlRecompressDump): |
2001 | 1968 | def __init__(self, subset, name, desc, detail, chunks): |
2002 | 1969 | XmlRecompressDump.__init__(self, subset, name, desc, detail, chunks) |
— | — | @@ -2193,11 +2160,10 @@ |
2194 | 2161 | if message: |
2195 | 2162 | print message |
2196 | 2163 | print "Usage: python worker.py [options] [wikidbname]" |
2197 | | - print "Options: --configfile, --date, --checkpoint, --job, --force, --noprefetch, --nospawn, --restartfrom, --log" |
| 2164 | + print "Options: --configfile, --date, --job, --force, --noprefetch, --nospawn, --restartfrom, --log" |
2198 | 2165 | print "--configfile: Specify an alternative configuration file to read." |
2199 | 2166 | print " Default config file name: wikidump.conf" |
2200 | 2167 | print "--date: Rerun dump of a given date (probably unwise)" |
2201 | | - print "--checkpoint: Run just the specified step (deprecated)" |
2202 | 2168 | print "--job: Run just the specified step or set of steps; for the list," |
2203 | 2169 | print " give the option --job help" |
2204 | 2170 | print " This option requires specifiying a wikidbname on which to run." |
— | — | @@ -2218,7 +2184,6 @@ |
2219 | 2185 | if __name__ == "__main__": |
2220 | 2186 | try: |
2221 | 2187 | date = None |
2222 | | - checkpoint = None |
2223 | 2188 | configFile = False |
2224 | 2189 | forceLock = False |
2225 | 2190 | prefetch = True |
— | — | @@ -2230,15 +2195,13 @@ |
2231 | 2196 | |
2232 | 2197 | try: |
2233 | 2198 | (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", |
2234 | | - ['date=', 'checkpoint=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom', 'log']) |
| 2199 | + ['date=', 'job=', 'configfile=', 'force', 'noprefetch', 'nospawn', 'restartfrom', 'log']) |
2235 | 2200 | except: |
2236 | 2201 | usage("Unknown option specified") |
2237 | 2202 | |
2238 | 2203 | for (opt, val) in options: |
2239 | 2204 | if opt == "--date": |
2240 | 2205 | date = val |
2241 | | - elif opt == "--checkpoint": |
2242 | | - checkpoint = val |
2243 | 2206 | elif opt == "--configfile": |
2244 | 2207 | configFile = val |
2245 | 2208 | elif opt == "--force": |
— | — | @@ -2279,7 +2242,7 @@ |
2280 | 2243 | wiki = findAndLockNextWiki(config) |
2281 | 2244 | |
2282 | 2245 | if wiki: |
2283 | | - runner = Runner(wiki, date, checkpoint, prefetch, spawn, jobRequested, restart, enableLogging) |
| 2246 | + runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, enableLogging) |
2284 | 2247 | if (restart): |
2285 | 2248 | print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested) |
2286 | 2249 | elif (jobRequested): |