Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -773,7 +773,6 @@ |
774 | 774 | # why? we might be rerunning a job from an older dumps. we might have two |
775 | 775 | # runs going at once (think en pedia, one finishing up the history, another |
776 | 776 | # starting at the beginning to get the new abstracts and stubs). |
777 | | - |
778 | 777 | try: |
779 | 778 | dumpsInOrder = self.wiki.latestDump(all=True) |
780 | 779 | meIndex = dumpsInOrder.index(self.date) |
— | — | @@ -845,7 +844,7 @@ |
846 | 845 | |
847 | 846 | class Runner(object): |
848 | 847 | |
849 | | - def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice="", loggingEnabled=False): |
| 848 | + def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice="", dryrun = False, loggingEnabled=False): |
850 | 849 | self.wiki = wiki |
851 | 850 | self.config = wiki.config |
852 | 851 | self.dbName = wiki.dbName |
— | — | @@ -856,6 +855,7 @@ |
857 | 856 | self.loggingEnabled = loggingEnabled |
858 | 857 | self.htmlNotice = notice |
859 | 858 | self.log = None |
| 859 | + self.dryrun = dryrun |
860 | 860 | |
861 | 861 | if date: |
862 | 862 | # Override, continuing a past dump? |
— | — | @@ -873,18 +873,20 @@ |
874 | 874 | |
875 | 875 | # this must come after the dumpdir setup so we know which directory we are in |
876 | 876 | # for the log file. |
877 | | - if (loggingEnabled): |
| 877 | + if (loggingEnabled and not self.dryrun): |
878 | 878 | self.logFileName = self.dumpDir.publicPath(config.logFile) |
879 | 879 | self.makeDir(join(self.wiki.publicDir(), self.date)) |
880 | 880 | self.log = Logger(self.logFileName) |
881 | 881 | thread.start_new_thread(self.logQueueReader,(self.log,)) |
882 | 882 | |
883 | | - self.checksums = Checksummer(self.wiki, self.dumpDir) |
| 883 | + if not dryrun: |
| 884 | + self.checksums = Checksummer(self.wiki, self.dumpDir) |
884 | 885 | |
885 | 886 | # some or all of these dumpItems will be marked to run |
886 | 887 | self.dumpItemList = DumpItemList(self.wiki, self.prefetch, self.spawn, self.date, self.chunkInfo); |
887 | 888 | |
888 | | - self.status = Status(self.wiki, self.dumpDir, self.date, self.dumpItemList.dumpItems, self.checksums, self.htmlNotice, self.logAndPrint) |
| 889 | + if not self.dryrun: |
| 890 | + self.status = Status(self.wiki, self.dumpDir, self.date, self.dumpItemList.dumpItems, self.checksums, self.htmlNotice, self.logAndPrint) |
889 | 891 | |
890 | 892 | def logQueueReader(self,log): |
891 | 893 | if not log: |
— | — | @@ -894,7 +896,7 @@ |
895 | 897 | done = log.doJobOnLogQueue() |
896 | 898 | |
897 | 899 | def logAndPrint(self, message): |
898 | | - if hasattr(self,'log') and self.log: |
| 900 | + if hasattr(self,'log') and self.log and not self.dryrun: |
899 | 901 | self.log.addToLogQueue("%s\n" % message) |
900 | 902 | print message |
901 | 903 | |
— | — | @@ -904,6 +906,10 @@ |
905 | 907 | else: |
906 | 908 | return "" |
907 | 909 | |
| 910 | + def remove(self, filename): |
| 911 | + if not self.dryrun: |
| 912 | + os.remove(filename) |
| 913 | + |
908 | 914 | # returns 0 on success, 1 on error |
909 | 915 | def saveTable(self, table, outfile): |
910 | 916 | """Dump a table from the current DB with mysqldump, save to a gzipped sql file.""" |
— | — | @@ -920,8 +926,21 @@ |
921 | 927 | """For one pipeline of commands, redirect output to a given file.""" |
922 | 928 | commands[-1].extend( [ ">" , outfile ] ) |
923 | 929 | series = [ commands ] |
924 | | - return self.runCommand([ series ], callbackTimed = self.status.updateStatusFiles) |
| 930 | + if (self.dryrun): |
| 931 | + self.prettyPrintCommands([ series ]) |
| 932 | + return 0 |
| 933 | + else: |
| 934 | + return self.runCommand([ series ], callbackTimed = self.status.updateStatusFiles) |
925 | 935 | |
| 936 | + def prettyPrintCommands(self, commandSeriesList): |
| 937 | + for series in commandSeriesList: |
| 938 | + for pipeline in series: |
| 939 | + commandStrings = [] |
| 940 | + for command in pipeline: |
| 941 | + commandStrings.append(" ".join(command)) |
| 942 | + pipelineString = " | ".join(commandStrings) |
| 943 | + print "Command to run: ", pipelineString |
| 944 | + |
926 | 945 | # command series list: list of (commands plus args) is one pipeline. list of pipelines = 1 series. |
927 | 946 | # this function wants a list of series. |
928 | 947 | # be a list (the command name and the various args) |
— | — | @@ -939,18 +958,23 @@ |
940 | 959 | This function spawns multiple series of pipelines in parallel. |
941 | 960 | |
942 | 961 | """ |
943 | | - commands = CommandsInParallel(commandSeriesList, callbackStderr=callbackStderr, callbackStderrArg=callbackStderrArg, callbackTimed=callbackTimed, callbackTimedArg=callbackTimedArg, shell=shell, callbackInterval=callbackInterval) |
944 | | - commands.runCommands() |
945 | | - if commands.exitedSuccessfully(): |
| 962 | + if self.dryrun: |
| 963 | + self.prettyPrintCommands(commandSeriesList) |
946 | 964 | return 0 |
| 965 | + |
947 | 966 | else: |
948 | | - problemCommands = commands.commandsWithErrors() |
949 | | - errorString = "Error from command(s): " |
950 | | - for cmd in problemCommands: |
951 | | - errorString = errorString + "%s " % cmd |
952 | | - self.logAndPrint(errorString) |
953 | | -# raise BackupError(errorString) |
954 | | - return 1 |
| 967 | + commands = CommandsInParallel(commandSeriesList, callbackStderr=callbackStderr, callbackStderrArg=callbackStderrArg, callbackTimed=callbackTimed, callbackTimedArg=callbackTimedArg, shell=shell, callbackInterval=callbackInterval) |
| 968 | + commands.runCommands() |
| 969 | + if commands.exitedSuccessfully(): |
| 970 | + return 0 |
| 971 | + else: |
| 972 | + problemCommands = commands.commandsWithErrors() |
| 973 | + errorString = "Error from command(s): " |
| 974 | + for cmd in problemCommands: |
| 975 | + errorString = errorString + "%s " % cmd |
| 976 | + self.logAndPrint(errorString) |
| 977 | + # raise BackupError(errorString) |
| 978 | + return 1 |
955 | 979 | |
956 | 980 | def debug(self, stuff): |
957 | 981 | self.logAndPrint("%s: %s %s" % (TimeUtils.prettyTime(), self.dbName, stuff)) |
— | — | @@ -1003,8 +1027,9 @@ |
1004 | 1028 | # mark all the following jobs to run as well |
1005 | 1029 | self.dumpItemList.markFollowingJobsToRun() |
1006 | 1030 | |
1007 | | - self.makeDir(join(self.wiki.publicDir(), self.date)) |
1008 | | - self.makeDir(join(self.wiki.privateDir(), self.date)) |
| 1031 | + if not self.dryrun: |
| 1032 | + self.makeDir(join(self.wiki.publicDir(), self.date)) |
| 1033 | + self.makeDir(join(self.wiki.privateDir(), self.date)) |
1009 | 1034 | |
1010 | 1035 | if (self.restart): |
1011 | 1036 | self.logAndPrint("Preparing for restart from job %s of %s" % (self.jobRequested, self.dbName)) |
— | — | @@ -1018,35 +1043,38 @@ |
1019 | 1044 | files = self.listFilesFor(self.dumpItemList.dumpItems) |
1020 | 1045 | |
1021 | 1046 | if (self.jobRequested): |
1022 | | - self.checksums.prepareChecksums() |
| 1047 | + if not self.dryrun: |
| 1048 | + self.checksums.prepareChecksums() |
1023 | 1049 | |
1024 | 1050 | for item in self.dumpItemList.dumpItems: |
1025 | 1051 | if (item.toBeRun()): |
1026 | 1052 | item.start(self) |
1027 | | - self.status.updateStatusFiles() |
1028 | | - self.dumpItemList.saveDumpRunInfoFile() |
| 1053 | + if not self.dryrun: |
| 1054 | + self.status.updateStatusFiles() |
| 1055 | + self.dumpItemList.saveDumpRunInfoFile() |
1029 | 1056 | try: |
1030 | 1057 | item.dump(self) |
1031 | 1058 | except Exception, ex: |
1032 | 1059 | self.debug("*** exception! " + str(ex)) |
1033 | 1060 | item.setStatus("failed") |
1034 | | - if item.status() == "failed": |
| 1061 | + if item.status() == "failed" and not self.dryrun: |
1035 | 1062 | self.runHandleFailure() |
1036 | 1063 | else: |
1037 | 1064 | self.lastFailed = False |
1038 | 1065 | # this ensures that, previous run or new one, the old or new md5sums go to the file |
1039 | | - if item.status() == "done": |
| 1066 | + if item.status() == "done" and not self.dryrun: |
1040 | 1067 | self.runUpdateItemFileInfo(item) |
1041 | 1068 | |
1042 | | - if (self.dumpItemList.allPossibleJobsDone()): |
1043 | | - self.status.updateStatusFiles("done") |
1044 | | - else: |
1045 | | - self.status.updateStatusFiles("partialdone") |
1046 | | - self.dumpItemList.saveDumpRunInfoFile() |
| 1069 | + if not self.dryrun: |
| 1070 | + if (self.dumpItemList.allPossibleJobsDone()): |
| 1071 | + self.status.updateStatusFiles("done") |
| 1072 | + else: |
| 1073 | + self.status.updateStatusFiles("partialdone") |
| 1074 | + self.dumpItemList.saveDumpRunInfoFile() |
1047 | 1075 | |
1048 | | - # if any job succeeds we might as well make the sym link |
1049 | | - if (self.status.failCount < 1): |
1050 | | - self.completeDump(files) |
| 1076 | + # if any job succeeds we might as well make the sym link |
| 1077 | + if (self.status.failCount < 1): |
| 1078 | + self.completeDump(files) |
1051 | 1079 | |
1052 | 1080 | if (self.restart): |
1053 | 1081 | self.showRunnerState("Completed run restarting from job %s for %s" % (self.jobRequested, self.dbName)) |
— | — | @@ -1054,28 +1082,31 @@ |
1055 | 1083 | self.showRunnerState("Completed job %s for %s" % (self.jobRequested, self.dbName)) |
1056 | 1084 | |
1057 | 1085 | else: |
1058 | | - self.checksums.prepareChecksums() |
| 1086 | + if not self.dryrun: |
| 1087 | + self.checksums.prepareChecksums() |
1059 | 1088 | |
1060 | 1089 | for item in self.dumpItemList.dumpItems: |
1061 | 1090 | item.start(self) |
1062 | | - self.status.updateStatusFiles() |
1063 | | - self.dumpItemList.saveDumpRunInfoFile() |
| 1091 | + if not self.dryrun: |
| 1092 | + self.status.updateStatusFiles() |
| 1093 | + self.dumpItemList.saveDumpRunInfoFile() |
1064 | 1094 | try: |
1065 | 1095 | item.dump(self) |
1066 | 1096 | except Exception, ex: |
1067 | 1097 | self.debug("*** exception! " + str(ex)) |
1068 | 1098 | item.setStatus("failed") |
1069 | | - if item.status() == "failed": |
| 1099 | + if item.status() == "failed" and not self.dryrun: |
1070 | 1100 | self.runHandleFailure() |
1071 | 1101 | else: |
1072 | | - self.runUpdateItemFileInfo(item) |
| 1102 | + if not self.dryrun: |
| 1103 | + self.runUpdateItemFileInfo(item) |
1073 | 1104 | self.lastFailed = False |
1074 | 1105 | |
1075 | | - self.status.updateStatusFiles("done") |
1076 | | - self.dumpItemList.saveDumpRunInfoFile() |
1077 | | - |
1078 | | - if self.status.failCount < 1: |
1079 | | - self.completeDump(files) |
| 1106 | + if not self.dryrun: |
| 1107 | + self.status.updateStatusFiles("done") |
| 1108 | + self.dumpItemList.saveDumpRunInfoFile() |
| 1109 | + if self.status.failCount < 1: |
| 1110 | + self.completeDump(files) |
1080 | 1111 | |
1081 | 1112 | self.showRunnerStateComplete() |
1082 | 1113 | |
— | — | @@ -1092,8 +1123,9 @@ |
1093 | 1124 | if old: |
1094 | 1125 | for dump in old: |
1095 | 1126 | self.showRunnerState("Purging old dump %s for %s" % (dump, self.dbName)) |
1096 | | - base = os.path.join(self.wiki.publicDir(), dump) |
1097 | | - shutil.rmtree("%s" % base) |
| 1127 | + if not self.dryrun: |
| 1128 | + base = os.path.join(self.wiki.publicDir(), dump) |
| 1129 | + shutil.rmtree("%s" % base) |
1098 | 1130 | else: |
1099 | 1131 | self.showRunnerState("No old dumps to purge.") |
1100 | 1132 | |
— | — | @@ -1278,7 +1310,8 @@ |
1279 | 1311 | pass |
1280 | 1312 | |
1281 | 1313 | def buildRecombineCommandString(self, runner, files, outputFileBasename, compressionCommand, uncompressionCommand, endHeaderMarker="</siteinfo>"): |
1282 | | - outputFilename = runner.dumpDir.publicPath(outputFileBasename) |
| 1314 | +# outputFilename = self.buildOutputFilename(runner, outputFileBasename) |
| 1315 | + outputFilename = runner.dumpDir.publicPath(outputFileBasename) |
1283 | 1316 | chunkNum = 0 |
1284 | 1317 | recombines = [] |
1285 | 1318 | head = runner.config.head |
— | — | @@ -1311,14 +1344,12 @@ |
1312 | 1345 | # warning: we figure any header (<siteinfo>...</siteinfo>) is going to be less than 2000 lines! |
1313 | 1346 | pipeline.append([ head, "-2000"]) |
1314 | 1347 | pipeline.append([ grep, "-n", endHeaderMarker ]) |
1315 | | - # without sheell |
| 1348 | + # without shell |
1316 | 1349 | p = CommandPipeline(pipeline, quiet=True) |
1317 | 1350 | p.runPipelineAndGetOutput() |
1318 | 1351 | if (p.output()): |
1319 | 1352 | (headerEndNum, junk) = p.output().split(":",1) |
1320 | 1353 | # get headerEndNum |
1321 | | - if exists(outputFilename): |
1322 | | - os.remove(outputFilename) |
1323 | 1354 | recombine = " ".join(uncompressThisFile) |
1324 | 1355 | headerEndNum = int(headerEndNum) + 1 |
1325 | 1356 | if (chunkNum == 1): |
— | — | @@ -1335,7 +1366,14 @@ |
1336 | 1367 | recombineCommandString = "(" + ";".join(recombines) + ")" + "|" + "%s %s" % (compressionCommand, outputFilename) |
1337 | 1368 | return(recombineCommandString) |
1338 | 1369 | |
| 1370 | + def cleanupOldFiles(self, runner, outputFileBasename): |
| 1371 | + outputFilename = self.buildOutputFilename(runner, outputFileBasename) |
| 1372 | + if exists(outputFilename): |
| 1373 | + runner.remove(outputFilename) |
1339 | 1374 | |
| 1375 | + def buildOutputFilename(self, runner, outputFileBasename): |
| 1376 | + return outputFilename |
| 1377 | + |
1340 | 1378 | class PublicTable(Dump): |
1341 | 1379 | """Dump of a table using MySQL's mysqldump utility.""" |
1342 | 1380 | |
— | — | @@ -1404,51 +1442,83 @@ |
1405 | 1443 | "stub-articles.xml.gz"] |
1406 | 1444 | |
1407 | 1445 | def buildCommand(self, runner, chunk = 0): |
| 1446 | + history = self.buildHistoryOutputFilename(runner, chunk) |
| 1447 | + current = self.buildCurrentOutputFilename(runner, chunk) |
| 1448 | + articles = self.buildArticlesOutputFilename(runner, chunk) |
| 1449 | + |
| 1450 | + command = [ "%s" % runner.config.php, |
| 1451 | + "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir, |
| 1452 | + "--wiki=%s" % runner.dbName, |
| 1453 | + "--full", "--stub", "--report=10000", |
| 1454 | + "%s" % runner.forceNormalOption(), |
| 1455 | + "--output=gzip:%s" % history, |
| 1456 | + "--output=gzip:%s" % current, |
| 1457 | + "--filter=latest", "--output=gzip:%s" % articles, |
| 1458 | + "--filter=latest", "--filter=notalk", "--filter=namespace:!NS_USER" ] |
1408 | 1459 | if (chunk): |
| 1460 | + # set up start end end pageids for this piece |
| 1461 | + # note there is no page id 0 I guess. so we start with 1 |
| 1462 | + # start = runner.pagesPerChunk()*(chunk-1) + 1 |
| 1463 | + start = sum([ self._chunks[i] for i in range(0,chunk-1)]) + 1 |
| 1464 | + startopt = "--start=%s" % start |
| 1465 | + # if we are on the last chunk, we should get up to the last pageid, |
| 1466 | + # whatever that is. |
| 1467 | + command.append(startopt) |
| 1468 | + if chunk < len(self._chunks): |
| 1469 | + # end = start + runner.pagesPerChunk() |
| 1470 | + end = sum([ self._chunks[i] for i in range(0,chunk)]) +1 |
| 1471 | + endopt = "--end=%s" % end |
| 1472 | + command.append(endopt) |
| 1473 | + |
| 1474 | + pipeline = [ command ] |
| 1475 | + series = [ pipeline ] |
| 1476 | + return(series) |
| 1477 | + |
| 1478 | + def cleanupOldFiles(self, runner, chunk = 0): |
| 1479 | + fileList = self.buildOutputFilenames(runner, chunk) |
| 1480 | + for filename in fileList: |
| 1481 | + if exists(filename): |
| 1482 | + runner.remove(filename) |
| 1483 | + |
| 1484 | + def buildHistoryOutputFilename(self, runner, chunk = 0): |
| 1485 | + if (chunk): |
1409 | 1486 | chunkinfo = "%s" % chunk |
1410 | 1487 | else: |
1411 | 1488 | chunkinfo = "" |
1412 | 1489 | history = runner.dumpDir.publicPath("stub-meta-history" + chunkinfo + ".xml.gz") |
| 1490 | + return history |
| 1491 | + |
| 1492 | + def buildCurrentOutputFilename(self, runner, chunk = 0): |
| 1493 | + if (chunk): |
| 1494 | + chunkinfo = "%s" % chunk |
| 1495 | + else: |
| 1496 | + chunkinfo = "" |
1413 | 1497 | current = runner.dumpDir.publicPath("stub-meta-current" + chunkinfo + ".xml.gz") |
| 1498 | + return current |
| 1499 | + |
| 1500 | + def buildArticlesOutputFilename(self, runner, chunk = 0): |
| 1501 | + if (chunk): |
| 1502 | + chunkinfo = "%s" % chunk |
| 1503 | + else: |
| 1504 | + chunkinfo = "" |
1414 | 1505 | articles = runner.dumpDir.publicPath("stub-articles" + chunkinfo + ".xml.gz") |
1415 | | - for filename in (history, current, articles): |
1416 | | - if exists(filename): |
1417 | | - os.remove(filename) |
1418 | | - command = [ "%s" % runner.config.php, |
1419 | | - "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir, |
1420 | | - "--wiki=%s" % runner.dbName, |
1421 | | - "--full", "--stub", "--report=10000", |
1422 | | - "%s" % runner.forceNormalOption(), |
1423 | | - "--output=gzip:%s" % history, |
1424 | | - "--output=gzip:%s" % current, |
1425 | | - "--filter=latest", "--output=gzip:%s" % articles, |
1426 | | - "--filter=latest", "--filter=notalk", "--filter=namespace:!NS_USER" ] |
1427 | | - if (chunk): |
1428 | | - # set up start end end pageids for this piece |
1429 | | - # note there is no page id 0 I guess. so we start with 1 |
1430 | | - # start = runner.pagesPerChunk()*(chunk-1) + 1 |
1431 | | - start = sum([ self._chunks[i] for i in range(0,chunk-1)]) + 1 |
1432 | | - startopt = "--start=%s" % start |
1433 | | - # if we are on the last chunk, we should get up to the last pageid, |
1434 | | - # whatever that is. |
1435 | | - command.append(startopt) |
1436 | | - if chunk < len(self._chunks): |
1437 | | - # end = start + runner.pagesPerChunk() |
1438 | | - end = sum([ self._chunks[i] for i in range(0,chunk)]) +1 |
1439 | | - endopt = "--end=%s" % end |
1440 | | - command.append(endopt) |
| 1506 | + return articles |
1441 | 1507 | |
1442 | | - pipeline = [ command ] |
1443 | | - series = [ pipeline ] |
1444 | | - return(series) |
1445 | | - |
| 1508 | + def buildOutputFilenames(self, runner, chunk = 0): |
| 1509 | + history = self.buildHistoryOutputFilename(runner, chunk) |
| 1510 | + current = self.buildCurrentOutputFilename(runner, chunk) |
| 1511 | + articles = self.buildArticlesOutputFilename(runner, chunk) |
| 1512 | + return([ history, current, articles ]) |
| 1513 | + |
1446 | 1514 | def run(self, runner): |
1447 | 1515 | commands = [] |
1448 | 1516 | if self._chunks: |
1449 | 1517 | for i in range(1, len(self._chunks)+1): |
| 1518 | + self.cleanupOldFiles(runner,i) |
1450 | 1519 | series = self.buildCommand(runner, i) |
1451 | 1520 | commands.append(series) |
1452 | 1521 | else: |
| 1522 | + self.cleanupOldFiles(runner) |
1453 | 1523 | series = self.buildCommand(runner) |
1454 | 1524 | commands.append(series) |
1455 | 1525 | result = runner.runCommand(commands, callbackStderr=self.progressCallback, callbackStderrArg=runner) |
— | — | @@ -1505,10 +1575,18 @@ |
1506 | 1576 | def listFiles(self, runner): |
1507 | 1577 | return ["pages-logging.xml.gz"] |
1508 | 1578 | |
| 1579 | + def cleanupOldFiles(self, runner): |
| 1580 | + logging = self.buildOutputFilename(runner) |
| 1581 | + if exists(logging): |
| 1582 | + runner.remove(logging) |
| 1583 | + |
| 1584 | + def buildOutputFilename(self, runner): |
| 1585 | + logging = runner.dumpDir.publicPath("pages-logging.xml.gz") |
| 1586 | + return logging |
| 1587 | + |
1509 | 1588 | def run(self, runner): |
1510 | | - logging = runner.dumpDir.publicPath("pages-logging.xml.gz") |
1511 | | - if exists(logging): |
1512 | | - os.remove(logging) |
| 1589 | + self.cleanupOldFiles(runner) |
| 1590 | + logging = self.buildOutputFilename(runner) |
1513 | 1591 | command = [ "%s" % runner.config.php, |
1514 | 1592 | "-q", "%s/maintenance/dumpBackup.php" % runner.config.wikiDir, |
1515 | 1593 | "--wiki=%s" % runner.dbName, |
— | — | @@ -1723,7 +1801,10 @@ |
1724 | 1802 | if not self.statusOfOldDumpIsDone(runner, date): |
1725 | 1803 | runner.debug("skipping incomplete or failed dump for prefetch %s" % possible) |
1726 | 1804 | continue |
1727 | | - runner.debug("Prefetchable %s" % possible) |
| 1805 | + if (chunk) and (self.filenameHasChunk(possible, "bz2")): |
| 1806 | + runner.debug("Prefetchable %s etc." % possible) |
| 1807 | + else: |
| 1808 | + runner.debug("Prefetchable %s" % possible) |
1728 | 1809 | # found something workable, now check the chunk situation |
1729 | 1810 | if (chunk): |
1730 | 1811 | if (self.filenameHasChunk(possible, "bz2")): |
— | — | @@ -1904,7 +1985,7 @@ |
1905 | 1986 | def _path(self, runner, ext, chunk=0): |
1906 | 1987 | return runner.dumpDir.publicPath(self._file(ext,chunk)) |
1907 | 1988 | |
1908 | | - def getOutputFilename(self, runner, chunk=0): |
| 1989 | + def buildOutputFilename(self, runner, chunk=0): |
1909 | 1990 | if (chunk): |
1910 | 1991 | xml7z = self._path(runner, "7z", chunk) |
1911 | 1992 | else: |
— | — | @@ -1920,25 +2001,31 @@ |
1921 | 2002 | |
1922 | 2003 | def buildCommand(self, runner, chunk = 0): |
1923 | 2004 | xmlbz2 = self.getInputFilename(runner, chunk) |
1924 | | - xml7z = self.getOutputFilename(runner, chunk) |
| 2005 | + xml7z = self.buildOutputFilename(runner, chunk) |
1925 | 2006 | |
1926 | | - # Clear prior 7zip attempts; 7zip will try to append an existing archive |
1927 | | - if exists(xml7z): |
1928 | | - os.remove(xml7z) |
1929 | 2007 | # FIXME need shell escape |
1930 | 2008 | commandPipe = [ [ "%s -dc %s | %s a -si %s" % (runner.config.bzip2, xmlbz2, runner.config.sevenzip, xml7z) ] ] |
1931 | 2009 | commandSeries = [ commandPipe ] |
1932 | 2010 | return(commandSeries) |
1933 | 2011 | |
| 2012 | + def cleanupOldFiles(self, runner, chunk = 0): |
| 2013 | + xml7z = self.buildOutputFilename(runner, chunk) |
| 2014 | + if exists(xml7z): |
| 2015 | + runner.remove(xml7z) |
| 2016 | + |
1934 | 2017 | def run(self, runner): |
1935 | 2018 | if runner.lastFailed: |
1936 | 2019 | raise BackupError("bz2 dump incomplete, not recompressing") |
1937 | 2020 | commands = [] |
1938 | 2021 | if (self._chunks): |
1939 | 2022 | for i in range(1, len(self._chunks)+1): |
| 2023 | + # Clear prior 7zip attempts; 7zip will try to append an existing archive |
| 2024 | + self.cleanupOldFiles(runner, i) |
1940 | 2025 | series = self.buildCommand(runner, i) |
1941 | 2026 | commands.append(series) |
1942 | 2027 | else: |
| 2028 | + # Clear prior 7zip attempts; 7zip will try to append an existing archive |
| 2029 | + self.cleanupOldFiles(runner) |
1943 | 2030 | series = self.buildCommand(runner) |
1944 | 2031 | commands.append(series) |
1945 | 2032 | result = runner.runCommand(commands, callbackTimed=self.progressCallback, callbackTimedArg=runner, shell = True) |
— | — | @@ -1946,11 +2033,11 @@ |
1947 | 2034 | # some hacks aren't so temporary - atg 3 sept 2010 |
1948 | 2035 | if (self._chunks): |
1949 | 2036 | for i in range(1, len(self._chunks)+1): |
1950 | | - xml7z = self.getOutputFilename(runner,i) |
| 2037 | + xml7z = self.buildOutputFilename(runner,i) |
1951 | 2038 | if exists(xml7z): |
1952 | 2039 | os.chmod(xml7z, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH ) |
1953 | 2040 | else: |
1954 | | - xml7z = self.getOutputFilename(runner) |
| 2041 | + xml7z = self.buildOutputFilename(runner) |
1955 | 2042 | if exists(xml7z): |
1956 | 2043 | os.chmod(xml7z, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH ) |
1957 | 2044 | return(result) |
— | — | @@ -1977,9 +2064,19 @@ |
1978 | 2065 | def listFiles(self, runner): |
1979 | 2066 | return(XmlRecompressDump.listFiles(self, runner, unnumbered=True)) |
1980 | 2067 | |
| 2068 | + def cleanupOldFiles(self, runner): |
| 2069 | + files = self.listFiles(runner) |
| 2070 | + print "here is cleanup" |
| 2071 | + for filename in files: |
| 2072 | + filename = runner.dumpDir.publicPath(filename) |
| 2073 | + if exists(filename): |
| 2074 | + runner.remove(filename) |
| 2075 | + |
1981 | 2076 | def run(self, runner): |
| 2077 | + print "here we are" |
1982 | 2078 | errorresult = 0 |
1983 | 2079 | if (self._chunks): |
| 2080 | + self.cleanupOldFiles(runner) |
1984 | 2081 | files = XmlRecompressDump.listFiles(self,runner) |
1985 | 2082 | outputFileList = self.listFiles(runner) |
1986 | 2083 | for outputFile in outputFileList: |
— | — | @@ -2175,6 +2272,8 @@ |
2176 | 2273 | print " give the option --job help" |
2177 | 2274 | print " This option requires specifiying a wikidbname on which to run." |
2178 | 2275 | print " This option cannot be specified with --force." |
| 2276 | + print "--dryrun: Don't really run the job, just print what would be done (must be used" |
| 2277 | + print " with a specified wikidbname on which to run" |
2179 | 2278 | print "--force: remove a lock file for the specified wiki (dangerous, if there is" |
2180 | 2279 | print " another process running, useful if you want to start a second later" |
2181 | 2280 | print " run while the first dump from a previous date is still going)" |
— | — | @@ -2200,10 +2299,11 @@ |
2201 | 2300 | enableLogging = False |
2202 | 2301 | log = None |
2203 | 2302 | htmlNotice = "" |
| 2303 | + dryrun = False |
2204 | 2304 | |
2205 | 2305 | try: |
2206 | 2306 | (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", |
2207 | | - ['date=', 'job=', 'configfile=', 'notice=', 'force', 'noprefetch', 'nospawn', 'restartfrom', 'log']) |
| 2307 | + ['date=', 'job=', 'configfile=', 'notice=', 'force', 'dryrun', 'noprefetch', 'nospawn', 'restartfrom', 'log']) |
2208 | 2308 | except: |
2209 | 2309 | usage("Unknown option specified") |
2210 | 2310 | |
— | — | @@ -2218,6 +2318,8 @@ |
2219 | 2319 | prefetch = False |
2220 | 2320 | elif opt == "--nospawn": |
2221 | 2321 | spawn = False |
| 2322 | + elif opt == "--dryrun": |
| 2323 | + dryrun = True |
2222 | 2324 | elif opt == "--job": |
2223 | 2325 | jobRequested = val |
2224 | 2326 | elif opt == "--restartfrom": |
— | — | @@ -2227,6 +2329,8 @@ |
2228 | 2330 | elif opt == "--notice": |
2229 | 2331 | htmlNotice = val |
2230 | 2332 | |
| 2333 | + if dryrun and (len(remainder) == 0): |
| 2334 | + usage("--dryrun requires the name of a wikidb to be specified") |
2231 | 2335 | if jobRequested and (len(remainder) == 0): |
2232 | 2336 | usage("--job option requires the name of a wikidb to be specified") |
2233 | 2337 | if (jobRequested and forceLock): |
— | — | @@ -2240,19 +2344,25 @@ |
2241 | 2345 | else: |
2242 | 2346 | config = WikiDump.Config() |
2243 | 2347 | |
| 2348 | + if dryrun: |
| 2349 | + print "***" |
| 2350 | + print "Dry run only, no files will be updated." |
| 2351 | + print "***" |
| 2352 | + |
2244 | 2353 | if len(remainder) > 0: |
2245 | 2354 | wiki = WikiDump.Wiki(config, remainder[0]) |
2246 | | - # if we are doing one piece only of the dump, we don't try to grab a lock. |
2247 | | - if forceLock: |
2248 | | - if wiki.isLocked(): |
| 2355 | + # if we are doing one piece only of the dump, we don't try to grab a lock |
| 2356 | + # unless told to. |
| 2357 | + if not dryrun: |
| 2358 | + if forceLock and wiki.isLocked(): |
2249 | 2359 | wiki.unlock() |
2250 | | - if restart or not jobRequested: |
2251 | | - wiki.lock() |
| 2360 | + if restart or not jobRequested: |
| 2361 | + wiki.lock() |
2252 | 2362 | else: |
2253 | 2363 | wiki = findAndLockNextWiki(config) |
2254 | 2364 | |
2255 | 2365 | if wiki: |
2256 | | - runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, enableLogging) |
| 2366 | + runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, dryrun, enableLogging) |
2257 | 2367 | if (restart): |
2258 | 2368 | print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested) |
2259 | 2369 | elif (jobRequested): |
— | — | @@ -2261,8 +2371,9 @@ |
2262 | 2372 | print "Running %s..." % wiki.dbName |
2263 | 2373 | runner.run() |
2264 | 2374 | # if we are doing one piece only of the dump, we don't unlock either |
2265 | | - if restart or not jobRequested: |
2266 | | - wiki.unlock() |
| 2375 | + if not dryrun: |
| 2376 | + if restart or not jobRequested: |
| 2377 | + wiki.unlock() |
2267 | 2378 | else: |
2268 | 2379 | print "No wikis available to run." |
2269 | 2380 | finally: |