Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -102,7 +102,10 @@ |
103 | 103 | |
104 | 104 | if (self._chunksEnabled): |
105 | 105 | self.Stats = PageAndEditStats(wiki,dbName) |
106 | | - |
| 106 | + print "total",self.Stats.totalEdits |
| 107 | + print "total2",self.Stats.totalPages |
| 108 | + if (not self.Stats.totalEdits or not self.Stats.totalPages): |
| 109 | + raise BackupError("Failed to get DB stats, exiting") |
107 | 110 | if (self._revsPerChunkHistory): |
108 | 111 | if (len(self._revsPerChunkHistory) == 1): |
109 | 112 | self._numChunksHistory = self.getNumberOfChunksForXMLDumps(self.Stats.totalEdits, self._pagesPerChunkHistory[0]) |
— | — | @@ -212,20 +215,30 @@ |
213 | 216 | |
214 | 217 | class RunSimpleCommand(object): |
215 | 218 | |
216 | | - def log(self, message, log = None): |
217 | | - if (log): |
218 | | - log.addToLogQueue("%s\n" % message) |
| 219 | + def log(message, logInfo = None): |
| 220 | + if (logInfo): |
| 221 | + logInfo.addToLogQueue("%s\n" % message) |
219 | 222 | |
220 | 223 | # FIXME rewrite to not use popen2 |
221 | | - def runAndReturn(command, log = None): |
| 224 | + def runAndReturn(command, logInfo = None): |
222 | 225 | """Run a command and return the output as a string. |
223 | 226 | Raises BackupError on non-zero return code.""" |
224 | 227 | # FIXME convert all these calls so they just use runCommand now |
| 228 | + retval = 1 |
| 229 | + retries=0 |
| 230 | + maxretries=3 |
225 | 231 | proc = popen2.Popen4(command, 64) |
226 | 232 | output = proc.fromchild.read() |
227 | 233 | retval = proc.wait() |
| 234 | + while (retval and retries < maxretries): |
| 235 | + RunSimpleCommand.log("Non-zero return code from '%s'" % command, logInfo) |
| 236 | + time.sleep(5) |
| 237 | + proc = popen2.Popen4(command, 64) |
| 238 | + output = proc.fromchild.read() |
| 239 | + retval = proc.wait() |
| 240 | + retries = retries + 1 |
228 | 241 | if retval: |
229 | | - self.log("Non-zero return code from '%s'" % command, log) |
| 242 | +# RunSimpleCommand.log("Non-zero return code from '%s'" % command, logInfo) |
230 | 243 | raise BackupError("Non-zero return code from '%s'" % command) |
231 | 244 | else: |
232 | 245 | return output |
— | — | @@ -247,6 +260,7 @@ |
248 | 261 | |
249 | 262 | runAndReturn = staticmethod(runAndReturn) |
250 | 263 | runAndReport = staticmethod(runAndReport) |
| 264 | + log = staticmethod(log) |
251 | 265 | |
252 | 266 | class PageAndEditStats(object): |
253 | 267 | def __init__(self, wiki, dbName): |
— | — | @@ -255,23 +269,43 @@ |
256 | 270 | self.config = wiki.config |
257 | 271 | self.dbName = dbName |
258 | 272 | self.dbServerInfo = DbServerInfo(wiki, dbName) |
259 | | - (self.totalPages, totalEdits) = self.getStatistics(config,dbName) |
| 273 | + self.getStatistics(config,dbName) |
260 | 274 | |
261 | 275 | def getStatistics(self, dbName, ignore): |
262 | | - """Get (cached) statistics for the wiki""" |
263 | | - totalPages = None |
264 | | - totalEdits = None |
| 276 | + """Get statistics for the wiki""" |
| 277 | + |
265 | 278 | query = "select MAX(page_id) from page;" |
| 279 | + results = None |
| 280 | + retries = 0 |
| 281 | + maxretries = 5 |
266 | 282 | results = self.dbServerInfo.runSqlAndGetOutput(query) |
| 283 | + while (results == None and retries < maxretries): |
| 284 | + retries = retries + 1 |
| 285 | + time.sleep(5) |
| 286 | + results = self.dbServerInfo.runSqlAndGetOutput(query) |
| 287 | + if (not results): |
| 288 | + return(1) |
| 289 | + |
267 | 290 | lines = results.splitlines() |
268 | 291 | if (lines and lines[1]): |
269 | | - totalPages = int(lines[1]) |
| 292 | + self.totalPages = int(lines[1]) |
| 293 | + print "totalpages here is ",self.totalPages |
270 | 294 | query = "select MAX(rev_id) from revision;" |
| 295 | + retries = 0 |
| 296 | + results = None |
271 | 297 | results = self.dbServerInfo.runSqlAndGetOutput(query) |
| 298 | + while (results == None and retries < maxretries): |
| 299 | + retries = retries + 1 |
| 300 | + time.sleep(5) |
| 301 | + results = self.dbServerInfo.runSqlAndGetOutput(query) |
| 302 | + if (not results): |
| 303 | + return(1) |
| 304 | + |
272 | 305 | lines = results.splitlines() |
273 | 306 | if (lines and lines[1]): |
274 | | - totalEdits = int(lines[1]) |
275 | | - return(totalPages, totalEdits) |
| 307 | + self.totalEdits = int(lines[1]) |
| 308 | + print "totaledits here is ",self.totalEdits |
| 309 | + return(0) |
276 | 310 | |
277 | 311 | def getTotalPages(self): |
278 | 312 | return self.totalPages |
— | — | @@ -723,6 +757,7 @@ |
724 | 758 | self.config.php, self.config.wikiDir, self.dbName)) |
725 | 759 | return RunSimpleCommand.runAndReturn(command, self.log).strip() |
726 | 760 | |
| 761 | + # returns 0 on success, 1 on error |
727 | 762 | def saveTable(self, table, outfile): |
728 | 763 | """Dump a table from the current DB with mysqldump, save to a gzipped sql file.""" |
729 | 764 | commands = [ [ "%s" % self.config.mysqldump, "-h", |
— | — | @@ -748,6 +783,7 @@ |
749 | 784 | [ "%s" % self.config.gzip ] ] |
750 | 785 | return self.saveCommand(command, outfile) |
751 | 786 | |
| 787 | + # returns 0 on success, 1 on error |
752 | 788 | def saveCommand(self, commands, outfile): |
753 | 789 | """For one pipeline of commands, redirect output to a given file.""" |
754 | 790 | commands[-1].extend( [ ">" , outfile ] ) |
— | — | @@ -759,7 +795,9 @@ |
760 | 796 | # be a list (the command name and the various args) |
761 | 797 | # If the shell option is true, all pipelines will be run under the shell. |
762 | 798 | def runCommand(self, commandSeriesList, callback=None, arg=None, shell = False): |
763 | | - """Nonzero return code from the shell from any command in any pipeline will raise a BackupError. |
| 799 | + """Nonzero return code from the shell from any command in any pipeline will cause this |
| 800 | + function to print an error message and return 1, indicating error. |
| 801 | + Returns 0 on success. |
764 | 802 | If a callback function is passed, it will receive lines of |
765 | 803 | output from the call. If the callback function takes another argument (which will |
765 | 803 | be passed before the line of output) must be specified by the arg parameter. |
— | — | @@ -778,7 +816,7 @@ |
779 | 817 | for cmd in problemCommands: |
780 | 818 | errorString = errorString + "%s " % cmd |
781 | 819 | self.logAndPrint(errorString) |
782 | | - raise BackupError(errorString) |
| 820 | +# raise BackupError(errorString) |
783 | 821 | return 1 |
784 | 822 | |
785 | 823 | def debug(self, stuff): |
— | — | @@ -1320,7 +1358,15 @@ |
1321 | 1359 | return runner.dumpDir.publicPath(self._file()) |
1322 | 1360 | |
1323 | 1361 | def run(self, runner): |
1324 | | - return runner.saveTable(self._table, self._path(runner)) |
| 1362 | + retries = 0 |
| 1363 | + # try this initially and see how it goes |
| 1364 | + maxretries = 3 |
| 1365 | + error = runner.saveTable(self._table, self._path(runner)) |
| 1366 | + while (error and retries < maxretries): |
| 1367 | + retries = retries + 1 |
| 1368 | + time.sleep(5) |
| 1369 | + error = runner.saveTable(self._table, self._path(runner)) |
| 1370 | + return error |
1325 | 1371 | |
1326 | 1372 | def listFiles(self, runner): |
1327 | 1373 | return [self._file()] |
— | — | @@ -2135,8 +2181,16 @@ |
2136 | 2182 | class TitleDump(Dump): |
2137 | 2183 | """This is used by "wikiproxy", a program to add Wikipedia links to BBC news online""" |
2138 | 2184 | def run(self, runner): |
2139 | | - return runner.saveSql("select page_title from page where page_namespace=0;", |
2140 | | - runner.dumpDir.publicPath("all-titles-in-ns0.gz")) |
| 2185 | + retries = 0 |
| 2186 | + # try this initially and see how it goes |
| 2187 | + maxretries = 3 |
| 2188 | + query="select page_title from page where page_namespace=0;" |
| 2189 | + error = runner.saveSql(query, runner.dumpDir.publicPath("all-titles-in-ns0.gz")) |
| 2190 | + while (error and retries < maxretries): |
| 2191 | + retries = retries + 1 |
| 2192 | + time.sleep(5) |
| 2193 | + error = runner.saveSql(query, runner.dumpDir.publicPath("all-titles-in-ns0.gz")) |
| 2194 | + return error |
2141 | 2195 | |
2142 | 2196 | def listFiles(self, runner): |
2143 | 2197 | return ["all-titles-in-ns0.gz"] |