Index: branches/ariel/xmldumps-backup/WikiDump.py |
— | — | @@ -127,10 +127,12 @@ |
128 | 128 | |
129 | 129 | class Config(object): |
130 | 130 | def __init__(self, configFile=False): |
| 131 | + self.projectName = False |
| 132 | + |
131 | 133 | home = os.path.dirname(sys.argv[0]) |
132 | 134 | if (not configFile): |
133 | 135 | configFile = "wikidump.conf" |
134 | | - files = [ |
| 136 | + self.files = [ |
135 | 137 | os.path.join(home,configFile), |
136 | 138 | "/etc/wikidump.conf", |
137 | 139 | os.path.join(os.getenv("HOME"), ".wikidump.conf")] |
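Keeping the search-path list on the instance (`self.files`) is what later lets `parseConfFilePerProject()` re-read the same files. For reference, `ConfigParser.read()` accepts the whole list, silently skips files it cannot open, and lets values from files read later override earlier ones. A minimal sketch of that lookup, assuming the same three locations as the constructor:

```python
# Sketch only: how a stored file list like self.files is consumed.
# ConfigParser.read() takes a list, skips files it cannot open, and
# returns the names of the files it actually parsed; for options present
# in more than one file, the file read last wins.
import os, sys
import ConfigParser

home = os.path.dirname(sys.argv[0])
files = [os.path.join(home, "wikidump.conf"),
         "/etc/wikidump.conf",
         os.path.join(os.getenv("HOME"), ".wikidump.conf")]

conf = ConfigParser.SafeConfigParser()
parsed = conf.read(files)
print "parsed config files:", parsed
```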
— | — | @@ -193,97 +195,123 @@ |
194 | 196 | # whether or not to recombine the history pieces |
195 | 197 | "recombineHistory" : "1", |
196 | 198 | } |
197 | | - conf = ConfigParser.SafeConfigParser(defaults) |
198 | | - conf.read(files) |
| 199 | + self.conf = ConfigParser.SafeConfigParser(defaults) |
| 200 | + self.conf.read(self.files) |
199 | 201 | |
200 | | - if not conf.has_section("wiki"): |
| 202 | + if not self.conf.has_section("wiki"): |
201 | 203 | print "The mandatory configuration section 'wiki' was not defined." |
202 | 204 | raise ConfigParser.NoSectionError('wiki') |
203 | 205 | |
204 | | - if not conf.has_option("wiki","dir"): |
| 206 | + if not self.conf.has_option("wiki","dir"): |
205 | 207 | print "The mandatory setting 'dir' in the section 'wiki' was not defined." |
206 | 208 | raise ConfigParser.NoOptionError('wiki','dir') |
207 | 209 | |
208 | | - self.dbList = MiscUtils.dbList(conf.get("wiki", "dblist")) |
209 | | - self.skipDbList = MiscUtils.dbList(conf.get("wiki", "skipdblist")) |
210 | | - self.privateList = MiscUtils.dbList(conf.get("wiki", "privatelist")) |
211 | | - self.bigList = MiscUtils.dbList(conf.get("wiki", "biglist")) |
212 | | - self.flaggedRevsList = MiscUtils.dbList(conf.get("wiki", "flaggedrevslist")) |
213 | | - self.wikiDir = conf.get("wiki", "dir") |
214 | | - self.forceNormal = conf.getint("wiki", "forceNormal") |
215 | | - self.halt = conf.getint("wiki", "halt") |
| 210 | + self.parseConfFileGlobally() |
| 211 | + self.parseConfFilePerProject() |
216 | 212 | |
| 213 | + def parseConfFileGlobally(self): |
| 214 | + self.dbList = MiscUtils.dbList(self.conf.get("wiki", "dblist")) |
| 215 | + self.skipDbList = MiscUtils.dbList(self.conf.get("wiki", "skipdblist")) |
| 216 | + self.privateList = MiscUtils.dbList(self.conf.get("wiki", "privatelist")) |
| 217 | + self.bigList = MiscUtils.dbList(self.conf.get("wiki", "biglist")) |
| 218 | + self.flaggedRevsList = MiscUtils.dbList(self.conf.get("wiki", "flaggedrevslist")) |
| 219 | + self.wikiDir = self.conf.get("wiki", "dir") |
| 220 | + self.forceNormal = self.conf.getint("wiki", "forceNormal") |
| 221 | + self.halt = self.conf.getint("wiki", "halt") |
| 222 | + |
217 | 223 | self.dbList = list(set(self.dbList) - set(self.skipDbList)) |
218 | 224 | |
219 | | - if not conf.has_section('output'): |
220 | | - conf.add_section('output') |
221 | | - self.publicDir = conf.get("output", "public") |
222 | | - self.privateDir = conf.get("output", "private") |
223 | | - self.webRoot = conf.get("output", "webroot") |
224 | | - self.index = conf.get("output", "index") |
225 | | - self.templateDir = conf.get("output", "templateDir") |
226 | | - self.perDumpIndex = conf.get("output", "perdumpindex") |
227 | | - self.logFile = conf.get("output", "logfile") |
228 | | - self.fileperms = conf.get("output", "fileperms") |
| 225 | + if not self.conf.has_section('output'): |
| 226 | + self.conf.add_section('output') |
| 227 | + self.publicDir = self.conf.get("output", "public") |
| 228 | + self.privateDir = self.conf.get("output", "private") |
| 229 | + self.webRoot = self.conf.get("output", "webroot") |
| 230 | + self.index = self.conf.get("output", "index") |
| 231 | + self.templateDir = self.conf.get("output", "templateDir") |
| 232 | + self.perDumpIndex = self.conf.get("output", "perdumpindex") |
| 233 | + self.logFile = self.conf.get("output", "logfile") |
| 234 | + self.fileperms = self.conf.get("output", "fileperms") |
229 | 235 | self.fileperms = int(self.fileperms,0) |
230 | | - if not conf.has_section('reporting'): |
231 | | - conf.add_section('reporting') |
232 | | - self.adminMail = conf.get("reporting", "adminmail") |
233 | | - self.mailFrom = conf.get("reporting", "mailfrom") |
234 | | - self.smtpServer = conf.get("reporting", "smtpserver") |
235 | | - self.staleAge = conf.getint("reporting", "staleAge") |
| 236 | + if not self.conf.has_section('reporting'): |
| 237 | + self.conf.add_section('reporting') |
| 238 | + self.adminMail = self.conf.get("reporting", "adminmail") |
| 239 | + self.mailFrom = self.conf.get("reporting", "mailfrom") |
| 240 | + self.smtpServer = self.conf.get("reporting", "smtpserver") |
| 241 | + self.staleAge = self.conf.getint("reporting", "staleAge") |
236 | 242 | |
237 | | - if not conf.has_section('database'): |
238 | | - conf.add_section('database') |
239 | | - self.dbUser = conf.get("database", "user") |
240 | | - self.dbPassword = conf.get("database", "password") |
241 | | - |
242 | | - if not conf.has_section('tools'): |
243 | | - conf.add_section('tools') |
244 | | - self.php = conf.get("tools", "php") |
245 | | - self.gzip = conf.get("tools", "gzip") |
246 | | - self.bzip2 = conf.get("tools", "bzip2") |
247 | | - self.sevenzip = conf.get("tools", "sevenzip") |
248 | | - self.mysql = conf.get("tools", "mysql") |
249 | | - self.mysqldump = conf.get("tools", "mysqldump") |
250 | | - self.head = conf.get("tools", "head") |
251 | | - self.tail = conf.get("tools", "tail") |
252 | | - self.cat = conf.get("tools", "cat") |
253 | | - self.grep = conf.get("tools", "grep") |
254 | | - self.checkforbz2footer = conf.get("tools","checkforbz2footer") |
| 243 | + if not self.conf.has_section('tools'): |
| 244 | + self.conf.add_section('tools') |
| 245 | + self.php = self.conf.get("tools", "php") |
| 246 | + self.gzip = self.conf.get("tools", "gzip") |
| 247 | + self.bzip2 = self.conf.get("tools", "bzip2") |
| 248 | + self.sevenzip = self.conf.get("tools", "sevenzip") |
| 249 | + self.mysql = self.conf.get("tools", "mysql") |
| 250 | + self.mysqldump = self.conf.get("tools", "mysqldump") |
| 251 | + self.head = self.conf.get("tools", "head") |
| 252 | + self.tail = self.conf.get("tools", "tail") |
| 253 | + self.cat = self.conf.get("tools", "cat") |
| 254 | + self.grep = self.conf.get("tools", "grep") |
| 255 | + self.checkforbz2footer = self.conf.get("tools","checkforbz2footer") |
255 | 256 | |
256 | | - if not conf.has_section('chunks'): |
257 | | - conf.add_section('chunks') |
258 | | - self.chunksEnabled = conf.getint("chunks","chunksEnabled") |
259 | | - self.pagesPerChunkHistory = conf.get("chunks","pagesPerChunkHistory") |
260 | | - self.revsPerChunkHistory = conf.get("chunks","revsPerChunkHistory") |
261 | | - self.pagesPerChunkAbstract = conf.get("chunks","pagesPerChunkAbstract") |
262 | | - self.recombineHistory = conf.getint("chunks","recombineHistory") |
| 257 | + if not self.conf.has_section('cleanup'): |
| 258 | + self.conf.add_section('cleanup') |
| 259 | + self.keep = self.conf.getint("cleanup", "keep") |
263 | 260 | |
264 | | - if not conf.has_section('cleanup'): |
265 | | - conf.add_section('cleanup') |
266 | | - self.keep = conf.getint("cleanup", "keep") |
| 261 | + def parseConfFilePerProject(self, projectName = False): |
| 262 | + # we need to read the per-project section without falling back |
| 263 | + # to the defaults, which has_option() would otherwise do, so we |
| 264 | + # set up a local conf instance without the defaults |
| 265 | + conf = ConfigParser.SafeConfigParser() |
| 266 | + conf.read(self.files) |
267 | 267 | |
| 268 | + if (projectName): |
| 269 | + self.projectName = projectName |
| 270 | + |
| 271 | + if not self.conf.has_section('database'): |
| 272 | + self.conf.add_section('database') |
| 273 | + self.dbUser = self.getOptionForProjectOrDefault(conf, "database", "user",0) |
| 274 | + self.dbPassword = self.getOptionForProjectOrDefault(conf, "database", "password",0) |
| 275 | + |
| 276 | + if not self.conf.has_section('chunks'): |
| 277 | + self.conf.add_section('chunks') |
| 278 | + self.chunksEnabled = self.getOptionForProjectOrDefault(conf, "chunks","chunksEnabled",1) |
| 279 | + self.pagesPerChunkHistory = self.getOptionForProjectOrDefault(conf, "chunks","pagesPerChunkHistory",0) |
| 280 | + self.revsPerChunkHistory = self.getOptionForProjectOrDefault(conf, "chunks","revsPerChunkHistory",0) |
| 281 | + self.pagesPerChunkAbstract = self.getOptionForProjectOrDefault(conf, "chunks","pagesPerChunkAbstract",0) |
| 282 | + self.recombineHistory = self.getOptionForProjectOrDefault(conf, "chunks","recombineHistory",1) |
| 283 | + |
| 284 | + def getOptionForProjectOrDefault(self, conf, sectionName, itemName, isInt): |
| 285 | + if (conf.has_section(self.projectName)): |
| 286 | + if (conf.has_option(self.projectName, itemName)): |
| 287 | + if (isInt): |
| 288 | + return(conf.getint(self.projectName,itemName)) |
| 289 | + else: |
| 290 | + return(conf.get(self.projectName,itemName)) |
| 291 | + if (isInt): |
| 292 | + return(self.conf.getint(sectionName,itemName)) |
| 293 | + else: |
| 294 | + return(self.conf.get(sectionName,itemName)) |
| 295 | + |
268 | 296 | def dbListByAge(self): |
269 | 297 | """ |
270 | | - Sort wikis in reverse order of last successful dump : |
| 298 | + Sort wikis in reverse order of last successful dump : |
271 | 299 | |
272 | | - Order is (DumpFailed, Age), and False < True : |
273 | | - First, wikis whose latest dump was successful, most recent dump first |
274 | | - Then, wikis whose latest dump failed, most recent dump first. |
275 | | - Finally, wikis which have never been dumped. |
| 300 | + Order is (DumpFailed, Age), and False < True : |
| 301 | + First, wikis whose latest dump was successful, most recent dump first |
| 302 | + Then, wikis whose latest dump failed, most recent dump first. |
| 303 | + Finally, wikis which have never been dumped. |
276 | 304 | |
277 | | - According to that sort, the last item of this list is, when applicable, |
278 | | - the oldest failed dump attempt. |
| 305 | + According to that sort, the last item of this list is, when applicable, |
| 306 | + the oldest failed dump attempt. |
279 | 307 | |
280 | | - If some error occurs checking a dump status, that dump is put last in the |
281 | | - list (sort value is (True, maxint) ) |
| 308 | + If some error occurs checking a dump status, that dump is put last in the |
| 309 | + list (sort value is (True, maxint) ) |
282 | 310 | |
283 | | - Note that we now sort this list by the date of the dump directory, not the |
284 | | - last date that a dump file in that directory may have been touched. This |
285 | | - allows us to rerun jobs to completion from older runs, for example |
286 | | - an en pedia history urn that failed in the middle, without borking the |
287 | | - index page links. |
| 311 | + Note that we now sort this list by the date of the dump directory, not the |
| 312 | + last date that a dump file in that directory may have been touched. This |
| 313 | + allows us to rerun jobs to completion from older runs, for example |
| 314 | + an en pedia history run that failed in the middle, without borking the |
| 315 | + index page links. |
288 | 316 | """ |
289 | 317 | available = [] |
290 | 318 | for db in self.dbList: |
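The comment in `parseConfFilePerProject()` is the crux of this change: a `SafeConfigParser` built with constructor defaults reports those defaults through `has_option()` in every section, so it cannot tell a genuine per-project override apart from an inherited global value. A minimal, self-contained sketch of that behaviour (the section and option names below are illustrative, not taken from the real config):

```python
# Sketch (not part of the patch) of why a second, defaults-free parser is used.
import ConfigParser
from StringIO import StringIO

sample = StringIO("[enwiki]\npagesPerChunkHistory = 10000\n")

withDefaults = ConfigParser.SafeConfigParser({"chunksEnabled": "0"})
withDefaults.readfp(sample)
print withDefaults.has_option("enwiki", "chunksEnabled")    # True: the default leaks in

sample.seek(0)
noDefaults = ConfigParser.SafeConfigParser()
noDefaults.readfp(sample)
print noDefaults.has_option("enwiki", "chunksEnabled")      # False: only real overrides count
```

`getOptionForProjectOrDefault()` then uses the defaults-free parser only to decide whether the project section really defines the option, and otherwise falls back to the default-backed parser's named section.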
Index: branches/ariel/xmldumps-backup/worker.py |
— | — | @@ -73,12 +73,17 @@ |
74 | 74 | |
75 | 75 | self._dbName = dbName |
76 | 76 | self._chunksEnabled = wiki.config.chunksEnabled |
77 | | - self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkHistory) |
78 | | - self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.revsPerChunkHistory) |
79 | | - self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkAbstract) |
80 | | - self._recombineHistory = wiki.config.recombineHistory |
81 | | - |
82 | 77 | if (self._chunksEnabled): |
| 78 | + self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkHistory) |
| 79 | + self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.revsPerChunkHistory) |
| 80 | + self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkAbstract) |
| 81 | + self._recombineHistory = wiki.config.recombineHistory |
| 82 | + else: |
| 83 | + self._pagesPerChunkHistory = False |
| 84 | + self._revsPerChunkHistory = False |
| 85 | + self._pagesPerChunkAbstract = False |
| 86 | + self._recombineHistory = False |
| 87 | + if (self._chunksEnabled): |
83 | 88 | self.Stats = PageAndEditStats(wiki,dbName, errorCallback) |
84 | 89 | if (not self.Stats.totalEdits or not self.Stats.totalPages): |
85 | 90 | raise BackupError("Failed to get DB stats, exiting") |
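The reshuffle above means the comma-separated chunk settings are only parsed when chunking is actually enabled; otherwise the attributes are set to False so later code can still test them. The per-chunk values themselves arrive from the config as strings. A hypothetical stand-in for `convertCommaSepLineToNumbers()` — the real helper lives in worker.py and may behave differently — just to show the expected shape of those values:

```python
# Hypothetical stand-in for worker.py's convertCommaSepLineToNumbers();
# assumes the helper splits a comma-separated string into a list of ints.
def convertCommaSepLineToNumbers(line):
    return [int(field) for field in line.split(',')]

print convertCommaSepLineToNumbers("10000,50000,100000")    # [10000, 50000, 100000]
```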
— | — | @@ -1709,7 +1714,7 @@ |
1710 | 1715 | if (self._chunks): |
1711 | 1716 | for i in range(1, len(self._chunks)+1): |
1712 | 1717 | files.append( self._path(runner, 'bz2', i ) ) |
1713 | | - files.append( self._path(runner, 'bz2', i ) ) |
| 1718 | + files.append( self._path(runner, 'bz2', i ) ) |
1714 | 1719 | |
1715 | 1720 | for f in files: |
1716 | 1721 | pipeline = [] |
— | — | @@ -2485,6 +2490,9 @@ |
2486 | 2491 | wiki = findAndLockNextWiki(config) |
2487 | 2492 | |
2488 | 2493 | if wiki: |
| 2494 | + # process any per-project configuration options |
| 2495 | + config.parseConfFilePerProject(wiki.dbName) |
| 2496 | + |
2489 | 2497 | runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, dryrun, enableLogging) |
2490 | 2498 | if (restart): |
2491 | 2499 | print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested) |
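Taken together, the call sequence this patch sets up is: the Config constructor parses the global sections and the default-backed per-project values once, and the worker re-parses the per-project values as soon as it knows which wiki it has locked (`config.parseConfFilePerProject(wiki.dbName)` above). A sketch of that flow from the caller's side; the `[enwiki]` section contents are illustrative, not taken from a real wikidump.conf:

```python
# Sketch of the per-project override flow introduced by this patch.
# A wikidump.conf might carry a section named after a database, e.g.:
#
#   [chunks]
#   chunksEnabled = 0
#
#   [enwiki]
#   chunksEnabled = 1
#   pagesPerChunkHistory = 10000,50000,100000
#
import WikiDump

config = WikiDump.Config()              # global parse; projectName is still False
print config.chunksEnabled              # falls back to the [chunks] section

config.parseConfFilePerProject("enwiki")
print config.chunksEnabled              # taken from [enwiki] when that section defines it
```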