r92370 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r92369‎ | r92370 | r92371 >
Date:17:15, 16 July 2011
Author:ariel
Status:deferred
Tags:
Comment:
allow per-project items in conf file; formatting cleanup; fix bug from previous commit (wrong indentation)
Modified paths:
  • /branches/ariel/xmldumps-backup/WikiDump.py (modified) (history)
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/WikiDump.py
@@ -127,10 +127,12 @@
128128
129129 class Config(object):
130130 def __init__(self, configFile=False):
 131+ self.projectName = False
 132+
131133 home = os.path.dirname(sys.argv[0])
132134 if (not configFile):
133135 configFile = "wikidump.conf"
134 - files = [
 136+ self.files = [
135137 os.path.join(home,configFile),
136138 "/etc/wikidump.conf",
137139 os.path.join(os.getenv("HOME"), ".wikidump.conf")]
@@ -193,97 +195,123 @@
194196 # whether or not to recombine the history pieces
195197 "recombineHistory" : "1",
196198 }
197 - conf = ConfigParser.SafeConfigParser(defaults)
198 - conf.read(files)
 199+ self.conf = ConfigParser.SafeConfigParser(defaults)
 200+ self.conf.read(self.files)
199201
200 - if not conf.has_section("wiki"):
 202+ if not self.conf.has_section("wiki"):
201203 print "The mandatory configuration section 'wiki' was not defined."
202204 raise ConfigParser.NoSectionError('wiki')
203205
204 - if not conf.has_option("wiki","dir"):
 206+ if not self.conf.has_option("wiki","dir"):
205207 print "The mandatory setting 'dir' in the section 'wiki' was not defined."
206208 raise ConfigParser.NoOptionError('wiki','dir')
207209
208 - self.dbList = MiscUtils.dbList(conf.get("wiki", "dblist"))
209 - self.skipDbList = MiscUtils.dbList(conf.get("wiki", "skipdblist"))
210 - self.privateList = MiscUtils.dbList(conf.get("wiki", "privatelist"))
211 - self.bigList = MiscUtils.dbList(conf.get("wiki", "biglist"))
212 - self.flaggedRevsList = MiscUtils.dbList(conf.get("wiki", "flaggedrevslist"))
213 - self.wikiDir = conf.get("wiki", "dir")
214 - self.forceNormal = conf.getint("wiki", "forceNormal")
215 - self.halt = conf.getint("wiki", "halt")
 210+ self.parseConfFileGlobally()
 211+ self.parseConfFilePerProject()
216212
 213+ def parseConfFileGlobally(self):
 214+ self.dbList = MiscUtils.dbList(self.conf.get("wiki", "dblist"))
 215+ self.skipDbList = MiscUtils.dbList(self.conf.get("wiki", "skipdblist"))
 216+ self.privateList = MiscUtils.dbList(self.conf.get("wiki", "privatelist"))
 217+ self.bigList = MiscUtils.dbList(self.conf.get("wiki", "biglist"))
 218+ self.flaggedRevsList = MiscUtils.dbList(self.conf.get("wiki", "flaggedrevslist"))
 219+ self.wikiDir = self.conf.get("wiki", "dir")
 220+ self.forceNormal = self.conf.getint("wiki", "forceNormal")
 221+ self.halt = self.conf.getint("wiki", "halt")
 222+
217223 self.dbList = list(set(self.dbList) - set(self.skipDbList))
218224
219 - if not conf.has_section('output'):
220 - conf.add_section('output')
221 - self.publicDir = conf.get("output", "public")
222 - self.privateDir = conf.get("output", "private")
223 - self.webRoot = conf.get("output", "webroot")
224 - self.index = conf.get("output", "index")
225 - self.templateDir = conf.get("output", "templateDir")
226 - self.perDumpIndex = conf.get("output", "perdumpindex")
227 - self.logFile = conf.get("output", "logfile")
228 - self.fileperms = conf.get("output", "fileperms")
 225+ if not self.conf.has_section('output'):
 226+ self.conf.add_section('output')
 227+ self.publicDir = self.conf.get("output", "public")
 228+ self.privateDir = self.conf.get("output", "private")
 229+ self.webRoot = self.conf.get("output", "webroot")
 230+ self.index = self.conf.get("output", "index")
 231+ self.templateDir = self.conf.get("output", "templateDir")
 232+ self.perDumpIndex = self.conf.get("output", "perdumpindex")
 233+ self.logFile = self.conf.get("output", "logfile")
 234+ self.fileperms = self.conf.get("output", "fileperms")
229235 self.fileperms = int(self.fileperms,0)
230 - if not conf.has_section('reporting'):
231 - conf.add_section('reporting')
232 - self.adminMail = conf.get("reporting", "adminmail")
233 - self.mailFrom = conf.get("reporting", "mailfrom")
234 - self.smtpServer = conf.get("reporting", "smtpserver")
235 - self.staleAge = conf.getint("reporting", "staleAge")
 236+ if not self.conf.has_section('reporting'):
 237+ self.conf.add_section('reporting')
 238+ self.adminMail = self.conf.get("reporting", "adminmail")
 239+ self.mailFrom = self.conf.get("reporting", "mailfrom")
 240+ self.smtpServer = self.conf.get("reporting", "smtpserver")
 241+ self.staleAge = self.conf.getint("reporting", "staleAge")
236242
237 - if not conf.has_section('database'):
238 - conf.add_section('database')
239 - self.dbUser = conf.get("database", "user")
240 - self.dbPassword = conf.get("database", "password")
241 -
242 - if not conf.has_section('tools'):
243 - conf.add_section('tools')
244 - self.php = conf.get("tools", "php")
245 - self.gzip = conf.get("tools", "gzip")
246 - self.bzip2 = conf.get("tools", "bzip2")
247 - self.sevenzip = conf.get("tools", "sevenzip")
248 - self.mysql = conf.get("tools", "mysql")
249 - self.mysqldump = conf.get("tools", "mysqldump")
250 - self.head = conf.get("tools", "head")
251 - self.tail = conf.get("tools", "tail")
252 - self.cat = conf.get("tools", "cat")
253 - self.grep = conf.get("tools", "grep")
254 - self.checkforbz2footer = conf.get("tools","checkforbz2footer")
 243+ if not self.conf.has_section('tools'):
 244+ self.conf.add_section('tools')
 245+ self.php = self.conf.get("tools", "php")
 246+ self.gzip = self.conf.get("tools", "gzip")
 247+ self.bzip2 = self.conf.get("tools", "bzip2")
 248+ self.sevenzip = self.conf.get("tools", "sevenzip")
 249+ self.mysql = self.conf.get("tools", "mysql")
 250+ self.mysqldump = self.conf.get("tools", "mysqldump")
 251+ self.head = self.conf.get("tools", "head")
 252+ self.tail = self.conf.get("tools", "tail")
 253+ self.cat = self.conf.get("tools", "cat")
 254+ self.grep = self.conf.get("tools", "grep")
 255+ self.checkforbz2footer = self.conf.get("tools","checkforbz2footer")
255256
256 - if not conf.has_section('chunks'):
257 - conf.add_section('chunks')
258 - self.chunksEnabled = conf.getint("chunks","chunksEnabled")
259 - self.pagesPerChunkHistory = conf.get("chunks","pagesPerChunkHistory")
260 - self.revsPerChunkHistory = conf.get("chunks","revsPerChunkHistory")
261 - self.pagesPerChunkAbstract = conf.get("chunks","pagesPerChunkAbstract")
262 - self.recombineHistory = conf.getint("chunks","recombineHistory")
 257+ if not self.conf.has_section('cleanup'):
 258+ self.conf.add_section('cleanup')
 259+ self.keep = self.conf.getint("cleanup", "keep")
263260
264 - if not conf.has_section('cleanup'):
265 - conf.add_section('cleanup')
266 - self.keep = conf.getint("cleanup", "keep")
 261+ def parseConfFilePerProject(self, projectName = False):
 262+ # we need to read from the project section without falling back
 263+ # to the defaults, which has_option() normally does, ugh. so set
 264+ # up a local conf instance without the defaults
 265+ conf = ConfigParser.SafeConfigParser()
 266+ conf.read(self.files)
267267
 268+ if (projectName):
 269+ self.projectName = projectName
 270+
 271+ if not self.conf.has_section('database'):
 272+ self.conf.add_section('database')
 273+ self.dbUser = self.getOptionForProjectOrDefault(conf, "database", "user",0)
 274+ self.dbPassword = self.getOptionForProjectOrDefault(conf, "database", "password",0)
 275+
 276+ if not self.conf.has_section('chunks'):
 277+ self.conf.add_section('chunks')
 278+ self.chunksEnabled = self.getOptionForProjectOrDefault(conf, "chunks","chunksEnabled",1)
 279+ self.pagesPerChunkHistory = self.getOptionForProjectOrDefault(conf, "chunks","pagesPerChunkHistory",0)
 280+ self.revsPerChunkHistory = self.getOptionForProjectOrDefault(conf, "chunks","revsPerChunkHistory",0)
 281+ self.pagesPerChunkAbstract = self.getOptionForProjectOrDefault(conf, "chunks","pagesPerChunkAbstract",0)
 282+ self.recombineHistory = self.getOptionForProjectOrDefault(conf, "chunks","recombineHistory",1)
 283+
 284+ def getOptionForProjectOrDefault(self, conf, sectionName, itemName, isInt):
 285+ if (conf.has_section(self.projectName)):
 286+ if (conf.has_option(self.projectName, itemName)):
 287+ if (isInt):
 288+ return(conf.getint(self.projectName,itemName))
 289+ else:
 290+ return(conf.get(self.projectName,itemName))
 291+ if (isInt):
 292+ return(self.conf.getint(sectionName,itemName))
 293+ else:
 294+ return(self.conf.get(sectionName,itemName))
 295+
268296 def dbListByAge(self):
269297 """
270 - Sort wikis in reverse order of last successful dump :
 298+ Sort wikis in reverse order of last successful dump :
271299
272 - Order is (DumpFailed, Age), and False < True :
273 - First, wikis whose latest dump was successful, most recent dump first
274 - Then, wikis whose latest dump failed, most recent dump first.
275 - Finally, wikis which have never been dumped.
 300+ Order is (DumpFailed, Age), and False < True :
 301+ First, wikis whose latest dump was successful, most recent dump first
 302+ Then, wikis whose latest dump failed, most recent dump first.
 303+ Finally, wikis which have never been dumped.
276304
277 - According to that sort, the last item of this list is, when applicable,
278 - the oldest failed dump attempt.
 305+ According to that sort, the last item of this list is, when applicable,
 306+ the oldest failed dump attempt.
279307
280 - If some error occurs checking a dump status, that dump is put last in the
281 - list (sort value is (True, maxint) )
 308+ If some error occurs checking a dump status, that dump is put last in the
 309+ list (sort value is (True, maxint) )
282310
283 - Note that we now sort this list by the date of the dump directory, not the
284 - last date that a dump file in that directory may have been touched. This
285 - allows us to rerun jobs to completion from older runs, for example
286 - an en pedia history urn that failed in the middle, without borking the
287 - index page links.
 311+ Note that we now sort this list by the date of the dump directory, not the
 312+ last date that a dump file in that directory may have been touched. This
 313+ allows us to rerun jobs to completion from older runs, for example
 314+ an en pedia history urn that failed in the middle, without borking the
 315+ index page links.
288316 """
289317 available = []
290318 for db in self.dbList:
Index: branches/ariel/xmldumps-backup/worker.py
@@ -73,12 +73,17 @@
7474
7575 self._dbName = dbName
7676 self._chunksEnabled = wiki.config.chunksEnabled
77 - self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkHistory)
78 - self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.revsPerChunkHistory)
79 - self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkAbstract)
80 - self._recombineHistory = wiki.config.recombineHistory
81 -
8277 if (self._chunksEnabled):
 78+ self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkHistory)
 79+ self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.revsPerChunkHistory)
 80+ self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkAbstract)
 81+ self._recombineHistory = wiki.config.recombineHistory
 82+ else:
 83+ self._pagesPerChunkHistory = False
 84+ self._revsPerChunkHistory = False
 85+ self._pagesPerChunkAbstract = False
 86+ self._recombineHistory = False
 87+ if (self._chunksEnabled):
8388 self.Stats = PageAndEditStats(wiki,dbName, errorCallback)
8489 if (not self.Stats.totalEdits or not self.Stats.totalPages):
8590 raise BackupError("Failed to get DB stats, exiting")
@@ -1709,7 +1714,7 @@
17101715 if (self._chunks):
17111716 for i in range(1, len(self._chunks)+1):
17121717 files.append( self._path(runner, 'bz2', i ) )
1713 - files.append( self._path(runner, 'bz2', i ) )
 1718+ files.append( self._path(runner, 'bz2', i ) )
17141719
17151720 for f in files:
17161721 pipeline = []
@@ -2485,6 +2490,9 @@
24862491 wiki = findAndLockNextWiki(config)
24872492
24882493 if wiki:
 2494+ # process any per-project configuration options
 2495+ config.parseConfFilePerProject(wiki.dbName)
 2496+
24892497 runner = Runner(wiki, date, prefetch, spawn, jobRequested, restart, htmlNotice, dryrun, enableLogging)
24902498 if (restart):
24912499 print "Running %s, restarting from job %s..." % (wiki.dbName, jobRequested)

Status & tagging log