r85355 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r85354‎ | r85355 | r85356 >
Date:18:39, 4 April 2011
Author:ariel
Status:deferred
Tags:
Comment:
Clean up the config handling; fix 'chunksEnabled' so that it works as a boolean; add 'recombineHistory' with a default of true (set it to 0 to skip recombining the bz2/7z history chunks); complain about missing settings only for the two absolutely mandatory items (the 'wiki' section and its 'dir' setting).
Modified paths:
  • /branches/ariel/xmldumps-backup/WikiDump.py (modified) (history)
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/WikiDump.py
@@ -69,6 +69,8 @@
7070
7171 def dbList(filename):
7272 """Read database list from a file"""
 73+ if (not filename):
 74+ return []
7375 infile = open(filename)
7476 dbs = []
7577 for line in infile:
@@ -93,7 +95,8 @@
9496 "dblist": "",
9597 "privatelist": "",
9698 "biglist": "",
97 - "dir": "",
 99+ "flaggedrevslist": "",
 100+# "dir": "",
98101 "forcenormal": "0",
99102 "halt": "0",
100103 "skipdblist" : "",
@@ -115,7 +118,7 @@
116119 "password": "",
117120 #"tools": {
118121 "php": "/bin/php",
119 - "gzip2": "/usr/bin/gzip",
 122+ "gzip": "/usr/bin/gzip",
120123 "bzip2": "/usr/bin/bzip2",
121124 "sevenzip": "/bin/7za",
122125 "mysql": "/usr/bin/mysql",
@@ -127,8 +130,8 @@
128131 #"cleanup": {
129132 "keep": "3",
130133 #"chunks": {
131 - # set this to True to enable runing the various xml dump stages as chunks in parallel
132 - "chunksEnabled" : False,
 134+ # set this to 1 to enable running the various xml dump stages as chunks in parallel
 135+ "chunksEnabled" : "0",
133136 # for page history runs, number of pages for each chunk, specified separately
134137 # e.g. "1000,10000,100000,2000000,2000000,2000000,2000000,2000000,2000000,2000000"
135138 # would define 10 chunks with the specified number of pages in each and any extra in
@@ -141,39 +144,33 @@
142145 "revsPerChunkHistory" : False,
143146 # pages per chunk for abstract runs
144147 "pagesPerChunkAbstract" : False,
 148+ # whether or not to recombine the history pieces
 149+ "recombineHistory" : "1",
145150 }
146151 conf = ConfigParser.SafeConfigParser(defaults)
147152 conf.read(files)
148153
149 - try:
150 - self.dbList = dbList(conf.get("wiki", "dblist"))
151 - except ConfigParser.NoSectionError:
 154+ if not conf.has_section("wiki"):
152155 print "The mandatory configuration section 'wiki' was not defined."
153 - print "Either the section was ommitted or none of the files in the list"
154 - print "%s exists. Giving up." % files
155 - raise
156 - try:
157 - self.skipDbList = dbList(conf.get("wiki", "skipdblist"))
158 - except ConfigParser.NoSectionError:
159 - self.skipDbList = []
160 - self.dbList = list(set(self.dbList) - set(self.skipDbList))
 156+ raise ConfigParser.NoSectionError('wiki')
161157
 158+ if not conf.has_option("wiki","dir"):
 159+ print "The mandatory setting 'dir' in the section 'wiki' was not defined."
 160+ raise ConfigParser.NoOptionError('wiki','dir')
 161+
 162+ self.dbList = dbList(conf.get("wiki", "dblist"))
 163+ self.skipDbList = dbList(conf.get("wiki", "skipdblist"))
162164 self.privateList = dbList(conf.get("wiki", "privatelist"))
163 - biglistFile = conf.get("wiki", "biglist")
164 - if biglistFile:
165 - self.bigList = dbList(biglistFile)
166 - else:
167 - self.bigList = []
168 - flaggedRevsFile = conf.get("wiki", "flaggedrevslist")
169 - if flaggedRevsFile:
170 - self.flaggedRevsList = dbList(flaggedRevsFile)
171 - else:
172 - self.flaggedRevsList = []
173 -
 165+ self.bigList = dbList(conf.get("wiki", "biglist"))
 166+ self.flaggedRevsList = dbList(conf.get("wiki", "flaggedrevslist"))
174167 self.wikiDir = conf.get("wiki", "dir")
175168 self.forceNormal = conf.getint("wiki", "forceNormal")
176169 self.halt = conf.getint("wiki", "halt")
177 -
 170+
 171+ self.dbList = list(set(self.dbList) - set(self.skipDbList))
 172+
 173+ if not conf.has_section('output'):
 174+ conf.add_section('output')
178175 self.publicDir = conf.get("output", "public")
179176 self.privateDir = conf.get("output", "private")
180177 self.webRoot = conf.get("output", "webroot")
@@ -182,14 +179,20 @@
183180 self.perDumpIndex = conf.get("output", "perdumpindex")
184181 self.logFile = conf.get("output", "logfile")
185182
 183+ if not conf.has_section('reporting'):
 184+ conf.add_section('reporting')
186185 self.adminMail = conf.get("reporting", "adminmail")
187186 self.mailFrom = conf.get("reporting", "mailfrom")
188187 self.smtpServer = conf.get("reporting", "smtpserver")
189188 self.staleAge = conf.getint("reporting", "staleAge")
190189
 190+ if not conf.has_section('database'):
 191+ conf.add_section('database')
191192 self.dbUser = conf.get("database", "user")
192193 self.dbPassword = conf.get("database", "password")
193194
 195+ if not conf.has_section('tools'):
 196+ conf.add_section('tools')
194197 self.php = conf.get("tools", "php")
195198 self.gzip = conf.get("tools", "gzip")
196199 self.bzip2 = conf.get("tools", "bzip2")
@@ -201,13 +204,18 @@
202205 self.cat = conf.get("tools", "cat")
203206 self.grep = conf.get("tools", "grep")
204207
205 - self.chunksEnabled = conf.get("chunks","chunksEnabled")
 208+ if not conf.has_section('chunks'):
 209+ conf.add_section('chunks')
 210+ self.chunksEnabled = conf.getint("chunks","chunksEnabled")
206211 self.pagesPerChunkHistory = conf.get("chunks","pagesPerChunkHistory")
207212 self.revsPerChunkHistory = conf.get("chunks","revsPerChunkHistory")
208213 self.pagesPerChunkAbstract = conf.get("chunks","pagesPerChunkAbstract")
 214+ self.recombineHistory = conf.getint("chunks","recombineHistory")
209215
 216+ if not conf.has_section('cleanup'):
 217+ conf.add_section('cleanup')
210218 self.keep = conf.getint("cleanup", "keep")
211 -
 219+
212220 def dbListByAge(self):
213221 """
214222 Sort wikis in reverse order of last successful dump :
Index: branches/ariel/xmldumps-backup/worker.py
@@ -99,6 +99,7 @@
100100 self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkHistory)
101101 self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.revsPerChunkHistory)
102102 self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkAbstract)
 103+ self._recombineHistory = wiki.config.recombineHistory
103104
104105 if (self._chunksEnabled):
105106 self.Stats = PageAndEditStats(wiki,dbName)
@@ -156,6 +157,9 @@
157158 def chunksEnabled(self):
158159 return self._chunksEnabled
159160
 161+ def recombineHistory(self):
 162+ return self._recombineHistory
 163+
160164 # args: total (pages or revs), and the number of (pages or revs) per chunk.
161165 def getNumberOfChunksForXMLDumps(self, total, perChunk):
162166 if (not total):
@@ -430,7 +434,7 @@
431435 "All pages with complete page edit history (.bz2)",
432436 "These dumps can be *very* large, uncompressing up to 20 times the archive download size. " +
433437 "Suitable for archival and statistical use, most mirror sites won't want or need this.", self._prefetch, self._spawn, self.chunkInfo.getPagesPerChunkHistory()))
434 - if (self.chunkInfo.chunksEnabled()):
 438+ if (self.chunkInfo.chunksEnabled() and self.chunkInfo.recombineHistory()):
435439 self.dumpItems.append(
436440 RecombineXmlDump("meta-history",
437441 "metahistorybz2dumprecombine",
@@ -443,7 +447,7 @@
444448 "All pages with complete edit history (.7z)",
445449 "These dumps can be *very* large, uncompressing up to 100 times the archive download size. " +
446450 "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.chunkInfo.getPagesPerChunkHistory()))
447 - if (self.chunkInfo.chunksEnabled()):
 451+ if (self.chunkInfo.chunksEnabled() and self.chunkInfo.recombineHistory()):
448452 self.dumpItems.append(
449453 RecombineXmlRecompressDump("meta-history",
450454 "metahistory7zdumprecombine",

Status & tagging log