r67318 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r67317‎ | r67318 | r67319 >
Date:01:16, 4 June 2010
Author:ariel
Status:resolved (Comments)
Tags:
Comment:
backups: add --noprefetch option (for use when previous backups contain garbage for some text revisions)
Modified paths:
  • /trunk/backup/worker.py (modified) (history)

Diff [purge]

Index: trunk/backup/worker.py
@@ -57,7 +57,7 @@
5858
5959 class Runner(object):
6060
61 - def __init__(self, wiki, date=None, checkpoint=None):
 61+ def __init__(self, wiki, date=None, checkpoint=None, prefetch=True):
6262 self.wiki = wiki
6363 self.config = wiki.config
6464 self.dbName = wiki.dbName
@@ -248,10 +248,10 @@
249249 XmlStub("First-pass for page XML data dumps"),
250250 XmlDump("articles",
251251 "<big><b>Articles, templates, image descriptions, and primary meta-pages.</b></big>",
252 - "This contains current versions of article content, and is the archive most mirror sites will probably want."),
 252+ "This contains current versions of article content, and is the archive most mirror sites will probably want.", prefetch),
253253 XmlDump("meta-current",
254254 "All pages, current versions only.",
255 - "Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material."),
 255+ "Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material.", prefetch),
256256 XmlLogging("Pull out all logging data")]
257257 if self.wiki.hasFlaggedRevs():
258258 self.items.append(
@@ -263,7 +263,7 @@
264264 BigXmlDump("meta-history",
265265 "All pages with complete page edit history (.bz2)",
266266 "These dumps can be *very* large, uncompressing up to 20 times the archive download size. " +
267 - "Suitable for archival and statistical use, most mirror sites won't want or need this."))
 267+ "Suitable for archival and statistical use, most mirror sites won't want or need this.", prefetch))
268268 self.items.append(
269269 XmlRecompressDump("meta-history",
270270 "All pages with complete edit history (.7z)",
@@ -724,11 +724,12 @@
725725
726726 class XmlDump(Dump):
727727 """Primary XML dumps, one section at a time."""
728 - def __init__(self, subset, desc, detail):
 728+ def __init__(self, subset, desc, detail, prefetch):
729729 Dump.__init__(self, desc)
730730 self._subset = subset
731731 self._detail = detail
732 -
 732+ self._prefetch = prefetch
 733+
733734 def detail(self):
734735 """Optionally return additional text to appear under the heading."""
735736 return self._detail
@@ -768,7 +769,10 @@
769770
770771 # Try to pull text from the previous run; most stuff hasn't changed
771772 #Source=$OutputDir/pages_$section.xml.bz2
772 - source = self._findPreviousDump(runner)
 773+ if self._prefetch:
 774+ source = self._findPreviousDump(runner)
 775+ else:
 776+ source = None
773777 if source and exists(source):
774778 runner.status("... building %s XML dump, with text prefetch from %s..." % (self._subset, source))
775779 prefetch = "--prefetch=bzip2:%s" % (source)
@@ -970,9 +974,10 @@
971975 date = None
972976 checkpoint = None
973977 forceLock = False
974 -
 978+ prefetch = True
 979+
975980 (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
976 - ['date=', 'checkpoint=', 'force'])
 981+ ['date=', 'checkpoint=', 'force', 'noprefetch'])
977982 for (opt, val) in options:
978983 if opt == "--date":
979984 date = val
@@ -980,7 +985,9 @@
981986 checkpoint = val
982987 elif opt == "--force":
983988 forceLock = True
984 -
 989+ elif opt == "--noprefetch":
 990+ prefetch = False
 991+
985992 if len(remainder) > 0:
986993 wiki = WikiDump.Wiki(config, remainder[0])
987994 if forceLock:
@@ -991,7 +998,7 @@
992999 wiki = findAndLockNextWiki(config)
9931000
9941001 if wiki:
995 - runner = Runner(wiki, date, checkpoint)
 1002+ runner = Runner(wiki, date, checkpoint, prefetch)
9961003 print "Running %s..." % wiki.dbName
9971004 runner.run()
9981005 wiki.unlock()

Comments

#Comment by MarkAHershberger (talk | contribs)   18:27, 16 July 2010

While this may work, the scoping of "prefetch" in Runner.__init__ looks funky. I'd prefer that you assign it to a member variable (as you did with self._prefetch in the XmlDump class) and then use that.

#Comment by ArielGlenn (talk | contribs)   19:03, 19 July 2010

See r69560.

Status & tagging log