Index: trunk/backup/worker.py |
— | — | @@ -57,7 +57,7 @@ |
58 | 58 | |
59 | 59 | class Runner(object): |
60 | 60 | |
61 | | - def __init__(self, wiki, date=None, checkpoint=None): |
| 61 | + def __init__(self, wiki, date=None, checkpoint=None, prefetch=True): |
62 | 62 | self.wiki = wiki |
63 | 63 | self.config = wiki.config |
64 | 64 | self.dbName = wiki.dbName |
— | — | @@ -248,10 +248,10 @@ |
249 | 249 | XmlStub("First-pass for page XML data dumps"), |
250 | 250 | XmlDump("articles", |
251 | 251 | "<big><b>Articles, templates, image descriptions, and primary meta-pages.</b></big>", |
252 | | - "This contains current versions of article content, and is the archive most mirror sites will probably want."), |
| 252 | + "This contains current versions of article content, and is the archive most mirror sites will probably want.", prefetch), |
253 | 253 | XmlDump("meta-current", |
254 | 254 | "All pages, current versions only.", |
255 | | - "Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material."), |
| 255 | + "Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material.", prefetch), |
256 | 256 | XmlLogging("Pull out all logging data")] |
257 | 257 | if self.wiki.hasFlaggedRevs(): |
258 | 258 | self.items.append( |
— | — | @@ -263,7 +263,7 @@ |
264 | 264 | BigXmlDump("meta-history", |
265 | 265 | "All pages with complete page edit history (.bz2)", |
266 | 266 | "These dumps can be *very* large, uncompressing up to 20 times the archive download size. " + |
267 | | - "Suitable for archival and statistical use, most mirror sites won't want or need this.")) |
| 267 | + "Suitable for archival and statistical use, most mirror sites won't want or need this.", prefetch)) |
268 | 268 | self.items.append( |
269 | 269 | XmlRecompressDump("meta-history", |
270 | 270 | "All pages with complete edit history (.7z)", |
— | — | @@ -724,11 +724,12 @@ |
725 | 725 | |
726 | 726 | class XmlDump(Dump): |
727 | 727 | """Primary XML dumps, one section at a time.""" |
728 | | - def __init__(self, subset, desc, detail): |
| 728 | + def __init__(self, subset, desc, detail, prefetch): |
729 | 729 | Dump.__init__(self, desc) |
730 | 730 | self._subset = subset |
731 | 731 | self._detail = detail |
732 | | - |
| 732 | + self._prefetch = prefetch |
| 733 | + |
733 | 734 | def detail(self): |
734 | 735 | """Optionally return additional text to appear under the heading.""" |
735 | 736 | return self._detail |
— | — | @@ -768,7 +769,10 @@ |
769 | 770 | |
770 | 771 | # Try to pull text from the previous run; most stuff hasn't changed |
771 | 772 | #Source=$OutputDir/pages_$section.xml.bz2 |
772 | | - source = self._findPreviousDump(runner) |
| 773 | + if self._prefetch: |
| 774 | + source = self._findPreviousDump(runner) |
| 775 | + else: |
| 776 | + source = None |
773 | 777 | if source and exists(source): |
774 | 778 | runner.status("... building %s XML dump, with text prefetch from %s..." % (self._subset, source)) |
775 | 779 | prefetch = "--prefetch=bzip2:%s" % (source) |
— | — | @@ -970,9 +974,10 @@ |
971 | 975 | date = None |
972 | 976 | checkpoint = None |
973 | 977 | forceLock = False |
974 | | - |
| 978 | + prefetch = True |
| 979 | + |
975 | 980 | (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "", |
976 | | - ['date=', 'checkpoint=', 'force']) |
| 981 | + ['date=', 'checkpoint=', 'force', 'noprefetch']) |
977 | 982 | for (opt, val) in options: |
978 | 983 | if opt == "--date": |
979 | 984 | date = val |
— | — | @@ -980,7 +985,9 @@ |
981 | 986 | checkpoint = val |
982 | 987 | elif opt == "--force": |
983 | 988 | forceLock = True |
984 | | - |
| 989 | + elif opt == "--noprefetch": |
| 990 | + prefetch = False |
| 991 | + |
985 | 992 | if len(remainder) > 0: |
986 | 993 | wiki = WikiDump.Wiki(config, remainder[0]) |
987 | 994 | if forceLock: |
— | — | @@ -991,7 +998,7 @@ |
992 | 999 | wiki = findAndLockNextWiki(config) |
993 | 1000 | |
994 | 1001 | if wiki: |
995 | | - runner = Runner(wiki, date, checkpoint) |
| 1002 | + runner = Runner(wiki, date, checkpoint, prefetch) |
996 | 1003 | print "Running %s..." % wiki.dbName |
997 | 1004 | runner.run() |
998 | 1005 | wiki.unlock() |