r69443 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r69442‎ | r69443 | r69444 >
Date:21:22, 16 July 2010
Author:mah
Status:deferred (Comments)
Tags:
Comment:
re r66041: Simplify mysqldump command by using skip-* to specify the bits of “opt” to skip
Modified paths:
  • /trunk/backup/worker.py (modified) (history)

Diff [purge]

Index: trunk/backup/worker.py
@@ -23,7 +23,7 @@
2424 parts.insert(0, file)
2525 (path, file) = os.path.split(path)
2626 return parts
27 -
 27+
2828 def relativePath(path, base):
2929 """Return a relative path to 'path' from the directory 'base'."""
3030 path = splitPath(path)
@@ -56,7 +56,7 @@
5757 pass
5858
5959 class Runner(object):
60 -
 60+
6161 def __init__(self, wiki, date=None, checkpoint=None, prefetch=True, spawn=True):
6262 self.wiki = wiki
6363 self.config = wiki.config
@@ -68,12 +68,12 @@
6969 else:
7070 self.date = WikiDump.today()
7171 wiki.setDate(self.date)
72 -
 72+
7373 self.failCount = 0
7474 self.lastFailed = False
75 -
 75+
7676 self.checkpoint = checkpoint
77 -
 77+
7878 def passwordOption(self):
7979 """If you pass '-pfoo' mysql uses the password 'foo',
8080 but if you pass '-p' it prompts. Sigh."""
@@ -81,13 +81,13 @@
8282 return None
8383 else:
8484 return "-p" + self.config.dbPassword
85 -
 85+
8686 def forceNormalOption(self):
8787 if self.config.forceNormal:
8888 return "--force-normal"
8989 else:
9090 return ""
91 -
 91+
9292 def getDBTablePrefix(self):
9393 """Get the prefix for all tables for the specific wiki ($wgDBprefix)"""
9494 command = "echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s" % shellEscape((
@@ -96,14 +96,14 @@
9797
9898 def saveTable(self, table, outfile):
9999 """Dump a table from the current DB with mysqldump, save to a gzipped sql file."""
100 - command = "mysqldump -h %s -u %s %s --extended-insert --skip-opt --quick --create-options --add-drop-table --extended-insert --set-charset --quote-names %s %s | gzip" % shellEscape((
 100+ command = "mysqldump -h %s -u %s %s --opt --quick --skip-add-locks --skip-lock-tables %s %s | gzip" % shellEscape((
101101 self.dbServer,
102102 self.config.dbUser,
103103 self.passwordOption(),
104104 self.dbName,
105105 self.getDBTablePrefix() + table))
106106 return self.saveCommand(command, outfile, pipe=True)
107 -
 107+
108108 def saveSql(self, query, outfile):
109109 """Pass some SQL commands to the server for this DB and save output to a file."""
110110 command = "echo %s | mysql -h %s -u %s %s %s -r | gzip" % shellEscape((
@@ -113,11 +113,11 @@
114114 self.passwordOption(),
115115 self.dbName))
116116 return self.saveCommand(command, outfile, pipe=True)
117 -
 117+
118118 def saveCommand(self, command, outfile, pipe=False):
119119 """Shell out and redirect output to a given file."""
120120 return self.runCommand(command + " > " + shellEscape(outfile), pipe)
121 -
 121+
122122 def runCommand(self, command, pipe=False, callback=None):
123123 """Shell out; output is assumed to be saved usefully somehow.
124124 Nonzero return code from the shell will raise a BackupError.
@@ -135,7 +135,7 @@
136136 if retval:
137137 raise BackupError("nonzero return code from '%s'" % command)
138138 return retval
139 -
 139+
140140 def runAndReport(self, command, callback):
141141 """Shell out to a command, and feed output lines to the callback function.
142142 Returns the exit code from the program once complete.
@@ -149,7 +149,7 @@
150150 callback(self, line)
151151 line = proc.fromchild.readline()
152152 return proc.wait()
153 -
 153+
154154 def runAndReturn(self, command):
155155 """Run a command and return the output as a string.
156156 Raises BackupError on non-zero return code."""
@@ -160,16 +160,16 @@
161161 raise BackupError("Non-zero return code from '%s'" % command)
162162 else:
163163 return output
164 -
 164+
165165 def debug(self, stuff):
166166 print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
167 -
 167+
168168 def buildDir(self, base, version):
169169 return join(base, self.dbName, version)
170 -
 170+
171171 def buildPath(self, base, version, filename):
172172 return join(base, version, "%s-%s-%s" % (self.dbName, version, filename))
173 -
 173+
174174 def privatePath(self, filename):
175175 """Take a given filename in the private dump dir for the selected database."""
176176 return self.buildPath(self.wiki.privateDir(), self.date, filename)
@@ -179,38 +179,38 @@
180180 If this database is marked as private, will use the private dir instead.
181181 """
182182 return self.buildPath(self.wiki.publicDir(), self.date, filename)
183 -
 183+
184184 def latestPath(self, filename):
185185 return self.buildPath(self.wiki.publicDir(), "latest", filename)
186 -
 186+
187187 def webPath(self, filename):
188188 return self.buildPath(self.wiki.webDir(), self.date, filename)
189 -
 189+
190190 def makeDir(self, dir):
191191 if exists(dir):
192192 self.debug("Checkdir dir %s ..." % dir)
193193 else:
194194 self.debug("Creating %s ..." % dir)
195195 os.makedirs(dir)
196 -
 196+
197197 def selectDatabaseServer(self):
198198 self.dbServer = self.defaultServer()
199 -
 199+
200200 def defaultServer(self):
201201 command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % shellEscape((
202202 self.config.php, self.config.wikiDir, self.dbName))
203203 return self.runAndReturn(command).strip()
204 -
 204+
205205 def run(self):
206206 self.makeDir(join(self.wiki.publicDir(), self.date))
207207 self.makeDir(join(self.wiki.privateDir(), self.date))
208 -
 208+
209209 self.status("Cleaning up old dumps for %s" % self.dbName)
210210 self.cleanOldDumps()
211 -
 211+
212212 self.status("Starting backup of %s" % self.dbName)
213213 self.selectDatabaseServer()
214 -
 214+
215215 self.items = [PrivateTable("user", "User account data."),
216216 PrivateTable("watchlist", "Users' watchlist settings."),
217217 PrivateTable("ipblocks", "Data for blocks of IP addresses, ranges, and users."),
@@ -219,7 +219,7 @@
220220 PrivateTable("logging", "Data for various events (deletions, uploads, etc)."),
221221 #PrivateTable("oldimage", "Metadata on prior versions of uploaded images."),
222222 #PrivateTable("filearchive", "Deleted image data"),
223 -
 223+
224224 PublicTable("site_stats", "A few statistics such as the page count."),
225225 PublicTable("image", "Metadata on current versions of uploaded images."),
226226 PublicTable("oldimage", "Metadata on prior versions of uploaded images."),
@@ -232,7 +232,7 @@
233233 PublicTable("interwiki", "Set of defined interwiki prefixes and links for this wiki."),
234234 PublicTable("user_groups", "User group assignments."),
235235 PublicTable("category", "Category information."),
236 -
 236+
237237 PublicTable("page", "Base per-page data (id, title, old restrictions, etc)."),
238238 PublicTable("page_restrictions", "Newer per-page restrictions table."),
239239 PublicTable("page_props", "Name/value pairs for pages."),
@@ -240,11 +240,11 @@
241241 #PublicTable("revision", "Base per-revision data (does not include text)."), // safe?
242242 #PrivateTable("text", "Text blob storage. May be compressed, etc."), // ?
243243 PublicTable("redirect", "Redirect list"),
244 -
 244+
245245 TitleDump("List of page titles"),
246 -
 246+
247247 AbstractDump("Extracted page abstracts for Yahoo"),
248 -
 248+
249249 XmlStub("First-pass for page XML data dumps"),
250250 XmlDump("articles",
251251 "<big><b>Articles, templates, image descriptions, and primary meta-pages.</b></big>",
@@ -257,7 +257,7 @@
258258 self.items.append(
259259 PublicTable( "flaggedpages", "This contains a row for each flagged article, containing the stable revision ID, if the lastest edit was flagged, and how long edits have been pending." ))
260260 self.items.append(
261 - PublicTable( "flaggedrevs", "This contains a row for each flagged revision, containing who flagged it, when it was flagged, reviewer comments, the flag values, and the quality tier those flags fall under." ))
 261+ PublicTable( "flaggedrevs", "This contains a row for each flagged revision, containing who flagged it, when it was flagged, reviewer comments, the flag values, and the quality tier those flags fall under." ))
262262
263263 if not self.wiki.isBig():
264264 self.items.append(
@@ -270,10 +270,10 @@
271271 "All pages with complete edit history (.7z)",
272272 "These dumps can be *very* large, uncompressing up to 100 times the archive download size. " +
273273 "Suitable for archival and statistical use, most mirror sites won't want or need this."))
274 -
 274+
275275 files = self.listFilesFor(self.items)
276276 self.prepareChecksums()
277 -
 277+
278278 for item in self.items:
279279 item.start(self)
280280 self.updateStatusFiles()
@@ -306,9 +306,9 @@
307307
308308 if self.failCount < 1:
309309 self.completeDump(files)
310 -
 310+
311311 self.statusComplete()
312 -
 312+
313313 def cleanOldDumps(self):
314314 old = self.wiki.dumpDirs()
315315 if old:
@@ -327,7 +327,7 @@
328328 self.runCommand(command)
329329 else:
330330 self.status("No old dumps to purge.")
331 -
 331+
332332 def reportFailure(self):
333333 if self.config.adminMail:
334334 subject = "Dump failure for " + self.dbName
@@ -337,27 +337,27 @@
338338 "time": prettyTime(),
339339 "url": "/".join((self.config.webRoot, self.dbName, self.date, ''))}
340340 config.mail(subject, message)
341 -
 341+
342342 def listFilesFor(self, items):
343343 files = []
344344 for item in items:
345345 for file in item.listFiles(self):
346346 files.append(file)
347347 return files
348 -
 348+
349349 def updateStatusFiles(self, done=False):
350350 self.saveStatus(self.items, done)
351 -
 351+
352352 def saveStatus(self, items, done=False):
353353 """Write out an HTML file with the status for this wiki's dump and links to completed files."""
354 - try:
 354+ try:
355355 self.wiki.writeIndex(self.reportStatus(items, done))
356 -
 356+
357357 # Short line for report extraction
358358 self.wiki.writeStatus(self.reportDatabase(items, done))
359359 except:
360360 print "Couldn't update status files. Continuing anyways"
361 -
 361+
362362 def progressReports(self):
363363 status = {}
364364 for db in self.dblist:
@@ -366,7 +366,7 @@
367367 status[db] = item
368368 # sorted by name...
369369 return [status[db] for db in self.dblist if db in status]
370 -
 370+
371371 def readProgress(self, db):
372372 dir = self.latestDump(db)
373373 if dir:
@@ -378,18 +378,18 @@
379379 else:
380380 self.debug("No dump dir for %s?" % db)
381381 return None
382 -
 382+
383383 def reportDatabase(self, items, done=False):
384384 """Put together a brief status summary and link for the current database."""
385385 status = self.reportStatusLine(done)
386386 html = self.wiki.reportStatusLine(status)
387 -
 387+
388388 activeItems = [x for x in items if x.status == "in-progress"]
389389 if activeItems:
390390 return html + "<ul>" + "\n".join([self.reportItem(x) for x in activeItems]) + "</ul>"
391391 else:
392392 return html
393 -
 393+
394394 def reportStatus(self, items, done=False):
395395 """Put together a status page for this database, with all its component dumps."""
396396 statusItems = [self.reportItem(item) for item in items]
@@ -403,7 +403,7 @@
404404 "items": html,
405405 "checksum": self.webPath("md5sums.txt"),
406406 "index": self.config.index}
407 -
 407+
408408 def reportPreviousDump(self, done):
409409 """Produce a link to the previous dump, if any"""
410410 try:
@@ -418,7 +418,7 @@
419419 prefix = "This dump is in progress; see also the "
420420 message = "previous dump from"
421421 return "%s<a href=\"../%s/\">%s %s</a>" % (prefix, raw, message, date)
422 -
 422+
423423 def reportStatusLine(self, done=False):
424424 if done:
425425 classes = "done"
@@ -434,7 +434,7 @@
435435 ess = "s"
436436 text += ", %d item%s failed" % (self.failCount, ess)
437437 return "<span class='%s'>%s</span>" % (classes, text)
438 -
 438+
439439 def reportItem(self, item):
440440 """Return an HTML fragment with info on the progress of this item."""
441441 html = "<li class='%s'><span class='updates'>%s</span> <span class='status'>%s</span> <span class='title'>%s</span>" % (item.status, item.updated, item.status, item.description())
@@ -451,8 +451,8 @@
452452 html += "</ul>"
453453 html += "</li>"
454454 return html
455 -
456 - # Report on the file size & status of the current output and output a link if were done
 455+
 456+ # Report on the file size & status of the current output and output a link if were done
457457 def reportFile(self, file, status):
458458 filepath = self.publicPath(file)
459459 if status == "in-progress" and exists (filepath):
@@ -464,13 +464,13 @@
465465 return "<li class='file'><a href=\"%s\">%s</a> %s</li>" % (webpath, file, size)
466466 else:
467467 return "<li class='missing'>%s</li>" % file
468 -
 468+
469469 def lockFile(self):
470470 return self.publicPath("lock")
471 -
 471+
472472 def doneFile(self):
473473 return self.publicPath("done")
474 -
 474+
475475 def lock(self):
476476 self.status("Creating lock file.")
477477 lockfile = self.lockFile()
@@ -486,46 +486,46 @@
487487 # failure? let it die
488488 pass
489489 #####date -u > $StatusLockFile
490 -
 490+
491491 def unlock(self):
492492 self.status("Marking complete.")
493493 ######date -u > $StatusDoneFile
494 -
 494+
495495 def dateStamp(self):
496496 #date -u --iso-8601=seconds
497497 pass
498 -
 498+
499499 def status(self, message):
500500 #echo $DatabaseName `dateStamp` OK: "$1" | tee -a $StatusLog | tee -a $GlobalLog
501501 self.debug(message)
502 -
 502+
503503 def statusComplete(self):
504504 # echo $DatabaseName `dateStamp` SUCCESS: "done." | tee -a $StatusLog | tee -a $GlobalLog
505505 self.debug("SUCCESS: done.")
506 -
 506+
507507 def prepareChecksums(self):
508508 """Create the md5 checksum file at the start of the run.
509509 This will overwrite a previous run's output, if any."""
510510 output = file(self.publicPath("md5sums.txt"), "w")
511 -
 511+
512512 def checksum(self, filename):
513513 """Run checksum for an output file, and append to the list."""
514514 output = file(self.publicPath("md5sums.txt"), "a")
515515 self.saveChecksum(filename, output)
516516 output.close()
517 -
 517+
518518 def saveChecksum(self, file, output):
519519 self.debug("Checksumming %s" % file)
520520 path = self.publicPath(file)
521521 if os.path.exists(path):
522522 checksum = md5FileLine(path)
523523 output.write(checksum)
524 -
 524+
525525 def completeDump(self, files):
526526 # FIXME: md5sums.txt won't be consistent with mixed data.
527527 # Buuuuut life sucks, huh?
528528 self.saveSymlink("md5sums.txt")
529 -
 529+
530530 def saveSymlink(self, file):
531531 self.makeDir(join(self.wiki.publicDir(), 'latest'))
532532 real = self.publicPath(file)
@@ -539,7 +539,7 @@
540540 relative = relativePath(real, dirname(link))
541541 self.debug("Adding symlink %s -> %s" % (link, relative))
542542 os.symlink(relative, link)
543 -
 543+
544544 def saveFeed(self, file):
545545 self.makeDir(join(self.wiki.publicDir(), 'latest'))
546546 filePath = self.webPath(file)
@@ -562,26 +562,26 @@
563563 self.updated = ""
564564 self.status = "waiting"
565565 self.progress = ""
566 -
 566+
567567 def description(self):
568568 return self._desc
569 -
 569+
570570 def detail(self):
571571 """Optionally return additional text to appear under the heading."""
572572 return None
573 -
 573+
574574 def setStatus(self, status):
575575 self.status = status
576576 self.updated = prettyTime()
577 -
 577+
578578 def listFiles(self, runner):
579579 """Return a list of filenames which should be exported and checksummed"""
580580 return []
581 -
 581+
582582 def start(self, runner):
583583 """Set the 'in progress' flag so we can output status."""
584584 self.setStatus("in-progress")
585 -
 585+
586586 def dump(self, runner):
587587 """Attempt to run the operation, updating progress/status info."""
588588 try:
@@ -590,52 +590,52 @@
591591 self.setStatus("failed")
592592 raise ex
593593 self.setStatus("done")
594 -
 594+
595595 def run(self, runner):
596596 """Actually do something!"""
597597 pass
598 -
 598+
599599 def progressCallback(self, runner, line):
600600 """Receive a status line from a shellout and update the status files."""
601601 # pass through...
602602 sys.stderr.write(line)
603603 self.progress = line.strip()
604604 runner.updateStatusFiles()
605 -
 605+
606606 def matchCheckpoint(self, checkpoint):
607607 return checkpoint == self.__class__.__name__
608608
609609 class PublicTable(Dump):
610610 """Dump of a table using MySQL's mysqldump utility."""
611 -
 611+
612612 def __init__(self, table, desc):
613613 Dump.__init__(self, desc)
614614 self._table = table
615 -
 615+
616616 def _file(self):
617617 return self._table + ".sql.gz"
618 -
 618+
619619 def _path(self, runner):
620620 return runner.publicPath(self._file())
621 -
 621+
622622 def run(self, runner):
623623 return runner.saveTable(self._table, self._path(runner))
624 -
 624+
625625 def listFiles(self, runner):
626626 return [self._file()]
627 -
 627+
628628 def matchCheckpoint(self, checkpoint):
629629 return checkpoint == self.__class__.__name__ + "." + self._table
630630
631631 class PrivateTable(PublicTable):
632632 """Hidden table dumps for private data."""
633 -
 633+
634634 def description(self):
635635 return self._desc + " (private)"
636 -
 636+
637637 def _path(self, runner):
638638 return runner.privatePath(self._file())
639 -
 639+
640640 def listFiles(self, runner):
641641 """Private table won't have public files to list."""
642642 return []
@@ -645,18 +645,18 @@
646646 """Create lightweight skeleton dumps, minus bulk text.
647647 A second pass will import text from prior dumps or the database to make
648648 full files for the public."""
649 -
 649+
650650 def description(self):
651651 return "Creating split stub dumps..."
652 -
 652+
653653 def detail(self):
654654 return "These files contain no page text, only revision metadata."
655 -
 655+
656656 def listFiles(self, runner):
657657 return ["stub-meta-history.xml.gz",
658658 "stub-meta-current.xml.gz",
659659 "stub-articles.xml.gz",]
660 -
 660+
661661 def run(self, runner):
662662 history = runner.publicPath("stub-meta-history.xml.gz")
663663 current = runner.publicPath("stub-meta-current.xml.gz")
@@ -688,20 +688,20 @@
689689 history,
690690 current,
691691 articles))
692 - runner.runCommand(command, callback=self.progressCallback)
 692+ runner.runCommand(command, callback=self.progressCallback)
693693
694694 class XmlLogging(Dump):
695695 """ Create a logging dump of all page activity """
696 -
 696+
697697 def description(self):
698698 return "<big><b>Log events to all pages.</big></b>"
699 -
 699+
700700 def detail(self):
701701 return "This contains the log of actions performed on pages."
702 -
 702+
703703 def listFiles(self, runner):
704704 return ["pages-logging.xml.gz"]
705 -
 705+
706706 def run(self, runner):
707707 logging = runner.publicPath("pages-logging.xml.gz")
708708 if exists(logging):
@@ -735,24 +735,24 @@
736736 def detail(self):
737737 """Optionally return additional text to appear under the heading."""
738738 return self._detail
739 -
 739+
740740 def _file(self, ext):
741741 return "pages-" + self._subset + ".xml." + ext
742 -
 742+
743743 def _path(self, runner, ext):
744744 return runner.publicPath(self._file(ext))
745 -
 745+
746746 def run(self, runner):
747747 filters = self.buildFilters(runner)
748748 command = self.buildCommand(runner)
749749 eta = self.buildEta(runner)
750750 return runner.runCommand(command + " " + filters + " " + eta,
751751 callback=self.progressCallback)
752 -
 752+
753753 def buildEta(self, runner):
754754 """Tell the dumper script whether to make ETA estimate on page or revision count."""
755755 return "--current"
756 -
 756+
757757 def buildFilters(self, runner):
758758 """Construct the output filter options for dumpTextPass.php"""
759759 xmlbz2 = self._path(runner, "bz2")
@@ -761,14 +761,14 @@
762762 else:
763763 bz2mode = "bzip2"
764764 return "--output=%s:%s" % shellEscape((bz2mode, xmlbz2))
765 -
 765+
766766 def buildCommand(self, runner):
767767 """Build the command line for the dump, minus output and filter options"""
768 -
 768+
769769 # Page and revision data pulled from this skeleton dump...
770770 stub = runner.publicPath("stub-%s.xml.gz" % self._subset),
771771 stubOption = "--stub=gzip:%s" % stub
772 -
 772+
773773 # Try to pull text from the previous run; most stuff hasn't changed
774774 #Source=$OutputDir/pages_$section.xml.bz2
775775 if self._prefetch:
@@ -806,7 +806,7 @@
807807 spawn))
808808 command = dumpCommand
809809 return command
810 -
 810+
811811 def _findPreviousDump(self, runner):
812812 """The previously-linked previous successful dump."""
813813 bzfile = self._file("bz2")
@@ -830,52 +830,52 @@
831831 return old
832832 runner.debug("Could not locate a prefetchable dump.")
833833 return None
834 -
 834+
835835 def listFiles(self, runner):
836836 return [self._file("bz2")]
837 -
 837+
838838 def matchCheckpoint(self, checkpoint):
839839 return checkpoint == self.__class__.__name__ + "." + self._subset
840840
841841 class BigXmlDump(XmlDump):
842842 """XML page dump for something larger, where a 7-Zip compressed copy
843843 could save 75% of download time for some users."""
844 -
 844+
845845 def buildEta(self, runner):
846846 """Tell the dumper script whether to make ETA estimate on page or revision count."""
847847 return "--full"
848848
849849 class XmlRecompressDump(Dump):
850850 """Take a .bz2 and recompress it as 7-Zip."""
851 -
 851+
852852 def __init__(self, subset, desc, detail):
853853 Dump.__init__(self, desc)
854854 self._subset = subset
855855 self._detail = detail
856 -
 856+
857857 def detail(self):
858858 """Optionally return additional text to appear under the heading."""
859859 return self._detail
860 -
 860+
861861 def _file(self, ext):
862862 return "pages-" + self._subset + ".xml." + ext
863 -
 863+
864864 def _path(self, runner, ext):
865865 return runner.publicPath(self._file(ext))
866 -
 866+
867867 def run(self, runner):
868868 if runner.lastFailed:
869869 raise BackupError("bz2 dump incomplete, not recompressing")
870 -
 870+
871871 xmlbz2 = self._path(runner, "bz2")
872872 xml7z = self._path(runner, "7z")
873 -
 873+
874874 # Clear prior 7zip attempts; 7zip will try to append an existing archive
875875 if exists(xml7z):
876876 os.remove(xml7z)
877 -
 877+
878878 # temp hack force 644 permissions until ubuntu bug # 370618 is fixed - tomasz 5/1/2009
879 - command = "%s -dc < %s | %s a -si %s ; chmod 644 %s" % shellEscape((
 879+ command = "%s -dc < %s | %s a -si %s ; chmod 644 %s" % shellEscape((
880880 runner.config.bzip2,
881881 xmlbz2,
882882 runner.config.sevenzip,
@@ -883,16 +883,16 @@
884884 xml7z));
885885
886886 return runner.runCommand(command, callback=self.progressCallback)
887 -
 887+
888888 def listFiles(self, runner):
889889 return [self._file("7z")]
890 -
 890+
891891 def matchCheckpoint(self, checkpoint):
892892 return checkpoint == self.__class__.__name__ + "." + self._subset
893893
894894 class AbstractDump(Dump):
895895 """XML dump for Yahoo!'s Active Abstracts thingy"""
896 -
 896+
897897 def run(self, runner):
898898 command = """
899899 %s -q %s/maintenance/dumpBackup.php \
@@ -919,7 +919,7 @@
920920 self._variantOption(variant)))
921921 command = command + "\n"
922922 runner.runCommand(command, callback=self.progressCallback)
923 -
 923+
924924 def _variants(self, runner):
925925 # If the database name looks like it's marked as Chinese language,
926926 # return a list including Simplified and Traditional versions, so
@@ -928,28 +928,28 @@
929929 return ("", "zh-cn", "zh-tw")
930930 else:
931931 return ("",)
932 -
 932+
933933 def _variantOption(self, variant):
934934 if variant == "":
935935 return ""
936936 else:
937937 return ":variant=%s" % variant
938 -
 938+
939939 def _variantFile(self, variant):
940940 if variant == "":
941941 return "abstract.xml"
942942 else:
943943 return "abstract-%s.xml" % variant
944 -
 944+
945945 def listFiles(self, runner):
946946 return [self._variantFile(x) for x in self._variants(runner)]
947 -
 947+
948948 class TitleDump(Dump):
949949 """This is used by "wikiproxy", a program to add Wikipedia links to BBC news online"""
950950 def run(self, runner):
951951 return runner.saveSql("select page_title from page where page_namespace=0;",
952952 runner.publicPath("all-titles-in-ns0.gz"))
953 -
 953+
954954 def listFiles(self, runner):
955955 return ["all-titles-in-ns0.gz"]
956956
@@ -958,12 +958,12 @@
959959 if config.halt:
960960 print "Dump process halted by config."
961961 return None
962 -
 962+
963963 next = config.dbListByAge()
964964 next.reverse()
965965
966966 print "Finding oldest unlocked wiki..."
967 -
 967+
968968 for db in next:
969969 wiki = WikiDump.Wiki(config, db)
970970 try:
@@ -973,11 +973,11 @@
974974 print "Couldn't lock %s, someone else must have got it..." % db
975975 continue
976976 return None
977 -
 977+
978978 if __name__ == "__main__":
979979 try:
980980 config = WikiDump.Config()
981 -
 981+
982982 date = None
983983 checkpoint = None
984984 forceLock = False
@@ -1006,7 +1006,7 @@
10071007 wiki.lock()
10081008 else:
10091009 wiki = findAndLockNextWiki(config)
1010 -
 1010+
10111011 if wiki:
10121012 runner = Runner(wiki, date, checkpoint, prefetch, spawn)
10131013 print "Running %s..." % wiki.dbName

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r66041 | removing mysqldump locking | tomasz | 19:10, 7 May 2010

Comments

#Comment by Simetrical (talk | contribs)   02:36, 19 July 2010

Not really related to this commit specifically, but why on Earth is this script doing a separate mysqldump for each table? It should be doing mysqldump --single-transaction on the whole database. Otherwise you guarantee an inconsistent dump.

#Comment by MZMcBride (talk | contribs)   02:25, 20 July 2010

This diff is really annoying.

#Comment by Tim Starling (talk | contribs)   02:28, 20 July 2010

Then use svn diff -x-w.

#Comment by Krinkle (talk | contribs)   02:33, 20 July 2010

ViewVC shows it nicely as well: http://tinyurl.com/wmsvn-dif69443

#Comment by MarkAHershberger (talk | contribs)   00:00, 21 July 2010

I've looked at providing CodeReview with the option of toggling "-x-w" off and on, but the PECL extension used here doesn't allow us to pass options.

Status & tagging log