r86298 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r86297 | r86298 | r86299 >
Date:10:40, 18 April 2011
Author:ariel
Status:deferred
Tags:
Comment:
More class cleanup (not happy with it; more coming later); remove some dead code.
Modified paths:
  • /branches/ariel/xmldumps-backup/WikiDump.py (modified) (history)
  • /branches/ariel/xmldumps-backup/worker.py (modified) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/WikiDump.py
@@ -9,75 +9,120 @@
1010 import time
1111 import tempfile
1212
13 -def fileAge(filename):
14 - return time.time() - os.stat(filename).st_mtime
 13+class FileUtils(object):
1514
16 -def atomicCreate(filename, mode='w'):
17 - """Create a file, aborting if it already exists..."""
18 - fd = os.open(filename, os.O_EXCL + os.O_CREAT + os.O_WRONLY)
19 - return os.fdopen(fd, mode)
 15+ def fileAge(filename):
 16+ return time.time() - os.stat(filename).st_mtime
2017
21 -def shellEscape(param):
22 - """Escape a string parameter, or set of strings, for the shell."""
23 - if isinstance(param, basestring):
24 - return "'" + param.replace("'", "'\\''") + "'"
25 - elif param is None:
26 - # A blank string might actually be needed; None means we can leave it out
27 - return ""
28 - else:
29 - return tuple([shellEscape(x) for x in param])
 18+ def atomicCreate(filename, mode='w'):
 19+ """Create a file, aborting if it already exists..."""
 20+ fd = os.open(filename, os.O_EXCL + os.O_CREAT + os.O_WRONLY)
 21+ return os.fdopen(fd, mode)
3022
31 -def prettySize(size):
32 - """Return a string with an attractively formatted file size."""
33 - quanta = ("%d bytes", "%d KB", "%0.1f MB", "%0.1f GB", "%0.1f TB")
34 - return _prettySize(size, quanta)
 23+ def writeFile(dirname, filename, text):
 24+ """Write text to a file, as atomically as possible, via a temporary file in the same directory."""
 25+
 26+ (fd, tempFilename ) = tempfile.mkstemp("_txt","wikidump_",dirname);
 27+ os.write(fd,text)
 28+ os.close(fd)
 29+ # This may fail across filesystems or on Windows.
 30+ # Of course nothing else will work on Windows. ;)
 31+ os.rename(tempFilename, filename)
3532
36 -def _prettySize(size, quanta):
37 - if size < 1024 or len(quanta) == 1:
38 - return quanta[0] % size
39 - else:
40 - return _prettySize(size / 1024.0, quanta[1:])
 33+ def readFile(filename):
 34+ """Read text from a file in one fell swoop."""
 35+ file = open(filename, "r")
 36+ text = file.read()
 37+ file.close()
 38+ return text
4139
42 -def today():
43 - return time.strftime("%Y%m%d", time.gmtime())
 40+ def splitPath(path):
 41+ # For some reason, os.path.split only does one level.
 42+ parts = []
 43+ (path, file) = os.path.split(path)
 44+ if not file:
 45+ # Probably a final slash
 46+ (path, file) = os.path.split(path)
 47+ while file:
 48+ parts.insert(0, file)
 49+ (path, file) = os.path.split(path)
 50+ return parts
4451
45 -def prettyTime():
46 - return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
 52+ def relativePath(path, base):
 53+ """Return a relative path to 'path' from the directory 'base'."""
 54+ path = FileUtils.splitPath(path)
 55+ base = FileUtils.splitPath(base)
 56+ while base and path[0] == base[0]:
 57+ path.pop(0)
 58+ base.pop(0)
 59+ for prefix in base:
 60+ path.insert(0, "..")
 61+ return os.path.join(*path)
4762
48 -def prettyDate(key):
49 - "Prettify a MediaWiki date key"
50 - return "-".join((key[0:4], key[4:6], key[6:8]))
 63+ def prettySize(size):
 64+ """Return a string with an attractively formatted file size."""
 65+ quanta = ("%d bytes", "%d KB", "%0.1f MB", "%0.1f GB", "%0.1f TB")
 66+ return FileUtils._prettySize(size, quanta)
5167
52 -def dumpFile(dirname, filename, text):
53 - """Dump a string to a file, as atomically as possible, via a temporary file in the same directory."""
54 -
55 - (fd, tempFilename ) = tempfile.mkstemp("_txt","wikidump_",dirname);
56 - os.write(fd,text)
57 - os.close(fd)
58 - # This may fail across filesystems or on Windows.
59 - # Of course nothing else will work on Windows. ;)
60 - os.rename(tempFilename, filename)
 68+ def _prettySize(size, quanta):
 69+ if size < 1024 or len(quanta) == 1:
 70+ return quanta[0] % size
 71+ else:
 72+ return FileUtils._prettySize(size / 1024.0, quanta[1:])
6173
62 -def readFile(filename):
63 - file = open(filename, "r")
64 - text = file.read()
65 - file.close()
66 - return text
 74+ fileAge = staticmethod(fileAge)
 75+ atomicCreate = staticmethod(atomicCreate)
 76+ writeFile = staticmethod(writeFile)
 77+ readFile = staticmethod(readFile)
 78+ splitPath = staticmethod(splitPath)
 79+ relativePath = staticmethod(relativePath)
 80+ prettySize = staticmethod(prettySize)
 81+ _prettySize = staticmethod(_prettySize)
6782
68 -def dbList(filename):
69 - """Read database list from a file"""
70 - if (not filename):
71 - return []
72 - infile = open(filename)
73 - dbs = []
74 - for line in infile:
75 - line = line.strip()
76 - if line != "":
77 - dbs.append(line)
78 - infile.close()
79 - dbs.sort()
80 - return dbs
 83+class TimeUtils(object):
8184
 85+ def today():
 86+ return time.strftime("%Y%m%d", time.gmtime())
 87+
 88+ def prettyTime():
 89+ return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
 90+
 91+ def prettyDate(key):
 92+ "Prettify a MediaWiki date key"
 93+ return "-".join((key[0:4], key[4:6], key[6:8]))
 94+
 95+ today = staticmethod(today)
 96+ prettyTime = staticmethod(prettyTime)
 97+ prettyDate = staticmethod(prettyDate)
 98+
 99+class MiscUtils(object):
 100+ def dbList(filename):
 101+ """Read database list from a file"""
 102+ if (not filename):
 103+ return []
 104+ infile = open(filename)
 105+ dbs = []
 106+ for line in infile:
 107+ line = line.strip()
 108+ if line != "":
 109+ dbs.append(line)
 110+ infile.close()
 111+ dbs.sort()
 112+ return dbs
 113+
 114+ def shellEscape(param):
 115+ """Escape a string parameter, or set of strings, for the shell."""
 116+ if isinstance(param, basestring):
 117+ return "'" + param.replace("'", "'\\''") + "'"
 118+ elif param is None:
 119+ # A blank string might actually be needed; None means we can leave it out
 120+ return ""
 121+ else:
 122+ return tuple([MiscUtils.shellEscape(x) for x in param])
 123+
 124+ dbList = staticmethod(dbList)
 125+ shellEscape = staticmethod(shellEscape)
 126+
82127 class Config(object):
83128 def __init__(self, configFile=False):
84129 home = os.path.dirname(sys.argv[0])
@@ -155,11 +200,11 @@
156201 print "The mandatory setting 'dir' in the section 'wiki' was not defined."
157202 raise ConfigParser.NoOptionError('wiki','dir')
158203
159 - self.dbList = dbList(conf.get("wiki", "dblist"))
160 - self.skipDbList = dbList(conf.get("wiki", "skipdblist"))
161 - self.privateList = dbList(conf.get("wiki", "privatelist"))
162 - self.bigList = dbList(conf.get("wiki", "biglist"))
163 - self.flaggedRevsList = dbList(conf.get("wiki", "flaggedrevslist"))
 204+ self.dbList = MiscUtils.dbList(conf.get("wiki", "dblist"))
 205+ self.skipDbList = MiscUtils.dbList(conf.get("wiki", "skipdblist"))
 206+ self.privateList = MiscUtils.dbList(conf.get("wiki", "privatelist"))
 207+ self.bigList = MiscUtils.dbList(conf.get("wiki", "biglist"))
 208+ self.flaggedRevsList = MiscUtils.dbList(conf.get("wiki", "flaggedrevslist"))
164209 self.wikiDir = conf.get("wiki", "dir")
165210 self.forceNormal = conf.getint("wiki", "forceNormal")
166211 self.halt = conf.getint("wiki", "halt")
@@ -248,8 +293,8 @@
249294 # tack on the file mtime so that if we have multiple wikis
250295 # dumped on the same day, they get ordered properly
251296 date = int(today()) - int(last)
252 - age = fileAge(dumpStatus)
253 - status = readFile(dumpStatus)
 297+ age = FileUtils.fileAge(dumpStatus)
 298+ status = FileUtils.readFile(dumpStatus)
254299 except:
255300 print "dump dir %s corrupt?" % dumpStatus
256301 dumpFailed = (status == '') or ('dump aborted' in status)
@@ -259,7 +304,7 @@
260305
261306 def readTemplate(self, name):
262307 template = os.path.join(self.templateDir, name)
263 - return readFile(template)
 308+ return FileUtils.readFile(template)
264309
265310 def mail(self, subject, body):
266311 """Send out a quickie email."""
@@ -330,7 +375,7 @@
331376 # Maybe it was just created (race condition)?
332377 if not os.path.isdir(self.privateDir()):
333378 raise
334 - f = atomicCreate(self.lockFile(), "w")
 379+ f = FileUtils.atomicCreate(self.lockFile(), "w")
335380 f.write("%s %d" % (socket.getfqdn(), os.getpid()))
336381 f.close()
337382
@@ -358,7 +403,7 @@
359404 def writePerDumpIndex(self, html):
360405 directory = os.path.join(self.publicDir(), self.date)
361406 index = os.path.join(self.publicDir(), self.date, self.config.perDumpIndex)
362 - dumpFile(directory, index, html)
 407+ FileUtils.writeFile(directory, index, html)
363408
364409 def existsPerDumpIndex(self):
365410 index = os.path.join(self.publicDir(), self.date, self.config.perDumpIndex)
@@ -367,14 +412,14 @@
368413 def writeStatus(self, message):
369414 directory = os.path.join(self.publicDir(), self.date)
370415 index = os.path.join(self.publicDir(), self.date, "status.html")
371 - dumpFile(directory, index, message)
 416+ FileUtils.writeFile(directory, index, message)
372417
373418 def statusLine(self):
374419 date = self.latestDump()
375420 if date:
376421 status = os.path.join(self.publicDir(), date, "status.html")
377422 try:
378 - return readFile(status)
 423+ return FileUtils.readFile(status)
379424 except:
380425 return self.reportStatusLine("missing status record")
381426 else:
@@ -383,9 +428,9 @@
384429 def reportStatusLine(self, status, error=False):
385430 if error:
386431 # No state information, hide the timestamp
387 - stamp = "<span style=\"visible: none\">" + prettyTime() + "</span>"
 432+ stamp = "<span style=\"visible: none\">" + TimeUtils.prettyTime() + "</span>"
388433 else:
389 - stamp = prettyTime()
 434+ stamp = TimeUtils.prettyTime()
390435 if self.isPrivate():
391436 link = "%s (private data)" % self.dbName
392437 else:
@@ -426,7 +471,7 @@
427472 return os.path.join(self.privateDir(), "lock")
428473
429474 def lockAge(self):
430 - return fileAge(self.lockFile())
 475+ return FileUtils.fileAge(self.lockFile())
431476
432477 class LockWatchdog(threading.Thread):
433478 """Touch the given file every 10 seconds until asked to stop."""
Index: branches/ariel/xmldumps-backup/worker.py
@@ -21,34 +21,13 @@
2222
2323 from os.path import dirname, exists, getsize, join, realpath
2424 from subprocess import Popen, PIPE
25 -from WikiDump import prettyTime, prettySize, shellEscape
 25+#from WikiDump import FileUtils, DirUtils, MiscUtils, prettyTime, prettySize, shellEscape
 26+from WikiDump import FileUtils, MiscUtils, TimeUtils
2627 from CommandManagement import CommandPipeline, CommandSeries, CommandsInParallel
2728
28 -def splitPath(path):
29 - # For some reason, os.path.split only does one level.
30 - parts = []
31 - (path, file) = os.path.split(path)
32 - if not file:
33 - # Probably a final slash
34 - (path, file) = os.path.split(path)
35 - while file:
36 - parts.insert(0, file)
37 - (path, file) = os.path.split(path)
38 - return parts
39 -
40 -def relativePath(path, base):
41 - """Return a relative path to 'path' from the directory 'base'."""
42 - path = splitPath(path)
43 - base = splitPath(base)
44 - while base and path[0] == base[0]:
45 - path.pop(0)
46 - base.pop(0)
47 - for prefix in base:
48 - path.insert(0, "..")
49 - return os.path.join(*path)
50 -
 29+# FIXME test this change.
5130 def xmlEscape(text):
52 - return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
 31+ return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;");
5332
5433 class Logger(object):
5534
@@ -183,7 +162,7 @@
184163
185164 def defaultServer(self):
186165 # if this fails what do we do about it? Not a bleeping thing. *ugh* FIXME!!
187 - command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % shellEscape((
 166+ command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % MiscUtils.shellEscape((
188167 self.config.php, self.config.wikiDir, self.dbName))
189168 return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
190169
@@ -238,7 +217,7 @@
239218 def getDBTablePrefix(self):
240219 """Get the prefix for all tables for the specific wiki ($wgDBprefix)"""
241220 # FIXME later full path
242 - command = "echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s" % shellEscape((
 221+ command = "echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s" % MiscUtils.shellEscape((
243222 self.config.php, self.config.wikiDir, self.dbName))
244223 return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
245224
@@ -257,37 +236,21 @@
258237 output = proc.fromchild.read()
259238 retval = proc.wait()
260239 while (retval and retries < maxretries):
261 - if self.logCallback:
262 - self.logCallback("Non-zero return code from '%s'" % command)
 240+ if logCallback:
 241+ logCallback("Non-zero return code from '%s'" % command)
263242 time.sleep(5)
264243 proc = popen2.Popen4(command, 64)
265244 output = proc.fromchild.read()
266245 retval = proc.wait()
267246 retries = retries + 1
268247 if retval:
269 - if self.logCallback:
270 - self.logCallback("Non-zero return code from '%s'" % command)
 248+ if logCallback:
 249+ logCallback("Non-zero return code from '%s'" % command)
271250 raise BackupError("Non-zero return code from '%s'" % command)
272251 else:
273252 return output
274253
275 - def runAndReport(self, command, callback):
276 - """Shell out to a command, and feed output lines to the callback function.
277 - Returns the exit code from the program once complete.
278 - stdout and stderr will be combined into a single stream.
279 - """
280 - # FIXME convert all these calls so they just use runCommand now
281 - proc = popen2.Popen4(command, 64)
282 - #for line in proc.fromchild:
283 - # callback(self, line)
284 - line = proc.fromchild.readline()
285 - while line:
286 - callback(self, line)
287 - line = proc.fromchild.readline()
288 - return proc.wait()
289 -
290254 runAndReturn = staticmethod(runAndReturn)
291 - runAndReport = staticmethod(runAndReport)
292255
293256 class PageAndEditStats(object):
294257 def __init__(self, wiki, dbName, errorCallback = None):
@@ -559,7 +522,7 @@
560523 def writeDumpRunInfoFile(self, text):
561524 directory = self._getDumpRunInfoDirName()
562525 dumpRunInfoFilename = self._getDumpRunInfoFileName()
563 - WikiDump.dumpFile(directory, dumpRunInfoFilename, text)
 526+ FileUtils.writeFile(directory, dumpRunInfoFilename, text)
564527
565528 def saveDumpRunInfoFile(self, done=False):
566529 """Write out a simple text file with the status for this wiki's dump."""
@@ -799,7 +762,7 @@
800763 message = self.config.readTemplate("errormail.txt") % {
801764 "db": self.dbName,
802765 "date": self.date,
803 - "time": prettyTime(),
 766+ "time": TimeUtils.prettyTime(),
804767 "url": "/".join((self.config.webRoot, self.dbName, self.date, ''))}
805768 config.mail(subject, message)
806769
@@ -820,7 +783,7 @@
821784 raise(ValueException)
822785 except:
823786 return "No prior dumps of this database stored."
824 - prettyDate = WikiDump.prettyDate(rawDate)
 787+ prettyDate = TimeUtils.prettyDate(rawDate)
825788 if done:
826789 prefix = ""
827790 message = "Last dumped on"
@@ -870,10 +833,10 @@
871834 def reportFile(self, file, itemStatus):
872835 filepath = self.dumpDir.publicPath(file)
873836 if itemStatus == "in-progress" and exists (filepath):
874 - size = prettySize(getsize(filepath))
 837+ size = FileUtils.prettySize(getsize(filepath))
875838 return "<li class='file'>%s %s (written) </li>" % (file, size)
876839 elif itemStatus == "done" and exists(filepath):
877 - size = prettySize(getsize(filepath))
 840+ size = FileUtils.prettySize(getsize(filepath))
878841 webpath = self.dumpDir.webPath(file)
879842 return "<li class='file'><a href=\"%s\">%s</a> %s</li>" % (webpath, file, size)
880843 else:
@@ -896,7 +859,7 @@
897860 # Override, continuing a past dump?
898861 self.date = date
899862 else:
900 - self.date = WikiDump.today()
 863+ self.date = TimeUtils.today()
901864 wiki.setDate(self.date)
902865
903866 self.lastFailed = False
@@ -931,7 +894,7 @@
932895 done = log.doJobOnLogQueue()
933896
934897 def logAndPrint(self, message):
935 - if (self.log):
 898+ if hasattr(self,'log') and self.log:
936899 self.log.addToLogQueue("%s\n" % message)
937900 print message
938901
@@ -990,8 +953,8 @@
991954 return 1
992955
993956 def debug(self, stuff):
994 - self.logAndPrint("%s: %s %s" % (prettyTime(), self.dbName, stuff))
995 -# print "%s: %s %s" % (prettyTime(), self.dbName, stuff)
 957+ self.logAndPrint("%s: %s %s" % (TimeUtils.prettyTime(), self.dbName, stuff))
 958+# print "%s: %s %s" % (MiscUtils.prettyTime(), self.dbName, stuff)
996959
997960 def runHandleFailure(self):
998961 if self.status.failCount < 1:
@@ -1226,7 +1189,7 @@
12271190 else:
12281191 self.logAndPrint("What the hell dude, %s is not a symlink" % link)
12291192 raise BackupError("What the hell dude, %s is not a symlink" % link)
1230 - relative = relativePath(real, dirname(link))
 1193+ relative = FileUtils.relativePath(real, dirname(link))
12311194 # if we removed the link cause it's obsolete, make the new one
12321195 if exists(real) and not exists(link):
12331196 self.debug("Adding symlink %s -> %s" % (link, relative))
@@ -1247,7 +1210,7 @@
12481211 "date": time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())}
12491212 directory = self.dumpDir.latestDir()
12501213 rssPath = self.dumpDir.latestPath(file + "-rss.xml")
1251 - WikiDump.dumpFile(directory, rssPath, rssText)
 1214+ FileUtils.writeFile(directory, rssPath, rssText)
12521215
12531216 class Dump(object):
12541217 def __init__(self, name, desc):
@@ -1279,7 +1242,7 @@
12801243 def setStatus(self,status,setUpdated = True):
12811244 self.runInfo.setStatus(status)
12821245 if (setUpdated):
1283 - self.runInfo.setUpdated(prettyTime())
 1246+ self.runInfo.setUpdated(TimeUtils.prettyTime())
12841247
12851248 def setUpdated(self, updated):
12861249 self.runInfo.setUpdated(updated)
@@ -1345,18 +1308,18 @@
13461309 # we assume the result is always going to be run in a subshell.
13471310 # much quicker than this script trying to read output
13481311 # and pass it to a subprocess
1349 - outputFilenameEsc = shellEscape(outputFilename)
1350 - headEsc = shellEscape(head)
1351 - tailEsc = shellEscape(tail)
1352 - grepEsc = shellEscape(grep)
 1312+ outputFilenameEsc = MiscUtils.shellEscape(outputFilename)
 1313+ headEsc = MiscUtils.shellEscape(head)
 1314+ tailEsc = MiscUtils.shellEscape(tail)
 1315+ grepEsc = MiscUtils.shellEscape(grep)
13531316
13541317 uncompressionCommandEsc = uncompressionCommand[:]
13551318 for u in uncompressionCommandEsc:
1356 - u = shellEscape(u)
 1319+ u = MiscUtils.shellEscape(u)
13571320 for u in compressionCommand:
1358 - u = shellEscape(u)
 1321+ u = MiscUtils.shellEscape(u)
13591322 for f in files:
1360 - f = shellEscape(f)
 1323+ f = MiscUtils.shellEscape(f)
13611324
13621325 for f in files:
13631326 f = runner.dumpDir.publicPath(f)
@@ -1690,7 +1653,7 @@
16911654 pipeline.append(uncompressionCommand)
16921655 # warning: we figure any header (<siteinfo>...</siteinfo>) is going to be less than 2000 lines!
16931656 head = runner.config.head
1694 - headEsc = shellEscape(head)
 1657+ headEsc = MiscUtils.shellEscape(head)
16951658 pipeline.append([ head, "-2000"])
16961659 # without shell
16971660 p = CommandPipeline(pipeline, quiet=True)

Status & tagging log