r105887 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r105886‎ | r105887 | r105888 >
Date:15:23, 12 December 2011
Author:ariel
Status:deferred
Tags:
Comment:
run specified query on list of wikis, one gzipped output file for each, files named by date and project
Modified paths:
  • /branches/ariel/xmldumps-backup/wikiqueries (added) (history)
  • /branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample (added) (history)
  • /branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py (added) (history)

Diff [purge]

Index: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample
@@ -0,0 +1,23 @@
 2+[wiki]
 3+mediawiki=/home/wmf/mediawiki/1.18
 4+allwikislist=/home/wmf/conf/all.dblist
 5+privatewikislist=/home/wmf/conf/private.dblist
 6+closedwikislist=/home/wmf/conf/closed.dblist
 7+
 8+[output]
 9+wikiqueriesdir=/home/wmf/output/files
 10+temp=/var/tmp
 11+fileperms=0644
 12+
 13+[database]
 14+user=dbadmin
 15+password=XXXXX
 16+
 17+[tools]
 18+php=/usr/bin/php
 19+mysql=/usr/bin/mysql
 20+gzip=/usr/bin/gzip
 21+bzip2=/usr/bin/bzip2
 22+
 23+[query]
 24+queryfile=/home/wmf/scripts/query.sql
Index: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py
@@ -0,0 +1,352 @@
 2+# for every wiki, run a specified query, gzipping the output.
 3+# there's a config file which needs to be set up.
 4+
 5+import getopt
 6+import os
 7+import re
 8+import sys
 9+import ConfigParser
 10+import subprocess
 11+import socket
 12+import time
 13+from subprocess import Popen, PIPE
 14+from os.path import exists
 15+import hashlib
 16+import traceback
 17+import shutil
 18+
class ContentFile(object):
    """A file that lives in the query output directory for one wiki and date.

    Subclasses choose the actual file name by overriding getFileName().
    """

    def __init__(self, config, date, wikiName):
        self.wikiName = wikiName
        self.date = date
        self._config = config
        self.queryDir = QueryDir(config)

    def getFileName(self):
        """Return the base name of the file; meant to be overridden."""
        return "content.txt"

    def getPath(self):
        """Return the file name joined onto the query output directory."""
        directory = self.queryDir.getQueryDir()
        return os.path.join(directory, self.getFileName())
 32+
class OutputFile(ContentFile):
    """The gzipped query output for one wiki on one date."""

    def getFileName(self):
        """Return '<wikiName>-<date>-wikiquery.gz'."""
        nameParts = (self.wikiName, self.date)
        return "%s-%s-wikiquery.gz" % nameParts
 36+
class Config(object):
    """Settings for the wikiqueries script, loaded from ini-style config files.

    Three locations are handed to the parser, in this order: the given (or
    default) file name joined onto the directory of the running script,
    /etc/wikiqueries.conf, and ~/.wikiqueries.conf.  Files that do not exist
    are skipped by the parser.

    Raises ConfigParser.NoSectionError if there is no [wiki] section and
    ConfigParser.NoOptionError if it lacks the 'mediawiki' setting.
    """

    def __init__(self, configFile=False):
        self.projectName = False

        home = os.path.dirname(sys.argv[0])
        if (not configFile):
            configFile = "wikiqueries.conf"
        self.files = [
            os.path.join(home, configFile),
            "/etc/wikiqueries.conf",
            # expanduser copes with HOME being unset (e.g. under cron),
            # where joining onto os.getenv("HOME") would blow up on None.
            os.path.expanduser("~/.wikiqueries.conf")]
        # These defaults are visible from every section of the config file.
        defaults = {
            #"wiki": {
            "allwikislist": "",
            "privatewikislist": "",
            "closedwikislist": "",
            #"output": {
            "wikiqueriesdir": "/wikiqueries",
            "temp": "/wikiqueries/temp",
            "fileperms": "0640",
            #"database": {
            "user": "root",
            "password": "",
            #"tools": {
            "php": "/bin/php",
            "gzip": "/usr/bin/gzip",
            "bzip2": "/usr/bin/bzip2",
            "mysql": "/usr/bin/mysql",
            "multiversion": "",
            #"query":{
            "queryfile": "wikiquery.sql"
            }

        self.conf = ConfigParser.SafeConfigParser(defaults)
        self.conf.read(self.files)

        if not self.conf.has_section("wiki"):
            print("The mandatory configuration section 'wiki' was not defined.")
            raise ConfigParser.NoSectionError('wiki')

        if not self.conf.has_option("wiki", "mediawiki"):
            print("The mandatory setting 'mediawiki' in the section 'wiki' was not defined.")
            raise ConfigParser.NoOptionError('wiki', 'mediawiki')

        self.parseConfFile()

    def parseConfFile(self):
        """Copy the parsed settings onto attributes of this object.

        Optional sections that are missing from the config files are added
        empty first, so that the lookups fall back to the defaults.
        """
        self.mediawiki = self.conf.get("wiki", "mediawiki")
        self.allWikisList = MiscUtils.dbList(self.conf.get("wiki", "allwikislist"))
        self.privateWikisList = MiscUtils.dbList(self.conf.get("wiki", "privatewikislist"))
        self.closedWikisList = MiscUtils.dbList(self.conf.get("wiki", "closedwikislist"))

        if not self.conf.has_section('output'):
            self.conf.add_section('output')
        self.wikiQueriesDir = self.conf.get("output", "wikiqueriesdir")
        self.tempDir = self.conf.get("output", "temp")
        self.fileperms = self.conf.get("output", "fileperms")
        # Base 0 lets the string's own prefix choose the radix, so a value
        # such as "0644" is read as octal under Python 2.
        self.fileperms = int(self.fileperms, 0)

        if not self.conf.has_section('database'):
            self.conf.add_section('database')
        self.dbUser = self.conf.get("database", "user")
        self.dbPassword = self.conf.get("database", "password")

        if not self.conf.has_section('tools'):
            self.conf.add_section('tools')
        self.php = self.conf.get("tools", "php")
        self.gzip = self.conf.get("tools", "gzip")
        self.bzip2 = self.conf.get("tools", "bzip2")
        self.mysql = self.conf.get("tools", "mysql")
        self.multiversion = self.conf.get("tools", "multiversion")

        if not self.conf.has_section('query'):
            self.conf.add_section('query')
        self.queryFile = self.conf.get("query", "queryfile")
 112+
class MultiVersion(object):
    """Builds the command words needed to invoke a MediaWiki maintenance script."""

    @staticmethod
    def MWScriptAsString(config, maintenanceScript):
        """Return the command words for the script joined by single spaces."""
        words = MultiVersion.MWScriptAsArray(config, maintenanceScript)
        return " ".join(words)

    @staticmethod
    def MWScriptAsArray(config, maintenanceScript):
        """Return the command words for the script as a list.

        When config.multiversion is set and that path exists, the script is
        passed to it as an argument; otherwise the script is addressed
        directly under <mediawiki>/maintenance/.
        """
        wrapper = config.multiversion
        if wrapper == "" or not exists(wrapper):
            return ["%s/maintenance/%s" % (config.mediawiki, maintenanceScript)]
        return [wrapper, maintenanceScript]
 125+
class MiscUtils(object):
    """Stateless helper functions, exposed as static methods."""

    def dbList(filename):
        """Read database list from a file.

        Returns the stripped, non-blank lines of the file as a sorted list,
        or an empty list when no filename is given.
        """
        if (not filename):
            return []
        infile = open(filename)
        # try/finally so the handle is released even if reading fails
        try:
            names = [line.strip() for line in infile]
        finally:
            infile.close()
        dbs = [name for name in names if name != ""]
        dbs.sort()
        return dbs

    def shellEscape(param):
        """Escape a string parameter, or set of strings, for the shell.

        A string comes back wrapped in single quotes, with embedded single
        quotes escaped; None becomes the empty string; any other value is
        treated as an iterable and returned as a tuple of escaped items.
        """
        if isinstance(param, basestring):
            return "'" + param.replace("'", "'\\''") + "'"
        elif param is None:
            # A blank string might actually be needed; None means we can leave it out
            return ""
        else:
            return tuple([MiscUtils.shellEscape(x) for x in param])

    def today():
        """Return the current UTC date formatted as YYYYMMDD."""
        return time.strftime("%Y%m%d", time.gmtime())

    def readFile(filename):
        """Read text from a file in one fell swoop."""
        infile = open(filename, "r")
        try:
            return infile.read()
        finally:
            infile.close()

    dbList = staticmethod(dbList)
    shellEscape = staticmethod(shellEscape)
    today = staticmethod(today)
    readFile = staticmethod(readFile)
 165+
class RunSimpleCommand(object):
    """Run an external command, retrying a limited number of times on failure."""

    def _commandString(command):
        """Return the command as one string, whether given as a sequence or a string."""
        if isinstance(command, (list, tuple)):
            return " ".join(command)
        return command

    def _runWithRetries(command, maxtries, shell, stdout):
        """Run the command until it exits with status 0 or maxtries is used up.

        stdout is passed straight to Popen (PIPE to capture, None to inherit);
        stderr is always captured so it can be quoted in the error message.
        Returns the captured stdout (None when it was not captured).
        Raises WikiQueriesError if no attempt succeeded.
        """
        proc = None
        error = None
        tries = 0
        while tries < maxtries:
            proc = Popen(command, shell=shell, stdout=stdout, stderr=PIPE)
            output, error = proc.communicate()
            if not proc.returncode:
                return output
            tries = tries + 1

        commandString = RunSimpleCommand._commandString(command)
        if proc is None:
            # maxtries was zero or negative, so there is no process to report on
            raise WikiQueriesError("command '" + commandString + ("' was not run (maxtries = %s)" % maxtries))
        raise WikiQueriesError("command '" + commandString + ("' failed with return code %s " % proc.returncode) + " and error '" + error + "'")

    def runWithOutput(command, maxtries = 3, shell=False):
        """Run a command and return the output as a string.
        Raises WikiQueriesError on non-zero return code."""
        return RunSimpleCommand._runWithRetries(command, maxtries, shell, PIPE)

    def runWithNoOutput(command, maxtries = 3, shell=False):
        """Run a command, expecting no output.
        Returns True on success.
        Raises WikiQueriesError on non-zero return code."""
        # stdout is left attached to ours; only stderr is captured
        RunSimpleCommand._runWithRetries(command, maxtries, shell, None)
        return True

    _commandString = staticmethod(_commandString)
    _runWithRetries = staticmethod(_runWithRetries)
    runWithOutput = staticmethod(runWithOutput)
    runWithNoOutput = staticmethod(runWithNoOutput)
 213+
class DBServer(object):
    """Looks up the database server for one wiki and builds the shell
    pipeline that runs a query against it."""

    def __init__(self, config, wikiName):
        self.config = config
        self.wikiName = wikiName
        self.dbServer = self.defaultServer()

    def defaultServer(self):
        """Run MediaWiki's getSlaveServer.php for this wiki (group 'dump')
        and return its output with trailing whitespace removed.

        Raises WikiQueriesError if the php binary is missing or the
        script fails.
        """
        if (not exists( self.config.php ) ):
            raise WikiQueriesError("php command %s not found" % self.config.php)
        command = [ self.config.php, "-q" ]
        command.extend(MultiVersion.MWScriptAsArray(self.config, "getSlaveServer.php"))
        command.extend( [ "--wiki=%s" % self.wikiName, "--group=dump" ])
        return RunSimpleCommand.runWithOutput(command, shell=False).rstrip()

    def buildSqlCommand(self, query, outFile):
        """Put together a command to execute an sql query to the server for this DB.

        The result is a shell pipeline: the query is echoed into the mysql
        client and the client's output is piped through gzip into outFile.
        Raises WikiQueriesError if the mysql binary is missing.
        """
        if (not exists( self.config.mysql ) ):
            raise WikiQueriesError("mysql command %s not found" % self.config.mysql)
        # The query, password, wiki name and output path are shell-escaped:
        # SQL routinely contains single quotes, which would otherwise end the
        # quoted argument early and corrupt (or hijack) the command line.
        command = "/bin/echo %s | %s -h %s -u %s " % (
            MiscUtils.shellEscape(query), self.config.mysql,
            self.dbServer, self.config.dbUser )
        if self.config.dbPassword != "":
            # NOTE(review): the password is still visible in the process list
            command = command + "-p" + MiscUtils.shellEscape(self.config.dbPassword)
        command = command + " -r --silent " + MiscUtils.shellEscape(self.wikiName)
        command = command + "| %s > %s" % ( self.config.gzip, MiscUtils.shellEscape(outFile) )
        return command
 239+
class WikiQueriesError(Exception):
    """Error raised by this script, e.g. when an external command keeps
    failing or too many passes over the wikis have failed."""
 242+
class QueryDir(object):
    """Answers where query output files are kept, according to the config."""

    def __init__(self, config):
        self._config = config

    def getQueryDir(self):
        """Return the configured output directory (config.wikiQueriesDir)."""
        outputDir = self._config.wikiQueriesDir
        return outputDir
 249+
class WikiQuery(object):
    """Runs the configured query against a single wiki."""

    def __init__(self,config, wikiName, dryrun, verbose):
        self._config = config
        self.wikiName = wikiName
        self.queryDir = QueryDir(self._config)
        self.dryrun = dryrun
        self.verbose = verbose

    def doOneWiki(self):
        """Run the query for this wiki unless it is listed as private or closed.

        Returns True on success (including when the wiki was skipped or this
        is a dry run) and False if the query failed or raised an exception.
        """
        if self.wikiName not in self._config.privateWikisList and self.wikiName not in self._config.closedWikisList:
            if not exists(self.queryDir.getQueryDir()):
                os.makedirs(self.queryDir.getQueryDir())
            try:
                if (self.verbose):
                    print("Doing run for wiki:  %s" % self.wikiName)
                # Use the instance's own flag; relying on a module-level
                # 'dryrun' only works when this file is run as a script.
                if not self.dryrun:
                    if not self.runWikiQuery():
                        return False
            except Exception:
                # Ordinary errors are reported as a failed run; Ctrl-C and
                # sys.exit() are deliberately allowed to propagate.
                if (self.verbose):
                    traceback.print_exc(file=sys.stdout)
                return False
        if (self.verbose):
            print("Success! Wiki %s query complete." % self.wikiName)
        return True

    def runWikiQuery(self):
        """Read the query file and run it against this wiki's database
        server, writing gzipped output to today's output file.

        Returns True on success; the command runner raises
        WikiQueriesError on failure.
        """
        outFile = OutputFile(self._config, MiscUtils.today(), self.wikiName)
        query = MiscUtils.readFile(self._config.queryFile)
        db = DBServer(self._config, self.wikiName)
        return RunSimpleCommand.runWithNoOutput(db.buildSqlCommand(query, outFile.getPath()), shell = True)
 282+
class WikiQueryLoop(object):
    """Runs the query over every wiki in the config, repeating whole passes
    until one completes without failures."""

    def __init__(self, config, dryrun, verbose):
        self.verbose = verbose
        self.dryrun = dryrun
        self._config = config

    def doRunOnAllWikis(self):
        """Make one pass over all wikis; return how many reported failure."""
        failedCount = 0
        for wikiName in self._config.allWikisList:
            outcome = WikiQuery(self._config, wikiName, self.dryrun, self.verbose).doOneWiki()
            if outcome == False:
                failedCount += 1
        return failedCount

    def doAllWikisTilDone(self,numFails):
        """Repeat passes over all wikis until a pass has no failures.

        After each failed pass, sleep five minutes before the next one.
        Raises WikiQueriesError once more than numFails passes have failed.
        """
        failedPasses = 0
        while self.doRunOnAllWikis():
            failedPasses += 1
            if failedPasses > numFails:
                raise WikiQueriesError("Too many consecutive failures, giving up")
            # pause for 5 minutes, then go around again
            time.sleep(300)
 310+
def usage(message = None):
    """Print the optional message and the command-line help, then exit with status 1."""
    if message:
        print(message)
    print("Usage: python wikiqueries.py [options] [wikidbname]")
    print("Options: --configfile, --dryrun, --verbose")
    # The default config file is looked up next to the script (dirname of
    # argv[0]), not in the working directory.
    print("--configfile: Specify an alternate config file to read. Default file is 'wikiqueries.conf' in the same directory as this script.")
    print("--dryrun: Don't actually run anything but print the commands that would be run.")
    print("--verbose: Print error messages and other informative messages (normally the")
    print(" script runs silently).")
    # Label matches the '[wikidbname]' placeholder in the usage line above.
    print("wikidbname: Run the query only for the specific wiki.")
    sys.exit(1)
 322+
if __name__ == "__main__":
    # Command-line entry point: parse options, load the config, then run the
    # query either for the one wiki named on the command line or for all wikis.
    configFile = False
    dryrun = False
    verbose = False

    try:
        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
                                                 [ 'configfile=', 'dryrun', 'verbose' ])
    except getopt.GetoptError:
        # only option-parsing errors mean "unknown option"; usage() exits
        usage("Unknown option specified")

    for (opt, val) in options:
        if opt == "--configfile":
            configFile = val
        elif opt == "--dryrun":
            dryrun = True
        elif opt == "--verbose":
            verbose = True

    # Config falls back to its default file name when configFile is false
    config = Config(configFile)

    if len(remainder) > 0:
        query = WikiQuery(config, remainder[0], dryrun, verbose)
        # report a failed single-wiki run through the exit status instead of
        # silently exiting 0
        if not query.doOneWiki():
            sys.exit(1)
    else:
        queries = WikiQueryLoop(config, dryrun, verbose)
        queries.doAllWikisTilDone(3)
Property changes on: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py
___________________________________________________________________
Added: svn:eol-style
1354 + native

Status & tagging log