Index: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample |
— | — | @@ -0,0 +1,23 @@ |
| 2 | +[wiki] |
| 3 | +mediawiki=/home/wmf/mediawiki/1.18 |
| 4 | +allwikislist=/home/wmf/conf/all.dblist |
| 5 | +privatewikislist=/home/wmf/conf/private.dblist |
| 6 | +closedwikislist=/home/wmf/conf/closed.dblist |
| 7 | + |
| 8 | +[output] |
| 9 | +wikiqueriesdir=/home/wmf/output/files |
| 10 | +temp=/var/tmp |
| 11 | +fileperms=0644 |
| 12 | + |
| 13 | +[database] |
| 14 | +user=dbadmin |
| 15 | +password=XXXXX |
| 16 | + |
| 17 | +[tools] |
| 18 | +php=/usr/bin/php |
| 19 | +mysql=/usr/bin/mysql |
| 20 | +gzip=/usr/bin/gzip |
| 21 | +bzip2=/usr/bin/bzip2 |
| 22 | + |
| 23 | +[query] |
| 24 | +queryfile=/home/wmf/scripts/query.sql |
Index: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py |
— | — | @@ -0,0 +1,352 @@ |
| 2 | +# for every wiki, run a specified query, gzipping the output. |
| 3 | +# there's a config file which needs to be set up. |
| 4 | + |
| 5 | +import getopt |
| 6 | +import os |
| 7 | +import re |
| 8 | +import sys |
| 9 | +import ConfigParser |
| 10 | +import subprocess |
| 11 | +import socket |
| 12 | +import time |
| 13 | +from subprocess import Popen, PIPE |
| 14 | +from os.path import exists |
| 15 | +import hashlib |
| 16 | +import traceback |
| 17 | +import shutil |
| 18 | + |
class ContentFile(object):
    """A file that lives in the configured query output directory.

    Subclasses override getFileName() to choose the name of the file.
    """

    def __init__(self, config, date, wikiName):
        """Store the config, run date and wiki name, and set up the
        QueryDir helper used to locate the output directory."""
        self.wikiName = wikiName
        self.date = date
        self._config = config
        self.queryDir = QueryDir(config)

    # override this.
    def getFileName(self):
        """Return the bare file name (no directory)."""
        return "content.txt"

    def getPath(self):
        """Return the file name joined onto the query output directory."""
        directory = self.queryDir.getQueryDir()
        baseName = self.getFileName()
        return os.path.join(directory, baseName)
| 32 | + |
class OutputFile(ContentFile):
    """The gzipped query output for one wiki on one date."""

    def getFileName(self):
        """Return the name '<wikiName>-<date>-wikiquery.gz'."""
        nameParts = (self.wikiName, self.date)
        return "%s-%s-wikiquery.gz" % nameParts
| 36 | + |
class Config(object):
    """Settings for a wikiqueries run, read from ini-style config files.

    The candidate files are handed to the parser as one list: the named (or
    default 'wikiqueries.conf') file relative to the directory of the running
    script, then /etc/wikiqueries.conf, then ~/.wikiqueries.conf.
    """

    def __init__(self, configFile=False):
        """Read the config files and populate the settings attributes.

        configFile -- name of the primary config file; a false value
                      selects 'wikiqueries.conf'.

        Raises ConfigParser.NoSectionError if there is no [wiki] section and
        ConfigParser.NoOptionError if that section has no 'mediawiki' setting.
        """
        self.projectName = False

        self.files = Config._candidateFiles(configFile)
        # The parser applies these defaults to every section, which is why
        # they form one flat dict; the comments only group them by the
        # section that reads them.
        defaults = {
            # [wiki]
            "allwikislist": "",
            "privatewikislist": "",
            "closedwikislist": "",
            # [output]
            "wikiqueriesdir": "/wikiqueries",
            "temp": "/wikiqueries/temp",
            "fileperms": "0640",
            # [database]
            "user": "root",
            "password": "",
            # [tools]
            "php": "/bin/php",
            "gzip": "/usr/bin/gzip",
            "bzip2": "/usr/bin/bzip2",
            "mysql": "/usr/bin/mysql",
            "multiversion": "",
            # [query]
            "queryfile": "wikiquery.sql"
            }

        self.conf = ConfigParser.SafeConfigParser(defaults)
        self.conf.read(self.files)

        if not self.conf.has_section("wiki"):
            print("The mandatory configuration section 'wiki' was not defined.")
            raise ConfigParser.NoSectionError('wiki')

        if not self.conf.has_option("wiki", "mediawiki"):
            print("The mandatory setting 'mediawiki' in the section 'wiki' was not defined.")
            raise ConfigParser.NoOptionError('wiki', 'mediawiki')

        self.parseConfFile()

    def _candidateFiles(configFile):
        """Return the list of config file paths to try, in read order."""
        if not configFile:
            configFile = "wikiqueries.conf"
        scriptDir = os.path.dirname(sys.argv[0])
        files = [os.path.join(scriptDir, configFile), "/etc/wikiqueries.conf"]
        # HOME is not guaranteed to be set; skip the per-user file then
        # instead of crashing in os.path.join(None, ...).
        userHome = os.getenv("HOME")
        if userHome:
            files.append(os.path.join(userHome, ".wikiqueries.conf"))
        return files

    _candidateFiles = staticmethod(_candidateFiles)

    def parseConfFile(self):
        """Copy the settings out of the parser into attributes.

        Optional sections that are missing are added empty so that the
        parser defaults apply to them. The three wiki list files are read
        right away through MiscUtils.dbList.
        """
        self.mediawiki = self.conf.get("wiki", "mediawiki")
        self.allWikisList = MiscUtils.dbList(self.conf.get("wiki", "allwikislist"))
        self.privateWikisList = MiscUtils.dbList(self.conf.get("wiki", "privatewikislist"))
        self.closedWikisList = MiscUtils.dbList(self.conf.get("wiki", "closedwikislist"))

        if not self.conf.has_section('output'):
            self.conf.add_section('output')
        self.wikiQueriesDir = self.conf.get("output", "wikiqueriesdir")
        self.tempDir = self.conf.get("output", "temp")
        self.fileperms = self.conf.get("output", "fileperms")
        # base 0 lets the literal's own prefix choose the base, so a value
        # written as 0644 is taken as octal
        self.fileperms = int(self.fileperms, 0)

        if not self.conf.has_section('database'):
            self.conf.add_section('database')
        self.dbUser = self.conf.get("database", "user")
        self.dbPassword = self.conf.get("database", "password")

        if not self.conf.has_section('tools'):
            self.conf.add_section('tools')
        self.php = self.conf.get("tools", "php")
        self.gzip = self.conf.get("tools", "gzip")
        self.bzip2 = self.conf.get("tools", "bzip2")
        self.mysql = self.conf.get("tools", "mysql")
        self.multiversion = self.conf.get("tools", "multiversion")

        if not self.conf.has_section('query'):
            self.conf.add_section('query')
        self.queryFile = self.conf.get("query", "queryfile")
| 112 | + |
class MultiVersion(object):
    """Builds the command for a MediaWiki maintenance script, going through
    the configured multiversion wrapper when there is one."""

    def MWScriptAsString(config, maintenanceScript):
        """Return the command from MWScriptAsArray joined with spaces."""
        pieces = MultiVersion.MWScriptAsArray(config, maintenanceScript)
        return " ".join(pieces)

    def MWScriptAsArray(config, maintenanceScript):
        """Return the command as a list of arguments.

        When config.multiversion is non-empty and exists on disk the list is
        the wrapper followed by the script name; otherwise it is the single
        path of the script under the mediawiki maintenance directory.
        """
        wrapper = config.multiversion
        if wrapper != "" and exists(wrapper):
            return [wrapper, maintenanceScript]
        scriptPath = "%s/maintenance/%s" % (config.mediawiki, maintenanceScript)
        return [scriptPath]

    MWScriptAsArray = staticmethod(MWScriptAsArray)
    MWScriptAsString = staticmethod(MWScriptAsString)
| 125 | + |
class MiscUtils(object):
    """Small stateless helpers, all exposed as static methods."""

    def dbList(filename):
        """Read database list from a file.

        Returns the sorted list of non-blank lines with surrounding
        whitespace stripped, or an empty list when no filename is given.
        """
        if not filename:
            return []
        infile = open(filename)
        # try/finally so the handle is released even if reading fails
        try:
            names = [line.strip() for line in infile]
        finally:
            infile.close()
        return sorted(name for name in names if name != "")

    def shellEscape(param):
        """Escape a string parameter, or set of strings, for the shell.

        A string comes back wrapped in single quotes with embedded single
        quotes escaped; None becomes ""; any other value is treated as an
        iterable and a tuple of its escaped members is returned.
        """
        if isinstance(param, basestring):
            return "'" + param.replace("'", "'\\''") + "'"
        elif param is None:
            # A blank string might actually be needed; None means we can leave it out
            return ""
        else:
            return tuple([MiscUtils.shellEscape(x) for x in param])

    def today():
        """Return the current UTC date as a YYYYMMDD string."""
        return time.strftime("%Y%m%d", time.gmtime())

    def readFile(filename):
        """Read text from a file in one fell swoop."""
        infile = open(filename, "r")
        # try/finally so the handle is released even if reading fails
        try:
            return infile.read()
        finally:
            infile.close()

    dbList = staticmethod(dbList)
    shellEscape = staticmethod(shellEscape)
    today = staticmethod(today)
    readFile = staticmethod(readFile)
| 165 | + |
class RunSimpleCommand(object):
    """Runs external commands, retrying a few times before giving up."""

    def _commandAsString(command):
        """Return the command as one string for use in error messages."""
        if isinstance(command, (list, tuple)):
            return " ".join(command)
        return command

    def _runWithRetries(command, maxtries, shell, captureOutput):
        """Run the command until it exits with status 0 or maxtries attempts
        have been made.

        Returns the stdout of the successful attempt, which is None unless
        captureOutput is true. Raises WikiQueriesError if every attempt
        fails, or if maxtries allows no attempt at all.
        """
        if maxtries < 1:
            # nothing would be run, so there is no return code to report
            raise WikiQueriesError("command '%s' was not run, maxtries is %s"
                                   % (RunSimpleCommand._commandAsString(command), maxtries))
        stdoutTarget = None
        if captureOutput:
            stdoutTarget = PIPE
        for attempt in range(maxtries):
            proc = Popen(command, shell = shell, stdout = stdoutTarget, stderr = PIPE)
            output, error = proc.communicate()
            if not proc.returncode:
                return output
        # only the last attempt's return code and stderr are reported
        raise WikiQueriesError("command '%s' failed with return code %s  and error '%s'"
                               % (RunSimpleCommand._commandAsString(command),
                                  proc.returncode, error))

    def runWithOutput(command, maxtries = 3, shell=False):
        """Run a command and return the output as a string.
        Raises WikiQueriesError on non-zero return code."""
        return RunSimpleCommand._runWithRetries(command, maxtries, shell, True)

    def runWithNoOutput(command, maxtries = 3, shell=False):
        """Run a command, expecting no output; its stdout is not captured.
        Returns True on success.
        Raises WikiQueriesError on non-zero return code."""
        RunSimpleCommand._runWithRetries(command, maxtries, shell, False)
        return True

    _commandAsString = staticmethod(_commandAsString)
    _runWithRetries = staticmethod(_runWithRetries)
    runWithOutput = staticmethod(runWithOutput)
    runWithNoOutput = staticmethod(runWithNoOutput)
| 213 | + |
class DBServer(object):
    """Looks up the database server for a wiki and builds the shell command
    that runs a query against it."""

    def __init__(self, config, wikiName):
        """Store the config and wiki name and look up the server at once."""
        self.config = config
        self.wikiName = wikiName
        self.dbServer = self.defaultServer()

    def defaultServer(self):
        """Run getSlaveServer.php (group 'dump') for this wiki and return its
        output with trailing whitespace removed.

        Raises WikiQueriesError if the php command is missing or the script
        exits with a non-zero status.
        """
        if not exists(self.config.php):
            raise WikiQueriesError("php command %s not found" % self.config.php)
        command = [ self.config.php, "-q" ]
        command.extend(MultiVersion.MWScriptAsArray(self.config, "getSlaveServer.php"))
        command.extend([ "--wiki=%s" % self.wikiName, "--group=dump" ])
        return RunSimpleCommand.runWithOutput(command, shell=False).rstrip()

    def buildSqlCommand(self, query, outFile):
        """Put together a command to execute an sql query to the server for this DB.

        The result is a shell pipeline: echo the query into mysql and gzip
        the output into outFile. Every data value is shell-quoted so that a
        query containing single quotes, or a password or path containing
        shell metacharacters, cannot break the command.

        Raises WikiQueriesError if the mysql command is missing.
        """
        if not exists(self.config.mysql):
            raise WikiQueriesError("mysql command %s not found" % self.config.mysql)
        quote = MiscUtils.shellEscape
        command = "/bin/echo %s | %s -h %s -u %s " % (
            quote(query), self.config.mysql, quote(self.dbServer), quote(self.config.dbUser))
        if self.config.dbPassword != "":
            # no space between -p and the password, so quote them as one word
            command = command + quote("-p" + self.config.dbPassword)
        command = command + " -r --silent " + quote(self.wikiName)
        # NOTE(review): a shell pipeline normally reports the exit status of
        # its last command (gzip here), so a mysql failure may go unnoticed
        # by the caller -- confirm.
        command = command + "| %s > %s" % (self.config.gzip, quote(outFile))
        return command
| 239 | + |
class WikiQueriesError(Exception):
    """Raised when a step of a wikiqueries run fails."""
| 242 | + |
class QueryDir(object):
    """Accessor for the directory in which query output is written."""

    def __init__(self, config):
        """Keep a reference to the run configuration."""
        self._config = config

    def getQueryDir(self):
        """Return the wikiQueriesDir setting of the configuration."""
        outputDir = self._config.wikiQueriesDir
        return outputDir
| 249 | + |
class WikiQuery(object):
    """Runs the configured query against a single wiki."""

    def __init__(self, config, wikiName, dryrun, verbose):
        """Store the config, wiki name and the dryrun/verbose flags."""
        self._config = config
        self.wikiName = wikiName
        self.queryDir = QueryDir(self._config)
        self.dryrun = dryrun
        self.verbose = verbose

    def doOneWiki(self):
        """Run the query for this wiki unless it is private or closed.

        Returns True on success, which includes a skipped wiki and a dry
        run, and False if the query failed. Failures are only reported
        (as a traceback on stdout) when verbose is set.
        """
        skipped = (self.wikiName in self._config.privateWikisList
                   or self.wikiName in self._config.closedWikisList)
        if not skipped:
            if not exists(self.queryDir.getQueryDir()):
                os.makedirs(self.queryDir.getQueryDir())
            try:
                if self.verbose:
                    print("Doing run for wiki:  %s" % self.wikiName)
                # use the flag given to this object, not a module-level name
                if not self.dryrun:
                    if not self.runWikiQuery():
                        return False
            except Exception:
                # Exception rather than a bare except, so that Ctrl-C and
                # sys.exit() are not turned into an ordinary query failure
                if self.verbose:
                    traceback.print_exc(file=sys.stdout)
                return False
        if self.verbose:
            print("Success! Wiki %s query complete." % self.wikiName)
        return True

    def runWikiQuery(self):
        """Read the query file and run its contents against this wiki,
        writing the gzipped result to today's output file.

        Returns True on success; the command runner raises on failure.
        """
        outFile = OutputFile(self._config, MiscUtils.today(), self.wikiName)
        query = MiscUtils.readFile(self._config.queryFile)
        db = DBServer(self._config, self.wikiName)
        return RunSimpleCommand.runWithNoOutput(db.buildSqlCommand(query, outFile.getPath()), shell = True)
| 282 | + |
class WikiQueryLoop(object):
    """Runs the query over every wiki in the config, repeating the whole
    pass while any wiki fails."""

    def __init__(self, config, dryrun, verbose):
        """Store the config and the dryrun/verbose flags."""
        self.verbose = verbose
        self.dryrun = dryrun
        self._config = config

    def doRunOnAllWikis(self):
        """Run the query once for each wiki in allWikisList and return the
        number of wikis for which it failed."""
        outcomes = [
            WikiQuery(self._config, wikiName, self.dryrun, self.verbose).doOneWiki()
            for wikiName in self._config.allWikisList
        ]
        return len([outcome for outcome in outcomes if outcome == False])

    def doAllWikisTilDone(self,numFails):
        """Repeat full passes over all wikis until a pass has no failures.

        Each pass with failures counts once; when that count exceeds
        numFails a WikiQueriesError is raised, otherwise the loop sleeps
        before starting the next pass.
        """
        failedPasses = 0
        while self.doRunOnAllWikis():
            failedPasses = failedPasses + 1
            if failedPasses > numFails:
                raise WikiQueriesError("Too many consecutive failures, giving up")
            # wait 5 minutes and try another loop
            time.sleep(300)
| 310 | + |
def usage(message = None):
    """Print the optional message followed by the usage text to stdout,
    then exit with status 1."""
    if message:
        print(message)
    print("Usage: python wikiqueries.py [options] [wikidbname]")
    print("Options: --configfile, --dryrun, --verbose")
    print("--configfile:  Specify an alternate config file to read. Default file is 'wikiqueries.conf' in the directory of this script.")
    print("--dryrun:      Don't actually run the query against any wiki.")
    print("--verbose:     Print error messages and other informative messages (normally the")
    print("               script runs silently).")
    print("wikidbname:    Run the query only for the specific wiki.")
    sys.exit(1)
| 322 | + |
if __name__ == "__main__":
    # Command-line entry point: parse the options, load the config, then run
    # the query for the one wiki named on the command line or for all wikis.
    configFile = False
    dryrun = False
    verbose = False

    try:
        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
                                                 [ 'configfile=', 'dryrun', 'verbose' ])
    except getopt.GetoptError:
        # usage() exits, so options and remainder are always bound below
        usage("Unknown option specified")

    for (opt, val) in options:
        if opt == "--configfile":
            configFile = val
        elif opt == "--dryrun":
            dryrun = True
        elif opt == "--verbose":
            verbose = True

    # Config treats a false configFile as "use the default name"
    config = Config(configFile)

    if len(remainder) > 0:
        query = WikiQuery(config, remainder[0], dryrun, verbose)
        # report a failed run to the caller through the exit status
        if not query.doOneWiki():
            sys.exit(1)
    else:
        queries = WikiQueryLoop(config, dryrun, verbose)
        queries.doAllWikisTilDone(3)
Property changes on: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 354 | + native |