r91119 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r91118‎ | r91119 | r91120 >
Date:21:57, 29 June 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
Added a barebones vandal reversion script (vandal_revert.py) and a data handling module (WSORSlaveDataLoader.py) based on the fundraiser analytics data handler (http://svn.wikimedia.org/svnroot/wikimedia/trunk/fundraiser-analysis/).

Template settings files describe the prerequisite settings to load.
Modified paths:
  • /trunk/tools/wsor/scripts/classes (added) (history)
  • /trunk/tools/wsor/scripts/classes/WSORSlaveDataLoader.py (added) (history)
  • /trunk/tools/wsor/scripts/classes/settings.py.example (added) (history)
  • /trunk/tools/wsor/scripts/settings.py.example (added) (history)
  • /trunk/tools/wsor/scripts/vandal_revert.py (added) (history)

Diff [purge]

Index: trunk/tools/wsor/scripts/settings.py.example
@@ -0,0 +1,9 @@
 2+
 3+"""
 4+ Example settings file. This must be configured locally to work.
 5+"""
 6+
 7+__project_home__ = '/home/projects/'
 8+__WSOR_home__ = '/home/projects/WSOR/'
 9+__analytics_project_home__ = '/home/projects/Analytics/'
 10+
Index: trunk/tools/wsor/scripts/vandal_revert.py
@@ -0,0 +1,83 @@
 2+
 3+"""
 4+
 5+WSOR script that determines how many vandals go on being vandals after their first vandal revert
 6+
 7+"""
 8+
 9+
 10+""" Script meta """
 11+__author__ = "Ryan Faulkner"
 12+__revision__ = "$Rev$"
 13+__date__ = "June 26th, 2011"
 14+
 15+
 16+""" Import python base modules """
 17+import sys, getopt, re, datetime, logging, argparse
 18+import settings
 19+
 20+""" Modify the classpath to include local projects """
 21+sys.path.append(settings.__project_home__)
 22+
 23+""" Import Analytics modules """
 24+from WSOR.scripts.classes.WSORSlaveDataLoader import VandalLoader
 25+
 26+
 27+"""
 28+ Define script usage
 29+"""
 30+class Usage(Exception):
 31+ def __init__(self, msg):
 32+ self.msg = msg
 33+
 34+"""
 35+ Handles the 'query' argument
 36+"""
 37+def query_name(input):
 38+
 39+ return input
 40+
 41+"""
 42+ Execution body of main
 43+"""
 44+def main(args):
 45+
 46+ #print args.test
 47+
 48+ """ Configure the logger """
 49+ LOGGING_STREAM = sys.stderr
 50+ logging.basicConfig(level=logging.DEBUG, stream=LOGGING_STREAM, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%b-%d %H:%M:%S')
 51+
 52+ vl = VandalLoader()
 53+ data = vl.run_query()
 54+
 55+ print data
 56+
 57+ return 0
 58+
 59+
 60+"""
 61+ Call main, exit when execution is complete
 62+
 63+ Parse the command-line arguments (argparse) and pass them to main
 64+
 65+"""
 66+if __name__ == "__main__":
 67+
 68+ parser = argparse.ArgumentParser(
 69+ description='Extracts revert data in db42.wikimedia.org:halfak and db42.wikimedia.org:enwiki.'
 70+ )
 71+
 72+ """ Allow specification of the query in CLI arguments """
 73+ parser.add_argument(
 74+ '-q', '--query',
 75+ metavar="<input>",
 76+ type=query_name,
 77+ help='The name of the query to be executed.',
 78+ default=sys.stdin
 79+ )
 80+
 81+
 82+ args = parser.parse_args()
 83+
 84+ sys.exit(main(args))
Property changes on: trunk/tools/wsor/scripts/vandal_revert.py
___________________________________________________________________
Added: svn:eol-style
185 + native
Index: trunk/tools/wsor/scripts/classes/settings.py.example
@@ -0,0 +1,20 @@
 2+
 3+"""
 4+ Example settings file. This must be configured locally to work.
 5+"""
 6+
 7+__project_home__ = '/home/projects/'
 8+__WSOR_home__ = '/home/projects/WSOR/'
 9+__analytics_project_home__ = '/home/projects/Analytics/'
 10+
 11+"""
 12+ Database credentials
 13+
 14+ To set up an ssh tunnel: ssh -L <local port>:<server url>:<remote port> <user>@<db server url>
 15+"""
 16+__user__ = '<username>'
 17+__db__ = '<dbname>'
 18+__db_server__ = '127.0.0.1' # localhost if you're using an ssh tunnel
 19+__db_port__ = <dbport> # use the port of your tunnel
 20+__pass__='<passwd>'
 21+__db_enwikislave__ = 'enwiki'
\ No newline at end of file
Index: trunk/tools/wsor/scripts/classes/WSORSlaveDataLoader.py
@@ -0,0 +1,105 @@
 2+"""
 3+
 4+WSOR dataloader class for the MySQL slave of enwiki
 5+
 6+"""
 7+
 8+
 9+""" Meta """
 10+__author__ = "Ryan Faulkner"
 11+__revision__ = "$Rev$"
 12+__date__ = "June 27th, 2011"
 13+
 14+
 15+""" Import python base modules """
 16+import sys, getopt, re, datetime, logging, MySQLdb, settings
 17+
 18+""" Import Analytics modules """
 19+from Fundraiser_Tools.classes.DataLoader import DataLoader
 20+
 21+
 22+"""
 23+ Inherits DataLoader
 24+
 25+ DataLoader class for the WSOR MySQL Slave
 26+
 27+"""
 28+class WSORSlaveDataLoader(DataLoader):
 29+
 30+ def __init__(self):
 31+
 32+ """ Configure the logger """
 33+ LOGGING_STREAM = sys.stderr
 34+ logging.basicConfig(level=logging.DEBUG, stream=LOGGING_STREAM, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%b-%d %H:%M:%S')
 35+
 36+
 37+ """
 38+ Override init_db to connect to the slave
 39+ """
 40+ def init_db(self):
 41+
 42+ logging.info('Attempting to establish a connection to the database.')
 43+
 44+ """ Establish connection """
 45+ try:
 46+ self._db_ = MySQLdb.connect(host=settings.__db_server__, user=settings.__user__, db=settings.__db__, port=settings.__db_port__, passwd=settings.__pass__)
 47+ logging.info('Successfully connected.\n')
 48+ except:
 49+ logging.DEBUG('Could not establish a connection to %s @ %s : %s' % (settings.__user__, settings.__db_server__, settings.__db__))
 50+ return
 51+
 52+ """ Create cursor """
 53+ self._cur_ = self._db_.cursor()
 54+
 55+
 56+
 57+
 58+"""
 59+ Inherits WSORSlaveDataLoader
 60+
 61+ DataLoader class for the vandal revert queries run against the WSOR MySQL Slave
 62+
 63+"""
 64+class VandalLoader(WSORSlaveDataLoader):
 65+
 66+ _query_test_ = 'select count(*) from revert_20110115'
 67+ _query_vandal_count_ = 'select revision_id, username, user_id, sum(is_vandalism) from reverted_20110115 group by 1,2,3'
 68+ _query_total_reverts_ = 'select revision_id, username, user_id, sum(is_vandalism) from reverted_20110115'
 69+
 70+ def __init__(self):
 71+
 72+ DataLoader.__init__(self)
 73+ logging.info('Creating VadalLoader')
 74+
 75+
 76+ """
 77+ Main execution body and data handling for the loader object
 78+ """
 79+ def run_query(self):
 80+
 81+ logging.info('Running VandalLoader')
 82+
 83+ self.init_db()
 84+
 85+ try:
 86+ self._cur_.execute(self._query_test_)
 87+
 88+ """ GET THE COLUMN NAMES FROM THE QUERY RESULTS """
 89+ self._col_names_ = list()
 90+ for i in self._cur_.description:
 91+ self._col_names_.append(i[0])
 92+
 93+ self._results_ = self._cur_.fetchall()
 94+
 95+ logging.info('Execution Complete.')
 96+
 97+ except Exception as inst:
 98+
 99+ logging.debug(str(type(inst))) # the exception instance
 100+ logging.debug(str(inst.args)) # arguments stored in .args
 101+ logging.debug(inst.__str__()) # __str__ allows args to printed directly
 102+
 103+ # self._db_.rollback()
 104+
 105+ return self._results_
 106+
Property changes on: trunk/tools/wsor/scripts/classes/WSORSlaveDataLoader.py
___________________________________________________________________
Added: svn:eol-style
1107 + native

Status & tagging log