r94724 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r94723‎ | r94724 | r94725 >
Date:02:27, 17 August 2011
Author:halfak
Status:deferred
Tags:
Comment:
Better name for user_reg_dumb
Modified paths:
  • /trunk/tools/wsor/scripts/fix_reg_date.dumb.py (deleted) (history)
  • /trunk/tools/wsor/scripts/user_approx_registration.py (added) (history)

Diff [purge]

Index: trunk/tools/wsor/scripts/fix_reg_date.dumb.py
@@ -1,106 +0,0 @@
2 -import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types
3 -import wmf
4 -
def encode(v):
    r"""Encode one value as a field of a MySQL-style tab-separated dump.

    ``None`` becomes the two-character NULL marker ``\N``.  Any other value
    is turned into bytes (text is encoded as UTF-8, other objects go through
    ``str()``) and then backslash-escaped, so tabs, newlines, backslashes,
    single quotes and non-printable/non-ASCII bytes cannot break the row.

    Runs on both Python 2 and Python 3 and always returns the native ``str``.
    """
    if v is None:
        return "\\N"

    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(v, text_type):
        raw = v.encode("utf-8")
    elif isinstance(v, bytes):
        raw = v
    else:
        # Covers int/long and anything else with a sensible str().
        raw = str(v)
        if isinstance(raw, text_type):
            raw = raw.encode("utf-8")

    if str is bytes:
        # Python 2: the byte-string codec does the whole job.
        return raw.encode("string-escape")

    # Python 3 has no "string-escape".  Mapping every byte to the code point
    # of the same value (latin-1) and applying "unicode_escape" yields the
    # same \t, \n, \r, \\ and \xNN escapes; only the quote needs adding.
    escaped = raw.decode("latin-1").encode("unicode_escape").decode("ascii")
    return escaped.replace("'", "\\'")
12 -
13 -
def main():
    """Print approximate registration dates for users that have none.

    Parses the command line, connects to MySQL and walks the rows returned
    by ``Database.getUsersBefore`` (highest ``user_id`` first).  For each row
    whose ``user_registration`` is NULL, a ``user_id<TAB>date`` line is
    printed to stdout, where ``date`` is the lowest registration value seen
    so far (initially the ``date`` argument).

    A progress mark per row goes to stderr: "!" for a row that was filled
    in, "." for a row that already had a registration date.
    """
    parser = argparse.ArgumentParser(
        # The previous description was copied from an unrelated script.
        description='Gathers approximate registration dates by walking '
                    'backwards through the user table and guessing at '
                    'registration dates based on user_id.'
    )
    parser.add_argument(
        'date',
        type=str,
        help='the date to start querying for users with dumb registration dates'
    )
    parser.add_argument(
        '-c', '--cnf',
        metavar="<path>",
        type=str,
        help='the path to MySQL config info (defaults to ~/.my.cnf)',
        default=os.path.expanduser("~/.my.cnf")
    )
    parser.add_argument(
        '-s', '--host',
        type=str,
        help='the database host to connect to (defaults to localhost)',
        default="localhost"
    )
    parser.add_argument(
        '-d', '--db',
        type=str,
        help='the language db to run the query in (defaults to enwiki)',
        default="enwiki"
    )
    args = parser.parse_args()

    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.DEBUG,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info(
        "Connecting to %s:%s using %s.", args.host, args.db, args.cnf
    )
    db = Database(
        host=args.host,
        db=args.db,
        read_default_file=args.cnf
    )
    headers = [
        'user_id',
        'user_registration'
    ]

    lowestDate = args.date
    try:
        for user in db.getUsersBefore(args.date):
            if user['user_registration'] is None:
                LOGGING_STREAM.write("!")
                # Fill the gap with the lowest date seen so far.
                user['user_registration'] = lowestDate
                print("\t".join(str(user[h]) for h in headers))
            else:
                LOGGING_STREAM.write(".")
                lowestDate = min(user['user_registration'], lowestDate)

        LOGGING_STREAM.write("\n")
    finally:
        # Release the connection even if the walk fails part-way.
        db.usersConn.close()
76 -
77 -
78 -
79 -
80 -
class Database(object):
    """Small wrapper around a single MySQLdb connection to a wiki database."""

    def __init__(self, *args, **kwargs):
        """Open the connection; all arguments go to ``MySQLdb.connect``."""
        self.args = args
        self.kwargs = kwargs
        self.usersConn = MySQLdb.connect(*args, **kwargs)

    def close(self):
        """Close the underlying connection."""
        self.usersConn.close()

    def getUsersBefore(self, date):
        """Yield user rows registered on or before ``date``, or with no date.

        Rows are dicts with ``user_id`` and ``user_registration`` keys,
        ordered by ``user_id`` descending.  A server-side cursor is used, so
        rows are streamed rather than loaded all at once.
        """
        cursor = self.usersConn.cursor(MySQLdb.cursors.SSDictCursor)
        try:
            cursor.execute(
                """
                SELECT
                    user_id,
                    user_registration
                FROM user
                WHERE user_registration <= %(date)s
                OR user_registration IS NULL
                ORDER BY user_id DESC
                """,
                {
                    'date': date
                }
            )
            for row in cursor:
                yield row
        finally:
            # Also runs when the caller abandons the generator early, so the
            # streaming cursor does not keep the connection busy.
            cursor.close()
106 -
107 -if __name__ == "__main__": main()
Index: trunk/tools/wsor/scripts/user_approx_registration.py
@@ -0,0 +1,106 @@
 2+import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types
 3+import wmf
 4+
def encode(v):
    r"""Encode one value as a field of a MySQL-style tab-separated dump.

    ``None`` becomes the two-character NULL marker ``\N``.  Any other value
    is turned into bytes (text is encoded as UTF-8, other objects go through
    ``str()``) and then backslash-escaped, so tabs, newlines, backslashes,
    single quotes and non-printable/non-ASCII bytes cannot break the row.

    Runs on both Python 2 and Python 3 and always returns the native ``str``.
    """
    if v is None:
        return "\\N"

    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(v, text_type):
        raw = v.encode("utf-8")
    elif isinstance(v, bytes):
        raw = v
    else:
        # Covers int/long and anything else with a sensible str().
        raw = str(v)
        if isinstance(raw, text_type):
            raw = raw.encode("utf-8")

    if str is bytes:
        # Python 2: the byte-string codec does the whole job.
        return raw.encode("string-escape")

    # Python 3 has no "string-escape".  Mapping every byte to the code point
    # of the same value (latin-1) and applying "unicode_escape" yields the
    # same \t, \n, \r, \\ and \xNN escapes; only the quote needs adding.
    escaped = raw.decode("latin-1").encode("unicode_escape").decode("ascii")
    return escaped.replace("'", "\\'")
 12+
 13+
def main():
    """Print approximate registration dates for users that have none.

    Parses the command line, connects to MySQL and walks the rows returned
    by ``Database.getUsersBefore`` (highest ``user_id`` first).  For each row
    whose ``user_registration`` is NULL, a ``user_id<TAB>date`` line is
    printed to stdout, where ``date`` is the lowest registration value seen
    so far (initially the ``date`` argument).

    A progress mark per row goes to stderr: "!" for a row that was filled
    in, "." for a row that already had a registration date.
    """
    parser = argparse.ArgumentParser(
        description='Gathers approximate registration date by walking ' +
                    'backwards through the user table and guessing at registration ' +
                    'dates based on user_id. Assumes user_id is ordered.'
    )
    parser.add_argument(
        'date',
        type=str,
        help='the date to start querying for users with dumb registration dates'
    )
    parser.add_argument(
        '-c', '--cnf',
        metavar="<path>",
        type=str,
        help='the path to MySQL config info (defaults to ~/.my.cnf)',
        default=os.path.expanduser("~/.my.cnf")
    )
    parser.add_argument(
        '-s', '--host',
        type=str,
        help='the database host to connect to (defaults to localhost)',
        default="localhost"
    )
    parser.add_argument(
        '-d', '--db',
        type=str,
        help='the language db to run the query in (defaults to enwiki)',
        default="enwiki"
    )
    args = parser.parse_args()

    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.DEBUG,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info(
        "Connecting to %s:%s using %s.", args.host, args.db, args.cnf
    )
    db = Database(
        host=args.host,
        db=args.db,
        read_default_file=args.cnf
    )
    headers = [
        'user_id',
        'user_registration'
    ]

    lowestDate = args.date
    try:
        for user in db.getUsersBefore(args.date):
            if user['user_registration'] is None:
                LOGGING_STREAM.write("!")
                # Fill the gap with the lowest date seen so far.
                user['user_registration'] = lowestDate
                print("\t".join(str(user[h]) for h in headers))
            else:
                LOGGING_STREAM.write(".")
                lowestDate = min(user['user_registration'], lowestDate)

        LOGGING_STREAM.write("\n")
    finally:
        # Release the connection even if the walk fails part-way.
        db.usersConn.close()
 77+
 78+
 79+
 80+
class Database(object):
    """Small wrapper around a single MySQLdb connection to a wiki database."""

    def __init__(self, *args, **kwargs):
        """Open the connection; all arguments go to ``MySQLdb.connect``."""
        self.args = args
        self.kwargs = kwargs
        self.usersConn = MySQLdb.connect(*args, **kwargs)

    def close(self):
        """Close the underlying connection."""
        self.usersConn.close()

    def getUsersBefore(self, date):
        """Yield user rows registered on or before ``date``, or with no date.

        Rows are dicts with ``user_id`` and ``user_registration`` keys,
        ordered by ``user_id`` descending.  A server-side cursor is used, so
        rows are streamed rather than loaded all at once.
        """
        cursor = self.usersConn.cursor(MySQLdb.cursors.SSDictCursor)
        try:
            cursor.execute(
                """
                SELECT
                    user_id,
                    user_registration
                FROM user
                WHERE user_registration <= %(date)s
                OR user_registration IS NULL
                ORDER BY user_id DESC
                """,
                {
                    'date': date
                }
            )
            for row in cursor:
                yield row
        finally:
            # Also runs when the caller abandons the generator early, so the
            # streaming cursor does not keep the connection busy.
            cursor.close()
 106+
 107+if __name__ == "__main__": main()

Status & tagging log