Index: trunk/tools/wsor/scripts/fix_reg_date.dumb.py |
— | — | @@ -1,106 +0,0 @@ |
2 | | -import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types |
3 | | -import wmf |
4 | | - |
def encode(v):
    """Return *v* as an escaped, single-line string for tab-separated output.

    ``None`` becomes the two-character marker ``\\N`` (presumably the MySQL
    NULL marker for tab-separated dumps -- confirm against the consumer).
    Text is encoded to UTF-8 first; any other value goes through ``str()``.
    The bytes are then escaped using the rules of Python 2's
    ``string-escape`` codec: backslash, single quote, tab, newline and
    carriage return get backslash escapes, and every other byte outside
    printable ASCII becomes ``\\xNN``.

    The escaping is done by hand because the ``string-escape`` codec,
    ``types.LongType`` and ``types.UnicodeType`` do not exist on Python 3.
    """
    if v is None:
        # Spelled with an explicit backslash: "\N" is a SyntaxError on Python 3.
        return "\\N"

    # No special case for long integers: str() renders them without the
    # trailing "L", exactly as it does for plain ints.
    if isinstance(v, type(u"")):
        raw = v.encode("utf-8")
    elif isinstance(v, bytes):
        raw = v
    else:
        raw = str(v)
        if not isinstance(raw, bytes):
            # Python 3: str() produced text, so move to bytes before escaping.
            raw = raw.encode("utf-8")

    named_escapes = {
        0x5c: "\\\\",
        0x27: "\\'",
        0x09: "\\t",
        0x0a: "\\n",
        0x0d: "\\r",
    }
    pieces = []
    for byte in bytearray(raw):
        if byte in named_escapes:
            pieces.append(named_escapes[byte])
        elif byte < 0x20 or byte >= 0x7f:
            pieces.append("\\x%02x" % byte)
        else:
            pieces.append(chr(byte))
    return "".join(pieces)
12 | | - |
13 | | - |
def main():
    """Print approximate registration dates for users that have none.

    Parses the command line, connects to MySQL and walks the rows yielded by
    ``Database.getUsersBefore``.  The lowest registration date seen so far is
    tracked, starting from the ``date`` argument.  Each row whose
    ``user_registration`` is NULL is written to stdout as
    ``user_id<TAB>date`` using that lowest date.

    One progress character per row goes to stderr: ``!`` for a row that was
    approximated, ``.`` for a row that already had a date.
    """
    parser = argparse.ArgumentParser(
        # The earlier description ('Gathers editor data for first and last
        # session') was copied from another script and did not match this one.
        description='Gathers approximate registration date by walking ' +
                    'backwards through the user table and guessing at registration ' +
                    'dates based on user_id. Assumes user_id is ordered.'
    )
    parser.add_argument(
        'date',
        type=str,
        help='the date to start querying for users with dumb registration dates'
    )
    parser.add_argument(
        '-c', '--cnf',
        metavar="<path>",
        type=str,
        help='the path to MySQL config info (defaults to ~/.my.cnf)',
        default=os.path.expanduser("~/.my.cnf")
    )
    parser.add_argument(
        '-s', '--host',
        type=str,
        help='the database host to connect to (defaults to localhost)',
        default="localhost"
    )
    parser.add_argument(
        '-d', '--db',
        type=str,
        help='the language db to run the query in (defaults to enwiki)',
        default="enwiki"
    )
    args = parser.parse_args()

    # Log records and progress characters share stderr so that stdout carries
    # only the tab-separated result rows.
    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.DEBUG,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info("Connecting to %s:%s using %s.", args.host, args.db, args.cnf)
    db = Database(
        host=args.host,
        db=args.db,
        read_default_file=args.cnf
    )
    headers = [
        'user_id',
        'user_registration'
    ]

    # NOTE(review): min() below assumes the database returns registration
    # values that compare with the ``date`` string (same type and format) --
    # confirm against the schema.
    lowestDate = args.date
    for user in db.getUsersBefore(args.date):
        if user['user_registration'] is None:
            LOGGING_STREAM.write("!")
            user['user_registration'] = lowestDate
            print("\t".join(str(user[h]) for h in headers))
        else:
            LOGGING_STREAM.write(".")
            lowestDate = min(user['user_registration'], lowestDate)

    LOGGING_STREAM.write("\n")
76 | | - |
77 | | - |
78 | | - |
79 | | - |
80 | | - |
class Database(object):
    """Thin wrapper around a MySQLdb connection for reading the user table."""

    def __init__(self, *args, **kwargs):
        """Open the connection.

        All positional and keyword arguments are stored on the instance and
        passed unchanged to ``MySQLdb.connect``.
        """
        self.args = args
        self.kwargs = kwargs
        self.usersConn = MySQLdb.connect(*args, **kwargs)

    def getUsersBefore(self, date):
        """Yield user rows registered on or before *date*, or with no date.

        Rows are dictionaries with the keys ``user_id`` and
        ``user_registration``, ordered by descending ``user_id``.  They are
        read through an ``SSDictCursor`` and yielded one at a time.  The
        cursor is closed when iteration finishes or the generator is
        discarded.
        """
        cursor = self.usersConn.cursor(MySQLdb.cursors.SSDictCursor)
        try:
            cursor.execute(
                """
                SELECT
                    user_id,
                    user_registration
                FROM user
                WHERE user_registration <= %(date)s
                OR user_registration IS NULL
                ORDER BY user_id DESC
                """,
                {
                    'date': date
                }
            )
            for row in cursor:
                yield row
        finally:
            # Previously the cursor was never closed.
            cursor.close()
106 | | - |
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Index: trunk/tools/wsor/scripts/user_approx_registration.py |
— | — | @@ -0,0 +1,106 @@ |
| 2 | +import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types |
| 3 | +import wmf |
| 4 | + |
def encode(v):
    """Return *v* as an escaped, single-line string for tab-separated output.

    ``None`` becomes the two-character marker ``\\N`` (presumably the MySQL
    NULL marker for tab-separated dumps -- confirm against the consumer).
    Text is encoded to UTF-8 first; any other value goes through ``str()``.
    The bytes are then escaped using the rules of Python 2's
    ``string-escape`` codec: backslash, single quote, tab, newline and
    carriage return get backslash escapes, and every other byte outside
    printable ASCII becomes ``\\xNN``.

    The escaping is done by hand because the ``string-escape`` codec,
    ``types.LongType`` and ``types.UnicodeType`` do not exist on Python 3.
    """
    if v is None:
        # Spelled with an explicit backslash: "\N" is a SyntaxError on Python 3.
        return "\\N"

    # No special case for long integers: str() renders them without the
    # trailing "L", exactly as it does for plain ints.
    if isinstance(v, type(u"")):
        raw = v.encode("utf-8")
    elif isinstance(v, bytes):
        raw = v
    else:
        raw = str(v)
        if not isinstance(raw, bytes):
            # Python 3: str() produced text, so move to bytes before escaping.
            raw = raw.encode("utf-8")

    named_escapes = {
        0x5c: "\\\\",
        0x27: "\\'",
        0x09: "\\t",
        0x0a: "\\n",
        0x0d: "\\r",
    }
    pieces = []
    for byte in bytearray(raw):
        if byte in named_escapes:
            pieces.append(named_escapes[byte])
        elif byte < 0x20 or byte >= 0x7f:
            pieces.append("\\x%02x" % byte)
        else:
            pieces.append(chr(byte))
    return "".join(pieces)
| 12 | + |
| 13 | + |
def main():
    """Print approximate registration dates for users that have none.

    Parses the command line, connects to MySQL and walks the rows yielded by
    ``Database.getUsersBefore``.  The lowest registration date seen so far is
    tracked, starting from the ``date`` argument.  Each row whose
    ``user_registration`` is NULL is written to stdout as
    ``user_id<TAB>date`` using that lowest date.

    One progress character per row goes to stderr: ``!`` for a row that was
    approximated, ``.`` for a row that already had a date.
    """
    parser = argparse.ArgumentParser(
        description='Gathers approximate registration date by walking ' +
                    'backwards through the user table and guessing at registration ' +
                    'dates based on user_id. Assumes user_id is ordered.'
    )
    parser.add_argument(
        'date',
        type=str,
        help='the date to start querying for users with dumb registration dates'
    )
    parser.add_argument(
        '-c', '--cnf',
        metavar="<path>",
        type=str,
        help='the path to MySQL config info (defaults to ~/.my.cnf)',
        default=os.path.expanduser("~/.my.cnf")
    )
    parser.add_argument(
        '-s', '--host',
        type=str,
        help='the database host to connect to (defaults to localhost)',
        default="localhost"
    )
    parser.add_argument(
        '-d', '--db',
        type=str,
        help='the language db to run the query in (defaults to enwiki)',
        default="enwiki"
    )
    args = parser.parse_args()

    # Log records and progress characters share stderr so that stdout carries
    # only the tab-separated result rows.
    LOGGING_STREAM = sys.stderr
    logging.basicConfig(
        level=logging.DEBUG,
        stream=LOGGING_STREAM,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info("Connecting to %s:%s using %s.", args.host, args.db, args.cnf)
    db = Database(
        host=args.host,
        db=args.db,
        read_default_file=args.cnf
    )
    headers = [
        'user_id',
        'user_registration'
    ]

    # NOTE(review): min() below assumes the database returns registration
    # values that compare with the ``date`` string (same type and format) --
    # confirm against the schema.
    lowestDate = args.date
    for user in db.getUsersBefore(args.date):
        if user['user_registration'] is None:
            LOGGING_STREAM.write("!")
            user['user_registration'] = lowestDate
            print("\t".join(str(user[h]) for h in headers))
        else:
            LOGGING_STREAM.write(".")
            lowestDate = min(user['user_registration'], lowestDate)

    LOGGING_STREAM.write("\n")
| 77 | + |
| 78 | + |
| 79 | + |
| 80 | + |
class Database(object):
    """Thin wrapper around a MySQLdb connection for reading the user table."""

    def __init__(self, *args, **kwargs):
        """Open the connection.

        All positional and keyword arguments are stored on the instance and
        passed unchanged to ``MySQLdb.connect``.
        """
        self.args = args
        self.kwargs = kwargs
        self.usersConn = MySQLdb.connect(*args, **kwargs)

    def getUsersBefore(self, date):
        """Yield user rows registered on or before *date*, or with no date.

        Rows are dictionaries with the keys ``user_id`` and
        ``user_registration``, ordered by descending ``user_id``.  They are
        read through an ``SSDictCursor`` and yielded one at a time.  The
        cursor is closed when iteration finishes or the generator is
        discarded.
        """
        cursor = self.usersConn.cursor(MySQLdb.cursors.SSDictCursor)
        try:
            cursor.execute(
                """
                SELECT
                    user_id,
                    user_registration
                FROM user
                WHERE user_registration <= %(date)s
                OR user_registration IS NULL
                ORDER BY user_id DESC
                """,
                {
                    'date': date
                }
            )
            for row in cursor:
                yield row
        finally:
            # Previously the cursor was never closed.
            cursor.close()
| 106 | + |
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()