r92541 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r92540‎ | r92541 | r92542 >
Date:16:24, 19 July 2011
Author:halfak
Status:deferred
Tags:
Comment:
added track_hugglers script
Modified paths:
  • /trunk/tools/wsor/newbie_warnings/queries.sql (modified) (history)
  • /trunk/tools/wsor/newbie_warnings/track_hugglers.py (added) (history)
  • /trunk/tools/wsor/newbie_warnings/track_messages.py (modified) (history)

Diff [purge]

Index: trunk/tools/wsor/newbie_warnings/track_messages.py
@@ -40,7 +40,7 @@
4141 )
4242 parser.add_argument(
4343 '-o', '--out',
44 - type=lambda fn:open(fn, 'a'),
 44+ type=lambda fn:open(fn, 'a+'),
4545 help='Where should output be appended',
4646 default=sys.stdout
4747 )
@@ -61,19 +61,19 @@
6262 read_default_file=args.cnf
6363 )
6464
65 - print(
 65+ args.out.write(
6666 "\t".join(
6767 [db.getTime()]+
6868 [
6969 ":".join(
7070 [
71 - e['user_id'],
72 - e['user_name'],
73 - e['messages_waiting']
 71+ encode(e['user_id']),
 72+ encode(e['user_name']),
 73+ encode(e['messages_waiting'])
7474 ]
75 - ) for e in db.getEditorsWithTalk()
 75+ ) for e in db.getEditorsWithTalk(args.user_id)
7676 ]
77 - )
 77+ ) + "\n"
7878 )
7979
8080
@@ -97,7 +97,7 @@
9898 LIMIT 1
9999 """
100100 )
101 - yield cursor.fetchone()['time']
 101+ return cursor.fetchone()['time']
102102
103103
104104 def getEditorsWithTalk(self, userId):
@@ -117,11 +117,10 @@
118118 WHERE r.rev_user = %(user_id)s
119119 AND p.page_namespace = 3
120120 ) AS tp
121 - LEFT JOIN user reciever
 121+ INNER JOIN user reciever
122122 ON reciever.user_name = REPLACE(tp.page_title, "_", " ")
123123 INNER JOIN user_newtalk nt
124124 ON reciever.user_id = nt.user_id
125 - OR nt.user_ip = tp.page_title
126125 GROUP BY reciever.user_id, reciever.user_name
127126 """,
128127 {
Index: trunk/tools/wsor/newbie_warnings/queries.sql
@@ -83,6 +83,101 @@
8484 GROUP BY tp.page_title;
8585
8686
 87+SELECT rc_timestamp AS time
 88+FROM recentchanges
 89+ORDER BY rc_timestamp DESC
 90+LIMIT 1
8791
8892
8993
 94+SELECT
 95+ reciever.user_id,
 96+ reciever.user_name,
 97+ count(*) AS messages_waiting
 98+FROM (
 99+SELECT DISTINCT p.page_title
 100+FROM revision r
 101+INNER JOIN halfak.huggler_sample h
 102+ ON r.rev_user = h.user_id
 103+ AND h.user_name NOT IN ("Tide rolls", "Falcon8765")
 104+INNER JOIN page p
 105+ ON r.rev_page = p.page_id
 106+AND p.page_namespace = 3
 107+AND r.rev_timestamp >= "20110705230000"
 108+) AS tp
 109+INNER JOIN user reciever
 110+ ON reciever.user_name = REPLACE(tp.page_title, "_", " ")
 111+INNER JOIN user_newtalk nt
 112+ ON reciever.user_id = nt.user_id
 113+GROUP BY reciever.user_id, reciever.user_name
 114+UNION
 115+SELECT
 116+ NULL AS user_id,
 117+ tp.page_title AS user_name,
 118+ count(*) AS messages_waiting
 119+FROM (
 120+SELECT DISTINCT p.page_title
 121+FROM revision r
 122+INNER JOIN halfak.huggler_sample h
 123+ ON r.rev_user = h.user_id
 124+ AND h.user_name NOT IN ("Tide rolls", "Falcon8765")
 125+INNER JOIN page p
 126+ ON r.rev_page = p.page_id
 127+WHERE p.page_namespace = 3
 128+AND r.rev_timestamp >= "20110705230000"
 129+) AS tp
 130+INNER JOIN user_newtalk nt
 131+ ON tp.page_title = nt.user_ip
 132+GROUP BY tp.page_title;
 133+
 134+
 135+SELECT DISTINCT p.page_title AS title
 136+FROM revision r
 137+INNER JOIN halfak.listed_huggler h
 138+ ON r.rev_user = h.user_id
 139+INNER JOIN page p
 140+ ON r.rev_page = p.page_id
 141+WHERE p.page_namespace = 3
 142+AND r.rev_timestamp >= "20110719000000"
 143+AND r.rev_comment LIKE "%[[WP:HG%";
 144+
 145+
 146+SELECT DISTINCT p.page_title AS title
 147+FROM revision r
 148+INNER JOIN page p
 149+ ON r.rev_page = p.page_id
 150+WHERE p.page_namespace = 3
 151+AND r.rev_timestamp >= "20110719014743"
 152+AND r.rev_comment LIKE "%DERPDERPDERP42%";
 153+
 154+
 155+
 156+SELECT
 157+ nt.user_id,
 158+ IFNULL(u.user_name, nt.user_ip) AS user_name,
 159+ count(*)
 160+FROM user_newtalk nt
 161+LEFT JOIN user u
 162+ ON u.user_id = nt.user_id
 163+WHERE u.user_name IN ("EpochFail")
 164+OR nt.user_ip IN ("EpochFail")
 165+GROUP BY nt.user_id, nt.user_ip, u.user_name;
 166+
 167+
 168+SELECT
 169+ u.user_id,
 170+ u.user_name,
 171+ count(*) as messages
 172+FROM user_newtalk nt
 173+LEFT JOIN user u
 174+ ON u.user_id = nt.user_id
 175+WHERE u.user_name IN ("EpochFail")
 176+GROUP BY u.user_id, u.user_name
 177+UNION
 178+SELECT
 179+ nt.user_ip as user_name,
 180+ NULL as user_id,
 181+ count(*) as messages
 182+FROM user_newtalk nt
 183+WHERE nt.user_ip IN ("EpochFail")
 184+GROUP BY nt.user_ip, NULL;
Index: trunk/tools/wsor/newbie_warnings/track_hugglers.py
@@ -0,0 +1,192 @@
 2+import sys, MySQLdb, MySQLdb.cursors, argparse, os, logging, types, time
 3+import wmf
 4+
 5+def encode(v):
 6+ if v == None: return "\N"
 7+
 8+ if type(v) == types.LongType: v = int(v)
 9+ elif type(v) == types.UnicodeType: v = v.encode('utf-8')
 10+
 11+ return str(v).encode("string-escape")
 12+
 13+
 14+def emit(event, p, time):
 15+ print(
 16+ "\t".join(encode(v) for v in [
 17+ event,
 18+ p['user_id'],
 19+ p['user_name'],
 20+ time
 21+ ])
 22+ )
 23+
 24+
 25+def main():
 26+ parser = argparse.ArgumentParser(
 27+ description=''
 28+ )
 29+ parser.add_argument(
 30+ '-c', '--cnf',
 31+ metavar="<path>",
 32+ type=str,
 33+ help='the path to MySQL config info (defaults to ~/.my.cnf)',
 34+ default=os.path.expanduser("~/.my.cnf")
 35+ )
 36+ parser.add_argument(
 37+ '-s', '--host',
 38+ type=str,
 39+ help='the database host to connect to (defaults to localhost)',
 40+ default="localhost"
 41+ )
 42+ parser.add_argument(
 43+ '-d', '--db',
 44+ type=str,
 45+ help='the language db to run the query in (defaults to enwiki)',
 46+ default="enwiki"
 47+ )
 48+ parser.add_argument(
 49+ '-o', '--out',
 50+ type=lambda fn:open(fn, 'a+'),
 51+ help='Where should output be appended',
 52+ default=sys.stdout
 53+ )
 54+ args = parser.parse_args()
 55+
 56+ LOGGING_STREAM = sys.stderr
 57+ logging.basicConfig(
 58+ level=logging.DEBUG,
 59+ stream=LOGGING_STREAM,
 60+ format='%(asctime)s %(levelname)-8s %(message)s',
 61+ datefmt='%b-%d %H:%M:%S'
 62+ )
 63+
 64+ logging.info("Connecting to %s:%s using %s." % (args.host, args.db, args.cnf))
 65+ db = Database(
 66+ host=args.host,
 67+ db=args.db,
 68+ read_default_file=args.cnf
 69+ )
 70+
 71+ try:
 72+ oldPosts = {}
 73+ lastTime = db.getTime()
 74+ time.sleep(5)
 75+ while True:
 76+ logging.info("Tracking %s posts. Looking for new ones since %s." % (len(oldPosts), lastTime))
 77+ newUsers = set(db.getHugglePostsSince(lastTime))
 78+ currTime = db.getTime()
 79+ currUsers = set()
 80+ for p in db.getWaitingPosts(oldPosts.viewkeys() | newUsers):
 81+ if p['user_name'] not in oldPosts:
 82+ #Found a new posting
 83+ LOGGING_STREAM.write(">")
 84+ p['posting'] = currTime
 85+ oldPosts[p['user_name']] = p
 86+ emit("received", p, currTime)
 87+ elif p['messages'] < oldPosts[p['user_name']]['messages']:
 88+ #Looks like someone checked the message
 89+ LOGGING_STREAM.write("<")
 90+ emit("read", oldPosts[p['user_name']], currTime)
 91+ del oldPosts[p['user_name']]
 92+ else:
 93+ #Same shit, different minute
 94+ pass
 95+
 96+ currUsers.add(p['user_name'])
 97+
 98+ for missing in oldPosts.viewkeys() - currUsers:
 99+ LOGGING_STREAM.write("<")
 100+ emit("read", oldPosts[missing], currTime)
 101+ del oldPosts[missing]
 102+
 103+ lastTime = currTime
 104+ LOGGING_STREAM.write("\n")
 105+ time.sleep(5)
 106+
 107+ except KeyboardInterrupt:
 108+ logging.info("Keyboard interrupt detected. Shutting down.")
 109+ except Exception as e:
 110+ logging.error(str(e))
 111+
 112+ print(repr(oldPosts))
 113+ print(lastTime)
 114+
 115+
 116+
 117+def safe(val):
 118+ return '"' + val.replace('"', '\\"') + '"'
 119+
 120+class Database:
 121+
 122+ def __init__(self, *args, **kwargs):
 123+ self.args = args
 124+ self.kwargs = kwargs
 125+ self.usersConn = MySQLdb.connect(*args, **kwargs)
 126+
 127+
 128+
 129+ def getTime(self):
 130+ cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor)
 131+ cursor.execute(
 132+ """
 133+ SELECT rc_timestamp AS time
 134+ FROM recentchanges
 135+ ORDER BY rc_timestamp DESC
 136+ LIMIT 1
 137+ """
 138+ )
 139+ self.usersConn.commit()
 140+ for row in cursor:
 141+ return row['time']
 142+
 143+
 144+ def getHugglePostsSince(self, timestamp):
 145+ cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor)
 146+ cursor.execute("""
 147+ SELECT DISTINCT p.page_title AS title
 148+ FROM revision r
 149+ INNER JOIN page p
 150+ ON r.rev_page = p.page_id
 151+ WHERE p.page_namespace = 3
 152+ AND r.rev_timestamp >= %(timestamp)s
 153+ AND (
 154+ r.rev_comment LIKE %(like)s OR
 155+ r.rev_comment LIKE %(clue)s
 156+ )
 157+ """,
 158+ {
 159+ "timestamp": timestamp,
 160+ "like": "%" + "WP:HG" + "%",
 161+ "clue": "%" + "Warning" + "%"
 162+ }
 163+ )
 164+ return (p['title'].replace("_", " ") for p in cursor)
 165+
 166+ def getWaitingPosts(self, users):
 167+ cursor = self.usersConn.cursor(MySQLdb.cursors.DictCursor)
 168+ userString = ",".join(safe(u) for u in users)
 169+ if len(userString) != 0:
 170+ cursor.execute("""
 171+ SELECT
 172+ u.user_id,
 173+ u.user_name,
 174+ count(*) as messages
 175+ FROM user_newtalk nt
 176+ LEFT JOIN user u
 177+ ON u.user_id = nt.user_id
 178+ WHERE u.user_name IN (""" + userString + """)
 179+ GROUP BY u.user_id, u.user_name
 180+ UNION
 181+ SELECT
 182+ NULL as user_id,
 183+ nt.user_ip as user_name,
 184+ count(*) as messages
 185+ FROM user_newtalk nt
 186+ WHERE nt.user_ip IN (""" + userString + """)
 187+ GROUP BY nt.user_ip, NULL
 188+ """
 189+ )
 190+ for post in cursor:
 191+ yield post
 192+
 193+if __name__ == "__main__": main()

Status & tagging log