r25647 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r25646‎ | r25647 | r25648 >
Date:18:51, 7 September 2007
Author:brion
Status:old
Tags:
Comment:
Experimental hit logging thingy
Modified paths:
  • /trunk/tools/counter (added) (history)
  • /trunk/tools/counter/counter.py (added) (history)
  • /trunk/tools/counter/hitcounter.sql (added) (history)

Diff [purge]

Index: trunk/tools/counter/hitcounter.sql
@@ -0,0 +1,9 @@
 -- Raw hit log written by counter.py: one row is inserted per recorded page view.
 2+CREATE TABLE hit_counter (
 -- Time of the hit; counter.py currently stores CURRENT_TIMESTAMP() at insert
 -- time rather than the timestamp from the log line.
 3+ hc_ts TIMESTAMP,
 4+
 -- Host name of the request URL and the normalized page title, as split out by
 -- counter.py.
 5+ hc_site VARCHAR(255) BINARY,
 6+ hc_page VARCHAR(255) BINARY,
 7+
 -- Two composite indexes over the same columns: one led by time, one led by
 -- site and page.
 8+ KEY (hc_ts, hc_site, hc_page),
 9+ KEY (hc_site, hc_page, hc_ts)
 10+) CHARSET=binary;
Property changes on: trunk/tools/counter/hitcounter.sql
___________________________________________________________________
Added: svn:eol-style
111 + native
Index: trunk/tools/counter/counter.py
@@ -0,0 +1,83 @@
 2+import MySQLdb
 3+import re
 4+import sys
 5+import urllib
 6+
 7+globalConnection = None
 8+
def runLoop(inputFile, targetPages=None):
    """Record a hit for every wiki page view found in a squid access log.

    Args:
        inputFile: Iterable of log lines (for example ``sys.stdin``).
        targetPages: Optional collection of ``"host:Page title"`` strings.
            When given, only hits on those pages are recorded; when None,
            every page view that extractPage() recognizes is recorded.

    The database connection opened while recording is always closed before
    this function returns, even if processing a line raises.
    """
    # Hash the filter once so each lookup is O(1) instead of a list scan
    # repeated for every log line.
    wanted = None if targetPages is None else frozenset(targetPages)
    try:
        for line in inputFile:
            # Skip lines that are just going to be hitting the upload server
            # or common skin files.
            if " GET http://upload.wikimedia.org/" in line:
                continue
            if ".org/skins-1.5/" in line:
                continue
            page = extractPage(line)
            if page and (wanted is None or page in wanted):
                recordHit(page)
    finally:
        # Release the connection even when a line fails part-way through.
        closeConnection()
 19+
def extractPage(line):
    """Return ``"host:Page title"`` for a plain wiki page view, else None.

    A log line qualifies only when extractUrl() yields a URL that has no
    query string, starts with ``http://`` and has the form
    ``http://<host>/wiki/<title>``. The title is passed through
    normalizePage() before being joined to the host with a colon.
    """
    url = extractUrl(line)
    if not url or "?" in url or not url.startswith("http://"):
        return None

    # Split into host, first path segment, and the rest of the path.
    parts = url[7:].split("/", 2)
    if len(parts) != 3 or parts[1] != "wiki":
        return None

    host, title = parts[0], parts[2]
    return host + ":" + normalizePage(title)
 31+
def extractUrl(line):
    """Return the requested URL from a squid log line, or None.

    Only GET requests are considered. The line is split on single spaces;
    the request method is the 8th field and the URL the 9th.
    """
    # https://wikitech.leuksman.com/view/Squid_log_format
    # $hostname %sn %ts.%03tu %tr %>a %Ss/%03Hs %<st %rm %ru %Sh/%<A %mt %{Referer}>h %{X-Forwarded-For}>h %{User-Agent}>h
    # ...
    # 9. URL
    fields = line.split(" ", 9)
    if len(fields) < 9:
        return None
    method, url = fields[7], fields[8]
    if method != "GET":
        return None
    return url
 42+
def normalizePage(page):
    """Turn a URL-encoded page title into its display form.

    Percent-escapes are decoded and underscores are replaced by spaces.

    Args:
        page: The title portion of a ``/wiki/<title>`` URL.

    Returns:
        The decoded title with spaces instead of underscores.
    """
    # unquote() lives in urllib on Python 2 and urllib.parse on Python 3;
    # resolve it locally so the function works on either interpreter.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote
    return unquote(page).replace("_", " ")
 45+
def recordHit(page):
    """Insert one row into ``hit_counter`` for a page view and commit it.

    Args:
        page: ``"site:page name"`` string as produced by extractPage(). It
            is split on the first colon only, so the page name may itself
            contain colons.

    The row is stamped with the database's CURRENT_TIMESTAMP() at insert
    time, not with the time recorded in the log line.
    """
    (site, pagename) = page.split(":", 1)
    conn = getConnection()
    cursor = conn.cursor()
    try:
        # fixme: format timestamp from the log line
        cursor.execute(
            "INSERT INTO hit_counter (hc_ts, hc_site, hc_page) " +
            "VALUES (CURRENT_TIMESTAMP(), %s, %s)",
            (site, pagename))
    finally:
        # Close the cursor explicitly rather than leaving one per hit
        # for the garbage collector.
        cursor.close()
    conn.commit()
 55+
def getConnection():
    """Return the shared database connection, opening it on first use."""
    global globalConnection
    if globalConnection:
        return globalConnection
    # No usable connection cached yet: open one and remember it.
    globalConnection = openConnection()
    return globalConnection
 61+
def openConnection(host="localhost", user="root", passwd="", db="counter"):
    """Open a new MySQL connection to the hit-counter database.

    The defaults reproduce the original hard-coded settings, so calling this
    with no arguments behaves exactly as before.

    Args:
        host: MySQL server host name.
        user: MySQL user name.
        passwd: Password for ``user``.
        db: Name of the database to select.

    Returns:
        The connection object returned by ``MySQLdb.connect``.
    """
    return MySQLdb.connect(host=host, user=user, passwd=passwd, db=db)
 64+
def closeConnection():
    """Close the shared database connection, if one is open, and forget it."""
    global globalConnection
    if not globalConnection:
        # Nothing was ever opened (or it is already closed).
        return
    globalConnection.close()
    globalConnection = None
 70+
def listFromFile(filename):
    """Read the non-blank lines of a file as a sorted list.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are dropped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A sorted list of the stripped, non-empty lines.
    """
    # The with-block closes the file even if reading raises.
    with open(filename) as infile:
        stripped = (line.strip() for line in infile)
        return sorted(entry for entry in stripped if entry)
 78+
if __name__ == "__main__":
    # An optional first argument names a file listing the pages to count;
    # without it every page view read from stdin is recorded.
    pages = listFromFile(sys.argv[1]) if len(sys.argv) > 1 else None
    runLoop(sys.stdin, pages)
Property changes on: trunk/tools/counter/counter.py
___________________________________________________________________
Added: svn:eol-style
185 + native
Added: svn:executable
286 + *

Status & tagging log