r94954 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r94953‎ | r94954 | r94955 >
Date:21:49, 18 August 2011
Author:giovanni
Status:deferred
Tags:
Comment:
added script to list top contributors by year and namespace
Modified paths:
  • /trunk/tools/wsor/contribution_inequality/topcontributors.py (added) (history)

Diff [purge]

Index: trunk/tools/wsor/contribution_inequality/topcontributors.py
@@ -0,0 +1,32 @@
#!/usr/bin/python

''' lists top contributors by year and namespace '''

# Usage: topcontributors.py DATA NUMBER
#
# DATA is a tab-separated file whose header row names at least the columns
# `namespace`, `year` and `user_id`.  For every consecutive run of rows that
# share the same (namespace, year) pair, one output line is written:
#
#     namespace <TAB> year <TAB> user_id ... (the last NUMBER ids of the run)
#
# NOTE(review): rows are grouped with itertools.groupby, which only merges
# *adjacent* rows, so DATA must already be sorted/grouped by namespace and
# year.  Presumably each group is also ordered so that the heaviest
# contributors come last (the script keeps the tail) -- confirm against the
# query that produces DATA.

import os
import sys
import csv

from itertools import groupby
from contextlib import closing
from argparse import ArgumentParser
from collections import deque

parser = ArgumentParser(description=__doc__)
parser.add_argument('data_path', metavar='data')
parser.add_argument('maxlen', metavar='number', type=int)


def _group_key(row):
    '''Return the (namespace, year) pair identifying the group of `row`.'''
    return (row['namespace'], row['year'])


def top_contributors(rows, maxlen):
    '''Yield one tuple per consecutive (namespace, year) group in `rows`.

    rows   -- iterable of mappings with 'namespace', 'year' and 'user_id'
              keys (e.g. a csv.DictReader)
    maxlen -- how many trailing user ids to keep for each group

    Each yielded tuple is (namespace, year, user_id, ...), holding at most
    `maxlen` user ids: the last ones seen in the group, in input order.
    Raises ValueError (from deque) if `maxlen` is negative.
    '''
    for key, group in groupby(rows, _group_key):
        # A bounded deque drops the oldest items as new ones arrive, so only
        # the tail of the group is ever held in memory.
        tail = deque((row['user_id'] for row in group), maxlen=maxlen)
        yield key + tuple(tail)


def main(argv=None):
    '''Parse the command line, read the data file and print the results.

    argv -- argument list to parse; defaults to sys.argv[1:] when None
    '''
    args = parser.parse_args(argv)
    if args.maxlen < 0:
        # deque would raise a bare ValueError; give a proper usage error.
        parser.error('number must be non-negative')

    # File objects are context managers themselves; no closing() needed.
    with open(args.data_path) as f:
        reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for record in top_contributors(reader, args.maxlen):
            # sys.stdout.write behaves the same on Python 2 and Python 3,
            # unlike the `print` statement.
            sys.stdout.write('\t'.join(record) + '\n')
            # Flush per group so the line is emitted as soon as it is ready.
            sys.stdout.flush()


if __name__ == '__main__':
    main()
 31+
 32+
 33+
Property changes on: trunk/tools/wsor/contribution_inequality/topcontributors.py
___________________________________________________________________
Added: svn:executable
134 + *

Status & tagging log