Index: trunk/tools/wsor/contribution_inequality/topcontributors.py |
— | — | @@ -0,0 +1,32 @@ |
| 2 | +#!/usr/bin/python |
| 3 | + |
| 4 | +''' lists top contributors by year and namespace ''' |
| 5 | + |
| 6 | +import os |
| 7 | +import sys |
| 8 | +import csv |
| 9 | + |
| 10 | +from itertools import groupby |
| 11 | +from contextlib import closing |
| 12 | +from argparse import ArgumentParser |
| 13 | +from collections import deque |
| 14 | + |
parser = ArgumentParser(description=__doc__)
parser.add_argument('data_path', metavar='data',
                    help='tab-separated file with a header row containing '
                         'the namespace, year and user_id columns')
parser.add_argument('maxlen', metavar='number', type=int,
                    help='maximum number of user ids printed per '
                         '(namespace, year) group')


def _group_key(row):
    """Return the (namespace, year) pair that identifies a row's group."""
    return (row['namespace'], row['year'])


def top_contributors(rows, maxlen):
    """Yield one tuple per consecutive (namespace, year) group of rows.

    Each yielded tuple is ``(namespace, year, user_id, ...)`` and holds at
    most ``maxlen`` user ids: the LAST ``maxlen`` ones seen in that group.

    ``rows`` is any iterable of mappings with the keys ``namespace``,
    ``year`` and ``user_id``.  Grouping is done with ``itertools.groupby``,
    which only merges adjacent rows, so rows sharing a key must already be
    contiguous in the input; otherwise the same key is yielded repeatedly.

    NOTE(review): keeping the tail presumably relies on the rows of each
    group being ordered by ascending contribution -- confirm against the
    script that produces the data file.
    """
    for key, group in groupby(rows, _group_key):
        # A bounded deque discards older items as new ones arrive, so only
        # the tail of the group is ever held in memory.
        tail = deque((row['user_id'] for row in group), maxlen=maxlen)
        yield key + tuple(tail)


def main(argv=None):
    """Parse the command line and print one tab-separated line per group.

    ``argv`` defaults to ``sys.argv[1:]`` (argparse's behaviour for None).
    """
    ns = parser.parse_args(argv)
    if ns.maxlen < 0:
        # deque() would raise a bare ValueError; report a usage error instead.
        parser.error('number must be non-negative')

    # File objects are context managers themselves; no closing() needed.
    with open(ns.data_path) as f:
        reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for fields in top_contributors(reader, ns.maxlen):
            # sys.stdout.write works on both Python 2 and Python 3, unlike
            # the print statement.
            sys.stdout.write('\t'.join(fields) + '\n')
            # Flush per group so downstream consumers see output promptly.
            sys.stdout.flush()


if __name__ == '__main__':
    main()
| 31 | + |
| 32 | + |
| 33 | + |
Property changes on: trunk/tools/wsor/contribution_inequality/topcontributors.py |
___________________________________________________________________ |
Added: svn:executable |
1 | 34 | + * |