Index: trunk/tools/wsor/contribution_inequality/topcontributors.py |
— | — | @@ -8,25 +8,77 @@ |
9 | 9 | |
10 | 10 | from itertools import groupby |
11 | 11 | from contextlib import closing |
12 | | -from argparse import ArgumentParser |
| 12 | +from argparse import ArgumentParser, FileType |
13 | 13 | from collections import deque |
| 14 | +from matplotlib.font_manager import FontProperties |
| 15 | +from datetime import date |
14 | 16 | |
| 17 | +import numpy as np |
| 18 | +import matplotlib.pyplot as pp |
| 19 | + |
15 | 20 | parser = ArgumentParser(description=__doc__) |
16 | 21 | parser.add_argument('data_path', metavar='data') |
17 | | -parser.add_argument('maxlen', metavar='number', type=int) |
| 22 | +parser.add_argument('output_file', metavar='output_file', type=FileType('w')) |
| 23 | +parser.add_argument('-t', '--top', dest='maxlen', type=int, default=100, |
| 24 | + help='Top users to list. default: %(default)d', metavar='NUM') |
18 | 25 | |
| 26 | +colors = 'bgrcmykw' |
| 27 | +styles = ['-', '--', '-.', ':'] |
| 28 | +markers = 'ov^<>1234' |
| 29 | + |
19 | 30 | if __name__ == '__main__': |
20 | 31 | |
21 | 32 | ns = parser.parse_args() |
22 | | - |
| 33 | + databyns = {} |
| 34 | + |
23 | 35 | with closing(open(ns.data_path)) as f: |
24 | 36 | reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE) |
25 | 37 | groupfunc = lambda row : (row['namespace'], row['year']) |
26 | 38 | for key, subiter in groupby(reader, groupfunc): |
27 | 39 | # smart way to keep only the tail |
28 | 40 | users = deque((row['user_id'] for row in subiter ), maxlen=ns.maxlen) |
29 | | - print '\t'.join(key + tuple(users)) |
30 | | - sys.stdout.flush() |
| 41 | + print >> ns.output_file, '\t'.join(key + tuple(users)) |
| 42 | + ns.output_file.flush() |
31 | 43 | |
| 44 | + NS, year = map(int, key) |
| 45 | + try: |
| 46 | + databyns[NS].append((year, set(users))) |
| 47 | + except KeyError: |
| 48 | + databyns[NS] = [ (year, set(users)) ] |
| 49 | + |
| 50 | + figure = pp.figure(figsize=(8,4)) |
| 51 | + ax = figure.add_axes(pp.axes([.1,.1,.8,.8], axisbg='whitesmoke')) |
| 52 | + i = 0 |
| 53 | + M = len(markers) |
| 54 | + C = len(colors) |
| 55 | + S = len(styles) |
| 56 | + |
| 57 | + for key in databyns: |
| 58 | + years, users = zip(*databyns[key]) |
| 59 | + years = [ date(year, 1, 1) for year in years ] |
| 60 | + I = np.asfarray(map(len, map(set.intersection, users[1:], users[:-1]))) |
| 61 | + U = np.asfarray(map(len, map(set.union, users[1:], users[:-1]))) |
| 62 | + label = 'NS %s' % key |
| 63 | + |
| 64 | + ax.plot(years[1:], I / U, label=label, marker=markers[i % M], |
| 65 | + color=colors[i % C], linestyle=styles[i % S]) |
| 66 | + i += 1 |
| 67 | + |
| 68 | + pp.ylim(0,1) |
| 69 | + pp.ylabel('similarity') |
| 70 | + pp.title('Top %d contributors' % ns.maxlen) |
| 71 | + pp.legend(loc='best', prop=FontProperties(size='small')) |
| 72 | + pp.draw() |
| 73 | + |
| 74 | + if not ns.output_file.isatty(): |
| 75 | + figure_path = os.path.splitext(ns.output_file.name)[0] + '.pdf' |
| 76 | + pp.savefig(figure_path, fmt='pdf') |
| 77 | + print 'figure saved to %s' % figure_path |
| 78 | + print 'output saved to %s' % ns.output_file.name |
| 79 | + |
| 80 | + pp.show() |
32 | 81 | |
| 82 | + if not ns.output_file.isatty(): |
| 83 | + ns.output_file.close() |
33 | 84 | |
| 85 | + |