r94957 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r94956 | r94957 | r94958 >
Date:	22:59, 18 August 2011
Author:	giovanni
Status:	deferred
Tags:
Comment:	now topcontributors.py plots the set similarity of top users over the years
Modified paths:	/trunk/tools/wsor/contribution_inequality/topcontributors.py (modified) (history)

Diff [purge]

Index: trunk/tools/wsor/contribution_inequality/topcontributors.py
—	—	@@ -8,25 +8,77 @@
9	9
10	10	from itertools import groupby
11	11	from contextlib import closing
12		~~-from argparse import ArgumentParser~~
	12	+from argparse import ArgumentParser, FileType
13	13	from collections import deque
	14	+from matplotlib.font_manager import FontProperties
	15	+from datetime import date
14	16
	17	+import numpy as np
	18	+import matplotlib.pyplot as pp
	19	+
15	20	parser = ArgumentParser(description=__doc__)
16	21	parser.add_argument('data_path', metavar='data')
17		~~-parser.add_argument('maxlen', metavar='number', type=int)~~
	22	+parser.add_argument('output_file', metavar='output_file', type=FileType('w'))
	23	+parser.add_argument('-t', '--top', dest='maxlen', type=int, default=100,
	24	+ help='Top users to list. default: %(default)d', metavar='NUM')
18	25
	26	+colors = 'bgrcmykw'
	27	+styles = ['-', '--', '-.', ':']
	28	+markers = 'ov^<>1234'
	29	+
19	30	if __name__ == '__main__':
20	31
21	32	ns = parser.parse_args()
22		-
	33	+ databyns = {}
	34	+
23	35	with closing(open(ns.data_path)) as f:
24	36	reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
25	37	groupfunc = lambda row : (row['namespace'], row['year'])
26	38	for key, subiter in groupby(reader, groupfunc):
27	39	# smart way to keep only the tail
28	40	users = deque((row['user_id'] for row in subiter ), maxlen=ns.maxlen)
29		~~- print '\t'.join(key + tuple(users))~~
30		~~- sys.stdout.flush()~~
	41	+ print >> ns.output_file, '\t'.join(key + tuple(users))
	42	+ ns.output_file.flush()
31	43
	44	+ NS, year = map(int, key)
	45	+ try:
	46	+ databyns[NS].append((year, set(users)))
	47	+ except KeyError:
	48	+ databyns[NS] = [ (year, set(users)) ]
	49	+
	50	+ figure = pp.figure(figsize=(8,4))
	51	+ ax = figure.add_axes(pp.axes([.1,.1,.8,.8], axisbg='whitesmoke'))
	52	+ i = 0
	53	+ M = len(markers)
	54	+ C = len(colors)
	55	+ S = len(styles)
	56	+
	57	+ for key in databyns:
	58	+ years, users = zip(*databyns[key])
	59	+ years = [ date(year, 1, 1) for year in years ]
	60	+ I = np.asfarray(map(len, map(set.intersection, users[1:], users[:-1])))
	61	+ U = np.asfarray(map(len, map(set.union, users[1:], users[:-1])))
	62	+ label = 'NS %s' % key
	63	+
	64	+ ax.plot(years[1:], I / U, label=label, marker=markers[i % M],
	65	+ color=colors[i % C], linestyle=styles[i % S])
	66	+ i += 1
	67	+
	68	+ pp.ylim(0,1)
	69	+ pp.ylabel('similarity')
	70	+ pp.title('Top %d contributors' % ns.maxlen)
	71	+ pp.legend(loc='best', prop=FontProperties(size='small'))
	72	+ pp.draw()
	73	+
	74	+ if not ns.output_file.isatty():
	75	+ figure_path = os.path.splitext(ns.output_file.name)[0] + '.pdf'
	76	+ pp.savefig(figure_path, fmt='pdf')
	77	+ print 'figure saved to %s' % figure_path
	78	+ print 'output saved to %s' % ns.output_file.name
	79	+
	80	+ pp.show()
32	81
	82	+ if not ns.output_file.isatty():
	83	+ ns.output_file.close()
33	84
	85	+

Status & tagging log

02:27, 19 August 2011 😂 (talk | contribs) changed the status of r94957 [removed: new added: deferred]