r94945 MediaWiki - Code Review archive

Revision: r94944 | r94945 | r94946 >
Date: 20:58, 18 August 2011
added ginichart.py
Modified paths:
  • /trunk/tools/wsor/contribution_inequality/ginichart.py (added) (history)

Diff [purge]

Index: trunk/tools/wsor/contribution_inequality/ginichart.py
@@ -0,0 +1,104 @@
 3+''' computes gini coefficient of contribution to namespace per year '''
 5+import os
 6+import sys
 7+import csv
 9+import numpy as np
 10+import matplotlib.pyplot as pp
 12+from itertools import groupby
 13+from contextlib import closing
 14+from argparse import ArgumentParser
 15+from matplotlib.font_manager import FontProperties
# Command-line interface: one positional input file and an optional title.
parser = ArgumentParser(description=__doc__)
parser.add_argument(
    'data_path', metavar='data',
    help='tab-delimited file with namespace, year and '
         'total_contributions columns')
parser.add_argument(
    '-T', '--title',
    help='plot title; also used to build the name of the output PDF')

# Per-series plot styling. Each sequence is indexed modulo its own length,
# so the differing lengths make the colour/style/marker combinations repeat
# only after many series.
# White ('w') is intentionally left out of the colour cycle: the chart is
# drawn on a 'whitesmoke' background, where a white line cannot be seen.
colors = 'bgrcmyk'
styles = ['-', '--', '-.', ':']
markers = 'ov^<>1234'
def gini(x):
    '''
    Computes an estimator of the Gini coefficient from an array x

    The observations are sorted into a private copy, so the caller's
    array (or list/tuple) is never reordered.

    Parameters
    ----------
    x - a flat array or sequence of observations, in any order

    Returns
    -------
    the Gini coefficient of the sample, including the n / (n - 1)
    small-sample correction

    Raises
    ------
    ZeroDivisionError - if x holds exactly one observation (n - 1 == 0)

    References
    ----------
    http://mathworld.wolfram.com/GiniCoefficient.html
    '''
    # np.sort returns a new array sorted in non-decreasing order; the
    # original implementation sorted x in place, silently mutating the
    # caller's data.
    values = np.sort(np.asarray(x, dtype=float))
    n = float(len(values))
    ranks = np.arange(len(values)) + 1  # 1-based rank of each observation
    m = np.mean(values)
    return np.sum((2 * ranks - n - 1) * values) / (n ** 2 * m) * (n / (n - 1))
def igini(flatiter):
    '''
    Computes an estimator of the Gini coefficient from a sorted iterator on a
    flat sample of observations

    Uses the sample estimator

        G = 1 - 2 / (n - 1) * (n - sum(i * y_i) / sum(y_i)),   i = 1..n

    which already contains the small-sample correction and therefore agrees
    with gini(). No further n / (n - 1) factor must be applied.

    Parameters
    ----------
    flatiter - an iterator over observations, sorted in non-decreasing order

    Returns
    -------
    the Gini coefficient of the sample

    Raises
    ------
    ZeroDivisionError - if flatiter yields fewer than two observations, or if
    the observations (plain Python numbers) sum to zero

    References
    ----------
    http://en.wikipedia.org/wiki/Gini_coefficient
    http://mathworld.wolfram.com/GiniCoefficient.html
    '''
    den = 0.0  # running sum of the observations
    num = 0.0  # running rank-weighted sum of the observations
    n = 0      # stays 0 for an empty iterator, so 0.0 / 0.0 raises below
    for n, y in enumerate(flatiter, 1):
        num += n * y
        den += y
    return 1 - (2.0 / (n - 1)) * (n - num / den)
if __name__ == '__main__':
    # Script entry point: read per-user contribution totals, compute one Gini
    # coefficient per (namespace, year) group, and plot one line per
    # namespace.

    ns = parser.parse_args()

    # (namespace, year, gini) triples, in the order the groups appear in the
    # input file.
    results = []

    with open(ns.data_path) as f:
        reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        # A tuple key compares by value on both Python 2 and Python 3; a bare
        # map() is a one-shot iterator on Python 3 and would put every row in
        # its own group.
        groupfunc = lambda row: (int(row['namespace']), int(row['year']))
        # groupby only merges *consecutive* rows, so the input is assumed to
        # be ordered by namespace, then year, with total_contributions
        # non-decreasing inside each group (igini requires sorted input).
        for key, subiter in groupby(reader, groupfunc):
            flatiter = (float(row['total_contributions']) for row in subiter)
            try:
                coefficient = igini(flatiter)
            except ZeroDivisionError:
                # Undefined for a group with a single observation or with a
                # zero total; keep the point as a gap in the plotted line.
                coefficient = np.nan
            results.append(key + (coefficient,))

    figure = pp.figure(figsize=(8, 4))
    # NOTE(review): 'axisbg' is the keyword used by the matplotlib this script
    # was written against; later releases call it 'facecolor' - confirm the
    # target version before changing it.
    ax = figure.add_axes(pp.axes([.1, .1, .8, .8], axisbg='whitesmoke'))
    M = len(markers)
    C = len(colors)
    S = len(styles)

    # One line per namespace; results is already ordered by namespace because
    # it follows the order of the input file.
    for i, (key, subiter) in enumerate(groupby(results, lambda k: k[0])):
        data = np.asarray([(year, coeff) for _, year, coeff in subiter])
        label = 'NS %d' % key
        ax.plot(data.T[0], data.T[1], label=label, marker=markers[i % M],
                color=colors[i % C], linestyle=styles[i % S])

    pp.ylabel('Gini coefficient')
    pp.legend(loc='best', prop=FontProperties(size='small'))
    pp.ylim(0, 1)
    pp.draw()
    if ns.title:
        pp.title(ns.title)
        figure_path = 'gini_' + ns.title.replace(' ', '_') + '.pdf'
    else:
        # Use only the file name: prefixing a path such as 'dir/data.tsv'
        # would yield 'gini_dir/data.pdf', pointing into a directory that
        # normally does not exist.
        stem = os.path.splitext(os.path.basename(ns.data_path))[0]
        figure_path = 'gini_' + stem + '.pdf'
    pp.savefig(figure_path, format='pdf')
    print('output saved to %s' % figure_path)
    pp.show()
Property changes on: trunk/tools/wsor/contribution_inequality/ginichart.py
Added: svn:executable
1106 + *

Status & tagging log