Index: trunk/tools/wsor/contribution_inequality/ginichart.py |
— | — | @@ -0,0 +1,104 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +''' computes gini coefficient of contribution to namespace per year ''' |
| 4 | + |
| 5 | +import os |
| 6 | +import sys |
| 7 | +import csv |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +import matplotlib.pyplot as pp |
| 11 | + |
| 12 | +from itertools import groupby |
| 13 | +from contextlib import closing |
| 14 | +from argparse import ArgumentParser |
| 15 | +from matplotlib.font_manager import FontProperties |
| 16 | + |
# Command-line interface: one positional input file and an optional title.
parser = ArgumentParser(description=__doc__)
parser.add_argument('data_path', metavar='data',
                    help='tab-separated input file with namespace, year and '
                         'total_contributions columns')
parser.add_argument('-T', '--title',
                    help='plot title; also used to name the output PDF')

# Per-series plot styling, cycled by series index when plotting.
# White ('w') is left out of the colors on purpose: a white line cannot be
# told apart from the light plot background.
colors = 'bgrcmyk'
styles = ['-', '--', '-.', ':']
markers = 'ov^<>1234'
| 24 | + |
def gini(x):
    '''
    Computes an estimator of the Gini coefficient from an array x

    The observations are sorted in non-decreasing order and the estimate is
    sum((2 * i - n - 1) * x_i) / (n ** 2 * mean(x)), scaled by n / (n - 1).
    The sort is done on a copy, so the caller's array is left untouched.

    Parameters
    ----------
    x - a flat array (or sequence) of observations

    Returns
    -------
    the estimated Gini coefficient

    Raises
    ------
    ZeroDivisionError - if x holds exactly one observation (n - 1 is zero)

    References
    ----------
    http://mathworld.wolfram.com/GiniCoefficient.html
    '''
    # np.sort returns a sorted copy; sorting in place would silently reorder
    # the caller's data.
    values = np.sort(np.asarray(x, dtype=float))
    n = float(len(values))
    rank = np.arange(len(values)) + 1
    mean = np.mean(values)
    return np.sum((2 * rank - n - 1) * values) / (n ** 2 * mean) * (n / (n - 1))
| 41 | + |
def igini(flatiter):
    '''
    Computes an estimator of the Gini coefficient from a sorted iterator on a
    flat sample of observations

    The estimate is 1 - 2 / (n - 1) * (n - sum(i * y_i) / sum(y_i)), with i
    counting observations from 1. It is computed in a single pass, so the
    sample is never held in memory.

    Parameters
    ----------
    flatiter - an iterator over observations, sorted in non-decreasing order

    Returns
    -------
    the estimated Gini coefficient as a float

    Raises
    ------
    ZeroDivisionError - if fewer than two observations are supplied, or if
        the observations sum to zero

    References
    ----------
    http://en.wikipedia.org/wiki/Gini_coefficient
    http://mathworld.wolfram.com/GiniCoefficient.html
    '''
    den = 0.0
    num = 0.0
    n = 0  # stays 0 when flatiter yields nothing
    for n, y in enumerate(flatiter, 1):
        num += n * y
        den += y
    if n < 2 or den == 0:
        raise ZeroDivisionError(
            'Gini coefficient needs at least two observations with a '
            'non-zero sum')
    # The expression below is already the small-sample corrected estimator;
    # multiplying it by a further n / (n - 1) would skew the result.
    return 1 - (2.0 / (n - 1)) * (n - num / den)
| 63 | + |
if __name__ == '__main__':
    # Reads the tab-separated input, computes one Gini coefficient per
    # (namespace, year) group, plots one line per namespace and saves the
    # chart as a PDF in the current directory.

    ns = parser.parse_args()

    # (namespace, year, gini) triples, in the order groups appear in the input
    gini_rows = []

    with open(ns.data_path) as f:
        reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        # groupby only merges adjacent rows with equal keys, so the input is
        # expected to arrive already ordered by namespace and year.
        # The key is a tuple so that it compares by value.
        groupfunc = lambda row: (int(row['namespace']), int(row['year']))
        for key, subiter in groupby(reader, groupfunc):
            flatiter = (float(row['total_contributions']) for row in subiter)
            try:
                gini_rows.append(key + (igini(flatiter),))
            except ZeroDivisionError:
                # raised for a group with a single observation or whose
                # contributions sum to zero; plot it as a gap
                gini_rows.append(key + (np.nan,))

    figure = pp.figure(figsize=(8, 4))
    ax = figure.add_axes([.1, .1, .8, .8])
    # Colour the background through the axes patch rather than the 'axisbg'
    # keyword, which newer matplotlib releases no longer accept.
    ax.patch.set_facecolor('whitesmoke')

    by_namespace = groupby(gini_rows, lambda k: k[0])
    for i, (key, subiter) in enumerate(by_namespace):
        data = np.asarray([(year, coeff) for _, year, coeff in subiter])
        label = 'NS %d' % key
        ax.plot(data.T[0], data.T[1], label=label,
                marker=markers[i % len(markers)],
                color=colors[i % len(colors)],
                linestyle=styles[i % len(styles)])

    pp.ylabel('Gini coefficient')
    pp.legend(loc='best', prop=FontProperties(size='small'))
    pp.ylim(0, 1)
    pp.draw()
    if ns.title:
        pp.title(ns.title)
        figure_path = 'gini_' + ns.title.replace(' ', '_') + '.pdf'
    else:
        # Use only the file name: keeping directory components would yield a
        # path such as 'gini_some/dir/data.pdf'.
        stem = os.path.splitext(os.path.basename(ns.data_path))[0]
        figure_path = 'gini_' + stem + '.pdf'
    pp.savefig(figure_path, format='pdf')
    print('output saved to %s' % figure_path)
    pp.show()
Property changes on: trunk/tools/wsor/contribution_inequality/ginichart.py |
___________________________________________________________________ |
Added: svn:executable |
1 | 106 | + * |