Index: trunk/tools/wsor/editor_lifecycle/timechart |
— | — | @@ -1,6 +1,6 @@ |
2 | 2 | #!/usr/bin/python |
3 | 3 | |
4 | | -''' plots cohort rate date ''' |
| 4 | +''' plots cohort rate versus day since first edit ''' |
5 | 5 | |
6 | 6 | __author__ = "Giovanni Luca Ciampaglia" |
7 | 7 | __email__ = "gciampaglia@wikimedia.org" |
— | — | @@ -14,6 +14,8 @@ |
15 | 15 | from argparse import ArgumentParser |
16 | 16 | from matplotlib.font_manager import FontProperties |
17 | 17 | |
| 18 | +from lifecycle.cvsmooth import find_peak |
| 19 | + |
18 | 20 | __prog__ = os.path.basename(__file__) |
19 | 21 | |
20 | 22 | parser = ArgumentParser(description=__doc__) |
— | — | @@ -59,24 +61,34 @@ |
60 | 62 | marker = markers[i % M] |
61 | 63 | if ns.no_errbars: |
62 | 64 | l, = ax.plot(days, rate, marker=marker, color=color, label=label, |
63 | | - ls=':', lw=2) |
| 65 | + ls='none', lw=2, mfc='none', mec=color) |
64 | 66 | else: |
65 | | - l, (wu, wd), mc = ax.errorbar(days, rate, rate_err, |
66 | | - marker=marker, color=color, label=label, ecolor='none', |
67 | | - ls=':', lw=2) |
| 67 | + l, (wu, wd), mc = ax.errorbar(days, rate, rate_err / 2.0, |
| 68 | + marker=marker, mec=color, label=label, ecolor='lightgrey', |
| 69 | + ls='none', lw=2, mfc='none') |
68 | 70 | pp.setp(wd, ls='none') |
69 | 71 | |
| 72 | + xp, spl = find_peak(days, rate, rate_err / 2.0) |
| 73 | + |
| 74 | + x = np.linspace(days.min(), days.max(), endpoint=True, num=100) |
| 75 | + |
| 76 | + y = spl(x) |
| 77 | + |
| 78 | + ax.plot(x, y, label='spline fit', color=color, ls='--', marker='none', |
| 79 | + lw=2) |
| 80 | + |
| 81 | + ax.axvline(xp, color=color) |
| 82 | + |
70 | 83 | lines.append(l) |
71 | 84 | |
72 | 85 | # decorate figure |
73 | | - pp.xlabel('days since registration') |
| 86 | + pp.xlabel('days since first edit') |
74 | 87 | pp.ylabel('edits/day') |
75 | 88 | pp.figlegend(lines, [ l.get_label() for l in lines ], |
76 | 89 | loc='center right', prop=FontProperties(size='small')) |
77 | 90 | pp.minorticks_on() |
78 | 91 | pp.grid("on") |
79 | 92 | pp.axis('tight') |
80 | | -# pp.ylim(0,np.ceil(ymax * 1.01)) # fix this! |
81 | 93 | |
82 | 94 | pp.draw() |
83 | 95 | if ns.title is not None: |
Index: trunk/tools/wsor/editor_lifecycle/lifecycle/cvsmooth.py |
— | — | @@ -1,10 +1,11 @@ |
2 | 2 | import numpy as np |
3 | 3 | from scipy.interpolate import splrep, splev, UnivariateSpline |
4 | | -from scipy.optimize import fmin |
| 4 | +from scipy.optimize import fmin_tnc |
| 5 | +import scipy.optimize.tnc as tnc |
5 | 6 | |
6 | 7 | def spsmooth(x, y, ye, **kwargs): |
7 | 8 | ''' |
8 | | - Finds the best spline smoothing factor by leave-one-out cross validation |
| 9 | + Finds the best spline smoothing factor using leave-one-out cross validation |
9 | 10 | |
10 | 11 | Additional keyword arguments are passed to splrep (e.g. k for the degree) |
11 | 12 | ''' |
— | — | @@ -55,5 +56,17 @@ |
56 | 57 | ''' |
57 | 58 | s = spsmooth(x, y, ye, k=k) |
58 | 59 | spl = UnivariateSpline(x, y, ye ** -1, k=k, s=s) |
59 | | - xp = fmin(lambda k : -spl(k), x.mean()) |
60 | | - return xp, spl |
| 60 | + f = lambda k : -spl(k) |
| 61 | + fprime = np.vectorize(lambda k : - spl.derivatives(k)[1]) |
| 62 | + xp_best = None |
| 63 | + yp_best = -np.inf |
| 64 | + bounds = [(x.min(), x.max())] |
| 65 | + for i in xrange(5): |
| 66 | + x0 = (x.ptp() * np.random.rand() + x.min(),) |
| 67 | + xp, nfeval, rc = fmin_tnc(f, x0, fprime=fprime, bounds=bounds, |
| 68 | + messages=tnc.MSG_NONE) |
| 69 | + yp = spl(xp) |
| 70 | + if yp >= yp_best: |
| 71 | + xp_best = xp |
| 72 | + yp_best = yp |
| 73 | + return xp_best, spl |