r95261 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r95260 | r95261 | r95262 >
Date:22:03, 22 August 2011
Author:giovanni
Status:deferred
Tags:
Comment:
added estimation of maximum to timechart
Modified paths:
  • /trunk/tools/wsor/editor_lifecycle/lifecycle/cvsmooth.py (modified) (history)
  • /trunk/tools/wsor/editor_lifecycle/timechart (modified) (history)

Diff [purge]

Index: trunk/tools/wsor/editor_lifecycle/timechart
@@ -1,6 +1,6 @@
22 #!/usr/bin/python
33
4 -''' plots cohort rate date '''
 4+''' plots cohort rate versus day since first edit '''
55
66 __author__ = "Giovanni Luca Ciampaglia"
77 __email__ = "gciampaglia@wikimedia.org"
@@ -14,6 +14,8 @@
1515 from argparse import ArgumentParser
1616 from matplotlib.font_manager import FontProperties
1717
 18+from lifecycle.cvsmooth import find_peak
 19+
1820 __prog__ = os.path.basename(__file__)
1921
2022 parser = ArgumentParser(description=__doc__)
@@ -59,24 +61,34 @@
6062 marker = markers[i % M]
6163 if ns.no_errbars:
6264 l, = ax.plot(days, rate, marker=marker, color=color, label=label,
63 - ls=':', lw=2)
 65+ ls='none', lw=2, mfc='none', mec=color)
6466 else:
65 - l, (wu, wd), mc = ax.errorbar(days, rate, rate_err,
66 - marker=marker, color=color, label=label, ecolor='none',
67 - ls=':', lw=2)
 67+ l, (wu, wd), mc = ax.errorbar(days, rate, rate_err / 2.0,
 68+ marker=marker, mec=color, label=label, ecolor='lightgrey',
 69+ ls='none', lw=2, mfc='none')
6870 pp.setp(wd, ls='none')
6971
 72+ xp, spl = find_peak(days, rate, rate_err / 2.0)
 73+
 74+ x = np.linspace(days.min(), days.max(), endpoint=True, num=100)
 75+
 76+ y = spl(x)
 77+
 78+ ax.plot(x, y, label='spline fit', color=color, ls='--', marker='none',
 79+ lw=2)
 80+
 81+ ax.axvline(xp, color=color)
 82+
7083 lines.append(l)
7184
7285 # decorate figure
73 - pp.xlabel('days since registration')
 86+ pp.xlabel('days since first edit')
7487 pp.ylabel('edits/day')
7588 pp.figlegend(lines, [ l.get_label() for l in lines ],
7689 loc='center right', prop=FontProperties(size='small'))
7790 pp.minorticks_on()
7891 pp.grid("on")
7992 pp.axis('tight')
80 -# pp.ylim(0,np.ceil(ymax * 1.01)) # fix this!
8193
8294 pp.draw()
8395 if ns.title is not None:
Index: trunk/tools/wsor/editor_lifecycle/lifecycle/cvsmooth.py
@@ -1,10 +1,11 @@
22 import numpy as np
33 from scipy.interpolate import splrep, splev, UnivariateSpline
4 -from scipy.optimize import fmin
 4+from scipy.optimize import fmin_tnc
 5+import scipy.optimize.tnc as tnc
56
67 def spsmooth(x, y, ye, **kwargs):
78 '''
8 - Finds the best spline smoothing factor by leave-one-out cross validation
 9+ Finds the best spline smoothing factor using leave-one-out cross validation
910
1011 Additional keyword arguments are passed to splrep (e.g. k for the degree)
1112 '''
@@ -55,5 +56,17 @@
5657 '''
5758 s = spsmooth(x, y, ye, k=k)
5859 spl = UnivariateSpline(x, y, ye ** -1, k=k, s=s)
59 - xp = fmin(lambda k : -spl(k), x.mean())
60 - return xp, spl
 60+ f = lambda k : -spl(k)
 61+ fprime = np.vectorize(lambda k : - spl.derivatives(k)[1])
 62+ xp_best = None
 63+ yp_best = -np.inf
 64+ bounds = [(x.min(), x.max())]
 65+ for i in xrange(5):
 66+ x0 = (x.ptp() * np.random.rand() + x.min(),)
 67+ xp, nfeval, rc = fmin_tnc(f, x0, fprime=fprime, bounds=bounds,
 68+ messages=tnc.MSG_NONE)
 69+ yp = spl(xp)
 70+ if yp >= yp_best:
 71+ xp_best = xp
 72+ yp_best = yp
 73+ return xp_best, spl

Status & tagging log