r94722 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r94721 | r94722 | r94723 >
Date: 01:46, 17 August 2011
Author: giovanni
Status: deferred
Tags:
Comment: manually removed old revision
Modified paths:
  • /trunk/tools/wsor/editor_lifecycle/MANIFEST.in (deleted)
  • /trunk/tools/wsor/editor_lifecycle/README.rst (deleted)
  • /trunk/tools/wsor/editor_lifecycle/fetchcohort (deleted)
  • /trunk/tools/wsor/editor_lifecycle/fetchrates (deleted)
  • /trunk/tools/wsor/editor_lifecycle/fitting (deleted)
  • /trunk/tools/wsor/editor_lifecycle/fitting_batch.sh (deleted)
  • /trunk/tools/wsor/editor_lifecycle/graphlife (deleted)
  • /trunk/tools/wsor/editor_lifecycle/lifecycle (deleted)
  • /trunk/tools/wsor/editor_lifecycle/mkcohort (deleted)
  • /trunk/tools/wsor/editor_lifecycle/models.py (deleted)
  • /trunk/tools/wsor/editor_lifecycle/relax (deleted)
  • /trunk/tools/wsor/editor_lifecycle/scale.py (deleted)
  • /trunk/tools/wsor/editor_lifecycle/setup.py (deleted)

Diff

Index: trunk/tools/wsor/editor_lifecycle/models.py
@@ -1,373 +0,0 @@
2 -# coding: utf8
3 -
4 -import numpy as np
5 -from scipy.stats import norm, chisqprob, normaltest
6 -from scipy.optimize import curve_fit
7 -from scipy.special import gamma
8 -from cStringIO import StringIO
9 -import datetime as dt
10 -#from scikits.statsmodels.api import OLS
11 -
12 -__all__ = ['Expon', 'PowerLaw', 'StretchedExpon' ]
13 -
14 -class Parameter(object):
15 - '''
16 - Class for parameter descriptors. Works with ParameterMixin
17 - '''
18 - def __init__(self, name, attrlist):
19 - self.name = name # parameter name
20 - for att in attrlist:
21 - if att.name == name:
22 - raise AttributeError('cannot add parameter {}'.format(name))
23 - attrlist.append(self)
24 - def __get__(self, instance, owner):
25 - if instance is not None:
26 - return instance.__dict__['_' + self.name]
27 - return self
28 - def __set__(self, instance, value):
29 - try:
30 - value, error = value
31 - except TypeError:
32 - value, error = value, None
33 - instance.__dict__['_' + self.name] = value
34 - instance.__dict__[self.name + '_err'] = error
35 - def __repr__(self):
36 - return '<Parameter {} at 0x{}>'.format(self.name, '%x' % id(self))
37 -
38 -class ParameterMixin(object):
39 - '''
40 - Class that lets you look up all Parameter instances in __params__
41 - '''
42 - def itererrors(self):
43 - for p in self.__params__:
44 - yield self.__getattribute__(p.name + '_err')
45 - def errors(self):
46 - return list(self.itererrors())
47 - def iterparams(self):
48 - '''
49 - Returns an iterator over all parameters of this model
50 - '''
51 - for p in self.__params__:
52 - yield self.__getattribute__(p.name)
53 - def params(self):
54 - '''
55 - Returns a tuple of all parameters of this model
56 - '''
57 - return list(self.iterparams())
58 - def setparams(self, *args):
59 - '''
60 - Sets the unset parameters of this model from *args. Parameters that are
61 - already associated with a value will *NOT* be modified by this method.
62 - '''
63 - keyf = lambda p : self.__getattribute__(p.name) is None
64 - for p, a in zip(filter(keyf, self.__params__), args):
65 - setattr(self, p.name, a)
66 -
67 -def _orNA(val, fmt='%8.5g'):
68 - if val is not None:
69 - return fmt % val
70 - else:
71 - return 'N/A'
72 -
73 -class ParametricModel(ParameterMixin):
74 - '''
75 - Callable class with Parameter descriptors. Subclasses of ParametricModel
76 - ought to define, as class attributes, any number of Parameter descriptors,
77 - together with a list (conventional name: `__params__'). See
78 - Parameter.__init__ on how to instantiate a Parameter descriptor.
79 -
80 - Subclasses ought also to define two static methods: `func' and `init'. The
81 - first is the actual function that accepts an argument together with the same
82 - number of parameters as in __params__. The second is used to get initial
83 - estimates for the Levenberg-Marquardt leastsq minimizer used to fit this
84 - model.
85 -
86 - From that point on, any instance of this class acts as the function `func'
87 - itself, with the only differences that it automatically performs partial
88 - application for those Parameter attributes that are being assigned a value.
89 - Example:
90 -
91 - # expon.func(x, A, B) is A * exp(B * x)
92 - >>> expon(1, 2, -1)   # -> 0.73575888234288467
93 - >>> expon.A = 2
94 - >>> expon(1, -1)      # -> 0.73575888234288467
95 - '''
96 - def __init__(self, *args, **kwargs):
97 - keys = [p.name for p in self.__params__]
98 - for k in keys:
99 - if k not in kwargs:
100 - kwargs[k] = None
101 - kwargs.update(zip(keys, args)) # update the rightmost parameters only
102 - for k, v in kwargs.items():
103 - setattr(self, k, v)
104 - self.goftest = tuple([None] * 3)
105 - self.residtest = tuple([None] * 2)
106 - self.Rsquared = None
107 - def __call__(self, x, *args):
108 - '''
109 - See class method `func'
110 - '''
111 - fargs = self.params()
112 - N = len(filter(None, fargs))
113 - if N + len(args) > len(fargs):
114 - raise TypeError('{} accepts only {} '
115 - 'parameters'.format(self.__class__.__name__, len(fargs)))
116 - for a in args:
117 - idx = fargs.index(None)
118 - fargs[idx] = a
119 - fargs = tuple(fargs)
120 - return self.func(x, *fargs)
121 - def fit(self, x, y, ye, **kwargs):
122 - '''
123 - Fits this parametric model to observations (x_i, y_i). Uncertainty in
124 - the y-estimates can be specified with argument `ye'. Additional keyword
125 - arguments are passed to scipy.optimize.curve_fit which in turn passes
126 - them to scipy.optimize.leastsq.
127 - '''
128 - fp0 = self.init(x, y)
129 - fargs = self.params()
130 - p0 = []
131 - for a, p in zip(fargs, fp0):
132 - if a is None:
133 - p0.append(p)
134 - p0 = tuple(p0)
135 - return curve_fit(self, x, y, sigma=ye, p0=p0, **kwargs)
136 - def gof(self, x, y, ye):
137 - '''
138 - Computes GoF test statistics and other diagnostic tests
139 -
140 - Returns:
141 - --------
142 - - GoF test: Chi^2, p-value, and ddof
143 - - Normality of residuals: K^2 and p-value, plus the R^2 coefficient
144 - '''
145 - res = {}
146 - resid = y - self(x)
147 - chisq = np.sum(((resid) / ye) ** 2)
148 - ddof = len(x) - len(filter(None, self.errors())) # number of estimated parameters
149 - chisq_pvalue = chisqprob(chisq, ddof)
150 - gof = (chisq, chisq_pvalue, ddof)
151 - resid = normaltest(resid)
152 - ym = y.mean()
153 - SStot = np.sum((y - ym) ** 2)
154 - SSerr = np.sum((y - self(x)) ** 2)
155 - Rsquared = 1.0 - SSerr / SStot
156 -# Besides being buggy, this test for homoscedasticity is supposed to work only
157 -# for linear regressions, hence is not suited for our case, but I'll keep it
158 -# here until I figure out an alternative. Remember to uncomment the import for
159 -# OLS at the top.
160 -# regresults = OLS(resid ** 2, np.c_[x, x**2]).fit()
161 -# LM =regresults.rsquared
162 -# LM_pvalue = chisqprob(LM, len(x) - ddof)
163 -# white = (LM, LM_pvalue)
164 -# return gof, resid, white
165 - return gof, resid, Rsquared
166 - def __str__(self):
167 - name = self.__class__.__name__
168 - prep = []
169 - for p in self.params():
170 - if p is not None:
171 - prep.append('%3.4g' % p)
172 - else:
173 - prep.append('*')
174 - return '{}({})'.format(name, ','.join(prep))
175 - def __repr__(self):
176 - return '<{} object at 0x{}>'.format(str(self), '%x' % id(self))
177 - def summary(self, **kwargs):
178 - '''
179 - Returns a summary of this model
180 - '''
181 - s = StringIO()
182 - print >> s, ''
183 - print >> s, 'General information'
184 - print >> s, '-------------------'
185 - print >> s, 'model: %s' % self.name.capitalize()
186 - print >> s, 'date: %s' % dt.datetime.now()
187 - for item in kwargs.items():
188 - print >> s, '{}: {}'.format(*map(str, item))
189 - print >> s, ''
190 - print >> s, 'Model parameters'
191 - print >> s, '----------------'
192 - for p, val, err in zip(self.__params__, self.params(), self.errors()):
193 - print >> s, '{}: {} ± {}'.format(p.name, _orNA(val), _orNA(err))
194 - chi, p, ddof = self.goftest
195 - print >> s, ''
196 - print >> s, 'Fit results'
197 - print >> s, '-----------'
198 - print >> s, 'Goodness-of-fit: Chi-squared = {}, p = {}, ddof = {}'.format(
199 - _orNA(chi, '%5.2f'), _orNA(p, '%8.4e'), _orNA(ddof, '%d'))
200 - D, p = self.residtest
201 - print >> s, 'Normality of residuals: K-squared = {}, p = {}'.format(
202 - _orNA(D, '%5.2f'), _orNA(p, '%8.4e'))
203 - print >> s, 'Coefficient of Determination: {}'.format(
204 - _orNA(self.Rsquared, '%5.2f'))
205 - return s.getvalue()
206 -
207 -class Expon(ParametricModel):
208 - '''
209 - y = A * exp( -(x / B)) + C
210 - '''
211 - __params__ = []
212 - A = Parameter('A', __params__)
213 - B = Parameter('B', __params__)
214 - C = Parameter('C', __params__)
215 - name = 'exponential'
216 - @staticmethod
217 - def func(x, a, b, c):
218 - return a * np.exp(-(x / b)) + c
219 - @staticmethod
220 - def init(x, y):
221 - a0 = y[np.argmin(np.abs(x))] # estimate for A = f(0)
222 - b0 = x.max() / 10.0
223 - c0 = y.min()
224 - return (a0, b0, c0)
225 - def fit(self, x, y, ye, **kwargs):
226 - if kwargs.pop('constrained', 0):
227 - self.A = y[np.argmin(np.abs(x))]
228 - return super(Expon, self).fit(x, y, ye, **kwargs)
229 -
230 -class StretchedExpon(ParametricModel):
231 - '''
232 - y = A * exp (-(t / tau) ** beta)
233 - '''
234 - __params__ = []
235 - A = Parameter('A', __params__)
236 - tau = Parameter('tau', __params__)
237 - beta = Parameter('beta', __params__)
238 - name = 'stretched exponential'
239 - @staticmethod
240 - def func(x, a, tau, beta):
241 - return a * np.exp(- (x / tau) ** beta)
242 - @staticmethod
243 - def init(x, y):
244 - a0 = y[np.argmin(np.abs(x))] # estimate for A = f(0)
245 - tau0 = x.max() / 10.0
246 - return (a0, tau0, 0.5)
247 - def fit(self, x, y, ye, **kwargs):
248 - if kwargs.pop('constrained', 0):
249 - self.A = y[np.argmin(np.abs(x))]
250 - return super(StretchedExpon, self).fit(x, y, ye, **kwargs)
251 - def summary(self, **kwargs):
252 - mrt = self.mrt(self.tau, self.beta)
253 - kwargs['Mean relaxation time <tau>'] = '%5.2f days' % mrt
254 - return super(StretchedExpon, self).summary(**kwargs)
255 - def mrt(self, tau, beta):
256 - return (tau / beta) * gamma(beta ** -1)
257 -
258 -class PowerLaw(ParametricModel):
259 - '''
260 - y = A * x ** B
261 - '''
262 - __params__ = []
263 - A = Parameter('A', __params__)
264 - B = Parameter('B', __params__)
265 - name = 'power-law'
266 - @staticmethod
267 - def func(x, a, b):
268 - return a * x ** b
269 - @staticmethod
270 - def init(x, y):
271 - return (1, y.ptp()/x.ptp())
272 -# NR says this code is more robust against roundoff errors, but presently it
273 -# does not work. Bummer.
274 -# def fit(self, x, y, ye, **kwargs):
275 -# x, y, ye = self._removezeros(x, y, ye)
276 -# ye = ye / y
277 -# x = np.log(x)
278 -# y = np.log(y)
279 -# S = np.sum(ye ** -1)
280 -# Sx = np.sum(x / ye)
281 -# Sy = np.sum(y / ye)
282 -# t = (ye ** -1) * (x - Sx / S)
283 -# Stt = np.sum(t ** 2)
284 -# b = Stt ** -1 * np.sum((y * t) / ye)
285 -# a = np.exp((Sy - Sx * b) / S)
286 -# a_var = S ** -1 * (1 + Sx ** 2 / (S * Stt))
287 -# b_var = Stt ** -1
288 -# ab_covar = - Sx / Stt
289 -# pcov = np.asarray([[a_var, ab_covar], [ab_covar, b_var]])
290 -# return (a, b), pcov
291 - def fit(self, x, y, ye, **kwargs):
292 - '''
293 - Fit by linear least squares of log-transformed data
294 - '''
295 - x, y, ye = self._removezeros(x, y, ye)
296 - ye = (ye / y) ** 2
297 - x = np.log(x)
298 - y = np.log(y)
299 - S = np.sum(ye ** -1)
300 - Sx = np.sum(x / ye)
301 - Sy = np.sum(y / ye)
302 - Sxx = np.sum(x ** 2 / ye)
303 - Sxy = np.sum((x * y) / ye)
304 - Delta = S * Sxx - Sx ** 2
305 - a = np.exp((Sxx * Sy - Sx * Sxy) / Delta)
306 - b = (S * Sxy - Sx * Sy) / Delta
307 - a_var = Sxx / Delta
308 - b_var = S / Delta
309 - ab_covar = - Sx / Delta
310 - pcov = np.asarray([[a_var, ab_covar], [ab_covar, b_var]])
311 - return (a, b), pcov
312 - def gof(self, x, y, ye):
313 - '''
314 - GoF of linear least squares of log-transformed data
315 - '''
316 - x, y, ye = self._removezeros(x, y, ye)
317 - ye = (ye / y)
318 - x = np.log(x)
319 - y = np.log(y)
320 - yp = np.log(self.A) + self.B * x
321 - chisq = np.sum(((yp - y) / ye) ** 2)
322 - ddof = len(x) - len(filter(None, self.errors())) # number of estimated parameters
323 - chisq_pvalue = chisqprob(chisq / 2., ddof)
324 - resid = normaltest(y - yp)
325 - ym = y.mean()
326 - SStot = np.sum((y - ym) ** 2)
327 - SSerr = np.sum((y - yp) ** 2)
328 - Rsquared = 1.0 - SSerr / SStot
329 - return (chisq, chisq_pvalue, ddof), resid, Rsquared
330 - @staticmethod
331 - def _removezeros(x, y, ye):
332 - idx = x > 0
333 - return x[idx], y[idx], ye[idx]
334 -
335 -if __name__ == '__main__':
336 -
337 - import matplotlib.pyplot as pp
338 - import scale
339 -
340 - model = StretchedExpon()
341 -
342 - a = 2
343 - tau = 100
344 - beta = .5
345 - c = 0.
346 - s = 0.1
347 - xmax = 1000
348 - x = np.linspace(0, xmax, 50)
349 - y = model(x, a, tau, beta, c) + np.random.randn(len(x)) * s
350 -
351 - pest, pcov = model.fit(x, y, s)
352 -
353 - model.setparams(*zip(pest, np.sqrt(np.diag(pcov))))
354 -
355 - xx = np.linspace(0, xmax, 1000)
356 - yy = model(xx)
357 -
358 - pp.errorbar(x, y, s, fmt='. ', color='k', ecolor='none', label='data')
359 - pp.plot(xx, yy, 'r-', label='Stretch. Exp. fit')
360 - pp.xscale('power', exponent=beta)
361 - pp.yscale('log')
362 -
363 - pp.legend()
364 - gof, resid, Rsquared = model.gof(x, y, s)
365 - model.goftest = gof
366 - model.residtest = resid
367 - model.Rsquared = Rsquared
368 - print model.summary()
369 - chi, p, ddof = gof
370 - pp.text(200, 1, r'$\chi^2 = %.2f,\, p-{\rm value} = %5.2g,\,'
371 - r'{\rm ddof} = %d,\, R^2 = %.2f$'
372 - % (chi,p,ddof, Rsquared),
373 - fontsize=16)
374 - pp.show()
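
A note on the pattern above: Parameter descriptors store a value and an error for each model parameter, and ParametricModel.__call__ does partial application, holding parameters that are already assigned fixed while filling the remaining ones from the call (this is what the constrained fits in Expon and StretchedExpon rely on). One caveat in the original: filter(None, ...) in __call__ and gof() also treats a parameter legitimately set to 0 as unset. A minimal, self-contained sketch of the same idea in modern Python (MiniExpon is illustrative, not part of the package):

    import numpy as np
    from scipy.optimize import curve_fit

    class MiniExpon:
        # y = A * exp(-(x / B)) + C; None marks a free parameter
        param_names = ('A', 'B', 'C')

        def __init__(self):
            for name in self.param_names:
                setattr(self, name, None)

        @staticmethod
        def func(x, a, b, c):
            return a * np.exp(-(x / b)) + c

        def __call__(self, x, *free):
            it = iter(free)
            # fixed parameters come from the instance, free ones from *free
            args = [getattr(self, n) if getattr(self, n) is not None else next(it)
                    for n in self.param_names]
            return self.func(x, *args)

    x = np.linspace(0.0, 10.0, 50)
    y = MiniExpon.func(x, 2.0, 3.0, 0.5) + 0.01 * np.random.randn(50)
    model = MiniExpon()
    model.A = 2.0   # constrain A = f(0), as the 'constrained' option does
    (b_est, c_est), pcov = curve_fit(lambda x, b, c: model(x, b, c),
                                     x, y, p0=(1.0, 0.0))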
Index: trunk/tools/wsor/editor_lifecycle/MANIFEST.in
@@ -1,3 +0,0 @@
2 -include *.py
3 -include *.sh
4 -include db.cfg
Index: trunk/tools/wsor/editor_lifecycle/relax
@@ -1,49 +0,0 @@
2 -#!/usr/bin/python
3 -#:vim:ft=python
4 -
5 -''' batch model fitting '''
6 -
7 -import re
8 -import os
9 -import sys
10 -import numpy as np
11 -from argparse import ArgumentParser
12 -from models import StretchedExpon
13 -from datetime import datetime
14 -
15 -__prog__ = os.path.basename(os.path.abspath(__file__))
16 -
17 -parser = ArgumentParser(description=__doc__)
18 -parser.add_argument('data', nargs='+')
19 -
20 -ns = parser.parse_args()
21 -
22 -output = []
23 -
24 -# XXX format should not be fixed to 2010-1 !!
25 -# TODO should use the len file
26 -
27 -for d in ns.data:
28 - k = re.match('(.*?)_.+\.?.*', d).groups()[0]
29 - t = datetime.strptime(k, '%Y-%m')
30 - if not os.path.exists(d):
31 - continue
32 - x, y, ye = np.loadtxt(d, unpack=1)
33 - model = StretchedExpon()
34 - model.A = y[0]
35 - idx = ye > 0
36 - x = x[idx]
37 - y = y[idx]
38 - ye = ye[idx]
39 - if len(x)>10: # st. dev. 0 means only 1 user
40 - pest, pcov = model.fit(x, y, ye, maxfev=100000, warning=False)
41 - perr = np.sqrt(np.diag(pcov)) / 2.
42 - model.setparams(*zip(pest,perr))
43 - mrt = model.mrt(model.tau, model.beta)
44 - else:
45 - mrt = np.nan
46 - output.append((t, mrt))
47 -
48 -output = np.asarray(output, dtype=np.dtype([('date', object), ('mrt', np.double)]))
49 -np.save('mrt.npy', output)
50 -print 'output saved to mrt.npy'
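
The quantity saved per cohort above is the mean relaxation time from StretchedExpon.mrt, i.e. <tau> = (tau / beta) * Gamma(1 / beta), which reduces to tau when beta = 1. A quick standard-library check of the formula:

    from math import gamma

    def mean_relaxation_time(tau, beta):
        # <tau> = (tau / beta) * Gamma(1 / beta); equals tau for beta == 1
        return (tau / beta) * gamma(1.0 / beta)

    assert abs(mean_relaxation_time(100.0, 1.0) - 100.0) < 1e-9
    print(mean_relaxation_time(100.0, 0.5))   # 200.0: stretching doubles <tau> here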
Index: trunk/tools/wsor/editor_lifecycle/fitting_batch.sh
@@ -1,43 +0,0 @@
2 -#!/bin/bash
3 -
4 -# Applies the `fitting' script to a batch of files
5 -#
6 -# author: Giovanni Luca Ciampaglia <gciampaglia@wikimedia.org>
7 -#
8 -# USAGE: fitting_batch.sh file1 file2 file3 ...
9 -#
10 -# This will produce the normal console output that fitting produces; the PDF
11 -# plots are merged into fit.pdf (note: the script refuses to run if fit.pdf
12 -# already exists)
13 -
14 -if [[ -z `type -p fitting` ]] ; then
15 - echo 'error: could not find fitting script. Check your PATH'
16 - exit 1
17 -fi
18 -
19 -if [[ -e fit.pdf ]] ; then
20 - echo 'error: cannot overwrite file fit.pdf'
21 - exit 1
22 -fi
23 -
24 -O=`mktemp -d`
25 -models="expon powerlaw stretchedexp"
26 -files="$@"
27 -
28 -for file in $files ; do
29 - for model in $models ; do
30 - fitting $model -force -loglog -batch $file -o $O/${file%.*}_$model.pdf
31 - echo
32 - echo
33 - done
34 -done
35 -
36 -pdfs=`ls $O/*.pdf | sort`
37 -
38 -gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=fit.pdf -dBATCH $pdfs &>/dev/null
39 -
40 -if [[ $? = 0 ]] ; then
41 - echo 'images saved in fit.pdf'
42 -else
43 - echo "error: problem saving fit.pdf. Individual image files in $O"
44 -fi
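
The Ghostscript call at the end is what merges the per-model PDFs into fit.pdf. If this driver were ever ported to Python like the rest of the package, the merge could be run from the standard library; a sketch, with a hypothetical temporary directory standing in for $O and the gs flags taken from the script above:

    import glob
    import subprocess

    pdfs = sorted(glob.glob('/tmp/fit_pages/*.pdf'))   # hypothetical temp dir
    subprocess.check_call(['gs', '-dNOPAUSE', '-dBATCH', '-sDEVICE=pdfwrite',
                           '-sOUTPUTFILE=fit.pdf'] + pdfs)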
Index: trunk/tools/wsor/editor_lifecycle/scale.py
@@ -1,68 +0,0 @@
2 -from matplotlib.scale import ScaleBase, register_scale
3 -from matplotlib.transforms import Transform, nonsingular
4 -from matplotlib.ticker import LinearLocator, Formatter
5 -from math import ceil, floor
6 -import numpy as np
7 -
8 -class PowerScale(ScaleBase):
9 - name ='power'
10 - def __init__(self, axis, **kwargs):
11 - ScaleBase.__init__(self)
12 - exponent = kwargs.pop('exponent')
13 - if exponent <= 0:
14 - raise ValueError('exponent must be positive')
15 - self.exponent = exponent
16 - def get_transform(self):
17 - return PowerTransform(self.exponent)
18 - def set_default_locators_and_formatters(self, axis):
19 - axis.set_major_locator(PowerLocator(self.exponent))
20 - axis.set_major_formatter(PowerFormatter(self.exponent))
21 - axis.set_minor_formatter(PowerFormatter(self.exponent))
22 -
23 -class PowerLocator(LinearLocator):
24 - def __init__(self, exponent, **kwargs):
25 - LinearLocator.__init__(self, **kwargs)
26 - self.exponent = exponent
27 - self.numticks = 5
28 - def __call__(self):
29 - vmin, vmax = self.axis.get_view_interval()
30 - vmin, vmax = nonsingular(vmin, vmax, expander = 0.05)
31 - vmin = vmin ** self.exponent
32 - vmax = vmax ** self.exponent
33 - if vmax<vmin:
34 - vmin, vmax = vmax, vmin
35 -
36 - ticklocs = np.linspace(vmin, vmax, num=self.numticks, endpoint=True)
37 - return self.raise_if_exceeds(ticklocs ** (1.0 / self.exponent))
38 -
39 -class PowerFormatter(Formatter):
40 - def __init__(self, exponent):
41 - self.exponent = exponent
42 - def __call__(self, x, pos=None):
43 - return u'%.2g' % (x ** (1.0 / self.exponent))
44 -
45 -class PowerTransform(Transform):
46 - input_dims = 1
47 - output_dims = 1
48 - is_separable = True
49 - def __init__(self, exponent):
50 - Transform.__init__(self)
51 - self.exponent = exponent
52 - def transform(self, a):
53 - return a ** self.exponent
54 - def inverted(self):
55 - return PowerTransform(1.0 / self.exponent)
56 -
57 -register_scale(PowerScale)
58 -
59 -if __name__ == '__main__':
60 - from pylab import *
61 - import numpy as np
62 - tau = 20
63 - beta = 0.5
64 - x = np.linspace(0,100, num=10)
65 - y = np.exp(-(x / tau) ** beta)
66 - plot(x, y, 'o ', mfc='none', mew=2)
67 - xscale('power', exponent=beta)
68 - yscale('log', basey=10)
69 - show()
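
The rationale for this scale: a stretched exponential y = exp(-(x/tau)**beta) satisfies log y = -(1/tau)**beta * x**beta, so it plots as a straight line when the x axis is raised to the power beta and the y axis is logarithmic. A short check of that linearization, independent of matplotlib:

    import numpy as np

    tau, beta = 20.0, 0.5
    x = np.linspace(1.0, 100.0, 5)
    logy = -(x / tau) ** beta                  # log of the stretched exponential
    slope = np.diff(logy) / np.diff(x ** beta)
    print(slope)                               # constant: -(1/tau)**beta everywhere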
Index: trunk/tools/wsor/editor_lifecycle/lifecycle
@@ -1,158 +0,0 @@
2 -#!/usr/bin/python
3 -#:vim:ts=python:
4 -
5 -''' compute editor lifecycle '''
6 -
7 -import re
8 -import os
9 -from argparse import ArgumentParser
10 -import numpy as np
11 -from collections import deque
12 -import datetime as dt
13 -
14 -__prog__ = os.path.basename(os.path.abspath(__file__))
15 -
16 -def estimaterate(edits, step):
17 - '''
18 - This function takes the daily edit history of an individual editor, and a
19 - step parameter; it estimates the daily activity of the editor. It returns
20 - the average daily rate over consecutive windows of `step' days.
21 - '''
22 - N = len(edits)
23 - if N % step:
24 - NN = np.ceil(N / float(step)) * step
25 - tmp = np.zeros((NN,), dtype=edits.dtype)
26 - tmp[:N] = edits
27 - edits = tmp
28 - return edits.reshape((-1, step)).sum(axis=-1) / float(step)
29 -
30 -def itercycles(npzarchive, every, users=None):
31 - '''
32 - Iterates over the archive or over given list of users and returns estimated
33 - activity life cycle (see estimaterate())
34 - '''
35 - for uid in (users or npzarchive.files):
36 - days, edits = npzarchive[uid].T
37 - days = days - days.min()
38 - rates = estimaterate(edits, every)
39 - yield np.c_[days[::every], rates]
40 -
41 -def averagecycle(ratesbyday):
42 - '''
43 - Computes the average cycle with standard errors. Takes as input a dictionary
44 - returned by groupbyday()
45 - '''
46 - all_days = sorted(ratesbyday.keys())
47 - result = deque()
48 - for d in all_days:
49 - s = ratesbyday[d]
50 - sqN = np.sqrt(len(s))
51 - result.append((d, np.mean(s), np.std(s) / sqN))
52 - return np.asarray(result)
53 -
54 -def groupbyday(npzarchive, every, users=None):
55 - '''
56 - This function estimates editors' activity rates and groups rate estimates by
57 - number of days elapsed since editor registration (which corresponds to time = 0)
58 - '''
59 - tmp = {}
60 - for cyclearr in itercycles(npzarchive, every, users):
61 - for d, r in cyclearr:
62 - try:
63 - tmp[d].append(r)
64 - except KeyError:
65 - tmp[d] = deque([r])
66 - return tmp
67 -
68 -def lifetimes(npzarchive, users=None):
69 - '''
70 - Returns the distribution of account lifetimes over an archive. Can take an
71 - optional list users ids to restrict the sample to a specific group of
72 - editors
73 - '''
74 - lt = deque()
75 - for uid in (users or npzarchive.files):
76 - days, edits = npzarchive[uid].T
77 - lt.append(days.ptp())
78 - return np.asarray(lt)
79 -
80 -def find_inactives(npzarchive, inactivity, minimum_activity, maximum_activity):
81 - now = dt.datetime.now().toordinal()
82 - epoch = dt.datetime(1970,1,1).toordinal()
83 - unix_now = now - epoch
84 - inactives = deque()
85 - for uid in npzarchive.files:
86 - days, edits = npzarchive[uid].T
87 - if days.ptp() <= inactivity:
88 - continue
89 - unix_last = days[-1]
90 - if (unix_now - unix_last) > inactivity:
91 - tot_edits = float(edits.sum())
92 - tot_days = float(days.ptp() - inactivity)
93 - activity = tot_edits / tot_days * 365.0
94 - if minimum_activity < activity and maximum_activity > activity:
95 - inactives.append(uid)
96 - return inactives
97 -
98 -parser = ArgumentParser(description=__doc__)
99 -parser.add_argument('data_file', metavar='data')
100 -parser.add_argument(metavar='minact', type=int, dest='minimum_activity')
101 -parser.add_argument(metavar='maxact', type=int, dest='maximum_activity')
102 -parser.add_argument('-key')
103 -parser.add_argument('-every', type=int, help='default: %(default)d days',
104 - default=30, metavar='NUM')
105 -parser.add_argument('-inactivity', type=int, default=180, help='default: '
106 - '%(default)d days', metavar='NUM')
107 -parser.add_argument('-all', dest='dump_all', action='store_true')
108 -
109 -
110 -def main(ns):
111 - if ns.key is None:
112 - m = re.match('(.*?)\.npz', ns.data_file, re.I)
113 - if m is not None:
114 - ns.key = m.groups()[0]
115 - else:
116 - print >> sys.stderr, '%s: cannot determine key from file name: %s'\
117 - % (__prog__, ns.data_file)
118 - sys.exit(1)
119 - if ns.minimum_activity >= ns.maximum_activity:
120 - print >> sys.stderr, '%s: error: minact >= maxact' % __prog__
121 - sys.exit(1)
122 -
123 - # load data
124 - npzarchive = np.load(ns.data_file)
125 -
126 - if ns.dump_all:
127 - fn = mkfn('cycles', ns, 'npz')
128 - values_iter = itercycles(npzarchive, ns.every)
129 - keys = npzarchive.files
130 - tmp = dict(zip(keys, list(values_iter)))
131 - np.savez(fn, **tmp)
132 - print '%s: output saved to %s' % (__prog__, fn)
133 - else:
134 - # compute lifetime distribution
135 - lt = lifetimes(npzarchive)
136 -
137 - # compute inactive subgroups
138 - inactive_users = find_inactives(npzarchive, ns.inactivity, ns.minimum_activity,
139 - ns.maximum_activity)
140 -
141 - ratesbyday = groupbyday(npzarchive, ns.every)
142 - ratesbyday_inact = groupbyday(npzarchive, ns.every, inactive_users)
143 -
144 - avg_all = averagecycle(ratesbyday)
145 - avg_inact = averagecycle(ratesbyday_inact)
146 -
147 - lens = [ len(npzarchive.files), len(inactive_users) ]
148 -
149 - names = ['lt', 'len', 'all', 'inact' ]
150 - arrs = [ lt, lens, avg_all, avg_inact ]
151 -
152 - for n, a in zip(names, arrs):
153 - fn = '%s_%s.%s' % (ns.key, n, 'tsv')
154 - np.savetxt(fn, a)
155 - print '%s: output saved to %s' % (__prog__, fn)
156 -
157 -if __name__ == '__main__':
158 - ns = parser.parse_args()
159 - main(ns)
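
Two review notes on this script: main() writes to sys.stderr and calls sys.exit without ever importing sys, and mkfn() in the --all branch is not defined anywhere in the file, so both code paths would fail at runtime as committed. The windowing in estimaterate() itself is sound; a minimal sketch of it (as in the original, the zero padding biases the final window downward):

    import numpy as np

    def estimate_rate(edits, step):
        # pad the daily series to a multiple of `step`, then average each window
        n = len(edits)
        padded_len = int(np.ceil(n / float(step))) * step
        padded = np.zeros(padded_len, dtype=float)
        padded[:n] = edits
        return padded.reshape(-1, step).mean(axis=1)

    daily = np.array([1, 0, 2, 3, 0, 0, 1])   # seven days of edit counts
    print(estimate_rate(daily, 3))            # [1.0, 1.0, 0.333...]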
Index: trunk/tools/wsor/editor_lifecycle/graphlife
@@ -1,85 +0,0 @@
2 -#!/usr/bin/python
3 -
4 -''' plot editor life cycle '''
5 -
6 -import sys
7 -import numpy as np
8 -from argparse import ArgumentParser
9 -import os
10 -
11 -__prog__ = os.path.basename(os.path.abspath(__file__))
12 -
13 -parser = ArgumentParser(description=__doc__)
14 -parser.add_argument('data_files', metavar='data', nargs='+')
15 -parser.add_argument('-l', '--label', metavar='TEXT', action='append',
16 - dest='labels_list', default=[])
17 -parser.add_argument('-inset', dest='inset_data_file', metavar='FILE')
18 -parser.add_argument('-batch', action='store_true', help='uses PDF backend')
19 -parser.add_argument('-title')
20 -parser.add_argument('-fmt', default='pdf', help='default: %(default)s')
21 -
22 -if __name__ == '__main__':
23 - ns = parser.parse_args()
24 -
25 - # checks
26 - if len(ns.data_files) != len(ns.labels_list):
27 - print >> sys.stderr, '%s: error: please provide as many labels '\
28 - 'as lines' % __prog__
29 - sys.exit(1)
30 -
31 - # import pyplot, make lists of colors and markers
32 - if ns.batch:
33 - import matplotlib
34 - matplotlib.use('PDF')
35 - import matplotlib.pyplot as pp
36 - from matplotlib.lines import lineMarkers as markers
37 - markers = dict(filter(
38 - lambda k : isinstance(k[0], str) and k[1] != '_draw_nothing',
39 - markers.items())).keys()
40 - colors = 'krbgm'
41 -
42 - # create figure and axes
43 - fig = pp.figure()
44 - ax = pp.axes([.1, .1, .85, .8])
45 -
46 - # add lines
47 - N = len(ns.data_files)
48 - for i in xrange(N):
49 - data_file = ns.data_files[i]
50 - label = ns.labels_list[i]
51 - color = colors[i % len(colors)]
52 - marker = markers[i % len(markers)]
53 - x, y, ye = np.loadtxt(data_file, unpack=1)
54 - ax.errorbar(x, y, ye, color=color, marker=marker, mfc='none',
55 - mec=color, ls=':', label=label)
56 -
57 - ax.legend(loc=2)
58 - ax.set_xlabel('days since registration')
59 - ax.set_ylabel('edits/day')
60 - if ns.title is not None:
61 - ax.set_title(ns.title)
62 - ax.axis('tight')
63 -
64 - # plot hist of lifetimes in inset axes
65 - lt = np.loadtxt(ns.inset_data_file)
66 - inax = pp.axes([.55, .6, .35, .25], axisbg='none')
67 - inax.hist(lt, bins=20, fc='none', cumulative=-1, normed=0)
68 - for l in inax.xaxis.get_ticklabels():
69 - l.set_rotation(30)
70 - l.set_fontsize('x-small')
71 - for l in inax.yaxis.get_ticklabels():
72 - l.set_fontsize('x-small')
73 - inax.set_xlabel('lifespan $x$ (days)', fontsize='small')
74 - inax.set_ylabel('no. of users living\nlonger than $x$ days', fontsize='small')
75 - inax.set_title('account lifetime')
76 - inax.axis('tight')
77 -
78 - pp.draw()
79 - if ns.title is not None:
80 - fn = ns.title.replace(' ', '_').lower() + '.' + ns.fmt
81 - else:
82 - fn = 'output.' + ns.fmt
83 - print 'output saved to %s' % fn
84 -
85 - pp.savefig(fn, fmt=ns.fmt)
86 - pp.show()
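
The inset histogram uses cumulative=-1, a reverse-cumulative count: each bin shows how many accounts have a lifetime of at least that bin's left edge, which is the quantity named on the inset's y axis. Note also that the inset is drawn unconditionally, so omitting -inset makes np.loadtxt fail. The same reverse-cumulative numbers computed by hand (a sketch with toy lifetimes):

    import numpy as np

    lifetimes = np.array([5, 30, 30, 120, 400])      # account lifetimes in days
    counts, edges = np.histogram(lifetimes, bins=[0, 50, 100, 500])
    ccum = counts[::-1].cumsum()[::-1]               # matplotlib's cumulative=-1
    print(ccum)                                      # [5 2 2]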
Index: trunk/tools/wsor/editor_lifecycle/README.rst
@@ -1,34 +0,0 @@
2 -============
3 -README
4 -============
5 -
6 -Workflow
7 -
8 -This package is a collection of Python and shell scripts that assist in
9 -creating and analyzing data on the editor life cycle.
10 -
11 -Sample selection
12 -
13 -TBD
14 -
15 -Edit activity data collection
16 -
17 -First use `fetchrates` to download the rate data from the MySQL database. This
18 -script takes a user_id as input and stores the rate data in a file called
19 -<user_id>.npy. The script can be run in parallel; at the end you will have one
20 -NPY file per user.
21 -
22 -Cohort selection
23 -
24 -See the docstring in `mkcohort`.
25 -
26 -Cohort analysis
27 -
28 -See `graphlife`, `fitting`, `fitting_batch.sh`, and `relax`.
29 -
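
Every intermediate file in this workflow is a plain NumPy container, so any stage can be inspected directly. A sketch, assuming a cohort archive 2010-01.npz produced by mkcohort from the per-user NPY files written by fetchrates (the archive name is hypothetical):

    import numpy as np

    arch = np.load('2010-01.npz')          # one entry per user id in the cohort
    uid = arch.files[0]
    days, edits = arch[uid].T              # per-user rows of (day, edits that day)
    print(uid, np.ptp(days), edits.sum())  # account lifetime and total edits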
Index: trunk/tools/wsor/editor_lifecycle/fitting
@@ -1,125 +0,0 @@
2 -#!/usr/bin/python
3 -# coding: utf-8
4 -# :vim:ft=python
5 -
6 -''' editor lifecycle data fitting tool '''
7 -
8 -import sys
9 -import os
10 -from functools import partial
11 -import numpy as np
12 -from argparse import ArgumentParser
13 -from scipy.optimize import curve_fit
14 -
15 -from models import Expon, PowerLaw, StretchedExpon
16 -import scale
17 -
18 -__prog__ = os.path.basename(os.path.abspath(__file__))
19 -
20 -_maxfev = 10000
21 -
22 -parent = ArgumentParser(add_help=False)
23 -parent.add_argument('data_file', metavar='DATA')
24 -parent.add_argument('-output', dest='output_file', metavar='FILE')
25 -parent.add_argument('-title')
26 -group = parent.add_mutually_exclusive_group()
27 -group.add_argument('-loglog', action='store_true')
28 -group.add_argument('-loglin', action='store_true')
29 -parent.add_argument('-constrained', action='store_true')
30 -parent.add_argument('-batch', action='store_true', help='do not show graphics')
31 -parent.add_argument('-force', action='store_true', help='force overwrite')
32 -
33 -parser = ArgumentParser(description=__doc__)
34 -subparsers = parser.add_subparsers(help='Parametric models supported')
35 -
36 -parser_expon = subparsers.add_parser('expon', parents=[parent])
37 -parser_expon.set_defaults(modelclass=Expon)
38 -
39 -parser_stretch = subparsers.add_parser('stretchedexp', parents=[parent])
40 -parser_stretch.set_defaults(modelclass=StretchedExpon)
41 -
42 -parser_power = subparsers.add_parser('powerlaw', parents=[parent])
43 -parser_power.set_defaults(modelclass=PowerLaw)
44 -
45 -def plotfit(model, x, y, ye, data=None):
46 - xx = np.linspace(x.min(), x.max(), endpoint=True, num=1000)
47 - yy = model(xx)
48 - pp.errorbar(x, y, ye / 2, fmt='. ', label=data or 'data', color='k', ecolor='none')
49 - model_label = model.name.split()
50 - if len(model_label) > 1:
51 - model_label[1] = model_label[1][:3] + '.'
52 - model_label = ' '.join(model_label[:2]).capitalize()
53 - pp.plot(xx, yy, 'r--', label='{} fit'.format(model_label), lw=2.5)
54 - if ns.loglog:
55 - pp.xscale('log')
56 - pp.yscale('log')
57 - elif ns.loglin:
58 - pp.xscale('power', exponent=model.beta)
59 - pp.yscale('log')
60 - pp.legend(loc='best')
61 - if ns.title is not None:
62 - pp.title(ns.title)
63 - pp.xlabel('Days since registration')
64 - pp.ylabel('Edits/day')
65 -
66 - # residuals - uncomment lines to produce relative residuals plots
67 - pp.figure()
68 - r = model(x) - y
69 -# rm = r[True - np.isinf(r)].max()
70 -# r /= np.abs(rm)
71 - pp.axhline(y=0, c='k')
72 - pp.plot(x, r, '.:k')
73 - pp.title('Fit residuals')
74 - pp.xlabel('Days since registration')
75 -# pp.ylabel(r'Relative residual $\xi / \max{|\xi|}$')
76 -# pp.ylim(-1,1)
77 - pp.draw()
78 -
79 -def _testoverwrite(*files):
80 - exit_flag = False
81 - for fn in files:
82 - if os.path.exists(fn):
83 - exit_flag = True
84 - print '%s: error: cannot overwrite %s' % (__prog__, fn)
85 - if exit_flag:
86 - sys.exit(1)
87 -
88 -def main(ns):
89 - x, y, ye = np.loadtxt(ns.data_file, unpack=True)
90 - model = ns.modelclass()
91 - if ns.constrained:
92 - pest, pcov = model.fit(x, y, ye=ye, maxfev=_maxfev, constrained=1)
93 - else:
94 - pest, pcov = model.fit(x, y, ye=ye, maxfev=_maxfev)
95 - perr = np.sqrt(np.diag(pcov)) / 2.
96 - model.setparams(*zip(pest, perr))
97 - gof, resid, Rsquared = model.gof(x, y, ye)
98 - model.goftest = gof
99 - model.residtest = resid
100 - model.Rsquared = Rsquared
101 - print model.summary(dataset=ns.data_file, observations=len(x))
102 - plotfit(model, x, y, ye, data=os.path.splitext(ns.data_file)[0])
103 - if ns.output_file is not None:
104 - fn, ext = os.path.splitext(ns.output_file)
105 - fmt = ext[1:]
106 - if ns.batch and fmt.lower() != 'pdf':
107 - print '%s: error: batch mode supports only PDF format' % __prog__
108 - sys.exit(1)
109 - resid_output_file = fn + '_residuals' + ext
110 - if not ns.force:
111 - _testoverwrite(ns.output_file, resid_output_file)
112 - pp.figure(1)
113 - pp.savefig(ns.output_file, format=fmt)
114 - print '%s: output saved to %s' % (__prog__, ns.output_file)
115 - pp.figure(2)
116 - pp.savefig(resid_output_file, format=fmt)
117 - print '%s: output saved to %s' % (__prog__, resid_output_file)
118 - pp.show()
119 -
120 -if __name__ == '__main__':
121 - ns = parser.parse_args()
122 - if ns.batch:
123 - import matplotlib
124 - matplotlib.use('PDF')
125 - import matplotlib.pyplot as pp
126 - main(ns)
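
The diagnostics printed by summary() come from the models' gof() methods: a chi-squared of the residuals weighted by the reported measurement errors, its survival-function p-value, and the coefficient of determination. Note that scipy.stats.chisqprob, used throughout models.py, has since been removed from SciPy; chi2.sf is the modern equivalent. A sketch of the same statistics:

    import numpy as np
    from scipy.stats import chi2

    def gof(y, yfit, yerr, n_params):
        chisq = np.sum(((y - yfit) / yerr) ** 2)
        ddof = len(y) - n_params          # degrees of freedom
        pvalue = chi2.sf(chisq, ddof)     # replaces the removed chisqprob
        rsq = 1.0 - np.sum((y - yfit) ** 2) / np.sum((y - y.mean()) ** 2)
        return chisq, pvalue, ddof, rsq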
Index: trunk/tools/wsor/editor_lifecycle/mkcohort
@@ -1,118 +0,0 @@
2 -#!/usr/bin/python
3 -# coding: utf-8
4 -# :vim:ft=python
5 -
6 -''' creates cohort files, filtering out bots '''
7 -
8 -'''
9 -This script reads an index file, which is a tab-separated text file like the
10 -following:
11 -
12 - 34 WojPob 20010129110725 2524
13 - 94 AstroNomer 20010207222248 1532
14 - 43 Lee Daniel Crocker 20010314020407 4388
15 - 86 Stephen Gilbert 20010326191355 3599
16 - 3 Tobias Hoevekamp 20010326202105 1903
17 - 1273 Wathiik 20010510171751 1772
18 - 3371 Arno 20010721180708 2700
19 - 122 Ap 20010722201619 2137
20 - 182 Rjstott 20010726102546 2602
21 - 64 Uriyan 20010727141651 1634
22 -
23 -Where fields are: id, name, date, count. Dates are parsed using dateutil, so
24 -other formats are allowed too (e.g. 2010-01-29 11:07:25). Currently the last
25 -column (editcount) is not used, but the script still expects to find it, so you
26 -can put whatever you want in it.
27 -
28 -The script will aggregate users based on the date field and will look up
29 -files of the form <id>.npy in the current directory. These files contain the
30 -daily edit counts for each individual user, stored using the NumPy binary array
31 -format. If the data files are not in the current directory, a different path
32 -can be specified on the command line (-P/--datapath).
33 -
34 -Based on the level of aggregation (say: months), the script will create a
35 -compressed ZIP archive with the user edit counts data files (e.g.: 2010-01.npz
36 -for all users from January 2010). This compressed archive can be later processed
37 -with the script `fitting' or with the load() function from NumPy.
38 -
39 -The script prints the name of each file it produces, how many users the file
40 -contains, and how many suspected bot users it filtered out from the index
41 -(use --bots to include them). A user is filtered out based on the name
42 -field: if the name contains the pattern 'bot' at the beginning or at the end of
43 -any word, it will be filtered out (e.g. "Botuser IV" will match, but "Francis
44 -Abbott" won't).
45 -
46 -Please note that the index file must already be sorted by date for the
47 -group-by-date aggregation to work. You can use `sort' from the command line,
48 -e.g.:
49 -
50 - $~ sort -t$'\t' -k3 -h unsorted.tsv
51 -
52 -should sort file unsorted.tsv.
53 -'''
54 -
55 -import re
56 -import os
57 -import sys
58 -import csv
59 -from argparse import ArgumentParser, FileType
60 -from contextlib import closing
61 -from itertools import groupby
62 -from dateutil.parser import parser as DateParser
63 -from zipfile import ZipFile
64 -
65 -__prog__ = os.path.basename(os.path.abspath(__file__))
66 -_botpat = r'\bbot|bot\b'
67 -_fields = ['id', 'name', 'date', 'count']
68 -
69 -def yearkey(date):
70 - return date.year,
71 -
72 -def monthkey(date):
73 - return date.year, date.month
74 -
75 -def daykey(date):
76 - return date.year, date.month, date.day
77 -
78 -parser = ArgumentParser(description=__doc__)
79 -parser.add_argument('index', type=FileType('r'), help='*must* be already sorted')
80 -group = parser.add_mutually_exclusive_group(required=1)
81 -group.add_argument('--year', help='group by year', action='store_const',
82 - const=yearkey, dest='keyfunc')
83 -group.add_argument('--month', help='group by month', action='store_const',
84 - const=monthkey, dest='keyfunc')
85 -group.add_argument('--day', help='group by day', action='store_const',
86 - const=daykey, dest='keyfunc')
87 -parser.add_argument('--bots', action='store_true', help='do NOT filter out bots')
88 -parser.add_argument('-P', '--datapath', help='data files location',
89 - default=os.path.curdir)
90 -
91 -dateparser = DateParser()
92 -
93 -if __name__ == '__main__':
94 - ns = parser.parse_args()
95 - reader = csv.DictReader(ns.index, _fields, dialect='excel-tab')
96 -
97 - def _keyfunc(row):
98 - date = dateparser.parse(row['date'])
99 - return ns.keyfunc(date)
100 -
101 - for key, subiter in groupby(reader, _keyfunc):
102 - tot_users = 0
103 - tot_bots = 0
104 - datestr = '-'.join(map(lambda k : '%02d' % k, key)) # (2010,1) -> '2010-01'
105 - zipfn = '{}.npz'.format(datestr)
106 - with closing(ZipFile(zipfn, 'w')) as zf:
107 - for row in subiter:
108 - user_id = row['id']
109 - if ns.bots or (re.search(_botpat, row['name'], re.I) is None):
110 - fn = os.path.join(ns.datapath, '{}.npy'.format(user_id ))
111 - if os.path.exists(fn):
112 - zf.write(fn, user_id)
113 - else:
114 - print >> sys.stderr, '%s: warning: missing %s' %\
115 - (__prog__, fn)
116 - else:
117 - tot_bots += 1
118 - tot_users += 1
119 - print '%s created (users: %5d, bots %5d)' % (zipfn, tot_users, tot_bots)
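
The bot filter is the _botpat regex above: 'bot' anchored at the start or the end of a word, case-insensitively. Note that it also catches names such as 'Abbot', where 'bot' happens to end a word, so some false positives are expected. A quick demonstration:

    import re

    botpat = r'\bbot|bot\b'
    for name in ('Botuser IV', 'SpellBot', 'Francis Abbott', 'Abbot Costello'):
        print(name, bool(re.search(botpat, name, re.I)))
    # Botuser IV True, SpellBot True, Francis Abbott False, Abbot Costello True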
Index: trunk/tools/wsor/editor_lifecycle/setup.py
@@ -1,10 +0,0 @@
2 -from distutils.core import setup
3 -
4 -setup(
5 - name='lifecycle',
6 - description='WMF summer of research project',
7 - version='0.0.0',
8 - author='Giovanni Luca Ciampaglia',
9 - author_email='gciampaglia@wikimedia.org',
10 - scripts=['fetchrates', 'graphlife', 'fetchcohort']
11 -)
Index: trunk/tools/wsor/editor_lifecycle/fetchcohort
@@ -1,59 +0,0 @@
2 -#!/usr/bin/python
3 -# vim:ft=python:
4 -# coding : utf-8
5 -
6 -''' fetches a cohort based on year of registration and editing activity '''
7 -
8 -from argparse import ArgumentParser
9 -from oursql import connect
10 -import os
11 -import sys
12 -import datetime as dt
13 -import csv
14 -
15 -prog = os.path.basename(os.path.abspath(__file__))
16 -
17 -parser = ArgumentParser(description=__doc__, fromfile_prefix_chars='@')
18 -parser.add_argument('registration_year', metavar='year', type=int)
19 -parser.add_argument('min_activity', metavar='minedits', type=int)
20 -parser.add_argument('max_activity', metavar='maxedits', type=int)
21 -parser.add_argument('-c', '--config', dest='config_file')
22 -parser.add_argument('-l', '--limit', type=int)
23 -
24 -query = '''
25 -select
26 - user_id,
27 - user_name,
28 - user_registration,
29 - user_editcount
30 -from user u left join user_groups ug
31 -on u.user_id = ug.ug_user
32 -where
33 - (ug_group <> 'bot' or ug_user is null)
34 - and year(user_registration) = ?
35 - and user_editcount > ?
36 - and user_editcount < ?
37 -'''
38 -
39 -if __name__ == '__main__':
40 - ns = parser.parse_args()
41 - if ns.min_activity >= ns.max_activity:
42 - print >> sys.stderr, '%s: error: min_activity >= max_activity' % prog
43 - sys.exit(1)
44 - if ns.registration_year < 2001 or ns.registration_year > dt.datetime.now().year:
45 - print >> sys.stderr, '%s: error: illegal year: %d' % (prog,
46 - ns.registration_year)
47 - sys.exit(1)
48 -
49 - if ns.limit is not None:
50 - query += 'limit %d' % ns.limit
51 -
52 - if ns.config_file is None:
53 - ns.config_file = os.path.expanduser('~/.my.cnf')
54 -
55 - conn = connect(read_default_file=ns.config_file)
56 - writer = csv.writer(sys.stdout, dialect='excel-tab')
57 - cursor = conn.cursor()
58 - cursor.execute(query, (ns.registration_year, ns.min_activity, ns.max_activity))
59 - for row in cursor:
60 - writer.writerow(row)
Index: trunk/tools/wsor/editor_lifecycle/fetchrates
@@ -1,69 +0,0 @@
2 -#!/usr/bin/python
3 -
4 -import sys
5 -from oursql import connect
6 -from argparse import ArgumentParser
7 -import numpy as np
8 -import os
9 -from time import time
10 -
11 -parser = ArgumentParser(description=__doc__)
12 -parser.add_argument('user_id', type=int)
13 -parser.add_argument('-config', dest='config_file')
14 -parser.add_argument('-outdir', dest='output_dir', default=os.curdir)
15 -
16 -# TODO get also deleted revisions!
17 -
18 -query = """
19 -select unix_timestamp(rev_timestamp)/86400.0
20 -from revision
21 -where rev_user = ?
22 -order by rev_timestamp
23 -"""
24 -
25 -prog = os.path.basename(os.path.abspath(__file__))
26 -
27 -def main(ns):
28 - # get mysql client configuration file
29 - mycnf = os.path.expanduser('~/.my.cnf')
30 - if ns.config_file is None and not os.path.exists(mycnf):
31 - print >> sys.stderr, ('%s: no config file specified and $HOME/.my.cnf'
32 - ' not found') % prog
33 - sys.exit(1)
34 - elif ns.config_file is None:
35 - ns.config_file = mycnf
36 -
37 - # test output directory exists
38 - if not os.path.exists(ns.output_dir):
39 - print >> sys.stderr, '%s: output directory does not exist: %s' % (
40 - prog, ns.output_dir)
41 - sys.exit(1)
42 - if not os.path.isdir(ns.output_dir):
43 - print >> sys.stderr, '%s: not a directory: %s' % (prog, ns.output_dir)
44 - sys.exit(1)
45 - # start timer
46 - tstart = time()
47 -
48 - # connect run query
49 - conn = connect(read_default_file=ns.config_file)
50 - cursor = conn.cursor()
51 - cursor.execute(query, (ns.user_id,))
52 -
53 - # compute rates and save to file
54 - revs = np.asfarray(list(cursor))
55 - m, M = np.floor(revs.min()), np.ceil(revs.max())
56 - rates, days = np.histogram(revs, range=(m,M), bins=(M-m))
57 - data = np.c_[days[:-1], rates]
58 - out_path = os.path.join(ns.output_dir, '%d.npy' % ns.user_id)
59 - np.save(out_path, data)
60 -
61 - # stop timer
62 - tstop = time()
63 - print '%s: output saved to %s (execution time: %g sec, fetched: %d rows)' % (
64 - prog, out_path, tstop - tstart, len(revs))
65 -
66 -if __name__ == '__main__':
67 - # get arguments from command line
68 - ns = parser.parse_args()
69 - main(ns)
70 -
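The rate computation above bins fractional-day timestamps into one-day bins and stores rows of (day, edit count). A sketch with toy data; note that modern NumPy requires bins to be an explicit int, which the original passes as a float:

    import numpy as np

    rev_days = np.array([0.2, 0.4, 0.5, 2.7, 2.9, 3.1])  # days since the epoch
    lo, hi = np.floor(rev_days.min()), np.ceil(rev_days.max())
    rates, edges = np.histogram(rev_days, range=(lo, hi), bins=int(hi - lo))
    data = np.c_[edges[:-1], rates]   # rows of (day, edit count)
    print(data)                       # [[0. 3.] [1. 0.] [2. 2.] [3. 1.]]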
