Index: trunk/tools/wsor/editor_lifecycle/relax |
— | — | @@ -1,81 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -#:vim:ft=python |
4 | | - |
5 | | -''' batch model fitting (usable with xargs)''' |
6 | | - |
7 | | -import re |
8 | | -import os |
9 | | -import sys |
10 | | -import numpy as np |
11 | | -from argparse import ArgumentParser |
12 | | - |
13 | | -from lifecycle.models import StretchedExpon |
14 | | - |
15 | | -__prog__ = os.path.basename(__file__) |
16 | | - |
17 | | -parser = ArgumentParser(description=__doc__) |
18 | | -parser.add_argument('data_file', metavar='data') |
19 | | -parser.add_argument('-m', '--min-size', type=int, default=0) |
20 | | -parser.add_argument('-c', '--constrain', choices=['head', 'tail', 'both']) |
21 | | -parser.add_argument('--maxfev', type=int, default=10000) |
22 | | -parser.add_argument('--debug', action='store_true') |
23 | | -parser.add_argument('--split-name', action='store_true', help='split input file' |
24 | | - ' name into date and a rate part.') |
25 | | - |
26 | | -if __name__ == '__main__': |
27 | | - # parse command line |
28 | | - ns = parser.parse_args() |
29 | | - |
30 | | - # read data, filter data |
31 | | - x, y, ye, n = np.loadtxt(ns.data_file, unpack=1) |
32 | | - idx = (ye > 0) * (n > ns.min_size) |
33 | | - if idx.sum() == 0: |
34 | | - print >> sys.stderr, '%s: error: no data meeting requirements in %s'\ |
35 | | - % (__prog__, ns.data_file) |
36 | | - sys.exit(1) |
37 | | - if idx.sum() < 4: |
38 | | - print >> sys.stderr, '%s: error: non identifiable data in %s'\ |
39 | | - % (__prog__, ns.data_file) |
40 | | - sys.exit(1) |
41 | | - |
42 | | - # create model, set constraints |
43 | | - model = StretchedExpon() |
44 | | - if ns.constrain in ['head', 'both']: |
45 | | - model.A = y[np.argmin(np.abs(x))] |
46 | | - if ns.constrain in ['tail', 'both']: |
47 | | - model.C = y.min() |
48 | | - |
49 | | - # fit model |
50 | | - try: |
51 | | - pest, pcov = model.fit(x[idx], y[idx], ye[idx], maxfev=ns.maxfev, warning=False) |
52 | | - except ValueError, e: |
53 | | - print >> sys.stderr, '%s: error: "%s" when fitting %s' % (__prog__, |
54 | | - e.message, ns.data_file) |
55 | | - if ns.debug: |
56 | | - raise |
57 | | - else: |
58 | | - sys.exit(1) |
59 | | - if np.isscalar(pcov) or np.isinf(pcov).any(): |
60 | | - print >> sys.stderr, '%s: error: bad covariance matrix in %s' % (\ |
61 | | - __prog__, ns.data_file) |
62 | | - sys.exit(1) |
63 | | - |
64 | | - # compute errors, MRT |
65 | | - perr = np.sqrt(np.diag(pcov)) / 2. |
66 | | - model.setparams(*zip(pest,perr)) |
67 | | - mrt = model.mrt(model.tau, model.beta) |
68 | | - N = len(model.__params__) |
69 | | - params = np.empty((N * 2 + 1,), dtype=float) |
70 | | - params[:N] = [ model.A, model.tau, model.beta, model.C ] |
71 | | - params[N:2*N] = map(lambda k : k or np.nan, [ model.A_err, model.tau_err, |
72 | | - model.beta_err, model.C_err ]) |
73 | | - params[-1] = mrt |
74 | | - |
75 | | - # print output line |
76 | | - key, _ = os.path.splitext(ns.data_file) |
77 | | - if ns.split_name: |
78 | | - key = key.split('_') |
79 | | - else: |
80 | | - key = [ key ] |
81 | | - params = map(lambda k : '%12.5g' % k, params) |
82 | | - print '\t'.join(key + params) |
Index: trunk/tools/wsor/editor_lifecycle/fitting_batch.sh |
— | — | @@ -1,43 +0,0 @@ |
2 | | -#!/bin/bash |
3 | | - |
4 | | -# Applies the `fitting' script to a batch of files |
5 | | -# |
6 | | -# author: Giovanni Luca Ciampaglia <gciampaglia@wikimedia.org> |
7 | | -# |
8 | | -# USAGE: fitting_batch.sh file1 file2 file3 ... |
9 | | -# |
10 | | -# This will produce the normal console output that fitting produces; PDF plots |
11 | | -# will be stored in file fit.pdf (please note: no check against overwriting |
12 | | -# existing versions is performed!) |
13 | | - |
14 | | -if [[ -z `type -p fitting` ]] ; then |
15 | | - echo 'error: could not find fitting script. Check your PATH' |
16 | | - exit 1 |
17 | | -fi |
18 | | - |
19 | | -if [[ -e fit.pdf ]] ; then |
20 | | - echo 'error: cannot overwrite file fit.pdf' |
21 | | - exit 1 |
22 | | -fi |
23 | | - |
24 | | -O=`mktemp -d` |
25 | | -models="expon powerlaw stretchedexp" |
26 | | -files="$@" |
27 | | - |
28 | | -for file in $files ; do |
29 | | - for model in $models ; do |
30 | | - fitting $model -force -loglog -batch $file -o $O/${file%.*}_$model.pdf |
31 | | - echo |
32 | | - echo |
33 | | - done |
34 | | -done |
35 | | - |
36 | | -pdfs=`ls $O/*.pdf | sort` |
37 | | - |
38 | | -gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=fit.pdf -dBATCH $pdfs &>/dev/null |
39 | | - |
40 | | -if [[ $? = 0 ]] ; then |
41 | | - echo 'images saved in fit.pdf' |
42 | | -else |
43 | | - echo "error: problem saving fit.pdf. Individual image files in $O" |
44 | | -fi |
Index: trunk/tools/wsor/editor_lifecycle/timechart |
— | — | @@ -0,0 +1,79 @@ |
| 2 | +#!/usr/bin/python |
| 3 | + |
| 4 | +''' plots cohort rate date ''' |
| 5 | + |
| 6 | +import os |
| 7 | +import sys |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +import matplotlib.pyplot as pp |
| 11 | + |
| 12 | +from argparse import ArgumentParser |
| 13 | +from matplotlib.font_manager import FontProperties |
| 14 | + |
| 15 | +__prog__ = os.path.basename(__file__) |
| 16 | + |
| 17 | +parser = ArgumentParser(description=__doc__) |
| 18 | +parser.add_argument('data_paths', metavar='data', nargs='+') |
| 19 | +parser.add_argument('-m', '--minsize', type=int, default=0) |
| 20 | +parser.add_argument('-o', '--output', dest='output_path', metavar='FILE') |
| 21 | +parser.add_argument('-T', '--title') |
| 22 | + |
| 23 | +markers = 'ov^<>sp*+xD' |
| 24 | +colors = 'bgrcmykw' |
| 25 | + |
| 26 | +if __name__ == '__main__': |
| 27 | + ns = parser.parse_args() |
| 28 | + |
| 29 | + # create figure |
| 30 | + fig = pp.figure(figsize=(10,4)) |
| 31 | + ax = fig.add_axes(pp.axes([.1,.1,.65,.8], axisbg='ghostwhite')) |
| 32 | + M = len(markers) |
| 33 | + C = len(colors) |
| 34 | + |
| 35 | + lines = [] |
| 36 | + |
| 37 | + for i, path in enumerate(ns.data_paths): |
| 38 | + |
| 39 | + # load cohort data and filter out estimates based on samples with size |
| 40 | + # smaller than minimum requested |
| 41 | + days, rate, rate_err, size = np.loadtxt(path, delimiter='\t', unpack=1) |
| 42 | + idx = size >= ns.minsize |
| 43 | + days = days[idx] |
| 44 | + rate = rate[idx] |
| 45 | + rate_err = rate_err[idx] |
| 46 | + if len(days) == 0: |
| 47 | + print >> sys.stderr, '%s: warning: skipping empty dataset: %s' % \ |
| 48 | + (__prog__, path) |
| 49 | + continue |
| 50 | + |
| 51 | + # plot errorbars |
| 52 | + l, (wu, wd), mc = ax.errorbar(days, rate, rate_err, marker=markers[i % M], color=colors[i |
| 53 | + % C], label=os.path.splitext(path)[0].replace('_',' '), |
| 54 | + ecolor='none', ls=':', lw=2) |
| 55 | + lines.append(l) |
| 56 | + pp.setp(wd, ls='none') |
| 57 | + |
| 58 | + # decorate figure |
| 59 | + pp.xlabel('days since registration') |
| 60 | + pp.ylabel('edits/day') |
| 61 | + pp.figlegend(lines, [ l.get_label() for l in lines ], |
| 62 | + loc='center right', prop=FontProperties(size='small')) |
| 63 | + pp.minorticks_on() |
| 64 | + pp.grid("on") |
| 65 | + pp.axis('tight') |
| 66 | + |
| 67 | + pp.draw() |
| 68 | + if ns.title is not None: |
| 69 | + pp.title(ns.title) |
| 70 | + pp.draw() |
| 71 | + |
| 72 | + # save to file, if an output path is specified |
| 73 | + if ns.output_path is not None: |
| 74 | + _, ext = os.path.splitext(ns.output_path) |
| 75 | + fmt = ext.strip('.') or 'pdf' |
| 76 | + pp.savefig(ns.output_path, fmt=ext) |
| 77 | + print '%s: output saved to %s' % (__prog__, ns.output_path) |
| 78 | + |
| 79 | + pp.show() |
| 80 | + |
Property changes on: trunk/tools/wsor/editor_lifecycle/timechart |
___________________________________________________________________ |
Added: svn:executable |
1 | 81 | + * |
Index: trunk/tools/wsor/editor_lifecycle/mrtchart |
— | — | @@ -1,4 +1,5 @@ |
2 | 2 | #!/usr/bin/python |
| 3 | +# coding: utf8 |
3 | 4 | |
4 | 5 | import re |
5 | 6 | import os |
— | — | @@ -16,7 +17,10 @@ |
17 | 18 | |
18 | 19 | parser = ArgumentParser(description=__doc__) |
19 | 20 | parser.add_argument('data_paths', metavar='data', nargs='+') |
20 | | -parser.add_argument('-title',) |
| 21 | +parser.add_argument('-o', '--output', dest='output_path', metavar='FILE') |
| 22 | +parser.add_argument('-T', '--title') |
| 23 | +parser.add_argument('-s', '--significance', default=0, help='plot MRT only ' |
| 24 | + 'for fits with significance >= %(metavar)s', metavar='LEVEL', type=float) |
21 | 25 | |
22 | 26 | markers = 'ov^<>sp*+xD' |
23 | 27 | colors = 'bgrcmykw' |
— | — | @@ -30,49 +34,69 @@ |
31 | 35 | ''' detect extension and loads data using numpy.io functions ''' |
32 | 36 | _, ext = os.path.splitext(path) |
33 | 37 | if re.match('^\.npy$', ext, re.I): |
34 | | - return np.load(path)[:,[0,-1]] |
| 38 | + return np.load(path) |
35 | 39 | elif re.match('^\.tsv$', ext, re.I) or re.match('^\.txt$', ext, re.I): |
36 | 40 | default = datetime(2001,1,1) |
37 | 41 | def parse(timestamp): |
38 | 42 | return dateparser.parse(timestamp, default) |
39 | | - conv = {0: parse, -1: float} |
| 43 | + conv = {0: parse, 1: int, 2: float, 3: float, 4: float, 5: float} |
40 | 44 | data = np.loadtxt(path, delimiter='\t', converters=conv, dtype=object) |
41 | | - return data[:, [0,-1]] |
| 45 | + return data |
42 | 46 | raise UnsupportedFileFormatError(path) |
43 | 47 | |
44 | | -def clean(data): |
45 | | - data = filter(lambda k : k[1] < 3000 and k[1] > 0, data) |
| 48 | +def clean(data, level=0): |
| 49 | + data = filter(lambda k : k[2] < 3000 and k[2] > 0 and k[-1] >= level, data) |
46 | 50 | data = sorted(data, key=lambda k : k[0]) |
47 | | - return zip(*data) |
| 51 | + return np.asarray(data) |
48 | 52 | |
49 | 53 | def main(ns): |
50 | 54 | |
51 | 55 | # create figure and axis |
52 | | - fig = pp.figure(figsize=(8,4)) |
53 | | - ax = fig.add_axes(pp.axes([.15,.1,.8,.8], axisbg='antiquewhite')) |
| 56 | + fig = pp.figure(figsize=(10,4)) |
| 57 | + ax = fig.add_axes(pp.axes([.1,.1,.65,.8], axisbg='ghostwhite')) |
54 | 58 | M = len(markers) |
55 | 59 | C = len(colors) |
56 | 60 | |
| 61 | + print 'date: %s' % datetime.now() |
| 62 | + |
57 | 63 | # plot lines |
58 | 64 | for i, path in enumerate(ns.data_paths): |
59 | 65 | try: |
60 | 66 | name, ext = os.path.splitext(path) |
61 | 67 | name = name.replace('_',' ') |
62 | | - data = load(path) |
63 | | - dates, mrt = clean(data) |
64 | | - ax.plot(dates, mrt, ':'+markers[i % M]+colors[i % C], label=name) |
| 68 | + data = clean(load(path), ns.significance) |
| 69 | + if len(data): |
| 70 | + dates, a, mrt, R2, Chi2, pval = data.T |
| 71 | + ax.plot(dates, mrt, ':'+markers[i % M]+colors[i % C], label=name) |
| 72 | + print 'dataset: %s, average R^2: %.5g, points at α > %d%%: %d'\ |
| 73 | + % (path, np.mean(R2), ns.significance * 100, len(data)) |
| 74 | + else: |
| 75 | + print 'dataset: %s, average R^2: N/A, points at %d%%: 0'\ |
| 76 | + % (path, ns.significance * 100) |
65 | 77 | except UnsupportedFileFormatError,e: |
66 | 78 | print >> sys.stderr, '%s: error: unsupported file type %s (.npy,'\ |
67 | 79 | '.tsv, .txt accepted)' % (__prog__, e.args[0]) |
68 | 80 | sys.exit(1) |
69 | 81 | |
70 | | - pp.title('100-500 edits/year') |
| 82 | + |
| 83 | + # decorate |
71 | 84 | pp.ylabel('average time to inactivity') |
72 | 85 | pp.xlabel('') |
73 | | - pp.legend(loc='best', prop=FontProperties(size='small')) |
| 86 | + pp.figlegend(ax.lines, [ l.get_label() for l in ax.lines ], |
| 87 | + loc='center right', prop=FontProperties(size='small')) |
74 | 88 | pp.minorticks_on() |
75 | 89 | pp.grid("on") |
76 | | - pp.savefig('test.pdf') |
| 90 | + if ns.title is not None: |
| 91 | + pp.title(ns.title) |
| 92 | + pp.draw() |
| 93 | + |
| 94 | + # save to file, if an output path is specified |
| 95 | + if ns.output_path is not None: |
| 96 | + _, ext = os.path.splitext(ns.output_path) |
| 97 | + fmt = ext.strip('.') or 'pdf' |
| 98 | + pp.savefig(ns.output_path, fmt=ext) |
| 99 | + print '%s: output saved to %s' % (__prog__, ns.output_path) |
| 100 | + |
77 | 101 | pp.show() |
78 | 102 | |
79 | 103 | if __name__ == '__main__': |
Index: trunk/tools/wsor/editor_lifecycle/sefit |
— | — | @@ -0,0 +1,78 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +#:vim:ft=python |
| 4 | + |
| 5 | +''' batch model fitting (usable with xargs)''' |
| 6 | + |
| 7 | +import re |
| 8 | +import os |
| 9 | +import sys |
| 10 | +import numpy as np |
| 11 | +from argparse import ArgumentParser |
| 12 | + |
| 13 | +from lifecycle.models import StretchedExpon |
| 14 | + |
| 15 | +__prog__ = os.path.basename(__file__) |
| 16 | + |
| 17 | +parser = ArgumentParser(description=__doc__) |
| 18 | +parser.add_argument('data_file', metavar='data') |
| 19 | +parser.add_argument('-m', '--min-size', type=int, default=0) |
| 20 | +parser.add_argument('-c', '--constrain', choices=['head', 'tail', 'both']) |
| 21 | +parser.add_argument('--maxfev', type=int, default=10000) |
| 22 | +parser.add_argument('--debug', action='store_true') |
| 23 | + |
| 24 | +if __name__ == '__main__': |
| 25 | + # parse command line |
| 26 | + ns = parser.parse_args() |
| 27 | + |
| 28 | + # read data, filter data |
| 29 | + x, y, ye, n = np.loadtxt(ns.data_file, unpack=1) |
| 30 | + idx = (ye > 0) * (n > ns.min_size) |
| 31 | + if idx.sum() == 0: |
| 32 | + print >> sys.stderr, '%s: error: no data meeting requirements in %s'\ |
| 33 | + % (__prog__, ns.data_file) |
| 34 | + sys.exit(1) |
| 35 | + if idx.sum() < 4: |
| 36 | + print >> sys.stderr, '%s: error: non identifiable data in %s'\ |
| 37 | + % (__prog__, ns.data_file) |
| 38 | + sys.exit(1) |
| 39 | + x = x[idx] |
| 40 | + y = y[idx] |
| 41 | + ye = ye[idx] |
| 42 | + |
| 43 | + # create model, set fit constraints if any |
| 44 | + model = StretchedExpon() |
| 45 | + if ns.constrain in ['head', 'both']: |
| 46 | + model.A = y[np.argmin(np.abs(x))] |
| 47 | + if ns.constrain in ['tail', 'both']: |
| 48 | + model.C = y.min() |
| 49 | + |
| 50 | + # fit model to data |
| 51 | + try: |
| 52 | + pest, pcov = model.fit(x, y, ye, maxfev=ns.maxfev, warning=False) |
| 53 | + except ValueError, e: |
| 54 | + print >> sys.stderr, '%s: error: "%s" when fitting %s' % (__prog__, |
| 55 | + e.message, ns.data_file) |
| 56 | + if ns.debug: |
| 57 | + raise |
| 58 | + else: |
| 59 | + sys.exit(1) |
| 60 | + if np.isscalar(pcov) or np.isinf(pcov).any(): |
| 61 | + print >> sys.stderr, '%s: error: bad covariance matrix in %s' % (\ |
| 62 | + __prog__, ns.data_file) |
| 63 | + sys.exit(1) |
| 64 | + |
| 65 | + # compute errors, MRT, GoF, coefficient of determination |
| 66 | + perr = np.sqrt(np.diag(pcov)) / 2. |
| 67 | + model.setparams(*zip(pest,perr)) |
| 68 | + mrt = model.mrt(model.tau, model.beta) |
| 69 | + gof, resid, Rsquared = model.gof(x, y, ye) |
| 70 | + model.goftest = gof |
| 71 | + model.residtest = resid |
| 72 | + model.Rsquared = Rsquared |
| 73 | + |
| 74 | + # print output |
| 75 | + key, _ = os.path.splitext(ns.data_file) |
| 76 | + key = key.split('_') |
| 77 | + output = [ mrt, model.Rsquared, model.goftest[0], model.goftest[1] ] |
| 78 | + output = map(lambda k : '%12.5g' % k, output) |
| 79 | + print '\t'.join(key + output) |
Property changes on: trunk/tools/wsor/editor_lifecycle/sefit |
___________________________________________________________________ |
Added: svn:executable |
1 | 80 | + * |
Index: trunk/tools/wsor/editor_lifecycle/fitbooklet.sh |
— | — | @@ -0,0 +1,43 @@ |
| 2 | +#!/bin/bash |
| 3 | + |
| 4 | +# Applies the `fitting' script to a batch of files |
| 5 | +# |
| 6 | +# author: Giovanni Luca Ciampaglia <gciampaglia@wikimedia.org> |
| 7 | +# |
| 8 | +# USAGE: fitbooklet.sh file1 file2 file3 ... |
| 9 | +# |
| 10 | +# This will produce the normal console output that fitting produces; PDF plots |
| 11 | +# will be merged into the file fit.pdf (please note: the script exits with an |
| 12 | +# error if fit.pdf already exists, rather than overwriting it) |
| 13 | + |
| 14 | +if [[ -z `type -p fitting` ]] ; then |
| 15 | + echo 'error: could not find fitting script. Check your PATH' |
| 16 | + exit 1 |
| 17 | +fi |
| 18 | + |
| 19 | +if [[ -e fit.pdf ]] ; then |
| 20 | + echo 'error: cannot overwrite file fit.pdf' |
| 21 | + exit 1 |
| 22 | +fi |
| 23 | + |
| 24 | +O=`mktemp -d` |
| 25 | +models="expon powerlaw stretchedexp" |
| 26 | +files="$@" |
| 27 | + |
| 28 | +for file in $files ; do |
| 29 | + for model in $models ; do |
| 30 | + fitting $model -force -loglog -batch $file -o $O/${file%.*}_$model.pdf |
| 31 | + echo |
| 32 | + echo |
| 33 | + done |
| 34 | +done |
| 35 | + |
| 36 | +pdfs=`ls $O/*.pdf | sort` |
| 37 | + |
| 38 | +gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=fit.pdf -dBATCH $pdfs &>/dev/null |
| 39 | + |
| 40 | +if [[ $? = 0 ]] ; then |
| 41 | + echo 'images saved in fit.pdf' |
| 42 | +else |
| 43 | + echo "error: problem saving fit.pdf. Individual image files in $O" |
| 44 | +fi |
Property changes on: trunk/tools/wsor/editor_lifecycle/fitbooklet.sh |
___________________________________________________________________ |
Added: svn:executable |
1 | 45 | + * |