Index: trunk/tools/wsor/editor_lifecycle/obsolete/rates |
— | — | @@ -1,96 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -#:vim:ts=python: |
4 | | - |
5 | | -''' compute editor lifecycle ''' |
6 | | - |
7 | | -''' |
8 | | -Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG |
9 | | -This program is free software; you can redistribute it and/or modify |
10 | | -it under the terms of the GNU General Public License as published by |
11 | | -the Free Software Foundation; either version 2 of the License, or |
12 | | -(at your option) any later version. |
13 | | - |
14 | | -This program is distributed in the hope that it will be useful, |
15 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | -GNU General Public License for more details. |
18 | | - |
19 | | -You should have received a copy of the GNU General Public License along |
20 | | -with this program; if not, write to the Free Software Foundation, Inc., |
21 | | -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
22 | | -http://www.gnu.org/copyleft/gpl.html |
23 | | -''' |
24 | | - |
25 | | -import re |
26 | | -import os |
27 | | -from argparse import ArgumentParser |
28 | | -import numpy as np |
29 | | -from collections import deque |
30 | | -import datetime as dt |
31 | | - |
32 | | -from lifecycle.rates import * |
33 | | - |
34 | | -__prog__ = os.path.basename(os.path.abspath(__file__)) |
35 | | - |
36 | | -parser = ArgumentParser(description=__doc__) |
37 | | -parser.add_argument('data_file', metavar='data') |
38 | | -parser.add_argument(metavar='minact', type=int, dest='minimum_activity') |
39 | | -parser.add_argument(metavar='maxact', type=int, dest='maximum_activity') |
40 | | -parser.add_argument('-key') |
41 | | -parser.add_argument('-every', type=int, help='default: %(default)d days', |
42 | | - default=30, metavar='NUM') |
43 | | -parser.add_argument('-inactivity', type=int, default=180, help='default: ' |
44 | | - '%(default)d days', metavar='NUM') |
45 | | -parser.add_argument('-all', dest='dump_all', action='store_true') |
46 | | - |
47 | | - |
48 | | -def main(ns): |
49 | | - if ns.key is None: |
50 | | - m = re.match('(.*?)\.npz', ns.data_file, re.I) |
51 | | - if m is not None: |
52 | | - ns.key = m.groups()[0] |
53 | | - else: |
54 | | - print >> sys.stderr, '%s: cannot determine key from file name: %s'\ |
55 | | - % (__prog__, ns.data_file) |
56 | | - sys.exit(1) |
57 | | - if ns.minimum_activity >= ns.maximum_activity: |
58 | | - print >> sys.stderr, '%s: error: minact >= maxact' % __prog__ |
59 | | - sys.exit(1) |
60 | | - |
61 | | - # load data |
62 | | - npzarchive = np.load(ns.data_file) |
63 | | - |
64 | | - if ns.dump_all: |
65 | | - fn = mkfn('cycles', ns, 'npz') |
66 | | - values_iter = itercycles(npzarchive, ns.every) |
67 | | - keys = npzarchive.files |
68 | | - tmp = dict(zip(keys, list(values_iter))) |
69 | | - np.savez(fn, **tmp) |
70 | | - print '%s: output saved to %s' % (__prog__, fn) |
71 | | - else: |
72 | | - # compute lifetime distribution |
73 | | - lt = lifetimes(npzarchive) |
74 | | - |
75 | | - # compute inactive subgroups |
76 | | - inactive_users = find_inactives(npzarchive, ns.inactivity, ns.minimum_activity, |
77 | | - ns.maximum_activity) |
78 | | - |
79 | | - ratesbyday = groupbyday(npzarchive, ns.every) |
80 | | - ratesbyday_inact = groupbyday(npzarchive, ns.every, inactive_users) |
81 | | - |
82 | | - avg_all = averagecycle(ratesbyday) |
83 | | - avg_inact = averagecycle(ratesbyday_inact) |
84 | | - |
85 | | - lens = [ len(npzarchive.files), len(inactive_users) ] |
86 | | - |
87 | | - names = ['lt', 'len', 'all', 'inact' ] |
88 | | - arrs = [ lt, lens, avg_all, avg_inact ] |
89 | | - |
90 | | - for n, a in zip(names, arrs): |
91 | | - fn = '%s_%s.%s' % (ns.key, n, 'tsv') |
92 | | - np.savetxt(fn, a) |
93 | | - print '%s: output saved to %s' % (__prog__, fn) |
94 | | - |
95 | | -if __name__ == '__main__': |
96 | | - ns = parser.parse_args() |
97 | | - main(ns) |
Index: trunk/tools/wsor/editor_lifecycle/obsolete/graphlife |
— | — | @@ -1,108 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | - |
4 | | -''' plot editor life cycle ''' |
5 | | - |
6 | | -''' |
7 | | -Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG |
8 | | -This program is free software; you can redistribute it and/or modify |
9 | | -it under the terms of the GNU General Public License as published by |
10 | | -the Free Software Foundation; either version 2 of the License, or |
11 | | -(at your option) any later version. |
12 | | - |
13 | | -This program is distributed in the hope that it will be useful, |
14 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | -GNU General Public License for more details. |
17 | | - |
18 | | -You should have received a copy of the GNU General Public License along |
19 | | -with this program; if not, write to the Free Software Foundation, Inc., |
20 | | -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | | -http://www.gnu.org/copyleft/gpl.html |
22 | | -''' |
23 | | - |
24 | | -import sys |
25 | | -import numpy as np |
26 | | -from argparse import ArgumentParser |
27 | | -import os |
28 | | - |
29 | | -__prog__ = os.path.basename(os.path.abspath(__file__)) |
30 | | - |
31 | | -parser = ArgumentParser(description=__doc__) |
32 | | -parser.add_argument('data_files', metavar='data', nargs='+') |
33 | | -parser.add_argument('-l', '--label', metavar='TEXT', action='append', |
34 | | - dest='labels_list') |
35 | | -parser.add_argument('-inset', dest='inset_data_file', metavar='FILE') |
36 | | -parser.add_argument('-batch', action='store_true', help='uses PDF backend') |
37 | | -parser.add_argument('-title') |
38 | | -parser.add_argument('-fmt', default='pdf', help='default: %(default)s') |
39 | | - |
40 | | -if __name__ == '__main__': |
41 | | - ns = parser.parse_args() |
42 | | - |
43 | | - # checks |
44 | | - if ns.labels_list and len(ns.data_files) != len(ns.labels_list): |
45 | | - print >> sys.stderr, '%s: error: please provide as many labels '\ |
46 | | - 'as data files' % __prog__ |
47 | | - sys.exit(1) |
48 | | - |
49 | | - # import pyplot, make lists of colors and markers |
50 | | - if ns.batch: |
51 | | - import matplotlib |
52 | | - matplotlib.use('PDF') |
53 | | - import matplotlib.pyplot as pp |
54 | | - from matplotlib.lines import lineMarkers as markers |
55 | | - markers = dict(filter( |
56 | | - lambda k : isinstance(k[0],str) and k[1] is not '_draw_nothing', |
57 | | - markers.items())).keys() |
58 | | - colors = 'krbgm' |
59 | | - |
60 | | - # create figure and axes |
61 | | - fig = pp.figure() |
62 | | - ax = pp.axes([.1, .1, .85, .8]) |
63 | | - |
64 | | - # add lines |
65 | | - N = len(ns.data_files) |
66 | | - for i in xrange(N): |
67 | | - data_file = ns.data_files[i] |
68 | | - if ns.labels_list is not None: |
69 | | - label = ns.labels_list[i] |
70 | | - else: |
71 | | - label = 'line-%d' % (i + 1) |
72 | | - color = colors[i % len(colors)] |
73 | | - marker= markers[i % len(markers)] |
74 | | - x, y, ye = np.loadtxt(data_file, unpack=1) |
75 | | - ax.errorbar(x, y, ye, color=color, marker=marker, mfc='none', |
76 | | - mec=color, ls=':', label=label) |
77 | | - |
78 | | - ax.legend(loc=2) |
79 | | - ax.set_xlabel('days since registration') |
80 | | - ax.set_ylabel('edits/day') |
81 | | - if ns.title is not None: |
82 | | - ax.set_title(ns.title) |
83 | | - ax.axis('tight') |
84 | | - |
85 | | - # plot hist of lifetimes in inset axes |
86 | | - if ns.inset_data_file is not None: |
87 | | - lt = np.loadtxt(ns.inset_data_file) |
88 | | - inax = pp.axes([.55, .6, .35, .25], axisbg='none') |
89 | | - inax.hist(lt, bins=20, fc='none', cumulative=-1, normed=0) |
90 | | - for l in inax.xaxis.get_ticklabels(): |
91 | | - l.set_rotation(30) |
92 | | - l.set_fontsize('x-small') |
93 | | - for l in inax.yaxis.get_ticklabels(): |
94 | | - l.set_fontsize('x-small') |
95 | | - inax.set_xlabel('lifespan $x$ (days)', fontsize='small') |
96 | | - inax.set_ylabel('no. of users older\n more than $x$ days', |
97 | | - fontsize='small') |
98 | | - inax.set_title('account lifetime') |
99 | | - inax.axis('tight') |
100 | | - |
101 | | - pp.draw() |
102 | | - if ns.title is not None: |
103 | | - fn = ns.title.replace(' ', '_').lower() + '.' + ns.fmt |
104 | | - else: |
105 | | - fn = 'output.' + ns.fmt |
106 | | - print 'output saved to %s' % fn |
107 | | - |
108 | | - pp.savefig(fn, fmt=ns.fmt) |
109 | | - pp.show() |
Index: trunk/tools/wsor/editor_lifecycle/obsolete/userlist.sh |
— | — | @@ -1,30 +0,0 @@ |
2 | | -#!/bin/bash |
3 | | - |
4 | | -# Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG |
5 | | -# This program is free software; you can redistribute it and/or modify |
6 | | -# it under the terms of the GNU General Public License as published by |
7 | | -# the Free Software Foundation; either version 2 of the License, or |
8 | | -# (at your option) any later version. |
9 | | -# |
10 | | -# This program is distributed in the hope that it will be useful, |
11 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | -# GNU General Public License for more details. |
14 | | -# |
15 | | -# You should have received a copy of the GNU General Public License along |
16 | | -# with this program; if not, write to the Free Software Foundation, Inc., |
17 | | -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | | -# http://www.gnu.org/copyleft/gpl.html |
19 | | - |
20 | | -# This script writes to output a list of registered, not-flagged-as-bot users, |
21 | | -# sorted by time of first edit. Each item in the list comprises: |
22 | | -# |
23 | | -# 1. user_id |
24 | | -# 2. user_name |
25 | | -# 3. first_timestamp |
26 | | -# 4. editcount |
27 | | -# |
28 | | -# For the SQL query, check file userlist.sql. |
29 | | - |
30 | | -srcdir=`dirname $(type -p $0)` |
31 | | -mysql -BN < $srcdir/userlist.sql | sort -h -k3 -t $'\t' |
Index: trunk/tools/wsor/editor_lifecycle/obsolete/mkcohort |
— | — | @@ -1,214 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -# coding: utf-8 |
4 | | -# :vim:ft=python |
5 | | - |
6 | | -# TODO: obsolete |
7 | | - |
8 | | -''' creates cohort files, filtering out bots ''' |
9 | | - |
10 | | -''' |
11 | | -Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG |
12 | | -This program is free software; you can redistribute it and/or modify |
13 | | -it under the terms of the GNU General Public License as published by |
14 | | -the Free Software Foundation; either version 2 of the License, or |
15 | | -(at your option) any later version. |
16 | | - |
17 | | -This program is distributed in the hope that it will be useful, |
18 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | | -GNU General Public License for more details. |
21 | | - |
22 | | -You should have received a copy of the GNU General Public License along |
23 | | -with this program; if not, write to the Free Software Foundation, Inc., |
24 | | -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
25 | | -http://www.gnu.org/copyleft/gpl.html |
26 | | -''' |
27 | | - |
28 | | -''' |
29 | | -This script reads two files: a ZIP archive file, and an index file, which is a |
30 | | -tab-separated text file like the following: |
31 | | - |
32 | | - 34 WojPob 20010129110725 2524 |
33 | | - 94 AstroNomer 20010207222248 1532 |
34 | | - 43 Lee Daniel Crocker 20010314020407 4388 |
35 | | - 86 Stephen Gilbert 20010326191355 3599 |
36 | | - 3 Tobias Hoevekamp 20010326202105 1903 |
37 | | - 1273 Wathiik 20010510171751 1772 |
38 | | - 3371 Arno 20010721180708 2700 |
39 | | - 122 Ap 20010722201619 2137 |
40 | | - 182 Rjstott 20010726102546 2602 |
41 | | - 64 Uriyan 20010727141651 1634 |
42 | | - |
43 | | -Where fields are: id, name, date, count. Dates are parsed using dateutil, so |
44 | | -other formats are allowed too (e.g. 2010-01-29 11:07:25). |
45 | | - |
46 | | -The script will aggregate users based on the date field and will look up |
47 | | -files of the form <id>.npy in the archive file. Each of these files contains the |
48 | | -daily edits count for a single user, stored using the NumPy binary array |
49 | | -format. A relative path within the ZIP archive can be specified from the command |
50 | | -line with -P/--datapath. Once the data for a cohort (e.g. an aggregated group |
51 | | -of users) have been collected, the script will compute the average activity rate |
52 | | -since the first day of activity for all users in that cohort. |
53 | | - |
54 | | -The script produces two files per each cohort: a tab-separated values file with |
55 | | -cohort average activity rate, and a compressed NumPy binary archive with the |
56 | | -user data array files. |
57 | | - |
58 | | -For each discovered cohort, the script will print on the console the date of the |
59 | | -cohort, how many users it contains, and how many suspected BOT users it filtered |
59 | | -out from the index. Use --bots to disable this check and always include them. The |
61 | | -check is as follows: if the name contains the pattern 'bot' at the beginning or |
62 | | -at the end of any word, it will be filtered out (e.g. "Botuser IV" will match, |
63 | | -but "Francis Abbott" won't). If arguments -mincount or -maxcount (or both) are |
64 | | -passed, the script will process only users whose edit count is above the minimum |
65 | | -count, or below the maximum count, or both. |
66 | | - |
67 | | -Please note that the index file must be already sorted by date, in order for the |
68 | | -group by date aggregation to work. You can use `sort' from the command line, |
69 | | -e.g.: |
70 | | - |
71 | | - $~ sort -t$'\t' -k3 -h unsorted.tsv |
72 | | - |
73 | | -should sort file unsorted.tsv. |
74 | | -''' |
75 | | - |
76 | | -import re |
77 | | -import os |
78 | | -import sys |
79 | | -import csv |
80 | | -import numpy as np |
81 | | -from argparse import ArgumentParser, FileType |
82 | | -from contextlib import closing |
83 | | -from itertools import groupby |
84 | | -from dateutil.parser import parser as DateParser |
85 | | -from datetime import datetime |
86 | | -from zipfile import ZipFile |
87 | | - |
88 | | -from rates import computerates |
89 | | - |
90 | | -__prog__ = os.path.basename(os.path.abspath(__file__)) |
91 | | -_botpat = r'\bbot|bot\b' |
92 | | -_fields = ['id', 'name', 'date', 'count'] |
93 | | - |
94 | | -def yearkey(date): |
95 | | - return date.year, |
96 | | - |
97 | | -def monthkey(date): |
98 | | - return date.year, date.month |
99 | | - |
100 | | -def daykey(date): |
101 | | - return date.year, date.month, date.day |
102 | | - |
103 | | -parser = ArgumentParser(description=__doc__) |
104 | | -parser.add_argument('index', type=FileType('r'), help='*must* be already sorted') |
105 | | -parser.add_argument('archive_path', metavar='archive', help='data archive in ZIP ' |
106 | | - 'format') |
107 | | -group = parser.add_mutually_exclusive_group(required=1) |
108 | | -group.add_argument('--year', help='group by year', action='store_const', |
109 | | - const=yearkey, dest='keyfunc') |
110 | | -group.add_argument('--month', help='group by month', action='store_const', |
111 | | - const=monthkey, dest='keyfunc') |
112 | | -group.add_argument('--day', help='group by day', action='store_const', |
113 | | - const=daykey, dest='keyfunc') |
114 | | -parser.add_argument('--bots', action='store_true', help='do NOT filter out bots') |
115 | | -parser.add_argument('-P', '--datapath', help='relative path of files within ' |
116 | | - 'archive', default='') |
117 | | -parser.add_argument('-mincount', type=int) |
118 | | -parser.add_argument('-maxcount', type=int) |
119 | | -parser.add_argument('-minperyear', type=int) |
120 | | -parser.add_argument('-maxperyear', type=int) |
121 | | -parser.add_argument('-n', '--dry-run', action='store_true', help='write to ' |
122 | | - 'console all actions, but do not produce any file') |
123 | | -parser.add_argument('-every', type=int, help='default: average over %(default)d days', |
124 | | - default=30, metavar='NUM') |
125 | | -parser.add_argument('-ns', type=int, action='append', help='select only these NS', |
126 | | - dest='only') |
127 | | - |
128 | | -dateparser = DateParser() |
129 | | - |
130 | | -# dummy ZipFile class in case we do not want to do anything! |
131 | | -class DummyZipFile: |
132 | | - def __init__(self, fn, mode): |
133 | | - pass |
134 | | - def close(self): |
135 | | - pass |
136 | | - def write(self, fn, *args): |
137 | | - pass |
138 | | - |
139 | | -if __name__ == '__main__': |
140 | | - ns = parser.parse_args() |
141 | | - reader = csv.DictReader(ns.index, _fields, quoting=csv.QUOTE_NONE, |
142 | | - delimiter='\t') |
143 | | - archive = ZipFile(ns.archive_path) |
144 | | - |
145 | | - def _keyfunc(row): |
146 | | - try: |
147 | | - date = dateparser.parse(row['date']) |
148 | | - except: |
149 | | - print row |
150 | | - raise |
151 | | - |
152 | | - return ns.keyfunc(date) |
153 | | - |
154 | | - # group by index by date of registration |
155 | | - for key, subiter in groupby(reader, _keyfunc): |
156 | | - |
157 | | - # reset indices and define output file names from cohort period |
158 | | - tot_users = 0 |
159 | | - tot_bots = 0 |
160 | | - datestr = '-'.join(map(lambda k : '%02d' % k, key)) # (2010,1) -> '2010-01' |
161 | | - zipfn = '{}.npz'.format(datestr) |
162 | | - tsvfn = '{}.tsv'.format(datestr) |
163 | | - |
164 | | - # if user wants to do a dry-run, replace the Zip files class with the |
165 | | - # dummy one |
166 | | - if ns.dry_run: |
167 | | - ZipFile = DummyZipFile |
168 | | - |
169 | | - # for each user, determine if may go in cohort |
170 | | - with closing(ZipFile(zipfn, 'w')) as zf: |
171 | | - for row in subiter: |
172 | | - |
173 | | - # compute user details (edit count, yearly activity rate, etc.) |
174 | | - # and other useful variables |
175 | | - user_id = row['id'] |
176 | | - count = int(row['count']) |
177 | | - user_date = dateparser.parse(row['date']) |
178 | | - now_date = datetime.now() |
179 | | - activity_span = float((now_date - user_date).days) # in days |
180 | | - yearly_rate = count / activity_span * 365.0 |
181 | | - bot_flag = re.search(_botpat, row['name'], re.I) is not None |
182 | | - tot_bots += bot_flag # update counts of bot matches |
183 | | - |
184 | | - # define paths |
185 | | - basepath = '{}.npy'.format(user_id) |
186 | | - archivepath = os.path.join(ns.datapath, basepath) |
187 | | - |
188 | | - # check cohort membership (keep if conjunction of all given |
189 | | - # criteria is true, that is, discard if any given criterion is |
190 | | - # false) |
191 | | - if ns.mincount is not None and count <= ns.mincount: |
192 | | - continue |
193 | | - if ns.maxcount is not None and count >= ns.maxcount: |
194 | | - continue |
195 | | - if ns.minperyear is not None and yearly_rate <= ns.minperyear: |
196 | | - continue |
197 | | - if ns.maxperyear is not None and yearly_rate >= ns.maxperyear: |
198 | | - continue |
199 | | - # user can turn this test off by passing --bots |
200 | | - if not ns.bots and bot_flag: |
201 | | - continue |
202 | | - try: |
203 | | - zf.writestr(basepath, archive.read(archivepath)) |
204 | | - except KeyError: |
205 | | - print >> sys.stderr, '%s: warning: %s not in archive' %\ |
206 | | - (__prog__, archivepath) |
207 | | - tot_users += 1 |
208 | | - |
209 | | - if tot_users > 0: |
210 | | - rates = computerates(zipfn, ns.every, onlyns=ns.only) |
211 | | - np.savetxt(tsvfn, rates, fmt='%f') |
212 | | - |
213 | | - print '%s: %s, %s created (users: %5d, skipped bots %5d)' % ( |
214 | | - __prog__, tsvfn, zipfn, tot_users, tot_bots) |
215 | | - sys.stdout.flush() |
Index: trunk/tools/wsor/editor_lifecycle/obsolete/fetchcohort |
— | — | @@ -1,79 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -# vim:ft=python: |
4 | | -# coding : utf-8 |
5 | | - |
6 | | -# TODO: obsolete |
7 | | - |
8 | | -''' |
9 | | -Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG |
10 | | -This program is free software; you can redistribute it and/or modify |
11 | | -it under the terms of the GNU General Public License as published by |
12 | | -the Free Software Foundation; either version 2 of the License, or |
13 | | -(at your option) any later version. |
14 | | - |
15 | | -This program is distributed in the hope that it will be useful, |
16 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | | -GNU General Public License for more details. |
19 | | - |
20 | | -You should have received a copy of the GNU General Public License along |
21 | | -with this program; if not, write to the Free Software Foundation, Inc., |
22 | | -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
23 | | -http://www.gnu.org/copyleft/gpl.html |
24 | | -''' |
25 | | - |
26 | | -''' fetches a cohort based on year of registration and editing activity ''' |
27 | | - |
28 | | -from argparse import ArgumentParser |
29 | | -from oursql import connect |
30 | | -import os |
31 | | -import sys |
32 | | -import datetime as dt |
33 | | -import csv |
34 | | - |
35 | | -prog = os.path.basename(os.path.abspath(__file__)) |
36 | | - |
37 | | -parser = ArgumentParser(description=__doc__, fromfile_prefix_chars='@') |
38 | | -parser.add_argument('registration_year', metavar='year', type=int) |
39 | | -parser.add_argument('min_activity', metavar='minedits', type=int) |
40 | | -parser.add_argument('max_activity', metavar='maxedits', type=int) |
41 | | -parser.add_argument('-c', '--config', dest='config_file') |
42 | | -parser.add_argument('-l', '--limit', type=int) |
43 | | - |
44 | | -query = ''' |
45 | | -select |
46 | | - user_id, |
47 | | - user_name, |
48 | | - user_registration, |
49 | | - user_editcount |
50 | | -from user u left join user_groups ug |
51 | | -on u.user_id = ug.ug_user |
52 | | -where |
53 | | - (ug_group <> 'bot' or ug_user is null) |
54 | | - and year(user_registration) = ? |
55 | | - and user_editcount > ? |
56 | | - and user_editcount < ? |
57 | | -''' |
58 | | - |
59 | | -if __name__ == '__main__': |
60 | | - ns = parser.parse_args() |
61 | | - if ns.min_activity >= ns.max_activity: |
62 | | - print >> sys.stderr, '%s: error: min_activity >= max_activity' % prog |
63 | | - sys.exit(1) |
64 | | - if ns.registration_year < 2001 or ns.registration_year > dt.datetime.now().year: |
65 | | - print >> sys.stderr, '%s: error: illegal year: %d' % (prog, |
66 | | - ns.registration_year) |
67 | | - sys.exit(1) |
68 | | - |
69 | | - if ns.limit is not None: |
70 | | - query += 'limit %d' % ns.limit |
71 | | - |
72 | | - if ns.config_file is None: |
73 | | - ns.config_file = os.path.expanduser('~/.my.cnf') |
74 | | - |
75 | | - conn = connect(read_default_file=ns.config_file) |
76 | | - writer = csv.writer(sys.stdout, dialect='excel-tab') |
77 | | - cursor = conn.cursor() |
78 | | - cursor.execute(query, (ns.registration_year, ns.min_activity, ns.max_activity)) |
79 | | - for row in cursor: |
80 | | - writer.writerow(row) |
Index: trunk/tools/wsor/editor_lifecycle/obsolete/userlist.sql |
— | — | @@ -1,30 +0,0 @@ |
2 | | - |
3 | | - |
4 | | -select |
5 | | - rev_user as user_id, |
6 | | - rev_user_text as user_name, |
7 | | - min(rev_timestamp) as first_timestamp, |
8 | | - count(rev_timestamp) as editcount |
9 | | -from |
10 | | - revision r use index (usertext_timestamp) left join user_groups g |
11 | | -on r.rev_user = g.ug_user |
12 | | -where (ug_group <> 'bot' or g.ug_user is null) and rev_user > 0 |
13 | | -group by rev_user_text |