r95709 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r95708 | r95709 | r95710 >
Date:	22:07, 29 August 2011
Author:	giovanni
Status:	deferred
Tags:
Comment:	removed obsolete scripts
Modified paths:	/trunk/tools/wsor/editor_lifecycle/obsolete/fetchcohort (deleted) (history) /trunk/tools/wsor/editor_lifecycle/obsolete/graphlife (deleted) (history) /trunk/tools/wsor/editor_lifecycle/obsolete/mkcohort (deleted) (history) /trunk/tools/wsor/editor_lifecycle/obsolete/rates (deleted) (history) /trunk/tools/wsor/editor_lifecycle/obsolete/userlist.sh (deleted) (history) /trunk/tools/wsor/editor_lifecycle/obsolete/userlist.sql (deleted) (history)

Diff [purge]

Index: trunk/tools/wsor/editor_lifecycle/obsolete/rates
—	—	@@ -1,96 +0,0 @@
2		~~-#!/usr/bin/python~~
3		~~-#:vim:ts=python:~~
4		-
5		~~-''' compute editor lifecycle '''~~
6		-
7		~~-'''~~
8		~~-Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG~~
9		~~-This program is free software; you can redistribute it and/or modify~~
10		~~-it under the terms of the GNU General Public License as published by~~
11		~~-the Free Software Foundation; either version 2 of the License, or~~
12		~~-(at your option) any later version.~~
13		-
14		~~-This program is distributed in the hope that it will be useful,~~
15		~~-but WITHOUT ANY WARRANTY; without even the implied warranty of~~
16		~~-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the~~
17		~~-GNU General Public License for more details.~~
18		-
19		~~-You should have received a copy of the GNU General Public License along~~
20		~~-with this program; if not, write to the Free Software Foundation, Inc.,~~
21		~~-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.~~
22		~~-http://www.gnu.org/copyleft/gpl.html~~
23		~~-'''~~
24		-
25		~~-import re~~
26		~~-import os~~
27		~~-from argparse import ArgumentParser~~
28		~~-import numpy as np~~
29		~~-from collections import deque~~
30		~~-import datetime as dt~~
31		-
32		-from lifecycle.rates import *
33		-
34		~~-__prog__ = os.path.basename(os.path.abspath(__file__))~~
35		-
36		~~-parser = ArgumentParser(description=__doc__)~~
37		~~-parser.add_argument('data_file', metavar='data')~~
38		~~-parser.add_argument(metavar='minact', type=int, dest='minimum_activity')~~
39		~~-parser.add_argument(metavar='maxact', type=int, dest='maximum_activity')~~
40		~~-parser.add_argument('-key')~~
41		~~-parser.add_argument('-every', type=int, help='default: %(default)d days',~~
42		~~- default=30, metavar='NUM')~~
43		~~-parser.add_argument('-inactivity', type=int, default=180, help='default: '~~
44		~~- '%(default)d days', metavar='NUM')~~
45		~~-parser.add_argument('-all', dest='dump_all', action='store_true')~~
46		-
47		-
48		~~-def main(ns):~~
49		~~- if ns.key is None:~~
50		~~- m = re.match('(.*?)\.npz', ns.data_file, re.I)~~
51		~~- if m is not None:~~
52		~~- ns.key = m.groups()[0]~~
53		~~- else:~~
54		~~- print >> sys.stderr, '%s: cannot determine key from file name: %s'\~~
55		~~- % (__prog__, ns.data_file)~~
56		~~- sys.exit(1)~~
57		~~- if ns.minimum_activity >= ns.maximum_activity:~~
58		~~- print >> sys.stderr, '%s: error: minact >= maxact' % __prog__~~
59		~~- sys.exit(1)~~
60		-
61		~~- # load data~~
62		~~- npzarchive = np.load(ns.data_file)~~
63		-
64		~~- if ns.dump_all:~~
65		~~- fn = mkfn('cycles', ns, 'npz')~~
66		~~- values_iter = itercycles(npzarchive, ns.every)~~
67		~~- keys = npzarchive.files~~
68		~~- tmp = dict(zip(keys, list(values_iter)))~~
69		~~- np.savez(fn, **tmp)~~
70		~~- print '%s: output saved to %s' % (__prog__, fn)~~
71		~~- else:~~
72		~~- # compute lifetime distribution~~
73		~~- lt = lifetimes(npzarchive)~~
74		-
75		~~- # compute inactive subgroups~~
76		~~- inactive_users = find_inactives(npzarchive, ns.inactivity, ns.minimum_activity,~~
77		~~- ns.maximum_activity)~~
78		-
79		~~- ratesbyday = groupbyday(npzarchive, ns.every)~~
80		~~- ratesbyday_inact = groupbyday(npzarchive, ns.every, inactive_users)~~
81		-
82		~~- avg_all = averagecycle(ratesbyday)~~
83		~~- avg_inact = averagecycle(ratesbyday_inact)~~
84		-
85		~~- lens = [ len(npzarchive.files), len(inactive_users) ]~~
86		-
87		~~- names = ['lt', 'len', 'all', 'inact' ]~~
88		~~- arrs = [ lt, lens, avg_all, avg_inact ]~~
89		-
90		~~- for n, a in zip(names, arrs):~~
91		~~- fn = '%s_%s.%s' % (ns.key, n, 'tsv')~~
92		~~- np.savetxt(fn, a)~~
93		~~- print '%s: output saved to %s' % (__prog__, fn)~~
94		-
95		~~-if __name__ == '__main__':~~
96		~~- ns = parser.parse_args()~~
97		~~- main(ns)~~
Index: trunk/tools/wsor/editor_lifecycle/obsolete/graphlife
—	—	@@ -1,108 +0,0 @@
2		~~-#!/usr/bin/python~~
3		-
4		~~-''' plot editor life cycle '''~~
5		-
6		~~-'''~~
7		~~-Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG~~
8		~~-This program is free software; you can redistribute it and/or modify~~
9		~~-it under the terms of the GNU General Public License as published by~~
10		~~-the Free Software Foundation; either version 2 of the License, or~~
11		~~-(at your option) any later version.~~
12		-
13		~~-This program is distributed in the hope that it will be useful,~~
14		~~-but WITHOUT ANY WARRANTY; without even the implied warranty of~~
15		~~-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the~~
16		~~-GNU General Public License for more details.~~
17		-
18		~~-You should have received a copy of the GNU General Public License along~~
19		~~-with this program; if not, write to the Free Software Foundation, Inc.,~~
20		~~-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.~~
21		~~-http://www.gnu.org/copyleft/gpl.html~~
22		~~-'''~~
23		-
24		~~-import sys~~
25		~~-import numpy as np~~
26		~~-from argparse import ArgumentParser~~
27		~~-import os~~
28		-
29		~~-__prog__ = os.path.basename(os.path.abspath(__file__))~~
30		-
31		~~-parser = ArgumentParser(description=__doc__)~~
32		~~-parser.add_argument('data_files', metavar='data', nargs='+')~~
33		~~-parser.add_argument('-l', '--label', metavar='TEXT', action='append',~~
34		~~- dest='labels_list')~~
35		~~-parser.add_argument('-inset', dest='inset_data_file', metavar='FILE')~~
36		~~-parser.add_argument('-batch', action='store_true', help='uses PDF backend')~~
37		~~-parser.add_argument('-title')~~
38		~~-parser.add_argument('-fmt', default='pdf', help='default: %(default)s')~~
39		-
40		~~-if __name__ == '__main__':~~
41		~~- ns = parser.parse_args()~~
42		-
43		~~- # checks~~
44		~~- if ns.labels_list and len(ns.data_files) != len(ns.labels_list):~~
45		~~- print >> sys.stderr, '%s: error: please provide as many labels '\~~
46		~~- 'as data files' % __prog__~~
47		~~- sys.exit(1)~~
48		-
49		~~- # import pyplot, make lists of colors and markers~~
50		~~- if ns.batch:~~
51		~~- import matplotlib~~
52		~~- matplotlib.use('PDF')~~
53		~~- import matplotlib.pyplot as pp~~
54		~~- from matplotlib.lines import lineMarkers as markers~~
55		~~- markers = dict(filter(~~
56		~~- lambda k : isinstance(k[0],str) and k[1] is not '_draw_nothing',~~
57		~~- markers.items())).keys()~~
58		~~- colors = 'krbgm'~~
59		-
60		~~- # create figure and axes~~
61		~~- fig = pp.figure()~~
62		~~- ax = pp.axes([.1, .1, .85, .8])~~
63		-
64		~~- # add lines~~
65		~~- N = len(ns.data_files)~~
66		~~- for i in xrange(N):~~
67		~~- data_file = ns.data_files[i]~~
68		~~- if ns.labels_list is not None:~~
69		~~- label = ns.labels_list[i]~~
70		~~- else:~~
71		~~- label = 'line-%d' % (i + 1)~~
72		~~- color = colors[i % len(colors)]~~
73		~~- marker= markers[i % len(markers)]~~
74		~~- x, y, ye = np.loadtxt(data_file, unpack=1)~~
75		~~- ax.errorbar(x, y, ye, color=color, marker=marker, mfc='none',~~
76		~~- mec=color, ls=':', label=label)~~
77		-
78		~~- ax.legend(loc=2)~~
79		~~- ax.set_xlabel('days since registration')~~
80		~~- ax.set_ylabel('edits/day')~~
81		~~- if ns.title is not None:~~
82		~~- ax.set_title(ns.title)~~
83		~~- ax.axis('tight')~~
84		-
85		~~- # plot hist of lifetimes in inset axes~~
86		~~- if ns.inset_data_file is not None:~~
87		~~- lt = np.loadtxt(ns.inset_data_file)~~
88		~~- inax = pp.axes([.55, .6, .35, .25], axisbg='none')~~
89		~~- inax.hist(lt, bins=20, fc='none', cumulative=-1, normed=0)~~
90		~~- for l in inax.xaxis.get_ticklabels():~~
91		~~- l.set_rotation(30)~~
92		~~- l.set_fontsize('x-small')~~
93		~~- for l in inax.yaxis.get_ticklabels():~~
94		~~- l.set_fontsize('x-small')~~
95		~~- inax.set_xlabel('lifespan $x$ (days)', fontsize='small')~~
96		~~- inax.set_ylabel('no. of users older\n more than $x$ days',~~
97		~~- fontsize='small')~~
98		~~- inax.set_title('account lifetime')~~
99		~~- inax.axis('tight')~~
100		-
101		~~- pp.draw()~~
102		~~- if ns.title is not None:~~
103		~~- fn = ns.title.replace(' ', '_').lower() + '.' + ns.fmt~~
104		~~- else:~~
105		~~- fn = 'output.' + ns.fmt~~
106		~~- print 'output saved to %s' % fn~~
107		-
108		~~- pp.savefig(fn, fmt=ns.fmt)~~
109		~~- pp.show()~~
Index: trunk/tools/wsor/editor_lifecycle/obsolete/userlist.sh
—	—	@@ -1,30 +0,0 @@
2		~~-#!/bin/bash~~
3		-
4		~~-# Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG~~
5		~~-# This program is free software; you can redistribute it and/or modify~~
6		~~-# it under the terms of the GNU General Public License as published by~~
7		~~-# the Free Software Foundation; either version 2 of the License, or~~
8		~~-# (at your option) any later version.~~
9		-#
10		~~-# This program is distributed in the hope that it will be useful,~~
11		~~-# but WITHOUT ANY WARRANTY; without even the implied warranty of~~
12		~~-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the~~
13		~~-# GNU General Public License for more details.~~
14		-#
15		~~-# You should have received a copy of the GNU General Public License along~~
16		~~-# with this program; if not, write to the Free Software Foundation, Inc.,~~
17		~~-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.~~
18		~~-# http://www.gnu.org/copyleft/gpl.html~~
19		-
20		~~-# This script writes to output a list of registered, not-flagged-as-bot users,~~
21		~~-# sorted by time of first edit. Each item in the list comprises:~~
22		-#
23		~~-# 1. user_id~~
24		~~-# 2. user_name~~
25		~~-# 3. first_timestamp~~
26		~~-# 4. editcount~~
27		-#
28		~~-# For the SQL query, check file userlist.sql.~~
29		-
30		~~-srcdir=`dirname $(type -p $0)`~~
30		~~-mysql -BN < $srcdir/userlist.sql | sort -h -k3 -t $'\t'~~
Index: trunk/tools/wsor/editor_lifecycle/obsolete/mkcohort
—	—	@@ -1,214 +0,0 @@
2		~~-#!/usr/bin/python~~
3		~~-# coding: utf-8~~
4		~~-# :vim:ft=python~~
5		-
6		~~-# TODO: obsolete~~
7		-
8		~~-''' creates cohort files, filtering out bots '''~~
9		-
10		~~-'''~~
11		~~-Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG~~
12		~~-This program is free software; you can redistribute it and/or modify~~
13		~~-it under the terms of the GNU General Public License as published by~~
14		~~-the Free Software Foundation; either version 2 of the License, or~~
15		~~-(at your option) any later version.~~
16		-
17		~~-This program is distributed in the hope that it will be useful,~~
18		~~-but WITHOUT ANY WARRANTY; without even the implied warranty of~~
19		~~-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the~~
20		~~-GNU General Public License for more details.~~
21		-
22		~~-You should have received a copy of the GNU General Public License along~~
23		~~-with this program; if not, write to the Free Software Foundation, Inc.,~~
24		~~-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.~~
25		~~-http://www.gnu.org/copyleft/gpl.html~~
26		~~-'''~~
27		-
28		~~-'''~~
29		~~-This script reads two files: a ZIP archive file, and an index file, which is a~~
30		~~-tab-separated text file like the following:~~
31		-
32		~~- 34 WojPob 20010129110725 2524~~
33		~~- 94 AstroNomer 20010207222248 1532~~
34		~~- 43 Lee Daniel Crocker 20010314020407 4388~~
35		~~- 86 Stephen Gilbert 20010326191355 3599~~
36		~~- 3 Tobias Hoevekamp 20010326202105 1903~~
37		~~- 1273 Wathiik 20010510171751 1772~~
38		~~- 3371 Arno 20010721180708 2700~~
39		~~- 122 Ap 20010722201619 2137~~
40		~~- 182 Rjstott 20010726102546 2602~~
41		~~- 64 Uriyan 20010727141651 1634~~
42		-
43		~~-Where fields are: id, name, date, count. Dates are parsed using dateutil, so~~
44		~~-other formats are allowed too (e.g. 2010-01-29 11:07:25).~~
45		-
46		~~-The script will aggregate users based on the date field and will lookup for~~
47		~~-files of the form <id>.npy in the archive file. Each of these files contains the~~
48		~~-daily edits count for a single user, stored using the NumPy binary array~~
49		~~-format. A relative path within the ZIP archive can be specified from the command~~
50		~~-line with -P/--datapath. Once the data for a cohort (e.g. an aggregated group~~
51		~~-of users) have been collected, the script will compute the average activity rate~~
52		~~-since the first day of activity for all users in that cohort.~~
53		-
54		~~-The script produces two files per each cohort: a tab-separated values file with~~
55		~~-cohort average activity rate, and a compressed NumPy binary archive with the~~
56		~~-user data array files.~~
57		-
58		~~-For each discovered cohort, the script will print on the console the date of the~~
59		~~-cohort, how many users it contains, and how many suspected BOT users it filtered~~
60		~~-out from the index. Use --bots to disable this check and always include them. The~~
61		~~-check is as follows: if the name contains the pattern 'bot' at the beginning or~~
62		~~-at the end of any word, it will be filtered out (e.g. "Botuser IV" will match,~~
63		~~-but "Francis Abbott" won't). If arguments -mincount or -maxcount (or both) are~~
64		~~-passed, the script will process only users whose edit count is above the minimum~~
65		~~-count, or below the maximum count, or both.~~
66		-
67		~~-Please note that the index file must be already sorted by date, in order for the~~
68		~~-group by date aggregation to work. You can use `sort' from the command line,~~
69		~~-e.g.:~~
70		-
71		~~- $~ sort -t$'\t' -k3 -h unsorted.tsv~~
72		-
73		~~-should sort file unsorted.tsv.~~
74		~~-'''~~
75		-
76		~~-import re~~
77		~~-import os~~
78		~~-import sys~~
79		~~-import csv~~
80		~~-import numpy as np~~
81		~~-from argparse import ArgumentParser, FileType~~
82		~~-from contextlib import closing~~
83		~~-from itertools import groupby~~
84		~~-from dateutil.parser import parser as DateParser~~
85		~~-from datetime import datetime~~
86		~~-from zipfile import ZipFile~~
87		-
88		~~-from rates import computerates~~
89		-
90		~~-__prog__ = os.path.basename(os.path.abspath(__file__))~~
91		~~-_botpat = r'\bbot|bot\b'~~
92		~~-_fields = ['id', 'name', 'date', 'count']~~
93		-
94		~~-def yearkey(date):~~
95		~~- return date.year,~~
96		-
97		~~-def monthkey(date):~~
98		~~- return date.year, date.month~~
99		-
100		~~-def daykey(date):~~
101		~~- return date.year, date.month, date.day~~
102		-
103		~~-parser = ArgumentParser(description=__doc__)~~
104		~~-parser.add_argument('index', type=FileType('r'), help='must be already sorted')~~
105		~~-parser.add_argument('archive_path', metavar='archive', help='data archive in ZIP '~~
106		~~- 'format')~~
107		~~-group = parser.add_mutually_exclusive_group(required=1)~~
108		~~-group.add_argument('--year', help='group by year', action='store_const',~~
109		~~- const=yearkey, dest='keyfunc')~~
110		~~-group.add_argument('--month', help='group by month', action='store_const',~~
111		~~- const=monthkey, dest='keyfunc')~~
112		~~-group.add_argument('--day', help='group by day', action='store_const',~~
113		~~- const=daykey, dest='keyfunc')~~
114		~~-parser.add_argument('--bots', action='store_true', help='do NOT filter out bots')~~
115		~~-parser.add_argument('-P', '--datapath', help='relative path of files within '~~
116		~~- 'archive', default='')~~
117		~~-parser.add_argument('-mincount', type=int)~~
118		~~-parser.add_argument('-maxcount', type=int)~~
119		~~-parser.add_argument('-minperyear', type=int)~~
120		~~-parser.add_argument('-maxperyear', type=int)~~
121		~~-parser.add_argument('-n', '--dry-run', action='store_true', help='write to '~~
122		~~- 'console all actions, but do not produce any file')~~
123		~~-parser.add_argument('-every', type=int, help='default: average over %(default)d days',~~
124		~~- default=30, metavar='NUM')~~
125		~~-parser.add_argument('-ns', type=int, action='append', help='select only these NS',~~
126		~~- dest='only')~~
127		-
128		~~-dateparser = DateParser()~~
129		-
130		~~-# dummy ZipFile class in case we do not want to do anything!~~
131		~~-class DummyZipFile:~~
132		~~- def __init__(self, fn, mode):~~
133		~~- pass~~
134		~~- def close(self):~~
135		~~- pass~~
136		~~- def write(self, fn, *args):~~
137		~~- pass~~
138		-
139		~~-if __name__ == '__main__':~~
140		~~- ns = parser.parse_args()~~
141		~~- reader = csv.DictReader(ns.index, _fields, quoting=csv.QUOTE_NONE,~~
142		~~- delimiter='\t')~~
143		~~- archive = ZipFile(ns.archive_path)~~
144		-
145		~~- def _keyfunc(row):~~
146		~~- try:~~
147		~~- date = dateparser.parse(row['date'])~~
148		~~- except:~~
149		~~- print row~~
150		~~- raise~~
151		-
152		~~- return ns.keyfunc(date)~~
153		-
154		~~- # group by index by date of registration~~
155		~~- for key, subiter in groupby(reader, _keyfunc):~~
156		-
157		~~- # reset indices and define output file names from cohort period~~
158		~~- tot_users = 0~~
159		~~- tot_bots = 0~~
160		~~- datestr = '-'.join(map(lambda k : '%02d' % k, key)) # (2010,1) -> '2010-01'~~
161		~~- zipfn = '{}.npz'.format(datestr)~~
162		~~- tsvfn = '{}.tsv'.format(datestr)~~
163		-
164		~~- # if user wants to do a dry-run, replace the Zip files class with the~~
165		~~- # dummy one~~
166		~~- if ns.dry_run:~~
167		~~- ZipFile = DummyZipFile~~
168		-
169		~~- # for each user, determine if may go in cohort~~
170		~~- with closing(ZipFile(zipfn, 'w')) as zf:~~
171		~~- for row in subiter:~~
172		-
173		~~- # compute user details (edit count, yearly activity rate, etc.)~~
174		~~- # and other useful variables~~
175		~~- user_id = row['id']~~
176		~~- count = int(row['count'])~~
177		~~- user_date = dateparser.parse(row['date'])~~
178		~~- now_date = datetime.now()~~
179		~~- activity_span = float((now_date - user_date).days) # in days~~
180		~~- yearly_rate = count / activity_span * 365.0~~
181		~~- bot_flag = re.search(_botpat, row['name'], re.I) is not None~~
182		~~- tot_bots += bot_flag # update counts of bot matches~~
183		-
184		~~- # define paths~~
185		~~- basepath = '{}.npy'.format(user_id)~~
186		~~- archivepath = os.path.join(ns.datapath, basepath)~~
187		-
188		~~- # check cohort membership (keep if conjunction of all given~~
189		~~- # criteria is true, that is, discard if any given criterion is~~
190		~~- # false)~~
191		~~- if ns.mincount is not None and count <= ns.mincount:~~
192		~~- continue~~
193		~~- if ns.maxcount is not None and count >= ns.maxcount:~~
194		~~- continue~~
195		~~- if ns.minperyear is not None and yearly_rate <= ns.minperyear:~~
196		~~- continue~~
197		~~- if ns.maxperyear is not None and yearly_rate >= ns.maxperyear:~~
198		~~- continue~~
199		~~- # user can turn this test off by passing --bots~~
200		~~- if not ns.bots and bot_flag:~~
201		~~- continue~~
202		~~- try:~~
203		~~- zf.writestr(basepath, archive.read(archivepath))~~
204		~~- except KeyError:~~
205		~~- print >> sys.stderr, '%s: warning: %s not in archive' %\~~
206		~~- (__prog__, archivepath)~~
207		~~- tot_users += 1~~
208		-
209		~~- if tot_users > 0:~~
210		~~- rates = computerates(zipfn, ns.every, onlyns=ns.only)~~
211		~~- np.savetxt(tsvfn, rates, fmt='%f')~~
212		-
213		~~- print '%s: %s, %s created (users: %5d, skipped bots %5d)' % (~~
214		~~- __prog__, tsvfn, zipfn, tot_users, tot_bots)~~
215		~~- sys.stdout.flush()~~
Index: trunk/tools/wsor/editor_lifecycle/obsolete/fetchcohort
—	—	@@ -1,79 +0,0 @@
2		~~-#!/usr/bin/python~~
3		~~-# vim:ft=python:~~
4		~~-# coding : utf-8~~
5		-
6		~~-# TODO: obsolete~~
7		-
8		~~-'''~~
9		~~-Copyright (C) 2011 GIOVANNI LUCA CIAMPAGLIA, GCIAMPAGLIA@WIKIMEDIA.ORG~~
10		~~-This program is free software; you can redistribute it and/or modify~~
11		~~-it under the terms of the GNU General Public License as published by~~
12		~~-the Free Software Foundation; either version 2 of the License, or~~
13		~~-(at your option) any later version.~~
14		-
15		~~-This program is distributed in the hope that it will be useful,~~
16		~~-but WITHOUT ANY WARRANTY; without even the implied warranty of~~
17		~~-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the~~
18		~~-GNU General Public License for more details.~~
19		-
20		~~-You should have received a copy of the GNU General Public License along~~
21		~~-with this program; if not, write to the Free Software Foundation, Inc.,~~
22		~~-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.~~
23		~~-http://www.gnu.org/copyleft/gpl.html~~
24		~~-'''~~
25		-
26		~~-''' fetches a cohort based on year of registration and editing activity '''~~
27		-
28		~~-from argparse import ArgumentParser~~
29		~~-from oursql import connect~~
30		~~-import os~~
31		~~-import sys~~
32		~~-import datetime as dt~~
33		~~-import csv~~
34		-
35		~~-prog = os.path.basename(os.path.abspath(__file__))~~
36		-
37		~~-parser = ArgumentParser(description=__doc__, fromfile_prefix_chars='@')~~
38		~~-parser.add_argument('registration_year', metavar='year', type=int)~~
39		~~-parser.add_argument('min_activity', metavar='minedits', type=int)~~
40		~~-parser.add_argument('max_activity', metavar='maxedits', type=int)~~
41		~~-parser.add_argument('-c', '--config', dest='config_file')~~
42		~~-parser.add_argument('-l', '--limit', type=int)~~
43		-
44		~~-query = '''~~
45		~~-select~~
46		~~- user_id,~~
47		~~- user_name,~~
48		~~- user_registration,~~
49		~~- user_editcount~~
50		~~-from user u left join user_groups ug~~
51		~~-on u.user_id = ug.ug_user~~
52		~~-where~~
53		~~- (ug_group <> 'bot' or ug_user is null)~~
54		~~- and year(user_registration) = ?~~
55		~~- and user_editcount > ?~~
56		~~- and user_editcount < ?~~
57		~~-'''~~
58		-
59		~~-if __name__ == '__main__':~~
60		~~- ns = parser.parse_args()~~
61		~~- if ns.min_activity >= ns.max_activity:~~
62		~~- print >> sys.stderr, '%s: error: min_activity >= max_activity' % prog~~
63		~~- sys.exit(1)~~
64		~~- if ns.registration_year < 2001 or ns.registration_year > dt.datetime.now().year:~~
65		~~- print >> sys.stderr, '%s: error: illegal year: %d' % (prog,~~
66		~~- ns.registration_year)~~
67		~~- sys.exit(1)~~
68		-
69		~~- if ns.limit is not None:~~
70		~~- query += 'limit %d' % ns.limit~~
71		-
72		~~- if ns.config_file is None:~~
73		~~- ns.config_file = os.path.expanduser('~/.my.cnf')~~
74		-
75		~~- conn = connect(read_default_file=ns.config_file)~~
76		~~- writer = csv.writer(sys.stdout, dialect='excel-tab')~~
77		~~- cursor = conn.cursor()~~
78		~~- cursor.execute(query, (ns.registration_year, ns.min_activity, ns.max_activity))~~
79		~~- for row in cursor:~~
80		~~- writer.writerow(row)~~
Index: trunk/tools/wsor/editor_lifecycle/obsolete/userlist.sql
—	—	@@ -1,30 +0,0 @@
2		-
3		-
4		~~-select~~
5		~~- rev_user as user_id,~~
6		~~- rev_user_text as user_name,~~
7		~~- min(rev_timestamp) as first_timestamp,~~
8		~~- count(rev_timestamp) as editcount~~
9		~~-from~~
10		~~- revision r use index (usertext_timestamp) left join user_groups g~~
11		~~-on r.rev_user = g.ug_user~~
12		~~-where (ug_group <> 'bot' or g.ug_user is null) and rev_user > 0~~
13		~~-group by rev_user_text~~

Status & tagging log

22:09, 29 August 2011 Reedy (talk | contribs) changed the status of r95709 [removed: new added: deferred]