r78221 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r78220 | r78221 | r78222 >
Date:	13:26, 11 December 2010
Author:	reedy
Status:	deferred
Tags:
Comment:	Followup r78216, set svn:eol-style native to analyses/aggregates.py
Modified paths:	/trunk/tools/editor_trends/analyses/aggregates.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/analyses/aggregates.py
—	—	@@ -1,113 +1,113 @@
2		~~-__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])~~
3		~~-__author__email = 'dvanliere at gmail dot com'~~
4		~~-__date__ = '2010-12-10'~~
5		~~-__version__ = '0.1'~~
6		-
7		~~-import datetime~~
8		~~-import multiprocessing~~
9		~~-import calendar~~
10		~~-import sys~~
11		~~-sys.path.append('..')~~
12		-
13		~~-import configuration~~
14		~~-settings = configuration.Settings()~~
15		~~-from database import db~~
16		~~-from etl import shaper~~
17		~~-from utils import utils~~
18		-
19		-
20		~~-class Dataset:~~
21		~~- def __init__(self):~~
22		~~- pass~~
23		-
24		-
25		~~-def new_editor_count(editors, dbname, collection, month=12):~~
26		~~- '''~~
27		~~- @month should be an integer in the range of 1-12.~~
28		~~- '''~~
29		~~- assert month > 0 and month < 13~~
30		~~- mongo = db.init_mongo_db(dbname)~~
31		~~- dataset = mongo[collection + '_dataset']~~
32		~~- data = shaper.create_datacontainer(0)~~
33		~~- start_year = 2001~~
34		~~- end_year = datetime.datetime.now().year + 1~~
35		~~- while True:~~
36		~~- id = editors.get(block=False)~~
37		~~- if id == None:~~
38		~~- break~~
39		~~- editor = dataset.find_one({'editor': id}, {'edits': 1})~~
40		~~- new_editor = editor['edits'][9]['date'] #date that editor became a new editor~~
41		-
42		~~- for year in xrange(start_year, end_year):~~
43		~~- day = calendar.monthrange(year, month)[1]~~
44		~~- cut_off = datetime.datetime(year, month, day)~~
45		~~- if new_editor < cut_off:~~
46		~~- data[year] += 1~~
47		-
48		~~- return data~~
49		-
50		-
51		~~-def active_editor_count(editors, dbname, collection, month=12):~~
52		~~- '''~~
53		~~- @month should be an integer in the range of 1-12.~~
54		~~- '''~~
55		~~- assert month > 0 and month < 13~~
56		~~- mongo = db.init_mongo_db(dbname)~~
57		~~- dataset = mongo[collection + '_dataset']~~
58		~~- data = shaper.create_datacontainer('dict')~~
59		~~- data = shaper.add_months_to_datacontainer(data, 0)~~
60		~~- start_year = 2001~~
61		~~- end_year = datetime.datetime.now().year + 1~~
62		~~- while True:~~
63		~~- id = editors.get(block=False)~~
64		~~- if id == None:~~
65		~~- break~~
66		~~- editor = dataset.find_one({'editor': id}, {'monthly_edits': 1})~~
67		~~- monthly_edits = editor['monthly_edits']~~
68		-
69		~~- for year in xrange(start_year, end_year):~~
70		~~- for month in xrange(1, 13):~~
71		~~- if monthly_edits[str(year)][str(month)] > 4:~~
72		~~- data[year][month] += 1~~
73		-
74		~~- return data~~
75		-
76		-
77		-
78		~~-def new_editor_count_launcher(dbname, collection):~~
79		~~- editors = db.retrieve_distinct_keys(dbname, collection, 'editor')~~
80		~~- tasks = multiprocessing.JoinableQueue()~~
81		~~- for editor in editors:~~
82		~~- tasks.put(editor)~~
83		~~- print 'The queue contains %s editors.' % tasks.qsize()~~
84		~~- tasks.put(None)~~
85		~~- data = new_editor_count(tasks, dbname, collection, month=7)~~
86		~~- keys = data.keys()~~
87		~~- keys.sort()~~
88		~~- file = '%s_aggrate_new_editor_count.csv' % dbname~~
89		~~- fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)~~
90		~~- utils.write_list_to_csv(keys, fh, recursive=False, newline=True)~~
91		~~- utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)~~
92		~~- fh.close()~~
93		-
94		~~-def active_editor_count_launcher(dbname, collection):~~
95		~~- editors = db.retrieve_distinct_keys(dbname, collection, 'editor')~~
96		~~- tasks = multiprocessing.JoinableQueue()~~
97		~~- for editor in editors:~~
98		~~- tasks.put(editor)~~
99		~~- print 'The queue contains %s editors.' % tasks.qsize()~~
100		~~- tasks.put(None)~~
101		~~- data = active_editor_count(tasks, dbname, collection, month=7)~~
102		~~- keys = data.keys()~~
103		~~- keys.sort()~~
104		~~- headers = ['%s-%s' % (m, k) for k in keys for m in xrange(1, 13)]~~
105		~~- file = '%s_aggrate_active_editor_count.csv' % dbname~~
106		~~- fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)~~
107		~~- utils.write_list_to_csv(headers, fh, recursive=False, newline=True)~~
108		~~- utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)~~
109		~~- fh.close()~~
110		-
111		-
112		~~-if __name__ == '__main__':~~
113		~~- #new_editor_count_launcher('enwiki', 'editors')~~
114		~~- active_editor_count_launcher('enwiki', 'editors')~~
	2	+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
	3	+__author__email = 'dvanliere at gmail dot com'
	4	+__date__ = '2010-12-10'
	5	+__version__ = '0.1'
	6	+
	7	+import datetime
	8	+import multiprocessing
	9	+import calendar
	10	+import sys
	11	+sys.path.append('..')
	12	+
	13	+import configuration
	14	+settings = configuration.Settings()
	15	+from database import db
	16	+from etl import shaper
	17	+from utils import utils
	18	+
	19	+
	20	+class Dataset:
	21	+ def __init__(self):
	22	+ pass
	23	+
	24	+
	25	+def new_editor_count(editors, dbname, collection, month=12):
	26	+ '''
	27	+ @month should be an integer in the range of 1-12.
	28	+ '''
	29	+ assert month > 0 and month < 13
	30	+ mongo = db.init_mongo_db(dbname)
	31	+ dataset = mongo[collection + '_dataset']
	32	+ data = shaper.create_datacontainer(0)
	33	+ start_year = 2001
	34	+ end_year = datetime.datetime.now().year + 1
	35	+ while True:
	36	+ id = editors.get(block=False)
	37	+ if id == None:
	38	+ break
	39	+ editor = dataset.find_one({'editor': id}, {'edits': 1})
	40	+ new_editor = editor['edits'][9]['date'] #date that editor became a new editor
	41	+
	42	+ for year in xrange(start_year, end_year):
	43	+ day = calendar.monthrange(year, month)[1]
	44	+ cut_off = datetime.datetime(year, month, day)
	45	+ if new_editor < cut_off:
	46	+ data[year] += 1
	47	+
	48	+ return data
	49	+
	50	+
	51	+def active_editor_count(editors, dbname, collection, month=12):
	52	+ '''
	53	+ @month should be an integer in the range of 1-12.
	54	+ '''
	55	+ assert month > 0 and month < 13
	56	+ mongo = db.init_mongo_db(dbname)
	57	+ dataset = mongo[collection + '_dataset']
	58	+ data = shaper.create_datacontainer('dict')
	59	+ data = shaper.add_months_to_datacontainer(data, 0)
	60	+ start_year = 2001
	61	+ end_year = datetime.datetime.now().year + 1
	62	+ while True:
	63	+ id = editors.get(block=False)
	64	+ if id == None:
	65	+ break
	66	+ editor = dataset.find_one({'editor': id}, {'monthly_edits': 1})
	67	+ monthly_edits = editor['monthly_edits']
	68	+
	69	+ for year in xrange(start_year, end_year):
	70	+ for month in xrange(1, 13):
	71	+ if monthly_edits[str(year)][str(month)] > 4:
	72	+ data[year][month] += 1
	73	+
	74	+ return data
	75	+
	76	+
	77	+
	78	+def new_editor_count_launcher(dbname, collection):
	79	+ editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
	80	+ tasks = multiprocessing.JoinableQueue()
	81	+ for editor in editors:
	82	+ tasks.put(editor)
	83	+ print 'The queue contains %s editors.' % tasks.qsize()
	84	+ tasks.put(None)
	85	+ data = new_editor_count(tasks, dbname, collection, month=7)
	86	+ keys = data.keys()
	87	+ keys.sort()
	88	+ file = '%s_aggrate_new_editor_count.csv' % dbname
	89	+ fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)
	90	+ utils.write_list_to_csv(keys, fh, recursive=False, newline=True)
	91	+ utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)
	92	+ fh.close()
	93	+
	94	+def active_editor_count_launcher(dbname, collection):
	95	+ editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
	96	+ tasks = multiprocessing.JoinableQueue()
	97	+ for editor in editors:
	98	+ tasks.put(editor)
	99	+ print 'The queue contains %s editors.' % tasks.qsize()
	100	+ tasks.put(None)
	101	+ data = active_editor_count(tasks, dbname, collection, month=7)
	102	+ keys = data.keys()
	103	+ keys.sort()
	104	+ headers = ['%s-%s' % (m, k) for k in keys for m in xrange(1, 13)]
	105	+ file = '%s_aggrate_active_editor_count.csv' % dbname
	106	+ fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)
	107	+ utils.write_list_to_csv(headers, fh, recursive=False, newline=True)
	108	+ utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)
	109	+ fh.close()
	110	+
	111	+
	112	+if __name__ == '__main__':
	113	+ #new_editor_count_launcher('enwiki', 'editors')
	114	+ active_editor_count_launcher('enwiki', 'editors')
Property changes on: trunk/tools/editor_trends/analyses/aggregates.py
___________________________________________________________________
Added: svn:eol-style
115	115	+ native

Past revisions this follows-up on

Revision	Commit summary	Author	Date
r78216	Added:...	diederik	03:06, 11 December 2010

Status & tagging log

21:55, 11 December 2010 Reedy (talk | contribs) changed the status of r78221 [removed: new added: deferred]