r78221 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r78220 | r78221 | r78222 >
Date:13:26, 11 December 2010
Author:reedy
Status:deferred
Tags:
Comment:
Followup r78216, set svn:eol-style native to analyses/aggregates.py
Modified paths:
  • /trunk/tools/editor_trends/analyses/aggregates.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/analyses/aggregates.py
@@ -1,113 +1,113 @@
2 -__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
3 -__author__email = 'dvanliere at gmail dot com'
4 -__date__ = '2010-12-10'
5 -__version__ = '0.1'
6 -
7 -import datetime
8 -import multiprocessing
9 -import calendar
10 -import sys
11 -sys.path.append('..')
12 -
13 -import configuration
14 -settings = configuration.Settings()
15 -from database import db
16 -from etl import shaper
17 -from utils import utils
18 -
19 -
20 -class Dataset:
21 - def __init__(self):
22 - pass
23 -
24 -
25 -def new_editor_count(editors, dbname, collection, month=12):
26 - '''
27 - @month should be an integer in the range of 1-12.
28 - '''
29 - assert month > 0 and month < 13
30 - mongo = db.init_mongo_db(dbname)
31 - dataset = mongo[collection + '_dataset']
32 - data = shaper.create_datacontainer(0)
33 - start_year = 2001
34 - end_year = datetime.datetime.now().year + 1
35 - while True:
36 - id = editors.get(block=False)
37 - if id == None:
38 - break
39 - editor = dataset.find_one({'editor': id}, {'edits': 1})
40 - new_editor = editor['edits'][9]['date'] #date that editor became a new editor
41 -
42 - for year in xrange(start_year, end_year):
43 - day = calendar.monthrange(year, month)[1]
44 - cut_off = datetime.datetime(year, month, day)
45 - if new_editor < cut_off:
46 - data[year] += 1
47 -
48 - return data
49 -
50 -
51 -def active_editor_count(editors, dbname, collection, month=12):
52 - '''
53 - @month should be an integer in the range of 1-12.
54 - '''
55 - assert month > 0 and month < 13
56 - mongo = db.init_mongo_db(dbname)
57 - dataset = mongo[collection + '_dataset']
58 - data = shaper.create_datacontainer('dict')
59 - data = shaper.add_months_to_datacontainer(data, 0)
60 - start_year = 2001
61 - end_year = datetime.datetime.now().year + 1
62 - while True:
63 - id = editors.get(block=False)
64 - if id == None:
65 - break
66 - editor = dataset.find_one({'editor': id}, {'monthly_edits': 1})
67 - monthly_edits = editor['monthly_edits']
68 -
69 - for year in xrange(start_year, end_year):
70 - for month in xrange(1, 13):
71 - if monthly_edits[str(year)][str(month)] > 4:
72 - data[year][month] += 1
73 -
74 - return data
75 -
76 -
77 -
78 -def new_editor_count_launcher(dbname, collection):
79 - editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
80 - tasks = multiprocessing.JoinableQueue()
81 - for editor in editors:
82 - tasks.put(editor)
83 - print 'The queue contains %s editors.' % tasks.qsize()
84 - tasks.put(None)
85 - data = new_editor_count(tasks, dbname, collection, month=7)
86 - keys = data.keys()
87 - keys.sort()
88 - file = '%s_aggrate_new_editor_count.csv' % dbname
89 - fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)
90 - utils.write_list_to_csv(keys, fh, recursive=False, newline=True)
91 - utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)
92 - fh.close()
93 -
94 -def active_editor_count_launcher(dbname, collection):
95 - editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
96 - tasks = multiprocessing.JoinableQueue()
97 - for editor in editors:
98 - tasks.put(editor)
99 - print 'The queue contains %s editors.' % tasks.qsize()
100 - tasks.put(None)
101 - data = active_editor_count(tasks, dbname, collection, month=7)
102 - keys = data.keys()
103 - keys.sort()
104 - headers = ['%s-%s' % (m, k) for k in keys for m in xrange(1, 13)]
105 - file = '%s_aggrate_active_editor_count.csv' % dbname
106 - fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)
107 - utils.write_list_to_csv(headers, fh, recursive=False, newline=True)
108 - utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)
109 - fh.close()
110 -
111 -
112 -if __name__ == '__main__':
113 - #new_editor_count_launcher('enwiki', 'editors')
114 - active_editor_count_launcher('enwiki', 'editors')
 2+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
 3+__author__email = 'dvanliere at gmail dot com'
 4+__date__ = '2010-12-10'
 5+__version__ = '0.1'
 6+
 7+import datetime
 8+import multiprocessing
 9+import calendar
 10+import sys
 11+sys.path.append('..')
 12+
 13+import configuration
 14+settings = configuration.Settings()
 15+from database import db
 16+from etl import shaper
 17+from utils import utils
 18+
 19+
 20+class Dataset:
 21+ def __init__(self):
 22+ pass
 23+
 24+
 25+def new_editor_count(editors, dbname, collection, month=12):
 26+ '''
 27+ @month should be an integer in the range of 1-12.
 28+ '''
 29+ assert month > 0 and month < 13
 30+ mongo = db.init_mongo_db(dbname)
 31+ dataset = mongo[collection + '_dataset']
 32+ data = shaper.create_datacontainer(0)
 33+ start_year = 2001
 34+ end_year = datetime.datetime.now().year + 1
 35+ while True:
 36+ id = editors.get(block=False)
 37+ if id == None:
 38+ break
 39+ editor = dataset.find_one({'editor': id}, {'edits': 1})
 40+ new_editor = editor['edits'][9]['date'] #date that editor became a new editor
 41+
 42+ for year in xrange(start_year, end_year):
 43+ day = calendar.monthrange(year, month)[1]
 44+ cut_off = datetime.datetime(year, month, day)
 45+ if new_editor < cut_off:
 46+ data[year] += 1
 47+
 48+ return data
 49+
 50+
 51+def active_editor_count(editors, dbname, collection, month=12):
 52+ '''
 53+ @month should be an integer in the range of 1-12.
 54+ '''
 55+ assert month > 0 and month < 13
 56+ mongo = db.init_mongo_db(dbname)
 57+ dataset = mongo[collection + '_dataset']
 58+ data = shaper.create_datacontainer('dict')
 59+ data = shaper.add_months_to_datacontainer(data, 0)
 60+ start_year = 2001
 61+ end_year = datetime.datetime.now().year + 1
 62+ while True:
 63+ id = editors.get(block=False)
 64+ if id == None:
 65+ break
 66+ editor = dataset.find_one({'editor': id}, {'monthly_edits': 1})
 67+ monthly_edits = editor['monthly_edits']
 68+
 69+ for year in xrange(start_year, end_year):
 70+ for month in xrange(1, 13):
 71+ if monthly_edits[str(year)][str(month)] > 4:
 72+ data[year][month] += 1
 73+
 74+ return data
 75+
 76+
 77+
 78+def new_editor_count_launcher(dbname, collection):
 79+ editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
 80+ tasks = multiprocessing.JoinableQueue()
 81+ for editor in editors:
 82+ tasks.put(editor)
 83+ print 'The queue contains %s editors.' % tasks.qsize()
 84+ tasks.put(None)
 85+ data = new_editor_count(tasks, dbname, collection, month=7)
 86+ keys = data.keys()
 87+ keys.sort()
 88+ file = '%s_aggrate_new_editor_count.csv' % dbname
 89+ fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)
 90+ utils.write_list_to_csv(keys, fh, recursive=False, newline=True)
 91+ utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)
 92+ fh.close()
 93+
 94+def active_editor_count_launcher(dbname, collection):
 95+ editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
 96+ tasks = multiprocessing.JoinableQueue()
 97+ for editor in editors:
 98+ tasks.put(editor)
 99+ print 'The queue contains %s editors.' % tasks.qsize()
 100+ tasks.put(None)
 101+ data = active_editor_count(tasks, dbname, collection, month=7)
 102+ keys = data.keys()
 103+ keys.sort()
 104+ headers = ['%s-%s' % (m, k) for k in keys for m in xrange(1, 13)]
 105+ file = '%s_aggrate_active_editor_count.csv' % dbname
 106+ fh = utils.create_txt_filehandle(settings.dataset_location, file, 'w', settings.encoding)
 107+ utils.write_list_to_csv(headers, fh, recursive=False, newline=True)
 108+ utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)
 109+ fh.close()
 110+
 111+
 112+if __name__ == '__main__':
 113+ #new_editor_count_launcher('enwiki', 'editors')
 114+ active_editor_count_launcher('enwiki', 'editors')
Property changes on: trunk/tools/editor_trends/analyses/aggregates.py
___________________________________________________________________
Added: svn:eol-style
115 115 + native

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r78216 | Added:... | diederik | 03:06, 11 December 2010

Status & tagging log