Index: trunk/tools/editor_trends/analyses/analyzer.py |
— | — | @@ -38,7 +38,31 @@ |
39 | 39 | from utils import timer |
40 | 40 | from utils import log |
41 | 41 | |
| 42 | +class Replicator: |
| 43 | +    def __init__(self, rts, plugin, time_unit, cutoff=None, cum_cutoff=None, **kwargs): |
| 44 | +        self.plugin = plugin |
| 45 | +        self.rts = rts |
| 46 | +        self.time_unit = time_unit |
| 47 | +        if cutoff is None: |
| 48 | +            self.cutoff = [1, 10, 50] |
| 49 | +        else: |
| 50 | +            self.cutoff = cutoff |
42 | 51 |   |
| 52 | +        if cum_cutoff is None: |
| 53 | +            self.cum_cutoff = [10] |
| 54 | +        else: |
| 55 | +            self.cum_cutoff = cum_cutoff |
| 56 | +        self.kwargs = kwargs |
| 57 | + |
| 58 | +    def __call__(self): |
| 59 | +        for cum_cutoff in self.cum_cutoff: |
| 60 | +            for cutoff in self.cutoff: |
| 61 | +                generate_chart_data(self.rts, self.plugin, |
| 62 | +                                    time_unit=self.time_unit, |
| 63 | +                                    cutoff=cutoff, cum_cutoff=cum_cutoff, |
| 64 | +                                    **self.kwargs) |
| 65 | + |
| 66 | + |
| 66 | + |
43 | 67 | class Analyzer(consumers.BaseConsumer): |
44 | 68 | def __init__(self, rts, tasks, result, var): |
45 | 69 | super(Analyzer, self).__init__(rts, tasks, result) |
— | — | @@ -109,9 +133,9 @@ |
110 | 134 | ds.filename) |
111 | 135 | ds.write(format='csv') |
112 | 136 | print 'Serializing dataset to %s_%s' % (rts.dbname, 'charts') |
113 | | - log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start') |
114 | | - ds.write(format='mongo') |
115 | | - log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish') |
| 137 | + #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start') |
| 138 | + #ds.write(format='mongo') |
| 139 | + #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish') |
116 | 140 | |
117 | 141 | |
118 | 142 | def generate_chart_data(rts, func, **kwargs): |
— | — | @@ -121,6 +145,8 @@ |
122 | 146 | ''' |
123 | 147 | stopwatch = timer.Timer() |
124 | 148 | plugin = retrieve_plugin(func) |
| 149 | +    if not plugin: |
| 150 | +        raise Exception('Plugin function %s is unknown, please make sure that you specify an existing plugin function.' % func) |
125 | 151 | feedback(plugin, rts) |
126 | 152 | |
127 | 153 | obs = dict() |
— | — | @@ -177,11 +203,11 @@ |
178 | 204 | tasks.join() |
179 | 205 | |
180 | 206 | reconstruct_observations(var) |
181 | | - ds = dataset.Dataset(plugin.func_name, rts, format=fmt) |
| 207 | + ds = dataset.Dataset(plugin.func_name, rts, format=fmt, **kwargs) |
182 | 208 | ds.add_variable(var) |
183 | 209 | |
184 | 210 | stopwatch.elapsed() |
185 | | - #write_output(ds, rts, stopwatch) |
| 211 | + write_output(ds, rts, stopwatch) |
186 | 212 | |
187 | 213 | ds.summary() |
188 | 214 | #return True |
— | — | @@ -202,7 +228,7 @@ |
203 | 229 | return min_year, max_year |
204 | 230 | |
205 | 231 | |
206 | | -if __name__ == '__main__': |
| 232 | +def launcher(): |
207 | 233 | project, language, parser = manager.init_args_parser() |
208 | 234 | args = parser.parse_args(['django']) |
209 | 235 | rts = runtime_settings.init_environment('wiki', 'en', args) |
— | — | @@ -212,15 +238,25 @@ |
213 | 239 | rts.editors_dataset = 'editors_dataset' |
214 | 240 | #END TEMP FIX |
215 | 241 | |
216 | | - generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10) |
| 242 | +# replicator = Replicator(rts, 'histogram_by_backward_cohort', time_unit='year') |
| 243 | +# replicator() |
| 244 | +# replicator = Replicator(rts, 'cohort_dataset_backward_bar', time_unit='year', format='wide') |
| 245 | +# replicator() |
| 246 | + |
| 247 | +# generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10) |
217 | 248 | # generate_chart_data(rts, 'edit_patterns', time_unit='year', cutoff=5) |
218 | 249 | # generate_chart_data(rts, 'total_number_of_new_wikipedians', time_unit='year') |
219 | 250 | # generate_chart_data(rts, 'total_number_of_articles', time_unit='year') |
220 | 251 | # generate_chart_data(rts, 'total_cumulative_edits', time_unit='year') |
221 | | -# generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=1, cum_cutoff=10) |
222 | | - generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=1, cum_cutoff=10, format='wide') |
| 252 | + generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=1, cum_cutoff=10) |
| 253 | +# generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=1, cum_cutoff=10, format='wide') |
223 | 254 | # generate_chart_data(rts, 'cohort_dataset_forward_bar', time_unit='year', cutoff=5, cum_cutoff=0, format='wide') |
224 | 255 | # generate_chart_data(rts, 'histogram_edits', time_unit='year', cutoff=0) |
225 | 256 | # generate_chart_data(rts, 'time_to_new_wikipedian', time_unit='year', cutoff=0) |
226 | 257 | # generate_chart_data(rts, 'new_editor_count', time_unit='month', cutoff=0) |
227 | 258 | # #available_analyses() |
| 259 | + |
| 260 | + |
| 261 | + |
| 262 | +if __name__ == '__main__': |
| 263 | + launcher() |
Index: trunk/tools/editor_trends/classes/dataset.py |
— | — | @@ -26,7 +26,7 @@ |
27 | 27 | import cPickle |
28 | 28 | import hashlib |
29 | 29 | from pymongo.son_manipulator import SONManipulator |
30 | | -from multiprocessing import Manager |
| 30 | +from multiprocessing import Manager, RLock |
31 | 31 | from texttable import Texttable |
32 | 32 | from datetime import timedelta |
33 | 33 | |
— | — | @@ -91,13 +91,12 @@ |
92 | 92 | This is a generic hash function that expects a list of variables, used |
93 | 93 | to lookup an Observation or Variable. |
94 | 94 | ''' |
95 | | - id = '_'.join([str(var) for var in vars]) |
| 95 | +        return hash('_'.join([str(var) for var in vars]))  # NOTE(review): builtin hash() is not stable across 32/64-bit builds, unlike the md5 hexdigest it replaces — confirm these ids are never persisted or compared across machines |
96 | 96 | #return id |
97 | | - m = hashlib.md5() |
98 | | - m.update(id) |
| 97 | + #m = hashlib.md5() |
| 98 | + #m.update(id) |
99 | 99 | #print id, m.hexdigest() |
100 | | - return m.hexdigest() |
101 | | - #return ''.join([str(var) for var in vars]) |
| 100 | + #return m.hexdigest() |
102 | 101 | |
103 | 102 | def encode_to_bson(self, data=None): |
104 | 103 | ''' |
— | — | @@ -209,20 +208,8 @@ |
210 | 209 | else: |
211 | 210 | self.data += value |
212 | 211 | self.count += 1 |
213 | | -# self.lock.acquire() |
214 | | -# try: |
215 | | -# if isinstance(value, list): |
216 | | -# if self.count == 0: |
217 | | -# self.data = [] |
218 | | -# self.data.append(value) |
219 | | -# else: |
220 | | -# self.data += value |
221 | | -# finally: |
222 | | -# self.count += 1 |
223 | | -# self.lock.release() |
224 | 212 | |
225 | 213 | |
226 | | - |
227 | 214 | def get_date_range(self): |
228 | 215 | return '%s-%s-%s:%s-%s-%s' % (self.t0.month, self.t0.day, self.t0.year, \ |
229 | 216 | self.t1.month, self.t1.day, self.t1.year) |
— | — | @@ -361,9 +348,9 @@ |
362 | 349 | ''' |
363 | 350 | |
364 | 351 | def __init__(self, chart, rts, vars=None, **kwargs): |
365 | | - self.encoder, chart, charts = json_encoders.get_json_encoder(chart) |
| 352 | + self.encoder, chart_type, charts = json_encoders.get_json_encoder(chart) |
366 | 353 | if self.encoder == None: |
367 | | - raise exceptions.UnknownChartError(chart, charts) |
| 354 | + raise exceptions.UnknownChartError(chart_type, charts) |
368 | 355 | self.chart = chart |
369 | 356 | self.name = 'Dataset to construct %s' % self.chart |
370 | 357 | self.project = rts.project.name |
— | — | @@ -427,7 +414,7 @@ |
428 | 415 | attrs = '_'.join(['%s=%s' % (k, getattr(var, k)) for k in keys]) |
429 | 416 | filename = '%s%s_%s_%s.csv' % (self.language_code, |
430 | 417 | self.project, |
431 | | - self.name, |
| 418 | + self.chart, |
432 | 419 | attrs) |
433 | 420 | self.filename = filename |
434 | 421 | |
— | — | @@ -467,9 +454,15 @@ |
468 | 455 | def to_csv(self): |
469 | 456 | data = data_converter.convert_dataset_to_lists(self, 'manage') |
470 | 457 | headers = data_converter.add_headers(self) |
471 | | - fh = file_utils.create_txt_filehandle(settings.dataset_location, self.filename, 'w', settings.encoding) |
| 458 | + lock = RLock() |
| 459 | + fh = file_utils.create_txt_filehandle(settings.dataset_location, |
| 460 | + self.filename, |
| 461 | + 'w', |
| 462 | + settings.encoding) |
472 | 463 | file_utils.write_list_to_csv(headers, fh, recursive=False, newline=True) |
473 | | - file_utils.write_list_to_csv(data, fh, recursive=False, newline=True, format=self.format) |
| 464 | + file_utils.write_list_to_csv(data, fh, recursive=False, newline=True, |
| 465 | + format=self.format, |
| 466 | + lock=lock) |
474 | 467 | fh.close() |
475 | 468 | |
476 | 469 | def encode(self): |