r82764 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r82763‎ | r82764 | r82765 >
Date:23:37, 24 February 2011
Author:diederik
Status:deferred
Tags:
Comment:
Fixed a locking situation.
Modified paths:
  • /trunk/tools/editor_trends/analyses/analyzer.py (modified) (history)
  • /trunk/tools/editor_trends/classes/dataset.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/analyses/analyzer.py
@@ -38,7 +38,31 @@
3939 from utils import timer
4040 from utils import log
4141
class Replicator:
    '''
    Callable that runs one plugin through generate_chart_data for every
    combination of cutoff and cumulative cutoff.

    rts, plugin and time_unit are stored and passed on unchanged to
    generate_chart_data. cutoff and cum_cutoff are iterables of values to
    combine; when omitted they default to [1, 10, 50] and [10] respectively.
    Any extra keyword arguments are stored and forwarded on every call.
    '''
    def __init__(self, rts, plugin, time_unit, cutoff=None, cum_cutoff=None, **kwargs):
        self.plugin = plugin
        self.rts = rts
        self.time_unit = time_unit
        self.cutoff = [1, 10, 50] if cutoff is None else cutoff
        # The default must be keyed on cum_cutoff itself, not on cutoff:
        # otherwise an explicit cum_cutoff is dropped when cutoff is omitted
        # and cum_cutoff stays None when only cutoff is given.
        self.cum_cutoff = [10] if cum_cutoff is None else cum_cutoff
        self.kwargs = kwargs

    def __call__(self):
        '''
        Call generate_chart_data once per (cum_cutoff, cutoff) pair, with
        cum_cutoff as the outer loop.
        '''
        for cum_cutoff in self.cum_cutoff:
            for cutoff in self.cutoff:
                generate_chart_data(self.rts, self.plugin,
                                    time_unit=self.time_unit,
                                    cutoff=cutoff, cum_cutoff=cum_cutoff,
                                    **self.kwargs)
 66+
4367 class Analyzer(consumers.BaseConsumer):
4468 def __init__(self, rts, tasks, result, var):
4569 super(Analyzer, self).__init__(rts, tasks, result)
@@ -109,9 +133,9 @@
110134 ds.filename)
111135 ds.write(format='csv')
112136 print 'Serializing dataset to %s_%s' % (rts.dbname, 'charts')
113 - log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start')
114 - ds.write(format='mongo')
115 - log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish')
 137+ #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start')
 138+ #ds.write(format='mongo')
 139+ #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish')
116140
117141
118142 def generate_chart_data(rts, func, **kwargs):
@@ -121,6 +145,8 @@
122146 '''
123147 stopwatch = timer.Timer()
124148 plugin = retrieve_plugin(func)
 149+ if not plugin:
 150+ raise 'Plugin function %s is unknown, please make sure that you specify an existing plugin function.' % func
125151 feedback(plugin, rts)
126152
127153 obs = dict()
@@ -177,11 +203,11 @@
178204 tasks.join()
179205
180206 reconstruct_observations(var)
181 - ds = dataset.Dataset(plugin.func_name, rts, format=fmt)
 207+ ds = dataset.Dataset(plugin.func_name, rts, format=fmt, **kwargs)
182208 ds.add_variable(var)
183209
184210 stopwatch.elapsed()
185 - #write_output(ds, rts, stopwatch)
 211+ write_output(ds, rts, stopwatch)
186212
187213 ds.summary()
188214 #return True
@@ -202,7 +228,7 @@
203229 return min_year, max_year
204230
205231
206 -if __name__ == '__main__':
 232+def launcher():
207233 project, language, parser = manager.init_args_parser()
208234 args = parser.parse_args(['django'])
209235 rts = runtime_settings.init_environment('wiki', 'en', args)
@@ -212,15 +238,25 @@
213239 rts.editors_dataset = 'editors_dataset'
214240 #END TEMP FIX
215241
216 - generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10)
 242+# replicator = Replicator(rts, 'histogram_by_backward_cohort', time_unit='year')
 243+# replicator()
 244+# replicator = Replicator(rts, 'cohort_dataset_backward_bar', time_unit='year', format='wide')
 245+# replicator()
 246+
 247+# generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10)
217248 # generate_chart_data(rts, 'edit_patterns', time_unit='year', cutoff=5)
218249 # generate_chart_data(rts, 'total_number_of_new_wikipedians', time_unit='year')
219250 # generate_chart_data(rts, 'total_number_of_articles', time_unit='year')
220251 # generate_chart_data(rts, 'total_cumulative_edits', time_unit='year')
221 -# generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=1, cum_cutoff=10)
222 - generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=1, cum_cutoff=10, format='wide')
 252+ generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=1, cum_cutoff=10)
 253+# generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=1, cum_cutoff=10, format='wide')
223254 # generate_chart_data(rts, 'cohort_dataset_forward_bar', time_unit='year', cutoff=5, cum_cutoff=0, format='wide')
224255 # generate_chart_data(rts, 'histogram_edits', time_unit='year', cutoff=0)
225256 # generate_chart_data(rts, 'time_to_new_wikipedian', time_unit='year', cutoff=0)
226257 # generate_chart_data(rts, 'new_editor_count', time_unit='month', cutoff=0)
227258 # #available_analyses()
 259+
 260+
 261+
 262+if __name__ == '__main__':
 263+ launcher()
Index: trunk/tools/editor_trends/classes/dataset.py
@@ -26,7 +26,7 @@
2727 import cPickle
2828 import hashlib
2929 from pymongo.son_manipulator import SONManipulator
30 -from multiprocessing import Manager
 30+from multiprocessing import Manager, RLock
3131 from texttable import Texttable
3232 from datetime import timedelta
3333
@@ -91,13 +91,12 @@
9292 This is a generic hash function that expects a list of variables, used
9393 to lookup an Observation or Variable.
9494 '''
95 - id = '_'.join([str(var) for var in vars])
 95+ return hash('_'.join([str(var) for var in vars]))
9696 #return id
97 - m = hashlib.md5()
98 - m.update(id)
 97+ #m = hashlib.md5()
 98+ #m.update(id)
9999 #print id, m.hexdigest()
100 - return m.hexdigest()
101 - #return ''.join([str(var) for var in vars])
 100+ #return m.hexdigest()
102101
103102 def encode_to_bson(self, data=None):
104103 '''
@@ -209,20 +208,8 @@
210209 else:
211210 self.data += value
212211 self.count += 1
213 -# self.lock.acquire()
214 -# try:
215 -# if isinstance(value, list):
216 -# if self.count == 0:
217 -# self.data = []
218 -# self.data.append(value)
219 -# else:
220 -# self.data += value
221 -# finally:
222 -# self.count += 1
223 -# self.lock.release()
224212
225213
226 -
227214 def get_date_range(self):
228215 return '%s-%s-%s:%s-%s-%s' % (self.t0.month, self.t0.day, self.t0.year, \
229216 self.t1.month, self.t1.day, self.t1.year)
@@ -361,9 +348,9 @@
362349 '''
363350
364351 def __init__(self, chart, rts, vars=None, **kwargs):
365 - self.encoder, chart, charts = json_encoders.get_json_encoder(chart)
 352+ self.encoder, chart_type, charts = json_encoders.get_json_encoder(chart)
366353 if self.encoder == None:
367 - raise exceptions.UnknownChartError(chart, charts)
 354+ raise exceptions.UnknownChartError(chart_type, charts)
368355 self.chart = chart
369356 self.name = 'Dataset to construct %s' % self.chart
370357 self.project = rts.project.name
@@ -427,7 +414,7 @@
428415 attrs = '_'.join(['%s=%s' % (k, getattr(var, k)) for k in keys])
429416 filename = '%s%s_%s_%s.csv' % (self.language_code,
430417 self.project,
431 - self.name,
 418+ self.chart,
432419 attrs)
433420 self.filename = filename
434421
@@ -467,9 +454,15 @@
def to_csv(self):
    '''
    Write this dataset to a CSV file named self.filename in
    settings.dataset_location. The header row is written first, followed
    by the data rows.
    '''
    data = data_converter.convert_dataset_to_lists(self, 'manage')
    headers = data_converter.add_headers(self)
    # NOTE(review): the lock is created fresh on every call, so it can only
    # coordinate code that is handed this same object -- confirm how
    # write_list_to_csv uses it.
    lock = RLock()
    fh = file_utils.create_txt_filehandle(settings.dataset_location,
                                          self.filename,
                                          'w',
                                          settings.encoding)
    # Close the handle even when one of the writes raises, so a failed
    # export does not leak an open file.
    try:
        file_utils.write_list_to_csv(headers, fh, recursive=False, newline=True)
        file_utils.write_list_to_csv(data, fh, recursive=False, newline=True,
                                     format=self.format,
                                     lock=lock)
    finally:
        fh.close()
476469 def encode(self):

Status & tagging log