r81304 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r81303 | r81304 | r81305 >
Date:06:15, 1 February 2011
Author:diederik
Status:deferred
Tags:
Comment:
Thread-safe Dataset and Variable classes, and a new plugin to create histograms per cohort.
Modified paths:
  • /trunk/tools/editor_trends/analyses/analyzer.py (modified) (history)
  • /trunk/tools/editor_trends/analyses/dataset.py (modified) (history)
  • /trunk/tools/editor_trends/analyses/plugins/cohort_dataset_backward_bar.py (modified) (history)
  • /trunk/tools/editor_trends/analyses/plugins/histogram_by_backward_cohort.py (added) (history)

Diff [purge]

Index: trunk/tools/editor_trends/analyses/plugins/histogram_by_backward_cohort.py
@@ -0,0 +1,41 @@
 2+#!/usr/bin/python
 3+# -*- coding: utf-8 -*-
 4+'''
 5+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
 6+This program is free software; you can redistribute it and/or
 7+modify it under the terms of the GNU General Public License version 2
 8+as published by the Free Software Foundation.
 9+This program is distributed in the hope that it will be useful,
 10+but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 12+See the GNU General Public License for more details, at
 13+http://www.fsf.org/licenses/gpl.html
 14+'''
 15+
 16+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
 17+__email__ = 'dvanliere at gmail dot com'
 18+__date__ = '2011-01-31'
 19+__version__ = '0.1'
 20+
 21+
 22+def histogram_by_backward_cohort(var, editor, **kwargs):
 23+ break_down=kwargs.pop('break_down', False)
 24+ new_wikipedian = editor['new_wikipedian']
 25+ n = editor['edit_count']
 26+
 27+ if n >= var.cum_cutoff:
 28+ windows = data_converter.create_windows(var, break_down_first_year=break_down)
 29+ for year in xrange(new_wikipedian.year, var.max_year):
 30+ year = str(year)
 31+ if editor['edits_by_year'][year] >= var.cutoff:
 32+ last_edit = editor['last_edit_by_year'][year]
 33+ if last_edit != 0.0:
 34+ editor_dt = relativedelta(last_edit, new_wikipedian)
 35+ editor_dt = (editor_dt.years * 12) + editor_dt.months
 36+ for w in windows:
 37+ if w >= editor_dt:
 38+ datum = datetime.datetime(int(year), 12, 31)
 39+ freq = editor['edits_by_year'][year]
 40+ var.add(datum, {w:{freq:1}})
 41+ break
 42+ return var
\ No newline at end of file
Index: trunk/tools/editor_trends/analyses/plugins/cohort_dataset_backward_bar.py
@@ -30,11 +30,12 @@
3131 value edits. If yes, then include this person in the analysis, else skip the
3232 person.
3333 '''
 34+ break_down=kwargs.pop('break_down', False)
3435 new_wikipedian = editor['new_wikipedian']
3536 n = editor['edit_count']
3637
3738 if n >= var.cum_cutoff:
38 - windows = data_converter.create_windows(var, break_down_first_year=False)
 39+ windows = data_converter.create_windows(var, break_down_first_year=break_down)
3940 for year in xrange(new_wikipedian.year, var.max_year):
4041 year = str(year)
4142 if editor['edits_by_year'][year] >= var.cutoff:
Index: trunk/tools/editor_trends/analyses/analyzer.py
@@ -55,7 +55,7 @@
5656 fn = '%s_%s.csv' % (dbname, func.func_name)
5757
5858 print 'Storing dataset: %s' % os.path.join(settings.dataset_location, fn)
59 - #ds.write(format='csv')
 59+ ds.write(format='csv')
6060
6161 print 'Serializing dataset to %s_%s' % (dbname, 'charts')
6262 log.log_to_mongo(ds, 'chart', 'storing', stopwatch, event='start')
Index: trunk/tools/editor_trends/analyses/dataset.py
@@ -138,20 +138,32 @@
139139 tallying a variable.
140140 '''
141141 assert isinstance(value, dict), 'The observation that you are adding should be a dictionary.'
142 - self.lock.acquire()
143 - try:
144 - if update:
145 - for k, v in value.iteritems():
146 - self.data.setdefault(k, 0)
147 - self.data[k] += v
148 - else:
149 - try:
150 - i = max(self.data.keys()) + 1
151 - except ValueError:
152 - i = 0
 142+
 143+ if update:
 144+ for k, v in value.iteritems():
 145+ if isinstance(v, dict):
 146+ obs = self.data.get(k, Observation(self.date))
 147+ obs.add(v, update)
 148+ #key = self.__hash__(self.date)
 149+ self.data[k] = obs
 150+ else:
 151+ self.lock.acquire()
 152+ try:
 153+ self.data.setdefault(k, 0)
 154+ self.data[k] += v
 155+ finally:
 156+ self.lock.release()
 157+ else:
 158+ self.lock.acquire()
 159+ try:
 160+ i = max(self.data.keys()) + 1
 161+ except ValueError:
 162+ i = 0
 163+
 164+ try:
153165 self.data[i] = value
154 - finally:
155 - self.lock.release()
 166+ finally:
 167+ self.lock.release()
156168
157169
158170
@@ -293,7 +305,7 @@
294306 self.language_code = language_code
295307 self.hash = self.name
296308 self._type = 'dataset'
297 - self.filename = '%s_%s.csv' % (self.project, self.name)
 309+ self.filename = '%s%s_%s.csv' % (self.language_code, self.project, self.name)
298310 self.created = datetime.datetime.now()
299311 self.format = 'long'
300312 for kw in kwargs:
@@ -314,6 +326,11 @@
315327 for var in self.variables:
316328 yield getattr(self, var)
317329
 330+ def update_filename(self, var):
 331+ attrs = '_'.join(['%s=%s' % (k,v) for k,v in var.iteritems()])
 332+ return attrs
 333+
 334+
318335 def add_variable(self, var):
319336 if isinstance(var, Variable):
320337 self.variables.append(var.name)
@@ -417,27 +434,26 @@
418435 d2 = datetime.datetime(2007, 6, 7)
419436 ds = Dataset('test', 'wiki', 'editors_dataset', 'en', 'to_bar_json', [
420437 {'name': 'count', 'time_unit': 'year'},
421 - {'name': 'testest', 'time_unit': 'year'}
 438+ # {'name': 'testest', 'time_unit': 'year'}
422439 ])
423 - ds.count.add(d1, {0:5})
424 - ds.count.add(d1, {0:135})
425 - ds.count.add(d2, 514)
426 - ds.testest.add(d1, 135)
427 - ds.testest.add(d2, 535)
 440+ ds.count.add(d1, {0:{1:10}})
 441+ ds.count.add(d1, {0:{1:135}})
 442+ ds.count.add(d2, {1: 514})
 443+ #ds.testest.add(d1, 135)
 444+ #ds.testest.add(d2, 535)
428445 #ds.summary()
429446 ds.write(format='csv')
430 - v = Variable('test', 'year')
431 - ds.summary()
 447+# v = Variable('test', 'year')
432448 ds.encode()
433449 print ds
434450
435 - mongo.test.insert({'variables': ds})
 451+ # mongo.test.insert({'variables': ds})
436452
437 - v.add(d2 , 5)
 453+ # v.add(d2 , 5)
438454 #o = v.get_observation(d2)
439 - ds = rawdata.find_one({'project': 'wiki',
440 - 'language_code': 'en',
441 - 'hash': 'cohort_dataset_backward_bar'})
 455+# ds = rawdata.find_one({'project': 'wiki',
 456+# 'language_code': 'en',
 457+# 'hash': 'cohort_dataset_backward_bar'})
442458
443459
444460 if __name__ == '__main__':

Status & tagging log