Index: trunk/tools/editor_trends/analyses/plugins/histogram_by_backward_cohort.py |
— | — | @@ -0,0 +1,41 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | +''' |
| 5 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 6 | +This program is free software; you can redistribute it and/or |
| 7 | +modify it under the terms of the GNU General Public License version 2 |
| 8 | +as published by the Free Software Foundation. |
| 9 | +This program is distributed in the hope that it will be useful, |
| 10 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | +See the GNU General Public License for more details, at |
| 13 | +http://www.fsf.org/licenses/gpl.html |
| 14 | +''' |
| 15 | + |
| 16 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
| 17 | +__email__ = 'dvanliere at gmail dot com' |
| 18 | +__date__ = '2011-01-31' |
| 19 | +__version__ = '0.1' |
| 20 | + |
| 21 | + |
def histogram_by_backward_cohort(var, editor, **kwargs):
    '''
    Tally, per backward-looking cohort window, how many edits an editor made.

    :param var: a Variable instance; must expose cum_cutoff, cutoff, max_year
        and an add(date, value) method.
    :param editor: a dict-like editor record with keys 'new_wikipedian'
        (datetime), 'edit_count' (int), 'edits_by_year' (dict keyed by year
        string) and 'last_edit_by_year' (dict keyed by year string).
    :param kwargs: optional 'break_down' (bool, default False) is forwarded to
        data_converter.create_windows as break_down_first_year.
    :return: var, with one observation added per qualifying year.
    '''
    break_down = kwargs.pop('break_down', False)
    new_wikipedian = editor['new_wikipedian']
    n = editor['edit_count']

    if n >= var.cum_cutoff:
        windows = data_converter.create_windows(var, break_down_first_year=break_down)
        for year in xrange(new_wikipedian.year, var.max_year):
            year = str(year)
            # Years with no recorded activity may be absent from the per-year
            # dicts; default to 0 instead of raising KeyError.
            freq = editor['edits_by_year'].get(year, 0)
            if freq >= var.cutoff:
                last_edit = editor['last_edit_by_year'].get(year, 0.0)
                # 0.0 is the sentinel for "no edit recorded this year".
                if last_edit != 0.0:
                    editor_dt = relativedelta(last_edit, new_wikipedian)
                    # Express editor age at last edit in whole months.
                    editor_dt = (editor_dt.years * 12) + editor_dt.months
                    for w in windows:
                        # Windows are ascending; file the editor under the
                        # first window wide enough to contain his/her age.
                        if w >= editor_dt:
                            datum = datetime.datetime(int(year), 12, 31)
                            var.add(datum, {w: {freq: 1}})
                            break
    return var
\ No newline at end of file |
Index: trunk/tools/editor_trends/analyses/plugins/cohort_dataset_backward_bar.py |
— | — | @@ -30,11 +30,12 @@ |
31 | 31 | value edits. If yes, then include this person in the analysis, else skip the
|
32 | 32 | person.
|
33 | 33 | '''
|
| 34 | + break_down=kwargs.pop('break_down', False)
|
34 | 35 | new_wikipedian = editor['new_wikipedian']
|
35 | 36 | n = editor['edit_count']
|
36 | 37 |
|
37 | 38 | if n >= var.cum_cutoff:
|
38 | | - windows = data_converter.create_windows(var, break_down_first_year=False)
|
| 39 | + windows = data_converter.create_windows(var, break_down_first_year=break_down)
|
39 | 40 | for year in xrange(new_wikipedian.year, var.max_year):
|
40 | 41 | year = str(year)
|
41 | 42 | if editor['edits_by_year'][year] >= var.cutoff:
|
Index: trunk/tools/editor_trends/analyses/analyzer.py |
— | — | @@ -55,7 +55,7 @@ |
56 | 56 | fn = '%s_%s.csv' % (dbname, func.func_name) |
57 | 57 | |
58 | 58 | print 'Storing dataset: %s' % os.path.join(settings.dataset_location, fn) |
59 | | - #ds.write(format='csv') |
| 59 | + ds.write(format='csv') |
60 | 60 | |
61 | 61 | print 'Serializing dataset to %s_%s' % (dbname, 'charts') |
62 | 62 | log.log_to_mongo(ds, 'chart', 'storing', stopwatch, event='start') |
Index: trunk/tools/editor_trends/analyses/dataset.py |
— | — | @@ -138,20 +138,32 @@ |
139 | 139 | tallying a variable. |
140 | 140 | ''' |
141 | 141 | assert isinstance(value, dict), 'The observation that you are adding should be a dictionary.' |
142 | | - self.lock.acquire() |
143 | | - try: |
144 | | - if update: |
145 | | - for k, v in value.iteritems(): |
146 | | - self.data.setdefault(k, 0) |
147 | | - self.data[k] += v |
148 | | - else: |
149 | | - try: |
150 | | - i = max(self.data.keys()) + 1 |
151 | | - except ValueError: |
152 | | - i = 0 |
| 142 | + |
| 143 | + if update: |
| 144 | + for k, v in value.iteritems(): |
| 145 | + if isinstance(v, dict): |
| 146 | + obs = self.data.get(k, Observation(self.date)) |
| 147 | + obs.add(v, update) |
| 148 | + #key = self.__hash__(self.date) |
| 149 | + self.data[k] = obs |
| 150 | + else: |
| 151 | + self.lock.acquire() |
| 152 | + try: |
| 153 | + self.data.setdefault(k, 0) |
| 154 | + self.data[k] += v |
| 155 | + finally: |
| 156 | + self.lock.release() |
| 157 | + else: |
| 158 | + self.lock.acquire() |
| 159 | + try: |
| 160 | + i = max(self.data.keys()) + 1 |
| 161 | + except ValueError: |
| 162 | + i = 0 |
| 163 | + |
| 164 | + try: |
153 | 165 | self.data[i] = value |
154 | | - finally: |
155 | | - self.lock.release() |
| 166 | + finally: |
| 167 | + self.lock.release() |
156 | 168 | |
157 | 169 | |
158 | 170 | |
— | — | @@ -293,7 +305,7 @@ |
294 | 306 | self.language_code = language_code |
295 | 307 | self.hash = self.name |
296 | 308 | self._type = 'dataset' |
297 | | - self.filename = '%s_%s.csv' % (self.project, self.name) |
| 309 | + self.filename = '%s%s_%s.csv' % (self.language_code, self.project, self.name) |
298 | 310 | self.created = datetime.datetime.now() |
299 | 311 | self.format = 'long' |
300 | 312 | for kw in kwargs: |
— | — | @@ -314,6 +326,11 @@ |
315 | 327 | for var in self.variables: |
316 | 328 | yield getattr(self, var) |
317 | 329 | |
| 330 | + def update_filename(self, var): |
| 331 | + attrs = '_'.join(['%s=%s' % (k,v) for k,v in var.iteritems()]) |
| 332 | + return attrs |
| 333 | + |
| 334 | + |
318 | 335 | def add_variable(self, var): |
319 | 336 | if isinstance(var, Variable): |
320 | 337 | self.variables.append(var.name) |
— | — | @@ -417,27 +434,26 @@ |
418 | 435 | d2 = datetime.datetime(2007, 6, 7) |
419 | 436 | ds = Dataset('test', 'wiki', 'editors_dataset', 'en', 'to_bar_json', [ |
420 | 437 | {'name': 'count', 'time_unit': 'year'}, |
421 | | - {'name': 'testest', 'time_unit': 'year'} |
| 438 | + # {'name': 'testest', 'time_unit': 'year'} |
422 | 439 | ]) |
423 | | - ds.count.add(d1, {0:5}) |
424 | | - ds.count.add(d1, {0:135}) |
425 | | - ds.count.add(d2, 514) |
426 | | - ds.testest.add(d1, 135) |
427 | | - ds.testest.add(d2, 535) |
| 440 | + ds.count.add(d1, {0:{1:10}}) |
| 441 | + ds.count.add(d1, {0:{1:135}}) |
| 442 | + ds.count.add(d2, {1: 514}) |
| 443 | + #ds.testest.add(d1, 135) |
| 444 | + #ds.testest.add(d2, 535) |
428 | 445 | #ds.summary() |
429 | 446 | ds.write(format='csv') |
430 | | - v = Variable('test', 'year') |
431 | | - ds.summary() |
| 447 | +# v = Variable('test', 'year') |
432 | 448 | ds.encode() |
433 | 449 | print ds |
434 | 450 | |
435 | | - mongo.test.insert({'variables': ds}) |
| 451 | + # mongo.test.insert({'variables': ds}) |
436 | 452 | |
437 | | - v.add(d2 , 5) |
| 453 | + # v.add(d2 , 5) |
438 | 454 | #o = v.get_observation(d2) |
439 | | - ds = rawdata.find_one({'project': 'wiki', |
440 | | - 'language_code': 'en', |
441 | | - 'hash': 'cohort_dataset_backward_bar'}) |
| 455 | +# ds = rawdata.find_one({'project': 'wiki', |
| 456 | +# 'language_code': 'en', |
| 457 | +# 'hash': 'cohort_dataset_backward_bar'}) |
442 | 458 | |
443 | 459 | |
444 | 460 | if __name__ == '__main__': |