r82356 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r82355 | r82356 | r82357 >
Date:22:33, 17 February 2011
Author:diederik
Status:deferred
Tags:
Comment:
The plugin architecture, including Variable and Observation, is synchronized and works with multiprocessing now as well.
Modified paths:
  • /trunk/tools/editor_trends/analyses/analyzer.py (modified) (history)
  • /trunk/tools/editor_trends/classes/consumers.py (modified) (history)
  • /trunk/tools/editor_trends/classes/dataset.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/analyses/analyzer.py
@@ -17,8 +17,10 @@
1818 __date__ = '2010-12-10'
1919 __version__ = '0.1'
2020
21 -
 21+from multiprocessing import JoinableQueue, Lock, Manager, RLock
 22+from Queue import Empty
2223 import sys
 24+import cPickle
2325 import os
2426 import progressbar
2527 import datetime
@@ -27,83 +29,151 @@
2830 sys.path.append('..')
2931
3032 import inventory
 33+import manage as manager
3134 from classes import dataset
32 -from classes import settings
33 -settings = settings.Settings()
 35+from classes import runtime_settings
 36+from classes import consumers
3437 from database import db
3538 from utils import timer
3639 from utils import log
3740
 41+class Analyzer(consumers.BaseConsumer):
3842
 43+ def __init__(self, rts, tasks, result, var):
 44+ super(Analyzer, self).__init__(rts, tasks, result)
 45+ self.var = var
3946
40 -def generate_chart_data(project, collection, language_code, func, encoder, **kwargs):
41 - '''
42 - This is the entry function to be called to generate data for creating charts.
43 - '''
44 - stopwatch = timer.Timer()
45 - res = True
46 - dbname = '%s%s' % (language_code, project)
 47+ def convert_synchronized_objects(self):
 48+ for obs in self.var:
 49+ obs = self.var[obs]
 50+ obs.data = obs.data.value
 51+
 52+ def store(self):
 53+ #self.convert_synchronized_objects()
 54+ location = os.path.join(self.rts.binary_location, '%s_%s.bin' % (self.var.name, self.name))
 55+ fh = open(location, 'wb')
 56+ cPickle.dump(self.var, fh)
 57+ fh.close()
 58+
 59+ def run(self):
 60+ '''
 61+ Generic loop function that loops over all the editors of a Wikipedia
 62+ project and then calls the function that does the actual aggregation.
 63+ '''
 64+ mongo = db.init_mongo_db(self.rts.dbname)
 65+ coll = mongo[self.rts.editors_dataset]
 66+ while True:
 67+ try:
 68+ task = self.tasks.get(block=False)
 69+ self.tasks.task_done()
 70+ if task == None:
 71+ #print self.var.number_of_obs(), len(self.var.obs)
 72+ #self.store()
 73+ self.result.put(self.var)
 74+ break
 75+ editor = coll.find_one({'editor': task.editor})
 76+
 77+ task.plugin(self.var, editor, dbname=self.rts.dbname)
 78+ self.result.put(True)
 79+ except Empty:
 80+ pass
 81+
 82+class Task:
 83+ def __init__(self, plugin, editor):
 84+ self.plugin = plugin
 85+ self.editor = editor
 86+
 87+
 88+def retrieve_plugin(func):
4789 functions = inventory.available_analyses()
4890 try:
49 - func = functions[func]
 91+ return functions[func]
5092 except KeyError:
5193 return False
5294
53 - print 'Exporting data for chart: %s' % func.func_name
54 - print 'Project: %s' % dbname
55 - print 'Dataset: %s' % collection
5695
57 - ds = loop_editors(dbname, project, collection, language_code, func, encoder, **kwargs)
 96+def feedback(plugin, rts):
 97+ print 'Exporting data for chart: %s' % plugin.func_name
 98+ print 'Project: %s' % rts.dbname
 99+ print 'Dataset: %s' % rts.editors_dataset
 100+
 101+
 102+def write_output(ds, rts, stopwatch):
58103 ds.create_filename()
59 - print 'Storing dataset: %s' % os.path.join(settings.dataset_location, ds.filename)
 104+ print 'Storing dataset: %s' % os.path.join(rts.dataset_location,
 105+ ds.filename)
60106 ds.write(format='csv')
61 -
62 - print 'Serializing dataset to %s_%s' % (dbname, 'charts')
63 - log.log_to_mongo(ds, 'chart', 'storing', stopwatch, event='start')
 107+ print 'Serializing dataset to %s_%s' % (rts.dbname, 'charts')
 108+ log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start')
64109 ds.write(format='mongo')
65 - stopwatch.elapsed()
66 - log.log_to_mongo(ds, 'chart', 'storing', stopwatch, event='finish')
 110+ log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish')
67111
68 - ds.summary()
69 - return res
70112
71 -
72 -def loop_editors(dbname, project, collection, language_code, func, encoder, **kwargs):
 113+def generate_chart_data(rts, func, **kwargs):
73114 '''
74 - Generic loop function that loops over all the editors of a Wikipedia project
75 - and then calls the function that does the actual aggregation.
 115+ This is the entry function to be called to generate data for creating
 116+ charts.
76117 '''
77 - mongo = db.init_mongo_db(dbname)
78 - coll = mongo[collection]
79 - editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
 118+ stopwatch = timer.Timer()
 119+ plugin = retrieve_plugin(func)
 120+ feedback(plugin, rts)
80121
81122
82 - min_year, max_year = determine_project_year_range(dbname, collection, 'new_wikipedian')
83 - pbar = progressbar.ProgressBar(maxval=len(editors)).start()
84 - print 'Number of editors: %s' % len(editors)
85 -
 123+ tasks = JoinableQueue()
 124+ result = JoinableQueue()
 125+ mgr = Manager()
 126+ lock = mgr.RLock()
 127+ editors = db.retrieve_distinct_keys(rts.dbname, rts.editors_dataset, 'editor')
 128+ min_year, max_year = determine_project_year_range(rts.dbname,
 129+ rts.editors_dataset,
 130+ 'new_wikipedian')
86131 fmt = kwargs.pop('format', 'long')
 132+ time_unit = kwargs.pop('time_unit', 'year')
87133 kwargs['min_year'] = min_year
88134 kwargs['max_year'] = max_year
89 - variables = []
90 - ds = dataset.Dataset(func.func_name,
91 - project,
92 - coll.name,
93 - language_code,
94 - encoder,
95 - variables,
96 - format=fmt)
97 - var = dataset.Variable('count', **kwargs)
98135
 136+ pbar = progressbar.ProgressBar(maxval=len(editors)).start()
 137+ var = dataset.Variable('count', time_unit, lock, **kwargs)
 138+
99139 for editor in editors:
100 - editor = coll.find_one({'editor': editor})
101 - var = func(var, editor, dbname=dbname)
102 - pbar.update(pbar.currval + 1)
 140+ tasks.put(Task(plugin, editor))
103141
 142+ consumers = [Analyzer(rts, tasks, result, var) for
 143+ x in xrange(rts.number_of_processes)]
 144+
 145+ for x in xrange(rts.number_of_processes):
 146+ tasks.put(None)
 147+
 148+ for w in consumers:
 149+ w.start()
 150+
 151+ ppills = rts.number_of_processes
 152+ while True:
 153+ while ppills > 0:
 154+ try:
 155+ res = result.get(block=True)
 156+ if res == True:
 157+ pbar.update(pbar.currval + 1)
 158+ else:
 159+ ppills -= 1
 160+ var = res
 161+ except Empty:
 162+ pass
 163+ break
 164+
 165+
 166+ tasks.join()
 167+ ds = dataset.Dataset(plugin.func_name, rts, format=fmt)
 168+ #var = consumers[0].var
104169 ds.add_variable(var)
105 - return ds
106170
 171+ stopwatch.elapsed()
 172+ write_output(ds, rts, stopwatch)
107173
 174+ ds.summary()
 175+ return True
 176+
 177+
108178 def determine_project_year_range(dbname, collection, var):
109179 '''
110180 Determine the first and final year for the observed data
@@ -120,16 +190,24 @@
121191
122192
123193 if __name__ == '__main__':
124 - generate_chart_data('wiki', 'editors_dataset', 'en', 'histogram_by_backward_cohort', 'to_bar_json', time_unit='year', cutoff=0, cum_cutoff=50)
125 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'edit_patterns', 'to_bar_json', time_unit='year', cutoff=5)
126 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'total_number_of_new_wikipedians', 'to_bar_json', time_unit='year')
127 - #generate_chart_data('wiki', 'editors', 'en', 'total_number_of_articles', 'to_bar_json', time_unit='year')
128 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'total_cumulative_edits', 'to_bar_json', time_unit='year')
129 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'cohort_dataset_forward_histogram', 'to_bar_json', time_unit='month', cutoff=5, cum_cutoff=0)
130 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'cohort_dataset_backward_bar', 'to_stacked_bar_json', time_unit='year', cutoff=10, cum_cutoff=0, format='wide')
131 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'cohort_dataset_forward_bar', 'to_stacked_bar_json', time_unit='year', cutoff=5, cum_cutoff=0, format='wide')
132 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'histogram_edits', 'to_bar_json', time_unit='year', cutoff=0)
133 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'time_to_new_wikipedian', 'to_bar_json', time_unit='year', cutoff=0)
134 - #generate_chart_data('wiki', 'editors_dataset', 'en', 'new_editor_count', 'to_bar_json', time_unit='month', cutoff=0)
 194+ project, language, parser = manager.init_args_parser()
 195+ args = parser.parse_args(['django'])
 196+ rts = runtime_settings.init_environment('wiki', 'en', args)
135197
136 - #available_analyses()
 198+ #TEMP FIX, REMOVE
 199+ rts.dbname = 'enwiki'
 200+ rts.editors_dataset = 'editors_dataset'
 201+ #END TEMP FIX
 202+
 203+ generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10)
 204+# generate_chart_data(rts, 'edit_patterns', time_unit='year', cutoff=5)
 205+# generate_chart_data(rts, 'total_number_of_new_wikipedians', time_unit='year')
 206+# generate_chart_data(rts, 'total_number_of_articles', time_unit='year')
 207+# generate_chart_data(rts, 'total_cumulative_edits', time_unit='year')
 208+# generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=5, cum_cutoff=0)
 209+# generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=10, cum_cutoff=0, format='wide')
 210+# generate_chart_data(rts, 'cohort_dataset_forward_bar', time_unit='year', cutoff=5, cum_cutoff=0, format='wide')
 211+# generate_chart_data(rts, 'histogram_edits', time_unit='year', cutoff=0)
 212+# generate_chart_data(rts, 'time_to_new_wikipedian', time_unit='year', cutoff=0)
 213+# generate_chart_data(rts, 'new_editor_count', time_unit='month', cutoff=0)
 214+# #available_analyses()
Index: trunk/tools/editor_trends/classes/consumers.py
@@ -26,5 +26,3 @@
2727 self.rts = rts
2828 self.tasks = tasks
2929 self.result = result
30 -
31 -
Index: trunk/tools/editor_trends/classes/dataset.py
@@ -25,8 +25,9 @@
2626 import sys
2727 import hashlib
2828 from pymongo.son_manipulator import SONManipulator
29 -from multiprocessing import Lock
 29+from multiprocessing import RLock, Array, Value
3030 from texttable import Texttable
 31+from datetime import timedelta
3132
3233
3334 if '..' not in sys.path:
@@ -90,6 +91,7 @@
9192 to lookup an Observation or Variable.
9293 '''
9394 id = '_'.join([str(var) for var in vars])
 95+ #return id
9496 m = hashlib.md5()
9597 m.update(id)
9698 #print id, m.hexdigest()
@@ -121,7 +123,8 @@
122124 of the date
123125 '''
124126 assert self.time_unit == 'year' or self.time_unit == 'month' \
125 - or self.time_unit == 'day', 'Time unit should either be year, month or day.'
 127+ or self.time_unit == 'day', \
 128+ 'Time unit should either be year, month or day.'
126129
127130 if self.time_unit == 'year':
128131 datum = datetime.datetime(date.year, 1, 1)
@@ -139,24 +142,29 @@
140143 Determine the width of a date range for an observation.
141144 '''
142145 if self.time_unit == 'year':
143 - return datetime.datetime(date.year, 12, 31), datetime.datetime(date.year, 1, 1)
 146+ return datetime.datetime(date.year, 12, 31), \
 147+ datetime.datetime(date.year, 1, 1)
144148 elif self.time_unit == 'month':
145149 day = calendar.monthrange(date.year, date.month)[1]
146 - return datetime.datetime(date.year, date.month, day), datetime.datetime(date.year, date.month, 1)
 150+ return datetime.datetime(date.year, date.month, day), \
 151+ datetime.datetime(date.year, date.month, 1)
147152 else:
148 - return datetime.datetime(date.year, date.month, date.day), datetime.datetime(date.year, date.month, date.day)
 153+ return datetime.datetime(date.year, date.month, date.day), \
 154+ datetime.datetime(date.year, date.month, date.day)
149155
150156
151157 class Observation(Data):
152 - lock = Lock()
153158 '''
154159 The smallest unit, here the actual data is being stored.
155160 Time_unit should either be 'year', 'month' or 'day'.
156161 '''
157162 def __init__(self, date, time_unit, id, meta):
158 - assert isinstance(date, datetime.datetime), 'Date variable should be a datetime.datetime instance.'
 163+ assert isinstance(date, datetime.datetime), '''Date variable should be
 164+ a datetime.datetime instance.'''
 165+ #self.lock = lock #Lock()
159166 self.date = date
160167 self.data = 0
 168+ #self.data = Value('i', 0)
161169 self.time_unit = time_unit
162170 self.t1, self.t0 = self.set_date_range(date)
163171 self.id = id
@@ -164,7 +172,8 @@
165173 self.count = 0
166174 for mt in meta:
167175 if isinstance(mt, float):
168 - raise Exception, 'Mongo does not allow a dot "." in the name of a key, please use an integer or string as key.'
 176+ raise Exception, '''Mongo does not allow a dot "." in the name
 177+ of a key, please use an integer or string as key.'''
169178 elif not isinstance(mt, list):
170179 setattr(self, mt, meta[mt])
171180 self.props.append(mt)
@@ -174,7 +183,9 @@
175184 return '%s' % self.date
176185
177186 def __str__(self):
178 - return 'range: %s:%s' % (self.t0, self.t1)
 187+ return 'range: %s-%s-%s : %s-%s-%s' % (self.t0.month, self.t0.day, \
 188+ self.t0.year, self.t1.month, \
 189+ self.t1.day, self.t1.year)
179190
180191 def __iter__(self):
181192 for obs in self.data:
@@ -186,17 +197,19 @@
187198 def add(self, value):
188199 '''
189200 '''
190 - self.lock.acquire()
191 - try:
192 - if isinstance(value, list):
193 - if self.count == 0:
194 - self.data = []
195 - self.data.append(value)
196 - else:
197 - self.data += value
198 - finally:
199 - self.count += 1
200 - self.lock.release()
 201+ #self.lock.acquire()
 202+ #try:
 203+ if isinstance(value, list):
 204+ if self.count == 0:
 205+ self.data = []
 206+ #self.data = Array('i', 0)
 207+ self.data.append(value)
 208+ else:
 209+ self.data += value
 210+ #self.data.value += value
 211+ #finally:
 212+ self.count += 1
 213+ #self.lock.release()
201214
202215
203216 def get_date_range(self):
@@ -207,10 +220,9 @@
208221 '''
209222 This class constructs a time-based variable.
210223 '''
211 -
212 - def __init__(self, name, time_unit, **kwargs):
 224+ def __init__(self, name, time_unit, lock, **kwargs):
213225 self.name = name
214 - self.lock = Lock()
 226+ self.lock = lock
215227 self.obs = {}
216228 self.time_unit = time_unit
217229 self.groupbys = []
@@ -249,7 +261,6 @@
250262 for key in self:
251263 yield (key, self.obs[key])
252264
253 -
254265 def get_data(self):
255266 return [o for o in self.itervalues()]
256267
@@ -257,6 +268,8 @@
258269 self.lock.acquire()
259270 try:
260271 obs = self.obs.get(id, Observation(date, self.time_unit, id, meta))
 272+ #self.obs[id] = obs
 273+ x = len(self.obs)
261274 finally:
262275 self.lock.release()
263276 return obs
@@ -264,10 +277,10 @@
265278 def add(self, date, value, meta={}):
266279 '''
267280 The add function is used to add an observation to a variable. An
268 - observation is always grouped by the combination of the date and time_unit.
269 - Time_unit is a property of a Variable and indicates how granular the
270 - observations should be grouped. For example, if time_unit == year then
271 - all observations in a given year will be grouped.
 281+ observation is always grouped by the combination of the date and
 282+ time_unit. Time_unit is a property of a Variable and indicates how
 283+ granular the observations should be grouped. For example, if
 284+ time_unit == year then all observations in a given year will be grouped.
272285 When calling add you should supply at least two variables:
273286 1) date: when did the observation happen
274287 2) value: an integer or float that was observed on that date
@@ -276,25 +289,25 @@
277290 For example, if you add {'experience': 3} as the meta dict when calling
278291 add then you will create an extra grouping called experience and all
279292 future observations who fall in the same date range and the same
280 - exerience level will be grouped by that particular observation. You
281 - can use as many extra groupings as you want but usually one extra grouping
282 - should be enough.
 293+ exerience level, in this case 3, will be grouped by that particular
 294+ observation. You can use as many extra groupings as you want but
 295+ usually one extra grouping should be enough.
283296 '''
284 - assert isinstance(meta, dict), 'The meta variable should be a dict (either empty or with variables to group by.'
 297+ assert isinstance(meta, dict), '''The meta variable should be a dict
 298+ (either empty or with variables to group by.'''
285299 start, end = self.set_date_range(date)
286300 values = meta.values()
287301 values.insert(0, end)
288302 values.insert(0, start)
289303 id = self.__hash__(values)
290 -# print values
291 - self.lock.acquire()
 304+ obs = self.get_observation(id, date, meta)
 305+ obs.add(value)
292306 try:
293 - obs = self.get_observation(id, date, meta)
294 - obs.add(value)
 307+ self.lock.acquire()
295308 self.obs[id] = obs
296309 finally:
297310 self.lock.release()
298 - print len(self.obs)
 311+ #print date, id, meta.values(), obs.count, len(self.obs)
299312
300313 def number_of_obs(self):
301314 n = 0
@@ -341,7 +354,6 @@
342355 '''
343356
344357 def __init__(self, chart, rts, vars=None, **kwargs):
345 - #project, collection, language_code
346358 self.encoder, chart, charts = json_encoders.get_json_encoder(chart)
347359 if self.encoder == None:
348360 raise exceptions.UnknownChartError(chart, charts)
@@ -377,8 +389,8 @@
378390 print 'Project: %s%s' % (self.language_code, self.project)
379391 print 'JSON encoder: %s' % self.encoder
380392 print 'Raw data was retrieved from: %s%s/%s' % (self.language_code,
381 - self.project,
382 - self.collection)
 393+ self.project,
 394+ self.collection)
383395
384396 def create_filename(self):
385397 '''
@@ -422,7 +434,7 @@
423435 self.variables.append(var.name)
424436 setattr(self, var.name, var)
425437 else:
426 - raise TypeError('You can only instance of Variable to a dataset.')
 438+ raise TypeError('You can only add an instance of Variable to a dataset.')
427439
428440 def write(self, format='csv'):
429441 '''
@@ -483,14 +495,26 @@
484496 float_nums = [float(x) for x in number_list]
485497 return sum(float_nums) / len(number_list)
486498
 499+ def get_min(self, number_list):
 500+ if number_list == []:
 501+ return '.'
 502+ else:
 503+ return min(number_list)
 504+
 505+ def get_max(self, number_list):
 506+ if number_list == []:
 507+ return '.'
 508+ else:
 509+ return max(number_list)
 510+
487511 def descriptives(self):
488512 for variable in self:
489513 data = variable.get_data()
490514 variable.mean = self.get_mean(data)
491515 variable.median = self.get_median(data)
492516 variable.sds = self.get_standard_deviation(data)
493 - variable.min = min(data)
494 - variable.max = max(data)
 517+ variable.min = self.get_min(data)
 518+ variable.max = self.get_max(data)
495519 variable.num_obs = variable.number_of_obs()
496520 variable.num_dates = len(variable)
497521 variable.first_obs, variable.last_obs = variable.get_date_range()
@@ -499,7 +523,7 @@
500524 self.descriptives()
501525 table = Texttable(max_width=0)
502526 vars = ['Variable', 'Mean', 'Median', 'SD', 'Minimum', 'Maximum',
503 - 'Num Obs', 'Num of\nUnique Dates', 'First Obs', 'Final Obs']
 527+ 'Num Obs', 'Num of\nUnique Groups', 'First Obs', 'Final Obs']
504528 table.add_row([var for var in vars])
505529 table.set_cols_align(['r' for v in vars])
506530 table.set_cols_valign(['m' for v in vars])
@@ -521,29 +545,41 @@
522546
523547 d1 = datetime.datetime.today()
524548 d2 = datetime.datetime(2007, 6, 7)
525 - ds = Dataset('test', 'wiki', 'editors_dataset', 'en', 'to_bar_json', [
526 - {'name': 'count', 'time_unit': 'year'},
527 - # {'name': 'testest', 'time_unit': 'year'}
528 - ])
529 - ds.count.add(d1, 10, {'exp': 3})
530 - ds.count.add(d1, 135, {'exp': 3})
531 - ds.count.add(d2, 1, {'exp': 4})
532 - #ds.testest.add(d1, 135)
533 - #ds.testest.add(d2, 535)
534 - ds.summary()
535 - ds.write(format='csv')
536 -# v = Variable('test', 'year')
537 - ds.encode()
 549+# ds = Dataset('histogram', rts, [{'name': 'count', 'time_unit': 'year'},
 550+# #{'name': 'testest', 'time_unit': 'year'}
 551+# ])
 552+# ds.count.add(d1, 10, {'exp': 3})
 553+# ds.count.add(d1, 135, {'exp': 3})
 554+# ds.count.add(d2, 1, {'exp': 4})
 555+# #ds.testest.add(d1, 135)
 556+# #ds.testest.add(d2, 535)
 557+# ds.summary()
 558+# ds.write(format='csv')
 559+#
 560+# ds.encode()
 561+ #name, time_unit, lock, **kwargs
 562+ lock = RLock()
 563+ v = Variable('test', 'year', lock)
 564+ v.add(d1, 10, {'exp': 3, 'test': 10})
 565+ v.add(d1, 135, {'exp': 3, 'test': 10})
 566+ v.add(d2, 1, {'exp': 4, 'test': 10})
 567+ v.add(d2, 1, {'exp': 4, 'test': 10})
 568+ v.add(d2 , 1, {'exp': 3, 'test': 8})
 569+ v.add(d2 , 1, {'exp': 2, 'test': 10})
 570+ v.add(d2 , 1, {'exp': 4, 'test': 11})
 571+ v.add(d2 , 1, {'exp': 8, 'test': 13})
 572+ v.add(d2 , 1, {'exp': 9, 'test': 12})
538573
539574
540 - # mongo.test.insert({'variables': ds})
 575+# v.add(d2 + timedelta(days=400), 1, {'exp': 4, 'test': 10})
 576+# v.add(d2 + timedelta(days=900), 1, {'exp': 3, 'test': 8})
 577+# v.add(d2 + timedelta(days=1200), 1, {'exp': 2, 'test': 10})
 578+# v.add(d2 + timedelta(days=1600), 1, {'exp': 4, 'test': 11})
 579+# v.add(d2 + timedelta(days=2000), 1, {'exp': 8, 'test': 13})
 580+# v.add(d2 + timedelta(days=2400), 1, {'exp': 9, 'test': 12})
541581
542 - # v.add(d2 , 5)
543 - #o = v.get_observation(d2)
544 -# ds = rawdata.find_one({'project': 'wiki',
545 -# 'language_code': 'en',
546 -# 'hash': 'cohort_dataset_backward_bar'})
 582+ print len(v), v.number_of_obs()
547583
548 -
 584+ # mongo.test.insert({'variables': ds})
549585 if __name__ == '__main__':
550586 debug()

Status & tagging log