Index: trunk/tools/editor_trends/analyses/analyzer.py |
— | — | @@ -17,8 +17,10 @@ |
18 | 18 | __date__ = '2010-12-10' |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
21 | | - |
| 21 | +from multiprocessing import JoinableQueue, Lock, Manager, RLock |
| 22 | +from Queue import Empty |
22 | 23 | import sys |
| 24 | +import cPickle |
23 | 25 | import os |
24 | 26 | import progressbar |
25 | 27 | import datetime |
— | — | @@ -27,83 +29,151 @@ |
28 | 30 | sys.path.append('..') |
29 | 31 | |
30 | 32 | import inventory |
| 33 | +import manage as manager |
31 | 34 | from classes import dataset |
32 | | -from classes import settings |
33 | | -settings = settings.Settings() |
| 35 | +from classes import runtime_settings |
| 36 | +from classes import consumers |
34 | 37 | from database import db |
35 | 38 | from utils import timer |
36 | 39 | from utils import log |
37 | 40 | |
| 41 | +class Analyzer(consumers.BaseConsumer): |
38 | 42 | |
| 43 | + def __init__(self, rts, tasks, result, var): |
| 44 | + super(Analyzer, self).__init__(rts, tasks, result) |
| 45 | + self.var = var |
39 | 46 | |
40 | | -def generate_chart_data(project, collection, language_code, func, encoder, **kwargs): |
41 | | - ''' |
42 | | - This is the entry function to be called to generate data for creating charts. |
43 | | - ''' |
44 | | - stopwatch = timer.Timer() |
45 | | - res = True |
46 | | - dbname = '%s%s' % (language_code, project) |
| 47 | + def convert_synchronized_objects(self): |
| 48 | + for obs in self.var: |
| 49 | + obs = self.var[obs] |
| 50 | + obs.data = obs.data.value |
| 51 | + |
| 52 | + def store(self): |
| 53 | + #self.convert_synchronized_objects() |
| 54 | + location = os.path.join(self.rts.binary_location, '%s_%s.bin' % (self.var.name, self.name)) |
| 55 | + fh = open(location, 'wb') |
| 56 | + cPickle.dump(self.var, fh) |
| 57 | + fh.close() |
| 58 | + |
| 59 | + def run(self): |
| 60 | + ''' |
| 61 | + Generic loop function that loops over all the editors of a Wikipedia |
| 62 | + project and then calls the function that does the actual aggregation. |
| 63 | + ''' |
| 64 | + mongo = db.init_mongo_db(self.rts.dbname) |
| 65 | + coll = mongo[self.rts.editors_dataset] |
| 66 | + while True: |
| 67 | + try: |
| 68 | + task = self.tasks.get(block=False) |
| 69 | + self.tasks.task_done() |
| 70 | + if task == None: |
| 71 | + #print self.var.number_of_obs(), len(self.var.obs) |
| 72 | + #self.store() |
| 73 | + self.result.put(self.var) |
| 74 | + break |
| 75 | + editor = coll.find_one({'editor': task.editor}) |
| 76 | + |
| 77 | + task.plugin(self.var, editor, dbname=self.rts.dbname) |
| 78 | + self.result.put(True) |
| 79 | + except Empty: |
| 80 | + pass |
| 81 | + |
| 82 | +class Task: |
| 83 | + def __init__(self, plugin, editor): |
| 84 | + self.plugin = plugin |
| 85 | + self.editor = editor |
| 86 | + |
| 87 | + |
| 88 | +def retrieve_plugin(func): |
47 | 89 | functions = inventory.available_analyses() |
48 | 90 | try: |
49 | | - func = functions[func] |
| 91 | + return functions[func] |
50 | 92 | except KeyError: |
51 | 93 | return False |
52 | 94 | |
53 | | - print 'Exporting data for chart: %s' % func.func_name |
54 | | - print 'Project: %s' % dbname |
55 | | - print 'Dataset: %s' % collection |
56 | 95 | |
57 | | - ds = loop_editors(dbname, project, collection, language_code, func, encoder, **kwargs) |
| 96 | +def feedback(plugin, rts): |
| 97 | + print 'Exporting data for chart: %s' % plugin.func_name |
| 98 | + print 'Project: %s' % rts.dbname |
| 99 | + print 'Dataset: %s' % rts.editors_dataset |
| 100 | + |
| 101 | + |
| 102 | +def write_output(ds, rts, stopwatch): |
58 | 103 | ds.create_filename() |
59 | | - print 'Storing dataset: %s' % os.path.join(settings.dataset_location, ds.filename) |
| 104 | + print 'Storing dataset: %s' % os.path.join(rts.dataset_location, |
| 105 | + ds.filename) |
60 | 106 | ds.write(format='csv') |
61 | | - |
62 | | - print 'Serializing dataset to %s_%s' % (dbname, 'charts') |
63 | | - log.log_to_mongo(ds, 'chart', 'storing', stopwatch, event='start') |
| 107 | + print 'Serializing dataset to %s_%s' % (rts.dbname, 'charts') |
| 108 | + log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start') |
64 | 109 | ds.write(format='mongo') |
65 | | - stopwatch.elapsed() |
66 | | - log.log_to_mongo(ds, 'chart', 'storing', stopwatch, event='finish') |
| 110 | + log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish') |
67 | 111 | |
68 | | - ds.summary() |
69 | | - return res |
70 | 112 | |
71 | | - |
72 | | -def loop_editors(dbname, project, collection, language_code, func, encoder, **kwargs): |
| 113 | +def generate_chart_data(rts, func, **kwargs): |
73 | 114 | ''' |
74 | | - Generic loop function that loops over all the editors of a Wikipedia project |
75 | | - and then calls the function that does the actual aggregation. |
| 115 | + This is the entry function to be called to generate data for creating |
| 116 | + charts. |
76 | 117 | ''' |
77 | | - mongo = db.init_mongo_db(dbname) |
78 | | - coll = mongo[collection] |
79 | | - editors = db.retrieve_distinct_keys(dbname, collection, 'editor') |
| 118 | + stopwatch = timer.Timer() |
| 119 | + plugin = retrieve_plugin(func) |
| 120 | + feedback(plugin, rts) |
80 | 121 | |
81 | 122 | |
82 | | - min_year, max_year = determine_project_year_range(dbname, collection, 'new_wikipedian') |
83 | | - pbar = progressbar.ProgressBar(maxval=len(editors)).start() |
84 | | - print 'Number of editors: %s' % len(editors) |
85 | | - |
| 123 | + tasks = JoinableQueue() |
| 124 | + result = JoinableQueue() |
| 125 | + mgr = Manager() |
| 126 | + lock = mgr.RLock() |
| 127 | + editors = db.retrieve_distinct_keys(rts.dbname, rts.editors_dataset, 'editor') |
| 128 | + min_year, max_year = determine_project_year_range(rts.dbname, |
| 129 | + rts.editors_dataset, |
| 130 | + 'new_wikipedian') |
86 | 131 | fmt = kwargs.pop('format', 'long') |
| 132 | + time_unit = kwargs.pop('time_unit', 'year') |
87 | 133 | kwargs['min_year'] = min_year |
88 | 134 | kwargs['max_year'] = max_year |
89 | | - variables = [] |
90 | | - ds = dataset.Dataset(func.func_name, |
91 | | - project, |
92 | | - coll.name, |
93 | | - language_code, |
94 | | - encoder, |
95 | | - variables, |
96 | | - format=fmt) |
97 | | - var = dataset.Variable('count', **kwargs) |
98 | 135 | |
| 136 | + pbar = progressbar.ProgressBar(maxval=len(editors)).start() |
| 137 | + var = dataset.Variable('count', time_unit, lock, **kwargs) |
| 138 | + |
99 | 139 | for editor in editors: |
100 | | - editor = coll.find_one({'editor': editor}) |
101 | | - var = func(var, editor, dbname=dbname) |
102 | | - pbar.update(pbar.currval + 1) |
| 140 | + tasks.put(Task(plugin, editor)) |
103 | 141 | |
| 142 | + consumers = [Analyzer(rts, tasks, result, var) for |
| 143 | + x in xrange(rts.number_of_processes)] |
| 144 | + |
| 145 | + for x in xrange(rts.number_of_processes): |
| 146 | + tasks.put(None) |
| 147 | + |
| 148 | + for w in consumers: |
| 149 | + w.start() |
| 150 | + |
| 151 | + ppills = rts.number_of_processes |
| 152 | + while True: |
| 153 | + while ppills > 0: |
| 154 | + try: |
| 155 | + res = result.get(block=True) |
| 156 | + if res == True: |
| 157 | + pbar.update(pbar.currval + 1) |
| 158 | + else: |
| 159 | + ppills -= 1 |
| 160 | + var = res |
| 161 | + except Empty: |
| 162 | + pass |
| 163 | + break |
| 164 | + |
| 165 | + |
| 166 | + tasks.join() |
| 167 | + ds = dataset.Dataset(plugin.func_name, rts, format=fmt) |
| 168 | + #var = consumers[0].var |
104 | 169 | ds.add_variable(var) |
105 | | - return ds |
106 | 170 | |
| 171 | + stopwatch.elapsed() |
| 172 | + write_output(ds, rts, stopwatch) |
107 | 173 | |
| 174 | + ds.summary() |
| 175 | + return True |
| 176 | + |
| 177 | + |
108 | 178 | def determine_project_year_range(dbname, collection, var): |
109 | 179 | ''' |
110 | 180 | Determine the first and final year for the observed data |
— | — | @@ -120,16 +190,24 @@ |
121 | 191 | |
122 | 192 | |
123 | 193 | if __name__ == '__main__': |
124 | | - generate_chart_data('wiki', 'editors_dataset', 'en', 'histogram_by_backward_cohort', 'to_bar_json', time_unit='year', cutoff=0, cum_cutoff=50) |
125 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'edit_patterns', 'to_bar_json', time_unit='year', cutoff=5) |
126 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'total_number_of_new_wikipedians', 'to_bar_json', time_unit='year') |
127 | | - #generate_chart_data('wiki', 'editors', 'en', 'total_number_of_articles', 'to_bar_json', time_unit='year') |
128 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'total_cumulative_edits', 'to_bar_json', time_unit='year') |
129 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'cohort_dataset_forward_histogram', 'to_bar_json', time_unit='month', cutoff=5, cum_cutoff=0) |
130 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'cohort_dataset_backward_bar', 'to_stacked_bar_json', time_unit='year', cutoff=10, cum_cutoff=0, format='wide') |
131 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'cohort_dataset_forward_bar', 'to_stacked_bar_json', time_unit='year', cutoff=5, cum_cutoff=0, format='wide') |
132 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'histogram_edits', 'to_bar_json', time_unit='year', cutoff=0) |
133 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'time_to_new_wikipedian', 'to_bar_json', time_unit='year', cutoff=0) |
134 | | - #generate_chart_data('wiki', 'editors_dataset', 'en', 'new_editor_count', 'to_bar_json', time_unit='month', cutoff=0) |
| 194 | + project, language, parser = manager.init_args_parser() |
| 195 | + args = parser.parse_args(['django']) |
| 196 | + rts = runtime_settings.init_environment('wiki', 'en', args) |
135 | 197 | |
136 | | - #available_analyses() |
| 198 | + #TEMP FIX, REMOVE |
| 199 | + rts.dbname = 'enwiki' |
| 200 | + rts.editors_dataset = 'editors_dataset' |
| 201 | + #END TEMP FIX |
| 202 | + |
| 203 | + generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10) |
| 204 | +# generate_chart_data(rts, 'edit_patterns', time_unit='year', cutoff=5) |
| 205 | +# generate_chart_data(rts, 'total_number_of_new_wikipedians', time_unit='year') |
| 206 | +# generate_chart_data(rts, 'total_number_of_articles', time_unit='year') |
| 207 | +# generate_chart_data(rts, 'total_cumulative_edits', time_unit='year') |
| 208 | +# generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=5, cum_cutoff=0) |
| 209 | +# generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=10, cum_cutoff=0, format='wide') |
| 210 | +# generate_chart_data(rts, 'cohort_dataset_forward_bar', time_unit='year', cutoff=5, cum_cutoff=0, format='wide') |
| 211 | +# generate_chart_data(rts, 'histogram_edits', time_unit='year', cutoff=0) |
| 212 | +# generate_chart_data(rts, 'time_to_new_wikipedian', time_unit='year', cutoff=0) |
| 213 | +# generate_chart_data(rts, 'new_editor_count', time_unit='month', cutoff=0) |
| 214 | +# #available_analyses() |
Index: trunk/tools/editor_trends/classes/consumers.py |
— | — | @@ -26,5 +26,3 @@ |
27 | 27 | self.rts = rts |
28 | 28 | self.tasks = tasks |
29 | 29 | self.result = result |
30 | | - |
31 | | - |
Index: trunk/tools/editor_trends/classes/dataset.py |
— | — | @@ -25,8 +25,9 @@ |
26 | 26 | import sys |
27 | 27 | import hashlib |
28 | 28 | from pymongo.son_manipulator import SONManipulator |
29 | | -from multiprocessing import Lock |
| 29 | +from multiprocessing import RLock, Array, Value |
30 | 30 | from texttable import Texttable |
| 31 | +from datetime import timedelta |
31 | 32 | |
32 | 33 | |
33 | 34 | if '..' not in sys.path: |
— | — | @@ -90,6 +91,7 @@ |
91 | 92 | to lookup an Observation or Variable. |
92 | 93 | ''' |
93 | 94 | id = '_'.join([str(var) for var in vars]) |
| 95 | + #return id |
94 | 96 | m = hashlib.md5() |
95 | 97 | m.update(id) |
96 | 98 | #print id, m.hexdigest() |
— | — | @@ -121,7 +123,8 @@ |
122 | 124 | of the date |
123 | 125 | ''' |
124 | 126 | assert self.time_unit == 'year' or self.time_unit == 'month' \ |
125 | | - or self.time_unit == 'day', 'Time unit should either be year, month or day.' |
| 127 | + or self.time_unit == 'day', \ |
| 128 | + 'Time unit should either be year, month or day.' |
126 | 129 | |
127 | 130 | if self.time_unit == 'year': |
128 | 131 | datum = datetime.datetime(date.year, 1, 1) |
— | — | @@ -139,24 +142,29 @@ |
140 | 143 | Determine the width of a date range for an observation. |
141 | 144 | ''' |
142 | 145 | if self.time_unit == 'year': |
143 | | - return datetime.datetime(date.year, 12, 31), datetime.datetime(date.year, 1, 1) |
| 146 | + return datetime.datetime(date.year, 12, 31), \ |
| 147 | + datetime.datetime(date.year, 1, 1) |
144 | 148 | elif self.time_unit == 'month': |
145 | 149 | day = calendar.monthrange(date.year, date.month)[1] |
146 | | - return datetime.datetime(date.year, date.month, day), datetime.datetime(date.year, date.month, 1) |
| 150 | + return datetime.datetime(date.year, date.month, day), \ |
| 151 | + datetime.datetime(date.year, date.month, 1) |
147 | 152 | else: |
148 | | - return datetime.datetime(date.year, date.month, date.day), datetime.datetime(date.year, date.month, date.day) |
| 153 | + return datetime.datetime(date.year, date.month, date.day), \ |
| 154 | + datetime.datetime(date.year, date.month, date.day) |
149 | 155 | |
150 | 156 | |
151 | 157 | class Observation(Data): |
152 | | - lock = Lock() |
153 | 158 | ''' |
154 | 159 | The smallest unit, here the actual data is being stored. |
155 | 160 | Time_unit should either be 'year', 'month' or 'day'. |
156 | 161 | ''' |
157 | 162 | def __init__(self, date, time_unit, id, meta): |
158 | | - assert isinstance(date, datetime.datetime), 'Date variable should be a datetime.datetime instance.' |
| 163 | + assert isinstance(date, datetime.datetime), '''Date variable should be |
| 164 | + a datetime.datetime instance.''' |
| 165 | + #self.lock = lock #Lock() |
159 | 166 | self.date = date |
160 | 167 | self.data = 0 |
| 168 | + #self.data = Value('i', 0) |
161 | 169 | self.time_unit = time_unit |
162 | 170 | self.t1, self.t0 = self.set_date_range(date) |
163 | 171 | self.id = id |
— | — | @@ -164,7 +172,8 @@ |
165 | 173 | self.count = 0 |
166 | 174 | for mt in meta: |
167 | 175 | if isinstance(mt, float): |
168 | | - raise Exception, 'Mongo does not allow a dot "." in the name of a key, please use an integer or string as key.' |
| 176 | + raise Exception, '''Mongo does not allow a dot "." in the name |
| 177 | + of a key, please use an integer or string as key.''' |
169 | 178 | elif not isinstance(mt, list): |
170 | 179 | setattr(self, mt, meta[mt]) |
171 | 180 | self.props.append(mt) |
— | — | @@ -174,7 +183,9 @@ |
175 | 184 | return '%s' % self.date |
176 | 185 | |
177 | 186 | def __str__(self): |
178 | | - return 'range: %s:%s' % (self.t0, self.t1) |
| 187 | + return 'range: %s-%s-%s : %s-%s-%s' % (self.t0.month, self.t0.day, \ |
| 188 | + self.t0.year, self.t1.month, \ |
| 189 | + self.t1.day, self.t1.year) |
179 | 190 | |
180 | 191 | def __iter__(self): |
181 | 192 | for obs in self.data: |
— | — | @@ -186,17 +197,19 @@ |
187 | 198 | def add(self, value): |
188 | 199 | ''' |
189 | 200 | ''' |
190 | | - self.lock.acquire() |
191 | | - try: |
192 | | - if isinstance(value, list): |
193 | | - if self.count == 0: |
194 | | - self.data = [] |
195 | | - self.data.append(value) |
196 | | - else: |
197 | | - self.data += value |
198 | | - finally: |
199 | | - self.count += 1 |
200 | | - self.lock.release() |
| 201 | + #self.lock.acquire() |
| 202 | + #try: |
| 203 | + if isinstance(value, list): |
| 204 | + if self.count == 0: |
| 205 | + self.data = [] |
| 206 | + #self.data = Array('i', 0) |
| 207 | + self.data.append(value) |
| 208 | + else: |
| 209 | + self.data += value |
| 210 | + #self.data.value += value |
| 211 | + #finally: |
| 212 | + self.count += 1 |
| 213 | + #self.lock.release() |
201 | 214 | |
202 | 215 | |
203 | 216 | def get_date_range(self): |
— | — | @@ -207,10 +220,9 @@ |
208 | 221 | ''' |
209 | 222 | This class constructs a time-based variable. |
210 | 223 | ''' |
211 | | - |
212 | | - def __init__(self, name, time_unit, **kwargs): |
| 224 | + def __init__(self, name, time_unit, lock, **kwargs): |
213 | 225 | self.name = name |
214 | | - self.lock = Lock() |
| 226 | + self.lock = lock |
215 | 227 | self.obs = {} |
216 | 228 | self.time_unit = time_unit |
217 | 229 | self.groupbys = [] |
— | — | @@ -249,7 +261,6 @@ |
250 | 262 | for key in self: |
251 | 263 | yield (key, self.obs[key]) |
252 | 264 | |
253 | | - |
254 | 265 | def get_data(self): |
255 | 266 | return [o for o in self.itervalues()] |
256 | 267 | |
— | — | @@ -257,6 +268,8 @@ |
258 | 269 | self.lock.acquire() |
259 | 270 | try: |
260 | 271 | obs = self.obs.get(id, Observation(date, self.time_unit, id, meta)) |
| 272 | + #self.obs[id] = obs |
| 273 | + x = len(self.obs) |
261 | 274 | finally: |
262 | 275 | self.lock.release() |
263 | 276 | return obs |
— | — | @@ -264,10 +277,10 @@ |
265 | 278 | def add(self, date, value, meta={}): |
266 | 279 | ''' |
267 | 280 | The add function is used to add an observation to a variable. An |
268 | | - observation is always grouped by the combination of the date and time_unit. |
269 | | - Time_unit is a property of a Variable and indicates how granular the |
270 | | - observations should be grouped. For example, if time_unit == year then |
271 | | - all observations in a given year will be grouped. |
| 281 | + observation is always grouped by the combination of the date and |
| 282 | + time_unit. Time_unit is a property of a Variable and indicates how |
| 283 | + granular the observations should be grouped. For example, if |
| 284 | + time_unit == year then all observations in a given year will be grouped. |
272 | 285 | When calling add you should supply at least two variables: |
273 | 286 | 1) date: when did the observation happen |
274 | 287 | 2) value: an integer or float that was observed on that date |
— | — | @@ -276,25 +289,25 @@ |
277 | 290 | For example, if you add {'experience': 3} as the meta dict when calling |
278 | 291 | add then you will create an extra grouping called experience and all |
279 | 292 | future observations who fall in the same date range and the same |
280 | | - exerience level will be grouped by that particular observation. You |
281 | | - can use as many extra groupings as you want but usually one extra grouping |
282 | | - should be enough. |
| 293 | + experience level, in this case 3, will be grouped by that particular |
| 294 | + observation. You can use as many extra groupings as you want but |
| 295 | + usually one extra grouping should be enough. |
283 | 296 | ''' |
284 | | - assert isinstance(meta, dict), 'The meta variable should be a dict (either empty or with variables to group by.' |
| 297 | + assert isinstance(meta, dict), '''The meta variable should be a dict |
| 298 | + (either empty or with variables to group by.''' |
285 | 299 | start, end = self.set_date_range(date) |
286 | 300 | values = meta.values() |
287 | 301 | values.insert(0, end) |
288 | 302 | values.insert(0, start) |
289 | 303 | id = self.__hash__(values) |
290 | | -# print values |
291 | | - self.lock.acquire() |
| 304 | + obs = self.get_observation(id, date, meta) |
| 305 | + obs.add(value) |
292 | 306 | try: |
293 | | - obs = self.get_observation(id, date, meta) |
294 | | - obs.add(value) |
| 307 | + self.lock.acquire() |
295 | 308 | self.obs[id] = obs |
296 | 309 | finally: |
297 | 310 | self.lock.release() |
298 | | - print len(self.obs) |
| 311 | + #print date, id, meta.values(), obs.count, len(self.obs) |
299 | 312 | |
300 | 313 | def number_of_obs(self): |
301 | 314 | n = 0 |
— | — | @@ -341,7 +354,6 @@ |
342 | 355 | ''' |
343 | 356 | |
344 | 357 | def __init__(self, chart, rts, vars=None, **kwargs): |
345 | | - #project, collection, language_code |
346 | 358 | self.encoder, chart, charts = json_encoders.get_json_encoder(chart) |
347 | 359 | if self.encoder == None: |
348 | 360 | raise exceptions.UnknownChartError(chart, charts) |
— | — | @@ -377,8 +389,8 @@ |
378 | 390 | print 'Project: %s%s' % (self.language_code, self.project) |
379 | 391 | print 'JSON encoder: %s' % self.encoder |
380 | 392 | print 'Raw data was retrieved from: %s%s/%s' % (self.language_code, |
381 | | - self.project, |
382 | | - self.collection) |
| 393 | + self.project, |
| 394 | + self.collection) |
383 | 395 | |
384 | 396 | def create_filename(self): |
385 | 397 | ''' |
— | — | @@ -422,7 +434,7 @@ |
423 | 435 | self.variables.append(var.name) |
424 | 436 | setattr(self, var.name, var) |
425 | 437 | else: |
426 | | - raise TypeError('You can only instance of Variable to a dataset.') |
| 438 | + raise TypeError('You can only add an instance of Variable to a dataset.') |
427 | 439 | |
428 | 440 | def write(self, format='csv'): |
429 | 441 | ''' |
— | — | @@ -483,14 +495,26 @@ |
484 | 496 | float_nums = [float(x) for x in number_list] |
485 | 497 | return sum(float_nums) / len(number_list) |
486 | 498 | |
| 499 | + def get_min(self, number_list): |
| 500 | + if number_list == []: |
| 501 | + return '.' |
| 502 | + else: |
| 503 | + return min(number_list) |
| 504 | + |
| 505 | + def get_max(self, number_list): |
| 506 | + if number_list == []: |
| 507 | + return '.' |
| 508 | + else: |
| 509 | + return max(number_list) |
| 510 | + |
487 | 511 | def descriptives(self): |
488 | 512 | for variable in self: |
489 | 513 | data = variable.get_data() |
490 | 514 | variable.mean = self.get_mean(data) |
491 | 515 | variable.median = self.get_median(data) |
492 | 516 | variable.sds = self.get_standard_deviation(data) |
493 | | - variable.min = min(data) |
494 | | - variable.max = max(data) |
| 517 | + variable.min = self.get_min(data) |
| 518 | + variable.max = self.get_max(data) |
495 | 519 | variable.num_obs = variable.number_of_obs() |
496 | 520 | variable.num_dates = len(variable) |
497 | 521 | variable.first_obs, variable.last_obs = variable.get_date_range() |
— | — | @@ -499,7 +523,7 @@ |
500 | 524 | self.descriptives() |
501 | 525 | table = Texttable(max_width=0) |
502 | 526 | vars = ['Variable', 'Mean', 'Median', 'SD', 'Minimum', 'Maximum', |
503 | | - 'Num Obs', 'Num of\nUnique Dates', 'First Obs', 'Final Obs'] |
| 527 | + 'Num Obs', 'Num of\nUnique Groups', 'First Obs', 'Final Obs'] |
504 | 528 | table.add_row([var for var in vars]) |
505 | 529 | table.set_cols_align(['r' for v in vars]) |
506 | 530 | table.set_cols_valign(['m' for v in vars]) |
— | — | @@ -521,29 +545,41 @@ |
522 | 546 | |
523 | 547 | d1 = datetime.datetime.today() |
524 | 548 | d2 = datetime.datetime(2007, 6, 7) |
525 | | - ds = Dataset('test', 'wiki', 'editors_dataset', 'en', 'to_bar_json', [ |
526 | | - {'name': 'count', 'time_unit': 'year'}, |
527 | | - # {'name': 'testest', 'time_unit': 'year'} |
528 | | - ]) |
529 | | - ds.count.add(d1, 10, {'exp': 3}) |
530 | | - ds.count.add(d1, 135, {'exp': 3}) |
531 | | - ds.count.add(d2, 1, {'exp': 4}) |
532 | | - #ds.testest.add(d1, 135) |
533 | | - #ds.testest.add(d2, 535) |
534 | | - ds.summary() |
535 | | - ds.write(format='csv') |
536 | | -# v = Variable('test', 'year') |
537 | | - ds.encode() |
| 549 | +# ds = Dataset('histogram', rts, [{'name': 'count', 'time_unit': 'year'}, |
| 550 | +# #{'name': 'testest', 'time_unit': 'year'} |
| 551 | +# ]) |
| 552 | +# ds.count.add(d1, 10, {'exp': 3}) |
| 553 | +# ds.count.add(d1, 135, {'exp': 3}) |
| 554 | +# ds.count.add(d2, 1, {'exp': 4}) |
| 555 | +# #ds.testest.add(d1, 135) |
| 556 | +# #ds.testest.add(d2, 535) |
| 557 | +# ds.summary() |
| 558 | +# ds.write(format='csv') |
| 559 | +# |
| 560 | +# ds.encode() |
| 561 | + #name, time_unit, lock, **kwargs |
| 562 | + lock = RLock() |
| 563 | + v = Variable('test', 'year', lock) |
| 564 | + v.add(d1, 10, {'exp': 3, 'test': 10}) |
| 565 | + v.add(d1, 135, {'exp': 3, 'test': 10}) |
| 566 | + v.add(d2, 1, {'exp': 4, 'test': 10}) |
| 567 | + v.add(d2, 1, {'exp': 4, 'test': 10}) |
| 568 | + v.add(d2 , 1, {'exp': 3, 'test': 8}) |
| 569 | + v.add(d2 , 1, {'exp': 2, 'test': 10}) |
| 570 | + v.add(d2 , 1, {'exp': 4, 'test': 11}) |
| 571 | + v.add(d2 , 1, {'exp': 8, 'test': 13}) |
| 572 | + v.add(d2 , 1, {'exp': 9, 'test': 12}) |
538 | 573 | |
539 | 574 | |
540 | | - # mongo.test.insert({'variables': ds}) |
| 575 | +# v.add(d2 + timedelta(days=400), 1, {'exp': 4, 'test': 10}) |
| 576 | +# v.add(d2 + timedelta(days=900), 1, {'exp': 3, 'test': 8}) |
| 577 | +# v.add(d2 + timedelta(days=1200), 1, {'exp': 2, 'test': 10}) |
| 578 | +# v.add(d2 + timedelta(days=1600), 1, {'exp': 4, 'test': 11}) |
| 579 | +# v.add(d2 + timedelta(days=2000), 1, {'exp': 8, 'test': 13}) |
| 580 | +# v.add(d2 + timedelta(days=2400), 1, {'exp': 9, 'test': 12}) |
541 | 581 | |
542 | | - # v.add(d2 , 5) |
543 | | - #o = v.get_observation(d2) |
544 | | -# ds = rawdata.find_one({'project': 'wiki', |
545 | | -# 'language_code': 'en', |
546 | | -# 'hash': 'cohort_dataset_backward_bar'}) |
| 582 | + print len(v), v.number_of_obs() |
547 | 583 | |
548 | | - |
| 584 | + # mongo.test.insert({'variables': ds}) |
549 | 585 | if __name__ == '__main__': |
550 | 586 | debug() |