r87044 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r87043 | r87044 | r87045 >
Date:21:23, 27 April 2011
Author:diederik
Status:deferred
Tags:
Comment:
Removed some unnecessary code.
Modified paths:
  • /trunk/tools/editor_trends/analyses/analyzer.py (modified) (history)
  • /trunk/tools/editor_trends/classes/dataset.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/analyses/analyzer.py
@@ -14,13 +14,14 @@
1515
1616 __author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
1717 __email__ = 'dvanliere at gmail dot com'
18 -__date__ = '2010-12-10'
 18+_date__ = '2010-12-10'
1919 __version__ = '0.1'
2020
2121 from multiprocessing import JoinableQueue, Manager, RLock, Process
2222 from multiprocessing.managers import BaseManager
2323 from Queue import Empty
2424
 25+import types
2526 import sys
2627 import cPickle
2728 import os
@@ -54,7 +55,8 @@
5556 keys = var.obs.keys()
5657 d = {}
5758 for key in keys:
58 - d[key] = cPickle.loads(var.obs[key])
 59+ d[key] = var.obs[key]
 60+ #d[key] = cPickle.loads(var.obs[key])
5961 var.obs = d
6062 return var
6163
@@ -107,6 +109,7 @@
108110
109111 db = storage.init_database(rts.storage, rts.dbname, rts.editors_dataset)
110112 editors = db.retrieve_distinct_keys('editor')
 113+ editors = editors[:500]
111114 min_year, max_year = determine_project_year_range(db, 'new_wikipedian')
112115
113116 fmt = kwargs.pop('format', 'long')
@@ -159,7 +162,7 @@
160163
161164 tasks.join()
162165
163 - reconstruct_observations(var)
 166+ var = reconstruct_observations(var)
164167 ds = dataset.Dataset(plugin.func_name, rts, format=fmt, **kwargs)
165168 ds.add_variable(var)
166169
@@ -168,7 +171,28 @@
169172
170173 ds.summary()
171174
 175+ for n, c in get_refcounts()[:100]:
 176+ print '%10d %s' % (n, c.__name__)
172177
 178+
 179+def get_refcounts():
 180+ d = {}
 181+ sys.modules
 182+ # collect all classes
 183+ for m in sys.modules.values():
 184+ for sym in dir(m):
 185+ o = getattr (m, sym)
 186+ if type(o) is types.ClassType:
 187+ d[o] = sys.getrefcount (o)
 188+ # sort by refcount
 189+ pairs = map (lambda x: (x[1], x[0]), d.items())
 190+ pairs.sort()
 191+ pairs.reverse()
 192+ return pairs
 193+
 194+
 195+
 196+
173197 def determine_project_year_range(db, var):
174198 '''
175199 Determine the first and final year for the observed data
Index: trunk/tools/editor_trends/classes/dataset.py
@@ -27,7 +27,7 @@
2828 from multiprocessing import RLock
2929 from texttable import Texttable
3030 from datetime import timedelta
31 -
 31+import cProfile
3232 if '..' not in sys.path:
3333 sys.path.append('..')
3434
@@ -277,8 +277,8 @@
278278 return [o for o in self.itervalues()]
279279
280280 def get_observation(self, key, date, meta):
281 - '''Get a single observation based on a date key and posssibly meta data'''
282 - return self.obs.get(key, Observation(date, self.time_unit, key, meta).serialize())
 281+ '''Get a single observation based on a date key and possibly meta data'''
 282+ return self.obs.get(key, Observation(date, self.time_unit, key, meta))
283283
284284 def add(self, date, value, meta={}):
285285 '''
@@ -295,12 +295,12 @@
296296 For example, if you add {'experience': 3} as the meta dict when calling
297297 add then you will create an extra grouping called experience and all
298298 future observations who fall in the same date range and the same
299 - exerience level, in this case 3, will be grouped by that particular
 299+ experience level, in this case 3, will be grouped by that particular
300300 observation. You can use as many extra groupings as you want but
301301 usually one extra grouping should be enough.
302302 '''
303303 assert isinstance(meta, dict), '''The meta variable should be a dict
304 - (either empty or with variables to group by.'''
 304+ (either empty) or with variables to group by.'''
305305 start, end = self.set_date_range(date)
306306 values = meta.values()
307307 values.insert(0, end)
@@ -310,9 +310,9 @@
311311 self.lock.acquire()
312312 try:
313313 obs = self.get_observation(id, date, meta)
314 - obs = cPickle.loads(obs)
 314+ #obs = cPickle.loads(obs)
315315 obs.add(value)
316 - obs = obs.serialize()
 316+ #obs = obs.serialize()
317317 self.obs[id] = obs
318318 finally:
319319 self.lock.release()
@@ -580,9 +580,10 @@
581581 else:
582582 return max(number_list)
583583
 584+
584585 def debug():
585586 db = storage.init_database('mongo', 'wikilytics', 'enwiki_charts')
586 - db.add_son_manipulator(Transform())
 587+ #db.add_son_manipulator(Transform())
587588
588589 d1 = datetime.datetime.today()
589590 d2 = datetime.datetime(2007, 6, 7)
@@ -600,7 +601,7 @@
601602 # ds.encode()
602603 #name, time_unit, lock, **kwargs
603604 lock = RLock()
604 - v = Variable('test', 'year', lock)
 605+ v = Variable('test', 'year', lock, {})
605606 v.add(d1, 10, {'exp': 3, 'test': 10})
606607 v.add(d1, 135, {'exp': 3, 'test': 10})
607608 v.add(d2, 1, {'exp': 4, 'test': 10})
@@ -611,6 +612,7 @@
612613 v.add(d2 , 1, {'exp': 8, 'test': 13})
613614 v.add(d2 , 1, {'exp': 9, 'test': 12})
614615
 616+ #mem = get_refcounts()
615617
616618 # v.add(d2 + timedelta(days=400), 1, {'exp': 4, 'test': 10})
617619 # v.add(d2 + timedelta(days=900), 1, {'exp': 3, 'test': 8})
@@ -619,8 +621,8 @@
620622 # v.add(d2 + timedelta(days=2000), 1, {'exp': 8, 'test': 13})
621623 # v.add(d2 + timedelta(days=2400), 1, {'exp': 9, 'test': 12})
622624
623 - print len(v), v.number_of_obs()
 625+# print len(v), v.number_of_obs()
624626
625627 # mongo.test.insert({'variables': ds})
626628 if __name__ == '__main__':
627 - debug()
 629+ cProfile.run('debug()')