r86807 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r86806 | r86807 | r86808 >
Date:04:26, 24 April 2011
Author:diederik
Status:deferred
Tags:
Comment:
Multiprocessing transformer.
Modified paths:
  • /trunk/tools/editor_trends/etl/transformer.py (modified) (history)
  • /trunk/tools/editor_trends/manage.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/manage.py
@@ -318,7 +318,7 @@
319319 The data is ready to be stored once the sorted function has completed. This
320320 function starts storing data in MongoDB.
321321 '''
322 - print 'Start storing data in MongoDB'
 322+ print 'Start storing data in %s' % rts.storage
323323 stopwatch = timer.Timer()
324324 log.to_db(rts, 'dataset', 'store', stopwatch, event='start')
325325 log.to_csv(logger, rts, 'Start', 'Store', store_launcher)
@@ -337,7 +337,7 @@
338338 stopwatch = timer.Timer()
339339 log.to_db(rts, 'dataset', 'transform', stopwatch, event='start')
340340 log.to_csv(logger, rts, 'Start', 'Transform', transformer_launcher)
341 - transformer.transform_editors_single_launcher(rts)
 341+ transformer.transform_editors_multi_launcher(rts)
342342 stopwatch.elapsed()
343343 log.to_db(rts, 'dataset', 'transform', stopwatch, event='finish')
344344 log.to_csv(logger, rts, 'Finish', 'Transform', transformer_launcher)
@@ -381,7 +381,7 @@
382382
383383 for function, callname in functions.iteritems():
384384 if callname not in rts.ignore:
385 - print 'Starting %s' % function.func_name
 385+ print 'Launching %s' % function.func_name
386386 res = function(rts, logger)
387387 if res == False:
388388 sys.exit(False)
Index: trunk/tools/editor_trends/etl/transformer.py
@@ -307,7 +307,10 @@
308308
309309 tasks.join()
310310
 311+ db_dataset.add_index('editor')
 312+ db_dataset.add_index('new_wikipedian')
311313
 314+
312315 def setup_database(rts):
313316 '''
314317 Initialize the database, including setting indexes and dropping the older
@@ -317,8 +320,8 @@
318321 db_dataset = storage.init_database(rts.storage, rts.dbname, rts.editors_dataset)
319322 db_dataset.drop_collection()
320323 editors = db_raw.retrieve_distinct_keys('editor')
321 - db_dataset.add_index('editor')
322 - db_dataset.add_index('new_wikipedian')
 324+ #db_dataset.add_index('editor')
 325+ #db_dataset.add_index('new_wikipedian')
323326
324327 return db_raw, db_dataset, editors
325328