r84769 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r84768 | r84769 | r84770 >
Date:21:31, 25 March 2011
Author:diederik
Status:deferred
Tags:
Comment:
Additional logging messages.
Modified paths:
  • /trunk/tools/editor_trends/etl/enricher.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/enricher.py
@@ -433,13 +433,13 @@
434434
435435 if i % 1000 == 0:
436436 print 'Worker %s parsed %s articles' % (id, i)
437 - print gc.get_count()
438 - gc.collect()
439437 print '************************'
440438 gc.DEBUG_COLLECTABLE
441439 gc.DEBUG_UNCOLLECTABLE
442440 gc.DEBUG_STATS
443441 print '************************'
 442+ gc.collect()
 443+ print gc.get_count()
444444
445445 if dataset == 'training':
446446 cache.empty()
@@ -494,6 +494,15 @@
495495 input_queue.join()
496496
497497
 498+
 499+def debug():
 500+ path = '/media/wikipedia_dumps/batch2/'
 501+ files = file_utils.retrieve_file_list(path, 'bz2')
 502+ for file in files:
 503+ filename = os.path.join(path, file)
 504+ unzip(filename)
 505+
 506+
498507 def launcher_training():
499508 # launcher for creating training data
500509 path = '/media/wikipedia_dumps/batch2/'
@@ -517,4 +526,5 @@
518527 if __name__ == '__main__':
519528 #launcher_training()
520529 gc.enable()
 530+ debug()
521531 launcher_prediction()

Status & tagging log