r85984 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r85983 | r85984 | r85985 >
Date:19:36, 13 April 2011
Author:diederik
Status:deferred
Tags:
Comment:
Limit the number of processors for the prediction dataset.
Modified paths:
  • /trunk/tools/editor_trends/etl/extracter.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/extracter.py
@@ -139,6 +139,9 @@
140140 if current_namespace != False:
141141 parse = True
142142 cache.count_articles += 1
 143+ if cache.count_articles % 10000 == 0:
 144+ print 'Worker %s parsed %s articles' % (process_id, cache.count_articles)
 145+
143146 elem.clear()
144147
145148 elif elem.tag.endswith('revision') and parse == True:
@@ -298,7 +301,9 @@
299302
300303 files = file_utils.retrieve_file_list(rts.input_location)
301304
302 - if len(files) > cpu_count():
 305+ if rts.kaggle:
 306+ processors = 2
 307+ elif len(files) > cpu_count():
303308 processors = cpu_count() - 1
304309 else:
305310 processors = len(files)