Index: trunk/tools/editor_trends/etl/extracter.py |
— | — | @@ -139,6 +139,9 @@ |
140 | 140 | if current_namespace != False:
|
141 | 141 | parse = True
|
142 | 142 | cache.count_articles += 1
|
| 143 | + if cache.count_articles % 10000 == 0:
|
| 144 | + print 'Worker %s parsed %s articles' % (process_id, cache.count_articles)
|
| 145 | +
|
143 | 146 | elem.clear()
|
144 | 147 |
|
145 | 148 | elif elem.tag.endswith('revision') and parse == True:
|
— | — | @@ -298,7 +301,9 @@ |
299 | 302 |
|
300 | 303 | files = file_utils.retrieve_file_list(rts.input_location)
|
301 | 304 |
|
302 | | - if len(files) > cpu_count():
|
| 305 | + if rts.kaggle:
|
| 306 | + processors = 2
|
| 307 | + elif len(files) > cpu_count():
|
303 | 308 | processors = cpu_count() - 1
|
304 | 309 | else:
|
305 | 310 | processors = len(files)
|