Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -26,6 +26,7 @@ |
27 | 27 | import re |
28 | 28 | import sys |
29 | 29 | import datetime |
| 30 | +import gc |
30 | 31 | import progressbar |
31 | 32 | from multiprocessing import JoinableQueue, Process, cpu_count, current_process |
32 | 33 | from xml.etree.cElementTree import fromstring, iterparse |
— | — | @@ -430,8 +431,15 @@ |
431 | 432 | counts = function(article, counts, bots) |
432 | 433 | buffer = cStringIO.StringIO() |
433 | 434 | |
434 | | - if i % 10000 == 0: |
| 435 | + if i % 1000 == 0: |
435 | 436 | print 'Worker %s parsed %s articles' % (id, i) |
| 437 | + print gc.get_count() |
| 438 | + gc.collect() |
| 439 | + print '************************' |
| 440 | + gc.DEBUG_COLLECTABLE |
| 441 | + gc.DEBUG_UNCOLLECTABLE |
| 442 | + gc.DEBUG_STATS |
| 443 | + print '************************' |
436 | 444 | |
437 | 445 | if dataset == 'training': |
438 | 446 | cache.empty() |
— | — | @@ -508,4 +516,5 @@ |
509 | 517 | |
510 | 518 | if __name__ == '__main__': |
511 | 519 | #launcher_training() |
| 520 | + gc.enable() |
512 | 521 | launcher_prediction() |