Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -322,6 +322,13 @@ |
323 | 323 | counts.setdefault(contributor['username'], 0) |
324 | 324 | counts[contributor['username']] += 1 |
325 | 325 | revision.clear() |
| 326 | + print '************************' |
| 327 | + gc.DEBUG_COLLECTABLE |
| 328 | + gc.DEBUG_UNCOLLECTABLE |
| 329 | + gc.DEBUG_STATS |
| 330 | + print '************************' |
| 331 | + gc.collect() |
| 332 | + print gc.get_count() |
326 | 333 | |
327 | 334 | article = None |
328 | 335 | return counts |
— | — | @@ -394,7 +401,8 @@ |
395 | 402 | elif event == 'end' and elem.tag == 'id' and id == False: |
396 | 403 | article[elem.tag] = elem |
397 | 404 | id = True |
398 | | - |
| 405 | + else: |
| 406 | + event.clear() |
399 | 407 | root.clear() |
400 | 408 | return article |
401 | 409 | |
— | — | @@ -433,14 +441,8 @@ |
434 | 442 | |
435 | 443 | if i % 1000 == 0: |
436 | 444 | print 'Worker %s parsed %s articles' % (id, i) |
437 | | - print '************************' |
438 | | - gc.DEBUG_COLLECTABLE |
439 | | - gc.DEBUG_UNCOLLECTABLE |
440 | | - gc.DEBUG_STATS |
441 | | - print '************************' |
442 | | - gc.collect() |
443 | | - print gc.get_count() |
444 | 445 | |
| 446 | + |
445 | 447 | if dataset == 'training': |
446 | 448 | cache.empty() |
447 | 449 | cache.stats.summary() |