Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -426,10 +426,11 @@ |
427 | 427 | print 'Processing took %s' % (t1 - t0) |
428 | 428 | t0 = t1 |
429 | 429 | fh = bz2.BZ2File(filename, 'rb') |
| 430 | + article = parse_xml(fh) |
430 | 431 | if dataset == 'training': |
431 | | - function(fh, cache, bots) |
| 432 | + function(article, cache, bots) |
432 | 433 | else: |
433 | | - counts = function(fh, counts, bots) |
| 434 | + counts = function(article, counts, bots) |
434 | 435 | fh.close() |
435 | 436 | # for data in unzip(filename): |
436 | 437 | # if data.find('<page>') > -1: |