Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -382,7 +382,11 @@ |
383 | 383 | def parse_xml(buffer): |
384 | 384 | context = iterparse(buffer, events=('end',)) |
385 | 385 | context = iter(context) |
386 | | - event, root = context.next() |
| 386 | + try: |
| 387 | + event, root = context.next() |
| 388 | + except SyntaxError, e: |
| 389 | + print e |
| 390 | + print buffer.getvalue() |
387 | 391 | |
388 | 392 | article = {} |
389 | 393 | id = False |
— | — | @@ -436,7 +440,8 @@ |
437 | 441 | if dataset == 'training': |
438 | 442 | function(article, cache, bots) |
439 | 443 | else: |
440 | | - counts = function(article, counts, bots) |
| 444 | + #counts = function(article, counts, bots) |
| 445 | + pass |
441 | 446 | buffer = cStringIO.StringIO() |
442 | 447 | |
443 | 448 | if i % 10000 == 0: |