Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -524,7 +524,7 @@ |
525 | 525 | print xml_namespace |
526 | 526 | elements = siteinfo.find('%s%s' % (xml_namespace, 'namespaces')) |
527 | 527 | print elements |
528 | | - for elem in elements: |
| 528 | + for elem in elements.getchildren(): |
529 | 529 | key = int(elem.get('key')) |
530 | 530 | namespaces[key] = elem.text #extract_text(ns) |
531 | 531 | text = elem.text if elem.text != None else '' |
— | — | @@ -532,6 +532,8 @@ |
533 | 533 | print key, text.encode('utf-8') |
534 | 534 | except UnicodeEncodeError: |
535 | 535 | print key |
| 536 | + elem.clear() |
| 537 | + siteinfo.clear() |
536 | 538 | if namespaces == {}: |
537 | 539 | sys.exit(-1) |
538 | 540 | return namespaces |
— | — | @@ -546,7 +548,10 @@ |
547 | 549 | xml_namespace = elem.tag |
548 | 550 | pos = xml_namespace.find('sitename') |
549 | 551 | xml_namespace = xml_namespace[0:pos] |
| 552 | + elem.clear() |
550 | 553 | return xml_namespace |
| 554 | + else: |
| 555 | + elem.clear() |
551 | 556 | |
552 | 557 | |
553 | 558 | def count_edits(article, counts, bots, xml_namespace): |
— | — | @@ -659,8 +664,8 @@ |
660 | 665 | article['revisions'] = [] |
661 | 666 | article['namespaces'] = namespaces |
662 | 667 | id = False |
663 | | - elif rts.kaggle == True and event == 'end': |
664 | | - elem.clear() |
| 668 | + #elif rts.kaggle == True and event == 'end': |
| 669 | + # elem.clear() |
665 | 670 | except SyntaxError, error: |
666 | 671 | print 'Encountered invalid XML tag. Error message: %s' % error |
667 | 672 | dump(elem) |