Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -644,6 +644,7 @@ |
645 | 645 | article['revisions'] = [] |
646 | 646 | elements = [] |
647 | 647 | id = False |
| 648 | + ns = False |
648 | 649 | |
649 | 650 | try: |
650 | 651 | for event, elem in context: |
— | — | @@ -651,6 +652,7 @@ |
652 | 653 | xml_namespace = determine_xml_namespace(elem) |
653 | 654 | namespaces = create_namespace_dict(elem, xml_namespace) |
654 | 655 | article['namespaces'] = namespaces |
| 656 | + ns = True |
655 | 657 | elif event == 'end' and elem.tag.endswith('title'): |
656 | 658 | article['title'] = elem |
657 | 659 | elif event == 'end' and elem.tag.endswith('revision'): |
— | — | @@ -665,11 +667,8 @@ |
666 | 668 | article['revisions'] = [] |
667 | 669 | article['namespaces'] = namespaces |
668 | 670 | id = False |
669 | | - elements = [elem.clear() for elem in elements] |
670 | | - elements = [] |
671 | | - elif event == 'end': |
672 | | - elements.append(elem) |
673 | | - |
| 671 | + elif event == 'end' and ns == True: |
| 672 | + elem.clear() |
674 | 673 | except SyntaxError, error: |
675 | 674 | print 'Encountered invalid XML tag. Error message: %s' % error |
676 | 675 | dump(elem) |