Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -35,9 +35,6 @@ |
36 | 36 | from analyses.adhoc import bot_detector |
37 | 37 | from utils import file_utils |
38 | 38 | |
39 | | -filehandles = [file_utils.create_txt_filehandle(path, '%s.csv' % fh, 'a', |
40 | | - 'utf-8') for fh in xrange(rts.max_filehandles)] |
41 | | - |
42 | 39 | EXCLUDE_NAMESPACE = { |
43 | 40 | #0:'Main', |
44 | 41 | #1:'Talk', |
— | — | @@ -656,8 +653,8 @@ |
657 | 654 | article['revisions'] = [] |
658 | 655 | article['namespaces'] = namespaces |
659 | 656 | id = False |
660 | | - elif event == 'end' and ns == True: |
661 | | - elem.clear() |
| 657 | + #elif event == 'end' and ns == True: |
| 658 | + # elem.clear() |
662 | 659 | except SyntaxError, error: |
663 | 660 | print 'Encountered invalid XML tag. Error message: %s' % error |
664 | 661 | dump(elem) |
— | — | @@ -668,8 +665,8 @@ |
669 | 666 | bots = bot_detector.retrieve_bots(rts.language.code) |
670 | 667 | path = os.path.join(rts.output_location, 'txt') |
671 | 668 | |
672 | | - #filehandles = [file_utils.create_txt_filehandle(path, '%s.csv' % fh, 'a', |
673 | | - # 'utf-8') for fh in xrange(rts.max_filehandles)] |
| 669 | + filehandles = [file_utils.create_txt_filehandle(path, '%s.csv' % fh, 'a', |
| 670 | + 'utf-8') for fh in xrange(rts.max_filehandles)] |
674 | 671 | |
675 | 672 | title_file = os.path.join(path, 'titles.csv') |
676 | 673 | comment_file = os.path.join(path, 'comments.csv') |