Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -35,6 +35,9 @@ |
36 | 36 | from analyses.adhoc import bot_detector |
37 | 37 | from utils import file_utils |
38 | 38 | |
| 39 | +filehandles = [file_utils.create_txt_filehandle(path, '%s.csv' % fh, 'a', |
| 40 | + 'utf-8') for fh in xrange(rts.max_filehandles)] |
| 41 | + |
39 | 42 | EXCLUDE_NAMESPACE = { |
40 | 43 | #0:'Main', |
41 | 44 | #1:'Talk', |
— | — | @@ -87,19 +90,7 @@ |
88 | 91 | print 'Worker %s: Number of articles: %s' % (self.process_id, self.count_articles) |
89 | 92 | print 'Worker %s: Number of revisions: %s' % (self.process_id, self.count_revisions) |
90 | 93 | |
91 | | -class Dummy: |
92 | | - pass |
93 | 94 | |
94 | | -class DummyRTS: |
95 | | - def __init__(self, location, path): |
96 | | - self.input_location = location |
97 | | - self.output_location = path |
98 | | - self.language = Dummy() |
99 | | - self.project = Dummy() |
100 | | - self.language.code = 'en' |
101 | | - self.project.name = 'wiki' |
102 | | - |
103 | | - |
104 | 95 | class Buffer: |
105 | 96 | def __init__(self, storage, process_id, rts=None, filehandles=None, locks=None): |
106 | 97 | assert storage == 'cassandra' or storage == 'mongo' or storage == 'csv', \ |
— | — | @@ -677,8 +668,8 @@ |
678 | 669 | bots = bot_detector.retrieve_bots(rts.language.code) |
679 | 670 | path = os.path.join(rts.output_location, 'txt') |
680 | 671 | |
681 | | - filehandles = [file_utils.create_txt_filehandle(path, '%s.csv' % fh, 'a', |
682 | | - 'utf-8') for fh in xrange(rts.max_filehandles)] |
| 672 | + #filehandles = [file_utils.create_txt_filehandle(path, '%s.csv' % fh, 'a', |
| 673 | + # 'utf-8') for fh in xrange(rts.max_filehandles)] |
683 | 674 | |
684 | 675 | title_file = os.path.join(path, 'titles.csv') |
685 | 676 | comment_file = os.path.join(path, 'comments.csv') |