Index: trunk/tools/editor_trends/etl/store.py |
— | — | @@ -55,14 +55,15 @@ |
56 | 56 | fh = file_utils.create_txt_filehandle(self.rts.sorted, filename, |
57 | 57 | 'r', 'utf-8') |
58 | 58 | for line in file_utils.read_raw_data(fh): |
59 | | - #if len(line) == 12: |
| 59 | + if len(line) == 1: |
| 60 | + continue |
60 | 61 | editor = line[0] |
61 | 62 | #print 'Parsing %s' % editor |
62 | 63 | if prev_editor != editor and prev_editor != -1: |
63 | 64 | editor_cache.add(prev_editor, 'NEXT') |
64 | 65 | |
65 | 66 | data = prepare_data(line) |
66 | | - print editor, data['username'] |
| 67 | + #print editor, data['username'] |
67 | 68 | editor_cache.add(editor, data) |
68 | 69 | prev_editor = editor |
69 | 70 | fh.close() |
— | — | @@ -74,26 +75,30 @@ |
75 | 76 | Prepare a single line to store in the database, this entails converting |
76 | 77 | to proper variable and taking care of the encoding. |
77 | 78 | ''' |
78 | | - article_id = int(line[1]) |
79 | | - username = line[3].encode('utf-8') |
80 | | - ns = int(line[4]) |
81 | | - date = text_utils.convert_timestamp_to_datetime_utc(line[6]) |
82 | | - md5 = line[7] |
83 | | - revert = int(line[8]) |
84 | | - bot = int(line[9]) |
85 | | - cur_size = int(line[10]) |
86 | | - delta = int(line[11]) |
| 79 | + try: |
| 80 | + article_id = int(line[1]) |
| 81 | + username = line[3].encode('utf-8') |
| 82 | + ns = int(line[4]) |
| 83 | + date = text_utils.convert_timestamp_to_datetime_utc(line[6]) |
| 84 | + md5 = line[7] |
| 85 | + revert = int(line[8]) |
| 86 | + bot = int(line[9]) |
| 87 | + cur_size = int(line[10]) |
| 88 | + delta = int(line[11]) |
87 | 89 | |
88 | | - data = {'date': date, |
89 | | - 'article': article_id, |
90 | | - 'username': username, |
91 | | - 'ns': ns, |
92 | | - 'hash': md5, |
93 | | - 'revert':revert, |
94 | | - 'cur_size':cur_size, |
95 | | - 'delta':delta, |
96 | | - 'bot':bot |
97 | | - } |
| 90 | + data = {'date': date, |
| 91 | + 'article': article_id, |
| 92 | + 'username': username, |
| 93 | + 'ns': ns, |
| 94 | + 'hash': md5, |
| 95 | + 'revert':revert, |
| 96 | + 'cur_size':cur_size, |
| 97 | + 'delta':delta, |
| 98 | + 'bot':bot |
| 99 | + } |
| 100 | + except: |
| 101 | + print line, len(line) |
| 102 | + return {} |
98 | 103 | return data |
99 | 104 | |
100 | 105 | |