r86950 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r86949 | r86950 | r86951 >
Date: 16:09, 26 April 2011
Author: diederik
Status: deferred
Tags:
Comment:
Handle empty observations correctly.
Modified paths:
  • /trunk/tools/editor_trends/etl/store.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/store.py
@@ -55,14 +55,15 @@
56 56 fh = file_utils.create_txt_filehandle(self.rts.sorted, filename,
57 57 'r', 'utf-8')
58 58 for line in file_utils.read_raw_data(fh):
59 - #if len(line) == 12:
 59+ if len(line) == 1:
 60+ continue
60 61 editor = line[0]
61 62 #print 'Parsing %s' % editor
62 63 if prev_editor != editor and prev_editor != -1:
63 64 editor_cache.add(prev_editor, 'NEXT')
64 65
65 66 data = prepare_data(line)
66 - print editor, data['username']
 67+ #print editor, data['username']
67 68 editor_cache.add(editor, data)
68 69 prev_editor = editor
69 70 fh.close()
@@ -74,26 +75,30 @@
75 76 Prepare a single line to store in the database, this entails converting
76 77 to proper variable and taking care of the encoding.
77 78 '''
78 - article_id = int(line[1])
79 - username = line[3].encode('utf-8')
80 - ns = int(line[4])
81 - date = text_utils.convert_timestamp_to_datetime_utc(line[6])
82 - md5 = line[7]
83 - revert = int(line[8])
84 - bot = int(line[9])
85 - cur_size = int(line[10])
86 - delta = int(line[11])
 79+ try:
 80+ article_id = int(line[1])
 81+ username = line[3].encode('utf-8')
 82+ ns = int(line[4])
 83+ date = text_utils.convert_timestamp_to_datetime_utc(line[6])
 84+ md5 = line[7]
 85+ revert = int(line[8])
 86+ bot = int(line[9])
 87+ cur_size = int(line[10])
 88+ delta = int(line[11])
87 89
88 - data = {'date': date,
89 - 'article': article_id,
90 - 'username': username,
91 - 'ns': ns,
92 - 'hash': md5,
93 - 'revert':revert,
94 - 'cur_size':cur_size,
95 - 'delta':delta,
96 - 'bot':bot
97 - }
 90+ data = {'date': date,
 91+ 'article': article_id,
 92+ 'username': username,
 93+ 'ns': ns,
 94+ 'hash': md5,
 95+ 'revert':revert,
 96+ 'cur_size':cur_size,
 97+ 'delta':delta,
 98+ 'bot':bot
 99+ }
 100+ except:
 101+ print line, len(line)
 102+ return {}
98 103 return data
99 104
100 105