Index: trunk/tools/editor_trends/bots/bots.py |
— | — | @@ -34,6 +34,8 @@ |
35 | 35 | from etl import models |
36 | 36 | import models as botmodels |
37 | 37 | |
| 38 | +import cProfile |
| 39 | + |
38 | 40 | try: |
39 | 41 | import psyco |
40 | 42 | psyco.full() |
— | — | @@ -113,30 +115,28 @@ |
114 | 116 | ''' |
115 | 117 | lock = kwargs.get('lock') |
116 | 118 | bots = kwargs.get('bots') |
117 | | - if settings.debug: |
118 | | - messages = {} |
119 | 119 | |
120 | 120 | revisions = xml_nodes.findall('revision') |
121 | 121 | for revision in revisions: |
122 | 122 | contributor = xml.retrieve_xml_node(revision, 'contributor') |
123 | 123 | username = contributor.find('username') |
124 | | - if username == None: |
| 124 | + if username == None or username.text == None: |
125 | 125 | continue |
126 | | - username = xml.extract_text(username, None) |
| 126 | + else: |
| 127 | + username = username.text |
127 | 128 | #print username.encode('utf-8') |
128 | 129 | if username in bots and bots[username].verified == True: |
129 | | - id = contributor.find('id') |
130 | | - id = xml.extract_text(id, None) |
| 130 | + id = contributor.find('id').text |
131 | 131 | bot = bots[username] |
132 | | - bot_dict = convert_object_to_dict(bot, exclude=['time', 'name', 'written']) |
133 | | - bot_dict['_username'] = username |
134 | | - bot_dict['id'] = id |
135 | | - |
| 132 | + |
136 | 133 | if not hasattr(bot, 'written'): |
| 134 | + bot_dict = convert_object_to_dict(bot, exclude=['time', 'name', 'written']) |
| 135 | + bot_dict['_username'] = username |
| 136 | + bot_dict['id'] = id |
137 | 137 | lock.acquire() |
138 | 138 | utils.write_dict_to_csv(bot_dict, fh, write_key=False) |
139 | 139 | lock.release() |
140 | | - bot.written = True |
| 140 | + bot.written = True |
141 | 141 | #bots.pop(username) |
142 | 142 | #if bots == {}: |
143 | 143 | # print 'Found id numbers for all bots.' |
— | — | @@ -156,12 +156,8 @@ |
157 | 157 | #bot = bots.get('PseudoBot') |
158 | 158 | #bot.hours_active() |
159 | 159 | #bot.avg_lag_between_edits() |
160 | | - if settings.debug: |
161 | | - utils.report_error_messages(messages, lookup_bot_userid) |
162 | 160 | |
163 | 161 | |
164 | | - |
165 | | - |
166 | 162 | def bot_launcher(language_code, project, single=False): |
167 | 163 | ''' |
168 | 164 | This function sets the stage to launch bot id detection and collecting data |
— | — | @@ -190,13 +186,17 @@ |
191 | 187 | |
192 | 188 | utils.store_object(bots, settings.binary_location, 'bots.bin') |
193 | 189 | bot_training_dataset(bots) |
| 190 | + store_bots() |
194 | 191 | if bots != {}: |
195 | 192 | print 'The script was unable to retrieve the user id\s for the following %s bots:\n' % len(bots) |
196 | 193 | keys = bots.keys() |
197 | 194 | for key in keys: |
198 | | - print '%s' % key |
| 195 | + try: |
| 196 | + print '%s' % key.encode(settings.encoding) |
| 197 | + except: |
| 198 | + pass |
199 | 199 | |
200 | | - store_bots() |
| 200 | + |
201 | 201 | |
202 | 202 | |
203 | 203 | def bot_training_dataset(bots): |
— | — | @@ -225,12 +225,8 @@ |
226 | 226 | tasks.join() |
227 | 227 | |
228 | 228 | |
229 | | -def bot_detector_launcher(): |
230 | | - bots = retrieve_bots() |
231 | | - |
232 | | - |
233 | | - |
234 | 229 | if __name__ == '__main__': |
235 | 230 | language_code = 'en' |
236 | 231 | project = 'wiki' |
237 | | - bot_launcher(language_code, project, single=False) |
| 232 | + #bot_launcher(language_code, project, single=True) |
| 233 | + # Script entry point: profile a full bot_launcher run and dump the stats to the file 'profile'. |
| 234 | + # cProfile.run() expects the statement as a string, which it exec's in the __main__ namespace; |
| 235 | + # passing the call expression itself would run bot_launcher unprofiled and hand its return value to run(). |
| 236 | + cProfile.run('bot_launcher(language_code, project, single=False)', 'profile') |