Index: trunk/tools/editor_trends/kaggle/training_db.py |
— | — | @@ -253,32 +253,34 @@ |
254 | 254 | namespaces = IDGenerator() |
255 | 255 | print 'Parsing revisions...' |
256 | 256 | db_raw = storage.init_database('mongo', 'wikilytics', 'enwiki_editors_raw') |
257 | | -seen_editors = {} |
258 | | -editors = {} |
259 | | -x = 1 |
260 | | -for editor in post_editors: |
| 257 | +#seen_editors = {} |
| 258 | +#editors = {} |
| 259 | +#x = 1 |
| 260 | +#for editor in post_editors: |
| 261 | +# #print editor |
| 262 | +# editors[x] = editor |
| 263 | +# x += 2 |
| 264 | +#x = 0 |
| 265 | +#z = len(post_editors) |
| 266 | +#for y, editor in enumerate(pre_editors): |
| 267 | +# #print editor |
| 268 | +# editors[x] = editor |
| 269 | +# x += 2 |
| 270 | +# if z == y: |
| 271 | +# break |
| 272 | +# |
| 273 | +#editor_keys = editors.keys() |
| 274 | +#editor_keys.sort() |
| 275 | +#for key in editor_keys: |
| 276 | +# #print editors |
| 277 | +# #for editor in editors: |
| 278 | +# editor = editors[key] |
261 | 279 | #print editor |
262 | | - editors[x] = editor |
263 | | - x += 2 |
264 | | -x = 0 |
265 | | -z = len(post_editors) |
266 | | -for y, editor in enumerate(pre_editors): |
267 | | - #print editor |
268 | | - editors[x] = editor |
269 | | - x += 2 |
270 | | - if z == y: |
271 | | - break |
272 | | - |
273 | | -editor_keys = editors.keys() |
274 | | -editor_keys.sort() |
275 | | -for key in editor_keys: |
276 | | - #print editors |
277 | | - #for editor in editors: |
278 | | - editor = editors[key] |
279 | | - #print editor |
280 | | - go = editors_seen.get(editor, True) |
281 | | - if go: |
282 | | - editors_seen[editor] = False |
| 280 | +for editors in izip(pre_editors, post_editors): |
| 281 | + for editor in editors: |
| 282 | + #go = editors_seen.get(editor, True) |
| 283 | + #if go: |
| 284 | + # editors_seen[editor] = False |
283 | 285 | user_id = idg.get_id(editor) |
284 | 286 | print 'Parsing editor %s (%s) ...' % (editor, user_id) |
285 | 287 | revisions = db_raw.find({'user_id': str(editor)}) |