r90977 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r90976 | r90977 | r90978 >
Date:16:33, 28 June 2011
Author:diederik
Status:deferred
Tags:
Comment:
Kaggle datafile generator final.
Modified paths:
  • /trunk/tools/editor_trends/kaggle/training_db.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/kaggle/training_db.py
@@ -253,32 +253,34 @@
254254 namespaces = IDGenerator()
255255 print 'Parsing revisions...'
256256 db_raw = storage.init_database('mongo', 'wikilytics', 'enwiki_editors_raw')
257 -seen_editors = {}
258 -editors = {}
259 -x = 1
260 -for editor in post_editors:
 257+#seen_editors = {}
 258+#editors = {}
 259+#x = 1
 260+#for editor in post_editors:
 261+# #print editor
 262+# editors[x] = editor
 263+# x += 2
 264+#x = 0
 265+#z = len(post_editors)
 266+#for y, editor in enumerate(pre_editors):
 267+# #print editor
 268+# editors[x] = editor
 269+# x += 2
 270+# if z == y:
 271+# break
 272+#
 273+#editor_keys = editors.keys()
 274+#editor_keys.sort()
 275+#for key in editor_keys:
 276+# #print editors
 277+# #for editor in editors:
 278+# editor = editors[key]
261279 #print editor
262 - editors[x] = editor
263 - x += 2
264 -x = 0
265 -z = len(post_editors)
266 -for y, editor in enumerate(pre_editors):
267 - #print editor
268 - editors[x] = editor
269 - x += 2
270 - if z == y:
271 - break
272 -
273 -editor_keys = editors.keys()
274 -editor_keys.sort()
275 -for key in editor_keys:
276 - #print editors
277 - #for editor in editors:
278 - editor = editors[key]
279 - #print editor
280 - go = editors_seen.get(editor, True)
281 - if go:
282 - editors_seen[editor] = False
 280+for editors in izip(pre_editors, post_editors):
 281+ for editor in editors:
 282+ #go = editors_seen.get(editor, True)
 283+ #if go:
 284+ # editors_seen[editor] = False
283285 user_id = idg.get_id(editor)
284286 print 'Parsing editor %s (%s) ...' % (editor, user_id)
285287 revisions = db_raw.find({'user_id': str(editor)})