r88348 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r88347 | r88348 | r88349 >
Date: 20:49, 17 May 2011
Author: diederik
Status: deferred
Tags:
Comment:
Added index to registration date
Modified paths:
  • /trunk/tools/editor_trends/etl/adhoc/user_registration_date.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/adhoc/user_registration_date.py
@@ -18,29 +18,35 @@
 19  19   __version__ = '0.1'
 20  20
 21  21   import sys
     22 + import os
 22  23   from datetime import datetime
 23  24   if '..' not in sys.path:
 24     -     sys.path.append('../../')
 25     -
     25 +     sys.path.append('..%s..%s' % (os.sep, os.sep))
     26 +
 26  27   from classes import storage
     28 + from classes import settings
 27  29
 28     - location = '/Users/diederik/Desktop/d_20110502.tsv'
 29     - fh = open(location, 'r')
     30 + rts = settings.Settings()
 30  31   db = storage.init_database('mongo', 'wikilytics', 'enwiki_editors_dataset')
     32 + location = os.path.join(rts.csv_location, 'd_20110502.tsv')
 31  33
     34 + fh = open(location, 'r')
 32  35   for i, line in enumerate(fh):
 33     -     if i ==0:
     36 +     if i == 0:
 34  37           continue
 35  38       line = line.strip()
 36     -     line = line.replace("'",'')
     39 +     line = line.replace("'", '')
 37  40       line = line.split('\t')
 38     -     id =line[0]
 39     -     id = int(id[:-1])
 40     -     #date1=eval(line[1])
     41 +     id = line[0]
     42 +     id = id[:-1]
 41  43       if line[1] == 'None':
 42  44           continue
 43  45       date = datetime.strptime(line[1][:8], '%Y%m%d')
 44     -     db.update('id', id, {'reg_date': date})
 45     -
     46 +     if i % 1000 == 0:
     47 +         print 'Updated user %s' % i
     48 +     db.update('editor', id, {'reg_date': date})
     49 + fh.close()
 46  50
 47     - fh.close()
\ No newline at end of file
     51 + print 'Adding index'
     52 + db_dataset.add_index('reg_date')
     53 + print 'Done.'