Index: trunk/tools/editor_trends/etl/adhoc/user_registration_date.py |
— | — | @@ -18,29 +18,35 @@ |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
21 | 21 | import sys |
| 22 | +import os |
22 | 23 | from datetime import datetime |
23 | 24 | if '..' not in sys.path: |
24 | | - sys.path.append('../../') |
25 | | - |
| 25 | + sys.path.append('..%s..%s' % (os.sep, os.sep)) |
| 26 | + |
26 | 27 | from classes import storage |
| 28 | +from classes import settings |
27 | 29 | |
28 | | -location = '/Users/diederik/Desktop/d_20110502.tsv' |
29 | | -fh = open(location, 'r') |
| 30 | +rts = settings.Settings() |
30 | 31 | db = storage.init_database('mongo', 'wikilytics', 'enwiki_editors_dataset') |
| 32 | +location = os.path.join(rts.csv_location, 'd_20110502.tsv') |
31 | 33 | |
| 34 | +fh = open(location, 'r') |
32 | 35 | for i, line in enumerate(fh): |
33 | | - if i ==0: |
| 36 | + if i == 0: |
34 | 37 | continue |
35 | 38 | line = line.strip() |
36 | | - line = line.replace("'",'') |
| 39 | + line = line.replace("'", '') |
37 | 40 | line = line.split('\t') |
38 | | - id =line[0] |
39 | | - id = int(id[:-1]) |
40 | | - #date1=eval(line[1]) |
| 41 | + id = line[0] |
| 42 | + id = id[:-1] |
41 | 43 | if line[1] == 'None': |
42 | 44 | continue |
43 | 45 | date = datetime.strptime(line[1][:8], '%Y%m%d') |
44 | | - db.update('id', id, {'reg_date': date}) |
45 | | - |
| 46 | + if i % 1000 == 0: |
| 47 | + print 'Updated user %s' % i |
| 48 | + db.update('editor', id, {'reg_date': date}) |
| 49 | +fh.close() |
46 | 50 | |
47 | | -fh.close() |
\ No newline at end of file |
| 51 | +print 'Adding index' |
| 52 | +db_dataset.add_index('reg_date') |
| 53 | +print 'Done.' |