Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -318,7 +318,7 @@ |
319 | 319 | The data is ready to be stored once the sorted function has completed. This |
320 | 320 | function starts storing data in MongoDB. |
321 | 321 | ''' |
322 | | - print 'Start storing data in MongoDB' |
| 322 | + print 'Start storing data in %s' % rts.storage |
323 | 323 | stopwatch = timer.Timer() |
324 | 324 | log.to_db(rts, 'dataset', 'store', stopwatch, event='start') |
325 | 325 | log.to_csv(logger, rts, 'Start', 'Store', store_launcher) |
— | — | @@ -337,7 +337,7 @@ |
338 | 338 | stopwatch = timer.Timer() |
339 | 339 | log.to_db(rts, 'dataset', 'transform', stopwatch, event='start') |
340 | 340 | log.to_csv(logger, rts, 'Start', 'Transform', transformer_launcher) |
341 | | - transformer.transform_editors_single_launcher(rts) |
| 341 | + transformer.transform_editors_multi_launcher(rts) |
342 | 342 | stopwatch.elapsed() |
343 | 343 | log.to_db(rts, 'dataset', 'transform', stopwatch, event='finish') |
344 | 344 | log.to_csv(logger, rts, 'Finish', 'Transform', transformer_launcher) |
— | — | @@ -381,7 +381,7 @@ |
382 | 382 | |
383 | 383 | for function, callname in functions.iteritems(): |
384 | 384 | if callname not in rts.ignore: |
385 | | - print 'Starting %s' % function.func_name |
| 385 | + print 'Launching %s' % function.func_name |
386 | 386 | res = function(rts, logger) |
387 | 387 | if res == False: |
388 | 388 | sys.exit(False) |
Index: trunk/tools/editor_trends/etl/transformer.py |
— | — | @@ -307,7 +307,10 @@ |
308 | 308 | |
309 | 309 | tasks.join() |
310 | 310 | |
| 311 | + db_dataset.add_index('editor') |
| 312 | + db_dataset.add_index('new_wikipedian') |
311 | 313 | |
| 314 | + |
312 | 315 | def setup_database(rts): |
313 | 316 | ''' |
314 | 317 | Initialize the database, including setting indexes and dropping the older |
— | — | @@ -317,8 +320,8 @@ |
318 | 321 | db_dataset = storage.init_database(rts.storage, rts.dbname, rts.editors_dataset) |
319 | 322 | db_dataset.drop_collection() |
320 | 323 | editors = db_raw.retrieve_distinct_keys('editor') |
321 | | - db_dataset.add_index('editor') |
322 | | - db_dataset.add_index('new_wikipedian') |
| 324 | + #db_dataset.add_index('editor') |
| 325 | + #db_dataset.add_index('new_wikipedian') |
323 | 326 | |
324 | 327 | return db_raw, db_dataset, editors |
325 | 328 | |