r85068 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r85067 | r85068 | r85069 >
Date:14:57, 31 March 2011
Author:diederik
Status:deferred
Tags:
Comment:
Incorporated fixes suggested by pylint
Modified paths:
  • /trunk/tools/editor_trends/manage.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/manage.py
@@ -43,6 +43,10 @@
4444
4545
4646 def show_choices(settings, attr):
 47+ '''
 48+ Show possible choices in console, for example output valid languages or
 49+ valid projects.
 50+ '''
4751 choices = getattr(settings, attr).items()
4852 choices.sort()
4953 choices = ['%s\t%s' % (choice[0], choice[1]) for choice in choices]
@@ -194,13 +198,12 @@
195199 '''
196200 Config launcher is used to reconfigure editor trends toolkit.
197201 '''
198 -# settings.load_configuration()
 202+
199203 pc = projects.ProjectContainer()
200204 if not os.path.exists('wiki.cfg') or rts.force:
201205 config = ConfigParser.RawConfigParser()
202206 project = None
203207 language = None
204 - #language_map = languages.language_map()
205208 working_directory = raw_input('Please indicate where you installed Editor Trends Analytics.\nCurrent location is %s\nPress Enter to accept default.\n' % os.getcwd())
206209 input_location = raw_input('Please indicate where to store the Wikipedia dump files.\nDefault is: %s\nPress Enter to accept default.\n' % rts.input_location)
207210
@@ -234,7 +237,14 @@
235238 rts.working_directory = config.get('file_locations', 'working_directory')
236239 rts.input_location = config.get('file_locations', 'input_location')
237240
 241+ log.log_to_csv(logger, rts, 'New configuration', 'Creating',
 242+ config_launcher,
 243+ working_directory=working_directory,
 244+ input_location=input_location,
 245+ project=project,
 246+ language=language,)
238247
 248+
239249 def downloader_launcher(rts, logger):
240250 '''
241251 This launcher calls the dump downloader to download a Wikimedia dump file.
@@ -242,10 +252,9 @@
243253 print 'Start downloading'
244254 stopwatch = timer.Timer()
245255 log.log_to_mongo(rts, 'dataset', 'download', stopwatch, event='start')
246 - res = downloader.launcher(rts, logger)
 256+ downloader.launcher(rts, logger)
247257 stopwatch.elapsed()
248258 log.log_to_mongo(rts, 'dataset', 'download', stopwatch, event='finish')
249 - return res
250259
251260
252261 def extract_launcher(rts, logger):
@@ -257,9 +266,11 @@
258267 print 'Extracting data from XML'
259268 stopwatch = timer.Timer()
260269 log.log_to_mongo(rts, 'dataset', 'extract', stopwatch, event='start')
 270+ log.log_to_csv(logger, rts, 'Start', 'Extract', extract_launcher)
261271 enricher.launcher(rts)
262272 stopwatch.elapsed()
263273 log.log_to_mongo(rts, 'dataset', 'extract', stopwatch, event='finish')
 274+ log.log_to_csv(logger, rts, 'Finish', 'Extract', extract_launcher)
264275
265276
266277 def sort_launcher(rts, logger):
@@ -270,15 +281,11 @@
271282 print 'Start sorting data'
272283 stopwatch = timer.Timer()
273284 log.log_to_mongo(rts, 'dataset', 'sort', stopwatch, event='start')
274 -# write_message_to_log(logger, settings,
275 -# message=None,
276 -# verb=None,
277 -# location=properties.location,
278 -# input=properties.txt,
279 -# output=properties.sorted)
 285+ log.log_to_csv(logger, rts, 'Start', 'Sort', sort_launcher)
280286 sort.launcher(rts)
281287 stopwatch.elapsed()
282288 log.log_to_mongo(rts, 'dataset', 'sort', stopwatch, event='finish')
 289+ log.log_to_csv(logger, rts, 'Finish', 'Sort', sort_launcher)
283290
284291
285292 def store_launcher(rts, logger):
@@ -289,99 +296,97 @@
290297 print 'Start storing data in MongoDB'
291298 stopwatch = timer.Timer()
292299 log.log_to_mongo(rts, 'dataset', 'store', stopwatch, event='start')
 300+ log.log_to_csv(logger, rts, 'Start', 'Store', store_launcher)
293301 db.cleanup_database(rts.dbname, logger)
294 -# write_message_to_log(logger, settings,
295 -# message=None,
296 -# verb='Storing',
297 -# function=properties.function,
298 -# location=properties.location,
299 -# input=properties.sorted,
300 -# project=properties.full_project,
301 -# collection=properties.collection)
302 -# for key in properties:
303 -# print key, getattr(properties, key)
304302 store.launcher(rts)
305303 stopwatch.elapsed()
306304 log.log_to_mongo(rts, 'dataset', 'store', stopwatch, event='finish')
 305+ log.log_to_csv(logger, rts, 'Finish', 'Store', store_launcher)
307306
308307
309308 def transformer_launcher(rts, logger):
 309+ '''
 310+ This function derives a number of variables from the editors_raw collection
 311+ this will significantly improve processing speed.
 312+ '''
310313 print 'Start transforming dataset'
311314 stopwatch = timer.Timer()
312315 log.log_to_mongo(rts, 'dataset', 'transform', stopwatch, event='start')
 316+ log.log_to_csv(logger, rts, 'Start', 'Transform', transformer_launcher)
313317 db.cleanup_database(rts.dbname, logger, 'dataset')
314 -# write_message_to_log(logger, settings,
315 -# message=None,
316 -# verb='Transforming',
317 -# project=properties.project,
318 -# collection=properties.collection)
319318 transformer.transform_editors_single_launcher(rts)
320319 stopwatch.elapsed()
321320 log.log_to_mongo(rts, 'dataset', 'transform', stopwatch, event='finish')
 321+ log.log_to_csv(logger, rts, 'Finish', 'Transform', transformer_launcher)
322322
323323
324324 def dataset_launcher(rts, logger):
325 - print 'Start exporting dataset'
 325+ '''
 326+ Dataset launcher is the entry point to generate datasets from the command
 327+ line.
 328+ '''
 329+ print 'Start generating dataset'
326330 stopwatch = timer.Timer()
327331 log.log_to_mongo(rts, 'dataset', 'export', stopwatch, event='start')
328332
329333 for chart in rts.charts:
330334 analyzer.generate_chart_data(rts, chart, **rts.keywords)
331 -# write_message_to_log(logger, settings,
332 -# message=None,
333 -# verb='Exporting',
334 -# target=target,
335 -# dbname=properties.full_project,
336 -# collection=properties.collection)
 335+ log.log_to_csv(logger, rts, 'Start', 'Dataset', dataset_launcher,
 336+ chart=chart,
 337+ dbname=rts.dbname,
 338+ collection=rts.editors_dataset)
337339 stopwatch.elapsed()
338340 log.log_to_mongo(rts, 'dataset', 'export', stopwatch, event='finish')
 341+ log.log_to_csv(logger, rts, 'Finish', 'Dataset', dataset_launcher)
339342
340343
341344 def cleanup(rts, logger):
342 - directories = properties.directories[1:]
 345+ '''
 346+ This function deletes all files of a previous Wikilytics run.
 347+ '''
 348+ directories = rts.directories[1:]
 349+
 350+ #remove directories
343351 for directory in directories:
344 - write_message_to_log(logger, setting,
345 - message=None,
346 - verb='Deleting',
347 - dir=directory)
348352 file_utils.delete_file(directory, '', directory=True)
 353+ log.log_to_csv(logger, rts,
 354+ message='Deleting %s' % directory,
 355+ verb='Deleting',
 356+ function=cleanup)
349357
350 - write_message_to_log(logger, settings,
351 - message=None,
352 - verb='Creating',
353 - dir=directories)
354 - settings.verify_environment(directories)
 358+ #create directories
 359+ rts.verify_environment(directories)
 360+ log.log_to_csv(logger, rts, message='Deleting %s' % directory,
 361+ verb='Creating', function=rts.verify_environment)
355362
356 - filename = '%s%s' % (properties.full_project, '_editor.bin')
357 - write_message_to_log(logger, settings,
358 - message=None,
359 - verb='Deleting',
360 - filename=filename)
361 - file_utils.delete_file(settings.binary_location, filename)
 363+ #remove binary files
 364+ filename = '%s%s' % (rts.full_project, '_editor.bin')
 365+ file_utils.delete_file(rts.binary_location, filename)
 366+ log.log_to_csv(logger, rts, message='Deleting %s' % filename,
 367+ verb='Deleting',
 368+ function=file_utils.delete_file)
362369
363370
 371+
364372 def all_launcher(rts, logger):
365 - print 'The entire data processing chain has been called, this will take a \
366 - couple of hours (at least) to complete.'
 373+ '''
 374+ The entire data processing chain has been called, this will take a
 375+ couple of hours (at least) to complete.
 376+ '''
 377+
367378 stopwatch = timer.Timer()
368379 log.log_to_mongo(rts, 'dataset', 'all', stopwatch, event='start')
369380 print 'Start of building %s %s dataset.' % (rts.language.name, rts.project)
370381
371 -# write_message_to_log(logger, settings,
372 -# message=message,
373 -# verb=None,
374 -# full_project=properties.full_project,
375 -# ignore=properties.ignore,
376 -# clean=properties.clean)
377382 if rts.clean:
 383+ print 'Removing previous datasets...'
378384 cleanup(rts, logger)
379385
380386 functions = ordered_dict.OrderedDict(((downloader_launcher, 'download'),
381387 (extract_launcher, 'extract'),
382388 (sort_launcher, 'sort'),
383389 (store_launcher, 'store'),
384 - (transformer_launcher, 'transform'),
385 - (dataset_launcher, 'dataset')))
 390+ (transformer_launcher, 'transform')))
386391
387392 for function, callname in functions.iteritems():
388393 if callname not in rts.ignore:
@@ -390,164 +395,29 @@
391396 if res == False:
392397 sys.exit(False)
393398 elif res == None:
394 - print 'Function %s does not return a status, \
395 - implement NOW' % function.func_name
 399+ pass
396400 stopwatch.elapsed()
397401 log.log_to_mongo(rts, 'dataset', 'all', stopwatch, event='finish')
398402
399403
400404
401405 def about_statement():
 406+ '''
 407+ prints generic version information.
 408+ '''
402409 print ''
403410 print 'Wikilytics is (c) 2010-2011 by the Wikimedia Foundation.'
404411 print 'Written by Diederik van Liere (dvanliere@gmail.com).'
405 - print '''This software comes with ABSOLUTELY NO WARRANTY. This is
406 - free software, and you are welcome to distribute it under certain
407 - conditions.'''
 412+ print '''This software comes with ABSOLUTELY NO WARRANTY. This is free
 413+ software, and you are welcome to distribute it under certain conditions.'''
408414 print 'See the README.1ST file for more information.'
409415 print ''
410416
411417
412 -def init_args_parser():
 418+def main():
413419 '''
414 - Entry point for parsing command line and launching the needed function(s).
 420+ This function initializes the command line parser.
415421 '''
416 - language = languages.init()
417 - project = projects.init()
418 - pjc = projects.ProjectContainer()
419 - rts = runtime_settings.RunTimeSettings(project, language)
420 -
421 - #Init Argument Parser
422 - parser = ArgumentParser(prog='manage', formatter_class=RawTextHelpFormatter)
423 - subparsers = parser.add_subparsers(help='sub - command help')
424 -
425 - #SHOW LANGUAGES
426 - parser_languages = subparsers.add_parser('show_languages',
427 - help='Overview of all valid languages.')
428 - parser_languages.add_argument('-s', '--startswith',
429 - action='store',
430 - help='Enter the first letter of a language to see which languages are \
431 - available.')
432 - parser_languages.set_defaults(func=language.show_languages, args=[project])
433 -
434 - #CONFIG
435 - parser_config = subparsers.add_parser('config',
436 - help='The config sub command allows you set the data location of where \
437 - to store files.')
438 - parser_config.set_defaults(func=config_launcher)
439 - parser_config.add_argument('-f', '--force',
440 - action='store_true',
441 - help='Reconfigure Editor Toolkit (this will replace wiki.cfg')
442 -
443 - #DOWNLOAD
444 - parser_download = subparsers.add_parser('download',
445 - help='The download sub command allows you to download a Wikipedia dump\
446 - file.')
447 - parser_download.set_defaults(func=downloader_launcher)
448 -
449 - #EXTRACT
450 - parser_create = subparsers.add_parser('extract',
451 - help='The store sub command parsers the XML chunk files, extracts the \
452 - information and stores it in a MongoDB.')
453 - parser_create.set_defaults(func=extract_launcher)
454 -
455 - #SORT
456 - parser_sort = subparsers.add_parser('sort',
457 - help='By presorting the data, significant processing time reductions \
458 - are achieved.')
459 - parser_sort.set_defaults(func=sort_launcher)
460 -
461 - #STORE
462 - parser_store = subparsers.add_parser('store',
463 - help='The store sub command parsers the XML chunk files, extracts the \
464 - information and stores it in a MongoDB.')
465 - parser_store.set_defaults(func=store_launcher)
466 -
467 - #TRANSFORM
468 - parser_transform = subparsers.add_parser('transform',
469 - help='Transform the raw datatable to an enriched dataset that can be \
470 - exported.')
471 - parser_transform.set_defaults(func=transformer_launcher)
472 -
473 - #DATASET
474 - parser_dataset = subparsers.add_parser('dataset',
475 - help='Create a dataset from the MongoDB and write it to a csv file.')
476 - parser_dataset.set_defaults(func=dataset_launcher)
477 - parser_dataset.add_argument('-c', '--charts',
478 - action='store',
479 - help='Should be a valid function name that matches one of the plugin functions',
480 - default=inventory.available_analyses()['new_editor_count'])
481 -
482 - parser_dataset.add_argument('-k', '--keywords',
483 - action='store',
484 - help='Add additional keywords in the format keyword1=value1,keyword2=value2',
485 - default='')
486 -
487 - #ALL
488 - parser_all = subparsers.add_parser('all',
489 - help='The all sub command runs the download, split, store and dataset \
490 - commands.\n\nWARNING: THIS COULD TAKE DAYS DEPENDING ON THE \
491 - CONFIGURATION OF YOUR MACHINE AND THE SIZE OF THE WIKIMEDIA DUMP FILE.')
492 - parser_all.set_defaults(func=all_launcher)
493 - parser_all.add_argument('-e', '--except',
494 - action='store',
495 - help='Should be a list of functions that are to be ignored when \
496 - executing all.',
497 - default=[])
498 -
499 - parser_all.add_argument('-n', '--new',
500 - action='store_true',
501 - help='This will delete all previous output and starts from scratch. \
502 - Mostly useful for debugging purposes.',
503 - default=False)
504 -
505 - #DJANGO
506 - parser_django = subparsers.add_parser('django')
507 - parser_django.add_argument('-e', '--except',
508 - action='store',
509 - help='Should be a list of functions that are to be ignored when \
510 - executing all.',
511 - default=[])
512 -
513 - parser.add_argument('-l', '--language',
514 - action='store',
515 - help='Example of valid languages.',
516 - choices=project.supported_languages(),
517 - default=unicode(language.name)
518 - )
519 -
520 - parser.add_argument('-p', '--project',
521 - action='store',
522 - help='Specify the Wikimedia project that you would like to download',
523 - choices=pjc.supported_projects(),
524 - default='wiki')
525 -
526 - parser.add_argument('-c', '--collection',
527 - action='store',
528 - help='Name of MongoDB collection',
529 - default='editors_raw')
530 -
531 - parser.add_argument('-o', '--location',
532 - action='store',
533 - help='Indicate where you want to store the downloaded file.',
534 - #default=settings.input_location)
535 - default=rts.input_location)
536 -
537 - parser.add_argument('-ns', '--namespace',
538 - action='store',
539 - help='A list of namespaces to include for analysis.',
540 - default='0')
541 -
542 - parser.add_argument('-f', '--file',
543 - action='store',
544 - choices=rts.file_choices,
545 - help='Indicate which dump you want to download. Valid choices are:\n \
546 - %s' % ''.join([f + ',\n' for f in rts.file_choices]),
547 - default='stub-meta-history.xml.gz')
548 -
549 - return project, language, parser
550 -
551 -def main():
552422 project, language, parser, = init_args_parser()
553423 args = parser.parse_args()
554424 rts = runtime_settings.RunTimeSettings(project, language, args)