r80868 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r80868
Date: 17:00, 24 January 2011
Author: diederik
Status: deferred
Tags:
Comment: Generic log functionality that can log to Mongo and/or a CSV file.
Modified paths:
  • /trunk/tools/editor_trends/utils/log.py (modified)

Diff

Index: trunk/tools/editor_trends/utils/log.py
@@ -13,12 +13,13 @@
 '''
 
 __author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
-__author__email = 'dvanliere at gmail dot com'
+__email__ = 'dvanliere at gmail dot com'
 __date__ = '2011-01-11'
 __version__ = '0.1'
 
-
+import datetime
 import sys
+import progressbar
 sys.path.append('..')
 
 import configuration
@@ -26,10 +27,146 @@
 
 from database import db
 
-def log_to_mongo(project, task, timer, type='start'):
+def log_to_mongo(properties, jobtype, task, timer, event='start'):
     conn = db.init_mongo_db('wikilytics')
+    created = datetime.datetime.now()
+    hash = '%s_%s' % (properties.project, properties.hash)
     coll = conn['jobs']
-    if type == 'start':
-        coll.save({'project': project, 'tasks': {task: {'start': timer.t0, 'in_progress': True}}})
-    if type == 'finish':
-        coll.save({'project': project, 'tasks': {task: {'finish': timer.t1, 'in_progress': False}}})
+
+    job = coll.find_one({'hash': hash})
+
+    if job == None:
+        if jobtype == 'dataset':
+            _id = coll.save({'hash': hash, 'created': created, 'finished': False,
+                             'language_code': properties.language_code,
+                             'project': properties.short_project,
+                             'in_progress': True, 'jobtype': jobtype,
+                             'tasks': {}})
+
+
+        elif jobtype == 'chart':
+            _id = coll.save({'hash': hash, 'created': created,
+                             'jobtype': jobtype,
+                             'project': properties.project,
+                             'language_code': properties.language_code,
+                             'tasks': {}})
+
+        job = coll.find_one({'_id': _id})
+
+    tasks = job['tasks']
+    t = tasks.get(task, {})
+    if event == 'start':
+        t['start'] = timer.t0
+        t['in_progress'] = True
+        tasks[task] = t
+        coll.update({'hash': hash}, {'$set': {'tasks': tasks}})
+    elif event == 'finish':
+        t['finish'] = timer.t1
+        t['in_progress'] = False
+        tasks[task] = t
+        if task == 'transform' or jobtype == 'chart': #final task, set entire task to finished
+            coll.update({'hash': hash}, {'$set': {'tasks': tasks,
+                                                  'in_progress': False,
+                                                  'finished': True}})
+        else:
+            coll.update({'hash': hash}, {'$set': {'tasks': tasks}})
+
+
+def log_to_csv(logger, settings, **kwargs):
+    '''
+    Writes detailed log information to logs / projectname_date.csv
+    '''
+    message = kwargs.pop('message')
+    verb = kwargs.pop('verb')
+    function = kwargs.pop('function')
+    logger.debug('%s\tStarting %s' \
+                 % (datetime.datetime.now(), function.func_name))
+    if message:
+        logger.debug('%s\t%s' % (datetime.datetime.now(), message))
+
+    max_length = max([len(prop) for prop in kwargs if type(prop) != type(True)])
+    max_tabs = max_length // settings.tab_width
+    res = max_length % settings.tab_width
+    if res > 0:
+        max_tabs += 1
+    pos = max_tabs * settings.tab_width
+    for prop in kwargs:
+        if verb:
+            logger.debug('%s\tAction: %s\tSetting: %s' \
+                         % (datetime.datetime.now(), verb, kwargs[prop]))
+        else:
+            tabs = (pos - len(prop)) // settings.tab_width
+            res = len(prop) % settings.tab_width
+            if res > 0 or tabs == 0:
+                tabs += 1
+            tabs = ''.join(['\t' for t in xrange(tabs)])
+            logger.debug('%s\t\tKey: %s%sSetting: %s' \
+                         % (datetime.datetime.now(),
+                            prop,
+                            tabs,
+                            kwargs[prop]))
+
+
+def init_progressbar_widgets(description):
+    widgets = ['%s: ' % description, progressbar.Percentage(), ' ',
+               progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
+               progressbar.ETA(), ' ', progressbar.FileTransferSpeed()]
+    return widgets
+
+
+# error tracking related functions
+def track_errors(xml_buffer, error, file, messages):
+    text = extract_offending_string(xml_buffer.getvalue(), error)
+
+    vars = {}
+    vars['file'] = file
+    vars['error'] = error
+    vars['text'] = text
+    #print file, error, text
+    key = remove_error_specific_information(error)
+    if key not in messages:
+        messages[key] = {}
+    if messages[key] == {}:
+        c = 0
+    else:
+        counters = messages[key].keys()
+        counters.sort()
+        counters.reverse()
+        c = counters[-1]
+
+    messages[key][c] = {}
+    for var in vars:
+        messages[key][c][var] = vars[var]
+
+    return messages
+
+
+def report_error_messages(messages, function):
+    store_object(messages, settings.log_location, function.func_name)
+    errors = messages.keys()
+    for error in errors:
+        for key, value in messages[error].iteritems():
+            print error, key, value
+
+
+def remove_error_specific_information(e):
+    pos = e.args[0].find('line')
+    if pos > -1:
+        return e.args[0][:pos]
+    else:
+        return e.args[0]
+
+
+def extract_offending_string(text, error):
+    '''
+    This function determines the string that causes an error when feeding it to
+    the XML parser. This is only useful for debugging purposes.
+    '''
+    location = re.findall(RE_ERROR_LOCATION, error.args[0])
+    if location != []:
+        location = int(location[0]) - 1
+        text = text.split('\n')[location]
+        text = text.decode('utf-8')
+        return text
+    else:
+        return ''
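For readers of this archive, a minimal usage sketch of the new log_to_mongo signature follows. It is not part of r80868: the Properties and Timer classes, the job hash, and the import path are hypothetical stand-ins for the project's own objects, and the calls assume a reachable MongoDB instance hosting the 'wikilytics' database.

# Hypothetical sketch, not part of r80868: the stubs below only mimic the
# attributes that log_to_mongo reads from the real editor_trends objects.
import datetime

from utils import log    # assumes trunk/tools/editor_trends is the working directory


class Properties(object):
    '''Stand-in for the project's properties object.'''
    def __init__(self):
        self.project = 'wiki'
        self.short_project = 'wiki'
        self.language_code = 'en'
        self.hash = 'en_wiki_dataset'    # made-up job identifier


class Timer(object):
    '''Stand-in for the project's timer; log_to_mongo reads t0 and t1.'''
    def __init__(self):
        self.t0 = datetime.datetime.now()
        self.t1 = None

    def stop(self):
        self.t1 = datetime.datetime.now()


props = Properties()
timer = Timer()
log.log_to_mongo(props, 'dataset', 'transform', timer, event='start')
# ... the actual task would run here ...
timer.stop()
log.log_to_mongo(props, 'dataset', 'transform', timer, event='finish')

Because the task here is 'transform', the finish call also flips the job document's in_progress and finished flags, matching the final-task branch in the diff.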

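A similarly hedged sketch of the CSV-style logging and the progressbar helper: the Settings stub, the logger name, the dummy task function, and the keyword arguments are illustrative only, and the loop uses the same progressbar package imported at the top of log.py.

# Hypothetical sketch, not part of r80868: a stdlib logger plus a minimal
# settings stub exposing only the attribute that log_to_csv reads.
import logging

import progressbar

from utils import log    # assumes trunk/tools/editor_trends is the working directory


class Settings(object):
    '''Stand-in for the configuration object.'''
    tab_width = 4


def build_dataset():
    '''Dummy task; log_to_csv only reads its func_name attribute.'''
    pass


logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('editor_trends')

log.log_to_csv(logger, Settings(),
               message='starting dataset build',
               verb=None,
               function=build_dataset,
               language_code='en',
               project='wiki')

# The widget helper plugs straight into progressbar.ProgressBar:
pbar = progressbar.ProgressBar(widgets=log.init_progressbar_widgets('Parsing'),
                               maxval=100).start()
for i in xrange(100):
    pbar.update(i + 1)
pbar.finish()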
Status & tagging log