r85649 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r85648‎ | r85649 | r85650 >
Date:22:41, 7 April 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
added documentation, more descriptive variables, and fixed some issues in the logic
Modified paths:
  • /trunk/tools/editor_trends/analyses/plugins/taxonomy_burnout.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/analyses/plugins/taxonomy_burnout.py
@@ -18,44 +18,109 @@
1919 __date__ = '2011-01-25'
2020 __version__ = '0.1'
2121
 22+from datetime import date
2223
23 -def taxonomy_burnout(var, editor, **kwargs):
24 - '''
 24+'''
 25+ taxonomy_burnout
 26+ ================
 27+
 28+ This is a Taxonomy project metric. The "Editor Burnout" metric is intended to measure
 29+ editors that make a large number of edits in a single month and then make relatively few
 30+ in the following months. The exact number of edits and months are defined by the
 31+ following global variables:
 32+
 33+ ONE_MONTH_EDIT_COUNT_CUTOFF
 34+ MONTHS_AFTER_CUTOFF_TO_RECORD_BURNOUT
 35+
 36+ The average number of edits per month, measured beyond MONTHS_AFTER_CUTOFF_TO_RECORD_BURNOUT,
 37+ below which an editor is classified as "burned out" is defined in:
 38+
 39+ CUTOFF_RATE_FOR_BURNOUT_EDITORS
 40+
 41+ More documentation may be found at:
 42+
 43+ http://meta.wikimedia.org/wiki/Contribution_Taxonomy_Project/Research_Questions
 44+
2545 If you have questions about how to use this plugin, please visit:
2646 http://meta.wikimedia.org/wiki/Wikilytics_Plugins
27 - '''
 47+
 48+'''
 49+def taxonomy_burnout(var, editor, **kwargs):
 50+
 51+ """ These parameters can be used to tune the burnout analyses """
 52+ global MONTHS_AFTER_CUTOFF_TO_RECORD_BURNOUT
 53+ global CUTOFF_RATE_FOR_BURNOUT_EDITORS
 54+ global ONE_MONTH_EDIT_COUNT_CUTOFF
 55+
 56+ MONTHS_AFTER_CUTOFF_TO_RECORD_BURNOUT = 6
 57+ CUTOFF_RATE_FOR_BURNOUT_EDITORS = 10
 58+ ONE_MONTH_EDIT_COUNT_CUTOFF = 149
 59+
 60+
 61+ """ Record editor properties """
2862 new_wikipedian = editor['new_wikipedian']
2963 edits = editor['edit_count']
30 - cutoff = kwargs.get('cutoff', 149)
 64+ cutoff = kwargs.get('cutoff', ONE_MONTH_EDIT_COUNT_CUTOFF)
3165 username = editor['username']
3266
 67+ made_cutoff = False
3368 burnout = False
34 - sum = 0.0
35 - count = 0.0
 69+
 70+ total_edits_since_cutoff = 0.0
 71+ total_months_since_cutoff = 0.0
 72+
 73+ zero_edit_months = dict()
3674
 75+ """
 76+ new_wikipedian is False if the user is not classified as a Wikipedian;
 77+ otherwise it holds the date on which they became a Wikipedian
 78+
 79+ In that case iterate through the months to present trying to find:
 80+ (1) If they meet ONE_MONTH_EDIT_COUNT_CUTOFF
 81+ (2) If the fall into the burnout category thereafter
 82+ """
 83+
3784 if new_wikipedian:
3885 years = edits.keys()
 86+
3987 for year in years:
4088 months = edits[year].keys()
 89+
4190 for month in months:
42 - try:
43 - if edits[year][month].get('0', 0) > cutoff:
44 - burnout = True
45 - if burnout == True:
46 - n = edits[year][month].get('0', 0)
47 - sum += n
48 - if n > 0:
49 - count += 1.0
50 - except (AttributeError, KeyError):
51 - print 'Editor %s does not have data for year: %s and month %s. Data: %s' \
52 - % (username, year, month, edits[year])
 91+
 92+ if edits[year][month].get('0', 0) > cutoff:
 93+ made_cutoff = True
 94+ if made_cutoff == True:
 95+
 96+ """ Count every month after the cutoff is made """
 97+ total_months_since_cutoff += 1.0
 98+
 99+ """ Handle cases where no edits were recorded for a given month """
 100+ try:
 101+ edit_count_month = edits[year][month].get('0', 0)
 102+ except (AttributeError, KeyError):
 103+ edit_count_month = 0
 104+
 105+ """ Record that this editor had no edits on this month """
 106+ try:
 107+ zero_edit_months[username].append(date(year, month, 1))
 108+ except KeyError:
 109+ zero_edit_months[username] = list()
 110+ zero_edit_months[username].append(date(year, month, 1))
 111+
 112+ total_edits_since_cutoff = total_edits_since_cutoff + edit_count_month
 113+
 114+ if total_edits_since_cutoff / total_months_since_cutoff < CUTOFF_RATE_FOR_BURNOUT_EDITORS and \
 115+ total_months_since_cutoff > MONTHS_AFTER_CUTOFF_TO_RECORD_BURNOUT and burnout == False:
 116+ burnout = True
 117+ burnout_date = date(year, month, 1)
 118+
 119+ if burnout:
 120+ avg_edit = total_edits_since_cutoff / total_months_since_cutoff
53121
54 - if burnout and sum / count > 10:
55 - avg_edit = sum / count
56 -
57122 try:
58 - var.add(new_wikipedian, avg_edit, {'username' : username})
59 - except Exception, error:
 123+ var.add(burnout_date, avg_edit, {'username' : username})
 124+ except Exception as error:
60125 print 'user: %s error: %s' % (username, error)
61126
62127 return var