r90496 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r90495‎ | r90496 | r90497 >
Date:23:20, 20 June 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
Added Documentation
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py
@@ -23,7 +23,7 @@
2424 import datetime
2525 import MySQLdb
2626 import pylab
27 -from matplotlib.lines import Line2D
 27+# from matplotlib.lines import Line2D
2828 import HTML
2929 import math
3030
@@ -34,7 +34,7 @@
3535 import Fundraiser_Tools.classes.HypothesisTest as HT
3636
3737
38 -matplotlib.use('Agg')
 38+# matplotlib.use('Agg')
3939
4040
4141
@@ -259,9 +259,7 @@
260260 Usage instructions for executing a report via the IntervalReporting class
261261 """
262262 def usage(self):
263 -
264 - """ !! MODIFY -- include instructions on using **kwargs """
265 -
 263+
266264 print 'Types of queries:'
267265 print ' (1) banner'
268266 print ' (2) LP'
@@ -270,8 +268,20 @@
271269 print " run('20101230160400', '20101230165400', 2, 'banner', 'imp', '20101230JA091_US', ['banner1', 'banner2'])"
272270 print " run('20101230160400', '20101230165400', 2, 'LP', 'views', '20101230JA091_US', [])"
273271 print ''
274 -
 272+ print " Keyword arguments may also be passed to the constructor:"
 273+ print ''
 274+ print " font_size - font size related to plots"
 275+ print " fig_width_pt - width of the plot"
 276+ print " inches_per_pt - define point size relative to screen"
 277+ print " use_labels - whether to include specified labels in plot"
 278+ print " fig_file_format - file format of the image (default = .png)"
 279+ print " plot_type - line or step plot"
 280+ print " item_keys - the item keys expected (only these will be included)"
 281+ print " file_path - override the file path to store the plot output"
 282+ print " query_type - the type of query to use"
 283+
275284 return
 285+
276286
277287 """
278288 Selecting a subset of the key items in a dictionary
@@ -418,14 +428,19 @@
419429 if len(self._item_keys_) > 0:
420430 self._counts_ = self.select_metric_keys(self._counts_)
421431 self._times_ = self.select_metric_keys(self._times_)
422 - print self._counts_
 432+
423433 """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
424434 for key in self._times_.keys():
425435 self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 3)
426436 self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval)
427437
428 - """ Normalize times """
429 -
 438+ """ If there are missing metrics add them as zeros """
 439+ for label in labels:
 440+
 441+ if not(label in self._times_.keys()):
 442+ self._times_[label] = self._times_[self._times_.keys()[0]]
 443+ self._counts_[label] = [0.0] * len(self._times_[label])
 444+
430445 min_time = min(self._times_)
431446 ranges = [min_time, 0]
432447
@@ -470,13 +485,8 @@
471486
472487 CLASS :: ConfidenceReporting
473488
474 - Reports confidence values on specified metrics
 489+ This class uses ConfidenceReportingLoader and HypothesisTest to execute a confidence analysis over a given campaign.
475490
476 - Types of queries supported:
477 -
478 - report_banner_confidence
479 - report_LP_confidence
480 -
481491 """
482492 class ConfidenceReporting(DataReporting):
483493
@@ -508,22 +518,31 @@
509519 DataReporting.__init__(self, **kwargs)
510520
511521 """
512 - Describes how to run a report !! MODIFY !!
 522+ Describes how to run a confidence report
513523 """
514524 def usage(self):
515 -
 525+
 526+ print 'Method Signature:'
 527+ print " run(self, test_name, query_name, metric_name, campaign, items, start_time, end_time, interval, num_samples)"
516528 print ''
 529+ print 'e.g.'
 530+ print " cr.run('mytest!','report_banner_confidence','don_per_imp', 'C_JMvTXT_smallcountry_WS', ['banner1','banner2'], '20110101000000', '20100101000000', 2, 20)"
 531+ print ''
 532+ print " Keyword arguments may also be specified to modify plots:"
 533+ print ''
 534+ print " font_size - font size related to plots"
 535+ print " fig_width_pt - width of the plot"
 536+ print " inches_per_pt - define point size relative to screen"
 537+ print " use_labels - whether to include specified labels in plot"
 538+ print " fig_file_format - file format of the image (default = .png)"
 539+ print " hyp_test - the type of hypothesis test"
517540
518541 return
519542
520543
521544 """
522 - <description>
 545+ Confidence plotting over test intervals. This plot takes the test intervals into consideration, displaying the means with error bars over each interval.
523546
524 - INPUT:
525 -
526 - RETURN:
527 -
528547 """
529548 def _gen_plot(self,means_1, means_2, std_devs_1, std_devs_2, times_indices, title, xlabel, ylabel, ranges, subplot_index, labels, fname):
530549
@@ -568,6 +587,9 @@
569588 pylab.savefig(self._file_path_ + fname + '.' + self._fig_file_format_, format=self._fig_file_format_)
570589
571590
 591+ """
 592+ Generates a box plot of all of the data. Does not visualize test intervals.
 593+ """
572594 def _gen_box_plot(self, data, title, ylabel, subplot_index, labels, fname):
573595
574596
@@ -600,9 +622,7 @@
601623 pylab.rcParams.update(params)
602624
603625 pylab.grid()
604 - #pylab.ylim(ranges[2], ranges[3])
605 - #pylab.xlim(ranges[0], ranges[1])
606 - # pylab.legend([e1[0], e2[0]], labels,loc=2)
 626+
607627
608628 pylab.ylabel(ylabel)
609629
@@ -610,12 +630,10 @@
611631 pylab.savefig(self._file_path_ + fname + '.' + self._fig_file_format_, format=self._fig_file_format_)
612632
613633 """
614 - Print in Tabular form the means and standard deviation of each group over each
615 - interval
 634+ Print in tabular form the means and standard deviations of each group over each interval. Provides a detailed numerical output
 635+ of the analysis.
616636
617 - INPUT:
618 -
619 - RETURN:
 637+ RETURN: the winner string, percent increase of the winner for the metric
620638
621639 """
622640 def print_metrics(self, filename, metric_name, means_1, means_2, std_devs_1, std_devs_2, times_indices, labels, test_call):
@@ -624,10 +642,20 @@
625643 file = open(self._file_path_ + filename, 'w')
626644
627645 """ Compute % increase and report """
628 - av_means_1 = sum(means_1) / len(means_1)
629 - av_means_2 = sum(means_2) / len(means_2)
630 - percent_increase = math.fabs(av_means_1 - av_means_2) / min(av_means_1,av_means_2) * 100.0
 646+ try:
 647+ av_means_1 = sum(means_1) / len(means_1)
 648+ av_means_2 = sum(means_2) / len(means_2)
 649+ percent_increase = math.fabs(av_means_1 - av_means_2) / min(av_means_1,av_means_2) * 100.0
631650
 651+ except Exception as inst:
 652+
 653+ print 'Percent increase could not be computed.'
 654+ print type(inst) # the exception instance
 655+ print inst.args # arguments stored in .args
 656+ print inst # __str__ allows args to be printed directly
 657+
 658+ percent_increase = 0.0
 659+
632660 """ Compute the average standard deviations """
633661 av_std_dev_1 = 0
634662 av_std_dev_2 = 0
@@ -648,15 +676,9 @@
649677 win_str = '\nThe winner "' + winner + '" had a %.2f%s increase.'
650678 win_str = win_str % (percent_increase, '%')
651679
652 - #print '\nCOMMAND = ' + test_call
653680 file.write('\nCOMMAND = ' + test_call)
654681
655 -
656 -# print '\n\n' + metric_name
657 -# print '\nitem 1 = ' + labels[0]
658 -# print 'item 2 = ' + labels[1]
659 -# print win_str
660 -# print '\ninterval\tmean1\t\tmean2\t\tstddev1\t\tstddev2\n'
 682+
661683 file.write('\n\n' + metric_name)
662684 file.write('\nitem 1 = ' + labels[0] + '\n')
663685 file.write('\nitem 2 = ' + labels[1] + '\n')
@@ -675,11 +697,8 @@
676698 """ Print out the averaged parameters """
677699 line_args = '%.5f\t\t' + '%.5f\t\t' + '%.5f\t\t' + '%.5f\n'
678700 line_str = line_args % (av_means_1, av_means_2, av_std_dev_1, av_std_dev_2)
 701+
679702
680 -# print '\n\nOverall Parameters -- the confidence test was run with these parameters:\n'
681 -# print '\nmean1\t\tmean2\t\tstddev1\t\tstddev2\n'
682 -# print line_str
683 -
684703 file.write('\n\nOverall Parameters:\n')
685704 file.write('\nmean1\t\tmean2\t\tstddev1\t\tstddev2\n')
686705 file.write(line_str)
@@ -689,12 +708,18 @@
690709 return [winner, percent_increase]
691710
692711 """
693 - Executes the test reporting
 712+ Executes the test reporting.
694713
695 - INPUT:
696 -
697 - RETURN:
 714+ @param num_samples: the number of samples per test interval
 715+ @type num_samples: integer
698716
 717+ @param interval: the length of the test interval in minutes
 718+ @type interval: integer
 719+
 720+ @param items: datasets for paired testing
 721+ @type items: dictionary
 722+
 723+
699724 """
700725 def run(self, test_name, query_name, metric_name, campaign, items, start_time, end_time, interval, num_samples):
701726

Status & tagging log