Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py |
— | — | @@ -23,7 +23,7 @@ |
24 | 24 | import datetime |
25 | 25 | import MySQLdb |
26 | 26 | import pylab |
27 | | -from matplotlib.lines import Line2D |
| 27 | +# from matplotlib.lines import Line2D |
28 | 28 | import HTML |
29 | 29 | import math |
30 | 30 | |
— | — | @@ -34,7 +34,7 @@ |
35 | 35 | import Fundraiser_Tools.classes.HypothesisTest as HT |
36 | 36 | |
37 | 37 | |
38 | | -matplotlib.use('Agg') |
| 38 | +# matplotlib.use('Agg') |
39 | 39 | |
40 | 40 | |
41 | 41 | |
— | — | @@ -259,9 +259,7 @@ |
260 | 260 | Usage instructions for executing a report via the IntervalReporting class |
261 | 261 | """ |
262 | 262 | def usage(self): |
263 | | - |
264 | | - """ !! MODIFY -- include instructions on using **kwargs """ |
265 | | - |
| 263 | + |
266 | 264 | print 'Types of queries:' |
267 | 265 | print ' (1) banner' |
268 | 266 | print ' (2) LP' |
— | — | @@ -270,8 +268,20 @@ |
271 | 269 | print " run('20101230160400', '20101230165400', 2, 'banner', 'imp', '20101230JA091_US', ['banner1', 'banner2'])" |
272 | 270 | print " run('20101230160400', '20101230165400', 2, 'LP', 'views', '20101230JA091_US', [])" |
273 | 271 | print '' |
274 | | - |
| 272 | + print " Keyword arguments may also be passed to the constructor:" |
| 273 | + print '' |
| 274 | + print " font_size - font size related to plots" |
| 275 | + print " fig_width_pt - width of the plot" |
| 276 | + print " inches_per_pt - define point size relative to screen" |
| 277 | + print " use_labels - whether to include specified labels in plot" |
| 278 | + print " fig_file_format - file format of the image (default = .png)" |
| 279 | + print " plot_type - line or step plot" |
| 280 | + print " item_keys - the item keys expected (only these will be included)" |
| 281 | + print " file_path - override the file path to store the plot output" |
| 282 | + print " query_type - the type of query to use" |
| 283 | + |
275 | 284 | return |
| 285 | + |
276 | 286 | |
277 | 287 | """ |
278 | 288 | Selecting a subset of the key items in a dictionary |
— | — | @@ -418,14 +428,19 @@ |
419 | 429 | if len(self._item_keys_) > 0: |
420 | 430 | self._counts_ = self.select_metric_keys(self._counts_) |
421 | 431 | self._times_ = self.select_metric_keys(self._times_) |
422 | | - print self._counts_ |
| 432 | + |
423 | 433 | """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """ |
424 | 434 | for key in self._times_.keys(): |
425 | 435 | self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 3) |
426 | 436 | self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval) |
427 | 437 | |
428 | | - """ Normalize times """ |
429 | | - |
| 438 | + """ If there are missing metrics add them as zeros """ |
| 439 | + for label in labels: |
| 440 | + |
| 441 | + if not(label in self._times_.keys()): |
| 442 | + self._times_[label] = self._times_[self._times_.keys()[0]] |
| 443 | + self._counts_[label] = [0.0] * len(self._times_[label]) |
| 444 | + |
430 | 445 | min_time = min(self._times_) |
431 | 446 | ranges = [min_time, 0] |
432 | 447 | |
— | — | @@ -470,13 +485,8 @@ |
471 | 486 | |
472 | 487 | CLASS :: ConfidenceReporting |
473 | 488 | |
474 | | - Reports confidence values on specified metrics |
| 489 | + This class uses ConfidenceReportingLoader and HypothesisTest to execute a confidence analysis over a given campaign. |
475 | 490 | |
476 | | - Types of queries supported: |
477 | | - |
478 | | - report_banner_confidence |
479 | | - report_LP_confidence |
480 | | - |
481 | 491 | """ |
482 | 492 | class ConfidenceReporting(DataReporting): |
483 | 493 | |
— | — | @@ -508,22 +518,31 @@ |
509 | 519 | DataReporting.__init__(self, **kwargs) |
510 | 520 | |
511 | 521 | """ |
512 | | - Describes how to run a report !! MODIFY !! |
| 522 | + Describes how to run a confidence report |
513 | 523 | """ |
514 | 524 | def usage(self): |
515 | | - |
| 525 | + |
| 526 | + print 'Method Signature:' |
| 527 | + print " run(self, test_name, query_name, metric_name, campaign, items, start_time, end_time, interval, num_samples)" |
516 | 528 | print '' |
| 529 | + print 'e.g.' |
| 530 | + print " cr.run('mytest!','report_banner_confidence','don_per_imp', 'C_JMvTXT_smallcountry_WS', ['banner1','banner2'], '20110101000000', '20100101000000', 2, 20)" |
| 531 | + print '' |
| 532 | + print " Keyword arguments may also be specified to modify plots:" |
| 533 | + print '' |
| 534 | + print " font_size - font size related to plots" |
| 535 | + print " fig_width_pt - width of the plot" |
| 536 | + print " inches_per_pt - define point size relative to screen" |
| 537 | + print " use_labels - whether to include specified labels in plot" |
| 538 | + print " fig_file_format - file format of the image (default = .png)" |
| 539 | + print " hyp_test - the type of hypothesis test" |
517 | 540 | |
518 | 541 | return |
519 | 542 | |
520 | 543 | |
521 | 544 | """ |
522 | | - <description> |
| 545 | +Confidence plotting over test intervals. This plot takes test intervals into consideration, displaying the means with error bars over each interval |
523 | 546 | |
524 | | - INPUT: |
525 | | - |
526 | | - RETURN: |
527 | | - |
528 | 547 | """ |
529 | 548 | def _gen_plot(self,means_1, means_2, std_devs_1, std_devs_2, times_indices, title, xlabel, ylabel, ranges, subplot_index, labels, fname): |
530 | 549 | |
— | — | @@ -568,6 +587,9 @@ |
569 | 588 | pylab.savefig(self._file_path_ + fname + '.' + self._fig_file_format_, format=self._fig_file_format_) |
570 | 589 | |
571 | 590 | |
| 591 | + """ |
| 592 | + Generates a box plot of all of the data. Does not visualize test intervals. |
| 593 | + """ |
572 | 594 | def _gen_box_plot(self, data, title, ylabel, subplot_index, labels, fname): |
573 | 595 | |
574 | 596 | |
— | — | @@ -600,9 +622,7 @@ |
601 | 623 | pylab.rcParams.update(params) |
602 | 624 | |
603 | 625 | pylab.grid() |
604 | | - #pylab.ylim(ranges[2], ranges[3]) |
605 | | - #pylab.xlim(ranges[0], ranges[1]) |
606 | | - # pylab.legend([e1[0], e2[0]], labels,loc=2) |
| 626 | + |
607 | 627 | |
608 | 628 | pylab.ylabel(ylabel) |
609 | 629 | |
— | — | @@ -610,12 +630,10 @@ |
611 | 631 | pylab.savefig(self._file_path_ + fname + '.' + self._fig_file_format_, format=self._fig_file_format_) |
612 | 632 | |
613 | 633 | """ |
614 | | - Print in Tabular form the means and standard deviation of each group over each |
615 | | - interval |
| 634 | +Print in tabular form the means and standard deviations of each group over each interval. Provides a detailed numerical output |
| 635 | +of the analysis. |
616 | 636 | |
617 | | - INPUT: |
618 | | - |
619 | | - RETURN: |
| 637 | + RETURN: the winner string, percent increase of the winner for the metric |
620 | 638 | |
621 | 639 | """ |
622 | 640 | def print_metrics(self, filename, metric_name, means_1, means_2, std_devs_1, std_devs_2, times_indices, labels, test_call): |
— | — | @@ -624,10 +642,20 @@ |
625 | 643 | file = open(self._file_path_ + filename, 'w') |
626 | 644 | |
627 | 645 | """ Compute % increase and report """ |
628 | | - av_means_1 = sum(means_1) / len(means_1) |
629 | | - av_means_2 = sum(means_2) / len(means_2) |
630 | | - percent_increase = math.fabs(av_means_1 - av_means_2) / min(av_means_1,av_means_2) * 100.0 |
| 646 | + try: |
| 647 | + av_means_1 = sum(means_1) / len(means_1) |
| 648 | + av_means_2 = sum(means_2) / len(means_2) |
| 649 | + percent_increase = math.fabs(av_means_1 - av_means_2) / min(av_means_1,av_means_2) * 100.0 |
631 | 650 | |
| 651 | + except Exception as inst: |
| 652 | + |
| 653 | + print 'Percent increase could not be computed.' |
| 654 | + print type(inst) # the exception instance |
| 655 | + print inst.args # arguments stored in .args |
| 656 | + print inst # __str__ allows args to be printed directly |
| 657 | + |
| 658 | + percent_increase = 0.0 |
| 659 | + |
632 | 660 | """ Compute the average standard deviations """ |
633 | 661 | av_std_dev_1 = 0 |
634 | 662 | av_std_dev_2 = 0 |
— | — | @@ -648,15 +676,9 @@ |
649 | 677 | win_str = '\nThe winner "' + winner + '" had a %.2f%s increase.' |
650 | 678 | win_str = win_str % (percent_increase, '%') |
651 | 679 | |
652 | | - #print '\nCOMMAND = ' + test_call |
653 | 680 | file.write('\nCOMMAND = ' + test_call) |
654 | 681 | |
655 | | - |
656 | | -# print '\n\n' + metric_name |
657 | | -# print '\nitem 1 = ' + labels[0] |
658 | | -# print 'item 2 = ' + labels[1] |
659 | | -# print win_str |
660 | | -# print '\ninterval\tmean1\t\tmean2\t\tstddev1\t\tstddev2\n' |
| 682 | + |
661 | 683 | file.write('\n\n' + metric_name) |
662 | 684 | file.write('\nitem 1 = ' + labels[0] + '\n') |
663 | 685 | file.write('\nitem 2 = ' + labels[1] + '\n') |
— | — | @@ -675,11 +697,8 @@ |
676 | 698 | """ Print out the averaged parameters """ |
677 | 699 | line_args = '%.5f\t\t' + '%.5f\t\t' + '%.5f\t\t' + '%.5f\n' |
678 | 700 | line_str = line_args % (av_means_1, av_means_2, av_std_dev_1, av_std_dev_2) |
| 701 | + |
679 | 702 | |
680 | | -# print '\n\nOverall Parameters -- the confidence test was run with these parameters:\n' |
681 | | -# print '\nmean1\t\tmean2\t\tstddev1\t\tstddev2\n' |
682 | | -# print line_str |
683 | | - |
684 | 703 | file.write('\n\nOverall Parameters:\n') |
685 | 704 | file.write('\nmean1\t\tmean2\t\tstddev1\t\tstddev2\n') |
686 | 705 | file.write(line_str) |
— | — | @@ -689,12 +708,18 @@ |
690 | 709 | return [winner, percent_increase] |
691 | 710 | |
692 | 711 | """ |
693 | | - Executes the test reporting |
| 712 | + Executes the test reporting. |
694 | 713 | |
695 | | - INPUT: |
696 | | - |
697 | | - RETURN: |
| 714 | + @param num_samples: the number of samples per test interval |
| 715 | + @type num_samples: integer |
698 | 716 | |
| 717 | + @param interval: the length of the test interval in minutes |
| 718 | + @type interval: integer |
| 719 | + |
| 720 | + @param items: datasets for paired testing |
| 721 | + @type items: dictionary |
| 722 | + |
| 723 | + |
699 | 724 | """ |
700 | 725 | def run(self, test_name, query_name, metric_name, campaign, items, start_time, end_time, interval, num_samples): |
701 | 726 | |