r87446 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r87445 | r87446 | r87447 >
Date:	20:15, 4 May 2011
Author:	rfaulk
Status:	deferred (Comments)
Tags:
Comment:	- generalized import paths for Fundraiser_Tools - added data members to 'DataReporting' class (to be inherited and used by subclasses) - modified BannerLPReporting, exported a great deal of logic to focus on reporting functionality only
Modified paths:	/trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py
—	—	@@ -17,7 +17,7 @@
18	18
19	19
20	20	import sys
21		~~-sys.path.append('../')~~
	21	+# sys.path.append('../')
22	22
23	23	import matplotlib
24	24	import datetime
—	—	@@ -26,11 +26,11 @@
27	27	import HTML
28	28	import math
29	29
30		~~-import QueryData as QD~~
31		~~-import miner_help as mh~~
32		~~-import TimestampProcessor as TP~~
33		~~-import DataLoader as DL~~
34		~~-import HypothesisTest as HT~~
	30	+import Fundraiser_Tools.classes.QueryData as QD
	31	+import Fundraiser_Tools.miner_help as mh
	32	+import Fundraiser_Tools.classes.TimestampProcessor as TP
	33	+import Fundraiser_Tools.classes.DataLoader as DL
	34	+import Fundraiser_Tools.classes.HypothesisTest as HT
35	35
36	36	matplotlib.use('Agg')
37	37
—	—	@@ -52,6 +52,10 @@
53	53	"""
54	54	class DataReporting(object):
55	55
	56	+ """ CLASS MEMBERS: Store the results of a query"""
	57	+ _counts_ = None
	58	+ _times_ = None
	59	+
56	60	_font_size_ = 24
57	61	_fig_width_pt_ = 246.0 # Get this from LaTeX using \showthe\columnwidth
58	62	_inches_per_pt_ = 1.0/72.27 # Convert pt to inch
—	—	@@ -545,6 +549,7 @@
546	550
547	551	class BannerLPReporting(DataReporting):
548	552
	553	+
549	554	"""
550	555	<description>
551	556
—	—	@@ -553,109 +558,19 @@
554	559	RETURN:
555	560
556	561	"""
557		~~- def __init__(self, *args):~~
558		-
559		~~- if len(args) == 2:~~
560		~~- self.campaign = args[0]~~
561		~~- self.start_time = args[1]~~
562		~~- else:~~
563		~~- self.campaign = None~~
564		~~- self.start_time = None~~
565		~~- """~~
566		~~- <description>~~
567		-
568		~~- INPUT:~~
569		-
570		~~- RETURN:~~
571		-
572		~~- """~~
573		~~- def run_query(self,start_time, end_time, campaign, query_name, metric_name):~~
574		-
575		~~- self.init_db()~~
576		-
577		~~- metric_lists = mh.AutoVivification()~~
578		~~- time_lists = mh.AutoVivification()~~
579		~~- # table_data = [] # store the results in a table for reporting~~
580		-
581		~~- # Load the SQL File & Format~~
582		~~- filename = self._sql_path_ + query_name + '.sql'~~
583		~~- sql_stmnt = mh.read_sql(filename)~~
584		-
585		~~- query_name = 'report_bannerLP_metrics' # rename query to work with query store~~
586		~~- sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign])~~
587		-
588		~~- key_index = QD.get_banner_index(query_name)~~
589		~~- time_index = QD.get_time_index(query_name)~~
590		~~- metric_index = QD.get_metric_index(query_name, metric_name)~~
591		-
592		~~- # Composes the data for each banner~~
593		~~- try:~~
594		~~- err_msg = sql_stmnt~~
595		~~- self.cur.execute(sql_stmnt)~~
	562	+ def __init__(self, **kwargs):
596	563
597		~~- results = self.cur.fetchall()~~
598		-
599		~~- # Compile Table Data~~
600		~~- # cpRow = self.listify(row)~~
601		~~- # table_data.append(cpRow)~~
602		-
603		~~- for row in results:~~
604		-
605		~~- key_name = row[key_index]~~
	564	+ self._data_loader_ = DL.BannerLPReportingLoader()
	565	+ DataReporting.__init__(self, **kwargs)
	566	+
	567	+# for key in kwargs:
	568	+#
	569	+# if key == 'font_size':
	570	+# self._font_size_ = kwargs[key]
	571	+# elif key == 'fig_width_pt':
	572	+# self._fig_width_pt_ = kwargs[key]
606	573
607		~~- try:~~
608		~~- metric_lists[key_name].append(row[metric_index])~~
609		~~- time_lists[key_name].append(row[time_index])~~
610		~~- except:~~
611		~~- metric_lists[key_name] = list()~~
612		~~- time_lists[key_name] = list()~~
613		-
614		~~- metric_lists[key_name].append(row[metric_index])~~
615		~~- time_lists[key_name].append(row[time_index])~~
616		-
617		~~- except:~~
618		~~- self.db.rollback()~~
619		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
620		-
621		~~- """ Convert Times to Integers """~~
622		~~- # Find the earliest date~~
623		~~- max_i = 0~~
624		-
625		~~- for key in time_lists.keys():~~
626		~~- for date_str in time_lists[key]:~~
627		~~- day_int = int(date_str[8:10])~~
628		~~- hr_int = int(date_str[11:13])~~
629		~~- date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
630		~~- if date_int > max_i:~~
631		~~- max_i = date_int~~
632		~~- max_day = day_int~~
633		~~- max_hr = hr_int~~
634		-
635		-
636		~~- # Normalize dates~~
637		~~- time_norm = mh.AutoVivification()~~
638		~~- for key in time_lists.keys():~~
639		~~- for date_str in time_lists[key]:~~
640		~~- day = int(date_str[8:10])~~
641		~~- hr = int(date_str[11:13])~~
642		~~- # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
643		~~- elem = (day - max_day) * 24 + (hr - max_hr)~~
644		~~- try:~~
645		~~- time_norm[key].append(elem)~~
646		~~- except:~~
647		~~- time_norm[key] = list()~~
648		~~- time_norm[key].append(elem)~~
649		-
650		~~- # smooth out the values~~
651		~~- #window_length = 20~~
652		~~- #for banner in metric_lists.keys():~~
653		~~- # metric_lists[banner] = smooth(metric_lists[banner], window_length)~~
654		-
655		~~- self.close_db()~~
656		-
657		~~- # return [metric_lists, time_norm, table_data]~~
658		~~- return [metric_lists, time_norm]~~
659		-
	574	+
660	575	"""
661	576	<description>
662	577
—	—	@@ -699,24 +614,11 @@
700	615	type = 'LP' \|\| 'BAN' \|\| 'BAN-TEST' \|\| 'LP-TEST'
701	616
702	617	"""
703		~~- def run(self, type, metric_name):~~
	618	+ def run(self, test_type, start_time, end_time, metric_name):
	619	+
	620	+ # print '\nGenerating ' + test_type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
704	621
705		~~- # Current date & time~~
706		~~- now = datetime.datetime.now()~~
707		~~- #UTC = 8~~
708		~~- #delta = datetime.timedelta(hours=UTC)~~
709		~~- #now = now + delta~~
710		-
711		~~- """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """~~
712		~~- hours_back = 24~~
713		~~- times = self.gen_date_strings(now, hours_back,1,1)~~
714		-
715		~~- start_time = times[0]~~
716		~~- end_time = times[1]~~
717		-
718		~~- print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'~~
719		-
720		~~- if type == 'LP':~~
	622	+ if test_type == 'LP':
721	623	query_name = 'report_LP_metrics'
722	624
723	625	# Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
—	—	@@ -727,7 +629,7 @@
728	630
729	631	title = metric_name + ': ' + start_time + ' -- ' + end_time
730	632	fname = query_name + '_' + metric_name + '.png'
731		~~- elif type == 'BAN':~~
	633	+ elif test_type == 'BAN':
732	634	query_name = 'report_banner_metrics'
733	635
734	636	# Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
—	—	@@ -738,8 +640,8 @@
739	641
740	642	title = metric_name + ': ' + start_time + ' -- ' + end_time
741	643	fname = query_name + '_' + metric_name + '.png'
742		~~- elif type == 'BAN-TEST':~~
743		~~- r = self.get_latest_campaign()~~
	644	+ elif test_type == 'BAN-TEST':
	645	+ r = _data_loader_.get_latest_campaign()
744	646	query_name = 'report_banner_metrics'
745	647
746	648	# Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
—	—	@@ -752,7 +654,7 @@
753	655
754	656	title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
755	657	fname = query_name + '_' + metric_name + '_latest' + '.png'
756		~~- elif type == 'LP-TEST':~~
	658	+ elif test_type == 'LP-TEST':
757	659	r = self.get_latest_campaign()
758	660	query_name = 'report_LP_metrics'
759	661
—	—	@@ -769,10 +671,15 @@
770	672	else:
771	673	sys.exit("Invalid type name - must be 'LP' or 'BAN'.")
772	674
773		~~- return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name)~~
774		~~- metrics = return_val[0]~~
775		~~- times = return_val[1]~~
	675	+ return_val = _data_loader_.run_query(start_time, end_time, campaign, query_name, metric_name)
	676	+ self._counts_ = return_val[0]
	677	+ self._times_ = return_val[1]
776	678
	679	+ """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
	680	+ for key in self._times_.keys():
	681	+ self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 2)
	682	+ self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval)
	683	+
777	684	# title = metric_name + ': ' + start_time + ' -- ' + end_time
778	685	xlabel = 'Time - Hours'
779	686	ylabel = metric_name
—	—	@@ -786,93 +693,14 @@
787	694
788	695	ranges = [min_time, 0]
789	696
790		~~- self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname)~~
	697	+ self.gen_plot(self._counts_, self._times_, title, xlabel, ylabel, ranges, subplot_index, fname)
791	698
792	699	return [metrics, times]
793	700
794		~~- """ !! MOVE INTO DATA LOADER!!~~
795		-
796		~~- <description>~~
797		-
798		~~- INPUT:~~
799		-
800		~~- RETURN:~~
801		-
802		~~- """~~
803		~~- def get_latest_campaign(self):~~
804		-
805		~~- query_name = 'report_latest_campaign'~~
806		~~- self.init_db()~~
807		-
808		~~- """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """~~
809		~~- now = datetime.datetime.now()~~
810		~~- hours_back = 72~~
811		~~- times = self.gen_date_strings(now, hours_back,1,1)~~
812		-
813		~~- sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')~~
814		~~- sql_stmnt = QD.format_query(query_name, sql_stmnt, [times[0]])~~
815		-
816		~~- campaign_index = QD.get_campaign_index(query_name)~~
817		~~- time_index = QD.get_time_index(query_name)~~
818		-
819		~~- try:~~
820		~~- err_msg = sql_stmnt~~
821		~~- self.cur.execute(sql_stmnt)~~
822		-
823		~~- row = self.cur.fetchone()~~
824		~~- except:~~
825		~~- self.db.rollback()~~
826		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
827		-
828		~~- campaign = row[campaign_index]~~
829		~~- timestamp = row[time_index]~~
830		-
831		~~- self.close_db()~~
832		-
833		~~- return [campaign, timestamp]~~
834	701
835		~~- """ !! SHOULD BE MOVED TO TIMEPROCESSOR !!~~
836		~~- Takes as input and converts it to a set of hours counting back from 0~~
837		~~- <description>~~
838		-
839		~~- INPUT:~~
840		~~- time_lists - a dictionary of timestamp lists~~
841		~~- time_norm - a dictionary of normalized times~~
842		-
843		~~- RETURN:~~
844		-
845		~~- """~~
846		~~- def normalize_timestamps(self, time_lists):~~
847		~~- # Find the earliest date~~
848		~~- max_i = 0~~
849		-
850		~~- for key in time_lists.keys():~~
851		~~- for date_str in time_lists[key]:~~
852		~~- day_int = int(date_str[8:10])~~
853		~~- hr_int = int(date_str[11:13])~~
854		~~- date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
855		~~- if date_int > max_i:~~
856		~~- max_i = date_int~~
857		~~- max_day = day_int~~
858		~~- max_hr = hr_int~~
859		-
860		-
861		~~- # Normalize dates~~
862		~~- time_norm = mh.AutoVivification()~~
863		~~- for key in time_lists.keys():~~
864		~~- for date_str in time_lists[key]:~~
865		~~- day = int(date_str[8:10])~~
866		~~- hr = int(date_str[11:13])~~
867		~~- # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
868		~~- elem = (day - max_day) * 24 + (hr - max_hr)~~
869		~~- try:~~
870		~~- time_norm[key].append(elem)~~
871		~~- except:~~
872		~~- time_norm[key] = list()~~
873		~~- time_norm[key].append(elem)~~
874		-
875		~~- return time_norm~~
876	702
	703	+
	704	+
877	705	"""
878	706
879	707	CLASS :: ^MinerReporting^
—	—	@@ -1161,21 +989,22 @@
1162	990
1163	991	""" Execute the query that generates interval reporting data """
1164	992	return_val = self._data_loader_.run_query(start_time, end_time, interval, query_type, metric_name, campaign)
1165		~~- counts = return_val[0]~~
1166		~~- times = return_val[1]~~
	993	+ self._counts_ = return_val[0]
	994	+ self._times_ = return_val[1]
1167	995
1168	996	""" Select only the specified item keys """
1169	997	if len(self._item_keys_) > 0:
1170		~~- counts = self.select_metric_keys(counts)~~
1171		~~- times = self.select_metric_keys(times)~~
	998	+ self._counts_ = self.select_metric_keys(self._counts_)
	999	+ self._times_ = self.select_metric_keys(self._times_)
1172	1000
1173	1001	""" Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
1174		~~- for key in times.keys():~~
1175		~~- times[key] = TP.normalize_timestamps(times[key], False, 2)~~
1176		~~- times[key], counts[key] = TP.normalize_intervals(times[key], counts[key], interval)~~
	1002	+ for key in self._times_.keys():
	1003	+ self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 2)
	1004	+ self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval)
1177	1005
1178	1006	""" Normalize times """
1179		~~- min_time = min(times)~~
	1007	+
	1008	+ min_time = min(self._times_)
1180	1009	ranges = [min_time, 0]
1181	1010
1182	1011	xlabel = 'MINUTES'
—	—	@@ -1186,17 +1015,17 @@
1187	1016	title = campaign + ': ' + metric_full_name + ' -- ' + TP.timestamp_convert_format(start_time,1,2) + ' - ' + TP.timestamp_convert_format(end_time,1,2)
1188	1017	ylabel = metric_full_name
1189	1018
1190		~~- """ Determine List maximums """~~
	1019	+ """ Determine List maximums -- Pre-processing for plotting """
1191	1020	times_max = 0
1192	1021	metrics_max = 0
1193	1022
1194		~~- for key in counts.keys():~~
1195		~~- list_max = max(counts[key])~~
	1023	+ for key in self._counts_.keys():
	1024	+ list_max = max(self._counts_[key])
1196	1025	if list_max > metrics_max:
1197	1026	metrics_max = list_max
1198	1027
1199		~~- for key in times.keys():~~
1200		~~- list_max = max(times[key])~~
	1028	+ for key in self._times_.keys():
	1029	+ list_max = max(self._times_[key])
1201	1030	if list_max > times_max:
1202	1031	times_max = list_max
1203	1032
—	—	@@ -1207,7 +1036,7 @@
1208	1037	ranges.append(metrics_max * 1.1)
1209	1038
1210	1039	""" Generate plots given data """
1211		~~- self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname, labels)~~
	1040	+ self.gen_plot(self._counts_, self._times_, title, xlabel, ylabel, ranges, subplot_index, fname, labels)
1212	1041
1213	1042
1214	1043	"""
—	—	@@ -1222,7 +1051,6 @@
1223	1052	report_LP_confidence
1224	1053
1225	1054	"""
1226		-
1227	1055	class ConfidenceReporting(DataReporting):
1228	1056
1229	1057	_hypothesis_test_ = None

Comments

#Comment by Nikerabbit (talk | contribs) 07:30, 5 May 2011

Why is there code that is commented out with no explanation?

#Comment by Renklauf (talk | contribs) 23:09, 5 May 2011

It's a piece of functionality that I moved into the reporting class family; I wanted to revise it and ensure compatibility with any dependencies before adding it back in. Thanks for spotting it, I'll add some explanation to it.

Status & tagging log

21:34, 4 May 2011 Reedy (talk | contribs) changed the status of r87446 [removed: new added: deferred]