r87446 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r87445‎ | r87446 | r87447 >
Date:20:15, 4 May 2011
Author:rfaulk
Status:deferred (Comments)
Tags:
Comment:
- generalized import paths for Fundraiser_Tools
- added data members to 'DataReporting' class (to be inherited and used by subclasses)
- modified BannerLPReporting, exported a great deal of logic to focus on reporting functionality only
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py
@@ -17,7 +17,7 @@
1818
1919
2020 import sys
21 -sys.path.append('../')
 21+# sys.path.append('../')
2222
2323 import matplotlib
2424 import datetime
@@ -26,11 +26,11 @@
2727 import HTML
2828 import math
2929
30 -import QueryData as QD
31 -import miner_help as mh
32 -import TimestampProcessor as TP
33 -import DataLoader as DL
34 -import HypothesisTest as HT
 30+import Fundraiser_Tools.classes.QueryData as QD
 31+import Fundraiser_Tools.miner_help as mh
 32+import Fundraiser_Tools.classes.TimestampProcessor as TP
 33+import Fundraiser_Tools.classes.DataLoader as DL
 34+import Fundraiser_Tools.classes.HypothesisTest as HT
3535
3636 matplotlib.use('Agg')
3737
@@ -52,6 +52,10 @@
5353 """
5454 class DataReporting(object):
5555
 56+ """ CLASS MEMBERS: Store the results of a query"""
 57+ _counts_ = None
 58+ _times_ = None
 59+
5660 _font_size_ = 24
5761 _fig_width_pt_ = 246.0 # Get this from LaTeX using \showthe\columnwidth
5862 _inches_per_pt_ = 1.0/72.27 # Convert pt to inch
@@ -545,6 +549,7 @@
546550
547551 class BannerLPReporting(DataReporting):
548552
 553+
549554 """
550555 <description>
551556
@@ -553,109 +558,19 @@
554559 RETURN:
555560
556561 """
557 - def __init__(self, *args):
558 -
559 - if len(args) == 2:
560 - self.campaign = args[0]
561 - self.start_time = args[1]
562 - else:
563 - self.campaign = None
564 - self.start_time = None
565 - """
566 - <description>
567 -
568 - INPUT:
569 -
570 - RETURN:
571 -
572 - """
573 - def run_query(self,start_time, end_time, campaign, query_name, metric_name):
574 -
575 - self.init_db()
576 -
577 - metric_lists = mh.AutoVivification()
578 - time_lists = mh.AutoVivification()
579 - # table_data = [] # store the results in a table for reporting
580 -
581 - # Load the SQL File & Format
582 - filename = self._sql_path_ + query_name + '.sql'
583 - sql_stmnt = mh.read_sql(filename)
584 -
585 - query_name = 'report_bannerLP_metrics' # rename query to work with query store
586 - sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign])
587 -
588 - key_index = QD.get_banner_index(query_name)
589 - time_index = QD.get_time_index(query_name)
590 - metric_index = QD.get_metric_index(query_name, metric_name)
591 -
592 - # Composes the data for each banner
593 - try:
594 - err_msg = sql_stmnt
595 - self.cur.execute(sql_stmnt)
 562+ def __init__(self, **kwargs):
596563
597 - results = self.cur.fetchall()
598 -
599 - # Compile Table Data
600 - # cpRow = self.listify(row)
601 - # table_data.append(cpRow)
602 -
603 - for row in results:
604 -
605 - key_name = row[key_index]
 564+ self._data_loader_ = DL.BannerLPReportingLoader()
 565+ DataReporting.__init__(self, **kwargs)
 566+
 567+# for key in kwargs:
 568+#
 569+# if key == 'font_size':
 570+# self._font_size_ = kwargs[key]
 571+# elif key == 'fig_width_pt':
 572+# self._fig_width_pt_ = kwargs[key]
606573
607 - try:
608 - metric_lists[key_name].append(row[metric_index])
609 - time_lists[key_name].append(row[time_index])
610 - except:
611 - metric_lists[key_name] = list()
612 - time_lists[key_name] = list()
613 -
614 - metric_lists[key_name].append(row[metric_index])
615 - time_lists[key_name].append(row[time_index])
616 -
617 - except:
618 - self.db.rollback()
619 - sys.exit("Database Interface Exception:\n" + err_msg)
620 -
621 - """ Convert Times to Integers """
622 - # Find the earliest date
623 - max_i = 0
624 -
625 - for key in time_lists.keys():
626 - for date_str in time_lists[key]:
627 - day_int = int(date_str[8:10])
628 - hr_int = int(date_str[11:13])
629 - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
630 - if date_int > max_i:
631 - max_i = date_int
632 - max_day = day_int
633 - max_hr = hr_int
634 -
635 -
636 - # Normalize dates
637 - time_norm = mh.AutoVivification()
638 - for key in time_lists.keys():
639 - for date_str in time_lists[key]:
640 - day = int(date_str[8:10])
641 - hr = int(date_str[11:13])
642 - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
643 - elem = (day - max_day) * 24 + (hr - max_hr)
644 - try:
645 - time_norm[key].append(elem)
646 - except:
647 - time_norm[key] = list()
648 - time_norm[key].append(elem)
649 -
650 - # smooth out the values
651 - #window_length = 20
652 - #for banner in metric_lists.keys():
653 - # metric_lists[banner] = smooth(metric_lists[banner], window_length)
654 -
655 - self.close_db()
656 -
657 - # return [metric_lists, time_norm, table_data]
658 - return [metric_lists, time_norm]
659 -
 574+
660575 """
661576 <description>
662577
@@ -699,24 +614,11 @@
700615 type = 'LP' || 'BAN' || 'BAN-TEST' || 'LP-TEST'
701616
702617 """
703 - def run(self, type, metric_name):
 618+ def run(self, test_type, start_time, end_time, metric_name):
 619+
 620+ # print '\nGenerating ' + test_type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
704621
705 - # Current date & time
706 - now = datetime.datetime.now()
707 - #UTC = 8
708 - #delta = datetime.timedelta(hours=UTC)
709 - #now = now + delta
710 -
711 - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
712 - hours_back = 24
713 - times = self.gen_date_strings(now, hours_back,1,1)
714 -
715 - start_time = times[0]
716 - end_time = times[1]
717 -
718 - print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
719 -
720 - if type == 'LP':
 622+ if test_type == 'LP':
721623 query_name = 'report_LP_metrics'
722624
723625 # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
@@ -727,7 +629,7 @@
728630
729631 title = metric_name + ': ' + start_time + ' -- ' + end_time
730632 fname = query_name + '_' + metric_name + '.png'
731 - elif type == 'BAN':
 633+ elif test_type == 'BAN':
732634 query_name = 'report_banner_metrics'
733635
734636 # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
@@ -738,8 +640,8 @@
739641
740642 title = metric_name + ': ' + start_time + ' -- ' + end_time
741643 fname = query_name + '_' + metric_name + '.png'
742 - elif type == 'BAN-TEST':
743 - r = self.get_latest_campaign()
 644+ elif test_type == 'BAN-TEST':
 645+ r = _data_loader_.get_latest_campaign()
744646 query_name = 'report_banner_metrics'
745647
746648 # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
@@ -752,7 +654,7 @@
753655
754656 title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
755657 fname = query_name + '_' + metric_name + '_latest' + '.png'
756 - elif type == 'LP-TEST':
 658+ elif test_type == 'LP-TEST':
757659 r = self.get_latest_campaign()
758660 query_name = 'report_LP_metrics'
759661
@@ -769,10 +671,15 @@
770672 else:
771673 sys.exit("Invalid type name - must be 'LP' or 'BAN'.")
772674
773 - return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name)
774 - metrics = return_val[0]
775 - times = return_val[1]
 675+ return_val = _data_loader_.run_query(start_time, end_time, campaign, query_name, metric_name)
 676+ self._counts_ = return_val[0]
 677+ self._times_ = return_val[1]
776678
 679+ """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
 680+ for key in self._times_.keys():
 681+ self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 2)
 682+ self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval)
 683+
777684 # title = metric_name + ': ' + start_time + ' -- ' + end_time
778685 xlabel = 'Time - Hours'
779686 ylabel = metric_name
@@ -786,93 +693,14 @@
787694
788695 ranges = [min_time, 0]
789696
790 - self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname)
 697+ self.gen_plot(self._counts_, self._times_, title, xlabel, ylabel, ranges, subplot_index, fname)
791698
792699 return [metrics, times]
793700
794 - """ !! MOVE INTO DATA LOADER!!
795 -
796 - <description>
797 -
798 - INPUT:
799 -
800 - RETURN:
801 -
802 - """
803 - def get_latest_campaign(self):
804 -
805 - query_name = 'report_latest_campaign'
806 - self.init_db()
807 -
808 - """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """
809 - now = datetime.datetime.now()
810 - hours_back = 72
811 - times = self.gen_date_strings(now, hours_back,1,1)
812 -
813 - sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')
814 - sql_stmnt = QD.format_query(query_name, sql_stmnt, [times[0]])
815 -
816 - campaign_index = QD.get_campaign_index(query_name)
817 - time_index = QD.get_time_index(query_name)
818 -
819 - try:
820 - err_msg = sql_stmnt
821 - self.cur.execute(sql_stmnt)
822 -
823 - row = self.cur.fetchone()
824 - except:
825 - self.db.rollback()
826 - sys.exit("Database Interface Exception:\n" + err_msg)
827 -
828 - campaign = row[campaign_index]
829 - timestamp = row[time_index]
830 -
831 - self.close_db()
832 -
833 - return [campaign, timestamp]
834701
835 - """ !! SHOULD BE MOVED TO TIMEPROCESSOR !!
836 - Takes as input and converts it to a set of hours counting back from 0
837 - <description>
838 -
839 - INPUT:
840 - time_lists - a dictionary of timestamp lists
841 - time_norm - a dictionary of normalized times
842 -
843 - RETURN:
844 -
845 - """
846 - def normalize_timestamps(self, time_lists):
847 - # Find the earliest date
848 - max_i = 0
849 -
850 - for key in time_lists.keys():
851 - for date_str in time_lists[key]:
852 - day_int = int(date_str[8:10])
853 - hr_int = int(date_str[11:13])
854 - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
855 - if date_int > max_i:
856 - max_i = date_int
857 - max_day = day_int
858 - max_hr = hr_int
859 -
860 -
861 - # Normalize dates
862 - time_norm = mh.AutoVivification()
863 - for key in time_lists.keys():
864 - for date_str in time_lists[key]:
865 - day = int(date_str[8:10])
866 - hr = int(date_str[11:13])
867 - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
868 - elem = (day - max_day) * 24 + (hr - max_hr)
869 - try:
870 - time_norm[key].append(elem)
871 - except:
872 - time_norm[key] = list()
873 - time_norm[key].append(elem)
874 -
875 - return time_norm
876702
 703+
 704+
877705 """
878706
879707 CLASS :: ^MinerReporting^
@@ -1161,21 +989,22 @@
1162990
1163991 """ Execute the query that generates interval reporting data """
1164992 return_val = self._data_loader_.run_query(start_time, end_time, interval, query_type, metric_name, campaign)
1165 - counts = return_val[0]
1166 - times = return_val[1]
 993+ self._counts_ = return_val[0]
 994+ self._times_ = return_val[1]
1167995
1168996 """ Select only the specified item keys """
1169997 if len(self._item_keys_) > 0:
1170 - counts = self.select_metric_keys(counts)
1171 - times = self.select_metric_keys(times)
 998+ self._counts_ = self.select_metric_keys(self._counts_)
 999+ self._times_ = self.select_metric_keys(self._times_)
11721000
11731001 """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
1174 - for key in times.keys():
1175 - times[key] = TP.normalize_timestamps(times[key], False, 2)
1176 - times[key], counts[key] = TP.normalize_intervals(times[key], counts[key], interval)
 1002+ for key in self._times_.keys():
 1003+ self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 2)
 1004+ self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval)
11771005
11781006 """ Normalize times """
1179 - min_time = min(times)
 1007+
 1008+ min_time = min(self._times_)
11801009 ranges = [min_time, 0]
11811010
11821011 xlabel = 'MINUTES'
@@ -1186,17 +1015,17 @@
11871016 title = campaign + ': ' + metric_full_name + ' -- ' + TP.timestamp_convert_format(start_time,1,2) + ' - ' + TP.timestamp_convert_format(end_time,1,2)
11881017 ylabel = metric_full_name
11891018
1190 - """ Determine List maximums """
 1019+ """ Determine List maximums -- Pre-processing for plotting """
11911020 times_max = 0
11921021 metrics_max = 0
11931022
1194 - for key in counts.keys():
1195 - list_max = max(counts[key])
 1023+ for key in self._counts_.keys():
 1024+ list_max = max(self._counts_[key])
11961025 if list_max > metrics_max:
11971026 metrics_max = list_max
11981027
1199 - for key in times.keys():
1200 - list_max = max(times[key])
 1028+ for key in self._times_.keys():
 1029+ list_max = max(self._times_[key])
12011030 if list_max > times_max:
12021031 times_max = list_max
12031032
@@ -1207,7 +1036,7 @@
12081037 ranges.append(metrics_max * 1.1)
12091038
12101039 """ Generate plots given data """
1211 - self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname, labels)
 1040+ self.gen_plot(self._counts_, self._times_, title, xlabel, ylabel, ranges, subplot_index, fname, labels)
12121041
12131042
12141043 """
@@ -1222,7 +1051,6 @@
12231052 report_LP_confidence
12241053
12251054 """
1226 -
12271055 class ConfidenceReporting(DataReporting):
12281056
12291057 _hypothesis_test_ = None

Comments

#Comment by Nikerabbit (talk | contribs)   07:30, 5 May 2011

Why is there code that is commented out with no explanation?

#Comment by Renklauf (talk | contribs)   23:09, 5 May 2011

It's a piece of functionality that I moved into the reporting class family; I wanted to revise it and ensure compatibility with any dependencies before adding it back in. Thanks for spotting it, I'll add some explanation to it.

Status & tagging log