Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py |
— | — | @@ -17,7 +17,7 @@ |
18 | 18 | |
19 | 19 | |
20 | 20 | import sys |
21 | | -sys.path.append('../') |
| 21 | +# sys.path.append('../') |
22 | 22 | |
23 | 23 | import matplotlib |
24 | 24 | import datetime |
— | — | @@ -26,11 +26,11 @@ |
27 | 27 | import HTML |
28 | 28 | import math |
29 | 29 | |
30 | | -import QueryData as QD |
31 | | -import miner_help as mh |
32 | | -import TimestampProcessor as TP |
33 | | -import DataLoader as DL |
34 | | -import HypothesisTest as HT |
| 30 | +import Fundraiser_Tools.classes.QueryData as QD |
| 31 | +import Fundraiser_Tools.miner_help as mh |
| 32 | +import Fundraiser_Tools.classes.TimestampProcessor as TP |
| 33 | +import Fundraiser_Tools.classes.DataLoader as DL |
| 34 | +import Fundraiser_Tools.classes.HypothesisTest as HT |
35 | 35 | |
36 | 36 | matplotlib.use('Agg') |
37 | 37 | |
— | — | @@ -52,6 +52,10 @@ |
53 | 53 | """ |
54 | 54 | class DataReporting(object): |
55 | 55 | |
| 56 | + """ CLASS MEMBERS: Store the results of a query""" |
| 57 | + _counts_ = None |
| 58 | + _times_ = None |
| 59 | + |
56 | 60 | _font_size_ = 24 |
57 | 61 | _fig_width_pt_ = 246.0 # Get this from LaTeX using \showthe\columnwidth |
58 | 62 | _inches_per_pt_ = 1.0/72.27 # Convert pt to inch |
— | — | @@ -545,6 +549,7 @@ |
546 | 550 | |
547 | 551 | class BannerLPReporting(DataReporting): |
548 | 552 | |
| 553 | + |
549 | 554 | """ |
550 | 555 | <description> |
551 | 556 | |
— | — | @@ -553,109 +558,19 @@ |
554 | 559 | RETURN: |
555 | 560 | |
556 | 561 | """ |
557 | | - def __init__(self, *args): |
558 | | - |
559 | | - if len(args) == 2: |
560 | | - self.campaign = args[0] |
561 | | - self.start_time = args[1] |
562 | | - else: |
563 | | - self.campaign = None |
564 | | - self.start_time = None |
565 | | - """ |
566 | | - <description> |
567 | | - |
568 | | - INPUT: |
569 | | - |
570 | | - RETURN: |
571 | | - |
572 | | - """ |
573 | | - def run_query(self,start_time, end_time, campaign, query_name, metric_name): |
574 | | - |
575 | | - self.init_db() |
576 | | - |
577 | | - metric_lists = mh.AutoVivification() |
578 | | - time_lists = mh.AutoVivification() |
579 | | - # table_data = [] # store the results in a table for reporting |
580 | | - |
581 | | - # Load the SQL File & Format |
582 | | - filename = self._sql_path_ + query_name + '.sql' |
583 | | - sql_stmnt = mh.read_sql(filename) |
584 | | - |
585 | | - query_name = 'report_bannerLP_metrics' # rename query to work with query store |
586 | | - sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign]) |
587 | | - |
588 | | - key_index = QD.get_banner_index(query_name) |
589 | | - time_index = QD.get_time_index(query_name) |
590 | | - metric_index = QD.get_metric_index(query_name, metric_name) |
591 | | - |
592 | | - # Composes the data for each banner |
593 | | - try: |
594 | | - err_msg = sql_stmnt |
595 | | - self.cur.execute(sql_stmnt) |
| 562 | + def __init__(self, **kwargs): |
596 | 563 | |
597 | | - results = self.cur.fetchall() |
598 | | - |
599 | | - # Compile Table Data |
600 | | - # cpRow = self.listify(row) |
601 | | - # table_data.append(cpRow) |
602 | | - |
603 | | - for row in results: |
604 | | - |
605 | | - key_name = row[key_index] |
| 564 | + self._data_loader_ = DL.BannerLPReportingLoader() # loader object that run() uses for run_query / get_latest_campaign |
| 565 | + DataReporting.__init__(self, **kwargs) # forward all keyword options to the base-class constructor |
| 566 | + |
| 567 | +# for key in kwargs: |
| 568 | +# |
| 569 | +# if key == 'font_size': |
| 570 | +# self._font_size_ = kwargs[key] |
| 571 | +# elif key == 'fig_width_pt': |
| 572 | +# self._fig_width_pt_ = kwargs[key] |
606 | 573 | |
607 | | - try: |
608 | | - metric_lists[key_name].append(row[metric_index]) |
609 | | - time_lists[key_name].append(row[time_index]) |
610 | | - except: |
611 | | - metric_lists[key_name] = list() |
612 | | - time_lists[key_name] = list() |
613 | | - |
614 | | - metric_lists[key_name].append(row[metric_index]) |
615 | | - time_lists[key_name].append(row[time_index]) |
616 | | - |
617 | | - except: |
618 | | - self.db.rollback() |
619 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
620 | | - |
621 | | - """ Convert Times to Integers """ |
622 | | - # Find the earliest date |
623 | | - max_i = 0 |
624 | | - |
625 | | - for key in time_lists.keys(): |
626 | | - for date_str in time_lists[key]: |
627 | | - day_int = int(date_str[8:10]) |
628 | | - hr_int = int(date_str[11:13]) |
629 | | - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
630 | | - if date_int > max_i: |
631 | | - max_i = date_int |
632 | | - max_day = day_int |
633 | | - max_hr = hr_int |
634 | | - |
635 | | - |
636 | | - # Normalize dates |
637 | | - time_norm = mh.AutoVivification() |
638 | | - for key in time_lists.keys(): |
639 | | - for date_str in time_lists[key]: |
640 | | - day = int(date_str[8:10]) |
641 | | - hr = int(date_str[11:13]) |
642 | | - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
643 | | - elem = (day - max_day) * 24 + (hr - max_hr) |
644 | | - try: |
645 | | - time_norm[key].append(elem) |
646 | | - except: |
647 | | - time_norm[key] = list() |
648 | | - time_norm[key].append(elem) |
649 | | - |
650 | | - # smooth out the values |
651 | | - #window_length = 20 |
652 | | - #for banner in metric_lists.keys(): |
653 | | - # metric_lists[banner] = smooth(metric_lists[banner], window_length) |
654 | | - |
655 | | - self.close_db() |
656 | | - |
657 | | - # return [metric_lists, time_norm, table_data] |
658 | | - return [metric_lists, time_norm] |
659 | | - |
| 574 | + |
660 | 575 | """ |
661 | 576 | <description> |
662 | 577 | |
— | — | @@ -699,24 +614,11 @@ |
700 | | - type = 'LP' || 'BAN' || 'BAN-TEST' || 'LP-TEST' |
| 615 | + test_type = 'LP' || 'BAN' || 'BAN-TEST' || 'LP-TEST' |
701 | 616 | |
702 | 617 | """ |
703 | | - def run(self, type, metric_name): |
| 618 | + def run(self, test_type, start_time, end_time, metric_name): |
| 619 | + |
| 620 | + # print '\nGenerating ' + test_type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
704 | 621 | |
705 | | - # Current date & time |
706 | | - now = datetime.datetime.now() |
707 | | - #UTC = 8 |
708 | | - #delta = datetime.timedelta(hours=UTC) |
709 | | - #now = now + delta |
710 | | - |
711 | | - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """ |
712 | | - hours_back = 24 |
713 | | - times = self.gen_date_strings(now, hours_back,1,1) |
714 | | - |
715 | | - start_time = times[0] |
716 | | - end_time = times[1] |
717 | | - |
718 | | - print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
719 | | - |
720 | | - if type == 'LP': |
| 622 | + if test_type == 'LP': |
721 | 623 | query_name = 'report_LP_metrics' |
722 | 624 | |
723 | 625 | # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
— | — | @@ -727,7 +629,7 @@ |
728 | 630 | |
729 | 631 | title = metric_name + ': ' + start_time + ' -- ' + end_time |
730 | 632 | fname = query_name + '_' + metric_name + '.png' |
731 | | - elif type == 'BAN': |
| 633 | + elif test_type == 'BAN': |
732 | 634 | query_name = 'report_banner_metrics' |
733 | 635 | |
734 | 636 | # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
— | — | @@ -738,8 +640,8 @@ |
739 | 641 | |
740 | 642 | title = metric_name + ': ' + start_time + ' -- ' + end_time |
741 | 643 | fname = query_name + '_' + metric_name + '.png' |
742 | | - elif type == 'BAN-TEST': |
743 | | - r = self.get_latest_campaign() |
| 644 | + elif test_type == 'BAN-TEST': |
| 645 | + r = _data_loader_.get_latest_campaign() |
744 | 646 | query_name = 'report_banner_metrics' |
745 | 647 | |
746 | 648 | # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
— | — | @@ -752,7 +654,7 @@ |
753 | 655 | |
754 | 656 | title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign |
755 | 657 | fname = query_name + '_' + metric_name + '_latest' + '.png' |
756 | | - elif type == 'LP-TEST': |
| 658 | + elif test_type == 'LP-TEST': |
757 | 659 | r = self.get_latest_campaign() |
758 | 660 | query_name = 'report_LP_metrics' |
759 | 661 | |
— | — | @@ -769,10 +671,15 @@ |
770 | 672 | else: |
771 | 673 | sys.exit("Invalid type name - must be 'LP' or 'BAN'.") |
772 | 674 | |
773 | | - return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name) |
774 | | - metrics = return_val[0] |
775 | | - times = return_val[1] |
| 675 | + return_val = _data_loader_.run_query(start_time, end_time, campaign, query_name, metric_name) |
| 676 | + self._counts_ = return_val[0] |
| 677 | + self._times_ = return_val[1] |
776 | 678 | |
| 679 | + """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """ |
| 680 | + for key in self._times_.keys(): |
| 681 | + self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 2) |
| 682 | + self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval) |
| 683 | + |
777 | 684 | # title = metric_name + ': ' + start_time + ' -- ' + end_time |
778 | 685 | xlabel = 'Time - Hours' |
779 | 686 | ylabel = metric_name |
— | — | @@ -786,93 +693,14 @@ |
787 | 694 | |
788 | 695 | ranges = [min_time, 0] |
789 | 696 | |
790 | | - self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname) |
| 697 | + self.gen_plot(self._counts_, self._times_, title, xlabel, ylabel, ranges, subplot_index, fname) |
791 | 698 | |
792 | 699 | return [metrics, times] |
793 | 700 | |
794 | | - """ !! MOVE INTO DATA LOADER!! |
795 | | - |
796 | | - <description> |
797 | | - |
798 | | - INPUT: |
799 | | - |
800 | | - RETURN: |
801 | | - |
802 | | - """ |
803 | | - def get_latest_campaign(self): |
804 | | - |
805 | | - query_name = 'report_latest_campaign' |
806 | | - self.init_db() |
807 | | - |
808 | | - """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """ |
809 | | - now = datetime.datetime.now() |
810 | | - hours_back = 72 |
811 | | - times = self.gen_date_strings(now, hours_back,1,1) |
812 | | - |
813 | | - sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql') |
814 | | - sql_stmnt = QD.format_query(query_name, sql_stmnt, [times[0]]) |
815 | | - |
816 | | - campaign_index = QD.get_campaign_index(query_name) |
817 | | - time_index = QD.get_time_index(query_name) |
818 | | - |
819 | | - try: |
820 | | - err_msg = sql_stmnt |
821 | | - self.cur.execute(sql_stmnt) |
822 | | - |
823 | | - row = self.cur.fetchone() |
824 | | - except: |
825 | | - self.db.rollback() |
826 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
827 | | - |
828 | | - campaign = row[campaign_index] |
829 | | - timestamp = row[time_index] |
830 | | - |
831 | | - self.close_db() |
832 | | - |
833 | | - return [campaign, timestamp] |
834 | 701 | |
835 | | - """ !! SHOULD BE MOVED TO TIMEPROCESSOR !! |
836 | | - Takes as input and converts it to a set of hours counting back from 0 |
837 | | - <description> |
838 | | - |
839 | | - INPUT: |
840 | | - time_lists - a dictionary of timestamp lists |
841 | | - time_norm - a dictionary of normalized times |
842 | | - |
843 | | - RETURN: |
844 | | - |
845 | | - """ |
846 | | - def normalize_timestamps(self, time_lists): |
847 | | - # Find the earliest date |
848 | | - max_i = 0 |
849 | | - |
850 | | - for key in time_lists.keys(): |
851 | | - for date_str in time_lists[key]: |
852 | | - day_int = int(date_str[8:10]) |
853 | | - hr_int = int(date_str[11:13]) |
854 | | - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
855 | | - if date_int > max_i: |
856 | | - max_i = date_int |
857 | | - max_day = day_int |
858 | | - max_hr = hr_int |
859 | | - |
860 | | - |
861 | | - # Normalize dates |
862 | | - time_norm = mh.AutoVivification() |
863 | | - for key in time_lists.keys(): |
864 | | - for date_str in time_lists[key]: |
865 | | - day = int(date_str[8:10]) |
866 | | - hr = int(date_str[11:13]) |
867 | | - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
868 | | - elem = (day - max_day) * 24 + (hr - max_hr) |
869 | | - try: |
870 | | - time_norm[key].append(elem) |
871 | | - except: |
872 | | - time_norm[key] = list() |
873 | | - time_norm[key].append(elem) |
874 | | - |
875 | | - return time_norm |
876 | 702 | |
| 703 | + |
| 704 | + |
877 | 705 | """ |
878 | 706 | |
879 | 707 | CLASS :: ^MinerReporting^ |
— | — | @@ -1161,21 +989,22 @@ |
1162 | 990 | |
1163 | 991 | """ Execute the query that generates interval reporting data """ |
1164 | 992 | return_val = self._data_loader_.run_query(start_time, end_time, interval, query_type, metric_name, campaign) |
1165 | | - counts = return_val[0] |
1166 | | - times = return_val[1] |
| 993 | + self._counts_ = return_val[0] |
| 994 | + self._times_ = return_val[1] |
1167 | 995 | |
1168 | 996 | """ Select only the specified item keys """ |
1169 | 997 | if len(self._item_keys_) > 0: |
1170 | | - counts = self.select_metric_keys(counts) |
1171 | | - times = self.select_metric_keys(times) |
| 998 | + self._counts_ = self.select_metric_keys(self._counts_) |
| 999 | + self._times_ = self.select_metric_keys(self._times_) |
1172 | 1000 | |
1173 | 1001 | """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """ |
1174 | | - for key in times.keys(): |
1175 | | - times[key] = TP.normalize_timestamps(times[key], False, 2) |
1176 | | - times[key], counts[key] = TP.normalize_intervals(times[key], counts[key], interval) |
| 1002 | + for key in self._times_.keys(): |
| 1003 | + self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 2) |
| 1004 | + self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval) |
1177 | 1005 | |
1178 | 1006 | """ Normalize times """ |
1179 | | - min_time = min(times) |
| 1007 | + |
| 1008 | + min_time = min(self._times_) |
1180 | 1009 | ranges = [min_time, 0] |
1181 | 1010 | |
1182 | 1011 | xlabel = 'MINUTES' |
— | — | @@ -1186,17 +1015,17 @@ |
1187 | 1016 | title = campaign + ': ' + metric_full_name + ' -- ' + TP.timestamp_convert_format(start_time,1,2) + ' - ' + TP.timestamp_convert_format(end_time,1,2) |
1188 | 1017 | ylabel = metric_full_name |
1189 | 1018 | |
1190 | | - """ Determine List maximums """ |
| 1019 | + """ Determine List maximums -- Pre-processing for plotting """ |
1191 | 1020 | times_max = 0 |
1192 | 1021 | metrics_max = 0 |
1193 | 1022 | |
1194 | | - for key in counts.keys(): |
1195 | | - list_max = max(counts[key]) |
| 1023 | + for key in self._counts_.keys(): |
| 1024 | + list_max = max(self._counts_[key]) |
1196 | 1025 | if list_max > metrics_max: |
1197 | 1026 | metrics_max = list_max |
1198 | 1027 | |
1199 | | - for key in times.keys(): |
1200 | | - list_max = max(times[key]) |
| 1028 | + for key in self._times_.keys(): |
| 1029 | + list_max = max(self._times_[key]) |
1201 | 1030 | if list_max > times_max: |
1202 | 1031 | times_max = list_max |
1203 | 1032 | |
— | — | @@ -1207,7 +1036,7 @@ |
1208 | 1037 | ranges.append(metrics_max * 1.1) |
1209 | 1038 | |
1210 | 1039 | """ Generate plots given data """ |
1211 | | - self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname, labels) |
| 1040 | + self.gen_plot(self._counts_, self._times_, title, xlabel, ylabel, ranges, subplot_index, fname, labels) |
1212 | 1041 | |
1213 | 1042 | |
1214 | 1043 | """ |
— | — | @@ -1222,7 +1051,6 @@ |
1223 | 1052 | report_LP_confidence |
1224 | 1053 | |
1225 | 1054 | """ |
1226 | | - |
1227 | 1055 | class ConfidenceReporting(DataReporting): |
1228 | 1056 | |
1229 | 1057 | _hypothesis_test_ = None |