Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py |
— | — | @@ -23,7 +23,7 @@ |
24 | 24 | import datetime |
25 | 25 | import MySQLdb |
26 | 26 | import pylab |
27 | | -from matplotlib.lines import Line2D |
| 27 | +# from matplotlib.lines import Line2D |
28 | 28 | import HTML |
29 | 29 | import math |
30 | 30 | |
— | — | @@ -34,7 +34,7 @@ |
35 | 35 | import Fundraiser_Tools.classes.HypothesisTest as HT |
36 | 36 | |
37 | 37 | |
38 | | -matplotlib.use('Agg') |
| 38 | +# matplotlib.use('Agg') |
39 | 39 | |
40 | 40 | |
41 | 41 | |
— | — | @@ -259,9 +259,7 @@ |
260 | 260 | Usage instructions for executing a report via the IntervalReporting class |
261 | 261 | """ |
262 | 262 | def usage(self): |
263 | | - |
264 | | - """ !! MODIFY -- include instructions on using **kwargs """ |
265 | | - |
| 263 | + |
266 | 264 | print 'Types of queries:' |
267 | 265 | print ' (1) banner' |
268 | 266 | print ' (2) LP' |
— | — | @@ -270,8 +268,20 @@ |
271 | 269 | print " run('20101230160400', '20101230165400', 2, 'banner', 'imp', '20101230JA091_US', ['banner1', 'banner2'])" |
272 | 270 | print " run('20101230160400', '20101230165400', 2, 'LP', 'views', '20101230JA091_US', [])" |
273 | 271 | print '' |
274 | | - |
| 272 | + print " Keyword arguments may also be passed to the constructor:" |
| 273 | + print '' |
| 274 | + print " font_size - font size related to plots" |
| 275 | + print " fig_width_pt - width of the plot" |
| 276 | + print " inches_per_pt - define point size relative to screen" |
| 277 | + print " use_labels - whether to include specified labels in plot" |
| 278 | + print " fig_file_format - file format of the image (default = .png)" |
| 279 | + print " plot_type - line or step plot" |
| 280 | + print " item_keys - the item keys expected (only these will be included)" |
| 281 | + print " file_path - override the file path to store the plot output" |
| 282 | + print " query_type - the type of query to use" |
| 283 | + |
275 | 284 | return |
| 285 | + |
276 | 286 | |
277 | 287 | """ |
278 | 288 | Selecting a subset of the key items in a dictionary |
— | — | @@ -418,14 +428,19 @@ |
419 | 429 | if len(self._item_keys_) > 0: |
420 | 430 | self._counts_ = self.select_metric_keys(self._counts_) |
421 | 431 | self._times_ = self.select_metric_keys(self._times_) |
422 | | - print self._counts_ |
| 432 | + |
423 | 433 | """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """ |
424 | 434 | for key in self._times_.keys(): |
425 | 435 | self._times_[key] = TP.normalize_timestamps(self._times_[key], False, 3) |
426 | 436 | self._times_[key], self._counts_[key] = TP.normalize_intervals(self._times_[key], self._counts_[key], interval) |
427 | 437 | |
428 | | - """ Normalize times """ |
429 | | - |
| 438 | + """ If there are missing metrics add them as zeros """ |
| 439 | + for label in labels: |
| 440 | + |
| 441 | + if not(label in self._times_.keys()): |
| 442 | + self._times_[label] = self._times_[self._times_.keys()[0]] |
| 443 | + self._counts_[label] = [0.0] * len(self._times_[label]) |
| 444 | + |
430 | 445 | min_time = min(self._times_) |
431 | 446 | ranges = [min_time, 0] |
432 | 447 | |
— | — | @@ -470,13 +485,8 @@ |
471 | 486 | |
472 | 487 | CLASS :: ConfidenceReporting |
473 | 488 | |
474 | | - Reports confidence values on specified metrics |
| 489 | + This class uses ConfidenceReportingLoader and HypothesisTest to execute a confidence analysis over a given campaign. |
475 | 490 | |
476 | | - Types of queries supported: |
477 | | - |
478 | | - report_banner_confidence |
479 | | - report_LP_confidence |
480 | | - |
481 | 491 | """ |
482 | 492 | class ConfidenceReporting(DataReporting): |
483 | 493 | |
— | — | @@ -508,22 +518,31 @@ |
509 | 519 | DataReporting.__init__(self, **kwargs) |
510 | 520 | |
511 | 521 | """ |
512 | | - Describes how to run a report !! MODIFY !! |
| 522 | + Describes how to run a confidence report |
513 | 523 | """ |
514 | 524 | def usage(self): |
515 | | - |
| 525 | + |
| 526 | + print 'Method Signature:' |
| 527 | + print " run(self, test_name, query_name, metric_name, campaign, items, start_time, end_time, interval, num_samples)" |
516 | 528 | print '' |
| 529 | + print 'e.g.' |
| 530 | + print " cr.run('mytest!','report_banner_confidence','don_per_imp', 'C_JMvTXT_smallcountry_WS', ['banner1','banner2'], '20110101000000', '20100101000000', 2, 20)" |
| 531 | + print '' |
| 532 | + print " Keyword arguments may also be specified to modify plots:" |
| 533 | + print '' |
| 534 | + print " font_size - font size related to plots" |
| 535 | + print " fig_width_pt - width of the plot" |
| 536 | + print " inches_per_pt - define point size relative to screen" |
| 537 | + print " use_labels - whether to include specified labels in plot" |
| 538 | + print " fig_file_format - file format of the image (default = .png)" |
| 539 | + print " hyp_test - the type of hypothesis test" |
517 | 540 | |
518 | 541 | return |
519 | 542 | |
520 | 543 | |
521 | 544 | """ |
522 | | - <description> |
| 545 | +Confidence plotting over test intervals. This plot takes test intervals into consideration, displaying the means with error bars over each interval |
523 | 546 | |
524 | | - INPUT: |
525 | | - |
526 | | - RETURN: |
527 | | - |
528 | 547 | """ |
529 | 548 | def _gen_plot(self,means_1, means_2, std_devs_1, std_devs_2, times_indices, title, xlabel, ylabel, ranges, subplot_index, labels, fname): |
530 | 549 | |
— | — | @@ -568,6 +587,9 @@ |
569 | 588 | pylab.savefig(self._file_path_ + fname + '.' + self._fig_file_format_, format=self._fig_file_format_) |
570 | 589 | |
571 | 590 | |
| 591 | + """ |
| 592 | + Generates a box plot of all of the data. Does not visualize test intervals. |
| 593 | + """ |
572 | 594 | def _gen_box_plot(self, data, title, ylabel, subplot_index, labels, fname): |
573 | 595 | |
574 | 596 | |
— | — | @@ -600,9 +622,7 @@ |
601 | 623 | pylab.rcParams.update(params) |
602 | 624 | |
603 | 625 | pylab.grid() |
604 | | - #pylab.ylim(ranges[2], ranges[3]) |
605 | | - #pylab.xlim(ranges[0], ranges[1]) |
606 | | - # pylab.legend([e1[0], e2[0]], labels,loc=2) |
| 626 | + |
607 | 627 | |
608 | 628 | pylab.ylabel(ylabel) |
609 | 629 | |
— | — | @@ -610,12 +630,10 @@ |
611 | 631 | pylab.savefig(self._file_path_ + fname + '.' + self._fig_file_format_, format=self._fig_file_format_) |
612 | 632 | |
613 | 633 | """ |
614 | | - Print in Tabular form the means and standard deviation of each group over each |
615 | | - interval |
| 634 | +Print in tabular form the means and standard deviations of each group over each interval. Provides a detailed numerical output |
| 635 | +of the analysis. |
616 | 636 | |
617 | | - INPUT: |
618 | | - |
619 | | - RETURN: |
| 637 | + RETURN: the winner string, percent increase of the winner for the metric |
620 | 638 | |
621 | 639 | """ |
622 | 640 | def print_metrics(self, filename, metric_name, means_1, means_2, std_devs_1, std_devs_2, times_indices, labels, test_call): |
— | — | @@ -624,10 +642,20 @@ |
625 | 643 | file = open(self._file_path_ + filename, 'w') |
626 | 644 | |
627 | 645 | """ Compute % increase and report """ |
628 | | - av_means_1 = sum(means_1) / len(means_1) |
629 | | - av_means_2 = sum(means_2) / len(means_2) |
630 | | - percent_increase = math.fabs(av_means_1 - av_means_2) / min(av_means_1,av_means_2) * 100.0 |
| 646 | + try: |
| 647 | + av_means_1 = sum(means_1) / len(means_1) |
| 648 | + av_means_2 = sum(means_2) / len(means_2) |
| 649 | + percent_increase = math.fabs(av_means_1 - av_means_2) / min(av_means_1,av_means_2) * 100.0 |
631 | 650 | |
| 651 | + except Exception as inst: |
| 652 | + |
| 653 | + print 'Percent increase could not be computed.' |
| 654 | + print type(inst) # the exception instance |
| 655 | + print inst.args # arguments stored in .args |
| 656 | + print inst # __str__ allows args to be printed directly |
| 657 | + |
| 658 | + percent_increase = 0.0 |
| 659 | + |
632 | 660 | """ Compute the average standard deviations """ |
633 | 661 | av_std_dev_1 = 0 |
634 | 662 | av_std_dev_2 = 0 |
— | — | @@ -648,15 +676,9 @@ |
649 | 677 | win_str = '\nThe winner "' + winner + '" had a %.2f%s increase.' |
650 | 678 | win_str = win_str % (percent_increase, '%') |
651 | 679 | |
652 | | - #print '\nCOMMAND = ' + test_call |
653 | 680 | file.write('\nCOMMAND = ' + test_call) |
654 | 681 | |
655 | | - |
656 | | -# print '\n\n' + metric_name |
657 | | -# print '\nitem 1 = ' + labels[0] |
658 | | -# print 'item 2 = ' + labels[1] |
659 | | -# print win_str |
660 | | -# print '\ninterval\tmean1\t\tmean2\t\tstddev1\t\tstddev2\n' |
| 682 | + |
661 | 683 | file.write('\n\n' + metric_name) |
662 | 684 | file.write('\nitem 1 = ' + labels[0] + '\n') |
663 | 685 | file.write('\nitem 2 = ' + labels[1] + '\n') |
— | — | @@ -675,11 +697,8 @@ |
676 | 698 | """ Print out the averaged parameters """ |
677 | 699 | line_args = '%.5f\t\t' + '%.5f\t\t' + '%.5f\t\t' + '%.5f\n' |
678 | 700 | line_str = line_args % (av_means_1, av_means_2, av_std_dev_1, av_std_dev_2) |
| 701 | + |
679 | 702 | |
680 | | -# print '\n\nOverall Parameters -- the confidence test was run with these parameters:\n' |
681 | | -# print '\nmean1\t\tmean2\t\tstddev1\t\tstddev2\n' |
682 | | -# print line_str |
683 | | - |
684 | 703 | file.write('\n\nOverall Parameters:\n') |
685 | 704 | file.write('\nmean1\t\tmean2\t\tstddev1\t\tstddev2\n') |
686 | 705 | file.write(line_str) |
— | — | @@ -689,12 +708,18 @@ |
690 | 709 | return [winner, percent_increase] |
691 | 710 | |
692 | 711 | """ |
693 | | - Executes the test reporting |
| 712 | + Executes the test reporting. |
694 | 713 | |
695 | | - INPUT: |
696 | | - |
697 | | - RETURN: |
| 714 | + @param num_samples: the number of samples per test interval |
| 715 | + @type num_samples: integer |
698 | 716 | |
| 717 | + @param interval: the length of the test interval in minutes |
| 718 | + @type interval: integer |
| 719 | + |
| 720 | + @param items: datasets for paired testing |
| 721 | + @type items: dictionary |
| 722 | + |
| 723 | + |
699 | 724 | """ |
700 | 725 | def run(self, test_name, query_name, metric_name, campaign, items, start_time, end_time, interval, num_samples): |
701 | 726 | |