r79292 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r79291‎ | r79292 | r79293 >
Date:20:57, 30 December 2010
Author:rfaulk
Status:deferred
Tags:
Comment:
Total amounts reporting for Amount50 totals. Fix reporting for compare reporting on statistical significance.
Modified paths:
  • /trunk/fundraiser-statistics/bash/plot_build_hr.sh (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/fundraiser_reporting.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/run_plots.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/sql/report_LP_metrics.sql (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/sql/report_banner_metrics.sql (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/sql/report_total_amounts.sql (modified) (history)
  • /trunk/fundraiser-statistics/reporting/banner_compare.html (modified) (history)
  • /trunk/fundraiser-statistics/reporting/banner_test_by_lp.sql (modified) (history)
  • /trunk/fundraiser-statistics/reporting/compare_test.php (modified) (history)
  • /trunk/fundraiser-statistics/reporting/landing_page_compare.html (modified) (history)
  • /trunk/fundraiser-statistics/reporting/reporting_totals.html (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/fundraiser_reporting.py
@@ -310,7 +310,7 @@
311311 sql_stmnt = mh.read_sql(filename)
312312 sql_stmnt = query_obj.format_query(query_name + descriptor, sql_stmnt, [start_time, end_time])
313313
314 - labels = [None] * 20
 314+ labels = [None] * 21
315315 labels[0] = 'clicks'
316316 labels[1] = 'donations'
317317 labels[2] = 'total amount'
@@ -331,6 +331,7 @@
332332 labels[17] = 'cc_completion'
333333 labels[18] = 'cc_amount'
334334 labels[19] = 'cc_max_amount'
 335+ labels[20] = 'total_amt50'
335336
336337
337338 num_keys = len(labels)
@@ -434,7 +435,7 @@
435436
436437
437438
438 - def run_day(self):
 439+ def run_day(self,type):
439440
440441 # Current date & time
441442 now = datetime.datetime.now()
@@ -461,7 +462,7 @@
462463 labels = return_val[0]
463464 counts = return_val[1]
464465
465 - r = self.get_query_fields(labels, counts, 'BAN_EM', start_time, end_time)
 466+ r = self.get_query_fields(labels, counts, type, start_time, end_time)
466467 labels = r[0]
467468 counts = r[1]
468469 title = r[2]
@@ -477,7 +478,8 @@
478479
479480 ranges = [min(time_range), max(time_range)]
480481
481 - self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, query_name+descriptor)
 482+ fname = query_name + descriptor + '_' + type
 483+ self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)
482484
483485
484486 def get_query_fields(self, labels, counts, type, start_time, end_time):
@@ -494,6 +496,10 @@
495497 indices = [13,18]
496498 title = 'Credit Card & Paypal Total Amounts: ' + start_time + ' -- ' + end_time
497499 ylabel = 'Amount'
 500+ elif type == 'AMT_VS_AMT50':
 501+ indices = [2,20]
 502+ title = 'Amount50 and Amount Totals: ' + start_time + ' -- ' + end_time
 503+ ylabel = 'Amount'
498504 else:
499505 sys.exit("Total Amounts: You must enter a valid report type.\n" )
500506
Index: trunk/fundraiser-statistics/fundraiser-scripts/sql/report_total_amounts.sql
@@ -24,10 +24,10 @@
2525 sum(if(right(utm_source,2)='cc' and contribution_tracking.contribution_id,1,0))/sum(if(right(utm_source,2)='cc',1,0)) as cc_completion,
2626 sum(if(right(utm_source,2)='cc',converted_amount,0)) as cc_amt,
2727 max(if(right(utm_source,2)='cc',converted_amount,0)) as max_cc_amt,
 28+sum(if(converted_amount > 50,50,converted_amount)) as total_amt50,
2829 avg(converted_amount) as average,
2930 max(converted_amount) as max_amount
3031
31 -
3232 from (drupal.contribution_tracking left join civicrm.public_reporting on (drupal.contribution_tracking.contribution_id = civicrm.public_reporting.contribution_id))
3333 WHERE ts >= '%s' and ts < '%s'
3434 GROUP BY 1
Index: trunk/fundraiser-statistics/fundraiser-scripts/sql/report_banner_metrics.sql
@@ -11,7 +11,7 @@
1212 donations as donations,
1313 amount as amount,
1414 views / impressions as click_rate,
15 -donations / total_clicks as conversion_rate,
 15+donations / total_clicks as completion_rate,
1616 round(donations / impressions, 6) as don_per_imp,
1717 amount / impressions as amt_per_imp,
1818 donations / views as don_per_view,
Index: trunk/fundraiser-statistics/fundraiser-scripts/sql/report_LP_metrics.sql
@@ -10,7 +10,7 @@
1111 donations as donations,
1212 amount as amount,
1313 null as click_rate,
14 -donations / total_clicks as conversion_rate,
 14+donations / total_clicks as completion_rate,
1515 null as don_per_imp,
1616 null as amt_per_imp,
1717 donations / views as don_per_view,
Index: trunk/fundraiser-statistics/fundraiser-scripts/run_plots.py
@@ -24,7 +24,9 @@
2525 tar.run_hr('BAN_EM')
2626 tar.run_hr('CC_PP_completion')
2727 tar.run_hr('CC_PP_amount')
28 -tar.run_day()
 28+tar.run_hr('AMT_VS_AMT50')
 29+tar.run_day('BAN_EM')
 30+tar.run_day('AMT_VS_AMT50')
2931
3032 # Run the banner/lp plots
3133 blpr.run('LP', 'don_per_view')
Index: trunk/fundraiser-statistics/reporting/landing_page_compare.html
@@ -24,6 +24,19 @@
2525 <body>
2626 <h1><u>Custom Banner / LP Test:</u></h1>
2727
 28+<br/>
 29+<p>
 30+<b><u>To get the best results from this test use it across full hours.</u> That is your start and end timestamps should be
 31+right on the hour (ie. ending in '0000'). </b> The statistical test being used is the Wald test which assumes a normal
 32+distribution of the data. Due to the known variance over several hours each hour is tested indendently.
 33+</p>
 34+<p>
 35+This test compates one metric across two landing pages running concurrently (which usually also entails running over the same campaign).
 36+So to test the performance of the two pages over some metric enter the times, camapaign(s), landing page names, and finally select the metric from the
 37+radio button to test.
 38+</p>
 39+<br/>
 40+
2841 <form action="compare_test.php" method="post">
2942 <input type="hidden" name="sqlFile" value="banner_test_by_lp.sql">
3043 <br/>
Index: trunk/fundraiser-statistics/reporting/banner_test_by_lp.sql
@@ -47,6 +47,6 @@
4848
4949 on ecomm.landing_page = lp.landing_page and lp.dt_hr = ecomm.dt_hr and lp.dt_min = ecomm.dt_min
5050
51 -where views > 1000 and donations > 20
 51+where views > 100 and donations > 20
5252
5353 group by 1,2 ;
Index: trunk/fundraiser-statistics/reporting/reporting_totals.html
@@ -63,6 +63,19 @@
6464 <br>
6565
6666
 67+<h3><u>Amount 50 and Amount Totals - Last 24 Hours</u></h3>
 68+<OBJECT WIDTH="1600" HEIGHT="800" data="report_total_amounts_by_hr_AMT_VS_AMT50.png" type="image/png">
 69+</OBJECT>
 70+
 71+<br>
 72+
 73+<h3><u>Amount 50 and Amount Totals - Last 7 Days</u></h3>
 74+<OBJECT WIDTH="1600" HEIGHT="800" data="report_total_amounts_by_day_AMT_VS_AMT50.png" type="image/png">
 75+</OBJECT>
 76+
 77+<br>
 78+
 79+
6780 </body>
6881
6982 </html>
\ No newline at end of file
Index: trunk/fundraiser-statistics/reporting/compare_test.php
@@ -48,13 +48,17 @@
4949 }
5050
5151 // Format the query based on the type
52 -if ($sql_file == "banner_test_by_banner.sql" || $sql_file == "banner_test_by_lp.sql") {
 52+if ($sql_file == "banner_test_by_banner.sql") {
5353 $query1 = sprintf($query, '%','%','%','%','10','10', $start, $end, $item1, '%','%','%','%','10','10', $start, $end, $cmpgn1, $item1, '%','%','%','%','10','10', $start, $end, $cmpgn1, $item1);
5454 $query2 = sprintf($query, '%','%','%','%','10','10', $start, $end, $item2, '%','%','%','%','10','10', $start, $end, $cmpgn2, $item2, '%','%','%','%','10','10', $start, $end, $cmpgn2, $item2);
55 -} elseif ($sql_file == "landing_compare.sql") {
56 - $query = sprintf($query, $start, $end, $cmpgn, $start, $end, $cmpgn);
 55+} elseif ($sql_file == "banner_test_by_lp.sql") {
 56+ $query1 = sprintf($query, '%','%','%','%','10','10', $start, $end, $cmpgn1, $item1, '%','%','%','%','10','10', $start, $end, $cmpgn1, $item1);
 57+ $query2 = sprintf($query, '%','%','%','%','10','10', $start, $end, $cmpgn2, $item2, '%','%','%','%','10','10', $start, $end, $cmpgn2, $item2);
5758 }
5859
 60+// echo '<br><br>' . $query1 . '<br><br>';
 61+// echo '<br><br>' . $query2 . '<br><br>';
 62+
5963 // Execute Queries
6064 $result1 = mysql_query($query1);
6165 $result2 = mysql_query($query2);
@@ -80,6 +84,7 @@
8185 $time = array();
8286 $metric1 = array();
8387 $metric2 = array();
 88+$num_samples = array();
8489
8590 // Compute the means of the first item
8691 $counter = 0;
@@ -87,10 +92,9 @@
8893 while ($row = mysql_fetch_assoc($result1)) {
8994
9095 $time[$n1] = $row["day_hr"];
91 - $metric1[$n1] = $row[$metric];
 96+ $x1 = $row[$metric];
 97+ $metric1[$n1] = $x1;
9298
93 - $x1 = $row[$metric];
94 - $group1[$n] = $x1;
9599 $m1[$index] = $x1 + $m1[$index];
96100 $n1=$n1+1;
97101
@@ -106,10 +110,9 @@
107111 $counter = 0;
108112 $index = 0;
109113 while ($row = mysql_fetch_assoc($result2)) {
110 - $metric2[$n2] = $row[$metric];
 114+ $x2 = $row[$metric];
 115+ $metric2[$n2] = $x2;
111116
112 - $x2 = $row[$metric];
113 - $group2[$n] = $x2;
114117 $m2[$index] = $x2 + $m2[$index];
115118 $n2=$n2+1;
116119
@@ -137,8 +140,8 @@
138141 {
139142 $index = floor($counter / $interval);
140143
141 - $diff1 = $group1[$counter] - $m1[$index];
142 - $diff2 = $group2[$counter] - $m2[$index];
 144+ $diff1 = $metric1[$counter] - $m1[$index];
 145+ $diff2 = $metric2[$counter] - $m2[$index];
143146 $v1[$index] = $v1[$index] + pow($diff1,2);
144147 $v2[$index] = $v2[$index] + pow($diff2,2);
145148
@@ -179,6 +182,9 @@
180183
181184 echo 'The average of Group 1 for hour ' . ($i + 1). ': ' . $m1[$i] . '<br>';
182185 echo 'The average of Group 2 for hour ' . ($i + 1). ': ' . $m2[$i] . '<br>';
 186+ echo 'The standard deviation of Group 1 for hour ' . ($i + 1). ': ' . pow($v1[$i], 0.5) . '<br>';
 187+ echo 'The standard deviation of Group 2 for hour ' . ($i + 1). ': ' . pow($v2[$i], 0.5) . '<br>';
 188+
183189
184190 if ($W[$i] >= 0.1) {
185191 echo '<br>8% confident about the winner.<br>';
@@ -234,8 +240,9 @@
235241 echo "</br></br><table width='50%'><tr>";
236242 //loop thru the field names to print the correct headers
237243 echo "<th>". "Time Stamp" . "</th>";
238 -echo "<th>". mysql_field_name($result1, $metric) . "_1" . "</th>";
239 -echo "<th>". mysql_field_name($result1, $metric) . "_2" . "</th>";
 244+echo "<th>". $metric . "_1" . "</th>";
 245+echo "<th>". $metric . "_2" . "</th>";
 246+// echo "<th>". mysql_field_name($result2, $metric) . "_2" . "</th>";
240247 echo "</tr>";
241248
242249 for ( $counter = 0; $counter < $n; $counter += 1) {
Index: trunk/fundraiser-statistics/reporting/banner_compare.html
@@ -24,6 +24,19 @@
2525 <body>
2626 <h1><u>Custom Banner / LP Test:</u></h1>
2727
 28+<br/>
 29+<p>
 30+<b><u>To get the best results from this test use it across full hours.</u> That is your start and end timestamps should be
 31+right on the hour (ie. ending in '0000'). </b> The statistical test being used is the Wald test which assumes a normal
 32+distribution of the data. Due to the known variance over several hours each hour is tested indendently.
 33+</p>
 34+<p>
 35+This test compates one metric across two banners running concurrently (which usually also entails running over the same campaign).
 36+So to test the performance of the two pages over some metric enter the times, camapaign(s), banner names, and finally select the metric from the
 37+radio button to test.
 38+</p>
 39+<br/>
 40+
2841 <form action="compare_test.php" method="post">
2942 <input type="hidden" name="sqlFile" value="banner_test_by_banner.sql">
3043 <br/>
Index: trunk/fundraiser-statistics/bash/plot_build_hr.sh
@@ -4,18 +4,25 @@
55
66 python run_plots.py
77
8 -cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_LP_metrics_don_per_view.png /srv/org.wikimedia.fundraising/stats/
9 -cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_total_amounts_by_day.png /srv/org.wikimedia.fundraising/stats/
10 -cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_banner_metrics_don_per_imp.png /srv/org.wikimedia.fundraising/stats/
 8+# total amounts
 9+cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_total_amounts_by_day_BAN_EM.png /srv/org.wikimedia.fundraising/stats/
1110 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_total_amounts_by_hr_BAN_EM.png /srv/org.wikimedia.fundraising/stats/
1211 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_total_amounts_by_hr_CC_PP_amount.png /srv/org.wikimedia.fundraising/stats/
1312 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_total_amounts_by_hr_CC_PP_completion.png /srv/org.wikimedia.fundraising/stats/
 13+cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_total_amounts_by_day_AMT_VS_AMT50.png /srv/org.wikimedia.fundraising/stats/
 14+cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_total_amounts_by_hr_AMT_VS_AMT50.png /srv/org.wikimedia.fundraising/stats/
 15+
 16+# Banner / LP
 17+cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_LP_metrics_don_per_view.png /srv/org.wikimedia.fundraising/stats/
 18+cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_banner_metrics_don_per_imp.png /srv/org.wikimedia.fundraising/stats/
1419 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_banner_metrics_click_rate.png /srv/org.wikimedia.fundraising/stats/
1520 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_LP_metrics_completion_rate.png /srv/org.wikimedia.fundraising/stats/
1621
 22+# latest plots
1723 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_LP_metrics_don_per_view_latest.png /srv/org.wikimedia.fundraising/stats/
1824 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_banner_metrics_don_per_imp_latest.png /srv/org.wikimedia.fundraising/stats/
1925 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_banner_metrics_click_rate_latest.png /srv/org.wikimedia.fundraising/stats/
2026
 27+# mining counts
2128 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_banner_impressions_by_hour.png /srv/org.wikimedia.fundraising/stats/
2229 cp /home/rfaulk/fundraiser-statistics/fundraiser-scripts/report_lp_views_by_hour.png /srv/org.wikimedia.fundraising/stats/
\ No newline at end of file

Status & tagging log