r85742 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r85741‎ \| r85742 \| r85743 >
Date:	03:30, 10 April 2011
Author:	rfaulk
Status:	deferred
Tags:
Comment:	Major re-factoring. Redefined query_store (QueryData) and TimeProcessor to be singleton by implementing as a set of module methods. fundraiser_reporting was renamed DataReporting and decoupled from performing queries - this functionality has been moved into DataLoader in an effort to decouple reporting and data loading functionality.
Modified paths:	/trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US+banner+imp.png (added) (history) /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py (modified) (history) /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py (added) (history) /trunk/fundraiser-statistics/fundraiser-scripts/classes/QueryData.py (added) (history) /trunk/fundraiser-statistics/fundraiser-scripts/classes/TimestampProcessor.py (modified) (history) /trunk/fundraiser-statistics/fundraiser-scripts/classes/compute_confidence.py (modified) (history) /trunk/fundraiser-statistics/fundraiser-scripts/classes/fundraiser_reporting.py (deleted) (history) /trunk/fundraiser-statistics/fundraiser-scripts/classes/query_store.py (deleted) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/fundraiser_reporting.py
—	—	@@ -1,1093 +0,0 @@
2		-
3		-
4		~~-"""~~
5		-
6		~~-Pulls data from storage3.faulkner and generates plots.~~
7		-
8		-
9		~~-"""~~
10		-
11		~~-__author__ = "Ryan Faulkner"~~
12		~~-__revision__ = "$Rev$"~~
13		~~-__date__ = "December 16th, 2010"~~
14		-
15		-
16		~~-import sys~~
17		~~-sys.path.append('../')~~
18		-
19		~~-import matplotlib~~
20		~~-import datetime~~
21		~~-import MySQLdb~~
22		~~-import pylab~~
23		~~-import HTML~~
24		~~-import math~~
25		-
26		~~-import query_store as qs~~
27		~~-import miner_help as mh~~
28		~~-import TimestampProcessor as TP~~
29		~~-import DataLoader as DL~~
30		-
31		~~-matplotlib.use('Agg')~~
32		-
33		-
34		-
35		~~-"""~~
36		-
37		~~- CLASS :: FundraiserReporting~~
38		-
39		~~- Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include:~~
40		-
41		~~- METHODS:~~
42		-
43		~~- run_query - format and execute the query to obtain data~~
44		~~- gen_plot - plots the results of the report~~
45		~~- write_to_html_table - writes the results to an HTML table~~
46		~~- run~~
47		-
48		~~-"""~~
49		~~-class FundraiserReporting(TP.TimestampProcesser, DL.DataLoader):~~
50		-
51		~~- _sql_path_ = '../sql/' # Relative path for SQL files to be processed~~
52		-
53		~~- """~~
54		-
55		~~- Smooths a list of values~~
56		-
57		~~- INPUT:~~
58		~~- values - a list of datetime objects~~
59		~~- window_length - indicate whether the list counts back from the end~~
60		-
61		~~- RETURN:~~
62		~~- new_values - list of smoothed values~~
63		-
64		~~- """~~
65		~~- def smooth(self, values, window_length):~~
66		-
67		~~- window_length = int(math.floor(window_length / 2))~~
68		-
69		~~- if window_length < 1:~~
70		~~- return values~~
71		-
72		~~- list_len = len(values)~~
73		~~- new_values = list()~~
74		-
75		~~- for i in range(list_len):~~
76		~~- index_left = max([0, i - window_length])~~
77		~~- index_right = min([list_len - 1, i + window_length])~~
78		-
79		~~- width = index_right - index_left + 1~~
80		-
81		~~- new_val = sum(values[index_left : (index_right + 1)]) / width~~
82		~~- new_values.append(new_val)~~
83		-
84		~~- return new_values~~
85		-
86		~~- """~~
87		-
88		~~- workaround for issue with tuple objects in HTML.py~~
89		~~- MySQLdb returns unfamiliar tuple elements from its fetchall() method~~
90		~~- this is probably a version problem since the issue popped up in 2.5 but not 2.6~~
91		-
92		~~- INPUT:~~
93		~~- row - row object returned from MySQLdb.fetchall()~~
94		-
95		~~- RETURN:~~
96		~~- l - a list of tuple objects from the db~~
97		-
98		~~- """~~
99		~~- def listify(self, row):~~
100		~~- l = []~~
101		~~- for i in row:~~
102		~~- l.append(i)~~
103		~~- return l~~
104		-
105		-
106		~~- """~~
107		-
108		~~- To be overloaded by subclasses for specific types of queries~~
109		-
110		~~- INPUT:~~
111		~~- values - a list of datetime objects~~
112		~~- window_length - indicate whether the list counts back from the end~~
113		-
114		~~- RETURN:~~
115		~~- return_status - integer, 0 indicates un-exceptional execution~~
116		-
117		~~- """~~
118		~~- def run_query(self, start_time, end_time, query_name, metric_name):~~
119		~~- return 0~~
120		-
121		-
122		~~- """~~
123		-
124		~~- To be overloaded by subclasses for different plotting behaviour~~
125		-
126		~~- INPUT:~~
127		~~- values - a list of datetime objects~~
128		~~- window_length - indicate whether the list counts back from the end~~
129		-
130		~~- RETURN:~~
131		~~- return_status - integer, 0 indicates un-exceptional execution~~
132		-
133		~~- """~~
134		~~- def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, subplot_index, fname):~~
135		~~- return 0~~
136		-
137		~~- """~~
138		-
139		~~- To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py)~~
140		-
141		~~- INPUT:~~
142		~~- values - a list of datetime objects~~
143		~~- window_length - indicate whether the list counts back from the end~~
144		-
145		~~- RETURN:~~
146		~~- return_status - integer, 0 indicates un-exceptional execution~~
147		-
148		~~- """~~
149		~~- def write_to_html_table(self):~~
150		-
151		~~- """~~
152		~~- FROM TEST REPORTING~~
153		-
154		~~- query_obj = qs.query_store()~~
155		-
156		~~- # Populate the campaigns table~~
157		~~- s1 = 'drop table if exists campaigns;'~~
158		~~- s2 = 'create table campaigns as (select utm_campaign from drupal.contribution_tracking where ts > \'%s\' group by utm_campaign having count(*) > 100);' % (start_time)~~
159		~~- cur.execute(s1)~~
160		~~- cur.execute(s2)~~
161		-
162		~~- table_data = []~~
163		~~- sql_stmnt = mh.read_sql(sql_path + query_type + '.sql');~~
164		-
165		~~- # open the html file for writing~~
166		~~- f = open(html_path + query_type + '.html', 'w')~~
167		-
168		~~- format_start_time = start_time[0:4] + '-' + start_time[4:6] + '-' + start_time[6:8] + '-' + start_time[8:10] + 'HRs'~~
169		-
170		~~- # Formats the statement according to query type~~
171		~~- select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])~~
172		-
173		~~- # html formatting~~
174		~~- if query_type == 'report_campaign_ecomm':~~
175		~~- f.write('<br>Donation data since ' + format_start_time + ' ... <br><br>')~~
176		~~- select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])~~
177		-
178		~~- elif query_type == 'report_campaign_logs':~~
179		~~- f.write('<br>Impression and landing page data since ' + format_start_time+ ' ... <br><br>')~~
180		~~- select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])~~
181		-
182		~~- elif query_type == 'report_campaign_ecomm_by_hr':~~
183		~~- f.write('<br>Donation data by hour since ' + format_start_time + ' ... <br><br>')~~
184		~~- select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])~~
185		-
186		~~- elif query_type == 'report_campaign_logs_by_hr':~~
187		~~- f.write('<br>Impression and landing page by hour since ' + format_start_time + ' ... <br><br>')~~
188		~~- select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])~~
189		-
190		~~- else:~~
191		~~- select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])~~
192		-
193		~~- try:~~
194		~~- err_msg = select_stmnt~~
195		~~- cur.execute(select_stmnt)~~
196		-
197		~~- results = cur.fetchall()~~
198		-
199		~~- for row in results:~~
200		~~- cpRow = listify(row)~~
201		~~- # t.rows.append(row)~~
202		~~- table_data.append(cpRow)~~
203		-
204		~~- except:~~
205		~~- db.rollback()~~
206		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
207		-
208		-
209		~~- t = HTML.table(table_data, header_row=header)~~
210		~~- htmlcode = str(t)~~
211		-
212		~~- f.write(htmlcode)~~
213		~~- f.close()~~
214		-
215		~~- return htmlcode~~
216		-
217		~~- """~~
218		~~- return 0~~
219		-
220		-
221		-
222		~~- """~~
223		-
224		~~- The access point of FundraiserReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc.~~
225		~~- To be overloaded by subclasses~~
226		-
227		~~- INPUT:~~
228		-
229		~~- RETURN:~~
230		~~- return_status - integer, 0 indicates un-exceptional execution~~
231		-
232		~~- """~~
233		~~- def run(self):~~
234		~~- return~~
235		-
236		-
237		-
238		~~-"""~~
239		-
240		~~-CLASS :: ^TotalAmountsReporting^~~
241		-
242		~~-This subclass handles reporting on total amounts for the fundraiser.~~
243		-
244		~~-"""~~
245		-
246		~~-class TotalAmountsReporting(FundraiserReporting):~~
247		-
248		~~- def __init__(self):~~
249		~~- self.data = []~~
250		-
251		~~- def run_query(self, start_time, end_time, query_name, descriptor):~~
252		-
253		~~- self.init_db()~~
254		-
255		~~- query_obj = qs.QueryStore()~~
256		-
257		~~- # Load the SQL File & Format~~
258		~~- filename = self._sql_path_ + query_name + '.sql'~~
259		~~- sql_stmnt = mh.read_sql(filename)~~
260		~~- sql_stmnt = query_obj.format_query(query_name + descriptor, sql_stmnt, [start_time, end_time])~~
261		-
262		~~- labels = [None] * 21~~
263		~~- labels[0] = 'clicks'~~
264		~~- labels[1] = 'donations'~~
265		~~- labels[2] = 'total amount'~~
266		~~- labels[3] = 'banner amount'~~
267		~~- labels[4] = 'US amount'~~
268		~~- labels[5] = 'EN amount'~~
269		~~- labels[6] = 'Other Amount'~~
270		~~- labels[7] = 'Email Amount'~~
271		~~- labels[8] = 'Recurring Guess'~~
272		~~- labels[9] = 'completion_rate'~~
273		~~- labels[10] = 'pp_clicks'~~
274		~~- labels[11] = 'pp_donations'~~
275		~~- labels[12] = 'pp_completion'~~
276		~~- labels[13] = 'pp_amount'~~
277		~~- labels[14] = 'pp_max_amount'~~
278		~~- labels[15] = 'cc_clicks'~~
279		~~- labels[16] = 'cc_donations'~~
280		~~- labels[17] = 'cc_completion'~~
281		~~- labels[18] = 'cc_amount'~~
282		~~- labels[19] = 'cc_max_amount'~~
283		~~- labels[20] = 'total_amt50'~~
284		-
285		-
286		~~- num_keys = len(labels)~~
287		-
288		~~- lists = list()~~
289		~~- for i in range(num_keys):~~
290		~~- lists.append(list())~~
291		-
292		~~- # Composes the data for each banner~~
293		~~- try:~~
294		~~- err_msg = sql_stmnt~~
295		~~- self.cur.execute(sql_stmnt)~~
296		-
297		~~- # This query store records according to dates~~
298		~~- results = self.cur.fetchall()~~
299		~~- for row in results:~~
300		~~- for i in range(num_keys):~~
301		~~- lists[i].append(row[i+1])~~
302		-
303		~~- except:~~
304		~~- self.db.rollback()~~
305		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
306		-
307		~~- self.close_db()~~
308		-
309		~~- # Only interested in amounts~~
310		~~- return [labels, lists]~~
311		-
312		-
313		-
314		~~- def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, ranges, subplot_index, fname):~~
315		~~- pylab.subplot(subplot_index)~~
316		~~- num_keys = len(y_lists)~~
317		-
318		~~- pylab.figure(num=None,figsize=[26,14])~~
319		~~- line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','b--o','g--o','r--o','c--o','m--o','k--o']~~
320		-
321		~~- for i in range(num_keys):~~
322		~~- pylab.plot(x, y_lists[i], line_types[i])~~
323		-
324		~~- pylab.grid()~~
325		~~- pylab.xlim(ranges[0], ranges[1])~~
326		~~- pylab.legend(labels,loc=2)~~
327		-
328		~~- pylab.xlabel(xlabel)~~
329		~~- pylab.ylabel(ylabel)~~
330		-
331		~~- pylab.title(title)~~
332		~~- pylab.savefig(fname+'.png', format='png')~~
333		-
334		-
335		-
336		~~- def run_hr(self, type):~~
337		-
338		-
339		~~- # Current date & time~~
340		~~- now = datetime.datetime.now()~~
341		~~- #UTC = 8~~
342		~~- #delta = datetime.timedelta(hours=UTC)~~
343		~~- #now = now + delta~~
344		-
345		-
346		~~- """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """~~
347		~~- hours_back = 24~~
348		~~- times = self.gen_date_strings(now, hours_back,1,1)~~
349		-
350		~~- start_time = times[0]~~
351		~~- end_time = times[1]~~
352		-
353		~~- print '\nGenerating analytics total amount for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'~~
354		-
355		~~- # QUERY NAME~~
356		~~- query_name = 'report_total_amounts'~~
357		-
358		-
359		~~- # RUN BY HOUR~~
360		~~- descriptor = '_by_hr'~~
361		~~- return_val = self.run_query(start_time, end_time, query_name, descriptor)~~
362		-
363		~~- labels = return_val[0] # curve labels~~
364		~~- counts = return_val[1] # curve data - lists~~
365		-
366		~~- r = self.get_query_fields(labels, counts, type, start_time, end_time)~~
367		~~- labels = r[0]~~
368		~~- counts = r[1]~~
369		~~- title = r[2]~~
370		~~- ylabel = r[3]~~
371		-
372		~~- xlabel = 'Time - Hours'~~
373		~~- subplot_index = 111~~
374		-
375		~~- # plot the curves~~
376		~~- time_range = range(len(counts[0]))~~
377		~~- for i in range(len(counts[0])):~~
378		~~- time_range[i] = time_range[i] - len(counts[0])~~
379		-
380		~~- ranges = [min(time_range), max(time_range)]~~
381		-
382		~~- fname = query_name + descriptor + '_' + type~~
383		~~- self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)~~
384		-
385		-
386		-
387		~~- def run_day(self,type):~~
388		-
389		~~- # Current date & time~~
390		~~- now = datetime.datetime.now()~~
391		~~- #UTC = 8~~
392		~~- #delta = datetime.timedelta(hours=UTC)~~
393		~~- #now = now + delta~~
394		-
395		-
396		~~- """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=0 """~~
397		~~- hours_back = 7 * 24 # 7 days back~~
398		~~- times = self.gen_date_strings(now, hours_back,1,0)~~
399		-
400		~~- start_time = times[0]~~
401		~~- end_time = times[1]~~
402		-
403		~~- print '\nGenerating analytics total amount for ' + str(days_back) + ' days back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'~~
404		-
405		-
406		~~- # FORMAT HEADERS & QUERY NAME~~
407		~~- query_name = 'report_total_amounts'~~
408		~~- descriptor = '_by_day'~~
409		~~- return_val = self.run_query(start_time, end_time, query_name, descriptor)~~
410		-
411		~~- labels = return_val[0]~~
412		~~- counts = return_val[1]~~
413		-
414		~~- r = self.get_query_fields(labels, counts, type, start_time, end_time)~~
415		~~- labels = r[0]~~
416		~~- counts = r[1]~~
417		~~- title = r[2]~~
418		~~- ylabel = r[3]~~
419		-
420		~~- xlabel = 'Time - Days'~~
421		~~- subplot_index = 111~~
422		-
423		~~- # Plot values~~
424		~~- time_range = range(len(counts[0]))~~
425		~~- for i in range(len(counts[0])):~~
426		~~- time_range[i] = time_range[i] - len(counts[0])~~
427		-
428		~~- ranges = [min(time_range), max(time_range)]~~
429		-
430		~~- fname = query_name + descriptor + '_' + type~~
431		~~- self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)~~
432		-
433		-
434		~~- def get_query_fields(self, labels, counts, type, start_time, end_time):~~
435		-
436		~~- if type == 'BAN_EM':~~
437		~~- indices = range(2,9)~~
438		~~- title = 'Total Amounts: ' + start_time + ' -- ' + end_time~~
439		~~- ylabel = 'Amount'~~
440		~~- elif type == 'CC_PP_completion':~~
441		~~- indices = [12,17]~~
442		~~- title = 'Credit Card & Paypal Completion Rates: ' + start_time + ' -- ' + end_time~~
443		~~- ylabel = 'Rate'~~
444		~~- elif type == 'CC_PP_amount':~~
445		~~- indices = [13,18]~~
446		~~- title = 'Credit Card & Paypal Total Amounts: ' + start_time + ' -- ' + end_time~~
447		~~- ylabel = 'Amount'~~
448		~~- elif type == 'AMT_VS_AMT50':~~
449		~~- indices = [2,20]~~
450		~~- title = 'Amount50 and Amount Totals: ' + start_time + ' -- ' + end_time~~
451		~~- ylabel = 'Amount'~~
452		~~- else:~~
453		~~- sys.exit("Total Amounts: You must enter a valid report type.\n" )~~
454		-
455		~~- # Exract relevant labels and values~~
456		~~- labels_temp = list()~~
457		~~- counts_temp = list()~~
458		-
459		~~- for i in range(len(labels)):~~
460		~~- if i in indices:~~
461		~~- labels_temp.append(labels[i])~~
462		~~- counts_temp.append(counts[i])~~
463		-
464		~~- return [labels_temp, counts_temp, title, ylabel]~~
465		-
466		-
467		~~-"""~~
468		-
469		~~-CLASS :: ^BannerLPReporting^~~
470		-
471		~~-This subclass handles reporting on banners and landing pages for the fundraiser.~~
472		-
473		~~-"""~~
474		-
475		~~-class BannerLPReporting(FundraiserReporting):~~
476		-
477		-
478		~~- def __init__(self, *args):~~
479		-
480		~~- if len(args) == 2:~~
481		~~- self.campaign = args[0]~~
482		~~- self.start_time = args[1]~~
483		~~- else:~~
484		~~- self.campaign = None~~
485		~~- self.start_time = None~~
486		-
487		~~- def run_query(self,start_time, end_time, campaign, query_name, metric_name):~~
488		-
489		~~- self.init_db()~~
490		-
491		~~- query_obj = qs.QueryStore()~~
492		-
493		~~- metric_lists = mh.AutoVivification()~~
494		~~- time_lists = mh.AutoVivification()~~
495		~~- # table_data = [] # store the results in a table for reporting~~
496		-
497		~~- # Load the SQL File & Format~~
498		~~- filename = self._sql_path_ + query_name + '.sql'~~
499		~~- sql_stmnt = mh.read_sql(filename)~~
500		-
501		~~- query_name = 'report_bannerLP_metrics' # rename query to work with query store~~
502		~~- sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time, campaign])~~
503		-
504		~~- key_index = query_obj.get_banner_index(query_name)~~
505		~~- time_index = query_obj.get_time_index(query_name)~~
506		~~- metric_index = query_obj.get_metric_index(query_name, metric_name)~~
507		-
508		~~- # Composes the data for each banner~~
509		~~- try:~~
510		~~- err_msg = sql_stmnt~~
511		~~- self.cur.execute(sql_stmnt)~~
512		-
513		~~- results = self.cur.fetchall()~~
514		-
515		~~- # Compile Table Data~~
516		~~- # cpRow = self.listify(row)~~
517		~~- # table_data.append(cpRow)~~
518		-
519		~~- for row in results:~~
520		-
521		~~- key_name = row[key_index]~~
522		-
523		~~- try:~~
524		~~- metric_lists[key_name].append(row[metric_index])~~
525		~~- time_lists[key_name].append(row[time_index])~~
526		~~- except:~~
527		~~- metric_lists[key_name] = list()~~
528		~~- time_lists[key_name] = list()~~
529		-
530		~~- metric_lists[key_name].append(row[metric_index])~~
531		~~- time_lists[key_name].append(row[time_index])~~
532		-
533		~~- except:~~
534		~~- self.db.rollback()~~
535		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
536		-
537		~~- """ Convert Times to Integers """~~
538		~~- # Find the earliest date~~
539		~~- max_i = 0~~
540		-
541		~~- for key in time_lists.keys():~~
542		~~- for date_str in time_lists[key]:~~
543		~~- day_int = int(date_str[8:10])~~
544		~~- hr_int = int(date_str[11:13])~~
545		~~- date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
546		~~- if date_int > max_i:~~
547		~~- max_i = date_int~~
548		~~- max_day = day_int~~
549		~~- max_hr = hr_int~~
550		-
551		-
552		~~- # Normalize dates~~
553		~~- time_norm = mh.AutoVivification()~~
554		~~- for key in time_lists.keys():~~
555		~~- for date_str in time_lists[key]:~~
556		~~- day = int(date_str[8:10])~~
557		~~- hr = int(date_str[11:13])~~
558		~~- # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
559		~~- elem = (day - max_day) * 24 + (hr - max_hr)~~
560		~~- try:~~
561		~~- time_norm[key].append(elem)~~
562		~~- except:~~
563		~~- time_norm[key] = list()~~
564		~~- time_norm[key].append(elem)~~
565		-
566		~~- # smooth out the values~~
567		~~- #window_length = 20~~
568		~~- #for banner in metric_lists.keys():~~
569		~~- # metric_lists[banner] = smooth(metric_lists[banner], window_length)~~
570		-
571		~~- self.close_db()~~
572		-
573		~~- # return [metric_lists, time_norm, table_data]~~
574		~~- return [metric_lists, time_norm]~~
575		-
576		-
577		~~- def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):~~
578		~~- pylab.subplot(subplot_index)~~
579		~~- pylab.figure(num=None,figsize=[26,14])~~
580		~~- count_keys = counts.keys()~~
581		-
582		~~- line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']~~
583		-
584		~~- count = 0~~
585		~~- for key in counts.keys():~~
586		~~- pylab.plot(times[key], counts[key], line_types[count])~~
587		~~- count = count + 1~~
588		-
589		~~- pylab.grid()~~
590		~~- pylab.xlim(ranges[0], ranges[1])~~
591		~~- pylab.legend(count_keys,loc=2)~~
592		-
593		~~- pylab.xlabel(xlabel)~~
594		~~- pylab.ylabel(ylabel)~~
595		-
596		~~- pylab.title(title)~~
597		~~- pylab.savefig(fname, format='png')~~
598		-
599		-
600		~~- """~~
601		-
602		~~- type = 'LP' \|\| 'BAN' \|\| 'BAN-TEST' \|\| 'LP-TEST'~~
603		-
604		~~- """~~
605		~~- def run(self, type, metric_name):~~
606		-
607		~~- # Current date & time~~
608		~~- now = datetime.datetime.now()~~
609		~~- #UTC = 8~~
610		~~- #delta = datetime.timedelta(hours=UTC)~~
611		~~- #now = now + delta~~
612		-
613		~~- """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """~~
614		~~- hours_back = 24~~
615		~~- times = self.gen_date_strings(now, hours_back,1,1)~~
616		-
617		~~- start_time = times[0]~~
618		~~- end_time = times[1]~~
619		-
620		~~- print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'~~
621		-
622		~~- if type == 'LP':~~
623		~~- query_name = 'report_LP_metrics'~~
624		-
625		~~- # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign~~
626		~~- if self.campaign == None:~~
627		~~- campaign = '[0-9](JA\|SA\|EA\|TY)[0-9]'~~
628		~~- else:~~
629		~~- campaign = self.campaign~~
630		-
631		~~- title = metric_name + ': ' + start_time + ' -- ' + end_time~~
632		~~- fname = query_name + '_' + metric_name + '.png'~~
633		~~- elif type == 'BAN':~~
634		~~- query_name = 'report_banner_metrics'~~
635		-
636		~~- # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign~~
637		~~- if self.campaign == None:~~
638		~~- campaign = '[0-9](JA\|SA\|EA\|TY)[0-9]'~~
639		~~- else:~~
640		~~- campaign = self.campaign~~
641		-
642		~~- title = metric_name + ': ' + start_time + ' -- ' + end_time~~
643		~~- fname = query_name + '_' + metric_name + '.png'~~
644		~~- elif type == 'BAN-TEST':~~
645		~~- r = self.get_latest_campaign()~~
646		~~- query_name = 'report_banner_metrics'~~
647		-
648		~~- # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign~~
649		~~- if self.campaign == None:~~
650		~~- campaign = r[0]~~
651		~~- start_time = r[1]~~
652		~~- else:~~
653		~~- campaign = self.campaign~~
654		~~- start_time = self.start_time~~
655		-
656		~~- title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign~~
657		~~- fname = query_name + '_' + metric_name + '_latest' + '.png'~~
658		~~- elif type == 'LP-TEST':~~
659		~~- r = self.get_latest_campaign()~~
660		~~- query_name = 'report_LP_metrics'~~
661		-
662		~~- # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign~~
663		~~- if self.campaign == None:~~
664		~~- campaign = r[0]~~
665		~~- start_time = r[1]~~
666		~~- else:~~
667		~~- campaign = self.campaign~~
668		~~- start_time = self.start_time~~
669		-
670		~~- title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign~~
671		~~- fname = query_name + '_' + metric_name + '_latest' + '.png'~~
672		~~- else:~~
673		~~- sys.exit("Invalid type name - must be 'LP' or 'BAN'.")~~
674		-
675		~~- return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name)~~
676		~~- metrics = return_val[0]~~
677		~~- times = return_val[1]~~
678		-
679		~~- # title = metric_name + ': ' + start_time + ' -- ' + end_time~~
680		~~- xlabel = 'Time - Hours'~~
681		~~- ylabel = metric_name~~
682		~~- subplot_index = 111~~
683		-
684		~~- min_time = 99~~
685		~~- for key in times.keys():~~
686		~~- min_elem = min(times[key])~~
687		~~- if min_elem < min_time:~~
688		~~- min_time = min_elem~~
689		-
690		~~- ranges = [min_time, 0]~~
691		-
692		~~- self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname)~~
693		-
694		~~- return [metrics, times]~~
695		-
696		-
697		~~- def get_latest_campaign(self):~~
698		-
699		~~- query_name = 'report_latest_campaign'~~
700		~~- self.init_db()~~
701		-
702		~~- """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """~~
703		~~- now = datetime.datetime.now()~~
704		~~- hours_back = 72~~
705		~~- times = self.gen_date_strings(now, hours_back,1,1)~~
706		-
707		~~- query_obj = qs.QueryStore()~~
708		~~- sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')~~
709		~~- sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [times[0]])~~
710		-
711		~~- campaign_index = query_obj.get_campaign_index(query_name)~~
712		~~- time_index = query_obj.get_time_index(query_name)~~
713		-
714		~~- try:~~
715		~~- err_msg = sql_stmnt~~
716		~~- self.cur.execute(sql_stmnt)~~
717		-
718		~~- row = self.cur.fetchone()~~
719		~~- except:~~
720		~~- self.db.rollback()~~
721		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
722		-
723		~~- campaign = row[campaign_index]~~
724		~~- timestamp = row[time_index]~~
725		-
726		~~- self.close_db()~~
727		-
728		~~- return [campaign, timestamp]~~
729		-
730		~~- """~~
731		-
732		~~- Takes as input and converts it to a set of hours counting back from 0~~
733		-
734		~~- time_lists - a dictionary of timestamp lists~~
735		~~- time_norm - a dictionary of normalized times~~
736		-
737		~~- """~~
738		~~- def normalize_timestamps(self, time_lists):~~
739		~~- # Find the earliest date~~
740		~~- max_i = 0~~
741		-
742		~~- for key in time_lists.keys():~~
743		~~- for date_str in time_lists[key]:~~
744		~~- day_int = int(date_str[8:10])~~
745		~~- hr_int = int(date_str[11:13])~~
746		~~- date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
747		~~- if date_int > max_i:~~
748		~~- max_i = date_int~~
749		~~- max_day = day_int~~
750		~~- max_hr = hr_int~~
751		-
752		-
753		~~- # Normalize dates~~
754		~~- time_norm = mh.AutoVivification()~~
755		~~- for key in time_lists.keys():~~
756		~~- for date_str in time_lists[key]:~~
757		~~- day = int(date_str[8:10])~~
758		~~- hr = int(date_str[11:13])~~
759		~~- # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])~~
760		~~- elem = (day - max_day) * 24 + (hr - max_hr)~~
761		~~- try:~~
762		~~- time_norm[key].append(elem)~~
763		~~- except:~~
764		~~- time_norm[key] = list()~~
765		~~- time_norm[key].append(elem)~~
766		-
767		~~- return time_norm~~
768		-
769		~~-"""~~
770		-
771		~~-CLASS :: ^MinerReporting^~~
772		-
773		~~-This subclass handles reporting on raw values imported into the database.~~
774		-
775		~~-"""~~
776		-
777		~~-class MinerReporting(FundraiserReporting):~~
778		-
779		~~- def run_query(self, start_time, end_time, query_name):~~
780		-
781		~~- self.init_db()~~
782		-
783		~~- query_obj = qs.QueryStore()~~
784		-
785		~~- counts = list()~~
786		~~- times = list()~~
787		-
788		~~- # Load the SQL File & Format~~
789		~~- filename = self._sql_path_+ query_name + '.sql'~~
790		~~- sql_stmnt = mh.read_sql(filename)~~
791		-
792		~~- sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time])~~
793		~~- #print sql_stmnt~~
794		-
795		~~- # Get Indexes into Query~~
796		~~- count_index = query_obj.get_count_index(query_name)~~
797		~~- time_index = query_obj.get_time_index(query_name)~~
798		-
799		~~- # Composes the data for each banner~~
800		~~- try:~~
801		~~- err_msg = sql_stmnt~~
802		~~- self.cur.execute(sql_stmnt)~~
803		-
804		~~- results = self.cur.fetchall()~~
805		-
806		~~- for row in results:~~
807		~~- counts.append(row[count_index])~~
808		~~- times.append(row[time_index])~~
809		-
810		~~- except:~~
811		~~- self.db.rollback()~~
812		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
813		-
814		~~- """ Convert Times to Integers """~~
815		~~- time_norm = self.normalize_timestamps(times)~~
816		-
817		-
818		~~- self.close_db()~~
819		-
820		~~- return [counts, time_norm]~~
821		-
822		-
823		~~- # Create histograms for hourly counts~~
824		-
825		~~- def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):~~
826		-
827		~~- pylab.subplot(subplot_index)~~
828		~~- pylab.figure(num=None,figsize=[26,14])~~
829		-
830		~~- # pylab.plot(times, counts)~~
831		~~- # pylab.hist(counts, times)~~
832		~~- pylab.bar(times, counts, width=0.5)~~
833		-
834		~~- pylab.grid()~~
835		~~- pylab.xlim(ranges[0], ranges[1])~~
836		-
837		~~- pylab.xlabel(xlabel)~~
838		~~- pylab.ylabel(ylabel)~~
839		-
840		~~- pylab.title(title)~~
841		~~- pylab.savefig(fname, format='png')~~
842		-
843		~~- def run(self, query_name):~~
844		-
845		~~- query_obj = qs.QueryStore()~~
846		-
847		~~- # Current date & time~~
848		~~- now = datetime.datetime.now()~~
849		~~- #UTC = 8~~
850		~~- #delta = datetime.timedelta(hours=UTC)~~
851		~~- #now = now + delta~~
852		-
853		~~- """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """~~
854		~~- hours_back = 24~~
855		~~- times = self.gen_date_strings_hr(now, hours_back,1,1)~~
856		-
857		~~- start_time = times[0]~~
858		~~- end_time = times[1]~~
859		-
860		~~- print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'~~
861		-
862		~~- # Run Query~~
863		~~- return_val = self.run_query(start_time, end_time, query_name)~~
864		~~- counts = return_val[0]~~
865		~~- times = return_val[1]~~
866		-
867		~~- # Normalize times~~
868		~~- min_time = min(times)~~
869		~~- ranges = [min_time, 0]~~
870		-
871		~~- xlabel = 'Hours'~~
872		~~- subplot_index = 111~~
873		~~- fname = query_name + '.png'~~
874		-
875		~~- title = query_obj.get_plot_title(query_name)~~
876		~~- title = title + ' -- ' + start_time + ' - ' + end_time~~
877		~~- ylabel = query_obj.get_plot_ylabel(query_name)~~
878		-
879		~~- # Convert counts to float (from Decimal) to prevent exception when bar plotting~~
880		~~- # Bbox::update_numerix_xy expected numerix array~~
881		~~- counts_new = list()~~
882		~~- for i in range(len(counts)):~~
883		~~- counts_new.append(float(counts[i]))~~
884		~~- counts = counts_new~~
885		-
886		~~- # Generate Histogram~~
887		~~- self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)~~
888		-
889		-
890		~~-"""~~
891		-
892		~~-CLASS :: IntervalReporting~~
893		-
894		~~-Performs queries that take timestamps, query, and an interval as arguments. Data for a single metric~~
895		~~-is generated for each time interval in the time period defined by the start and end timestamps.~~
896		-
897		~~-Types of queries supported:~~
898		-
899		~~-report_banner_metrics_minutely~~
900		~~-report_LP_metrics_minutely~~
901		-
902		~~-"""~~
903		-
904		~~-class IntervalReporting(FundraiserReporting):~~
905		-
906		~~- def run_query(self, start_time, end_time, interval, query_name, metric_name, campaign):~~
907		-
908		~~- self.init_db()~~
909		-
910		~~- query_obj = qs.QueryStore()~~
911		-
912		~~- metrics = mh.AutoVivification()~~
913		~~- times = mh.AutoVivification()~~
914		~~- times_norm = mh.AutoVivification()~~
915		-
916		~~- """ Compose datetime objects to represent the first and last intervals """~~
917		~~- start_time_obj = self.timestamp_to_obj(start_time, 1)~~
918		~~- start_time_obj = start_time_obj.replace(minute=int(math.floor(start_time_obj.minute / interval) * interval))~~
919		~~- start_time_obj_str = self.timestamp_from_obj(start_time_obj, 1, 3)~~
920		-
921		~~- end_time_obj = self.timestamp_to_obj(end_time, 1)~~
922		~~- # end_time_obj = end_time_obj + datetime.timedelta(seconds=-1)~~
923		~~- end_time_obj = end_time_obj.replace(minute=int(math.floor(end_time_obj.minute / interval) * interval))~~
924		~~- end_time_obj_str = self.timestamp_from_obj(end_time_obj, 1, 3)~~
925		-
926		~~- """ The start time for the impression portion of the query should be one second less"""~~
927		-
928		~~- imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)~~
929		~~- imp_start_time_obj_str = self.timestamp_from_obj(imp_start_time_obj, 1, 3)~~
930		-
931		~~- """ Load the SQL File & Format """~~
932		~~- filename = self._sql_path_+ query_name + '.sql'~~
933		~~- sql_stmnt = mh.read_sql(filename)~~
934		-
935		~~- sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time, campaign, interval, imp_start_time_obj_str])~~
936		~~- # print sql_stmnt~~
937		-
938		~~- """ Get Indexes into Query """~~
939		~~- key_index = query_obj.get_banner_index(query_name)~~
940		~~- metric_index = query_obj.get_metric_index(query_name, metric_name)~~
941		~~- time_index = query_obj.get_time_index(query_name)~~
942		-
943		~~- """ Compose the data for each separate donor pipeline artifact """~~
944		~~- try:~~
945		~~- err_msg = sql_stmnt~~
946		~~- self.cur.execute(sql_stmnt)~~
947		-
948		~~- results = self.cur.fetchall()~~
949		~~- final_time = dict() # stores the last timestamp seen~~
950		~~- interval_obj = datetime.timedelta(minutes=interval) # timedelta object used to shift times by _interval_ minutes~~
951		-
952		~~- for row in results:~~
953		-
954		~~- key_name = row[key_index]~~
955		~~- time_obj = self.timestamp_to_obj(row[time_index], 1) # format = 1, 14-digit TS~~
956		-
957		~~- """ For each new dictionary index by key name start a new list if its not already there """~~
958		~~- try:~~
959		~~- metrics[key_name].append(row[metric_index])~~
960		~~- times[key_name].append(time_obj + interval_obj)~~
961		~~- final_time[key_name] = row[time_index]~~
962		~~- except:~~
963		~~- metrics[key_name] = list()~~
964		~~- times[key_name] = list()~~
965		-
966		~~- """ If the first element is not the start time add it~~
967		~~- this will be the case if there is no data for the first interval~~
968		~~- NOTE: two datapoints are added at the beginning to define the first interval """~~
969		~~- if start_time_obj_str != row[time_index]:~~
970		~~- times[key_name].append(start_time_obj)~~
971		~~- metrics[key_name].append(0.0)~~
972		-
973		~~- times[key_name].append(start_time_obj + interval_obj)~~
974		~~- metrics[key_name].append(0.0)~~
975		~~- else:~~
976		~~- metrics[key_name].append(row[metric_index])~~
977		~~- times[key_name].append(time_obj)~~
978		-
979		~~- metrics[key_name].append(row[metric_index])~~
980		~~- times[key_name].append(time_obj + interval_obj)~~
981		-
982		-
983		~~- except:~~
984		~~- self.db.rollback()~~
985		~~- sys.exit("Database Interface Exception:\n" + err_msg)~~
986		-
987		-
988		-
989		~~- """ Ensure that the last time in the list is the endtime less the interval """~~
990		-
991		~~- for key in times.keys():~~
992		~~- if final_time[key_name] != end_time_obj_str:~~
993		~~- times[key].append(end_time_obj)~~
994		~~- metrics[key].append(0.0)~~
995		-
996		~~- self.close_db()~~
997		-
998		~~- return [metrics, times]~~
999		-
1000		~~- """~~
1001		~~- Execute reporting query and generate plots~~
1002		~~- """~~
1003		-
1004		~~- """~~
1005		~~- Execute reporting query and generate plots~~
1006		~~- """~~
1007		~~- def gen_plot(self, metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname):~~
1008		-
1009		~~- pylab.subplot(subplot_index)~~
1010		~~- pylab.figure(num=None,figsize=[26,14])~~
1011		-
1012		~~- line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']~~
1013		-
1014		~~- count = 0~~
1015		~~- for key in metrics.keys():~~
1016		~~- pylab.step(times[key], metrics[key], line_types[count])~~
1017		~~- count = count + 1~~
1018		-
1019		~~- pylab.grid()~~
1020		~~- pylab.xlim(ranges[0], ranges[1])~~
1021		~~- pylab.ylim(ranges[2], ranges[3])~~
1022		~~- pylab.legend(metrics.keys(),loc=2)~~
1023		-
1024		~~- pylab.xlabel(xlabel)~~
1025		~~- pylab.ylabel(ylabel)~~
1026		-
1027		~~- pylab.title(title)~~
1028		~~- pylab.savefig(fname, format='png')~~
1029		-
1030		-
1031		~~- """~~
1032		~~- Execute reporting query and generate plots~~
1033		~~- """~~
1034		~~- def run(self, start_time, end_time, interval, query_name, metric_name, campaign):~~
1035		-
1036		~~- query_obj = qs.QueryStore()~~
1037		-
1038		~~- print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'~~
1039		-
1040		~~- # Run Query~~
1041		~~- return_val = self.run_query(start_time, end_time, interval, query_name, metric_name, campaign)~~
1042		~~- counts = return_val[0]~~
1043		~~- times = return_val[1]~~
1044		-
1045		~~- """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """~~
1046		~~- for key in times.keys():~~
1047		~~- times[key] = self.normalize_timestamps(times[key], False, 2)~~
1048		~~- times[key], counts[key] = self.normalize_intervals(times[key], counts[key], interval)~~
1049		-
1050		~~- # Normalize times~~
1051		~~- min_time = min(times)~~
1052		~~- ranges = [min_time, 0]~~
1053		-
1054		~~- xlabel = 'MINUTES'~~
1055		~~- subplot_index = 111~~
1056		~~- fname = campaign + ' ' + query_name + ' ' + metric_name + '.png'~~
1057		-
1058		~~- metric_full_name = query_obj.get_metric_full_name(metric_name)~~
1059		~~- title = campaign + ': ' + metric_full_name + ' -- ' + start_time + ' - ' + end_time~~
1060		~~- ylabel = metric_full_name~~
1061		-
1062		~~- """ Convert counts to float (from Decimal) to prevent exception when bar plotting~~
1063		~~- Bbox::update_numerix_xy expected numerix array """~~
1064		~~- for key in counts.keys():~~
1065		~~- counts_new = list()~~
1066		~~- for i in range(len(counts[key])):~~
1067		~~- counts_new.append(float(counts[key][i]))~~
1068		~~- counts[key] = counts_new~~
1069		-
1070		~~- """ Determine List maximums """~~
1071		~~- times_max = 0~~
1072		~~- metrics_max = 0~~
1073		-
1074		~~- for key in counts.keys():~~
1075		~~- list_max = max(counts[key])~~
1076		~~- if list_max > metrics_max:~~
1077		~~- metrics_max = list_max~~
1078		-
1079		~~- for key in times.keys():~~
1080		~~- list_max = max(times[key])~~
1081		~~- if list_max > times_max:~~
1082		~~- times_max = list_max~~
1083		-
1084		~~- ranges = list()~~
1085		~~- ranges.append(0.0)~~
1086		~~- ranges.append(times_max * 1.1)~~
1087		~~- ranges.append(0.0)~~
1088		~~- ranges.append(metrics_max * 1.1)~~
1089		-
1090		-
1091		~~- """ Generate plots given data """~~
1092		~~- self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)~~
1093		-
1094		-
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/query_store.py
—	—	@@ -1,437 +0,0 @@
2		-
3		~~-"""~~
4		-
5		~~-Class that contains and organizes query info. Depends on the contents of ../sql/ where filenames are~~
6		~~-coupled with query_name parameters~~
7		-
8		~~-"""~~
9		-
10		~~-__author__ = "Ryan Faulkner"~~
11		~~-__revision__ = "$Rev$"~~
12		~~-__date__ = "November 28th, 2010"~~
13		-
14		-
15		~~-"""~~
16		-
17		~~- CLASS :: QueryStore~~
18		-
19		-
20		-
21		~~- METHODS:~~
22		-
23		~~- format_query~~
24		~~- get_query~~
25		~~- get_query_header~~
26		~~- get_key_index~~
27		~~- get_count_index~~
28		~~- get_time_index~~
29		~~- get_campaign_index~~
30		~~- get_banner_index~~
31		~~- get_landing_page_index~~
32		~~- get_metric_index~~
33		~~- get_plot_title~~
34		~~- get_plot_ylabel~~
35		~~- get_metric_full_name~~
36		-
37		~~-"""~~
38		~~-class QueryStore:~~
39		-
40		~~- def format_query(self, query_name, sql_stmnt, args):~~
41		-
42		~~- if query_name == 'report_campaign_ecomm':~~
43		~~- start_time = args[0]~~
44		~~- sql_stmnt = sql_stmnt % (start_time)~~
45		-
46		~~- elif query_name == 'report_campaign_logs':~~
47		~~- start_time = args[0]~~
48		~~- sql_stmnt = sql_stmnt % (start_time, start_time, start_time)~~
49		-
50		~~- elif query_name == 'report_campaign_ecomm_by_hr':~~
51		~~- start_time = args[0]~~
52		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time)~~
53		-
54		~~- elif query_name == 'report_campaign_logs_by_hr':~~
55		~~- start_time = args[0]~~
56		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \~~
57		~~- start_time, '%', '%', '%', '%', start_time, '%')~~
58		-
59		~~- elif query_name == 'report_impressions_country':~~
60		~~- start_time = args[0]~~
61		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', start_time)~~
62		-
63		~~- elif query_name == 'report_campaign_logs_by_min':~~
64		~~- start_time = args[0]~~
65		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \~~
66		~~- start_time, '%', '%', '%', '%', start_time)~~
67		-
68		~~- elif query_name == 'report_non_US_clicks':~~
69		~~- start_time = args[0]~~
70		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time)~~
71		-
72		~~- elif query_name == 'report_contribution_tracking':~~
73		~~- start_time = args[0]~~
74		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', '%',start_time)~~
75		-
76		~~- elif query_name == 'report_total_amounts_by_hr':~~
77		~~- start_time = args[0]~~
78		~~- end_time = args[1]~~
79		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time)~~
80		-
81		~~- elif query_name == 'report_total_amounts_by_day':~~
82		~~- start_time = args[0]~~
83		~~- end_time = args[1]~~
84		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '', start_time, end_time)~~
85		-
86		~~- elif query_name == 'report_bannerLP_metrics':~~
87		~~- start_time = args[0]~~
88		~~- end_time = args[1]~~
89		~~- campaign = args[2]~~
90		- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, campaign, '%', '%', '%', '%', start_time, end_time, campaign)
91		-
92		~~- elif query_name == 'report_latest_campaign':~~
93		~~- start_time = args[0]~~
94		~~- sql_stmnt = sql_stmnt % (start_time)~~
95		-
96		~~- elif query_name == 'report_confidence_banner':~~
97		~~- start = args[0]~~
98		~~- end = args[1]~~
99		~~- cmpgn = args[2]~~
100		~~- banner = args[3]~~
101		~~- sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, banner, '%','%','%','%','10','10', start, end, cmpgn, banner, \~~
102		~~- '%','%','%','%','10','10', start, end, cmpgn, banner)~~
103		-
104		~~- elif query_name == 'report_confidence_lp':~~
105		~~- start = args[0]~~
106		~~- end = args[1]~~
107		~~- cmpgn = args[2]~~
108		~~- banner = args[3]~~
109		~~- sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, cmpgn, banner, \~~
110		~~- '%','%','%','%','10','10', start, end, cmpgn, banner)~~
111		-
112		~~- elif query_name == 'report_banner_impressions_by_hour':~~
113		~~- start = args[0]~~
114		~~- end = args[1]~~
115		~~- sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)~~
116		-
117		~~- elif query_name == 'report_lp_views_by_hour':~~
118		~~- start = args[0]~~
119		~~- end = args[1]~~
120		~~- sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)~~
121		-
122		~~- elif query_name == 'report_banner_confidence':~~
123		~~- start = args[0]~~
124		~~- end = args[1]~~
125		~~- banner = args[2]~~
126		~~- campaign = args[3]~~
127		~~- sql_stmnt = sql_stmnt % (start, end, banner, start, end, campaign, start, end, banner, start, end, campaign, banner)~~
128		-
129		~~- elif query_name == 'report_LP_confidence':~~
130		~~- start = args[0]~~
131		~~- end = args[1]~~
132		~~- lp = args[2]~~
133		~~- campaign = args[3]~~
134		~~- sql_stmnt = sql_stmnt % (start, end, campaign, lp, start, end, campaign, lp)~~
135		-
136		~~- elif query_name == 'report_bannerLP_confidence':~~
137		~~- start = args[0]~~
138		~~- end = args[1]~~
139		~~- banner = args[2]~~
140		~~- lp = args[3]~~
141		~~- campaign = args[4]~~
142		~~- sql_stmnt = sql_stmnt % (start, end, banner, start, end, banner, campaign, start, end, banner, lp, campaign, start, end, banner, lp, campaign)~~
143		-
144		~~- elif query_name == 'report_ecomm_by_amount':~~
145		~~- start_time = args[0]~~
146		~~- end_time = args[1]~~
147		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, end_time)~~
148		-
149		~~- elif query_name == 'report_ecomm_by_contact':~~
150		~~- where_str = args[0]~~
151		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', where_str)~~
152		-
153		~~- elif query_name == 'report_banner_metrics_minutely':~~
154		~~- start_time = args[0]~~
155		~~- end_time = args[1]~~
156		~~- campaign = args[2]~~
157		~~- interval = args[3]~~
158		~~- imp_start_time = args[4]~~
159		-
160		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, imp_start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign, \~~
161		~~- '%', '%', '%', '%', interval, interval, start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)~~
162		-
163		~~- elif query_name == 'report_LP_metrics_minutely':~~
164		~~- start_time = args[0]~~
165		~~- end_time = args[1]~~
166		~~- campaign = args[2]~~
167		~~- interval = args[3]~~
168		-
169		~~- sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, start_time, end_time, campaign, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)~~
170		-
171		~~- else:~~
172		~~- return 'no such table\n'~~
173		-
174		~~- return sql_stmnt~~
175		-
176		~~- def get_query(self, query_name):~~
177		~~- if query_name == 'report_campaign_logs_by_min':~~
178		~~- return ''~~
179		~~- elif query_name == '':~~
180		~~- return ''~~
181		~~- else:~~
182		~~- return 'no such table'~~
183		-
184		~~- def get_query_header(self, query_name):~~
185		~~- if query_name == 'report_contribution_tracking':~~
186		~~- return ['Time','Banner','Landing Page','Campaign','Converted Amount', 'Suffix']~~
187		~~- elif query_name == 'report_ecomm_by_amount':~~
188		~~- return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount', 'First Donation?', 'Date of First']~~
189		~~- elif query_name == 'report_ecomm_by_contact':~~
190		~~- return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount']~~
191		~~- else:~~
192		~~- return 'no such table'~~
193		-
194		~~- """ Returns the index of the key for the query data """~~
195		~~- def get_key_index(self, query_name):~~
196		~~- if query_name == 'report_banner_metrics_minutely':~~
197		~~- return 1~~
198		~~- elif query_name == 'report_LP_metrics_minutely':~~
199		~~- return 1~~
200		-
201		~~- def get_count_index(self, query_name):~~
202		~~- if query_name == 'report_lp_views_by_hour':~~
203		~~- return 1~~
204		~~- elif query_name == 'report_banner_impressions_by_hour':~~
205		~~- return 1~~
206		~~- else:~~
207		~~- return -1~~
208		-
209		~~- def get_time_index(self, query_name):~~
210		~~- if query_name == 'report_campaign_logs_by_min':~~
211		~~- return 0~~
212		~~- elif query_name == 'report_campaign_logs_by_hr':~~
213		~~- return 0~~
214		~~- elif query_name == 'report_non_US_clicks':~~
215		~~- return 0~~
216		~~- elif query_name == 'report_contribution_tracking':~~
217		~~- return 0~~
218		~~- elif query_name == 'report_bannerLP_metrics':~~
219		~~- return 0~~
220		~~- elif query_name == 'report_latest_campaign':~~
221		~~- return 1~~
222		~~- elif query_name == 'report_banner_impressions_by_hour':~~
223		~~- return 0~~
224		~~- elif query_name == 'report_lp_views_by_hour':~~
225		~~- return 0~~
226		~~- elif query_name == 'report_banner_metrics_minutely':~~
227		~~- return 0~~
228		~~- elif query_name == 'report_LP_metrics_minutely':~~
229		~~- return 0~~
230		~~- else:~~
231		~~- return -1~~
232		-
233		~~- def get_campaign_index(self, query_name):~~
234		~~- if query_name == 'report_campaign_logs_by_min':~~
235		~~- return 2~~
236		~~- elif query_name == 'report_campaign_logs_by_hr':~~
237		~~- return 1~~
238		~~- elif query_name == 'report_contribution_tracking':~~
239		~~- return 3~~
240		~~- elif query_name == 'report_bannerLP_metrics':~~
241		~~- return 1~~
242		~~- elif query_name == 'report_latest_campaign':~~
243		~~- return 0~~
244		~~- else:~~
245		~~- return -1~~
246		-
247		~~- def get_banner_index(self, query_name):~~
248		~~- if query_name == 'report_campaign_logs_by_min':~~
249		~~- return 3~~
250		~~- elif query_name == 'report_campaign_logs_by_hr':~~
251		~~- return 2~~
252		~~- elif query_name == 'report_contribution_tracking':~~
253		~~- return 1~~
254		~~- elif query_name == 'report_bannerLP_metrics':~~
255		~~- return 1~~
256		~~- elif query_name == 'report_banner_metrics_minutely':~~
257		~~- return 1~~
258		~~- else:~~
259		~~- return -1~~
260		-
261		~~- def get_landing_page_index(self, query_name):~~
262		~~- if query_name == 'report_campaign_logs_by_min':~~
263		~~- return 4~~
264		~~- elif query_name == 'report_campaign_logs_by_hr':~~
265		~~- return 3~~
266		~~- elif query_name == 'report_non_US_clicks':~~
267		~~- return 2~~
268		~~- elif query_name == 'report_contribution_tracking':~~
269		~~- return 2~~
270		~~- elif query_name == 'report_bannerLP_metrics':~~
271		~~- return 1~~
272		~~- elif query_name == 'report_LP_metrics_minutely':~~
273		~~- return 1~~
274		~~- else:~~
275		~~- return -1~~
276		-
277		~~- def get_metric_index(self, query_name, metric_name):~~
278		~~- if query_name == 'report_campaign_logs_by_min':~~
279		~~- if metric_name == 'click_rate':~~
280		~~- return 9~~
281		~~- elif query_name == 'report_campaign_logs_by_hr':~~
282		~~- if metric_name == 'click_rate':~~
283		~~- return 8~~
284		~~- elif query_name == 'report_contribution_tracking':~~
285		~~- if metric_name == 'converted_amount':~~
286		~~- return 4~~
287		~~- elif query_name == 'report_bannerLP_metrics':~~
288		~~- if metric_name == 'total_impressions':~~
289		~~- return 2~~
290		~~- elif metric_name == 'impressions':~~
291		~~- return 3~~
292		~~- elif metric_name == 'views':~~
293		~~- return 4~~
294		~~- elif metric_name == 'clicks':~~
295		~~- return 5~~
296		~~- elif metric_name == 'donations':~~
297		~~- return 6~~
298		~~- elif metric_name == 'amount':~~
299		~~- return 7~~
300		~~- elif metric_name == 'click_rate':~~
301		~~- return 8~~
302		~~- elif metric_name == 'completion_rate':~~
303		~~- return 9~~
304		~~- elif metric_name == 'don_per_imp':~~
305		~~- return 10~~
306		~~- elif metric_name == 'amt_per_imp':~~
307		~~- return 11~~
308		~~- elif metric_name == 'don_per_view':~~
309		~~- return 12~~
310		~~- elif metric_name == 'amt_per_view':~~
311		~~- return 13~~
312		~~- elif metric_name == 'amt_per_donation':~~
313		~~- return 14~~
314		~~- elif metric_name == 'max_amt':~~
315		~~- return 15~~
316		~~- elif metric_name == 'pp_don':~~
317		~~- return 16~~
318		~~- elif metric_name == 'cc_don':~~
319		~~- return 17~~
320		~~- elif metric_name == 'paypal_click_thru':~~
321		~~- return 18~~
322		~~- elif metric_name == 'creditcard_click_thru':~~
323		~~- return 19~~
324		~~- else:~~
325		~~- return -1~~
326		~~- elif query_name == 'report_banner_confidence':~~
327		~~- if metric_name == 'click_rate':~~
328		~~- return 7~~
329		~~- elif metric_name == 'don_per_imp':~~
330		~~- return 9~~
331		~~- elif metric_name == 'amt_per_imp':~~
332		~~- return 10~~
333		~~- elif metric_name == 'amt50_per_imp':~~
334		~~- return 14~~
335		~~- elif metric_name == 'amt100_per_imp':~~
336		~~- return 15~~
337		~~- else:~~
338		~~- return -1~~
339		~~- elif query_name == 'report_LP_confidence':~~
340		~~- if metric_name == 'completion_rate':~~
341		~~- return 5~~
342		~~- elif metric_name == 'don_per_view':~~
343		~~- return 6~~
344		~~- elif metric_name == 'amt_per_view':~~
345		~~- return 7~~
346		~~- elif metric_name == 'amt_per_donation':~~
347		~~- return 8~~
348		~~- elif metric_name == 'amt50_per_view':~~
349		~~- return 9~~
350		~~- elif metric_name == 'amt100_per_view':~~
351		~~- return 10~~
352		~~- else:~~
353		~~- return -1~~
354		~~- elif query_name == 'report_bannerLP_confidence':~~
355		~~- if metric_name == 'click_rate':~~
356		~~- return 7~~
357		~~- elif metric_name == 'completion_rate':~~
358		~~- return 8~~
359		~~- elif metric_name == 'don_per_imp':~~
360		~~- return 9~~
361		~~- elif metric_name == 'amt_per_imp':~~
362		~~- return 10~~
363		~~- elif metric_name == 'don_per_view':~~
364		~~- return 11~~
365		~~- elif metric_name == 'amt_per_view':~~
366		~~- return 12~~
367		~~- elif metric_name == 'amt_per_donation':~~
368		~~- return 13~~
369		~~- elif metric_name == 'amt50_per_imp':~~
370		~~- return 14~~
371		~~- elif metric_name == 'amt100_per_imp':~~
372		~~- return 15~~
373		~~- else:~~
374		~~- return -1~~
375		~~- elif query_name == 'report_LP_metrics_minutely':~~
376		~~- if metric_name == 'views':~~
377		~~- return 2~~
378		~~- elif metric_name == 'donations':~~
379		~~- return 4~~
380		~~- elif metric_name == 'amount50':~~
381		~~- return 5~~
382		~~- elif metric_name == 'don_per_view':~~
383		~~- return 7~~
384		~~- elif metric_name == 'amt50_per_view':~~
385		~~- return 9~~
386		~~- else:~~
387		~~- return -1~~
388		~~- elif query_name == 'report_banner_metrics_minutely':~~
389		~~- if metric_name == 'imp':~~
390		~~- return 2~~
391		~~- elif metric_name == 'donations':~~
392		~~- return 5~~
393		~~- elif metric_name == 'amount50':~~
394		~~- return 7~~
395		~~- elif metric_name == 'don_per_imp':~~
396		~~- return 10~~
397		~~- elif metric_name == 'amt50_per_imp':~~
398		~~- return 12~~
399		~~- else:~~
400		~~- return -1~~
401		~~- else:~~
402		~~- return 'no such table'~~
403		-
404		~~- def get_plot_title(self, query_name):~~
405		~~- if query_name == 'report_banner_impressions_by_hour':~~
406		~~- return 'Banner Impressions Over the Past 24 Hours'~~
407		~~- elif query_name == 'report_lp_views_by_hour':~~
408		~~- return 'Landing Page Views Over the Past 24 Hours'~~
409		~~- else:~~
410		~~- return 'no such table'~~
411		-
412		~~- def get_plot_ylabel(self, query_name):~~
413		~~- if query_name == 'report_banner_impressions_by_hour':~~
414		~~- return 'IMPRESSIONS'~~
415		~~- elif query_name == 'report_lp_views_by_hour':~~
416		~~- return 'VIEWS'~~
417		~~- else:~~
418		~~- return'no such table'~~
419		-
420		~~- def get_metric_full_name(self, metric_name):~~
421		~~- if metric_name == 'imp':~~
422		~~- return 'IMPRESSIONS'~~
423		~~- elif metric_name == 'view':~~
424		~~- return 'VIEWS'~~
425		~~- elif metric_name == 'don_per_imp':~~
426		~~- return 'DONATIONS PER IMPRESSION'~~
427		~~- elif metric_name == 'don_per_view':~~
428		~~- return 'DONATIONS PER VIEW'~~
429		~~- elif metric_name == 'amt50_per_imp':~~
430		~~- return 'AMOUNT50 PER IMPRESSION'~~
431		~~- elif metric_name == 'amt50_per_view':~~
432		~~- return 'AMOUNT50 PER VIEW'~~
433		~~- elif metric_name == 'amount50':~~
434		~~- return 'AMOUNT50'~~
435		~~- elif metric_name == 'donations':~~
436		~~- return 'DONATIONS'~~
437		~~- else:~~
438		~~- return'no such table'~~
\ No newline at end of file
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/compute_confidence.py
—	—	@@ -2,9 +2,14 @@
3	3
4	4	"""
5	5
6		~~-Generates confidence estimate for a test~~
	6	+This module defines reporting and analysis for determining the statistical confidence
	7	+of of choice metrics over time.
7	8
	9	+!!MODIFY!!
8	10
	11	+- This should extend DataReporting.DataReporting
	12	+- The querying functionality should be exported to DataLoader
	13	+
9	14	"""
10	15
11	16	__author__ = "Ryan Faulkner"
—	—	@@ -33,9 +38,13 @@
34	39
35	40
36	41	METHODS:
37		-
38		-
39		-
	42	+ query_tables
	43	+ get_time_lists
	44	+ gen_plot
	45	+ run_test
	46	+ compute_parameters
	47	+ print_metrics
	48	+
40	49	"""
41	50	class ConfidenceTest(DataLoader):
42	51
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US banner imp.png
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US banner imp.png
___________________________________________________________________
Added: svn:mime-type
43	52	+ application/octet-stream
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/QueryData.py
—	—	@@ -0,0 +1,431 @@
	2	+
	3	+"""
	4	+
	5	+This module effectively functions as a Singleton class.
	6	+
	7	+This module contains and organizes query info. Depends on the contents of ../sql/ where filenames are
	8	+coupled with query_name parameters
	9	+
	10	+ METHODS:
	11	+
	12	+ format_query
	13	+ get_query
	14	+ get_query_header
	15	+ get_key_index
	16	+ get_count_index
	17	+ get_time_index
	18	+ get_campaign_index
	19	+ get_banner_index
	20	+ get_landing_page_index
	21	+ get_metric_index
	22	+ get_plot_title
	23	+ get_plot_ylabel
	24	+ get_metric_full_name
	25	+
	26	+"""
	27	+
	28	+__author__ = "Ryan Faulkner"
	29	+__revision__ = "$Rev$"
	30	+__date__ = "November 28th, 2010"
	31	+
	32	+
	33	+
	34	+def format_query(query_name, sql_stmnt, args):
	35	+
	36	+ if query_name == 'report_campaign_ecomm':
	37	+ start_time = args[0]
	38	+ sql_stmnt = sql_stmnt % (start_time)
	39	+
	40	+ elif query_name == 'report_campaign_logs':
	41	+ start_time = args[0]
	42	+ sql_stmnt = sql_stmnt % (start_time, start_time, start_time)
	43	+
	44	+ elif query_name == 'report_campaign_ecomm_by_hr':
	45	+ start_time = args[0]
	46	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time)
	47	+
	48	+ elif query_name == 'report_campaign_logs_by_hr':
	49	+ start_time = args[0]
	50	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
	51	+ start_time, '%', '%', '%', '%', start_time, '%')
	52	+
	53	+ elif query_name == 'report_impressions_country':
	54	+ start_time = args[0]
	55	+ sql_stmnt = sql_stmnt % ('%', '%', '%', start_time)
	56	+
	57	+ elif query_name == 'report_campaign_logs_by_min':
	58	+ start_time = args[0]
	59	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
	60	+ start_time, '%', '%', '%', '%', start_time)
	61	+
	62	+ elif query_name == 'report_non_US_clicks':
	63	+ start_time = args[0]
	64	+ sql_stmnt = sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time)
	65	+
	66	+ elif query_name == 'report_contribution_tracking':
	67	+ start_time = args[0]
	68	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', '%',start_time)
	69	+
	70	+ elif query_name == 'report_total_amounts_by_hr':
	71	+ start_time = args[0]
	72	+ end_time = args[1]
	73	+ sql_stmnt = sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time)
	74	+
	75	+ elif query_name == 'report_total_amounts_by_day':
	76	+ start_time = args[0]
	77	+ end_time = args[1]
	78	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '', start_time, end_time)
	79	+
	80	+ elif query_name == 'report_bannerLP_metrics':
	81	+ start_time = args[0]
	82	+ end_time = args[1]
	83	+ campaign = args[2]
	84	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, campaign, '%', '%', '%', '%', start_time, end_time, campaign)
	85	+
	86	+ elif query_name == 'report_latest_campaign':
	87	+ start_time = args[0]
	88	+ sql_stmnt = sql_stmnt % (start_time)
	89	+
	90	+ elif query_name == 'report_confidence_banner':
	91	+ start = args[0]
	92	+ end = args[1]
	93	+ cmpgn = args[2]
	94	+ banner = args[3]
	95	+ sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, banner, '%','%','%','%','10','10', start, end, cmpgn, banner, \
	96	+ '%','%','%','%','10','10', start, end, cmpgn, banner)
	97	+
	98	+ elif query_name == 'report_confidence_lp':
	99	+ start = args[0]
	100	+ end = args[1]
	101	+ cmpgn = args[2]
	102	+ banner = args[3]
	103	+ sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, cmpgn, banner, \
	104	+ '%','%','%','%','10','10', start, end, cmpgn, banner)
	105	+
	106	+ elif query_name == 'report_banner_impressions_by_hour':
	107	+ start = args[0]
	108	+ end = args[1]
	109	+ sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)
	110	+
	111	+ elif query_name == 'report_lp_views_by_hour':
	112	+ start = args[0]
	113	+ end = args[1]
	114	+ sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)
	115	+
	116	+ elif query_name == 'report_banner_confidence':
	117	+ start = args[0]
	118	+ end = args[1]
	119	+ banner = args[2]
	120	+ campaign = args[3]
	121	+ sql_stmnt = sql_stmnt % (start, end, banner, start, end, campaign, start, end, banner, start, end, campaign, banner)
	122	+
	123	+ elif query_name == 'report_LP_confidence':
	124	+ start = args[0]
	125	+ end = args[1]
	126	+ lp = args[2]
	127	+ campaign = args[3]
	128	+ sql_stmnt = sql_stmnt % (start, end, campaign, lp, start, end, campaign, lp)
	129	+
	130	+ elif query_name == 'report_bannerLP_confidence':
	131	+ start = args[0]
	132	+ end = args[1]
	133	+ banner = args[2]
	134	+ lp = args[3]
	135	+ campaign = args[4]
	136	+ sql_stmnt = sql_stmnt % (start, end, banner, start, end, banner, campaign, start, end, banner, lp, campaign, start, end, banner, lp, campaign)
	137	+
	138	+ elif query_name == 'report_ecomm_by_amount':
	139	+ start_time = args[0]
	140	+ end_time = args[1]
	141	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, end_time)
	142	+
	143	+ elif query_name == 'report_ecomm_by_contact':
	144	+ where_str = args[0]
	145	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', where_str)
	146	+
	147	+ elif query_name == 'report_banner_metrics_minutely':
	148	+ start_time = args[0]
	149	+ end_time = args[1]
	150	+ campaign = args[2]
	151	+ interval = args[3]
	152	+ imp_start_time = args[4]
	153	+
	154	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, imp_start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign, \
	155	+ '%', '%', '%', '%', interval, interval, start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)
	156	+
	157	+ elif query_name == 'report_LP_metrics_minutely':
	158	+ start_time = args[0]
	159	+ end_time = args[1]
	160	+ campaign = args[2]
	161	+ interval = args[3]
	162	+
	163	+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, start_time, end_time, campaign, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)
	164	+
	165	+ else:
	166	+ return 'no such table\n'
	167	+
	168	+ return sql_stmnt
	169	+
	170	+def get_query(query_name):
	171	+ if query_name == 'report_campaign_logs_by_min':
	172	+ return ''
	173	+ elif query_name == '':
	174	+ return ''
	175	+ else:
	176	+ return 'no such table'
	177	+
	178	+def get_query_header(query_name):
	179	+ if query_name == 'report_contribution_tracking':
	180	+ return ['Time','Banner','Landing Page','Campaign','Converted Amount', 'Suffix']
	181	+ elif query_name == 'report_ecomm_by_amount':
	182	+ return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount', 'First Donation?', 'Date of First']
	183	+ elif query_name == 'report_ecomm_by_contact':
	184	+ return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount']
	185	+ else:
	186	+ return 'no such table'
	187	+
	188	+""" Returns the index of the key for the query data """
	189	+def get_key_index(query_name):
	190	+ if query_name == 'report_banner_metrics_minutely':
	191	+ return 1
	192	+ elif query_name == 'report_LP_metrics_minutely':
	193	+ return 1
	194	+
	195	+def get_count_index(query_name):
	196	+ if query_name == 'report_lp_views_by_hour':
	197	+ return 1
	198	+ elif query_name == 'report_banner_impressions_by_hour':
	199	+ return 1
	200	+ else:
	201	+ return -1
	202	+
	203	+def get_time_index(query_name):
	204	+ if query_name == 'report_campaign_logs_by_min':
	205	+ return 0
	206	+ elif query_name == 'report_campaign_logs_by_hr':
	207	+ return 0
	208	+ elif query_name == 'report_non_US_clicks':
	209	+ return 0
	210	+ elif query_name == 'report_contribution_tracking':
	211	+ return 0
	212	+ elif query_name == 'report_bannerLP_metrics':
	213	+ return 0
	214	+ elif query_name == 'report_latest_campaign':
	215	+ return 1
	216	+ elif query_name == 'report_banner_impressions_by_hour':
	217	+ return 0
	218	+ elif query_name == 'report_lp_views_by_hour':
	219	+ return 0
	220	+ elif query_name == 'report_banner_metrics_minutely':
	221	+ return 0
	222	+ elif query_name == 'report_LP_metrics_minutely':
	223	+ return 0
	224	+ else:
	225	+ return -1
	226	+
	227	+def get_campaign_index(query_name):
	228	+ if query_name == 'report_campaign_logs_by_min':
	229	+ return 2
	230	+ elif query_name == 'report_campaign_logs_by_hr':
	231	+ return 1
	232	+ elif query_name == 'report_contribution_tracking':
	233	+ return 3
	234	+ elif query_name == 'report_bannerLP_metrics':
	235	+ return 1
	236	+ elif query_name == 'report_latest_campaign':
	237	+ return 0
	238	+ else:
	239	+ return -1
	240	+
	241	+def get_banner_index(query_name):
	242	+ if query_name == 'report_campaign_logs_by_min':
	243	+ return 3
	244	+ elif query_name == 'report_campaign_logs_by_hr':
	245	+ return 2
	246	+ elif query_name == 'report_contribution_tracking':
	247	+ return 1
	248	+ elif query_name == 'report_bannerLP_metrics':
	249	+ return 1
	250	+ elif query_name == 'report_banner_metrics_minutely':
	251	+ return 1
	252	+ else:
	253	+ return -1
	254	+
	255	+def get_landing_page_index(query_name):
	256	+ if query_name == 'report_campaign_logs_by_min':
	257	+ return 4
	258	+ elif query_name == 'report_campaign_logs_by_hr':
	259	+ return 3
	260	+ elif query_name == 'report_non_US_clicks':
	261	+ return 2
	262	+ elif query_name == 'report_contribution_tracking':
	263	+ return 2
	264	+ elif query_name == 'report_bannerLP_metrics':
	265	+ return 1
	266	+ elif query_name == 'report_LP_metrics_minutely':
	267	+ return 1
	268	+ else:
	269	+ return -1
	270	+
	271	+def get_metric_index(query_name, metric_name):
	272	+ if query_name == 'report_campaign_logs_by_min':
	273	+ if metric_name == 'click_rate':
	274	+ return 9
	275	+ elif query_name == 'report_campaign_logs_by_hr':
	276	+ if metric_name == 'click_rate':
	277	+ return 8
	278	+ elif query_name == 'report_contribution_tracking':
	279	+ if metric_name == 'converted_amount':
	280	+ return 4
	281	+ elif query_name == 'report_bannerLP_metrics':
	282	+ if metric_name == 'total_impressions':
	283	+ return 2
	284	+ elif metric_name == 'impressions':
	285	+ return 3
	286	+ elif metric_name == 'views':
	287	+ return 4
	288	+ elif metric_name == 'clicks':
	289	+ return 5
	290	+ elif metric_name == 'donations':
	291	+ return 6
	292	+ elif metric_name == 'amount':
	293	+ return 7
	294	+ elif metric_name == 'click_rate':
	295	+ return 8
	296	+ elif metric_name == 'completion_rate':
	297	+ return 9
	298	+ elif metric_name == 'don_per_imp':
	299	+ return 10
	300	+ elif metric_name == 'amt_per_imp':
	301	+ return 11
	302	+ elif metric_name == 'don_per_view':
	303	+ return 12
	304	+ elif metric_name == 'amt_per_view':
	305	+ return 13
	306	+ elif metric_name == 'amt_per_donation':
	307	+ return 14
	308	+ elif metric_name == 'max_amt':
	309	+ return 15
	310	+ elif metric_name == 'pp_don':
	311	+ return 16
	312	+ elif metric_name == 'cc_don':
	313	+ return 17
	314	+ elif metric_name == 'paypal_click_thru':
	315	+ return 18
	316	+ elif metric_name == 'creditcard_click_thru':
	317	+ return 19
	318	+ else:
	319	+ return -1
	320	+ elif query_name == 'report_banner_confidence':
	321	+ if metric_name == 'click_rate':
	322	+ return 7
	323	+ elif metric_name == 'don_per_imp':
	324	+ return 9
	325	+ elif metric_name == 'amt_per_imp':
	326	+ return 10
	327	+ elif metric_name == 'amt50_per_imp':
	328	+ return 14
	329	+ elif metric_name == 'amt100_per_imp':
	330	+ return 15
	331	+ else:
	332	+ return -1
	333	+ elif query_name == 'report_LP_confidence':
	334	+ if metric_name == 'completion_rate':
	335	+ return 5
	336	+ elif metric_name == 'don_per_view':
	337	+ return 6
	338	+ elif metric_name == 'amt_per_view':
	339	+ return 7
	340	+ elif metric_name == 'amt_per_donation':
	341	+ return 8
	342	+ elif metric_name == 'amt50_per_view':
	343	+ return 9
	344	+ elif metric_name == 'amt100_per_view':
	345	+ return 10
	346	+ else:
	347	+ return -1
	348	+ elif query_name == 'report_bannerLP_confidence':
	349	+ if metric_name == 'click_rate':
	350	+ return 7
	351	+ elif metric_name == 'completion_rate':
	352	+ return 8
	353	+ elif metric_name == 'don_per_imp':
	354	+ return 9
	355	+ elif metric_name == 'amt_per_imp':
	356	+ return 10
	357	+ elif metric_name == 'don_per_view':
	358	+ return 11
	359	+ elif metric_name == 'amt_per_view':
	360	+ return 12
	361	+ elif metric_name == 'amt_per_donation':
	362	+ return 13
	363	+ elif metric_name == 'amt50_per_imp':
	364	+ return 14
	365	+ elif metric_name == 'amt100_per_imp':
	366	+ return 15
	367	+ else:
	368	+ return -1
	369	+ elif query_name == 'report_LP_metrics_minutely':
	370	+ if metric_name == 'views':
	371	+ return 2
	372	+ elif metric_name == 'donations':
	373	+ return 4
	374	+ elif metric_name == 'amount50':
	375	+ return 5
	376	+ elif metric_name == 'don_per_view':
	377	+ return 7
	378	+ elif metric_name == 'amt50_per_view':
	379	+ return 9
	380	+ else:
	381	+ return -1
	382	+ elif query_name == 'report_banner_metrics_minutely':
	383	+ if metric_name == 'imp':
	384	+ return 2
	385	+ elif metric_name == 'donations':
	386	+ return 5
	387	+ elif metric_name == 'amount50':
	388	+ return 7
	389	+ elif metric_name == 'don_per_imp':
	390	+ return 10
	391	+ elif metric_name == 'amt50_per_imp':
	392	+ return 12
	393	+ else:
	394	+ return -1
	395	+ else:
	396	+ return 'no such table'
	397	+
	398	+def get_plot_title(query_name):
	399	+ if query_name == 'report_banner_impressions_by_hour':
	400	+ return 'Banner Impressions Over the Past 24 Hours'
	401	+ elif query_name == 'report_lp_views_by_hour':
	402	+ return 'Landing Page Views Over the Past 24 Hours'
	403	+ else:
	404	+ return 'no such table'
	405	+
	406	+def get_plot_ylabel(query_name):
	407	+ if query_name == 'report_banner_impressions_by_hour':
	408	+ return 'IMPRESSIONS'
	409	+ elif query_name == 'report_lp_views_by_hour':
	410	+ return 'VIEWS'
	411	+ else:
	412	+ return'no such table'
	413	+
	414	+def get_metric_full_name(metric_name):
	415	+ if metric_name == 'imp':
	416	+ return 'IMPRESSIONS'
	417	+ elif metric_name == 'view':
	418	+ return 'VIEWS'
	419	+ elif metric_name == 'don_per_imp':
	420	+ return 'DONATIONS PER IMPRESSION'
	421	+ elif metric_name == 'don_per_view':
	422	+ return 'DONATIONS PER VIEW'
	423	+ elif metric_name == 'amt50_per_imp':
	424	+ return 'AMOUNT50 PER IMPRESSION'
	425	+ elif metric_name == 'amt50_per_view':
	426	+ return 'AMOUNT50 PER VIEW'
	427	+ elif metric_name == 'amount50':
	428	+ return 'AMOUNT50'
	429	+ elif metric_name == 'donations':
	430	+ return 'DONATIONS'
	431	+ else:
	432	+ return'no such table'
\ No newline at end of file
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py
—	—	@@ -1,4 +1,15 @@
	2	+"""
2	3
	4	+This module provides access to the datasource and enables querying. The class
	5	+atructure defined has DataLoader as the base which outlines the basic members
	6	+and functionality. This interface is extended for interaction with specific
	7	+sources of data.
	8	+
	9	+These classes are used to define the data sources for the DataReporting family of
	10	+classes in an Adapter structural pattern.
	11	+
	12	+"""
	13	+
3	14	__author__ = "Ryan Faulkner"
4	15	__revision__ = "$Rev$"
5	16	__date__ = "April 8th, 2010"
—	—	@@ -6,8 +17,13 @@
7	18
8	19	import sys
9	20	import MySQLdb
	21	+import math
	22	+import datetime
	23	+
	24	+import miner_help as mh
	25	+import QueryData as QD
	26	+import TimestampProcessor as TP
10	27
11		-
12	28	"""
13	29
14	30	CLASS :: DataLoader
—	—	@@ -18,28 +34,174 @@
19	35	"""
20	36	class DataLoader(object):
21	37
22		~~- # Database and Cursor objects~~
23		~~- db = None~~
24		~~- cur = None~~
	38	+ """ Database and Cursor objects """
	39	+ _db_ = None
	40	+ _cur_ = None
	41	+ _sql_path_ = '../sql/' # Relative path for SQL files to be processed
	42	+ _query_names_ = dict()
25	43
26	44	def init_db(self):
	45	+
27	46	""" Establish connection """
28	47	#db = MySQLdb.connect(host='db10.pmtpa.wmnet', user='rfaulk', db='faulkner')
29		~~- self.db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)~~
	48	+ self._db_ = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
30	49	#self.db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner')
31		-
	50	+
32	51	""" Create cursor """
33		~~- self.cur = self.db.cursor()~~
	52	+ self._cur_ = self._db_.cursor()
34	53
35	54	def close_db(self):
36		~~- self.cur.close()~~
37		~~- self.db.close()~~
	55	+ self._cur_.close()
	56	+ self._db_.close()
	57	+
	58	+ """
	59	+ <DESCRIPTION>
38	60
	61	+ INPUT:
	62	+ query_type -
	63	+
	64	+
	65	+ RETURN:
	66	+ -
	67	+
	68	+ """
	69	+ def get_sql_filename_for_query(self, query_type):
	70	+ return ''
39	71
40	72	class IntervalReportingLoader(DataLoader):
	73	+
	74	+ def __init__(self):
	75	+ self._query_names_['banner'] = 'report_banner_metrics_minutely'
	76	+ self._query_names_['LP'] = 'report_LP_metrics_minutely'
	77	+
	78	+ def get_sql_filename_for_query(self, query_type):
	79	+ return self._query_names_[query_type]
	80	+
	81	+ """
	82	+ <DESCRIPTION>
	83	+
	84	+ INPUT:
	85	+ start_time -
	86	+ end_time -
	87	+ interval -
	88	+ query_type -
	89	+ metric_name -
	90	+ campaign -
	91	+
	92	+
	93	+ RETURN:
	94	+ metrics -
	95	+ times -
	96	+ """
	97	+ def run_query(self, start_time, end_time, interval, query_type, metric_name, campaign):
	98	+
	99	+ self.init_db()
	100	+
	101	+ try:
	102	+ query_name = self.get_sql_filename_for_query(query_type)
	103	+ except KeyError:
	104	+ print 'Could not find a query for type: ' + query_type
	105	+ sys.exit(2)
	106	+
	107	+ metrics = mh.AutoVivification()
	108	+ times = mh.AutoVivification()
	109	+ times_norm = mh.AutoVivification()
	110	+
	111	+ """ Compose datetime objects to represent the first and last intervals """
	112	+ start_time_obj = TP.timestamp_to_obj(start_time, 1)
	113	+ start_time_obj = start_time_obj.replace(minute=int(math.floor(start_time_obj.minute / interval) * interval))
	114	+ start_time_obj_str = TP.timestamp_from_obj(start_time_obj, 1, 3)
	115	+
	116	+ end_time_obj = TP.timestamp_to_obj(end_time, 1)
	117	+ # end_time_obj = end_time_obj + datetime.timedelta(seconds=-1)
	118	+ end_time_obj = end_time_obj.replace(minute=int(math.floor(end_time_obj.minute / interval) * interval))
	119	+ end_time_obj_str = TP.timestamp_from_obj(end_time_obj, 1, 3)
	120	+
	121	+ """ The start time for the impression portion of the query should be one second less"""
	122	+
	123	+ imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
	124	+ imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
	125	+
	126	+ """ Load the SQL File & Format """
	127	+ filename = self._sql_path_+ query_name + '.sql'
	128	+ sql_stmnt = mh.read_sql(filename)
	129	+
	130	+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign, interval, imp_start_time_obj_str])
	131	+
	132	+ """ Get Indexes into Query """
	133	+ key_index = QD.get_banner_index(query_name)
	134	+ metric_index = QD.get_metric_index(query_name, metric_name)
	135	+ time_index = QD.get_time_index(query_name)
	136	+
	137	+ """ Compose the data for each separate donor pipeline artifact """
	138	+ try:
	139	+ err_msg = sql_stmnt
	140	+ self._cur_.execute(sql_stmnt)
	141	+
	142	+ results = self._cur_.fetchall()
	143	+ final_time = dict() # stores the last timestamp seen
	144	+ interval_obj = datetime.timedelta(minutes=interval) # timedelta object used to shift times by _interval_ minutes
	145	+
	146	+ for row in results:
	147	+
	148	+ key_name = row[key_index]
	149	+ time_obj = TP.timestamp_to_obj(row[time_index], 1) # format = 1, 14-digit TS
	150	+
	151	+ """ For each new dictionary index by key name start a new list if its not already there """
	152	+ try:
	153	+ metrics[key_name].append(row[metric_index])
	154	+ times[key_name].append(time_obj + interval_obj)
	155	+ final_time[key_name] = row[time_index]
	156	+ except:
	157	+ metrics[key_name] = list()
	158	+ times[key_name] = list()
	159	+
	160	+ """ If the first element is not the start time add it
	161	+ this will be the case if there is no data for the first interval
	162	+ NOTE: two datapoints are added at the beginning to define the first interval """
	163	+ if start_time_obj_str != row[time_index]:
	164	+ times[key_name].append(start_time_obj)
	165	+ metrics[key_name].append(0.0)
	166	+
	167	+ times[key_name].append(start_time_obj + interval_obj)
	168	+ metrics[key_name].append(0.0)
	169	+ else:
	170	+ metrics[key_name].append(row[metric_index])
	171	+ times[key_name].append(time_obj)
	172	+
	173	+ metrics[key_name].append(row[metric_index])
	174	+ times[key_name].append(time_obj + interval_obj)
	175	+
	176	+
	177	+ except Exception as inst:
	178	+ print type(inst) # the exception instance
	179	+ print inst.args # arguments stored in .args
	180	+ print inst # __str__ allows args to printed directly
	181	+
	182	+ self._db_.rollback()
	183	+ sys.exit(0)
	184	+
	185	+
	186	+
	187	+ """ Ensure that the last time in the list is the endtime less the interval """
	188	+
	189	+ for key in times.keys():
	190	+ if final_time[key_name] != end_time_obj_str:
	191	+ times[key].append(end_time_obj)
	192	+ metrics[key].append(0.0)
	193	+
	194	+ self.close_db()
	195	+
	196	+ """ Convert counts to float (from Decimal) to prevent exception when bar plotting
	197	+ Bbox::update_numerix_xy expected numerix array """
	198	+ for key in metrics.keys():
	199	+ metrics_new = list()
	200	+ for i in range(len(metrics[key])):
	201	+ metrics_new.append(float(metrics[key][i]))
	202	+ metrics[key] = metrics_new
	203	+
	204	+ return [metrics, times]
41	205
42		~~- def run_query(self):~~
43		~~- return~~
44	206
45	207	class BannerLPReportingLoader(DataLoader):
46	208
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py
—	—	@@ -0,0 +1,999 @@
	2	+
	3	+
	4	+"""
	5	+
	6	+This module is used to define the reporting methodologies on different types of data. The base class
	7	+DataReporting is defined to outline the general functionality of the reporting architecture and
	8	+functionality which includes generating the data via a dataloader object and transforming the data
	9	+among different reporting mediums including matlab plots (primary medium) and html tables.
	10	+
	11	+The DataLoader class decouples the data access of the reports using the Adapter structural pattern.
	12	+
	13	+"""
	14	+
	15	+__author__ = "Ryan Faulkner"
	16	+__revision__ = "$Rev$"
	17	+__date__ = "December 16th, 2010"
	18	+
	19	+
	20	+import sys
	21	+sys.path.append('../')
	22	+
	23	+import matplotlib
	24	+import datetime
	25	+import MySQLdb
	26	+import pylab
	27	+import HTML
	28	+import math
	29	+
	30	+import QueryData as QD
	31	+import miner_help as mh
	32	+import TimestampProcessor as TP
	33	+import DataLoader as DL
	34	+
	35	+matplotlib.use('Agg')
	36	+
	37	+
	38	+
	39	+"""
	40	+
	41	+ BASE CLASS :: DataReporting
	42	+
	43	+ Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include:
	44	+
	45	+ METHODS:
	46	+
	47	+ run_query - format and execute the query to obtain data
	48	+ gen_plot - plots the results of the report
	49	+ write_to_html_table - writes the results to an HTML table
	50	+ run
	51	+
	52	+"""
	53	+class DataReporting(object):
	54	+
	55	+ _data_loader_ = None
	56	+
	57	+ """
	58	+
	59	+ Smooths a list of values
	60	+
	61	+ INPUT:
	62	+ values - a list of datetime objects
	63	+ window_length - indicate whether the list counts back from the end
	64	+
	65	+ RETURN:
	66	+ new_values - list of smoothed values
	67	+
	68	+ """
	69	+ def smooth(self, values, window_length):
	70	+
	71	+ window_length = int(math.floor(window_length / 2))
	72	+
	73	+ if window_length < 1:
	74	+ return values
	75	+
	76	+ list_len = len(values)
	77	+ new_values = list()
	78	+
	79	+ for i in range(list_len):
	80	+ index_left = max([0, i - window_length])
	81	+ index_right = min([list_len - 1, i + window_length])
	82	+
	83	+ width = index_right - index_left + 1
	84	+
	85	+ new_val = sum(values[index_left : (index_right + 1)]) / width
	86	+ new_values.append(new_val)
	87	+
	88	+ return new_values
	89	+
	90	+ """
	91	+
	92	+ workaround for issue with tuple objects in HTML.py
	93	+ MySQLdb returns unfamiliar tuple elements from its fetchall() method
	94	+ this is probably a version problem since the issue popped up in 2.5 but not 2.6
	95	+
	96	+ INPUT:
	97	+ row - row object returned from MySQLdb.fetchall()
	98	+
	99	+ RETURN:
	100	+ l - a list of tuple objects from the db
	101	+
	102	+ """
	103	+ def listify(self, row):
	104	+ l = []
	105	+ for i in row:
	106	+ l.append(i)
	107	+ return l
	108	+
	109	+
	110	+ """
	111	+
	112	+ To be overloaded by subclasses for specific types of queries
	113	+
	114	+ INPUT:
	115	+ values - a list of datetime objects
	116	+ window_length - indicate whether the list counts back from the end
	117	+
	118	+ RETURN:
	119	+ return_status - integer, 0 indicates un-exceptional execution
	120	+
	121	+ """
	122	+ def run_query(self, start_time, end_time, query_name, metric_name):
	123	+ return 0
	124	+
	125	+
	126	+ """
	127	+
	128	+ To be overloaded by subclasses for different plotting behaviour
	129	+
	130	+ INPUT:
	131	+ values - a list of datetime objects
	132	+ window_length - indicate whether the list counts back from the end
	133	+
	134	+ RETURN:
	135	+ return_status - integer, 0 indicates un-exceptional execution
	136	+
	137	+ """
	138	+ def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, subplot_index, fname):
	139	+ return 0
	140	+
	141	+ """
	142	+
	143	+ To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py)
	144	+
	145	+ INPUT:
	146	+ values - a list of datetime objects
	147	+ window_length - indicate whether the list counts back from the end
	148	+
	149	+ RETURN:
	150	+ return_status - integer, 0 indicates un-exceptional execution
	151	+
	152	+ """
	153	+ def write_to_html_table(self):
	154	+
	155	+ """
	156	+ FROM TEST REPORTING
	157	+
	158	+ query_obj = qs.query_store()
	159	+
	160	+ # Populate the campaigns table
	161	+ s1 = 'drop table if exists campaigns;'
	162	+ s2 = 'create table campaigns as (select utm_campaign from drupal.contribution_tracking where ts > \'%s\' group by utm_campaign having count(*) > 100);' % (start_time)
	163	+ cur.execute(s1)
	164	+ cur.execute(s2)
	165	+
	166	+ table_data = []
	167	+ sql_stmnt = mh.read_sql(sql_path + query_type + '.sql');
	168	+
	169	+ # open the html file for writing
	170	+ f = open(html_path + query_type + '.html', 'w')
	171	+
	172	+ format_start_time = start_time[0:4] + '-' + start_time[4:6] + '-' + start_time[6:8] + '-' + start_time[8:10] + 'HRs'
	173	+
	174	+ # Formats the statement according to query type
	175	+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
	176	+
	177	+ # html formatting
	178	+ if query_type == 'report_campaign_ecomm':
	179	+ f.write('<br>Donation data since ' + format_start_time + ' ... <br><br>')
	180	+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
	181	+
	182	+ elif query_type == 'report_campaign_logs':
	183	+ f.write('<br>Impression and landing page data since ' + format_start_time+ ' ... <br><br>')
	184	+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
	185	+
	186	+ elif query_type == 'report_campaign_ecomm_by_hr':
	187	+ f.write('<br>Donation data by hour since ' + format_start_time + ' ... <br><br>')
	188	+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
	189	+
	190	+ elif query_type == 'report_campaign_logs_by_hr':
	191	+ f.write('<br>Impression and landing page by hour since ' + format_start_time + ' ... <br><br>')
	192	+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
	193	+
	194	+ else:
	195	+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
	196	+
	197	+ try:
	198	+ err_msg = select_stmnt
	199	+ cur.execute(select_stmnt)
	200	+
	201	+ results = cur.fetchall()
	202	+
	203	+ for row in results:
	204	+ cpRow = listify(row)
	205	+ # t.rows.append(row)
	206	+ table_data.append(cpRow)
	207	+
	208	+ except:
	209	+ db.rollback()
	210	+ sys.exit("Database Interface Exception:\n" + err_msg)
	211	+
	212	+
	213	+ t = HTML.table(table_data, header_row=header)
	214	+ htmlcode = str(t)
	215	+
	216	+ f.write(htmlcode)
	217	+ f.close()
	218	+
	219	+ return htmlcode
	220	+
	221	+ """
	222	+ return 0
	223	+
	224	+
	225	+
	226	+ """
	227	+
	228	+ The access point of DataReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc.
	229	+ To be overloaded by subclasses
	230	+
	231	+ INPUT:
	232	+
	233	+ RETURN:
	234	+ return_status - integer, 0 indicates un-exceptional execution
	235	+
	236	+ """
	237	+ def run(self):
	238	+ return
	239	+
	240	+
	241	+
	242	+"""
	243	+
	244	+CLASS :: ^TotalAmountsReporting^
	245	+
	246	+This subclass handles reporting on total amounts for the fundraiser.
	247	+
	248	+"""
	249	+
	250	+class TotalAmountsReporting(DataReporting):
	251	+
	252	+ def __init__(self):
	253	+ self.data = []
	254	+
	255	+ def run_query(self, start_time, end_time, query_name, descriptor):
	256	+
	257	+ self.init_db()
	258	+
	259	+ # Load the SQL File & Format
	260	+ filename = self._sql_path_ + query_name + '.sql'
	261	+ sql_stmnt = mh.read_sql(filename)
	262	+ sql_stmnt = QD.format_query(query_name + descriptor, sql_stmnt, [start_time, end_time])
	263	+
	264	+ labels = [None] * 21
	265	+ labels[0] = 'clicks'
	266	+ labels[1] = 'donations'
	267	+ labels[2] = 'total amount'
	268	+ labels[3] = 'banner amount'
	269	+ labels[4] = 'US amount'
	270	+ labels[5] = 'EN amount'
	271	+ labels[6] = 'Other Amount'
	272	+ labels[7] = 'Email Amount'
	273	+ labels[8] = 'Recurring Guess'
	274	+ labels[9] = 'completion_rate'
	275	+ labels[10] = 'pp_clicks'
	276	+ labels[11] = 'pp_donations'
	277	+ labels[12] = 'pp_completion'
	278	+ labels[13] = 'pp_amount'
	279	+ labels[14] = 'pp_max_amount'
	280	+ labels[15] = 'cc_clicks'
	281	+ labels[16] = 'cc_donations'
	282	+ labels[17] = 'cc_completion'
	283	+ labels[18] = 'cc_amount'
	284	+ labels[19] = 'cc_max_amount'
	285	+ labels[20] = 'total_amt50'
	286	+
	287	+
	288	+ num_keys = len(labels)
	289	+
	290	+ lists = list()
	291	+ for i in range(num_keys):
	292	+ lists.append(list())
	293	+
	294	+ # Composes the data for each banner
	295	+ try:
	296	+ err_msg = sql_stmnt
	297	+ self.cur.execute(sql_stmnt)
	298	+
	299	+ # This query store records according to dates
	300	+ results = self.cur.fetchall()
	301	+ for row in results:
	302	+ for i in range(num_keys):
	303	+ lists[i].append(row[i+1])
	304	+
	305	+ except:
	306	+ self.db.rollback()
	307	+ sys.exit("Database Interface Exception:\n" + err_msg)
	308	+
	309	+ self.close_db()
	310	+
	311	+ # Only interested in amounts
	312	+ return [labels, lists]
	313	+
	314	+
	315	+
	316	+ def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, ranges, subplot_index, fname):
	317	+ pylab.subplot(subplot_index)
	318	+ num_keys = len(y_lists)
	319	+
	320	+ pylab.figure(num=None,figsize=[26,14])
	321	+ line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','b--o','g--o','r--o','c--o','m--o','k--o']
	322	+
	323	+ for i in range(num_keys):
	324	+ pylab.plot(x, y_lists[i], line_types[i])
	325	+
	326	+ pylab.grid()
	327	+ pylab.xlim(ranges[0], ranges[1])
	328	+ pylab.legend(labels,loc=2)
	329	+
	330	+ pylab.xlabel(xlabel)
	331	+ pylab.ylabel(ylabel)
	332	+
	333	+ pylab.title(title)
	334	+ pylab.savefig(fname+'.png', format='png')
	335	+
	336	+
	337	+
	338	+ def run_hr(self, type):
	339	+
	340	+
	341	+ # Current date & time
	342	+ now = datetime.datetime.now()
	343	+ #UTC = 8
	344	+ #delta = datetime.timedelta(hours=UTC)
	345	+ #now = now + delta
	346	+
	347	+
	348	+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
	349	+ hours_back = 24
	350	+ times = self.gen_date_strings(now, hours_back,1,1)
	351	+
	352	+ start_time = times[0]
	353	+ end_time = times[1]
	354	+
	355	+ print '\nGenerating analytics total amount for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
	356	+
	357	+ # QUERY NAME
	358	+ query_name = 'report_total_amounts'
	359	+
	360	+
	361	+ # RUN BY HOUR
	362	+ descriptor = '_by_hr'
	363	+ return_val = self.run_query(start_time, end_time, query_name, descriptor)
	364	+
	365	+ labels = return_val[0] # curve labels
	366	+ counts = return_val[1] # curve data - lists
	367	+
	368	+ r = self.get_query_fields(labels, counts, type, start_time, end_time)
	369	+ labels = r[0]
	370	+ counts = r[1]
	371	+ title = r[2]
	372	+ ylabel = r[3]
	373	+
	374	+ xlabel = 'Time - Hours'
	375	+ subplot_index = 111
	376	+
	377	+ # plot the curves
	378	+ time_range = range(len(counts[0]))
	379	+ for i in range(len(counts[0])):
	380	+ time_range[i] = time_range[i] - len(counts[0])
	381	+
	382	+ ranges = [min(time_range), max(time_range)]
	383	+
	384	+ fname = query_name + descriptor + '_' + type
	385	+ self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)
	386	+
	387	+
	388	+
	389	+ def run_day(self,type):
	390	+
	391	+ # Current date & time
	392	+ now = datetime.datetime.now()
	393	+ #UTC = 8
	394	+ #delta = datetime.timedelta(hours=UTC)
	395	+ #now = now + delta
	396	+
	397	+
	398	+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=0 """
	399	+ hours_back = 7 * 24 # 7 days back
	400	+ times = self.gen_date_strings(now, hours_back,1,0)
	401	+
	402	+ start_time = times[0]
	403	+ end_time = times[1]
	404	+
	405	+ print '\nGenerating analytics total amount for ' + str(days_back) + ' days back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
	406	+
	407	+
	408	+ # FORMAT HEADERS & QUERY NAME
	409	+ query_name = 'report_total_amounts'
	410	+ descriptor = '_by_day'
	411	+ return_val = self.run_query(start_time, end_time, query_name, descriptor)
	412	+
	413	+ labels = return_val[0]
	414	+ counts = return_val[1]
	415	+
	416	+ r = self.get_query_fields(labels, counts, type, start_time, end_time)
	417	+ labels = r[0]
	418	+ counts = r[1]
	419	+ title = r[2]
	420	+ ylabel = r[3]
	421	+
	422	+ xlabel = 'Time - Days'
	423	+ subplot_index = 111
	424	+
	425	+ # Plot values
	426	+ time_range = range(len(counts[0]))
	427	+ for i in range(len(counts[0])):
	428	+ time_range[i] = time_range[i] - len(counts[0])
	429	+
	430	+ ranges = [min(time_range), max(time_range)]
	431	+
	432	+ fname = query_name + descriptor + '_' + type
	433	+ self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)
	434	+
	435	+
	436	+ def get_query_fields(self, labels, counts, type, start_time, end_time):
	437	+
	438	+ if type == 'BAN_EM':
	439	+ indices = range(2,9)
	440	+ title = 'Total Amounts: ' + start_time + ' -- ' + end_time
	441	+ ylabel = 'Amount'
	442	+ elif type == 'CC_PP_completion':
	443	+ indices = [12,17]
	444	+ title = 'Credit Card & Paypal Completion Rates: ' + start_time + ' -- ' + end_time
	445	+ ylabel = 'Rate'
	446	+ elif type == 'CC_PP_amount':
	447	+ indices = [13,18]
	448	+ title = 'Credit Card & Paypal Total Amounts: ' + start_time + ' -- ' + end_time
	449	+ ylabel = 'Amount'
	450	+ elif type == 'AMT_VS_AMT50':
	451	+ indices = [2,20]
	452	+ title = 'Amount50 and Amount Totals: ' + start_time + ' -- ' + end_time
	453	+ ylabel = 'Amount'
	454	+ else:
	455	+ sys.exit("Total Amounts: You must enter a valid report type.\n" )
	456	+
	457	+ # Exract relevant labels and values
	458	+ labels_temp = list()
	459	+ counts_temp = list()
	460	+
	461	+ for i in range(len(labels)):
	462	+ if i in indices:
	463	+ labels_temp.append(labels[i])
	464	+ counts_temp.append(counts[i])
	465	+
	466	+ return [labels_temp, counts_temp, title, ylabel]
	467	+
	468	+
	469	+"""
	470	+
	471	+CLASS :: ^BannerLPReporting^
	472	+
	473	+This subclass handles reporting on banners and landing pages for the fundraiser.
	474	+
	475	+"""
	476	+
	477	+class BannerLPReporting(DataReporting):
	478	+
	479	+
	480	+ def __init__(self, *args):
	481	+
	482	+ if len(args) == 2:
	483	+ self.campaign = args[0]
	484	+ self.start_time = args[1]
	485	+ else:
	486	+ self.campaign = None
	487	+ self.start_time = None
	488	+
	489	+ def run_query(self,start_time, end_time, campaign, query_name, metric_name):
	490	+
	491	+ self.init_db()
	492	+
	493	+ metric_lists = mh.AutoVivification()
	494	+ time_lists = mh.AutoVivification()
	495	+ # table_data = [] # store the results in a table for reporting
	496	+
	497	+ # Load the SQL File & Format
	498	+ filename = self._sql_path_ + query_name + '.sql'
	499	+ sql_stmnt = mh.read_sql(filename)
	500	+
	501	+ query_name = 'report_bannerLP_metrics' # rename query to work with query store
	502	+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign])
	503	+
	504	+ key_index = QD.get_banner_index(query_name)
	505	+ time_index = QD.get_time_index(query_name)
	506	+ metric_index = QD.get_metric_index(query_name, metric_name)
	507	+
	508	+ # Composes the data for each banner
	509	+ try:
	510	+ err_msg = sql_stmnt
	511	+ self.cur.execute(sql_stmnt)
	512	+
	513	+ results = self.cur.fetchall()
	514	+
	515	+ # Compile Table Data
	516	+ # cpRow = self.listify(row)
	517	+ # table_data.append(cpRow)
	518	+
	519	+ for row in results:
	520	+
	521	+ key_name = row[key_index]
	522	+
	523	+ try:
	524	+ metric_lists[key_name].append(row[metric_index])
	525	+ time_lists[key_name].append(row[time_index])
	526	+ except:
	527	+ metric_lists[key_name] = list()
	528	+ time_lists[key_name] = list()
	529	+
	530	+ metric_lists[key_name].append(row[metric_index])
	531	+ time_lists[key_name].append(row[time_index])
	532	+
	533	+ except:
	534	+ self.db.rollback()
	535	+ sys.exit("Database Interface Exception:\n" + err_msg)
	536	+
	537	+ """ Convert Times to Integers """
	538	+ # Find the earliest date
	539	+ max_i = 0
	540	+
	541	+ for key in time_lists.keys():
	542	+ for date_str in time_lists[key]:
	543	+ day_int = int(date_str[8:10])
	544	+ hr_int = int(date_str[11:13])
	545	+ date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
	546	+ if date_int > max_i:
	547	+ max_i = date_int
	548	+ max_day = day_int
	549	+ max_hr = hr_int
	550	+
	551	+
	552	+ # Normalize dates
	553	+ time_norm = mh.AutoVivification()
	554	+ for key in time_lists.keys():
	555	+ for date_str in time_lists[key]:
	556	+ day = int(date_str[8:10])
	557	+ hr = int(date_str[11:13])
	558	+ # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
	559	+ elem = (day - max_day) * 24 + (hr - max_hr)
	560	+ try:
	561	+ time_norm[key].append(elem)
	562	+ except:
	563	+ time_norm[key] = list()
	564	+ time_norm[key].append(elem)
	565	+
	566	+ # smooth out the values
	567	+ #window_length = 20
	568	+ #for banner in metric_lists.keys():
	569	+ # metric_lists[banner] = smooth(metric_lists[banner], window_length)
	570	+
	571	+ self.close_db()
	572	+
	573	+ # return [metric_lists, time_norm, table_data]
	574	+ return [metric_lists, time_norm]
	575	+
	576	+
	577	+ def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):
	578	+ pylab.subplot(subplot_index)
	579	+ pylab.figure(num=None,figsize=[26,14])
	580	+ count_keys = counts.keys()
	581	+
	582	+ line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']
	583	+
	584	+ count = 0
	585	+ for key in counts.keys():
	586	+ pylab.plot(times[key], counts[key], line_types[count])
	587	+ count = count + 1
	588	+
	589	+ pylab.grid()
	590	+ pylab.xlim(ranges[0], ranges[1])
	591	+ pylab.legend(count_keys,loc=2)
	592	+
	593	+ pylab.xlabel(xlabel)
	594	+ pylab.ylabel(ylabel)
	595	+
	596	+ pylab.title(title)
	597	+ pylab.savefig(fname, format='png')
	598	+
	599	+
	600	+ """
	601	+
	602	+ type = 'LP' \|\| 'BAN' \|\| 'BAN-TEST' \|\| 'LP-TEST'
	603	+
	604	+ """
	605	+ def run(self, type, metric_name):
	606	+
	607	+ # Current date & time
	608	+ now = datetime.datetime.now()
	609	+ #UTC = 8
	610	+ #delta = datetime.timedelta(hours=UTC)
	611	+ #now = now + delta
	612	+
	613	+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
	614	+ hours_back = 24
	615	+ times = self.gen_date_strings(now, hours_back,1,1)
	616	+
	617	+ start_time = times[0]
	618	+ end_time = times[1]
	619	+
	620	+ print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
	621	+
	622	+ if type == 'LP':
	623	+ query_name = 'report_LP_metrics'
	624	+
	625	+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
	626	+ if self.campaign == None:
	627	+ campaign = '[0-9](JA\|SA\|EA\|TY)[0-9]'
	628	+ else:
	629	+ campaign = self.campaign
	630	+
	631	+ title = metric_name + ': ' + start_time + ' -- ' + end_time
	632	+ fname = query_name + '_' + metric_name + '.png'
	633	+ elif type == 'BAN':
	634	+ query_name = 'report_banner_metrics'
	635	+
	636	+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
	637	+ if self.campaign == None:
	638	+ campaign = '[0-9](JA\|SA\|EA\|TY)[0-9]'
	639	+ else:
	640	+ campaign = self.campaign
	641	+
	642	+ title = metric_name + ': ' + start_time + ' -- ' + end_time
	643	+ fname = query_name + '_' + metric_name + '.png'
	644	+ elif type == 'BAN-TEST':
	645	+ r = self.get_latest_campaign()
	646	+ query_name = 'report_banner_metrics'
	647	+
	648	+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
	649	+ if self.campaign == None:
	650	+ campaign = r[0]
	651	+ start_time = r[1]
	652	+ else:
	653	+ campaign = self.campaign
	654	+ start_time = self.start_time
	655	+
	656	+ title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
	657	+ fname = query_name + '_' + metric_name + '_latest' + '.png'
	658	+ elif type == 'LP-TEST':
	659	+ r = self.get_latest_campaign()
	660	+ query_name = 'report_LP_metrics'
	661	+
	662	+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
	663	+ if self.campaign == None:
	664	+ campaign = r[0]
	665	+ start_time = r[1]
	666	+ else:
	667	+ campaign = self.campaign
	668	+ start_time = self.start_time
	669	+
	670	+ title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
	671	+ fname = query_name + '_' + metric_name + '_latest' + '.png'
	672	+ else:
	673	+ sys.exit("Invalid type name - must be 'LP' or 'BAN'.")
	674	+
	675	+ return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name)
	676	+ metrics = return_val[0]
	677	+ times = return_val[1]
	678	+
	679	+ # title = metric_name + ': ' + start_time + ' -- ' + end_time
	680	+ xlabel = 'Time - Hours'
	681	+ ylabel = metric_name
	682	+ subplot_index = 111
	683	+
	684	+ min_time = 99
	685	+ for key in times.keys():
	686	+ min_elem = min(times[key])
	687	+ if min_elem < min_time:
	688	+ min_time = min_elem
	689	+
	690	+ ranges = [min_time, 0]
	691	+
	692	+ self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname)
	693	+
	694	+ return [metrics, times]
	695	+
	696	+
	697	+ def get_latest_campaign(self):
	698	+
	699	+ query_name = 'report_latest_campaign'
	700	+ self.init_db()
	701	+
	702	+ """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """
	703	+ now = datetime.datetime.now()
	704	+ hours_back = 72
	705	+ times = self.gen_date_strings(now, hours_back,1,1)
	706	+
	707	+ sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')
	708	+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [times[0]])
	709	+
	710	+ campaign_index = QD.get_campaign_index(query_name)
	711	+ time_index = QD.get_time_index(query_name)
	712	+
	713	+ try:
	714	+ err_msg = sql_stmnt
	715	+ self.cur.execute(sql_stmnt)
	716	+
	717	+ row = self.cur.fetchone()
	718	+ except:
	719	+ self.db.rollback()
	720	+ sys.exit("Database Interface Exception:\n" + err_msg)
	721	+
	722	+ campaign = row[campaign_index]
	723	+ timestamp = row[time_index]
	724	+
	725	+ self.close_db()
	726	+
	727	+ return [campaign, timestamp]
	728	+
	729	+ """
	730	+
	731	+ Takes as input and converts it to a set of hours counting back from 0
	732	+
	733	+ time_lists - a dictionary of timestamp lists
	734	+ time_norm - a dictionary of normalized times
	735	+
	736	+ """
	737	+ def normalize_timestamps(self, time_lists):
	738	+ # Find the earliest date
	739	+ max_i = 0
	740	+
	741	+ for key in time_lists.keys():
	742	+ for date_str in time_lists[key]:
	743	+ day_int = int(date_str[8:10])
	744	+ hr_int = int(date_str[11:13])
	745	+ date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
	746	+ if date_int > max_i:
	747	+ max_i = date_int
	748	+ max_day = day_int
	749	+ max_hr = hr_int
	750	+
	751	+
	752	+ # Normalize dates
	753	+ time_norm = mh.AutoVivification()
	754	+ for key in time_lists.keys():
	755	+ for date_str in time_lists[key]:
	756	+ day = int(date_str[8:10])
	757	+ hr = int(date_str[11:13])
	758	+ # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
	759	+ elem = (day - max_day) * 24 + (hr - max_hr)
	760	+ try:
	761	+ time_norm[key].append(elem)
	762	+ except:
	763	+ time_norm[key] = list()
	764	+ time_norm[key].append(elem)
	765	+
	766	+ return time_norm
	767	+
	768	+"""
	769	+
	770	+CLASS :: ^MinerReporting^
	771	+
	772	+This subclass handles reporting on raw values imported into the database.
	773	+
	774	+"""
	775	+
	776	+class MinerReporting(DataReporting):
	777	+
	778	+ def run_query(self, start_time, end_time, query_name):
	779	+
	780	+ self.init_db()
	781	+
	782	+ counts = list()
	783	+ times = list()
	784	+
	785	+ # Load the SQL File & Format
	786	+ filename = self._sql_path_+ query_name + '.sql'
	787	+ sql_stmnt = mh.read_sql(filename)
	788	+
	789	+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time])
	790	+ #print sql_stmnt
	791	+
	792	+ # Get Indexes into Query
	793	+ count_index = QD.get_count_index(query_name)
	794	+ time_index = QD.get_time_index(query_name)
	795	+
	796	+ # Composes the data for each banner
	797	+ try:
	798	+ err_msg = sql_stmnt
	799	+ self.cur.execute(sql_stmnt)
	800	+
	801	+ results = self.cur.fetchall()
	802	+
	803	+ for row in results:
	804	+ counts.append(row[count_index])
	805	+ times.append(row[time_index])
	806	+
	807	+ except:
	808	+ self.db.rollback()
	809	+ sys.exit("Database Interface Exception:\n" + err_msg)
	810	+
	811	+ """ Convert Times to Integers """
	812	+ time_norm = self.normalize_timestamps(times)
	813	+
	814	+
	815	+ self.close_db()
	816	+
	817	+ return [counts, time_norm]
	818	+
	819	+
	820	+ # Create histograms for hourly counts
	821	+
	822	+ def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):
	823	+
	824	+ pylab.subplot(subplot_index)
	825	+ pylab.figure(num=None,figsize=[26,14])
	826	+
	827	+ # pylab.plot(times, counts)
	828	+ # pylab.hist(counts, times)
	829	+ pylab.bar(times, counts, width=0.5)
	830	+
	831	+ pylab.grid()
	832	+ pylab.xlim(ranges[0], ranges[1])
	833	+
	834	+ pylab.xlabel(xlabel)
	835	+ pylab.ylabel(ylabel)
	836	+
	837	+ pylab.title(title)
	838	+ pylab.savefig(fname, format='png')
	839	+
	840	+ def run(self, query_name):
	841	+
	842	+ # Current date & time
	843	+ now = datetime.datetime.now()
	844	+ #UTC = 8
	845	+ #delta = datetime.timedelta(hours=UTC)
	846	+ #now = now + delta
	847	+
	848	+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
	849	+ hours_back = 24
	850	+ times = self.gen_date_strings_hr(now, hours_back,1,1)
	851	+
	852	+ start_time = times[0]
	853	+ end_time = times[1]
	854	+
	855	+ print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
	856	+
	857	+ # Run Query
	858	+ return_val = self.run_query(start_time, end_time, query_name)
	859	+ counts = return_val[0]
	860	+ times = return_val[1]
	861	+
	862	+ # Normalize times
	863	+ min_time = min(times)
	864	+ ranges = [min_time, 0]
	865	+
	866	+ xlabel = 'Hours'
	867	+ subplot_index = 111
	868	+ fname = query_name + '.png'
	869	+
	870	+ title = QD.get_plot_title(query_name)
	871	+ title = title + ' -- ' + start_time + ' - ' + end_time
	872	+ ylabel = QD.get_plot_ylabel(query_name)
	873	+
	874	+ # Convert counts to float (from Decimal) to prevent exception when bar plotting
	875	+ # Bbox::update_numerix_xy expected numerix array
	876	+ counts_new = list()
	877	+ for i in range(len(counts)):
	878	+ counts_new.append(float(counts[i]))
	879	+ counts = counts_new
	880	+
	881	+ # Generate Histogram
	882	+ self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)
	883	+
	884	+
	885	+"""
	886	+
	887	+CLASS :: IntervalReporting
	888	+
	889	+Performs queries that take timestamps, query, and an interval as arguments. Data for a single metric
	890	+is generated for each time interval in the time period defined by the start and end timestamps.
	891	+
	892	+Types of queries supported:
	893	+
	894	+report_banner_metrics_minutely
	895	+report_LP_metrics_minutely
	896	+
	897	+"""
	898	+
	899	+class IntervalReporting(DataReporting):
	900	+
	901	+ """
	902	+ """
	903	+ def __init__(self):
	904	+ self._data_loader_ = DL.IntervalReportingLoader()
	905	+
	906	+ """
	907	+ """
	908	+ def usage(self):
	909	+
	910	+ print 'Types of queries:'
	911	+ print ' (1) banner'
	912	+ print ' (2) LP'
	913	+ print ''
	914	+ print 'e.g.'
	915	+ print " run('20101230160400', '20101230165400', 2, 'banner', 'imp', '20101230JA091_US')"
	916	+ print " run('20101230160400', '20101230165400', 2, 'LP', 'views', '20101230JA091_US')"
	917	+ print ''
	918	+
	919	+ return
	920	+
	921	+ """
	922	+ Execute reporting query and generate plots
	923	+ """
	924	+ def gen_plot(self, metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname):
	925	+
	926	+ pylab.subplot(subplot_index)
	927	+ pylab.figure(num=None,figsize=[26,14])
	928	+
	929	+ line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']
	930	+
	931	+ count = 0
	932	+ for key in metrics.keys():
	933	+ pylab.step(times[key], metrics[key], line_types[count])
	934	+ count = count + 1
	935	+
	936	+ pylab.grid()
	937	+ pylab.xlim(ranges[0], ranges[1])
	938	+ pylab.ylim(ranges[2], ranges[3])
	939	+ pylab.legend(metrics.keys(),loc=2)
	940	+
	941	+ pylab.xlabel(xlabel)
	942	+ pylab.ylabel(ylabel)
	943	+
	944	+ pylab.title(title)
	945	+ pylab.savefig(fname, format='png')
	946	+
	947	+
	948	+ """
	949	+ Execute reporting query and generate plots
	950	+ """
	951	+ def run(self, start_time, end_time, interval, query_type, metric_name, campaign):
	952	+
	953	+ print '\nGenerating ' + query_type +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
	954	+
	955	+ """ Execute the query that generates interval reporting data """
	956	+ return_val = self._data_loader_.run_query(start_time, end_time, interval, query_type, metric_name, campaign)
	957	+ counts = return_val[0]
	958	+ times = return_val[1]
	959	+
	960	+ """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
	961	+ for key in times.keys():
	962	+ times[key] = TP.normalize_timestamps(times[key], False, 2)
	963	+ times[key], counts[key] = TP.normalize_intervals(times[key], counts[key], interval)
	964	+
	965	+ # Normalize times
	966	+ min_time = min(times)
	967	+ ranges = [min_time, 0]
	968	+
	969	+ xlabel = 'MINUTES'
	970	+ subplot_index = 111
	971	+ fname = campaign + ' ' + query_type + ' ' + metric_name + '.png'
	972	+
	973	+ metric_full_name = QD.get_metric_full_name(metric_name)
	974	+ title = campaign + ': ' + metric_full_name + ' -- ' + start_time + ' - ' + end_time
	975	+ ylabel = metric_full_name
	976	+
	977	+ """ Determine List maximums """
	978	+ times_max = 0
	979	+ metrics_max = 0
	980	+
	981	+ for key in counts.keys():
	982	+ list_max = max(counts[key])
	983	+ if list_max > metrics_max:
	984	+ metrics_max = list_max
	985	+
	986	+ for key in times.keys():
	987	+ list_max = max(times[key])
	988	+ if list_max > times_max:
	989	+ times_max = list_max
	990	+
	991	+ ranges = list()
	992	+ ranges.append(0.0)
	993	+ ranges.append(times_max * 1.1)
	994	+ ranges.append(0.0)
	995	+ ranges.append(metrics_max * 1.1)
	996	+
	997	+ """ Generate plots given data """
	998	+ self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)
	999	+
	1000	+
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/TimestampProcessor.py
—	—	@@ -1,10 +1,39 @@
2	2
3	3	"""
4	4
	5	+This module effectively functions as a Singleton class.
	6	+
5	7	TimestampProcesser module is used to provide definitions for dealing with date and time
6	8	objects. This is primarily used to handle
7	9
	10	+TimestampProcesser facilitates the processing of timestamps used in the CiviCRM and "faulkner" mySQL
	11	+databases. This includes mapping among timestamp formats and converting those formats to indexed
	12	+lists and dictionaries.
8	13
	14	+Examples of format definitions:
	15	+
	16	+ format 1 - 20080101000606
	17	+ format 2 - 2008-01-01 00:06:06
	18	+
	19	+Examples of resolution definitions:
	20	+
	21	+ resolution 0 - xxxx-xx-xx 00:00:00
	22	+ resolution 1 - xxxx-xx-xx xx:00:00
	23	+ resolution 2 - xxxx-xx-xx xx:xx:00
	24	+ resolution 3 - xxxx-xx-xx xx:xx:xx
	25	+
	26	+METHODS:
	27	+
	28	+ normalize_timestamps - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0
	29	+ timestamps_to_dict - Convert lists into dictionaries before processing it is assumed that lists are composed of only simple types
	30	+ find_latest_date_in_list - Find the latest time stamp in a list
	31	+ find_earliest_date_in_list - Find the earliest time stamp in a list
	32	+ gen_date_strings - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
	33	+ timestamp_from_obj - Convert datetime objects to a timestamp of a given format.
	34	+ timestamp_to_obj - Convert timestamp to a datetime object of a given format
	35	+ normalize_intervals - Inserts missing interval points into the time and metric lists
	36	+ timestamp_convert_format - Converts from one timestamp format to another timestamp format
	37	+
9	38	"""
10	39
11	40	__author__ = "Ryan Faulkner"
—	—	@@ -15,402 +44,361 @@
16	45	import sys
17	46	sys.path.append('../')
18	47
19		~~-import matplotlib~~
20	48	import datetime
21		~~-import MySQLdb~~
22		~~-import pylab~~
23		~~-import HTML~~
24	49	import math
25		-
26	50	import miner_help as mh
27	51
28		~~-matplotlib.use('Agg')~~
29		-
30		-
31		-
	52	+
32	53	"""
	54	+
	55	+ Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0
	56	+
	57	+ INPUT:
	58	+ time_lists - a list of datetime objects
	59	+ count_back - indicate whether the list counts back from the end
	60	+ time_unit - an integer indicating what unit to measure time in (0 = day, 1 = hour, 2 = minute)
	61	+
	62	+ RETURN:
	63	+ time_norm - a dictionary of normalized times
	64	+
	65	+"""
	66	+def normalize_timestamps(time_lists, count_back, time_unit):
	67	+
	68	+ time_lists, isList = timestamps_to_dict(time_lists)
	69	+
	70	+ """ Depending on args set the start date """
	71	+ if count_back:
	72	+ start_date_obj = find_latest_date_in_list(time_lists)
	73	+ else:
	74	+ start_date_obj = find_earliest_date_in_list(time_lists)
	75	+
	76	+ start_day = start_date_obj.day
	77	+ start_hr = start_date_obj.hour
	78	+ start_mte = start_date_obj.minute
	79	+
	80	+ # Normalize dates
	81	+ time_norm = mh.AutoVivification()
	82	+ for key in time_lists.keys():
	83	+ for date_obj in time_lists[key]:
	84	+
	85	+ day = date_obj.day
	86	+ hr = date_obj.hour
	87	+ mte = date_obj.minute
	88	+
	89	+ if time_unit == 0:
	90	+ elem = (day - start_day)
	91	+ elif time_unit == 1:
	92	+ elem = (day - start_day) * 24 + (hr - start_hr)
	93	+ elif time_unit == 2:
	94	+ elem = (day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (mte - start_mte)
	95	+
	96	+ try:
	97	+ time_norm[key].append(elem)
	98	+ except:
	99	+ time_norm[key] = list()
	100	+ time_norm[key].append(elem)
	101	+
	102	+ """ If the original argument was a list put it back in that form """
	103	+ if isList:
	104	+ time_norm = time_norm[key]
	105	+
	106	+ return time_norm
	107	+
	108	+
	109	+"""
33	110
34		~~- TimestampProcesser facilitates the processing of timestamps used in the CiviCRM and "faulkner" mySQL~~
35		~~- databases. This includes mapping among timestamp formats and converting those formats to indexed~~
36		~~- lists and dictionaries.~~
	111	+ HELPER METHOD for normalize_timestamps. Convert lists into dictionaries before processing it is assumed that lists
	112	+ are composed of only simple types
37	113
38		~~- Examples of format definitions:~~
39		-
40		~~- format 1 - 20080101000606~~
41		~~- format 2 - 2008-01-01 00:06:06~~
	114	+ INPUT:
	115	+ time_lists - a list of datetime objects
42	116
43		~~- Examples of resolution definitions:~~
44		-
45		~~- resolution 0 - xxxx-xx-xx 00:00:00~~
46		~~- resolution 1 - xxxx-xx-xx xx:00:00~~
47		~~- resolution 2 - xxxx-xx-xx xx:xx:00~~
48		~~- resolution 3 - xxxx-xx-xx xx:xx:xx~~
49		-
50		~~- METHODS:~~
51		-
52		~~- normalize_timestamps - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0~~
53		~~- timestamps_to_dict - Convert lists into dictionaries before processing it is assumed that lists are composed of only simple types~~
54		~~- find_latest_date_in_list - Find the latest time stamp in a list~~
55		~~- find_earliest_date_in_list - Find the earliest time stamp in a list~~
56		~~- gen_date_strings - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format~~
57		~~- timestamp_from_obj - Convert datetime objects to a timestamp of a given format.~~
58		~~- timestamp_to_obj - Convert timestamp to a datetime object of a given format~~
59		~~- normalize_intervals - Inserts missing interval points into the time and metric lists~~
60		~~- timestamp_convert_format - Converts from one timestamp format to another timestamp format~~
61		-
	117	+ RETURN:
	118	+ time_lists - dictionary with a single key 'key' that stores the list
	119	+ isList - a dictionary of normalized times
	120	+
62	121	"""
63		~~-class TimestampProcesser(object):~~
	122	+def timestamps_to_dict(time_lists):
64	123
65		~~- """~~
66		-
67		~~- Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0~~
	124	+ isList = 0
	125	+ if type(time_lists) is list:
	126	+ isList = 1
68	127
69		~~- INPUT:~~
70		~~- time_lists - a list of datetime objects~~
71		~~- count_back - indicate whether the list counts back from the end~~
72		~~- time_unit - an integer indicating what unit to measure time in (0 = day, 1 = hour, 2 = minute)~~
	128	+ old_list = time_lists
	129	+ time_lists = mh.AutoVivification()
73	130
74		~~- RETURN:~~
75		~~- time_norm - a dictionary of normalized times~~
76		-
77		~~- """~~
78		~~- def normalize_timestamps(self, time_lists, count_back, time_unit):~~
	131	+ key = 'key'
	132	+ time_lists[key] = list()
79	133
80		~~- time_lists, isList = self.timestamps_to_dict(time_lists)~~
	134	+ for i in range(len(old_list)):
	135	+ time_lists[key].append(old_list[i])
	136	+
	137	+ return [time_lists, isList]
	138	+
	139	+
	140	+"""
	141	+
	142	+ HELPER METHOD for normalize_timestamps. Find the latest time stamp in a list
	143	+
	144	+ INPUT:
	145	+ time_lists - a list of datetime objects
81	146
82		~~- """ Depending on args set the start date """~~
83		~~- if count_back:~~
84		~~- start_date_obj = self.find_latest_date_in_list(time_lists)~~
85		~~- else:~~
86		~~- start_date_obj = self.find_earliest_date_in_list(time_lists)~~
	147	+ RETURN:
	148	+ date_max - datetime object of the latest date in the list
87	149
88		~~- start_day = start_date_obj.day~~
89		~~- start_hr = start_date_obj.hour~~
90		~~- start_mte = start_date_obj.minute~~
91		-
92		~~- # Normalize dates~~
93		~~- time_norm = mh.AutoVivification()~~
94		~~- for key in time_lists.keys():~~
95		~~- for date_obj in time_lists[key]:~~
	150	+"""
	151	+def find_latest_date_in_list(time_lists):
	152	+
	153	+ date_max = datetime.datetime(1000,1,1,0,0,0)
	154	+
	155	+ for key in time_lists.keys():
	156	+ for date_obj in time_lists[key]:
	157	+ if date_int > date_min:
	158	+ date_min = date_obj
96	159
97		~~- day = date_obj.day~~
98		~~- hr = date_obj.hour~~
99		~~- mte = date_obj.minute~~
100		-
101		~~- if time_unit == 0:~~
102		~~- elem = (day - start_day)~~
103		~~- elif time_unit == 1:~~
104		~~- elem = (day - start_day) * 24 + (hr - start_hr)~~
105		~~- elif time_unit == 2:~~
106		~~- elem = (day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (mte - start_mte)~~
107		-
108		~~- try:~~
109		~~- time_norm[key].append(elem)~~
110		~~- except:~~
111		~~- time_norm[key] = list()~~
112		~~- time_norm[key].append(elem)~~
	160	+ return date_max
	161	+
	162	+"""
	163	+
	164	+ HELPER METHOD for normalize_timestamps. Find the earliest timestamp in a list
	165	+
	166	+ INPUT:
	167	+ time_lists - a list of datetime objects
113	168
114		~~- """ If the original argument was a list put it back in that form """~~
115		~~- if isList:~~
116		~~- time_norm = time_norm[key]~~
117		-
118		~~- return time_norm~~
	169	+ RETURN:
	170	+ date_min - datetime object of the earliest date in the list
119	171
	172	+"""
	173	+def find_earliest_date_in_list(time_lists):
120	174
121		~~- """~~
	175	+ date_min = datetime.datetime(3000,1,1,0,0,0)
122	176
123		~~- HELPER METHOD for normalize_timestamps. Convert lists into dictionaries before processing it is assumed that lists~~
124		~~- are composed of only simple types~~
125		-
126		~~- INPUT:~~
127		~~- time_lists - a list of datetime objects~~
128		-
129		~~- RETURN:~~
130		~~- time_lists - dictionary with a single key 'key' that stores the list~~
131		~~- isList - a dictionary of normalized times~~
132		-
133		~~- """~~
134		~~- def timestamps_to_dict(self, time_lists):~~
135		-
136		~~- isList = 0~~
137		~~- if type(time_lists) is list:~~
138		~~- isList = 1~~
139		-
140		~~- old_list = time_lists~~
141		~~- time_lists = mh.AutoVivification()~~
142		-
143		~~- key = 'key'~~
144		~~- time_lists[key] = list()~~
145		-
146		~~- for i in range(len(old_list)):~~
147		~~- time_lists[key].append(old_list[i])~~
	177	+ for key in time_lists.keys():
	178	+ for date_obj in time_lists[key]:
	179	+ if date_obj < date_min:
	180	+ date_min = date_obj
	181	+
	182	+ return date_min
	183	+
	184	+
	185	+"""
	186	+
	187	+ Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
148	188
149		~~- return [time_lists, isList]~~
	189	+ format 1 - 20080101000606
	190	+ format 2 - 2008-01-01 00:06:06
150	191
151		-
152		~~- """~~
	192	+ INPUT:
153	193
154		~~- HELPER METHOD for normalize_timestamps. Find the latest time stamp in a list~~
155		-
156		~~- INPUT:~~
157		~~- time_lists - a list of datetime objects~~
158		-
159		~~- RETURN:~~
160		~~- date_max - datetime object of the latest date in the list~~
161		-
162		~~- """~~
163		~~- def find_latest_date_in_list(self, time_lists):~~
164		-
165		~~- date_max = datetime.datetime(1000,1,1,0,0,0)~~
166		-
167		~~- for key in time_lists.keys():~~
168		~~- for date_obj in time_lists[key]:~~
169		~~- if date_int > date_min:~~
170		~~- date_min = date_obj~~
171		-
172		~~- return date_max~~
	194	+ now - datetime object
	195	+ hours_back - the amount of time the
	196	+ format - the format of the returned timestamp strings
	197	+ resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
173	198
174		~~- """~~
175	199
176		~~- HELPER METHOD for normalize_timestamps. Find the earliest timestamp in a list~~
	200	+ RETURN:
	201	+ start_time - formatted datetime string
	202	+ end_time - formatted datetime string
	203	+
	204	+"""
	205	+def gen_date_strings(time_ref, hours_back, format, resolution):
	206	+
	207	+ delta = datetime.timedelta(hours=-hours_back)
	208	+
	209	+ time_obj = time_ref + delta
	210	+ time_ref = time_ref + datetime.timedelta(hours=-1) # Move an hour back to terminate at 55 minute
	211	+
	212	+ # Cast the start and end time strings in the proper format
	213	+ start_time = timestamp_from_obj(time_obj, format, resolution)
	214	+ end_time = timestamp_from_obj(time_ref, format, resolution)
	215	+
	216	+ return [start_time, end_time]
	217	+
	218	+
	219	+
	220	+"""
	221	+
	222	+ Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings.
177	223
178		~~- INPUT:~~
179		~~- time_lists - a list of datetime objects~~
180		-
181		~~- RETURN:~~
182		~~- date_min - datetime object of the earliest date in the list~~
183		-
184		~~- """~~
185		~~- def find_earliest_date_in_list(self, time_lists):~~
186		-
187		~~- date_min = datetime.datetime(3000,1,1,0,0,0)~~
188		-
189		~~- for key in time_lists.keys():~~
190		~~- for date_obj in time_lists[key]:~~
191		~~- if date_obj < date_min:~~
192		~~- date_min = date_obj~~
193		-
194		~~- return date_min~~
195		-
	224	+ INPUT:
196	225
197		~~- """~~
	226	+ time_obj - datetime object
	227	+ format - the format of the returned timestamp strings
	228	+ resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
198	229
199		~~- Given a datetime object produce a timestamp a number of hours in the past and according to a particular format~~
	230	+
	231	+ RETURN:
	232	+ start_time - formatted datetime string
	233	+ end_time - formatted datetime string
	234	+
	235	+"""
	236	+def timestamp_from_obj(time_obj, format, resolution):
	237	+
	238	+ if time_obj.month < 10:
	239	+ month = '0' + str(time_obj.month)
	240	+ else:
	241	+ month = str(time_obj.month)
	242	+
	243	+ if time_obj.day < 10:
	244	+ day = '0' + str(time_obj.day)
	245	+ else:
	246	+ day = str(time_obj.day)
	247	+
	248	+ if time_obj.hour < 10:
	249	+ hour = '0' + str(time_obj.hour)
	250	+ else:
	251	+ hour = str(time_obj.hour)
200	252
201		~~- format 1 - 20080101000606~~
202		~~- format 2 - 2008-01-01 00:06:06~~
	253	+ if time_obj.minute < 10:
	254	+ minute = '0' + str(time_obj.minute)
	255	+ else:
	256	+ minute = str(time_obj.minute)
203	257
204		~~- INPUT:~~
	258	+ if time_obj.second < 10:
	259	+ second = '0' + str(time_obj.second)
	260	+ else:
	261	+ second = str(time_obj.second)
205	262
206		~~- now - datetime object~~
207		~~- hours_back - the amount of time the~~
208		~~- format - the format of the returned timestamp strings~~
209		~~- resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)~~
	263	+ # Cast the start and end time strings in the proper format
	264	+ if format == 1:
210	265
	266	+ if resolution == 0:
	267	+ timestamp = str(time_obj.year) + month + day + '000000'
	268	+ elif resolution == 1:
	269	+ timestamp = str(time_obj.year) + month + day + hour + '0000'
	270	+ elif resolution == 2:
	271	+ timestamp = str(time_obj.year) + month + day + hour + minute + '00'
	272	+ elif resolution == 3:
	273	+ timestamp = str(time_obj.year) + month + day + hour + minute + second
	274	+
	275	+ elif format == 2:
211	276
212		~~- RETURN:~~
213		~~- start_time - formatted datetime string~~
214		~~- end_time - formatted datetime string~~
215		-
216		~~- """~~
217		~~- def gen_date_strings(self, time_ref, hours_back, format, resolution):~~
218		-
219		~~- delta = datetime.timedelta(hours=-hours_back)~~
	277	+ if resolution == 0:
	278	+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + '00:00:00'
	279	+ elif resolution == 1:
	280	+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':00:00'
	281	+ elif resolution == 2:
	282	+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':00'
	283	+ elif resolution == 3:
	284	+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':' + second
	285	+
	286	+ return timestamp
220	287
221		~~- time_obj = time_ref + delta~~
222		~~- time_ref = time_ref + datetime.timedelta(hours=-1) # Move an hour back to terminate at 55 minute~~
223		-
224		~~- # Cast the start and end time strings in the proper format~~
225		~~- start_time = self.timestamp_from_obj(time_obj, format, resolution)~~
226		~~- end_time = self.timestamp_from_obj(time_ref, format, resolution)~~
227	288
228		~~- return [start_time, end_time]~~
	289	+"""
	290	+
	291	+ Convert timestamp to a datetime object of a given format
229	292
	293	+ INPUT:
230	294
	295	+ timestamp - timestamp string
	296	+ format - the format of the returned timestamp strings
231	297
232		~~- """~~
233	298
234		~~- Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings.~~
235		-
236		~~- INPUT:~~
237		-
238		~~- time_obj - datetime object~~
239		~~- format - the format of the returned timestamp strings~~
240		~~- resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)~~
241		-
242		-
243		~~- RETURN:~~
244		~~- start_time - formatted datetime string~~
245		~~- end_time - formatted datetime string~~
	299	+ RETURN:
	300	+ time_obj - datetime conversion of timestamp string
	301	+
	302	+"""
	303	+def timestamp_to_obj(timestamp, format):
246	304
247		~~- """~~
248		~~- def timestamp_from_obj(self, time_obj, format, resolution):~~
	305	+ if format == 1:
	306	+ time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[6:8]), \
	307	+ int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14]))
249	308
250		~~- if time_obj.month < 10:~~
251		~~- month = '0' + str(time_obj.month)~~
252		~~- else:~~
253		~~- month = str(time_obj.month)~~
	309	+ elif format == 2:
	310	+ time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[5:7]), int(timestamp[8:10]), \
	311	+ int(timestamp[11:13]), int(timestamp[14:16]), int(timestamp[17:19]))
254	312
255		~~- if time_obj.day < 10:~~
256		~~- day = '0' + str(time_obj.day)~~
257		~~- else:~~
258		~~- day = str(time_obj.day)~~
	313	+ return time_obj
259	314
260		~~- if time_obj.hour < 10:~~
261		~~- hour = '0' + str(time_obj.hour)~~
262		~~- else:~~
263		~~- hour = str(time_obj.hour)~~
264		-
265		~~- if time_obj.minute < 10:~~
266		~~- minute = '0' + str(time_obj.minute)~~
267		~~- else:~~
268		~~- minute = str(time_obj.minute)~~
269		-
270		~~- if time_obj.second < 10:~~
271		~~- second = '0' + str(time_obj.second)~~
272		~~- else:~~
273		~~- second = str(time_obj.second)~~
274		-
275		~~- # Cast the start and end time strings in the proper format~~
276		~~- if format == 1:~~
277		-
278		~~- if resolution == 0:~~
279		~~- timestamp = str(time_obj.year) + month + day + '000000'~~
280		~~- elif resolution == 1:~~
281		~~- timestamp = str(time_obj.year) + month + day + hour + '0000'~~
282		~~- elif resolution == 2:~~
283		~~- timestamp = str(time_obj.year) + month + day + hour + minute + '00'~~
284		~~- elif resolution == 3:~~
285		~~- timestamp = str(time_obj.year) + month + day + hour + minute + second~~
286		-
287		~~- elif format == 2:~~
288		-
289		~~- if resolution == 0:~~
290		~~- timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + '00:00:00'~~
291		~~- elif resolution == 1:~~
292		~~- timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':00:00'~~
293		~~- elif resolution == 2:~~
294		~~- timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':00'~~
295		~~- elif resolution == 3:~~
296		~~- timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':' + second~~
297		-
298		~~- return timestamp~~
	315	+
	316	+"""
	317	+
	318	+ Inserts missing interval points into the time and metric lists
299	319
	320	+ Assumptions:
	321	+ _metrics_ and _times_ are lists of the same length
	322	+ there must be a data point at each interval
	323	+ Some data points may be missed
	324	+ where there is no metric data the metric takes on the value 0.0
300	325
301		~~- """~~
	326	+ e.g. when _interval_ = 10
	327	+ times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 30 40 50], [1 1 0 1 0 1]
302	328
303		~~- Convert timestamp to a datetime object of a given format~~
304		-
305		~~- INPUT:~~
306		-
307		~~- timestamp - timestamp string~~
308		~~- format - the format of the returned timestamp strings~~
309		-
310		-
311		~~- RETURN:~~
312		~~- time_obj - datetime conversion of timestamp string~~
313		-
314		~~- """~~
315		~~- def timestamp_to_obj(self, timestamp, format):~~
316		-
317		~~- if format == 1:~~
318		~~- time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[6:8]), \~~
319		~~- int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14]))~~
320		-
321		~~- elif format == 2:~~
322		~~- time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[5:7]), int(timestamp[8:10]), \~~
323		~~- int(timestamp[11:13]), int(timestamp[14:16]), int(timestamp[17:19]))~~
	329	+ INPUT:
324	330
325		~~- return time_obj~~
	331	+ times -
	332	+ metrics -
	333	+ interval -
326	334
	335	+ RETURN:
	336	+ new_times -
	337	+ new_metrics -
327	338
328		~~- """~~
	339	+"""
	340	+def normalize_intervals(times, metrics, interval):
329	341
330		~~- Inserts missing interval points into the time and metric lists~~
	342	+ current_time = 0.0
	343	+ index = 0
	344	+ iterations = 0
	345	+ max_elems = math.ceil((times[-1] - times[0]) / interval) # there should be no more elements in the list than this
	346	+
	347	+ new_times = list()
	348	+ new_metrics = list()
	349	+
	350	+ """ Iterate through the time list """
	351	+ while index < len(times):
331	352
332		~~- Assumptions:~~
333		~~- _metrics_ and _times_ are lists of the same length~~
334		~~- there must be a data point at each interval~~
335		~~- Some data points may be missed~~
336		~~- where there is no metric data the metric takes on the value 0.0~~
	353	+ """ TEMPORARY SOLUTION: break out of the loop if more than the maximum number of elements is reached """
	354	+ if iterations > max_elems:
	355	+ break;
337	356
338		~~- e.g. when _interval_ = 10~~
339		~~- times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 30 40 50], [1 1 0 1 0 1]~~
	357	+ new_times.append(current_time)
340	358
341		~~- INPUT:~~
	359	+ """ If the current time is not in the current list then add it and a metric value of 0.0
	360	+ otherwise add the existing elements to the new lists """
	361	+ if current_time != times[index]:
	362	+ new_metrics.append(0.0)
342	363
343		~~- times -~~
344		~~- metrics -~~
345		~~- interval -~~
	364	+ else:
	365	+ new_metrics.append(metrics[index])
	366	+ index = index + 1
346	367
347		~~- RETURN:~~
348		~~- new_times -~~
349		~~- new_metrics -~~
	368	+ current_time = current_time + interval
350	369
351		~~- """~~
352		~~- def normalize_intervals(self, times, metrics, interval):~~
	370	+ iterations = iterations + 1
353	371
354		~~- current_time = 0.0~~
355		~~- index = 0~~
356		~~- iterations = 0~~
357		~~- max_elems = math.ceil((times[-1] - times[0]) / interval) # there should be no more elements in the list than this~~
	372	+ return [new_times, new_metrics]
	373	+
	374	+"""
	375	+
	376	+ Converts from one timestamp format to another timestamp format
358	377
359		~~- new_times = list()~~
360		~~- new_metrics = list()~~
	378	+ format 1 - 20080101000606
	379	+ format 2 - 2008-01-01 00:06:06
361	380
362		~~- """ Iterate through the time list """~~
363		~~- while index < len(times):~~
364		-
365		~~- """ TEMPORARY SOLUTION: break out of the loop if more than the maximum number of elements is reached """~~
366		~~- if iterations > max_elems:~~
367		~~- break;~~
368		-
369		~~- new_times.append(current_time)~~
370		-
371		~~- """ If the current time is not in the current list then add it and a metric value of 0.0~~
372		~~- otherwise add the existing elements to the new lists """~~
373		~~- if current_time != times[index]:~~
374		~~- new_metrics.append(0.0)~~
375		-
376		~~- else:~~
377		~~- new_metrics.append(metrics[index])~~
378		~~- index = index + 1~~
379		-
380		~~- current_time = current_time + interval~~
381		-
382		~~- iterations = iterations + 1~~
383		-
384		~~- return [new_times, new_metrics]~~
	381	+ INPUT:
	382	+
	383	+ ts - timestamp string
	384	+ format_from - input format
	385	+ format_to - output format
	386	+
	387	+ RETURN:
	388	+
	389	+ new_timestamp - new timestamp string
	390	+
385	391
386		~~- """~~
	392	+"""
	393	+def timestamp_convert_format(ts, format_from, format_to):
387	394
388		~~- Converts from one timestamp format to another timestamp format~~
389		-
390		~~- format 1 - 20080101000606~~
391		~~- format 2 - 2008-01-01 00:06:06~~
392		-
393		~~- INPUT:~~
	395	+ if format_from == 1:
394	396
395		~~- ts - timestamp string~~
396		~~- format_from - input format~~
397		~~- format_to - output format~~
398		-
399		~~- RETURN:~~
400		-
401		~~- new_timestamp - new timestamp string~~
402		-
403		-
404		~~- """~~
405		~~- def timestamp_convert_format(self, ts, format_from, format_to):~~
406		-
407		~~- if format_from == 1:~~
	397	+ if format_to == 2:
	398	+ new_timestamp = ts[0:4] + '-' + ts[4:6] + '-' + ts[6:8] + ' ' + ts[8:10] + ':' + ts[10:12] + ':' + ts[12:14]
408	399
409		~~- if format_to == 2:~~
410		~~- new_timestamp = ts[0:4] + '-' + ts[4:6] + '-' + ts[6:8] + ' ' + ts[8:10] + ':' + ts[10:12] + ':' + ts[12:14]~~
411		-
412		~~- elif format_from == 2:~~
413		~~- if format_to == 1:~~
414		~~- new_timestamp = ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] + ts[14:16] + ts[15:17]~~
415		-
416		~~- return new_timestamp~~
417		-
	400	+ elif format_from == 2:
	401	+ if format_to == 1:
	402	+ new_timestamp = ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] + ts[14:16] + ts[15:17]
	403	+
	404	+ return new_timestamp
	405	+

Status & tagging log

13:13, 11 April 2011 Reedy (talk | contribs) changed the status of r85742 [removed: new added: deferred]