r85742 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r85741‎ | r85742 | r85743 >
Date:03:30, 10 April 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
Major re-factoring. Redefined query_store (QueryData) and TimeProcessor to be singleton by implementing as a set of module methods. fundraiser_reporting was renamed DataReporting and decoupled from performing queries - this functionality has been moved into DataLoader in an effort to decouple reporting and data loading functionality.
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US+banner+imp.png (added) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py (added) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/QueryData.py (added) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/TimestampProcessor.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/compute_confidence.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/fundraiser_reporting.py (deleted) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/query_store.py (deleted) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/fundraiser_reporting.py
@@ -1,1093 +0,0 @@
2 -
3 -
4 -"""
5 -
6 -Pulls data from storage3.faulkner and generates plots.
7 -
8 -
9 -"""
10 -
11 -__author__ = "Ryan Faulkner"
12 -__revision__ = "$Rev$"
13 -__date__ = "December 16th, 2010"
14 -
15 -
16 -import sys
17 -sys.path.append('../')
18 -
19 -import matplotlib
20 -import datetime
21 -import MySQLdb
22 -import pylab
23 -import HTML
24 -import math
25 -
26 -import query_store as qs
27 -import miner_help as mh
28 -import TimestampProcessor as TP
29 -import DataLoader as DL
30 -
31 -matplotlib.use('Agg')
32 -
33 -
34 -
35 -"""
36 -
37 - CLASS :: FundraiserReporting
38 -
39 - Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include:
40 -
41 - METHODS:
42 -
43 - run_query - format and execute the query to obtain data
44 - gen_plot - plots the results of the report
45 - write_to_html_table - writes the results to an HTML table
46 - run
47 -
48 -"""
49 -class FundraiserReporting(TP.TimestampProcesser, DL.DataLoader):
50 -
51 - _sql_path_ = '../sql/' # Relative path for SQL files to be processed
52 -
53 - """
54 -
55 - Smooths a list of values
56 -
57 - INPUT:
58 - values - a list of datetime objects
59 - window_length - indicate whether the list counts back from the end
60 -
61 - RETURN:
62 - new_values - list of smoothed values
63 -
64 - """
65 - def smooth(self, values, window_length):
66 -
67 - window_length = int(math.floor(window_length / 2))
68 -
69 - if window_length < 1:
70 - return values
71 -
72 - list_len = len(values)
73 - new_values = list()
74 -
75 - for i in range(list_len):
76 - index_left = max([0, i - window_length])
77 - index_right = min([list_len - 1, i + window_length])
78 -
79 - width = index_right - index_left + 1
80 -
81 - new_val = sum(values[index_left : (index_right + 1)]) / width
82 - new_values.append(new_val)
83 -
84 - return new_values
85 -
86 - """
87 -
88 - workaround for issue with tuple objects in HTML.py
89 - MySQLdb returns unfamiliar tuple elements from its fetchall() method
90 - this is probably a version problem since the issue popped up in 2.5 but not 2.6
91 -
92 - INPUT:
93 - row - row object returned from MySQLdb.fetchall()
94 -
95 - RETURN:
96 - l - a list of tuple objects from the db
97 -
98 - """
99 - def listify(self, row):
100 - l = []
101 - for i in row:
102 - l.append(i)
103 - return l
104 -
105 -
106 - """
107 -
108 - To be overloaded by subclasses for specific types of queries
109 -
110 - INPUT:
111 - values - a list of datetime objects
112 - window_length - indicate whether the list counts back from the end
113 -
114 - RETURN:
115 - return_status - integer, 0 indicates un-exceptional execution
116 -
117 - """
118 - def run_query(self, start_time, end_time, query_name, metric_name):
119 - return 0
120 -
121 -
122 - """
123 -
124 - To be overloaded by subclasses for different plotting behaviour
125 -
126 - INPUT:
127 - values - a list of datetime objects
128 - window_length - indicate whether the list counts back from the end
129 -
130 - RETURN:
131 - return_status - integer, 0 indicates un-exceptional execution
132 -
133 - """
134 - def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, subplot_index, fname):
135 - return 0
136 -
137 - """
138 -
139 - To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py)
140 -
141 - INPUT:
142 - values - a list of datetime objects
143 - window_length - indicate whether the list counts back from the end
144 -
145 - RETURN:
146 - return_status - integer, 0 indicates un-exceptional execution
147 -
148 - """
149 - def write_to_html_table(self):
150 -
151 - """
152 - FROM TEST REPORTING
153 -
154 - query_obj = qs.query_store()
155 -
156 - # Populate the campaigns table
157 - s1 = 'drop table if exists campaigns;'
158 - s2 = 'create table campaigns as (select utm_campaign from drupal.contribution_tracking where ts > \'%s\' group by utm_campaign having count(*) > 100);' % (start_time)
159 - cur.execute(s1)
160 - cur.execute(s2)
161 -
162 - table_data = []
163 - sql_stmnt = mh.read_sql(sql_path + query_type + '.sql');
164 -
165 - # open the html file for writing
166 - f = open(html_path + query_type + '.html', 'w')
167 -
168 - format_start_time = start_time[0:4] + '-' + start_time[4:6] + '-' + start_time[6:8] + '-' + start_time[8:10] + 'HRs'
169 -
170 - # Formats the statement according to query type
171 - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
172 -
173 - # html formatting
174 - if query_type == 'report_campaign_ecomm':
175 - f.write('<br>Donation data since ' + format_start_time + ' ... <br><br>')
176 - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
177 -
178 - elif query_type == 'report_campaign_logs':
179 - f.write('<br>Impression and landing page data since ' + format_start_time+ ' ... <br><br>')
180 - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
181 -
182 - elif query_type == 'report_campaign_ecomm_by_hr':
183 - f.write('<br>Donation data by hour since ' + format_start_time + ' ... <br><br>')
184 - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
185 -
186 - elif query_type == 'report_campaign_logs_by_hr':
187 - f.write('<br>Impression and landing page by hour since ' + format_start_time + ' ... <br><br>')
188 - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
189 -
190 - else:
191 - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
192 -
193 - try:
194 - err_msg = select_stmnt
195 - cur.execute(select_stmnt)
196 -
197 - results = cur.fetchall()
198 -
199 - for row in results:
200 - cpRow = listify(row)
201 - # t.rows.append(row)
202 - table_data.append(cpRow)
203 -
204 - except:
205 - db.rollback()
206 - sys.exit("Database Interface Exception:\n" + err_msg)
207 -
208 -
209 - t = HTML.table(table_data, header_row=header)
210 - htmlcode = str(t)
211 -
212 - f.write(htmlcode)
213 - f.close()
214 -
215 - return htmlcode
216 -
217 - """
218 - return 0
219 -
220 -
221 -
222 - """
223 -
224 - The access point of FundraiserReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc.
225 - To be overloaded by subclasses
226 -
227 - INPUT:
228 -
229 - RETURN:
230 - return_status - integer, 0 indicates un-exceptional execution
231 -
232 - """
233 - def run(self):
234 - return
235 -
236 -
237 -
238 -"""
239 -
240 -CLASS :: ^TotalAmountsReporting^
241 -
242 -This subclass handles reporting on total amounts for the fundraiser.
243 -
244 -"""
245 -
246 -class TotalAmountsReporting(FundraiserReporting):
247 -
248 - def __init__(self):
249 - self.data = []
250 -
251 - def run_query(self, start_time, end_time, query_name, descriptor):
252 -
253 - self.init_db()
254 -
255 - query_obj = qs.QueryStore()
256 -
257 - # Load the SQL File & Format
258 - filename = self._sql_path_ + query_name + '.sql'
259 - sql_stmnt = mh.read_sql(filename)
260 - sql_stmnt = query_obj.format_query(query_name + descriptor, sql_stmnt, [start_time, end_time])
261 -
262 - labels = [None] * 21
263 - labels[0] = 'clicks'
264 - labels[1] = 'donations'
265 - labels[2] = 'total amount'
266 - labels[3] = 'banner amount'
267 - labels[4] = 'US amount'
268 - labels[5] = 'EN amount'
269 - labels[6] = 'Other Amount'
270 - labels[7] = 'Email Amount'
271 - labels[8] = 'Recurring Guess'
272 - labels[9] = 'completion_rate'
273 - labels[10] = 'pp_clicks'
274 - labels[11] = 'pp_donations'
275 - labels[12] = 'pp_completion'
276 - labels[13] = 'pp_amount'
277 - labels[14] = 'pp_max_amount'
278 - labels[15] = 'cc_clicks'
279 - labels[16] = 'cc_donations'
280 - labels[17] = 'cc_completion'
281 - labels[18] = 'cc_amount'
282 - labels[19] = 'cc_max_amount'
283 - labels[20] = 'total_amt50'
284 -
285 -
286 - num_keys = len(labels)
287 -
288 - lists = list()
289 - for i in range(num_keys):
290 - lists.append(list())
291 -
292 - # Composes the data for each banner
293 - try:
294 - err_msg = sql_stmnt
295 - self.cur.execute(sql_stmnt)
296 -
297 - # This query store records according to dates
298 - results = self.cur.fetchall()
299 - for row in results:
300 - for i in range(num_keys):
301 - lists[i].append(row[i+1])
302 -
303 - except:
304 - self.db.rollback()
305 - sys.exit("Database Interface Exception:\n" + err_msg)
306 -
307 - self.close_db()
308 -
309 - # Only interested in amounts
310 - return [labels, lists]
311 -
312 -
313 -
314 - def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, ranges, subplot_index, fname):
315 - pylab.subplot(subplot_index)
316 - num_keys = len(y_lists)
317 -
318 - pylab.figure(num=None,figsize=[26,14])
319 - line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','b--o','g--o','r--o','c--o','m--o','k--o']
320 -
321 - for i in range(num_keys):
322 - pylab.plot(x, y_lists[i], line_types[i])
323 -
324 - pylab.grid()
325 - pylab.xlim(ranges[0], ranges[1])
326 - pylab.legend(labels,loc=2)
327 -
328 - pylab.xlabel(xlabel)
329 - pylab.ylabel(ylabel)
330 -
331 - pylab.title(title)
332 - pylab.savefig(fname+'.png', format='png')
333 -
334 -
335 -
336 - def run_hr(self, type):
337 -
338 -
339 - # Current date & time
340 - now = datetime.datetime.now()
341 - #UTC = 8
342 - #delta = datetime.timedelta(hours=UTC)
343 - #now = now + delta
344 -
345 -
346 - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
347 - hours_back = 24
348 - times = self.gen_date_strings(now, hours_back,1,1)
349 -
350 - start_time = times[0]
351 - end_time = times[1]
352 -
353 - print '\nGenerating analytics total amount for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
354 -
355 - # QUERY NAME
356 - query_name = 'report_total_amounts'
357 -
358 -
359 - # RUN BY HOUR
360 - descriptor = '_by_hr'
361 - return_val = self.run_query(start_time, end_time, query_name, descriptor)
362 -
363 - labels = return_val[0] # curve labels
364 - counts = return_val[1] # curve data - lists
365 -
366 - r = self.get_query_fields(labels, counts, type, start_time, end_time)
367 - labels = r[0]
368 - counts = r[1]
369 - title = r[2]
370 - ylabel = r[3]
371 -
372 - xlabel = 'Time - Hours'
373 - subplot_index = 111
374 -
375 - # plot the curves
376 - time_range = range(len(counts[0]))
377 - for i in range(len(counts[0])):
378 - time_range[i] = time_range[i] - len(counts[0])
379 -
380 - ranges = [min(time_range), max(time_range)]
381 -
382 - fname = query_name + descriptor + '_' + type
383 - self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)
384 -
385 -
386 -
387 - def run_day(self,type):
388 -
389 - # Current date & time
390 - now = datetime.datetime.now()
391 - #UTC = 8
392 - #delta = datetime.timedelta(hours=UTC)
393 - #now = now + delta
394 -
395 -
396 - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=0 """
397 - hours_back = 7 * 24 # 7 days back
398 - times = self.gen_date_strings(now, hours_back,1,0)
399 -
400 - start_time = times[0]
401 - end_time = times[1]
402 -
403 - print '\nGenerating analytics total amount for ' + str(days_back) + ' days back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
404 -
405 -
406 - # FORMAT HEADERS & QUERY NAME
407 - query_name = 'report_total_amounts'
408 - descriptor = '_by_day'
409 - return_val = self.run_query(start_time, end_time, query_name, descriptor)
410 -
411 - labels = return_val[0]
412 - counts = return_val[1]
413 -
414 - r = self.get_query_fields(labels, counts, type, start_time, end_time)
415 - labels = r[0]
416 - counts = r[1]
417 - title = r[2]
418 - ylabel = r[3]
419 -
420 - xlabel = 'Time - Days'
421 - subplot_index = 111
422 -
423 - # Plot values
424 - time_range = range(len(counts[0]))
425 - for i in range(len(counts[0])):
426 - time_range[i] = time_range[i] - len(counts[0])
427 -
428 - ranges = [min(time_range), max(time_range)]
429 -
430 - fname = query_name + descriptor + '_' + type
431 - self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)
432 -
433 -
434 - def get_query_fields(self, labels, counts, type, start_time, end_time):
435 -
436 - if type == 'BAN_EM':
437 - indices = range(2,9)
438 - title = 'Total Amounts: ' + start_time + ' -- ' + end_time
439 - ylabel = 'Amount'
440 - elif type == 'CC_PP_completion':
441 - indices = [12,17]
442 - title = 'Credit Card & Paypal Completion Rates: ' + start_time + ' -- ' + end_time
443 - ylabel = 'Rate'
444 - elif type == 'CC_PP_amount':
445 - indices = [13,18]
446 - title = 'Credit Card & Paypal Total Amounts: ' + start_time + ' -- ' + end_time
447 - ylabel = 'Amount'
448 - elif type == 'AMT_VS_AMT50':
449 - indices = [2,20]
450 - title = 'Amount50 and Amount Totals: ' + start_time + ' -- ' + end_time
451 - ylabel = 'Amount'
452 - else:
453 - sys.exit("Total Amounts: You must enter a valid report type.\n" )
454 -
455 - # Exract relevant labels and values
456 - labels_temp = list()
457 - counts_temp = list()
458 -
459 - for i in range(len(labels)):
460 - if i in indices:
461 - labels_temp.append(labels[i])
462 - counts_temp.append(counts[i])
463 -
464 - return [labels_temp, counts_temp, title, ylabel]
465 -
466 -
467 -"""
468 -
469 -CLASS :: ^BannerLPReporting^
470 -
471 -This subclass handles reporting on banners and landing pages for the fundraiser.
472 -
473 -"""
474 -
475 -class BannerLPReporting(FundraiserReporting):
476 -
477 -
478 - def __init__(self, *args):
479 -
480 - if len(args) == 2:
481 - self.campaign = args[0]
482 - self.start_time = args[1]
483 - else:
484 - self.campaign = None
485 - self.start_time = None
486 -
487 - def run_query(self,start_time, end_time, campaign, query_name, metric_name):
488 -
489 - self.init_db()
490 -
491 - query_obj = qs.QueryStore()
492 -
493 - metric_lists = mh.AutoVivification()
494 - time_lists = mh.AutoVivification()
495 - # table_data = [] # store the results in a table for reporting
496 -
497 - # Load the SQL File & Format
498 - filename = self._sql_path_ + query_name + '.sql'
499 - sql_stmnt = mh.read_sql(filename)
500 -
501 - query_name = 'report_bannerLP_metrics' # rename query to work with query store
502 - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time, campaign])
503 -
504 - key_index = query_obj.get_banner_index(query_name)
505 - time_index = query_obj.get_time_index(query_name)
506 - metric_index = query_obj.get_metric_index(query_name, metric_name)
507 -
508 - # Composes the data for each banner
509 - try:
510 - err_msg = sql_stmnt
511 - self.cur.execute(sql_stmnt)
512 -
513 - results = self.cur.fetchall()
514 -
515 - # Compile Table Data
516 - # cpRow = self.listify(row)
517 - # table_data.append(cpRow)
518 -
519 - for row in results:
520 -
521 - key_name = row[key_index]
522 -
523 - try:
524 - metric_lists[key_name].append(row[metric_index])
525 - time_lists[key_name].append(row[time_index])
526 - except:
527 - metric_lists[key_name] = list()
528 - time_lists[key_name] = list()
529 -
530 - metric_lists[key_name].append(row[metric_index])
531 - time_lists[key_name].append(row[time_index])
532 -
533 - except:
534 - self.db.rollback()
535 - sys.exit("Database Interface Exception:\n" + err_msg)
536 -
537 - """ Convert Times to Integers """
538 - # Find the earliest date
539 - max_i = 0
540 -
541 - for key in time_lists.keys():
542 - for date_str in time_lists[key]:
543 - day_int = int(date_str[8:10])
544 - hr_int = int(date_str[11:13])
545 - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
546 - if date_int > max_i:
547 - max_i = date_int
548 - max_day = day_int
549 - max_hr = hr_int
550 -
551 -
552 - # Normalize dates
553 - time_norm = mh.AutoVivification()
554 - for key in time_lists.keys():
555 - for date_str in time_lists[key]:
556 - day = int(date_str[8:10])
557 - hr = int(date_str[11:13])
558 - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
559 - elem = (day - max_day) * 24 + (hr - max_hr)
560 - try:
561 - time_norm[key].append(elem)
562 - except:
563 - time_norm[key] = list()
564 - time_norm[key].append(elem)
565 -
566 - # smooth out the values
567 - #window_length = 20
568 - #for banner in metric_lists.keys():
569 - # metric_lists[banner] = smooth(metric_lists[banner], window_length)
570 -
571 - self.close_db()
572 -
573 - # return [metric_lists, time_norm, table_data]
574 - return [metric_lists, time_norm]
575 -
576 -
577 - def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):
578 - pylab.subplot(subplot_index)
579 - pylab.figure(num=None,figsize=[26,14])
580 - count_keys = counts.keys()
581 -
582 - line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']
583 -
584 - count = 0
585 - for key in counts.keys():
586 - pylab.plot(times[key], counts[key], line_types[count])
587 - count = count + 1
588 -
589 - pylab.grid()
590 - pylab.xlim(ranges[0], ranges[1])
591 - pylab.legend(count_keys,loc=2)
592 -
593 - pylab.xlabel(xlabel)
594 - pylab.ylabel(ylabel)
595 -
596 - pylab.title(title)
597 - pylab.savefig(fname, format='png')
598 -
599 -
600 - """
601 -
602 - type = 'LP' || 'BAN' || 'BAN-TEST' || 'LP-TEST'
603 -
604 - """
605 - def run(self, type, metric_name):
606 -
607 - # Current date & time
608 - now = datetime.datetime.now()
609 - #UTC = 8
610 - #delta = datetime.timedelta(hours=UTC)
611 - #now = now + delta
612 -
613 - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
614 - hours_back = 24
615 - times = self.gen_date_strings(now, hours_back,1,1)
616 -
617 - start_time = times[0]
618 - end_time = times[1]
619 -
620 - print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
621 -
622 - if type == 'LP':
623 - query_name = 'report_LP_metrics'
624 -
625 - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
626 - if self.campaign == None:
627 - campaign = '[0-9](JA|SA|EA|TY)[0-9]'
628 - else:
629 - campaign = self.campaign
630 -
631 - title = metric_name + ': ' + start_time + ' -- ' + end_time
632 - fname = query_name + '_' + metric_name + '.png'
633 - elif type == 'BAN':
634 - query_name = 'report_banner_metrics'
635 -
636 - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
637 - if self.campaign == None:
638 - campaign = '[0-9](JA|SA|EA|TY)[0-9]'
639 - else:
640 - campaign = self.campaign
641 -
642 - title = metric_name + ': ' + start_time + ' -- ' + end_time
643 - fname = query_name + '_' + metric_name + '.png'
644 - elif type == 'BAN-TEST':
645 - r = self.get_latest_campaign()
646 - query_name = 'report_banner_metrics'
647 -
648 - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
649 - if self.campaign == None:
650 - campaign = r[0]
651 - start_time = r[1]
652 - else:
653 - campaign = self.campaign
654 - start_time = self.start_time
655 -
656 - title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
657 - fname = query_name + '_' + metric_name + '_latest' + '.png'
658 - elif type == 'LP-TEST':
659 - r = self.get_latest_campaign()
660 - query_name = 'report_LP_metrics'
661 -
662 - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
663 - if self.campaign == None:
664 - campaign = r[0]
665 - start_time = r[1]
666 - else:
667 - campaign = self.campaign
668 - start_time = self.start_time
669 -
670 - title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
671 - fname = query_name + '_' + metric_name + '_latest' + '.png'
672 - else:
673 - sys.exit("Invalid type name - must be 'LP' or 'BAN'.")
674 -
675 - return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name)
676 - metrics = return_val[0]
677 - times = return_val[1]
678 -
679 - # title = metric_name + ': ' + start_time + ' -- ' + end_time
680 - xlabel = 'Time - Hours'
681 - ylabel = metric_name
682 - subplot_index = 111
683 -
684 - min_time = 99
685 - for key in times.keys():
686 - min_elem = min(times[key])
687 - if min_elem < min_time:
688 - min_time = min_elem
689 -
690 - ranges = [min_time, 0]
691 -
692 - self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname)
693 -
694 - return [metrics, times]
695 -
696 -
697 - def get_latest_campaign(self):
698 -
699 - query_name = 'report_latest_campaign'
700 - self.init_db()
701 -
702 - """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """
703 - now = datetime.datetime.now()
704 - hours_back = 72
705 - times = self.gen_date_strings(now, hours_back,1,1)
706 -
707 - query_obj = qs.QueryStore()
708 - sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')
709 - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [times[0]])
710 -
711 - campaign_index = query_obj.get_campaign_index(query_name)
712 - time_index = query_obj.get_time_index(query_name)
713 -
714 - try:
715 - err_msg = sql_stmnt
716 - self.cur.execute(sql_stmnt)
717 -
718 - row = self.cur.fetchone()
719 - except:
720 - self.db.rollback()
721 - sys.exit("Database Interface Exception:\n" + err_msg)
722 -
723 - campaign = row[campaign_index]
724 - timestamp = row[time_index]
725 -
726 - self.close_db()
727 -
728 - return [campaign, timestamp]
729 -
730 - """
731 -
732 - Takes as input and converts it to a set of hours counting back from 0
733 -
734 - time_lists - a dictionary of timestamp lists
735 - time_norm - a dictionary of normalized times
736 -
737 - """
738 - def normalize_timestamps(self, time_lists):
739 - # Find the earliest date
740 - max_i = 0
741 -
742 - for key in time_lists.keys():
743 - for date_str in time_lists[key]:
744 - day_int = int(date_str[8:10])
745 - hr_int = int(date_str[11:13])
746 - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
747 - if date_int > max_i:
748 - max_i = date_int
749 - max_day = day_int
750 - max_hr = hr_int
751 -
752 -
753 - # Normalize dates
754 - time_norm = mh.AutoVivification()
755 - for key in time_lists.keys():
756 - for date_str in time_lists[key]:
757 - day = int(date_str[8:10])
758 - hr = int(date_str[11:13])
759 - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
760 - elem = (day - max_day) * 24 + (hr - max_hr)
761 - try:
762 - time_norm[key].append(elem)
763 - except:
764 - time_norm[key] = list()
765 - time_norm[key].append(elem)
766 -
767 - return time_norm
768 -
769 -"""
770 -
771 -CLASS :: ^MinerReporting^
772 -
773 -This subclass handles reporting on raw values imported into the database.
774 -
775 -"""
776 -
777 -class MinerReporting(FundraiserReporting):
778 -
779 - def run_query(self, start_time, end_time, query_name):
780 -
781 - self.init_db()
782 -
783 - query_obj = qs.QueryStore()
784 -
785 - counts = list()
786 - times = list()
787 -
788 - # Load the SQL File & Format
789 - filename = self._sql_path_+ query_name + '.sql'
790 - sql_stmnt = mh.read_sql(filename)
791 -
792 - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time])
793 - #print sql_stmnt
794 -
795 - # Get Indexes into Query
796 - count_index = query_obj.get_count_index(query_name)
797 - time_index = query_obj.get_time_index(query_name)
798 -
799 - # Composes the data for each banner
800 - try:
801 - err_msg = sql_stmnt
802 - self.cur.execute(sql_stmnt)
803 -
804 - results = self.cur.fetchall()
805 -
806 - for row in results:
807 - counts.append(row[count_index])
808 - times.append(row[time_index])
809 -
810 - except:
811 - self.db.rollback()
812 - sys.exit("Database Interface Exception:\n" + err_msg)
813 -
814 - """ Convert Times to Integers """
815 - time_norm = self.normalize_timestamps(times)
816 -
817 -
818 - self.close_db()
819 -
820 - return [counts, time_norm]
821 -
822 -
823 - # Create histograms for hourly counts
824 -
825 - def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):
826 -
827 - pylab.subplot(subplot_index)
828 - pylab.figure(num=None,figsize=[26,14])
829 -
830 - # pylab.plot(times, counts)
831 - # pylab.hist(counts, times)
832 - pylab.bar(times, counts, width=0.5)
833 -
834 - pylab.grid()
835 - pylab.xlim(ranges[0], ranges[1])
836 -
837 - pylab.xlabel(xlabel)
838 - pylab.ylabel(ylabel)
839 -
840 - pylab.title(title)
841 - pylab.savefig(fname, format='png')
842 -
843 - def run(self, query_name):
844 -
845 - query_obj = qs.QueryStore()
846 -
847 - # Current date & time
848 - now = datetime.datetime.now()
849 - #UTC = 8
850 - #delta = datetime.timedelta(hours=UTC)
851 - #now = now + delta
852 -
853 - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
854 - hours_back = 24
855 - times = self.gen_date_strings_hr(now, hours_back,1,1)
856 -
857 - start_time = times[0]
858 - end_time = times[1]
859 -
860 - print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
861 -
862 - # Run Query
863 - return_val = self.run_query(start_time, end_time, query_name)
864 - counts = return_val[0]
865 - times = return_val[1]
866 -
867 - # Normalize times
868 - min_time = min(times)
869 - ranges = [min_time, 0]
870 -
871 - xlabel = 'Hours'
872 - subplot_index = 111
873 - fname = query_name + '.png'
874 -
875 - title = query_obj.get_plot_title(query_name)
876 - title = title + ' -- ' + start_time + ' - ' + end_time
877 - ylabel = query_obj.get_plot_ylabel(query_name)
878 -
879 - # Convert counts to float (from Decimal) to prevent exception when bar plotting
880 - # Bbox::update_numerix_xy expected numerix array
881 - counts_new = list()
882 - for i in range(len(counts)):
883 - counts_new.append(float(counts[i]))
884 - counts = counts_new
885 -
886 - # Generate Histogram
887 - self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)
888 -
889 -
890 -"""
891 -
892 -CLASS :: IntervalReporting
893 -
894 -Performs queries that take timestamps, query, and an interval as arguments. Data for a single metric
895 -is generated for each time interval in the time period defined by the start and end timestamps.
896 -
897 -Types of queries supported:
898 -
899 -report_banner_metrics_minutely
900 -report_LP_metrics_minutely
901 -
902 -"""
903 -
904 -class IntervalReporting(FundraiserReporting):
905 -
906 - def run_query(self, start_time, end_time, interval, query_name, metric_name, campaign):
907 -
908 - self.init_db()
909 -
910 - query_obj = qs.QueryStore()
911 -
912 - metrics = mh.AutoVivification()
913 - times = mh.AutoVivification()
914 - times_norm = mh.AutoVivification()
915 -
916 - """ Compose datetime objects to represent the first and last intervals """
917 - start_time_obj = self.timestamp_to_obj(start_time, 1)
918 - start_time_obj = start_time_obj.replace(minute=int(math.floor(start_time_obj.minute / interval) * interval))
919 - start_time_obj_str = self.timestamp_from_obj(start_time_obj, 1, 3)
920 -
921 - end_time_obj = self.timestamp_to_obj(end_time, 1)
922 - # end_time_obj = end_time_obj + datetime.timedelta(seconds=-1)
923 - end_time_obj = end_time_obj.replace(minute=int(math.floor(end_time_obj.minute / interval) * interval))
924 - end_time_obj_str = self.timestamp_from_obj(end_time_obj, 1, 3)
925 -
926 - """ The start time for the impression portion of the query should be one second less"""
927 -
928 - imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
929 - imp_start_time_obj_str = self.timestamp_from_obj(imp_start_time_obj, 1, 3)
930 -
931 - """ Load the SQL File & Format """
932 - filename = self._sql_path_+ query_name + '.sql'
933 - sql_stmnt = mh.read_sql(filename)
934 -
935 - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time, campaign, interval, imp_start_time_obj_str])
936 - # print sql_stmnt
937 -
938 - """ Get Indexes into Query """
939 - key_index = query_obj.get_banner_index(query_name)
940 - metric_index = query_obj.get_metric_index(query_name, metric_name)
941 - time_index = query_obj.get_time_index(query_name)
942 -
943 - """ Compose the data for each separate donor pipeline artifact """
944 - try:
945 - err_msg = sql_stmnt
946 - self.cur.execute(sql_stmnt)
947 -
948 - results = self.cur.fetchall()
949 - final_time = dict() # stores the last timestamp seen
950 - interval_obj = datetime.timedelta(minutes=interval) # timedelta object used to shift times by _interval_ minutes
951 -
952 - for row in results:
953 -
954 - key_name = row[key_index]
955 - time_obj = self.timestamp_to_obj(row[time_index], 1) # format = 1, 14-digit TS
956 -
957 - """ For each new dictionary index by key name start a new list if its not already there """
958 - try:
959 - metrics[key_name].append(row[metric_index])
960 - times[key_name].append(time_obj + interval_obj)
961 - final_time[key_name] = row[time_index]
962 - except:
963 - metrics[key_name] = list()
964 - times[key_name] = list()
965 -
966 - """ If the first element is not the start time add it
967 - this will be the case if there is no data for the first interval
968 - NOTE: two datapoints are added at the beginning to define the first interval """
969 - if start_time_obj_str != row[time_index]:
970 - times[key_name].append(start_time_obj)
971 - metrics[key_name].append(0.0)
972 -
973 - times[key_name].append(start_time_obj + interval_obj)
974 - metrics[key_name].append(0.0)
975 - else:
976 - metrics[key_name].append(row[metric_index])
977 - times[key_name].append(time_obj)
978 -
979 - metrics[key_name].append(row[metric_index])
980 - times[key_name].append(time_obj + interval_obj)
981 -
982 -
983 - except:
984 - self.db.rollback()
985 - sys.exit("Database Interface Exception:\n" + err_msg)
986 -
987 -
988 -
989 - """ Ensure that the last time in the list is the endtime less the interval """
990 -
991 - for key in times.keys():
992 - if final_time[key_name] != end_time_obj_str:
993 - times[key].append(end_time_obj)
994 - metrics[key].append(0.0)
995 -
996 - self.close_db()
997 -
998 - return [metrics, times]
999 -
1000 - """
1001 - Execute reporting query and generate plots
1002 - """
1003 -
1004 - """
1005 - Execute reporting query and generate plots
1006 - """
1007 - def gen_plot(self, metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname):
1008 -
1009 - pylab.subplot(subplot_index)
1010 - pylab.figure(num=None,figsize=[26,14])
1011 -
1012 - line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']
1013 -
1014 - count = 0
1015 - for key in metrics.keys():
1016 - pylab.step(times[key], metrics[key], line_types[count])
1017 - count = count + 1
1018 -
1019 - pylab.grid()
1020 - pylab.xlim(ranges[0], ranges[1])
1021 - pylab.ylim(ranges[2], ranges[3])
1022 - pylab.legend(metrics.keys(),loc=2)
1023 -
1024 - pylab.xlabel(xlabel)
1025 - pylab.ylabel(ylabel)
1026 -
1027 - pylab.title(title)
1028 - pylab.savefig(fname, format='png')
1029 -
1030 -
1031 - """
1032 - Execute reporting query and generate plots
1033 - """
1034 - def run(self, start_time, end_time, interval, query_name, metric_name, campaign):
1035 -
1036 - query_obj = qs.QueryStore()
1037 -
1038 - print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
1039 -
1040 - # Run Query
1041 - return_val = self.run_query(start_time, end_time, interval, query_name, metric_name, campaign)
1042 - counts = return_val[0]
1043 - times = return_val[1]
1044 -
1045 - """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
1046 - for key in times.keys():
1047 - times[key] = self.normalize_timestamps(times[key], False, 2)
1048 - times[key], counts[key] = self.normalize_intervals(times[key], counts[key], interval)
1049 -
1050 - # Normalize times
1051 - min_time = min(times)
1052 - ranges = [min_time, 0]
1053 -
1054 - xlabel = 'MINUTES'
1055 - subplot_index = 111
1056 - fname = campaign + ' ' + query_name + ' ' + metric_name + '.png'
1057 -
1058 - metric_full_name = query_obj.get_metric_full_name(metric_name)
1059 - title = campaign + ': ' + metric_full_name + ' -- ' + start_time + ' - ' + end_time
1060 - ylabel = metric_full_name
1061 -
1062 - """ Convert counts to float (from Decimal) to prevent exception when bar plotting
1063 - Bbox::update_numerix_xy expected numerix array """
1064 - for key in counts.keys():
1065 - counts_new = list()
1066 - for i in range(len(counts[key])):
1067 - counts_new.append(float(counts[key][i]))
1068 - counts[key] = counts_new
1069 -
1070 - """ Determine List maximums """
1071 - times_max = 0
1072 - metrics_max = 0
1073 -
1074 - for key in counts.keys():
1075 - list_max = max(counts[key])
1076 - if list_max > metrics_max:
1077 - metrics_max = list_max
1078 -
1079 - for key in times.keys():
1080 - list_max = max(times[key])
1081 - if list_max > times_max:
1082 - times_max = list_max
1083 -
1084 - ranges = list()
1085 - ranges.append(0.0)
1086 - ranges.append(times_max * 1.1)
1087 - ranges.append(0.0)
1088 - ranges.append(metrics_max * 1.1)
1089 -
1090 -
1091 - """ Generate plots given data """
1092 - self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)
1093 -
1094 -
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/query_store.py
@@ -1,437 +0,0 @@
2 -
3 -"""
4 -
5 -Class that contains and organizes query info. Depends on the contents of ../sql/ where filenames are
6 -coupled with query_name parameters
7 -
8 -"""
9 -
10 -__author__ = "Ryan Faulkner"
11 -__revision__ = "$Rev$"
12 -__date__ = "November 28th, 2010"
13 -
14 -
15 -"""
16 -
17 - CLASS :: QueryStore
18 -
19 -
20 -
21 - METHODS:
22 -
23 - format_query
24 - get_query
25 - get_query_header
26 - get_key_index
27 - get_count_index
28 - get_time_index
29 - get_campaign_index
30 - get_banner_index
31 - get_landing_page_index
32 - get_metric_index
33 - get_plot_title
34 - get_plot_ylabel
35 - get_metric_full_name
36 -
37 -"""
38 -class QueryStore:
39 -
40 - def format_query(self, query_name, sql_stmnt, args):
41 -
42 - if query_name == 'report_campaign_ecomm':
43 - start_time = args[0]
44 - sql_stmnt = sql_stmnt % (start_time)
45 -
46 - elif query_name == 'report_campaign_logs':
47 - start_time = args[0]
48 - sql_stmnt = sql_stmnt % (start_time, start_time, start_time)
49 -
50 - elif query_name == 'report_campaign_ecomm_by_hr':
51 - start_time = args[0]
52 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time)
53 -
54 - elif query_name == 'report_campaign_logs_by_hr':
55 - start_time = args[0]
56 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
57 - start_time, '%', '%', '%', '%', start_time, '%')
58 -
59 - elif query_name == 'report_impressions_country':
60 - start_time = args[0]
61 - sql_stmnt = sql_stmnt % ('%', '%', '%', start_time)
62 -
63 - elif query_name == 'report_campaign_logs_by_min':
64 - start_time = args[0]
65 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
66 - start_time, '%', '%', '%', '%', start_time)
67 -
68 - elif query_name == 'report_non_US_clicks':
69 - start_time = args[0]
70 - sql_stmnt = sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time)
71 -
72 - elif query_name == 'report_contribution_tracking':
73 - start_time = args[0]
74 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', '%',start_time)
75 -
76 - elif query_name == 'report_total_amounts_by_hr':
77 - start_time = args[0]
78 - end_time = args[1]
79 - sql_stmnt = sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time)
80 -
81 - elif query_name == 'report_total_amounts_by_day':
82 - start_time = args[0]
83 - end_time = args[1]
84 - sql_stmnt = sql_stmnt % ('%', '%', '%', '', start_time, end_time)
85 -
86 - elif query_name == 'report_bannerLP_metrics':
87 - start_time = args[0]
88 - end_time = args[1]
89 - campaign = args[2]
90 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, campaign, '%', '%', '%', '%', start_time, end_time, campaign)
91 -
92 - elif query_name == 'report_latest_campaign':
93 - start_time = args[0]
94 - sql_stmnt = sql_stmnt % (start_time)
95 -
96 - elif query_name == 'report_confidence_banner':
97 - start = args[0]
98 - end = args[1]
99 - cmpgn = args[2]
100 - banner = args[3]
101 - sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, banner, '%','%','%','%','10','10', start, end, cmpgn, banner, \
102 - '%','%','%','%','10','10', start, end, cmpgn, banner)
103 -
104 - elif query_name == 'report_confidence_lp':
105 - start = args[0]
106 - end = args[1]
107 - cmpgn = args[2]
108 - banner = args[3]
109 - sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, cmpgn, banner, \
110 - '%','%','%','%','10','10', start, end, cmpgn, banner)
111 -
112 - elif query_name == 'report_banner_impressions_by_hour':
113 - start = args[0]
114 - end = args[1]
115 - sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)
116 -
117 - elif query_name == 'report_lp_views_by_hour':
118 - start = args[0]
119 - end = args[1]
120 - sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)
121 -
122 - elif query_name == 'report_banner_confidence':
123 - start = args[0]
124 - end = args[1]
125 - banner = args[2]
126 - campaign = args[3]
127 - sql_stmnt = sql_stmnt % (start, end, banner, start, end, campaign, start, end, banner, start, end, campaign, banner)
128 -
129 - elif query_name == 'report_LP_confidence':
130 - start = args[0]
131 - end = args[1]
132 - lp = args[2]
133 - campaign = args[3]
134 - sql_stmnt = sql_stmnt % (start, end, campaign, lp, start, end, campaign, lp)
135 -
136 - elif query_name == 'report_bannerLP_confidence':
137 - start = args[0]
138 - end = args[1]
139 - banner = args[2]
140 - lp = args[3]
141 - campaign = args[4]
142 - sql_stmnt = sql_stmnt % (start, end, banner, start, end, banner, campaign, start, end, banner, lp, campaign, start, end, banner, lp, campaign)
143 -
144 - elif query_name == 'report_ecomm_by_amount':
145 - start_time = args[0]
146 - end_time = args[1]
147 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, end_time)
148 -
149 - elif query_name == 'report_ecomm_by_contact':
150 - where_str = args[0]
151 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', where_str)
152 -
153 - elif query_name == 'report_banner_metrics_minutely':
154 - start_time = args[0]
155 - end_time = args[1]
156 - campaign = args[2]
157 - interval = args[3]
158 - imp_start_time = args[4]
159 -
160 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, imp_start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign, \
161 - '%', '%', '%', '%', interval, interval, start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)
162 -
163 - elif query_name == 'report_LP_metrics_minutely':
164 - start_time = args[0]
165 - end_time = args[1]
166 - campaign = args[2]
167 - interval = args[3]
168 -
169 - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, start_time, end_time, campaign, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)
170 -
171 - else:
172 - return 'no such table\n'
173 -
174 - return sql_stmnt
175 -
176 - def get_query(self, query_name):
177 - if query_name == 'report_campaign_logs_by_min':
178 - return ''
179 - elif query_name == '':
180 - return ''
181 - else:
182 - return 'no such table'
183 -
184 - def get_query_header(self, query_name):
185 - if query_name == 'report_contribution_tracking':
186 - return ['Time','Banner','Landing Page','Campaign','Converted Amount', 'Suffix']
187 - elif query_name == 'report_ecomm_by_amount':
188 - return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount', 'First Donation?', 'Date of First']
189 - elif query_name == 'report_ecomm_by_contact':
190 - return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount']
191 - else:
192 - return 'no such table'
193 -
194 - """ Returns the index of the key for the query data """
195 - def get_key_index(self, query_name):
196 - if query_name == 'report_banner_metrics_minutely':
197 - return 1
198 - elif query_name == 'report_LP_metrics_minutely':
199 - return 1
200 -
201 - def get_count_index(self, query_name):
202 - if query_name == 'report_lp_views_by_hour':
203 - return 1
204 - elif query_name == 'report_banner_impressions_by_hour':
205 - return 1
206 - else:
207 - return -1
208 -
209 - def get_time_index(self, query_name):
210 - if query_name == 'report_campaign_logs_by_min':
211 - return 0
212 - elif query_name == 'report_campaign_logs_by_hr':
213 - return 0
214 - elif query_name == 'report_non_US_clicks':
215 - return 0
216 - elif query_name == 'report_contribution_tracking':
217 - return 0
218 - elif query_name == 'report_bannerLP_metrics':
219 - return 0
220 - elif query_name == 'report_latest_campaign':
221 - return 1
222 - elif query_name == 'report_banner_impressions_by_hour':
223 - return 0
224 - elif query_name == 'report_lp_views_by_hour':
225 - return 0
226 - elif query_name == 'report_banner_metrics_minutely':
227 - return 0
228 - elif query_name == 'report_LP_metrics_minutely':
229 - return 0
230 - else:
231 - return -1
232 -
233 - def get_campaign_index(self, query_name):
234 - if query_name == 'report_campaign_logs_by_min':
235 - return 2
236 - elif query_name == 'report_campaign_logs_by_hr':
237 - return 1
238 - elif query_name == 'report_contribution_tracking':
239 - return 3
240 - elif query_name == 'report_bannerLP_metrics':
241 - return 1
242 - elif query_name == 'report_latest_campaign':
243 - return 0
244 - else:
245 - return -1
246 -
247 - def get_banner_index(self, query_name):
248 - if query_name == 'report_campaign_logs_by_min':
249 - return 3
250 - elif query_name == 'report_campaign_logs_by_hr':
251 - return 2
252 - elif query_name == 'report_contribution_tracking':
253 - return 1
254 - elif query_name == 'report_bannerLP_metrics':
255 - return 1
256 - elif query_name == 'report_banner_metrics_minutely':
257 - return 1
258 - else:
259 - return -1
260 -
261 - def get_landing_page_index(self, query_name):
262 - if query_name == 'report_campaign_logs_by_min':
263 - return 4
264 - elif query_name == 'report_campaign_logs_by_hr':
265 - return 3
266 - elif query_name == 'report_non_US_clicks':
267 - return 2
268 - elif query_name == 'report_contribution_tracking':
269 - return 2
270 - elif query_name == 'report_bannerLP_metrics':
271 - return 1
272 - elif query_name == 'report_LP_metrics_minutely':
273 - return 1
274 - else:
275 - return -1
276 -
277 - def get_metric_index(self, query_name, metric_name):
278 - if query_name == 'report_campaign_logs_by_min':
279 - if metric_name == 'click_rate':
280 - return 9
281 - elif query_name == 'report_campaign_logs_by_hr':
282 - if metric_name == 'click_rate':
283 - return 8
284 - elif query_name == 'report_contribution_tracking':
285 - if metric_name == 'converted_amount':
286 - return 4
287 - elif query_name == 'report_bannerLP_metrics':
288 - if metric_name == 'total_impressions':
289 - return 2
290 - elif metric_name == 'impressions':
291 - return 3
292 - elif metric_name == 'views':
293 - return 4
294 - elif metric_name == 'clicks':
295 - return 5
296 - elif metric_name == 'donations':
297 - return 6
298 - elif metric_name == 'amount':
299 - return 7
300 - elif metric_name == 'click_rate':
301 - return 8
302 - elif metric_name == 'completion_rate':
303 - return 9
304 - elif metric_name == 'don_per_imp':
305 - return 10
306 - elif metric_name == 'amt_per_imp':
307 - return 11
308 - elif metric_name == 'don_per_view':
309 - return 12
310 - elif metric_name == 'amt_per_view':
311 - return 13
312 - elif metric_name == 'amt_per_donation':
313 - return 14
314 - elif metric_name == 'max_amt':
315 - return 15
316 - elif metric_name == 'pp_don':
317 - return 16
318 - elif metric_name == 'cc_don':
319 - return 17
320 - elif metric_name == 'paypal_click_thru':
321 - return 18
322 - elif metric_name == 'creditcard_click_thru':
323 - return 19
324 - else:
325 - return -1
326 - elif query_name == 'report_banner_confidence':
327 - if metric_name == 'click_rate':
328 - return 7
329 - elif metric_name == 'don_per_imp':
330 - return 9
331 - elif metric_name == 'amt_per_imp':
332 - return 10
333 - elif metric_name == 'amt50_per_imp':
334 - return 14
335 - elif metric_name == 'amt100_per_imp':
336 - return 15
337 - else:
338 - return -1
339 - elif query_name == 'report_LP_confidence':
340 - if metric_name == 'completion_rate':
341 - return 5
342 - elif metric_name == 'don_per_view':
343 - return 6
344 - elif metric_name == 'amt_per_view':
345 - return 7
346 - elif metric_name == 'amt_per_donation':
347 - return 8
348 - elif metric_name == 'amt50_per_view':
349 - return 9
350 - elif metric_name == 'amt100_per_view':
351 - return 10
352 - else:
353 - return -1
354 - elif query_name == 'report_bannerLP_confidence':
355 - if metric_name == 'click_rate':
356 - return 7
357 - elif metric_name == 'completion_rate':
358 - return 8
359 - elif metric_name == 'don_per_imp':
360 - return 9
361 - elif metric_name == 'amt_per_imp':
362 - return 10
363 - elif metric_name == 'don_per_view':
364 - return 11
365 - elif metric_name == 'amt_per_view':
366 - return 12
367 - elif metric_name == 'amt_per_donation':
368 - return 13
369 - elif metric_name == 'amt50_per_imp':
370 - return 14
371 - elif metric_name == 'amt100_per_imp':
372 - return 15
373 - else:
374 - return -1
375 - elif query_name == 'report_LP_metrics_minutely':
376 - if metric_name == 'views':
377 - return 2
378 - elif metric_name == 'donations':
379 - return 4
380 - elif metric_name == 'amount50':
381 - return 5
382 - elif metric_name == 'don_per_view':
383 - return 7
384 - elif metric_name == 'amt50_per_view':
385 - return 9
386 - else:
387 - return -1
388 - elif query_name == 'report_banner_metrics_minutely':
389 - if metric_name == 'imp':
390 - return 2
391 - elif metric_name == 'donations':
392 - return 5
393 - elif metric_name == 'amount50':
394 - return 7
395 - elif metric_name == 'don_per_imp':
396 - return 10
397 - elif metric_name == 'amt50_per_imp':
398 - return 12
399 - else:
400 - return -1
401 - else:
402 - return 'no such table'
403 -
404 - def get_plot_title(self, query_name):
405 - if query_name == 'report_banner_impressions_by_hour':
406 - return 'Banner Impressions Over the Past 24 Hours'
407 - elif query_name == 'report_lp_views_by_hour':
408 - return 'Landing Page Views Over the Past 24 Hours'
409 - else:
410 - return 'no such table'
411 -
412 - def get_plot_ylabel(self, query_name):
413 - if query_name == 'report_banner_impressions_by_hour':
414 - return 'IMPRESSIONS'
415 - elif query_name == 'report_lp_views_by_hour':
416 - return 'VIEWS'
417 - else:
418 - return'no such table'
419 -
420 - def get_metric_full_name(self, metric_name):
421 - if metric_name == 'imp':
422 - return 'IMPRESSIONS'
423 - elif metric_name == 'view':
424 - return 'VIEWS'
425 - elif metric_name == 'don_per_imp':
426 - return 'DONATIONS PER IMPRESSION'
427 - elif metric_name == 'don_per_view':
428 - return 'DONATIONS PER VIEW'
429 - elif metric_name == 'amt50_per_imp':
430 - return 'AMOUNT50 PER IMPRESSION'
431 - elif metric_name == 'amt50_per_view':
432 - return 'AMOUNT50 PER VIEW'
433 - elif metric_name == 'amount50':
434 - return 'AMOUNT50'
435 - elif metric_name == 'donations':
436 - return 'DONATIONS'
437 - else:
438 - return'no such table'
\ No newline at end of file
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/compute_confidence.py
@@ -2,9 +2,14 @@
33
44 """
55
6 -Generates confidence estimate for a test
 6+This module defines reporting and analysis for determining the statistical confidence
 7+of of choice metrics over time.
78
 9+!!MODIFY!!
810
 11+- This should extend DataReporting.DataReporting
 12+- The querying functionality should be exported to DataLoader
 13+
914 """
1015
1116 __author__ = "Ryan Faulkner"
@@ -33,9 +38,13 @@
3439
3540
3641 METHODS:
37 -
38 -
39 -
 42+ query_tables
 43+ get_time_lists
 44+ gen_plot
 45+ run_test
 46+ compute_parameters
 47+ print_metrics
 48+
4049 """
4150 class ConfidenceTest(DataLoader):
4251
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US banner imp.png
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US banner imp.png
___________________________________________________________________
Added: svn:mime-type
4352 + application/octet-stream
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/QueryData.py
@@ -0,0 +1,431 @@
 2+
 3+"""
 4+
 5+This module effectively functions as a Singleton class.
 6+
 7+This module contains and organizes query info. Depends on the contents of ../sql/ where filenames are
 8+coupled with query_name parameters
 9+
 10+ METHODS:
 11+
 12+ format_query
 13+ get_query
 14+ get_query_header
 15+ get_key_index
 16+ get_count_index
 17+ get_time_index
 18+ get_campaign_index
 19+ get_banner_index
 20+ get_landing_page_index
 21+ get_metric_index
 22+ get_plot_title
 23+ get_plot_ylabel
 24+ get_metric_full_name
 25+
 26+"""
 27+
 28+__author__ = "Ryan Faulkner"
 29+__revision__ = "$Rev$"
 30+__date__ = "November 28th, 2010"
 31+
 32+
 33+
 34+def format_query(query_name, sql_stmnt, args):
 35+
 36+ if query_name == 'report_campaign_ecomm':
 37+ start_time = args[0]
 38+ sql_stmnt = sql_stmnt % (start_time)
 39+
 40+ elif query_name == 'report_campaign_logs':
 41+ start_time = args[0]
 42+ sql_stmnt = sql_stmnt % (start_time, start_time, start_time)
 43+
 44+ elif query_name == 'report_campaign_ecomm_by_hr':
 45+ start_time = args[0]
 46+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time)
 47+
 48+ elif query_name == 'report_campaign_logs_by_hr':
 49+ start_time = args[0]
 50+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
 51+ start_time, '%', '%', '%', '%', start_time, '%')
 52+
 53+ elif query_name == 'report_impressions_country':
 54+ start_time = args[0]
 55+ sql_stmnt = sql_stmnt % ('%', '%', '%', start_time)
 56+
 57+ elif query_name == 'report_campaign_logs_by_min':
 58+ start_time = args[0]
 59+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \
 60+ start_time, '%', '%', '%', '%', start_time)
 61+
 62+ elif query_name == 'report_non_US_clicks':
 63+ start_time = args[0]
 64+ sql_stmnt = sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time)
 65+
 66+ elif query_name == 'report_contribution_tracking':
 67+ start_time = args[0]
 68+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', '%',start_time)
 69+
 70+ elif query_name == 'report_total_amounts_by_hr':
 71+ start_time = args[0]
 72+ end_time = args[1]
 73+ sql_stmnt = sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time)
 74+
 75+ elif query_name == 'report_total_amounts_by_day':
 76+ start_time = args[0]
 77+ end_time = args[1]
 78+ sql_stmnt = sql_stmnt % ('%', '%', '%', '', start_time, end_time)
 79+
 80+ elif query_name == 'report_bannerLP_metrics':
 81+ start_time = args[0]
 82+ end_time = args[1]
 83+ campaign = args[2]
 84+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, campaign, '%', '%', '%', '%', start_time, end_time, campaign)
 85+
 86+ elif query_name == 'report_latest_campaign':
 87+ start_time = args[0]
 88+ sql_stmnt = sql_stmnt % (start_time)
 89+
 90+ elif query_name == 'report_confidence_banner':
 91+ start = args[0]
 92+ end = args[1]
 93+ cmpgn = args[2]
 94+ banner = args[3]
 95+ sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, banner, '%','%','%','%','10','10', start, end, cmpgn, banner, \
 96+ '%','%','%','%','10','10', start, end, cmpgn, banner)
 97+
 98+ elif query_name == 'report_confidence_lp':
 99+ start = args[0]
 100+ end = args[1]
 101+ cmpgn = args[2]
 102+ banner = args[3]
 103+ sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, cmpgn, banner, \
 104+ '%','%','%','%','10','10', start, end, cmpgn, banner)
 105+
 106+ elif query_name == 'report_banner_impressions_by_hour':
 107+ start = args[0]
 108+ end = args[1]
 109+ sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)
 110+
 111+ elif query_name == 'report_lp_views_by_hour':
 112+ start = args[0]
 113+ end = args[1]
 114+ sql_stmnt = sql_stmnt % ('%','%','%','%', start, end)
 115+
 116+ elif query_name == 'report_banner_confidence':
 117+ start = args[0]
 118+ end = args[1]
 119+ banner = args[2]
 120+ campaign = args[3]
 121+ sql_stmnt = sql_stmnt % (start, end, banner, start, end, campaign, start, end, banner, start, end, campaign, banner)
 122+
 123+ elif query_name == 'report_LP_confidence':
 124+ start = args[0]
 125+ end = args[1]
 126+ lp = args[2]
 127+ campaign = args[3]
 128+ sql_stmnt = sql_stmnt % (start, end, campaign, lp, start, end, campaign, lp)
 129+
 130+ elif query_name == 'report_bannerLP_confidence':
 131+ start = args[0]
 132+ end = args[1]
 133+ banner = args[2]
 134+ lp = args[3]
 135+ campaign = args[4]
 136+ sql_stmnt = sql_stmnt % (start, end, banner, start, end, banner, campaign, start, end, banner, lp, campaign, start, end, banner, lp, campaign)
 137+
 138+ elif query_name == 'report_ecomm_by_amount':
 139+ start_time = args[0]
 140+ end_time = args[1]
 141+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, end_time)
 142+
 143+ elif query_name == 'report_ecomm_by_contact':
 144+ where_str = args[0]
 145+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', where_str)
 146+
 147+ elif query_name == 'report_banner_metrics_minutely':
 148+ start_time = args[0]
 149+ end_time = args[1]
 150+ campaign = args[2]
 151+ interval = args[3]
 152+ imp_start_time = args[4]
 153+
 154+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, imp_start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign, \
 155+ '%', '%', '%', '%', interval, interval, start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)
 156+
 157+ elif query_name == 'report_LP_metrics_minutely':
 158+ start_time = args[0]
 159+ end_time = args[1]
 160+ campaign = args[2]
 161+ interval = args[3]
 162+
 163+ sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, start_time, end_time, campaign, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign)
 164+
 165+ else:
 166+ return 'no such table\n'
 167+
 168+ return sql_stmnt
 169+
 170+def get_query(query_name):
 171+ if query_name == 'report_campaign_logs_by_min':
 172+ return ''
 173+ elif query_name == '':
 174+ return ''
 175+ else:
 176+ return 'no such table'
 177+
 178+def get_query_header(query_name):
 179+ if query_name == 'report_contribution_tracking':
 180+ return ['Time','Banner','Landing Page','Campaign','Converted Amount', 'Suffix']
 181+ elif query_name == 'report_ecomm_by_amount':
 182+ return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount', 'First Donation?', 'Date of First']
 183+ elif query_name == 'report_ecomm_by_contact':
 184+ return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount']
 185+ else:
 186+ return 'no such table'
 187+
 188+""" Returns the index of the key for the query data """
 189+def get_key_index(query_name):
 190+ if query_name == 'report_banner_metrics_minutely':
 191+ return 1
 192+ elif query_name == 'report_LP_metrics_minutely':
 193+ return 1
 194+
 195+def get_count_index(query_name):
 196+ if query_name == 'report_lp_views_by_hour':
 197+ return 1
 198+ elif query_name == 'report_banner_impressions_by_hour':
 199+ return 1
 200+ else:
 201+ return -1
 202+
 203+def get_time_index(query_name):
 204+ if query_name == 'report_campaign_logs_by_min':
 205+ return 0
 206+ elif query_name == 'report_campaign_logs_by_hr':
 207+ return 0
 208+ elif query_name == 'report_non_US_clicks':
 209+ return 0
 210+ elif query_name == 'report_contribution_tracking':
 211+ return 0
 212+ elif query_name == 'report_bannerLP_metrics':
 213+ return 0
 214+ elif query_name == 'report_latest_campaign':
 215+ return 1
 216+ elif query_name == 'report_banner_impressions_by_hour':
 217+ return 0
 218+ elif query_name == 'report_lp_views_by_hour':
 219+ return 0
 220+ elif query_name == 'report_banner_metrics_minutely':
 221+ return 0
 222+ elif query_name == 'report_LP_metrics_minutely':
 223+ return 0
 224+ else:
 225+ return -1
 226+
 227+def get_campaign_index(query_name):
 228+ if query_name == 'report_campaign_logs_by_min':
 229+ return 2
 230+ elif query_name == 'report_campaign_logs_by_hr':
 231+ return 1
 232+ elif query_name == 'report_contribution_tracking':
 233+ return 3
 234+ elif query_name == 'report_bannerLP_metrics':
 235+ return 1
 236+ elif query_name == 'report_latest_campaign':
 237+ return 0
 238+ else:
 239+ return -1
 240+
 241+def get_banner_index(query_name):
 242+ if query_name == 'report_campaign_logs_by_min':
 243+ return 3
 244+ elif query_name == 'report_campaign_logs_by_hr':
 245+ return 2
 246+ elif query_name == 'report_contribution_tracking':
 247+ return 1
 248+ elif query_name == 'report_bannerLP_metrics':
 249+ return 1
 250+ elif query_name == 'report_banner_metrics_minutely':
 251+ return 1
 252+ else:
 253+ return -1
 254+
 255+def get_landing_page_index(query_name):
 256+ if query_name == 'report_campaign_logs_by_min':
 257+ return 4
 258+ elif query_name == 'report_campaign_logs_by_hr':
 259+ return 3
 260+ elif query_name == 'report_non_US_clicks':
 261+ return 2
 262+ elif query_name == 'report_contribution_tracking':
 263+ return 2
 264+ elif query_name == 'report_bannerLP_metrics':
 265+ return 1
 266+ elif query_name == 'report_LP_metrics_minutely':
 267+ return 1
 268+ else:
 269+ return -1
 270+
 271+def get_metric_index(query_name, metric_name):
 272+ if query_name == 'report_campaign_logs_by_min':
 273+ if metric_name == 'click_rate':
 274+ return 9
 275+ elif query_name == 'report_campaign_logs_by_hr':
 276+ if metric_name == 'click_rate':
 277+ return 8
 278+ elif query_name == 'report_contribution_tracking':
 279+ if metric_name == 'converted_amount':
 280+ return 4
 281+ elif query_name == 'report_bannerLP_metrics':
 282+ if metric_name == 'total_impressions':
 283+ return 2
 284+ elif metric_name == 'impressions':
 285+ return 3
 286+ elif metric_name == 'views':
 287+ return 4
 288+ elif metric_name == 'clicks':
 289+ return 5
 290+ elif metric_name == 'donations':
 291+ return 6
 292+ elif metric_name == 'amount':
 293+ return 7
 294+ elif metric_name == 'click_rate':
 295+ return 8
 296+ elif metric_name == 'completion_rate':
 297+ return 9
 298+ elif metric_name == 'don_per_imp':
 299+ return 10
 300+ elif metric_name == 'amt_per_imp':
 301+ return 11
 302+ elif metric_name == 'don_per_view':
 303+ return 12
 304+ elif metric_name == 'amt_per_view':
 305+ return 13
 306+ elif metric_name == 'amt_per_donation':
 307+ return 14
 308+ elif metric_name == 'max_amt':
 309+ return 15
 310+ elif metric_name == 'pp_don':
 311+ return 16
 312+ elif metric_name == 'cc_don':
 313+ return 17
 314+ elif metric_name == 'paypal_click_thru':
 315+ return 18
 316+ elif metric_name == 'creditcard_click_thru':
 317+ return 19
 318+ else:
 319+ return -1
 320+ elif query_name == 'report_banner_confidence':
 321+ if metric_name == 'click_rate':
 322+ return 7
 323+ elif metric_name == 'don_per_imp':
 324+ return 9
 325+ elif metric_name == 'amt_per_imp':
 326+ return 10
 327+ elif metric_name == 'amt50_per_imp':
 328+ return 14
 329+ elif metric_name == 'amt100_per_imp':
 330+ return 15
 331+ else:
 332+ return -1
 333+ elif query_name == 'report_LP_confidence':
 334+ if metric_name == 'completion_rate':
 335+ return 5
 336+ elif metric_name == 'don_per_view':
 337+ return 6
 338+ elif metric_name == 'amt_per_view':
 339+ return 7
 340+ elif metric_name == 'amt_per_donation':
 341+ return 8
 342+ elif metric_name == 'amt50_per_view':
 343+ return 9
 344+ elif metric_name == 'amt100_per_view':
 345+ return 10
 346+ else:
 347+ return -1
 348+ elif query_name == 'report_bannerLP_confidence':
 349+ if metric_name == 'click_rate':
 350+ return 7
 351+ elif metric_name == 'completion_rate':
 352+ return 8
 353+ elif metric_name == 'don_per_imp':
 354+ return 9
 355+ elif metric_name == 'amt_per_imp':
 356+ return 10
 357+ elif metric_name == 'don_per_view':
 358+ return 11
 359+ elif metric_name == 'amt_per_view':
 360+ return 12
 361+ elif metric_name == 'amt_per_donation':
 362+ return 13
 363+ elif metric_name == 'amt50_per_imp':
 364+ return 14
 365+ elif metric_name == 'amt100_per_imp':
 366+ return 15
 367+ else:
 368+ return -1
 369+ elif query_name == 'report_LP_metrics_minutely':
 370+ if metric_name == 'views':
 371+ return 2
 372+ elif metric_name == 'donations':
 373+ return 4
 374+ elif metric_name == 'amount50':
 375+ return 5
 376+ elif metric_name == 'don_per_view':
 377+ return 7
 378+ elif metric_name == 'amt50_per_view':
 379+ return 9
 380+ else:
 381+ return -1
 382+ elif query_name == 'report_banner_metrics_minutely':
 383+ if metric_name == 'imp':
 384+ return 2
 385+ elif metric_name == 'donations':
 386+ return 5
 387+ elif metric_name == 'amount50':
 388+ return 7
 389+ elif metric_name == 'don_per_imp':
 390+ return 10
 391+ elif metric_name == 'amt50_per_imp':
 392+ return 12
 393+ else:
 394+ return -1
 395+ else:
 396+ return 'no such table'
 397+
 398+def get_plot_title(query_name):
 399+ if query_name == 'report_banner_impressions_by_hour':
 400+ return 'Banner Impressions Over the Past 24 Hours'
 401+ elif query_name == 'report_lp_views_by_hour':
 402+ return 'Landing Page Views Over the Past 24 Hours'
 403+ else:
 404+ return 'no such table'
 405+
 406+def get_plot_ylabel(query_name):
 407+ if query_name == 'report_banner_impressions_by_hour':
 408+ return 'IMPRESSIONS'
 409+ elif query_name == 'report_lp_views_by_hour':
 410+ return 'VIEWS'
 411+ else:
 412+ return'no such table'
 413+
 414+def get_metric_full_name(metric_name):
 415+ if metric_name == 'imp':
 416+ return 'IMPRESSIONS'
 417+ elif metric_name == 'view':
 418+ return 'VIEWS'
 419+ elif metric_name == 'don_per_imp':
 420+ return 'DONATIONS PER IMPRESSION'
 421+ elif metric_name == 'don_per_view':
 422+ return 'DONATIONS PER VIEW'
 423+ elif metric_name == 'amt50_per_imp':
 424+ return 'AMOUNT50 PER IMPRESSION'
 425+ elif metric_name == 'amt50_per_view':
 426+ return 'AMOUNT50 PER VIEW'
 427+ elif metric_name == 'amount50':
 428+ return 'AMOUNT50'
 429+ elif metric_name == 'donations':
 430+ return 'DONATIONS'
 431+ else:
 432+ return'no such table'
\ No newline at end of file
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py
@@ -1,4 +1,15 @@
 2+"""
23
 4+This module provides access to the datasource and enables querying. The class
 5+atructure defined has DataLoader as the base which outlines the basic members
 6+and functionality. This interface is extended for interaction with specific
 7+sources of data.
 8+
 9+These classes are used to define the data sources for the DataReporting family of
 10+classes in an Adapter structural pattern.
 11+
 12+"""
 13+
314 __author__ = "Ryan Faulkner"
415 __revision__ = "$Rev$"
516 __date__ = "April 8th, 2010"
@@ -6,8 +17,13 @@
718
819 import sys
920 import MySQLdb
 21+import math
 22+import datetime
 23+
 24+import miner_help as mh
 25+import QueryData as QD
 26+import TimestampProcessor as TP
1027
11 -
1228 """
1329
1430 CLASS :: DataLoader
@@ -18,28 +34,174 @@
1935 """
2036 class DataLoader(object):
2137
22 - # Database and Cursor objects
23 - db = None
24 - cur = None
 38+ """ Database and Cursor objects """
 39+ _db_ = None
 40+ _cur_ = None
 41+ _sql_path_ = '../sql/' # Relative path for SQL files to be processed
 42+ _query_names_ = dict()
2543
2644 def init_db(self):
 45+
2746 """ Establish connection """
2847 #db = MySQLdb.connect(host='db10.pmtpa.wmnet', user='rfaulk', db='faulkner')
29 - self.db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
 48+ self._db_ = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
3049 #self.db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner')
31 -
 50+
3251 """ Create cursor """
33 - self.cur = self.db.cursor()
 52+ self._cur_ = self._db_.cursor()
3453
3554 def close_db(self):
36 - self.cur.close()
37 - self.db.close()
 55+ self._cur_.close()
 56+ self._db_.close()
 57+
 58+ """
 59+ <DESCRIPTION>
3860
 61+ INPUT:
 62+ query_type -
 63+
 64+
 65+ RETURN:
 66+ -
 67+
 68+ """
 69+ def get_sql_filename_for_query(self, query_type):
 70+ return ''
3971
4072 class IntervalReportingLoader(DataLoader):
 73+
 74+ def __init__(self):
 75+ self._query_names_['banner'] = 'report_banner_metrics_minutely'
 76+ self._query_names_['LP'] = 'report_LP_metrics_minutely'
 77+
 78+ def get_sql_filename_for_query(self, query_type):
 79+ return self._query_names_[query_type]
 80+
 81+ """
 82+ <DESCRIPTION>
 83+
 84+ INPUT:
 85+ start_time -
 86+ end_time -
 87+ interval -
 88+ query_type -
 89+ metric_name -
 90+ campaign -
 91+
 92+
 93+ RETURN:
 94+ metrics -
 95+ times -
 96+ """
 97+ def run_query(self, start_time, end_time, interval, query_type, metric_name, campaign):
 98+
 99+ self.init_db()
 100+
 101+ try:
 102+ query_name = self.get_sql_filename_for_query(query_type)
 103+ except KeyError:
 104+ print 'Could not find a query for type: ' + query_type
 105+ sys.exit(2)
 106+
 107+ metrics = mh.AutoVivification()
 108+ times = mh.AutoVivification()
 109+ times_norm = mh.AutoVivification()
 110+
 111+ """ Compose datetime objects to represent the first and last intervals """
 112+ start_time_obj = TP.timestamp_to_obj(start_time, 1)
 113+ start_time_obj = start_time_obj.replace(minute=int(math.floor(start_time_obj.minute / interval) * interval))
 114+ start_time_obj_str = TP.timestamp_from_obj(start_time_obj, 1, 3)
 115+
 116+ end_time_obj = TP.timestamp_to_obj(end_time, 1)
 117+ # end_time_obj = end_time_obj + datetime.timedelta(seconds=-1)
 118+ end_time_obj = end_time_obj.replace(minute=int(math.floor(end_time_obj.minute / interval) * interval))
 119+ end_time_obj_str = TP.timestamp_from_obj(end_time_obj, 1, 3)
 120+
 121+ """ The start time for the impression portion of the query should be one second less"""
 122+
 123+ imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1)
 124+ imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3)
 125+
 126+ """ Load the SQL File & Format """
 127+ filename = self._sql_path_+ query_name + '.sql'
 128+ sql_stmnt = mh.read_sql(filename)
 129+
 130+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign, interval, imp_start_time_obj_str])
 131+
 132+ """ Get Indexes into Query """
 133+ key_index = QD.get_banner_index(query_name)
 134+ metric_index = QD.get_metric_index(query_name, metric_name)
 135+ time_index = QD.get_time_index(query_name)
 136+
 137+ """ Compose the data for each separate donor pipeline artifact """
 138+ try:
 139+ err_msg = sql_stmnt
 140+ self._cur_.execute(sql_stmnt)
 141+
 142+ results = self._cur_.fetchall()
 143+ final_time = dict() # stores the last timestamp seen
 144+ interval_obj = datetime.timedelta(minutes=interval) # timedelta object used to shift times by _interval_ minutes
 145+
 146+ for row in results:
 147+
 148+ key_name = row[key_index]
 149+ time_obj = TP.timestamp_to_obj(row[time_index], 1) # format = 1, 14-digit TS
 150+
 151+ """ For each new dictionary index by key name start a new list if its not already there """
 152+ try:
 153+ metrics[key_name].append(row[metric_index])
 154+ times[key_name].append(time_obj + interval_obj)
 155+ final_time[key_name] = row[time_index]
 156+ except:
 157+ metrics[key_name] = list()
 158+ times[key_name] = list()
 159+
 160+ """ If the first element is not the start time add it
 161+ this will be the case if there is no data for the first interval
 162+ NOTE: two datapoints are added at the beginning to define the first interval """
 163+ if start_time_obj_str != row[time_index]:
 164+ times[key_name].append(start_time_obj)
 165+ metrics[key_name].append(0.0)
 166+
 167+ times[key_name].append(start_time_obj + interval_obj)
 168+ metrics[key_name].append(0.0)
 169+ else:
 170+ metrics[key_name].append(row[metric_index])
 171+ times[key_name].append(time_obj)
 172+
 173+ metrics[key_name].append(row[metric_index])
 174+ times[key_name].append(time_obj + interval_obj)
 175+
 176+
 177+ except Exception as inst:
 178+ print type(inst) # the exception instance
 179+ print inst.args # arguments stored in .args
 180+ print inst # __str__ allows args to printed directly
 181+
 182+ self._db_.rollback()
 183+ sys.exit(0)
 184+
 185+
 186+
 187+ """ Ensure that the last time in the list is the endtime less the interval """
 188+
 189+ for key in times.keys():
 190+ if final_time[key_name] != end_time_obj_str:
 191+ times[key].append(end_time_obj)
 192+ metrics[key].append(0.0)
 193+
 194+ self.close_db()
 195+
 196+ """ Convert counts to float (from Decimal) to prevent exception when bar plotting
 197+ Bbox::update_numerix_xy expected numerix array """
 198+ for key in metrics.keys():
 199+ metrics_new = list()
 200+ for i in range(len(metrics[key])):
 201+ metrics_new.append(float(metrics[key][i]))
 202+ metrics[key] = metrics_new
 203+
 204+ return [metrics, times]
41205
42 - def run_query(self):
43 - return
44206
45207 class BannerLPReportingLoader(DataLoader):
46208
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py
@@ -0,0 +1,999 @@
 2+
 3+
 4+"""
 5+
 6+This module is used to define the reporting methodologies on different types of data. The base class
 7+DataReporting is defined to outline the general functionality of the reporting architecture and
 8+functionality which includes generating the data via a dataloader object and transforming the data
 9+among different reporting mediums including matlab plots (primary medium) and html tables.
 10+
 11+The DataLoader class decouples the data access of the reports using the Adapter structural pattern.
 12+
 13+"""
 14+
 15+__author__ = "Ryan Faulkner"
 16+__revision__ = "$Rev$"
 17+__date__ = "December 16th, 2010"
 18+
 19+
 20+import sys
 21+sys.path.append('../')
 22+
 23+import matplotlib
 24+import datetime
 25+import MySQLdb
 26+import pylab
 27+import HTML
 28+import math
 29+
 30+import QueryData as QD
 31+import miner_help as mh
 32+import TimestampProcessor as TP
 33+import DataLoader as DL
 34+
 35+matplotlib.use('Agg')
 36+
 37+
 38+
 39+"""
 40+
 41+ BASE CLASS :: DataReporting
 42+
 43+ Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include:
 44+
 45+ METHODS:
 46+
 47+ run_query - format and execute the query to obtain data
 48+ gen_plot - plots the results of the report
 49+ write_to_html_table - writes the results to an HTML table
 50+ run
 51+
 52+"""
 53+class DataReporting(object):
 54+
 55+ _data_loader_ = None
 56+
 57+ """
 58+
 59+ Smooths a list of values
 60+
 61+ INPUT:
 62+ values - a list of datetime objects
 63+ window_length - indicate whether the list counts back from the end
 64+
 65+ RETURN:
 66+ new_values - list of smoothed values
 67+
 68+ """
 69+ def smooth(self, values, window_length):
 70+
 71+ window_length = int(math.floor(window_length / 2))
 72+
 73+ if window_length < 1:
 74+ return values
 75+
 76+ list_len = len(values)
 77+ new_values = list()
 78+
 79+ for i in range(list_len):
 80+ index_left = max([0, i - window_length])
 81+ index_right = min([list_len - 1, i + window_length])
 82+
 83+ width = index_right - index_left + 1
 84+
 85+ new_val = sum(values[index_left : (index_right + 1)]) / width
 86+ new_values.append(new_val)
 87+
 88+ return new_values
 89+
 90+ """
 91+
 92+ workaround for issue with tuple objects in HTML.py
 93+ MySQLdb returns unfamiliar tuple elements from its fetchall() method
 94+ this is probably a version problem since the issue popped up in 2.5 but not 2.6
 95+
 96+ INPUT:
 97+ row - row object returned from MySQLdb.fetchall()
 98+
 99+ RETURN:
 100+ l - a list of tuple objects from the db
 101+
 102+ """
 103+ def listify(self, row):
 104+ l = []
 105+ for i in row:
 106+ l.append(i)
 107+ return l
 108+
 109+
 110+ """
 111+
 112+ To be overloaded by subclasses for specific types of queries
 113+
 114+ INPUT:
 115+ values - a list of datetime objects
 116+ window_length - indicate whether the list counts back from the end
 117+
 118+ RETURN:
 119+ return_status - integer, 0 indicates un-exceptional execution
 120+
 121+ """
 122+ def run_query(self, start_time, end_time, query_name, metric_name):
 123+ return 0
 124+
 125+
 126+ """
 127+
 128+ To be overloaded by subclasses for different plotting behaviour
 129+
 130+ INPUT:
 131+ values - a list of datetime objects
 132+ window_length - indicate whether the list counts back from the end
 133+
 134+ RETURN:
 135+ return_status - integer, 0 indicates un-exceptional execution
 136+
 137+ """
 138+ def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, subplot_index, fname):
 139+ return 0
 140+
 141+ """
 142+
 143+ To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py)
 144+
 145+ INPUT:
 146+ values - a list of datetime objects
 147+ window_length - indicate whether the list counts back from the end
 148+
 149+ RETURN:
 150+ return_status - integer, 0 indicates un-exceptional execution
 151+
 152+ """
 153+ def write_to_html_table(self):
 154+
 155+ """
 156+ FROM TEST REPORTING
 157+
 158+ query_obj = qs.query_store()
 159+
 160+ # Populate the campaigns table
 161+ s1 = 'drop table if exists campaigns;'
 162+ s2 = 'create table campaigns as (select utm_campaign from drupal.contribution_tracking where ts > \'%s\' group by utm_campaign having count(*) > 100);' % (start_time)
 163+ cur.execute(s1)
 164+ cur.execute(s2)
 165+
 166+ table_data = []
 167+ sql_stmnt = mh.read_sql(sql_path + query_type + '.sql');
 168+
 169+ # open the html file for writing
 170+ f = open(html_path + query_type + '.html', 'w')
 171+
 172+ format_start_time = start_time[0:4] + '-' + start_time[4:6] + '-' + start_time[6:8] + '-' + start_time[8:10] + 'HRs'
 173+
 174+ # Formats the statement according to query type
 175+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
 176+
 177+ # html formatting
 178+ if query_type == 'report_campaign_ecomm':
 179+ f.write('<br>Donation data since ' + format_start_time + ' ... <br><br>')
 180+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
 181+
 182+ elif query_type == 'report_campaign_logs':
 183+ f.write('<br>Impression and landing page data since ' + format_start_time+ ' ... <br><br>')
 184+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
 185+
 186+ elif query_type == 'report_campaign_ecomm_by_hr':
 187+ f.write('<br>Donation data by hour since ' + format_start_time + ' ... <br><br>')
 188+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
 189+
 190+ elif query_type == 'report_campaign_logs_by_hr':
 191+ f.write('<br>Impression and landing page by hour since ' + format_start_time + ' ... <br><br>')
 192+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
 193+
 194+ else:
 195+ select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time])
 196+
 197+ try:
 198+ err_msg = select_stmnt
 199+ cur.execute(select_stmnt)
 200+
 201+ results = cur.fetchall()
 202+
 203+ for row in results:
 204+ cpRow = listify(row)
 205+ # t.rows.append(row)
 206+ table_data.append(cpRow)
 207+
 208+ except:
 209+ db.rollback()
 210+ sys.exit("Database Interface Exception:\n" + err_msg)
 211+
 212+
 213+ t = HTML.table(table_data, header_row=header)
 214+ htmlcode = str(t)
 215+
 216+ f.write(htmlcode)
 217+ f.close()
 218+
 219+ return htmlcode
 220+
 221+ """
 222+ return 0
 223+
 224+
 225+
 226+ """
 227+
 228+ The access point of DataReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc.
 229+ To be overloaded by subclasses
 230+
 231+ INPUT:
 232+
 233+ RETURN:
 234+ return_status - integer, 0 indicates un-exceptional execution
 235+
 236+ """
 237+ def run(self):
 238+ return
 239+
 240+
 241+
 242+"""
 243+
 244+CLASS :: ^TotalAmountsReporting^
 245+
 246+This subclass handles reporting on total amounts for the fundraiser.
 247+
 248+"""
 249+
 250+class TotalAmountsReporting(DataReporting):
 251+
 252+ def __init__(self):
 253+ self.data = []
 254+
 255+ def run_query(self, start_time, end_time, query_name, descriptor):
 256+
 257+ self.init_db()
 258+
 259+ # Load the SQL File & Format
 260+ filename = self._sql_path_ + query_name + '.sql'
 261+ sql_stmnt = mh.read_sql(filename)
 262+ sql_stmnt = QD.format_query(query_name + descriptor, sql_stmnt, [start_time, end_time])
 263+
 264+ labels = [None] * 21
 265+ labels[0] = 'clicks'
 266+ labels[1] = 'donations'
 267+ labels[2] = 'total amount'
 268+ labels[3] = 'banner amount'
 269+ labels[4] = 'US amount'
 270+ labels[5] = 'EN amount'
 271+ labels[6] = 'Other Amount'
 272+ labels[7] = 'Email Amount'
 273+ labels[8] = 'Recurring Guess'
 274+ labels[9] = 'completion_rate'
 275+ labels[10] = 'pp_clicks'
 276+ labels[11] = 'pp_donations'
 277+ labels[12] = 'pp_completion'
 278+ labels[13] = 'pp_amount'
 279+ labels[14] = 'pp_max_amount'
 280+ labels[15] = 'cc_clicks'
 281+ labels[16] = 'cc_donations'
 282+ labels[17] = 'cc_completion'
 283+ labels[18] = 'cc_amount'
 284+ labels[19] = 'cc_max_amount'
 285+ labels[20] = 'total_amt50'
 286+
 287+
 288+ num_keys = len(labels)
 289+
 290+ lists = list()
 291+ for i in range(num_keys):
 292+ lists.append(list())
 293+
 294+ # Composes the data for each banner
 295+ try:
 296+ err_msg = sql_stmnt
 297+ self.cur.execute(sql_stmnt)
 298+
 299+ # This query store records according to dates
 300+ results = self.cur.fetchall()
 301+ for row in results:
 302+ for i in range(num_keys):
 303+ lists[i].append(row[i+1])
 304+
 305+ except:
 306+ self.db.rollback()
 307+ sys.exit("Database Interface Exception:\n" + err_msg)
 308+
 309+ self.close_db()
 310+
 311+ # Only interested in amounts
 312+ return [labels, lists]
 313+
 314+
 315+
 316+ def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, ranges, subplot_index, fname):
 317+ pylab.subplot(subplot_index)
 318+ num_keys = len(y_lists)
 319+
 320+ pylab.figure(num=None,figsize=[26,14])
 321+ line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','b--o','g--o','r--o','c--o','m--o','k--o']
 322+
 323+ for i in range(num_keys):
 324+ pylab.plot(x, y_lists[i], line_types[i])
 325+
 326+ pylab.grid()
 327+ pylab.xlim(ranges[0], ranges[1])
 328+ pylab.legend(labels,loc=2)
 329+
 330+ pylab.xlabel(xlabel)
 331+ pylab.ylabel(ylabel)
 332+
 333+ pylab.title(title)
 334+ pylab.savefig(fname+'.png', format='png')
 335+
 336+
 337+
 338+ def run_hr(self, type):
 339+
 340+
 341+ # Current date & time
 342+ now = datetime.datetime.now()
 343+ #UTC = 8
 344+ #delta = datetime.timedelta(hours=UTC)
 345+ #now = now + delta
 346+
 347+
 348+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
 349+ hours_back = 24
 350+ times = self.gen_date_strings(now, hours_back,1,1)
 351+
 352+ start_time = times[0]
 353+ end_time = times[1]
 354+
 355+ print '\nGenerating analytics total amount for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
 356+
 357+ # QUERY NAME
 358+ query_name = 'report_total_amounts'
 359+
 360+
 361+ # RUN BY HOUR
 362+ descriptor = '_by_hr'
 363+ return_val = self.run_query(start_time, end_time, query_name, descriptor)
 364+
 365+ labels = return_val[0] # curve labels
 366+ counts = return_val[1] # curve data - lists
 367+
 368+ r = self.get_query_fields(labels, counts, type, start_time, end_time)
 369+ labels = r[0]
 370+ counts = r[1]
 371+ title = r[2]
 372+ ylabel = r[3]
 373+
 374+ xlabel = 'Time - Hours'
 375+ subplot_index = 111
 376+
 377+ # plot the curves
 378+ time_range = range(len(counts[0]))
 379+ for i in range(len(counts[0])):
 380+ time_range[i] = time_range[i] - len(counts[0])
 381+
 382+ ranges = [min(time_range), max(time_range)]
 383+
 384+ fname = query_name + descriptor + '_' + type
 385+ self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)
 386+
 387+
 388+
 389+ def run_day(self,type):
 390+
 391+ # Current date & time
 392+ now = datetime.datetime.now()
 393+ #UTC = 8
 394+ #delta = datetime.timedelta(hours=UTC)
 395+ #now = now + delta
 396+
 397+
 398+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=0 """
 399+ hours_back = 7 * 24 # 7 days back
 400+ times = self.gen_date_strings(now, hours_back,1,0)
 401+
 402+ start_time = times[0]
 403+ end_time = times[1]
 404+
 405+ print '\nGenerating analytics total amount for ' + str(days_back) + ' days back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
 406+
 407+
 408+ # FORMAT HEADERS & QUERY NAME
 409+ query_name = 'report_total_amounts'
 410+ descriptor = '_by_day'
 411+ return_val = self.run_query(start_time, end_time, query_name, descriptor)
 412+
 413+ labels = return_val[0]
 414+ counts = return_val[1]
 415+
 416+ r = self.get_query_fields(labels, counts, type, start_time, end_time)
 417+ labels = r[0]
 418+ counts = r[1]
 419+ title = r[2]
 420+ ylabel = r[3]
 421+
 422+ xlabel = 'Time - Days'
 423+ subplot_index = 111
 424+
 425+ # Plot values
 426+ time_range = range(len(counts[0]))
 427+ for i in range(len(counts[0])):
 428+ time_range[i] = time_range[i] - len(counts[0])
 429+
 430+ ranges = [min(time_range), max(time_range)]
 431+
 432+ fname = query_name + descriptor + '_' + type
 433+ self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname)
 434+
 435+
 436+ def get_query_fields(self, labels, counts, type, start_time, end_time):
 437+
 438+ if type == 'BAN_EM':
 439+ indices = range(2,9)
 440+ title = 'Total Amounts: ' + start_time + ' -- ' + end_time
 441+ ylabel = 'Amount'
 442+ elif type == 'CC_PP_completion':
 443+ indices = [12,17]
 444+ title = 'Credit Card & Paypal Completion Rates: ' + start_time + ' -- ' + end_time
 445+ ylabel = 'Rate'
 446+ elif type == 'CC_PP_amount':
 447+ indices = [13,18]
 448+ title = 'Credit Card & Paypal Total Amounts: ' + start_time + ' -- ' + end_time
 449+ ylabel = 'Amount'
 450+ elif type == 'AMT_VS_AMT50':
 451+ indices = [2,20]
 452+ title = 'Amount50 and Amount Totals: ' + start_time + ' -- ' + end_time
 453+ ylabel = 'Amount'
 454+ else:
 455+ sys.exit("Total Amounts: You must enter a valid report type.\n" )
 456+
 457+ # Exract relevant labels and values
 458+ labels_temp = list()
 459+ counts_temp = list()
 460+
 461+ for i in range(len(labels)):
 462+ if i in indices:
 463+ labels_temp.append(labels[i])
 464+ counts_temp.append(counts[i])
 465+
 466+ return [labels_temp, counts_temp, title, ylabel]
 467+
 468+
 469+"""
 470+
 471+CLASS :: ^BannerLPReporting^
 472+
 473+This subclass handles reporting on banners and landing pages for the fundraiser.
 474+
 475+"""
 476+
 477+class BannerLPReporting(DataReporting):
 478+
 479+
 480+ def __init__(self, *args):
 481+
 482+ if len(args) == 2:
 483+ self.campaign = args[0]
 484+ self.start_time = args[1]
 485+ else:
 486+ self.campaign = None
 487+ self.start_time = None
 488+
 489+ def run_query(self,start_time, end_time, campaign, query_name, metric_name):
 490+
 491+ self.init_db()
 492+
 493+ metric_lists = mh.AutoVivification()
 494+ time_lists = mh.AutoVivification()
 495+ # table_data = [] # store the results in a table for reporting
 496+
 497+ # Load the SQL File & Format
 498+ filename = self._sql_path_ + query_name + '.sql'
 499+ sql_stmnt = mh.read_sql(filename)
 500+
 501+ query_name = 'report_bannerLP_metrics' # rename query to work with query store
 502+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign])
 503+
 504+ key_index = QD.get_banner_index(query_name)
 505+ time_index = QD.get_time_index(query_name)
 506+ metric_index = QD.get_metric_index(query_name, metric_name)
 507+
 508+ # Composes the data for each banner
 509+ try:
 510+ err_msg = sql_stmnt
 511+ self.cur.execute(sql_stmnt)
 512+
 513+ results = self.cur.fetchall()
 514+
 515+ # Compile Table Data
 516+ # cpRow = self.listify(row)
 517+ # table_data.append(cpRow)
 518+
 519+ for row in results:
 520+
 521+ key_name = row[key_index]
 522+
 523+ try:
 524+ metric_lists[key_name].append(row[metric_index])
 525+ time_lists[key_name].append(row[time_index])
 526+ except:
 527+ metric_lists[key_name] = list()
 528+ time_lists[key_name] = list()
 529+
 530+ metric_lists[key_name].append(row[metric_index])
 531+ time_lists[key_name].append(row[time_index])
 532+
 533+ except:
 534+ self.db.rollback()
 535+ sys.exit("Database Interface Exception:\n" + err_msg)
 536+
 537+ """ Convert Times to Integers """
 538+ # Find the earliest date
 539+ max_i = 0
 540+
 541+ for key in time_lists.keys():
 542+ for date_str in time_lists[key]:
 543+ day_int = int(date_str[8:10])
 544+ hr_int = int(date_str[11:13])
 545+ date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
 546+ if date_int > max_i:
 547+ max_i = date_int
 548+ max_day = day_int
 549+ max_hr = hr_int
 550+
 551+
 552+ # Normalize dates
 553+ time_norm = mh.AutoVivification()
 554+ for key in time_lists.keys():
 555+ for date_str in time_lists[key]:
 556+ day = int(date_str[8:10])
 557+ hr = int(date_str[11:13])
 558+ # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
 559+ elem = (day - max_day) * 24 + (hr - max_hr)
 560+ try:
 561+ time_norm[key].append(elem)
 562+ except:
 563+ time_norm[key] = list()
 564+ time_norm[key].append(elem)
 565+
 566+ # smooth out the values
 567+ #window_length = 20
 568+ #for banner in metric_lists.keys():
 569+ # metric_lists[banner] = smooth(metric_lists[banner], window_length)
 570+
 571+ self.close_db()
 572+
 573+ # return [metric_lists, time_norm, table_data]
 574+ return [metric_lists, time_norm]
 575+
 576+
 577+ def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):
 578+ pylab.subplot(subplot_index)
 579+ pylab.figure(num=None,figsize=[26,14])
 580+ count_keys = counts.keys()
 581+
 582+ line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']
 583+
 584+ count = 0
 585+ for key in counts.keys():
 586+ pylab.plot(times[key], counts[key], line_types[count])
 587+ count = count + 1
 588+
 589+ pylab.grid()
 590+ pylab.xlim(ranges[0], ranges[1])
 591+ pylab.legend(count_keys,loc=2)
 592+
 593+ pylab.xlabel(xlabel)
 594+ pylab.ylabel(ylabel)
 595+
 596+ pylab.title(title)
 597+ pylab.savefig(fname, format='png')
 598+
 599+
 600+ """
 601+
 602+ type = 'LP' || 'BAN' || 'BAN-TEST' || 'LP-TEST'
 603+
 604+ """
 605+ def run(self, type, metric_name):
 606+
 607+ # Current date & time
 608+ now = datetime.datetime.now()
 609+ #UTC = 8
 610+ #delta = datetime.timedelta(hours=UTC)
 611+ #now = now + delta
 612+
 613+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
 614+ hours_back = 24
 615+ times = self.gen_date_strings(now, hours_back,1,1)
 616+
 617+ start_time = times[0]
 618+ end_time = times[1]
 619+
 620+ print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
 621+
 622+ if type == 'LP':
 623+ query_name = 'report_LP_metrics'
 624+
 625+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
 626+ if self.campaign == None:
 627+ campaign = '[0-9](JA|SA|EA|TY)[0-9]'
 628+ else:
 629+ campaign = self.campaign
 630+
 631+ title = metric_name + ': ' + start_time + ' -- ' + end_time
 632+ fname = query_name + '_' + metric_name + '.png'
 633+ elif type == 'BAN':
 634+ query_name = 'report_banner_metrics'
 635+
 636+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
 637+ if self.campaign == None:
 638+ campaign = '[0-9](JA|SA|EA|TY)[0-9]'
 639+ else:
 640+ campaign = self.campaign
 641+
 642+ title = metric_name + ': ' + start_time + ' -- ' + end_time
 643+ fname = query_name + '_' + metric_name + '.png'
 644+ elif type == 'BAN-TEST':
 645+ r = self.get_latest_campaign()
 646+ query_name = 'report_banner_metrics'
 647+
 648+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
 649+ if self.campaign == None:
 650+ campaign = r[0]
 651+ start_time = r[1]
 652+ else:
 653+ campaign = self.campaign
 654+ start_time = self.start_time
 655+
 656+ title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
 657+ fname = query_name + '_' + metric_name + '_latest' + '.png'
 658+ elif type == 'LP-TEST':
 659+ r = self.get_latest_campaign()
 660+ query_name = 'report_LP_metrics'
 661+
 662+ # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign
 663+ if self.campaign == None:
 664+ campaign = r[0]
 665+ start_time = r[1]
 666+ else:
 667+ campaign = self.campaign
 668+ start_time = self.start_time
 669+
 670+ title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign
 671+ fname = query_name + '_' + metric_name + '_latest' + '.png'
 672+ else:
 673+ sys.exit("Invalid type name - must be 'LP' or 'BAN'.")
 674+
 675+ return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name)
 676+ metrics = return_val[0]
 677+ times = return_val[1]
 678+
 679+ # title = metric_name + ': ' + start_time + ' -- ' + end_time
 680+ xlabel = 'Time - Hours'
 681+ ylabel = metric_name
 682+ subplot_index = 111
 683+
 684+ min_time = 99
 685+ for key in times.keys():
 686+ min_elem = min(times[key])
 687+ if min_elem < min_time:
 688+ min_time = min_elem
 689+
 690+ ranges = [min_time, 0]
 691+
 692+ self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname)
 693+
 694+ return [metrics, times]
 695+
 696+
 697+ def get_latest_campaign(self):
 698+
 699+ query_name = 'report_latest_campaign'
 700+ self.init_db()
 701+
 702+ """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """
 703+ now = datetime.datetime.now()
 704+ hours_back = 72
 705+ times = self.gen_date_strings(now, hours_back,1,1)
 706+
 707+ sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')
 708+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [times[0]])
 709+
 710+ campaign_index = QD.get_campaign_index(query_name)
 711+ time_index = QD.get_time_index(query_name)
 712+
 713+ try:
 714+ err_msg = sql_stmnt
 715+ self.cur.execute(sql_stmnt)
 716+
 717+ row = self.cur.fetchone()
 718+ except:
 719+ self.db.rollback()
 720+ sys.exit("Database Interface Exception:\n" + err_msg)
 721+
 722+ campaign = row[campaign_index]
 723+ timestamp = row[time_index]
 724+
 725+ self.close_db()
 726+
 727+ return [campaign, timestamp]
 728+
 729+ """
 730+
 731+ Takes as input and converts it to a set of hours counting back from 0
 732+
 733+ time_lists - a dictionary of timestamp lists
 734+ time_norm - a dictionary of normalized times
 735+
 736+ """
 737+ def normalize_timestamps(self, time_lists):
 738+ # Find the earliest date
 739+ max_i = 0
 740+
 741+ for key in time_lists.keys():
 742+ for date_str in time_lists[key]:
 743+ day_int = int(date_str[8:10])
 744+ hr_int = int(date_str[11:13])
 745+ date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
 746+ if date_int > max_i:
 747+ max_i = date_int
 748+ max_day = day_int
 749+ max_hr = hr_int
 750+
 751+
 752+ # Normalize dates
 753+ time_norm = mh.AutoVivification()
 754+ for key in time_lists.keys():
 755+ for date_str in time_lists[key]:
 756+ day = int(date_str[8:10])
 757+ hr = int(date_str[11:13])
 758+ # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
 759+ elem = (day - max_day) * 24 + (hr - max_hr)
 760+ try:
 761+ time_norm[key].append(elem)
 762+ except:
 763+ time_norm[key] = list()
 764+ time_norm[key].append(elem)
 765+
 766+ return time_norm
 767+
 768+"""
 769+
 770+CLASS :: ^MinerReporting^
 771+
 772+This subclass handles reporting on raw values imported into the database.
 773+
 774+"""
 775+
 776+class MinerReporting(DataReporting):
 777+
 778+ def run_query(self, start_time, end_time, query_name):
 779+
 780+ self.init_db()
 781+
 782+ counts = list()
 783+ times = list()
 784+
 785+ # Load the SQL File & Format
 786+ filename = self._sql_path_+ query_name + '.sql'
 787+ sql_stmnt = mh.read_sql(filename)
 788+
 789+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time])
 790+ #print sql_stmnt
 791+
 792+ # Get Indexes into Query
 793+ count_index = QD.get_count_index(query_name)
 794+ time_index = QD.get_time_index(query_name)
 795+
 796+ # Composes the data for each banner
 797+ try:
 798+ err_msg = sql_stmnt
 799+ self.cur.execute(sql_stmnt)
 800+
 801+ results = self.cur.fetchall()
 802+
 803+ for row in results:
 804+ counts.append(row[count_index])
 805+ times.append(row[time_index])
 806+
 807+ except:
 808+ self.db.rollback()
 809+ sys.exit("Database Interface Exception:\n" + err_msg)
 810+
 811+ """ Convert Times to Integers """
 812+ time_norm = self.normalize_timestamps(times)
 813+
 814+
 815+ self.close_db()
 816+
 817+ return [counts, time_norm]
 818+
 819+
 820+ # Create histograms for hourly counts
 821+
 822+ def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname):
 823+
 824+ pylab.subplot(subplot_index)
 825+ pylab.figure(num=None,figsize=[26,14])
 826+
 827+ # pylab.plot(times, counts)
 828+ # pylab.hist(counts, times)
 829+ pylab.bar(times, counts, width=0.5)
 830+
 831+ pylab.grid()
 832+ pylab.xlim(ranges[0], ranges[1])
 833+
 834+ pylab.xlabel(xlabel)
 835+ pylab.ylabel(ylabel)
 836+
 837+ pylab.title(title)
 838+ pylab.savefig(fname, format='png')
 839+
 840+ def run(self, query_name):
 841+
 842+ # Current date & time
 843+ now = datetime.datetime.now()
 844+ #UTC = 8
 845+ #delta = datetime.timedelta(hours=UTC)
 846+ #now = now + delta
 847+
 848+ """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """
 849+ hours_back = 24
 850+ times = self.gen_date_strings_hr(now, hours_back,1,1)
 851+
 852+ start_time = times[0]
 853+ end_time = times[1]
 854+
 855+ print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
 856+
 857+ # Run Query
 858+ return_val = self.run_query(start_time, end_time, query_name)
 859+ counts = return_val[0]
 860+ times = return_val[1]
 861+
 862+ # Normalize times
 863+ min_time = min(times)
 864+ ranges = [min_time, 0]
 865+
 866+ xlabel = 'Hours'
 867+ subplot_index = 111
 868+ fname = query_name + '.png'
 869+
 870+ title = QD.get_plot_title(query_name)
 871+ title = title + ' -- ' + start_time + ' - ' + end_time
 872+ ylabel = QD.get_plot_ylabel(query_name)
 873+
 874+ # Convert counts to float (from Decimal) to prevent exception when bar plotting
 875+ # Bbox::update_numerix_xy expected numerix array
 876+ counts_new = list()
 877+ for i in range(len(counts)):
 878+ counts_new.append(float(counts[i]))
 879+ counts = counts_new
 880+
 881+ # Generate Histogram
 882+ self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)
 883+
 884+
 885+"""
 886+
 887+CLASS :: IntervalReporting
 888+
 889+Performs queries that take timestamps, query, and an interval as arguments. Data for a single metric
 890+is generated for each time interval in the time period defined by the start and end timestamps.
 891+
 892+Types of queries supported:
 893+
 894+report_banner_metrics_minutely
 895+report_LP_metrics_minutely
 896+
 897+"""
 898+
 899+class IntervalReporting(DataReporting):
 900+
 901+ """
 902+ """
 903+ def __init__(self):
 904+ self._data_loader_ = DL.IntervalReportingLoader()
 905+
 906+ """
 907+ """
 908+ def usage(self):
 909+
 910+ print 'Types of queries:'
 911+ print ' (1) banner'
 912+ print ' (2) LP'
 913+ print ''
 914+ print 'e.g.'
 915+ print " run('20101230160400', '20101230165400', 2, 'banner', 'imp', '20101230JA091_US')"
 916+ print " run('20101230160400', '20101230165400', 2, 'LP', 'views', '20101230JA091_US')"
 917+ print ''
 918+
 919+ return
 920+
 921+ """
 922+ Execute reporting query and generate plots
 923+ """
 924+ def gen_plot(self, metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname):
 925+
 926+ pylab.subplot(subplot_index)
 927+ pylab.figure(num=None,figsize=[26,14])
 928+
 929+ line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s']
 930+
 931+ count = 0
 932+ for key in metrics.keys():
 933+ pylab.step(times[key], metrics[key], line_types[count])
 934+ count = count + 1
 935+
 936+ pylab.grid()
 937+ pylab.xlim(ranges[0], ranges[1])
 938+ pylab.ylim(ranges[2], ranges[3])
 939+ pylab.legend(metrics.keys(),loc=2)
 940+
 941+ pylab.xlabel(xlabel)
 942+ pylab.ylabel(ylabel)
 943+
 944+ pylab.title(title)
 945+ pylab.savefig(fname, format='png')
 946+
 947+
 948+ """
 949+ Execute reporting query and generate plots
 950+ """
 951+ def run(self, start_time, end_time, interval, query_type, metric_name, campaign):
 952+
 953+ print '\nGenerating ' + query_type +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
 954+
 955+ """ Execute the query that generates interval reporting data """
 956+ return_val = self._data_loader_.run_query(start_time, end_time, interval, query_type, metric_name, campaign)
 957+ counts = return_val[0]
 958+ times = return_val[1]
 959+
 960+ """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """
 961+ for key in times.keys():
 962+ times[key] = TP.normalize_timestamps(times[key], False, 2)
 963+ times[key], counts[key] = TP.normalize_intervals(times[key], counts[key], interval)
 964+
 965+ # Normalize times
 966+ min_time = min(times)
 967+ ranges = [min_time, 0]
 968+
 969+ xlabel = 'MINUTES'
 970+ subplot_index = 111
 971+ fname = campaign + ' ' + query_type + ' ' + metric_name + '.png'
 972+
 973+ metric_full_name = QD.get_metric_full_name(metric_name)
 974+ title = campaign + ': ' + metric_full_name + ' -- ' + start_time + ' - ' + end_time
 975+ ylabel = metric_full_name
 976+
 977+ """ Determine List maximums """
 978+ times_max = 0
 979+ metrics_max = 0
 980+
 981+ for key in counts.keys():
 982+ list_max = max(counts[key])
 983+ if list_max > metrics_max:
 984+ metrics_max = list_max
 985+
 986+ for key in times.keys():
 987+ list_max = max(times[key])
 988+ if list_max > times_max:
 989+ times_max = list_max
 990+
 991+ ranges = list()
 992+ ranges.append(0.0)
 993+ ranges.append(times_max * 1.1)
 994+ ranges.append(0.0)
 995+ ranges.append(metrics_max * 1.1)
 996+
 997+ """ Generate plots given data """
 998+ self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname)
 999+
 1000+
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/TimestampProcessor.py
@@ -1,10 +1,39 @@
22
33 """
44
 5+This module effectively functions as a Singleton class.
 6+
57 TimestampProcesser module is used to provide definitions for dealing with date and time
68 objects. This is primarily used to handle
79
 10+TimestampProcesser facilitates the processing of timestamps used in the CiviCRM and "faulkner" mySQL
 11+databases. This includes mapping among timestamp formats and converting those formats to indexed
 12+lists and dictionaries.
813
 14+Examples of format definitions:
 15+
 16+ format 1 - 20080101000606
 17+ format 2 - 2008-01-01 00:06:06
 18+
 19+Examples of resolution definitions:
 20+
 21+ resolution 0 - xxxx-xx-xx 00:00:00
 22+ resolution 1 - xxxx-xx-xx xx:00:00
 23+ resolution 2 - xxxx-xx-xx xx:xx:00
 24+ resolution 3 - xxxx-xx-xx xx:xx:xx
 25+
 26+METHODS:
 27+
 28+ normalize_timestamps - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0
 29+ timestamps_to_dict - Convert lists into dictionaries before processing it is assumed that lists are composed of only simple types
 30+ find_latest_date_in_list - Find the latest time stamp in a list
 31+ find_earliest_date_in_list - Find the earliest time stamp in a list
 32+ gen_date_strings - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
 33+ timestamp_from_obj - Convert datetime objects to a timestamp of a given format.
 34+ timestamp_to_obj - Convert timestamp to a datetime object of a given format
 35+ normalize_intervals - Inserts missing interval points into the time and metric lists
 36+ timestamp_convert_format - Converts from one timestamp format to another timestamp format
 37+
938 """
1039
1140 __author__ = "Ryan Faulkner"
@@ -15,402 +44,361 @@
1645 import sys
1746 sys.path.append('../')
1847
19 -import matplotlib
2048 import datetime
21 -import MySQLdb
22 -import pylab
23 -import HTML
2449 import math
25 -
2650 import miner_help as mh
2751
28 -matplotlib.use('Agg')
29 -
30 -
31 -
 52+
3253 """
 54+
 55+ Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0
 56+
 57+ INPUT:
 58+ time_lists - a list of datetime objects
 59+ count_back - indicate whether the list counts back from the end
 60+ time_unit - an integer indicating what unit to measure time in (0 = day, 1 = hour, 2 = minute)
 61+
 62+ RETURN:
 63+ time_norm - a dictionary of normalized times
 64+
 65+"""
 66+def normalize_timestamps(time_lists, count_back, time_unit):
 67+
 68+ time_lists, isList = timestamps_to_dict(time_lists)
 69+
 70+ """ Depending on args set the start date """
 71+ if count_back:
 72+ start_date_obj = find_latest_date_in_list(time_lists)
 73+ else:
 74+ start_date_obj = find_earliest_date_in_list(time_lists)
 75+
 76+ start_day = start_date_obj.day
 77+ start_hr = start_date_obj.hour
 78+ start_mte = start_date_obj.minute
 79+
 80+ # Normalize dates
 81+ time_norm = mh.AutoVivification()
 82+ for key in time_lists.keys():
 83+ for date_obj in time_lists[key]:
 84+
 85+ day = date_obj.day
 86+ hr = date_obj.hour
 87+ mte = date_obj.minute
 88+
 89+ if time_unit == 0:
 90+ elem = (day - start_day)
 91+ elif time_unit == 1:
 92+ elem = (day - start_day) * 24 + (hr - start_hr)
 93+ elif time_unit == 2:
 94+ elem = (day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (mte - start_mte)
 95+
 96+ try:
 97+ time_norm[key].append(elem)
 98+ except:
 99+ time_norm[key] = list()
 100+ time_norm[key].append(elem)
 101+
 102+ """ If the original argument was a list put it back in that form """
 103+ if isList:
 104+ time_norm = time_norm[key]
 105+
 106+ return time_norm
 107+
 108+
 109+"""
33110
34 - TimestampProcesser facilitates the processing of timestamps used in the CiviCRM and "faulkner" mySQL
35 - databases. This includes mapping among timestamp formats and converting those formats to indexed
36 - lists and dictionaries.
 111+ HELPER METHOD for normalize_timestamps. Convert lists into dictionaries before processing it is assumed that lists
 112+ are composed of only simple types
37113
38 - Examples of format definitions:
39 -
40 - format 1 - 20080101000606
41 - format 2 - 2008-01-01 00:06:06
 114+ INPUT:
 115+ time_lists - a list of datetime objects
42116
43 - Examples of resolution definitions:
44 -
45 - resolution 0 - xxxx-xx-xx 00:00:00
46 - resolution 1 - xxxx-xx-xx xx:00:00
47 - resolution 2 - xxxx-xx-xx xx:xx:00
48 - resolution 3 - xxxx-xx-xx xx:xx:xx
49 -
50 - METHODS:
51 -
52 - normalize_timestamps - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0
53 - timestamps_to_dict - Convert lists into dictionaries before processing it is assumed that lists are composed of only simple types
54 - find_latest_date_in_list - Find the latest time stamp in a list
55 - find_earliest_date_in_list - Find the earliest time stamp in a list
56 - gen_date_strings - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
57 - timestamp_from_obj - Convert datetime objects to a timestamp of a given format.
58 - timestamp_to_obj - Convert timestamp to a datetime object of a given format
59 - normalize_intervals - Inserts missing interval points into the time and metric lists
60 - timestamp_convert_format - Converts from one timestamp format to another timestamp format
61 -
 117+ RETURN:
 118+ time_lists - dictionary with a single key 'key' that stores the list
 119+ isList - a dictionary of normalized times
 120+
62121 """
63 -class TimestampProcesser(object):
 122+def timestamps_to_dict(time_lists):
64123
65 - """
66 -
67 - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0
 124+ isList = 0
 125+ if type(time_lists) is list:
 126+ isList = 1
68127
69 - INPUT:
70 - time_lists - a list of datetime objects
71 - count_back - indicate whether the list counts back from the end
72 - time_unit - an integer indicating what unit to measure time in (0 = day, 1 = hour, 2 = minute)
 128+ old_list = time_lists
 129+ time_lists = mh.AutoVivification()
73130
74 - RETURN:
75 - time_norm - a dictionary of normalized times
76 -
77 - """
78 - def normalize_timestamps(self, time_lists, count_back, time_unit):
 131+ key = 'key'
 132+ time_lists[key] = list()
79133
80 - time_lists, isList = self.timestamps_to_dict(time_lists)
 134+ for i in range(len(old_list)):
 135+ time_lists[key].append(old_list[i])
 136+
 137+ return [time_lists, isList]
 138+
 139+
 140+"""
 141+
 142+ HELPER METHOD for normalize_timestamps. Find the latest time stamp in a list
 143+
 144+ INPUT:
 145+ time_lists - a list of datetime objects
81146
82 - """ Depending on args set the start date """
83 - if count_back:
84 - start_date_obj = self.find_latest_date_in_list(time_lists)
85 - else:
86 - start_date_obj = self.find_earliest_date_in_list(time_lists)
 147+ RETURN:
 148+ date_max - datetime object of the latest date in the list
87149
88 - start_day = start_date_obj.day
89 - start_hr = start_date_obj.hour
90 - start_mte = start_date_obj.minute
91 -
92 - # Normalize dates
93 - time_norm = mh.AutoVivification()
94 - for key in time_lists.keys():
95 - for date_obj in time_lists[key]:
 150+"""
 151+def find_latest_date_in_list(time_lists):
 152+
 153+ date_max = datetime.datetime(1000,1,1,0,0,0)
 154+
 155+ for key in time_lists.keys():
 156+ for date_obj in time_lists[key]:
 157+ if date_int > date_min:
 158+ date_min = date_obj
96159
97 - day = date_obj.day
98 - hr = date_obj.hour
99 - mte = date_obj.minute
100 -
101 - if time_unit == 0:
102 - elem = (day - start_day)
103 - elif time_unit == 1:
104 - elem = (day - start_day) * 24 + (hr - start_hr)
105 - elif time_unit == 2:
106 - elem = (day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (mte - start_mte)
107 -
108 - try:
109 - time_norm[key].append(elem)
110 - except:
111 - time_norm[key] = list()
112 - time_norm[key].append(elem)
 160+ return date_max
 161+
 162+"""
 163+
 164+ HELPER METHOD for normalize_timestamps. Find the earliest timestamp in a list
 165+
 166+ INPUT:
 167+ time_lists - a list of datetime objects
113168
114 - """ If the original argument was a list put it back in that form """
115 - if isList:
116 - time_norm = time_norm[key]
117 -
118 - return time_norm
 169+ RETURN:
 170+ date_min - datetime object of the earliest date in the list
119171
 172+"""
 173+def find_earliest_date_in_list(time_lists):
120174
121 - """
 175+ date_min = datetime.datetime(3000,1,1,0,0,0)
122176
123 - HELPER METHOD for normalize_timestamps. Convert lists into dictionaries before processing it is assumed that lists
124 - are composed of only simple types
125 -
126 - INPUT:
127 - time_lists - a list of datetime objects
128 -
129 - RETURN:
130 - time_lists - dictionary with a single key 'key' that stores the list
131 - isList - a dictionary of normalized times
132 -
133 - """
134 - def timestamps_to_dict(self, time_lists):
135 -
136 - isList = 0
137 - if type(time_lists) is list:
138 - isList = 1
139 -
140 - old_list = time_lists
141 - time_lists = mh.AutoVivification()
142 -
143 - key = 'key'
144 - time_lists[key] = list()
145 -
146 - for i in range(len(old_list)):
147 - time_lists[key].append(old_list[i])
 177+ for key in time_lists.keys():
 178+ for date_obj in time_lists[key]:
 179+ if date_obj < date_min:
 180+ date_min = date_obj
 181+
 182+ return date_min
 183+
 184+
 185+"""
 186+
 187+ Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
148188
149 - return [time_lists, isList]
 189+ format 1 - 20080101000606
 190+ format 2 - 2008-01-01 00:06:06
150191
151 -
152 - """
 192+ INPUT:
153193
154 - HELPER METHOD for normalize_timestamps. Find the latest time stamp in a list
155 -
156 - INPUT:
157 - time_lists - a list of datetime objects
158 -
159 - RETURN:
160 - date_max - datetime object of the latest date in the list
161 -
162 - """
163 - def find_latest_date_in_list(self, time_lists):
164 -
165 - date_max = datetime.datetime(1000,1,1,0,0,0)
166 -
167 - for key in time_lists.keys():
168 - for date_obj in time_lists[key]:
169 - if date_int > date_min:
170 - date_min = date_obj
171 -
172 - return date_max
 194+ now - datetime object
 195+ hours_back - the amount of time the
 196+ format - the format of the returned timestamp strings
 197+ resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
173198
174 - """
175199
176 - HELPER METHOD for normalize_timestamps. Find the earliest timestamp in a list
 200+ RETURN:
 201+ start_time - formatted datetime string
 202+ end_time - formatted datetime string
 203+
 204+"""
 205+def gen_date_strings(time_ref, hours_back, format, resolution):
 206+
 207+ delta = datetime.timedelta(hours=-hours_back)
 208+
 209+ time_obj = time_ref + delta
 210+ time_ref = time_ref + datetime.timedelta(hours=-1) # Move an hour back to terminate at 55 minute
 211+
 212+ # Cast the start and end time strings in the proper format
 213+ start_time = timestamp_from_obj(time_obj, format, resolution)
 214+ end_time = timestamp_from_obj(time_ref, format, resolution)
 215+
 216+ return [start_time, end_time]
 217+
 218+
 219+
 220+"""
 221+
 222+ Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings.
177223
178 - INPUT:
179 - time_lists - a list of datetime objects
180 -
181 - RETURN:
182 - date_min - datetime object of the earliest date in the list
183 -
184 - """
185 - def find_earliest_date_in_list(self, time_lists):
186 -
187 - date_min = datetime.datetime(3000,1,1,0,0,0)
188 -
189 - for key in time_lists.keys():
190 - for date_obj in time_lists[key]:
191 - if date_obj < date_min:
192 - date_min = date_obj
193 -
194 - return date_min
195 -
 224+ INPUT:
196225
197 - """
 226+ time_obj - datetime object
 227+ format - the format of the returned timestamp strings
 228+ resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
198229
199 - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
 230+
 231+ RETURN:
 232+ start_time - formatted datetime string
 233+ end_time - formatted datetime string
 234+
 235+"""
 236+def timestamp_from_obj(time_obj, format, resolution):
 237+
 238+ if time_obj.month < 10:
 239+ month = '0' + str(time_obj.month)
 240+ else:
 241+ month = str(time_obj.month)
 242+
 243+ if time_obj.day < 10:
 244+ day = '0' + str(time_obj.day)
 245+ else:
 246+ day = str(time_obj.day)
 247+
 248+ if time_obj.hour < 10:
 249+ hour = '0' + str(time_obj.hour)
 250+ else:
 251+ hour = str(time_obj.hour)
200252
201 - format 1 - 20080101000606
202 - format 2 - 2008-01-01 00:06:06
 253+ if time_obj.minute < 10:
 254+ minute = '0' + str(time_obj.minute)
 255+ else:
 256+ minute = str(time_obj.minute)
203257
204 - INPUT:
 258+ if time_obj.second < 10:
 259+ second = '0' + str(time_obj.second)
 260+ else:
 261+ second = str(time_obj.second)
205262
206 - now - datetime object
207 - hours_back - the amount of time the
208 - format - the format of the returned timestamp strings
209 - resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
 263+ # Cast the start and end time strings in the proper format
 264+ if format == 1:
210265
 266+ if resolution == 0:
 267+ timestamp = str(time_obj.year) + month + day + '000000'
 268+ elif resolution == 1:
 269+ timestamp = str(time_obj.year) + month + day + hour + '0000'
 270+ elif resolution == 2:
 271+ timestamp = str(time_obj.year) + month + day + hour + minute + '00'
 272+ elif resolution == 3:
 273+ timestamp = str(time_obj.year) + month + day + hour + minute + second
 274+
 275+ elif format == 2:
211276
212 - RETURN:
213 - start_time - formatted datetime string
214 - end_time - formatted datetime string
215 -
216 - """
217 - def gen_date_strings(self, time_ref, hours_back, format, resolution):
218 -
219 - delta = datetime.timedelta(hours=-hours_back)
 277+ if resolution == 0:
 278+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + '00:00:00'
 279+ elif resolution == 1:
 280+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':00:00'
 281+ elif resolution == 2:
 282+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':00'
 283+ elif resolution == 3:
 284+ timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':' + second
 285+
 286+ return timestamp
220287
221 - time_obj = time_ref + delta
222 - time_ref = time_ref + datetime.timedelta(hours=-1) # Move an hour back to terminate at 55 minute
223 -
224 - # Cast the start and end time strings in the proper format
225 - start_time = self.timestamp_from_obj(time_obj, format, resolution)
226 - end_time = self.timestamp_from_obj(time_ref, format, resolution)
227288
228 - return [start_time, end_time]
 289+"""
 290+
 291+ Convert timestamp to a datetime object of a given format
229292
 293+ INPUT:
230294
 295+ timestamp - timestamp string
 296+ format - the format of the returned timestamp strings
231297
232 - """
233298
234 - Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings.
235 -
236 - INPUT:
237 -
238 - time_obj - datetime object
239 - format - the format of the returned timestamp strings
240 - resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
241 -
242 -
243 - RETURN:
244 - start_time - formatted datetime string
245 - end_time - formatted datetime string
 299+ RETURN:
 300+ time_obj - datetime conversion of timestamp string
 301+
 302+"""
 303+def timestamp_to_obj(timestamp, format):
246304
247 - """
248 - def timestamp_from_obj(self, time_obj, format, resolution):
 305+ if format == 1:
 306+ time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[6:8]), \
 307+ int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14]))
249308
250 - if time_obj.month < 10:
251 - month = '0' + str(time_obj.month)
252 - else:
253 - month = str(time_obj.month)
 309+ elif format == 2:
 310+ time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[5:7]), int(timestamp[8:10]), \
 311+ int(timestamp[11:13]), int(timestamp[14:16]), int(timestamp[17:19]))
254312
255 - if time_obj.day < 10:
256 - day = '0' + str(time_obj.day)
257 - else:
258 - day = str(time_obj.day)
 313+ return time_obj
259314
260 - if time_obj.hour < 10:
261 - hour = '0' + str(time_obj.hour)
262 - else:
263 - hour = str(time_obj.hour)
264 -
265 - if time_obj.minute < 10:
266 - minute = '0' + str(time_obj.minute)
267 - else:
268 - minute = str(time_obj.minute)
269 -
270 - if time_obj.second < 10:
271 - second = '0' + str(time_obj.second)
272 - else:
273 - second = str(time_obj.second)
274 -
275 - # Cast the start and end time strings in the proper format
276 - if format == 1:
277 -
278 - if resolution == 0:
279 - timestamp = str(time_obj.year) + month + day + '000000'
280 - elif resolution == 1:
281 - timestamp = str(time_obj.year) + month + day + hour + '0000'
282 - elif resolution == 2:
283 - timestamp = str(time_obj.year) + month + day + hour + minute + '00'
284 - elif resolution == 3:
285 - timestamp = str(time_obj.year) + month + day + hour + minute + second
286 -
287 - elif format == 2:
288 -
289 - if resolution == 0:
290 - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + '00:00:00'
291 - elif resolution == 1:
292 - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':00:00'
293 - elif resolution == 2:
294 - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':00'
295 - elif resolution == 3:
296 - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':' + second
297 -
298 - return timestamp
 315+
 316+"""
 317+
 318+ Inserts missing interval points into the time and metric lists
299319
 320+ Assumptions:
 321+ _metrics_ and _times_ are lists of the same length
 322+ there must be a data point at each interval
 323+ Some data points may be missed
 324+ where there is no metric data the metric takes on the value 0.0
300325
301 - """
 326+ e.g. when _interval_ = 10
 327+ times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 30 40 50], [1 1 0 1 0 1]
302328
303 - Convert timestamp to a datetime object of a given format
304 -
305 - INPUT:
306 -
307 - timestamp - timestamp string
308 - format - the format of the returned timestamp strings
309 -
310 -
311 - RETURN:
312 - time_obj - datetime conversion of timestamp string
313 -
314 - """
315 - def timestamp_to_obj(self, timestamp, format):
316 -
317 - if format == 1:
318 - time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[6:8]), \
319 - int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14]))
320 -
321 - elif format == 2:
322 - time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[5:7]), int(timestamp[8:10]), \
323 - int(timestamp[11:13]), int(timestamp[14:16]), int(timestamp[17:19]))
 329+ INPUT:
324330
325 - return time_obj
 331+ times -
 332+ metrics -
 333+ interval -
326334
 335+ RETURN:
 336+ new_times -
 337+ new_metrics -
327338
328 - """
 339+"""
 340+def normalize_intervals(times, metrics, interval):
329341
330 - Inserts missing interval points into the time and metric lists
 342+ current_time = 0.0
 343+ index = 0
 344+ iterations = 0
 345+ max_elems = math.ceil((times[-1] - times[0]) / interval) # there should be no more elements in the list than this
 346+
 347+ new_times = list()
 348+ new_metrics = list()
 349+
 350+ """ Iterate through the time list """
 351+ while index < len(times):
331352
332 - Assumptions:
333 - _metrics_ and _times_ are lists of the same length
334 - there must be a data point at each interval
335 - Some data points may be missed
336 - where there is no metric data the metric takes on the value 0.0
 353+ """ TEMPORARY SOLUTION: break out of the loop if more than the maximum number of elements is reached """
 354+ if iterations > max_elems:
 355+ break;
337356
338 - e.g. when _interval_ = 10
339 - times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 30 40 50], [1 1 0 1 0 1]
 357+ new_times.append(current_time)
340358
341 - INPUT:
 359+ """ If the current time is not in the current list then add it and a metric value of 0.0
 360+ otherwise add the existing elements to the new lists """
 361+ if current_time != times[index]:
 362+ new_metrics.append(0.0)
342363
343 - times -
344 - metrics -
345 - interval -
 364+ else:
 365+ new_metrics.append(metrics[index])
 366+ index = index + 1
346367
347 - RETURN:
348 - new_times -
349 - new_metrics -
 368+ current_time = current_time + interval
350369
351 - """
352 - def normalize_intervals(self, times, metrics, interval):
 370+ iterations = iterations + 1
353371
354 - current_time = 0.0
355 - index = 0
356 - iterations = 0
357 - max_elems = math.ceil((times[-1] - times[0]) / interval) # there should be no more elements in the list than this
 372+ return [new_times, new_metrics]
 373+
 374+"""
 375+
 376+ Converts from one timestamp format to another timestamp format
358377
359 - new_times = list()
360 - new_metrics = list()
 378+ format 1 - 20080101000606
 379+ format 2 - 2008-01-01 00:06:06
361380
362 - """ Iterate through the time list """
363 - while index < len(times):
364 -
365 - """ TEMPORARY SOLUTION: break out of the loop if more than the maximum number of elements is reached """
366 - if iterations > max_elems:
367 - break;
368 -
369 - new_times.append(current_time)
370 -
371 - """ If the current time is not in the current list then add it and a metric value of 0.0
372 - otherwise add the existing elements to the new lists """
373 - if current_time != times[index]:
374 - new_metrics.append(0.0)
375 -
376 - else:
377 - new_metrics.append(metrics[index])
378 - index = index + 1
379 -
380 - current_time = current_time + interval
381 -
382 - iterations = iterations + 1
383 -
384 - return [new_times, new_metrics]
 381+ INPUT:
 382+
 383+ ts - timestamp string
 384+ format_from - input format
 385+ format_to - output format
 386+
 387+ RETURN:
 388+
 389+ new_timestamp - new timestamp string
 390+
385391
386 - """
 392+"""
 393+def timestamp_convert_format(ts, format_from, format_to):
387394
388 - Converts from one timestamp format to another timestamp format
389 -
390 - format 1 - 20080101000606
391 - format 2 - 2008-01-01 00:06:06
392 -
393 - INPUT:
 395+ if format_from == 1:
394396
395 - ts - timestamp string
396 - format_from - input format
397 - format_to - output format
398 -
399 - RETURN:
400 -
401 - new_timestamp - new timestamp string
402 -
403 -
404 - """
405 - def timestamp_convert_format(self, ts, format_from, format_to):
406 -
407 - if format_from == 1:
 397+ if format_to == 2:
 398+ new_timestamp = ts[0:4] + '-' + ts[4:6] + '-' + ts[6:8] + ' ' + ts[8:10] + ':' + ts[10:12] + ':' + ts[12:14]
408399
409 - if format_to == 2:
410 - new_timestamp = ts[0:4] + '-' + ts[4:6] + '-' + ts[6:8] + ' ' + ts[8:10] + ':' + ts[10:12] + ':' + ts[12:14]
411 -
412 - elif format_from == 2:
413 - if format_to == 1:
414 - new_timestamp = ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] + ts[14:16] + ts[15:17]
415 -
416 - return new_timestamp
417 -
 400+ elif format_from == 2:
 401+ if format_to == 1:
 402+ new_timestamp = ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] + ts[14:16] + ts[15:17]
 403+
 404+ return new_timestamp
 405+

Status & tagging log