Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/fundraiser_reporting.py |
— | — | @@ -1,1093 +0,0 @@ |
2 | | - |
3 | | - |
4 | | -""" |
5 | | - |
6 | | -Pulls data from storage3.faulkner and generates plots. |
7 | | - |
8 | | - |
9 | | -""" |
10 | | - |
11 | | -__author__ = "Ryan Faulkner" |
12 | | -__revision__ = "$Rev$" |
13 | | -__date__ = "December 16th, 2010" |
14 | | - |
15 | | - |
16 | | -import sys |
17 | | -sys.path.append('../') |
18 | | - |
19 | | -import matplotlib |
20 | | -import datetime |
21 | | -import MySQLdb |
22 | | -import pylab |
23 | | -import HTML |
24 | | -import math |
25 | | - |
26 | | -import query_store as qs |
27 | | -import miner_help as mh |
28 | | -import TimestampProcessor as TP |
29 | | -import DataLoader as DL |
30 | | - |
31 | | -matplotlib.use('Agg') |
32 | | - |
33 | | - |
34 | | - |
35 | | -""" |
36 | | - |
37 | | - CLASS :: FundraiserReporting |
38 | | - |
39 | | - Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include: |
40 | | - |
41 | | - METHODS: |
42 | | - |
43 | | - run_query - format and execute the query to obtain data |
44 | | - gen_plot - plots the results of the report |
45 | | - write_to_html_table - writes the results to an HTML table |
46 | | - run |
47 | | - |
48 | | -""" |
49 | | -class FundraiserReporting(TP.TimestampProcesser, DL.DataLoader): |
50 | | - |
51 | | - _sql_path_ = '../sql/' # Relative path for SQL files to be processed |
52 | | - |
53 | | - """ |
54 | | - |
55 | | - Smooths a list of values |
56 | | - |
57 | | - INPUT: |
58 | | - values - a list of datetime objects |
59 | | - window_length - indicate whether the list counts back from the end |
60 | | - |
61 | | - RETURN: |
62 | | - new_values - list of smoothed values |
63 | | - |
64 | | - """ |
65 | | - def smooth(self, values, window_length): |
66 | | - |
67 | | - window_length = int(math.floor(window_length / 2)) |
68 | | - |
69 | | - if window_length < 1: |
70 | | - return values |
71 | | - |
72 | | - list_len = len(values) |
73 | | - new_values = list() |
74 | | - |
75 | | - for i in range(list_len): |
76 | | - index_left = max([0, i - window_length]) |
77 | | - index_right = min([list_len - 1, i + window_length]) |
78 | | - |
79 | | - width = index_right - index_left + 1 |
80 | | - |
81 | | - new_val = sum(values[index_left : (index_right + 1)]) / width |
82 | | - new_values.append(new_val) |
83 | | - |
84 | | - return new_values |
85 | | - |
86 | | - """ |
87 | | - |
88 | | - workaround for issue with tuple objects in HTML.py |
89 | | - MySQLdb returns unfamiliar tuple elements from its fetchall() method |
90 | | - this is probably a version problem since the issue popped up in 2.5 but not 2.6 |
91 | | - |
92 | | - INPUT: |
93 | | - row - row object returned from MySQLdb.fetchall() |
94 | | - |
95 | | - RETURN: |
96 | | - l - a list of tuple objects from the db |
97 | | - |
98 | | - """ |
99 | | - def listify(self, row): |
100 | | - l = [] |
101 | | - for i in row: |
102 | | - l.append(i) |
103 | | - return l |
104 | | - |
105 | | - |
106 | | - """ |
107 | | - |
108 | | - To be overloaded by subclasses for specific types of queries |
109 | | - |
110 | | - INPUT: |
111 | | - values - a list of datetime objects |
112 | | - window_length - indicate whether the list counts back from the end |
113 | | - |
114 | | - RETURN: |
115 | | - return_status - integer, 0 indicates un-exceptional execution |
116 | | - |
117 | | - """ |
118 | | - def run_query(self, start_time, end_time, query_name, metric_name): |
119 | | - return 0 |
120 | | - |
121 | | - |
122 | | - """ |
123 | | - |
124 | | - To be overloaded by subclasses for different plotting behaviour |
125 | | - |
126 | | - INPUT: |
127 | | - values - a list of datetime objects |
128 | | - window_length - indicate whether the list counts back from the end |
129 | | - |
130 | | - RETURN: |
131 | | - return_status - integer, 0 indicates un-exceptional execution |
132 | | - |
133 | | - """ |
134 | | - def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, subplot_index, fname): |
135 | | - return 0 |
136 | | - |
137 | | - """ |
138 | | - |
139 | | - To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py) |
140 | | - |
141 | | - INPUT: |
142 | | - values - a list of datetime objects |
143 | | - window_length - indicate whether the list counts back from the end |
144 | | - |
145 | | - RETURN: |
146 | | - return_status - integer, 0 indicates un-exceptional execution |
147 | | - |
148 | | - """ |
149 | | - def write_to_html_table(self): |
150 | | - |
151 | | - """ |
152 | | - FROM TEST REPORTING |
153 | | - |
154 | | - query_obj = qs.query_store() |
155 | | - |
156 | | - # Populate the campaigns table |
157 | | - s1 = 'drop table if exists campaigns;' |
158 | | - s2 = 'create table campaigns as (select utm_campaign from drupal.contribution_tracking where ts > \'%s\' group by utm_campaign having count(*) > 100);' % (start_time) |
159 | | - cur.execute(s1) |
160 | | - cur.execute(s2) |
161 | | - |
162 | | - table_data = [] |
163 | | - sql_stmnt = mh.read_sql(sql_path + query_type + '.sql'); |
164 | | - |
165 | | - # open the html file for writing |
166 | | - f = open(html_path + query_type + '.html', 'w') |
167 | | - |
168 | | - format_start_time = start_time[0:4] + '-' + start_time[4:6] + '-' + start_time[6:8] + '-' + start_time[8:10] + 'HRs' |
169 | | - |
170 | | - # Formats the statement according to query type |
171 | | - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
172 | | - |
173 | | - # html formatting |
174 | | - if query_type == 'report_campaign_ecomm': |
175 | | - f.write('<br>Donation data since ' + format_start_time + ' ... <br><br>') |
176 | | - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
177 | | - |
178 | | - elif query_type == 'report_campaign_logs': |
179 | | - f.write('<br>Impression and landing page data since ' + format_start_time+ ' ... <br><br>') |
180 | | - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
181 | | - |
182 | | - elif query_type == 'report_campaign_ecomm_by_hr': |
183 | | - f.write('<br>Donation data by hour since ' + format_start_time + ' ... <br><br>') |
184 | | - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
185 | | - |
186 | | - elif query_type == 'report_campaign_logs_by_hr': |
187 | | - f.write('<br>Impression and landing page by hour since ' + format_start_time + ' ... <br><br>') |
188 | | - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
189 | | - |
190 | | - else: |
191 | | - select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
192 | | - |
193 | | - try: |
194 | | - err_msg = select_stmnt |
195 | | - cur.execute(select_stmnt) |
196 | | - |
197 | | - results = cur.fetchall() |
198 | | - |
199 | | - for row in results: |
200 | | - cpRow = listify(row) |
201 | | - # t.rows.append(row) |
202 | | - table_data.append(cpRow) |
203 | | - |
204 | | - except: |
205 | | - db.rollback() |
206 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
207 | | - |
208 | | - |
209 | | - t = HTML.table(table_data, header_row=header) |
210 | | - htmlcode = str(t) |
211 | | - |
212 | | - f.write(htmlcode) |
213 | | - f.close() |
214 | | - |
215 | | - return htmlcode |
216 | | - |
217 | | - """ |
218 | | - return 0 |
219 | | - |
220 | | - |
221 | | - |
222 | | - """ |
223 | | - |
224 | | - The access point of FundraiserReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc. |
225 | | - To be overloaded by subclasses |
226 | | - |
227 | | - INPUT: |
228 | | - |
229 | | - RETURN: |
230 | | - return_status - integer, 0 indicates un-exceptional execution |
231 | | - |
232 | | - """ |
233 | | - def run(self): |
234 | | - return |
235 | | - |
236 | | - |
237 | | - |
238 | | -""" |
239 | | - |
240 | | -CLASS :: ^TotalAmountsReporting^ |
241 | | - |
242 | | -This subclass handles reporting on total amounts for the fundraiser. |
243 | | - |
244 | | -""" |
245 | | - |
246 | | -class TotalAmountsReporting(FundraiserReporting): |
247 | | - |
248 | | - def __init__(self): |
249 | | - self.data = [] |
250 | | - |
251 | | - def run_query(self, start_time, end_time, query_name, descriptor): |
252 | | - |
253 | | - self.init_db() |
254 | | - |
255 | | - query_obj = qs.QueryStore() |
256 | | - |
257 | | - # Load the SQL File & Format |
258 | | - filename = self._sql_path_ + query_name + '.sql' |
259 | | - sql_stmnt = mh.read_sql(filename) |
260 | | - sql_stmnt = query_obj.format_query(query_name + descriptor, sql_stmnt, [start_time, end_time]) |
261 | | - |
262 | | - labels = [None] * 21 |
263 | | - labels[0] = 'clicks' |
264 | | - labels[1] = 'donations' |
265 | | - labels[2] = 'total amount' |
266 | | - labels[3] = 'banner amount' |
267 | | - labels[4] = 'US amount' |
268 | | - labels[5] = 'EN amount' |
269 | | - labels[6] = 'Other Amount' |
270 | | - labels[7] = 'Email Amount' |
271 | | - labels[8] = 'Recurring Guess' |
272 | | - labels[9] = 'completion_rate' |
273 | | - labels[10] = 'pp_clicks' |
274 | | - labels[11] = 'pp_donations' |
275 | | - labels[12] = 'pp_completion' |
276 | | - labels[13] = 'pp_amount' |
277 | | - labels[14] = 'pp_max_amount' |
278 | | - labels[15] = 'cc_clicks' |
279 | | - labels[16] = 'cc_donations' |
280 | | - labels[17] = 'cc_completion' |
281 | | - labels[18] = 'cc_amount' |
282 | | - labels[19] = 'cc_max_amount' |
283 | | - labels[20] = 'total_amt50' |
284 | | - |
285 | | - |
286 | | - num_keys = len(labels) |
287 | | - |
288 | | - lists = list() |
289 | | - for i in range(num_keys): |
290 | | - lists.append(list()) |
291 | | - |
292 | | - # Composes the data for each banner |
293 | | - try: |
294 | | - err_msg = sql_stmnt |
295 | | - self.cur.execute(sql_stmnt) |
296 | | - |
297 | | - # This query store records according to dates |
298 | | - results = self.cur.fetchall() |
299 | | - for row in results: |
300 | | - for i in range(num_keys): |
301 | | - lists[i].append(row[i+1]) |
302 | | - |
303 | | - except: |
304 | | - self.db.rollback() |
305 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
306 | | - |
307 | | - self.close_db() |
308 | | - |
309 | | - # Only interested in amounts |
310 | | - return [labels, lists] |
311 | | - |
312 | | - |
313 | | - |
314 | | - def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, ranges, subplot_index, fname): |
315 | | - pylab.subplot(subplot_index) |
316 | | - num_keys = len(y_lists) |
317 | | - |
318 | | - pylab.figure(num=None,figsize=[26,14]) |
319 | | - line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','b--o','g--o','r--o','c--o','m--o','k--o'] |
320 | | - |
321 | | - for i in range(num_keys): |
322 | | - pylab.plot(x, y_lists[i], line_types[i]) |
323 | | - |
324 | | - pylab.grid() |
325 | | - pylab.xlim(ranges[0], ranges[1]) |
326 | | - pylab.legend(labels,loc=2) |
327 | | - |
328 | | - pylab.xlabel(xlabel) |
329 | | - pylab.ylabel(ylabel) |
330 | | - |
331 | | - pylab.title(title) |
332 | | - pylab.savefig(fname+'.png', format='png') |
333 | | - |
334 | | - |
335 | | - |
336 | | - def run_hr(self, type): |
337 | | - |
338 | | - |
339 | | - # Current date & time |
340 | | - now = datetime.datetime.now() |
341 | | - #UTC = 8 |
342 | | - #delta = datetime.timedelta(hours=UTC) |
343 | | - #now = now + delta |
344 | | - |
345 | | - |
346 | | - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """ |
347 | | - hours_back = 24 |
348 | | - times = self.gen_date_strings(now, hours_back,1,1) |
349 | | - |
350 | | - start_time = times[0] |
351 | | - end_time = times[1] |
352 | | - |
353 | | - print '\nGenerating analytics total amount for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
354 | | - |
355 | | - # QUERY NAME |
356 | | - query_name = 'report_total_amounts' |
357 | | - |
358 | | - |
359 | | - # RUN BY HOUR |
360 | | - descriptor = '_by_hr' |
361 | | - return_val = self.run_query(start_time, end_time, query_name, descriptor) |
362 | | - |
363 | | - labels = return_val[0] # curve labels |
364 | | - counts = return_val[1] # curve data - lists |
365 | | - |
366 | | - r = self.get_query_fields(labels, counts, type, start_time, end_time) |
367 | | - labels = r[0] |
368 | | - counts = r[1] |
369 | | - title = r[2] |
370 | | - ylabel = r[3] |
371 | | - |
372 | | - xlabel = 'Time - Hours' |
373 | | - subplot_index = 111 |
374 | | - |
375 | | - # plot the curves |
376 | | - time_range = range(len(counts[0])) |
377 | | - for i in range(len(counts[0])): |
378 | | - time_range[i] = time_range[i] - len(counts[0]) |
379 | | - |
380 | | - ranges = [min(time_range), max(time_range)] |
381 | | - |
382 | | - fname = query_name + descriptor + '_' + type |
383 | | - self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname) |
384 | | - |
385 | | - |
386 | | - |
387 | | - def run_day(self,type): |
388 | | - |
389 | | - # Current date & time |
390 | | - now = datetime.datetime.now() |
391 | | - #UTC = 8 |
392 | | - #delta = datetime.timedelta(hours=UTC) |
393 | | - #now = now + delta |
394 | | - |
395 | | - |
396 | | - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=0 """ |
397 | | - hours_back = 7 * 24 # 7 days back |
398 | | - times = self.gen_date_strings(now, hours_back,1,0) |
399 | | - |
400 | | - start_time = times[0] |
401 | | - end_time = times[1] |
402 | | - |
403 | | - print '\nGenerating analytics total amount for ' + str(days_back) + ' days back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
404 | | - |
405 | | - |
406 | | - # FORMAT HEADERS & QUERY NAME |
407 | | - query_name = 'report_total_amounts' |
408 | | - descriptor = '_by_day' |
409 | | - return_val = self.run_query(start_time, end_time, query_name, descriptor) |
410 | | - |
411 | | - labels = return_val[0] |
412 | | - counts = return_val[1] |
413 | | - |
414 | | - r = self.get_query_fields(labels, counts, type, start_time, end_time) |
415 | | - labels = r[0] |
416 | | - counts = r[1] |
417 | | - title = r[2] |
418 | | - ylabel = r[3] |
419 | | - |
420 | | - xlabel = 'Time - Days' |
421 | | - subplot_index = 111 |
422 | | - |
423 | | - # Plot values |
424 | | - time_range = range(len(counts[0])) |
425 | | - for i in range(len(counts[0])): |
426 | | - time_range[i] = time_range[i] - len(counts[0]) |
427 | | - |
428 | | - ranges = [min(time_range), max(time_range)] |
429 | | - |
430 | | - fname = query_name + descriptor + '_' + type |
431 | | - self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname) |
432 | | - |
433 | | - |
434 | | - def get_query_fields(self, labels, counts, type, start_time, end_time): |
435 | | - |
436 | | - if type == 'BAN_EM': |
437 | | - indices = range(2,9) |
438 | | - title = 'Total Amounts: ' + start_time + ' -- ' + end_time |
439 | | - ylabel = 'Amount' |
440 | | - elif type == 'CC_PP_completion': |
441 | | - indices = [12,17] |
442 | | - title = 'Credit Card & Paypal Completion Rates: ' + start_time + ' -- ' + end_time |
443 | | - ylabel = 'Rate' |
444 | | - elif type == 'CC_PP_amount': |
445 | | - indices = [13,18] |
446 | | - title = 'Credit Card & Paypal Total Amounts: ' + start_time + ' -- ' + end_time |
447 | | - ylabel = 'Amount' |
448 | | - elif type == 'AMT_VS_AMT50': |
449 | | - indices = [2,20] |
450 | | - title = 'Amount50 and Amount Totals: ' + start_time + ' -- ' + end_time |
451 | | - ylabel = 'Amount' |
452 | | - else: |
453 | | - sys.exit("Total Amounts: You must enter a valid report type.\n" ) |
454 | | - |
455 | | - # Exract relevant labels and values |
456 | | - labels_temp = list() |
457 | | - counts_temp = list() |
458 | | - |
459 | | - for i in range(len(labels)): |
460 | | - if i in indices: |
461 | | - labels_temp.append(labels[i]) |
462 | | - counts_temp.append(counts[i]) |
463 | | - |
464 | | - return [labels_temp, counts_temp, title, ylabel] |
465 | | - |
466 | | - |
467 | | -""" |
468 | | - |
469 | | -CLASS :: ^BannerLPReporting^ |
470 | | - |
471 | | -This subclass handles reporting on banners and landing pages for the fundraiser. |
472 | | - |
473 | | -""" |
474 | | - |
475 | | -class BannerLPReporting(FundraiserReporting): |
476 | | - |
477 | | - |
478 | | - def __init__(self, *args): |
479 | | - |
480 | | - if len(args) == 2: |
481 | | - self.campaign = args[0] |
482 | | - self.start_time = args[1] |
483 | | - else: |
484 | | - self.campaign = None |
485 | | - self.start_time = None |
486 | | - |
487 | | - def run_query(self,start_time, end_time, campaign, query_name, metric_name): |
488 | | - |
489 | | - self.init_db() |
490 | | - |
491 | | - query_obj = qs.QueryStore() |
492 | | - |
493 | | - metric_lists = mh.AutoVivification() |
494 | | - time_lists = mh.AutoVivification() |
495 | | - # table_data = [] # store the results in a table for reporting |
496 | | - |
497 | | - # Load the SQL File & Format |
498 | | - filename = self._sql_path_ + query_name + '.sql' |
499 | | - sql_stmnt = mh.read_sql(filename) |
500 | | - |
501 | | - query_name = 'report_bannerLP_metrics' # rename query to work with query store |
502 | | - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time, campaign]) |
503 | | - |
504 | | - key_index = query_obj.get_banner_index(query_name) |
505 | | - time_index = query_obj.get_time_index(query_name) |
506 | | - metric_index = query_obj.get_metric_index(query_name, metric_name) |
507 | | - |
508 | | - # Composes the data for each banner |
509 | | - try: |
510 | | - err_msg = sql_stmnt |
511 | | - self.cur.execute(sql_stmnt) |
512 | | - |
513 | | - results = self.cur.fetchall() |
514 | | - |
515 | | - # Compile Table Data |
516 | | - # cpRow = self.listify(row) |
517 | | - # table_data.append(cpRow) |
518 | | - |
519 | | - for row in results: |
520 | | - |
521 | | - key_name = row[key_index] |
522 | | - |
523 | | - try: |
524 | | - metric_lists[key_name].append(row[metric_index]) |
525 | | - time_lists[key_name].append(row[time_index]) |
526 | | - except: |
527 | | - metric_lists[key_name] = list() |
528 | | - time_lists[key_name] = list() |
529 | | - |
530 | | - metric_lists[key_name].append(row[metric_index]) |
531 | | - time_lists[key_name].append(row[time_index]) |
532 | | - |
533 | | - except: |
534 | | - self.db.rollback() |
535 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
536 | | - |
537 | | - """ Convert Times to Integers """ |
538 | | - # Find the earliest date |
539 | | - max_i = 0 |
540 | | - |
541 | | - for key in time_lists.keys(): |
542 | | - for date_str in time_lists[key]: |
543 | | - day_int = int(date_str[8:10]) |
544 | | - hr_int = int(date_str[11:13]) |
545 | | - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
546 | | - if date_int > max_i: |
547 | | - max_i = date_int |
548 | | - max_day = day_int |
549 | | - max_hr = hr_int |
550 | | - |
551 | | - |
552 | | - # Normalize dates |
553 | | - time_norm = mh.AutoVivification() |
554 | | - for key in time_lists.keys(): |
555 | | - for date_str in time_lists[key]: |
556 | | - day = int(date_str[8:10]) |
557 | | - hr = int(date_str[11:13]) |
558 | | - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
559 | | - elem = (day - max_day) * 24 + (hr - max_hr) |
560 | | - try: |
561 | | - time_norm[key].append(elem) |
562 | | - except: |
563 | | - time_norm[key] = list() |
564 | | - time_norm[key].append(elem) |
565 | | - |
566 | | - # smooth out the values |
567 | | - #window_length = 20 |
568 | | - #for banner in metric_lists.keys(): |
569 | | - # metric_lists[banner] = smooth(metric_lists[banner], window_length) |
570 | | - |
571 | | - self.close_db() |
572 | | - |
573 | | - # return [metric_lists, time_norm, table_data] |
574 | | - return [metric_lists, time_norm] |
575 | | - |
576 | | - |
577 | | - def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname): |
578 | | - pylab.subplot(subplot_index) |
579 | | - pylab.figure(num=None,figsize=[26,14]) |
580 | | - count_keys = counts.keys() |
581 | | - |
582 | | - line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s'] |
583 | | - |
584 | | - count = 0 |
585 | | - for key in counts.keys(): |
586 | | - pylab.plot(times[key], counts[key], line_types[count]) |
587 | | - count = count + 1 |
588 | | - |
589 | | - pylab.grid() |
590 | | - pylab.xlim(ranges[0], ranges[1]) |
591 | | - pylab.legend(count_keys,loc=2) |
592 | | - |
593 | | - pylab.xlabel(xlabel) |
594 | | - pylab.ylabel(ylabel) |
595 | | - |
596 | | - pylab.title(title) |
597 | | - pylab.savefig(fname, format='png') |
598 | | - |
599 | | - |
600 | | - """ |
601 | | - |
602 | | - type = 'LP' || 'BAN' || 'BAN-TEST' || 'LP-TEST' |
603 | | - |
604 | | - """ |
605 | | - def run(self, type, metric_name): |
606 | | - |
607 | | - # Current date & time |
608 | | - now = datetime.datetime.now() |
609 | | - #UTC = 8 |
610 | | - #delta = datetime.timedelta(hours=UTC) |
611 | | - #now = now + delta |
612 | | - |
613 | | - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """ |
614 | | - hours_back = 24 |
615 | | - times = self.gen_date_strings(now, hours_back,1,1) |
616 | | - |
617 | | - start_time = times[0] |
618 | | - end_time = times[1] |
619 | | - |
620 | | - print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
621 | | - |
622 | | - if type == 'LP': |
623 | | - query_name = 'report_LP_metrics' |
624 | | - |
625 | | - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
626 | | - if self.campaign == None: |
627 | | - campaign = '[0-9](JA|SA|EA|TY)[0-9]' |
628 | | - else: |
629 | | - campaign = self.campaign |
630 | | - |
631 | | - title = metric_name + ': ' + start_time + ' -- ' + end_time |
632 | | - fname = query_name + '_' + metric_name + '.png' |
633 | | - elif type == 'BAN': |
634 | | - query_name = 'report_banner_metrics' |
635 | | - |
636 | | - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
637 | | - if self.campaign == None: |
638 | | - campaign = '[0-9](JA|SA|EA|TY)[0-9]' |
639 | | - else: |
640 | | - campaign = self.campaign |
641 | | - |
642 | | - title = metric_name + ': ' + start_time + ' -- ' + end_time |
643 | | - fname = query_name + '_' + metric_name + '.png' |
644 | | - elif type == 'BAN-TEST': |
645 | | - r = self.get_latest_campaign() |
646 | | - query_name = 'report_banner_metrics' |
647 | | - |
648 | | - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
649 | | - if self.campaign == None: |
650 | | - campaign = r[0] |
651 | | - start_time = r[1] |
652 | | - else: |
653 | | - campaign = self.campaign |
654 | | - start_time = self.start_time |
655 | | - |
656 | | - title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign |
657 | | - fname = query_name + '_' + metric_name + '_latest' + '.png' |
658 | | - elif type == 'LP-TEST': |
659 | | - r = self.get_latest_campaign() |
660 | | - query_name = 'report_LP_metrics' |
661 | | - |
662 | | - # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
663 | | - if self.campaign == None: |
664 | | - campaign = r[0] |
665 | | - start_time = r[1] |
666 | | - else: |
667 | | - campaign = self.campaign |
668 | | - start_time = self.start_time |
669 | | - |
670 | | - title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign |
671 | | - fname = query_name + '_' + metric_name + '_latest' + '.png' |
672 | | - else: |
673 | | - sys.exit("Invalid type name - must be 'LP' or 'BAN'.") |
674 | | - |
675 | | - return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name) |
676 | | - metrics = return_val[0] |
677 | | - times = return_val[1] |
678 | | - |
679 | | - # title = metric_name + ': ' + start_time + ' -- ' + end_time |
680 | | - xlabel = 'Time - Hours' |
681 | | - ylabel = metric_name |
682 | | - subplot_index = 111 |
683 | | - |
684 | | - min_time = 99 |
685 | | - for key in times.keys(): |
686 | | - min_elem = min(times[key]) |
687 | | - if min_elem < min_time: |
688 | | - min_time = min_elem |
689 | | - |
690 | | - ranges = [min_time, 0] |
691 | | - |
692 | | - self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname) |
693 | | - |
694 | | - return [metrics, times] |
695 | | - |
696 | | - |
697 | | - def get_latest_campaign(self): |
698 | | - |
699 | | - query_name = 'report_latest_campaign' |
700 | | - self.init_db() |
701 | | - |
702 | | - """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """ |
703 | | - now = datetime.datetime.now() |
704 | | - hours_back = 72 |
705 | | - times = self.gen_date_strings(now, hours_back,1,1) |
706 | | - |
707 | | - query_obj = qs.QueryStore() |
708 | | - sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql') |
709 | | - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [times[0]]) |
710 | | - |
711 | | - campaign_index = query_obj.get_campaign_index(query_name) |
712 | | - time_index = query_obj.get_time_index(query_name) |
713 | | - |
714 | | - try: |
715 | | - err_msg = sql_stmnt |
716 | | - self.cur.execute(sql_stmnt) |
717 | | - |
718 | | - row = self.cur.fetchone() |
719 | | - except: |
720 | | - self.db.rollback() |
721 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
722 | | - |
723 | | - campaign = row[campaign_index] |
724 | | - timestamp = row[time_index] |
725 | | - |
726 | | - self.close_db() |
727 | | - |
728 | | - return [campaign, timestamp] |
729 | | - |
730 | | - """ |
731 | | - |
732 | | - Takes as input and converts it to a set of hours counting back from 0 |
733 | | - |
734 | | - time_lists - a dictionary of timestamp lists |
735 | | - time_norm - a dictionary of normalized times |
736 | | - |
737 | | - """ |
738 | | - def normalize_timestamps(self, time_lists): |
739 | | - # Find the earliest date |
740 | | - max_i = 0 |
741 | | - |
742 | | - for key in time_lists.keys(): |
743 | | - for date_str in time_lists[key]: |
744 | | - day_int = int(date_str[8:10]) |
745 | | - hr_int = int(date_str[11:13]) |
746 | | - date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
747 | | - if date_int > max_i: |
748 | | - max_i = date_int |
749 | | - max_day = day_int |
750 | | - max_hr = hr_int |
751 | | - |
752 | | - |
753 | | - # Normalize dates |
754 | | - time_norm = mh.AutoVivification() |
755 | | - for key in time_lists.keys(): |
756 | | - for date_str in time_lists[key]: |
757 | | - day = int(date_str[8:10]) |
758 | | - hr = int(date_str[11:13]) |
759 | | - # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
760 | | - elem = (day - max_day) * 24 + (hr - max_hr) |
761 | | - try: |
762 | | - time_norm[key].append(elem) |
763 | | - except: |
764 | | - time_norm[key] = list() |
765 | | - time_norm[key].append(elem) |
766 | | - |
767 | | - return time_norm |
768 | | - |
769 | | -""" |
770 | | - |
771 | | -CLASS :: ^MinerReporting^ |
772 | | - |
773 | | -This subclass handles reporting on raw values imported into the database. |
774 | | - |
775 | | -""" |
776 | | - |
777 | | -class MinerReporting(FundraiserReporting): |
778 | | - |
779 | | - def run_query(self, start_time, end_time, query_name): |
780 | | - |
781 | | - self.init_db() |
782 | | - |
783 | | - query_obj = qs.QueryStore() |
784 | | - |
785 | | - counts = list() |
786 | | - times = list() |
787 | | - |
788 | | - # Load the SQL File & Format |
789 | | - filename = self._sql_path_+ query_name + '.sql' |
790 | | - sql_stmnt = mh.read_sql(filename) |
791 | | - |
792 | | - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time]) |
793 | | - #print sql_stmnt |
794 | | - |
795 | | - # Get Indexes into Query |
796 | | - count_index = query_obj.get_count_index(query_name) |
797 | | - time_index = query_obj.get_time_index(query_name) |
798 | | - |
799 | | - # Composes the data for each banner |
800 | | - try: |
801 | | - err_msg = sql_stmnt |
802 | | - self.cur.execute(sql_stmnt) |
803 | | - |
804 | | - results = self.cur.fetchall() |
805 | | - |
806 | | - for row in results: |
807 | | - counts.append(row[count_index]) |
808 | | - times.append(row[time_index]) |
809 | | - |
810 | | - except: |
811 | | - self.db.rollback() |
812 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
813 | | - |
814 | | - """ Convert Times to Integers """ |
815 | | - time_norm = self.normalize_timestamps(times) |
816 | | - |
817 | | - |
818 | | - self.close_db() |
819 | | - |
820 | | - return [counts, time_norm] |
821 | | - |
822 | | - |
823 | | - # Create histograms for hourly counts |
824 | | - |
825 | | - def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname): |
826 | | - |
827 | | - pylab.subplot(subplot_index) |
828 | | - pylab.figure(num=None,figsize=[26,14]) |
829 | | - |
830 | | - # pylab.plot(times, counts) |
831 | | - # pylab.hist(counts, times) |
832 | | - pylab.bar(times, counts, width=0.5) |
833 | | - |
834 | | - pylab.grid() |
835 | | - pylab.xlim(ranges[0], ranges[1]) |
836 | | - |
837 | | - pylab.xlabel(xlabel) |
838 | | - pylab.ylabel(ylabel) |
839 | | - |
840 | | - pylab.title(title) |
841 | | - pylab.savefig(fname, format='png') |
842 | | - |
843 | | - def run(self, query_name): |
844 | | - |
845 | | - query_obj = qs.QueryStore() |
846 | | - |
847 | | - # Current date & time |
848 | | - now = datetime.datetime.now() |
849 | | - #UTC = 8 |
850 | | - #delta = datetime.timedelta(hours=UTC) |
851 | | - #now = now + delta |
852 | | - |
853 | | - """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """ |
854 | | - hours_back = 24 |
855 | | - times = self.gen_date_strings_hr(now, hours_back,1,1) |
856 | | - |
857 | | - start_time = times[0] |
858 | | - end_time = times[1] |
859 | | - |
860 | | - print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
861 | | - |
862 | | - # Run Query |
863 | | - return_val = self.run_query(start_time, end_time, query_name) |
864 | | - counts = return_val[0] |
865 | | - times = return_val[1] |
866 | | - |
867 | | - # Normalize times |
868 | | - min_time = min(times) |
869 | | - ranges = [min_time, 0] |
870 | | - |
871 | | - xlabel = 'Hours' |
872 | | - subplot_index = 111 |
873 | | - fname = query_name + '.png' |
874 | | - |
875 | | - title = query_obj.get_plot_title(query_name) |
876 | | - title = title + ' -- ' + start_time + ' - ' + end_time |
877 | | - ylabel = query_obj.get_plot_ylabel(query_name) |
878 | | - |
879 | | - # Convert counts to float (from Decimal) to prevent exception when bar plotting |
880 | | - # Bbox::update_numerix_xy expected numerix array |
881 | | - counts_new = list() |
882 | | - for i in range(len(counts)): |
883 | | - counts_new.append(float(counts[i])) |
884 | | - counts = counts_new |
885 | | - |
886 | | - # Generate Histogram |
887 | | - self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname) |
888 | | - |
889 | | - |
890 | | -""" |
891 | | - |
892 | | -CLASS :: IntervalReporting |
893 | | - |
894 | | -Performs queries that take timestamps, query, and an interval as arguments. Data for a single metric |
895 | | -is generated for each time interval in the time period defined by the start and end timestamps. |
896 | | - |
897 | | -Types of queries supported: |
898 | | - |
899 | | -report_banner_metrics_minutely |
900 | | -report_LP_metrics_minutely |
901 | | - |
902 | | -""" |
903 | | - |
904 | | -class IntervalReporting(FundraiserReporting): |
905 | | - |
906 | | - def run_query(self, start_time, end_time, interval, query_name, metric_name, campaign): |
907 | | - |
908 | | - self.init_db() |
909 | | - |
910 | | - query_obj = qs.QueryStore() |
911 | | - |
912 | | - metrics = mh.AutoVivification() |
913 | | - times = mh.AutoVivification() |
914 | | - times_norm = mh.AutoVivification() |
915 | | - |
916 | | - """ Compose datetime objects to represent the first and last intervals """ |
917 | | - start_time_obj = self.timestamp_to_obj(start_time, 1) |
918 | | - start_time_obj = start_time_obj.replace(minute=int(math.floor(start_time_obj.minute / interval) * interval)) |
919 | | - start_time_obj_str = self.timestamp_from_obj(start_time_obj, 1, 3) |
920 | | - |
921 | | - end_time_obj = self.timestamp_to_obj(end_time, 1) |
922 | | - # end_time_obj = end_time_obj + datetime.timedelta(seconds=-1) |
923 | | - end_time_obj = end_time_obj.replace(minute=int(math.floor(end_time_obj.minute / interval) * interval)) |
924 | | - end_time_obj_str = self.timestamp_from_obj(end_time_obj, 1, 3) |
925 | | - |
926 | | - """ The start time for the impression portion of the query should be one second less""" |
927 | | - |
928 | | - imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1) |
929 | | - imp_start_time_obj_str = self.timestamp_from_obj(imp_start_time_obj, 1, 3) |
930 | | - |
931 | | - """ Load the SQL File & Format """ |
932 | | - filename = self._sql_path_+ query_name + '.sql' |
933 | | - sql_stmnt = mh.read_sql(filename) |
934 | | - |
935 | | - sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [start_time, end_time, campaign, interval, imp_start_time_obj_str]) |
936 | | - # print sql_stmnt |
937 | | - |
938 | | - """ Get Indexes into Query """ |
939 | | - key_index = query_obj.get_banner_index(query_name) |
940 | | - metric_index = query_obj.get_metric_index(query_name, metric_name) |
941 | | - time_index = query_obj.get_time_index(query_name) |
942 | | - |
943 | | - """ Compose the data for each separate donor pipeline artifact """ |
944 | | - try: |
945 | | - err_msg = sql_stmnt |
946 | | - self.cur.execute(sql_stmnt) |
947 | | - |
948 | | - results = self.cur.fetchall() |
949 | | - final_time = dict() # stores the last timestamp seen |
950 | | - interval_obj = datetime.timedelta(minutes=interval) # timedelta object used to shift times by _interval_ minutes |
951 | | - |
952 | | - for row in results: |
953 | | - |
954 | | - key_name = row[key_index] |
955 | | - time_obj = self.timestamp_to_obj(row[time_index], 1) # format = 1, 14-digit TS |
956 | | - |
957 | | - """ For each new dictionary index by key name start a new list if its not already there """ |
958 | | - try: |
959 | | - metrics[key_name].append(row[metric_index]) |
960 | | - times[key_name].append(time_obj + interval_obj) |
961 | | - final_time[key_name] = row[time_index] |
962 | | - except: |
963 | | - metrics[key_name] = list() |
964 | | - times[key_name] = list() |
965 | | - |
966 | | - """ If the first element is not the start time add it |
967 | | - this will be the case if there is no data for the first interval |
968 | | - NOTE: two datapoints are added at the beginning to define the first interval """ |
969 | | - if start_time_obj_str != row[time_index]: |
970 | | - times[key_name].append(start_time_obj) |
971 | | - metrics[key_name].append(0.0) |
972 | | - |
973 | | - times[key_name].append(start_time_obj + interval_obj) |
974 | | - metrics[key_name].append(0.0) |
975 | | - else: |
976 | | - metrics[key_name].append(row[metric_index]) |
977 | | - times[key_name].append(time_obj) |
978 | | - |
979 | | - metrics[key_name].append(row[metric_index]) |
980 | | - times[key_name].append(time_obj + interval_obj) |
981 | | - |
982 | | - |
983 | | - except: |
984 | | - self.db.rollback() |
985 | | - sys.exit("Database Interface Exception:\n" + err_msg) |
986 | | - |
987 | | - |
988 | | - |
989 | | - """ Ensure that the last time in the list is the endtime less the interval """ |
990 | | - |
991 | | - for key in times.keys(): |
992 | | - if final_time[key_name] != end_time_obj_str: |
993 | | - times[key].append(end_time_obj) |
994 | | - metrics[key].append(0.0) |
995 | | - |
996 | | - self.close_db() |
997 | | - |
998 | | - return [metrics, times] |
999 | | - |
1000 | | - """ |
1001 | | - Execute reporting query and generate plots |
1002 | | - """ |
1003 | | - |
1004 | | - """ |
1005 | | - Execute reporting query and generate plots |
1006 | | - """ |
1007 | | - def gen_plot(self, metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname): |
1008 | | - |
1009 | | - pylab.subplot(subplot_index) |
1010 | | - pylab.figure(num=None,figsize=[26,14]) |
1011 | | - |
1012 | | - line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s'] |
1013 | | - |
1014 | | - count = 0 |
1015 | | - for key in metrics.keys(): |
1016 | | - pylab.step(times[key], metrics[key], line_types[count]) |
1017 | | - count = count + 1 |
1018 | | - |
1019 | | - pylab.grid() |
1020 | | - pylab.xlim(ranges[0], ranges[1]) |
1021 | | - pylab.ylim(ranges[2], ranges[3]) |
1022 | | - pylab.legend(metrics.keys(),loc=2) |
1023 | | - |
1024 | | - pylab.xlabel(xlabel) |
1025 | | - pylab.ylabel(ylabel) |
1026 | | - |
1027 | | - pylab.title(title) |
1028 | | - pylab.savefig(fname, format='png') |
1029 | | - |
1030 | | - |
1031 | | - """ |
1032 | | - Execute reporting query and generate plots |
1033 | | - """ |
1034 | | - def run(self, start_time, end_time, interval, query_name, metric_name, campaign): |
1035 | | - |
1036 | | - query_obj = qs.QueryStore() |
1037 | | - |
1038 | | - print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
1039 | | - |
1040 | | - # Run Query |
1041 | | - return_val = self.run_query(start_time, end_time, interval, query_name, metric_name, campaign) |
1042 | | - counts = return_val[0] |
1043 | | - times = return_val[1] |
1044 | | - |
1045 | | - """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """ |
1046 | | - for key in times.keys(): |
1047 | | - times[key] = self.normalize_timestamps(times[key], False, 2) |
1048 | | - times[key], counts[key] = self.normalize_intervals(times[key], counts[key], interval) |
1049 | | - |
1050 | | - # Normalize times |
1051 | | - min_time = min(times) |
1052 | | - ranges = [min_time, 0] |
1053 | | - |
1054 | | - xlabel = 'MINUTES' |
1055 | | - subplot_index = 111 |
1056 | | - fname = campaign + ' ' + query_name + ' ' + metric_name + '.png' |
1057 | | - |
1058 | | - metric_full_name = query_obj.get_metric_full_name(metric_name) |
1059 | | - title = campaign + ': ' + metric_full_name + ' -- ' + start_time + ' - ' + end_time |
1060 | | - ylabel = metric_full_name |
1061 | | - |
1062 | | - """ Convert counts to float (from Decimal) to prevent exception when bar plotting |
1063 | | - Bbox::update_numerix_xy expected numerix array """ |
1064 | | - for key in counts.keys(): |
1065 | | - counts_new = list() |
1066 | | - for i in range(len(counts[key])): |
1067 | | - counts_new.append(float(counts[key][i])) |
1068 | | - counts[key] = counts_new |
1069 | | - |
1070 | | - """ Determine List maximums """ |
1071 | | - times_max = 0 |
1072 | | - metrics_max = 0 |
1073 | | - |
1074 | | - for key in counts.keys(): |
1075 | | - list_max = max(counts[key]) |
1076 | | - if list_max > metrics_max: |
1077 | | - metrics_max = list_max |
1078 | | - |
1079 | | - for key in times.keys(): |
1080 | | - list_max = max(times[key]) |
1081 | | - if list_max > times_max: |
1082 | | - times_max = list_max |
1083 | | - |
1084 | | - ranges = list() |
1085 | | - ranges.append(0.0) |
1086 | | - ranges.append(times_max * 1.1) |
1087 | | - ranges.append(0.0) |
1088 | | - ranges.append(metrics_max * 1.1) |
1089 | | - |
1090 | | - |
1091 | | - """ Generate plots given data """ |
1092 | | - self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname) |
1093 | | - |
1094 | | - |
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/query_store.py |
— | — | @@ -1,437 +0,0 @@ |
2 | | - |
3 | | -""" |
4 | | - |
5 | | -Class that contains and organizes query info. Depends on the contents of ../sql/ where filenames are |
6 | | -coupled with query_name parameters |
7 | | - |
8 | | -""" |
9 | | - |
10 | | -__author__ = "Ryan Faulkner" |
11 | | -__revision__ = "$Rev$" |
12 | | -__date__ = "November 28th, 2010" |
13 | | - |
14 | | - |
15 | | -""" |
16 | | - |
17 | | - CLASS :: QueryStore |
18 | | - |
19 | | - |
20 | | - |
21 | | - METHODS: |
22 | | - |
23 | | - format_query |
24 | | - get_query |
25 | | - get_query_header |
26 | | - get_key_index |
27 | | - get_count_index |
28 | | - get_time_index |
29 | | - get_campaign_index |
30 | | - get_banner_index |
31 | | - get_landing_page_index |
32 | | - get_metric_index |
33 | | - get_plot_title |
34 | | - get_plot_ylabel |
35 | | - get_metric_full_name |
36 | | - |
37 | | -""" |
38 | | -class QueryStore: |
39 | | - |
40 | | - def format_query(self, query_name, sql_stmnt, args): |
41 | | - |
42 | | - if query_name == 'report_campaign_ecomm': |
43 | | - start_time = args[0] |
44 | | - sql_stmnt = sql_stmnt % (start_time) |
45 | | - |
46 | | - elif query_name == 'report_campaign_logs': |
47 | | - start_time = args[0] |
48 | | - sql_stmnt = sql_stmnt % (start_time, start_time, start_time) |
49 | | - |
50 | | - elif query_name == 'report_campaign_ecomm_by_hr': |
51 | | - start_time = args[0] |
52 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time) |
53 | | - |
54 | | - elif query_name == 'report_campaign_logs_by_hr': |
55 | | - start_time = args[0] |
56 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \ |
57 | | - start_time, '%', '%', '%', '%', start_time, '%') |
58 | | - |
59 | | - elif query_name == 'report_impressions_country': |
60 | | - start_time = args[0] |
61 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', start_time) |
62 | | - |
63 | | - elif query_name == 'report_campaign_logs_by_min': |
64 | | - start_time = args[0] |
65 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \ |
66 | | - start_time, '%', '%', '%', '%', start_time) |
67 | | - |
68 | | - elif query_name == 'report_non_US_clicks': |
69 | | - start_time = args[0] |
70 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time) |
71 | | - |
72 | | - elif query_name == 'report_contribution_tracking': |
73 | | - start_time = args[0] |
74 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', '%',start_time) |
75 | | - |
76 | | - elif query_name == 'report_total_amounts_by_hr': |
77 | | - start_time = args[0] |
78 | | - end_time = args[1] |
79 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time) |
80 | | - |
81 | | - elif query_name == 'report_total_amounts_by_day': |
82 | | - start_time = args[0] |
83 | | - end_time = args[1] |
84 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '', start_time, end_time) |
85 | | - |
86 | | - elif query_name == 'report_bannerLP_metrics': |
87 | | - start_time = args[0] |
88 | | - end_time = args[1] |
89 | | - campaign = args[2] |
90 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, campaign, '%', '%', '%', '%', start_time, end_time, campaign) |
91 | | - |
92 | | - elif query_name == 'report_latest_campaign': |
93 | | - start_time = args[0] |
94 | | - sql_stmnt = sql_stmnt % (start_time) |
95 | | - |
96 | | - elif query_name == 'report_confidence_banner': |
97 | | - start = args[0] |
98 | | - end = args[1] |
99 | | - cmpgn = args[2] |
100 | | - banner = args[3] |
101 | | - sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, banner, '%','%','%','%','10','10', start, end, cmpgn, banner, \ |
102 | | - '%','%','%','%','10','10', start, end, cmpgn, banner) |
103 | | - |
104 | | - elif query_name == 'report_confidence_lp': |
105 | | - start = args[0] |
106 | | - end = args[1] |
107 | | - cmpgn = args[2] |
108 | | - banner = args[3] |
109 | | - sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, cmpgn, banner, \ |
110 | | - '%','%','%','%','10','10', start, end, cmpgn, banner) |
111 | | - |
112 | | - elif query_name == 'report_banner_impressions_by_hour': |
113 | | - start = args[0] |
114 | | - end = args[1] |
115 | | - sql_stmnt = sql_stmnt % ('%','%','%','%', start, end) |
116 | | - |
117 | | - elif query_name == 'report_lp_views_by_hour': |
118 | | - start = args[0] |
119 | | - end = args[1] |
120 | | - sql_stmnt = sql_stmnt % ('%','%','%','%', start, end) |
121 | | - |
122 | | - elif query_name == 'report_banner_confidence': |
123 | | - start = args[0] |
124 | | - end = args[1] |
125 | | - banner = args[2] |
126 | | - campaign = args[3] |
127 | | - sql_stmnt = sql_stmnt % (start, end, banner, start, end, campaign, start, end, banner, start, end, campaign, banner) |
128 | | - |
129 | | - elif query_name == 'report_LP_confidence': |
130 | | - start = args[0] |
131 | | - end = args[1] |
132 | | - lp = args[2] |
133 | | - campaign = args[3] |
134 | | - sql_stmnt = sql_stmnt % (start, end, campaign, lp, start, end, campaign, lp) |
135 | | - |
136 | | - elif query_name == 'report_bannerLP_confidence': |
137 | | - start = args[0] |
138 | | - end = args[1] |
139 | | - banner = args[2] |
140 | | - lp = args[3] |
141 | | - campaign = args[4] |
142 | | - sql_stmnt = sql_stmnt % (start, end, banner, start, end, banner, campaign, start, end, banner, lp, campaign, start, end, banner, lp, campaign) |
143 | | - |
144 | | - elif query_name == 'report_ecomm_by_amount': |
145 | | - start_time = args[0] |
146 | | - end_time = args[1] |
147 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, end_time) |
148 | | - |
149 | | - elif query_name == 'report_ecomm_by_contact': |
150 | | - where_str = args[0] |
151 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', where_str) |
152 | | - |
153 | | - elif query_name == 'report_banner_metrics_minutely': |
154 | | - start_time = args[0] |
155 | | - end_time = args[1] |
156 | | - campaign = args[2] |
157 | | - interval = args[3] |
158 | | - imp_start_time = args[4] |
159 | | - |
160 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, imp_start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign, \ |
161 | | - '%', '%', '%', '%', interval, interval, start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign) |
162 | | - |
163 | | - elif query_name == 'report_LP_metrics_minutely': |
164 | | - start_time = args[0] |
165 | | - end_time = args[1] |
166 | | - campaign = args[2] |
167 | | - interval = args[3] |
168 | | - |
169 | | - sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, start_time, end_time, campaign, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign) |
170 | | - |
171 | | - else: |
172 | | - return 'no such table\n' |
173 | | - |
174 | | - return sql_stmnt |
175 | | - |
176 | | - def get_query(self, query_name): |
177 | | - if query_name == 'report_campaign_logs_by_min': |
178 | | - return '' |
179 | | - elif query_name == '': |
180 | | - return '' |
181 | | - else: |
182 | | - return 'no such table' |
183 | | - |
184 | | - def get_query_header(self, query_name): |
185 | | - if query_name == 'report_contribution_tracking': |
186 | | - return ['Time','Banner','Landing Page','Campaign','Converted Amount', 'Suffix'] |
187 | | - elif query_name == 'report_ecomm_by_amount': |
188 | | - return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount', 'First Donation?', 'Date of First'] |
189 | | - elif query_name == 'report_ecomm_by_contact': |
190 | | - return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount'] |
191 | | - else: |
192 | | - return 'no such table' |
193 | | - |
194 | | - """ Returns the index of the key for the query data """ |
195 | | - def get_key_index(self, query_name): |
196 | | - if query_name == 'report_banner_metrics_minutely': |
197 | | - return 1 |
198 | | - elif query_name == 'report_LP_metrics_minutely': |
199 | | - return 1 |
200 | | - |
201 | | - def get_count_index(self, query_name): |
202 | | - if query_name == 'report_lp_views_by_hour': |
203 | | - return 1 |
204 | | - elif query_name == 'report_banner_impressions_by_hour': |
205 | | - return 1 |
206 | | - else: |
207 | | - return -1 |
208 | | - |
209 | | - def get_time_index(self, query_name): |
210 | | - if query_name == 'report_campaign_logs_by_min': |
211 | | - return 0 |
212 | | - elif query_name == 'report_campaign_logs_by_hr': |
213 | | - return 0 |
214 | | - elif query_name == 'report_non_US_clicks': |
215 | | - return 0 |
216 | | - elif query_name == 'report_contribution_tracking': |
217 | | - return 0 |
218 | | - elif query_name == 'report_bannerLP_metrics': |
219 | | - return 0 |
220 | | - elif query_name == 'report_latest_campaign': |
221 | | - return 1 |
222 | | - elif query_name == 'report_banner_impressions_by_hour': |
223 | | - return 0 |
224 | | - elif query_name == 'report_lp_views_by_hour': |
225 | | - return 0 |
226 | | - elif query_name == 'report_banner_metrics_minutely': |
227 | | - return 0 |
228 | | - elif query_name == 'report_LP_metrics_minutely': |
229 | | - return 0 |
230 | | - else: |
231 | | - return -1 |
232 | | - |
233 | | - def get_campaign_index(self, query_name): |
234 | | - if query_name == 'report_campaign_logs_by_min': |
235 | | - return 2 |
236 | | - elif query_name == 'report_campaign_logs_by_hr': |
237 | | - return 1 |
238 | | - elif query_name == 'report_contribution_tracking': |
239 | | - return 3 |
240 | | - elif query_name == 'report_bannerLP_metrics': |
241 | | - return 1 |
242 | | - elif query_name == 'report_latest_campaign': |
243 | | - return 0 |
244 | | - else: |
245 | | - return -1 |
246 | | - |
247 | | - def get_banner_index(self, query_name): |
248 | | - if query_name == 'report_campaign_logs_by_min': |
249 | | - return 3 |
250 | | - elif query_name == 'report_campaign_logs_by_hr': |
251 | | - return 2 |
252 | | - elif query_name == 'report_contribution_tracking': |
253 | | - return 1 |
254 | | - elif query_name == 'report_bannerLP_metrics': |
255 | | - return 1 |
256 | | - elif query_name == 'report_banner_metrics_minutely': |
257 | | - return 1 |
258 | | - else: |
259 | | - return -1 |
260 | | - |
261 | | - def get_landing_page_index(self, query_name): |
262 | | - if query_name == 'report_campaign_logs_by_min': |
263 | | - return 4 |
264 | | - elif query_name == 'report_campaign_logs_by_hr': |
265 | | - return 3 |
266 | | - elif query_name == 'report_non_US_clicks': |
267 | | - return 2 |
268 | | - elif query_name == 'report_contribution_tracking': |
269 | | - return 2 |
270 | | - elif query_name == 'report_bannerLP_metrics': |
271 | | - return 1 |
272 | | - elif query_name == 'report_LP_metrics_minutely': |
273 | | - return 1 |
274 | | - else: |
275 | | - return -1 |
276 | | - |
277 | | - def get_metric_index(self, query_name, metric_name): |
278 | | - if query_name == 'report_campaign_logs_by_min': |
279 | | - if metric_name == 'click_rate': |
280 | | - return 9 |
281 | | - elif query_name == 'report_campaign_logs_by_hr': |
282 | | - if metric_name == 'click_rate': |
283 | | - return 8 |
284 | | - elif query_name == 'report_contribution_tracking': |
285 | | - if metric_name == 'converted_amount': |
286 | | - return 4 |
287 | | - elif query_name == 'report_bannerLP_metrics': |
288 | | - if metric_name == 'total_impressions': |
289 | | - return 2 |
290 | | - elif metric_name == 'impressions': |
291 | | - return 3 |
292 | | - elif metric_name == 'views': |
293 | | - return 4 |
294 | | - elif metric_name == 'clicks': |
295 | | - return 5 |
296 | | - elif metric_name == 'donations': |
297 | | - return 6 |
298 | | - elif metric_name == 'amount': |
299 | | - return 7 |
300 | | - elif metric_name == 'click_rate': |
301 | | - return 8 |
302 | | - elif metric_name == 'completion_rate': |
303 | | - return 9 |
304 | | - elif metric_name == 'don_per_imp': |
305 | | - return 10 |
306 | | - elif metric_name == 'amt_per_imp': |
307 | | - return 11 |
308 | | - elif metric_name == 'don_per_view': |
309 | | - return 12 |
310 | | - elif metric_name == 'amt_per_view': |
311 | | - return 13 |
312 | | - elif metric_name == 'amt_per_donation': |
313 | | - return 14 |
314 | | - elif metric_name == 'max_amt': |
315 | | - return 15 |
316 | | - elif metric_name == 'pp_don': |
317 | | - return 16 |
318 | | - elif metric_name == 'cc_don': |
319 | | - return 17 |
320 | | - elif metric_name == 'paypal_click_thru': |
321 | | - return 18 |
322 | | - elif metric_name == 'creditcard_click_thru': |
323 | | - return 19 |
324 | | - else: |
325 | | - return -1 |
326 | | - elif query_name == 'report_banner_confidence': |
327 | | - if metric_name == 'click_rate': |
328 | | - return 7 |
329 | | - elif metric_name == 'don_per_imp': |
330 | | - return 9 |
331 | | - elif metric_name == 'amt_per_imp': |
332 | | - return 10 |
333 | | - elif metric_name == 'amt50_per_imp': |
334 | | - return 14 |
335 | | - elif metric_name == 'amt100_per_imp': |
336 | | - return 15 |
337 | | - else: |
338 | | - return -1 |
339 | | - elif query_name == 'report_LP_confidence': |
340 | | - if metric_name == 'completion_rate': |
341 | | - return 5 |
342 | | - elif metric_name == 'don_per_view': |
343 | | - return 6 |
344 | | - elif metric_name == 'amt_per_view': |
345 | | - return 7 |
346 | | - elif metric_name == 'amt_per_donation': |
347 | | - return 8 |
348 | | - elif metric_name == 'amt50_per_view': |
349 | | - return 9 |
350 | | - elif metric_name == 'amt100_per_view': |
351 | | - return 10 |
352 | | - else: |
353 | | - return -1 |
354 | | - elif query_name == 'report_bannerLP_confidence': |
355 | | - if metric_name == 'click_rate': |
356 | | - return 7 |
357 | | - elif metric_name == 'completion_rate': |
358 | | - return 8 |
359 | | - elif metric_name == 'don_per_imp': |
360 | | - return 9 |
361 | | - elif metric_name == 'amt_per_imp': |
362 | | - return 10 |
363 | | - elif metric_name == 'don_per_view': |
364 | | - return 11 |
365 | | - elif metric_name == 'amt_per_view': |
366 | | - return 12 |
367 | | - elif metric_name == 'amt_per_donation': |
368 | | - return 13 |
369 | | - elif metric_name == 'amt50_per_imp': |
370 | | - return 14 |
371 | | - elif metric_name == 'amt100_per_imp': |
372 | | - return 15 |
373 | | - else: |
374 | | - return -1 |
375 | | - elif query_name == 'report_LP_metrics_minutely': |
376 | | - if metric_name == 'views': |
377 | | - return 2 |
378 | | - elif metric_name == 'donations': |
379 | | - return 4 |
380 | | - elif metric_name == 'amount50': |
381 | | - return 5 |
382 | | - elif metric_name == 'don_per_view': |
383 | | - return 7 |
384 | | - elif metric_name == 'amt50_per_view': |
385 | | - return 9 |
386 | | - else: |
387 | | - return -1 |
388 | | - elif query_name == 'report_banner_metrics_minutely': |
389 | | - if metric_name == 'imp': |
390 | | - return 2 |
391 | | - elif metric_name == 'donations': |
392 | | - return 5 |
393 | | - elif metric_name == 'amount50': |
394 | | - return 7 |
395 | | - elif metric_name == 'don_per_imp': |
396 | | - return 10 |
397 | | - elif metric_name == 'amt50_per_imp': |
398 | | - return 12 |
399 | | - else: |
400 | | - return -1 |
401 | | - else: |
402 | | - return 'no such table' |
403 | | - |
404 | | - def get_plot_title(self, query_name): |
405 | | - if query_name == 'report_banner_impressions_by_hour': |
406 | | - return 'Banner Impressions Over the Past 24 Hours' |
407 | | - elif query_name == 'report_lp_views_by_hour': |
408 | | - return 'Landing Page Views Over the Past 24 Hours' |
409 | | - else: |
410 | | - return 'no such table' |
411 | | - |
412 | | - def get_plot_ylabel(self, query_name): |
413 | | - if query_name == 'report_banner_impressions_by_hour': |
414 | | - return 'IMPRESSIONS' |
415 | | - elif query_name == 'report_lp_views_by_hour': |
416 | | - return 'VIEWS' |
417 | | - else: |
418 | | - return'no such table' |
419 | | - |
420 | | - def get_metric_full_name(self, metric_name): |
421 | | - if metric_name == 'imp': |
422 | | - return 'IMPRESSIONS' |
423 | | - elif metric_name == 'view': |
424 | | - return 'VIEWS' |
425 | | - elif metric_name == 'don_per_imp': |
426 | | - return 'DONATIONS PER IMPRESSION' |
427 | | - elif metric_name == 'don_per_view': |
428 | | - return 'DONATIONS PER VIEW' |
429 | | - elif metric_name == 'amt50_per_imp': |
430 | | - return 'AMOUNT50 PER IMPRESSION' |
431 | | - elif metric_name == 'amt50_per_view': |
432 | | - return 'AMOUNT50 PER VIEW' |
433 | | - elif metric_name == 'amount50': |
434 | | - return 'AMOUNT50' |
435 | | - elif metric_name == 'donations': |
436 | | - return 'DONATIONS' |
437 | | - else: |
438 | | - return'no such table' |
\ No newline at end of file |
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/compute_confidence.py |
— | — | @@ -2,9 +2,14 @@ |
3 | 3 | |
4 | 4 | """ |
5 | 5 | |
6 | | -Generates confidence estimate for a test |
| 6 | +This module defines reporting and analysis for determining the statistical confidence |
| 7 | +of of choice metrics over time. |
7 | 8 | |
| 9 | +!!MODIFY!! |
8 | 10 | |
| 11 | +- This should extend DataReporting.DataReporting |
| 12 | +- The querying functionality should be exported to DataLoader |
| 13 | + |
9 | 14 | """ |
10 | 15 | |
11 | 16 | __author__ = "Ryan Faulkner" |
— | — | @@ -33,9 +38,13 @@ |
34 | 39 | |
35 | 40 | |
36 | 41 | METHODS: |
37 | | - |
38 | | - |
39 | | - |
| 42 | + query_tables |
| 43 | + get_time_lists |
| 44 | + gen_plot |
| 45 | + run_test |
| 46 | + compute_parameters |
| 47 | + print_metrics |
| 48 | + |
40 | 49 | """ |
41 | 50 | class ConfidenceTest(DataLoader): |
42 | 51 | |
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US banner imp.png |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/fundraiser-statistics/fundraiser-scripts/classes/20101230JA091_US banner imp.png |
___________________________________________________________________ |
Added: svn:mime-type |
43 | 52 | + application/octet-stream |
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/QueryData.py |
— | — | @@ -0,0 +1,431 @@ |
| 2 | + |
| 3 | +""" |
| 4 | + |
| 5 | +This module effectively functions as a Singleton class. |
| 6 | + |
| 7 | +This module contains and organizes query info. Depends on the contents of ../sql/ where filenames are |
| 8 | +coupled with query_name parameters |
| 9 | + |
| 10 | + METHODS: |
| 11 | + |
| 12 | + format_query |
| 13 | + get_query |
| 14 | + get_query_header |
| 15 | + get_key_index |
| 16 | + get_count_index |
| 17 | + get_time_index |
| 18 | + get_campaign_index |
| 19 | + get_banner_index |
| 20 | + get_landing_page_index |
| 21 | + get_metric_index |
| 22 | + get_plot_title |
| 23 | + get_plot_ylabel |
| 24 | + get_metric_full_name |
| 25 | + |
| 26 | +""" |
| 27 | + |
| 28 | +__author__ = "Ryan Faulkner" |
| 29 | +__revision__ = "$Rev$" |
| 30 | +__date__ = "November 28th, 2010" |
| 31 | + |
| 32 | + |
| 33 | + |
| 34 | +def format_query(query_name, sql_stmnt, args): |
| 35 | + |
| 36 | + if query_name == 'report_campaign_ecomm': |
| 37 | + start_time = args[0] |
| 38 | + sql_stmnt = sql_stmnt % (start_time) |
| 39 | + |
| 40 | + elif query_name == 'report_campaign_logs': |
| 41 | + start_time = args[0] |
| 42 | + sql_stmnt = sql_stmnt % (start_time, start_time, start_time) |
| 43 | + |
| 44 | + elif query_name == 'report_campaign_ecomm_by_hr': |
| 45 | + start_time = args[0] |
| 46 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time) |
| 47 | + |
| 48 | + elif query_name == 'report_campaign_logs_by_hr': |
| 49 | + start_time = args[0] |
| 50 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \ |
| 51 | + start_time, '%', '%', '%', '%', start_time, '%') |
| 52 | + |
| 53 | + elif query_name == 'report_impressions_country': |
| 54 | + start_time = args[0] |
| 55 | + sql_stmnt = sql_stmnt % ('%', '%', '%', start_time) |
| 56 | + |
| 57 | + elif query_name == 'report_campaign_logs_by_min': |
| 58 | + start_time = args[0] |
| 59 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, '%', '%', '%', '%', \ |
| 60 | + start_time, '%', '%', '%', '%', start_time) |
| 61 | + |
| 62 | + elif query_name == 'report_non_US_clicks': |
| 63 | + start_time = args[0] |
| 64 | + sql_stmnt = sql_stmnt % ('%', '%', '%', start_time, '%', '%', '%', start_time) |
| 65 | + |
| 66 | + elif query_name == 'report_contribution_tracking': |
| 67 | + start_time = args[0] |
| 68 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', '%',start_time) |
| 69 | + |
| 70 | + elif query_name == 'report_total_amounts_by_hr': |
| 71 | + start_time = args[0] |
| 72 | + end_time = args[1] |
| 73 | + sql_stmnt = sql_stmnt % ('%', '%', '%', ' %H', start_time, end_time) |
| 74 | + |
| 75 | + elif query_name == 'report_total_amounts_by_day': |
| 76 | + start_time = args[0] |
| 77 | + end_time = args[1] |
| 78 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '', start_time, end_time) |
| 79 | + |
| 80 | + elif query_name == 'report_bannerLP_metrics': |
| 81 | + start_time = args[0] |
| 82 | + end_time = args[1] |
| 83 | + campaign = args[2] |
| 84 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, '%', '%', '%', '%', start_time, end_time, campaign, '%', '%', '%', '%', start_time, end_time, campaign) |
| 85 | + |
| 86 | + elif query_name == 'report_latest_campaign': |
| 87 | + start_time = args[0] |
| 88 | + sql_stmnt = sql_stmnt % (start_time) |
| 89 | + |
| 90 | + elif query_name == 'report_confidence_banner': |
| 91 | + start = args[0] |
| 92 | + end = args[1] |
| 93 | + cmpgn = args[2] |
| 94 | + banner = args[3] |
| 95 | + sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, banner, '%','%','%','%','10','10', start, end, cmpgn, banner, \ |
| 96 | + '%','%','%','%','10','10', start, end, cmpgn, banner) |
| 97 | + |
| 98 | + elif query_name == 'report_confidence_lp': |
| 99 | + start = args[0] |
| 100 | + end = args[1] |
| 101 | + cmpgn = args[2] |
| 102 | + banner = args[3] |
| 103 | + sql_stmnt = sql_stmnt % ('%','%','%','%','10','10', start, end, cmpgn, banner, \ |
| 104 | + '%','%','%','%','10','10', start, end, cmpgn, banner) |
| 105 | + |
| 106 | + elif query_name == 'report_banner_impressions_by_hour': |
| 107 | + start = args[0] |
| 108 | + end = args[1] |
| 109 | + sql_stmnt = sql_stmnt % ('%','%','%','%', start, end) |
| 110 | + |
| 111 | + elif query_name == 'report_lp_views_by_hour': |
| 112 | + start = args[0] |
| 113 | + end = args[1] |
| 114 | + sql_stmnt = sql_stmnt % ('%','%','%','%', start, end) |
| 115 | + |
| 116 | + elif query_name == 'report_banner_confidence': |
| 117 | + start = args[0] |
| 118 | + end = args[1] |
| 119 | + banner = args[2] |
| 120 | + campaign = args[3] |
| 121 | + sql_stmnt = sql_stmnt % (start, end, banner, start, end, campaign, start, end, banner, start, end, campaign, banner) |
| 122 | + |
| 123 | + elif query_name == 'report_LP_confidence': |
| 124 | + start = args[0] |
| 125 | + end = args[1] |
| 126 | + lp = args[2] |
| 127 | + campaign = args[3] |
| 128 | + sql_stmnt = sql_stmnt % (start, end, campaign, lp, start, end, campaign, lp) |
| 129 | + |
| 130 | + elif query_name == 'report_bannerLP_confidence': |
| 131 | + start = args[0] |
| 132 | + end = args[1] |
| 133 | + banner = args[2] |
| 134 | + lp = args[3] |
| 135 | + campaign = args[4] |
| 136 | + sql_stmnt = sql_stmnt % (start, end, banner, start, end, banner, campaign, start, end, banner, lp, campaign, start, end, banner, lp, campaign) |
| 137 | + |
| 138 | + elif query_name == 'report_ecomm_by_amount': |
| 139 | + start_time = args[0] |
| 140 | + end_time = args[1] |
| 141 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', start_time, end_time, end_time) |
| 142 | + |
| 143 | + elif query_name == 'report_ecomm_by_contact': |
| 144 | + where_str = args[0] |
| 145 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', where_str) |
| 146 | + |
| 147 | + elif query_name == 'report_banner_metrics_minutely': |
| 148 | + start_time = args[0] |
| 149 | + end_time = args[1] |
| 150 | + campaign = args[2] |
| 151 | + interval = args[3] |
| 152 | + imp_start_time = args[4] |
| 153 | + |
| 154 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, imp_start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign, \ |
| 155 | + '%', '%', '%', '%', interval, interval, start_time, end_time, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign) |
| 156 | + |
| 157 | + elif query_name == 'report_LP_metrics_minutely': |
| 158 | + start_time = args[0] |
| 159 | + end_time = args[1] |
| 160 | + campaign = args[2] |
| 161 | + interval = args[3] |
| 162 | + |
| 163 | + sql_stmnt = sql_stmnt % ('%', '%', '%', '%', interval, interval, start_time, end_time, campaign, '%', '%', '%', '%', interval, interval, start_time, end_time, campaign) |
| 164 | + |
| 165 | + else: |
| 166 | + return 'no such table\n' |
| 167 | + |
| 168 | + return sql_stmnt |
| 169 | + |
| 170 | +def get_query(query_name): |
| 171 | + if query_name == 'report_campaign_logs_by_min': |
| 172 | + return '' |
| 173 | + elif query_name == '': |
| 174 | + return '' |
| 175 | + else: |
| 176 | + return 'no such table' |
| 177 | + |
| 178 | +def get_query_header(query_name): |
| 179 | + if query_name == 'report_contribution_tracking': |
| 180 | + return ['Time','Banner','Landing Page','Campaign','Converted Amount', 'Suffix'] |
| 181 | + elif query_name == 'report_ecomm_by_amount': |
| 182 | + return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount', 'First Donation?', 'Date of First'] |
| 183 | + elif query_name == 'report_ecomm_by_contact': |
| 184 | + return ['Timestamp','First Name','Last Name','Country','ISO Code', 'Amount'] |
| 185 | + else: |
| 186 | + return 'no such table' |
| 187 | + |
| 188 | +""" Returns the index of the key for the query data """ |
| 189 | +def get_key_index(query_name): |
| 190 | + if query_name == 'report_banner_metrics_minutely': |
| 191 | + return 1 |
| 192 | + elif query_name == 'report_LP_metrics_minutely': |
| 193 | + return 1 |
| 194 | + |
| 195 | +def get_count_index(query_name): |
| 196 | + if query_name == 'report_lp_views_by_hour': |
| 197 | + return 1 |
| 198 | + elif query_name == 'report_banner_impressions_by_hour': |
| 199 | + return 1 |
| 200 | + else: |
| 201 | + return -1 |
| 202 | + |
| 203 | +def get_time_index(query_name): |
| 204 | + if query_name == 'report_campaign_logs_by_min': |
| 205 | + return 0 |
| 206 | + elif query_name == 'report_campaign_logs_by_hr': |
| 207 | + return 0 |
| 208 | + elif query_name == 'report_non_US_clicks': |
| 209 | + return 0 |
| 210 | + elif query_name == 'report_contribution_tracking': |
| 211 | + return 0 |
| 212 | + elif query_name == 'report_bannerLP_metrics': |
| 213 | + return 0 |
| 214 | + elif query_name == 'report_latest_campaign': |
| 215 | + return 1 |
| 216 | + elif query_name == 'report_banner_impressions_by_hour': |
| 217 | + return 0 |
| 218 | + elif query_name == 'report_lp_views_by_hour': |
| 219 | + return 0 |
| 220 | + elif query_name == 'report_banner_metrics_minutely': |
| 221 | + return 0 |
| 222 | + elif query_name == 'report_LP_metrics_minutely': |
| 223 | + return 0 |
| 224 | + else: |
| 225 | + return -1 |
| 226 | + |
| 227 | +def get_campaign_index(query_name): |
| 228 | + if query_name == 'report_campaign_logs_by_min': |
| 229 | + return 2 |
| 230 | + elif query_name == 'report_campaign_logs_by_hr': |
| 231 | + return 1 |
| 232 | + elif query_name == 'report_contribution_tracking': |
| 233 | + return 3 |
| 234 | + elif query_name == 'report_bannerLP_metrics': |
| 235 | + return 1 |
| 236 | + elif query_name == 'report_latest_campaign': |
| 237 | + return 0 |
| 238 | + else: |
| 239 | + return -1 |
| 240 | + |
| 241 | +def get_banner_index(query_name): |
| 242 | + if query_name == 'report_campaign_logs_by_min': |
| 243 | + return 3 |
| 244 | + elif query_name == 'report_campaign_logs_by_hr': |
| 245 | + return 2 |
| 246 | + elif query_name == 'report_contribution_tracking': |
| 247 | + return 1 |
| 248 | + elif query_name == 'report_bannerLP_metrics': |
| 249 | + return 1 |
| 250 | + elif query_name == 'report_banner_metrics_minutely': |
| 251 | + return 1 |
| 252 | + else: |
| 253 | + return -1 |
| 254 | + |
| 255 | +def get_landing_page_index(query_name): |
| 256 | + if query_name == 'report_campaign_logs_by_min': |
| 257 | + return 4 |
| 258 | + elif query_name == 'report_campaign_logs_by_hr': |
| 259 | + return 3 |
| 260 | + elif query_name == 'report_non_US_clicks': |
| 261 | + return 2 |
| 262 | + elif query_name == 'report_contribution_tracking': |
| 263 | + return 2 |
| 264 | + elif query_name == 'report_bannerLP_metrics': |
| 265 | + return 1 |
| 266 | + elif query_name == 'report_LP_metrics_minutely': |
| 267 | + return 1 |
| 268 | + else: |
| 269 | + return -1 |
| 270 | + |
| 271 | +def get_metric_index(query_name, metric_name): |
| 272 | + if query_name == 'report_campaign_logs_by_min': |
| 273 | + if metric_name == 'click_rate': |
| 274 | + return 9 |
| 275 | + elif query_name == 'report_campaign_logs_by_hr': |
| 276 | + if metric_name == 'click_rate': |
| 277 | + return 8 |
| 278 | + elif query_name == 'report_contribution_tracking': |
| 279 | + if metric_name == 'converted_amount': |
| 280 | + return 4 |
| 281 | + elif query_name == 'report_bannerLP_metrics': |
| 282 | + if metric_name == 'total_impressions': |
| 283 | + return 2 |
| 284 | + elif metric_name == 'impressions': |
| 285 | + return 3 |
| 286 | + elif metric_name == 'views': |
| 287 | + return 4 |
| 288 | + elif metric_name == 'clicks': |
| 289 | + return 5 |
| 290 | + elif metric_name == 'donations': |
| 291 | + return 6 |
| 292 | + elif metric_name == 'amount': |
| 293 | + return 7 |
| 294 | + elif metric_name == 'click_rate': |
| 295 | + return 8 |
| 296 | + elif metric_name == 'completion_rate': |
| 297 | + return 9 |
| 298 | + elif metric_name == 'don_per_imp': |
| 299 | + return 10 |
| 300 | + elif metric_name == 'amt_per_imp': |
| 301 | + return 11 |
| 302 | + elif metric_name == 'don_per_view': |
| 303 | + return 12 |
| 304 | + elif metric_name == 'amt_per_view': |
| 305 | + return 13 |
| 306 | + elif metric_name == 'amt_per_donation': |
| 307 | + return 14 |
| 308 | + elif metric_name == 'max_amt': |
| 309 | + return 15 |
| 310 | + elif metric_name == 'pp_don': |
| 311 | + return 16 |
| 312 | + elif metric_name == 'cc_don': |
| 313 | + return 17 |
| 314 | + elif metric_name == 'paypal_click_thru': |
| 315 | + return 18 |
| 316 | + elif metric_name == 'creditcard_click_thru': |
| 317 | + return 19 |
| 318 | + else: |
| 319 | + return -1 |
| 320 | + elif query_name == 'report_banner_confidence': |
| 321 | + if metric_name == 'click_rate': |
| 322 | + return 7 |
| 323 | + elif metric_name == 'don_per_imp': |
| 324 | + return 9 |
| 325 | + elif metric_name == 'amt_per_imp': |
| 326 | + return 10 |
| 327 | + elif metric_name == 'amt50_per_imp': |
| 328 | + return 14 |
| 329 | + elif metric_name == 'amt100_per_imp': |
| 330 | + return 15 |
| 331 | + else: |
| 332 | + return -1 |
| 333 | + elif query_name == 'report_LP_confidence': |
| 334 | + if metric_name == 'completion_rate': |
| 335 | + return 5 |
| 336 | + elif metric_name == 'don_per_view': |
| 337 | + return 6 |
| 338 | + elif metric_name == 'amt_per_view': |
| 339 | + return 7 |
| 340 | + elif metric_name == 'amt_per_donation': |
| 341 | + return 8 |
| 342 | + elif metric_name == 'amt50_per_view': |
| 343 | + return 9 |
| 344 | + elif metric_name == 'amt100_per_view': |
| 345 | + return 10 |
| 346 | + else: |
| 347 | + return -1 |
| 348 | + elif query_name == 'report_bannerLP_confidence': |
| 349 | + if metric_name == 'click_rate': |
| 350 | + return 7 |
| 351 | + elif metric_name == 'completion_rate': |
| 352 | + return 8 |
| 353 | + elif metric_name == 'don_per_imp': |
| 354 | + return 9 |
| 355 | + elif metric_name == 'amt_per_imp': |
| 356 | + return 10 |
| 357 | + elif metric_name == 'don_per_view': |
| 358 | + return 11 |
| 359 | + elif metric_name == 'amt_per_view': |
| 360 | + return 12 |
| 361 | + elif metric_name == 'amt_per_donation': |
| 362 | + return 13 |
| 363 | + elif metric_name == 'amt50_per_imp': |
| 364 | + return 14 |
| 365 | + elif metric_name == 'amt100_per_imp': |
| 366 | + return 15 |
| 367 | + else: |
| 368 | + return -1 |
| 369 | + elif query_name == 'report_LP_metrics_minutely': |
| 370 | + if metric_name == 'views': |
| 371 | + return 2 |
| 372 | + elif metric_name == 'donations': |
| 373 | + return 4 |
| 374 | + elif metric_name == 'amount50': |
| 375 | + return 5 |
| 376 | + elif metric_name == 'don_per_view': |
| 377 | + return 7 |
| 378 | + elif metric_name == 'amt50_per_view': |
| 379 | + return 9 |
| 380 | + else: |
| 381 | + return -1 |
| 382 | + elif query_name == 'report_banner_metrics_minutely': |
| 383 | + if metric_name == 'imp': |
| 384 | + return 2 |
| 385 | + elif metric_name == 'donations': |
| 386 | + return 5 |
| 387 | + elif metric_name == 'amount50': |
| 388 | + return 7 |
| 389 | + elif metric_name == 'don_per_imp': |
| 390 | + return 10 |
| 391 | + elif metric_name == 'amt50_per_imp': |
| 392 | + return 12 |
| 393 | + else: |
| 394 | + return -1 |
| 395 | + else: |
| 396 | + return 'no such table' |
| 397 | + |
| 398 | +def get_plot_title(query_name): |
| 399 | + if query_name == 'report_banner_impressions_by_hour': |
| 400 | + return 'Banner Impressions Over the Past 24 Hours' |
| 401 | + elif query_name == 'report_lp_views_by_hour': |
| 402 | + return 'Landing Page Views Over the Past 24 Hours' |
| 403 | + else: |
| 404 | + return 'no such table' |
| 405 | + |
| 406 | +def get_plot_ylabel(query_name): |
| 407 | + if query_name == 'report_banner_impressions_by_hour': |
| 408 | + return 'IMPRESSIONS' |
| 409 | + elif query_name == 'report_lp_views_by_hour': |
| 410 | + return 'VIEWS' |
| 411 | + else: |
| 412 | + return'no such table' |
| 413 | + |
| 414 | +def get_metric_full_name(metric_name): |
| 415 | + if metric_name == 'imp': |
| 416 | + return 'IMPRESSIONS' |
| 417 | + elif metric_name == 'view': |
| 418 | + return 'VIEWS' |
| 419 | + elif metric_name == 'don_per_imp': |
| 420 | + return 'DONATIONS PER IMPRESSION' |
| 421 | + elif metric_name == 'don_per_view': |
| 422 | + return 'DONATIONS PER VIEW' |
| 423 | + elif metric_name == 'amt50_per_imp': |
| 424 | + return 'AMOUNT50 PER IMPRESSION' |
| 425 | + elif metric_name == 'amt50_per_view': |
| 426 | + return 'AMOUNT50 PER VIEW' |
| 427 | + elif metric_name == 'amount50': |
| 428 | + return 'AMOUNT50' |
| 429 | + elif metric_name == 'donations': |
| 430 | + return 'DONATIONS' |
| 431 | + else: |
| 432 | + return'no such table' |
\ No newline at end of file |
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py |
— | — | @@ -1,4 +1,15 @@ |
| 2 | +""" |
2 | 3 | |
| 4 | +This module provides access to the datasource and enables querying. The class |
| 5 | +atructure defined has DataLoader as the base which outlines the basic members |
| 6 | +and functionality. This interface is extended for interaction with specific |
| 7 | +sources of data. |
| 8 | + |
| 9 | +These classes are used to define the data sources for the DataReporting family of |
| 10 | +classes in an Adapter structural pattern. |
| 11 | + |
| 12 | +""" |
| 13 | + |
3 | 14 | __author__ = "Ryan Faulkner" |
4 | 15 | __revision__ = "$Rev$" |
5 | 16 | __date__ = "April 8th, 2010" |
— | — | @@ -6,8 +17,13 @@ |
7 | 18 | |
8 | 19 | import sys |
9 | 20 | import MySQLdb |
| 21 | +import math |
| 22 | +import datetime |
| 23 | + |
| 24 | +import miner_help as mh |
| 25 | +import QueryData as QD |
| 26 | +import TimestampProcessor as TP |
10 | 27 | |
11 | | - |
12 | 28 | """ |
13 | 29 | |
14 | 30 | CLASS :: DataLoader |
— | — | @@ -18,28 +34,174 @@ |
19 | 35 | """ |
20 | 36 | class DataLoader(object): |
21 | 37 | |
22 | | - # Database and Cursor objects |
23 | | - db = None |
24 | | - cur = None |
| 38 | + """ Database and Cursor objects """ |
| 39 | + _db_ = None |
| 40 | + _cur_ = None |
| 41 | + _sql_path_ = '../sql/' # Relative path for SQL files to be processed |
| 42 | + _query_names_ = dict() |
25 | 43 | |
26 | 44 | def init_db(self): |
| 45 | + |
27 | 46 | """ Establish connection """ |
28 | 47 | #db = MySQLdb.connect(host='db10.pmtpa.wmnet', user='rfaulk', db='faulkner') |
29 | | - self.db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307) |
| 48 | + self._db_ = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307) |
30 | 49 | #self.db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner') |
31 | | - |
| 50 | + |
32 | 51 | """ Create cursor """ |
33 | | - self.cur = self.db.cursor() |
| 52 | + self._cur_ = self._db_.cursor() |
34 | 53 | |
35 | 54 | def close_db(self): |
36 | | - self.cur.close() |
37 | | - self.db.close() |
| 55 | + self._cur_.close() |
| 56 | + self._db_.close() |
| 57 | + |
| 58 | + """ |
| 59 | + <DESCRIPTION> |
38 | 60 | |
| 61 | + INPUT: |
| 62 | + query_type - |
| 63 | + |
| 64 | + |
| 65 | + RETURN: |
| 66 | + - |
| 67 | + |
| 68 | + """ |
| 69 | + def get_sql_filename_for_query(self, query_type): |
| 70 | + return '' |
39 | 71 | |
40 | 72 | class IntervalReportingLoader(DataLoader): |
| 73 | + |
| 74 | + def __init__(self): |
| 75 | + self._query_names_['banner'] = 'report_banner_metrics_minutely' |
| 76 | + self._query_names_['LP'] = 'report_LP_metrics_minutely' |
| 77 | + |
| 78 | + def get_sql_filename_for_query(self, query_type): |
| 79 | + return self._query_names_[query_type] |
| 80 | + |
| 81 | + """ |
| 82 | + <DESCRIPTION> |
| 83 | + |
| 84 | + INPUT: |
| 85 | + start_time - |
| 86 | + end_time - |
| 87 | + interval - |
| 88 | + query_type - |
| 89 | + metric_name - |
| 90 | + campaign - |
| 91 | + |
| 92 | + |
| 93 | + RETURN: |
| 94 | + metrics - |
| 95 | + times - |
| 96 | + """ |
| 97 | + def run_query(self, start_time, end_time, interval, query_type, metric_name, campaign): |
| 98 | + |
| 99 | + self.init_db() |
| 100 | + |
| 101 | + try: |
| 102 | + query_name = self.get_sql_filename_for_query(query_type) |
| 103 | + except KeyError: |
| 104 | + print 'Could not find a query for type: ' + query_type |
| 105 | + sys.exit(2) |
| 106 | + |
| 107 | + metrics = mh.AutoVivification() |
| 108 | + times = mh.AutoVivification() |
| 109 | + times_norm = mh.AutoVivification() |
| 110 | + |
| 111 | + """ Compose datetime objects to represent the first and last intervals """ |
| 112 | + start_time_obj = TP.timestamp_to_obj(start_time, 1) |
| 113 | + start_time_obj = start_time_obj.replace(minute=int(math.floor(start_time_obj.minute / interval) * interval)) |
| 114 | + start_time_obj_str = TP.timestamp_from_obj(start_time_obj, 1, 3) |
| 115 | + |
| 116 | + end_time_obj = TP.timestamp_to_obj(end_time, 1) |
| 117 | + # end_time_obj = end_time_obj + datetime.timedelta(seconds=-1) |
| 118 | + end_time_obj = end_time_obj.replace(minute=int(math.floor(end_time_obj.minute / interval) * interval)) |
| 119 | + end_time_obj_str = TP.timestamp_from_obj(end_time_obj, 1, 3) |
| 120 | + |
| 121 | + """ The start time for the impression portion of the query should be one second less""" |
| 122 | + |
| 123 | + imp_start_time_obj = start_time_obj + datetime.timedelta(seconds=-1) |
| 124 | + imp_start_time_obj_str = TP.timestamp_from_obj(imp_start_time_obj, 1, 3) |
| 125 | + |
| 126 | + """ Load the SQL File & Format """ |
| 127 | + filename = self._sql_path_+ query_name + '.sql' |
| 128 | + sql_stmnt = mh.read_sql(filename) |
| 129 | + |
| 130 | + sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign, interval, imp_start_time_obj_str]) |
| 131 | + |
| 132 | + """ Get Indexes into Query """ |
| 133 | + key_index = QD.get_banner_index(query_name) |
| 134 | + metric_index = QD.get_metric_index(query_name, metric_name) |
| 135 | + time_index = QD.get_time_index(query_name) |
| 136 | + |
| 137 | + """ Compose the data for each separate donor pipeline artifact """ |
| 138 | + try: |
| 139 | + err_msg = sql_stmnt |
| 140 | + self._cur_.execute(sql_stmnt) |
| 141 | + |
| 142 | + results = self._cur_.fetchall() |
| 143 | + final_time = dict() # stores the last timestamp seen |
| 144 | + interval_obj = datetime.timedelta(minutes=interval) # timedelta object used to shift times by _interval_ minutes |
| 145 | + |
| 146 | + for row in results: |
| 147 | + |
| 148 | + key_name = row[key_index] |
| 149 | + time_obj = TP.timestamp_to_obj(row[time_index], 1) # format = 1, 14-digit TS |
| 150 | + |
| 151 | + """ For each new dictionary index by key name start a new list if its not already there """ |
| 152 | + try: |
| 153 | + metrics[key_name].append(row[metric_index]) |
| 154 | + times[key_name].append(time_obj + interval_obj) |
| 155 | + final_time[key_name] = row[time_index] |
| 156 | + except: |
| 157 | + metrics[key_name] = list() |
| 158 | + times[key_name] = list() |
| 159 | + |
| 160 | + """ If the first element is not the start time add it |
| 161 | + this will be the case if there is no data for the first interval |
| 162 | + NOTE: two datapoints are added at the beginning to define the first interval """ |
| 163 | + if start_time_obj_str != row[time_index]: |
| 164 | + times[key_name].append(start_time_obj) |
| 165 | + metrics[key_name].append(0.0) |
| 166 | + |
| 167 | + times[key_name].append(start_time_obj + interval_obj) |
| 168 | + metrics[key_name].append(0.0) |
| 169 | + else: |
| 170 | + metrics[key_name].append(row[metric_index]) |
| 171 | + times[key_name].append(time_obj) |
| 172 | + |
| 173 | + metrics[key_name].append(row[metric_index]) |
| 174 | + times[key_name].append(time_obj + interval_obj) |
| 175 | + |
| 176 | + |
| 177 | + except Exception as inst: |
| 178 | + print type(inst) # the exception instance |
| 179 | + print inst.args # arguments stored in .args |
| 180 | + print inst # __str__ allows args to printed directly |
| 181 | + |
| 182 | + self._db_.rollback() |
| 183 | + sys.exit(0) |
| 184 | + |
| 185 | + |
| 186 | + |
| 187 | + """ Ensure that the last time in the list is the endtime less the interval """ |
| 188 | + |
| 189 | + for key in times.keys(): |
| 190 | + if final_time[key_name] != end_time_obj_str: |
| 191 | + times[key].append(end_time_obj) |
| 192 | + metrics[key].append(0.0) |
| 193 | + |
| 194 | + self.close_db() |
| 195 | + |
| 196 | + """ Convert counts to float (from Decimal) to prevent exception when bar plotting |
| 197 | + Bbox::update_numerix_xy expected numerix array """ |
| 198 | + for key in metrics.keys(): |
| 199 | + metrics_new = list() |
| 200 | + for i in range(len(metrics[key])): |
| 201 | + metrics_new.append(float(metrics[key][i])) |
| 202 | + metrics[key] = metrics_new |
| 203 | + |
| 204 | + return [metrics, times] |
41 | 205 | |
42 | | - def run_query(self): |
43 | | - return |
44 | 206 | |
45 | 207 | class BannerLPReportingLoader(DataLoader): |
46 | 208 | |
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataReporting.py |
— | — | @@ -0,0 +1,999 @@ |
| 2 | + |
| 3 | + |
| 4 | +""" |
| 5 | + |
| 6 | +This module is used to define the reporting methodologies on different types of data. The base class |
| 7 | +DataReporting is defined to outline the general functionality of the reporting architecture and |
| 8 | +functionality which includes generating the data via a dataloader object and transforming the data |
| 9 | +among different reporting mediums including matlab plots (primary medium) and html tables. |
| 10 | + |
| 11 | +The DataLoader class decouples the data access of the reports using the Adapter structural pattern. |
| 12 | + |
| 13 | +""" |
| 14 | + |
| 15 | +__author__ = "Ryan Faulkner" |
| 16 | +__revision__ = "$Rev$" |
| 17 | +__date__ = "December 16th, 2010" |
| 18 | + |
| 19 | + |
| 20 | +import sys |
| 21 | +sys.path.append('../') |
| 22 | + |
| 23 | +import matplotlib |
| 24 | +import datetime |
| 25 | +import MySQLdb |
| 26 | +import pylab |
| 27 | +import HTML |
| 28 | +import math |
| 29 | + |
| 30 | +import QueryData as QD |
| 31 | +import miner_help as mh |
| 32 | +import TimestampProcessor as TP |
| 33 | +import DataLoader as DL |
| 34 | + |
| 35 | +matplotlib.use('Agg') |
| 36 | + |
| 37 | + |
| 38 | + |
| 39 | +""" |
| 40 | + |
| 41 | + BASE CLASS :: DataReporting |
| 42 | + |
| 43 | + Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include: |
| 44 | + |
| 45 | + METHODS: |
| 46 | + |
| 47 | + run_query - format and execute the query to obtain data |
| 48 | + gen_plot - plots the results of the report |
| 49 | + write_to_html_table - writes the results to an HTML table |
| 50 | + run |
| 51 | + |
| 52 | +""" |
| 53 | +class DataReporting(object): |
| 54 | + |
| 55 | + _data_loader_ = None |
| 56 | + |
| 57 | + """ |
| 58 | + |
| 59 | + Smooths a list of values |
| 60 | + |
| 61 | + INPUT: |
| 62 | + values - a list of datetime objects |
| 63 | + window_length - indicate whether the list counts back from the end |
| 64 | + |
| 65 | + RETURN: |
| 66 | + new_values - list of smoothed values |
| 67 | + |
| 68 | + """ |
| 69 | + def smooth(self, values, window_length): |
| 70 | + |
| 71 | + window_length = int(math.floor(window_length / 2)) |
| 72 | + |
| 73 | + if window_length < 1: |
| 74 | + return values |
| 75 | + |
| 76 | + list_len = len(values) |
| 77 | + new_values = list() |
| 78 | + |
| 79 | + for i in range(list_len): |
| 80 | + index_left = max([0, i - window_length]) |
| 81 | + index_right = min([list_len - 1, i + window_length]) |
| 82 | + |
| 83 | + width = index_right - index_left + 1 |
| 84 | + |
| 85 | + new_val = sum(values[index_left : (index_right + 1)]) / width |
| 86 | + new_values.append(new_val) |
| 87 | + |
| 88 | + return new_values |
| 89 | + |
| 90 | + """ |
| 91 | + |
| 92 | + workaround for issue with tuple objects in HTML.py |
| 93 | + MySQLdb returns unfamiliar tuple elements from its fetchall() method |
| 94 | + this is probably a version problem since the issue popped up in 2.5 but not 2.6 |
| 95 | + |
| 96 | + INPUT: |
| 97 | + row - row object returned from MySQLdb.fetchall() |
| 98 | + |
| 99 | + RETURN: |
| 100 | + l - a list of tuple objects from the db |
| 101 | + |
| 102 | + """ |
| 103 | + def listify(self, row): |
| 104 | + l = [] |
| 105 | + for i in row: |
| 106 | + l.append(i) |
| 107 | + return l |
| 108 | + |
| 109 | + |
| 110 | + """ |
| 111 | + |
| 112 | + To be overloaded by subclasses for specific types of queries |
| 113 | + |
| 114 | + INPUT: |
| 115 | + values - a list of datetime objects |
| 116 | + window_length - indicate whether the list counts back from the end |
| 117 | + |
| 118 | + RETURN: |
| 119 | + return_status - integer, 0 indicates un-exceptional execution |
| 120 | + |
| 121 | + """ |
| 122 | + def run_query(self, start_time, end_time, query_name, metric_name): |
| 123 | + return 0 |
| 124 | + |
| 125 | + |
| 126 | + """ |
| 127 | + |
| 128 | + To be overloaded by subclasses for different plotting behaviour |
| 129 | + |
| 130 | + INPUT: |
| 131 | + values - a list of datetime objects |
| 132 | + window_length - indicate whether the list counts back from the end |
| 133 | + |
| 134 | + RETURN: |
| 135 | + return_status - integer, 0 indicates un-exceptional execution |
| 136 | + |
| 137 | + """ |
| 138 | + def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, subplot_index, fname): |
| 139 | + return 0 |
| 140 | + |
| 141 | + """ |
| 142 | + |
| 143 | + To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py) |
| 144 | + |
| 145 | + INPUT: |
| 146 | + values - a list of datetime objects |
| 147 | + window_length - indicate whether the list counts back from the end |
| 148 | + |
| 149 | + RETURN: |
| 150 | + return_status - integer, 0 indicates un-exceptional execution |
| 151 | + |
| 152 | + """ |
| 153 | + def write_to_html_table(self): |
| 154 | + |
| 155 | + """ |
| 156 | + FROM TEST REPORTING |
| 157 | + |
| 158 | + query_obj = qs.query_store() |
| 159 | + |
| 160 | + # Populate the campaigns table |
| 161 | + s1 = 'drop table if exists campaigns;' |
| 162 | + s2 = 'create table campaigns as (select utm_campaign from drupal.contribution_tracking where ts > \'%s\' group by utm_campaign having count(*) > 100);' % (start_time) |
| 163 | + cur.execute(s1) |
| 164 | + cur.execute(s2) |
| 165 | + |
| 166 | + table_data = [] |
| 167 | + sql_stmnt = mh.read_sql(sql_path + query_type + '.sql'); |
| 168 | + |
| 169 | + # open the html file for writing |
| 170 | + f = open(html_path + query_type + '.html', 'w') |
| 171 | + |
| 172 | + format_start_time = start_time[0:4] + '-' + start_time[4:6] + '-' + start_time[6:8] + '-' + start_time[8:10] + 'HRs' |
| 173 | + |
| 174 | + # Formats the statement according to query type |
| 175 | + select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
| 176 | + |
| 177 | + # html formatting |
| 178 | + if query_type == 'report_campaign_ecomm': |
| 179 | + f.write('<br>Donation data since ' + format_start_time + ' ... <br><br>') |
| 180 | + select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
| 181 | + |
| 182 | + elif query_type == 'report_campaign_logs': |
| 183 | + f.write('<br>Impression and landing page data since ' + format_start_time+ ' ... <br><br>') |
| 184 | + select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
| 185 | + |
| 186 | + elif query_type == 'report_campaign_ecomm_by_hr': |
| 187 | + f.write('<br>Donation data by hour since ' + format_start_time + ' ... <br><br>') |
| 188 | + select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
| 189 | + |
| 190 | + elif query_type == 'report_campaign_logs_by_hr': |
| 191 | + f.write('<br>Impression and landing page by hour since ' + format_start_time + ' ... <br><br>') |
| 192 | + select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
| 193 | + |
| 194 | + else: |
| 195 | + select_stmnt = query_obj.format_query(query_type, sql_stmnt, [start_time]) |
| 196 | + |
| 197 | + try: |
| 198 | + err_msg = select_stmnt |
| 199 | + cur.execute(select_stmnt) |
| 200 | + |
| 201 | + results = cur.fetchall() |
| 202 | + |
| 203 | + for row in results: |
| 204 | + cpRow = listify(row) |
| 205 | + # t.rows.append(row) |
| 206 | + table_data.append(cpRow) |
| 207 | + |
| 208 | + except: |
| 209 | + db.rollback() |
| 210 | + sys.exit("Database Interface Exception:\n" + err_msg) |
| 211 | + |
| 212 | + |
| 213 | + t = HTML.table(table_data, header_row=header) |
| 214 | + htmlcode = str(t) |
| 215 | + |
| 216 | + f.write(htmlcode) |
| 217 | + f.close() |
| 218 | + |
| 219 | + return htmlcode |
| 220 | + |
| 221 | + """ |
| 222 | + return 0 |
| 223 | + |
| 224 | + |
| 225 | + |
| 226 | + """ |
| 227 | + |
| 228 | + The access point of DataReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc. |
| 229 | + To be overloaded by subclasses |
| 230 | + |
| 231 | + INPUT: |
| 232 | + |
| 233 | + RETURN: |
| 234 | + return_status - integer, 0 indicates un-exceptional execution |
| 235 | + |
| 236 | + """ |
| 237 | + def run(self): |
| 238 | + return |
| 239 | + |
| 240 | + |
| 241 | + |
| 242 | +""" |
| 243 | + |
| 244 | +CLASS :: ^TotalAmountsReporting^ |
| 245 | + |
| 246 | +This subclass handles reporting on total amounts for the fundraiser. |
| 247 | + |
| 248 | +""" |
| 249 | + |
| 250 | +class TotalAmountsReporting(DataReporting): |
| 251 | + |
| 252 | + def __init__(self): |
| 253 | + self.data = [] |
| 254 | + |
| 255 | + def run_query(self, start_time, end_time, query_name, descriptor): |
| 256 | + |
| 257 | + self.init_db() |
| 258 | + |
| 259 | + # Load the SQL File & Format |
| 260 | + filename = self._sql_path_ + query_name + '.sql' |
| 261 | + sql_stmnt = mh.read_sql(filename) |
| 262 | + sql_stmnt = QD.format_query(query_name + descriptor, sql_stmnt, [start_time, end_time]) |
| 263 | + |
| 264 | + labels = [None] * 21 |
| 265 | + labels[0] = 'clicks' |
| 266 | + labels[1] = 'donations' |
| 267 | + labels[2] = 'total amount' |
| 268 | + labels[3] = 'banner amount' |
| 269 | + labels[4] = 'US amount' |
| 270 | + labels[5] = 'EN amount' |
| 271 | + labels[6] = 'Other Amount' |
| 272 | + labels[7] = 'Email Amount' |
| 273 | + labels[8] = 'Recurring Guess' |
| 274 | + labels[9] = 'completion_rate' |
| 275 | + labels[10] = 'pp_clicks' |
| 276 | + labels[11] = 'pp_donations' |
| 277 | + labels[12] = 'pp_completion' |
| 278 | + labels[13] = 'pp_amount' |
| 279 | + labels[14] = 'pp_max_amount' |
| 280 | + labels[15] = 'cc_clicks' |
| 281 | + labels[16] = 'cc_donations' |
| 282 | + labels[17] = 'cc_completion' |
| 283 | + labels[18] = 'cc_amount' |
| 284 | + labels[19] = 'cc_max_amount' |
| 285 | + labels[20] = 'total_amt50' |
| 286 | + |
| 287 | + |
| 288 | + num_keys = len(labels) |
| 289 | + |
| 290 | + lists = list() |
| 291 | + for i in range(num_keys): |
| 292 | + lists.append(list()) |
| 293 | + |
| 294 | + # Composes the data for each banner |
| 295 | + try: |
| 296 | + err_msg = sql_stmnt |
| 297 | + self.cur.execute(sql_stmnt) |
| 298 | + |
| 299 | + # This query store records according to dates |
| 300 | + results = self.cur.fetchall() |
| 301 | + for row in results: |
| 302 | + for i in range(num_keys): |
| 303 | + lists[i].append(row[i+1]) |
| 304 | + |
| 305 | + except: |
| 306 | + self.db.rollback() |
| 307 | + sys.exit("Database Interface Exception:\n" + err_msg) |
| 308 | + |
| 309 | + self.close_db() |
| 310 | + |
| 311 | + # Only interested in amounts |
| 312 | + return [labels, lists] |
| 313 | + |
| 314 | + |
| 315 | + |
| 316 | + def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, ranges, subplot_index, fname): |
| 317 | + pylab.subplot(subplot_index) |
| 318 | + num_keys = len(y_lists) |
| 319 | + |
| 320 | + pylab.figure(num=None,figsize=[26,14]) |
| 321 | + line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','b--o','g--o','r--o','c--o','m--o','k--o'] |
| 322 | + |
| 323 | + for i in range(num_keys): |
| 324 | + pylab.plot(x, y_lists[i], line_types[i]) |
| 325 | + |
| 326 | + pylab.grid() |
| 327 | + pylab.xlim(ranges[0], ranges[1]) |
| 328 | + pylab.legend(labels,loc=2) |
| 329 | + |
| 330 | + pylab.xlabel(xlabel) |
| 331 | + pylab.ylabel(ylabel) |
| 332 | + |
| 333 | + pylab.title(title) |
| 334 | + pylab.savefig(fname+'.png', format='png') |
| 335 | + |
| 336 | + |
| 337 | + |
| 338 | + def run_hr(self, type): |
| 339 | + |
| 340 | + |
| 341 | + # Current date & time |
| 342 | + now = datetime.datetime.now() |
| 343 | + #UTC = 8 |
| 344 | + #delta = datetime.timedelta(hours=UTC) |
| 345 | + #now = now + delta |
| 346 | + |
| 347 | + |
| 348 | + """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """ |
| 349 | + hours_back = 24 |
| 350 | + times = self.gen_date_strings(now, hours_back,1,1) |
| 351 | + |
| 352 | + start_time = times[0] |
| 353 | + end_time = times[1] |
| 354 | + |
| 355 | + print '\nGenerating analytics total amount for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
| 356 | + |
| 357 | + # QUERY NAME |
| 358 | + query_name = 'report_total_amounts' |
| 359 | + |
| 360 | + |
| 361 | + # RUN BY HOUR |
| 362 | + descriptor = '_by_hr' |
| 363 | + return_val = self.run_query(start_time, end_time, query_name, descriptor) |
| 364 | + |
| 365 | + labels = return_val[0] # curve labels |
| 366 | + counts = return_val[1] # curve data - lists |
| 367 | + |
| 368 | + r = self.get_query_fields(labels, counts, type, start_time, end_time) |
| 369 | + labels = r[0] |
| 370 | + counts = r[1] |
| 371 | + title = r[2] |
| 372 | + ylabel = r[3] |
| 373 | + |
| 374 | + xlabel = 'Time - Hours' |
| 375 | + subplot_index = 111 |
| 376 | + |
| 377 | + # plot the curves |
| 378 | + time_range = range(len(counts[0])) |
| 379 | + for i in range(len(counts[0])): |
| 380 | + time_range[i] = time_range[i] - len(counts[0]) |
| 381 | + |
| 382 | + ranges = [min(time_range), max(time_range)] |
| 383 | + |
| 384 | + fname = query_name + descriptor + '_' + type |
| 385 | + self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname) |
| 386 | + |
| 387 | + |
| 388 | + |
| 389 | + def run_day(self,type): |
| 390 | + |
| 391 | + # Current date & time |
| 392 | + now = datetime.datetime.now() |
| 393 | + #UTC = 8 |
| 394 | + #delta = datetime.timedelta(hours=UTC) |
| 395 | + #now = now + delta |
| 396 | + |
| 397 | + |
| 398 | + """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=0 """ |
| 399 | + hours_back = 7 * 24 # 7 days back |
| 400 | + times = self.gen_date_strings(now, hours_back,1,0) |
| 401 | + |
| 402 | + start_time = times[0] |
| 403 | + end_time = times[1] |
| 404 | + |
| 405 | + print '\nGenerating analytics total amount for ' + str(days_back) + ' days back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
| 406 | + |
| 407 | + |
| 408 | + # FORMAT HEADERS & QUERY NAME |
| 409 | + query_name = 'report_total_amounts' |
| 410 | + descriptor = '_by_day' |
| 411 | + return_val = self.run_query(start_time, end_time, query_name, descriptor) |
| 412 | + |
| 413 | + labels = return_val[0] |
| 414 | + counts = return_val[1] |
| 415 | + |
| 416 | + r = self.get_query_fields(labels, counts, type, start_time, end_time) |
| 417 | + labels = r[0] |
| 418 | + counts = r[1] |
| 419 | + title = r[2] |
| 420 | + ylabel = r[3] |
| 421 | + |
| 422 | + xlabel = 'Time - Days' |
| 423 | + subplot_index = 111 |
| 424 | + |
| 425 | + # Plot values |
| 426 | + time_range = range(len(counts[0])) |
| 427 | + for i in range(len(counts[0])): |
| 428 | + time_range[i] = time_range[i] - len(counts[0]) |
| 429 | + |
| 430 | + ranges = [min(time_range), max(time_range)] |
| 431 | + |
| 432 | + fname = query_name + descriptor + '_' + type |
| 433 | + self.gen_plot(time_range, counts, labels, title, xlabel, ylabel, ranges, subplot_index, fname) |
| 434 | + |
| 435 | + |
| 436 | + def get_query_fields(self, labels, counts, type, start_time, end_time): |
| 437 | + |
| 438 | + if type == 'BAN_EM': |
| 439 | + indices = range(2,9) |
| 440 | + title = 'Total Amounts: ' + start_time + ' -- ' + end_time |
| 441 | + ylabel = 'Amount' |
| 442 | + elif type == 'CC_PP_completion': |
| 443 | + indices = [12,17] |
| 444 | + title = 'Credit Card & Paypal Completion Rates: ' + start_time + ' -- ' + end_time |
| 445 | + ylabel = 'Rate' |
| 446 | + elif type == 'CC_PP_amount': |
| 447 | + indices = [13,18] |
| 448 | + title = 'Credit Card & Paypal Total Amounts: ' + start_time + ' -- ' + end_time |
| 449 | + ylabel = 'Amount' |
| 450 | + elif type == 'AMT_VS_AMT50': |
| 451 | + indices = [2,20] |
| 452 | + title = 'Amount50 and Amount Totals: ' + start_time + ' -- ' + end_time |
| 453 | + ylabel = 'Amount' |
| 454 | + else: |
| 455 | + sys.exit("Total Amounts: You must enter a valid report type.\n" ) |
| 456 | + |
| 457 | + # Exract relevant labels and values |
| 458 | + labels_temp = list() |
| 459 | + counts_temp = list() |
| 460 | + |
| 461 | + for i in range(len(labels)): |
| 462 | + if i in indices: |
| 463 | + labels_temp.append(labels[i]) |
| 464 | + counts_temp.append(counts[i]) |
| 465 | + |
| 466 | + return [labels_temp, counts_temp, title, ylabel] |
| 467 | + |
| 468 | + |
| 469 | +""" |
| 470 | + |
| 471 | +CLASS :: ^BannerLPReporting^ |
| 472 | + |
| 473 | +This subclass handles reporting on banners and landing pages for the fundraiser. |
| 474 | + |
| 475 | +""" |
| 476 | + |
| 477 | +class BannerLPReporting(DataReporting): |
| 478 | + |
| 479 | + |
| 480 | + def __init__(self, *args): |
| 481 | + |
| 482 | + if len(args) == 2: |
| 483 | + self.campaign = args[0] |
| 484 | + self.start_time = args[1] |
| 485 | + else: |
| 486 | + self.campaign = None |
| 487 | + self.start_time = None |
| 488 | + |
| 489 | + def run_query(self,start_time, end_time, campaign, query_name, metric_name): |
| 490 | + |
| 491 | + self.init_db() |
| 492 | + |
| 493 | + metric_lists = mh.AutoVivification() |
| 494 | + time_lists = mh.AutoVivification() |
| 495 | + # table_data = [] # store the results in a table for reporting |
| 496 | + |
| 497 | + # Load the SQL File & Format |
| 498 | + filename = self._sql_path_ + query_name + '.sql' |
| 499 | + sql_stmnt = mh.read_sql(filename) |
| 500 | + |
| 501 | + query_name = 'report_bannerLP_metrics' # rename query to work with query store |
| 502 | + sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign]) |
| 503 | + |
| 504 | + key_index = QD.get_banner_index(query_name) |
| 505 | + time_index = QD.get_time_index(query_name) |
| 506 | + metric_index = QD.get_metric_index(query_name, metric_name) |
| 507 | + |
| 508 | + # Composes the data for each banner |
| 509 | + try: |
| 510 | + err_msg = sql_stmnt |
| 511 | + self.cur.execute(sql_stmnt) |
| 512 | + |
| 513 | + results = self.cur.fetchall() |
| 514 | + |
| 515 | + # Compile Table Data |
| 516 | + # cpRow = self.listify(row) |
| 517 | + # table_data.append(cpRow) |
| 518 | + |
| 519 | + for row in results: |
| 520 | + |
| 521 | + key_name = row[key_index] |
| 522 | + |
| 523 | + try: |
| 524 | + metric_lists[key_name].append(row[metric_index]) |
| 525 | + time_lists[key_name].append(row[time_index]) |
| 526 | + except: |
| 527 | + metric_lists[key_name] = list() |
| 528 | + time_lists[key_name] = list() |
| 529 | + |
| 530 | + metric_lists[key_name].append(row[metric_index]) |
| 531 | + time_lists[key_name].append(row[time_index]) |
| 532 | + |
| 533 | + except: |
| 534 | + self.db.rollback() |
| 535 | + sys.exit("Database Interface Exception:\n" + err_msg) |
| 536 | + |
| 537 | + """ Convert Times to Integers """ |
| 538 | + # Find the earliest date |
| 539 | + max_i = 0 |
| 540 | + |
| 541 | + for key in time_lists.keys(): |
| 542 | + for date_str in time_lists[key]: |
| 543 | + day_int = int(date_str[8:10]) |
| 544 | + hr_int = int(date_str[11:13]) |
| 545 | + date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
| 546 | + if date_int > max_i: |
| 547 | + max_i = date_int |
| 548 | + max_day = day_int |
| 549 | + max_hr = hr_int |
| 550 | + |
| 551 | + |
| 552 | + # Normalize dates |
| 553 | + time_norm = mh.AutoVivification() |
| 554 | + for key in time_lists.keys(): |
| 555 | + for date_str in time_lists[key]: |
| 556 | + day = int(date_str[8:10]) |
| 557 | + hr = int(date_str[11:13]) |
| 558 | + # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
| 559 | + elem = (day - max_day) * 24 + (hr - max_hr) |
| 560 | + try: |
| 561 | + time_norm[key].append(elem) |
| 562 | + except: |
| 563 | + time_norm[key] = list() |
| 564 | + time_norm[key].append(elem) |
| 565 | + |
| 566 | + # smooth out the values |
| 567 | + #window_length = 20 |
| 568 | + #for banner in metric_lists.keys(): |
| 569 | + # metric_lists[banner] = smooth(metric_lists[banner], window_length) |
| 570 | + |
| 571 | + self.close_db() |
| 572 | + |
| 573 | + # return [metric_lists, time_norm, table_data] |
| 574 | + return [metric_lists, time_norm] |
| 575 | + |
| 576 | + |
| 577 | + def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname): |
| 578 | + pylab.subplot(subplot_index) |
| 579 | + pylab.figure(num=None,figsize=[26,14]) |
| 580 | + count_keys = counts.keys() |
| 581 | + |
| 582 | + line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s'] |
| 583 | + |
| 584 | + count = 0 |
| 585 | + for key in counts.keys(): |
| 586 | + pylab.plot(times[key], counts[key], line_types[count]) |
| 587 | + count = count + 1 |
| 588 | + |
| 589 | + pylab.grid() |
| 590 | + pylab.xlim(ranges[0], ranges[1]) |
| 591 | + pylab.legend(count_keys,loc=2) |
| 592 | + |
| 593 | + pylab.xlabel(xlabel) |
| 594 | + pylab.ylabel(ylabel) |
| 595 | + |
| 596 | + pylab.title(title) |
| 597 | + pylab.savefig(fname, format='png') |
| 598 | + |
| 599 | + |
| 600 | + """ |
| 601 | + |
| 602 | + type = 'LP' || 'BAN' || 'BAN-TEST' || 'LP-TEST' |
| 603 | + |
| 604 | + """ |
| 605 | + def run(self, type, metric_name): |
| 606 | + |
| 607 | + # Current date & time |
| 608 | + now = datetime.datetime.now() |
| 609 | + #UTC = 8 |
| 610 | + #delta = datetime.timedelta(hours=UTC) |
| 611 | + #now = now + delta |
| 612 | + |
| 613 | + """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """ |
| 614 | + hours_back = 24 |
| 615 | + times = self.gen_date_strings(now, hours_back,1,1) |
| 616 | + |
| 617 | + start_time = times[0] |
| 618 | + end_time = times[1] |
| 619 | + |
| 620 | + print '\nGenerating ' + type +' for ' + str(hours_back) + ' hours back. The start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
| 621 | + |
| 622 | + if type == 'LP': |
| 623 | + query_name = 'report_LP_metrics' |
| 624 | + |
| 625 | + # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
| 626 | + if self.campaign == None: |
| 627 | + campaign = '[0-9](JA|SA|EA|TY)[0-9]' |
| 628 | + else: |
| 629 | + campaign = self.campaign |
| 630 | + |
| 631 | + title = metric_name + ': ' + start_time + ' -- ' + end_time |
| 632 | + fname = query_name + '_' + metric_name + '.png' |
| 633 | + elif type == 'BAN': |
| 634 | + query_name = 'report_banner_metrics' |
| 635 | + |
| 636 | + # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
| 637 | + if self.campaign == None: |
| 638 | + campaign = '[0-9](JA|SA|EA|TY)[0-9]' |
| 639 | + else: |
| 640 | + campaign = self.campaign |
| 641 | + |
| 642 | + title = metric_name + ': ' + start_time + ' -- ' + end_time |
| 643 | + fname = query_name + '_' + metric_name + '.png' |
| 644 | + elif type == 'BAN-TEST': |
| 645 | + r = self.get_latest_campaign() |
| 646 | + query_name = 'report_banner_metrics' |
| 647 | + |
| 648 | + # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
| 649 | + if self.campaign == None: |
| 650 | + campaign = r[0] |
| 651 | + start_time = r[1] |
| 652 | + else: |
| 653 | + campaign = self.campaign |
| 654 | + start_time = self.start_time |
| 655 | + |
| 656 | + title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign |
| 657 | + fname = query_name + '_' + metric_name + '_latest' + '.png' |
| 658 | + elif type == 'LP-TEST': |
| 659 | + r = self.get_latest_campaign() |
| 660 | + query_name = 'report_LP_metrics' |
| 661 | + |
| 662 | + # Set the campaign type - either a regular expression corresponding to a particular campaign or specific campaign |
| 663 | + if self.campaign == None: |
| 664 | + campaign = r[0] |
| 665 | + start_time = r[1] |
| 666 | + else: |
| 667 | + campaign = self.campaign |
| 668 | + start_time = self.start_time |
| 669 | + |
| 670 | + title = metric_name + ': ' + start_time + ' -- ' + end_time + ', CAMPAIGN =' + campaign |
| 671 | + fname = query_name + '_' + metric_name + '_latest' + '.png' |
| 672 | + else: |
| 673 | + sys.exit("Invalid type name - must be 'LP' or 'BAN'.") |
| 674 | + |
| 675 | + return_val = self.run_query(start_time, end_time, campaign, query_name, metric_name) |
| 676 | + metrics = return_val[0] |
| 677 | + times = return_val[1] |
| 678 | + |
| 679 | + # title = metric_name + ': ' + start_time + ' -- ' + end_time |
| 680 | + xlabel = 'Time - Hours' |
| 681 | + ylabel = metric_name |
| 682 | + subplot_index = 111 |
| 683 | + |
| 684 | + min_time = 99 |
| 685 | + for key in times.keys(): |
| 686 | + min_elem = min(times[key]) |
| 687 | + if min_elem < min_time: |
| 688 | + min_time = min_elem |
| 689 | + |
| 690 | + ranges = [min_time, 0] |
| 691 | + |
| 692 | + self.gen_plot(metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname) |
| 693 | + |
| 694 | + return [metrics, times] |
| 695 | + |
| 696 | + |
| 697 | + def get_latest_campaign(self): |
| 698 | + |
| 699 | + query_name = 'report_latest_campaign' |
| 700 | + self.init_db() |
| 701 | + |
| 702 | + """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """ |
| 703 | + now = datetime.datetime.now() |
| 704 | + hours_back = 72 |
| 705 | + times = self.gen_date_strings(now, hours_back,1,1) |
| 706 | + |
| 707 | + sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql') |
| 708 | + sql_stmnt = QD.format_query(query_name, sql_stmnt, [times[0]]) |
| 709 | + |
| 710 | + campaign_index = QD.get_campaign_index(query_name) |
| 711 | + time_index = QD.get_time_index(query_name) |
| 712 | + |
| 713 | + try: |
| 714 | + err_msg = sql_stmnt |
| 715 | + self.cur.execute(sql_stmnt) |
| 716 | + |
| 717 | + row = self.cur.fetchone() |
| 718 | + except: |
| 719 | + self.db.rollback() |
| 720 | + sys.exit("Database Interface Exception:\n" + err_msg) |
| 721 | + |
| 722 | + campaign = row[campaign_index] |
| 723 | + timestamp = row[time_index] |
| 724 | + |
| 725 | + self.close_db() |
| 726 | + |
| 727 | + return [campaign, timestamp] |
| 728 | + |
| 729 | + """ |
| 730 | + |
| 731 | + Takes as input and converts it to a set of hours counting back from 0 |
| 732 | + |
| 733 | + time_lists - a dictionary of timestamp lists |
| 734 | + time_norm - a dictionary of normalized times |
| 735 | + |
| 736 | + """ |
| 737 | + def normalize_timestamps(self, time_lists): |
| 738 | + # Find the earliest date |
| 739 | + max_i = 0 |
| 740 | + |
| 741 | + for key in time_lists.keys(): |
| 742 | + for date_str in time_lists[key]: |
| 743 | + day_int = int(date_str[8:10]) |
| 744 | + hr_int = int(date_str[11:13]) |
| 745 | + date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
| 746 | + if date_int > max_i: |
| 747 | + max_i = date_int |
| 748 | + max_day = day_int |
| 749 | + max_hr = hr_int |
| 750 | + |
| 751 | + |
| 752 | + # Normalize dates |
| 753 | + time_norm = mh.AutoVivification() |
| 754 | + for key in time_lists.keys(): |
| 755 | + for date_str in time_lists[key]: |
| 756 | + day = int(date_str[8:10]) |
| 757 | + hr = int(date_str[11:13]) |
| 758 | + # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13]) |
| 759 | + elem = (day - max_day) * 24 + (hr - max_hr) |
| 760 | + try: |
| 761 | + time_norm[key].append(elem) |
| 762 | + except: |
| 763 | + time_norm[key] = list() |
| 764 | + time_norm[key].append(elem) |
| 765 | + |
| 766 | + return time_norm |
| 767 | + |
| 768 | +""" |
| 769 | + |
| 770 | +CLASS :: ^MinerReporting^ |
| 771 | + |
| 772 | +This subclass handles reporting on raw values imported into the database. |
| 773 | + |
| 774 | +""" |
| 775 | + |
| 776 | +class MinerReporting(DataReporting): |
| 777 | + |
| 778 | + def run_query(self, start_time, end_time, query_name): |
| 779 | + |
| 780 | + self.init_db() |
| 781 | + |
| 782 | + counts = list() |
| 783 | + times = list() |
| 784 | + |
| 785 | + # Load the SQL File & Format |
| 786 | + filename = self._sql_path_+ query_name + '.sql' |
| 787 | + sql_stmnt = mh.read_sql(filename) |
| 788 | + |
| 789 | + sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time]) |
| 790 | + #print sql_stmnt |
| 791 | + |
| 792 | + # Get Indexes into Query |
| 793 | + count_index = QD.get_count_index(query_name) |
| 794 | + time_index = QD.get_time_index(query_name) |
| 795 | + |
| 796 | + # Composes the data for each banner |
| 797 | + try: |
| 798 | + err_msg = sql_stmnt |
| 799 | + self.cur.execute(sql_stmnt) |
| 800 | + |
| 801 | + results = self.cur.fetchall() |
| 802 | + |
| 803 | + for row in results: |
| 804 | + counts.append(row[count_index]) |
| 805 | + times.append(row[time_index]) |
| 806 | + |
| 807 | + except: |
| 808 | + self.db.rollback() |
| 809 | + sys.exit("Database Interface Exception:\n" + err_msg) |
| 810 | + |
| 811 | + """ Convert Times to Integers """ |
| 812 | + time_norm = self.normalize_timestamps(times) |
| 813 | + |
| 814 | + |
| 815 | + self.close_db() |
| 816 | + |
| 817 | + return [counts, time_norm] |
| 818 | + |
| 819 | + |
| 820 | + # Create histograms for hourly counts |
| 821 | + |
| 822 | + def gen_plot(self,counts, times, title, xlabel, ylabel, ranges, subplot_index, fname): |
| 823 | + |
| 824 | + pylab.subplot(subplot_index) |
| 825 | + pylab.figure(num=None,figsize=[26,14]) |
| 826 | + |
| 827 | + # pylab.plot(times, counts) |
| 828 | + # pylab.hist(counts, times) |
| 829 | + pylab.bar(times, counts, width=0.5) |
| 830 | + |
| 831 | + pylab.grid() |
| 832 | + pylab.xlim(ranges[0], ranges[1]) |
| 833 | + |
| 834 | + pylab.xlabel(xlabel) |
| 835 | + pylab.ylabel(ylabel) |
| 836 | + |
| 837 | + pylab.title(title) |
| 838 | + pylab.savefig(fname, format='png') |
| 839 | + |
| 840 | + def run(self, query_name): |
| 841 | + |
| 842 | + # Current date & time |
| 843 | + now = datetime.datetime.now() |
| 844 | + #UTC = 8 |
| 845 | + #delta = datetime.timedelta(hours=UTC) |
| 846 | + #now = now + delta |
| 847 | + |
| 848 | + """ ESTABLISH THE START TIME TO PULL ANALYTICS - TS format=1, TS resolution=1 """ |
| 849 | + hours_back = 24 |
| 850 | + times = self.gen_date_strings_hr(now, hours_back,1,1) |
| 851 | + |
| 852 | + start_time = times[0] |
| 853 | + end_time = times[1] |
| 854 | + |
| 855 | + print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
| 856 | + |
| 857 | + # Run Query |
| 858 | + return_val = self.run_query(start_time, end_time, query_name) |
| 859 | + counts = return_val[0] |
| 860 | + times = return_val[1] |
| 861 | + |
| 862 | + # Normalize times |
| 863 | + min_time = min(times) |
| 864 | + ranges = [min_time, 0] |
| 865 | + |
| 866 | + xlabel = 'Hours' |
| 867 | + subplot_index = 111 |
| 868 | + fname = query_name + '.png' |
| 869 | + |
| 870 | + title = QD.get_plot_title(query_name) |
| 871 | + title = title + ' -- ' + start_time + ' - ' + end_time |
| 872 | + ylabel = QD.get_plot_ylabel(query_name) |
| 873 | + |
| 874 | + # Convert counts to float (from Decimal) to prevent exception when bar plotting |
| 875 | + # Bbox::update_numerix_xy expected numerix array |
| 876 | + counts_new = list() |
| 877 | + for i in range(len(counts)): |
| 878 | + counts_new.append(float(counts[i])) |
| 879 | + counts = counts_new |
| 880 | + |
| 881 | + # Generate Histogram |
| 882 | + self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname) |
| 883 | + |
| 884 | + |
| 885 | +""" |
| 886 | + |
| 887 | +CLASS :: IntervalReporting |
| 888 | + |
| 889 | +Performs queries that take timestamps, query, and an interval as arguments. Data for a single metric |
| 890 | +is generated for each time interval in the time period defined by the start and end timestamps. |
| 891 | + |
| 892 | +Types of queries supported: |
| 893 | + |
| 894 | +report_banner_metrics_minutely |
| 895 | +report_LP_metrics_minutely |
| 896 | + |
| 897 | +""" |
| 898 | + |
| 899 | +class IntervalReporting(DataReporting): |
| 900 | + |
| 901 | + """ |
| 902 | + """ |
| 903 | + def __init__(self): |
| 904 | + self._data_loader_ = DL.IntervalReportingLoader() |
| 905 | + |
| 906 | + """ |
| 907 | + """ |
| 908 | + def usage(self): |
| 909 | + |
| 910 | + print 'Types of queries:' |
| 911 | + print ' (1) banner' |
| 912 | + print ' (2) LP' |
| 913 | + print '' |
| 914 | + print 'e.g.' |
| 915 | + print " run('20101230160400', '20101230165400', 2, 'banner', 'imp', '20101230JA091_US')" |
| 916 | + print " run('20101230160400', '20101230165400', 2, 'LP', 'views', '20101230JA091_US')" |
| 917 | + print '' |
| 918 | + |
| 919 | + return |
| 920 | + |
| 921 | + """ |
| 922 | + Execute reporting query and generate plots |
| 923 | + """ |
| 924 | + def gen_plot(self, metrics, times, title, xlabel, ylabel, ranges, subplot_index, fname): |
| 925 | + |
| 926 | + pylab.subplot(subplot_index) |
| 927 | + pylab.figure(num=None,figsize=[26,14]) |
| 928 | + |
| 929 | + line_types = ['b-o','g-o','r-o','c-o','m-o','k-o','y-o','b--d','g--d','r--d','c--d','m--d','k--d','y--d','b-.s','g-.s','r-.s','c-.s','m-.s','k-.s','y-.s'] |
| 930 | + |
| 931 | + count = 0 |
| 932 | + for key in metrics.keys(): |
| 933 | + pylab.step(times[key], metrics[key], line_types[count]) |
| 934 | + count = count + 1 |
| 935 | + |
| 936 | + pylab.grid() |
| 937 | + pylab.xlim(ranges[0], ranges[1]) |
| 938 | + pylab.ylim(ranges[2], ranges[3]) |
| 939 | + pylab.legend(metrics.keys(),loc=2) |
| 940 | + |
| 941 | + pylab.xlabel(xlabel) |
| 942 | + pylab.ylabel(ylabel) |
| 943 | + |
| 944 | + pylab.title(title) |
| 945 | + pylab.savefig(fname, format='png') |
| 946 | + |
| 947 | + |
| 948 | + """ |
| 949 | + Execute reporting query and generate plots |
| 950 | + """ |
| 951 | + def run(self, start_time, end_time, interval, query_type, metric_name, campaign): |
| 952 | + |
| 953 | + print '\nGenerating ' + query_type +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n' |
| 954 | + |
| 955 | + """ Execute the query that generates interval reporting data """ |
| 956 | + return_val = self._data_loader_.run_query(start_time, end_time, interval, query_type, metric_name, campaign) |
| 957 | + counts = return_val[0] |
| 958 | + times = return_val[1] |
| 959 | + |
| 960 | + """ Convert Times to Integers that indicate relative times AND normalize the intervals in case any are missing """ |
| 961 | + for key in times.keys(): |
| 962 | + times[key] = TP.normalize_timestamps(times[key], False, 2) |
| 963 | + times[key], counts[key] = TP.normalize_intervals(times[key], counts[key], interval) |
| 964 | + |
| 965 | + # Normalize times |
| 966 | + min_time = min(times) |
| 967 | + ranges = [min_time, 0] |
| 968 | + |
| 969 | + xlabel = 'MINUTES' |
| 970 | + subplot_index = 111 |
| 971 | + fname = campaign + ' ' + query_type + ' ' + metric_name + '.png' |
| 972 | + |
| 973 | + metric_full_name = QD.get_metric_full_name(metric_name) |
| 974 | + title = campaign + ': ' + metric_full_name + ' -- ' + start_time + ' - ' + end_time |
| 975 | + ylabel = metric_full_name |
| 976 | + |
| 977 | + """ Determine List maximums """ |
| 978 | + times_max = 0 |
| 979 | + metrics_max = 0 |
| 980 | + |
| 981 | + for key in counts.keys(): |
| 982 | + list_max = max(counts[key]) |
| 983 | + if list_max > metrics_max: |
| 984 | + metrics_max = list_max |
| 985 | + |
| 986 | + for key in times.keys(): |
| 987 | + list_max = max(times[key]) |
| 988 | + if list_max > times_max: |
| 989 | + times_max = list_max |
| 990 | + |
| 991 | + ranges = list() |
| 992 | + ranges.append(0.0) |
| 993 | + ranges.append(times_max * 1.1) |
| 994 | + ranges.append(0.0) |
| 995 | + ranges.append(metrics_max * 1.1) |
| 996 | + |
| 997 | + """ Generate plots given data """ |
| 998 | + self.gen_plot(counts, times, title, xlabel, ylabel, ranges, subplot_index, fname) |
| 999 | + |
| 1000 | + |
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/TimestampProcessor.py |
— | — | @@ -1,10 +1,39 @@ |
2 | 2 | |
3 | 3 | """ |
4 | 4 | |
| 5 | +This module effectively functions as a Singleton class. |
| 6 | + |
5 | 7 | TimestampProcesser module is used to provide definitions for dealing with date and time |
6 | 8 | objects. This is primarily used to handle |
7 | 9 | |
| 10 | +TimestampProcesser facilitates the processing of timestamps used in the CiviCRM and "faulkner" mySQL |
| 11 | +databases. This includes mapping among timestamp formats and converting those formats to indexed |
| 12 | +lists and dictionaries. |
8 | 13 | |
| 14 | +Examples of format definitions: |
| 15 | + |
| 16 | + format 1 - 20080101000606 |
| 17 | + format 2 - 2008-01-01 00:06:06 |
| 18 | + |
| 19 | +Examples of resolution definitions: |
| 20 | + |
| 21 | + resolution 0 - xxxx-xx-xx 00:00:00 |
| 22 | + resolution 1 - xxxx-xx-xx xx:00:00 |
| 23 | + resolution 2 - xxxx-xx-xx xx:xx:00 |
| 24 | + resolution 3 - xxxx-xx-xx xx:xx:xx |
| 25 | + |
| 26 | +METHODS: |
| 27 | + |
| 28 | + normalize_timestamps - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0 |
| 29 | + timestamps_to_dict - Convert lists into dictionaries before processing it is assumed that lists are composed of only simple types |
| 30 | + find_latest_date_in_list - Find the latest time stamp in a list |
| 31 | + find_earliest_date_in_list - Find the earliest time stamp in a list |
| 32 | + gen_date_strings - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format |
| 33 | + timestamp_from_obj - Convert datetime objects to a timestamp of a given format. |
| 34 | + timestamp_to_obj - Convert timestamp to a datetime object of a given format |
| 35 | + normalize_intervals - Inserts missing interval points into the time and metric lists |
| 36 | + timestamp_convert_format - Converts from one timestamp format to another timestamp format |
| 37 | + |
9 | 38 | """ |
10 | 39 | |
11 | 40 | __author__ = "Ryan Faulkner" |
— | — | @@ -15,402 +44,361 @@ |
16 | 45 | import sys |
17 | 46 | sys.path.append('../') |
18 | 47 | |
19 | | -import matplotlib |
20 | 48 | import datetime |
21 | | -import MySQLdb |
22 | | -import pylab |
23 | | -import HTML |
24 | 49 | import math |
25 | | - |
26 | 50 | import miner_help as mh |
27 | 51 | |
28 | | -matplotlib.use('Agg') |
29 | | - |
30 | | - |
31 | | - |
| 52 | + |
32 | 53 | """ |
| 54 | + |
| 55 | + Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0 |
| 56 | + |
| 57 | + INPUT: |
| 58 | + time_lists - a list of datetime objects |
| 59 | + count_back - indicate whether the list counts back from the end |
| 60 | + time_unit - an integer indicating what unit to measure time in (0 = day, 1 = hour, 2 = minute) |
| 61 | + |
| 62 | + RETURN: |
| 63 | + time_norm - a dictionary of normalized times |
| 64 | + |
| 65 | +""" |
| 66 | +def normalize_timestamps(time_lists, count_back, time_unit): |
| 67 | + |
| 68 | + time_lists, isList = timestamps_to_dict(time_lists) |
| 69 | + |
| 70 | + """ Depending on args set the start date """ |
| 71 | + if count_back: |
| 72 | + start_date_obj = find_latest_date_in_list(time_lists) |
| 73 | + else: |
| 74 | + start_date_obj = find_earliest_date_in_list(time_lists) |
| 75 | + |
| 76 | + start_day = start_date_obj.day |
| 77 | + start_hr = start_date_obj.hour |
| 78 | + start_mte = start_date_obj.minute |
| 79 | + |
| 80 | + # Normalize dates |
| 81 | + time_norm = mh.AutoVivification() |
| 82 | + for key in time_lists.keys(): |
| 83 | + for date_obj in time_lists[key]: |
| 84 | + |
| 85 | + day = date_obj.day |
| 86 | + hr = date_obj.hour |
| 87 | + mte = date_obj.minute |
| 88 | + |
| 89 | + if time_unit == 0: |
| 90 | + elem = (day - start_day) |
| 91 | + elif time_unit == 1: |
| 92 | + elem = (day - start_day) * 24 + (hr - start_hr) |
| 93 | + elif time_unit == 2: |
| 94 | + elem = (day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (mte - start_mte) |
| 95 | + |
| 96 | + try: |
| 97 | + time_norm[key].append(elem) |
| 98 | + except: |
| 99 | + time_norm[key] = list() |
| 100 | + time_norm[key].append(elem) |
| 101 | + |
| 102 | + """ If the original argument was a list put it back in that form """ |
| 103 | + if isList: |
| 104 | + time_norm = time_norm[key] |
| 105 | + |
| 106 | + return time_norm |
| 107 | + |
| 108 | + |
| 109 | +""" |
33 | 110 | |
34 | | - TimestampProcesser facilitates the processing of timestamps used in the CiviCRM and "faulkner" mySQL |
35 | | - databases. This includes mapping among timestamp formats and converting those formats to indexed |
36 | | - lists and dictionaries. |
| 111 | + HELPER METHOD for normalize_timestamps. Convert lists into dictionaries before processing it is assumed that lists |
| 112 | + are composed of only simple types |
37 | 113 | |
38 | | - Examples of format definitions: |
39 | | - |
40 | | - format 1 - 20080101000606 |
41 | | - format 2 - 2008-01-01 00:06:06 |
| 114 | + INPUT: |
| 115 | + time_lists - a list of datetime objects |
42 | 116 | |
43 | | - Examples of resolution definitions: |
44 | | - |
45 | | - resolution 0 - xxxx-xx-xx 00:00:00 |
46 | | - resolution 1 - xxxx-xx-xx xx:00:00 |
47 | | - resolution 2 - xxxx-xx-xx xx:xx:00 |
48 | | - resolution 3 - xxxx-xx-xx xx:xx:xx |
49 | | - |
50 | | - METHODS: |
51 | | - |
52 | | - normalize_timestamps - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0 |
53 | | - timestamps_to_dict - Convert lists into dictionaries before processing it is assumed that lists are composed of only simple types |
54 | | - find_latest_date_in_list - Find the latest time stamp in a list |
55 | | - find_earliest_date_in_list - Find the earliest time stamp in a list |
56 | | - gen_date_strings - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format |
57 | | - timestamp_from_obj - Convert datetime objects to a timestamp of a given format. |
58 | | - timestamp_to_obj - Convert timestamp to a datetime object of a given format |
59 | | - normalize_intervals - Inserts missing interval points into the time and metric lists |
60 | | - timestamp_convert_format - Converts from one timestamp format to another timestamp format |
61 | | - |
| 117 | + RETURN: |
| 118 | + time_lists - dictionary with a single key 'key' that stores the list |
| 119 | + isList - a dictionary of normalized times |
| 120 | + |
62 | 121 | """ |
63 | | -class TimestampProcesser(object): |
| 122 | +def timestamps_to_dict(time_lists): |
64 | 123 | |
65 | | - """ |
66 | | - |
67 | | - Takes a list of timestamps as input and converts it to a set of days, hours, or minutes counting back from 0 |
| 124 | + isList = 0 |
| 125 | + if type(time_lists) is list: |
| 126 | + isList = 1 |
68 | 127 | |
69 | | - INPUT: |
70 | | - time_lists - a list of datetime objects |
71 | | - count_back - indicate whether the list counts back from the end |
72 | | - time_unit - an integer indicating what unit to measure time in (0 = day, 1 = hour, 2 = minute) |
| 128 | + old_list = time_lists |
| 129 | + time_lists = mh.AutoVivification() |
73 | 130 | |
74 | | - RETURN: |
75 | | - time_norm - a dictionary of normalized times |
76 | | - |
77 | | - """ |
78 | | - def normalize_timestamps(self, time_lists, count_back, time_unit): |
| 131 | + key = 'key' |
| 132 | + time_lists[key] = list() |
79 | 133 | |
80 | | - time_lists, isList = self.timestamps_to_dict(time_lists) |
| 134 | + for i in range(len(old_list)): |
| 135 | + time_lists[key].append(old_list[i]) |
| 136 | + |
| 137 | + return [time_lists, isList] |
| 138 | + |
| 139 | + |
| 140 | +""" |
| 141 | + |
| 142 | + HELPER METHOD for normalize_timestamps. Find the latest time stamp in a list |
| 143 | + |
| 144 | + INPUT: |
| 145 | + time_lists - a list of datetime objects |
81 | 146 | |
82 | | - """ Depending on args set the start date """ |
83 | | - if count_back: |
84 | | - start_date_obj = self.find_latest_date_in_list(time_lists) |
85 | | - else: |
86 | | - start_date_obj = self.find_earliest_date_in_list(time_lists) |
| 147 | + RETURN: |
| 148 | + date_max - datetime object of the latest date in the list |
87 | 149 | |
88 | | - start_day = start_date_obj.day |
89 | | - start_hr = start_date_obj.hour |
90 | | - start_mte = start_date_obj.minute |
91 | | - |
92 | | - # Normalize dates |
93 | | - time_norm = mh.AutoVivification() |
94 | | - for key in time_lists.keys(): |
95 | | - for date_obj in time_lists[key]: |
| 150 | +""" |
| 151 | +def find_latest_date_in_list(time_lists): |
| 152 | + |
| 153 | + date_max = datetime.datetime(1000,1,1,0,0,0) |
| 154 | + |
| 155 | + for key in time_lists.keys(): |
| 156 | + for date_obj in time_lists[key]: |
| 157 | + if date_int > date_min: |
| 158 | + date_min = date_obj |
96 | 159 | |
97 | | - day = date_obj.day |
98 | | - hr = date_obj.hour |
99 | | - mte = date_obj.minute |
100 | | - |
101 | | - if time_unit == 0: |
102 | | - elem = (day - start_day) |
103 | | - elif time_unit == 1: |
104 | | - elem = (day - start_day) * 24 + (hr - start_hr) |
105 | | - elif time_unit == 2: |
106 | | - elem = (day - start_day) * 24 * 60 + (hr - start_hr) * 60 + (mte - start_mte) |
107 | | - |
108 | | - try: |
109 | | - time_norm[key].append(elem) |
110 | | - except: |
111 | | - time_norm[key] = list() |
112 | | - time_norm[key].append(elem) |
| 160 | + return date_max |
| 161 | + |
| 162 | +""" |
| 163 | + |
| 164 | + HELPER METHOD for normalize_timestamps. Find the earliest timestamp in a list |
| 165 | + |
| 166 | + INPUT: |
| 167 | + time_lists - a list of datetime objects |
113 | 168 | |
114 | | - """ If the original argument was a list put it back in that form """ |
115 | | - if isList: |
116 | | - time_norm = time_norm[key] |
117 | | - |
118 | | - return time_norm |
| 169 | + RETURN: |
| 170 | + date_min - datetime object of the earliest date in the list |
119 | 171 | |
| 172 | +""" |
| 173 | +def find_earliest_date_in_list(time_lists): |
120 | 174 | |
121 | | - """ |
| 175 | + date_min = datetime.datetime(3000,1,1,0,0,0) |
122 | 176 | |
123 | | - HELPER METHOD for normalize_timestamps. Convert lists into dictionaries before processing it is assumed that lists |
124 | | - are composed of only simple types |
125 | | - |
126 | | - INPUT: |
127 | | - time_lists - a list of datetime objects |
128 | | - |
129 | | - RETURN: |
130 | | - time_lists - dictionary with a single key 'key' that stores the list |
131 | | - isList - a dictionary of normalized times |
132 | | - |
133 | | - """ |
134 | | - def timestamps_to_dict(self, time_lists): |
135 | | - |
136 | | - isList = 0 |
137 | | - if type(time_lists) is list: |
138 | | - isList = 1 |
139 | | - |
140 | | - old_list = time_lists |
141 | | - time_lists = mh.AutoVivification() |
142 | | - |
143 | | - key = 'key' |
144 | | - time_lists[key] = list() |
145 | | - |
146 | | - for i in range(len(old_list)): |
147 | | - time_lists[key].append(old_list[i]) |
| 177 | + for key in time_lists.keys(): |
| 178 | + for date_obj in time_lists[key]: |
| 179 | + if date_obj < date_min: |
| 180 | + date_min = date_obj |
| 181 | + |
| 182 | + return date_min |
| 183 | + |
| 184 | + |
| 185 | +""" |
| 186 | + |
| 187 | + Given a datetime object produce a timestamp a number of hours in the past and according to a particular format |
148 | 188 | |
149 | | - return [time_lists, isList] |
| 189 | + format 1 - 20080101000606 |
| 190 | + format 2 - 2008-01-01 00:06:06 |
150 | 191 | |
151 | | - |
152 | | - """ |
| 192 | + INPUT: |
153 | 193 | |
154 | | - HELPER METHOD for normalize_timestamps. Find the latest time stamp in a list |
155 | | - |
156 | | - INPUT: |
157 | | - time_lists - a list of datetime objects |
158 | | - |
159 | | - RETURN: |
160 | | - date_max - datetime object of the latest date in the list |
161 | | - |
162 | | - """ |
163 | | - def find_latest_date_in_list(self, time_lists): |
164 | | - |
165 | | - date_max = datetime.datetime(1000,1,1,0,0,0) |
166 | | - |
167 | | - for key in time_lists.keys(): |
168 | | - for date_obj in time_lists[key]: |
169 | | - if date_int > date_min: |
170 | | - date_min = date_obj |
171 | | - |
172 | | - return date_max |
| 194 | + now - datetime object |
| 195 | + hours_back - the amount of time the |
| 196 | + format - the format of the returned timestamp strings |
| 197 | + resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...) |
173 | 198 | |
174 | | - """ |
175 | 199 | |
176 | | - HELPER METHOD for normalize_timestamps. Find the earliest timestamp in a list |
| 200 | + RETURN: |
| 201 | + start_time - formatted datetime string |
| 202 | + end_time - formatted datetime string |
| 203 | + |
| 204 | +""" |
| 205 | +def gen_date_strings(time_ref, hours_back, format, resolution): |
| 206 | + |
| 207 | + delta = datetime.timedelta(hours=-hours_back) |
| 208 | + |
| 209 | + time_obj = time_ref + delta |
| 210 | + time_ref = time_ref + datetime.timedelta(hours=-1) # Move an hour back to terminate at 55 minute |
| 211 | + |
| 212 | + # Cast the start and end time strings in the proper format |
| 213 | + start_time = timestamp_from_obj(time_obj, format, resolution) |
| 214 | + end_time = timestamp_from_obj(time_ref, format, resolution) |
| 215 | + |
| 216 | + return [start_time, end_time] |
| 217 | + |
| 218 | + |
| 219 | + |
| 220 | +""" |
| 221 | + |
| 222 | + Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings. |
177 | 223 | |
178 | | - INPUT: |
179 | | - time_lists - a list of datetime objects |
180 | | - |
181 | | - RETURN: |
182 | | - date_min - datetime object of the earliest date in the list |
183 | | - |
184 | | - """ |
185 | | - def find_earliest_date_in_list(self, time_lists): |
186 | | - |
187 | | - date_min = datetime.datetime(3000,1,1,0,0,0) |
188 | | - |
189 | | - for key in time_lists.keys(): |
190 | | - for date_obj in time_lists[key]: |
191 | | - if date_obj < date_min: |
192 | | - date_min = date_obj |
193 | | - |
194 | | - return date_min |
195 | | - |
| 224 | + INPUT: |
196 | 225 | |
197 | | - """ |
| 226 | + time_obj - datetime object |
| 227 | + format - the format of the returned timestamp strings |
| 228 | + resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...) |
198 | 229 | |
199 | | - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format |
| 230 | + |
| 231 | + RETURN: |
| 232 | + start_time - formatted datetime string |
| 233 | + end_time - formatted datetime string |
| 234 | + |
| 235 | +""" |
| 236 | +def timestamp_from_obj(time_obj, format, resolution): |
| 237 | + |
| 238 | + if time_obj.month < 10: |
| 239 | + month = '0' + str(time_obj.month) |
| 240 | + else: |
| 241 | + month = str(time_obj.month) |
| 242 | + |
| 243 | + if time_obj.day < 10: |
| 244 | + day = '0' + str(time_obj.day) |
| 245 | + else: |
| 246 | + day = str(time_obj.day) |
| 247 | + |
| 248 | + if time_obj.hour < 10: |
| 249 | + hour = '0' + str(time_obj.hour) |
| 250 | + else: |
| 251 | + hour = str(time_obj.hour) |
200 | 252 | |
201 | | - format 1 - 20080101000606 |
202 | | - format 2 - 2008-01-01 00:06:06 |
| 253 | + if time_obj.minute < 10: |
| 254 | + minute = '0' + str(time_obj.minute) |
| 255 | + else: |
| 256 | + minute = str(time_obj.minute) |
203 | 257 | |
204 | | - INPUT: |
| 258 | + if time_obj.second < 10: |
| 259 | + second = '0' + str(time_obj.second) |
| 260 | + else: |
| 261 | + second = str(time_obj.second) |
205 | 262 | |
206 | | - now - datetime object |
207 | | - hours_back - the amount of time the |
208 | | - format - the format of the returned timestamp strings |
209 | | - resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...) |
| 263 | + # Cast the start and end time strings in the proper format |
| 264 | + if format == 1: |
210 | 265 | |
| 266 | + if resolution == 0: |
| 267 | + timestamp = str(time_obj.year) + month + day + '000000' |
| 268 | + elif resolution == 1: |
| 269 | + timestamp = str(time_obj.year) + month + day + hour + '0000' |
| 270 | + elif resolution == 2: |
| 271 | + timestamp = str(time_obj.year) + month + day + hour + minute + '00' |
| 272 | + elif resolution == 3: |
| 273 | + timestamp = str(time_obj.year) + month + day + hour + minute + second |
| 274 | + |
| 275 | + elif format == 2: |
211 | 276 | |
212 | | - RETURN: |
213 | | - start_time - formatted datetime string |
214 | | - end_time - formatted datetime string |
215 | | - |
216 | | - """ |
217 | | - def gen_date_strings(self, time_ref, hours_back, format, resolution): |
218 | | - |
219 | | - delta = datetime.timedelta(hours=-hours_back) |
| 277 | + if resolution == 0: |
| 278 | + timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + '00:00:00' |
| 279 | + elif resolution == 1: |
| 280 | + timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':00:00' |
| 281 | + elif resolution == 2: |
| 282 | + timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':00' |
| 283 | + elif resolution == 3: |
| 284 | + timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':' + second |
| 285 | + |
| 286 | + return timestamp |
220 | 287 | |
221 | | - time_obj = time_ref + delta |
222 | | - time_ref = time_ref + datetime.timedelta(hours=-1) # Move an hour back to terminate at 55 minute |
223 | | - |
224 | | - # Cast the start and end time strings in the proper format |
225 | | - start_time = self.timestamp_from_obj(time_obj, format, resolution) |
226 | | - end_time = self.timestamp_from_obj(time_ref, format, resolution) |
227 | 288 | |
228 | | - return [start_time, end_time] |
| 289 | +""" |
| 290 | + |
| 291 | + Convert timestamp to a datetime object of a given format |
229 | 292 | |
| 293 | + INPUT: |
230 | 294 | |
| 295 | + timestamp - timestamp string |
| 296 | + format - the format of the returned timestamp strings |
231 | 297 | |
232 | | - """ |
233 | 298 | |
234 | | - Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings. |
235 | | - |
236 | | - INPUT: |
237 | | - |
238 | | - time_obj - datetime object |
239 | | - format - the format of the returned timestamp strings |
240 | | - resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...) |
241 | | - |
242 | | - |
243 | | - RETURN: |
244 | | - start_time - formatted datetime string |
245 | | - end_time - formatted datetime string |
| 299 | + RETURN: |
| 300 | + time_obj - datetime conversion of timestamp string |
| 301 | + |
| 302 | +""" |
| 303 | +def timestamp_to_obj(timestamp, format): |
246 | 304 | |
247 | | - """ |
248 | | - def timestamp_from_obj(self, time_obj, format, resolution): |
| 305 | + if format == 1: |
| 306 | + time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[6:8]), \ |
| 307 | + int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14])) |
249 | 308 | |
250 | | - if time_obj.month < 10: |
251 | | - month = '0' + str(time_obj.month) |
252 | | - else: |
253 | | - month = str(time_obj.month) |
| 309 | + elif format == 2: |
| 310 | + time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[5:7]), int(timestamp[8:10]), \ |
| 311 | + int(timestamp[11:13]), int(timestamp[14:16]), int(timestamp[17:19])) |
254 | 312 | |
255 | | - if time_obj.day < 10: |
256 | | - day = '0' + str(time_obj.day) |
257 | | - else: |
258 | | - day = str(time_obj.day) |
| 313 | + return time_obj |
259 | 314 | |
260 | | - if time_obj.hour < 10: |
261 | | - hour = '0' + str(time_obj.hour) |
262 | | - else: |
263 | | - hour = str(time_obj.hour) |
264 | | - |
265 | | - if time_obj.minute < 10: |
266 | | - minute = '0' + str(time_obj.minute) |
267 | | - else: |
268 | | - minute = str(time_obj.minute) |
269 | | - |
270 | | - if time_obj.second < 10: |
271 | | - second = '0' + str(time_obj.second) |
272 | | - else: |
273 | | - second = str(time_obj.second) |
274 | | - |
275 | | - # Cast the start and end time strings in the proper format |
276 | | - if format == 1: |
277 | | - |
278 | | - if resolution == 0: |
279 | | - timestamp = str(time_obj.year) + month + day + '000000' |
280 | | - elif resolution == 1: |
281 | | - timestamp = str(time_obj.year) + month + day + hour + '0000' |
282 | | - elif resolution == 2: |
283 | | - timestamp = str(time_obj.year) + month + day + hour + minute + '00' |
284 | | - elif resolution == 3: |
285 | | - timestamp = str(time_obj.year) + month + day + hour + minute + second |
286 | | - |
287 | | - elif format == 2: |
288 | | - |
289 | | - if resolution == 0: |
290 | | - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + '00:00:00' |
291 | | - elif resolution == 1: |
292 | | - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':00:00' |
293 | | - elif resolution == 2: |
294 | | - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':00' |
295 | | - elif resolution == 3: |
296 | | - timestamp = str(time_obj.year) + '-' + month + '-' + day + ' ' + hour + ':' + minute + ':' + second |
297 | | - |
298 | | - return timestamp |
| 315 | + |
| 316 | +""" |
| 317 | + |
| 318 | + Inserts missing interval points into the time and metric lists |
299 | 319 | |
| 320 | + Assumptions: |
| 321 | + _metrics_ and _times_ are lists of the same length |
| 322 | + there must be a data point at each interval |
| 323 | + Some data points may be missed |
| 324 | + where there is no metric data the metric takes on the value 0.0 |
300 | 325 | |
301 | | - """ |
| 326 | + e.g. when _interval_ = 10 |
| 327 | + times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 30 40 50], [1 1 0 1 0 1] |
302 | 328 | |
303 | | - Convert timestamp to a datetime object of a given format |
304 | | - |
305 | | - INPUT: |
306 | | - |
307 | | - timestamp - timestamp string |
308 | | - format - the format of the returned timestamp strings |
309 | | - |
310 | | - |
311 | | - RETURN: |
312 | | - time_obj - datetime conversion of timestamp string |
313 | | - |
314 | | - """ |
315 | | - def timestamp_to_obj(self, timestamp, format): |
316 | | - |
317 | | - if format == 1: |
318 | | - time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[4:6]), int(timestamp[6:8]), \ |
319 | | - int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14])) |
320 | | - |
321 | | - elif format == 2: |
322 | | - time_obj = datetime.datetime(int(timestamp[0:4]), int(timestamp[5:7]), int(timestamp[8:10]), \ |
323 | | - int(timestamp[11:13]), int(timestamp[14:16]), int(timestamp[17:19])) |
| 329 | + INPUT: |
324 | 330 | |
325 | | - return time_obj |
| 331 | + times - |
| 332 | + metrics - |
| 333 | + interval - |
326 | 334 | |
| 335 | + RETURN: |
| 336 | + new_times - |
| 337 | + new_metrics - |
327 | 338 | |
328 | | - """ |
| 339 | +""" |
| 340 | +def normalize_intervals(times, metrics, interval): |
329 | 341 | |
330 | | - Inserts missing interval points into the time and metric lists |
| 342 | + current_time = 0.0 |
| 343 | + index = 0 |
| 344 | + iterations = 0 |
| 345 | + max_elems = math.ceil((times[-1] - times[0]) / interval) # there should be no more elements in the list than this |
| 346 | + |
| 347 | + new_times = list() |
| 348 | + new_metrics = list() |
| 349 | + |
| 350 | + """ Iterate through the time list """ |
| 351 | + while index < len(times): |
331 | 352 | |
332 | | - Assumptions: |
333 | | - _metrics_ and _times_ are lists of the same length |
334 | | - there must be a data point at each interval |
335 | | - Some data points may be missed |
336 | | - where there is no metric data the metric takes on the value 0.0 |
| 353 | + """ TEMPORARY SOLUTION: break out of the loop if more than the maximum number of elements is reached """ |
| 354 | + if iterations > max_elems: |
| 355 | + break; |
337 | 356 | |
338 | | - e.g. when _interval_ = 10 |
339 | | - times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 30 40 50], [1 1 0 1 0 1] |
| 357 | + new_times.append(current_time) |
340 | 358 | |
341 | | - INPUT: |
| 359 | + """ If the current time is not in the current list then add it and a metric value of 0.0 |
| 360 | + otherwise add the existing elements to the new lists """ |
| 361 | + if current_time != times[index]: |
| 362 | + new_metrics.append(0.0) |
342 | 363 | |
343 | | - times - |
344 | | - metrics - |
345 | | - interval - |
| 364 | + else: |
| 365 | + new_metrics.append(metrics[index]) |
| 366 | + index = index + 1 |
346 | 367 | |
347 | | - RETURN: |
348 | | - new_times - |
349 | | - new_metrics - |
| 368 | + current_time = current_time + interval |
350 | 369 | |
351 | | - """ |
352 | | - def normalize_intervals(self, times, metrics, interval): |
| 370 | + iterations = iterations + 1 |
353 | 371 | |
354 | | - current_time = 0.0 |
355 | | - index = 0 |
356 | | - iterations = 0 |
357 | | - max_elems = math.ceil((times[-1] - times[0]) / interval) # there should be no more elements in the list than this |
| 372 | + return [new_times, new_metrics] |
| 373 | + |
| 374 | +""" |
| 375 | + |
| 376 | + Converts from one timestamp format to another timestamp format |
358 | 377 | |
359 | | - new_times = list() |
360 | | - new_metrics = list() |
| 378 | + format 1 - 20080101000606 |
| 379 | + format 2 - 2008-01-01 00:06:06 |
361 | 380 | |
362 | | - """ Iterate through the time list """ |
363 | | - while index < len(times): |
364 | | - |
365 | | - """ TEMPORARY SOLUTION: break out of the loop if more than the maximum number of elements is reached """ |
366 | | - if iterations > max_elems: |
367 | | - break; |
368 | | - |
369 | | - new_times.append(current_time) |
370 | | - |
371 | | - """ If the current time is not in the current list then add it and a metric value of 0.0 |
372 | | - otherwise add the existing elements to the new lists """ |
373 | | - if current_time != times[index]: |
374 | | - new_metrics.append(0.0) |
375 | | - |
376 | | - else: |
377 | | - new_metrics.append(metrics[index]) |
378 | | - index = index + 1 |
379 | | - |
380 | | - current_time = current_time + interval |
381 | | - |
382 | | - iterations = iterations + 1 |
383 | | - |
384 | | - return [new_times, new_metrics] |
| 381 | + INPUT: |
| 382 | + |
| 383 | + ts - timestamp string |
| 384 | + format_from - input format |
| 385 | + format_to - output format |
| 386 | + |
| 387 | + RETURN: |
| 388 | + |
| 389 | + new_timestamp - new timestamp string |
| 390 | + |
385 | 391 | |
386 | | - """ |
| 392 | +""" |
| 393 | +def timestamp_convert_format(ts, format_from, format_to): |
387 | 394 | |
388 | | - Converts from one timestamp format to another timestamp format |
389 | | - |
390 | | - format 1 - 20080101000606 |
391 | | - format 2 - 2008-01-01 00:06:06 |
392 | | - |
393 | | - INPUT: |
| 395 | + if format_from == 1: |
394 | 396 | |
395 | | - ts - timestamp string |
396 | | - format_from - input format |
397 | | - format_to - output format |
398 | | - |
399 | | - RETURN: |
400 | | - |
401 | | - new_timestamp - new timestamp string |
402 | | - |
403 | | - |
404 | | - """ |
405 | | - def timestamp_convert_format(self, ts, format_from, format_to): |
406 | | - |
407 | | - if format_from == 1: |
| 397 | + if format_to == 2: |
| 398 | + new_timestamp = ts[0:4] + '-' + ts[4:6] + '-' + ts[6:8] + ' ' + ts[8:10] + ':' + ts[10:12] + ':' + ts[12:14] |
408 | 399 | |
409 | | - if format_to == 2: |
410 | | - new_timestamp = ts[0:4] + '-' + ts[4:6] + '-' + ts[6:8] + ' ' + ts[8:10] + ':' + ts[10:12] + ':' + ts[12:14] |
411 | | - |
412 | | - elif format_from == 2: |
413 | | - if format_to == 1: |
414 | | - new_timestamp = ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] + ts[14:16] + ts[15:17] |
415 | | - |
416 | | - return new_timestamp |
417 | | - |
| 400 | + elif format_from == 2: |
| 401 | + if format_to == 1: |
| 402 | + new_timestamp = ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] + ts[14:16] + ts[15:17] |
| 403 | + |
| 404 | + return new_timestamp |
| 405 | + |