r87443 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r87442‎ | r87443 | r87444 >
Date:20:11, 4 May 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
- generalized import paths to 'Fundraiser_Tools'
- Added 'TableLoader' and 'TestTableLoader' classes for handling table specific queries
- Added logic to 'BannerLPReportingLoader' (although this may be removed entirely)
- Added query_artifacts method for new query types in 'CampaignReportingLoader'
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py
@@ -26,6 +26,7 @@
2727 import Fundraiser_Tools.miner_help as mh
2828 import Fundraiser_Tools.classes.QueryData as QD
2929 import Fundraiser_Tools.classes.TimestampProcessor as TP
 30+import Fundraiser_Tools.classes.Helper as Hlp
3031
3132 """
3233
@@ -177,8 +178,11 @@
178179 except KeyError:
179180 print >> sys.stderr, 'Could not find a query for type: ' + query_type
180181 sys.exit(2)
181 -
 182+
 183+
182184
 185+
 186+
183187 class IntervalReportingLoader(DataLoader):
184188
185189 def __init__(self):
@@ -364,11 +368,140 @@
365369 """
366370 class BannerLPReportingLoader(DataLoader):
367371
368 - def run_query(self):
369 - return
 372+ def __init__(self):
 373+ self._query_names_['LP'] = 'report_LP_metrics'
 374+ self._query_names_['BAN'] = 'report_banner_metrics'
 375+ self._query_names_['BAN-TEST'] = 'report_banner_metrics'
 376+ self._query_names_['LP-TEST'] = 'report_LP_metrics'
 377+
 378+ """
 379+ <description>
 380+
 381+ INPUT:
 382+
 383+ RETURN:
 384+
 385+ """
 386+ def run_query(self,start_time, end_time, campaign, query_name, metric_name):
 387+
 388+ self.init_db()
 389+
 390+ metric_lists = mh.AutoVivification()
 391+ time_lists = mh.AutoVivification()
 392+ # table_data = [] # store the results in a table for reporting
 393+
 394+ # Load the SQL File & Format
 395+ filename = self._sql_path_ + query_name + '.sql'
 396+ sql_stmnt = mh.read_sql(filename)
 397+
 398+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, campaign])
 399+
 400+ key_index = QD.get_key_index(query_name)
 401+ time_index = QD.get_time_index(query_name)
 402+ metric_index = QD.get_metric_index(query_name, metric_name)
 403+
 404+ """ Composes the data for each banner or LP """
 405+ try:
 406+ err_msg = sql_stmnt
 407+ self.cur.execute(sql_stmnt)
 408+
 409+ results = self.cur.fetchall()
 410+
 411+ for row in results:
 412+
 413+ key_name = row[key_index]
 414+
 415+ try:
 416+ metric_lists[key_name].append(row[metric_index])
 417+ time_lists[key_name].append(row[time_index])
 418+ except:
 419+ metric_lists[key_name] = list()
 420+ time_lists[key_name] = list()
 421+
 422+ metric_lists[key_name].append(row[metric_index])
 423+ time_lists[key_name].append(row[time_index])
 424+
 425+ except:
 426+ self.db.rollback()
 427+ sys.exit("Database Interface Exception:\n" + err_msg)
 428+
 429+ """ Convert Times to Integers """
 430+ max_i = 0
 431+
 432+ for key in time_lists.keys():
 433+ for date_str in time_lists[key]:
 434+ day_int = int(date_str[8:10])
 435+ hr_int = int(date_str[11:13])
 436+ date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
 437+ if date_int > max_i:
 438+ max_i = date_int
 439+ max_day = day_int
 440+ max_hr = hr_int
 441+
 442+
 443+ # Normalize dates
 444+ time_norm = mh.AutoVivification()
 445+ for key in time_lists.keys():
 446+ for date_str in time_lists[key]:
 447+ day = int(date_str[8:10])
 448+ hr = int(date_str[11:13])
 449+ # date_int = int(date_str[0:4]+date_str[5:7]+date_str[8:10]+date_str[11:13])
 450+ elem = (day - max_day) * 24 + (hr - max_hr)
 451+ try:
 452+ time_norm[key].append(elem)
 453+ except:
 454+ time_norm[key] = list()
 455+ time_norm[key].append(elem)
370456
 457+
 458+ self.close_db()
 459+
 460+ return [metric_lists, time_norm]
371461
372462
 463+ """ !! MOVE INTO DATA LOADER!!
 464+
 465+ <description>
 466+
 467+ INPUT:
 468+
 469+ RETURN:
 470+
 471+ """
 472+ def get_latest_campaign(self):
 473+
 474+ query_name = 'report_latest_campaign'
 475+ self.init_db()
 476+
 477+ """ Look at campaigns over the past 24 hours - TS format=1, TS resolution=1 """
 478+ now = datetime.datetime.now()
 479+ hours_back = 72
 480+ times = self.gen_date_strings(now, hours_back,1,1)
 481+
 482+ sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')
 483+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [times[0]])
 484+
 485+ campaign_index = QD.get_campaign_index(query_name)
 486+ time_index = QD.get_time_index(query_name)
 487+
 488+ try:
 489+ err_msg = sql_stmnt
 490+ self.cur.execute(sql_stmnt)
 491+
 492+ row = self.cur.fetchone()
 493+ except:
 494+ self.db.rollback()
 495+ sys.exit("Database Interface Exception:\n" + err_msg)
 496+
 497+ campaign = row[campaign_index]
 498+ timestamp = row[time_index]
 499+
 500+ self.close_db()
 501+
 502+ return [campaign, timestamp]
 503+
 504+
 505+
373506 class HypothesisTestLoader(DataLoader):
374507
375508 """
@@ -471,6 +604,8 @@
472605 def __init__(self):
473606 self._query_names_['totals'] = 'report_campaign_totals'
474607 self._query_names_['times'] = 'report_campaign_times'
 608+ self._query_names_['banners'] = 'report_campaign_banners'
 609+ self._query_names_['lps'] = 'report_campaign_lps'
475610
476611 """
477612 !! MODIFY -- use python reflection !! ... maybe
@@ -484,9 +619,13 @@
485620
486621 self.init_db()
487622
 623+ data = None
 624+
488625 if query_type == 'totals':
489626 data = self.query_totals(query_type, params)
490 -
 627+ elif query_type == 'banners' or query_type == 'lps':
 628+ data = self.query_artifacts(query_type, params)
 629+
491630 self.close_db()
492631
493632 return data
@@ -495,6 +634,8 @@
496635
497636 Handle queries from "report_campaign_totals"
498637
 638+ Gets metric totals for campaigns
 639+
499640 """
500641 def query_totals(self, query_type, params):
501642
@@ -539,9 +680,57 @@
540681
541682
542683 return data
 684+
 685+ """
 686+
 687+ Handle queries from "report_campaign_banners" and "report_campaign_lps"
 688+
 689+ Gets a list of banners and landing pages running on the campaign in a time frame
 690+
 691+ """
 692+ def query_artifacts(self, query_type, params):
 693+
 694+ """ Resolve parameters """
 695+ utm_campaign = params['utm_campaign']
 696+ start_time = params['start_time']
 697+ end_time = params['end_time']
 698+
 699+ query_name = self.get_sql_filename_for_query(query_type)
 700+
 701+ """ Load the SQL File & Format """
 702+ filename = self._sql_path_+ query_name + '.sql'
 703+ sql_stmnt = mh.read_sql(filename)
 704+ sql_stmnt = QD.format_query(query_name, sql_stmnt, [start_time, end_time, utm_campaign])
 705+
 706+ """ Get Indexes into Query """
 707+ key_index = QD.get_key_index(query_name)
 708+
 709+ data = list()
 710+
 711+ """ Compose the data for each separate donor pipeline artifact """
 712+ try:
 713+
 714+ self._cur_.execute(sql_stmnt)
 715+
 716+ results = self._cur_.fetchall()
 717+
 718+ for row in results:
 719+ data.append(row[key_index])
 720+ # key_name = row[key_index]
 721+
 722+ except Exception as inst:
 723+ print type(inst) # the exception instance
 724+ print inst.args # arguments stored in .args
 725+ print inst # __str__ allows args to printed directly
 726+
 727+ self._db_.rollback()
 728+ sys.exit(0)
543729
544730
 731+ return data
545732
 733+
 734+
546735 """
547736
548737 CLASS :: TTestLoaderHelp
@@ -588,4 +777,87 @@
589778 self._db_.close()
590779
591780 return p
 781+
 782+"""
 783+
 784+ CLASS :: TableLoader
 785+
 786+ Provides data access particular to the t-test
 787+
 788+ METHODS:
 789+ init_db -
 790+ close_db -
 791+"""
 792+class TableLoader(DataLoader):
 793+
 794+ def insert_row(self):
 795+ return
 796+
 797+ def delete_row(self):
 798+ return
 799+
 800+ def update_row(self):
 801+ return
 802+
 803+"""
 804+
 805+ CLASS :: TestTableLoader
 806+
 807+ storage3.pmtpa.wmnet.faulkner.test:
 808+
 809+ +---------------+---------------+------+-----+---------------------+-----------------------------+
 810+ | Field | Type | Null | Key | Default | Extra |
 811+ +---------------+---------------+------+-----+---------------------+-----------------------------+
 812+ | utm_campaign | varchar(128) | NO | PRI | NULL | |
 813+ | start_time | timestamp | NO | | CURRENT_TIMESTAMP | on update CURRENT_TIMESTAMP |
 814+ | end_time | timestamp | NO | | 0000-00-00 00:00:00 | |
 815+ | winner | varchar(128) | YES | | NULL | |
 816+ | is_conclusive | binary(1) | YES | | NULL | |
 817+ | html_report | varchar(2000) | YES | | NULL | |
 818+ +---------------+---------------+------+-----+---------------------+-----------------------------+
 819+
 820+ METHODS:
 821+
 822+"""
 823+class TestTableLoader(TableLoader):
 824+
 825+ def insert_row(self, **kwargs):
 826+
 827+ insert_stmnt = 'insert into test values '
 828+
 829+ winner = 'NULL'
 830+ is_conclusive = 'NULL'
 831+ html_report = 'NULL'
 832+
 833+
 834+ for key in kwargs:
 835+ if key == 'utm_campaign':
 836+ utm_campaign = Hlp.stringify(kwargs[key])
 837+ elif key == 'start_time':
 838+ start_time = Hlp.stringify(kwargs[key])
 839+ elif key == 'end_time':
 840+ end_time = Hlp.stringify(kwargs[key])
 841+ elif key == 'winner':
 842+ winner = kwargs[key]
 843+ elif key == 'is_conclusive':
 844+ is_conclusive = kwargs[key]
 845+ elif key == 'html_report':
 846+ html_report = kwargs[key]
 847+
 848+ insert_stmnt = insert_stmnt + '(' + utm_campaign + ',' + start_time + ',' + end_time + ',' + winner + ',' + is_conclusive + ',' + html_report + ')'
 849+
 850+ try:
 851+ self._cur_.execute(insert_stmnt)
 852+ except:
 853+ self._db_.rollback()
 854+ self._db_.close()
 855+ sys.exit('Could not execute: ' + insert_stmnt)
 856+
 857+ return
 858+
 859+ def delete_row(self):
 860+ return
 861+
 862+ def update_row(self):
 863+ return
592864
\ No newline at end of file

Status & tagging log