r86931 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r86930‎ | r86931 | r86932 >
Date:07:31, 26 April 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
Added methods to the DataLoader base class that allow new keys to be composed or parsed from existing ones.
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py
@@ -16,9 +16,12 @@
1717
1818
1919 import sys
 20+sys.path.append('../')
 21+
2022 import MySQLdb
2123 import math
2224 import datetime
 25+import re # regular expression matching
2326
2427 import miner_help as mh
2528 import QueryData as QD
@@ -55,26 +58,127 @@
5659 self._db_.close()
5760
5861 """
59 - <DESCRIPTION>
 62+ Make a new key-entry based on the search key and action. Take all keys containing the search_key
 63+ and compose a new key with action.
6064
 65+ NOTE: this method will fail if a matched key's list is shorter than the list of the first matched key (IndexError).
 66+
6167 INPUT:
62 - query_type -
 68+ data_dict -
 69+ search_strings - list of substrings
 70+ action - one of {'+', '-', '*', '/'}, specifies the operation to compose the new key list (only '+' is implemented so far)
 71+
 72+ RETURN:
 73+ new_data_dict -
 74+ """
 75+ def compose_key(self, data_dict, search_strings, new_key, action):
 76+
 77+ new_data_dict = dict()
 78+ new_list = list()
 79+
 80+ for key in data_dict.keys():
 81+ for str in search_strings:
6382
 83+ if re.search(str, key):
 84+ if len(new_list) == 0:
 85+ new_list = data_dict[key]
 86+ else:
 87+ if action == '+':
 88+
 89+ """ catch any errors """
 90+ try:
 91+ for i in range(len(new_list)):
 92+ new_list[i] = new_list[i] + data_dict[key][i]
 93+ except IndexError as e:
 94+ print >> sys.stderr, e.msg
 95+ break;
 96+
 97+ new_data_dict[key] = data_dict[key]
6498
 99+ new_data_dict[new_key] = new_list
 100+
 101+ return new_data_dict
 102+
 103+ """
 104+ Only include keys from data_dict that are matched by at least one of the strings in search_strings.
 105+
 106+ INPUT:
 107+ data_dict -
 108+ search_strings - list of substrings
 109+ action - one of {'+', '-', '*', '/'}, specifies the operation to compose the new key list
 110+
65111 RETURN:
66 - -
 112+ new_data_dict -
 113+ -
 114+ """
 115+ def include_keys(self, data_dict, search_strings):
 116+
 117+ new_data_dict = dict()
 118+
 119+ for key in data_dict.keys():
 120+ for str in search_strings:
 121+ """ is the key a super-string of any of the strings in search_strings """
 122+ if re.search(str, key):
 123+ new_data_dict[key] = data_dict[key]
 124+
 125+ return new_data_dict
 126+
 127+ """
 128+ Remove all keys from data_dict that are matched by any of the strings in search_strings.
 129+
 130+ INPUT:
 131+ data_dict -
 132+ search_strings - list of substrings
 133+ action - one of {'+', '-', '*', '/'}, specifies the operation to compose the new key list
 134+
 135+ RETURN:
 136+ new_data_dict -
 137+ -
 138+ """
 139+ def exclude_keys(self, data_dict, search_strings):
 140+
 141+ new_data_dict = dict()
 142+ regExp = ''
 143+
 144+ for str in search_strings:
 145+ regExp = regExp + '(' + str + ')|'
67146
 147+ regExp = regExp[:-1]
 148+
 149+ for key in data_dict.keys():
 150+ if not(re.search(regExp, key)):
 151+ new_data_dict[key] = data_dict[key]
 152+
 153+ return new_data_dict
 154+
 155+
68156 """
 157+ Return a specific query name given a query type
 158+
 159+ INPUT:
 160+ query_type -
 161+
 162+ RETURN:
 163+ query_name -
 164+
 165+ """
def get_sql_filename_for_query(self, query_type):
    """
        Look up the query name registered for a query type.

        INPUT:
            query_type  - key into self._query_names_

        RETURN:
            query_name  - the name stored for query_type

        Writes a message to stderr and exits with status 2 when no query
        is registered for query_type.
    """
    # Read the mapping directly; calling this method again from here would
    # recurse without end.
    # NOTE(review): assumes _query_names_ is a mapping available on the
    # instance or class, as the IntervalReportingLoader override relies on
    # -- confirm for the base class.
    try:
        query_name = self._query_names_[query_type]
    except KeyError:
        sys.stderr.write('Could not find a query for type: ' + str(query_type) + '\n')
        sys.exit(2)

    return query_name
 175+
72176 class IntervalReportingLoader(DataLoader):
73177
74178 def __init__(self):
75 - self._query_names_['banner'] = 'report_banner_metrics_minutely'
76 - self._query_names_['LP'] = 'report_LP_metrics_minutely'
77 - self._query_names_['campaign'] = 'report_campaign_metrics_minutely'
78 - self._query_names_['campaign_total'] = 'report_campaign_metrics_minutely_total'
 179+ self._query_names_['banner'] = 'report_banner_metrics_minutely'
 180+ self._query_names_['LP'] = 'report_LP_metrics_minutely'
 181+ self._query_names_['campaign'] = 'report_campaign_metrics_minutely'
 182+ self._query_names_['campaign_total'] = 'report_campaign_metrics_minutely_total'
79183
80184 def get_sql_filename_for_query(self, query_type):
81185 return self._query_names_[query_type]
@@ -99,15 +203,10 @@
100204
101205 self.init_db()
102206
103 - try:
104 - query_name = self.get_sql_filename_for_query(query_type)
105 - except KeyError:
106 - print 'Could not find a query for type: ' + query_type
107 - sys.exit(2)
 207+ query_name = self.get_sql_filename_for_query(query_type)
108208
109209 metrics = mh.AutoVivification()
110210 times = mh.AutoVivification()
111 - times_norm = mh.AutoVivification()
112211
113212 """ Compose datetime objects to represent the first and last intervals """
114213 start_time_obj = TP.timestamp_to_obj(start_time, 1)
@@ -134,7 +233,7 @@
135234
136235 """ Compose the data for each separate donor pipeline artifact """
137236 try:
138 - err_msg = sql_stmnt
 237+ # err_msg = sql_stmnt
139238 self._cur_.execute(sql_stmnt)
140239
141240 results = self._cur_.fetchall()

Status & tagging log