r85705 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r85704‎ | r85705 | r85706 >
Date:00:34, 9 April 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
Built out DataLoader class to handle connections to MySQL (eventually this will handle queries as well). Standardized the documentation in the class files.
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py (added) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/TimestampProcessor.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/compute_confidence.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/fundraiser_reporting.py (modified) (history)
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/query_store.py (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/compute_confidence.py
@@ -2,57 +2,47 @@
33
44 """
55
6 -compute_confidence.py
7 -
8 -wikimediafoundation.org
9 -Ryan Faulkner
10 -January 11th, 2011
11 -
12 -
136 Generates confidence estimate for a test
147
158
16 -class ConfidenceTesting
17 -
189 """
1910
 11+__author__ = "Ryan Faulkner"
 12+__revision__ = "$Rev$"
 13+__date__ = "January 11th, 2011"
 14+
2015 import sys
 16+sys.path.append('../')
 17+
2118 import math
2219 import datetime as dt
2320 import MySQLdb
2421 import pylab
25 -
2622 import matplotlib
27 -matplotlib.use('Agg')
2823
2924 import miner_help as mh
3025 import query_store as qs
 26+import DataLoader as DL
3127
32 -class ConfidenceTest(object):
 28+matplotlib.use('Agg')
3329
34 - # Database and Cursor objects
35 - db = None
36 - cur = None
37 -
38 - def init_db(self):
39 - """ Establish connection """
40 - #db = MySQLdb.connect(host='db10.pmtpa.wmnet', user='rfaulk', db='faulkner')
41 - self.db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
42 - #self.db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner')
4330
44 - """ Create cursor """
45 - self.cur = self.db.cursor()
 31+"""
4632
47 - def close_db(self):
48 - self.cur.close()
49 - self.db.close()
50 -
 33+ CLASS :: ConfidenceTest
 34+
 35+
 36+ METHODS:
 37+
 38+
5139
 40+"""
 41+class ConfidenceTest(DataLoader):
 42+
 43+
5244 """
5345 ConfidenceTesting :: query_tables
5446 """
55 - # query_name = 'report_banner_confidence'
56 - # metric_name = 'don_per_imp'
5747 def query_tables(self, query_name, metric_name, campaign, item_1, item_2, start_time, end_time, interval, num_samples):
5848
5949 ret = self.get_time_lists(start_time, end_time, interval, num_samples)
@@ -60,7 +50,7 @@
6151 times_indices = ret[1]
6252
6353 self.init_db()
64 - query_obj = qs.query_store()
 54+ query_obj = qs.QueryStore()
6555
6656 filename = './sql/' + query_name + '.sql'
6757 sql_stmnt = mh.read_sql(filename)
@@ -211,7 +201,7 @@
212202 """
213203 def run_test(self, test_name, query_name, metric_name, campaign, item_1, item_2, start_time, end_time, interval, num_samples):
214204
215 - query_obj = qs.query_store()
 205+ query_obj = qs.QueryStore()
216206
217207 """ Retrieve values from database """
218208 ret = self.query_tables(query_name, metric_name, campaign, item_1, item_2, start_time, end_time, interval, num_samples)
@@ -226,22 +216,7 @@
227217 std_devs_1 = ret[2]
228218 std_devs_2 = ret[3]
229219 confidence = ret[4]
230 -
231 - """ Pad data with beginning and end points """
232 - # times_indices.insert(len(times_indices), math.ceil(times_indices[-1]))
233 - # times_indices.insert(0, 0)
234 -
235 - # means_1.insert(len(means_1),means_1[-1])
236 - # means_2.insert(len(means_2),means_2[-1])
237 - # means_1.insert(0,means_1[0])
238 - # means_2.insert(0,means_2[0])
239 -
240 - # std_devs_1.insert(len(std_devs_1),0)
241 - # std_devs_2.insert(len(std_devs_2),0)
242 - # std_devs_1.insert(0,0)
243 - # std_devs_2.insert(0,0)
244220
245 -
246221 """ plot the results """
247222 xlabel = 'Hours'
248223 subplot_index = 111
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/fundraiser_reporting.py
@@ -2,13 +2,6 @@
33
44 """
55
6 -fundraiser_reporting.py
7 -
8 -wikimediafoundation.org
9 -Ryan Faulkner
10 -December 16th, 2010
11 -
12 -
136 Pulls data from storage3.faulkner and generates plots.
147
158
@@ -32,6 +25,7 @@
3326 import query_store as qs
3427 import miner_help as mh
3528 import TimestampProcesser as TP
 29+import DataLoader as DL
3630
3731 matplotlib.use('Agg')
3832
@@ -39,46 +33,33 @@
4034
4135 """
4236
43 -CLASS :: ^FundraiserReporting^
44 -
45 -Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include:
46 -
47 -run_query() - format and execute the query to obtain data
48 -gen_plot() - plots the results of the report
49 -write_to_html_table() - writes the results to an HTML table
50 -run()
51 -
52 -"""
53 -class FundraiserReporting(TP.TimestampProcesser):
54 -
55 - # Database and Cursor objects
56 - db = None
57 - cur = None
 37+ CLASS :: FundraiserReporting
5838
59 - def init_db(self):
60 - """ Establish connection """
61 - #db = MySQLdb.connect(host='db10.pmtpa.wmnet', user='rfaulk', db='faulkner')
62 - self.db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
63 - #self.db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner')
64 -
65 - """ Create cursor """
66 - self.cur = self.db.cursor()
 39+ Base class for reporting fundraiser analytics. Methods that are intended to be extended in derived classes include:
6740
68 - def close_db(self):
69 - self.cur.close()
70 - self.db.close()
 41+ METHODS:
7142
 43+ run_query - format and execute the query to obtain data
 44+ gen_plot - plots the results of the report
 45+ write_to_html_table - writes the results to an HTML table
 46+ run
 47+
 48+"""
 49+class FundraiserReporting(TP.TimestampProcesser, DL.DataLoader):
7250
73 -
74 -
7551 """
7652
77 - def smooth::
 53+ Smooths a list of values
 54+
 55+ INPUT:
 56+ values - the list of values to smooth
 57+ window_length - size of the smoothing window (halved and floored before use)
 58+
 59+ RETURN:
 60+ new_values - list of smoothed values
7861
79 - Smooths a list of values
80 -
8162 """
82 - def smooth(values, window_length):
 63+ def smooth(self, values, window_length):
8364
8465 window_length = int(math.floor(window_length / 2))
8566
@@ -101,12 +82,18 @@
10283
10384 """
10485
105 - workaround for issue with tuple objects in HTML.py
106 - MySQLdb returns unfamiliar tuple elements from its fetchall method
107 - this is probably a version problem since the issue popped up in 2.5 but not 2.6
 86+ workaround for issue with tuple objects in HTML.py
 87+ MySQLdb returns unfamiliar tuple elements from its fetchall() method
 88+ this is probably a version problem since the issue popped up in 2.5 but not 2.6
 89+
 90+ INPUT:
 91+ row - row object returned from MySQLdb.fetchall()
 92+
 93+ RETURN:
 94+ l - a plain list containing the elements of row
10895
10996 """
110 - def listify(row):
 97+ def listify(self, row):
11198 l = []
11299 for i in row:
113100 l.append(i)
@@ -115,36 +102,62 @@
116103
117104 """
118105
119 - To be overloaded by subclasses for specific types of queries
 106+ To be overloaded by subclasses for specific types of queries
 107+
 108+ INPUT:
 109+ start_time, end_time - bounds of the time window to query
 110+ query_name, metric_name - names of the query to run and of the metric to report
 111+
 112+ RETURN:
 113+ return_status - integer, 0 indicates un-exceptional execution
120114
121115 """
122116 def run_query(self, start_time, end_time, query_name, metric_name):
123 - return
 117+ return 0
124118
125119
126120 """
127121
128 - To be overloaded by subclasses for different plotting behaviour
 122+ To be overloaded by subclasses for different plotting behaviour
 123+
 124+ INPUT:
 125+ x, y_lists, labels - x values, the lists of y values to plot against them, and their labels
 126+ title, xlabel, ylabel, subplot_index, fname - plot title, axis labels, subplot index and output file name
 127+
 128+ RETURN:
 129+ return_status - integer, 0 indicates un-exceptional execution
129130
130131 """
131132 def gen_plot(self,x, y_lists, labels, title, xlabel, ylabel, subplot_index, fname):
132 - return
 133+ return 0
133134
134135 """
135136
136 - To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py)
 137+ To be overloaded by subclasses for writing tables - this functionality currently exists outside of this class structure (test_reporting.py)
 138+
 139+ INPUT:
 140+ (none)
 141+
 142+
 143+ RETURN:
 144+ return_status - integer, 0 indicates un-exceptional execution
137145
138146 """
139147 def write_to_html_table(self):
140 - return
 148+ return 0
141149
142150
143151
144152 """
145153
146 - The access point of FundraiserReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc.
147 - To be overloaded by subclasses
148 -
 154+ The access point of FundraiserReporting and derived objects. Will be used for executing and orchestrating the creation of plots, tables etc.
 155+ To be overloaded by subclasses
 156+
 157+ INPUT:
 158+
 159+ RETURN:
 160+ return_status - integer, 0 indicates un-exceptional execution
 161+
149162 """
150163 def run(self):
151164 return
@@ -168,7 +181,7 @@
169182
170183 self.init_db()
171184
172 - query_obj = qs.query_store()
 185+ query_obj = qs.QueryStore()
173186
174187 # Load the SQL File & Format
175188 filename = './sql/' + query_name + '.sql'
@@ -404,7 +417,7 @@
405418
406419 self.init_db()
407420
408 - query_obj = qs.query_store()
 421+ query_obj = qs.QueryStore()
409422
410423 metric_lists = mh.AutoVivification()
411424 time_lists = mh.AutoVivification()
@@ -620,7 +633,7 @@
621634 hours_back = 72
622635 times = self.gen_date_strings_hr(now, hours_back)
623636
624 - query_obj = qs.query_store()
 637+ query_obj = qs.QueryStore()
625638 sql_stmnt = mh.read_sql('./sql/report_latest_campaign.sql')
626639 sql_stmnt = query_obj.format_query(query_name, sql_stmnt, [times[0]])
627640
@@ -696,7 +709,7 @@
697710
698711 self.init_db()
699712
700 - query_obj = qs.query_store()
 713+ query_obj = qs.QueryStore()
701714
702715 counts = list()
703716 times = list()
@@ -754,10 +767,11 @@
755768 pylab.ylabel(ylabel)
756769
757770 pylab.title(title)
758 - pylab.savefig(fname, format='png'
 771+ pylab.savefig(fname, format='png')
 772+
759773 def run(self, query_name):
760774
761 - query_obj = qs.query_store()
 775+ query_obj = qs.QueryStore()
762776
763777 # Current date & time
764778 now = datetime.datetime.now()
@@ -822,7 +836,7 @@
823837
824838 self.init_db()
825839
826 - query_obj = qs.query_store()
 840+ query_obj = qs.QueryStore()
827841
828842 metrics = mh.AutoVivification()
829843 times = mh.AutoVivification()
@@ -948,7 +962,7 @@
949963 """
950964 def run(self, start_time, end_time, interval, query_name, metric_name, campaign):
951965
952 - query_obj = qs.query_store()
 966+ query_obj = qs.QueryStore()
953967
954968 print '\nGenerating ' + query_name +', start and end times are: ' + start_time + ' - ' + end_time +' ... \n'
955969
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataLoader.py
@@ -0,0 +1,36 @@
 2+
 3+__author__ = "Ryan Faulkner"
 4+__revision__ = "$Rev$"
 5+__date__ = "April 8th, 2010"
 6+
 7+
 8+import sys
 9+import MySQLdb
 10+
 11+
 12+"""
 13+
 14+ CLASS :: DataLoader
 15+
 16+ METHODS:
 17+ init_db -
 18+ close_db -
 19+"""
 20+class DataLoader(object):
 21+
 22+ # Database and Cursor objects
 23+ db = None
 24+ cur = None
 25+
 26+ def init_db(self):
 27+ """ Establish connection """
 28+ #db = MySQLdb.connect(host='db10.pmtpa.wmnet', user='rfaulk', db='faulkner')
 29+ self.db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
 30+ #self.db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner')
 31+
 32+ """ Create cursor """
 33+ self.cur = self.db.cursor()
 34+
 35+ def close_db(self):
 36+ self.cur.close()
 37+ self.db.close()
\ No newline at end of file
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/query_store.py
@@ -1,20 +1,41 @@
22
33 """
44
5 -query_store.py
 5+Class that contains and organizes query info. Depends on the contents of ../sql/ where filenames are
 6+coupled with query_name parameters
67
7 -wikimediafoundation.org
8 -Ryan Faulkner
9 -November 28th, 2010
 8+"""
109
 10+__author__ = "Ryan Faulkner"
 11+__revision__ = "$Rev$"
 12+__date__ = "November 28th, 2010"
1113
12 -Class that contains and organizes query info
1314
 15+"""
1416
 17+ CLASS :: QueryStore
 18+
 19+
 20+
 21+ METHODS:
 22+
 23+ format_query
 24+ get_query
 25+ get_query_header
 26+ get_key_index
 27+ get_count_index
 28+ get_time_index
 29+ get_campaign_index
 30+ get_banner_index
 31+ get_landing_page_index
 32+ get_metric_index
 33+ get_plot_title
 34+ get_plot_ylabel
 35+ get_metric_full_name
 36+
1537 """
 38+class QueryStore:
1639
17 -class query_store:
18 -
1940 def format_query(self, query_name, sql_stmnt, args):
2041
2142 if query_name == 'report_campaign_ecomm':
@@ -378,7 +399,6 @@
379400 return -1
380401 else:
381402 return 'no such table'
382 -
383403
384404 def get_plot_title(self, query_name):
385405 if query_name == 'report_banner_impressions_by_hour':
Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/TimestampProcessor.py
@@ -22,7 +22,6 @@
2323 import HTML
2424 import math
2525
26 -import query_store as qs
2726 import miner_help as mh
2827
2928 matplotlib.use('Agg')
@@ -115,7 +114,7 @@
116115 time_lists - a list of datetime objects
117116
118117 RETURN:
119 - time_lists -
 118+ time_lists - dictionary with a single key 'key' that stores the list
120119 isList - a dictionary of normalized times
121120
122121 """
@@ -184,23 +183,23 @@
185184
186185 """
187186
188 - Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
189 -
190 - format 1 - 20080101000606
191 - format 2 - 2008-01-01 00:06:06
192 -
193 - INPUT:
194 -
195 - now - datetime object
196 - hours_back - the amount of time the
197 - format - the format of the returned timestamp strings
198 - resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
199 -
200 -
201 - RETURN:
202 - start_time - formatted datetime string
203 - end_time - formatted datetime string
204 -
 187+ Given a datetime object produce a timestamp a number of hours in the past and according to a particular format
 188+
 189+ format 1 - 20080101000606
 190+ format 2 - 2008-01-01 00:06:06
 191+
 192+ INPUT:
 193+
 194+ time_ref - datetime object used as the reference time
 195+ hours_back - the number of hours to go back from time_ref
 196+ format - the format of the returned timestamp strings
 197+ resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
 198+
 199+
 200+ RETURN:
 201+ start_time - formatted datetime string
 202+ end_time - formatted datetime string
 203+
205204 """
206205 def gen_date_strings(self, time_ref, hours_back, format, resolution):
207206
@@ -219,19 +218,19 @@
220219
221220 """
222221
223 - Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings.
 222+ Convert datetime objects to a timestamp of a given format. HELPER METHOD for gen_date_strings.
 223+
 224+ INPUT:
224225
225 - INPUT:
 226+ time_obj - datetime object
 227+ format - the format of the returned timestamp strings
 228+ resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
 229+
 230+
 231+ RETURN:
 232+ start_time - formatted datetime string
 233+ end_time - formatted datetime string
226234
227 - time_obj - datetime object
228 - format - the format of the returned timestamp strings
229 - resolution - the resolution detail of the timestamp (e.g. down to the minute, down to the hour, ...)
230 -
231 -
232 - RETURN:
233 - start_time - formatted datetime string
234 - end_time - formatted datetime string
235 -
236235 """
237236 def timestamp_from_obj(self, time_obj, format, resolution):
238237
@@ -288,16 +287,16 @@
289288
290289 """
291290
292 - Convert timestamp to a datetime object of a given format
293 -
294 - INPUT:
295 -
296 - timestamp - timestamp string
297 - format - the format of the returned timestamp strings
298 -
299 -
300 - RETURN:
301 - time_obj - datetime conversion of timestamp string
 291+ Convert timestamp to a datetime object of a given format
 292+
 293+ INPUT:
 294+
 295+ timestamp - timestamp string
 296+ format - the format of the returned timestamp strings
 297+
 298+
 299+ RETURN:
 300+ time_obj - datetime conversion of timestamp string
302301
303302 """
304303 def timestamp_to_obj(self, timestamp, format):
@@ -315,27 +314,27 @@
316315
317316 """
318317
319 - Inserts missing interval points into the time and metric lists
320 -
321 - Assumptions:
322 - _metrics_ and _times_ are lists of the same length
323 - there must be a data point at each interval
324 - Some data points may be missed
325 - where there is no metric data the metric takes on the value 0.0
326 -
327 - e.g. when _interval_ = 10
328 - times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 30 40 50], [1 1 0 1 0 1]
329 -
330 - INPUT:
331 -
332 - times -
333 - metrics -
334 - interval -
335 -
336 - RETURN:
337 - new_times -
338 - new_metrics -
 318+ Inserts missing interval points into the time and metric lists
339319
 320+ Assumptions:
 321+ _metrics_ and _times_ are lists of the same length
 322+ there must be a data point at each interval
 323+ Some data points may be missed
 324+ where there is no metric data the metric takes on the value 0.0
 325+
 326+ e.g. when _interval_ = 10
 327+ times = [0 10 30 50], metrics = [1 1 1 1] ==> [0 10 20 30 40 50], [1 1 0 1 0 1]
 328+
 329+ INPUT:
 330+
 331+ times -
 332+ metrics -
 333+ interval -
 334+
 335+ RETURN:
 336+ new_times -
 337+ new_metrics -
 338+
340339 """
341340 def normalize_intervals(self, times, metrics, interval):
342341
@@ -373,20 +372,20 @@
374373
375374 """
376375
377 - Converts from one timestamp format to another timestamp format
 376+ Converts from one timestamp format to another timestamp format
 377+
 378+ format 1 - 20080101000606
 379+ format 2 - 2008-01-01 00:06:06
 380+
 381+ INPUT:
378382
379 - format 1 - 20080101000606
380 - format 2 - 2008-01-01 00:06:06
 383+ ts - timestamp string
 384+ format_from - input format
 385+ format_to - output format
381386
382 - INPUT:
383 -
384 - ts - timestamp string
385 - format_from - input format
386 - format_to - output format
387 -
388 - RETURN:
389 -
390 - new_timestamp - new timestamp string
 387+ RETURN:
 388+
 389+ new_timestamp - new timestamp string
391390
392391
393392 """

Status & tagging log