r87507 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r87506‎ | r87507 | r87508 >
Date:19:28, 5 May 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
This singleton class is meant to serve as a bucket for as yet miscellaneous helper methods
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/Helper.py (added) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/Helper.py
@@ -0,0 +1,330 @@
 2+
 3+
 4+"""
 5+
 6+This module effectively functions as a Singleton class.
 7+
 8+Helper is a bucket for miscellaneous general methods that are needed and have yet to be grouped with other functionality.
 9+
 10+"""
 11+
 12+__author__ = "Ryan Faulkner"
 13+__revision__ = "$Rev$"
 14+__date__ = "May 3rd, 2011"
 15+
 16+
 17+import sys
 18+import math
 19+import calendar as cal
 20+import csv
 21+import MySQLdb
 22+
 23+
def precede_with_backslash(string, char):
    """Return a copy of `string` with a backslash before every `char`.

    The input is walked one element at a time and each element is compared
    to `char` for equality, so a multi-character `char` never matches and
    the input comes back unchanged.
    """
    pieces = []

    for current in string:
        if current == char:
            # Emit the escaping backslash ahead of the matched character.
            pieces.append('\\')
        pieces.append(current)

    return ''.join(pieces)
 35+
 36+"""
 37+
 38+
 39+"""
def get_test_type_metrics(test_type):
    """Return the list of metric names reported for a given test type.

    Args:
        test_type: 'banner' or 'LP'.

    Returns:
        A new list of metric-name strings for the test type.

    Raises:
        ValueError: if `test_type` is not one of the supported values.
            (Previously an unknown type fell through to an unbound local
            variable and failed with an opaque UnboundLocalError.)
    """
    if test_type == 'banner':
        return ['imp', 'donations', 'don_per_imp', 'amt50_per_imp']

    if test_type == 'LP':
        return ['views', 'donations', 'don_per_view', 'amt50_per_view']

    raise ValueError("Unknown test type: %r (expected 'banner' or 'LP')" % (test_type,))
 48+
 49+"""
 50+
 51+
 52+"""
def convert_Decimal_list_to_float(lst):
    """Return a new list holding `float(x)` for every element `x` of `lst`.

    The input is not modified.
    """
    return [float(value) for value in lst]
 60+
 61+
 62+"""
 63+ Get Test type from campaign
 64+
 65+"""
def get_test_type(utm_campaign):
    """Map a campaign identifier to its test type.

    Returns 'banner' for the two campaigns known to this function and
    None for any other value.
    """
    banner_campaigns = ('20101228JA075', '20101230JA089_US')

    if utm_campaign in banner_campaigns:
        return 'banner'

    # Unrecognised campaigns have no test type.
    return None
 72+
 73+"""
 74+ Wrap a string in double quotes
 75+
 76+"""
def stringify(str_to_stringify):
    """Return `str_to_stringify` wrapped in a pair of double quotes.

    No escaping is applied to quotes already inside the string.
    """
    quote = '"'
    return quote + str_to_stringify + quote
 79+
 80+
 81+
 82+
 83+"""
 84+
 85+!! FROM miner_help.py !!
 86+
 87+"""
 88+
 89+""" Determines the following hour based on the precise date to the hour """
def getNextHour(year, month, day, hour):
    """Return the hour that follows the given one as [year, month, day, hour].

    Args:
        year, month, day: calendar date as integers (month 1-12).
        hour: hour of the day, 0-23.

    Returns:
        A four-element list [next_year, next_month, next_day, next_hour].

    The rollover from 23:00 carries into the day, and from the last day of
    a month into the next month, and from December into the next year.
    (The previous version only advanced the day/month at the very end of a
    month/year, so e.g. 23:00 on the 5th mapped to 00:00 on the 5th.)
    """
    last_day_of_month = cal.monthrange(year, month)[1]

    if hour != 23:
        return [year, month, day, hour + 1]

    # Past 23:00: roll over into the next day.
    if day != last_day_of_month:
        return [year, month, day + 1, 0]

    # Last day of the month: roll over into the next month.
    if month != 12:
        return [year, month + 1, 1, 0]

    # December 31st: roll over into the next year.
    return [year + 1, 1, 1, 0]
 108+
 109+""" Determines the previous hour based on the precise date to the hour """
def getPrevHour(year, month, day, hour):
    """Return the hour that precedes the given one as [year, month, day, hour].

    Args:
        year, month, day: calendar date as integers (month 1-12).
        hour: hour of the day, 0-23.

    Returns:
        A four-element list [prev_year, prev_month, prev_day, prev_hour].
    """
    # Work out which month/year precede the current month.
    if month == 1:
        last_year, last_month = year - 1, 12
    else:
        last_year, last_month = year, month - 1

    # Look the length up in the year the previous month actually belongs to.
    # (The old code passed `year`; that only differs for December, which
    # always has 31 days, so results are unchanged.)
    last_day_of_prev_month = cal.monthrange(last_year, last_month)[1]

    if hour != 0:
        return [year, month, day, hour - 1]

    # Before 00:00: step back to 23:00 of the previous day.
    if day != 1:
        return [year, month, day - 1, 23]

    # First of the month: step back to the last day of the previous month.
    return [last_year, last_month, last_day_of_prev_month, 23]
 136+
 137+
class AutoVivification(dict):
    """Implementation of perl's autovivification feature.

    Reading a missing key creates, stores and returns a new empty instance
    of the same class, so nested assignments such as ``d[a][b][c] = v``
    work without creating the intermediate dictionaries by hand.
    """

    def __getitem__(self, item):
        try:
            found = dict.__getitem__(self, item)
        except KeyError:
            # Missing key: create a nested container of our own type and
            # remember it under that key.
            found = type(self)()
            self[item] = found
        return found
 146+
def read_sql(filename):
    """Return the entire contents of the text file `filename` as one string.

    Args:
        filename: path of the file holding the SQL statement.

    Returns:
        The file contents, line endings included.

    The file is opened in a `with` block so the handle is released even if
    reading fails part-way through.
    """
    with open(filename, 'r') as sql_file:
        return sql_file.read()
 160+
def drange(start, stop, step):
    """Return the list start, start + step, start + 2*step, ... up to (not
    including) `stop`; like `range` but `step` may be fractional.

    Args:
        start: first value of the sequence.
        stop: exclusive end of the sequence.
        step: increment between values; may be a float and may be negative.

    Returns:
        A list of values. It is empty when `stop` cannot be reached from
        `start` in the direction of `step`.

    Raises:
        ZeroDivisionError: if `step` is zero.

    The element count is derived from (stop - start) / step rather than
    from floor(1 / step), which produced too few elements whenever 1 / step
    was not a whole number (e.g. step = 0.3) and handed a float to range().
    """
    span = (stop - start) / float(step)

    # Round before taking the ceiling so float noise such as
    # 0.9 / 0.3 == 3.0000000000000004 does not add a spurious element.
    count = max(0, int(math.ceil(round(span, 9))))

    return [start + index * step for index in range(count)]
 169+
 170+
def mod_list(lst, modulus):
    """Return a new list with each element of `lst` reduced modulo `modulus`."""
    remainders = []
    for value in lst:
        remainders.append(value % modulus)
    return remainders
 173+
 174+""" Extract a timestamp from the filename """
def get_timestamps(logFileName):
    """Derive the start and end timestamps covered by a log file from its name.

    The name is split on '-'; fields 1, 2 and 3 are read as year, month and
    day, and field 4 must begin with a two-digit 12-hour clock hour followed
    by a two-character marker, where 'PM' means afternoon.

    Returns:
        [log_start, log_end], both strings of the form YYYYMMDDHH5500.
        `log_end` uses the hour named in the file and `log_start` the hour
        before it (as computed by getPrevHour).
    """
    parts = logFileName.split('-')

    year = int(parts[1])
    month = int(parts[2])
    day = int(parts[3])
    hour = int(parts[4][0:2])

    # Convert the 12-hour clock reading to a 24-hour one.
    is_pm = (parts[4][2:4] == 'PM')
    if is_pm:
        if hour < 12:
            hour = hour + 12
    elif hour == 12:
        # 12AM is midnight.
        hour = 0

    prev_year, prev_month, prev_day, prev_hour = getPrevHour(year, month, day, hour)

    def two_digits(value):
        # Left-pad single-digit values with a zero.
        if value < 10:
            return '0' + str(value)
        return str(value)

    log_end = ''.join([
        str(year), two_digits(month), two_digits(day), two_digits(hour), '5500'])
    log_start = ''.join([
        str(prev_year), two_digits(prev_month), two_digits(prev_day),
        two_digits(prev_hour), '5500'])

    return [log_start, log_end]
 214+
 215+
 216+""" Compute the difference between two timestamps, in hours """
def get_timestamps_diff(timestamp_start, timestamp_end):
    """Return the time from `timestamp_start` to `timestamp_end` in hours.

    Args:
        timestamp_start, timestamp_end: strings beginning with
            YYYYMMDDHHMM; anything after the minutes is ignored.

    Returns:
        The difference as a float number of hours. It is negative when
        `timestamp_end` is earlier than `timestamp_start`.

    The whole timedelta is used (days and seconds). The previous version
    read only `timedelta.seconds`, which discards whole days, so any span
    of 24 hours or more (and any negative span) was reported incorrectly.
    """
    def to_datetime(timestamp):
        # Slice the fixed-width fields out of the timestamp string.
        return cal.datetime.datetime(
            year=int(timestamp[0:4]),
            month=int(timestamp[4:6]),
            day=int(timestamp[6:8]),
            hour=int(timestamp[8:10]),
            minute=int(timestamp[10:12]),
            second=0)

    delta = to_datetime(timestamp_end) - to_datetime(timestamp_start)
    total_seconds = delta.days * 86400 + delta.seconds

    return float(total_seconds) / 3600
 238+
 239+""" Converts a list to a dictionary or vice versa -- INCOMPLETE MAY BE USEFUL AT SOME FUTURE POINT """
def convert_list_dict(collection):
    """Placeholder for a list <-> dict converter (the conversion is not implemented).

    Returns an empty list when given a dict and an empty dict when given a
    list; the contents of `collection` are not copied. For any other exact
    type a message is printed and 0 is returned.
    """
    kind = type(collection)

    if kind is dict:
        return []

    if kind is list:
        return {}

    print("miner_help::convert_list_dict: Invalid type, must be a list or a dictionary.")
    return 0
 253+
 254+""" Given an IP address, look up the country it belongs to """
def localize_IP(cur, ip_string):
    """Look up the country code for a dotted-quad IPv4 address.

    Args:
        cur: an open database cursor on a database containing the
            `ip_country` table (columns ip_from, ip_to, country_ISO_1).
        ip_string: address in the form 'w.x.y.z'.

    Returns:
        The `country_ISO_1` value of the first matching row, or '' when no
        range in the table contains the address.

    Exits the process (sys.exit) if the query cannot be executed.
    """
    # Convert the dotted quad into its 32-bit integer value.
    ip_fields = ip_string.split('.')
    w = int(ip_fields[0])
    x = int(ip_fields[1])
    y = int(ip_fields[2])
    z = int(ip_fields[3])

    ip_num = 16777216 * w + 65536 * x + 256 * y + z

    # ip_num is always an int produced above, so embedding it in the
    # statement text cannot inject SQL.
    sql_stmnt = 'select country_ISO_1 from ip_country where ' + str(ip_num) + ' >= ip_from and ' + str(ip_num) + ' <= ip_to'

    try:
        cur.execute(sql_stmnt)
        row = cur.fetchone()
    except Exception:
        # The old handler called db.rollback(), but no `db` exists in this
        # scope (NameError) and a read-only SELECT has nothing to roll back.
        sys.exit("Could not execute: " + sql_stmnt)

    # fetchone() yields None when no range matched.
    if not row:
        return ''

    return row[0]
 281+
 282+""" Load data into the IP localization table to associate IPs with countries """
def load_IP_localization_table():
    """Load ./csv/IpToCountry.csv into the `ip_country` table.

    Leading rows whose first field starts with '#' are treated as the CSV's
    comment header and skipped; every row after the first non-comment row
    is inserted. Single quotes inside fields are replaced by spaces, as
    before, so the stored values are unchanged. Each row is echoed to
    stdout as it is processed.

    All inserts are committed together at the end. If any insert fails the
    transaction is rolled back and the process exits (sys.exit). The cursor
    and connection are closed in every case.
    """
    # Get db object / create cursor
    # db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
    db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner')
    cur = db.cursor()

    insert_stmnt = 'INSERT INTO ip_country VALUES '
    # (ip_from,ip_to,registry,assigned,country_ISO_1,country_ISO_2,country_name)

    try:
        # 'rb' is what the Python 2 csv module expects.
        with open('./csv/IpToCountry.csv', 'rb') as csv_file:
            in_header = True

            for row in csv.reader(csv_file):
                # Blank lines come back as empty rows; nothing to insert.
                if not row:
                    continue

                # Skip the csv comments that precede the data.
                if in_header and row[0].startswith('#'):
                    continue
                in_header = False

                # Remove single quotes from fields (kept so stored values
                # match what earlier loads produced).
                values = [field.replace('\'', ' ') for field in row]

                print('\',\''.join(values))

                # Let the driver quote the values instead of splicing them
                # into the statement text.
                placeholders = ', '.join(['%s'] * len(values))
                sql_stmnt = insert_stmnt + '(' + placeholders + ')'

                try:
                    cur.execute(sql_stmnt, values)
                except MySQLdb.Error:
                    db.rollback()
                    sys.exit("Could not insert: " + sql_stmnt + " with values " + repr(values))

        # Commit to the db
        db.commit()
    finally:
        # Close connection
        cur.close()
        db.close()
\ No newline at end of file

Status & tagging log