r90015 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r90014‎ | r90015 | r90016 >
Date:22:06, 13 June 2011
Author:rfaulk
Status:deferred
Tags:
Comment:
Modified the mine_squid_landing_page_requests and evaluate_landing_url methods to properly handle mining requests.
Slightly modified the DataMapper db and cursor member names (_db → _db_, _cur → _cur_) for consistency.
Modified paths:
  • /trunk/fundraiser-statistics/fundraiser-scripts/classes/DataMapper.py (modified) (history)

Diff [purge]

Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataMapper.py
@@ -131,8 +131,8 @@
132132 """
133133 class FundraiserDataMapper(DataMapper):
134134
135 - _db = None
136 - _cur = None
 135+ _db_ = None
 136+ _cur_ = None
137137
138138 _impression_table_name_ = 'banner_impressions'
139139 _landing_page_table_name_ = 'landing_page_requests'
@@ -146,13 +146,13 @@
147147
148148 """ !! MODIFY -- use dataloaders! """
149149 def _init_db(self):
150 - self._db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
151 - self._cur = self._db.cursor()
 150+ self._db_ = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307)
 151+ self._cur_ = self._db_.cursor()
152152
153153 """ !! MODIFY -- use dataloaders! """
154154 def _close_db(self):
155 - self._cur.close()
156 - self._db.close()
 155+ self._cur_.close()
 156+ self._db_.close()
157157
158158
159159
@@ -168,7 +168,7 @@
169169 deleteStmnt = 'delete from ' + self._landing_page_table_name_ + ' where start_timestamp = \'' + timestamp + '\';'
170170
171171 try:
172 - self._cur.execute(deleteStmnt)
 172+ self._cur_.execute(deleteStmnt)
173173 print >> sys.stdout, "Executed delete from impression: " + deleteStmnt
174174 except:
175175 print >> sys.stderr, "Could not execute delete:\n" + deleteStmnt + "\nResuming insert ..."
@@ -356,9 +356,9 @@
357357 val = '(' + start_timestamp_in + ',\'' + banner + '\',\'' + project + '\',\'' + country + '\',\'' + lang + '\',' \
358358 + str(count) + ',' + time_stamp_in + ');'
359359
360 - self._cur.execute(insertStmt + val)
 360+ self._cur_.execute(insertStmt + val)
361361 except:
362 - self._db.rollback()
 362+ self._db_.rollback()
363363 sys.exit("Database Interface Exception - Could not execute statement:\n" + insertStmt + val)
364364
365365 # Re-initialize counts
@@ -400,8 +400,6 @@
401401 start_timestamp_in = "convert(\'" + start + "\', datetime)"
402402 curr_time = TP.timestamp_from_obj(datetime.datetime.now(),1,3)
403403
404 - count_parse = 0
405 -
406404 """ retrieve the start time of the log """
407405 start = self.get_first_timestamp_from_log(logFileName)
408406
@@ -547,10 +545,21 @@
548546 landing_url = lineArgs[8]
549547 except IndexError:
550548 landing_url = 'Unavailable'
551 -
552 - include_request, index_str_flag = self.evaluate_landing_url(landing_url)
 549+
 550+ hostIndex = 1
 551+ queryIndex = 4
 552+ pathIndex = 2
553553
554 -
 554+ parsed_landing_url = up.urlparse(landing_url)
 555+ query_fields = cgi.parse_qs(parsed_landing_url[queryIndex]) # Get the banner name and lang
 556+ path_pieces = parsed_landing_url[pathIndex].split('/')
 557+
 558+ #print ''
 559+ #print landing_url
 560+ include_request, index_str_flag = self.evaluate_landing_url(landing_url, parsed_landing_url, query_fields, path_pieces)
 561+ #print [include_request, index_str_flag]
 562+
 563+
555564 if include_request:
556565
557566 """ Address cases where the query string contains the landing page - ...wikimediafoundation.org/w/index.php?... """
@@ -573,7 +582,7 @@
574583
575584 except:
576585 landing_page = 'NONE'
577 - country = Hlp.localize_IP(self._cur, ip_add)
 586+ country = Hlp.localize_IP(self._cur_, ip_add)
578587
579588 else:
580589 """ Address cases where the query string does not contain the landing page - ...wikimediafoundation.org/wiki/... """
@@ -597,11 +606,11 @@
598607 country = landing_path[3]
599608
600609 except:
601 - country = Hlp.localize_IP(self._cur, ip_add)
 610+ country = Hlp.localize_IP(self._cur_, ip_add)
602611
603612 # If country is confused with the language use the ip
604613 if country == country.lower():
605 - country = Hlp.localize_IP(self._cur, ip_add)
 614+ country = Hlp.localize_IP(self._cur_, ip_add)
606615
607616 # ensure fields exist
608617 try:
@@ -621,8 +630,8 @@
622631 + project + '\',\'' + ip_add + '\',' + 'convert(\'' + timestamp_string + '\', datetime)' + ');'
623632
624633 #print insertStmt + val
625 - self._cur.execute(insertStmt_lp + val)
626 -
 634+ self._cur_.execute(insertStmt_lp + val)
 635+
627636 except:
628637 print "Could not insert:\n" + insertStmt_lp + val
629638 pass
@@ -695,16 +704,12 @@
696705 """
697706 Parses the landing url and determines if its valid
698707 """
699 - def evaluate_landing_url(self, landing_url):
 708+ def evaluate_landing_url(self, landing_url, parsed_landing_url, query_fields, path_pieces):
700709
701710 hostIndex = 1
702711 queryIndex = 4
703712 pathIndex = 2
704713
705 - parsed_landing_url = up.urlparse(landing_url)
706 - query_fields = cgi.parse_qs(parsed_landing_url[queryIndex]) # Get the banner name and lang
707 - path_pieces = parsed_landing_url[pathIndex].split('/')
708 -
709714 """
710715 Filter the landing URLs
711716
@@ -714,11 +719,11 @@
715720 Evaluate conditions which determine acceptance of request based on the landing url
716721 """
717722 try:
 723+ c1 = re.search('WMF', path_pieces[2] ) != None or re.search('Junetesting001', path_pieces[2] ) != None
 724+ c2 = re.search('Hear_from_Kartika', path_pieces[2]) != None
718725
719 - c1 = re.search('WMF', path_pieces[2] ) != None or re.search('Junetesting001', path_pieces[2] ) != None
720 - c2 = re.search('Hear_from_Kartika', path_pieces[2]) != None
721726 cond1 = parsed_landing_url[hostIndex] == 'wikimediafoundation.org' and path_pieces[1] == 'wiki' and (c1 or c2)
722 -
 727+
723728 c1 = re.search('index.php', path_pieces[2] ) != None
724729 index_str_flag = c1
725730
@@ -727,16 +732,17 @@
728733 except KeyError:
729734 c2 = 0
730735 cond2 = (parsed_landing_url[hostIndex] == 'wikimediafoundation.org' and path_pieces[1] == 'w' and c1 and c2)
731 -
732 - if cond2:
733 - count_parse = count_parse + 1
734 -
 736+
735737 regexp_res = re.search('Special:LandingCheck',landing_url)
736738 cond3 = (regexp_res == None)
737739
738740 return [(cond1 or cond2) and cond3, index_str_flag]
739741
740 - except:
741 - return [0, 0]
 742+ except Exception as e:
 743+ #print type(e) # the exception instance
 744+ #print e.args # arguments stored in .args
 745+ #print e # __str__ allows args to printed directly
 746+
 747+ return [False, False]
742748
743749
\ No newline at end of file

Status & tagging log