Index: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataMapper.py |
— | — | @@ -131,8 +131,8 @@ |
132 | 132 | """ |
133 | 133 | class FundraiserDataMapper(DataMapper): |
134 | 134 | |
135 | | - _db = None |
136 | | - _cur = None |
| 135 | + _db_ = None |
| 136 | + _cur_ = None |
137 | 137 | |
138 | 138 | _impression_table_name_ = 'banner_impressions' |
139 | 139 | _landing_page_table_name_ = 'landing_page_requests' |
— | — | @@ -146,13 +146,13 @@ |
147 | 147 | |
148 | 148 | """ !! MODIFY -- use dataloaders! """ |
149 | 149 | def _init_db(self): |
150 | | - self._db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307) |
151 | | - self._cur = self._db.cursor() |
| 150 | + self._db_ = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307) |
| 151 | + self._cur_ = self._db_.cursor() |
152 | 152 | |
153 | 153 | """ !! MODIFY -- use dataloaders! """ |
154 | 154 | def _close_db(self): |
155 | | - self._cur.close() |
156 | | - self._db.close() |
| 155 | + self._cur_.close() |
| 156 | + self._db_.close() |
157 | 157 | |
158 | 158 | |
159 | 159 | |
— | — | @@ -168,7 +168,7 @@ |
169 | 169 | deleteStmnt = 'delete from ' + self._landing_page_table_name_ + ' where start_timestamp = \'' + timestamp + '\';' |
170 | 170 | |
171 | 171 | try: |
172 | | - self._cur.execute(deleteStmnt) |
| 172 | + self._cur_.execute(deleteStmnt) |
173 | 173 | print >> sys.stdout, "Executed delete from impression: " + deleteStmnt |
174 | 174 | except: |
175 | 175 | print >> sys.stderr, "Could not execute delete:\n" + deleteStmnt + "\nResuming insert ..." |
— | — | @@ -356,9 +356,9 @@ |
357 | 357 | val = '(' + start_timestamp_in + ',\'' + banner + '\',\'' + project + '\',\'' + country + '\',\'' + lang + '\',' \ |
358 | 358 | + str(count) + ',' + time_stamp_in + ');' |
359 | 359 | |
360 | | - self._cur.execute(insertStmt + val) |
| 360 | + self._cur_.execute(insertStmt + val) |
361 | 361 | except: |
362 | | - self._db.rollback() |
| 362 | + self._db_.rollback() |
363 | 363 | sys.exit("Database Interface Exception - Could not execute statement:\n" + insertStmt + val) |
364 | 364 | |
365 | 365 | # Re-initialize counts |
— | — | @@ -400,8 +400,6 @@ |
401 | 401 | start_timestamp_in = "convert(\'" + start + "\', datetime)" |
402 | 402 | curr_time = TP.timestamp_from_obj(datetime.datetime.now(),1,3) |
403 | 403 | |
404 | | - count_parse = 0 |
405 | | - |
406 | 404 | """ retrieve the start time of the log """ |
407 | 405 | start = self.get_first_timestamp_from_log(logFileName) |
408 | 406 | |
— | — | @@ -547,10 +545,21 @@ |
548 | 546 | landing_url = lineArgs[8] |
549 | 547 | except IndexError: |
550 | 548 | landing_url = 'Unavailable' |
551 | | - |
552 | | - include_request, index_str_flag = self.evaluate_landing_url(landing_url) |
| 549 | + |
| 550 | + hostIndex = 1 |
| 551 | + queryIndex = 4 |
| 552 | + pathIndex = 2 |
553 | 553 | |
554 | | - |
| 554 | + parsed_landing_url = up.urlparse(landing_url) |
| 555 | + query_fields = cgi.parse_qs(parsed_landing_url[queryIndex]) # Get the banner name and lang |
| 556 | + path_pieces = parsed_landing_url[pathIndex].split('/') |
| 557 | + |
| 558 | + #print '' |
| 559 | + #print landing_url |
| 560 | + include_request, index_str_flag = self.evaluate_landing_url(landing_url, parsed_landing_url, query_fields, path_pieces) |
| 561 | + #print [include_request, index_str_flag] |
| 562 | + |
| 563 | + |
555 | 564 | if include_request: |
556 | 565 | |
557 | 566 | """ Address cases where the query string contains the landing page - ...wikimediafoundation.org/w/index.php?... """ |
— | — | @@ -573,7 +582,7 @@ |
574 | 583 | |
575 | 584 | except: |
576 | 585 | landing_page = 'NONE' |
577 | | - country = Hlp.localize_IP(self._cur, ip_add) |
| 586 | + country = Hlp.localize_IP(self._cur_, ip_add) |
578 | 587 | |
579 | 588 | else: |
580 | 589 | """ Address cases where the query string does not contain the landing page - ...wikimediafoundation.org/wiki/... """ |
— | — | @@ -597,11 +606,11 @@ |
598 | 607 | country = landing_path[3] |
599 | 608 | |
600 | 609 | except: |
601 | | - country = Hlp.localize_IP(self._cur, ip_add) |
| 610 | + country = Hlp.localize_IP(self._cur_, ip_add) |
602 | 611 | |
603 | 613 | # If the country is all lowercase it was likely confused with the language; use the IP instead |
604 | 613 | if country == country.lower(): |
605 | | - country = Hlp.localize_IP(self._cur, ip_add) |
| 614 | + country = Hlp.localize_IP(self._cur_, ip_add) |
606 | 615 | |
607 | 616 | # ensure fields exist |
608 | 617 | try: |
— | — | @@ -621,8 +630,8 @@ |
622 | 631 | + project + '\',\'' + ip_add + '\',' + 'convert(\'' + timestamp_string + '\', datetime)' + ');' |
623 | 632 | |
624 | 633 | #print insertStmt + val |
625 | | - self._cur.execute(insertStmt_lp + val) |
626 | | - |
| 634 | + self._cur_.execute(insertStmt_lp + val) |
| 635 | + |
627 | 636 | except: |
628 | 637 | print "Could not insert:\n" + insertStmt_lp + val |
629 | 638 | pass |
— | — | @@ -695,16 +704,12 @@ |
696 | 705 | """ |
697 | 706 | Parses the landing URL and determines whether it is valid |
698 | 707 | """ |
699 | | - def evaluate_landing_url(self, landing_url): |
| 708 | + def evaluate_landing_url(self, landing_url, parsed_landing_url, query_fields, path_pieces): |
700 | 709 | |
701 | 710 | hostIndex = 1 |
702 | 711 | queryIndex = 4 |
703 | 712 | pathIndex = 2 |
704 | 713 | |
705 | | - parsed_landing_url = up.urlparse(landing_url) |
706 | | - query_fields = cgi.parse_qs(parsed_landing_url[queryIndex]) # Get the banner name and lang |
707 | | - path_pieces = parsed_landing_url[pathIndex].split('/') |
708 | | - |
709 | 714 | """ |
710 | 715 | Filter the landing URLs |
711 | 716 | |
— | — | @@ -714,11 +719,11 @@ |
715 | 720 | Evaluate conditions which determine acceptance of request based on the landing url |
716 | 721 | """ |
717 | 722 | try: |
| 723 | + c1 = re.search('WMF', path_pieces[2] ) != None or re.search('Junetesting001', path_pieces[2] ) != None |
| 724 | + c2 = re.search('Hear_from_Kartika', path_pieces[2]) != None |
718 | 725 | |
719 | | - c1 = re.search('WMF', path_pieces[2] ) != None or re.search('Junetesting001', path_pieces[2] ) != None |
720 | | - c2 = re.search('Hear_from_Kartika', path_pieces[2]) != None |
721 | 726 | cond1 = parsed_landing_url[hostIndex] == 'wikimediafoundation.org' and path_pieces[1] == 'wiki' and (c1 or c2) |
722 | | - |
| 727 | + |
723 | 728 | c1 = re.search('index.php', path_pieces[2] ) != None |
724 | 729 | index_str_flag = c1 |
725 | 730 | |
— | — | @@ -727,16 +732,17 @@ |
728 | 733 | except KeyError: |
729 | 734 | c2 = 0 |
730 | 735 | cond2 = (parsed_landing_url[hostIndex] == 'wikimediafoundation.org' and path_pieces[1] == 'w' and c1 and c2) |
731 | | - |
732 | | - if cond2: |
733 | | - count_parse = count_parse + 1 |
734 | | - |
| 736 | + |
735 | 737 | regexp_res = re.search('Special:LandingCheck',landing_url) |
736 | 738 | cond3 = (regexp_res == None) |
737 | 739 | |
738 | 740 | return [(cond1 or cond2) and cond3, index_str_flag] |
739 | 741 | |
740 | | - except: |
741 | | - return [0, 0] |
| 742 | + except Exception as e: |
| 743 | + #print type(e) # the exception instance |
| 744 | + #print e.args # arguments stored in .args |
| 745 | + #print e # __str__ allows args to be printed directly |
| 746 | + |
| 747 | + return [False, False] |
742 | 748 | |
743 | 749 | |
\ No newline at end of file |