Index: trunk/fundraiser-statistics/standalone-queries/create_tables_faulkner.sql |
— | — | @@ -0,0 +1,57 @@ |
| 2 | +
|
| 3 | +
|
| 4 | +-- create tables in FAULKNER for recording test metric
|
| 5 | +
|
| 6 | +use faulkner;
|
| 7 | +
|
| 8 | +drop table if exists impression;
|
| 9 | +drop table if exists landing_page;
|
| 10 | +drop table if exists ip_country;
|
| 11 | +
|
| 12 | +-- drop table if exists treatment;
|
| 13 | +
|
| 14 | +
|
| 15 | +create table impression (
|
| 16 | + -- id int(10) unsigned NOT NULL auto_increment,
|
| 17 | + run_id int(10) unsigned default NULL,
|
| 18 | + utm_source varchar(128) default NULL,
|
| 19 | + referrer varchar(128) default NULL,
|
| 20 | + country varchar(128) default NULL,
|
| 21 | + counts int(10) unsigned default NULL,
|
| 22 | + on_minute timestamp,
|
| 23 | + -- primary key (id),
|
| 24 | + constraint imp_run_fk foreign key (run_id) references log_run (id) on delete cascade
|
| 25 | +);
|
| 26 | +
|
| 27 | +create table landing_page (
|
| 28 | + id int(10) unsigned NOT NULL auto_increment,
|
| 29 | + -- run_id int(10) unsigned default NULL,
|
| 30 | + utm_source varchar(128) default NULL,
|
| 31 | + utm_campaign varchar(128) default NULL,
|
| 32 | + utm_medium varchar(128) default NULL,
|
| 33 | + landing_page varchar(128) default NULL,
|
| 34 | + page_url varchar(1000) default NULL,
|
| 35 | + referrer_url varchar(1000) default NULL,
|
| 36 | + browser varchar(50) default NULL,
|
| 37 | + lang varchar(20) default NULL, -- CHARACTER(2) NULL,
|
| 38 | + country varchar(20) default NULL, -- CHARACTER(2) NULL,
|
| 39 | + project varchar(128) default NULL,
|
| 40 | + ip varchar(20) default NULL,
|
| 41 | + request_time timestamp,
|
| 42 | + primary key (id)
|
| 43 | + -- constraint lp_tr_fk foreign key (test_run_id) references test_run (id) on delete cascade
|
| 44 | +);
|
| 45 | +
|
| 46 | +
|
| 47 | +create table ip_country (
|
| 48 | +
|
| 49 | + ip_from varchar(50) default NULL,
|
| 50 | + ip_to varchar(50) default NULL,
|
| 51 | + registry varchar(50) default NULL,
|
| 52 | + assigned varchar(50) default NULL,
|
| 53 | + country_ISO_1 varchar(50) default NULL,
|
| 54 | + country_ISO_2 varchar(50) default NULL,
|
| 55 | + country_name varchar(50) default NULL,
|
| 56 | + primary key (ip_from)
|
| 57 | +);
|
| 58 | +
|
Index: trunk/fundraiser-statistics/fundraiser-scripts/mine_landing_pages.py |
— | — | @@ -200,27 +200,38 @@ |
201 | 201 | |
202 | 202 | if include_request: |
203 | 203 | |
204 | | - # Address cases where the query string contains the landing page |
| 204 | + # Address cases where the query string contains the landing page - ...wikimediafoundation.org/w/index.php?... |
205 | 205 | if index_str_flag: |
206 | 206 | try: |
| 207 | + # URLs of the form ...?title=<lp_name> |
207 | 208 | lp_country = query_fields['title'][0].split('/') |
208 | 209 | landing_page = lp_country[0] |
209 | 210 | |
210 | | - if len(lp_country) == 3: |
211 | | - country = lp_country[2] |
212 | | - else: |
213 | | - country = lp_country[1] |
| 211 | + # URLs of the form ...?county_code=<iso_code> |
| 212 | + try: |
| 213 | + country = query_fields['country_code'][0] |
214 | 214 | |
215 | | - if country == country.lower(): |
216 | | - country = '' |
| 215 | + # URLs of the form ...?title=<lp_name>/<lang>/<iso_code> |
| 216 | + except: |
| 217 | + if len(lp_country) == 3: |
| 218 | + country = lp_country[2] |
| 219 | + else: |
| 220 | + country = lp_country[1] |
| 221 | + |
217 | 222 | except: |
218 | 223 | landing_page = 'NONE' |
219 | | - country = 'NONE' |
220 | | - else: |
| 224 | + country = mh.localize_IP(ip_add) + ' !' |
| 225 | + |
| 226 | + else: # ...wikimediafoundation.org/wiki/... |
| 227 | + |
221 | 228 | landing_path = parsed_landing_url[pathIndex].split('/') |
222 | 229 | landing_page = landing_path[2]; |
| 230 | + |
| 231 | + # URLs of the form ...?county_code=<iso_code> |
223 | 232 | try: |
224 | 233 | country = query_fields['country_code'][0] |
| 234 | + |
| 235 | + # URLs of the form ...<path>/ <lp_name>/<lang>/<iso_code> |
225 | 236 | except: |
226 | 237 | try: |
227 | 238 | if len(landing_path) == 5: |
— | — | @@ -228,9 +239,15 @@ |
229 | 240 | # source_lang = landing_path[3] |
230 | 241 | else: |
231 | 242 | country = landing_path[3] |
| 243 | + |
232 | 244 | except: |
233 | | - country = 'NONE' |
| 245 | + country = mh.localize_IP(ip_add) + ' !' |
234 | 246 | |
| 247 | + # If country is confused with the language use the ip |
| 248 | + if country == country.lower(): |
| 249 | + country = mh.localize_IP(ip_add) + ' !' |
| 250 | + |
| 251 | + |
235 | 252 | # ensure fields exist |
236 | 253 | try: |
237 | 254 | utm_source = query_fields['utm_source'][0] |
Index: trunk/fundraiser-statistics/fundraiser-scripts/miner_help.py |
— | — | @@ -14,10 +14,15 @@ |
15 | 15 | # Helper script for mining |
16 | 16 | # =================== |
17 | 17 | |
| 18 | + |
| 19 | +import sys |
| 20 | +import math |
18 | 21 | import calendar as cal |
19 | | -import math |
| 22 | +import csv |
| 23 | +import MySQLdb |
20 | 24 | |
21 | 25 | |
| 26 | + |
22 | 27 | """ Determines the following hour based on the precise date to the hour """ |
23 | 28 | def getNextHour(year, month, day, hour): |
24 | 29 | |
— | — | @@ -179,4 +184,93 @@ |
180 | 185 | print "miner_help::convert_list_dict: Invalid type, must be a list or a dictionary." |
181 | 186 | return 0; |
182 | 187 | |
183 | | - return new_collection |
\ No newline at end of file |
| 188 | + return new_collection |
| 189 | + |
| 190 | +""" Given an IP localizes the country """ |
| 191 | +def localize_IP(ip_string): |
| 192 | + |
| 193 | + """ Get db object / Create cursor """ |
| 194 | + # db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307) |
| 195 | + db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner') |
| 196 | + cur = db.cursor() |
| 197 | + |
| 198 | + |
| 199 | + # compute ip number |
| 200 | + ip_fields = ip_string.split('.') |
| 201 | + w = int(ip_fields[0]) |
| 202 | + x = int(ip_fields[1]) |
| 203 | + y = int(ip_fields[2]) |
| 204 | + z = int(ip_fields[3]) |
| 205 | + |
| 206 | + ip_num = 16777216 * w + 65536 * x + 256 * y + z; |
| 207 | + |
| 208 | + sql_stmnt = 'select country_ISO_1 from ip_country where ' + str(ip_num) + ' >= ip_from and ' + str(ip_num) + ' <= ip_to' |
| 209 | + |
| 210 | + try: |
| 211 | + cur.execute(sql_stmnt) |
| 212 | + row = cur.fetchone() |
| 213 | + except: |
| 214 | + db.rollback() |
| 215 | + sys.exit("Could execute: " + sql_stmnt) |
| 216 | + |
| 217 | + country = row[0] |
| 218 | + |
| 219 | + # Commit to the db |
| 220 | + db.commit() |
| 221 | + |
| 222 | + # Close connection |
| 223 | + cur.close() |
| 224 | + db.close() |
| 225 | + |
| 226 | + return country |
| 227 | + |
| 228 | +""" Load data into the IP localization table to associate IPs with countries """ |
| 229 | +def load_IP_localization_table(): |
| 230 | + |
| 231 | + """ Get db object / Create cursor """ |
| 232 | + # db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', db='faulkner', port=3307) |
| 233 | + db = MySQLdb.connect(host='storage3.pmtpa.wmnet', user='rfaulk', db='faulkner') |
| 234 | + cur = db.cursor() |
| 235 | + |
| 236 | + # Parse CSV file |
| 237 | + ipReader = csv.reader(open('./csv/IpToCountry.csv', 'rb')) |
| 238 | + insert_stmnt = 'INSERT INTO ip_country VALUES ' |
| 239 | + # (ip_from,ip_to,registry,assigned,country_ISO_1,country_ISO_2,country_name) |
| 240 | + header = 1 |
| 241 | + for row in ipReader: |
| 242 | + # skip the csv comments |
| 243 | + if row[0][0] != '#': |
| 244 | + header = 0 |
| 245 | + |
| 246 | + if not(header): |
| 247 | + |
| 248 | + for i in range(len(row)): |
| 249 | + pieces = row[i].split('\'') |
| 250 | + |
| 251 | + if len(pieces) > 1: |
| 252 | + new_str = pieces[0] |
| 253 | + |
| 254 | + # remove single quotes from fields |
| 255 | + for j in range(1,len(pieces)): |
| 256 | + new_str = new_str + ' ' + pieces[j] |
| 257 | + |
| 258 | + row[i] = new_str |
| 259 | + |
| 260 | + vals = '\',\''.join(row) |
| 261 | + sql_stmnt = insert_stmnt + '(\'' + vals + '\')' |
| 262 | + |
| 263 | + print vals |
| 264 | + #cur.execute(sql_stmnt) |
| 265 | + try: |
| 266 | + cur.execute(sql_stmnt) |
| 267 | + except: |
| 268 | + db.rollback() |
| 269 | + sys.exit("Could not insert: " + sql_stmnt) |
| 270 | + |
| 271 | + |
| 272 | + # Commit to the db |
| 273 | + db.commit() |
| 274 | + |
| 275 | + # Close connection |
| 276 | + cur.close() |
| 277 | + db.close() |