Index: trunk/wikistats/analytics/create_and_use_db_analytics.txt |
— | — | @@ -1,4 +1,5 @@ |
2 | 2 | -- invoke this file with "mysql --user=root --password=[...] < create_and_use_db_analytics.txt" |
| 3 | +-- for test queries make sure to delete output files *test*.csv first (MySQL on purpose forbids overwrite) |
3 | 4 | |
4 | 5 | -- tables implemented: |
5 | 6 | -- comscore |
— | — | @@ -22,6 +23,7 @@ |
23 | 24 | |
24 | 25 | -- Create database and two tables from scratch |
25 | 26 | DROP DATABASE IF EXISTS `analytics` ; |
| 27 | + |
26 | 28 | CREATE DATABASE `analytics` ; |
27 | 29 | |
28 | 30 | USE `analytics` ; |
— | — | @@ -51,9 +53,9 @@ |
52 | 54 | |
53 | 55 | CREATE TABLE `comscore_regions` ( |
54 | 56 | `region_code` char (2), |
55 | | - `target_language` char (10), |
| 57 | + `report_language` char (10), |
56 | 58 | `region_name` char (18), |
57 | | - PRIMARY KEY (target_language,region_code) |
| 59 | + PRIMARY KEY (report_language,region_code) |
58 | 60 | ) ; |
59 | 61 | |
60 | 62 | CREATE TABLE `wikistats` ( |
— | — | @@ -95,10 +97,10 @@ |
96 | 98 | ) ; |
97 | 99 | |
98 | 100 | CREATE TABLE `language_names` ( |
99 | | - `target_language` char (15), |
| 101 | + `report_language` char (15), |
100 | 102 | `language_code` char (15), |
101 | 103 | `language_name` char (50), |
102 | | - PRIMARY KEY (target_language,language_code) |
| 104 | + PRIMARY KEY (report_language,language_code) |
103 | 105 | ) ; |
104 | 106 | |
105 | 107 | -- SHOW TABLES ; |
— | — | @@ -111,90 +113,117 @@ |
112 | 114 | -- Database Manipulation |
113 | 115 | -- Obviously in real world this is a separate script |
114 | 116 | |
115 | | - |
116 | | - |
117 | | -LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_comscore.csv' |
| 117 | +LOAD DATA LOCAL INFILE 'c:/MySQL/analytics/analytics_in_comscore.csv' |
118 | 118 | INTO TABLE comscore |
119 | 119 | FIELDS TERMINATED BY ',' |
120 | 120 | OPTIONALLY ENCLOSED BY '"' |
121 | 121 | (yyyymm,country_code,region_code,property,project,reach,visitors) ; |
122 | 122 | |
123 | | -LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_comscore_regions.csv' |
| 123 | +LOAD DATA LOCAL INFILE 'c:/MySQL/analytics/analytics_in_comscore_regions.csv' |
124 | 124 | INTO TABLE comscore_regions |
125 | 125 | FIELDS TERMINATED BY ',' |
126 | 126 | OPTIONALLY ENCLOSED BY '"' |
127 | | - (target_language,region_code,region_name) ; |
| 127 | + (report_language,region_code,region_name) ; |
128 | 128 | |
129 | | -LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_wikistats.csv' |
| 129 | +-- show contents (debugging only) |
| 130 | +SELECT * |
| 131 | + FROM comscore_regions ; |
| 132 | + |
| 133 | +LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv' |
130 | 134 | INTO TABLE wikistats |
131 | 135 | FIELDS TERMINATED BY ',' |
132 | 136 | OPTIONALLY ENCLOSED BY '"' |
133 | 137 | (project,language_code,yyyymm,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects) ; |
134 | 138 | |
135 | | -LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_page_views.csv' |
| 139 | +LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv' |
136 | 140 | INTO TABLE page_views |
137 | 141 | FIELDS TERMINATED BY ',' |
138 | 142 | OPTIONALLY ENCLOSED BY '"' |
139 | 143 | (project,language_code,yyyymm,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized) ; |
140 | 144 | |
141 | | -LOAD DATA INFILE 'c:/MySQL/analytics/analytics_in_language_names.csv' |
| 145 | +LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv' |
142 | 146 | INTO TABLE language_names |
143 | 147 | FIELDS TERMINATED BY ',' |
144 | 148 | OPTIONALLY ENCLOSED BY '"' |
145 | | - (target_language,language_code,language_name) ; |
| 149 | + (report_language,language_code,language_name) ; |
146 | 150 | |
147 | | -SELECT * |
148 | | - FROM comscore |
149 | | - ORDER BY yyyymm,country_code,region_code,property,project |
150 | | - INTO OUTFILE 'c:/MySQL/analytics/analytics_out_comscore_full_table.csv' |
151 | | - FIELDS TERMINATED BY ',' ; |
152 | | - |
153 | | -SELECT yyyymm, project, page_views.language_code, language_name, views_non_mobile_normalized, views_mobile_normalized |
| 151 | +-- on views procedures and functions see http://www.paragoncorporation.com/ArticleDetail.aspx?ArticleID=28 |
| 152 | +-- view makes join implicit and adds aggregated fields (and provides default order, can be overidden) |
| 153 | +CREATE VIEW page_views_v AS |
| 154 | + SELECT yyyymm, project, page_views.language_code, language_name, |
| 155 | + views_non_mobile_raw, views_mobile_raw , |
| 156 | + views_non_mobile_normalized, views_mobile_normalized, |
| 157 | + views_non_mobile_raw+views_mobile_raw AS views_raw, |
| 158 | + views_non_mobile_normalized+views_mobile_normalized AS views_normalized, |
| 159 | + language_names.report_language |
154 | 160 | FROM page_views LEFT JOIN language_names |
155 | 161 | ON page_views.language_code = language_names.language_code |
| 162 | + ORDER BY yyyymm,project,language_name ; |
| 163 | + |
| 164 | +PREPARE page_views_s1 |
| 165 | + FROM |
| 166 | + "SELECT yyyymm,project,language_code,language_name,views_normalized |
| 167 | + FROM page_views_v |
156 | 168 | WHERE (project = 'wp') AND |
157 | 169 | (yyyymm BETWEEN '2011-03' AND '2011-05') AND |
158 | | - (page_views.language_code = 'nl') AND |
159 | | - (language_names.target_language = 'en') |
160 | | - ORDER BY project,language_name,yyyymm |
| 170 | + (language_code = 'nl') AND |
| 171 | + (report_language=?) -- parameter needed |
161 | 172 | INTO OUTFILE 'c:/MySQL/analytics/analytics_out_page_views_test1.csv' |
162 | | - FIELDS TERMINATED BY ',' ; |
163 | | - |
| 173 | + FIELDS TERMINATED BY ','" ; |
164 | 174 | |
165 | | -SELECT yyyymm, project, page_views.language_code, language_name, views_non_mobile_normalized, views_mobile_normalized |
166 | | - FROM page_views LEFT JOIN language_names |
167 | | - ON page_views.language_code = language_names.language_code |
168 | | - WHERE (project = 'wp') AND |
169 | | - (yyyymm BETWEEN '2011-03' AND '2011-05') AND |
170 | | - (page_views.language_code = 'nl') AND |
171 | | - (language_names.target_language = 'de') |
172 | | - ORDER BY project,language_name,yyyymm |
| 175 | +SET @report_language = 'en' ; -- used as parameter in prepared statements ; for tests databasee contains translations for 'en' and 'de' |
| 176 | + |
| 177 | +EXECUTE page_views_s1 USING @report_language ; |
| 178 | + |
| 179 | +SET @project = 'wp' ; |
| 180 | +SET @yyyymm_first = '2011-03' ; |
| 181 | +SET @yyyymm_last = '2011-05' ; |
| 182 | +SET @language_code = 'nl' ; |
| 183 | +SET @report_language = 'de' ; |
| 184 | + |
| 185 | +PREPARE page_views_s2 |
| 186 | + FROM |
| 187 | + "SELECT yyyymm,project,language_code,language_name,views_normalized |
| 188 | + FROM page_views_v |
| 189 | + WHERE (project = ?) AND |
| 190 | + (yyyymm BETWEEN ? AND ?) AND |
| 191 | + (language_code = ?) AND |
| 192 | + (report_language = ?) |
173 | 193 | INTO OUTFILE 'c:/MySQL/analytics/analytics_out_page_views_test2.csv' |
174 | | - FIELDS TERMINATED BY ',' ; |
175 | | - |
| 194 | + FIELDS TERMINATED BY ','" ; |
176 | 195 | |
177 | | -SELECT yyyymm,region_name,reach |
178 | | - FROM comscore LEFT JOIN comscore_regions |
179 | | - ON comscore.region_code = comscore_regions.region_code AND comscore_regions.target_language = 'en' |
180 | | - WHERE (region_name != '') AND (yyyymm BETWEEN '2011-03' AND '2011-05') |
181 | | - ORDER BY yyyymm,region_name |
182 | | - INTO OUTFILE 'c:/MySQL/analytics/analytics_out_comscore_reach_test1.csv' |
183 | | - FIELDS TERMINATED BY ',' ; |
| 196 | +EXECUTE page_views_s2 USING @project, @yyyymm_first, @yyyymm_last, @language_code, @report_language ; |
| 197 | + |
| 198 | + |
| 199 | +/* |
| 200 | +-- in progress: tests with using procedure to make invocation more flexible than with prepared statement |
| 201 | +-- prep stats expects all parameters predefined |
| 202 | + |
| 203 | +-- could the following be done in MySQL: a variable where clause with variable number of variables ? (seems not exactly like in this example for other DMRS) |
| 204 | +-- http://www.sqlteam.com/article/implementing-a-dynamic-where-clause |
| 205 | + |
| 206 | +DROP PROCEDURE IF EXISTS page_views_p1 ; |
| 207 | +DELIMITER // |
| 208 | +CREATE PROCEDURE page_views_p1 (parm_project VARCHAR(30)) |
| 209 | + SELECT yyyymm,project,language_code,language_name,views_normalized |
| 210 | + FROM page_views_v |
| 211 | + WHERE (project = parm_project) ; |
| 212 | + // |
| 213 | +DELIMITER ; |
| 214 | + |
| 215 | +-- CALL page_views_p1 ('wk') ; |
| 216 | + |
| 217 | +delimiter // |
| 218 | +CREATE PROCEDURE dynamic(IN tbl CHAR(64), IN col CHAR(64)) |
| 219 | +BEGIN |
| 220 | + SET @s = CONCAT('SELECT ',col,' FROM ',tbl) ; |
| 221 | + PREPARE stmt FROM @s; |
| 222 | + EXECUTE stmt; |
| 223 | +END |
| 224 | +// |
| 225 | +delimiter ; |
| 226 | +CALL dynamic ('comscore', 'yyyymm') |
| 227 | +*/ |
| 228 | + |
| 229 | + |
| 230 | + |