Index: trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl |
— | — | @@ -1,124 +0,0 @@ |
2 | | -#!/usr/local/bin/perl |
3 | | - |
4 | | - use Getopt::Std ; |
5 | | - |
6 | | - &ParseArguments ; |
7 | | - |
8 | | - print "Write file '$file_csv_out'\n" ; |
9 | | - open CSV_OUT, '>', $file_csv_out ; |
10 | | - |
11 | | - foreach $project (qw (wb wk wn wp wq ws wv wx)) |
12 | | - { &ReadStatisticsPerBinariesExtension ($project) ; } |
13 | | - |
14 | | - close CSV_OUT ; |
15 | | - |
16 | | - print "\n\nReady\n\n" ; |
17 | | - exit ; |
18 | | - |
19 | | -sub ParseArguments |
20 | | -{ |
21 | | - my @options ; |
22 | | - getopt ("io", \%options) ; |
23 | | - |
24 | | - die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ; |
25 | | - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ; |
26 | | - |
27 | | - $path_in = $options {"i"} ; |
28 | | - $path_out = $options {"o"} ; |
29 | | - |
30 | | - die "Input folder '$path_in' does not exist" if (! -d $path_in) ; |
31 | | - die "Output folder '$path_out' does not exist" if (! -d $path_out) ; |
32 | | - |
33 | | - # tests only |
34 | | - # $path_in = "C:/@ Wikimedia/# Out Bayes" ; |
35 | | - # $path_out = "C:/analytics" ; # "w:/@ report card/data" ; |
36 | | - |
37 | | - print "Input folder: $path_in\n" ; |
38 | | - print "Output folder: $path_out\n" ; |
39 | | - print "\n" ; |
40 | | - |
41 | | - $file_csv_out = "$path_out/analytics_in_binaries.csv" ; |
42 | | -} |
43 | | - |
44 | | - |
45 | | -sub ReadStatisticsPerBinariesExtension |
46 | | -{ |
47 | | - my $project = shift ; |
48 | | - my $file_csv_in = "$path_in/csv_$project/StatisticsPerBinariesExtension.csv" ; |
49 | | - $yyyymm_hi = -1 ; |
50 | | - |
51 | | - if (! -e $file_csv_in) |
52 | | - { die "Input file '$file_csv_in' not found" ; } |
53 | | - |
54 | | - |
55 | | - print "Read '$file_csv_in'\n" ; |
56 | | - open CSV_IN, '<', $file_csv_in ; |
57 | | - |
58 | | - $language_prev = '' ; |
59 | | - while ($line = <CSV_IN>) |
60 | | - { |
61 | | - chomp $line ; |
62 | | - next if $line !~ /,.*?,/ ; |
63 | | - |
64 | | - ($language,$date,$data) = split (',', $line, 3) ; |
65 | | - |
66 | | - # for each wiki first line shows ext names, no tcounts |
67 | | - if ($date eq "00/0000") |
68 | | - { |
69 | | - if ($language_prev ne '') |
70 | | - { &WriteMonthlyData ($project, $language_prev) ; } |
71 | | - $language_prev = $language ; |
72 | | - |
73 | | - undef %ext_name ; |
74 | | - undef %ext_ndx ; |
75 | | - undef %ext_cnt ; |
76 | | - undef %months ; |
77 | | - |
78 | | - @exts = split (',', $data) ; |
79 | | - $ndx = 0 ; |
80 | | - foreach $ext (@exts) |
81 | | - { |
82 | | - $ext_name {$ndx} = $ext ; |
83 | | - $ext_ndx {$ext} = $ndx ; |
84 | | - $ndx ++ ; |
85 | | - } |
86 | | - next ; |
87 | | - } |
88 | | - |
89 | | - ($month,$year) = split ('\/', $date) ; |
90 | | - $yyyymm = sprintf ("%04d-%02d", $year, $month) ; |
91 | | - if ($yyyymm gt $yyyymm_hi) |
92 | | - { $yyyymm_hi = $yyyymm ; } |
93 | | - $months {$yyyymm}++ ; |
94 | | - |
95 | | - @counts = split (',', $data) ; |
96 | | - $ndx = 0 ; |
97 | | - foreach $count (@counts) |
98 | | - { |
99 | | - $ext_cnt {$yyyymm}{$ext_name {$ndx}} = $count ; |
100 | | - $ndx ++ ; |
101 | | - } |
102 | | - } |
103 | | - &WriteMonthlyData ($project, $language_prev) ; |
104 | | - |
105 | | - close CSV_IN ; |
106 | | -} |
107 | | - |
108 | | -sub WriteMonthlyData |
109 | | -{ |
110 | | - my ($project,$language) = @_ ; |
111 | | - # get sorted array of extensions, order by count for most recent month |
112 | | - %ext_cnt_yyyymm_hi = %{$ext_cnt {$yyyymm_hi}} ; |
113 | | - @ext_cnt_yyyymm_hi = (sort {$ext_cnt_yyyymm_hi {$b} <=> $ext_cnt_yyyymm_hi {$a}} keys %ext_cnt_yyyymm_hi) ; |
114 | | - |
115 | | - foreach $month (sort keys %months) |
116 | | - { |
117 | | - $ndx = 0 ; |
118 | | - foreach $ext (@ext_cnt_yyyymm_hi) |
119 | | - { |
120 | | - print CSV_OUT "$project,$language,$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ; |
121 | | - # print "$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ; |
122 | | - last if (++ $ndx > 25) ; |
123 | | - } |
124 | | - } |
125 | | -} |
Index: trunk/wikistats/analytics/AnalyticsPrepLanguageNames.pl |
— | — | @@ -1,162 +0,0 @@ |
2 | | -#!/usr/bin/perl |
3 | | - |
4 | | -# This module prepares a csv file with language names for feed into the analytics database |
5 | | -# The main work of collecting these names from different sources is done by |
6 | | -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/dumps/WikiReportsLocalizations.pm |
7 | | -# which is part of wikistats reporting phase and once each month updates local master csv files from all sources |
8 | | - |
9 | | -# The following code to merge and filter these master csv files is based on parts of the code in WikiReportsLocalizations.pm, function Localization |
10 | | -# To do some day: make a completely independent script out of this code and code from WikiReportsLocalizations.pm which covers the whole production cycle |
11 | | - |
12 | | -# Sources for language names: |
13 | | -# - php files |
14 | | -# - translatewiki |
15 | | -# - English Wikipedia API (interwikilinks) |
16 | | - |
17 | | -# Right now multilingual support for the analytics database is just a nice idea, so to speed up data feeds, just keep English and German translations |
18 | | - |
19 | | - use Getopt::Std ; |
20 | | - |
21 | | - $true = 1 ; |
22 | | - $false = 0 ; |
23 | | - |
24 | | - $max_language_name = 50 ; # truncate if longer |
25 | | - |
26 | | - $file_csv_language_names_php = "LanguageNamesViaPhp.csv" ; |
27 | | - $file_csv_language_names_wp = "LanguageNamesViaWpEnEdited.csv" ; |
28 | | - $file_csv_analytics_in = "analytics_in_language_names.csv" ; |
29 | | - |
30 | | - $languages_force_case_uc = "ast|br|de|en|id|nl|wa" ; # incomplete list, based on languages supported by wikistats reporting |
31 | | - $languages_filter = "de|en" ; |
32 | | - foreach $language (split '\|', $languages_filter) |
33 | | - { $include_target_language {$language} = $true ; } |
34 | | - |
35 | | - &ParseArguments ; |
36 | | - &ReadCsvFiles ; |
37 | | - &WriteCsvFile ; |
38 | | - |
39 | | - |
40 | | -# if ($language eq "id") # swap which file takes precedence |
41 | | - |
42 | | - print "\nReady\n\n" ; |
43 | | - exit ; |
44 | | - |
45 | | -sub ParseArguments |
46 | | -{ |
47 | | - my (@options, $arguments) ; |
48 | | - |
49 | | - getopt ("io", \%options) ; |
50 | | - |
51 | | - foreach $arg (sort keys %options) |
52 | | - { $arguments .= " -$arg " . $options {$arg} . "\n" ; } |
53 | | - print ("\nArguments\n$arguments\n") ; |
54 | | - |
55 | | - $options {"i"} = "w:/# out bayes/csv_wp" ; # EZ test |
56 | | - $options {"o"} = "c:/MySQL/analytics" ; # EZ test |
57 | | - |
58 | | - die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ; |
59 | | - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ; |
60 | | - |
61 | | - ($path_in = $options {"i"}) =~ s/[\/\\]+$// ; # remove trailing (back)slash if any |
62 | | - ($path_out = $options {"o"}) =~ s/[\/\\]+$// ; # remove trailing (back)slash if any |
63 | | - |
64 | | - die "Input folder '$path_in' does not exist" if (! -d $path_in) ; |
65 | | - die "Output folder '$path_out' does not exist" if (! -d $path_out) ; |
66 | | - |
67 | | - print "Input folder: $path_in\n" ; |
68 | | - print "Output folder: $path_out\n\n" ; |
69 | | - |
70 | | - $file_csv_language_names_php = "$path_in/$file_csv_language_names_php" ; |
71 | | - $file_csv_language_names_wp = "$path_in/$file_csv_language_names_wp" ; |
72 | | - $file_csv_analytics_in = "$path_out/$file_csv_analytics_in" ; |
73 | | - |
74 | | - die "Input file '$file_csv_language_names_php' not found" if (! -e $file_csv_language_names_php) ; |
75 | | - die "Input file '$file_csv_language_names_wp' not found" if (! -e $file_csv_language_names_wp) ; |
76 | | -} |
77 | | - |
78 | | -sub ReadCsvFiles |
79 | | -{ |
80 | | - #first read definitions from php message files, then overwrite with definitions from interwiki links when available |
81 | | - # except for target language 'id' (Indonesian) where quality of php file has been deemed more reliable |
82 | | - |
83 | | - open CSV_IN, "<", $file_csv_language_names_php ; |
84 | | - while ($line = <CSV_IN>) |
85 | | - { |
86 | | - chomp ($line) ; |
87 | | - ($target_language, $code, $name_unicode, $name_html) = split (',', $line) ; |
88 | | - |
89 | | - next if ! $include_target_language {$target_language} ; |
90 | | - |
91 | | - $out_languages {$target_language} {$code} = &FormatName ($target_language, $name_unicode) ; # forget about html for analytics database |
92 | | - } |
93 | | - close CSV_IN ; |
94 | | - |
95 | | - open CSV_IN, "<", $file_csv_language_names_wp ; |
96 | | - while ($line = <CSV_IN>) |
97 | | - { |
98 | | - chomp ($line) ; |
99 | | - ($target_language, $code, $name_unicode, $name_html) = split (',', $line) ; |
100 | | - |
101 | | - next if ! $include_target_language {$target_language} ; |
102 | | - |
103 | | - next if $target_language eq 'id' and $out_languages {$target_language} {$code} ne '' ; |
104 | | - |
105 | | - # $name_unicode_php = $out_languages {$target_language} {$code} ; # test only |
106 | | - # $name_unicode_wp = &FormatName ($target_language, $name_unicode) ; # test only |
107 | | - # if (($name_unicode_php ne '') && ($name_unicode_php ne $name_unicode_wp)) # test only |
108 | | - # { print "$name_unicode_php => $name_unicode_wp\n" ; } # test only |
109 | | - |
110 | | - $out_languages {$target_language} {$code} = &FormatName ($target_language, $name_unicode) ; # forget about html for analytics database |
111 | | - } |
112 | | - close CSV_IN ; |
113 | | -} |
114 | | - |
115 | | -sub FormatName |
116 | | -{ |
117 | | - my ($target_language, $name_unicode) = @_ ; |
118 | | - |
119 | | - $name_unicode2 = $name_unicode ; |
120 | | - |
121 | | - if ($target_language eq "de") |
122 | | - { $name_unicode =~ s/e?\s*\-?sprache//i ; } |
123 | | - |
124 | | - if ($target_language =~ /^(?:$languages_force_case_uc)/) |
125 | | - { $name_unicode = ucfirst $name_unicode ; } |
126 | | - else |
127 | | - { $name_unicode = lc $name_unicode ; } |
128 | | - |
129 | | -# Test only |
130 | | -# if (($target_language eq 'de') && ($name_unicode ne $name_unicode2)) |
131 | | -# { print "$name_unicode2 => $name_unicode\n" ; } |
132 | | - |
133 | | - return ($name_unicode) ; |
134 | | -} |
135 | | - |
136 | | -sub WriteCsvFile |
137 | | -{ |
138 | | - open CSV_OUT, ">", $file_csv_analytics_in || die ("File '$file_csv_analytics_in' could not be opened") ; |
139 | | - binmode CSV_OUT ; # force Unix style linebreak \012 |
140 | | - |
141 | | - foreach $target_language (sort keys %out_languages) |
142 | | - { |
143 | | - print "\nTarget language '$target_language'\n" ; |
144 | | - %translations = %{$out_languages {$target_language}} ; |
145 | | - |
146 | | - foreach $code (sort keys %translations) |
147 | | - { |
148 | | - $language_name = $translations{$code} ; |
149 | | - $language_name2 = substr ($language_name,0,$max_language_name) ; |
150 | | - |
151 | | - if ($language_name ne $language_name2) |
152 | | - { print "Language name truncated to $max_language_name chars: '$language_name' -> '$language_name2'\n" ; } |
153 | | - |
154 | | - if ($language_name2 =~ /,/) |
155 | | - { $language_name2 = "\"$language_name2\"" ; } |
156 | | - # test only |
157 | | - print CSV_OUT "$target_language,$code,$language_name2\n" ; |
158 | | - } |
159 | | - } |
160 | | - |
161 | | - close CSV_OUT ; |
162 | | -} |
163 | | - |
Index: trunk/wikistats/analytics/analytics_refresh_from_csv.txt |
— | — | @@ -1,55 +0,0 @@ |
2 | | -USE `analytics` ; |
3 | | - |
4 | | -TRUNCATE TABLE comscore ; |
5 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv' |
6 | | - INTO TABLE comscore |
7 | | - FIELDS TERMINATED BY ',' |
8 | | - OPTIONALLY ENCLOSED BY '"' |
9 | | - (@date,country_code,region_code,web_property,project_code,reach,visitors) |
10 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
11 | | - |
12 | | -TRUNCATE TABLE comscore_regions ; |
13 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv' |
14 | | - INTO TABLE comscore_regions |
15 | | - FIELDS TERMINATED BY ',' |
16 | | - OPTIONALLY ENCLOSED BY '"' |
17 | | - (report_language,region_code,region_name) ; |
18 | | - |
19 | | -TRUNCATE TABLE wikistats ; |
20 | | -LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv' |
21 | | - INTO TABLE wikistats |
22 | | - FIELDS TERMINATED BY ',' |
23 | | - OPTIONALLY ENCLOSED BY '"' |
24 | | - (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects) |
25 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
26 | | - |
27 | | -TRUNCATE TABLE page_views ; |
28 | | -LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv' |
29 | | - INTO TABLE page_views |
30 | | - FIELDS TERMINATED BY ',' |
31 | | - OPTIONALLY ENCLOSED BY '"' |
32 | | - (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized) |
33 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
34 | | - |
35 | | -TRUNCATE TABLE language_names ; |
36 | | -LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv' |
37 | | - INTO TABLE language_names |
38 | | - FIELDS TERMINATED BY ',' |
39 | | - OPTIONALLY ENCLOSED BY '"' |
40 | | - (report_language,language_code,language_name) ; |
41 | | - |
42 | | -TRUNCATE TABLE binaries ; |
43 | | -LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv' |
44 | | - INTO TABLE binaries |
45 | | - FIELDS TERMINATED BY ',' |
46 | | - OPTIONALLY ENCLOSED BY '"' |
47 | | - (project_code,language_code,@date,extension,binaries) |
48 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
49 | | - |
50 | | -TRUNCATE TABLE offline ; |
51 | | -LOAD DATA LOCAL INFILE 'analytics_in_offline.csv' |
52 | | - INTO TABLE offline |
53 | | - FIELDS TERMINATED BY ',' |
54 | | - OPTIONALLY ENCLOSED BY '"' |
55 | | - (@date, readers) |
56 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
Index: trunk/wikistats/analytics/AnalyticsPrepPageViews.pl |
— | — | @@ -1,11 +0,0 @@ |
2 | | -#!/usr/bin/perl |
3 | | - |
4 | | -# This file is just a place holder (alias) |
5 | | -# Page views per day/week/month/etc, per project, per language, nrmalized and not normalized are collected by |
6 | | -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/dumps/WikiCountsSummarizeProjectCounts.pl |
7 | | -# |
8 | | -# Added May 2001: |
9 | | -# For analytics database one file is written for all projects and languages combined, |
10 | | -# with per month, not normalized and normalized page view counts in one row. |
11 | | -# This file is written to same output folder as other csv files generated by WikiCountsSummarizeProjectCounts.pl |
12 | | -# and needs to be copied to analytics folder later |
\ No newline at end of file |
Index: trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt |
— | — | @@ -1,181 +0,0 @@ |
2 | | - |
3 | | - |
4 | | - |
5 | | - |
6 | | - |
7 | | -DROP DATABASE IF EXISTS `analytics` ; |
8 | | - |
9 | | -CREATE DATABASE `analytics` ; |
10 | | - |
11 | | -USE `analytics` ; |
12 | | - |
13 | | -CREATE TABLE `comscore` ( |
14 | | - `date` date NOT NULL, |
15 | | - `country_code` varchar (3), |
16 | | - `region_code` varchar (3), |
17 | | - `web_property` varchar (20), |
18 | | - `project_code` varchar (10), |
19 | | - `reach` decimal (4,1) DEFAULT NULL, |
20 | | - `visitors` decimal (15) DEFAULT NULL, |
21 | | - PRIMARY KEY (date,country_code,region_code,project_code,web_property), |
22 | | - KEY (`country_code`) |
23 | | -) ; |
24 | | - |
25 | | -CREATE TABLE `comscore_regions` ( |
26 | | - `region_code` varchar (2), |
27 | | - `report_language` varchar (10), |
28 | | - `region_name` varchar (18), |
29 | | - PRIMARY KEY (region_code,report_language) |
30 | | -) ; |
31 | | - |
32 | | -CREATE TABLE `wikistats` ( |
33 | | - `date` date NOT NULL, |
34 | | - `project_code` varchar (10), |
35 | | - `language_code` varchar (15), |
36 | | - `editors_all_time` int (10) DEFAULT NULL, |
37 | | - `editors_new` int (7) DEFAULT NULL, |
38 | | - `editors_ge_5` int (7) DEFAULT NULL, |
39 | | - `editors_ge_25` int (7) DEFAULT NULL, |
40 | | - `editors_ge_100` int (7) DEFAULT NULL, |
41 | | - `articles` int (12) DEFAULT NULL, |
42 | | - `articles_new_per_day` int (9) DEFAULT NULL, |
43 | | - `articles_over_bytes_500` int (12) DEFAULT NULL, |
44 | | - `articles_over_bytes_2000` int (12) DEFAULT NULL, |
45 | | - `edits_per_article` decimal (9,1) DEFAULT NULL, |
46 | | - `bytes_per_article` decimal (9,1) DEFAULT NULL, |
47 | | - `edits` int (12) DEFAULT NULL, |
48 | | - `size_in_bytes` int (15) DEFAULT NULL, |
49 | | - `size_in_words` int (15) DEFAULT NULL, |
50 | | - `links_internal` int (15) DEFAULT NULL, |
51 | | - `links_interwiki` int (15) DEFAULT NULL, |
52 | | - `links_image` int (15) DEFAULT NULL, |
53 | | - `links_external` int (15) DEFAULT NULL, |
54 | | - `redirects` int (15) DEFAULT NULL, |
55 | | - PRIMARY KEY (date,project_code,language_code) |
56 | | -) ; |
57 | | - |
58 | | -CREATE TABLE `page_views` ( |
59 | | - `date` date NOT NULL, |
60 | | - `project_code` char (2), |
61 | | - `language_code` char (15), |
62 | | - `views_non_mobile_raw` bigint (15), |
63 | | - `views_mobile_raw` bigint (15), |
64 | | - `views_non_mobile_normalized` bigint (15), |
65 | | - `views_mobile_normalized` bigint (15), |
66 | | - `views_raw` bigint (15), |
67 | | - `views_normalized` bigint (15), |
68 | | - PRIMARY KEY (date,project_code,language_code), |
69 | | - KEY (date,language_code,project_code) |
70 | | -) ; |
71 | | - |
72 | | -CREATE TABLE `language_names` ( |
73 | | - `report_language` varchar (15), |
74 | | - `language_code` varchar (15), |
75 | | - `language_name` varchar (50), |
76 | | - PRIMARY KEY (report_language,language_code) |
77 | | -) ; |
78 | | - |
79 | | -CREATE TABLE `binaries` ( |
80 | | - `date` date NOT NULL, |
81 | | - `project_code` char (2), |
82 | | - `language_code` char (15), |
83 | | - `extension` varchar (10), |
84 | | - `binaries` bigint (15), |
85 | | - PRIMARY KEY (date,project_code,language_code,extension) |
86 | | -) ; |
87 | | - |
88 | | -CREATE TABLE `offline` ( |
89 | | - `date` date NOT NULL, |
90 | | - `source` varchar (255), |
91 | | - `readers` bigint (12), |
92 | | - PRIMARY KEY (date,source) |
93 | | -) ; |
94 | | - |
95 | | - |
96 | | - |
97 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv' |
98 | | - INTO TABLE comscore |
99 | | - FIELDS TERMINATED BY ',' |
100 | | - OPTIONALLY ENCLOSED BY '"' |
101 | | - (@date,country_code,region_code,web_property,project_code,reach,visitors) |
102 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
103 | | - |
104 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv' |
105 | | - INTO TABLE comscore_regions |
106 | | - FIELDS TERMINATED BY ',' |
107 | | - OPTIONALLY ENCLOSED BY '"' |
108 | | - (report_language,region_code,region_name) ; |
109 | | - |
110 | | -LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv' |
111 | | - INTO TABLE wikistats |
112 | | - FIELDS TERMINATED BY ',' |
113 | | - OPTIONALLY ENCLOSED BY '"' |
114 | | - (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects) |
115 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
116 | | - |
117 | | -LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv' |
118 | | - INTO TABLE page_views |
119 | | - FIELDS TERMINATED BY ',' |
120 | | - OPTIONALLY ENCLOSED BY '"' |
121 | | - (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized) |
122 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
123 | | - |
124 | | - |
125 | | -LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv' |
126 | | - INTO TABLE language_names |
127 | | - FIELDS TERMINATED BY ',' |
128 | | - OPTIONALLY ENCLOSED BY '"' |
129 | | - (report_language,language_code,language_name) ; |
130 | | - |
131 | | -LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv' |
132 | | - INTO TABLE binaries |
133 | | - FIELDS TERMINATED BY ',' |
134 | | - OPTIONALLY ENCLOSED BY '"' |
135 | | - (project_code,language_code,@date,extension,binaries) |
136 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
137 | | - |
138 | | -LOAD DATA LOCAL INFILE 'analytics_in_offline.csv' |
139 | | - INTO TABLE offline |
140 | | - FIELDS TERMINATED BY ',' |
141 | | - OPTIONALLY ENCLOSED BY '"' |
142 | | - (@date,readers) |
143 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
144 | | - |
145 | | - |
146 | | - SELECT * FROM offline ; |
147 | | - |
148 | | - |
Index: trunk/wikistats/analytics/analytics_generate_csv_files.sh |
— | — | @@ -1,47 +0,0 @@ |
2 | | -#!/bin/sh |
3 | | - |
4 | | -# Prepare several csv files, ready for importing into analytics database |
5 | | -# All generated files have _in_ in name signalling these contain data ready for importing into database |
6 | | -# One input record corresponds to one database record |
7 | | - |
8 | | -ulimit -v 8000000 |
9 | | - |
10 | | -clear |
11 | | -cd /a/analytics |
12 | | - |
13 | | - |
14 | | -# AnalyticsPrepBinariesData.pl read counts for binaries which were generated by wikistats |
15 | | -# and which reside in /a/wikistats/csv_[project code]/StatisticsPerBinariesExtension.csv |
16 | | -# It filters and reorganizes data and produces analytics_in_binaries.csv |
17 | | -# Output csv contains: project code, language, month, extension name, count |
18 | | - |
19 | | -perl AnalyticsPrepBinariesData.pl -i /a/wikistats/ -o /a/analytics/ |
20 | | - |
21 | | -# AnalyticsPrepComscoreData.pl scans /a/analytics/comscore for newest comScore csv files (with data for last 14 months) |
22 | | -# parses those csv files, adds/replaces data from these csv files into master files (containing full history) |
23 | | -# and generates input csv file analytics_in_comscore.csv ready for importing into database |
24 | | -# |
25 | | -# note : these csv files were manually downloaded from http://mymetrix.comscore.com/app/report.aspx |
26 | | -# and given more descriptive names, script finds newest files based on partial name search |
27 | | -# |
28 | | -# -r replace (default is add only) |
29 | | -# -i input folder, contains manually downloaded csv files from comScore (or xls files manually converted to csv) |
30 | | -# -m master files with full history |
31 | | -# -o output csv file, with reach per region, UV's per region and UV's per top web property, ready for import into database |
32 | | - |
33 | | -perl AnalyticsPrepComscoreData.pl -r -i /a/analytics/comscore -m /a/analytics -o /a/analytics |
34 | | - |
35 | | -# AnalyticsPrepWikiCountsOutput.pl reads a plethora of fields from several csv files from wikistats process |
36 | | -# It filters and reorganizes data and produces analytics_in_wikistats.csv, ready for import into analytics database |
37 | | - |
38 | | -perl AnalyticsPrepWikiCountsOutput.pl -i /a/wikistats/ -o /a/analytics |
39 | | - |
40 | | -# analytics_in_page_views.csv is written daily as part of WikiCountsSummarizeProjectCounts.pl |
41 | | -# part of (/home/ezachte/pageviews_monthly.sh job) |
42 | | -# which processes hourly projectcounts files (per wiki page view totals for one hour) from http://dammit.lt/wikistats |
43 | | -# and generates several files on different aggregation levels |
44 | | -# only action here is to copy data to this folder to have everything in one place |
45 | | -# note: unlike folder name suggests this file contains stats for all projects |
46 | | - |
47 | | -cp /a/wikistats/csv_wp/analytics_in_page_views.csv . |
48 | | - |
Index: trunk/wikistats/analytics/analytics_upd.sh |
— | — | @@ -1,6 +0,0 @@ |
2 | | -clear |
3 | | -cd /a/analytics |
4 | | - |
5 | | -./analytics_generate_csv_files.sh |
6 | | - |
7 | | -mysql -u analytics -h project2.wikimedia.org -preport < analytics_refresh_from_csv.txt |
Index: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl |
— | — | @@ -1,334 +0,0 @@ |
2 | | -#!/usr/local/bin/perl |
3 | | - |
4 | | -# Copyright (C) 2011 Wikimedia Foundation |
5 | | -# This program is free software; you can redistribute it and/or |
6 | | -# modify it under the terms of the GNU General Public License version 2 |
7 | | -# as published by the Free Software Foundation. |
8 | | -# This program is distributed in the hope that it will be useful, |
9 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
11 | | -# See the GNU General Public License for more details, at |
12 | | -# http://www.fsf.org/licenses/gpl.html |
13 | | - |
14 | | -# Author: |
15 | | -# Erik Zachte, email ezachte@wikimedia.org |
16 | | -# loosely based on predecessor |
17 | | -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl |
18 | | - |
19 | | -# Functionality: |
20 | | -# tba |
21 | | - |
22 | | -# Parameters: |
23 | | -# tba |
24 | | - |
25 | | -# Output: |
26 | | -# updated csv file for import in MySQL |
27 | | - |
28 | | -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/ |
29 | | - |
30 | | - use Getopt::Std ; |
31 | | - |
32 | | - $true = 1 ; |
33 | | - $false = 0 ; |
34 | | - |
35 | | - @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ; |
36 | | - |
37 | | - $file_csv_monthly_data = "StatisticsMonthly.csv" ; |
38 | | - $file_csv_user_activity_spread = "StatisticsUserActivitySpread.csv" ; |
39 | | - $file_csv_analytics_in = "analytics_in_wikistats.csv" ; |
40 | | - |
41 | | - &ParseArguments ; |
42 | | - &ReadStatisticsMonthly ; |
43 | | - &FindLargestWikis ; |
44 | | - &WriteMonthlyData ; |
45 | | - |
46 | | - print "\nReady\n\n" ; |
47 | | - exit ; |
48 | | - |
49 | | -sub ParseArguments |
50 | | -{ |
51 | | - print "ParseArguments\n" ; |
52 | | - my (@options, $arguments) ; |
53 | | - |
54 | | - getopt ("io", \%options) ; |
55 | | - |
56 | | - foreach $arg (sort keys %options) |
57 | | - { $arguments .= " -$arg " . $options {$arg} . "\n" ; } |
58 | | - print ("\nArguments\n$arguments\n") ; |
59 | | - |
60 | | - if (! -d '/mnt/') # EZ test |
61 | | - { |
62 | | - $path_in = "c:/\@ wikimedia/# out bayes" ; |
63 | | - $path_out = "c:/MySQL/analytics" ; |
64 | | - } |
65 | | - else |
66 | | - { |
67 | | - die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ; |
68 | | - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ; |
69 | | - |
70 | | - $path_in = $options {"i"} ; |
71 | | - $path_out = $options {"o"} ; |
72 | | - } |
73 | | - |
74 | | - die "Input folder '$path_in' does not exist" if (! -d $path_in) ; |
75 | | - die "Output folder '$path_out' does not exist" if (! -d $path_out) ; |
76 | | - |
77 | | - print "Input folder: $path_in\n" ; |
78 | | - print "Output folder: $path_out\n\n" ; |
79 | | - |
80 | | - $file_csv_out = "$path_out/analytics_in_wikistats.csv" ; |
81 | | -} |
82 | | - |
83 | | -sub ReadStatisticsMonthly |
84 | | -{ |
85 | | - print "ReadStatisticsMonthly\n" ; |
86 | | - &ReadStatisticsMonthlyForProject ("wb") ; |
87 | | - &ReadStatisticsMonthlyForProject ("wk") ; |
88 | | - &ReadStatisticsMonthlyForProject ("wn") ; |
89 | | - &ReadStatisticsMonthlyForProject ("wp") ; |
90 | | - &ReadStatisticsMonthlyForProject ("wq") ; |
91 | | - &ReadStatisticsMonthlyForProject ("ws") ; |
92 | | - &ReadStatisticsMonthlyForProject ("wv") ; |
93 | | - &ReadStatisticsMonthlyForProject ("wx") ; |
94 | | - |
95 | | -# &ReadStatisticsPerBinariesExtensionCommons ; |
96 | | -} |
97 | | - |
98 | | -sub ReadStatisticsMonthlyForProject |
99 | | -{ |
100 | | - my $project = shift; |
101 | | - $all_projects = "*" ; |
102 | | - |
103 | | - my $file_csv_in_1 = "$path_in/csv_$project/$file_csv_monthly_data" ; |
104 | | - my $file_csv_in_2 = "$path_in/csv_$project/$file_csv_user_activity_spread" ; |
105 | | - |
106 | | - if (! -e $file_csv_in_1) |
107 | | - { &Abort ("Input file '$file_csv_in_1' not found") ; } |
108 | | - if (! -e $file_csv_in_2) |
109 | | - { &Abort ("Input file '$file_csv_in_2' not found") ; } |
110 | | - |
111 | | - my $yyyymm ; |
112 | | - |
113 | | - print "Read '$file_csv_in_1'\n" ; |
114 | | - open CSV_IN, '<', $file_csv_in_1 ; |
115 | | - while ($line = <CSV_IN>) |
116 | | - { |
117 | | - chomp $line ; |
118 | | - ($language,$date,$counts) = split (',', $line, 3) ; |
119 | | - @fields = split (',', $counts) ; |
120 | | - |
121 | | - next if ! &AcceptWiki ($project,$language) ; |
122 | | - |
123 | | - ($month,$day,$year) = split ('\/', $date) ; |
124 | | - $yyyymm = sprintf ("%04d-%02d", $year, $month) ; |
125 | | - |
126 | | - foreach $field (@fields) |
127 | | - { |
128 | | - if ($field eq '-') |
129 | | - { $field = 0 ; } |
130 | | - } |
131 | | - |
132 | | - $data = $fields [0] . ',' . # contributors all time |
133 | | - $fields [1] . ',' . # new contributors |
134 | | - 'data2,' . # place holder for more data, to be inserted later |
135 | | - $fields [4] . ',' . # articles |
136 | | - $fields [6] . ',' . # articles new per day |
137 | | - $fields [9] . ',' . # larger than 0.5 kB |
138 | | - $fields [10] . ',' . # larger than 2.0 kB |
139 | | - $fields [7] . ',' . # mean edits per article |
140 | | - $fields [8] . ',' . # mean bytes per article |
141 | | - $fields [11] . ',' . # edits |
142 | | - $fields [12] . ',' . # size in bytes |
143 | | - $fields [13] . ',' . # size in words |
144 | | - $fields [14] . ',' . # links internal |
145 | | - $fields [15] . ',' . # links interwiki |
146 | | - $fields [16] . ',' . # links images |
147 | | - $fields [17] . ',' . # links external |
148 | | - $fields [18] ; # redirects |
149 | | - |
150 | | - $data1 {"$project,$language,$yyyymm"} = $data ; |
151 | | - } |
152 | | - close CSV_IN ; |
153 | | - |
154 | | - # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv) |
155 | | - |
156 | | - print "Read '$file_csv_in_2'\n" ; |
157 | | - open CSV_IN, '<', $file_csv_in_2 ; |
158 | | - while ($line = <CSV_IN>) |
159 | | - { |
160 | | - chomp $line ; |
161 | | - ($language,$date,$reguser_bot,$group,@counts) = split (',', $line) ; |
162 | | - |
163 | | - next if ! &AcceptWiki ($project,$language) ; |
164 | | - |
165 | | - if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot |
166 | | - if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces |
167 | | - |
168 | | - ($month,$day,$year) = split ('\/', $date) ; |
169 | | - $yyyymm = sprintf ("%04d-%02d", $year, $month) ; |
170 | | - $months {$yyyymm} ++ ; |
171 | | -# print "YYYYMM $yyyymm\n" ; |
172 | | - |
173 | | - # data have been collected in WikiCountsProcess.pm and been written in WikiCountsOutput.pm |
174 | | - # count user with over x edits |
175 | | - # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc |
176 | | - # @thresholds = (1,3,5,10,25,32,50,100,250,316,500,1000,2500,3162,5000,10000,25000,31623,50000,100000,250000,316228,500000,1000000,2500000,3162278,500000,10000000,25000000,31622777,5000000,100000000) ; |
177 | | - $edits_ge_5 = @counts [2] > 0 ? @counts [2] : 0 ; |
178 | | - $edits_ge_25 = @counts [4] > 0 ? @counts [4] : 0 ; |
179 | | - $edits_ge_100 = @counts [7] > 0 ? @counts [7] : 0 ; |
180 | | - $data2 {"$project,$language,$yyyymm"} = "$edits_ge_5,$edits_ge_25,$edits_ge_100" ; |
181 | | - |
182 | | - $total_edits_ge_5 {"$project,$language"} += $edits_ge_5 ; |
183 | | - $total_edits_ge_25 {"$project,$language"} += $edits_ge_25 ; |
184 | | - $total_edits_ge_100 {"$project,$language"} += $edits_ge_100 ; |
185 | | - |
186 | | - # prep string with right amount of comma's |
187 | | - if ($data2_default eq '') |
188 | | - { |
189 | | - $data2_default = $data2 {"$project,$language,$yyyymm"} ; |
190 | | - $data2_default =~ s/[^,]+/0/g ; |
191 | | - } |
192 | | - } |
193 | | - close CSV_IN ; |
194 | | -} |
195 | | - |
196 | | -#sub ReadStatisticsPerBinariesExtensionCommons |
197 | | -#{ |
198 | | -# my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ; |
199 | | -# my $mmax = -1 ; |
200 | | - |
201 | | -# if (! -e $file_csv_in) |
202 | | -# { &Abort ("Input file '$file_csv_in' not found") ; } |
203 | | - |
204 | | -# print "Read '$file_csv_in'\n" ; |
205 | | -# open CSV_IN, '<', $file_csv_in ; |
206 | | -# while ($line = <CSV_IN>) |
207 | | -# { |
208 | | -# chomp $line ; |
209 | | -# ($language,$date,$counts) = split (',', $line, 3) ; |
210 | | - |
211 | | -# if ($language ne "commons") { next ; } |
212 | | - |
213 | | -# if ($date eq "00/0000") |
214 | | -# { |
215 | | -# @fields = split (',', $counts) ; |
216 | | -# $field_ndx = 0 ; |
217 | | -# foreach $field (@fields) |
218 | | -# { |
219 | | -# $ext_cnt {-1}{$field_ndx} = $field ; |
220 | | -# # print "EXT_CNT $field_ndx : $field\n" ; |
221 | | -# $field_ndx ++ ; |
222 | | -# } |
223 | | -# next ; |
224 | | -# } |
225 | | - |
226 | | -# ($month,$year) = split ('\/', $date) ; |
227 | | -# my $m = &months_since_2000_01 ($year,$month) ; |
228 | | -# next if $m < $m_start ; |
229 | | - |
230 | | -# if ($m > $mmax) |
231 | | -# { $mmax = $m ; } |
232 | | - |
233 | | -# @fields = split (',', $counts) ; |
234 | | -# $field_ndx = 0 ; |
235 | | -# foreach $field (@fields) |
236 | | -# { |
237 | | -# $ext_cnt {$m}{$field_ndx} = $field ; |
238 | | -# $ext_tot {$m} += $field ; |
239 | | -# $field_ndx ++ ; |
240 | | -# } |
241 | | -# } |
242 | | -# close CSV_IN ; |
243 | | - |
244 | | -# %ext_cnt_mmax = %{$ext_cnt {$mmax}} ; |
245 | | -# @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ; |
246 | | - |
247 | | -# $extcnt = 0 ; |
248 | | -# foreach $extndx (@ext_cnt_mmax) |
249 | | -# { |
250 | | -# # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ; |
251 | | -# push @extndxs, $extndx ; |
252 | | -# if ($extcnt++ >= 9) { last ; } |
253 | | -# } |
254 | | -#} |
255 | | - |
256 | | -sub FindLargestWikis |
257 | | -{ |
258 | | - print "FindLargestWikis\n" ; |
259 | | - print "Largest projects (most accumulated very active editors):\n"; |
260 | | - @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ; |
261 | | - $rank = 0 ; |
262 | | - foreach $project_language (@total_edits_ge_100) |
263 | | - { |
264 | | - $largest_projects {$project_language} = $rank++ ; |
265 | | - print "$project_language," ; |
266 | | - last if $rank > 10 ; |
267 | | - } |
268 | | - print "\n\n" ; |
269 | | - |
270 | | - foreach $yyyymm (sort keys %months) |
271 | | - { |
272 | | - next if $yyyymm lt '2011' ; |
273 | | - foreach $project_language (keys %largest_projects) |
274 | | - { |
275 | | - ($project,$language) = split (',', $project_language) ; |
276 | | - if ($data2 {"$project,$language,$yyyymm"} eq '') |
277 | | - { |
278 | | - print "No data yet for large wiki $project_language for $yyyymm -> skip month $yyyymm\n" ;
279 | | - $months {$yyyymm} = 0 ; |
280 | | - } |
281 | | - } |
282 | | - } |
283 | | -} |
284 | | - |
285 | | -sub WriteMonthlyData |
286 | | -{ |
287 | | - print "WriteMonthlyData\n" ; |
288 | | - my $file_csv_out = "$path_out/$file_csv_analytics_in" ; |
289 | | - open CSV_OUT, '>', $file_csv_out ; |
290 | | - foreach $project_wiki_month (sort keys %data1) |
291 | | - { |
292 | | - ($project,$wiki,$yyyymm) = split (',', $project_wiki_month) ; |
293 | | - |
294 | | - # recent month misses one or more large wikis?
295 | | - next if $months {$yyyymm} == 0 ; |
296 | | - |
297 | | - $data1 = $data1 {$project_wiki_month} ; |
298 | | - $data2 = $data2 {$project_wiki_month} ; |
299 | | - if ($data2 eq '') |
300 | | - { |
301 | | - print "Editor data missing for $project_wiki_month\n" ; |
302 | | - $data2 = $data2_default ; |
303 | | - } |
304 | | - $data1 =~ s/data2/$data2/ ; # insert rather than append to have all editor fields close together |
305 | | - print CSV_OUT "$project_wiki_month,$data1\n" ; |
306 | | - } |
307 | | - $total_edits_ge_5 {"$project,*,$yyyymm"} += $edits_ge_5 ; |
308 | | - $total_edits_ge_25 {"$project,*,$yyyymm"} += $edits_ge_25 ; |
309 | | - $total_edits_ge_100 {"$project,*,$yyyymm"} += $edits_ge_100 ; |
310 | | - close CSV_OUT ; |
311 | | -} |
312 | | - |
313 | | -sub AcceptWiki |
314 | | -{ |
315 | | - my ($project,$language) = @_ ; |
316 | | - |
317 | | - return $false if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip) |
318 | | - return $false if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on |
319 | | - return $false if $language =~ /mania|team|comcom|closed|chair|langcom|office|searchcom|sep11|nostalgia|stats|test/i ; |
320 | | - |
321 | | - return $false if $language =~ /^(?:dk|tlh|ru_sib)$/ ; # dk=dumps exist(ed?) but site not, tlh=Klingon, ru-sib=Siberian
322 | | - return $false if $project eq 'wk' and ($language eq "als" or $language eq "tlh") ; |
323 | | - |
324 | | - return $true ; |
325 | | -} |
326 | | - |
327 | | -sub Abort |
328 | | -{ |
329 | | - my $msg = shift ; |
330 | | - print "$msg\nExecution aborted." ; |
331 | | - # to do: log also to file |
332 | | - exit ; |
333 | | -} |
334 | | - |
335 | | - |
Index: trunk/wikistats/analytics/analytics_new.sh |
— | — | @@ -1,5 +0,0 @@ |
2 | | -clear |
3 | | -cd /a/analytics |
4 | | -# rm *test*.csv |
5 | | -# mysql --user=root --password=changerootwhenpriyankareturns < analytics_create_and_load_from_csv.txt > mysql_log.txt |
6 | | - mysql -u analytics -h project2.wikimedia.org -preport < analytics_create_and_load_from_csv.txt > mysql_log.txt |
Index: trunk/wikistats/analytics/_readme.txt |
— | — | @@ -1,21 +0,0 @@ |
2 | | -This folder contains bash and perl files to create and fill database 'analytics', a.o. for monthly dashboard.
3 | | - |
4 | | -== analytics_new.sh == |
5 | | -Defines the database and tables and loads data from existing csv files. |
6 | | -It executes SQL from analytics_create_and_load_from_csv.txt |
7 | | - |
8 | | -== analytics_upd.sh == |
9 | | -Prepares new csv files (delegated to analytics_generate_csv_files.sh), |
10 | | -and empties/reloads all tables for which csv files are in this folder. |
11 | | -It executes SQL from analytics_refresh_from_csv.txt |
12 | | - |
13 | | -== CSV files == |
14 | | -CSV files and where they are generated: |
15 | | - |
16 | | -analytics_in_binaries.csv <- AnalyticsPrepBinariesData.pl |
17 | | -analytics_in_comscore.csv <- AnalyticsPrepComscoreData.pl |
18 | | -analytics_in_comscore_regions.csv <- manual |
19 | | -analytics_in_language_names.csv |
20 | | -analytics_in_offline.csv <- manual |
21 | | -analytics_in_page_views.csv <- /home/ezachte/wikistats/pageviews_monthly.sh (file copy after that) |
22 | | -analytics_in_wikistats.csv <- AnalyticsPrepWikiCountsOutput.pl |
Index: trunk/wikistats/analytics/AnalyticsPrepComscoreData.pl |
— | — | @@ -1,564 +0,0 @@ |
2 | | -#!/usr/bin/perl |
3 | | - |
4 | | -# Copyright (C) 2011 Wikimedia Foundation |
5 | | -# This program is free software; you can redistribute it and/or |
6 | | -# modify it under the terms of the GNU General Public License version 2 |
7 | | -# as published by the Free Software Foundation. |
8 | | -# This program is distributed in the hope that it will be useful, |
9 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
11 | | -# See the GNU General Public License for more details, at |
12 | | -# http://www.fsf.org/licenses/gpl.html |
13 | | - |
14 | | -# Author: |
15 | | -# Erik Zachte, email ezachte@wikimedia.org |
16 | | - |
17 | | -# Functionality: |
18 | | -# comScore data can be downloaded as csv files, each of which contains 14 months of history
19 | | -# This script uses these files to update 'master' csv files which contain all known history |
20 | | -# Note: only entities which are already in master file will be updated! |
21 | | -# Then it merges these master files into one csv file which can be loaded into analytics database |
22 | | -# Data are: reach by region, unique visitors by region, unique visitors by web property |
23 | | - |
24 | | -# Parameters: |
25 | | -# -m (required) folder with 'master' csv files (files with all known history) |
26 | | -# -u (required) folder with 'update' csv files (files with latest 14 months history, produced by comScore)
27 | | - |
28 | | -# Output: |
29 | | -# updated master csv files + merged and formatted csv for import in MySQL |
30 | | - |
31 | | -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/ |
32 | | - |
33 | | - use Getopt::Std ; |
34 | | - use Cwd; |
35 | | - |
36 | | - my $options ; |
37 | | - getopt ("imo", \%options) ; |
38 | | - |
39 | | - $true = 1 ; |
40 | | - $false = 0 ; |
41 | | - |
42 | | - $script_name = "AnalyticsPrepComscoreData.pl" ; |
43 | | - $script_version = "0.31" ; |
44 | | - |
45 | | -# EZ test only |
46 | | -# $source = "comscore" ; |
47 | | -# $server = "ez_test" ; |
48 | | -# $generated = "2011-05-06 00:00:00" ; |
49 | | -# $user = "ezachte" ; |
50 | | - |
51 | | - $dir_in = $options {"i"} ; |
52 | | - $dir_upd = $options {"m"} ; |
53 | | - $dir_out = $options {"o"} ; |
54 | | - $mode = 'add'; |
55 | | - if (defined $options {'r'}) |
56 | | - { $mode = 'replace'; } |
57 | | - |
58 | | - print "Mode is $mode (specify '-r' for replace)\n\n"; |
59 | | - |
60 | | - if (! -d "/home/") # EZ test machine |
61 | | - { |
62 | | - $dir_in = "C:/@ Wikimedia/@ Report Card/Data" ; |
63 | | - $dir_upd = "C:/MySQL/analytics" ; |
64 | | - $dir_out = "C:/MySQL/analytics" ; |
65 | | - $mode = 'replace' ; |
66 | | - } |
67 | | - |
68 | | - if ($dir_in eq '') |
69 | | - { Abort ("Specify folder for input file (new comScore data) '-i folder'") ; } |
70 | | - if ($dir_upd eq '') |
71 | | - { Abort ("Specify folder for master files (full history) as '-m folder'") ; } |
72 | | - if ($dir_out eq '') |
73 | | - { Abort ("Specify folder for output file '-o folder'") ; } |
74 | | - |
75 | | - $file_comscore_reach_master = "history_comscore_reach_regions.csv" ; |
76 | | - $file_comscore_reach_update = "*reach*by*region*csv" ; |
77 | | - $file_comscore_uv_region_master = "history_comscore_UV_regions.csv" ; |
78 | | - $file_comscore_uv_region_update = "*UVs*by*region*csv" ; |
79 | | - $file_comscore_uv_property_master = "history_comscore_UV_properties.csv" ; |
80 | | - $file_comscore_uv_property_update = "*UV*trend*csv" ; |
81 | | - |
82 | | - $layout_csv_reach = 1 ; |
83 | | - $layout_csv_regions = 2 ; |
84 | | - $layout_csv_properties = 3 ; |
85 | | - |
86 | | - print "Directories:\n" . |
87 | | - "Input (new comScore data): '$dir_in'\n". |
88 | | - "Master files (full history): '$dir_upd'\n" . |
89 | | - "Output (database feed): '$dir_out'\n\n" ; |
90 | | - |
91 | | - %region_codes = ( |
92 | | - "Europe"=>"EU", |
93 | | - "North America"=>"NA", |
94 | | - "Latin America"=>"LA", |
95 | | - "World-Wide" => "W", |
96 | | - "Middle East - Africa" => "MA", |
97 | | - "Asia Pacific"=> "AS", |
98 | | - "United States" => "US", |
99 | | - "India" => "I", |
100 | | - "China" => "C" |
101 | | - ) ; |
102 | | - |
103 | | - foreach $region_name (keys %region_codes) |
104 | | - { $region_names {$region_codes {$region_name}} = $region_name ; } |
105 | | - |
106 | | - @months_short = qw "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec" ; |
107 | | - |
108 | | - &ReadMasterComscoreDataReachPerRegion ($file_comscore_reach_master, $file_comscore_reach_update, "%.1f", 1, $layout_csv_reach) ; |
109 | | - %reach_region_code = %data ; |
110 | | - |
111 | | - &ReadMasterComscoreDataVisitorsPerRegion ($file_comscore_uv_region_master, $file_comscore_uv_region_update, "%.0f", 1000, $layout_csv_regions) ; |
112 | | - %visitors_region_code = %data ; |
113 | | - |
114 | | - &ReadMasterComscoreDataVisitorsPerProperty ($file_comscore_uv_property_master, $file_comscore_uv_property_update, "%.0f", 1000, $layout_csv_properties) ; |
115 | | - %visitors_web_property = %data ; |
116 | | - |
117 | | - &WriteDataAnalytics ; |
118 | | - |
119 | | - print "\nReady\n\n" ; |
120 | | - exit ; |
121 | | - |
122 | | -sub ReadMasterComscoreDataReachPerRegion |
123 | | -{ |
124 | | - my ($file_comscore_master, $file_comscore_updates, $precision, $layout_csv) = @_ ; |
125 | | - |
126 | | - print "ReadMasterComscoreDataReachPerRegion\n\n" ; |
127 | | - |
128 | | - undef %months ; |
129 | | - undef %data ; |
130 | | - undef @regions ; |
131 | | - |
132 | | - open IN, '<', "$dir_upd/$file_comscore_master" ; |
133 | | - |
134 | | - $lines = 0 ; |
135 | | - while ($line = <IN>) |
136 | | - { |
137 | | - chomp $line ; |
138 | | - $line =~ s/\r//g ; |
139 | | - |
140 | | - ($yyyymm,@data) = split (',', $line) ; |
141 | | - |
142 | | - if ($lines++ == 0) |
143 | | - { |
144 | | - @regions = @data ; |
145 | | - print "Regions found: " . (join ',', @regions) . "\n"; |
146 | | - next ; |
147 | | - } |
148 | | - |
149 | | - $field_ndx = 0 ; |
150 | | - foreach (@data) |
151 | | - { |
152 | | - $region = $regions [$field_ndx] ; |
153 | | - $region_code = $region_codes {$region} ; |
154 | | - |
155 | | - $data = $data [$field_ndx] ; |
156 | | - if ($data eq '') |
157 | | - { $data = '0' ; } |
158 | | - $months {$yyyymm} ++ ; |
159 | | - $data {"$yyyymm,$region_code"} = $data ; |
160 | | - # print "Old data $yyyymm,$region_code = $data\n" ; |
161 | | - $field_ndx++ ; |
162 | | - } |
163 | | - } |
164 | | - close IN ; |
165 | | - |
166 | | - my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1, $layout_csv, @regions) ; |
167 | | - return if ! $updates_found ; |
168 | | - |
169 | | - rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ; |
170 | | - open OUT, '>', "$dir_upd/$file_comscore_master" ; |
171 | | - |
172 | | - $line_out = "yyyymm" ; |
173 | | - foreach $region_name (@regions) |
174 | | - { $line_out .= ",$region_name" ; } |
175 | | - print OUT "$line_out" ; |
176 | | - |
177 | | - foreach $yyyymm (sort {$b cmp $a} keys %months) |
178 | | - { |
179 | | - $line_out = "\n$yyyymm" ; |
180 | | - foreach $region_name (@regions) |
181 | | - { |
182 | | - $yyyymm_region_code = $yyyymm . ',' . $region_codes {$region_name} ; |
183 | | - $line_out .= "," . sprintf ($precision, $data {$yyyymm_region_code}) ; |
184 | | - } |
185 | | - print OUT "$line_out" ; |
186 | | - } |
187 | | - |
188 | | - close OUT ; |
189 | | -} |
190 | | - |
191 | | -sub ReadMasterComscoreDataVisitorsPerRegion |
192 | | -{ |
193 | | - my ($file_comscore_master, $file_comscore_updates, $precision, $multiplier, $layout_csv) = @_ ; |
194 | | - |
195 | | - print "ReadMasterComscoreDataVisitorsPerRegion\n\n"; |
196 | | - |
197 | | - undef %months ; |
198 | | - undef %data ; |
199 | | - undef @regions ; |
200 | | - |
201 | | - open IN, '<', "$dir_upd/$file_comscore_master" ; |
202 | | - |
203 | | - $lines = 0 ; |
204 | | - $metric = 'unique_visitors' ; |
205 | | - while ($line = <IN>) |
206 | | - { |
207 | | - chomp $line ; |
208 | | - $line =~ s/\r//g ; |
209 | | - $line = &GetNumberOnly ($line) ; |
210 | | - |
211 | | - next if $line !~ /(?:yyyymm|\d\d\d\d-\d\d)/ ; |
212 | | - |
213 | | - ($yyyymm,@data) = split (',', $line) ; |
214 | | - |
215 | | - if ($lines++ == 0) |
216 | | - { |
217 | | - @regions = @data ; |
218 | | - print "Regions found: " . (join ',', @regions) . "\n"; |
219 | | - next ; |
220 | | - } |
221 | | - |
222 | | - $field_ndx = 0 ; |
223 | | - foreach (@data) |
224 | | - { |
225 | | - $region = $regions [$field_ndx] ; |
226 | | - $region_code = $region_codes {$region} ; |
227 | | - |
228 | | - $data = $data [$field_ndx] ; |
229 | | - if ($data eq '') |
230 | | - { $data = '0' ; } |
231 | | - |
232 | | - # print "Old data $yyyymm,$region = $data\n" ; |
233 | | - |
234 | | - $months {$yyyymm} ++ ; |
235 | | - $data {"$yyyymm,$region_code"} = $data ; |
236 | | - |
237 | | - $field_ndx++ ; |
238 | | - } |
239 | | - } |
240 | | - close IN ; |
241 | | - |
242 | | - my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @regions) ; |
243 | | - return if ! $updates_found ; |
244 | | - |
245 | | - rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ; |
246 | | - open OUT, '>', "$dir_upd/$file_comscore_master" ; |
247 | | - |
248 | | - $line_out = "yyyymm" ; |
249 | | - foreach $region_name (@regions) |
250 | | - { $line_out .= ",$region_name" ; } |
251 | | - print OUT "$line_out" ; |
252 | | - |
253 | | - foreach $yyyymm (sort {$b cmp $a} keys %months) |
254 | | - { |
255 | | - $line_out = "\n$yyyymm" ; |
256 | | - foreach $region_name (@regions) |
257 | | - { |
258 | | - $yyyymm_region_code = $yyyymm . ',' . $region_codes {$region_name} ; |
259 | | - $line_out .= "," . sprintf ($precision, $data {$yyyymm_region_code}) ; |
260 | | - } |
261 | | - print OUT "$line_out" ; |
262 | | - } |
263 | | - |
264 | | - close OUT ; |
265 | | -} |
266 | | - |
267 | | -sub ReadMasterComscoreDataVisitorsPerProperty |
268 | | -{ |
269 | | - my ($file_comscore_master, $file_comscore_updates, $precision, $multiplier, $layout_csv) = @_ ; |
270 | | - |
271 | | - print "ReadMasterComscoreDataVisitorsPerProperty\n\n"; |
272 | | - |
273 | | - undef %months ; |
274 | | - undef %data ; |
275 | | - undef @properties ; |
276 | | - |
277 | | - open IN, '<', "$dir_upd/$file_comscore_master" ; |
278 | | - |
279 | | - $lines = 0 ; |
280 | | - $metric = 'unique_visitors' ; |
281 | | - while ($line = <IN>) |
282 | | - { |
283 | | - chomp $line ; |
284 | | - $line =~ s/\r//g ; |
285 | | - |
286 | | - ($yyyymm,@data) = split (',', $line) ; |
287 | | - if ($lines++ == 0) |
288 | | - { @properties = @data ; next ; } |
289 | | - |
290 | | - $field_ndx = 0 ; |
291 | | - foreach (@data) |
292 | | - { |
293 | | - $property = $properties [$field_ndx] ; |
294 | | - $property =~ s/.*Yahoo.*/Yahoo/ ; |
295 | | - $data = $data [$field_ndx] ; |
296 | | - if ($data eq '') |
297 | | - { $data = '0' ; } |
298 | | - |
299 | | - # print "Old data $yyyymm,$property = $data\n" ; |
300 | | - |
301 | | - $months {$yyyymm} ++ ; |
302 | | - $data {"$yyyymm,$property"} = $data ; |
303 | | - |
304 | | - $field_ndx++ ; |
305 | | - } |
306 | | - } |
307 | | - close IN ; |
308 | | - |
309 | | - my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @properties) ; |
310 | | - return if ! $updates_found ; |
311 | | - |
312 | | - rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ; |
313 | | - open OUT, '>', "$dir_upd/$file_comscore_master" ; |
314 | | - |
315 | | - $line_out = "yyyymm" ; |
316 | | - foreach $property (@properties) |
317 | | - { $line_out .= ",$property" ; } |
318 | | - print OUT "$line_out" ; |
319 | | - |
320 | | - foreach $yyyymm (sort {$b cmp $a} keys %months) |
321 | | - { |
322 | | - $line_out = "\n$yyyymm" ; |
323 | | - foreach $property (@properties) |
324 | | - { |
325 | | - $yyyymm_property = "$yyyymm,$property" ; |
326 | | - $line_out .= "," . sprintf ($precision, $data {$yyyymm_property}) ; |
327 | | - } |
328 | | - print OUT "$line_out" ; |
329 | | - } |
330 | | - |
331 | | - close OUT ; |
332 | | -} |
333 | | - |
334 | | -sub UpdateMasterFileFromRecentComscoreData |
335 | | -{ |
336 | | - my ($file_comscore_master, $file_comscore_updates, $multiplier, $layout_csv, @white_list) = @_ ; |
337 | | - |
338 | | - print "UpdateMasterFileFromRecentComscoreData\n\n"; |
339 | | - |
340 | | - undef %white_list ; |
341 | | - undef %not_white_listed ; |
342 | | - |
343 | | - print "White list: ". (join (',', @white_list)) . "\n\n"; |
344 | | - |
345 | | - foreach $id (@white_list) |
346 | | - { $white_list {$id} = $true ; } |
347 | | - |
348 | | - if (! -e "$dir_upd/$file_comscore_master") |
349 | | - { Abort ("File $file_comscore_master not found!") ; } |
350 | | - |
351 | | - $age_all = -M "$dir_upd/$file_comscore_master" ; |
352 | | - print "Latest comscore master file is " . sprintf ("%.0f", $age_all) . " days old: '$file_comscore_master'\n" ; |
353 | | - |
354 | | - my $cwd = getcwd ; |
355 | | - chdir $dir_in ; |
356 | | - |
357 | | - @files = glob($file_comscore_updates) ; |
358 | | - $min_age_upd = 999999 ; |
359 | | - $file_comscore_updates_latest = '' ; |
360 | | - foreach $file (@files) |
361 | | - { |
362 | | - $age = -M $file ; |
363 | | - if ($age < $min_age_upd) |
364 | | - { |
365 | | - $min_age_upd = $age ; |
366 | | - $file_comscore_updates_latest = $file ; |
367 | | - } |
368 | | - } |
369 | | - print "Latest comscore update file is " . sprintf ("%.0f", $min_age_upd) . " days old: '$file_comscore_updates_latest'\n" ; |
370 | | - |
371 | | - if ($min_age_upd == 999999) |
372 | | - { |
373 | | - print "No valid update file found. Nothing to update." ; |
374 | | - return ; |
375 | | - } |
376 | | - |
377 | | - #if ($age_all > $min_age_upd) |
378 | | - #{ |
379 | | - # print "File with master data more recent than latest update csv from comScore. Nothing to update." ; |
380 | | - # return ; |
381 | | - #} |
382 | | - |
383 | | - my $updates_found = $false ; |
384 | | - |
385 | | - open CSV, '<', $file_comscore_updates_latest ; |
386 | | - binmode CSV ; |
387 | | - while ($line = <CSV>) |
388 | | - { |
389 | | - chomp $line ; |
390 | | - $line =~ s/\r//g ; |
391 | | - $line = &GetNumberOnly ($line) ; |
392 | | - |
393 | | - if ($line =~ /Jan-\d\d\d\d.*?Feb-\d\d\d\d/) # e.g. 'Location,Location,Jan-2010,Feb-2010,Mar-2010,Apr-2010,...' |
394 | | - { |
395 | | - if ($layout_csv == $layout_csv_properties) |
396 | | - { ($dummy1,$dummy2,$dummy3,@months) = split (',', $line) ; } # web properties csv file |
397 | | - else |
398 | | - { ($dummy1,$dummy2,@months) = split (',', $line) ; } # uv / reach csv files |
399 | | - |
400 | | - @months = &mmm_yyyy2yyyy_mm (@months) ; |
401 | | - } |
402 | | - |
403 | | - if (($line =~ /^\d+,/) || ($line =~ /,,.*?Total Internet/)) |
404 | | - { |
405 | | - if ($layout_csv == $layout_csv_properties) |
406 | | - { |
407 | | - ($index,$dummy,$property,@data) = split (',', $line) ; |
408 | | - $property =~ s/^\s+// ; |
409 | | - $property =~ s/\s+$// ; |
410 | | - |
411 | | - $property =~ s/.*Total Internet.*/Total Internet/i ; |
412 | | - $property =~ s/.*Google.*/Google/i ; |
413 | | - $property =~ s/.*Microsoft.*/Microsoft/i ; |
414 | | - $property =~ s/.*FACEBOOK.*/Facebook/i ; |
415 | | - $property =~ s/.*Yahoo.*/Yahoo/i ; |
416 | | - $property =~ s/.*Amazon.*/Amazon/i ; |
417 | | - $property =~ s/.*Apple.*/Apple/i ; |
418 | | - $property =~ s/.*AOL.*/AOL/i ; |
419 | | - $property =~ s/.*Wikimedia.*/Wikimedia/i ; |
420 | | - $property =~ s/.*Tencent.*/Tencent/i ; |
421 | | - $property =~ s/.*Baidu.*/Baidu/i ; |
422 | | - $property =~ s/.*CBS.*/CBS/i ; |
423 | | - |
424 | | - if (! $white_list {$property}) |
425 | | - { |
426 | | - $not_white_listed {$property}++ ; |
427 | | - next ; |
428 | | - } |
429 | | - |
430 | | - $id = $property ; |
431 | | - } |
432 | | - else |
433 | | - { |
434 | | - ($index,$region,@data) = split (',', $line) ; |
435 | | - $region =~ s/^\s+// ; |
436 | | - $region =~ s/\s+$// ; |
437 | | - |
438 | | - if (! $white_list {$region}) |
439 | | - { |
440 | | - $not_white_listed {$region}++ ; |
441 | | - next ; |
442 | | - } |
443 | | - |
444 | | - $id = $region_codes {$region} ; |
445 | | - } |
446 | | - |
447 | | - for ($m = 0 ; $m <= $#months ; $m++) |
448 | | - { |
449 | | - $yyyymm = $months [$m] ; |
450 | | - $months {$yyyymm} ++ ; |
451 | | - $yyyymm_id = "$yyyymm,$id" ; |
452 | | - $data = $data [$m] * $multiplier ; |
453 | | - |
454 | | - if ($mode eq 'add') |
455 | | - { |
456 | | - if (! defined $data {$yyyymm_id}) |
457 | | - { |
458 | | - $updates_found = $true ; |
459 | | - print "New data found: $yyyymm_id = $data\n" ; |
460 | | - $data {$yyyymm_id} = $data ; |
461 | | - } |
462 | | - } |
463 | | - else |
464 | | - { |
465 | | - $updates_found = $true ; |
466 | | - print "Data found: $yyyymm_id = $data\n" ; |
467 | | - $data {$yyyymm_id} = $data ; |
468 | | - } |
469 | | - } |
470 | | - } |
471 | | - } |
472 | | - |
473 | | - $entities_not_white_listed = join (', ', sort keys %not_white_listed) ; |
474 | | - if ($entities_not_white_listed ne '') |
475 | | - { print "\nEntities ignored:\n$entities_not_white_listed\n\n" ; } |
476 | | - |
477 | | - if (! $updates_found) |
478 | | - { print "No new updates found\n" ; } |
479 | | - else |
480 | | - { print "\nUpdates found, rewrite master file '$file_comscore_master'\n\n" ; } |
481 | | - |
482 | | - return ($updates_found) ; |
483 | | -} |
484 | | - |
485 | | -sub WriteDataAnalytics |
486 | | -{ |
487 | | - print "WriteDataAnalytics\n\n"; |
488 | | - |
489 | | - open OUT, '>', "$dir_out/analytics_in_comscore.csv" ; |
490 | | - |
491 | | - $metric = 'unique_visitors' ; |
492 | | - foreach $yyyymm (sort keys %months) |
493 | | - { |
494 | | - # store meta data elsewhere |
495 | | - # $line = "$generated,$source,$server,$script_name,$script_version,$user,$yyyymm,$country_code,$region_code,$property,$project,$normalized,$metric,$data\n" ; |
496 | | - foreach $region_code (sort values %region_codes) |
497 | | - { |
498 | | - $country_code = '-' ; |
499 | | - $property = '-' ; |
500 | | - $project = '-' ; |
501 | | - $reach = $reach_region_code {"$yyyymm,$region_code"} ; |
502 | | - $visitors = $visitors_region_code {"$yyyymm,$region_code"} ; |
503 | | - |
504 | | - if (! defined $reach) { $reach = -1 ; } |
505 | | - if (! defined $visitors) { $visitors = -1 ; } |
506 | | - |
507 | | - next if $reach == -1 and $visitors == -1 ; |
508 | | - |
509 | | - $line = "$yyyymm,$country_code,$region_code,$property,$project,$reach,$visitors\n" ; |
510 | | - print OUT $line ; |
511 | | - print $line ; |
512 | | - } |
513 | | - |
514 | | - foreach $property (sort @properties) |
515 | | - { |
516 | | - $country_code = '-' ; |
517 | | - $region_code = '-' ; |
518 | | - $project = '-' ; |
519 | | - $reach = '-1' ; |
520 | | - $visitors = $visitors_web_property {"$yyyymm,$property"} ; |
521 | | - |
522 | | - next if ! defined $visitors ; |
523 | | - |
524 | | - $line = "$yyyymm,$country_code,$region_code,$property,$project,$reach,$visitors\n" ; |
525 | | - print OUT $line ; |
526 | | - # print $line ; |
527 | | - } |
528 | | - } |
529 | | -} |
530 | | - |
531 | | -sub GetNumberOnly |
532 | | -{ |
533 | | - my $line = shift ; |
534 | | - $line =~ s/("[^\"]+")/($a=$1,$a=~s#,##g,$a)/ge ; # nested regexp: remove comma's inside double quotes |
535 | | - $line =~ s/"//g ; |
536 | | - return $line ; |
537 | | -} |
538 | | - |
539 | | -sub mmm_yyyy2yyyy_mm |
540 | | -{ |
541 | | - my @months = @_ ; |
542 | | - my ($m) ; |
543 | | - # Jan -> 01, etc |
544 | | - foreach $month (@months) |
545 | | - { |
546 | | - my ($mmm,$yyyy) = split ('-', $month) ; |
547 | | - for ($m = 0 ; $m <= $#months_short ; $m++) |
548 | | - { |
549 | | - if ($mmm eq $months_short [$m]) |
550 | | - { |
551 | | - $month = "$yyyy-" . sprintf ("%02d", $m+1) ; |
552 | | - last ; |
553 | | - } |
554 | | - } |
555 | | - } |
556 | | - return @months ; |
557 | | -} |
558 | | - |
559 | | -sub Abort |
560 | | -{ |
561 | | - $msg = shift ; |
562 | | - |
563 | | - print "\nAbort, reason: $msg\n\n" ; |
564 | | - exit ; |
565 | | -} |
Index: trunk/wikistats/analytics/README |
— | — | @@ -0,0 +1 @@ |
| 2 | +The files in this folder have been moved to trunk/extensions/MetricReporting/import. |
Property changes on: trunk/wikistats/analytics/README |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 3 | + native |