r91509 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r91508‎ | r91509 | r91510 >
Date:00:37, 6 July 2011
Author:ezachte
Status:deferred
Tags:
Comment:
new MySQL tables and fields, updated perl files, more bash scripts
Modified paths:
  • /trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl (modified) (history)
  • /trunk/wikistats/analytics/AnalyticsPrepComscoreData.pl (modified) (history)
  • /trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl (modified) (history)
  • /trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt (added) (history)
  • /trunk/wikistats/analytics/analytics_generate_csv_files.sh (added) (history)
  • /trunk/wikistats/analytics/analytics_new.sh (added) (history)
  • /trunk/wikistats/analytics/analytics_refresh_from_csv.txt (added) (history)
  • /trunk/wikistats/analytics/analytics_upd.sh (added) (history)

Diff [purge]

Index: trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl
@@ -17,21 +17,21 @@
1818
1919 sub ParseArguments
2020 {
21 -# my @options ;
22 -# getopt ("io", \%options) ;
 21+ my @options ;
 22+ getopt ("io", \%options) ;
2323
24 -# die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ;
25 -# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
 24+ die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ;
 25+ die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
2626
27 -# $path_in = $options {"i"} ;
28 -# $path_out = $options {"o"} ;
 27+ $path_in = $options {"i"} ;
 28+ $path_out = $options {"o"} ;
2929
30 -# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
31 -# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
 30+ die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
 31+ die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
3232
3333 # tests only
34 - $path_in = "C:/@ Wikimedia/# Out Bayes" ;
35 - $path_out = "C:/analytics" ; # "w:/@ report card/data" ;
 34+ # $path_in = "C:/@ Wikimedia/# Out Bayes" ;
 35+ # $path_out = "C:/analytics" ; # "w:/@ report card/data" ;
3636
3737 print "Input folder: $path_in\n" ;
3838 print "Output folder: $path_out\n" ;
Index: trunk/wikistats/analytics/analytics_refresh_from_csv.txt
@@ -0,0 +1,145 @@
 2+-- make sure to delete output files *test*.csv first if any exist (MySQL on purpose forbids overwrite)
 3+
 4+USE `analytics` ;
 5+
 6+CREATE TABLE `comscore` (
 7+ `date` date NOT NULL,
 8+ `country_code` varchar (3),
 9+ `region_code` varchar (3),
 10+ `web_property` varchar (20),
 11+ `project_code` varchar (10),
 12+ `reach` decimal (4,1) DEFAULT NULL,
 13+ `visitors` decimal (15) DEFAULT NULL,
 14+ PRIMARY KEY (date,country_code,region_code,project_code,web_property),
 15+ KEY (`country_code`)
 16+) ;
 17+
 18+CREATE TABLE `comscore_regions` (
 19+ `region_code` varchar (2),
 20+ `report_language` varchar (10),
 21+ `region_name` varchar (18),
 22+ PRIMARY KEY (report_language,region_code)
 23+) ;
 24+
 25+CREATE TABLE `wikistats` (
 26+ `date` date NOT NULL,
 27+ `project_code` varchar (10),
 28+ `language_code` varchar (15),
 29+ `editors_all_time` int (10) DEFAULT NULL,
 30+ `editors_new` int (7) DEFAULT NULL,
 31+ `editors_ge_5` int (7) DEFAULT NULL,
 32+ `editors_ge_25` int (7) DEFAULT NULL,
 33+ `editors_ge_100` int (7) DEFAULT NULL,
 34+ `articles` int (12) DEFAULT NULL,
 35+ `articles_new_per_day` int (9) DEFAULT NULL,
 36+ `articles_over_bytes_500` int (12) DEFAULT NULL,
 37+ `articles_over_bytes_2000` int (12) DEFAULT NULL,
 38+ `edits_per_article` decimal (9,1) DEFAULT NULL,
 39+ `bytes_per_article` decimal (9,1) DEFAULT NULL,
 40+ `edits` int (12) DEFAULT NULL,
 41+ `size_in_bytes` int (15) DEFAULT NULL,
 42+ `size_in_words` int (15) DEFAULT NULL,
 43+ `links_internal` int (15) DEFAULT NULL,
 44+ `links_interwiki` int (15) DEFAULT NULL,
 45+ `links_image` int (15) DEFAULT NULL,
 46+ `links_external` int (15) DEFAULT NULL,
 47+ `redirects` int (15) DEFAULT NULL,
 48+ PRIMARY KEY (date,project_code,language_code)
 49+) ;
 50+
 51+CREATE TABLE `page_views` (
 52+ `date` date NOT NULL,
 53+ `project_code` char (2),
 54+ `language_code` char (15),
 55+ `views_non_mobile_raw` bigint (15),
 56+ `views_mobile_raw` bigint (15),
 57+ `views_non_mobile_normalized` bigint (15),
 58+ `views_mobile_normalized` bigint (15),
 59+ `views_raw` bigint (15),
 60+ `views_normalized` bigint (15),
 61+ PRIMARY KEY (date,project_code,language_code)
 62+) ;
 63+
 64+CREATE TABLE `language_names` (
 65+ `report_language` varchar (15),
 66+ `language_code` varchar (15),
 67+ `language_name` varchar (50),
 68+ PRIMARY KEY (report_language,language_code)
 69+) ;
 70+
 71+CREATE TABLE `binaries` (
 72+ `date` date NOT NULL,
 73+ `project_code` char (2),
 74+ `language_code` char (15),
 75+ `extension` varchar (10),
 76+ `binaries` bigint (15),
 77+ PRIMARY KEY (date,project_code,language_code,extension)
 78+) ;
 79+
 80+CREATE TABLE `offline` (
 81+ `date` date NOT NULL,
 82+ `readers` bigint (12),
 83+ PRIMARY KEY (date,readers)
 84+) ;
 85+
 86+
 87+-- SHOW TABLES ;
 88+-- DESCRIBE comscore ;
 89+-- DESCRIBE comscore_regions ;
 90+-- DESCRIBE wikistats ;
 91+-- DESCRIBE page_views ;
 92+-- DESCRIBE language_names ;
 93+-- DESCRIBE binaries ;
 94+-- DESCRIBE offline ;
 95+
 96+-- Database Manipulation
 97+-- Obviously in real world this is a separate script
 98+
 99+TRUNCATE TABLE 'analytics_in_comscore.csv' ;
 100+LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
 101+ INTO TABLE comscore
 102+ FIELDS TERMINATED BY ','
 103+ OPTIONALLY ENCLOSED BY '"'
 104+ (@date,country_code,region_code,web_property,project_code,reach,visitors)
 105+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 106+
 107+TRUNCATE TABLE 'analytics_in_comscore_regions.csv' ;
 108+LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
 109+ INTO TABLE comscore_regions
 110+ FIELDS TERMINATED BY ','
 111+ OPTIONALLY ENCLOSED BY '"'
 112+ (report_language,region_code,region_name) ;
 113+
 114+TRUNCATE TABLE 'analytics_in_wikistats.csv' ;
 115+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
 116+ INTO TABLE wikistats
 117+ FIELDS TERMINATED BY ','
 118+ OPTIONALLY ENCLOSED BY '"'
 119+ (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
 120+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 121+
 122+TRUNCATE TABLE 'analytics_in_page_views.csv' ;
 123+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
 124+ INTO TABLE page_views
 125+ FIELDS TERMINATED BY ','
 126+ OPTIONALLY ENCLOSED BY '"'
 127+ (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
 128+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 129+
 130+
 131+TRUNCATE TABLE 'analytics_in_language_names.csv' ;
 132+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
 133+ INTO TABLE language_names
 134+ FIELDS TERMINATED BY ','
 135+ OPTIONALLY ENCLOSED BY '"'
 136+ (report_language,language_code,language_name) ;
 137+
 138+TRUNCATE TABLE 'analytics_in_binaries.csv' ;
 139+LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
 140+ INTO TABLE binaries
 141+ FIELDS TERMINATED BY ','
 142+ OPTIONALLY ENCLOSED BY '"'
 143+ (project_code,language_code,@date,extension,binaries)
 144+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 145+
 146+TRUNCATE TABLE 'analytics_in_offline.csv' ;
Index: trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt
@@ -0,0 +1,179 @@
 2+-- make sure to delete output files *test*.csv first if any exist (MySQL on purpose forbids overwrite)
 3+
 4+-- tables implemented:
 5+-- comscore
 6+-- comscore_regions
 7+-- wikistats
 8+-- page_views
 9+-- language names
 10+-- binaries
 11+
 12+-- more tables planned (O= optional, not needed for report card stage)
 13+-- project_names
 14+-- O edits per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other)
 15+-- O editors per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other)
 16+
 17+
 18+-- open issues:
 19+-- only store basic data in database and calculate all aggregates on the fly or do some aggragation before hand ? (e.g. count for mobile / non-mobile / ==> total of both ? <==)
 20+-- for binaries, store one extension type per row? (future proof, more work to query), or a selected few as columns? (one row only needed per month)
 21+
 22+-- Create database and two tables from scratch
 23+DROP DATABASE IF EXISTS `analytics` ;
 24+
 25+CREATE DATABASE `analytics` ;
 26+
 27+USE `analytics` ;
 28+
 29+CREATE TABLE `comscore` (
 30+ `date` date NOT NULL,
 31+ `country_code` varchar (3),
 32+ `region_code` varchar (3),
 33+ `web_property` varchar (20),
 34+ `project_code` varchar (10),
 35+ `reach` decimal (4,1) DEFAULT NULL,
 36+ `visitors` decimal (15) DEFAULT NULL,
 37+ PRIMARY KEY (date,country_code,region_code,project_code,web_property),
 38+ KEY (`country_code`)
 39+) ;
 40+
 41+CREATE TABLE `comscore_regions` (
 42+ `region_code` varchar (2),
 43+ `report_language` varchar (10),
 44+ `region_name` varchar (18),
 45+ PRIMARY KEY (report_language,region_code)
 46+) ;
 47+
 48+CREATE TABLE `wikistats` (
 49+ `date` date NOT NULL,
 50+ `project_code` varchar (10),
 51+ `language_code` varchar (15),
 52+ `editors_all_time` int (10) DEFAULT NULL,
 53+ `editors_new` int (7) DEFAULT NULL,
 54+ `editors_ge_5` int (7) DEFAULT NULL,
 55+ `editors_ge_25` int (7) DEFAULT NULL,
 56+ `editors_ge_100` int (7) DEFAULT NULL,
 57+ `articles` int (12) DEFAULT NULL,
 58+ `articles_new_per_day` int (9) DEFAULT NULL,
 59+ `articles_over_bytes_500` int (12) DEFAULT NULL,
 60+ `articles_over_bytes_2000` int (12) DEFAULT NULL,
 61+ `edits_per_article` decimal (9,1) DEFAULT NULL,
 62+ `bytes_per_article` decimal (9,1) DEFAULT NULL,
 63+ `edits` int (12) DEFAULT NULL,
 64+ `size_in_bytes` int (15) DEFAULT NULL,
 65+ `size_in_words` int (15) DEFAULT NULL,
 66+ `links_internal` int (15) DEFAULT NULL,
 67+ `links_interwiki` int (15) DEFAULT NULL,
 68+ `links_image` int (15) DEFAULT NULL,
 69+ `links_external` int (15) DEFAULT NULL,
 70+ `redirects` int (15) DEFAULT NULL,
 71+ PRIMARY KEY (date,project_code,language_code)
 72+) ;
 73+
 74+CREATE TABLE `page_views` (
 75+ `date` date NOT NULL,
 76+ `project_code` char (2),
 77+ `language_code` char (15),
 78+ `views_non_mobile_raw` bigint (15),
 79+ `views_mobile_raw` bigint (15),
 80+ `views_non_mobile_normalized` bigint (15),
 81+ `views_mobile_normalized` bigint (15),
 82+ `views_raw` bigint (15),
 83+ `views_normalized` bigint (15),
 84+ PRIMARY KEY (date,project_code,language_code)
 85+) ;
 86+
 87+CREATE TABLE `language_names` (
 88+ `report_language` varchar (15),
 89+ `language_code` varchar (15),
 90+ `language_name` varchar (50),
 91+ PRIMARY KEY (report_language,language_code)
 92+) ;
 93+
 94+CREATE TABLE `binaries` (
 95+ `date` date NOT NULL,
 96+ `project_code` char (2),
 97+ `language_code` char (15),
 98+ `extension` varchar (10),
 99+ `binaries` bigint (15),
 100+ PRIMARY KEY (date,project_code,language_code,extension)
 101+) ;
 102+
 103+CREATE TABLE `offline` (
 104+ `date` date NOT NULL,
 105+ `readers` bigint (12),
 106+ PRIMARY KEY (date,readers)
 107+) ;
 108+
 109+-- SHOW TABLES ;
 110+-- DESCRIBE comscore ;
 111+-- DESCRIBE comscore_regions ;
 112+-- DESCRIBE wikistats ;
 113+-- DESCRIBE page_views ;
 114+-- DESCRIBE language_names ;
 115+-- DESCRIBE binaries ;
 116+-- DESCRIBE offline ;
 117+
 118+-- Database Manipulation
 119+-- Obviously in real world this is a separate script
 120+
 121+LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
 122+ INTO TABLE comscore
 123+ FIELDS TERMINATED BY ','
 124+ OPTIONALLY ENCLOSED BY '"'
 125+ (@date,country_code,region_code,web_property,project_code,reach,visitors)
 126+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 127+
 128+LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
 129+ INTO TABLE comscore_regions
 130+ FIELDS TERMINATED BY ','
 131+ OPTIONALLY ENCLOSED BY '"'
 132+ (report_language,region_code,region_name) ;
 133+
 134+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
 135+ INTO TABLE wikistats
 136+ FIELDS TERMINATED BY ','
 137+ OPTIONALLY ENCLOSED BY '"'
 138+ (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
 139+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 140+
 141+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
 142+ INTO TABLE page_views
 143+ FIELDS TERMINATED BY ','
 144+ OPTIONALLY ENCLOSED BY '"'
 145+ (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
 146+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 147+
 148+
 149+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
 150+ INTO TABLE language_names
 151+ FIELDS TERMINATED BY ','
 152+ OPTIONALLY ENCLOSED BY '"'
 153+ (report_language,language_code,language_name) ;
 154+
 155+LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
 156+ INTO TABLE binaries
 157+ FIELDS TERMINATED BY ','
 158+ OPTIONALLY ENCLOSED BY '"'
 159+ (project_code,language_code,@date,extension,binaries)
 160+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 161+
 162+LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
 163+ INTO TABLE offline
 164+ FIELDS TERMINATED BY ','
 165+ OPTIONALLY ENCLOSED BY '"'
 166+ (@date,readers)
 167+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 168+
 169+
 170+-- show contents (debugging only)
 171+-- SELECT * FROM comscore ;
 172+-- SELECT * FROM comscore_regions ;
 173+-- SELECT * FROM wikistats ;
 174+-- SELECT * FROM page_views ;
 175+-- SELECT * FROM language_names ;
 176+-- SELECT * FROM binaries
 177+-- WHERE project_code = 'commons' ;
 178+ SELECT * FROM offline ;
 179+
 180+
Index: trunk/wikistats/analytics/analytics_generate_csv_files.sh
@@ -0,0 +1,21 @@
 2+#!/bin/sh
 3+ulimit -v 8000000
 4+
 5+clear
 6+cd /a/analytics
 7+
 8+perl AnalyticsPrepBinariesData.pl -i /a/wikistats/ -o /a/analytics/test/
 9+
 10+# add or replace data from newest comScore csv files (last 14 months) into master files (full history)
 11+# and generate database input csv file from it
 12+
 13+# -r replace (default is add only)
 14+# -i input folder, contains manually downloaded csv files from comScore (or xls, converted to csv)
 15+# -m master files with full history
 16+# -o output csv file, with reach and UV's per region and UV's per top web property, ready for import into database
 17+perl AnalyticsPrepComscoreData.pl -r -i /a/analytics/comscore -m /a/analytics -o /a/analytics
 18+
 19+perl AnalyticsPrepWikiCountsOutput.pl -i /a/wikistats/ -o /a/analytics
 20+
 21+cp /a/wikistats/csv_wp/analytics_in_page_views.csv .
 22+
Index: trunk/wikistats/analytics/analytics_upd.sh
@@ -0,0 +1,6 @@
 2+clear
 3+cd /a/analytics
 4+
 5+./analytics_generate_csv_files.sh
 6+
 7+mysql -u analytics -h project2.wikimedia.org -preport < analytics_refresh_from_csv.txt
\ No newline at end of file
Index: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl
@@ -39,6 +39,7 @@
4040
4141 &ParseArguments ;
4242 &ReadStatisticsMonthly ;
 43+ &FindLargestWikis ;
4344 &WriteMonthlyData ;
4445
4546 print "\nReady\n\n" ;
@@ -54,15 +55,20 @@
5556 { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
5657 print ("\nArguments\n$arguments\n") ;
5758
58 -# $options {"i"} = "w:/# out bayes" ; # EZ test
59 -# $options {"o"} = "c:/MySQL/analytics" ; # EZ test
 59+ if (! -d '/mnt/') # EZ test
 60+ {
 61+ $path_in = "c:/\@ wikimedia/# out bayes" ;
 62+ $path_out = "c:/MySQL/analytics" ;
 63+ }
 64+ else
 65+ {
 66+ die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
 67+ die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
6068
61 - die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
62 - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
 69+ $path_in = $options {"i"} ;
 70+ $path_out = $options {"o"} ;
 71+ }
6372
64 - $path_in = $options {"i"} ;
65 - $path_out = $options {"o"} ;
66 -
6773 die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
6874 die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
6975
@@ -158,6 +164,8 @@
159165
160166 ($month,$day,$year) = split ('\/', $date) ;
161167 $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
 168+ $months {$yyyymm} ++ ;
 169+# print "YYYYMM $yyyymm\n" ;
162170
163171 # data have been collected in WikiCountsProcess.pm and been written in WikiCountsOutput.pm
164172 # count user with over x edits
@@ -167,6 +175,11 @@
168176 $edits_ge_25 = @counts [4] > 0 ? @counts [4] : 0 ;
169177 $edits_ge_100 = @counts [7] > 0 ? @counts [7] : 0 ;
170178 $data2 {"$project,$language,$yyyymm"} = "$edits_ge_5,$edits_ge_25,$edits_ge_100" ;
 179+
 180+ $total_edits_ge_5 {"$project,$language"} += $edits_ge_5 ;
 181+ $total_edits_ge_25 {"$project,$language"} += $edits_ge_25 ;
 182+ $total_edits_ge_100 {"$project,$language"} += $edits_ge_100 ;
 183+
171184 # prep string with right amount of comma's
172185 if ($data2_default eq '')
173186 {
@@ -237,12 +250,46 @@
238251 # }
239252 #}
240253
 254+sub FindLargestWikis
 255+{
 256+ print "Largest projects (most accumulated very active editors):\n";
 257+ @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ;
 258+ $rank = 0 ;
 259+ foreach $project_language (@total_edits_ge_100)
 260+ {
 261+ $largest_projects {$project_language} = $rank++ ;
 262+ print "$project_language," ;
 263+ last if $rank > 10 ;
 264+ }
 265+ print "\n\n" ;
 266+
 267+ foreach $yyyymm (sort keys %months)
 268+ {
 269+ next if $yyyymm lt '2011' ;
 270+ foreach $project_language (keys %largest_projects)
 271+ {
 272+ ($project,$language) = split (',', $project_language) ;
 273+ if ($data2 {"$project,$language,$yyyymm"} eq '')
 274+ {
 275+ print "No data yet for large wiki $project_language for $yyyymm-> skip month $yyyymm\n" ;
 276+ $months {$yyyymm} = 0 ;
 277+ }
 278+ }
 279+ }
 280+ exit ;
 281+}
 282+
241283 sub WriteMonthlyData
242284 {
243285 my $file_csv_out = "$path_out/$file_csv_analytics_in" ;
244286 open CSV_OUT, '>', $file_csv_out ;
245287 foreach $project_wiki_month (sort keys %data1)
246288 {
 289+ ($project,$wiki,$yyyymm) = split (',', $project_wiki_month) ;
 290+
 291+ # recent month misses on eor more large wikis?
 292+ next if $months {$yyyymm} == 0 ;
 293+
247294 $data1 = $data1 {$project_wiki_month} ;
248295 $data2 = $data2 {$project_wiki_month} ;
249296 if ($data2 eq '')
@@ -250,9 +297,12 @@
251298 print "Editor data missing for $project_wiki_month\n" ;
252299 $data2 = $data2_default ;
253300 }
254 - $data1 =~ s/data2/$data2/ ; # insert rather than append to have all editor fields follow each other
 301+ $data1 =~ s/data2/$data2/ ; # insert rather than append to have all editor fields close together
255302 print CSV_OUT "$project_wiki_month,$data1\n" ;
256303 }
 304+ $total_edits_ge_5 {"$project,*,$yyyymm"} += $edits_ge_5 ;
 305+ $total_edits_ge_25 {"$project,*,$yyyymm"} += $edits_ge_25 ;
 306+ $total_edits_ge_100 {"$project,*,$yyyymm"} += $edits_ge_100 ;
257307 close CSV_OUT ;
258308 }
259309
Index: trunk/wikistats/analytics/analytics_new.sh
@@ -0,0 +1,5 @@
 2+clear
 3+cd /a/analytics
 4+# rm *test*.csv
 5+# mysql --user=root --password=changerootwhenpriyankareturns < analytics_create_and_load_from_csv.txt > mysql_log.txt
 6+ mysql -u analytics -h project2.wikimedia.org -preport < analytics_create_and_load_from_csv.txt > mysql_log.txt
Index: trunk/wikistats/analytics/AnalyticsPrepComscoreData.pl
@@ -33,13 +33,13 @@
3434 use Cwd;
3535
3636 my $options ;
37 - getopt ("mu", \%options) ;
 37+ getopt ("imo", \%options) ;
3838
3939 $true = 1 ;
4040 $false = 0 ;
4141
4242 $script_name = "AnalyticsPrepComscoreData.pl" ;
43 - $script_version = "0.32" ;
 43+ $script_version = "0.31" ;
4444
4545 # EZ test only
4646 # $source = "comscore" ;
@@ -47,21 +47,34 @@
4848 # $generated = "2011-05-06 00:00:00" ;
4949 # $user = "ezachte" ;
5050
51 - $dir_analytics = $options {"m"} ;
52 - $dir_comscore_updates = $options {"u"} ;
 51+ $dir_in = $options {"i"} ;
 52+ $dir_upd = $options {"m"} ;
 53+ $dir_out = $options {"o"} ;
 54+ $mode = 'add';
 55+ if (defined $options {'r'})
 56+ { $mode = 'replace'; }
5357
54 - $dir_analytics = "C:/@ Wikimedia/! MySQL/analytics" ; # EZ test only
55 - $dir_comscore_updates = "C:/@ Wikimedia/@ Report Card/Data" ; # EZ test only
 58+ print "Mode is $mode (specify '-r' for replace)\n\n";
5659
57 - if (($dir_analytics eq '') || ($dir_comscore_updates eq ''))
58 - { Abort ("Specify folder for 'master' csv files as '-m folder', folder for 'update' csv files as -u folder'") ; }
 60+ if (! -d "/home/") # EZ test machine
 61+ {
 62+ $dir_in = "C:/@ Wikimedia/@ Report Card/Data" ;
 63+ $dir_upd = "C:/MySQL/analytics" ;
 64+ $dir_out = "C:/MySQL/analytics" ;
 65+ $mode = 'replace' ;
 66+ }
5967
60 - $file_comscore_reach_master = "history_comscore_reach_regions.csv" ;
61 - $file_comscore_reach_update = "*reach*by*region*csv" ;
 68+ if ($dir_in eq '')
 69+ { Abort ("Specify folder for input file (new comScore data) '-i folder'") ; }
 70+ if ($dir_upd eq '')
 71+ { Abort ("Specify folder for master files (full history) as '-m folder'") ; }
 72+ if ($dir_out eq '')
 73+ { Abort ("Specify folder for output file '-o folder'") ; }
6274
63 - $file_comscore_uv_region_master = "history_comscore_UV_regions.csv" ;
64 - $file_comscore_uv_region_update = "*UVs*by*region*csv" ;
65 -
 75+ $file_comscore_reach_master = "history_comscore_reach_regions.csv" ;
 76+ $file_comscore_reach_update = "*reach*by*region*csv" ;
 77+ $file_comscore_uv_region_master = "history_comscore_UV_regions.csv" ;
 78+ $file_comscore_uv_region_update = "*UVs*by*region*csv" ;
6679 $file_comscore_uv_property_master = "history_comscore_UV_properties.csv" ;
6780 $file_comscore_uv_property_update = "*UV*trend*csv" ;
6881
@@ -69,7 +82,10 @@
7083 $layout_csv_regions = 2 ;
7184 $layout_csv_properties = 3 ;
7285
73 - print "Directories:\nAnalytics '$dir_analytics'\nUpdates '$dir_comscore_updates'\n\n" ;
 86+ print "Directories:\n" .
 87+ "Input (new comScore data): '$dir_in'\n".
 88+ "Master files (full history): '$dir_upd'\n" .
 89+ "Output (database feed): '$dir_out'\n\n" ;
7490
7591 %region_codes = (
7692 "Europe"=>"EU",
@@ -88,13 +104,13 @@
89105
90106 @months_short = qw "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec" ;
91107
92 - &ReadDataReachPerRegion ($file_comscore_reach_master, $file_comscore_reach_update, "%.1f", 1, $layout_csv_reach) ;
 108+ &ReadMasterComscoreDataReachPerRegion ($file_comscore_reach_master, $file_comscore_reach_update, "%.1f", 1, $layout_csv_reach) ;
93109 %reach_region_code = %data ;
94110
95 - &ReadDataVisitorsPerRegion ($file_comscore_uv_region_master, $file_comscore_uv_region_update, "%.0f", 1000, $layout_csv_regions) ;
 111+ &ReadMasterComscoreDataVisitorsPerRegion ($file_comscore_uv_region_master, $file_comscore_uv_region_update, "%.0f", 1000, $layout_csv_regions) ;
96112 %visitors_region_code = %data ;
97113
98 - &ReadDataVisitorsPerProperty ($file_comscore_uv_property_master, $file_comscore_uv_property_update, "%.0f", 1000, $layout_csv_properties) ;
 114+ &ReadMasterComscoreDataVisitorsPerProperty ($file_comscore_uv_property_master, $file_comscore_uv_property_update, "%.0f", 1000, $layout_csv_properties) ;
99115 %visitors_web_property = %data ;
100116
101117 &WriteDataAnalytics ;
@@ -102,153 +118,32 @@
103119 print "\nReady\n\n" ;
104120 exit ;
105121
106 -sub UpdateFromLatestComscoreData
 122+sub ReadMasterComscoreDataReachPerRegion
107123 {
108 - my ($file_comscore_master, $file_comscore_updates, $multiplier, $layout_csv, @update_only) = @_ ;
 124+ my ($file_comscore_master, $file_comscore_updates, $precision, $layout_csv) = @_ ;
109125
110 - undef %update_only ;
111 - undef %do_not_update ;
 126+ print "ReadMasterComscoreDataReachPerRegion\n\n" ;
112127
113 - foreach $id (@update_only)
114 - { $update_only {$id} = $true ; }
115 -
116 - if (! -e "$dir_analytics/$file_comscore_master")
117 - { Abort ("File $file_comscore_master not found!") ; }
118 -
119 - $age_master = -M "$dir_analytics/$file_comscore_master" ;
120 - print "\nLatest comscore master file is " . sprintf ("%.0f", $age_master) . " days old: '$file_comscore_master'\n" ;
121 -
122 - my $cwd = getcwd ;
123 - chdir $dir_comscore_updates ;
124 -
125 - @files = glob($file_comscore_updates) ;
126 - $age_update = 999999 ;
127 - $file_comscore_updates_latest = '' ;
128 - foreach $file (@files)
129 - {
130 - $age = -M $file ;
131 - if ($age < $age_update)
132 - {
133 - $age_update = $age ;
134 - $file_comscore_updates_latest = $file ;
135 - }
136 - }
137 - print "\nLatest comscore update file is " . sprintf ("%.0f", $age_update) . " days old: '$file_comscore_updates_latest'\n" ;
138 -
139 - if ($age_update == 999999)
140 - {
141 - print "No valid update file found. Nothing to update." ;
142 - return ;
143 - }
144 -
145 - if ($age_master < $age_update)
146 - {
147 - print "File with master data more recent than latest update csv from comScore. Nothing to update." ;
148 - return ;
149 - }
150 -
151 - my $updates_found = $false ;
152 -
153 - print "\nRead updates\n\n" ;
154 - open CSV, '<', $file_comscore_updates_latest ;
155 - while ($line = <CSV>)
156 - {
157 - chomp $line ;
158 - $line = &GetNumberOnly ($line) ;
159 -
160 - if ($line =~ /Jan-\d\d\d\d.*?Feb-\d\d\d\d/) # e.g. 'Location,Location,Jan-2010,Feb-2010,Mar-2010,Apr-2010,...'
161 - {
162 - if ($layout_csv == $layout_csv_properties)
163 - { ($dummy1,$dummy2,$dummy3,@months) = split (',', $line) ; } # web properties csv file
164 - else
165 - { ($dummy1,$dummy2,@months) = split (',', $line) ; } # uv / reach csv files
166 -
167 - @months = &mmm_yyyy2yyyy_mm (@months) ;
168 - }
169 -
170 - if ($line =~ /^\d+,/)
171 - {
172 - if ($layout_csv == $layout_csv_properties)
173 - {
174 - ($index,$dummy,$property,@data) = split (',', $line) ;
175 - $property =~ s/^\s+// ;
176 - $property =~ s/\s+$// ;
177 -
178 - $property =~ s/.*Google.*/Google/i ;
179 - $property =~ s/.*Microsoft.*/Microsoft/i ;
180 - $property =~ s/.*FACEBOOK.*/Facebook/i ;
181 - $property =~ s/.*Yahoo.*/Yahoo/i ;
182 - $property =~ s/.*Amazon.*/Amazon/i ;
183 - $property =~ s/.*Apple.*/Apple/i ;
184 - $property =~ s/.*AOL.*/AOL/i ;
185 - $property =~ s/.*Wikimedia.*/Wikimedia/i ;
186 - $property =~ s/.*Tencent.*/Tencent/i ;
187 - $property =~ s/.*Baidu.*/Baidu/i ;
188 - $property =~ s/.*CBS.*/CBS/i ;
189 -
190 - $id = $property ;
191 - }
192 - else
193 - {
194 - ($index,$region,@data) = split (',', $line) ;
195 - $region =~ s/^\s+// ;
196 - $region =~ s/\s+$// ;
197 - $id = $region_codes {$region} ;
198 - }
199 -
200 - if ($update_only {$id} == 0)
201 - {
202 - $do_not_update {$id}++ ;
203 - next ;
204 - }
205 -
206 - for ($m = 0 ; $m <= $#months ; $m++)
207 - {
208 - $yyyymm = $months [$m] ;
209 - $months {$yyyymm} ++ ;
210 - $yyyymm_id = "$yyyymm,$id" ;
211 - $data = $data [$m] * $multiplier ;
212 -
213 - if (! defined $data {$yyyymm_id})
214 - {
215 - $updates_found = $true ;
216 - print "New data found: $yyyymm_id = $data\n" ;
217 - $data {$yyyymm_id} = $data ;
218 - }
219 - }
220 - }
221 - }
222 -
223 - $ignored = join ', ', sort keys %do_not_update ;
224 - print "\nEntities ignored:\n$ignored\n\n" ;
225 -
226 - if (! $updates_found)
227 - { print "No new updates found\n" ; }
228 - else
229 - { print "\nUpdates found, rewrite master file '$file_comscore_master'\n\n" ; }
230 -
231 - return ($updates_found) ;
232 -}
233 -
234 -sub ReadDataReachPerRegion
235 -{
236 - my ($file_comscore_master, $file_comscore_updates, $precision, $layout_csv) = @_ ;
237 -
238128 undef %months ;
239129 undef %data ;
240130 undef @regions ;
241131
242 - open IN, '<', "$dir_analytics/$file_comscore_master" ;
 132+ open IN, '<', "$dir_upd/$file_comscore_master" ;
243133
244134 $lines = 0 ;
245135 while ($line = <IN>)
246136 {
247137 chomp $line ;
 138+ $line =~ s/\r//g ;
248139
249140 ($yyyymm,@data) = split (',', $line) ;
250141
251142 if ($lines++ == 0)
252 - { @regions = @data ; next ; }
 143+ {
 144+ @regions = @data ;
 145+ print "Regions found: " . (join ',', @regions) . "\n";
 146+ next ;
 147+ }
253148
254149 $field_ndx = 0 ;
255150 foreach (@data)
@@ -267,11 +162,11 @@
268163 }
269164 close IN ;
270165
271 - my $updates_found = &UpdateFromLatestComscoreData ($file_comscore_master, $file_comscore_updates, 1, $layout_csv, @regions) ;
 166+ my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1, $layout_csv, @regions) ;
272167 return if ! $updates_found ;
273168
274 - rename "$dir_analytics/$file_comscore_master", "$dir_analytics/$file_comscore_master.~" ;
275 - open OUT, '>', "$dir_analytics/$file_comscore_master" ;
 169+ rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ;
 170+ open OUT, '>', "$dir_upd/$file_comscore_master" ;
276171
277172 $line_out = "yyyymm" ;
278173 foreach $region_name (@regions)
@@ -292,27 +187,36 @@
293188 close OUT ;
294189 }
295190
296 -sub ReadDataVisitorsPerRegion
 191+sub ReadMasterComscoreDataVisitorsPerRegion
297192 {
298193 my ($file_comscore_master, $file_comscore_updates, $precision, $multiplier, $layout_csv) = @_ ;
299194
 195+ print "ReadMasterComscoreDataVisitorsPerRegion\n\n";
 196+
300197 undef %months ;
301198 undef %data ;
302199 undef @regions ;
303200
304 - open IN, '<', "$dir_analytics/$file_comscore_master" ;
 201+ open IN, '<', "$dir_upd/$file_comscore_master" ;
305202
306203 $lines = 0 ;
307204 $metric = 'unique_visitors' ;
308205 while ($line = <IN>)
309206 {
310207 chomp $line ;
 208+ $line =~ s/\r//g ;
311209 $line = &GetNumberOnly ($line) ;
312210
 211+ next if $line !~ /(?:yyyymm|\d\d\d\d-\d\d)/ ;
 212+
313213 ($yyyymm,@data) = split (',', $line) ;
314214
315215 if ($lines++ == 0)
316 - { @regions = @data ; next ; }
 216+ {
 217+ @regions = @data ;
 218+ print "Regions found: " . (join ',', @regions) . "\n";
 219+ next ;
 220+ }
317221
318222 $field_ndx = 0 ;
319223 foreach (@data)
@@ -334,11 +238,11 @@
335239 }
336240 close IN ;
337241
338 - my $updates_found = &UpdateFromLatestComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @regions) ;
 242+ my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @regions) ;
339243 return if ! $updates_found ;
340244
341 - rename "$dir_analytics/$file_comscore_master", "$dir_analytics/$file_comscore_master.~" ;
342 - open OUT, '>', "$dir_analytics/$file_comscore_master" ;
 245+ rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ;
 246+ open OUT, '>', "$dir_upd/$file_comscore_master" ;
343247
344248 $line_out = "yyyymm" ;
345249 foreach $region_name (@regions)
@@ -359,21 +263,24 @@
360264 close OUT ;
361265 }
362266
363 -sub ReadDataVisitorsPerProperty
 267+sub ReadMasterComscoreDataVisitorsPerProperty
364268 {
365269 my ($file_comscore_master, $file_comscore_updates, $precision, $multiplier, $layout_csv) = @_ ;
366270
 271+ print "ReadMasterComscoreDataVisitorsPerProperty\n\n";
 272+
367273 undef %months ;
368274 undef %data ;
369275 undef @properties ;
370276
371 - open IN, '<', "$dir_analytics/$file_comscore_master" ;
 277+ open IN, '<', "$dir_upd/$file_comscore_master" ;
372278
373279 $lines = 0 ;
374280 $metric = 'unique_visitors' ;
375281 while ($line = <IN>)
376282 {
377283 chomp $line ;
 284+ $line =~ s/\r//g ;
378285
379286 ($yyyymm,@data) = split (',', $line) ;
380287 if ($lines++ == 0)
@@ -398,11 +305,11 @@
399306 }
400307 close IN ;
401308
402 - my $updates_found = &UpdateFromLatestComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @properties) ;
 309+ my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @properties) ;
403310 return if ! $updates_found ;
404311
405 - rename "$dir_analytics/$file_comscore_master", "$dir_analytics/$file_comscore_master.~" ;
406 - open OUT, '>', "$dir_analytics/$file_comscore_master" ;
 312+ rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ;
 313+ open OUT, '>', "$dir_upd/$file_comscore_master" ;
407314
408315 $line_out = "yyyymm" ;
409316 foreach $property (@properties)
@@ -423,10 +330,163 @@
424331 close OUT ;
425332 }
426333
 334+sub UpdateMasterFileFromRecentComscoreData
 335+{
 336+ my ($file_comscore_master, $file_comscore_updates, $multiplier, $layout_csv, @white_list) = @_ ;
 337+
 338+ print "UpdateMasterFileFromRecentComscoreData\n\n";
 339+
 340+ undef %white_list ;
 341+ undef %not_white_listed ;
 342+
 343+ print "White list: ". (join (',', @white_list)) . "\n\n";
 344+
 345+ foreach $id (@white_list)
 346+ { $white_list {$id} = $true ; }
 347+
 348+ if (! -e "$dir_upd/$file_comscore_master")
 349+ { Abort ("File $file_comscore_master not found!") ; }
 350+
 351+ $age_all = -M "$dir_upd/$file_comscore_master" ;
 352+ print "Latest comscore master file is " . sprintf ("%.0f", $age_all) . " days old: '$file_comscore_master'\n" ;
 353+
 354+ my $cwd = getcwd ;
 355+ chdir $dir_in ;
 356+
 357+ @files = glob($file_comscore_updates) ;
 358+ $min_age_upd = 999999 ;
 359+ $file_comscore_updates_latest = '' ;
 360+ foreach $file (@files)
 361+ {
 362+ $age = -M $file ;
 363+ if ($age < $min_age_upd)
 364+ {
 365+ $min_age_upd = $age ;
 366+ $file_comscore_updates_latest = $file ;
 367+ }
 368+ }
 369+ print "Latest comscore update file is " . sprintf ("%.0f", $min_age_upd) . " days old: '$file_comscore_updates_latest'\n" ;
 370+
 371+ if ($min_age_upd == 999999)
 372+ {
 373+ print "No valid update file found. Nothing to update." ;
 374+ return ;
 375+ }
 376+
 377+ #if ($age_all > $min_age_upd)
 378+ #{
 379+ # print "File with master data more recent than latest update csv from comScore. Nothing to update." ;
 380+ # return ;
 381+ #}
 382+
 383+ my $updates_found = $false ;
 384+
 385+ open CSV, '<', $file_comscore_updates_latest ;
 386+ binmode CSV ;
 387+ while ($line = <CSV>)
 388+ {
 389+ chomp $line ;
 390+ $line =~ s/\r//g ;
 391+ $line = &GetNumberOnly ($line) ;
 392+
 393+ if ($line =~ /Jan-\d\d\d\d.*?Feb-\d\d\d\d/) # e.g. 'Location,Location,Jan-2010,Feb-2010,Mar-2010,Apr-2010,...'
 394+ {
 395+ if ($layout_csv == $layout_csv_properties)
 396+ { ($dummy1,$dummy2,$dummy3,@months) = split (',', $line) ; } # web properties csv file
 397+ else
 398+ { ($dummy1,$dummy2,@months) = split (',', $line) ; } # uv / reach csv files
 399+
 400+ @months = &mmm_yyyy2yyyy_mm (@months) ;
 401+ }
 402+
 403+ if (($line =~ /^\d+,/) || ($line =~ /,,.*?Total Internet/))
 404+ {
 405+ if ($layout_csv == $layout_csv_properties)
 406+ {
 407+ ($index,$dummy,$property,@data) = split (',', $line) ;
 408+ $property =~ s/^\s+// ;
 409+ $property =~ s/\s+$// ;
 410+
 411+ $property =~ s/.*Total Internet.*/Total Internet/i ;
 412+ $property =~ s/.*Google.*/Google/i ;
 413+ $property =~ s/.*Microsoft.*/Microsoft/i ;
 414+ $property =~ s/.*FACEBOOK.*/Facebook/i ;
 415+ $property =~ s/.*Yahoo.*/Yahoo/i ;
 416+ $property =~ s/.*Amazon.*/Amazon/i ;
 417+ $property =~ s/.*Apple.*/Apple/i ;
 418+ $property =~ s/.*AOL.*/AOL/i ;
 419+ $property =~ s/.*Wikimedia.*/Wikimedia/i ;
 420+ $property =~ s/.*Tencent.*/Tencent/i ;
 421+ $property =~ s/.*Baidu.*/Baidu/i ;
 422+ $property =~ s/.*CBS.*/CBS/i ;
 423+
 424+ if (! $white_list {$property})
 425+ {
 426+ $not_white_listed {$property}++ ;
 427+ next ;
 428+ }
 429+
 430+ $id = $property ;
 431+ }
 432+ else
 433+ {
 434+ ($index,$region,@data) = split (',', $line) ;
 435+ $region =~ s/^\s+// ;
 436+ $region =~ s/\s+$// ;
 437+
 438+ if (! $white_list {$region})
 439+ {
 440+ $not_white_listed {$region}++ ;
 441+ next ;
 442+ }
 443+
 444+ $id = $region_codes {$region} ;
 445+ }
 446+
 447+ for ($m = 0 ; $m <= $#months ; $m++)
 448+ {
 449+ $yyyymm = $months [$m] ;
 450+ $months {$yyyymm} ++ ;
 451+ $yyyymm_id = "$yyyymm,$id" ;
 452+ $data = $data [$m] * $multiplier ;
 453+
 454+ if ($mode eq 'add')
 455+ {
 456+ if (! defined $data {$yyyymm_id})
 457+ {
 458+ $updates_found = $true ;
 459+ print "New data found: $yyyymm_id = $data\n" ;
 460+ $data {$yyyymm_id} = $data ;
 461+ }
 462+ }
 463+ else
 464+ {
 465+ $updates_found = $true ;
 466+ print "Data found: $yyyymm_id = $data\n" ;
 467+ $data {$yyyymm_id} = $data ;
 468+ }
 469+ }
 470+ }
 471+ }
 472+
 473+ $entities_not_white_listed = join (', ', sort keys %not_white_listed) ;
 474+ if ($entities_not_white_listed ne '')
 475+ { print "\nEntities ignored:\n$entities_not_white_listed\n\n" ; }
 476+
 477+ if (! $updates_found)
 478+ { print "No new updates found\n" ; }
 479+ else
 480+ { print "\nUpdates found, rewrite master file '$file_comscore_master'\n\n" ; }
 481+
 482+ return ($updates_found) ;
 483+}
 484+
427485 sub WriteDataAnalytics
428486 {
429 - open OUT, '>', "$dir_analytics/analytics_in_comscore.csv" ;
 487+ print "WriteDataAnalytics\n\n";
430488
 489+ open OUT, '>', "$dir_out/analytics_in_comscore.csv" ;
 490+
431491 $metric = 'unique_visitors' ;
432492 foreach $yyyymm (sort keys %months)
433493 {
@@ -447,7 +507,7 @@
448508
449509 $line = "$yyyymm,$country_code,$region_code,$property,$project,$reach,$visitors\n" ;
450510 print OUT $line ;
451 - # print $line ;
 511+ print $line ;
452512 }
453513
454514 foreach $property (sort @properties)
@@ -478,14 +538,18 @@
479539 sub mmm_yyyy2yyyy_mm
480540 {
481541 my @months = @_ ;
 542+ my ($m) ;
482543 # Jan -> 01, etc
483 - foreach my $month (@months)
 544+ foreach $month (@months)
484545 {
485546 my ($mmm,$yyyy) = split ('-', $month) ;
486547 for ($m = 0 ; $m <= $#months_short ; $m++)
487548 {
488549 if ($mmm eq $months_short [$m])
489 - { $month = "$yyyy-" . sprintf ("%02d", $m+1) ; }
 550+ {
 551+ $month = "$yyyy-" . sprintf ("%02d", $m+1) ;
 552+ last ;
 553+ }
490554 }
491555 }
492556 return @months ;

Status & tagging log