r105444 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r105443‎ \| r105444 \| r105445 >
Date:	17:36, 7 December 2011
Author:	diederik
Status:	deferred
Tags:
Comment:	Moved reportcard import utils from trunk/wikistats/analytics to trunk/extensions/MetricsReporting
Modified paths:	/trunk/extensions/MetricsReporting/import/AnalyticsPrepBinariesData.pl (added) (history) /trunk/extensions/MetricsReporting/import/AnalyticsPrepComscoreData.pl (added) (history) /trunk/extensions/MetricsReporting/import/AnalyticsPrepLanguageNames.pl (added) (history) /trunk/extensions/MetricsReporting/import/AnalyticsPrepPageViews.pl (added) (history) /trunk/extensions/MetricsReporting/import/AnalyticsPrepWikiCountsOutput.pl (added) (history) /trunk/extensions/MetricsReporting/import/_readme.txt (added) (history) /trunk/extensions/MetricsReporting/import/analytics_create_and_load_from_csv.txt (added) (history) /trunk/extensions/MetricsReporting/import/analytics_generate_csv_files.sh (added) (history) /trunk/extensions/MetricsReporting/import/analytics_new.sh (added) (history) /trunk/extensions/MetricsReporting/import/analytics_refresh_from_csv.txt (added) (history) /trunk/extensions/MetricsReporting/import/analytics_upd.sh (added) (history)

Diff [purge]

Index: trunk/extensions/MetricsReporting/import/AnalyticsPrepBinariesData.pl
—	—	@@ -0,0 +1,124 @@
	2	+#!/usr/local/bin/perl
	3	+
	4	+ use Getopt::Std ;
	5	+
	6	+ &ParseArguments ;
	7	+
	8	+ print "Write file '$file_csv_out'\n" ;
	9	+ open CSV_OUT, '>', $file_csv_out ;
	10	+
	11	+ foreach $project (qw (wb wk wn wp wq ws wv wx))
	12	+ { &ReadStatisticsPerBinariesExtension ($project) ; }
	13	+
	14	+ close CSV_OUT ;
	15	+
	16	+ print "\n\nReady\n\n" ;
	17	+ exit ;
	18	+
	19	+sub ParseArguments
	20	+{
	21	+ my @options ;
	22	+ getopt ("io", \%options) ;
	23	+
	24	+ die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ;
	25	+ die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
	26	+
	27	+ $path_in = $options {"i"} ;
	28	+ $path_out = $options {"o"} ;
	29	+
	30	+ die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
	31	+ die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
	32	+
	33	+ # tests only
	34	+ # $path_in = "C:/@ Wikimedia/# Out Bayes" ;
	35	+ # $path_out = "C:/analytics" ; # "w:/@ report card/data" ;
	36	+
	37	+ print "Input folder: $path_in\n" ;
	38	+ print "Output folder: $path_out\n" ;
	39	+ print "\n" ;
	40	+
	41	+ $file_csv_out = "$path_out/analytics_in_binaries.csv" ;
	42	+}
	43	+
	44	+
	45	+sub ReadStatisticsPerBinariesExtension
	46	+{
	47	+ my $project = shift ;
	48	+ my $file_csv_in = "$path_in/csv_$project/StatisticsPerBinariesExtension.csv" ;
	49	+ $yyyymm_hi = -1 ;
	50	+
	51	+ if (! -e $file_csv_in)
	52	+ { die "Input file '$file_csv_in' not found" ; }
	53	+
	54	+
	55	+ print "Read '$file_csv_in'\n" ;
	56	+ open CSV_IN, '<', $file_csv_in ;
	57	+
	58	+ $language_prev = '' ;
	59	+ while ($line = <CSV_IN>)
	60	+ {
	61	+ chomp $line ;
	62	+ next if $line !~ /,.*?,/ ;
	63	+
	64	+ ($language,$date,$data) = split (',', $line, 3) ;
	65	+
	66	+ # for each wiki first line shows ext names, no tcounts
	67	+ if ($date eq "00/0000")
	68	+ {
	69	+ if ($language_prev ne '')
	70	+ { &WriteMonthlyData ($project, $language_prev) ; }
	71	+ $language_prev = $language ;
	72	+
	73	+ undef %ext_name ;
	74	+ undef %ext_ndx ;
	75	+ undef %ext_cnt ;
	76	+ undef %months ;
	77	+
	78	+ @exts = split (',', $data) ;
	79	+ $ndx = 0 ;
	80	+ foreach $ext (@exts)
	81	+ {
	82	+ $ext_name {$ndx} = $ext ;
	83	+ $ext_ndx {$ext} = $ndx ;
	84	+ $ndx ++ ;
	85	+ }
	86	+ next ;
	87	+ }
	88	+
	89	+ ($month,$year) = split ('\/', $date) ;
	90	+ $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
	91	+ if ($yyyymm gt $yyyymm_hi)
	92	+ { $yyyymm_hi = $yyyymm ; }
	93	+ $months {$yyyymm}++ ;
	94	+
	95	+ @counts = split (',', $data) ;
	96	+ $ndx = 0 ;
	97	+ foreach $count (@counts)
	98	+ {
	99	+ $ext_cnt {$yyyymm}{$ext_name {$ndx}} = $count ;
	100	+ $ndx ++ ;
	101	+ }
	102	+ }
	103	+ &WriteMonthlyData ($project, $language_prev) ;
	104	+
	105	+ close CSV_IN ;
	106	+}
	107	+
	108	+sub WriteMonthlyData
	109	+{
	110	+ my ($project,$language) = @_ ;
	111	+ # get sorted array of extensions, order by count for most recent month
	112	+ %ext_cnt_yyyymm_hi = %{$ext_cnt {$yyyymm_hi}} ;
	113	+ @ext_cnt_yyyymm_hi = (sort {$ext_cnt_yyyymm_hi {$b} <=> $ext_cnt_yyyymm_hi {$a}} keys %ext_cnt_yyyymm_hi) ;
	114	+
	115	+ foreach $month (sort keys %months)
	116	+ {
	117	+ $ndx = 0 ;
	118	+ foreach $ext (@ext_cnt_yyyymm_hi)
	119	+ {
	120	+ print CSV_OUT "$project,$language,$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
	121	+ # print "$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
	122	+ last if (++ $ndx > 25) ;
	123	+ }
	124	+ }
	125	+}
Property changes on: trunk/extensions/MetricsReporting/import/AnalyticsPrepBinariesData.pl
___________________________________________________________________
Added: svn:eol-style
1	126	+ native
Index: trunk/extensions/MetricsReporting/import/AnalyticsPrepLanguageNames.pl
—	—	@@ -0,0 +1,162 @@
	2	+#!/usr/bin/perl
	3	+
	4	+# This module prepares a csv file with language names for feed into the analytics database
	5	+# The main work of collecting these names from different sources is done by
	6	+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/dumps/WikiReportsLocalizations.pm
	7	+# which is part of wikistats reporting phase and once each month updates local master csv files from all sources
	8	+
	9	+# The following code to merge and filter these master csv files is based on parts of the code in WikiReportsLocalizations.pm, function Localization
	10	+# To do some day: make a completely independent script out of this code and code from WikiReportsLocalizations.pm which covers the whole production cycle
	11	+
	12	+# Sources for language names:
	13	+# - php files
	14	+# - translatewiki
	15	+# - English Wikipedia API (interwikilinks)
	16	+
	17	+# Right now multilingual support for the analytics database is just a nice idea, so to speed up data feeds, just keep English and German translations
	18	+
	19	+ use Getopt::Std ;
	20	+
	21	+ $true = 1 ;
	22	+ $false = 0 ;
	23	+
	24	+ $max_language_name = 50 ; # truncate if longer
	25	+
	26	+ $file_csv_language_names_php = "LanguageNamesViaPhp.csv" ;
	27	+ $file_csv_language_names_wp = "LanguageNamesViaWpEnEdited.csv" ;
	28	+ $file_csv_analytics_in = "analytics_in_language_names.csv" ;
	29	+
	30	+ $languages_force_case_uc = "ast\|br\|de\|en\|id\|nl\|wa" ; # incomplete list, based on languages supported by wikistats reporting
	31	+ $languages_filter = "de\|en" ;
	32	+ foreach $language (split '\\|', $languages_filter)
	33	+ { $include_target_language {$language} = $true ; }
	34	+
	35	+ &ParseArguments ;
	36	+ &ReadCsvFiles ;
	37	+ &WriteCsvFile ;
	38	+
	39	+
	40	+# if ($language eq "id") # swap which file takes precedence
	41	+
	42	+ print "\nReady\n\n" ;
	43	+ exit ;
	44	+
	45	+sub ParseArguments
	46	+{
	47	+ my (@options, $arguments) ;
	48	+
	49	+ getopt ("io", \%options) ;
	50	+
	51	+ foreach $arg (sort keys %options)
	52	+ { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
	53	+ print ("\nArguments\n$arguments\n") ;
	54	+
	55	+ $options {"i"} = "w:/# out bayes/csv_wp" ; # EZ test
	56	+ $options {"o"} = "c:/MySQL/analytics" ; # EZ test
	57	+
	58	+ die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
	59	+ die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
	60	+
	61	+ ($path_in = $options {"i"}) =~ s/[\/\\]+$// ; # remove trailing (back)slash if any
	62	+ ($path_out = $options {"o"}) =~ s/[\/\\]+$// ; # remove trailing (back)slash if any
	63	+
	64	+ die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
	65	+ die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
	66	+
	67	+ print "Input folder: $path_in\n" ;
	68	+ print "Output folder: $path_out\n\n" ;
	69	+
	70	+ $file_csv_language_names_php = "$path_in/$file_csv_language_names_php" ;
	71	+ $file_csv_language_names_wp = "$path_in/$file_csv_language_names_wp" ;
	72	+ $file_csv_analytics_in = "$path_out/$file_csv_analytics_in" ;
	73	+
	74	+ die "Input file '$file_csv_language_names_php' not found" if (! -e $file_csv_language_names_php) ;
	75	+ die "Input file '$file_csv_language_names_wp' not found" if (! -e $file_csv_language_names_wp) ;
	76	+}
	77	+
	78	+sub ReadCsvFiles
	79	+{
	80	+ #first read definitions from php message files, then overwrite with definitions from interwiki links when available
	81	+ # except for target language 'id' (Indonesian) where quality of php file has been deemed more reliable
	82	+
	83	+ open CSV_IN, "<", $file_csv_language_names_php ;
	84	+ while ($line = <CSV_IN>)
	85	+ {
	86	+ chomp ($line) ;
	87	+ ($target_language, $code, $name_unicode, $name_html) = split (',', $line) ;
	88	+
	89	+ next if ! $include_target_language {$target_language} ;
	90	+
	91	+ $out_languages {$target_language} {$code} = &FormatName ($target_language, $name_unicode) ; # forget about html for analytics database
	92	+ }
	93	+ close CSV_IN ;
	94	+
	95	+ open CSV_IN, "<", $file_csv_language_names_wp ;
	96	+ while ($line = <CSV_IN>)
	97	+ {
	98	+ chomp ($line) ;
	99	+ ($target_language, $code, $name_unicode, $name_html) = split (',', $line) ;
	100	+
	101	+ next if ! $include_target_language {$target_language} ;
	102	+
	103	+ next if $target_language eq 'id' and $out_languages {$target_language} {$code} ne '' ;
	104	+
	105	+ # $name_unicode_php = $out_languages {$target_language} {$code} ; # test only
	106	+ # $name_unicode_wp = &FormatName ($target_language, $name_unicode) ; # test only
	107	+ # if (($name_unicode_php ne '') && ($name_unicode_php ne $name_unicode_wp)) # test only
	108	+ # { print "$name_unicode_php => $name_unicode_wp\n" ; } # test only
	109	+
	110	+ $out_languages {$target_language} {$code} = &FormatName ($target_language, $name_unicode) ; # forget about html for analytics database
	111	+ }
	112	+ close CSV_IN ;
	113	+}
	114	+
	115	+sub FormatName
	116	+{
	117	+ my ($target_language, $name_unicode) = @_ ;
	118	+
	119	+ $name_unicode2 = $name_unicode ;
	120	+
	121	+ if ($target_language eq "de")
	122	+ { $name_unicode =~ s/e?\s*\-?sprache//i ; }
	123	+
	124	+ if ($target_language =~ /^(?:$languages_force_case_uc)/)
	125	+ { $name_unicode = ucfirst $name_unicode ; }
	126	+ else
	127	+ { $name_unicode = lc $name_unicode ; }
	128	+
	129	+# Test only
	130	+# if (($target_language eq 'de') && ($name_unicode ne $name_unicode2))
	131	+# { print "$name_unicode2 => $name_unicode\n" ; }
	132	+
	133	+ return ($name_unicode) ;
	134	+}
	135	+
	136	+sub WriteCsvFile
	137	+{
	138	+ open CSV_OUT, ">", $file_csv_analytics_in \|\| die ("File '$file_csv_analytics_in' could not be opened") ;
	139	+ binmode CSV_OUT ; # force Unix style linebreak \012
	140	+
	141	+ foreach $target_language (sort keys %out_languages)
	142	+ {
	143	+ print "\nTarget language '$target_language'\n" ;
	144	+ %translations = %{$out_languages {$target_language}} ;
	145	+
	146	+ foreach $code (sort keys %translations)
	147	+ {
	148	+ $language_name = $translations{$code} ;
	149	+ $language_name2 = substr ($language_name,0,$max_language_name) ;
	150	+
	151	+ if ($language_name ne $language_name2)
	152	+ { print "Language name truncated to $max_language_name chars: '$language_name' -> '$language_name2'\n" ; }
	153	+
	154	+ if ($language_name2 =~ /,/)
	155	+ { $language_name2 = "\"$language_name2\"" ; }
	156	+ # test only
	157	+ print CSV_OUT "$target_language,$code,$language_name2\n" ;
	158	+ }
	159	+ }
	160	+
	161	+ close CSV_OUT ;
	162	+}
	163	+
Property changes on: trunk/extensions/MetricsReporting/import/AnalyticsPrepLanguageNames.pl
___________________________________________________________________
Added: svn:eol-style
1	164	+ native
Index: trunk/extensions/MetricsReporting/import/analytics_refresh_from_csv.txt
—	—	@@ -0,0 +1,55 @@
	2	+USE `analytics` ;
	3	+
	4	+TRUNCATE TABLE comscore ;
	5	+LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
	6	+ INTO TABLE comscore
	7	+ FIELDS TERMINATED BY ','
	8	+ OPTIONALLY ENCLOSED BY '"'
	9	+ (@date,country_code,region_code,web_property,project_code,reach,visitors)
	10	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	11	+
	12	+TRUNCATE TABLE comscore_regions ;
	13	+LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
	14	+ INTO TABLE comscore_regions
	15	+ FIELDS TERMINATED BY ','
	16	+ OPTIONALLY ENCLOSED BY '"'
	17	+ (report_language,region_code,region_name) ;
	18	+
	19	+TRUNCATE TABLE wikistats ;
	20	+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
	21	+ INTO TABLE wikistats
	22	+ FIELDS TERMINATED BY ','
	23	+ OPTIONALLY ENCLOSED BY '"'
	24	+ (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
	25	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	26	+
	27	+TRUNCATE TABLE page_views ;
	28	+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
	29	+ INTO TABLE page_views
	30	+ FIELDS TERMINATED BY ','
	31	+ OPTIONALLY ENCLOSED BY '"'
	32	+ (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
	33	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	34	+
	35	+TRUNCATE TABLE language_names ;
	36	+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
	37	+ INTO TABLE language_names
	38	+ FIELDS TERMINATED BY ','
	39	+ OPTIONALLY ENCLOSED BY '"'
	40	+ (report_language,language_code,language_name) ;
	41	+
	42	+TRUNCATE TABLE binaries ;
	43	+LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
	44	+ INTO TABLE binaries
	45	+ FIELDS TERMINATED BY ','
	46	+ OPTIONALLY ENCLOSED BY '"'
	47	+ (project_code,language_code,@date,extension,binaries)
	48	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	49	+
	50	+TRUNCATE TABLE offline ;
	51	+LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
	52	+ INTO TABLE offline
	53	+ FIELDS TERMINATED BY ','
	54	+ OPTIONALLY ENCLOSED BY '"'
	55	+ (@date, readers)
	56	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
Property changes on: trunk/extensions/MetricsReporting/import/analytics_refresh_from_csv.txt
___________________________________________________________________
Added: svn:eol-style
1	57	+ native
Index: trunk/extensions/MetricsReporting/import/AnalyticsPrepPageViews.pl
—	—	@@ -0,0 +1,11 @@
	2	+#!/usr/bin/perl
	3	+
	4	+# This file is just a place holder (alias)
	5	+# Page views per day/week/month/etc, per project, per language, nrmalized and not normalized are collected by
	6	+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/dumps/WikiCountsSummarizeProjectCounts.pl
	7	+#
	8	+# Added May 2001:
	9	+# For analytics database one file is written for all projects and languages combined,
	10	+# with per month, not normalized and normalized page view counts in one row.
	11	+# This file is written to same output folder as other csv files generated by WikiCountsSummarizeProjectCounts.pl
	12	+# and needs to be copied to analytics folder later
\ No newline at end of file
Property changes on: trunk/extensions/MetricsReporting/import/AnalyticsPrepPageViews.pl
___________________________________________________________________
Added: svn:eol-style
1	13	+ native
Index: trunk/extensions/MetricsReporting/import/analytics_create_and_load_from_csv.txt
—	—	@@ -0,0 +1,181 @@
	2	+-- make sure to delete output files test.csv first if any exist (MySQL on purpose forbids overwrite)
	3	+
	4	+-- tables implemented:
	5	+-- comscore
	6	+-- comscore_regions
	7	+-- wikistats
	8	+-- page_views
	9	+-- language names
	10	+-- binaries
	11	+
	12	+-- more tables planned (O= optional, not needed for report card stage)
	13	+-- project_names
	14	+-- O edits per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other)
	15	+-- O editors per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other)
	16	+
	17	+
	18	+-- open issues:
	19	+-- only store basic data in database and calculate all aggregates on the fly or do some aggregation before hand ? (e.g. count for mobile / non-mobile / ==> total of both ? <==)
	20	+-- for binaries, store one extension type per row? (future proof, more work to query), or a selected few as columns? (one row only needed per month)
	21	+
	22	+-- Create database and two tables from scratch
	23	+DROP DATABASE IF EXISTS `analytics` ;
	24	+
	25	+CREATE DATABASE `analytics` ;
	26	+
	27	+USE `analytics` ;
	28	+
	29	+CREATE TABLE `comscore` (
	30	+ `date` date NOT NULL,
	31	+ `country_code` varchar (3),
	32	+ `region_code` varchar (3),
	33	+ `web_property` varchar (20),
	34	+ `project_code` varchar (10),
	35	+ `reach` decimal (4,1) DEFAULT NULL,
	36	+ `visitors` decimal (15) DEFAULT NULL,
	37	+ PRIMARY KEY (date,country_code,region_code,project_code,web_property),
	38	+ KEY (`country_code`)
	39	+) ;
	40	+
	41	+CREATE TABLE `comscore_regions` (
	42	+ `region_code` varchar (2),
	43	+ `report_language` varchar (10),
	44	+ `region_name` varchar (18),
	45	+ PRIMARY KEY (region_code,report_language)
	46	+) ;
	47	+
	48	+CREATE TABLE `wikistats` (
	49	+ `date` date NOT NULL,
	50	+ `project_code` varchar (10),
	51	+ `language_code` varchar (15),
	52	+ `editors_all_time` int (10) DEFAULT NULL,
	53	+ `editors_new` int (7) DEFAULT NULL,
	54	+ `editors_ge_5` int (7) DEFAULT NULL,
	55	+ `editors_ge_25` int (7) DEFAULT NULL,
	56	+ `editors_ge_100` int (7) DEFAULT NULL,
	57	+ `articles` int (12) DEFAULT NULL,
	58	+ `articles_new_per_day` int (9) DEFAULT NULL,
	59	+ `articles_over_bytes_500` int (12) DEFAULT NULL,
	60	+ `articles_over_bytes_2000` int (12) DEFAULT NULL,
	61	+ `edits_per_article` decimal (9,1) DEFAULT NULL,
	62	+ `bytes_per_article` decimal (9,1) DEFAULT NULL,
	63	+ `edits` int (12) DEFAULT NULL,
	64	+ `size_in_bytes` int (15) DEFAULT NULL,
	65	+ `size_in_words` int (15) DEFAULT NULL,
	66	+ `links_internal` int (15) DEFAULT NULL,
	67	+ `links_interwiki` int (15) DEFAULT NULL,
	68	+ `links_image` int (15) DEFAULT NULL,
	69	+ `links_external` int (15) DEFAULT NULL,
	70	+ `redirects` int (15) DEFAULT NULL,
	71	+ PRIMARY KEY (date,project_code,language_code)
	72	+) ;
	73	+
	74	+CREATE TABLE `page_views` (
	75	+ `date` date NOT NULL,
	76	+ `project_code` char (2),
	77	+ `language_code` char (15),
	78	+ `views_non_mobile_raw` bigint (15),
	79	+ `views_mobile_raw` bigint (15),
	80	+ `views_non_mobile_normalized` bigint (15),
	81	+ `views_mobile_normalized` bigint (15),
	82	+ `views_raw` bigint (15),
	83	+ `views_normalized` bigint (15),
	84	+ PRIMARY KEY (date,project_code,language_code),
	85	+ KEY (date,language_code,project_code)
	86	+) ;
	87	+
	88	+CREATE TABLE `language_names` (
	89	+ `report_language` varchar (15),
	90	+ `language_code` varchar (15),
	91	+ `language_name` varchar (50),
	92	+ PRIMARY KEY (report_language,language_code)
	93	+) ;
	94	+
	95	+CREATE TABLE `binaries` (
	96	+ `date` date NOT NULL,
	97	+ `project_code` char (2),
	98	+ `language_code` char (15),
	99	+ `extension` varchar (10),
	100	+ `binaries` bigint (15),
	101	+ PRIMARY KEY (date,project_code,language_code,extension)
	102	+) ;
	103	+
	104	+CREATE TABLE `offline` (
	105	+ `date` date NOT NULL,
	106	+ `source` varchar (255),
	107	+ `readers` bigint (12),
	108	+ PRIMARY KEY (date,source)
	109	+) ;
	110	+
	111	+-- SHOW TABLES ;
	112	+-- DESCRIBE comscore ;
	113	+-- DESCRIBE comscore_regions ;
	114	+-- DESCRIBE wikistats ;
	115	+-- DESCRIBE page_views ;
	116	+-- DESCRIBE language_names ;
	117	+-- DESCRIBE binaries ;
	118	+-- DESCRIBE offline ;
	119	+
	120	+-- Database Manipulation
	121	+-- Obviously in real world this is a separate script
	122	+
	123	+LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
	124	+ INTO TABLE comscore
	125	+ FIELDS TERMINATED BY ','
	126	+ OPTIONALLY ENCLOSED BY '"'
	127	+ (@date,country_code,region_code,web_property,project_code,reach,visitors)
	128	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	129	+
	130	+LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
	131	+ INTO TABLE comscore_regions
	132	+ FIELDS TERMINATED BY ','
	133	+ OPTIONALLY ENCLOSED BY '"'
	134	+ (report_language,region_code,region_name) ;
	135	+
	136	+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
	137	+ INTO TABLE wikistats
	138	+ FIELDS TERMINATED BY ','
	139	+ OPTIONALLY ENCLOSED BY '"'
	140	+ (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
	141	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	142	+
	143	+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
	144	+ INTO TABLE page_views
	145	+ FIELDS TERMINATED BY ','
	146	+ OPTIONALLY ENCLOSED BY '"'
	147	+ (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
	148	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	149	+
	150	+
	151	+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
	152	+ INTO TABLE language_names
	153	+ FIELDS TERMINATED BY ','
	154	+ OPTIONALLY ENCLOSED BY '"'
	155	+ (report_language,language_code,language_name) ;
	156	+
	157	+LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
	158	+ INTO TABLE binaries
	159	+ FIELDS TERMINATED BY ','
	160	+ OPTIONALLY ENCLOSED BY '"'
	161	+ (project_code,language_code,@date,extension,binaries)
	162	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	163	+
	164	+LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
	165	+ INTO TABLE offline
	166	+ FIELDS TERMINATED BY ','
	167	+ OPTIONALLY ENCLOSED BY '"'
	168	+ (@date,readers)
	169	+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
	170	+
	171	+
	172	+-- show contents (debugging only)
	173	+-- SELECT * FROM comscore ;
	174	+-- SELECT * FROM comscore_regions ;
	175	+-- SELECT * FROM wikistats ;
	176	+-- SELECT * FROM page_views ;
	177	+-- SELECT * FROM language_names ;
	178	+-- SELECT * FROM binaries
	179	+-- WHERE project_code = 'commons' ;
	180	+ SELECT * FROM offline ;
	181	+
	182	+
Property changes on: trunk/extensions/MetricsReporting/import/analytics_create_and_load_from_csv.txt
___________________________________________________________________
Added: svn:eol-style
1	183	+ native
Index: trunk/extensions/MetricsReporting/import/analytics_generate_csv_files.sh
—	—	@@ -0,0 +1,47 @@
	2	+#!/bin/sh
	3	+
	4	+# Prepare several csv files, ready for importing into analytics database
	5	+# All generated files have _in_ in name signalling these contain data ready for importing into database
	6	+# One input record corresponds to one database record
	7	+
	8	+ulimit -v 8000000
	9	+
	10	+clear
	11	+cd /a/analytics
	12	+
	13	+
	14	+# AnalyticsPrepBinariesData.pl read counts for binaries which were generated by wikistats
	15	+# and which reside in /a/wikistats/csv_[project code]/StatisticsPerBinariesExtension.csv
	16	+# It filters and reorganizes data and produces analytics_in_binaries.csv
	17	+# Output csv contains: project code, language, month, extension name, count
	18	+
	19	+perl AnalyticsPrepBinariesData.pl -i /a/wikistats/ -o /a/analytics/
	20	+
	21	+# AnalyticsPrepComscoreData.pl scans /a/analytics/comscore for newest comScore csv files (with data for last 14 months)
	22	+# parses those csv files, adds/replaces data from these csv files into master files (containing full history)
	23	+# and generates input csv file analytics_in_comscore.csv ready for importing into database
	24	+#
	25	+# note : these csv files were manually downloaded from http://mymetrix.comscore.com/app/report.aspx
	26	+# and given more descriptive names, script finds newest files based on partial name search
	27	+#
	28	+# -r replace (default is add only)
	29	+# -i input folder, contains manually downloaded csv files from comScore (or xls files manually converted to csv)
	30	+# -m master files with full history
	31	+# -o output csv file, with reach per region, UV's per region and UV's per top web property, ready for import into database
	32	+
	33	+perl AnalyticsPrepComscoreData.pl -r -i /a/analytics/comscore -m /a/analytics -o /a/analytics
	34	+
	35	+# AnalyticsPrepWikiCountsOutput.pl reads a plethora of fields from several csv files from wikistats process
	36	+# It filters and reorganizes data and produces analytics_in_wikistats.csv, ready for import into analytics database
	37	+
	38	+perl AnalyticsPrepWikiCountsOutput.pl -i /a/wikistats/ -o /a/analytics
	39	+
	40	+# analytics_in_page_views.csv is written daily as part of WikiCountsSummarizeProjectCounts.pl
	41	+# part of (/home/ezachte/pageviews_monthly.sh job)
	42	+# which processes hourly projectcounts files (per wiki page view totals for one hour) from http://dammit.lt/wikistats
	43	+# and generates several files on different aggregation levels
	44	+# only action here is to copy data to this folder to have everything in one place
	45	+# note: unlike folder name suggests this file contains stats for all projects
	46	+
	47	+cp /a/wikistats/csv_wp/analytics_in_page_views.csv .
	48	+
Property changes on: trunk/extensions/MetricsReporting/import/analytics_generate_csv_files.sh
___________________________________________________________________
Added: svn:eol-style
1	49	+ native
Index: trunk/extensions/MetricsReporting/import/analytics_upd.sh
—	—	@@ -0,0 +1,6 @@
	2	+clear
	3	+cd /a/analytics
	4	+
	5	+./analytics_generate_csv_files.sh
	6	+
	7	+mysql -u analytics -h project2.wikimedia.org -preport < analytics_refresh_from_csv.txt
Property changes on: trunk/extensions/MetricsReporting/import/analytics_upd.sh
___________________________________________________________________
Added: svn:eol-style
1	8	+ native
Index: trunk/extensions/MetricsReporting/import/AnalyticsPrepWikiCountsOutput.pl
—	—	@@ -0,0 +1,334 @@
	2	+#!/usr/local/bin/perl
	3	+
	4	+# Copyright (C) 2011 Wikimedia Foundation
	5	+# This program is free software; you can redistribute it and/or
	6	+# modify it under the terms of the GNU General Public License version 2
	7	+# as published by the Free Software Foundation.
	8	+# This program is distributed in the hope that it will be useful,
	9	+# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	11	+# See the GNU General Public License for more details, at
	12	+# http://www.fsf.org/licenses/gpl.html
	13	+
	14	+# Author:
	15	+# Erik Zachte, email ezachte@wikimedia.org
	16	+# loosely based on predecessor
	17	+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl
	18	+
	19	+# Functionality:
	20	+# tba
	21	+
	22	+# Parameters:
	23	+# tba
	24	+
	25	+# Output:
	26	+# updated csv file for import in MySQL
	27	+
	28	+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/
	29	+
	30	+ use Getopt::Std ;
	31	+
	32	+ $true = 1 ;
	33	+ $false = 0 ;
	34	+
	35	+ @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ;
	36	+
	37	+ $file_csv_monthly_data = "StatisticsMonthly.csv" ;
	38	+ $file_csv_user_activity_spread = "StatisticsUserActivitySpread.csv" ;
	39	+ $file_csv_analytics_in = "analytics_in_wikistats.csv" ;
	40	+
	41	+ &ParseArguments ;
	42	+ &ReadStatisticsMonthly ;
	43	+ &FindLargestWikis ;
	44	+ &WriteMonthlyData ;
	45	+
	46	+ print "\nReady\n\n" ;
	47	+ exit ;
	48	+
	49	+sub ParseArguments
	50	+{
	51	+ print "ParseArguments\n" ;
	52	+ my (@options, $arguments) ;
	53	+
	54	+ getopt ("io", \%options) ;
	55	+
	56	+ foreach $arg (sort keys %options)
	57	+ { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
	58	+ print ("\nArguments\n$arguments\n") ;
	59	+
	60	+ if (! -d '/mnt/') # EZ test
	61	+ {
	62	+ $path_in = "c:/\@ wikimedia/# out bayes" ;
	63	+ $path_out = "c:/MySQL/analytics" ;
	64	+ }
	65	+ else
	66	+ {
	67	+ die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
	68	+ die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
	69	+
	70	+ $path_in = $options {"i"} ;
	71	+ $path_out = $options {"o"} ;
	72	+ }
	73	+
	74	+ die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
	75	+ die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
	76	+
	77	+ print "Input folder: $path_in\n" ;
	78	+ print "Output folder: $path_out\n\n" ;
	79	+
	80	+ $file_csv_out = "$path_out/analytics_in_wikistats.csv" ;
	81	+}
	82	+
	83	+sub ReadStatisticsMonthly
	84	+{
	85	+ print "ReadStatisticsMonthly\n" ;
	86	+ &ReadStatisticsMonthlyForProject ("wb") ;
	87	+ &ReadStatisticsMonthlyForProject ("wk") ;
	88	+ &ReadStatisticsMonthlyForProject ("wn") ;
	89	+ &ReadStatisticsMonthlyForProject ("wp") ;
	90	+ &ReadStatisticsMonthlyForProject ("wq") ;
	91	+ &ReadStatisticsMonthlyForProject ("ws") ;
	92	+ &ReadStatisticsMonthlyForProject ("wv") ;
	93	+ &ReadStatisticsMonthlyForProject ("wx") ;
	94	+
	95	+# &ReadStatisticsPerBinariesExtensionCommons ;
	96	+}
	97	+
	98	+sub ReadStatisticsMonthlyForProject
	99	+{
	100	+ my $project = shift;
	101	+ $all_projects = "*" ;
	102	+
	103	+ my $file_csv_in_1 = "$path_in/csv_$project/$file_csv_monthly_data" ;
	104	+ my $file_csv_in_2 = "$path_in/csv_$project/$file_csv_user_activity_spread" ;
	105	+
	106	+ if (! -e $file_csv_in_1)
	107	+ { &Abort ("Input file '$file_csv_in_1' not found") ; }
	108	+ if (! -e $file_csv_in_2)
	109	+ { &Abort ("Input file '$file_csv_in_2' not found") ; }
	110	+
	111	+ my $yyyymm ;
	112	+
	113	+ print "Read '$file_csv_in_1'\n" ;
	114	+ open CSV_IN, '<', $file_csv_in_1 ;
	115	+ while ($line = <CSV_IN>)
	116	+ {
	117	+ chomp $line ;
	118	+ ($language,$date,$counts) = split (',', $line, 3) ;
	119	+ @fields = split (',', $counts) ;
	120	+
	121	+ next if ! &AcceptWiki ($project,$language) ;
	122	+
	123	+ ($month,$day,$year) = split ('\/', $date) ;
	124	+ $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
	125	+
	126	+ foreach $field (@fields)
	127	+ {
	128	+ if ($field eq '-')
	129	+ { $field = 0 ; }
	130	+ }
	131	+
	132	+ $data = $fields [0] . ',' . # contributors all time
	133	+ $fields [1] . ',' . # new contributors
	134	+ 'data2,' . # place holder for more data, to be inserted later
	135	+ $fields [4] . ',' . # articles
	136	+ $fields [6] . ',' . # articles new per day
	137	+ $fields [9] . ',' . # larger than 0.5 kB
	138	+ $fields [10] . ',' . # larger than 2.0 kB
	139	+ $fields [7] . ',' . # mean edits per article
	140	+ $fields [8] . ',' . # mean bytes per article
	141	+ $fields [11] . ',' . # edits
	142	+ $fields [12] . ',' . # size in bytes
	143	+ $fields [13] . ',' . # size in words
	144	+ $fields [14] . ',' . # links internal
	145	+ $fields [15] . ',' . # links interwiki
	146	+ $fields [16] . ',' . # links images
	147	+ $fields [17] . ',' . # links external
	148	+ $fields [18] ; # redirects
	149	+
	150	+ $data1 {"$project,$language,$yyyymm"} = $data ;
	151	+ }
	152	+ close CSV_IN ;
	153	+
	154	+ # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
	155	+
	156	+ print "Read '$file_csv_in_2'\n" ;
	157	+ open CSV_IN, '<', $file_csv_in_2 ;
	158	+ while ($line = <CSV_IN>)
	159	+ {
	160	+ chomp $line ;
	161	+ ($language,$date,$reguser_bot,$group,@counts) = split (',', $line) ;
	162	+
	163	+ next if ! &AcceptWiki ($project,$language) ;
	164	+
	165	+ if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
	166	+ if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces
	167	+
	168	+ ($month,$day,$year) = split ('\/', $date) ;
	169	+ $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
	170	+ $months {$yyyymm} ++ ;
	171	+# print "YYYYMM $yyyymm\n" ;
	172	+
	173	+ # data have been collected in WikiCountsProcess.pm and been written in WikiCountsOutput.pm
	174	+ # count user with over x edits
	175	+ # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
	176	+ # @thresholds = (1,3,5,10,25,32,50,100,250,316,500,1000,2500,3162,5000,10000,25000,31623,50000,100000,250000,316228,500000,1000000,2500000,3162278,500000,10000000,25000000,31622777,5000000,100000000) ;
	177	+ $edits_ge_5 = @counts [2] > 0 ? @counts [2] : 0 ;
	178	+ $edits_ge_25 = @counts [4] > 0 ? @counts [4] : 0 ;
	179	+ $edits_ge_100 = @counts [7] > 0 ? @counts [7] : 0 ;
	180	+ $data2 {"$project,$language,$yyyymm"} = "$edits_ge_5,$edits_ge_25,$edits_ge_100" ;
	181	+
	182	+ $total_edits_ge_5 {"$project,$language"} += $edits_ge_5 ;
	183	+ $total_edits_ge_25 {"$project,$language"} += $edits_ge_25 ;
	184	+ $total_edits_ge_100 {"$project,$language"} += $edits_ge_100 ;
	185	+
	186	+ # prep string with right amount of comma's
	187	+ if ($data2_default eq '')
	188	+ {
	189	+ $data2_default = $data2 {"$project,$language,$yyyymm"} ;
	190	+ $data2_default =~ s/[^,]+/0/g ;
	191	+ }
	192	+ }
	193	+ close CSV_IN ;
	194	+}
	195	+
	196	+#sub ReadStatisticsPerBinariesExtensionCommons
	197	+#{
	198	+# my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
	199	+# my $mmax = -1 ;
	200	+
	201	+# if (! -e $file_csv_in)
	202	+# { &Abort ("Input file '$file_csv_in' not found") ; }
	203	+
	204	+# print "Read '$file_csv_in'\n" ;
	205	+# open CSV_IN, '<', $file_csv_in ;
	206	+# while ($line = <CSV_IN>)
	207	+# {
	208	+# chomp $line ;
	209	+# ($language,$date,$counts) = split (',', $line, 3) ;
	210	+
	211	+# if ($language ne "commons") { next ; }
	212	+
	213	+# if ($date eq "00/0000")
	214	+# {
	215	+# @fields = split (',', $counts) ;
	216	+# $field_ndx = 0 ;
	217	+# foreach $field (@fields)
	218	+# {
	219	+# $ext_cnt {-1}{$field_ndx} = $field ;
	220	+# # print "EXT_CNT $field_ndx : $field\n" ;
	221	+# $field_ndx ++ ;
	222	+# }
	223	+# next ;
	224	+# }
	225	+
	226	+# ($month,$year) = split ('\/', $date) ;
	227	+# my $m = &months_since_2000_01 ($year,$month) ;
	228	+# next if $m < $m_start ;
	229	+
	230	+# if ($m > $mmax)
	231	+# { $mmax = $m ; }
	232	+
	233	+# @fields = split (',', $counts) ;
	234	+# $field_ndx = 0 ;
	235	+# foreach $field (@fields)
	236	+# {
	237	+# $ext_cnt {$m}{$field_ndx} = $field ;
	238	+# $ext_tot {$m} += $field ;
	239	+# $field_ndx ++ ;
	240	+# }
	241	+# }
	242	+# close CSV_IN ;
	243	+
	244	+# %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
	245	+# @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;
	246	+
	247	+# $extcnt = 0 ;
	248	+# foreach $extndx (@ext_cnt_mmax)
	249	+# {
	250	+# # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
	251	+# push @extndxs, $extndx ;
	252	+# if ($extcnt++ >= 9) { last ; }
	253	+# }
	254	+#}
	255	+
	256	+sub FindLargestWikis
	257	+{
	258	+ print "FindLargestWikis\n" ;
	259	+ print "Largest projects (most accumulated very active editors):\n";
	260	+ @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ;
	261	+ $rank = 0 ;
	262	+ foreach $project_language (@total_edits_ge_100)
	263	+ {
	264	+ $largest_projects {$project_language} = $rank++ ;
	265	+ print "$project_language," ;
	266	+ last if $rank > 10 ;
	267	+ }
	268	+ print "\n\n" ;
	269	+
	270	+ foreach $yyyymm (sort keys %months)
	271	+ {
	272	+ next if $yyyymm lt '2011' ;
	273	+ foreach $project_language (keys %largest_projects)
	274	+ {
	275	+ ($project,$language) = split (',', $project_language) ;
	276	+ if ($data2 {"$project,$language,$yyyymm"} eq '')
	277	+ {
	278	+ print "No data yet for large wiki $project_language for $yyyymm-> skip month $yyyymm\n" ;
	279	+ $months {$yyyymm} = 0 ;
	280	+ }
	281	+ }
	282	+ }
	283	+}
	284	+
	285	+sub WriteMonthlyData
	286	+{
	287	+ print "WriteMonthlyData\n" ;
	288	+ my $file_csv_out = "$path_out/$file_csv_analytics_in" ;
	289	+ open CSV_OUT, '>', $file_csv_out ;
	290	+ foreach $project_wiki_month (sort keys %data1)
	291	+ {
	292	+ ($project,$wiki,$yyyymm) = split (',', $project_wiki_month) ;
	293	+
	294	+ # recent month misses on eor more large wikis?
	295	+ next if $months {$yyyymm} == 0 ;
	296	+
	297	+ $data1 = $data1 {$project_wiki_month} ;
	298	+ $data2 = $data2 {$project_wiki_month} ;
	299	+ if ($data2 eq '')
	300	+ {
	301	+ print "Editor data missing for $project_wiki_month\n" ;
	302	+ $data2 = $data2_default ;
	303	+ }
	304	+ $data1 =~ s/data2/$data2/ ; # insert rather than append to have all editor fields close together
	305	+ print CSV_OUT "$project_wiki_month,$data1\n" ;
	306	+ }
	307	+ $total_edits_ge_5 {"$project,*,$yyyymm"} += $edits_ge_5 ;
	308	+ $total_edits_ge_25 {"$project,*,$yyyymm"} += $edits_ge_25 ;
	309	+ $total_edits_ge_100 {"$project,*,$yyyymm"} += $edits_ge_100 ;
	310	+ close CSV_OUT ;
	311	+}
	312	+
	313	+sub AcceptWiki
	314	+{
	315	+ my ($project,$language) = @_ ;
	316	+
	317	+ return $false if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
	318	+ return $false if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on
	319	+ return $false if $language =~ /mania\|team\|comcom\|closed\|chair\|langcom\|office\|searchcom\|sep11\|nostalgia\|stats\|test/i ;
	320	+
	321	+ return $false if $language =~ /^(?:dk\|tlh\|ru_sib)$/ ; # dk=dumps exist(ed?) but site not, tlh=Klignon, ru-sib=Siberian
	322	+ return $false if $project eq 'wk' and ($language eq "als" or $language eq "tlh") ;
	323	+
	324	+ return $true ;
	325	+}
	326	+
	327	+sub Abort
	328	+{
	329	+ my $msg = shift ;
	330	+ print "$msg\nExecution aborted." ;
	331	+ # to do: log also to file
	332	+ exit ;
	333	+}
	334	+
	335	+
Property changes on: trunk/extensions/MetricsReporting/import/AnalyticsPrepWikiCountsOutput.pl
___________________________________________________________________
Added: svn:eol-style
1	336	+ native
Index: trunk/extensions/MetricsReporting/import/analytics_new.sh
—	—	@@ -0,0 +1,5 @@
	2	+clear
	3	+cd /a/analytics
	4	+# rm test.csv
	5	+# mysql --user=root --password=changerootwhenpriyankareturns < analytics_create_and_load_from_csv.txt > mysql_log.txt
	6	+ mysql -u analytics -h project2.wikimedia.org -preport < analytics_create_and_load_from_csv.txt > mysql_log.txt
Property changes on: trunk/extensions/MetricsReporting/import/analytics_new.sh
___________________________________________________________________
Added: svn:eol-style
1	7	+ native
Index: trunk/extensions/MetricsReporting/import/_readme.txt
—	—	@@ -0,0 +1,21 @@
	2	+This folder contain bash and perl files to create and fill database 'analytics', a.o. for monthly dashboard.
	3	+
	4	+== analytics_new.sh ==
	5	+Defines the database and tables and loads data from existing csv files.
	6	+It executes SQL from analytics_create_and_load_from_csv.txt
	7	+
	8	+== analytics_upd.sh ==
	9	+Prepares new csv files (delegated to analytics_generate_csv_files.sh),
	10	+and empties/reloads all tables for which csv files are in this folder.
	11	+It executes SQL from analytics_refresh_from_csv.txt
	12	+
	13	+== CSV files ==
	14	+CSV files and where they are generated:
	15	+
	16	+analytics_in_binaries.csv <- AnalyticsPrepBinariesData.pl
	17	+analytics_in_comscore.csv <- AnalyticsPrepComscoreData.pl
	18	+analytics_in_comscore_regions.csv <- manual
	19	+analytics_in_language_names.csv
	20	+analytics_in_offline.csv <- manual
	21	+analytics_in_page_views.csv <- /home/ezachte/wikistats/pageviews_monthly.sh (file copy after that)
	22	+analytics_in_wikistats.csv <- AnalyticsPrepWikiCountsOutput.pl
Property changes on: trunk/extensions/MetricsReporting/import/_readme.txt
___________________________________________________________________
Added: svn:eol-style
1	23	+ native
Index: trunk/extensions/MetricsReporting/import/AnalyticsPrepComscoreData.pl
—	—	@@ -0,0 +1,564 @@
	2	+#!/usr/bin/perl
	3	+
	4	+# Copyright (C) 2011 Wikimedia Foundation
	5	+# This program is free software; you can redistribute it and/or
	6	+# modify it under the terms of the GNU General Public License version 2
	7	+# as published by the Free Software Foundation.
	8	+# This program is distributed in the hope that it will be useful,
	9	+# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	11	+# See the GNU General Public License for more details, at
	12	+# http://www.fsf.org/licenses/gpl.html
	13	+
	14	+# Author:
	15	+# Erik Zachte, email ezachte@wikimedia.org
	16	+
	17	+# Functionality:
	18	+# comScore data can be downloaded as csv file, which each contain 14 months history
	19	+# This script uses these files to update 'master' csv files which contain all known history
	20	+# Note: only entities which are already in master file will be updated!
	21	+# Then it merges these master files into one csv file which can be loaded into analytics database
	22	+# Data are: reach by region, unique visitors by region, unique visitors by web property
	23	+
	24	+# Parameters:
	25	+# -m (required) folder with 'master' csv files (files with all known history)
	26	+# -u (required) folder with 'update' csv files (files with lastest 14 months history, produced by comScore)
	27	+
	28	+# Output:
	29	+# updated master csv files + merged and formatted csv for import in MySQL
	30	+
	31	+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/
	32	+
	33	+ use Getopt::Std ;
	34	+ use Cwd;
	35	+
	36	+ my $options ;
	37	+ getopt ("imo", \%options) ;
	38	+
	39	+ $true = 1 ;
	40	+ $false = 0 ;
	41	+
	42	+ $script_name = "AnalyticsPrepComscoreData.pl" ;
	43	+ $script_version = "0.31" ;
	44	+
	45	+# EZ test only
	46	+# $source = "comscore" ;
	47	+# $server = "ez_test" ;
	48	+# $generated = "2011-05-06 00:00:00" ;
	49	+# $user = "ezachte" ;
	50	+
	51	+ $dir_in = $options {"i"} ;
	52	+ $dir_upd = $options {"m"} ;
	53	+ $dir_out = $options {"o"} ;
	54	+ $mode = 'add';
	55	+ if (defined $options {'r'})
	56	+ { $mode = 'replace'; }
	57	+
	58	+ print "Mode is $mode (specify '-r' for replace)\n\n";
	59	+
	60	+ if (! -d "/home/") # EZ test machine
	61	+ {
	62	+ $dir_in = "C:/@ Wikimedia/@ Report Card/Data" ;
	63	+ $dir_upd = "C:/MySQL/analytics" ;
	64	+ $dir_out = "C:/MySQL/analytics" ;
	65	+ $mode = 'replace' ;
	66	+ }
	67	+
	68	+ if ($dir_in eq '')
	69	+ { Abort ("Specify folder for input file (new comScore data) '-i folder'") ; }
	70	+ if ($dir_upd eq '')
	71	+ { Abort ("Specify folder for master files (full history) as '-m folder'") ; }
	72	+ if ($dir_out eq '')
	73	+ { Abort ("Specify folder for output file '-o folder'") ; }
	74	+
	75	+ $file_comscore_reach_master = "history_comscore_reach_regions.csv" ;
	76	+ $file_comscore_reach_update = "reachbyregioncsv" ;
	77	+ $file_comscore_uv_region_master = "history_comscore_UV_regions.csv" ;
	78	+ $file_comscore_uv_region_update = "UVsbyregioncsv" ;
	79	+ $file_comscore_uv_property_master = "history_comscore_UV_properties.csv" ;
	80	+ $file_comscore_uv_property_update = "UVtrend*csv" ;
	81	+
	82	+ $layout_csv_reach = 1 ;
	83	+ $layout_csv_regions = 2 ;
	84	+ $layout_csv_properties = 3 ;
	85	+
	86	+ print "Directories:\n" .
	87	+ "Input (new comScore data): '$dir_in'\n".
	88	+ "Master files (full history): '$dir_upd'\n" .
	89	+ "Output (database feed): '$dir_out'\n\n" ;
	90	+
	91	+ %region_codes = (
	92	+ "Europe"=>"EU",
	93	+ "North America"=>"NA",
	94	+ "Latin America"=>"LA",
	95	+ "World-Wide" => "W",
	96	+ "Middle East - Africa" => "MA",
	97	+ "Asia Pacific"=> "AS",
	98	+ "United States" => "US",
	99	+ "India" => "I",
	100	+ "China" => "C"
	101	+ ) ;
	102	+
	103	+ foreach $region_name (keys %region_codes)
	104	+ { $region_names {$region_codes {$region_name}} = $region_name ; }
	105	+
	106	+ @months_short = qw "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec" ;
	107	+
	108	+ &ReadMasterComscoreDataReachPerRegion ($file_comscore_reach_master, $file_comscore_reach_update, "%.1f", 1, $layout_csv_reach) ;
	109	+ %reach_region_code = %data ;
	110	+
	111	+ &ReadMasterComscoreDataVisitorsPerRegion ($file_comscore_uv_region_master, $file_comscore_uv_region_update, "%.0f", 1000, $layout_csv_regions) ;
	112	+ %visitors_region_code = %data ;
	113	+
	114	+ &ReadMasterComscoreDataVisitorsPerProperty ($file_comscore_uv_property_master, $file_comscore_uv_property_update, "%.0f", 1000, $layout_csv_properties) ;
	115	+ %visitors_web_property = %data ;
	116	+
	117	+ &WriteDataAnalytics ;
	118	+
	119	+ print "\nReady\n\n" ;
	120	+ exit ;
	121	+
	122	+sub ReadMasterComscoreDataReachPerRegion
	123	+{
	124	+ my ($file_comscore_master, $file_comscore_updates, $precision, $layout_csv) = @_ ;
	125	+
	126	+ print "ReadMasterComscoreDataReachPerRegion\n\n" ;
	127	+
	128	+ undef %months ;
	129	+ undef %data ;
	130	+ undef @regions ;
	131	+
	132	+ open IN, '<', "$dir_upd/$file_comscore_master" ;
	133	+
	134	+ $lines = 0 ;
	135	+ while ($line = <IN>)
	136	+ {
	137	+ chomp $line ;
	138	+ $line =~ s/\r//g ;
	139	+
	140	+ ($yyyymm,@data) = split (',', $line) ;
	141	+
	142	+ if ($lines++ == 0)
	143	+ {
	144	+ @regions = @data ;
	145	+ print "Regions found: " . (join ',', @regions) . "\n";
	146	+ next ;
	147	+ }
	148	+
	149	+ $field_ndx = 0 ;
	150	+ foreach (@data)
	151	+ {
	152	+ $region = $regions [$field_ndx] ;
	153	+ $region_code = $region_codes {$region} ;
	154	+
	155	+ $data = $data [$field_ndx] ;
	156	+ if ($data eq '')
	157	+ { $data = '0' ; }
	158	+ $months {$yyyymm} ++ ;
	159	+ $data {"$yyyymm,$region_code"} = $data ;
	160	+ # print "Old data $yyyymm,$region_code = $data\n" ;
	161	+ $field_ndx++ ;
	162	+ }
	163	+ }
	164	+ close IN ;
	165	+
	166	+ my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1, $layout_csv, @regions) ;
	167	+ return if ! $updates_found ;
	168	+
	169	+ rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ;
	170	+ open OUT, '>', "$dir_upd/$file_comscore_master" ;
	171	+
	172	+ $line_out = "yyyymm" ;
	173	+ foreach $region_name (@regions)
	174	+ { $line_out .= ",$region_name" ; }
	175	+ print OUT "$line_out" ;
	176	+
	177	+ foreach $yyyymm (sort {$b cmp $a} keys %months)
	178	+ {
	179	+ $line_out = "\n$yyyymm" ;
	180	+ foreach $region_name (@regions)
	181	+ {
	182	+ $yyyymm_region_code = $yyyymm . ',' . $region_codes {$region_name} ;
	183	+ $line_out .= "," . sprintf ($precision, $data {$yyyymm_region_code}) ;
	184	+ }
	185	+ print OUT "$line_out" ;
	186	+ }
	187	+
	188	+ close OUT ;
	189	+}
	190	+
	191	+sub ReadMasterComscoreDataVisitorsPerRegion
	192	+{
	193	+ my ($file_comscore_master, $file_comscore_updates, $precision, $multiplier, $layout_csv) = @_ ;
	194	+
	195	+ print "ReadMasterComscoreDataVisitorsPerRegion\n\n";
	196	+
	197	+ undef %months ;
	198	+ undef %data ;
	199	+ undef @regions ;
	200	+
	201	+ open IN, '<', "$dir_upd/$file_comscore_master" ;
	202	+
	203	+ $lines = 0 ;
	204	+ $metric = 'unique_visitors' ;
	205	+ while ($line = <IN>)
	206	+ {
	207	+ chomp $line ;
	208	+ $line =~ s/\r//g ;
	209	+ $line = &GetNumberOnly ($line) ;
	210	+
	211	+ next if $line !~ /(?:yyyymm\|\d\d\d\d-\d\d)/ ;
	212	+
	213	+ ($yyyymm,@data) = split (',', $line) ;
	214	+
	215	+ if ($lines++ == 0)
	216	+ {
	217	+ @regions = @data ;
	218	+ print "Regions found: " . (join ',', @regions) . "\n";
	219	+ next ;
	220	+ }
	221	+
	222	+ $field_ndx = 0 ;
	223	+ foreach (@data)
	224	+ {
	225	+ $region = $regions [$field_ndx] ;
	226	+ $region_code = $region_codes {$region} ;
	227	+
	228	+ $data = $data [$field_ndx] ;
	229	+ if ($data eq '')
	230	+ { $data = '0' ; }
	231	+
	232	+ # print "Old data $yyyymm,$region = $data\n" ;
	233	+
	234	+ $months {$yyyymm} ++ ;
	235	+ $data {"$yyyymm,$region_code"} = $data ;
	236	+
	237	+ $field_ndx++ ;
	238	+ }
	239	+ }
	240	+ close IN ;
	241	+
	242	+ my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @regions) ;
	243	+ return if ! $updates_found ;
	244	+
	245	+ rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ;
	246	+ open OUT, '>', "$dir_upd/$file_comscore_master" ;
	247	+
	248	+ $line_out = "yyyymm" ;
	249	+ foreach $region_name (@regions)
	250	+ { $line_out .= ",$region_name" ; }
	251	+ print OUT "$line_out" ;
	252	+
	253	+ foreach $yyyymm (sort {$b cmp $a} keys %months)
	254	+ {
	255	+ $line_out = "\n$yyyymm" ;
	256	+ foreach $region_name (@regions)
	257	+ {
	258	+ $yyyymm_region_code = $yyyymm . ',' . $region_codes {$region_name} ;
	259	+ $line_out .= "," . sprintf ($precision, $data {$yyyymm_region_code}) ;
	260	+ }
	261	+ print OUT "$line_out" ;
	262	+ }
	263	+
	264	+ close OUT ;
	265	+}
	266	+
	267	+sub ReadMasterComscoreDataVisitorsPerProperty
	268	+{
	269	+ my ($file_comscore_master, $file_comscore_updates, $precision, $multiplier, $layout_csv) = @_ ;
	270	+
	271	+ print "ReadMasterComscoreDataVisitorsPerProperty\n\n";
	272	+
	273	+ undef %months ;
	274	+ undef %data ;
	275	+ undef @properties ;
	276	+
	277	+ open IN, '<', "$dir_upd/$file_comscore_master" ;
	278	+
	279	+ $lines = 0 ;
	280	+ $metric = 'unique_visitors' ;
	281	+ while ($line = <IN>)
	282	+ {
	283	+ chomp $line ;
	284	+ $line =~ s/\r//g ;
	285	+
	286	+ ($yyyymm,@data) = split (',', $line) ;
	287	+ if ($lines++ == 0)
	288	+ { @properties = @data ; next ; }
	289	+
	290	+ $field_ndx = 0 ;
	291	+ foreach (@data)
	292	+ {
	293	+ $property = $properties [$field_ndx] ;
	294	+ $property =~ s/.Yahoo./Yahoo/ ;
	295	+ $data = $data [$field_ndx] ;
	296	+ if ($data eq '')
	297	+ { $data = '0' ; }
	298	+
	299	+ # print "Old data $yyyymm,$property = $data\n" ;
	300	+
	301	+ $months {$yyyymm} ++ ;
	302	+ $data {"$yyyymm,$property"} = $data ;
	303	+
	304	+ $field_ndx++ ;
	305	+ }
	306	+ }
	307	+ close IN ;
	308	+
	309	+ my $updates_found = &UpdateMasterFileFromRecentComscoreData ($file_comscore_master, $file_comscore_updates, 1000, $layout_csv, @properties) ;
	310	+ return if ! $updates_found ;
	311	+
	312	+ rename "$dir_upd/$file_comscore_master", "$dir_upd/$file_comscore_master.~" ;
	313	+ open OUT, '>', "$dir_upd/$file_comscore_master" ;
	314	+
	315	+ $line_out = "yyyymm" ;
	316	+ foreach $property (@properties)
	317	+ { $line_out .= ",$property" ; }
	318	+ print OUT "$line_out" ;
	319	+
	320	+ foreach $yyyymm (sort {$b cmp $a} keys %months)
	321	+ {
	322	+ $line_out = "\n$yyyymm" ;
	323	+ foreach $property (@properties)
	324	+ {
	325	+ $yyyymm_property = "$yyyymm,$property" ;
	326	+ $line_out .= "," . sprintf ($precision, $data {$yyyymm_property}) ;
	327	+ }
	328	+ print OUT "$line_out" ;
	329	+ }
	330	+
	331	+ close OUT ;
	332	+}
	333	+
	334	+sub UpdateMasterFileFromRecentComscoreData
	335	+{
	336	+ my ($file_comscore_master, $file_comscore_updates, $multiplier, $layout_csv, @white_list) = @_ ;
	337	+
	338	+ print "UpdateMasterFileFromRecentComscoreData\n\n";
	339	+
	340	+ undef %white_list ;
	341	+ undef %not_white_listed ;
	342	+
	343	+ print "White list: ". (join (',', @white_list)) . "\n\n";
	344	+
	345	+ foreach $id (@white_list)
	346	+ { $white_list {$id} = $true ; }
	347	+
	348	+ if (! -e "$dir_upd/$file_comscore_master")
	349	+ { Abort ("File $file_comscore_master not found!") ; }
	350	+
	351	+ $age_all = -M "$dir_upd/$file_comscore_master" ;
	352	+ print "Latest comscore master file is " . sprintf ("%.0f", $age_all) . " days old: '$file_comscore_master'\n" ;
	353	+
	354	+ my $cwd = getcwd ;
	355	+ chdir $dir_in ;
	356	+
	357	+ @files = glob($file_comscore_updates) ;
	358	+ $min_age_upd = 999999 ;
	359	+ $file_comscore_updates_latest = '' ;
	360	+ foreach $file (@files)
	361	+ {
	362	+ $age = -M $file ;
	363	+ if ($age < $min_age_upd)
	364	+ {
	365	+ $min_age_upd = $age ;
	366	+ $file_comscore_updates_latest = $file ;
	367	+ }
	368	+ }
	369	+ print "Latest comscore update file is " . sprintf ("%.0f", $min_age_upd) . " days old: '$file_comscore_updates_latest'\n" ;
	370	+
	371	+ if ($min_age_upd == 999999)
	372	+ {
	373	+ print "No valid update file found. Nothing to update." ;
	374	+ return ;
	375	+ }
	376	+
	377	+ #if ($age_all > $min_age_upd)
	378	+ #{
	379	+ # print "File with master data more recent than latest update csv from comScore. Nothing to update." ;
	380	+ # return ;
	381	+ #}
	382	+
	383	+ my $updates_found = $false ;
	384	+
	385	+ open CSV, '<', $file_comscore_updates_latest ;
	386	+ binmode CSV ;
	387	+ while ($line = <CSV>)
	388	+ {
	389	+ chomp $line ;
	390	+ $line =~ s/\r//g ;
	391	+ $line = &GetNumberOnly ($line) ;
	392	+
	393	+ if ($line =~ /Jan-\d\d\d\d.*?Feb-\d\d\d\d/) # e.g. 'Location,Location,Jan-2010,Feb-2010,Mar-2010,Apr-2010,...'
	394	+ {
	395	+ if ($layout_csv == $layout_csv_properties)
	396	+ { ($dummy1,$dummy2,$dummy3,@months) = split (',', $line) ; } # web properties csv file
	397	+ else
	398	+ { ($dummy1,$dummy2,@months) = split (',', $line) ; } # uv / reach csv files
	399	+
	400	+ @months = &mmm_yyyy2yyyy_mm (@months) ;
	401	+ }
	402	+
	403	+ if (($line =~ /^\d+,/) \|\| ($line =~ /,,.*?Total Internet/))
	404	+ {
	405	+ if ($layout_csv == $layout_csv_properties)
	406	+ {
	407	+ ($index,$dummy,$property,@data) = split (',', $line) ;
	408	+ $property =~ s/^\s+// ;
	409	+ $property =~ s/\s+$// ;
	410	+
	411	+ $property =~ s/.Total Internet./Total Internet/i ;
	412	+ $property =~ s/.Google./Google/i ;
	413	+ $property =~ s/.Microsoft./Microsoft/i ;
	414	+ $property =~ s/.FACEBOOK./Facebook/i ;
	415	+ $property =~ s/.Yahoo./Yahoo/i ;
	416	+ $property =~ s/.Amazon./Amazon/i ;
	417	+ $property =~ s/.Apple./Apple/i ;
	418	+ $property =~ s/.AOL./AOL/i ;
	419	+ $property =~ s/.Wikimedia./Wikimedia/i ;
	420	+ $property =~ s/.Tencent./Tencent/i ;
	421	+ $property =~ s/.Baidu./Baidu/i ;
	422	+ $property =~ s/.CBS./CBS/i ;
	423	+
	424	+ if (! $white_list {$property})
	425	+ {
	426	+ $not_white_listed {$property}++ ;
	427	+ next ;
	428	+ }
	429	+
	430	+ $id = $property ;
	431	+ }
	432	+ else
	433	+ {
	434	+ ($index,$region,@data) = split (',', $line) ;
	435	+ $region =~ s/^\s+// ;
	436	+ $region =~ s/\s+$// ;
	437	+
	438	+ if (! $white_list {$region})
	439	+ {
	440	+ $not_white_listed {$region}++ ;
	441	+ next ;
	442	+ }
	443	+
	444	+ $id = $region_codes {$region} ;
	445	+ }
	446	+
	447	+ for ($m = 0 ; $m <= $#months ; $m++)
	448	+ {
	449	+ $yyyymm = $months [$m] ;
	450	+ $months {$yyyymm} ++ ;
	451	+ $yyyymm_id = "$yyyymm,$id" ;
	452	+ $data = $data [$m] * $multiplier ;
	453	+
	454	+ if ($mode eq 'add')
	455	+ {
	456	+ if (! defined $data {$yyyymm_id})
	457	+ {
	458	+ $updates_found = $true ;
	459	+ print "New data found: $yyyymm_id = $data\n" ;
	460	+ $data {$yyyymm_id} = $data ;
	461	+ }
	462	+ }
	463	+ else
	464	+ {
	465	+ $updates_found = $true ;
	466	+ print "Data found: $yyyymm_id = $data\n" ;
	467	+ $data {$yyyymm_id} = $data ;
	468	+ }
	469	+ }
	470	+ }
	471	+ }
	472	+
	473	+ $entities_not_white_listed = join (', ', sort keys %not_white_listed) ;
	474	+ if ($entities_not_white_listed ne '')
	475	+ { print "\nEntities ignored:\n$entities_not_white_listed\n\n" ; }
	476	+
	477	+ if (! $updates_found)
	478	+ { print "No new updates found\n" ; }
	479	+ else
	480	+ { print "\nUpdates found, rewrite master file '$file_comscore_master'\n\n" ; }
	481	+
	482	+ return ($updates_found) ;
	483	+}
	484	+
	485	+sub WriteDataAnalytics
	486	+{
	487	+ print "WriteDataAnalytics\n\n";
	488	+
	489	+ open OUT, '>', "$dir_out/analytics_in_comscore.csv" ;
	490	+
	491	+ $metric = 'unique_visitors' ;
	492	+ foreach $yyyymm (sort keys %months)
	493	+ {
	494	+ # store meta data elsewhere
	495	+ # $line = "$generated,$source,$server,$script_name,$script_version,$user,$yyyymm,$country_code,$region_code,$property,$project,$normalized,$metric,$data\n" ;
	496	+ foreach $region_code (sort values %region_codes)
	497	+ {
	498	+ $country_code = '-' ;
	499	+ $property = '-' ;
	500	+ $project = '-' ;
	501	+ $reach = $reach_region_code {"$yyyymm,$region_code"} ;
	502	+ $visitors = $visitors_region_code {"$yyyymm,$region_code"} ;
	503	+
	504	+ if (! defined $reach) { $reach = -1 ; }
	505	+ if (! defined $visitors) { $visitors = -1 ; }
	506	+
	507	+ next if $reach == -1 and $visitors == -1 ;
	508	+
	509	+ $line = "$yyyymm,$country_code,$region_code,$property,$project,$reach,$visitors\n" ;
	510	+ print OUT $line ;
	511	+ print $line ;
	512	+ }
	513	+
	514	+ foreach $property (sort @properties)
	515	+ {
	516	+ $country_code = '-' ;
	517	+ $region_code = '-' ;
	518	+ $project = '-' ;
	519	+ $reach = '-1' ;
	520	+ $visitors = $visitors_web_property {"$yyyymm,$property"} ;
	521	+
	522	+ next if ! defined $visitors ;
	523	+
	524	+ $line = "$yyyymm,$country_code,$region_code,$property,$project,$reach,$visitors\n" ;
	525	+ print OUT $line ;
	526	+ # print $line ;
	527	+ }
	528	+ }
	529	+}
	530	+
	531	+sub GetNumberOnly
	532	+{
	533	+ my $line = shift ;
	534	+ $line =~ s/("[^\"]+")/($a=$1,$a=~s#,##g,$a)/ge ; # nested regexp: remove comma's inside double quotes
	535	+ $line =~ s/"//g ;
	536	+ return $line ;
	537	+}
	538	+
	539	+sub mmm_yyyy2yyyy_mm
	540	+{
	541	+ my @months = @_ ;
	542	+ my ($m) ;
	543	+ # Jan -> 01, etc
	544	+ foreach $month (@months)
	545	+ {
	546	+ my ($mmm,$yyyy) = split ('-', $month) ;
	547	+ for ($m = 0 ; $m <= $#months_short ; $m++)
	548	+ {
	549	+ if ($mmm eq $months_short [$m])
	550	+ {
	551	+ $month = "$yyyy-" . sprintf ("%02d", $m+1) ;
	552	+ last ;
	553	+ }
	554	+ }
	555	+ }
	556	+ return @months ;
	557	+}
	558	+
	559	+sub Abort
	560	+{
	561	+ $msg = shift ;
	562	+
	563	+ print "\nAbort, reason: $msg\n\n" ;
	564	+ exit ;
	565	+}
Property changes on: trunk/extensions/MetricsReporting/import/AnalyticsPrepComscoreData.pl
___________________________________________________________________
Added: svn:eol-style
1	566	+ native

Status & tagging log

17:59, 7 December 2011 Reedy (talk | contribs) changed the status of r105444 [removed: new added: deferred]