r82606 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r82605‎ \| r82606 \| r82607 >
Date:	15:17, 22 February 2011
Author:	ezachte
Status:	deferred
Tags:
Comment:	Process squid logs for traffic reports http://stats.wikimedia.org/#requests
Modified paths:	/trunk/wikistats/squids (added) (history) /trunk/wikistats/squids/SquidCountArchive.pl (added) (history) /trunk/wikistats/squids/SquidCountArchive.sh (added) (history) /trunk/wikistats/squids/SquidCountryScan.pl (added) (history) /trunk/wikistats/squids/SquidCountryScan.sh (added) (history) /trunk/wikistats/squids/SquidReportArchive.pl (added) (history) /trunk/wikistats/squids/SquidReportArchive.sh (added) (history)

Diff [purge]

Index: trunk/wikistats/squids/SquidCountryScan.sh
—	—	@@ -0,0 +1,6 @@
	2	+#!/bin/bash
	3	+
	4	+ulimit -v 4000000
	5	+
	6	+# perl ./SquidCountryScan.pl -y 2010
	7	+perl ./SquidCountryScan.pl # start in July 2009
Property changes on: trunk/wikistats/squids/SquidCountryScan.sh
___________________________________________________________________
Added: svn:eol-style
1	8	+ native
Index: trunk/wikistats/squids/SquidCountryScan.pl
—	—	@@ -0,0 +1,488 @@
	2	+#!/usr/bin/perl
	3	+## Collect page views stats by country on Locke
	4	+## sub CollectRawData -> SquidDataCountries.csv
	5	+## sub ProcessRawData <- SquidDataCountries.csv -> ??
	6	+
	7	+ use lib "/home/ezachte/lib" ;
	8	+ use EzLib ;
	9	+ $trace_on_exit = $true ;
	10	+
	11	+ use Time::Local ;
	12	+ use Getopt::Std ;
	13	+ use Cwd;
	14	+ $timestart = time ;
	15	+
	16	+ my %options ;
	17	+ getopt ("y", \%options) ;
	18	+ $process_year = $options {"y"} ;
	19	+ if (($process_year !~ /^\d\d\d\d$/) \|\| ($process_year < 2009))
	20	+ {
	21	+ $process_year = 2009 ;
	22	+ # print "Specify year as '-y nnnn'\n\n" ;
	23	+ # exit ;
	24	+ }
	25	+
	26	+ $path_root = "/a/ezachte/" ;
	27	+# $path_root = "w:/! perl/squids/archive/" ;
	28	+
	29	+ $file_raw_data_monthly_visits = "$path_root/SquidDataVisitsPerCountryMonthly.csv" ;
	30	+ $file_raw_data_daily_visits = "$path_root/SquidDataVisitsPerCountryDaily.csv" ;
	31	+ $file_per_country_visits = "public/SquidDataCountriesViews.csv" ;
	32	+ $file_per_country_visits_old = "SquidDataCountries2.csv" ;
	33	+
	34	+ $file_raw_data_monthly_saves = "$path_root/SquidDataSavesPerCountryMonthly.csv" ;
	35	+ $file_raw_data_daily_saves = "$path_root/SquidDataSavesPerCountryDaily.csv" ;
	36	+ $file_per_country_saves = "public/SquidDataCountriesSaves.csv" ;
	37	+ $file_per_country_saves_old = "SquidDataCountriesSaves.csv" ;
	38	+
	39	+ &CollectRawData ('visits', $file_per_country_visits, $file_per_country_visits_old, $file_raw_data_monthly_visits, $file_raw_data_daily_visits) ;
	40	+ &CollectRawData ('saves', $file_per_country_saves, $file_per_country_saves_old, $file_raw_data_monthly_saves, $file_raw_data_daily_saves) ;
	41	+# &ProcessRawData ;
	42	+
	43	+ exit ;
	44	+
	45	+sub CollectRawData
	46	+{
	47	+ my ($mode, $file_per_country, $file_per_country_old, $file_raw_data_monthly, $file_raw_data_daily) = @_ ;
	48	+ my ($visits_wp_total, $visits_total_wp_en) ;
	49	+ my (%visits_monthly, %visits_daily, %visits_wp_yyyymm, %visits_per_project, %visits_per_language, %visits_per_country, %visits_wp_b, %visits_wp_u, %correct_for_missing_days) ;
	50	+
	51	+ print "Collect raw data for $mode\n\n" ;
	52	+ print "Input data per country $file_per_country, $file_per_country_old\n" ;
	53	+ print "Raw data monthly $file_raw_data_monthly\n" ;
	54	+ print "Raw data daily $file_raw_data_daily\n\n" ;
	55	+
	56	+ $year = $process_year ;
	57	+ if ($year == 2009)
	58	+ { $month = 7 ; }
	59	+ else
	60	+ { $month = 1 ; }
	61	+
	62	+ while ($true)
	63	+ {
	64	+ $dir = "$path_root/" . sprintf ("%04d-%02d", $year, $month) ;
	65	+ $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
	66	+ if (-d $dir)
	67	+ {
	68	+ print "Dir: $dir\n" ;
	69	+ $days_in_month = &DaysInMonth ($year,$month) ;
	70	+
	71	+ $days_found = 0 ;
	72	+ for ($day = 1 ; $day <= $days_in_month ; $day++)
	73	+ {
	74	+ if (($month == 4) && ($year == 2009) && ($day < 18)) { next ; }
	75	+
	76	+ $yyyymmdd = sprintf ("%04d-%02d-%02d", $year, $month, $day) ;
	77	+
	78	+ # do not combine with SquidDataCountries.csv from earlier months
	79	+ # only from 2009-07 anonymous bots (hits > 1 in sampled log) were ignored
	80	+ $file = "$dir/" . sprintf ("%04d-%02d-%02d", $year, $month, $day) . "/$file_per_country_old" ;
	81	+ # print "READ1 $file\n" ;
	82	+ if (! -e $file)
	83	+ {
	84	+ $file = "$dir/" . sprintf ("%04d-%02d-%02d", $year, $month, $day) . "/$file_per_country" ;
	85	+ # print "READ2 $file\n" ;
	86	+ }
	87	+
	88	+ if (-e $file)
	89	+ {
	90	+ $days_found++ ;
	91	+ # print "File: $file\n" ;
	92	+ open IN, '<', $file ;
	93	+ while ($line = <IN>)
	94	+ {
	95	+ if ($line =~ /^#/) { next ; }
	96	+
	97	+ chomp $line ;
	98	+ ($bot,$wiki,$country,$count) = split (',', $line) ;
	99	+
	100	+ if ($bot =~ /Y/)
	101	+ { $bot = 'B' ; }
	102	+ else
	103	+ { $bot = 'U' ; }
	104	+
	105	+ ($project,$language) = split (':', $wiki) ;
	106	+ $project =~ s/\s//g ;
	107	+
	108	+ # if ($project ne "wp") { next ; }
	109	+ # if ($yyyymm ne "2009-11") { next ; }
	110	+ # if ($language eq "www") { next ; }
	111	+
	112	+ $visits_monthly {"$yyyymm,$project,$language,$country,$bot"} += $count ;
	113	+ $visits_daily {"$yyyymmdd,$project,$language,$country,$bot"} += $count ;
	114	+
	115	+ # following hashes for specific research, not for regular csv files
	116	+ if (($project eq "wp") && ($bot eq 'U') && ($country ne "--"))
	117	+ {
	118	+ $visits_wp_yyyymm {$yyyymm} += $count ;
	119	+ $visits_wp_total += $count ;
	120	+ }
	121	+
	122	+ if (($project eq "wp") && ($language eq "en") && ($bot eq 'U') && ($country ne "--"))
	123	+ {
	124	+ $visits_total_wp_en += $count ;
	125	+ $visits_wp_en {$country} += $count ;
	126	+ }
	127	+
	128	+ if (($bot eq 'U') && ($country ne "--"))
	129	+ {
	130	+ $visits_per_project {$project} += $count ;
	131	+ $visits_per_language {$language} += $count ;
	132	+ $visits_per_country {$country} += $count ;
	133	+ }
	134	+
	135	+ $visits_total += $count ;
	136	+
	137	+ if (($project eq "wp") && ($language =~ /^(?:th\|sk)$/))
	138	+ {
	139	+ if ($bot eq 'U')
	140	+ { $visits_wp_u {"$language $yyyymm"} += $count ; }
	141	+ else
	142	+ { $visits_wp_b {"$language $yyyymm"} += $count ; }
	143	+ }
	144	+ }
	145	+ close IN ;
	146	+ }
	147	+ else
	148	+ { print "Miss! $file\n" ; }
	149	+ }
	150	+ $correct_for_missing_days {$yyyymm} = 1 ;
	151	+ if (($days_found > 0) && ($days_in_month > $days_found))
	152	+ {
	153	+ $correct_for_missing_days {$yyyymm} = $days_in_month / $days_found ;
	154	+ print "Correct for $yyyymm: $days_found -> $days_in_month = * ${correct_for_missing_days {$yyyymm}}\n" ;
	155	+ }
	156	+ }
	157	+ else
	158	+ {
	159	+ print "Folder $dir not found. Processing complete.\n" ;
	160	+ last ;
	161	+ }
	162	+
	163	+ $month++ ;
	164	+ if ($month > 12)
	165	+ {
	166	+ $month =1 ;
	167	+ $year ++ ;
	168	+ # last ;
	169	+ }
	170	+ }
	171	+
	172	+ print "\nVisits per project:\n" ;
	173	+ foreach $key (sort {$visits_per_project {$b} <=> $visits_per_project {$a} } keys %visits_per_project)
	174	+ {
	175	+ print sprintf ("%9d", $visits_per_project {$key}) . " " .sprintf ("%5.2f", 100 * $visits_per_project {$key}/$visits_total) . "% $key\n" ;
	176	+ }
	177	+
	178	+ print "\n\n" ;
	179	+
	180	+ print "\nVisits per country:\n" ;
	181	+ foreach $key (sort {$visits_per_country {$a} <=> $visits_per_country {$b}} keys %visits_per_country)
	182	+ {
	183	+ print sprintf ("%9d", $visits_per_country {$key}) . " " .sprintf ("%6.3f", 100 * $visits_per_country {$key}/$visits_total) . "% $key\n" ;
	184	+ }
	185	+
	186	+ print "\nWikipedia visits per country:\n" ;
	187	+ foreach $key (sort {$visits_wp_u {$b} cmp $visits_wp_u {$a}} keys %visits_wp_u)
	188	+ {
	189	+ print sprintf ("%9.1f", ($visits_wp_u {$key} + $visits_wp_b {$key}) /1000) . " - " . sprintf ("%9.1f", $visits_wp_u {$key} /1000) . " - " . sprintf ("%9.1f", $visits_wp_b {$key} /1000) . " $key\n" ; # / 1000 on 1:1000 sampled file is millions
	190	+ }
	191	+
	192	+ print "\nVisits per language:\n" ;
	193	+ foreach $key (sort {$visits_per_language {$a} <=> $visits_per_language {$b}} keys %visits_per_language)
	194	+ {
	195	+ print sprintf ("%9d", $visits_per_language {$key}) . " " .sprintf ("%6.3f", 100 * $visits_per_language {$key}/$visits_total) . "% $key\n" ;
	196	+ }
	197	+
	198	+ print "\nVisits to English Wikipedia\n" ;
	199	+ foreach $key (sort {$visits_wp_en {$a} <=> $visits_wp_en {$b}} keys %visits_wp_en)
	200	+ {
	201	+ print sprintf ("%9d", $visits_wp_en {$key}) . " " .sprintf ("%6.3f", 100 * $visits_wp_en {$key}/$visits_total_wp_en) . "% $key\n" ;
	202	+ }
	203	+
	204	+ print "\n\n" ;
	205	+
	206	+ print "\n\n" ;
	207	+
	208	+# foreach $key (sort keys %visits)
	209	+# {
	210	+# if ($key !~ /wq/) { next ; }
	211	+# print sprintf ("%5d", $visits {$key}) . " $key\n" ;
	212	+# }
	213	+
	214	+ open CSV_MONTHLY, '>', $file_raw_data_monthly ;
	215	+ foreach $key (sort keys %visits_monthly)
	216	+ {
	217	+ ($yyyymm, $project, $language, $country) = split (',', $key) ;
	218	+ $correction = $correct_for_missing_days {$yyyymm} ;
	219	+ $count = $visits_monthly{$key} ;
	220	+ $count2 = $count ;
	221	+ if (($correction != 0) && ($correction != 1))
	222	+ {
	223	+ $count2 = $count ;
	224	+ $count = sprintf ("%.0f", $count * $correction) ;
	225	+ # print "$yyyymm: $count2 -> $count (=* $correction)\n" ;
	226	+ }
	227	+ print CSV_MONTHLY "$key,$count\n" ;
	228	+ }
	229	+ close CSV_MONTHLY ;
	230	+
	231	+ # note correct for missing days in follow processing, see monthly data above
	232	+ open CSV_DAILY, '>', $file_raw_data_daily ;
	233	+ foreach $key (sort keys %visits_daily)
	234	+ { print CSV_DAILY "$key,${visits_daily{$key}}\n" ; }
	235	+ close CSV_DAILY ;
	236	+
	237	+ foreach $yyyymm (sort keys %visits_wp_yyyymm)
	238	+ {
	239	+ $total = $visits_wp_yyyymm {$yyyymm} ;
	240	+ $correction = $correct_for_missing_days {$yyyymm} ;
	241	+ $total_corrected = $total * $correction ;
	242	+ $total_corrected_share = int (100 * $total_corrected / $visits_wp_total) ;
	243	+ print "$yyyymm: $total * $correction = $total_corrected / $visits_wp_total = $total_corrected_share\%\n" ;
	244	+ }
	245	+}
	246	+
	247	+sub ProcessRawData
	248	+{
	249	+ print "\nProcessRawData\n\n" ;
	250	+
	251	+ open IN, '<', $file_raw_data ;
	252	+ open OUT, '>', $file_csv_counts_daily_project ;
	253	+
	254	+ $date_prev = "" ;
	255	+
	256	+ while ($line = <IN>)
	257	+ {
	258	+ $lines++ ;
	259	+ chomp ($line) ;
	260	+ # ($date,$bot,$from,$to,$php,$status,$mime,$action,$agent,$count) = split (',', $line) ;
	261	+ ($date,$bot,$from,$to,$status,$mime,$action,$count) = split (',', $line) ;
	262	+
	263	+# if ($to !~ /wk:lt/) { next ; }
	264	+
	265	+ if ($bot =~ /^#/) { next ; } # fix, should be removed in CollectRawData
	266	+
	267	+ # if ($php ne "php(index.php)") { $lines_unexpected_php {$php}++ ; next ; }
	268	+
	269	+ $action2 = $action ;
	270	+ $action2 =~ s/\&.*$// ;
	271	+ $counts_per_action {"$action2"} += $count ;
	272	+
	273	+ $action =~ s/\&/&/g ;
	274	+
	275	+ if ($action =~ /submitlogin/)
	276	+ { next ; }
	277	+
	278	+ if (($action !~ /^action=edit\&/) && ($action !~ /^action=submit\&/) )
	279	+ {
	280	+ $invalid_actions ++ ;
	281	+ next ;
	282	+ }
	283	+
	284	+ if ($mime ne "text/html")
	285	+ {
	286	+ $mime_not_text_html {$mime} ++ ;
	287	+ next ;
	288	+ }
	289	+
	290	+ if (! ((($action =~ /action=edit/) && ($status =~ /200/)) \|\|
	291	+ (($action =~ /action=submit/) && ($status =~ /302/))))
	292	+ { next ; }
	293	+
	294	+ $counts_per_relevant_action_and_status1 {"$action2"} += $count ;
	295	+
	296	+ $counts_per_bot_relevant_action_and_status2 {"$bot,$action2,$status"} += $count ;
	297	+
	298	+ if ($action !~ /redlink/)
	299	+ {
	300	+ $counts_per_relevant_action_and_status_no_redlink {"$action2,$status"} += $count ;
	301	+
	302	+ $counts_per_bot_relevant_action_and_status_no_redlink {"$bot,$status,$action2"} += $count ;
	303	+
	304	+ if ($bot =~ /N/)
	305	+ {
	306	+ # print "$to,$action2,$count\n" ;
	307	+ $counts_no_bot_per_relevant_action_and_status_no_redlink {"$to,$action2"} += $count ;
	308	+ $counts_no_bot_no_redlink_per_destination {$to} += $count ;
	309	+ }
	310	+ }
	311	+
	312	+ if (($action =~ /redlink/) && ($status =~ /(?:200\|302)/))
	313	+ {
	314	+ $counts_per_relevant_status_with_redlink {"$to,action=edit,redlink=..,$status"} += $count ;
	315	+ $counts_per_destination {$to} += $count ;
	316	+ }
	317	+
	318	+ if ($action =~ /redlink/)
	319	+ { next ; }
	320	+
	321	+ if (($to !~ /wp:(?:en\|de\|ja\|es\|fr\|ru\|zh)$/) && ($to !~ /wk:(?:lt)$/) && ($to !~ /wx:(?:mw)$/))
	322	+ { next ; }
	323	+
	324	+ if ($bot !~ /N/)
	325	+ { next ; }
	326	+
	327	+ $counts {"$date,$to,$action2"} += $count ;
	328	+ $dates {$date}++ ;
	329	+ $tos {$to}++ ;
	330	+
	331	+ if ($bot eq "bot=Y")
	332	+ {
	333	+ if ($action =~ /action=edit/)
	334	+ {$ bots_edits += $count ; }
	335	+ elsif ($action =~ /action=submit/)
	336	+ { $bots_saves += $count ; }
	337	+ }
	338	+ else
	339	+ {
	340	+ if ($action =~ /action=edit/)
	341	+ {$user_edits += $count ; }
	342	+ elsif ($action =~ /action=submit/)
	343	+ { $user_saves += $count ; }
	344	+ }
	345	+ }
	346	+
	347	+
	348	+ print OUT "date," ;
	349	+ foreach $to (sort keys %tos)
	350	+ { print OUT "edits $to,saves $to,ratio $to," ; }
	351	+ print OUT "\n" ;
	352	+
	353	+ foreach $date (sort keys %dates)
	354	+ {
	355	+ # print "DAY $date\n" ;
	356	+ $csv_date = "\"=DATE(" . substr ($date,0,4) . "," . substr ($date,4,2) . "," . substr ($date,6,2) . ")\"" ;
	357	+
	358	+ print OUT "$csv_date, " ;
	359	+
	360	+ foreach $to (sort keys %tos)
	361	+ {
	362	+ # print "TO $to\n" ;
	363	+
	364	+ $edits = $counts {"$date,$to,action=edit"} ;
	365	+ $submits = $counts {"$date,$to,action=submit"} ;
	366	+ $ratio = -1 ;
	367	+ if ($submits > 0)
	368	+ { $ratio = sprintf ("%.1f", $edits/$submits) ; }
	369	+ print OUT "$edits,$submits,$ratio," ;
	370	+ }
	371	+ print OUT "\n" ;
	372	+ }
	373	+
	374	+ # Write CSV_COUNT_DAILY
	375	+
	376	+ open CSV_COUNT_DAILY, '>', $file_csv_counts_daily ;
	377	+ foreach $key (sort keys %counts)
	378	+ { print CSV_COUNT_DAILY sprintf ("%6d", $counts {$key}) . ",$key\n" ; }
	379	+ close CSV_COUNT_DAILY ;
	380	+
	381	+ $text = "" ;
	382	+ $text .= "\nInvalid actions: $invalid_actions\n\n" ;
	383	+
	384	+ $text .= "Counts per action:\n" ;
	385	+ foreach $key (sort keys %counts_per_action)
	386	+ {
	387	+ $count = $counts_per_action {$key} ;
	388	+ if ($count < 5) { next ; }
	389	+ $text .= sprintf ("%6d", $count) . ",$key\n" ;
	390	+ }
	391	+ $text .= "\n\n" ;
	392	+
	393	+ $text .= "Counts per relevant action and status:\n" ;
	394	+ foreach $key (sort keys %counts_per_relevant_action_and_status1)
	395	+ {
	396	+ $count = $counts_per_relevant_action_and_status1 {$key} ;
	397	+ # if ($count < 5) { next ; }
	398	+ $text .= sprintf ("%6d", $count) . ",$key\n" ;
	399	+ }
	400	+ $text .= "\n\n" ;
	401	+
	402	+ $text .= "Counts per bot, relevant action and status:\n" ;
	403	+ foreach $key (sort keys %counts_per_bot_relevant_action_and_status2)
	404	+ {
	405	+ $count = $counts_per_bot_relevant_action_and_status2 {$key} ;
	406	+ # if ($count < 5) { next ; }
	407	+ $text .= sprintf ("%6d", $count) . ",$key\n" ;
	408	+ }
	409	+ $text .= "\n\n" ;
	410	+
	411	+ $text .= "Counts per relevant action and status and no redlinks:\n" ;
	412	+ foreach $key (sort keys %counts_per_relevant_action_and_status_no_redlink)
	413	+ {
	414	+ $count = $counts_per_relevant_action_and_status_no_redlink {$key} ;
	415	+ if ($count < 5) { next ; }
	416	+ $text .= sprintf ("%6d", $count) . ",$key\n" ;
	417	+ }
	418	+ $text .= "\n\n" ;
	419	+
	420	+ $text .= "Count per bot, relevant action and status and no redlink:\n" ;
	421	+ foreach $key (sort keys %counts_per_bot_relevant_action_and_status_no_redlink)
	422	+ {
	423	+ $count = $counts_per_bot_relevant_action_and_status_no_redlink {$key} ;
	424	+ # if ($count < 5) { next ; }
	425	+ $text .= sprintf ("%-33s",$key) . sprintf ("%6d", $count) . "\n" ;
	426	+ }
	427	+ $text .= "\n\n" ;
	428	+
	429	+ $text .= "Counts no bot, per relevant action and status no redlink:\n" ;
	430	+ foreach $key (sort keys %counts_no_bot_per_relevant_action_and_status_no_redlink)
	431	+ {
	432	+ ($to = $key) =~ s/,.*$// ;
	433	+ if ($to !~ /:/) { next ; }
	434	+ if ($counts_no_bot_no_redlink_per_destination {$to} < 100) { next ; }
	435	+ $count = $counts_no_bot_per_relevant_action_and_status_no_redlink {$key} ;
	436	+ if ($key =~ /action=edit/)
	437	+ {
	438	+ $count_edit = $counts_no_bot_per_relevant_action_and_status_no_redlink {"$to,action=edit"} ;
	439	+ $count_submit = $counts_no_bot_per_relevant_action_and_status_no_redlink {"$to,action=submit"} ;
	440	+ $count_edits += $count_edit ;
	441	+ $count_submits += $count_submit ;
	442	+ $ratio = '..' ;
	443	+ if ($count_submit > 0)
	444	+ { $ratio = sprintf ("%5.1f", $count_edit / $count_submit) ; }
	445	+ push @ratios, "$ratio\|" . sprintf ("%-14s",$to) . "edits " . sprintf ("%6d", $count_edit) . ", submits ". sprintf ("%6d", $count_submit) . ", ratio $ratio\n" ;
	446	+ }
	447	+ # $text .= sprintf ("%-33s",$key) . sprintf ("%6d", $count) . "\n" ;
	448	+ }
	449	+ @ratios = sort {$b <=> $a} @ratios ;
	450	+ foreach $line (@ratios)
	451	+ {
	452	+ ($ratio, $line) = split ('\\|', $line) ;
	453	+ $text .= $line ;
	454	+ }
	455	+ $ratio = sprintf ("%5.1f", $count_edits / $count_submits) ;
	456	+ $text .= sprintf ("%-14s",'total') . "edits " . sprintf ("%6d", $count_edits) . ", submits ". sprintf ("%6d", $count_submits) . ", ratio $ratio\n" ;
	457	+ $text .= "\n\n" ;
	458	+ print $count
	459	+
	460	+ $text .= "Count per relevant status with redlink:\n" ;
	461	+ foreach $key (sort keys %counts_per_relevant_status_with_redlink)
	462	+ {
	463	+ $count = $counts_per_relevant_status_with_redlink {$key} ;
	464	+ ($to = $key) =~ s/,.*$// ;
	465	+ if ($counts_per_destination {$to} < 100) { next ; }
	466	+ $text .= sprintf ("%6d", $count) . ",$key\n" ;
	467	+ }
	468	+ $text .= "\n\n" ;
	469	+
	470	+ open SUMMARY, '>', $file_txt_summary ;
	471	+ print SUMMARY $text ;
	472	+ close SUMMARY ;
	473	+
	474	+ print $text ;
	475	+}
	476	+
	477	+
	478	+sub DaysInMonth
	479	+{
	480	+ my $year = shift ;
	481	+ my $month = shift ;
	482	+ my $timegm1 = timegm (0,0,0,1,$month-1,$year-1900) ;
	483	+ $month++ ;
	484	+ if ($month > 12)
	485	+ { $month = 1 ; $year++ }
	486	+ my $timegm2 = timegm (0,0,0,1,$month-1,$year-1900) ;
	487	+ my $days = ($timegm2-$timegm1) / (246060) ;
	488	+ return ($days) ;
	489	+}
Index: trunk/wikistats/squids/SquidCountArchive.sh
—	—	@@ -0,0 +1,13 @@
	2	+#!/bin/bash
	3	+
	4	+ulimit -v 4000000
	5	+
	6	+home="/a/ezachte"
	7	+log="$home/SquidCountArchiveLog.txt"
	8	+script="$home/SquidCountArchive.pl"
	9	+
	10	+echo "" > $log
	11	+
	12	+nice perl $script -d 2011/02/07-2011/02/11
	13	+echo "Ready" >> $log
	14	+echo "Ready"
Property changes on: trunk/wikistats/squids/SquidCountArchive.sh
___________________________________________________________________
Added: svn:eol-style
1	15	+ native
Index: trunk/wikistats/squids/SquidCountArchive.pl
—	—	@@ -0,0 +1,1030 @@
	2	+ #!/usr/bin/perl
	3	+
	4	+ use lib "/home/ezachte/lib" ;
	5	+ use EzLib ;
	6	+
	7	+ $trace_on_exit = $true ;
	8	+ ez_lib_version (13) ;
	9	+
	10	+ use SquidCountArchiveProcessLogRecord ;
	11	+ use SquidCountArchiveReadInput ;
	12	+ use SquidCountArchiveWriteOutput ;
	13	+
	14	+ # set defaults mainly for tests on local machine
	15	+ default_argv "-d 2010/05/10" ;
	16	+
	17	+# http://wikitech.wikimedia.org/view/Squid_log_format
	18	+# 1. Hostname
	19	+# 2. Sequence number
	20	+# 3. Current time in ISO 8601 format (plus milliseconds), according to the squid server's clock
	21	+# 4. Request time in ms
	22	+# 5. Client IP
	23	+# 6. Squid request status, HTTP status code
	24	+# 7. Reply size including HTTP headers
	25	+# 8. Request method (GET/POST etc)
	26	+# 9. URL
	27	+# 10. Squid hierarchy status, peer IP
	28	+# 11. MIME content type
	29	+# 12. Referer header
	30	+# 13. X-Forwarded-For header
	31	+# 14. User-Agent header
	32	+
	33	+# valid parameters:
	34	+# parm -d m[-n] (last m|n days before today) or yyyymmdd[-yyyymmdd] or yyyy/mm/dd[-yyyy/mm/dd]
	35	+# parm -f [1|2|12] force phase 1 and/or 2 even when already run successfully earlier
	36	+# phase 1 = collect IP frequency counts, this is first pass through data (there is little chance this needs to be redone, hence default is no overwrite)
	37	+# phase 2 = collect other counts, this may have to be redone after filtering logic has changed
	38	+# parm -t test mode
	39	+
	40	+# todo: parm -e use unsampled file with all edits and saves
	41	+# todo: parm -r root folder
	42	+
	43	+ $test = $false ;
	44	+ $test_maxlines = 4000000 ;
	45	+
	46	+ if (! $job_runs_on_production_server)
	47	+ {
	48	+ $test = $true ;
	49	+ $file_test = "w:/# Out Locke/sampled-1000-log-20100510b.txt" ;
	50	+ # $file_test = getcwd . "/SquidDataFilterFY.txt" ;
	51	+ if (! -e $file_test)
	52	+ { abort "Test input file '$file_test' not found" ; }
	53	+ }
	54	+
	55	+ $time_start = time ;
	56	+
	57	+ if ($job_runs_on_production_server)
	58	+ { $path_root = "/a/ezachte" ; }
	59	+ else
	60	+ { $path_root = "w:/! perl/squids/archive/test" ; }
	61	+
	62	+ $tags_mobile = "Android\|BlackBerry\|Windows CE\|DoCoMo\|iPad\|iPod\|iPhone\|HipTop\|LGE\|Linux arm\|Mobile\|MIDP\|NetFront\|Nintendo\|Nokia\|Obigo\|Opera Mini\|Palm Pre\|Playstation\|Samsung\|SoftBank\|SonyEricsson\|SymbianOS\|UP\.Browser\|Vodafone\|WAP\|webOS\|Wikiamo\|Wikipanion" ;
	63	+ $tags_mobile_upd = "May 2010" ;
	64	+
	65	+ $pattern_url_pre = "(?:^\|[a-zA-Z0-9-]+\\.)*?" ;
	66	+ $pattern_url_post = "\\.(?:biz\|com\|info\|name\|net\|org\|pro\|aero\|asia\|cat\|coop\|edu\|gov\|int\|jobs\|mil\|mobi\|museum\|tel\|travel\|arpa\|[a-zA-Z0-9-]{2}\|(?:com?\|ne)\\.[a-zA-Z0-9-]{2})\$" ;
	67	+
	68	+ my (%squid_seqno_lo, %squid_seqno_hi) ;
	69	+
	70	+ my ($from_days_ago, $till_days_ago, $from_date, $till_date) = &ParseArguments ;
	71	+ &SetFileNames ;
	72	+
	73	+ my ($path_out, $path_out_month) ;
	74	+ for ($days_ago = $from_days_ago ; $days_ago >= $till_days_ago ; $days_ago--)
	75	+ {
	76	+ if ($days_to_process ++ > 0)
	77	+ { print "\n" . "=" x 80 . "\n" ; }
	78	+ ($path_out, $path_out_month) = &SetPathOut ($days_ago) ;
	79	+
	80	+ open OUT, '>', "$path_out/$file_out" ;
	81	+ open OUT2, '>', "$path_out/$file_out2" ;
	82	+ open ERR, '>', "$path_out/$file_err" ;
	83	+ # open FILTER_FY, '>>', "$path_out_month/$file_filter_fy" ;
	84	+
	85	+ my $do_phase1 = &CheckProcessPhase1 ($days_ago, $path_out) ; # Collect IP frequencies
	86	+ my $do_phase2 = &CheckProcessPhase2 ($days_ago, $path_out) ; # collect other data
	87	+
	88	+ next if ! $do_phase1 and ! $do_phase2 ;
	89	+
	90	+ &InitGlobals ;
	91	+ undef @files ; # keep out of InitGlobals, to allow rerun with same files, see 'test InitGlobals' below
	92	+
	93	+ ($date_collect_files, $time_to_start, $time_to_stop) = &SetTimeRangeToProcess ($days_ago) ;
	94	+
	95	+ $all_files_found = &CollectFilesToProcess ($days_ago, $date_collect_files, $time_to_start, $time_to_stop, $path_out, $path_out_month) ;
	96	+ next if not $all_files_found ;
	97	+
	98	+ if ($do_phase1) # Collect IP frequencies
	99	+ { &ProcessPhase1 ($days_ago, $date_collect_files, $time_to_start, $time_to_stop, $path_out, @files) ; }
	100	+
	101	+ if ($do_phase2) # collect other data
	102	+ { &ProcessPhase2 ($days_ago, $date_collect_files, $time_to_start, $time_to_stop, $path_out, $path_out_month, @files) ; }
	103	+
	104	+ # test InitGlobals: rebuild files in alternate folder, if InitGlobals did its work, all files are binary equal
	105	+ # &InitGlobals ;
	106	+ # if ($do_phase2) # collect other data
	107	+ # { &ProcessPhase2 ($days_ago, $date_collect_files, $time_to_start, $time_to_stop, $path_out. 'b', $path_out_month, @files) ; }
	108	+
	109	+ close OUT ;
	110	+ close OUT2 ;
	111	+ close ERR ;
	112	+ # close FILTER_FY ;
	113	+ }
	114	+
	115	+# if (defined ($options {"u"})) # all lines with action=edit or action=submit generated in mode scan_squid_archive
	116	+# { &ScanEditsSavesFile ; } # also use to build ScanDataCountriesSaves.csv for earlier months from SquidDataEditsSavesyyyy-mm-dd.txt.bz2
	117	+# else
	118	+# {
	119	+# if (defined ($options {"a"})) # scan ip addresses only (find multiple occurrences, store for reuse)
	120	+# {
	121	+# $scan_ip_frequencies = $true ;
	122	+# print "Scan for multiple occurrences of ip addresses\n\n" ;
	123	+# }
	124	+# elsif (defined ($options {"s"})) # scan squid sequence numbers
	125	+# {
	126	+# $scan_squid_msg_sequence_numbers = $true ;
	127	+# print "Scan for squid sequence numbers\n\n" ;
	128	+# }
	129	+# else
	130	+# {
	131	+# $scan_all_fields = $true ;
	132	+# print "Scan all fields\n\n" ;
	133	+# }
	134	+
	135	+# &ScanSquidArchive ;
	136	+# }
	137	+
	138	+# &ProcessSquidSequenceNumbers ;
	139	+
	140	+ print "\n\nReady\n\n" ;
	141	+ exit ;
	142	+
	143	+sub ParseArguments
	144	+{
	145	+ trace ParseArguments ;
	146	+
	147	+ my %options ;
	148	+
	149	+ getopt ("df", \%options) ;
	150	+
	151	+ $date_range = $options {"d"} ;
	152	+ $force_phases = $options {"f"} ;
	153	+
	154	+ if ($force_phases !~ /^(?:\|1\|2\|12\|21)$/)
	155	+ { abort "Invalid data for -f parameter: specify which phases to force as -f [1\|2\|12]\nForce = execute phase even when already done succesfully earlier\nPhase1 = collect ip counts\nPhase2 = collect other counts\n" ; }
	156	+
	157	+ if ($date_range eq '')
	158	+ { abort "No valid date range specified\n\nSpecify first and last day to process as:\n'-d yyyymmdd[-yyyymmdd]' (yymmdd or yyyy/mm/dd, " .
	159	+ "second date defaults to first)\nor\n'-d mmm[-nnn]', where mmm and nnn are days before today (mmm less or equal to nnn), nnn defaults to mmm\n\n" ; }
	160	+
	161	+ if ($date_range =~ m/^\d{4}\/?\d{2}\/?\d{2}(?:\-\d{4}\/?\d{2}\/?\d{2})?$/) # specify daterange as yyyymmdd-yyyymmdd or yyyy/mm/dd-yyyy/mm/dd
	162	+ {
	163	+ if ($date_range =~ /^\d{4}\/?\d{2}\/?\d{2}$/) # expand shorthand version
	164	+ { $date_range =~ s/^(\d{4}\/?\d{2}\/?\d{2})$/$1-$1/ ; }
	165	+
	166	+ ($from_date,$till_date) = split '-', $date_range ;
	167	+
	168	+ $from_year = substr ($from_date,0,4) ;
	169	+ $from_month = substr ($from_date,4,2) ;
	170	+ $from_day = substr ($from_date,6,2) ;
	171	+
	172	+ $till_year = substr ($till_date,0,4) ;
	173	+ $till_month = substr ($till_date,4,2) ;
	174	+ $till_day = substr ($till_date,6.2) ;
	175	+
	176	+ $from_days_ago = ValidateDateAndCalcDaysAgo ('from date', $from_date) ;
	177	+ $till_days_ago = ValidateDateAndCalcDaysAgo ('till date', $till_date) ;
	178	+
	179	+ my $diff_days = ($from_days_ago - $till_days_ago) + 1 ;
	180	+ if ($till_days_ago > $from_days_ago)
	181	+ { abort "Invalid date range: from date '$from_date' is later than till date '$till_date'\n" ; }
	182	+
	183	+ $yyyymmdd = 'yyyy/mm/dd' ;
	184	+ if ($from_date !~ /\//)
	185	+ { $yyyymmdd =~ s/\///g ; }
	186	+ print "Process following date range:\nFrom '$from_date' till '$till_date' ($yyyymmdd)\nWhich is from $from_days_ago till $till_days_ago days ago = $diff_days days\n" ;
	187	+ }
	188	+ elsif ($date_range =~ /^\d{1,3}(?:-\d{1,3})?$/) # specify daterange as mmm-nnn (where mmm and nnn are number of days before today), nnn defaults to mmm
	189	+ {
	190	+ if ($date_range =~ /^\d+$/) # expand shorthand version
	191	+ { $date_range =~ s/^(\d+)$/$1-$1/ ; }
	192	+
	193	+ ($from_days_ago,$till_days_ago) = split '-', $date_range ;
	194	+
	195	+ if ($till_days_ago > $from_days_ago) # swap
	196	+ # { abort "Invalid date range: from date '$from_date' is later than till date '$till_date'\n" ; }
	197	+ { my $temp = $till_days_ago ; $till_days_ago = $from_days_ago ; $from_days_ago = $temp ; }
	198	+
	199	+ ($sec,$min,$hour,$day,$month,$year) = localtime (time) ;
	200	+ ($year,$month,$day) = &ShiftDays ($year+1900, $month+1, $day, - $from_days_ago) ;
	201	+ $from_date = sprintf ("%04d/%02d/%02d",$year,$month,$day) ;
	202	+
	203	+ ($sec,$min,$hour,$day,$month,$year) = localtime (time) ;
	204	+ ($year,$month,$day) = &ShiftDays ($year+1900, $month+1, $day, - $till_days_ago) ;
	205	+ $till_date = sprintf ("%04d/%02d/%02d",$year,$month,$day) ;
	206	+
	207	+ my $diff_days = ($from_days_ago - $till_days_ago) + 1 ;
	208	+ print "Process following date range:\nFrom $from_days_ago till $till_days_ago days ago, which is:\nFrom '$from_date' till '$till_date' (yyyy/mm/dd) = $diff_days days\n" ;
	209	+ }
	210	+ else
	211	+ { abort "\nNo valid date range specified!\n\nSpecify first and last day to process as:\n'-d yyyymmdd[-yyyymmdd]' (yyyy/m/dd also valid)\n" .
	212	+ "(second date defaults to first)\nor\n'-d mmm[-nnn]', where mmm and nnn are days before today (mmm =< nnn), nnn defaults to mmm\n\n" ; }
	213	+
	214	+ if ($options {"t"})
	215	+ {
	216	+ $test = $true ;
	217	+ print "Run in test mode: process less input\n" ;
	218	+ }
	219	+
	220	+ return ($from_days_ago, $till_days_ago, $from_date, $till_date) ;
	221	+}
	222	+
	223	+sub ValidateDateAndCalcDaysAgo
	224	+{
	225	+ trace ValidateDateAndCalcDaysAgo ;
	226	+
	227	+ my ($desc, $date) = @_ ;
	228	+
	229	+ my ($sec,$min,$hour,$day,$month,$year) ;
	230	+ ($sec,$min,$hour,$day,$month,$year) = localtime (time) ;
	231	+
	232	+ my $date_today = sprintf ("%4d/%02d/%02d", $year+1900,$month+1,$day) ;
	233	+ if ($date !~ /\//)
	234	+ { $date_today =~ s/\///g ; }
	235	+
	236	+ if ($date =~ m!^(20\d\d)/?(0[1-9]\|1[012])/?(0[1-9]\|[12][0-9]\|3[01])$!)
	237	+ {
	238	+ # At this point, $1 holds the year, $2 the month and $3 the day of the date entered
	239	+ $year = $1 ;
	240	+ $month = $2 ;
	241	+ $day = $3 ;
	242	+
	243	+ if ($day == 31 and ($month == 4 or $month == 6 or $month == 9 or $month == 11))
	244	+ { abort "$desc '$date': 31st of a month with 30 days" ; }
	245	+ elsif ($day >= 30 and $month == 2)
	246	+ { abort "$desc '$date': February 30th or 31st" ; }
	247	+ elsif ($month == 2 and $day == 29 and not ($year % 4 == 0 and ($year % 100 != 0 or $year % 400 == 0)))
	248	+ { abort "$desc '$date': February 29th outside a leap year" ; }
	249	+ else { ; } # valid date
	250	+ }
	251	+ else { abort "$date: not valid date format: use yyyymmdd or yyyy/mm/dd" ; }
	252	+
	253	+ my $time_input = timelocal (0,0,0,$day, $month-1, $year-1900) ;
	254	+ ($sec,$min,$hour,$day,$month,$year) = localtime (time) ;
	255	+ my $time_today = timelocal (0,0,0,$day, $month, $year) ;
	256	+
	257	+ my $days_ago = ($time_today - $time_input) / (24 * 60 * 60) ;
	258	+
	259	+ if ($days_ago < 1)
	260	+ { abort "$desc '$date' should be before today which is $date_today" ; }
	261	+
	262	+ if ($days_ago > 366)
	263	+ { abort "$desc '$date' should be a year or less ago (but before today: '$date_today')" ; }
	264	+
	265	+ return ($days_ago) ;
	266	+}
	267	+
	268	+sub SetFileNames
	269	+{
	270	+ trace SetFileNames ;
	271	+
	272	+ $file_out = "private/DebugSquidDataOutDoNotPublish.txt" ;
	273	+ $file_out2 = "private/DebugSquidDataOutDoNotPublish2.txt" ;
	274	+ $file_err = "private/DebugSquidDataErrDoNotPublish.txt" ;
	275	+
	276	+ $file_ip_frequencies = "private/SquidDataIpFrequenciesDoNotPublish.csv" ;
	277	+ $file_ip_frequencies_bz2 = "private/SquidDataIpFrequenciesDoNotPublish.csv.bz2" ;
	278	+ $file_out_referers = "private/SquidDataReferersDoNotPublish.txt" ;
	279	+ $file_edits_saves = "private/SquidDataEditsSavesDoNotPublish.txt" ;
	280	+
	281	+ $file_csv_agents = "public/SquidDataAgents.csv" ;
	282	+ $file_csv_banners = "public/SquidDataBanners.csv" ;
	283	+ $file_csv_binaries = "public/SquidDataBinaries.csv" ;
	284	+ $file_csv_clients = "public/SquidDataClients.csv" ;
	285	+ $file_csv_clients_by_wiki = "public/SquidDataClientsByWiki.csv" ; # request Howie
	286	+ $file_csv_countries_views = "public/SquidDataCountriesViews.csv" ; # was SquidDataCountries2.csv
	287	+ $file_csv_countries_timed = "public/SquidDataCountriesViewsTimed.csv" ; # was SquidDataCountriesTimed2.csv
	288	+ $file_csv_countries_saves = "public/SquidDataCountriesSaves.csv" ;
	289	+ $file_csv_bots = "public/SquidDataCrawlers.csv" ;
	290	+ $file_csv_extensions = "public/SquidDataExtensions.csv" ;
	291	+ $file_csv_googlebots = "public/SquidDataGoogleBots.csv" ;
	292	+ $file_csv_images = "public/SquidDataImages.csv" ;
	293	+ $file_csv_indexphp = "public/SquidDataIndexPhp.csv" ; #
	294	+ $file_csv_languages = "public/SquidDataLanguages.csv" ;
	295	+ $file_head_tail = "public/SquidDataLogFilesHeadTail.csv" ;
	296	+ $file_csv_methods = "public/SquidDataMethods.csv" ;
	297	+ $file_csv_opsys = "public/SquidDataOpSys.csv" ;
	298	+ $file_csv_origins = "public/SquidDataOrigins.csv" ;
	299	+ $file_csv_requests = "public/SquidDataRequests.csv" ;
	300	+ $file_csv_requests_wap = "public/SquidDataRequestsWap.csv" ;
	301	+ $file_csv_requests_m = "public/SquidDataRequestsM.csv" ; # .m. in url, not mobile as derived from agent
	302	+ $file_csv_scripts = "public/SquidDataScripts.csv" ;
	303	+ $file_csv_search = "public/SquidDataSearch.csv" ;
	304	+ $file_csv_skins = "public/SquidDataSkins.csv" ;
	305	+
	306	+ $file_seqno_per_squidhour = "SquidDataSequenceNumbersPerSquidHour.csv" ;
	307	+ $file_seqno_all_squids = "SquidDataSequenceNumbersAllSquids.csv" ;
	308	+ $file_head_tail = "SquidDataLogFilesHeadTail.csv" ;
	309	+# $file_filter_fy = "SquidDataFilterFY.txt" ;
	310	+
	311	+ $path_out = "" ;
	312	+}
	313	+
	314	+sub SetPathOut
	315	+{
	316	+ trace SetPathOut ; # to keep trace tidy , do this at end of routine
	317	+
	318	+ my $days_ago = shift ;
	319	+ my ($path_out, $path_out_month) ;
	320	+
	321	+ ($sec,$min,$hour,$day,$month,$year) = localtime ($time_start - $days_ago * 24 * 3600) ;
	322	+
	323	+ $path_out = sprintf ("%04d-%02d", $year+1900, $month+1) ;
	324	+
	325	+ $path_out = "$path_root/$path_out" ;
	326	+ $path_out_month = $path_out ;
	327	+
	328	+ if (! -d $path_out)
	329	+ {
	330	+ # print "mkdir $path_out\n" ;
	331	+ mkdir ($path_out) \|\| die "Unable to create directory $path_out\n" ;
	332	+ }
	333	+
	334	+ $path_out .= "/" . sprintf ("%04d-%02d-%02d", $year+1900, $month+1, $day) ;
	335	+ if (! -d $path_out)
	336	+ {
	337	+ # print "mkdir $path_out\n" ;
	338	+ mkdir ($path_out) \|\| die "Unable to create directory $path_out\n" ;
	339	+ # print "mkdir $path_out/private\n" ;
	340	+ mkdir ("$path_out/private") \|\| die "Unable to create directory $path_out/private\n" ;
	341	+ # print "mkdir $path_out/public\n" ;
	342	+ mkdir ("$path_out/public" ) \|\| die "Unable to create directory $path_out/public\n" ;
	343	+ }
	344	+
	345	+ # clean up obsolete signal files
	346	+ $file_ready = "$path_out/\^Ready" ;
	347	+ unlink $file_ready ;
	348	+ $file_ready = "$path_out/\@Ready" ;
	349	+ unlink $file_ready ;
	350	+
	351	+ trace "SetPathOut for $days_ago days ago => path_out = '$path_out'\n" ;
	352	+ return ($path_out, $path_out_month) ;
	353	+}
	354	+
	355	+sub SetTimeRangeToProcess
	356	+{
	357	+ my $days_ago = shift ;
	358	+
	359	+ my ($sec,$min,$hour,$day,$month,$year) = localtime ($time_start - $days_ago * 24 * 3600) ;
	360	+ my $date_collect_files = sprintf ("%4d-%02d-%02d", $year+1900, $month+1, $day) ;
	361	+ my $time_to_start = $date_collect_files . "T00:00:00" ;
	362	+ my ($sec,$min,$hour,$day,$month,$year) = localtime ($time_start - ($days_ago-1) * 24 * 3600) ;
	363	+ my $date_after_collect_files = sprintf ("%4d-%02d-%02d", $year+1900, $month+1, $day) ;
	364	+ my $time_to_stop = $date_after_collect_files . "T00:00:00" ;
	365	+# my $time_to_stop = $date_collect_files . "T23:30:00" ; # Q&D fix to process last file available
	366	+
	367	+ # if ($test)
	368	+ # { $time_to_stop = $date_collect_files . "T00:30:00" ; }
	369	+
	370	+ return ($date_collect_files, $time_to_start, $time_to_stop) ;
	371	+}
	372	+
	373	+sub CheckProcessPhase1 # Collect IP frequencies
	374	+{
	375	+ trace CheckProcessPhase1 ;
	376	+
	377	+ my ($days_ago, $path_out) = @_ ;
	378	+ my $process = $true ;
	379	+
	380	+ my $file_ready = "$file_ip_frequencies_bz2" ;
	381	+ my $path_ready = "$path_out/$file_ready" ;
	382	+
	383	+ if (-e $path_ready)
	384	+ {
	385	+ if ($force_phases !~ /1/)
	386	+ {
	387	+ $process = $false ;
	388	+ print "File '[path_out]$file_ready' already exists => skip phase 1 (collecting ip address counts)\n" ;
	389	+ }
	390	+ else
	391	+ { print "File '[path_out]$file_ready' already exists.\nYet force execute phase 1 (collecting ip address counts), as -f 1 has been specified\n" ; }
	392	+ }
	393	+ else
	394	+ { print "File '[path_out]/$file_ready' not found -> process phase 1\n" ; }
	395	+
	396	+ return ($process) ;
	397	+}
	398	+
	399	+sub CheckProcessPhase2 # collect other data
	400	+
	401	+{
	402	+ trace CheckProcessPhase2 ;
	403	+
	404	+ my ($days_ago, $path_out) = @_ ;
	405	+ my $process = $true ;
	406	+
	407	+ my $file_ready = "#Ready" ;
	408	+ my $path_ready = "$path_out/$file_ready" ;
	409	+ if (-e $path_ready)
	410	+ {
	411	+ if ($force_phases !~ /2/)
	412	+ {
	413	+ $process = $false ;
	414	+ print "File '[path_out]/$file_ready' already exists => skip phase 2 (collecting counts other than ip counts)\n" ;
	415	+ }
	416	+ else
	417	+ { print "File '[path_out]/$file_ready' already exists.\nYet force execute phase 2 (collecting counts other than ip counts), as -f 2 has been specified\n" ; }
	418	+ }
	419	+ else
	420	+ { print "File '[path_out]/$file_ready' not found -> process phase 2\n" ; }
	421	+
	422	+ return ($process) ;
	423	+}
	424	+
	425	+sub InitGlobals # qqq
	426	+{
	427	+ trace InitGlobals ;
	428	+
	429	+ undef $addresses_stored ;
	430	+ undef $banner_requests_ignored ;
	431	+ undef $date_prev ;
	432	+ undef $fields_too_few ;
	433	+ undef $fields_too_many ;
	434	+ undef $googlebots ;
	435	+ undef $googles ;
	436	+ undef $html_pages_found ;
	437	+ undef $lines_in_file ;
	438	+ undef $lines_processed ;
	439	+ undef $lines_this_day ;
	440	+ undef $newest_time_read ;
	441	+ undef $oldest_time_read ;
	442	+ undef $statusses_non_tcp ;
	443	+ undef $tot_mime_html ;
	444	+ undef $tot_mime_html2 ;
	445	+ undef $tot_origins_external_counted ;
	446	+ undef $tot_referers_external ;
	447	+ undef $tot_referers_internal ;
	448	+ undef $unrecognized_domains ;
	449	+
	450	+ undef %google_bot_hits ;
	451	+ undef %ip_bot_no_google ;
	452	+ undef %agents_raw ;
	453	+ undef %binaries ;
	454	+ undef %bots ;
	455	+ undef %client_ip_record_cnt ;
	456	+ undef %client_ip_record_cnt_total ;
	457	+ undef %clients ;
	458	+ undef %clients_by_wiki ;
	459	+ undef %cnt_ip_ranges ;
	460	+ undef %countries ;
	461	+ undef %countries_saves ;
	462	+ undef %countries_timed ;
	463	+ undef %countries_views ;
	464	+ undef %edit_submit_filtered ;
	465	+ undef %engines ;
	466	+ undef %exts ;
	467	+ undef %google_imposters ;
	468	+ undef %googlebins ;
	469	+ undef %googlebins2 ;
	470	+ undef %grouped_clients ;
	471	+ undef %imagesizes ;
	472	+ undef %index_php ;
	473	+ undef %index_php_raw ;
	474	+ undef %ip_distribution ;
	475	+ undef %ip_frequencies ;
	476	+ undef %languages ;
	477	+ undef %languages_unrecognized ;
	478	+ undef %lines_read ;
	479	+ undef %mobile_other ;
	480	+ undef %operating_systems ;
	481	+ undef %origin_simplified ;
	482	+ undef %origins ;
	483	+ undef %origins_external ;
	484	+ undef %origins_unsimplified ;
	485	+ undef %referers_internal ;
	486	+ undef %requests ;
	487	+ undef %scripts ;
	488	+ undef %search ;
	489	+ undef %skins ;
	490	+ undef %squid_delta ;
	491	+ undef %squid_events ;
	492	+ undef %squid_seqno ;
	493	+ undef %statusses ;
	494	+ undef %unrecognized_domains ;
	495	+ undef %wikis ;
	496	+# undef @files ;
	497	+};
	498	+
	499	+sub ProcessPhase1 # collect IP frequencies, needed for filtering probable bots in phase 2
	500	+
	501	+{
	502	+ trace "ProcessPhase1: Collect IP frequencies" ;
	503	+ my ($days_ago, $date_collect_files, $time_to_start, $time_to_stop, $path_out, @files) = @_ ;
	504	+
	505	+ $scan_ip_frequencies = $true ;
	506	+ $scan_all_fields = $false ;
	507	+
	508	+ my $data_read = &ReadSquidLogFiles ($path_out, $time_to_start, $time_to_stop, @files) ;
	509	+ return if not $data_read ;
	510	+
	511	+ &WriteOutputIpFrequencies ($path_out) ;
	512	+}
	513	+
	514	+sub ProcessPhase2 # Collect other data
	515	+{
	516	+ trace "ProcessPhase2: Collect other data" ;
	517	+ my ($days_ago, $date_collect_files, $time_to_start, $time_to_stop, $path_out, $path_out_month, @files) = @_ ;
	518	+
	519	+ $scan_ip_frequencies = $false ;
	520	+ $scan_all_fields = $true ;
	521	+
	522	+ my $data_read = &ReadIpFrequencies ($path_out) ;
	523	+ return if not $data_read ;
	524	+
	525	+ my $data_read = &ReadSquidLogFiles ($path_out, $time_to_start, $time_to_stop, @files) ;
	526	+ return if not $data_read ;
	527	+
	528	+ &WriteOutputSquidSequenceGaps ($path_out) ;
	529	+ &WriteOutputSquidLogs ($path_out) ;
	530	+ &WriteOutputEditsSavesFile ($path_out) ;
	531	+ &WriteOutputCountriesSaves ($path_out) ;
	532	+
	533	+ &WriteDiagnostics ;
	534	+
	535	+ if ($job_runs_on_production_server)
	536	+ { &MoveAndCompressFiles ($path_out, $path_out_month, $date_collect_files) ; }
	537	+
	538	+
	539	+ if ($job_runs_on_production_server)
	540	+ {
	541	+ $cmd = "echo \"Ready in \"" . ddhhmmss (time - $time_start). " > $path_out/\#Ready" ; # use in next run to test whether this day has been completely processed
	542	+ `$cmd` ;
	543	+ $cmd = "echo \"\nReady in \"" . ddhhmmss (time - $time_start). " >> /home/ezachte/SquidCountArchiveLog.txt\n\n" ;
	544	+ `$cmd` ;
	545	+ }
	546	+}
	547	+
	548	+#sub ScanSquidArchive
	549	+#{
	550	+# trace ScanSquidArchive ;
	551	+
	552	+# $T00 = "T00:00:00" ;
	553	+
	554	+# ($time_to_start, $time_to_stop) = &GetSquidLogsToProcess ; # aborts if not all found
	555	+
	556	+# open OUT, '>', "$path_out/$file_out" ;
	557	+# open OUT2, '>', "$path_out/$file_out2" ;
	558	+# open ERR, '>', "$path_out/$file_err" ;
	559	+
	560	+# &CheckSquidLogsAlreadyProcessed ; # aborts if this is the case
	561	+
	562	+# if ($scan_all_fields)
	563	+# { &ReadIpFrequencies ; }
	564	+
	565	+# &ReadSquidLogFiles ;
	566	+
	567	+# if (($oldest_time_read gt $time_to_start) \|\| ($newest_time_read lt $time_to_stop))
	568	+# { abort ("Log does not contain full range from $time_to_start till $time_to_stop (oldest time read $oldest_time_read, newest time read $newest_time_read)\n") unless $test ; }
	569	+
	570	+# print "\ncd $path_out\n" ;
	571	+# chdir ($path_out) ;
	572	+
	573	+# &WriteOutputSquidLogs ;
	574	+
	575	+# if ($scan_all_fields)
	576	+# { &WriteDiagnostics ; }
	577	+
	578	+# close OUT ;
	579	+# close OUT2 ;
	580	+# close ERR ;
	581	+
	582	+# if ($job_runs_on_production_server && $scan_all_fields)
	583	+# { &MoveAndCompressFiles ($path_out, $time_to_start) ; }
	584	+#}
	585	+
	586	+#sub GetSquidLogsToProcess
	587	+#{
	588	+# trace GetSquidLogsToProcess ;
	589	+
	590	+# my ($date_archived, $datestart, $datestop) ;
	591	+
	592	+# $time = time ;
	593	+# my ($sec,$min,$hour,$day,$month,$year) = localtime ($time) ;
	594	+
	595	+# $day_today = sprintf ("%04d-%02d-%02d",$year+1900,$month+1,$day) ;
	596	+# print "Date today is $day_today.\n\n" ;
	597	+
	598	+# if ($job_runs_on_production_server)
	599	+# {
	600	+# $dir_in = "/a/squid/archive" ;
	601	+
	602	+# if ($logdate =~ /^\d{8}$/)
	603	+# {
	604	+# $year = substr ($logdate,0,4) ;
	605	+# $month = substr ($logdate,4,2) ;
	606	+# $day = substr ($logdate,6,2) ;
	607	+
	608	+# $time_to_start = sprintf ("%04d-%02d-%02d$T00",$year,$month,$day) ;
	609	+# ($year,$month,$day) = &ShiftDays ($year, $month, $day, 1) ;
	610	+# $time_to_stop = sprintf ("%04d-%02d-%02d$T00",$year,$month,$day) ;
	611	+# }
	612	+# elsif ($logdate =~ /^-\d+$/)
	613	+# {
	614	+# ($sec,$min,$hour,$day,$month,$year) = localtime ($time+$logdate*24*3600) ;
	615	+# $year += 1900 ;
	616	+# $month += 1 ;
	617	+# $time_to_start = sprintf ("%04d-%02d-%02d$T00",$year,$month,$day) ;
	618	+# ($year,$month,$day) = &ShiftDays ($year, $month, $day, 1) ;
	619	+# $time_to_stop = sprintf ("%04d-%02d-%02d$T00",$year,$month,$day) ;
	620	+# }
	621	+# else
	622	+# {
	623	+# print "No logdate specified\n" ;
	624	+# exit ;
	625	+# }
	626	+
	627	+# print "-d $logdate => Process data from $time_to_start till $time_to_stop\n\n" ;
	628	+# }
	629	+# else # test
	630	+# {
	631	+# # $time_to_start = "2009-02-05T00" ;
	632	+# # $time_to_stop = "2009-02-05T23:59:59" ;
	633	+# # push @files, getcwd . "/sampled-1000-oneday.txt" ;
	634	+
	635	+# $time_to_start = "2010-05-10T00" ;
	636	+# $time_to_stop = "2010-05-10T01" ;
	637	+# push @files, getcwd . "/sampled-1000-log-20100510.txt" ;
	638	+
	639	+# print "Job runs in test env => Process data from $time_to_start till $time_to_stop\n\n" ;
	640	+# }
	641	+
	642	+# $some_files_found = $false ;
	643	+# $full_range_found = $false ;
	644	+
	645	+# ($path_out, $path_out_month) = &GetPathOut ($time_to_start) ;
	646	+# $path_head_tail = "$path_out_month/$file_head_tail" ;
	647	+
	648	+# if ($job_runs_on_production_server)
	649	+# {
	650	+# # file naming scheme on server: sampled-1000.log-yyyymmdd, does not mean on that day file sampled-1000.log was archived
	651	+# # file can contain data for days(s) before and day (days?) after yyyymmdd, see e.g. sampled-10000.log-20090802 (days 0801-0803)
	652	+# # this is confusing so start a few days earlier and check for each day:
	653	+# # whether a file exists and whether it's 'head' and or 'tail' time (first last record) fall within range
	654	+
	655	+# # find first and last file to process that comprise all log records within date range
	656	+# $year = substr ($time_to_stop,0,4) ;
	657	+# $month = substr ($time_to_stop,5,2) ;
	658	+# $day = substr ($time_to_stop,8,2) ;
	659	+# ($year,$month,$day) = &ShiftDays ($year, $month, $day, +5) ;
	660	+# $datestop = sprintf ("%4d%02d%02d", $year, $month, $day) ;
	661	+
	662	+# $year = substr ($time_to_start,0,4) ;
	663	+# $month = substr ($time_to_start,5,2) ;
	664	+# $day = substr ($time_to_start,8,2) ;
	665	+
	666	+# ($year,$month,$day) = &ShiftDays ($year, $month, $day, -5) ;
	667	+# $datestart = sprintf ("%4d%02d%02d", $year, $month, $day) ;
	668	+
	669	+# $date_archived = $datestart ;
	670	+# while ($date_archived lt $datestop)
	671	+# {
	672	+# $date_archived = sprintf ("%4d%02d%02d", $year, $month, $day) ;
	673	+# ($year,$month,$day) = &ShiftDays ($year, $month, $day, +1) ;
	674	+
	675	+# $file = "$dir_in/sampled-1000.log-$date_archived.gz" ;
	676	+
	677	+# if (-e $file)
	678	+# {
	679	+# ($timehead,$timetail) = &GetLogRange ($file, $path_head_tail) ;
	680	+
	681	+# if (($timehead lt $time_to_start) && ($timetail ge $time_to_start))
	682	+# {
	683	+# $some_files_found = $true ;
	684	+# $processfiles = $true ;
	685	+# }
	686	+
	687	+# if ($processfiles)
	688	+# {
	689	+# print "$file: time range $timehead - $timetail\n" ;
	690	+# push @files, $file ;
	691	+# }
	692	+
	693	+# if (($timehead lt $time_to_stop) && ($timetail ge $time_to_stop))
	694	+# {
	695	+# $full_range_found = $true ;
	696	+# last ;
	697	+# }
	698	+# }
	699	+# }
	700	+# }
	701	+
	702	+# if ($job_runs_on_production_server)
	703	+# {
	704	+# if (! $some_files_found)
	705	+# { print "Not any file containing start time. Aborting...\n\n" ; exit ; }
	706	+# if (! $full_range_found)
	707	+# { print "Not all files were found. Aborting...\n\n" ; exit ; }
	708	+# }
	709	+
	710	+# print "\n" ;
	711	+# foreach $file (sort @files)
	712	+# { print "Process $file\n" ; }
	713	+
	714	+# return ($time_to_start, $time_to_stop) ;
	715	+#}
	716	+
	717	+#sub GetPathOut
	718	+#{
	719	+# my $time_to_start = shift ;
	720	+
	721	+# $path_out = substr ($time_to_start,0,7) ;
	722	+# if ($job_runs_on_production_server)
	723	+# {
	724	+# $path_out = "$path_root/$path_out" ;
	725	+# $path_out_month = $path_out ;
	726	+# }
	727	+
	728	+# if (! -d $path_out)
	729	+# {
	730	+# mkdir ($path_out) \|\| die "Unable to create directory $path_out\n" ;
	731	+# print "mkdir $path_out\n" ;
	732	+# }
	733	+
	734	+# $path_out .= "/" . substr ($time_to_start,0,10) ;
	735	+# if (! -d $path_out)
	736	+# {
	737	+# mkdir ($path_out) \|\| die "Unable to create directory $path_out\n" ;
	738	+# print "mkdir $path_out\n" ;
	739	+# }
	740	+
	741	+# # clean up obsolete signal files
	742	+# $file_ready = "$path_out/\^Ready" ;
	743	+# unlink $file_ready ;
	744	+# $file_ready = "$path_out/\@Ready" ;
	745	+# unlink $file_ready ;
	746	+
	747	+# return ($path_out,$path_out_month) ;
	748	+#}
	749	+
	750	+#sub CheckSquidLogsAlreadyProcessed
	751	+#{
	752	+# trace CheckSquidLogsAlreadyProcessed ;
	753	+
	754	+# if ($scan_ip_frequencies)
	755	+# {
	756	+# if (-e $file_ip_frequencies)
	757	+# {
	758	+# print "File $path_out/$file_ip_frequencies exists -> Day already processed\nExiting ...\n" ;
	759	+# exit ;
	760	+# }
	761	+# }
	762	+# elsif ($scan_squid_msg_sequence_numbers)
	763	+# {
	764	+# if (-e $file_sequence_numbers)
	765	+# {
	766	+# print "File $path_out/$file_sequence_numbers exists -> Day already processed\nExiting ...\n" ;
	767	+# exit ;
	768	+# }
	769	+# }
	770	+# else
	771	+# {
	772	+# if (-e $file_ready)
	773	+# {
	774	+# print "File $file_ready exists -> Day already processed\nExiting ...\n" ;
	775	+# exit ;
	776	+# }
	777	+# else
	778	+# { print "File $file_ready not found -> process data\n" ; }
	779	+# }
	780	+#}
	781	+
	782	+#sub ScanEditsSavesFile
	783	+#{
	784	+# trace ScanEditsSavesFile ;
	785	+
	786	+# if ($logdate =~ /^\d{8}$/)
	787	+# {
	788	+# $year = substr ($logdate,0,4) ;
	789	+# $month = substr ($logdate,4,2) ;
	790	+# $day = substr ($logdate,6,2) ;
	791	+# }
	792	+# else
	793	+# {
	794	+# print "No (valid) logdate specified\n" ;
	795	+# if ($job_runs_on_production_server)
	796	+# { exit ; }
	797	+# else
	798	+# {
	799	+# $year = 2010 ;
	800	+# $month = 4 ;
	801	+# $day = 01 ;
	802	+# }
	803	+# }
	804	+
	805	+# $time_to_start = sprintf ("%04d-%02d-%02d$T00",$year,$month,$day) ;
	806	+# ($year2,$month2,$day2) = &ShiftDays ($year, $month, $day, 1) ;
	807	+# $time_to_stop = sprintf ("%04d-%02d-%02d$T00",$year2,$month2,$day2) ;
	808	+
	809	+# ($path_out, $path_out_month) = &GetPathOut ($time_to_start) ;
	810	+
	811	+# if ($job_runs_on_production_server)
	812	+# { $path_out = $path_root ; }
	813	+# else
	814	+# {
	815	+# push @files, getcwd . "/sampled-1000.log-20100401" ;
	816	+# # return ;
	817	+# }
	818	+
	819	+# $file_txt = "$path_root/" . sprintf ("%4d-%02d", $year, $month) . "/SquidDataEditsSaves" . sprintf ("%4d-%02d-%02d", $year, $month, $day) . ".txt.bz2" ;
	820	+# $file_csv = "$path_root/" . sprintf ("%4d-%02d", $year, $month) . "/" . sprintf ("%4d-%02d-%02d", $year, $month, $day) . "/$file_csv_indexphp" ;
	821	+# $file_csv_countries_saves = "$path_root/" . sprintf ("%4d-%02d", $year, $month) . "/" . sprintf ("%4d-%02d-%02d", $year, $month, $day) . "/$file_csv_countries_saves" ;
	822	+# if (-e $file_txt)
	823	+# {
	824	+# &ReadInputEditsSavesFile ($file_txt) ;
	825	+# &WriteOutputEditsSavesFile ($file_csv) ;
	826	+# &WriteOutputCountriesSaves ($file_csv_countries_saves) ;
	827	+# }
	828	+# else
	829	+# { print "ScanEditsSavesFile: File $file_txt not found. Aborting...\n\n" ; exit ; }
	830	+#}
	831	+
	832	+sub ShiftDays
	833	+{
	834	+ my $year = shift ;
	835	+ my $month = shift ;
	836	+ my $day = shift ;
	837	+ my $delta = shift ;
	838	+
	839	+ my $time = timelocal (0,0,0,$day, $month-1, $year-1900) ;
	840	+ ($sec,$min,$hour,$day,$month,$year) = localtime ($time+$delta243600) ;
	841	+
	842	+ return ($year+1900,$month+1,$day) ;
	843	+}
	844	+
	845	+sub ExpandAbbreviation
	846	+
	847	+{
	848	+ my $text = shift ;
	849	+ # reverse (more or less) abbreviations
	850	+ $text =~ s/^[\@\*]//o ;
	851	+ $text =~ s/^xx:upload/upload: /o;
	852	+ $text =~ s/^wb:/wikibooks:/o;
	853	+ $text =~ s/^wk:/wiktionary:/o;
	854	+ $text =~ s/^wn:/wikinews:/o;
	855	+ $text =~ s/^wp:/wikipedia:/o;
	856	+ $text =~ s/^wq:/wikiquote:/o;
	857	+ $text =~ s/^ws:/wikisource:/o;
	858	+ $text =~ s/^wv:/wikiversity:/o;
	859	+ $text =~ s/^wx:/wikispecial:/o;
	860	+ $text =~ s/^mw:/wikispecial:/o; # eg bugzilla
	861	+ $text =~ s/:!mw/:mediawiki/o;
	862	+ $text =~ s/^wm:/wikimedia:/o;
	863	+ $text =~ s/:wm$/:wikimedia/o;
	864	+ $text =~ s/^wmf:/foundation:/o;
	865	+ $text =~ s/:www$/:portal/o;
	866	+# $text =~ s/^wikispecial:(.*)$/$1: /o;
	867	+ return ($text) ;
	868	+}
	869	+
	870	+sub ProcessSquidSequenceNumbers
	871	+{
	872	+ # input has been established for tast three months of data in WriteOutputSquidLogs
	873	+ # there for each day per squid and hour of day total event and total gap were established
	874	+ # avg gap for all squids combined (per hour and per day) was written to this csv file
	875	+ open CSV, '<', 'SquidDataSequenceNumbersAllSquids.csv' ;
	876	+ while ($line = <CSV>)
	877	+ {
	878	+ next if $line =~ /\*/o ;
	879	+ next if $line !~ /\d\d\d\d\-\d\d\-\d\d,/o ;
	880	+ chomp $line ;
	881	+ ($date,$hour,$events,$mean_gap) = split (',', $line) ;
	882	+ $yyyy = substr ($date,0,4) ;
	883	+ $mm = substr ($date,5,2) ;
	884	+ $dd = substr ($date,8,2) ;
	885	+ $time = timelocal (0,0,0,$dd,$mm-1,$yyyy-1900) ;
	886	+ ($ss,$nn,$hh,$day,$month,$year,$wday,$yday,$isdst) = localtime($time);
	887	+ $month ++ ;
	888	+ $weekno = int ($yday / 7) ;
	889	+ if ($weekno_start {$weekno} eq '')
	890	+ { $weekno_start {$weekno} = $date ; }
	891	+ $weekno_stop {$weekno} = $date ;
	892	+ $events {"$weekno,$hour"} += $events ;
	893	+ $totgap {"$weekno,$hour"} += $events * $mean_gap ;
	894	+ $events_allday {$weekno} += $events ;
	895	+ $totgap_allday {$weekno} += $events * $mean_gap ;
	896	+
	897	+ # to establish correction factor per month igore all days when another anomaly occurred, or after problem was fixed
	898	+ # wk 23: from 6/11 till 6/16 unusually many messages got lost due to temporary slowdown of server
	899	+ # (unwanted blocking process had been introduced by vector switch)
	900	+ # wk 26: on 6/27 and 6/28 22 hours of data were lost after incomplete manual restart of locke
	901	+ # wk 26/27: from 7/7 till 7/10 69 hours of data were lost after incomplete restart of locke after power down
	902	+ # (week 27 does not stand out in the chart, squids got rebooted? <- counters were reset?)
	903	+ # wk 29: 7/22 Mark stopped several secondary processes on locke,
	904	+ # around 14.00 hrs GMT message loss vanished almost entirely
	905	+ # After that average gap became 1003, meaning only 0.3% of messages is missing.
	906	+
	907	+
	908	+ next if $month == 6 and (($day >= 11 and $day <= 16) or ($day >= 27 and $day <= 28)) ;
	909	+ next if $month == 7 and (($day >= 7 and $day <= 10) or ($day >= 22)) ;
	910	+ # these dates where data were missing or underreported are already skipped in WikiCountsSummarizeProjectCounts
	911	+ # and totals are already extrapolated
	912	+
	913	+ $events_allmonth {$month} += $events ;
	914	+ $totgap_allmonth {$month} += $events * $mean_gap ;
	915	+
	916	+ $weeks {$weekno} ++ ;
	917	+ $months {$month} ++ ;
	918	+ }
	919	+ close CSV ;
	920	+
	921	+ open CSV, '>', 'SquidDataSequenceNumbersAllSquidsOut.csv' ;
	922	+
	923	+ print CSV "hour," ;
	924	+ print "hour," ;
	925	+ foreach $weekno (sort {$a <=> $b} keys %weeks)
	926	+ {
	927	+ $start = substr ($weekno_start {$weekno},5) ;
	928	+ $start =~ s/-/\//go ;
	929	+ $start =~ s/^0//go ;
	930	+ # $stop = substr ($weekno_stop {$weekno},5) ;
	931	+
	932	+
	933	+ print CSV "wk $weekno: ($start ..)," ;
	934	+ print "wk $weekno: ($start ..)," ;
	935	+ }
	936	+ print "\n" ;
	937	+ print CSV "\n" ;
	938	+
	939	+ foreach ($hour = 0 ; $hour <= 23 ; $hour++)
	940	+ {
	941	+ print CSV "$hour," ;
	942	+ print "$hour," ;
	943	+
	944	+ $hour = sprintf ("%02d", $hour) ;
	945	+ foreach $weekno (sort {$a <=> $b} keys %weeks)
	946	+ {
	947	+ $events = $events {"$weekno,$hour"} ;
	948	+ $totgap = $totgap {"$weekno,$hour"} ;
	949	+ $mean_gap = 0 ;
	950	+ if ($events > 0)
	951	+ { $mean_gap = sprintf ("%.0f", $totgap / $events ) ; }
	952	+ print CSV "$mean_gap," ;
	953	+ print "$mean_gap," ;
	954	+ }
	955	+
	956	+ print "\n" ;
	957	+ print CSV "\n" ;
	958	+ }
	959	+ print CSV "all day," ;
	960	+ print "all day," ;
	961	+ foreach $weekno (sort {$a <=> $b} keys %weeks)
	962	+ {
	963	+ $events = $events_allday {$weekno} ;
	964	+ $totgap = $totgap_allday {$weekno} ;
	965	+ $mean_gap = 0 ;
	966	+ if ($events > 0)
	967	+ { $mean_gap = sprintf ("%.0f", $totgap / $events ) ; }
	968	+ print CSV "$mean_gap," ;
	969	+ print "$mean_gap," ;
	970	+ }
	971	+
	972	+ # the following yields (month, avg gap)
	973	+ # 4: 1241 so assume this factor for full April: 1,000,000 / 1241 gap = x msgs, too short: y msgs = 1000 - x
	974	+ # 5: 1310
	975	+ # 6: 1328
	976	+ # 7: 1470 so assume this factor for 22.5/days for July
	977	+
	978	+ print "\n\n" ;
	979	+ print CSV "\n\n" ;
	980	+ foreach $month (sort {$a <=> $b} keys %months)
	981	+ {
	982	+ print CSV "month $month," ;
	983	+ print "month $month," ;
	984	+ $events = $events_allmonth {$month} ;
	985	+ $totgap = $totgap_allmonth {$month} ;
	986	+ $mean_gap = 0 ;
	987	+ if ($events > 0)
	988	+ { $mean_gap = sprintf ("%.0f", $totgap / $events ) ; }
	989	+ print CSV "$mean_gap\n" ;
	990	+ print "$mean_gap\n" ;
	991	+ }
	992	+
	993	+ close CSV ;
	994	+}
	995	+
	996	+
	997	+# how to detect page saves:
	998	+# henbane /a/log/vu.awk: (see also Domasz' webstats collector)
	999	+#
	1000	+# function savemark(url, code) {
	1001	+# if (url ~ /action=submit$/ && code == "TCP_MISS/302")
	1002	+# return "save"
	1003	+# return "-"
	1004	+# }
	1005	+
	1006	+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/tools/counter/
	1007	+# http://leuksman.com/log/2007/06/07/wikimedia-page-views/
	1008	+# http://www.iplists.com/
	1009	+# WHOIS http://ws.arin.net/whois/?queryinput=N%20.%20GOOGLE
	1010	+# WHOIS http://tools.whois.net/index.php?fuseaction=whois.whoisbyipresults
	1011	+# http://en.wikipedia.org/wiki/List_of_search_engines
	1012	+
	1013	+# http://en.wikipedia.org/wiki/User_agent
	1014	+# http://www.texsoft.it/index.php?c=software&m=sw.php.useragent&l=it
	1015	+# http://www.hyperborea.org/journal/archives/2004/06/19/whats-in-a-user-agent-string/
	1016	+
	1017	+# Funwebproducts
	1018	+# No fun with funwebproducts http://www.networkworld.com/newsletters/web/2003/1208web2.html
	1019	+
	1020	+# SLCC
	1021	+# Nice and easy. SLCC1 stands for Secure Licensing Commerce Client version 1.0. SLCC is the service responsible for the Windows Anytime upgrade process present in Vista and Server 2008 which allows you to upgrade Vista Home Basic to Vista Ultimate Edition, or Server 2008 Standard to Server 2008 Enterprise ad-hoc.
	1022	+# SLCC is present in the browser identifier tag, the User Agent, in order to allow Microsoft update servers to offer you the tantalising and irresistible promise of an even more resource heavy version of Vista!
	1023	+# J2ME
	1024	+# Java 2 Micro Edition
	1025	+
	1026	+# Chrome Safari
	1027	+# http://www.neowin.net/news/main/09/02/01/chrome-masks-as-safari-to-fool-windows-live-mail
	1028	+
	1029	+# Danger Hiptop
	1030	+# http://en.wikipedia.org/wiki/Danger_Hiptop
	1031	+
Index: trunk/wikistats/squids/SquidReportArchive.sh
—	—	@@ -0,0 +1,10 @@
	2	+#! /bin/sh
	3	+ulimit -v 4000000
	4	+home="/a/ezachte"
	5	+# perl $home/SquidReportArchive.pl -m 201007 > SquidReportArchiveLog.txt
	6	+# after further automating SquidScanCountries.sh:
	7	+perl $home/SquidReportArchive.pl -c 201101 >> SquidReportArchiveLog.txt # -c for per country reports
	8	+perl $home/SquidReportArchive.pl -m 201101 >> SquidReportArchiveLog.txt
	9	+tar -cf reports.tar /a/ezachte/*.htm
	10	+bzip2 reports.tar
	11	+mv reports.tar.bz2 /a/ezachte
Property changes on: trunk/wikistats/squids/SquidReportArchive.sh
___________________________________________________________________
Added: svn:eol-style
1	12	+ native
Index: trunk/wikistats/squids/SquidReportArchive.pl
—	—	@@ -0,0 +1,6265 @@
	2	+#!/usr/bin/perl
	3	+
	4	+ use lib "/home/ezachte/lib" ;
	5	+ use EzLib ;
	6	+ $trace_on_exit = $true ;
	7	+ ez_lib_version (2) ;
	8	+
	9	+# $quarter_only = '2010 Q3' ; # if not empty filter process for this quarter only
	10	+
	11	+ # set defaults mainly for tests on local machine
	12	+# default_argv "-m 201009 " ;
	13	+ default_argv "-c " ;
	14	+
	15	+# $html = "<html><body bgcolor=black><table>" ;
	16	+# for ($i = 4 ; $i >= 0 ; $i-=0.5)
	17	+# {
	18	+# ($requests,$ratio,$fill) = RatioAndFillColor1 ('',$i,4, $ratio_sqrt) ;
	19	+# print sprintf ("%.1f",$i) . ": $fill\n" ;
	20	+# $i2 = sprintf ("%0.1f", $i) ;
	21	+# $html .= "<tr><td align=right><font color=grey>$i2</font></td><td width=15> </td><td width=50 style=\"background:$fill\"> </td><td width=15> </td><td><font color=grey> $fill</font></td></tr>" ;
	22	+# }
	23	+# $html .= "<tr><td height=30 colspan=99> </td></tr>" ;
	24	+# for ($i = 4 ; $i >= 0 ; $i-=0.5)
	25	+# {
	26	+# ($requests,$ratio,$fill) = RatioAndFillColor2 ('',$i,4, $ratio_sqrt) ;
	27	+# print sprintf ("%.1f",$i) . ": $fill\n" ;
	28	+# $i2 = sprintf ("%0.1f", $i) ;
	29	+# $html .= "<tr><td align=right><font color=grey>$i2</font></td><td width=15> </td><td width=50 style=\"background:$fill\"> </td><td width=15> </td><td><font color=grey> $fill</font></td></tr>" ;
	30	+# }
	31	+# $html .= "</table><body></html>" ;
	32	+# open HTML, '>', 'color_range2.html' ;
	33	+# print HTML $html ;
	34	+# close HTML ;
	35	+# exit ;
	36	+
	37	+#sub RatioAndFillColor1
	38	+#{
	39	+# my ($code, $requests,$requests_max) = @_ ;
	40	+# my ($ratio,$green,$red,$blue,$fill) ;
	41	+
	42	+# if ($requests > $requests_max)
	43	+# { $requests = $requests_max ; }
	44	+
	45	+# $ratio = sqrt ($requests / $requests_max) ;
	46	+# if ($ratio >= 0.20)
	47	+# {
	48	+# $green = 180 ;
	49	+# $red = 180 - int (0.5 + 180 * 5/4 * ($ratio-0.20)) ;
	50	+# $blue = int ($green / 3) ;
	51	+# }
	52	+# else
	53	+# {
	54	+# $red = 220 ;
	55	+# $green = int (0.5 + 220 * 5 * $ratio) ;
	56	+# $blue = 0 ; #int ($green / 2) ;
	57	+# }
	58	+
	59	+# $fill = "\#" . sprintf ("%02x%02x%02x",$red,$green,$blue) ;
	60	+# $fill = lc hsv2rgb($ratio*150,0.67+$ratio*0.33,0.8-0.2*$ratio) ;
	61	+
	62	+# $fills {lc $code} = $fill ;
	63	+# return ($requests,$ratio,$fill) ;
	64	+#}
	65	+
	66	+#sub RatioAndFillColor2
	67	+#{
	68	+# my ($code, $requests,$requests_max) = @_ ;
	69	+# my ($ratio,$green,$red,$blue,$fill) ;
	70	+
	71	+# if ($requests > $requests_max)
	72	+# { $requests = $requests_max ; }
	73	+
	74	+# $ratio = $requests / $requests_max ;
	75	+# if ($ratio >= 0.20)
	76	+# {
	77	+# $green = 180 ;
	78	+# $red = 180 - int (0.5 + 180 * 5/4 * ($ratio-0.20)) ;
	79	+# $blue = int ($green / 3) ;
	80	+# }
	81	+# else
	82	+# {
	83	+# $red = 220 ;
	84	+# $green = int (0.5 + 220 * 5 * $ratio) ;
	85	+# $blue = 0 ; #int ($green / 2) ;
	86	+# }
	87	+
	88	+# $fill = "\#" . sprintf ("%02x%02x%02x",$red,$green,$blue) ;
	89	+# $fill = lc hsv2rgb($ratio*150,1-$ratio*0.334,0.6) ;
	90	+
	91	+# $fills {lc $code} = $fill ;
	92	+# return ($requests,$ratio,$fill) ;
	93	+#}
	94	+
	95	+# to do: add text from http://wiki.squid-cache.org/SquidFaq/SquidLogs
	96	+# ReportOrigin how to handle '!error <-> other
	97	+# SquidReportOrigins.htm total count<->alpha are not the same (+ skip total for "google (total)")
	98	+# SquidReportOrigins.htm totals google don't match ReportMimeTypes
	99	+# SquidReportOrigins.htm internal tonen als bij mime types
	100	+
	101	+# cater for missing files -> different multiplier
	102	+# csv file google bot hits per hour -> Stu
	103	+# report for edit/submit
	104	+# log.txt s -> date folder
	105	+
	106	+# http://www.linux.com/community/blogs/Convert-a-.svg-file-to-a-.png-in-Ubuntu.html
	107	+
	108	+# use CGI::Carp qw(fatalsToBrowser);
	109	+# use Getopt::Std ;
	110	+ use Time::Local ;
	111	+ use Cwd;
	112	+
	113	+ $ratio_sqrt = $true ; # two scaling flags, only the square root one is switched on here
	114	+ $ratio_linear = $false ;
	115	+
	116	+ getopt ("dm", \%options) ; # command line switches end up in %options (d, m, w and c are tested below)
	117	+ # pick root folder: folder /a/squid exists -> server, host 'bayes' -> other server, else local test setup
	118	+ if (-d "/a/squid")
	119	+ {
	120	+ print "\n\nJob runs on server $hostname\n\n" ;
	121	+ $path_root = "/a/ezachte" ;
	122	+ }
	123	+ elsif ($hostname eq 'bayes')
	124	+ {
	125	+ print "\n\nJob runs on server $hostname\n\n" ;
	126	+ $path_root = "/home/ezachte/wikistats/animation" ;
	127	+ }
	128	+ else
	129	+ {
	130	+ print "Job runs local for tests\n\n" ;
	131	+ $path_root = "W:/! Perl/Squids/Archive/test5" ;
	132	+ }
	133	+ $path_in = $path_root ; # input and output share the same root folder
	134	+ $path_out = $path_root ;
	135	+
	136	+ print "Path root = $path_root\n" ;
	137	+
	138	+ # periodically harvest updated metrics from
	139	+ # 'http://en.wikipedia.org/wiki/List_of_countries_by_population'
	140	+ # 'http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users'
	141	+ if (defined ($options {"w"})) # switch w: only run ReadWikipedia, then stop
	142	+ { &ReadWikipedia ; exit ; }
	143	+
	144	+ if (defined ($options {"c"})) # switch c: write the per country reports (see below), not the monthly reports
	145	+ { $reportcountries = $true ; }
	146	+
	147	+ # date range used to be read from csv file with ReadDate, now there are daily csv files
	148	+ # if the earlier method is still useful it needs to be tweaked
	149	+# if (($reportmonth ne "") && ($reportmonth !~ /^\d{6}$/))
	150	+
	151	+ &InitProjectNames ;
	152	+
	153	+ if ($reportcountries)
	154	+ {
	155	+ $project_mode = "wp" ; # handed to ReadInputCountriesMonthly and ReadInputCountriesDaily by ReportCountries
	156	+
	157	+ $file_csv_country_codes = "CountryCodes.csv" ;
	158	+ $file_csv_country_meta_info = "SquidReportCountryMetaInfo.csv" ;
	159	+
	160	+ &ReadInputCountriesNames ;
	161	+ &ReadInputCountriesMeta ;
	162	+
	163	+ &CollectRegionCounts ;
	164	+
	165	+ &ReportCountries ('Saves');
	166	+ &ReportCountries ('Views');
	167	+
	168	+ exit ; # country reports are a separate run: nothing below this point is executed
	169	+ }
	170	+
	171	+ $reportdaysback = $options {"d"} ;
	172	+ $reportmonth = $options {"m"} ;
	173	+ # either a month (six digits) or a negative number of days is required
	174	+ if (($reportmonth !~ /^\d{6}$/) && ($reportdaysback !~ /^-\d+/))
	175	+ { print "Specify month as -m yyyymm or days back as -d -[days] (e.g. -d -1 for yesterday)" ; exit ; }
	176	+ # from here on $reportmonth has the form yyyy-mm (a valid -m wins over -d)
	177	+ if ($reportmonth =~ /^\d{6}$/)
	178	+ { $reportmonth = substr ($reportmonth,0,4) . "-" . substr ($reportmonth,4,2) ; }
	179	+ else
	180	+ {
	181	+ ($sec,$min,$hour,$day,$month,$year) = localtime (time+$reportdaysback*86400) ; # $reportdaysback is negative, so this is a moment in the past
	182	+ $reportmonth = sprintf ("%04d-%02d",$year+1900,$month+1) ; # the month in which that day falls
	183	+ }
	184	+ print "Report month = $reportmonth\n" ;
	185	+
	186	+ $threshold_mime = 0 ;
	187	+ $threshold_project = 10 ;
	188	+
	189	+ $file_log = "WikiReportsSampledVisitorsLog.log" ;
	190	+ # names of the html reports (the link bar built in PrepHtml refers to most of these)
	191	+ $file_html_crawlers = "SquidReportCrawlers.htm" ;
	192	+ $file_html_methods = "SquidReportMethods.htm" ;
	193	+ $file_html_origins = "SquidReportOrigins.htm" ;
	194	+ $file_html_opsys = "SquidReportOperatingSystems.htm" ;
	195	+ $file_html_scripts = "SquidReportScripts.htm" ;
	196	+ $file_html_skins = "SquidReportSkins.htm" ;
	197	+ $file_html_requests = "SquidReportRequests.htm" ;
	198	+ $file_html_google = "SquidReportGoogle.htm" ;
	199	+ $file_html_clients = "SquidReportClients.htm" ;
	200	+
	201	+# names till 2010-07-01
	202	+#
	203	+# $file_csv_crawlers = "SquidDataCrawlers.csv" ;
	204	+# $file_csv_methods = "SquidDataMethods.csv" ;
	205	+# $file_csv_origins = "SquidDataOrigins.csv" ;
	206	+# $file_csv_opsys = "SquidDataOpSys.csv" ;
	207	+# $file_csv_requests = "SquidDataRequests.csv" ;
	208	+# $file_csv_scripts = "SquidDataScripts.csv" ;
	209	+# $file_csv_google = "SquidDataSearch.csv" ;
	210	+# $file_csv_skins = "SquidDataSkins.csv" ;
	211	+# $file_csv_clients = "SquidDataClients.csv" ;
	212	+# $file_csv_google_bots = "SquidDataGoogleBots.csv" ;
	213	+# $file_csv_indexphp = "SquidDataIndexPhp.csv" ;
	214	+# $file_csv_countries_languages_visited = "SquidDataCountriesLanguagesVisited.csv" ;
	215	+# $file_csv_countries_timed = "SquidDataCountriesTimed.csv" ;
	216	+# $file_csv_browser_languages = "SquidDataLanguages.csv" ;
	217	+
	218	+ $file_csv_crawlers = "public/SquidDataCrawlers.csv" ; # csv names are used as "$dir_process/$file_csv_..." in the ReadInput subs
	219	+ $file_csv_methods = "public/SquidDataMethods.csv" ;
	220	+ $file_csv_origins = "public/SquidDataOrigins.csv" ;
	221	+ $file_csv_opsys = "public/SquidDataOpSys.csv" ;
	222	+ $file_csv_requests = "public/SquidDataRequests.csv" ;
	223	+ $file_csv_scripts = "public/SquidDataScripts.csv" ;
	224	+ $file_csv_google = "public/SquidDataSearch.csv" ;
	225	+ $file_csv_skins = "public/SquidDataSkins.csv" ;
	226	+ $file_csv_clients = "public/SquidDataClients.csv" ;
	227	+ $file_csv_google_bots = "public/SquidDataGoogleBots.csv" ;
	228	+ $file_csv_indexphp = "public/SquidDataIndexPhp.csv" ;
	229	+ $file_csv_countries_languages_visited = "public/SquidDataCountriesViews.csv" ; # variable kept its old name, file was renamed
	230	+ $file_csv_countries_timed = "public/SquidDataCountriesViewsTimed.csv" ;
	231	+ $file_csv_browser_languages = "public/SquidDataLanguages.csv" ;
	232	+
	233	+ print "\n\nJob SquidReportArchive.pl\n\n" ;
	234	+
	235	+# if (! -d "/a/squid")
	236	+# {
	237	+# if (! -e $file_csv_requests) { $file_csv_requests =~ s/\./Test./ }
	238	+# if (! -e $file_csv_methods) { $file_csv_methods =~ s/\./Test./ }
	239	+# if (! -e $file_csv_skins) { $file_csv_skins =~ s/\./Test./ }
	240	+# if (! -e $file_csv_scripts) { $file_csv_scripts =~ s/\./Test./ }
	241	+# if (! -e $file_csv_opsys) { $file_csv_opsys =~ s/\./Test./ }
	242	+# if (! -e $file_csv_origins) { $file_csv_origins =~ s/\./Test./ }
	243	+# if (! -e $file_csv_google) { $file_csv_google =~ s/\./Test./ }
	244	+# if (! -e $file_csv_crawlers) { $file_csv_crawlers =~ s/\./Test./ }
	245	+# }
	246	+
	247	+ if (! -d "$path_root/$reportmonth") # daily folders are expected inside a folder named after the month
	248	+ { print "Directory not found: $path_root\/$reportmonth\n" ; exit ; }
	249	+
	250	+# for ($month = 4 ; $month <= 10 ; $month ++)
	251	+# {
	252	+# $reportmonth = "2009-" . sprintf ("%02d", $month) ;
	253	+
	254	+ for ($day = 1 ; $day <= 31 ; $day ++) # try day 1-31 for every month, a day without folder is only reported
	255	+ {
	256	+# last if ($month == 10) && ($day > 24) # temp code stay with DST summer time zone for SV
	257	+
	258	+ $date = $reportmonth . "-". sprintf ("%02d", $day) ;
	259	+ $dir = "$path_root/$reportmonth/$date" ;
	260	+
	261	+ if (-d $dir)
	262	+ {
	263	+ if (-e "$dir/#Ready") # only folders that contain a file named '#Ready' are processed
	264	+ {
	265	+ if ($date_first eq "")
	266	+ { $date_first = $date ; }
	267	+ $date_last = $date ; # $date_first and $date_last are read by SetPeriod
	268	+ print "Process dir $dir\n" ;
	269	+ push @dirs_process, $dir ;
	270	+ }
	271	+ else
	272	+ { print "Empty or incomplete dir $dir!\n" ; }
	273	+ }
	274	+ else
	275	+ { print "Missing dir $dir!\n" ; }
	276	+ }
	277	+# }
	278	+ if ($#dirs_process < 0) # no folder qualified
	279	+ { print "No valid data to process.\n" ; exit ; }
	280	+
	281	+ $dir_reports = "$path_root/$reportmonth" ;
	282	+
	283	+ $google_ip_ranges = "<b>IP ranges:</b> known ip ranges for Google are 64.233.[160.0-191.255], 66.249.[64.0-95.255], 66.102.[0.0-15.255], 72.14.[192.0-255.255], <br>74.125.[0.0-255.255], " .
	284	+ "209.085.[128.0-255.255], 216.239.[32.0-63.255] and a few minor other subranges</small><p>\n" ;
	285	+
	286	+ &OpenLog ;
	287	+ &PrepHtml ;
	288	+ &SetPeriod ; # now date range derived from which folders found
	289	+
	290	+# &ReadDate ; date range was read from csv file
	291	+ # the ReadInput subs read from $dir_process and add to their totals, so counts accumulate over all days
	292	+ foreach $dir_process (@dirs_process)
	293	+ {
	294	+ $days_input_found ++ ;
	295	+
	296	+ &ReadInputClients ;
	297	+ &ReadInputCrawlers ;
	298	+ &ReadInputMethods ;
	299	+ &ReadInputMimeTypes ;
	300	+ &ReadInputOpSys ;
	301	+ &ReadInputOrigins ;
	302	+ &ReadInputScripts ;
	303	+ &ReadInputGoogle ;
	304	+ &ReadInputSkins ;
	305	+ &ReadInputIndexPhp ;
	306	+ &ReadInputBrowserLanguages ;
	307	+# &ReadInputCountriesTimed ;
	308	+ }
	309	+
	310	+#&ReadCountryCodes ;
	311	+
	312	+ print "\nDays input = $days_input_found\n" ;
	313	+ $multiplier = 1 / $days_input_found ; # replaces the value set by SetPeriod: based on days actually read, not on first/last date
	314	+ print "\nMultiplier = " . sprintf ("%.4f", $multiplier) . "\n" ;
	315	+
	316	+#&WriteCsvCountriesTimed ;
	317	+#&WriteCsvCountriesGoTo ;
	318	+#exit ;
	319	+
	320	+ foreach $key (keys_sorted_alpha_desc %edit_submit) # diagnostic output only; the keys_sorted_... helpers are defined outside this section
	321	+ { print "YYY " . sprintf ("%5d", $edit_submit {$key}) . ": $key\n" ; }
	322	+
	323	+ foreach $total (keys_sorted_by_value_num_desc %edit_submits)
	324	+ { print "total $total: ${edit_submits {$total}} \n" ; }
	325	+
	326	+ print "\n\n" ;
	327	+
	328	+ # per domain: total from %edit_submit_bot_sort, then the details from %edit_submit_bot
	329	+ foreach $domain (keys_sorted_by_value_num_desc %edit_submit_bot_sort)
	330	+ {
	331	+ $cnt = $edit_submit_bot_sort {$domain} ;
	332	+
	333	+ last if $cnt < 100 ; # stops at the first total below 100 (assumes keys arrive in descending order of value - TODO confirm)
	334	+
	335	+ print "DOMAIN $domain total $cnt\n" ;
	336	+ foreach $key (sort keys %{$edit_submit_bot {$domain}})
	337	+ { print sprintf ("%5d", $edit_submit_bot {$domain} {$key}) . ": $key\n" ; }
	338	+ # { print "$key: ${edit_submit_bot {$domain} {$key}}, " ; }
	339	+ print "\n" ;
	340	+ }
	341	+ print "\n\n" ;
	342	+ foreach $agent (keys_sorted_by_value_num_desc %edit_submit_bot_agent_sort) # same listing per agent
	343	+ {
	344	+ $cnt = $edit_submit_bot_agent_sort {$agent} ;
	345	+
	346	+ last if $cnt < 25 ; # stops at the first total below 25
	347	+
	348	+ print "AGENT $agent total $cnt\n" ;
	349	+ foreach $key (sort keys %{$edit_submit_bot_agent {$agent}})
	350	+ { print sprintf ("%5d", $edit_submit_bot_agent {$agent} {$key}) . ": $key\n" ; }
	351	+ # { print "$key: ${edit_submit_bot {$domain} {$key}}, " ; }
	352	+ print "\n" ;
	353	+ }
	354	+
	355	+
	356	+
	357	+# foreach $key (keys_sorted_by_value_num_desc %edit_submit_bot_agent)
	358	+# { print "AGENT: " .sprintf ("%5d", $edit_submit_bot_agent {$key}) . ": $key\n" ; }
	359	+# print "\n\n" ;
	360	+# foreach $key (keys_sorted_by_value_num_desc %edit_submit_subparms)
	361	+# {
	362	+# $count = $edit_submit_subparms {$key} ;
	363	+#
	364	+# last if $count < 5 ;
	365	+#
	366	+# ($subparm, $referer) = split (',', $key) ;
	367	+# print "ZZZ " . sprintf ("%5d", $count) . ": $referer, $subparm\n" ;
	368	+# }
	369	+ &CalcPercentages ;
	370	+ &NormalizeCounts ;
	371	+ &SortCounts ;
	372	+ # one html report (or csv file) per Write... sub
	373	+ &WriteReportClients ;
	374	+ &WriteReportCrawlers ;
	375	+
	376	+ &WriteReportMethods ;
	377	+ &WriteReportMimeTypes ;
	378	+ &WriteReportOpSys ;
	379	+ &WriteReportOrigins ;
	380	+ &WriteReportScripts ;
	381	+ &WriteReportGoogle ;
	382	+ &WriteReportSkins ;
	383	+ &WriteCsvGoogleBots ;
	384	+ &WriteCsvBrowserLanguages ;
	385	+
	386	+# &WriteCsvCountriesTimed ;
	387	+# &WriteCsvCountriesTargets ;
	388	+ close "FILE_LOG" ;
	389	+ print "\nReady\n\n" ;
	390	+ # on the server (folder /a/squid exists): archive all html reports of this month
	391	+ if (-d "/a/squid")
	392	+ {
	393	+# $cmd = "tar -cf $dir_reports/$date_last\-csv.tar $dir_reports_in/*.csv \| bzip2 $dir_reports/$date_last\-csv.tar" ;
	394	+# print "cmd = '$cmd'\n" ;
	395	+# `$cmd` ;
	396	+ $cmd = "tar -cf $dir_reports/$reportmonth\-html.tar $dir_reports/*.htm && bzip2 $dir_reports/$reportmonth\-html.tar" ; # '&&' not a pipe: bzip2 compresses the tar file in place, so it may only start after tar has finished (and succeeded)
	397	+ print "cmd = '$cmd'\n" ;
	398	+ `$cmd` ;
	399	+ }
	400	+
	401	+ exit ;
	402	+
	403	+sub ReportCountries # writes the per country and per language reports for one mode: 'Views', anything else is treated as edits
	404	+{
	405	+ my $mode_report = shift ;
	406	+ # the mode selects the name parts used for input csv files, output html files and titles
	407	+ if ($mode_report eq 'Views')
	408	+ {
	409	+ $selection = 'PageViews' ;
	410	+ $selection2 = 'Visits' ;
	411	+ $views_edits = 'Page Views' ;
	412	+ }
	413	+ else
	414	+ {
	415	+ $selection = 'PageEdits' ;
	416	+ $selection2 = 'Saves' ;
	417	+ $views_edits = 'Page Edits' ;
	418	+ }
	419	+
	420	+ ($quarter_only2 = $quarter_only) =~ s/ // ; # copy of $quarter_only without its first space, becomes part of a file name below
	421	+
	422	+ $file_csv_squid_counts_monthly = "SquidData${selection2}PerCountryMonthly.csv" ; # LockePrev.csv" ;
	423	+ $file_csv_squid_counts_daily = "SquidData${selection2}PerCountryDaily.csv" ;
	424	+
	425	+ $file_html_per_country_breakdown = "SquidReport${selection}PerCountryBreakdown.htm" ;
	426	+ $file_html_per_country_breakdown_huge = "SquidReport${selection}PerCountryBreakdownHuge.htm" ;
	427	+ $file_html_per_country_overview = "SquidReport${selection}PerCountryOverview$quarter_only2.htm" ;
	428	+ $file_html_per_country_trends = "SquidReport${selection}PerCountryTrends.htm" ;
	429	+ $file_html_per_language_breakdown = "SquidReport${selection}PerLanguageBreakdown.htm" ;
	430	+ $file_csv_per_country_overview = "SquidReport${selection}PerCountryOverview.csv" ;
	431	+ # both input files must exist in $path_in, else the job is aborted
	432	+ $path_csv_squid_counts_monthly = "$path_in/$file_csv_squid_counts_monthly" ;
	433	+ if (! -e $path_csv_squid_counts_monthly) { abort ("Input file $path_csv_squid_counts_monthly not found!") ; }
	434	+ $path_csv_squid_counts_daily = "$path_in/$file_csv_squid_counts_daily" ;
	435	+ if (! -e $path_csv_squid_counts_daily) { abort ("Input file $path_csv_squid_counts_daily not found!") ; }
	436	+
	437	+ &ReadInputCountriesMonthly ($project_mode) ;
	438	+ &ReadInputCountriesDaily ($project_mode) ;
	439	+
	440	+# foreach $week (sort {$a <=> $b} keys %changes_per_week_per_country_code)
	441	+# { &WriteCsvSvgFilePerCountryOverview ($views_edits, $week, \%changes_per_week_per_country_code, 200, "Wikipedia " . lc $views_edits . ", weekly trend") } ;
	442	+
	443	+# foreach $week (sort {$a <=> $b} keys %requests_per_week_per_country_code)
	444	+# { &WriteCsvSvgFilePerCountryOverview ($views_edits, $week, \%requests_per_week_per_country_code, $max_requests_per_connected_us_week, "Wikipedia " . lc $views_edits . " per person") } ;
	445	+# foreach $yyyymm (sort keys %yyyymm_)
	446	+# { &WriteCsvSvgFilePerCountryOverview ($views_edits, $yyyymm, \%requests_per_month_per_country_code, $max_requests_per_connected_us_month, "Wikipedia " . lc $views_edits . " per person") } ;
	447	+
	448	+ &PrepHtml ; # (re)builds $header, $colophon and the link texts
	449	+
	450	+# $comment = "<p> See also: <a href='SquidReportTrafficPerCountry.htm'>Wikipedia $views_edits per Country</a> / <a href='SquidReportLanguagesVisitedDetailed.htm'>Breakdown per Country of Wikipedia's Visited (detailed)</a> / <a href='SquidReportTrafficPerWikipediaOverview.htm'>Breakdown per Wikipedia of Requesting Countries</a>" ;
	451	+
	452	+ $title_main = "Wikimedia Traffic Analysis Report" ;
	453	+ # cross links between the four reports; each report gets its own variant via UnLink (defined elsewhere, presumably it unlinks the n-th entry - TODO confirm)
	454	+ $links = "<p> Also: <b>$views_edits Per Country</b> - " .
	455	+ "<a href='$file_html_per_country_overview'>Overview</a> / " .
	456	+ "<a href='$file_html_per_country_breakdown'>Breakdown</a> / " .
	457	+ "<a href='$file_html_per_country_trends'>Trends</a>,    " .
	458	+ "<b>$views_edits Per Wikipedia Language - </b> " .
	459	+ "<a href='$file_html_per_language_breakdown'>Breakdown</a>" ;
	460	+
	461	+ $title = "$title_main - Wikipedia $views_edits Per Country - Overview" ;
	462	+ &WriteReportPerCountryOverview ($title, $views_edits, &UnLink ($links,1)) ; ;
	463	+ # breakdown is written twice with different cutoffs; the assignments inside the argument list also set these globals
	464	+ $title = "$title_main - Wikipedia $views_edits Per Country - Breakdown" ;
	465	+ &WriteReportPerCountryBreakdown ($title, $views_edits, &UnLink ($links,2),$cutoff_requests = 100, $cutoff_percentage = 1, $show_logcount = $false) ;
	466	+ &WriteReportPerCountryBreakdown ($title, $views_edits, &UnLink ($links,2),$cutoff_requests = 10, $cutoff_percentage = 0.1, $show_logcount = $true) ;
	467	+
	468	+ $title = "$title_main - Wikipedia $views_edits Per Country - Trends" ;
	469	+ &WriteReportPerCountryTrends ($title, $views_edits, &UnLink ($links,3)) ;
	470	+
	471	+ $links =~ s/,.*$// ; # cut at the first comma (the one after the Trends link): only the per country links remain
	472	+ $title = "$title_main - $views_edits Per Wikipedia Language - Breakdown" ;
	473	+ &WriteReportPerLanguageBreakDown ($title, $views_edits, &UnLink ($links,4)) ;
	474	+}
	475	+
	476	+sub ReadDate # older way to find the sample period: parse the date range on the first line of the crawlers csv file
	477	+{
	478	+ open CSV_CRAWLERS, '<', "$dir_process/$file_csv_crawlers" ; # a failed open leaves $line empty, which is caught by the check below
	479	+ $line = <CSV_CRAWLERS> ;
	480	+ close CSV_CRAWLERS ;
	481	+# print "DATE LINE $line\n" ;
	482	+ chomp ($line) ;
	483	+ $line =~ s/^.*?(\d\d\d\d\-\d\d\-\d\d(?:T\d\d)?).*?(\d\d\d\d\-\d\d\-\d\d(?:T\d\d)?).*$/$1.",".$2/e ; # reduce line to 'from,till': first two dates (yyyy-mm-dd, hour as Thh optional), any text before, between and after is dropped
	484	+ ($timefrom,$timetill) = split (',', $line) ;
	485	+ if (($timefrom eq "") \|\| ($timetill eq ""))
	486	+ { abort ("$file_csv_crawlers does not contain valid date range on first line\n") ; }
	487	+ # date parts are cut at fixed positions of yyyy-mm-ddThh
	488	+ $yearfrom = substr ($timefrom,0,4) ;
	489	+ $monthfrom = substr ($timefrom,5,2) ;
	490	+ $dayfrom = substr ($timefrom,8,2) ;
	491	+ $hourfrom = substr ($timefrom,11,2) ;
	492	+
	493	+ $yeartill = substr ($timetill,0,4) ;
	494	+ $monthtill = substr ($timetill,5,2) ;
	495	+ $daytill = substr ($timetill,8,2) ;
	496	+ $hourtill = substr ($timetill,11,2) ;
	497	+
	498	+ $period = sprintf ("%d %s %d %d:00 - %d %s %d %d:00", $dayfrom, month_english_short ($monthfrom-1), $yearfrom, $hourfrom, $daytill, month_english_short ($monthtill-1), $yeartill, $hourtill) ;
	499	+ # from here on $timefrom and $timetill hold epoch seconds instead of text
	500	+ $timefrom = timegm (0,0,$hourfrom,$dayfrom,$monthfrom-1,$yearfrom-1900) ;
	501	+ $timetill = timegm (0,0,$hourtill,$daytill,$monthtill-1,$yeartill-1900) ;
	502	+
	503	+ $timespan = ($timetill - $timefrom) / 3600 ; # length of period in hours
	504	+ $multiplier = (24 * 3600) / ($timetill - $timefrom) ; # factor from period totals to daily averages
	505	+ print "Multiplier = $multiplier\n" ;
	506	+ $header =~ s/DATE/Daily averages, based on sample period: $period (yyyy-mm-dd)/ ;
	507	+}
	508	+
	509	+sub SetPeriod # sets $timefrom, $timetill, $timespan, $multiplier and $period from $date_first and $date_last, and fills DATE in $header
	510	+{
	511	+ # both dates are cut at fixed positions: 0-3 year, 5-6 month, 8-9 day
	512	+ ($year_first, $month_first, $day_first) = map { substr ($date_first, $_->[0], $_->[1]) } ([0,4], [5,2], [8,2]) ;
	513	+ ($year_last, $month_last, $day_last) = map { substr ($date_last, $_->[0], $_->[1]) } ([0,4], [5,2], [8,2]) ;
	514	+
	515	+ # period runs from the start of the first day till the end of the last day (hence one extra day in seconds)
	516	+ $timefrom = timegm (0,0,0,$day_first,$month_first-1,$year_first-1900) ;
	517	+ $timetill = 86400 + timegm (0,0,0,$day_last,$month_last-1,$year_last-1900) ;
	518	+
	519	+ my $seconds_in_period = $timetill - $timefrom ;
	520	+ $timespan = $seconds_in_period / 3600 ; # length of period in hours
	521	+ $multiplier = (24 * 3600) / $seconds_in_period ; # factor from period totals to daily averages
	522	+
	523	+ $period = sprintf ("%d %s %d - %d %s %d", $day_first, month_english_short ($month_first-1), $year_first, $day_last, month_english_short ($month_last-1), $year_last) ;
	524	+ # the placeholder DATE was put into $header by PrepHtml
	525	+ $header =~ s/DATE/Daily averages, based on sample period: $period/ ;
	526	+ print "Sample period: $period => for daily averages multiplier = " .
	527	+ sprintf ("%.2f",$multiplier) . "\n" ;
	528	+}
	529	+
	530	+sub PrepHtml # builds the shared html texts: $header (with placeholders TITLE, HEADER, DATE, X1000, ALSO), $colophon and the menu texts
	531	+{
	532	+ $language = "en" ;
	533	+ $header = "<!DOCTYPE FILE_HTML PUBLIC '-//W3C//DTD FILE_HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>\n" . # NOTE(review): 'FILE_HTML' looks like an accidental replace of 'HTML' - left as is, a valid doctype may change how browsers render the pages
	534	+ "<html lang='en'>\n" .
	535	+ "<head>\n" .
	536	+ "<title>TITLE</title>\n" .
	537	+ "<meta http-equiv='Content-type' content='text/html; charset=iso-8859-1'>\n" .
	538	+ "<meta name='robots' content='index,follow'>\n" .
	539	+ "<script language='javascript' type='text/javascript' src='../WikipediaStatistics13.js'></script>\n" .
	540	+ "<style type='text/css'>\n" .
	541	+ "<!--\n" .
	542	+ "body {font-family:arial,sans-serif; font-size:12px }\n" .
	543	+ "h2 {margin:0px 0px 3px 0px; font-size:18px}\n" .
	544	+ "table {font-size:12px ;}\n" .
	545	+ "td {white-space:wrap; text-align:right; padding-left:2px; padding-right:2px; padding-top:1px;padding-bottom:0px ; font-size:12px ; vertical-align:top}\n" .
	546	+ "th {white-space:nowrap; text-align:right; padding-left:2px; padding-right:2px; padding-top:1px;padding-bottom:0px ; font-size:12px ; vertical-align:top ; font-weight:bold}\n" .
	547	+ "th.small {white-space:wrap; text-align:right; padding-left:2px; padding-right:2px; padding-top:1px;padding-bottom:0px ; font-size:11px ; vertical-align:top ; font-weight:bold}\n" .
	548	+ "td.hl {text-align:left;}\n" .
	549	+ "td.hr {text-align:right;}\n" .
	550	+ "td.r {text-align:right; border: inset 1px #FFFFFF}\n" .
	551	+ "td.c {text-align:center; border: inset 1px #FFFFFF}\n" .
	552	+ "td.l {text-align:left; border: inset 1px #FFFFFF}\n" .
	553	+ "th.c {text-align:center; border: inset 1px #FFFFFF}\n" .
	554	+ "th.l {text-align:left; border: inset 1px #FFFFFF}\n" .
	555	+ "th.lh3 {text-align:left; border: inset 1px #FFFFFF ; font-size:14px}\n" .
	556	+ "a:link { color:blue;text-decoration:none;}\n" .
	557	+ "a:visited {color:#0000FF;text-decoration:none;}\n" .
	558	+ "a:active {color:#0000FF;text-decoration:none;}\n" .
	559	+ "a:hover {color:#FF00FF;text-decoration:underline}\n" .
	560	+ "-->\n" .
	561	+ "</style>\n" .
	562	+ "<body bgcolor='\#FFFFDD'>\n<table width=100%>\n<tr><td class=hl>\n<h2>HEADER</h2>\n<b>DATE</b>\n</td>\n<td class=hr>" .
	563	+ "<input type='button' value=' Archive ' onclick='window.location=\"http://stats.wikimedia.org/archive/squid_reports\"'> " .
	564	+ "<input type='button' value=' Wikimedia Statistics ' onclick='window.location=\"http://stats.wikimedia.org\"'>" .
	565	+ "</td></tr>\n</table><hr>" .
	566	+ " This analysis is based on a 1:1000 sampled server log (squids) X1000\nALSO<p>" ;
	567	+
	568	+ # to be localized some day like any reports
	569	+ $out_license = "All data and images on this page are in the public domain." ;
	570	+ $out_generated = "Generated on " ;
	571	+ $out_author = "Author" ;
	572	+ $out_mail = "Mail" ;
	573	+ $out_site = "Web site" ;
	574	+ $out_myname = "Erik Zachte" ;
	575	+ $out_mymail = "ezachte@### (no spam: ### = wikimedia.org)" ;
	576	+ $out_mysite = "http://infodisiac.com/" ;
	577	+ # page footer; it closes a <small> element that is not opened in this sub
	578	+ $colophon = "<p>\n" .
	579	+ $out_generated . date_time_english (time) . "\n<br>" .
	580	+ $out_author . ":" . $out_myname .
	581	+ " (<a href='" . $out_mysite . "'>" . $out_site . "</a>)\n<br>" .
	582	+ "$out_mail: $out_mymail<br>\n" .
	583	+ "$out_license" .
	584	+ "</small>\n" .
	585	+ "</body>\n" .
	586	+ "</html>\n" ;
	587	+ # menu entries come in two versions: $dummy_... is plain coloured text, $link_... links to the report
	588	+ $dummy_requests = "Requests <font color=#808080>by destination</font> or " ;
	589	+ $dummy_origins = "<font color=#000060>by origin</font>" ;
	590	+ $dummy_methods = "<font color=#000060>Methods</font>" ;
	591	+ $dummy_scripts = "<font color=#000060>Scripts</font>" ;
	592	+ $dummy_skins = "<font color=#000060>Skins</font>" ;
	593	+ $dummy_crawlers = "<font color=#000060>Crawlers</font>" ;
	594	+ $dummy_opsys = "<font color=#000060>Op.Sys.</font>" ;
	595	+ $dummy_browsers = "<font color=#000060>Browsers</font>" ;
	596	+ $dummy_google = "<font color=#000060>Google</font>" ;
	597	+
	598	+ $link_requests = "Requests <a href='$file_html_requests'>by destination</a> or " ;
	599	+ $link_origins = "<a href='$file_html_origins'>by origin</a>" ;
	600	+ $link_methods = "<a href='$file_html_methods'>Methods</a>" ;
	601	+ $link_scripts = "<a href='$file_html_scripts'>Scripts</a>" ;
	602	+ $link_skins = "<a href='$file_html_skins'>Skins</a>" ;
	603	+ $link_crawlers = "<a href='$file_html_crawlers'>Crawlers</a>" ;
	604	+ $link_opsys = "<a href='$file_html_opsys'>Op.Sys.</a>" ;
	605	+ $link_browsers = "<a href='$file_html_clients'>Browsers</a>" ;
	606	+ $link_google = "<a href='$file_html_google'>Google</a>" ;
	607	+}
	608	+
	609	+sub ReadCountryCodes # loads %country_codes (country code => country name) from "$path_in/$file_csv_country_codes"
	610	+{
	611	+ open CODES, '<', "$path_in/$file_csv_country_codes" ;
	612	+ while ($line = <CODES>)
	613	+ {
	614	+ # data lines start with an upper case letter, all other lines are skipped
	615	+ next unless $line =~ /^[A-Z]/ ;
	616	+ chomp ($line) ;
	617	+ # at most four fields, so the fourth one (name) keeps any further commas
	618	+ ($code,$region,$north_south,$name) = split (',',$line,4) ;
	619	+ $country_codes {$code} = $name ;
	620	+ # print "$code => $name\n" ;
	621	+ }
	622	+ close CODES ;
	623	+}
	624	+
	625	+sub ReadInputClients # adds the counts from the clients csv file of the current daily folder ($dir_process) to the client totals
	626	+{
	627	+ my $file_csv = "$dir_process/$file_csv_clients" ;
	628	+ if (! -e $file_csv)
	629	+ { abort ("Function ReadInputClients: file $file_csv not found!!!") ; }
	630	+ open CSV_CLIENTS, '<', $file_csv ;
	631	+ # three kinds of records: E.. (engines), G.. (client groups), all others (clients)
	632	+ while ($line = <CSV_CLIENTS>)
	633	+ {
	634	+ next if $line =~ /^#/ ; # comments
	635	+ next if $line =~ /^:/ ; # csv header (not a comment)
	636	+
	637	+ chomp ($line) ;
	638	+
	639	+ if ($line =~ /^E/)
	640	+ {
	641	+ ($rectype, $engine, $count) = split (',', $line) ;
	642	+
	643	+ next if ($engine !~ /^Gecko/) && ($engine !~ /^AppleWebKit/) ; # only these two engines are counted
	644	+
	645	+ if ($engine !~ / \d/) # no ' <digit>' yet: turn first slash into a space
	646	+ { $engine =~ s/\// / ; }
	647	+
	648	+ if ($engine =~ /AppleWebKit/)
	649	+ {
	650	+ $engine =~ s/AppleWebKit\//AppleWebKit / ; # fix
	651	+ $engine =~ s/Safari\/\d+/Safari/ ; # fix input
	652	+ $engine =~ s/(?:\|iPad\|iPod\|iPhone) Mozilla.*$/iPod)/i ; # fix input
	653	+ ($engine2 = $engine) =~ s/\s*\/?\d\d\d// ; # same name without the first three digit number
	654	+ $webkit_engines {$engine2} += $count ;
	655	+
	656	+ # $webkit_total_engines {$engine} += $count ;
	657	+ }
	658	+
	659	+ $engines {$engine} += $count ;
	660	+ # total per engine name: everything from first slash or first space is dropped
	661	+ $engine =~ s/\/.*$// ;
	662	+ $engine =~ s/ .*$// ;
	663	+ $total_engines {$engine} += $count ;
	664	+ }
	665	+ elsif ($line =~ /^G/)
	666	+ {
	667	+ ($rectype, $mobile, $group, $count, $perc) = split (',', $line) ;
	668	+ $total_clientgroups {$mobile} += $count ;
	669	+ # fold all variants that start with a known name into that one name
	670	+ $group =~ s/^KDDI.*$/KDDI/ ;
	671	+ $group =~ s/^MOT.*$/MOT/ ;
	672	+ $group =~ s/^LG-.*$/LG/i ;
	673	+ $group =~ s/^LGE.*$/LGE/i ;
	674	+ $group =~ s/^KWC.*$/KWC/i ;
	675	+ $group =~ s/^Nokia.*$/Nokia/i ;
	676	+ $group =~ s/^Samsung.*$/Samsung/i ;
	677	+ $group =~ s/^Motorola.*$/Motorola/i ;
	678	+ $group =~ s/^SonyEricsson.*$/SonyEricsson/i ;
	679	+ $group =~ s/^PANTECH.*$/PanTech/i ;
	680	+ $group =~ s/^Palm_Pre/Palm Pre/i ;
	681	+ $clientgroups {"$mobile,$group"} += $count ;
	682	+ }
	683	+ else
	684	+ {
	685	+ ($rectype, $client, $count, $perc) = split (',', $line) ;
	686	+
	687	+ $total_clients += $count ;
	688	+ $client =~ s/_/./g ;
	689	+ $client =~ s/\.\./Other/g ;
	690	+ if ($client !~ / \d/) # was '!=~', which is a numeric compare with a negated match result, not a 'does not match' test
	691	+ { $client =~ s/\// / ; }
	692	+ if ($rectype eq "-") { $total_clients_non_mobile += $count ; }
	693	+ if ($rectype eq "M") { $total_clients_mobile += $count ; }
	694	+ $clients {"$rectype,$client"} += $count ;
	695	+ }
	696	+ }
	697	+ close CSV_CLIENTS ;
	698	+
	699	+# foreach $key (sort keys %clientgroups)
	700	+# {
	701	+# next if $clientgroups {$key} < 50000 ; }
	702	+# next if $key =~ /^M/ ; }
	703	+
	704	+# print "$key:" . $clientgroups {$key} . "\n" ;
	705	+# }
	706	+# print "\n" ;
	707	+# foreach $key (sort keys %total_clientgroups)
	708	+# {
	709	+# print "$key:" . $total_clientgroups {$key} . "\n" ;
	710	+# }
	711	+# print "\n" ;
	712	+}
	713	+
	714	+sub ReadInputCrawlers # adds the counts from the crawlers csv file of the current daily folder ($dir_process) to %crawlers
	715	+{
	716	+ my $file_csv = "$dir_process/$file_csv_crawlers" ;
	717	+ if (! -e $file_csv)
	718	+ { abort ("Function ReadInputCrawlers: file $file_csv not found!!!\n") ; }
	719	+ open CSV_CRAWLERS, '<', $file_csv ;
	720	+ while ($line = <CSV_CRAWLERS>)
	721	+ {
	722	+ next if $line =~ /^#/ ; # comments
	723	+ next if $line =~ /^:/ ; # csv header (not a comment)
	724	+
	725	+ chomp ($line) ;
	726	+ ($count, $mime, $agent) = split (',', $line,3) ; # at most three fields: commas in the agent text are kept
	727	+
	728	+ # $mime2 keeps the full mime type, $mime lumps all image and all text types together
	729	+ $mime2 = $mime ;
	730	+ $mime =~ s/^image\/.*$/image\/../ ;
	731	+ $mime =~ s/^text\/.*$/text\/../ ;
	732	+ $agent =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/seg; # decode %xx sequences
	733	+
	734	+ next if $agent =~ /<\s*script\s*>/i ; # skip agent texts with a script tag, with or without white space inside the brackets
	735	+ next if $agent =~ /MSIE \d+\.\d+/ ; # most likely false positives
	736	+ # the ip address marker is removed, the result is shown by the colour of 'GoogleBot'
	737	+ if ($agent =~ /\\|Google ip add?ress/) # typo
	738	+ {
	739	+ $agent =~ s/\\|Google ip add?ress// ;
	740	+ $agent =~ s/GoogleBot/<b><font color=green>GoogleBot<\/font><\/b>/gi ;
	741	+ }
	742	+ if ($agent =~ / \\|no Google ip address/)
	743	+ {
	744	+ $agent =~ s/ \\|no Google ip address// ;
	745	+ $agent =~ s/GoogleBot/<b><font color=red>GoogleBot<\/font><\/b>/gi ;
	746	+ }
	747	+ if ($agent =~ /www\.teesoft\.info/) # blank out language code and version numbers, so that variants are counted as one agent
	748	+ {
	749	+ $agent =~ s/(\((?:X11\|Windows\|Macintosh);[^;]*;)[^;]*;[^\)]*\)/$1 [lang code]; rv:[..]\)/ ; # '(platform; x; lang; rv:n)' -> '(platform; x; [lang code]; rv:[..])'
	750	+ $agent =~ s/Gecko\/\d+/Gecko\/../ ;
	751	+ $agent =~ s/Firefox\/\d+\.\d\.?\d/Firefox\/../ ;
	752	+ $agent =~ s/(Gecko\/\.\.).*?\(http/$1 etc \(http/ ;
	753	+ }
	754	+
	755	+ $agent =~ s/\+//g ;
	756	+# $agent =~ s/^Mozilla\/\d+\.\d+\s$compatible\s;\s([^$])\)\s*/$1/ ; # Mozilla/5.0 (compatible; xxx) -> xxx
	757	+# $agent =~ s/^Mozilla\/\d+\.\d+\s$\s([^$])\)\s/$1/ ; # Mozilla/5.0 (xxx) -> xxx
	758	+ $agent =~ s/\((http:.*?feedfetcher.html)[^\)]*\)/($1)/ ; # (http://www.google.com/feedfetcher.html; 1 subscribers; feed-id=1894739019218796495)
	759	+ $agent =~ s/FeedFetcher-Google/FeedFetcher-Google/i ; # enforce one spelling
	760	+ if ($agent !~ /http:/) # highlight crawler keywords, but not in texts that contain a url
	761	+ { $agent =~ s/(bot\|spider\|crawl(?:er)?)/<b>$1<\/b>/gi ; }
	762	+ if ($mime2 eq "text/html")
	763	+ { $total_page_crawlerrequests += $count ; }
	764	+ $crawlers {"$mime\|$agent"} += $count ;
	765	+ }
	766	+ close CSV_CRAWLERS ;
	767	+}
	768	+
	769	+sub ReadInputMethods # adds the counts from the methods csv file of the current daily folder ($dir_process) to %methods and %statusses
	770	+{
	771	+ my $file_csv = "$dir_process/$file_csv_methods" ;
	772	+ if (! -e $file_csv)
	773	+ { abort ("Function ReadInputMethods: file $file_csv not found!!!") ; }
	774	+ open CSV_METHODS, '<', $file_csv ;
	775	+ while ($line = <CSV_METHODS>)
	776	+ {
	777	+ next if $line =~ /^#/ ; # comments
	778	+ next if $line =~ /^:/ ; # csv header (not a comment)
	779	+ chomp ($line) ; # like the other ReadInput subs: keep the line end out of the last field
	780	+ ($method, $status, $count) = split (',', $line) ;
	781	+ $statusses {"$method,$status"} += $count ; # count per method and status
	782	+ $methods {$method} += $count ; # count per method
	783	+ }
	784	+ close CSV_METHODS ;
	785	+}
	786	+
	787	+sub ReadInputMimeTypes # adds the counts from the requests csv file of the current daily folder ($dir_process) to the totals per project, domain and mime type
	788	+{
	789	+ my $file_csv = "$dir_process/$file_csv_requests" ;
	790	+ if (! -e $file_csv)
	791	+ { abort ("Function ReadInputMimeTypes: file $file_csv not found!!!") ; }
	792	+ open CSV_REQUESTS, '<', $file_csv ;
	793	+ while ($line = <CSV_REQUESTS>)
	794	+ {
	795	+ next if $line =~ /^#/ ; # comments
	796	+ next if $line =~ /^:/ ; # csv header (not a comment)
	797	+
	798	+ chomp $line ;
	799	+ ($project, $origin, $ext, $mime, $parm, $count) = split (',', $line) ;
	800	+
	801	+ $project = &ExpandAbbreviation ($project) ;
	802	+ # insert html line breaks into long mime type names
	803	+ $mime =~ s/(\w+\.)(\w+\.)(\w+)/$1$2<br>$3/ ;
	804	+ $mime =~ s/opensearchdescription/opensearch-<br>description/ ;
	805	+ if ($project =~ /\./) # project name with a dot: record is skipped ('next' below makes the assignments in this branch ineffective)
	806	+ {
	807	+ $project = '!invalid!' ;
	808	+ if ($origin ne "external")
	809	+ { $origin = 'internal' ; }
	810	+ $ext = ".." ;
	811	+ $mime = ".." ;
	812	+ next ;
	813	+ }
	814	+
	815	+ if ($parms eq "") # NOTE(review): the field read above is $parm, not $parms - possibly a typo, neither is used further in this sub
	816	+ { $parms = " " ; }
	817	+ $ext =~ s/^([a-z\[\]]*)[^a-z\[\]].*$/$1/g ; # keep only the leading run of lower case letters and square brackets
	818	+ $ext =~ s/\((.*)\)/ ($1.php)/ ; # text between literal parentheses gets '.php' appended
	819	+ if ($project eq $origin)
	820	+ { $origin = '⇐' ; }
	821	+
	822	+ if ($project ne "upload")
	823	+ { @counts_prem {"$project,$origin,$ext,$mime"} += $count ; }
	824	+ # if ($project ne "upload")
	825	+ # { @counts_pm {"$project,$mime"} += $count ; }
	826	+
	827	+ $counts_pm {"$project,$mime"} += $count ;
	828	+ ($domain = $project) =~ s/\:.*$// ; # domain = project name up to the first colon
	829	+ $counts_dm {"$domain,$mime"} += $count ;
	830	+ $mimetypes {$mime} += $count ;
	831	+ $projects {$project} += $count ;
	832	+ $domains {$domain} += $count ;
	833	+
	834	+ if ($mime =~ /image\/(?:png\|jpeg\|gif)/)
	835	+ {
	836	+ $images_project {$project} += $count ;
	837	+ $images_domain {$domain} += $count ;
	838	+ }
	839	+ $mimetypes_found {$mime} ++ ; # number of records per mime type, not the sum of counts
	840	+ # @counts_prem {"$project,$origin,$ext,$mime"} += $count ;
	841	+
	842	+ $total_mimes += $count ;
	843	+ }
	844	+ close CSV_REQUESTS ;
	845	+
	846	+# $html .= "<tr><th class=c>counts</th><th class=l>project</th><th class=l>origin</th><th class=l>extension</th><th class=l>mime</th></tr>\n" ;
	847	+# $rows = 0 ;
	848	+# foreach $key (sort keys %counts_prem)
	849	+# {
	850	+# ($project, $origin, $ext, $mime) = split (',', $key) ;
	851	+# $count = $counts_prem {$key} ;
	852	+# $count =~ s/^(\d+?)(\d\d\d)$/$1,$2/ ;
	853	+# $html .= "<tr><td class=r>${count},000</td><td class=l>$project</td><td class=l>$origin</td><td class=l>$ext</td><td class=l>$mime</td></tr>\n" ;
	854	+# $rows++ ;
	855	+# }
	856	+# $html .= "</table>\n" ;
	857	+# $html .= "<small>$rows rows written</small><p>" ;
	858	+
	859	+# $html .= "<table border=1>\n" ;
	860	+# $html .= "<tr><th class=c>counts</th><th class=l>project</th><th class=l>mime</th></tr>\n" ;
	861	+# $rows = 0 ;
	862	+# foreach $key (sort keys %counts_pm)
	863	+# {
	864	+# ($project, $mime) = split (',', $key) ;
	865	+# $count = $counts_pm {$key} ;
	866	+# $count =~ s/^(\d+?)(\d\d\d)$/$1,$2/ ;
	867	+# $html .= "<tr><td class=r>${count},000</td><td class=l>$project</td><td class=l>$mime</td></tr>\n" ;
	868	+# $rows++ ;
	869	+# }
	870	+# $html .= "</table>\n" ;
	871	+# $html .= "<small>$rows rows written</small><p>" ;
	872	+}
	873	+
	874	+sub ReadInputOpSys # adds the counts from the operating systems csv file of the current daily folder ($dir_process) to %opsys
	875	+{
	876	+ my $file_csv = "$dir_process/$file_csv_opsys" ;
	877	+ if (! -e $file_csv)
	878	+ { abort ("Function ReadInputOpSys: file $file_csv not found!!!") ; }
	879	+ open CSV_OPSYS, '<', $file_csv ;
	880	+ while ($line = <CSV_OPSYS>)
	881	+ {
	882	+ if ($line =~ /^#/) # comments
	883	+ {
	884	+ if ($line =~ /^# mobile:/) # this comment line is the exception: it lists the keywords for mobile devices, followed by text in parentheses
	885	+ {
	886	+ $line =~ s/^.*?: // ;
	887	+ ($month_upd_keywords_mobile = $line) =~ s/^.*?\(([^\)]+)\).*$/$1/ ; # the text between the parentheses
	888	+ ($keywords_mobile = $line) =~ s/ \([^\)]+\).*$// ; # the part before the parentheses
	889	+ $keywords_mobile =~ s/\\|/, /g ;
	890	+ $keywords_mobile =~ s/((?:[^,]+,){10})/$1<br>/g ; # html line break after every tenth keyword
	891	+ next ;
	892	+ }
	893	+ next ;
	894	+ }
	895	+ next if $line =~ /^:/ ; # csv header (not a comment)
	896	+
	897	+ chomp $line ;
	898	+ ($rectype, $os, $count, $perc) = split (',', $line) ;
	899	+
	900	+ next if $count !~ /^\d+$/ ; # -,Linux Gentoo,,2,0.00% (extra comma !)
	901	+
	902	+ $os =~ s/_/./g ;
	903	+ $os =~ s/\.\./Other/g ;
	904	+ if ($rectype ne "G")
	905	+ {
	906	+ if ($os =~ / \d/) # already has ' <digit>': leave as is
	907	+ { ; }
	908	+ else
	909	+ { $os =~ s/\// / ; }
	910	+ }
	911	+
	912	+ if ($rectype eq "-") { $total_opsys_non_mobile += $count ; }
	913	+ if ($rectype eq "M") { $total_opsys_mobile += $count ; }
	914	+ $opsys {"$rectype,$os"} += $count ;
	915	+ }
	916	+ close CSV_OPSYS ; # was missing: all other ReadInput subs close their file
	917	+}
	918	+
	919	+
	# Read the request origins csv ("$dir_process/$file_csv_origins") and
	# accumulate the counts into package globals.
	#   source "internal": %origin_int_top, %origin_int_top_split,
	#                      %project_int_top, %project_int_top_split
	#   any other source : %origin_ext_top, %origin_ext_top_split,
	#                      $total_origins_external_counted, $total_page_requests_external
	# The *_split hashes are keyed "$mimecat:<name>", the others by name only.
	# Takes no arguments. Aborts when the file is missing or cannot be opened.
	sub ReadInputOrigins
	{
	  my $file_csv = "$dir_process/$file_csv_origins" ;
	  if (! -e $file_csv)
	  { abort ("Function ReadInputOrigins: file $file_csv not found!!!") ; }
	  open (CSV_ORIGINS, '<', $file_csv) || abort ("Function ReadInputOrigins: file $file_csv could not be opened: $!") ;

	  while ($line = <CSV_ORIGINS>)
	  {
	    next if $line =~ /^#/ ; # comments
	    next if $line =~ /^:/ ; # csv header (not a comment)

	    chomp $line ;
	    ($source, $origin, $toplevel, $mimecat, $count) = split (',', $line) ;

	    # test: (marked as such in the original) external origins other than google get the top level domain appended
	    if (($source eq "external") && ($origin !~ /^google/))
	    { $origin .= $toplevel ; }

	    if ($source eq "internal")
	    {
	      # ExpandAbbreviation is defined outside this sub; the inline 'wb:' -> 'wikibooks:' style
	      # substitutions that used to live here (commented out) have been removed
	      $origin = &ExpandAbbreviation ($origin) ;
	      ($project,$subproject) = split (':', $origin) ;
	      $origin_int_top_split {"$mimecat:$origin"} += $count ;
	      $origin_int_top {$origin} += $count ;
	      $project_int_top_split {"$mimecat:$project"} += $count ;
	      $project_int_top {$project} += $count ;
	    }
	    else
	    {
	      if ($origin eq "unmatched ip address")
	      { $origin = "origin unknown" ; }

	      if ($mimecat eq "page")
	      { $total_page_requests_external += $count ; }

	      $origin_ext_top_split {"$mimecat:$origin"} += $count ;
	      $origin_ext_top {$origin} += $count ;
	      $total_origins_external_counted += $count ;
	    }
	  }

	  close CSV_ORIGINS ;
	}
	1002	+
	# Read the scripts csv ("$dir_process/$file_csv_scripts") in two passes.
	#   pass 1: count per "script,action" combination for php requests -> %actions
	#   pass 2: accumulate counts per script (%scripts_php, %scripts_js, %scripts_css)
	#           and per php "script,parameters" combination (%parms),
	#           skipping php requests whose action was counted fewer than 2 times in pass 1
	# Takes no arguments. Aborts when the file is missing or cannot be opened.
	# Note: %actions is not cleared here, so counts add up over repeated calls.
	sub ReadInputScripts
	{
	  my $file_csv = "$dir_process/$file_csv_scripts" ;
	  if (! -e $file_csv)
	  { abort ("Function ReadInputScripts: file $file_csv not found!!!") ; }

	  # pass 1: collect action counts
	  open (CSV_SCRIPTS, '<', $file_csv) || abort ("Function ReadInputScripts: file $file_csv could not be opened: $!") ;
	  while ($line = <CSV_SCRIPTS>)
	  {
	    next if $line =~ /^#/ ; # comments
	    next if $line =~ /^:/ ; # csv header (not a comment)

	    chomp $line ;
	    $line =~ s/\%3B/;/gi ;
	    $line =~ s/\&amp;/\&/gi ; # decode html entity (was a no-op s/\&/\&/ after entity decoding of the listing)
	    ($ext, $script, $parm, $count) = split (',', $line) ;
	    if ($script =~ /\%/)
	    { $script = "other" ; }
	    if ($parm =~ /\%/)
	    { $parm = "other" ; }

	    if (($ext eq "php") && ($parm =~ /action=/) && ($parm !~ /search=/)) # action can occur as parm after search
	    {
	      @parms = split ('\&', $parm) ;
	      foreach $parm (@parms) # foreach localizes $parm: full parameter string is restored after the loop
	      {
	        ($keyword,$data) = split ('\=', $parm) ;
	        if ($keyword eq "action")
	        { $actions {"$script,$data"} += $count ; }
	      }
	    }
	  }
	  close CSV_SCRIPTS ;

	# foreach $key (keys_sorted_by_value_num_desc %actions)
	# { print "$key: " . $actions {$key} . "\n" ; }

	  # pass 2: collect counts per script and per parameter combination
	  open (CSV_SCRIPTS, '<', $file_csv) || abort ("Function ReadInputScripts: file $file_csv could not be opened: $!") ;
	  read_script:
	  while ($line = <CSV_SCRIPTS>)
	  {
	    next if $line =~ /^#/ ; # comments
	    next if $line =~ /^:/ ; # csv header (not a comment)

	    chomp $line ;
	    $line =~ s/\%3B/;/gi ;
	    $line =~ s/\%5B/[/gi ;
	    $line =~ s/\%5D/]/gi ;
	    $line =~ s/\&amp;/\&/gi ;
	    ($ext, $script, $parm, $count) = split (',', $line) ;

	    # incomplete validation check on valid names, but captures already lot of rubbish
	    if ($script =~ /\%/)
	    { $script = "other" ; }
	    if ($parm =~ /\%/)
	    { $parm = "other" ; }

	    if (($parm =~ /amp;amp;/) ||
	        ($parm =~ /feed=.*feed=/))
	    { next read_script ; }

	    if (($ext eq "php") && ($parm =~ /action=/))
	    {
	      @parms = split ('\&', $parm) ;
	      foreach $parm (@parms)
	      {
	        ($keyword,$data) = split ('\=', $parm) ;
	        if ($keyword eq "action")
	        {
	          if ($actions {"$script,$data"} < 2) # rare (or in pass 1 uncounted) action: ignore whole record
	          { next read_script ; }
	        }
	      }
	    }

	    if ($ext eq "php")
	    {
	      # generalize ns10 -> ns.. + remove all ns..=.. but one
	      # ('-^-^' is a temporary placeholder that protects the first occurrence)
	      $parm =~ s/\&ns\d+/\&ns../g ;
	      $parm =~ s/\&ns\.\.=\.\./-^-^/ ;
	      $parm =~ s/\&ns\.\.=\.\.//g ;
	      $parm =~ s/\-\^\-\^/\&ns\.\.=\.\./g ;

	      # generalize rsargs[]= -> remove all but one, which is shown as rsargs[n]=..
	      $parm =~ s/\&rsargs\[\]=\.\./-^-^/ ;
	      $parm =~ s/\&rsargs\[\]=\.\.//g ;
	      $parm =~ s/\-\^\-\^/\&rsargs\[n\]=\.\./g ;

	      # long parameter strings: insert html line break at first '&' after every 100 characters
	      if (length ($parm) > 100)
	      { $parm =~ s/(.{100}[^\&]*\&)/$1<br>/g ; }

	      $parms {"$script,$parm"} += $count ;
	      $scripts_php {$script} += $count ;
	    }
	    elsif ($ext eq "js")
	    { $scripts_js {$script} += $count ; }
	    elsif ($ext eq "css")
	    { $scripts_css {$script} += $count ; }
	  }
	  close CSV_SCRIPTS ;
	}
	1102	+
	# Read the search engine csv ("$dir_process/$file_csv_google") and accumulate
	# counts for rows with site "google" into package globals:
	#   %searches_crawlers, %searches_service, %searches_service_mimecat, %searches_service_matches,
	#   %searches_toplevel, %searches_toplevel_mimecat,
	#   %searches_toplevel_tld_found, %searches_mimecat_tld_not_found
	# Takes no arguments. Aborts when the file is missing or cannot be opened.
	sub ReadInputGoogle
	{
	  my $file_csv = "$dir_process/$file_csv_google" ;
	  if (! -e $file_csv)
	  { abort ("Function ReadInputGoogle: file $file_csv not found!!!") ; }
	  open (CSV_SEARCH, '<', $file_csv) || abort ("Function ReadInputGoogle: file $file_csv could not be opened: $!") ;

	  while ($line = <CSV_SEARCH>)
	  {
	    next if $line =~ /^#/ ; # comments
	    next if $line =~ /^:/ ; # csv header (not a comment)

	    chomp $line ;
	    ($matches, $site, $origin, $service, $agent, $mimecat, $toplevel, $count) = split (',', $line) ;

	    # fold service labels into fewer categories
	    if (($service eq "Imposters?") || ($service eq "GoogleBotNot?"))
	    { $service = "GoogleBot?" ; }
	    if ($service eq "Crawler")
	    { $service = "GoogleBot" ; }

	    # an 'x' in the matches column sets the flag to 'Y'
	    # (presumably: ip address is in a known Google range - verify against the csv writer)
	    if ($matches =~ /x/)
	    { $googleIp = 'Y' ; }
	    else
	    { $googleIp = 'N' ; }

	    next if $site ne "google" ;

	    if ($toplevel eq "-")
	    { $toplevel = "undefined" ; }
	    if (length ($toplevel) > 3)
	    { $toplevel = "_$toplevel" ; } # sort on top

	    $searches_crawlers {$service} += $count ;
	    $searches_service {"$service,$googleIp"} += $count ;
	    $searches_toplevel {$toplevel} += $count ;
	    $searches_service_mimecat {"$service,$mimecat,$googleIp"} += $count ;
	    $searches_service_mimecat {"$service,total,$googleIp"} += $count ;
	    $searches_service_matches {"$service,$matches"} += $count ;

	    # names prefixed with '_' above fail this test and are counted as 'tld not found'
	    if ($toplevel =~ /^[a-zA-Z0-9-]+$/)
	    { $searches_toplevel_tld_found {$toplevel} += $count ; }
	    else
	    {
	      $searches_mimecat_tld_not_found {$mimecat} += $count ;
	      $searches_mimecat_tld_not_found {"total"} += $count ;
	    }

	    $searches_toplevel_mimecat {"$toplevel,$mimecat"} += $count ;
	    $searches_toplevel_mimecat {"$toplevel,total"} += $count ;

	  # if ($toplevel !~ /:/) { print "invalid toplevel $toplevel\n" ; }
	  }
	  close CSV_SEARCH ;
	}
	1159	+
	# Read the skins csv ("$dir_process/$file_csv_skins") and accumulate counts into package globals:
	#   %skins    - count per full skin path as found in the csv
	#   %skin_set - count per skin name (the part of the path before the first '/')
	# Takes no arguments. Aborts when the file is missing or cannot be opened.
	sub ReadInputSkins
	{
	  my $file_csv = "$dir_process/$file_csv_skins" ;
	  if (! -e $file_csv)
	  { abort ("Function ReadInputSkins: file $file_csv not found!!!") ; }
	  open (CSV_SKINS, '<', $file_csv) || abort ("Function ReadInputSkins: file $file_csv could not be opened: $!") ;

	  while ($line = <CSV_SKINS>)
	  {
	    next if $line =~ /^#/ ; # comments
	    next if $line =~ /^:/ ; # csv header (not a comment)

	    chomp $line ;
	    ($skins, $count) = split (',', $line) ;

	    $skins {$skins} += $count ; # hash %skins and scalar $skins are distinct variables
	    ($name,$rest) = split ('\/', $skins, 2) ;
	    $skin_set {$name} += $count ;
	  }
	  close CSV_SKINS ; # original closed CSV_SCRIPTS here, leaving this handle open
	}
	1180	+
	# Read the index.php requests csv ("$dir_process/$file_csv_indexphp") and count
	# edit and submit requests (one per csv row, the csv has no count column).
	# Only rows with 'index.php' in the ext column, action=edit or action=submit in the
	# parameters and mime type text/html are considered.
	#   rows where the bot column contains 'Y':
	#     %edit_submit_bot {$domain} {"<edit|save|preview|unknown>,$referer"}, %edit_submit_bot_sort {$domain}
	#     %edit_submit_bot_agent {$agent} {...}, %edit_submit_bot_agent_sort {$agent}
	#   rows where the bot column contains neither 'Y' nor 'N', with domain and referer 'wp:en':
	#     %edit_submit, %edit_submits
	# Takes no arguments. Aborts when the file is missing or cannot be opened.
	sub ReadInputIndexPhp
	{
	  my $file_csv = "$dir_process/$file_csv_indexphp" ;
	  if (! -e $file_csv)
	  { abort ("Function ReadInputIndexPhp: file $file_csv not found!!!") ; }
	  open (CSV_INDEXPHP, '<', $file_csv) || abort ("Function ReadInputIndexPhp: file $file_csv could not be opened: $!") ;

	  while ($line = <CSV_INDEXPHP>)
	  {
	    next if $line =~ /^#/ ; # comments
	    next if $line =~ /^:/ ; # csv header (not a comment)

	    chomp $line ;
	    ($bot,$domain,$referer,$ext,$status,$mime,$parm,$agent) = split (',', $line) ;

	    my $action = "" ;
	    if ($parm =~ /action=edit/)
	    { $action = 'edit' ; }
	    if ($parm =~ /action=submit/)
	    { $action = 'submit' ; }

	    next if $ext !~ /index.php/ ;
	    next if $parm !~ /action=(?:edit|submit)(?:$|\&)/ ; # submit or submit&.., not submitlogin
	    next if $mime ne "text/html" ; # excludes mime - (undefined), application/x-external-editor on action=edit
	                                   # and text/plain, text/xml, application/xml on action=submit

	    if ($bot =~ /Y/)
	    {
	      $intent = "" ;

	      if ($agent =~ /DotNetWikiBot/i)
	      { $agent = "DotNetWikiBot" ; }
	      $agent =~ s/\%27/\'/g ;
	    # $agent =~ s/\(.*?\)//g;

	      if ($action eq "edit")
	      {
	        if ($referer =~ /^\w\w:/) # referer is a two letter project code + ':' -> internal
	        { $referer = "int" ; }
	        $edit_submit_bot {$domain} {"edit,$referer"} ++ ;
	        $edit_submit_bot_sort {$domain} ++ ;
	        $edit_submit_bot_agent {$agent} {"$action,$referer"}++ ;
	        $edit_submit_bot_agent_sort {$agent}++ ;
	      }

	      if ($action eq "submit")
	      {
	        if ($referer =~ /^\w\w:/)
	        { $referer = "int" ; }

	        # squid status tells what the submit was about
	        $intent = 'unknown' ;
	        if ($status eq "TCP_MISS/302") { $intent = 'save' ; }
	        elsif ($status eq "TCP_MISS/200") { $intent = 'preview' ; }
	      # next if $intent ne 'save' ;

	        $edit_submit_bot {$domain} {"$intent,$referer"} ++ ;
	        $edit_submit_bot_sort {$domain} ++ ;

	      # if ($referer eq "-") { $edit_submit_bot_agent {$agent}++ ; }
	        $edit_submit_bot_agent {$agent} {"$intent,$referer"}++ ;
	        $edit_submit_bot_agent_sort {$agent}++ ;
	      }
	    }

	    # NOTE(review): both tests below are kept as found; together they skip every row that is marked
	    # as either bot or non bot, so the remainder of the loop only sees rows with another bot value
	    next if $bot =~ /N/ ; # 2009-05 /N/ -> total oldid: 127, total other: 54, total redlink: 4
	    next if $bot =~ /Y/ ; # 2009-05 /N/ -> total oldid: 127, total other: 54, total redlink: 4
	    next if $domain ne "wp:en" ; # 2009-05 ne -> total other: 26, total redlink: 22
	  # if (($referer ne "-") && ($referer ne "ext") && ($referer ne "wp:en")) { next ; }
	  # if (($referer ne "-") && ($referer !~ /^..:/)) { $referer = "ext" ; }
	  # if ($referer eq "-") { $referer = "- " ; }
	    next if $referer ne "wp:en" ; # 2009-05 eq -> # total other: 2014, total redlink: 1031, total oldid: 47, total undo: 30

	    my $filter = '' ;
	    if ($parm =~ /action=edit/)
	    {
	      # later tests overrule earlier ones: undo > oldid > redlink > other
	      $filter = 'other' ;
	      if ($parm =~ /redlink/) { $filter = 'redlink' ; }
	      if ($parm =~ /oldid=/) { $filter = 'oldid' ; }
	      if ($parm =~ /undo=/) { $filter = 'undo' ; }

	      $edit_submit {"[$bot $referer $action $filter] $parm"}++ ;
	      $edit_submits {"$filter"}++ ;
	    }
	    if ($parm =~ /action=submit/)
	    {
	      $edit_submit {"$bot $referer $action $status"}++ ;
	    }

	  # my @subparms = split ('\&', $parm) ;
	  # foreach $subparm (@subparms)
	  # { $edit_submit_subparms {"[$action] [$filter] $subparm"}++ ; }
	  }
	  close CSV_INDEXPHP ;

	# Sample results, noted by the original author:
	# next if $bot =~ /N/ ; # + any referrer ->
	# Sample period: 1 May 2009 - 31 May 2009 => for daily averages multiplier = 0.03
	# 9: [bot=Y - edit oldid] action=edit&oldid=&section=&title=..
	# 3: [bot=Y - edit oldid] action=edit&oldid=..&title=..
	# 17: [bot=Y - edit oldid] action=edit&oldid=..&title=..&useskin=..
	# 1: [bot=Y - edit other] _herbs&action=edit&title=..
	# 65: [bot=Y - edit other] action=edit&section=..&title=..
	# 1: [bot=Y - edit other] action=edit&stub&title=..
	# 2: [bot=Y - edit other] action=edit&title=
	# 188: [bot=Y - edit other] action=edit&title=..
	# 31: [bot=Y - edit other] action=edit&title=..&useskin=..
	# 30: [bot=Y - edit redlink] action=edit&redlink=..&title=..
	# 5: [bot=Y - edit undo] action=edit&title=..&undo=..&undoafter=..
	# 14: [bot=Y ext edit other] action=edit&section=..&title=..
	# 5: [bot=Y ext edit other] action=edit&title=..
	# 11: [bot=Y ext edit redlink] action=edit&redlink=..&title=..
	# 2: [bot=Y ext edit undo] action=edit&title=..&undo=..&undoafter=..
	# 107: [bot=Y wp:en edit oldid] action=edit&oldid=&section=&title=..
	# 3: [bot=Y wp:en edit oldid] action=edit&oldid=..&section=&title=..
	# 17: [bot=Y wp:en edit oldid] action=edit&oldid=..&title=..
	# 1: [bot=Y wp:en edit other] action=edit&articleget=..&dykcredittab=..&editintro=..&preload=..&preloadtitle=..&section=..&title=..
	# 5: [bot=Y wp:en edit other] action=edit&section=..&title=..
	# 48: [bot=Y wp:en edit other] action=edit&title=..
	# 4: [bot=Y wp:en edit redlink] action=edit&redlink=..&title=..
	# 9: bot=Y - submit TCP_MISS/200
	# 62: bot=Y - submit TCP_MISS/302
	# 31: bot=Y wp:en submit TCP_MISS/302
	# total other: 361
	# total oldid: 156
	# total redlink: 45
	# total undo: 7
	}
	1306	+
	# Read the csv with requests per country per time slot ("$dir_process/$file_csv_countries_timed")
	# and accumulate, for targets starting with 'wp' only, into package globals:
	#   %countries, %targets, %times - number of csv rows seen per country / target / time value
	#   %countries_timed {"$bot,$target,$country,$time"}   - summed counts
	#   %countries_totals {"$bot,$target"} {$country}      - summed counts
	#   %targets_totals {"$bot,$country"} {$target}        - summed counts
	# The bot column is normalized to 'Y' (contains Y) or 'N' (anything else).
	# Takes no arguments. Aborts when the file is missing or cannot be opened.
	sub ReadInputCountriesTimed
	{
	  my $file_csv = "$dir_process/$file_csv_countries_timed" ;
	  if (! -e $file_csv)
	  { abort ("Function ReadInputCountriesTimed: file $file_csv not found!!! ") ; } # message named ReadInputSkins before
	  open (CSV_COUNTRIES, '<', $file_csv) || abort ("Function ReadInputCountriesTimed: file $file_csv could not be opened: $!") ;

	  while ($line = <CSV_COUNTRIES>)
	  {
	    next if $line =~ /^#/ ; # comments
	    next if $line =~ /^:/ ; # csv header (not a comment)

	    chomp $line ;
	    ($bot,$target,$country,$time,$count) = split (',', $line) ;

	    next if $target !~ /^wp/ ; # wikipedia only

	    if ($bot =~ /Y/)
	    { $bot = 'Y' ; }
	    else
	    { $bot = 'N' ; }

	    $countries {$country} ++ ;
	    $targets {$target} ++ ;
	    $times {$time} ++ ;
	    $countries_timed {"$bot,$target,$country,$time"} += $count ;
	    $countries_totals {"$bot,$target"}{$country} += $count ;
	    $targets_totals {"$bot,$country"}{$target} += $count ;
	  }
	  close CSV_COUNTRIES ;
	}
	1336	+
	# Read the country codes csv ("$path_in/$file_csv_country_codes"), which has columns
	# country code, region code, north/south code, country name (name may contain commas and quotes).
	# Fills package globals:
	#   %region_codes, %north_south_codes - per country code (also for the pseudo countries skipped below)
	#   %country_names {$country_code}    - name with double quotes removed; "--" maps to "Unknown"
	#   %country_codes_all {"$country_name|$country_code"}
	# Also sets global $path_csv_country_codes. Takes no arguments.
	# Aborts when the file is missing or cannot be opened.
	sub ReadInputCountriesNames
	{
	  $path_csv_country_codes = "$path_in/$file_csv_country_codes" ;
	  if (! -e $path_csv_country_codes) { abort ("Input file $path_csv_country_codes not found!") ; }

	  open (CSV_COUNTRY_CODES, '<', $path_csv_country_codes) || abort ("Input file $path_csv_country_codes could not be opened: $!") ;
	  $country_names {"--"} = "Unknown" ;
	  while ($line = <CSV_COUNTRY_CODES>)
	  {
	    chomp $line ;

	    next if $line =~ /^#/ ;

	    # limit 4: commas inside the country name stay part of the name
	    ($country_code,$region_code,$north_south_code,$country_name) = split (',', $line,4) ;
	    $region_codes {$country_code} = $region_code ;
	    $north_south_codes {$country_code} = $north_south_code ;

	    $country_name =~ s/"//g ;

	    # entries that are not real countries get no name
	    next if $country_name eq "Anonymous Proxy" ;
	    next if $country_name eq "Satellite Provider" ;
	    next if $country_name eq "Other Country" ;
	    next if $country_name eq "Asia/Pacific Region" ;
	    next if $country_name eq "Europe" ;

	    $country_names {$country_code} = $country_name ;
	    $country_codes_all {"$country_name|$country_code"} ++ ;
	  }
	  close CSV_COUNTRY_CODES ; # handle was left open in the original
	}
	1372	+
	# Read the country meta info csv ("$path_in/$file_csv_country_meta_info") with columns
	# country, link, population, connected, icon, and fill package globals:
	#   %country_meta_info {$country} = "$link,$population,$connected,$icon"
	#   $connected_us - 'connected' value for "United States", with underscores removed
	# Country names are first mapped to the names used by MaxMind, so that they match %country_names.
	# Takes no arguments. When the file cannot be opened a message is printed and nothing is read.
	sub ReadInputCountriesMeta
	{
	  # http://en.wikipedia.org/wiki/List_of_countries_by_population
	  # http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users
	  my $path_csv_meta = "$path_in/$file_csv_country_meta_info" ;
	  if (! open (COUNTRY_META_INFO, '<', $path_csv_meta))
	  {
	    # original read silently from an unopened handle; stay non fatal, but tell why no meta info is found
	    print "Function ReadInputCountriesMeta: file $path_csv_meta could not be opened: $!\n" ;
	    return ;
	  }

	  while ($line = <COUNTRY_META_INFO>)
	  {
	    chomp $line ;
	    ($country,$link,$population,$connected,$icon) = split ',', $line ;
	    print "$line\n" ; # qqq (debug trace, kept as found)

	    # commas inside names are stored as html entity, as comma is the field separator
	    # (was a no-op s/,/,/ after entity decoding of the listing)
	    $country =~ s/\&comma;/,/g ;

	    # use country names as given by MaxMind
	    # NOTE(review): substitutions are applied in sequence, a later one can match the result of an earlier one
	    $country =~ s/Brunei/Brunei Darussalam/ ;
	    $country =~ s/C..?te d'Ivoire/Cote d'Ivoire/ ;
	    $country =~ s/Congo, The Democratic Republic of the/Republic of the Congo/ ;
	    $country =~ s/Dem. Rep. of Congo/Congo - The Democratic Republic of the/ ;
	    $country =~ s/East timor/Timor-Leste/ ;
	    $country =~ s/Guyane/French Guiana/ ;
	    $country =~ s/Iran/Iran, Islamic Republic of/ ;
	    $country =~ s/Laos/Lao People's Democratic Republic/ ;
	    $country =~ s/Libya/Libyan Arab Jamahiriya/ ;
	    $country =~ s/Macau/Macao/ ;
	    $country =~ s/Moldova/Moldova, Republic of/ ;
	    # (a rule here turned 'North Korea' into 'Korea, Republic of', which is South Korea:
	    #  removed, the correct rule for North Korea follows below and could never match before)
	    $country =~ s/Palestine/Palestinian Territory/ ;
	    $country =~ s/Republic of the Congo/Congo/ ;
	    $country =~ s/Russia/Russian Federation/ ;
	    $country =~ s/North Korea/Korea, Democratic People's Republic of/ ;
	    $country =~ s/South Korea/Korea, Republic of/ ;
	    $country =~ s/Syria/Syrian Arab Republic/ ;
	    $country =~ s/Tanzania/Tanzania, United Republic of/ ;
	    $country =~ s/U.S. Virgin Islands/Virgin Islands, U.S./ ; # was mapped to 'Virgin Islands, British'
	    $country =~ s/Vatican City/Holy See (Vatican City State)/ ;
	    $country =~ s/^Korea$/South Korea/ ;

	    $connected =~ s/connected/../g ;
	    $country_meta_info {$country} = "$link,$population,$connected,$icon" ;
	    print "meta info found for '$country'\n" ; # qqq (debug trace, kept as found)

	    if ($country eq "United States")
	    { ($connected_us = $connected) =~ s/_//g ; }
	  }
	  close COUNTRY_META_INFO ;
	}
	1418	+
	# Add up population and number of connected people over all countries in %country_names,
	# using the meta info stored per country name in %country_meta_info.
	# Results go to package globals (which are added to, not reset):
	#   $population_tot, $connected_tot                - grand totals
	#   %population_per_region, %connected_per_region  - totals per region code and per north/south code
	# Takes no arguments. Global $country_meta is left at the last meta record processed.
	sub CollectRegionCounts
	{
	  for my $cc (keys %country_names)
	  {
	    $country_meta = $country_meta_info {$country_names {$cc}} ;

	    # meta record is "link,population,connected,icon"; only the two numbers are needed here
	    my (undef, $people, $online) = split (',', $country_meta) ;
	    s/_//g for ($people, $online) ; # numbers are stored with '_' as thousands separator

	    $population_tot += $people ;
	    $connected_tot += $online ;

	    # both kinds of codes share the same pair of hashes
	    for my $area ($region_codes {$cc}, $north_south_codes {$cc})
	    {
	      $population_per_region {$area} += $people ;
	      $connected_per_region {$area} += $online ;
	    }
	  }
	}
	1447	+
	# Read the monthly squid counts csv ($path_csv_squid_counts_monthly) with columns
	# yyyymm (e.g. 2009-12), project, language, country code, bot, count
	# and accumulate request counts for one project into package globals: totals, and counts
	# per month, per quarter, per country and per language, plus the same for 'recent' months
	# (any month in the current year, or in the previous year from the current month number onwards).
	# All those globals are reset first, so the sub can be called once per project.
	#
	# Parameter: $project_mode - project code that csv rows must match (checked by DiscardSquidInput)
	# Rows rejected by DiscardSquidInput are skipped; rows for unknown countries only add
	# to %requests_unknown_per_quarter.
	# Aborts when the file cannot be opened or a quarter has no requests for known countries,
	# dies when no recent months were found.
	sub ReadInputCountriesMonthly
	{
	  my $project_mode = shift ;

	  undef %yyyymm_ ;
	  undef %quarters ;
	  undef %requests_unknown_per_quarter ;
	  undef %country_codes ;
	  undef %requests_all ;
	  undef %requests_all_per_period ;
	  undef %requests_per_quarter ;
	  undef %requests_per_country ;
	  undef %requests_per_quarter_per_country ;
	  undef %requests_per_country_per_language ;
	  undef %requests_per_language_per_country ;
	  undef %requests_per_quarter_per_country_per_language ;
	  undef %requests_per_month_per_country_code ;
	  undef %requests_per_month_us ;
	  undef %descriptions_per_period ;
	  undef %requests_recently_all ;
	  undef %requests_recently_per_country_code ;
	  undef %requests_recently_per_country ;
	  undef %requests_recently_per_country_per_language ;
	  undef %requests_recently_per_language_per_country ;
	  undef %requests_recently_per_language ;
	  undef %months_recently ;

	  # start/stop are found by string comparison, hence these extreme initial values
	  $requests_recently_start = "999999" ;
	  $requests_recently_stop = "000000" ;
	  $requests_start = "999999" ;
	  $requests_stop = "000000" ;

	  $requests_all = 0 ;
	  $requests_recently_all = 0 ;

	  my ($sec,$min,$hour,$day,$report_month,$report_year) = localtime (time) ;
	  $report_year += 1900 ;
	  $report_month ++ ;

	  print "Process project $project_mode\n\n" ;

	  open (CSV_SQUID_COUNTS_MONTHLY, '<', $path_csv_squid_counts_monthly) ||
	    abort ("Function ReadInputCountriesMonthly: file $path_csv_squid_counts_monthly could not be opened: $!") ;
	  while ($line = <CSV_SQUID_COUNTS_MONTHLY>)
	  {
	    chomp $line ;
	    $line =~ s/,\s+/,/g ; # remove white space around field separators
	    $line =~ s/\s+,/,/g ;
	    ($yyyymm,$project,$language,$code,$bot,$count) = split (',', $line) ;

	    ($code,$language) = &NormalizeSquidInput ($code,$language) ;
	    $country = &GetCountryName ($code) ;

	    next if &DiscardSquidInput ($bot,$project,$project_mode,$code,$language) ;

	  # $yyyymm = "2009-12" ;
	    $yyyymm_ {$yyyymm} ++ ;

	    $year = substr ($yyyymm,0,4) ;
	    $month = substr ($yyyymm,5,2) ;
	  # print "year $year report_year month $month $report_year $report_month\n" ;

	    $recently = $false ;

	    if (($year == $report_year) or (($year == $report_year - 1) && ($month >= $report_month))) # last 12 months
	    { $recently = $true ; }

	    if ($month <= 3) { $quarter = $year . ' Q1' ; }
	    elsif ($month <= 6) { $quarter = $year . ' Q2' ; }
	    elsif ($month <= 9) { $quarter = $year . ' Q3' ; }
	    else { $quarter = $year . ' Q4' ; }

	    if ($quarter_only ne '')
	    { next if $quarter ne $quarter_only ; }

	  # if ($views_edits eq 'Page Edits')

	    $quarters {$quarter} ++ ;

	    # GetCountryName returns "<code> (?)" for codes without a name
	    if (($country =~ /\?/) || ($country =~ /unknown/i))
	    { $requests_unknown_per_quarter {$quarter} += $count ; next ; }

	    $country_codes {"$country|$code"}++ ;
	    $requests_all += $count ;
	    $requests_all_per_period {$yyyymm} += $count ;
	    $requests_per_quarter {$quarter} += $count ;
	    $requests_per_country {$country} += $count ;

	    $requests_per_quarter_per_country {$quarter} {$country} += $count ;
	    $requests_per_country_per_language {$country} {$language} += $count ;
	    $requests_per_language_per_country {$language} {$country} += $count ;
	    $requests_per_quarter_per_country_per_language {$quarter} {$country} {$language} += $count ;
	    $requests_per_month_per_country_code {$yyyymm} {"$country|$code"} += $count ;

	    if ($code eq "US")
	    { $requests_per_month_us {$yyyymm} += $count ; }

	    $descriptions_per_period {$yyyymm} = $yyyymm ;
	    if ($yyyymm lt $requests_start) { $requests_start = $yyyymm ; }
	    if ($yyyymm gt $requests_stop) { $requests_stop = $yyyymm ; }

	    if ($recently)
	    {
	      if ($yyyymm lt $requests_recently_start) { $requests_recently_start = $yyyymm ; }
	      if ($yyyymm gt $requests_recently_stop) { $requests_recently_stop = $yyyymm ; }

	      $months_recently {$yyyymm}++ ;
	      $requests_recently_all += $count ;
	      $requests_recently_per_country_code {"$country|$code"} += $count ;
	      $requests_recently_per_country {$country} += $count ;
	      $requests_recently_per_country_per_language {$country} {$language} += $count ;
	      $requests_recently_per_language_per_country {$language} {$country} += $count ;
	      $requests_recently_per_language {$language} += $count ;
	    }
	  }
	  close CSV_SQUID_COUNTS_MONTHLY ; # handle was left open in the original

	  print "\n" ;
	  @quarters = keys_sorted_alpha_desc %quarters ;
	  foreach $quarter (@quarters)
	  {
	    print "Quarter $quarter: requests: " . (0+$requests_per_quarter {$quarter}) . "\n" ;
	    if ($requests_per_quarter {$quarter} == 0)
	    { abort ("No known requests found for quarter $quarter") ; }
	  }
	  print "\n" ;

	  $months_recently = keys %months_recently ; # scalar: number of recent months found
	  if ($months_recently == 0) { die "\$months_recently == 0\n" ; }

	  # "2009-12" -> "12/09"
	  $requests_recently_start = substr ($requests_recently_start,5,2) . "/" . substr ($requests_recently_start,2,2) ;
	  $requests_recently_stop = substr ($requests_recently_stop ,5,2) . "/" . substr ($requests_recently_stop ,2,2) ;
	  $requests_start = substr ($requests_start,5,2) . "/" . substr ($requests_start,2,2) ;
	  $requests_stop = substr ($requests_stop ,5,2) . "/" . substr ($requests_stop ,2,2) ;

	  # highest monthly request count for the US
	  # (original looped over 'keys %$yyyymm' and indexed by $week, so the maximum was never determined)
	  foreach $yyyymm (keys %yyyymm_)
	  {
	    if ($requests_per_month_us {$yyyymm} > $max_requests_per_month_us)
	    { $max_requests_per_month_us = $requests_per_month_us {$yyyymm} ; }
	  }

	# die "\$connected_us == 0" if $connected_us == 0 ;
	  if ($connected_us > 0)
	  { $max_requests_per_connected_us_month = sprintf ("%.1f", $max_requests_per_month_us / $connected_us) ; }
	}
	1602	+
	# Read the daily squid counts csv ($path_csv_squid_counts_daily) with columns
	# yyyymmdd (e.g. 2009-12-01), project, language, country code, bot, count (rows must be in date order)
	# and collect request counts per week for one project into package globals:
	#   %requests_all_per_period, %requests_per_week_per_country_code, %requests_per_week_us,
	#   %descriptions_per_period, %days_in_input_for_week, %country_codes_found
	# Weeks are keyed as year followed by two digit week number, where week number is
	# int (day of year / 7) + 1.
	# Then, for every week that has data for the preceding week key, derive
	#   %missing_days, %correct_for_missing_days,
	#   %changes_per_week_per_country_code {$week} {$country_code} (see below), $max_requests_per_week_us
	# and write the trace lines collected in @trace to svg/SquidReportPageViewsPerCountryTrend.csv.
	#
	# Parameter: $project_mode - project code that csv rows must match (checked by DiscardSquidInput)
	# Dies when input is not in date order, aborts when the input file cannot be opened.
	sub ReadInputCountriesDaily
	{
	  # http://en.wikipedia.org/wiki/List_of_countries_by_population
	  # http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users

	  my $project_mode = shift ;

	  undef %country_codes_found ;
	  undef %weeknum_this_years ;
	  undef %descriptions_per_period ;
	  undef %days_in_input_for_week ;
	  undef %requests_all_per_period ;
	  undef %requests_per_week_per_country_code ;
	  undef %requests_per_week_us ;
	  undef %missing_days ;
	  undef %correct_for_missing_days ;
	  undef %changes_per_week_per_country_code ;

	# $requests_recently_start = "999999" ;
	# $requests_recently_stop = "000000" ;

	# $time_2000_01_01 = timegm(0,0,0,1,1-1,2000-1900) ;
	  $sec_per_day = 24 * 60 * 60 ;

	  my ($sec,$min,$hour,$day,$report_month,$report_year) = localtime (time) ;
	  $report_year += 1900 ;
	  $report_month ++ ;

	  print "Process project $project_mode\n\n" ;

	  $yyyymmdd_prev = "" ;
	  open (CSV_SQUID_COUNTS_DAILY, '<', $path_csv_squid_counts_daily) ||
	    abort ("Function ReadInputCountriesDaily: file $path_csv_squid_counts_daily could not be opened: $!") ;
	  while ($line = <CSV_SQUID_COUNTS_DAILY>)
	  {
	    chomp $line ;
	    ($yyyymmdd,$project,$language,$code,$bot,$count) = split (',', $line) ;

	    die "\$yyyymmdd $yyyymmdd lt \$yyyymmdd_prev $yyyymmdd_prev" if $yyyymmdd lt $yyyymmdd_prev ;
	    $yyyymmdd_prev = $yyyymmdd ;

	    ($code,$language) = &NormalizeSquidInput ($code,$language) ;
	    $country = &GetCountryName ($code) ;

	    $country_codes_found {"$country|$code"} ++ ; # counted before discarding, so for all projects

	    next if &DiscardSquidInput ($bot,$project,$project_mode,$code,$language) ;

	  # $yyyymmdd = "2009-12-01" ;
	    $yyyymmdd_ {$yyyymmdd} ++ ;

	    $year = substr ($yyyymmdd,0,4) ;
	    $month = substr ($yyyymmdd,5,2) ;
	    $day = substr ($yyyymmdd,8,2) ;

	    $time = timegm(0,0,0,$day,$month-1,$year-1900) ;
	  # $days_since_2000 = int (($time - $time_2000_01_01) / $sec_per_day) ;
	    $days_this_year = (gmtime $time) [7] ; # day of year, 0 based
	    $weeknum_this_year = int ($days_this_year / 7) + 1 ;
	    # despite its name: year followed by two digit week number within that year
	    $weeknum_since_2000 = $year . sprintf ("%02d",$weeknum_this_year) ; # * int ($days_since_2000 / 7) + 1 ;

	    $weeknum_this_years {"$weeknum_this_year - $weeknum_since_2000"}++ ;

	    $descriptions_per_period {$weeknum_since_2000} = "week $weeknum_this_year - " . month_english_short ($month-1) . " $year" ;
	    $days_in_input_for_week {$weeknum_since_2000} {$yyyymmdd} ++ ;

	    $requests_all_per_period {$weeknum_since_2000} += $count ;
	    $requests_per_week_per_country_code {$weeknum_since_2000} {"$country|$code"} += $count ;

	    if ($code eq "US")
	    { $requests_per_week_us {$weeknum_since_2000} += $count ; }

	  # last if ($weeknum_since_2000 == 501) ; # test
	  }
	  close CSV_SQUID_COUNTS_DAILY ; # handle was left open in the original

	  foreach $week (sort keys %weeknum_this_years)
	  { print "week $week " . $weeknum_this_years {$week} . "\n" ; }

	  foreach $week (sort {$a <=> $b} keys %days_in_input_for_week)
	  {
	    # a week can only be compared when the preceding week key has data
	    # NOTE(review): $week-1 is plain arithmetic on the key, so the first week of a year never has a predecessor
	    @keys = keys %{$requests_per_week_per_country_code {$week-1}} ;
	    if (@keys == 0)
	    {
	    # print "skip week $week: no data for previous week available.\n" ;
	      next ;
	    }

	    if ($requests_per_week_us {$week} > $max_requests_per_week_us)
	    { $max_requests_per_week_us = $requests_per_week_us {$week} ; }

	    # NOTE(review): %week_descriptions is not filled in this sub (descriptions go to %descriptions_per_period);
	    # $desc only serves the commented out trace below - kept as found
	    $desc= $week_descriptions {$week} ;
	    @days = keys %{$days_in_input_for_week {$week}} ;
	    $daycount = @days ;
	    $missing_days {$week} = 7 - $daycount ;
	    $correct_for_missing_days {$week} = 7 / $daycount ; # $daycount >= 1: week is only known because a day was seen
	  # print "Week $week: $desc: $daycount " . (join ' - ', @days) . " ${correct_for_missing_days {$week}}\n" ;
	  # foreach $country_code (keys %{$requests_per_week_per_country_code {$week}})

	    foreach $country_code (keys %country_codes_all)
	    {
	      $new = &CorrectForMissingDays ($week , ${$requests_per_week_per_country_code {$week }} {$country_code}) ;
	      $old = &CorrectForMissingDays ($week-1, ${$requests_per_week_per_country_code {$week-1}} {$country_code}) ;

	    # print "country_code $country_code\n" ;
	      if ($old == 0)
	      {
	        if ($new > 0)
	        {
	        # print "$country_code: no data for prev week\n" ;
	          $changes_per_week_per_country_code {$week} {$country_code} = 100 ;
	        }
	      }
	      else
	      {
	        # 100 = no change; square root dampens the ratio, result is limited to 0..200
	        $delta = sprintf ("%.1f", 100 * sqrt ($new / $old)) ;
	        if ($delta < 0) { $delta = 0 ; }
	        if ($delta > 200) { $delta = 200 ; }
	        $changes_per_week_per_country_code {$week} {$country_code} = $delta ;
	        $country_code =~ s/,/;/g ; # keep trace file parseable as csv
	        push @trace, "$country_code, $week, $old, $new, $delta\n" ;
	      }
	    }
	  }

	  # trace file is a by-product: failure to write it is reported, but not fatal
	  if (open (TRACE, '>', "svg/SquidReportPageViewsPerCountryTrend.csv"))
	  {
	    print TRACE sort @trace ;
	    close TRACE ;
	  }
	  else
	  { print "Function ReadInputCountriesDaily: file svg/SquidReportPageViewsPerCountryTrend.csv could not be written: $!\n" ; }

	# die "\$connected_us == 0" if $connected_us == 0 ;
	  if ($connected_us > 0)
	  { $max_requests_per_connected_us_week = sprintf ("%.1f", (($max_requests_per_week_us * 1000) / $connected_us)) ; }
	}
	1734	+
	# Map non standard language codes and dependent territories onto the codes used in the reports.
	# Parameters: $code     - country code as found in the squid counts
	#             $language - language code as found in the squid counts
	# Returns   : list ($code,$language), either or both possibly replaced
	sub NormalizeSquidInput
	{
	  my ($code,$language) = @_ ;

	  # language codes that occur in the input => proper code
	  my %language_fixes = (jp => "ja", cz => "cs") ;

	  # following are part of France, according to Wikipedia, List_of_countries_by_population
	  my %part_of_france =
	  (
	    BL => 1, # Saint Barthélemy
	    MF => 1, # Saint Martin
	    MQ => 1, # Martinique
	    NC => 1, # New Caledonia
	    PF => 1, # French Polynesia
	    PM => 1, # Saint Pierre and Miquelon
	    WF => 1, # Wallis and Futuna
	    YT => 1, # Mayotte
	  ) ;

	  $language = $language_fixes {$language} if exists $language_fixes {$language} ;
	  $code = 'FR' if $part_of_france {$code} ;

	  return ($code,$language) ;
	}
	1754	+
	1755	+sub DiscardSquidInput
	1756	+{
	1757	+ ($bot,$project,$project_mode,$code,$language) = @_ ;
	1758	+ if ($bot ne "U" or # user
	1759	+ $project ne $project_mode or # eg 'wp'
	1760	+ $language eq "upload" or
	1761	+ $language =~ /mobile/i or
	1762	+ $code eq "A1" or # Anonymous Proxy
	1763	+ $code eq "A2" or # Satellite Provider
	1764	+ $code eq "AP" or # Asia/Pacific Region
	1765	+ $code eq "EU") # Europe
	1766	+ {
	1767	+ # print "bot $bot project '$project' project_mode $project_mode code $code language $language\n" ;
	1768	+ return ($true) ;
	1769	+ }
	1770	+
	1771	+ return ($false) ;
	1772	+}
	1773	+
	1774	+sub GetCountryName
	1775	+{
	1776	+ my $code = shift ;
	1777	+ if ($country_names {$code} eq "")
	1778	+ {
	1779	+ $country = "$code (?)" ;
	1780	+ if ($country_code_not_specified_reported {$code}++ == 0)
	1781	+ { print "Country name not specified for $code\n" ; }
	1782	+ }
	1783	+ else
	1784	+ { $country = $country_names {$code} ; }
	1785	+ return ($country) ;
	1786	+}
	1787	+
	1788	+sub ReadInputBrowserLanguages
	1789	+{
	1790	+ my $file_csv = "$dir_process/$file_csv_browser_languages" ;
	1791	+ if (! -e $file_csv)
	1792	+ { abort ("Function ReadInputBrowserLanguages: file $file_csv not found!!! ") ; }
	1793	+ open CSV_BROWSER_LANGUAGES, '<', $file_csv ;
	1794	+ while ($line = <CSV_BROWSER_LANGUAGES>)
	1795	+ {
	1796	+ next if $line =~ /^#/ ; # comments
	1797	+ next if $line =~ /^:/ ; # csv header (not a comment)
	1798	+
	1799	+ chomp $line ;
	1800	+ ($browser,$language,$count) = split (',', $line) ;
	1801	+
	1802	+ $browser_languages {"$browser,$language"} += $count ;
	1803	+ }
	1804	+ close CSV_BROWSER_LANGUAGES ;
	1805	+}
	1806	+
	1807	+sub CalcPercentages
	1808	+{
	1809	+ my $total_opsys = $total_opsys_mobile + $total_opsys_non_mobile ;
	1810	+ foreach $key (keys %opsys)
	1811	+ { $opsys_perc {$key} = sprintf ("%.2f",(100*$opsys {$key}/$total_opsys)) . "%" ; }
	1812	+
	1813	+ foreach $key (keys %clients)
	1814	+ { $clients_perc {$key} = sprintf ("%.2f",(100*$clients {$key}/$total_clients)) . "%" ; }
	1815	+
	1816	+ foreach $key (keys %clientgroups)
	1817	+ {
	1818	+ $perc = 100*$clientgroups {$key}/$total_clients ;
	1819	+ if ($key =~ /^M/)
	1820	+ { $perc_threshold = 0.005 ; }
	1821	+ else
	1822	+ { $perc_threshold = 0.02 ; }
	1823	+
	1824	+ if ($perc > $perc_threshold)
	1825	+ { $clientgroups_perc {$key} = sprintf ("%.2f",$perc) . "%" ; }
	1826	+ else
	1827	+ {
	1828	+ ($mobile,$group) = split (',', $key) ;
	1829	+ $clientgroups_other {$mobile} += $clientgroups {$key} ;
	1830	+ $clientgroups {$key} = 0 ;
	1831	+ }
	1832	+ }
	1833	+}
	1834	+
	1835	+sub NormalizeCounts
	1836	+{
	1837	+# ReadInputClients
	1838	+ foreach $key (keys %engines)
	1839	+ { $engines {$key} = &Normalize ($engines {$key}) ; }
	1840	+
	1841	+ foreach $key (keys %clientgroups)
	1842	+ { $clientgroups {$key} = &Normalize ($clientgroups {$key}) ; }
	1843	+
	1844	+ foreach $key (keys %clients)
	1845	+ { $clients {$key} = &Normalize ($clients {$key}) ; }
	1846	+
	1847	+ foreach $key (keys %clientgroups_other)
	1848	+ { $clientgroups_other {$key} = &Normalize ($clientgroups_other {$key}) ; }
	1849	+
	1850	+ foreach $key (keys %total_clientgroups)
	1851	+ { $total_clientgroups {$key} = &Normalize ($total_clientgroups {$key}) ; }
	1852	+
	1853	+ foreach $key (keys %total_engines)
	1854	+ { $total_engines {$key} = &Normalize ($total_engines {$key}) ; }
	1855	+
	1856	+ foreach $key (keys %webkit_engines)
	1857	+ { $webkit_engines {$key} = &Normalize ($webkit_engines {$key}) ; }
	1858	+
	1859	+ $total_clients = &Normalize ($total_clients) ;
	1860	+ $total_clients_mobile = &Normalize ($total_clients_mobile) ;
	1861	+ $total_clients_non_mobile = &Normalize ($total_clients_non_mobile) ;
	1862	+
	1863	+# ReadInputCrawlers
	1864	+ foreach $key (keys %crawlers)
	1865	+ { $crawlers {$key} = &Normalize ($crawlers {$key}) ; }
	1866	+
	1867	+ $total_page_crawlerrequests = &Normalize ($total_page_crawlerrequests) ;
	1868	+
	1869	+# ReadInputMethods
	1870	+ foreach $key (keys %statusses)
	1871	+ { $statusses {$key} = &Normalize ($statusses {$key}) ; }
	1872	+ foreach $key (keys %methods)
	1873	+ { $methods {$key} = &Normalize ($methods {$key}) ; }
	1874	+
	1875	+# ReadInputMimeTypes
	1876	+ foreach $key (keys %mimetypes)
	1877	+ { $mimetypes {$key} = &Normalize ($mimetypes {$key}) ; }
	1878	+ foreach $key (keys %projects)
	1879	+ { $projects {$key} = &Normalize ($projects {$key}) ; }
	1880	+ foreach $key (keys %domains)
	1881	+ { $domains {$key} = &Normalize ($domains {$key}) ; }
	1882	+ foreach $key (keys %images_project)
	1883	+ { $images_project {$key} = &Normalize ($images_project {$key}) ; }
	1884	+ foreach $key (keys %images_domain)
	1885	+ { $images_domain {$key} = &Normalize ($images_domain {$key}) ; }
	1886	+ foreach $key (keys %mimetypes_found)
	1887	+ { $mimetypes_found {$key} = &Normalize ($mimetypes_found {$key}) ; }
	1888	+ foreach $key (keys %counts_pm)
	1889	+ { $counts_pm {$key} = &Normalize ($counts_pm {$key}) ; }
	1890	+ foreach $key (keys %counts_dm)
	1891	+ { $counts_dm {$key} = &Normalize ($counts_dm {$key}) ; }
	1892	+ foreach $key (keys %counts_prem)
	1893	+ { $counts_prem {$key} = &Normalize ($counts_prem {$key}) ; }
	1894	+
	1895	+ $total_mimes = &Normalize ($total_mimes) ;
	1896	+
	1897	+# ReadInputOpSys
	1898	+ foreach $key (keys %opsys)
	1899	+ { $opsys {$key} = &Normalize ($opsys {$key}) ; }
	1900	+
	1901	+ $total_opsys_non_mobile = &Normalize ($total_opsys_non_mobile) ;
	1902	+ $total_opsys_mobile = &Normalize ($total_opsys_mobile) ;
	1903	+
	1904	+# ReadInputOrigins
	1905	+ foreach $key (keys %origin_int_top)
	1906	+ { $origin_int_top {$key} = &Normalize ($origin_int_top {$key}) ; }
	1907	+ foreach $key (keys %origin_int_top_split)
	1908	+ { $origin_int_top_split {$key} = &Normalize ($origin_int_top_split {$key}) ; }
	1909	+ foreach $key (keys %origin_ext_top)
	1910	+ { $origin_ext_top {$key} = &Normalize ($origin_ext_top {$key}) ; }
	1911	+ foreach $key (keys %origin_ext_top_split)
	1912	+ { $origin_ext_top_split {$key} = &Normalize ($origin_ext_top_split {$key}) ; }
	1913	+ foreach $key (keys %origin_ext_page_top)
	1914	+ { $origin_ext_page_top {$key} = &Normalize ($origin_ext_page_top {$key}) ; }
	1915	+ foreach $key (keys %project_int_top)
	1916	+ { $project_int_top {$key} = &Normalize ($project_int_top {$key}) ; }
	1917	+ foreach $key (keys %project_int_top_split)
	1918	+ { $project_int_top_split {$key} = &Normalize ($project_int_top_split {$key}) ; }
	1919	+
	1920	+ $total_page_requests_external = &Normalize ($total_page_requests_external) ;
	1921	+ $total_origins_external_counted = &Normalize ($total_origins_external_counted) ;
	1922	+
	1923	+# ReadInputScripts
	1924	+ foreach $key (keys %actions)
	1925	+ { $actions {$key} = &Normalize ($actions {$key}) ; }
	1926	+ foreach $key (keys %parms)
	1927	+ { $parms {$key} = &Normalize ($parms {$key}) ; }
	1928	+ foreach $key (keys %scripts_php)
	1929	+ { $scripts_php {$key} = &Normalize ($scripts_php {$key}) ; }
	1930	+ foreach $key (keys %scripts_js)
	1931	+ { $scripts_js {$key} = &Normalize ($scripts_js {$key}) ; }
	1932	+ foreach $key (keys %scripts_css)
	1933	+ { $scripts_css {$key} = &Normalize ($scripts_css {$key}) ; }
	1934	+
	1935	+# ReadInputGoogle
	1936	+ foreach $key (keys %searches_service)
	1937	+ { $searches_service {$key} = &Normalize ($searches_service {$key}) ; }
	1938	+ foreach $key (keys %searches_crawlers)
	1939	+ { $searches_crawlers {$key} = &Normalize ($searches_crawlers {$key}) ; }
	1940	+ foreach $key (keys %searches_toplevel)
	1941	+ { $searches_toplevel {$key} = &Normalize ($searches_toplevel {$key}) ; }
	1942	+ foreach $key (keys %searches_toplevel_tld_found)
	1943	+ { $searches_toplevel_tld_found {$key} = &Normalize ($searches_toplevel_tld_found {$key}) ; }
	1944	+ foreach $key (keys %searches_service_mimecat)
	1945	+ { $searches_service_mimecat {$key} = &Normalize ($searches_service_mimecat {$key}) ; }
	1946	+ foreach $key (keys %searches_service_matches)
	1947	+ { $searches_service_matches {$key} = &Normalize ($searches_service_matches {$key}) ; }
	1948	+ foreach $key (keys %searches_toplevel_mimecat)
	1949	+ { $searches_toplevel_mimecat {$key} = &Normalize ($searches_toplevel_mimecat {$key}) ; }
	1950	+ foreach $key (keys %searches_mimecat_tld_not_found)
	1951	+ { $searches_mimecat_tld_not_found {$key} = &Normalize ($searches_mimecat_tld_not_found {$key}) ; }
	1952	+
	1953	+# ReadInputSkins
	1954	+ foreach $key (keys %skins)
	1955	+ { $skins {$key} = &Normalize ($skins {$key}) ; }
	1956	+ foreach $key (keys %skin_set)
	1957	+ { $skin_set {$key} = &Normalize ($skin_set {$key}) ; }
	1958	+
	1959	+# ReadInputBrowserLanguages
	1960	+ foreach $key (keys %browser_languages)
	1961	+ { $browser_languages {$key} = &Normalize ($browser_languages {$key}) ; }
	1962	+}
	1963	+
	1964	+sub SortCounts
	1965	+{
	1966	+# ReadInputClients
	1967	+# @engines_sorted_count = keys_sorted_by_value_num_desc %engines ;
	1968	+ @engines_sorted_alpha = keys_sorted_alpha_asc %engines ;
	1969	+ @webkit_engines_sorted_alpha = keys_sorted_alpha_asc %webkit_engines ;
	1970	+ @clientgroups_sorted_count = keys_sorted_by_value_num_desc %clientgroups ;
	1971	+ @clientgroups_sorted_alpha = keys_sorted_alpha_asc %clientgroups ;
	1972	+ @clients_sorted_count = keys_sorted_by_value_num_desc %clients ;
	1973	+ @clients_sorted_alpha = keys_sorted_alpha_asc %clients ;
	1974	+
	1975	+# ReadInputCrawlers
	1976	+# @crawlers_sorted_count = keys_sorted_by_value_num_desc %crawlers ;
	1977	+# @crawlers_sorted_alpha = keys_sorted_alpha_asc %crawlers ;
	1978	+
	1979	+# ReadInputMethods
	1980	+ @statusses_sorted_count = keys_sorted_by_value_num_desc %statusses ;
	1981	+ @statusses_sorted_method = keys_sorted_alpha_desc %statusses ;
	1982	+ @methods_sorted_count = keys_sorted_by_value_num_desc %methods ;
	1983	+ @methods_sorted_method = keys_sorted_alpha_desc %methods ;
	1984	+
	1985	+# ReadInputMimeTypes
	1986	+ @mimetypes_sorted = sort {&SortMime ($b) <=> &SortMime ($a)} keys %mimetypes ;
	1987	+ @projects_sorted = keys_sorted_by_value_num_desc %projects ;
	1988	+ @domains_sorted = keys_sorted_by_value_num_desc %domains ;
	1989	+
	1990	+# ReadInputOpSys
	1991	+ @opsys_sorted_alpha = sort {lc($a) cmp lc($b)} keys %opsys ;
	1992	+ @opsys_sorted_count = keys_sorted_by_value_num_desc %opsys ;
	1993	+
	1994	+# ReadInputOrigins
	1995	+ @origin_int_top_sorted_alpha = keys_sorted_alpha_desc %origin_int_top ;
	1996	+ @origin_ext_top_sorted_alpha = keys_sorted_alpha_desc %origin_ext_top ;
	1997	+ @origin_ext_page_top_sorted_alpha = keys_sorted_alpha_desc %origin_ext_page_top ;
	1998	+ @origin_int_top_sorted_count = keys_sorted_by_value_num_desc %origin_int_top ;
	1999	+ @origin_ext_top_sorted_count = keys_sorted_by_value_num_desc %origin_ext_top ;
	2000	+ @origin_ext_page_top_sorted_count = keys_sorted_by_value_num_desc %origin_ext_page_top ;
	2001	+
	2002	+ @project_int_top_sorted_alpha = keys_sorted_alpha_desc %project_int_top ;
	2003	+ @project_int_top_sorted_count = keys_sorted_by_value_num_desc %project_int_top ;
	2004	+
	2005	+# ReadInputScripts
	2006	+ @parms_sorted_count = keys_sorted_by_value_num_desc %parms ;
	2007	+ @parms_sorted_script = keys_sorted_alpha_desc %parms ;
	2008	+
	2009	+ @scripts_php_sorted_count = keys_sorted_by_value_num_desc %scripts_php ;
	2010	+ @scripts_php_sorted_script = keys_sorted_alpha_asc %scripts_php ;
	2011	+ @scripts_js_sorted_count = keys_sorted_by_value_num_desc %scripts_js ;
	2012	+ @scripts_js_sorted_script = keys_sorted_alpha_asc %scripts_js ;
	2013	+ @scripts_css_sorted_count = keys_sorted_by_value_num_desc %scripts_css ;
	2014	+ @scripts_css_sorted_script = keys_sorted_alpha_asc %scripts_css ;
	2015	+
	2016	+# ReadInputGoogle
	2017	+ @searches_service_count = keys_sorted_by_value_num_desc %searches_service ;
	2018	+ @searches_service_alpha = keys_sorted_alpha_desc %searches_service ;
	2019	+ @searches_toplevel_count = keys_sorted_by_value_num_desc %searches_toplevel_tld_found ;
	2020	+ @searches_toplevel_alpha = keys_sorted_alpha_asc %searches_toplevel_tld_found ;
	2021	+ @searches_service_matches_alpha = keys_sorted_alpha_asc %searches_service_matches ;
	2022	+
	2023	+# ReadInputSkins
	2024	+ @skins_sorted_skin = keys_sorted_alpha_asc %skins ;
	2025	+}
	2026	+
	2027	+sub WriteReportClients
	2028	+{
	2029	+ open FILE_HTML_CLIENTS, '>', "$dir_reports/$file_html_clients" ;
	2030	+
	2031	+ $html = $header ;
	2032	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Browsers e.a./ ;
	2033	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Browsers e.a./ ;
	2034	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	2035	+ $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $link_scripts \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $dummy_browsers \/ $link_google/ ;
	2036	+ $html =~ s/X1000/⇒ <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;
	2037	+
	2038	+ $html .= "<table border=1>\n" ;
	2039	+ $html .= "<tr><td class=l colspan=99 wrap>The following overview of page requests per client (~browser) application is based on the <a href='http://en.wikipedia.org/wiki/User_agent'>user agent</a> information that accompanies most server requests.<br>" .
	2040	+ "Please note that agent information does not follow strict guidelines and some programs may provide wrong information on purpose.<br>" .
	2041	+ "This report ignores all requests where agent information is missing, or contains any of the following: bot, crawl(er) or spider.<p>" .
	2042	+ "<b>Recommended reading:</b> <a href='http://en.wikipedia.org/wiki/Usage_share_of_web_browsers'>Wikipedia article</a> on usage share of web browsers and measurement methodology." .
	2043	+ "</td></tr>\n" ;
	2044	+
	2045	+ # CLIENTS SORTED BY FREQUENCY
	2046	+ $html .= "<tr><td width=50% valign=top>" ;
	2047	+ $html .= "<table border=1 width=100%>\n" ;
	2048	+ $html .= "<tr><th colspan=99 class=l><h3>In order of popularity</h3></th></tr>\n" ;
	2049	+
	2050	+ $html .= "<tr><th colspan=99 class=l> <br>Browsers, non mobile</th></tr>\n" ;
	2051	+ $perc_total = 0 ;
	2052	+ foreach $key (@clientgroups_sorted_count)
	2053	+ {
	2054	+ $count = $clientgroups {$key} ;
	2055	+
	2056	+ next if $count == 0 ;
	2057	+
	2058	+ $perc = $clientgroups_perc {$key} ;
	2059	+ ($mobile,$group) = split (',', $key) ;
	2060	+
	2061	+ next if $mobile ne '-' ;
	2062	+
	2063	+ $count = &FormatCount ($count) ;
	2064	+ $html .= "<tr><td class=l>$group</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2065	+ $perc =~ s/\%// ;
	2066	+ $perc_total += $perc ;
	2067	+ }
	2068	+
	2069	+ $perc = ".." ;
	2070	+ $count = $clientgroups_other {'-'} ;
	2071	+ if ($total_clientgroups {'-'} + $total_clientgroups {'M'} > 0)
	2072	+ {
	2073	+ $perc = sprintf ("%.2f", 100 * $clientgroups_other {'-'} / ($total_clientgroups {'-'} + $total_clientgroups {'M'})) ;
	2074	+ $perc_total += $perc ;
	2075	+ }
	2076	+ $html .= "<tr><td class=l>Other</th><td class=r>$count</td><td class=r>$perc\%</td></tr>\n" ;
	2077	+
	2078	+ $total = &FormatCount ($total_clientgroups {'-'}) ;
	2079	+ $perc_total = sprintf ("%.1f", $perc_total) ;
	2080	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc_total\%</th></tr>\n" ;
	2081	+
	2082	+ $html .= "<tr><th colspan=99 class=l> <br>Browsers, mobile</th></tr>\n" ;
	2083	+ foreach $key (@clientgroups_sorted_count)
	2084	+ {
	2085	+ $count = $clientgroups {$key} ;
	2086	+
	2087	+ next if $count == 0 ;
	2088	+
	2089	+ $perc = $clientgroups_perc {$key} ;
	2090	+ ($mobile,$group) = split (',', $key) ;
	2091	+
	2092	+ next if $mobile ne 'M' ;
	2093	+
	2094	+ $count = &FormatCount ($count) ;
	2095	+ $html .= "<tr><td class=l>$group</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2096	+ $perc =~ s/\%// ;
	2097	+ }
	2098	+ $count = $clientgroups_other {'M'} ;
	2099	+
	2100	+ $perc = ".." ;
	2101	+ if ($total_clientgroups {'-'} + $total_clientgroups {'M'} > 0)
	2102	+ { $perc = sprintf ("%.2f", 100 * $count / ($total_clientgroups {'-'} + $total_clientgroups {'M'})) ; }
	2103	+
	2104	+ $perc_total = sprintf ("%.1f", (100 - $perc_total)) ;
	2105	+ $total = &FormatCount ($total_clientgroups {'M'}) ;
	2106	+ $html .= "<tr><td class=l>Other</th><td class=r>$count</td><td class=r>$perc\%</td></tr>\n" ;
	2107	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc_total\%</th></tr>\n" ;
	2108	+
	2109	+ $html .= "<tr><th colspan=99 class=l> <br>Browser versions, non mobile</th></tr>\n" ;
	2110	+
	2111	+ foreach $key (@clients_sorted_count)
	2112	+ {
	2113	+ $count = $clients {$key} ;
	2114	+ ($rectype, $client) = split (',', $key,2) ;
	2115	+
	2116	+ next if $rectype ne '-' ; # group
	2117	+
	2118	+ $perc = $clients_perc {$key} ;
	2119	+
	2120	+ next if $perc lt "0.02%" ;
	2121	+
	2122	+ $count = &FormatCount ($count) ;
	2123	+ $html .= "<tr><td class=l>$client</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2124	+ $perc =~ s/\%// ;
	2125	+ }
	2126	+ $total = &FormatCount ($total_clients_non_mobile) ;
	2127	+
	2128	+ $perc_total = sprintf ("%.1f", (100 - $perc_total)) ;
	2129	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc_total\%</th></tr>\n" ;
	2130	+
	2131	+ $html .= "<tr><th colspan=99 class=l> <br>Browser versions, mobile</th></tr>\n" ;
	2132	+ foreach $key (@clients_sorted_count)
	2133	+ {
	2134	+ $count = $clients {$key} ;
	2135	+ ($rectype, $client) = split (',', $key,2) ;
	2136	+
	2137	+ next if $rectype ne 'M' ; # group
	2138	+
	2139	+ $perc = $clients_perc {$key} ;
	2140	+
	2141	+ next if $perc lt "0.02%" ;
	2142	+
	2143	+ $count = &FormatCount ($count) ;
	2144	+ $html .= "<tr><td class=l>$client</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2145	+ }
	2146	+ $total = &FormatCount ($total_clients_mobile) ;
	2147	+ $perc = sprintf ("%.1f", (100 - $perc_total)) ;
	2148	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc\%</th></tr>\n" ;
	2149	+
	2150	+ $html .= "</table>\n" ;
	2151	+
	2152	+ # CLIENTS In alphabetical order
	2153	+ $html .= "</td><td width=50% valign=top>" ;
	2154	+ $html .= "<table border=1 width=100%>\n" ;
	2155	+ $html .= "<tr><th colspan=99 class=l><h3>In alphabetical order</h3></th></tr>\n" ;
	2156	+
	2157	+ $html .= "<tr><th colspan=99 class=l> <br>Browsers, non mobile</th></tr>\n" ;
	2158	+ $perc_total = 0 ;
	2159	+ foreach $key (@clientgroups_sorted_alpha)
	2160	+ {
	2161	+ $count = $clientgroups {$key} ;
	2162	+
	2163	+ next if $count == 0 ;
	2164	+
	2165	+ $perc = $clientgroups_perc {$key} ;
	2166	+ ($mobile,$group) = split (',', $key) ;
	2167	+
	2168	+ next if $mobile ne '-' ;
	2169	+
	2170	+ $count = &FormatCount ($count) ;
	2171	+ $html .= "<tr><td class=l>$group</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2172	+ $perc =~ s/\%// ;
	2173	+ $perc_total += $perc ;
	2174	+ }
	2175	+
	2176	+ $count = $clientgroups_other {'-'} ;
	2177	+ $total = &FormatCount ($total_clientgroups {'-'}) ;
	2178	+ $perc = ".." ;
	2179	+ if ($total_clientgroups {'-'} + $total_clientgroups {'M'} > 0)
	2180	+ { $perc = sprintf ("%.2f", 100 * $count / ($total_clientgroups {'-'} + $total_clientgroups {'M'})) ; }
	2181	+ $perc_total += $perc ;
	2182	+ $perc_total = sprintf ("%.1f", $perc_total) ;
	2183	+ $html .= "<tr><td class=l>Other</th><td class=r>$count</td><td class=r>$perc\%</td></tr>\n" ;
	2184	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc_total\%</th></tr>\n" ;
	2185	+
	2186	+ $html .= "<tr><th colspan=99 class=l> <br>Browsers, mobile</th></tr>\n" ;
	2187	+ foreach $key (@clientgroups_sorted_alpha)
	2188	+ {
	2189	+ $count = $clientgroups {$key} ;
	2190	+
	2191	+ next if $count == 0 ;
	2192	+
	2193	+ $perc = $clientgroups_perc {$key} ;
	2194	+ ($mobile,$group) = split (',', $key) ;
	2195	+
	2196	+ next if $mobile ne 'M' ;
	2197	+
	2198	+ $count = &FormatCount ($count) ;
	2199	+ $html .= "<tr><td class=l>$group</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2200	+ $perc =~ s/\%// ;
	2201	+ }
	2202	+ $count = $clientgroups_other {'M'} ;
	2203	+ $total = &FormatCount ($total_clientgroups {'M'}) ;
	2204	+ $perc = sprintf ("%.2f", 100 * $count / ($total_clientgroups {'-'} + $total_clientgroups {'M'})) ;
	2205	+ $perc_total = sprintf ("%.1f", (100 - $perc_total)) ;
	2206	+ $html .= "<tr><td class=l>Other</th><td class=r>$count</td><td class=r>$perc\%</td></tr>\n" ;
	2207	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc_total\%</th></tr>\n" ;
	2208	+
	2209	+ $html .= "<tr><th colspan=99 class=l> <br>Browser versions, non mobile</th></tr>\n" ;
	2210	+
	2211	+ foreach $key (@clients_sorted_alpha)
	2212	+ {
	2213	+ $count = $clients {$key} ;
	2214	+ ($rectype, $client) = split (',', $key,2) ;
	2215	+
	2216	+ next if $rectype ne '-' ; # group
	2217	+
	2218	+ $perc = $clients_perc {$key} ;
	2219	+
	2220	+ next if $perc lt "0.02%" ;
	2221	+
	2222	+ $count = &FormatCount ($count) ;
	2223	+ $html .= "<tr><td class=l>$client</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2224	+ }
	2225	+ $total = &FormatCount ($total_clients_non_mobile) ;
	2226	+ $perc = sprintf ("%.1f",100*$total_clients_non_mobile / ($total_clients_mobile + $total_clients_non_mobile)) ;
	2227	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc\%</th></tr>\n" ;
	2228	+
	2229	+ $html .= "<tr><th colspan=99 class=l> <br>Browser versions, mobile</th></tr>\n" ;
	2230	+ foreach $key (@clients_sorted_alpha)
	2231	+ {
	2232	+ $count = $clients {$key} ;
	2233	+ ($rectype, $client) = split (',', $key,2) ;
	2234	+
	2235	+ next if $rectype ne 'M' ; # group
	2236	+
	2237	+ $perc = $clients_perc {$key} ;
	2238	+
	2239	+ next if $perc lt "0.02%" ;
	2240	+
	2241	+ $count = &FormatCount ($count) ;
	2242	+ $html .= "<tr><td class=l>$client</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2243	+ }
	2244	+ $total = &FormatCount ($total_clients_mobile) ;
	2245	+ $perc = sprintf ("%.1f",100*$total_clients_mobile / ($total_clients_mobile + $total_clients_non_mobile)) ;
	2246	+ $html .= "<tr><th class=l>Total</th><th class=r>$total</th><th class=r>$perc\%</th></tr>\n" ;
	2247	+
	2248	+ $html .= "<tr><th colspan=99 class=l> <br>Browser engines</th></tr>\n" ;
	2249	+
	2250	+ $engine_prev = "" ;
	2251	+ foreach $engine (@webkit_engines_sorted_alpha)
	2252	+ {
	2253	+ $total = $webkit_engines {$engine} ;
	2254	+
	2255	+ next if $total < 5 ;
	2256	+
	2257	+ $engine2 = $engine ;
	2258	+ $engine2 =~ s/\/.*$// ;
	2259	+ $engine2 =~ s/ .*$// ;
	2260	+ if (($engine2 ne $engine_prev) && ($engine_prev ne ""))
	2261	+ {
	2262	+ $total_engine = $total_engines {$engine_prev} ;
	2263	+ $perc_engine = sprintf ("%.1f", 100 * $total_engine / ($total_clients_mobile + $total_clients_non_mobile)) ;
	2264	+ $total_engine = &FormatCount ($total_engine) ;
	2265	+ $html .= "<tr><th class=l>Total</th><th class=r>$total_engine</th><th class=r>$perc_engine\%</th></tr>\n" ;
	2266	+ }
	2267	+ $engine_prev = $engine2 ;
	2268	+ $total = &FormatCount ($total) ;
	2269	+ $html .= "<tr><td class=l>$engine</td><td class=r>$total</td><td class=r> </td></tr>\n" ;
	2270	+ }
	2271	+ $total_engine = $total_engines {$engine_prev} ;
	2272	+ $perc_engine = sprintf ("%.1f", 100 * $total_engine / ($total_clients_mobile + $total_clients_non_mobile)) ;
	2273	+ $total_engine = &FormatCount ($total_engine) ;
	2274	+ $html .= "<tr><th class=l>Total</th><th class=r>$total_engine</th><th class=r>$perc_engine\%</th></tr>\n" ;
	2275	+
	2276	+ $engine_prev = "" ;
	2277	+ foreach $engine (@engines_sorted_alpha)
	2278	+ {
	2279	+ $total = $engines {$engine} ;
	2280	+
	2281	+ next if $total < 5 ;
	2282	+
	2283	+ $engine2 = $engine ;
	2284	+ $engine2 =~ s/\/.*$// ;
	2285	+ $engine2 =~ s/ .*$// ;
	2286	+ if (($engine2 ne $engine_prev) && ($engine_prev ne ""))
	2287	+ {
	2288	+ $total_engine = $total_engines {$engine_prev} ;
	2289	+ $perc_engine = sprintf ("%.1f", 100 * $total_engine / ($total_clients_mobile + $total_clients_non_mobile)) ;
	2290	+ $total_engine = &FormatCount ($total_engine) ;
	2291	+ $html .= "<tr><th class=l>Total</th><th class=r>$total_engine</th><th class=r>$perc_engine\%</th></tr>\n" ;
	2292	+ }
	2293	+ $engine_prev = $engine2 ;
	2294	+ $total = &FormatCount ($total) ;
	2295	+ $html .= "<tr><td class=l>$engine</td><td class=r>$total</td><td class=r> </td></tr>\n" ;
	2296	+ }
	2297	+ $total_engine = $total_engines {$engine_prev} ;
	2298	+ $perc_engine = sprintf ("%.1f", 100 * $total_engine / ($total_clients_mobile + $total_clients_non_mobile)) ;
	2299	+ $total_engine = &FormatCount ($total_engine) ;
	2300	+ $html .= "<tr><th class=l>Total</th><th class=r>$total_engine</th><th class=r>$perc_engine\%</th></tr>\n" ;
	2301	+
	2302	+ $html .= "</table>\n" ;
	2303	+ $html .= "</td></tr>\n" ;
	2304	+
	2305	+ $html .= "<tr><td colspan=99 class=l wrap>Requests from mobile devices are recognized as follows:<br>" .
	2306	+ "Agent string contains any of the following terms (last upd: $month_upd_keywords_mobile):<br>" .
	2307	+ "<i>$keywords_mobile</i></td></tr>" ;
	2308	+
	2309	+ $html .= "</table>\n" ;
	2310	+
	2311	+# $html .= "<p><b>Explanation:</b><br>'osd' = opensearchdescription / 'php.ser' = vnd.php.serialized" ;
	2312	+ $html .= $colophon ;
	2313	+
	2314	+ print FILE_HTML_CLIENTS $html ;
	2315	+ close FILE_HTML_CLIENTS ;
	2316	+}
	2317	+
	2318	+sub WriteReportCrawlers
	2319	+{
	2320	+ open FILE_HTML_CRAWLERS, '>', "$dir_reports/$file_html_crawlers" ;
	2321	+
	2322	+ $html = $header ;
	2323	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Crawler requests/ ;
	2324	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Crawler requests/ ;
	2325	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	2326	+ $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $link_scripts \/ $link_skins \/ $dummy_crawlers \/ $link_opsys \/ $dummy_browsers \/ $link_google/ ;
	2327	+ $html =~ s/X1000/⇒ <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;
	2328	+
	2329	+ $html .= "<table border=1>\n" ;
	2330	+ $html .= "<tr><td class=l colspan=99>The following overview of crawler (aka bot) page requests is based on the <a href='http://en.wikipedia.org/wiki/User_agent'>user agent</a> information that accompanies most server requests." .
	2331	+ " Unfortunately this user agent information follows rather loosely defined guidelines." .
	2332	+ "<br>Also please bear in mind than the most popular crawler names may be somewhat overrepresented." .
	2333	+ " This is the result of so called <i>user agent spoofing</i> (where a requester supplies false credentials, e.g. to bypass web servers filters)." .
	2334	+ "<br>GoogleBot seems to be a favorite for spoofing. Therefore requests from an ip address registered by Google (see below) are color coded <b><font color=green>GoogleBot</font></b>, others <b><font color=red>GoogleBot</font></b>" .
	2335	+ "<p>For this report page requests are considered to be issued by a crawler in two cases:" .
	2336	+ "<br>1 The user agent string contains a web address (only crawlers should have that, but there a some false positives, " .
	2337	+ " where a browser sends a user agent string with a web address (ill behaved plug-in, main offenders have been eliminated)" .
	2338	+ "<br>2 The user agent string contains the term bot, spider or crawl[er]'" .
	2339	+ "PERC_GOOGLE\n" .
	2340	+ "</td></tr>\n" ;
	2341	+
	2342	+ $total_crawlers = 0 ;
	2343	+# $html .= "<tr><th class=l>Count<br><small>x 1000</small></th><th class=l>Secondary domain<br>(~site) name</th><th class=l>Mime type</th><th class=l>User agent</th></tr>\n" ;
	2344	+ foreach $mime_agent (keys_sorted_by_value_num_desc %crawlers)
	2345	+ {
	2346	+ $count = $crawlers {$mime_agent} ;
	2347	+ ($mime, $agent) = split ('\\|', $mime_agent,2) ;
	2348	+ $agent =~ s/([^,;\s]+?\@[^,;\s]+)/ <font color=#808080>mail address<\/font> /g ;
	2349	+ $agent =~ s/([\w-]+\s.?at.?\s[\w-]+\s.?dot.?\s[\w-]+)/ <font color=#808080>mail address<\/font> /gi ;
	2350	+ $site = "-" ;
	2351	+ if ($agent =~ /http:/)
	2352	+ {
	2353	+ $site = $agent ;
	2354	+ $site =~ s/^.*?http:/http:/ ;
	2355	+ $site =~ s/>/>/gi ;
	2356	+ $site =~ s/</</gi ;
	2357	+ $site =~ s/^(.?)[,;\)\<\>\s)].$/$1/ ;
	2358	+ }
	2359	+ $agent =~ s/\Q$site\E/<b>$site<\/b>/ ;
	2360	+ # $agent =~ s/\Q$site\E// ;
	2361	+
	2362	+ $secondary_domain = &GetSecondaryDomain ($site) ;
	2363	+ if (($secondary_domain eq "google") and ($agent =~ /color=red>GoogleBot</))
	2364	+ { $secondary_domain .= "?" ; }
	2365	+
	2366	+ $secondary_domains {$secondary_domain} += $count ;
	2367	+
	2368	+ if ($secondary_domain ne "-")
	2369	+ { $crawlers_per_domain {$secondary_domain} {$mime_agent} += $count ; }
	2370	+ else
	2371	+ {
	2372	+ $crawlers_no_url {$agent} {$mime} += $count ;
	2373	+ $crawlers_no_url_agent {$agent} += $count ;
	2374	+ }
	2375	+
	2376	+ $total_crawlers += $count ;
	2377	+
	2378	+ next if $count <= 2 ;
	2379	+
	2380	+ # $count = &FormatCount ($count) ;
	2381	+ # $html .= "<tr><td class=r>$count</td><td class=l><a href='$site'>$secondary_domain</a></td><td class=l>$mime</td><td class=l>$agent</td></tr>\n" ;
	2382	+ # $rows++ ;
	2383	+ }
	2384	+
	2385	+ $perc_crawlers = ".." ;
	2386	+ if ($total_page_requests_external > 0)
	2387	+ { $perc_crawlers = sprintf ("%.1f",100 * $total_page_crawlerrequests/$total_page_requests_external) ; }
	2388	+
	2389	+ $total_page_requests_external2 = &FormatCount ($total_page_requests_external*1000) ;
	2390	+ $total_page_crawlerrequests2 = &FormatCount ($total_page_crawlerrequests*1000) ;
	2391	+ $html =~ s/PERC_GOOGLE/<p>In total $total_page_crawlerrequests2 page requests (mime type <a href='SquidReportRequests.htm'>text\/html<\/a> only!) per day are considered crawler requests, out of $total_page_requests_external2 external requests, which is $perc_crawlers%/ ;
	2392	+
	2393	+ $total_crawlers = &FormatCount ($total_crawlers) ;
	2394	+# $html .= "<tr><th class=l>$total_crawlers</th><th class=l colspan=2>total</th></tr>\n" ;
	2395	+# $html .= "</table><p>\n" ;
	2396	+
	2397	+# $html .= "<table border=1>\n" ;
	2398	+# $html .= "<tr><th class=l colspan=99>Top 25 secondary domains<br>(~ sites) mentioned</th></tr>\n" ;
	2399	+# foreach $secondary_domain (keys_sorted_by_value_num_desc %secondary_domains)
	2400	+# {
	2401	+# next if $secondary_domain eq ".." ;
	2402	+# last if ++$secondary_domains_listed > 25 ;
	2403	+#
	2404	+# $count = $secondary_domains {$secondary_domain} ;
	2405	+# $count = &FormatCount ($count) ;
	2406	+# $html .= "<tr><td class=r>$count</td><td class=l colspan=2>$secondary_domain</td></tr>\n" ;
	2407	+# }
	2408	+# $html .= "</table>\n" ;
	2409	+
	2410	+ $html .= "<tr><th class=lh3 colspan=99>Page requests for crawlers that specify a url in the agent string</th></tr>\n" ;
	2411	+ $html .= "<tr><th class=l>Count<br><small>x 1000</small></th><th class=l>Secondary domain<br>(~site) name</th><th class=l>URL</th><th class=l>Mime type</th><th class=l>User agent</th></tr>\n" ;
	2412	+ foreach $secondary_domain (keys_sorted_by_value_num_desc %secondary_domains)
	2413	+ {
	2414	+ next if $secondary_domain eq "-" ;
	2415	+
	2416	+ $total = $secondary_domains {$secondary_domain} ;
	2417	+ $total_crawlers_url += $total ;
	2418	+
	2419	+ last if $total < 10 ;
	2420	+
	2421	+ $total = &FormatCount ($total) ;
	2422	+ $html .= "<tr><th class=r>$total</th><th class=l colspan=99>$secondary_domain</th></tr>\n" ;
	2423	+ foreach $mime_agent (sort {$crawlers_per_domain {$secondary_domain} {$b} <=> $crawlers_per_domain {$secondary_domain} {$a}} keys %{$crawlers_per_domain {$secondary_domain}})
	2424	+ {
	2425	+ ($mime, $agent) = split ('\\|', $mime_agent,2) ;
	2426	+ $agent =~ s/([^,;\s]+?\@[^,;\s]+)/ <font color=#808080>mail address<\/font> /g ;
	2427	+ $agent =~ s/([\w-]+\s.?at.?\s[\w-]+\s.?dot.?\s[\w-]+)/ <font color=#808080>mail address<\/font> /gi ;
	2428	+ $site = "-" ;
	2429	+ if ($agent =~ /http:/)
	2430	+ {
	2431	+ $site = $agent ;
	2432	+ $site =~ s/^.*?http:/http:/ ;
	2433	+ $site =~ s/>/>/gi ;
	2434	+ $site =~ s/</</gi ;
	2435	+ $site =~ s/^(.?)[,;\)\<\>\s)].$/$1/ ;
	2436	+ }
	2437	+ # $agent =~ s/\Q$site\E/<b>$site<\/b> <a href='$site'>x<\/a>/ ;
	2438	+ if ($site ne "-")
	2439	+ { $agent =~ s/\Q$site\E/<b>url<\/b>/ ; }
	2440	+ $count = $crawlers_per_domain {$secondary_domain} {$mime_agent} ;
	2441	+
	2442	+ next if $count <= 2 ;
	2443	+
	2444	+ # print "[$secondary_domain] [$mime_agent] : $count\n" ;
	2445	+ $count = &FormatCount ($count) ;
	2446	+ ($site2 = $site) =~ s/^http:\/\/// ;
	2447	+ $html .= "<tr><td class=r>$count</td><td class=l> </td><td class=l><a href='$site' ref='nofollow'>$site2<\/a></td><td class=l>$mime</td><td class=l>$agent</td></tr>\n" ;
	2448	+ $rows++ ;
	2449	+ }
	2450	+ }
	2451	+ $total_crawlers_url = &FormatCount ($total_crawlers_url) ;
	2452	+ $html .= "<tr><th class=l>$total_crawlers_url</th><th class=l colspan=99>total</th></tr>\n" ;
	2453	+ $html .= "</table><p>\n" ;
	2454	+
	2455	+ $total_crawlers_no_url = 0 ;
	2456	+ $html .= "<table border=1>\n" ;
	2457	+ $html .= "<tr><th class=lh3 colspan=99>Page requests for probable crawlers, recognized by keyword</th></tr>\n" ;
	2458	+ $html .= "<tr><th class=l width=40>Count<br><small>x 1000</small></th><th class=l colspan=99>Agent string</th></tr>\n" ;
	2459	+ $html .= "<tr><th class=l width=40> </td><th class=l width=40> </td><th class=l>Mime type (count ≥ 3)</th></tr>\n" ;
	2460	+ foreach $agent (keys_sorted_by_value_num_desc %crawlers_no_url_agent)
	2461	+ {
	2462	+ $total = $crawlers_no_url_agent {$agent} ;
	2463	+ $total_crawlers_no_url += $total ;
	2464	+
	2465	+ last if $total < 3 ;
	2466	+
	2467	+ $total = &FormatCount ($total) ;
	2468	+ $html .= "<tr><th class=r>$total</th><td class=l colspan=99>$agent</td></tr>\n" ;
	2469	+ foreach $mime (sort {$crawlers_no_url {$agent} {$b} <=> $crawlers_no_url {$agent} {$a}} keys %{$crawlers_no_url {$agent}})
	2470	+ {
	2471	+ $agent =~ s/([^,;\s]+?\@[^,;\s]+)/ <font color=#808080>mail address<\/font> /g ;
	2472	+ $agent =~ s/([\w-]+\s.?at.?\s[\w-]+\s.?dot.?\s[\w-]+)/ <font color=#808080>mail address<\/font> /gi ;
	2473	+ $count = $crawlers_no_url {$agent} {$mime} ;
	2474	+ $count = &FormatCount ($count) ;
	2475	+ ($site2 = $site) =~ s/^http:\/\/// ;
	2476	+ $html .= "<tr><td class=r>$count</td><td> </td><td class=l colspan=99>$mime</td></tr>\n" ;
	2477	+ $rows++ ;
	2478	+ }
	2479	+ }
	2480	+
	2481	+ $total_crawlers_no_url = &FormatCount ($total_crawlers_no_url) ;
	2482	+ $html .= "<tr><th class=l>$total_crawlers_no_url</th><th class=l colspan=99>total</th></tr>\n" ;
	2483	+ $html .= "</table><p>\n" ;
	2484	+
	2485	+ $html .= "<p>$google_ip_ranges" ;
	2486	+ $html .= $colophon ;
	2487	+
	2488	+ print FILE_HTML_CRAWLERS $html ;
	2489	+ close FILE_HTML_CRAWLERS ;
	2490	+}
	2491	+
	2492	+sub WriteReportMethods
	2493	+{
	2494	+ open FILE_HTML_METHODS, '>', "$dir_reports/$file_html_methods" ;
	2495	+
	2496	+ $html = $header ;
	2497	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Request Methods/ ;
	2498	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Request Methods/ ;
	2499	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	2500	+ $html =~ s/LINKS/$link_requests $link_origins \/ $dummy_methods \/ $link_scripts \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $link_google/ ;
	2501	+ $html =~ s/X1000/⇒ <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;
	2502	+
	2503	+ $html .= "<table border=0>\n" ;
	2504	+ $html .= "<tr><td>" ;
	2505	+
	2506	+ $html .= "<table border=1>\n" ;
	2507	+ $html .= "<tr><th colspan=99 class=l><h3>In order of request volume</h3></th></tr>\n" ;
	2508	+ $html .= "<tr><th colspan=2 class=l>Method</th><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;
	2509	+ $rows = 0 ;
	2510	+ $total_methods = 0 ;
	2511	+ foreach $method (@methods_sorted_count)
	2512	+ {
	2513	+ $total = $methods {$method} ;
	2514	+ $total_methods += $total ;
	2515	+ $total = &FormatCount ($total) ;
	2516	+ $html .= "<tr><td colspan=2 class=l>$method</td><td class=r>$total</td></tr>\n" ;
	2517	+ }
	2518	+ $total_methods = &FormatCount ($total_methods) ;
	2519	+ $html .= "<tr><th colspan=2 class=l>Total</th><th class=r>$total_methods</th></tr>\n" ;
	2520	+ $html .= "<tr><td colspan=99> </td></tr>\n" ;
	2521	+ $html .= "<tr><td class=l>Method</th><th class=l>Result</th><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;
	2522	+ $total_statusses = 0 ;
	2523	+ foreach $status (@statusses_sorted_count)
	2524	+ {
	2525	+ $total = $statusses {$status} ;
	2526	+ $total_statusses += $total ;
	2527	+ $total = &FormatCount ($total) ;
	2528	+ ($method,$result) = split (',', $status, 2) ;
	2529	+
	2530	+ $html .= "<tr><td class=l>$method</td><td class=l>$result</td><td class=r>$total</td></tr>\n" ;
	2531	+ $rows++ ;
	2532	+ }
	2533	+ $total_statusses = &FormatCount ($total_statusses) ;
	2534	+ $html .= "<tr><th colspan=2 class=l>Total</th><th class=r>$total_statusses</th></tr>\n" ;
	2535	+ $html .= "</table>\n" ;
	2536	+
	2537	+ $html .= "</td><td>   </td><td>" ;
	2538	+
	2539	+ $html .= "<table border=1>\n" ;
	2540	+ $html .= "<tr><th colspan=99 class=l><h3>In alphabetical order: method+result</h3></th></tr>\n" ;
	2541	+ $html .= "<tr><th colspan=2 class=l>Method</th><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;
	2542	+ $rows = 0 ;
	2543	+ foreach $method (@methods_sorted_method)
	2544	+ {
	2545	+ $total = &FormatCount ($methods {$method}) ;
	2546	+ $html .= "<tr><td colspan=2 class=l>$method</td><td class=r>$total</td></tr>\n" ;
	2547	+ }
	2548	+ $html .= "<tr><th colspan=2 class=l>Total</th><th class=r>$total_methods</th></tr>\n" ;
	2549	+ $html .= "<tr><td colspan=99> </td></tr>\n" ;
	2550	+ $html .= "<tr><th class=l>Method</th><th class=l>Result</th><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;
	2551	+ foreach $status (@statusses_sorted_method)
	2552	+ {
	2553	+ $total = &FormatCount ($statusses {$status}) ;
	2554	+ ($method,$result) = split (',', $status, 2) ;
	2555	+
	2556	+ $html .= "<tr><td class=l>$method</td><td class=l>$result</td><td class=r>$total</td></tr>\n" ;
	2557	+ $rows++ ;
	2558	+ }
	2559	+ $html .= "<tr><th colspan=2 class=l>Total</th><th class=r>$total_statusses</th></tr>\n" ;
	2560	+ $html .= "</table>\n" ;
	2561	+
	2562	+ $html .= "</td></tr></table>\n" ;
	2563	+ $html .= " <small>$rows rows written</small><p>" ;
	2564	+
	2565	+# $html .= "<p><b>Explanation:</b><br>'osd' = opensearchdescription / 'php.ser' = vnd.php.serialized" ;
	2566	+ $html .= $colophon ;
	2567	+
	2568	+ print FILE_HTML_METHODS $html ;
	2569	+ close FILE_HTML_METHODS ;
	2570	+}
	2571	+
	2572	+sub WriteReportMimeTypes
	2573	+{
	2574	+ open FILE_HTML_REQUESTS, '>', "$dir_reports/$file_html_requests" ;
	2575	+
	2576	+ $html = $header ;
	2577	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Requests by destination/ ;
	2578	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Requests by destination/ ;
	2579	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	2580	+ $html =~ s/NOTES/<br> This report shows where requests are sent to. Report 'Requests by origin' shows where requests come from.<br> Those numbers bear no direct relation.<br>/ ;
	2581	+ $html =~ s/LINKS/$dummy_requests $link_origins \/ $link_methods \/ $link_scripts \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $link_google/ ;
	2582	+ $html .= "<table border=1>\n" ;
	2583	+
	2584	+ $header1 = "<tr><th colspan=2 class=l><small>x 1000</small></th><th colspan=2 class=c>Totals</th><th class=c><font color=#008000>Pages</font></th><th colspan=3 class=c><font color=#900000>Images</font></th><th colspan=99 class=c>Other</th></tr>\n" ;
	2585	+ $header2 = "<tr><th colspan=2 class=l> </th><th class=c>total<br>all</th><th class=c><font color=#900000>total<br>images</font></th>\n" ;
	2586	+ $columns = 0 ;
	2587	+ foreach $mimetype (@mimetypes_sorted)
	2588	+ {
	2589	+ $columns++ ;
	2590	+
	2591	+ next if $mimetypes_found {$mimetype} < $threshold_mime ;
	2592	+
	2593	+ $mimetype2 = $mimetype ;
	2594	+ if ($mimetype2 eq "text/html")
	2595	+ { $mimetype2 .= "<br><small>(page)</small> " ; }
	2596	+ if ($mimetype2 =~ /image\/(?:png\|jpeg\|gif)/)
	2597	+ { $mimetype2 .= "<br><small>(img)</small> " ; }
	2598	+ if ($columns == 1)
	2599	+ { $mimetype2 = "<font color=#008000>$mimetype2</font" ; }
	2600	+ if (($columns >= 2) && ($columns <= 4))
	2601	+ { $mimetype2 = "<font color=#900000>$mimetype2</font" ; }
	2602	+ ($mime1,$mime2) = split ('\/', $mimetype2, 2) ;
	2603	+ $header2 .= "<th class=c>$mime1<br>$mime2</th>\n" ;
	2604	+ }
	2605	+ $header2 .= "</tr>\n" ;
	2606	+ $html .= $header1 . $header2 ;
	2607	+
	2608	+ $rows = 0 ;
	2609	+ $total_mimes2 = 0 ;
	2610	+ $total_images1 = 0 ;
	2611	+ foreach $domain (@domains_sorted)
	2612	+ {
	2613	+ $html .= "<tr><td colspan=2 class=l>" . ucfirst($domain) . "</td>\n" ;
	2614	+ $total = $domains {$domain} ;
	2615	+ $total_mimes2 += $total ;
	2616	+ $total = &FormatCount ($total) ;
	2617	+ $total_images = $images_domain {$domain} ;
	2618	+ $total_images1 += $total_images ;
	2619	+ $total_images = &FormatCount ($total_images) ;
	2620	+ $total_images = "<font color=#900000>" . &FormatCount ($total_images) . "</font>" ;
	2621	+
	2622	+ $html .= "<th class=r>$total</th><th class=r>$total_images</th>\n" ;
	2623	+ $columns = 0 ;
	2624	+ foreach $mimetype (@mimetypes_sorted)
	2625	+ {
	2626	+ $columns++ ;
	2627	+
	2628	+ next if $mimetypes_found {$mimetype} < $threshold_mime ;
	2629	+
	2630	+ $count = &FormatCount ($counts_dm {"$domain,$mimetype"}) ;
	2631	+ if ($columns == 1)
	2632	+ { $count = "<font color=#008000>$count</font" ; }
	2633	+ if (($columns >= 2) && ($columns <= 4))
	2634	+ { $count = "<font color=#900000>$count</font" ; }
	2635	+ if ($count eq "")
	2636	+ { $count = " " ; }
	2637	+ $html .= "<td class=r>$count</td>\n" ;
	2638	+ }
	2639	+ $html .= "</tr>\n" ;
	2640	+ $rows++ ;
	2641	+ }
	2642	+
	2643	+ if ($total_mimes != $total_mimes2)
	2644	+ {
	2645	+ print ERR "total_mimes $total_mimes != total_mimes2 $total_mimes2\n" ;
	2646	+ print "total_mimes $total_mimes != total_mimes2 $total_mimes2\n" ;
	2647	+ }
	2648	+
	2649	+ $total_mimes1 = &FormatCount ($total_mimes) ;
	2650	+ $total_images1 = &FormatCount ($total_images1) ;
	2651	+ $total_images1 = "<font color=#900000>" . &FormatCount ($total_images1) . "</font>" ;
	2652	+ $html .= "<tr><th colspan=2 class=l>Total</th><th class=c>$total_mimes1</th><th class=c>$total_images1</th>\n" ;
	2653	+ $columns = 0 ;
	2654	+ foreach $mimetype (@mimetypes_sorted)
	2655	+ {
	2656	+ $columns++ ;
	2657	+
	2658	+ next if $mimetypes_found {$mimetype} < $threshold_mime ;
	2659	+
	2660	+ $count = &FormatCount ($mimetypes {$mimetype}) ;
	2661	+ if ($columns == 1)
	2662	+ { $count = "<font color=#008000>$count</font" ; }
	2663	+ if (($columns >= 2) && ($columns <= 4))
	2664	+ { $count = "<font color=#900000>$count</font" ; }
	2665	+ $html .= "<th class=r>$count</th>\n" ;
	2666	+ }
	2667	+ $html .= "</tr>\n" ;
	2668	+
	2669	+ $html .= "<tr><th colspan=99> </th></tr>\n" ;
	2670	+ $html .= "<tr><td colspan=99 class=l><b>Per project / language subproject</b> (top 50)</td></tr>\n" ;
	2671	+ $total_mimes3 = 0 ;
	2672	+ $total_mimes4 = 0 ;
	2673	+ $cnt_projects = 0 ;
	2674	+ foreach $project (@projects_sorted)
	2675	+ {
	2676	+ last if ++ $cnt_projects > 50 ;
	2677	+
	2678	+ $total = $projects {$project} ;
	2679	+ $total_mimes3 += $total ;
	2680	+
	2681	+ next if $total < $threshold_project ;
	2682	+
	2683	+ $total_mimes4 += $total ;
	2684	+ ($domain,$language) = split ('\:', $project,2) ;
	2685	+ $html .= "<tr><td class=l>" . ucfirst($domain) . "</td><td class=l>$language</td>\n" ;
	2686	+
	2687	+ $total = &FormatCount ($total) ;
	2688	+ $total_images = $images_project {$project} ;
	2689	+ $total_images = "<font color=#900000>" . &FormatCount ($total_images) . "</font>" ;
	2690	+ $html .= "<th class=r>$total</th><th class=r>$total_images</th>\n" ;
	2691	+
	2692	+ $columns = 0 ;
	2693	+ foreach $mimetype (@mimetypes_sorted)
	2694	+ {
	2695	+ $columns++ ;
	2696	+
	2697	+ next if $mimetypes_found {$mimetype} < $threshold_mime ;
	2698	+
	2699	+ $count = &FormatCount ($counts_pm {"$project,$mimetype"}) ;
	2700	+ if ($columns == 1)
	2701	+ { $count = "<font color=#008000>$count</font" ; }
	2702	+ if (($columns >= 2) && ($columns <= 4))
	2703	+ { $count = "<font color=#900000>$count</font" ; }
	2704	+# if ($count eq "")
	2705	+# { $count = " " ; }
	2706	+ $html .= "<td class=r>$count</td>\n" ;
	2707	+ }
	2708	+ $html .= "</tr>\n" ;
	2709	+ $rows++ ;
	2710	+ }
	2711	+ $html .= $header2 . $header1 ;
	2712	+ $html .= "</table>\n" ;
	2713	+ $html .= " <small>$rows rows written</small><p>" ;
	2714	+
	2715	+ if ($total_mimes != $total_mimes3)
	2716	+ {
	2717	+ print ERR "total_mimes $total_mimes != total_mimes3 $total_mimes3\n" ;
	2718	+ print "total_mimes $total_mimes != total_mimes3 $total_mimes3\n" ;
	2719	+ }
	2720	+
	2721	+ if ($threshold_mime > 0)
	2722	+ {
	2723	+ $html .= "<b>Mime types that are found on less than $threshold_mime projects:</b> (again 1 = 1000)<p>" ;
	2724	+ foreach $mimetype (@mimetypes_sorted)
	2725	+ {
	2726	+ next if $mimetypes_found {$mimetype} >= $threshold_mime ;
	2727	+
	2728	+ $count = $mimetypes {$mimetype} ;
	2729	+ $count =~ s/^(\d{1,3})(\d\d\d)$/$1,$2/ ;
	2730	+ $count =~ s/^(\d{1,3})(\d\d\d)(\d\d\d)$/$1,$2,$3/ ;
	2731	+ $html .= "<b>$mimetype</b> $count total<br>" ;
	2732	+ }
	2733	+ }
	2734	+
	2735	+# $html .= "<p><b>Explanation:</b><br>'osd' = opensearchdescription / 'php.ser' = vnd.php.serialized" ;
	2736	+ $html .= $colophon ;
	2737	+
	2738	+ print FILE_HTML_REQUESTS $html ;
	2739	+ close FILE_HTML_REQUESTS ;
	2740	+}
	2741	+
	2742	+sub WriteReportOpSys
	2743	+{
	2744	+ open FILE_HTML_OPSYS, '>', "$dir_reports/$file_html_opsys" ;
	2745	+
	2746	+ $html = $header ;
	2747	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Operating Systems/ ;
	2748	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Operating Systems/ ;
	2749	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	2750	+ $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $link_scripts \/ $link_skins \/ $link_crawlers \/ $dummy_opsys \/ $link_browsers \/ $link_google/ ;
	2751	+ $html =~ s/X1000/⇒ <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;
	2752	+
	2753	+ $total_all2 = &FormatCount ($total_opsys_mobile + $total_opsys_non_mobile) ;
	2754	+ $total_opsys_mobile2 = &FormatCount ($total_opsys_mobile) ;
	2755	+ $total_opsys_non_mobile2 = &FormatCount ($total_opsys_non_mobile) ;
	2756	+ $total_perc_mobile = sprintf ("%.1f", 100 * $total_opsys_mobile / ($total_opsys_mobile + $total_opsys_non_mobile)) ;
	2757	+ $total_perc_non_mobile = 100 - $total_perc_mobile ;
	2758	+ $line_total_all = "<tr><th class=l>Total</th><th class=r>$total_all2</th><th class=r>100\%</th></tr>\n" ;
	2759	+ $line_total_mobile = "<tr><th class=l>Total</th><th class=r>$total_opsys_mobile2</th><th class=r>$total_perc_mobile\%</th></tr>\n" ;
	2760	+ $line_total_non_mobile = "<tr><th class=l>Total</th><th class=r>$total_opsys_non_mobile2</th><th class=r>$total_perc_non_mobile\%</th></tr>\n" ;
	2761	+
	2762	+ $html .= "<table border=1>\n" ;
	2763	+ $html .= "<tr><td class=l colspan=99>The following overview of page requests by operating system is based on the <a href='http://en.wikipedia.org/wiki/User_agent'>user agent</a> information that accompanies most server requests.<br>" .
	2764	+ "Please note that agent information does not follow strict guidelines and some programs may provide wrong information on purpose.<br>" .
	2765	+ "This report ignores all requests where agent information is missing, or contains any of the following: bot, crawl(er) or spider.<p>" .
	2766	+ "<a href='http://en.wikipedia.org/wiki/Windows_NT#Releases'>Wikipedia</a>: NT 5.0 = Windows 2000, NT 5.1/5.2 = XP + Server 2003, NT 6.0 = VISTA + Server 2008, NT 6.1 = Windows 7.<br> " .
	2767	+ "<a href='http://en.wikipedia.org/wiki/Mac_OS_X#Versions'>Wikipedia</a>: OS X 10.4 = Tiger, 10.5 = Leopard, 10.6 = Snow Leopard.<br> " .
	2768	+ "<a href='http://en.wikipedia.org/wiki/Ubuntu#Releases'>Wikipedia</a>: Ubuntu 7.10 = Gutsy Gibbon, 8.04 = Hardy Heron, 8.10 = Intrepid Ibex, 9.04 = Jaunty Jackalope, 9.10 = Karma Koala." .
	2769	+ "</td></tr>\n" ;
	2770	+
	2771	+# $html .= "<tr><th class=l>Count<br><small>x 1000</small></th><th class=l>Secondary domain<br>(~site) name</th><th class=l>Mime type</th><th class=l>User agent</th></tr>\n" ;
	2772	+
	2773	+ $html .= "<tr><td width=50% valign=top>" ;
	2774	+
	2775	+ # OS SORTED BY FREQUENCY
	2776	+ $html .= "<table border=1 width=100%>\n" ;
	2777	+ $html .= "<tr><td colspan=99 class=l><h3>In order of popularity</h3></td></tr>" ;
	2778	+ $html .= "<tr><th class=l>Operating System</th><th class=r>Requests</th><th class=r>Percentage</th></tr>\n" ;
	2779	+ foreach $key (@opsys_sorted_count)
	2780	+ {
	2781	+ $count = $opsys {$key} ;
	2782	+ $perc = $opsys_perc {$key} ;
	2783	+ ($rectype, $os) = split (',', $key,2) ;
	2784	+
	2785	+ next if $rectype ne 'G' ; # group
	2786	+ next if $key =~ / / ; # subgroup
	2787	+
	2788	+ $count = &FormatCount ($count) ;
	2789	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2790	+ # $rows++ ;
	2791	+ }
	2792	+ $html .= $line_total_all ;
	2793	+
	2794	+ $html .= "<tr><th class=l colspan=99> <br>Breakdown per platform for Mac and Linux</th></tr>\n" ;
	2795	+ foreach $key (@opsys_sorted_count)
	2796	+ {
	2797	+ $count = $opsys {$key} ;
	2798	+ $perc = $opsys_perc {$key} ;
	2799	+ ($rectype, $os) = split (',', $key,2) ;
	2800	+
	2801	+ next if $rectype ne 'G' ; # group
	2802	+ next if $key !~ / / ; # subgroup
	2803	+
	2804	+ $count = &FormatCount ($count) ;
	2805	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2806	+ # $rows++ ;
	2807	+ }
	2808	+
	2809	+ $html .= "<tr><th class=l colspan=99> <br>Breakdown per OS version, non mobile</th></tr>\n" ;
	2810	+ foreach $key (@opsys_sorted_count)
	2811	+ {
	2812	+ $count = $opsys {$key} ;
	2813	+ $perc = $opsys_perc {$key} ;
	2814	+
	2815	+ next if $perc lt "0.02%" ;
	2816	+
	2817	+ ($rectype, $os) = split (',', $key,2) ;
	2818	+
	2819	+ next if $rectype ne '-' ; # group
	2820	+
	2821	+ $count = &FormatCount ($count) ;
	2822	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2823	+ # $rows++ ;
	2824	+ }
	2825	+ $html .= $line_total_non_mobile ;
	2826	+
	2827	+ $html .= "<tr><th class=l colspan=99> <br>Breakdown per OS version, mobile</th></tr>\n" ;
	2828	+ foreach $key (@opsys_sorted_count)
	2829	+ {
	2830	+ $count = $opsys {$key} ;
	2831	+ $perc = $opsys_perc {$key} ;
	2832	+
	2833	+ next if $perc lt "0.02%" ;
	2834	+
	2835	+ ($rectype, $os) = split (',', $key,2) ;
	2836	+
	2837	+ next if $rectype ne 'M' ; # group
	2838	+
	2839	+ $count = &FormatCount ($count) ;
	2840	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2841	+ # $rows++ ;
	2842	+ }
	2843	+ $html .= $line_total_mobile ;
	2844	+ $html .= "</table>\n" ;
	2845	+
	2846	+ $html .= "</td><td width=50% valign=top>" ;
	2847	+
	2848	+ # IN ALPHABETICAL ORDER
	2849	+ $html .= "<table border=1 width=100%>\n" ;
	2850	+
	2851	+ $html .= "<tr><td colspan=99 class=l><h3>In alphabetical order</h3></td></tr>" ;
	2852	+ $html .= "<tr><th class=l>Operating System</th><th class=r>Requests</th><th class=r>Percentage</th></tr>\n" ;
	2853	+ foreach $key (@opsys_sorted_alpha)
	2854	+ {
	2855	+ $count = $opsys {$key} ;
	2856	+ $perc = $opsys_perc {$key} ;
	2857	+ ($rectype, $os) = split (',', $key,2) ;
	2858	+
	2859	+ next if $rectype ne 'G' ; # group
	2860	+ next if $key =~ / / ; # subgroup
	2861	+
	2862	+ $count = &FormatCount ($count) ;
	2863	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2864	+ # $rows++ ;
	2865	+ }
	2866	+ $html .= $line_total_all ;
	2867	+
	2868	+ $html .= "<tr><th class=l colspan=99> <br>Breakdown per platform for Mac and Linux</th></tr>\n" ;
	2869	+ foreach $key (@opsys_sorted_alpha)
	2870	+ {
	2871	+ $count = $opsys {$key} ;
	2872	+ $perc = $opsys_perc {$key} ;
	2873	+ ($rectype, $os) = split (',', $key,2) ;
	2874	+
	2875	+ next if $rectype ne 'G' ; # group
	2876	+ next if $key !~ / / ; # subgroup
	2877	+
	2878	+ $count = &FormatCount ($count) ;
	2879	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2880	+ # $rows++ ;
	2881	+ }
	2882	+
	2883	+ $html .= "<tr><th class=l colspan=99> <br>Breakdown per OS version, non mobile</th></tr>\n" ;
	2884	+ foreach $key (@opsys_sorted_alpha)
	2885	+ {
	2886	+ $count = $opsys {$key} ;
	2887	+ $perc = $opsys_perc {$key} ;
	2888	+
	2889	+ next if $perc lt "0.02%" ;
	2890	+
	2891	+ ($rectype, $os) = split (',', $key,2) ;
	2892	+
	2893	+ next if $rectype ne '-' ; # group
	2894	+
	2895	+ $count = &FormatCount ($count) ;
	2896	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2897	+ # $rows++ ;
	2898	+ }
	2899	+
	2900	+ $html .= $line_total_non_mobile ;
	2901	+ $html .= "<tr><th class=l colspan=99> <br>Breakdown per OS version, mobile</th></tr>\n" ;
	2902	+ foreach $key (@opsys_sorted_alpha)
	2903	+ {
	2904	+ $count = $opsys {$key} ;
	2905	+ $perc = $opsys_perc {$key} ;
	2906	+
	2907	+ next if $perc lt "0.02%" ;
	2908	+
	2909	+ ($rectype, $os) = split (',', $key,2) ;
	2910	+
	2911	+ next if $rectype ne 'M' ; # group
	2912	+
	2913	+ $count = &FormatCount ($count) ;
	2914	+ $html .= "<tr><td class=l>$os</a></td><td class=r>$count</td><td class=r>$perc</td></tr>\n" ;
	2915	+ # $rows++ ;
	2916	+ }
	2917	+ $html .= $line_total_mobile ;
	2918	+ $html .= "</table>\n" ;
	2919	+ $html .= "</td></tr>" ;
	2920	+
	2921	+ $html .= "<tr><td colspan=99 class=l wrap>Requests from mobile devices are recognized as follows:<br>" .
	2922	+ "Agent string contains any of the following terms (last upd: $month_upd_keywords_mobile):<br>" .
	2923	+ "<i>$keywords_mobile</i></td></tr>" ;
	2924	+
	2925	+ $html .= "</table><p>" ;
	2926	+
	2927	+# $perc_crawlers = sprintf ("%.1f",100 * $total_page_crawlerrequests/$total_page_requests_external) ;
	2928	+# $total_page_requests_external2 = &FormatCount ($total_page_requests_external*1000) ;
	2929	+# $total_page_crawlerrequests2 = &FormatCount ($total_page_crawlerrequests*1000) ;
	2930	+# $html =~ s/PERC_GOOGLE/<p>In total $total_page_crawlerrequests2 page requests (mime type <a href='SquidReportRequests.htm'>text\/html<\/a> only!) per day are considered crawler requests, out of $total_page_requests_external2 external requests, which is $perc_crawlers%/ ;
	2931	+
	2932	+# $total_crawlers = &FormatCount ($total_crawlers) ;
	2933	+
	2934	+# $html .= "<tr><th class=l>$total_crawlers</th><th class=l colspan=2>total</th></tr>\n" ;
	2935	+# $html .= "</table><p>\n" ;
	2936	+
	2937	+# $html .= "<table border=1>\n" ;
	2938	+# $html .= "<tr><th class=l colspan=99>Top 25 secondary domains<br>(~ sites) mentioned</th></tr>\n" ;
	2939	+# foreach $secondary_domain (keys_sorted_by_value_num_desc %secondary_domains)
	2940	+# {
	2941	+# next if $secondary_domain eq ".." ;
	2942	+# last if ++$secondary_domains_listed > 25 ;
	2943	+#
	2944	+# $count = $secondary_domains {$secondary_domain} ;
	2945	+# $count = &FormatCount ($count) ;
	2946	+# $html .= "<tr><td class=r>$count</td><td class=l colspan=2>$secondary_domain</td></tr>\n" ;
	2947	+# }
	2948	+# $html .= "</table>\n" ;
	2949	+
	2950	+ $html .= $colophon ;
	2951	+
	2952	+ print FILE_HTML_OPSYS $html ;
	2953	+ close FILE_HTML_OPSYS ;
	2954	+}
	2955	+
	2956	+# http://en.wikipedia.org/wiki/Domain_name
	2957	+sub WriteReportOrigins
	2958	+{
	2959	+ open FILE_HTML_ORIGINS, '>', "$dir_reports/$file_html_origins" ;
	2960	+
	2961	+ $html = $header ;
	2962	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Requests by origin/ ;
	2963	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Requests by origin/ ;
	2964	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	2965	+ $html =~ s/LINKS/$link_requests $dummy_origins \/ $link_methods \/ $link_scripts \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $link_google/ ;
	2966	+ $html =~ s/NOTES/<br> This report shows where requests come from. Report 'Requests by destination' shows where requests are serviced.<br> Those numbers bear no direct relation.<br>/ ;
	2967	+
	2968	+ $html .= "<table border=1>\n" ;
	2969	+ $html .= "<tr><td colspan=99>" ;
	2970	+
	2971	+
	2972	+ $html .= "<table border=0 width=100%>\n" ;
	2973	+# $html .= "<tr><td colspan=99 class=c>traffic from yahoo is allocated as if yahoo used same domain naming scheme as google: <b>search.yahoo.ca</b> instead of <b>ca.search.yahoo.com</b></td></tr>\n" ;
	2974	+# $html .= "<tr><td colspan=99 class=c><small>All counts x 1000</small></td></tr>\n" ;
	2975	+
	2976	+ # INTERNAL ORIGINS
	2977	+
	2978	+ $html .= "<tr><td colspan=99 class=c><h3>Requests with internal origins</h3></td></tr>\n" ;
	2979	+ $html .= "<table border=1 width=100%>\n" ;
	2980	+
	2981	+ $html .= "<tr><td width=50% valign=top>" ;
	2982	+ $html .= "<table border=1 width=100%>\n" ;
	2983	+ $html .= "<tr><td colspan=2 class=l><b>Internal origins<br>sorted by<br>frequency</b></td><th class=r> Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	2984	+
	2985	+ $total_total = 0 ;
	2986	+ $total_page = 0 ;
	2987	+ $total_image = 0 ;
	2988	+ $total_rest = 0 ;
	2989	+ foreach $project (@project_int_top_sorted_count)
	2990	+ {
	2991	+ $total = $project_int_top {$project} ;
	2992	+ $page = $project_int_top_split {"page:$project"} ;
	2993	+ $image = $project_int_top_split {"image:$project"} ;
	2994	+ $rest = $project_int_top_split {"other:$project"} ;
	2995	+ $total_total += $total ;
	2996	+ $total_page += $page ;
	2997	+ $total_image += $image ;
	2998	+ $total_rest += $rest ;
	2999	+ $total = &FormatCount ($total) ;
	3000	+ $page = &FormatCount ($page) ;
	3001	+ $image = &FormatCount ($image) ;
	3002	+ $rest = &FormatCount ($rest) ;
	3003	+ $html .= "<tr><td colspan=2 class=l>" . ucfirst($project) . "</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3004	+ }
	3005	+ $total_total = &FormatCount ($total_total) ;
	3006	+ $total_page = &FormatCount ($total_page) ;
	3007	+ $total_image = &FormatCount ($total_image) ;
	3008	+ $total_rest = &FormatCount ($total_rest) ;
	3009	+ $html .= "<tr><th colspan=2 class=l>Total</th><th class=r>$total_total</th><td class=r>$total_page</td><td class=r>$total_image</td><td class=r>$total_rest</td></tr>\n" ;
	3010	+
	3011	+ $html .= "<tr><td colspan=99> </td></tr>\n" ;
	3012	+ $html .= "<tr><td colspan=99 class=l><b>Per project language / subproject</b> (top 50)</td></tr>\n" ;
	3013	+ $projects = 0 ;
	3014	+ $total_total = 0 ;
	3015	+ $total_page = 0 ;
	3016	+ $total_image = 0 ;
	3017	+ $total_rest = 0 ;
	3018	+ foreach $origin (@origin_int_top_sorted_count)
	3019	+ {
	3020	+ if (++$projects > 50)
	3021	+ {
	3022	+ $origin_int_top_other {"all"} += $origin_int_top {$origin} ; ;
	3023	+ $origin_int_top_other {"page"} += $origin_int_top_split {"page:$origin"} ;
	3024	+ $origin_int_top_other {"image"} += $origin_int_top_split {"image:$origin"} ;
	3025	+ $origin_int_top_other {"other"} += $origin_int_top_split {"other:$origin"} ;
	3026	+ next ;
	3027	+ }
	3028	+ $top100_internal_origins {$origin} ++ ;
	3029	+ $total = $origin_int_top {$origin} ;
	3030	+ $page = $origin_int_top_split {"page:$origin"} ;
	3031	+ $image = $origin_int_top_split {"image:$origin"} ;
	3032	+ $rest = $origin_int_top_split {"other:$origin"} ;
	3033	+ $total_total += $total ;
	3034	+ $total_page += $page ;
	3035	+ $total_image += $image ;
	3036	+ $total_rest += $rest ;
	3037	+ $total = &FormatCount ($total) ;
	3038	+ $page = &FormatCount ($page) ;
	3039	+ $image = &FormatCount ($image) ;
	3040	+ $rest = &FormatCount ($rest) ;
	3041	+ ($project,$subproject) = split (':', $origin) ;
	3042	+ $html .= "<tr><td class=l>" . ucfirst($project) . "</td><td class=l>$subproject</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3043	+
	3044	+ }
	3045	+ $total = $origin_int_top_other {"all"} ;
	3046	+ $page = $origin_int_top_other {"page"} ;
	3047	+ $image = $origin_int_top_other {"image"} ;
	3048	+ $rest = $origin_int_top_other {"other"} ;
	3049	+ $total_total += $total ;
	3050	+ $total_page += $page ;
	3051	+ $total_image += $image ;
	3052	+ $total_rest += $rest ;
	3053	+ $total = &FormatCount ($total) ;
	3054	+ $page = &FormatCount ($page) ;
	3055	+ $image = &FormatCount ($image) ;
	3056	+ $rest = &FormatCount ($rest) ;
	3057	+ $html .= "<tr><td colspan=2 class=l>Other</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3058	+ $grand_grand_total = $total_total ;
	3059	+ $total_total = &FormatCount ($total_total) ;
	3060	+ $total_page = &FormatCount ($total_page) ;
	3061	+ $total_image = &FormatCount ($total_image) ;
	3062	+ $total_rest = &FormatCount ($total_rest) ;
	3063	+ $html .= "<tr><th colspan=2 class=l>Total</th><th class=r>$total_total</th><td class=r>$total_page</td><td class=r>$total_image</td><td class=r>$total_rest</td></tr>\n" ;
	3064	+ $html .= "</table>" ;
	3065	+
	3066	+ # BY ALPHABET
	3067	+ $html .= "</td><td width=50% valign=top>" ;
	3068	+
	3069	+ $html .= "<table border=1 width=100%>\n" ;
	3070	+ $html .= "<tr><td colspan=2 class=l><b>Internal origins<br>sorted by<br>alphabet</b></td><th class=r> Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3071	+
	3072	+ $total_total = 0 ;
	3073	+ $total_page = 0 ;
	3074	+ $total_image = 0 ;
	3075	+ $total_rest = 0 ;
	3076	+ foreach $project (@project_int_top_sorted_alpha)
	3077	+ {
	3078	+ $total = $project_int_top {$project} ;
	3079	+ $page = $project_int_top_split {"page:$project"} ;
	3080	+ $image = $project_int_top_split {"image:$project"} ;
	3081	+ $rest = $project_int_top_split {"other:$project"} ;
	3082	+ $total_total += $total ;
	3083	+ $total_page += $page ;
	3084	+ $total_image += $image ;
	3085	+ $total_rest += $rest ;
	3086	+ $total = &FormatCount ($total) ;
	3087	+ $page = &FormatCount ($page) ;
	3088	+ $image = &FormatCount ($image) ;
	3089	+ $rest = &FormatCount ($rest) ;
	3090	+ $html .= "<tr><td colspan=2 class=l>$project</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3091	+ }
	3092	+ $total_total = &FormatCount ($total_total) ;
	3093	+ $total_page = &FormatCount ($total_page) ;
	3094	+ $total_image = &FormatCount ($total_image) ;
	3095	+ $total_rest = &FormatCount ($total_rest) ;
	3096	+ $html .= "<tr><th colspan=2 class=l>total</th><th class=r>$total_total</th><td class=r>$total_page</td><td class=r>$total_image</td><td class=r>$total_rest</td></tr>\n" ;
	3097	+
	3098	+ $html .= "<tr><td colspan=99> </td></tr>\n" ;
	3099	+ $html .= "<tr><td colspan=99 class=l><b>Per project language / subproject</b> (top 50)</td></tr>\n" ;
	3100	+ $projects = 0 ;
	3101	+ $total_total = 0 ;
	3102	+ $total_page = 0 ;
	3103	+ $total_image = 0 ;
	3104	+ $total_rest = 0 ;
	3105	+ foreach $origin (@origin_int_top_sorted_alpha)
	3106	+ {
	3107	+ next if $top100_internal_origins {$origin} == 0 ;
	3108	+
	3109	+ $total = $origin_int_top {$origin} ;
	3110	+ $page = $origin_int_top_split {"page:$origin"} ;
	3111	+ $image = $origin_int_top_split {"image:$origin"} ;
	3112	+ $rest = $origin_int_top_split {"other:$origin"} ;
	3113	+ $total_total += $total ;
	3114	+ $total_page += $page ;
	3115	+ $total_image += $image ;
	3116	+ $total_rest += $rest ;
	3117	+ $total = &FormatCount ($total) ;
	3118	+ $page = &FormatCount ($page) ;
	3119	+ $image = &FormatCount ($image) ;
	3120	+ $rest = &FormatCount ($rest) ;
	3121	+ ($project,$subproject) = split (':', $origin) ;
	3122	+ $html .= "<tr><td class=l>$project</td><td class=l>$subproject</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3123	+
	3124	+ }
	3125	+ $total = $origin_int_top_other {"all"} ;
	3126	+ $page = $origin_int_top_other {"page"} ;
	3127	+ $image = $origin_int_top_other {"image"} ;
	3128	+ $rest = $origin_int_top_other {"other"} ;
	3129	+ $total_total += $total ;
	3130	+ $total_page += $page ;
	3131	+ $total_image += $image ;
	3132	+ $total_rest += $rest ;
	3133	+ $total = &FormatCount ($total) ;
	3134	+ $page = &FormatCount ($page) ;
	3135	+ $image = &FormatCount ($image) ;
	3136	+ $rest = &FormatCount ($rest) ;
	3137	+ $html .= "<tr><td colspan=2 class=l>other</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3138	+ $total_total = &FormatCount ($total_total) ;
	3139	+ $total_page = &FormatCount ($total_page) ;
	3140	+ $total_image = &FormatCount ($total_image) ;
	3141	+ $total_rest = &FormatCount ($total_rest) ;
	3142	+ $html .= "<tr><th colspan=2 class=l>total</th><th class=r>$total_total</th><td class=r>$total_page</td><td class=r>$total_image</td><td class=r>$total_rest</td></tr>\n" ;
	3143	+ $html .= "</table>" ;
	3144	+
	3145	+ $html .= "</td></tr>" ;
	3146	+ $html .= "</table>" ;
	3147	+
	3148	+ # REQUESTS WITH EXTERNAL ORIGINS
	3149	+
	3150	+ $html .= "<table border=1 width=100%>\n" ;
	3151	+ $html .= "<tr><td colspan=99 class=c> </td></tr>\n" ;
	3152	+ $html .= "<tr><td colspan=99 class=c><h3>Requests with external origins</h3></td></tr>\n" ;
	3153	+ $html .= "<table border=1 width=100%>\n" ;
	3154	+
	3155	+ $html .= "<tr><td width=50% valign=top>" ;
	3156	+ $html .= "<table border=1 width=100%>\n" ;
	3157	+# $html .= "<tr><td class=l><b><a href='http://..'>External origins</a><br>sorted by<br>frequency</b><br>top 100</td><th class=r> Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3158	+ $html .= "<tr><td class=l><b>External origins<br>sorted by<br>frequency</b><br>top 100</td><th class=r> Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3159	+
	3160	+ $projects = 0 ;
	3161	+ $total_total = 0 ;
	3162	+ $total_page = 0 ;
	3163	+ $total_image = 0 ;
	3164	+ $total_rest = 0 ;
	3165	+ foreach $origin (@origin_ext_top_sorted_count)
	3166	+ {
	3167	+ $total = $origin_ext_top {$origin} ;
	3168	+ $page = $origin_ext_top_split {"page:$origin"} ;
	3169	+ $image = $origin_ext_top_split {"image:$origin"} ;
	3170	+ $rest = $origin_ext_top_split {"other:$origin"} ;
	3171	+ $total_total += $total ;
	3172	+ $total_page += $page ;
	3173	+ $total_image += $image ;
	3174	+ $total_rest += $rest ;
	3175	+ $total = &FormatCount ($total) ;
	3176	+ $page = &FormatCount ($page) ;
	3177	+ $image = &FormatCount ($image) ;
	3178	+ $rest = &FormatCount ($rest) ;
	3179	+
	3180	+ if (++$projects > 100)
	3181	+ {
	3182	+ $origin_ext_top_other {"all"} += $origin_ext_top {$origin} ; ;
	3183	+ $origin_ext_top_other {"page"} += $origin_ext_top_split {"page:$origin"} ;
	3184	+ $origin_ext_top_other {"image"} += $origin_ext_top_split {"image:$origin"} ;
	3185	+ $origin_ext_top_other {"other"} += $origin_ext_top_split {"other:$origin"} ;
	3186	+ next ;
	3187	+ }
	3188	+ $top100_internal_origins {$origin} ++ ;
	3189	+
	3190	+ if ($origin =~ /\./)
	3191	+ { $link_origin = "<a href='http://$origin' ref='nofollow'>$origin</a>" ; }
	3192	+ else
	3193	+ { $link_origin = $origin ; }
	3194	+ $html .= "<tr><td class=l>$link_origin</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3195	+ }
	3196	+ $total = $origin_ext_top_other {"all"} ;
	3197	+ $page = $origin_ext_top_other {"page"} ;
	3198	+ $image = $origin_ext_top_other {"image"} ;
	3199	+ $rest = $origin_ext_top_other {"other"} ;
	3200	+ $total = &FormatCount ($total) ;
	3201	+ $page = &FormatCount ($page) ;
	3202	+ $image = &FormatCount ($image) ;
	3203	+ $rest = &FormatCount ($rest) ;
	3204	+ $html .= "<tr><td class=l>other</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3205	+ $grand_grand_total = $total_total ;
	3206	+ $total_total = &FormatCount ($total_total) ;
	3207	+ $total_page = &FormatCount ($total_page) ;
	3208	+ $total_image = &FormatCount ($total_image) ;
	3209	+ $total_rest = &FormatCount ($total_rest) ;
	3210	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_page</td><td class=r>$total_image</td><td class=r>$total_rest</td></tr>\n" ;
	3211	+ $html .= "</table>" ;
	3212	+
	3213	+ # BY ALPHABET
	3214	+ $html .= "</td><td width=50% valign=top>" ;
	3215	+
	3216	+ $html .= "<table border=1 width=100%>\n" ;
	3217	+# $html .= "<tr><td class=l><b><a href='http://..'>External origins</a><br>sorted by<br>alphabet</b><br>top 100</td><th class=r> Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3218	+ $html .= "<tr><td class=l><b>External origins<br>sorted by<br>alphabet</b><br>top 100</td><th class=r> Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3219	+
	3220	+ $projects = 0 ;
	3221	+ $total_total = 0 ;
	3222	+ $total_page = 0 ;
	3223	+ $total_image = 0 ;
	3224	+ $total_rest = 0 ;
	3225	+ foreach $origin (@origin_ext_top_sorted_alpha)
	3226	+ {
	3227	+
	3228	+ $total = $origin_ext_top {$origin} ;
	3229	+ $page = $origin_ext_top_split {"page:$origin"} ;
	3230	+ $image = $origin_ext_top_split {"image:$origin"} ;
	3231	+ $rest = $origin_ext_top_split {"other:$origin"} ;
	3232	+ $total_total += $total ;
	3233	+ $total_page += $page ;
	3234	+ $total_image += $image ;
	3235	+ $total_rest += $rest ;
	3236	+ $total = &FormatCount ($total) ;
	3237	+ $page = &FormatCount ($page) ;
	3238	+ $image = &FormatCount ($image) ;
	3239	+ $rest = &FormatCount ($rest) ;
	3240	+
	3241	+ next if $top100_internal_origins {$origin} == 0 ;
	3242	+
	3243	+ $html .= "<tr><td class=l>$origin</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3244	+
	3245	+ }
	3246	+ $total = $origin_ext_top_other {"all"} ;
	3247	+ $page = $origin_ext_top_other {"page"} ;
	3248	+ $image = $origin_ext_top_other {"image"} ;
	3249	+ $rest = $origin_ext_top_other {"other"} ;
	3250	+ $total = &FormatCount ($total) ;
	3251	+ $page = &FormatCount ($page) ;
	3252	+ $image = &FormatCount ($image) ;
	3253	+ $rest = &FormatCount ($rest) ;
	3254	+ $html .= "<tr><td class=l>other</td><th class=r>$total</th><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3255	+ $total_total = &FormatCount ($total_total) ;
	3256	+ $total_page = &FormatCount ($total_page) ;
	3257	+ $total_image = &FormatCount ($total_image) ;
	3258	+ $total_rest = &FormatCount ($total_rest) ;
	3259	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_page</td><td class=r>$total_image</td><td class=r>$total_rest</td></tr>\n" ;
	3260	+ $html .= "</table>" ;
	3261	+
	3262	+ $html .= "</td></tr>" ;
	3263	+# $html .= "<tr><td colspan=99 class=c>For presentation conciseness the top level domain (.org, .com, ..) is ignored here. There is a theoretical<br> possibility that figures for two unrelated sites which are both popular are presented as one here.<p>" .
	3264	+# "'Unmatched ip address': all requests without explicit referer url that were not allocated <br>to a site based on known ip range, e.g. google (by ip) or agent string, e.g. google (by agent)</td></tr>" ;
	3265	+ $html .= "<tr><td colspan=99 class=c>'Origin unknown': all requests without explicit referer url, without known ip range and without identity clue in the agent string.<br>Note that right now only ip ranges for Google and Yahoo are recognized by the script (manual input Feb 2009)</td></tr>" ;
	3266	+ $html .= "</table>" ;
	3267	+
	3268	+ # EXTERNAL ORIGINS
	3269	+if (0)
	3270	+{
	3271	+ $html .= "<tr><td colspan=99 class=c> </td></tr>\n" ;
	3272	+ $html .= "<tr><td colspan=99 class=c><h3>External origins</h3></td></tr>\n" ;
	3273	+ $html .= "<tr><td width=50% valign=top>" ;
	3274	+
	3275	+
	3276	+ $html .= "<table border=1 width=100%>\n" ;
	3277	+ $html .= "<tr><td class=l><b><a href='http://en.wikipedia.org/wiki/Top-level_domain'>Top level domains</a> (tld)<br>sorted by<br>frequency</b></td><th class=r> Total</th><th class=r>Google</th><th class=r>Yahoo</th><th class=r>Other</th></tr>\n" ;
	3278	+ $html .= "<tr><td colspan=99 class=l> <br><b><a href='http://en.wikipedia.org/wiki/Generic_top-level_domain'>Generic</a> and <a href='http://en.wikipedia.org/wiki/Sponsored_top-level_domains'>Sponsored</a> tld's</a></b></td></tr>\n" ;
	3279	+ foreach $toplevel (@origin_ext_page_top_sorted_count)
	3280	+ {
	3281	+ next if (length ($toplevel) <= 2) \|\| ($toplevel =~ /^(?:address\|local\|rest\|unspecified)$/) ;
	3282	+
	3283	+ $total = $origin_ext_page_top {$toplevel} ;
	3284	+ $google = $origin_ext_page_top_split {"google:$toplevel"} ;
	3285	+ $yahoo = $origin_ext_page_top_split {"yahoo:$toplevel"} ;
	3286	+ $rest = $origin_ext_page_top_split {"other:$toplevel"} ;
	3287	+ $total_total += $total ;
	3288	+ $total_google += $google ;
	3289	+ $total_yahoo += $yahoo ;
	3290	+ $total_rest += $rest ;
	3291	+ $total = &FormatCount ($total) ;
	3292	+ $google = &FormatCount ($google) ;
	3293	+ $yahoo = &FormatCount ($yahoo) ;
	3294	+ $rest = &FormatCount ($rest) ;
	3295	+ $html .= "<tr><td class=l>$toplevel</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3296	+ }
	3297	+ $grand_total += $total_total ;
	3298	+ $grand_google += $total_google ;
	3299	+ $grand_yahoo += $total_yahoo ;
	3300	+ $grand_rest += $total_rest ;
	3301	+ $total_total = &FormatCount ($total_total) ;
	3302	+ $total_google = &FormatCount ($total_google) ;
	3303	+ $total_yahoo = &FormatCount ($total_yahoo) ;
	3304	+ $total_rest = &FormatCount ($total_rest) ;
	3305	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_google</td><td class=r>$total_yahoo</td><td class=r>$total_rest</td></tr>\n" ;
	3306	+
	3307	+ $total_total = 0 ;
	3308	+ $total_google = 0 ;
	3309	+ $total_yahoo = 0 ;
	3310	+ $total_rest = 0 ;
	3311	+ $html .= "<tr><td colspan=99 class=l> <br><b><a href='http://en.wikipedia.org/wiki/Country_code_top-level_domain'>Country code tld's</a></b></td></tr>\n" ;
	3312	+ foreach $toplevel (@origin_ext_page_top_sorted_count)
	3313	+ {
	3314	+ next if length ($toplevel) != 2 ;
	3315	+
	3316	+ $total = $origin_ext_page_top {$toplevel} ;
	3317	+ $google = $origin_ext_page_top_split {"google:$toplevel"} ;
	3318	+ $yahoo = $origin_ext_page_top_split {"yahoo:$toplevel"} ;
	3319	+ $rest = $origin_ext_page_top_split {"other:$toplevel"} ;
	3320	+ $total_total += $total ;
	3321	+ $total_google += $google ;
	3322	+ $total_yahoo += $yahoo ;
	3323	+ $total_rest += $rest ;
	3324	+ $total = &FormatCount ($total) ;
	3325	+ $google = &FormatCount ($google) ;
	3326	+ $yahoo = &FormatCount ($yahoo) ;
	3327	+ $rest = &FormatCount ($rest) ;
	3328	+ $html .= "<tr><td class=l>$toplevel</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3329	+ }
	3330	+ $grand_total += $total_total ;
	3331	+ $grand_google += $total_google ;
	3332	+ $grand_yahoo += $total_yahoo ;
	3333	+ $grand_rest += $total_rest ;
	3334	+ $total_total = &FormatCount ($total_total) ;
	3335	+ $total_google = &FormatCount ($total_google) ;
	3336	+ $total_yahoo = &FormatCount ($total_yahoo) ;
	3337	+ $total_rest = &FormatCount ($total_rest) ;
	3338	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_google</td><td class=r>$total_yahoo</td><td class=r>$total_rest</td></tr>\n" ;
	3339	+
	3340	+ $total_total = 0 ;
	3341	+ $total_google = 0 ;
	3342	+ $total_yahoo = 0 ;
	3343	+ $total_rest = 0 ;
	3344	+ $html .= "<tr><td colspan=99 class=l> <br><b>Remainder</th></tr>\n" ;
	3345	+ $total = $origin_ext_page_top {"local"} ;
	3346	+ $google = $origin_ext_page_top_split {"google:local"} ; # always zero
	3347	+ $yahoo = $origin_ext_page_top_split {"yahoo:local"} ; # always zero
	3348	+ $rest = $origin_ext_page_top_split {"other:local"} ;
	3349	+ $total_total += $total ;
	3350	+ $total_google += $google ;
	3351	+ $total_yahoo += $yahoo ;
	3352	+ $total_rest += $rest ;
	3353	+ $total = &FormatCount ($total) ;
	3354	+ $google = &FormatCount ($google) ;
	3355	+ $yahoo = &FormatCount ($yahoo) ;
	3356	+ $rest = &FormatCount ($rest) ;
	3357	+ $html .= "<tr><td class=l>localhost</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3358	+
	3359	+ $total = $origin_ext_page_top {"address"} ;
	3360	+ $google = $origin_ext_page_top_split {"google:address"} ;
	3361	+ $yahoo = $origin_ext_page_top_split {"yahoo:address"} ;
	3362	+ $rest = $origin_ext_page_top_split {"other:address"} ;
	3363	+ $total_total += $total ;
	3364	+ $total_google += $google ;
	3365	+ $total_yahoo += $yahoo ;
	3366	+ $total_rest += $rest ;
	3367	+ $total = &FormatCount ($total) ;
	3368	+ $google = &FormatCount ($google) ;
	3369	+ $yahoo = &FormatCount ($yahoo) ;
	3370	+ $rest = &FormatCount ($rest) ;
	3371	+ $html .= "<tr><td class=l>ip address</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3372	+
	3373	+ $total = $origin_ext_page_top {"rest"} ;
	3374	+ $google = $origin_ext_page_top_split {"google:rest"} ;
	3375	+ $yahoo = $origin_ext_page_top_split {"yahoo:rest"} ;
	3376	+ $rest = $origin_ext_page_top_split {"other:rest"} ;
	3377	+ $total_total += $total ;
	3378	+ $total_google += $google ;
	3379	+ $total_yahoo += $yahoo ;
	3380	+ $total_rest += $rest ;
	3381	+ $total = &FormatCount ($total) ;
	3382	+ $google = &FormatCount ($google) ;
	3383	+ $yahoo = &FormatCount ($yahoo) ;
	3384	+ $rest = &FormatCount ($rest) ;
	3385	+ $html .= "<tr><td class=l>other</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3386	+
	3387	+ $total = $origin_ext_page_top {"unspecified"} ;
	3388	+ $google = $origin_ext_page_top_split {"google:unspecified"} ;
	3389	+ $yahoo = $origin_ext_page_top_split {"yahoo:unspecified"} ;
	3390	+ $rest = $origin_ext_page_top_split {"other:unspecified"} ;
	3391	+ $total_total += $total ;
	3392	+ $total_google += $google ;
	3393	+ $total_yahoo += $yahoo ;
	3394	+ $total_rest += $rest ;
	3395	+ $total = &FormatCount ($total) ;
	3396	+ $google = &FormatCount ($google) ;
	3397	+ $yahoo = &FormatCount ($yahoo) ;
	3398	+ $rest = &FormatCount ($rest) ;
	3399	+ $html .= "<tr><td class=l>anonymous</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3400	+
	3401	+ $grand_total += $total_total ;
	3402	+ $grand_google += $total_google ;
	3403	+ $grand_yahoo += $total_yahoo ;
	3404	+ $grand_rest += $total_rest ;
	3405	+ $total_total = &FormatCount ($total_total) ;
	3406	+ $total_google = &FormatCount ($total_google) ;
	3407	+ $total_yahoo = &FormatCount ($total_yahoo) ;
	3408	+ $total_rest = &FormatCount ($total_rest) ;
	3409	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_google</td><td class=r>$total_yahoo</td><td class=r>$total_rest</td></tr>\n" ;
	3410	+
	3411	+ $html .= "<tr><td colspan=99 class=l> <br><b>Grand total external</th></tr>\n" ;
	3412	+ $grand_total = &FormatCount ($grand_total) ;
	3413	+ $grand_google = &FormatCount ($grand_google) ;
	3414	+ $grand_yahoo = &FormatCount ($grand_yahoo) ;
	3415	+ $grand_rest = &FormatCount ($grand_rest) ;
	3416	+ $html .= "<tr><th class=l>total</th><th class=r>$grand_total</th><td class=r>$grand_google</td><td class=r>$grand_yahoo</td><td class=r>$grand_rest</td></tr>\n" ;
	3417	+ $html .= "</table>" ;
	3418	+
	3419	+ $html .= "</td><td width=50% valign=top>" ;
	3420	+
	3421	+ $html .= "<table border=1 width=100%>\n" ;
	3422	+
	3423	+ $html .= "<tr><th class=l>Top level domains<br>sorted by<br>alphabet</th><th class=r>Total<th class=r>Google<th class=r>Yahoo<th class=r>Other</th></tr>\n" ;
	3424	+# $html .= "<tr><th colspan=99 class=l> <br><b><a href='http://en.wikipedia.org/wiki/Top-level_domain'>generic/sponsored tld's</a></b></th></tr>\n" ;
	3425	+ $total_total = 0 ;
	3426	+ $total_google = 0 ;
	3427	+ $total_yahoo = 0 ;
	3428	+ $total_rest = 0 ;
	3429	+ $html .= "<tr><td colspan=99 class=l> <br><b>Generic and sponsored tld's</b></td></tr>\n" ;
	3430	+
	3431	+ foreach $toplevel (@origin_ext_page_top_sorted_alpha)
	3432	+ {
	3433	+ next if (length ($toplevel) <= 2) \|\| ($toplevel =~ /^(?:address\|local\|rest\|unspecified)$/) ;
	3434	+
	3435	+ $total = $origin_ext_page_top {$toplevel} ;
	3436	+ $google = $origin_ext_page_top_split {"google:$toplevel"} ;
	3437	+ $yahoo = $origin_ext_page_top_split {"yahoo:$toplevel"} ;
	3438	+ $rest = $origin_ext_page_top_split {"other:$toplevel"} ;
	3439	+ $total_total += $total ;
	3440	+ $total_google += $google ;
	3441	+ $total_yahoo += $yahoo ;
	3442	+ $total_rest += $rest ;
	3443	+ $total = &FormatCount ($total) ;
	3444	+ $google = &FormatCount ($google) ;
	3445	+ $yahoo = &FormatCount ($yahoo) ;
	3446	+ $rest = &FormatCount ($rest) ;
	3447	+ $html .= "<tr><td class=l>$toplevel</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3448	+ }
	3449	+ $total_total = &FormatCount ($total_total) ;
	3450	+ $total_google = &FormatCount ($total_google) ;
	3451	+ $total_yahoo = &FormatCount ($total_yahoo) ;
	3452	+ $total_rest = &FormatCount ($total_rest) ;
	3453	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_google</td><td class=r>$total_yahoo</td><td class=r>$total_rest</td></tr>\n" ;
	3454	+
	3455	+ $total_total = 0 ;
	3456	+ $total_google = 0 ;
	3457	+ $total_yahoo = 0 ;
	3458	+ $total_rest = 0 ;
	3459	+ $html .= "<tr><td colspan=99 class=l> <br><b><a href='http://en.wikipedia.org/wiki/Country_code_top-level_domain'>Country code tld's</a></b></td></tr>\n" ;
	3460	+ foreach $toplevel (@origin_ext_page_top_sorted_alpha)
	3461	+ {
	3462	+ next if length ($toplevel) != 2 ;
	3463	+
	3464	+ $total = $origin_ext_page_top {$toplevel} ;
	3465	+ $google = $origin_ext_page_top_split {"google:$toplevel"} ;
	3466	+ $yahoo = $origin_ext_page_top_split {"yahoo:$toplevel"} ;
	3467	+ $rest = $origin_ext_page_top_split {"other:$toplevel"} ;
	3468	+ $total_total += $total ;
	3469	+ $total_google += $google ;
	3470	+ $total_yahoo += $yahoo ;
	3471	+ $total_rest += $rest ;
	3472	+ $total = &FormatCount ($total) ;
	3473	+ $google = &FormatCount ($google) ;
	3474	+ $yahoo = &FormatCount ($yahoo) ;
	3475	+ $rest = &FormatCount ($rest) ;
	3476	+ $html .= "<tr><td class=l>$toplevel</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3477	+ }
	3478	+ $total_total = &FormatCount ($total_total) ;
	3479	+ $total_google = &FormatCount ($total_google) ;
	3480	+ $total_yahoo = &FormatCount ($total_yahoo) ;
	3481	+ $total_rest = &FormatCount ($total_rest) ;
	3482	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_google</td><td class=r>$total_yahoo</td><td class=r>$total_rest</td></tr>\n" ;
	3483	+
	3484	+ $total_total = 0 ;
	3485	+ $total_google = 0 ;
	3486	+ $total_yahoo = 0 ;
	3487	+ $total_rest = 0 ;
	3488	+ $html .= "<tr><td colspan=99 class=l> <br><b>Remainder</th></tr>\n" ;
	3489	+ $total = $origin_ext_page_top {"local"} ;
	3490	+ $google = $origin_ext_page_top_split {"google:local"} ; # always zero
	3491	+ $yahoo = $origin_ext_page_top_split {"yahoo:local"} ; # always zero
	3492	+ $rest = $origin_ext_page_top_split {"other:local"} ;
	3493	+ $total_total += $total ;
	3494	+ $total_google += $google ;
	3495	+ $total_yahoo += $yahoo ;
	3496	+ $total_rest += $rest ;
	3497	+ $total = &FormatCount ($total) ;
	3498	+ $google = &FormatCount ($google) ;
	3499	+ $yahoo = &FormatCount ($yahoo) ;
	3500	+ $rest = &FormatCount ($rest) ;
	3501	+ $html .= "<tr><td class=l>localhost</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3502	+
	3503	+ $total = $origin_ext_page_top {"address"} ;
	3504	+ $google = $origin_ext_page_top_split {"google:address"} ;
	3505	+ $yahoo = $origin_ext_page_top_split {"yahoo:address"} ;
	3506	+ $rest = $origin_ext_page_top_split {"other:address"} ;
	3507	+ $total_total += $total ;
	3508	+ $total_google += $google ;
	3509	+ $total_yahoo += $yahoo ;
	3510	+ $total_rest += $rest ;
	3511	+ $total = &FormatCount ($total) ;
	3512	+ $google = &FormatCount ($google) ;
	3513	+ $yahoo = &FormatCount ($yahoo) ;
	3514	+ $rest = &FormatCount ($rest) ;
	3515	+ $html .= "<tr><td class=l>ip address</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3516	+
	3517	+ $total = $origin_ext_page_top {"rest"} ;
	3518	+ $google = $origin_ext_page_top_split {"google:rest"} ;
	3519	+ $yahoo = $origin_ext_page_top_split {"yahoo:rest"} ;
	3520	+ $rest = $origin_ext_page_top_split {"other:rest"} ;
	3521	+ $total_total += $total ;
	3522	+ $total_google += $google ;
	3523	+ $total_yahoo += $yahoo ;
	3524	+ $total_rest += $rest ;
	3525	+ $total = &FormatCount ($total) ;
	3526	+ $google = &FormatCount ($google) ;
	3527	+ $yahoo = &FormatCount ($yahoo) ;
	3528	+ $rest = &FormatCount ($rest) ;
	3529	+ $html .= "<tr><td class=l>other</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3530	+
	3531	+ $total = $origin_ext_page_top {"unspecified"} ;
	3532	+ $google = $origin_ext_page_top_split {"google:unspecified"} ;
	3533	+ $yahoo = $origin_ext_page_top_split {"yahoo:unspecified"} ;
	3534	+ $rest = $origin_ext_page_top_split {"other:unspecified"} ;
	3535	+ $total_total += $total ;
	3536	+ $total_google += $google ;
	3537	+ $total_yahoo += $yahoo ;
	3538	+ $total_rest += $rest ;
	3539	+ $total = &FormatCount ($total) ;
	3540	+ $google = &FormatCount ($google) ;
	3541	+ $yahoo = &FormatCount ($yahoo) ;
	3542	+ $rest = &FormatCount ($rest) ;
	3543	+ $html .= "<tr><td class=l>anonymous</td><th class=r>$total</th><td class=r>$google</td><td class=r>$yahoo</td><td class=r>$rest</td></tr>\n" ;
	3544	+
	3545	+ $total_total = &FormatCount ($total_total) ;
	3546	+ $total_google = &FormatCount ($total_google) ;
	3547	+ $total_yahoo = &FormatCount ($total_yahoo) ;
	3548	+ $total_rest = &FormatCount ($total_rest) ;
	3549	+ $html .= "<tr><th class=l>total</th><th class=r>$total_total</th><td class=r>$total_google</td><td class=r>$total_yahoo</td><td class=r>$total_rest</td></tr>\n" ;
	3550	+
	3551	+ $html .= "<tr><td colspan=99 class=l> <br><b>Grand total external</th></tr>\n" ;
	3552	+ $html .= "<tr><th class=l>total</th><th class=r>$grand_total</th><td class=r>$grand_google</td><td class=r>$grand_yahoo</td><td class=r>$grand_rest</td></tr>\n" ;
	3553	+ $html .= "</table>" ;
	3554	+
	3555	+ $html .= "</td></tr>" ;
	3556	+ $html .= "</table>" ;
	3557	+ $html .= "</td></tr>" ;
	3558	+
	3559	+ $html .= "</table>\n" ;
	3560	+}
	3561	+
	# WriteReportScripts
	#
	# Builds the 'Scripts' page of the traffic analysis report in $html and
	# writes it to "$dir_reports/$file_html_scripts".
	#
	# Reads (file-level globals, all populated elsewhere):
	#   $header, $colophon                 page template and footer
	#   $link_*, $dummy_scripts            navigation links substituted for LINKS
	#   %scripts_css, %scripts_js, %scripts_php   request count per script name
	#   @scripts_{css,js,php}_sorted_count / _sorted_script   display orders
	#   %actions                           count per "script,action" key
	#   %parms                             count per "script,parameters" key
	#   @parms_sorted_count, @parms_sorted_script             display orders
	#
	# Writes: $html, $total_php (formatted total of listed php scripts), $rows
	# (number of parameter rows in the alphabetical listing) and the scratch
	# globals $script, $key, $parm, $name, $parms, $nameprev, $total.
	#
	# Scripts with a count below 3 are left out of every listing.
	sub WriteReportScripts
	{
	  # Report an unwritable target instead of silently printing to an unopened handle.
	  if (! open (FILE_HTML_SCRIPTS, '>', "$dir_reports/$file_html_scripts"))
	  {
	    print STDERR "WriteReportScripts: cannot open '$dir_reports/$file_html_scripts' for writing: $!\n" ;
	    return ;
	  }

	  $html = $header ;
	  $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Scripts/ ;
	  $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Scripts/ ;
	  $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	  $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $dummy_scripts \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $link_google/ ;
	  $html =~ s/X1000/⇒ <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;

	  # %actions does not change while the page is built: sort its keys once per order
	  # rather than once per php script.
	  my @action_keys_by_count = keys_sorted_by_value_num_desc (%actions) ;
	  my @action_keys_by_name  = sort keys %actions ;

	  $html .= "<table border=1>\n" ;
	  $html .= "<tr><td colspan=99>" ;

	  $html .= "<table border=0 width=100%>\n" ;
	  $html .= "<tr><td width=50% valign=top>" ;

	  # LEFT COLUMN: SORTED BY REQUEST VOLUME
	  $html .= "<table border=1 width=100%>\n" ;
	  $html .= "<tr><td class=l><h3>In order of request volume</h3></td><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;

	  $html .= "<tr><th colspan=99 class=l> <br><b>css</b></th></tr>\n" ;
	  foreach $script (@scripts_css_sorted_count)
	  {
	    $total = $scripts_css {$script} ;
	    next if $total < 3 ;

	    $total = &FormatCount ($total) ;
	    $html .= "<tr><td class=l>$script</td><td class=r>$total</td></tr>\n" ;
	  }

	  $html .= "<tr><th colspan=99 class=l> <br><b>js</b></th></tr>\n" ;
	  foreach $script (@scripts_js_sorted_count)
	  {
	    $total = $scripts_js {$script} ;
	    next if $total < 3 ;

	    $total = &FormatCount ($total) ;
	    $html .= "<tr><td class=l>$script</td><td class=r>$total</td></tr>\n" ;
	  }

	  $html .= "<tr><th colspan=99 class=l> <br><b>php</b></th></tr>\n" ;
	  $total_php = 0 ;
	  foreach $script (@scripts_php_sorted_count)
	  {
	    $total = $scripts_php {$script} ;
	    next if $total < 3 ;

	    $total_php += $total ;
	    $total = &FormatCount ($total) ;
	    $html .= "<tr><td class=l>$script</td><td class=r>$total</td></tr>\n" ;

	    # Break the script down per action; an action that accounts for all
	    # requests of the script would only repeat the line above, so skip it.
	    foreach $key (@action_keys_by_count)
	    {
	      ($script2,$action) = split (',', $key) ;
	      if (($script eq $script2) && ($actions {$key} < $scripts_php {$script}))
	      { $html .= "<tr><td class=l>   <small>$action</small></td><td class=r><small>" . &FormatCount ($actions {$key}) . "</small></td></tr>\n" ; }
	    }
	  }
	  $total_php = &FormatCount ($total_php) ;
	  $html .= "<tr><th class=l>total php</th><th class=r>$total_php</th></tr>\n" ;
	  $html .= "</table>" ;

	  $html .= "</td><td width=50% valign=top>" ;

	  # RIGHT COLUMN: SORTED BY SCRIPT NAME
	  $html .= "<table border=1 width=100%>\n" ;
	  $html .= "<tr><td class=l><h3>In alphabetical order</h3></td><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;

	  $html .= "<tr><th colspan=99 class=l> <br><b>css</b></th></tr>\n" ;
	  foreach $script (@scripts_css_sorted_script)
	  {
	    $total = $scripts_css {$script} ;
	    next if $total < 3 ;

	    $total = &FormatCount ($total) ;
	    $html .= "<tr><td class=l>$script</td><td class=r>$total</td></tr>\n" ;
	  }

	  $html .= "<tr><th colspan=99 class=l> <br><b>js</b></th></tr>\n" ;
	  foreach $script (@scripts_js_sorted_script)
	  {
	    $total = $scripts_js {$script} ;
	    next if $total < 3 ;

	    $total = &FormatCount ($total) ;
	    $html .= "<tr><td class=l>$script</td><td class=r>$total</td></tr>\n" ;
	  }

	  $html .= "<tr><th colspan=99 class=l> <br><b>php</b></th></tr>\n" ;
	  # Restart the sum: $total_php holds the formatted total of the left column
	  # at this point and must not be used as a numeric accumulator.
	  $total_php = 0 ;
	  foreach $script (@scripts_php_sorted_script)
	  {
	    $total = $scripts_php {$script} ;
	    next if $total < 3 ;

	    $total_php += $total ;
	    $total = &FormatCount ($total) ;
	    $html .= "<tr><td class=l>$script</td><td class=r>$total</td></tr>\n" ;

	    foreach $key (@action_keys_by_name)
	    {
	      ($script2,$action) = split (',', $key) ;
	      if (($script eq $script2) && ($actions {$key} < $scripts_php {$script}))
	      { $html .= "<tr><td class=l>   <small>$action</small></td><td class=r><small>" . &FormatCount ($actions {$key}) . "</small></td></tr>\n" ; }
	    }
	  }
	  $total_php = &FormatCount ($total_php) ;
	  $html .= "<tr><th class=l>total php</th><th class=r>$total_php</th></tr>\n" ;
	  $html .= "</table>" ;

	  $html .= "</td></tr>" ;
	  $html .= "</table>" ;
	  $html .= "</td></tr>" ;

	  # PHP SCRIPTS WITH PARAMETERS, TOP 25 BY FREQUENCY
	  $html .= "<tr><td colspan=99> </td></tr>\n" ;
	  $html .= "<tr><th colspan=99 class=l><h3>PHP scripts and generalized arguments, sorted by frequency, top 25</h3></th></tr>\n" ;
	  $html .= "<tr><th class=l>Script</th><th class=l>Parameters</th><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;
	  $rows = 0 ;
	  foreach $parm (@parms_sorted_count)
	  {
	    $total = &FormatCount ($parms {$parm}) ;
	    # Limit 2: only the first comma separates script from parameters,
	    # the parameter string may contain commas of its own.
	    ($name,$parms) = split (',', $parm, 2) ;
	    if ($parms eq "")
	    { $parms = "-" ; }
	    $html .= "<tr><td class=l>$name</td><td class=l>$parms</td><td class=r>$total</td></tr>\n" ;
	    $rows++ ;

	    last if $rows == 25 ;
	  }
	# $html .= "</table>\n" ;
	# $html .= "</td><td>   </td><td>" ;
	# $html .= "<table border=1>\n" ;
	  $html .= "<tr><th colspan=99 class=l> </th></tr>\n" ;

	  # PHP SCRIPTS WITH PARAMETERS, ALPHABETICAL
	  $html .= "<tr><th colspan=99 class=l><h3>PHP scripts and generalized arguments, in alphabetical order <small>(≥ 3)</small></h3></th></tr>\n" ;

	  $html .= "<tr><td colspan=2 class=l><b>Script</b><br>Parameters</td><th class=r>Count<br><small>x 1000</small></th></tr>\n" ;
	  $rows = 0 ;
	  $nameprev = "" ;
	  foreach $parm (@parms_sorted_script)
	  {
	    ($name,$parms) = split (',', $parm, 2) ;

	    if ($name ne $nameprev)
	    {
	      # First parameter set of a new script: apply the threshold to the raw
	      # count (not to its formatted representation), then emit a header row.
	      next if $scripts_php {$name} < 3 ;

	      $total = &FormatCount ($scripts_php {$name}) ;

	      if ($nameprev ne "")
	      { $html .= "<tr><th colspan=99 class=l> </th></tr>\n" ; }
	      if (($name eq "api.php") || ($name eq "index.php"))
	      { $html .= "<tr><td colspan=2 class=l><b>$name</b> <small>(≥ 3)</small></td><th class=r>$total</th></tr>\n" ; }
	      else
	      { $html .= "<tr><td colspan=2 class=l><b>$name</b></td><th class=r>$total</th></tr>\n" ; }

	      # Remember the script as soon as its header is out, so the header is not
	      # repeated when the parameter row below is skipped.
	      $nameprev = $name ;
	    }

	    $total = $parms {$parm} ;

	    # api.php and index.php have many rare parameter combinations: list only those seen at least 3 times.
	    next if (($name eq "api.php") || ($name eq "index.php")) && ($total <= 2) ;

	    $total = &FormatCount ($total) ;
	    if ($parms eq "")
	    { $parms = "-" ; }
	    $html .= "<tr><td colspan=2 class=l>$parms</td><td class=r>$total</td></tr>\n" ;
	    $rows++ ;
	  }
	  $html .= "</table>\n" ;

	  $html .= "</td></tr></table>\n" ;
	  $html .= " <small>$rows rows written</small><p>" ;

	# $html .= "<p><b>Explanation:</b><br>'osd' = opensearchdescription / 'php.ser' = vnd.php.serialized" ;
	  $html .= $colophon ;

	  print FILE_HTML_SCRIPTS $html ;
	  close FILE_HTML_SCRIPTS ;
	}
	3738	+
	3739	+sub WriteReportGoogle
	3740	+{
	3741	+ open FILE_HTML_SEARCH, '>', "$dir_reports/$file_html_google" ;
	3742	+
	3743	+ $html = $header ;
	3744	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Google requests/ ;
	3745	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Google requests/ ;
	3746	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	3747	+ $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $link_scripts \/ $link_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $dummy_google/ ;
	3748	+ $html =~ s/X1000/⇒ <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;
	3749	+
	3750	+ $html .= "<table border=1 width=500 wrap>\n" ;
	3751	+# $html .= "<tr><td colspan=99 class=l> <br>This report shows <b>all requests to Wikimedia servers where a Google server of service was involved in any way</b>,<br> " .
	3752	+# "be it the <a href='http://en.wikipedia.org/wiki/Googlebot'>GoogleBot</a> crawler or <a href='http://www.google.com/feedfetcher.html'>FeedFetcher</a> collector scripts that run on Google servers,<br> " .
	3753	+# "or a user that follows a link from a Google Web or Google Desktop search results page, or " .
	3754	+# "from Google Maps or Google Earth etcetera. <p>Technically speaking three fields in the <a href='http://wikitech.wikimedia.org/view/Squid_log_format'>squid log records</a> are checked for this: " .
	3755	+# "client ip address, referer header and user agent header.<br>A request can originate from an ip address which has been registered by Google and/or it can carry a referer tag that tells us<br>a user clicked a link " .
	3756	+# "on a Google results page and/or it can carry an agent string that mentions a Google application which<br>can reasonably be assumed to be genuinely Google's. See bottom of page for <a href='#details'>further details</a>." .
	3757	+# "PERC_GOOGLE\n" ;
	3758	+ $html .= "<tr><td colspan=99 class=l wrap> <br>This report shows <b>all requests to Wikimedia servers where a Google server of service was involved in any way</b>, " .
	3759	+ "be it the <a href='http://en.wikipedia.org/wiki/Googlebot'>GoogleBot</a> crawler or <a href='http://www.google.com/feedfetcher.html'>FeedFetcher</a> collector scripts that run on Google servers, " .
	3760	+ "or a user that follows a link from a Google Web or Google Desktop search results page, or " .
	3761	+ "from Google Maps or Google Earth etcetera. <p>Technically speaking three fields in the <a href='http://wikitech.wikimedia.org/view/Squid_log_format'>squid log records</a> are checked for this: " .
	3762	+ "client ip address, referer header and user agent header. A request can originate from an ip address which has been registered by Google and/or it can carry a referer tag that tells us a user clicked a link " .
	3763	+ "on a Google results page and/or it can carry an agent string that mentions a Google application which can reasonably be assumed to be genuinely Google's. See bottom of page for <a href='#details'>further details</a>." .
	3764	+ "PERC_GOOGLE\n" ;
	3765	+
	3766	+ $html .= "<tr><td width=50%>\n" ;
	3767	+
	3768	+ # SORTED BY FREQUENCY
	3769	+ $html .= "<table border=1>\n" ;
	3770	+ $html .= "<tr><th colspan=99 class=l><h3>In order of request volume</h3></th></tr>\n" ;
	3771	+ $html .= "<tr><th colspan=99 class=l>Requests originating from a Google ip address</th></tr>\n" ;
	3772	+# $html .= "<tr><th colspan=99 class=l><small>x 1000</small></th>\n" ;
	3773	+ my $total_total_direct ;
	3774	+ my $total_page_direct ;
	3775	+ my $total_image_direct ;
	3776	+ my $total_rest_direct ;
	3777	+ $html .= "<tr><th class=l>Service</a><th class=r>Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3778	+ foreach $key (@searches_service_count)
	3779	+ {
	3780	+ next if $key !~ /Y$/ ; # googleIp
	3781	+
	3782	+ ($key2 = $key) =~ s/,[YN]$// ;
	3783	+ $total = $searches_service_mimecat {"$key2,total,Y"} ;
	3784	+ $page = $searches_service_mimecat {"$key2,page,Y"} ;
	3785	+ $image = $searches_service_mimecat {"$key2,image,Y"} ;
	3786	+ $rest = $searches_service_mimecat {"$key2,other,Y"} ;
	3787	+ $total_total_direct += $total ;
	3788	+ $total_page_direct += $page ;
	3789	+ $total_image_direct += $image ;
	3790	+ $total_rest_direct += $rest ;
	3791	+ $total = &FormatCount ($total) ;
	3792	+ $page = &FormatCount ($page) ;
	3793	+ $image = &FormatCount ($image) ;
	3794	+ $rest = &FormatCount ($rest) ;
	3795	+ $html .= "<tr><td class=l>$key2</a></td><td class=r>$total</td><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3796	+ }
	3797	+ $total_page_all = $total_page_direct ;
	3798	+
	3799	+# $total_page_requests_external_fmt = &FormatCount ($total_page_requests_external*1000) ;
	3800	+
	3801	+ $perc_google_direct = ".." ;
	3802	+ if ($total_page_requests_external > 0)
	3803	+ { $perc_google_direct = sprintf ("%.1f",100 * $total_page_direct/$total_page_requests_external) ; }
	3804	+ $total_page_direct_fmt = &FormatCount ($total_page_direct*1000) ;
	3805	+ $perc_google_msg_direct = "<p>Including all of its different search crawlers and services hosted on its servers, Google itself requested another $total_page_direct_fmt page pages per day, representing $perc_google_direct% of our external page requests.\n" ;
	3806	+
	3807	+ $total_total_direct = &FormatCount ($total_total_direct) ;
	3808	+ $total_page_direct = &FormatCount ($total_page_direct) ;
	3809	+ $total_image_direct = &FormatCount ($total_image_direct) ;
	3810	+ $total_rest_direct = &FormatCount ($total_rest_direct) ;
	3811	+
	3812	+ $html .= "<tr><th class=l>Total</a></th><th class=r>$total_total_direct</th><th class=r>$total_page_direct</th><th class=r>$total_image_direct</th><th class=r>$total_rest_direct</th></tr>\n" ;
	3813	+
	3814	+ my $total_total_indirect ;
	3815	+ my $total_page_indirect ;
	3816	+ my $total_image_indirect ;
	3817	+ my $total_rest_indirect ;
	3818	+
	3819	+ $html .= "<tr><th colspan=99 class=l> </th></tr>\n" ;
	3820	+ $html .= "<tr><th colspan=99 class=l>Requests originating from elsewhere</th></tr>\n" ;
	3821	+ $html .= "<tr><th class=l>Service</a><th class=r>Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3822	+ foreach $key (@searches_service_count)
	3823	+ {
	3824	+ next if $key =~ /Y$/ ; # googleIp
	3825	+
	3826	+ ($key2 = $key) =~ s/,[YN]$// ;
	3827	+ $total = $searches_service_mimecat {"$key2,total,N"} ;
	3828	+ $page = $searches_service_mimecat {"$key2,page,N"} ;
	3829	+ $image = $searches_service_mimecat {"$key2,image,N"} ;
	3830	+ $rest = $searches_service_mimecat {"$key2,other,N"} ;
	3831	+ $total_total_indirect += $total ;
	3832	+ $total_page_indirect += $page ;
	3833	+ $total_image_indirect += $image ;
	3834	+ $total_rest_indirect += $rest ;
	3835	+ $total = &FormatCount ($total) ;
	3836	+ $page = &FormatCount ($page) ;
	3837	+ $image = &FormatCount ($image) ;
	3838	+ $rest = &FormatCount ($rest) ;
	3839	+ $html .= "<tr><td class=l>$key2</a></td><td class=r>$total</td><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3840	+ }
	3841	+ $total_page_all += $total_page_indirect ;
	3842	+
	3843	+ $perc_google_indirect = ".." ;
	3844	+ if ($total_page_requests_external > 0)
	3845	+ { $perc_google_indirect = sprintf ("%.1f",100 * $total_page_indirect/$total_page_requests_external) ; }
	3846	+ $total_page_indirect_fmt = &FormatCount ($total_page_indirect*1000) ;
	3847	+ $perc_google_msg_indirect = "<p>Google referred to our sites, through its services including search, maps, and Google Earth, $total_page_indirect_fmt page views per day, representing $perc_google_indirect% of our external page requests.\n" ;
	3848	+
	3849	+ $total_total_indirect = &FormatCount ($total_total_indirect) ;
	3850	+ $total_page_indirect = &FormatCount ($total_page_indirect) ;
	3851	+ $total_image_indirect = &FormatCount ($total_image_indirect) ;
	3852	+ $total_rest_indirect = &FormatCount ($total_rest_indirect) ;
	3853	+
	3854	+ $html .= "<tr><th class=l>Total</a></th><th class=r>$total_total_indirect</th><th class=r>$total_page_indirect</th><th class=r>$total_image_indirect</th><th class=r>$total_rest_indirect</th></tr>\n" ;
	3855	+ $html .= "<tr><th class=l colspan=99> </td></tr>\n" ;
	3856	+ $html .= "<tr><th colspan=99 class=l><a href='http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains'>Top level domains</a></th></tr>\n" ;
	3857	+
	3858	+# $total_page_all_fmt = &FormatCount ($total_page_all*1000) ;
	3859	+
	3860	+ $perc_google = ".." ;
	3861	+ if ($total_page_requests_external > 0)
	3862	+ { $perc_google = sprintf ("%.1f",100 * $total_page_all/$total_page_requests_external) ; }
	3863	+
	3864	+ $perc_google_msg_all = "<p>In total Google was somehow involved in $perc_google\% of daily external page<sup>*<\/sup> requests \n" ;
	3865	+ $html =~ s/PERC_GOOGLE/<hr width=90%>$perc_google_msg_all $perc_google_msg_indirect $perc_google_msg_direct<p><small>* = mime type <a href='SquidReportRequests.htm'>text\/html<\/a> only<\/small>/ ;
	3866	+
	3867	+ $total_total = 0 ;
	3868	+ $total_page = 0 ;
	3869	+ $total_image = 0 ;
	3870	+ $total_rest = 0 ;
	3871	+ foreach $key (@searches_toplevel_count)
	3872	+ {
	3873	+ $total = $searches_toplevel_mimecat {"$key,total"} ;
	3874	+ $page = $searches_toplevel_mimecat {"$key,page"} ;
	3875	+ $image = $searches_toplevel_mimecat {"$key,image"} ;
	3876	+ $rest = $searches_toplevel_mimecat {"$key,other"} ;
	3877	+ $total_total += $total ;
	3878	+ $total_page += $page ;
	3879	+ $total_image += $image ;
	3880	+ $total_rest += $rest ;
	3881	+ $total = &FormatCount ($total) ;
	3882	+ $page = &FormatCount ($page) ;
	3883	+ $image = &FormatCount ($image) ;
	3884	+ $rest = &FormatCount ($rest) ;
	3885	+ if ($key !~ /^[\_\.]/)
	3886	+ { $key = ".$key" ; }
	3887	+# else
	3888	+# { $key =~ s/^[\.]// ; }
	3889	+ if ($key =~ /^\_/)
	3890	+ { $key = "<i>" . substr ($key,1) . "</i>" ; }
	3891	+ $html .= "<tr><td class=l>$key</a></td><td class=r>$total</td><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3892	+ }
	3893	+ $total_no_tld = $searches_mimecat_tld_not_found {"total"} ;
	3894	+ $page_no_tld = $searches_mimecat_tld_not_found {"page"} ;
	3895	+ $image_no_tld = $searches_mimecat_tld_not_found {"image"} ;
	3896	+ $other_no_tld = $searches_mimecat_tld_not_found {"other"} ;
	3897	+
	3898	+ $total_total += $total_no_tld ;
	3899	+ $total_page += $page_no_tld ;
	3900	+ $total_image += $image_no_tld ;
	3901	+ $total_rest += $other_no_tld ;
	3902	+
	3903	+ $total_no_tld = &FormatCount ($total_no_tld) ;
	3904	+ $page_no_tld = &FormatCount ($page_no_tld) ;
	3905	+ $image_no_tld = &FormatCount ($image_no_tld) ;
	3906	+ $other_no_tld = &FormatCount ($other_no_tld) ;
	3907	+ $html .= "<tr><td class=l>undefined</a></td><td class=r>$total_no_tld</td><td class=r>$page_no_tld</td><td class=r>$image_no_tld</td><td class=r>$other_no_tld</td></tr>\n" ;
	3908	+
	3909	+ $total_total = &FormatCount ($total_total) ;
	3910	+ $total_page = &FormatCount ($total_page) ;
	3911	+ $total_image = &FormatCount ($total_image) ;
	3912	+ $total_rest = &FormatCount ($total_rest) ;
	3913	+ $html .= "<tr><th class=l>Total</a></th><th class=r>$total_total</th><th class=r>$total_page</th><th class=r>$total_image</th><th class=r>$total_rest</th></tr>\n" ;
	3914	+
	3915	+ $html .= "</table>\n" ;
	3916	+
	3917	+ $html .= "</td><td width=50%>\n" ;
	3918	+
	3919	+ # SORTED BY ALPHABETICALLY
	3920	+ $html .= "<table border=1>\n" ;
	3921	+ $html .= "<tr><th colspan=99 class=l><h3>In alphabetical order</h3></th></tr>\n" ;
	3922	+ $html .= "<tr><th colspan=99 class=l>Requests originating from a Google ip address</th></tr>\n" ;
	3923	+# $html .= "<tr><th colspan=99 class=l><small>x 1000</small></th>\n" ;
	3924	+ $html .= "<tr><th class=l>Service</a><th class=r>Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3925	+ foreach $key (@searches_service_alpha)
	3926	+ {
	3927	+ next if $key !~ /Y$/ ; # googleIp
	3928	+
	3929	+ ($key2 = $key) =~ s/,[YN]$// ;
	3930	+ $total = $searches_service_mimecat {"$key2,total,Y"} ;
	3931	+ $page = $searches_service_mimecat {"$key2,page,Y"} ;
	3932	+ $image = $searches_service_mimecat {"$key2,image,Y"} ;
	3933	+ $rest = $searches_service_mimecat {"$key2,other,Y"} ;
	3934	+ $total = &FormatCount ($total) ;
	3935	+ $page = &FormatCount ($page) ;
	3936	+ $image = &FormatCount ($image) ;
	3937	+ $rest = &FormatCount ($rest) ;
	3938	+ if ($key !~ /(?:undefined\|unspecified\|crawler\|feedfetcher\|wireless transcoder)/)
	3939	+ { $key = ucfirst ($key) ; }
	3940	+ else
	3941	+ { $key = "<i>$key</i>" ; }
	3942	+ $html .= "<tr><td class=l>$key2</a></td><td class=r>$total</td><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3943	+ }
	3944	+ $html .= "<tr><th class=l>Total</a></th><th class=r>$total_total_direct</th><th class=r>$total_page_direct</th><th class=r>$total_image_direct</th><th class=r>$total_rest_direct</th></tr>\n" ;
	3945	+
	3946	+ $html .= "<tr><th colspan=99 class=l> </th></tr>\n" ;
	3947	+ $html .= "<tr><th colspan=99 class=l>Requests originating from elsewhere</th></tr>\n" ;
	3948	+ $html .= "<tr><th class=l>Service</a><th class=r>Total</th><th class=r>Pages</th><th class=r>Images</th><th class=r>Other</th></tr>\n" ;
	3949	+ foreach $key (@searches_service_alpha)
	3950	+ {
	3951	+ next if $key =~ /Y$/ ; # googleIp
	3952	+
	3953	+ ($key2 = $key) =~ s/,[YN]$// ;
	3954	+ $total = $searches_service_mimecat {"$key2,total,N"} ;
	3955	+ $page = $searches_service_mimecat {"$key2,page,N"} ;
	3956	+ $image = $searches_service_mimecat {"$key2,image,N"} ;
	3957	+ $rest = $searches_service_mimecat {"$key2,other,N"} ;
	3958	+ $total = &FormatCount ($total) ;
	3959	+ $page = &FormatCount ($page) ;
	3960	+ $image = &FormatCount ($image) ;
	3961	+ $rest = &FormatCount ($rest) ;
	3962	+ if ($key !~ /(?:undefined\|unspecified\|crawler\|feedfetcher\|wireless transcoder)/)
	3963	+ { $key = ucfirst ($key) ; }
	3964	+ else
	3965	+ { $key = "<i>$key</i>" ; }
	3966	+ $html .= "<tr><td class=l>$key2</a></td><td class=r>$total</td><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3967	+ }
	3968	+ $html .= "<tr><th class=l>Total</a></th><th class=r>$total_total_indirect</th><th class=r>$total_page_indirect</th><th class=r>$total_image_indirect</th><th class=r>$total_rest_indirect</th></tr>\n" ;
	3969	+ $html .= "<tr><th class=l colspan=99> </td></tr>\n" ;
	3970	+ $html .= "<tr><th colspan=99 class=l>Top level domains</th></tr>\n" ;
	3971	+
	3972	+ $total_total = 0 ;
	3973	+ $total_page = 0 ;
	3974	+ $total_image = 0 ;
	3975	+ $total_rest = 0 ;
	3976	+ foreach $key (@searches_toplevel_alpha)
	3977	+ {
	3978	+ $total = $searches_toplevel_mimecat {"$key,total"} ;
	3979	+ $page = $searches_toplevel_mimecat {"$key,page"} ;
	3980	+ $image = $searches_toplevel_mimecat {"$key,image"} ;
	3981	+ $rest = $searches_toplevel_mimecat {"$key,other"} ;
	3982	+ $total_total += $total ;
	3983	+ $total_page += $page ;
	3984	+ $total_image += $image ;
	3985	+ $total_rest += $rest ;
	3986	+ $total = &FormatCount ($total) ;
	3987	+ $page = &FormatCount ($page) ;
	3988	+ $image = &FormatCount ($image) ;
	3989	+ $rest = &FormatCount ($rest) ;
	3990	+ if ($key !~ /^[\_\.]/)
	3991	+ { $key = ".$key" ; }
	3992	+ if ($key =~ /^\_/)
	3993	+ { $key = "<i>" . substr ($key,1) . "</i>" ; }
	3994	+ $html .= "<tr><td class=l>$key</a></td><td class=r>$total</td><td class=r>$page</td><td class=r>$image</td><td class=r>$rest</td></tr>\n" ;
	3995	+ }
	3996	+ $total_no_tld = $searches_mimecat_tld_not_found {"total"} ;
	3997	+ $page_no_tld = $searches_mimecat_tld_not_found {"page"} ;
	3998	+ $image_no_tld = $searches_mimecat_tld_not_found {"image"} ;
	3999	+ $other_no_tld = $searches_mimecat_tld_not_found {"other"} ;
	4000	+
	4001	+ $total_total += $total_no_tld ;
	4002	+ $total_page += $page_no_tld ;
	4003	+ $total_image += $image_no_tld ;
	4004	+ $total_rest += $other_no_tld ;
	4005	+
	4006	+ $total_no_tld = &FormatCount ($total_no_tld) ;
	4007	+ $page_no_tld = &FormatCount ($page_no_tld) ;
	4008	+ $image_no_tld = &FormatCount ($image_no_tld) ;
	4009	+ $other_no_tld = &FormatCount ($other_no_tld) ;
	4010	+ $html .= "<tr><td class=l>undefined</a></td><td class=r>$total_no_tld</td><td class=r>$page_no_tld</td><td class=r>$image_no_tld</td><td class=r>$other_no_tld</td></tr>\n" ;
	4011	+
	4012	+ $total_total = &FormatCount ($total_total) ;
	4013	+ $total_page = &FormatCount ($total_page) ;
	4014	+ $total_image = &FormatCount ($total_image) ;
	4015	+ $total_rest = &FormatCount ($total_rest) ;
	4016	+ $html .= "<tr><th class=l>Total</a></th><th class=r>$total_total</th><th class=r>$total_page</th><th class=r>$total_image</th><th class=r>$total_rest</th></tr>\n" ;
	4017	+
	4018	+ $html .= "</table>\n" ;
	4019	+ $html .= "</td></tr>\n" ;
	4020	+
	4021	+
	4022	+ $breakdown = "Here is detailed breakdown per service of indicators that pointed to Google <small>(total ≥ 3)</small><br> <br>" .
	4023	+ "<table width=100%><tr><th class=l>Service</th><th class=c>Total</th><th class=c>Originating from<br>Google ip address</th><th class=c>Referer mentions<br>Google url</th><th class=c>Agent mentions<br>Google service</th></tr>\n" ;
	4024	+ foreach $key (@searches_service_matches_alpha)
	4025	+ {
	4026	+ $count = $searches_service_matches {$key} ;
	4027	+
	4028	+ next if $count <= 2 ;
	4029	+
	4030	+ $count = &FormatCount ($count) ;
	4031	+ ($service,$matches) = split (',', $key) ;
	4032	+ if ($matches =~ /x/) { $x = 'Y' } else { $x = '-' } ;
	4033	+ if ($matches =~ /y/) { $y = 'Y' } else { $y = '-' } ;
	4034	+ if ($matches =~ /z/) { $z = 'Y' } else { $z = '-' } ;
	4035	+ $breakdown .= "<tr><td class=l>$service</td><td class=r>$count</td><td class=c>$x</td><td class=c>$y</td><td class=c>$z</td></tr>" ;
	4036	+ }
	4037	+ $breakdown .= "</table><br.&bsp;<br>\n" ;
	4038	+
	4039	+
	4040	+ $html .= "<tr><td class=l colspan=99><a name='details' id='details'></a> <p>" .
	4041	+ $google_ip_ranges .
	4042	+ "<b>Agents</b>: as for genuine agent strings: too many crawlers indentify themselves as 'GoogleBot' to take this at face value. " .
	4043	+ "They are accepted as genuine Google crawler requests only when the ip address matches a known range (see above). " .
	4044	+ "Other records that mention GoogleBot are counted as GoogleBot? (question mark, as this may include partners, like DoCoMo). " .
	4045	+ "However when the agent string mentions Google Desktop or Google Earth this is always accepted" .
	4046	+ "<p><b>Service</b>: the service name is based on the agent string (plus for GoogleBot check for ip address, see above), if this is inconclusive it is based on the referer string." .
	4047	+ "<p>$breakdown" .
	4048	+ "<p><b>Top Level Domain 'undefined'</b>: requests with top level domain 'undefined' are nearly all requests from anonymous ip addresses (crawler and other services)" .
	4049	+ "<p><b>Note</b>: averages below 1 are always rounded up to 1\n" .
	4050	+ "</small></td></tr>\n";
	4051	+
	4052	+ $html .= "</table>\n" ;
	4053	+
	4054	+ $html .= $colophon ;
	4055	+
	4056	+ print FILE_HTML_SEARCH $html ;
	4057	+ close FILE_HTML_SEARCH ;
	4058	+}
	4059	+
	4060	+sub WriteReportSkins
	4061	+{
	4062	+ open FILE_HTML_SKINS, '>', "$dir_reports/$file_html_skins" ;
	4063	+
	4064	+ $html = $header ;
	4065	+ $html =~ s/TITLE/Wikimedia Traffic Analysis Report - Skins/ ;
	4066	+ $html =~ s/HEADER/Wikimedia Traffic Analysis Report - Skins/ ;
	4067	+ $html =~ s/ALSO/ See also: <b>LINKS<\/b>/ ;
	4068	+ $html =~ s/LINKS/$link_requests $link_origins \/ $link_methods \/ $link_scripts \/ $dummy_skins \/ $link_crawlers \/ $link_opsys \/ $link_browsers \/ $link_google/ ;
	4069	+ $html =~ s/X1000/⇒ <font color=#008000><b>all counts x 1000<\/b><\/font>.<br>/ ;
	4070	+
	4071	+ $html .= "<table border=1>\n" ;
	4072	+
	4073	+ $html .= "<tr><td colspan=99 class=l><b>Skin</b><br>Files (≥ 3)</td></tr>\n" ;
	4074	+ $rows = 0 ;
	4075	+ $nameprev = "" ;
	4076	+ foreach $skin (@skins_sorted_skin)
	4077	+ {
	4078	+ $count = &FormatCount ($skins {$skin}) ;
	4079	+
	4080	+ next if $count < 3 ;
	4081	+
	4082	+ $skin =~ s/^skins\/// ;
	4083	+ ($name,$rest) = split ('\/', $skin, 2) ;
	4084	+
	4085	+ next if $skin_set {$name} < 3 ;
	4086	+
	4087	+ if ($name ne $nameprev)
	4088	+ { $html .= "<tr><th colspan=99 class=l> <br><b>" . ucfirst ($name) . "</b></th></tr>\n" ; }
	4089	+ $nameprev = $name ;
	4090	+ $html .= "<tr><td class=l>$skin</td><td class=r>$count</td></tr>\n" ;
	4091	+ $rows++ ;
	4092	+ }
	4093	+ $html .= "</table>\n" ;
	4094	+
	4095	+ $html .= " <small>$rows rows written</small><p>" ;
	4096	+
	4097	+# $html .= "<p><b>Explanation:</b><br>'osd' = opensearchdescription / 'php.ser' = vnd.php.serialized" ;
	4098	+ $html .= $colophon ;
	4099	+
	4100	+ print FILE_HTML_SKINS $html ;
	4101	+ close FILE_HTML_SKINS ;
	4102	+}
	4103	+
	4104	+ $html .= "</td></tr></table>\n" ;
	4105	+# $html .= " <small>$rows rows written</small><p>" ;
	4106	+
	4107	+# $html .= "<p><b>Explanation:</b><br>'osd' = opensearchdescription / 'php.ser' = vnd.php.serialized" ;
	4108	+ $html .= $colophon ;
	4109	+
	4110	+ print FILE_HTML_ORIGINS $html ;
	4111	+ close FILE_HTML_ORIGINS ;
	4112	+}
	4113	+
	4114	+sub WriteCsvGoogleBots
	4115	+{
	4116	+ open CSV_GOOGLE_BOTS_OUT, '>', "$dir_reports/$file_csv_google_bots" ;
	4117	+ print CSV_GOOGLE_BOTS_OUT "Date Time,Ip Range,Hits\n" ;
	4118	+ foreach $dir_process (@dirs_process)
	4119	+ {
	4120	+ open CSV_GOOGLE_BOTS_IN, '<', "$dir_process/$file_csv_google_bots" ;
	4121	+ while ($line = <CSV_GOOGLE_BOTS_IN>)
	4122	+ {
	4123	+ next if $line =~ /^#/ ; # comments
	4124	+ next if $line =~ /^:/ ; # csv header (not a comment)
	4125	+
	4126	+ chomp $line ;
	4127	+ ($datetime,$range,$hits) = split (',', $line) ;
	4128	+ ($date,$time) = split (' ', $datetime) ;
	4129	+ ($year,$month,$day) = split ('\/', $date) ;
	4130	+ $hour = substr ($time,0,2) ;
	4131	+ $datetime = "\"=DATE($year,$month,$day)+TIME($hour,0,0)\"" ;
	4132	+ print CSV_GOOGLE_BOTS_OUT "$datetime,$hits,$range\n" ;
	4133	+ $googlebots {$datetime} += $hits ;
	4134	+ }
	4135	+ close CSV_GOOGLE_BOTS_IN ;
	4136	+ }
	4137	+ foreach $datetime (sort keys %googlebots)
	4138	+ { print CSV_GOOGLE_BOTS_OUT "$datetime,${googlebots{$datetime}},*\n" ; }
	4139	+ close CSV_GOOGLE_BOTS_OUT ;
	4140	+}
	4141	+
	4142	+sub WriteCsvBrowserLanguages
	4143	+{
	4144	+ open CSV_BROWSER_LANGUAGES, '>', "$dir_reports/$file_csv_browser_languages" ;
	4145	+ print CSV_BROWSER_LANGUAGES "Browser,Languages,Hits\n" ;
	4146	+ foreach $key (keys_sorted_alpha_asc %browser_languages)
	4147	+ { print CSV_BROWSER_LANGUAGES "$key,${browser_languages {$key}}\n" ; }
	4148	+ close CSV_BROWSER_LANGUAGES ;
	4149	+}
	4150	+
	4151	+sub WriteCsvCountriesTimed
	4152	+{
	4153	+ $multiplier_1000 = 1000 * $multiplier ;
	4154	+# open CSV_COUNTRIES_TIMED, '>', "$dir_reports/$file_csv_countries_timed" ;
	4155	+ open CSV_COUNTRIES_TIMED, '>', "/home/ezachte/$file_csv_countries_timed" ;
	4156	+
	4157	+ foreach $target (sort keys %targets)
	4158	+ {
	4159	+ @countries = sort {$countries_totals {"N,$target"}{$b} <=> $countries_totals {"N,$target"}{$a}} keys %{$countries_totals {"N,$target"}} ;
	4160	+
	4161	+ foreach $bot ("N","Y")
	4162	+ {
	4163	+ $line = "\nBot,Wiki,Time," ;
	4164	+ $cnt_countries = 0 ;
	4165	+ foreach $country (@countries)
	4166	+ {
	4167	+ $line .= sprintf ("%.0f", $multiplier_1000 * $countries_totals {"$bot,$target"}{$country}) . "," ;
	4168	+
	4169	+ last if $cnt_countries++ >= 25 ;
	4170	+ }
	4171	+ print CSV_COUNTRIES_TIMED "$line\n" ;
	4172	+
	4173	+ $line = "\nBot,Wiki,Time," ;
	4174	+ $cnt_countries = 0 ;
	4175	+ foreach $country (@countries)
	4176	+ {
	4177	+ $country_name = $country_codes {$country} ;
	4178	+ $line .= "$country_name," ;
	4179	+
	4180	+ last if $cnt_countries++ >= 25 ;
	4181	+ }
	4182	+ print CSV_COUNTRIES_TIMED "$line\n" ;
	4183	+
	4184	+ foreach $time (sort {$a <=> $b} keys %times)
	4185	+ {
	4186	+ $hrs = $time / 60 ;
	4187	+ $min = $time % 60 ;
	4188	+ $time2 = "\"=Time($hrs,$min,0)\"" ;
	4189	+ $line = "$bot,$target,$time2," ;
	4190	+ $cnt_countries = 0 ;
	4191	+ foreach $country (@countries)
	4192	+ {
	4193	+ $line .= sprintf ("%.0f", $multiplier_1000 * $countries_timed {"$bot,$target,$country,$time"}) . "," ;
	4194	+
	4195	+ last if $cnt_countries++ >= 25 ;
	4196	+ }
	4197	+ print CSV_COUNTRIES_TIMED "$line\n" ;
	4198	+ }
	4199	+ }
	4200	+ }
	4201	+ close CSV_COUNTRIES_TIMED ;
	4202	+}
	4203	+
	4204	+# http://www.maxmind.com/app/iso3166 country codes
	4205	+sub WriteCsvCountriesGoTo
	4206	+{
	4207	+# open CSV_COUNTRIES_TIMED, '>', "$dir_reports/$file_csv_countries_timed" ;
	4208	+ open CSV_COUNTRIES_LANGUAGES_VISITED, '>', "/home/ezachte/$file_csv_countries_languages_visited" ;
	4209	+
	4210	+ foreach $country (sort keys %countries)
	4211	+ {
	4212	+ @targets = sort {$targets_totals {"N,$country"}{$b} <=> $targets_totals {"N,$country"}{$a}} keys %{$targets_totals {"N,$country"}} ;
	4213	+
	4214	+ $line = "\nBot,Country," ;
	4215	+ $cnt_targets = 0 ;
	4216	+ foreach $target (@targets)
	4217	+ {
	4218	+ $target2 = $target ;
	4219	+ $target2 =~ s/^.*?:// ;
	4220	+ $target3 = $out_languages {$target2} ;
	4221	+ if ($target3 eq "")
	4222	+ { $target3 = "[$target2]" ; }
	4223	+ $line .= "$target3," ;
	4224	+
	4225	+ last if $cnt_targets++ >= 25 ;
	4226	+ }
	4227	+ print CSV_COUNTRIES_LANGUAGES_VISITED "$line\n" ;
	4228	+
	4229	+ foreach $bot ("N","Y")
	4230	+ {
	4231	+ $country_name = $country_codes {$country} ;
	4232	+ $country_name =~ s/\n//gs ;
	4233	+ $country_name =~ s/[0x00-0x1F]//gs ;
	4234	+
	4235	+ $cnt_targets = 0 ;
	4236	+ $tot_targets = 0 ;
	4237	+ foreach $target (@targets)
	4238	+ {
	4239	+ $tot_targets += $targets_totals {"$bot,$country"}{$target} ;
	4240	+ }
	4241	+
	4242	+ $line = "$bot,$country_name," ;
	4243	+ $cnt_targets = 0 ;
	4244	+ foreach $target (@targets)
	4245	+ {
	4246	+ $line .= $targets_totals {"$bot,$country"}{$target} . "," ;
	4247	+
	4248	+ last if $cnt_targets++ >= 25 ;
	4249	+ }
	4250	+ print CSV_COUNTRIES_LANGUAGES_VISITED "$line\n" ;
	4251	+
	4252	+ $line = "$bot,$country_name," ;
	4253	+ $cnt_targets = 0 ;
	4254	+ if ($tot_targets > 0)
	4255	+ {
	4256	+ foreach $target (@targets)
	4257	+ {
	4258	+ $line .= sprintf ("%.1f\%",100*$targets_totals {"$bot,$country"}{$target} / $tot_targets) . "," ;
	4259	+
	4260	+ last if $cnt_targets++ >= 25 ;
	4261	+ }
	4262	+ print CSV_COUNTRIES_LANGUAGES_VISITED "$line\n" ;
	4263	+ }
	4264	+ }
	4265	+ }
	4266	+ close CSV_COUNTRIES_LANGUAGES_VISITED ;
	4267	+}
	4268	+
	4269	+sub WriteReportPerLanguageBreakDown
	4270	+{
	4271	+ print "\nWriteReportPerLanguageBreakDown\n" ;
	4272	+
	4273	+ my ($title,$views_edits,$links) = @_ ;
	4274	+ my ($link_country,$population,$icon,$bar,$bars,$bar_width,$perc,$perc_tot,$perc_global,$requests_tot) ;
	4275	+ my @index_countries ;
	4276	+ my $views_edits_lc = lc $views_edits ;
	4277	+
	4278	+ $html = $header ;
	4279	+ $html =~ s/TITLE/$title/ ;
	4280	+ $html =~ s/HEADER/$title/ ;
	4281	+ $html =~ s/ALSO/$links/ ;
	4282	+ $html =~ s/LINKS// ;
	4283	+ $html =~ s/NOTES// ;
	4284	+ $html =~ s/X1000/. Period <b>$requests_recently_start - $requests_recently_stop<\/b>/ ;
	4285	+ $html =~ s/DATE// ;
	4286	+
	4287	+ $html .= "<p><table border=1 width=800>INDEX\n" ;
	4288	+
	4289	+ my $languages_reported ;
	4290	+
	4291	+ foreach $language (keys_sorted_by_value_num_desc %requests_recently_per_language)
	4292	+ {
	4293	+ next if $requests_recently_per_language {$language} < 100 ;
	4294	+
	4295	+ ($language_name,$anchor_language) = &GetLanguageInfo ($language) ;
	4296	+
	4297	+ my %requests_per_country = %{$requests_recently_per_language_per_country {$language}} ;
	4298	+ @countries = keys_sorted_by_value_num_desc %requests_per_country ;
	4299	+
	4300	+ my $requests_this_language = $requests_recently_per_language {$language} ;
	4301	+
	4302	+ $perc_global = '..' ;
	4303	+ if ($requests_recently_all > 0)
	4304	+ { $perc_global = &Percentage ($requests_this_language / $requests_recently_all) ; }
	4305	+
	4306	+ $html .= "<tr><th colspan=99 class=lh3><a id='$anchor_language' name='$anchor_language'></a><br>$language_name ($language) <small>($perc_global share of global total)</small></th></tr>\n" ;
	4307	+
	4308	+ if ($languages_reported % 2 == 0)
	4309	+ { $gif = "bluebar_hor2.gif" ; }
	4310	+ else
	4311	+ { $gif = "greenbar_hor2.gif" ; }
	4312	+
	4313	+ $perc_tot = 0;
	4314	+ for ($l = 0 ; $l < 50 ; $l++)
	4315	+ {
	4316	+ my $requests_this_country = $requests_recently_per_language_per_country {$language} {$countries [$l]} ;
	4317	+ my $requests_all_countries = $requests_recently_per_language {$language} ;
	4318	+ $perc = 0 ;
	4319	+ if ($requests_all_countries > 0)
	4320	+ {
	4321	+ $perc = &Percentage ($requests_this_country / $requests_all_countries) ;
	4322	+
	4323	+ last if ($perc < 0.5) \|\| (($perc_global < 0.1) && ($perc < 1) \|\| (($perc_global < 0.01) && ($perc < 3)) \|\| (($perc_global < 0.001) && ($perc < 5))) ;
	4324	+
	4325	+ $perc_tot += $perc ;
	4326	+ }
	4327	+
	4328	+ $country = $countries [$l] ;
	4329	+ $country =~ s/ .*$// if length ($country) > 20 ;
	4330	+ $bar_width = int ($perc * 6) ;
	4331	+
	4332	+ $bar_100 = "" ;
	4333	+ if ($bars++ == 0)
	4334	+ {
	4335	+ $bar_width_100 = 600 - $bar_width ;
	4336	+ $bar_100 = "<img src='background.gif' width=$bar_width_100 height=15>" ;
	4337	+ }
	4338	+ if (($country =~ /Australia/) && ($language_name =~ /Japanese/) && ($perc > 5))
	4339	+ { $perc .= " <b><a href='#anomaly' onclick='alert(\"Probably incorrectly assigned to this country.\\nOutdated Regional Internet Registry (RIR) administration may have caused this.\")';><font color='#FF0000'>(*)</font></a></b>" ; $anomaly_found = $true ;}
	4340	+ $html .= "<tr><th class=l class=small nowrap>$country</th>" .
	4341	+ "<td class=c>[$requests_this_country ]$perc</td>" .
	4342	+ "<td class=l><img src='$gif' width=$bar_width height=15>$bar_100</td></tr>\n" ;
	4343	+ }
	4344	+
	4345	+ if ($perc_tot > 100) { $perc_tot = 100 ; }
	4346	+
	4347	+ $perc_other = sprintf '%.1f', 100 - $perc_tot ;
	4348	+ if ($perc_other > 0)
	4349	+ {
	4350	+ $bar_width = $perc_other * 6 ;
	4351	+ $html .= "<tr><th class=l class=small nowrap>Other</th>" .
	4352	+ "<td class=c>$perc_other%</td>" .
	4353	+ "<td class=l><img src='$gif' width=$bar_width height=15></td></tr>\n" ;
	4354	+ }
	4355	+
	4356	+ push @index_languages, "<a href='#$anchor_language'>$language_name</a> " ;
	4357	+
	4358	+ # print "\n" ;
	4359	+ # $html .= "<tr><td colspan=99> </td></tr>\n" ;
	4360	+ }
	4361	+ $html .= "</table>" ;
	4362	+ $html .= "<p><b>Share<\/b> is the percentage of requesting ip addresses (out of the global total) which originated from this country" .
	4363	+ "<br> Further percentages show per country share of requests per Wikipedia visited" ;
	4364	+ $html .= "<p>Countries are only included if the number of requests in the period exceeds 100,000 (100 matching records in 1:1000 sampled log)" ;
	4365	+ $html .= "<br>Page requests by bots are not included. Also all ip addresses that occur more than once on a given day are discarded for that day." ;
	4366	+ $html .= "<br> A few false negatives are taken for granted. " ;
	4367	+ $html .= $colophon ;
	4368	+
	4369	+ $index = &HtmlIndex (join '/ ', sort (@index_languages)) ;
	4370	+ $html =~ s/INDEX/$index/ ;
	4371	+
	4372	+ &PrintHtml ($html, "$path_out/$file_html_per_language_breakdown") ;
	4373	+}
	4374	+
	4375	+sub WriteReportPerCountryOverview
	4376	+{
	4377	+  # Builds the sortable HTML page "monthly page views/edits per country - overview"
	4378	+  # and hands it to PrintHtml ("$path_out/$file_html_per_country_overview").
	4379	+  #
	4380	+  # Arguments:
	4381	+  #   $title       - substituted for the TITLE and HEADER placeholders of $header
	4382	+  #   $views_edits - metric label; when it matches /edit/i the column caption is
	4383	+  #                  'MPE' and ratios are printed with 4 decimals, otherwise 'MPV'
	4384	+  #   $links       - HTML substituted for the ALSO placeholder
	4385	+  #
	4386	+  # Reads package data prepared elsewhere: %requests_recently_per_country_code,
	4387	+  # %requests_recently_per_country, $requests_recently_all, $months_recently,
	4388	+  # %region_codes, %north_south_codes, %population_per_region,
	4389	+  # %connected_per_region, $population_tot, $connected_tot.
	4390	+  #
	4391	+  # NOTE(review): variables the original left undeclared are deliberately kept as
	4392	+  # package variables. Helpers called as '&Name ;' share this sub's @_ and may
	4393	+  # read package state - confirm before tightening their scope.
	4394	+  print "\nWriteReportPerCountryOverview\n" ;
	4395	+
	4396	+  my ($title,$views_edits,$links) = @_ ;
	4397	+  my ($link_country,$population,$icon,$bar,$requests_tot) ;
	4398	+  my (@index_countries,@csv_countries) ;
	4399	+  my $views_edits_lc  = lc $views_edits ;
	4400	+  my $views_edits_lcf = ucfirst $views_edits_lc ;
	4401	+  my $is_edit_report  = ($views_edits =~ /edit/i) ;
	4402	+
	4403	+  # coloured labels per region code; a code without an entry is shown as-is
	4404	+  my %region_labels = (
	4405	+    AF => '<font color=#028702><b>Africa</b></font>',
	4406	+    CA => '<font color=#249CA0><b>Central-America</b></font>',
	4407	+    SA => '<font color=#FCAA03><b>South-America</b></font>',
	4408	+    NA => '<font color=#C802CA><b>North-America</b></font>',
	4409	+    AU => '<font color=#02AAD4><b>Australia</b></font>',
	4410	+    EU => '<font color=#0100CA><b>Europe</b></font>',
	4411	+    AS => '<font color=#E10202><b>Asia</b></font>',
	4412	+    OC => '<font color=#02AAD4><b>Oceania</b></font>',
	4413	+  ) ;
	4414	+  # short hemisphere labels for the per-country rows
	4415	+  my %north_south_labels = (
	4416	+    N => '<font color=#000BF7><b>N</b></font>',
	4417	+    S => '<font color=#FE0B0D><b>S</b></font>',
	4418	+  ) ;
	4419	+  # labels for the summary rows: regions plus the long hemisphere names
	4420	+  my %summary_labels = (
	4421	+    %region_labels,
	4422	+    N => '<font color=#000BF7><b>Global North</b></font>',
	4423	+    S => '<font color=#FE0B0D><b>Global South</b></font>',
	4424	+  ) ;
	4425	+
	4426	+  # country names that are suitable for http://gunn.co.nz/map/, applied in this order
	4427	+  my @country_renames = (
	4428	+    [ qr/Moldova, Republic of/,                   'Moldova' ],
	4429	+    [ qr/Korea, Republic of/,                     'South Korea' ],
	4430	+    [ qr/Korea, Democratic People's Republic of/, 'North Korea' ],
	4431	+    [ qr/Iran, Islamic Republic of/,              'Iran' ],
	4432	+    [ qr/UAE/,                                    'United Arab Emirates' ],
	4433	+    [ qr/Congo - The Democratic Republic of the/, 'Democratic Republic of the Congo' ],
	4434	+    [ qr/^Congo$/,                                'Republic of the Congo' ],
	4435	+    [ qr/Syrian Arab Republic/,                   'Syria' ],
	4436	+    [ qr/Tanzania, United Republic of/,           'Tanzania' ],
	4437	+    [ qr/Libyan Arab Jamahiriya/,                 'Libya' ],
	4438	+    [ qr/C..?te d'Ivoire/,                        "C\xC3\xB4te d'Ivoire" ],
	4439	+    [ qr/Serbia/,                                 'republic of serbia' ],
	4440	+    [ qr/Lao People's Democratic Republic/,       'Laos' ],
	4441	+  ) ;
	4442	+
	4443	+  ($views_edits2 = $views_edits) =~ s/ /\<br\>/ ;
	4444	+  if ($is_edit_report)
	4445	+  { $MPVE = 'MPE' ; } # monthly page edits
	4446	+  else
	4447	+  { $MPVE = 'MPV' ; } # monthly page views
	4448	+
	4449	+  $html = $header ;
	4450	+  $html =~ s/TITLE/$title/ ;
	4451	+  $html =~ s/HEADER/$title/ ;
	4452	+  $html =~ s/LINKS// ;
	4453	+  $html =~ s/ALSO/$links/ ;
	4454	+  $html =~ s/NOTES// ;
	4455	+  $html =~ s/X1000/. Period <b>$requests_recently_start - $requests_recently_stop<\/b>/ ;
	4456	+  $html =~ s/DATE// ;
	4457	+
	4458	+  $html .= &HtmlSortTable ;
	4459	+
	4460	+  # INDEX, TOTAL and REGIONS are placeholders, filled in at the end of this sub
	4461	+  $html .= "<p><table border=1 width=800 class=tablesorter id=table1>\n" ;
	4462	+  $html .= "<thead>\n" ;
	4463	+  $html .= "INDEX\n" ;
	4464	+
	4465	+  $html .= &HtmlWorldMaps ;
	4466	+
	4467	+  $html .= "<tr><td class=rh5 colspan=3 rowspan=1><b>Country</b></td><td class=c rowspan=2><b>Monthly<br>$views_edits2</b></td>" .
	4468	+           "<td class=r rowspan=2><b>Population</b></td>" .
	4469	+           "<td class=c colspan=2><b>Internet<br>Users</b></td><td class=c><b>${MPVE}'s<br>Per<br>I U</b></td>" .
	4470	+           "<td colspan=99 class=l rowspan=2><b>Share in Global Monthly $views_edits</b><br><small><font color=#808080>red and blue bars have different scale</font></small></td></tr>\n" ;
	4471	+  $html .= "<tr><td class=c><b>Name</b></td><td class=c><b>Region</b><br><img src='http://stats.wikimedia.org/Location_of_Continents2.gif'></td><td class=c><b>N/S</b></td><td class=c><b>Total</b></td><td class=c><b>/Pop.</b></td></tr>\n" ;
	4472	+  $html .= "<tr><th> </th><th> </th><th> </th><th> </th><th> </th><th> </th><th> </th><th> </th><th> </th><th colspan=2> </th></tr>\n" ;
	4473	+  $html .= "</thead><tbody>\nTOTAL\nREGIONS\n" ;
	4474	+
	4475	+  # NOTE(review): @csv_countries is collected below but this sub never writes it out
	4476	+  push @csv_countries, "# Wikimedia Traffic Analysis Report - Wikipedia $views_edits Per Country - Overview\n" .
	4477	+                       "# Report based on data from $requests_recently_start - $requests_recently_stop\n" .
	4478	+                       "country name, country code, monthly $views_edits_lc,population,internet users,internet penetration,monthly $views_edits_lc per internet user,share of global $views_edits_lc\n" ;
	4479	+
	4480	+  $requests_tot = 0 ;
	4481	+
	4482	+  # both accumulators are package-wide hashes: reset them so totals from an
	4483	+  # earlier call are not carried over (the original only reset the first one)
	4484	+  undef %requests_per_region ;
	4485	+  undef %requests_per_region2 ;
	4486	+
	4487	+  foreach $country_code (keys_sorted_by_value_num_desc %requests_recently_per_country_code)
	4488	+  {
	4489	+    my ($country,$code) = split ('\\|', $country_code) ;
	4490	+
	4491	+    my $region_code      = $region_codes {$code} ;
	4492	+    my $north_south_code = $north_south_codes {$code} ;
	4493	+
	4494	+    $region_name = $region_code ;
	4495	+    if (exists $region_labels {$region_code})
	4496	+    { $region_name = $region_labels {$region_code} ; }
	4497	+
	4498	+    $north_south_name = $north_south_code ;
	4499	+    if (exists $north_south_labels {$north_south_code})
	4500	+    { $north_south_name = $north_south_labels {$north_south_code} ; }
	4501	+
	4502	+    ($link_country,$icon,$population,$connected) = &CountryMetaInfo ($country) ;
	4503	+
	4504	+    # ..2 = count scaled by 1000 and averaged per month
	4505	+    my $requests_this_country  = $requests_recently_per_country {$country} ;
	4506	+    my $requests_this_country2 = int ($requests_this_country * 1000 / $months_recently) ;
	4507	+    $requests_tot += $requests_this_country2 ;
	4508	+
	4509	+    # region codes and hemisphere codes share the same hashes
	4510	+    $requests_per_region  {$region_code}      += $requests_this_country ;
	4511	+    $requests_per_region  {$north_south_code} += $requests_this_country ;
	4512	+    $requests_per_region2 {$region_code}      += $requests_this_country2 ;
	4513	+    $requests_per_region2 {$north_south_code} += $requests_this_country2 ;
	4514	+
	4515	+    $requests_per_person = ".." ;
	4516	+    if ($population > 0)
	4517	+    { $requests_per_person = sprintf ("%.0f", $requests_this_country2 / $population) ; }
	4518	+
	4519	+    $requests_per_connected_person = ".." ;
	4520	+    if ($connected > 0)
	4521	+    {
	4522	+      my $per_connected = $requests_this_country2 / $connected ;
	4523	+      if ($is_edit_report)
	4524	+      { $requests_per_connected_person = sprintf ("%.4f", $per_connected) ; }
	4525	+      elsif ($per_connected >= 1.95)
	4526	+      { $requests_per_connected_person = sprintf ("%.0f", $per_connected) ; }
	4527	+      else
	4528	+      { $requests_per_connected_person = sprintf ("%.1f", $per_connected) ; }
	4529	+    }
	4530	+
	4531	+    $perc_share_total = '..' ;
	4532	+    if ($requests_recently_all > 0)
	4533	+    { $perc_share_total = &Percentage ($requests_this_country / $requests_recently_all) ; }
	4534	+
	4535	+    $bar = " " ;
	4536	+    if ($perc_share_total > 0)
	4537	+    { $bar = "<img src='redbar_hor.gif' width=" . (int ($perc_share_total * 10)) . " height=15>" ; }
	4538	+
	4539	+    $perc_connected = ".." ;
	4540	+    if ($population > 0)
	4541	+    { $perc_connected = sprintf ("%.0f", 100 * $connected / $population) .'%' ; }
	4542	+
	4543	+    $country2 = $country ;
	4544	+    foreach my $rename (@country_renames)
	4545	+    {
	4546	+      my ($pattern,$replacement) = @$rename ;
	4547	+      $country2 =~ s/$pattern/$replacement/ ;
	4548	+    }
	4549	+
	4550	+    # last column is the share of the global total (the original interpolated a
	4551	+    # variable that was never assigned, which left this column empty)
	4552	+    push @csv_countries, "$country2,$code,$requests_this_country2,$population,$connected,$perc_connected,$requests_per_connected_person,$perc_share_total\n" ;
	4553	+
	4554	+    $population2            = &i2KM2 ($population) ;
	4555	+    $connected2             = &i2KM2 ($connected) ;
	4556	+    $requests_this_country2 = &i2KM2 ($requests_this_country2) ;
	4557	+    $html .= "<tr><th class=rh3><a id='$country' name='$country'></a>$link_country $icon</th>" .
	4558	+             "<td>$region_name</td>" .
	4559	+             "<td>$north_south_name</td>" .
	4560	+             "<td>$requests_this_country2</td>" .
	4561	+             "<td>$population2</td>" .
	4562	+             "<td>$connected2</td>" .
	4563	+             "<td>$perc_connected</td>" .
	4564	+             "<td>$requests_per_connected_person</td>" .
	4565	+             "<td>$perc_share_total</td>" .
	4566	+             "<td class=l>$bar</td></tr>\n" ;
	4567	+
	4568	+    if ($verbose)
	4569	+    { push @index_countries, "<a href=#$country>$country ($perc_share_total)</a>\n " ; }
	4570	+    else
	4571	+    { push @index_countries, "<a href=#$country>$country</a>\n " ; }
	4572	+  }
	4573	+
	4574	+  # world totals
	4575	+  $requests_per_person_tot = '..' ;
	4576	+  if ($population_tot > 0)
	4577	+  { $requests_per_person_tot = sprintf ("%.0f", $requests_tot / $population_tot) ; }
	4578	+
	4579	+  # reset first, so a value from an earlier call is not shown when $connected_tot is 0
	4580	+  $requests_per_connected_person_tot = '..' ;
	4581	+  if ($connected_tot > 0)
	4582	+  {
	4583	+    if ($is_edit_report)
	4584	+    { $requests_per_connected_person_tot = sprintf ("%.4f", $requests_tot / $connected_tot) ; }
	4585	+    else
	4586	+    { $requests_per_connected_person_tot = sprintf ("%.0f", $requests_tot / $connected_tot) ; }
	4587	+  }
	4588	+
	4589	+  $perc_connected_tot = ".." ;
	4590	+  if ($population_tot > 0)
	4591	+  { $perc_connected_tot = sprintf ("%.0f", 100 * $connected_tot / $population_tot) .'%' ; }
	4592	+
	4593	+  push @csv_countries, "world,*,$requests_tot,$population_tot,$connected_tot,$perc_connected_tot,$requests_per_connected_person_tot,100%\n" ;
	4594	+
	4595	+  $requests_tot2   = &i2KM2 ($requests_tot) ;
	4596	+  $population_tot2 = &i2KM2 ($population_tot) ;
	4597	+  $connected_tot2  = &i2KM2 ($connected_tot) ;
	4598	+
	4599	+  $html_total = "<tr><th class=rh3>All countries in</th>" .
	4600	+                "<td><b>World</b></td>" .
	4601	+                "<td> </td>" .
	4602	+                "<td>$requests_tot2</td>" .
	4603	+                "<td>$population_tot2</td>" .
	4604	+                "<td>$connected_tot2</td>" .
	4605	+                "<td>$perc_connected_tot</td>" .
	4606	+                "<td>$requests_per_connected_person_tot</td>" .
	4607	+                "<td>100%</td>" .
	4608	+                "<td class=l> </td></tr>\n" ;
	4609	+  $html_total .= "<tr><td colspan=99> </td></tr>" ;
	4610	+
	4611	+  # one summary row per hemisphere and per region
	4612	+  undef @keys_regions ;
	4613	+  $html_regions = '' ;
	4614	+  foreach $key (qw (N S AF AS AU EU CA NA SA OC))
	4615	+  {
	4616	+    $region = $summary_labels {$key} ;
	4617	+
	4618	+    $population_region = $population_per_region {$key} ;
	4619	+    $connected_region  = $connected_per_region {$key} ;
	4620	+    $requests_region   = $requests_per_region {$key} ;
	4621	+    $requests_region2  = $requests_per_region2 {$key} ;
	4622	+
	4623	+    $perc_connected_region = ".." ;
	4624	+    if ($population_region > 0)
	4625	+    { $perc_connected_region = sprintf ("%.0f", 100 * $connected_region / $population_region) .'%' ; }
	4626	+
	4627	+    $perc_share_total = '..' ;
	4628	+    if ($requests_recently_all > 0)
	4629	+    { $perc_share_total = &Percentage ($requests_region / $requests_recently_all) ; }
	4630	+
	4631	+    $requests_per_connected_person = '..' ;
	4632	+    if ($connected_region > 0)
	4633	+    {
	4634	+      if ($is_edit_report)
	4635	+      { $requests_per_connected_person = sprintf ("%.4f", $requests_region2 / $connected_region) ; }
	4636	+      else
	4637	+      { $requests_per_connected_person = sprintf ("%.0f", $requests_region2 / $connected_region) ; }
	4638	+    }
	4639	+
	4640	+    # format for display only after all ratios have been computed
	4641	+    $population_region = &i2KM2 ($population_region) ;
	4642	+    $connected_region  = &i2KM2 ($connected_region) ;
	4643	+    $requests_region   = &i2KM2 ($requests_region) ;
	4644	+    $requests_region2  = &i2KM2 ($requests_region2) ;
	4645	+
	4646	+    $bar = " " ;
	4647	+    if ($perc_share_total > 0)
	4648	+    { $bar = "<img src='bluebar_hor.gif' width=" . (int ($perc_share_total * 3)) . " height=15>" ; }
	4649	+
	4650	+    $html_regions .= "<tr><th>All countries in</th>" .
	4651	+                     "<td>$region</td>" .
	4652	+                     "<td> </td>" .
	4653	+                     "<td>$requests_region2</td>" .
	4654	+                     "<td>$population_region</td>" .
	4655	+                     "<td>$connected_region</td>" .
	4656	+                     "<td>$perc_connected_region</td>" .
	4657	+                     "<td>$requests_per_connected_person</td>" .
	4658	+                     "<td>$perc_share_total</td>" .
	4659	+                     "<td class=l>$bar</td></tr>\n" ;
	4660	+
	4661	+    # blank separator row after the hemispheres and after the last region
	4662	+    if (($key eq 'S') or ($key eq 'OC'))
	4663	+    { $html_regions .= "<tr><td colspan=99> </td></tr>" ; }
	4664	+  }
	4665	+
	4666	+  $html .= "</tbody>\n</table>" ;
	4667	+  $html .= "<p>Countries are only included if the number of $views_edits_lc in the period exceeds 100,000 (100 matching records in 1:1000 sampled log)" ;
	4668	+  $html .= "<br>$views_edits_lcf by bots are not included. Also all ip addresses that occur more than once on a given day are discarded for that day." ;
	4669	+  $html .= "<br> A few false negatives are taken for granted. " ;
	4670	+  $html .= "Country meta data collected from English Wikipedia (<a href='http://en.wikipedia.org/wiki/List_of_countries_by_population'>population</a>, <a href='http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users'>internet users</a>)). " ;
	4671	+
	4672	+  $html .= &HtmlSortTableColumns ;
	4673	+  $html .= $colophon ;
	4674	+
	4675	+  # fill in the placeholders that were emitted before the data was known
	4676	+  $index = &HtmlIndex (join '/ ', sort (@index_countries)) ;
	4677	+  $html =~ s/INDEX/$index/ ;
	4678	+  $html =~ s/TOTAL/$html_total/ ;
	4679	+  $html =~ s/REGIONS/$html_regions/ ;
	4680	+
	4681	+  &PrintHtml ($html, "$path_out/$file_html_per_country_overview") ;
	4682	+}
	4654	+
	4655	+#sub WriteReportPerCountryOverviewLine
	4656	+#{
	4657	+# my ($name,$region,$hemisphere,$population,$connected,$requests) = @_ ;
	4658	+# my ($perc_requests, $perc_connected, $requests_per_connected_person) ;
	4659	+# my $html ;
	4660	+# $html = "<tr><th>$name</th></td><td>$region</td><td>$hemisphere</td><td>$requests</td>" .
	4661	+# "<td>$population</td>" . # <td>$requests_per_person_tot</td>" .
	4662	+# "<td>$connected</td><td>$perc_connected</td><td>$requests_per_connected_person</td>" .
	4663	+# "<td>$perc_requests</th><td class=l> </td></tr>\n" ;
	4664	+# return ($html) ;
	4665	+#}
	4666	+
	4667	+sub WriteCsvSvgFilePerCountryOverview
	4668	+{
	4669	+  # Renders five world-map frames for one period and writes a CSV summary file.
	4670	+  # Frames 1-4 blend the previous period's request counts into this period's
	4671	+  # (weights 1:4, 2:3, 3:2, 4:1), frame 5 uses this period's counts unchanged.
	4672	+  #
	4673	+  # Arguments:
	4674	+  #   $views_edits    - metric label used in the CSV header
	4675	+  #   $period         - key into the per-period data, also prefix of the frame names
	4676	+  #   $ref_requests_per_period_per_country_code
	4677	+  #                   - hash ref: period => { "country|code" => requests }
	4678	+  #   $max_requests_per_connected_us - accepted for compatibility, not used here
	4679	+  #   $desc_animation - caption text, forwarded to WriteWorldMapSvg
	4680	+  #
	4681	+  # Package state: $period_prev remembers the period of the previous call, so
	4682	+  # on the first call the "previous" counts are empty. %fills is cleared here
	4683	+  # and refilled by RatioAndFillColor2 for every frame.
	4684	+  #
	4685	+  # NOTE(review): the original carried per-country CSV rows (requests per
	4686	+  # person, share of total, fill colour via RatioAndFillColor) after an
	4687	+  # unconditional 'next ;' in the fifth loop, so that code never ran. It has
	4688	+  # been removed; as before, the CSV holds only the header and the 'world' row
	4689	+  # and $requests_tot stays 0. Restore from version control if it is re-enabled.
	4690	+  my ($views_edits, $period, $ref_requests_per_period_per_country_code, $max_requests_per_connected_us, $desc_animation) = @_ ;
	4691	+
	4692	+  # '|| {}' keeps a missing period an empty hash without a symbolic dereference
	4693	+  my %requests_per_country_code      = %{$ref_requests_per_period_per_country_code -> {$period}      || {}} ;
	4694	+  my %requests_per_country_code_prev = %{$ref_requests_per_period_per_country_code -> {$period_prev} || {}} ;
	4695	+  $period_prev = $period ;
	4696	+
	4697	+  my $description = $descriptions_per_period {$period} ;
	4698	+  my $postfix     = $descriptions_per_period {$period} ;
	4699	+
	4700	+  print "\nWriteCsvSvgFilePerCountryOverview\n" ;
	4701	+
	4702	+  my ($link_country,$country,$code,$population,$connected,$icon,$requests_this_country) ;
	4703	+  my @csv_countries ;
	4704	+
	4705	+  $header_csv_countries = "# Wikimedia Traffic Analysis Report - Wikipedia $views_edits Per Country - Overview\n" .
	4706	+                          "# Report based on data from $description\n" .
	4707	+                          "country,code,views,population,internet users,%connected,views per user,%global views\n" ;
	4708	+
	4709	+  my $requests_tot = 0 ;
	4710	+  undef %fills ;
	4711	+
	4712	+  my $frames = 5 ;
	4713	+  my @country_codes = keys_sorted_by_value_num_desc %requests_per_country_code ;
	4714	+
	4715	+  foreach my $frame (1 .. $frames)
	4716	+  {
	4717	+    foreach $country_code (@country_codes)
	4718	+    {
	4719	+      ($country,$code) = split ('\\|', $country_code) ;
	4720	+
	4721	+      # NOTE(review): results are not used here; the call is kept because
	4722	+      # CountryMetaInfo is defined elsewhere and may have side effects
	4723	+      ($link_country,$icon,$population,$connected) = &CountryMetaInfo ($country) ;
	4724	+
	4725	+      if ($frame == $frames)
	4726	+      { $requests_this_country = $requests_per_country_code {$country_code} ; }
	4727	+      else
	4728	+      {
	4729	+        $requests_this_country = (           $frame  * $requests_per_country_code      {$country_code} +
	4730	+                                  ($frames - $frame) * $requests_per_country_code_prev {$country_code}) / $frames ;
	4731	+      }
	4732	+
	4733	+      # called for its side effect: stores the fill colour in %fills
	4734	+      ($requests_svg,$ratio_svg,$fill_svg) = RatioAndFillColor2 ($code, $requests_this_country, 200, $ratio_linear) ;
	4735	+    }
	4736	+
	4737	+    # $desc_animation is lexical in this sub, so it is passed on explicitly
	4738	+    # (an extra trailing argument is harmless for a two-argument callee)
	4739	+    &WriteWorldMapSvg ("$period-$frame", $description, $desc_animation) ;
	4740	+  }
	4741	+
	4742	+  # world totals
	4743	+  $requests_per_person_tot = '..' ;
	4744	+  if ($population_tot > 0)
	4745	+  { $requests_per_person_tot = sprintf ("%.1f", $requests_tot / $population_tot) ; }
	4746	+
	4747	+  # reset first, so a value from an earlier call is not reused when $connected_tot is 0
	4748	+  $requests_per_connected_person_tot = '..' ;
	4749	+  if ($connected_tot > 0)
	4750	+  { $requests_per_connected_person_tot = sprintf ("%.1f", $requests_tot / $connected_tot) ; }
	4751	+
	4752	+  $perc_connected_tot = ".." ;
	4753	+  if ($population_tot > 0)
	4754	+  { $perc_connected_tot = sprintf ("%.1f", 100 * $connected_tot / $population_tot) .'%' ; }
	4755	+
	4756	+  push @csv_countries, "world,*,$requests_tot,$population_tot,$connected_tot,$perc_connected_tot,$requests_per_connected_person_tot,100%\n" ;
	4757	+  print "$period $requests_tot\n" ;
	4758	+
	4759	+  # output file name = overview csv name with the period description inserted
	4760	+  $file_csv_per_country_overview2 = $file_csv_per_country_overview ;
	4761	+  $file_csv_per_country_overview2 =~ s/\.csv/-$postfix.csv/ ;
	4762	+  &PrintCsv ($header_csv_countries . join ('', sort @csv_countries), "$path_out/svg/$file_csv_per_country_overview2") ;
	4763	+}
	4894	+
	4895	+sub WriteWorldMapSvg
	4896	+{
	4897	+  # Colours the blank world map template with the current %fills, writes it to
	4898	+  # svg/world_map_$period.svg and converts that file to png.
	4899	+  #
	4900	+  # Arguments:
	4901	+  #   $period      - suffix for the output file names
	4902	+  #   $description - split on ' - ' into the texts for the Xxxx and Yyyy placeholders
	4903	+  #   $caption     - optional text for the Zzzz placeholder; when omitted the
	4904	+  #                  package variable $desc_animation is used, as before
	4905	+  #
	4906	+  # Reads %fills (lower-case id prefix => colour) and %requests_per_persons.
	4907	+  # Returns early with a warning when the template or the output file can not
	4908	+  # be opened. All scratch variables are lexical, so the caller's $period,
	4909	+  # $code, $line etc. are no longer overwritten.
	4910	+  #
	4911	+  # NOTE(review): the quantifiers in the id/style patterns below (.*? and \s*)
	4912	+  # were missing in the archived copy of this file and have been restored from
	4913	+  # context - verify against the repository version.
	4914	+  my ($period, $description, $caption) = @_ ;
	4915	+  $caption = $desc_animation if ! defined $caption ;
	4916	+
	4917	+  my $file_template = "world_map_blank_plain2.svg" ;
	4918	+  my $svg_in ;
	4919	+  if (! open $svg_in, '<', $file_template)
	4920	+  {
	4921	+    warn "WriteWorldMapSvg: can not open '$file_template': $!\n" ;
	4922	+    return ;
	4923	+  }
	4924	+  my @lines = <$svg_in> ;
	4925	+  close $svg_in ;
	4926	+
	4927	+  my ($text1,$text2) = split ' - ', $description ;
	4928	+  print "Animation description: $description -> $text1 \| $text2\n" ;
	4929	+
	4930	+  my $lines = join '', @lines ;
	4931	+  $lines =~ s/<circle[^>]*?>//gs ;
	4932	+  $lines =~ s/Yyyy/$text2/ ;
	4933	+  $lines =~ s/Xxxx/$text1/ ;
	4934	+  $lines =~ s/Zzzz/$caption/ ;
	4935	+
	4936	+  # pass 1: per <g> group, restyle the group header and the paths inside it
	4937	+  my @groups = split '<g', $lines ;
	4938	+  foreach my $group (@groups)
	4939	+  {
	4940	+    my @parts = split '<path', $group ;
	4941	+
	4942	+    # first two characters of the element id are looked up in %fills
	4943	+    (my $code = $parts [0]) =~ s/^.*?id=\"(\w+)\".*$/$1/s ;
	4944	+    $code = substr ($code,0,2) ;
	4945	+
	4946	+    if (defined $fills {$code})
	4947	+    {
	4948	+      my $fill = $fills {$code} ;
	4949	+      # \Q..\E also keeps '[x-]' from being parsed as an array subscript
	4950	+      $parts [0] =~ s/(id="\Q$code\E[x-]?")(?:\s*\n\s*style="[^"]*")?/$1\n style="fill:$fill;fill-opacity:1;stroke:#000000;stroke-width:2.5"/s ;
	4951	+    }
	4952	+
	4953	+    my $parts_with_fill = 0 ;
	4954	+    foreach my $part (@parts)
	4955	+    {
	4956	+      (my $part_code = $part) =~ s/^.*?id=\"(\w+)\".*$/$1/s ;
	4957	+      $part_code = substr ($part_code,0,2) ;
	4958	+
	4959	+      next if ! defined $fills {$part_code} ;
	4960	+      my $fill = $fills {$part_code} ;
	4961	+
	4962	+      # the first part with a known code is skipped (normally the group
	4963	+      # header, which was already restyled above)
	4964	+      next if $parts_with_fill ++ == 0 ;
	4965	+      $part =~ s/style="[^"]*"/style="fill:$fill;fill-opacity:1;stroke:#000000;stroke-width:2.5"/s ;
	4966	+    }
	4967	+    $group = join '<path', @parts ;
	4968	+  }
	4969	+  $lines = join '<g', @groups ;
	4970	+
	4971	+  # pass 2: paths whose full id is a key of %requests_per_persons and that
	4972	+  # still carry the default grey fill
	4973	+  my @paths = split '<path', $lines ;
	4974	+  foreach my $path (@paths)
	4975	+  {
	4976	+    (my $code = $path) =~ s/^.*?id=\"([\w-]+)\".*$/$1/s ;
	4977	+    next if ! defined $requests_per_persons {$code} ;
	4978	+
	4979	+    my $fill = $fills {$code} ;
	4980	+    $path =~ s/(id="\Q$code\E[x-]?")\s*\n\s*style="fill:#b9b9b9[^"]*"/$1\n style="fill:$fill;fill-opacity:1;stroke:#000000;stroke-width:2.5"/sg ;
	4981	+  }
	4982	+  $lines = join '<path', @paths ;
	4983	+
	4984	+  # whatever is still default grey gets a darker grey with a black outline
	4985	+  $lines =~ s/fill:#b9b9b9;stroke:#ffffff;stroke-width:[\d\.]*/fill:#606060;stroke:#000000;stroke-width:2.5/g ;
	4986	+
	4987	+  my $file_svg = "svg/world_map_$period.svg" ;
	4988	+  my $svg_out ;
	4989	+  if (! open $svg_out, '>', $file_svg)
	4990	+  {
	4991	+    warn "WriteWorldMapSvg: can not write '$file_svg': $!\n" ;
	4992	+    return ;
	4993	+  }
	4994	+  foreach my $line (split ("\n", $lines))
	4995	+  { print $svg_out "$line\n" ; }
	4996	+  close $svg_out ;
	4997	+
	4998	+  print "Convert world_map_$period.svg to png\n" ;
	4999	+  `svg/convert.exe svg/world_map_$period.svg png:svg/world_map_$period.png` ;
	5000	+}
	4990	+
	4991	+sub RatioAndFillColor
	4992	+{
	4993	+  # Derives a fill colour for one country and stores it in %fills.
	4994	+  #
	4995	+  # Arguments:
	4996	+  #   $code         - country code; its lower-case form is the key in %fills
	4997	+  #   $requests     - value to map; capped at $requests_max
	4998	+  #   $requests_max - value that corresponds to ratio 1
	4999	+  #   $ratio_sqrt   - when true, the square root of a positive ratio is used
	5000	+  #
	5001	+  # Returns the list (capped requests, ratio, fill), where fill is the
	5002	+  # lower-cased result of hsv2rgb ($ratio*120,1,1).
	5003	+  my ($code, $requests, $requests_max, $ratio_sqrt) = @_ ;
	5004	+
	5005	+  $requests = $requests_max if $requests > $requests_max ;
	5006	+
	5007	+  my $ratio = $requests / $requests_max ;
	5008	+  $ratio = sqrt ($ratio) if $ratio_sqrt && ($ratio > 0) ;
	5009	+
	5010	+  my $fill = lc hsv2rgb($ratio*120,1,1) ;
	5011	+  $fills {lc $code} = $fill ;
	5012	+
	5013	+  return ($requests,$ratio,$fill) ;
	5014	+}
	5024	+
	5025	+sub RatioAndFillColor2
	5026	+{
	5027	+  # Derives a two-sided fill colour for one country and stores it in %fills.
	5028	+  # Ratios of 0.5 and above use hsv2rgb with first argument 120, ratios below
	5029	+  # 0.5 use first argument 0; in both cases the distance from 0.5 sets the
	5030	+  # second and third arguments.
	5031	+  #
	5032	+  # Arguments:
	5033	+  #   $code         - country code; its lower-case form is the key in %fills
	5034	+  #   $requests     - value to map; capped at $requests_max
	5035	+  #   $requests_max - value that corresponds to ratio 1
	5036	+  #   $ratio_sqrt   - accepted for call compatibility with RatioAndFillColor, not used
	5037	+  #
	5038	+  # Returns the list (capped requests, ratio, fill).
	5039	+  #
	5040	+  # NOTE(review): the original computed a second colour per branch
	5041	+  # (hsv2rgb(60 +/- $value*60,0.5+$value/2,0.5+$value/2)) that was overwritten
	5042	+  # on the very next line; that call is dropped here, assuming hsv2rgb has no
	5043	+  # side effects - confirm against its definition.
	5044	+  my ($code, $requests, $requests_max, $ratio_sqrt) = @_ ;
	5045	+  my ($ratio,$fill,$value) ;
	5046	+
	5047	+  if ($requests > $requests_max)
	5048	+  { $requests = $requests_max ; }
	5049	+
	5050	+  $ratio = $requests / $requests_max ;
	5051	+
	5052	+  if ($ratio >= 0.5)
	5053	+  {
	5054	+    $value = $ratio * 2 - 1 ; # 0.5 - 1 -> 0 - 1
	5055	+    $fill  = lc hsv2rgb(120,0+$value,0.5+$value/2) ;
	5056	+  }
	5057	+  else
	5058	+  {
	5059	+    $value = 1 - $ratio * 2 ; # 0 - 0.5 -> 1 - 0
	5060	+    $fill  = lc hsv2rgb(0,0+$value,0.5+$value/2) ;
	5061	+  }
	5062	+
	5063	+  $fills {lc $code} = $fill ;
	5064	+  return ($requests,$ratio,$fill) ;
	5065	+}
	5055	+
	5056	+
	5057	+sub WriteReportPerCountryBreakdown
	5058	+{
	5059	+ print "\nWriteReportPerCountryBreakDown\n" ;
	5060	+
	5061	+ my ($title,$views_edits,$links,$cutoff_requests, $cutoff_percentage, $show_logcount) = @_ ;
	5062	+ my ($link_country,$population,$icon,$bar,$bars,$bar_width,$perc,$perc_tot,$perc_global,$requests_tot) ;
	5063	+ my ($requests_this_language, $requests_all_languages, $requests_used, $requests_other) ;
	5064	+ my @index_countries ;
	5065	+ my $views_edits_lc = lc $views_edits ;
	5066	+
	5067	+ if ($show_logcount)
	5068	+ { $report_version = "<p>This is the extended version of this report, with even small percentages included (> $cutoff_percentage\%) (see also bottom of page). " .
	5069	+ "Switch to <a href='$file_html_per_country_breakdown'>regular version</a>" ; }
	5070	+ else
	5071	+ { $report_version = "<p>This is the regular version of this report, with only major percentages (> $cutoff_percentage\%) included." .
	5072	+ " Switch to <a href='$file_html_per_country_breakdown_huge'>extended version</a>" ; }
	5073	+
	5074	+ $html = $header ;
	5075	+ $html =~ s/TITLE/$title/ ;
	5076	+ $html =~ s/HEADER/$title/ ;
	5077	+ $html =~ s/LINKS// ;
	5078	+ $html =~ s/ALSO/$links/ ;
	5079	+ $html =~ s/NOTES// ;
	5080	+ $html =~ s/X1000/. Period <b>$requests_recently_start - $requests_recently_stop<\/b><br>$report_version/ ;
	5081	+ $html =~ s/DATE// ;
	5082	+
	5083	+ $html .= "<p><table border=1 width=800>INDEX\n" ;
	5084	+
	5085	+ $html .= &HtmlWorldMaps ;
	5086	+
	5087	+ my $anomaly_found ;
	5088	+
	5089	+ foreach $country (keys_sorted_by_value_num_desc %requests_recently_per_country)
	5090	+ {
	5091	+ next if $requests_recently_per_country {$country} < $cutoff_requests ;
	5092	+
	5093	+ %requests_per_language = %{$requests_recently_per_country_per_language {$country}} ;
	5094	+ @languages = keys_sorted_by_value_num_desc %requests_per_language ;
	5095	+
	5096	+ $requests_this_country = $requests_recently_per_country {$country} ;
	5097	+
	5098	+ $perc = 'n.a.' ;
	5099	+ if ($requests_recently_all > 0)
	5100	+ { $perc = &Percentage ($requests_this_country / $requests_recently_all) ; }
	5101	+
	5102	+ ($link_country,$icon,$population) = &CountryMetaInfo ($country) ;
	5103	+
	5104	+ $html .= "<tr><th colspan=99 class=lh3><a id='$country' name='$country'></a><br>$icon $link_country <small>($perc share of global total)</small></th></tr>\n" ;
	5105	+
	5106	+ $perc_tot = 0;
	5107	+ $requests_used = 0 ;
	5108	+ for ($l = 0 ; $l < 50 ; $l++)
	5109	+ {
	5110	+ $requests_this_language = $requests_recently_per_country_per_language {$country} {$languages [$l]} ;
	5111	+ $requests_all_languages = $requests_recently_per_country {$country} ;
	5112	+
	5113	+ last if $requests_this_language == 0 ;
	5114	+
	5115	+ $requests_used += $requests_this_language ;
	5116	+
	5117	+ $perc = 0 ;
	5118	+ if ($requests_recently_all > 0)
	5119	+ {
	5120	+ $perc = &Percentage ($requests_this_language / $requests_all_languages) ;
	5121	+
	5122	+ last if $perc < $cutoff_percentage ;
	5123	+
	5124	+ $perc_tot += $perc ;
	5125	+ }
	5126	+
	5127	+ $language = $languages [$l] ;
	5128	+ if ($out_languages {$language} ne "")
	5129	+ { $language = $out_languages {$language} ; }
	5130	+ if (length ($language) > 20)
	5131	+ { $language =~ s/ .*$// ; }
	5132	+ $bar_width = int ($perc * 6) ;
	5133	+
	5134	+ if (($country eq "Australia") && ($language eq "Japanese") && ($perc > 5))
	5135	+ { $language .= " <b><a href='#anomaly' onclick='alert(\"Probably incorrectly assigned to this country.\\nOutdated Regional Internet Registry (RIR) administration may have caused this.\")';><font color='#FF0000'>(*)</font></a></b>" ; $anomaly_found = $true ;}
	5136	+
	5137	+ $bar_100 = "" ;
	5138	+ if ($bars++ == 0)
	5139	+ {
	5140	+ $bar_width_100 = 600 - $bar_width ;
	5141	+ $bar_100 = "<img src='background.gif' width=$bar_width_100 height=15>" ;
	5142	+ }
	5143	+
	5144	+ if ($language !~ /Portal/)
	5145	+ { $language .= " Wp" ; }
	5146	+
	5147	+ $perc =~ s/(\.\d)0/$1/ ; # 0.10% -> 0.1%
	5148	+ if ($show_logcount && ($requests_this_language < 5 * $months_recently)) # show in grey to discuss threshold on foundation-l
	5149	+ { $perc = "<font color=#800000>$perc</font" ; }
	5150	+
	5151	+ $html .= "<tr><th class=l class=small nowrap>$language</th>" .
	5152	+ ($show_logcount ? "<td class=r>$requests_this_language</td>" : "") .
	5153	+ "<td class=c>$perc</td>" .
	5154	+ "<td class=l><img src='yellowbar_hor.gif' width=$bar_width height=15>$bar_100</td></tr>\n" ;
	5155	+ }
	5156	+
	5157	+ if ($perc_tot > 100) { $perc_tot = 100 ; }
	5158	+ $requests_other = $requests_all_languages - $requests_used ;
	5159	+ $perc_other = sprintf '%.1f', 100 - $perc_tot ;
	5160	+ if (($requests_other > 0) && ($perc_other > 0))
	5161	+ {
	5162	+ $bar_width = $perc_other * 6 ;
	5163	+ $html .= "<tr><th class=l class=small nowrap>Other</th>" .
	5164	+ ($show_logcount ? "<td class=r>$requests_other</td>" : "") .
	5165	+ "<td class=c>$perc_other%</td>" .
	5166	+ "<td class=l><img src='yellowbar_hor.gif' width=$bar_width height=15></td></tr>\n" ;
	5167	+ }
	5168	+
	5169	+ if ($verbose)
	5170	+ { push @index_countries, "<a href='#$country'>$country ($perc)</a> " ; }
	5171	+ else
	5172	+ { push @index_countries, "<a href='#$country'>$country</a> " ; }
	5173	+
	5174	+ # print "\n" ;
	5175	+ # $html .= "<tr><td colspan=99> </td></tr>\n" ;
	5176	+ }
	5177	+ $html .= "</table>" ;
	5178	+ $html .= "<p><b>Share<\/b> is the percentage of requesting ip addresses (out of the global total) which originated from this country" .
	5179	+ "<br> Further percentages show per country share of $views_edits_lc per Wikipedia visited" ;
	5180	+ $html .= "<p><b>Countries</b> are only included if the number of requests in the period exceeds $cutoff_requests,000 ($cutoff_requests matching records in 1:1000 sampled log)" ;
	5181	+ $html .= "<p><b>Wikipedia's</b> are only listed for some country if the share of visitors for that particular country exceeds $cutoff_percentage\%." ;
	5182	+ if ($show_logcount)
	5183	+ {
	5184	+ $html .= "<p>The second column displays the actual <b>numbers of records</b> found in the 1:1000 sampled log on which the percentage is based." .
	5185	+ "<br>Multiply by 1000 for actual $views_edits_lc over the whole period of $months_recently months." ;
	5186	+ $html .= "<br>If the number of records in the sampled log does not reach the (arbitrary) number of 5 per sampled month, the percentage is flagged dark red to extra emphasize high inaccuracy." ;
	5187	+ }
	5188	+
	5189	+ $html .= "<p>Page requests by bots are not included. Also all ip addresses that occur more than once on a given day are discarded for that day." ;
	5190	+ $html .= "<br> A few false negatives are taken for granted. " .
	5191	+ "Country meta data collected from <a href='http://en.wikipedia.org/wiki/List_of_countries_by_population'>English Wikipedia</a>. " .
	5192	+ "Portal = <a href='http://www.wikipedia.org'>www.wikipedia.org</a>" ;
	5193	+# if ($anomaly_found)
	5194	+# { $html .= "<p><a id='anomaly' name='anomaly'>Probably anomaly caused by outdated <a href='http://en.wikipedia.org/wiki/Regional_Internet_Registry'>Regional Internet Registry</a> administration.\n" ; }
	5195	+
	5196	+ $html .= $colophon ;
	5197	+
	5198	+ $index = &HtmlIndex (join '/ ', sort (@index_countries)) ;
	5199	+ $html =~ s/INDEX/$index/ ;
	5200	+
	5201	+ if (! $show_logcount)
	5202	+ { &PrintHtml ($html, "$path_out/$file_html_per_country_breakdown") ; }
	5203	+ else
	5204	+ { &PrintHtml ($html, "$path_out/$file_html_per_country_breakdown_huge") ; }
	5205	+}
	5206	+
	5207	+sub WriteReportPerCountryTrends
	5208	+{
	5209	+ print "\nWriteReportPerCountryTrends\n" ;
	5210	+
	5211	+ my ($title,$views_edits,$links) = @_ ;
	5212	+ my ($link_country,$population,$icon,$bar,$bars,$bar_width,$perc,$perc_tot,$perc_global,$requests_tot) ;
	5213	+ my @index_languages ;
	5214	+ my $views_edits_lc = lc $views_edits ;
	5215	+
	5216	+ $html = $header ;
	5217	+ $html =~ s/TITLE/$title/ ;
	5218	+ $html =~ s/HEADER/$title/ ;
	5219	+ $html =~ s/LINKS// ;
	5220	+ $html =~ s/ALSO/$links/ ;
	5221	+ $html =~ s/NOTES// ;
	5222	+ $html =~ s/X1000/. Period <b>$requests_start - $requests_stop<\/b>/ ;
	5223	+ $html =~ s/DATE// ;
	5224	+
	5225	+ $html .= "<p><table border=1 width=800>INDEX\n" ;
	5226	+
	5227	+ $html .= &HtmlWorldMaps ;
	5228	+
	5229	+ foreach $country (keys_sorted_by_value_num_desc %requests_per_country)
	5230	+ {
	5231	+ next if $requests_per_country {$country} < 50 * ($#quarters + 1) ;
	5232	+
	5233	+ %requests_per_language = %{$requests_per_country_per_language {$country}} ;
	5234	+ @languages = keys_sorted_by_value_num_desc %requests_per_language ;
	5235	+
	5236	+ ($link_country,$icon,$population) = &CountryMetaInfo ($country) ;
	5237	+
	5238	+ $html .= "<tr><th colspan=99 class=lh3><a id='$country' name='$country'></a><br>$icon $link_country</th></tr>\n" ;
	5239	+
	5240	+ if ($views_edits eq 'Page Edits')
	5241	+ { $rowspan = $#quarters+2 ; }
	5242	+ else
	5243	+ { $rowspan = $#quarters+3 ; }
	5244	+
	5245	+ $html .= "<tr><th class=small>Quarter</th>[<th class=small>Total</th>]<th class=small>Share</th><th rowspan=$rowspan> </th>\n" ;
	5246	+ for ($l = 0 ; $l < 10 ; $l++)
	5247	+ {
	5248	+ $language = $languages [$l] ;
	5249	+ if ($out_languages {$language} ne "")
	5250	+ { $language = $out_languages {$language} ; }
	5251	+ if (length ($language) > 20)
	5252	+ { $language =~ s/ .*$// ; }
	5253	+ $html .= "<th class=c class=small>$language</th>\n" ;
	5254	+ # print " [$language] " ;
	5255	+ }
	5256	+ $html .= "<th>other</th>\n" ;
	5257	+ $html .= "</tr>\n" ;
	5258	+ # print "\n" ;
	5259	+
	5260	+ $lines = 0 ;
	5261	+ foreach $quarter (reverse @quarters)
	5262	+ {
	5263	+ next if $views_edits eq 'Page Edits' and $quarter =~ /2009.*?Q3/ ; # strange results, to be researched
	5264	+
	5265	+ $line1 = "<tr>\n" ;
	5266	+ $line2 = "<tr>\n" ;
	5267	+
	5268	+ my $requests_this_country = $requests_per_quarter_per_country {$quarter} {$country} ;
	5269	+ my $requests_all_countries = $requests_per_quarter {$quarter} ;
	5270	+
	5271	+ $perc = 'n.a.' ;
	5272	+ if ($requests_all_countries > 0)
	5273	+ {
	5274	+ $perc = &Percentage ($requests_this_country / $requests_all_countries) ;
	5275	+ # print "$quarter: " . sprintf ("%9d", $requests_this_country) . " = $perc\% $country\n" ;
	5276	+ $line1 .= "<th class=c nowrap> $quarter </th>[<td align=right>$requests_this_country</td>]<td align=center>$perc</td>" ;
	5277	+ $line2 .= "<th nowrap> $quarter </th>[<td align=right>$requests_this_country</td>]<td align=center>$perc</td>" ;
	5278	+ }
	5279	+
	5280	+ $perc_tot = 0;
	5281	+ for ($l = 0 ; $l < 10 ; $l++)
	5282	+ {
	5283	+ my $requests_this_language = $requests_per_quarter_per_country_per_language {$quarter} {$country} {$languages [$l]} ;
	5284	+ my $requests_all_languages = $requests_per_quarter_per_country {$quarter} {$country} ;
	5285	+ $perc = 0 ;
	5286	+ if ($requests_all_languages > 0)
	5287	+ {
	5288	+ $perc = &Percentage ($requests_this_language / $requests_all_languages) ;
	5289	+ $perc_tot += $perc ;
	5290	+ }
	5291	+ # print "[" . sprintf ("%9d", $requests_this_language) . " = $perc\%]" ;
	5292	+ if ($perc != 0)
	5293	+ { $line2 .= "<td class=c><img src='yellowbar_hor.gif' width=$perc height=15></td>" ; }
	5294	+ else
	5295	+ { $line2 .= "<td class=l> </td>" ; }
	5296	+
	5297	+ if (($country eq "Australia") && (($perc < 50) && ($perc > 5)))
	5298	+ { $perc .= " <b><a href='#anomaly' onclick='alert(\"Probably incorrectly assigned to this country.\\nOutdated Regional Internet Registry (RIR) administration may have caused this.\")';><font color='#FF0000'>(*)</font></a></b>" ; $anomaly_found = $true ;}
	5299	+ $line1 .= "<td class=c>[$requests_this_language]$perc</td>" ;
	5300	+ }
	5301	+ if ($perc_tot > 100) { $perc_tot = 100 ; }
	5302	+ $perc_other = sprintf '%.1f', 100 - $perc_tot ;
	5303	+ $line1 .= "<td class=c>$perc_other%</td>" ;
	5304	+
	5305	+ $line1 .= "</tr>\n" ;
	5306	+ $line2 .= "</tr>\n" ;
	5307	+ $html .= $line1 ;
	5308	+ if ($lines++ == $#quarters)
	5309	+ { $html .= $line2 ; } # only for last quarter
	5310	+ }
	5311	+
	5312	+ if ($verbose)
	5313	+ { push @index_countries, "<a href='#$country'>$country ($perc)</a> " ; }
	5314	+ else
	5315	+ { push @index_countries, "<a href='#$country'>$country</a> " ; }
	5316	+
	5317	+ # print "\n" ;
	5318	+ # $html .= "<tr><td colspan=99> </td></tr>\n" ;
	5319	+ }
	5320	+ $html .= "</table>" ;
	5321	+ $html .= "<p><b>Share<\/b> is the percentage of requesting ip addresses (out of the global total) which originated from this country" .
	5322	+ "<br> Further percentages show per country per quarter share of $views_edits_lc per Wikipedia visited" ;
	5323	+ $html .= "<p>Countries are only included if the number of requests in the period exceeds 100,000 (100 matching records in 1:1000 sampled log)" ;
	5324	+ $html .= "<br>Page requests by bots are not included. Also all ip addresses that occur more than once on a given day are discarded for that day." ;
	5325	+ $html .= "<br> A few false negatives are taken for granted. " .
	5326	+ "Country meta data collected from <a href='http://en.wikipedia.org/wiki/List_of_countries_by_population'>English Wikipedia</a>. " .
	5327	+ "Portal = <a href='http://www.wikipedia.org'>www.wikipedia.org</a>" ;
	5328	+ $html .= $colophon ;
	5329	+
	5330	+ $index = &HtmlIndex (join '/ ', sort (@index_countries)) ;
	5331	+ $html =~ s/INDEX/$index/ ;
	5332	+
	5333	+ &PrintHtml ($html, "$path_out/$file_html_per_country_trends") ;
	5334	+}
	5335	+
	5336	+sub CorrectForMissingDays
	5337	+{
	5338	+ my ($period, $count, $code, $var) = @_ ;
	5339	+
	5340	+ if ($missing_days {$period} > 0)
	5341	+ {
	5342	+ my $count_prev = $count ;
	5343	+ $count = int (0.5 + $count * $correct_for_missing_days {$period}) ;
	5344	+ if ($code =~ /us/i)
	5345	+ { print "\nperiod $period: correct for ${missing_days {$period}} missing days = * ${correct_for_missing_days {$period}}, " .
	5346	+ " e.g. for $code: $var $count_prev -> $count\n\n" ; }
	5347	+ }
	5348	+ return ($count) ;
	5349	+}
	5350	+
	5351	+sub FormatCount
	5352	+{
	5353	+ my $count = shift ;
	5354	+ if ($count eq "")
	5355	+ { return (" ") ; }
	5356	+ if ($count < 1)
	5357	+ { return ("1") ; }
	5358	+ $count =~ s/^(\d{1,3})(\d\d\d)$/$1,$2/ ;
	5359	+ $count =~ s/^(\d{1,3})(\d\d\d)(\d\d\d)$/$1,$2,$3/ ;
	5360	+ $count =~ s/^(\d{1,3})(\d\d\d)(\d\d\d)(\d\d\d)$/$1,$2,$3,$4/ ;
	5361	+ return ($count) ;
	5362	+}
	5363	+
	5364	+sub SortMime
	5365	+{
	5366	+ my $mime = shift ;
	5367	+ if ($mime eq "text/html")
	5368	+ { return (2000000000 + $mimetypes {$mime}) ; }
	5369	+ elsif ($mime =~ /image\/(?:png\|jpeg\|gif)/)
	5370	+ { return (1000000000 + $mimetypes {$mime}) ; }
	5371	+ else
	5372	+ { return ($mimetypes {$mime}) ; }
	5373	+}
	5374	+
	5375	+sub ExpandAbbreviation
	5376	+{
	5377	+ my $text = shift ;
	5378	+ # reverse (more or less) abbreviations
	5379	+ $text =~ s/^[\@\*]// ;
	5380	+ $text =~ s/^xx:upload/upload: /;
	5381	+ $text =~ s/^wb:/wikibooks:/;
	5382	+ $text =~ s/^wk:/wiktionary:/;
	5383	+ $text =~ s/^wn:/wikinews:/;
	5384	+ $text =~ s/^wp:/wikipedia:/;
	5385	+ $text =~ s/^wq:/wikiquote:/;
	5386	+ $text =~ s/^ws:/wikisource:/;
	5387	+ $text =~ s/^wv:/wikiversity:/;
	5388	+ $text =~ s/^wx:/wikispecial:/;
	5389	+ $text =~ s/^mw:/wikispecial:/; # eg bugzilla
	5390	+ $text =~ s/:!mw/:mediawiki/;
	5391	+ $text =~ s/^wm:/wikimedia:/;
	5392	+ $text =~ s/:wm$/:wikimedia/;
	5393	+ $text =~ s/^wmf:/foundation:/;
	5394	+ $text =~ s/:www$/:portal/;
	5395	+# $text =~ s/^wikispecial:(.*)$/$1: /;
	5396	+ return ($text) ;
	5397	+}
	5398	+
	5399	+sub GetSecondaryDomain
	5400	+{
	5401	+ $pattern_url_post = "\\.(?:biz\|com\|info\|name\|net\|org\|pro\|aero\|asia\|cat\|coop\|edu\|gov\|int\|jobs\|mil\|mobi\|museum\|tel\|travel\|arpa\|[a-zA-Z0-9-]{2}\|(?:com?\|ne)\\.[a-zA-Z0-9-]{2})\$" ;
	5402	+
	5403	+ my $domain = shift ;
	5404	+ $domain =~ s/http:\/\/// ;
	5405	+ $domain =~ s/\/.*$// ;
	5406	+
	5407	+ if ($domain !~ /\./)
	5408	+ { return ($domain) ; }
	5409	+
	5410	+ $domain =~ s/$pattern_url_post// ;
	5411	+ $domain =~ s/^.*?\.([^\.]+)$/$1/ ;
	5412	+ return ($domain) ;
	5413	+}
	5414	+
	5415	+sub OpenLog
	5416	+{
	5417	+# only shrink log when same log file is appended daily, is no longer the case
	5418	+# $fileage = -M "$dir_reports/$file_log" ;
	5419	+# if ($fileage > 5)
	5420	+# {
	5421	+# open "FILE_LOG", "<", "$dir_reports/$file_log" \|\| abort ("Log file '$file_log' could not be opened.") ;
	5422	+# @log = <FILE_LOG> ;
	5423	+# close "FILE_LOG" ;
	5424	+# $lines = 0 ;
	5425	+# open "FILE_LOG", ">", "$dir_reports/$file_log" \|\| abort ("Log file '$file_log' could not be opened.") ;
	5426	+# foreach $line (@log)
	5427	+# {
	5428	+# if (++$lines >= $#log - 5000)
	5429	+# { print FILE_LOG $line ; }
	5430	+# }
	5431	+# close "FILE_LOG" ;
	5432	+# }
	5433	+# open "FILE_LOG", ">>", "$dir_reports/$file_log" \|\| abort ("Log file '$file_log' could not be opened.") ;
	5434	+ open "FILE_LOG", ">>", "$dir_reports/$file_log" \|\| abort ("Log file '$file_log' could not be opened.") ;
	5435	+ &Log ("\n\n===== Wikimedia Sampled Visitors Log Report / " . date_time_english (time) . " =====\n\n") ;
	5436	+}
	5437	+
	5438	+sub Normalize
	5439	+{
	5440	+ my $count = shift ;
	5441	+ $count *= $multiplier ;
	5442	+# if ($count < 1) { $count = 1 ; } -> do this at FormatCount
	5443	+ return (sprintf ("%.0f", $count)) ;
	5444	+}
	5445	+
	5446	+sub Log
	5447	+{
	5448	+ $msg = shift ;
	5449	+ print $msg ;
	5450	+ print FILE_LOG $msg ;
	5451	+}
	5452	+
	5453	+sub InitProjectNames
	5454	+{
	5455	+ # copied from WikiReports.pl
	5456	+
	5457	+ %wikipedias = (
	5458	+# mediawiki=>"http://wikimediafoundation.org Wikimedia",
	5459	+ nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
	5460	+ sources=>"http://wikisource.org Old Wikisource",
	5461	+ meta=>"http://meta.wikimedia.org Meta-Wiki",
	5462	+ beta=>"http://beta.wikiversity.org Beta",
	5463	+ species=>"http://species.wikipedia.org WikiSpecies",
	5464	+ commons=>"http://commons.wikimedia.org Commons",
	5465	+ foundation=>"http://wikimediafoundation.org Wikimedia Foundation",
	5466	+ sep11=>"http://sep11.wikipedia.org In Memoriam",
	5467	+ nlwikimedia=>"http://nl.wikimedia.org Wikimedia Nederland",
	5468	+ plwikimedia=>"http://pl.wikimedia.org Wikimedia Polska",
	5469	+ mediawiki=>"http://www.mediawiki.org MediaWiki",
	5470	+ dewikiversity=>"http://de.wikiversity.org Wikiversität",
	5471	+ frwikiversity=>"http://fr.wikiversity.org Wikiversität",
	5472	+ wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
	5473	+ wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
	5474	+ aa=>"http://aa.wikipedia.org Afar",
	5475	+ ab=>"http://ab.wikipedia.org Abkhazian",
	5476	+ ace=>"http://ace.wikipedia.org Acehnese",
	5477	+ af=>"http://af.wikipedia.org Afrikaans",
	5478	+ ak=>"http://ak.wikipedia.org Akan", # was Akana
	5479	+ als=>"http://als.wikipedia.org Alemannic", # was Elsatian
	5480	+ am=>"http://am.wikipedia.org Amharic",
	5481	+ an=>"http://an.wikipedia.org Aragonese",
	5482	+ ang=>"http://ang.wikipedia.org Anglo-Saxon",
	5483	+ ar=>"http://ar.wikipedia.org Arabic",
	5484	+ arc=>"http://arc.wikipedia.org Aramaic",
	5485	+ arz=>"http://arz.wikipedia.org Egyptian Arabic",
	5486	+ as=>"http://as.wikipedia.org Assamese",
	5487	+ ast=>"http://ast.wikipedia.org Asturian",
	5488	+ av=>"http://av.wikipedia.org Avar", # was Avienan
	5489	+ ay=>"http://ay.wikipedia.org Aymara",
	5490	+ az=>"http://az.wikipedia.org Azeri", # was Azerbaijani
	5491	+ ba=>"http://ba.wikipedia.org Bashkir",
	5492	+ bar=>"http://bar.wikipedia.org Bavarian",
	5493	+ bat_smg=>"http://bat-smg.wikipedia.org Samogitian",
	5494	+ "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
	5495	+ bcl=>"http://bcl.wikipedia.org Central Bicolano",
	5496	+ be=>"http://be.wikipedia.org Belarusian",
	5497	+ "be-x-old"=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
	5498	+ be_x_old=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
	5499	+ bg=>"http://bg.wikipedia.org Bulgarian",
	5500	+ bh=>"http://bh.wikipedia.org Bihari",
	5501	+ bi=>"http://bi.wikipedia.org Bislama",
	5502	+ bm=>"http://bm.wikipedia.org Bambara",
	5503	+ bn=>"http://bn.wikipedia.org Bengali",
	5504	+ bo=>"http://bo.wikipedia.org Tibetan",
	5505	+ bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri",
	5506	+ br=>"http://br.wikipedia.org Breton",
	5507	+ bs=>"http://bs.wikipedia.org Bosnian",
	5508	+ bug=>"http://bug.wikipedia.org Buginese",
	5509	+ bxr=>"http://bxr.wikipedia.org Buryat",
	5510	+ ca=>"http://ca.wikipedia.org Catalan",
	5511	+ cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano",
	5512	+ "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
	5513	+ cdo=>"http://cdo.wikipedia.org Min Dong",
	5514	+ ce=>"http://ce.wikipedia.org Chechen",
	5515	+ ceb=>"http://ceb.wikipedia.org Cebuano",
	5516	+ ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru
	5517	+ ckb=>"http://ckb.wikipedia.org Sorani",
	5518	+ cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw
	5519	+ chr=>"http://chr.wikipedia.org Cherokee",
	5520	+ chy=>"http://chy.wikipedia.org Cheyenne", # was Setsêhestâhese
	5521	+ co=>"http://co.wikipedia.org Corsican",
	5522	+ cr=>"http://cr.wikipedia.org Cree",
	5523	+ crh=>"http://crh.wikipedia.org Crimean Tatar",
	5524	+ cs=>"http://cs.wikipedia.org Czech",
	5525	+ csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian
	5526	+ cu=>"http://cv.wikipedia.org Old Church Slavonic",
	5527	+ cv=>"http://cv.wikipedia.org Chuvash", # was Cavas
	5528	+ cy=>"http://cy.wikipedia.org Welsh",
	5529	+ da=>"http://da.wikipedia.org Danish",
	5530	+ de=>"http://de.wikipedia.org German",
	5531	+ diq=>"http://diq.wikipedia.org Zazaki",
	5532	+ dk=>"http://dk.wikipedia.org Danish",
	5533	+ dsb=>"http://dsb.wikipedia.org Lower Sorbian",
	5534	+ dv=>"http://dv.wikipedia.org Divehi",
	5535	+ dz=>"http://dz.wikipedia.org Dzongkha",
	5536	+ ee=>"http://ee.wikipedia.org Ewe",
	5537	+ el=>"http://el.wikipedia.org Greek",
	5538	+ eml=>"http://eml.wikipedia.org Emilian-Romagnol",
	5539	+ en=>"http://en.wikipedia.org English",
	5540	+ eo=>"http://eo.wikipedia.org Esperanto",
	5541	+ es=>"http://es.wikipedia.org Spanish",
	5542	+ et=>"http://et.wikipedia.org Estonian",
	5543	+ eu=>"http://eu.wikipedia.org Basque",
	5544	+ ext=>"http://ext.wikipedia.org Extremaduran",
	5545	+ fa=>"http://fa.wikipedia.org Persian",
	5546	+ ff=>"http://ff.wikipedia.org Fulfulde",
	5547	+ fi=>"http://fi.wikipedia.org Finnish",
	5548	+ "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro",
	5549	+ fiu_vro=>"http://fiu-vro.wikipedia.org Voro",
	5550	+ fj=>"http://fj.wikipedia.org Fijian",
	5551	+ fo=>"http://fo.wikipedia.org Faroese", # was Faeroese
	5552	+ fr=>"http://fr.wikipedia.org French",
	5553	+ frp=>"http://frp.wikipedia.org Arpitan",
	5554	+ fur=>"http://fur.wikipedia.org Friulian",
	5555	+ fy=>"http://fy.wikipedia.org Frisian",
	5556	+ ga=>"http://ga.wikipedia.org Irish",
	5557	+ gan=>"http://gan.wikipedia.org Gan",
	5558	+ gay=>"http://gay.wikipedia.org Gayo",
	5559	+ gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic
	5560	+ gl=>"http://gl.wikipedia.org Galician", # was Galego
	5561	+ glk=>"http://glk.wikipedia.org Gilaki",
	5562	+ gn=>"http://gn.wikipedia.org Guarani",
	5563	+ got=>"http://got.wikipedia.org Gothic",
	5564	+ gu=>"http://gu.wikipedia.org Gujarati",
	5565	+ gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic
	5566	+ ha=>"http://ha.wikipedia.org Hausa",
	5567	+ hak=>"http://hak.wikipedia.org Hakka",
	5568	+ haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian
	5569	+ he=>"http://he.wikipedia.org Hebrew",
	5570	+ hi=>"http://hi.wikipedia.org Hindi",
	5571	+ hif=>"http://hif.wikipedia.org Fiji Hindi",
	5572	+ ho=>"http://ho.wikipedia.org Hiri Motu",
	5573	+ hr=>"http://hr.wikipedia.org Croatian",
	5574	+ hsb=>"http://hsb.wikipedia.org Upper Sorbian",
	5575	+ ht=>"http://ht.wikipedia.org Haitian",
	5576	+ hu=>"http://hu.wikipedia.org Hungarian",
	5577	+ hy=>"http://hy.wikipedia.org Armenian",
	5578	+ hz=>"http://hz.wikipedia.org Herero",
	5579	+ ia=>"http://ia.wikipedia.org Interlingua",
	5580	+ iba=>"http://iba.wikipedia.org Iban",
	5581	+ id=>"http://id.wikipedia.org Indonesian",
	5582	+ ie=>"http://ie.wikipedia.org Interlingue",
	5583	+ ig=>"http://ig.wikipedia.org Igbo",
	5584	+ ii=>"http://ii.wikipedia.org Yi",
	5585	+ ik=>"http://ik.wikipedia.org Inupiak",
	5586	+ ilo=>"http://ilo.wikipedia.org Ilokano",
	5587	+ io=>"http://io.wikipedia.org Ido",
	5588	+ is=>"http://is.wikipedia.org Icelandic",
	5589	+ it=>"http://it.wikipedia.org Italian",
	5590	+ iu=>"http://iu.wikipedia.org Inuktitut",
	5591	+ ja=>"http://ja.wikipedia.org Japanese",
	5592	+ jbo=>"http://jbo.wikipedia.org Lojban",
	5593	+ jv=>"http://jv.wikipedia.org Javanese",
	5594	+ ka=>"http://ka.wikipedia.org Georgian",
	5595	+ kaa=>"http://kaa.wikipedia.org Karakalpak",
	5596	+ kab=>"http://ka.wikipedia.org Kabyle",
	5597	+ kaw=>"http://kaw.wikipedia.org Kawi",
	5598	+ kg=>"http://kg.wikipedia.org Kongo",
	5599	+ ki=>"http://ki.wikipedia.org Kikuyu",
	5600	+ kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo
	5601	+ kk=>"http://kk.wikipedia.org Kazakh",
	5602	+ kl=>"http://kl.wikipedia.org Greenlandic",
	5603	+ km=>"http://km.wikipedia.org Khmer", # was Cambodian
	5604	+ kn=>"http://kn.wikipedia.org Kannada",
	5605	+ ko=>"http://ko.wikipedia.org Korean",
	5606	+ kr=>"http://kr.wikipedia.org Kanuri",
	5607	+ ks=>"http://ks.wikipedia.org Kashmiri",
	5608	+ ksh=>"http://ksh.wikipedia.org Ripuarian",
	5609	+ ku=>"http://ku.wikipedia.org Kurdish",
	5610	+ kv=>"http://kv.wikipedia.org Komi",
	5611	+ kw=>"http://kw.wikipedia.org Cornish", # was Kornish
	5612	+ ky=>"http://ky.wikipedia.org Kirghiz",
	5613	+ la=>"http://la.wikipedia.org Latin",
	5614	+ lad=>"http://lad.wikipedia.org Ladino",
	5615	+ lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch
	5616	+ lbe=>"http://lbe.wikipedia.org Lak",
	5617	+ lg=>"http://lg.wikipedia.org Ganda",
	5618	+ li=>"http://li.wikipedia.org Limburgish",
	5619	+ lij=>"http://lij.wikipedia.org Ligurian",
	5620	+ lmo=>"http://lmo.wikipedia.org Lombard",
	5621	+ ln=>"http://ln.wikipedia.org Lingala",
	5622	+ lo=>"http://lo.wikipedia.org Laotian",
	5623	+ ls=>"http://ls.wikipedia.org Latino Sine Flexione",
	5624	+ lt=>"http://lt.wikipedia.org Lithuanian",
	5625	+ lv=>"http://lv.wikipedia.org Latvian",
	5626	+ mad=>"http://mad.wikipedia.org Madurese",
	5627	+ mak=>"http://mak.wikipedia.org Makasar",
	5628	+ map_bms=>"http://map-bms.wikipedia.org Banyumasan",
	5629	+ "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
	5630	+ mdf=>"http://mdf.wikipedia.org Moksha",
	5631	+ mg=>"http://mg.wikipedia.org Malagasy",
	5632	+ mh=>"http://mh.wikipedia.org Marshallese",
	5633	+ mhr=>"http://mhr.wikipedia.org Eastern Mari",
	5634	+ mi=>"http://mi.wikipedia.org Maori",
	5635	+ min=>"http://min.wikipedia.org Minangkabau",
	5636	+ minnan=>"http://minnan.wikipedia.org Minnan",
	5637	+ mk=>"http://mk.wikipedia.org Macedonian",
	5638	+ ml=>"http://ml.wikipedia.org Malayalam",
	5639	+ mn=>"http://mn.wikipedia.org Mongolian",
	5640	+ mo=>"http://mo.wikipedia.org Moldavian",
	5641	+ mr=>"http://mr.wikipedia.org Marathi",
	5642	+ ms=>"http://ms.wikipedia.org Malay",
	5643	+ mt=>"http://mt.wikipedia.org Maltese",
	5644	+ mus=>"http://mus.wikipedia.org Muskogee",
	5645	+ mwl=>"http://mwl.wikipedia.org Mirandese",
	5646	+ my=>"http://my.wikipedia.org Burmese",
	5647	+ myv=>"http://myv.wikipedia.org Erzya",
	5648	+ mzn=>"http://mzn.wikipedia.org Mazandarani",
	5649	+ na=>"http://na.wikipedia.org Nauruan", # was Nauru
	5650	+ nah=>"http://nah.wikipedia.org Nahuatl",
	5651	+ nap=>"http://nap.wikipedia.org Neapolitan",
	5652	+ nds=>"http://nds.wikipedia.org Low Saxon",
	5653	+ nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
	5654	+ "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
	5655	+ ne=>"http://ne.wikipedia.org Nepali",
	5656	+ new=>"http://new.wikipedia.org Nepal Bhasa",
	5657	+ ng=>"http://ng.wikipedia.org Ndonga",
	5658	+ nl=>"http://nl.wikipedia.org Dutch",
	5659	+ nov=>"http://nov.wikipedia.org Novial",
	5660	+ nrm=>"http://nrm.wikipedia.org Norman",
	5661	+ nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian
	5662	+ no=>"http://no.wikipedia.org Norwegian",
	5663	+ nv=>"http://nv.wikipedia.org Navajo", # was Avayo
	5664	+ ny=>"http://ny.wikipedia.org Chichewa",
	5665	+ oc=>"http://oc.wikipedia.org Occitan",
	5666	+ om=>"http://om.wikipedia.org Oromo",
	5667	+ or=>"http://or.wikipedia.org Oriya",
	5668	+ os=>"http://os.wikipedia.org Ossetic",
	5669	+ pa=>"http://pa.wikipedia.org Punjabi",
	5670	+ pag=>"http://pag.wikipedia.org Pangasinan",
	5671	+ pam=>"http://pam.wikipedia.org Kapampangan",
	5672	+ pap=>"http://pap.wikipedia.org Papiamentu",
	5673	+ pdc=>"http://pdc.wikipedia.org Pennsylvania German",
	5674	+ pi=>"http://pi.wikipedia.org Pali",
	5675	+ pih=>"http://pih.wikipedia.org Norfolk",
	5676	+ pl=>"http://pl.wikipedia.org Polish",
	5677	+ pms=>"http://pms.wikipedia.org Piedmontese",
	5678	+ pnb=>"http://pnb.wikipedia.org Western Panjabi",
	5679	+ pnt=>"http://pnt.wikipedia.org Pontic",
	5680	+ ps=>"http://ps.wikipedia.org Pashto",
	5681	+ pt=>"http://pt.wikipedia.org Portuguese",
	5682	+ qu=>"http://qu.wikipedia.org Quechua",
	5683	+ rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance
	5684	+ rmy=>"http://rmy.wikipedia.org Romani",
	5685	+ rn=>"http://rn.wikipedia.org Kirundi",
	5686	+ ro=>"http://ro.wikipedia.org Romanian",
	5687	+ roa_rup=>"http://roa-rup.wikipedia.org Aromanian",
	5688	+ "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian",
	5689	+ roa_tara=>"http://roa-tara.wikipedia.org Tarantino",
	5690	+ "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
	5691	+ ru=>"http://ru.wikipedia.org Russian",
	5692	+ ru_sib=>"http://ru-sib.wikipedia.org Siberian",
	5693	+ "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
	5694	+ rw=>"http://rw.wikipedia.org Kinyarwanda",
	5695	+ sa=>"http://sa.wikipedia.org Sanskrit",
	5696	+ sah=>"http://sah.wikipedia.org Sakha",
	5697	+ sc=>"http://sc.wikipedia.org Sardinian",
	5698	+ scn=>"http://scn.wikipedia.org Sicilian",
	5699	+ sco=>"http://sco.wikipedia.org Scots",
	5700	+ sd=>"http://sd.wikipedia.org Sindhi",
	5701	+ se=>"http://se.wikipedia.org Northern Sami",
	5702	+ sg=>"http://sg.wikipedia.org Sangro",
	5703	+ sh=>"http://sh.wikipedia.org Serbo-Croatian",
	5704	+ si=>"http://si.wikipedia.org Sinhala", # was Singhalese
	5705	+ simple=>"http://simple.wikipedia.org Simple English",
	5706	+ sk=>"http://sk.wikipedia.org Slovak",
	5707	+ sl=>"http://sl.wikipedia.org Slovene",
	5708	+ sm=>"http://sm.wikipedia.org Samoan",
	5709	+ sn=>"http://sn.wikipedia.org Shona",
	5710	+ so=>"http://so.wikipedia.org Somali", # was Somalian
	5711	+ sq=>"http://sq.wikipedia.org Albanian",
	5712	+ sr=>"http://sr.wikipedia.org Serbian",
	5713	+ srn=>"http://srn.wikipedia.org Sranan",
	5714	+ ss=>"http://ss.wikipedia.org Siswati",
	5715	+ st=>"http://st.wikipedia.org Sesotho",
	5716	+ stq=>"http://stq.wikipedia.org Saterland Frisian",
	5717	+ su=>"http://su.wikipedia.org Sundanese",
	5718	+ sv=>"http://sv.wikipedia.org Swedish",
	5719	+ sw=>"http://sw.wikipedia.org Swahili",
	5720	+ szl=>"http://szl.wikipedia.org Silesian",
	5721	+ ta=>"http://ta.wikipedia.org Tamil",
	5722	+ te=>"http://te.wikipedia.org Telugu",
	5723	+ test=>"http://test.wikipedia.org Test",
	5724	+ tet=>"http://tet.wikipedia.org Tetum",
	5725	+ tg=>"http://tg.wikipedia.org Tajik",
	5726	+ th=>"http://th.wikipedia.org Thai",
	5727	+ ti=>"http://ti.wikipedia.org Tigrinya",
	5728	+ tk=>"http://tk.wikipedia.org Turkmen",
	5729	+ tl=>"http://tl.wikipedia.org Tagalog",
	5730	+ tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
	5731	+ tn=>"http://tn.wikipedia.org Setswana",
	5732	+ to=>"http://to.wikipedia.org Tongan",
	5733	+ tokipona=>"http://tokipona.wikipedia.org Tokipona",
	5734	+ tpi=>"http://tpi.wikipedia.org Tok Pisin",
	5735	+ tr=>"http://tr.wikipedia.org Turkish",
	5736	+ ts=>"http://ts.wikipedia.org Tsonga",
	5737	+ tt=>"http://tt.wikipedia.org Tatar",
	5738	+ tum=>"http://tum.wikipedia.org Tumbuka",
	5739	+ turn=>"http://turn.wikipedia.org Turnbuka",
	5740	+ tw=>"http://tw.wikipedia.org Twi",
	5741	+ ty=>"http://ty.wikipedia.org Tahitian",
	5742	+ udm=>"http://udm.wikipedia.org Udmurt",
	5743	+ ug=>"http://ug.wikipedia.org Uighur",
	5744	+ uk=>"http://uk.wikipedia.org Ukrainian",
	5745	+ ur=>"http://ur.wikipedia.org Urdu",
	5746	+ uz=>"http://uz.wikipedia.org Uzbek",
	5747	+ ve=>"http://ve.wikipedia.org Venda", # was Lushaka
	5748	+ vec=>"http://vec.wikipedia.org Venetian",
	5749	+ vi=>"http://vi.wikipedia.org Vietnamese",
	5750	+ vls=>"http://vls.wikipedia.org West Flemish",
	5751	+ vo=>"http://vo.wikipedia.org Volapük",
	5752	+ wa=>"http://wa.wikipedia.org Walloon",
	5753	+ war=>"http://war.wikipedia.org Waray-Waray",
	5754	+ wo=>"http://wo.wikipedia.org Wolof",
	5755	+ wuu=>"http://wuu.wikipedia.org Wu",
	5756	+ xal=>"http://xal.wikipedia.org Kalmyk",
	5757	+ xh=>"http://xh.wikipedia.org Xhosa",
	5758	+ yi=>"http://yi.wikipedia.org Yiddish",
	5759	+ yo=>"http://yo.wikipedia.org Yoruba",
	5760	+ za=>"http://za.wikipedia.org Zhuang",
	5761	+ zea=>"http://zea.wikipedia.org Zealandic",
	5762	+ zh=>"http://zh.wikipedia.org Chinese",
	5763	+ zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan",
	5764	+ "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan",
	5765	+ zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese",
	5766	+ "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese",
	5767	+ zh_yue=>"http://zh-yue.wikipedia.org Cantonese",
	5768	+ "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese",
	5769	+ zu=>"http://zu.wikipedia.org Zulu",
	5770	+ zz=>"  All languages",
	5771	+ zzz=>"  All languages except English"
	5772	+ );
	5773	+
	5774	+ foreach $key (keys %wikipedias)
	5775	+ {
	5776	+ my $wikipedia = $wikipedias {$key} ;
	5777	+ $out_urls {$key} = $wikipedia ;
	5778	+ $out_languages {$key} = $wikipedia ;
	5779	+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
	5780	+ $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ;
	5781	+ $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ;
	5782	+ $out_article {$key} =~ s/ /_/g ;
	5783	+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
	5784	+ }
	5785	+ $out_languages {"www"} = "Portal" ;
	5786	+}
	5787	+
	5788	+
	5789	+sub Percentage
	5790	+{
	5791	+ my $perc = shift ;
	5792	+ $perc = 100 * $perc ;
	5793	+ if ($perc == 100) { $perc = '100%' ; }
	5794	+ if ($perc == 0) { $perc = ' ' ; }
	5795	+ elsif ($perc < 0.00001) { $perc = '0.00001%' ; }
	5796	+ elsif ($perc < 0.0001) { $perc = sprintf ("%.5f%", $perc) ; }
	5797	+ elsif ($perc < 0.001) { $perc = sprintf ("%.4f%", $perc) ; }
	5798	+ elsif ($perc < 0.01) { $perc = sprintf ("%.3f%", $perc) ; }
	5799	+ elsif ($perc < 0.1) { $perc = sprintf ("%.2f%", $perc) ; }
	5800	+ else { $perc = sprintf ("%.1f%", $perc) ; }
	5801	+ return ($perc) ;
	5802	+}
	5803	+
	5804	+sub ReadWikipedia
	5805	+{
	5806	+ use LWP::Simple qw($ua get);
	5807	+
	5808	+ $ua->agent('Wikipedia Wikicounts job');
	5809	+ $ua->timeout(60);
	5810	+ my $url = 'http://en.wikipedia.org/wiki/List_of_countries_by_population';
	5811	+ my $html = get $url \|\| die "Timed out!";
	5812	+
	5813	+# open TEST, '<', 'List_of_countries_by_population.html' ;
	5814	+# @lines = <TEST> ;
	5815	+# $html = join "\n", @lines ;
	5816	+# close TEST ;
	5817	+
	5818	+ # split file on <tr>'s, remove all behind </tr>
	5819	+ $html =~ s/\n/\\n/gs ;
	5820	+ foreach $line (split "(?=<tr)", $html)
	5821	+ {
	5822	+ next if $line !~ /^<tr/ ;
	5823	+ next if $line !~ /class=\"flagicon\"/ ;
	5824	+
	5825	+ $line =~ s/(?<=<\/tr>).*$// ;
	5826	+ # print "$line\n\n" ;
	5827	+
	5828	+ @cells = split "(?=<td)", $line ;
	5829	+ # foreach $cell (@cells)
	5830	+ # { print "CELL $cell\n" ; }
	5831	+
	5832	+ if ($cells [2] =~ /<img /)
	5833	+ {
	5834	+ $icon = $cells [2] ;
	5835	+ $icon =~ s/^.?(<img[^>]>).*$/$1/ ;
	5836	+ $icon =~ s/class=\"[^\"]*\"// ;
	5837	+ $icon =~ s/\s*\/>/>/ ;
	5838	+ # print "ICON '$icon'\n" ;
	5839	+ }
	5840	+ else
	5841	+ { $icon = "n.a." ; }
	5842	+
	5843	+ if ($cells [2] =~ /title/)
	5844	+ {
	5845	+ $country = $cells [2] ;
	5846	+ $country =~ s/^.?<a [^>]>([^<])<.$/$1/ ;
	5847	+ # print "COUNTRY '$country'\n" ;
	5848	+ }
	5849	+ else
	5850	+ { $title = "n.a." ; }
	5851	+
	5852	+ if ($cells [2] =~ /<a /)
	5853	+ {
	5854	+ $link = $cells [2] ;
	5855	+ $link =~ s/^.?(<a [^>]>.?<\/a>).$/$1/ ;
	5856	+ $link =~ s/\/wiki/http:\/\/en.wikipedia.org\/wiki/ ;
	5857	+ # print "LINK '$link'\n" ;
	5858	+ }
	5859	+ else
	5860	+ { $title = "n.a." ; }
	5861	+
	5862	+ ($population = $cells [3]) =~ s/<td[^>]>(.?)<.*$/$1/, $population =~ s/,/_/g ;
	5863	+ # print "POP $population\n\n" ;
	5864	+
	5865	+ $country =~ s/,/,/g ;
	5866	+ $link =~ s/,/,/g ;
	5867	+ $icon =~ s/,/,/g ;
	5868	+
	5869	+ $countries {$country} = "$country,$link,$population,connected,$icon\n" ;
	5870	+ }
	5871	+
	5872	+ $url = 'http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users';
	5873	+ $html = get $url \|\| die "Timed out!";
	5874	+
	5875	+ # split file on <tr>'s, remove all behind </tr>
	5876	+ $html =~ s/\n/\\n/gs ;
	5877	+ foreach $line (split "(?=<tr)", $html)
	5878	+ {
	5879	+ next if $line !~ /^<tr/ ;
	5880	+ next if $line !~ /class=\"flagicon\"/ ;
	5881	+
	5882	+ $line =~ s/(?<=<\/tr>).*$// ;
	5883	+ # print "$line\n\n" ;
	5884	+
	5885	+ @cells = split "(?=<td)", $line ;
	5886	+ # foreach $cell (@cells)
	5887	+ # { print "CELL $cell\n" ; }
	5888	+
	5889	+ if ($cells [2] =~ /title/)
	5890	+ {
	5891	+ $country = $cells [2] ;
	5892	+ $country =~ s/^.?<a [^>]>([^<])<.$/$1/ ;
	5893	+ # print "COUNTRY '$country'\n" ;
	5894	+ }
	5895	+ else
	5896	+ { $country = "n.a." ; }
	5897	+
	5898	+ ($connected = $cells [3]) =~ s/<td[^>]>(.?)<.*$/$1/, $connected =~ s/,/_/g ;
	5899	+ # print "POP $population\n\n" ;
	5900	+
	5901	+ $country =~ s/,/,/g ;
	5902	+ $country =~ s/Bosnia-Herzegovina/Bosnia and Herzegovina/ ;
	5903	+ $country =~ s/Cote d'Ivoire/C�te d'Ivoire/ ;
	5904	+ $country =~ s/Macao/Macau/ ; # will be changed back later
	5905	+ $country =~ s/Samoa/American Samoa/ ;
	5906	+ $country =~ s/Timor Leste/Timor-Leste/ ;
	5907	+ $country =~ s/UAE/United Arab Emirates/ ;
	5908	+
	5909	+ $countries {$country} =~ s/connected/$connected/ ;
	5910	+ }
	5911	+
	5912	+ open COUNTRY_META_INFO, '>', "$path_out/SquidReportCountryMetaInfo.csv" ;
	5913	+ foreach $country (sort keys %countries)
	5914	+ { print COUNTRY_META_INFO $countries {$country} ; }
	5915	+ close COUNTRY_META_INFO ;
	5916	+}
	5917	+
	5918	+sub GetLanguageInfo
	5919	+{
	5920	+ my $language = shift ;
	5921	+ my ($language_name,$anchor_language) ;
	5922	+ $language_name = "$language (?)" ;
	5923	+ if ($out_languages {$language} ne "")
	5924	+ { $language_name = $out_languages {$language} ; }
	5925	+ ($anchor_language = $language_name) =~ s/ /_/g ;
	5926	+ return ($language_name,$anchor_language) ;
	5927	+}
	5928	+
	5929	+sub CountryMetaInfo
	5930	+{
	5931	+ my $country = shift ;
	5932	+print "Country '$country'\n" ; # qqq
	5933	+ my ($link_country,$icon,$population) ;
	5934	+ if ($country_meta_info {$country} eq "")
	5935	+ {
	5936	+ if ($country_meta_info_not_found_reported {$country} ++ == 0)
	5937	+ { print "_Meta info not found for country '$country'\n" ; }
	5938	+ $link_country = $country ;
	5939	+ return ($country,'','..','..') ;
	5940	+ }
	5941	+ else
	5942	+ {
	5943	+ ($link_country,$population,$connected,$icon) = split ',', $country_meta_info {$country} ;
	5944	+ $population =~ s/_//g ;
	5945	+ $connected =~ s/_//g ;
	5946	+ $link_country =~ s/,/,/g ;
	5947	+ $icon =~ s/,/,/g ;
	5948	+ $icon =~ s/>/ border=1>/ ;
	5949	+ return ($link_country,$icon,$population,$connected) ;
	5950	+ }
	5951	+}
	5952	+
	5953	+sub i2KM
	5954	+{
	5955	+ $out_million = 'M' ;
	5956	+ $out_thousand = 'K' ;
	5957	+
	5958	+ my $v = shift ;
	5959	+
	5960	+ if ($v == 0)
	5961	+ { return (" ") ; }
	5962	+ if ($v >= 100000000)
	5963	+ {
	5964	+ $v = sprintf ("%.0f",($v / 1000000)) . " " . $out_million ;
	5965	+ $v =~ s/(\d+?)(\d\d\d[^\d])/$1,$2/ ;
	5966	+ }
	5967	+ elsif ($v >= 1000000)
	5968	+ { $v = sprintf ("%.1f",($v / 1000000)) . " " . $out_million ; }
	5969	+ elsif ($v >= 10000)
	5970	+ { $v = sprintf ("%.0f",($v / 1000)) . " " . $out_thousand ; }
	5971	+ elsif ($v >= 1000)
	5972	+ { $v = sprintf ("%.1f",($v / 1000)) . " " . $out_thousand ; }
	5973	+ return ($v) ;
	5974	+}
	5975	+
	5976	+sub i2KM2
	5977	+{
	5978	+ $out_million = 'M' ;
	5979	+ $out_thousand = 'K' ;
	5980	+
	5981	+ my $v = shift ;
	5982	+ return $v if $v !~ /^\d*$/ ;
	5983	+
	5984	+# return (sprintf ("%.1f",$v/1000000)) ;
	5985	+ if ($v == 0)
	5986	+ { return (" ") ; }
	5987	+ if ($v >= 10000000)
	5988	+ { $v = sprintf ("%.0f",($v / 1000000)) . " " . $out_million ; }
	5989	+ elsif ($v >= 1000000)
	5990	+ { $v = sprintf ("%.1f",($v / 1000000)) . " " . $out_million ; }
	5991	+ elsif ($v >= 1000)
	5992	+ { $v = sprintf ("%.0f",($v / 1000)) . " " . $out_thousand ; }
	5993	+ return ($v) ;
	5994	+}
	5995	+
	5996	+# format: function(s) { return $.tablesorter.formatFloat(s.replace(/<[^>]*>/g,"").replace(/\\&nbsp\\;/g,"").replace(/M/i,"000000").replace(/М/,"000000").replace(/K/i,"000").replace(/К/i,"000")); },
	5997	+
	5998	+sub UnLink
	5999	+{
	6000	+ my ($links,$index) = @_ ;
	6001	+# print "\n\nUnLink $index\n\n" ;
	6002	+ my @segments = split '(?=<a )', $links ;
	6003	+# print "SEGMENT 1 $segments[$index]\n" ;
	6004	+ $segments [$index] =~ s/^.?<a .?>([^<]*)<\/a>/$1/ ;
	6005	+# print "SEGMENT 2 $segments[$index]\n" ;
	6006	+ $links = join '', @segments ;
	6007	+ return ($links) ;
	6008	+}
	6009	+
	6010	+sub PrintHtml
	6011	+{
	6012	+ ($html, $path) = @_ ;
	6013	+
	6014	+ $verbose = $false ;
	6015	+ if ($verbose)
	6016	+ { $html =~ s/\[([^\]]*)\]/$1/g ; }
	6017	+ else
	6018	+ { $html =~ s/\[([^\]]*)\]//g ; }
	6019	+
	6020	+ $html =~ s/and images// ; # all data [and images] onthis page are in the public domain
	6021	+ open HTML_OUT, '>', $path ;
	6022	+ print HTML_OUT $html ;
	6023	+ close HTML_OUT ;
	6024	+}
	6025	+
	6026	+sub PrintCsv
	6027	+{
	6028	+ ($csv, $path) = @_ ;
	6029	+
	6030	+ open HTML_CSV, '>', $path ;
	6031	+ print HTML_CSV $csv ;
	6032	+ close HTML_CSV ;
	6033	+}
	6034	+
	6035	+sub HtmlSortTable
	6036	+{
	6037	+ my $html = <<__HTML_SORT_TABLE__ ;
	6038	+
	6039	+<script src="jquery-1.3.2.min.js" type="text/javascript"></script>
	6040	+<script src="jquery.tablesorter.js" type="text/javascript"></script>
	6041	+
	6042	+<script type="text/javascript">
	6043	+\$.tablesorter.addParser({
	6044	+ id: "nohtml",
	6045	+ is: function(s) { return false; },
	6046	+ format: function(s) { return s.replace(/<.*?>/g,"").replace(/ /g,""); },
	6047	+ type: "text"
	6048	+});
	6049	+
	6050	+\$.tablesorter.addParser({
	6051	+ id: "millions",
	6052	+ is: function(s) { return false; },
	6053	+ format: function(s) { return \$.tablesorter.formatFloat(s.replace(/<[^>]*>/g,"").replace(/ /g,"").replace(/M/,"000000").replace(/М/,"000000").replace(/K/,"000").replace(/К/i,"000")); },
	6054	+ type: "numeric"
	6055	+});
	6056	+
	6057	+
	6058	+\$.tablesorter.addParser({
	6059	+ id: "digitsonly",
	6060	+ is: function(s) { return false; },
	6061	+ format: function(s) { return \$.tablesorter.formatFloat(s.replace(/<.*?>/g,"").replace(/ /g,"").replace(/,/g,"").replace(/-/,"-1")); },
	6062	+ type: "numeric"
	6063	+});
	6064	+</script>
	6065	+
	6066	+<style type="text/css">
	6067	+table.tablesorter
	6068	+{
	6069	+/*
	6070	+ font-family:arial;
	6071	+ background-color: #CDCDCD;
	6072	+ margin:10px 0pt 15px;
	6073	+ font-size: 7pt;
	6074	+ width: 80%;
	6075	+ text-align: left;
	6076	+*/
	6077	+}
	6078	+table.tablesorter thead tr th, table.tablesorter tfoot tr th
	6079	+{
	6080	+/*
	6081	+ background-color: #99D;
	6082	+ border: 1px solid #FFF;
	6083	+ font-size: 8pt;
	6084	+ padding: 4px;
	6085	+*/
	6086	+}
	6087	+table.tablesorter thead tr .header
	6088	+{
	6089	+ background-color: #ffffdd;
	6090	+ background-image: url(bg.gif);
	6091	+ background-repeat: no-repeat;
	6092	+ background-position: center right;
	6093	+ cursor: pointer;
	6094	+}
	6095	+table.tablesorter tbody th
	6096	+{
	6097	+/*
	6098	+ color: #3D3D3D;
	6099	+ padding: 4px;
	6100	+ background-color: #CCF;
	6101	+ vertical-align: top;
	6102	+*/
	6103	+}
	6104	+table.tablesorter tbody tr.odd th
	6105	+{
	6106	+ background-color:#eeeeaa;
	6107	+ background-image:url(asc.gif);
	6108	+}
	6109	+table.tablesorter thead tr .headerSortUp
	6110	+{
	6111	+ background-color:#eeeeaa;
	6112	+ background-image:url(asc.gif);
	6113	+}
	6114	+table.tablesorter thead tr .headerSortDown
	6115	+{
	6116	+ background-color:#eeeeaa;
	6117	+ background-image:url(desc.gif);
	6118	+}
	6119	+table.tablesorter thead tr .headerSorthown, table.tablesorter thead tr .headerSortUp
	6120	+{
	6121	+ background-color: #eeeeaa;
	6122	+}
	6123	+</style>
	6124	+__HTML_SORT_TABLE__
	6125	+return ($html) ;
	6126	+}
	6127	+
	6128	+sub HtmlSortTableColumns
	6129	+{
	6130	+ my $html = <<__HTML_SORT_TABLE_COLUMNS__ ;
	6131	+
	6132	+<script type='text/javascript'>
	6133	+\$('#table1').tablesorter({
	6134	+ // debug:true,
	6135	+ headers:{0:{sorter:'nohtml'},1:{sorter:'nohtml'},2:{sorter:'nohtml'},3:{sorter:'millions'},4:{sorter:'millions'},5:{sorter:'millions'},6:{sorter:'digitsonly'},7:{sorter:'digitsonly'},6:{sorter:'digitsonly'},7:{sorter:'digitsonly'}}
	6136	+});
	6137	+</script>
	6138	+__HTML_SORT_TABLE_COLUMNS__
	6139	+return ($html) ;
	6140	+}
	6141	+
	6142	+sub HtmlIndex
	6143	+{
	6144	+ $index = shift ;
	6145	+
	6146	+ my $html = <<__HTML_INDEX__ ;
	6147	+
	6148	+<script type="text/javascript">
	6149	+<!--
	6150	+function toggle_visibility_index()
	6151	+{
	6152	+ var index = document.getElementById('index');
	6153	+ var toggle = document.getElementById('toggle');
	6154	+ if (index.style.display == 'block')
	6155	+ {
	6156	+ index.style.display = 'none';
	6157	+ toggle.innerHTML = 'Show index';
	6158	+ }
	6159	+ else
	6160	+ {
	6161	+ index.style.display = 'block';
	6162	+ toggle.innerHTML = 'Hide index';
	6163	+ }
	6164	+}
	6165	+//-->
	6166	+</script>
	6167	+
	6168	+<tr><td class=r colspan=99><a href="#" id='toggle' onclick="toggle_visibility_index();">Show index</a></td></tr>
	6169	+<tr><td class=l colspan=99><span id='index' style="display:none">\n$index\n</span></td></tr>
	6170	+__HTML_INDEX__
	6171	+
	6172	+return ($html) ;
	6173	+}
	6174	+
	6175	+sub hsv_to_rgb {
	6176	+
	6177	+ my $h = shift;
	6178	+ my $s = shift;
	6179	+ my $v = shift;
	6180	+
	6181	+ # limit this to h values between 0 and 360 and s/v values
	6182	+ # between 0 and 1
	6183	+
	6184	+ unless (defined($h) && defined($s) && defined($v) &&
	6185	+ $h >= 0 && $s >= 0 && $v >= 0 &&
	6186	+ $h <= 360 && $s <= 1 && $v <= 1) {
	6187	+ return (undef, undef, undef);
	6188	+ }
	6189	+
	6190	+ my $r;
	6191	+ my $g;
	6192	+ my $b;
	6193	+
	6194	+ # 0.003 is less than 1/255; use this to make the floating point
	6195	+ # approximation of zero, since the resulting rgb values will
	6196	+ # normally be used as integers between 0 and 255. Feel free to
	6197	+ # change this approximation of zero to something else, if this
	6198	+ # suits you.
	6199	+
	6200	+ if ($s < 0.003) {
	6201	+ $r = $g = $b = $v;
	6202	+ }
	6203	+ else {
	6204	+
	6205	+ $h /= 60;
	6206	+ my $sector = int($h);
	6207	+ my $fraction = $h - $sector;
	6208	+
	6209	+ my $p = $v * (1 - $s);
	6210	+ my $q = $v * (1 - ($s * $fraction));
	6211	+ my $t = $v * (1 - ($s * (1 - $fraction)));
	6212	+
	6213	+ if ($sector == 0) { $r = $v; $g = $t; $b = $p; }
	6214	+ elsif ($sector == 1) { $r = $q; $g = $v; $b = $p; }
	6215	+ elsif ($sector == 2) { $r = $p; $g = $v; $b = $t; }
	6216	+ elsif ($sector == 3) { $r = $p; $g = $q; $b = $v; }
	6217	+ elsif ($sector == 4) { $r = $t; $g = $p; $b = $v; }
	6218	+ else { $r = $v; $g = $p; $b = $q; }
	6219	+ }
	6220	+
	6221	+ # Convert the r/g/b values to all be between 0 and 255; use the
	6222	+ # ol' 0.003 approximation again, with the same comment as above.
	6223	+
	6224	+ $r = ($r < 0.003 ? 0.0 : $r * 255);
	6225	+ $g = ($g < 0.003 ? 0.0 : $g * 255);
	6226	+ $b = ($b < 0.003 ? 0.0 : $b * 255);
	6227	+
	6228	+ return ($r, $g, $b);
	6229	+ }
	6230	+
	6231	+sub hsv2rgb
	6232	+{
	6233	+ my ($h,$s,$v) = @_;
	6234	+ my ($p,$q) ;
	6235	+ ($v,$p,$q) = hsv_to_rgb ($h,$s,$v) ;
	6236	+ my $color = "\#" . sprintf ("%02X", int($v)) . sprintf ("%02X", int($p)) . sprintf ("%02X", int($q)) ;
	6237	+ return ($color) ;
	6238	+}
	6239	+
	6240	+sub HtmlWorldMaps
	6241	+{
	6242	+my $html_worldmaps = <<__HTML_WORLD_MAPS__ ;
	6243	+<tr><td colspan=99 align=center>
	6244	+<table width='100%' align=center><td align=left>
	6245	+<small>
	6246	+<img src='http://upload.wikimedia.org/wikipedia/commons/thumb/b/b1/World_population.PNG/400px-World_population.PNG' border='1'>
	6247	+<br><a href='http://en.wikipedia.org/wiki/List_of_countries_by_population'>Countries by population</a> - English Wikipedia
	6248	+</small>
	6249	+</td><td>
	6250	+<small>
	6251	+<img src='http://upload.wikimedia.org/wikipedia/commons/thumb/a/af/Internet_Penetration.png/400px-Internet_Penetration.png' border='1'>
	6252	+<br><a href='http://en.wikipedia.org/wiki/List_of_countries_by_number_of_Internet_users'>Internet penetration</a> (% of population) - English Wikipedia
	6253	+</small>
	6254	+</td></tr>
	6255	+<tr><td>
	6256	+<small>
	6257	+<img src='http://upload.wikimedia.org/wikipedia/commons/thumb/4/46/North_South_divide.svg/400px-North_South_divide.svg.png' border='1'>
	6258	+<br><a href='http://en.wikipedia.org/wiki/North-South_divide'>Global North South</a> - English Wikipedia
	6259	+</small>
	6260	+</td></tr>
	6261	+</table>
	6262	+</td></tr>
	6263	+__HTML_WORLD_MAPS__
	6264	+
	6265	+return $html_worldmaps ;
	6266	+}

Status & tagging log

15:20, 22 February 2011 😂 (talk | contribs) changed the status of r82606 [removed: new added: deferred]