r85013 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r85012‎ | r85013 | r85014 >
Date:14:48, 30 March 2011
Author:ezachte
Status:deferred
Tags:
Comment:
Misc. updates, a.o. reorganized and extended page view stats
Modified paths:
  • /trunk/wikistats/dumps/WikiReports.pl (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsConversions.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsInput.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsLiterals.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsOutputEditHistory.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsOutputMisc.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsOutputPageViews.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsOutputSummaries.pm (added) (history)
  • /trunk/wikistats/dumps/WikiReportsOutputTables.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsProcessReverts.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReportsScripts.pm (modified) (history)
  • /trunk/wikistats/dumps/WikiReports_EN.pm (modified) (history)

Diff [purge]

Index: trunk/wikistats/dumps/WikiReportsConversions.pm
@@ -170,7 +170,7 @@
171171 { return (sprintf ("%02d/%02d/%04d", $dumpmonth, $dumpday, $dumpyear)) ; }
172172 }
173173
174 -# code year,month as monthes since january 2000 (1 byte)
 174+# code year,month as months since 1 January 2000 (1 byte)
175175 sub yyyymm2b
176176 {
177177 my $year = shift ;
Index: trunk/wikistats/dumps/WikiReportsProcessReverts.pm
@@ -25,7 +25,7 @@
2626
2727 foreach $wp (@languages)
2828 {
29 - # next if $wp ne "fy" ;
 29+ # next if $wp ne "en" ;
3030
3131 undef %reverts_per_article ;
3232 undef %reverts_in_non_article_namespaces ;
Index: trunk/wikistats/dumps/WikiReports.pl
@@ -1,5 +1,5 @@
22 #!/usr/bin/perl
3 -# Copyright (C) 2003-2010 Erik Zachte , email erikzachte\@xxx.com (nospam: xxx=infodisiac)
 3+# Copyright (C) 2003-2008 Erik Zachte , email ezachte a-t wikimedia d-o-t org
44 # This program is free software; you can redistribute it and/or
55 # modify it under the terms of the GNU General Public License version 2
66 # as published by the Free Software Foundation.
@@ -7,71 +7,105 @@
88 # but WITHOUT ANY WARRANTY; without even the implied warranty of
99 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
1010 # See the GNU General Public License for more details, at
11 -# http://www.fsf.org/licenses/gpl.html
 11+# http://www.fsf.org/licenses/gpl.html
1212
13 -# Disclaimer: most of these sources have been developed in limited free time.
14 -# Over the years complexity of the sources grew, sometimes at the expense of maintainability.
15 -# Some design decisions have not scaled well.
16 -# Some parts of the code are hard to read due to overly concise or obscure variable names
17 -# (WikiCounts.. files suffer less from this than WikiReports.. files).
18 -# although in general I try to choose descriptive variable and function names.
19 -# There is little documentation, too few comments in the code.
20 -# Sometimes obsolete code has been commented out rather than deleted to ease re-activation.
21 -# Some code contains hard coded file paths mainly to Erik's test environment (Windows)
22 -
23 -# On the bright side:
24 -# Most code produces a decent audit trail, which can help understand process flow.
25 -# Great care has been taken to produce output that is tuned to each specific project.
26 -
2713 use lib "/home/ezachte/lib" ;
2814 use EzLib ;
2915 $trace_on_exit = $true ;
30 - ez_lib_version (11) ;
 16+ ez_lib_version (14) ;
3117
32 -# set defaults mainly for tests on local machine
33 -# default_argv "-a|-m wk|-l en|-i 'D:\@Wikimedia\\# Out Bayes\\csv_wk'|-o 'D:\@Wikimedia\\# Out Test\\htdocs2'" ;
34 -# default_argv "-g|-t|-m wp|-l en|-i 'D:\@Wikimedia\\# Out Bayes\\csv_wp'|-o 'D:\@Wikimedia\\# Out Test\\htdocs\\'" ;
35 -# default_argv "-t|-m wx|-l en|-i 'D:\@Wikimedia\\# Out Bayes\\csv_wx'|-o 'D:\@Wikimedia\\# Out Test\\htdocs\\wikispecial'" ;
36 -# default_argv "-v m|-n|-g|-t|-m wp|-l en|-i 'D:\@Wikimedia\\# Out Bayes\\csv_wp'|-o 'D:\@Wikimedia\\# Out Test\\htdocs'" ; # for page views
37 -# default_argv "-r africa|-g|-t|-m wp|-l en|-i 'D:\@Wikimedia\\# Out Bayes\\csv_wp'|-o 'D:\@Wikimedia\\# Out Test\\htdocs'" ;
38 -# default_argv "-g|-t|-m wp|-l en|-i 'D:\@Wikimedia\\# Out Bayes\\csv_wp'|-o 'D:\@Wikimedia\\# Out Test\\htdocs'" ;
39 - default_argv "-v m|-r africa|-n|-g|-t|-m wp|-l en|-i 'D:\@Wikimedia\\# Out Bayes\\csv_wp'|-o 'D:\@Wikimedia\\# Out Test\\htdocs'" ;
 18+# build argument list for test run in OptiPerl IDE (Erik's home test env)
 19+# arguments are parsed in WikiReportsInput:ParseArguments
# Test runs only: assemble the arguments that would otherwise arrive on the
# command line. On the production server @arguments is left untouched here.
if (! $job_runs_on_production_server)
{
  # --- report selection (each of these is mutually exclusive with other reporting) ---

  # push @arguments, '-a' ;   # generate input files (.js) for animations, see e.g.
                              # http://stats.wikimedia.org/wikimedia/animations/growth/AnimationProjectsGrowthWp.html

  # NOTE(review): the example urls for '-v m' and '-v n' look swapped (the 'Mobile' page is listed under non-mobile) - confirm
  # push @arguments, '-v m' ; # generate tables with pageviews per wiki: mobile sites
                              # e.g. http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
  # push @arguments, '-v n' ; # generate tables with pageviews per wiki: non-mobile traffic
                              # e.g. http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm
  push @arguments, '-v c' ;   # generate tables with pageviews per wiki: mobile + non-mobile traffic
                              # e.g. http://stats.wikimedia.org/EN/TablesPageViewsMonthlyCombined.htm
  # push @arguments, '-n' ;   # normalize monthly page view data (see -v) to 30 days for each month

  # push @arguments, '-G' ;   # generate .html and .bat (DOS batch) files to get screen shots of all Wikimedia main pages, using url2bmp.exe (Windows only)

  # push @arguments, '-c' ;   # generate category trees

  # --- region: only one region per run, no region specified -> all languages ---
  # push @arguments, '-r india' ;
  # push @arguments, '-r africa' ;
  # push @arguments, '-r america' ;
  # push @arguments, '-r asia' ;
  # push @arguments, '-r europe' ;
  # push @arguments, '-r oceania' ;
  # push @arguments, '-r articifial' ;

  # --- project ---
  $mode = 'wp' ; # specify wp=wikipedia (default), wb=wikibooks, wk=wiktionary, wn=wikinews, wq=wikiquote, ws=wikisource, wv=wikiversity, wx=wikispecial
  $mode =~ s/\s//g ;
  push @arguments, "-m $mode" ;

  # output sub folder per project; wikipedia (wp) has none and writes straight into htdocs
  my %folder_per_mode =
  (
    wb => 'wikibooks',
    wk => 'wiktionary',
    wn => 'wikinews',
    wq => 'wikiquote',
    ws => 'wikisource',
    wv => 'wikiversity',
    wx => 'wikispecial',
  ) ;
  if (exists $folder_per_mode {$mode})
  { $folder = $folder_per_mode {$mode} ; }

  push @arguments, '-l en' ; # output language (ISO codes), see WikiReportsLiterals for acceptable codes

  push @arguments, "-i 'W:\\# Out Bayes\\csv_$mode'" ; # input directory: csv files

  # Only the main page gallery (-G) has an output folder of its own.
  # Testing for -a (animations) here would contradict the folder that is selected.
  if (grep { /^-G\b/ } @arguments)
  { push @arguments, "-o 'W:\\\@ Main Page Gallery'" ; }             # output directory: batch files for capturing all Wikimedia main pages
  else
  { push @arguments, "-o 'W:\\# Out Test\\htdocs\\$folder'" ; }      # output directory: html files, image files (plots)

  push @arguments, '-g' ; # convert generated gif (Ploticus) nconvert.exe, on Windows platform only

  push @arguments, '-t' ; # test mode

  # push @arguments, '-p [some path]' ; # path to Ploticus exe
  # push @arguments, '-s wikimedia' ;   # site for which stats are run
}

# hand the '|' separated argument string to EzLib
default_argv (join ('|', @arguments)) ;
 81+
4182 # to do
4283 # change: figures for first months are too low -> figures for early 2001
43 -
4484 # and remove this notice at all on project pages that start to report from 2002 or later
4585
46 -
47 - # use Statistics:LineFit ;
48 - # use warnings ;
49 - # use strict 'vars' ;
50 -
 86+ use WikiReportsConversions ;
 87+ use WikiReportsDate ;
 88+ use WikiReportsHtml ;
5189 use WikiReportsInput ;
52 - use WikiReportsOutputTables ;
 90+ use WikiReportsLiterals ;
 91+ use WikiReportsLocalizations ;
 92+ use WikiReportsNoWikimedia ;
 93+ use WikiReportsOutputAnimations ;
 94+ use WikiReportsOutputCategories ;
5395 use WikiReportsOutputCharts ;
 96+ use WikiReportsOutputEditHistory ;
 97+ use WikiReportsOutputMisc ;
 98+ use WikiReportsOutputPageViews ;
5499 use WikiReportsOutputPlots ;
55 - use WikiReportsOutputMisc ;
56 -
57 - use WikiReportsOutputAnimations ;
58 - use WikiReportsOutputCategories ;
 100+ use WikiReportsOutputSummaries ;
 101+ use WikiReportsOutputTables ;
59102 use WikiReportsOutputTimelines ;
60103 use WikiReportsOutputWikibooks ;
61 - use WikiReportsOutputPageViews ;
62104 use WikiReportsProcessReverts ;
63 - use WikiReportsOutputEditHistory ;
64 -
65105 use WikiReportsScripts ;
66 - use WikiReportsDate ;
67 - use WikiReportsHtml ;
68 - use WikiReportsConversions ;
69 - use WikiReportsLocalizations ;
70 - use WikiReportsLiterals ;
71 - use WikiReportsNoWikimedia ;
72106
73107 no warnings 'uninitialized';
74108
75 - $version = "2.5" ;
 109+ $version = "2.6" ; # versioning has not been maintained consistently
76110 $timestart = time ;
77111 $Kb = 1024 ;
78112 $Mb = $Kb * $Kb ;
@@ -118,6 +152,7 @@
119153 &GenerateAnimationsInputSizeAndCommunity ;
120154 &LogT ("Ready\n") ;
121155 close "FILE_LOG" ;
 156+ exit ;
122157 }
123158
124159 &SetScripts ;
@@ -126,11 +161,11 @@
127162 {
128163 &LogT ("\nRead Monthly Statistics") ;
129164 &ReadMonthlyStats ;
130 - &LogT ("\nWrite Page Views Totals Report") ;
 165+ &LogT ("\nWrite Page Views Totals Report\n") ;
131166 &WritePageViewsMonthly ;
132167 &WriteMonthlyStatsHtmlAllProjects ;
133 - &GenerateComparisonTablePageviewsAllProjects ('non-mobile,normalized') ;
134 - &GenerateComparisonTablePageviewsAllProjects ('non-mobile,not-normalized') ;
 168+ &GenerateComparisonTablePageviewsAllProjects ($true) ; # normalized
 169+ &GenerateComparisonTablePageviewsAllProjects ($false) ; # not normalized
135170 &LogT ("\n\nExecution took " . ddhhmmss (time - $timestart). ".\n") ;
136171 &LogT ("Ready\n") ;
137172 close "FILE_LOG" ;
@@ -153,12 +188,13 @@
154189 &LogT ("\nRead Monthly Statistics") ;
155190 &ReadMonthlyStats ;
156191
157 -#&GenerateTableZeitGeist ('commons') ;
158 -# generate .html and .bat (DOS batch) files to get screen shots of all Wikipedia main pages
159 -# reduced to 40% using url2bmp.exe
160 -# &GenerateSiteMap ;
161 -# &GenerateGallery ;
162 -# exit ;
 192+ if ($dump_gallery)
 193+ {
 194+ # generate .html and .bat (DOS batch) files to get screen shots of all Wikipedia main pages
 195+ # reduced to 40% using url2bmp.exe
 196+ &GenerateGallery ;
 197+ exit ;
 198+ }
163199
164200 &LogT ("\nGenerate Current Status") ;
165201 if (! $singlewiki)
@@ -170,6 +206,12 @@
171207 else
172208 { &GenerateSiteMap ; }
173209
 210+ if ($mode_wp && ($language eq "en"))
 211+ {
 212+ &LogT ("\nGenerate Summaries Per Wiki") ;
 213+ &GenerateSummariesPerWiki ;
 214+ }
 215+
174216 # &GenerateTablesPerWiki ("zz") ;
175217 # &GenerateComparisonTables ;
176218
@@ -233,7 +275,7 @@
234276 if ($mode_wp)
235277 { &GenerateTablesPerWiki ("zzz") ; }
236278
237 - &LogT ("\nGenerate Wikipedia Specific Charts") ;
 279+ &LogT ("\nGenerate Wikipedia Specific Charts" ) ;
238280 foreach $wp (@languages)
239281 { &GenerateChartsPerWikipedia ($wp) ; }
240282 if ($mode_wp)
Index: trunk/wikistats/dumps/WikiReportsLiterals.pm
@@ -211,6 +211,7 @@
212212 kaa=>"http://kaa.wikipedia.org Karakalpak [0.41,AS]",
213213 kab=>"http://ka.wikipedia.org Kabyle [8,AF]",
214214 kaw=>"http://kaw.wikipedia.org Kawi",
 215+ kbd=>"http://kbd.wikipedia.org Karbadian [1.6,AS]",
215216 kg=>"http://kg.wikipedia.org Kongo [7,AF]",
216217 ki=>"http://ki.wikipedia.org Kikuyu [5.4,AF]",
217218 kj=>"http://kj.wikipedia.org Kuanyama",
@@ -229,6 +230,7 @@
230231 ky=>"http://ky.wikipedia.org Kirghiz [5,AS]",
231232 la=>"http://la.wikipedia.org Latin [,W]",
232233 lad=>"http://lad.wikipedia.org Ladino [0.109,AS]",
 234+ ltg=>"http://ltg.wikipedia.org Latgalian [0.15,EU]",
233235 lb=>"http://lb.wikipedia.org Luxembourgish [0.39,EU]", # was Letzeburgesch
234236 lbe=>"http://lbe.wikipedia.org Lak [0.12,AS]",
235237 lg=>"http://lg.wikipedia.org Ganda [10,AF]",
@@ -299,6 +301,7 @@
300302 ps=>"http://ps.wikipedia.org Pashto [26,AS]",
301303 pt=>"http://pt.wikipedia.org Portuguese [290,EU,SA,AF,AS]",
302304 qu=>"http://qu.wikipedia.org Quechua [10.4,SA]",
 305+ rue=>"http://rue.wikipedia.org Rusyn [0.6,EU]",
303306 rm=>"http://rm.wikipedia.org Romansh [0.035,EU]", # was Rhaeto-Romance
304307 rmy=>"http://rmy.wikipedia.org Romani [2.5,EU]",
305308 rn=>"http://rn.wikipedia.org Kirundi [4.6,AF]",
@@ -569,6 +572,9 @@
570573 $out_mainpage = "http://" . $wp . ".wikipedia.org" ;
571574 $out_wikipage = "/wiki/" ;
572575
 576+ $out_csv_files = "CSV files" ;
 577+
 578+
573579 # ten thousand two hundred three + 4/10 = 10,203.4
574580 $out_thousands_separator = "," ;
575581 $out_decimal_separator = "." ;
@@ -614,7 +620,49 @@
615621 "<font color='#008000'>25\% &lt; x &lt; 75\%</font>&nbsp;&nbsp;&nbsp;&nbsp;" .
616622 "<font color='#008000'><u>75\% &lt; x</u></font></small>\n" ;
617623
618 - $out_documentation = "For documentation see <a href='http://meta.wikipedia.org/wiki/Wikistats'>meta</a>" ; #new
 624+ $out_documentation = "Documentation" ; # was "For documentation see <a href='http://meta.wikipedia.org/wiki/Wikistats'>meta</a>" ;
619625 }
620626
# Returns the base url (with trailing slash) of the wiki with code $wp,
# for the project selected by the global $mode_.. flags.
# Underscores in the result are replaced by dashes (e.g. zh_min_nan -> zh-min-nan).
# When no mode flag is set the result is undefined.
sub GetProjectBaseUrl
{
  my $wp = shift ;
  my $base ;

  # every flag that is set is applied in this order, so the last one set wins
  my @domain_per_mode =
  (
    [$mode_wb, 'wikibooks'],
    [$mode_wk, 'wiktionary'],
    [$mode_wn, 'wikinews'],
    [$mode_wp, 'wikipedia'],
    [$mode_wq, 'wikiquote'],
    [$mode_ws, 'wikisource'],
    [$mode_wv, 'wikiversity'],
  ) ;

  foreach my $entry (@domain_per_mode)
  {
    my ($mode_is_set, $domain) = @$entry ;
    next if ! $mode_is_set ;
    $base = "http://$wp.$domain.org/" ;
  }

  # wikispecial: a handful of wikis with a url of their own, the rest lives on wikimedia.org
  if ($mode_wx)
  {
    my %url_special =
    (
      sources    => "http://wikisource.org/",
      sep11      => "http://sep11.wikipedia.org/",
      foundation => "http://wikimediafoundation.org/",
      species    => "http://species.wikipedia.org/",
      mediawiki  => "http://www.mediawiki.org/",
    ) ;

    if (exists $url_special {$wp})
    { $base = $url_special {$wp} ; }
    elsif ($wp =~ /(\w\w+)(wikimedia)/) # chapter wikis, code = prefix followed by 'wikimedia'
    { $base = "http://$1.wikimedia.org/" ; }
    else
    { $base = "http://$wp.wikimedia.org/" ; }
  }

  $base =~ s/_/-/g ; # e.g. zh-min-nan

  return $base ;
}
 668+
621669 1;
Index: trunk/wikistats/dumps/WikiReportsOutputSummaries.pm
@@ -0,0 +1,855 @@
 2+#!/usr/bin/perl
 3+
 4+# needed files
 5+# StatisticsMonthly.csv
 6+# StatisticsUserActivitySpread.csv
# Writes one summary page per wiki (Summary[Code].htm) plus one page that
# combines all summaries (ReportCardIndia.htm), both into $path_out.
#
# For every language in @languages (except those flagged in %skip and the
# all-z totals codes) it fetches the summary html from GetSummaryPerWiki,
# has both plots generated, and fills in the placeholders that
# GetSummaryPerWiki leaves in its html:
#   EXPLANATION, SEE_ALSO : filled on the stand-alone page, blanked on the combined page
#   SOURCE                : filled on the combined page, blanked on the stand-alone page
#   TOP                   : link back to the index, on the combined page only
# $see_also, $source and $out_style2 are globals set by GetSummaryPerWiki.
sub GenerateSummariesPerWiki
{
  my @months_en = qw (January February March April May June July August September October November December);
  ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time);
  $summaries_published = "$mday " . $months_en [$mon] . " " . ($year+1900) ;

  $col_highlight = "#8080FF" ;

$explanation = <<__HTML_SUMMARY_EXPLANATION__ ;
 <td class=l colspan=99 width=100%>
 <b>Definitions</b><p>
 All metrics below only take into account proper articles (aka namespace 0 pages),<br>which excludes discussion, help, project, etc pages.
 <p>
 <dl>
 <dt><b>Page Views</b><dd>The chart does not necessarily cover all years of a project's existence.
 <br>The first year presented is the year where page views reached at least 1/100th of all-time maximum.
 <dt><b>Article Count</b><dd>An article is defined as any page in namespace 0 which contains an internal link
 <dt><b>New Articles per Day</b><dd>
 <dt><b>Edits per Month</b><dd>
 <dt><b>Active Editors</b><dd>Registered (and signed in) users who made 5 or more edits in a month
 <dt><b>Very Active Editors</b><dd>Registered (and signed in) users who made 100 or more edits in a month
 <dt><b>New Editors</b><dd>Registered (and signed in) users who completed their all time 10th edit in this month
 <dt><b>Speakers</b><dd>Includes secondary language speakers, data from the English Wikipedia (page on this language)
 <dt><b>Editors per Million Speakers</b><dd> aka Participation Rate
 </dl>
 </td>
__HTML_SUMMARY_EXPLANATION__

$explanation2 = <<__HTML_SUMMARY_EXPLANATION_2__ ;
<table width=660 cellpadding=18 align=center border=1>
<tr>
 $explanation
</tr>
</table>
__HTML_SUMMARY_EXPLANATION_2__

$header_all = <<__HTML_SUMMARY_HEADER_ALL__ ;
<a name='top' id='top'></a>
<table width=660 cellpadding=18 align=center border=1 style="background-color:white">
<tr>
 <td class=c colspan=99 width=100%>

 <table width=100% border=0>
 <tr>
 <td width=100% colspan=99>

 <table width=100% border=0>
 <tr>
 <td class=c width=80% valign=top>
 <h2>WMF Report Card <font color=$col_highlight>India</h2>
 INDEX
 </td>
 </tr>
 </table>

 </td>
 </tr>
 </table>
 </td>
</tr>
</table>
&nbsp;<p>
__HTML_SUMMARY_HEADER_ALL__

  $out_html_all = '' ;
  $title_all    = "WMF Report Card India" ; # set up front, so the combined page has a title even when no wiki qualifies

  foreach $wp (@languages)
  {
    next if $skip {$wp} ;
    next if $wp =~ /^z+$/ ; # totals for all languages

    $out_html = &GetSummaryPerWiki ($wp) ; # also sets $see_also, $source, $out_style2
    &GeneratePlotEditors ($wp) ;
    &GeneratePlotPageviews ($wp) ;

    my $file_html = $path_out . "Summary" . ucfirst ($wp) . ".htm" ;
    print "Write $file_html\n" ;

    # $out_html -> stand-alone page, $out_html2 -> section on the combined page
    $out_html2 = $out_html ;

    # replace the longer placeholder first: EXPLANATION is a prefix of EXPLANATION2
    $out_html2 =~ s/EXPLANATION2/$explanation2/ ;
    $out_html  =~ s/EXPLANATION2// ;

    $out_html2 =~ s/EXPLANATION// ;
    $out_html  =~ s/EXPLANATION/$explanation/ ;

    $out_html2 =~ s/SEE_ALSO// ;
    $out_html  =~ s/SEE_ALSO/$see_also/ ;

    $out_html  =~ s/SOURCE// ;
    $out_html2 =~ s/SOURCE/$source/ ;

    $out_html  =~ s/TOP/&nbsp;/ ;
    $out_html2 =~ s/TOP/<a href='#top'>top<\/a>&nbsp;&lArr;/ ;

    $out_html_all .= $out_html2 . "&nbsp;<p>" ;

$out_html = <<__HTML_SUMMARY_ONE__ ;
<html>
<head>
<title>Wikimedia project at a glance</title>
<meta http-equiv="Content-type" content="text/html; charset=iso-8859-1">
<meta name="robots" content="index,follow">
<script language="javascript" type="text/javascript" src="../WikipediaStatistics14.js"></script>
$out_style2
</head>
<body>
$out_html
</body>
</html>
__HTML_SUMMARY_ONE__

    &SummaryWriteHtmlFile ($file_html, $out_html) ;

    # last if $summaries_written++ > 3 # test
  }

$out_html_all = <<__HTML_SUMMARY_ALL__ ;
<html>
<head>
<title>$title_all</title>
<meta http-equiv="Content-type" content="text/html; charset=iso-8859-1">
<meta name="robots" content="index,follow">
<script language="javascript" type="text/javascript" src="../WikipediaStatistics14.js"></script>
$out_style2
</head>
<body>
$header_all
$out_html_all
$explanation2
</body>
</html>
__HTML_SUMMARY_ALL__

  &SummaryAddIndexes ; # NOTE(review): presumably builds $index_html - confirm in SummaryAddIndexes
  $out_html_all =~ s/INDEX/$index_html/ ;

  $file_html_all = $path_out . "ReportCardIndia.htm" ;
  &SummaryWriteHtmlFile ($file_html_all, $out_html_all) ;
}

# Writes $html, passed through AlignPerLanguage, to $file.
# A file that can not be opened is reported on stdout instead of being ignored silently.
sub SummaryWriteHtmlFile
{
  my ($file, $html) = @_ ;

  if (open my $file_out, '>', $file)
  {
    print {$file_out} &AlignPerLanguage ($html) ;
    close $file_out ;
  }
  else
  { print "Could not write $file: $!\n" ; }
}
 153+
# Builds the html summary ('at a glance' table plus two plot images) for one wiki.
#
# Parameter: $wp, the language code of the wiki.
# Returns:   the html for this wiki, still containing the placeholders TOP,
#            EXPLANATION, SEE_ALSO and SOURCE, which the caller fills in or blanks.
# Globals set for the caller: $see_also, $source (html snippets for the
# placeholders), $out_style2 (adapted copy of $out_style) and $month_year.
# All other metric variables are globals as well, as elsewhere in these scripts.
sub GetSummaryPerWiki
{
  my $wp = shift ;

  my @months_en = qw (January February March April May June July August September October November December);

  my $html ;

  # last month with data for this wiki, minus one when that month is still incomplete
  my $m = $MonthlyStatsWpStop {$wp} ;
  if ($month_max_incomplete)
  { $m-- ; }
  my $mmddyyyy = &m2mmddyyyy ($m) ;

  $month_year = $months_en [substr ($mmddyyyy,0,2)-1] . " " . substr ($mmddyyyy,6,4) ;
  my $out_language_name = $out_languages {$wp} ;

  my $main_page = &GetProjectBaseUrl ($wp) ;

  print "\n\n$wp:$out_language_name $out_publication\nmonth $m -> '$month_year' ;\n" ;

  # page views

  $daysinmonth    = days_in_month (substr ($mmddyyyy,6,4), substr ($mmddyyyy,0,2)) ; # only used in the log line below
  $pageviews      = sprintf ("%.0f", ($PageViewsPerHour {$wp} * 24 * 30)) ; # use normalized count (month always 30 days)
  $pageviews_day  = $pageviews / 30 ; # $daysinmonth ;
  $pageviews_hour = $pageviews_day / 24 ;
  $pageviews_min  = $pageviews_day / (24 * 60) ;
  $pageviews_sec  = $pageviews_day / (24 * 60 * 60) ;

  $this_month        = $pageviews ;
  $metric_PV_yearly  = "--" ;
  $metric_PV_monthly = "--" ;

  print "DAYSINMONTH $month_year: $daysinmonth, PAGEVIEWS $pageviews\n" ;

  $metric_PV_data = &FormatSummary ($this_month) ;

  $pageviews      = &format($pageviews,'X') ;
  $pageviews_day  = &format($pageviews_day,'X') ;
  $pageviews_hour = &format($pageviews_hour,'X') ;
  $pageviews_min  = &format($pageviews_min,'X') ;
  $pageviews_sec  = &format($pageviews_sec,'X') ;

  # only mention the smaller units when they amount to at least one view
  # NOTE(review): these tests compare the formatted strings numerically - presumably 'format' yields a leading number, confirm
  if ($pageviews_sec >= 1)
  { $pageviews_per_unit = "$pageviews/month = $pageviews_day /day = $pageviews_hour /hour = $pageviews_min /minute = $pageviews_sec /second" ; }
  elsif ($pageviews_min >= 1)
  { $pageviews_per_unit = "$pageviews/month = $pageviews_day /day = $pageviews_hour /hour = $pageviews_min /minute" ; }
  else
  { $pageviews_per_unit = "$pageviews/month = $pageviews_day /day = $pageviews_hour /hour " ; }
  $pageviews_per_unit =~ s/M/million/g ;
  $pageviews_per_unit =~ s/k/thousand/g ;
  $pageviews_per_unit =~ s/\// per /g ;

  # Each metric below is read from %MonthlyStats with key = language code . month . column code from @c,
  # for the reporting month, the month before and the same month a year earlier.

  # article count
  $this_month = $MonthlyStats {$wp.$m.$c[4]} ;
  $prev_month = $MonthlyStats {$wp.($m-1).$c[4]} ;
  $prev_year  = $MonthlyStats {$wp.($m-12).$c[4]} ;
  print "Article Count 0:$this_month, -1:$prev_month, -12:$prev_year\n" ;
  $metric_AC_yearly  = &SummaryTrendChange ($this_month, $prev_year) ;
  $metric_AC_monthly = &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_AC_data    = &FormatSummary ($this_month) ;

  # new articles per day
  $this_month = $MonthlyStats {$wp.$m.$c[6]} ;
  $prev_month = $MonthlyStats {$wp.($m-1).$c[6]} ;
  $prev_year  = $MonthlyStats {$wp.($m-12).$c[6]} ;
  # thousands separators: repeat from the right, so 12345 becomes 12,345 and 1234567 becomes 1,234,567
  1 while $this_month =~ s/^(\d+)(\d\d\d)/$1,$2/ ;
  print "New Articles Per Day 0:$this_month, -1:$prev_month, -12:$prev_year\n" ;
  $metric_NAD_yearly  = '--&nbsp;&nbsp;&nbsp;&nbsp;' ; # &SummaryTrendChange ($this_month, $prev_year) ;
  $metric_NAD_monthly = '--&nbsp;&nbsp;&nbsp;&nbsp;' ; # &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_NAD_data    = &FormatSummary ($this_month) ;

  # edits per month
  $this_month = $MonthlyStats {$wp.$m.$c[11]} ;
  $prev_month = $MonthlyStats {$wp.($m-1).$c[11]} ;
  $prev_year  = $MonthlyStats {$wp.($m-12).$c[11]} ;
  print "Edits Per Month 0:$this_month, -1:$prev_month, -12:$prev_year\n" ;
  $metric_EPM_yearly  = &SummaryTrendChange ($this_month, $prev_year) ;
  $metric_EPM_monthly = &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_EPM_data    = &FormatSummary ($this_month) ;

  # active editors
  $this_month = $MonthlyStats {$wp.$m.$c[2]} ;
  $prev_month = $MonthlyStats {$wp.($m-1).$c[2]} ;
  $prev_year  = $MonthlyStats {$wp.($m-12).$c[2]} ;
  print "Active Editors 0:$this_month, -1:$prev_month, -12:$prev_year\n" ;
  $metric_AE_yearly  = &SummaryTrendChange ($this_month, $prev_year) ;
  $metric_AE_monthly = &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_AE_data    = &FormatSummary ($this_month) ;

  # very active editors
  $this_month = $MonthlyStats {$wp.$m.$c[3]} ;
  $prev_month = $MonthlyStats {$wp.($m-1).$c[3]} ;
  $prev_year  = $MonthlyStats {$wp.($m-12).$c[3]} ;
  print "Very Active Editors 0:$this_month, -1:$prev_month, -12:$prev_year\n" ;
  $metric_VAE_yearly  = &SummaryTrendChange ($this_month, $prev_year) ;
  $metric_VAE_monthly = &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_VAE_data    = &FormatSummary ($this_month) ;

  # new editors
  $this_month = $MonthlyStats {$wp.$m.$c[1]} ;
  $prev_month = $MonthlyStats {$wp.($m-1).$c[1]} ;
  $prev_year  = $MonthlyStats {$wp.($m-12).$c[1]} ;
  print "New Editors 0:$this_month, -1:$prev_month, -12:$prev_year\n" ;
  $metric_NE_yearly  = &SummaryTrendChange ($this_month, $prev_year) ;
  $metric_NE_monthly = &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_NE_data    = &FormatSummary ($this_month) ;

  # million speakers
  $speakers = $out_speakers {$wp} ;
  $editors  = $MonthlyStats {$wp.$m.$c[2]} ;
  print "SPEAKERS $speakers EDITORS $editors\n" ;
  if ($speakers == 0) # unknown number of speakers: no participation rate
  { $participation = "" ; }
  elsif ($editors / $speakers >= 1)
  { $participation = sprintf ("%.0f", $editors / $speakers) ; }
  else
  { $participation = sprintf ("%.1f", $editors / $speakers) ; }

  $this_month = $speakers ;
  $metric_MS_yearly  = '--&nbsp;&nbsp;&nbsp;&nbsp;' ;
  $metric_MS_monthly = '--&nbsp;&nbsp;&nbsp;&nbsp;' ; # &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_MS_data    = &FormatSummary (sprintf ("%.0f", $this_month * 1000000)) ;

  # editors per million speakers
  $metric_EMS_yearly  = '--&nbsp;&nbsp;&nbsp;&nbsp;' ;
  $metric_EMS_monthly = '--&nbsp;&nbsp;&nbsp;&nbsp;' ; # &SummaryTrendChange ($this_month, $prev_month) ;
  $metric_EMS_data    = $participation ;

  # style sheet for summaries: smaller table font, grey page background
  # (literal { is escaped: newer perls reject or warn about an unescaped left brace in a regex)
  $out_style2 = $out_style ;
  $out_style2 =~ s/td \{white-space:nowrap;/td {font-size:12px; white-space:nowrap;/ ;
  $out_style2 =~ s/body\s*\{.*?\}/body {font-family:arial,sans-serif;background-color:#C0C0C0}/ ;

  $plot_editors   = 'PlotEditors' . uc ($wp) . '.png' ;
  $plot_pageviews = 'PlotPageviews' . uc ($wp) . '.png' ;

$html = <<__HTML_SUMMARY__ ;
<a id='lang_$wp' name='lang_$wp'></a>
<table width=660 cellpadding=18 align=center border=1 style="background-color:white">
<tr>
 <td class=c colspan=99 width=100%>
 <table width=100% border=0>
 <tr>
 <td width=100% colspan=99>

 <table width=100% border=0>
 <tr>
 <td class=l width=80% valign=top>
 <h2><a href='$main_page'><font color=$col_highlight>$out_language_name $out_publication</font></a></h2>
 </td>
 <td class=r width=20% valign=top><img src='WikimediaLogo.jpg' width=30></td>
 </tr>
 </table>

 </td>
 </tr>
 </table>

 <table width=100% border=0>
 <tr>
 <td class=l colspan=99 width=100%>
 <b>$out_language_name $out_publication at a glance</b>&nbsp;<i>$month_year</i>&nbsp;&nbsp;<br>
 <small>Data are for last month with available database snapshots (dump system is recovering from backlog after server outage)</small>
 </td>
 <tr>
 <!--
 <td width=5%>
 &nbsp;
 </td>
 <td width=95%>
 -->
 <td width=100%>
 <table width=100% border=0>
 <tr>
 <td class=l width=34%>TOP</td>
 <td class=r width=22%><font color=$col_highlight>Data</td>
 <td class=r width=22%><font color=$col_highlight>Yearly change</td>
 <td class=r width=22%><font color=$col_highlight>Monthly change</td>
 </tr>
 <tr>
 <td colspan=99><hr color=#808080></td>
 </tr>

 <tr>
 <td class=l>Page Views per Month</td>
 <td class=r>$metric_PV_data</td>
 <td class=r>$metric_PV_yearly</td>
 <td class=r>$metric_PV_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>Article Count</td>
 <td class=r>$metric_AC_data</td>
 <td class=r>$metric_AC_yearly</td>
 <td class=r>$metric_AC_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>New Articles per Day</td>
 <td class=r>$metric_NAD_data</td>
 <td class=r>$metric_NAD_yearly</td>
 <td class=r>$metric_NAD_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>Edits per Month</td>
 <td class=r>$metric_EPM_data</td>
 <td class=r>$metric_EPM_yearly</td>
 <td class=r>$metric_EPM_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>Active Editors</td>
 <td class=r>$metric_AE_data</td>
 <td class=r>$metric_AE_yearly</td>
 <td class=r>$metric_AE_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>Very Active Editors</td>
 <td class=r>$metric_VAE_data</td>
 <td class=r>$metric_VAE_yearly</td>
 <td class=r>$metric_VAE_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>New Editors</td>
 <td class=r>$metric_NE_data</td>
 <td class=r>$metric_NE_yearly</td>
 <td class=r>$metric_NE_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>Speakers</td>
 <td class=r>$metric_MS_data</td>
 <td class=r>$metric_MS_yearly</td>
 <td class=r>$metric_MS_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 <tr>
 <td class=l>Editors per Million Speakers</td>
 <td class=r>$metric_EMS_data</td>
 <td class=r>$metric_EMS_yearly</td>
 <td class=r>$metric_EMS_monthly</td>
 </tr>
 <tr>
 <td colspan=99><hr></td>
 </tr>

 </table>
 </td>
 </tr>

 <!--
 <tr>
 <td class=l colspan=99 width=100%>
 &nbsp;<p><b>Active Editors</b>
 </tr>
 -->
 <tr>
 <!--
 <td width=5%>
 &nbsp;
 </td>
 <td class=l colspan=99 width=95%>
 -->
 <td class=c colspan=99 width=100%>
 &nbsp;<p><img src='$plot_editors'>
 </td>
 </tr>
 <tr>

 <!--
 <tr>
 <td class=l colspan=99 width=100%>
 &nbsp;<p><b>Page Views per Month</b><br>&nbsp;
 </tr>
 -->
 <tr>
 <!--
 <td width=5%>
 &nbsp;
 </td>
 <td class=l colspan=99 width=95%>
 -->
 <td class=c colspan=99 width=100%>
 &nbsp;<p><img src='$plot_pageviews'>
 </td>
 </tr>

EXPLANATION
SEE_ALSO
SOURCE
 </table>

 </td>
</tr>
</table>

__HTML_SUMMARY__

  # base url for the detailed reports of this project on stats.wikimedia.org
  # (only determined here when no region is selected; otherwise $url_base keeps its current value)
  if ($region eq '')
  {
    $langcode = 'EN' ;
    if ($mode_wb)
    { $url_base = "http://stats.wikimedia.org/wikibooks/$langcode" ; }
    if ($mode_wk)
    { $url_base = "http://stats.wikimedia.org/wiktionary/$langcode" ; }
    if ($mode_wn)
    { $url_base = "http://stats.wikimedia.org/wikinews/$langcode" ; }
    if ($mode_wp)
    { $url_base = "http://stats.wikimedia.org/$langcode" ; }
    if ($mode_wq)
    { $url_base = "http://stats.wikimedia.org/wikiquote/$langcode" ; }
    if ($mode_ws)
    { $url_base = "http://stats.wikimedia.org/wikisource/$langcode" ; }
    if ($mode_wv)
    { $url_base = "http://stats.wikimedia.org/wikiversity/$langcode" ; }
    if ($mode_wx)
    { $url_base = "http://stats.wikimedia.org/wikispecial/$langcode" ; }
  }

  $url_trends   = "$url_base/TablesWikipedia".uc($wp).".htm" ;
  $url_site_map = "$url_base/Sitemap.htm" ;

# footer for the stand-alone page (placeholder SEE_ALSO)
$see_also = <<__HTML_SUMMARY_SEE_ALSO__ ;
 <tr>
 <td class=c colspan=99 width=100%>
 &nbsp;<p><hr color=#808080>
 <font color=#808080>
 <small>
 Published $summaries_published&nbsp;&nbsp;/&nbsp;&nbsp;
 <b>See Also</b>
 <a href='$url_trends'><font color=#000080>Detailed trends</font></a> for <a href='$main_page'><font color=#000080>$out_language_name $out_publication</font></a>&nbsp;&nbsp;/&nbsp;&nbsp;
 <a href='$url_site_map'><font color=#000080>Stats for all $out_publications</font></a>&nbsp;&nbsp;/&nbsp;&nbsp;
 <a href='http://stats.wikimedia.org'><font color=#000080>Wikistats portal</font></a>
 </small>
 </font>
 </td>
 </tr>
__HTML_SUMMARY_SEE_ALSO__

# footer for the combined page (placeholder SOURCE)
$source = <<__HTML_SUMMARY_SOURCE__ ;
 <tr>
 <td colspan=99 class=c>
 <small><font color=#808080>page views: $pageviews_per_unit</font></small><p>
 <i><small>Source <a href='http://stats.wikimedia.org'>stats.wikimedia.org</a> / Published $summaries_published</small></i>
 </td>
 </tr>
__HTML_SUMMARY_SOURCE__

  return ($html) ;
}
 532+
# GeneratePlotEditors ($wp)
#
# Produces the 'active editors' plot for wiki code $wp:
# 1 writes a csv file ($file_editors_per_wiki) with per month the number of editors with 5+, 25+ and 100+ edits
# 2 fills in the upper case placeholders in R script template $out_script_plot_editors
# 3 saves the script as R-PlotEditors.txt in $path_in and runs it via 'R CMD BATCH'
#
# Codes that consist of z's only are skipped.
# Input comes from package globals: %editors_5, %editors_25, %editors_100, %editors_max_5,
# %editors_month_max_5, %editors_month_lo_5, %editors_month_hi_5, %out_languages, $out_publication(s), $mode_wx.
# Aborts (via &Abort / die) when the csv file or the R script file can not be opened for writing.
sub GeneratePlotEditors
{
  # NOTE(review): 'Feby' looks like a typo for 'Feb' (it ends up in the MAX_MONTH label of the plot) - confirm before changing
  my @months_en = qw (Jan Feby Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

  my $wp = shift ;

  return if $wp =~ /^z+$/ ;

  &LogT ("GeneratePlotEditors $wp\n") ;

  my $file_csv_input    = $file_editors_per_wiki ;
  my $path_png_raw      = "$path_out_plots\/PlotEditors" . uc($wp) . ".png" ;
  my $path_png_trends   = "$path_out_plots\/PlotEditorsTrends" . uc($wp) . ".png" ;
  my $path_svg          = "$path_out_plots\/PlotEditors" . uc($wp) . ".svg" ;
  my $out_script_plot   = $out_script_plot_editors ;
  my $out_language_name = $out_languages {$wp} ;
  my $editors_max       = $editors_max_5 {$wp} ;
  my $month_max         = $editors_month_max_5 {$wp} ;
  my $code              = uc ($wp) ;

  # all paths that go into the R script use forward slashes
  $file_csv_input    =~ s/\\/\//g ;
  $path_png_raw      =~ s/\\/\//g ;
  $path_png_trends   =~ s/\\/\//g ;
  $path_svg          =~ s/\\/\//g ;
  $out_language_name =~ s/&nbsp;/ /g ;

  # 'or' instead of '||': '||' binds to $file_csv_input, so a failed open was never reported
  open EDITORS_OUT, '>', $file_csv_input or &Abort ("Could not open file $file_csv_input") ;
  print EDITORS_OUT "language,month,count_5,count_25,count_100\n" ;
  print "$wp: " . (0+$editors_month_lo_5 {$wp}) . ", " . (0+$editors_month_hi_5 {$wp}) . "\n" ;

  # start in year where value exceeds 1/100 of max value

  for ($m = $editors_month_lo_5 {$wp} ; $m < $editors_month_hi_5 {$wp} ; $m++)
  { last if $editors_5 {$wp.$m} >= $editors_max / 100 ; }
  $editors_month_lo_5_100th = $m - $m % 12 + 1 ;

  $period = month_year_english_short ($editors_month_lo_5_100th) . ' ' . month_year_english_short ($editors_month_hi_5 {$wp}) ;

  for ($m = $editors_month_lo_5_100th ; $m <= $editors_month_hi_5 {$wp} ; $m++)
  {
    # make boundary not show at 2010-01-31 but at 2010-01-01 as follows:
    # instead of value for last day of month, present it as value for first day of next month
    # this requires outputting extra first value for 20xx-01-01 (to make chart start at January)
    $count_5   = 0 + $editors_5   {$wp.$m} ;
    $count_25  = 0 + $editors_25  {$wp.$m} ;
    $count_100 = 0 + $editors_100 {$wp.$m} ;

    if ($m == $editors_month_lo_5_100th)
    {
      $date = &m2mmddyyyy ($m) ;
      $date =~ s/(\d\d)\/\d\d\/(\d\d\d\d)/$1\/01\/$2/ ; # force day of month to 01
      print EDITORS_OUT "$wp,$date,$count_5,$count_25,$count_100\n" ;
    }

    $date = &m2mmddyyyy ($m+1) ;
    $date =~ s/(\d\d)\/\d\d\/(\d\d\d\d)/$1\/01\/$2/ ; # force day of month to 01
    print EDITORS_OUT "$wp,$date,$count_5,$count_25,$count_100\n" ;
  }
  close EDITORS_OUT ;

  # calc plot parameters

  if ($editors_max > 0)
  {
    # upper limit for y axis: smallest of 1.5, 2, 4, 6, 8, 10 times a power of ten that exceeds the max value
    $editors_max_rounded = 10000000000000 ;
    while ($editors_max_rounded / 10 > $editors_max) { $editors_max_rounded /= 10 ; }

    if    ($editors_max_rounded * 0.15 > $editors_max) { $editors_max_rounded *= 0.15 ; }
    elsif ($editors_max_rounded * 0.2  > $editors_max) { $editors_max_rounded *= 0.2 ; }
    elsif ($editors_max_rounded * 0.4  > $editors_max) { $editors_max_rounded *= 0.4 ; }
    elsif ($editors_max_rounded * 0.6  > $editors_max) { $editors_max_rounded *= 0.6 ; }
    elsif ($editors_max_rounded * 0.8  > $editors_max) { $editors_max_rounded *= 0.8 ; }
    print "$wp $editors_max -> $editors_max_rounded\n" ; # was misspelled as $editorsmax_rounded (always empty)

    # insert thousands separators
    $editors_max =~ s/(\d)(\d\d\d)$/$1,$2/ ;
    $editors_max =~ s/(\d)(\d\d\d),/$1,$2,/ ;
    $editors_max =~ s/(\d)(\d\d\d),/$1,$2,/ ;
    $editors_max =~ s/(\d)(\d\d\d),/$1,$2,/ ;
  }
  else
  {
    $editors_max         = '10' ;
    $editors_max_rounded = 10 ; # global was left empty or stale (value for previous wiki) here, which broke YLIM_MAX in the R script
  }

  # edit plot parameters

  if ($wp eq 'zz') # NOTE(review): never true as long as codes with only z's return early above
  { $out_script_plot =~ s/TITLE/Active Editors on all $out_publications/g ; }
  elsif ($mode_wx)
  { $out_script_plot =~ s/TITLE/Active Editors on $out_language_name wiki/g ; }
  else
  {
    $out_script_plot =~ s/TITLE/Active Editors on LANGUAGE $out_publication/g ;
    $out_script_plot =~ s/LANGUAGE/$out_language_name/g ;
    $out_script_plot =~ s/CODE/$code/g ;
  }

  # month with highest editor count as e.g. 'Mar 2010'
  $mmddyyyy  = &m2mmddyyyy ($month_max) ;
  $month_max = $months_en [substr ($mmddyyyy,0,2) - 1] . " " . substr ($mmddyyyy,6,4) ;

  $out_script_plot =~ s/Wikipedia/$out_publication/g ;

  $out_script_plot =~ s/FILE_CSV/$file_csv_input/g ;
  $out_script_plot =~ s/FILE_PNG_TRENDS/$path_png_trends/g ;
  $out_script_plot =~ s/FILE_PNG_RAW/$path_png_raw/g ;
  $out_script_plot =~ s/FILE_SVG/$path_svg/g ;
  $out_script_plot =~ s/CODE/$code/g ;
  $out_script_plot =~ s/MAX_MONTH/$month_max/g ;
  $out_script_plot =~ s/EDITORS/$editors_max/g ;
  $out_script_plot =~ s/YLIM_MAX/$editors_max_rounded/g ;
  $out_script_plot =~ s/LANGUAGE/$out_language_name/g ;
  $out_script_plot =~ s/PERIOD/$period/g ;

  $out_script_plot =~ s/COLOR_5/violetred2/g ;
  $out_script_plot =~ s/COLOR_25/purple2/g ;
  $out_script_plot =~ s/COLOR_100/dodgerblue2/g ;

  my $file_script = $path_in . "R-PlotEditors.txt" ;
  open R_SCRIPT, '>', $file_script or die ("file $file_script not found") ; ;
  print R_SCRIPT $out_script_plot ;
  close R_SCRIPT ;

  $cmd = "R CMD BATCH \"$file_script\"" ;

  # show the command line once per run only (counter is shared with GeneratePlotPageviews)
  if ($generate_edit_plots++ == 0)
  { print "$cmd\n" ; }

  @result = `$cmd` ;
}
 660+
# GeneratePlotPageviews ($wp)
#
# Produces the 'page views per month' plot for wiki code $wp:
# 1 writes a csv file ($file_pageviews_per_wiki) with one page view count per month,
#   scaled down to thousands or millions when the maximum is 1,000,000 resp. 1,000,000,000 or more
# 2 fills in the upper case placeholders in R script template $out_script_plot_pageviews
# 3 saves the script as R-PlotPageviews.txt in $path_in and runs it via 'R CMD BATCH'
#
# Codes that consist of z's only are skipped.
# Input comes from package globals: %pageviews, %pageviews_max, %pageviews_month_max,
# %pageviews_month_lo, %pageviews_month_hi, %out_languages, $out_publication(s), $mode_wx.
# Aborts (via &Abort / die) when the csv file or the R script file can not be opened for writing.
sub GeneratePlotPageviews
{
  # NOTE(review): 'Feby' looks like a typo for 'Feb' (it ends up in the MAX_MONTH label of the plot) - confirm before changing
  my @months_en = qw (Jan Feby Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

  my $wp = shift ;

  return if $wp =~ /^z+$/ ;

  &LogT ("GeneratePlotPageviews $wp\n") ;

  my $file_csv_input    = $file_pageviews_per_wiki ;
  my $path_png_raw      = "$path_out_plots\/PlotPageviews" . uc($wp) . ".png" ;
  my $path_png_trends   = "$path_out_plots\/PlotPageviewsTrends" . uc($wp) . ".png" ;
  my $path_svg          = "$path_out_plots\/PlotPageviews" . uc($wp) . ".svg" ;
  my $out_script_plot   = $out_script_plot_pageviews ;
  my $out_language_name = $out_languages {$wp} ;
  my $pageviews_max     = $pageviews_max {$wp} ;
  my $month_max         = $pageviews_month_max {$wp} ;
  my $code              = uc ($wp) ;

  # all paths that go into the R script use forward slashes
  $file_csv_input    =~ s/\\/\//g ;
  $path_png_raw      =~ s/\\/\//g ;
  $path_png_trends   =~ s/\\/\//g ;
  $path_svg          =~ s/\\/\//g ;
  $out_language_name =~ s/&nbsp;/ /g ;

  # 'or' instead of '||': '||' binds to $file_csv_input, so a failed open was never reported
  open PAGEVIEWS_OUT, '>', $file_csv_input or &Abort ("Could not open file $file_csv_input") ;
  print PAGEVIEWS_OUT "language,month,count_normalized\n" ;

  # choose unit in which counts are plotted, plus matching texts for title and 'max' label
  $pageviews_unit       = 1 ;
  $pageviews_unit_text  = "" ;
  $pageviews_unit_text2 = "" ;
  if ($pageviews_max >= 1000000)
  {
    $pageviews_unit       = 1000 ;
    $pageviews_unit_text  = " (x 1000)" ;
    $pageviews_unit_text2 = ",000" ;
  }
  if ($pageviews_max >= 1000000000)
  {
    $pageviews_unit       = 1000000 ;
    $pageviews_unit_text  = " (in millions)" ;
    $pageviews_unit_text2 = " million" ;
  }
  $pageviews_max = sprintf ("%.0f", $pageviews_max / $pageviews_unit) ;

  $period = month_year_english_short ($pageviews_month_lo {$wp}) . ' ' . month_year_english_short ($pageviews_month_hi {$wp}-1) ;

  for ($m = $pageviews_month_lo {$wp} ; $m < $pageviews_month_hi {$wp} ; $m++)
  {
    $count_normalized = sprintf ("%.0f", $pageviews {$wp.$m} / $pageviews_unit) ;
  # $days_in_month = days_in_month (substr($date,6,4),substr($date,0,2)) ;
  # $count_normalized = sprintf ("%.0f", 30/$days_in_month * $count) ;

    # make boundary not show at 2010-01-31 but at 2010-01-01 as follows:
    # instead of value for last day of month, present it as value for first day of next month
    # this requires outputting extra first value for 20xx-01-01 (to make chart start at January)

    if ($m == $pageviews_month_lo {$wp})
    {
      $date = &m2mmddyyyy ($m) ;
      $date =~ s/(\d\d)\/\d\d\/(\d\d\d\d)/$1\/01\/$2/ ; # force day of month to 01
      print PAGEVIEWS_OUT "$wp,$date,$count_normalized\n" ;
    }

    $date = &m2mmddyyyy ($m+1) ;
    $date =~ s/(\d\d)\/\d\d\/(\d\d\d\d)/$1\/01\/$2/ ; # force day of month to 01
    print PAGEVIEWS_OUT "$wp,$date,$count_normalized\n" ;

  }
  close PAGEVIEWS_OUT ;

  # calc plot parameters

  if ($pageviews_max > 0)
  {
    # upper limit for y axis: smallest of 1.5, 2, 4, 6, 8, 10 times a power of ten that exceeds the max value
    $pageviews_max_rounded = 10000000000000 ;
    while ($pageviews_max_rounded / 10 > $pageviews_max) { $pageviews_max_rounded /= 10 ; }

    if    ($pageviews_max_rounded * 0.15 > $pageviews_max) { $pageviews_max_rounded *= 0.15 ; }
    elsif ($pageviews_max_rounded * 0.2  > $pageviews_max) { $pageviews_max_rounded *= 0.2 ; }
    elsif ($pageviews_max_rounded * 0.4  > $pageviews_max) { $pageviews_max_rounded *= 0.4 ; }
    elsif ($pageviews_max_rounded * 0.6  > $pageviews_max) { $pageviews_max_rounded *= 0.6 ; }
    elsif ($pageviews_max_rounded * 0.8  > $pageviews_max) { $pageviews_max_rounded *= 0.8 ; }
  }
  else
  {
    # without this guard the loop above divides all the way down to a denormal number,
    # which is useless as y axis limit; same fallback as used for the editors plot
    $pageviews_max_rounded = 10 ;
  }
  print "$wp $pageviews_max -> $pageviews_max_rounded\n" ;

  # insert thousands separators
  $pageviews_max =~ s/(\d)(\d\d\d)$/$1,$2/ ;
  $pageviews_max =~ s/(\d)(\d\d\d),/$1,$2,/ ;
  $pageviews_max =~ s/(\d)(\d\d\d),/$1,$2,/ ;
  $pageviews_max =~ s/(\d)(\d\d\d),/$1,$2,/ ;

  # edit plot parameters

  if ($wp eq 'zz') # NOTE(review): never true as long as codes with only z's return early above
  { $out_script_plot =~ s/TITLE/Page Views on all $out_publications$pageviews_unit_text/g ; }
  elsif ($mode_wx)
  { $out_script_plot =~ s/TITLE/Page Views on $out_language_name wiki$pageviews_unit_text/g ; }
  else
  {
    $out_script_plot =~ s/TITLE/Page Views on LANGUAGE $out_publication$pageviews_unit_text/g ;
    $out_script_plot =~ s/LANGUAGE/$out_language_name/g ;
    $out_script_plot =~ s/CODE/$code/g ;
  }

  # month with highest page view count as e.g. 'Mar 2010'
  $mmddyyyy  = &m2mmddyyyy ($month_max) ;
  $month_max = $months_en [substr ($mmddyyyy,0,2) - 1] . " " . substr ($mmddyyyy,6,4) ;

  $out_script_plot =~ s/Wikipedia/$out_publication/g ;

  $out_script_plot =~ s/FILE_CSV/$file_csv_input/g ;
  $out_script_plot =~ s/FILE_PNG_TRENDS/$path_png_trends/g ;
  $out_script_plot =~ s/FILE_PNG_RAW/$path_png_raw/g ;
  $out_script_plot =~ s/FILE_SVG/$path_svg/g ;
  $out_script_plot =~ s/CODE/$code/g ;
  $out_script_plot =~ s/MAX_MONTH/$month_max/g ;
  $out_script_plot =~ s/VIEWS/$pageviews_max$pageviews_unit_text2/g ;
  $out_script_plot =~ s/YLIM_MAX/$pageviews_max_rounded/g ;
  $out_script_plot =~ s/LANGUAGE/$out_language_name/g ;
  $out_script_plot =~ s/UNIT/$pageviews_unit_text/g ;
  $out_script_plot =~ s/PERIOD/$period/g ;

  my $file_script = $path_in . "R-PlotPageviews.txt" ;
  open R_SCRIPT, '>', $file_script or die ("file $file_script not found") ; ;
  print R_SCRIPT $out_script_plot ;
  close R_SCRIPT ;

  $cmd = "R CMD BATCH \"$file_script\"" ;

  # show the command line once per run only (counter is shared with GeneratePlotEditors)
  if ($generate_edit_plots++ == 0)
  { print "$cmd\n" ; }

  @result = `$cmd` ;
}
 792+
# Builds the html for the two toggleable project indexes at the top of the summaries page:
# index 1 lists the wikis by language name, index 2 by language code, each entry linking to anchor #lang_<code>.
# Codes that consist of z's only are left out.
# Results are stored in globals @index_languages1/2, $index_languages1/2 and $index_html.
sub SummaryAddIndexes
{
  # index 1: links labeled with (and ordered by) language name
  my @codes_by_name = sort {$out_languages {$a} cmp $out_languages {$b}} @languages ;
  for my $code (@codes_by_name)
  {
    if ($code !~ /^z+$/)
    { push @index_languages1, "<a href='#lang_$code'>" . $out_languages {$code} . "</a>" ; }
  }

  # index 2: links labeled with (and ordered by) language code
  my @codes_by_code = sort @languages ;
  for my $code (@codes_by_code)
  {
    if ($code !~ /^z+$/)
    { push @index_languages2, "<a href='#lang_$code'>$code</a>" ; }
  }

# foreach $lang (keys_sorted_by_value_num_desc %{$editstottype{'R'}})
# {
#   my $edits = &i2KM4 ($editstottype {'R'}{$lang} + $editstottype {'A'}{$lang} + $editstottype {'B'}{$lang}) ;
#   my $file_html = "EditsReverts" . uc ($lang) . ".htm" ;
#   my $file_csv = "$path_in\/RevertedEdits" . uc($lang) . ".csv" ;
#   if (-e $file_csv)
#   { push @index_languages3, "<a href='$file_html'>${out_languages{$lang}}</a> ($edits)" ; }
#   else
#   { push @index_languages3, $out_languages{$lang} ; }
# }

  $index_languages1 = join (', ', @index_languages1) ;
  $index_languages2 = join (', ', @index_languages2) ;
# $index_languages3 = join ', ', @index_languages3 ;

  # javascript for the toggle comes first
  $index_html = &HtmlIndex3 ; # in WikiReportsOutputEditHistory

  # caption row with toggle link
  $index_html .= qq{<tr><td class=l><b>Projects indexed by <span id='caption'><font color=#006600>language</font> / <font color=#A0A0A0>language code</font></span></b></td><td class=r colspan=99><a href="#" id='toggle' onclick="toggle_visibility_index();">Toggle index</a></td></tr>\n} ;

  # row with both indexes, only the first one is visible initially
  my $span_index1 = qq{<span id='index1' style="display:block">\n$index_languages1\n</span>\n} ;
  my $span_index2 = qq{<span id='index2' style="display:none">\n$index_languages2\n</span>\n} ;
# my $span_index3 = qq{<span id='index3' style="display:none">\n$index_languages3\n</span>} ;

  $index_html .= "<tr><td class=lwrap colspan=99>\n" . $span_index1 . $span_index2 . "</td></tr>\n" ;
}
# SummaryTrendChange ($now, $prev)
# Returns the change from $prev to $now as html: a whole percentage,
# in green with a plus sign when the formatted number has no minus sign, in red otherwise.
# When $prev is zero no percentage can be calculated and a '--' filler is returned.
# The outcome is also kept in global $result and traced on STDOUT.
sub SummaryTrendChange
{
  my ($now, $prev) = @_ ;

  if ($prev != 0)
  {
    my $percentage = sprintf ("%.0f", (100 * ($now / $prev)) - 100) . '%' ;

    $result = ($percentage =~ /-/)
            ? "<font color=#900000>$percentage</font>"
            : "<font color=#009000>+$percentage</font>" ;
  }
  else
  { $result = '--&nbsp;&nbsp;&nbsp;&nbsp;' ; }

  print "SM $now, $prev -> result $result\n" ;
  return $result ;
}
 846+
# FormatSummary ($x)
# Returns $x with thousands separators (commas) inserted into the run of digits at the end of the string,
# e.g. 1234567 -> 1,234,567. Strings that do not end in four or more digits are returned unchanged.
# Works for any number of digits (the earlier three fixed substitutions stopped grouping after 12 digits).
sub FormatSummary
{
  my $x = shift ;

  # each pass puts a comma in front of the last three digits that are not grouped yet:
  # the lookahead only accepts complete ',ddd' groups between the match and the end of the string
  1 while $x =~ s/(\d)(\d\d\d)(?=(?:,\d\d\d)*$)/$1,$2/ ;

  return ($x) ;
}
 855+
 856+1;
Index: trunk/wikistats/dumps/WikiReportsScripts.pm
@@ -423,7 +423,109 @@
424424
425425 __SCRIPT_EDIT_PLOT_ANONS__
426426
# PE = Plot Editors
# Template for the R script that plots active editors (5+, 25+, 100+ edits) per month.
# $out_script_multititle is interpolated right here, \$ keeps a literal dollar sign for R.
# The upper case placeholders (FILE_CSV, FILE_PNG_RAW, YLIM_MAX, TITLE, COLOR_5, COLOR_25, COLOR_100,
# MAX_MONTH, EDITORS, PERIOD) are substituted per wiki in GeneratePlotEditors.
$out_script_plot_editors = <<__SCRIPT_EDIT_PLOT_EDITORS__ ;
$out_script_multititle

plotdata <- read.csv(file="FILE_CSV",head=TRUE,sep=",")[2:5]
counts <- plotdata[2:4]
dates <-strptime(as.character(plotdata\$month), "%m/%d/%Y")
dates

plotdata = data.frame(date=dates,counts)
plotdata
attach (plotdata)

#install.packages(c("Cairo"), repos="http://cran.r-project.org" )
 library(Cairo)
 Cairo(width=640, height=240, file="FILE_PNG_RAW", type="png", pointsize=10, bg="#F0F0F0", canvas = "white", units = "px", dpi = "auto", title="Test")

options("scipen"=20)
r <- as.POSIXct(round(range(dates), "days"))
r

par(mar=c(2.5,4,2.5,1.5))
par(oma=c(0,0,0,0))

plot (dates,plotdata\$count_5,type="l", col="blue", lty="solid", lwd=0.5, tck=1, xlab="", ylab="", xaxt="n", yaxt="n", las=2, bty="o", xaxs = "i", yaxs = "i", ylim=c(0,YLIM_MAX))

axis(2, col.axis="black", las=2, tck=1, col="#D0D0D0")

axis.POSIXct(1, at=seq(r[1], r[2], by="month"), format="\b ", tck=1, col="gray80") # vertical monthly bars light grey
axis.POSIXct(1, at=seq(r[1], r[2], by="year"), format="%Y ", tck=1, col="gray80") # year numbers below x axis
axis.POSIXct(1, at=seq(r[1], r[2], by="year") , format="\b ", tck=1, col="gray20") # vertical yearly bar dark grey
axis.POSIXct(1, at=seq(r[1], r[2], by="year") , format="\b ", tck=-0.02, col="gray20") # extending slightly below x asix (as tick marks)

title(" TITLE ", cex.main=1.2, font.main=3, col.main= "black")

lines(dates,plotdata\$count_5,col="COLOR_5", lty="solid", lwd=1.8)
lines(dates,plotdata\$count_25,col="COLOR_25", lty="solid", lwd=1.8)
lines(dates,plotdata\$count_100,col="COLOR_100", lty="solid", lwd=1.8)

#legend("topleft",c("5+ edits ", "25+ edits ", "100+ edits ", "(reg edits only)"), lty=1, lwd=2, col=c("COLOR_5","COLOR_25", "COLOR_100", "#F0F0F0"), inset=0.05, bg="#E0E0E0")
legend("topleft",c("5+ edits ", "25+ edits ", "100+ edits "), lty=1, lwd=1.8, col=c("COLOR_5","COLOR_25", "COLOR_100"), inset=0.04, bg="#E0E0E0")

mtext("max editors (5+ edits) in ", cex=0.85, line=1.5, side=3, adj=0, outer=FALSE, col="#000000")
mtext("MAX_MONTH: EDITORS ", cex=0.85, line=0.5, side=3, adj=0, outer=FALSE, col="#000000")
mtext(paste(" stats.wikimedia.org "), cex=0.85, line=1.5, side=3, adj=1, outer=FALSE, col="#000000")
mtext(paste ("PERIOD "), cex=0.85, line=0.5, side=3, adj=1, outer=FALSE, col="#000000")
mtext(paste ("Erik Zachte - perl+R - ", format(Sys.time(), "%b %d, %H:%M ")), cex=0.80, line=0.2, side=4, adj=0, outer=FALSE, col="#AAAAAA")

box()
dev.off()

__SCRIPT_EDIT_PLOT_EDITORS__
 479+
# PV = Plot Page Views
# Template for the R script that plots page views per month.
# $out_script_multititle is interpolated right here, \$ keeps a literal dollar sign for R.
# The upper case placeholders (FILE_CSV, FILE_PNG_RAW, YLIM_MAX, TITLE, MAX_MONTH, VIEWS, PERIOD)
# are substituted per wiki in GeneratePlotPageviews.
# The footnote on normalization used to say Feb*(29|30)/28; scaling to 30 days means Feb*30/(28|29).
$out_script_plot_pageviews = <<__SCRIPT_EDIT_PLOT_PAGEVIEWS__ ;
$out_script_multititle

plotdata <- read.csv(file="FILE_CSV",head=TRUE,sep=",")[2:3]
counts <- plotdata[2:2]
dates <-strptime(as.character(plotdata\$month), "%m/%d/%Y")

plotdata = data.frame(date=dates,counts)
plotdata
attach (plotdata)

#install.packages(c("Cairo"), repos="http://cran.r-project.org" )
 library(Cairo)
 Cairo(width=640, height=240, file="FILE_PNG_RAW", type="png", pointsize=10, bg="#F0F0F0", canvas = "white", units = "px", dpi = "auto", title="Test")

options("scipen"=20)
r <- as.POSIXct(round(range(dates), "days"))

par(mar=c(3.5,4,2.5,1.5))
par(oma=c(0,0,0,0))

plot (dates,plotdata\$count_normalized,type="l", col="blue", lty="solid", lwd=0.5, tck=1, xlab="", ylab="", xaxt="n", yaxt="n", las=2, bty="o", xaxs = "i", yaxs = "i", ylim=c(0,YLIM_MAX))

#axis(2, at=100000000*c(0:10),labels=100000000*c(0:10), col.axis="black", las=2, tck=1, col="#D0D0D0")
axis(2, col.axis="black", las=2, tck=1, col="#D0D0D0")

axis.POSIXct(1, at=seq(r[1], r[2], by="month"), format="\b ", tck=1, col="gray80") # vertical monthly bars light grey
axis.POSIXct(1, at=seq(r[1], r[2], by="year"), format="%Y ", tck=1, col="gray80") # year numbers below x axis
axis.POSIXct(1, at=seq(r[1], r[2], by="year") , format="\b ", tck=1, col="gray20") # vertical yearly bar dark grey
axis.POSIXct(1, at=seq(r[1], r[2], by="year") , format="\b ", tck=-0.02, col="gray20") # extending slightly below x asix (as tick marks)

title(" TITLE ", cex.main=1.2, font.main=3, col.main= "black")

lines(dates,plotdata\$count_normalized,col="green4", lty="solid", lwd=1.8)

mtext("max page views in ", cex=0.85, line=1.5, side=3, adj=0, outer=FALSE, col="#000000")
mtext("MAX_MONTH: VIEWS ", cex=0.85, line=0.5, side=3, adj=0, outer=FALSE, col="#000000")
mtext(paste(" stats.wikimedia.org "), cex=0.85, line=1.5, side=3, adj=1, outer=FALSE, col="#000000")
mtext(paste ("PERIOD "), cex=0.85, line=0.5, side=3, adj=1, outer=FALSE, col="#000000")
mtext(paste ("Erik Zachte - perl+R - ", format(Sys.time(), "%b %d, %H:%M ")), cex=0.80, line=0.2, side=4, adj=0, outer=FALSE, col="#AAAAAA")
mtext("page views have been normalized to months of 30 days (Jan*30/31, Feb*30/(28|29), Mar*30/31, etc)", cex=0.85, line=2.2, side=1, outer=FALSE, col="#808080")

box()
dev.off()

__SCRIPT_EDIT_PLOT_PAGEVIEWS__
 527+
 528+
 529+
428530 $out_script_expand = <<__SCRIPT_EXPAND__ ;
429531 <script>
430532 var base = 'http://WP.wikibooks.org/wiki/' ;
Index: trunk/wikistats/dumps/WikiReportsOutputEditHistory.pm
@@ -1035,6 +1035,35 @@
10361036 return ($html) ;
10371037 }
10381038
# Returns the javascript block that defines toggle_visibility_index ():
# it swaps visibility of the elements with id 'index1' and 'index2'
# and updates the element with id 'caption' to highlight which index is shown.
sub HtmlIndex3
{
  return <<__HTML_INDEX_3__ ;
<script type="text/javascript">
<!--
function toggle_visibility_index()
{
 var index1 = document.getElementById('index1');
 var index2 = document.getElementById('index2');
 var caption = document.getElementById('caption');
 if (index1.style.display == 'block')
 {
 index1.style.display = 'none';
 index2.style.display = 'block';
 caption.innerHTML = '<font color=#A0A0A0>language</font> / <font color=#006600>language code</font>' ;
 }
 else
 {
 index1.style.display = 'block';
 index2.style.display = 'none';
 caption.innerHTML = '<font color=#006600>language</font> / <font color=#A0A0A0>language code</font>' ;
 }
}
//-->
</script>
__HTML_INDEX_3__
}
 1067+
10391068 sub GenerateYearlyGrowthStats
10401069 {
10411070 return if ! $wikimedia ;
Index: trunk/wikistats/dumps/WikiReportsOutputTables.pm
@@ -504,7 +504,6 @@
505505 &tdrb (&w ($speakers)) .
506506 &tdrb (&w ($participation)) ;
507507
508 -
509508 $out_html_verbose .= &tr ((($wikimedia && !$mode_wx)? &tdcb ($site) : &tdlb ($site)) .
510509 (((! $mode_wx) && (! $singlewiki)) ? ($wikimedia ? &tdlb ($out_language_name) : "") : "") .
511510 &tdcb (&w("<a href='TablesWikipedia" . uc($wpc) . ".htm'>T</a> | " .
@@ -3132,7 +3131,7 @@
31333132 {
31343133 for ($f = 0 ; $f <= $fmax ; $f++)
31353134 {
3136 - # if ($f == 6) { next ; } # skip obsolete alternate article counts
 3135+ # if ($f == 5) { next ; } # skip obsolete alternate article counts
31373136
31383137 if (($mode_wp) ||
31393138 (($f != 5) && ($f != 9) && ($f != 10)))
@@ -3179,22 +3178,36 @@
31803179 if (index ("CD", $c[$f]) > -1) { $colormode = 'G' ; }
31813180 if ($pageviews) { $colormode = 'I' ; }
31823181
3183 - &GenerateHtmlStartComparisonTables ($f) ;
 3182+ my $content = &GenerateHtmlStartComparisonTables ($f, $normalize_days_per_month) ;
31843183
3185 - if ($pageviews_normal)
 3184+ if ($pageviews_mobile)
31863185 {
3187 - $mode_wp ? $root = ".." : $root = "../.." ;
 3186+ $href_normalized = 'TablesPageViewsMonthlyMobile.htm' ;
 3187+ $href_not_normalized = 'TablesPageViewsMonthlyOriginalMobile.htm' ;
 3188+ }
 3189+ elsif ($pageviews_non_mobile)
 3190+ {
 3191+ $href_normalized = 'TablesPageViewsMonthly.htm' ;
 3192+ $href_not_normalized = 'TablesPageViewsMonthlyOriginal.htm' ;
 3193+ }
 3194+ else
 3195+ {
 3196+ $href_normalized = 'TablesPageViewsMonthlyCombined.htm' ;
 3197+ $href_not_normalized = 'TablesPageViewsMonthlyOriginalCombined.htm' ;
 3198+ }
 3199+ $href_normalized2 = 'TablesPageViewsMonthlyCombined.htm' ;
 3200+ $href_not_normalized2 = 'TablesPageViewsMonthlyOriginalCombined.htm' ;
31883201
3189 - $out_xref = "<a href='$root/EN/TablesPageViewsMonthlyAllProjects.htm'>All projects, </a>\n" ;
3190 - $mode_wb ? ($out_xref .= "Wikibooks, ") : ($out_xref .= "<a href='$root/wikibooks/EN/TablesPageViewsMonthly.htm'>Wikibooks, </a>\n") ;
3191 - $mode_wk ? ($out_xref .= "Wiktionary, ") : ($out_xref .= "<a href='$root/wiktionary/EN/TablesPageViewsMonthly.htm'>Wiktionaries, </a>\n") ;
3192 - $mode_wn ? ($out_xref .= "Wikinews, ") : ($out_xref .= "<a href='$root/wikinews/EN/TablesPageViewsMonthly.htm'>Wikinews, </a>\n") ;
3193 - $mode_wp ? ($out_xref .= "Wikipedia, ") : ($out_xref .= "<a href='$root/EN/TablesPageViewsMonthly.htm'>Wikipedias, </a>\n") ;
3194 - $mode_wq ? ($out_xref .= "Wikiquote, ") : ($out_xref .= "<a href='$root/wikiquote/EN/TablesPageViewsMonthly.htm'>Wikiquotes, </a>\n") ;
3195 - $mode_ws ? ($out_xref .= "Wikisource, ") : ($out_xref .= "<a href='$root/wikisource/EN/TablesPageViewsMonthly.htm'>Wikisources, </a>\n") ;
3196 - $mode_wv ? ($out_xref .= "Wikiversity, ") : ($out_xref .= "<a href='$root/wikiversity/EN/TablesPageViewsMonthly.htm'>Wikiversities, </a>\n") ;
3197 - $mode_wx ? ($out_xref .= "Wikispecial") : ($out_xref .= "<a href='$root/wikispecial/EN/TablesPageViewsMonthly.htm'>Wikispecial</a>\n") ;
 3202+ if ($normalize_days_per_month)
 3203+ {
 3204+ $href_current_file = $href_normalized ;
 3205+ $href_current_file2 = $href_normalized2 ;
31983206 }
 3207+ else
 3208+ {
 3209+ $href_current_file = $href_not_normalized ;
 3210+ $href_current_file2 = $href_not_normalized2 ;
 3211+ }
31993212
32003213 if ($wikimedia && ($f <= 1) && $mode_wp)
32013214 {
@@ -3206,92 +3219,116 @@
32073220
32083221 if ($pageviews)
32093222 {
3210 - $out_html .= "<b><font color=#A00000>Warning: page view counts from Nov 2009 till March 2010 are too low.</font></b> " .
3211 - "In July 2010 is was established that the server that collects and aggregates log data for all squids could not keep up with all incoming messages, and hence underreported page views. " .
3212 - "This issue has been resolved. For April - July 2010 the amount of underreporting could be inferred from still available log files and counts for these months have been corrected (read <a href='http://infodisiac.com/blog/wp-content/uploads/2010/07/assessment.pdf'>more</a>). For earlier months, possibly from Nov 2009 till March 2010 counts in the table below are too low.<p>" .
3213 - "<hr>" ;
3214 - $out_html .= "<p><b>Legend:</b><br>$legend_pageviews_monthly<br>&nbsp;<hr>" ;
 3223+ $out_html .= "$legend_pageviews_monthly<hr>\n" ;
32153224
3216 - if ($pageviews_mobile)
3217 - {
3218 - if ($region eq '')
3219 - { $out_html .= "<p>$msg_perc_mobile" ; }
3220 - $out_html .= "<h3>Page views per language per month (mobile site) <font color=#A0A0A0>(plus links to edit trends)</font>&nbsp;&nbsp;&nbsp;&nbsp;<span id='wait'><font color='#666600'>" . $out_rendering . "</font></span></h3>\n" ;
3221 - $out_html .= "<p><b><font color=#008000>Mobile traffic only! </font></b>" ;
3222 - }
3223 - else
3224 - {
3225 - if ($region eq '')
3226 - { $out_html .= "<p>$msg_perc_non_mobile" ; }
3227 - $out_html .= "<h3>Page views per language per month <font color=#A0A0A0>(plus links to edit trends)</font>&nbsp;&nbsp;&nbsp;&nbsp;<span id='wait'><font color='#444400'>" . $out_rendering . "</font></span></h3>" ;
3228 - $out_html .= "<p><b><font color=#008000>Non-mobile traffic only! </font></b>" ;
3229 - }
 3225+ $out_html .= "<p><h2>$content</h2><p>" ;
32303226
3231 - if ($normalize_days_per_month)
3232 - { $out_html .= "<b><font color=#008000>View counts on this page have been normalized to months of 30 days, for fair comparison.</font></b>. " ; }
3233 - else
3234 - { $out_html .= "<b><font color=#800000>View counts on this page have <font color=#FF0000>not</font> been normalized to months of 30 days.</font></b>. " ; }
 3227+ $out_html .= "<b>Page views per language per month</b> (plus links to edit trends)\n" ;
32353228
3236 - # $out_html .= "<p><b><font color='#600000'>Everything on this page is about page views, except links named 'Edit Trend'</font></b>" ;
 3229+ if ($pageviews_combined)
 3230+ { $out_html .= " <b>Mobile + Non-mobile traffic</b>" ; }
32373231
 3232+ # $out_html .= "<h3><span id='wait'>!!! <font color='#800000'>" . $out_rendering . "</font> !!!</span></h3>\n" ;
 3233+
 3234+ if ($mode_wp && ($region eq ''))
 3235+ { $out_html .= " ($msg_perc_mobile)" ; }
 3236+
32383237 # for linear regression
32393238 # use Statistics:LineFit ;
32403239 # http://search.cpan.org/~randerson/Statistics-LineFit-0.07/lib/Statistics/LineFit.pm
32413240
32423241 # http://forum.chromefans.org/problem-with-span-p-and-style-display-none-t389.html
3243 - # $out_html .= "\n<span id='wait'><left><font color='green'><b>" . $out_rendering . "</b></font></left><p></span>\n" ;
 3242+ # $out_html .= "\n<span id='wait'><left><font color='green'><b>" . $out_rendering . "</b></font></left><p></span>\n" ;
32443243
3245 - if ($pageviews_mobile)
 3244+ my $coverage ;
 3245+ if ($mode_wp)
32463246 {
3247 - $href_normalized = 'TablesPageViewsMonthlyMobile.htm' ;
3248 - $href_not_normalized = 'TablesPageViewsMonthlyOriginalMobile.htm' ;
 3247+ if ($region eq '') { $coverage1 = "<font color=#000080>Wikipedia All Languages, </font>" ; }
 3248+ elsif ($region eq 'artificial') { $coverage1 = "<font color=#000080>Wikipedia Artificial Languages, </font>" ; }
 3249+ else { $coverage1 = "<font color=#000080>Wikipedia " . ucfirst $region . ", </font>" ; }
32493250 }
3250 - else
 3251+ elsif ($mode_wx) { $coverage = "<font color=#000080>Other Projects, </font>" ; }
 3252+ else { $coverage = "<font color=#000080>$out_publication, </font>" ; }
 3253+
 3254+ if ($pageviews_mobile) { $coverage2 = "<font color=#000080>Mobile, </font>" ; }
 3255+ elsif ($pageviews_non_mobile) { $coverage2 = "<font color=#000080>Non-Mobile, </font>" ; }
 3256+ if ($pageviews_combined) { $coverage2 = "<font color=#000080>All Platforms, </font>" ; }
 3257+
 3258+ ($coverage2b = $coverage) =~ s/<[^>]*>//g ;;
 3259+
 3260+ if ($normalize_days_per_month)
32513261 {
3252 - $href_normalized = 'TablesPageViewsMonthly.htm' ;
3253 - $href_not_normalized = 'TablesPageViewsMonthlyOriginal.htm' ;
 3262+ $out_html .= "<p>View counts on this page have been normalized to months of 30 days, for fair comparison. " ;
 3263+ $raw_or_not = "Normalized, " ;
32543264 }
3255 -
3256 - if ($mode_wp)
 3265+ else
32573266 {
3258 - if ($pageviews_mobile)
3259 - { $out_html .= "<p>Switch to <a href='TablesPageViewsMonthly.htm'>regular (non-mobile) page views</a>" ; }
3260 - else
3261 - { $out_html .= "<p>Switch to <a href='TablesPageViewsMonthlyMobile.htm'>mobile page views</a>" . blank_text_after ("15/09/2010"," <b><font color=#008000>(June 2010: New)</font></b>") ; }
 3267+ $out_html .= "<p>View counts on this page have <font color=#FF0000><b>not</b></font> been normalized to months of 30 days. " ;
 3268+ $raw_or_not = "Raw Data, " ;
32623269 }
32633270
32643271 if ($normalize_days_per_month)
32653272 {
3266 - $out_html .= "<p>Switch to <a href='$href_not_normalized'>not normalized version</a>" ;
 3273+ $out_html .= "<p>Switch to $coverage1$coverage2<a href='$href_not_normalized'>Raw Data</a>" ;
32673274 $href_current_file = $href_normalized ;
32683275 }
32693276 else
32703277 {
3271 - $out_html .= "<p>For fairer comparison of monthly trends switch to <a href='$href_normalized'>normalized version</a>" ;
 3278+ $out_html .= "<p>Switch to $coverage1$coverage2<a href='$href_normalized'>Normalized</a> (for fairer comparison of monthly trends)" ;
32723279 $href_current_file = $href_not_normalized ;
32733280 }
32743281
32753282 if ($mode_wp)
32763283 {
3277 - $out_html .= "<p>Switch to " ;
3278 - if ($mode_wp && ($region ne ''))
3279 - { $out_html .= "<a href='http://stats.wikimedia.org/EN/$href_current_file'>all languages</a>, " ; }
3280 - if ($mode_wp && ($region ne 'africa'))
3281 - { $out_html .= "<a href='http://stats.wikimedia.org/EN_Africa/$href_current_file'>Africa</a>, " ; }
3282 - if ($mode_wp && ($region ne 'asia'))
3283 - { $out_html .= "<a href='http://stats.wikimedia.org/EN_Asia/$href_current_file'>Asia</a>, " ; }
3284 - if ($mode_wp && ($region ne 'america'))
3285 - { $out_html .= "<a href='http://stats.wikimedia.org/EN_America/$href_current_file'>America's</a>, " ; }
3286 - if ($mode_wp && ($region ne 'europe'))
3287 - { $out_html .= "<a href='http://stats.wikimedia.org/EN_Europe/$href_current_file'>Europe</a>, " ; }
3288 - if ($mode_wp && ($region ne 'india'))
3289 - { $out_html .= "<a href='http://stats.wikimedia.org/EN_India/$href_current_file'>India</a>, " ; }
3290 - if ($mode_wp && ($region ne 'oceania'))
3291 - { $out_html .= "<a href='http://stats.wikimedia.org/EN_Oceania/$href_current_file'>Oceania</a>, " ; }
3292 - if ($mode_wp && ($region ne 'artificial'))
3293 - { $out_html .= "<a href='http://stats.wikimedia.org/EN_Artificial/$href_current_file'>artificial languages</a>, " ; }
 3284+ if ($pageviews_mobile)
 3285+ { $out_html .= "<p>Switch to $coverage1$raw_or_not<a href='TablesPageViewsMonthly.htm'>Non-Mobile</a>, " .
 3286+ "<a href='TablesPageViewsMonthlyCombined.htm'>All Platforms</a>" ; }
 3287+ elsif ($pageviews_non_mobile)
 3288+ { $out_html .= "<p>Switch to $coverage1$raw_or_not<a href='TablesPageViewsMonthlyMobile.htm'>Mobile</a>, " .
 3289+ " <a href='TablesPageViewsMonthlyCombined.htm'>All Platforms</a>" ; }
 3290+ else
 3291+ { $out_html .= "<p>Switch to $coverage1$raw_or_not<a href='TablesPageViewsMonthly.htm'>Non-Mobile</a>, " .
 3292+ " <a href='TablesPageViewsMonthlyMobile.htm'>Mobile</a>" ; }
 3293+ }
 3294+
 3295+ $out_html .= "<p>Stay with $coverage2${raw_or_not}but ..." ;
 3296+ if ($mode_wp)
 3297+ {
 3298+ $root = $testmode ? ".." : "../.." ;
 3299+
 3300+ $out_html .= "<p>Switch to Wikipedia " ;
 3301+ if ($region ne '') { $out_html .= "<a href='$root/EN/$href_current_file'>All Languages</a>, " ; }
 3302+ if ($region ne 'africa') { $out_html .= "<a href='$root/EN_Africa/$href_current_file'>Africa</a>, " ; }
 3303+ if ($region ne 'asia') { $out_html .= "<a href='$root/EN_Asia/$href_current_file'>Asia</a>, " ; }
 3304+ if ($region ne 'america') { $out_html .= "<a href='$root/EN_America/$href_current_file'>America's</a>, " ; }
 3305+ if ($region ne 'europe') { $out_html .= "<a href='$root/EN_Europe/$href_current_file'>Europe</a>, " ; }
 3306+ if ($region ne 'india') { $out_html .= "<a href='$root/EN_India/$href_current_file'>India</a>, " ; }
 3307+ if ($region ne 'oceania') { $out_html .= "<a href='$root/EN_Oceania/$href_current_file'>Oceania</a>, " ; }
 3308+ if ($region ne 'artificial') { $out_html .= "<a href='$root/EN_Artificial/$href_current_file'>Artificial Languages</a>" ; }
32943309 $out_html =~ s/, $// ;
32953310 }
 3311+
 3312+ # if ($pageviews_non_mobile)
 3313+ # {
 3314+ $root = $mode_wp ? ".." : "../.." ;
 3315+
 3316+ $out_xref = "<a href='$root/EN/TablesPageViewsMonthlyAllProjects.htm'>All projects, </a>\n" ;
 3317+ $mode_wb ? ($out_xref .= "Wikibooks, ") : ($out_xref .= "<a href='$root/wikibooks/EN/$href_current_file2'>Wikibooks, </a>\n") ;
 3318+ $mode_wk ? ($out_xref .= "Wiktionary, ") : ($out_xref .= "<a href='$root/wiktionary/EN/$href_current_file2'>Wiktionaries, </a>\n") ;
 3319+ $mode_wn ? ($out_xref .= "Wikinews, ") : ($out_xref .= "<a href='$root/wikinews/EN/$href_current_file2'>Wikinews, </a>\n") ;
 3320+ $mode_wp ? ($out_xref .= "Wikipedia, ") : ($out_xref .= "<a href='$root/EN/$href_current_file2'>Wikipedias, </a>\n") ;
 3321+ $mode_wq ? ($out_xref .= "Wikiquote, ") : ($out_xref .= "<a href='$root/wikiquote/EN/$href_current_file2'>Wikiquotes, </a>\n") ;
 3322+ $mode_ws ? ($out_xref .= "Wikisource, ") : ($out_xref .= "<a href='$root/wikisource/EN/$href_current_file2'>Wikisources, </a>\n") ;
 3323+ $mode_wv ? ($out_xref .= "Wikiversity, ") : ($out_xref .= "<a href='$root/wikiversity/EN/$href_current_file2'>Wikiversities, </a>\n") ;
 3324+ $mode_wx ? ($out_xref .= "Wikispecial") : ($out_xref .= "<a href='$root/wikispecial/EN/$href_current_file2'>Wikispecial</a>\n") ;
 3325+ $out_html .= "<p>Switch to All Platforms, $coverage2 " . $out_xref ;
 3326+ # }
 3327+
 3328+
 3329+ $out_html .= "<p><font color=#A00000>Warning: page view counts from Nov 2009 till March 2010 are 10% to 20% too low, due to server overload.</font> " ;
 3330+ # "In July 2010 is was established that the server that collects and aggregates log data for all squids could not keep up with all incoming messages, and hence underreported page views. " .
 3331+ # "This issue has been resolved. For April - July 2010 the amount of underreporting could be inferred from still available log files and counts for these months have been corrected (read <a href='http://infodisiac.com/blog/wp-content/uploads/2010/07/assessment.pdf'>more</a>). For earlier months, possibly from Nov 2009 till March 2010 counts in the table below are too low.<p>" .
 3332+ # "<hr>" ;
32963333 }
32973334
32983335 $out_html .= "<table border=1 cellspacing=0 id='table1' class=b style='margin-top:5px; border:solid 1px #000000' summary='Header comparison table'>\n" ;
@@ -3327,9 +3364,8 @@
33283365
33293366 &GenerateComparisonTableMonthlyData (ord (&yyyymm2b (2001,1)), $f, 0, 999, $true, $true) ;
33303367
3331 - if ($pageviews)
3332 - { # &GenerateComparisonTableMaxData (0) ;
3333 - }
 3368+# if ($pageviews)
 3369+# { &GenerateComparisonTableMaxData (0) ; }
33343370
33353371
33363372 # $line_languages =~ s/<\/?a[^>]*>//g ;
@@ -3537,7 +3573,7 @@
35383574 "{ document.write (\"$legend\"); } \n" .
35393575 "<\/script>\n" ;
35403576
3541 - $out_html =~ s/(<hr[^>]*>)/$legend$out_xref$1/ ;
 3577+ # $out_html =~ s/(<hr[^>]*>)/$legend$out_xref$1/ ;
35423578 }
35433579 }
35443580
@@ -3557,14 +3593,17 @@
35583594
35593595 sub GenerateComparisonTablePageviewsAllProjects
35603596 {
3561 - my ($filter_source_normalized) = @_ ;
 3597+ my ($normalized) = @_ ;
35623598
3563 - return if $pageviews_mobile ;
 3599+ return if ! $pageviews_combined ;
35643600 return if $mode ne 'wp' ; # test here to keep calling code simple
35653601 return if ! $wikimedia ; # test here to keep calling code simple
35663602
3567 - &LogT ("\nGenerateComparisonTablePageviewsAllProjects $filter_source_normalized") ;
 3603+ my $javascript_ = $javascript ;
 3604+ my $javascript = $true ;
35683605
 3606+ &LogT ("GenerateComparisonTablePageviewsAllProjects\n") ;
 3607+
35693608 $legend = "<table border='0'><tr><td valign=bottom><table border=1 cellspacing=0 id='legend' class=b style='margin-top:5px; border:solid 1px #000000'><tr>" .
35703609 &TdBgColor ('I', '-50%') .
35713610 &TdBgColor ('I', '-40%') .
@@ -3580,8 +3619,9 @@
35813620 "</tr></table></td><td valign=bottom><table border='0'><tr><td>Percentage increase or decrease compared to previous $period</td></tr></table></td><tr></table>" ;
35823621
35833622 $pageviews_all_projects = $true ;
3584 - &GenerateHtmlStartComparisonTables ;
35853623
 3624+ my $content = &GenerateHtmlStartComparisonTables (-1, $normalized) ;
 3625+
35863626 if ($javascript)
35873627 {
35883628 $legend = "\n<script language='javascript'>\n" .
@@ -3591,9 +3631,6 @@
35923632 $out_html =~ s/(<hr[^>]*>)/$legend$1/ ;
35933633 }
35943634
3595 - $javascript_ = $javascript ;
3596 - $javascript = $true ;
3597 -
35983635 # if ($pageviews_mobile)
35993636 # {
36003637 # $out_html .= "<h3>Page views per project per month (mobile site) <font color=#A0A0A0>(plus links to edit trends)</font></h3>" ;
@@ -3607,41 +3644,101 @@
36083645
36093646 # $out_html .= "<p><b><font color='#600000'>Everything on this page is about page views, except links named 'Edit Trend'</font></b>" ;
36103647
3611 - $out_html .= "<b><font color=#A00000>Warning: page view counts from Nov 2009 till March 2010 are too low.</font></b> " .
3612 - "In July 2010 is was established that the server that collects and aggregates log data for all squids could not keep up with all incoming messages, and hence underreported page views. " .
3613 - "This issue has been resolved. For April - July 2010 the amount of underreporting could be inferred from still available log files and counts for these months have been corrected (read <a href='http://infodisiac.com/blog/wp-content/uploads/2010/07/assessment.pdf'>more</a>). For earlier months, possibly from Nov 2009 till March 2010 counts in the table below are too low.<hr><p>" ;
 3648+ $out_html .= "$legend_pageviews_monthly<hr>\n";
36143649
3615 - $out_html .= $legend_pageviews_monthly ;
 3650+ $out_html .= "<p><h2>$content</h2><p>" ;
36163651
3617 - if ($pageviews_mobile)
 3652+# if ($pageviews_mobile)
 3653+# {
 3654+# $href_normalized = 'TablesPageViewsMonthlyAllProjectsMobile.htm' ;
 3655+# $href_not_normalized = 'TablesPageViewsMonthlyAllProjectsOriginalMobile.htm' ;
 3656+# }
 3657+# else
 3658+# {
 3659+# $href_normalized = 'TablesPageViewsMonthlyAllProjects.htm' ;
 3660+# $href_not_normalized = 'TablesPageViewsMonthlyAllProjectsOriginal.htm' ;
 3661+# }
 3662+
 3663+ $href_normalized = 'TablesPageViewsMonthlyAllProjects.htm' ;
 3664+ $href_not_normalized = 'TablesPageViewsMonthlyAllProjectsOriginal.htm' ;
 3665+
 3666+ if ($normalized)
36183667 {
3619 - $href_normalized = 'TablesPageViewsMonthlyAllProjectsMobile.htm' ;
3620 - $href_not_normalized = 'TablesPageViewsMonthlyAllProjectsOriginalMobile.htm' ;
 3668+ $out_html .= "<p>View counts on this page have been normalized to months of 30 days, for fair comparison.." .
 3669+ "<p>Switch to All Projects, All Platforms, <a href='$href_not_normalized'>Raw Data.</a>" ;
 3670+ $coverage3 = "Normalized, " ;
36213671 }
36223672 else
36233673 {
3624 - $href_normalized = 'TablesPageViewsMonthlyAllProjects.htm' ;
3625 - $href_not_normalized = 'TablesPageViewsMonthlyAllProjectsOriginal.htm' ;
 3674+ $out_html .= "<p>View counts on this page have <font color=#FF0000><b>not</b></font> been normalized to months of 30 days..<p>" .
 3675+ "<p>Switch to All Projects, All Platforms, <a href='$href_normalized'>Normalized</a>." ;
 3676+ $coverage3 = "Raw Data, " ;
36263677 }
36273678
3628 - if ($filter_source_normalized =~ /not-normalized/)
3629 - { $out_html .= "<p><b><font color=#800000>View counts on this page have <font color=#FF0000>not</font> been normalized to months of 30 days.</font></b>. " .
3630 - "For fairer comparison of monthly trends <a href='$href_normalized'>switch to normalized version</a>." ; }
3631 - else
3632 - { $out_html .= "<p><b><font color=#008000>View counts on this page have been normalized to months of 30 days, for fair comparison.</font></b>. " .
3633 - "<a href='$href_not_normalized'>Switch to not normalized version.</a>" ; }
 3679+ $root = $testmode ? ".." : "../.." ;
36343680
 3681+ $out_html .= "<p>Switch to All Platforms, " ;
 3682+ $out_html .= "<a href='$root/wikibooks/EN/$href_current_file'>Wikibooks, </a>\n" ;
 3683+ $out_html .= "<a href='$root/wiktionary/EN/$href_current_file'>Wiktionaries, </a>\n" ;
 3684+ $out_html .= "<a href='$root/wikinews/EN/$href_current_file'>Wikinews, </a>\n" ;
 3685+ $out_html .= "<a href='$root/EN/$href_current_file'>Wikipedias, </a>\n" ;
 3686+ $out_html .= "<a href='$root/wikiquote/EN/$href_current_file'>Wikiquotes, </a>\n" ;
 3687+ $out_html .= "<a href='$root/wikisource/EN/$href_current_file'>Wikisources, </a>\n" ;
 3688+ $out_html .= "<a href='$root/wikiversity/EN/$href_current_file'>Wikiversities, </a>\n" ;
 3689+ $out_html .= "<a href='$root/wikispecial/EN/$href_current_file'>Wikispecial</a>\n" ;
 3690+
 3691+ $out_html .= "<p><font color=#A00000>Warning: page view counts from Nov 2009 till March 2010 are 10% to 20% too low due to server overload.</font> " ;
 3692+ # "In July 2010 is was established that the server that collects and aggregates log data for all squids could not keep up with all incoming messages, and hence underreported page views. " .
 3693+ # "This issue has been resolved. For April - July 2010 the amount of underreporting could be inferred from still available log files and counts for these months have been corrected (read <a href='http://infodisiac.com/blog/wp-content/uploads/2010/07/assessment.pdf'>more</a>). For earlier months, possibly from Nov 2009 till March 2010 counts in the table below are too low." .
 3694+ # "<hr><p>" ;
 3695+
36353696 $out_html .= "<table border=1 cellspacing=0 id='table1' class=b style='margin-top:5px; border:solid 1px #000000' summary='Header comparison table'>\n" ;
36363697
36373698 &Log ("\n") ;
3638 - my ($key1,$key2,$html) ;
 3699+ my ($topic,$id,$html) ;
36393700 $month_lo_pageviews = 999 ;
 3701+ # add data for non-mobile projects
36403702 foreach $code (qw (wb wk wn wp wq ws wv wx)) # in case of wx (wikispecial), file contains only results for commons
36413703 {
36423704 $path_in_views = $path_in ;
36433705 $path_in_views =~ s/csv_$mode/csv_$code/ ;
 3706+
36443707 $file_views_html = "$path_in_views/PageViewsPerMonthHtmlAllProjects.csv" ;
 3708+ if (! -e $file_views_html)
 3709+ { print "$file_views_html not found\n" ; next ; }
36453710
 3711+ $str_normalized = $normalized ? 'normalized' : 'not-normalized' ;
 3712+
 3713+ print "Read $file_views_html\n" ;
 3714+ open CSV, '<', $file_views_html ;
 3715+ while ($line = <CSV>)
 3716+ {
 3717+ next if $line !~ /^non-mobile,$str_normalized,/ ;
 3718+ chomp $line ;
 3719+ ($dummy1,$dummy2,$topic,$id,$html) = split (',', $line) ;
 3720+ $html =~ s/&comma;/,/g ;
 3721+ $html =~ s/&linebreak;/\n/g ;
 3722+
 3723+ # find first and last month to show
 3724+ if ($id =~ /header_\d+/)
 3725+ {
 3726+ ($id2 = $id) =~ s/[^\d]//g ;
 3727+ if ($id2 < $month_lo_pageviews)
 3728+ { $month_lo_pageviews = $id2 ; }
 3729+ if ($id2 > $month_hi_pageviews)
 3730+ { $month_hi_pageviews = $id2 ; }
 3731+ }
 3732+ $html_pageviews_all_projects {"$code,$topic,$id"} = $html ;
 3733+ }
 3734+ }
 3735+
 3736+ # add data for mobile wikipedia
 3737+ foreach $code (qw (wp))
 3738+ {
 3739+ $path_in_views = $path_in ;
 3740+ $path_in_views =~ s/csv_$mode/csv_$code/ ;
 3741+
 3742+ $file_views_html = "$path_in_views/PageViewsPerMonthHtmlAllProjects.csv" ;
36463743 if (! -e $file_views_html)
36473744 { print "$file_views_html not found\n" ; next ; }
36483745
@@ -3649,58 +3746,96 @@
36503747 open CSV, '<', $file_views_html ;
36513748 while ($line = <CSV>)
36523749 {
3653 - next if $line !~ /^$filter_source_normalized/ ;
 3750+
 3751+ next if $line !~ /^mobile,$str_normalized/ ;
36543752 chomp $line ;
3655 - $line =~ s/$filter_source_normalized,// ;
3656 - ($key1,$key2,$html) = split (',', $line) ;
 3753+ ($dummy1,$dummy2,$topic,$id,$html) = split (',', $line) ;
36573754 $html =~ s/&comma;/,/g ;
3658 - # &Log ("< $line = $key1 $key2 $html\n") ;
36593755 $html =~ s/&linebreak;/\n/g ;
36603756
3661 - if ($key2 =~ /header_\d+/)
 3757+ # find first and last month to show
 3758+ if ($id =~ /header_\d+/)
36623759 {
3663 - ($key2b = $key2) =~ s/[^\d]//g ;
3664 - if ($key2b < $month_lo_pageviews)
3665 - { $month_lo_pageviews = $key2b ; }
3666 - if ($key2b > $month_hi_pageviews)
3667 - { $month_hi_pageviews = $key2b ; }
 3760+ ($id2 = $id) =~ s/[^\d]//g ;
 3761+ if ($id2 < $month_lo_pageviews)
 3762+ { $month_lo_pageviews = $id2 ; }
 3763+ if ($id2 > $month_hi_pageviews)
 3764+ { $month_hi_pageviews = $id2 ; }
36683765 }
3669 - $html_pageviews_all_projects {"$code,$key1,$key2"} = $html ;
3670 - $html_row_keys_all_projects {$key1}++ ;
 3766+ $html_pageviews_all_projects {"$code.m,$topic,$id"} = $html ;
36713767 }
36723768 }
3673 - print "\n\$month_hi_pageviews $month_hi_pageviews\n" ;
3674 - print "\$month_lo_pageviews $month_lo_pageviews\n" ;
36753769
 3770+ # add data for all platforms for wikipedia
 3771+ foreach $code (qw (wp))
 3772+ {
 3773+ $path_in_views = $path_in ;
 3774+ $path_in_views =~ s/csv_$mode/csv_$code/ ;
 3775+
 3776+ $file_views_html = "$path_in_views/PageViewsPerMonthHtmlAllProjects.csv" ;
 3777+ if (! -e $file_views_html)
 3778+ { print "$file_views_html not found\n" ; next ; }
 3779+
 3780+ print "Read $file_views_html\n" ;
 3781+ open CSV, '<', $file_views_html ;
 3782+ while ($line = <CSV>)
 3783+ {
 3784+
 3785+ next if $line !~ /^combined,$str_normalized/ ;
 3786+ chomp $line ;
 3787+ ($dummy1,$dummy2,$topic,$id,$html) = split (',', $line) ;
 3788+ $html =~ s/&comma;/,/g ;
 3789+ $html =~ s/&linebreak;/\n/g ;
 3790+
 3791+ # find first and last month to show
 3792+ if ($id =~ /header_\d+/)
 3793+ {
 3794+ ($id2 = $id) =~ s/[^\d]//g ;
 3795+ if ($id2 < $month_lo_pageviews)
 3796+ { $month_lo_pageviews = $id2 ; }
 3797+ if ($id2 > $month_hi_pageviews)
 3798+ { $month_hi_pageviews = $id2 ; }
 3799+ }
 3800+ $html_pageviews_all_projects {"$code.c,$topic,$id"} = $html ;
 3801+ }
 3802+ }
 3803+
 3804+ # print "\n\$month_hi_pageviews $month_hi_pageviews\n" ; # qqq
 3805+ # print "\$month_lo_pageviews $month_lo_pageviews\n" ;
 3806+
36763807 $out_html .= "<p>\n\n<table border=1>\n" ;
36773808
36783809 my %project_names ;
3679 - $project_names {'wb'} = 'wikibooks' ;
3680 - $project_names {'wk'} = 'wiktionary' ;
3681 - $project_names {'wn'} = 'wikinews' ;
3682 - $project_names {'wp'} = 'wikipedia' ;
3683 - $project_names {'wq'} = 'wikiquote' ;
3684 - $project_names {'ws'} = 'wikisource' ;
3685 - $project_names {'wv'} = 'wikiversity' ;
3686 - $project_names {'wx'} = 'commons' ;
 3810+ $project_names {'wb'} = 'wikibooks' ;
 3811+ $project_names {'wk'} = 'wiktionary' ;
 3812+ $project_names {'wn'} = 'wikinews' ;
 3813+ $project_names {'wp'} = 'wikipedia<br>non-mobile' ;
 3814+ $project_names {'wp.m'} = 'wikipedia<br>mobile' ;
 3815+ $project_names {'wp.c'} = 'wikipedia<br>total' ;
 3816+ $project_names {'wq'} = 'wikiquote' ;
 3817+ $project_names {'ws'} = 'wikisource' ;
 3818+ $project_names {'wv'} = 'wikiversity' ;
 3819+ $project_names {'wx'} = 'commons' ;
36873820
36883821 $url_report_pageviews {'wb'} = "http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm" ;
36893822
36903823 $line_html = &the;
3691 - foreach $code (qw (wb wk wn wp wq ws wv wx))
 3824+ foreach $code (qw (wb wk wn wp wp.m wp.c wq ws wv wx))
36923825 {
3693 - if ($pageviews_normal)
 3826+ if ($pageviews_non_mobile)
36943827 {
36953828 $root = "http://stats.wikimedia.org" ;
36963829
3697 - if ($code eq 'wb') { $link = "<a href='$root/wikibooks/EN/TablesPageViewsMonthly.htm'>Wikibooks</a>" ; }
3698 - if ($code eq 'wk') { $link = "<a href='$root/wiktionary/EN/TablesPageViewsMonthly.htm'>Wiktionaries</a>" ; }
3699 - if ($code eq 'wn') { $link = "<a href='$root/wikinews/EN/TablesPageViewsMonthly.htm'>Wikinews</a>" ; }
3700 - if ($code eq 'wp') { $link = "<a href='$root/EN/TablesPageViewsMonthly.htm'>Wikipedia</a>" ; }
3701 - if ($code eq 'wq') { $link = "<a href='$root/wikiquote/EN/TablesPageViewsMonthly.htm'>Wikiquote</a>" ; }
3702 - if ($code eq 'ws') { $link = "<a href='$root/wikisource/EN/TablesPageViewsMonthly.htm'>Wikisource</a>" ; }
3703 - if ($code eq 'wv') { $link = "<a href='$root/wikiversity/EN/TablesPageViewsMonthly.htm'>Wikiversity</a>" ; }
3704 - if ($code eq 'wx') { $link = "<a href='$root/wikispecial/EN/TablesPageViewsMonthly.htm'>Commons</a>" ; }
 3830+ if ($code eq 'wb') { $link = "<a href='$root/wikibooks/EN/TablesPageViewsMonthly.htm'>Wikibooks<br>&nbsp;</a>" ; }
 3831+ if ($code eq 'wk') { $link = "<a href='$root/wiktionary/EN/TablesPageViewsMonthly.htm'>Wiktionaries<br>&nbsp;</a>" ; }
 3832+ if ($code eq 'wn') { $link = "<a href='$root/wikinews/EN/TablesPageViewsMonthly.htm'>Wikinews<br>&nbsp;</a>" ; }
 3833+ if ($code eq 'wp') { $link = "<a href='$root/EN/TablesPageViewsMonthly.htm'>Wikipedia<br>Non-mobile</a>" ; }
 3834+ if ($code eq 'wp.m') { $link = "<a href='$root/EN/TablesPageViewsMonthlyMobile.htm'>Wikipedia<br>Mobile</a>" ; }
 3835+ if ($code eq 'wp.c') { $link = "<a href='$root/EN/TablesPageViewsMonthlyMobile.htm'>Wikipedia<br>Total</a>" ; }
 3836+ if ($code eq 'wq') { $link = "<a href='$root/wikiquote/EN/TablesPageViewsMonthly.htm'>Wikiquote<br>&nbsp;</a>" ; }
 3837+ if ($code eq 'ws') { $link = "<a href='$root/wikisource/EN/TablesPageViewsMonthly.htm'>Wikisource<br>&nbsp;</a>" ; }
 3838+ if ($code eq 'wv') { $link = "<a href='$root/wikiversity/EN/TablesPageViewsMonthly.htm'>Wikiversity<br>&nbsp;</a>" ; }
 3839+ if ($code eq 'wx') { $link = "<a href='$root/wikispecial/EN/TablesPageViewsMonthly.htm'>Commons<br>&nbsp;</a>" ; }
37053840 $line_html .= &th("&nbsp;$link&nbsp;") ;
37063841 }
37073842 else
@@ -3709,33 +3844,36 @@
37103845 $out_html .= &tr ($line_html) ;
37113846
37123847 $line_html = &the ;
3713 - foreach $code (qw (wb wk wn wp wq ws wv wx))
 3848+ foreach $code (qw (wb wk wn wp wp.m wq ws wv wx))
37143849 {
3715 - if ($code eq 'wb') { $link = "$root/wikibooks/EN/PlotEditsZZ.png" ; }
3716 - if ($code eq 'wk') { $link = "$root/wiktionary/EN/PlotEditsZZ.png" ; }
3717 - if ($code eq 'wn') { $link = "$root/wikinews/EN/PlotEditsZZ.png" ; }
3718 - if ($code eq 'wp') { $link = "$root/EN/PlotEditsZZ.png" ; }
3719 - if ($code eq 'wq') { $link = "$root/wikiquote/EN/PlotEditsZZ.png" ; }
3720 - if ($code eq 'ws') { $link = "$root/wikisource/EN/PlotEditsZZ.png" ; }
3721 - if ($code eq 'wv') { $link = "$root/wikiversity/EN/PlotEditsZZ.png" ; }
3722 - if ($code eq 'wx') { $link = "$root/wikispecial/EN/PlotEditsZZ.png" ; }
 3850+ if ($code eq 'wb') { $link = "$root/wikibooks/EN/PlotEditsZZ.png" ; }
 3851+ if ($code eq 'wk') { $link = "$root/wiktionary/EN/PlotEditsZZ.png" ; }
 3852+ if ($code eq 'wn') { $link = "$root/wikinews/EN/PlotEditsZZ.png" ; }
 3853+ if ($code eq 'wp') { $link = "$root/EN/PlotEditsZZ.png" ; }
 3854+ if ($code eq 'wp.m') { $link = "$root/EN/PlotEditsZZ.png" ; }
 3855+ if ($code eq 'wp.c') { $link = "$root/EN/PlotEditsZZ.png" ; }
 3856+ if ($code eq 'wq') { $link = "$root/wikiquote/EN/PlotEditsZZ.png" ; }
 3857+ if ($code eq 'ws') { $link = "$root/wikisource/EN/PlotEditsZZ.png" ; }
 3858+ if ($code eq 'wv') { $link = "$root/wikiversity/EN/PlotEditsZZ.png" ; }
 3859+ if ($code eq 'wx') { $link = "$root/wikispecial/EN/PlotEditsZZ.png" ; }
37233860 $line_html .= &tdcb ("<a href='$link'>Edit Trends</a>") ;
37243861 }
37253862 $out_html .= &tr ($line_html) ;
37263863
3727 - foreach $key1 (qw (year_trend view_rates sparklines forecast forecast2))
 3864+ foreach $topic (qw (year_trend view_rates sparklines forecast forecast2))
37283865 {
3729 - $line_html = $html_pageviews_all_projects {"wp,$key1,header"} ;
 3866+ $line_html = $html_pageviews_all_projects {"wp,$topic,header"} ;
37303867
3731 - if ($key1 =~ /year_trend|forecast/)
 3868+ if ($topic =~ /year_trend|forecast/)
37323869 { $line_html .= "\n<script language='javascript'>\n" ; }
37333870
3734 - foreach $code (qw (wb wk wn wp wq ws wv wx))
 3871+ foreach $code (qw (wb wk wn wp wp.m wp.c wq ws wv wx))
37353872 {
3736 - $cell_html = $html_pageviews_all_projects {"$code,$key1,data"} ;
 3873+ $cell_html = $html_pageviews_all_projects {"$code,$topic,data"} ;
 3874+
37373875 if ($cell_html eq '')
37383876 {
3739 - if ($key1 =~ /year_trend|forecast|forecast2/)
 3877+ if ($topic =~ /year_trend|forecast|forecast2/)
37403878 { $cell_html = "tdg('');" ; }
37413879 else
37423880 { $cell_html = "<td>&nbsp;</td>" ; }
@@ -3743,7 +3881,7 @@
37443882 $line_html .= $cell_html ;
37453883 }
37463884
3747 - if ($key1 =~ /year_trend|forecast|forecast2/)
 3885+ if ($topic =~ /year_trend|forecast|forecast2/)
37483886 { $line_html .= "\n</script>\n" ; }
37493887
37503888 $out_html .= &tr ($line_html) ;
@@ -3755,7 +3893,7 @@
37563894 next if $line_html eq '' ;
37573895
37583896 $line_html .= "\n<script language='javascript'>\n" ;
3759 - foreach $code (qw (wb wk wn wp wq ws wv wx))
 3897+ foreach $code (qw (wb wk wn wp wp.m wp.c wq ws wv wx))
37603898 {
37613899 $cell_html = $html_pageviews_all_projects {"$code,monthly,data_$m"} ;
37623900 if ($cell_html eq '')
@@ -3767,11 +3905,11 @@
37683906 }
37693907
37703908 $line_html = &the;
3771 - foreach $code (qw (wb wk wn wp wq ws wv wx))
 3909+ foreach $code (qw (wb wk wn wp wp.m wp.c wq ws wv wx))
37723910 { $line_html .= &th('&nbsp;'. ucfirst($project_names {$code}).'&nbsp;') ; }
37733911 $out_html .= &tr ($line_html) ;
37743912
3775 - # &GenerateComparisonTableEditPlots ;
 3913+# &GenerateComparisonTableEditPlots ;
37763914 # &GenerateComparisonTableYearlyGrowth (0) ;
37773915 # &GenerateComparisonTableViewRates (0) ;
37783916 # &GenerateComparisonTableSparklinesWithBars ;
@@ -3788,14 +3926,18 @@
37893927
37903928 $out_html .= "</body>\n</html>" ;
37913929
3792 - if ($filter_source_normalized =~ /not-normalized/)
 3930+ if ($normalized)
 3931+ { $file_html = $path_out . "TablesPageViewsMonthlyAllProjects.htm" ; }
 3932+ else
37933933 { $file_html = $path_out . "TablesPageViewsMonthlyAllProjectsOriginal.htm" ; }
3794 - else
3795 - { $file_html = $path_out . "TablesPageViewsMonthlyAllProjects.htm" ; }
37963934
 3935+ print "HTML FILE $file_html\n\n" ;
 3936+
37973937 open "FILE_HTML", ">", $file_html ;
37983938 print FILE_HTML &AlignPerLanguage ($out_html) ;
37993939 close "FILE_HTML" ;
 3940+
 3941+ $javascript = $javascript_ ;
38003942 }
38013943
38023944 sub BgColor
@@ -4468,7 +4610,6 @@
44694611
44704612 for (my $m = $dumpmonth_ord ; $m >= $m0 ; $m--)
44714613 {
4472 -
44734614 $line_html = '' ;
44744615 if (($m % 12 == 0) && ($m < $dumpmonth_ord) && ($m > $m0))
44754616 {
@@ -4555,6 +4696,8 @@
45564697 }
45574698 }
45584699
 4700+# print "$wp $m $value $total\n" ; # qqq
 4701+
45594702 if (($m < $MonthlyStatsWpStop {"zz"}) &&
45604703 (($m == $MonthlyStatsWpStop {$wp}) && $MonthlyStatsWpIncomplete {$wp}))
45614704 { $value = 0 ; }
Index: trunk/wikistats/dumps/WikiReportsOutputPageViews.pm
@@ -8,7 +8,8 @@
99 # "<script language=\"javascript\" type=\"text/javascript\" src=\"..\/jquery-1.2.6.min.js\"></script>\n" .
1010 # "<script language=\"javascript\" type=\"text/javascript\" src=\"..\/jquery.sparkline.js\"></script>\n" ;
1111
12 - $legend_pageviews_monthly = "<table><tr>" ;
 12+# $legend_pageviews_monthly = "<table><tr><td valign=center><b>Legend</b></td><td>" ;
 13+ $legend_pageviews_monthly = "<table><tr>" ;
1314 $legend_pageviews_monthly .= &th ("<span class=d1>Yearly trend</span>") ;
1415 $legend_pageviews_monthly .= &td ("<table width=100% cellspacing=0 class=b style='margin-top:5px; border:solid 1px #000000'><tr><td class=cb bgcolor=#FFFFFF><span class=d1><b>r</b> th</span><br><span class=d2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<b>i</b> %</span></td></tr></table>") ;
1516 $legend_pageviews_monthly .= &td ("&nbsp;e.g.&nbsp;") ;
@@ -23,6 +24,7 @@
2425 $legend_pageviews_monthly .= &td ("&nbsp;&nbsp;&nbsp;") ;
2526 $legend_pageviews_monthly .= &td ("<span class=d1><b>c</b>%=change compared to previous month, &nbsp;<b>s</b>%=share of page views for this language, &nbsp;<b>r</b> th=rank this month<br>v=page views this month ($out_million = 10<sup>6</sup>, $out_thousand = 10<sup>3</sup>)</span>") ;
2627 $legend_pageviews_monthly .= "</tr></table>" ;
 28+# $legend_pageviews_monthly .= "</td></tr></table>" ;
2729
2830 &GenerateComparisonTable (0) ;
2931
@@ -44,20 +46,27 @@
4547 }
4648
4749 my $file_html ;
48 - if ($pageviews_normal)
 50+ if ($pageviews_non_mobile)
4951 {
5052 if ($normalize_days_per_month)
5153 { $file_html = $path_out . "TablesPageViewsMonthly.htm" ; }
5254 else
5355 { $file_html = $path_out . "TablesPageViewsMonthlyOriginal.htm" ; }
5456 }
55 - else # $pageviews_mobile
 57+ elsif ($pageviews_mobile)
5658 {
5759 if ($normalize_days_per_month)
5860 { $file_html = $path_out . "TablesPageViewsMonthlyMobile.htm" ; }
5961 else
6062 { $file_html = $path_out . "TablesPageViewsMonthlyOriginalMobile.htm" ; }
6163 }
 64+ else # $pageviews_combined
 65+ {
 66+ if ($normalize_days_per_month)
 67+ { $file_html = $path_out . "TablesPageViewsMonthlyCombined.htm" ; }
 68+ else
 69+ { $file_html = $path_out . "TablesPageViewsMonthlyOriginalCombined.htm" ; }
 70+ }
6271
6372 print "\n\nFILE HTML '$file_html'\n\n" ;
6473
@@ -78,7 +87,8 @@
7988 else
8089 { return if $wp ne 'zz' and $wp ne '' ; } # test here to keep call tidy
8190
82 - return if ! $pageviews ; # test here to keep call tidy
 91+ return if ! $pageviews ; # test here to keep call tidy
 92+ return if $region ne '' ; # test here to keep call tidy
8393
8494 $data =~ s/,/&comma;/g ;
8595 $data =~ s/\n/&linebreak;/g ;
@@ -89,6 +99,7 @@
90100 $data =~ s/100.00\%// ;
91101 $data =~ s/--// ;
92102 }
 103+
93104 push @csv_pageviews_all_projects, "$keys_html_pageviews_all_projects,$keys,$data\n" ;
94105 # print "= $keys_html_pageviews_all_projects,,$keys,$data\n" ;
95106 }
@@ -96,8 +107,11 @@
97108 # write rendered html (header column and 'all languages' column = 'zz' = Sigma) for reuse on page views report for all projects
98109 sub WriteMonthlyStatsHtmlAllProjects
99110 {
100 - &LogT ("\nWriteMonthlyStatsHtmlAllProjects") ;
 111+ &LogT ("\nWriteMonthlyStatsHtmlAllProjects\n") ;
101112
 113+ return if ! $pageviews ; # test here to keep call tidy
 114+ return if $region ne '' ; # test here to keep call tidy
 115+
102116 my (@csv,$source, $normalized) ;
103117
104118 open CSV, '<', $file_csv_pageviewsmonthly_html || abort ("Could not open file $file_csv_pageviewsmonthly_html") ;
Index: trunk/wikistats/dumps/WikiReports_EN.pm
@@ -72,8 +72,10 @@
7373 $out_botactivity = "Bot activity" ; # new
7474 $out_stats_for = "Statistics for " ; # new
7575 $out_stats_per = "Statistics per " ; # new
76 -$out_documentation = "Documentation: see <a href='http://meta.wikipedia.org/wiki/Wikistats'>Meta</a>" ; #new
 76+#$out_documentation = "Documentation: see <a href='http://meta.wikipedia.org/wiki/Wikistats'>Meta</a>" ; #new
 77+$out_documentation = "Documentation" ;
7778 $out_scripts = "Scripts" ;
 79+$out_csv_files = "CSV files" ;
7880
7981 $out_gigabytes = "GB" ;
8082 $out_megabytes = "MB" ;
Index: trunk/wikistats/dumps/WikiReportsOutputMisc.pm
@@ -72,29 +72,118 @@
7373 }
7474 }
7575
 76+#Hi Erik;
 77+#I have found a cool script for taking snapshots of webpages. I saw some time ago in your website a gallery with screenshots of Wikipedia mainpages. I loved that, and I have been searching for a script which works in Linux (you used urlbmp.exe, i guess).
 78+#The script is from a bot of RationalWiki.[1] I have tested it in my Linux PC and works fine (I attached you the files). You can run it:
 79+#python snap.py http://en.wikipedia.org --geometry 1024 1 > a.png
 80+#I have installed pyqt4-dev-tools and pyqt-tools with apt-get. It would be nice if you can run it in a cronjob in the WMF servers.
 81+#I will try it in Toolserver, but previously, I have to request to an admin to install those packages.
 82+#Regards,
 83+#emijrp
 84+#[1] http://rationalwiki.org/wiki/User:Capturebot2
 85+#[2] http://rationalwiki.org/wiki/User:Capturebot2/webkit2png.py
 86+
# Generate the main page screenshot gallery for the current project ($mode).
#
# Writes into $path_out:
#   Gallery_<Mode>.bat               batch file with url2bmp.exe commands that take the screenshots
#   Gallery_<Mode>.htm               gallery page, full size screenshots
#   Gallery_<Mode>_40.htm            gallery page, reduced screenshots
#   Gallery_<Mode>_1024_768.htm      gallery page, screenshots tagged 1024_768
#   Gallery_<Mode>_768_1024.htm      gallery page, screenshots tagged 768_1024
#   Gallery_ImageList_<Mode>.txt     list of image file names, one per language
#
# Takes no arguments and returns nothing useful; works on package globals
# ($mode, $mode_wx, $path_in, $path_out, @languages, %out_languages, $out_publication).
# $out_bat, $gallery_image_list and $url2bmp are package globals that are appended to / incremented,
# not reset here, exactly as before.
#
# Fixes compared to the previous version:
# - "${mode}_$wp2_768_1024.png" interpolated the non-existent variable $wp2_768_1024,
#   so all active screenshot commands wrote to '<mode>_.png'; now "${wp2}_768_1024"
# - url2bmp option '-wait 1' had lost its leading dash
# - the 1024_768 and 768_1024 pages were written without closing table, footer and </body>
# - stray '</a>' removed from the page description
# - files that can not be opened are reported in the log instead of being skipped silently
sub GenerateGallery
{
  &LogT ("\nGenerate Gallery, mode $mode\n") ;
  &LogT ("In: $path_in\n") ;
  &LogT ("Out: $path_out\n") ;

  my $languages = @languages - 1 ; # minus 'zz'
  my $date      = &GetDate (time) ;

  my $mode2 = ucfirst ($mode) ;
  my $out_publication2 = $out_publication ;

  if ($mode_wx)
  { $out_publication2 = "Wikimedia Miscellaneous Projects" ; }

  my $description = " Screenshots of $languages $out_publication2 main pages, collected on $date, sorted by average page views per project. See <a href='index.html'><font color=#A0A0D0>more screenshots</font></a>" ;
  my $footer      = "<small><font color=#A0A0A0> Screenshots collected with <a href='http://www.pixel-technology.com/freeware/url2bmp/'><font color=#A0A0D0>url2bmp.exe</font></a> (Windows freeware)<br>\n" .
                    " Please note: on a few pages javascript errors may have influenced page rendition<br>" .
                    " Script author: <a href='http://infodisiac.com'><font color=#A0A0D0>Erik Zachte</font></a></font></small>" ;
  my $header      = "<html><head><title>$out_publication2 Main Page Gallery - screen shots taken $date</title></head>\n" .
                    "<body bgcolor=black><small><font color=#C0C0C0>$description</font></small>" .
                    "<table summary='Gallery'><tr>\n" ;

  # one gallery page per image variant; the suffix is used in both image and page file names
  my @variants = ('', '_40', '_1024_768', '_768_1024') ;
  my %out_html ;
  foreach my $variant (@variants)
  { $out_html {$variant} = $header ; }

  foreach $wp (@languages)
  {
    if ($wp eq "zz") { next ; }

    # NOTE(review): prefix 'wp_' and $wp (not $mode and $wp2) differ from the image names used below - confirm with consumer of this list
    $gallery_image_list .= "'wp_$wp.png', // " . $out_languages {$wp} . "\n" ;

    my $base = &GetProjectBaseUrl ($wp) ;
    &LogT ("Base: " . sprintf ("%-10s", $wp) . " -> $base\n") ;

    (my $wp2 = $wp) =~ s/_/-/g ;
    $url2bmp++ ;

    # pause on every third screenshot only
    my $wait1 = '' ;
    my $wait2 = '' ;
    if ($url2bmp % 3 == 0)
    { $wait1 = "-wait 1" ; }

    # Q&D: download twice until batch image resize figured out with either convert.exe or nconvert.exe
    # first two commands are disabled ('rem'), only the 1024x768 and 768x1024 windows are captured
    # NOTE(review): file tag 768_1024 goes with -wx 1024 -wy 768 and vice versa - confirm this is intended
    $out_bat .= "rem url2bmp.exe -url \"$base?country=xx\" -file \"${mode}_${wp2}.png\" -format PNG -wx 1000 -wy 3000 -bx 1000 -by 3000 $wait1 -notinteractive\nrem $url2bmp/$languages\n" .
                "rem url2bmp.exe -url \"$base?country=xx\" -file \"${mode}_${wp2}_40.png\" -format PNG -wx 1000 -wy 3000 -bx 250 -by 1200 $wait2 -notinteractive\nrem $url2bmp/$languages\n" .
                "    url2bmp.exe -url \"$base?country=xx\" -file \"${mode}_${wp2}_768_1024.png\" -format PNG -wx 1024 -wy 768 -bx 1024 -by 768 $wait1 -notinteractive\nrem $url2bmp/$languages\n" .
                "    url2bmp.exe -url \"$base?country=xx\" -file \"${mode}_${wp2}_1024_768.png\" -format PNG -wx 768 -wy 1024 -bx 768 -by 1024 $wait1 -notinteractive\nrem $url2bmp/$languages\n" ;
    # "nconvert.exe -resize 40% 40% -o wx_commons_40.png wx_commons.png >> nconvert.txt 2>> nconvert2.err\n\n" ;

    foreach my $variant (@variants)
    { $out_html {$variant} .= &GalleryCell ($wp2, $base, $out_languages {$wp}, "${mode}_${wp2}${variant}.png") ; }
  }

  foreach my $variant (@variants)
  { $out_html {$variant} .= "</tr></table>\n<p>$footer</body>" ; }

  &WriteGalleryFile ($path_out . "Gallery_$mode2.bat", $out_bat) ;

  foreach my $variant (@variants)
  { &WriteGalleryFile ($path_out . "Gallery_${mode2}${variant}.htm", $out_html {$variant}) ; }

  &WriteGalleryFile ($path_out . "Gallery_ImageList_${mode2}.txt", $gallery_image_list) ;
}

# Helper for GenerateGallery: html for one table cell = language code, link to project, screenshot
sub GalleryCell
{
  my ($code, $base, $name, $image) = @_ ;

  return "<td align='center' valign='top'>\n" .
         "<small><b><font color='#AAAAAA'>" . uc ($code) . "</font>" .
         "&nbsp;&nbsp;&nbsp;" .
         "<a href='$base'><font color='#AAAAAA'>" . $name . "</font></a></small></b><p>" .
         "<img src='$image'></td>\n" ;
}

# Helper for GenerateGallery: write $content to $file, log (do not abort) when file can not be opened
sub WriteGalleryFile
{
  my ($file, $content) = @_ ;
  my $fh ;

  if (! open ($fh, ">", $file))
  {
    &LogT ("Could not open file '$file' for writing: $!\n") ;
    return ;
  }

  print $fh $content ;
  close $fh ;
}
100189
101190 sub GenerateSiteMapNew
@@ -646,7 +735,7 @@
647736
648737 if ($region eq '')
649738 {
650 - if (($mode_wx) && ($growth_summary_generated))
 739+ if ((! $mode_wx) && ($growth_summary_generated))
651740 { $out_html .= &tr (&tdlb (&w ("<a href='TablesWikipediaGrowthSummary.htm'>$out_creation_history</a>"))) ; }
652741
653742 if ($mode_wx)
@@ -1013,8 +1102,12 @@
10141103
10151104 sub GenerateHtmlStartComparisonTables
10161105 {
 1106+ &LogT ("GenerateHtmlStartComparisonTables\n") ;
 1107+
10171108 if ($pageviews)
10181109 {
 1110+ my ($dummy, $normalized) = @_ ;
 1111+
10191112 my $out_zoom = "" ;
10201113 my $out_options = "" ;
10211114 my $out_explanation = "" ;
@@ -1035,43 +1128,51 @@
10361129
10371130 $out_zoom = $out_color_buttons . " " . $out_zoom_buttons2 ;
10381131
 1132+ my ($out_html_title, $out_page_title) ;
10391133
1040 - my ($out_html_title, $out_page_title) ;
10411134 if ($pageviews_all_projects)
1042 - {
1043 - $out_html_title = "$out_wikimedia $out_pageviews - All projects" ;
1044 - $out_page_title = "$out_wikimedia $out_pageviews - All projects" ;
1045 - }
 1135+ { $out_html_title = "$out_pageviews for <font color=#008000>$out_wikimedia, All Projects</font>" ; }
10461136 else
1047 - {
1048 - $out_html_title = "$out_publication $out_pageviews" ;
1049 - $out_page_title = "$out_publication $out_pageviews" ;
1050 - }
 1137+ { $out_html_title = "$out_pageviews for <font color=#008000>$out_publication</font>" ; }
10511138
10521139 if ($region ne "")
1053 - {
1054 - $out_html_title .= " - " . ucfirst ($region) ;
1055 - $out_page_title .= " - " . ucfirst ($region) ;
1056 - }
 1140+ { $out_html_title .= " for <font color=#008000>" . ucfirst ($region) . "</font>"; }
10571141
 1142+ if ($pageviews_non_mobile)
 1143+ { $out_html_title .= "<font color=#008000>, Non-mobile</font>" ; }
 1144+ elsif ($pageviews_mobile)
 1145+ { $out_html_title .= "<font color=#008000>, Mobile</font>" ; }
 1146+ elsif ($pageviews_combined)
 1147+ { $out_html_title .= "<font color=#008000>, All Platforms</font>" ; }
10581148
 1149+ if ($normalized)
 1150+ { $out_html_title .= "<font color=#008000>, Normalized</font>" ; }
 1151+ else
 1152+ { $out_html_title .= "<font color=#008000>, Raw data</font>" ; }
 1153+
 1154+ $out_page_title = $out_html_title ;
 1155+ $out_page_title2 = $out_html_title ;
 1156+ $out_html_title =~ s/<[^>]*>//g ;
 1157+
10591158 if (defined ($dumpdate_hi))
10601159 {
10611160 $dumpdate2 = timegm (0,0,0,
10621161 substr ($dumpdate_hi,6,2),
10631162 substr ($dumpdate_hi,4,2)-1,
10641163 substr ($dumpdate_hi,0,4)-1900) ;
1065 - $out_page_title .= "<b>" . &GetDate ($dumpdate2) . "<\/b>" ;
 1164+ $out_page_title2 .= "<br><b>" . &GetDate ($dumpdate2) . "<\/b>" ;
10661165 }
 1166+
10671167 # $out_crossref = &GenerateCrossReference ($language) ;
10681168
10691169 # &ReadLog ($language) ;
10701170
1071 - &GenerateHtmlStart ($out_html_title, $out_zoom, $out_options,
1072 - $out_page_title, $out_page_subtitle, $out_explanation,
1073 - $out_button_prev, $out_button_next, $out_button_switch,
1074 - $out_crossref, $out_msg) ;
1075 - return ;
 1171+ &GenerateHtmlStart ($out_html_title, $out_zoom, $out_options,
 1172+ $out_page_title2, $out_page_subtitle, $out_explanation,
 1173+ $out_button_prev, $out_button_next, $out_button_switch,
 1174+ $out_crossref, $out_msg) ;
 1175+
 1176+ return ($out_page_title) ;
10761177 }
10771178
10781179 my $ndx_report = shift ;
@@ -1080,7 +1181,7 @@
10811182 my $out_page_title = $out_statistics ;
10821183 my $out_page_subtitle = $out_report_descriptions [$ndx_report] ;
10831184
1084 - print "ndx_report $ndx_report out_page_subtitle $out_page_subtitle\n" ;
 1185+ print "ndx $ndx_report -> page subtitle '$out_page_subtitle'\n" ;
10851186
10861187 my $out_html_title = $out_statistics . " - Tables - " . $out_page_subtitle ;
10871188 my $out_explanation = $out_tbl3_legend [$ndx_report] ;
@@ -1178,6 +1279,8 @@
11791280 $out_page_title, $out_page_subtitle, $out_explanation,
11801281 $out_button_prev, $out_button_next, $out_button_switch,
11811282 $out_crossref, $out_msg) ;
 1283+
 1284+ return $out_html_title ;
11821285 }
11831286
11841287 sub GenerateHtmlStart
@@ -1354,7 +1457,7 @@
13551458 substr ($dumpdate_hi,0,4)-1900) ;
13561459 }
13571460
1358 - $path_scripts = "http://stats.wikimedia.org/scripts.zip" ;
 1461+ $path_about = "http://stats.wikimedia.org/index.html#fragment-14" ;
13591462
13601463 if ($out_delay ne "")
13611464 { $out_delay = "$out_delay<p>" ; }
@@ -1374,8 +1477,7 @@
13751478 $out_author . ":" . $out_myname .
13761479 " (<a href='" . $out_mysite . "'>" . $out_site . "</a>)\n<br>" .
13771480 ($wikimedia ? $out_mail . ":" . $out_mymail . "<br>\n" : "") .
1378 - ($wikimedia ? $out_documentation . "<br>\n" : "" ) .
1379 - ($wikimedia ? $out_scripts . ": <a href='$path_scripts'>scripts.zip</a>\n" : "") .
 1481+ ($wikimedia ? "$out_documentation / $out_scripts / $out_csv_files" . ": <a href='$path_about'>About WikiStats</a>\n" : "") .
13801482 $out_translator . "\n" .
13811483 $out_ploticus2 . $out_r . "\n" .
13821484 ((! $wikimedia && $mail ne "") ? "<p>" .$siteadmin . "\n" . $mail . "\n" : "") .
Index: trunk/wikistats/dumps/WikiReportsInput.pm
@@ -34,6 +34,7 @@
3535 $pageviews = $options {"v"} ;
3636 $region = $options {"r"} ;
3737 $normalize_days_per_month = $options {"n"} ;
 38+ $dump_gallery = $options {"G"} ;
3839
3940 # Indian languages
4041 # as Assamese (http://as.wikipedia.org)
@@ -60,7 +61,7 @@
6162 # ur Urdu (http://ur.wikipedia.org)
6263 $wp_1st = "en" ;
6364 $wp_2nd = "de" ;
64 - if ($region =~ /^india$/i) #misuse language
 65+ if ($region =~ /^india$/i)
6566 {
6667 $region = lc $region ;
6768 $some_languages_only = $true ;
@@ -74,7 +75,7 @@
7576 $wp_1st = "ta" ;
7677 $wp_2nd = "hi" ;
7778 }
78 - elsif ($region =~ /^(?:africa|america|asia|europe|oceania|artificial)$/i) #misuse language
 79+ elsif ($region =~ /^(?:africa|america|asia|europe|oceania|artificial)$/i)
7980 {
8081 $region = lc $region ;
8182 $region_uc = ucfirst $region ;
@@ -150,24 +151,38 @@
151152 $langcode = uc ($language) ;
152153 $testmode = ((defined $options {"t"}) ? $true : $false) ;
153154
 155+ if ($testmode)
 156+ { print "Test mode\n" ; }
 157+
154158 if (defined $pageviews)
155159 {
156160 if ($pageviews eq 'n')
157 - { $pageviews_normal = $true ; print "Generate page views report for non-mobile site" ; }
 161+ {
 162+ $pageviews_non_mobile = $true ;
 163+ $keys_html_pageviews_all_projects = 'non-mobile,' ;
 164+ print "Generate page views report for non-mobile site" ;
 165+ }
158166 elsif ($pageviews eq 'm')
159 - { $pageviews_mobile = $true ; print "Generate page views report for mobile site" ; }
 167+ {
 168+ $pageviews_mobile = $true ;
 169+ $keys_html_pageviews_all_projects = 'mobile,' ;
 170+ print "Generate page views report for mobile site" ;
 171+ }
 172+ elsif ($pageviews eq 'c')
 173+ {
 174+ $pageviews_combined = $true ;
 175+ $keys_html_pageviews_all_projects = 'combined,' ;
 176+ print "Generate page views report for mobile + non-mobile site" ;
 177+ }
160178 else { abort ("Invalid option for pageviews: specify '-v n' for non-mobile or '-v m' for mobile data") ; }
161179
162180 $pageviews = $true ;
163181
164 - if ($pageviews_normal && $normalize_days_per_month)
165 - { $keys_html_pageviews_all_projects = 'non-mobile,normalized' ; }
166 - elsif ($pageviews_normal && (! $normalize_days_per_month))
167 - { $keys_html_pageviews_all_projects = 'non-mobile,not-normalized' ; }
168 - elsif ((! $pageviews_normal) && $normalize_days_per_month)
169 - { $keys_html_pageviews_all_projects = 'mobile,normalized' ; }
 182+ if ($normalize_days_per_month)
 183+ { $keys_html_pageviews_all_projects .= 'normalized' ; }
170184 else
171 - { $keys_html_pageviews_all_projects = 'mobile,not-normalized' ; }
 185+ { $keys_html_pageviews_all_projects .= 'not-normalized' ; }
 186+
172187 print "\nCollect pageviews for $keys_html_pageviews_all_projects\n\n" ;
173188 }
174189
@@ -200,18 +215,30 @@
201216 else
202217 { $path_in =~ s/[\/]*$/\// ; }
203218
204 - if ($path_out =~ /\\/)
 219+ if ($dump_gallery)
205220 {
206 - $path_out =~ s/[\\]*$/\\/ ;
207 - $path_out_timelines = $path_out . "EN\\" ;
208 - $path_out .= uc ($language) ;
 221+ if ($path_out =~ /\\/)
 222+ { $path_out =~ s/[\\]*$/\\/ ; } # make sure there is one trailing (back)slash
 223+ else
 224+ { $path_out =~ s/[\/]*$/\// ; }
 225+ $path_in .= "csv_$mode\\" ;
209226 }
210227 else
211228 {
212 - $path_out =~ s/[\/]*$/\// . "\/" . uc ($language);
213 - $path_out_timelines = $path_out . "EN\/" ;
214 - $path_out .= uc ($language) ;
 229+ if ($path_out =~ /\\/)
 230+ {
 231+ $path_out =~ s/[\\]*$/\\/ ;
 232+ $path_out_timelines = $path_out . "EN\\" ;
 233+ $path_out .= uc ($language) ;
 234+ }
 235+ else
 236+ {
 237+ $path_out =~ s/[\/]*$/\// . "\/" . uc ($language);
 238+ $path_out_timelines = $path_out . "EN\/" ;
 239+ $path_out .= uc ($language) ;
 240+ }
215241 }
 242+
216243 if ($region ne '')
217244 { $path_out .= '_' . ucfirst ($region) ; }
218245 $path_out .= "\/" ;
@@ -298,6 +325,8 @@
299326 $file_csv_namespaces = $path_in . "Namespaces.csv" ;
300327 $file_edits_per_namespace = $path_in . "StatisticsEditsPerNamespace.csv" ;
301328 $file_edits_per_usertype = $path_in . "StatisticsEditsPerUsertype.csv" ;
 329+ $file_pageviews_per_wiki = $path_in . "StatisticsPageviewsPerWiki.csv" ;
 330+ $file_editors_per_wiki = $path_in . "StatisticsEditorsPerWiki.csv" ;
302331
303332 $file_log = $path_in . "WikiReportsLog.txt" ;
304333 $file_errors = $path_in . "WikiReportsErrors.txt" ;
@@ -329,12 +358,12 @@
330359 $file_animation_projects_growth = "W:/@ Visualizations/Animation Projects Growth/AnimationProjectsGrowthInit".ucfirst($mode).".js" ;
331360 $file_animation_size_and_community = "W:/@ Visualizations/Animation Size And Community/AnimationProjectsGrowthInit".ucfirst($mode).".js" ;
332361
333 - if ($pageviews)
334 - {
 362+# if ($pageviews)
 363+# {
335364 if (! -e $file_csv_pageviewsmonthly)
336365 { abort ("CSV file '" . $file_csv_pageviewsmonthly . "' not found or in use") ; }
337 - return ;
338 - }
 366+# return ;
 367+# }
339368
340369 if (! -e $file_csv_monthly_stats)
341370 { abort ("CSV file '" . $file_csv_monthly_stats . "' not found or in use") ; }
@@ -389,9 +418,9 @@
390419 }
391420
392421 if ($wikimedia)
393 - { &Log ("Script runs on Wikimedia server\n\n") ; }
 422+ { &Log ("Script runs for Wikimedia site") ; }
394423 else
395 - { &Log ("Script does not run on WikiMedia server\n\n") ; }
 424+ { &Log ("Script does not run for WikiMedia site") ; }
396425 }
397426
398427 sub InitGlobals
@@ -640,66 +669,121 @@
641670
642671 $month_max = 0 ;
643672
644 - open "FILE_IN", "<", $file_csv_users_activity_spread ;
645 - while ($line = <FILE_IN>)
 673+ if (! $pageviews)
646674 {
647 - chomp ($line) ;
648 - # count user with over x edits
649 - # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
650 - # thresholds = 1,3,5,10,25,32,50,100,etc
 675+ open "FILE_IN", "<", $file_csv_users_activity_spread ;
 676+ while ($line = <FILE_IN>)
 677+ {
 678+ chomp ($line) ;
 679+ # count user with over x edits
 680+ # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
 681+ # thresholds = 1,3,5,10,25,32,50,100,etc
651682
652 - ($wp, $date, $reguser_bot, $ns_group, @fields) = split (",", $line) ;
653 - # print "$wp, $date, $reguser_bot, $ns_group\n" ;
654 - if ($reguser_bot ne "R") { next ; } # R: registered user, B: bot
655 - if ($ns_group ne "A") { next ; } # A: articles, T: talk pages, O: other
 683+ ($wp, $date, $reguser_bot, $ns_group, @fields) = split (",", $line) ;
 684+ # print "$wp, $date, $reguser_bot, $ns_group\n" ;
 685+ if ($reguser_bot ne "R") { next ; } # R: registered user, B: bot
 686+ if ($ns_group ne "A") { next ; } # A: articles, T: talk pages, O: other
656687
657 - $user_edits_5 {"$wp,$date"} = $fields [2] ;
658 - $user_edits_100 {"$wp,$date"} = $fields [7] ;
659 - }
660 - close "FILE_IN" ;
 688+ $month = substr ($date,0,2) ;
 689+ $year = substr ($date,6,4) ;
 690+ $m = ord (&yyyymm2b ($year, $month)) ;
661691
662 - if ($pageviews)
663 - {
664 - print "\nRead input for page views\n" ;
665 - &ReadFileCsv ($file_csv_log) ;
666 - foreach $wp (@csv)
667 - {
668 - $wp =~ s/,.*$// ;
669 - $wp =~ s/_/-/g ;
670 - next if $some_languages_only and ! $include_language {$wp} ;
671 - $wp_ok {$wp} = 1 ;
672 - $wp_ok {"$wp.m"} = 1 ;
673 - }
674 - # find oldest month (to be skipped, probably incomplete)
675 - # $oldest_month_pageviews = "9999/99/99" ;
676 - open "FILE_IN", "<", $file_csv_pageviewsmonthly ;
677 - while ($line = <FILE_IN>)
678 - {
679 - ($wp, $date, @fields) = split (",", $line) ;
 692+ $count_5 = $fields [2] ;
 693+ $count_25 = $fields [4] ;
 694+ $count_100 = $fields [7] ;
680695
681 - next if $wp ne lc $wp ; # cruft
682 - next if $some_languages_only and ! $include_language {$wp} ;
 696+ $user_edits_5 {"$wp,$date"} = $count_5 ;
 697+ $user_edits_100 {"$wp,$date"} = $count_100 ;
683698
684 - # if ((! $mode_wp) && ($date eq '2008/05/31')) { next ; } # skip incomplete first month
 699+ $editors_5 {$wp.$m} = $count_5 ;
 700+ $editors_25 {$wp.$m} = $count_25 ;
 701+ $editors_100 {$wp.$m} = $count_100 ;
685702
686 - if ($wp_ok {$wp} == 0)
687 - { $wp_nok {$wp} ++ ; }
688 - if (($oldest_month_pageviews {$wp} eq "") || ($date lt $oldest_month_pageviews {$wp}))
689 - { $oldest_month_pageviews {$wp} = $date ; }
 703+ if ($count_5 > $editors_max_5 {$wp})
 704+ {
 705+ $editors_max_5 {$wp} = $count_5 ;
 706+ $editors_month_max_5 {$wp} = $m ;
 707+ }
 708+ if (($editors_month_lo_5 {$wp} == 0) || ($editors_month_lo_5 {$wp} > $m))
 709+ { $editors_month_lo_5 {$wp} = $m ; }
 710+ if ($editors_month_hi_5 {$wp} < $m)
 711+ { $editors_month_hi_5 {$wp} = $m ; }
690712 }
 713+
691714 close "FILE_IN" ;
 715+ }
692716
693 - my $msg = "\nLanguage codes skipped (not in StatisticsLog.csv):\n" ;
694 - foreach $wp (sort keys %wp_nok)
695 - { $msg .= "$wp," ; }
696 - if ($msg =~ /,/)
 717+
 718+ # this code is partly duplicated below for mode $pageviews, some day needs to be combined
 719+
 720+ &ReadFileCsv ($file_csv_log) ;
 721+ foreach $wp (@csv)
 722+ {
 723+ $wp =~ s/,.*$// ;
 724+ $wp =~ s/_/-/g ;
 725+ next if $some_languages_only and ! $include_language {$wp} ;
 726+ $wp_ok {$wp} = 1 ;
 727+ $wp_ok {"$wp.m"} = 1 ;
 728+ }
 729+ # find oldest month (to be skipped, probably incomplete)
 730+ # $oldest_month_pageviews = "9999/99/99" ;
 731+ open "FILE_IN", "<", $file_csv_pageviewsmonthly ;
 732+ while ($line = <FILE_IN>)
 733+ {
 734+ chomp $line ;
 735+ ($wp, $date, $count) = split (",", $line) ;
 736+
 737+ next if $wp ne lc $wp ; # cruft
 738+ next if $some_languages_only and ! $include_language {$wp} ;
 739+
 740+ # if ((! $mode_wp) && ($date eq '2008/05/31')) { next ; } # skip incomplete first month
 741+
 742+ if ($wp_ok {$wp} == 0)
 743+ { $wp_nok {$wp} ++ ; }
 744+ if (($oldest_month_pageviews {$wp} eq "") || ($date lt $oldest_month_pageviews {$wp}))
 745+ { $oldest_month_pageviews {$wp} = $date ; }
 746+
 747+ $month = substr ($date,5,2) ;
 748+ $year = substr ($date,0,4) ;
 749+
 750+ next if $year < 2001 ; # StatisticsMonthly.csv contains weird dates for tiny Wp's, to be fixed in counts job
 751+
 752+ next if $wp eq "ar" and $year < 2003 ; # clearly erroneous record for arwiki pollutes TablesWikipediaGrowthSummaryContributors.htm
 753+
 754+ $m = ord (&yyyymm2b ($year, $month)) ;
 755+
 756+ next if $mode_wx and $m < 102 ; # oldest months are erroneous (incomplete)
 757+
 758+
 759+ # figures for current month are ignored when month has just begun
 760+
 761+
 762+ $days_in_month = days_in_month ($year, $month) ;
 763+ $count_normalized = sprintf ("%.0f", 30/$days_in_month * $count) ;
 764+ $pageviews {$wp.$m} = $count_normalized ;
 765+ if ($count_normalized > $pageviews_max {$wp})
697766 {
698 - $msg =~ s/,$/\n/ ;
699 - print $msg ;
 767+ $pageviews_max {$wp} = $count_normalized ;
 768+ $pageviews_month_max {$wp} = $m ;
700769 }
 770+ if (($pageviews_month_lo {$wp} == 0) || ($pageviews_month_lo {$wp} > $m))
 771+ { $pageviews_month_lo {$wp} = $m ; }
 772+ if ($pageviews_month_hi {$wp} < $m)
 773+ { $pageviews_month_hi {$wp} = $m ; }
 774+ }
 775+ close "FILE_IN" ;
701776
702 - open "FILE_IN", "<", $file_csv_pageviewsmonthly ;
 777+ my $msg = "\nLanguage codes skipped (not in StatisticsLog.csv):\n" ;
 778+ foreach $wp (sort keys %wp_nok)
 779+ { $msg .= "$wp," ; }
 780+ if ($msg =~ /,/)
 781+ {
 782+ $msg =~ s/,$/\n/ ;
 783+ print $msg ;
703784 }
 785+
 786+ if ($pageviews)
 787+ { open "FILE_IN", "<", $file_csv_pageviewsmonthly ; }
704788 else
705789 { open "FILE_IN", "<", $file_csv_monthly_stats ; }
706790
@@ -710,10 +794,9 @@
711795 if ($pageviews)
712796 {
713797 ($wp, $date, @fields) = split (",", $line) ;
714 -
715798 next if $wp ne lc $wp ; # cruft
716799 next if $pageviews_mobile and $wp !~ /\.m/ ;
717 - next if $pageviews_normal and $wp =~ /\.m/ ;
 800+ next if $pageviews_non_mobile and $wp =~ /\.m/ ;
718801 next if $some_languages_only and ! $include_language {$wp} ;
719802
720803 $wp =~ s/\.m// ; # mobile postix is .m
@@ -738,53 +821,53 @@
739822
740823 }
741824 else
742 - { ($wp, $date, @fields) = split (",", $line) ; }
 825+ {
 826+ ($wp, $date, @fields) = split (",", $line) ;
 827+ # use newer counts, excluding bots from $file_csv_users_activity_spread
 828+ $fields [2] = $user_edits_5 {"$wp,$date"} ;
 829+ $fields [3] = $user_edits_100 {"$wp,$date"} ;
 830+ }
743831
744 - # use newer counts, excluding bots from $file_csv_users_activity_spread
745 -
746 - $fields [2] = $user_edits_5 {"$wp,$date"} ;
747 - $fields [3] = $user_edits_100 {"$wp,$date"} ;
748 -
749832 next if $some_languages_only and ! $include_language {$wp} ;
750833
751834 # if ($wp eq $wp_1st) { next ; } # Dec 2006: skip till counts are fixed
752 - if ($mode_wk && (($wp eq "als") || ($wp eq "tlh"))) { next ; } # obsolete
753 - if ($wp eq "dk") { next ; } # Dec 2006: dumps exist but site not
754 - if ($wp eq "zz") { next ; }
755 - if ($wp eq "test") { next ; }
756 - if ($wp eq "tlh") { next ; } # Klignon
757 - if ($wp eq "ru-sib") { next ; } # Siberian
758 - if ($wp eq "ru_sib") { next ; } # Siberian
759835
760 - if ($wp =~ /mania/i) { next ; }
761 - if ($wp =~ /team/i) { next ; }
762 - if ($wp =~ /comcom/i) { next ; }
763 - if ($wp =~ /closed/i) { next ; }
764 - if ($wp =~ /chair/i) { next ; }
765 - if ($wp =~ /langcom/i) { next ; }
766 - if ($wp =~ /office/i) { next ; }
767 - if ($wp =~ /searchcom/i) { next ; }
768 - if ($wp =~ /sep11/i) { next ; }
769 - if ($wp =~ /nostalgia/i) { next ; }
770 - if ($wp =~ /stats/i) { next ; }
771 - if (! $mode_wx && ($wp =~ /commons/i)) { next ; }
 836+ next if $mode_wk and ($wp eq "als" or $wp eq "tlh") ; # obsolete
 837+ next if ! $mode_wx and $wp =~ /commons/i ;
772838
 839+ next if $wp eq "dk" ; # Dec 2006: dumps exist but site not
 840+ next if $wp eq "zz" ;
 841+ next if $wp eq "test" ;
 842+ next if $wp eq "tlh" ; # Klignon
 843+ next if $wp eq "ru-sib" ; # Siberian
 844+ next if $wp eq "ru_sib" ; # Siberian
 845+
 846+ next if $wp =~ /mania/i ;
 847+ next if $wp =~ /team/i ;
 848+ next if $wp =~ /comcom/i ;
 849+ next if $wp =~ /closed/i ;
 850+ next if $wp =~ /chair/i ;
 851+ next if $wp =~ /langcom/i ;
 852+ next if $wp =~ /office/i ;
 853+ next if $wp =~ /searchcom/i ;
 854+ next if $wp =~ /sep11/i ;
 855+ next if $wp =~ /nostalgia/i ;
 856+ next if $wp =~ /stats/i ;
 857+
773858 # $date = &FixDateMonthlyStats ($date) ;
774859 $day = substr ($date,3,2) ;
775860 $month = substr ($date,0,2) ;
776861 $year = substr ($date,6,4) ;
777862
778 - if ($year < 2001) # StatisticsMonthly.csv contains weird dates for tiny Wp's, to be fixed in counts job
779 - { next ; }
780 - if (($wp eq "ar") && ($year < 2003)) # clearly erroneous record for arwiki pollutes TablesWikipediaGrowthSummaryContributors.htm
781 - { next ; }
 863+ next if $year < 2001 ; # StatisticsMonthly.csv contains weird dates for tiny Wp's, to be fixed in counts job
 864+ next if $wp eq "ar" and $year < 2003 ; # clearly erroneous record for arwiki pollutes TablesWikipediaGrowthSummaryContributors.htm
782865
783866 $m = ord (&yyyymm2b ($year, $month)) ;
784867
785868 next if $pageviews and $mode_wx and $m < 102 ; # oldest months are erroneous (incomplete)
786 -
787 - next if $wp eq 'commons' and $m < 58 ;
788 -
 869+
 870+ next if $wp eq 'commons' and $m < 58 ; # &yyyymm2b(2004,9) -> 59 ; there is stray record years earlier, ignore
 871+
789872 # figures for current month are ignored when month has just begun
790873 # (were)
791874 # if ($day < 7)
@@ -809,7 +892,7 @@
810893 }
811894
812895 if ($pageviews || ($f < $#fields))
813 - { $MonthlyStats {$wp.$m.$c[$f]} = $fields [$f] ; }
 896+ { $MonthlyStats {$wp.$m.$c[$f]} += $fields [$f] ; } # += instead of = for combined page views: mobile + non-mobile
814897 else
815898 { $MonthlyStats {$wp.$m.$c[$f+2]} = $fields [$f] ; } # daily usage counts will be 'inserted' below,
816899 # those used to be last columns in input,
@@ -983,13 +1066,22 @@
9841067 }
9851068
9861069 #collect totals
987 - if ($mode_wb) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
988 - if ($mode_wk) { $m1 = ord (&yyyymm2b (2002,12)) ; }
989 - if ($mode_wn) { $m1 = ord (&yyyymm2b (2004, 7)) ; }
990 - if ($mode_wp) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
991 - if ($mode_wq) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
992 - if ($mode_ws) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
993 - if ($mode_wx) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
 1070+ if ($pageviews)
 1071+ {
 1072+ if ($mode_wp) { $m1 = ord (&yyyymm2b (2008, 1)) ; }
 1073+ else
 1074+ { $m1 = ord (&yyyymm2b (2008, 6)) ; }
 1075+ }
 1076+ else
 1077+ {
 1078+ if ($mode_wb) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
 1079+ if ($mode_wk) { $m1 = ord (&yyyymm2b (2002,12)) ; }
 1080+ if ($mode_wn) { $m1 = ord (&yyyymm2b (2004, 7)) ; }
 1081+ if ($mode_wp) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
 1082+ if ($mode_wq) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
 1083+ if ($mode_ws) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
 1084+ if ($mode_wx) { $m1 = ord (&yyyymm2b (2001, 1)) ; }
 1085+ }
9941086
9951087 foreach $wp (@languages)
9961088 {
@@ -1078,10 +1170,9 @@
10791171
10801172 # except for pageviews, for last 12 months check if most prominent wiki has data
10811173 # only for last 12 months: especially for region 'India' this is not so for early months
1082 - if ($pageviews)
1083 - { $zz += $MonthlyStats {$wp.$m.$c[$f]} ; } -
1084 - elsif (($m <= $md - 12) || ($MonthlyStats {$wp_1st.$m.$c[$f]} > 0))
 1174+ if ($pageviews)
 1175+ { $zz += $MonthlyStats {$wp.$m.$c[$f]} ; }
 1176+ elsif (($m <= $md - 12) || ($MonthlyStats {$wp_1st.$m.$c[$f]} > 0))
10851177 {
10861178 if (($f >= 7) && ($f <= 10))
10871179 { $zz += $MonthlyStats {$wp.$m.$c[$f]} * $MonthlyStats {$wp.$m.$c[4]} ; }
@@ -1634,10 +1725,10 @@
16351726 {
16361727 my $year = substr ($maxdate,0,4) ;
16371728 my $month = &month_english_short (substr ($maxdate,5,2)-1) ;
1638 - $msg_perc_mobile = "$month $year: Mobile traffic represents ${perc_mobile {$maxdate}}% of total traffic.\n" ;
1639 - $msg_perc_non_mobile = "$month $year: Non-mobile traffic represents ${perc_non_mobile {$maxdate}}% of total traffic.\n" ;
1640 - print $msg_perc_mobile ;
1641 - print $msg_perc_non_mobile ;
 1729+ $msg_perc_mobile = "$month $year: mobile traffic represents ${perc_mobile {$maxdate}}% of total traffic" ;
 1730+ $msg_perc_non_mobile = "$month $year: non-mobile traffic represents ${perc_non_mobile {$maxdate}}% of total traffic" ;
 1731+ print "$msg_perc_mobile\n" ;
 1732+ print "$msg_perc_non_mobile\n" ;
16421733 }
16431734 }
16441735

Status & tagging log