r91623 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r91622‎ | r91623 | r91624 >
Date:00:20, 7 July 2011
Author:reedy
Status:deferred
Tags:
Comment:
Fix some svn:eol-style native
Modified paths:
  • /trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl (modified) (history)
  • /trunk/wikistats/analytics/AnalyticsPrepComscoreData.pl (modified) (history)
  • /trunk/wikistats/analytics/AnalyticsPrepLanguageNames.pl (modified) (history)
  • /trunk/wikistats/analytics/AnalyticsPrepPageViews.pl (modified) (history)
  • /trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl (modified) (history)
  • /trunk/wikistats/analytics/_readme.txt (modified) (history)
  • /trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt (modified) (history)
  • /trunk/wikistats/analytics/analytics_generate_csv_files.sh (modified) (history)
  • /trunk/wikistats/analytics/analytics_new.sh (modified) (history)
  • /trunk/wikistats/analytics/analytics_refresh_from_csv.txt (modified) (history)
  • /trunk/wikistats/analytics/analytics_upd.sh (modified) (history)
  • /trunk/wikistats/reportcard/ComScoreTop1000.pl (modified) (history)
  • /trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl (modified) (history)
  • /trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutputYearly.pl (modified) (history)
  • /trunk/wikistats/reportcard/ReportCardGenerateHtml.pl (modified) (history)
  • /trunk/wikistats/reportcard/ReportCardLinkErrata.pl (modified) (history)
  • /trunk/wikistats/reportcard/StatisticsMonthlyFilter.pl (modified) (history)

Diff [purge]

Index: trunk/wikistats/reportcard/ReportCardGenerateHtml.pl
@@ -1,1416 +1,1416 @@
2 -#!/usr/local/bin/perl
3 -
4 - use lib "/home/ezachte/lib" ;
5 - use EzLib ;
6 - $trace_on_exit = $true ;
7 -
8 - use CGI::Carp qw(fatalsToBrowser);
9 - use Time::Local ;
10 - use Getopt::Std ;
11 -
12 - # !! adapt these for every run !!
13 - $p_year = 2010 ;
14 - $p_month = 12 ;
15 -
16 - $debug = $false ;
17 -
18 - $public = 0 ;
19 - $private = 1 ;
20 -
21 - $p_month_d2 = sprintf ("%02d", $p_month) ;
22 -
23 - @months = qw (January February March April May June July August September October November December) ;
24 - $p_month_prev = ($p_month > 1) ? $p_month - 1 : 12 ;
25 - $p_month_next = ($p_month < 12) ? $p_month + 1 : 1 ;
26 - $p_month_next2 = ($p_month < 11) ? $p_month + 2 : $p_month - 10 ;
27 - $p_year_plus_m2 = ($p_month < 11) ? $p_year : $p_year + 1 ;
28 - $p_month_prev_d2 = sprintf ("%02d", $p_month_prev) ;
29 - $p_month_next_d2 = sprintf ("%02d", $p_month_next) ;
30 -
31 - $p_year_prev = $p_year - 1 ;
32 - $p_year_next = $p_year + 1 ;
33 - $p_year_short = $p_year - 2000 ;
34 - $p_year_prev_short = $p_year_prev - 2000 ;
35 - $p_year_short_d2 = sprintf ("%02d", $p_year_short) ;
36 - $p_year_prev_short_d2 = sprintf ("%02d", $p_year_prev_short) ;
37 -
38 - $p_month_name = $months [$p_month -1] ;
39 - $p_month_name_prev = $months [$p_month_prev-1] ;
40 - $p_month_name_next = $months [$p_month_next-1] ;
41 - $p_month_name_next2 = $months [$p_month_next2-1] ;
42 -
43 -
44 - $trend_one_year = "{{m}}/{{y-1}}|{{m}}/{{y}}" ;
45 -
46 - if ($p_month == 1)
47 - { $trend_one_month = "12/{{y-1}}|1/{{y}}" ; }
48 - else
49 - { $trend_one_month = "{{m-1}}/{{y}}|{{m}}/{{y}}" ; }
50 -
51 - $p_year_month_m1 = ($p_month == 1) ? "$p_month_prev/$p_year_prev_short_d2" : "$p_month_prev/$p_year_short_d2" ; # m1 = minus 1
52 -
53 - print "\$p_year $p_year\n" ;
54 - print "\$p_year_prev $p_year_prev\n" ;
55 - print "\$p_year_plus_m2 $p_year_plus_m2\n" ;
56 - print "\$p_year_short $p_year_short\n" ;
57 - print "\$p_year_prev_short $p_year_prev_short\n" ;
58 - print "\$p_year_short_d2 $p_year_short_d2\n" ;
59 - print "\$p_year_prev_short_d2 $p_year_prev_short_d2\n" ;
60 - print "\n" ;
61 - print "\$p_month $p_month\n" ;
62 - print "\$p_month_d2 $p_month_d2\n" ;
63 - print "\$p_month_next $p_month_next\n" ;
64 - print "\$p_month_prev $p_month_prev\n" ;
65 - print "\$p_month_next_d2 $p_month_next_d2\n" ;
66 - print "\$p_month_prev_d2 $p_month_prev_d2\n" ;
67 - print "\$p_month_name $p_month_name\n" ;
68 - print "\$p_month_name_prev $p_month_name_prev\n" ;
69 - print "\$p_month_name_next $p_month_name_next\n" ;
70 - print "\$p_month_name_next2 $p_month_name_next2\n" ;
71 - print "\$p_year_month_m1 $p_year_month_m1\n" ;
72 -
73 -
74 - # example output for synopsys.txt
75 - #STATISTICS
76 -
77 - #http://infodisiac.com/Wikimedia/ReportCard/EN/RC_2009_08_summary.html
78 -
79 - #Y: Jun, 2008->2009 k=thousand m=million b=billion
80 - #M: 2009, May->Jun
81 -
82 - #Unique Visitors 301 m (Y:+21% / M: -5%)
83 - #Page Requests 11 b (Y: +6% / M: -6%)
84 - #Site Rank 5th (Y: +0 / M: -1 )
85 - #Commons Files 4.7 m (Y:+62% / M: +4%) ++ growth pdf/djvu files
86 - #Article Count 20.6 m (Y:+33% / M: +2%)
87 - #New Articles 17 k (Y: -9% / M: -6%)
88 - #New Editors 18 k (Y:+39% / M:+25%) wp:it in one year -50%
89 - #Active Editors 88 k (Y:+ 1% / M: -2%) wp:ru in one year +45%
90 -
91 - print "\n"."="x80 . "\n\n" ;
92 -
93 -# !! This is a rather crummy quick-and-dirty (Q&D) way to collect variable data; the data needs to be externalized !!
94 -
95 -## if ($2010_12)
96 -## {
97 - @visitors = qw ( 395,472,000 m 14.0 -3.7 %) ; # Unique Visitors by Region
98 - @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
99 - @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
100 - @commons_files = qw ( 8,046,377 m 43.1 3.0 %) ; # Binaries per month - Absolute
101 - @article_count = qw ( 17,616,951 m 20.0 1.5 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
102 - @new_articles = qw ( 8,555 k 16.5 5.1 %) ; # New articles per day - Absolute
103 - @edits = qw ( 11,566,371 m 3.6 3.8 %) ; # Edits per month - Absolute
104 - @new_editors = qw ( 14,607 k -16.6 -2.5 %) ; # New editors - Absolute
105 - @active_editors = qw ( 79,324 k -5.9 -0.5 %) ; # Active editors - Absolute
106 - @very_active_editors = qw ( 10,254 k -1.6 0.1 %) ; # Very active editors - Absolute
107 - @reach = qw ( 31.1 x 1.5 -1.4 %) ; # Reach Percentage by Region
108 - # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
109 - push @visitors, "1|Unique Visitors<br>1: Average for last 12 months 377M." ;
110 -# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ;
111 -# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
112 - push @page_requests, "2,3|Page Requests<br>" .
113 - "2: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Dec: 4.1% of total Wikipedia traffic (556M/13489M)<br>" .
114 -# "&nbsp;&nbsp;&nbsp;&nbsp;Look ahead for page requests: Dec -> Jan = 13367M -> 14724M = +10.1%<br>" .
115 - "#3: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
116 - push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
117 - push @commons_files, "5|Commons Files<br>#5: Tiff uploads increased 5-fold in July 2010, 13-fold in last 12 months.<br>" ;
118 -# "#7: Commons consistently fastest growing project, 48% in last 12 months." ;
119 -
120 -#push @article_count, "8|Article Count<br>#8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ;
121 -# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
122 -# push @new_articles, "7|New Articles Per Day<br>" .
123 -# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ;
124 - push @edits, "6|Edits<br>#6: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" .
125 - "#&nbsp;&nbsp;&nbsp;&nbsp;Net growth in constructive edits is less clear, as this metric includes most reverting edits." ;
126 -# "&nbsp;&nbsp;&nbsp;&nbsp;Strong one-monthly dip in July due to World Cup Socker?." ;
127 -# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
128 -# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
129 - push @new_editors, "7|New Editors Per Day<br>" .
130 -# "10: Signifant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" .
131 -# "&nbsp;&nbsp;&nbsp;&nbsp;Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" .
132 -# "&nbsp;&nbsp;&nbsp;&nbsp;But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" .
133 - "#7:WMF recently commissioned in depth study of editor activity trends, which is ongoing." ;
134 - push @active_editors, "8|(Very) Active Editors<br>" .
135 -# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" .
136 - "#8: Since a few months editors on Commons are no longer included in overall editor total,<br>" .
137 - "#&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; on the assumption that most of these also edit on one or more other projects.<br>" ;
138 -# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; #Detection of double counts between any projects and languages is planned for late 2010." ;
139 - push @very_active_editors, "8|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
140 -## }
141 -
142 -
143 -# if ($2010_11)
144 -# {
145 -# @visitors = qw ( 410,816,000 m 18.8 0.6 %) ; # Unique Visitors by Region
146 -# @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
147 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
148 -# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute
149 -# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
150 -# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute
151 -# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute
152 -# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute
153 -# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute
154 -# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute
155 -# @reach = qw ( 31.1 x 2.6 0.0 %) ; # Reach Percentage by Region
156 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
157 -# push @visitors, "1,2|Unique Visitors<br>1: 410M UV's exceeds Oct 2010 record with 2M. Average for last 12 months 377M.<br>" .
158 -# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ;
159 -# push @page_requests, "3,4|Page Requests<br>" .
160 -# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.4% of total traffic (492M/14468M)<br>" .
161 -# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
162 -# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
163 -# }
164 -
165 -# if ($2010_10)
166 -# {
167 -# @visitors = qw ( 408,350,000 m 18.5 2.6 %) ; # Unique Visitors by Region
168 -# @page_requests = qw ( ? b ? ? %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
169 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
170 -# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute
171 -# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
172 -# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute
173 -# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute
174 -# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute
175 -# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute
176 -# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute
177 -# @reach = qw ( 31.1 x 2.3 0.5 %) ; # Reach Percentage by Region
178 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
179 -# push @visitors, "1,2|Unique Visitors<br>1: 408M UV's beats September 2010 record with 10M.<br>" .
180 -# "2: Growth in UV count in last 12 months 18.5% (for whole internet 9.6%)." ;
181 -# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
182 -# }
183 -
184 -
185 -# if ($2010_09)
186 -# {
187 -# @visitors = qw ( 398,178,000 m 22.1 6.6 %) ; # Unique Visitors by Region
188 -# @page_requests = qw (13,671,000,000 b 20.2 5.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
189 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
190 -# @commons_files = qw ( 7,491,824 m 48.2 2.8 %) ; # Binaries per month - Absolute
191 -# @article_count = qw ( 16,678,710 m 20.7 1.8 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
192 -# @new_articles = qw ( 7,578 k 3.9 -18.9 %) ; # New articles per day - Absolute
193 -# @edits = qw ( 11,924,018 m 9.0 -3.3 %) ; # Edits per month - Absolute
194 -# @new_editors = qw ( 15,805 k -17.4 -10.5 %) ; # New editors - Absolute
195 -# @active_editors = qw ( 82,503 k -5.6 -3.3 %) ; # Active editors - Absolute
196 -# @very_active_editors = qw ( 11,011 k -2.5 -3.4 %) ; # Very active editors - Absolute
197 -# @reach = qw ( 30.8 x 3.2 1.8 %) ; # Reach Percentage by Region
198 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
199 -# push @visitors, "1,2|Unique Visitors<br>1: 398M UV's beats May 2010 record with 9M or 2.4%.<br>" .
200 -# "2: Growth in UV count in last 12 months 22% (for whole internet 10%)." ;
201 -# push @page_requests, "3,4|Page Requests<br>" .
202 -# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.0% of total traffic (425M/14096M)<br>" .
203 -# # "&nbsp;&nbsp;&nbsp;&nbsp;Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" .
204 -# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
205 -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
206 -# push @commons_files, "6,7|Commons Files<br>#6: Tiff uploads increased 5-fold in July 2010, 18-fold in last 12 months.<br>" .
207 -# "7: Commons consistently fastest growing project, 48% in last 12 months." ;
208 -
209 -# push @article_count, "8|Article Count<br>8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ;
210 -# push @edits, "9|Edits<br>9: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" .
211 -# "&nbsp;&nbsp;&nbsp;&nbsp;Net growth in constructive edits is less clear, as this metric includes most reverting edits." ;
212 -# "&nbsp;&nbsp;&nbsp;&nbsp;Strong one-monthly dip in July due to World Cup Socker?." ;
213 -# push @new_editors, "10|New Editors Per Day<br>" .
214 -# "10: Signifant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" .
215 -# "&nbsp;&nbsp;&nbsp;&nbsp;Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" .
216 -# "&nbsp;&nbsp;&nbsp;&nbsp;But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" .
217 -# "&nbsp;&nbsp;&nbsp;&nbsp;WMF recently commissioned in depth study of editor activity trends, which is ongoing." ;
218 -# push @active_editors, "11,12|(Very) Active Editors<br>" .
219 -# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" .
220 -# "#12: Editors on Commons are no longer included in overall editor total,<br>" .
221 -# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; #on the assumption that most of these also edit on one or more other projects.<br>" .
222 -# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; #Detection of double counts between any projects and languages is planned for late 2010." ;
223 -# push @very_active_editors, "11,12|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
224 -# }
225 -
226 -# if ($2010_08)
227 -# {
228 -# @visitors = qw ( 373,392,000 m 21.4 3.7 %) ; # Unique Visitors by Region
229 -# @page_requests = qw (13,367,000,000 b 23.9 -1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
230 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
231 -# @commons_files = qw ( 7,298,379 m 48.1 2.8 %) ; # Binaries per month - Absolute
232 -# @article_count = qw ( 34,963,360 m 30.0 2.4 %) ; # Article count (official) - Absolute
233 -# @new_articles = qw ( 9,437 k 22.4 25.7 %) ; # New articles per day - Absolute
234 -# @edits = qw ( 12,346,207 m 7.9 15.4 %) ; # Edits per month - Absolute
235 -# @new_editors = qw ( 17,026 k -17.3 -1.1 %) ; # New editors - Absolute
236 -# @active_editors = qw ( 85,643 k -5.2 2.1 %) ; # Active editors - Absolute
237 -# @very_active_editors = qw ( 11,419 k -1.6 5.0 %) ; # Very active editors - Absolute
238 -# @reach = qw ( 29.0 x 2.6 0.5 %) ; # Reach Percentage by Region
239 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
240 -# push @page_requests, "1,2,3,4|Page Requests<br>" .
241 -# "1: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 2.9% of total traffic (425M/14724M)<br>" .
242 -# "&nbsp;&nbsp;&nbsp;&nbsp;Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" .
243 -# "#&nbsp;&nbsp;&nbsp;&nbsp;Trend data for mobile will be added when more history is available.<br>" .
244 -# "#2: Due to server problems counts from squid logs for December 2009 - March 2010 are too low,<br>" .
245 -# "#&nbsp;&nbsp;&nbsp;&nbsp;estimated underreporting 10%-25%. Counts for April - July 2010 have been patched. Read <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>more</a>.<br>" .
246 -# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-08/Page-Views-Per-Project-Indexed.png'>chart</a><br>" .
247 -# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
248 -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
249 -# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>22-fold</a> in a year." ;
250 -
251 -# push @new_articles, "7|New Articles Per Day<br>" .
252 -# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ;
253 -# push @edits, "8|Edits<br>8: All time high for edit count, even slightly above May level.<br>" .
254 -# "&nbsp;&nbsp;&nbsp;&nbsp;Strong one-monthly dip in July due to World Cup Socker?." ;
255 -# push @active_editors, "9,10|(Very) Active Editors<br>" .
256 -# "9: After a <a href='charts/2010-08/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active Wikipedia editors</a> in June, and a further 2% drop in July,<br>" .
257 -# "&nbsp;&nbsp;&nbsp;&nbsp;trend is upwards again, with 2.5% increase in August.<br>" .
258 -# "&nbsp;&nbsp;&nbsp;&nbsp;Prospects for September are good, with +10% growth in page requests<br>" .
259 -# "&nbsp;&nbsp;&nbsp;&nbsp;(given strong correlation of 0.67 between page requests and active editors).<br>" .
260 -# "&nbsp;&nbsp;&nbsp;&nbsp;From a wider perspective drops were stil within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>,<br>" .
261 -# "&nbsp;&nbsp;&nbsp;&nbsp;see also <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2008.png'>similar chart with trend line since June 2008</a>.<br>" .
262 -# "10: New: Editors on Commons are no longer included in overall editor total,<br>" .
263 -# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; on the assumption that most of these also edit on one or more other projects.<br>" .
264 -# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Detection of double counts between any projects and languages is planned for late 2010." ;
265 -# push @very_active_editors, "9,10|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
266 -# }
267 -
268 -# if ($2010_07)
269 -# {
270 -# @visitors = qw ( 360,225,000 m 21.9 -5 %) ; # Unique Visitors by Region
271 -# @page_requests = qw (13,116,000,000 b 27.2 -6 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
272 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
273 -# @commons_files = qw ( 7,104,689 m 49.1 2.9 %) ; # Binaries per month - Absolute
274 -# @article_count = qw ( 34,198,285 m 29.9 2 %) ; # Article count (official) - Absolute
275 -# @new_articles = qw ( 7,642 k 4.2 -0.6 %) ; # New articles per day - Absolute
276 -# @edits = qw ( 10,734,940 m -5.5 -9.8 %) ; # Edits per month - Absolute
277 -# @new_editors = qw ( 16,661 k -20.8 -5.6 %) ; # New editors - Absolute
278 -# @active_editors = qw ( 90,554 k -5.9 -1.6 %) ; # Active editors - Absolute
279 -# @very_active_editors = qw ( 11,818 k -2.1 -1.8 %) ; # Very active editors - Absolute
280 -# @reach = qw ( 28.5 x 2.8 -1.7 %) ; # Reach Percentage by Region
281 - # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
282 -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
283 -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
284 -# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
285 -# push @page_requests, "1,2,3,4|Page Requests<br>" .
286 -# "1: Due to <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>server problems</a> counts from squid logs for December 2009 - March 2010 are too low,<br>" .
287 -# "&nbsp;&nbsp;&nbsp;&nbsp;estimated underreporting 10%-25%. Counts for April - July 2010 have been patched." .
288 -# ".<br>" .
289 -# "2: August : <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic (401M)</a>: 3.0% of total traffic (13367M)<br>" .
290 -# "#&nbsp;&nbsp;&nbsp;&nbsp;Trend data for mobile will be added when more history is available.<br>" .
291 -# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-07/Page-Views-Per-Project-Indexed.png'>chart</a><br>" .
292 -# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
293 -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
294 -# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>25-fold</a> in a year." ;
295 -
296 -# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
297 -# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
298 -# push @new_articles, "7|New Articles Per Day<br>" .
299 -# "#7: Peak in April and May by massive activity on Aromanian and Waray-Waray Wp's, each by single user.<br>" .
300 -# "#&nbsp;&nbsp;&nbsp;&nbsp;In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ;
301 -# push @active_editors, "8,9|(Very) Active Editors<br>" .
302 -# "8: The <a href='charts/2010-07/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active editors</a> for all Wikipias in June was relatively large,<br>" .
303 -# "&nbsp;&nbsp;&nbsp;&nbsp;but from a <a href='charts/2010-07/Monthly-Active-Users-Since-Jan-2006.png'>wider perspective</a> still within normal bandwidth, largest drop was in June 2006.<br>" .
304 -# "&nbsp;&nbsp;&nbsp;&nbsp;There might be a seasonal component in fluctuations.<br>" .
305 -# "9: Bug fix: in earlier RC editions editors from Commons (6k active editors) were counted double.<br>" .
306 -# "&nbsp;&nbsp;&nbsp;&nbsp;This has been fixed for all months in this RC." ;
307 -# push @very_active_editors, "9|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
308 -# }
309 -
310 -# if ($2010_06)
311 -# {
312 -# @visitors = qw ( 379,344,000 m 25.2 -2.5 %) ; # Unique Visitors by Region
313 -# @page_requests = qw (13,957,000,000 b 26.0 1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
314 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
315 -# @commons_files = qw ( 6,910,267 m 50.1 2.5 %) ; # Binaries per month - Absolute
316 -# @article_count = qw ( 33,430,039 m 29.7 1.5 %) ; # Article count (official) - Absolute
317 -# @new_articles = qw ( 7,865 k 14.5 -16.2 %) ; # New articles per day - Absolute
318 -# @edits = qw ( 12,056,265 m 10.1 -1.6 %) ; # Edits per month - Absolute
319 -# @new_editors = qw ( 17,573 k -15.2 -10.6 %) ; # New editors - Absolute
320 -# @active_editors = qw ( 99,124 k -3.5 -4.4 %) ; # Active editors - Absolute
321 -# @very_active_editors = qw ( 13,042 k 0.7 -2.9 %) ; # Very active editors - Absolute
322 -# @reach = qw ( 30.2 x 3.5 -1.1 %) ; # Reach Percentage by Region
323 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
324 -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
325 -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
326 -# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
327 -# push @page_requests, "1,2,3,4|Page Requests<br>" .
328 -# "1: Traffic volume for recent months had been underreported due to monitor capacity problems.<br>" .
329 -# "&nbsp;&nbsp;&nbsp;&nbsp;Counts from April 2010 and later " .
330 -# "<a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>have been corrected</a>.<br>" .
331 -# "&nbsp;&nbsp;&nbsp;&nbsp;Data from Nov 2009 - Mar 2010 may still be too low.<br>" .
332 -# "2: Traffic to mobile site is now counted. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 208M:13957M=1.5% of total)</a><br>" .
333 -# "&nbsp;&nbsp;&nbsp;&nbsp;This is the first month, so no trend data yet. <a href='charts/2010-06/Page-Views-Breakdown-Mobile-Traffic.png'> " .
334 -# "Breakdown per language</a>:" .
335 -# "English:71.3%,<br>&nbsp;&nbsp;&nbsp;&nbsp; Japanese:8.6%, German:4.5%, French:3.9%, Russian:3.4%, Others:8.3%<br>" .
336 -# "3: <a href='charts/2010-06/Page-Views-Per-Project-Indexed.png'>New chart</a> for breakdown of traffic volume per project: many projects show peak traffic late 2009.<br>" .
337 -# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
338 -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
339 -# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
340 -# push @new_articles, "7|New Articles Per Day<br>7: Peak in April and May by massive activity on <a href='http://stats.wikimedia.org/EN/TablesWikipediaROA_RUP.htm'>Aromanian</a> and <a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray</a> Wp's, each by single user.<br>" .
341 -# "&nbsp;&nbsp;&nbsp;&nbsp;In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ;
342 -# push @edits, "9|Edits<br>9: For German,French and Polish Wikipedia dumps were not yet updated, reused data from previous month" ;
343 -# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" .
344 -# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
345 -# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
346 -# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
347 -# }
348 -
349 -# if ($2010_05)
350 -# {
351 -# @visitors = qw ( 388,932,000 m 22.6 3.8 %) ; # Unique Visitors by Region
352 -# @page_requests = qw (11,250,000,000 b -1.0 -1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
353 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
354 -# @commons_files = qw ( 6,765,082 m 51.9 3.1 %) ; # Binaries per month - Absolute
355 -# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute
356 -# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute
357 -# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute
358 -# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute
359 -# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute
360 -# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute
361 -# @reach = qw ( 31.3 x 3.0 0.9 %) ; # Reach Percentage by Region
362 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
363 -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
364 -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
365 -# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
366 -# push @page_requests, "3,4|Page Requests<br>" .
367 -# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
368 -# "4: Traffic to mobile site not yet included. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 154M:10700M=1.4% of total)</a><br>" .
369 -# "5: Page request trends on several projects are falling for 4th month, which deserves some further analysis" ;
370 -# push @rank, "6|Site Rank<br>#6: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
371 -# push @commons_files, "7|Commons Files<br>#8: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
372 -# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
373 -# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
374 -# push @new_articles, "10,11|New Articles Per Day<br>10: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" .
375 -# "11:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ;
376 -# push @edits, "12,13|Edits<br>12: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" .
377 -# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" .
378 -# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
379 -# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
380 -# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
381 -# }
382 -
383 -
384 -# if ($2010_04)
385 -# {
386 -# @visitors = qw ( 374,846,000 m 17.1 1.1 %) ; # Unique Visitors by Region
387 -# @page_requests = qw (11,724,000,000 b +7.4 -0.1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
388 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
389 -# @commons_files = qw ( 6,564,544 m 52.2 3.3 %) ; # Binaries per month - Absolute
390 -# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute
391 -# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute
392 -# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute
393 -# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute
394 -# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute
395 -# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute
396 -# @reach = qw ( 30.4 x 1.5 0.0 %) ; # Reach Percentage by Region
397 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
398 -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
399 -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
400 -# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
401 -# push @page_requests, "3,4|Page Requests<br>" .
402 -# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
403 -# "4: Traffic to mobile site not included. Expect this next month." ;
404 -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
405 -# push @commons_files, "6|Commons Files<br>6: Fastest relative growth: tiff images (723%), ogg vorbis video (446%)." ;
406 -# push @article_count, "7,8|Article Count<br>7: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
407 -# "8: Seven Wiktionaries in top 25 Wikimedia projects" ;
408 -# push @new_articles, "9,10|New Articles Per Day<br>9: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" .
409 -# "10:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ;
410 -# push @edits, "11,12|Edits<br>11: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" .
411 -# "All Serbian Wikinews edits by weather bot that updates temp/wind speed every few seconds.<br>" .
412 -# "30 June 2010: report filed for <a href='http://en.wikinews.org/wiki/Wikinews:Admin_action_alerts'>runaway bot</a><br>" .
413 -# "#12: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
414 -# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
415 -# push @very_active_editors, "13|Very Active Editors<br>13: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
416 -# }
417 -
418 -# if ($2010_03)
419 -# {
420 -# @visitors = qw ( 370,744,000 m 13.3 7.4 %) ; # Unique Visitors by Region
421 -# @page_requests = qw (11,730,000,000 b +0.3 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
422 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
423 -# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute
424 -# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute
425 -# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute
426 -# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute
427 -# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute
428 -# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute
429 -# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute
430 -# @reach = qw ( 30.4 x 0.5 1.7 %) ; # Reach Percentage by Region
431 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
432 -# push @visitors, "1,2|Unique Visitors<br>1: March has 3 more (11%) more days than February<br>" .
433 -# "&nbsp;&nbsp;&nbsp;&nbsp;This will explain much of apparently large monthly growth in visitors<br>" .
434 -# "2: All regions same or more unique visitors than year ago. North Am. +25%, Latin Am. + 27%" ;
435 -# push @page_requests, "3|Page Requests<br>" .
436 -# "3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
437 -# "&nbsp;&nbsp;&nbsp;&nbsp;This way monthly changes are more meaningful<br>" .
438 -# "&nbsp;&nbsp;&nbsp;&nbsp;Difference with not normalized data is mainly visible in Jan&rArr;Feb and Feb&rArr;Mar" ;
439 -# push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
440 -# push @commons_files, "5|Commons Files<br>#5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
441 -# push @article_count, "6|Article Count<br>#6: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ;
442 -# push @edits, "7|Edits<br>#7: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
443 -# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
444 -# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" .
445 -# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ;
446 -# }
447 -
448 -# if ($2010_02)
449 -# {
450 -# @visitors = qw ( 345,218,000 m 14.8 -5.3 %) ; # Unique Visitors by Region
451 -# @page_requests = qw (11,081,000,000 b +5.8 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
452 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
453 -# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute
454 -# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute
455 -# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute
456 -# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute
457 -# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute
458 -# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute
459 -# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute
460 -# @reach = qw ( 28.7 x 0.8 -0.8 %) ; # Reach Percentage by Region
461 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
462 -# push @visitors, "1|Unique Visitors<br>1: comScore reassesses online population in their target segments twice a year (Feb & Aug)<br>" .
463 -# "&nbsp;&nbsp;&nbsp;&nbsp;This time estimate for Indonesia, Philippines and Vietnam was lowered by -54%,<br>" .
464 -# "&nbsp;&nbsp;&nbsp;&nbsp;resulting in a worldwide reassessment of online population of -4%" ;
465 -# push @page_requests, "2,3|Page Requests<br>" .
466 -# "2:Corrected for length of months Jan -> Feb increase was actually +11.0% !<br>" .
467 -# "3:Russia maintains its steep growth: +57% in last 12 months, +137% in preceding 12 months<br>" .
468 -# "&nbsp;&nbsp;&nbsp;&nbsp;Indonesia is 2nd, and speeding up: +46% in last 12 months, +34% before that<br>" .
469 -# "#&nbsp;&nbsp;&nbsp;&nbsp;German decline (-10%) is still atypical (caused by spike year ago after court decision)" ;
470 -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
471 -# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
472 -# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ;
473 -# push @edits, "8|Edits<br>8: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
474 -# "&nbsp;&nbsp;&nbsp;&nbsp;2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
475 -# }
476 -
477 -# if ($2009_??)
478 -# {
479 -# @visitors = qw ( 364,719,000 m 25.8 5.1 %) ; # Unique Visitors by Region
480 -# @page_requests = qw (11,054,000,000 b -3.1 6.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
481 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
482 -# @commons_files = qw ( 6,058,601 m 59.5 6.5 %) ; # Binaries per month - Absolute
483 -# @article_count = qw ( 29,742,993 m 34.7 2.4 %) ; # Article count (official) - Absolute
484 -# @new_articles = qw ( 7,626 k -1.1 3.4 %) ; # New articles per day - Absolute
485 -# @edits = qw ( 12,251,152 m 4.8 9.0 %) ; # Edits per month - Absolute
486 -# @new_editors = qw ( 19,279 k -12.4 5.6 %) ; # New editors - Absolute
487 -# @active_editors = qw ( 98,597 k -1.4 5.0 %) ; # Active editors - Absolute
488 -# @very_active_editors = qw ( 12,488 k -1.1 6.3 %) ; # Very active editors - Absolute
489 -# @reach = qw ( 29.0 x 1.0 1.0 %) ; # Reach Percentage by Region
490 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
491 -# push @visitors, "1,2|Unique Visitors<br>#1: Yearly growth in UV's (26%) exceeds growth of total internet (21%).<br>" .
492 -# "2: Large monthly shifts in UV/Reach in 3rd world explained by comScore as seasonal influences:<br>&nbsp;&nbsp;&nbsp;&nbsp;school vacations, and large festivals, religious (e.g. Ramadan) or otherwise (e.g. Carnival)." ;
493 -# push @page_requests, "3,4|Page Requests<br>3:<b> Trends measured by comScore and internal measurements diverge somewhat.</b><br>&nbsp;&nbsp;&nbsp;&nbsp;<b>Possible causes are under investigation.</b><p>" .
494 -# "4:Fastest rising large Wikipedia's in last 12 months:<br>" .
495 -# "&nbsp;&nbsp;&nbsp;&nbsp;Vietnamese (87%), Ukranian (65%), Russian (45%), Indonesian (39%), Chinese (28%), Thai (23%)<br>" .
496 -# "&nbsp;&nbsp;&nbsp;&nbsp;German decline (-32%) is atypical (caused by short massive spike year ago due after court decision)" ;
497 -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with 4th and 6th ranked properties are considerable." ;
498 -# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
499 -# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ;
500 -# push @edits, "8|Edits<br>#8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>#Monthly edits for all Wikipedia's combined</a># remarkably stable between 10 and 12 million<br>#&nbsp;&nbsp;&nbsp;&nbsp;for 3 years now (as is the case for active and very active editors)" ;
501 -# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" .
502 -# "Experiment: logarithmic chart now uses two scales for widely divergent values.<br>This helps to remove clutter, but may need some getting used to." ;
503 -
504 -# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ;
505 -# }
506 -
507 -# if ($2009_??)
508 -# {
509 -# @visitors = qw ( 347,019,000 m 27.1 0.4 %) ; # Unique Visitors by Region
510 -# @page_requests = qw (10,389,000,000 b 0.0 -9.2 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma)
511 -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
512 -# @commons_files = qw ( 5,695,283 m 55.1 2.6 %) ; # Binaries per month - Absolute
513 -# @article_count = qw ( 29,016,248 m 34.3 2.1 %) ; # Article count (official) - Absolute
514 -# @new_articles = qw ( 7,457 k 7.7 2.6 %) ; # New articles per day - Absolute
515 -# @edits = qw ( 10,791,575 m 0.6 0.4 %) ; # Edits per month - Absolute
516 -# @new_editors = qw ( 18,597 k -6.3 -2.4 %) ; # New editors - Absolute
517 -# @active_editors = qw ( 95,849 k 3.8 -0.4 %) ; # Active editors - Absolute
518 -# @very_active_editors = qw ( 11,764 k 0.4 -0.5 %) ; # Very active editors - Absolute
519 -# @reach = qw ( 28.7 x 1.6 -0.0 %) ; # Reach Percentage by Region
520 -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
521 -# push @visitors, "1,2|Unique Visitors<br>1: Yearly growth in UV's (27%) exceeds growth of total internet (21%).<br>" .
522 -# "2: Conversation with comScore on huge monthly shifts in UV/Reach in 3rd world continues." ;
523 -# push @page_requests, "3|Page Requests<br>3: Same as last year: dip in page requests (but spike in image requests)." ;
524 -# push @rank, "4|Site Rank<br>4: 5th position will be stable for long time: 4th has 35% more UV's, 6th 23% less." ;
525 -# push @commons_files, "5|Commons Files<br>5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
526 -# push @article_count, "6|Article Count<br>6: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ;
527 -# push @new_articles, "7|New Articles<br>7: Russian consistently fast riser, Ukranian growth 40% of previous months" ;
528 -# push @edits, "8|Edits<br>8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>Monthly edits for all Wikipedia's combined</a> remarkably stable between 10 and 12 million<br>for 3 years now (as is the case for active and very active editors)" ;
529 -# push @new_editors, "9|New Editors<br>9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons" ;
530 -# }
531 -
532 -# if ($2009_10)
533 -# {
534 -# @visitors = qw ( 345,805,000 m 23.1 0.4 %) ; # Unique Visitors by Region
535 -# @page_requests = qw (11,257,000,000 b 7.7 -2.8 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma)
536 -# @rank = qw ( 5th x -1 0 th) ; # Web Properties - Unique Visitors
537 -# @commons_files = qw ( 5,558,644 m 59.7 3.4 %) ; # Binaries per month - Absolute
538 -# @article_count = qw ( 28,506,011 m 35.4 2.5 %) ; # Article count (official) - Absolute
539 -# @new_articles = qw ( 7,357 k 2.1 -6.1 %) ; # New articles per day - Absolute
540 -# @edits = qw ( 10,772,957 m 2.8 -3.4 %) ; # Edits per month - Absolute
541 -# @new_editors = qw ( 18,779 k -5.2 -4.5 %) ; # New editors - Absolute
542 -# @active_editors = qw ( 96,521 k 4.0 0.1 %) ; # Active editors - Absolute
543 -# @very_active_editors = qw ( 11,726 k 2.7 -3.4 %) ; # Very active editors - Absolute
544 -
545 -# @reach = qw ( 28.7 x 0.5 -0.3 %) ; # Reach Percentage by Region
546 - # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
547 -# push @visitors, "1|1: asked comScore to explain huge shifts in UV/Reach in Middle East-Africa." ;
548 -# push @page_requests, "2|2: Capacity problems may have played a role. New servers ordered." ;
549 -# push @new_articles, "2,3|3: Ukranian Wikipedia fastest riser (compare edits for Russian)" ;
550 -# push @edits, "4|4: Russian Wikipedia fastest riser (compare new articles for Ukrain)" ;
551 -# push @very_active_editors, "2" ;
552 -# }
553 -
554 -# if ($2009_10)
555 -# {
556 -# @new_editors = qw ( 19,002 k -8.9 3.2 %) ;
557 -# @active_editors = qw ( 97,132 k 1.9 3.4 %) ;
558 -# @very_active_editors = qw ( 12,172 k 2.8 1.2 %) ;
559 -# @article_count = qw ( 27,852,471 m 35.6 2.8 %) ;
560 -# @new_articles = qw ( 8,050 k 11.2 5.9 %) ;
561 -# @edits = qw ( 11,188,080 m -1.8 1.7 %) ;
562 -# @commons_files = qw ( 5,539,645 m 60.3 5.5 %) ;
563 -# @rank = qw ( 5th x -1 0 th) ;
564 -# @visitors = qw ( 344,563,000 m 24.3 5.7 %) ;
565 -# @reach = qw ( 29.0 x 0.8 1.3 %) ;
566 -# @page_requests = qw (11,586,000,000 b 8.8 1.9 %) ;
567 -# }
568 -
569 -# if ($2009_09)
570 -# {
571 -# @new_editors = qw ( 17,792 k -8.7 -9.6 %) ;
572 -# @active_editors = qw ( 94,565 k 2.3 -2.5 %) ;
573 -# @very_active_editors = qw ( 12,069 k 3.6 -2.5 %) ;
574 -# @article_count = qw ( 27,120,974 m 36.6 2.0 %) ;
575 -# @new_articles = qw ( 12,907 k -0.3 -11.4 %) ;
576 -# @edits = qw ( 12,578,009 m 8.8 -9.0 %) ;
577 -# @commons_files = qw ( 5,115,042 m 57.4 2.7 %) ;
578 -# @rank = qw ( 5th x 0 0 th) ;
579 -# @visitors = qw ( 325,998,000 m 19.8 6.0 %) ;
580 -# @reach = qw ( 27.6 x -1.4 4.5 %) ;
581 -# @page_requests = qw (11,372,000,000 b 11.7 5.1 %) ;
582 -# }
583 -
584 -# if ($2009_08)
585 -# {
586 -# @new_editors = qw ( 17,998 k -9.4 -6.2 %) ;
587 -# @active_editors = qw ( 91,359 k 1.1 0.8 %) ;
588 -# @very_active_editors = qw ( 11,568 k 0.3 3.0 %) ;
589 -# @article_count = qw ( 21,143,943 m 29.9 2.0 %) ;
590 -# @new_articles = qw ( 13,174 k 8.1 11.4 %) ;
591 -# @edits = qw ( 12,807,952 m 8.4 4.8 %) ;
592 -# @commons_files = qw ( 4,996,023 m 60.2 3.6 %) ;
593 -# @rank = qw ( 5th x 0 0 th) ;
594 -# @visitors = qw ( 307,641,000 m 23.8 4.1 %) ;
595 -# @reach = qw ( 26.4 x 1.9 2.7 %) ;
596 -# @page_requests = qw (10,817,000,000 b 15.3 1.5 %) ;
597 -# }
598 -
599 -# if ($2009_07)
600 -# {
601 -# @new_editors = qw ( 18,916 k -8.5 -1 %) ;
602 -# @active_editors = qw ( 90,659 k -0.3 -0.6 %) ;
603 -# @very_active_editors = qw ( 11,242 k -2.4 -0.7 %) ;
604 -# @article_count = qw ( 20,768,108 m 30.2 0.8 %) ;
605 -# @new_articles = qw ( 11,888 k -18.9 -30.3 %) ;
606 -# @edits = qw ( 12,219,008 m 6.3 0.7 %) ;
607 -# @commons_files = qw ( 4,831,659 m 61.1 3.7 %) ;
608 -# @rank = qw ( 5th x 0 0 th) ;
609 -# @visitors = qw ( 295,848,000 m 20.9 -2.5 %) ;
610 -# @reach = qw ( 25.7 x 0 -3.7 %) ;
611 -# @page_requests = qw (10,700,000,000 b 12.9 -3.0 %) ;
612 -# }
613 -
614 - $synopsis = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ; # Build the plain-text synopsis; first legend line: yearly (Y) comparison and the k/m/b unit suffixes
615 - $synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ; # second legend line: monthly (M) comparison; month names are cut to their first three characters
616 -
617 - $synopsis .= &FormatSynopsisText ("M Unique Visitors, All Projects", "", @visitors) ; # one entry per metric follows; FormatSynopsisText is defined outside this part of the file
618 - $synopsis .= &FormatSynopsisText ("M Page Views, All Projects", "", @page_requests) ;
619 - $synopsis .= &FormatSynopsisText (" Site Rank", "", @rank) ;
620 - $synopsis .= &FormatSynopsisText ("T Binary Files", "", @commons_files) ;
621 - $synopsis .= &FormatSynopsisText ("M Wikipedia Article Count", "", @article_count) ;
622 - $synopsis .= &FormatSynopsisText ("D New Wikipedia Articles", "", @new_articles) ;
623 - $synopsis .= &FormatSynopsisText ("M Wikipedia Edits per Month", "", @edits) ;
624 - $synopsis .= &FormatSynopsisText ("M New Wikipedia Editors", "", @new_editors) ;
625 - $synopsis .= &FormatSynopsisText ("M Active Wikipedia Editors", "", @active_editors) ;
626 - $synopsis .= &FormatSynopsisText ("M Very Active Wikipedia Ed.", "", @very_active_editors) ; # note: @reach gets no synopsis entry
627 -
628 - print "\n\n$synopsis" ; # echo the synopsis to STDOUT
629 - print "\n"."="x80 . "\n\n" ; # separator: a row of 80 '=' characters ('x' binds tighter than '.')
630 -
631 - @visitors_ = @visitors ; # Keep a copy of every metric array as entered, before the values are reformatted below;
632 - @page_requests_ = @page_requests ; # the '?' placeholder check later in this script compares against these copies
633 - @rank_ = @rank ;
634 - @commons_files_ = @commons_files ;
635 - @article_count_ = @article_count ;
636 - @new_articles_ = @new_articles ;
637 - @edits_ = @edits ;
638 - @new_editors_ = @new_editors ;
639 - @active_editors_ = @active_editors ;
640 - @very_active_editors_ = @very_active_editors ;
641 - @reach_ = @reach ;
642 -
643 - $visitors [0] =~ s/,//g ; # Remove the thousands separators from element 0 (the headline value) of every metric
644 - $new_editors [0] =~ s/,//g ;
645 - $active_editors [0] =~ s/,//g ;
646 - $very_active_editors [0] =~ s/,//g ;
647 - $article_count [0] =~ s/,//g ;
648 - $new_articles [0] =~ s/,//g ;
649 - $edits [0] =~ s/,//g ;
650 - $commons_files [0] =~ s/,//g ;
651 - $rank [0] =~ s/,//g ;
652 - $reach [0] =~ s/,//g ;
653 - $page_requests [0] =~ s/,//g ;
654 -
655 - $visitors [0] = sprintf ("%.0f",$visitors [0]/1000000) ; # scale to millions, no decimals
656 - $article_count [0] = sprintf ("%.1f",$article_count [0]/1000000) ; # scale to millions, one decimal
657 - $edits [0] = sprintf ("%.1f",$edits [0]/1000000) ; # scale to millions, one decimal
658 - $commons_files [0] = sprintf ("%.1f",$commons_files [0]/1000000) ; # scale to millions, one decimal
659 - $page_requests [0] = sprintf ("%.1f",$page_requests [0]/1000000000) ; # scale to billions, one decimal
660 -
661 - 1 while $new_editors [0] =~ s/^(\d+)(\d\d\d)(?=,|$)/$1,$2/ ; # Re-insert thousands separators in the metrics that are shown unscaled;
662 - 1 while $active_editors [0] =~ s/^(\d+)(\d\d\d)(?=,|$)/$1,$2/ ; # a leading digit is required, so a value below 1000 no longer becomes ",638",
663 - 1 while $very_active_editors [0] =~ s/^(\d+)(\d\d\d)(?=,|$)/$1,$2/ ; # and the loop repeats so values of a million or more get every separator (1,234,567);
664 - 1 while $new_articles [0] =~ s/^(\d+)(\d\d\d)(?=,|$)/$1,$2/ ; # non-numeric placeholders such as '?' are left untouched
665 -
666 - $visitors [2] = sprintf ("%.1f", $visitors [2]) ; # yearly trend of unique visitors, one decimal
667 - $visitors [3] = sprintf ("%.1f", $visitors [3]) ; # monthly trend of unique visitors, one decimal
668 -# $visitors [5] =~ ($visitors [2] >= 0) ? 'A' : 'E' ; # disabled: '=~' binds a pattern match instead of assigning, so this statement never stored anything
669 -# $visitors [6] =~ ($visitors [3] >= 0) ? 'A' : 'E' ; # disabled: same no-op; WriteReports derives the A/E colour class from the sign of the trend when it expands the TRENDS markers
670 -
671 - $page_requests [2] = sprintf ("%.1f", $page_requests [2]) ; # Format the yearly [2] and monthly [3] trend of each metric with one decimal (rank: whole numbers)
672 - $page_requests [3] = sprintf ("%.1f", $page_requests [3]) ; # a second, identical page_requests pair at the end of this list was redundant and has been dropped
673 - $new_editors [2] = sprintf ("%.1f", $new_editors [2]) ;
674 - $new_editors [3] = sprintf ("%.1f", $new_editors [3]) ;
675 -# $active_editors [2] = sprintf ("%.1f", $active_editors [2]) ;
676 -# $active_editors [3] = sprintf ("%.1f", $active_editors [3]) ;
677 - $very_active_editors [2] = sprintf ("%.1f", $very_active_editors [2]) ;
678 - $very_active_editors [3] = sprintf ("%.1f", $very_active_editors [3]) ;
679 -# $article_count [2] = sprintf ("%.1f", $article_count [2]) ;
680 -# $article_count [3] = sprintf ("%.1f", $article_count [3]) ;
681 - $new_articles [2] = sprintf ("%.1f", $new_articles [2]) ;
682 - $new_articles [3] = sprintf ("%.1f", $new_articles [3]) ;
683 - $edits [2] = sprintf ("%.1f", $edits [2]) ;
684 - $edits [3] = sprintf ("%.1f", $edits [3]) ;
685 - $commons_files [2] = sprintf ("%.1f", $commons_files [2]) ;
686 - $commons_files [3] = sprintf ("%.1f", $commons_files [3]) ;
687 - $rank [2] = sprintf ("%.0f", $rank [2]) ; # rank changes are whole positions, hence no decimals
688 - $rank [3] = sprintf ("%.0f", $rank [3]) ;
689 - $reach [2] = sprintf ("%.1f", $reach [2]) ;
690 - $reach [3] = sprintf ("%.1f", $reach [3]) ;
693 -
694 - for ($i = 0 ; $i <= 3 ; $i++) # For elements 0..3 of every metric: show '...' wherever the value as entered (the underscore copy) was the '?' placeholder
695 - {
696 - $visitors [$i] = '...' if $visitors_ [$i] eq '?' ; # the copy is tested because the formatting above has already overwritten the working value
697 - $page_requests [$i] = '...' if $page_requests_ [$i] eq '?' ;
698 - $rank [$i] = '...' if $rank_ [$i] eq '?' ;
699 - $commons_files [$i] = '...' if $commons_files_ [$i] eq '?' ;
700 - $article_count [$i] = '...' if $article_count_ [$i] eq '?' ;
701 - $new_articles [$i] = '...' if $new_articles_ [$i] eq '?' ;
702 - $edits [$i] = '...' if $edits_ [$i] eq '?' ;
703 - $new_editors [$i] = '...' if $new_editors_ [$i] eq '?' ;
704 - $active_editors [$i] = '...' if $active_editors_ [$i] eq '?' ;
705 - $very_active_editors [$i] = '...' if $very_active_editors_ [$i] eq '?' ;
706 - $reach [$i] = '...' if $reach_ [$i] eq '?' ;
707 - }
708 -
709 - $path_input = "W:/@ Report Card/Input/" ; # hard-coded input folder, passed to WriteReports as its first argument
710 - $path_public = "W:/@ Report Card/Public/" ; # output folder for the first WriteReports call
711 - $path_private = "W:/@ Report Card/Extended/" ; # few more charts with top 10 web properties based on data from comScore (slightly confidential)
712 -
713 - &WriteReports ($path_input, $path_public, $public) ; # the third argument is received by WriteReports as $target_audience
714 - &WriteReports ($path_input, $path_private, $private) ; # second pass: same input folder, different output folder and audience value
715 -
716 - print "\nReady\n\n" ;
717 - exit ; # stop here so execution does not run on into the sub definitions below
718 -
719 -sub WriteReports
720 -{
721 - $path_in = shift ;
722 - $path_out = shift ;
723 - $target_audience = shift ;
724 -
725 - &WriteSynopsis ($path_out) ;
726 -
727 - open TEMPLATE, '<', "RT_yyyy_mm.html" ;
728 - open DETAILS, '>', "$path_out/RC_${p_year}_${p_month_d2}_detailed.html" ;
729 - open SUMMARY, '>', "$path_out/RC_${p_year}_${p_month_d2}_summary.html" ;
730 - open COLUMNS, '>', "$path_out/RC_${p_year}_${p_month_d2}_columns.html" ;
731 -
732 -
733 - $write_details = $true ;
734 - $write_summary = $true ;
735 - $write_columns = $true ;
736 -
737 - $write_public = $true ;
738 - $write_private = $true ;
739 -
740 - $iscomment = $false ;
741 -
742 - while ($line = <TEMPLATE>)
743 - {
744 - chomp $line ;
745 -
746 - $line =~ s/<!--.*?-->// ;
747 -# if ($line =~ /<!--/)
748 -# {
749 -# $iscomment = $true ;
750 -# $line =~ s/<!--.*$// ;
751 -# }
752 -# if ($line =~ /-->/)
753 -# {
754 -# $iscomment = $false ;
755 -# $line =~ s/^.*?-->// ;
756 -# }
757 -# if ($iscomment)
758 -# { $line = "<!-- {{$line}} -->" ; }
759 -
760 - if ($line =~ /\{\{yyyy\}\}_\{\{mm[+-]1\}\}/)
761 - {
762 - if ($p_month == 1)
763 - { $line =~ s/\{\{yyyy\}\}_\{\{mm\-1\}\}/{{yyyy-1}}_{{mm-1}}/ ; } # Q&D temp fix
764 - if ($p_month == 12)
765 - { $line =~ s/\{\{yyyy\}\}_\{\{mm\+1\}\}/{{yyyy+1}}_{{mm+1}}/ ; } # Q&D temp fix
766 - }
767 -
768 - # $no_upd = "<font color=#800000>*<\/font>" ;
769 -
770 - if ($true) # test ?
771 - {
772 - # $no_upd = "&nbsp;&nbsp;<small><small><font color=#FF0000><b>chart could not be updated for current month</b></font></small></small>" ;
773 - $line =~ s/H2 (UNIQUE VISITORS)/A[$1] H2 {${visitors [0]} million|Unique Visitors, All Projects}/ ;
774 - $line =~ s/H2 (PAGE REQUESTS)/A[$1] H2 {${page_requests[0]} billion|Page Requests, All Projects}/ ;
775 - $line =~ s/H2 (WEB PROPERTIES)/A[$1] H2 {${rank[0]} in rank|Web Properties - Unique Visitors}/ ;
776 - $line =~ s/H2 (COMMONS FILES)/A[$1] H2 {${commons_files[0]} million|Binary Files $no_upd}/ ;
777 - $line =~ s/H2 (ARTICLE COUNT)/A[$1] H2 {${article_count[0]} million|Wikipedia Articles, Comparison with Other Projects $no_upd}/ ;
778 - $line =~ s/H2 (ARTICLES PER DAY)/A[$1] H2 {${new_articles[0]}|New Wikipedia Articles Per Day $no_upd}/ ;
779 - $line =~ s/H2 (EDITS PER MONTH)/A[$1] H2 {${edits[0]} million|Wikipedia Edits Per Month $no_upd}/ ;
780 - $line =~ s/H2 (NEW EDITORS PER MONTH)/A[$1] H2 {${new_editors[0]}|New Wikipedia Editors Per Month $no_upd}/ ;
781 - $line =~ s/H2 (ACTIVE EDITORS)/A[$1] H2 {${active_editors[0]}|Active Wikipedia Editors (5+ edits per month) $no_upd}/ ;
782 - $line =~ s/H2 (VERY ACTIVE EDITORS)/A[$1] H2 {${very_active_editors[0]}|Very Active Wikipedia Editors (100+ edits per month) $no_upd}/ ;
783 -
784 - $line =~ s/TRENDS UNIQUE VISITORS/TRENDS {$trend_one_year|${visitors[2]}%}{$trend_one_month|${visitors[3]}%}/ ;
785 - $line =~ s/TRENDS PAGE REQUESTS/TRENDS {$trend_one_year|${page_requests[2]}%}{$trend_one_month|${page_requests[3]}%}/ ;
786 - $line =~ s/TRENDS WEB PROPERTIES/TRENDS {$trend_one_year|${rank[2]}}{$trend_one_month|${rank[3]}}/ ;
787 - $line =~ s/TRENDS COMMONS FILES/TRENDS {$trend_one_year|${commons_files[2]}%}{$trend_one_month|${commons_files[3]}%}/ ;
788 - $line =~ s/TRENDS ARTICLE COUNT/TRENDS {$trend_one_year|${article_count[2]}%}{$trend_one_month|${article_count[3]}%}/ ;
789 - $line =~ s/TRENDS ARTICLES PER DAY/TRENDS {$trend_one_year|${new_articles[2]}%}{$trend_one_month|${new_articles[3]}%}/ ;
790 - $line =~ s/TRENDS EDITS PER MONTH/TRENDS {$trend_one_year|${edits[2]}%}{$trend_one_month|${edits[3]}%}/ ;
791 - $line =~ s/TRENDS NEW EDITORS PER MONTH/TRENDS {$trend_one_year|${new_editors[2]}%}{$trend_one_month|${new_editors[3]}%}/ ;
792 - $line =~ s/TRENDS ACTIVE EDITORS/TRENDS {$trend_one_year|${active_editors[2]}%}{$trend_one_month|${active_editors[3]}%}/ ;
793 - $line =~ s/TRENDS VERY ACTIVE EDITORS/TRENDS {$trend_one_year|${very_active_editors[2]}%}{$trend_one_month|${very_active_editors[3]}%}/ ;
794 -
795 - $line =~ s/{{yyyy}}/$p_year/g ;
796 - $line =~ s/{{yyyy\-1}}/$p_year_prev/g ;
797 - $line =~ s/{{yyyy\+1}}/$p_year_next/g ;
798 - $line =~ s/{{yyyy\+m2}}/$p_year_plus_m2/g ;
799 - $line =~ s/{{month}}/$p_month_name/g ;
800 - $line =~ s/{{month\-1}}/$p_month_name_prev/g ;
801 - $line =~ s/{{month\+1}}/$p_month_name_next/g ;
802 - $line =~ s/{{month\+2}}/$p_month_name_next2/g ;
803 -
804 - $line =~ s/{{y}}/$p_year_short/g ;
805 - $line =~ s/{{y\-1}}/$p_year_prev_short/g ;
806 - $line =~ s/{{yy}}/$p_year_short_d2/g ;
807 - $line =~ s/{{yy\-1}}/$p_year_prev_short_d2/g ;
808 -
809 - $line =~ s/{{m}}/$p_month/g ;
810 - $line =~ s/{{m\-1}}/$p_month_prev/g ;
811 - $line =~ s/{{mm}}/$p_month_d2/g ;
812 - $line =~ s/{{mm-1}}/$p_month_prev_d2/g ;
813 - $line =~ s/{{mm\+1}}/$p_month_next_d2/g ;
814 -
815 - $line =~ s/{{\(mm\/yy\)-1}}/$p_year_month_m1/g ;
816 - }
817 - else
818 - {
819 - $line =~ s/{{yyyy}}/[[yyyy]]/g ;
820 - $line =~ s/{{yyyy-1}}/[[yyyy-1]]/g ;
821 - $line =~ s/{{yyyy\+m2}}/[[yyyy\+m2]]/g ;
822 - $line =~ s/{{month}}/[[month]]/g ;
823 - $line =~ s/{{month-1}}/[[month-1]]/g ;
824 - $line =~ s/{{month\+1}}/[[month\+1]]/g ;
825 - $line =~ s/{{month\+2}}/[[month\+2]]/g ;
826 -
827 - $line =~ s/{{y}}/y/g ;
828 - $line =~ s/{{y-1}}/y-1/g ;
829 - $line =~ s/{{m}}/m/g ;
830 - $line =~ s/{{m-1}}/m-1/g ;
831 - $line =~ s/{{mm}}/mm/g ;
832 - $line =~ s/{{mm-1}}/mm-1/g ;
833 - $line =~ s/{{mm\+}}/mm+1/g ;
834 -
835 - $line =~ s/{{\(mm\/yy\)-1}}/(mm\/yy)-1/g ;
836 - }
837 -
838 - if ($line =~ /<!==\s*COMMENT\s*\{[^\}]*\}\s*==>/)
839 - {
840 - $comment = $line ;
841 - $comment =~ s/^.*?\{// ;
842 - $comment =~ s/\}.*$// ;
843 - $line = " <span class=comment>$comment</span\n" ;
844 - }
845 -
846 - if ($line =~ /<!==\s*H1\s*\{[^\}]*\}\s*==>/)
847 - {
848 - $title = $line ;
849 - $title =~ s/^.*?\{// ;
850 - $title =~ s/\}.*$// ;
851 - $line = " <tr>\n" .
852 - " <td class=h1 colspan=99><span class=h9>$title</span></td>\n" .
853 - " </tr>\n" .
854 - " <tr>\n" .
855 - " <td><small><small>&nbsp;</small></small></td>\n" .
856 - " </tr>\n" ;
857 - }
858 -
859 - if ($line =~ /<!==\s*A\[[^\]]*\] H2\s*\{[^\}]*\}\s*==>/)
860 - {
861 - ($anchor = $line) ;
862 - $anchor =~ s/^.*?A\[// ;
863 - $anchor =~ s/\].*$// ;
864 - $anchor =~ s/\s/_/g ;
865 - $anchor = lc($anchor) ;
866 -
867 - $parms = $line ;
868 - $parms =~ s/^.*?\{// ;
869 - $parms =~ s/\}.*$// ;
870 - ($metric,$title) = split ('\|', $parms,2) ;
871 - ($title2 = $title) =~ s/ /_/g ;
872 - $line = " <tr>\n" .
873 - " <td class=score><a id='$anchor' name='$anchor'></a><span class=bg>$metric</sup></span></td>\n" .
874 - " <td class=h2><span class=h2>$title</span><br></td>\n" .
875 - "</tr>\n" ;
876 - }
877 -
878 - if ($line =~ /<!==\s*TABS\s*\{[^\}]*\}\s*==>/)
879 - {
880 - $parms = $line ;
881 - $parms =~ s/^.*?\{// ;
882 - $parms =~ s/\}.*$// ;
883 - ($id,@texts) = split ('\|', $parms) ;
884 - $line = " <div id=\"container-" . ($id/10) . "\">\n" ;
885 - $line .= " <ul>\n" ;
886 - foreach $text (@texts)
887 - {
888 - $id++ ;
889 - $line .= " <li><a href=\"#fragment-$id\"><span>$text</span></a></li>\n" ;
890 - }
891 - $line .= " </ul>\n" ;
892 - $id_hi = $id ;
893 - }
894 -
895 - if ($line =~ /<!==\s*TAB\s*\{[^\}]*\}\s*==>/)
896 - {
897 - $parms = $line ;
898 - $parms =~ s/^.*?\{// ;
899 - $parms =~ s/\}.*$// ;
900 - ($id,$text) = split ('\|', $parms) ;
901 -
902 - if ($text =~ /^START/i)
903 - {
904 - $line = "\n <div id=\"fragment-$id\">\n" ;
905 - }
906 - elsif ($text =~ /^END/i)
907 - {
908 - if ($id == $id_hi)
909 - { $line = " </div>" ; }
910 - }
911 - else
912 - {
913 - $line = "\n <div id=\"fragment-$id\">\n $text\n </div>\n" ;
914 - if ($id == $id_hi)
915 - { $line .= " </div>" ; }
916 - }
917 - }
918 -
919 - if ($line =~ /<!==\s*TRENDS\s*\{[^\}]*\}\{[^\}]*\}\s*==>/)
920 - {
921 - $parms = $line ;
922 - $parms =~ s/^[^\{]*\{// ;
923 - $parms =~ s/\}[^\}]*$// ;
924 - ($trendY,$trendM) = split ('\}\s*\{', $parms,2) ;
925 -
926 - # ($colorY,$month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
927 - # ($colorM,$month1M,$month2M,$trendM) = split ('\|',$trendM) ;
928 - ($month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
929 - ($month1M,$month2M,$trendM) = split ('\|',$trendM) ;
930 - if ($trendY >= 0)
931 - { $colorY = "A" ; $trendY = "+$trendY" }
932 - else
933 - { $colorY = "E" ; }
934 - if ($trendM >= 0)
935 - { $colorM = "A" ; $trendM = "+$trendM" }
936 - else
937 - { $colorM = "E" ; }
938 -
939 -
940 - #<!== TRENDS {A|5/8|5/9|+12%}{A|4/9|5/9|+8%} ==>
941 - $line = " <td class=date>\n" .
942 - " <table border=0>\n" .
943 - " <tr>\n" .
944 - " <td class=date$colorY><b>Y</b>&nbsp;$month1Y&rArr;$month2Y</td>\n" .
945 - " <td class=date$colorY>$trendY</td>\n" .
946 - " </tr>\n" .
947 - " <tr>\n" .
948 - " <td class=date$colorM><b>M</b>&nbsp;$month1M&rArr;$month2M</td>\n" .
949 - " <td class=date$colorM>$trendM</td>\n" .
950 - " </tr>\n" .
951 - " </table>\n" .
952 - " </td>\n" ;
953 - }
954 -
955 - if ($line =~ /<!==\s*OUT\s*PUBLIC\s*==>/)
956 - {
957 - $write_public = $true ;
958 - $write_private = $false ;
959 - }
960 - elsif ($line =~ /<!==\s*OUT\s*EXTENDED\s*==>/)
961 - {
962 - $write_public = $false ;
963 - $write_private = $true ;
964 - }
965 - elsif ($line =~ /<!==\s*OUT\s*ALWAYS\s*==>/)
966 - {
967 - $write_public = $true ;
968 - $write_private = $true ;
969 - }
970 - elsif ($line =~ /<!==\s*OUT .*\s*==>/)
971 - {
972 - $line2 = $line ;
973 - $line2 =~ s/^.*<!==\s*OUT\s*// ;
974 - $line2 =~ s/\s*==>.*$// ;
975 - $write_details = $false ;
976 - $write_summary = $false ;
977 - $write_columns = $false ;
978 - if ($line2 =~ /C/)
979 - { $write_columns = $true ; }
980 - if ($line2 =~ /D/)
981 - { $write_details = $true ; }
982 - if ($line2 =~ /S/)
983 - { $write_summary = $true ; }
984 -
985 - &Print (COLUMNS, "$line\n") ;
986 - &Print (DETAILS, "$line\n") ;
987 - &Print (SUMMARY, "$line\n") ;
988 - next ;
989 - }
990 -
991 - if ($line =~ /<!==\s*INC .*\s*==>/)
992 - {
993 - $line2 = $line ;
994 - $line2 =~ s/^.*<!==\s*INC\s*// ;
995 - $line2 =~ s/\s*==>.*$// ;
996 -
997 - $file = "$path_in/$line2" ;
998 - print "\nInclude $file\n" ;
999 - if (! -e $file)
1000 - { &Abort ("File $file not found\n") ; }
1001 - open FILE, '<', $file ;
1002 - foreach $line (<FILE>)
1003 - {
1004 - if ($write_columns)
1005 - { &Print (COLUMNS, $line) ; }
1006 - if ($write_details)
1007 - { &Print (DETAILS, $line) ; }
1008 - if ($write_summary)
1009 - { &Print (SUMMARY, $line) ; }
1010 - }
1011 - next ;
1012 - }
1013 -
1014 - if ($write_columns)
1015 - { &Print (COLUMNS, "$line\n") ; }
1016 - elsif ($line =~ /-->/)
1017 - { &Print (COLUMNS, "<!-- $line\n") ; }
1018 - else
1019 - { &Print (COLUMNS, "<!-- $line -->\n") ; }
1020 -
1021 - if ($write_details)
1022 - { &Print (DETAILS, "$line\n") ; }
1023 - elsif ($line =~ /-->/)
1024 - { &Print (DETAILS, "<!-- $line\n") ; }
1025 - else
1026 - { &Print (DETAILS, "<!-- $line -->\n") ; }
1027 -
1028 - if ($write_summary)
1029 - { &Print (SUMMARY, "$line\n") ; }
1030 - elsif ($line =~ /-->/)
1031 - { &Print (SUMMARY, "<!-- $line\n") ; }
1032 - else
1033 - { &Print (SUMMARY, "<!-- $line -->\n") ; }
1034 - }
1035 -}
1036 -
# Build an anchor id from a heading: leading and trailing whitespace is
# removed, every remaining whitespace character becomes an underscore,
# and the result is lower-cased.
sub Anchor
{
  my ($name) = @_ ;

  for ($name)
  {
    s/^\s*// ;
    s/\s*$// ;
    s/\s/_/g ;
  }

  return lc ($name) ;
}
1045 -
# Write the two synopsis files for the report month into $path_out:
#   RC_<yyyy>_<mm>_synopsis.txt   the text held in the global $synopsis on entry
#   RC_<yyyy>_<mm>_synopsis.html  a sortable HTML table built from the metric arrays
#
# Argument: $path_out - output directory.
# Reads the globals $p_year, $p_month* and $p_year* (date parts), the metric
# arrays (@visitors, @page_requests, ...) and $comment_prev_month.
# Resets @synopsis_notes, which FormatSynopsisTable refills, and overwrites
# $synopsis, $data and $notice_synopsis.
# Aborts the script when an output file cannot be opened.
sub WriteSynopsis
{
  my $path_out = shift ;

  $notice_synopsis = "" ;
  # "<font color=#008000><b>New: multi-year trends for most metrics. Depending on history available reporting period can vary.</b></font>" ;

  my $file_txt = "$path_out/RC_${p_year}_${p_month_d2}_synopsis.txt" ;
  open (my $out_txt, '>', $file_txt) or &Abort ("Could not open $file_txt for writing: $!") ;
  print $out_txt $synopsis ;
  close $out_txt ;

  # some day also get this code from RT_yyyy_mm.html, for uniformity
  # The heredoc interpolates, so every literal JavaScript '$' must be written '\$'.
  # NOTE(review): the second script block calls tablesorter(sortList: ...) without
  # an object literal, which is not valid JavaScript - left as is, confirm intent.
  $synopsis = <<__SYNOPSIS__ ;
<html lang="en">
<head>
<title>Wikimedia Report Card Synopsis - {{month}} {{yyyy}}</title>
<meta http-equiv="content-type" content="text/html"; charset="iso-8859-1">
<meta http-equiv="Window-target" content="_top">
<meta name="language" content="en,English">
<meta name="robots" content="index,follow">
<link rel="shortcut icon" href="http://wikimediafoundation.org/favicon.ico" />
<link rel="apple-touch-icon" href="http://wikimediafoundation.org/favicon.ico" />
<script src="assets/jquery-1.1.3.1.pack.js" type="text/javascript"></script>
<script src="assets/jquery.history_remote.pack.js" type="text/javascript"></script>
<script src="assets/jquery.tabs.pack.js" type="text/javascript"></script>
<script src="assets/jquery.tablesorter.js" type="text/javascript"></script>

<script type="text/javascript">
\$(function()
{
 \$("#Synopsis").tablesorter();
})
</script>

<script type="text/javascript">
\$(document).ready(
function()
{
\$("#Synopsis").tablesorter(sortList: [[0,0]] );
}
);
</script>

<script type="text/javascript">
\$.tablesorter.addParser({
 id: "nohtml",
 is: function(s) { return false; },
 format: function(s) { return s.replace(/<.*?>/g,"").replace(/&nbsp;/g,""); },
 type: "text"
});
\$.tablesorter.addParser({
 id: "digitsonly",
 is: function(s) { return false; },
 format: function(s) { return \$.tablesorter.formatFloat(s.replace(/<.*?>/g,"").replace(/&nbsp;/g,"").replace(/,/g,"").replace(/-/,"-1")); },
 type: "numeric"
});
</script>

<style type="text/css">
/* tables */
table.tablesorter
{
 font-family:arial;
 background-color: #FFF; // #CDCDCD;
 margin:10px 0pt 15px;
 font-size: 7pt;
 width: 80%;
 text-align: left;
}
table.tablesorter thead tr th, table.tablesorter tfoot tr th
{
 background-color: #AAB;
 border: 1px solid #FFF;
 font-size: 8pt;
 padding: 4px;
}
table.tablesorter thead tr .header
{
 background-image: url(assets/bg.gif);
 background-repeat: no-repeat;
 background-position: center right;
 cursor: pointer;
}
table.tablesorter tbody td
{
 color: #3D3D3D;
 padding: 4px;
 background-color: #FFF;
 vertical-align: top;
}
table.tablesorter tbody tr.odd td
{ background-color:#F0F0F6; }
table.tablesorter thead tr .headerSortUp
{ background-image: url(assets/asc.gif); }
table.tablesorter thead tr .headerSortDown
{ background-image: url(assets/desc.gif); }
table.tablesorter thead tr .headerSortDown, table.tablesorter thead tr .headerSortUp
{ background-color: #BBF; //#8dbdd8; }
<!--
body {font-family:arial,sans-serif;background-color:#B0B0B0}
table,td,tr{background-color:#FFFFFF;font-size:11pt}
h1{font-size:22px}
h2{font-size:18px ; color:#006000 ; margin-top:40px}
h3{font-size:15px ; color:#006000}
form{margin:0}
a:link {color:#000080;text-decoration:none}
a:visited {color:#000080;text-decoration:none}
a:active {color:#000080;text-decoration:none}
a:hover {color:#0000FF;text-decoration:underline}
a img {border-color:black}
td.detail-left {font-size:12px ; color:#000000 ; text-align:left ; }
td.detail-center {font-size:12px ; color:#000000 ; text-align:center ; }
td.detail-right {font-size:12px ; color:#000000 ; text-align:right ; }
</style>
</head>
<body>
<table width=800 cellpadding=18 align=center>
<tr>
 <td align='center'>

 <table width=95%>

 <tr>
 <td width=100% colspan=99>
 <table width=100%>
 <tr>
 <td align=left width=150 valign=top><img src='assets/WikimediaLogo.png' width=30></td>
 <td align=center valign=top><h1>Wikimedia Report Card <font color=#008000>{{month}} {{yyyy}} </font></h1>
 </td>
 <td align=right width=150 valign=top><h1>Synopsis</h1></td>
 <!-- <td align=right width=150 valign=top><small><small>Published<br>{{month+2}}<br>{{yyyy+m2}}</small></small></td> -->
 </tr>
 <tr>
 <td align=left width=150 valign=top><!-- <small><a href='RC_{{yyyy}}_{{mm-1}}_synopsis.html'>&lArr;&nbsp;{{month-1}}</a></small>--> </td>
 <td align=center valign=top>
 <small>&rArr; <a href='RC_{{yyyy}}_{{mm}}_detailed.html'>Detailed version</a>&nbsp;&nbsp;&nbsp;&nbsp; &rArr; <a href='RC_{{yyyy}}_{{mm}}_summary.html'>Summary, 1 column</a>&nbsp;&nbsp;&nbsp;&nbsp; &rArr; <a href='RC_{{yyyy}}_{{mm}}_columns.html'>Summary, 2 columns</a></small>
 </td>
 <td align=right width=150 valign=top><!--<small><a href='RC_{{yyyy}}_{{mm+1}}_synopsis.html'>{{month+1}}&nbsp;&rArr;</a></small>--></td>
 </tr>
 </table>
 </td>
 </tr>
 <tr>
 <td colspan=99>
 <small>
 <center>
 $notice_synopsis
 </center> <!-- General comment -->
 </small>
 </td>
</tr>
<tr><td colspan=99 align=center>
<table border=1 id='Synopsis' class=tablesorter>
<!-- <tr> -->
<!-- <td align='left' colspan=99> -->
<!-- <font color=#800000><b><small>No English Wikipedia dump was produced this month.<br>Without it some totals and trends are also meaningless and left blank.</small></b></font> -->
<!-- </td> -->
<!-- </tr> -->
DATA
</table>
</td></tr>
 <tr>
 <td colspan=99 align=center>
 <hr class=thin>
 <small><small><font color=808080>Author Erik Zachte - mail: ezachte@###.org (nospam: ###=wikimedia)</font></small></small>
 </td>
 </tr>
 </table>
<script type='text/javascript'>
\$('#Synopsis').tablesorter({
 // debug:true,
 headers:{0:{sorter:'nohtml'},1:{sorter:false},2:{sorter:'digitsonly'},3:{sorter:'digitsonly'},4:{sorter:false}}
});
</script>

</body>
</html>
__SYNOPSIS__

  undef @synopsis_notes ;

# $data = "<tr><th>Unique Visitors</th></tr>\n" ;
# $synopsis = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ;
# $synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ;
  $data = "<thead><tr><th class=detail-left valign=top>&nbsp;<b>Metric</b>&nbsp;</th>" .
          "<th class=detail-center valign=top>&nbsp;<b>Now</b>&nbsp;<br>{{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top>&nbsp;<b>Yearly change</b>&nbsp;<br>{{mm}}/{{yy-1}} &rArr; {{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top>&nbsp;<b>Monthly change</b>&nbsp;<br>{{(mm/yy)-1}} &rArr; {{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top>&nbsp;<b>Notes</b>&nbsp;</th></tr></thead>\n<tbody>\n" ;
# $data .= "<tr><th>&nbsp;</th><th>&nbsp;</th><th>&nbsp;</th><th>&nbsp;</th><th>&nbsp;</th></tr></thead>" ;

# $comment_prev_month = "<sup><font color=#800000>*</font></sup>" ; # qqq

  # One table row per metric, in display order: label HTML plus the metric array.
  my @rows =
  (
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#unique_visitors'>Unique Visitors</a> <sup>All</sup>", \@visitors ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#page_requests'>Page Requests</a> <sup>All</sup>", \@page_requests ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#web_properties'>Site Rank</a> <sup>All</sup>", \@rank ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#commons_files'>Binary Files</a> <sup>Commons</sup> $comment_prev_month", \@commons_files ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#article_count'>Article Count</a> <sup>Wp</sup> $comment_prev_month", \@article_count ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#articles_per_day'>New Articles Per Day</a> <sup>Wp</sup> $comment_prev_month", \@new_articles ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#edits_per_month'>Edits</a> <sup>Wp</sup> $comment_prev_month", \@edits ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#new_editors_per_month'>New Editors <sup>Wp</sup></a> $comment_prev_month", \@new_editors ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#active_editors'>Active Editors</a> <sup>Wp</sup> $comment_prev_month", \@active_editors ],
    [ "<a href='RC_{{yyyy}}_{{mm}}_detailed.html#very_active_editors'>Very Active Editors</a> <sup>Wp</sup> $comment_prev_month", \@very_active_editors ],
  ) ;
  foreach my $row (@rows)
  {
    my ($row_label, $row_metrics) = @$row ;
    $data .= &FormatSynopsisTable ($row_label, "", @$row_metrics) ;
  }

  $data .= "</tbody>\n<tfoot><tr><td colspan=99>&nbsp;</td></tr>\n" ;
  $data .= "<tr><td colspan=99><b><small>Repeated observations below are grayed</small></b></td></tr>\n" ;

  # notes were collected by the FormatSynopsisTable calls above
  foreach my $note (@synopsis_notes)
  {
    $data .= "<tr><td class=detail-left colspan=99>$note</td></tr>" ;
  }
# $data .= "<tr><td class=detail-left colspan=99><font color=#800000><small>* For German and Polish Wikipedias data for June were not yet available: reused counts from May</small></font></td></tr>" ;
  $data .= "<tr><td class=detail-left colspan=99><font color=#808080><small>All = All projects, Wp = Wikipedia project&nbsp;&nbsp;&nbsp;/&nbsp;&nbsp;&nbsp;B = billion, M = million, k = thousand</small></font></td></tr></tfoot>" ;

  # Only the first "DATA" (the marker line in the template) is replaced.
  $synopsis =~ s/DATA/$data/ ;

  # Placeholder tokens and their values, applied in this order.
  # Tokens are matched literally via \Q...\E, so braces, '+' and '/' need no escaping.
  my @placeholders =
  (
    [ '{{yyyy}}'      => $p_year               ],
    [ '{{yyyy-1}}'    => $p_year_prev          ],
    [ '{{yyyy+m2}}'   => $p_year_plus_m2       ],
    [ '{{month}}'     => $p_month_name         ],
    [ '{{month-1}}'   => $p_month_name_prev    ],
    [ '{{month+1}}'   => $p_month_name_next    ],
    [ '{{month+2}}'   => $p_month_name_next2   ],

    [ '{{y}}'         => $p_year_short         ],
    [ '{{y-1}}'       => $p_year_prev_short    ],
    [ '{{yy}}'        => $p_year_short_d2      ],
    [ '{{yy-1}}'      => $p_year_prev_short_d2 ],
    [ '{{m}}'         => $p_month              ],
    [ '{{m-1}}'       => $p_month_prev         ],
    [ '{{mm}}'        => $p_month_d2           ],
    [ '{{mm-1}}'      => $p_month_prev_d2      ],
    [ '{{mm+1}}'      => $p_month_next_d2      ],

    [ '{{(mm/yy)-1}}' => $p_year_month_m1      ],
  ) ;
  foreach my $placeholder (@placeholders)
  {
    my ($token, $value) = @$placeholder ;
    $synopsis =~ s/\Q$token\E/$value/g ;
  }

  my $file_html = "$path_out/RC_${p_year}_${p_month_d2}_synopsis.html" ;
  open (my $out_html, '>', $file_html) or &Abort ("Could not open $file_html for writing: $!") ;
  print $out_html $synopsis ;
  close $out_html ;
}
1284 -
# Write $text to the file handle $handle, subject to two filters.
#
# Arguments:
#   $handle - file handle (a handle name such as COLUMNS, or a handle reference)
#   $text   - text to write
#
# Unless $debug is set, text containing an HTML comment opener or
# template markup is dropped; text containing "[if lte" is exempt.
# What remains is written only when the output switch for the current
# $target_audience ($write_public or $write_private) is on.
#
# Both arguments are lexical so that a call does not overwrite the package
# variables $handle and $text used elsewhere in the script.
sub Print
{
  my ($handle, $text) = @_ ;

  if ((! $debug) && ($text !~ /\[if lte/)) # Q&D: keep MSIE directive
  {
    return if $text =~ /<!--/ ; # comments
    return if $text =~ /<!==/ ; # template markup
  }

  if (($target_audience == $public) && $write_public)
    { print $handle $text ; }
  if (($target_audience == $private) && $write_private)
    { print $handle $text ; }
}
1303 -
# Format one metric as a line of the plain-text synopsis:
#   label (left-justified, 20 wide), scaled value plus unit (right-justified,
#   8 wide), then " (Y:<yearly> / M:<monthly>) <comment>" and a newline.
#
# Arguments: label, trailing comment, then the metric fields
#   value, unit (b, m, k or x), yearly change, monthly change, change suffix.
# Works on the package variables $label, $comment, @metrics, $metric, $size,
# $inc_y, $inc_m and $inc, as the rest of the script does.
sub FormatSynopsisText
{
  ($label, $comment, @metrics) = @_ ;
  ($metric, $size, $inc_y, $inc_m, $inc) = @metrics [0 .. 4] ; # value, unit, yearly, monthly, suffix

  $metric =~ s/,//g ; # drop thousands separators

  if ($inc eq "th") # rank
  {
    $_ .= " " foreach ($inc_y, $inc_m) ;
    $inc = " " ;
  }
  $size =~ s/[x]/ / ;

  # non-negative changes get an explicit plus sign; pad to 5 and add the suffix
  foreach my $change ($inc_y, $inc_m)
  {
    $change = '+' . $change unless $change =~ /-/ ;
    $change = sprintf ("%5s", $change) . $inc ;
  }

  # a value consisting only of dots is a placeholder and is left untouched
  if ($metric !~ /^\.+$/)
  {
    my %scale  = (b => 1000000000, m => 1000000, k => 1000) ;
    my $scaled = exists $scale{$size} ? $metric / $scale{$size} : $metric ;
    $metric    = sprintf ("%.0f", $scaled) ;
  }

  return sprintf ("%-20s", $label) .
         sprintf ("%8s", "$metric $size") .
         " (Y:$inc_y / M:$inc_m) $comment\n" ;
}
1346 -
# Format one metric as an HTML row of the synopsis table.
#
# Arguments:
#   $label   - HTML for the first cell
#   $comment - accepted but not used in the row
#   @metrics - value, unit (b, m, k or x), yearly change, monthly change,
#              change suffix, and optionally "refs|note text"
#
# Returns the "<tr>...</tr>\n" string with five cells:
#   label, value + unit, yearly change, monthly change, note references.
#
# Side effect: a non-empty note text is pushed onto the global @synopsis_notes.
# In that text, everything from a '#' up to the next '<' or '>' is wrapped in a
# gray <font> tag (repeated remarks).
#
# All working variables are lexical, so the package variables of the same name
# ($metric, $comment, $notes, ...) used elsewhere in the script are not touched.
sub FormatSynopsisTable
{
  my ($label, $comment, @metrics) = @_ ;
  my ($metric, $size, $inc_y, $inc_m, $inc, $notes) = @metrics [0 .. 5] ;

  # limit 2: only the first '|' separates the references from the note text
  $notes = "" unless defined $notes ;
  my ($notes_ref, $notes_text) = split ('\|', $notes, 2) ;
  $notes_ref  = "" unless defined $notes_ref ;
  $notes_text = "" unless defined $notes_text ;
  if ($notes_text ne "")
  {
    # text between '#' and first bracket (<>) will be grayed (repeated remarks)
    $notes_text =~ s/#([^<>]+)/<font color=#808080>$1<\/font>/g ;
    push @synopsis_notes, $notes_text ;
  }

  $metric =~ s/,//g ; # drop thousands separators
  if ($inc eq "th") # rank
  {
    $inc_y .= " " ;
    $inc_m .= " " ;
    $inc = " " ;
  }
  $size =~ s/[x]/ / ;

  # non-negative changes get an explicit plus sign; pad to 5 and add the suffix
  foreach my $change ($inc_y, $inc_m)
  {
    $change = '+' . $change unless $change =~ /-/ ;
    $change = sprintf ("%5s", $change) . $inc ;
  }

  # Only thousands are scaled; their unit label stays "k".
  # NOTE(review): 'b' and 'm' values are relabelled but not divided, unlike in
  # FormatSynopsisText - confirm that is intended.
  if ($size eq "k")
    { $metric = sprintf ("%.1f", $metric / 1000) ; }
  elsif ($size eq "b")
    { $size = "B" ; }
  elsif ($size eq "m")
    { $size = "M" ; }
  else
    { $size = "&nbsp;&nbsp;" ; }

  if ($notes_ref eq "")
    { $notes_ref = '&nbsp;' ; }

  $metric = "$metric $size" ;

  # gray out placeholders (".." ) and values starting with "0.0" - once per cell
  foreach my $cell ($metric, $inc_y, $inc_m)
  {
    if (($cell =~ /\.\./) || ($cell =~ /^0\.0/))
      { $cell = "<font color=#C0C0C0>$cell</font>" ; }
  }

  return "<tr><td class=detail-left>$label</td><td class=detail-right>$metric</td><td class=detail-right>$inc_y</td><td class=detail-right>$inc_m</td><td class=detail-right>$notes_ref</td></tr>\n" ;
}
1408 -
# Print an abort banner containing the given message and end the script.
# The message, minus one trailing newline, is kept in the package variable $msg.
# NOTE(review): exit is called without a status, so the script ends with
# status 0 even on abort - confirm no caller relies on a non-zero status.
sub Abort
{
  chomp ($msg = shift) ;
  print "\n!!! Abort script: '$msg'\n" ;
  exit ;
}
1416 -
 2+#!/usr/local/bin/perl
 3+
 4+ use lib "/home/ezachte/lib" ;
 5+ use EzLib ;
 6+ $trace_on_exit = $true ;
 7+
 8+ use CGI::Carp qw(fatalsToBrowser);
 9+ use Time::Local ;
 10+ use Getopt::Std ;
 11+
 12+ # !! adapt these for every run !!
 13+ $p_year = 2010 ;
 14+ $p_month = 12 ;
 15+
 16+ $debug = $false ;
 17+
 18+ $public = 0 ;
 19+ $private = 1 ;
 20+
 21+ $p_month_d2 = sprintf ("%02d", $p_month) ;
 22+
 23+ @months = qw (January February March April May June July August September October November December) ;
 24+ $p_month_prev = ($p_month > 1) ? $p_month - 1 : 12 ;
 25+ $p_month_next = ($p_month < 12) ? $p_month + 1 : 1 ;
 26+ $p_month_next2 = ($p_month < 11) ? $p_month + 2 : $p_month - 10 ;
 27+ $p_year_plus_m2 = ($p_month < 11) ? $p_year : $p_year + 1 ;
 28+ $p_month_prev_d2 = sprintf ("%02d", $p_month_prev) ;
 29+ $p_month_next_d2 = sprintf ("%02d", $p_month_next) ;
 30+
 31+ $p_year_prev = $p_year - 1 ;
 32+ $p_year_next = $p_year + 1 ;
 33+ $p_year_short = $p_year - 2000 ;
 34+ $p_year_prev_short = $p_year_prev - 2000 ;
 35+ $p_year_short_d2 = sprintf ("%02d", $p_year_short) ;
 36+ $p_year_prev_short_d2 = sprintf ("%02d", $p_year_prev_short) ;
 37+
 38+ $p_month_name = $months [$p_month -1] ;
 39+ $p_month_name_prev = $months [$p_month_prev-1] ;
 40+ $p_month_name_next = $months [$p_month_next-1] ;
 41+ $p_month_name_next2 = $months [$p_month_next2-1] ;
 42+
 43+
 44+ $trend_one_year = "{{m}}/{{y-1}}|{{m}}/{{y}}" ;
 45+
 46+ if ($p_month == 1)
 47+ { $trend_one_month = "12/{{y-1}}|1/{{y}}" ; }
 48+ else
 49+ { $trend_one_month = "{{m-1}}/{{y}}|{{m}}/{{y}}" ; }
 50+
 51+ $p_year_month_m1 = ($p_month == 1) ? "$p_month_prev/$p_year_prev_short_d2" : "$p_month_prev/$p_year_short_d2" ; # m1 = minus 1
 52+
 53+ print "\$p_year $p_year\n" ;
 54+ print "\$p_year_prev $p_year_prev\n" ;
 55+ print "\$p_year_plus_m2 $p_year_plus_m2\n" ;
 56+ print "\$p_year_short $p_year_short\n" ;
 57+ print "\$p_year_prev_short $p_year_prev_short\n" ;
 58+ print "\$p_year_short_d2 $p_year_short_d2\n" ;
 59+ print "\$p_year_prev_short_d2 $p_year_prev_short_d2\n" ;
 60+ print "\n" ;
 61+ print "\$p_month $p_month\n" ;
 62+ print "\$p_month_d2 $p_month_d2\n" ;
 63+ print "\$p_month_next $p_month_next\n" ;
 64+ print "\$p_month_prev $p_month_prev\n" ;
 65+ print "\$p_month_next_d2 $p_month_next_d2\n" ;
 66+ print "\$p_month_prev_d2 $p_month_prev_d2\n" ;
 67+ print "\$p_month_name $p_month_name\n" ;
 68+ print "\$p_month_name_prev $p_month_name_prev\n" ;
 69+ print "\$p_month_name_next $p_month_name_next\n" ;
 70+ print "\$p_month_name_next2 $p_month_name_next2\n" ;
 71+ print "\$p_year_month_m1 $p_year_month_m1\n" ;
 72+
 73+
 74+ # example output for synopsis.txt
 75+ #STATISTICS
 76+
 77+ #http://infodisiac.com/Wikimedia/ReportCard/EN/RC_2009_08_summary.html
 78+
 79+ #Y: Jun, 2008->2009 k=thousand m=million b=billion
 80+ #M: 2009, May->Jun
 81+
 82+ #Unique Visitors 301 m (Y:+21% / M: -5%)
 83+ #Page Requests 11 b (Y: +6% / M: -6%)
 84+ #Site Rank 5th (Y: +0 / M: -1 )
 85+ #Commons Files 4.7 m (Y:+62% / M: +4%) ++ growth pdf/djvu files
 86+ #Article Count 20.6 m (Y:+33% / M: +2%)
 87+ #New Articles 17 k (Y: -9% / M: -6%)
 88+ #New Editors 18 k (Y:+39% / M:+25%) wp:it in one year -50%
 89+ #Active Editors 88 k (Y:+ 1% / M: -2%) wp:ru in one year +45%
 90+
 91+ print "\n"."="x80 . "\n\n" ;
 92+
 93+# !! This is a rather crummy quick-and-dirty (Q&D) way to collect variable data; the data needs to be externalized !!
 94+
 95+## if ($2010_12)
 96+## {
 97+ @visitors = qw ( 395,472,000 m 14.0 -3.7 %) ; # Unique Visitors by Region
 98+ @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 99+ @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 100+ @commons_files = qw ( 8,046,377 m 43.1 3.0 %) ; # Binaries per month - Absolute
 101+ @article_count = qw ( 17,616,951 m 20.0 1.5 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
 102+ @new_articles = qw ( 8,555 k 16.5 5.1 %) ; # New articles per day - Absolute
 103+ @edits = qw ( 11,566,371 m 3.6 3.8 %) ; # Edits per month - Absolute
 104+ @new_editors = qw ( 14,607 k -16.6 -2.5 %) ; # New editors - Absolute
 105+ @active_editors = qw ( 79,324 k -5.9 -0.5 %) ; # Active editors - Absolute
 106+ @very_active_editors = qw ( 10,254 k -1.6 0.1 %) ; # Very active editors - Absolute
 107+ @reach = qw ( 31.1 x 1.5 -1.4 %) ; # Reach Percentage by Region
 108+ # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 109+ push @visitors, "1|Unique Visitors<br>1: Average for last 12 months 377M." ;
 110+# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ;
 111+# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
 112+ push @page_requests, "2,3|Page Requests<br>" .
 113+ "2: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Dec: 4.1% of total Wikipedia traffic (556M/13489M)<br>" .
 114+# "&nbsp;&nbsp;&nbsp;&nbsp;Look ahead for page requests: Dec -> Jan = 13367M -> 14724M = +10.1%<br>" .
 115+ "#3: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
 116+ push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 117+ push @commons_files, "5|Commons Files<br>#5: Tiff uploads increased 5-fold in July 2010, 13-fold in last 12 months.<br>" ;
 118+# "#7: Commons consistently fastest growing project, 48% in last 12 months." ;
 119+
 120+#push @article_count, "8|Article Count<br>#8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ;
 121+# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
 122+# push @new_articles, "7|New Articles Per Day<br>" .
 123+# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ;
 124+ push @edits, "6|Edits<br>#6: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" .
 125+ "#&nbsp;&nbsp;&nbsp;&nbsp;Net growth in constructive edits is less clear, as this metric includes most reverting edits." ;
 126+# "&nbsp;&nbsp;&nbsp;&nbsp;Strong one-monthly dip in July due to World Cup Socker?." ;
 127+# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
 128+# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
 129+ push @new_editors, "7|New Editors Per Day<br>" .
 130+# "10: Signifant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" .
 131+# "&nbsp;&nbsp;&nbsp;&nbsp;Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" .
 132+# "&nbsp;&nbsp;&nbsp;&nbsp;But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" .
 133+ "#7:WMF recently commissioned in depth study of editor activity trends, which is ongoing." ;
 134+ push @active_editors, "8|(Very) Active Editors<br>" .
 135+# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" .
 136+ "#8: Since a few months editors on Commons are no longer included in overall editor total,<br>" .
 137+ "#&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; on the assumption that most of these also edit on one or more other projects.<br>" ;
 138+# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; #Detection of double counts between any projects and languages is planned for late 2010." ;
 139+ push @very_active_editors, "8|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
 140+## }
 141+
 142+
 143+# if ($2010_11)
 144+# {
 145+# @visitors = qw ( 410,816,000 m 18.8 0.6 %) ; # Unique Visitors by Region
 146+# @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 147+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 148+# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute
 149+# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
 150+# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute
 151+# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute
 152+# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute
 153+# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute
 154+# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute
 155+# @reach = qw ( 31.1 x 2.6 0.0 %) ; # Reach Percentage by Region
 156+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 157+# push @visitors, "1,2|Unique Visitors<br>1: 410M UV's exceeds Oct 2010 record with 2M. Average for last 12 months 377M.<br>" .
 158+# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ;
 159+# push @page_requests, "3,4|Page Requests<br>" .
 160+# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.4% of total traffic (492M/14468M)<br>" .
 161+# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
 162+# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 163+# }
 164+
 165+# if ($2010_10)
 166+# {
 167+# @visitors = qw ( 408,350,000 m 18.5 2.6 %) ; # Unique Visitors by Region
 168+# @page_requests = qw ( ? b ? ? %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 169+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 170+# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute
 171+# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
 172+# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute
 173+# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute
 174+# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute
 175+# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute
 176+# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute
 177+# @reach = qw ( 31.1 x 2.3 0.5 %) ; # Reach Percentage by Region
 178+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 179+# push @visitors, "1,2|Unique Visitors<br>1: 408M UV's beats September 2010 record with 10M.<br>" .
 180+# "2: Growth in UV count in last 12 months 18.5% (for whole internet 9.6%)." ;
 181+# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 182+# }
 183+
 184+
 185+# if ($2010_09)
 186+# {
 187+# @visitors = qw ( 398,178,000 m 22.1 6.6 %) ; # Unique Visitors by Region
 188+# @page_requests = qw (13,671,000,000 b 20.2 5.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 189+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 190+# @commons_files = qw ( 7,491,824 m 48.2 2.8 %) ; # Binaries per month - Absolute
 191+# @article_count = qw ( 16,678,710 m 20.7 1.8 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
 192+# @new_articles = qw ( 7,578 k 3.9 -18.9 %) ; # New articles per day - Absolute
 193+# @edits = qw ( 11,924,018 m 9.0 -3.3 %) ; # Edits per month - Absolute
 194+# @new_editors = qw ( 15,805 k -17.4 -10.5 %) ; # New editors - Absolute
 195+# @active_editors = qw ( 82,503 k -5.6 -3.3 %) ; # Active editors - Absolute
 196+# @very_active_editors = qw ( 11,011 k -2.5 -3.4 %) ; # Very active editors - Absolute
 197+# @reach = qw ( 30.8 x 3.2 1.8 %) ; # Reach Percentage by Region
 198+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 199+# push @visitors, "1,2|Unique Visitors<br>1: 398M UV's beats May 2010 record with 9M or 2.4%.<br>" .
 200+# "2: Growth in UV count in last 12 months 22% (for whole internet 10%)." ;
 201+# push @page_requests, "3,4|Page Requests<br>" .
 202+# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.0% of total traffic (425M/14096M)<br>" .
 203+# # "&nbsp;&nbsp;&nbsp;&nbsp;Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" .
 204+# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
 205+# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 206+# push @commons_files, "6,7|Commons Files<br>#6: Tiff uploads increased 5-fold in July 2010, 18-fold in last 12 months.<br>" .
 207+# "7: Commons consistently fastest growing project, 48% in last 12 months." ;
 208+
 209+# push @article_count, "8|Article Count<br>8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ;
 210+# push @edits, "9|Edits<br>9: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" .
 211+# "&nbsp;&nbsp;&nbsp;&nbsp;Net growth in constructive edits is less clear, as this metric includes most reverting edits." ;
 212+# "&nbsp;&nbsp;&nbsp;&nbsp;Strong one-monthly dip in July due to World Cup Soccer?." ;
 213+# push @new_editors, "10|New Editors Per Day<br>" .
 214+# "10: Significant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" .
 215+# "&nbsp;&nbsp;&nbsp;&nbsp;Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" .
 216+# "&nbsp;&nbsp;&nbsp;&nbsp;But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" .
 217+# "&nbsp;&nbsp;&nbsp;&nbsp;WMF recently commissioned in depth study of editor activity trends, which is ongoing." ;
 218+# push @active_editors, "11,12|(Very) Active Editors<br>" .
 219+# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" .
 220+# "#12: Editors on Commons are no longer included in overall editor total,<br>" .
 221+# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; #on the assumption that most of these also edit on one or more other projects.<br>" .
 222+# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; #Detection of double counts between any projects and languages is planned for late 2010." ;
 223+# push @very_active_editors, "11,12|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
 224+# }
 225+
 226+# if ($2010_08)
 227+# {
 228+# @visitors = qw ( 373,392,000 m 21.4 3.7 %) ; # Unique Visitors by Region
 229+# @page_requests = qw (13,367,000,000 b 23.9 -1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 230+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 231+# @commons_files = qw ( 7,298,379 m 48.1 2.8 %) ; # Binaries per month - Absolute
 232+# @article_count = qw ( 34,963,360 m 30.0 2.4 %) ; # Article count (official) - Absolute
 233+# @new_articles = qw ( 9,437 k 22.4 25.7 %) ; # New articles per day - Absolute
 234+# @edits = qw ( 12,346,207 m 7.9 15.4 %) ; # Edits per month - Absolute
 235+# @new_editors = qw ( 17,026 k -17.3 -1.1 %) ; # New editors - Absolute
 236+# @active_editors = qw ( 85,643 k -5.2 2.1 %) ; # Active editors - Absolute
 237+# @very_active_editors = qw ( 11,419 k -1.6 5.0 %) ; # Very active editors - Absolute
 238+# @reach = qw ( 29.0 x 2.6 0.5 %) ; # Reach Percentage by Region
 239+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 240+# push @page_requests, "1,2,3,4|Page Requests<br>" .
 241+# "1: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 2.9% of total traffic (425M/14724M)<br>" .
 242+# "&nbsp;&nbsp;&nbsp;&nbsp;Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" .
 243+# "#&nbsp;&nbsp;&nbsp;&nbsp;Trend data for mobile will be added when more history is available.<br>" .
 244+# "#2: Due to server problems counts from squid logs for December 2009 - March 2010 are too low,<br>" .
 245+# "#&nbsp;&nbsp;&nbsp;&nbsp;estimated underreporting 10%-25%. Counts for April - July 2010 have been patched. Read <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>more</a>.<br>" .
 246+# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-08/Page-Views-Per-Project-Indexed.png'>chart</a><br>" .
 247+# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
 248+# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 249+# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>22-fold</a> in a year." ;
 250+
 251+# push @new_articles, "7|New Articles Per Day<br>" .
 252+# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ;
 253+# push @edits, "8|Edits<br>8: All time high for edit count, even slightly above May level.<br>" .
 254+# "&nbsp;&nbsp;&nbsp;&nbsp;Strong one-monthly dip in July due to World Cup Soccer?." ;
 255+# push @active_editors, "9,10|(Very) Active Editors<br>" .
 256+# "9: After a <a href='charts/2010-08/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active Wikipedia editors</a> in June, and a further 2% drop in July,<br>" .
 257+# "&nbsp;&nbsp;&nbsp;&nbsp;trend is upwards again, with 2.5% increase in August.<br>" .
 258+# "&nbsp;&nbsp;&nbsp;&nbsp;Prospects for September are good, with +10% growth in page requests<br>" .
 259+# "&nbsp;&nbsp;&nbsp;&nbsp;(given strong correlation of 0.67 between page requests and active editors).<br>" .
 260+# "&nbsp;&nbsp;&nbsp;&nbsp;From a wider perspective drops were still within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>,<br>" .
 261+# "&nbsp;&nbsp;&nbsp;&nbsp;see also <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2008.png'>similar chart with trend line since June 2008</a>.<br>" .
 262+# "10: New: Editors on Commons are no longer included in overall editor total,<br>" .
 263+# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; on the assumption that most of these also edit on one or more other projects.<br>" .
 264+# "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Detection of double counts between any projects and languages is planned for late 2010." ;
 265+# push @very_active_editors, "9,10|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
 266+# }
 267+
 268+# if ($2010_07)
 269+# {
 270+# @visitors = qw ( 360,225,000 m 21.9 -5 %) ; # Unique Visitors by Region
 271+# @page_requests = qw (13,116,000,000 b 27.2 -6 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 272+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 273+# @commons_files = qw ( 7,104,689 m 49.1 2.9 %) ; # Binaries per month - Absolute
 274+# @article_count = qw ( 34,198,285 m 29.9 2 %) ; # Article count (official) - Absolute
 275+# @new_articles = qw ( 7,642 k 4.2 -0.6 %) ; # New articles per day - Absolute
 276+# @edits = qw ( 10,734,940 m -5.5 -9.8 %) ; # Edits per month - Absolute
 277+# @new_editors = qw ( 16,661 k -20.8 -5.6 %) ; # New editors - Absolute
 278+# @active_editors = qw ( 90,554 k -5.9 -1.6 %) ; # Active editors - Absolute
 279+# @very_active_editors = qw ( 11,818 k -2.1 -1.8 %) ; # Very active editors - Absolute
 280+# @reach = qw ( 28.5 x 2.8 -1.7 %) ; # Reach Percentage by Region
 281+ # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 282+# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
 283+# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
 284+# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
 285+# push @page_requests, "1,2,3,4|Page Requests<br>" .
 286+# "1: Due to <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>server problems</a> counts from squid logs for December 2009 - March 2010 are too low,<br>" .
 287+# "&nbsp;&nbsp;&nbsp;&nbsp;estimated underreporting 10%-25%. Counts for April - July 2010 have been patched." .
 288+# ".<br>" .
 289+# "2: August : <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic (401M)</a>: 3.0% of total traffic (13367M)<br>" .
 290+# "#&nbsp;&nbsp;&nbsp;&nbsp;Trend data for mobile will be added when more history is available.<br>" .
 291+# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-07/Page-Views-Per-Project-Indexed.png'>chart</a><br>" .
 292+# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
 293+# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 294+# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>25-fold</a> in a year." ;
 295+
 296+# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
 297+# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
 298+# push @new_articles, "7|New Articles Per Day<br>" .
 299+# "#7: Peak in April and May by massive activity on Aromanian and Waray-Waray Wp's, each by single user.<br>" .
 300+# "#&nbsp;&nbsp;&nbsp;&nbsp;In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ;
 301+# push @active_editors, "8,9|(Very) Active Editors<br>" .
 302+# "8: The <a href='charts/2010-07/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active editors</a> for all Wikipedias in June was relatively large,<br>" .
 303+# "&nbsp;&nbsp;&nbsp;&nbsp;but from a <a href='charts/2010-07/Monthly-Active-Users-Since-Jan-2006.png'>wider perspective</a> still within normal bandwidth, largest drop was in June 2006.<br>" .
 304+# "&nbsp;&nbsp;&nbsp;&nbsp;There might be a seasonal component in fluctuations.<br>" .
 305+# "9: Bug fix: in earlier RC editions editors from Commons (6k active editors) were counted double.<br>" .
 306+# "&nbsp;&nbsp;&nbsp;&nbsp;This has been fixed for all months in this RC." ;
 307+# push @very_active_editors, "9|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
 308+# }
 309+
 310+# if ($2010_06)
 311+# {
 312+# @visitors = qw ( 379,344,000 m 25.2 -2.5 %) ; # Unique Visitors by Region
 313+# @page_requests = qw (13,957,000,000 b 26.0 1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 314+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 315+# @commons_files = qw ( 6,910,267 m 50.1 2.5 %) ; # Binaries per month - Absolute
 316+# @article_count = qw ( 33,430,039 m 29.7 1.5 %) ; # Article count (official) - Absolute
 317+# @new_articles = qw ( 7,865 k 14.5 -16.2 %) ; # New articles per day - Absolute
 318+# @edits = qw ( 12,056,265 m 10.1 -1.6 %) ; # Edits per month - Absolute
 319+# @new_editors = qw ( 17,573 k -15.2 -10.6 %) ; # New editors - Absolute
 320+# @active_editors = qw ( 99,124 k -3.5 -4.4 %) ; # Active editors - Absolute
 321+# @very_active_editors = qw ( 13,042 k 0.7 -2.9 %) ; # Very active editors - Absolute
 322+# @reach = qw ( 30.2 x 3.5 -1.1 %) ; # Reach Percentage by Region
 323+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 324+# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
 325+# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
 326+# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
 327+# push @page_requests, "1,2,3,4|Page Requests<br>" .
 328+# "1: Traffic volume for recent months had been underreported due to monitor capacity problems.<br>" .
 329+# "&nbsp;&nbsp;&nbsp;&nbsp;Counts from April 2010 and later " .
 330+# "<a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>have been corrected</a>.<br>" .
 331+# "&nbsp;&nbsp;&nbsp;&nbsp;Data from Nov 2009 - Mar 2010 may still be too low.<br>" .
 332+# "2: Traffic to mobile site is now counted. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 208M:13957M=1.5% of total)</a><br>" .
 333+# "&nbsp;&nbsp;&nbsp;&nbsp;This is the first month, so no trend data yet. <a href='charts/2010-06/Page-Views-Breakdown-Mobile-Traffic.png'> " .
 334+# "Breakdown per language</a>:" .
 335+# "English:71.3%,<br>&nbsp;&nbsp;&nbsp;&nbsp; Japanese:8.6%, German:4.5%, French:3.9%, Russian:3.4%, Others:8.3%<br>" .
 336+# "3: <a href='charts/2010-06/Page-Views-Per-Project-Indexed.png'>New chart</a> for breakdown of traffic volume per project: many projects show peak traffic late 2009.<br>" .
 337+# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
 338+# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 339+# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
 340+# push @new_articles, "7|New Articles Per Day<br>7: Peak in April and May by massive activity on <a href='http://stats.wikimedia.org/EN/TablesWikipediaROA_RUP.htm'>Aromanian</a> and <a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray</a> Wp's, each by single user.<br>" .
 341+# "&nbsp;&nbsp;&nbsp;&nbsp;In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ;
 342+# push @edits, "9|Edits<br>9: For German,French and Polish Wikipedia dumps were not yet updated, reused data from previous month" ;
 343+# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" .
 344+# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
 345+# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
 346+# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
 347+# }
 348+
 349+# if ($2010_05)
 350+# {
 351+# @visitors = qw ( 388,932,000 m 22.6 3.8 %) ; # Unique Visitors by Region
 352+# @page_requests = qw (11,250,000,000 b -1.0 -1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 353+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 354+# @commons_files = qw ( 6,765,082 m 51.9 3.1 %) ; # Binaries per month - Absolute
 355+# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute
 356+# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute
 357+# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute
 358+# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute
 359+# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute
 360+# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute
 361+# @reach = qw ( 31.3 x 3.0 0.9 %) ; # Reach Percentage by Region
 362+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 363+# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
 364+# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
 365+# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
 366+# push @page_requests, "3,4|Page Requests<br>" .
 367+# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
 368+# "4: Traffic to mobile site not yet included. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 154M:10700M=1.4% of total)</a><br>" .
 369+# "5: Page request trends on several projects are falling for 4th month, which deserves some further analysis" ;
 370+# push @rank, "6|Site Rank<br>#6: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 371+# push @commons_files, "7|Commons Files<br>#8: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
 372+# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
 373+# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
 374+# push @new_articles, "10,11|New Articles Per Day<br>10: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" .
 375+# "11:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ;
 376+# push @edits, "12,13|Edits<br>12: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" .
 377+# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" .
 378+# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
 379+# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
 380+# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
 381+# }
 382+
 383+
 384+# if ($2010_04)
 385+# {
 386+# @visitors = qw ( 374,846,000 m 17.1 1.1 %) ; # Unique Visitors by Region
 387+# @page_requests = qw (11,724,000,000 b +7.4 -0.1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 388+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 389+# @commons_files = qw ( 6,564,544 m 52.2 3.3 %) ; # Binaries per month - Absolute
 390+# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute
 391+# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute
 392+# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute
 393+# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute
 394+# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute
 395+# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute
 396+# @reach = qw ( 30.4 x 1.5 0.0 %) ; # Reach Percentage by Region
 397+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 398+# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
 399+# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
 400+# "&nbsp;&nbsp;&nbsp;&nbsp;(avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
 401+# push @page_requests, "3,4|Page Requests<br>" .
 402+# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
 403+# "4: Traffic to mobile site not included. Expect this next month." ;
 404+# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 405+# push @commons_files, "6|Commons Files<br>6: Fastest relative growth: tiff images (723%), ogg vorbis video (446%)." ;
 406+# push @article_count, "7,8|Article Count<br>7: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
 407+# "8: Seven Wiktionaries in top 25 Wikimedia projects" ;
 408+# push @new_articles, "9,10|New Articles Per Day<br>9: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" .
 409+# "10:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ;
 410+# push @edits, "11,12|Edits<br>11: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" .
 411+# "All Serbian Wikinews edits by weather bot that updates temp/wind speed every few seconds.<br>" .
 412+# "30 June 2010: report filed for <a href='http://en.wikinews.org/wiki/Wikinews:Admin_action_alerts'>runaway bot</a><br>" .
 413+# "#12: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
 414+# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
 415+# push @very_active_editors, "13|Very Active Editors<br>13: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
 416+# }
 417+
 418+# if ($2010_03)
 419+# {
 420+# @visitors = qw ( 370,744,000 m 13.3 7.4 %) ; # Unique Visitors by Region
 421+# @page_requests = qw (11,730,000,000 b +0.3 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 422+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 423+# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute
 424+# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute
 425+# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute
 426+# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute
 427+# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute
 428+# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute
 429+# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute
 430+# @reach = qw ( 30.4 x 0.5 1.7 %) ; # Reach Percentage by Region
 431+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 432+# push @visitors, "1,2|Unique Visitors<br>1: March has 3 (11%) more days than February<br>" .
 433+# "&nbsp;&nbsp;&nbsp;&nbsp;This will explain much of apparently large monthly growth in visitors<br>" .
 434+# "2: All regions same or more unique visitors than year ago. North Am. +25%, Latin Am. + 27%" ;
 435+# push @page_requests, "3|Page Requests<br>" .
 436+# "3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
 437+# "&nbsp;&nbsp;&nbsp;&nbsp;This way monthly changes are more meaningful<br>" .
 438+# "&nbsp;&nbsp;&nbsp;&nbsp;Difference with not normalized data is mainly visible in Jan&rArr;Feb and Feb&rArr;Mar" ;
 439+# push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 440+# push @commons_files, "5|Commons Files<br>#5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
 441+# push @article_count, "6|Article Count<br>#6: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ;
 442+# push @edits, "7|Edits<br>#7: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
 443+# "&nbsp;&nbsp;&nbsp;&nbsp;#2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
 444+# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" .
 445+# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ;
 446+# }
 447+
 448+# if ($2010_02)
 449+# {
 450+# @visitors = qw ( 345,218,000 m 14.8 -5.3 %) ; # Unique Visitors by Region
 451+# @page_requests = qw (11,081,000,000 b +5.8 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 452+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 453+# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute
 454+# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute
 455+# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute
 456+# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute
 457+# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute
 458+# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute
 459+# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute
 460+# @reach = qw ( 28.7 x 0.8 -0.8 %) ; # Reach Percentage by Region
 461+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 462+# push @visitors, "1|Unique Visitors<br>1: comScore reassesses online population in their target segments twice a year (Feb & Aug)<br>" .
 463+# "&nbsp;&nbsp;&nbsp;&nbsp;This time estimate for Indonesia, Philippines and Vietnam was lowered by -54%,<br>" .
 464+# "&nbsp;&nbsp;&nbsp;&nbsp;resulting in a worldwide reassessment of online population of -4%" ;
 465+# push @page_requests, "2,3|Page Requests<br>" .
 466+# "2:Corrected for length of months Jan -> Feb increase was actually +11.0% !<br>" .
 467+# "3:Russia maintains its steep growth: +57% in last 12 months, +137% in preceding 12 months<br>" .
 468+# "&nbsp;&nbsp;&nbsp;&nbsp;Indonesia is 2nd, and speeding up: +46% in last 12 months, +34% before that<br>" .
 469+# "#&nbsp;&nbsp;&nbsp;&nbsp;German decline (-10%) is still atypical (caused by spike year ago after court decision)" ;
 470+# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
 471+# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
 472+# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ;
 473+# push @edits, "8|Edits<br>8: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
 474+# "&nbsp;&nbsp;&nbsp;&nbsp;2006 &rArr; 2010: &nbsp;&nbsp;7.7 &rArr; 9.9 &rArr; 11.5 &rArr; 12.4 &rArr; 12.7" ;
 475+# }
 476+
 477+# if ($2009_??)
 478+# {
 479+# @visitors = qw ( 364,719,000 m 25.8 5.1 %) ; # Unique Visitors by Region
 480+# @page_requests = qw (11,054,000,000 b -3.1 6.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
 481+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 482+# @commons_files = qw ( 6,058,601 m 59.5 6.5 %) ; # Binaries per month - Absolute
 483+# @article_count = qw ( 29,742,993 m 34.7 2.4 %) ; # Article count (official) - Absolute
 484+# @new_articles = qw ( 7,626 k -1.1 3.4 %) ; # New articles per day - Absolute
 485+# @edits = qw ( 12,251,152 m 4.8 9.0 %) ; # Edits per month - Absolute
 486+# @new_editors = qw ( 19,279 k -12.4 5.6 %) ; # New editors - Absolute
 487+# @active_editors = qw ( 98,597 k -1.4 5.0 %) ; # Active editors - Absolute
 488+# @very_active_editors = qw ( 12,488 k -1.1 6.3 %) ; # Very active editors - Absolute
 489+# @reach = qw ( 29.0 x 1.0 1.0 %) ; # Reach Percentage by Region
 490+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 491+# push @visitors, "1,2|Unique Visitors<br>#1: Yearly growth in UV's (26%) exceeds growth of total internet (21%).<br>" .
 492+# "2: Large monthly shifts in UV/Reach in 3rd world explained by comScore as seasonal influences:<br>&nbsp;&nbsp;&nbsp;&nbsp;school vacations, and large festivals, religious (e.g. Ramadan) or otherwise (e.g. Carnival)." ;
 493+# push @page_requests, "3,4|Page Requests<br>3:<b> Trends measured by comScore and internal measurements diverge somewhat.</b><br>&nbsp;&nbsp;&nbsp;&nbsp;<b>Possible causes are under investigation.</b><p>" .
 494+# "4:Fastest rising large Wikipedia's in last 12 months:<br>" .
 495+# "&nbsp;&nbsp;&nbsp;&nbsp;Vietnamese (87%), Ukrainian (65%), Russian (45%), Indonesian (39%), Chinese (28%), Thai (23%)<br>" .
 496+# "&nbsp;&nbsp;&nbsp;&nbsp;German decline (-32%) is atypical (caused by short massive spike year ago after court decision)" ;
 497+# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with 4th and 6th ranked properties are considerable." ;
 498+# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
 499+# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ;
 500+# push @edits, "8|Edits<br>#8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>#Monthly edits for all Wikipedia's combined</a># remarkably stable between 10 and 12 million<br>#&nbsp;&nbsp;&nbsp;&nbsp;for 3 years now (as is the case for active and very active editors)" ;
 501+# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" .
 502+# "Experiment: logarithmic chart now uses two scales for widely divergent values.<br>This helps to remove clutter, but may need some getting used to." ;
 503+
 504+# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ;
 505+# }
 506+
 507+# if ($2009_??)
 508+# {
 509+# @visitors = qw ( 347,019,000 m 27.1 0.4 %) ; # Unique Visitors by Region
 510+# @page_requests = qw (10,389,000,000 b 0.0 -9.2 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma)
 511+# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
 512+# @commons_files = qw ( 5,695,283 m 55.1 2.6 %) ; # Binaries per month - Absolute
 513+# @article_count = qw ( 29,016,248 m 34.3 2.1 %) ; # Article count (official) - Absolute
 514+# @new_articles = qw ( 7,457 k 7.7 2.6 %) ; # New articles per day - Absolute
 515+# @edits = qw ( 10,791,575 m 0.6 0.4 %) ; # Edits per month - Absolute
 516+# @new_editors = qw ( 18,597 k -6.3 -2.4 %) ; # New editors - Absolute
 517+# @active_editors = qw ( 95,849 k 3.8 -0.4 %) ; # Active editors - Absolute
 518+# @very_active_editors = qw ( 11,764 k 0.4 -0.5 %) ; # Very active editors - Absolute
 519+# @reach = qw ( 28.7 x 1.6 -0.0 %) ; # Reach Percentage by Region
 520+# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 521+# push @visitors, "1,2|Unique Visitors<br>1: Yearly growth in UV's (27%) exceeds growth of total internet (21%).<br>" .
 522+# "2: Conversation with comScore on huge monthly shifts in UV/Reach in 3rd world continues." ;
 523+# push @page_requests, "3|Page Requests<br>3: Same as last year: dip in page requests (but spike in image requests)." ;
 524+# push @rank, "4|Site Rank<br>4: 5th position will be stable for long time: 4th has 35% more UV's, 6th 23% less." ;
 525+# push @commons_files, "5|Commons Files<br>5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
 526+# push @article_count, "6|Article Count<br>6: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ;
 527+# push @new_articles, "7|New Articles<br>7: Russian consistently fast riser, Ukrainian growth 40% of previous months" ;
 528+# push @edits, "8|Edits<br>8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>Monthly edits for all Wikipedia's combined</a> remarkably stable between 10 and 12 million<br>for 3 years now (as is the case for active and very active editors)" ;
 529+# push @new_editors, "9|New Editors<br>9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons" ;
 530+# }
 531+
 532+# if ($2009_10)
 533+# {
 534+# @visitors = qw ( 345,805,000 m 23.1 0.4 %) ; # Unique Visitors by Region
 535+# @page_requests = qw (11,257,000,000 b 7.7 -2.8 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma)
 536+# @rank = qw ( 5th x -1 0 th) ; # Web Properties - Unique Visitors
 537+# @commons_files = qw ( 5,558,644 m 59.7 3.4 %) ; # Binaries per month - Absolute
 538+# @article_count = qw ( 28,506,011 m 35.4 2.5 %) ; # Article count (official) - Absolute
 539+# @new_articles = qw ( 7,357 k 2.1 -6.1 %) ; # New articles per day - Absolute
 540+# @edits = qw ( 10,772,957 m 2.8 -3.4 %) ; # Edits per month - Absolute
 541+# @new_editors = qw ( 18,779 k -5.2 -4.5 %) ; # New editors - Absolute
 542+# @active_editors = qw ( 96,521 k 4.0 0.1 %) ; # Active editors - Absolute
 543+# @very_active_editors = qw ( 11,726 k 2.7 -3.4 %) ; # Very active editors - Absolute
 544+
 545+# @reach = qw ( 28.7 x 0.5 -0.3 %) ; # Reach Percentage by Region
 546+ # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
 547+# push @visitors, "1|1: asked comScore to explain huge shifts in UV/Reach in Middle East-Africa." ;
 548+# push @page_requests, "2|2: Capacity problems may have played a role. New servers ordered." ;
 549+# push @new_articles, "2,3|3: Ukrainian Wikipedia fastest riser (compare edits for Russian)" ;
 550+# push @edits, "4|4: Russian Wikipedia fastest riser (compare new articles for Ukrainian)" ;
 551+# push @very_active_editors, "2" ;
 552+# }
 553+
 554+# if ($2009_10)
 555+# {
 556+# @new_editors = qw ( 19,002 k -8.9 3.2 %) ;
 557+# @active_editors = qw ( 97,132 k 1.9 3.4 %) ;
 558+# @very_active_editors = qw ( 12,172 k 2.8 1.2 %) ;
 559+# @article_count = qw ( 27,852,471 m 35.6 2.8 %) ;
 560+# @new_articles = qw ( 8,050 k 11.2 5.9 %) ;
 561+# @edits = qw ( 11,188,080 m -1.8 1.7 %) ;
 562+# @commons_files = qw ( 5,539,645 m 60.3 5.5 %) ;
 563+# @rank = qw ( 5th x -1 0 th) ;
 564+# @visitors = qw ( 344,563,000 m 24.3 5.7 %) ;
 565+# @reach = qw ( 29.0 x 0.8 1.3 %) ;
 566+# @page_requests = qw (11,586,000,000 b 8.8 1.9 %) ;
 567+# }
 568+
 569+# if ($2009_09)
 570+# {
 571+# @new_editors = qw ( 17,792 k -8.7 -9.6 %) ;
 572+# @active_editors = qw ( 94,565 k 2.3 -2.5 %) ;
 573+# @very_active_editors = qw ( 12,069 k 3.6 -2.5 %) ;
 574+# @article_count = qw ( 27,120,974 m 36.6 2.0 %) ;
 575+# @new_articles = qw ( 12,907 k -0.3 -11.4 %) ;
 576+# @edits = qw ( 12,578,009 m 8.8 -9.0 %) ;
 577+# @commons_files = qw ( 5,115,042 m 57.4 2.7 %) ;
 578+# @rank = qw ( 5th x 0 0 th) ;
 579+# @visitors = qw ( 325,998,000 m 19.8 6.0 %) ;
 580+# @reach = qw ( 27.6 x -1.4 4.5 %) ;
 581+# @page_requests = qw (11,372,000,000 b 11.7 5.1 %) ;
 582+# }
 583+
 584+# if ($2009_08)
 585+# {
 586+# @new_editors = qw ( 17,998 k -9.4 -6.2 %) ;
 587+# @active_editors = qw ( 91,359 k 1.1 0.8 %) ;
 588+# @very_active_editors = qw ( 11,568 k 0.3 3.0 %) ;
 589+# @article_count = qw ( 21,143,943 m 29.9 2.0 %) ;
 590+# @new_articles = qw ( 13,174 k 8.1 11.4 %) ;
 591+# @edits = qw ( 12,807,952 m 8.4 4.8 %) ;
 592+# @commons_files = qw ( 4,996,023 m 60.2 3.6 %) ;
 593+# @rank = qw ( 5th x 0 0 th) ;
 594+# @visitors = qw ( 307,641,000 m 23.8 4.1 %) ;
 595+# @reach = qw ( 26.4 x 1.9 2.7 %) ;
 596+# @page_requests = qw (10,817,000,000 b 15.3 1.5 %) ;
 597+# }
 598+
 599+# if ($2009_07)
 600+# {
 601+# @new_editors = qw ( 18,916 k -8.5 -1 %) ;
 602+# @active_editors = qw ( 90,659 k -0.3 -0.6 %) ;
 603+# @very_active_editors = qw ( 11,242 k -2.4 -0.7 %) ;
 604+# @article_count = qw ( 20,768,108 m 30.2 0.8 %) ;
 605+# @new_articles = qw ( 11,888 k -18.9 -30.3 %) ;
 606+# @edits = qw ( 12,219,008 m 6.3 0.7 %) ;
 607+# @commons_files = qw ( 4,831,659 m 61.1 3.7 %) ;
 608+# @rank = qw ( 5th x 0 0 th) ;
 609+# @visitors = qw ( 295,848,000 m 20.9 -2.5 %) ;
 610+# @reach = qw ( 25.7 x 0 -3.7 %) ;
 611+# @page_requests = qw (10,700,000,000 b 12.9 -3.0 %) ;
 612+# }
 613+ # Build the plain-text synopsis in the global $synopsis and echo it to STDOUT.
 614+ $synopsis = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ; # legend: yearly change, unit letters
 615+ $synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ; # legend: monthly change, period letters
 616+ # append whatever FormatSynopsisText returns per metric; the arrays are passed before the comma stripping / rescaling done further down
 617+ $synopsis .= &FormatSynopsisText ("M Unique Visitors, All Projects", "", @visitors) ;
 618+ $synopsis .= &FormatSynopsisText ("M Page Views, All Projects", "", @page_requests) ;
 619+ $synopsis .= &FormatSynopsisText (" Site Rank", "", @rank) ;
 620+ $synopsis .= &FormatSynopsisText ("T Binary Files", "", @commons_files) ;
 621+ $synopsis .= &FormatSynopsisText ("M Wikipedia Article Count", "", @article_count) ;
 622+ $synopsis .= &FormatSynopsisText ("D New Wikipedia Articles", "", @new_articles) ;
 623+ $synopsis .= &FormatSynopsisText ("M Wikipedia Edits per Month", "", @edits) ;
 624+ $synopsis .= &FormatSynopsisText ("M New Wikipedia Editors", "", @new_editors) ;
 625+ $synopsis .= &FormatSynopsisText ("M Active Wikipedia Editors", "", @active_editors) ;
 626+ $synopsis .= &FormatSynopsisText ("M Very Active Wikipedia Ed.", "", @very_active_editors) ;
 627+
 628+ print "\n\n$synopsis" ;
 629+ print "\n"."="x80 . "\n\n" ; # 'x' binds tighter than '.', so this prints a rule of 80 '=' characters
 630+ # Keep an untouched copy of every metric array (name + '_'); the '?' check further down reads these copies.
 631+ @visitors_ = @visitors ;
 632+ @page_requests_ = @page_requests ;
 633+ @rank_ = @rank ;
 634+ @commons_files_ = @commons_files ;
 635+ @article_count_ = @article_count ;
 636+ @new_articles_ = @new_articles ;
 637+ @edits_ = @edits ;
 638+ @new_editors_ = @new_editors ;
 639+ @active_editors_ = @active_editors ;
 640+ @very_active_editors_ = @very_active_editors ;
 641+ @reach_ = @reach ;
 642+ # element [0] is the headline value: drop its thousands separators so it can be used in arithmetic
 643+ $visitors [0] =~ s/,//g ;
 644+ $new_editors [0] =~ s/,//g ;
 645+ $active_editors [0] =~ s/,//g ;
 646+ $very_active_editors [0] =~ s/,//g ;
 647+ $article_count [0] =~ s/,//g ;
 648+ $new_articles [0] =~ s/,//g ;
 649+ $edits [0] =~ s/,//g ;
 650+ $commons_files [0] =~ s/,//g ;
 651+ $rank [0] =~ s/,//g ;
 652+ $reach [0] =~ s/,//g ;
 653+ $page_requests [0] =~ s/,//g ;
 654+ # rescale the large counts: divide by 10^6 (visitors without decimals, the others with one), page requests by 10^9
 655+ $visitors [0] = sprintf ("%.0f",$visitors [0]/1000000) ;
 656+ $article_count [0] = sprintf ("%.1f",$article_count [0]/1000000) ;
 657+ $edits [0] = sprintf ("%.1f",$edits [0]/1000000) ;
 658+ $commons_files [0] = sprintf ("%.1f",$commons_files [0]/1000000) ;
 659+ $page_requests [0] = sprintf ("%.1f",$page_requests [0]/1000000000) ;
 660+
 661+ $new_editors [0] =~ s/(\d\d\d)$/,$1/ ;
 662+ $active_editors [0] =~ s/(\d\d\d)$/,$1/ ;
 663+ $very_active_editors [0] =~ s/(\d\d\d)$/,$1/ ;
 664+ $new_articles [0] =~ s/(\d\d\d)$/,$1/ ;
 665+ # Fix the number of decimals of the yearly [2] and monthly [3] change of each metric.
 666+ $visitors [2] = sprintf ("%.1f", $visitors [2]) ;
 667+ $visitors [3] = sprintf ("%.1f", $visitors [3]) ;
 668+ $visitors [5] =~ ($visitors [2] >= 0) ? 'A' : 'E' ; # NOTE(review): '=~' binds tighter than '?:', so nothing is assigned here; '=' was presumably intended - confirm which element should receive the colour code before changing
 669+ $visitors [6] =~ ($visitors [3] >= 0) ? 'A' : 'E' ; # NOTE(review): same as the line above, currently has no effect
 670+
 671+ $page_requests [2] = sprintf ("%.1f", $page_requests [2]) ;
 672+ $page_requests [3] = sprintf ("%.1f", $page_requests [3]) ;
 673+ $new_editors [2] = sprintf ("%.1f", $new_editors [2]) ;
 674+ $new_editors [3] = sprintf ("%.1f", $new_editors [3]) ;
 675+# $active_editors [2] = sprintf ("%.1f", $active_editors [2]) ;
 676+# $active_editors [3] = sprintf ("%.1f", $active_editors [3]) ;
 677+ $very_active_editors [2] = sprintf ("%.1f", $very_active_editors [2]) ;
 678+ $very_active_editors [3] = sprintf ("%.1f", $very_active_editors [3]) ;
 679+# $article_count [2] = sprintf ("%.1f", $article_count [2]) ;
 680+# $article_count [3] = sprintf ("%.1f", $article_count [3]) ;
 681+ $new_articles [2] = sprintf ("%.1f", $new_articles [2]) ;
 682+ $new_articles [3] = sprintf ("%.1f", $new_articles [3]) ;
 683+ $edits [2] = sprintf ("%.1f", $edits [2]) ;
 684+ $edits [3] = sprintf ("%.1f", $edits [3]) ;
 685+ $commons_files [2] = sprintf ("%.1f", $commons_files [2]) ;
 686+ $commons_files [3] = sprintf ("%.1f", $commons_files [3]) ;
 687+ $rank [2] = sprintf ("%.0f", $rank [2]) ; # rank changes are whole positions, hence no decimals
 688+ $rank [3] = sprintf ("%.0f", $rank [3]) ;
 689+ $reach [2] = sprintf ("%.1f", $reach [2]) ;
 690+ $reach [3] = sprintf ("%.1f", $reach [3]) ;
 691+ $page_requests [2] = sprintf ("%.1f", $page_requests [2]) ; # repeats the page_requests formatting already done above (harmless)
 692+ $page_requests [3] = sprintf ("%.1f", $page_requests [3]) ; # idem
 693+ # Where the untouched copy holds '?' (presumably: value not available), show '...' instead of the number the formatting above made of it.
 694+ for ($i = 0 ; $i <= 3 ; $i++) # elements 0..3 only; the global $i is left at 4 afterwards
 695+ {
 696+ $visitors [$i] = '...' if $visitors_ [$i] eq '?' ;
 697+ $page_requests [$i] = '...' if $page_requests_ [$i] eq '?' ;
 698+ $rank [$i] = '...' if $rank_ [$i] eq '?' ;
 699+ $commons_files [$i] = '...' if $commons_files_ [$i] eq '?' ;
 700+ $article_count [$i] = '...' if $article_count_ [$i] eq '?' ;
 701+ $new_articles [$i] = '...' if $new_articles_ [$i] eq '?' ;
 702+ $edits [$i] = '...' if $edits_ [$i] eq '?' ;
 703+ $new_editors [$i] = '...' if $new_editors_ [$i] eq '?' ;
 704+ $active_editors [$i] = '...' if $active_editors_ [$i] eq '?' ;
 705+ $very_active_editors [$i] = '...' if $very_active_editors_ [$i] eq '?' ;
 706+ $reach [$i] = '...' if $reach_ [$i] eq '?' ;
 707+ }
 708+ # Generate the report set twice, once per output folder, then stop; the subs below are only reached through these calls.
 709+ $path_input = "W:/@ Report Card/Input/" ;
 710+ $path_public = "W:/@ Report Card/Public/" ;
 711+ $path_private = "W:/@ Report Card/Extended/" ; # few more charts with top 10 web properties based on data from comScore (slightly confidential)
 712+
 713+ &WriteReports ($path_input, $path_public, $public) ; # third argument becomes $target_audience, which sub Print compares with $public / $private
 714+ &WriteReports ($path_input, $path_private, $private) ;
 715+
 716+ print "\nReady\n\n" ;
 717+ exit ;
 718+
 719+sub WriteReports
 720+{
 721+ $path_in = shift ;
 722+ $path_out = shift ;
 723+ $target_audience = shift ;
 724+
 725+ &WriteSynopsis ($path_out) ;
 726+
 727+ open TEMPLATE, '<', "RT_yyyy_mm.html" ;
 728+ open DETAILS, '>', "$path_out/RC_${p_year}_${p_month_d2}_detailed.html" ;
 729+ open SUMMARY, '>', "$path_out/RC_${p_year}_${p_month_d2}_summary.html" ;
 730+ open COLUMNS, '>', "$path_out/RC_${p_year}_${p_month_d2}_columns.html" ;
 731+
 732+
 733+ $write_details = $true ;
 734+ $write_summary = $true ;
 735+ $write_columns = $true ;
 736+
 737+ $write_public = $true ;
 738+ $write_private = $true ;
 739+
 740+ $iscomment = $false ;
 741+
 742+ while ($line = <TEMPLATE>)
 743+ {
 744+ chomp $line ;
 745+
 746+ $line =~ s/<!--.*?-->// ;
 747+# if ($line =~ /<!--/)
 748+# {
 749+# $iscomment = $true ;
 750+# $line =~ s/<!--.*$// ;
 751+# }
 752+# if ($line =~ /-->/)
 753+# {
 754+# $iscomment = $false ;
 755+# $line =~ s/^.*?-->// ;
 756+# }
 757+# if ($iscomment)
 758+# { $line = "<!-- {{$line}} -->" ; }
 759+
 760+ if ($line =~ /\{\{yyyy\}\}_\{\{mm[+-]1\}\}/)
 761+ {
 762+ if ($p_month == 1)
 763+ { $line =~ s/\{\{yyyy\}\}_\{\{mm\-1\}\}/{{yyyy-1}}_{{mm-1}}/ ; } # Q&D temp fix
 764+ if ($p_month == 12)
 765+ { $line =~ s/\{\{yyyy\}\}_\{\{mm\+1\}\}/{{yyyy+1}}_{{mm+1}}/ ; } # Q&D temp fix
 766+ }
 767+
 768+ # $no_upd = "<font color=#800000>*<\/font>" ;
 769+
 770+ if ($true) # test ?
 771+ {
 772+ # $no_upd = "&nbsp;&nbsp;<small><small><font color=#FF0000><b>chart could not be updated for current month</b></font></small></small>" ;
 773+ $line =~ s/H2 (UNIQUE VISITORS)/A[$1] H2 {${visitors [0]} million|Unique Visitors, All Projects}/ ;
 774+ $line =~ s/H2 (PAGE REQUESTS)/A[$1] H2 {${page_requests[0]} billion|Page Requests, All Projects}/ ;
 775+ $line =~ s/H2 (WEB PROPERTIES)/A[$1] H2 {${rank[0]} in rank|Web Properties - Unique Visitors}/ ;
 776+ $line =~ s/H2 (COMMONS FILES)/A[$1] H2 {${commons_files[0]} million|Binary Files $no_upd}/ ;
 777+ $line =~ s/H2 (ARTICLE COUNT)/A[$1] H2 {${article_count[0]} million|Wikipedia Articles, Comparison with Other Projects $no_upd}/ ;
 778+ $line =~ s/H2 (ARTICLES PER DAY)/A[$1] H2 {${new_articles[0]}|New Wikipedia Articles Per Day $no_upd}/ ;
 779+ $line =~ s/H2 (EDITS PER MONTH)/A[$1] H2 {${edits[0]} million|Wikipedia Edits Per Month $no_upd}/ ;
 780+ $line =~ s/H2 (NEW EDITORS PER MONTH)/A[$1] H2 {${new_editors[0]}|New Wikipedia Editors Per Month $no_upd}/ ;
 781+ $line =~ s/H2 (ACTIVE EDITORS)/A[$1] H2 {${active_editors[0]}|Active Wikipedia Editors (5+ edits per month) $no_upd}/ ;
 782+ $line =~ s/H2 (VERY ACTIVE EDITORS)/A[$1] H2 {${very_active_editors[0]}|Very Active Wikipedia Editors (100+ edits per month) $no_upd}/ ;
 783+
 784+ $line =~ s/TRENDS UNIQUE VISITORS/TRENDS {$trend_one_year|${visitors[2]}%}{$trend_one_month|${visitors[3]}%}/ ;
 785+ $line =~ s/TRENDS PAGE REQUESTS/TRENDS {$trend_one_year|${page_requests[2]}%}{$trend_one_month|${page_requests[3]}%}/ ;
 786+ $line =~ s/TRENDS WEB PROPERTIES/TRENDS {$trend_one_year|${rank[2]}}{$trend_one_month|${rank[3]}}/ ;
 787+ $line =~ s/TRENDS COMMONS FILES/TRENDS {$trend_one_year|${commons_files[2]}%}{$trend_one_month|${commons_files[3]}%}/ ;
 788+ $line =~ s/TRENDS ARTICLE COUNT/TRENDS {$trend_one_year|${article_count[2]}%}{$trend_one_month|${article_count[3]}%}/ ;
 789+ $line =~ s/TRENDS ARTICLES PER DAY/TRENDS {$trend_one_year|${new_articles[2]}%}{$trend_one_month|${new_articles[3]}%}/ ;
 790+ $line =~ s/TRENDS EDITS PER MONTH/TRENDS {$trend_one_year|${edits[2]}%}{$trend_one_month|${edits[3]}%}/ ;
 791+ $line =~ s/TRENDS NEW EDITORS PER MONTH/TRENDS {$trend_one_year|${new_editors[2]}%}{$trend_one_month|${new_editors[3]}%}/ ;
 792+ $line =~ s/TRENDS ACTIVE EDITORS/TRENDS {$trend_one_year|${active_editors[2]}%}{$trend_one_month|${active_editors[3]}%}/ ;
 793+ $line =~ s/TRENDS VERY ACTIVE EDITORS/TRENDS {$trend_one_year|${very_active_editors[2]}%}{$trend_one_month|${very_active_editors[3]}%}/ ;
 794+
 795+ $line =~ s/{{yyyy}}/$p_year/g ;
 796+ $line =~ s/{{yyyy\-1}}/$p_year_prev/g ;
 797+ $line =~ s/{{yyyy\+1}}/$p_year_next/g ;
 798+ $line =~ s/{{yyyy\+m2}}/$p_year_plus_m2/g ;
 799+ $line =~ s/{{month}}/$p_month_name/g ;
 800+ $line =~ s/{{month\-1}}/$p_month_name_prev/g ;
 801+ $line =~ s/{{month\+1}}/$p_month_name_next/g ;
 802+ $line =~ s/{{month\+2}}/$p_month_name_next2/g ;
 803+
 804+ $line =~ s/{{y}}/$p_year_short/g ;
 805+ $line =~ s/{{y\-1}}/$p_year_prev_short/g ;
 806+ $line =~ s/{{yy}}/$p_year_short_d2/g ;
 807+ $line =~ s/{{yy\-1}}/$p_year_prev_short_d2/g ;
 808+
 809+ $line =~ s/{{m}}/$p_month/g ;
 810+ $line =~ s/{{m\-1}}/$p_month_prev/g ;
 811+ $line =~ s/{{mm}}/$p_month_d2/g ;
 812+ $line =~ s/{{mm-1}}/$p_month_prev_d2/g ;
 813+ $line =~ s/{{mm\+1}}/$p_month_next_d2/g ;
 814+
 815+ $line =~ s/{{\(mm\/yy\)-1}}/$p_year_month_m1/g ;
 816+ }
 817+ else
 818+ {
 819+ $line =~ s/{{yyyy}}/[[yyyy]]/g ;
 820+ $line =~ s/{{yyyy-1}}/[[yyyy-1]]/g ;
 821+ $line =~ s/{{yyyy\+m2}}/[[yyyy\+m2]]/g ;
 822+ $line =~ s/{{month}}/[[month]]/g ;
 823+ $line =~ s/{{month-1}}/[[month-1]]/g ;
 824+ $line =~ s/{{month\+1}}/[[month\+1]]/g ;
 825+ $line =~ s/{{month\+2}}/[[month\+2]]/g ;
 826+
 827+ $line =~ s/{{y}}/y/g ;
 828+ $line =~ s/{{y-1}}/y-1/g ;
 829+ $line =~ s/{{m}}/m/g ;
 830+ $line =~ s/{{m-1}}/m-1/g ;
 831+ $line =~ s/{{mm}}/mm/g ;
 832+ $line =~ s/{{mm-1}}/mm-1/g ;
 833+ $line =~ s/{{mm\+}}/mm+1/g ;
 834+
 835+ $line =~ s/{{\(mm\/yy\)-1}}/(mm\/yy)-1/g ;
 836+ }
 837+
 838+ if ($line =~ /<!==\s*COMMENT\s*\{[^\}]*\}\s*==>/)
 839+ {
 840+ $comment = $line ;
 841+ $comment =~ s/^.*?\{// ;
 842+ $comment =~ s/\}.*$// ;
 843+ $line = " <span class=comment>$comment</span\n" ;
 844+ }
 845+
 846+ if ($line =~ /<!==\s*H1\s*\{[^\}]*\}\s*==>/)
 847+ {
 848+ $title = $line ;
 849+ $title =~ s/^.*?\{// ;
 850+ $title =~ s/\}.*$// ;
 851+ $line = " <tr>\n" .
 852+ " <td class=h1 colspan=99><span class=h9>$title</span></td>\n" .
 853+ " </tr>\n" .
 854+ " <tr>\n" .
 855+ " <td><small><small>&nbsp;</small></small></td>\n" .
 856+ " </tr>\n" ;
 857+ }
 858+
 859+ if ($line =~ /<!==\s*A\[[^\]]*\] H2\s*\{[^\}]*\}\s*==>/)
 860+ {
 861+ ($anchor = $line) ;
 862+ $anchor =~ s/^.*?A\[// ;
 863+ $anchor =~ s/\].*$// ;
 864+ $anchor =~ s/\s/_/g ;
 865+ $anchor = lc($anchor) ;
 866+
 867+ $parms = $line ;
 868+ $parms =~ s/^.*?\{// ;
 869+ $parms =~ s/\}.*$// ;
 870+ ($metric,$title) = split ('\|', $parms,2) ;
 871+ ($title2 = $title) =~ s/ /_/g ;
 872+ $line = " <tr>\n" .
 873+ " <td class=score><a id='$anchor' name='$anchor'></a><span class=bg>$metric</sup></span></td>\n" .
 874+ " <td class=h2><span class=h2>$title</span><br></td>\n" .
 875+ "</tr>\n" ;
 876+ }
 877+
 878+ if ($line =~ /<!==\s*TABS\s*\{[^\}]*\}\s*==>/)
 879+ {
 880+ $parms = $line ;
 881+ $parms =~ s/^.*?\{// ;
 882+ $parms =~ s/\}.*$// ;
 883+ ($id,@texts) = split ('\|', $parms) ;
 884+ $line = " <div id=\"container-" . ($id/10) . "\">\n" ;
 885+ $line .= " <ul>\n" ;
 886+ foreach $text (@texts)
 887+ {
 888+ $id++ ;
 889+ $line .= " <li><a href=\"#fragment-$id\"><span>$text</span></a></li>\n" ;
 890+ }
 891+ $line .= " </ul>\n" ;
 892+ $id_hi = $id ;
 893+ }
 894+
 895+ if ($line =~ /<!==\s*TAB\s*\{[^\}]*\}\s*==>/)
 896+ {
 897+ $parms = $line ;
 898+ $parms =~ s/^.*?\{// ;
 899+ $parms =~ s/\}.*$// ;
 900+ ($id,$text) = split ('\|', $parms) ;
 901+
 902+ if ($text =~ /^START/i)
 903+ {
 904+ $line = "\n <div id=\"fragment-$id\">\n" ;
 905+ }
 906+ elsif ($text =~ /^END/i)
 907+ {
 908+ if ($id == $id_hi)
 909+ { $line = " </div>" ; }
 910+ }
 911+ else
 912+ {
 913+ $line = "\n <div id=\"fragment-$id\">\n $text\n </div>\n" ;
 914+ if ($id == $id_hi)
 915+ { $line .= " </div>" ; }
 916+ }
 917+ }
 918+
 919+ if ($line =~ /<!==\s*TRENDS\s*\{[^\}]*\}\{[^\}]*\}\s*==>/)
 920+ {
 921+ $parms = $line ;
 922+ $parms =~ s/^[^\{]*\{// ;
 923+ $parms =~ s/\}[^\}]*$// ;
 924+ ($trendY,$trendM) = split ('\}\s*\{', $parms,2) ;
 925+
 926+ # ($colorY,$month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
 927+ # ($colorM,$month1M,$month2M,$trendM) = split ('\|',$trendM) ;
 928+ ($month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
 929+ ($month1M,$month2M,$trendM) = split ('\|',$trendM) ;
 930+ if ($trendY >= 0)
 931+ { $colorY = "A" ; $trendY = "+$trendY" }
 932+ else
 933+ { $colorY = "E" ; }
 934+ if ($trendM >= 0)
 935+ { $colorM = "A" ; $trendM = "+$trendM" }
 936+ else
 937+ { $colorM = "E" ; }
 938+
 939+
 940+ #<!== TRENDS {A|5/8|5/9|+12%}{A|4/9|5/9|+8%} ==>
 941+ $line = " <td class=date>\n" .
 942+ " <table border=0>\n" .
 943+ " <tr>\n" .
 944+ " <td class=date$colorY><b>Y</b>&nbsp;$month1Y&rArr;$month2Y</td>\n" .
 945+ " <td class=date$colorY>$trendY</td>\n" .
 946+ " </tr>\n" .
 947+ " <tr>\n" .
 948+ " <td class=date$colorM><b>M</b>&nbsp;$month1M&rArr;$month2M</td>\n" .
 949+ " <td class=date$colorM>$trendM</td>\n" .
 950+ " </tr>\n" .
 951+ " </table>\n" .
 952+ " </td>\n" ;
 953+ }
 954+
 955+ if ($line =~ /<!==\s*OUT\s*PUBLIC\s*==>/)
 956+ {
 957+ $write_public = $true ;
 958+ $write_private = $false ;
 959+ }
 960+ elsif ($line =~ /<!==\s*OUT\s*EXTENDED\s*==>/)
 961+ {
 962+ $write_public = $false ;
 963+ $write_private = $true ;
 964+ }
 965+ elsif ($line =~ /<!==\s*OUT\s*ALWAYS\s*==>/)
 966+ {
 967+ $write_public = $true ;
 968+ $write_private = $true ;
 969+ }
 970+ elsif ($line =~ /<!==\s*OUT .*\s*==>/)
 971+ {
 972+ $line2 = $line ;
 973+ $line2 =~ s/^.*<!==\s*OUT\s*// ;
 974+ $line2 =~ s/\s*==>.*$// ;
 975+ $write_details = $false ;
 976+ $write_summary = $false ;
 977+ $write_columns = $false ;
 978+ if ($line2 =~ /C/)
 979+ { $write_columns = $true ; }
 980+ if ($line2 =~ /D/)
 981+ { $write_details = $true ; }
 982+ if ($line2 =~ /S/)
 983+ { $write_summary = $true ; }
 984+
 985+ &Print (COLUMNS, "$line\n") ;
 986+ &Print (DETAILS, "$line\n") ;
 987+ &Print (SUMMARY, "$line\n") ;
 988+ next ;
 989+ }
 990+
 991+ if ($line =~ /<!==\s*INC .*\s*==>/)
 992+ {
 993+ $line2 = $line ;
 994+ $line2 =~ s/^.*<!==\s*INC\s*// ;
 995+ $line2 =~ s/\s*==>.*$// ;
 996+
 997+ $file = "$path_in/$line2" ;
 998+ print "\nInclude $file\n" ;
 999+ if (! -e $file)
 1000+ { &Abort ("File $file not found\n") ; }
 1001+ open FILE, '<', $file ;
 1002+ foreach $line (<FILE>)
 1003+ {
 1004+ if ($write_columns)
 1005+ { &Print (COLUMNS, $line) ; }
 1006+ if ($write_details)
 1007+ { &Print (DETAILS, $line) ; }
 1008+ if ($write_summary)
 1009+ { &Print (SUMMARY, $line) ; }
 1010+ }
 1011+ next ;
 1012+ }
 1013+
 1014+ if ($write_columns)
 1015+ { &Print (COLUMNS, "$line\n") ; }
 1016+ elsif ($line =~ /-->/)
 1017+ { &Print (COLUMNS, "<!-- $line\n") ; }
 1018+ else
 1019+ { &Print (COLUMNS, "<!-- $line -->\n") ; }
 1020+
 1021+ if ($write_details)
 1022+ { &Print (DETAILS, "$line\n") ; }
 1023+ elsif ($line =~ /-->/)
 1024+ { &Print (DETAILS, "<!-- $line\n") ; }
 1025+ else
 1026+ { &Print (DETAILS, "<!-- $line -->\n") ; }
 1027+
 1028+ if ($write_summary)
 1029+ { &Print (SUMMARY, "$line\n") ; }
 1030+ elsif ($line =~ /-->/)
 1031+ { &Print (SUMMARY, "<!-- $line\n") ; }
 1032+ else
 1033+ { &Print (SUMMARY, "<!-- $line -->\n") ; }
 1034+ }
 1035+}
 1036+
 1037+sub Anchor
 1038+{
 1039+ my $anchor = shift ;
 1040+ $anchor =~ s/^\s*// ;
 1041+ $anchor =~ s/\s*$// ;
 1042+ $anchor =~ s/\s/_/g ;
 1043+ return (lc ($anchor)) ;
 1044+}
 1045+
 1046+sub WriteSynopsis
 1047+{
 1048+ my $path_out = shift ;
 1049+
 1050+ $notice_synopsis = "" ;
 1051+ # "<font color=#008000><b>New: multi-year trends for most metrics. Depending on history available reporting period can vary.</b></font>" ;
 1052+
 1053+ open SYNOPSIS, '>', "$path_out/RC_${p_year}_${p_month_d2}_synopsis.txt" ;
 1054+ print SYNOPSIS $synopsis ;
 1055+ close SYNOPSIS ;
 1056+
 1057+# some day also get this code from RT_yyyy_mm.html, for uniformity
 1058+$synopsis = <<__SYNOPSIS__ ;
 1059+<html lang="en">
 1060+<head>
 1061+<title>Wikimedia Report Card Synopsis - {{month}} {{yyyy}}</title>
 1062+<meta http-equiv="content-type" content="text/html"; charset="iso-8859-1">
 1063+<meta http-equiv="Window-target" content="_top">
 1064+<meta name="language" content="en,English">
 1065+<meta name="robots" content="index,follow">
 1066+<link rel="shortcut icon" href="http://wikimediafoundation.org/favicon.ico" />
 1067+<link rel="apple-touch-icon" href="http://wikimediafoundation.org/favicon.ico" />
 1068+<script src="assets/jquery-1.1.3.1.pack.js" type="text/javascript"></script>
 1069+<script src="assets/jquery.history_remote.pack.js" type="text/javascript"></script>
 1070+<script src="assets/jquery.tabs.pack.js" type="text/javascript"></script>
 1071+<script src="assets/jquery.tablesorter.js" type="text/javascript"></script>
 1072+
 1073+<script type="text/javascript">
 1074+\$(function()
 1075+{
 1076+ \$("#Synopsis").tablesorter();
 1077+})
 1078+</script>
 1079+
 1080+<script type="text/javascript">
 1081+\$(document).ready(
 1082+function()
 1083+{
 1084+\$("#Synopsis").tablesorter(sortList: [[0,0]] );
 1085+}
 1086+);
 1087+</script>
 1088+
 1089+<script type="text/javascript">
 1090+\$.tablesorter.addParser({
 1091+ id: "nohtml",
 1092+ is: function(s) { return false; },
 1093+ format: function(s) { return s.replace(/<.*?>/g,"").replace(/&nbsp;/g,""); },
 1094+ type: "text"
 1095+});
 1096+\$.tablesorter.addParser({
 1097+ id: "digitsonly",
 1098+ is: function(s) { return false; },
 1099+ format: function(s) { return $.tablesorter.formatFloat(s.replace(/<.*?>/g,"").replace(/&nbsp;/g,"").replace(/,/g,"").replace(/-/,"-1")); },
 1100+ type: "numeric"
 1101+});
 1102+</script>
 1103+
 1104+<style type="text/css">
 1105+/* tables */
 1106+table.tablesorter
 1107+{
 1108+ font-family:arial;
 1109+ background-color: #FFF; // #CDCDCD;
 1110+ margin:10px 0pt 15px;
 1111+ font-size: 7pt;
 1112+ width: 80%;
 1113+ text-align: left;
 1114+}
 1115+table.tablesorter thead tr th, table.tablesorter tfoot tr th
 1116+{
 1117+ background-color: #AAB;
 1118+ border: 1px solid #FFF;
 1119+ font-size: 8pt;
 1120+ padding: 4px;
 1121+}
 1122+table.tablesorter thead tr .header
 1123+{
 1124+ background-image: url(assets/bg.gif);
 1125+ background-repeat: no-repeat;
 1126+ background-position: center right;
 1127+ cursor: pointer;
 1128+}
 1129+table.tablesorter tbody td
 1130+{
 1131+ color: #3D3D3D;
 1132+ padding: 4px;
 1133+ background-color: #FFF;
 1134+ vertical-align: top;
 1135+}
 1136+table.tablesorter tbody tr.odd td
 1137+{ background-color:#F0F0F6; }
 1138+table.tablesorter thead tr .headerSortUp
 1139+{ background-image: url(assets/asc.gif); }
 1140+table.tablesorter thead tr .headerSortDown
 1141+{ background-image: url(assets/desc.gif); }
 1142+table.tablesorter thead tr .headerSortDown, table.tablesorter thead tr .headerSortUp
 1143+{ background-color: #BBF; //#8dbdd8; }
 1144+<!--
 1145+body {font-family:arial,sans-serif;background-color:#B0B0B0}
 1146+table,td,tr{background-color:#FFFFFF;font-size:11pt}
 1147+h1{font-size:22px}
 1148+h2{font-size:18px ; color:#006000 ; margin-top:40px}
 1149+h3{font-size:15px ; color:#006000}
 1150+form{margin:0}
 1151+a:link {color:#000080;text-decoration:none}
 1152+a:visited {color:#000080;text-decoration:none}
 1153+a:active {color:#000080;text-decoration:none}
 1154+a:hover {color:#0000FF;text-decoration:underline}
 1155+a img {border-color:black}
 1156+td.detail-left {font-size:12px ; color:#000000 ; text-align:left ; }
 1157+td.detail-center {font-size:12px ; color:#000000 ; text-align:center ; }
 1158+td.detail-right {font-size:12px ; color:#000000 ; text-align:right ; }
 1159+-->
 1160+</style>
 1161+</head>
 1162+<body>
 1163+<table width=800 cellpadding=18 align=center>
 1164+<tr>
 1165+ <td align='center'>
 1166+
 1167+ <table width=95%>
 1168+
 1169+ <tr>
 1170+ <td width=100% colspan=99>
 1171+ <table width=100%>
 1172+ <tr>
 1173+ <td align=left width=150 valign=top><img src='assets/WikimediaLogo.png' width=30></td>
 1174+ <td align=center valign=top><h1>Wikimedia Report Card <font color=#008000>{{month}} {{yyyy}} </font></h1>
 1175+ </td>
 1176+ <td align=right width=150 valign=top><h1>Synopsis</h1></td>
 1177+ <!-- <td align=right width=150 valign=top><small><small>Published<br>{{month+2}}<br>{{yyyy+m2}}</small></small></td> -->
 1178+ </tr>
 1179+ <tr>
 1180+ <td align=left width=150 valign=top><!-- <small><a href='RC_{{yyyy}}_{{mm-1}}_synopsis.html'>&lArr;&nbsp;{{month-1}}</a></small>--> </td>
 1181+ <td align=center valign=top>
 1182+ <small>&rArr; <a href='RC_{{yyyy}}_{{mm}}_detailed.html'>Detailed version</a>&nbsp;&nbsp;&nbsp;&nbsp; &rArr; <a href='RC_{{yyyy}}_{{mm}}_summary.html'>Summary, 1 column</a>&nbsp;&nbsp;&nbsp;&nbsp; &rArr; <a href='RC_{{yyyy}}_{{mm}}_columns.html'>Summary, 2 columns</a></small>
 1183+ </td>
 1184+ <td align=right width=150 valign=top><!--<small><a href='RC_{{yyyy}}_{{mm+1}}_synopsis.html'>{{month+1}}&nbsp;&rArr;</a></small>--></td>
 1185+ </tr>
 1186+ </table>
 1187+ </td>
 1188+ </tr>
 1189+ <tr>
 1190+ <td colspan=99>
 1191+ <small>
 1192+ <center>
 1193+ $notice_synopsis
 1194+ </center> <!-- General comment -->
 1195+ </small>
 1196+ </td>
 1197+</tr>
 1198+<tr><td colspan=99 align=center>
 1199+<table border=1 id='Synopsis' class=tablesorter>
 1200+<!-- <tr> -->
 1201+<!-- <td align='left' colspan=99> -->
 1202+<!-- <font color=#800000><b><small>No English Wikipedia dump was produced this month.<br>Without it some totals and trends are also meaningless and left blank.</small></b></font> -->
 1203+<!-- </td> -->
 1204+<!-- </tr> -->
 1205+DATA
 1206+</table>
 1207+</td></tr>
 1208+ <tr>
 1209+ <td colspan=99 align=center>
 1210+ <hr class=thin>
 1211+ <small><small><font color=808080>Author Erik Zachte - mail: ezachte@###.org (nospam: ###=wikimedia)</font></small></small>
 1212+ </td>
 1213+ </tr>
 1214+ </table>
 1215+<script type='text/javascript'>
 1216+\$('#Synopsis').tablesorter({
 1217+ // debug:true,
 1218+ headers:{0:{sorter:'nohtml'},1:{sorter:false},2:{sorter:'digitsonly'},3:{sorter:'digitsonly'},4:{sorter:false}}
 1219+});
 1220+</script>
 1221+
 1222+</body>
 1223+</html>
 1224+__SYNOPSIS__
 1225+
 1226+ undef @synopsis_notes ;
 1227+
 1228+# $data = "<tr><th>Unique Visitors</th></tr>\n" ;
 1229+# $synopsis = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ;
 1230+# $synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ;
 1231+ $data = "<thead><tr><th class=detail-left valign=top>&nbsp;<b>Metric</b>&nbsp;</th>" .
 1232+ "<th class=detail-center valign=top>&nbsp;<b>Now</b>&nbsp;<br>{{mm}}/{{yy}}</th>" .
 1233+ "<th class=detail-center valign=top>&nbsp;<b>Yearly change</b>&nbsp;<br>{{mm}}/{{yy-1}} &rArr; {{mm}}/{{yy}}</th>" .
 1234+ "<th class=detail-center valign=top>&nbsp;<b>Monthly change</b>&nbsp;<br>{{(mm/yy)-1}} &rArr; {{mm}}/{{yy}}</th>" .
 1235+ "<th class=detail-center valign=top>&nbsp;<b>Notes</b>&nbsp;</th></tr></thead>\n<tbody>\n" ;
 1236+# $data .= "<tr><th>&nbsp;</th><th>&nbsp;</th><th>&nbsp;</th><th>&nbsp;</th><th>&nbsp;</th></tr></thead>" ;
 1237+
 1238+# $comment_prev_month = "<sup><font color=#800000>*</font></sup>" ; # qqq
 1239+
 1240+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#unique_visitors'>Unique Visitors</a> <sup>All</sup>", "", @visitors) ;
 1241+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#page_requests'>Page Requests</a> <sup>All</sup>", "", @page_requests) ;
 1242+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#web_properties'>Site Rank</a> <sup>All</sup>", "", @rank) ;
 1243+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#commons_files'>Binary Files</a> <sup>Commons</sup> $comment_prev_month", "", @commons_files) ;
 1244+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#article_count'>Article Count</a> <sup>Wp</sup> $comment_prev_month", "", @article_count) ;
 1245+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#articles_per_day'>New Articles Per Day</a> <sup>Wp</sup> $comment_prev_month", "", @new_articles) ;
 1246+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#edits_per_month'>Edits</a> <sup>Wp</sup> $comment_prev_month", "", @edits) ;
 1247+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#new_editors_per_month'>New Editors <sup>Wp</sup></a> $comment_prev_month", "", @new_editors) ;
 1248+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#active_editors'>Active Editors</a> <sup>Wp</sup> $comment_prev_month", "", @active_editors) ;
 1249+ $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#very_active_editors'>Very Active Editors</a> <sup>Wp</sup> $comment_prev_month", "", @very_active_editors) ;
 1250+ $data .= "</tbody>\n<tfoot><tr><td colspan=99>&nbsp;</td></tr>\n" ;
 1251+ $data .= "<tr><td colspan=99><b><small>Repeated observations below are grayed</small></b></td></tr>\n" ;
 1252+
 1253+ foreach $note (@synopsis_notes)
 1254+ {
 1255+ $data .= "<tr><td class=detail-left colspan=99>$note</td></tr>" ;
 1256+ }
 1257+# $data .= "<tr><td class=detail-left colspan=99><font color=#800000><small>* For German and Polish Wikipedias data for June were not yet available: reused counts from May</small></font></td></tr>" ;
 1258+ $data .= "<tr><td class=detail-left colspan=99><font color=#808080><small>All = All projects, Wp = Wikipedia project&nbsp;&nbsp;&nbsp;/&nbsp;&nbsp;&nbsp;B = billion, M = million, k = thousand</small></font></td></tr></tfoot>" ;
 1259+
 1260+ $synopsis =~ s/DATA/$data/ ;
 1261+
 1262+ $synopsis =~ s/{{yyyy}}/$p_year/g ;
 1263+ $synopsis =~ s/{{yyyy-1}}/$p_year_prev/g ;
 1264+ $synopsis =~ s/{{yyyy\+m2}}/$p_year_plus_m2/g ;
 1265+ $synopsis =~ s/{{month}}/$p_month_name/g ;
 1266+ $synopsis =~ s/{{month-1}}/$p_month_name_prev/g ;
 1267+ $synopsis =~ s/{{month\+1}}/$p_month_name_next/g ;
 1268+ $synopsis =~ s/{{month\+2}}/$p_month_name_next2/g ;
 1269+
 1270+ $synopsis =~ s/{{y}}/$p_year_short/g ;
 1271+ $synopsis =~ s/{{y\-1}}/$p_year_prev_short/g ;
 1272+ $synopsis =~ s/{{yy}}/$p_year_short_d2/g ;
 1273+ $synopsis =~ s/{{yy\-1}}/$p_year_prev_short_d2/g ;
 1274+ $synopsis =~ s/{{m}}/$p_month/g ;
 1275+ $synopsis =~ s/{{m\-1}}/$p_month_prev/g ;
 1276+ $synopsis =~ s/{{mm}}/$p_month_d2/g ;
 1277+ $synopsis =~ s/{{mm-1}}/$p_month_prev_d2/g ;
 1278+ $synopsis =~ s/{{mm\+1}}/$p_month_next_d2/g ;
 1279+
 1280+ $synopsis =~ s/{{\(mm\/yy\)-1}}/$p_year_month_m1/g ;
 1281+ open SYNOPSIS, '>', "$path_out/RC_${p_year}_${p_month_d2}_synopsis.html" ;
 1282+ print SYNOPSIS $synopsis ;
 1283+ close SYNOPSIS ;
 1284+}
 1285+
 1286+sub Print # Write $text to $handle unless one of the filters below drops it.
 1287+{ # Arguments: output file handle, text to write.
 1288+ $handle = shift ;
 1289+ $text = shift ;
 1290+
 1291+ if ((! $debug) && ($text !~ /\[if lte/)) # Q&D (quick and dirty): text with an MSIE '[if lte' directive is never filtered
 1292+ {
 1293+ if ($text =~ /<!--/) # outside debug mode: drop text that contains an HTML comment opener
 1294+ { return ; }
 1295+ if ($text =~ /<!==/) # outside debug mode: drop text that contains the '<!==' template markup marker
 1296+ { return ; }
 1297+ }
 1298+
 1299+ if (($target_audience == $public) && $write_public) # print only when the current audience matches an enabled output flag
 1300+ { print $handle $text ; }
 1301+ if (($target_audience == $private) && $write_private)
 1302+ { print $handle $text ; }
 1303+}
 1304+
 1305+sub FormatSynopsisText # Format one metric as a fixed-width plain-text synopsis line.
 1306+{ # Arguments: label, comment, then the metric list (value, size code, yearly inc, monthly inc, inc suffix). Returns the line.
 1307+ $label = shift ;
 1308+ $comment = shift ;
 1309+ @metrics = @_ ;
 1310+
 1311+ $metric = $metrics [0] ;
 1312+ $size = $metrics [1] ;
 1313+ $inc_y = $metrics [2] ; # yearly
 1314+ $inc_m = $metrics [3] ; # monthly
 1315+ $inc = $metrics [4] ; # suffix appended to both increments ("th" marks a rank; otherwise presumably a percent sign - confirm)
 1316+
 1317+ $metric =~ s/,//g ; # strip thousands separators so the value can be used as a number
 1318+ if ($inc eq "th") # rank
 1319+ {
 1320+ $inc_y .= " " ;
 1321+ $inc_m .= " " ;
 1322+ $inc = " " ;
 1323+ }
 1324+ $size=~ s/[x]/ / ; # size code "x" is replaced by a space
 1325+
 1326+
 1327+ if ($inc_y !~ /-/) { $inc_y = '+' . $inc_y ; } # any increment without a minus sign gets an explicit plus
 1328+ if ($inc_m !~ /-/) { $inc_m = '+' . $inc_m ; }
 1329+ $inc_y = sprintf ("%5s", $inc_y) . $inc ;
 1330+ $inc_m = sprintf ("%5s", $inc_m) . $inc ;
 1331+
 1332+ if ($metric =~ /^\.+$/) # a metric made only of dots is left as is
 1333+ { ; }
 1334+ elsif ($size eq "b") # scale the value down to the unit named by the size code, rounded to an integer
 1335+ { $metric = sprintf ("%.0f", $metric / 1000000000) ; }
 1336+ elsif ($size eq "m")
 1337+ { $metric = sprintf ("%.0f", $metric / 1000000) ; }
 1338+ elsif ($size eq "k")
 1339+ { $metric = sprintf ("%.0f", $metric / 1000) ; }
 1340+ else
 1341+ { $metric = sprintf ("%.0f", $metric) ; }
 1342+
 1343+ my $text = sprintf ("%-20s", $label) . sprintf ("%8s", "$metric $size") ; # label left-aligned in 20 columns, value right-aligned in 8
 1344+ $text .= " (Y:$inc_y / M:$inc_m) $comment\n" ;
 1345+ return $text ;
 1346+}
 1347+
 1348+sub FormatSynopsisTable
 1349+{
 1350+ $label = shift ;
 1351+ $comment = shift ;
 1352+
 1353+ @metrics = @_ ;
 1354+
 1355+ $metric = $metrics [0] ;
 1356+ $size = $metrics [1] ;
 1357+ $inc_y = $metrics [2] ; # yearly
 1358+ $inc_m = $metrics [3] ; # monthly
 1359+ $inc = $metrics [4] ; # perc ?
 1360+ $notes = $metrics [5] ; # perc ?
 1361+
 1362+ ($notes_ref,$notes) = split ('\|', $notes) ;
 1363+ if ($notes ne "")
 1364+ {
 1365+ # text between '#' and first bracket (<>) will be grayed (repeated remarks)
 1366+ $notes =~ s/#([^<>]+)/<font color=#808080>$1<\/font>/g ;
 1367+ push @synopsis_notes, $notes ;
 1368+ }
 1369+
 1370+ $metric =~ s/,//g ;
 1371+ if ($inc eq "th") # rank
 1372+ {
 1373+ $inc_y .= " " ;
 1374+ $inc_m .= " " ;
 1375+ $inc = " " ;
 1376+ }
 1377+ $size=~ s/[x]/ / ;
 1378+
 1379+
 1380+ if ($inc_y !~ /-/) { $inc_y = '+' . $inc_y ; }
 1381+ if ($inc_m !~ /-/) { $inc_m = '+' . $inc_m ; }
 1382+ $inc_y = sprintf ("%5s", $inc_y) . $inc ;
 1383+ $inc_m = sprintf ("%5s", $inc_m) . $inc ;
 1384+
 1385+ if ($size eq "k")
 1386+ { $metric = sprintf ("%.1f", $metric / 1000) ; }
 1387+ elsif ($size eq "b")
 1388+ { $size = "B" ; }
 1389+ elsif ($size eq "m")
 1390+ { $size = "M" ; }
 1391+ elsif ($size eq "k")
 1392+ { $size = "K" ; }
 1393+ else
 1394+ { $size = "&nbsp;&nbsp;" ; }
 1395+
 1396+ if ($notes_ref eq "")
 1397+ { $notes_ref = '&nbsp;' ; }
 1398+
 1399+ $metric = "$metric $size" ;
 1400+
 1401+ if (($metric =~ /\.\./) || ($metric =~ /^0\.0/)) { $metric = "<font color=#C0C0C0>$metric</font>" ; }
 1402+ if (($metric =~ /\.\./) || ($metric =~ /^0\.0/)) { $metric = "<font color=#C0C0C0>$metric</font>" ; }
 1403+ if (($inc_y =~ /\.\./) || ($inc_y =~ /^0\.0/)) { $inc_y = "<font color=#C0C0C0>$inc_y</font>" ; }
 1404+ if (($inc_m =~ /\.\./) || ($inc_m =~ /^0\.0/)) { $inc_m = "<font color=#C0C0C0>$inc_m</font>" ; }
 1405+
 1406+ my $text = "<tr><td class=detail-left>$label</td><td class=detail-right>$metric</td><td class=detail-right>$inc_y</td><td class=detail-right>$inc_m</td><td class=detail-right>$notes_ref</td></tr>\n" ;
 1407+ return $text ;
 1408+}
 1409+
 1410+sub Abort # Print an abort message to STDOUT and end the script.
 1411+{ # Argument: message text (a trailing newline is removed before printing).
 1412+ $msg = shift ;
 1413+ chomp $msg ;
 1414+ print "\n!!! Abort script: '$msg'\n" ;
 1415+ exit ; # NOTE(review): a bare exit reports status 0, so a calling shell script cannot detect the abort from the exit code
 1416+}
 1417+
Property changes on: trunk/wikistats/reportcard/ReportCardGenerateHtml.pl
___________________________________________________________________
Added: svn:eol-style
14171418 + native
Index: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutputYearly.pl
@@ -1,1240 +1,1240 @@
2 -#!/usr/local/bin/perl
3 -# -i "w:/# Out Bayes" -o "w:/@ Report Card/Data"
4 -
5 - use lib "/home/ezachte/lib" ;
6 - use EzLib ;
7 - $trace_on_exit = $true ;
8 - ez_lib_version (2) ;
9 -
10 - $month_0 = "08" ; # last month of the 13-month window that is collected (maps to index 12 in MonthsSinceYearAgo)
11 - $year_0 = 2010 ; # year of that last month
12 -
13 -# set defaults mainly for tests on local machine
14 - default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;
15 -
16 - use Getopt::Std ;
17 -
18 -# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
19 -# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;
20 -
21 - $maxpopularwikis = 25 ;
22 - @projects = ('wb','wk','wn','wp','wq','ws','wv','commons') ; # project codes reported per project in WriteMonthlyData
23 - @projects2 = ('wb','wk','wn','wp','wq','ws','wv','wx','tot') ; # keys used for the yearly June article counts
24 - @projects2b = ('Wikibooks','Wiktionary','Wikinews','Wikipedia','Wikiquote','Wikisource','Wikiversity','Other','Total') ; # column titles, same order as @projects2
25 -
26 - &LogArguments ;
27 - &ParseArguments ;
28 - &InitProjectNames ;
29 - &InitReportNames ;
30 - &ReadStatisticsMonthly ;
31 - &WriteYearlyData ;
32 -# &WriteMonthlyData ;
33 - exit ;
34 -
35 -sub LogArguments # Parse the command-line switches into the global %options and print them.
36 -{
37 - my $arguments ;
38 - getopt ("iolpft", \%options) ; # switches -i -o -l -p -f -t each take a value
39 - foreach $arg (sort keys %options)
40 - { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
41 - print ("\nArguments\n$arguments\n") ;
42 -# &Log ("\nArguments\n$arguments\n") ;
43 -}
44 -
45 -sub ParseArguments # Set input/output folders and output file names, then initialise the comparison periods.
46 -{ # NOTE: the command-line handling below is commented out; the folders are hard-coded further down.
47 -# my @options ;
48 -# getopt ("io", \%options) ;
49 -
50 -# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
51 -# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
52 -
53 -# $path_in = $options {"i"} ;
54 -# $path_out = $options {"o"} ;
55 -
56 -# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
57 -# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
58 -
59 - $path_in = "w:/# out bayes" ; # hard-coded input folder (the -i switch is ignored)
60 - $path_out = "w:/@ report card/data" ; # hard-coded output folder (the -o switch is ignored)
61 -
62 - print "Input folder: $path_in\n" ;
63 - print "Output folder: $path_out\n" ;
64 - print "\n" ;
65 -
66 - $file_csv_out = "$path_out/StatisticsMonthly_${year_0}_$month_0.csv" ; # written by WriteMonthlyData
67 - $file_csv_out_year = "$path_out/StatisticsYearly.csv" ; # written by WriteYearlyData
68 -
69 - &SetComparisonPeriods ($year_0,$month_0) ;
70 -}
71 -
72 -sub ReadStatisticsMonthly # Read the monthly statistics for every project code, then the Commons binaries counts.
73 -{
74 - &ReadStatisticsMonthlyForProject ("wb") ;
75 - &ReadStatisticsMonthlyForProject ("wk") ;
76 - &ReadStatisticsMonthlyForProject ("wn") ;
77 - &ReadStatisticsMonthlyForProject ("wp") ;
78 - &ReadStatisticsMonthlyForProject ("wq") ;
79 - &ReadStatisticsMonthlyForProject ("ws") ;
80 - &ReadStatisticsMonthlyForProject ("wv") ;
81 - &ReadStatisticsMonthlyForProject ("wx") ;
82 -
83 - &ReadStatisticsPerBinariesExtensionCommons ;
84 -}
85 -
86 -sub ReadStatisticsMonthlyForProject # Read both monthly csv files of one project into the global %values, %totals, %totals_project and %june_articles.
87 -{ # Argument: project code (e.g. "wp"). Aborts the script when an input file is missing or cannot be opened.
88 - my $project = shift;
89 -
90 - my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
91 - my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;
92 -
93 - if (! -e $file_csv_in_1)
94 - { &Abort ("Input file '$file_csv_in_1' not found") ; }
95 - if (! -e $file_csv_in_2)
96 - { &Abort ("Input file '$file_csv_in_2' not found") ; }
97 -
98 - print "Read '$file_csv_in_1'\n" ;
99 - open CSV_IN, '<', $file_csv_in_1 or &Abort ("Input file '$file_csv_in_1' could not be opened: $!") ;
100 -
101 - undef %lines ;
102 - while ($line = <CSV_IN>)
103 - {
104 - ($language,$date,$counts) = split (',', $line, 3) ;
105 -
106 - next if $language eq 'commons' and $project ne 'wx' ;
107 - next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on
108 -
109 - ($month,$day,$year) = split ('\/', $date) ; # dates are mm/dd/yyyy
110 -
111 -if ($month == 6) # collect article counts of every June for the yearly report (WriteYearlyData)
112 -{
113 - @fields = split (',', $counts) ;
114 - $articles = $fields [4] ;
115 - $june_articles {"$project,$year"} += $articles ;
116 - $june_articles {"tot,$year"} += $articles ;
117 - $years {$year} ++ ;
118 -# print "$project $year $month : $articles\n" ;
119 -}
120 -
121 - my $m = &MonthsSinceYearAgo ($year, $month) ;
122 - if (! (($m < 0) || ($m > 12))) # keep only months 0..12; the former test negated just ($m < 0) and so also kept months beyond 12
123 - {
124 - $lines {$language}{$m} = $line ;
125 - $languages {$language}++ ;
126 - }
127 - }
128 -
129 - foreach $language (sort keys %languages)
130 - {
131 - for ($m=1 ; $m <= 12 ; $m++) # a missing month reuses the line of the month before it
132 - {
133 - if ($lines {$language}{$m} eq '')
134 - { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
135 - }
136 -
137 - for ($m=0 ; $m <= 12 ; $m++)
138 - {
139 - $line = $lines {$language}{$m} ;
140 - chomp $line ;
141 - ($language,$date,$counts) = split (',', $line, 3) ;
142 - @fields = split (',', $counts) ;
143 -
144 - if ($project eq "wp")
145 - {
146 - foreach $f (1,4,6,11) # new editors, articles, new articles, edits
147 - {
148 - $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
149 - $totals {"$f,$m"} += $fields [$f] ;
150 - $totals_project {"$f,$m"} {$project} += $fields [$f] ;
151 - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
152 - }
153 - }
154 - else
155 - {
156 - foreach $f (1,4) # new editors, articles: counted per wiki, overall and per project
157 - {
158 - $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
159 - $totals {"$f,$m"} += $fields [$f] ;
160 - $totals_project {"$f,$m"} {$project} += $fields [$f] ;
161 -
162 - if ($language eq 'commons')
163 - { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
164 -
165 - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
166 - }
167 - foreach $f (6,11) # new articles, edits: for non-wp projects only the per-project totals are kept
168 - {
169 - $totals_project {"$f,$m"} {$project} += $fields [$f] ;
170 - if ($language eq 'commons')
171 - { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
172 - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
173 - }
174 -
175 - }
176 - }
177 - }
178 - close CSV_IN ;
179 -
180 - # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
181 - # but use f = column count in StatisticsMonthly.csv
182 -
183 - print "Read '$file_csv_in_2'\n" ;
184 - open CSV_IN, '<', $file_csv_in_2 or &Abort ("Input file '$file_csv_in_2' could not be opened: $!") ;
185 -
186 - undef %lines ;
187 - while ($line = <CSV_IN>)
188 - {
189 - chomp $line ;
190 - ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
191 -
192 - next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
193 - # next if $language eq 'commons' ; # ignore editor count on commons altogether, most are already counted for other project
194 - # (even for several projects, to be tuned after centralauth dump is available)
195 -
196 - if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
197 - if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces
198 -
199 - ($month,$day,$year) = split ('\/', $date) ;
200 - my $m = &MonthsSinceYearAgo ($year, $month) ;
201 - if (($m < 0) || ($m > 12))
202 - { next ; }
203 -
204 - $lines {$language}{$m} = $line ;
205 - $languages {$language}++ ;
206 - }
207 -
208 - foreach $language (sort keys %languages)
209 - {
210 - for ($m=1 ; $m <= 12 ; $m++) # a missing month reuses the line of the month before it
211 - {
212 - if ($lines {$language}{$m} eq '')
213 - { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
214 - }
215 -
216 - for ($m=0 ; $m <= 12 ; $m++)
217 - {
218 - $line = $lines {$language}{$m} ;
219 - chomp $line ;
220 - ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
221 - @fields = split (',', $counts) ;
222 -
223 - foreach $f (2,3) # editors_gt_5, editors_gt_100
224 - {
225 - # count user with over x edits
226 - # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
227 - # thresholds = 1,3,5,10,25,32,50,100,etc
228 - if ($f == 2) { $f2 = 2 ; } # $f2 = column in this file that belongs to report column $f
229 - if ($f == 3) { $f2 = 7 ; }
230 -
231 - $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;
232 -
233 - if ($language ne 'commons') # ignore editor count on commons for totals, most editors are already counted for other project
234 - { $totals {"$f,$m"} += $fields [$f2] ; } # (even for several projects, to be tuned after centralauth dump is available)
235 -
236 - $totals_project {"$f,$m"} {$project} += $fields [$f2] ;
237 -
238 - if ($language eq 'commons')
239 - { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
240 - }
241 - }
242 - }
243 - close CSV_IN ;
244 -}
245 -
246 -sub ReadStatisticsPerBinariesExtensionCommons # Read per-extension binaries counts for Commons into %ext_cnt / %ext_tot and pick the largest extensions.
247 -{
248 - my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
249 - my $mmax = -1 ; # highest month index (0..12) for which data were found
250 -
251 - if (! -e $file_csv_in)
252 - { &Abort ("Input file '$file_csv_in' not found") ; }
253 -
254 - print "Read '$file_csv_in'\n" ;
255 - open CSV_IN, '<', $file_csv_in ;
256 - while ($line = <CSV_IN>)
257 - {
258 - chomp $line ;
259 - ($language,$date,$counts) = split (',', $line, 3) ;
260 -
261 - if ($language ne "commons") { next ; }
262 -
263 - if ($date eq "00/0000") # this row is stored under month index -1; WriteMonthlyData prints its fields as the extension names
264 - {
265 - @fields = split (',', $counts) ;
266 - $field_ndx = 0 ;
267 - foreach $field (@fields)
268 - {
269 - $ext_cnt {-1}{$field_ndx} = $field ;
270 - # print "EXT_CNT $field_ndx : $field\n" ;
271 - $field_ndx ++ ;
272 - }
273 - next ;
274 - }
275 -
276 - ($month,$year) = split ('\/', $date) ; # dates in this file are mm/yyyy
277 - my $m = &MonthsSinceYearAgo ($year, $month) ;
278 - if (($m < 0) || ($m > 12))
279 - { next ; }
280 - if ($m > $mmax)
281 - { $mmax = $m ; }
282 -
283 - @fields = split (',', $counts) ;
284 - $field_ndx = 0 ;
285 - foreach $field (@fields)
286 - {
287 - $ext_cnt {$m}{$field_ndx} = $field ;
288 - $ext_tot {$m} += $field ;
289 - $field_ndx ++ ;
290 - }
291 - }
292 - close CSV_IN ;
293 -
294 - %ext_cnt_mmax = %{$ext_cnt {$mmax}} ; # counts of the most recent month found
295 - @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ; # column indexes, largest count first
296 -
297 - $extcnt = 0 ;
298 - foreach $extndx (@ext_cnt_mmax)
299 - {
300 - # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
301 - push @extndxs, $extndx ;
302 - if ($extcnt++ >= 9) { last ; } # keep at most 10 column indexes in the global @extndxs
303 - }
304 -}
305 -
306 -sub ReadMediaTrends # Empty stub: all statements below are commented out, so calling it does nothing.
307 -{
308 -# open FILE_UV, '<', $file_regions_UV ;
309 -# close FILE_UV ;
310 -
311 -# open FILE_REACH, '<', $file_regions_Reach ;
312 -# close FILE_REACH ;
313 -}
314 -
315 -sub WriteYearlyData # Write the June article counts per project per year to $file_csv_out_year (raw table, then readable table) and echo them to STDOUT.
316 -{ # Reads the globals %years, %june_articles, @projects2 and @projects2b; fills %june_diff. Aborts when the file cannot be opened.
317 - print "Write file '$file_csv_out_year'\n" ;
318 - open CSV_OUT, '>', $file_csv_out_year or &Abort ("Output file '$file_csv_out_year' could not be opened: $!") ;
319 -
320 - $line = "Articles per project per year (June 30)" ;
321 - print "$line\n" ;
322 - print CSV_OUT "$line\n" ;
323 -
324 - $line = "" ;
325 - foreach $project (@projects2b) # header: every project title spans two columns (count, growth percentage)
326 - { $line .= ",$project," ; }
327 - $line .= ",Growth," ;
328 - print "$line\n" ;
329 - print CSV_OUT "$line\n" ;
330 -
331 - foreach $year (sort keys %years)
332 - {
333 - $june_diff {$year} = $june_articles {"tot,$year"} - $june_articles {"tot,".($year-1)} ; # articles added since previous June, all projects
334 - $line = "$year" ;
335 - foreach $project (@projects2)
336 - {
337 - $count = $june_articles {"$project,$year"} ;
338 - $count_prev = $june_articles {"$project,".($year-1)} ;
339 - $perc = "-" ; # no percentage when there is no previous year to compare with
340 - if ($count_prev > 0)
341 - { $perc = sprintf ("%.0f", 100 * ($count/$count_prev) - 100) . '%' ; $perc =~ s/^(\d)/\+$1/ ; }
342 - # $count = sprintf ("%.0f", $count / 1000) ;
343 - $line .= ",$count,$perc" ;
344 - }
345 - $diff = $june_diff {$year} ;
346 - $diff_prev = $june_diff {$year-1} ;
347 - $perc = "-" ;
348 - if ($diff_prev > 0)
349 - { $perc = sprintf ("%.0f", 100 * ($diff/$diff_prev) - 100) . '%' ; $perc =~ s/^(\d)/+$1/ ; }
350 - $line .= ",$diff,$perc" ;
351 - print "$line\n" ;
352 - print CSV_OUT "$line\n" ;
353 - }
354 -
355 - $line = "\nReadable version" ; # same table again, with counts abbreviated to k / M
356 - print "$line\n" ;
357 - print CSV_OUT "$line\n" ;
358 -
359 - $line = "" ;
360 - foreach $project (@projects2b)
361 - { $line .= ",$project," ; }
362 - $line .= ",Growth," ;
363 - print "$line\n" ;
364 - print CSV_OUT "$line\n" ;
365 -
366 - foreach $year (sort keys %years)
367 - {
368 - $line = "$year" ;
369 - foreach $project (@projects2)
370 - {
371 - $count = $june_articles {"$project,$year"} ;
372 - $count_prev = $june_articles {"$project,".($year-1)} ;
373 -
374 - $perc = "-" ;
375 - if ($count_prev > 0)
376 - { $perc = sprintf ("%.0f", 100 * ($count/$count_prev) - 100) . '%' ; $perc =~ s/^(\d)/\+$1/ ; }
377 - if ($count >= 1000000)
378 - { $count = sprintf ("%.1f", $count / 1000000) . 'M' ; }
379 - elsif ($count >= 1000)
380 - { $count = sprintf ("%.0f", $count / 1000) . 'k' ; }
381 - $line .= ",$count,$perc" ;
382 - }
383 - $diff = $june_diff {$year} ;
384 - $diff_prev = $june_diff {$year-1} ;
385 - $perc = "-" ;
386 - if ($diff_prev > 0)
387 - { $perc = sprintf ("%.0f", 100 * ($diff/$diff_prev) - 100) . '%' ; $perc =~ s/^(\d)/\+$1/ ; }
388 - if ($diff >= 1000000)
389 - { $diff = sprintf ("%.1f", $diff / 1000000) . 'M' ; }
390 - elsif ($diff >= 1000) # was tested against $count (the already formatted count of the last project), so growth was never abbreviated to k
391 - { $diff = sprintf ("%.0f", $diff / 1000) . 'k' ; }
392 - $line .= ",$diff,$perc" ;
393 -
394 - print "$line\n" ;
395 - print CSV_OUT "$line\n" ;
396 - }
397 - close CSV_OUT ; # the handle was left open before, so output was only flushed at program exit
398 -}
399 -
400 -sub WriteMonthlyData # Write 13-month trend tables (absolute and indexed) per metric and for Commons binaries to $file_csv_out.
401 -{ # Reads the globals filled by ReadStatisticsMonthlyForProject and ReadStatisticsPerBinariesExtensionCommons.
402 - print "Write file '$file_csv_out'\n" ;
403 - open CSV_OUT, '>', $file_csv_out ;
404 - $output = "" ;
405 - foreach $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
406 - {
407 -
408 - $output .= "\n,${out_report_descriptions [$f]} - Absolute\n" ;
409 - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
410 -
411 - $line = ",Total," ;
412 - for ($m = 0 ; $m <= 12 ; $m++)
413 - { $line .= $totals {"$f,$m"} . "," ; }
414 -
415 - # growth in one year
416 - if ($totals {"$f,0"} != 0)
417 - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,12"} / $totals {"$f,0"}) - 100). "%," ; }
418 - else
419 - { $line .= "n.a.," ; }
420 -
421 - # growth in one month
422 - if ($totals {"$f,11"} != 0)
423 - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,12"} / $totals {"$f,11"}) - 100). "%," ; }
424 - else
425 - { $line .= "n.a.," ; }
426 -
427 - $line =~ s/,$// ;
428 - $output .= "$line\n" ;
429 -
430 - # sort by absolute amount for last month
431 - %values_f_12 = %{$values {"$f,12"}} ;
432 - $index = 1 ;
433 - foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
434 - {
435 - # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;
436 -
437 - ($project,$language) = split (",", $key) ;
438 - $language_name = $out_languages {$language} ;
439 - if (($project ne "wp") && ($project ne "wx"))
440 - { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
441 - else
442 - { $line = "$index,$language_name," ; }
443 -
444 - for ($m = 0 ; $m <= 12 ; $m++)
445 - { $line .= $values {"$f,$m"} {$key} . "," ; }
446 -
447 - if ($values {"$f,0"} {$key} != 0)
448 - { $line .= sprintf ("%.1f", 100 * ($values {"$f,12"} {$key} / $values {"$f,0"} {$key}) - 100). "%," ; }
449 - else
450 - { $line .= "n.a.," ; }
451 -
452 - if ($values {"$f,11"} {$key} != 0)
453 - { $line .= sprintf ("%.1f", 100 * ($values {"$f,12"} {$key} / $values {"$f,11"} {$key}) - 100). "%," ; }
454 - else
455 - { $line .= "n.a.," ; }
456 -
457 - $line =~ s/,$// ;
458 - $output .= "$line\n" ;
459 -
460 - if ($index++ >= 25) { last ; } # list at most 25 wikis
461 - }
462 - $output .= "\n" ;
463 -
464 - foreach $project (sort {$totals_project {"$f,12"} {$b} <=> $totals_project {"$f,12"} {$a}} @projects) # per-project totals, largest in the last month first
465 - {
466 -# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
467 -
468 - if ($project eq 'commons')
469 - { $line = ",Commons," ; }
470 - else
471 - { $line = "," . &GetProjectName ($project) . "," ; }
472 -
473 - for ($m = 0 ; $m <= 12 ; $m++)
474 - { $line .= $totals_project {"$f,$m"} {$project} . "," ; }
475 -
476 - if ($totals_project {"$f,0"} {$project} != 0)
477 - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,12"} {$project} / $totals_project {"$f,0"} {$project}) - 100). "%," ; }
478 - else
479 - { $line .= "n.a.," ; }
480 -
481 - if ($totals_project {"$f,11"} {$project} != 0)
482 - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,12"} {$project} / $totals_project {"$f,11"} {$project}) - 100). "%," ; }
483 - else
484 - { $line .= "n.a.," ; }
485 -
486 - $line =~ s/,$// ;
487 - $output .= "$line\n" ;
488 - }
489 -
490 - $output .= "\n,${out_report_descriptions [$f]} - Indexed\n" ; # same rows again, every month as a percentage of month 0
491 - $output .= "$csv_recent_months\n" ;
492 -
493 - # sort by absolute amount for last month
494 - $index = 1 ;
495 - foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
496 - {
497 - # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;
498 -
499 - ($project,$language) = split (",", $key) ;
500 - $language_name = $out_languages {$language} ;
501 - if (($project ne "wp") && ($project ne "wx"))
502 - { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
503 - else
504 - { $line = "$index,$language_name," ; }
505 -
506 - $value_100 = $values {"$f,0"} {$key} ; # value of month 0 = index base (100)
507 - for ($m = 0 ; $m <= 12 ; $m++)
508 - {
509 - if ($value_100 != 0)
510 - { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m"} {$key} / $value_100)) . "," ; }
511 - else
512 - { $line .= "," ; }
513 - }
514 - $line =~ s/,$// ;
515 - $output .= "$line\n" ;
516 -
517 - # put totals last in chart to show line on top of others
518 - if ($index == 9)
519 - {
520 - $line = ",Total," ;
521 - $total_100 = $totals {"$f,0"} ;
522 - for ($m = 0 ; $m <= 12 ; $m++)
523 - {
524 - if ($total_100 != 0)
525 - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m"} / $total_100)) . "," ; }
526 - else
527 - { $line .= "," ; }
528 - }
529 - $line .= ",(sorted here to make it top-most line out of 10 in Excel)" ;
530 - $output .= "$line\n" ;
531 - }
532 -
533 - if ($index++ >= 25) { last ; }
534 - }
535 - $output .= "\n" ;
536 -
537 - foreach $project (sort {$totals_project {"$f,12"} {$b} <=> $totals_project {"$f,12"} {$a}} @projects)
538 - {
539 -# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
540 -
541 - if ($project eq 'commons')
542 - { $line = ",Commons," ; }
543 - else
544 - { $line = "," . &GetProjectName ($project) . "," ; }
545 -
546 - $value_100 = $totals_project {"$f,0"} {$project} ;
547 - for ($m = 0 ; $m <= 12 ; $m++)
548 - {
549 - if ($value_100 != 0)
550 - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m"} {$project} / $value_100)) . "," ; }
551 - else
552 - { $line .= "," ; }
553 - }
554 - $line =~ s/,$// ;
555 - $output .= "$line\n" ;
556 - }
557 - $output .= "\n," . '=' x 150 . "\n" ; # separator row between metrics
558 - }
559 -
560 - print CSV_OUT $output ;
561 -
562 - $output = "\n,Binaries per month - Absolute\n" ;
563 - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
564 - $output .= "\n$csv_recent_months,%inc year,%inc month\n" ; # NOTE(review): the header row is written a second time here - looks like a leftover, confirm
565 -
566 - $line = ",Total," ;
567 - for ($m = 0 ; $m <= 12 ; $m++)
568 - { $line .= $ext_tot {$m} . "," ; }
569 -
570 - if ($ext_tot {0} != 0)
571 - { $line .= sprintf ("%.1f", 100 * ($ext_tot {12} / $ext_tot {0}) - 100). "%," ; }
572 - else
573 - { $line .= "n.a.," ; }
574 -
575 - if ($ext_tot {11} != 0)
576 - { $line .= sprintf ("%.1f", 100 * ($ext_tot {12} / $ext_tot {11}) - 100). "%," ; }
577 - else
578 - { $line .= "n.a.," ; }
579 -
580 - $line =~ s/,$// ;
581 - $output .= "$line\n" ;
582 -
583 - $index = 0 ;
584 - # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
585 - for ($e = $#extndxs ; $e >= $#extndxs - 9 ; $e--)
586 - {
587 - $index++ ;
588 -
589 - if ($e < 0) # fewer than 10 extensions available: emit an empty placeholder row
590 - {
591 - $line = "$index,xxx," ;
592 - for ($m = 0 ; $m <= 12 ; $m++)
593 - { $line .= "," ; }
594 - }
595 - else
596 - {
597 - $extndx = $extndxs [$e] ;
598 - $line = "$index,${ext_cnt {-1}{$extndx}}," ;
599 -
600 - for ($m = 0 ; $m <= 12 ; $m++)
601 - { $line .= $ext_cnt {$m}{$extndx} . "," ; }
602 -
603 - if ($ext_cnt {0}{$extndx} != 0)
604 - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {12}{$extndx} / $ext_cnt {0}{$extndx}) - 100). "%," ; }
605 - else
606 - { $line .= "n.a.," ; }
607 -
608 - if ($ext_cnt {11}{$extndx} != 0)
609 - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {12}{$extndx} / $ext_cnt {11}{$extndx}) - 100). "%," ; }
610 - else
611 - { $line .= "n.a.," ; }
612 - }
613 -
614 - $line =~ s/,$// ;
615 - $output .= "$line\n" ;
616 - }
617 -
618 - print CSV_OUT $output ;
619 -
620 - $output = "\n,Binaries per month - Indexed\n" ;
621 - $output .= "$csv_recent_months\n" ;
622 -
623 - $index = 0 ;
624 - # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
625 - for ($e = $#extndxs ; $e >= $#extndxs - 9 ; $e--)
626 - {
627 - $index++ ;
628 -
629 - if ($e < 0)
630 - {
631 - $line = "$index,xxx," ;
632 - for ($m = 0 ; $m <= 12 ; $m++)
633 - { $line .= "," ; }
634 - }
635 - else
636 - {
637 - $extndx = $extndxs [$e] ;
638 - $line = "$index,${ext_cnt {-1}{$extndx}}," ;
639 - $ext_cnt_m0 = $ext_cnt {0}{$extndx} ; # count of month 0 = index base (100)
640 - for ($m = 0 ; $m <= 12 ; $m++)
641 - {
642 - if ($ext_cnt_m0 > 0)
643 - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m}{$extndx} / $ext_cnt_m0)). "," ; }
644 - else
645 - { $line .= "," ; }
646 - }
647 - }
648 -
649 - $line =~ s/,$// ;
650 - $output .= "$line\n" ;
651 - }
652 - print CSV_OUT $output ;
653 - close CSV_OUT ;
654 -
655 - print "\nOutput written to $file_csv_out\n\n" ;
656 -}
657 -
658 -sub SetComparisonPeriods # Print the comparison periods and build the 13-month csv header in the global $csv_recent_months.
659 -{ # Arguments: year and month of the last month of the reporting window.
660 - my $year = shift ;
661 - my $month = shift ;
662 - my @months = qw(Xxx Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec) ;
663 -
664 - my ($month_0, $month_0_file, $month_0_minus_12, $month_0_minus_1) ; # lexicals: inside this sub $month_0 is not the global of the same name
665 -
666 - $year_ = $year ; # keep the original arguments; $year and $month are modified below
667 - $month_ = $month ;
668 -
669 - $month_0 = sprintf ("%04d/%02d",$year, $month+1) ; # NOTE(review): the labels use $month+1 - confirm whether that offset is intended
670 - $month_0_file = sprintf ("%04d_%02d",$year, $month+1) ; # for filenames
671 - $month_0_minus_12 = sprintf ("%04d/%02d",$year-1,$month+1) ;
672 - ($year,$month) = $month > 1 ? ($year,$month-1) : ($year-1,12) ; # step back one month, wrapping January to December of the previous year
673 - $month_0_minus_1 = sprintf ("%04d/%02d",$year,$month+1) ;
674 -
675 - print "\nWrite trend data up till month: $month_0\n\n" ;
676 - print "Compare with previous month: $month_0_minus_1, previous year: $month_0_minus_12\n\n" ;
677 -
678 - $csv_recent_months = ",project," ;
679 - $year = $year_ - 1 ; # the window starts at the same month one year earlier
680 - $month = $month_ ;
681 - for ($m = 0 ; $m <= 12 ; $m++)
682 - {
683 - $recent_months [$m] = sprintf ("%04d/%02d", $year, $month) ;
684 - $csv_recent_months .= sprintf ("%02d/%04d", $month, $year) . "," ;
685 - ($year,$month) = $month < 12 ? ($year,$month+1) : ($year+1,1) ; # advance one month, wrapping December to January
686 - }
687 - $csv_recent_months =~ s/,$// ;
688 -}
689 -
690 -#sub WriteCsvFilesPerPeriod
691 -#{
692 -# foreach $period (sort keys %totals)
693 -# {
694 -# &LogT ("\nWrite totals per $period: ") ;
695 -# $desc = $descriptions {$period} ;
696 -
697 -# foreach $project (sort keys %{$totals {$period}})
698 -# {
699 -# &Log ("$project ") ;
700 -
701 -# $dir_out = "$path_out/csv_$project" ;
702 -# if (! -d $dir_out)
703 -# { mkdir $dir_out, 0777 ; }
704 -
705 -# $file_out = "$dir_out/$desc.csv" ;
706 -
707 -# open CSV, ">", $file_out ;
708 -# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}})
709 -# {
710 -# ($language,$yearmonth) = split (",", $key) ;
711 -# # print "PERIOD $period PROJECT $project KEY $key\n" ;
712 -# if ($period eq "month")
713 -# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; }
714 -# else
715 -# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; }
716 -# }
717 -# close CSV ;
718 -# }
719 -# }
720 -#}
721 -
722 -#sub WriteCsvHtmlFilesPopularWikis
723 -#{
724 -# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ;
725 -
726 -# $dir_out = "$path_out/csv_wp" ;
727 -# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$month_0_file.csv" ;
728 -
729 -## extend with normalized counts
730 -## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv
731 -
732 -# open CSV, ">", $file_out ;
733 -# print CSV $csv_recent_months ;
734 -
735 -# # write per popular language+wiki 13 months of page view totals
736 -# $lines = 0 ;
737 -# foreach $line (@totals_lastmonth)
738 -# {
739 -# if (++$lines > $maxpopularwikis) { last ; }
740 -
741 -# ($project, $language) = split (',', $line) ;
742 -# $largest_projects {"$project-$language"} ++ ;
743 -
744 -# $language_name = $out_languages {$language} ;
745 -
746 -# if (($project ne "wp") && ($project ne "wx"))
747 -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
748 -# else
749 -# { print CSV "$language_name," ; }
750 -
751 -## %test = %{$totals {"month"} {"wp"} };
752 -## %test2 = @recent_months ;
753 -# for ($m = 0 ; $m <= 12 ; $m++)
754 -# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; }
755 -# print CSV "\n" ;
756 -# }
757 -
758 -# print CSV "\n$csv_recent_months" ;
759 -
760 -# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100
761 -# $lines = 0 ;
762 -# foreach $line (@totals_lastmonth)
763 -# {
764 -# if (++$lines > $maxpopularwikis) { last ; }
765 -
766 -# ($project, $language) = split (',', $line) ;
767 -# $language_name = $out_languages {$language} ;
768 -
769 -# if (($project ne "wp") && ($project ne "wx"))
770 -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
771 -# else
772 -# { print CSV "$language_name," ; }
773 -
774 -# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ;
775 -# for ($m = 0 ; $m <= 12 ; $m++)
776 -# {
777 -# if ($recent_month_0 > 0)
778 -# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; }
779 -# else
780 -# { print CSV "," ; }
781 -# }
782 -
783 -# print CSV "\n" ;
784 -# }
785 -# close CSV ;
786 -
787 -# # write ready made table rows for report card: page views top 25 movers shakers
788 -# foreach $key (keys %largest_projects)
789 -# {
790 -# ($project,$language) = split ('-', $key) ;
791 -
792 -# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_0"} ;
793 -# $total_prevmonth = $totals {"month"} {$project} {"$language,$month_0_minus_1"} ;
794 -# $total_prevyear = $totals {"month"} {$project} {"$language,$month_0_minus_12"} ;
795 -
796 -# $perc_month = "no data" ;
797 -# $perc_year = "no data" ;
798 -
799 -# if ($total_prevyear > 0)
800 -# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; }
801 -# if ($total_prevyear > 0)
802 -# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; }
803 -
804 -# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ;
805 -
806 -# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ;
807 -
808 -# $project_name = &GetProjectName ($project) ;
809 -# $language_name = $out_languages {$language} ;
810 -
811 -# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ;
812 -# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ;
813 -# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ;
814 -# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ;
815 -# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ;
816 -
817 -# $growth_figures_text {"$perc_month-$project-$language"} = $line ;
818 -# $growth_figures_html {"$perc_month-$project-$language"} = $html ;
819 -# }
820 -
821 -# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$month_0_file.html" ;
822 -
823 -# open HTML, ">", $file_html ;
824 -# foreach $key (sort {$b <=> $a} keys %growth_figures_text)
825 -# {
826 -# print "$key: ". $growth_figures_text {$key} . "\n" ;
827 -# print HTML $growth_figures_html {$key} ;
828 -# }
829 -# close HTML ;
830 -#}
831 -
832 -
# Translate a two-letter wikistats project code into its display name.
#
# Parameter: $project - project code ("wp", "wb", "wk", "wx", "wn", "wq", "ws", "wv")
# Returns:   the project name; "" for "wx" and for any unknown code.
#
# The result is also stored in the global $project_name, as before. Previously
# an unknown code left that global untouched, so the name from the previous
# call was returned; it is now reset on every call.
sub GetProjectName
{
  my $project = shift ;

  my %names = (
    wp => "Wikipedia",
    wb => "Wikibooks",
    wk => "Wiktionary",
    wx => "",
    wn => "Wikinews",
    wq => "Wikiquote",
    ws => "Wikisource",
    wv => "Wikiversity",
  ) ;

  $project_name = "" ;
  if (defined ($project) && exists ($names {$project}))
  { $project_name = $names {$project} ; }

  return ($project_name) ;
}
848 -
# Position of a month inside the 13-month reporting window.
#
# Parameters: $year, $month - the month to locate
# Returns:    12 for the window's last month ($year_0/$month_0), 0 for the same
#             month one year earlier; values below 0 or above 12 fall outside
#             the window.
sub MonthsSinceYearAgo
{
  my ($year, $month) = @_ ;

  # how many months the given month lies before the end of the window
  my $months_before_end = ($year_0 - $year) * 12 + $month_0 - $month ;

  return 12 - $months_before_end ;
}
855 -
856 -#sub Log
857 -#{
858 -# $msg = shift ;
859 -# print $msg ;
860 -# print LOG $msg ;
861 -#}
862 -
863 -#sub LogT
864 -#{
865 -# $msg = shift ;
866 -# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
867 -# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ;
868 -# $msg =~ s/^(\n*)/$1$time/s ;
869 -# &Log ($msg) ;
870 -#}
871 -
# Current local time of day as "hh:mm:ss " (note the trailing space).
# Despite the name, the hour is included.
sub MmSs
{
  my @now = localtime (time) ;      # (sec, min, hour, ...)
  my ($hh, $mm, $ss) = @now[2,1,0] ;
  return (sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss)) ;
}
877 -
# Print an error message and terminate the script.
#
# Parameter: $msg - text describing why the run cannot continue
# Never returns. Exits with status 1 so that calling shell scripts can detect
# the failure (the former bare 'exit' reported success, status 0).
sub Abort
{
  my $msg = shift ;
  print "$msg\nExecution aborted." ;
  # to do: log also to file
  exit (1) ;
}
885 -
# Build the language/project lookup tables.
#
# %wikipedias maps a wiki code to "<url> <display name>". From it three global
# hashes are derived, all keyed by wiki code:
#   %out_urls      - the url part (first whitespace-delimited token)
#   %out_languages - the display name (everything after the first whitespace)
#   %out_article   - url of the English Wikipedia article "<name>_language"
#
# Codes containing a dash are listed twice, once with dashes (quoted) and once
# with underscores.
#
# Fixed compared to the copied table: 'cu', 'kab' and both 'be-x-old' keys
# pointed to the url of a different wiki (cv, ka and be respectively).
sub InitProjectNames
{
  # copied from WikiReports.pl

  %wikipedias = (
# mediawiki=>"http://wikimediafoundation.org Wikimedia",
    nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
    sources=>"http://wikisource.org Old&nbsp;Wikisource",
    meta=>"http://meta.wikimedia.org Meta-Wiki",
    beta=>"http://beta.wikiversity.org Beta",
    species=>"http://species.wikipedia.org WikiSpecies",
    commons=>"http://commons.wikimedia.org Commons",
    foundation=>"http://wikimediafoundation.org Wikimedia&nbsp;Foundation",
    sep11=>"http://sep11.wikipedia.org In&nbsp;Memoriam",
    nlwikimedia=>"http://nl.wikimedia.org Wikimedia&nbsp;Nederland",
    plwikimedia=>"http://pl.wikimedia.org Wikimedia&nbsp;Polska",
    mediawiki=>"http://www.mediawiki.org MediaWiki",
    dewikiversity=>"http://de.wikiversity.org Wikiversit&auml;t",
    frwikiversity=>"http://fr.wikiversity.org Wikiversit&auml;t",
    wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
    wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
    aa=>"http://aa.wikipedia.org Afar",
    ab=>"http://ab.wikipedia.org Abkhazian",
    af=>"http://af.wikipedia.org Afrikaans",
    ak=>"http://ak.wikipedia.org Akan", # was Akana
    als=>"http://als.wikipedia.org Alemannic", # was Elsatian
    am=>"http://am.wikipedia.org Amharic",
    an=>"http://an.wikipedia.org Aragonese",
    ang=>"http://ang.wikipedia.org Anglo-Saxon",
    ar=>"http://ar.wikipedia.org Arabic",
    arc=>"http://arc.wikipedia.org Aramaic",
    as=>"http://as.wikipedia.org Assamese",
    ast=>"http://ast.wikipedia.org Asturian",
    av=>"http://av.wikipedia.org Avar", # was Avienan
    ay=>"http://ay.wikipedia.org Aymara",
    az=>"http://az.wikipedia.org Azeri", # was Azerbaijani
    ba=>"http://ba.wikipedia.org Bashkir",
    bar=>"http://bar.wikipedia.org Bavarian",
    bat_smg=>"http://bat-smg.wikipedia.org Samogitian",
    "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
    bcl=>"http://bcl.wikipedia.org Central Bicolano",
    be=>"http://be.wikipedia.org Belarusian",
    "be-x-old"=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)",
    be_x_old=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)",
    bg=>"http://bg.wikipedia.org Bulgarian",
    bh=>"http://bh.wikipedia.org Bihari",
    bi=>"http://bi.wikipedia.org Bislama",
    bm=>"http://bm.wikipedia.org Bambara",
    bn=>"http://bn.wikipedia.org Bengali",
    bo=>"http://bo.wikipedia.org Tibetan",
    bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri",
    br=>"http://br.wikipedia.org Breton",
    bs=>"http://bs.wikipedia.org Bosnian",
    bug=>"http://bug.wikipedia.org Buginese",
    bxr=>"http://bxr.wikipedia.org Buryat",
    ca=>"http://ca.wikipedia.org Catalan",
    cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano",
    "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
    cdo=>"http://cdo.wikipedia.org Min Dong",
    ce=>"http://ce.wikipedia.org Chechen",
    ceb=>"http://ceb.wikipedia.org Cebuano",
    ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru
    cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw
    chr=>"http://chr.wikipedia.org Cherokee",
    chy=>"http://chy.wikipedia.org Cheyenne", # was Sets&ecirc;hest&acirc;hese
    co=>"http://co.wikipedia.org Corsican",
    cr=>"http://cr.wikipedia.org Cree",
    crh=>"http://crh.wikipedia.org Crimean Tatar",
    cs=>"http://cs.wikipedia.org Czech",
    csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian
    cu=>"http://cu.wikipedia.org Old Church Slavonic",
    cv=>"http://cv.wikipedia.org Chuvash", # was Cavas
    cy=>"http://cy.wikipedia.org Welsh",
    da=>"http://da.wikipedia.org Danish",
    de=>"http://de.wikipedia.org German",
    diq=>"http://diq.wikipedia.org Zazaki",
    dk=>"http://dk.wikipedia.org Danish",
    dsb=>"http://dsb.wikipedia.org Lower Sorbian",
    dv=>"http://dv.wikipedia.org Divehi",
    dz=>"http://dz.wikipedia.org Dzongkha",
    ee=>"http://ee.wikipedia.org Ewe",
    el=>"http://el.wikipedia.org Greek",
    eml=>"http://eml.wikipedia.org Emilian-Romagnol",
    en=>"http://en.wikipedia.org English",
    eo=>"http://eo.wikipedia.org Esperanto",
    es=>"http://es.wikipedia.org Spanish",
    et=>"http://et.wikipedia.org Estonian",
    eu=>"http://eu.wikipedia.org Basque",
    ext=>"http://ext.wikipedia.org Extremaduran",
    fa=>"http://fa.wikipedia.org Persian",
    ff=>"http://ff.wikipedia.org Fulfulde",
    fi=>"http://fi.wikipedia.org Finnish",
    "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro",
    fiu_vro=>"http://fiu-vro.wikipedia.org Voro",
    fj=>"http://fj.wikipedia.org Fijian",
    fo=>"http://fo.wikipedia.org Faroese", # was Faeroese
    fr=>"http://fr.wikipedia.org French",
    frp=>"http://frp.wikipedia.org Arpitan",
    fur=>"http://fur.wikipedia.org Friulian",
    fy=>"http://fy.wikipedia.org Frisian",
    ga=>"http://ga.wikipedia.org Irish",
    gan=>"http://gan.wikipedia.org Gan",
    gay=>"http://gay.wikipedia.org Gayo",
    gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic
    gl=>"http://gl.wikipedia.org Galician", # was Galego
    glk=>"http://glk.wikipedia.org Gilaki",
    gn=>"http://gn.wikipedia.org Guarani",
    got=>"http://got.wikipedia.org Gothic",
    gu=>"http://gu.wikipedia.org Gujarati",
    gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic
    ha=>"http://ha.wikipedia.org Hausa",
    hak=>"http://hak.wikipedia.org Hakka",
    haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian
    he=>"http://he.wikipedia.org Hebrew",
    hi=>"http://hi.wikipedia.org Hindi",
    hif=>"http://hif.wikipedia.org Fiji Hindi",
    ho=>"http://ho.wikipedia.org Hiri Motu",
    hr=>"http://hr.wikipedia.org Croatian",
    hsb=>"http://hsb.wikipedia.org Upper Sorbian",
    ht=>"http://ht.wikipedia.org Haitian",
    hu=>"http://hu.wikipedia.org Hungarian",
    hy=>"http://hy.wikipedia.org Armenian",
    hz=>"http://hz.wikipedia.org Herero",
    ia=>"http://ia.wikipedia.org Interlingua",
    iba=>"http://iba.wikipedia.org Iban",
    id=>"http://id.wikipedia.org Indonesian",
    ie=>"http://ie.wikipedia.org Interlingue",
    ig=>"http://ig.wikipedia.org Igbo",
    ii=>"http://ii.wikipedia.org Yi",
    ik=>"http://ik.wikipedia.org Inupiak",
    ilo=>"http://ilo.wikipedia.org Ilokano",
    io=>"http://io.wikipedia.org Ido",
    is=>"http://is.wikipedia.org Icelandic",
    it=>"http://it.wikipedia.org Italian",
    iu=>"http://iu.wikipedia.org Inuktitut",
    ja=>"http://ja.wikipedia.org Japanese",
    jbo=>"http://jbo.wikipedia.org Lojban",
    jv=>"http://jv.wikipedia.org Javanese",
    ka=>"http://ka.wikipedia.org Georgian",
    kaa=>"http://kaa.wikipedia.org Karakalpak",
    kab=>"http://kab.wikipedia.org Kabyle",
    kaw=>"http://kaw.wikipedia.org Kawi",
    kg=>"http://kg.wikipedia.org Kongo",
    ki=>"http://ki.wikipedia.org Kikuyu",
    kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo
    kk=>"http://kk.wikipedia.org Kazakh",
    kl=>"http://kl.wikipedia.org Greenlandic",
    km=>"http://km.wikipedia.org Khmer", # was Cambodian
    kn=>"http://kn.wikipedia.org Kannada",
    ko=>"http://ko.wikipedia.org Korean",
    kr=>"http://kr.wikipedia.org Kanuri",
    ks=>"http://ks.wikipedia.org Kashmiri",
    ksh=>"http://ksh.wikipedia.org Ripuarian",
    ku=>"http://ku.wikipedia.org Kurdish",
    kv=>"http://kv.wikipedia.org Komi",
    kw=>"http://kw.wikipedia.org Cornish", # was Kornish
    ky=>"http://ky.wikipedia.org Kirghiz",
    la=>"http://la.wikipedia.org Latin",
    lad=>"http://lad.wikipedia.org Ladino",
    lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch
    lbe=>"http://lbe.wikipedia.org Lak",
    lg=>"http://lg.wikipedia.org Ganda",
    li=>"http://li.wikipedia.org Limburgish",
    lij=>"http://lij.wikipedia.org Ligurian",
    lmo=>"http://lmo.wikipedia.org Lombard",
    ln=>"http://ln.wikipedia.org Lingala",
    lo=>"http://lo.wikipedia.org Laotian",
    ls=>"http://ls.wikipedia.org Latino Sine Flexione",
    lt=>"http://lt.wikipedia.org Lithuanian",
    lv=>"http://lv.wikipedia.org Latvian",
    mad=>"http://mad.wikipedia.org Madurese",
    mak=>"http://mak.wikipedia.org Makasar",
    map_bms=>"http://map-bms.wikipedia.org Banyumasan",
    "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
    mdf=>"http://mdf.wikipedia.org Moksha",
    mg=>"http://mg.wikipedia.org Malagasy",
    mh=>"http://mh.wikipedia.org Marshallese",
    mi=>"http://mi.wikipedia.org Maori",
    min=>"http://min.wikipedia.org Minangkabau",
    minnan=>"http://minnan.wikipedia.org Minnan",
    mk=>"http://mk.wikipedia.org Macedonian",
    ml=>"http://ml.wikipedia.org Malayalam",
    mn=>"http://mn.wikipedia.org Mongolian",
    mo=>"http://mo.wikipedia.org Moldavian",
    mr=>"http://mr.wikipedia.org Marathi",
    ms=>"http://ms.wikipedia.org Malay",
    mt=>"http://mt.wikipedia.org Maltese",
    mus=>"http://mus.wikipedia.org Muskogee",
    my=>"http://my.wikipedia.org Burmese",
    myv=>"http://myv.wikipedia.org Erzya",
    mzn=>"http://mzn.wikipedia.org Mazandarani",
    na=>"http://na.wikipedia.org Nauruan", # was Nauru
    nah=>"http://nah.wikipedia.org Nahuatl",
    nap=>"http://nap.wikipedia.org Neapolitan",
    nds=>"http://nds.wikipedia.org Low Saxon",
    nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
    "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
    ne=>"http://ne.wikipedia.org Nepali",
    new=>"http://new.wikipedia.org Nepal Bhasa",
    ng=>"http://ng.wikipedia.org Ndonga",
    nl=>"http://nl.wikipedia.org Dutch",
    nov=>"http://nov.wikipedia.org Novial",
    nrm=>"http://nrm.wikipedia.org Norman",
    nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian
    no=>"http://no.wikipedia.org Norwegian",
    nv=>"http://nv.wikipedia.org Navajo", # was Avayo
    ny=>"http://ny.wikipedia.org Chichewa",
    oc=>"http://oc.wikipedia.org Occitan",
    om=>"http://om.wikipedia.org Oromo",
    or=>"http://or.wikipedia.org Oriya",
    os=>"http://os.wikipedia.org Ossetic",
    pa=>"http://pa.wikipedia.org Punjabi",
    pag=>"http://pag.wikipedia.org Pangasinan",
    pam=>"http://pam.wikipedia.org Kapampangan",
    pap=>"http://pap.wikipedia.org Papiamentu",
    pdc=>"http://pdc.wikipedia.org Pennsylvania German",
    pi=>"http://pi.wikipedia.org Pali",
    pih=>"http://pih.wikipedia.org Norfolk",
    pl=>"http://pl.wikipedia.org Polish",
    pms=>"http://pms.wikipedia.org Piedmontese",
    ps=>"http://ps.wikipedia.org Pashto",
    pt=>"http://pt.wikipedia.org Portuguese",
    qu=>"http://qu.wikipedia.org Quechua",
    rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance
    rmy=>"http://rmy.wikipedia.org Romani",
    rn=>"http://rn.wikipedia.org Kirundi",
    ro=>"http://ro.wikipedia.org Romanian",
    roa_rup=>"http://roa-rup.wikipedia.org Aromanian",
    "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian",
    roa_tara=>"http://roa-tara.wikipedia.org Tarantino",
    "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
    ru=>"http://ru.wikipedia.org Russian",
    ru_sib=>"http://ru-sib.wikipedia.org Siberian",
    "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
    rw=>"http://rw.wikipedia.org Kinyarwanda",
    sa=>"http://sa.wikipedia.org Sanskrit",
    sah=>"http://sah.wikipedia.org Sakha",
    sc=>"http://sc.wikipedia.org Sardinian",
    scn=>"http://scn.wikipedia.org Sicilian",
    sco=>"http://sco.wikipedia.org Scots",
    sd=>"http://sd.wikipedia.org Sindhi",
    se=>"http://se.wikipedia.org Northern Sami",
    sg=>"http://sg.wikipedia.org Sangro",
    sh=>"http://sh.wikipedia.org Serbo-Croatian",
    si=>"http://si.wikipedia.org Sinhala", # was Singhalese
    simple=>"http://simple.wikipedia.org Simple English",
    sk=>"http://sk.wikipedia.org Slovak",
    sl=>"http://sl.wikipedia.org Slovene",
    sm=>"http://sm.wikipedia.org Samoan",
    sn=>"http://sn.wikipedia.org Shona",
    so=>"http://so.wikipedia.org Somali", # was Somalian
    sq=>"http://sq.wikipedia.org Albanian",
    sr=>"http://sr.wikipedia.org Serbian",
    srn=>"http://srn.wikipedia.org Sranan",
    ss=>"http://ss.wikipedia.org Siswati",
    st=>"http://st.wikipedia.org Sesotho",
    stq=>"http://stq.wikipedia.org Saterland Frisian",
    su=>"http://su.wikipedia.org Sundanese",
    sv=>"http://sv.wikipedia.org Swedish",
    sw=>"http://sw.wikipedia.org Swahili",
    szl=>"http://szl.wikipedia.org Silesian",
    ta=>"http://ta.wikipedia.org Tamil",
    te=>"http://te.wikipedia.org Telugu",
    test=>"http://test.wikipedia.org Test",
    tet=>"http://tet.wikipedia.org Tetum",
    tg=>"http://tg.wikipedia.org Tajik",
    th=>"http://th.wikipedia.org Thai",
    ti=>"http://ti.wikipedia.org Tigrinya",
    tk=>"http://tk.wikipedia.org Turkmen",
    tl=>"http://tl.wikipedia.org Tagalog",
    tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
    tn=>"http://tn.wikipedia.org Setswana",
    to=>"http://to.wikipedia.org Tongan",
    tokipona=>"http://tokipona.wikipedia.org Tokipona",
    tpi=>"http://tpi.wikipedia.org Tok Pisin",
    tr=>"http://tr.wikipedia.org Turkish",
    ts=>"http://ts.wikipedia.org Tsonga",
    tt=>"http://tt.wikipedia.org Tatar",
    tum=>"http://tum.wikipedia.org Tumbuka",
    turn=>"http://turn.wikipedia.org Turnbuka",
    tw=>"http://tw.wikipedia.org Twi",
    ty=>"http://ty.wikipedia.org Tahitian",
    udm=>"http://udm.wikipedia.org Udmurt",
    ug=>"http://ug.wikipedia.org Uighur",
    uk=>"http://uk.wikipedia.org Ukrainian",
    ur=>"http://ur.wikipedia.org Urdu",
    uz=>"http://uz.wikipedia.org Uzbek",
    ve=>"http://ve.wikipedia.org Venda", # was Lushaka
    vec=>"http://vec.wikipedia.org Venetian",
    vi=>"http://vi.wikipedia.org Vietnamese",
    vls=>"http://vls.wikipedia.org West Flemish",
    vo=>"http://vo.wikipedia.org Volap&uuml;k",
    wa=>"http://wa.wikipedia.org Walloon",
    war=>"http://war.wikipedia.org Waray-Waray",
    wo=>"http://wo.wikipedia.org Wolof",
    wuu=>"http://wuu.wikipedia.org Wu",
    xal=>"http://xal.wikipedia.org Kalmyk",
    xh=>"http://xh.wikipedia.org Xhosa",
    yi=>"http://yi.wikipedia.org Yiddish",
    yo=>"http://yo.wikipedia.org Yoruba",
    za=>"http://za.wikipedia.org Zhuang",
    zea=>"http://zea.wikipedia.org Zealandic",
    zh=>"http://zh.wikipedia.org Chinese",
    zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan",
    "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan",
    zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese",
    "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese",
    zh_yue=>"http://zh-yue.wikipedia.org Cantonese",
    "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese",
    zu=>"http://zu.wikipedia.org Zulu",
    zz=>"&nbsp; All&nbsp;languages",
    zzz=>"&nbsp; All&nbsp;languages except English"
  );

  foreach my $key (keys %wikipedias)
  {
    # every entry is "<url><whitespace><name>"; the name may itself contain spaces
    my ($url, $name) = split (/\s+/, $wikipedias {$key}, 2) ;

    $out_urls      {$key} = $url ;
    $out_languages {$key} = $name ;

    # link to the English Wikipedia article about the language, spaces become underscores
    my $article = "http://en.wikipedia.org/wiki/" . $name . "_language" ;
    $article =~ s/ /_/g ;
    $out_article {$key} = $article ;
  }
}
1212 -
1213 -# copied from WikiReports_EN.pl
# Fill the global @out_report_descriptions with the English report titles.
# The position in the list matters: entries are looked up by index.
sub InitReportNames
{
  my @titles ;

  push @titles, "Contributors" ;               #  0
  push @titles, "New editors" ;                #  1
  push @titles, "Active editors" ;             #  2
  push @titles, "Very active editors" ;        #  3
  push @titles, "Article count (official)" ;   #  4
  push @titles, "Article count (alternate)" ;  #  5
  push @titles, "New articles per day" ;       #  6
  push @titles, "Edits per article" ;          #  7
  push @titles, "Bytes per article" ;          #  8
  push @titles, "Articles over 0.5 Kb" ;       #  9
  push @titles, "Articles over 2 Kb" ;         # 10
  push @titles, "Edits per month" ;            # 11
  push @titles, "Database size" ;              # 12
  push @titles, "Words" ;                      # 13
  push @titles, "Internal links" ;             # 14
  push @titles, "Links to other Wikipedias" ;  # 15
  push @titles, "Binaries" ;                   # 16
  push @titles, "External links" ;             # 17
  push @titles, "Redirects" ;                  # 18
  push @titles, "Page requests per day" ;      # 19
  push @titles, "Visits per day" ;             # 20
  push @titles, "Overview recent months" ;     # 21

  @out_report_descriptions = @titles ;
}
1241 -
 2+#!/usr/local/bin/perl
 3+# -i "w:/# Out Bayes" -o "w:/@ Report Card/Data"
 4+
# --- libraries ---------------------------------------------------------------

use lib "/home/ezachte/lib" ;
use EzLib ;
use Getopt::Std ;

$trace_on_exit = $true ;
ez_lib_version (2) ;

# --- reporting window: 13 months, ending with this month ---------------------

$year_0  = 2010 ;
$month_0 = "08" ;

# --- default command line, mainly for tests on local machine -----------------

default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;

# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;

# --- settings ----------------------------------------------------------------

$maxpopularwikis = 25 ;

# project codes; @projects2b holds the column titles matching @projects2
@projects   = qw(wb wk wn wp wq ws wv commons) ;
@projects2  = qw(wb wk wn wp wq ws wv wx tot) ;
@projects2b = qw(Wikibooks Wiktionary Wikinews Wikipedia Wikiquote Wikisource Wikiversity Other Total) ;

# --- main sequence -----------------------------------------------------------

LogArguments () ;
ParseArguments () ;
InitProjectNames () ;
InitReportNames () ;
ReadStatisticsMonthly () ;
WriteYearlyData () ;
# WriteMonthlyData () ;
exit ;
 34+
# Parse the command line switches (-i -o -l -p -f -t, each taking a value) into
# the global %options and print them, one per line, sorted by switch letter.
sub LogArguments
{
  getopt ("iolpft", \%options) ;

  my $arguments ;
  for my $switch (sort keys %options)
  { $arguments .= " -$switch $options{$switch}\n" ; }

  print ("\nArguments\n$arguments\n") ;
# &Log ("\nArguments\n$arguments\n") ;
}
 44+
# Set the input/output folders and the names of the csv files to write, then
# initialise the comparison periods for $year_0/$month_0.
#
# Sets globals: $path_in, $path_out, $file_csv_out, $file_csv_out_year.
# The -i / -o command line handling is disabled (kept below, commented out);
# both folders are hard-coded.
sub ParseArguments
{
# my @options ;
# getopt ("io", \%options) ;

# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;

# $path_in = $options {"i"} ;
# $path_out = $options {"o"} ;

# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;

  ($path_in, $path_out) = ("w:/# out bayes", "w:/@ report card/data") ;

  print "Input folder: $path_in\n",
        "Output folder: $path_out\n",
        "\n" ;

  $file_csv_out      = sprintf ("%s/StatisticsMonthly_%s_%s.csv", $path_out, $year_0, $month_0) ;
  $file_csv_out_year = $path_out . "/StatisticsYearly.csv" ;

  &SetComparisonPeriods ($year_0, $month_0) ;
}
 71+
# Read the monthly statistics for every project, one project at a time,
# followed by the per-extension binaries counts for Commons.
sub ReadStatisticsMonthly
{
  foreach my $code (qw(wb wk wn wp wq ws wv wx))
  { ReadStatisticsMonthlyForProject ($code) ; }

  ReadStatisticsPerBinariesExtensionCommons () ;
}
 85+
# Read the monthly counts for one project and add them to the global tables.
#
# Parameter: $project - project code ("wb", "wk", "wn", "wp", "wq", "ws", "wv", "wx")
#
# Input files (below "$path_in/csv_$project"):
#   StatisticsMonthly.csv            - language,mm/dd/yyyy,counts...
#   StatisticsUserActivitySpread.csv - language,mm/dd/yyyy,R|B,A|T|O,counts...
#
# Updates these globals (f = column number in StatisticsMonthly.csv,
# m = month index 0..12 as returned by MonthsSinceYearAgo):
#   %values         {"f,m"}{"project,language"}
#   %totals         {"f,m"}
#   %totals_project {"f,m"}{project}   (Commons rows also under key 'commons')
#   %june_articles  {"project,year"} and {"tot,year"}, %years {year}
#   %languages      {language}         (accumulates over all projects read so far)
# %lines is used as scratch space and is cleared before each file is read.
#
# Aborts via Abort() when an input file is missing or cannot be opened.
#
# Fixes compared to the previous version:
# - the window test for the first file was "! ($m < 0) || ($m > 12)", which
#   accepted months after the window; it now matches the test for the second file
# - months for which a language has no row at all are skipped, instead of being
#   processed as an empty line (which created entries under key "$project,")
# - failure to open an input file is reported
sub ReadStatisticsMonthlyForProject
{
  my $project = shift;

  my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
  my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;

  if (! -e $file_csv_in_1)
  { &Abort ("Input file '$file_csv_in_1' not found") ; }
  if (! -e $file_csv_in_2)
  { &Abort ("Input file '$file_csv_in_2' not found") ; }

  # ---- file 1: new editors, articles, new articles, edits ----

  print "Read '$file_csv_in_1'\n" ;
  open (my $csv_in, '<', $file_csv_in_1) or &Abort ("Input file '$file_csv_in_1' could not be opened: $!") ;

  undef %lines ;
  while (my $line = <$csv_in>)
  {
    my ($language, $date, $counts) = split (',', $line, 3) ;

    next if $language eq 'commons' and $project ne 'wx' ;
    next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on

    my ($month, $day, $year) = split ('\/', $date) ;

    # yearly overview uses the article count of every June, regardless of the 13 month window
    if ($month == 6)
    {
      my @fields = split (',', $counts) ;
      my $articles = $fields [4] ;
      $june_articles {"$project,$year"} += $articles ;
      $june_articles {"tot,$year"} += $articles ;
      $years {$year} ++ ;
    }

    my $m = &MonthsSinceYearAgo ($year, $month) ;
    next if ($m < 0) || ($m > 12) ;

    $lines {$language}{$m} = $line ;
    $languages {$language} ++ ;
  }
  close $csv_in ;

  # for Wikipedia all four columns are kept per language and added to the overall totals,
  # for other projects new articles (6) and edits (11) are only added to the project totals
  my @columns_full         = ($project eq "wp") ? (1,4,6,11) : (1,4) ; # new editors, articles, new articles, edits
  my @columns_project_only = ($project eq "wp") ? ()         : (6,11) ;

  foreach my $language (sort keys %languages)
  {
    # a missing month inherits the line of the month before it
    for my $m (1 .. 12)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for my $m (0 .. 12)
    {
      my $line = $lines {$language}{$m} ;
      next if ! defined ($line) || $line eq '' ; # no data (yet) for this language in this month

      chomp $line ;
      my $counts = (split (',', $line, 3)) [2] ;
      my @fields = split (',', $counts) ;

      foreach my $f (@columns_full)
      {
        $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
        $totals {"$f,$m"} += $fields [$f] ;
        $totals_project {"$f,$m"} {$project} += $fields [$f] ;

        if ($language eq 'commons')
        { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
      }

      foreach my $f (@columns_project_only)
      {
        $totals_project {"$f,$m"} {$project} += $fields [$f] ;

        if ($language eq 'commons')
        { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
      }
    }
  }

  # ---- file 2: (very) active editors ----
  # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
  # but use f = column count in StatisticsMonthly.csv

  print "Read '$file_csv_in_2'\n" ;
  open ($csv_in, '<', $file_csv_in_2) or &Abort ("Input file '$file_csv_in_2' could not be opened: $!") ;

  undef %lines ;
  while (my $line = <$csv_in>)
  {
    chomp $line ;
    my ($language, $date, $reguser_bot, $group, $counts) = split (',', $line, 5) ;

    next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
    # next if $language eq 'commons' ; # ignore editor count on commons alltogether, most are already counted for other project
    # (even for several projects, to be tuned after centralauth dump is available)

    next if $reguser_bot ne "R" ; # R: reg user, B: bot
    next if $group ne "A" ;       # A: articles, T: talk pages, O: other namespaces

    my ($month, $day, $year) = split ('\/', $date) ;
    my $m = &MonthsSinceYearAgo ($year, $month) ;
    next if ($m < 0) || ($m > 12) ;

    $lines {$language}{$m} = $line ;
    $languages {$language} ++ ;
  }
  close $csv_in ;

  # count user with over x edits
  # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
  # thresholds = 1,3,5,10,25,32,50,100,etc
  # key: f (column in StatisticsMonthly.csv), value: column in StatisticsUserActivitySpread.csv
  my %threshold_column = (2 => 2,   # editors_gt_5
                          3 => 7) ; # editors_gt_100

  foreach my $language (sort keys %languages)
  {
    for my $m (1 .. 12)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for my $m (0 .. 12)
    {
      my $line = $lines {$language}{$m} ;
      next if ! defined ($line) || $line eq '' ; # no data (yet) for this language in this month

      chomp $line ;
      my $counts = (split (',', $line, 5)) [4] ;
      my @fields = split (',', $counts) ;

      foreach my $f (2,3)
      {
        my $f2 = $threshold_column {$f} ;

        $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;

        if ($language ne 'commons') # ignore editor count on commons for totals, most editors are already counted for other project
        { $totals {"$f,$m"} += $fields [$f2] ; } # (even for several projects, to be tuned after centralauth dump is available)

        $totals_project {"$f,$m"} {$project} += $fields [$f2] ;

        if ($language eq 'commons')
        { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
      }
    }
  }
}
 245+
# Read the number of binaries per file extension on Commons and rank the
# extensions by their count in the most recent month inside the 13-month window.
#
# Input: "$path_in/csv_wx/StatisticsPerBinariesExtension.csv", rows of the form
#   language,mm/yyyy,count,count,...   (only rows for 'commons' are used)
# The row dated "00/0000" holds the column headers instead of counts.
#
# Sets these globals (all rebuilt from scratch on every call):
#   %ext_cnt {m}{column}  - count per month index m; m = -1 holds the header row
#   %ext_tot {m}          - sum of all columns for month index m
#   %ext_cnt_mmax         - copy of %ext_cnt for the latest month found
#   @ext_cnt_mmax         - all column indexes, highest count in that month first
#   @extndxs              - the first 10 entries of @ext_cnt_mmax
#   $extcnt               - number of entries in @extndxs
#
# Aborts via Abort() when the input file is missing or cannot be opened.
#
# Fixes compared to the previous version: failure to open the file is reported;
# when no month falls inside the window the ranking is left empty (it used to
# be built from the header row); repeated calls no longer accumulate results.
sub ReadStatisticsPerBinariesExtensionCommons
{
  my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
  my $max_ranked  = 10 ;
  my $mmax        = -1 ; # latest month index seen, -1 = none

  if (! -e $file_csv_in)
  { &Abort ("Input file '$file_csv_in' not found") ; }

  print "Read '$file_csv_in'\n" ;
  open (my $csv_in, '<', $file_csv_in) or &Abort ("Input file '$file_csv_in' could not be opened: $!") ;

  %ext_cnt      = () ;
  %ext_tot      = () ;
  %ext_cnt_mmax = () ;
  @ext_cnt_mmax = () ;
  @extndxs      = () ;

  while (my $line = <$csv_in>)
  {
    chomp $line ;
    my ($language, $date, $counts) = split (',', $line, 3) ;

    next if $language ne "commons" ;

    my @fields = split (',', $counts) ;

    # header row: remember the column names under month index -1
    if ($date eq "00/0000")
    {
      for my $field_ndx (0 .. $#fields)
      { $ext_cnt {-1}{$field_ndx} = $fields [$field_ndx] ; }
      next ;
    }

    my ($month, $year) = split ('\/', $date) ;
    my $m = &MonthsSinceYearAgo ($year, $month) ;
    next if ($m < 0) || ($m > 12) ;

    if ($m > $mmax)
    { $mmax = $m ; }

    for my $field_ndx (0 .. $#fields)
    {
      $ext_cnt {$m}{$field_ndx} = $fields [$field_ndx] ;
      $ext_tot {$m} += $fields [$field_ndx] ;
    }
  }
  close $csv_in ;

  # rank the columns by their count in the latest month, keep the top ones
  if ($mmax >= 0)
  {
    %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
    @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;

    my $last = $#ext_cnt_mmax ;
    if ($last > $max_ranked - 1)
    { $last = $max_ranked - 1 ; }

    @extndxs = @ext_cnt_mmax [0 .. $last] ;
  }

  $extcnt = scalar (@extndxs) ; # formerly the loop counter; kept in case other code reads it - TODO confirm
}
 305+
# Stub: currently does nothing, the whole body is commented out.
# The commented-out code opens and immediately closes the two files named by
# $file_regions_UV and $file_regions_Reach, whose assignments are likewise
# commented out at the top of this script.
# Note when re-enabling: 'FILE-UV' in the first close is a typo for FILE_UV.
sub ReadMediaTrends
{
# open FILE_UV, '<', $file_regions_UV ;
# close FILE-UV ;

# open FILE_REACH, '<', $file_regions_Reach ;
# close FILE_REACH ;
}
 314+
# Write the yearly overview "Articles per project per year (June 30)".
#
# Output goes to the file $file_csv_out_year and is echoed to the console.
# Two tables are written: one with plain numbers, then a "Readable version"
# with large numbers abbreviated (1.5M, 500k). Each table has one row per year
# in %years, and per project in @projects2 (titles from @projects2b) two
# columns: article count and growth compared to the year before. The final
# column pair "Growth" shows how many articles were added overall in that year
# and how that compares to the year before.
#
# Reads globals:  %years, %june_articles, @projects2, @projects2b, $file_csv_out_year
# Sets global:    %june_diff {year} = overall articles added since previous June
#
# Aborts via Abort() when the output file cannot be opened.
#
# Fixes compared to the previous version: the abbreviation of the growth figure
# tested $count instead of $diff, so values between 1000 and 1000000 were never
# shortened to 'k'; the output file is now checked on open and closed at the end.
sub WriteYearlyData
{
  print "Write file '$file_csv_out_year'\n" ;
  open (my $csv_out, '>', $file_csv_out_year) or &Abort ("Output file '$file_csv_out_year' could not be opened: $!") ;

  # every line goes to the console and to the csv file
  my $emit = sub
  {
    my $text = shift ;
    print "$text\n" ;
    print $csv_out "$text\n" ;
  } ;

  # each project title spans two columns (count, growth)
  my $header = join ('', map (",$_,", @projects2b)) . ",Growth," ;

  my @years = sort keys %years ;

  foreach my $year (@years)
  { $june_diff {$year} = $june_articles {"tot,$year"} - $june_articles {"tot," . ($year-1)} ; }

  foreach my $readable (0, 1)
  {
    if ($readable)
    { $emit->("\nReadable version") ; }
    else
    { $emit->("Articles per project per year (June 30)") ; }

    $emit->($header) ;

    foreach my $year (@years)
    {
      my @cells = ("$year") ;

      foreach my $project (@projects2)
      {
        my $count      = $june_articles {"$project,$year"} ;
        my $count_prev = $june_articles {"$project," . ($year-1)} ;

        push @cells, ($readable ? _YearlyReadableCount ($count) : $count) ;
        push @cells, _YearlyGrowthPerc ($count, $count_prev) ;
      }

      my $diff      = $june_diff {$year} ;
      my $diff_prev = $june_diff {$year-1} ;

      push @cells, ($readable ? _YearlyReadableCount ($diff) : $diff) ;
      push @cells, _YearlyGrowthPerc ($diff, $diff_prev) ;

      $emit->(join (',', map { defined ($_) ? $_ : '' } @cells)) ;
    }
  }

  close $csv_out ;
}

# Growth of $now relative to $before as a whole percentage with sign, e.g. "+50%" or "-3%".
# Returns "-" when $before is not a positive number.
sub _YearlyGrowthPerc
{
  my ($now, $before) = @_ ;

  return "-" if ! ($before > 0) ;

  my $perc = sprintf ("%.0f", 100 * ($now / $before) - 100) . '%' ;
  $perc =~ s/^(\d)/+$1/ ; # explicit plus sign for non-negative growth
  return $perc ;
}

# Abbreviate a count: millions with one decimal and 'M', thousands rounded with 'k',
# anything smaller unchanged.
sub _YearlyReadableCount
{
  my $count = shift ;

  if ($count >= 1000000)
  { return sprintf ("%.1f", $count / 1000000) . 'M' ; }
  if ($count >= 1000)
  { return sprintf ("%.0f", $count / 1000) . 'k' ; }

  return $count ;
}
 399+
 400+sub WriteMonthlyData
 401+{
 402+ print "Write file '$file_csv_out'\n" ;
 403+ open CSV_OUT, '>', $file_csv_out ;
 404+ $output = "" ;
 405+ foreach $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
 406+ {
 407+
 408+ $output .= "\n,${out_report_descriptions [$f]} - Absolute\n" ;
 409+ $output .= "$csv_recent_months,%inc year, %inc month\n" ;
 410+
 411+ $line = ",Total," ;
 412+ for ($m = 0 ; $m <= 12 ; $m++)
 413+ { $line .= $totals {"$f,$m"} . "," ; }
 414+
 415+ # growth in one year
 416+ if ($totals {"$f,0"} != 0)
 417+ { $line .= sprintf ("%.1f", 100 * ($totals {"$f,12"} / $totals {"$f,0"}) - 100). "%," ; }
 418+ else
 419+ { $line .= "n.a.," ; }
 420+
 421+ # growth in one month
 422+ if ($totals {"$f,11"} != 0)
 423+ { $line .= sprintf ("%.1f", 100 * ($totals {"$f,12"} / $totals {"$f,11"}) - 100). "%," ; }
 424+ else
 425+ { $line .= "n.a.," ; }
 426+
 427+ $line =~ s/,$// ;
 428+ $output .= "$line\n" ;
 429+
 430+ # sort by absolute amount for last month
 431+ %values_f_12 = %{$values {"$f,12"}} ;
 432+ $index = 1 ;
 433+ foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
 434+ {
 435+ # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;
 436+
 437+ ($project,$language) = split (",", $key) ;
 438+ $language_name = $out_languages {$language} ;
 439+ if (($project ne "wp") && ($project ne "wx"))
 440+ { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
 441+ else
 442+ { $line = "$index,$language_name," ; }
 443+
 444+ for ($m = 0 ; $m <= 12 ; $m++)
 445+ { $line .= $values {"$f,$m"} {$key} . "," ; }
 446+
 447+ if ($values {"$f,0"} {$key} != 0)
 448+ { $line .= sprintf ("%.1f", 100 * ($values {"$f,12"} {$key} / $values {"$f,0"} {$key}) - 100). "%," ; }
 449+ else
 450+ { $line .= "n.a.," ; }
 451+
 452+ if ($values {"$f,11"} {$key} != 0)
 453+ { $line .= sprintf ("%.1f", 100 * ($values {"$f,12"} {$key} / $values {"$f,11"} {$key}) - 100). "%," ; }
 454+ else
 455+ { $line .= "n.a.," ; }
 456+
 457+ $line =~ s/,$// ;
 458+ $output .= "$line\n" ;
 459+
 460+ if ($index++ >= 25) { last ; }
 461+ }
 462+ $output .= "\n" ;
 463+
 464+ foreach $project (sort {$totals_project {"$f,12"} {$b} <=> $totals_project {"$f,12"} {$a}} @projects)
 465+ {
 466+# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
 467+
 468+ if ($project eq 'commons')
 469+ { $line = ",Commons," ; }
 470+ else
 471+ { $line = "," . &GetProjectName ($project) . "," ; }
 472+
 473+ for ($m = 0 ; $m <= 12 ; $m++)
 474+ { $line .= $totals_project {"$f,$m"} {$project} . "," ; }
 475+
 476+ if ($totals_project {"$f,0"} {$project} != 0)
 477+ { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,12"} {$project} / $totals_project {"$f,0"} {$project}) - 100). "%," ; }
 478+ else
 479+ { $line .= "n.a.," ; }
 480+
 481+ if ($totals_project {"$f,11"} {$project} != 0)
 482+ { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,12"} {$project} / $totals_project {"$f,11"} {$project}) - 100). "%," ; }
 483+ else
 484+ { $line .= "n.a.," ; }
 485+
 486+ $line =~ s/,$// ;
 487+ $output .= "$line\n" ;
 488+ }
 489+
 490+ $output .= "\n,${out_report_descriptions [$f]} - Indexed\n" ;
 491+ $output .= "$csv_recent_months\n" ;
 492+
 493+ # sort by absolute amount for last month
 494+ $index = 1 ;
 495+ foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
 496+ {
 497+ # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;
 498+
 499+ ($project,$language) = split (",", $key) ;
 500+ $language_name = $out_languages {$language} ;
 501+ if (($project ne "wp") && ($project ne "wx"))
 502+ { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
 503+ else
 504+ { $line = "$index,$language_name," ; }
 505+
 506+ $value_100 = $values {"$f,0"} {$key} ;
 507+ for ($m = 0 ; $m <= 12 ; $m++)
 508+ {
 509+ if ($value_100 != 0)
 510+ { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m"} {$key} / $value_100)) . "," ; }
 511+ else
 512+ { $line .= "," ; }
 513+ }
 514+ $line =~ s/,$// ;
 515+ $output .= "$line\n" ;
 516+
 517+ # put totals last in chart to show line on top of others
 518+ if ($index == 9)
 519+ {
 520+ $line = ",Total," ;
 521+ $total_100 = $totals {"$f,0"} ;
 522+ for ($m = 0 ; $m <= 12 ; $m++)
 523+ {
 524+ if ($total_100 != 0)
 525+ { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m"} / $total_100)) . "," ; }
 526+ else
 527+ { $line .= "," ; }
 528+ }
 529+ $line .= ",(sorted here to make it top-most line out of 10 in Excel)" ;
 530+ $output .= "$line\n" ;
 531+ }
 532+
 533+ if ($index++ >= 25) { last ; }
 534+ }
 535+ $output .= "\n" ;
 536+
 537+ foreach $project (sort {$totals_project {"$f,12"} {$b} <=> $totals_project {"$f,12"} {$a}} @projects)
 538+ {
 539+# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
 540+
 541+ if ($project eq 'commons')
 542+ { $line = ",Commons," ; }
 543+ else
 544+ { $line = "," . &GetProjectName ($project) . "," ; }
 545+
 546+ $value_100 = $totals_project {"$f,0"} {$project} ;
 547+ for ($m = 0 ; $m <= 12 ; $m++)
 548+ {
 549+ if ($value_100 != 0)
 550+ { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m"} {$project} / $value_100)) . "," ; }
 551+ else
 552+ { $line .= "," ; }
 553+ }
 554+ $line =~ s/,$// ;
 555+ $output .= "$line\n" ;
 556+ }
 557+ $output .= "\n," . '=' x 150 . "\n" ;
 558+ }
 559+
 560+ print CSV_OUT $output ;
 561+
 562+ $output = "\n,Binaries per month - Absolute\n" ;
 563+ $output .= "$csv_recent_months,%inc year, %inc month\n" ;
 564+ $output .= "\n$csv_recent_months,%inc year,%inc month\n" ;
 565+
 566+ $line = ",Total," ;
 567+ for ($m = 0 ; $m <= 12 ; $m++)
 568+ { $line .= $ext_tot {$m} . "," ; }
 569+
 570+ if ($ext_tot {0} != 0)
 571+ { $line .= sprintf ("%.1f", 100 * ($ext_tot {12} / $ext_tot {0}) - 100). "%," ; }
 572+ else
 573+ { $line .= "n.a.," ; }
 574+
 575+ if ($ext_tot {11} != 0)
 576+ { $line .= sprintf ("%.1f", 100 * ($ext_tot {12} / $ext_tot {11}) - 100). "%," ; }
 577+ else
 578+ { $line .= "n.a.," ; }
 579+
 580+ $line =~ s/,$// ;
 581+ $output .= "$line\n" ;
 582+
 583+ $index = 0 ;
 584+ # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
 585+ for ($e = $#extndxs ; $e >= $#extndxs - 9 ; $e--)
 586+ {
 587+ $index++ ;
 588+
 589+ if ($e < 0)
 590+ {
 591+ $line = "$index,xxx," ;
 592+ for ($m = 0 ; $m <= 12 ; $m++)
 593+ { $line .= "," ; }
 594+ }
 595+ else
 596+ {
 597+ $extndx = $extndxs [$e] ;
 598+ $line = "$index,${ext_cnt {-1}{$extndx}}," ;
 599+
 600+ for ($m = 0 ; $m <= 12 ; $m++)
 601+ { $line .= $ext_cnt {$m}{$extndx} . "," ; }
 602+
 603+ if ($ext_cnt {0}{$extndx} != 0)
 604+ { $line .= sprintf ("%.1f", 100 * ($ext_cnt {12}{$extndx} / $ext_cnt {0}{$extndx}) - 100). "%," ; }
 605+ else
 606+ { $line .= "n.a.," ; }
 607+
 608+ if ($ext_cnt {11}{$extndx} != 0)
 609+ { $line .= sprintf ("%.1f", 100 * ($ext_cnt {12}{$extndx} / $ext_cnt {11}{$extndx}) - 100). "%," ; }
 610+ else
 611+ { $line .= "n.a.," ; }
 612+ }
 613+
 614+ $line =~ s/,$// ;
 615+ $output .= "$line\n" ;
 616+ }
 617+
 618+ print CSV_OUT $output ;
 619+
 620+ $output = "\n,Binaries per month - Indexed\n" ;
 621+ $output .= "$csv_recent_months\n" ;
 622+
 623+ $index = 0 ;
 624+ # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
 625+ for ($e = $#extndxs ; $e >= $#extndxs - 9 ; $e--)
 626+ {
 627+ $index++ ;
 628+
 629+ if ($e < 0)
 630+ {
 631+ $line = "$index,xxx," ;
 632+ for ($m = 0 ; $m <= 12 ; $m++)
 633+ { $line .= "," ; }
 634+ }
 635+ else
 636+ {
 637+ $extndx = $extndxs [$e] ;
 638+ $line = "$index,${ext_cnt {-1}{$extndx}}," ;
 639+ $ext_cnt_m0 = $ext_cnt {0}{$extndx} ;
 640+ for ($m = 0 ; $m <= 12 ; $m++)
 641+ {
 642+ if ($ext_cnt_m0 > 0)
 643+ { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m}{$extndx} / $ext_cnt_m0)). "," ; }
 644+ else
 645+ { $line .= "," ; }
 646+ }
 647+ }
 648+
 649+ $line =~ s/,$// ;
 650+ $output .= "$line\n" ;
 651+ }
 652+ print CSV_OUT $output ;
 653+ close CSV_OUT ;
 654+
 655+ print "\nOutput written to $file_csv_out\n\n" ;
 656+}
 657+
 658+sub SetComparisonPeriods
 659+{
 660+ my $year = shift ;
 661+ my $month = shift ;
 662+ my @months = qw(Xxx Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec) ;
 663+
 664+ my ($month_0, $month_0_file, $month_0_minus_12, $month_0_minus_1) ;
 665+
 666+ $year_ = $year ;
 667+ $month_ = $month ;
 668+
 669+ $month_0 = sprintf ("%04d/%02d",$year, $month+1) ;
 670+ $month_0_file = sprintf ("%04d_%02d",$year, $month+1) ; # for filenames
 671+ $month_0_minus_12 = sprintf ("%04d/%02d",$year-1,$month+1) ;
 672+ ($year,$month) = $month > 1 ? ($year,$month-1) : ($year-1,12) ;
 673+ $month_0_minus_1 = sprintf ("%04d/%02d",$year,$month+1) ;
 674+
 675+ print "\nWrite trend data up till month: $month_0\n\n" ;
 676+ print "Compare with previous month: $month_0_minus_1, previous year: $month_0_minus_12\n\n" ;
 677+
 678+ $csv_recent_months = ",project," ;
 679+ $year = $year_ - 1 ;
 680+ $month = $month_ ;
 681+ for ($m = 0 ; $m <= 12 ; $m++)
 682+ {
 683+ $recent_months [$m] = sprintf ("%04d/%02d", $year, $month) ;
 684+ $csv_recent_months .= sprintf ("%02d/%04d", $month, $year) . "," ;
 685+ ($year,$month) = $month < 12 ? ($year,$month+1) : ($year+1,1) ;
 686+ }
 687+ $csv_recent_months =~ s/,$// ;
 688+}
 689+
 690+#sub WriteCsvFilesPerPeriod
 691+#{
 692+# foreach $period (sort keys %totals)
 693+# {
 694+# &LogT ("\nWrite totals per $period: ") ;
 695+# $desc = $descriptions {$period} ;
 696+
 697+# foreach $project (sort keys %{$totals {$period}})
 698+# {
 699+# &Log ("$project ") ;
 700+
 701+# $dir_out = "$path_out/csv_$project" ;
 702+# if (! -d $dir_out)
 703+# { mkdir $dir_out, 0777 ; }
 704+
 705+# $file_out = "$dir_out/$desc.csv" ;
 706+
 707+# open CSV, ">", $file_out ;
 708+# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}})
 709+# {
 710+# ($language,$yearmonth) = split (",", $key) ;
 711+# # print "PERIOD $period PROJECT $project KEY $key\n" ;
 712+# if ($period eq "month")
 713+# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; }
 714+# else
 715+# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; }
 716+# }
 717+# close CSV ;
 718+# }
 719+# }
 720+#}
 721+
 722+#sub WriteCsvHtmlFilesPopularWikis
 723+#{
 724+# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ;
 725+
 726+# $dir_out = "$path_out/csv_wp" ;
 727+# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$month_0_file.csv" ;
 728+
 729+## extend with normalized counts
 730+## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv
 731+
 732+# open CSV, ">", $file_out ;
 733+# print CSV $csv_recent_months ;
 734+
 735+# # write per popular language+wiki 13 months of page view totals
 736+# $lines = 0 ;
 737+# foreach $line (@totals_lastmonth)
 738+# {
 739+# if (++$lines > $maxpopularwikis) { last ; }
 740+
 741+# ($project, $language) = split (',', $line) ;
 742+# $largest_projects {"$project-$language"} ++ ;
 743+
 744+# $language_name = $out_languages {$language} ;
 745+
 746+# if (($project ne "wp") && ($project ne "wx"))
 747+# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
 748+# else
 749+# { print CSV "$language_name," ; }
 750+
 751+## %test = %{$totals {"month"} {"wp"} };
 752+## %test2 = @recent_months ;
 753+# for ($m = 0 ; $m <= 12 ; $m++)
 754+# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; }
 755+# print CSV "\n" ;
 756+# }
 757+
 758+# print CSV "\n$csv_recent_months" ;
 759+
 760+# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100
 761+# $lines = 0 ;
 762+# foreach $line (@totals_lastmonth)
 763+# {
 764+# if (++$lines > $maxpopularwikis) { last ; }
 765+
 766+# ($project, $language) = split (',', $line) ;
 767+# $language_name = $out_languages {$language} ;
 768+
 769+# if (($project ne "wp") && ($project ne "wx"))
 770+# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
 771+# else
 772+# { print CSV "$language_name," ; }
 773+
 774+# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ;
 775+# for ($m = 0 ; $m <= 12 ; $m++)
 776+# {
 777+# if ($recent_month_0 > 0)
 778+# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; }
 779+# else
 780+# { print CSV "," ; }
 781+# }
 782+
 783+# print CSV "\n" ;
 784+# }
 785+# close CSV ;
 786+
 787+# # write ready made table rows for report card: page views top 25 movers shakers
 788+# foreach $key (keys %largest_projects)
 789+# {
 790+# ($project,$language) = split ('-', $key) ;
 791+
 792+# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_0"} ;
 793+# $total_prevmonth = $totals {"month"} {$project} {"$language,$month_0_minus_1"} ;
 794+# $total_prevyear = $totals {"month"} {$project} {"$language,$month_0_minus_12"} ;
 795+
 796+# $perc_month = "no data" ;
 797+# $perc_year = "no data" ;
 798+
 799+# if ($total_prevyear > 0)
 800+# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; }
 801+# if ($total_prevyear > 0)
 802+# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; }
 803+
 804+# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ;
 805+
 806+# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ;
 807+
 808+# $project_name = &GetProjectName ($project) ;
 809+# $language_name = $out_languages {$language} ;
 810+
 811+# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ;
 812+# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ;
 813+# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ;
 814+# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ;
 815+# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ;
 816+
 817+# $growth_figures_text {"$perc_month-$project-$language"} = $line ;
 818+# $growth_figures_html {"$perc_month-$project-$language"} = $html ;
 819+# }
 820+
 821+# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$month_0_file.html" ;
 822+
 823+# open HTML, ">", $file_html ;
 824+# foreach $key (sort {$b <=> $a} keys %growth_figures_text)
 825+# {
 826+# print "$key: ". $growth_figures_text {$key} . "\n" ;
 827+# print HTML $growth_figures_html {$key} ;
 828+# }
 829+# close HTML ;
 830+#}
 831+
 832+
 833+sub GetProjectName
 834+{
 835+ my $project =shift ;
 836+
 837+ if ($project eq "wp") { $project_name = "Wikipedia"; }
 838+ elsif ($project eq "wb") { $project_name = "Wikibooks"; }
 839+ elsif ($project eq "wk") { $project_name = "Wiktionary"; }
 840+ elsif ($project eq "wx") { $project_name = ""; }
 841+ elsif ($project eq "wn") { $project_name = "Wikinews"; }
 842+ elsif ($project eq "wq") { $project_name = "Wikiquote"; }
 843+ elsif ($project eq "ws") { $project_name = "Wikisource"; }
 844+ elsif ($project eq "wv") { $project_name = "Wikiversity"; }
 845+
 846+ return ($project_name) ;
 847+}
 848+
 849+sub MonthsSinceYearAgo
 850+{
 851+ my $year = shift ;
 852+ my $month = shift ;
 853+ return 12 - (($year_0 - $year) * 12 + $month_0 - $month) ;
 854+}
 855+
 856+#sub Log
 857+#{
 858+# $msg = shift ;
 859+# print $msg ;
 860+# print LOG $msg ;
 861+#}
 862+
 863+#sub LogT
 864+#{
 865+# $msg = shift ;
 866+# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
 867+# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ;
 868+# $msg =~ s/^(\n*)/$1$time/s ;
 869+# &Log ($msg) ;
 870+#}
 871+
 872+sub MmSs
 873+{
 874+ my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
 875+ return (sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss)) ;
 876+}
 877+
 878+sub Abort
 879+{
 880+ my $msg = shift ;
 881+ print "$msg\nExecution aborted." ;
 882+ # to do: log also to file
 883+ exit ;
 884+}
 885+
 886+sub InitProjectNames
 887+{
 888+ # copied from WikiReports.pl
 889+
 890+ %wikipedias = (
 891+# mediawiki=>"http://wikimediafoundation.org Wikimedia",
 892+ nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
 893+ sources=>"http://wikisource.org Old&nbsp;Wikisource",
 894+ meta=>"http://meta.wikimedia.org Meta-Wiki",
 895+ beta=>"http://beta.wikiversity.org Beta",
 896+ species=>"http://species.wikipedia.org WikiSpecies",
 897+ commons=>"http://commons.wikimedia.org Commons",
 898+ foundation=>"http://wikimediafoundation.org Wikimedia&nbsp;Foundation",
 899+ sep11=>"http://sep11.wikipedia.org In&nbsp;Memoriam",
 900+ nlwikimedia=>"http://nl.wikimedia.org Wikimedia&nbsp;Nederland",
 901+ plwikimedia=>"http://pl.wikimedia.org Wikimedia&nbsp;Polska",
 902+ mediawiki=>"http://www.mediawiki.org MediaWiki",
 903+ dewikiversity=>"http://de.wikiversity.org Wikiversit&auml;t",
 904+ frwikiversity=>"http://fr.wikiversity.org Wikiversit&auml;t",
 905+ wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
 906+ wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
 907+ aa=>"http://aa.wikipedia.org Afar",
 908+ ab=>"http://ab.wikipedia.org Abkhazian",
 909+ af=>"http://af.wikipedia.org Afrikaans",
 910+ ak=>"http://ak.wikipedia.org Akan", # was Akana
 911+ als=>"http://als.wikipedia.org Alemannic", # was Elsatian
 912+ am=>"http://am.wikipedia.org Amharic",
 913+ an=>"http://an.wikipedia.org Aragonese",
 914+ ang=>"http://ang.wikipedia.org Anglo-Saxon",
 915+ ar=>"http://ar.wikipedia.org Arabic",
 916+ arc=>"http://arc.wikipedia.org Aramaic",
 917+ as=>"http://as.wikipedia.org Assamese",
 918+ ast=>"http://ast.wikipedia.org Asturian",
 919+ av=>"http://av.wikipedia.org Avar", # was Avienan
 920+ ay=>"http://ay.wikipedia.org Aymara",
 921+ az=>"http://az.wikipedia.org Azeri", # was Azerbaijani
 922+ ba=>"http://ba.wikipedia.org Bashkir",
 923+ bar=>"http://bar.wikipedia.org Bavarian",
 924+ bat_smg=>"http://bat-smg.wikipedia.org Samogitian",
 925+ "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
 926+ bcl=>"http://bcl.wikipedia.org Central Bicolano",
 927+ be=>"http://be.wikipedia.org Belarusian",
 928+ "be-x-old"=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
 929+ be_x_old=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
 930+ bg=>"http://bg.wikipedia.org Bulgarian",
 931+ bh=>"http://bh.wikipedia.org Bihari",
 932+ bi=>"http://bi.wikipedia.org Bislama",
 933+ bm=>"http://bm.wikipedia.org Bambara",
 934+ bn=>"http://bn.wikipedia.org Bengali",
 935+ bo=>"http://bo.wikipedia.org Tibetan",
 936+ bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri",
 937+ br=>"http://br.wikipedia.org Breton",
 938+ bs=>"http://bs.wikipedia.org Bosnian",
 939+ bug=>"http://bug.wikipedia.org Buginese",
 940+ bxr=>"http://bxr.wikipedia.org Buryat",
 941+ ca=>"http://ca.wikipedia.org Catalan",
 942+ cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano",
 943+ "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
 944+ cdo=>"http://cdo.wikipedia.org Min Dong",
 945+ ce=>"http://ce.wikipedia.org Chechen",
 946+ ceb=>"http://ceb.wikipedia.org Cebuano",
 947+ ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru
 948+ cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw
 949+ chr=>"http://chr.wikipedia.org Cherokee",
 950+ chy=>"http://chy.wikipedia.org Cheyenne", # was Sets&ecirc;hest&acirc;hese
 951+ co=>"http://co.wikipedia.org Corsican",
 952+ cr=>"http://cr.wikipedia.org Cree",
 953+ crh=>"http://crh.wikipedia.org Crimean Tatar",
 954+ cs=>"http://cs.wikipedia.org Czech",
 955+ csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian
 956+ cu=>"http://cv.wikipedia.org Old Church Slavonic",
 957+ cv=>"http://cv.wikipedia.org Chuvash", # was Cavas
 958+ cy=>"http://cy.wikipedia.org Welsh",
 959+ da=>"http://da.wikipedia.org Danish",
 960+ de=>"http://de.wikipedia.org German",
 961+ diq=>"http://diq.wikipedia.org Zazaki",
 962+ dk=>"http://dk.wikipedia.org Danish",
 963+ dsb=>"http://dsb.wikipedia.org Lower Sorbian",
 964+ dv=>"http://dv.wikipedia.org Divehi",
 965+ dz=>"http://dz.wikipedia.org Dzongkha",
 966+ ee=>"http://ee.wikipedia.org Ewe",
 967+ el=>"http://el.wikipedia.org Greek",
 968+ eml=>"http://eml.wikipedia.org Emilian-Romagnol",
 969+ en=>"http://en.wikipedia.org English",
 970+ eo=>"http://eo.wikipedia.org Esperanto",
 971+ es=>"http://es.wikipedia.org Spanish",
 972+ et=>"http://et.wikipedia.org Estonian",
 973+ eu=>"http://eu.wikipedia.org Basque",
 974+ ext=>"http://ext.wikipedia.org Extremaduran",
 975+ fa=>"http://fa.wikipedia.org Persian",
 976+ ff=>"http://ff.wikipedia.org Fulfulde",
 977+ fi=>"http://fi.wikipedia.org Finnish",
 978+ "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro",
 979+ fiu_vro=>"http://fiu-vro.wikipedia.org Voro",
 980+ fj=>"http://fj.wikipedia.org Fijian",
 981+ fo=>"http://fo.wikipedia.org Faroese", # was Faeroese
 982+ fr=>"http://fr.wikipedia.org French",
 983+ frp=>"http://frp.wikipedia.org Arpitan",
 984+ fur=>"http://fur.wikipedia.org Friulian",
 985+ fy=>"http://fy.wikipedia.org Frisian",
 986+ ga=>"http://ga.wikipedia.org Irish",
 987+ gan=>"http://gan.wikipedia.org Gan",
 988+ gay=>"http://gay.wikipedia.org Gayo",
 989+ gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic
 990+ gl=>"http://gl.wikipedia.org Galician", # was Galego
 991+ glk=>"http://glk.wikipedia.org Gilaki",
 992+ gn=>"http://gn.wikipedia.org Guarani",
 993+ got=>"http://got.wikipedia.org Gothic",
 994+ gu=>"http://gu.wikipedia.org Gujarati",
 995+ gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic
 996+ ha=>"http://ha.wikipedia.org Hausa",
 997+ hak=>"http://hak.wikipedia.org Hakka",
 998+ haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian
 999+ he=>"http://he.wikipedia.org Hebrew",
 1000+ hi=>"http://hi.wikipedia.org Hindi",
 1001+ hif=>"http://hif.wikipedia.org Fiji Hindi",
 1002+ ho=>"http://ho.wikipedia.org Hiri Motu",
 1003+ hr=>"http://hr.wikipedia.org Croatian",
 1004+ hsb=>"http://hsb.wikipedia.org Upper Sorbian",
 1005+ ht=>"http://ht.wikipedia.org Haitian",
 1006+ hu=>"http://hu.wikipedia.org Hungarian",
 1007+ hy=>"http://hy.wikipedia.org Armenian",
 1008+ hz=>"http://hz.wikipedia.org Herero",
 1009+ ia=>"http://ia.wikipedia.org Interlingua",
 1010+ iba=>"http://iba.wikipedia.org Iban",
 1011+ id=>"http://id.wikipedia.org Indonesian",
 1012+ ie=>"http://ie.wikipedia.org Interlingue",
 1013+ ig=>"http://ig.wikipedia.org Igbo",
 1014+ ii=>"http://ii.wikipedia.org Yi",
 1015+ ik=>"http://ik.wikipedia.org Inupiak",
 1016+ ilo=>"http://ilo.wikipedia.org Ilokano",
 1017+ io=>"http://io.wikipedia.org Ido",
 1018+ is=>"http://is.wikipedia.org Icelandic",
 1019+ it=>"http://it.wikipedia.org Italian",
 1020+ iu=>"http://iu.wikipedia.org Inuktitut",
 1021+ ja=>"http://ja.wikipedia.org Japanese",
 1022+ jbo=>"http://jbo.wikipedia.org Lojban",
 1023+ jv=>"http://jv.wikipedia.org Javanese",
 1024+ ka=>"http://ka.wikipedia.org Georgian",
 1025+ kaa=>"http://kaa.wikipedia.org Karakalpak",
 1026+ kab=>"http://ka.wikipedia.org Kabyle",
 1027+ kaw=>"http://kaw.wikipedia.org Kawi",
 1028+ kg=>"http://kg.wikipedia.org Kongo",
 1029+ ki=>"http://ki.wikipedia.org Kikuyu",
 1030+ kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo
 1031+ kk=>"http://kk.wikipedia.org Kazakh",
 1032+ kl=>"http://kl.wikipedia.org Greenlandic",
 1033+ km=>"http://km.wikipedia.org Khmer", # was Cambodian
 1034+ kn=>"http://kn.wikipedia.org Kannada",
 1035+ ko=>"http://ko.wikipedia.org Korean",
 1036+ kr=>"http://kr.wikipedia.org Kanuri",
 1037+ ks=>"http://ks.wikipedia.org Kashmiri",
 1038+ ksh=>"http://ksh.wikipedia.org Ripuarian",
 1039+ ku=>"http://ku.wikipedia.org Kurdish",
 1040+ kv=>"http://kv.wikipedia.org Komi",
 1041+ kw=>"http://kw.wikipedia.org Cornish", # was Kornish
 1042+ ky=>"http://ky.wikipedia.org Kirghiz",
 1043+ la=>"http://la.wikipedia.org Latin",
 1044+ lad=>"http://lad.wikipedia.org Ladino",
 1045+ lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch
 1046+ lbe=>"http://lbe.wikipedia.org Lak",
 1047+ lg=>"http://lg.wikipedia.org Ganda",
 1048+ li=>"http://li.wikipedia.org Limburgish",
 1049+ lij=>"http://lij.wikipedia.org Ligurian",
 1050+ lmo=>"http://lmo.wikipedia.org Lombard",
 1051+ ln=>"http://ln.wikipedia.org Lingala",
 1052+ lo=>"http://lo.wikipedia.org Laotian",
 1053+ ls=>"http://ls.wikipedia.org Latino Sine Flexione",
 1054+ lt=>"http://lt.wikipedia.org Lithuanian",
 1055+ lv=>"http://lv.wikipedia.org Latvian",
 1056+ mad=>"http://mad.wikipedia.org Madurese",
 1057+ mak=>"http://mak.wikipedia.org Makasar",
 1058+ map_bms=>"http://map-bms.wikipedia.org Banyumasan",
 1059+ "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
 1060+ mdf=>"http://mdf.wikipedia.org Moksha",
 1061+ mg=>"http://mg.wikipedia.org Malagasy",
 1062+ mh=>"http://mh.wikipedia.org Marshallese",
 1063+ mi=>"http://mi.wikipedia.org Maori",
 1064+ min=>"http://min.wikipedia.org Minangkabau",
 1065+ minnan=>"http://minnan.wikipedia.org Minnan",
 1066+ mk=>"http://mk.wikipedia.org Macedonian",
 1067+ ml=>"http://ml.wikipedia.org Malayalam",
 1068+ mn=>"http://mn.wikipedia.org Mongolian",
 1069+ mo=>"http://mo.wikipedia.org Moldavian",
 1070+ mr=>"http://mr.wikipedia.org Marathi",
 1071+ ms=>"http://ms.wikipedia.org Malay",
 1072+ mt=>"http://mt.wikipedia.org Maltese",
 1073+ mus=>"http://mus.wikipedia.org Muskogee",
 1074+ my=>"http://my.wikipedia.org Burmese",
 1075+ myv=>"http://myv.wikipedia.org Erzya",
 1076+ mzn=>"http://mzn.wikipedia.org Mazandarani",
 1077+ na=>"http://na.wikipedia.org Nauruan", # was Nauru
 1078+ nah=>"http://nah.wikipedia.org Nahuatl",
 1079+ nap=>"http://nap.wikipedia.org Neapolitan",
 1080+ nds=>"http://nds.wikipedia.org Low Saxon",
 1081+ nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
 1082+ "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
 1083+ ne=>"http://ne.wikipedia.org Nepali",
 1084+ new=>"http://new.wikipedia.org Nepal Bhasa",
 1085+ ng=>"http://ng.wikipedia.org Ndonga",
 1086+ nl=>"http://nl.wikipedia.org Dutch",
 1087+ nov=>"http://nov.wikipedia.org Novial",
 1088+ nrm=>"http://nrm.wikipedia.org Norman",
 1089+ nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian
 1090+ no=>"http://no.wikipedia.org Norwegian",
 1091+ nv=>"http://nv.wikipedia.org Navajo", # was Avayo
 1092+ ny=>"http://ny.wikipedia.org Chichewa",
 1093+ oc=>"http://oc.wikipedia.org Occitan",
 1094+ om=>"http://om.wikipedia.org Oromo",
 1095+ or=>"http://or.wikipedia.org Oriya",
 1096+ os=>"http://os.wikipedia.org Ossetic",
 1097+ pa=>"http://pa.wikipedia.org Punjabi",
 1098+ pag=>"http://pag.wikipedia.org Pangasinan",
 1099+ pam=>"http://pam.wikipedia.org Kapampangan",
 1100+ pap=>"http://pap.wikipedia.org Papiamentu",
 1101+ pdc=>"http://pdc.wikipedia.org Pennsylvania German",
 1102+ pi=>"http://pi.wikipedia.org Pali",
 1103+ pih=>"http://pih.wikipedia.org Norfolk",
 1104+ pl=>"http://pl.wikipedia.org Polish",
 1105+ pms=>"http://pms.wikipedia.org Piedmontese",
 1106+ ps=>"http://ps.wikipedia.org Pashto",
 1107+ pt=>"http://pt.wikipedia.org Portuguese",
 1108+ qu=>"http://qu.wikipedia.org Quechua",
 1109+ rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance
 1110+ rmy=>"http://rmy.wikipedia.org Romani",
 1111+ rn=>"http://rn.wikipedia.org Kirundi",
 1112+ ro=>"http://ro.wikipedia.org Romanian",
 1113+ roa_rup=>"http://roa-rup.wikipedia.org Aromanian",
 1114+ "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian",
 1115+ roa_tara=>"http://roa-tara.wikipedia.org Tarantino",
 1116+ "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
 1117+ ru=>"http://ru.wikipedia.org Russian",
 1118+ ru_sib=>"http://ru-sib.wikipedia.org Siberian",
 1119+ "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
 1120+ rw=>"http://rw.wikipedia.org Kinyarwanda",
 1121+ sa=>"http://sa.wikipedia.org Sanskrit",
 1122+ sah=>"http://sah.wikipedia.org Sakha",
 1123+ sc=>"http://sc.wikipedia.org Sardinian",
 1124+ scn=>"http://scn.wikipedia.org Sicilian",
 1125+ sco=>"http://sco.wikipedia.org Scots",
 1126+ sd=>"http://sd.wikipedia.org Sindhi",
 1127+ se=>"http://se.wikipedia.org Northern Sami",
 1128+ sg=>"http://sg.wikipedia.org Sangro",
 1129+ sh=>"http://sh.wikipedia.org Serbo-Croatian",
 1130+ si=>"http://si.wikipedia.org Sinhala", # was Singhalese
 1131+ simple=>"http://simple.wikipedia.org Simple English",
 1132+ sk=>"http://sk.wikipedia.org Slovak",
 1133+ sl=>"http://sl.wikipedia.org Slovene",
 1134+ sm=>"http://sm.wikipedia.org Samoan",
 1135+ sn=>"http://sn.wikipedia.org Shona",
 1136+ so=>"http://so.wikipedia.org Somali", # was Somalian
 1137+ sq=>"http://sq.wikipedia.org Albanian",
 1138+ sr=>"http://sr.wikipedia.org Serbian",
 1139+ srn=>"http://srn.wikipedia.org Sranan",
 1140+ ss=>"http://ss.wikipedia.org Siswati",
 1141+ st=>"http://st.wikipedia.org Sesotho",
 1142+ stq=>"http://stq.wikipedia.org Saterland Frisian",
 1143+ su=>"http://su.wikipedia.org Sundanese",
 1144+ sv=>"http://sv.wikipedia.org Swedish",
 1145+ sw=>"http://sw.wikipedia.org Swahili",
 1146+ szl=>"http://szl.wikipedia.org Silesian",
 1147+ ta=>"http://ta.wikipedia.org Tamil",
 1148+ te=>"http://te.wikipedia.org Telugu",
 1149+ test=>"http://test.wikipedia.org Test",
 1150+ tet=>"http://tet.wikipedia.org Tetum",
 1151+ tg=>"http://tg.wikipedia.org Tajik",
 1152+ th=>"http://th.wikipedia.org Thai",
 1153+ ti=>"http://ti.wikipedia.org Tigrinya",
 1154+ tk=>"http://tk.wikipedia.org Turkmen",
 1155+ tl=>"http://tl.wikipedia.org Tagalog",
 1156+ tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
 1157+ tn=>"http://tn.wikipedia.org Setswana",
 1158+ to=>"http://to.wikipedia.org Tongan",
 1159+ tokipona=>"http://tokipona.wikipedia.org Tokipona",
 1160+ tpi=>"http://tpi.wikipedia.org Tok Pisin",
 1161+ tr=>"http://tr.wikipedia.org Turkish",
 1162+ ts=>"http://ts.wikipedia.org Tsonga",
 1163+ tt=>"http://tt.wikipedia.org Tatar",
 1164+ tum=>"http://tum.wikipedia.org Tumbuka",
 1165+ turn=>"http://turn.wikipedia.org Turnbuka",
 1166+ tw=>"http://tw.wikipedia.org Twi",
 1167+ ty=>"http://ty.wikipedia.org Tahitian",
 1168+ udm=>"http://udm.wikipedia.org Udmurt",
 1169+ ug=>"http://ug.wikipedia.org Uighur",
 1170+ uk=>"http://uk.wikipedia.org Ukrainian",
 1171+ ur=>"http://ur.wikipedia.org Urdu",
 1172+ uz=>"http://uz.wikipedia.org Uzbek",
 1173+ ve=>"http://ve.wikipedia.org Venda", # was Lushaka
 1174+ vec=>"http://vec.wikipedia.org Venetian",
 1175+ vi=>"http://vi.wikipedia.org Vietnamese",
 1176+ vls=>"http://vls.wikipedia.org West Flemish",
 1177+ vo=>"http://vo.wikipedia.org Volap&uuml;k",
 1178+ wa=>"http://wa.wikipedia.org Walloon",
 1179+ war=>"http://war.wikipedia.org Waray-Waray",
 1180+ wo=>"http://wo.wikipedia.org Wolof",
 1181+ wuu=>"http://wuu.wikipedia.org Wu",
 1182+ xal=>"http://xal.wikipedia.org Kalmyk",
 1183+ xh=>"http://xh.wikipedia.org Xhosa",
 1184+ yi=>"http://yi.wikipedia.org Yiddish",
 1185+ yo=>"http://yo.wikipedia.org Yoruba",
 1186+ za=>"http://za.wikipedia.org Zhuang",
 1187+ zea=>"http://zea.wikipedia.org Zealandic",
 1188+ zh=>"http://zh.wikipedia.org Chinese",
 1189+ zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan",
 1190+ "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan",
 1191+ zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese",
 1192+ "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese",
 1193+ zh_yue=>"http://zh-yue.wikipedia.org Cantonese",
 1194+ "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese",
 1195+ zu=>"http://zu.wikipedia.org Zulu",
 1196+ zz=>"&nbsp; All&nbsp;languages",
 1197+ zzz=>"&nbsp; All&nbsp;languages except English"
 1198+ );
 1199+
 1200+ foreach $key (keys %wikipedias)
 1201+ {
 1202+ my $wikipedia = $wikipedias {$key} ;
 1203+ $out_urls {$key} = $wikipedia ;
 1204+ $out_languages {$key} = $wikipedia ;
 1205+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
 1206+ $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ;
 1207+ $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ;
 1208+ $out_article {$key} =~ s/ /_/g ;
 1209+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
 1210+ }
 1211+}
 1212+
 1213+# copied from WikiReports_EN.pl
 1214+sub InitReportNames
 1215+{
 1216+ @out_report_descriptions = (
 1217+ "Contributors",
 1218+ "New editors",
 1219+ "Active editors",
 1220+ "Very active editors",
 1221+ "Article count (official)",
 1222+ "Article count (alternate)",
 1223+ "New articles per day",
 1224+ "Edits per article",
 1225+ "Bytes per article",
 1226+ "Articles over 0.5 Kb",
 1227+ "Articles over 2 Kb",
 1228+ "Edits per month",
 1229+ "Database size",
 1230+ "Words",
 1231+ "Internal links",
 1232+ "Links to other Wikipedias",
 1233+ "Binaries",
 1234+ "External links",
 1235+ "Redirects",
 1236+ "Page requests per day",
 1237+ "Visits per day",
 1238+ "Overview recent months"
 1239+ ) ;
 1240+}
 1241+
Property changes on: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutputYearly.pl
___________________________________________________________________
Added: svn:eol-style
12421242 + native
Index: trunk/wikistats/reportcard/StatisticsMonthlyFilter.pl
@@ -1,101 +1,101 @@
2 -#!/usr/bin/perl
3 -
4 -# scratchpad script, kept for reuse
5 -
6 - use CGI::Carp qw(fatalsToBrowser);
7 - use Time::Local ;
8 -
9 - open IN, "<", "StatisticsMonthly.csv" ;
10 - open OUT, ">", "StatisticsMonthlyExtract.csv" ;
11 -
12 - my ($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = localtime (time);
13 - $month += 1 ;
14 - $year += 1900 ;
15 - print "Now: " . sprintf ("%04d:%02d:%02d\n", $year, $month, $day) ;
16 -
17 - $month-- ;
18 - if ($month < 1) { $month == 12 ; $year-- ; }
19 - print "Extract from " . sprintf ("%04d:%02d", $year-1, $month) . " till " . sprintf ("%04d:%02d", $year, $month) . "\n" ;
20 -
21 - $month_lo = $month ;
22 - $year_lo = $year - 1 ;
23 -
24 - $time_lo = timegm (0,0,0,1 ,$month-1,$year-1-1900) ;
25 - $time_hi = timegm (0,0,0,&daysinmonth($year,$month),$month-1,$year-1900) ;
26 -
27 - while ($line = <IN>)
28 - {
29 - ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ;
30 - if ($articles > $max_articles {$wp})
31 - { $max_articles {$wp} = $articles ; }
32 - }
33 - close IN ;
34 -
35 - $wikis = 0 ;
36 - foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles)
37 - {
38 - if (++$wikis > 25) { last ; }
39 - print "$wp: " . $max_articles {$wp} . "\n" ;
40 - $filter_wikis {$wp} ++ ;
41 - }
42 -
43 - open IN, "<", "StatisticsMonthly.csv" ;
44 - while ($line = <IN>)
45 - {
46 - ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ;
47 - if ($filter_wikis {$wp} == 0) { next ; }
48 -
49 - $year = substr ($date,6,4) ;
50 - $month = substr ($date,0,2) ;
51 - $day = substr ($date,3,2) ;
52 -
53 - $time = timegm (0,0,0,$day,$month-1,$year-1900) ;
54 -
55 - if (($time < $time_lo) || ($time > $time_hi)){ next ; }
56 - # print "$wp $date\n" ;
57 - $articles {"$wp:$date"} = $articles ;
58 - }
59 - close IN ;
60 -
61 -
62 - $wikis = 0 ;
63 - foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles)
64 - {
65 - if (++$wikis > 25) { last ; }
66 - $month = $month_lo ;
67 - $year = $year_lo ;
68 - $line = "$wp," ;
69 - for ($m = 0 ; $m <= 12 ; $m++)
70 - {
71 - $date = sprintf ("%02d/%02d/%04d", $month, &daysinmonth($year,$month), $year) ;
72 - $count = $articles {"$wp:$date"} + 0 ; # force numeric
73 - $line .= "$count," ;
74 - $month++ ;
75 - if ($month > 12)
76 - { $month = 1 ; $year ++ ; }
77 - }
78 - $line =~ s/,$// ;
79 - print OUT "$line\n" ;
80 - }
81 -
82 -
83 - print "\nReady\n\n" ;
84 - exit ;
85 -
86 -sub daysinmonth
87 -{
88 - my $year = shift ;
89 - my $month = shift ;
90 - if ($month == 0)
91 - { return (0) ; }
92 - my $timegm1 = timegm (0,0,0,1,$month-1,$year-1900) ;
93 - $month++ ;
94 - if ($month > 12)
95 - { $month = 1 ; $year++ }
96 - my $timegm2 = timegm (0,0,0,1,$month-1,$year-1900) ;
97 - my $days = ($timegm2-$timegm1) / (24*60*60) ;
98 - return ($days) ;
99 -}
100 -
101 -
102 -
 2+#!/usr/bin/perl
 3+
 4+# scratchpad script, kept for reuse
 5+
 6+ use CGI::Carp qw(fatalsToBrowser);
 7+ use Time::Local ;
 8+
 9+ open IN, "<", "StatisticsMonthly.csv" ;
 10+ open OUT, ">", "StatisticsMonthlyExtract.csv" ;
 11+
 12+ my ($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = localtime (time);
 13+ $month += 1 ;
 14+ $year += 1900 ;
 15+ print "Now: " . sprintf ("%04d:%02d:%02d\n", $year, $month, $day) ;
 16+
 17+ $month-- ;
 18+ if ($month < 1) { $month == 12 ; $year-- ; }
 19+ print "Extract from " . sprintf ("%04d:%02d", $year-1, $month) . " till " . sprintf ("%04d:%02d", $year, $month) . "\n" ;
 20+
 21+ $month_lo = $month ;
 22+ $year_lo = $year - 1 ;
 23+
 24+ $time_lo = timegm (0,0,0,1 ,$month-1,$year-1-1900) ;
 25+ $time_hi = timegm (0,0,0,&daysinmonth($year,$month),$month-1,$year-1900) ;
 26+
 27+ while ($line = <IN>)
 28+ {
 29+ ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ;
 30+ if ($articles > $max_articles {$wp})
 31+ { $max_articles {$wp} = $articles ; }
 32+ }
 33+ close IN ;
 34+
 35+ $wikis = 0 ;
 36+ foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles)
 37+ {
 38+ if (++$wikis > 25) { last ; }
 39+ print "$wp: " . $max_articles {$wp} . "\n" ;
 40+ $filter_wikis {$wp} ++ ;
 41+ }
 42+
 43+ open IN, "<", "StatisticsMonthly.csv" ;
 44+ while ($line = <IN>)
 45+ {
 46+ ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ;
 47+ if ($filter_wikis {$wp} == 0) { next ; }
 48+
 49+ $year = substr ($date,6,4) ;
 50+ $month = substr ($date,0,2) ;
 51+ $day = substr ($date,3,2) ;
 52+
 53+ $time = timegm (0,0,0,$day,$month-1,$year-1900) ;
 54+
 55+ if (($time < $time_lo) || ($time > $time_hi)){ next ; }
 56+ # print "$wp $date\n" ;
 57+ $articles {"$wp:$date"} = $articles ;
 58+ }
 59+ close IN ;
 60+
 61+
 62+ $wikis = 0 ;
 63+ foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles)
 64+ {
 65+ if (++$wikis > 25) { last ; }
 66+ $month = $month_lo ;
 67+ $year = $year_lo ;
 68+ $line = "$wp," ;
 69+ for ($m = 0 ; $m <= 12 ; $m++)
 70+ {
 71+ $date = sprintf ("%02d/%02d/%04d", $month, &daysinmonth($year,$month), $year) ;
 72+ $count = $articles {"$wp:$date"} + 0 ; # force numeric
 73+ $line .= "$count," ;
 74+ $month++ ;
 75+ if ($month > 12)
 76+ { $month = 1 ; $year ++ ; }
 77+ }
 78+ $line =~ s/,$// ;
 79+ print OUT "$line\n" ;
 80+ }
 81+
 82+
 83+ print "\nReady\n\n" ;
 84+ exit ;
 85+
# daysinmonth (year, month)
# Returns the number of days in the given month.
#   year  : four digit year, e.g. 2010
#   month : 1..12 ; month 0 is accepted and yields 0
# The length is measured as the distance between the first day of the month and
# the first day of the following month, both at 00:00 UTC (so no daylight saving effects).
sub daysinmonth
{
  my $year  = shift ;
  my $month = shift ;
  if ($month == 0)
  { return (0) ; }

  # pass full four digit years to timegm: '$year-1900' gave a value 0..99 for the years 1900-1999,
  # which Time::Local treats as a two digit year in a century window around today
  # (e.g. February 1900 was measured as February 2000 and came out as 29 days)
  my $month_start = timegm (0,0,0,1,$month-1,$year) ;

  my ($next_year, $next_month) = $month < 12 ? ($year, $month+1) : ($year+1, 1) ;
  my $next_month_start = timegm (0,0,0,1,$next_month-1,$next_year) ;

  my $days = ($next_month_start - $month_start) / (24*60*60) ;
  return ($days) ;
}
 100+
 101+
 102+
Property changes on: trunk/wikistats/reportcard/StatisticsMonthlyFilter.pl
___________________________________________________________________
Added: svn:eol-style
103103 + native
Property changes on: trunk/wikistats/reportcard/ComScoreTop1000.pl
___________________________________________________________________
Added: svn:eol-style
104104 + native
Index: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl
@@ -1,1185 +1,1185 @@
2 -#!/usr/local/bin/perl
3 -
4 - use lib "/home/ezachte/lib" ;
5 - use EzLib ;
6 - $trace_on_exit = $true ;
7 - ez_lib_version (2) ;
8 -
9 - $month_last = "12" ;
10 - $year_last = 2010 ;
11 -
12 - $month_start = "1" ;
13 - $year_start = 2008 ;
14 -
15 - $m_start = &months_since_2000_01 ($year_start, $month_start) ;
16 - $m_last = &months_since_2000_01 ($year_last, $month_last) ;
17 - $m_last_12 = $m_last - 12 ;
18 - $m_last_1 = $m_last - 1 ;
19 -
20 - $month_last = sprintf ("%02d", $month_last) ; # 1 -> 01
21 -
22 - # set defaults mainly for tests on local machine
23 - default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;
24 -
25 - use Getopt::Std ;
26 -
27 -# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
28 -# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;
29 -
30 - $maxpopularwikis = 25 ;
31 - @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ;
32 -
33 - &LogArguments ;
34 - &ParseArguments ;
35 - &InitProjectNames ;
36 - &InitReportNames ;
37 - &ReadStatisticsMonthly ;
38 - &WriteMonthlyData ;
39 - exit ;
40 -
41 -sub LogArguments
42 -{
43 - my $arguments ;
44 - getopt ("iolpft", \%options) ;
45 - foreach $arg (sort keys %options)
46 - { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
47 - print ("\nArguments\n$arguments\n") ;
48 -# &Log ("\nArguments\n$arguments\n") ;
49 -}
50 -
51 -sub ParseArguments
52 -{
53 -# my @options ;
54 -# getopt ("io", \%options) ;
55 -
56 -# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
57 -# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
58 -
59 -# $path_in = $options {"i"} ;
60 -# $path_out = $options {"o"} ;
61 -
62 -# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
63 -# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
64 -
65 - $path_in = "w:/# out bayes" ;
66 - $path_out = "w:/@ report card/data" ;
67 -
68 - print "Input folder: $path_in\n" ;
69 - print "Output folder: $path_out\n" ;
70 - print "\n" ;
71 -
72 - $file_csv_out = "$path_out/StatisticsMonthly_${year_last}_${month_last}.csv" ;
73 -
74 - &SetComparisonPeriods ;
75 -}
76 -
77 -sub ReadStatisticsMonthly
78 -{
79 - &ReadStatisticsMonthlyForProject ("wb") ;
80 - &ReadStatisticsMonthlyForProject ("wk") ;
81 - &ReadStatisticsMonthlyForProject ("wn") ;
82 - &ReadStatisticsMonthlyForProject ("wp") ;
83 - &ReadStatisticsMonthlyForProject ("wq") ;
84 - &ReadStatisticsMonthlyForProject ("ws") ;
85 - &ReadStatisticsMonthlyForProject ("wv") ;
86 - &ReadStatisticsMonthlyForProject ("wx") ;
87 -
88 - &ReadStatisticsPerBinariesExtensionCommons ;
89 -}
90 -
91 -sub ReadStatisticsMonthlyForProject
92 -{
93 - my $project = shift;
94 -
95 - $all_projects = "*" ;
96 -
97 - my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
98 - my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;
99 -
100 - if (! -e $file_csv_in_1)
101 - { &Abort ("Input file '$file_csv_in_1' not found") ; }
102 - if (! -e $file_csv_in_2)
103 - { &Abort ("Input file '$file_csv_in_2' not found") ; }
104 -
105 - print "Read '$file_csv_in_1'\n" ;
106 - open CSV_IN, '<', $file_csv_in_1 ;
107 -
108 - undef %lines ;
109 - while ($line = <CSV_IN>)
110 - {
111 - ($language,$date,$counts) = split (',', $line, 3) ;
112 -
113 - next if $language eq 'commons' and $project ne 'wx' ;
114 - next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on
115 -
116 - ($month,$day,$year) = split ('\/', $date) ;
117 - my $m = &months_since_2000_01 ($year,$month) ;
118 - next if $m < $m_start ;
119 -
120 - $lines {$language}{$m} = $line ;
121 - $languages {$language}++ ;
122 - }
123 -
124 - foreach $language (sort keys %languages)
125 - {
126 - for ($m = $m_start + 1 ; $m <= $m_last ; $m++)
127 - {
128 - if ($lines {$language}{$m} eq '')
129 - { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
130 - }
131 -
132 - for ($m = $m_start ; $m <= $m_last ; $m++)
133 - {
134 - $line = $lines {$language}{$m} ;
135 - chomp $line ;
136 - ($language,$date,$counts) = split (',', $line, 3) ;
137 - @fields = split (',', $counts) ;
138 -
139 - if ($project eq "wp")
140 - {
141 - foreach $f (1,4,6,11) # new editors, articles, new articles, edits
142 - {
143 - $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
144 -
145 - $totals {"$f,$m"} += $fields [$f] ;
146 -
147 - $totals_project {"$f,$m"} {$project} += $fields [$f] ;
148 - $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;
149 -
150 - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
151 - }
152 - }
153 - else
154 - {
155 - foreach $f (1,4)
156 - {
157 - if ($f <= 3)
158 - {
159 - $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
160 - $totals {"$f,$m"} += $fields [$f] ;
161 - }
162 -
163 -
164 - # ignore editor count on commons for totals, most editors are already counted for other project
165 - # (even for several projects, to be tuned after centralauth dump is available)
166 - # count for all_projects only Wikipedia articles
167 - if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
168 - { $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ; }
169 -
170 - if ($language eq 'commons')
171 - { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
172 - else
173 - { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }
174 -
175 - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
176 - }
177 - foreach $f (6,11)
178 - {
179 - $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;
180 - if ($language eq 'commons')
181 - { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
182 - else
183 - { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }
184 - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
185 - }
186 -
187 - }
188 - }
189 - }
190 - close CSV_IN ;
191 -
192 - # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
193 - # but use f = column count in StatisticsMonthly.csv
194 -
195 - print "Read '$file_csv_in_2'\n" ;
196 - open CSV_IN, '<', $file_csv_in_2 ;
197 -
198 - undef %lines ;
199 - while ($line = <CSV_IN>)
200 - {
201 - chomp $line ;
202 - ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
203 -
204 - next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
205 - # next if $language eq 'commons' ; # ignore editor count on commons alltogether, most are already counted for other project
206 - # (even for several projects, to be tuned after centralauth dump is available)
207 -
208 - if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
209 - if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces
210 -
211 - ($month,$day,$year) = split ('\/', $date) ;
212 - my $m = &months_since_2000_01 ($year,$month) ;
213 - next if $m < $m_start ;
214 -
215 - $lines {$language}{$m} = $line ;
216 - $languages {$language}++ ;
217 - }
218 -
219 - foreach $language (sort keys %languages)
220 - {
221 - for ($m = $m_start+1 ; $m <= $m_last ; $m++)
222 - {
223 - if ($lines {$language}{$m} eq '')
224 - { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
225 - }
226 -
227 - for ($m = $m_start ; $m <= $m_last ; $m++)
228 - {
229 - $line = $lines {$language}{$m} ;
230 - chomp $line ;
231 - ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
232 - @fields = split (',', $counts) ;
233 -
234 - foreach $f (2,3) # editors_gt_5, editors_gt_100
235 - {
236 - # count user with over x edits
237 - # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
238 - # thresholds = 1,3,5,10,25,32,50,100,etc
239 - if ($f == 2) { $f2 = 2 ; }
240 - if ($f == 3) { $f2 = 7 ; }
241 -
242 - $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;
243 - $totals {"$f,$m"} += $fields [$f2] ;
244 -
245 - # ignore editor count on commons for totals, most editors are already counted for other project
246 - # (even for several projects, to be tuned after centralauth dump is available)
247 - if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
248 - { $totals_project {"$f,$m"} {$all_projects} += $fields [$f2] ; }
249 -
250 - if ($language eq 'commons')
251 - { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
252 - else
253 - { $totals_project {"$f,$m"} {$project} += $fields [$f2] ; }
254 - }
255 - }
256 - }
257 - close CSV_IN ;
258 -}
259 -
260 -sub ReadStatisticsPerBinariesExtensionCommons
261 -{
262 - my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
263 - my $mmax = -1 ;
264 -
265 - if (! -e $file_csv_in)
266 - { &Abort ("Input file '$file_csv_in' not found") ; }
267 -
268 - print "Read '$file_csv_in'\n" ;
269 - open CSV_IN, '<', $file_csv_in ;
270 - while ($line = <CSV_IN>)
271 - {
272 - chomp $line ;
273 - ($language,$date,$counts) = split (',', $line, 3) ;
274 -
275 - if ($language ne "commons") { next ; }
276 -
277 - if ($date eq "00/0000")
278 - {
279 - @fields = split (',', $counts) ;
280 - $field_ndx = 0 ;
281 - foreach $field (@fields)
282 - {
283 - $ext_cnt {-1}{$field_ndx} = $field ;
284 - # print "EXT_CNT $field_ndx : $field\n" ;
285 - $field_ndx ++ ;
286 - }
287 - next ;
288 - }
289 -
290 - ($month,$year) = split ('\/', $date) ;
291 - my $m = &months_since_2000_01 ($year,$month) ;
292 - next if $m < $m_start ;
293 -
294 - if ($m > $mmax)
295 - { $mmax = $m ; }
296 -
297 - @fields = split (',', $counts) ;
298 - $field_ndx = 0 ;
299 - foreach $field (@fields)
300 - {
301 - $ext_cnt {$m}{$field_ndx} = $field ;
302 - $ext_tot {$m} += $field ;
303 - $field_ndx ++ ;
304 - }
305 - }
306 - close CSV_IN ;
307 -
308 - %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
309 - @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;
310 -
311 - $extcnt = 0 ;
312 - foreach $extndx (@ext_cnt_mmax)
313 - {
314 - # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
315 - push @extndxs, $extndx ;
316 - if ($extcnt++ >= 9) { last ; }
317 - }
318 -}
319 -
320 -sub ReadMediaTrends
321 -{
322 -# open FILE_UV, '<', $file_regions_UV ;
323 -# close FILE-UV ;
324 -
325 -# open FILE_REACH, '<', $file_regions_Reach ;
326 -# close FILE_REACH ;
327 -}
328 -
329 -sub WriteMonthlyData
330 -{
331 - print "Write file '$file_csv_out'\n" ;
332 - open CSV_OUT, '>', $file_csv_out ;
333 - $output = "" ;
334 - foreach $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
335 - {
336 -
337 - $output .= "\n,${out_report_descriptions [$f]} - Absolute - Per Wiki\n" ;
338 - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
339 -
340 - $line = ",Total," ;
341 - for ($m = $m_start ; $m <= $m_last ; $m++)
342 - { $line .= $totals {"$f,$m"} . "," ; }
343 -
344 - # growth in one year
345 - if ($totals {"$f,$m_last_12"} != 0)
346 - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m_last"} / $totals {"$f,$m_last_12"}) - 100). "%," ; }
347 - else
348 - { $line .= "n.a.," ; }
349 -
350 - # growth in one month
351 - if ($totals {"$f,$m_last_1"} != 0)
352 - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m_last"} / $totals {"$f,$m_last_1"}) - 100). "%," ; }
353 - else
354 - { $line .= "n.a.," ; }
355 -
356 - $line =~ s/,$// ;
357 - $output .= "$line\n" ;
358 -
359 - # sort by absolute amount for last month
360 - %values_f_12 = %{$values {"$f,$m_last"}} ;
361 - $index = 1 ;
362 - foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
363 - {
364 - ($project,$language) = split (",", $key) ;
365 - $language_name = $out_languages {$language} ;
366 - if (($project ne "wp") && ($project ne "wx"))
367 - { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
368 - else
369 - { $line = "$index,$language_name," ; }
370 -
371 - for ($m = $m_start ; $m <= $m_last ; $m++)
372 - { $line .= $values {"$f,$m"} {$key} . "," ; }
373 -
374 - if ($values {"$f,$m_last_12"} {$key} != 0)
375 - { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m_last"} {$key} / $values {"$f,$m_last_12"} {$key}) - 100). "%," ; }
376 - else
377 - { $line .= "n.a.," ; }
378 -
379 - if ($values {"$f,$m_last_1"} {$key} != 0)
380 - { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m_last"} {$key} / $values {"$f,$m_last_1"} {$key}) - 100). "%," ; }
381 - else
382 - { $line .= "n.a.," ; }
383 -
384 - $line =~ s/,$// ;
385 - $output .= "$line\n" ;
386 -
387 - if ($index++ >= 25) { last ; }
388 - }
389 -
390 - $output .= "\n,${out_report_descriptions [$f]} - Absolute - Per Project\n" ;
391 - if ($f <= 3) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
392 - { $output .= ",Note: All projects does not include Commons\n" ; }
393 - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
394 - foreach $project (sort {$totals_project {"$f,$m_last"} {$b} <=> $totals_project {"$f,$m_last"} {$a}} @projects)
395 - {
396 -# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
397 -
398 - if ($project eq 'commons')
399 - { $line = ",Commons," ; }
400 - else
401 - { $line = "," . &GetProjectName ($project) . "," ; }
402 -
403 - for ($m = $m_start ; $m <= $m_last ; $m++)
404 - { $line .= $totals_project {"$f,$m"} {$project} . "," ; }
405 -
406 - if ($totals_project {"$f,$m_last_12"} {$project} != 0)
407 - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m_last"} {$project} / $totals_project {"$f,$m_last_12"} {$project}) - 100). "%," ; }
408 - else
409 - { $line .= "n.a.," ; }
410 -
411 - if ($totals_project {"$f,$m_last_1"} {$project} != 0)
412 - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m_last"} {$project} / $totals_project {"$f,$m_last_1"} {$project}) - 100). "%," ; }
413 - else
414 - { $line .= "n.a.," ; }
415 -
416 - $line =~ s/,$// ;
417 - $output .= "$line\n" ;
418 - }
419 -
420 - $output .= "\n,${out_report_descriptions [$f]} - Indexed - Per Wiki\n" ;
421 - $output .= "$csv_recent_months\n" ;
422 -
423 - # sort by absolute amount for last month
424 - $index = 1 ;
425 - foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
426 - {
427 - # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;
428 -
429 - ($project,$language) = split (",", $key) ;
430 - $language_name = $out_languages {$language} ;
431 - if (($project ne "wp") && ($project ne "wx"))
432 - { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
433 - else
434 - { $line = "$index,$language_name," ; }
435 -
436 - # $value_100 = $values {"$f,$m_last_12"} {$key} ;
437 - $value_100 = $values {"$f,$m_start"} {$key} ;
438 - for ($m = $m_start ; $m <= $m_last ; $m++)
439 - {
440 - if ($value_100 != 0)
441 - { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m"} {$key} / $value_100)) . "," ; }
442 - else
443 - { $line .= "," ; }
444 - }
445 - $line =~ s/,$// ;
446 - $output .= "$line\n" ;
447 -
448 - # put totals last in chart to show line on top of others
449 - if ($index == 9)
450 - {
451 - $line = ",Total," ;
452 - $total_100 = $totals {"$f,$m_last_12"} ;
453 - for ($m = $m_start ; $m <= $m_last ; $m++)
454 - {
455 - if ($total_100 != 0)
456 - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m"} / $total_100)) . "," ; }
457 - else
458 - { $line .= "," ; }
459 - }
460 - $line .= ",(sorted here to make it top-most line out of 10 in Excel)" ;
461 - $output .= "$line\n" ;
462 - }
463 -
464 - if ($index++ >= 25) { last ; }
465 - }
466 -
467 - $output .= "\n,${out_report_descriptions [$f]} - Indexed - Per Project\n" ;
468 - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
469 - foreach $project (sort {$totals_project {"$f,$m_last"} {$b} <=> $totals_project {"$f,$m_last"} {$a}} @projects)
470 - {
471 -# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
472 -
473 - if ($project eq 'commons')
474 - { $line = ",Commons," ; }
475 - else
476 - { $line = "," . &GetProjectName ($project) . "," ; }
477 -
478 - # $value_100 = $totals_project {"$f,$m_last_12"} {$project} ;
479 - $value_100 = $totals_project {"$f,$m_start"} {$project} ;
480 - for ($m = $m_start ; $m <= $m_last ; $m++)
481 - {
482 - if ($value_100 != 0)
483 - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m"} {$project} / $value_100)) . "," ; }
484 - else
485 - { $line .= "," ; }
486 - }
487 - $line =~ s/,$// ;
488 - $output .= "$line\n" ;
489 - }
490 - $output .= "\n," . '=' x 150 . "\n" ;
491 - }
492 -
493 - print CSV_OUT $output ;
494 -
495 - $output = "\n,Binaries per month - Absolute\n" ;
496 - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
497 - $output .= "\n$csv_recent_months,%inc year,%inc month\n" ;
498 -
499 - $line = ",Total," ;
500 - for ($m = $m_start ; $m <= $m_last ; $m++)
501 - { $line .= $ext_tot {$m} . "," ; }
502 -
503 - if ($ext_tot {$m_last_12} != 0)
504 - { $line .= sprintf ("%.1f", 100 * ($ext_tot {$m_last} / $ext_tot {$m_last_12}) - 100). "%," ; }
505 - else
506 - { $line .= "n.a.," ; }
507 -
508 - if ($ext_tot {$m_last_1} != 0)
509 - { $line .= sprintf ("%.1f", 100 * ($ext_tot {$m_last} / $ext_tot {$m_last_1}) - 100). "%," ; }
510 - else
511 - { $line .= "n.a.," ; }
512 -
513 - $line =~ s/,$// ;
514 - $output .= "$line\n" ;
515 -
516 - $index = 0 ;
517 - # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
518 - for ($e = $#extndxs - 9 ; $e <= $#extndxs ; $e++)
519 - {
520 - $index++ ;
521 -
522 - if ($e < 0)
523 - {
524 - $line = "$index,xxx," ;
525 - for ($m = $m_start ; $m <= $m_last ; $m++)
526 - { $line .= "," ; }
527 - }
528 - else
529 - {
530 - $extndx = $extndxs [$e] ;
531 - $line = "$index,${ext_cnt {-1}{$extndx}}," ;
532 -
533 - for ($m = $m_start ; $m <= $m_last ; $m++)
534 - { $line .= $ext_cnt {$m}{$extndx} . "," ; }
535 -
536 - if ($ext_cnt {$m_last_12}{$extndx} != 0)
537 - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m_last}{$extndx} / $ext_cnt {$m_last_12}{$extndx}) - 100). "%," ; }
538 - else
539 - { $line .= "n.a.," ; }
540 -
541 - if ($ext_cnt {$m_last_1}{$extndx} != 0)
542 - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m_last}{$extndx} / $ext_cnt {$m_last_1}{$extndx}) - 100). "%," ; }
543 - else
544 - { $line .= "n.a.," ; }
545 - }
546 -
547 - $line =~ s/,$// ;
548 - $output .= "$line\n" ;
549 - }
550 -
551 - print CSV_OUT $output ;
552 -
553 - $output = "\n,Binaries per month - Indexed\n" ;
554 - $output .= "$csv_recent_months\n" ;
555 -
556 - $index = 0 ;
557 - # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
558 - for ($e = $#extndxs - 9 ; $e <= $#extndxs ; $e++)
559 - {
560 - $index++ ;
561 -
562 - if ($e < 0)
563 - {
564 - $line = "$index,xxx," ;
565 - for ($m = $m_start ; $m <= $m_last ; $m++)
566 - { $line .= "," ; }
567 - }
568 - else
569 - {
570 - $extndx = $extndxs [$e] ;
571 - $line = "$index,${ext_cnt {-1}{$extndx}}," ;
572 - $ext_cnt_m0 = $ext_cnt {$m_last-12}{$extndx} ;
573 - # $ext_cnt_m0 = $ext_cnt {$m_start}{$extndx} ;
574 - for ($m = $m_start ; $m <= $m_last ; $m++)
575 - {
576 - if ($ext_cnt_m0 > 0)
577 - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m}{$extndx} / $ext_cnt_m0)). "," ; }
578 - else
579 - { $line .= "," ; }
580 - }
581 - }
582 -
583 - $line =~ s/,$// ;
584 - $output .= "$line\n" ;
585 - }
586 - print CSV_OUT $output ;
587 - close CSV_OUT ;
588 -
589 - print "\nOutput written to $file_csv_out\n\n" ;
590 -}
591 -
592 -sub SetComparisonPeriods
593 -{
594 - my @months = qw(Xxx Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec) ;
595 -
596 - my ($file_year_month_last, $year_month_last, $year_month_last_minus_12, $year_month_last_minus_1) ;
597 -
598 - $year_month_last = sprintf ("%04d/%02d",$year_last, $month_last) ; # for filenames
599 - $file_year_month_last = sprintf ("%04d_%02d",$year_last, $month_last) ; # for filenames
600 - $year_month_last_minus_12 = sprintf ("%04d/%02d",$year_last - 1,$month_last) ;
601 - $year_month_last_minus_1 = $month_last > 1 ? sprintf ("%04d/%02d",$year_last,$month_last-1): sprintf ("%04d/%02d",$year_last - 1 ,12) ;
602 -
603 - print "\nWrite trend data up till $year_month_last\n\n" ;
604 - print "Compare with previous month: $year_month_last_minus_1, previous year: $year_month_last_minus_12\n\n" ;
605 -
606 - $csv_recent_months = ",project," ;
607 - $year = $year_start ;
608 - $month = $month_start ;
609 - for ($m = $m_start ; $m <= $m_last ; $m++)
610 - {
611 - $recent_months [$m] = sprintf ("%04d/%02d", $year, $month) ;
612 - $csv_recent_months .= sprintf ("%02d/%04d", $month, $year) . "," ;
613 - ($year,$month) = $month < 12 ? ($year,$month+1) : ($year+1,1) ;
614 - }
615 - $csv_recent_months =~ s/,$// ;
616 -}
617 -
618 -#sub WriteCsvFilesPerPeriod
619 -#{
620 -# foreach $period (sort keys %totals)
621 -# {
622 -# &LogT ("\nWrite totals per $period: ") ;
623 -# $desc = $descriptions {$period} ;
624 -
625 -# foreach $project (sort keys %{$totals {$period}})
626 -# {
627 -# &Log ("$project ") ;
628 -
629 -# $dir_out = "$path_out/csv_$project" ;
630 -# if (! -d $dir_out)
631 -# { mkdir $dir_out, 0777 ; }
632 -
633 -# $file_out = "$dir_out/$desc.csv" ;
634 -
635 -# open CSV, ">", $file_out ;
636 -# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}})
637 -# {
638 -# ($language,$yearmonth) = split (",", $key) ;
639 -# # print "PERIOD $period PROJECT $project KEY $key\n" ;
640 -# if ($period eq "month")
641 -# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; }
642 -# else
643 -# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; }
644 -# }
645 -# close CSV ;
646 -# }
647 -# }
648 -#}
649 -
650 -#sub WriteCsvHtmlFilesPopularWikis
651 -#{
652 -# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ;
653 -
654 -# $dir_out = "$path_out/csv_wp" ;
655 -# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$file_year_month_last.csv" ;
656 -
657 -## extend with normalized counts
658 -## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv
659 -
660 -# open CSV, ">", $file_out ;
661 -# print CSV $csv_recent_months ;
662 -
663 -# # write per popular language+wiki 13 months of page view totals
664 -# $lines = 0 ;
665 -# foreach $line (@totals_lastmonth)
666 -# {
667 -# if (++$lines > $maxpopularwikis) { last ; }
668 -
669 -# ($project, $language) = split (',', $line) ;
670 -# $largest_projects {"$project-$language"} ++ ;
671 -
672 -# $language_name = $out_languages {$language} ;
673 -
674 -# if (($project ne "wp") && ($project ne "wx"))
675 -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
676 -# else
677 -# { print CSV "$language_name," ; }
678 -
679 -## %test = %{$totals {"month"} {"wp"} };
680 -## %test2 = @recent_months ;
681 -# for ($m = 0 ; $m <= 12 ; $m++)
682 -# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; }
683 -# print CSV "\n" ;
684 -# }
685 -
686 -# print CSV "\n$csv_recent_months" ;
687 -
688 -# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100
689 -# $lines = 0 ;
690 -# foreach $line (@totals_lastmonth)
691 -# {
692 -# if (++$lines > $maxpopularwikis) { last ; }
693 -
694 -# ($project, $language) = split (',', $line) ;
695 -# $language_name = $out_languages {$language} ;
696 -
697 -# if (($project ne "wp") && ($project ne "wx"))
698 -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
699 -# else
700 -# { print CSV "$language_name," ; }
701 -
702 -# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ;
703 -# for ($m = 0 ; $m <= 12 ; $m++)
704 -# {
705 -# if ($recent_month_0 > 0)
706 -# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; }
707 -# else
708 -# { print CSV "," ; }
709 -# }
710 -
711 -# print CSV "\n" ;
712 -# }
713 -# close CSV ;
714 -
715 -# # write ready made table rows for report card: page views top 25 movers shakers
716 -# foreach $key (keys %largest_projects)
717 -# {
718 -# ($project,$language) = split ('-', $key) ;
719 -
720 -# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_last"} ;
721 -# $total_prevmonth = $totals {"month"} {$project} {"$language,$year_month_last_minus_1"} ;
722 -# $total_prevyear = $totals {"month"} {$project} {"$language,$year_month_last_minus_12"} ;
723 -
724 -# $perc_month = "no data" ;
725 -# $perc_year = "no data" ;
726 -
727 -# if ($total_prevyear > 0)
728 -# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; }
729 -# if ($total_prevyear > 0)
730 -# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; }
731 -
732 -# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ;
733 -
734 -# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ;
735 -
736 -# $project_name = &GetProjectName ($project) ;
737 -# $language_name = $out_languages {$language} ;
738 -
739 -# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ;
740 -# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ;
741 -# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ;
742 -# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ;
743 -# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ;
744 -
745 -# $growth_figures_text {"$perc_month-$project-$language"} = $line ;
746 -# $growth_figures_html {"$perc_month-$project-$language"} = $html ;
747 -# }
748 -
749 -# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$file_year_month_last.html" ;
750 -
751 -# open HTML, ">", $file_html ;
752 -# foreach $key (sort {$b <=> $a} keys %growth_figures_text)
753 -# {
754 -# print "$key: ". $growth_figures_text {$key} . "\n" ;
755 -# print HTML $growth_figures_html {$key} ;
756 -# }
757 -# close HTML ;
758 -#}
759 -
760 -
761 -sub GetProjectName # map a project code (wp, wb, wk, ..., *) to its display name; returns undef for an unknown code
762 -{
763 - my $project =shift ;
764 - $project_name = undef ; # reset the shared global first, otherwise an unknown code would return the name left by the previous call
765 - if ($project eq "wp") { $project_name = "Wikipedia"; }
766 - elsif ($project eq "wb") { $project_name = "Wikibooks"; }
767 - elsif ($project eq "wk") { $project_name = "Wiktionary"; }
768 - elsif ($project eq "wx") { $project_name = "Other Wikis"; }
769 - elsif ($project eq "wn") { $project_name = "Wikinews"; }
770 - elsif ($project eq "wq") { $project_name = "Wikiquote"; }
771 - elsif ($project eq "ws") { $project_name = "Wikisource"; }
772 - elsif ($project eq "wv") { $project_name = "Wikiversity"; }
773 - elsif ($project eq "*") { $project_name = "All projects"; }
774 - # $project_name deliberately stays a package global, so code that reads it after the call keeps working
775 - return ($project_name) ;
776 -}
777 -
778 -sub MonthsSinceYearAgo # 12 for the month $year_last/$month_last itself, 0 for the same month one year earlier
779 -{
780 - my ($year, $month) = @_ ;
781 - my $months_before_last = ($year_last - $year) * 12 + $month_last - $month ;
782 - return 12 - $months_before_last ;
783 -}
784 -
785 -sub MonthsSinceFirstMonthToShow # zero-based month offset counted from January 2008
786 -{
787 - my ($year, $month) = @_ ;
788 - my $whole_years = $year - 2008 ;
789 - return $whole_years * 12 + ($month - 1) ;
790 -}
791 -
792 -# encode (year, month) as a single month number counted from the year 2000:
793 -# January 2000 = 1, December 2000 = 12, January 2001 = 13, ...
794 -sub months_since_2000_01
795 -{
796 - my ($year, $month) = @_ ;
797 - my $years_elapsed = $year - 2000 ;
798 - return $years_elapsed * 12 + $month ;
799 -}
800 -
801 -#sub Log
802 -#{
803 -# $msg = shift ;
804 -# print $msg ;
805 -# print LOG $msg ;
806 -#}
807 -
808 -#sub LogT
809 -#{
810 -# $msg = shift ;
811 -# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
812 -# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ;
813 -# $msg =~ s/^(\n*)/$1$time/s ;
814 -# &Log ($msg) ;
815 -#}
816 -
817 -sub MmSs # current local time formatted as "hh:mm:ss " (note the trailing space)
818 -{
819 - my @now = localtime (time) ; # list starts with (seconds, minutes, hours, ...)
820 - return (sprintf ("%02d:%02d:%02d ", @now[2,1,0])) ;
821 -}
822 -
823 -sub Abort # print $msg followed by an abort notice on STDOUT, then terminate the script with a failure status
824 -{
825 - my $msg = shift ;
826 - print "$msg\nExecution aborted." ;
827 - # to do: log also to file
828 - exit 1 ; # non-zero status lets calling shell scripts detect the failure (a bare 'exit' reports success)
829 -}
830 -
831 -sub InitProjectNames # fill %wikipedias ("<url> <name>" per wiki code) and derive %out_urls, %out_languages and %out_article from it
832 -{
833 - # copied from WikiReports.pl
834 - # codes containing a dash are listed twice: once with underscores (bareword key) and once quoted with dashes
835 - %wikipedias = (
836 -# mediawiki=>"http://wikimediafoundation.org Wikimedia",
837 - nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
838 - sources=>"http://wikisource.org Old&nbsp;Wikisource",
839 - meta=>"http://meta.wikimedia.org Meta-Wiki",
840 - beta=>"http://beta.wikiversity.org Beta",
841 - species=>"http://species.wikipedia.org WikiSpecies",
842 - commons=>"http://commons.wikimedia.org Commons",
843 - foundation=>"http://wikimediafoundation.org Wikimedia&nbsp;Foundation",
844 - sep11=>"http://sep11.wikipedia.org In&nbsp;Memoriam",
845 - nlwikimedia=>"http://nl.wikimedia.org Wikimedia&nbsp;Nederland",
846 - plwikimedia=>"http://pl.wikimedia.org Wikimedia&nbsp;Polska",
847 - mediawiki=>"http://www.mediawiki.org MediaWiki",
848 - dewikiversity=>"http://de.wikiversity.org Wikiversit&auml;t",
849 - frwikiversity=>"http://fr.wikiversity.org Wikiversit&eacute;",
850 - wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
851 - wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
852 - aa=>"http://aa.wikipedia.org Afar",
853 - ab=>"http://ab.wikipedia.org Abkhazian",
854 - af=>"http://af.wikipedia.org Afrikaans",
855 - ak=>"http://ak.wikipedia.org Akan", # was Akana
856 - als=>"http://als.wikipedia.org Alemannic", # was Elsatian
857 - am=>"http://am.wikipedia.org Amharic",
858 - an=>"http://an.wikipedia.org Aragonese",
859 - ang=>"http://ang.wikipedia.org Anglo-Saxon",
860 - ar=>"http://ar.wikipedia.org Arabic",
861 - arc=>"http://arc.wikipedia.org Aramaic",
862 - as=>"http://as.wikipedia.org Assamese",
863 - ast=>"http://ast.wikipedia.org Asturian",
864 - av=>"http://av.wikipedia.org Avar", # was Avienan
865 - ay=>"http://ay.wikipedia.org Aymara",
866 - az=>"http://az.wikipedia.org Azeri", # was Azerbaijani
867 - ba=>"http://ba.wikipedia.org Bashkir",
868 - bar=>"http://bar.wikipedia.org Bavarian",
869 - bat_smg=>"http://bat-smg.wikipedia.org Samogitian",
870 - "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
871 - bcl=>"http://bcl.wikipedia.org Central Bicolano",
872 - be=>"http://be.wikipedia.org Belarusian",
873 - "be-x-old"=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)",
874 - be_x_old=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)",
875 - bg=>"http://bg.wikipedia.org Bulgarian",
876 - bh=>"http://bh.wikipedia.org Bihari",
877 - bi=>"http://bi.wikipedia.org Bislama",
878 - bm=>"http://bm.wikipedia.org Bambara",
879 - bn=>"http://bn.wikipedia.org Bengali",
880 - bo=>"http://bo.wikipedia.org Tibetan",
881 - bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri",
882 - br=>"http://br.wikipedia.org Breton",
883 - bs=>"http://bs.wikipedia.org Bosnian",
884 - bug=>"http://bug.wikipedia.org Buginese",
885 - bxr=>"http://bxr.wikipedia.org Buryat",
886 - ca=>"http://ca.wikipedia.org Catalan",
887 - cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano",
888 - "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
889 - cdo=>"http://cdo.wikipedia.org Min Dong",
890 - ce=>"http://ce.wikipedia.org Chechen",
891 - ceb=>"http://ceb.wikipedia.org Cebuano",
892 - ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru
893 - cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw
894 - chr=>"http://chr.wikipedia.org Cherokee",
895 - chy=>"http://chy.wikipedia.org Cheyenne", # was Sets&ecirc;hest&acirc;hese
896 - co=>"http://co.wikipedia.org Corsican",
897 - cr=>"http://cr.wikipedia.org Cree",
898 - crh=>"http://crh.wikipedia.org Crimean Tatar",
899 - cs=>"http://cs.wikipedia.org Czech",
900 - csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian
901 - cu=>"http://cu.wikipedia.org Old Church Slavonic",
902 - cv=>"http://cv.wikipedia.org Chuvash", # was Cavas
903 - cy=>"http://cy.wikipedia.org Welsh",
904 - da=>"http://da.wikipedia.org Danish",
905 - de=>"http://de.wikipedia.org German",
906 - diq=>"http://diq.wikipedia.org Zazaki",
907 - dk=>"http://dk.wikipedia.org Danish",
908 - dsb=>"http://dsb.wikipedia.org Lower Sorbian",
909 - dv=>"http://dv.wikipedia.org Divehi",
910 - dz=>"http://dz.wikipedia.org Dzongkha",
911 - ee=>"http://ee.wikipedia.org Ewe",
912 - el=>"http://el.wikipedia.org Greek",
913 - eml=>"http://eml.wikipedia.org Emilian-Romagnol",
914 - en=>"http://en.wikipedia.org English",
915 - eo=>"http://eo.wikipedia.org Esperanto",
916 - es=>"http://es.wikipedia.org Spanish",
917 - et=>"http://et.wikipedia.org Estonian",
918 - eu=>"http://eu.wikipedia.org Basque",
919 - ext=>"http://ext.wikipedia.org Extremaduran",
920 - fa=>"http://fa.wikipedia.org Persian",
921 - ff=>"http://ff.wikipedia.org Fulfulde",
922 - fi=>"http://fi.wikipedia.org Finnish",
923 - "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro",
924 - fiu_vro=>"http://fiu-vro.wikipedia.org Voro",
925 - fj=>"http://fj.wikipedia.org Fijian",
926 - fo=>"http://fo.wikipedia.org Faroese", # was Faeroese
927 - fr=>"http://fr.wikipedia.org French",
928 - frp=>"http://frp.wikipedia.org Arpitan",
929 - fur=>"http://fur.wikipedia.org Friulian",
930 - fy=>"http://fy.wikipedia.org Frisian",
931 - ga=>"http://ga.wikipedia.org Irish",
932 - gan=>"http://gan.wikipedia.org Gan",
933 - gay=>"http://gay.wikipedia.org Gayo",
934 - gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic
935 - gl=>"http://gl.wikipedia.org Galician", # was Galego
936 - glk=>"http://glk.wikipedia.org Gilaki",
937 - gn=>"http://gn.wikipedia.org Guarani",
938 - got=>"http://got.wikipedia.org Gothic",
939 - gu=>"http://gu.wikipedia.org Gujarati",
940 - gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic
941 - ha=>"http://ha.wikipedia.org Hausa",
942 - hak=>"http://hak.wikipedia.org Hakka",
943 - haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian
944 - he=>"http://he.wikipedia.org Hebrew",
945 - hi=>"http://hi.wikipedia.org Hindi",
946 - hif=>"http://hif.wikipedia.org Fiji Hindi",
947 - ho=>"http://ho.wikipedia.org Hiri Motu",
948 - hr=>"http://hr.wikipedia.org Croatian",
949 - hsb=>"http://hsb.wikipedia.org Upper Sorbian",
950 - ht=>"http://ht.wikipedia.org Haitian",
951 - hu=>"http://hu.wikipedia.org Hungarian",
952 - hy=>"http://hy.wikipedia.org Armenian",
953 - hz=>"http://hz.wikipedia.org Herero",
954 - ia=>"http://ia.wikipedia.org Interlingua",
955 - iba=>"http://iba.wikipedia.org Iban",
956 - id=>"http://id.wikipedia.org Indonesian",
957 - ie=>"http://ie.wikipedia.org Interlingue",
958 - ig=>"http://ig.wikipedia.org Igbo",
959 - ii=>"http://ii.wikipedia.org Yi",
960 - ik=>"http://ik.wikipedia.org Inupiak",
961 - ilo=>"http://ilo.wikipedia.org Ilokano",
962 - io=>"http://io.wikipedia.org Ido",
963 - is=>"http://is.wikipedia.org Icelandic",
964 - it=>"http://it.wikipedia.org Italian",
965 - iu=>"http://iu.wikipedia.org Inuktitut",
966 - ja=>"http://ja.wikipedia.org Japanese",
967 - jbo=>"http://jbo.wikipedia.org Lojban",
968 - jv=>"http://jv.wikipedia.org Javanese",
969 - ka=>"http://ka.wikipedia.org Georgian",
970 - kaa=>"http://kaa.wikipedia.org Karakalpak",
971 - kab=>"http://kab.wikipedia.org Kabyle",
972 - kaw=>"http://kaw.wikipedia.org Kawi",
973 - kg=>"http://kg.wikipedia.org Kongo",
974 - ki=>"http://ki.wikipedia.org Kikuyu",
975 - kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo
976 - kk=>"http://kk.wikipedia.org Kazakh",
977 - kl=>"http://kl.wikipedia.org Greenlandic",
978 - km=>"http://km.wikipedia.org Khmer", # was Cambodian
979 - kn=>"http://kn.wikipedia.org Kannada",
980 - ko=>"http://ko.wikipedia.org Korean",
981 - kr=>"http://kr.wikipedia.org Kanuri",
982 - ks=>"http://ks.wikipedia.org Kashmiri",
983 - ksh=>"http://ksh.wikipedia.org Ripuarian",
984 - ku=>"http://ku.wikipedia.org Kurdish",
985 - kv=>"http://kv.wikipedia.org Komi",
986 - kw=>"http://kw.wikipedia.org Cornish", # was Kornish
987 - ky=>"http://ky.wikipedia.org Kirghiz",
988 - la=>"http://la.wikipedia.org Latin",
989 - lad=>"http://lad.wikipedia.org Ladino",
990 - lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch
991 - lbe=>"http://lbe.wikipedia.org Lak",
992 - lg=>"http://lg.wikipedia.org Ganda",
993 - li=>"http://li.wikipedia.org Limburgish",
994 - lij=>"http://lij.wikipedia.org Ligurian",
995 - lmo=>"http://lmo.wikipedia.org Lombard",
996 - ln=>"http://ln.wikipedia.org Lingala",
997 - lo=>"http://lo.wikipedia.org Laotian",
998 - ls=>"http://ls.wikipedia.org Latino Sine Flexione",
999 - lt=>"http://lt.wikipedia.org Lithuanian",
1000 - lv=>"http://lv.wikipedia.org Latvian",
1001 - mad=>"http://mad.wikipedia.org Madurese",
1002 - mak=>"http://mak.wikipedia.org Makasar",
1003 - map_bms=>"http://map-bms.wikipedia.org Banyumasan",
1004 - "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
1005 - mdf=>"http://mdf.wikipedia.org Moksha",
1006 - mg=>"http://mg.wikipedia.org Malagasy",
1007 - mh=>"http://mh.wikipedia.org Marshallese",
1008 - mi=>"http://mi.wikipedia.org Maori",
1009 - min=>"http://min.wikipedia.org Minangkabau",
1010 - minnan=>"http://minnan.wikipedia.org Minnan",
1011 - mk=>"http://mk.wikipedia.org Macedonian",
1012 - ml=>"http://ml.wikipedia.org Malayalam",
1013 - mn=>"http://mn.wikipedia.org Mongolian",
1014 - mo=>"http://mo.wikipedia.org Moldavian",
1015 - mr=>"http://mr.wikipedia.org Marathi",
1016 - ms=>"http://ms.wikipedia.org Malay",
1017 - mt=>"http://mt.wikipedia.org Maltese",
1018 - mus=>"http://mus.wikipedia.org Muskogee",
1019 - my=>"http://my.wikipedia.org Burmese",
1020 - myv=>"http://myv.wikipedia.org Erzya",
1021 - mzn=>"http://mzn.wikipedia.org Mazandarani",
1022 - na=>"http://na.wikipedia.org Nauruan", # was Nauru
1023 - nah=>"http://nah.wikipedia.org Nahuatl",
1024 - nap=>"http://nap.wikipedia.org Neapolitan",
1025 - nds=>"http://nds.wikipedia.org Low Saxon",
1026 - nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
1027 - "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
1028 - ne=>"http://ne.wikipedia.org Nepali",
1029 - new=>"http://new.wikipedia.org Nepal Bhasa",
1030 - ng=>"http://ng.wikipedia.org Ndonga",
1031 - nl=>"http://nl.wikipedia.org Dutch",
1032 - nov=>"http://nov.wikipedia.org Novial",
1033 - nrm=>"http://nrm.wikipedia.org Norman",
1034 - nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian
1035 - no=>"http://no.wikipedia.org Norwegian",
1036 - nv=>"http://nv.wikipedia.org Navajo", # was Avayo
1037 - ny=>"http://ny.wikipedia.org Chichewa",
1038 - oc=>"http://oc.wikipedia.org Occitan",
1039 - om=>"http://om.wikipedia.org Oromo",
1040 - or=>"http://or.wikipedia.org Oriya",
1041 - os=>"http://os.wikipedia.org Ossetic",
1042 - pa=>"http://pa.wikipedia.org Punjabi",
1043 - pag=>"http://pag.wikipedia.org Pangasinan",
1044 - pam=>"http://pam.wikipedia.org Kapampangan",
1045 - pap=>"http://pap.wikipedia.org Papiamentu",
1046 - pdc=>"http://pdc.wikipedia.org Pennsylvania German",
1047 - pi=>"http://pi.wikipedia.org Pali",
1048 - pih=>"http://pih.wikipedia.org Norfolk",
1049 - pl=>"http://pl.wikipedia.org Polish",
1050 - pms=>"http://pms.wikipedia.org Piedmontese",
1051 - ps=>"http://ps.wikipedia.org Pashto",
1052 - pt=>"http://pt.wikipedia.org Portuguese",
1053 - qu=>"http://qu.wikipedia.org Quechua",
1054 - rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance
1055 - rmy=>"http://rmy.wikipedia.org Romani",
1056 - rn=>"http://rn.wikipedia.org Kirundi",
1057 - ro=>"http://ro.wikipedia.org Romanian",
1058 - roa_rup=>"http://roa-rup.wikipedia.org Aromanian",
1059 - "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian",
1060 - roa_tara=>"http://roa-tara.wikipedia.org Tarantino",
1061 - "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
1062 - ru=>"http://ru.wikipedia.org Russian",
1063 - ru_sib=>"http://ru-sib.wikipedia.org Siberian",
1064 - "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
1065 - rw=>"http://rw.wikipedia.org Kinyarwanda",
1066 - sa=>"http://sa.wikipedia.org Sanskrit",
1067 - sah=>"http://sah.wikipedia.org Sakha",
1068 - sc=>"http://sc.wikipedia.org Sardinian",
1069 - scn=>"http://scn.wikipedia.org Sicilian",
1070 - sco=>"http://sco.wikipedia.org Scots",
1071 - sd=>"http://sd.wikipedia.org Sindhi",
1072 - se=>"http://se.wikipedia.org Northern Sami",
1073 - sg=>"http://sg.wikipedia.org Sango",
1074 - sh=>"http://sh.wikipedia.org Serbo-Croatian",
1075 - si=>"http://si.wikipedia.org Sinhala", # was Singhalese
1076 - simple=>"http://simple.wikipedia.org Simple English",
1077 - sk=>"http://sk.wikipedia.org Slovak",
1078 - sl=>"http://sl.wikipedia.org Slovene",
1079 - sm=>"http://sm.wikipedia.org Samoan",
1080 - sn=>"http://sn.wikipedia.org Shona",
1081 - so=>"http://so.wikipedia.org Somali", # was Somalian
1082 - sq=>"http://sq.wikipedia.org Albanian",
1083 - sr=>"http://sr.wikipedia.org Serbian",
1084 - srn=>"http://srn.wikipedia.org Sranan",
1085 - ss=>"http://ss.wikipedia.org Siswati",
1086 - st=>"http://st.wikipedia.org Sesotho",
1087 - stq=>"http://stq.wikipedia.org Saterland Frisian",
1088 - su=>"http://su.wikipedia.org Sundanese",
1089 - sv=>"http://sv.wikipedia.org Swedish",
1090 - sw=>"http://sw.wikipedia.org Swahili",
1091 - szl=>"http://szl.wikipedia.org Silesian",
1092 - ta=>"http://ta.wikipedia.org Tamil",
1093 - te=>"http://te.wikipedia.org Telugu",
1094 - test=>"http://test.wikipedia.org Test",
1095 - tet=>"http://tet.wikipedia.org Tetum",
1096 - tg=>"http://tg.wikipedia.org Tajik",
1097 - th=>"http://th.wikipedia.org Thai",
1098 - ti=>"http://ti.wikipedia.org Tigrinya",
1099 - tk=>"http://tk.wikipedia.org Turkmen",
1100 - tl=>"http://tl.wikipedia.org Tagalog",
1101 - tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
1102 - tn=>"http://tn.wikipedia.org Setswana",
1103 - to=>"http://to.wikipedia.org Tongan",
1104 - tokipona=>"http://tokipona.wikipedia.org Tokipona",
1105 - tpi=>"http://tpi.wikipedia.org Tok Pisin",
1106 - tr=>"http://tr.wikipedia.org Turkish",
1107 - ts=>"http://ts.wikipedia.org Tsonga",
1108 - tt=>"http://tt.wikipedia.org Tatar",
1109 - tum=>"http://tum.wikipedia.org Tumbuka",
1110 - turn=>"http://turn.wikipedia.org Turnbuka",
1111 - tw=>"http://tw.wikipedia.org Twi",
1112 - ty=>"http://ty.wikipedia.org Tahitian",
1113 - udm=>"http://udm.wikipedia.org Udmurt",
1114 - ug=>"http://ug.wikipedia.org Uighur",
1115 - uk=>"http://uk.wikipedia.org Ukrainian",
1116 - ur=>"http://ur.wikipedia.org Urdu",
1117 - uz=>"http://uz.wikipedia.org Uzbek",
1118 - ve=>"http://ve.wikipedia.org Venda", # was Lushaka
1119 - vec=>"http://vec.wikipedia.org Venetian",
1120 - vi=>"http://vi.wikipedia.org Vietnamese",
1121 - vls=>"http://vls.wikipedia.org West Flemish",
1122 - vo=>"http://vo.wikipedia.org Volap&uuml;k",
1123 - wa=>"http://wa.wikipedia.org Walloon",
1124 - war=>"http://war.wikipedia.org Waray-Waray",
1125 - wo=>"http://wo.wikipedia.org Wolof",
1126 - wuu=>"http://wuu.wikipedia.org Wu",
1127 - xal=>"http://xal.wikipedia.org Kalmyk",
1128 - xh=>"http://xh.wikipedia.org Xhosa",
1129 - yi=>"http://yi.wikipedia.org Yiddish",
1130 - yo=>"http://yo.wikipedia.org Yoruba",
1131 - za=>"http://za.wikipedia.org Zhuang",
1132 - zea=>"http://zea.wikipedia.org Zealandic",
1133 - zh=>"http://zh.wikipedia.org Chinese",
1134 - zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan",
1135 - "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan",
1136 - zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese",
1137 - "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese",
1138 - zh_yue=>"http://zh-yue.wikipedia.org Cantonese",
1139 - "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese",
1140 - zu=>"http://zu.wikipedia.org Zulu",
1141 - zz=>"&nbsp; All&nbsp;languages",
1142 - zzz=>"&nbsp; All&nbsp;languages except English"
1143 - );
1144 - # split every "<url> <name>" entry at the first whitespace: url -> %out_urls, name -> %out_languages
1145 - foreach $key (keys %wikipedias)
1146 - {
1147 - my $wikipedia = $wikipedias {$key} ;
1148 - $out_urls {$key} = $wikipedia ;
1149 - $out_languages {$key} = $wikipedia ;
1150 - $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
1151 - $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ;
1152 - $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ; # built for every key, also for non-language entries such as meta or commons
1153 - $out_article {$key} =~ s/ /_/g ;
1154 - # zz and zzz have no real url: their "url" part is the literal "&nbsp;" placeholder in front of the name
1155 - }
1156 -}
1157 -
1158 -# copied from WikiReports_EN.pl
1159 -sub InitReportNames # fill @out_report_descriptions; the position in the list is the report index used to look a title up
1160 -{
1161 - @out_report_descriptions = (
1162 - "Contributors", # 0
1163 - "New editors", # 1
1164 - "Active editors", # 2
1165 - "Very active editors", # 3
1166 - "Article count (official)", # 4
1167 - "Article count (alternate)", # 5
1168 - "New articles per day", # 6
1169 - "Edits per article", # 7
1170 - "Bytes per article", # 8
1171 - "Articles over 0.5 Kb", # 9
1172 - "Articles over 2 Kb", # 10
1173 - "Edits per month", # 11
1174 - "Database size", # 12
1175 - "Words", # 13
1176 - "Internal links", # 14
1177 - "Links to other Wikipedias", # 15
1178 - "Binaries", # 16
1179 - "External links", # 17
1180 - "Redirects", # 18
1181 - "Page requests per day", # 19
1182 - "Visits per day", # 20
1183 - "Overview recent months" # 21
1184 - ) ;
1185 -}
1186 -
 2+#!/usr/local/bin/perl
 3+ # Reads monthly statistics csv files per project and writes one combined StatisticsMonthly_<year>_<month>.csv
 4+ use lib "/home/ezachte/lib" ;
 5+ use EzLib ;
 6+ $trace_on_exit = $true ; # NOTE(review): $true and $trace_on_exit are presumably provided/used by EzLib -- confirm
 7+ ez_lib_version (2) ;
 8+ # last month to report on -- NOTE(review): hard-coded, presumably adapted by hand for every run
 9+ $month_last = "12" ;
 10+ $year_last = 2010 ;
 11+ # first month to report on
 12+ $month_start = "1" ;
 13+ $year_start = 2008 ;
 14+ # same boundaries as month numbers, plus 'twelve months before' and 'one month before' the last month
 15+ $m_start = &months_since_2000_01 ($year_start, $month_start) ;
 16+ $m_last = &months_since_2000_01 ($year_last, $month_last) ;
 17+ $m_last_12 = $m_last - 12 ;
 18+ $m_last_1 = $m_last - 1 ;
 19+ # zero-pad only after the month numbers above have been computed; the padded form ends up in the output file name
 20+ $month_last = sprintf ("%02d", $month_last) ; # 1 -> 01
 21+
 22+ # set defaults mainly for tests on local machine
 23+ default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;
 24+ # 'use' is processed at compile time, so Getopt::Std is loaded before any statement above runs
 25+ use Getopt::Std ;
 26+
 27+# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
 28+# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;
 29+
 30+ $maxpopularwikis = 25 ; # NOTE(review): WriteMonthlyData compares against a literal 25 instead of reading this variable
 31+ @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ; # rows of the 'Per Project' tables; '*' stands for all projects
 32+ # main sequence: parse arguments, load name tables, read the input csv files, write the output csv file
 33+ &LogArguments ;
 34+ &ParseArguments ;
 35+ &InitProjectNames ;
 36+ &InitReportNames ;
 37+ &ReadStatisticsMonthly ;
 38+ &WriteMonthlyData ;
 39+ exit ;
 40+
 41+sub LogArguments
 42+{
 43+ my $arguments ;
 44+ getopt ("iolpft", \%options) ;
 45+ foreach $arg (sort keys %options)
 46+ { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
 47+ print ("\nArguments\n$arguments\n") ;
 48+# &Log ("\nArguments\n$arguments\n") ;
 49+}
 50+
 51+sub ParseArguments # set $path_in, $path_out and $file_csv_out, then call SetComparisonPeriods
 52+{
 53+# my @options ;
 54+# getopt ("io", \%options) ;
 55+
 56+# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
 57+# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
 58+
 59+# $path_in = $options {"i"} ;
 60+# $path_out = $options {"o"} ;
 61+
 62+# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
 63+# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
 64+ # NOTE(review): the -i/-o handling above is commented out; both folders are hard-coded here, so command-line values are not used
 65+ $path_in = "w:/# out bayes" ;
 66+ $path_out = "w:/@ report card/data" ;
 67+
 68+ print "Input folder: $path_in\n" ;
 69+ print "Output folder: $path_out\n" ;
 70+ print "\n" ;
 71+ # output file name carries the last reported year and month
 72+ $file_csv_out = "$path_out/StatisticsMonthly_${year_last}_${month_last}.csv" ;
 73+ # presumably prepares the month ranges used for comparisons -- TODO confirm against SetComparisonPeriods
 74+ &SetComparisonPeriods ;
 75+}
 76+
 77+sub ReadStatisticsMonthly
 78+{
 79+ &ReadStatisticsMonthlyForProject ("wb") ;
 80+ &ReadStatisticsMonthlyForProject ("wk") ;
 81+ &ReadStatisticsMonthlyForProject ("wn") ;
 82+ &ReadStatisticsMonthlyForProject ("wp") ;
 83+ &ReadStatisticsMonthlyForProject ("wq") ;
 84+ &ReadStatisticsMonthlyForProject ("ws") ;
 85+ &ReadStatisticsMonthlyForProject ("wv") ;
 86+ &ReadStatisticsMonthlyForProject ("wx") ;
 87+
 88+ &ReadStatisticsPerBinariesExtensionCommons ;
 89+}
 90+
 91+sub ReadStatisticsMonthlyForProject
 92+{
 93+ my $project = shift;
 94+
 95+ $all_projects = "*" ;
 96+
 97+ my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
 98+ my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;
 99+
 100+ if (! -e $file_csv_in_1)
 101+ { &Abort ("Input file '$file_csv_in_1' not found") ; }
 102+ if (! -e $file_csv_in_2)
 103+ { &Abort ("Input file '$file_csv_in_2' not found") ; }
 104+
 105+ print "Read '$file_csv_in_1'\n" ;
 106+ open CSV_IN, '<', $file_csv_in_1 ;
 107+
 108+ undef %lines ;
 109+ while ($line = <CSV_IN>)
 110+ {
 111+ ($language,$date,$counts) = split (',', $line, 3) ;
 112+
 113+ next if $language eq 'commons' and $project ne 'wx' ;
 114+ next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on
 115+
 116+ ($month,$day,$year) = split ('\/', $date) ;
 117+ my $m = &months_since_2000_01 ($year,$month) ;
 118+ next if $m < $m_start ;
 119+
 120+ $lines {$language}{$m} = $line ;
 121+ $languages {$language}++ ;
 122+ }
 123+
 124+ foreach $language (sort keys %languages)
 125+ {
 126+ for ($m = $m_start + 1 ; $m <= $m_last ; $m++)
 127+ {
 128+ if ($lines {$language}{$m} eq '')
 129+ { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
 130+ }
 131+
 132+ for ($m = $m_start ; $m <= $m_last ; $m++)
 133+ {
 134+ $line = $lines {$language}{$m} ;
 135+ chomp $line ;
 136+ ($language,$date,$counts) = split (',', $line, 3) ;
 137+ @fields = split (',', $counts) ;
 138+
 139+ if ($project eq "wp")
 140+ {
 141+ foreach $f (1,4,6,11) # new editors, articles, new articles, edits
 142+ {
 143+ $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
 144+
 145+ $totals {"$f,$m"} += $fields [$f] ;
 146+
 147+ $totals_project {"$f,$m"} {$project} += $fields [$f] ;
 148+ $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;
 149+
 150+ # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
 151+ }
 152+ }
 153+ else
 154+ {
 155+ foreach $f (1,4)
 156+ {
 157+ if ($f <= 3)
 158+ {
 159+ $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
 160+ $totals {"$f,$m"} += $fields [$f] ;
 161+ }
 162+
 163+
 164+ # ignore editor count on commons for totals, most editors are already counted for other project
 165+ # (even for several projects, to be tuned after centralauth dump is available)
 166+ # count for all_projects only Wikipedia articles
 167+ if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
 168+ { $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ; }
 169+
 170+ if ($language eq 'commons')
 171+ { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
 172+ else
 173+ { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }
 174+
 175+ # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
 176+ }
 177+ foreach $f (6,11)
 178+ {
 179+ $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;
 180+ if ($language eq 'commons')
 181+ { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
 182+ else
 183+ { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }
 184+ # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
 185+ }
 186+
 187+ }
 188+ }
 189+ }
 190+ close CSV_IN ;
 191+
 192+ # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
 193+ # but use f = column count in StatisticsMonthly.csv
 194+
 195+ print "Read '$file_csv_in_2'\n" ;
 196+ open CSV_IN, '<', $file_csv_in_2 ;
 197+
 198+ undef %lines ;
 199+ while ($line = <CSV_IN>)
 200+ {
 201+ chomp $line ;
 202+ ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
 203+
 204+ next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
 205+ # next if $language eq 'commons' ; # ignore editor count on commons alltogether, most are already counted for other project
 206+ # (even for several projects, to be tuned after centralauth dump is available)
 207+
 208+ if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
 209+ if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces
 210+
 211+ ($month,$day,$year) = split ('\/', $date) ;
 212+ my $m = &months_since_2000_01 ($year,$month) ;
 213+ next if $m < $m_start ;
 214+
 215+ $lines {$language}{$m} = $line ;
 216+ $languages {$language}++ ;
 217+ }
 218+
 219+ foreach $language (sort keys %languages)
 220+ {
 221+ for ($m = $m_start+1 ; $m <= $m_last ; $m++)
 222+ {
 223+ if ($lines {$language}{$m} eq '')
 224+ { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
 225+ }
 226+
 227+ for ($m = $m_start ; $m <= $m_last ; $m++)
 228+ {
 229+ $line = $lines {$language}{$m} ;
 230+ chomp $line ;
 231+ ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
 232+ @fields = split (',', $counts) ;
 233+
 234+ foreach $f (2,3) # editors_gt_5, editors_gt_100
 235+ {
 236+ # count user with over x edits
 237+ # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
 238+ # thresholds = 1,3,5,10,25,32,50,100,etc
 239+ if ($f == 2) { $f2 = 2 ; }
 240+ if ($f == 3) { $f2 = 7 ; }
 241+
 242+ $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;
 243+ $totals {"$f,$m"} += $fields [$f2] ;
 244+
 245+ # ignore editor count on commons for totals, most editors are already counted for other project
 246+ # (even for several projects, to be tuned after centralauth dump is available)
 247+ if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
 248+ { $totals_project {"$f,$m"} {$all_projects} += $fields [$f2] ; }
 249+
 250+ if ($language eq 'commons')
 251+ { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
 252+ else
 253+ { $totals_project {"$f,$m"} {$project} += $fields [$f2] ; }
 254+ }
 255+ }
 256+ }
 257+ close CSV_IN ;
 258+}
 259+
 260+sub ReadStatisticsPerBinariesExtensionCommons # read per-column counts for 'commons' and remember the ten largest columns of the latest month in @extndxs
 261+{
 262+ my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
 263+ my $mmax = -1 ; # highest month number seen so far
 264+
 265+ if (! -e $file_csv_in)
 266+ { &Abort ("Input file '$file_csv_in' not found") ; }
 267+
 268+ print "Read '$file_csv_in'\n" ;
 269+ open CSV_IN, '<', $file_csv_in ; # NOTE(review): result of open is not checked
 270+ while ($line = <CSV_IN>)
 271+ {
 272+ chomp $line ;
 273+ ($language,$date,$counts) = split (',', $line, 3) ;
 274+
 275+ if ($language ne "commons") { next ; }
 276+ # the row dated "00/0000" is stored under month key -1; presumably it holds the column labels (extensions) -- TODO confirm
 277+ if ($date eq "00/0000")
 278+ {
 279+ @fields = split (',', $counts) ;
 280+ $field_ndx = 0 ;
 281+ foreach $field (@fields)
 282+ {
 283+ $ext_cnt {-1}{$field_ndx} = $field ;
 284+ # print "EXT_CNT $field_ndx : $field\n" ;
 285+ $field_ndx ++ ;
 286+ }
 287+ next ;
 288+ }
 289+ # dates in this file are split into two parts only: month and year
 290+ ($month,$year) = split ('\/', $date) ;
 291+ my $m = &months_since_2000_01 ($year,$month) ;
 292+ next if $m < $m_start ;
 293+
 294+ if ($m > $mmax)
 295+ { $mmax = $m ; }
 296+ # store the count per column and add it to the month total
 297+ @fields = split (',', $counts) ;
 298+ $field_ndx = 0 ;
 299+ foreach $field (@fields)
 300+ {
 301+ $ext_cnt {$m}{$field_ndx} = $field ;
 302+ $ext_tot {$m} += $field ;
 303+ $field_ndx ++ ;
 304+ }
 305+ }
 306+ close CSV_IN ;
 307+ # NOTE(review): when no monthly row was read, $mmax is still -1 and the "00/0000" row is ranked instead of real counts
 308+ %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
 309+ @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ; # column indexes, largest count first
 310+ # keep at most the first ten column indexes
 311+ $extcnt = 0 ;
 312+ foreach $extndx (@ext_cnt_mmax)
 313+ {
 314+ # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
 315+ push @extndxs, $extndx ;
 316+ if ($extcnt++ >= 9) { last ; }
 317+ }
 318+}
 319+
 320+sub ReadMediaTrends # stub: the whole body is commented out, so calling it does nothing
 321+{
 322+# open FILE_UV, '<', $file_regions_UV ;
 323+# close FILE_UV ;
 324+
 325+# open FILE_REACH, '<', $file_regions_Reach ;
 326+# close FILE_REACH ;
 327+}
 328+
 329+sub WriteMonthlyData
 330+{
 331+ print "Write file '$file_csv_out'\n" ;
 332+ open CSV_OUT, '>', $file_csv_out ;
 333+ $output = "" ;
 334+ foreach $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
 335+ {
 336+
 337+ $output .= "\n,${out_report_descriptions [$f]} - Absolute - Per Wiki\n" ;
 338+ $output .= "$csv_recent_months,%inc year, %inc month\n" ;
 339+
 340+ $line = ",Total," ;
 341+ for ($m = $m_start ; $m <= $m_last ; $m++)
 342+ { $line .= $totals {"$f,$m"} . "," ; }
 343+
 344+ # growth in one year
 345+ if ($totals {"$f,$m_last_12"} != 0)
 346+ { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m_last"} / $totals {"$f,$m_last_12"}) - 100). "%," ; }
 347+ else
 348+ { $line .= "n.a.," ; }
 349+
 350+ # growth in one month
 351+ if ($totals {"$f,$m_last_1"} != 0)
 352+ { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m_last"} / $totals {"$f,$m_last_1"}) - 100). "%," ; }
 353+ else
 354+ { $line .= "n.a.," ; }
 355+
 356+ $line =~ s/,$// ;
 357+ $output .= "$line\n" ;
 358+
 359+ # sort by absolute amount for last month
 360+ %values_f_12 = %{$values {"$f,$m_last"}} ;
 361+ $index = 1 ;
 362+ foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
 363+ {
 364+ ($project,$language) = split (",", $key) ;
 365+ $language_name = $out_languages {$language} ;
 366+ if (($project ne "wp") && ($project ne "wx"))
 367+ { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
 368+ else
 369+ { $line = "$index,$language_name," ; }
 370+
 371+ for ($m = $m_start ; $m <= $m_last ; $m++)
 372+ { $line .= $values {"$f,$m"} {$key} . "," ; }
 373+
 374+ if ($values {"$f,$m_last_12"} {$key} != 0)
 375+ { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m_last"} {$key} / $values {"$f,$m_last_12"} {$key}) - 100). "%," ; }
 376+ else
 377+ { $line .= "n.a.," ; }
 378+
 379+ if ($values {"$f,$m_last_1"} {$key} != 0)
 380+ { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m_last"} {$key} / $values {"$f,$m_last_1"} {$key}) - 100). "%," ; }
 381+ else
 382+ { $line .= "n.a.," ; }
 383+
 384+ $line =~ s/,$// ;
 385+ $output .= "$line\n" ;
 386+
 387+ if ($index++ >= 25) { last ; }
 388+ }
 389+
 390+ $output .= "\n,${out_report_descriptions [$f]} - Absolute - Per Project\n" ;
 391+ if ($f <= 3) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
 392+ { $output .= ",Note: All projects does not include Commons\n" ; }
 393+ $output .= "$csv_recent_months,%inc year, %inc month\n" ;
 394+ foreach $project (sort {$totals_project {"$f,$m_last"} {$b} <=> $totals_project {"$f,$m_last"} {$a}} @projects)
 395+ {
 396+# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
 397+
 398+ if ($project eq 'commons')
 399+ { $line = ",Commons," ; }
 400+ else
 401+ { $line = "," . &GetProjectName ($project) . "," ; }
 402+
 403+ for ($m = $m_start ; $m <= $m_last ; $m++)
 404+ { $line .= $totals_project {"$f,$m"} {$project} . "," ; }
 405+
 406+ if ($totals_project {"$f,$m_last_12"} {$project} != 0)
 407+ { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m_last"} {$project} / $totals_project {"$f,$m_last_12"} {$project}) - 100). "%," ; }
 408+ else
 409+ { $line .= "n.a.," ; }
 410+
 411+ if ($totals_project {"$f,$m_last_1"} {$project} != 0)
 412+ { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m_last"} {$project} / $totals_project {"$f,$m_last_1"} {$project}) - 100). "%," ; }
 413+ else
 414+ { $line .= "n.a.," ; }
 415+
 416+ $line =~ s/,$// ;
 417+ $output .= "$line\n" ;
 418+ }
 419+
 420+ $output .= "\n,${out_report_descriptions [$f]} - Indexed - Per Wiki\n" ;
 421+ $output .= "$csv_recent_months\n" ;
 422+
 423+ # sort by absolute amount for last month
 424+ $index = 1 ;
 425+ foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
 426+ {
 427+ # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;
 428+
 429+ ($project,$language) = split (",", $key) ;
 430+ $language_name = $out_languages {$language} ;
 431+ if (($project ne "wp") && ($project ne "wx"))
 432+ { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
 433+ else
 434+ { $line = "$index,$language_name," ; }
 435+
 436+ # $value_100 = $values {"$f,$m_last_12"} {$key} ;
 437+ $value_100 = $values {"$f,$m_start"} {$key} ;
 438+ for ($m = $m_start ; $m <= $m_last ; $m++)
 439+ {
 440+ if ($value_100 != 0)
 441+ { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m"} {$key} / $value_100)) . "," ; }
 442+ else
 443+ { $line .= "," ; }
 444+ }
 445+ $line =~ s/,$// ;
 446+ $output .= "$line\n" ;
 447+
 448+ # put totals last in chart to show line on top of others
 449+ if ($index == 9)
 450+ {
 451+ $line = ",Total," ;
 452+ $total_100 = $totals {"$f,$m_last_12"} ;
 453+ for ($m = $m_start ; $m <= $m_last ; $m++)
 454+ {
 455+ if ($total_100 != 0)
 456+ { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m"} / $total_100)) . "," ; }
 457+ else
 458+ { $line .= "," ; }
 459+ }
 460+ $line .= ",(sorted here to make it top-most line out of 10 in Excel)" ;
 461+ $output .= "$line\n" ;
 462+ }
 463+
 464+ if ($index++ >= 25) { last ; }
 465+ }
 466+
 467+ $output .= "\n,${out_report_descriptions [$f]} - Indexed - Per Project\n" ;
 468+ $output .= "$csv_recent_months,%inc year, %inc month\n" ;
 469+ foreach $project (sort {$totals_project {"$f,$m_last"} {$b} <=> $totals_project {"$f,$m_last"} {$a}} @projects)
 470+ {
 471+# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
 472+
 473+ if ($project eq 'commons')
 474+ { $line = ",Commons," ; }
 475+ else
 476+ { $line = "," . &GetProjectName ($project) . "," ; }
 477+
 478+ # $value_100 = $totals_project {"$f,$m_last_12"} {$project} ;
 479+ $value_100 = $totals_project {"$f,$m_start"} {$project} ;
 480+ for ($m = $m_start ; $m <= $m_last ; $m++)
 481+ {
 482+ if ($value_100 != 0)
 483+ { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m"} {$project} / $value_100)) . "," ; }
 484+ else
 485+ { $line .= "," ; }
 486+ }
 487+ $line =~ s/,$// ;
 488+ $output .= "$line\n" ;
 489+ }
 490+ $output .= "\n," . '=' x 150 . "\n" ;
 491+ }
 492+
 493+ print CSV_OUT $output ;
 494+
 495+ $output = "\n,Binaries per month - Absolute\n" ;
 496+ $output .= "$csv_recent_months,%inc year, %inc month\n" ;
 497+ $output .= "\n$csv_recent_months,%inc year,%inc month\n" ;
 498+
 499+ $line = ",Total," ;
 500+ for ($m = $m_start ; $m <= $m_last ; $m++)
 501+ { $line .= $ext_tot {$m} . "," ; }
 502+
 503+ if ($ext_tot {$m_last_12} != 0)
 504+ { $line .= sprintf ("%.1f", 100 * ($ext_tot {$m_last} / $ext_tot {$m_last_12}) - 100). "%," ; }
 505+ else
 506+ { $line .= "n.a.," ; }
 507+
 508+ if ($ext_tot {$m_last_1} != 0)
 509+ { $line .= sprintf ("%.1f", 100 * ($ext_tot {$m_last} / $ext_tot {$m_last_1}) - 100). "%," ; }
 510+ else
 511+ { $line .= "n.a.," ; }
 512+
 513+ $line =~ s/,$// ;
 514+ $output .= "$line\n" ;
 515+
 516+ $index = 0 ;
 517+ # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
 518+ for ($e = $#extndxs - 9 ; $e <= $#extndxs ; $e++)
 519+ {
 520+ $index++ ;
 521+
 522+ if ($e < 0)
 523+ {
 524+ $line = "$index,xxx," ;
 525+ for ($m = $m_start ; $m <= $m_last ; $m++)
 526+ { $line .= "," ; }
 527+ }
 528+ else
 529+ {
 530+ $extndx = $extndxs [$e] ;
 531+ $line = "$index,${ext_cnt {-1}{$extndx}}," ;
 532+
 533+ for ($m = $m_start ; $m <= $m_last ; $m++)
 534+ { $line .= $ext_cnt {$m}{$extndx} . "," ; }
 535+
 536+ if ($ext_cnt {$m_last_12}{$extndx} != 0)
 537+ { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m_last}{$extndx} / $ext_cnt {$m_last_12}{$extndx}) - 100). "%," ; }
 538+ else
 539+ { $line .= "n.a.," ; }
 540+
 541+ if ($ext_cnt {$m_last_1}{$extndx} != 0)
 542+ { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m_last}{$extndx} / $ext_cnt {$m_last_1}{$extndx}) - 100). "%," ; }
 543+ else
 544+ { $line .= "n.a.," ; }
 545+ }
 546+
 547+ $line =~ s/,$// ;
 548+ $output .= "$line\n" ;
 549+ }
 550+
 551+ print CSV_OUT $output ;
 552+
 553+ $output = "\n,Binaries per month - Indexed\n" ;
 554+ $output .= "$csv_recent_months\n" ;
 555+
 556+ $index = 0 ;
 557+ # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
 558+ for ($e = $#extndxs - 9 ; $e <= $#extndxs ; $e++)
 559+ {
 560+ $index++ ;
 561+
 562+ if ($e < 0)
 563+ {
 564+ $line = "$index,xxx," ;
 565+ for ($m = $m_start ; $m <= $m_last ; $m++)
 566+ { $line .= "," ; }
 567+ }
 568+ else
 569+ {
 570+ $extndx = $extndxs [$e] ;
 571+ $line = "$index,${ext_cnt {-1}{$extndx}}," ;
 572+ $ext_cnt_m0 = $ext_cnt {$m_last-12}{$extndx} ;
 573+ # $ext_cnt_m0 = $ext_cnt {$m_start}{$extndx} ;
 574+ for ($m = $m_start ; $m <= $m_last ; $m++)
 575+ {
 576+ if ($ext_cnt_m0 > 0)
 577+ { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m}{$extndx} / $ext_cnt_m0)). "," ; }
 578+ else
 579+ { $line .= "," ; }
 580+ }
 581+ }
 582+
 583+ $line =~ s/,$// ;
 584+ $output .= "$line\n" ;
 585+ }
 586+ print CSV_OUT $output ;
 587+ close CSV_OUT ;
 588+
 589+ print "\nOutput written to $file_csv_out\n\n" ;
 590+}
 591+
 592+sub SetComparisonPeriods
 593+{
 594+ my @months = qw(Xxx Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec) ;
 595+
 596+ my ($file_year_month_last, $year_month_last, $year_month_last_minus_12, $year_month_last_minus_1) ;
 597+
 598+ $year_month_last = sprintf ("%04d/%02d",$year_last, $month_last) ; # for filenames
 599+ $file_year_month_last = sprintf ("%04d_%02d",$year_last, $month_last) ; # for filenames
 600+ $year_month_last_minus_12 = sprintf ("%04d/%02d",$year_last - 1,$month_last) ;
 601+ $year_month_last_minus_1 = $month_last > 1 ? sprintf ("%04d/%02d",$year_last,$month_last-1): sprintf ("%04d/%02d",$year_last - 1 ,12) ;
 602+
 603+ print "\nWrite trend data up till $year_month_last\n\n" ;
 604+ print "Compare with previous month: $year_month_last_minus_1, previous year: $year_month_last_minus_12\n\n" ;
 605+
 606+ $csv_recent_months = ",project," ;
 607+ $year = $year_start ;
 608+ $month = $month_start ;
 609+ for ($m = $m_start ; $m <= $m_last ; $m++)
 610+ {
 611+ $recent_months [$m] = sprintf ("%04d/%02d", $year, $month) ;
 612+ $csv_recent_months .= sprintf ("%02d/%04d", $month, $year) . "," ;
 613+ ($year,$month) = $month < 12 ? ($year,$month+1) : ($year+1,1) ;
 614+ }
 615+ $csv_recent_months =~ s/,$// ;
 616+}
 617+
 618+#sub WriteCsvFilesPerPeriod
 619+#{
 620+# foreach $period (sort keys %totals)
 621+# {
 622+# &LogT ("\nWrite totals per $period: ") ;
 623+# $desc = $descriptions {$period} ;
 624+
 625+# foreach $project (sort keys %{$totals {$period}})
 626+# {
 627+# &Log ("$project ") ;
 628+
 629+# $dir_out = "$path_out/csv_$project" ;
 630+# if (! -d $dir_out)
 631+# { mkdir $dir_out, 0777 ; }
 632+
 633+# $file_out = "$dir_out/$desc.csv" ;
 634+
 635+# open CSV, ">", $file_out ;
 636+# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}})
 637+# {
 638+# ($language,$yearmonth) = split (",", $key) ;
 639+# # print "PERIOD $period PROJECT $project KEY $key\n" ;
 640+# if ($period eq "month")
 641+# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; }
 642+# else
 643+# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; }
 644+# }
 645+# close CSV ;
 646+# }
 647+# }
 648+#}
 649+
 650+#sub WriteCsvHtmlFilesPopularWikis
 651+#{
 652+# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ;
 653+
 654+# $dir_out = "$path_out/csv_wp" ;
 655+# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$file_year_month_last.csv" ;
 656+
 657+## extend with normalized counts
 658+## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv
 659+
 660+# open CSV, ">", $file_out ;
 661+# print CSV $csv_recent_months ;
 662+
 663+# # write per popular language+wiki 13 months of page view totals
 664+# $lines = 0 ;
 665+# foreach $line (@totals_lastmonth)
 666+# {
 667+# if (++$lines > $maxpopularwikis) { last ; }
 668+
 669+# ($project, $language) = split (',', $line) ;
 670+# $largest_projects {"$project-$language"} ++ ;
 671+
 672+# $language_name = $out_languages {$language} ;
 673+
 674+# if (($project ne "wp") && ($project ne "wx"))
 675+# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
 676+# else
 677+# { print CSV "$language_name," ; }
 678+
 679+## %test = %{$totals {"month"} {"wp"} };
 680+## %test2 = @recent_months ;
 681+# for ($m = 0 ; $m <= 12 ; $m++)
 682+# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; }
 683+# print CSV "\n" ;
 684+# }
 685+
 686+# print CSV "\n$csv_recent_months" ;
 687+
 688+# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100
 689+# $lines = 0 ;
 690+# foreach $line (@totals_lastmonth)
 691+# {
 692+# if (++$lines > $maxpopularwikis) { last ; }
 693+
 694+# ($project, $language) = split (',', $line) ;
 695+# $language_name = $out_languages {$language} ;
 696+
 697+# if (($project ne "wp") && ($project ne "wx"))
 698+# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
 699+# else
 700+# { print CSV "$language_name," ; }
 701+
 702+# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ;
 703+# for ($m = 0 ; $m <= 12 ; $m++)
 704+# {
 705+# if ($recent_month_0 > 0)
 706+# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; }
 707+# else
 708+# { print CSV "," ; }
 709+# }
 710+
 711+# print CSV "\n" ;
 712+# }
 713+# close CSV ;
 714+
 715+# # write ready made table rows for report card: page views top 25 movers shakers
 716+# foreach $key (keys %largest_projects)
 717+# {
 718+# ($project,$language) = split ('-', $key) ;
 719+
 720+# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_last"} ;
 721+# $total_prevmonth = $totals {"month"} {$project} {"$language,$year_month_last_minus_1"} ;
 722+# $total_prevyear = $totals {"month"} {$project} {"$language,$year_month_last_minus_12"} ;
 723+
 724+# $perc_month = "no data" ;
 725+# $perc_year = "no data" ;
 726+
 727+# if ($total_prevyear > 0)
 728+# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; }
 729+# if ($total_prevyear > 0)
 730+# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; }
 731+
 732+# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ;
 733+
 734+# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ;
 735+
 736+# $project_name = &GetProjectName ($project) ;
 737+# $language_name = $out_languages {$language} ;
 738+
 739+# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ;
 740+# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ;
 741+# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ;
 742+# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ;
 743+# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ;
 744+
 745+# $growth_figures_text {"$perc_month-$project-$language"} = $line ;
 746+# $growth_figures_html {"$perc_month-$project-$language"} = $html ;
 747+# }
 748+
 749+# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$file_year_month_last.html" ;
 750+
 751+# open HTML, ">", $file_html ;
 752+# foreach $key (sort {$b <=> $a} keys %growth_figures_text)
 753+# {
 754+# print "$key: ". $growth_figures_text {$key} . "\n" ;
 755+# print HTML $growth_figures_html {$key} ;
 756+# }
 757+# close HTML ;
 758+#}
 759+
 760+
 761+sub GetProjectName
 762+{
 763+ my $project =shift ;
 764+
 765+ if ($project eq "wp") { $project_name = "Wikipedia"; }
 766+ elsif ($project eq "wb") { $project_name = "Wikibooks"; }
 767+ elsif ($project eq "wk") { $project_name = "Wiktionary"; }
 768+ elsif ($project eq "wx") { $project_name = "Other Wikis"; }
 769+ elsif ($project eq "wn") { $project_name = "Wikinews"; }
 770+ elsif ($project eq "wq") { $project_name = "Wikiquote"; }
 771+ elsif ($project eq "ws") { $project_name = "Wikisource"; }
 772+ elsif ($project eq "wv") { $project_name = "Wikiversity"; }
 773+ elsif ($project eq "*") { $project_name = "All projects"; }
 774+
 775+ return ($project_name) ;
 776+}
 777+
 778+sub MonthsSinceYearAgo
 779+{
 780+ my $year = shift ;
 781+ my $month = shift ;
 782+ return 12 - (($year_last - $year) * 12 + $month_last - $month) ;
 783+}
 784+
 785+sub MonthsSinceFirstMonthToShow
 786+{
 787+ my $year = shift ;
 788+ my $month = shift ;
 789+ return ($year - 2008) * 12 + ($month - 1) ;
 790+}
 791+
 792+# code year,month as monthes since january 2000 (1 byte)
 793+sub months_since_2000_01
 794+{
 795+ my $year = shift ;
 796+ my $month = shift ;
 797+ my $m = ($year - 2000) * 12 + $month ;
 798+ return $m ;
 799+}
 800+
 801+#sub Log
 802+#{
 803+# $msg = shift ;
 804+# print $msg ;
 805+# print LOG $msg ;
 806+#}
 807+
 808+#sub LogT
 809+#{
 810+# $msg = shift ;
 811+# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
 812+# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ;
 813+# $msg =~ s/^(\n*)/$1$time/s ;
 814+# &Log ($msg) ;
 815+#}
 816+
 817+sub MmSs
 818+{
 819+ my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
 820+ return (sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss)) ;
 821+}
 822+
 823+sub Abort
 824+{
 825+ my $msg = shift ;
 826+ print "$msg\nExecution aborted." ;
 827+ # to do: log also to file
 828+ exit ;
 829+}
 830+
 831+sub InitProjectNames
 832+{
 833+ # copied from WikiReports.pl
 834+
 835+ %wikipedias = (
 836+# mediawiki=>"http://wikimediafoundation.org Wikimedia",
 837+ nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
 838+ sources=>"http://wikisource.org Old&nbsp;Wikisource",
 839+ meta=>"http://meta.wikimedia.org Meta-Wiki",
 840+ beta=>"http://beta.wikiversity.org Beta",
 841+ species=>"http://species.wikipedia.org WikiSpecies",
 842+ commons=>"http://commons.wikimedia.org Commons",
 843+ foundation=>"http://wikimediafoundation.org Wikimedia&nbsp;Foundation",
 844+ sep11=>"http://sep11.wikipedia.org In&nbsp;Memoriam",
 845+ nlwikimedia=>"http://nl.wikimedia.org Wikimedia&nbsp;Nederland",
 846+ plwikimedia=>"http://pl.wikimedia.org Wikimedia&nbsp;Polska",
 847+ mediawiki=>"http://www.mediawiki.org MediaWiki",
 848+ dewikiversity=>"http://de.wikiversity.org Wikiversit&auml;t",
 849+ frwikiversity=>"http://fr.wikiversity.org Wikiversit&auml;t",
 850+ wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
 851+ wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
 852+ aa=>"http://aa.wikipedia.org Afar",
 853+ ab=>"http://ab.wikipedia.org Abkhazian",
 854+ af=>"http://af.wikipedia.org Afrikaans",
 855+ ak=>"http://ak.wikipedia.org Akan", # was Akana
 856+ als=>"http://als.wikipedia.org Alemannic", # was Elsatian
 857+ am=>"http://am.wikipedia.org Amharic",
 858+ an=>"http://an.wikipedia.org Aragonese",
 859+ ang=>"http://ang.wikipedia.org Anglo-Saxon",
 860+ ar=>"http://ar.wikipedia.org Arabic",
 861+ arc=>"http://arc.wikipedia.org Aramaic",
 862+ as=>"http://as.wikipedia.org Assamese",
 863+ ast=>"http://ast.wikipedia.org Asturian",
 864+ av=>"http://av.wikipedia.org Avar", # was Avienan
 865+ ay=>"http://ay.wikipedia.org Aymara",
 866+ az=>"http://az.wikipedia.org Azeri", # was Azerbaijani
 867+ ba=>"http://ba.wikipedia.org Bashkir",
 868+ bar=>"http://bar.wikipedia.org Bavarian",
 869+ bat_smg=>"http://bat-smg.wikipedia.org Samogitian",
 870+ "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
 871+ bcl=>"http://bcl.wikipedia.org Central Bicolano",
 872+ be=>"http://be.wikipedia.org Belarusian",
 873+ "be-x-old"=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
 874+ be_x_old=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
 875+ bg=>"http://bg.wikipedia.org Bulgarian",
 876+ bh=>"http://bh.wikipedia.org Bihari",
 877+ bi=>"http://bi.wikipedia.org Bislama",
 878+ bm=>"http://bm.wikipedia.org Bambara",
 879+ bn=>"http://bn.wikipedia.org Bengali",
 880+ bo=>"http://bo.wikipedia.org Tibetan",
 881+ bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri",
 882+ br=>"http://br.wikipedia.org Breton",
 883+ bs=>"http://bs.wikipedia.org Bosnian",
 884+ bug=>"http://bug.wikipedia.org Buginese",
 885+ bxr=>"http://bxr.wikipedia.org Buryat",
 886+ ca=>"http://ca.wikipedia.org Catalan",
 887+ cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano",
 888+ "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
 889+ cdo=>"http://cdo.wikipedia.org Min Dong",
 890+ ce=>"http://ce.wikipedia.org Chechen",
 891+ ceb=>"http://ceb.wikipedia.org Cebuano",
 892+ ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru
 893+ cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw
 894+ chr=>"http://chr.wikipedia.org Cherokee",
 895+ chy=>"http://chy.wikipedia.org Cheyenne", # was Sets&ecirc;hest&acirc;hese
 896+ co=>"http://co.wikipedia.org Corsican",
 897+ cr=>"http://cr.wikipedia.org Cree",
 898+ crh=>"http://crh.wikipedia.org Crimean Tatar",
 899+ cs=>"http://cs.wikipedia.org Czech",
 900+ csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian
 901+ cu=>"http://cv.wikipedia.org Old Church Slavonic",
 902+ cv=>"http://cv.wikipedia.org Chuvash", # was Cavas
 903+ cy=>"http://cy.wikipedia.org Welsh",
 904+ da=>"http://da.wikipedia.org Danish",
 905+ de=>"http://de.wikipedia.org German",
 906+ diq=>"http://diq.wikipedia.org Zazaki",
 907+ dk=>"http://dk.wikipedia.org Danish",
 908+ dsb=>"http://dsb.wikipedia.org Lower Sorbian",
 909+ dv=>"http://dv.wikipedia.org Divehi",
 910+ dz=>"http://dz.wikipedia.org Dzongkha",
 911+ ee=>"http://ee.wikipedia.org Ewe",
 912+ el=>"http://el.wikipedia.org Greek",
 913+ eml=>"http://eml.wikipedia.org Emilian-Romagnol",
 914+ en=>"http://en.wikipedia.org English",
 915+ eo=>"http://eo.wikipedia.org Esperanto",
 916+ es=>"http://es.wikipedia.org Spanish",
 917+ et=>"http://et.wikipedia.org Estonian",
 918+ eu=>"http://eu.wikipedia.org Basque",
 919+ ext=>"http://ext.wikipedia.org Extremaduran",
 920+ fa=>"http://fa.wikipedia.org Persian",
 921+ ff=>"http://ff.wikipedia.org Fulfulde",
 922+ fi=>"http://fi.wikipedia.org Finnish",
 923+ "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro",
 924+ fiu_vro=>"http://fiu-vro.wikipedia.org Voro",
 925+ fj=>"http://fj.wikipedia.org Fijian",
 926+ fo=>"http://fo.wikipedia.org Faroese", # was Faeroese
 927+ fr=>"http://fr.wikipedia.org French",
 928+ frp=>"http://frp.wikipedia.org Arpitan",
 929+ fur=>"http://fur.wikipedia.org Friulian",
 930+ fy=>"http://fy.wikipedia.org Frisian",
 931+ ga=>"http://ga.wikipedia.org Irish",
 932+ gan=>"http://gan.wikipedia.org Gan",
 933+ gay=>"http://gay.wikipedia.org Gayo",
 934+ gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic
 935+ gl=>"http://gl.wikipedia.org Galician", # was Galego
 936+ glk=>"http://glk.wikipedia.org Gilaki",
 937+ gn=>"http://gn.wikipedia.org Guarani",
 938+ got=>"http://got.wikipedia.org Gothic",
 939+ gu=>"http://gu.wikipedia.org Gujarati",
 940+ gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic
 941+ ha=>"http://ha.wikipedia.org Hausa",
 942+ hak=>"http://hak.wikipedia.org Hakka",
 943+ haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian
 944+ he=>"http://he.wikipedia.org Hebrew",
 945+ hi=>"http://hi.wikipedia.org Hindi",
 946+ hif=>"http://hif.wikipedia.org Fiji Hindi",
 947+ ho=>"http://ho.wikipedia.org Hiri Motu",
 948+ hr=>"http://hr.wikipedia.org Croatian",
 949+ hsb=>"http://hsb.wikipedia.org Upper Sorbian",
 950+ ht=>"http://ht.wikipedia.org Haitian",
 951+ hu=>"http://hu.wikipedia.org Hungarian",
 952+ hy=>"http://hy.wikipedia.org Armenian",
 953+ hz=>"http://hz.wikipedia.org Herero",
 954+ ia=>"http://ia.wikipedia.org Interlingua",
 955+ iba=>"http://iba.wikipedia.org Iban",
 956+ id=>"http://id.wikipedia.org Indonesian",
 957+ ie=>"http://ie.wikipedia.org Interlingue",
 958+ ig=>"http://ig.wikipedia.org Igbo",
 959+ ii=>"http://ii.wikipedia.org Yi",
 960+ ik=>"http://ik.wikipedia.org Inupiak",
 961+ ilo=>"http://ilo.wikipedia.org Ilokano",
 962+ io=>"http://io.wikipedia.org Ido",
 963+ is=>"http://is.wikipedia.org Icelandic",
 964+ it=>"http://it.wikipedia.org Italian",
 965+ iu=>"http://iu.wikipedia.org Inuktitut",
 966+ ja=>"http://ja.wikipedia.org Japanese",
 967+ jbo=>"http://jbo.wikipedia.org Lojban",
 968+ jv=>"http://jv.wikipedia.org Javanese",
 969+ ka=>"http://ka.wikipedia.org Georgian",
 970+ kaa=>"http://kaa.wikipedia.org Karakalpak",
 971+ kab=>"http://ka.wikipedia.org Kabyle",
 972+ kaw=>"http://kaw.wikipedia.org Kawi",
 973+ kg=>"http://kg.wikipedia.org Kongo",
 974+ ki=>"http://ki.wikipedia.org Kikuyu",
 975+ kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo
 976+ kk=>"http://kk.wikipedia.org Kazakh",
 977+ kl=>"http://kl.wikipedia.org Greenlandic",
 978+ km=>"http://km.wikipedia.org Khmer", # was Cambodian
 979+ kn=>"http://kn.wikipedia.org Kannada",
 980+ ko=>"http://ko.wikipedia.org Korean",
 981+ kr=>"http://kr.wikipedia.org Kanuri",
 982+ ks=>"http://ks.wikipedia.org Kashmiri",
 983+ ksh=>"http://ksh.wikipedia.org Ripuarian",
 984+ ku=>"http://ku.wikipedia.org Kurdish",
 985+ kv=>"http://kv.wikipedia.org Komi",
 986+ kw=>"http://kw.wikipedia.org Cornish", # was Kornish
 987+ ky=>"http://ky.wikipedia.org Kirghiz",
 988+ la=>"http://la.wikipedia.org Latin",
 989+ lad=>"http://lad.wikipedia.org Ladino",
 990+ lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch
 991+ lbe=>"http://lbe.wikipedia.org Lak",
 992+ lg=>"http://lg.wikipedia.org Ganda",
 993+ li=>"http://li.wikipedia.org Limburgish",
 994+ lij=>"http://lij.wikipedia.org Ligurian",
 995+ lmo=>"http://lmo.wikipedia.org Lombard",
 996+ ln=>"http://ln.wikipedia.org Lingala",
 997+ lo=>"http://lo.wikipedia.org Laotian",
 998+ ls=>"http://ls.wikipedia.org Latino Sine Flexione",
 999+ lt=>"http://lt.wikipedia.org Lithuanian",
 1000+ lv=>"http://lv.wikipedia.org Latvian",
 1001+ mad=>"http://mad.wikipedia.org Madurese",
 1002+ mak=>"http://mak.wikipedia.org Makasar",
 1003+ map_bms=>"http://map-bms.wikipedia.org Banyumasan",
 1004+ "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
 1005+ mdf=>"http://mdf.wikipedia.org Moksha",
 1006+ mg=>"http://mg.wikipedia.org Malagasy",
 1007+ mh=>"http://mh.wikipedia.org Marshallese",
 1008+ mi=>"http://mi.wikipedia.org Maori",
 1009+ min=>"http://min.wikipedia.org Minangkabau",
 1010+ minnan=>"http://minnan.wikipedia.org Minnan",
 1011+ mk=>"http://mk.wikipedia.org Macedonian",
 1012+ ml=>"http://ml.wikipedia.org Malayalam",
 1013+ mn=>"http://mn.wikipedia.org Mongolian",
 1014+ mo=>"http://mo.wikipedia.org Moldavian",
 1015+ mr=>"http://mr.wikipedia.org Marathi",
 1016+ ms=>"http://ms.wikipedia.org Malay",
 1017+ mt=>"http://mt.wikipedia.org Maltese",
 1018+ mus=>"http://mus.wikipedia.org Muskogee",
 1019+ my=>"http://my.wikipedia.org Burmese",
 1020+ myv=>"http://myv.wikipedia.org Erzya",
 1021+ mzn=>"http://mzn.wikipedia.org Mazandarani",
 1022+ na=>"http://na.wikipedia.org Nauruan", # was Nauru
 1023+ nah=>"http://nah.wikipedia.org Nahuatl",
 1024+ nap=>"http://nap.wikipedia.org Neapolitan",
 1025+ nds=>"http://nds.wikipedia.org Low Saxon",
 1026+ nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
 1027+ "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
 1028+ ne=>"http://ne.wikipedia.org Nepali",
 1029+ new=>"http://new.wikipedia.org Nepal Bhasa",
 1030+ ng=>"http://ng.wikipedia.org Ndonga",
 1031+ nl=>"http://nl.wikipedia.org Dutch",
 1032+ nov=>"http://nov.wikipedia.org Novial",
 1033+ nrm=>"http://nrm.wikipedia.org Norman",
 1034+ nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian
 1035+ no=>"http://no.wikipedia.org Norwegian",
 1036+ nv=>"http://nv.wikipedia.org Navajo", # was Avayo
 1037+ ny=>"http://ny.wikipedia.org Chichewa",
 1038+ oc=>"http://oc.wikipedia.org Occitan",
 1039+ om=>"http://om.wikipedia.org Oromo",
 1040+ or=>"http://or.wikipedia.org Oriya",
 1041+ os=>"http://os.wikipedia.org Ossetic",
 1042+ pa=>"http://pa.wikipedia.org Punjabi",
 1043+ pag=>"http://pag.wikipedia.org Pangasinan",
 1044+ pam=>"http://pam.wikipedia.org Kapampangan",
 1045+ pap=>"http://pap.wikipedia.org Papiamentu",
 1046+ pdc=>"http://pdc.wikipedia.org Pennsylvania German",
 1047+ pi=>"http://pi.wikipedia.org Pali",
 1048+ pih=>"http://pih.wikipedia.org Norfolk",
 1049+ pl=>"http://pl.wikipedia.org Polish",
 1050+ pms=>"http://pms.wikipedia.org Piedmontese",
 1051+ ps=>"http://ps.wikipedia.org Pashto",
 1052+ pt=>"http://pt.wikipedia.org Portuguese",
 1053+ qu=>"http://qu.wikipedia.org Quechua",
 1054+ rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance
 1055+ rmy=>"http://rmy.wikipedia.org Romani",
 1056+ rn=>"http://rn.wikipedia.org Kirundi",
 1057+ ro=>"http://ro.wikipedia.org Romanian",
 1058+ roa_rup=>"http://roa-rup.wikipedia.org Aromanian",
 1059+ "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian",
 1060+ roa_tara=>"http://roa-tara.wikipedia.org Tarantino",
 1061+ "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
 1062+ ru=>"http://ru.wikipedia.org Russian",
 1063+ ru_sib=>"http://ru-sib.wikipedia.org Siberian",
 1064+ "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
 1065+ rw=>"http://rw.wikipedia.org Kinyarwanda",
 1066+ sa=>"http://sa.wikipedia.org Sanskrit",
 1067+ sah=>"http://sah.wikipedia.org Sakha",
 1068+ sc=>"http://sc.wikipedia.org Sardinian",
 1069+ scn=>"http://scn.wikipedia.org Sicilian",
 1070+ sco=>"http://sco.wikipedia.org Scots",
 1071+ sd=>"http://sd.wikipedia.org Sindhi",
 1072+ se=>"http://se.wikipedia.org Northern Sami",
 1073+ sg=>"http://sg.wikipedia.org Sangro",
 1074+ sh=>"http://sh.wikipedia.org Serbo-Croatian",
 1075+ si=>"http://si.wikipedia.org Sinhala", # was Singhalese
 1076+ simple=>"http://simple.wikipedia.org Simple English",
 1077+ sk=>"http://sk.wikipedia.org Slovak",
 1078+ sl=>"http://sl.wikipedia.org Slovene",
 1079+ sm=>"http://sm.wikipedia.org Samoan",
 1080+ sn=>"http://sn.wikipedia.org Shona",
 1081+ so=>"http://so.wikipedia.org Somali", # was Somalian
 1082+ sq=>"http://sq.wikipedia.org Albanian",
 1083+ sr=>"http://sr.wikipedia.org Serbian",
 1084+ srn=>"http://srn.wikipedia.org Sranan",
 1085+ ss=>"http://ss.wikipedia.org Siswati",
 1086+ st=>"http://st.wikipedia.org Sesotho",
 1087+ stq=>"http://stq.wikipedia.org Saterland Frisian",
 1088+ su=>"http://su.wikipedia.org Sundanese",
 1089+ sv=>"http://sv.wikipedia.org Swedish",
 1090+ sw=>"http://sw.wikipedia.org Swahili",
 1091+ szl=>"http://szl.wikipedia.org Silesian",
 1092+ ta=>"http://ta.wikipedia.org Tamil",
 1093+ te=>"http://te.wikipedia.org Telugu",
 1094+ test=>"http://test.wikipedia.org Test",
 1095+ tet=>"http://tet.wikipedia.org Tetum",
 1096+ tg=>"http://tg.wikipedia.org Tajik",
 1097+ th=>"http://th.wikipedia.org Thai",
 1098+ ti=>"http://ti.wikipedia.org Tigrinya",
 1099+ tk=>"http://tk.wikipedia.org Turkmen",
 1100+ tl=>"http://tl.wikipedia.org Tagalog",
 1101+ tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
 1102+ tn=>"http://tn.wikipedia.org Setswana",
 1103+ to=>"http://to.wikipedia.org Tongan",
 1104+ tokipona=>"http://tokipona.wikipedia.org Tokipona",
 1105+ tpi=>"http://tpi.wikipedia.org Tok Pisin",
 1106+ tr=>"http://tr.wikipedia.org Turkish",
 1107+ ts=>"http://ts.wikipedia.org Tsonga",
 1108+ tt=>"http://tt.wikipedia.org Tatar",
 1109+ tum=>"http://tum.wikipedia.org Tumbuka",
 1110+ turn=>"http://turn.wikipedia.org Turnbuka",
 1111+ tw=>"http://tw.wikipedia.org Twi",
 1112+ ty=>"http://ty.wikipedia.org Tahitian",
 1113+ udm=>"http://udm.wikipedia.org Udmurt",
 1114+ ug=>"http://ug.wikipedia.org Uighur",
 1115+ uk=>"http://uk.wikipedia.org Ukrainian",
 1116+ ur=>"http://ur.wikipedia.org Urdu",
 1117+ uz=>"http://uz.wikipedia.org Uzbek",
 1118+ ve=>"http://ve.wikipedia.org Venda", # was Lushaka
 1119+ vec=>"http://vec.wikipedia.org Venetian",
 1120+ vi=>"http://vi.wikipedia.org Vietnamese",
 1121+ vls=>"http://vls.wikipedia.org West Flemish",
 1122+ vo=>"http://vo.wikipedia.org Volap&uuml;k",
 1123+ wa=>"http://wa.wikipedia.org Walloon",
 1124+ war=>"http://war.wikipedia.org Waray-Waray",
 1125+ wo=>"http://wo.wikipedia.org Wolof",
 1126+ wuu=>"http://wuu.wikipedia.org Wu",
 1127+ xal=>"http://xal.wikipedia.org Kalmyk",
 1128+ xh=>"http://xh.wikipedia.org Xhosa",
 1129+ yi=>"http://yi.wikipedia.org Yiddish",
 1130+ yo=>"http://yo.wikipedia.org Yoruba",
 1131+ za=>"http://za.wikipedia.org Zhuang",
 1132+ zea=>"http://zea.wikipedia.org Zealandic",
 1133+ zh=>"http://zh.wikipedia.org Chinese",
 1134+ zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan",
 1135+ "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan",
 1136+ zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese",
 1137+ "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese",
 1138+ zh_yue=>"http://zh-yue.wikipedia.org Cantonese",
 1139+ "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese",
 1140+ zu=>"http://zu.wikipedia.org Zulu",
 1141+ zz=>"&nbsp; All&nbsp;languages",
 1142+ zzz=>"&nbsp; All&nbsp;languages except English"
 1143+ );
 1144+
 1145+ foreach $key (keys %wikipedias)
 1146+ {
 1147+ my $wikipedia = $wikipedias {$key} ;
 1148+ $out_urls {$key} = $wikipedia ;
 1149+ $out_languages {$key} = $wikipedia ;
 1150+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
 1151+ $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ;
 1152+ $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ;
 1153+ $out_article {$key} =~ s/ /_/g ;
 1154+ $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
 1155+ }
 1156+}
 1157+
 1158+# copied from WikiReports_EN.pl
 1159+sub InitReportNames
 1160+{
 1161+ @out_report_descriptions = (
 1162+ "Contributors",
 1163+ "New editors",
 1164+ "Active editors",
 1165+ "Very active editors",
 1166+ "Article count (official)",
 1167+ "Article count (alternate)",
 1168+ "New articles per day",
 1169+ "Edits per article",
 1170+ "Bytes per article",
 1171+ "Articles over 0.5 Kb",
 1172+ "Articles over 2 Kb",
 1173+ "Edits per month",
 1174+ "Database size",
 1175+ "Words",
 1176+ "Internal links",
 1177+ "Links to other Wikipedias",
 1178+ "Binaries",
 1179+ "External links",
 1180+ "Redirects",
 1181+ "Page requests per day",
 1182+ "Visits per day",
 1183+ "Overview recent months"
 1184+ ) ;
 1185+}
 1186+
Property changes on: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl
___________________________________________________________________
Added: svn:eol-style
11871187 + native
Index: trunk/wikistats/reportcard/ReportCardLinkErrata.pl
@@ -1,92 +1,92 @@
2 -#!/usr/local/bin/perl
3 -
4 - use lib "/home/ezachte/lib" ;
5 - use EzLib ;
6 - $trace_on_exit = $true ;
7 -
8 - &PatchFiles ("W:/@ Report Card/Extended") ;
9 - &PatchFiles ("W:/@ Report Card/Public") ;
10 -
11 - print "\n\nReady\n\n" ;
12 - exit ;
13 -
14 -sub PatchFiles
15 -{
16 - my $dir = shift ;
17 - $prevdir = getcwd ;
18 - print "prevdir $prevdir\n" ;
19 - chdir ($dir) || die "Cannot chdir to $dir\n";
20 - $dir = getcwd ;
21 - print "currdir $dir\n" ;
22 -
23 - print "\nErrata files:\n\n" ;
24 -
25 - local (*DIR);
26 - opendir (DIR, ".");
27 -
28 - my %errata ;
29 - while ($file = readdir (DIR))
30 - {
31 - if ($file eq "." || $file eq "..")
32 - { next ; }
33 -
34 - next if $file !~ /^RC_\d\d\d\d_\d\d_errata\.html$/ ;
35 -
36 - print "File $file\n" ;
37 - $file =~ s/_errata.*$// ;
38 - $errata {$file} ++ ;
39 - }
40 -
41 - closedir(DIR);
42 -
43 - print "\nPatch files:\n\n" ;
44 -
45 - opendir (DIR, ".");
46 - while ($file = readdir (DIR))
47 - {
48 - if ($file eq "." || $file eq "..")
49 - { next ; }
50 -
51 - next if $file !~ /^RC_\d\d\d\d_\d\d_(?:synopsis|columns|detailed|summary)\.html$/ ;
52 -
53 - ($file2 = $file) =~ s/_[a-z]+\.html$// ;
54 - next if $errata {$file2} == 0 ;
55 -
56 - # print "Check file $file\n" ;
57 -
58 - $add_errata = $false ;
59 - open FILE, '<', $file ;
60 - @lines = <FILE> ;
61 - close FILE ;
62 -
63 - foreach $line (@lines)
64 - {
65 - if ($line =~ /RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html/i)
66 - {
67 - if ($line !~ /errata/i)
68 - {
69 - $add_errata = $true ;
70 - # print "\nBefore:$line\n" ;
71 - $line =~ s/<\/small>/&nbsp;&nbsp;&nbsp;&nbsp; &rArr; <a href='${file2}_errata.html'><font color=#A00000>Errata<\/font><\/a><\/small>/ ;
72 - # print "\nAfter:$line\n" ;
73 - last ;
74 - }
75 - }
76 - }
77 -
78 - if ($add_errata)
79 - {
80 - print "Patch file $file\n" ;
81 - open FILE, '>', $file ;
82 - print FILE @lines ;
83 - close FILE ;
84 - }
85 - }
86 - closedir(DIR);
87 -
88 - chdir($prevdir);
89 - $dir = getcwd ;
90 - print "\ncurrdir $dir\n" ;
91 -}
92 -
93 -
 2+#!/usr/local/bin/perl
 3+
 4+ use lib "/home/ezachte/lib" ;
 5+ use EzLib ;
 6+ $trace_on_exit = $true ;
 7+
 8+ &PatchFiles ("W:/@ Report Card/Extended") ;
 9+ &PatchFiles ("W:/@ Report Card/Public") ;
 10+
 11+ print "\n\nReady\n\n" ;
 12+ exit ;
 13+
 14+sub PatchFiles
 15+{
 16+ my $dir = shift ;
 17+ $prevdir = getcwd ;
 18+ print "prevdir $prevdir\n" ;
 19+ chdir ($dir) || die "Cannot chdir to $dir\n";
 20+ $dir = getcwd ;
 21+ print "currdir $dir\n" ;
 22+
 23+ print "\nErrata files:\n\n" ;
 24+
 25+ local (*DIR);
 26+ opendir (DIR, ".");
 27+
 28+ my %errata ;
 29+ while ($file = readdir (DIR))
 30+ {
 31+ if ($file eq "." || $file eq "..")
 32+ { next ; }
 33+
 34+ next if $file !~ /^RC_\d\d\d\d_\d\d_errata\.html$/ ;
 35+
 36+ print "File $file\n" ;
 37+ $file =~ s/_errata.*$// ;
 38+ $errata {$file} ++ ;
 39+ }
 40+
 41+ closedir(DIR);
 42+
 43+ print "\nPatch files:\n\n" ;
 44+
 45+ opendir (DIR, ".");
 46+ while ($file = readdir (DIR))
 47+ {
 48+ if ($file eq "." || $file eq "..")
 49+ { next ; }
 50+
 51+ next if $file !~ /^RC_\d\d\d\d_\d\d_(?:synopsis|columns|detailed|summary)\.html$/ ;
 52+
 53+ ($file2 = $file) =~ s/_[a-z]+\.html$// ;
 54+ next if $errata {$file2} == 0 ;
 55+
 56+ # print "Check file $file\n" ;
 57+
 58+ $add_errata = $false ;
 59+ open FILE, '<', $file ;
 60+ @lines = <FILE> ;
 61+ close FILE ;
 62+
 63+ foreach $line (@lines)
 64+ {
 65+ if ($line =~ /RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html/i)
 66+ {
 67+ if ($line !~ /errata/i)
 68+ {
 69+ $add_errata = $true ;
 70+ # print "\nBefore:$line\n" ;
 71+ $line =~ s/<\/small>/&nbsp;&nbsp;&nbsp;&nbsp; &rArr; <a href='${file2}_errata.html'><font color=#A00000>Errata<\/font><\/a><\/small>/ ;
 72+ # print "\nAfter:$line\n" ;
 73+ last ;
 74+ }
 75+ }
 76+ }
 77+
 78+ if ($add_errata)
 79+ {
 80+ print "Patch file $file\n" ;
 81+ open FILE, '>', $file ;
 82+ print FILE @lines ;
 83+ close FILE ;
 84+ }
 85+ }
 86+ closedir(DIR);
 87+
 88+ chdir($prevdir);
 89+ $dir = getcwd ;
 90+ print "\ncurrdir $dir\n" ;
 91+}
 92+
 93+
Property changes on: trunk/wikistats/reportcard/ReportCardLinkErrata.pl
___________________________________________________________________
Added: svn:eol-style
9494 + native
Index: trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl
@@ -1,124 +1,124 @@
2 -#!/usr/local/bin/perl
3 -
4 - use Getopt::Std ;
5 -
6 - &ParseArguments ;
7 -
8 - print "Write file '$file_csv_out'\n" ;
9 - open CSV_OUT, '>', $file_csv_out ;
10 -
11 - foreach $project (qw (wb wk wn wp wq ws wv wx))
12 - { &ReadStatisticsPerBinariesExtension ($project) ; }
13 -
14 - close CSV_OUT ;
15 -
16 - print "\n\nReady\n\n" ;
17 - exit ;
18 -
19 -sub ParseArguments
20 -{
21 - my @options ;
22 - getopt ("io", \%options) ;
23 -
24 - die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ;
25 - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
26 -
27 - $path_in = $options {"i"} ;
28 - $path_out = $options {"o"} ;
29 -
30 - die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
31 - die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
32 -
33 - # tests only
34 - # $path_in = "C:/@ Wikimedia/# Out Bayes" ;
35 - # $path_out = "C:/analytics" ; # "w:/@ report card/data" ;
36 -
37 - print "Input folder: $path_in\n" ;
38 - print "Output folder: $path_out\n" ;
39 - print "\n" ;
40 -
41 - $file_csv_out = "$path_out/analytics_in_binaries.csv" ;
42 -}
43 -
44 -
45 -sub ReadStatisticsPerBinariesExtension
46 -{
47 - my $project = shift ;
48 - my $file_csv_in = "$path_in/csv_$project/StatisticsPerBinariesExtension.csv" ;
49 - $yyyymm_hi = -1 ;
50 -
51 - if (! -e $file_csv_in)
52 - { die "Input file '$file_csv_in' not found" ; }
53 -
54 -
55 - print "Read '$file_csv_in'\n" ;
56 - open CSV_IN, '<', $file_csv_in ;
57 -
58 - $language_prev = '' ;
59 - while ($line = <CSV_IN>)
60 - {
61 - chomp $line ;
62 - next if $line !~ /,.*?,/ ;
63 -
64 - ($language,$date,$data) = split (',', $line, 3) ;
65 -
66 - # for each wiki first line shows ext names, no tcounts
67 - if ($date eq "00/0000")
68 - {
69 - if ($language_prev ne '')
70 - { &WriteMonthlyData ($project, $language_prev) ; }
71 - $language_prev = $language ;
72 -
73 - undef %ext_name ;
74 - undef %ext_ndx ;
75 - undef %ext_cnt ;
76 - undef %months ;
77 -
78 - @exts = split (',', $data) ;
79 - $ndx = 0 ;
80 - foreach $ext (@exts)
81 - {
82 - $ext_name {$ndx} = $ext ;
83 - $ext_ndx {$ext} = $ndx ;
84 - $ndx ++ ;
85 - }
86 - next ;
87 - }
88 -
89 - ($month,$year) = split ('\/', $date) ;
90 - $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
91 - if ($yyyymm gt $yyyymm_hi)
92 - { $yyyymm_hi = $yyyymm ; }
93 - $months {$yyyymm}++ ;
94 -
95 - @counts = split (',', $data) ;
96 - $ndx = 0 ;
97 - foreach $count (@counts)
98 - {
99 - $ext_cnt {$yyyymm}{$ext_name {$ndx}} = $count ;
100 - $ndx ++ ;
101 - }
102 - }
103 - &WriteMonthlyData ($project, $language_prev) ;
104 -
105 - close CSV_IN ;
106 -}
107 -
108 -sub WriteMonthlyData
109 -{
110 - my ($project,$language) = @_ ;
111 - # get sorted array of extensions, order by count for most recent month
112 - %ext_cnt_yyyymm_hi = %{$ext_cnt {$yyyymm_hi}} ;
113 - @ext_cnt_yyyymm_hi = (sort {$ext_cnt_yyyymm_hi {$b} <=> $ext_cnt_yyyymm_hi {$a}} keys %ext_cnt_yyyymm_hi) ;
114 -
115 - foreach $month (sort keys %months)
116 - {
117 - $ndx = 0 ;
118 - foreach $ext (@ext_cnt_yyyymm_hi)
119 - {
120 - print CSV_OUT "$project,$language,$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
121 - # print "$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
122 - last if (++ $ndx > 25) ;
123 - }
124 - }
125 -}
 2+#!/usr/local/bin/perl
 3+
 4+ use Getopt::Std ;
 5+
 6+ &ParseArguments ;
 7+
 8+ print "Write file '$file_csv_out'\n" ;
 9+ open CSV_OUT, '>', $file_csv_out ;
 10+
 11+ foreach $project (qw (wb wk wn wp wq ws wv wx))
 12+ { &ReadStatisticsPerBinariesExtension ($project) ; }
 13+
 14+ close CSV_OUT ;
 15+
 16+ print "\n\nReady\n\n" ;
 17+ exit ;
 18+
 19+sub ParseArguments
 20+{
 21+ my @options ;
 22+ getopt ("io", \%options) ;
 23+
 24+ die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ;
 25+ die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
 26+
 27+ $path_in = $options {"i"} ;
 28+ $path_out = $options {"o"} ;
 29+
 30+ die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
 31+ die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
 32+
 33+ # tests only
 34+ # $path_in = "C:/@ Wikimedia/# Out Bayes" ;
 35+ # $path_out = "C:/analytics" ; # "w:/@ report card/data" ;
 36+
 37+ print "Input folder: $path_in\n" ;
 38+ print "Output folder: $path_out\n" ;
 39+ print "\n" ;
 40+
 41+ $file_csv_out = "$path_out/analytics_in_binaries.csv" ;
 42+}
 43+
 44+
 45+sub ReadStatisticsPerBinariesExtension
 46+{
 47+ my $project = shift ;
 48+ my $file_csv_in = "$path_in/csv_$project/StatisticsPerBinariesExtension.csv" ;
 49+ $yyyymm_hi = -1 ;
 50+
 51+ if (! -e $file_csv_in)
 52+ { die "Input file '$file_csv_in' not found" ; }
 53+
 54+
 55+ print "Read '$file_csv_in'\n" ;
 56+ open CSV_IN, '<', $file_csv_in ;
 57+
 58+ $language_prev = '' ;
 59+ while ($line = <CSV_IN>)
 60+ {
 61+ chomp $line ;
 62+ next if $line !~ /,.*?,/ ;
 63+
 64+ ($language,$date,$data) = split (',', $line, 3) ;
 65+
 66+ # for each wiki first line shows ext names, no tcounts
 67+ if ($date eq "00/0000")
 68+ {
 69+ if ($language_prev ne '')
 70+ { &WriteMonthlyData ($project, $language_prev) ; }
 71+ $language_prev = $language ;
 72+
 73+ undef %ext_name ;
 74+ undef %ext_ndx ;
 75+ undef %ext_cnt ;
 76+ undef %months ;
 77+
 78+ @exts = split (',', $data) ;
 79+ $ndx = 0 ;
 80+ foreach $ext (@exts)
 81+ {
 82+ $ext_name {$ndx} = $ext ;
 83+ $ext_ndx {$ext} = $ndx ;
 84+ $ndx ++ ;
 85+ }
 86+ next ;
 87+ }
 88+
 89+ ($month,$year) = split ('\/', $date) ;
 90+ $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
 91+ if ($yyyymm gt $yyyymm_hi)
 92+ { $yyyymm_hi = $yyyymm ; }
 93+ $months {$yyyymm}++ ;
 94+
 95+ @counts = split (',', $data) ;
 96+ $ndx = 0 ;
 97+ foreach $count (@counts)
 98+ {
 99+ $ext_cnt {$yyyymm}{$ext_name {$ndx}} = $count ;
 100+ $ndx ++ ;
 101+ }
 102+ }
 103+ &WriteMonthlyData ($project, $language_prev) ;
 104+
 105+ close CSV_IN ;
 106+}
 107+
 108+sub WriteMonthlyData
 109+{
 110+ my ($project,$language) = @_ ;
 111+ # get sorted array of extensions, order by count for most recent month
 112+ %ext_cnt_yyyymm_hi = %{$ext_cnt {$yyyymm_hi}} ;
 113+ @ext_cnt_yyyymm_hi = (sort {$ext_cnt_yyyymm_hi {$b} <=> $ext_cnt_yyyymm_hi {$a}} keys %ext_cnt_yyyymm_hi) ;
 114+
 115+ foreach $month (sort keys %months)
 116+ {
 117+ $ndx = 0 ;
 118+ foreach $ext (@ext_cnt_yyyymm_hi)
 119+ {
 120+ print CSV_OUT "$project,$language,$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
 121+ # print "$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
 122+ last if (++ $ndx > 25) ;
 123+ }
 124+ }
 125+}
Property changes on: trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl
___________________________________________________________________
Added: svn:eol-style
126126 + native
Property changes on: trunk/wikistats/analytics/AnalyticsPrepLanguageNames.pl
___________________________________________________________________
Added: svn:eol-style
127127 + native
Index: trunk/wikistats/analytics/analytics_refresh_from_csv.txt
@@ -1,55 +1,55 @@
2 -USE `analytics` ;
3 -
4 -TRUNCATE TABLE comscore ;
5 -LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
6 - INTO TABLE comscore
7 - FIELDS TERMINATED BY ','
8 - OPTIONALLY ENCLOSED BY '"'
9 - (@date,country_code,region_code,web_property,project_code,reach,visitors)
10 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
11 -
 2+USE `analytics` ;
 3+
 4+TRUNCATE TABLE comscore ;
 5+LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
 6+ INTO TABLE comscore
 7+ FIELDS TERMINATED BY ','
 8+ OPTIONALLY ENCLOSED BY '"'
 9+ (@date,country_code,region_code,web_property,project_code,reach,visitors)
 10+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 11+
1212 TRUNCATE TABLE comscore_regions ;
13 -LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
14 - INTO TABLE comscore_regions
15 - FIELDS TERMINATED BY ','
16 - OPTIONALLY ENCLOSED BY '"'
17 - (report_language,region_code,region_name) ;
18 -
 13+LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
 14+ INTO TABLE comscore_regions
 15+ FIELDS TERMINATED BY ','
 16+ OPTIONALLY ENCLOSED BY '"'
 17+ (report_language,region_code,region_name) ;
 18+
1919 TRUNCATE TABLE wikistats ;
20 -LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
21 - INTO TABLE wikistats
22 - FIELDS TERMINATED BY ','
23 - OPTIONALLY ENCLOSED BY '"'
24 - (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
25 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
26 -
 20+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
 21+ INTO TABLE wikistats
 22+ FIELDS TERMINATED BY ','
 23+ OPTIONALLY ENCLOSED BY '"'
 24+ (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
 25+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 26+
2727 TRUNCATE TABLE page_views ;
28 -LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
29 - INTO TABLE page_views
30 - FIELDS TERMINATED BY ','
31 - OPTIONALLY ENCLOSED BY '"'
32 - (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
33 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
34 -
 28+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
 29+ INTO TABLE page_views
 30+ FIELDS TERMINATED BY ','
 31+ OPTIONALLY ENCLOSED BY '"'
 32+ (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
 33+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 34+
3535 TRUNCATE TABLE language_names ;
36 -LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
37 - INTO TABLE language_names
38 - FIELDS TERMINATED BY ','
39 - OPTIONALLY ENCLOSED BY '"'
40 - (report_language,language_code,language_name) ;
41 -
 36+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
 37+ INTO TABLE language_names
 38+ FIELDS TERMINATED BY ','
 39+ OPTIONALLY ENCLOSED BY '"'
 40+ (report_language,language_code,language_name) ;
 41+
4242 TRUNCATE TABLE binaries ;
43 -LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
44 - INTO TABLE binaries
45 - FIELDS TERMINATED BY ','
46 - OPTIONALLY ENCLOSED BY '"'
47 - (project_code,language_code,@date,extension,binaries)
48 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
49 -
 43+LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
 44+ INTO TABLE binaries
 45+ FIELDS TERMINATED BY ','
 46+ OPTIONALLY ENCLOSED BY '"'
 47+ (project_code,language_code,@date,extension,binaries)
 48+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 49+
5050 TRUNCATE TABLE offline ;
51 -LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
52 - INTO TABLE offline
53 - FIELDS TERMINATED BY ','
54 - OPTIONALLY ENCLOSED BY '"'
55 - (@date, readers)
56 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 51+LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
 52+ INTO TABLE offline
 53+ FIELDS TERMINATED BY ','
 54+ OPTIONALLY ENCLOSED BY '"'
 55+ (@date, readers)
 56+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
Property changes on: trunk/wikistats/analytics/analytics_refresh_from_csv.txt
___________________________________________________________________
Added: svn:eol-style
5757 + native
Property changes on: trunk/wikistats/analytics/AnalyticsPrepPageViews.pl
___________________________________________________________________
Added: svn:eol-style
5858 + native
Index: trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt
@@ -1,179 +1,179 @@
2 -
3 -
4 -
5 -
6 -
7 -DROP DATABASE IF EXISTS `analytics` ;
8 -
9 -CREATE DATABASE `analytics` ;
10 -
11 -USE `analytics` ;
12 -
13 -CREATE TABLE `comscore` (
14 - `date` date NOT NULL,
15 - `country_code` varchar (3),
16 - `region_code` varchar (3),
17 - `web_property` varchar (20),
18 - `project_code` varchar (10),
19 - `reach` decimal (4,1) DEFAULT NULL,
20 - `visitors` decimal (15) DEFAULT NULL,
21 - PRIMARY KEY (date,country_code,region_code,project_code,web_property),
22 - KEY (`country_code`)
23 -) ;
24 -
25 -CREATE TABLE `comscore_regions` (
26 - `region_code` varchar (2),
27 - `report_language` varchar (10),
28 - `region_name` varchar (18),
29 - PRIMARY KEY (report_language,region_code)
30 -) ;
31 -
32 -CREATE TABLE `wikistats` (
33 - `date` date NOT NULL,
34 - `project_code` varchar (10),
35 - `language_code` varchar (15),
36 - `editors_all_time` int (10) DEFAULT NULL,
37 - `editors_new` int (7) DEFAULT NULL,
38 - `editors_ge_5` int (7) DEFAULT NULL,
39 - `editors_ge_25` int (7) DEFAULT NULL,
40 - `editors_ge_100` int (7) DEFAULT NULL,
41 - `articles` int (12) DEFAULT NULL,
42 - `articles_new_per_day` int (9) DEFAULT NULL,
43 - `articles_over_bytes_500` int (12) DEFAULT NULL,
44 - `articles_over_bytes_2000` int (12) DEFAULT NULL,
45 - `edits_per_article` decimal (9,1) DEFAULT NULL,
46 - `bytes_per_article` decimal (9,1) DEFAULT NULL,
47 - `edits` int (12) DEFAULT NULL,
48 - `size_in_bytes` int (15) DEFAULT NULL,
49 - `size_in_words` int (15) DEFAULT NULL,
50 - `links_internal` int (15) DEFAULT NULL,
51 - `links_interwiki` int (15) DEFAULT NULL,
52 - `links_image` int (15) DEFAULT NULL,
53 - `links_external` int (15) DEFAULT NULL,
54 - `redirects` int (15) DEFAULT NULL,
55 - PRIMARY KEY (date,project_code,language_code)
56 -) ;
57 -
58 -CREATE TABLE `page_views` (
59 - `date` date NOT NULL,
60 - `project_code` char (2),
61 - `language_code` char (15),
62 - `views_non_mobile_raw` bigint (15),
63 - `views_mobile_raw` bigint (15),
64 - `views_non_mobile_normalized` bigint (15),
65 - `views_mobile_normalized` bigint (15),
66 - `views_raw` bigint (15),
67 - `views_normalized` bigint (15),
68 - PRIMARY KEY (date,project_code,language_code)
69 -) ;
70 -
71 -CREATE TABLE `language_names` (
72 - `report_language` varchar (15),
73 - `language_code` varchar (15),
74 - `language_name` varchar (50),
75 - PRIMARY KEY (report_language,language_code)
76 -) ;
77 -
78 -CREATE TABLE `binaries` (
79 - `date` date NOT NULL,
80 - `project_code` char (2),
81 - `language_code` char (15),
82 - `extension` varchar (10),
83 - `binaries` bigint (15),
84 - PRIMARY KEY (date,project_code,language_code,extension)
85 -) ;
86 -
87 -CREATE TABLE `offline` (
88 - `date` date NOT NULL,
89 - `readers` bigint (12),
90 - PRIMARY KEY (date,readers)
91 -) ;
92 -
93 -
94 -
95 -LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
96 - INTO TABLE comscore
97 - FIELDS TERMINATED BY ','
98 - OPTIONALLY ENCLOSED BY '"'
99 - (@date,country_code,region_code,web_property,project_code,reach,visitors)
100 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
101 -
102 -LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
103 - INTO TABLE comscore_regions
104 - FIELDS TERMINATED BY ','
105 - OPTIONALLY ENCLOSED BY '"'
106 - (report_language,region_code,region_name) ;
107 -
108 -LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
109 - INTO TABLE wikistats
110 - FIELDS TERMINATED BY ','
111 - OPTIONALLY ENCLOSED BY '"'
112 - (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
113 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
114 -
115 -LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
116 - INTO TABLE page_views
117 - FIELDS TERMINATED BY ','
118 - OPTIONALLY ENCLOSED BY '"'
119 - (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
120 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
121 -
122 -
123 -LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
124 - INTO TABLE language_names
125 - FIELDS TERMINATED BY ','
126 - OPTIONALLY ENCLOSED BY '"'
127 - (report_language,language_code,language_name) ;
128 -
129 -LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
130 - INTO TABLE binaries
131 - FIELDS TERMINATED BY ','
132 - OPTIONALLY ENCLOSED BY '"'
133 - (project_code,language_code,@date,extension,binaries)
134 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
135 -
136 -LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
137 - INTO TABLE offline
138 - FIELDS TERMINATED BY ','
139 - OPTIONALLY ENCLOSED BY '"'
140 - (@date,readers)
141 - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
142 -
143 -
144 - SELECT * FROM offline ;
145 -
146 -
 2+-- make sure to delete output files *test*.csv first if any exist (MySQL on purpose forbids overwrite)
 3+
 4+-- tables implemented:
 5+-- comscore
 6+-- comscore_regions
 7+-- wikistats
 8+-- page_views
 9+-- language_names
 10+-- binaries
 11+
 12+-- more tables planned (O= optional, not needed for report card stage)
 13+-- project_names
 14+-- O edits per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other)
 15+-- O editors per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other)
 16+
 17+
 18+-- open issues:
 19+-- only store basic data in database and calculate all aggregates on the fly, or do some aggregation beforehand? (e.g. count for mobile / non-mobile / ==> total of both ? <==)
 20+-- for binaries, store one extension type per row? (future proof, more work to query), or a selected few as columns? (one row only needed per month)
 21+
 22+-- Create database and all tables from scratch
 23+DROP DATABASE IF EXISTS `analytics` ;
 24+
 25+CREATE DATABASE `analytics` ;
 26+
 27+USE `analytics` ;
 28+
 29+CREATE TABLE `comscore` (
 30+ `date` date NOT NULL,
 31+ `country_code` varchar (3),
 32+ `region_code` varchar (3),
 33+ `web_property` varchar (20),
 34+ `project_code` varchar (10),
 35+ `reach` decimal (4,1) DEFAULT NULL,
 36+ `visitors` decimal (15) DEFAULT NULL,
 37+ PRIMARY KEY (date,country_code,region_code,project_code,web_property),
 38+ KEY (`country_code`)
 39+) ;
 40+
 41+CREATE TABLE `comscore_regions` (
 42+ `region_code` varchar (2),
 43+ `report_language` varchar (10),
 44+ `region_name` varchar (18),
 45+ PRIMARY KEY (report_language,region_code)
 46+) ;
 47+
 48+CREATE TABLE `wikistats` (
 49+ `date` date NOT NULL,
 50+ `project_code` varchar (10),
 51+ `language_code` varchar (15),
 52+ `editors_all_time` int (10) DEFAULT NULL,
 53+ `editors_new` int (7) DEFAULT NULL,
 54+ `editors_ge_5` int (7) DEFAULT NULL,
 55+ `editors_ge_25` int (7) DEFAULT NULL,
 56+ `editors_ge_100` int (7) DEFAULT NULL,
 57+ `articles` int (12) DEFAULT NULL,
 58+ `articles_new_per_day` int (9) DEFAULT NULL,
 59+ `articles_over_bytes_500` int (12) DEFAULT NULL,
 60+ `articles_over_bytes_2000` int (12) DEFAULT NULL,
 61+ `edits_per_article` decimal (9,1) DEFAULT NULL,
 62+ `bytes_per_article` decimal (9,1) DEFAULT NULL,
 63+ `edits` int (12) DEFAULT NULL,
 64+ `size_in_bytes` int (15) DEFAULT NULL,
 65+ `size_in_words` int (15) DEFAULT NULL,
 66+ `links_internal` int (15) DEFAULT NULL,
 67+ `links_interwiki` int (15) DEFAULT NULL,
 68+ `links_image` int (15) DEFAULT NULL,
 69+ `links_external` int (15) DEFAULT NULL,
 70+ `redirects` int (15) DEFAULT NULL,
 71+ PRIMARY KEY (date,project_code,language_code)
 72+) ;
 73+
 74+CREATE TABLE `page_views` (
 75+ `date` date NOT NULL,
 76+ `project_code` char (2),
 77+ `language_code` char (15),
 78+ `views_non_mobile_raw` bigint (15),
 79+ `views_mobile_raw` bigint (15),
 80+ `views_non_mobile_normalized` bigint (15),
 81+ `views_mobile_normalized` bigint (15),
 82+ `views_raw` bigint (15),
 83+ `views_normalized` bigint (15),
 84+ PRIMARY KEY (date,project_code,language_code)
 85+) ;
 86+
 87+CREATE TABLE `language_names` (
 88+ `report_language` varchar (15),
 89+ `language_code` varchar (15),
 90+ `language_name` varchar (50),
 91+ PRIMARY KEY (report_language,language_code)
 92+) ;
 93+
 94+CREATE TABLE `binaries` (
 95+ `date` date NOT NULL,
 96+ `project_code` char (2),
 97+ `language_code` char (15),
 98+ `extension` varchar (10),
 99+ `binaries` bigint (15),
 100+ PRIMARY KEY (date,project_code,language_code,extension)
 101+) ;
 102+
 103+CREATE TABLE `offline` (
 104+ `date` date NOT NULL,
 105+ `readers` bigint (12),
 106+ PRIMARY KEY (date,readers)
 107+) ;
 108+
 109+-- SHOW TABLES ;
 110+-- DESCRIBE comscore ;
 111+-- DESCRIBE comscore_regions ;
 112+-- DESCRIBE wikistats ;
 113+-- DESCRIBE page_views ;
 114+-- DESCRIBE language_names ;
 115+-- DESCRIBE binaries ;
 116+-- DESCRIBE offline ;
 117+
 118+-- Database Manipulation
 119+-- Obviously in the real world this would be a separate script
 120+
 121+LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
 122+ INTO TABLE comscore
 123+ FIELDS TERMINATED BY ','
 124+ OPTIONALLY ENCLOSED BY '"'
 125+ (@date,country_code,region_code,web_property,project_code,reach,visitors)
 126+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 127+
 128+LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
 129+ INTO TABLE comscore_regions
 130+ FIELDS TERMINATED BY ','
 131+ OPTIONALLY ENCLOSED BY '"'
 132+ (report_language,region_code,region_name) ;
 133+
 134+LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
 135+ INTO TABLE wikistats
 136+ FIELDS TERMINATED BY ','
 137+ OPTIONALLY ENCLOSED BY '"'
 138+ (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
 139+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 140+
 141+LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
 142+ INTO TABLE page_views
 143+ FIELDS TERMINATED BY ','
 144+ OPTIONALLY ENCLOSED BY '"'
 145+ (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
 146+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 147+
 148+
 149+LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
 150+ INTO TABLE language_names
 151+ FIELDS TERMINATED BY ','
 152+ OPTIONALLY ENCLOSED BY '"'
 153+ (report_language,language_code,language_name) ;
 154+
 155+LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
 156+ INTO TABLE binaries
 157+ FIELDS TERMINATED BY ','
 158+ OPTIONALLY ENCLOSED BY '"'
 159+ (project_code,language_code,@date,extension,binaries)
 160+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 161+
 162+LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
 163+ INTO TABLE offline
 164+ FIELDS TERMINATED BY ','
 165+ OPTIONALLY ENCLOSED BY '"'
 166+ (@date,readers)
 167+ SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
 168+
 169+
 170+-- show contents (debugging only)
 171+-- SELECT * FROM comscore ;
 172+-- SELECT * FROM comscore_regions ;
 173+-- SELECT * FROM wikistats ;
 174+-- SELECT * FROM page_views ;
 175+-- SELECT * FROM language_names ;
 176+-- SELECT * FROM binaries
 177+-- WHERE project_code = 'commons' ;
 178+ SELECT * FROM offline ;
 179+
 180+
Property changes on: trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt
___________________________________________________________________
Added: svn:eol-style
147181 + native
Property changes on: trunk/wikistats/analytics/analytics_generate_csv_files.sh
___________________________________________________________________
Added: svn:eol-style
148182 + native
Property changes on: trunk/wikistats/analytics/analytics_upd.sh
___________________________________________________________________
Added: svn:eol-style
149183 + native
Index: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl
@@ -1,331 +1,331 @@
2 -#!/usr/local/bin/perl
3 -
4 -# Copyright (C) 2011 Wikimedia Foundation
5 -# This program is free software; you can redistribute it and/or
6 -# modify it under the terms of the GNU General Public License version 2
7 -# as published by the Free Software Foundation.
8 -# This program is distributed in the hope that it will be useful,
9 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
10 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 -# See the GNU General Public License for more details, at
12 -# http://www.fsf.org/licenses/gpl.html
13 -
14 -# Author:
15 -# Erik Zachte, email ezachte@wikimedia.org
16 -# loosely based on predecessor
17 -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl
18 -
19 -# Functionality:
20 -# tba
21 -
22 -# Parameters:
23 -# tba
24 -
25 -# Output:
26 -# updated csv file for import in MySQL
27 -
28 -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/
29 -
30 - use Getopt::Std ;
31 -
32 - $true = 1 ;
33 - $false = 0 ;
34 -
35 - @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ;
36 -
37 - $file_csv_monthly_data = "StatisticsMonthly.csv" ;
38 - $file_csv_user_activity_spread = "StatisticsUserActivitySpread.csv" ;
39 - $file_csv_analytics_in = "analytics_in_wikistats.csv" ;
40 -
41 - &ParseArguments ;
42 - &ReadStatisticsMonthly ;
43 - &FindLargestWikis ;
44 - &WriteMonthlyData ;
45 -
46 - print "\nReady\n\n" ;
47 - exit ;
48 -
49 -sub ParseArguments
50 -{
51 - my (@options, $arguments) ;
52 -
53 - getopt ("io", \%options) ;
54 -
55 - foreach $arg (sort keys %options)
56 - { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
57 - print ("\nArguments\n$arguments\n") ;
58 -
59 - if (! -d '/mnt/') # EZ test
60 - {
61 - $path_in = "c:/\@ wikimedia/# out bayes" ;
62 - $path_out = "c:/MySQL/analytics" ;
63 - }
64 - else
65 - {
66 - die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
67 - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
68 -
69 - $path_in = $options {"i"} ;
70 - $path_out = $options {"o"} ;
71 - }
72 -
73 - die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
74 - die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
75 -
76 - print "Input folder: $path_in\n" ;
77 - print "Output folder: $path_out\n\n" ;
78 -
79 - $file_csv_out = "$path_out/analytics_in_wikistats.csv" ;
80 -}
81 -
82 -sub ReadStatisticsMonthly
83 -{
84 - &ReadStatisticsMonthlyForProject ("wb") ;
85 - &ReadStatisticsMonthlyForProject ("wk") ;
86 - &ReadStatisticsMonthlyForProject ("wn") ;
87 - &ReadStatisticsMonthlyForProject ("wp") ;
88 - &ReadStatisticsMonthlyForProject ("wq") ;
89 - &ReadStatisticsMonthlyForProject ("ws") ;
90 - &ReadStatisticsMonthlyForProject ("wv") ;
91 - &ReadStatisticsMonthlyForProject ("wx") ;
92 -
93 -# &ReadStatisticsPerBinariesExtensionCommons ;
94 -}
95 -
96 -sub ReadStatisticsMonthlyForProject
97 -{
98 - my $project = shift;
99 - $all_projects = "*" ;
100 -
101 - my $file_csv_in_1 = "$path_in/csv_$project/$file_csv_monthly_data" ;
102 - my $file_csv_in_2 = "$path_in/csv_$project/$file_csv_user_activity_spread" ;
103 -
104 - if (! -e $file_csv_in_1)
105 - { &Abort ("Input file '$file_csv_in_1' not found") ; }
106 - if (! -e $file_csv_in_2)
107 - { &Abort ("Input file '$file_csv_in_2' not found") ; }
108 -
109 - my $yyyymm ;
110 -
111 - print "Read '$file_csv_in_1'\n" ;
112 - open CSV_IN, '<', $file_csv_in_1 ;
113 - while ($line = <CSV_IN>)
114 - {
115 - chomp $line ;
116 - ($language,$date,$counts) = split (',', $line, 3) ;
117 - @fields = split (',', $counts) ;
118 -
119 - next if ! &AcceptWiki ($project,$language) ;
120 -
121 - ($month,$day,$year) = split ('\/', $date) ;
122 - $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
123 -
124 - foreach $field (@fields)
125 - {
126 - if ($field eq '-')
127 - { $field = 0 ; }
128 - }
129 -
130 - $data = $fields [0] . ',' . # contributors all time
131 - $fields [1] . ',' . # new contributors
132 - 'data2,' . # place holder for more data, to be inserted later
133 - $fields [4] . ',' . # articles
134 - $fields [6] . ',' . # articles new per day
135 - $fields [9] . ',' . # larger than 0.5 kB
136 - $fields [10] . ',' . # larger than 2.0 kB
137 - $fields [7] . ',' . # mean edits per article
138 - $fields [8] . ',' . # mean bytes per article
139 - $fields [11] . ',' . # edits
140 - $fields [12] . ',' . # size in bytes
141 - $fields [13] . ',' . # size in words
142 - $fields [14] . ',' . # links internal
143 - $fields [15] . ',' . # links interwiki
144 - $fields [16] . ',' . # links images
145 - $fields [17] . ',' . # links external
146 - $fields [18] ; # redirects
147 -
148 - $data1 {"$project,$language,$yyyymm"} = $data ;
149 - }
150 - close CSV_IN ;
151 -
152 - # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
153 -
154 - print "Read '$file_csv_in_2'\n" ;
155 - open CSV_IN, '<', $file_csv_in_2 ;
156 - while ($line = <CSV_IN>)
157 - {
158 - chomp $line ;
159 - ($language,$date,$reguser_bot,$group,@counts) = split (',', $line) ;
160 -
161 - next if ! &AcceptWiki ($project,$language) ;
162 -
163 - if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
164 - if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces
165 -
166 - ($month,$day,$year) = split ('\/', $date) ;
167 - $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
168 - $months {$yyyymm} ++ ;
169 -# print "YYYYMM $yyyymm\n" ;
170 -
171 - # data have been collected in WikiCountsProcess.pm and been written in WikiCountsOutput.pm
172 - # count user with over x edits
173 - # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
174 - # @thresholds = (1,3,5,10,25,32,50,100,250,316,500,1000,2500,3162,5000,10000,25000,31623,50000,100000,250000,316228,500000,1000000,2500000,3162278,500000,10000000,25000000,31622777,5000000,100000000) ;
175 - $edits_ge_5 = @counts [2] > 0 ? @counts [2] : 0 ;
176 - $edits_ge_25 = @counts [4] > 0 ? @counts [4] : 0 ;
177 - $edits_ge_100 = @counts [7] > 0 ? @counts [7] : 0 ;
178 - $data2 {"$project,$language,$yyyymm"} = "$edits_ge_5,$edits_ge_25,$edits_ge_100" ;
179 -
180 - $total_edits_ge_5 {"$project,$language"} += $edits_ge_5 ;
181 - $total_edits_ge_25 {"$project,$language"} += $edits_ge_25 ;
182 - $total_edits_ge_100 {"$project,$language"} += $edits_ge_100 ;
183 -
184 - # prep string with right amount of comma's
185 - if ($data2_default eq '')
186 - {
187 - $data2_default = $data2 {"$project,$language,$yyyymm"} ;
188 - $data2_default =~ s/[^,]+/0/g ;
189 - }
190 - }
191 - close CSV_IN ;
192 -}
193 -
194 -#sub ReadStatisticsPerBinariesExtensionCommons
195 -#{
196 -# my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
197 -# my $mmax = -1 ;
198 -
199 -# if (! -e $file_csv_in)
200 -# { &Abort ("Input file '$file_csv_in' not found") ; }
201 -
202 -# print "Read '$file_csv_in'\n" ;
203 -# open CSV_IN, '<', $file_csv_in ;
204 -# while ($line = <CSV_IN>)
205 -# {
206 -# chomp $line ;
207 -# ($language,$date,$counts) = split (',', $line, 3) ;
208 -
209 -# if ($language ne "commons") { next ; }
210 -
211 -# if ($date eq "00/0000")
212 -# {
213 -# @fields = split (',', $counts) ;
214 -# $field_ndx = 0 ;
215 -# foreach $field (@fields)
216 -# {
217 -# $ext_cnt {-1}{$field_ndx} = $field ;
218 -# # print "EXT_CNT $field_ndx : $field\n" ;
219 -# $field_ndx ++ ;
220 -# }
221 -# next ;
222 -# }
223 -
224 -# ($month,$year) = split ('\/', $date) ;
225 -# my $m = &months_since_2000_01 ($year,$month) ;
226 -# next if $m < $m_start ;
227 -
228 -# if ($m > $mmax)
229 -# { $mmax = $m ; }
230 -
231 -# @fields = split (',', $counts) ;
232 -# $field_ndx = 0 ;
233 -# foreach $field (@fields)
234 -# {
235 -# $ext_cnt {$m}{$field_ndx} = $field ;
236 -# $ext_tot {$m} += $field ;
237 -# $field_ndx ++ ;
238 -# }
239 -# }
240 -# close CSV_IN ;
241 -
242 -# %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
243 -# @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;
244 -
245 -# $extcnt = 0 ;
246 -# foreach $extndx (@ext_cnt_mmax)
247 -# {
248 -# # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
249 -# push @extndxs, $extndx ;
250 -# if ($extcnt++ >= 9) { last ; }
251 -# }
252 -#}
253 -
254 -sub FindLargestWikis
255 -{
256 - print "Largest projects (most accumulated very active editors):\n";
257 - @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ;
258 - $rank = 0 ;
259 - foreach $project_language (@total_edits_ge_100)
260 - {
261 - $largest_projects {$project_language} = $rank++ ;
262 - print "$project_language," ;
263 - last if $rank > 10 ;
264 - }
265 - print "\n\n" ;
266 -
267 - foreach $yyyymm (sort keys %months)
268 - {
269 - next if $yyyymm lt '2011' ;
270 - foreach $project_language (keys %largest_projects)
271 - {
272 - ($project,$language) = split (',', $project_language) ;
273 - if ($data2 {"$project,$language,$yyyymm"} eq '')
274 - {
275 - print "No data yet for large wiki $project_language for $yyyymm-> skip month $yyyymm\n" ;
276 - $months {$yyyymm} = 0 ;
277 - }
278 - }
279 - }
280 - exit ;
281 -}
282 -
283 -sub WriteMonthlyData
284 -{
285 - my $file_csv_out = "$path_out/$file_csv_analytics_in" ;
286 - open CSV_OUT, '>', $file_csv_out ;
287 - foreach $project_wiki_month (sort keys %data1)
288 - {
289 - ($project,$wiki,$yyyymm) = split (',', $project_wiki_month) ;
290 -
291 - # recent month misses on eor more large wikis?
292 - next if $months {$yyyymm} == 0 ;
293 -
294 - $data1 = $data1 {$project_wiki_month} ;
295 - $data2 = $data2 {$project_wiki_month} ;
296 - if ($data2 eq '')
297 - {
298 - print "Editor data missing for $project_wiki_month\n" ;
299 - $data2 = $data2_default ;
300 - }
301 - $data1 =~ s/data2/$data2/ ; # insert rather than append to have all editor fields close together
302 - print CSV_OUT "$project_wiki_month,$data1\n" ;
303 - }
304 - $total_edits_ge_5 {"$project,*,$yyyymm"} += $edits_ge_5 ;
305 - $total_edits_ge_25 {"$project,*,$yyyymm"} += $edits_ge_25 ;
306 - $total_edits_ge_100 {"$project,*,$yyyymm"} += $edits_ge_100 ;
307 - close CSV_OUT ;
308 -}
309 -
310 -sub AcceptWiki
311 -{
312 - my ($project,$language) = @_ ;
313 -
314 - return $false if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
315 - return $false if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on
316 - return $false if $language =~ /mania|team|comcom|closed|chair|langcom|office|searchcom|sep11|nostalgia|stats|test/i ;
317 -
318 - return $false if $language =~ /^(?:dk|tlh|ru_sib)$/ ; # dk=dumps exist(ed?) but site not, tlh=Klignon, ru-sib=Siberian
319 - return $false if $project eq 'wk' and ($language eq "als" or $language eq "tlh") ;
320 -
321 - return $true ;
322 -}
323 -
324 -sub Abort
325 -{
326 - my $msg = shift ;
327 - print "$msg\nExecution aborted." ;
328 - # to do: log also to file
329 - exit ;
330 -}
331 -
332 -
 2+#!/usr/local/bin/perl
 3+
 4+# Copyright (C) 2011 Wikimedia Foundation
 5+# This program is free software; you can redistribute it and/or
 6+# modify it under the terms of the GNU General Public License version 2
 7+# as published by the Free Software Foundation.
 8+# This program is distributed in the hope that it will be useful,
 9+# but WITHOUT ANY WARRANTY; without even the implied warranty of
 10+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 11+# See the GNU General Public License for more details, at
 12+# http://www.fsf.org/licenses/gpl.html
 13+
 14+# Author:
 15+# Erik Zachte, email ezachte@wikimedia.org
 16+# loosely based on predecessor
 17+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl
 18+
 19+# Functionality:
 20+# tba
 21+
 22+# Parameters:
 23+# tba
 24+
 25+# Output:
 26+# updated csv file for import in MySQL
 27+
 28+# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/
 29+
 30+ use Getopt::Std ;
 31+
# Script-wide settings. They are package globals on purpose: the
# subroutines further down read them by name.
($true, $false) = (1, 0) ;

# Project codes as listed by the author; no code in this script reads this list.
@projects = qw(wb wk wn wp wq ws wv wx commons *) ;

# Names of the two wikistats input files (one copy per project folder)
# and of the csv file this script produces.
$file_csv_monthly_data         = 'StatisticsMonthly.csv' ;
$file_csv_user_activity_spread = 'StatisticsUserActivitySpread.csv' ;
$file_csv_analytics_in         = 'analytics_in_wikistats.csv' ;

# Processing steps, in order: arguments -> read input -> rank wikis -> write output.
ParseArguments () ;
ReadStatisticsMonthly () ;
FindLargestWikis () ;
WriteMonthlyData () ;

print "\nReady\n\n" ;
exit ;
 48+
# ParseArguments
#
# Reads the command line switches -i (input folder) and -o (output folder),
# echoes them, and stores the folders to use in the globals $path_in and
# $path_out. Also sets the global $file_csv_out to the csv path inside the
# output folder.
#
# When no '/mnt/' directory exists the switches are ignored and two
# hard-coded Windows folders are used instead (marked "EZ test" by the author).
#
# Dies when a required switch is missing or when either folder does not exist.
sub ParseArguments
{
  # %options must be a hash: getopt () fills it and it is read by key below.
  my %options ;
  my $arguments = '' ;

  getopt ("io", \%options) ;

  foreach my $arg (sort keys %options)
  { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
  print ("\nArguments\n$arguments\n") ;

  if (! -d '/mnt/') # EZ test
  {
    $path_in  = "c:/\@ wikimedia/# out bayes" ;
    $path_out = "c:/MySQL/analytics" ;
  }
  else
  {
    # NOTE(review): the message mentions "projectcounts files", while this script
    # reads the Statistics*.csv files from the input folder - confirm wording.
    die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
    die ("Specify output folder as: -o path") if (! defined ($options {"o"})) ;

    $path_in  = $options {"i"} ;
    $path_out = $options {"o"} ;
  }

  die "Input folder '$path_in' does not exist"   if (! -d $path_in) ;
  die "Output folder '$path_out' does not exist" if (! -d $path_out) ;

  print "Input folder: $path_in\n" ;
  print "Output folder: $path_out\n\n" ;

  $file_csv_out = "$path_out/analytics_in_wikistats.csv" ;
}
 81+
# ReadStatisticsMonthly
#
# Loads the monthly csv data for each project code in turn, always in the
# order listed here.
sub ReadStatisticsMonthly
{
  foreach my $project_code (qw(wb wk wn wp wq ws wv wx))
  { &ReadStatisticsMonthlyForProject ($project_code) ; }

# &ReadStatisticsPerBinariesExtensionCommons ;
}
 95+
# ReadStatisticsMonthlyForProject ($project)
#
# Loads two csv files for one project code from "$path_in/csv_$project/":
#
#  - $file_csv_monthly_data: one row of counts per wiki and month. A selection
#    of columns is stored in %data1, keyed "project,language,yyyy-mm", with the
#    literal text 'data2' as place holder for the editor counts.
#  - $file_csv_user_activity_spread: editor counts per activity threshold. For
#    rows about registered users ("R") in the article namespace ("A") the
#    counts for 5+, 25+ and 100+ edits are stored in %data2 (same key) and
#    accumulated per wiki in %total_edits_ge_5, %total_edits_ge_25 and
#    %total_edits_ge_100 (keyed "project,language").
#
# Also counts the accepted editor rows per month in %months and, once, fills
# $data2_default with a row of zeroes that has as many fields as a %data2 row.
# Wikis rejected by AcceptWiki are skipped.
# Calls Abort when a file is missing or cannot be opened.
sub ReadStatisticsMonthlyForProject
{
  my $project = shift;
  $all_projects = "*" ; # not referenced anywhere else in this script

  my $file_csv_in_1 = "$path_in/csv_$project/$file_csv_monthly_data" ;
  my $file_csv_in_2 = "$path_in/csv_$project/$file_csv_user_activity_spread" ;

  if (! -e $file_csv_in_1)
  { &Abort ("Input file '$file_csv_in_1' not found") ; }
  if (! -e $file_csv_in_2)
  { &Abort ("Input file '$file_csv_in_2' not found") ; }

  print "Read '$file_csv_in_1'\n" ;
  open (my $csv_monthly, '<', $file_csv_in_1) or &Abort ("Input file '$file_csv_in_1' could not be opened: $!") ;
  while (my $line = <$csv_monthly>)
  {
    chomp $line ;
    my ($language, $date, $counts) = split (',', $line, 3) ;
    my @fields = split (',', $counts) ;

    next if ! &AcceptWiki ($project,$language) ;

    # date is written as mm/dd/yyyy, the day is not needed
    my ($month, undef, $year) = split ('\/', $date) ;
    my $yyyymm = sprintf ("%04d-%02d", $year, $month) ;

    # '-' marks a missing count, store it as 0
    foreach my $field (@fields)
    {
      if ($field eq '-')
      { $field = 0 ; }
    }

    $data1 {"$project,$language,$yyyymm"} =
      join (',',
            $fields [0],   # contributors all time
            $fields [1],   # new contributors
            'data2',       # place holder for more data, to be inserted later
            $fields [4],   # articles
            $fields [6],   # articles new per day
            $fields [9],   # larger than 0.5 kB
            $fields [10],  # larger than 2.0 kB
            $fields [7],   # mean edits per article
            $fields [8],   # mean bytes per article
            $fields [11],  # edits
            $fields [12],  # size in bytes
            $fields [13],  # size in words
            $fields [14],  # links internal
            $fields [15],  # links interwiki
            $fields [16],  # links images
            $fields [17],  # links external
            $fields [18]) ; # redirects
  }
  close $csv_monthly ;

  # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)

  print "Read '$file_csv_in_2'\n" ;
  open (my $csv_activity, '<', $file_csv_in_2) or &Abort ("Input file '$file_csv_in_2' could not be opened: $!") ;
  while (my $line = <$csv_activity>)
  {
    chomp $line ;
    my ($language, $date, $reguser_bot, $group, @counts) = split (',', $line) ;

    next if ! &AcceptWiki ($project,$language) ;

    if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
    if ($group ne "A")       { next ; } # A: articles, T: talk pages, O: other namespaces

    my ($month, undef, $year) = split ('\/', $date) ;
    my $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
    $months {$yyyymm} ++ ;

    # data have been collected in WikiCountsProcess.pm and been written in WikiCountsOutput.pm
    # count user with over x edits
    # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
    # @thresholds = (1,3,5,10,25,32,50,100,250,316,500,1000,2500,3162,5000,10000,25000,31623,50000,100000,250000,316228,500000,1000000,2500000,3162278,500000,10000000,25000000,31622777,5000000,100000000) ;
    # so index 2 -> 5+ edits, index 4 -> 25+ edits, index 7 -> 100+ edits
    my $edits_ge_5   = $counts [2] > 0 ? $counts [2] : 0 ;
    my $edits_ge_25  = $counts [4] > 0 ? $counts [4] : 0 ;
    my $edits_ge_100 = $counts [7] > 0 ? $counts [7] : 0 ;
    $data2 {"$project,$language,$yyyymm"} = "$edits_ge_5,$edits_ge_25,$edits_ge_100" ;

    $total_edits_ge_5   {"$project,$language"} += $edits_ge_5 ;
    $total_edits_ge_25  {"$project,$language"} += $edits_ge_25 ;
    $total_edits_ge_100 {"$project,$language"} += $edits_ge_100 ;

    # prep string with right amount of comma's
    if ($data2_default eq '')
    {
      $data2_default = $data2 {"$project,$language,$yyyymm"} ;
      $data2_default =~ s/[^,]+/0/g ;
    }
  }
  close $csv_activity ;
}
 193+
 194+#sub ReadStatisticsPerBinariesExtensionCommons
 195+#{
 196+# my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
 197+# my $mmax = -1 ;
 198+
 199+# if (! -e $file_csv_in)
 200+# { &Abort ("Input file '$file_csv_in' not found") ; }
 201+
 202+# print "Read '$file_csv_in'\n" ;
 203+# open CSV_IN, '<', $file_csv_in ;
 204+# while ($line = <CSV_IN>)
 205+# {
 206+# chomp $line ;
 207+# ($language,$date,$counts) = split (',', $line, 3) ;
 208+
 209+# if ($language ne "commons") { next ; }
 210+
 211+# if ($date eq "00/0000")
 212+# {
 213+# @fields = split (',', $counts) ;
 214+# $field_ndx = 0 ;
 215+# foreach $field (@fields)
 216+# {
 217+# $ext_cnt {-1}{$field_ndx} = $field ;
 218+# # print "EXT_CNT $field_ndx : $field\n" ;
 219+# $field_ndx ++ ;
 220+# }
 221+# next ;
 222+# }
 223+
 224+# ($month,$year) = split ('\/', $date) ;
 225+# my $m = &months_since_2000_01 ($year,$month) ;
 226+# next if $m < $m_start ;
 227+
 228+# if ($m > $mmax)
 229+# { $mmax = $m ; }
 230+
 231+# @fields = split (',', $counts) ;
 232+# $field_ndx = 0 ;
 233+# foreach $field (@fields)
 234+# {
 235+# $ext_cnt {$m}{$field_ndx} = $field ;
 236+# $ext_tot {$m} += $field ;
 237+# $field_ndx ++ ;
 238+# }
 239+# }
 240+# close CSV_IN ;
 241+
 242+# %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
 243+# @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;
 244+
 245+# $extcnt = 0 ;
 246+# foreach $extndx (@ext_cnt_mmax)
 247+# {
 248+# # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
 249+# push @extndxs, $extndx ;
 250+# if ($extcnt++ >= 9) { last ; }
 251+# }
 252+#}
 253+
# FindLargestWikis
#
# Ranks all wikis by their accumulated count of editors with 100+ edits
# (%total_edits_ge_100, keyed "project,language") and stores the top of that
# ranking in %largest_projects (value = rank, starting at 0).
#
# Then checks, for every month from 2011 onwards found in %months, that each
# of these large wikis has editor data in %data2. A month for which any of
# them lacks data gets $months {yyyy-mm} = 0.
#
# Returns normally, so the caller can carry on with its next step.
sub FindLargestWikis
{
  print "Largest projects (most accumulated very active editors):\n";
  my @ranked = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ;
  my $rank = 0 ;
  foreach my $project_language (@ranked)
  {
    $largest_projects {$project_language} = $rank++ ;
    print "$project_language," ;
    # NOTE(review): rank is tested after the increment, so up to 11 wikis
    # (ranks 0..10) are kept - confirm whether a top 10 was intended.
    last if $rank > 10 ;
  }
  print "\n\n" ;

  foreach my $yyyymm (sort keys %months)
  {
    next if $yyyymm lt '2011' ;
    foreach my $project_language (keys %largest_projects)
    {
      my ($project, $language) = split (',', $project_language) ;
      my $editors = $data2 {"$project,$language,$yyyymm"} ;
      if (! defined ($editors) or $editors eq '')
      {
        print "No data yet for large wiki $project_language for $yyyymm-> skip month $yyyymm\n" ;
        $months {$yyyymm} = 0 ;
      }
    }
  }
}
 282+
# WriteMonthlyData
#
# Writes "$path_out/$file_csv_analytics_in": one line per key of %data1
# ("project,wiki,yyyy-mm"), in sorted key order, consisting of the key followed
# by the stored counts. The 'data2' place holder inside the counts is replaced
# by the editor counts from %data2, or by $data2_default when a wiki has no
# editor counts for that month (a notice is printed in that case).
#
# Months whose entry in %months is 0 or absent are left out.
# Dies when the output file cannot be opened or closed.
sub WriteMonthlyData
{
  my $file_csv_out = "$path_out/$file_csv_analytics_in" ;
  open (my $csv_out, '>', $file_csv_out) or die "Could not open '$file_csv_out' for writing: $!" ;
  foreach my $project_wiki_month (sort keys %data1)
  {
    my (undef, undef, $yyyymm) = split (',', $project_wiki_month) ;

    # recent month misses one or more large wikis?
    next if $months {$yyyymm} == 0 ;

    my $monthly_counts = $data1 {$project_wiki_month} ;
    my $editor_counts  = $data2 {$project_wiki_month} ;
    if (! defined ($editor_counts) or $editor_counts eq '')
    {
      print "Editor data missing for $project_wiki_month\n" ;
      $editor_counts = $data2_default ;
    }
    $monthly_counts =~ s/data2/$editor_counts/ ; # insert rather than append to have all editor fields close together
    print $csv_out "$project_wiki_month,$monthly_counts\n" ;
  }
  # buffered write errors only show up on close, so check it
  close ($csv_out) or die "Could not close '$file_csv_out': $!" ;
}
 309+
# AcceptWiki ($project, $language)
#
# Tells whether data for this wiki should be processed.
# Returns $false when any of the exclusion rules below applies, $true otherwise.
sub AcceptWiki
{
  my ($project,$language) = @_ ;

  my $excluded =
       # commons also shows up in the csv files of other projects (bug, hard to clean up): only accept it for 'wx'
       ($language eq 'commons' && $project ne 'wx')
       # 'sr' on project 'wn': ignored because of bot spam
    || ($language eq 'sr' && $project eq 'wn')
       # wiki codes containing any of these words, anywhere and in any letter case
    || ($language =~ /mania|team|comcom|closed|chair|langcom|office|searchcom|sep11|nostalgia|stats|test/i)
       # dk=dumps exist(ed?) but site does not, tlh=Klingon, ru_sib=Siberian
    || ($language =~ /^(?:dk|tlh|ru_sib)$/)
       # two codes that are skipped for project 'wk' only
    || ($project eq 'wk' && ($language eq "als" || $language eq "tlh")) ;

  return $excluded ? $false : $true ;
}
 323+
# Abort ($msg)
#
# Prints the message followed by "Execution aborted." and ends the script.
# The exit status is 1, so that a calling shell script can tell an aborted
# run from a successful one.
sub Abort
{
  my $msg = shift ;
  print "$msg\nExecution aborted." ;
  # to do: log also to file
  exit 1 ;
}
 331+
 332+
Property changes on: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl
___________________________________________________________________
Added: svn:eol-style
333333 + native
Property changes on: trunk/wikistats/analytics/analytics_new.sh
___________________________________________________________________
Added: svn:eol-style
334334 + native
Property changes on: trunk/wikistats/analytics/_readme.txt
___________________________________________________________________
Added: svn:eol-style
335335 + native
Property changes on: trunk/wikistats/analytics/AnalyticsPrepComscoreData.pl
___________________________________________________________________
Added: svn:eol-style
336336 + native

Status & tagging log