Index: trunk/wikistats/reportcard/ReportCardGenerateHtml.pl |
— | — | @@ -1,1416 +1,1416 @@ |
2 | | -#!/usr/local/bin/perl
|
3 | | -
|
4 | | - use lib "/home/ezachte/lib" ;
|
5 | | - use EzLib ;
|
6 | | - $trace_on_exit = $true ;
|
7 | | -
|
8 | | - use CGI::Carp qw(fatalsToBrowser);
|
9 | | - use Time::Local ;
|
10 | | - use Getopt::Std ;
|
11 | | -
|
12 | | - # !! adapt these for every run !! (year and month of the reporting period; all date values further down derive from these two)
|
13 | | - $p_year = 2010 ; # full reporting year (the short form derived later subtracts 2000)
|
14 | | - $p_month = 12 ; # reporting month as a number 1..12 (later used, minus 1, as index into @months)
|
15 | | -
|
16 | | - $debug = $false ; # NOTE(review): $false is not assigned anywhere in this view -- presumably supplied by EzLib; confirm
|
17 | | -
|
18 | | - $public = 0 ; # NOTE(review): $public/$private look like codes for two report variants; their use is not visible here
|
19 | | - $private = 1 ;
|
20 | | -
|
21 | | - $p_month_d2 = sprintf ("%02d", $p_month) ; # reporting month zero-padded to two digits
|
22 | | - # Derive neighbouring months/years, zero-padded forms and month names from $p_year / $p_month
|
23 | | - @months = qw (January February March April May June July August September October November December) ; # English month names, index 0 = January
|
24 | | - $p_month_prev = ($p_month > 1) ? $p_month - 1 : 12 ; # previous month; January wraps to 12
|
25 | | - $p_month_next = ($p_month < 12) ? $p_month + 1 : 1 ; # next month; December wraps to 1
|
26 | | - $p_month_next2 = ($p_month < 11) ? $p_month + 2 : $p_month - 10 ; # month after next; 11 -> 1, 12 -> 2
|
27 | | - $p_year_plus_m2 = ($p_month < 11) ? $p_year : $p_year + 1 ; # year in which $p_month_next2 falls
|
28 | | - $p_month_prev_d2 = sprintf ("%02d", $p_month_prev) ; # two-digit, zero-padded forms of previous/next month
|
29 | | - $p_month_next_d2 = sprintf ("%02d", $p_month_next) ;
|
30 | | -
|
31 | | - $p_year_prev = $p_year - 1 ;
|
32 | | - $p_year_next = $p_year + 1 ;
|
33 | | - $p_year_short = $p_year - 2000 ; # year relative to 2000 (10 for 2010); assumes $p_year >= 2000
|
34 | | - $p_year_prev_short = $p_year_prev - 2000 ;
|
35 | | - $p_year_short_d2 = sprintf ("%02d", $p_year_short) ; # short years zero-padded to two digits
|
36 | | - $p_year_prev_short_d2 = sprintf ("%02d", $p_year_prev_short) ;
|
37 | | -
|
38 | | - $p_month_name = $months [$p_month -1] ; # month numbers are 1-based, @months is 0-based, hence the -1
|
39 | | - $p_month_name_prev = $months [$p_month_prev-1] ;
|
40 | | - $p_month_name_next = $months [$p_month_next-1] ;
|
41 | | - $p_month_name_next2 = $months [$p_month_next2-1] ;
|
42 | | -
|
43 | | - # Period label templates in the form "from|to"; {{m}}, {{m-1}}, {{y}}, {{y-1}} are placeholders (NOTE(review): substituted by code outside this view -- confirm)
|
44 | | - $trend_one_year = "{{m}}/{{y-1}}|{{m}}/{{y}}" ; # same month, previous year -> this year
|
45 | | -
|
46 | | - if ($p_month == 1) # January: the previous month is December of the previous year, so spell it out literally
|
47 | | - { $trend_one_month = "12/{{y-1}}|1/{{y}}" ; }
|
48 | | - else
|
49 | | - { $trend_one_month = "{{m-1}}/{{y}}|{{m}}/{{y}}" ; }
|
50 | | -
|
51 | | - $p_year_month_m1 = ($p_month == 1) ? "$p_month_prev/$p_year_prev_short_d2" : "$p_month_prev/$p_year_short_d2" ; # m1 = minus 1: previous month as "m/yy", using the previous year when the report month is January
|
52 | | - # Print the configured and derived period values (name followed by value, one per line) so they can be checked by eye
|
53 | | - print "\$p_year $p_year\n" ;
|
54 | | - print "\$p_year_prev $p_year_prev\n" ;
|
55 | | - print "\$p_year_plus_m2 $p_year_plus_m2\n" ;
|
56 | | - print "\$p_year_short $p_year_short\n" ;
|
57 | | - print "\$p_year_prev_short $p_year_prev_short\n" ;
|
58 | | - print "\$p_year_short_d2 $p_year_short_d2\n" ;
|
59 | | - print "\$p_year_prev_short_d2 $p_year_prev_short_d2\n" ;
|
60 | | - print "\n" ;
|
61 | | - print "\$p_month $p_month\n" ;
|
62 | | - print "\$p_month_d2 $p_month_d2\n" ;
|
63 | | - print "\$p_month_next $p_month_next\n" ;
|
64 | | - print "\$p_month_prev $p_month_prev\n" ;
|
65 | | - print "\$p_month_next_d2 $p_month_next_d2\n" ;
|
66 | | - print "\$p_month_prev_d2 $p_month_prev_d2\n" ;
|
67 | | - print "\$p_month_name $p_month_name\n" ;
|
68 | | - print "\$p_month_name_prev $p_month_name_prev\n" ;
|
69 | | - print "\$p_month_name_next $p_month_name_next\n" ;
|
70 | | - print "\$p_month_name_next2 $p_month_name_next2\n" ;
|
71 | | - print "\$p_year_month_m1 $p_year_month_m1\n" ;
|
72 | | -
|
73 | | -
|
74 | | - # example output for synopsys.txt
|
75 | | - #STATISTICS
|
76 | | -
|
77 | | - #http://infodisiac.com/Wikimedia/ReportCard/EN/RC_2009_08_summary.html
|
78 | | -
|
79 | | - #Y: Jun, 2008->2009 k=thousand m=million b=billion
|
80 | | - #M: 2009, May->Jun
|
81 | | -
|
82 | | - #Unique Visitors 301 m (Y:+21% / M: -5%)
|
83 | | - #Page Requests 11 b (Y: +6% / M: -6%)
|
84 | | - #Site Rank 5th (Y: +0 / M: -1 )
|
85 | | - #Commons Files 4.7 m (Y:+62% / M: +4%) ++ growth pdf/djvu files
|
86 | | - #Article Count 20.6 m (Y:+33% / M: +2%)
|
87 | | - #New Articles 17 k (Y: -9% / M: -6%)
|
88 | | - #New Editors 18 k (Y:+39% / M:+25%) wp:it in one year -50%
|
89 | | - #Active Editors 88 k (Y:+ 1% / M: -2%) wp:ru in one year +45%
|
90 | | -
|
91 | | - print "\n"."="x80 . "\n\n" ; # separator: blank line, a row of 80 '=' characters, blank line
|
92 | | - # Hand-maintained figures and footnotes for the report month; the active set is the one tagged 2010_12 in the commented-out 'if' just below
|
93 | | -# !! This is rather crummy Q&D way to collect variable data, data need to be externalized !!
|
94 | | - # Each metric array: value, unit (k=thousand m=million b=billion, x where none applies), yearly growth, monthly growth, growth suffix ('%' or 'th'). Each push appends one more element: "footnote numbers|title<br>footnote text". NOTE(review): how these fields and the '#' markers inside the footnote text are consumed is not visible here -- confirm against the code that reads them
|
95 | | -## if ($2010_12)
|
96 | | -## {
|
97 | | - @visitors = qw ( 395,472,000 m 14.0 -3.7 %) ; # Unique Visitors by Region
|
98 | | - @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
99 | | - @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
100 | | - @commons_files = qw ( 8,046,377 m 43.1 3.0 %) ; # Binaries per month - Absolute
|
101 | | - @article_count = qw ( 17,616,951 m 20.0 1.5 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
|
102 | | - @new_articles = qw ( 8,555 k 16.5 5.1 %) ; # New articles per day - Absolute
|
103 | | - @edits = qw ( 11,566,371 m 3.6 3.8 %) ; # Edits per month - Absolute
|
104 | | - @new_editors = qw ( 14,607 k -16.6 -2.5 %) ; # New editors - Absolute
|
105 | | - @active_editors = qw ( 79,324 k -5.9 -0.5 %) ; # Active editors - Absolute
|
106 | | - @very_active_editors = qw ( 10,254 k -1.6 0.1 %) ; # Very active editors - Absolute
|
107 | | - @reach = qw ( 31.1 x 1.5 -1.4 %) ; # Reach Percentage by Region
|
108 | | - # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
109 | | - push @visitors, "1|Unique Visitors<br>1: Average for last 12 months 377M." ;
|
110 | | -# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ;
|
111 | | -# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
|
112 | | - push @page_requests, "2,3|Page Requests<br>" .
|
113 | | - "2: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Dec: 4.1% of total Wikipedia traffic (556M/13489M)<br>" .
|
114 | | -# " Look ahead for page requests: Dec -> Jan = 13367M -> 14724M = +10.1%<br>" .
|
115 | | - "#3: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
|
116 | | - push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
117 | | - push @commons_files, "5|Commons Files<br>#5: Tiff uploads increased 5-fold in July 2010, 13-fold in last 12 months.<br>" ;
|
118 | | -# "#7: Commons consistently fastest growing project, 48% in last 12 months." ;
|
119 | | -
|
120 | | -#push @article_count, "8|Article Count<br>#8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ;
|
121 | | -# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
|
122 | | -# push @new_articles, "7|New Articles Per Day<br>" .
|
123 | | -# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ;
|
124 | | - push @edits, "6|Edits<br>#6: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" .
|
125 | | - "# Net growth in constructive edits is less clear, as this metric includes most reverting edits." ;
|
126 | | -# " Strong one-monthly dip in July due to World Cup Socker?." ;
|
127 | | -# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
|
128 | | -# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ;
|
129 | | - push @new_editors, "7|New Editors Per Day<br>" .
|
130 | | -# "10: Signifant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" .
|
131 | | -# " Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" .
|
132 | | -# " But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" .
|
133 | | - "#7:WMF recently commissioned in depth study of editor activity trends, which is ongoing." ;
|
134 | | - push @active_editors, "8|(Very) Active Editors<br>" .
|
135 | | -# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" .
|
136 | | - "#8: Since a few months editors on Commons are no longer included in overall editor total,<br>" .
|
137 | | - "# on the assumption that most of these also edit on one or more other projects.<br>" ;
|
138 | | -# " #Detection of double counts between any projects and languages is planned for late 2010." ;
|
139 | | - push @very_active_editors, "8|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
|
140 | | -## }
|
141 | | -
|
142 | | -
|
143 | | -# if ($2010_11)
|
144 | | -# {
|
145 | | -# @visitors = qw ( 410,816,000 m 18.8 0.6 %) ; # Unique Visitors by Region
|
146 | | -# @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
147 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
148 | | -# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute
|
149 | | -# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
|
150 | | -# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute
|
151 | | -# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute
|
152 | | -# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute
|
153 | | -# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute
|
154 | | -# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute
|
155 | | -# @reach = qw ( 31.1 x 2.6 0.0 %) ; # Reach Percentage by Region
|
156 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
157 | | -# push @visitors, "1,2|Unique Visitors<br>1: 410M UV's exceeds Oct 2010 record with 2M. Average for last 12 months 377M.<br>" .
|
158 | | -# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ;
|
159 | | -# push @page_requests, "3,4|Page Requests<br>" .
|
160 | | -# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.4% of total traffic (492M/14468M)<br>" .
|
161 | | -# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
|
162 | | -# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
163 | | -# }
|
164 | | -
|
165 | | -# if ($2010_10)
|
166 | | -# {
|
167 | | -# @visitors = qw ( 408,350,000 m 18.5 2.6 %) ; # Unique Visitors by Region
|
168 | | -# @page_requests = qw ( ? b ? ? %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
169 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
170 | | -# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute
|
171 | | -# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
|
172 | | -# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute
|
173 | | -# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute
|
174 | | -# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute
|
175 | | -# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute
|
176 | | -# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute
|
177 | | -# @reach = qw ( 31.1 x 2.3 0.5 %) ; # Reach Percentage by Region
|
178 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
179 | | -# push @visitors, "1,2|Unique Visitors<br>1: 408M UV's beats September 2010 record with 10M.<br>" .
|
180 | | -# "2: Growth in UV count in last 12 months 18.5% (for whole internet 9.6%)." ;
|
181 | | -# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
182 | | -# }
|
183 | | -
|
184 | | -
|
185 | | -# if ($2010_09)
|
186 | | -# {
|
187 | | -# @visitors = qw ( 398,178,000 m 22.1 6.6 %) ; # Unique Visitors by Region
|
188 | | -# @page_requests = qw (13,671,000,000 b 20.2 5.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
189 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
190 | | -# @commons_files = qw ( 7,491,824 m 48.2 2.8 %) ; # Binaries per month - Absolute
|
191 | | -# @article_count = qw ( 16,678,710 m 20.7 1.8 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute
|
192 | | -# @new_articles = qw ( 7,578 k 3.9 -18.9 %) ; # New articles per day - Absolute
|
193 | | -# @edits = qw ( 11,924,018 m 9.0 -3.3 %) ; # Edits per month - Absolute
|
194 | | -# @new_editors = qw ( 15,805 k -17.4 -10.5 %) ; # New editors - Absolute
|
195 | | -# @active_editors = qw ( 82,503 k -5.6 -3.3 %) ; # Active editors - Absolute
|
196 | | -# @very_active_editors = qw ( 11,011 k -2.5 -3.4 %) ; # Very active editors - Absolute
|
197 | | -# @reach = qw ( 30.8 x 3.2 1.8 %) ; # Reach Percentage by Region
|
198 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
199 | | -# push @visitors, "1,2|Unique Visitors<br>1: 398M UV's beats May 2010 record with 9M or 2.4%.<br>" .
|
200 | | -# "2: Growth in UV count in last 12 months 22% (for whole internet 10%)." ;
|
201 | | -# push @page_requests, "3,4|Page Requests<br>" .
|
202 | | -# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.0% of total traffic (425M/14096M)<br>" .
|
203 | | -# # " Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" .
|
204 | | -# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
|
205 | | -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
206 | | -# push @commons_files, "6,7|Commons Files<br>#6: Tiff uploads increased 5-fold in July 2010, 18-fold in last 12 months.<br>" .
|
207 | | -# "7: Commons consistently fastest growing project, 48% in last 12 months." ;
|
208 | | -
|
209 | | -# push @article_count, "8|Article Count<br>8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ;
|
210 | | -# push @edits, "9|Edits<br>9: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" .
|
211 | | -# " Net growth in constructive edits is less clear, as this metric includes most reverting edits." ;
|
212 | | -# " Strong one-monthly dip in July due to World Cup Socker?." ;
|
213 | | -# push @new_editors, "10|New Editors Per Day<br>" .
|
214 | | -# "10: Signifant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" .
|
215 | | -# " Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" .
|
216 | | -# " But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" .
|
217 | | -# " WMF recently commissioned in depth study of editor activity trends, which is ongoing." ;
|
218 | | -# push @active_editors, "11,12|(Very) Active Editors<br>" .
|
219 | | -# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" .
|
220 | | -# "#12: Editors on Commons are no longer included in overall editor total,<br>" .
|
221 | | -# " #on the assumption that most of these also edit on one or more other projects.<br>" .
|
222 | | -# " #Detection of double counts between any projects and languages is planned for late 2010." ;
|
223 | | -# push @very_active_editors, "11,12|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
|
224 | | -# }
|
225 | | -
|
226 | | -# if ($2010_08)
|
227 | | -# {
|
228 | | -# @visitors = qw ( 373,392,000 m 21.4 3.7 %) ; # Unique Visitors by Region
|
229 | | -# @page_requests = qw (13,367,000,000 b 23.9 -1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
230 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
231 | | -# @commons_files = qw ( 7,298,379 m 48.1 2.8 %) ; # Binaries per month - Absolute
|
232 | | -# @article_count = qw ( 34,963,360 m 30.0 2.4 %) ; # Article count (official) - Absolute
|
233 | | -# @new_articles = qw ( 9,437 k 22.4 25.7 %) ; # New articles per day - Absolute
|
234 | | -# @edits = qw ( 12,346,207 m 7.9 15.4 %) ; # Edits per month - Absolute
|
235 | | -# @new_editors = qw ( 17,026 k -17.3 -1.1 %) ; # New editors - Absolute
|
236 | | -# @active_editors = qw ( 85,643 k -5.2 2.1 %) ; # Active editors - Absolute
|
237 | | -# @very_active_editors = qw ( 11,419 k -1.6 5.0 %) ; # Very active editors - Absolute
|
238 | | -# @reach = qw ( 29.0 x 2.6 0.5 %) ; # Reach Percentage by Region
|
239 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
240 | | -# push @page_requests, "1,2,3,4|Page Requests<br>" .
|
241 | | -# "1: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 2.9% of total traffic (425M/14724M)<br>" .
|
242 | | -# " Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" .
|
243 | | -# "# Trend data for mobile will be added when more history is available.<br>" .
|
244 | | -# "#2: Due to server problems counts from squid logs for December 2009 - March 2010 are too low,<br>" .
|
245 | | -# "# estimated underreporting 10%-25%. Counts for April - July 2010 have been patched. Read <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>more</a>.<br>" .
|
246 | | -# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-08/Page-Views-Per-Project-Indexed.png'>chart</a><br>" .
|
247 | | -# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
|
248 | | -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
249 | | -# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>22-fold</a> in a year." ;
|
250 | | -
|
251 | | -# push @new_articles, "7|New Articles Per Day<br>" .
|
252 | | -# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ;
|
253 | | -# push @edits, "8|Edits<br>8: All time high for edit count, even slightly above May level.<br>" .
|
254 | | -# " Strong one-monthly dip in July due to World Cup Socker?." ;
|
255 | | -# push @active_editors, "9,10|(Very) Active Editors<br>" .
|
256 | | -# "9: After a <a href='charts/2010-08/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active Wikipedia editors</a> in June, and a further 2% drop in July,<br>" .
|
257 | | -# " trend is upwards again, with 2.5% increase in August.<br>" .
|
258 | | -# " Prospects for September are good, with +10% growth in page requests<br>" .
|
259 | | -# " (given strong correlation of 0.67 between page requests and active editors).<br>" .
|
260 | | -# " From a wider perspective drops were stil within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>,<br>" .
|
261 | | -# " see also <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2008.png'>similar chart with trend line since June 2008</a>.<br>" .
|
262 | | -# "10: New: Editors on Commons are no longer included in overall editor total,<br>" .
|
263 | | -# " on the assumption that most of these also edit on one or more other projects.<br>" .
|
264 | | -# " Detection of double counts between any projects and languages is planned for late 2010." ;
|
265 | | -# push @very_active_editors, "9,10|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
|
266 | | -# }
|
267 | | -
|
268 | | -# if ($2010_07)
|
269 | | -# {
|
270 | | -# @visitors = qw ( 360,225,000 m 21.9 -5 %) ; # Unique Visitors by Region
|
271 | | -# @page_requests = qw (13,116,000,000 b 27.2 -6 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
272 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
273 | | -# @commons_files = qw ( 7,104,689 m 49.1 2.9 %) ; # Binaries per month - Absolute
|
274 | | -# @article_count = qw ( 34,198,285 m 29.9 2 %) ; # Article count (official) - Absolute
|
275 | | -# @new_articles = qw ( 7,642 k 4.2 -0.6 %) ; # New articles per day - Absolute
|
276 | | -# @edits = qw ( 10,734,940 m -5.5 -9.8 %) ; # Edits per month - Absolute
|
277 | | -# @new_editors = qw ( 16,661 k -20.8 -5.6 %) ; # New editors - Absolute
|
278 | | -# @active_editors = qw ( 90,554 k -5.9 -1.6 %) ; # Active editors - Absolute
|
279 | | -# @very_active_editors = qw ( 11,818 k -2.1 -1.8 %) ; # Very active editors - Absolute
|
280 | | -# @reach = qw ( 28.5 x 2.8 -1.7 %) ; # Reach Percentage by Region
|
281 | | - # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
282 | | -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
|
283 | | -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
|
284 | | -# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
|
285 | | -# push @page_requests, "1,2,3,4|Page Requests<br>" .
|
286 | | -# "1: Due to <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>server problems</a> counts from squid logs for December 2009 - March 2010 are too low,<br>" .
|
287 | | -# " estimated underreporting 10%-25%. Counts for April - July 2010 have been patched." .
|
288 | | -# ".<br>" .
|
289 | | -# "2: August : <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic (401M)</a>: 3.0% of total traffic (13367M)<br>" .
|
290 | | -# "# Trend data for mobile will be added when more history is available.<br>" .
|
291 | | -# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-07/Page-Views-Per-Project-Indexed.png'>chart</a><br>" .
|
292 | | -# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
|
293 | | -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
294 | | -# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>25-fold</a> in a year." ;
|
295 | | -
|
296 | | -# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
|
297 | | -# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
|
298 | | -# push @new_articles, "7|New Articles Per Day<br>" .
|
299 | | -# "#7: Peak in April and May by massive activity on Aromanian and Waray-Waray Wp's, each by single user.<br>" .
|
300 | | -# "# In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ;
|
301 | | -# push @active_editors, "8,9|(Very) Active Editors<br>" .
|
302 | | -# "8: The <a href='charts/2010-07/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active editors</a> for all Wikipias in June was relatively large,<br>" .
|
303 | | -# " but from a <a href='charts/2010-07/Monthly-Active-Users-Since-Jan-2006.png'>wider perspective</a> still within normal bandwidth, largest drop was in June 2006.<br>" .
|
304 | | -# " There might be a seasonal component in fluctuations.<br>" .
|
305 | | -# "9: Bug fix: in earlier RC editions editors from Commons (6k active editors) were counted double.<br>" .
|
306 | | -# " This has been fixed for all months in this RC." ;
|
307 | | -# push @very_active_editors, "9|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
|
308 | | -# }
|
309 | | -
|
310 | | -# if ($2010_06)
|
311 | | -# {
|
312 | | -# @visitors = qw ( 379,344,000 m 25.2 -2.5 %) ; # Unique Visitors by Region
|
313 | | -# @page_requests = qw (13,957,000,000 b 26.0 1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
314 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
315 | | -# @commons_files = qw ( 6,910,267 m 50.1 2.5 %) ; # Binaries per month - Absolute
|
316 | | -# @article_count = qw ( 33,430,039 m 29.7 1.5 %) ; # Article count (official) - Absolute
|
317 | | -# @new_articles = qw ( 7,865 k 14.5 -16.2 %) ; # New articles per day - Absolute
|
318 | | -# @edits = qw ( 12,056,265 m 10.1 -1.6 %) ; # Edits per month - Absolute
|
319 | | -# @new_editors = qw ( 17,573 k -15.2 -10.6 %) ; # New editors - Absolute
|
320 | | -# @active_editors = qw ( 99,124 k -3.5 -4.4 %) ; # Active editors - Absolute
|
321 | | -# @very_active_editors = qw ( 13,042 k 0.7 -2.9 %) ; # Very active editors - Absolute
|
322 | | -# @reach = qw ( 30.2 x 3.5 -1.1 %) ; # Reach Percentage by Region
|
323 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
324 | | -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
|
325 | | -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
|
326 | | -# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
|
327 | | -# push @page_requests, "1,2,3,4|Page Requests<br>" .
|
328 | | -# "1: Traffic volume for recent months had been underreported due to monitor capacity problems.<br>" .
|
329 | | -# " Counts from April 2010 and later " .
|
330 | | -# "<a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>have been corrected</a>.<br>" .
|
331 | | -# " Data from Nov 2009 - Mar 2010 may still be too low.<br>" .
|
332 | | -# "2: Traffic to mobile site is now counted. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 208M:13957M=1.5% of total)</a><br>" .
|
333 | | -# " This is the first month, so no trend data yet. <a href='charts/2010-06/Page-Views-Breakdown-Mobile-Traffic.png'> " .
|
334 | | -# "Breakdown per language</a>:" .
|
335 | | -# "English:71.3%,<br> Japanese:8.6%, German:4.5%, French:3.9%, Russian:3.4%, Others:8.3%<br>" .
|
336 | | -# "3: <a href='charts/2010-06/Page-Views-Per-Project-Indexed.png'>New chart</a> for breakdown of traffic volume per project: many projects show peak traffic late 2009.<br>" .
|
337 | | -# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ;
|
338 | | -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
339 | | -# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
|
340 | | -# push @new_articles, "7|New Articles Per Day<br>7: Peak in April and May by massive activity on <a href='http://stats.wikimedia.org/EN/TablesWikipediaROA_RUP.htm'>Aromanian</a> and <a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray</a> Wp's, each by single user.<br>" .
|
341 | | -# " In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ;
|
342 | | -# push @edits, "9|Edits<br>9: For German,French and Polish Wikipedia dumps were not yet updated, reused data from previous month" ;
|
343 | | -# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" .
|
344 | | -# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
|
345 | | -# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ;
|
346 | | -# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
|
347 | | -# }
|
348 | | -
|
349 | | -# if ($2010_05)
|
350 | | -# {
|
351 | | -# @visitors = qw ( 388,932,000 m 22.6 3.8 %) ; # Unique Visitors by Region
|
352 | | -# @page_requests = qw (11,250,000,000 b -1.0 -1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
353 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
354 | | -# @commons_files = qw ( 6,765,082 m 51.9 3.1 %) ; # Binaries per month - Absolute
|
355 | | -# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute
|
356 | | -# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute
|
357 | | -# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute
|
358 | | -# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute
|
359 | | -# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute
|
360 | | -# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute
|
361 | | -# @reach = qw ( 31.3 x 3.0 0.9 %) ; # Reach Percentage by Region
|
362 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
363 | | -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
|
364 | | -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
|
365 | | -# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
|
366 | | -# push @page_requests, "3,4|Page Requests<br>" .
|
367 | | -# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
|
368 | | -# "4: Traffic to mobile site not yet included. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 154M:10700M=1.4% of total)</a><br>" .
|
369 | | -# "5: Page request trends on several projects are falling for 4th month, which deserves some further analysis" ;
|
370 | | -# push @rank, "6|Site Rank<br>#6: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
371 | | -# push @commons_files, "7|Commons Files<br>#8: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
|
372 | | -# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
|
373 | | -# "9: Seven Wiktionaries in top 25 Wikimedia projects" ;
|
374 | | -# push @new_articles, "10,11|New Articles Per Day<br>10: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" .
|
375 | | -# "11:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ;
|
376 | | -# push @edits, "12,13|Edits<br>12: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" .
|
377 | | -# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" .
|
378 | | -# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
|
379 | | -# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ;
|
380 | | -# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
|
381 | | -# }
|
382 | | -
|
383 | | -
|
384 | | -# if ($2010_04)
|
385 | | -# {
|
386 | | -# @visitors = qw ( 374,846,000 m 17.1 1.1 %) ; # Unique Visitors by Region
|
387 | | -# @page_requests = qw (11,724,000,000 b +7.4 -0.1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
388 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
389 | | -# @commons_files = qw ( 6,564,544 m 52.2 3.3 %) ; # Binaries per month - Absolute
|
390 | | -# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute
|
391 | | -# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute
|
392 | | -# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute
|
393 | | -# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute
|
394 | | -# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute
|
395 | | -# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute
|
396 | | -# @reach = qw ( 30.4 x 1.5 0.0 %) ; # Reach Percentage by Region
|
397 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
398 | | -# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" .
|
399 | | -# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" .
|
400 | | -# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ;
|
401 | | -# push @page_requests, "3,4|Page Requests<br>" .
|
402 | | -# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
|
403 | | -# "4: Traffic to mobile site not included. Expect this next month." ;
|
404 | | -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
405 | | -# push @commons_files, "6|Commons Files<br>6: Fastest relative growth: tiff images (723%), ogg vorbis video (446%)." ;
|
406 | | -# push @article_count, "7,8|Article Count<br>7: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" .
|
407 | | -# "8: Seven Wiktionaries in top 25 Wikimedia projects" ;
|
408 | | -# push @new_articles, "9,10|New Articles Per Day<br>9: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" .
|
409 | | -# "10:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ;
|
410 | | -# push @edits, "11,12|Edits<br>11: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" .
|
411 | | -# "All Serbian Wikinews edits by weather bot that updates temp/wind speed every few seconds.<br>" .
|
412 | | -# "30 June 2010: report filed for <a href='http://en.wikinews.org/wiki/Wikinews:Admin_action_alerts'>runaway bot</a><br>" .
|
413 | | -# "#12: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
|
414 | | -# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ;
|
415 | | -# push @very_active_editors, "13|Very Active Editors<br>13: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ;
|
416 | | -# }
|
417 | | -
|
418 | | -# if ($2010_03)
|
419 | | -# {
|
420 | | -# @visitors = qw ( 370,744,000 m 13.3 7.4 %) ; # Unique Visitors by Region
|
421 | | -# @page_requests = qw (11,730,000,000 b +0.3 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
422 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
423 | | -# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute
|
424 | | -# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute
|
425 | | -# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute
|
426 | | -# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute
|
427 | | -# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute
|
428 | | -# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute
|
429 | | -# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute
|
430 | | -# @reach = qw ( 30.4 x 0.5 1.7 %) ; # Reach Percentage by Region
|
431 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
432 | | -# push @visitors, "1,2|Unique Visitors<br>1: March has 3 more (11%) more days than February<br>" .
|
433 | | -# " This will explain much of apparently large monthly growth in visitors<br>" .
|
434 | | -# "2: All regions same of more unique visitors than year ago. North Am. +25%, Latin Am. + 27%" ;
|
435 | | -# push @page_requests, "3|Page Requests<br>" .
|
436 | | -# "3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" .
|
437 | | -# " This way monthly changes are more meaningful<br>" .
|
438 | | -# " Difference with not normalized data is mainly visible in Jan⇒Feb and Feb⇒Mar" ;
|
439 | | -# push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
440 | | -# push @commons_files, "5|Commons Files<br>#5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
|
441 | | -# push @article_count, "6|Article Count<br>#6: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ;
|
442 | | -# push @edits, "7|Edits<br>#7: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
|
443 | | -# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ;
|
444 | | -# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" .
|
445 | | -# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ;
|
446 | | -# }
|
447 | | -
|
448 | | -# if ($2010_02)
|
449 | | -# {
|
450 | | -# @visitors = qw ( 345,218,000 m 14.8 -5.3 %) ; # Unique Visitors by Region
|
451 | | -# @page_requests = qw (11,081,000,000 b +5.8 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
452 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
453 | | -# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute
|
454 | | -# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute
|
455 | | -# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute
|
456 | | -# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute
|
457 | | -# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute
|
458 | | -# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute
|
459 | | -# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute
|
460 | | -# @reach = qw ( 28.7 x 0.8 -0.8 %) ; # Reach Percentage by Region
|
461 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
462 | | -# push @visitors, "1|Unique Visitors<br>1: comScore reassesses online population in their target segments twice a year (Feb & Aug)<br>" .
|
463 | | -# " This time estimate for Indonesia, Philippines and Vietnam was lowered by -54%,<br>" .
|
464 | | -# " resulting in a worldwide reassesment of online population of -4%" ;
|
465 | | -# push @page_requests, "2,3|Page Requests<br>" .
|
466 | | -# "2:Corrected for length of months Jan -> Feb increase was actually +11.0% !<br>" .
|
467 | | -# "3:Russia maintains its steep growth: +57% in last 12 months, +137% in preceding 12 months<br>" .
|
468 | | -# " Indonesia is 2nd, and speeding up: +46% in last 12 months, +34% before that<br>" .
|
469 | | -# "# German decline (-10%) is still atypical (caused by spike year ago after court decision)" ;
|
470 | | -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ;
|
471 | | -# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
|
472 | | -# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ;
|
473 | | -# push @edits, "8|Edits<br>8: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" .
|
474 | | -# " 2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ;
|
475 | | -# }
|
476 | | -
|
477 | | -# if ($2009_??)
|
478 | | -# {
|
479 | | -# @visitors = qw ( 364,719,000 m 25.8 5.1 %) ; # Unique Visitors by Region
|
480 | | -# @page_requests = qw (11,054,000,000 b -3.1 6.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough)
|
481 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
482 | | -# @commons_files = qw ( 6,058,601 m 59.5 6.5 %) ; # Binaries per month - Absolute
|
483 | | -# @article_count = qw ( 29,742,993 m 34.7 2.4 %) ; # Article count (official) - Absolute
|
484 | | -# @new_articles = qw ( 7,626 k -1.1 3.4 %) ; # New articles per day - Absolute
|
485 | | -# @edits = qw ( 12,251,152 m 4.8 9.0 %) ; # Edits per month - Absolute
|
486 | | -# @new_editors = qw ( 19,279 k -12.4 5.6 %) ; # New editors - Absolute
|
487 | | -# @active_editors = qw ( 98,597 k -1.4 5.0 %) ; # Active editors - Absolute
|
488 | | -# @very_active_editors = qw ( 12,488 k -1.1 6.3 %) ; # Very active editors - Absolute
|
489 | | -# @reach = qw ( 29.0 x 1.0 1.0 %) ; # Reach Percentage by Region
|
490 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
491 | | -# push @visitors, "1,2|Unique Visitors<br>#1: Yearly growth in UV's (26%) exceeds growth of total internet (21%).<br>" .
|
492 | | -# "2: Large monthly shifts in UV/Reach in 3rd world explained by comScore as seasonal influences:<br> school vacations, and large festivals, religious (e.g. Ramadan) or otherwise (e.g. Carnival)." ;
|
493 | | -# push @page_requests, "3,4|Page Requests<br>3:<b> Trends measured by comScore and internal measurements diverge somewhat.</b><br> <b>Possible causes are under investigation.</b><p>" .
|
494 | | -# "4:Fastest rising large Wikipedia's in last 12 months:<br>" .
|
495 | | -# " Vietnamese (87%), Ukranian (65%), Russian (45%), Indonesian (39%), Chinese (28%), Thai (23%)<br>" .
|
496 | | -# " German decline (-32%) is atypical (caused by short massive spike year ago due after court decision)" ;
|
497 | | -# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with 4th and 6th ranked properties are considerable." ;
|
498 | | -# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
|
499 | | -# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ;
|
500 | | -# push @edits, "8|Edits<br>#8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>#Monthly edits for all Wikipedia's combined</a># remarkably stable between 10 and 12 million<br># for 3 years now (as is the case for active and very active editors)" ;
|
501 | | -# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" .
|
502 | | -# "Experiment: logarithmic chart now uses two scales for widely divergent values.<br>This helps to remove clutter, but may need some getting used to." ;
|
503 | | -
|
504 | | -# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ;
|
505 | | -# }
|
506 | | -
|
507 | | -# if ($2009_??)
|
508 | | -# {
|
509 | | -# @visitors = qw ( 347,019,000 m 27.1 0.4 %) ; # Unique Visitors by Region
|
510 | | -# @page_requests = qw (10,389,000,000 b 0.0 -9.2 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma)
|
511 | | -# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors
|
512 | | -# @commons_files = qw ( 5,695,283 m 55.1 2.6 %) ; # Binaries per month - Absolute
|
513 | | -# @article_count = qw ( 29,016,248 m 34.3 2.1 %) ; # Article count (official) - Absolute
|
514 | | -# @new_articles = qw ( 7,457 k 7.7 2.6 %) ; # New articles per day - Absolute
|
515 | | -# @edits = qw ( 10,791,575 m 0.6 0.4 %) ; # Edits per month - Absolute
|
516 | | -# @new_editors = qw ( 18,597 k -6.3 -2.4 %) ; # New editors - Absolute
|
517 | | -# @active_editors = qw ( 95,849 k 3.8 -0.4 %) ; # Active editors - Absolute
|
518 | | -# @very_active_editors = qw ( 11,764 k 0.4 -0.5 %) ; # Very active editors - Absolute
|
519 | | -# @reach = qw ( 28.7 x 1.6 -0.0 %) ; # Reach Percentage by Region
|
520 | | -# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
521 | | -# push @visitors, "1,2|Unique Visitors<br>1: Yearly growth in UV's (27%) exceeds growth of total internet (21%).<br>" .
|
522 | | -# "2: Conversation with comScore on huge monthly shifts in UV/Reach in 3rd world continues." ;
|
523 | | -# push @page_requests, "3|Page Requests<br>3: Same as last year: dip in page requests (but spike in image requests)." ;
|
524 | | -# push @rank, "4|Site Rank<br>4: 5th position will be stable for long time: 4th has 35% more UV's, 6th 23% less." ;
|
525 | | -# push @commons_files, "5|Commons Files<br>5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ;
|
526 | | -# push @article_count, "6|Article Count<br>6: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ;
|
527 | | -# push @new_articles, "7|New Articles<br>7: Russian consistently fast riser, Ukranian growth 40% of previous months" ;
|
528 | | -# push @edits, "8|Edits<br>8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>Monthly edits for all Wikipedia's combined</a> remarkably stable between 10 and 12 million<br>for 3 years now (as is the case for active and very active editors)" ;
|
529 | | -# push @new_editors, "9|New Editors<br>9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons" ;
|
530 | | -# }
|
531 | | -
|
532 | | -# if ($2009_10)
|
533 | | -# {
|
534 | | -# @visitors = qw ( 345,805,000 m 23.1 0.4 %) ; # Unique Visitors by Region
|
535 | | -# @page_requests = qw (11,257,000,000 b 7.7 -2.8 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma)
|
536 | | -# @rank = qw ( 5th x -1 0 th) ; # Web Properties - Unique Visitors
|
537 | | -# @commons_files = qw ( 5,558,644 m 59.7 3.4 %) ; # Binaries per month - Absolute
|
538 | | -# @article_count = qw ( 28,506,011 m 35.4 2.5 %) ; # Article count (official) - Absolute
|
539 | | -# @new_articles = qw ( 7,357 k 2.1 -6.1 %) ; # New articles per day - Absolute
|
540 | | -# @edits = qw ( 10,772,957 m 2.8 -3.4 %) ; # Edits per month - Absolute
|
541 | | -# @new_editors = qw ( 18,779 k -5.2 -4.5 %) ; # New editors - Absolute
|
542 | | -# @active_editors = qw ( 96,521 k 4.0 0.1 %) ; # Active editors - Absolute
|
543 | | -# @very_active_editors = qw ( 11,726 k 2.7 -3.4 %) ; # Very active editors - Absolute
|
544 | | -
|
545 | | -# @reach = qw ( 28.7 x 0.5 -0.3 %) ; # Reach Percentage by Region
|
546 | | - # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm
|
547 | | -# push @visitors, "1|1: asked comScore to explain huge shifts in UV/Reach in Middle East-Africa." ;
|
548 | | -# push @page_requests, "2|2: Capacity problems may have played a role. New servers ordered." ;
|
549 | | -# push @new_articles, "2,3|3: Ukranian Wikipedia fastest riser (compare edits for Russian)" ;
|
550 | | -# push @edits, "4|4: Russian Wikipedia fastest riser (compare new articles for Ukrain)" ;
|
551 | | -# push @very_active_editors, "2" ;
|
552 | | -# }
|
553 | | -
|
554 | | -# if ($2009_10)
|
555 | | -# {
|
556 | | -# @new_editors = qw ( 19,002 k -8.9 3.2 %) ;
|
557 | | -# @active_editors = qw ( 97,132 k 1.9 3.4 %) ;
|
558 | | -# @very_active_editors = qw ( 12,172 k 2.8 1.2 %) ;
|
559 | | -# @article_count = qw ( 27,852,471 m 35.6 2.8 %) ;
|
560 | | -# @new_articles = qw ( 8,050 k 11.2 5.9 %) ;
|
561 | | -# @edits = qw ( 11,188,080 m -1.8 1.7 %) ;
|
562 | | -# @commons_files = qw ( 5,539,645 m 60.3 5.5 %) ;
|
563 | | -# @rank = qw ( 5th x -1 0 th) ;
|
564 | | -# @visitors = qw ( 344,563,000 m 24.3 5.7 %) ;
|
565 | | -# @reach = qw ( 29.0 x 0.8 1.3 %) ;
|
566 | | -# @page_requests = qw (11,586,000,000 b 8.8 1.9 %) ;
|
567 | | -# }
|
568 | | -
|
569 | | -# if ($2009_09)
|
570 | | -# {
|
571 | | -# @new_editors = qw ( 17,792 k -8.7 -9.6 %) ;
|
572 | | -# @active_editors = qw ( 94,565 k 2.3 -2.5 %) ;
|
573 | | -# @very_active_editors = qw ( 12,069 k 3.6 -2.5 %) ;
|
574 | | -# @article_count = qw ( 27,120,974 m 36.6 2.0 %) ;
|
575 | | -# @new_articles = qw ( 12,907 k -0.3 -11.4 %) ;
|
576 | | -# @edits = qw ( 12,578,009 m 8.8 -9.0 %) ;
|
577 | | -# @commons_files = qw ( 5,115,042 m 57.4 2.7 %) ;
|
578 | | -# @rank = qw ( 5th x 0 0 th) ;
|
579 | | -# @visitors = qw ( 325,998,000 m 19.8 6.0 %) ;
|
580 | | -# @reach = qw ( 27.6 x -1.4 4.5 %) ;
|
581 | | -# @page_requests = qw (11,372,000,000 b 11.7 5.1 %) ;
|
582 | | -# }
|
583 | | -
|
584 | | -# if ($2009_08)
|
585 | | -# {
|
586 | | -# @new_editors = qw ( 17,998 k -9.4 -6.2 %) ;
|
587 | | -# @active_editors = qw ( 91,359 k 1.1 0.8 %) ;
|
588 | | -# @very_active_editors = qw ( 11,568 k 0.3 3.0 %) ;
|
589 | | -# @article_count = qw ( 21,143,943 m 29.9 2.0 %) ;
|
590 | | -# @new_articles = qw ( 13,174 k 8.1 11.4 %) ;
|
591 | | -# @edits = qw ( 12,807,952 m 8.4 4.8 %) ;
|
592 | | -# @commons_files = qw ( 4,996,023 m 60.2 3.6 %) ;
|
593 | | -# @rank = qw ( 5th x 0 0 th) ;
|
594 | | -# @visitors = qw ( 307,641,000 m 23.8 4.1 %) ;
|
595 | | -# @reach = qw ( 26.4 x 1.9 2.7 %) ;
|
596 | | -# @page_requests = qw (10,817,000,000 b 15.3 1.5 %) ;
|
597 | | -# }
|
598 | | -
|
599 | | -# if ($2009_07)
|
600 | | -# {
|
601 | | -# @new_editors = qw ( 18,916 k -8.5 -1 %) ;
|
602 | | -# @active_editors = qw ( 90,659 k -0.3 -0.6 %) ;
|
603 | | -# @very_active_editors = qw ( 11,242 k -2.4 -0.7 %) ;
|
604 | | -# @article_count = qw ( 20,768,108 m 30.2 0.8 %) ;
|
605 | | -# @new_articles = qw ( 11,888 k -18.9 -30.3 %) ;
|
606 | | -# @edits = qw ( 12,219,008 m 6.3 0.7 %) ;
|
607 | | -# @commons_files = qw ( 4,831,659 m 61.1 3.7 %) ;
|
608 | | -# @rank = qw ( 5th x 0 0 th) ;
|
609 | | -# @visitors = qw ( 295,848,000 m 20.9 -2.5 %) ;
|
610 | | -# @reach = qw ( 25.7 x 0 -3.7 %) ;
|
611 | | -# @page_requests = qw (10,700,000,000 b 12.9 -3.0 %) ;
|
612 | | -# }
|
613 | | -
|
# ---------------------------------------------------------------------------
# Main driver section of the report card script.
#
# Steps, in order:
#   1. Build and print a plain text synopsis from the raw metric arrays.
#   2. Keep an untouched copy of every metric array in a twin array with a
#      trailing underscore (@visitors_ etc).
#   3. Normalise the metric arrays in place for use in the HTML reports:
#      strip thousands separators, scale large totals to millions/billions,
#      re-insert thousands separators on the smaller counts and format the
#      yearly/monthly trend figures (fields 2 and 3).
#   4. Replace fields that were '?' in the raw data by '...'.
#   5. Write the public and the extended report set, then exit.
#
# All metric arrays are package globals on purpose: they are filled earlier in
# the script and read by the report writing code further down.
# ---------------------------------------------------------------------------

# Legend lines for the yearly (Y) and monthly (M) trend columns.
$synopsis  = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ;
$synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ;

# One synopsis entry per metric, in display order (@reach is not part of the synopsis).
foreach my $row (
  [ "M Unique Visitors, All Projects", \@visitors            ],
  [ "M Page Views, All Projects",      \@page_requests       ],
  [ " Site Rank",                      \@rank                ],
  [ "T Binary Files",                  \@commons_files       ],
  [ "M Wikipedia Article Count",       \@article_count       ],
  [ "D New Wikipedia Articles",        \@new_articles        ],
  [ "M Wikipedia Edits per Month",     \@edits               ],
  [ "M New Wikipedia Editors",         \@new_editors         ],
  [ "M Active Wikipedia Editors",      \@active_editors      ],
  [ "M Very Active Wikipedia Ed.",     \@very_active_editors ],
)
{
  my ($label, $metric) = @$row ;
  $synopsis .= &FormatSynopsisText ($label, "", @$metric) ;
}

print "\n\n$synopsis" ;
print "\n"."="x80 . "\n\n" ;

# Preserve the raw input; needed below to detect '?' (value unknown) after
# the working copies have been reformatted.
@visitors_            = @visitors ;
@page_requests_       = @page_requests ;
@rank_                = @rank ;
@commons_files_       = @commons_files ;
@article_count_       = @article_count ;
@new_articles_        = @new_articles ;
@edits_               = @edits ;
@new_editors_         = @new_editors ;
@active_editors_      = @active_editors ;
@very_active_editors_ = @very_active_editors ;
@reach_               = @reach ;

# Field 0 (absolute value): drop thousands separators so it can be used as a number.
foreach my $metric (\@visitors, \@new_editors, \@active_editors, \@very_active_editors,
                    \@article_count, \@new_articles, \@edits, \@commons_files,
                    \@rank, \@reach, \@page_requests)
{ $metric->[0] =~ s/,//g ; }

# Field 0 of the large totals: scale to millions / billions.
foreach my $spec (
  [ \@visitors,      "%.0f", 1000000    ],
  [ \@article_count, "%.1f", 1000000    ],
  [ \@edits,         "%.1f", 1000000    ],
  [ \@commons_files, "%.1f", 1000000    ],
  [ \@page_requests, "%.1f", 1000000000 ],
)
{
  my ($metric, $format, $divisor) = @$spec ;
  $metric->[0] = sprintf ($format, $metric->[0] / $divisor) ;
}

# Field 0 of the smaller counts: put thousands separators back.
# The former single substitution s/(\d\d\d)$/,$1/ produced ',999' for values
# below 1000 and '1234,567' for values of a million and up; grouping from the
# left with a loop handles any number of digits and leaves '?' untouched.
foreach my $metric (\@new_editors, \@active_editors, \@very_active_editors, \@new_articles)
{ 1 while $metric->[0] =~ s/^(\d+)(\d{3})/$1,$2/ ; }

# NOTE(review): the original code contained two statements of the form
#   $visitors [5] =~ ($visitors [2] >= 0) ? 'A' : 'E' ;
# (and likewise for fields 6 and 3). With the binding operator =~ instead of =
# they assigned nothing, so they have been dropped. They were deliberately not
# turned into assignments: in the archived data layout (five qw fields followed
# by a pushed note) index 5 holds the note text, which an assignment would
# overwrite - TODO confirm the original intent before reintroducing them.

# Fields 2 and 3 (yearly and monthly trend): fixed number of decimals.
# @active_editors and @article_count were already excluded (commented out) in
# the original code and are therefore left as entered.
foreach my $spec (
  [ \@visitors,            "%.1f" ],
  [ \@page_requests,       "%.1f" ],
  [ \@new_editors,         "%.1f" ],
  [ \@very_active_editors, "%.1f" ],
  [ \@new_articles,        "%.1f" ],
  [ \@edits,               "%.1f" ],
  [ \@commons_files,       "%.1f" ],
  [ \@rank,                "%.0f" ],
  [ \@reach,               "%.1f" ],
)
{
  my ($metric, $format) = @$spec ;
  foreach my $field (2, 3)
  { $metric->[$field] = sprintf ($format, $metric->[$field]) ; }
}

# Any of the first four fields that was entered as '?' is shown as '...'
# (the numeric formatting above would otherwise have turned it into 0).
foreach my $pair (
  [ \@visitors,            \@visitors_            ],
  [ \@page_requests,       \@page_requests_       ],
  [ \@rank,                \@rank_                ],
  [ \@commons_files,       \@commons_files_       ],
  [ \@article_count,       \@article_count_       ],
  [ \@new_articles,        \@new_articles_        ],
  [ \@edits,               \@edits_               ],
  [ \@new_editors,         \@new_editors_         ],
  [ \@active_editors,      \@active_editors_      ],
  [ \@very_active_editors, \@very_active_editors_ ],
  [ \@reach,               \@reach_               ],
)
{
  my ($shown, $raw) = @$pair ;
  foreach my $field (0 .. 3)
  { $shown->[$field] = '...' if $raw->[$field] eq '?' ; }
}

# Single quotes: the '@' in these paths must never be taken for an array.
$path_input   = 'W:/@ Report Card/Input/' ;
$path_public  = 'W:/@ Report Card/Public/' ;
$path_private = 'W:/@ Report Card/Extended/' ; # few more charts with top 10 web properties based on data from comScore (slightly confidential)

&WriteReports ($path_input, $path_public,  $public) ;
&WriteReports ($path_input, $path_private, $private) ;

print "\nReady\n\n" ;
exit ;

|
718 | | -
|
719 | | -sub WriteReports
|
720 | | -{
|
721 | | - $path_in = shift ;
|
722 | | - $path_out = shift ;
|
723 | | - $target_audience = shift ;
|
724 | | -
|
725 | | - &WriteSynopsis ($path_out) ;
|
726 | | -
|
727 | | - open TEMPLATE, '<', "RT_yyyy_mm.html" ;
|
728 | | - open DETAILS, '>', "$path_out/RC_${p_year}_${p_month_d2}_detailed.html" ;
|
729 | | - open SUMMARY, '>', "$path_out/RC_${p_year}_${p_month_d2}_summary.html" ;
|
730 | | - open COLUMNS, '>', "$path_out/RC_${p_year}_${p_month_d2}_columns.html" ;
|
731 | | -
|
732 | | -
|
733 | | - $write_details = $true ;
|
734 | | - $write_summary = $true ;
|
735 | | - $write_columns = $true ;
|
736 | | -
|
737 | | - $write_public = $true ;
|
738 | | - $write_private = $true ;
|
739 | | -
|
740 | | - $iscomment = $false ;
|
741 | | -
|
742 | | - while ($line = <TEMPLATE>)
|
743 | | - {
|
744 | | - chomp $line ;
|
745 | | -
|
746 | | - $line =~ s/<!--.*?-->// ;
|
747 | | -# if ($line =~ /<!--/)
|
748 | | -# {
|
749 | | -# $iscomment = $true ;
|
750 | | -# $line =~ s/<!--.*$// ;
|
751 | | -# }
|
752 | | -# if ($line =~ /-->/)
|
753 | | -# {
|
754 | | -# $iscomment = $false ;
|
755 | | -# $line =~ s/^.*?-->// ;
|
756 | | -# }
|
757 | | -# if ($iscomment)
|
758 | | -# { $line = "<!-- {{$line}} -->" ; }
|
759 | | -
|
760 | | - if ($line =~ /\{\{yyyy\}\}_\{\{mm[+-]1\}\}/)
|
761 | | - {
|
762 | | - if ($p_month == 1)
|
763 | | - { $line =~ s/\{\{yyyy\}\}_\{\{mm\-1\}\}/{{yyyy-1}}_{{mm-1}}/ ; } # Q&D temp fix
|
764 | | - if ($p_month == 12)
|
765 | | - { $line =~ s/\{\{yyyy\}\}_\{\{mm\+1\}\}/{{yyyy+1}}_{{mm+1}}/ ; } # Q&D temp fix
|
766 | | - }
|
767 | | -
|
768 | | - # $no_upd = "<font color=#800000>*<\/font>" ;
|
769 | | -
|
770 | | - if ($true) # test ?
|
771 | | - {
|
772 | | - # $no_upd = " <small><small><font color=#FF0000><b>chart could not be updated for current month</b></font></small></small>" ;
|
773 | | - $line =~ s/H2 (UNIQUE VISITORS)/A[$1] H2 {${visitors [0]} million|Unique Visitors, All Projects}/ ;
|
774 | | - $line =~ s/H2 (PAGE REQUESTS)/A[$1] H2 {${page_requests[0]} billion|Page Requests, All Projects}/ ;
|
775 | | - $line =~ s/H2 (WEB PROPERTIES)/A[$1] H2 {${rank[0]} in rank|Web Properties - Unique Visitors}/ ;
|
776 | | - $line =~ s/H2 (COMMONS FILES)/A[$1] H2 {${commons_files[0]} million|Binary Files $no_upd}/ ;
|
777 | | - $line =~ s/H2 (ARTICLE COUNT)/A[$1] H2 {${article_count[0]} million|Wikipedia Articles, Comparison with Other Projects $no_upd}/ ;
|
778 | | - $line =~ s/H2 (ARTICLES PER DAY)/A[$1] H2 {${new_articles[0]}|New Wikipedia Articles Per Day $no_upd}/ ;
|
779 | | - $line =~ s/H2 (EDITS PER MONTH)/A[$1] H2 {${edits[0]} million|Wikipedia Edits Per Month $no_upd}/ ;
|
780 | | - $line =~ s/H2 (NEW EDITORS PER MONTH)/A[$1] H2 {${new_editors[0]}|New Wikipedia Editors Per Month $no_upd}/ ;
|
781 | | - $line =~ s/H2 (ACTIVE EDITORS)/A[$1] H2 {${active_editors[0]}|Active Wikipedia Editors (5+ edits per month) $no_upd}/ ;
|
782 | | - $line =~ s/H2 (VERY ACTIVE EDITORS)/A[$1] H2 {${very_active_editors[0]}|Very Active Wikipedia Editors (100+ edits per month) $no_upd}/ ;
|
783 | | -
|
784 | | - $line =~ s/TRENDS UNIQUE VISITORS/TRENDS {$trend_one_year|${visitors[2]}%}{$trend_one_month|${visitors[3]}%}/ ;
|
785 | | - $line =~ s/TRENDS PAGE REQUESTS/TRENDS {$trend_one_year|${page_requests[2]}%}{$trend_one_month|${page_requests[3]}%}/ ;
|
786 | | - $line =~ s/TRENDS WEB PROPERTIES/TRENDS {$trend_one_year|${rank[2]}}{$trend_one_month|${rank[3]}}/ ;
|
787 | | - $line =~ s/TRENDS COMMONS FILES/TRENDS {$trend_one_year|${commons_files[2]}%}{$trend_one_month|${commons_files[3]}%}/ ;
|
788 | | - $line =~ s/TRENDS ARTICLE COUNT/TRENDS {$trend_one_year|${article_count[2]}%}{$trend_one_month|${article_count[3]}%}/ ;
|
789 | | - $line =~ s/TRENDS ARTICLES PER DAY/TRENDS {$trend_one_year|${new_articles[2]}%}{$trend_one_month|${new_articles[3]}%}/ ;
|
790 | | - $line =~ s/TRENDS EDITS PER MONTH/TRENDS {$trend_one_year|${edits[2]}%}{$trend_one_month|${edits[3]}%}/ ;
|
791 | | - $line =~ s/TRENDS NEW EDITORS PER MONTH/TRENDS {$trend_one_year|${new_editors[2]}%}{$trend_one_month|${new_editors[3]}%}/ ;
|
792 | | - $line =~ s/TRENDS ACTIVE EDITORS/TRENDS {$trend_one_year|${active_editors[2]}%}{$trend_one_month|${active_editors[3]}%}/ ;
|
793 | | - $line =~ s/TRENDS VERY ACTIVE EDITORS/TRENDS {$trend_one_year|${very_active_editors[2]}%}{$trend_one_month|${very_active_editors[3]}%}/ ;
|
794 | | -
|
795 | | - $line =~ s/{{yyyy}}/$p_year/g ;
|
796 | | - $line =~ s/{{yyyy\-1}}/$p_year_prev/g ;
|
797 | | - $line =~ s/{{yyyy\+1}}/$p_year_next/g ;
|
798 | | - $line =~ s/{{yyyy\+m2}}/$p_year_plus_m2/g ;
|
799 | | - $line =~ s/{{month}}/$p_month_name/g ;
|
800 | | - $line =~ s/{{month\-1}}/$p_month_name_prev/g ;
|
801 | | - $line =~ s/{{month\+1}}/$p_month_name_next/g ;
|
802 | | - $line =~ s/{{month\+2}}/$p_month_name_next2/g ;
|
803 | | -
|
804 | | - $line =~ s/{{y}}/$p_year_short/g ;
|
805 | | - $line =~ s/{{y\-1}}/$p_year_prev_short/g ;
|
806 | | - $line =~ s/{{yy}}/$p_year_short_d2/g ;
|
807 | | - $line =~ s/{{yy\-1}}/$p_year_prev_short_d2/g ;
|
808 | | -
|
809 | | - $line =~ s/{{m}}/$p_month/g ;
|
810 | | - $line =~ s/{{m\-1}}/$p_month_prev/g ;
|
811 | | - $line =~ s/{{mm}}/$p_month_d2/g ;
|
812 | | - $line =~ s/{{mm-1}}/$p_month_prev_d2/g ;
|
813 | | - $line =~ s/{{mm\+1}}/$p_month_next_d2/g ;
|
814 | | -
|
815 | | - $line =~ s/{{\(mm\/yy\)-1}}/$p_year_month_m1/g ;
|
816 | | - }
|
817 | | - else
|
818 | | - {
|
819 | | - $line =~ s/{{yyyy}}/[[yyyy]]/g ;
|
820 | | - $line =~ s/{{yyyy-1}}/[[yyyy-1]]/g ;
|
821 | | - $line =~ s/{{yyyy\+m2}}/[[yyyy\+m2]]/g ;
|
822 | | - $line =~ s/{{month}}/[[month]]/g ;
|
823 | | - $line =~ s/{{month-1}}/[[month-1]]/g ;
|
824 | | - $line =~ s/{{month\+1}}/[[month\+1]]/g ;
|
825 | | - $line =~ s/{{month\+2}}/[[month\+2]]/g ;
|
826 | | -
|
827 | | - $line =~ s/{{y}}/y/g ;
|
828 | | - $line =~ s/{{y-1}}/y-1/g ;
|
829 | | - $line =~ s/{{m}}/m/g ;
|
830 | | - $line =~ s/{{m-1}}/m-1/g ;
|
831 | | - $line =~ s/{{mm}}/mm/g ;
|
832 | | - $line =~ s/{{mm-1}}/mm-1/g ;
|
833 | | - $line =~ s/{{mm\+}}/mm+1/g ;
|
834 | | -
|
835 | | - $line =~ s/{{\(mm\/yy\)-1}}/(mm\/yy)-1/g ;
|
836 | | - }
|
837 | | -
|
838 | | - if ($line =~ /<!==\s*COMMENT\s*\{[^\}]*\}\s*==>/)
|
839 | | - {
|
840 | | - $comment = $line ;
|
841 | | - $comment =~ s/^.*?\{// ;
|
842 | | - $comment =~ s/\}.*$// ;
|
843 | | - $line = " <span class=comment>$comment</span\n" ;
|
844 | | - }
|
845 | | -
|
846 | | - if ($line =~ /<!==\s*H1\s*\{[^\}]*\}\s*==>/)
|
847 | | - {
|
848 | | - $title = $line ;
|
849 | | - $title =~ s/^.*?\{// ;
|
850 | | - $title =~ s/\}.*$// ;
|
851 | | - $line = " <tr>\n" .
|
852 | | - " <td class=h1 colspan=99><span class=h9>$title</span></td>\n" .
|
853 | | - " </tr>\n" .
|
854 | | - " <tr>\n" .
|
855 | | - " <td><small><small> </small></small></td>\n" .
|
856 | | - " </tr>\n" ;
|
857 | | - }
|
858 | | -
|
859 | | - if ($line =~ /<!==\s*A\[[^\]]*\] H2\s*\{[^\}]*\}\s*==>/)
|
860 | | - {
|
861 | | - ($anchor = $line) ;
|
862 | | - $anchor =~ s/^.*?A\[// ;
|
863 | | - $anchor =~ s/\].*$// ;
|
864 | | - $anchor =~ s/\s/_/g ;
|
865 | | - $anchor = lc($anchor) ;
|
866 | | -
|
867 | | - $parms = $line ;
|
868 | | - $parms =~ s/^.*?\{// ;
|
869 | | - $parms =~ s/\}.*$// ;
|
870 | | - ($metric,$title) = split ('\|', $parms,2) ;
|
871 | | - ($title2 = $title) =~ s/ /_/g ;
|
872 | | - $line = " <tr>\n" .
|
873 | | - " <td class=score><a id='$anchor' name='$anchor'></a><span class=bg>$metric</sup></span></td>\n" .
|
874 | | - " <td class=h2><span class=h2>$title</span><br></td>\n" .
|
875 | | - "</tr>\n" ;
|
876 | | - }
|
877 | | -
|
878 | | - if ($line =~ /<!==\s*TABS\s*\{[^\}]*\}\s*==>/)
|
879 | | - {
|
880 | | - $parms = $line ;
|
881 | | - $parms =~ s/^.*?\{// ;
|
882 | | - $parms =~ s/\}.*$// ;
|
883 | | - ($id,@texts) = split ('\|', $parms) ;
|
884 | | - $line = " <div id=\"container-" . ($id/10) . "\">\n" ;
|
885 | | - $line .= " <ul>\n" ;
|
886 | | - foreach $text (@texts)
|
887 | | - {
|
888 | | - $id++ ;
|
889 | | - $line .= " <li><a href=\"#fragment-$id\"><span>$text</span></a></li>\n" ;
|
890 | | - }
|
891 | | - $line .= " </ul>\n" ;
|
892 | | - $id_hi = $id ;
|
893 | | - }
|
894 | | -
|
895 | | - if ($line =~ /<!==\s*TAB\s*\{[^\}]*\}\s*==>/)
|
896 | | - {
|
897 | | - $parms = $line ;
|
898 | | - $parms =~ s/^.*?\{// ;
|
899 | | - $parms =~ s/\}.*$// ;
|
900 | | - ($id,$text) = split ('\|', $parms) ;
|
901 | | -
|
902 | | - if ($text =~ /^START/i)
|
903 | | - {
|
904 | | - $line = "\n <div id=\"fragment-$id\">\n" ;
|
905 | | - }
|
906 | | - elsif ($text =~ /^END/i)
|
907 | | - {
|
908 | | - if ($id == $id_hi)
|
909 | | - { $line = " </div>" ; }
|
910 | | - }
|
911 | | - else
|
912 | | - {
|
913 | | - $line = "\n <div id=\"fragment-$id\">\n $text\n </div>\n" ;
|
914 | | - if ($id == $id_hi)
|
915 | | - { $line .= " </div>" ; }
|
916 | | - }
|
917 | | - }
|
918 | | -
|
919 | | - if ($line =~ /<!==\s*TRENDS\s*\{[^\}]*\}\{[^\}]*\}\s*==>/)
|
920 | | - {
|
921 | | - $parms = $line ;
|
922 | | - $parms =~ s/^[^\{]*\{// ;
|
923 | | - $parms =~ s/\}[^\}]*$// ;
|
924 | | - ($trendY,$trendM) = split ('\}\s*\{', $parms,2) ;
|
925 | | -
|
926 | | - # ($colorY,$month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
|
927 | | - # ($colorM,$month1M,$month2M,$trendM) = split ('\|',$trendM) ;
|
928 | | - ($month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
|
929 | | - ($month1M,$month2M,$trendM) = split ('\|',$trendM) ;
|
930 | | - if ($trendY >= 0)
|
931 | | - { $colorY = "A" ; $trendY = "+$trendY" }
|
932 | | - else
|
933 | | - { $colorY = "E" ; }
|
934 | | - if ($trendM >= 0)
|
935 | | - { $colorM = "A" ; $trendM = "+$trendM" }
|
936 | | - else
|
937 | | - { $colorM = "E" ; }
|
938 | | -
|
939 | | -
|
940 | | - #<!== TRENDS {A|5/8|5/9|+12%}{A|4/9|5/9|+8%} ==>
|
941 | | - $line = " <td class=date>\n" .
|
942 | | - " <table border=0>\n" .
|
943 | | - " <tr>\n" .
|
944 | | - " <td class=date$colorY><b>Y</b> $month1Y⇒$month2Y</td>\n" .
|
945 | | - " <td class=date$colorY>$trendY</td>\n" .
|
946 | | - " </tr>\n" .
|
947 | | - " <tr>\n" .
|
948 | | - " <td class=date$colorM><b>M</b> $month1M⇒$month2M</td>\n" .
|
949 | | - " <td class=date$colorM>$trendM</td>\n" .
|
950 | | - " </tr>\n" .
|
951 | | - " </table>\n" .
|
952 | | - " </td>\n" ;
|
953 | | - }
|
954 | | -
|
955 | | - if ($line =~ /<!==\s*OUT\s*PUBLIC\s*==>/)
|
956 | | - {
|
957 | | - $write_public = $true ;
|
958 | | - $write_private = $false ;
|
959 | | - }
|
960 | | - elsif ($line =~ /<!==\s*OUT\s*EXTENDED\s*==>/)
|
961 | | - {
|
962 | | - $write_public = $false ;
|
963 | | - $write_private = $true ;
|
964 | | - }
|
965 | | - elsif ($line =~ /<!==\s*OUT\s*ALWAYS\s*==>/)
|
966 | | - {
|
967 | | - $write_public = $true ;
|
968 | | - $write_private = $true ;
|
969 | | - }
|
970 | | - elsif ($line =~ /<!==\s*OUT .*\s*==>/)
|
971 | | - {
|
972 | | - $line2 = $line ;
|
973 | | - $line2 =~ s/^.*<!==\s*OUT\s*// ;
|
974 | | - $line2 =~ s/\s*==>.*$// ;
|
975 | | - $write_details = $false ;
|
976 | | - $write_summary = $false ;
|
977 | | - $write_columns = $false ;
|
978 | | - if ($line2 =~ /C/)
|
979 | | - { $write_columns = $true ; }
|
980 | | - if ($line2 =~ /D/)
|
981 | | - { $write_details = $true ; }
|
982 | | - if ($line2 =~ /S/)
|
983 | | - { $write_summary = $true ; }
|
984 | | -
|
985 | | - &Print (COLUMNS, "$line\n") ;
|
986 | | - &Print (DETAILS, "$line\n") ;
|
987 | | - &Print (SUMMARY, "$line\n") ;
|
988 | | - next ;
|
989 | | - }
|
990 | | -
|
991 | | - if ($line =~ /<!==\s*INC .*\s*==>/)
|
992 | | - {
|
993 | | - $line2 = $line ;
|
994 | | - $line2 =~ s/^.*<!==\s*INC\s*// ;
|
995 | | - $line2 =~ s/\s*==>.*$// ;
|
996 | | -
|
997 | | - $file = "$path_in/$line2" ;
|
998 | | - print "\nInclude $file\n" ;
|
999 | | - if (! -e $file)
|
1000 | | - { &Abort ("File $file not found\n") ; }
|
1001 | | - open FILE, '<', $file ;
|
1002 | | - foreach $line (<FILE>)
|
1003 | | - {
|
1004 | | - if ($write_columns)
|
1005 | | - { &Print (COLUMNS, $line) ; }
|
1006 | | - if ($write_details)
|
1007 | | - { &Print (DETAILS, $line) ; }
|
1008 | | - if ($write_summary)
|
1009 | | - { &Print (SUMMARY, $line) ; }
|
1010 | | - }
|
1011 | | - next ;
|
1012 | | - }
|
1013 | | -
|
1014 | | - if ($write_columns)
|
1015 | | - { &Print (COLUMNS, "$line\n") ; }
|
1016 | | - elsif ($line =~ /-->/)
|
1017 | | - { &Print (COLUMNS, "<!-- $line\n") ; }
|
1018 | | - else
|
1019 | | - { &Print (COLUMNS, "<!-- $line -->\n") ; }
|
1020 | | -
|
1021 | | - if ($write_details)
|
1022 | | - { &Print (DETAILS, "$line\n") ; }
|
1023 | | - elsif ($line =~ /-->/)
|
1024 | | - { &Print (DETAILS, "<!-- $line\n") ; }
|
1025 | | - else
|
1026 | | - { &Print (DETAILS, "<!-- $line -->\n") ; }
|
1027 | | -
|
1028 | | - if ($write_summary)
|
1029 | | - { &Print (SUMMARY, "$line\n") ; }
|
1030 | | - elsif ($line =~ /-->/)
|
1031 | | - { &Print (SUMMARY, "<!-- $line\n") ; }
|
1032 | | - else
|
1033 | | - { &Print (SUMMARY, "<!-- $line -->\n") ; }
|
1034 | | - }
|
1035 | | -}
|
1036 | | -
|
# Anchor ($text)
# Turns a heading into an anchor id: leading and trailing whitespace is
# stripped, every remaining whitespace character becomes an underscore,
# and the result is lower-cased.
sub Anchor
{
  my ($text) = @_ ;

  for ($text) # topicalize, so the substitutions below act on $text
  {
    s/^\s*// ;
    s/\s*$// ;
    s/\s/_/g ; # one underscore per whitespace character, runs are not merged
  }

  return lc $text ;
}
|
1045 | | -
|
# WriteSynopsis ($path_out)
#
# Writes two files into $path_out:
#   RC_<yyyy>_<mm>_synopsis.txt  - whatever the global $synopsis holds on entry
#   RC_<yyyy>_<mm>_synopsis.html - the HTML template below, with DATA replaced
#                                  by one table row per metric and all
#                                  {{...}} date placeholders filled in
#
# Reads the globals $p_year, $p_month_d2 and the other $p_* date variables,
# the metric arrays (@visitors, @page_requests, ...) and $comment_prev_month.
# Overwrites the globals $synopsis, $data, $notice_synopsis and
# @synopsis_notes (the latter is filled by FormatSynopsisTable).
# Aborts the script when either output file cannot be written.
sub WriteSynopsis
{
  my $path_out = shift ;

  $notice_synopsis = "" ;
  # "<font color=#008000><b>New: multi-year trends for most metrics. Depending on history available reporting period can vary.</b></font>" ;

  my $file_txt = "$path_out/RC_${p_year}_${p_month_d2}_synopsis.txt" ;
  open my $fh_txt, '>', $file_txt or &Abort ("Could not open '$file_txt' for writing: $!") ;
  print $fh_txt $synopsis ;
  close $fh_txt or &Abort ("Could not close '$file_txt': $!") ;

# some day also get this code from RT_yyyy_mm.html, for uniformity
#
# The heredoc interpolates, so every jQuery '$' must be written '\$'.
# An unescaped '$.' would be replaced by Perl's input line number variable.
# Arrows are written as entities because the page declares iso-8859-1.
# NOTE(review): the second script block calls tablesorter(sortList: ...) without
# braces around the option, and the content-type meta tag closes its quote
# before 'charset'; both are emitted unchanged here - confirm and fix separately.
$synopsis = <<__SYNOPSIS__ ;
<html lang="en">
<head>
<title>Wikimedia Report Card Synopsis - {{month}} {{yyyy}}</title>
<meta http-equiv="content-type" content="text/html"; charset="iso-8859-1">
<meta http-equiv="Window-target" content="_top">
<meta name="language" content="en,English">
<meta name="robots" content="index,follow">
<link rel="shortcut icon" href="http://wikimediafoundation.org/favicon.ico" />
<link rel="apple-touch-icon" href="http://wikimediafoundation.org/favicon.ico" />
<script src="assets/jquery-1.1.3.1.pack.js" type="text/javascript"></script>
<script src="assets/jquery.history_remote.pack.js" type="text/javascript"></script>
<script src="assets/jquery.tabs.pack.js" type="text/javascript"></script>
<script src="assets/jquery.tablesorter.js" type="text/javascript"></script>

<script type="text/javascript">
\$(function()
{
  \$("#Synopsis").tablesorter();
})
</script>

<script type="text/javascript">
\$(document).ready(
function()
{
\$("#Synopsis").tablesorter(sortList: [[0,0]] );
}
);
</script>

<script type="text/javascript">
\$.tablesorter.addParser({
  id: "nohtml",
  is: function(s) { return false; },
  format: function(s) { return s.replace(/<.*?>/g,"").replace(/ /g,""); },
  type: "text"
});
\$.tablesorter.addParser({
  id: "digitsonly",
  is: function(s) { return false; },
  format: function(s) { return \$.tablesorter.formatFloat(s.replace(/<.*?>/g,"").replace(/ /g,"").replace(/,/g,"").replace(/-/,"-1")); },
  type: "numeric"
});
</script>

<style type="text/css">
/* tables */
table.tablesorter
{
  font-family:arial;
  background-color: #FFF; // #CDCDCD;
  margin:10px 0pt 15px;
  font-size: 7pt;
  width: 80%;
  text-align: left;
}
table.tablesorter thead tr th, table.tablesorter tfoot tr th
{
  background-color: #AAB;
  border: 1px solid #FFF;
  font-size: 8pt;
  padding: 4px;
}
table.tablesorter thead tr .header
{
  background-image: url(assets/bg.gif);
  background-repeat: no-repeat;
  background-position: center right;
  cursor: pointer;
}
table.tablesorter tbody td
{
  color: #3D3D3D;
  padding: 4px;
  background-color: #FFF;
  vertical-align: top;
}
table.tablesorter tbody tr.odd td
{ background-color:#F0F0F6; }
table.tablesorter thead tr .headerSortUp
{ background-image: url(assets/asc.gif); }
table.tablesorter thead tr .headerSortDown
{ background-image: url(assets/desc.gif); }
table.tablesorter thead tr .headerSortDown, table.tablesorter thead tr .headerSortUp
{ background-color: #BBF; //#8dbdd8; }
<!--
body {font-family:arial,sans-serif;background-color:#B0B0B0}
table,td,tr{background-color:#FFFFFF;font-size:11pt}
h1{font-size:22px}
h2{font-size:18px ; color:#006000 ; margin-top:40px}
h3{font-size:15px ; color:#006000}
form{margin:0}
a:link {color:#000080;text-decoration:none}
a:visited {color:#000080;text-decoration:none}
a:active {color:#000080;text-decoration:none}
a:hover {color:#0000FF;text-decoration:underline}
a img {border-color:black}
td.detail-left {font-size:12px ; color:#000000 ; text-align:left ; }
td.detail-center {font-size:12px ; color:#000000 ; text-align:center ; }
td.detail-right {font-size:12px ; color:#000000 ; text-align:right ; }
</style>
</head>
<body>
<table width=800 cellpadding=18 align=center>
<tr>
  <td align='center'>

  <table width=95%>

  <tr>
    <td width=100% colspan=99>
      <table width=100%>
      <tr>
        <td align=left width=150 valign=top><img src='assets/WikimediaLogo.png' width=30></td>
        <td align=center valign=top><h1>Wikimedia Report Card <font color=#008000>{{month}} {{yyyy}} </font></h1>
        </td>
        <td align=right width=150 valign=top><h1>Synopsis</h1></td>
        <!-- <td align=right width=150 valign=top><small><small>Published<br>{{month+2}}<br>{{yyyy+m2}}</small></small></td> -->
      </tr>
      <tr>
        <td align=left width=150 valign=top><!-- <small><a href='RC_{{yyyy}}_{{mm-1}}_synopsis.html'>&lArr; {{month-1}}</a></small>--> </td>
        <td align=center valign=top>
        <small>&rArr; <a href='RC_{{yyyy}}_{{mm}}_detailed.html'>Detailed version</a> &rArr; <a href='RC_{{yyyy}}_{{mm}}_summary.html'>Summary, 1 column</a> &rArr; <a href='RC_{{yyyy}}_{{mm}}_columns.html'>Summary, 2 columns</a></small>
        </td>
        <td align=right width=150 valign=top><!--<small><a href='RC_{{yyyy}}_{{mm+1}}_synopsis.html'>{{month+1}} &rArr;</a></small>--></td>
      </tr>
      </table>
    </td>
  </tr>
  <tr>
    <td colspan=99>
      <small>
      <center>
      $notice_synopsis
      </center> <!-- General comment -->
      </small>
    </td>
</tr>
<tr><td colspan=99 align=center>
<table border=1 id='Synopsis' class=tablesorter>
<!-- <tr> -->
<!-- <td align='left' colspan=99> -->
<!-- <font color=#800000><b><small>No English Wikipedia dump was produced this month.<br>Without it some totals and trends are also meaningless and left blank.</small></b></font> -->
<!-- </td> -->
<!-- </tr> -->
DATA
</table>
</td></tr>
  <tr>
    <td colspan=99 align=center>
    <hr class=thin>
    <small><small><font color=808080>Author Erik Zachte - mail: ezachte@###.org (nospam: ###=wikimedia)</font></small></small>
    </td>
  </tr>
  </table>
<script type='text/javascript'>
\$('#Synopsis').tablesorter({
  // debug:true,
  headers:{0:{sorter:'nohtml'},1:{sorter:false},2:{sorter:'digitsonly'},3:{sorter:'digitsonly'},4:{sorter:false}}
});
</script>

</body>
</html>
__SYNOPSIS__

  undef @synopsis_notes ; # refilled by the FormatSynopsisTable calls below

# $data = "<tr><th>Unique Visitors</th></tr>\n" ;
# $synopsis = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ;
# $synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ;
  $data = "<thead><tr><th class=detail-left valign=top> <b>Metric</b> </th>" .
          "<th class=detail-center valign=top> <b>Now</b> <br>{{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top> <b>Yearly change</b> <br>{{mm}}/{{yy-1}} &rArr; {{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top> <b>Monthly change</b> <br>{{(mm/yy)-1}} &rArr; {{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top> <b>Notes</b> </th></tr></thead>\n<tbody>\n" ;
# $data .= "<tr><th> </th><th> </th><th> </th><th> </th><th> </th></tr></thead>" ;

# $comment_prev_month = "<sup><font color=#800000>*</font></sup>" ; # qqq

  # one table row per metric; each label links to the matching anchor in the detailed report
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#unique_visitors'>Unique Visitors</a> <sup>All</sup>", "", @visitors) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#page_requests'>Page Requests</a> <sup>All</sup>", "", @page_requests) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#web_properties'>Site Rank</a> <sup>All</sup>", "", @rank) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#commons_files'>Binary Files</a> <sup>Commons</sup> $comment_prev_month", "", @commons_files) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#article_count'>Article Count</a> <sup>Wp</sup> $comment_prev_month", "", @article_count) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#articles_per_day'>New Articles Per Day</a> <sup>Wp</sup> $comment_prev_month", "", @new_articles) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#edits_per_month'>Edits</a> <sup>Wp</sup> $comment_prev_month", "", @edits) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#new_editors_per_month'>New Editors <sup>Wp</sup></a> $comment_prev_month", "", @new_editors) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#active_editors'>Active Editors</a> <sup>Wp</sup> $comment_prev_month", "", @active_editors) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#very_active_editors'>Very Active Editors</a> <sup>Wp</sup> $comment_prev_month", "", @very_active_editors) ;
  $data .= "</tbody>\n<tfoot><tr><td colspan=99> </td></tr>\n" ;
  $data .= "<tr><td colspan=99><b><small>Repeated observations below are grayed</small></b></td></tr>\n" ;

  foreach my $note (@synopsis_notes)
  {
    $data .= "<tr><td class=detail-left colspan=99>$note</td></tr>" ;
  }
# $data .= "<tr><td class=detail-left colspan=99><font color=#800000><small>* For German and Polish Wikipedias data for June were not yet available: reused counts from May</small></font></td></tr>" ;
  $data .= "<tr><td class=detail-left colspan=99><font color=#808080><small>All = All projects, Wp = Wikipedia project / B = billion, M = million, k = thousand</small></font></td></tr></tfoot>" ;

  $synopsis =~ s/DATA/$data/ ; # only the first 'DATA', i.e. the marker line in the template

  # Placeholders are matched literally (\Q..\E), so their braces, parentheses,
  # slash and plus signs need no hand-written escaping.
  my @placeholders =
  (
    [ '{{yyyy}}',       $p_year               ],
    [ '{{yyyy-1}}',     $p_year_prev          ],
    [ '{{yyyy+m2}}',    $p_year_plus_m2       ],
    [ '{{month}}',      $p_month_name         ],
    [ '{{month-1}}',    $p_month_name_prev    ],
    [ '{{month+1}}',    $p_month_name_next    ],
    [ '{{month+2}}',    $p_month_name_next2   ],

    [ '{{y}}',          $p_year_short         ],
    [ '{{y-1}}',        $p_year_prev_short    ],
    [ '{{yy}}',         $p_year_short_d2      ],
    [ '{{yy-1}}',       $p_year_prev_short_d2 ],
    [ '{{m}}',          $p_month              ],
    [ '{{m-1}}',        $p_month_prev         ],
    [ '{{mm}}',         $p_month_d2           ],
    [ '{{mm-1}}',       $p_month_prev_d2      ],
    [ '{{mm+1}}',       $p_month_next_d2      ],

    [ '{{(mm/yy)-1}}',  $p_year_month_m1      ],
  ) ;

  foreach my $pair (@placeholders)
  {
    my ($placeholder, $value) = @$pair ;
    $synopsis =~ s/\Q$placeholder\E/$value/g ;
  }

  my $file_html = "$path_out/RC_${p_year}_${p_month_d2}_synopsis.html" ;
  open my $fh_html, '>', $file_html or &Abort ("Could not open '$file_html' for writing: $!") ;
  print $fh_html $synopsis ;
  close $fh_html or &Abort ("Could not close '$file_html': $!") ;
}
|
1284 | | -
|
# Print ($handle, $text)
#
# Writes $text to $handle, subject to two filters:
#  - unless $debug is set, lines containing an HTML comment opener ('<!--')
#    or template markup ('<!==') are dropped; lines containing '[if lte'
#    (MSIE conditional directive) are always kept
#  - the text is only written when the current $target_audience
#    ($public or $private) has its write flag ($write_public/$write_private) set
#
# $handle may be a real file handle or a handle name; callers in this file
# pass the barewords COLUMNS, DETAILS and SUMMARY.
# Arguments are kept in lexicals so that the caller's global $text (used by
# the template code that calls Print) is not overwritten.
sub Print
{
  my ($handle, $text) = @_ ;

  if ((! $debug) && ($text !~ /\[if lte/)) # Q&D: keep MSIE directive
  {
    return if $text =~ /<!--/ ; # comments
    return if $text =~ /<!==/ ; # template markup
  }

  if (($target_audience == $public) && $write_public)
  { print $handle $text ; }
  if (($target_audience == $private) && $write_private)
  { print $handle $text ; }
}
|
1303 | | -
|
# FormatSynopsisText ($label, $comment, @metrics)
#
# Returns one line of the plain-text synopsis:
#   label (padded to 20), "value size" (right-aligned in 8),
#   "(Y:<yearly> / M:<monthly>)" and the comment, ending in a newline.
# @metrics holds: value, size code (b, m, k or x), yearly change,
# monthly change, unit of the changes ("%" or "th" for a rank).
# Works on the same package globals as before ($label, $comment, @metrics,
# $metric, $size, $inc_y, $inc_m, $inc).
sub FormatSynopsisText
{
  ($label, $comment, @metrics) = @_ ;
  ($metric, $size, $inc_y, $inc_m, $inc) = @metrics [0 .. 4] ;

  $metric =~ s/,//g ; # drop thousands separators

  if ($inc eq "th") # rank
  {
    $inc_y .= " " ;
    $inc_m .= " " ;
    $inc    = " " ;
  }
  $size =~ s/x/ / ;

  # explicit plus sign unless a minus is present, pad to 5, append the unit
  foreach my $change ($inc_y, $inc_m)
  {
    $change = '+' . $change unless $change =~ /-/ ;
    $change = sprintf ("%5s", $change) . $inc ;
  }

  # a value made of dots only is shown as is, anything else is scaled and rounded
  my %divisor_for = (b => 1000000000, m => 1000000, k => 1000) ;
  unless ($metric =~ /^\.+$/)
  {
    my $scaled = exists $divisor_for {$size} ? $metric / $divisor_for {$size} : $metric ;
    $metric = sprintf ("%.0f", $scaled) ;
  }

  return sprintf ("%-20s%8s (Y:%s / M:%s) %s\n", $label, "$metric $size", $inc_y, $inc_m, $comment) ;
}
|
1346 | | -
|
# FormatSynopsisTable ($label, $comment, @metrics)
#
# Returns one HTML table row (<tr>...</tr>\n) for the synopsis table with the
# cells: label, "value size", yearly change, monthly change, note reference.
# @metrics holds: value, size code (b, m, k or x), yearly change, monthly
# change, unit of the changes ("%" or "th" for a rank) and optionally
# "reference|note text". A non-empty note text is appended to the global
# @synopsis_notes. $comment is accepted but not used.
#
# All scratch variables are lexical, so the globals of the same names used by
# the template-processing code ($comment, $metric, ...) are left alone.
sub FormatSynopsisTable
{
  my ($label, $comment, @metrics) = @_ ;
  my ($metric, $size, $inc_y, $inc_m, $inc, $notes) = @metrics ;

  (my $notes_ref, $notes) = split ('\|', $notes) ;
  if ($notes ne "")
  {
    # text between '#' and first bracket (<>) will be grayed (repeated remarks)
    $notes =~ s/#([^<>]+)/<font color=#808080>$1<\/font>/g ;
    push @synopsis_notes, $notes ;
  }

  $metric =~ s/,//g ; # drop thousands separators
  if ($inc eq "th") # rank
  {
    $inc_y .= " " ;
    $inc_m .= " " ;
    $inc    = " " ;
  }
  $size =~ s/[x]/ / ;

  # explicit plus sign unless a minus is present, pad to 5, append the unit
  foreach my $change ($inc_y, $inc_m)
  {
    $change = '+' . $change unless $change =~ /-/ ;
    $change = sprintf ("%5s", $change) . $inc ;
  }

  # NOTE(review): only 'k' values are scaled; 'b' and 'm' values merely get an
  # upper case size letter, whereas FormatSynopsisText divides those as well.
  # Looks unintended, but kept as is - confirm against the published tables.
  if ($size eq "k")
  { $metric = sprintf ("%.1f", $metric / 1000) ; }
  elsif ($size eq "b")
  { $size = "B" ; }
  elsif ($size eq "m")
  { $size = "M" ; }
  else
  { $size = " " ; }

  if ($notes_ref eq "")
  { $notes_ref = ' ' ; }

  $metric = "$metric $size" ;

  # gray out missing ('..') and zero ('0.0...') values; each cell is wrapped at most once
  foreach my $cell ($metric, $inc_y, $inc_m)
  {
    if (($cell =~ /\.\./) || ($cell =~ /^0\.0/))
    { $cell = "<font color=#C0C0C0>$cell</font>" ; }
  }

  return "<tr><td class=detail-left>$label</td><td class=detail-right>$metric</td><td class=detail-right>$inc_y</td><td class=detail-right>$inc_m</td><td class=detail-right>$notes_ref</td></tr>\n" ;
}
|
1408 | | -
|
# Abort ($msg)
# Prints the message (without its trailing newline) inside an
# '!!! Abort script' banner on STDOUT and ends the script.
# The message is kept in the global $msg.
sub Abort
{
  chomp ($msg = shift) ;
  print "\n!!! Abort script: '" . $msg . "'\n" ;
  exit ;
}
|
1416 | | -
|
| 2 | +#!/usr/local/bin/perl |
| 3 | + |
| 4 | + use lib "/home/ezachte/lib" ; |
| 5 | + use EzLib ; |
| 6 | + $trace_on_exit = $true ; |
| 7 | + |
| 8 | + use CGI::Carp qw(fatalsToBrowser); |
| 9 | + use Time::Local ; |
| 10 | + use Getopt::Std ; |
| 11 | + |
| 12 | + # !! adapt these for every run !! |
| 13 | + $p_year = 2010 ; |
| 14 | + $p_month = 12 ; |
| 15 | + |
| 16 | + $debug = $false ; |
| 17 | + |
| 18 | + $public = 0 ; |
| 19 | + $private = 1 ; |
| 20 | + |
| 21 | + $p_month_d2 = sprintf ("%02d", $p_month) ; |
| 22 | + |
| 23 | + @months = qw (January February March April May June July August September October November December) ; |
| 24 | + $p_month_prev = ($p_month > 1) ? $p_month - 1 : 12 ; |
| 25 | + $p_month_next = ($p_month < 12) ? $p_month + 1 : 1 ; |
| 26 | + $p_month_next2 = ($p_month < 11) ? $p_month + 2 : $p_month - 10 ; |
| 27 | + $p_year_plus_m2 = ($p_month < 11) ? $p_year : $p_year + 1 ; |
| 28 | + $p_month_prev_d2 = sprintf ("%02d", $p_month_prev) ; |
| 29 | + $p_month_next_d2 = sprintf ("%02d", $p_month_next) ; |
| 30 | + |
| 31 | + $p_year_prev = $p_year - 1 ; |
| 32 | + $p_year_next = $p_year + 1 ; |
| 33 | + $p_year_short = $p_year - 2000 ; |
| 34 | + $p_year_prev_short = $p_year_prev - 2000 ; |
| 35 | + $p_year_short_d2 = sprintf ("%02d", $p_year_short) ; |
| 36 | + $p_year_prev_short_d2 = sprintf ("%02d", $p_year_prev_short) ; |
| 37 | + |
| 38 | + $p_month_name = $months [$p_month -1] ; |
| 39 | + $p_month_name_prev = $months [$p_month_prev-1] ; |
| 40 | + $p_month_name_next = $months [$p_month_next-1] ; |
| 41 | + $p_month_name_next2 = $months [$p_month_next2-1] ; |
| 42 | + |
| 43 | + |
| 44 | + $trend_one_year = "{{m}}/{{y-1}}|{{m}}/{{y}}" ; |
| 45 | + |
| 46 | + if ($p_month == 1) |
| 47 | + { $trend_one_month = "12/{{y-1}}|1/{{y}}" ; } |
| 48 | + else |
| 49 | + { $trend_one_month = "{{m-1}}/{{y}}|{{m}}/{{y}}" ; } |
| 50 | + |
| 51 | + $p_year_month_m1 = ($p_month == 1) ? "$p_month_prev/$p_year_prev_short_d2" : "$p_month_prev/$p_year_short_d2" ; # m1 = minus 1 |
| 52 | + |
| 53 | + print "\$p_year $p_year\n" ; |
| 54 | + print "\$p_year_prev $p_year_prev\n" ; |
| 55 | + print "\$p_year_plus_m2 $p_year_plus_m2\n" ; |
| 56 | + print "\$p_year_short $p_year_short\n" ; |
| 57 | + print "\$p_year_prev_short $p_year_prev_short\n" ; |
| 58 | + print "\$p_year_short_d2 $p_year_short_d2\n" ; |
| 59 | + print "\$p_year_prev_short_d2 $p_year_prev_short_d2\n" ; |
| 60 | + print "\n" ; |
| 61 | + print "\$p_month $p_month\n" ; |
| 62 | + print "\$p_month_d2 $p_month_d2\n" ; |
| 63 | + print "\$p_month_next $p_month_next\n" ; |
| 64 | + print "\$p_month_prev $p_month_prev\n" ; |
| 65 | + print "\$p_month_next_d2 $p_month_next_d2\n" ; |
| 66 | + print "\$p_month_prev_d2 $p_month_prev_d2\n" ; |
| 67 | + print "\$p_month_name $p_month_name\n" ; |
| 68 | + print "\$p_month_name_prev $p_month_name_prev\n" ; |
| 69 | + print "\$p_month_name_next $p_month_name_next\n" ; |
| 70 | + print "\$p_month_name_next2 $p_month_name_next2\n" ; |
| 71 | + print "\$p_year_month_m1 $p_year_month_m1\n" ; |
| 72 | + |
| 73 | + |
| 74 | + # example output for synopsis.txt |
| 75 | + #STATISTICS |
| 76 | + |
| 77 | + #http://infodisiac.com/Wikimedia/ReportCard/EN/RC_2009_08_summary.html |
| 78 | + |
| 79 | + #Y: Jun, 2008->2009 k=thousand m=million b=billion |
| 80 | + #M: 2009, May->Jun |
| 81 | + |
| 82 | + #Unique Visitors 301 m (Y:+21% / M: -5%) |
| 83 | + #Page Requests 11 b (Y: +6% / M: -6%) |
| 84 | + #Site Rank 5th (Y: +0 / M: -1 ) |
| 85 | + #Commons Files 4.7 m (Y:+62% / M: +4%) ++ growth pdf/djvu files |
| 86 | + #Article Count 20.6 m (Y:+33% / M: +2%) |
| 87 | + #New Articles 17 k (Y: -9% / M: -6%) |
| 88 | + #New Editors 18 k (Y:+39% / M:+25%) wp:it in one year -50% |
| 89 | + #Active Editors 88 k (Y:+ 1% / M: -2%) wp:ru in one year +45% |
| 90 | + |
| 91 | + print "\n"."="x80 . "\n\n" ; |
| 92 | + |
| 93 | +# !! This is a rather crummy Q&D (quick and dirty) way to collect variable data; the data needs to be externalized !! |
| 94 | + |
| 95 | +## if ($2010_12) |
| 96 | +## { |
| 97 | + @visitors = qw ( 395,472,000 m 14.0 -3.7 %) ; # Unique Visitors by Region |
| 98 | + @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 99 | + @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 100 | + @commons_files = qw ( 8,046,377 m 43.1 3.0 %) ; # Binaries per month - Absolute |
| 101 | + @article_count = qw ( 17,616,951 m 20.0 1.5 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute |
| 102 | + @new_articles = qw ( 8,555 k 16.5 5.1 %) ; # New articles per day - Absolute |
| 103 | + @edits = qw ( 11,566,371 m 3.6 3.8 %) ; # Edits per month - Absolute |
| 104 | + @new_editors = qw ( 14,607 k -16.6 -2.5 %) ; # New editors - Absolute |
| 105 | + @active_editors = qw ( 79,324 k -5.9 -0.5 %) ; # Active editors - Absolute |
| 106 | + @very_active_editors = qw ( 10,254 k -1.6 0.1 %) ; # Very active editors - Absolute |
| 107 | + @reach = qw ( 31.1 x 1.5 -1.4 %) ; # Reach Percentage by Region |
| 108 | + # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 109 | + push @visitors, "1|Unique Visitors<br>1: Average for last 12 months 377M." ; |
| 110 | +# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ; |
| 111 | +# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ; |
| 112 | + push @page_requests, "2,3|Page Requests<br>" . |
| 113 | + "2: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Dec: 4.1% of total Wikipedia traffic (556M/13489M)<br>" . |
| 114 | +# " Look ahead for page requests: Dec -> Jan = 13367M -> 14724M = +10.1%<br>" . |
| 115 | + "#3: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ; |
| | +# Footnote texts for the metrics of the report card edition being generated. |
| | +# Each push appends one string to a metric array, shaped "<footnote number(s)>|<title><br><note line><br>...". |
| | +# NOTE(review): a leading '#' inside a note line presumably flags that line for the renderer - confirm where these strings are parsed. |
| | +# NOTE(review): the '## }' after this run suggests the enclosing per-month conditional is disabled, so these pushes always execute - confirm. |
| | +# Commented-out fragments appear to be notes carried over from earlier editions (same texts occur in the archived sections below), kept for reuse. |
| 116 | + push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 117 | + push @commons_files, "5|Commons Files<br>#5: Tiff uploads increased 5-fold in July 2010, 13-fold in last 12 months.<br>" ; |
| 118 | +# "#7: Commons consistently fastest growing project, 48% in last 12 months." ; |
| 119 | + |
| | +# No footnote is pushed for @article_count or @new_articles here: both pushes are commented out. |
| 120 | +#push @article_count, "8|Article Count<br>#8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ; |
| 121 | +# "9: Seven Wiktionaries in top 25 Wikimedia projects" ; |
| 122 | +# push @new_articles, "7|New Articles Per Day<br>" . |
| 123 | +# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ; |
| 124 | + push @edits, "6|Edits<br>#6: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" . |
| 125 | + "# Net growth in constructive edits is less clear, as this metric includes most reverting edits." ; |
| 126 | +# " Strong one-monthly dip in July due to World Cup Soccer?." ; |
| 127 | +# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" . |
| 128 | +# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ; |
| | +# The commented-out lines between the two operands are skipped, so '.' joins the title directly with the '#7:' note. |
| 129 | + push @new_editors, "7|New Editors Per Day<br>" . |
| 130 | +# "10: Significant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" . |
| 131 | +# " Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" . |
| 132 | +# " But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" . |
| 133 | + "#7:WMF recently commissioned in depth study of editor activity trends, which is ongoing." ; |
| | +# Same pattern: the title is concatenated with the '#8:' note, the commented-out '11:' line in between is skipped. |
| 134 | + push @active_editors, "8|(Very) Active Editors<br>" . |
| 135 | +# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" . |
| 136 | + "#8: Since a few months editors on Commons are no longer included in overall editor total,<br>" . |
| 137 | + "# on the assumption that most of these also edit on one or more other projects.<br>" ; |
| 138 | +# " #Detection of double counts between any projects and languages is planned for late 2010." ; |
| | +# Only a footnote number is pushed here, no title or note text; presumably this metric shares footnote 8 with @active_editors ("(Very) Active Editors") - confirm. |
| 139 | + push @very_active_editors, "8|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ; |
| 140 | +## } |
| 141 | + |
| 142 | + |
| 143 | +# if ($2010_11) |
| 144 | +# { |
| 145 | +# @visitors = qw ( 410,816,000 m 18.8 0.6 %) ; # Unique Visitors by Region |
| 146 | +# @page_requests = qw (13,976,000,000 b 22.6 2.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 147 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 148 | +# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute |
| 149 | +# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute |
| 150 | +# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute |
| 151 | +# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute |
| 152 | +# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute |
| 153 | +# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute |
| 154 | +# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute |
| 155 | +# @reach = qw ( 31.1 x 2.6 0.0 %) ; # Reach Percentage by Region |
| 156 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 157 | +# push @visitors, "1,2|Unique Visitors<br>1: 410M UV's exceeds Oct 2010 record with 2M. Average for last 12 months 377M.<br>" . |
| 158 | +# "2: Growth in UV count in last 12 months 18.8% (for whole internet 8.9%)." ; |
| 159 | +# push @page_requests, "3,4|Page Requests<br>" . |
| 160 | +# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.4% of total traffic (492M/14468M)<br>" . |
| 161 | +# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ; |
| 162 | +# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 163 | +# } |
| 164 | + |
| 165 | +# if ($2010_10) |
| 166 | +# { |
| 167 | +# @visitors = qw ( 408,350,000 m 18.5 2.6 %) ; # Unique Visitors by Region |
| 168 | +# @page_requests = qw ( ? b ? ? %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 169 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 170 | +# @commons_files = qw ( ? m ? ? %) ; # Binaries per month - Absolute |
| 171 | +# @article_count = qw ( ? m ? ? %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute |
| 172 | +# @new_articles = qw ( ? k ? ? %) ; # New articles per day - Absolute |
| 173 | +# @edits = qw ( ? m ? ? %) ; # Edits per month - Absolute |
| 174 | +# @new_editors = qw ( ? k ? ? %) ; # New editors - Absolute |
| 175 | +# @active_editors = qw ( ? k ? ? %) ; # Active editors - Absolute |
| 176 | +# @very_active_editors = qw ( ? k ? ? %) ; # Very active editors - Absolute |
| 177 | +# @reach = qw ( 31.1 x 2.3 0.5 %) ; # Reach Percentage by Region |
| 178 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 179 | +# push @visitors, "1,2|Unique Visitors<br>1: 408M UV's beats September 2010 record with 10M.<br>" . |
| 180 | +# "2: Growth in UV count in last 12 months 18.5% (for whole internet 9.6%)." ; |
| 181 | +# push @rank, "3|Site Rank<br>#3: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 182 | +# } |
| 183 | + |
| 184 | + |
| 185 | +# if ($2010_09) |
| 186 | +# { |
| 187 | +# @visitors = qw ( 398,178,000 m 22.1 6.6 %) ; # Unique Visitors by Region |
| 188 | +# @page_requests = qw (13,671,000,000 b 20.2 5.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 189 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 190 | +# @commons_files = qw ( 7,491,824 m 48.2 2.8 %) ; # Binaries per month - Absolute |
| 191 | +# @article_count = qw ( 16,678,710 m 20.7 1.8 %) ; # Starting Sep-2010 Wikipedia articles only / Article count (official) - Absolute |
| 192 | +# @new_articles = qw ( 7,578 k 3.9 -18.9 %) ; # New articles per day - Absolute |
| 193 | +# @edits = qw ( 11,924,018 m 9.0 -3.3 %) ; # Edits per month - Absolute |
| 194 | +# @new_editors = qw ( 15,805 k -17.4 -10.5 %) ; # New editors - Absolute |
| 195 | +# @active_editors = qw ( 82,503 k -5.6 -3.3 %) ; # Active editors - Absolute |
| 196 | +# @very_active_editors = qw ( 11,011 k -2.5 -3.4 %) ; # Very active editors - Absolute |
| 197 | +# @reach = qw ( 30.8 x 3.2 1.8 %) ; # Reach Percentage by Region |
| 198 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 199 | +# push @visitors, "1,2|Unique Visitors<br>1: 398M UV's beats May 2010 record with 9M or 2.4%.<br>" . |
| 200 | +# "2: Growth in UV count in last 12 months 22% (for whole internet 10%)." ; |
| 201 | +# push @page_requests, "3,4|Page Requests<br>" . |
| 202 | +# "3: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 3.0% of total traffic (425M/14096M)<br>" . |
| 203 | +# # " Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" . |
| 204 | +# "#4: Page requests have been normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ; |
| 205 | +# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 206 | +# push @commons_files, "6,7|Commons Files<br>#6: Tiff uploads increased 5-fold in July 2010, 18-fold in last 12 months.<br>" . |
| 207 | +# "7: Commons consistently fastest growing project, 48% in last 12 months." ; |
| 208 | + |
| 209 | +# push @article_count, "8|Article Count<br>8: From Sep 2010 this metric is for Wikipedia projects only. This prevents adding apples and oranges." ; |
| 210 | +# push @edits, "9|Edits<br>9: Over the last 3 years there is fairly consistent growth in manual, registered edits.<br>" . |
| 211 | +# " Net growth in constructive edits is less clear, as this metric includes most reverting edits." ; |
| 212 | +# " Strong one-monthly dip in July due to World Cup Soccer?." ; |
| 213 | +# push @new_editors, "10|New Editors Per Day<br>" . |
| 214 | +# "10: Significant decline in last month (All projects: -10.5%, <a href='http://stats.wikimedia.org/EN/ChartsWikipediaZZ.htm'>Wikipedias -11.2%</a>).<br>" . |
| 215 | +# " Arguably slowing influx of editors can partly be attributed to (multi-factorial) <a href='http://en.wikipedia.org/wiki/Market_saturation'>saturation process(es)</a><br>" . |
| 216 | +# " But 19% drop for Wikipedias in half year (comparing 3-monthly averages) is not consistent with that.<br>" . |
| 217 | +# " WMF recently commissioned in depth study of editor activity trends, which is ongoing." ; |
| 218 | +# push @active_editors, "11,12|(Very) Active Editors<br>" . |
| 219 | +# "11: Recent drops are well within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>.<br>" . |
| 220 | +# "#12: Editors on Commons are no longer included in overall editor total,<br>" . |
| 221 | +# " #on the assumption that most of these also edit on one or more other projects.<br>" . |
| 222 | +# " #Detection of double counts between any projects and languages is planned for late 2010." ; |
| 223 | +# push @very_active_editors, "11,12|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ; |
| 224 | +# } |
| 225 | + |
| 226 | +# if ($2010_08) |
| 227 | +# { |
| 228 | +# @visitors = qw ( 373,392,000 m 21.4 3.7 %) ; # Unique Visitors by Region |
| 229 | +# @page_requests = qw (13,367,000,000 b 23.9 -1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 230 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 231 | +# @commons_files = qw ( 7,298,379 m 48.1 2.8 %) ; # Binaries per month - Absolute |
| 232 | +# @article_count = qw ( 34,963,360 m 30.0 2.4 %) ; # Article count (official) - Absolute |
| 233 | +# @new_articles = qw ( 9,437 k 22.4 25.7 %) ; # New articles per day - Absolute |
| 234 | +# @edits = qw ( 12,346,207 m 7.9 15.4 %) ; # Edits per month - Absolute |
| 235 | +# @new_editors = qw ( 17,026 k -17.3 -1.1 %) ; # New editors - Absolute |
| 236 | +# @active_editors = qw ( 85,643 k -5.2 2.1 %) ; # Active editors - Absolute |
| 237 | +# @very_active_editors = qw ( 11,419 k -1.6 5.0 %) ; # Very active editors - Absolute |
| 238 | +# @reach = qw ( 29.0 x 2.6 0.5 %) ; # Reach Percentage by Region |
| 239 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 240 | +# push @page_requests, "1,2,3,4|Page Requests<br>" . |
| 241 | +# "1: <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic</a> in Sep: 2.9% of total traffic (425M/14724M)<br>" . |
| 242 | +# " Look ahead for page requests: Aug -> Sep = 13367M -> 14724M = +10.1%<br>" . |
| 243 | +# "# Trend data for mobile will be added when more history is available.<br>" . |
| 244 | +# "#2: Due to server problems counts from squid logs for December 2009 - March 2010 are too low,<br>" . |
| 245 | +# "# estimated underreporting 10%-25%. Counts for April - July 2010 have been patched. Read <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>more</a>.<br>" . |
| 246 | +# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-08/Page-Views-Per-Project-Indexed.png'>chart</a><br>" . |
| 247 | +# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ; |
| 248 | +# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 249 | +# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>22-fold</a> in a year." ; |
| 250 | + |
| 251 | +# push @new_articles, "7|New Articles Per Day<br>" . |
| 252 | +# "7: Strong growth in August by peaks on 3 wikis: Catalan/Dutch 3-fold inc., Slovene 17-fold (bots?)." ; |
| 253 | +# push @edits, "8|Edits<br>8: All time high for edit count, even slightly above May level.<br>" . |
| 254 | +# " Strong one-monthly dip in July due to World Cup Soccer?." ; |
| 255 | +# push @active_editors, "9,10|(Very) Active Editors<br>" . |
| 256 | +# "9: After a <a href='charts/2010-08/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active Wikipedia editors</a> in June, and a further 2% drop in July,<br>" . |
| 257 | +# " trend is upwards again, with 2.5% increase in August.<br>" . |
| 258 | +# " Prospects for September are good, with +10% growth in page requests<br>" . |
| 259 | +# " (given strong correlation of 0.67 between page requests and active editors).<br>" . |
| 260 | +# " From a wider perspective drops were still within normal bandwidth, largest drop was in <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2006.png'>June 2006</a>,<br>" . |
| 261 | +# " see also <a href='charts/2010-08/Monthly-Active-Users-Since-Jan-2008.png'>similar chart with trend line since June 2008</a>.<br>" . |
| 262 | +# "10: New: Editors on Commons are no longer included in overall editor total,<br>" . |
| 263 | +# " on the assumption that most of these also edit on one or more other projects.<br>" . |
| 264 | +# " Detection of double counts between any projects and languages is planned for late 2010." ; |
| 265 | +# push @very_active_editors, "9,10|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ; |
| 266 | +# } |
| 267 | + |
| 268 | +# if ($2010_07) |
| 269 | +# { |
| 270 | +# @visitors = qw ( 360,225,000 m 21.9 -5 %) ; # Unique Visitors by Region |
| 271 | +# @page_requests = qw (13,116,000,000 b 27.2 -6 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 272 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 273 | +# @commons_files = qw ( 7,104,689 m 49.1 2.9 %) ; # Binaries per month - Absolute |
| 274 | +# @article_count = qw ( 34,198,285 m 29.9 2 %) ; # Article count (official) - Absolute |
| 275 | +# @new_articles = qw ( 7,642 k 4.2 -0.6 %) ; # New articles per day - Absolute |
| 276 | +# @edits = qw ( 10,734,940 m -5.5 -9.8 %) ; # Edits per month - Absolute |
| 277 | +# @new_editors = qw ( 16,661 k -20.8 -5.6 %) ; # New editors - Absolute |
| 278 | +# @active_editors = qw ( 90,554 k -5.9 -1.6 %) ; # Active editors - Absolute |
| 279 | +# @very_active_editors = qw ( 11,818 k -2.1 -1.8 %) ; # Very active editors - Absolute |
| 280 | +# @reach = qw ( 28.5 x 2.8 -1.7 %) ; # Reach Percentage by Region |
| 281 | + # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 282 | +# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" . |
| 283 | +# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" . |
| 284 | +# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ; |
| 285 | +# push @page_requests, "1,2,3,4|Page Requests<br>" . |
| 286 | +# "1: Due to <a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>server problems</a> counts from squid logs for December 2009 - March 2010 are too low,<br>" . |
| 287 | +# " estimated underreporting 10%-25%. Counts for April - July 2010 have been patched." . |
| 288 | +# ".<br>" . |
| 289 | +# "2: August : <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>Mobile traffic (401M)</a>: 3.0% of total traffic (13367M)<br>" . |
| 290 | +# "# Trend data for mobile will be added when more history is available.<br>" . |
| 291 | +# "#3: Many projects show peak traffic late 2009: see <a href='charts/2010-07/Page-Views-Per-Project-Indexed.png'>chart</a><br>" . |
| 292 | +# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ; |
| 293 | +# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 294 | +# push @commons_files, "6|Commons Files<br>#6: Tiff uploads increased <a href='charts/2010-07/Monthly-Binaries-Absolute-Log.png'>5-fold</a> in July 2010, <a href='charts/2010-07/Monthly-Binaries-Indexed.png'>25-fold</a> in a year." ; |
| 295 | + |
| 296 | +# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" . |
| 297 | +# "9: Seven Wiktionaries in top 25 Wikimedia projects" ; |
| 298 | +# push @new_articles, "7|New Articles Per Day<br>" . |
| 299 | +# "#7: Peak in April and May by massive activity on Aromanian and Waray-Waray Wp's, each by single user.<br>" . |
| 300 | +# "# In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ; |
| 301 | +# push @active_editors, "8,9|(Very) Active Editors<br>" . |
| 302 | +# "8: The <a href='charts/2010-07/Monthly-Active-Editors-Absolute-Linear.png'>6% drop in active editors</a> for all Wikipedias in June was relatively large,<br>" . |
| 303 | +# " but from a <a href='charts/2010-07/Monthly-Active-Users-Since-Jan-2006.png'>wider perspective</a> still within normal bandwidth, largest drop was in June 2006.<br>" . |
| 304 | +# " There might be a seasonal component in fluctuations.<br>" . |
| 305 | +# "9: Bug fix: in earlier RC editions editors from Commons (6k active editors) were counted double.<br>" . |
| 306 | +# " This has been fixed for all months in this RC." ; |
| 307 | +# push @very_active_editors, "9|" ; #Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ; |
| 308 | +# } |
| 309 | + |
| 310 | +# if ($2010_06) |
| 311 | +# { |
| 312 | +# @visitors = qw ( 379,344,000 m 25.2 -2.5 %) ; # Unique Visitors by Region |
| 313 | +# @page_requests = qw (13,957,000,000 b 26.0 1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 314 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 315 | +# @commons_files = qw ( 6,910,267 m 50.1 2.5 %) ; # Binaries per month - Absolute |
| 316 | +# @article_count = qw ( 33,430,039 m 29.7 1.5 %) ; # Article count (official) - Absolute |
| 317 | +# @new_articles = qw ( 7,865 k 14.5 -16.2 %) ; # New articles per day - Absolute |
| 318 | +# @edits = qw ( 12,056,265 m 10.1 -1.6 %) ; # Edits per month - Absolute |
| 319 | +# @new_editors = qw ( 17,573 k -15.2 -10.6 %) ; # New editors - Absolute |
| 320 | +# @active_editors = qw ( 99,124 k -3.5 -4.4 %) ; # Active editors - Absolute |
| 321 | +# @very_active_editors = qw ( 13,042 k 0.7 -2.9 %) ; # Very active editors - Absolute |
| 322 | +# @reach = qw ( 30.2 x 3.5 -1.1 %) ; # Reach Percentage by Region |
| 323 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 324 | +# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" . |
| 325 | +# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" . |
| 326 | +# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ; |
| 327 | +# push @page_requests, "1,2,3,4|Page Requests<br>" . |
| 328 | +# "1: Traffic volume for recent months had been underreported due to monitor capacity problems.<br>" . |
| 329 | +# " Counts from April 2010 and later " . |
| 330 | +# "<a href='http://infodisiac.com/blog/2010/07/wikimedia-page-views-some-good-and-bad-news/'>have been corrected</a>.<br>" . |
| 331 | +# " Data from Nov 2009 - Mar 2010 may still be too low.<br>" . |
| 332 | +# "2: Traffic to mobile site is now counted. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 208M:13957M=1.5% of total)</a><br>" . |
| 333 | +# " This is the first month, so no trend data yet. <a href='charts/2010-06/Page-Views-Breakdown-Mobile-Traffic.png'> " . |
| 334 | +# "Breakdown per language</a>:" . |
| 335 | +# "English:71.3%,<br> Japanese:8.6%, German:4.5%, French:3.9%, Russian:3.4%, Others:8.3%<br>" . |
| 336 | +# "3: <a href='charts/2010-06/Page-Views-Per-Project-Indexed.png'>New chart</a> for breakdown of traffic volume per project: many projects show peak traffic late 2009.<br>" . |
| 337 | +# "#4: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" ; |
| 338 | +# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 339 | +# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ; |
| 340 | +# push @new_articles, "7|New Articles Per Day<br>7: Peak in April and May by massive activity on <a href='http://stats.wikimedia.org/EN/TablesWikipediaROA_RUP.htm'>Aromanian</a> and <a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray</a> Wp's, each by single user.<br>" . |
| 341 | +# " In May 20% of all new articles were created on these two small wikis (April 7%, June 11%)" ; |
| 342 | +# push @edits, "9|Edits<br>9: For German,French and Polish Wikipedia dumps were not yet updated, reused data from previous month" ; |
| 343 | +# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" . |
| 344 | +# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" . |
| 345 | +# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ; |
| 346 | +# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ; |
| 347 | +# } |
| 348 | + |
| 349 | +# if ($2010_05) |
| 350 | +# { |
| 351 | +# @visitors = qw ( 388,932,000 m 22.6 3.8 %) ; # Unique Visitors by Region |
| 352 | +# @page_requests = qw (11,250,000,000 b -1.0 -1.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 353 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 354 | +# @commons_files = qw ( 6,765,082 m 51.9 3.1 %) ; # Binaries per month - Absolute |
| 355 | +# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute |
| 356 | +# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute |
| 357 | +# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute |
| 358 | +# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute |
| 359 | +# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute |
| 360 | +# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute |
| 361 | +# @reach = qw ( 31.3 x 3.0 0.9 %) ; # Reach Percentage by Region |
| 362 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 363 | +# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" . |
| 364 | +# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" . |
| 365 | +# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ; |
| 366 | +# push @page_requests, "3,4|Page Requests<br>" . |
| 367 | +# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" . |
| 368 | +# "4: Traffic to mobile site not yet included. <a href='http://stats.wikimedia.org/EN/TablesPageViewsMonthlyMobile.htm'>(June 154M:10700M=1.4% of total)</a><br>" . |
| 369 | +# "5: Page request trends on several projects are falling for 4th month, which deserves some further analysis" ; |
| 370 | +# push @rank, "6|Site Rank<br>#6: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 371 | +# push @commons_files, "7|Commons Files<br>#8: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ; |
| 372 | +# push @article_count, "8,9|Article Count<br>8: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" . |
| 373 | +# "9: Seven Wiktionaries in top 25 Wikimedia projects" ; |
| 374 | +# push @new_articles, "10,11|New Articles Per Day<br>10: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" . |
| 375 | +# "11:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ; |
| 376 | +# push @edits, "12,13|Edits<br>12: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" . |
| 377 | +# "Most Serbian Wikinews edits by (overactive?) weather bot that updates temp/wind speed every few seconds.<br>" . |
| 378 | +# "#13: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" . |
| 379 | +# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ; |
| 380 | +# push @very_active_editors, "14|Very Active Editors<br>14: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ; |
| 381 | +# } |
| 382 | + |
| 383 | + |
| 384 | +# if ($2010_04) |
| 385 | +# { |
| 386 | +# @visitors = qw ( 374,846,000 m 17.1 1.1 %) ; # Unique Visitors by Region |
| 387 | +# @page_requests = qw (11,724,000,000 b +7.4 -0.1 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 388 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 389 | +# @commons_files = qw ( 6,564,544 m 52.2 3.3 %) ; # Binaries per month - Absolute |
| 390 | +# @article_count = qw ( 32,410,992 m 31.9 2.3 %) ; # Article count (official) - Absolute |
| 391 | +# @new_articles = qw ( 8,638 k 11.2 12.9 %) ; # New articles per day - Absolute |
| 392 | +# @edits = qw ( 12,119,403 m 11.6 0.0 %) ; # Edits per month - Absolute |
| 393 | +# @new_editors = qw ( 18,761 k -8.2 -8.1 %) ; # New editors - Absolute |
| 394 | +# @active_editors = qw ( 102,689 k 1.7 -1.8 %) ; # Active editors - Absolute |
| 395 | +# @very_active_editors = qw ( 13,124 k 3.4 -1.9 %) ; # Very active editors - Absolute |
| 396 | +# @reach = qw ( 30.4 x 1.5 0.0 %) ; # Reach Percentage by Region |
| 397 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 398 | +# push @visitors, "1,2|Unique Visitors<br>1: 375M UV's beats last month's record with 4M or 1.1 % (matches overall internet growth).<br>" . |
| 399 | +# "2: Wikimedia projects reach 30.4 % of internet population, which is best reach for last year<br>" . |
| 400 | +# " (avg 28.3%, low 25.7% in July 09). Reach in Asia lags behind other regions (15.9%)" ; |
| 401 | +# push @page_requests, "3,4|Page Requests<br>" . |
| 402 | +# "#3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" . |
| 403 | +# "4: Traffic to mobile site not included. Expect this next month." ; |
| 404 | +# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 405 | +# push @commons_files, "6|Commons Files<br>6: Fastest relative growth: tiff images (723%), ogg vorbis video (446%)." ; |
| 406 | +# push @article_count, "7,8|Article Count<br>7: Serbian Wikinews: 5k->36k in a year, compare English Wikinews: 15k->17k<br>" . |
| 407 | +# "8: Seven Wiktionaries in top 25 Wikimedia projects" ; |
| 408 | +# push @new_articles, "9,10|New Articles Per Day<br>9: All wikinews project combined +240% (39->133 p/d), see below Serbian Wikinews<br>" . |
| 409 | +# "10:<a href='http://stats.wikimedia.org/EN/TablesWikipediaWAR.htm'>Waray-Waray Wikipedia</a> 2nd fastest grower with +610 mostly <a href='http://war.wikipedia.org/wiki/Obyce'>geo stubs</a> p/day by <a href='http://en.wikipedia.org/wiki/User:JinJian'>JinJian</a>" ; |
| 410 | +# push @edits, "11,12|Edits<br>11: 3 of 4 Wikinews monthly edits on Serbian Wikinews: 36k, English 5k, German/French 2k each<br>" . |
| 411 | +# "All Serbian Wikinews edits by weather bot that updates temp/wind speed every few seconds.<br>" . |
| 412 | +# "30 June 2010: report filed for <a href='http://en.wikinews.org/wiki/Wikinews:Admin_action_alerts'>runaway bot</a><br>" . |
| 413 | +# "#12: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" . |
| 414 | +# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ; |
| 415 | +# push @very_active_editors, "13|Very Active Editors<br>13: Ukrain +84% (61->112), Indonesian +180% (22->62) (during contest), Swedish -13% (141->122)." ; |
| 416 | +# } |
| 417 | + |
| 418 | +# if ($2010_03) |
| 419 | +# { |
| 420 | +# @visitors = qw ( 370,744,000 m 13.3 7.4 %) ; # Unique Visitors by Region |
| 421 | +# @page_requests = qw (11,730,000,000 b +0.3 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 422 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 423 | +# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute |
| 424 | +# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute |
| 425 | +# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute |
| 426 | +# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute |
| 427 | +# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute |
| 428 | +# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute |
| 429 | +# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute |
| 430 | +# @reach = qw ( 30.4 x 0.5 1.7 %) ; # Reach Percentage by Region |
| 431 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 432 | +# push @visitors, "1,2|Unique Visitors<br>1: March has 3 (11%) more days than February<br>" . |
| 433 | +# " This will explain much of apparently large monthly growth in visitors<br>" . |
| 434 | +# "2: All regions same or more unique visitors than year ago. North Am. +25%, Latin Am. + 27%" ; |
| 435 | +# push @page_requests, "3|Page Requests<br>" . |
| 436 | +# "3: Page requests are now normalized to 30 days (Jan*30/31, Feb*30/28, Mar*30/31, etc)<br>" . |
| 437 | +# " This way monthly changes are more meaningful<br>" . |
| 438 | +# " Difference with not normalized data is mainly visible in Jan⇒Feb and Feb⇒Mar" ; |
| 439 | +# push @rank, "4|Site Rank<br>#4: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 440 | +# push @commons_files, "5|Commons Files<br>#5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ; |
| 441 | +# push @article_count, "6|Article Count<br>#6: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ; |
| 442 | +# push @edits, "7|Edits<br>#7: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" . |
| 443 | +# " #2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ; |
| 444 | +# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" . |
| 445 | +# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ; |
| 446 | +# } |
| 447 | + |
| 448 | +# if ($2010_02) |
| 449 | +# { |
| 450 | +# @visitors = qw ( 345,218,000 m 14.8 -5.3 %) ; # Unique Visitors by Region |
| 451 | +# @page_requests = qw (11,081,000,000 b +5.8 0.0 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 452 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 453 | +# @commons_files = qw ( 6,209,569 m 58.3 2.6 %) ; # Binaries per month - Absolute |
| 454 | +# @article_count = qw ( 30,349,860 m 34.0 1.9 %) ; # Article count (official) - Absolute |
| 455 | +# @new_articles = qw ( 7,567 k -5.7 -0.4 %) ; # New articles per day - Absolute |
| 456 | +# @edits = qw ( 11,462,106 m 7.1 -3.2 %) ; # Edits per month - Absolute |
| 457 | +# @new_editors = qw ( 18,362 k -11.5 -10.8 %) ; # New editors - Absolute |
| 458 | +# @active_editors = qw ( 101,730 k 1.5 -4.6 %) ; # Active editors - Absolute |
| 459 | +# @very_active_editors = qw ( 12,983 k 5.6 -5.4 %) ; # Very active editors - Absolute |
| 460 | +# @reach = qw ( 28.7 x 0.8 -0.8 %) ; # Reach Percentage by Region |
| 461 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 462 | +# push @visitors, "1|Unique Visitors<br>1: comScore reassesses online population in their target segments twice a year (Feb & Aug)<br>" . |
| 463 | +# " This time estimate for Indonesia, Philippines and Vietnam was lowered by -54%,<br>" . |
| 464 | +# " resulting in a worldwide reassesment of online population of -4%" ; |
| 465 | +# push @page_requests, "2,3|Page Requests<br>" . |
| 466 | +# "2:Corrected for length of months Jan -> Feb increase was actually +11.0% !<br>" . |
| 467 | +# "3:Russia maintains its steep growth: +57% in last 12 months, +137% in preceding 12 months<br>" . |
| 468 | +# " Indonesia is 2nd, and speeding up: +46% in last 12 months, +34% before that<br>" . |
| 469 | +# "# German decline (-10%) is still atypical (caused by spike year ago after court decision)" ; |
| 470 | +# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with those ranked 4th and 6th are considerable." ; |
| 471 | +# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ; |
| 472 | +# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year, English and French wiktionaries +36% through bots." ; |
| 473 | +# push @edits, "8|Edits<br>8: Average monthly manual edits by registered users for all Wikipedia's combined, in millions<br>" . |
| 474 | +# " 2006 ⇒ 2010: 7.7 ⇒ 9.9 ⇒ 11.5 ⇒ 12.4 ⇒ 12.7" ; |
| 475 | +# } |
| 476 | + |
| 477 | +# if ($2009_??) |
| 478 | +# { |
| 479 | +# @visitors = qw ( 364,719,000 m 25.8 5.1 %) ; # Unique Visitors by Region |
| 480 | +# @page_requests = qw (11,054,000,000 b -3.1 6.4 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) of http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm (Wikipedia only is good enough) |
| 481 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 482 | +# @commons_files = qw ( 6,058,601 m 59.5 6.5 %) ; # Binaries per month - Absolute |
| 483 | +# @article_count = qw ( 29,742,993 m 34.7 2.4 %) ; # Article count (official) - Absolute |
| 484 | +# @new_articles = qw ( 7,626 k -1.1 3.4 %) ; # New articles per day - Absolute |
| 485 | +# @edits = qw ( 12,251,152 m 4.8 9.0 %) ; # Edits per month - Absolute |
| 486 | +# @new_editors = qw ( 19,279 k -12.4 5.6 %) ; # New editors - Absolute |
| 487 | +# @active_editors = qw ( 98,597 k -1.4 5.0 %) ; # Active editors - Absolute |
| 488 | +# @very_active_editors = qw ( 12,488 k -1.1 6.3 %) ; # Very active editors - Absolute |
| 489 | +# @reach = qw ( 29.0 x 1.0 1.0 %) ; # Reach Percentage by Region |
| 490 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 491 | +# push @visitors, "1,2|Unique Visitors<br>#1: Yearly growth in UV's (26%) exceeds growth of total internet (21%).<br>" . |
| 492 | +# "2: Large monthly shifts in UV/Reach in 3rd world explained by comScore as seasonal influences:<br> school vacations, and large festivals, religious (e.g. Ramadan) or otherwise (e.g. Carnival)." ; |
| 493 | +# push @page_requests, "3,4|Page Requests<br>3:<b> Trends measured by comScore and internal measurements diverge somewhat.</b><br> <b>Possible causes are under investigation.</b><p>" . |
| 494 | +# "4:Fastest rising large Wikipedia's in last 12 months:<br>" . |
| 495 | +# " Vietnamese (87%), Ukranian (65%), Russian (45%), Indonesian (39%), Chinese (28%), Thai (23%)<br>" . |
| 496 | +# " German decline (-32%) is atypical (caused by short massive spike year ago due after court decision)" ; |
| 497 | +# push @rank, "5|Site Rank<br>#5: 5th position will be stable for long time: differences with 4th and 6th ranked properties are considerable." ; |
| 498 | +# push @commons_files, "6|Commons Files<br>#6: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ; |
| 499 | +# push @article_count, "7|Article Count<br>#7: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ; |
| 500 | +# push @edits, "8|Edits<br>#8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>#Monthly edits for all Wikipedia's combined</a># remarkably stable between 10 and 12 million<br># for 3 years now (as is the case for active and very active editors)" ; |
| 501 | +# push @new_editors, "9|New Editors<br>#9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons<p>" . |
| 502 | +# "Experiment: logarithmic chart now uses two scales for widely divergent values.<br>This helps to remove clutter, but may need some getting used to." ; |
| 503 | + |
| 504 | +# push @active_editors, "10|Active Editors<br>10: Russian editor base still growing steeply: +30% editors in one year." ; |
| 505 | +# } |
| 506 | + |
| 507 | +# if ($2009_??) |
| 508 | +# { |
| 509 | +# @visitors = qw ( 347,019,000 m 27.1 0.4 %) ; # Unique Visitors by Region |
| 510 | +# @page_requests = qw (10,389,000,000 b 0.0 -9.2 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) |
| 511 | +# @rank = qw ( 5th x 0 0 th) ; # Web Properties - Unique Visitors |
| 512 | +# @commons_files = qw ( 5,695,283 m 55.1 2.6 %) ; # Binaries per month - Absolute |
| 513 | +# @article_count = qw ( 29,016,248 m 34.3 2.1 %) ; # Article count (official) - Absolute |
| 514 | +# @new_articles = qw ( 7,457 k 7.7 2.6 %) ; # New articles per day - Absolute |
| 515 | +# @edits = qw ( 10,791,575 m 0.6 0.4 %) ; # Edits per month - Absolute |
| 516 | +# @new_editors = qw ( 18,597 k -6.3 -2.4 %) ; # New editors - Absolute |
| 517 | +# @active_editors = qw ( 95,849 k 3.8 -0.4 %) ; # Active editors - Absolute |
| 518 | +# @very_active_editors = qw ( 11,764 k 0.4 -0.5 %) ; # Very active editors - Absolute |
| 519 | +# @reach = qw ( 28.7 x 1.6 -0.0 %) ; # Reach Percentage by Region |
| 520 | +# # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 521 | +# push @visitors, "1,2|Unique Visitors<br>1: Yearly growth in UV's (27%) exceeds growth of total internet (21%).<br>" . |
| 522 | +# "2: Conversation with comScore on huge monthly shifts in UV/Reach in 3rd world continues." ; |
| 523 | +# push @page_requests, "3|Page Requests<br>3: Same as last year: dip in page requests (but spike in image requests)." ; |
| 524 | +# push @rank, "4|Site Rank<br>4: 5th position will be stable for long time: 4th has 35% more UV's, 6th 23% less." ; |
| 525 | +# push @commons_files, "5|Commons Files<br>5: Fastest riser (relatively speaking): ogg vorbis video, djvu (for scanned docs) also booming." ; |
| 526 | +# push @article_count, "6|Article Count<br>6: 60% growth in Commons files in one year. Wiktionaries exploding through bots." ; |
| 527 | +# push @new_articles, "7|New Articles<br>7: Russian consistently fast riser, Ukranian growth 40% of previous months" ; |
| 528 | +# push @edits, "8|Edits<br>8: <a href='http://stats.wikimedia.org/EN/TablesWikipediaZZ.htm'>Monthly edits for all Wikipedia's combined</a> remarkably stable between 10 and 12 million<br>for 3 years now (as is the case for active and very active editors)" ; |
| 529 | +# push @new_editors, "9|New Editors<br>9: Most mature Wikipedia's see least growth in editors. Largest influx: Russian / Commons" ; |
| 530 | +# } |
| 531 | + |
| 532 | +# if ($2009_10) |
| 533 | +# { |
| 534 | +# @visitors = qw ( 345,805,000 m 23.1 0.4 %) ; # Unique Visitors by Region |
| 535 | +# @page_requests = qw (11,257,000,000 b 7.7 -2.8 %) ; # copy/calc manually monthly total and monthly and yearly growth from 1st column (Sigma) |
| 536 | +# @rank = qw ( 5th x -1 0 th) ; # Web Properties - Unique Visitors |
| 537 | +# @commons_files = qw ( 5,558,644 m 59.7 3.4 %) ; # Binaries per month - Absolute |
| 538 | +# @article_count = qw ( 28,506,011 m 35.4 2.5 %) ; # Article count (official) - Absolute |
| 539 | +# @new_articles = qw ( 7,357 k 2.1 -6.1 %) ; # New articles per day - Absolute |
| 540 | +# @edits = qw ( 10,772,957 m 2.8 -3.4 %) ; # Edits per month - Absolute |
| 541 | +# @new_editors = qw ( 18,779 k -5.2 -4.5 %) ; # New editors - Absolute |
| 542 | +# @active_editors = qw ( 96,521 k 4.0 0.1 %) ; # Active editors - Absolute |
| 543 | +# @very_active_editors = qw ( 11,726 k 2.7 -3.4 %) ; # Very active editors - Absolute |
| 544 | + |
| 545 | +# @reach = qw ( 28.7 x 0.5 -0.3 %) ; # Reach Percentage by Region |
| 546 | + # http://stats.wikimedia.org/EN/TablesPageViewsMonthly.htm |
| 547 | +# push @visitors, "1|1: asked comScore to explain huge shifts in UV/Reach in Middle East-Africa." ; |
| 548 | +# push @page_requests, "2|2: Capacity problems may have played a role. New servers ordered." ; |
| 549 | +# push @new_articles, "2,3|3: Ukranian Wikipedia fastest riser (compare edits for Russian)" ; |
| 550 | +# push @edits, "4|4: Russian Wikipedia fastest riser (compare new articles for Ukrain)" ; |
| 551 | +# push @very_active_editors, "2" ; |
| 552 | +# } |
| 553 | + |
| 554 | +# if ($2009_10) |
| 555 | +# { |
| 556 | +# @new_editors = qw ( 19,002 k -8.9 3.2 %) ; |
| 557 | +# @active_editors = qw ( 97,132 k 1.9 3.4 %) ; |
| 558 | +# @very_active_editors = qw ( 12,172 k 2.8 1.2 %) ; |
| 559 | +# @article_count = qw ( 27,852,471 m 35.6 2.8 %) ; |
| 560 | +# @new_articles = qw ( 8,050 k 11.2 5.9 %) ; |
| 561 | +# @edits = qw ( 11,188,080 m -1.8 1.7 %) ; |
| 562 | +# @commons_files = qw ( 5,539,645 m 60.3 5.5 %) ; |
| 563 | +# @rank = qw ( 5th x -1 0 th) ; |
| 564 | +# @visitors = qw ( 344,563,000 m 24.3 5.7 %) ; |
| 565 | +# @reach = qw ( 29.0 x 0.8 1.3 %) ; |
| 566 | +# @page_requests = qw (11,586,000,000 b 8.8 1.9 %) ; |
| 567 | +# } |
| 568 | + |
| 569 | +# if ($2009_09) |
| 570 | +# { |
| 571 | +# @new_editors = qw ( 17,792 k -8.7 -9.6 %) ; |
| 572 | +# @active_editors = qw ( 94,565 k 2.3 -2.5 %) ; |
| 573 | +# @very_active_editors = qw ( 12,069 k 3.6 -2.5 %) ; |
| 574 | +# @article_count = qw ( 27,120,974 m 36.6 2.0 %) ; |
| 575 | +# @new_articles = qw ( 12,907 k -0.3 -11.4 %) ; |
| 576 | +# @edits = qw ( 12,578,009 m 8.8 -9.0 %) ; |
| 577 | +# @commons_files = qw ( 5,115,042 m 57.4 2.7 %) ; |
| 578 | +# @rank = qw ( 5th x 0 0 th) ; |
| 579 | +# @visitors = qw ( 325,998,000 m 19.8 6.0 %) ; |
| 580 | +# @reach = qw ( 27.6 x -1.4 4.5 %) ; |
| 581 | +# @page_requests = qw (11,372,000,000 b 11.7 5.1 %) ; |
| 582 | +# } |
| 583 | + |
| 584 | +# if ($2009_08) |
| 585 | +# { |
| 586 | +# @new_editors = qw ( 17,998 k -9.4 -6.2 %) ; |
| 587 | +# @active_editors = qw ( 91,359 k 1.1 0.8 %) ; |
| 588 | +# @very_active_editors = qw ( 11,568 k 0.3 3.0 %) ; |
| 589 | +# @article_count = qw ( 21,143,943 m 29.9 2.0 %) ; |
| 590 | +# @new_articles = qw ( 13,174 k 8.1 11.4 %) ; |
| 591 | +# @edits = qw ( 12,807,952 m 8.4 4.8 %) ; |
| 592 | +# @commons_files = qw ( 4,996,023 m 60.2 3.6 %) ; |
| 593 | +# @rank = qw ( 5th x 0 0 th) ; |
| 594 | +# @visitors = qw ( 307,641,000 m 23.8 4.1 %) ; |
| 595 | +# @reach = qw ( 26.4 x 1.9 2.7 %) ; |
| 596 | +# @page_requests = qw (10,817,000,000 b 15.3 1.5 %) ; |
| 597 | +# } |
| 598 | + |
| 599 | +# if ($2009_07) |
| 600 | +# { |
| 601 | +# @new_editors = qw ( 18,916 k -8.5 -1 %) ; |
| 602 | +# @active_editors = qw ( 90,659 k -0.3 -0.6 %) ; |
| 603 | +# @very_active_editors = qw ( 11,242 k -2.4 -0.7 %) ; |
| 604 | +# @article_count = qw ( 20,768,108 m 30.2 0.8 %) ; |
| 605 | +# @new_articles = qw ( 11,888 k -18.9 -30.3 %) ; |
| 606 | +# @edits = qw ( 12,219,008 m 6.3 0.7 %) ; |
| 607 | +# @commons_files = qw ( 4,831,659 m 61.1 3.7 %) ; |
| 608 | +# @rank = qw ( 5th x 0 0 th) ; |
| 609 | +# @visitors = qw ( 295,848,000 m 20.9 -2.5 %) ; |
| 610 | +# @reach = qw ( 25.7 x 0 -3.7 %) ; |
| 611 | +# @page_requests = qw (10,700,000,000 b 12.9 -3.0 %) ; |
| 612 | +# } |
| 613 | + |
# ---------------------------------------------------------------------------
# Main driver: build the plain-text synopsis, normalise the metric arrays for
# use in the HTML templates, then write the public and the extended reports.
#
# Each metric array is laid out as
#   [0] headline value   [1] unit (k/m/b/x)   [2] yearly change   [3] monthly change   [4] suffix
# A value of '?' means "not yet known" and is rendered as '...'.
# ---------------------------------------------------------------------------

# Two header lines explaining the Y(early) and M(onthly) columns and the unit letters.
$synopsis  = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ;
$synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ;

# One synopsis line per metric, in report order (reach is not part of the synopsis).
foreach my $entry (
  [ "M Unique Visitors, All Projects", \@visitors            ],
  [ "M Page Views, All Projects",      \@page_requests       ],
  [ " Site Rank",                      \@rank                ],
  [ "T Binary Files",                  \@commons_files       ],
  [ "M Wikipedia Article Count",       \@article_count       ],
  [ "D New Wikipedia Articles",        \@new_articles        ],
  [ "M Wikipedia Edits per Month",     \@edits               ],
  [ "M New Wikipedia Editors",         \@new_editors         ],
  [ "M Active Wikipedia Editors",      \@active_editors      ],
  [ "M Very Active Wikipedia Ed.",     \@very_active_editors ],
)
{ $synopsis .= &FormatSynopsisText ($entry->[0], "", @{ $entry->[1] }) ; }

print "\n\n$synopsis" ;
print "\n"."="x80 . "\n\n" ;

# Keep the values exactly as entered: the '?' check further down must look at
# the raw input, not at the reformatted numbers.
@visitors_            = @visitors ;
@page_requests_       = @page_requests ;
@rank_                = @rank ;
@commons_files_       = @commons_files ;
@article_count_       = @article_count ;
@new_articles_        = @new_articles ;
@edits_               = @edits ;
@new_editors_         = @new_editors ;
@active_editors_      = @active_editors ;
@very_active_editors_ = @very_active_editors ;
@reach_               = @reach ;

# Strip thousands separators from the headline value so it can be used as a number.
foreach my $metric (\@visitors, \@new_editors, \@active_editors, \@very_active_editors, \@article_count,
                    \@new_articles, \@edits, \@commons_files, \@rank, \@reach, \@page_requests)
{ $metric->[0] =~ s/,//g ; }

# Large counts are shown in millions (page requests in billions).
$visitors      [0] = sprintf ("%.0f",$visitors      [0]/1000000) ;
$article_count [0] = sprintf ("%.1f",$article_count [0]/1000000) ;
$edits         [0] = sprintf ("%.1f",$edits         [0]/1000000) ;
$commons_files [0] = sprintf ("%.1f",$commons_files [0]/1000000) ;
$page_requests [0] = sprintf ("%.1f",$page_requests [0]/1000000000) ;

# Smaller counts are shown in full, with the thousands separator re-inserted.
# The lookbehind keeps a value below 1000 from turning into e.g. ",567".
foreach my $metric (\@new_editors, \@active_editors, \@very_active_editors, \@new_articles)
{ $metric->[0] =~ s/(?<=\d)(\d\d\d)$/,$1/ ; }

# Yearly [2] and monthly [3] change: one decimal.
# @active_editors and @article_count are deliberately absent: their formatting
# was disabled in the original code, so those values stay as entered.
foreach my $metric (\@visitors, \@page_requests, \@new_editors, \@very_active_editors,
                    \@new_articles, \@edits, \@commons_files, \@reach)
{
  $metric->[2] = sprintf ("%.1f", $metric->[2]) ;
  $metric->[3] = sprintf ("%.1f", $metric->[3]) ;
}

# Rank changes are whole positions.
$rank [2] = sprintf ("%.0f", $rank [2]) ;
$rank [3] = sprintf ("%.0f", $rank [3]) ;

# NOTE: two statements of the form
#   $visitors [5] =~ ($visitors [2] >= 0) ? 'A' : 'E' ;
# used to follow here. They were pattern matches in void context ('=~', not '='),
# so they never stored anything. They are removed rather than "fixed":
# the trend colour is derived in WriteReports (TRENDS marker), and an
# assignment to slot 5 would presumably overwrite footnote text pushed onto
# the array (see the 'push @visitors, "1|..."' pattern in the data blocks).

# Unknown input ('?') is rendered as '...' in the report.
foreach my $pair (
  [ \@visitors,            \@visitors_            ],
  [ \@page_requests,       \@page_requests_       ],
  [ \@rank,                \@rank_                ],
  [ \@commons_files,       \@commons_files_       ],
  [ \@article_count,       \@article_count_       ],
  [ \@new_articles,        \@new_articles_        ],
  [ \@edits,               \@edits_               ],
  [ \@new_editors,         \@new_editors_         ],
  [ \@active_editors,      \@active_editors_      ],
  [ \@very_active_editors, \@very_active_editors_ ],
  [ \@reach,               \@reach_               ],
)
{
  my ($shown, $entered) = @$pair ;
  foreach my $i (0 .. 3)
  { $shown->[$i] = '...' if $entered->[$i] eq '?' ; }
}

$path_input   = "W:/@ Report Card/Input/" ;
$path_public  = "W:/@ Report Card/Public/" ;
$path_private = "W:/@ Report Card/Extended/" ; # few more charts with top 10 web properties based on data from comScore (slightly confidential)

# Same template, two audiences.
&WriteReports ($path_input, $path_public,  $public) ;
&WriteReports ($path_input, $path_private, $private) ;

print "\nReady\n\n" ;
exit ;

| 718 | + |
# WriteReports ($path_in, $path_out, $target_audience)
#
# Expands the template RT_yyyy_mm.html (read from the current directory) into
# three report variants in $path_out:
#   RC_<yyyy>_<mm>_detailed.html, RC_<yyyy>_<mm>_summary.html, RC_<yyyy>_<mm>_columns.html
# and, via WriteSynopsis, the synopsis file.
#
#   $path_in          folder holding files pulled in by '<!== INC file ==>' markers
#   $path_out         folder receiving the generated reports
#   $target_audience  $public or $private; stored in a global and not used in
#                     this sub (presumably consulted by Print, which is
#                     defined elsewhere - TODO confirm)
#
# Template lines are processed one at a time:
#   1. single-line HTML comments are removed
#   2. {{...}} date placeholders and the H2 / TRENDS keywords are filled in
#   3. '<!== ... ==>' markers (COMMENT, H1, A[..] H2, TABS, TAB, TRENDS, OUT, INC)
#      are expanded into HTML or switch output on/off per report variant
#   4. the line is written to each variant, wrapped in an HTML comment when that
#      variant is switched off
#
# Aborts (via Abort) when the template, an output file or an include file
# cannot be opened.
sub WriteReports
{
  $path_in         = shift ;
  $path_out        = shift ;
  $target_audience = shift ;

  &WriteSynopsis ($path_out) ;

  my $file_template = "RT_yyyy_mm.html" ;
  my $file_details  = "$path_out/RC_${p_year}_${p_month_d2}_detailed.html" ;
  my $file_summary  = "$path_out/RC_${p_year}_${p_month_d2}_summary.html" ;
  my $file_columns  = "$path_out/RC_${p_year}_${p_month_d2}_columns.html" ;

  # Bareword handles are kept on purpose: they are handed to Print by name.
  open TEMPLATE, '<', $file_template or &Abort ("Could not open template $file_template: $!\n") ;
  open DETAILS,  '>', $file_details  or &Abort ("Could not write $file_details: $!\n") ;
  open SUMMARY,  '>', $file_summary  or &Abort ("Could not write $file_summary: $!\n") ;
  open COLUMNS,  '>', $file_columns  or &Abort ("Could not write $file_columns: $!\n") ;

  # Which report variants receive the current line ('<!== OUT CDS ==>' markers).
  $write_details = $true ;
  $write_summary = $true ;
  $write_columns = $true ;

  # Audience switches ('<!== OUT PUBLIC|EXTENDED|ALWAYS ==>' markers).
  # Only set here; presumably read by Print - TODO confirm.
  $write_public  = $true ;
  $write_private = $true ;

  $iscomment = $false ; # left over from disabled multi-line comment handling

  while ($line = <TEMPLATE>)
  {
    chomp $line ;

    # Drop the first HTML comment that opens and closes on this line.
    # Multi-line comments are passed through unchanged.
    $line =~ s/<!--.*?-->// ;

    # File names of the form {{yyyy}}_{{mm-1}} / {{yyyy}}_{{mm+1}} must roll over
    # to the adjacent year in January / December.
    if ($line =~ /\{\{yyyy\}\}_\{\{mm[+-]1\}\}/)
    {
      if ($p_month == 1)
      { $line =~ s/\{\{yyyy\}\}_\{\{mm\-1\}\}/{{yyyy-1}}_{{mm-1}}/ ; } # Q&D temp fix
      if ($p_month == 12)
      { $line =~ s/\{\{yyyy\}\}_\{\{mm\+1\}\}/{{yyyy+1}}_{{mm+1}}/ ; } # Q&D temp fix
    }

    # $no_upd = "<font color=#800000>*<\/font>" ;

    if ($true) # test ?
    {
      # $no_upd = " <small><small><font color=#FF0000><b>chart could not be updated for current month</b></font></small></small>" ;

      # Section headers: keyword -> anchor + headline figure + title.
      # These run before the {{..}} substitutions below because the inserted
      # trend labels themselves contain {{m}} / {{y}} placeholders.
      $line =~ s/H2 (UNIQUE VISITORS)/A[$1] H2 {${visitors[0]} million|Unique Visitors, All Projects}/ ;
      $line =~ s/H2 (PAGE REQUESTS)/A[$1] H2 {${page_requests[0]} billion|Page Requests, All Projects}/ ;
      $line =~ s/H2 (WEB PROPERTIES)/A[$1] H2 {${rank[0]} in rank|Web Properties - Unique Visitors}/ ;
      $line =~ s/H2 (COMMONS FILES)/A[$1] H2 {${commons_files[0]} million|Binary Files $no_upd}/ ;
      $line =~ s/H2 (ARTICLE COUNT)/A[$1] H2 {${article_count[0]} million|Wikipedia Articles, Comparison with Other Projects $no_upd}/ ;
      $line =~ s/H2 (ARTICLES PER DAY)/A[$1] H2 {${new_articles[0]}|New Wikipedia Articles Per Day $no_upd}/ ;
      $line =~ s/H2 (EDITS PER MONTH)/A[$1] H2 {${edits[0]} million|Wikipedia Edits Per Month $no_upd}/ ;
      $line =~ s/H2 (NEW EDITORS PER MONTH)/A[$1] H2 {${new_editors[0]}|New Wikipedia Editors Per Month $no_upd}/ ;
      $line =~ s/H2 (ACTIVE EDITORS)/A[$1] H2 {${active_editors[0]}|Active Wikipedia Editors (5+ edits per month) $no_upd}/ ;
      $line =~ s/H2 (VERY ACTIVE EDITORS)/A[$1] H2 {${very_active_editors[0]}|Very Active Wikipedia Editors (100+ edits per month) $no_upd}/ ;

      # Trend boxes: keyword -> {period|yearly change}{period|monthly change}.
      $line =~ s/TRENDS UNIQUE VISITORS/TRENDS {$trend_one_year|${visitors[2]}%}{$trend_one_month|${visitors[3]}%}/ ;
      $line =~ s/TRENDS PAGE REQUESTS/TRENDS {$trend_one_year|${page_requests[2]}%}{$trend_one_month|${page_requests[3]}%}/ ;
      $line =~ s/TRENDS WEB PROPERTIES/TRENDS {$trend_one_year|${rank[2]}}{$trend_one_month|${rank[3]}}/ ;
      $line =~ s/TRENDS COMMONS FILES/TRENDS {$trend_one_year|${commons_files[2]}%}{$trend_one_month|${commons_files[3]}%}/ ;
      $line =~ s/TRENDS ARTICLE COUNT/TRENDS {$trend_one_year|${article_count[2]}%}{$trend_one_month|${article_count[3]}%}/ ;
      $line =~ s/TRENDS ARTICLES PER DAY/TRENDS {$trend_one_year|${new_articles[2]}%}{$trend_one_month|${new_articles[3]}%}/ ;
      $line =~ s/TRENDS EDITS PER MONTH/TRENDS {$trend_one_year|${edits[2]}%}{$trend_one_month|${edits[3]}%}/ ;
      $line =~ s/TRENDS NEW EDITORS PER MONTH/TRENDS {$trend_one_year|${new_editors[2]}%}{$trend_one_month|${new_editors[3]}%}/ ;
      $line =~ s/TRENDS ACTIVE EDITORS/TRENDS {$trend_one_year|${active_editors[2]}%}{$trend_one_month|${active_editors[3]}%}/ ;
      $line =~ s/TRENDS VERY ACTIVE EDITORS/TRENDS {$trend_one_year|${very_active_editors[2]}%}{$trend_one_month|${very_active_editors[3]}%}/ ;

      # Date placeholders. Literal braces are escaped: an unescaped '{' inside a
      # pattern is deprecated or rejected by newer perl versions.
      $line =~ s/\{\{yyyy\}\}/$p_year/g ;
      $line =~ s/\{\{yyyy\-1\}\}/$p_year_prev/g ;
      $line =~ s/\{\{yyyy\+1\}\}/$p_year_next/g ;
      $line =~ s/\{\{yyyy\+m2\}\}/$p_year_plus_m2/g ;
      $line =~ s/\{\{month\}\}/$p_month_name/g ;
      $line =~ s/\{\{month\-1\}\}/$p_month_name_prev/g ;
      $line =~ s/\{\{month\+1\}\}/$p_month_name_next/g ;
      $line =~ s/\{\{month\+2\}\}/$p_month_name_next2/g ;

      $line =~ s/\{\{y\}\}/$p_year_short/g ;
      $line =~ s/\{\{y\-1\}\}/$p_year_prev_short/g ;
      $line =~ s/\{\{yy\}\}/$p_year_short_d2/g ;
      $line =~ s/\{\{yy\-1\}\}/$p_year_prev_short_d2/g ;

      $line =~ s/\{\{m\}\}/$p_month/g ;
      $line =~ s/\{\{m\-1\}\}/$p_month_prev/g ;
      $line =~ s/\{\{mm\}\}/$p_month_d2/g ;
      $line =~ s/\{\{mm\-1\}\}/$p_month_prev_d2/g ;
      $line =~ s/\{\{mm\+1\}\}/$p_month_next_d2/g ;

      $line =~ s/\{\{\(mm\/yy\)-1\}\}/$p_year_month_m1/g ;
    }
    else
    {
      # Template preview: show the placeholder names instead of their values.
      $line =~ s/\{\{yyyy\}\}/[[yyyy]]/g ;
      $line =~ s/\{\{yyyy\-1\}\}/[[yyyy-1]]/g ;
      $line =~ s/\{\{yyyy\+1\}\}/[[yyyy+1]]/g ;
      $line =~ s/\{\{yyyy\+m2\}\}/[[yyyy+m2]]/g ;
      $line =~ s/\{\{month\}\}/[[month]]/g ;
      $line =~ s/\{\{month\-1\}\}/[[month-1]]/g ;
      $line =~ s/\{\{month\+1\}\}/[[month+1]]/g ;
      $line =~ s/\{\{month\+2\}\}/[[month+2]]/g ;

      $line =~ s/\{\{y\}\}/y/g ;
      $line =~ s/\{\{y\-1\}\}/y-1/g ;
      $line =~ s/\{\{m\}\}/m/g ;
      $line =~ s/\{\{m\-1\}\}/m-1/g ;
      $line =~ s/\{\{mm\}\}/mm/g ;
      $line =~ s/\{\{mm\-1\}\}/mm-1/g ;
      $line =~ s/\{\{mm\+1\}\}/mm+1/g ; # pattern used to read {{mm\+}} and never matched

      $line =~ s/\{\{\(mm\/yy\)-1\}\}/(mm\/yy)-1/g ;
    }

    # <!== COMMENT {text} ==>
    if ($line =~ /<!==\s*COMMENT\s*\{[^\}]*\}\s*==>/)
    {
      $comment = $line ;
      $comment =~ s/^.*?\{// ;
      $comment =~ s/\}.*$// ;
      $line = " <span class=comment>$comment</span>\n" ; # closing '>' of </span> was missing
    }

    # <!== H1 {title} ==>
    if ($line =~ /<!==\s*H1\s*\{[^\}]*\}\s*==>/)
    {
      $title = $line ;
      $title =~ s/^.*?\{// ;
      $title =~ s/\}.*$// ;
      $line = " <tr>\n" .
              " <td class=h1 colspan=99><span class=h9>$title</span></td>\n" .
              " </tr>\n" .
              " <tr>\n" .
              " <td><small><small> </small></small></td>\n" .
              " </tr>\n" ;
    }

    # <!== A[anchor text] H2 {metric|title} ==>
    if ($line =~ /<!==\s*A\[[^\]]*\] H2\s*\{[^\}]*\}\s*==>/)
    {
      # Anchor name: text between A[ and ], whitespace -> '_', lower case.
      ($anchor = $line) ;
      $anchor =~ s/^.*?A\[// ;
      $anchor =~ s/\].*$// ;
      $anchor =~ s/\s/_/g ;
      $anchor = lc($anchor) ;

      $parms = $line ;
      $parms =~ s/^.*?\{// ;
      $parms =~ s/\}.*$// ;
      ($metric,$title) = split ('\|', $parms,2) ;
      ($title2 = $title) =~ s/ /_/g ; # not used in this sub
      # NOTE(review): '</sup>' below has no matching '<sup>' in this sub; kept
      # as-is in case a metric string carries the opening tag - confirm and remove.
      $line = " <tr>\n" .
              " <td class=score><a id='$anchor' name='$anchor'></a><span class=bg>$metric</sup></span></td>\n" .
              " <td class=h2><span class=h2>$title</span><br></td>\n" .
              "</tr>\n" ;
    }

    # <!== TABS {id|label|label|...} ==> : tab strip; fragments are numbered id+1, id+2, ...
    if ($line =~ /<!==\s*TABS\s*\{[^\}]*\}\s*==>/)
    {
      $parms = $line ;
      $parms =~ s/^.*?\{// ;
      $parms =~ s/\}.*$// ;
      ($id,@texts) = split ('\|', $parms) ;
      $line  = " <div id=\"container-" . ($id/10) . "\">\n" ;
      $line .= " <ul>\n" ;
      foreach $text (@texts)
      {
        $id++ ;
        $line .= " <li><a href=\"#fragment-$id\"><span>$text</span></a></li>\n" ;
      }
      $line .= " </ul>\n" ;
      $id_hi = $id ; # last fragment of this container: its TAB also closes the container
    }

    # <!== TAB {id|START} ==>, <!== TAB {id|END} ==> or <!== TAB {id|content} ==>
    if ($line =~ /<!==\s*TAB\s*\{[^\}]*\}\s*==>/)
    {
      $parms = $line ;
      $parms =~ s/^.*?\{// ;
      $parms =~ s/\}.*$// ;
      ($id,$text) = split ('\|', $parms) ;

      if ($text =~ /^START/i)
      {
        $line = "\n <div id=\"fragment-$id\">\n" ;
      }
      elsif ($text =~ /^END/i)
      {
        # Only the END of the last fragment emits markup (closing the container);
        # for other fragments the marker line is passed on unchanged.
        if ($id == $id_hi)
        { $line = " </div>" ; }
      }
      else
      {
        $line = "\n <div id=\"fragment-$id\">\n $text\n </div>\n" ;
        if ($id == $id_hi)
        { $line .= " </div>" ; }
      }
    }

    # <!== TRENDS {from|to|change}{from|to|change} ==> : yearly and monthly trend box
    if ($line =~ /<!==\s*TRENDS\s*\{[^\}]*\}\{[^\}]*\}\s*==>/)
    {
      $parms = $line ;
      $parms =~ s/^[^\{]*\{// ;
      $parms =~ s/\}[^\}]*$// ;
      ($trendY,$trendM) = split ('\}\s*\{', $parms,2) ;

      # ($colorY,$month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
      # ($colorM,$month1M,$month2M,$trendM) = split ('\|',$trendM) ;
      ($month1Y,$month2Y,$trendY) = split ('\|',$trendY) ;
      ($month1M,$month2M,$trendM) = split ('\|',$trendM) ;

      # Colour class suffix: A for zero or growth, E for decline. The '+' is
      # only added when no sign is present, so "-0.0" no longer becomes "+-0.0".
      if ($trendY >= 0)
      { $colorY = "A" ; $trendY = "+$trendY" unless $trendY =~ /^[+-]/ ; }
      else
      { $colorY = "E" ; }
      if ($trendM >= 0)
      { $colorM = "A" ; $trendM = "+$trendM" unless $trendM =~ /^[+-]/ ; }
      else
      { $colorM = "E" ; }

      #<!== TRENDS {A|5/8|5/9|+12%}{A|4/9|5/9|+8%} ==>
      $line = " <td class=date>\n" .
              " <table border=0>\n" .
              " <tr>\n" .
              " <td class=date$colorY><b>Y</b> ${month1Y}⇒${month2Y}</td>\n" .
              " <td class=date$colorY>$trendY</td>\n" .
              " </tr>\n" .
              " <tr>\n" .
              " <td class=date$colorM><b>M</b> ${month1M}⇒${month2M}</td>\n" .
              " <td class=date$colorM>$trendM</td>\n" .
              " </tr>\n" .
              " </table>\n" .
              " </td>\n" ;
    }

    # <!== OUT ... ==> : audience switch (PUBLIC/EXTENDED/ALWAYS) or report
    # variant switch (any combination of the letters C, D and S).
    if ($line =~ /<!==\s*OUT\s*PUBLIC\s*==>/)
    {
      $write_public  = $true ;
      $write_private = $false ;
    }
    elsif ($line =~ /<!==\s*OUT\s*EXTENDED\s*==>/)
    {
      $write_public  = $false ;
      $write_private = $true ;
    }
    elsif ($line =~ /<!==\s*OUT\s*ALWAYS\s*==>/)
    {
      $write_public  = $true ;
      $write_private = $true ;
    }
    elsif ($line =~ /<!==\s*OUT .*\s*==>/)
    {
      $line2 = $line ;
      $line2 =~ s/^.*<!==\s*OUT\s*// ;
      $line2 =~ s/\s*==>.*$// ;

      $write_details = $false ;
      $write_summary = $false ;
      $write_columns = $false ;
      if ($line2 =~ /C/)
      { $write_columns = $true ; }
      if ($line2 =~ /D/)
      { $write_details = $true ; }
      if ($line2 =~ /S/)
      { $write_summary = $true ; }

      # The marker itself is copied to every variant.
      &Print (COLUMNS, "$line\n") ;
      &Print (DETAILS, "$line\n") ;
      &Print (SUMMARY, "$line\n") ;
      next ;
    }

    # <!== INC file ==> : copy a file from $path_in verbatim into the enabled variants
    if ($line =~ /<!==\s*INC .*\s*==>/)
    {
      $line2 = $line ;
      $line2 =~ s/^.*<!==\s*INC\s*// ;
      $line2 =~ s/\s*==>.*$// ;

      $file = "$path_in/$line2" ;
      print "\nInclude $file\n" ;
      if (! -e $file)
      { &Abort ("File $file not found\n") ; }
      open FILE, '<', $file or &Abort ("Could not open $file: $!\n") ;
      while (defined ($line = <FILE>)) # line by line; included lines keep their own line ends
      {
        if ($write_columns)
        { &Print (COLUMNS, $line) ; }
        if ($write_details)
        { &Print (DETAILS, $line) ; }
        if ($write_summary)
        { &Print (SUMMARY, $line) ; }
      }
      close FILE ;
      next ;
    }

    # Regular line: write it to each variant, or comment it out where that
    # variant is switched off. A line that already ends a comment ('-->')
    # only gets the opening '<!--'.
    if ($write_columns)
    { &Print (COLUMNS, "$line\n") ; }
    elsif ($line =~ /-->/)
    { &Print (COLUMNS, "<!-- $line\n") ; }
    else
    { &Print (COLUMNS, "<!-- $line -->\n") ; }

    if ($write_details)
    { &Print (DETAILS, "$line\n") ; }
    elsif ($line =~ /-->/)
    { &Print (DETAILS, "<!-- $line\n") ; }
    else
    { &Print (DETAILS, "<!-- $line -->\n") ; }

    if ($write_summary)
    { &Print (SUMMARY, "$line\n") ; }
    elsif ($line =~ /-->/)
    { &Print (SUMMARY, "<!-- $line\n") ; }
    else
    { &Print (SUMMARY, "<!-- $line -->\n") ; }
  }

  # Close explicitly: write errors surface here instead of being lost when the
  # handles are silently reopened by the next call or dropped at exit.
  close TEMPLATE ;
  close DETAILS or &Abort ("Could not finish writing $file_details: $!\n") ;
  close SUMMARY or &Abort ("Could not finish writing $file_summary: $!\n") ;
  close COLUMNS or &Abort ("Could not finish writing $file_columns: $!\n") ;
}
| 1036 | + |
# Anchor ($text)
# Turns a heading into an HTML anchor name: surrounding whitespace is removed,
# every remaining whitespace character becomes an underscore, and the result
# is lower-cased. The argument itself is not modified.
sub Anchor
{
  my ($label) = @_ ;

  for ($label)
  {
    s/^\s+// ;   # leading whitespace
    s/\s+$// ;   # trailing whitespace
    s/\s/_/g ;   # one underscore per inner whitespace character
  }

  return lc $label ;
}
| 1045 | + |
sub WriteSynopsis
{
  # Write the synopsis of the report card for the reporting month.
  #
  # Parameter: $path_out - folder that receives both output files
  #
  # 1) RC_yyyy_mm_synopsis.txt  : current content of global $synopsis
  #                               (presumably the plain text synopsis prepared by the caller - TODO confirm)
  # 2) RC_yyyy_mm_synopsis.html : html page built here from the template below;
  #                               marker DATA is replaced by one table row per metric (see FormatSynopsisTable)
  #                               and {{..}} place holders are replaced by year/month values from the $p_.. globals
  #
  # Side effects: overwrites globals $notice_synopsis, $synopsis, $data and resets @synopsis_notes.
  my $path_out = shift ;

  $notice_synopsis = "" ;
  # "<font color=#008000><b>New: multi-year trends for most metrics. Depending on history available reporting period can vary.</b></font>" ;

  my $file_txt  = "$path_out/RC_${p_year}_${p_month_d2}_synopsis.txt" ;
  my $file_html = "$path_out/RC_${p_year}_${p_month_d2}_synopsis.html" ;

  open SYNOPSIS, '>', $file_txt or &Abort ("Could not open '$file_txt' for writing: $!") ;
  print SYNOPSIS $synopsis ;
  close SYNOPSIS ;

# some day also get this code from RT_yyyy_mm.html, for uniformity
# note: the here-document interpolates, so every javascript '$' must be written as '\$'
# (an unescaped '$.' would be replaced by Perl's input line number)
$synopsis = <<__SYNOPSIS__ ;
<html lang="en">
<head>
<title>Wikimedia Report Card Synopsis - {{month}} {{yyyy}}</title>
<meta http-equiv="content-type" content="text/html"; charset="iso-8859-1">
<meta http-equiv="Window-target" content="_top">
<meta name="language" content="en,English">
<meta name="robots" content="index,follow">
<link rel="shortcut icon" href="http://wikimediafoundation.org/favicon.ico" />
<link rel="apple-touch-icon" href="http://wikimediafoundation.org/favicon.ico" />
<script src="assets/jquery-1.1.3.1.pack.js" type="text/javascript"></script>
<script src="assets/jquery.history_remote.pack.js" type="text/javascript"></script>
<script src="assets/jquery.tabs.pack.js" type="text/javascript"></script>
<script src="assets/jquery.tablesorter.js" type="text/javascript"></script>

<script type="text/javascript">
\$(function()
{
 \$("#Synopsis").tablesorter();
})
</script>

<script type="text/javascript">
\$(document).ready(
function()
{
\$("#Synopsis").tablesorter(sortList: [[0,0]] );
}
);
</script>

<script type="text/javascript">
\$.tablesorter.addParser({
 id: "nohtml",
 is: function(s) { return false; },
 format: function(s) { return s.replace(/<.*?>/g,"").replace(/ /g,""); },
 type: "text"
});
\$.tablesorter.addParser({
 id: "digitsonly",
 is: function(s) { return false; },
 format: function(s) { return \$.tablesorter.formatFloat(s.replace(/<.*?>/g,"").replace(/ /g,"").replace(/,/g,"").replace(/-/,"-1")); },
 type: "numeric"
});
</script>

<style type="text/css">
/* tables */
table.tablesorter
{
 font-family:arial;
 background-color: #FFF; // #CDCDCD;
 margin:10px 0pt 15px;
 font-size: 7pt;
 width: 80%;
 text-align: left;
}
table.tablesorter thead tr th, table.tablesorter tfoot tr th
{
 background-color: #AAB;
 border: 1px solid #FFF;
 font-size: 8pt;
 padding: 4px;
}
table.tablesorter thead tr .header
{
 background-image: url(assets/bg.gif);
 background-repeat: no-repeat;
 background-position: center right;
 cursor: pointer;
}
table.tablesorter tbody td
{
 color: #3D3D3D;
 padding: 4px;
 background-color: #FFF;
 vertical-align: top;
}
table.tablesorter tbody tr.odd td
{ background-color:#F0F0F6; }
table.tablesorter thead tr .headerSortUp
{ background-image: url(assets/asc.gif); }
table.tablesorter thead tr .headerSortDown
{ background-image: url(assets/desc.gif); }
table.tablesorter thead tr .headerSortDown, table.tablesorter thead tr .headerSortUp
{ background-color: #BBF; //#8dbdd8; }
<!--
body {font-family:arial,sans-serif;background-color:#B0B0B0}
table,td,tr{background-color:#FFFFFF;font-size:11pt}
h1{font-size:22px}
h2{font-size:18px ; color:#006000 ; margin-top:40px}
h3{font-size:15px ; color:#006000}
form{margin:0}
a:link {color:#000080;text-decoration:none}
a:visited {color:#000080;text-decoration:none}
a:active {color:#000080;text-decoration:none}
a:hover {color:#0000FF;text-decoration:underline}
a img {border-color:black}
td.detail-left {font-size:12px ; color:#000000 ; text-align:left ; }
td.detail-center {font-size:12px ; color:#000000 ; text-align:center ; }
td.detail-right {font-size:12px ; color:#000000 ; text-align:right ; }
-->
</style>
</head>
<body>
<table width=800 cellpadding=18 align=center>
<tr>
 <td align='center'>

 <table width=95%>

 <tr>
 <td width=100% colspan=99>
 <table width=100%>
 <tr>
 <td align=left width=150 valign=top><img src='assets/WikimediaLogo.png' width=30></td>
 <td align=center valign=top><h1>Wikimedia Report Card <font color=#008000>{{month}} {{yyyy}} </font></h1>
 </td>
 <td align=right width=150 valign=top><h1>Synopsis</h1></td>
 <!-- <td align=right width=150 valign=top><small><small>Published<br>{{month+2}}<br>{{yyyy+m2}}</small></small></td> -->
 </tr>
 <tr>
 <td align=left width=150 valign=top><!-- <small><a href='RC_{{yyyy}}_{{mm-1}}_synopsis.html'>⇐ {{month-1}}</a></small>--> </td>
 <td align=center valign=top>
 <small>⇒ <a href='RC_{{yyyy}}_{{mm}}_detailed.html'>Detailed version</a> ⇒ <a href='RC_{{yyyy}}_{{mm}}_summary.html'>Summary, 1 column</a> ⇒ <a href='RC_{{yyyy}}_{{mm}}_columns.html'>Summary, 2 columns</a></small>
 </td>
 <td align=right width=150 valign=top><!--<small><a href='RC_{{yyyy}}_{{mm+1}}_synopsis.html'>{{month+1}} ⇒</a></small>--></td>
 </tr>
 </table>
 </td>
 </tr>
 <tr>
 <td colspan=99>
 <small>
 <center>
 $notice_synopsis
 </center> <!-- General comment -->
 </small>
 </td>
</tr>
<tr><td colspan=99 align=center>
<table border=1 id='Synopsis' class=tablesorter>
<!-- <tr> -->
<!-- <td align='left' colspan=99> -->
<!-- <font color=#800000><b><small>No English Wikipedia dump was produced this month.<br>Without it some totals and trends are also meaningless and left blank.</small></b></font> -->
<!-- </td> -->
<!-- </tr> -->
DATA
</table>
</td></tr>
 <tr>
 <td colspan=99 align=center>
 <hr class=thin>
 <small><small><font color=808080>Author Erik Zachte - mail: ezachte@###.org (nospam: ###=wikimedia)</font></small></small>
 </td>
 </tr>
 </table>
<script type='text/javascript'>
\$('#Synopsis').tablesorter({
 // debug:true,
 headers:{0:{sorter:'nohtml'},1:{sorter:false},2:{sorter:'digitsonly'},3:{sorter:'digitsonly'},4:{sorter:false}}
});
</script>

</body>
</html>
__SYNOPSIS__

  # footnotes are collected by FormatSynopsisTable, start with a clean list
  undef @synopsis_notes ;

# $data = "<tr><th>Unique Visitors</th></tr>\n" ;
# $synopsis = "Y: " . substr ($p_month_name,0,3) . ",$p_year_prev->$p_year k=thousand m=million b=billion\n" ;
# $synopsis .= "M: $p_year," . substr ($p_month_name_prev,0,3) . "->" . substr ($p_month_name,0,3) . " M=monthly D=daily T=Total\n\n" ;
  $data = "<thead><tr><th class=detail-left valign=top> <b>Metric</b> </th>" .
          "<th class=detail-center valign=top> <b>Now</b> <br>{{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top> <b>Yearly change</b> <br>{{mm}}/{{yy-1}} ⇒ {{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top> <b>Monthly change</b> <br>{{(mm/yy)-1}} ⇒ {{mm}}/{{yy}}</th>" .
          "<th class=detail-center valign=top> <b>Notes</b> </th></tr></thead>\n<tbody>\n" ;
# $data .= "<tr><th> </th><th> </th><th> </th><th> </th><th> </th></tr></thead>" ;

# $comment_prev_month = "<sup><font color=#800000>*</font></sup>" ; # qqq

  # one table row per metric
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#unique_visitors'>Unique Visitors</a> <sup>All</sup>", "", @visitors) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#page_requests'>Page Requests</a> <sup>All</sup>", "", @page_requests) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#web_properties'>Site Rank</a> <sup>All</sup>", "", @rank) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#commons_files'>Binary Files</a> <sup>Commons</sup> $comment_prev_month", "", @commons_files) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#article_count'>Article Count</a> <sup>Wp</sup> $comment_prev_month", "", @article_count) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#articles_per_day'>New Articles Per Day</a> <sup>Wp</sup> $comment_prev_month", "", @new_articles) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#edits_per_month'>Edits</a> <sup>Wp</sup> $comment_prev_month", "", @edits) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#new_editors_per_month'>New Editors <sup>Wp</sup></a> $comment_prev_month", "", @new_editors) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#active_editors'>Active Editors</a> <sup>Wp</sup> $comment_prev_month", "", @active_editors) ;
  $data .= &FormatSynopsisTable ("<a href='RC_{{yyyy}}_{{mm}}_detailed.html#very_active_editors'>Very Active Editors</a> <sup>Wp</sup> $comment_prev_month", "", @very_active_editors) ;
  $data .= "</tbody>\n<tfoot><tr><td colspan=99> </td></tr>\n" ;
  $data .= "<tr><td colspan=99><b><small>Repeated observations below are grayed</small></b></td></tr>\n" ;

  foreach $note (@synopsis_notes)
  {
    $data .= "<tr><td class=detail-left colspan=99>$note</td></tr>" ;
  }
# $data .= "<tr><td class=detail-left colspan=99><font color=#800000><small>* For German and Polish Wikipedias data for June were not yet available: reused counts from May</small></font></td></tr>" ;
  $data .= "<tr><td class=detail-left colspan=99><font color=#808080><small>All = All projects, Wp = Wikipedia project / B = billion, M = million, k = thousand</small></font></td></tr></tfoot>" ;

  $synopsis =~ s/DATA/$data/ ;

  # fill in place holders; braces are escaped, an unescaped literal '{' is deprecated or rejected by newer perls
  $synopsis =~ s/\{\{yyyy\}\}/$p_year/g ;
  $synopsis =~ s/\{\{yyyy-1\}\}/$p_year_prev/g ;
  $synopsis =~ s/\{\{yyyy\+m2\}\}/$p_year_plus_m2/g ;
  $synopsis =~ s/\{\{month\}\}/$p_month_name/g ;
  $synopsis =~ s/\{\{month-1\}\}/$p_month_name_prev/g ;
  $synopsis =~ s/\{\{month\+1\}\}/$p_month_name_next/g ;
  $synopsis =~ s/\{\{month\+2\}\}/$p_month_name_next2/g ;

  $synopsis =~ s/\{\{y\}\}/$p_year_short/g ;
  $synopsis =~ s/\{\{y\-1\}\}/$p_year_prev_short/g ;
  $synopsis =~ s/\{\{yy\}\}/$p_year_short_d2/g ;
  $synopsis =~ s/\{\{yy\-1\}\}/$p_year_prev_short_d2/g ;
  $synopsis =~ s/\{\{m\}\}/$p_month/g ;
  $synopsis =~ s/\{\{m\-1\}\}/$p_month_prev/g ;
  $synopsis =~ s/\{\{mm\}\}/$p_month_d2/g ;
  $synopsis =~ s/\{\{mm-1\}\}/$p_month_prev_d2/g ;
  $synopsis =~ s/\{\{mm\+1\}\}/$p_month_next_d2/g ;

  $synopsis =~ s/\{\{\(mm\/yy\)-1\}\}/$p_year_month_m1/g ;

  open SYNOPSIS, '>', $file_html or &Abort ("Could not open '$file_html' for writing: $!") ;
  print SYNOPSIS $synopsis ;
  close SYNOPSIS ;
}
| 1285 | + |
sub Print
{
  # Write $text to file handle $handle, provided the report for the current
  # target audience (public or private) is being written.
  # Unless in debug mode html comments ('<!--') and template markup ('<!==')
  # are dropped, with the exception of MSIE conditional comments ('[if lte').
  # Note: $handle and $text are globals, as they always were.
  ($handle, $text) = @_ ;

  unless ($debug || ($text =~ /\[if lte/)) # Q&D: keep MSIE directive
  {
    return if $text =~ /<!--/ ; # comments
    return if $text =~ /<!==/ ; # template markup
  }

  print $handle $text if ($target_audience == $public)  && $write_public ;
  print $handle $text if ($target_audience == $private) && $write_private ;
}
| 1304 | + |
sub FormatSynopsisText
{
  # Format one metric as a line for the plain text synopsis.
  # Arguments: label, comment, followed by the metric fields:
  #   value, size (b, m, k or x), yearly change, monthly change, unit of change ('th' for rank)
  # Returns  : 'label  value size (Y:yearly / M:monthly) comment' plus newline
  # Note: all working variables are globals, as they always were.
  ($label, $comment, @metrics) = @_ ;
  ($metric, $size, $inc_y, $inc_m, $inc) = @metrics [0..4] ;

  $metric =~ s/,//g ;

  if ($inc eq "th") # rank
  { ($inc_y, $inc_m, $inc) = ("$inc_y ", "$inc_m ", " ") ; }

  $size =~ s/[x]/ / ;

  # anything without a minus sign gets an explicit plus sign, then pad and add unit
  foreach my $change ($inc_y, $inc_m)
  {
    $change = '+' . $change unless $change =~ /-/ ;
    $change = sprintf ("%5s", $change) . $inc ;
  }

  # scale value to the unit shown, a metric consisting of dots only (no data) is left alone
  my %divisor = (b => 1000000000, m => 1000000, k => 1000) ;
  if ($metric !~ /^\.+$/)
  { $metric = sprintf ("%.0f", $metric / ($divisor {$size} || 1)) ; }

  return sprintf ("%-20s%8s (Y:%s / M:%s) %s\n", $label, "$metric $size", $inc_y, $inc_m, $comment) ;
}
| 1347 | + |
sub FormatSynopsisTable
{
  # Format one metric as a row for the html synopsis table.
  # Arguments: label (html), comment (not used in the row), followed by the metric fields:
  #   value, size (b, m, k or x), yearly change, monthly change, unit of change ('th' for rank),
  #   notes as 'reference|note text'
  # Returns  : html for one table row (<tr>..</tr>) plus newline
  # Side effect: a non empty note text is appended to global @synopsis_notes.
  # Note: all working variables are globals, as they always were.
  $label = shift ;
  $comment = shift ;

  @metrics = @_ ;

  $metric = $metrics [0] ;
  $size = $metrics [1] ;
  $inc_y = $metrics [2] ; # yearly
  $inc_m = $metrics [3] ; # monthly
  $inc = $metrics [4] ; # perc ?
  $notes = $metrics [5] ; # 'reference|note text'

  ($notes_ref,$notes) = split ('\|', $notes) ;
  if ($notes ne "")
  {
    # text between '#' and first bracket (<>) will be grayed (repeated remarks)
    $notes =~ s/#([^<>]+)/<font color=#808080>$1<\/font>/g ;
    push @synopsis_notes, $notes ;
  }

  $metric =~ s/,//g ;
  if ($inc eq "th") # rank
  {
    $inc_y .= " " ;
    $inc_m .= " " ;
    $inc = " " ;
  }
  $size=~ s/[x]/ / ;


  # anything without a minus sign gets an explicit plus sign
  if ($inc_y !~ /-/) { $inc_y = '+' . $inc_y ; }
  if ($inc_m !~ /-/) { $inc_m = '+' . $inc_m ; }
  $inc_y = sprintf ("%5s", $inc_y) . $inc ;
  $inc_m = sprintf ("%5s", $inc_m) . $inc ;

  # thousands are scaled here and keep lower case 'k' (see legend below the table),
  # billions and millions are shown as given, with upper case unit
  # (a second, unreachable test for 'k' has been removed)
  if ($size eq "k")
  { $metric = sprintf ("%.1f", $metric / 1000) ; }
  elsif ($size eq "b")
  { $size = "B" ; }
  elsif ($size eq "m")
  { $size = "M" ; }
  else
  { $size = " " ; }

  if ($notes_ref eq "")
  { $notes_ref = ' ' ; }

  $metric = "$metric $size" ;

  # gray out missing ('..') and zero values; each value is wrapped once only
  # (the test for $metric used to be coded twice, which wrapped a '..' metric in two font tags)
  if (($metric =~ /\.\./) || ($metric =~ /^0\.0/)) { $metric = "<font color=#C0C0C0>$metric</font>" ; }
  if (($inc_y =~ /\.\./) || ($inc_y =~ /^0\.0/)) { $inc_y = "<font color=#C0C0C0>$inc_y</font>" ; }
  if (($inc_m =~ /\.\./) || ($inc_m =~ /^0\.0/)) { $inc_m = "<font color=#C0C0C0>$inc_m</font>" ; }

  my $text = "<tr><td class=detail-left>$label</td><td class=detail-right>$metric</td><td class=detail-right>$inc_y</td><td class=detail-right>$inc_m</td><td class=detail-right>$notes_ref</td></tr>\n" ;
  return $text ;
}
| 1409 | + |
sub Abort
{
  # Report a fatal problem on STDOUT and end the script.
  # Exit status is 1, so a calling shell script or scheduler can tell an abort from a normal end
  # (plain 'exit' reported success).
  $msg = shift ;
  chomp $msg ;
  print "\n!!! Abort script: '$msg'\n" ;
  exit 1 ;
}
| 1417 | + |
Property changes on: trunk/wikistats/reportcard/ReportCardGenerateHtml.pl |
___________________________________________________________________ |
Added: svn:eol-style |
1417 | 1418 | + native |
Index: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutputYearly.pl |
— | — | @@ -1,1240 +1,1240 @@ |
#!/usr/local/bin/perl
# -i "w:/# Out Bayes" -o "w:/@ Report Card/Data"

  use lib '/home/ezachte/lib' ;
  use EzLib ;
  $trace_on_exit = $true ;
  ez_lib_version (2) ;

  # last month to report on: 13 months up to and including this month are collected
  ($month_0, $year_0) = ("08", 2010) ;

# set defaults mainly for tests on local machine
  default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;

  use Getopt::Std ;

# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;

  $maxpopularwikis = 25 ;

  # project codes, and for the yearly overview: codes plus matching column titles
  @projects   = qw (wb wk wn wp wq ws wv commons) ;
  @projects2  = qw (wb wk wn wp wq ws wv wx tot) ;
  @projects2b = qw (Wikibooks Wiktionary Wikinews Wikipedia Wikiquote Wikisource Wikiversity Other Total) ;

  # main flow
  &LogArguments ;
  &ParseArguments ;
  &InitProjectNames ;
  &InitReportNames ;
  &ReadStatisticsMonthly ;
  &WriteYearlyData ;
# &WriteMonthlyData ;
  exit ;
|
34 | | -
|
sub LogArguments
{
  # Parse command line switches (-i -o -l -p -f -t, each with a value) into global %options
  # and echo them, one per line, sorted by switch.
  getopt ("iolpft", \%options) ;

  my $arguments = join '', map { " -$_ " . $options {$_} . "\n" } sort keys %options ;

  print ("\nArguments\n$arguments\n") ;
# &Log ("\nArguments\n$arguments\n") ;
}
|
44 | | -
|
sub ParseArguments
{
  # Set input and output folder (globals $path_in, $path_out), derive the names of the
  # csv files to write and initialize the comparison periods for the reporting month.
  # Parsing of switches -i and -o is disabled for now, folders are hard coded below.

# my @options ;
# getopt ("io", \%options) ;

# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;

# $path_in = $options {"i"} ;
# $path_out = $options {"o"} ;

# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;

  ($path_in, $path_out) = ('w:/# out bayes', 'w:/@ report card/data') ;

  print "Input folder: $path_in\n" .
        "Output folder: $path_out\n" .
        "\n" ;

  $file_csv_out      = $path_out . '/StatisticsMonthly_' . $year_0 . '_' . $month_0 . '.csv' ;
  $file_csv_out_year = $path_out . '/StatisticsYearly.csv' ;

  &SetComparisonPeriods ($year_0,$month_0) ;
}
|
71 | | -
|
sub ReadStatisticsMonthly
{
  # Collect monthly counts for every project (wx = other projects),
  # followed by the counts of binaries per file extension for Commons.
  foreach my $project_code (qw (wb wk wn wp wq ws wv wx))
  { &ReadStatisticsMonthlyForProject ($project_code) ; }

  &ReadStatisticsPerBinariesExtensionCommons ;
}
|
85 | | -
|
sub ReadStatisticsMonthlyForProject
{
  # Read monthly counts for all wikis of one project and add them to the global tables.
  #
  # Parameter: $project - project code, used for folder name csv_<project> and in hash keys
  #
  # Input 1: StatisticsMonthly.csv           -> f = 1,4 (and for Wikipedia also 6,11): new editors, articles, new articles, edits
  # Input 2: StatisticsUserActivitySpread.csv -> f = 2,3: active and very active editors (registered users, article namespace)
  #
  # Months are numbered by &MonthsSinceYearAgo, only months 0..12 are used.
  # Updates globals %values {"f,m"}{"project,language"}, %totals {"f,m"}, %totals_project {"f,m"}{project},
  # %june_articles, %years and %languages.
  my $project = shift;

  my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
  my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;

  if (! -e $file_csv_in_1)
  { &Abort ("Input file '$file_csv_in_1' not found") ; }
  if (! -e $file_csv_in_2)
  { &Abort ("Input file '$file_csv_in_2' not found") ; }

  print "Read '$file_csv_in_1'\n" ;
  open CSV_IN, '<', $file_csv_in_1 or &Abort ("Could not open '$file_csv_in_1': $!") ;

  undef %lines ;
  while ($line = <CSV_IN>)
  {
    ($language,$date,$counts) = split (',', $line, 3) ;

    next if $language eq 'commons' and $project ne 'wx' ;
    next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on

    ($month,$day,$year) = split ('\/', $date) ;

    # article counts for June of every year, input for the yearly overview
    if ($month == 6)
    {
      @fields = split (',', $counts) ;
      $articles = $fields [4] ;
      $june_articles {"$project,$year"} += $articles ;
      $june_articles {"tot,$year"} += $articles ;
      $years {$year} ++ ;
    # print "$project $year $month : $articles\n" ;
    }

    my $m = &MonthsSinceYearAgo ($year, $month) ;
    # keep months 0..12 only
    # (this used to read '! ($m < 0) || ($m > 12)': the negation applied to the first test only, so months beyond 12 slipped through)
    if (! (($m < 0) || ($m > 12)))
    {
      $lines {$language}{$m} = $line ;
      $languages {$language}++ ;
    }
  }

  foreach $language (sort keys %languages)
  {
    # no data for a month ? reuse data for previous month
    for ($m=1 ; $m <= 12 ; $m++)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for ($m=0 ; $m <= 12 ; $m++)
    {
      $line = $lines {$language}{$m} ;
      chomp $line ;
      ($language,$date,$counts) = split (',', $line, 3) ;
      @fields = split (',', $counts) ;

      if ($project eq "wp")
      {
        foreach $f (1,4,6,11) # new editors, articles, new articles, edits
        {
          $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
          $totals {"$f,$m"} += $fields [$f] ;
          $totals_project {"$f,$m"} {$project} += $fields [$f] ;
          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }
      }
      else
      {
        foreach $f (1,4)
        {
          $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
          $totals {"$f,$m"} += $fields [$f] ;
          $totals_project {"$f,$m"} {$project} += $fields [$f] ;

          if ($language eq 'commons')
          { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }

          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }
        foreach $f (6,11)
        {
          $totals_project {"$f,$m"} {$project} += $fields [$f] ;
          if ($language eq 'commons')
          { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }

      }
    }
  }
  close CSV_IN ;

  # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
  # but use f = column count in StatisticsMonthly.csv

  print "Read '$file_csv_in_2'\n" ;
  open CSV_IN, '<', $file_csv_in_2 or &Abort ("Could not open '$file_csv_in_2': $!") ;

  undef %lines ;
  while ($line = <CSV_IN>)
  {
    chomp $line ;
    ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;

    next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
    # next if $language eq 'commons' ; # ignore editor count on commons alltogether, most are already counted for other project
    # (even for several projects, to be tuned after centralauth dump is available)

    if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
    if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces

    ($month,$day,$year) = split ('\/', $date) ;
    my $m = &MonthsSinceYearAgo ($year, $month) ;
    if (($m < 0) || ($m > 12))
    { next ; }

    $lines {$language}{$m} = $line ;
    $languages {$language}++ ;
  }

  foreach $language (sort keys %languages)
  {
    # no data for a month ? reuse data for previous month
    for ($m=1 ; $m <= 12 ; $m++)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for ($m=0 ; $m <= 12 ; $m++)
    {
      $line = $lines {$language}{$m} ;
      chomp $line ;
      ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
      @fields = split (',', $counts) ;

      foreach $f (2,3) # editors_gt_5, editors_gt_100
      {
        # count user with over x edits
        # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
        # thresholds = 1,3,5,10,25,32,50,100,etc
        if ($f == 2) { $f2 = 2 ; }
        if ($f == 3) { $f2 = 7 ; }

        $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;

        if ($language ne 'commons') # ignore editor count on commons for totals, most editors are already counted for other project
        { $totals {"$f,$m"} += $fields [$f2] ; } # (even for several projects, to be tuned after centralauth dump is available)

        $totals_project {"$f,$m"} {$project} += $fields [$f2] ;

        if ($language eq 'commons')
        { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
      }
    }
  }
  close CSV_IN ;
}
|
245 | | -
|
sub ReadStatisticsPerBinariesExtensionCommons
{
  # Read counts of binaries per file extension for Commons (csv_wx/StatisticsPerBinariesExtension.csv).
  # Fills globals:
  #   %ext_cnt {m}{column} - value per month m (0..12) and column, row -1 holds the fields of the line dated 00/0000
  #   %ext_tot {m}         - sum of all columns for month m
  #   @extndxs             - receives up to 10 column indexes, ordered by descending count in most recent month found
  my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
  my $mmax = -1 ; # most recent month found

  &Abort ("Input file '$file_csv_in' not found") if ! -e $file_csv_in ;

  print "Read '$file_csv_in'\n" ;
  open CSV_IN, '<', $file_csv_in ;
  while ($line = <CSV_IN>)
  {
    chomp $line ;
    ($language,$date,$counts) = split (',', $line, 3) ;

    next if $language ne "commons" ;

    # line dated 00/0000: store its fields as row -1
    if ($date eq "00/0000")
    {
      @fields = split (',', $counts) ;
      $field_ndx = 0 ;
      $ext_cnt {-1} {$field_ndx ++} = $_ foreach @fields ;
    # print "EXT_CNT $field_ndx : $field\n" ;
      next ;
    }

    ($month,$year) = split ('\/', $date) ;
    my $m = &MonthsSinceYearAgo ($year, $month) ;
    next if ($m < 0) || ($m > 12) ;

    $mmax = $m if $m > $mmax ;

    @fields = split (',', $counts) ;
    $field_ndx = 0 ;
    foreach (@fields)
    {
      $ext_cnt {$m}{$field_ndx ++} = $_ ;
      $ext_tot {$m} += $_ ;
    }
  }
  close CSV_IN ;

  # order columns by count in most recent month, highest first
  %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
  @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;

  # keep the first ten
  # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
  my $ndx_last = ($#ext_cnt_mmax < 9) ? $#ext_cnt_mmax : 9 ;
  push @extndxs, @ext_cnt_mmax [0 .. $ndx_last] ;
  $extcnt = $ndx_last + 1 ;
}
|
305 | | -
|
sub ReadMediaTrends
{
  # Place holder: reading of the media trend files (unique visitors and reach per region)
  # is not implemented, only the skeleton below exists.

# open FILE_UV, '<', $file_regions_UV ;
# close FILE_UV ;

# open FILE_REACH, '<', $file_regions_Reach ;
# close FILE_REACH ;

  return ;
}
|
314 | | -
|
sub WriteYearlyData
{
  # Write article counts per project per year (as counted for June) to file $file_csv_out_year
  # and echo every line on STDOUT.
  # Two tables are written: one with full numbers, one 'readable' with numbers abbreviated (1.2M, 34k).
  # Each project gets two columns: count and percentage change compared to previous year.
  # Final columns 'Growth': increase of the overall total since previous year, and how that increase changed.
  #
  # Input (globals): %years, %june_articles {"project,year"}, @projects2 (codes), @projects2b (column titles)
  # Fills global %june_diff {year}.
  print "Write file '$file_csv_out_year'\n" ;
  open CSV_OUT, '>', $file_csv_out_year or &Abort ("Could not open '$file_csv_out_year' for writing: $!") ;

  $line = "Articles per project per year (June 30)" ;
  print "$line\n" ;
  print CSV_OUT "$line\n" ;

  $line = "" ;
  foreach $project (@projects2b)
  { $line .= ",$project," ; }
  $line .= ",Growth," ;
  print "$line\n" ;
  print CSV_OUT "$line\n" ;

  # table 1: full numbers
  foreach $year (sort keys %years)
  {
    $june_diff {$year} = $june_articles {"tot,$year"} - $june_articles {"tot,".($year-1)} ;
    $line = "$year" ;
    foreach $project (@projects2)
    {
      $count = $june_articles {"$project,$year"} ;
      $count_prev = $june_articles {"$project,".($year-1)} ;
      $perc = "-" ;
      if ($count_prev > 0)
      { $perc = sprintf ("%.0f", 100 * ($count/$count_prev) - 100) . '%' ; $perc =~ s/^(\d)/\+$1/ ; }
      # $count = sprintf ("%.0f", $count / 1000) ;
      $line .= ",$count,$perc" ;
    }
    $diff = $june_diff {$year} ;
    $diff_prev = $june_diff {$year-1} ;
    $perc = "-" ;
    if ($diff_prev > 0)
    { $perc = sprintf ("%.0f", 100 * ($diff/$diff_prev) - 100) . '%' ; $perc =~ s/^(\d)/+$1/ ; }
    $line .= ",$diff,$perc" ;
    print "$line\n" ;
    print CSV_OUT "$line\n" ;
  }

  $line = "\nReadable version" ;
  print "$line\n" ;
  print CSV_OUT "$line\n" ;

  $line = "" ;
  foreach $project (@projects2b)
  { $line .= ",$project," ; }
  $line .= ",Growth," ;
  print "$line\n" ;
  print CSV_OUT "$line\n" ;

  # table 2: numbers abbreviated
  foreach $year (sort keys %years)
  {
    $line = "$year" ;
    foreach $project (@projects2)
    {
      $count = $june_articles {"$project,$year"} ;
      $count_prev = $june_articles {"$project,".($year-1)} ;

      $perc = "-" ;
      if ($count_prev > 0)
      { $perc = sprintf ("%.0f", 100 * ($count/$count_prev) - 100) . '%' ; $perc =~ s/^(\d)/\+$1/ ; }
      if ($count >= 1000000)
      { $count = sprintf ("%.1f", $count / 1000000) . 'M' ; }
      elsif ($count >= 1000)
      { $count = sprintf ("%.0f", $count / 1000) . 'k' ; }
      $line .= ",$count,$perc" ;
    }
    $diff = $june_diff {$year} ;
    $diff_prev = $june_diff {$year-1} ;
    $perc = "-" ;
    if ($diff_prev > 0)
    { $perc = sprintf ("%.0f", 100 * ($diff/$diff_prev) - 100) . '%' ; $perc =~ s/^(\d)/\+$1/ ; }
    if ($diff >= 1000000)
    { $diff = sprintf ("%.1f", $diff / 1000000) . 'M' ; }
    elsif ($diff >= 1000) # used to test $count (already abbreviated count of last project), so growth was hardly ever abbreviated
    { $diff = sprintf ("%.0f", $diff / 1000) . 'k' ; }
    $line .= ",$diff,$perc" ;

    print "$line\n" ;
    print CSV_OUT "$line\n" ;
  }

  # flush and release the file
  close CSV_OUT ;
}
|
399 | | -
|
# Format the relative change from $old to $new as a percentage with one decimal, e.g. "12.3%".
# Returns "n.a." when $old is zero/undefined, as there is no base to compare against.
sub MonthlyDataGrowth
{
  my ($new, $old) = @_ ;

  return "n.a." unless $old != 0 ;
  return sprintf ("%.1f", 100 * ($new / $old) - 100) . "%" ;
}

# Build one csv row with absolute counts: label, 13 monthly values, growth in one year, growth in one month.
# $label must already end with a comma. @values holds 13 months: [0] = year ago, [11] = month ago, [12] = latest.
sub MonthlyDataAbsoluteRow
{
  my ($label, @values) = @_ ;

  my $growth_year  = &MonthlyDataGrowth ($values [12], $values [0]) ;
  my $growth_month = &MonthlyDataGrowth ($values [12], $values [11]) ;

  return $label . join (",", @values, $growth_year, $growth_month) . "\n" ;
}

# Express each value as percentage of $base (one decimal), so that the base month reads 100.0.
# When $base is zero/undefined every cell is left empty.
sub MonthlyDataIndexedCells
{
  my ($base, @values) = @_ ;

  return map { ($base != 0) ? sprintf ("%.1f", 100 * ($_ / $base)) : "" } @values ;
}

# Row label "rank,language [project name]," for a "project,language" key.
# For project codes "wp" and "wx" only the language name is shown.
sub MonthlyDataWikiLabel
{
  my ($index, $key) = @_ ;

  my ($project, $language) = split (",", $key) ;
  my $language_name = $out_languages {$language} ;

  if (($project ne "wp") && ($project ne "wx"))
  { return "$index,$language_name " . &GetProjectName ($project) . "," ; }

  return "$index,$language_name," ;
}

# Row label ",project name," for a per project totals row ('commons' is not known to GetProjectName).
sub MonthlyDataProjectLabel
{
  my $project = shift ;

  return ",Commons," if $project eq 'commons' ;
  return "," . &GetProjectName ($project) . "," ;
}

# Write the csv file $file_csv_out with trend data for the last 13 months.
#
# Per metric (index into @out_report_descriptions) two sections are written:
#   'Absolute': overall total, top 25 wikis (ranked on latest month), totals per project,
#               each with growth percentages for one year and one month
#   'Indexed' : same rows, every value as percentage of the first of the 13 months
# followed by similar 'Absolute' and 'Indexed' sections for binaries (10 file extensions).
#
# Input (globals): $file_csv_out, $csv_recent_months, @out_report_descriptions, %out_languages,
#                  %totals, %values, %totals_project, @projects, %ext_tot, %ext_cnt, @extndxs
# Aborts (via &Abort) when the output file can not be opened or closed properly,
# rather than silently reporting success without having written anything.
sub WriteMonthlyData
{
  print "Write file '$file_csv_out'\n" ;
  open (CSV_OUT, '>', $file_csv_out) or &Abort ("Could not open file '$file_csv_out' for writing: $!") ;

  my @month_ndxs = (0 .. 12) ; # 0 = same month last year, 11 = previous month, 12 = latest month

  my $output = "" ;
  foreach my $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
  {
    my $description = $out_report_descriptions [$f] ;
    my @totals_f    = map { $totals {"$f,$_"} } @month_ndxs ;

    # sort by absolute amount for last month, keep top 25
    my %values_f_12 = %{$values {"$f,12"}} ;
    my @top_keys = sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12 ;
    if (@top_keys > 25)
    { splice (@top_keys, 25) ; }

    my @projects_sorted = sort {$totals_project {"$f,12"} {$b} <=> $totals_project {"$f,12"} {$a}} @projects ;

    # --- absolute counts ---
    $output .= "\n,$description - Absolute\n" ;
    $output .= "$csv_recent_months,%inc year, %inc month\n" ;
    $output .= &MonthlyDataAbsoluteRow (",Total,", @totals_f) ;

    my $index = 0 ;
    foreach my $key (@top_keys)
    {
      $index++ ;
      $output .= &MonthlyDataAbsoluteRow (&MonthlyDataWikiLabel ($index, $key), map { $values {"$f,$_"} {$key} } @month_ndxs) ;
    }
    $output .= "\n" ;

    foreach my $project (@projects_sorted)
    { $output .= &MonthlyDataAbsoluteRow (&MonthlyDataProjectLabel ($project), map { $totals_project {"$f,$_"} {$project} } @month_ndxs) ; }

    # --- indexed counts (first month = 100) ---
    $output .= "\n,$description - Indexed\n" ;
    $output .= "$csv_recent_months\n" ;

    $index = 0 ;
    foreach my $key (@top_keys)
    {
      $index++ ;
      my @values_key = map { $values {"$f,$_"} {$key} } @month_ndxs ;
      $output .= &MonthlyDataWikiLabel ($index, $key) . join (",", &MonthlyDataIndexedCells ($values_key [0], @values_key)) . "\n" ;

      # put totals last in chart to show line on top of others
      # (this row keeps an extra empty column before the remark)
      if ($index == 9)
      { $output .= ",Total," . join (",", &MonthlyDataIndexedCells ($totals_f [0], @totals_f)) . ",,(sorted here to make it top-most line out of 10 in Excel)\n" ; }
    }
    $output .= "\n" ;

    foreach my $project (@projects_sorted)
    {
      my @values_project = map { $totals_project {"$f,$_"} {$project} } @month_ndxs ;
      $output .= &MonthlyDataProjectLabel ($project) . join (",", &MonthlyDataIndexedCells ($values_project [0], @values_project)) . "\n" ;
    }
    $output .= "\n," . '=' x 150 . "\n" ;
  }

  print CSV_OUT $output ;

  # --- binaries, absolute counts ---
  $output  = "\n,Binaries per month - Absolute\n" ;
  $output .= "$csv_recent_months,%inc year, %inc month\n" ;
  # NOTE(review): header row is written a second time here (after an empty line) - looks accidental,
  # kept as is because the layout of the csv may be relied upon downstream
  $output .= "\n$csv_recent_months,%inc year,%inc month\n" ;
  $output .= &MonthlyDataAbsoluteRow (",Total,", map { $ext_tot {$_} } @month_ndxs) ;

  # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
  # always 10 rows: walk @extndxs from its last element downwards, pad with 'xxx' rows when it holds less than 10
  my @ext_slots = reverse (($#extndxs - 9) .. $#extndxs) ;

  my $index = 0 ;
  foreach my $e (@ext_slots)
  {
    $index++ ;

    if ($e < 0)
    {
      $output .= "$index,xxx," . ("," x 12) . "\n" ; # placeholder row with 13 empty month columns
      next ;
    }

    my $extndx = $extndxs [$e] ;
    # $ext_cnt {-1} provides the text for the label column
    $output .= &MonthlyDataAbsoluteRow ("$index," . $ext_cnt {-1} {$extndx} . ",", map { $ext_cnt {$_} {$extndx} } @month_ndxs) ;
  }

  print CSV_OUT $output ;

  # --- binaries, indexed counts (first month = 100) ---
  $output  = "\n,Binaries per month - Indexed\n" ;
  $output .= "$csv_recent_months\n" ;

  $index = 0 ;
  foreach my $e (@ext_slots)
  {
    $index++ ;

    if ($e < 0)
    {
      $output .= "$index,xxx," . ("," x 12) . "\n" ;
      next ;
    }

    my $extndx = $extndxs [$e] ;
    my @counts = map { $ext_cnt {$_} {$extndx} } @month_ndxs ;
    # here only a positive count for the first month is accepted as base, otherwise cells stay empty
    my $base   = ($counts [0] > 0) ? $counts [0] : 0 ;
    $output .= "$index," . $ext_cnt {-1} {$extndx} . "," . join (",", &MonthlyDataIndexedCells ($base, @counts)) . "\n" ;
  }

  print CSV_OUT $output ;
  close (CSV_OUT) or &Abort ("Could not close file '$file_csv_out': $!") ;

  print "\nOutput written to $file_csv_out\n\n" ;
}
|
657 | | -
|
# Prepare the 13 month window that ends at the given year and month.
#
# Arguments: year (4 digits), month
# Sets globals:
#   $year_, $month_     the year and month as passed
#   @recent_months      13 entries 'yyyy/mm', starting at the same month one year earlier
#   $csv_recent_months  csv header line: ',project,' followed by the same 13 months as 'mm/yyyy'
#   $m                  loop counter (left at 13)
# Also prints which periods are compared.
# NOTE(review): the printed periods are built with month+1, whereas the column headers use
# the month as is - looks inconsistent, confirm which numbering the caller uses.
sub SetComparisonPeriods
{
  my ($year, $month) = @_ ;
  my @months = qw(Xxx Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec) ; # not used below

  ($year_, $month_) = ($year, $month) ;

  # periods as shown in the progress messages
  my $month_0          = sprintf ("%04d/%02d", $year,     $month + 1) ;
  my $month_0_file     = sprintf ("%04d_%02d", $year,     $month + 1) ; # for filenames (not used below)
  my $month_0_minus_12 = sprintf ("%04d/%02d", $year - 1, $month + 1) ;

  my ($year_prev, $month_prev) = ($year - 1, 12) ;
  if ($month > 1)
  { ($year_prev, $month_prev) = ($year, $month - 1) ; }
  my $month_0_minus_1  = sprintf ("%04d/%02d", $year_prev, $month_prev + 1) ;

  print "\nWrite trend data up till month: $month_0\n\n" ;
  print "Compare with previous month: $month_0_minus_1, previous year: $month_0_minus_12\n\n" ;

  # walk from same month last year up to and including the month specified
  my ($yyyy, $mm) = ($year_ - 1, $month_) ;
  my @headers = (",project") ;
  for ($m = 0 ; $m <= 12 ; $m++)
  {
    $recent_months [$m] = sprintf ("%04d/%02d", $yyyy, $mm) ;
    push @headers, sprintf ("%02d/%04d", $mm, $yyyy) ;

    if ($mm < 12)
    { $mm = $mm + 1 ; }
    else
    { $yyyy = $yyyy + 1 ; $mm = 1 ; }
  }
  $csv_recent_months = join (",", @headers) ;
}
|
689 | | -
|
690 | | -#sub WriteCsvFilesPerPeriod
|
691 | | -#{
|
692 | | -# foreach $period (sort keys %totals)
|
693 | | -# {
|
694 | | -# &LogT ("\nWrite totals per $period: ") ;
|
695 | | -# $desc = $descriptions {$period} ;
|
696 | | -
|
697 | | -# foreach $project (sort keys %{$totals {$period}})
|
698 | | -# {
|
699 | | -# &Log ("$project ") ;
|
700 | | -
|
701 | | -# $dir_out = "$path_out/csv_$project" ;
|
702 | | -# if (! -d $dir_out)
|
703 | | -# { mkdir $dir_out, 0777 ; }
|
704 | | -
|
705 | | -# $file_out = "$dir_out/$desc.csv" ;
|
706 | | -
|
707 | | -# open CSV, ">", $file_out ;
|
708 | | -# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}})
|
709 | | -# {
|
710 | | -# ($language,$yearmonth) = split (",", $key) ;
|
711 | | -# # print "PERIOD $period PROJECT $project KEY $key\n" ;
|
712 | | -# if ($period eq "month")
|
713 | | -# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; }
|
714 | | -# else
|
715 | | -# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; }
|
716 | | -# }
|
717 | | -# close CSV ;
|
718 | | -# }
|
719 | | -# }
|
720 | | -#}
|
721 | | -
|
722 | | -#sub WriteCsvHtmlFilesPopularWikis
|
723 | | -#{
|
724 | | -# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ;
|
725 | | -
|
726 | | -# $dir_out = "$path_out/csv_wp" ;
|
727 | | -# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$month_0_file.csv" ;
|
728 | | -
|
729 | | -## extend with normalized counts
|
730 | | -## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv
|
731 | | -
|
732 | | -# open CSV, ">", $file_out ;
|
733 | | -# print CSV $csv_recent_months ;
|
734 | | -
|
735 | | -# # write per popular language+wiki 13 months of page view totals
|
736 | | -# $lines = 0 ;
|
737 | | -# foreach $line (@totals_lastmonth)
|
738 | | -# {
|
739 | | -# if (++$lines > $maxpopularwikis) { last ; }
|
740 | | -
|
741 | | -# ($project, $language) = split (',', $line) ;
|
742 | | -# $largest_projects {"$project-$language"} ++ ;
|
743 | | -
|
744 | | -# $language_name = $out_languages {$language} ;
|
745 | | -
|
746 | | -# if (($project ne "wp") && ($project ne "wx"))
|
747 | | -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
|
748 | | -# else
|
749 | | -# { print CSV "$language_name," ; }
|
750 | | -
|
751 | | -## %test = %{$totals {"month"} {"wp"} };
|
752 | | -## %test2 = @recent_months ;
|
753 | | -# for ($m = 0 ; $m <= 12 ; $m++)
|
754 | | -# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; }
|
755 | | -# print CSV "\n" ;
|
756 | | -# }
|
757 | | -
|
758 | | -# print CSV "\n$csv_recent_months" ;
|
759 | | -
|
760 | | -# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100
|
761 | | -# $lines = 0 ;
|
762 | | -# foreach $line (@totals_lastmonth)
|
763 | | -# {
|
764 | | -# if (++$lines > $maxpopularwikis) { last ; }
|
765 | | -
|
766 | | -# ($project, $language) = split (',', $line) ;
|
767 | | -# $language_name = $out_languages {$language} ;
|
768 | | -
|
769 | | -# if (($project ne "wp") && ($project ne "wx"))
|
770 | | -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
|
771 | | -# else
|
772 | | -# { print CSV "$language_name," ; }
|
773 | | -
|
774 | | -# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ;
|
775 | | -# for ($m = 0 ; $m <= 12 ; $m++)
|
776 | | -# {
|
777 | | -# if ($recent_month_0 > 0)
|
778 | | -# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; }
|
779 | | -# else
|
780 | | -# { print CSV "," ; }
|
781 | | -# }
|
782 | | -
|
783 | | -# print CSV "\n" ;
|
784 | | -# }
|
785 | | -# close CSV ;
|
786 | | -
|
787 | | -# # write ready made table rows for report card: page views top 25 movers shakers
|
788 | | -# foreach $key (keys %largest_projects)
|
789 | | -# {
|
790 | | -# ($project,$language) = split ('-', $key) ;
|
791 | | -
|
792 | | -# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_0"} ;
|
793 | | -# $total_prevmonth = $totals {"month"} {$project} {"$language,$month_0_minus_1"} ;
|
794 | | -# $total_prevyear = $totals {"month"} {$project} {"$language,$month_0_minus_12"} ;
|
795 | | -
|
796 | | -# $perc_month = "no data" ;
|
797 | | -# $perc_year = "no data" ;
|
798 | | -
|
799 | | -# if ($total_prevyear > 0)
|
800 | | -# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; }
|
801 | | -# if ($total_prevmonth > 0)
|
802 | | -# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; }
|
803 | | -
|
804 | | -# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ;
|
805 | | -
|
806 | | -# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ;
|
807 | | -
|
808 | | -# $project_name = &GetProjectName ($project) ;
|
809 | | -# $language_name = $out_languages {$language} ;
|
810 | | -
|
811 | | -# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ;
|
812 | | -# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ;
|
813 | | -# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ;
|
814 | | -# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ;
|
815 | | -# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ;
|
816 | | -
|
817 | | -# $growth_figures_text {"$perc_month-$project-$language"} = $line ;
|
818 | | -# $growth_figures_html {"$perc_month-$project-$language"} = $html ;
|
819 | | -# }
|
820 | | -
|
821 | | -# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$month_0_file.html" ;
|
822 | | -
|
823 | | -# open HTML, ">", $file_html ;
|
824 | | -# foreach $key (sort {$b <=> $a} keys %growth_figures_text)
|
825 | | -# {
|
826 | | -# print "$key: ". $growth_figures_text {$key} . "\n" ;
|
827 | | -# print HTML $growth_figures_html {$key} ;
|
828 | | -# }
|
829 | | -# close HTML ;
|
830 | | -#}
|
831 | | -
|
832 | | -
|
# Translate a project code (e.g. "wp") into the name of the project (e.g. "Wikipedia").
#
# Argument: project code
# Returns:  project name, or an empty string for code "wx" and for any code not listed here
# Side effect: the result is also stored in global $project_name (as before).
#
# The global is now assigned on every call. Formerly an unlisted code left it untouched,
# so the name found on the previous call was returned for the wrong project.
sub GetProjectName
{
  my $project = shift ;

  my %project_names = (
    wp => "Wikipedia",
    wb => "Wikibooks",
    wk => "Wiktionary",
    wx => "",
    wn => "Wikinews",
    wq => "Wikiquote",
    ws => "Wikisource",
    wv => "Wikiversity",
  ) ;

  if (exists $project_names {$project})
  { $project_name = $project_names {$project} ; }
  else
  { $project_name = "" ; }

  return ($project_name) ;
}
|
848 | | -
|
# Position of a given month on a scale where the reference month ($year_0, $month_0) is 12
# and the same month one year earlier is 0.
#
# Arguments: year, month
# Relies on globals $year_0 and $month_0, which are not set in this sub.
sub MonthsSinceYearAgo
{
  my ($year, $month) = @_ ;

  # number of months that the given month lies before the reference month
  my $months_before = ($year_0 - $year) * 12 + $month_0 - $month ;

  return 12 - $months_before ;
}
|
855 | | -
|
856 | | -#sub Log
|
857 | | -#{
|
858 | | -# $msg = shift ;
|
859 | | -# print $msg ;
|
860 | | -# print LOG $msg ;
|
861 | | -#}
|
862 | | -
|
863 | | -#sub LogT
|
864 | | -#{
|
865 | | -# $msg = shift ;
|
866 | | -# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
|
867 | | -# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ;
|
868 | | -# $msg =~ s/^(\n*)/$1$time/s ;
|
869 | | -# &Log ($msg) ;
|
870 | | -#}
|
871 | | -
|
# Current local time as 'hh:mm:ss ' (with trailing space, ready to prefix a message).
# Despite the name of the sub, hours are included.
sub MmSs
{
  my @now = localtime (time) ; # (sec, min, hour, ...)
  return (sprintf ("%02d:%02d:%02d ", @now [2,1,0])) ;
}
|
877 | | -
|
# Print the message given, followed by a line 'Execution aborted.', then end the script.
# Note that 'exit' is called without argument, so the exit status is 0.
sub Abort
{
  my ($reason) = @_ ;

  print $reason . "\nExecution aborted." ;
  # to do: log also to file
  exit ;
}
|
885 | | -
|
886 | | -sub InitProjectNames
|
887 | | -{
|
888 | | - # copied from WikiReports.pl
|
889 | | -
|
890 | | - %wikipedias = (
|
891 | | -# mediawiki=>"http://wikimediafoundation.org Wikimedia",
|
892 | | - nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
|
893 | | - sources=>"http://wikisource.org Old Wikisource",
|
894 | | - meta=>"http://meta.wikimedia.org Meta-Wiki",
|
895 | | - beta=>"http://beta.wikiversity.org Beta",
|
896 | | - species=>"http://species.wikipedia.org WikiSpecies",
|
897 | | - commons=>"http://commons.wikimedia.org Commons",
|
898 | | - foundation=>"http://wikimediafoundation.org Wikimedia Foundation",
|
899 | | - sep11=>"http://sep11.wikipedia.org In Memoriam",
|
900 | | - nlwikimedia=>"http://nl.wikimedia.org Wikimedia Nederland",
|
901 | | - plwikimedia=>"http://pl.wikimedia.org Wikimedia Polska",
|
902 | | - mediawiki=>"http://www.mediawiki.org MediaWiki",
|
903 | | - dewikiversity=>"http://de.wikiversity.org Wikiversität",
|
904 | | - frwikiversity=>"http://fr.wikiversity.org Wikiversität",
|
905 | | - wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
|
906 | | - wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
|
907 | | - aa=>"http://aa.wikipedia.org Afar",
|
908 | | - ab=>"http://ab.wikipedia.org Abkhazian",
|
909 | | - af=>"http://af.wikipedia.org Afrikaans",
|
910 | | - ak=>"http://ak.wikipedia.org Akan", # was Akana
|
911 | | - als=>"http://als.wikipedia.org Alemannic", # was Elsatian
|
912 | | - am=>"http://am.wikipedia.org Amharic",
|
913 | | - an=>"http://an.wikipedia.org Aragonese",
|
914 | | - ang=>"http://ang.wikipedia.org Anglo-Saxon",
|
915 | | - ar=>"http://ar.wikipedia.org Arabic",
|
916 | | - arc=>"http://arc.wikipedia.org Aramaic",
|
917 | | - as=>"http://as.wikipedia.org Assamese",
|
918 | | - ast=>"http://ast.wikipedia.org Asturian",
|
919 | | - av=>"http://av.wikipedia.org Avar", # was Avienan
|
920 | | - ay=>"http://ay.wikipedia.org Aymara",
|
921 | | - az=>"http://az.wikipedia.org Azeri", # was Azerbaijani
|
922 | | - ba=>"http://ba.wikipedia.org Bashkir",
|
923 | | - bar=>"http://bar.wikipedia.org Bavarian",
|
924 | | - bat_smg=>"http://bat-smg.wikipedia.org Samogitian",
|
925 | | - "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
|
926 | | - bcl=>"http://bcl.wikipedia.org Central Bicolano",
|
927 | | - be=>"http://be.wikipedia.org Belarusian",
|
928 | | - "be-x-old"=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
|
929 | | - be_x_old=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)",
|
930 | | - bg=>"http://bg.wikipedia.org Bulgarian",
|
931 | | - bh=>"http://bh.wikipedia.org Bihari",
|
932 | | - bi=>"http://bi.wikipedia.org Bislama",
|
933 | | - bm=>"http://bm.wikipedia.org Bambara",
|
934 | | - bn=>"http://bn.wikipedia.org Bengali",
|
935 | | - bo=>"http://bo.wikipedia.org Tibetan",
|
936 | | - bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri",
|
937 | | - br=>"http://br.wikipedia.org Breton",
|
938 | | - bs=>"http://bs.wikipedia.org Bosnian",
|
939 | | - bug=>"http://bug.wikipedia.org Buginese",
|
940 | | - bxr=>"http://bxr.wikipedia.org Buryat",
|
941 | | - ca=>"http://ca.wikipedia.org Catalan",
|
942 | | - cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano",
|
943 | | - "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
|
944 | | - cdo=>"http://cdo.wikipedia.org Min Dong",
|
945 | | - ce=>"http://ce.wikipedia.org Chechen",
|
946 | | - ceb=>"http://ceb.wikipedia.org Cebuano",
|
947 | | - ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru
|
948 | | - cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw
|
949 | | - chr=>"http://chr.wikipedia.org Cherokee",
|
950 | | - chy=>"http://chy.wikipedia.org Cheyenne", # was Setsêhestâhese
|
951 | | - co=>"http://co.wikipedia.org Corsican",
|
952 | | - cr=>"http://cr.wikipedia.org Cree",
|
953 | | - crh=>"http://crh.wikipedia.org Crimean Tatar",
|
954 | | - cs=>"http://cs.wikipedia.org Czech",
|
955 | | - csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian
|
956 | | - cu=>"http://cv.wikipedia.org Old Church Slavonic",
|
957 | | - cv=>"http://cv.wikipedia.org Chuvash", # was Cavas
|
958 | | - cy=>"http://cy.wikipedia.org Welsh",
|
959 | | - da=>"http://da.wikipedia.org Danish",
|
960 | | - de=>"http://de.wikipedia.org German",
|
961 | | - diq=>"http://diq.wikipedia.org Zazaki",
|
962 | | - dk=>"http://dk.wikipedia.org Danish",
|
963 | | - dsb=>"http://dsb.wikipedia.org Lower Sorbian",
|
964 | | - dv=>"http://dv.wikipedia.org Divehi",
|
965 | | - dz=>"http://dz.wikipedia.org Dzongkha",
|
966 | | - ee=>"http://ee.wikipedia.org Ewe",
|
967 | | - el=>"http://el.wikipedia.org Greek",
|
968 | | - eml=>"http://eml.wikipedia.org Emilian-Romagnol",
|
969 | | - en=>"http://en.wikipedia.org English",
|
970 | | - eo=>"http://eo.wikipedia.org Esperanto",
|
971 | | - es=>"http://es.wikipedia.org Spanish",
|
972 | | - et=>"http://et.wikipedia.org Estonian",
|
973 | | - eu=>"http://eu.wikipedia.org Basque",
|
974 | | - ext=>"http://ext.wikipedia.org Extremaduran",
|
975 | | - fa=>"http://fa.wikipedia.org Persian",
|
976 | | - ff=>"http://ff.wikipedia.org Fulfulde",
|
977 | | - fi=>"http://fi.wikipedia.org Finnish",
|
978 | | - "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro",
|
979 | | - fiu_vro=>"http://fiu-vro.wikipedia.org Voro",
|
980 | | - fj=>"http://fj.wikipedia.org Fijian",
|
981 | | - fo=>"http://fo.wikipedia.org Faroese", # was Faeroese
|
982 | | - fr=>"http://fr.wikipedia.org French",
|
983 | | - frp=>"http://frp.wikipedia.org Arpitan",
|
984 | | - fur=>"http://fur.wikipedia.org Friulian",
|
985 | | - fy=>"http://fy.wikipedia.org Frisian",
|
986 | | - ga=>"http://ga.wikipedia.org Irish",
|
987 | | - gan=>"http://gan.wikipedia.org Gan",
|
988 | | - gay=>"http://gay.wikipedia.org Gayo",
|
989 | | - gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic
|
990 | | - gl=>"http://gl.wikipedia.org Galician", # was Galego
|
991 | | - glk=>"http://glk.wikipedia.org Gilaki",
|
992 | | - gn=>"http://gn.wikipedia.org Guarani",
|
993 | | - got=>"http://got.wikipedia.org Gothic",
|
994 | | - gu=>"http://gu.wikipedia.org Gujarati",
|
995 | | - gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic
|
996 | | - ha=>"http://ha.wikipedia.org Hausa",
|
997 | | - hak=>"http://hak.wikipedia.org Hakka",
|
998 | | - haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian
|
999 | | - he=>"http://he.wikipedia.org Hebrew",
|
1000 | | - hi=>"http://hi.wikipedia.org Hindi",
|
1001 | | - hif=>"http://hif.wikipedia.org Fiji Hindi",
|
1002 | | - ho=>"http://ho.wikipedia.org Hiri Motu",
|
1003 | | - hr=>"http://hr.wikipedia.org Croatian",
|
1004 | | - hsb=>"http://hsb.wikipedia.org Upper Sorbian",
|
1005 | | - ht=>"http://ht.wikipedia.org Haitian",
|
1006 | | - hu=>"http://hu.wikipedia.org Hungarian",
|
1007 | | - hy=>"http://hy.wikipedia.org Armenian",
|
1008 | | - hz=>"http://hz.wikipedia.org Herero",
|
1009 | | - ia=>"http://ia.wikipedia.org Interlingua",
|
1010 | | - iba=>"http://iba.wikipedia.org Iban",
|
1011 | | - id=>"http://id.wikipedia.org Indonesian",
|
1012 | | - ie=>"http://ie.wikipedia.org Interlingue",
|
1013 | | - ig=>"http://ig.wikipedia.org Igbo",
|
1014 | | - ii=>"http://ii.wikipedia.org Yi",
|
1015 | | - ik=>"http://ik.wikipedia.org Inupiak",
|
1016 | | - ilo=>"http://ilo.wikipedia.org Ilokano",
|
1017 | | - io=>"http://io.wikipedia.org Ido",
|
1018 | | - is=>"http://is.wikipedia.org Icelandic",
|
1019 | | - it=>"http://it.wikipedia.org Italian",
|
1020 | | - iu=>"http://iu.wikipedia.org Inuktitut",
|
1021 | | - ja=>"http://ja.wikipedia.org Japanese",
|
1022 | | - jbo=>"http://jbo.wikipedia.org Lojban",
|
1023 | | - jv=>"http://jv.wikipedia.org Javanese",
|
1024 | | - ka=>"http://ka.wikipedia.org Georgian",
|
1025 | | - kaa=>"http://kaa.wikipedia.org Karakalpak",
|
1026 | | - kab=>"http://ka.wikipedia.org Kabyle",
|
1027 | | - kaw=>"http://kaw.wikipedia.org Kawi",
|
1028 | | - kg=>"http://kg.wikipedia.org Kongo",
|
1029 | | - ki=>"http://ki.wikipedia.org Kikuyu",
|
1030 | | - kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo
|
1031 | | - kk=>"http://kk.wikipedia.org Kazakh",
|
1032 | | - kl=>"http://kl.wikipedia.org Greenlandic",
|
1033 | | - km=>"http://km.wikipedia.org Khmer", # was Cambodian
|
1034 | | - kn=>"http://kn.wikipedia.org Kannada",
|
1035 | | - ko=>"http://ko.wikipedia.org Korean",
|
1036 | | - kr=>"http://kr.wikipedia.org Kanuri",
|
1037 | | - ks=>"http://ks.wikipedia.org Kashmiri",
|
1038 | | - ksh=>"http://ksh.wikipedia.org Ripuarian",
|
1039 | | - ku=>"http://ku.wikipedia.org Kurdish",
|
1040 | | - kv=>"http://kv.wikipedia.org Komi",
|
1041 | | - kw=>"http://kw.wikipedia.org Cornish", # was Kornish
|
1042 | | - ky=>"http://ky.wikipedia.org Kirghiz",
|
1043 | | - la=>"http://la.wikipedia.org Latin",
|
1044 | | - lad=>"http://lad.wikipedia.org Ladino",
|
1045 | | - lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch
|
1046 | | - lbe=>"http://lbe.wikipedia.org Lak",
|
1047 | | - lg=>"http://lg.wikipedia.org Ganda",
|
1048 | | - li=>"http://li.wikipedia.org Limburgish",
|
1049 | | - lij=>"http://lij.wikipedia.org Ligurian",
|
1050 | | - lmo=>"http://lmo.wikipedia.org Lombard",
|
1051 | | - ln=>"http://ln.wikipedia.org Lingala",
|
1052 | | - lo=>"http://lo.wikipedia.org Laotian",
|
1053 | | - ls=>"http://ls.wikipedia.org Latino Sine Flexione",
|
1054 | | - lt=>"http://lt.wikipedia.org Lithuanian",
|
1055 | | - lv=>"http://lv.wikipedia.org Latvian",
|
1056 | | - mad=>"http://mad.wikipedia.org Madurese",
|
1057 | | - mak=>"http://mak.wikipedia.org Makasar",
|
1058 | | - map_bms=>"http://map-bms.wikipedia.org Banyumasan",
|
1059 | | - "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
|
1060 | | - mdf=>"http://mdf.wikipedia.org Moksha",
|
1061 | | - mg=>"http://mg.wikipedia.org Malagasy",
|
1062 | | - mh=>"http://mh.wikipedia.org Marshallese",
|
1063 | | - mi=>"http://mi.wikipedia.org Maori",
|
1064 | | - min=>"http://min.wikipedia.org Minangkabau",
|
1065 | | - minnan=>"http://minnan.wikipedia.org Minnan",
|
1066 | | - mk=>"http://mk.wikipedia.org Macedonian",
|
1067 | | - ml=>"http://ml.wikipedia.org Malayalam",
|
1068 | | - mn=>"http://mn.wikipedia.org Mongolian",
|
1069 | | - mo=>"http://mo.wikipedia.org Moldavian",
|
1070 | | - mr=>"http://mr.wikipedia.org Marathi",
|
1071 | | - ms=>"http://ms.wikipedia.org Malay",
|
1072 | | - mt=>"http://mt.wikipedia.org Maltese",
|
1073 | | - mus=>"http://mus.wikipedia.org Muskogee",
|
1074 | | - my=>"http://my.wikipedia.org Burmese",
|
1075 | | - myv=>"http://myv.wikipedia.org Erzya",
|
1076 | | - mzn=>"http://mzn.wikipedia.org Mazandarani",
|
1077 | | - na=>"http://na.wikipedia.org Nauruan", # was Nauru
|
1078 | | - nah=>"http://nah.wikipedia.org Nahuatl",
|
1079 | | - nap=>"http://nap.wikipedia.org Neapolitan",
|
1080 | | - nds=>"http://nds.wikipedia.org Low Saxon",
|
1081 | | - nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
|
1082 | | - "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
|
1083 | | - ne=>"http://ne.wikipedia.org Nepali",
|
1084 | | - new=>"http://new.wikipedia.org Nepal Bhasa",
|
1085 | | - ng=>"http://ng.wikipedia.org Ndonga",
|
1086 | | - nl=>"http://nl.wikipedia.org Dutch",
|
1087 | | - nov=>"http://nov.wikipedia.org Novial",
|
1088 | | - nrm=>"http://nrm.wikipedia.org Norman",
|
1089 | | - nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian
|
1090 | | - no=>"http://no.wikipedia.org Norwegian",
|
1091 | | - nv=>"http://nv.wikipedia.org Navajo", # was Avayo
|
1092 | | - ny=>"http://ny.wikipedia.org Chichewa",
|
1093 | | - oc=>"http://oc.wikipedia.org Occitan",
|
1094 | | - om=>"http://om.wikipedia.org Oromo",
|
1095 | | - or=>"http://or.wikipedia.org Oriya",
|
1096 | | - os=>"http://os.wikipedia.org Ossetic",
|
1097 | | - pa=>"http://pa.wikipedia.org Punjabi",
|
1098 | | - pag=>"http://pag.wikipedia.org Pangasinan",
|
1099 | | - pam=>"http://pam.wikipedia.org Kapampangan",
|
1100 | | - pap=>"http://pap.wikipedia.org Papiamentu",
|
1101 | | - pdc=>"http://pdc.wikipedia.org Pennsylvania German",
|
1102 | | - pi=>"http://pi.wikipedia.org Pali",
|
1103 | | - pih=>"http://pih.wikipedia.org Norfolk",
|
1104 | | - pl=>"http://pl.wikipedia.org Polish",
|
1105 | | - pms=>"http://pms.wikipedia.org Piedmontese",
|
1106 | | - ps=>"http://ps.wikipedia.org Pashto",
|
1107 | | - pt=>"http://pt.wikipedia.org Portuguese",
|
1108 | | - qu=>"http://qu.wikipedia.org Quechua",
|
1109 | | - rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance
|
1110 | | - rmy=>"http://rmy.wikipedia.org Romani",
|
1111 | | - rn=>"http://rn.wikipedia.org Kirundi",
|
1112 | | - ro=>"http://ro.wikipedia.org Romanian",
|
1113 | | - roa_rup=>"http://roa-rup.wikipedia.org Aromanian",
|
1114 | | - "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian",
|
1115 | | - roa_tara=>"http://roa-tara.wikipedia.org Tarantino",
|
1116 | | - "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
|
1117 | | - ru=>"http://ru.wikipedia.org Russian",
|
1118 | | - ru_sib=>"http://ru-sib.wikipedia.org Siberian",
|
1119 | | - "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
|
1120 | | - rw=>"http://rw.wikipedia.org Kinyarwanda",
|
1121 | | - sa=>"http://sa.wikipedia.org Sanskrit",
|
1122 | | - sah=>"http://sah.wikipedia.org Sakha",
|
1123 | | - sc=>"http://sc.wikipedia.org Sardinian",
|
1124 | | - scn=>"http://scn.wikipedia.org Sicilian",
|
1125 | | - sco=>"http://sco.wikipedia.org Scots",
|
1126 | | - sd=>"http://sd.wikipedia.org Sindhi",
|
1127 | | - se=>"http://se.wikipedia.org Northern Sami",
|
1128 | | - sg=>"http://sg.wikipedia.org Sangro",
|
1129 | | - sh=>"http://sh.wikipedia.org Serbo-Croatian",
|
1130 | | - si=>"http://si.wikipedia.org Sinhala", # was Singhalese
|
1131 | | - simple=>"http://simple.wikipedia.org Simple English",
|
1132 | | - sk=>"http://sk.wikipedia.org Slovak",
|
1133 | | - sl=>"http://sl.wikipedia.org Slovene",
|
1134 | | - sm=>"http://sm.wikipedia.org Samoan",
|
1135 | | - sn=>"http://sn.wikipedia.org Shona",
|
1136 | | - so=>"http://so.wikipedia.org Somali", # was Somalian
|
1137 | | - sq=>"http://sq.wikipedia.org Albanian",
|
1138 | | - sr=>"http://sr.wikipedia.org Serbian",
|
1139 | | - srn=>"http://srn.wikipedia.org Sranan",
|
1140 | | - ss=>"http://ss.wikipedia.org Siswati",
|
1141 | | - st=>"http://st.wikipedia.org Sesotho",
|
1142 | | - stq=>"http://stq.wikipedia.org Saterland Frisian",
|
1143 | | - su=>"http://su.wikipedia.org Sundanese",
|
1144 | | - sv=>"http://sv.wikipedia.org Swedish",
|
1145 | | - sw=>"http://sw.wikipedia.org Swahili",
|
1146 | | - szl=>"http://szl.wikipedia.org Silesian",
|
1147 | | - ta=>"http://ta.wikipedia.org Tamil",
|
1148 | | - te=>"http://te.wikipedia.org Telugu",
|
1149 | | - test=>"http://test.wikipedia.org Test",
|
1150 | | - tet=>"http://tet.wikipedia.org Tetum",
|
1151 | | - tg=>"http://tg.wikipedia.org Tajik",
|
1152 | | - th=>"http://th.wikipedia.org Thai",
|
1153 | | - ti=>"http://ti.wikipedia.org Tigrinya",
|
1154 | | - tk=>"http://tk.wikipedia.org Turkmen",
|
1155 | | - tl=>"http://tl.wikipedia.org Tagalog",
|
1156 | | - tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
|
1157 | | - tn=>"http://tn.wikipedia.org Setswana",
|
1158 | | - to=>"http://to.wikipedia.org Tongan",
|
1159 | | - tokipona=>"http://tokipona.wikipedia.org Tokipona",
|
1160 | | - tpi=>"http://tpi.wikipedia.org Tok Pisin",
|
1161 | | - tr=>"http://tr.wikipedia.org Turkish",
|
1162 | | - ts=>"http://ts.wikipedia.org Tsonga",
|
1163 | | - tt=>"http://tt.wikipedia.org Tatar",
|
1164 | | - tum=>"http://tum.wikipedia.org Tumbuka",
|
1165 | | - turn=>"http://turn.wikipedia.org Turnbuka",
|
1166 | | - tw=>"http://tw.wikipedia.org Twi",
|
1167 | | - ty=>"http://ty.wikipedia.org Tahitian",
|
1168 | | - udm=>"http://udm.wikipedia.org Udmurt",
|
1169 | | - ug=>"http://ug.wikipedia.org Uighur",
|
1170 | | - uk=>"http://uk.wikipedia.org Ukrainian",
|
1171 | | - ur=>"http://ur.wikipedia.org Urdu",
|
1172 | | - uz=>"http://uz.wikipedia.org Uzbek",
|
1173 | | - ve=>"http://ve.wikipedia.org Venda", # was Lushaka
|
1174 | | - vec=>"http://vec.wikipedia.org Venetian",
|
1175 | | - vi=>"http://vi.wikipedia.org Vietnamese",
|
1176 | | - vls=>"http://vls.wikipedia.org West Flemish",
|
1177 | | - vo=>"http://vo.wikipedia.org Volapük",
|
1178 | | - wa=>"http://wa.wikipedia.org Walloon",
|
1179 | | - war=>"http://war.wikipedia.org Waray-Waray",
|
1180 | | - wo=>"http://wo.wikipedia.org Wolof",
|
1181 | | - wuu=>"http://wuu.wikipedia.org Wu",
|
1182 | | - xal=>"http://xal.wikipedia.org Kalmyk",
|
1183 | | - xh=>"http://xh.wikipedia.org Xhosa",
|
1184 | | - yi=>"http://yi.wikipedia.org Yiddish",
|
1185 | | - yo=>"http://yo.wikipedia.org Yoruba",
|
1186 | | - za=>"http://za.wikipedia.org Zhuang",
|
1187 | | - zea=>"http://zea.wikipedia.org Zealandic",
|
1188 | | - zh=>"http://zh.wikipedia.org Chinese",
|
1189 | | - zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan",
|
1190 | | - "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan",
|
1191 | | - zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese",
|
1192 | | - "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese",
|
1193 | | - zh_yue=>"http://zh-yue.wikipedia.org Cantonese",
|
1194 | | - "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese",
|
1195 | | - zu=>"http://zu.wikipedia.org Zulu",
|
1196 | | - zz=>" All languages",
|
1197 | | - zzz=>" All languages except English"
|
1198 | | - );
|
1199 | | -
|
1200 | | - foreach $key (keys %wikipedias)
|
1201 | | - {
|
1202 | | - my $wikipedia = $wikipedias {$key} ;
|
1203 | | - $out_urls {$key} = $wikipedia ;
|
1204 | | - $out_languages {$key} = $wikipedia ;
|
1205 | | - $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
|
1206 | | - $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ;
|
1207 | | - $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ;
|
1208 | | - $out_article {$key} =~ s/ /_/g ;
|
1209 | | - $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ;
|
1210 | | - }
|
1211 | | -}
|
1212 | | -
|
1213 | | -# copied from WikiReports_EN.pl
|
# Fill the global @out_report_descriptions with the report titles.
# The position of a title in the list is its report number: other code
# looks a title up as $out_report_descriptions [$f].
sub InitReportNames
{
  my @titles ;

  # 0 .. 3
  push @titles, "Contributors", "New editors", "Active editors", "Very active editors" ;

  # 4 .. 10
  push @titles, "Article count (official)", "Article count (alternate)", "New articles per day" ;
  push @titles, "Edits per article", "Bytes per article", "Articles over 0.5 Kb", "Articles over 2 Kb" ;

  # 11 .. 18
  push @titles, "Edits per month", "Database size", "Words", "Internal links" ;
  push @titles, "Links to other Wikipedias", "Binaries", "External links", "Redirects" ;

  # 19 .. 21
  push @titles, "Page requests per day", "Visits per day", "Overview recent months" ;

  @out_report_descriptions = @titles ;
}
1241 | | -
|
| 2 | +#!/usr/local/bin/perl |
| 3 | +# -i "w:/# Out Bayes" -o "w:/@ Report Card/Data" |
| 4 | + |
| 5 | + use lib "/home/ezachte/lib" ; |
| 6 | + use EzLib ; |
| 7 | + $trace_on_exit = $true ; |
| 8 | + ez_lib_version (2) ; |
| 9 | + |
# Reporting period: the last of the 13 months that are collected.
# Both values are passed to SetComparisonPeriods from ParseArguments and are
# also used there to build the name of the monthly output file.
  $month_0 = "08" ; # collect 13 months up to
  $year_0 = 2010 ;

# set defaults mainly for tests on local machine
# NOTE(review): default_argv comes from outside this file (presumably EzLib) - confirm its exact behaviour there.
  default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;

  use Getopt::Std ;

# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;

  $maxpopularwikis = 25 ;
  # @projects  : rows of the per-project totals in WriteMonthlyData
  # @projects2 : project codes for the columns written by WriteYearlyData ('tot' = sum over all projects)
  # @projects2b: column titles for WriteYearlyData, in the same order as @projects2
  @projects = ('wb','wk','wn','wp','wq','ws','wv','commons') ;
  @projects2 = ('wb','wk','wn','wp','wq','ws','wv','wx','tot') ;
  @projects2b = ('Wikibooks','Wiktionary','Wikinews','Wikipedia','Wikiquote','Wikisource','Wikiversity','Other','Total') ;

# Main sequence: echo and parse arguments, initialise the lookup tables,
# read the input csv files, then write the yearly overview.
# The monthly report is currently switched off.
  &LogArguments ;
  &ParseArguments ;
  &InitProjectNames ;
  &InitReportNames ;
  &ReadStatisticsMonthly ;
  &WriteYearlyData ;
# &WriteMonthlyData ;
  exit ;
| 34 | + |
# Parse the command line switches into the global %options (Getopt::Std)
# and echo every switch that was found, one per line, sorted by switch letter.
sub LogArguments
{
  getopt ("iolpft", \%options) ; # each of -i -o -l -p -f -t takes a value

  my $report = join ('', map { " -$_ " . $options {$_} . "\n" } sort keys %options) ;

  print ("\nArguments\n$report\n") ;
# &Log ("\nArguments\n$report\n") ;
}
| 44 | + |
# Establish the input and output folders and the names of both output files,
# then derive the comparison periods for the configured year and month.
#
# The -i / -o switches are not honoured at present: the folders are fixed
# below, and the code that used to read and validate the switches is kept
# as comments for reference.
sub ParseArguments
{
# my @options ;
# getopt ("io", \%options) ;

# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;

# $path_in = $options {"i"} ;
# $path_out = $options {"o"} ;

# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;

  ($path_in, $path_out) = ("w:/# out bayes", "w:/@ report card/data") ;

  print "Input folder: $path_in\n" . "Output folder: $path_out\n" . "\n" ;

  # monthly file carries the reporting period in its name, yearly file does not
  $file_csv_out_year = $path_out . "/StatisticsYearly.csv" ;
  $file_csv_out = $path_out . "/StatisticsMonthly_" . $year_0 . "_" . $month_0 . ".csv" ;

  &SetComparisonPeriods ($year_0,$month_0) ;
}
| 71 | + |
# Read the monthly statistics of every project, one project code at a time,
# and afterwards the counts per binaries extension for Commons.
sub ReadStatisticsMonthly
{
  foreach my $project_code (qw(wb wk wn wp wq ws wv wx))
  { &ReadStatisticsMonthlyForProject ($project_code) ; }

  &ReadStatisticsPerBinariesExtensionCommons ;
}
| 85 | + |
# Read the statistics of one project and add them to the global tables.
#
# Parameter: project code, used to locate "$path_in/csv_<code>/..."
#
# Input files (both must exist, otherwise Abort is called):
#   StatisticsMonthly.csv            language,mm/dd/yyyy,counts...
#   StatisticsUserActivitySpread.csv language,mm/dd/yyyy,R|B,A|T|O,counts...
#
# Globals written:
#   %june_articles  article count per "project,year" and "tot,year", taken from month 6 rows of any year
#   %years          years for which a month 6 row was seen
#   %languages      languages seen (NOTE(review): never reset, so it accumulates across projects)
#   %values         {"f,m"}{"project,language"} value of report f in month slot m
#   %totals         {"f,m"} sum over everything read so far
#   %totals_project {"f,m"}{project} sum per project, plus a separate 'commons' entry
#
# f is the column number in StatisticsMonthly.csv; m is the month slot (0..12)
# as returned by MonthsSinceYearAgo.
# NOTE(review): MonthsSinceYearAgo and Abort are defined outside this sub;
# Abort is presumably fatal - confirm.
sub ReadStatisticsMonthlyForProject
{
  my $project = shift;

  my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
  my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;

  if (! -e $file_csv_in_1)
  { &Abort ("Input file '$file_csv_in_1' not found") ; }
  if (! -e $file_csv_in_2)
  { &Abort ("Input file '$file_csv_in_2' not found") ; }

  print "Read '$file_csv_in_1'\n" ;
  open CSV_IN, '<', $file_csv_in_1 or &Abort ("Input file '$file_csv_in_1' could not be opened: $!") ;

  undef %lines ;
  while ($line = <CSV_IN>)
  {
    ($language,$date,$counts) = split (',', $line, 3) ;

    next if $language eq 'commons' and $project ne 'wx' ;
    next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on language 'sr' of project 'wn'

    ($month,$day,$year) = split ('\/', $date) ;

    # yearly overview: article count of every June row, regardless of the 13 month window
    if ($month == 6)
    {
      @fields = split (',', $counts) ;
      $articles = $fields [4] ;
      $june_articles {"$project,$year"} += $articles ;
      $june_articles {"tot,$year"} += $articles ;
      $years {$year} ++ ;
    # print "$project $year $month : $articles\n" ;
    }

    # keep only rows inside the 13 month window.
    # Was 'if (! ($m < 0) || ($m > 12))': the negation applied to the first
    # comparison only, so rows with $m > 12 were accepted as well.
    my $m = &MonthsSinceYearAgo ($year, $month) ;
    next if ($m < 0) || ($m > 12) ;

    $lines {$language}{$m} = $line ;
    $languages {$language}++ ;
  }
  close CSV_IN ;

  foreach $language (sort keys %languages)
  {
    # a missing month inherits the line of the month before it
    for ($m=1 ; $m <= 12 ; $m++)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for ($m=0 ; $m <= 12 ; $m++)
    {
      $line = $lines {$language}{$m} ;
      chomp $line ;
      ($language,$date,$counts) = split (',', $line, 3) ;
      @fields = split (',', $counts) ;

      if ($project eq "wp")
      {
        foreach $f (1,4,6,11) # new editors, articles, new articles, edits
        {
          $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
          $totals {"$f,$m"} += $fields [$f] ;
          $totals_project {"$f,$m"} {$project} += $fields [$f] ;
          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }
      }
      else
      {
        foreach $f (1,4) # new editors, articles
        {
          $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
          $totals {"$f,$m"} += $fields [$f] ;
          $totals_project {"$f,$m"} {$project} += $fields [$f] ;

          if ($language eq 'commons')
          { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }

          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }
        foreach $f (6,11) # new articles, edits: per project totals only
        {
          $totals_project {"$f,$m"} {$project} += $fields [$f] ;
          if ($language eq 'commons')
          { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }
      }
    }
  }

  # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
  # but use f = column count in StatisticsMonthly.csv

  print "Read '$file_csv_in_2'\n" ;
  open CSV_IN, '<', $file_csv_in_2 or &Abort ("Input file '$file_csv_in_2' could not be opened: $!") ;

  undef %lines ;
  while ($line = <CSV_IN>)
  {
    chomp $line ;
    ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;

    next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
  # next if $language eq 'commons' ; # ignore editor count on commons altogether, most are already counted for other project
                                     # (even for several projects, to be tuned after centralauth dump is available)

    if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
    if ($group ne "A") { next ; }       # A: articles, T: talk pages, O: other namespaces

    ($month,$day,$year) = split ('\/', $date) ;
    my $m = &MonthsSinceYearAgo ($year, $month) ;
    if (($m < 0) || ($m > 12))
    { next ; }

    $lines {$language}{$m} = $line ;
    $languages {$language}++ ;
  }
  close CSV_IN ;

  foreach $language (sort keys %languages)
  {
    # a missing month inherits the line of the month before it
    for ($m=1 ; $m <= 12 ; $m++)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for ($m=0 ; $m <= 12 ; $m++)
    {
      $line = $lines {$language}{$m} ;
      chomp $line ;
      ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
      @fields = split (',', $counts) ;

      foreach $f (2,3) # editors_gt_5, editors_gt_100
      {
        # count user with over x edits
        # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
        # thresholds = 1,3,5,10,25,32,50,100,etc
        # so column 2 holds threshold 5 and column 7 holds threshold 100
        if ($f == 2) { $f2 = 2 ; }
        if ($f == 3) { $f2 = 7 ; }

        $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;

        if ($language ne 'commons') # ignore editor count on commons for totals, most editors are already counted for other project
        { $totals {"$f,$m"} += $fields [$f2] ; } # (even for several projects, to be tuned after centralauth dump is available)

        $totals_project {"$f,$m"} {$project} += $fields [$f2] ;

        if ($language eq 'commons')
        { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
      }
    }
  }
}
| 245 | + |
# Read the counts per binaries extension for 'commons' from
# "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" (language,mm/yyyy,counts...).
#
# Globals written:
#   %ext_cnt  {m}{column} count per month slot m (0..12); slot -1 holds the row
#             dated "00/0000", whose fields are used elsewhere as the extension names
#   %ext_tot  {m} sum of all columns in month slot m
#   @extndxs  column numbers of the (at most) 10 largest extensions in the latest
#             month slot found, largest first
#
# NOTE(review): when no row falls inside the window, $mmax stays -1 and the
# ranking is computed over the names row - confirm whether that can occur.
# NOTE(review): MonthsSinceYearAgo and Abort are defined outside this sub.
sub ReadStatisticsPerBinariesExtensionCommons
{
  my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
  my $mmax = -1 ;

  if (! -e $file_csv_in)
  { &Abort ("Input file '$file_csv_in' not found") ; }

  print "Read '$file_csv_in'\n" ;
  open CSV_IN, '<', $file_csv_in or &Abort ("Input file '$file_csv_in' could not be opened: $!") ;
  while ($line = <CSV_IN>)
  {
    chomp $line ;
    ($language,$date,$counts) = split (',', $line, 3) ;

    if ($language ne "commons") { next ; }

    # the row dated 00/0000 goes into slot -1 and is left out of the totals
    my $m = -1 ;
    if ($date ne "00/0000")
    {
      ($month,$year) = split ('\/', $date) ;
      $m = &MonthsSinceYearAgo ($year, $month) ;
      if (($m < 0) || ($m > 12))
      { next ; }
      if ($m > $mmax)
      { $mmax = $m ; }
    }

    @fields = split (',', $counts) ;
    $field_ndx = 0 ;
    foreach $field (@fields)
    {
      $ext_cnt {$m}{$field_ndx} = $field ;
      if ($m >= 0)
      { $ext_tot {$m} += $field ; }
      # print "EXT_CNT $field_ndx : $field\n" ;
      $field_ndx ++ ;
    }
  }
  close CSV_IN ;

  # rank the columns by their count in the latest month slot
  %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
  @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;

  # keep the first 10
  $extcnt = 0 ;
  foreach $extndx (@ext_cnt_mmax)
  {
    # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
    push @extndxs, $extndx ;
    if ($extcnt++ >= 9) { last ; }
  }
}
| 305 | + |
# Placeholder: does nothing at present.
# The open/close calls for the two media trend files ($file_regions_UV,
# $file_regions_Reach) are kept below as comments; nothing was ever read
# between them.
sub ReadMediaTrends
{
# open FILE_UV, '<', $file_regions_UV ;
# close FILE_UV ;

# open FILE_REACH, '<', $file_regions_Reach ;
# close FILE_REACH ;
}
| 314 | + |
# Write the yearly overview (article counts per project, as collected from the
# June rows) to $file_csv_out_year and echo every line to STDOUT.
#
# Two tables are written: one with plain numbers and a 'Readable version' in
# which counts are shortened to 'k' / 'M'. Each table has, per project in
# @projects2, the count and the change versus the year before, followed by
# the yearly growth of the overall total ('tot') and the change of that growth.
#
# Globals read   : %years, %june_articles, @projects2, @projects2b, $file_csv_out_year
# Globals written: %june_diff {year} = growth of the overall total since the year before
sub WriteYearlyData
{
  print "Write file '$file_csv_out_year'\n" ;
  open CSV_OUT, '>', $file_csv_out_year or &Abort ("Output file '$file_csv_out_year' could not be opened: $!") ;

  # growth per year is needed by both tables, and by the row of the year after
  foreach my $year (sort keys %years)
  { $june_diff {$year} = $june_articles {"tot,$year"} - $june_articles {"tot,".($year-1)} ; }

  &WriteYearlyDataTable ("Articles per project per year (June 30)", 0) ;
  &WriteYearlyDataTable ("\nReadable version", 1) ;

  close CSV_OUT ; # was never closed: buffered output depended on program exit
}

# Helper for WriteYearlyData: write one table (title, header, one row per year)
# to CSV_OUT and STDOUT. When $readable is true counts are shortened to k/M.
sub WriteYearlyDataTable
{
  my ($title, $readable) = @_ ;

  my $emit = sub { print "$_[0]\n" ; print CSV_OUT "$_[0]\n" ; } ;

  $emit->($title) ;
  $emit->(join ('', map { ",$_," } @projects2b) . ",Growth,") ;

  foreach my $year (sort keys %years)
  {
    my $row = "$year" ;
    foreach my $project (@projects2)
    {
      my $count = $june_articles {"$project,$year"} ;
      my $perc  = &WriteYearlyDataPerc ($count, $june_articles {"$project,".($year-1)}) ;
      if ($readable)
      { $count = &WriteYearlyDataShorten ($count) ; }
      $row .= ",$count,$perc" ;
    }

    my $diff = $june_diff {$year} ;
    my $perc = &WriteYearlyDataPerc ($diff, $june_diff {$year-1}) ;
    if ($readable)
    { $diff = &WriteYearlyDataShorten ($diff) ; } # was tested against $count, so growth was never shortened to 'k'
    $row .= ",$diff,$perc" ;

    $emit->($row) ;
  }
}

# Helper for WriteYearlyData: percentual change from $before to $now as e.g.
# '+12%' or '-3%'; '-' when there is no positive value to compare with.
sub WriteYearlyDataPerc
{
  my ($now, $before) = @_ ;
  return "-" unless $before > 0 ;

  my $perc = sprintf ("%.0f", 100 * ($now/$before) - 100) . '%' ;
  $perc =~ s/^(\d)/+$1/ ; # explicit plus sign for growth
  return $perc ;
}

# Helper for WriteYearlyData: 1234567 -> '1.2M', 12345 -> '12k', below 1000 unchanged.
sub WriteYearlyDataShorten
{
  my $number = shift ;
  if ($number >= 1000000)
  { return sprintf ("%.1f", $number / 1000000) . 'M' ; }
  if ($number >= 1000)
  { return sprintf ("%.0f", $number / 1000) . 'k' ; }
  return $number ;
}
| 399 | + |
# Write the monthly report to $file_csv_out (layout is meant to be charted in Excel).
#
# For every report f in (1,2,3,4,6,11) four tables are written:
#   - absolute values: overall total, then up to 25 wikis sorted by their value in the last month
#   - absolute values per project (@projects)
#   - the same wikis indexed to month slot 0 = 100, with the overall total inserted after the 9th wiki
#   - per project values indexed to month slot 0 = 100
# followed by two tables for Commons binaries per extension (absolute and indexed).
# Absolute tables end with two columns: change over one year (slot 12 versus slot 0)
# and over one month (slot 12 versus slot 11); 'n.a.' when the base value is zero.
#
# Globals read: %totals, %values, %totals_project, %ext_cnt, %ext_tot, @extndxs,
#               @projects, %out_languages, @out_report_descriptions, $csv_recent_months
# NOTE(review): GetProjectName and Abort are defined outside this sub.
sub WriteMonthlyData
{
  print "Write file '$file_csv_out'\n" ;
  # open used to be unchecked: on failure all output was silently lost
  open CSV_OUT, '>', $file_csv_out or &Abort ("Output file '$file_csv_out' could not be opened: $!") ;
  $output = "" ;
  foreach $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
  {

    $output .= "\n,${out_report_descriptions [$f]} - Absolute\n" ;
    $output .= "$csv_recent_months,%inc year, %inc month\n" ;

    $line = ",Total," ;
    for ($m = 0 ; $m <= 12 ; $m++)
    { $line .= $totals {"$f,$m"} . "," ; }

    # growth in one year
    if ($totals {"$f,0"} != 0)
    { $line .= sprintf ("%.1f", 100 * ($totals {"$f,12"} / $totals {"$f,0"}) - 100). "%," ; }
    else
    { $line .= "n.a.," ; }

    # growth in one month
    if ($totals {"$f,11"} != 0)
    { $line .= sprintf ("%.1f", 100 * ($totals {"$f,12"} / $totals {"$f,11"}) - 100). "%," ; }
    else
    { $line .= "n.a.," ; }

    $line =~ s/,$// ;
    $output .= "$line\n" ;

    # sort by absolute amount for last month
    %values_f_12 = %{$values {"$f,12"}} ;
    $index = 1 ;
    foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
    {
      # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;

      # key is "project,language"; the project name is appended except for wp and wx
      ($project,$language) = split (",", $key) ;
      $language_name = $out_languages {$language} ;
      if (($project ne "wp") && ($project ne "wx"))
      { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
      else
      { $line = "$index,$language_name," ; }

      for ($m = 0 ; $m <= 12 ; $m++)
      { $line .= $values {"$f,$m"} {$key} . "," ; }

      if ($values {"$f,0"} {$key} != 0)
      { $line .= sprintf ("%.1f", 100 * ($values {"$f,12"} {$key} / $values {"$f,0"} {$key}) - 100). "%," ; }
      else
      { $line .= "n.a.," ; }

      if ($values {"$f,11"} {$key} != 0)
      { $line .= sprintf ("%.1f", 100 * ($values {"$f,12"} {$key} / $values {"$f,11"} {$key}) - 100). "%," ; }
      else
      { $line .= "n.a.," ; }

      $line =~ s/,$// ;
      $output .= "$line\n" ;

      if ($index++ >= 25) { last ; } # NOTE(review): same value as global $maxpopularwikis - confirm whether that was meant to be used here
    }
    $output .= "\n" ;

    # totals per project, largest in last month first
    foreach $project (sort {$totals_project {"$f,12"} {$b} <=> $totals_project {"$f,12"} {$a}} @projects)
    {
#     next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons

      if ($project eq 'commons')
      { $line = ",Commons," ; }
      else
      { $line = "," . &GetProjectName ($project) . "," ; }

      for ($m = 0 ; $m <= 12 ; $m++)
      { $line .= $totals_project {"$f,$m"} {$project} . "," ; }

      if ($totals_project {"$f,0"} {$project} != 0)
      { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,12"} {$project} / $totals_project {"$f,0"} {$project}) - 100). "%," ; }
      else
      { $line .= "n.a.," ; }

      if ($totals_project {"$f,11"} {$project} != 0)
      { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,12"} {$project} / $totals_project {"$f,11"} {$project}) - 100). "%," ; }
      else
      { $line .= "n.a.," ; }

      $line =~ s/,$// ;
      $output .= "$line\n" ;
    }

    $output .= "\n,${out_report_descriptions [$f]} - Indexed\n" ;
    $output .= "$csv_recent_months\n" ;

    # sort by absolute amount for last month
    $index = 1 ;
    foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
    {
      # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;

      ($project,$language) = split (",", $key) ;
      $language_name = $out_languages {$language} ;
      if (($project ne "wp") && ($project ne "wx"))
      { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
      else
      { $line = "$index,$language_name," ; }

      # value in month slot 0 is the 100% reference; empty cells when it is zero
      $value_100 = $values {"$f,0"} {$key} ;
      for ($m = 0 ; $m <= 12 ; $m++)
      {
        if ($value_100 != 0)
        { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m"} {$key} / $value_100)) . "," ; }
        else
        { $line .= "," ; }
      }
      $line =~ s/,$// ;
      $output .= "$line\n" ;

      # put totals last in chart to show line on top of others
      if ($index == 9)
      {
        $line = ",Total," ;
        $total_100 = $totals {"$f,0"} ;
        for ($m = 0 ; $m <= 12 ; $m++)
        {
          if ($total_100 != 0)
          { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m"} / $total_100)) . "," ; }
          else
          { $line .= "," ; }
        }
        $line .= ",(sorted here to make it top-most line out of 10 in Excel)" ;
        $output .= "$line\n" ;
      }

      if ($index++ >= 25) { last ; }
    }
    $output .= "\n" ;

    foreach $project (sort {$totals_project {"$f,12"} {$b} <=> $totals_project {"$f,12"} {$a}} @projects)
    {
#     next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons

      if ($project eq 'commons')
      { $line = ",Commons," ; }
      else
      { $line = "," . &GetProjectName ($project) . "," ; }

      $value_100 = $totals_project {"$f,0"} {$project} ;
      for ($m = 0 ; $m <= 12 ; $m++)
      {
        if ($value_100 != 0)
        { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m"} {$project} / $value_100)) . "," ; }
        else
        { $line .= "," ; }
      }
      $line =~ s/,$// ;
      $output .= "$line\n" ;
    }
    $output .= "\n," . '=' x 150 . "\n" ;
  }

  print CSV_OUT $output ;

  $output = "\n,Binaries per month - Absolute\n" ;
  # NOTE(review): the header is written twice here (second time after a blank line);
  # looks like a leftover, kept as is because the spreadsheet may rely on the row count
  $output .= "$csv_recent_months,%inc year, %inc month\n" ;
  $output .= "\n$csv_recent_months,%inc year,%inc month\n" ;

  $line = ",Total," ;
  for ($m = 0 ; $m <= 12 ; $m++)
  { $line .= $ext_tot {$m} . "," ; }

  if ($ext_tot {0} != 0)
  { $line .= sprintf ("%.1f", 100 * ($ext_tot {12} / $ext_tot {0}) - 100). "%," ; }
  else
  { $line .= "n.a.," ; }

  if ($ext_tot {11} != 0)
  { $line .= sprintf ("%.1f", 100 * ($ext_tot {12} / $ext_tot {11}) - 100). "%," ; }
  else
  { $line .= "n.a.," ; }

  $line =~ s/,$// ;
  $output .= "$line\n" ;

  $index = 0 ;
  # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
  # always 10 rows: when @extndxs holds fewer entries the remaining rows are 'xxx' placeholders
  for ($e = $#extndxs ; $e >= $#extndxs - 9 ; $e--)
  {
    $index++ ;

    if ($e < 0)
    {
      $line = "$index,xxx," ;
      for ($m = 0 ; $m <= 12 ; $m++)
      { $line .= "," ; }
    }
    else
    {
      $extndx = $extndxs [$e] ;
      $line = "$index,${ext_cnt {-1}{$extndx}}," ; # slot -1 holds the extension name

      for ($m = 0 ; $m <= 12 ; $m++)
      { $line .= $ext_cnt {$m}{$extndx} . "," ; }

      if ($ext_cnt {0}{$extndx} != 0)
      { $line .= sprintf ("%.1f", 100 * ($ext_cnt {12}{$extndx} / $ext_cnt {0}{$extndx}) - 100). "%," ; }
      else
      { $line .= "n.a.," ; }

      if ($ext_cnt {11}{$extndx} != 0)
      { $line .= sprintf ("%.1f", 100 * ($ext_cnt {12}{$extndx} / $ext_cnt {11}{$extndx}) - 100). "%," ; }
      else
      { $line .= "n.a.," ; }
    }

    $line =~ s/,$// ;
    $output .= "$line\n" ;
  }

  print CSV_OUT $output ;

  $output = "\n,Binaries per month - Indexed\n" ;
  $output .= "$csv_recent_months\n" ;

  $index = 0 ;
  # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
  for ($e = $#extndxs ; $e >= $#extndxs - 9 ; $e--)
  {
    $index++ ;

    if ($e < 0)
    {
      $line = "$index,xxx," ;
      for ($m = 0 ; $m <= 12 ; $m++)
      { $line .= "," ; }
    }
    else
    {
      $extndx = $extndxs [$e] ;
      $line = "$index,${ext_cnt {-1}{$extndx}}," ;
      $ext_cnt_m0 = $ext_cnt {0}{$extndx} ; # month slot 0 is the 100% reference
      for ($m = 0 ; $m <= 12 ; $m++)
      {
        if ($ext_cnt_m0 > 0)
        { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m}{$extndx} / $ext_cnt_m0)). "," ; }
        else
        { $line .= "," ; }
      }
    }

    $line =~ s/,$// ;
    $output .= "$line\n" ;
  }
  print CSV_OUT $output ;
  close CSV_OUT ;

  print "\nOutput written to $file_csv_out\n\n" ;
}
| 657 | + |
# Derive the reporting window from the last month to report on.
#
# Parameters: year (4 digits) and month (1..12) of the last month in the window.
#
# Prints which month the trend data runs up to and which months it is compared
# with (one month and one year earlier), and sets these globals:
#   $year_, $month_     copies of the parameters
#   @recent_months      13 entries "yyyy/mm", from the same month a year earlier up to the given month
#   $csv_recent_months  csv header ",project,mm/yyyy,...,mm/yyyy" for the same 13 months
#
# The messages used to format $month+1, although the month is treated as
# 1-based everywhere else in this sub: December was reported as "yyyy/13"
# and every other month as the month after it.
sub SetComparisonPeriods
{
  my $year = shift ;
  my $month = shift ;

  $year_ = $year ;
  $month_ = $month ;

  my $month_0          = sprintf ("%04d/%02d", $year,   $month) ;
  my $month_0_minus_12 = sprintf ("%04d/%02d", $year-1, $month) ;

  # month before: January wraps to December of the year before
  my ($year_before, $month_before) = $month > 1 ? ($year,$month-1) : ($year-1,12) ;
  my $month_0_minus_1  = sprintf ("%04d/%02d", $year_before, $month_before) ;

  print "\nWrite trend data up till month: $month_0\n\n" ;
  print "Compare with previous month: $month_0_minus_1, previous year: $month_0_minus_12\n\n" ;

  # 13 months: start at the same month one year earlier
  $csv_recent_months = ",project," ;
  $year = $year_ - 1 ;
  $month = $month_ ;
  for ($m = 0 ; $m <= 12 ; $m++)
  {
    $recent_months [$m] = sprintf ("%04d/%02d", $year, $month) ;
    $csv_recent_months .= sprintf ("%02d/%04d", $month, $year) . "," ;
    ($year,$month) = $month < 12 ? ($year,$month+1) : ($year+1,1) ;
  }
  $csv_recent_months =~ s/,$// ;
}
| 689 | + |
| 690 | +#sub WriteCsvFilesPerPeriod |
| 691 | +#{ |
| 692 | +# foreach $period (sort keys %totals) |
| 693 | +# { |
| 694 | +# &LogT ("\nWrite totals per $period: ") ; |
| 695 | +# $desc = $descriptions {$period} ; |
| 696 | + |
| 697 | +# foreach $project (sort keys %{$totals {$period}}) |
| 698 | +# { |
| 699 | +# &Log ("$project ") ; |
| 700 | + |
| 701 | +# $dir_out = "$path_out/csv_$project" ; |
| 702 | +# if (! -d $dir_out) |
| 703 | +# { mkdir $dir_out, 0777 ; } |
| 704 | + |
| 705 | +# $file_out = "$dir_out/$desc.csv" ; |
| 706 | + |
| 707 | +# open CSV, ">", $file_out ; |
| 708 | +# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}}) |
| 709 | +# { |
| 710 | +# ($language,$yearmonth) = split (",", $key) ; |
| 711 | +# # print "PERIOD $period PROJECT $project KEY $key\n" ; |
| 712 | +# if ($period eq "month") |
| 713 | +# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; } |
| 714 | +# else |
| 715 | +# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; } |
| 716 | +# } |
| 717 | +# close CSV ; |
| 718 | +# } |
| 719 | +# } |
| 720 | +#} |
| 721 | + |
| 722 | +#sub WriteCsvHtmlFilesPopularWikis |
| 723 | +#{ |
| 724 | +# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ; |
| 725 | + |
| 726 | +# $dir_out = "$path_out/csv_wp" ; |
| 727 | +# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$month_0_file.csv" ; |
| 728 | + |
| 729 | +## extend with normalized counts |
| 730 | +## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv |
| 731 | + |
| 732 | +# open CSV, ">", $file_out ; |
| 733 | +# print CSV $csv_recent_months ; |
| 734 | + |
| 735 | +# # write per popular language+wiki 13 months of page view totals |
| 736 | +# $lines = 0 ; |
| 737 | +# foreach $line (@totals_lastmonth) |
| 738 | +# { |
| 739 | +# if (++$lines > $maxpopularwikis) { last ; } |
| 740 | + |
| 741 | +# ($project, $language) = split (',', $line) ; |
| 742 | +# $largest_projects {"$project-$language"} ++ ; |
| 743 | + |
| 744 | +# $language_name = $out_languages {$language} ; |
| 745 | + |
| 746 | +# if (($project ne "wp") && ($project ne "wx")) |
| 747 | +# { print CSV "$language_name " . &GetProjectName ($project) . "," ; } |
| 748 | +# else |
| 749 | +# { print CSV "$language_name," ; } |
| 750 | + |
| 751 | +## %test = %{$totals {"month"} {"wp"} }; |
| 752 | +## %test2 = @recent_months ; |
| 753 | +# for ($m = 0 ; $m <= 12 ; $m++) |
| 754 | +# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; } |
| 755 | +# print CSV "\n" ; |
| 756 | +# } |
| 757 | + |
| 758 | +# print CSV "\n$csv_recent_months" ; |
| 759 | + |
| 760 | +# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100 |
| 761 | +# $lines = 0 ; |
| 762 | +# foreach $line (@totals_lastmonth) |
| 763 | +# { |
| 764 | +# if (++$lines > $maxpopularwikis) { last ; } |
| 765 | + |
| 766 | +# ($project, $language) = split (',', $line) ; |
| 767 | +# $language_name = $out_languages {$language} ; |
| 768 | + |
| 769 | +# if (($project ne "wp") && ($project ne "wx")) |
| 770 | +# { print CSV "$language_name " . &GetProjectName ($project) . "," ; } |
| 771 | +# else |
| 772 | +# { print CSV "$language_name," ; } |
| 773 | + |
| 774 | +# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ; |
| 775 | +# for ($m = 0 ; $m <= 12 ; $m++) |
| 776 | +# { |
| 777 | +# if ($recent_month_0 > 0) |
| 778 | +# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; } |
| 779 | +# else |
| 780 | +# { print CSV "," ; } |
| 781 | +# } |
| 782 | + |
| 783 | +# print CSV "\n" ; |
| 784 | +# } |
| 785 | +# close CSV ; |
| 786 | + |
| 787 | +# # write ready made table rows for report card: page views top 25 movers shakers |
| 788 | +# foreach $key (keys %largest_projects) |
| 789 | +# { |
| 790 | +# ($project,$language) = split ('-', $key) ; |
| 791 | + |
| 792 | +# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_0"} ; |
| 793 | +# $total_prevmonth = $totals {"month"} {$project} {"$language,$month_0_minus_1"} ; |
| 794 | +# $total_prevyear = $totals {"month"} {$project} {"$language,$month_0_minus_12"} ; |
| 795 | + |
| 796 | +# $perc_month = "no data" ; |
| 797 | +# $perc_year = "no data" ; |
| 798 | + |
| 799 | +# if ($total_prevyear > 0) |
| 800 | +# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; } |
| 801 | +# if ($total_prevyear > 0) |
| 802 | +# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; } |
| 803 | + |
| 804 | +# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ; |
| 805 | + |
| 806 | +# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ; |
| 807 | + |
| 808 | +# $project_name = &GetProjectName ($project) ; |
| 809 | +# $language_name = $out_languages {$language} ; |
| 810 | + |
| 811 | +# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ; |
| 812 | +# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ; |
| 813 | +# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ; |
| 814 | +# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ; |
| 815 | +# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ; |
| 816 | + |
| 817 | +# $growth_figures_text {"$perc_month-$project-$language"} = $line ; |
| 818 | +# $growth_figures_html {"$perc_month-$project-$language"} = $html ; |
| 819 | +# } |
| 820 | + |
| 821 | +# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$month_0_file.html" ; |
| 822 | + |
| 823 | +# open HTML, ">", $file_html ; |
| 824 | +# foreach $key (sort {$b <=> $a} keys %growth_figures_text) |
| 825 | +# { |
| 826 | +# print "$key: ". $growth_figures_text {$key} . "\n" ; |
| 827 | +# print HTML $growth_figures_html {$key} ; |
| 828 | +# } |
| 829 | +# close HTML ; |
| 830 | +#} |
| 831 | + |
| 832 | + |
| 833 | +sub GetProjectName # map a two-letter project code (wp, wb, wk, ...) to its display name |
| 834 | +{ |
| 835 | + my $project = shift ; |
| 836 | + # code => display name; "wx" maps to the empty string, exactly as before |
| 837 | + my %names = ( |
| 838 | + wp => "Wikipedia", wb => "Wikibooks", |
| 839 | + wk => "Wiktionary", wx => "", |
| 840 | + wn => "Wikinews", wq => "Wikiquote", |
| 841 | + ws => "Wikisource", wv => "Wikiversity") ; |
| 842 | + |
| 843 | + # $project_name stays a package global (as before) but is now set on every call, |
| 844 | + # so an unknown or undefined code yields "" instead of the name left over from an earlier call |
| 845 | + $project_name = (defined $project && exists $names {$project}) ? $names {$project} : "" ; |
| 846 | + return ($project_name) ; |
| 847 | +} |
| 848 | + |
| 849 | +sub MonthsSinceYearAgo # 12 for the reference month ($year_0/$month_0, package globals), 0 for the same month one year earlier |
| 850 | +{ |
| 851 | + my ($yyyy, $mm) = @_ ; |
| 852 | + my $months_back = ($year_0 - $yyyy) * 12 + $month_0 - $mm ; # distance in months from the reference month |
| 853 | + return 12 - $months_back ; |
| 854 | +} |
| 855 | + |
| 856 | +#sub Log |
| 857 | +#{ |
| 858 | +# $msg = shift ; |
| 859 | +# print $msg ; |
| 860 | +# print LOG $msg ; |
| 861 | +#} |
| 862 | + |
| 863 | +#sub LogT |
| 864 | +#{ |
| 865 | +# $msg = shift ; |
| 866 | +# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ; |
| 867 | +# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ; |
| 868 | +# $msg =~ s/^(\n*)/$1$time/s ; |
| 869 | +# &Log ($msg) ; |
| 870 | +#} |
| 871 | + |
| 872 | +sub MmSs # current local time formatted as "hh:mm:ss " (with one trailing blank) |
| 873 | +{ |
| 874 | + my @now = localtime (time) ; # list context: (sec, min, hour, ...) |
| 875 | + return sprintf ("%02d:%02d:%02d ", @now [2,1,0]) ; |
| 876 | +} |
| 877 | + |
| 878 | +sub Abort # print the given message followed by "Execution aborted.", then end the program |
| 879 | +{ |
| 880 | + my ($reason) = @_ ; |
| 881 | + print $reason . "\nExecution aborted." ; |
| 882 | + # to do: log also to file |
| 883 | + exit ; # bare exit, i.e. exit status 0 |
| 884 | +} |
| 885 | + |
| 886 | +sub InitProjectNames # fill the package globals %wikipedias, %out_urls, %out_languages and %out_article |
| 887 | +{ |
| 888 | + # copied from WikiReports.pl |
| 889 | + # each value is "<url> <display name>"; hyphenated codes are listed twice (quoted with '-', bare with '_') |
| 890 | + %wikipedias = ( |
| 891 | +# mediawiki=>"http://wikimediafoundation.org Wikimedia", |
| 892 | + nostalgia=>"http://nostalgia.wikipedia.org Nostalgia", |
| 893 | + sources=>"http://wikisource.org Old Wikisource", |
| 894 | + meta=>"http://meta.wikimedia.org Meta-Wiki", |
| 895 | + beta=>"http://beta.wikiversity.org Beta", |
| 896 | + species=>"http://species.wikipedia.org WikiSpecies", |
| 897 | + commons=>"http://commons.wikimedia.org Commons", |
| 898 | + foundation=>"http://wikimediafoundation.org Wikimedia Foundation", |
| 899 | + sep11=>"http://sep11.wikipedia.org In Memoriam", |
| 900 | + nlwikimedia=>"http://nl.wikimedia.org Wikimedia Nederland", |
| 901 | + plwikimedia=>"http://pl.wikimedia.org Wikimedia Polska", |
| 902 | + mediawiki=>"http://www.mediawiki.org MediaWiki", |
| 903 | + dewikiversity=>"http://de.wikiversity.org Wikiversität", |
| 904 | + frwikiversity=>"http://fr.wikiversity.org Wikiversität", |
| 905 | + wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005", |
| 906 | + wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006", |
| 907 | + aa=>"http://aa.wikipedia.org Afar", |
| 908 | + ab=>"http://ab.wikipedia.org Abkhazian", |
| 909 | + af=>"http://af.wikipedia.org Afrikaans", |
| 910 | + ak=>"http://ak.wikipedia.org Akan", # was Akana |
| 911 | + als=>"http://als.wikipedia.org Alemannic", # was Elsatian |
| 912 | + am=>"http://am.wikipedia.org Amharic", |
| 913 | + an=>"http://an.wikipedia.org Aragonese", |
| 914 | + ang=>"http://ang.wikipedia.org Anglo-Saxon", |
| 915 | + ar=>"http://ar.wikipedia.org Arabic", |
| 916 | + arc=>"http://arc.wikipedia.org Aramaic", |
| 917 | + as=>"http://as.wikipedia.org Assamese", |
| 918 | + ast=>"http://ast.wikipedia.org Asturian", |
| 919 | + av=>"http://av.wikipedia.org Avar", # was Avienan |
| 920 | + ay=>"http://ay.wikipedia.org Aymara", |
| 921 | + az=>"http://az.wikipedia.org Azeri", # was Azerbaijani |
| 922 | + ba=>"http://ba.wikipedia.org Bashkir", |
| 923 | + bar=>"http://bar.wikipedia.org Bavarian", |
| 924 | + bat_smg=>"http://bat-smg.wikipedia.org Samogitian", |
| 925 | + "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian", |
| 926 | + bcl=>"http://bcl.wikipedia.org Central Bicolano", |
| 927 | + be=>"http://be.wikipedia.org Belarusian", |
| 928 | + "be-x-old"=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)", # host fixed: was be.wikipedia.org |
| 929 | + be_x_old=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)", # host fixed: was be.wikipedia.org |
| 930 | + bg=>"http://bg.wikipedia.org Bulgarian", |
| 931 | + bh=>"http://bh.wikipedia.org Bihari", |
| 932 | + bi=>"http://bi.wikipedia.org Bislama", |
| 933 | + bm=>"http://bm.wikipedia.org Bambara", |
| 934 | + bn=>"http://bn.wikipedia.org Bengali", |
| 935 | + bo=>"http://bo.wikipedia.org Tibetan", |
| 936 | + bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri", |
| 937 | + br=>"http://br.wikipedia.org Breton", |
| 938 | + bs=>"http://bs.wikipedia.org Bosnian", |
| 939 | + bug=>"http://bug.wikipedia.org Buginese", |
| 940 | + bxr=>"http://bxr.wikipedia.org Buryat", |
| 941 | + ca=>"http://ca.wikipedia.org Catalan", |
| 942 | + cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano", |
| 943 | + "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano", |
| 944 | + cdo=>"http://cdo.wikipedia.org Min Dong", |
| 945 | + ce=>"http://ce.wikipedia.org Chechen", |
| 946 | + ceb=>"http://ceb.wikipedia.org Cebuano", |
| 947 | + ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru |
| 948 | + cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw |
| 949 | + chr=>"http://chr.wikipedia.org Cherokee", |
| 950 | + chy=>"http://chy.wikipedia.org Cheyenne", # was Setsêhestâhese |
| 951 | + co=>"http://co.wikipedia.org Corsican", |
| 952 | + cr=>"http://cr.wikipedia.org Cree", |
| 953 | + crh=>"http://crh.wikipedia.org Crimean Tatar", |
| 954 | + cs=>"http://cs.wikipedia.org Czech", |
| 955 | + csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian |
| 956 | + cu=>"http://cu.wikipedia.org Old Church Slavonic", # host fixed: was cv.wikipedia.org |
| 957 | + cv=>"http://cv.wikipedia.org Chuvash", # was Cavas |
| 958 | + cy=>"http://cy.wikipedia.org Welsh", |
| 959 | + da=>"http://da.wikipedia.org Danish", |
| 960 | + de=>"http://de.wikipedia.org German", |
| 961 | + diq=>"http://diq.wikipedia.org Zazaki", |
| 962 | + dk=>"http://dk.wikipedia.org Danish", |
| 963 | + dsb=>"http://dsb.wikipedia.org Lower Sorbian", |
| 964 | + dv=>"http://dv.wikipedia.org Divehi", |
| 965 | + dz=>"http://dz.wikipedia.org Dzongkha", |
| 966 | + ee=>"http://ee.wikipedia.org Ewe", |
| 967 | + el=>"http://el.wikipedia.org Greek", |
| 968 | + eml=>"http://eml.wikipedia.org Emilian-Romagnol", |
| 969 | + en=>"http://en.wikipedia.org English", |
| 970 | + eo=>"http://eo.wikipedia.org Esperanto", |
| 971 | + es=>"http://es.wikipedia.org Spanish", |
| 972 | + et=>"http://et.wikipedia.org Estonian", |
| 973 | + eu=>"http://eu.wikipedia.org Basque", |
| 974 | + ext=>"http://ext.wikipedia.org Extremaduran", |
| 975 | + fa=>"http://fa.wikipedia.org Persian", |
| 976 | + ff=>"http://ff.wikipedia.org Fulfulde", |
| 977 | + fi=>"http://fi.wikipedia.org Finnish", |
| 978 | + "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro", |
| 979 | + fiu_vro=>"http://fiu-vro.wikipedia.org Voro", |
| 980 | + fj=>"http://fj.wikipedia.org Fijian", |
| 981 | + fo=>"http://fo.wikipedia.org Faroese", # was Faeroese |
| 982 | + fr=>"http://fr.wikipedia.org French", |
| 983 | + frp=>"http://frp.wikipedia.org Arpitan", |
| 984 | + fur=>"http://fur.wikipedia.org Friulian", |
| 985 | + fy=>"http://fy.wikipedia.org Frisian", |
| 986 | + ga=>"http://ga.wikipedia.org Irish", |
| 987 | + gan=>"http://gan.wikipedia.org Gan", |
| 988 | + gay=>"http://gay.wikipedia.org Gayo", |
| 989 | + gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic |
| 990 | + gl=>"http://gl.wikipedia.org Galician", # was Galego |
| 991 | + glk=>"http://glk.wikipedia.org Gilaki", |
| 992 | + gn=>"http://gn.wikipedia.org Guarani", |
| 993 | + got=>"http://got.wikipedia.org Gothic", |
| 994 | + gu=>"http://gu.wikipedia.org Gujarati", |
| 995 | + gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic |
| 996 | + ha=>"http://ha.wikipedia.org Hausa", |
| 997 | + hak=>"http://hak.wikipedia.org Hakka", |
| 998 | + haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian |
| 999 | + he=>"http://he.wikipedia.org Hebrew", |
| 1000 | + hi=>"http://hi.wikipedia.org Hindi", |
| 1001 | + hif=>"http://hif.wikipedia.org Fiji Hindi", |
| 1002 | + ho=>"http://ho.wikipedia.org Hiri Motu", |
| 1003 | + hr=>"http://hr.wikipedia.org Croatian", |
| 1004 | + hsb=>"http://hsb.wikipedia.org Upper Sorbian", |
| 1005 | + ht=>"http://ht.wikipedia.org Haitian", |
| 1006 | + hu=>"http://hu.wikipedia.org Hungarian", |
| 1007 | + hy=>"http://hy.wikipedia.org Armenian", |
| 1008 | + hz=>"http://hz.wikipedia.org Herero", |
| 1009 | + ia=>"http://ia.wikipedia.org Interlingua", |
| 1010 | + iba=>"http://iba.wikipedia.org Iban", |
| 1011 | + id=>"http://id.wikipedia.org Indonesian", |
| 1012 | + ie=>"http://ie.wikipedia.org Interlingue", |
| 1013 | + ig=>"http://ig.wikipedia.org Igbo", |
| 1014 | + ii=>"http://ii.wikipedia.org Yi", |
| 1015 | + ik=>"http://ik.wikipedia.org Inupiak", |
| 1016 | + ilo=>"http://ilo.wikipedia.org Ilokano", |
| 1017 | + io=>"http://io.wikipedia.org Ido", |
| 1018 | + is=>"http://is.wikipedia.org Icelandic", |
| 1019 | + it=>"http://it.wikipedia.org Italian", |
| 1020 | + iu=>"http://iu.wikipedia.org Inuktitut", |
| 1021 | + ja=>"http://ja.wikipedia.org Japanese", |
| 1022 | + jbo=>"http://jbo.wikipedia.org Lojban", |
| 1023 | + jv=>"http://jv.wikipedia.org Javanese", |
| 1024 | + ka=>"http://ka.wikipedia.org Georgian", |
| 1025 | + kaa=>"http://kaa.wikipedia.org Karakalpak", |
| 1026 | + kab=>"http://kab.wikipedia.org Kabyle", # host fixed: was ka.wikipedia.org |
| 1027 | + kaw=>"http://kaw.wikipedia.org Kawi", |
| 1028 | + kg=>"http://kg.wikipedia.org Kongo", |
| 1029 | + ki=>"http://ki.wikipedia.org Kikuyu", |
| 1030 | + kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo |
| 1031 | + kk=>"http://kk.wikipedia.org Kazakh", |
| 1032 | + kl=>"http://kl.wikipedia.org Greenlandic", |
| 1033 | + km=>"http://km.wikipedia.org Khmer", # was Cambodian |
| 1034 | + kn=>"http://kn.wikipedia.org Kannada", |
| 1035 | + ko=>"http://ko.wikipedia.org Korean", |
| 1036 | + kr=>"http://kr.wikipedia.org Kanuri", |
| 1037 | + ks=>"http://ks.wikipedia.org Kashmiri", |
| 1038 | + ksh=>"http://ksh.wikipedia.org Ripuarian", |
| 1039 | + ku=>"http://ku.wikipedia.org Kurdish", |
| 1040 | + kv=>"http://kv.wikipedia.org Komi", |
| 1041 | + kw=>"http://kw.wikipedia.org Cornish", # was Kornish |
| 1042 | + ky=>"http://ky.wikipedia.org Kirghiz", |
| 1043 | + la=>"http://la.wikipedia.org Latin", |
| 1044 | + lad=>"http://lad.wikipedia.org Ladino", |
| 1045 | + lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch |
| 1046 | + lbe=>"http://lbe.wikipedia.org Lak", |
| 1047 | + lg=>"http://lg.wikipedia.org Ganda", |
| 1048 | + li=>"http://li.wikipedia.org Limburgish", |
| 1049 | + lij=>"http://lij.wikipedia.org Ligurian", |
| 1050 | + lmo=>"http://lmo.wikipedia.org Lombard", |
| 1051 | + ln=>"http://ln.wikipedia.org Lingala", |
| 1052 | + lo=>"http://lo.wikipedia.org Laotian", |
| 1053 | + ls=>"http://ls.wikipedia.org Latino Sine Flexione", |
| 1054 | + lt=>"http://lt.wikipedia.org Lithuanian", |
| 1055 | + lv=>"http://lv.wikipedia.org Latvian", |
| 1056 | + mad=>"http://mad.wikipedia.org Madurese", |
| 1057 | + mak=>"http://mak.wikipedia.org Makasar", |
| 1058 | + map_bms=>"http://map-bms.wikipedia.org Banyumasan", |
| 1059 | + "map-bms"=>"http://map-bms.wikipedia.org Banyumasan", |
| 1060 | + mdf=>"http://mdf.wikipedia.org Moksha", |
| 1061 | + mg=>"http://mg.wikipedia.org Malagasy", |
| 1062 | + mh=>"http://mh.wikipedia.org Marshallese", |
| 1063 | + mi=>"http://mi.wikipedia.org Maori", |
| 1064 | + min=>"http://min.wikipedia.org Minangkabau", |
| 1065 | + minnan=>"http://minnan.wikipedia.org Minnan", |
| 1066 | + mk=>"http://mk.wikipedia.org Macedonian", |
| 1067 | + ml=>"http://ml.wikipedia.org Malayalam", |
| 1068 | + mn=>"http://mn.wikipedia.org Mongolian", |
| 1069 | + mo=>"http://mo.wikipedia.org Moldavian", |
| 1070 | + mr=>"http://mr.wikipedia.org Marathi", |
| 1071 | + ms=>"http://ms.wikipedia.org Malay", |
| 1072 | + mt=>"http://mt.wikipedia.org Maltese", |
| 1073 | + mus=>"http://mus.wikipedia.org Muskogee", |
| 1074 | + my=>"http://my.wikipedia.org Burmese", |
| 1075 | + myv=>"http://myv.wikipedia.org Erzya", |
| 1076 | + mzn=>"http://mzn.wikipedia.org Mazandarani", |
| 1077 | + na=>"http://na.wikipedia.org Nauruan", # was Nauru |
| 1078 | + nah=>"http://nah.wikipedia.org Nahuatl", |
| 1079 | + nap=>"http://nap.wikipedia.org Neapolitan", |
| 1080 | + nds=>"http://nds.wikipedia.org Low Saxon", |
| 1081 | + nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon", |
| 1082 | + "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon", |
| 1083 | + ne=>"http://ne.wikipedia.org Nepali", |
| 1084 | + new=>"http://new.wikipedia.org Nepal Bhasa", |
| 1085 | + ng=>"http://ng.wikipedia.org Ndonga", |
| 1086 | + nl=>"http://nl.wikipedia.org Dutch", |
| 1087 | + nov=>"http://nov.wikipedia.org Novial", |
| 1088 | + nrm=>"http://nrm.wikipedia.org Norman", |
| 1089 | + nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian |
| 1090 | + no=>"http://no.wikipedia.org Norwegian", |
| 1091 | + nv=>"http://nv.wikipedia.org Navajo", # was Avayo |
| 1092 | + ny=>"http://ny.wikipedia.org Chichewa", |
| 1093 | + oc=>"http://oc.wikipedia.org Occitan", |
| 1094 | + om=>"http://om.wikipedia.org Oromo", |
| 1095 | + or=>"http://or.wikipedia.org Oriya", |
| 1096 | + os=>"http://os.wikipedia.org Ossetic", |
| 1097 | + pa=>"http://pa.wikipedia.org Punjabi", |
| 1098 | + pag=>"http://pag.wikipedia.org Pangasinan", |
| 1099 | + pam=>"http://pam.wikipedia.org Kapampangan", |
| 1100 | + pap=>"http://pap.wikipedia.org Papiamentu", |
| 1101 | + pdc=>"http://pdc.wikipedia.org Pennsylvania German", |
| 1102 | + pi=>"http://pi.wikipedia.org Pali", |
| 1103 | + pih=>"http://pih.wikipedia.org Norfolk", |
| 1104 | + pl=>"http://pl.wikipedia.org Polish", |
| 1105 | + pms=>"http://pms.wikipedia.org Piedmontese", |
| 1106 | + ps=>"http://ps.wikipedia.org Pashto", |
| 1107 | + pt=>"http://pt.wikipedia.org Portuguese", |
| 1108 | + qu=>"http://qu.wikipedia.org Quechua", |
| 1109 | + rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance |
| 1110 | + rmy=>"http://rmy.wikipedia.org Romani", |
| 1111 | + rn=>"http://rn.wikipedia.org Kirundi", |
| 1112 | + ro=>"http://ro.wikipedia.org Romanian", |
| 1113 | + roa_rup=>"http://roa-rup.wikipedia.org Aromanian", |
| 1114 | + "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian", |
| 1115 | + roa_tara=>"http://roa-tara.wikipedia.org Tarantino", |
| 1116 | + "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino", |
| 1117 | + ru=>"http://ru.wikipedia.org Russian", |
| 1118 | + ru_sib=>"http://ru-sib.wikipedia.org Siberian", |
| 1119 | + "ru-sib"=>"http://ru-sib.wikipedia.org Siberian", |
| 1120 | + rw=>"http://rw.wikipedia.org Kinyarwanda", |
| 1121 | + sa=>"http://sa.wikipedia.org Sanskrit", |
| 1122 | + sah=>"http://sah.wikipedia.org Sakha", |
| 1123 | + sc=>"http://sc.wikipedia.org Sardinian", |
| 1124 | + scn=>"http://scn.wikipedia.org Sicilian", |
| 1125 | + sco=>"http://sco.wikipedia.org Scots", |
| 1126 | + sd=>"http://sd.wikipedia.org Sindhi", |
| 1127 | + se=>"http://se.wikipedia.org Northern Sami", |
| 1128 | + sg=>"http://sg.wikipedia.org Sangro", |
| 1129 | + sh=>"http://sh.wikipedia.org Serbo-Croatian", |
| 1130 | + si=>"http://si.wikipedia.org Sinhala", # was Singhalese |
| 1131 | + simple=>"http://simple.wikipedia.org Simple English", |
| 1132 | + sk=>"http://sk.wikipedia.org Slovak", |
| 1133 | + sl=>"http://sl.wikipedia.org Slovene", |
| 1134 | + sm=>"http://sm.wikipedia.org Samoan", |
| 1135 | + sn=>"http://sn.wikipedia.org Shona", |
| 1136 | + so=>"http://so.wikipedia.org Somali", # was Somalian |
| 1137 | + sq=>"http://sq.wikipedia.org Albanian", |
| 1138 | + sr=>"http://sr.wikipedia.org Serbian", |
| 1139 | + srn=>"http://srn.wikipedia.org Sranan", |
| 1140 | + ss=>"http://ss.wikipedia.org Siswati", |
| 1141 | + st=>"http://st.wikipedia.org Sesotho", |
| 1142 | + stq=>"http://stq.wikipedia.org Saterland Frisian", |
| 1143 | + su=>"http://su.wikipedia.org Sundanese", |
| 1144 | + sv=>"http://sv.wikipedia.org Swedish", |
| 1145 | + sw=>"http://sw.wikipedia.org Swahili", |
| 1146 | + szl=>"http://szl.wikipedia.org Silesian", |
| 1147 | + ta=>"http://ta.wikipedia.org Tamil", |
| 1148 | + te=>"http://te.wikipedia.org Telugu", |
| 1149 | + test=>"http://test.wikipedia.org Test", |
| 1150 | + tet=>"http://tet.wikipedia.org Tetum", |
| 1151 | + tg=>"http://tg.wikipedia.org Tajik", |
| 1152 | + th=>"http://th.wikipedia.org Thai", |
| 1153 | + ti=>"http://ti.wikipedia.org Tigrinya", |
| 1154 | + tk=>"http://tk.wikipedia.org Turkmen", |
| 1155 | + tl=>"http://tl.wikipedia.org Tagalog", |
| 1156 | + tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon |
| 1157 | + tn=>"http://tn.wikipedia.org Setswana", |
| 1158 | + to=>"http://to.wikipedia.org Tongan", |
| 1159 | + tokipona=>"http://tokipona.wikipedia.org Tokipona", |
| 1160 | + tpi=>"http://tpi.wikipedia.org Tok Pisin", |
| 1161 | + tr=>"http://tr.wikipedia.org Turkish", |
| 1162 | + ts=>"http://ts.wikipedia.org Tsonga", |
| 1163 | + tt=>"http://tt.wikipedia.org Tatar", |
| 1164 | + tum=>"http://tum.wikipedia.org Tumbuka", |
| 1165 | + turn=>"http://turn.wikipedia.org Turnbuka", |
| 1166 | + tw=>"http://tw.wikipedia.org Twi", |
| 1167 | + ty=>"http://ty.wikipedia.org Tahitian", |
| 1168 | + udm=>"http://udm.wikipedia.org Udmurt", |
| 1169 | + ug=>"http://ug.wikipedia.org Uighur", |
| 1170 | + uk=>"http://uk.wikipedia.org Ukrainian", |
| 1171 | + ur=>"http://ur.wikipedia.org Urdu", |
| 1172 | + uz=>"http://uz.wikipedia.org Uzbek", |
| 1173 | + ve=>"http://ve.wikipedia.org Venda", # was Lushaka |
| 1174 | + vec=>"http://vec.wikipedia.org Venetian", |
| 1175 | + vi=>"http://vi.wikipedia.org Vietnamese", |
| 1176 | + vls=>"http://vls.wikipedia.org West Flemish", |
| 1177 | + vo=>"http://vo.wikipedia.org Volapük", |
| 1178 | + wa=>"http://wa.wikipedia.org Walloon", |
| 1179 | + war=>"http://war.wikipedia.org Waray-Waray", |
| 1180 | + wo=>"http://wo.wikipedia.org Wolof", |
| 1181 | + wuu=>"http://wuu.wikipedia.org Wu", |
| 1182 | + xal=>"http://xal.wikipedia.org Kalmyk", |
| 1183 | + xh=>"http://xh.wikipedia.org Xhosa", |
| 1184 | + yi=>"http://yi.wikipedia.org Yiddish", |
| 1185 | + yo=>"http://yo.wikipedia.org Yoruba", |
| 1186 | + za=>"http://za.wikipedia.org Zhuang", |
| 1187 | + zea=>"http://zea.wikipedia.org Zealandic", |
| 1188 | + zh=>"http://zh.wikipedia.org Chinese", |
| 1189 | + zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan", |
| 1190 | + "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan", |
| 1191 | + zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese", |
| 1192 | + "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese", |
| 1193 | + zh_yue=>"http://zh-yue.wikipedia.org Cantonese", |
| 1194 | + "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese", |
| 1195 | + zu=>"http://zu.wikipedia.org Zulu", |
| 1196 | + zz=>" All languages", # starts with a blank: neither substitution below matches, so the value is kept whole |
| 1197 | + zzz=>" All languages except English" |
| 1198 | + ); |
| 1199 | + # split every "<url> <name>" value into url, display name and a link to the English Wikipedia article on the language |
| 1200 | + foreach $key (keys %wikipedias) |
| 1201 | + { |
| 1202 | + my $wikipedia = $wikipedias {$key} ; |
| 1203 | + $out_urls {$key} = $wikipedia ; |
| 1204 | + $out_languages {$key} = $wikipedia ; |
| 1205 | + $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ; # keep only the leading run of non-blank characters (the url) |
| 1206 | + $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ; # keep only what follows the url (the display name) |
| 1207 | + $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ; |
| 1208 | + $out_article {$key} =~ s/ /_/g ; |
| 1209 | + # (an identical second substitution on $out_urls used to follow here; it could not change the value and was dropped) |
| 1210 | + } |
| 1211 | +} |
| 1212 | + |
| 1213 | +# copied from WikiReports_EN.pl |
| 1214 | +sub InitReportNames # (re)fill the package global @out_report_descriptions with the 22 report titles, in fixed order |
| 1215 | +{ |
| 1216 | + @out_report_descriptions = ( |
| 1217 | + 'Contributors', # index 0 |
| 1218 | + 'New editors', |
| 1219 | + 'Active editors', |
| 1220 | + 'Very active editors', |
| 1221 | + 'Article count (official)', |
| 1222 | + 'Article count (alternate)', |
| 1223 | + 'New articles per day', |
| 1224 | + 'Edits per article', |
| 1225 | + 'Bytes per article', |
| 1226 | + 'Articles over 0.5 Kb', |
| 1227 | + 'Articles over 2 Kb', |
| 1228 | + 'Edits per month', |
| 1229 | + 'Database size', |
| 1230 | + 'Words', |
| 1231 | + 'Internal links', |
| 1232 | + 'Links to other Wikipedias', |
| 1233 | + 'Binaries', |
| 1234 | + 'External links', |
| 1235 | + 'Redirects', |
| 1236 | + 'Page requests per day', |
| 1237 | + 'Visits per day', |
| 1238 | + 'Overview recent months' # index 21 |
| 1239 | + ) ; |
| 1240 | +} |
| 1241 | + |
Property changes on: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutputYearly.pl |
___________________________________________________________________ |
Added: svn:eol-style |
1242 | 1242 | + native |
Index: trunk/wikistats/reportcard/StatisticsMonthlyFilter.pl |
— | — | @@ -1,101 +1,101 @@ |
2 | | -#!/usr/bin/perl
|
3 | | -
|
4 | | -# scratchpad script, kept for reuse
|
5 | | -
|
6 | | - use CGI::Carp qw(fatalsToBrowser);
|
7 | | - use Time::Local ;
|
8 | | -
|
9 | | - open IN, "<", "StatisticsMonthly.csv" ;
|
10 | | - open OUT, ">", "StatisticsMonthlyExtract.csv" ;
|
11 | | -
|
12 | | - my ($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = localtime (time);
|
13 | | - $month += 1 ;
|
14 | | - $year += 1900 ;
|
15 | | - print "Now: " . sprintf ("%04d:%02d:%02d\n", $year, $month, $day) ;
|
16 | | -
|
17 | | - $month-- ;
|
18 | | - if ($month < 1) { $month == 12 ; $year-- ; }
|
19 | | - print "Extract from " . sprintf ("%04d:%02d", $year-1, $month) . " till " . sprintf ("%04d:%02d", $year, $month) . "\n" ;
|
20 | | -
|
21 | | - $month_lo = $month ;
|
22 | | - $year_lo = $year - 1 ;
|
23 | | -
|
24 | | - $time_lo = timegm (0,0,0,1 ,$month-1,$year-1-1900) ;
|
25 | | - $time_hi = timegm (0,0,0,&daysinmonth($year,$month),$month-1,$year-1900) ;
|
26 | | -
|
27 | | - while ($line = <IN>)
|
28 | | - {
|
29 | | - ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ;
|
30 | | - if ($articles > $max_articles {$wp})
|
31 | | - { $max_articles {$wp} = $articles ; }
|
32 | | - }
|
33 | | - close IN ;
|
34 | | -
|
35 | | - $wikis = 0 ;
|
36 | | - foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles)
|
37 | | - {
|
38 | | - if (++$wikis > 25) { last ; }
|
39 | | - print "$wp: " . $max_articles {$wp} . "\n" ;
|
40 | | - $filter_wikis {$wp} ++ ;
|
41 | | - }
|
42 | | -
|
43 | | - open IN, "<", "StatisticsMonthly.csv" ;
|
44 | | - while ($line = <IN>)
|
45 | | - {
|
46 | | - ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ;
|
47 | | - if ($filter_wikis {$wp} == 0) { next ; }
|
48 | | -
|
49 | | - $year = substr ($date,6,4) ;
|
50 | | - $month = substr ($date,0,2) ;
|
51 | | - $day = substr ($date,3,2) ;
|
52 | | -
|
53 | | - $time = timegm (0,0,0,$day,$month-1,$year-1900) ;
|
54 | | -
|
55 | | - if (($time < $time_lo) || ($time > $time_hi)){ next ; }
|
56 | | - # print "$wp $date\n" ;
|
57 | | - $articles {"$wp:$date"} = $articles ;
|
58 | | - }
|
59 | | - close IN ;
|
60 | | -
|
61 | | -
|
62 | | - $wikis = 0 ;
|
63 | | - foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles)
|
64 | | - {
|
65 | | - if (++$wikis > 25) { last ; }
|
66 | | - $month = $month_lo ;
|
67 | | - $year = $year_lo ;
|
68 | | - $line = "$wp," ;
|
69 | | - for ($m = 0 ; $m <= 12 ; $m++)
|
70 | | - {
|
71 | | - $date = sprintf ("%02d/%02d/%04d", $month, &daysinmonth($year,$month), $year) ;
|
72 | | - $count = $articles {"$wp:$date"} + 0 ; # force numeric
|
73 | | - $line .= "$count," ;
|
74 | | - $month++ ;
|
75 | | - if ($month > 12)
|
76 | | - { $month = 1 ; $year ++ ; }
|
77 | | - }
|
78 | | - $line =~ s/,$// ;
|
79 | | - print OUT "$line\n" ;
|
80 | | - }
|
81 | | -
|
82 | | -
|
83 | | - print "\nReady\n\n" ;
|
84 | | - exit ;
|
85 | | -
|
86 | | -sub daysinmonth
|
87 | | -{
|
88 | | - my $year = shift ;
|
89 | | - my $month = shift ;
|
90 | | - if ($month == 0)
|
91 | | - { return (0) ; }
|
92 | | - my $timegm1 = timegm (0,0,0,1,$month-1,$year-1900) ;
|
93 | | - $month++ ;
|
94 | | - if ($month > 12)
|
95 | | - { $month = 1 ; $year++ }
|
96 | | - my $timegm2 = timegm (0,0,0,1,$month-1,$year-1900) ;
|
97 | | - my $days = ($timegm2-$timegm1) / (24*60*60) ;
|
98 | | - return ($days) ;
|
99 | | -}
|
100 | | -
|
101 | | -
|
102 | | -
|
| 2 | +#!/usr/bin/perl |
| 3 | + |
| 4 | +# scratchpad script, kept for reuse |
| 5 | + |
| 6 | + use CGI::Carp qw(fatalsToBrowser); |
| 7 | + use Time::Local ; |
| 8 | + |
| 9 | + open IN, "<", "StatisticsMonthly.csv" or die "Could not open StatisticsMonthly.csv: $!" ; |
| 10 | + open OUT, ">", "StatisticsMonthlyExtract.csv" or die "Could not open StatisticsMonthlyExtract.csv: $!" ; |
| 11 | + |
| 12 | + my ($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = localtime (time); |
| 13 | + $month += 1 ; |
| 14 | + $year += 1900 ; |
| 15 | + print "Now: " . sprintf ("%04d:%02d:%02d\n", $year, $month, $day) ; |
| 16 | + # the extract ends at the previous calendar month; in January that is December of the year before |
| 17 | + $month-- ; |
| 18 | + if ($month < 1) { $month = 12 ; $year-- ; } # was '$month == 12', a comparison that left $month at 0 |
| 19 | + print "Extract from " . sprintf ("%04d:%02d", $year-1, $month) . " till " . sprintf ("%04d:%02d", $year, $month) . "\n" ; |
| 20 | + |
| 21 | + $month_lo = $month ; |
| 22 | + $year_lo = $year - 1 ; |
| 23 | + # window: first day of that month one year earlier .. last day of that month |
| 24 | + $time_lo = timegm (0,0,0,1 ,$month-1,$year-1-1900) ; |
| 25 | + $time_hi = timegm (0,0,0,&daysinmonth($year,$month),$month-1,$year-1900) ; |
| 26 | + # pass 1: highest value of the 7th csv field (article count) seen per wiki |
| 27 | + while ($line = <IN>) |
| 28 | + { |
| 29 | + ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ; |
| 30 | + if ($articles > $max_articles {$wp}) |
| 31 | + { $max_articles {$wp} = $articles ; } |
| 32 | + } |
| 33 | + close IN ; |
| 34 | + # keep only the 25 wikis with the highest maximum |
| 35 | + $wikis = 0 ; |
| 36 | + foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles) |
| 37 | + { |
| 38 | + if (++$wikis > 25) { last ; } |
| 39 | + print "$wp: " . $max_articles {$wp} . "\n" ; |
| 40 | + $filter_wikis {$wp} ++ ; |
| 41 | + } |
| 42 | + # pass 2: collect the article counts of those wikis for dates inside the window |
| 43 | + open IN, "<", "StatisticsMonthly.csv" or die "Could not reopen StatisticsMonthly.csv: $!" ; |
| 44 | + while ($line = <IN>) |
| 45 | + { |
| 46 | + ($wp, $date, $u1, $u2, $u3, $u4, $articles) = split (',', $line) ; |
| 47 | + if ($filter_wikis {$wp} == 0) { next ; } |
| 48 | + # $date is read as mm/dd/yyyy |
| 49 | + $year = substr ($date,6,4) ; |
| 50 | + $month = substr ($date,0,2) ; |
| 51 | + $day = substr ($date,3,2) ; |
| 52 | + |
| 53 | + $time = timegm (0,0,0,$day,$month-1,$year-1900) ; |
| 54 | + |
| 55 | + if (($time < $time_lo) || ($time > $time_hi)){ next ; } |
| 56 | + # print "$wp $date\n" ; |
| 57 | + $articles {"$wp:$date"} = $articles ; |
| 58 | + } |
| 59 | + close IN ; |
| 60 | + |
| 61 | + # one output line per selected wiki: wiki code followed by 13 month-end counts (0 where no data was found) |
| 62 | + $wikis = 0 ; |
| 63 | + foreach $wp (sort {$max_articles {$b} <=> $max_articles {$a}} keys %max_articles) |
| 64 | + { |
| 65 | + if (++$wikis > 25) { last ; } |
| 66 | + $month = $month_lo ; |
| 67 | + $year = $year_lo ; |
| 68 | + $line = "$wp," ; |
| 69 | + for ($m = 0 ; $m <= 12 ; $m++) |
| 70 | + { |
| 71 | + $date = sprintf ("%02d/%02d/%04d", $month, &daysinmonth($year,$month), $year) ; |
| 72 | + $count = $articles {"$wp:$date"} + 0 ; # force numeric |
| 73 | + $line .= "$count," ; |
| 74 | + $month++ ; |
| 75 | + if ($month > 12) |
| 76 | + { $month = 1 ; $year ++ ; } |
| 77 | + } |
| 78 | + $line =~ s/,$// ; |
| 79 | + print OUT "$line\n" ; |
| 80 | + } |
| 81 | + close OUT or die "Could not write StatisticsMonthlyExtract.csv: $!" ; # buffered write errors only surface at close |
| 82 | + |
| 83 | + print "\nReady\n\n" ; |
| 84 | + exit ; |
| 85 | + |
| 86 | +sub daysinmonth |
| 87 | +{ |
| 88 | + my $year = shift ; |
| 89 | + my $month = shift ; |
| 90 | + if ($month == 0) |
| 91 | + { return (0) ; } |
| 92 | + my $timegm1 = timegm (0,0,0,1,$month-1,$year-1900) ; |
| 93 | + $month++ ; |
| 94 | + if ($month > 12) |
| 95 | + { $month = 1 ; $year++ } |
| 96 | + my $timegm2 = timegm (0,0,0,1,$month-1,$year-1900) ; |
| 97 | + my $days = ($timegm2-$timegm1) / (24*60*60) ; |
| 98 | + return ($days) ; |
| 99 | +} |
| 100 | + |
| 101 | + |
| 102 | + |
Property changes on: trunk/wikistats/reportcard/StatisticsMonthlyFilter.pl |
___________________________________________________________________ |
Added: svn:eol-style |
103 | 103 | + native |
Property changes on: trunk/wikistats/reportcard/ComScoreTop1000.pl |
___________________________________________________________________ |
Added: svn:eol-style |
104 | 104 | + native |
Index: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl |
— | — | @@ -1,1185 +1,1185 @@ |
2 | | -#!/usr/local/bin/perl
|
3 | | -
|
4 | | - use lib "/home/ezachte/lib" ;
|
5 | | - use EzLib ;
|
6 | | - $trace_on_exit = $true ;
|
7 | | - ez_lib_version (2) ;
|
8 | | -
|
9 | | - $month_last = "12" ;
|
10 | | - $year_last = 2010 ;
|
11 | | -
|
12 | | - $month_start = "1" ;
|
13 | | - $year_start = 2008 ;
|
14 | | -
|
15 | | - $m_start = &months_since_2000_01 ($year_start, $month_start) ;
|
16 | | - $m_last = &months_since_2000_01 ($year_last, $month_last) ;
|
17 | | - $m_last_12 = $m_last - 12 ;
|
18 | | - $m_last_1 = $m_last - 1 ;
|
19 | | -
|
20 | | - $month_last = sprintf ("%02d", $month_last) ; # 1 -> 01
|
21 | | -
|
22 | | - # set defaults mainly for tests on local machine
|
23 | | - default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;
|
24 | | -
|
25 | | - use Getopt::Std ;
|
26 | | -
|
27 | | -# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
|
28 | | -# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;
|
29 | | -
|
30 | | - $maxpopularwikis = 25 ;
|
31 | | - @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ;
|
32 | | -
|
33 | | - &LogArguments ;
|
34 | | - &ParseArguments ;
|
35 | | - &InitProjectNames ;
|
36 | | - &InitReportNames ;
|
37 | | - &ReadStatisticsMonthly ;
|
38 | | - &WriteMonthlyData ;
|
39 | | - exit ;
|
40 | | -
|
41 | | -sub LogArguments
|
42 | | -{
|
43 | | - my $arguments ;
|
44 | | - getopt ("iolpft", \%options) ;
|
45 | | - foreach $arg (sort keys %options)
|
46 | | - { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
|
47 | | - print ("\nArguments\n$arguments\n") ;
|
48 | | -# &Log ("\nArguments\n$arguments\n") ;
|
49 | | -}
|
50 | | -
|
51 | | -sub ParseArguments
|
52 | | -{
|
53 | | -# my @options ;
|
54 | | -# getopt ("io", \%options) ;
|
55 | | -
|
56 | | -# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
|
57 | | -# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
|
58 | | -
|
59 | | -# $path_in = $options {"i"} ;
|
60 | | -# $path_out = $options {"o"} ;
|
61 | | -
|
62 | | -# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
|
63 | | -# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
|
64 | | -
|
65 | | - $path_in = "w:/# out bayes" ;
|
66 | | - $path_out = "w:/@ report card/data" ;
|
67 | | -
|
68 | | - print "Input folder: $path_in\n" ;
|
69 | | - print "Output folder: $path_out\n" ;
|
70 | | - print "\n" ;
|
71 | | -
|
72 | | - $file_csv_out = "$path_out/StatisticsMonthly_${year_last}_${month_last}.csv" ;
|
73 | | -
|
74 | | - &SetComparisonPeriods ;
|
75 | | -}
|
76 | | -
|
77 | | -sub ReadStatisticsMonthly
|
78 | | -{
|
79 | | - &ReadStatisticsMonthlyForProject ("wb") ;
|
80 | | - &ReadStatisticsMonthlyForProject ("wk") ;
|
81 | | - &ReadStatisticsMonthlyForProject ("wn") ;
|
82 | | - &ReadStatisticsMonthlyForProject ("wp") ;
|
83 | | - &ReadStatisticsMonthlyForProject ("wq") ;
|
84 | | - &ReadStatisticsMonthlyForProject ("ws") ;
|
85 | | - &ReadStatisticsMonthlyForProject ("wv") ;
|
86 | | - &ReadStatisticsMonthlyForProject ("wx") ;
|
87 | | -
|
88 | | - &ReadStatisticsPerBinariesExtensionCommons ;
|
89 | | -}
|
90 | | -
|
91 | | -sub ReadStatisticsMonthlyForProject
|
92 | | -{
|
93 | | - my $project = shift;
|
94 | | -
|
95 | | - $all_projects = "*" ;
|
96 | | -
|
97 | | - my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
|
98 | | - my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;
|
99 | | -
|
100 | | - if (! -e $file_csv_in_1)
|
101 | | - { &Abort ("Input file '$file_csv_in_1' not found") ; }
|
102 | | - if (! -e $file_csv_in_2)
|
103 | | - { &Abort ("Input file '$file_csv_in_2' not found") ; }
|
104 | | -
|
105 | | - print "Read '$file_csv_in_1'\n" ;
|
106 | | - open CSV_IN, '<', $file_csv_in_1 ;
|
107 | | -
|
108 | | - undef %lines ;
|
109 | | - while ($line = <CSV_IN>)
|
110 | | - {
|
111 | | - ($language,$date,$counts) = split (',', $line, 3) ;
|
112 | | -
|
113 | | - next if $language eq 'commons' and $project ne 'wx' ;
|
114 | | - next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on
|
115 | | -
|
116 | | - ($month,$day,$year) = split ('\/', $date) ;
|
117 | | - my $m = &months_since_2000_01 ($year,$month) ;
|
118 | | - next if $m < $m_start ;
|
119 | | -
|
120 | | - $lines {$language}{$m} = $line ;
|
121 | | - $languages {$language}++ ;
|
122 | | - }
|
123 | | -
|
124 | | - foreach $language (sort keys %languages)
|
125 | | - {
|
126 | | - for ($m = $m_start + 1 ; $m <= $m_last ; $m++)
|
127 | | - {
|
128 | | - if ($lines {$language}{$m} eq '')
|
129 | | - { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
|
130 | | - }
|
131 | | -
|
132 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
133 | | - {
|
134 | | - $line = $lines {$language}{$m} ;
|
135 | | - chomp $line ;
|
136 | | - ($language,$date,$counts) = split (',', $line, 3) ;
|
137 | | - @fields = split (',', $counts) ;
|
138 | | -
|
139 | | - if ($project eq "wp")
|
140 | | - {
|
141 | | - foreach $f (1,4,6,11) # new editors, articles, new articles, edits
|
142 | | - {
|
143 | | - $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
|
144 | | -
|
145 | | - $totals {"$f,$m"} += $fields [$f] ;
|
146 | | -
|
147 | | - $totals_project {"$f,$m"} {$project} += $fields [$f] ;
|
148 | | - $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;
|
149 | | -
|
150 | | - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
|
151 | | - }
|
152 | | - }
|
153 | | - else
|
154 | | - {
|
155 | | - foreach $f (1,4)
|
156 | | - {
|
157 | | - if ($f <= 3)
|
158 | | - {
|
159 | | - $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
|
160 | | - $totals {"$f,$m"} += $fields [$f] ;
|
161 | | - }
|
162 | | -
|
163 | | -
|
164 | | - # ignore editor count on commons for totals, most editors are already counted for other project
|
165 | | - # (even for several projects, to be tuned after centralauth dump is available)
|
166 | | - # count for all_projects only Wikipedia articles
|
167 | | - if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
|
168 | | - { $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ; }
|
169 | | -
|
170 | | - if ($language eq 'commons')
|
171 | | - { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
|
172 | | - else
|
173 | | - { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }
|
174 | | -
|
175 | | - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
|
176 | | - }
|
177 | | - foreach $f (6,11)
|
178 | | - {
|
179 | | - $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;
|
180 | | - if ($language eq 'commons')
|
181 | | - { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
|
182 | | - else
|
183 | | - { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }
|
184 | | - # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
|
185 | | - }
|
186 | | -
|
187 | | - }
|
188 | | - }
|
189 | | - }
|
190 | | - close CSV_IN ;
|
191 | | -
|
192 | | - # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
|
193 | | - # but use f = column count in StatisticsMonthly.csv
|
194 | | -
|
195 | | - print "Read '$file_csv_in_2'\n" ;
|
196 | | - open CSV_IN, '<', $file_csv_in_2 ;
|
197 | | -
|
198 | | - undef %lines ;
|
199 | | - while ($line = <CSV_IN>)
|
200 | | - {
|
201 | | - chomp $line ;
|
202 | | - ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
|
203 | | -
|
204 | | - next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
|
205 | | - # next if $language eq 'commons' ; # ignore editor count on commons alltogether, most are already counted for other project
|
206 | | - # (even for several projects, to be tuned after centralauth dump is available)
|
207 | | -
|
208 | | - if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
|
209 | | - if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces
|
210 | | -
|
211 | | - ($month,$day,$year) = split ('\/', $date) ;
|
212 | | - my $m = &months_since_2000_01 ($year,$month) ;
|
213 | | - next if $m < $m_start ;
|
214 | | -
|
215 | | - $lines {$language}{$m} = $line ;
|
216 | | - $languages {$language}++ ;
|
217 | | - }
|
218 | | -
|
219 | | - foreach $language (sort keys %languages)
|
220 | | - {
|
221 | | - for ($m = $m_start+1 ; $m <= $m_last ; $m++)
|
222 | | - {
|
223 | | - if ($lines {$language}{$m} eq '')
|
224 | | - { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
|
225 | | - }
|
226 | | -
|
227 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
228 | | - {
|
229 | | - $line = $lines {$language}{$m} ;
|
230 | | - chomp $line ;
|
231 | | - ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
|
232 | | - @fields = split (',', $counts) ;
|
233 | | -
|
234 | | - foreach $f (2,3) # editors_gt_5, editors_gt_100
|
235 | | - {
|
236 | | - # count user with over x edits
|
237 | | - # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
|
238 | | - # thresholds = 1,3,5,10,25,32,50,100,etc
|
239 | | - if ($f == 2) { $f2 = 2 ; }
|
240 | | - if ($f == 3) { $f2 = 7 ; }
|
241 | | -
|
242 | | - $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;
|
243 | | - $totals {"$f,$m"} += $fields [$f2] ;
|
244 | | -
|
245 | | - # ignore editor count on commons for totals, most editors are already counted for other project
|
246 | | - # (even for several projects, to be tuned after centralauth dump is available)
|
247 | | - if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
|
248 | | - { $totals_project {"$f,$m"} {$all_projects} += $fields [$f2] ; }
|
249 | | -
|
250 | | - if ($language eq 'commons')
|
251 | | - { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
|
252 | | - else
|
253 | | - { $totals_project {"$f,$m"} {$project} += $fields [$f2] ; }
|
254 | | - }
|
255 | | - }
|
256 | | - }
|
257 | | - close CSV_IN ;
|
258 | | -}
|
259 | | -
|
260 | | -sub ReadStatisticsPerBinariesExtensionCommons
|
261 | | -{
|
262 | | - my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
|
263 | | - my $mmax = -1 ;
|
264 | | -
|
265 | | - if (! -e $file_csv_in)
|
266 | | - { &Abort ("Input file '$file_csv_in' not found") ; }
|
267 | | -
|
268 | | - print "Read '$file_csv_in'\n" ;
|
269 | | - open CSV_IN, '<', $file_csv_in ;
|
270 | | - while ($line = <CSV_IN>)
|
271 | | - {
|
272 | | - chomp $line ;
|
273 | | - ($language,$date,$counts) = split (',', $line, 3) ;
|
274 | | -
|
275 | | - if ($language ne "commons") { next ; }
|
276 | | -
|
277 | | - if ($date eq "00/0000")
|
278 | | - {
|
279 | | - @fields = split (',', $counts) ;
|
280 | | - $field_ndx = 0 ;
|
281 | | - foreach $field (@fields)
|
282 | | - {
|
283 | | - $ext_cnt {-1}{$field_ndx} = $field ;
|
284 | | - # print "EXT_CNT $field_ndx : $field\n" ;
|
285 | | - $field_ndx ++ ;
|
286 | | - }
|
287 | | - next ;
|
288 | | - }
|
289 | | -
|
290 | | - ($month,$year) = split ('\/', $date) ;
|
291 | | - my $m = &months_since_2000_01 ($year,$month) ;
|
292 | | - next if $m < $m_start ;
|
293 | | -
|
294 | | - if ($m > $mmax)
|
295 | | - { $mmax = $m ; }
|
296 | | -
|
297 | | - @fields = split (',', $counts) ;
|
298 | | - $field_ndx = 0 ;
|
299 | | - foreach $field (@fields)
|
300 | | - {
|
301 | | - $ext_cnt {$m}{$field_ndx} = $field ;
|
302 | | - $ext_tot {$m} += $field ;
|
303 | | - $field_ndx ++ ;
|
304 | | - }
|
305 | | - }
|
306 | | - close CSV_IN ;
|
307 | | -
|
308 | | - %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
|
309 | | - @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;
|
310 | | -
|
311 | | - $extcnt = 0 ;
|
312 | | - foreach $extndx (@ext_cnt_mmax)
|
313 | | - {
|
314 | | - # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
|
315 | | - push @extndxs, $extndx ;
|
316 | | - if ($extcnt++ >= 9) { last ; }
|
317 | | - }
|
318 | | -}
|
319 | | -
|
320 | | -sub ReadMediaTrends
|
321 | | -{
|
322 | | -# open FILE_UV, '<', $file_regions_UV ;
|
323 | | -# close FILE-UV ;
|
324 | | -
|
325 | | -# open FILE_REACH, '<', $file_regions_Reach ;
|
326 | | -# close FILE_REACH ;
|
327 | | -}
|
328 | | -
|
329 | | -sub WriteMonthlyData
|
330 | | -{
|
331 | | - print "Write file '$file_csv_out'\n" ;
|
332 | | - open CSV_OUT, '>', $file_csv_out ;
|
333 | | - $output = "" ;
|
334 | | - foreach $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
|
335 | | - {
|
336 | | -
|
337 | | - $output .= "\n,${out_report_descriptions [$f]} - Absolute - Per Wiki\n" ;
|
338 | | - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
|
339 | | -
|
340 | | - $line = ",Total," ;
|
341 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
342 | | - { $line .= $totals {"$f,$m"} . "," ; }
|
343 | | -
|
344 | | - # growth in one year
|
345 | | - if ($totals {"$f,$m_last_12"} != 0)
|
346 | | - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m_last"} / $totals {"$f,$m_last_12"}) - 100). "%," ; }
|
347 | | - else
|
348 | | - { $line .= "n.a.," ; }
|
349 | | -
|
350 | | - # growth in one month
|
351 | | - if ($totals {"$f,$m_last_1"} != 0)
|
352 | | - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m_last"} / $totals {"$f,$m_last_1"}) - 100). "%," ; }
|
353 | | - else
|
354 | | - { $line .= "n.a.," ; }
|
355 | | -
|
356 | | - $line =~ s/,$// ;
|
357 | | - $output .= "$line\n" ;
|
358 | | -
|
359 | | - # sort by absolute amount for last month
|
360 | | - %values_f_12 = %{$values {"$f,$m_last"}} ;
|
361 | | - $index = 1 ;
|
362 | | - foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
|
363 | | - {
|
364 | | - ($project,$language) = split (",", $key) ;
|
365 | | - $language_name = $out_languages {$language} ;
|
366 | | - if (($project ne "wp") && ($project ne "wx"))
|
367 | | - { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
|
368 | | - else
|
369 | | - { $line = "$index,$language_name," ; }
|
370 | | -
|
371 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
372 | | - { $line .= $values {"$f,$m"} {$key} . "," ; }
|
373 | | -
|
374 | | - if ($values {"$f,$m_last_12"} {$key} != 0)
|
375 | | - { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m_last"} {$key} / $values {"$f,$m_last_12"} {$key}) - 100). "%," ; }
|
376 | | - else
|
377 | | - { $line .= "n.a.," ; }
|
378 | | -
|
379 | | - if ($values {"$f,$m_last_1"} {$key} != 0)
|
380 | | - { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m_last"} {$key} / $values {"$f,$m_last_1"} {$key}) - 100). "%," ; }
|
381 | | - else
|
382 | | - { $line .= "n.a.," ; }
|
383 | | -
|
384 | | - $line =~ s/,$// ;
|
385 | | - $output .= "$line\n" ;
|
386 | | -
|
387 | | - if ($index++ >= 25) { last ; }
|
388 | | - }
|
389 | | -
|
390 | | - $output .= "\n,${out_report_descriptions [$f]} - Absolute - Per Project\n" ;
|
391 | | - if ($f <= 3) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
|
392 | | - { $output .= ",Note: All projects does not include Commons\n" ; }
|
393 | | - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
|
394 | | - foreach $project (sort {$totals_project {"$f,$m_last"} {$b} <=> $totals_project {"$f,$m_last"} {$a}} @projects)
|
395 | | - {
|
396 | | -# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
|
397 | | -
|
398 | | - if ($project eq 'commons')
|
399 | | - { $line = ",Commons," ; }
|
400 | | - else
|
401 | | - { $line = "," . &GetProjectName ($project) . "," ; }
|
402 | | -
|
403 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
404 | | - { $line .= $totals_project {"$f,$m"} {$project} . "," ; }
|
405 | | -
|
406 | | - if ($totals_project {"$f,$m_last_12"} {$project} != 0)
|
407 | | - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m_last"} {$project} / $totals_project {"$f,$m_last_12"} {$project}) - 100). "%," ; }
|
408 | | - else
|
409 | | - { $line .= "n.a.," ; }
|
410 | | -
|
411 | | - if ($totals_project {"$f,$m_last_1"} {$project} != 0)
|
412 | | - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m_last"} {$project} / $totals_project {"$f,$m_last_1"} {$project}) - 100). "%," ; }
|
413 | | - else
|
414 | | - { $line .= "n.a.," ; }
|
415 | | -
|
416 | | - $line =~ s/,$// ;
|
417 | | - $output .= "$line\n" ;
|
418 | | - }
|
419 | | -
|
420 | | - $output .= "\n,${out_report_descriptions [$f]} - Indexed - Per Wiki\n" ;
|
421 | | - $output .= "$csv_recent_months\n" ;
|
422 | | -
|
423 | | - # sort by absolute amount for last month
|
424 | | - $index = 1 ;
|
425 | | - foreach $key (sort {$values_f_12 {$b} <=> $values_f_12 {$a}} keys %values_f_12)
|
426 | | - {
|
427 | | - # print "$index $f: $key -> ${values_f_12 {$key}}\n" ;
|
428 | | -
|
429 | | - ($project,$language) = split (",", $key) ;
|
430 | | - $language_name = $out_languages {$language} ;
|
431 | | - if (($project ne "wp") && ($project ne "wx"))
|
432 | | - { $line = "$index,$language_name " . &GetProjectName ($project) . "," ; }
|
433 | | - else
|
434 | | - { $line = "$index,$language_name," ; }
|
435 | | -
|
436 | | - # $value_100 = $values {"$f,$m_last_12"} {$key} ;
|
437 | | - $value_100 = $values {"$f,$m_start"} {$key} ;
|
438 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
439 | | - {
|
440 | | - if ($value_100 != 0)
|
441 | | - { $line .= sprintf ("%.1f", 100 * ($values {"$f,$m"} {$key} / $value_100)) . "," ; }
|
442 | | - else
|
443 | | - { $line .= "," ; }
|
444 | | - }
|
445 | | - $line =~ s/,$// ;
|
446 | | - $output .= "$line\n" ;
|
447 | | -
|
448 | | - # put totals last in chart to show line on top of others
|
449 | | - if ($index == 9)
|
450 | | - {
|
451 | | - $line = ",Total," ;
|
452 | | - $total_100 = $totals {"$f,$m_last_12"} ;
|
453 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
454 | | - {
|
455 | | - if ($total_100 != 0)
|
456 | | - { $line .= sprintf ("%.1f", 100 * ($totals {"$f,$m"} / $total_100)) . "," ; }
|
457 | | - else
|
458 | | - { $line .= "," ; }
|
459 | | - }
|
460 | | - $line .= ",(sorted here to make it top-most line out of 10 in Excel)" ;
|
461 | | - $output .= "$line\n" ;
|
462 | | - }
|
463 | | -
|
464 | | - if ($index++ >= 25) { last ; }
|
465 | | - }
|
466 | | -
|
467 | | - $output .= "\n,${out_report_descriptions [$f]} - Indexed - Per Project\n" ;
|
468 | | - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
|
469 | | - foreach $project (sort {$totals_project {"$f,$m_last"} {$b} <=> $totals_project {"$f,$m_last"} {$a}} @projects)
|
470 | | - {
|
471 | | -# next if $project eq 'commons' and ($f ==2 or $f == 3) ; # (very) active editors no longer counted for commons
|
472 | | -
|
473 | | - if ($project eq 'commons')
|
474 | | - { $line = ",Commons," ; }
|
475 | | - else
|
476 | | - { $line = "," . &GetProjectName ($project) . "," ; }
|
477 | | -
|
478 | | - # $value_100 = $totals_project {"$f,$m_last_12"} {$project} ;
|
479 | | - $value_100 = $totals_project {"$f,$m_start"} {$project} ;
|
480 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
481 | | - {
|
482 | | - if ($value_100 != 0)
|
483 | | - { $line .= sprintf ("%.1f", 100 * ($totals_project {"$f,$m"} {$project} / $value_100)) . "," ; }
|
484 | | - else
|
485 | | - { $line .= "," ; }
|
486 | | - }
|
487 | | - $line =~ s/,$// ;
|
488 | | - $output .= "$line\n" ;
|
489 | | - }
|
490 | | - $output .= "\n," . '=' x 150 . "\n" ;
|
491 | | - }
|
492 | | -
|
493 | | - print CSV_OUT $output ;
|
494 | | -
|
495 | | - $output = "\n,Binaries per month - Absolute\n" ;
|
496 | | - $output .= "$csv_recent_months,%inc year, %inc month\n" ;
|
497 | | - $output .= "\n$csv_recent_months,%inc year,%inc month\n" ;
|
498 | | -
|
499 | | - $line = ",Total," ;
|
500 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
501 | | - { $line .= $ext_tot {$m} . "," ; }
|
502 | | -
|
503 | | - if ($ext_tot {$m_last_12} != 0)
|
504 | | - { $line .= sprintf ("%.1f", 100 * ($ext_tot {$m_last} / $ext_tot {$m_last_12}) - 100). "%," ; }
|
505 | | - else
|
506 | | - { $line .= "n.a.," ; }
|
507 | | -
|
508 | | - if ($ext_tot {$m_last_1} != 0)
|
509 | | - { $line .= sprintf ("%.1f", 100 * ($ext_tot {$m_last} / $ext_tot {$m_last_1}) - 100). "%," ; }
|
510 | | - else
|
511 | | - { $line .= "n.a.," ; }
|
512 | | -
|
513 | | - $line =~ s/,$// ;
|
514 | | - $output .= "$line\n" ;
|
515 | | -
|
516 | | - $index = 0 ;
|
517 | | - # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
|
518 | | - for ($e = $#extndxs - 9 ; $e <= $#extndxs ; $e++)
|
519 | | - {
|
520 | | - $index++ ;
|
521 | | -
|
522 | | - if ($e < 0)
|
523 | | - {
|
524 | | - $line = "$index,xxx," ;
|
525 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
526 | | - { $line .= "," ; }
|
527 | | - }
|
528 | | - else
|
529 | | - {
|
530 | | - $extndx = $extndxs [$e] ;
|
531 | | - $line = "$index,${ext_cnt {-1}{$extndx}}," ;
|
532 | | -
|
533 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
534 | | - { $line .= $ext_cnt {$m}{$extndx} . "," ; }
|
535 | | -
|
536 | | - if ($ext_cnt {$m_last_12}{$extndx} != 0)
|
537 | | - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m_last}{$extndx} / $ext_cnt {$m_last_12}{$extndx}) - 100). "%," ; }
|
538 | | - else
|
539 | | - { $line .= "n.a.," ; }
|
540 | | -
|
541 | | - if ($ext_cnt {$m_last_1}{$extndx} != 0)
|
542 | | - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m_last}{$extndx} / $ext_cnt {$m_last_1}{$extndx}) - 100). "%," ; }
|
543 | | - else
|
544 | | - { $line .= "n.a.," ; }
|
545 | | - }
|
546 | | -
|
547 | | - $line =~ s/,$// ;
|
548 | | - $output .= "$line\n" ;
|
549 | | - }
|
550 | | -
|
551 | | - print CSV_OUT $output ;
|
552 | | -
|
553 | | - $output = "\n,Binaries per month - Indexed\n" ;
|
554 | | - $output .= "$csv_recent_months\n" ;
|
555 | | -
|
556 | | - $index = 0 ;
|
557 | | - # feed the 10 extensions with most pages, largest one last (comes on top in Excel chart)
|
558 | | - for ($e = $#extndxs - 9 ; $e <= $#extndxs ; $e++)
|
559 | | - {
|
560 | | - $index++ ;
|
561 | | -
|
562 | | - if ($e < 0)
|
563 | | - {
|
564 | | - $line = "$index,xxx," ;
|
565 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
566 | | - { $line .= "," ; }
|
567 | | - }
|
568 | | - else
|
569 | | - {
|
570 | | - $extndx = $extndxs [$e] ;
|
571 | | - $line = "$index,${ext_cnt {-1}{$extndx}}," ;
|
572 | | - $ext_cnt_m0 = $ext_cnt {$m_last-12}{$extndx} ;
|
573 | | - # $ext_cnt_m0 = $ext_cnt {$m_start}{$extndx} ;
|
574 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
575 | | - {
|
576 | | - if ($ext_cnt_m0 > 0)
|
577 | | - { $line .= sprintf ("%.1f", 100 * ($ext_cnt {$m}{$extndx} / $ext_cnt_m0)). "," ; }
|
578 | | - else
|
579 | | - { $line .= "," ; }
|
580 | | - }
|
581 | | - }
|
582 | | -
|
583 | | - $line =~ s/,$// ;
|
584 | | - $output .= "$line\n" ;
|
585 | | - }
|
586 | | - print CSV_OUT $output ;
|
587 | | - close CSV_OUT ;
|
588 | | -
|
589 | | - print "\nOutput written to $file_csv_out\n\n" ;
|
590 | | -}
|
591 | | -
|
592 | | -sub SetComparisonPeriods
|
593 | | -{
|
594 | | - my @months = qw(Xxx Jan Feb Mar Apr May Jun Jul Aug Sept Oct Nov Dec) ;
|
595 | | -
|
596 | | - my ($file_year_month_last, $year_month_last, $year_month_last_minus_12, $year_month_last_minus_1) ;
|
597 | | -
|
598 | | - $year_month_last = sprintf ("%04d/%02d",$year_last, $month_last) ; # for filenames
|
599 | | - $file_year_month_last = sprintf ("%04d_%02d",$year_last, $month_last) ; # for filenames
|
600 | | - $year_month_last_minus_12 = sprintf ("%04d/%02d",$year_last - 1,$month_last) ;
|
601 | | - $year_month_last_minus_1 = $month_last > 1 ? sprintf ("%04d/%02d",$year_last,$month_last-1): sprintf ("%04d/%02d",$year_last - 1 ,12) ;
|
602 | | -
|
603 | | - print "\nWrite trend data up till $year_month_last\n\n" ;
|
604 | | - print "Compare with previous month: $year_month_last_minus_1, previous year: $year_month_last_minus_12\n\n" ;
|
605 | | -
|
606 | | - $csv_recent_months = ",project," ;
|
607 | | - $year = $year_start ;
|
608 | | - $month = $month_start ;
|
609 | | - for ($m = $m_start ; $m <= $m_last ; $m++)
|
610 | | - {
|
611 | | - $recent_months [$m] = sprintf ("%04d/%02d", $year, $month) ;
|
612 | | - $csv_recent_months .= sprintf ("%02d/%04d", $month, $year) . "," ;
|
613 | | - ($year,$month) = $month < 12 ? ($year,$month+1) : ($year+1,1) ;
|
614 | | - }
|
615 | | - $csv_recent_months =~ s/,$// ;
|
616 | | -}
|
617 | | -
|
618 | | -#sub WriteCsvFilesPerPeriod
|
619 | | -#{
|
620 | | -# foreach $period (sort keys %totals)
|
621 | | -# {
|
622 | | -# &LogT ("\nWrite totals per $period: ") ;
|
623 | | -# $desc = $descriptions {$period} ;
|
624 | | -
|
625 | | -# foreach $project (sort keys %{$totals {$period}})
|
626 | | -# {
|
627 | | -# &Log ("$project ") ;
|
628 | | -
|
629 | | -# $dir_out = "$path_out/csv_$project" ;
|
630 | | -# if (! -d $dir_out)
|
631 | | -# { mkdir $dir_out, 0777 ; }
|
632 | | -
|
633 | | -# $file_out = "$dir_out/$desc.csv" ;
|
634 | | -
|
635 | | -# open CSV, ">", $file_out ;
|
636 | | -# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}})
|
637 | | -# {
|
638 | | -# ($language,$yearmonth) = split (",", $key) ;
|
639 | | -# # print "PERIOD $period PROJECT $project KEY $key\n" ;
|
640 | | -# if ($period eq "month")
|
641 | | -# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; }
|
642 | | -# else
|
643 | | -# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; }
|
644 | | -# }
|
645 | | -# close CSV ;
|
646 | | -# }
|
647 | | -# }
|
648 | | -#}
|
649 | | -
|
650 | | -#sub WriteCsvHtmlFilesPopularWikis
|
651 | | -#{
|
652 | | -# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ;
|
653 | | -
|
654 | | -# $dir_out = "$path_out/csv_wp" ;
|
655 | | -# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$file_year_month_last.csv" ;
|
656 | | -
|
657 | | -## extend with normalized counts
|
658 | | -## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv
|
659 | | -
|
660 | | -# open CSV, ">", $file_out ;
|
661 | | -# print CSV $csv_recent_months ;
|
662 | | -
|
663 | | -# # write per popular language+wiki 13 months of page view totals
|
664 | | -# $lines = 0 ;
|
665 | | -# foreach $line (@totals_lastmonth)
|
666 | | -# {
|
667 | | -# if (++$lines > $maxpopularwikis) { last ; }
|
668 | | -
|
669 | | -# ($project, $language) = split (',', $line) ;
|
670 | | -# $largest_projects {"$project-$language"} ++ ;
|
671 | | -
|
672 | | -# $language_name = $out_languages {$language} ;
|
673 | | -
|
674 | | -# if (($project ne "wp") && ($project ne "wx"))
|
675 | | -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
|
676 | | -# else
|
677 | | -# { print CSV "$language_name," ; }
|
678 | | -
|
679 | | -## %test = %{$totals {"month"} {"wp"} };
|
680 | | -## %test2 = @recent_months ;
|
681 | | -# for ($m = 0 ; $m <= 12 ; $m++)
|
682 | | -# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; }
|
683 | | -# print CSV "\n" ;
|
684 | | -# }
|
685 | | -
|
686 | | -# print CSV "\n$csv_recent_months" ;
|
687 | | -
|
688 | | -# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100
|
689 | | -# $lines = 0 ;
|
690 | | -# foreach $line (@totals_lastmonth)
|
691 | | -# {
|
692 | | -# if (++$lines > $maxpopularwikis) { last ; }
|
693 | | -
|
694 | | -# ($project, $language) = split (',', $line) ;
|
695 | | -# $language_name = $out_languages {$language} ;
|
696 | | -
|
697 | | -# if (($project ne "wp") && ($project ne "wx"))
|
698 | | -# { print CSV "$language_name " . &GetProjectName ($project) . "," ; }
|
699 | | -# else
|
700 | | -# { print CSV "$language_name," ; }
|
701 | | -
|
702 | | -# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ;
|
703 | | -# for ($m = 0 ; $m <= 12 ; $m++)
|
704 | | -# {
|
705 | | -# if ($recent_month_0 > 0)
|
706 | | -# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; }
|
707 | | -# else
|
708 | | -# { print CSV "," ; }
|
709 | | -# }
|
710 | | -
|
711 | | -# print CSV "\n" ;
|
712 | | -# }
|
713 | | -# close CSV ;
|
714 | | -
|
715 | | -# # write ready made table rows for report card: page views top 25 movers shakers
|
716 | | -# foreach $key (keys %largest_projects)
|
717 | | -# {
|
718 | | -# ($project,$language) = split ('-', $key) ;
|
719 | | -
|
720 | | -# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_last"} ;
|
721 | | -# $total_prevmonth = $totals {"month"} {$project} {"$language,$year_month_last_minus_1"} ;
|
722 | | -# $total_prevyear = $totals {"month"} {$project} {"$language,$year_month_last_minus_12"} ;
|
723 | | -
|
724 | | -# $perc_month = "no data" ;
|
725 | | -# $perc_year = "no data" ;
|
726 | | -
|
727 | | -# if ($total_prevyear > 0)
|
728 | | -# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; }
|
729 | | -# if ($total_prevyear > 0)
|
730 | | -# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; }
|
731 | | -
|
732 | | -# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ;
|
733 | | -
|
734 | | -# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ;
|
735 | | -
|
736 | | -# $project_name = &GetProjectName ($project) ;
|
737 | | -# $language_name = $out_languages {$language} ;
|
738 | | -
|
739 | | -# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ;
|
740 | | -# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ;
|
741 | | -# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ;
|
742 | | -# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ;
|
743 | | -# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ;
|
744 | | -
|
745 | | -# $growth_figures_text {"$perc_month-$project-$language"} = $line ;
|
746 | | -# $growth_figures_html {"$perc_month-$project-$language"} = $html ;
|
747 | | -# }
|
748 | | -
|
749 | | -# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$file_year_month_last.html" ;
|
750 | | -
|
751 | | -# open HTML, ">", $file_html ;
|
752 | | -# foreach $key (sort {$b <=> $a} keys %growth_figures_text)
|
753 | | -# {
|
754 | | -# print "$key: ". $growth_figures_text {$key} . "\n" ;
|
755 | | -# print HTML $growth_figures_html {$key} ;
|
756 | | -# }
|
757 | | -# close HTML ;
|
758 | | -#}
|
759 | | -
|
760 | | -
|
# Translate a wikistats project code into its display name.
#
# Parameters:
#   $project - project code: "wp", "wb", "wk", "wx", "wn", "wq", "ws", "wv",
#              or "*" for the aggregate of all projects
# Returns:
#   the display name, or undef when the code is not recognised
# Side effect:
#   the global $project_name is reassigned on every call (kept for code that
#   reads the global directly); it is reset to undef for unknown codes, so a
#   name left over from a previous call can no longer leak into the result.
sub GetProjectName
{
  my $project = shift ;

  my %name_of = (
    "wp" => "Wikipedia",
    "wb" => "Wikibooks",
    "wk" => "Wiktionary",
    "wx" => "Other Wikis",
    "wn" => "Wikinews",
    "wq" => "Wikiquote",
    "ws" => "Wikisource",
    "wv" => "Wikiversity",
    "*"  => "All projects",
  ) ;

  $project_name = defined ($project) ? $name_of {$project} : undef ;

  return ($project_name) ;
}
|
777 | | -
|
# Position of the given year/month on a scale where the last reporting month
# (globals $year_last, $month_last) is 12 and the same month one year earlier
# is 0.
#
# Parameters: $year, $month (numeric)
# Returns:    12 minus the number of months between the given month and the last month
sub MonthsSinceYearAgo
{
  my ($year, $month) = @_ ;

  my $months_before_last = ($year_last - $year) * 12 + ($month_last - $month) ;

  return 12 - $months_before_last ;
}
|
784 | | -
|
# Zero-based month index counted from January 2008 (the hard-coded first month).
#
# Parameters: $year, $month (numeric, month 1..12)
# Returns:    0 for 2008/01, 1 for 2008/02, 12 for 2009/01, ...
sub MonthsSinceFirstMonthToShow
{
  my ($year, $month) = @_ ;

  my $whole_years = $year - 2008 ;

  return $whole_years * 12 + ($month - 1) ;
}
|
791 | | -
|
# Encode year,month as a single month counter relative to the year 2000
# (2000/01 -> 1, 2000/12 -> 12, 2001/01 -> 13, ...), so months can be
# compared and iterated as plain integers.
#
# Parameters: $year, $month (numeric, month 1..12)
# Returns:    ($year - 2000) * 12 + $month
sub months_since_2000_01
{
  my ($year, $month) = @_ ;

  return 12 * ($year - 2000) + $month ;
}
|
800 | | -
|
801 | | -#sub Log
|
802 | | -#{
|
803 | | -# $msg = shift ;
|
804 | | -# print $msg ;
|
805 | | -# print LOG $msg ;
|
806 | | -#}
|
807 | | -
|
808 | | -#sub LogT
|
809 | | -#{
|
810 | | -# $msg = shift ;
|
811 | | -# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ;
|
812 | | -# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ;
|
813 | | -# $msg =~ s/^(\n*)/$1$time/s ;
|
814 | | -# &Log ($msg) ;
|
815 | | -#}
|
816 | | -
|
# Current local wall-clock time formatted as "hh:mm:ss " (note the trailing
# space), suitable as a prefix for log lines.
sub MmSs
{
  my @now = localtime (time) ;           # (sec, min, hour, ...)
  my ($hh, $mm, $ss) = @now [2, 1, 0] ;

  return sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ;
}
|
822 | | -
|
# Print a fatal error message and terminate the script.
#
# Parameters:
#   $msg - description of the problem
# Does not return. The process exits with status 1 so that calling shell
# scripts or schedulers can detect the failure (a bare 'exit' would report
# success, status 0).
sub Abort
{
  my $msg = shift ;
  print "$msg\nExecution aborted." ;
  # to do: log also to file
  exit 1 ;
}
|
830 | | -
|
# Build the global lookup tables that map a wiki/language code to its url,
# its English display name and the url of an English Wikipedia article on
# that language.
#
# Fills (globals):
#   %wikipedias    - code => "url name" (raw table, url and name separated by whitespace)
#   %out_urls      - code => url part of the table entry
#   %out_languages - code => name part of the table entry
#   %out_article   - code => "http://en.wikipedia.org/wiki/<name>_language", spaces replaced by '_'
#
# Codes containing a dash occur twice: once with '-' (quoted key) and once
# with '_'.
# Entries 'zz' and 'zzz' have no url: the value starts with a space, so neither
# substitution below matches and both %out_urls and %out_languages keep the
# full string including the leading space.
sub InitProjectNames
{
  # copied from WikiReports.pl

  %wikipedias = (
# mediawiki=>"http://wikimediafoundation.org Wikimedia",
  nostalgia=>"http://nostalgia.wikipedia.org Nostalgia",
  sources=>"http://wikisource.org Old Wikisource",
  meta=>"http://meta.wikimedia.org Meta-Wiki",
  beta=>"http://beta.wikiversity.org Beta",
  species=>"http://species.wikipedia.org WikiSpecies",
  commons=>"http://commons.wikimedia.org Commons",
  foundation=>"http://wikimediafoundation.org Wikimedia Foundation",
  sep11=>"http://sep11.wikipedia.org In Memoriam",
  nlwikimedia=>"http://nl.wikimedia.org Wikimedia Nederland",
  plwikimedia=>"http://pl.wikimedia.org Wikimedia Polska",
  mediawiki=>"http://www.mediawiki.org MediaWiki",
  dewikiversity=>"http://de.wikiversity.org Wikiversität",
  frwikiversity=>"http://fr.wikiversity.org Wikiversité", # was Wikiversität (copy of the German entry)
  wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005",
  wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006",
  aa=>"http://aa.wikipedia.org Afar",
  ab=>"http://ab.wikipedia.org Abkhazian",
  af=>"http://af.wikipedia.org Afrikaans",
  ak=>"http://ak.wikipedia.org Akan", # was Akana
  als=>"http://als.wikipedia.org Alemannic", # was Elsatian
  am=>"http://am.wikipedia.org Amharic",
  an=>"http://an.wikipedia.org Aragonese",
  ang=>"http://ang.wikipedia.org Anglo-Saxon",
  ar=>"http://ar.wikipedia.org Arabic",
  arc=>"http://arc.wikipedia.org Aramaic",
  as=>"http://as.wikipedia.org Assamese",
  ast=>"http://ast.wikipedia.org Asturian",
  av=>"http://av.wikipedia.org Avar", # was Avienan
  ay=>"http://ay.wikipedia.org Aymara",
  az=>"http://az.wikipedia.org Azeri", # was Azerbaijani
  ba=>"http://ba.wikipedia.org Bashkir",
  bar=>"http://bar.wikipedia.org Bavarian",
  bat_smg=>"http://bat-smg.wikipedia.org Samogitian",
  "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian",
  bcl=>"http://bcl.wikipedia.org Central Bicolano",
  be=>"http://be.wikipedia.org Belarusian",
  "be-x-old"=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)", # url was be.wikipedia.org
  be_x_old=>"http://be-x-old.wikipedia.org Belarusian (Tarashkevitsa)", # url was be.wikipedia.org
  bg=>"http://bg.wikipedia.org Bulgarian",
  bh=>"http://bh.wikipedia.org Bihari",
  bi=>"http://bi.wikipedia.org Bislama",
  bm=>"http://bm.wikipedia.org Bambara",
  bn=>"http://bn.wikipedia.org Bengali",
  bo=>"http://bo.wikipedia.org Tibetan",
  bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri",
  br=>"http://br.wikipedia.org Breton",
  bs=>"http://bs.wikipedia.org Bosnian",
  bug=>"http://bug.wikipedia.org Buginese",
  bxr=>"http://bxr.wikipedia.org Buryat",
  ca=>"http://ca.wikipedia.org Catalan",
  cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano",
  "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano",
  cdo=>"http://cdo.wikipedia.org Min Dong",
  ce=>"http://ce.wikipedia.org Chechen",
  ceb=>"http://ceb.wikipedia.org Cebuano",
  ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru
  cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw
  chr=>"http://chr.wikipedia.org Cherokee",
  chy=>"http://chy.wikipedia.org Cheyenne", # was Setsêhestâhese
  co=>"http://co.wikipedia.org Corsican",
  cr=>"http://cr.wikipedia.org Cree",
  crh=>"http://crh.wikipedia.org Crimean Tatar",
  cs=>"http://cs.wikipedia.org Czech",
  csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian
  cu=>"http://cu.wikipedia.org Old Church Slavonic", # url was cv.wikipedia.org
  cv=>"http://cv.wikipedia.org Chuvash", # was Cavas
  cy=>"http://cy.wikipedia.org Welsh",
  da=>"http://da.wikipedia.org Danish",
  de=>"http://de.wikipedia.org German",
  diq=>"http://diq.wikipedia.org Zazaki",
  dk=>"http://dk.wikipedia.org Danish",
  dsb=>"http://dsb.wikipedia.org Lower Sorbian",
  dv=>"http://dv.wikipedia.org Divehi",
  dz=>"http://dz.wikipedia.org Dzongkha",
  ee=>"http://ee.wikipedia.org Ewe",
  el=>"http://el.wikipedia.org Greek",
  eml=>"http://eml.wikipedia.org Emilian-Romagnol",
  en=>"http://en.wikipedia.org English",
  eo=>"http://eo.wikipedia.org Esperanto",
  es=>"http://es.wikipedia.org Spanish",
  et=>"http://et.wikipedia.org Estonian",
  eu=>"http://eu.wikipedia.org Basque",
  ext=>"http://ext.wikipedia.org Extremaduran",
  fa=>"http://fa.wikipedia.org Persian",
  ff=>"http://ff.wikipedia.org Fulfulde",
  fi=>"http://fi.wikipedia.org Finnish",
  "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro",
  fiu_vro=>"http://fiu-vro.wikipedia.org Voro",
  fj=>"http://fj.wikipedia.org Fijian",
  fo=>"http://fo.wikipedia.org Faroese", # was Faeroese
  fr=>"http://fr.wikipedia.org French",
  frp=>"http://frp.wikipedia.org Arpitan",
  fur=>"http://fur.wikipedia.org Friulian",
  fy=>"http://fy.wikipedia.org Frisian",
  ga=>"http://ga.wikipedia.org Irish",
  gan=>"http://gan.wikipedia.org Gan",
  gay=>"http://gay.wikipedia.org Gayo",
  gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic
  gl=>"http://gl.wikipedia.org Galician", # was Galego
  glk=>"http://glk.wikipedia.org Gilaki",
  gn=>"http://gn.wikipedia.org Guarani",
  got=>"http://got.wikipedia.org Gothic",
  gu=>"http://gu.wikipedia.org Gujarati",
  gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic
  ha=>"http://ha.wikipedia.org Hausa",
  hak=>"http://hak.wikipedia.org Hakka",
  haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian
  he=>"http://he.wikipedia.org Hebrew",
  hi=>"http://hi.wikipedia.org Hindi",
  hif=>"http://hif.wikipedia.org Fiji Hindi",
  ho=>"http://ho.wikipedia.org Hiri Motu",
  hr=>"http://hr.wikipedia.org Croatian",
  hsb=>"http://hsb.wikipedia.org Upper Sorbian",
  ht=>"http://ht.wikipedia.org Haitian",
  hu=>"http://hu.wikipedia.org Hungarian",
  hy=>"http://hy.wikipedia.org Armenian",
  hz=>"http://hz.wikipedia.org Herero",
  ia=>"http://ia.wikipedia.org Interlingua",
  iba=>"http://iba.wikipedia.org Iban",
  id=>"http://id.wikipedia.org Indonesian",
  ie=>"http://ie.wikipedia.org Interlingue",
  ig=>"http://ig.wikipedia.org Igbo",
  ii=>"http://ii.wikipedia.org Yi",
  ik=>"http://ik.wikipedia.org Inupiak",
  ilo=>"http://ilo.wikipedia.org Ilokano",
  io=>"http://io.wikipedia.org Ido",
  is=>"http://is.wikipedia.org Icelandic",
  it=>"http://it.wikipedia.org Italian",
  iu=>"http://iu.wikipedia.org Inuktitut",
  ja=>"http://ja.wikipedia.org Japanese",
  jbo=>"http://jbo.wikipedia.org Lojban",
  jv=>"http://jv.wikipedia.org Javanese",
  ka=>"http://ka.wikipedia.org Georgian",
  kaa=>"http://kaa.wikipedia.org Karakalpak",
  kab=>"http://kab.wikipedia.org Kabyle", # url was ka.wikipedia.org
  kaw=>"http://kaw.wikipedia.org Kawi",
  kg=>"http://kg.wikipedia.org Kongo",
  ki=>"http://ki.wikipedia.org Kikuyu",
  kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo
  kk=>"http://kk.wikipedia.org Kazakh",
  kl=>"http://kl.wikipedia.org Greenlandic",
  km=>"http://km.wikipedia.org Khmer", # was Cambodian
  kn=>"http://kn.wikipedia.org Kannada",
  ko=>"http://ko.wikipedia.org Korean",
  kr=>"http://kr.wikipedia.org Kanuri",
  ks=>"http://ks.wikipedia.org Kashmiri",
  ksh=>"http://ksh.wikipedia.org Ripuarian",
  ku=>"http://ku.wikipedia.org Kurdish",
  kv=>"http://kv.wikipedia.org Komi",
  kw=>"http://kw.wikipedia.org Cornish", # was Kornish
  ky=>"http://ky.wikipedia.org Kirghiz",
  la=>"http://la.wikipedia.org Latin",
  lad=>"http://lad.wikipedia.org Ladino",
  lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch
  lbe=>"http://lbe.wikipedia.org Lak",
  lg=>"http://lg.wikipedia.org Ganda",
  li=>"http://li.wikipedia.org Limburgish",
  lij=>"http://lij.wikipedia.org Ligurian",
  lmo=>"http://lmo.wikipedia.org Lombard",
  ln=>"http://ln.wikipedia.org Lingala",
  lo=>"http://lo.wikipedia.org Laotian",
  ls=>"http://ls.wikipedia.org Latino Sine Flexione",
  lt=>"http://lt.wikipedia.org Lithuanian",
  lv=>"http://lv.wikipedia.org Latvian",
  mad=>"http://mad.wikipedia.org Madurese",
  mak=>"http://mak.wikipedia.org Makasar",
  map_bms=>"http://map-bms.wikipedia.org Banyumasan",
  "map-bms"=>"http://map-bms.wikipedia.org Banyumasan",
  mdf=>"http://mdf.wikipedia.org Moksha",
  mg=>"http://mg.wikipedia.org Malagasy",
  mh=>"http://mh.wikipedia.org Marshallese",
  mi=>"http://mi.wikipedia.org Maori",
  min=>"http://min.wikipedia.org Minangkabau",
  minnan=>"http://minnan.wikipedia.org Minnan",
  mk=>"http://mk.wikipedia.org Macedonian",
  ml=>"http://ml.wikipedia.org Malayalam",
  mn=>"http://mn.wikipedia.org Mongolian",
  mo=>"http://mo.wikipedia.org Moldavian",
  mr=>"http://mr.wikipedia.org Marathi",
  ms=>"http://ms.wikipedia.org Malay",
  mt=>"http://mt.wikipedia.org Maltese",
  mus=>"http://mus.wikipedia.org Muskogee",
  my=>"http://my.wikipedia.org Burmese",
  myv=>"http://myv.wikipedia.org Erzya",
  mzn=>"http://mzn.wikipedia.org Mazandarani",
  na=>"http://na.wikipedia.org Nauruan", # was Nauru
  nah=>"http://nah.wikipedia.org Nahuatl",
  nap=>"http://nap.wikipedia.org Neapolitan",
  nds=>"http://nds.wikipedia.org Low Saxon",
  nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
  "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon",
  ne=>"http://ne.wikipedia.org Nepali",
  new=>"http://new.wikipedia.org Nepal Bhasa",
  ng=>"http://ng.wikipedia.org Ndonga",
  nl=>"http://nl.wikipedia.org Dutch",
  nov=>"http://nov.wikipedia.org Novial",
  nrm=>"http://nrm.wikipedia.org Norman",
  nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian
  no=>"http://no.wikipedia.org Norwegian",
  nv=>"http://nv.wikipedia.org Navajo", # was Avayo
  ny=>"http://ny.wikipedia.org Chichewa",
  oc=>"http://oc.wikipedia.org Occitan",
  om=>"http://om.wikipedia.org Oromo",
  or=>"http://or.wikipedia.org Oriya",
  os=>"http://os.wikipedia.org Ossetic",
  pa=>"http://pa.wikipedia.org Punjabi",
  pag=>"http://pag.wikipedia.org Pangasinan",
  pam=>"http://pam.wikipedia.org Kapampangan",
  pap=>"http://pap.wikipedia.org Papiamentu",
  pdc=>"http://pdc.wikipedia.org Pennsylvania German",
  pi=>"http://pi.wikipedia.org Pali",
  pih=>"http://pih.wikipedia.org Norfolk",
  pl=>"http://pl.wikipedia.org Polish",
  pms=>"http://pms.wikipedia.org Piedmontese",
  ps=>"http://ps.wikipedia.org Pashto",
  pt=>"http://pt.wikipedia.org Portuguese",
  qu=>"http://qu.wikipedia.org Quechua",
  rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance
  rmy=>"http://rmy.wikipedia.org Romani",
  rn=>"http://rn.wikipedia.org Kirundi",
  ro=>"http://ro.wikipedia.org Romanian",
  roa_rup=>"http://roa-rup.wikipedia.org Aromanian",
  "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian",
  roa_tara=>"http://roa-tara.wikipedia.org Tarantino",
  "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino",
  ru=>"http://ru.wikipedia.org Russian",
  ru_sib=>"http://ru-sib.wikipedia.org Siberian",
  "ru-sib"=>"http://ru-sib.wikipedia.org Siberian",
  rw=>"http://rw.wikipedia.org Kinyarwanda",
  sa=>"http://sa.wikipedia.org Sanskrit",
  sah=>"http://sah.wikipedia.org Sakha",
  sc=>"http://sc.wikipedia.org Sardinian",
  scn=>"http://scn.wikipedia.org Sicilian",
  sco=>"http://sco.wikipedia.org Scots",
  sd=>"http://sd.wikipedia.org Sindhi",
  se=>"http://se.wikipedia.org Northern Sami",
  sg=>"http://sg.wikipedia.org Sangro",
  sh=>"http://sh.wikipedia.org Serbo-Croatian",
  si=>"http://si.wikipedia.org Sinhala", # was Singhalese
  simple=>"http://simple.wikipedia.org Simple English",
  sk=>"http://sk.wikipedia.org Slovak",
  sl=>"http://sl.wikipedia.org Slovene",
  sm=>"http://sm.wikipedia.org Samoan",
  sn=>"http://sn.wikipedia.org Shona",
  so=>"http://so.wikipedia.org Somali", # was Somalian
  sq=>"http://sq.wikipedia.org Albanian",
  sr=>"http://sr.wikipedia.org Serbian",
  srn=>"http://srn.wikipedia.org Sranan",
  ss=>"http://ss.wikipedia.org Siswati",
  st=>"http://st.wikipedia.org Sesotho",
  stq=>"http://stq.wikipedia.org Saterland Frisian",
  su=>"http://su.wikipedia.org Sundanese",
  sv=>"http://sv.wikipedia.org Swedish",
  sw=>"http://sw.wikipedia.org Swahili",
  szl=>"http://szl.wikipedia.org Silesian",
  ta=>"http://ta.wikipedia.org Tamil",
  te=>"http://te.wikipedia.org Telugu",
  test=>"http://test.wikipedia.org Test",
  tet=>"http://tet.wikipedia.org Tetum",
  tg=>"http://tg.wikipedia.org Tajik",
  th=>"http://th.wikipedia.org Thai",
  ti=>"http://ti.wikipedia.org Tigrinya",
  tk=>"http://tk.wikipedia.org Turkmen",
  tl=>"http://tl.wikipedia.org Tagalog",
  tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon
  tn=>"http://tn.wikipedia.org Setswana",
  to=>"http://to.wikipedia.org Tongan",
  tokipona=>"http://tokipona.wikipedia.org Tokipona",
  tpi=>"http://tpi.wikipedia.org Tok Pisin",
  tr=>"http://tr.wikipedia.org Turkish",
  ts=>"http://ts.wikipedia.org Tsonga",
  tt=>"http://tt.wikipedia.org Tatar",
  tum=>"http://tum.wikipedia.org Tumbuka",
  turn=>"http://turn.wikipedia.org Turnbuka",
  tw=>"http://tw.wikipedia.org Twi",
  ty=>"http://ty.wikipedia.org Tahitian",
  udm=>"http://udm.wikipedia.org Udmurt",
  ug=>"http://ug.wikipedia.org Uighur",
  uk=>"http://uk.wikipedia.org Ukrainian",
  ur=>"http://ur.wikipedia.org Urdu",
  uz=>"http://uz.wikipedia.org Uzbek",
  ve=>"http://ve.wikipedia.org Venda", # was Lushaka
  vec=>"http://vec.wikipedia.org Venetian",
  vi=>"http://vi.wikipedia.org Vietnamese",
  vls=>"http://vls.wikipedia.org West Flemish",
  vo=>"http://vo.wikipedia.org Volapük",
  wa=>"http://wa.wikipedia.org Walloon",
  war=>"http://war.wikipedia.org Waray-Waray",
  wo=>"http://wo.wikipedia.org Wolof",
  wuu=>"http://wuu.wikipedia.org Wu",
  xal=>"http://xal.wikipedia.org Kalmyk",
  xh=>"http://xh.wikipedia.org Xhosa",
  yi=>"http://yi.wikipedia.org Yiddish",
  yo=>"http://yo.wikipedia.org Yoruba",
  za=>"http://za.wikipedia.org Zhuang",
  zea=>"http://zea.wikipedia.org Zealandic",
  zh=>"http://zh.wikipedia.org Chinese",
  zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan",
  "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan",
  zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese",
  "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese",
  zh_yue=>"http://zh-yue.wikipedia.org Cantonese",
  "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese",
  zu=>"http://zu.wikipedia.org Zulu",
  zz=>" All languages",
  zzz=>" All languages except English"
  );

  foreach my $key (keys %wikipedias)
  {
    my $wikipedia = $wikipedias {$key} ;

    # split "url name" into its two parts: url = first run of non-blanks, name = the rest
    $out_urls      {$key} = $wikipedia ;
    $out_languages {$key} = $wikipedia ;
    $out_urls      {$key} =~ s/(^[^\s]+).*$/$1/ ;
    $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ;

    $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ;
    $out_article {$key} =~ s/ /_/g ;
  }
}
|
1157 | | -
|
# Fill the global @out_report_descriptions with the English titles of the
# wikistats reports, in fixed order (22 entries).
# List copied from WikiReports_EN.pl
sub InitReportNames
{
  @out_report_descriptions = (
    'Contributors',
    'New editors',
    'Active editors',
    'Very active editors',
    'Article count (official)',
    'Article count (alternate)',
    'New articles per day',
    'Edits per article',
    'Bytes per article',
    'Articles over 0.5 Kb',
    'Articles over 2 Kb',
    'Edits per month',
    'Database size',
    'Words',
    'Internal links',
    'Links to other Wikipedias',
    'Binaries',
    'External links',
    'Redirects',
    'Page requests per day',
    'Visits per day',
    'Overview recent months',
  ) ;
}
|
1186 | | -
|
| 2 | +#!/usr/local/bin/perl |
| 3 | + |
| 4 | + use lib "/home/ezachte/lib" ; |
| 5 | + use EzLib ; |
# Main script: global configuration followed by the fixed processing sequence.
# All variables set here are package globals that the subs below read directly.

  # NOTE(review): $true and ez_lib_version are not defined in this file - presumably provided by EzLib; confirm
  $trace_on_exit = $true ;
  ez_lib_version (2) ;

  # last month to report on (!! presumably to be adapted for every run - confirm)
  $month_last = "12" ;
  $year_last = 2010 ;

  # first month to include; older rows in the csv input are skipped by the readers
  $month_start = "1" ;
  $year_start = 2008 ;

  # same boundaries expressed as month counters (see months_since_2000_01),
  # plus the counters for 'one year before last month' and 'one month before last month'
  $m_start = &months_since_2000_01 ($year_start, $month_start) ;
  $m_last = &months_since_2000_01 ($year_last, $month_last) ;
  $m_last_12 = $m_last - 12 ;
  $m_last_1 = $m_last - 1 ;

  # zero-pad only after the numeric use above; the padded form is used in the output file name
  $month_last = sprintf ("%02d", $month_last) ; # 1 -> 01

  # set defaults mainly for tests on local machine
  # NOTE(review): default_argv is not defined in this file - presumably an EzLib helper that pre-fills @ARGV; confirm
  default_argv "-i 'W:/# Out Bayes'|-o 'W:/@ Report Card/Data'" ;

  use Getopt::Std ;

# $file_regions_UV = "Multi-Country Media Trend, UVs by region (July 2008 - September 2009)_27290.csv" ;
# $file_regions_Reach = "Multi-Country Media Trend, % reach by region (July 2008 - September 2009)_10786.csv" ;

  $maxpopularwikis = 25 ;
  # project codes for which totals are kept; 'commons' and '*' (all projects) are
  # aggregate keys used in %totals_project, not separate csv folders
  @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ;

  # fixed processing sequence: report arguments, set paths, load lookup tables, read input, write output
  &LogArguments ;
  &ParseArguments ;
  &InitProjectNames ;
  &InitReportNames ;
  &ReadStatisticsMonthly ;
  &WriteMonthlyData ;
  exit ;
| 40 | + |
# Parse the command line switches -i -o -l -p -f -t (each takes a value) into
# the global %options and echo them to STDOUT, one switch per line, sorted by
# switch letter.
sub LogArguments
{
  getopt ("iolpft", \%options) ;

  my $arguments = join ('', map { " -$_ " . $options {$_} . "\n" } sort keys %options) ;

  print ("\nArguments\n$arguments\n") ;
# &Log ("\nArguments\n$arguments\n") ;
}
| 50 | + |
# Establish the input and output folders (globals $path_in, $path_out) and the
# name of the csv file to generate (global $file_csv_out), then set up the
# comparison periods.
#
# The folders are currently hard-coded; the code that takes them from the
# command line switches -i and -o is disabled below.
sub ParseArguments
{
# my @options ;
# getopt ("io", \%options) ;

# die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
# die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;

# $path_in = $options {"i"} ;
# $path_out = $options {"o"} ;

# die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
# die "Output folder '$path_out' does not exist" if (! -d $path_out) ;

  $path_in  = 'w:/# out bayes' ;
  $path_out = 'w:/@ report card/data' ;

  print "Input folder: $path_in\n" . "Output folder: $path_out\n" . "\n" ;

  # e.g. <path_out>/StatisticsMonthly_2010_12.csv
  $file_csv_out = $path_out . "/StatisticsMonthly_" . $year_last . "_" . $month_last . ".csv" ;

  &SetComparisonPeriods ;
}
| 76 | + |
# Read the monthly statistics of every project in turn, then the per-extension
# counts of binaries.
sub ReadStatisticsMonthly
{
  foreach my $project_code (qw(wb wk wn wp wq ws wv wx))
  { &ReadStatisticsMonthlyForProject ($project_code) ; }

  &ReadStatisticsPerBinariesExtensionCommons ;
}
| 90 | + |
# Read the monthly csv files of one project and add its figures to the global
# accumulators.
#
# Parameters:
#   $project - project code, used to locate "$path_in/csv_$project/..."
#
# Input files (the script aborts via Abort when either is missing):
#   StatisticsMonthly.csv            - rows "language,mm/dd/yyyy,counts..."
#   StatisticsUserActivitySpread.csv - rows "language,mm/dd/yyyy,R|B,A|T|O,counts..."
#
# Globals written:
#   %values         {"$f,$m"} {"$project,$language"} - figure per wiki
#   %totals         {"$f,$m"}                        - sum over everything read so far
#   %totals_project {"$f,$m"} {$project | 'commons' | '*'} - sum per project / all projects
#   %languages      - language codes seen (counts rows)
#   where $f is a column index into the counts of StatisticsMonthly.csv and $m a
#   month counter as returned by months_since_2000_01.
#
# NOTE(review): %languages is never reset, so it also contains the languages of
# projects (and of the first file) read earlier; both 'foreach $language' loops
# therefore also visit languages that have no rows in the file just read. For
# those the stored line is empty, the split below yields an empty language and
# empty fields, and nothing but zeroes is added. Other code may rely on
# %languages being cumulative - confirm before changing.
sub ReadStatisticsMonthlyForProject
{
  my $project = shift;

  $all_projects = "*" ; # key in %totals_project for the sum over all projects

  my $file_csv_in_1 = "$path_in/csv_$project/StatisticsMonthly.csv" ;
  my $file_csv_in_2 = "$path_in/csv_$project/StatisticsUserActivitySpread.csv" ;

  if (! -e $file_csv_in_1)
  { &Abort ("Input file '$file_csv_in_1' not found") ; }
  if (! -e $file_csv_in_2)
  { &Abort ("Input file '$file_csv_in_2' not found") ; }

  print "Read '$file_csv_in_1'\n" ;
  # NOTE(review): result of open is not checked; existence was tested above, readability was not
  open CSV_IN, '<', $file_csv_in_1 ;

  # pass 1: remember the raw line per language and month ($m_start onwards)
  undef %lines ;
  while ($line = <CSV_IN>)
  {
    ($language,$date,$counts) = split (',', $line, 3) ;

    next if $language eq 'commons' and $project ne 'wx' ; # take commons only from the 'wx' files
    next if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam (skips language 'sr' in project 'wn')

    ($month,$day,$year) = split ('\/', $date) ;
    my $m = &months_since_2000_01 ($year,$month) ;
    next if $m < $m_start ;

    $lines {$language}{$m} = $line ;
    $languages {$language}++ ;
  }

  # pass 2: fill gaps and accumulate
  foreach $language (sort keys %languages)
  {
    # a month without a row inherits the row of the preceding month
    for ($m = $m_start + 1 ; $m <= $m_last ; $m++)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for ($m = $m_start ; $m <= $m_last ; $m++)
    {
      $line = $lines {$language}{$m} ;
      chomp $line ;
      # note: this overwrites the loop variable $language with the code found in the line
      ($language,$date,$counts) = split (',', $line, 3) ;
      @fields = split (',', $counts) ;

      if ($project eq "wp")
      {
        foreach $f (1,4,6,11) # new editors, articles, new articles, edits
        {
          $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;

          $totals {"$f,$m"} += $fields [$f] ;

          $totals_project {"$f,$m"} {$project} += $fields [$f] ;
          $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;

          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }
      }
      else
      {
        foreach $f (1,4)
        {
          # of the two columns in this loop only $f == 1 passes this test
          if ($f <= 3)
          {
            $values {"$f,$m"} {"$project,$language"} = $fields [$f] ;
            $totals {"$f,$m"} += $fields [$f] ;
          }


          # ignore editor count on commons for totals, most editors are already counted for other project
          # (even for several projects, to be tuned after centralauth dump is available)
          # count for all_projects only Wikipedia articles
          if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
          { $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ; }

          # commons is kept as a pseudo project of its own, apart from project 'wx'
          if ($language eq 'commons')
          { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
          else
          { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }

          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }
        # columns 6 and 11 always count towards the all-projects total, commons included
        foreach $f (6,11)
        {
          $totals_project {"$f,$m"} {$all_projects} += $fields [$f] ;
          if ($language eq 'commons')
          { $totals_project {"$f,$m"} {'commons'} += $fields [$f] ; }
          else
          { $totals_project {"$f,$m"} {$project} += $fields [$f] ; }
          # print "TOTALS $f $m = . " . $totals {"$f,$m"} . "\n" ;
        }

      }
    }
  }
  close CSV_IN ;

  # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
  # but use f = column count in StatisticsMonthly.csv

  print "Read '$file_csv_in_2'\n" ;
  # NOTE(review): result of open is not checked here either
  open CSV_IN, '<', $file_csv_in_2 ;

  # pass 1: keep only rows for registered users (R) in the article namespace (A)
  undef %lines ;
  while ($line = <CSV_IN>)
  {
    chomp $line ;
    ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;

    next if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
    # next if $language eq 'commons' ; # ignore editor count on commons altogether, most are already counted for other project
    # (even for several projects, to be tuned after centralauth dump is available)

    if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
    if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces

    ($month,$day,$year) = split ('\/', $date) ;
    my $m = &months_since_2000_01 ($year,$month) ;
    next if $m < $m_start ;

    $lines {$language}{$m} = $line ;
    $languages {$language}++ ;
  }

  # pass 2: fill gaps and accumulate, same scheme as for the first file
  foreach $language (sort keys %languages)
  {
    for ($m = $m_start+1 ; $m <= $m_last ; $m++)
    {
      if ($lines {$language}{$m} eq '')
      { $lines {$language}{$m} = $lines {$language}{$m -1} ; }
    }

    for ($m = $m_start ; $m <= $m_last ; $m++)
    {
      $line = $lines {$language}{$m} ;
      chomp $line ;
      ($language,$date,$reguser_bot,$group,$counts) = split (',', $line, 5) ;
      @fields = split (',', $counts) ;

      foreach $f (2,3) # editors_gt_5, editors_gt_100
      {
        # count user with over x edits
        # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
        # thresholds = 1,3,5,10,25,32,50,100,etc
        # $f2 = index of the wanted threshold in this file: 2 -> 5 edits, 7 -> 100 edits;
        # results are stored under $f, the column index used for StatisticsMonthly.csv
        if ($f == 2) { $f2 = 2 ; }
        if ($f == 3) { $f2 = 7 ; }

        $values {"$f,$m"} {"$project,$language"} = $fields [$f2] ;
        $totals {"$f,$m"} += $fields [$f2] ;

        # ignore editor count on commons for totals, most editors are already counted for other project
        # (even for several projects, to be tuned after centralauth dump is available)
        if (($f <= 3) && ($language ne 'commons')) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
        { $totals_project {"$f,$m"} {$all_projects} += $fields [$f2] ; }

        if ($language eq 'commons')
        { $totals_project {"$f,$m"} {'commons'} += $fields [$f2] ; }
        else
        { $totals_project {"$f,$m"} {$project} += $fields [$f2] ; }
      }
    }
  }
  close CSV_IN ;
}
| 259 | + |
# Reads per-extension binary counts for Commons from
# "$path_in/csv_wx/StatisticsPerBinariesExtension.csv".
#
# Each input line is "language,date,count,count,...". Only lines whose
# language is "commons" are used:
#   - the line dated "00/0000" is stored as $ext_cnt {-1}{column}
#     (WriteMonthlyData uses these values as row labels)
#   - lines dated "mm/yyyy" at or after $m_start are stored as
#     $ext_cnt {month}{column} and summed into $ext_tot {month}
#
# Afterwards the column indexes are ranked by their count in the most recent
# month read, and the first ten are appended to @extndxs.
#
# Aborts the run when the file is missing or cannot be opened.
sub ReadStatisticsPerBinariesExtensionCommons
{
  my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
  my $mmax = -1 ; # most recent month found in the file

  if (! -e $file_csv_in)
  { &Abort ("Input file '$file_csv_in' not found") ; }

  print "Read '$file_csv_in'\n" ;
  # an existing file can still be unreadable (permissions), so check the open itself
  open my $csv_in, '<', $file_csv_in or &Abort ("Input file '$file_csv_in' could not be opened: $!") ;
  while (my $line = <$csv_in>)
  {
    chomp $line ;
    my ($language,$date,$counts) = split (',', $line, 3) ;

    next if $language ne "commons" ;

    my @fields = split (',', $counts) ;

    # pseudo date 00/0000 marks the header line
    if ($date eq "00/0000")
    {
      my $field_ndx = 0 ;
      foreach my $field (@fields)
      { $ext_cnt {-1}{$field_ndx++} = $field ; }
      next ;
    }

    my ($month,$year) = split ('\/', $date) ;
    my $m = &months_since_2000_01 ($year,$month) ;
    next if $m < $m_start ;

    if ($m > $mmax)
    { $mmax = $m ; }

    my $field_ndx = 0 ;
    foreach my $field (@fields)
    {
      $ext_cnt {$m}{$field_ndx++} = $field ;
      $ext_tot {$m} += $field ;
    }
  }
  close $csv_in ;

  # rank columns by their count in the most recent month, largest first
  # NOTE(review): when no monthly line was read $mmax is still -1 and the header
  # line itself gets ranked here - confirm whether that case can occur
  %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
  @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;

  # keep the ten highest ranked columns
  my $extcnt = 0 ;
  foreach my $extndx (@ext_cnt_mmax)
  {
    push @extndxs, $extndx ;
    last if ++$extcnt >= 10 ;
  }
}
| 319 | + |
# Placeholder: this sub currently does nothing, its body consists of disabled code only.
sub ReadMediaTrends
{
# open FILE_UV, '<', $file_regions_UV ;
# close FILE_UV ;

# open FILE_REACH, '<', $file_regions_Reach ;
# close FILE_REACH ;
}
| 328 | + |
# Writes the trend report to $file_csv_out as comma separated text.
#
# For each metric $f in (1,2,3,4,6,11) four tables are written:
#   - absolute counts per wiki    (total row, then the 25 largest wikis of the last month)
#   - absolute counts per project (all entries of @projects)
#   - indexed counts per wiki     (first month shown = 100)
#   - indexed counts per project  (first month shown = 100)
# followed by two tables on binaries per extension (absolute and indexed).
#
# Input comes from package variables: %totals, %values, %totals_project,
# %ext_cnt, %ext_tot, @extndxs, @projects, %out_languages,
# @out_report_descriptions, $csv_recent_months and the month numbers
# $m_start, $m_last, $m_last_1, $m_last_12.
#
# Aborts the run when the output file cannot be opened or closed, instead of
# silently reporting success. The text written is unchanged.
sub WriteMonthlyData
{
  my @month_range = ($m_start .. $m_last) ;
  my $output      = "" ;

  print "Write file '$file_csv_out'\n" ;
  open my $csv_out, '>', $file_csv_out or &Abort ("Output file '$file_csv_out' could not be opened: $!") ;

  foreach my $f (1,2,3,4,6,11) # new editors, editors_gt_5, editors_gt_100, articles, new articles, edits
  {
    my $description = $out_report_descriptions [$f] ;

    # ----- absolute, per wiki -----
    $output .= "\n,$description - Absolute - Per Wiki\n" ;
    $output .= "$csv_recent_months,%inc year, %inc month\n" ;

    $output .= &_CsvRow (",Total,",
                         (map { $totals {"$f,$_"} } @month_range),
                         &_GrowthPercentage ($totals {"$f,$m_last"}, $totals {"$f,$m_last_12"}),
                         &_GrowthPercentage ($totals {"$f,$m_last"}, $totals {"$f,$m_last_1"})) . "\n" ;

    # rank wikis by absolute amount for last month, keep the 25 largest
    my %last_month_values = %{$values {"$f,$m_last"}} ;
    my @ranked_wikis = sort {$last_month_values {$b} <=> $last_month_values {$a}} keys %last_month_values ;
    if (@ranked_wikis > 25)
    { $#ranked_wikis = 24 ; }

    my $index = 0 ;
    foreach my $key (@ranked_wikis)
    {
      $index++ ;
      $output .= &_CsvRow (&_WikiRowLabel ($index, $key),
                           (map { $values {"$f,$_"} {$key} } @month_range),
                           &_GrowthPercentage ($values {"$f,$m_last"} {$key}, $values {"$f,$m_last_12"} {$key}),
                           &_GrowthPercentage ($values {"$f,$m_last"} {$key}, $values {"$f,$m_last_1"} {$key})) . "\n" ;
    }

    # ----- absolute, per project -----
    $output .= "\n,$description - Absolute - Per Project\n" ;
    if ($f <= 3) # 0 = Contributors, 1 = New Wikimedians, 2 = Active Editors (5+ edits), 3 = Very Active Editors (100+ edits),
    { $output .= ",Note: All projects does not include Commons\n" ; }
    $output .= "$csv_recent_months,%inc year, %inc month\n" ;

    my @ranked_projects = sort {$totals_project {"$f,$m_last"} {$b} <=> $totals_project {"$f,$m_last"} {$a}} @projects ;
    foreach my $project (@ranked_projects)
    {
      $output .= &_CsvRow (&_ProjectRowLabel ($project),
                           (map { $totals_project {"$f,$_"} {$project} } @month_range),
                           &_GrowthPercentage ($totals_project {"$f,$m_last"} {$project}, $totals_project {"$f,$m_last_12"} {$project}),
                           &_GrowthPercentage ($totals_project {"$f,$m_last"} {$project}, $totals_project {"$f,$m_last_1"} {$project})) . "\n" ;
    }

    # ----- indexed, per wiki (same ranking as the absolute table) -----
    $output .= "\n,$description - Indexed - Per Wiki\n" ;
    $output .= "$csv_recent_months\n" ;

    $index = 0 ;
    foreach my $key (@ranked_wikis)
    {
      $index++ ;

      my $value_100 = $values {"$f,$m_start"} {$key} ;
      my @cells = map { $value_100 != 0 ? sprintf ("%.1f", 100 * ($values {"$f,$_"} {$key} / $value_100)) : "" } @month_range ;
      $output .= &_CsvRow (&_WikiRowLabel ($index, $key), @cells) . "\n" ;

      # put totals last in chart to show line on top of others
      # (this row is only written when at least nine wikis are listed)
      if ($index == 9)
      {
        # NOTE(review): totals are indexed on $m_last_12, the wiki rows above on $m_start - confirm this is intended
        my $total_100 = $totals {"$f,$m_last_12"} ;
        my @total_cells = map { $total_100 != 0 ? sprintf ("%.1f", 100 * ($totals {"$f,$_"} / $total_100)) : "" } @month_range ;
        $output .= ",Total," . join ('', map { "$_," } @total_cells) . ",(sorted here to make it top-most line out of 10 in Excel)\n" ;
      }
    }

    # ----- indexed, per project -----
    $output .= "\n,$description - Indexed - Per Project\n" ;
    # NOTE(review): the header announces %inc columns which the rows below do not contain
    $output .= "$csv_recent_months,%inc year, %inc month\n" ;
    foreach my $project (@ranked_projects)
    {
      my $value_100 = $totals_project {"$f,$m_start"} {$project} ;
      my @cells = map { $value_100 != 0 ? sprintf ("%.1f", 100 * ($totals_project {"$f,$_"} {$project} / $value_100)) : "" } @month_range ;
      $output .= &_CsvRow (&_ProjectRowLabel ($project), @cells) . "\n" ;
    }
    $output .= "\n," . '=' x 150 . "\n" ;
  }

  print {$csv_out} $output ;

  # ----- binaries per extension, absolute -----
  $output  = "\n,Binaries per month - Absolute\n" ;
  # NOTE(review): the header line is written twice; kept as is so the row positions in the file do not shift
  $output .= "$csv_recent_months,%inc year, %inc month\n" ;
  $output .= "\n$csv_recent_months,%inc year,%inc month\n" ;

  $output .= &_CsvRow (",Total,",
                       (map { $ext_tot {$_} } @month_range),
                       &_GrowthPercentage ($ext_tot {$m_last}, $ext_tot {$m_last_12}),
                       &_GrowthPercentage ($ext_tot {$m_last}, $ext_tot {$m_last_1})) . "\n" ;

  # always ten rows: the last ten entries of @extndxs, preceded by
  # placeholder rows ("xxx") when fewer than ten entries are available
  my @slots       = ($#extndxs - 9 .. $#extndxs) ;
  my @blank_cells = map { "" } @month_range ;

  my $index = 0 ;
  foreach my $e (@slots)
  {
    $index++ ;

    if ($e < 0)
    {
      $output .= &_CsvRow ("$index,xxx,", @blank_cells) . "\n" ;
      next ;
    }

    my $extndx = $extndxs [$e] ;
    $output .= &_CsvRow ("$index," . $ext_cnt {-1}{$extndx} . ",",
                         (map { $ext_cnt {$_}{$extndx} } @month_range),
                         &_GrowthPercentage ($ext_cnt {$m_last}{$extndx}, $ext_cnt {$m_last_12}{$extndx}),
                         &_GrowthPercentage ($ext_cnt {$m_last}{$extndx}, $ext_cnt {$m_last_1}{$extndx})) . "\n" ;
  }

  print {$csv_out} $output ;

  # ----- binaries per extension, indexed (12 months before last month = 100) -----
  $output  = "\n,Binaries per month - Indexed\n" ;
  $output .= "$csv_recent_months\n" ;

  $index = 0 ;
  foreach my $e (@slots)
  {
    $index++ ;

    if ($e < 0)
    {
      $output .= &_CsvRow ("$index,xxx,", @blank_cells) . "\n" ;
      next ;
    }

    my $extndx     = $extndxs [$e] ;
    my $ext_cnt_m0 = $ext_cnt {$m_last-12}{$extndx} ;
    my @cells = map { $ext_cnt_m0 > 0 ? sprintf ("%.1f", 100 * ($ext_cnt {$_}{$extndx} / $ext_cnt_m0)) : "" } @month_range ;
    $output .= &_CsvRow ("$index," . $ext_cnt {-1}{$extndx} . ",", @cells) . "\n" ;
  }
  print {$csv_out} $output ;

  # buffered write errors only surface on close
  close $csv_out or &Abort ("Output file '$file_csv_out' could not be written completely: $!") ;

  print "\nOutput written to $file_csv_out\n\n" ;
}

# Growth of $current relative to $baseline, formatted like "12.3%",
# or "n.a." when the baseline is zero or missing.
sub _GrowthPercentage
{
  my ($current, $baseline) = @_ ;

  if ($baseline != 0)
  { return sprintf ("%.1f", 100 * ($current / $baseline) - 100) . "%" ; }

  return "n.a." ;
}

# Builds one csv row: $label (which already ends with a comma) followed by
# the cells, comma separated; one trailing comma is removed.
sub _CsvRow
{
  my ($label, @cells) = @_ ;

  my $row = $label . join ('', map { "$_," } @cells) ;
  $row =~ s/,$// ;
  return $row ;
}

# Leading columns for a wiki row: rank, then language name, extended with the
# project name for all projects other than "wp" and "wx". $key is "project,language".
sub _WikiRowLabel
{
  my ($index, $key) = @_ ;

  my ($project, $language) = split (",", $key) ;
  my $language_name = $out_languages {$language} ;

  if (($project ne "wp") && ($project ne "wx"))
  { return "$index,$language_name " . &GetProjectName ($project) . "," ; }

  return "$index,$language_name," ;
}

# Leading columns for a project row: empty rank column, then the project name.
sub _ProjectRowLabel
{
  my $project = shift ;

  if ($project eq 'commons')
  { return ",Commons," ; }

  return "," . &GetProjectName ($project) . "," ;
}
| 591 | + |
# Reports the periods that are compared on the console, and prepares the
# month labels for all months from $m_start up to and including $m_last:
#   @recent_months [month number] = "yyyy/mm"
#   $csv_recent_months            = ",project,mm/yyyy,mm/yyyy,..." (csv header line)
# Labels start at $year_start/$month_start.
sub SetComparisonPeriods
{
  # reference periods, only shown on the console
  my $period_last       = sprintf ("%04d/%02d", $year_last,     $month_last) ;
  my $period_year_back  = sprintf ("%04d/%02d", $year_last - 1, $month_last) ;
  my $period_month_back ;
  if ($month_last > 1)
  { $period_month_back = sprintf ("%04d/%02d", $year_last, $month_last - 1) ; }
  else
  { $period_month_back = sprintf ("%04d/%02d", $year_last - 1, 12) ; }

  print "\nWrite trend data up till $period_last\n\n" ;
  print "Compare with previous month: $period_month_back, previous year: $period_year_back\n\n" ;

  my @columns = (",project") ;
  ($year, $month) = ($year_start, $month_start) ;
  for ($m = $m_start ; $m <= $m_last ; $m++)
  {
    $recent_months [$m] = sprintf ("%04d/%02d", $year, $month) ;
    push @columns, sprintf ("%02d/%04d", $month, $year) ;

    # step to next calendar month
    if ($month < 12)
    { $month = $month + 1 ; }
    else
    {
      $year  = $year + 1 ;
      $month = 1 ;
    }
  }
  $csv_recent_months = join (",", @columns) ;
}
| 617 | + |
| 618 | +#sub WriteCsvFilesPerPeriod |
| 619 | +#{ |
| 620 | +# foreach $period (sort keys %totals) |
| 621 | +# { |
| 622 | +# &LogT ("\nWrite totals per $period: ") ; |
| 623 | +# $desc = $descriptions {$period} ; |
| 624 | + |
| 625 | +# foreach $project (sort keys %{$totals {$period}}) |
| 626 | +# { |
| 627 | +# &Log ("$project ") ; |
| 628 | + |
| 629 | +# $dir_out = "$path_out/csv_$project" ; |
| 630 | +# if (! -d $dir_out) |
| 631 | +# { mkdir $dir_out, 0777 ; } |
| 632 | + |
| 633 | +# $file_out = "$dir_out/$desc.csv" ; |
| 634 | + |
| 635 | +# open CSV, ">", $file_out ; |
| 636 | +# foreach $key (sort {$a cmp $b} keys %{$totals {$period}{$project}}) |
| 637 | +# { |
| 638 | +# ($language,$yearmonth) = split (",", $key) ; |
| 639 | +# # print "PERIOD $period PROJECT $project KEY $key\n" ; |
| 640 | +# if ($period eq "month") |
| 641 | +# { print CSV "$language," . $date_high {"$yearmonth"} . "," . $totals{$period}{$project}{$key} . "\n" ; } |
| 642 | +# else |
| 643 | +# { print CSV "$key," . $totals{$period}{$project}{$key} . "\n" ; } |
| 644 | +# } |
| 645 | +# close CSV ; |
| 646 | +# } |
| 647 | +# } |
| 648 | +#} |
| 649 | + |
| 650 | +#sub WriteCsvHtmlFilesPopularWikis |
| 651 | +#{ |
| 652 | +# @totals_lastmonth = sort {$totals_lastmonth {$b} <=> $totals_lastmonth {$a}} keys %totals_lastmonth ; |
| 653 | + |
| 654 | +# $dir_out = "$path_out/csv_wp" ; |
| 655 | +# $file_out = "$dir_out/PageViewsPerMonthPopularWikis_$file_year_month_last.csv" ; |
| 656 | + |
| 657 | +## extend with normalized counts |
| 658 | +## see manually created PageViewsPerMonthTop25PlusNormalizedTo100.csv |
| 659 | + |
| 660 | +# open CSV, ">", $file_out ; |
| 661 | +# print CSV $csv_recent_months ; |
| 662 | + |
| 663 | +# # write per popular language+wiki 13 months of page view totals |
| 664 | +# $lines = 0 ; |
| 665 | +# foreach $line (@totals_lastmonth) |
| 666 | +# { |
| 667 | +# if (++$lines > $maxpopularwikis) { last ; } |
| 668 | + |
| 669 | +# ($project, $language) = split (',', $line) ; |
| 670 | +# $largest_projects {"$project-$language"} ++ ; |
| 671 | + |
| 672 | +# $language_name = $out_languages {$language} ; |
| 673 | + |
| 674 | +# if (($project ne "wp") && ($project ne "wx")) |
| 675 | +# { print CSV "$language_name " . &GetProjectName ($project) . "," ; } |
| 676 | +# else |
| 677 | +# { print CSV "$language_name," ; } |
| 678 | + |
| 679 | +## %test = %{$totals {"month"} {"wp"} }; |
| 680 | +## %test2 = @recent_months ; |
| 681 | +# for ($m = 0 ; $m <= 12 ; $m++) |
| 682 | +# { print CSV $totals {"month"} {$project} {"$language,${recent_months [$m]}"} . "," ; } |
| 683 | +# print CSV "\n" ; |
| 684 | +# } |
| 685 | + |
| 686 | +# print CSV "\n$csv_recent_months" ; |
| 687 | + |
| 688 | +# # write per popular language+wiki 13 months of page view totals, normalized to first month = 100 |
| 689 | +# $lines = 0 ; |
| 690 | +# foreach $line (@totals_lastmonth) |
| 691 | +# { |
| 692 | +# if (++$lines > $maxpopularwikis) { last ; } |
| 693 | + |
| 694 | +# ($project, $language) = split (',', $line) ; |
| 695 | +# $language_name = $out_languages {$language} ; |
| 696 | + |
| 697 | +# if (($project ne "wp") && ($project ne "wx")) |
| 698 | +# { print CSV "$language_name " . &GetProjectName ($project) . "," ; } |
| 699 | +# else |
| 700 | +# { print CSV "$language_name," ; } |
| 701 | + |
| 702 | +# $recent_month_0 = $totals {"month"} {$project} {"$language,${recent_months [ 0]}"} ; |
| 703 | +# for ($m = 0 ; $m <= 12 ; $m++) |
| 704 | +# { |
| 705 | +# if ($recent_month_0 > 0) |
| 706 | +# { print CSV sprintf ("%.2f", 100 * $totals {"month"} {$project} {"$language,${recent_months [$m]}"} / $recent_month_0) . "," ; } |
| 707 | +# else |
| 708 | +# { print CSV "," ; } |
| 709 | +# } |
| 710 | + |
| 711 | +# print CSV "\n" ; |
| 712 | +# } |
| 713 | +# close CSV ; |
| 714 | + |
| 715 | +# # write ready made table rows for report card: page views top 25 movers shakers |
| 716 | +# foreach $key (keys %largest_projects) |
| 717 | +# { |
| 718 | +# ($project,$language) = split ('-', $key) ; |
| 719 | + |
| 720 | +# $total_lastmonth = $totals {"month"} {$project} {"$language,$month_last"} ; |
| 721 | +# $total_prevmonth = $totals {"month"} {$project} {"$language,$year_month_last_minus_1"} ; |
| 722 | +# $total_prevyear = $totals {"month"} {$project} {"$language,$year_month_last_minus_12"} ; |
| 723 | + |
| 724 | +# $perc_month = "no data" ; |
| 725 | +# $perc_year = "no data" ; |
| 726 | + |
| 727 | +# if ($total_prevyear > 0) |
| 728 | +# { $perc_year = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevyear - 100) ; } |
| 729 | +# if ($total_prevyear > 0) |
| 730 | +# { $perc_month = sprintf ("%.1f", 100 * $total_lastmonth/$total_prevmonth - 100) ; } |
| 731 | + |
| 732 | +# $line = "$project-$language: $total_prevyear=>$total_lastmonth=$perc_year%, $total_prevmonth=>$total_lastmonth=$perc_month%" ; |
| 733 | + |
| 734 | +# $total_lastmonth = sprintf ("%.0f", $total_lastmonth / 1000000) ; |
| 735 | + |
| 736 | +# $project_name = &GetProjectName ($project) ; |
| 737 | +# $language_name = $out_languages {$language} ; |
| 738 | + |
| 739 | +# $col1 = "<td class=detail-left>$language_name $project_name</td>\n" ; |
| 740 | +# $col2 = "<td class=detail-blue>$total_lastmonth</td>\n" ; |
| 741 | +# $col3 = "<td class=detail-blue>$perc_month%</td>\n" ; |
| 742 | +# $col4 = "<td class=detail-blue>$perc_year%</td>\n" ; |
| 743 | +# $html = "<tr>\n$col1$col2$col3$col4</tr>\n" ; |
| 744 | + |
| 745 | +# $growth_figures_text {"$perc_month-$project-$language"} = $line ; |
| 746 | +# $growth_figures_html {"$perc_month-$project-$language"} = $html ; |
| 747 | +# } |
| 748 | + |
| 749 | +# $file_html = "$dir_out/PageViewsMoversShakersPopularWikis_$file_year_month_last.html" ; |
| 750 | + |
| 751 | +# open HTML, ">", $file_html ; |
| 752 | +# foreach $key (sort {$b <=> $a} keys %growth_figures_text) |
| 753 | +# { |
| 754 | +# print "$key: ". $growth_figures_text {$key} . "\n" ; |
| 755 | +# print HTML $growth_figures_html {$key} ; |
| 756 | +# } |
| 757 | +# close HTML ; |
| 758 | +#} |
| 759 | + |
| 760 | + |
# Translates a project code ("wp", "wb", ..., "*" for all projects) into the
# project name used in reports.
#
# An unknown code is returned unchanged. Before, the result for an unknown
# code was whatever name the previous call had left in $project_name, so rows
# could be labeled with the name of an unrelated project.
#
# The result is also stored in the package variable $project_name, as before.
sub GetProjectName
{
  my $project = shift ;

  my %names = (
    wp  => "Wikipedia",
    wb  => "Wikibooks",
    wk  => "Wiktionary",
    wx  => "Other Wikis",
    wn  => "Wikinews",
    wq  => "Wikiquote",
    ws  => "Wikisource",
    wv  => "Wikiversity",
    "*" => "All projects",
  ) ;

  if (exists $names {$project})
  { $project_name = $names {$project} ; }
  else
  { $project_name = $project ; }

  return ($project_name) ;
}
| 777 | + |
# Position of year/month relative to the month one year before
# $year_last/$month_last: 0 for that month, 12 for $year_last/$month_last itself.
sub MonthsSinceYearAgo
{
  my ($year, $month) = @_ ;

  my $months_before_last = ($year_last - $year) * 12 + ($month_last - $month) ;
  return 12 - $months_before_last ;
}
| 784 | + |
# Number of months between January 2008 and the given year/month
# (January 2008 yields 0).
sub MonthsSinceFirstMonthToShow
{
  my ($year, $month) = @_ ;

  my $whole_years = $year - 2008 ;
  return $whole_years * 12 + ($month - 1) ;
}
| 791 | + |
# encode year,month as a single month number: January 2000 = 1, February 2000 = 2, etc
# (the original comment notes the number is meant to fit in 1 byte)
sub months_since_2000_01
{
  my ($year, $month) = @_ ;
  return ($year - 2000) * 12 + $month ;
}
| 800 | + |
| 801 | +#sub Log |
| 802 | +#{ |
| 803 | +# $msg = shift ; |
| 804 | +# print $msg ; |
| 805 | +# print LOG $msg ; |
| 806 | +#} |
| 807 | + |
| 808 | +#sub LogT |
| 809 | +#{ |
| 810 | +# $msg = shift ; |
| 811 | +# my ($ss,$mm,$hh) = (localtime (time))[0,1,2] ; |
| 812 | +# my $time = sprintf ("%02d:%02d:%02d ", $hh, $mm, $ss) ; |
| 813 | +# $msg =~ s/^(\n*)/$1$time/s ; |
| 814 | +# &Log ($msg) ; |
| 815 | +#} |
| 816 | + |
# Current local time as "hh:mm:ss " (note the trailing space).
sub MmSs
{
  my @now = localtime (time) ;
  my ($hours, $minutes, $seconds) = @now [2,1,0] ;
  return sprintf ("%02d:%02d:%02d ", $hours, $minutes, $seconds) ;
}
| 822 | + |
# Prints $msg followed by "Execution aborted." and ends the script.
# Exits with status 1, so that a calling shell script or cron job can tell
# an aborted run from a successful one (a bare exit reports status 0).
sub Abort
{
  my $msg = shift ;
  print "$msg\nExecution aborted." ;
  # to do: log also to file
  exit 1 ;
}
| 830 | + |
| 831 | +sub InitProjectNames |
| 832 | +{ |
| 833 | + # copied from WikiReports.pl |
| 834 | + |
| 835 | + %wikipedias = ( |
| 836 | +# mediawiki=>"http://wikimediafoundation.org Wikimedia", |
| 837 | + nostalgia=>"http://nostalgia.wikipedia.org Nostalgia", |
| 838 | + sources=>"http://wikisource.org Old Wikisource", |
| 839 | + meta=>"http://meta.wikimedia.org Meta-Wiki", |
| 840 | + beta=>"http://beta.wikiversity.org Beta", |
| 841 | + species=>"http://species.wikipedia.org WikiSpecies", |
| 842 | + commons=>"http://commons.wikimedia.org Commons", |
| 843 | + foundation=>"http://wikimediafoundation.org Wikimedia Foundation", |
| 844 | + sep11=>"http://sep11.wikipedia.org In Memoriam", |
| 845 | + nlwikimedia=>"http://nl.wikimedia.org Wikimedia Nederland", |
| 846 | + plwikimedia=>"http://pl.wikimedia.org Wikimedia Polska", |
| 847 | + mediawiki=>"http://www.mediawiki.org MediaWiki", |
| 848 | + dewikiversity=>"http://de.wikiversity.org Wikiversität", |
| 849 | + frwikiversity=>"http://fr.wikiversity.org Wikiversität", |
| 850 | + wikimania2005=>"http://wikimania2005.wikimedia.org Wikimania 2005", |
| 851 | + wikimania2006=>"http://wikimania2006.wikimedia.org Wikimania 2006", |
| 852 | + aa=>"http://aa.wikipedia.org Afar", |
| 853 | + ab=>"http://ab.wikipedia.org Abkhazian", |
| 854 | + af=>"http://af.wikipedia.org Afrikaans", |
| 855 | + ak=>"http://ak.wikipedia.org Akan", # was Akana |
| 856 | + als=>"http://als.wikipedia.org Alemannic", # was Elsatian |
| 857 | + am=>"http://am.wikipedia.org Amharic", |
| 858 | + an=>"http://an.wikipedia.org Aragonese", |
| 859 | + ang=>"http://ang.wikipedia.org Anglo-Saxon", |
| 860 | + ar=>"http://ar.wikipedia.org Arabic", |
| 861 | + arc=>"http://arc.wikipedia.org Aramaic", |
| 862 | + as=>"http://as.wikipedia.org Assamese", |
| 863 | + ast=>"http://ast.wikipedia.org Asturian", |
| 864 | + av=>"http://av.wikipedia.org Avar", # was Avienan |
| 865 | + ay=>"http://ay.wikipedia.org Aymara", |
| 866 | + az=>"http://az.wikipedia.org Azeri", # was Azerbaijani |
| 867 | + ba=>"http://ba.wikipedia.org Bashkir", |
| 868 | + bar=>"http://bar.wikipedia.org Bavarian", |
| 869 | + bat_smg=>"http://bat-smg.wikipedia.org Samogitian", |
| 870 | + "bat-smg"=>"http://bat-smg.wikipedia.org Samogitian", |
| 871 | + bcl=>"http://bcl.wikipedia.org Central Bicolano", |
| 872 | + be=>"http://be.wikipedia.org Belarusian", |
| 873 | + "be-x-old"=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)", |
| 874 | + be_x_old=>"http://be.wikipedia.org Belarusian (Tarashkevitsa)", |
| 875 | + bg=>"http://bg.wikipedia.org Bulgarian", |
| 876 | + bh=>"http://bh.wikipedia.org Bihari", |
| 877 | + bi=>"http://bi.wikipedia.org Bislama", |
| 878 | + bm=>"http://bm.wikipedia.org Bambara", |
| 879 | + bn=>"http://bn.wikipedia.org Bengali", |
| 880 | + bo=>"http://bo.wikipedia.org Tibetan", |
| 881 | + bpy=>"http://bpy.wikipedia.org Bishnupriya Manipuri", |
| 882 | + br=>"http://br.wikipedia.org Breton", |
| 883 | + bs=>"http://bs.wikipedia.org Bosnian", |
| 884 | + bug=>"http://bug.wikipedia.org Buginese", |
| 885 | + bxr=>"http://bxr.wikipedia.org Buryat", |
| 886 | + ca=>"http://ca.wikipedia.org Catalan", |
| 887 | + cbk_zam=>"http://cbk-zam.wikipedia.org Chavacano", |
| 888 | + "cbk-zam"=>"http://cbk-zam.wikipedia.org Chavacano", |
| 889 | + cdo=>"http://cdo.wikipedia.org Min Dong", |
| 890 | + ce=>"http://ce.wikipedia.org Chechen", |
| 891 | + ceb=>"http://ceb.wikipedia.org Cebuano", |
| 892 | + ch=>"http://ch.wikipedia.org Chamorro", # was Chamoru |
| 893 | + cho=>"http://cho.wikipedia.org Choctaw", # was Chotaw |
| 894 | + chr=>"http://chr.wikipedia.org Cherokee", |
| 895 | + chy=>"http://chy.wikipedia.org Cheyenne", # was Setsêhestâhese |
| 896 | + co=>"http://co.wikipedia.org Corsican", |
| 897 | + cr=>"http://cr.wikipedia.org Cree", |
| 898 | + crh=>"http://crh.wikipedia.org Crimean Tatar", |
| 899 | + cs=>"http://cs.wikipedia.org Czech", |
| 900 | + csb=>"http://csb.wikipedia.org Cashubian", # was Kashubian |
| 901 | + cu=>"http://cv.wikipedia.org Old Church Slavonic", |
| 902 | + cv=>"http://cv.wikipedia.org Chuvash", # was Cavas |
| 903 | + cy=>"http://cy.wikipedia.org Welsh", |
| 904 | + da=>"http://da.wikipedia.org Danish", |
| 905 | + de=>"http://de.wikipedia.org German", |
| 906 | + diq=>"http://diq.wikipedia.org Zazaki", |
| 907 | + dk=>"http://dk.wikipedia.org Danish", |
| 908 | + dsb=>"http://dsb.wikipedia.org Lower Sorbian", |
| 909 | + dv=>"http://dv.wikipedia.org Divehi", |
| 910 | + dz=>"http://dz.wikipedia.org Dzongkha", |
| 911 | + ee=>"http://ee.wikipedia.org Ewe", |
| 912 | + el=>"http://el.wikipedia.org Greek", |
| 913 | + eml=>"http://eml.wikipedia.org Emilian-Romagnol", |
| 914 | + en=>"http://en.wikipedia.org English", |
| 915 | + eo=>"http://eo.wikipedia.org Esperanto", |
| 916 | + es=>"http://es.wikipedia.org Spanish", |
| 917 | + et=>"http://et.wikipedia.org Estonian", |
| 918 | + eu=>"http://eu.wikipedia.org Basque", |
| 919 | + ext=>"http://ext.wikipedia.org Extremaduran", |
| 920 | + fa=>"http://fa.wikipedia.org Persian", |
| 921 | + ff=>"http://ff.wikipedia.org Fulfulde", |
| 922 | + fi=>"http://fi.wikipedia.org Finnish", |
| 923 | + "fiu-vro"=>"http://fiu-vro.wikipedia.org Voro", |
| 924 | + fiu_vro=>"http://fiu-vro.wikipedia.org Voro", |
| 925 | + fj=>"http://fj.wikipedia.org Fijian", |
| 926 | + fo=>"http://fo.wikipedia.org Faroese", # was Faeroese |
| 927 | + fr=>"http://fr.wikipedia.org French", |
| 928 | + frp=>"http://frp.wikipedia.org Arpitan", |
| 929 | + fur=>"http://fur.wikipedia.org Friulian", |
| 930 | + fy=>"http://fy.wikipedia.org Frisian", |
| 931 | + ga=>"http://ga.wikipedia.org Irish", |
| 932 | + gan=>"http://gan.wikipedia.org Gan", |
| 933 | + gay=>"http://gay.wikipedia.org Gayo", |
| 934 | + gd=>"http://gd.wikipedia.org Scots Gaelic", # was Scottish Gaelic |
| 935 | + gl=>"http://gl.wikipedia.org Galician", # was Galego |
| 936 | + glk=>"http://glk.wikipedia.org Gilaki", |
| 937 | + gn=>"http://gn.wikipedia.org Guarani", |
| 938 | + got=>"http://got.wikipedia.org Gothic", |
| 939 | + gu=>"http://gu.wikipedia.org Gujarati", |
| 940 | + gv=>"http://gv.wikipedia.org Manx", # was Manx Gaelic |
| 941 | + ha=>"http://ha.wikipedia.org Hausa", |
| 942 | + hak=>"http://hak.wikipedia.org Hakka", |
| 943 | + haw=>"http://haw.wikipedia.org Hawai'ian", # was Hawaiian |
| 944 | + he=>"http://he.wikipedia.org Hebrew", |
| 945 | + hi=>"http://hi.wikipedia.org Hindi", |
| 946 | + hif=>"http://hif.wikipedia.org Fiji Hindi", |
| 947 | + ho=>"http://ho.wikipedia.org Hiri Motu", |
| 948 | + hr=>"http://hr.wikipedia.org Croatian", |
| 949 | + hsb=>"http://hsb.wikipedia.org Upper Sorbian", |
| 950 | + ht=>"http://ht.wikipedia.org Haitian", |
| 951 | + hu=>"http://hu.wikipedia.org Hungarian", |
| 952 | + hy=>"http://hy.wikipedia.org Armenian", |
| 953 | + hz=>"http://hz.wikipedia.org Herero", |
| 954 | + ia=>"http://ia.wikipedia.org Interlingua", |
| 955 | + iba=>"http://iba.wikipedia.org Iban", |
| 956 | + id=>"http://id.wikipedia.org Indonesian", |
| 957 | + ie=>"http://ie.wikipedia.org Interlingue", |
| 958 | + ig=>"http://ig.wikipedia.org Igbo", |
| 959 | + ii=>"http://ii.wikipedia.org Yi", |
| 960 | + ik=>"http://ik.wikipedia.org Inupiak", |
| 961 | + ilo=>"http://ilo.wikipedia.org Ilokano", |
| 962 | + io=>"http://io.wikipedia.org Ido", |
| 963 | + is=>"http://is.wikipedia.org Icelandic", |
| 964 | + it=>"http://it.wikipedia.org Italian", |
| 965 | + iu=>"http://iu.wikipedia.org Inuktitut", |
| 966 | + ja=>"http://ja.wikipedia.org Japanese", |
| 967 | + jbo=>"http://jbo.wikipedia.org Lojban", |
| 968 | + jv=>"http://jv.wikipedia.org Javanese", |
| 969 | + ka=>"http://ka.wikipedia.org Georgian", |
| 970 | + kaa=>"http://kaa.wikipedia.org Karakalpak", |
| 971 | + kab=>"http://ka.wikipedia.org Kabyle", |
| 972 | + kaw=>"http://kaw.wikipedia.org Kawi", |
| 973 | + kg=>"http://kg.wikipedia.org Kongo", |
| 974 | + ki=>"http://ki.wikipedia.org Kikuyu", |
| 975 | + kj=>"http://kj.wikipedia.org Kuanyama", # was Otjiwambo |
| 976 | + kk=>"http://kk.wikipedia.org Kazakh", |
| 977 | + kl=>"http://kl.wikipedia.org Greenlandic", |
| 978 | + km=>"http://km.wikipedia.org Khmer", # was Cambodian |
| 979 | + kn=>"http://kn.wikipedia.org Kannada", |
| 980 | + ko=>"http://ko.wikipedia.org Korean", |
| 981 | + kr=>"http://kr.wikipedia.org Kanuri", |
| 982 | + ks=>"http://ks.wikipedia.org Kashmiri", |
| 983 | + ksh=>"http://ksh.wikipedia.org Ripuarian", |
| 984 | + ku=>"http://ku.wikipedia.org Kurdish", |
| 985 | + kv=>"http://kv.wikipedia.org Komi", |
| 986 | + kw=>"http://kw.wikipedia.org Cornish", # was Kornish |
| 987 | + ky=>"http://ky.wikipedia.org Kirghiz", |
| 988 | + la=>"http://la.wikipedia.org Latin", |
| 989 | + lad=>"http://lad.wikipedia.org Ladino", |
| 990 | + lb=>"http://lb.wikipedia.org Luxembourgish", # was Letzeburgesch |
| 991 | + lbe=>"http://lbe.wikipedia.org Lak", |
| 992 | + lg=>"http://lg.wikipedia.org Ganda", |
| 993 | + li=>"http://li.wikipedia.org Limburgish", |
| 994 | + lij=>"http://lij.wikipedia.org Ligurian", |
| 995 | + lmo=>"http://lmo.wikipedia.org Lombard", |
| 996 | + ln=>"http://ln.wikipedia.org Lingala", |
| 997 | + lo=>"http://lo.wikipedia.org Laotian", |
| 998 | + ls=>"http://ls.wikipedia.org Latino Sine Flexione", |
| 999 | + lt=>"http://lt.wikipedia.org Lithuanian", |
| 1000 | + lv=>"http://lv.wikipedia.org Latvian", |
| 1001 | + mad=>"http://mad.wikipedia.org Madurese", |
| 1002 | + mak=>"http://mak.wikipedia.org Makasar", |
| 1003 | + map_bms=>"http://map-bms.wikipedia.org Banyumasan", |
| 1004 | + "map-bms"=>"http://map-bms.wikipedia.org Banyumasan", |
| 1005 | + mdf=>"http://mdf.wikipedia.org Moksha", |
| 1006 | + mg=>"http://mg.wikipedia.org Malagasy", |
| 1007 | + mh=>"http://mh.wikipedia.org Marshallese", |
| 1008 | + mi=>"http://mi.wikipedia.org Maori", |
| 1009 | + min=>"http://min.wikipedia.org Minangkabau", |
| 1010 | + minnan=>"http://minnan.wikipedia.org Minnan", |
| 1011 | + mk=>"http://mk.wikipedia.org Macedonian", |
| 1012 | + ml=>"http://ml.wikipedia.org Malayalam", |
| 1013 | + mn=>"http://mn.wikipedia.org Mongolian", |
| 1014 | + mo=>"http://mo.wikipedia.org Moldavian", |
| 1015 | + mr=>"http://mr.wikipedia.org Marathi", |
| 1016 | + ms=>"http://ms.wikipedia.org Malay", |
| 1017 | + mt=>"http://mt.wikipedia.org Maltese", |
| 1018 | + mus=>"http://mus.wikipedia.org Muskogee", |
| 1019 | + my=>"http://my.wikipedia.org Burmese", |
| 1020 | + myv=>"http://myv.wikipedia.org Erzya", |
| 1021 | + mzn=>"http://mzn.wikipedia.org Mazandarani", |
| 1022 | + na=>"http://na.wikipedia.org Nauruan", # was Nauru |
| 1023 | + nah=>"http://nah.wikipedia.org Nahuatl", |
| 1024 | + nap=>"http://nap.wikipedia.org Neapolitan", |
| 1025 | + nds=>"http://nds.wikipedia.org Low Saxon", |
| 1026 | + nds_nl=>"http://nds-nl.wikipedia.org Dutch Low Saxon", |
| 1027 | + "nds-nl"=>"http://nds-nl.wikipedia.org Dutch Low Saxon", |
| 1028 | + ne=>"http://ne.wikipedia.org Nepali", |
| 1029 | + new=>"http://new.wikipedia.org Nepal Bhasa", |
| 1030 | + ng=>"http://ng.wikipedia.org Ndonga", |
| 1031 | + nl=>"http://nl.wikipedia.org Dutch", |
| 1032 | + nov=>"http://nov.wikipedia.org Novial", |
| 1033 | + nrm=>"http://nrm.wikipedia.org Norman", |
| 1034 | + nn=>"http://nn.wikipedia.org Nynorsk", # was Neo-Norwegian |
| 1035 | + no=>"http://no.wikipedia.org Norwegian", |
| 1036 | + nv=>"http://nv.wikipedia.org Navajo", # was Avayo |
| 1037 | + ny=>"http://ny.wikipedia.org Chichewa", |
| 1038 | + oc=>"http://oc.wikipedia.org Occitan", |
| 1039 | + om=>"http://om.wikipedia.org Oromo", |
| 1040 | + or=>"http://or.wikipedia.org Oriya", |
| 1041 | + os=>"http://os.wikipedia.org Ossetic", |
| 1042 | + pa=>"http://pa.wikipedia.org Punjabi", |
| 1043 | + pag=>"http://pag.wikipedia.org Pangasinan", |
| 1044 | + pam=>"http://pam.wikipedia.org Kapampangan", |
| 1045 | + pap=>"http://pap.wikipedia.org Papiamentu", |
| 1046 | + pdc=>"http://pdc.wikipedia.org Pennsylvania German", |
| 1047 | + pi=>"http://pi.wikipedia.org Pali", |
| 1048 | + pih=>"http://pih.wikipedia.org Norfolk", |
| 1049 | + pl=>"http://pl.wikipedia.org Polish", |
| 1050 | + pms=>"http://pms.wikipedia.org Piedmontese", |
| 1051 | + ps=>"http://ps.wikipedia.org Pashto", |
| 1052 | + pt=>"http://pt.wikipedia.org Portuguese", |
| 1053 | + qu=>"http://qu.wikipedia.org Quechua", |
| 1054 | + rm=>"http://rm.wikipedia.org Romansh", # was Rhaeto-Romance |
| 1055 | + rmy=>"http://rmy.wikipedia.org Romani", |
| 1056 | + rn=>"http://rn.wikipedia.org Kirundi", |
| 1057 | + ro=>"http://ro.wikipedia.org Romanian", |
| 1058 | + roa_rup=>"http://roa-rup.wikipedia.org Aromanian", |
| 1059 | + "roa-rup"=>"http://roa-rup.wikipedia.org Aromanian", |
| 1060 | + roa_tara=>"http://roa-tara.wikipedia.org Tarantino", |
| 1061 | + "roa-tara"=>"http://roa-tara.wikipedia.org Tarantino", |
| 1062 | + ru=>"http://ru.wikipedia.org Russian", |
| 1063 | + ru_sib=>"http://ru-sib.wikipedia.org Siberian", |
| 1064 | + "ru-sib"=>"http://ru-sib.wikipedia.org Siberian", |
| 1065 | + rw=>"http://rw.wikipedia.org Kinyarwanda", |
| 1066 | + sa=>"http://sa.wikipedia.org Sanskrit", |
| 1067 | + sah=>"http://sah.wikipedia.org Sakha", |
| 1068 | + sc=>"http://sc.wikipedia.org Sardinian", |
| 1069 | + scn=>"http://scn.wikipedia.org Sicilian", |
| 1070 | + sco=>"http://sco.wikipedia.org Scots", |
| 1071 | + sd=>"http://sd.wikipedia.org Sindhi", |
| 1072 | + se=>"http://se.wikipedia.org Northern Sami", |
| 1073 | + sg=>"http://sg.wikipedia.org Sangro", |
| 1074 | + sh=>"http://sh.wikipedia.org Serbo-Croatian", |
| 1075 | + si=>"http://si.wikipedia.org Sinhala", # was Singhalese |
| 1076 | + simple=>"http://simple.wikipedia.org Simple English", |
| 1077 | + sk=>"http://sk.wikipedia.org Slovak", |
| 1078 | + sl=>"http://sl.wikipedia.org Slovene", |
| 1079 | + sm=>"http://sm.wikipedia.org Samoan", |
| 1080 | + sn=>"http://sn.wikipedia.org Shona", |
| 1081 | + so=>"http://so.wikipedia.org Somali", # was Somalian |
| 1082 | + sq=>"http://sq.wikipedia.org Albanian", |
| 1083 | + sr=>"http://sr.wikipedia.org Serbian", |
| 1084 | + srn=>"http://srn.wikipedia.org Sranan", |
| 1085 | + ss=>"http://ss.wikipedia.org Siswati", |
| 1086 | + st=>"http://st.wikipedia.org Sesotho", |
| 1087 | + stq=>"http://stq.wikipedia.org Saterland Frisian", |
| 1088 | + su=>"http://su.wikipedia.org Sundanese", |
| 1089 | + sv=>"http://sv.wikipedia.org Swedish", |
| 1090 | + sw=>"http://sw.wikipedia.org Swahili", |
| 1091 | + szl=>"http://szl.wikipedia.org Silesian", |
| 1092 | + ta=>"http://ta.wikipedia.org Tamil", |
| 1093 | + te=>"http://te.wikipedia.org Telugu", |
| 1094 | + test=>"http://test.wikipedia.org Test", |
| 1095 | + tet=>"http://tet.wikipedia.org Tetum", |
| 1096 | + tg=>"http://tg.wikipedia.org Tajik", |
| 1097 | + th=>"http://th.wikipedia.org Thai", |
| 1098 | + ti=>"http://ti.wikipedia.org Tigrinya", |
| 1099 | + tk=>"http://tk.wikipedia.org Turkmen", |
| 1100 | + tl=>"http://tl.wikipedia.org Tagalog", |
| 1101 | + tlh=>"http://tlh.wikipedia.org Klingon", # was Klignon |
| 1102 | + tn=>"http://tn.wikipedia.org Setswana", |
| 1103 | + to=>"http://to.wikipedia.org Tongan", |
| 1104 | + tokipona=>"http://tokipona.wikipedia.org Tokipona", |
| 1105 | + tpi=>"http://tpi.wikipedia.org Tok Pisin", |
| 1106 | + tr=>"http://tr.wikipedia.org Turkish", |
| 1107 | + ts=>"http://ts.wikipedia.org Tsonga", |
| 1108 | + tt=>"http://tt.wikipedia.org Tatar", |
| 1109 | + tum=>"http://tum.wikipedia.org Tumbuka", |
| 1110 | + turn=>"http://turn.wikipedia.org Turnbuka", |
| 1111 | + tw=>"http://tw.wikipedia.org Twi", |
| 1112 | + ty=>"http://ty.wikipedia.org Tahitian", |
| 1113 | + udm=>"http://udm.wikipedia.org Udmurt", |
| 1114 | + ug=>"http://ug.wikipedia.org Uighur", |
| 1115 | + uk=>"http://uk.wikipedia.org Ukrainian", |
| 1116 | + ur=>"http://ur.wikipedia.org Urdu", |
| 1117 | + uz=>"http://uz.wikipedia.org Uzbek", |
| 1118 | + ve=>"http://ve.wikipedia.org Venda", # was Lushaka |
| 1119 | + vec=>"http://vec.wikipedia.org Venetian", |
| 1120 | + vi=>"http://vi.wikipedia.org Vietnamese", |
| 1121 | + vls=>"http://vls.wikipedia.org West Flemish", |
| 1122 | + vo=>"http://vo.wikipedia.org Volapük", |
| 1123 | + wa=>"http://wa.wikipedia.org Walloon", |
| 1124 | + war=>"http://war.wikipedia.org Waray-Waray", |
| 1125 | + wo=>"http://wo.wikipedia.org Wolof", |
| 1126 | + wuu=>"http://wuu.wikipedia.org Wu", |
| 1127 | + xal=>"http://xal.wikipedia.org Kalmyk", |
| 1128 | + xh=>"http://xh.wikipedia.org Xhosa", |
| 1129 | + yi=>"http://yi.wikipedia.org Yiddish", |
| 1130 | + yo=>"http://yo.wikipedia.org Yoruba", |
| 1131 | + za=>"http://za.wikipedia.org Zhuang", |
| 1132 | + zea=>"http://zea.wikipedia.org Zealandic", |
| 1133 | + zh=>"http://zh.wikipedia.org Chinese", |
| 1134 | + zh_min_nan=>"http://zh-min-nan.wikipedia.org Min Nan", |
| 1135 | + "zh-min-nan"=>"http://zh-min-nan.wikipedia.org Min Nan", |
| 1136 | + zh_classical=>"http://zh-classical.wikipedia.org Classical Chinese", |
| 1137 | + "zh-classical"=>"http://zh-classical.wikipedia.org Classical Chinese", |
| 1138 | + zh_yue=>"http://zh-yue.wikipedia.org Cantonese", |
| 1139 | + "zh-yue"=>"http://zh-yue.wikipedia.org Cantonese", |
| 1140 | + zu=>"http://zu.wikipedia.org Zulu", |
| 1141 | + zz=>" All languages", |
| 1142 | + zzz=>" All languages except English" |
| 1143 | + ); |
| 1144 | + |
| 1145 | + foreach $key (keys %wikipedias) |
| 1146 | + { |
| 1147 | + my $wikipedia = $wikipedias {$key} ; |
| 1148 | + $out_urls {$key} = $wikipedia ; |
| 1149 | + $out_languages {$key} = $wikipedia ; |
| 1150 | + $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ; |
| 1151 | + $out_languages {$key} =~ s/^[^\s]+\s+(.*)$/$1/ ; |
| 1152 | + $out_article {$key} = "http://en.wikipedia.org/wiki/" . $out_languages {$key} . "_language" ; |
| 1153 | + $out_article {$key} =~ s/ /_/g ; |
| 1154 | + $out_urls {$key} =~ s/(^[^\s]+).*$/$1/ ; |
| 1155 | + } |
| 1156 | +} |
| 1157 | + |
| 1158 | +# copied from WikiReports_EN.pl: fills the global array @out_report_descriptions with a fixed list of 22 report titles |
| 1159 | +sub InitReportNames |
| 1160 | +{ |
| 1161 | + @out_report_descriptions = ( |
| 1162 | + "Contributors", |
| 1163 | + "New editors", |
| 1164 | + "Active editors", |
| 1165 | + "Very active editors", |
| 1166 | + "Article count (official)", |
| 1167 | + "Article count (alternate)", |
| 1168 | + "New articles per day", |
| 1169 | + "Edits per article", |
| 1170 | + "Bytes per article", |
| 1171 | + "Articles over 0.5 Kb", |
| 1172 | + "Articles over 2 Kb", |
| 1173 | + "Edits per month", |
| 1174 | + "Database size", |
| 1175 | + "Words", |
| 1176 | + "Internal links", |
| 1177 | + "Links to other Wikipedias", |
| 1178 | + "Binaries", |
| 1179 | + "External links", |
| 1180 | + "Redirects", |
| 1181 | + "Page requests per day", |
| 1182 | + "Visits per day", |
| 1183 | + "Overview recent months" |
| 1184 | + ) ; |
| 1185 | +} |
| 1186 | + |
Property changes on: trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl |
___________________________________________________________________ |
Added: svn:eol-style |
1187 | 1187 | + native |
Index: trunk/wikistats/reportcard/ReportCardLinkErrata.pl |
— | — | @@ -1,92 +1,92 @@ |
2 | | -#!/usr/local/bin/perl
|
3 | | -
|
4 | | - use lib "/home/ezachte/lib" ;
|
5 | | - use EzLib ;
|
6 | | - $trace_on_exit = $true ;
|
7 | | -
|
8 | | - &PatchFiles ("W:/@ Report Card/Extended") ;
|
9 | | - &PatchFiles ("W:/@ Report Card/Public") ;
|
10 | | -
|
11 | | - print "\n\nReady\n\n" ;
|
12 | | - exit ;
|
13 | | -
|
14 | | -sub PatchFiles
|
15 | | -{
|
16 | | - my $dir = shift ;
|
17 | | - $prevdir = getcwd ;
|
18 | | - print "prevdir $prevdir\n" ;
|
19 | | - chdir ($dir) || die "Cannot chdir to $dir\n";
|
20 | | - $dir = getcwd ;
|
21 | | - print "currdir $dir\n" ;
|
22 | | -
|
23 | | - print "\nErrata files:\n\n" ;
|
24 | | -
|
25 | | - local (*DIR);
|
26 | | - opendir (DIR, ".");
|
27 | | -
|
28 | | - my %errata ;
|
29 | | - while ($file = readdir (DIR))
|
30 | | - {
|
31 | | - if ($file eq "." || $file eq "..")
|
32 | | - { next ; }
|
33 | | -
|
34 | | - next if $file !~ /^RC_\d\d\d\d_\d\d_errata\.html$/ ;
|
35 | | -
|
36 | | - print "File $file\n" ;
|
37 | | - $file =~ s/_errata.*$// ;
|
38 | | - $errata {$file} ++ ;
|
39 | | - }
|
40 | | -
|
41 | | - closedir(DIR);
|
42 | | -
|
43 | | - print "\nPatch files:\n\n" ;
|
44 | | -
|
45 | | - opendir (DIR, ".");
|
46 | | - while ($file = readdir (DIR))
|
47 | | - {
|
48 | | - if ($file eq "." || $file eq "..")
|
49 | | - { next ; }
|
50 | | -
|
51 | | - next if $file !~ /^RC_\d\d\d\d_\d\d_(?:synopsis|columns|detailed|summary)\.html$/ ;
|
52 | | -
|
53 | | - ($file2 = $file) =~ s/_[a-z]+\.html$// ;
|
54 | | - next if $errata {$file2} == 0 ;
|
55 | | -
|
56 | | - # print "Check file $file\n" ;
|
57 | | -
|
58 | | - $add_errata = $false ;
|
59 | | - open FILE, '<', $file ;
|
60 | | - @lines = <FILE> ;
|
61 | | - close FILE ;
|
62 | | -
|
63 | | - foreach $line (@lines)
|
64 | | - {
|
65 | | - if ($line =~ /RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html/i)
|
66 | | - {
|
67 | | - if ($line !~ /errata/i)
|
68 | | - {
|
69 | | - $add_errata = $true ;
|
70 | | - # print "\nBefore:$line\n" ;
|
71 | | - $line =~ s/<\/small>/ ⇒ <a href='${file2}_errata.html'><font color=#A00000>Errata<\/font><\/a><\/small>/ ;
|
72 | | - # print "\nAfter:$line\n" ;
|
73 | | - last ;
|
74 | | - }
|
75 | | - }
|
76 | | - }
|
77 | | -
|
78 | | - if ($add_errata)
|
79 | | - {
|
80 | | - print "Patch file $file\n" ;
|
81 | | - open FILE, '>', $file ;
|
82 | | - print FILE @lines ;
|
83 | | - close FILE ;
|
84 | | - }
|
85 | | - }
|
86 | | - closedir(DIR);
|
87 | | -
|
88 | | - chdir($prevdir);
|
89 | | - $dir = getcwd ;
|
90 | | - print "\ncurrdir $dir\n" ;
|
91 | | -}
|
92 | | -
|
93 | | -
|
| 2 | +#!/usr/local/bin/perl |
| 3 | +# For each Report Card folder below: add an 'Errata' link to the RC_yyyy_mm_*.html report pages of months that have an RC_yyyy_mm_errata.html page |
| 4 | + use lib "/home/ezachte/lib" ; |
| 5 | + use EzLib ; |
| 6 | + $trace_on_exit = $true ; |
| 7 | + |
| 8 | + &PatchFiles ("W:/@ Report Card/Extended") ; |
| 9 | + &PatchFiles ("W:/@ Report Card/Public") ; |
| 10 | + |
| 11 | + print "\n\nReady\n\n" ; |
| 12 | + exit ; |
| 13 | + |
| 14 | +sub PatchFiles # arg: folder holding RC_yyyy_mm_*.html files; patches report pages in place, returns nothing useful, dies when the folder or a file cannot be opened |
| 15 | +{ |
| 16 | + my $dir = shift ; |
| 17 | + $prevdir = getcwd ; # NOTE(review): getcwd is not imported in this file; presumably EzLib provides it (Cwd) - confirm |
| 18 | + print "prevdir $prevdir\n" ; |
| 19 | + chdir ($dir) || die "Cannot chdir to $dir\n"; |
| 20 | + $dir = getcwd ; |
| 21 | + print "currdir $dir\n" ; |
| 22 | + # pass 1: remember every RC_yyyy_mm prefix for which an errata page exists |
| 23 | + print "\nErrata files:\n\n" ; |
| 24 | + |
| 25 | + local (*DIR); |
| 26 | + opendir (DIR, ".") || die "Cannot open folder $dir: $!\n"; |
| 27 | + |
| 28 | + my %errata ; |
| 29 | + while ($file = readdir (DIR)) |
| 30 | + { |
| 31 | + if ($file eq "." || $file eq "..") |
| 32 | + { next ; } |
| 33 | + |
| 34 | + next if $file !~ /^RC_\d\d\d\d_\d\d_errata\.html$/ ; |
| 35 | + |
| 36 | + print "File $file\n" ; |
| 37 | + $file =~ s/_errata.*$// ; |
| 38 | + $errata {$file} ++ ; |
| 39 | + } |
| 40 | + |
| 41 | + closedir(DIR); |
| 42 | + # pass 2: patch the report pages of those months |
| 43 | + print "\nPatch files:\n\n" ; |
| 44 | + |
| 45 | + opendir (DIR, ".") || die "Cannot open folder $dir: $!\n"; |
| 46 | + while ($file = readdir (DIR)) |
| 47 | + { |
| 48 | + if ($file eq "." || $file eq "..") |
| 49 | + { next ; } |
| 50 | + |
| 51 | + next if $file !~ /^RC_\d\d\d\d_\d\d_(?:synopsis|columns|detailed|summary)\.html$/ ; |
| 52 | + |
| 53 | + ($file2 = $file) =~ s/_[a-z]+\.html$// ; |
| 54 | + next if $errata {$file2} == 0 ; |
| 55 | + |
| 56 | + # print "Check file $file\n" ; |
| 57 | + |
| 58 | + $add_errata = $false ; |
| 59 | + open FILE, '<', $file or die "Cannot read $file: $!\n" ; |
| 60 | + @lines = <FILE> ; |
| 61 | + close FILE ; |
| 62 | + # only the first line that links to three RC_ pages and does not mention errata yet is patched |
| 63 | + foreach $line (@lines) |
| 64 | + { |
| 65 | + if ($line =~ /RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html.*?RC_\d\d\d\d_\d\d_\w+\.html/i) |
| 66 | + { |
| 67 | + if ($line !~ /errata/i) |
| 68 | + { |
| 69 | + $add_errata = $true ; |
| 70 | + # print "\nBefore:$line\n" ; |
| 71 | + $line =~ s/<\/small>/ ⇒ <a href='${file2}_errata.html'><font color=#A00000>Errata<\/font><\/a><\/small>/ ; |
| 72 | + # print "\nAfter:$line\n" ; |
| 73 | + last ; |
| 74 | + } |
| 75 | + } |
| 76 | + } |
| 77 | + |
| 78 | + if ($add_errata) |
| 79 | + { |
| 80 | + print "Patch file $file\n" ; |
| 81 | + open FILE, '>', $file or die "Cannot write $file: $!\n" ; # '>' truncates: never continue with an unopened handle |
| 82 | + print FILE @lines ; |
| 83 | + close FILE ; |
| 84 | + } |
| 85 | + } |
| 86 | + closedir(DIR); |
| 87 | + |
| 88 | + chdir($prevdir); |
| 89 | + $dir = getcwd ; |
| 90 | + print "\ncurrdir $dir\n" ; |
| 91 | +} |
| 92 | + |
| 93 | + |
Property changes on: trunk/wikistats/reportcard/ReportCardLinkErrata.pl |
___________________________________________________________________ |
Added: svn:eol-style |
94 | 94 | + native |
Index: trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl |
— | — | @@ -1,124 +1,124 @@ |
2 | | -#!/usr/local/bin/perl
|
3 | | -
|
4 | | - use Getopt::Std ;
|
5 | | -
|
6 | | - &ParseArguments ;
|
7 | | -
|
8 | | - print "Write file '$file_csv_out'\n" ;
|
9 | | - open CSV_OUT, '>', $file_csv_out ;
|
10 | | -
|
11 | | - foreach $project (qw (wb wk wn wp wq ws wv wx))
|
12 | | - { &ReadStatisticsPerBinariesExtension ($project) ; }
|
13 | | -
|
14 | | - close CSV_OUT ;
|
15 | | -
|
16 | | - print "\n\nReady\n\n" ;
|
17 | | - exit ;
|
18 | | -
|
19 | | -sub ParseArguments
|
20 | | -{
|
21 | | - my @options ;
|
22 | | - getopt ("io", \%options) ;
|
23 | | -
|
24 | | - die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ;
|
25 | | - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
|
26 | | -
|
27 | | - $path_in = $options {"i"} ;
|
28 | | - $path_out = $options {"o"} ;
|
29 | | -
|
30 | | - die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
|
31 | | - die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
|
32 | | -
|
33 | | - # tests only
|
34 | | - # $path_in = "C:/@ Wikimedia/# Out Bayes" ;
|
35 | | - # $path_out = "C:/analytics" ; # "w:/@ report card/data" ;
|
36 | | -
|
37 | | - print "Input folder: $path_in\n" ;
|
38 | | - print "Output folder: $path_out\n" ;
|
39 | | - print "\n" ;
|
40 | | -
|
41 | | - $file_csv_out = "$path_out/analytics_in_binaries.csv" ;
|
42 | | -}
|
43 | | -
|
44 | | -
|
45 | | -sub ReadStatisticsPerBinariesExtension
|
46 | | -{
|
47 | | - my $project = shift ;
|
48 | | - my $file_csv_in = "$path_in/csv_$project/StatisticsPerBinariesExtension.csv" ;
|
49 | | - $yyyymm_hi = -1 ;
|
50 | | -
|
51 | | - if (! -e $file_csv_in)
|
52 | | - { die "Input file '$file_csv_in' not found" ; }
|
53 | | -
|
54 | | -
|
55 | | - print "Read '$file_csv_in'\n" ;
|
56 | | - open CSV_IN, '<', $file_csv_in ;
|
57 | | -
|
58 | | - $language_prev = '' ;
|
59 | | - while ($line = <CSV_IN>)
|
60 | | - {
|
61 | | - chomp $line ;
|
62 | | - next if $line !~ /,.*?,/ ;
|
63 | | -
|
64 | | - ($language,$date,$data) = split (',', $line, 3) ;
|
65 | | -
|
66 | | - # for each wiki first line shows ext names, no tcounts
|
67 | | - if ($date eq "00/0000")
|
68 | | - {
|
69 | | - if ($language_prev ne '')
|
70 | | - { &WriteMonthlyData ($project, $language_prev) ; }
|
71 | | - $language_prev = $language ;
|
72 | | -
|
73 | | - undef %ext_name ;
|
74 | | - undef %ext_ndx ;
|
75 | | - undef %ext_cnt ;
|
76 | | - undef %months ;
|
77 | | -
|
78 | | - @exts = split (',', $data) ;
|
79 | | - $ndx = 0 ;
|
80 | | - foreach $ext (@exts)
|
81 | | - {
|
82 | | - $ext_name {$ndx} = $ext ;
|
83 | | - $ext_ndx {$ext} = $ndx ;
|
84 | | - $ndx ++ ;
|
85 | | - }
|
86 | | - next ;
|
87 | | - }
|
88 | | -
|
89 | | - ($month,$year) = split ('\/', $date) ;
|
90 | | - $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
|
91 | | - if ($yyyymm gt $yyyymm_hi)
|
92 | | - { $yyyymm_hi = $yyyymm ; }
|
93 | | - $months {$yyyymm}++ ;
|
94 | | -
|
95 | | - @counts = split (',', $data) ;
|
96 | | - $ndx = 0 ;
|
97 | | - foreach $count (@counts)
|
98 | | - {
|
99 | | - $ext_cnt {$yyyymm}{$ext_name {$ndx}} = $count ;
|
100 | | - $ndx ++ ;
|
101 | | - }
|
102 | | - }
|
103 | | - &WriteMonthlyData ($project, $language_prev) ;
|
104 | | -
|
105 | | - close CSV_IN ;
|
106 | | -}
|
107 | | -
|
108 | | -sub WriteMonthlyData
|
109 | | -{
|
110 | | - my ($project,$language) = @_ ;
|
111 | | - # get sorted array of extensions, order by count for most recent month
|
112 | | - %ext_cnt_yyyymm_hi = %{$ext_cnt {$yyyymm_hi}} ;
|
113 | | - @ext_cnt_yyyymm_hi = (sort {$ext_cnt_yyyymm_hi {$b} <=> $ext_cnt_yyyymm_hi {$a}} keys %ext_cnt_yyyymm_hi) ;
|
114 | | -
|
115 | | - foreach $month (sort keys %months)
|
116 | | - {
|
117 | | - $ndx = 0 ;
|
118 | | - foreach $ext (@ext_cnt_yyyymm_hi)
|
119 | | - {
|
120 | | - print CSV_OUT "$project,$language,$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
|
121 | | - # print "$month,$ext,${ext_cnt{$yyyymm}{$ext_name {$ndx}}}\n" ;
|
122 | | - last if (++ $ndx > 25) ;
|
123 | | - }
|
124 | | - }
|
125 | | -}
|
| 2 | +#!/usr/local/bin/perl |
| 3 | +# Merges csv_<project>/StatisticsPerBinariesExtension.csv of all projects into one file analytics_in_binaries.csv (folders given via -i and -o) |
| 4 | + use Getopt::Std ; |
| 5 | + |
| 6 | + &ParseArguments ; |
| 7 | + |
| 8 | + print "Write file '$file_csv_out'\n" ; |
| 9 | + open CSV_OUT, '>', $file_csv_out or die "Cannot write file '$file_csv_out': $!\n" ; |
| 10 | + |
| 11 | + foreach $project (qw (wb wk wn wp wq ws wv wx)) |
| 12 | + { &ReadStatisticsPerBinariesExtension ($project) ; } |
| 13 | + |
| 14 | + close CSV_OUT ; |
| 15 | + |
| 16 | + print "\n\nReady\n\n" ; |
| 17 | + exit ; |
| 18 | + |
| 19 | +sub ParseArguments # reads -i (input folder) and -o (output folder); sets globals $path_in, $path_out, $file_csv_out; dies when an option is missing or a folder does not exist |
| 20 | +{ |
| 21 | + my %options ; # was 'my @options': the hash filled by getopt and read below is %options |
| 22 | + getopt ("io", \%options) ; |
| 23 | + |
| 24 | + die ("Specify input folder as: -i path") if (! defined ($options {"i"})) ; |
| 25 | + die ("Specify output folder as: -o path") if (! defined ($options {"o"})) ; |
| 26 | + |
| 27 | + $path_in = $options {"i"} ; |
| 28 | + $path_out = $options {"o"} ; |
| 29 | + |
| 30 | + die "Input folder '$path_in' does not exist" if (! -d $path_in) ; |
| 31 | + die "Output folder '$path_out' does not exist" if (! -d $path_out) ; |
| 32 | + |
| 33 | + # tests only |
| 34 | + # $path_in = "C:/@ Wikimedia/# Out Bayes" ; |
| 35 | + # $path_out = "C:/analytics" ; # "w:/@ report card/data" ; |
| 36 | + |
| 37 | + print "Input folder: $path_in\n" ; |
| 38 | + print "Output folder: $path_out\n" ; |
| 39 | + print "\n" ; |
| 40 | + |
| 41 | + $file_csv_out = "$path_out/analytics_in_binaries.csv" ; |
| 42 | +} |
| 43 | + |
| 44 | + |
| 45 | +sub ReadStatisticsPerBinariesExtension # arg: project code; reads that project's csv and calls WriteMonthlyData once per language found; dies when the file is missing or unreadable |
| 46 | +{ |
| 47 | + my $project = shift ; |
| 48 | + my $file_csv_in = "$path_in/csv_$project/StatisticsPerBinariesExtension.csv" ; |
| 49 | + $yyyymm_hi = -1 ; |
| 50 | + # NOTE(review): $yyyymm_hi is reset per project, not per language, while WriteMonthlyData ranks extensions by that month - confirm this is intended for languages whose data end earlier |
| 51 | + if (! -e $file_csv_in) |
| 52 | + { die "Input file '$file_csv_in' not found" ; } |
| 53 | + |
| 54 | + |
| 55 | + print "Read '$file_csv_in'\n" ; |
| 56 | + open CSV_IN, '<', $file_csv_in or die "Cannot read '$file_csv_in': $!\n" ; |
| 57 | + |
| 58 | + $language_prev = '' ; |
| 59 | + while ($line = <CSV_IN>) |
| 60 | + { |
| 61 | + chomp $line ; |
| 62 | + next if $line !~ /,.*?,/ ; |
| 63 | + |
| 64 | + ($language,$date,$data) = split (',', $line, 3) ; |
| 65 | + |
| 66 | + # for each wiki first line shows ext names, no counts |
| 67 | + if ($date eq "00/0000") |
| 68 | + { |
| 69 | + if ($language_prev ne '') |
| 70 | + { &WriteMonthlyData ($project, $language_prev) ; } |
| 71 | + $language_prev = $language ; |
| 72 | + # start collecting for the new language with empty tables |
| 73 | + undef %ext_name ; |
| 74 | + undef %ext_ndx ; |
| 75 | + undef %ext_cnt ; |
| 76 | + undef %months ; |
| 77 | + |
| 78 | + @exts = split (',', $data) ; |
| 79 | + $ndx = 0 ; |
| 80 | + foreach $ext (@exts) |
| 81 | + { |
| 82 | + $ext_name {$ndx} = $ext ; |
| 83 | + $ext_ndx {$ext} = $ndx ; |
| 84 | + $ndx ++ ; |
| 85 | + } |
| 86 | + next ; |
| 87 | + } |
| 88 | + # data line: date is mm/yyyy, counts are in the same column order as the extension names |
| 89 | + ($month,$year) = split ('\/', $date) ; |
| 90 | + $yyyymm = sprintf ("%04d-%02d", $year, $month) ; |
| 91 | + if ($yyyymm gt $yyyymm_hi) |
| 92 | + { $yyyymm_hi = $yyyymm ; } |
| 93 | + $months {$yyyymm}++ ; |
| 94 | + |
| 95 | + @counts = split (',', $data) ; |
| 96 | + $ndx = 0 ; |
| 97 | + foreach $count (@counts) |
| 98 | + { |
| 99 | + $ext_cnt {$yyyymm}{$ext_name {$ndx}} = $count ; |
| 100 | + $ndx ++ ; |
| 101 | + } |
| 102 | + } |
| 103 | + &WriteMonthlyData ($project, $language_prev) ; |
| 104 | + |
| 105 | + close CSV_IN ; |
| 106 | +} |
| 107 | + |
| 108 | +sub WriteMonthlyData # args: project code, language code; writes rows project,language,month,extension,count to CSV_OUT from globals %ext_cnt, %months, $yyyymm_hi |
| 109 | +{ |
| 110 | + my ($project,$language) = @_ ; |
| 111 | + # get sorted array of extensions, order by count for most recent month |
| 112 | + %ext_cnt_yyyymm_hi = %{$ext_cnt {$yyyymm_hi}} ; |
| 113 | + @ext_cnt_yyyymm_hi = (sort {$ext_cnt_yyyymm_hi {$b} <=> $ext_cnt_yyyymm_hi {$a}} keys %ext_cnt_yyyymm_hi) ; |
| 114 | + |
| 115 | + foreach $month (sort keys %months) |
| 116 | + { |
| 117 | + $ndx = 0 ; |
| 118 | + foreach $ext (@ext_cnt_yyyymm_hi) |
| 119 | + { |
| 120 | + print CSV_OUT "$project,$language,$month,$ext,$ext_cnt{$month}{$ext}\n" ; # count of this row's month and extension (was: last month read, column by position) |
| 121 | + # print "$month,$ext,$ext_cnt{$month}{$ext}\n" ; |
| 122 | + last if (++ $ndx > 25) ; # at most 26 extensions per month |
| 123 | + } |
| 124 | + } |
| 125 | +} |
Property changes on: trunk/wikistats/analytics/AnalyticsPrepBinariesData.pl |
___________________________________________________________________ |
Added: svn:eol-style |
126 | 126 | + native |
Property changes on: trunk/wikistats/analytics/AnalyticsPrepLanguageNames.pl |
___________________________________________________________________ |
Added: svn:eol-style |
127 | 127 | + native |
Index: trunk/wikistats/analytics/analytics_refresh_from_csv.txt |
— | — | @@ -1,55 +1,55 @@ |
2 | | -USE `analytics` ;
|
3 | | -
|
4 | | -TRUNCATE TABLE comscore ;
|
5 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
|
6 | | - INTO TABLE comscore
|
7 | | - FIELDS TERMINATED BY ','
|
8 | | - OPTIONALLY ENCLOSED BY '"'
|
9 | | - (@date,country_code,region_code,web_property,project_code,reach,visitors)
|
10 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
11 | | -
|
| 2 | +USE `analytics` ; |
| 3 | +-- Full refresh: each table is emptied (TRUNCATE) and reloaded from its analytics_in_*.csv file; @date with '-01' appended is parsed as %Y-%m-%d and stored as the last day of that month |
| 4 | +TRUNCATE TABLE comscore ; |
| 5 | +LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv' |
| 6 | + INTO TABLE comscore |
| 7 | + FIELDS TERMINATED BY ',' |
| 8 | + OPTIONALLY ENCLOSED BY '"' |
| 9 | + (@date,country_code,region_code,web_property,project_code,reach,visitors) |
| 10 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 11 | + |
12 | 12 | TRUNCATE TABLE comscore_regions ; |
13 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
|
14 | | - INTO TABLE comscore_regions
|
15 | | - FIELDS TERMINATED BY ','
|
16 | | - OPTIONALLY ENCLOSED BY '"'
|
17 | | - (report_language,region_code,region_name) ;
|
18 | | -
|
| 13 | +LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv' |
| 14 | + INTO TABLE comscore_regions |
| 15 | + FIELDS TERMINATED BY ',' |
| 16 | + OPTIONALLY ENCLOSED BY '"' |
| 17 | + (report_language,region_code,region_name) ; |
| 18 | + |
19 | 19 | TRUNCATE TABLE wikistats ; |
20 | | -LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
|
21 | | - INTO TABLE wikistats
|
22 | | - FIELDS TERMINATED BY ','
|
23 | | - OPTIONALLY ENCLOSED BY '"'
|
24 | | - (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
|
25 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
26 | | -
|
| 20 | +LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv' |
| 21 | + INTO TABLE wikistats |
| 22 | + FIELDS TERMINATED BY ',' |
| 23 | + OPTIONALLY ENCLOSED BY '"' |
| 24 | + (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects) |
| 25 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 26 | + |
27 | 27 | TRUNCATE TABLE page_views ; |
28 | | -LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
|
29 | | - INTO TABLE page_views
|
30 | | - FIELDS TERMINATED BY ','
|
31 | | - OPTIONALLY ENCLOSED BY '"'
|
32 | | - (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
|
33 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
34 | | -
|
| 28 | +LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv' |
| 29 | + INTO TABLE page_views |
| 30 | + FIELDS TERMINATED BY ',' |
| 31 | + OPTIONALLY ENCLOSED BY '"' |
| 32 | + (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized) |
| 33 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 34 | + |
35 | 35 | TRUNCATE TABLE language_names ; |
36 | | -LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
|
37 | | - INTO TABLE language_names
|
38 | | - FIELDS TERMINATED BY ','
|
39 | | - OPTIONALLY ENCLOSED BY '"'
|
40 | | - (report_language,language_code,language_name) ;
|
41 | | -
|
| 36 | +LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv' |
| 37 | + INTO TABLE language_names |
| 38 | + FIELDS TERMINATED BY ',' |
| 39 | + OPTIONALLY ENCLOSED BY '"' |
| 40 | + (report_language,language_code,language_name) ; |
| 41 | + |
42 | 42 | TRUNCATE TABLE binaries ; |
43 | | -LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
|
44 | | - INTO TABLE binaries
|
45 | | - FIELDS TERMINATED BY ','
|
46 | | - OPTIONALLY ENCLOSED BY '"'
|
47 | | - (project_code,language_code,@date,extension,binaries)
|
48 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
49 | | -
|
| 43 | +LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv' |
| 44 | + INTO TABLE binaries |
| 45 | + FIELDS TERMINATED BY ',' |
| 46 | + OPTIONALLY ENCLOSED BY '"' |
| 47 | + (project_code,language_code,@date,extension,binaries) |
| 48 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 49 | + |
50 | 50 | TRUNCATE TABLE offline ; |
51 | | -LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
|
52 | | - INTO TABLE offline
|
53 | | - FIELDS TERMINATED BY ','
|
54 | | - OPTIONALLY ENCLOSED BY '"'
|
55 | | - (@date, readers)
|
56 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
| 51 | +LOAD DATA LOCAL INFILE 'analytics_in_offline.csv' |
| 52 | + INTO TABLE offline |
| 53 | + FIELDS TERMINATED BY ',' |
| 54 | + OPTIONALLY ENCLOSED BY '"' |
| 55 | + (@date, readers) |
| 56 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
Property changes on: trunk/wikistats/analytics/analytics_refresh_from_csv.txt |
___________________________________________________________________ |
Added: svn:eol-style |
57 | 57 | + native |
Property changes on: trunk/wikistats/analytics/AnalyticsPrepPageViews.pl |
___________________________________________________________________ |
Added: svn:eol-style |
58 | 58 | + native |
Index: trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt |
— | — | @@ -1,179 +1,179 @@ |
2 | | -
|
3 | | -
|
4 | | -
|
5 | | -
|
6 | | -
|
7 | | -DROP DATABASE IF EXISTS `analytics` ;
|
8 | | -
|
9 | | -CREATE DATABASE `analytics` ;
|
10 | | -
|
11 | | -USE `analytics` ;
|
12 | | -
|
13 | | -CREATE TABLE `comscore` (
|
14 | | - `date` date NOT NULL,
|
15 | | - `country_code` varchar (3),
|
16 | | - `region_code` varchar (3),
|
17 | | - `web_property` varchar (20),
|
18 | | - `project_code` varchar (10),
|
19 | | - `reach` decimal (4,1) DEFAULT NULL,
|
20 | | - `visitors` decimal (15) DEFAULT NULL,
|
21 | | - PRIMARY KEY (date,country_code,region_code,project_code,web_property),
|
22 | | - KEY (`country_code`)
|
23 | | -) ;
|
24 | | -
|
25 | | -CREATE TABLE `comscore_regions` (
|
26 | | - `region_code` varchar (2),
|
27 | | - `report_language` varchar (10),
|
28 | | - `region_name` varchar (18),
|
29 | | - PRIMARY KEY (report_language,region_code)
|
30 | | -) ;
|
31 | | -
|
32 | | -CREATE TABLE `wikistats` (
|
33 | | - `date` date NOT NULL,
|
34 | | - `project_code` varchar (10),
|
35 | | - `language_code` varchar (15),
|
36 | | - `editors_all_time` int (10) DEFAULT NULL,
|
37 | | - `editors_new` int (7) DEFAULT NULL,
|
38 | | - `editors_ge_5` int (7) DEFAULT NULL,
|
39 | | - `editors_ge_25` int (7) DEFAULT NULL,
|
40 | | - `editors_ge_100` int (7) DEFAULT NULL,
|
41 | | - `articles` int (12) DEFAULT NULL,
|
42 | | - `articles_new_per_day` int (9) DEFAULT NULL,
|
43 | | - `articles_over_bytes_500` int (12) DEFAULT NULL,
|
44 | | - `articles_over_bytes_2000` int (12) DEFAULT NULL,
|
45 | | - `edits_per_article` decimal (9,1) DEFAULT NULL,
|
46 | | - `bytes_per_article` decimal (9,1) DEFAULT NULL,
|
47 | | - `edits` int (12) DEFAULT NULL,
|
48 | | - `size_in_bytes` int (15) DEFAULT NULL,
|
49 | | - `size_in_words` int (15) DEFAULT NULL,
|
50 | | - `links_internal` int (15) DEFAULT NULL,
|
51 | | - `links_interwiki` int (15) DEFAULT NULL,
|
52 | | - `links_image` int (15) DEFAULT NULL,
|
53 | | - `links_external` int (15) DEFAULT NULL,
|
54 | | - `redirects` int (15) DEFAULT NULL,
|
55 | | - PRIMARY KEY (date,project_code,language_code)
|
56 | | -) ;
|
57 | | -
|
58 | | -CREATE TABLE `page_views` (
|
59 | | - `date` date NOT NULL,
|
60 | | - `project_code` char (2),
|
61 | | - `language_code` char (15),
|
62 | | - `views_non_mobile_raw` bigint (15),
|
63 | | - `views_mobile_raw` bigint (15),
|
64 | | - `views_non_mobile_normalized` bigint (15),
|
65 | | - `views_mobile_normalized` bigint (15),
|
66 | | - `views_raw` bigint (15),
|
67 | | - `views_normalized` bigint (15),
|
68 | | - PRIMARY KEY (date,project_code,language_code)
|
69 | | -) ;
|
70 | | -
|
71 | | -CREATE TABLE `language_names` (
|
72 | | - `report_language` varchar (15),
|
73 | | - `language_code` varchar (15),
|
74 | | - `language_name` varchar (50),
|
75 | | - PRIMARY KEY (report_language,language_code)
|
76 | | -) ;
|
77 | | -
|
78 | | -CREATE TABLE `binaries` (
|
79 | | - `date` date NOT NULL,
|
80 | | - `project_code` char (2),
|
81 | | - `language_code` char (15),
|
82 | | - `extension` varchar (10),
|
83 | | - `binaries` bigint (15),
|
84 | | - PRIMARY KEY (date,project_code,language_code,extension)
|
85 | | -) ;
|
86 | | -
|
87 | | -CREATE TABLE `offline` (
|
88 | | - `date` date NOT NULL,
|
89 | | - `readers` bigint (12),
|
90 | | - PRIMARY KEY (date,readers)
|
91 | | -) ;
|
92 | | -
|
93 | | -
|
94 | | -
|
95 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv'
|
96 | | - INTO TABLE comscore
|
97 | | - FIELDS TERMINATED BY ','
|
98 | | - OPTIONALLY ENCLOSED BY '"'
|
99 | | - (@date,country_code,region_code,web_property,project_code,reach,visitors)
|
100 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
101 | | -
|
102 | | -LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv'
|
103 | | - INTO TABLE comscore_regions
|
104 | | - FIELDS TERMINATED BY ','
|
105 | | - OPTIONALLY ENCLOSED BY '"'
|
106 | | - (report_language,region_code,region_name) ;
|
107 | | -
|
108 | | -LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv'
|
109 | | - INTO TABLE wikistats
|
110 | | - FIELDS TERMINATED BY ','
|
111 | | - OPTIONALLY ENCLOSED BY '"'
|
112 | | - (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects)
|
113 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
114 | | -
|
115 | | -LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv'
|
116 | | - INTO TABLE page_views
|
117 | | - FIELDS TERMINATED BY ','
|
118 | | - OPTIONALLY ENCLOSED BY '"'
|
119 | | - (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized)
|
120 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
121 | | -
|
122 | | -
|
123 | | -LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv'
|
124 | | - INTO TABLE language_names
|
125 | | - FIELDS TERMINATED BY ','
|
126 | | - OPTIONALLY ENCLOSED BY '"'
|
127 | | - (report_language,language_code,language_name) ;
|
128 | | -
|
129 | | -LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv'
|
130 | | - INTO TABLE binaries
|
131 | | - FIELDS TERMINATED BY ','
|
132 | | - OPTIONALLY ENCLOSED BY '"'
|
133 | | - (project_code,language_code,@date,extension,binaries)
|
134 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
135 | | -
|
136 | | -LOAD DATA LOCAL INFILE 'analytics_in_offline.csv'
|
137 | | - INTO TABLE offline
|
138 | | - FIELDS TERMINATED BY ','
|
139 | | - OPTIONALLY ENCLOSED BY '"'
|
140 | | - (@date,readers)
|
141 | | - SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ;
|
142 | | -
|
143 | | -
|
144 | | - SELECT * FROM offline ;
|
145 | | -
|
146 | | -
|
| 2 | +-- make sure to delete output files *test*.csv first if any exist (MySQL on purpose forbids overwrite) |
| 3 | + |
| 4 | +-- tables implemented: |
| 5 | +-- comscore |
| 6 | +-- comscore_regions |
| 7 | +-- wikistats |
| 8 | +-- page_views |
| 9 | +-- language_names |
| 10 | +-- binaries |
| 11 | + |
| 12 | +-- more tables planned (O= optional, not needed for report card stage) |
| 13 | +-- project_names |
| 14 | +-- O edits per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other) |
| 15 | +-- O editors per project_code, per language, per month, per normalization type (Y/N), editor type (manual, anonymous, bot), namespace group (articles, talk pages, other) |
| 16 | + |
| 17 | + |
| 18 | +-- open issues: |
| 19 | +-- only store basic data in database and calculate all aggregates on the fly or do some aggregation beforehand? (e.g. count for mobile / non-mobile / ==> total of both ? <==) |
| 20 | +-- for binaries, store one extension type per row? (future proof, more work to query), or a selected few as columns? (one row only needed per month) |
| 21 | + |
| 22 | +-- Create database and all tables from scratch |
| 23 | +DROP DATABASE IF EXISTS `analytics` ; |
| 24 | + |
| 25 | +CREATE DATABASE `analytics` ; |
| 26 | + |
| 27 | +USE `analytics` ; |
| 28 | + |
| 29 | +CREATE TABLE `comscore` ( |
| 30 | + `date` date NOT NULL, |
| 31 | + `country_code` varchar (3), |
| 32 | + `region_code` varchar (3), |
| 33 | + `web_property` varchar (20), |
| 34 | + `project_code` varchar (10), |
| 35 | + `reach` decimal (4,1) DEFAULT NULL, |
| 36 | + `visitors` decimal (15) DEFAULT NULL, |
| 37 | + PRIMARY KEY (date,country_code,region_code,project_code,web_property), |
| 38 | + KEY (`country_code`) |
| 39 | +) ; |
| 40 | + |
| 41 | +CREATE TABLE `comscore_regions` ( |
| 42 | + `region_code` varchar (2), |
| 43 | + `report_language` varchar (10), |
| 44 | + `region_name` varchar (18), |
| 45 | + PRIMARY KEY (report_language,region_code) |
| 46 | +) ; |
| 47 | + |
| 48 | +CREATE TABLE `wikistats` ( |
| 49 | + `date` date NOT NULL, |
| 50 | + `project_code` varchar (10), |
| 51 | + `language_code` varchar (15), |
| 52 | + `editors_all_time` int (10) DEFAULT NULL, |
| 53 | + `editors_new` int (7) DEFAULT NULL, |
| 54 | + `editors_ge_5` int (7) DEFAULT NULL, |
| 55 | + `editors_ge_25` int (7) DEFAULT NULL, |
| 56 | + `editors_ge_100` int (7) DEFAULT NULL, |
| 57 | + `articles` int (12) DEFAULT NULL, |
| 58 | + `articles_new_per_day` int (9) DEFAULT NULL, |
| 59 | + `articles_over_bytes_500` int (12) DEFAULT NULL, |
| 60 | + `articles_over_bytes_2000` int (12) DEFAULT NULL, |
| 61 | + `edits_per_article` decimal (9,1) DEFAULT NULL, |
| 62 | + `bytes_per_article` decimal (9,1) DEFAULT NULL, |
| 63 | + `edits` int (12) DEFAULT NULL, |
| 64 | + `size_in_bytes` int (15) DEFAULT NULL, |
| 65 | + `size_in_words` int (15) DEFAULT NULL, |
| 66 | + `links_internal` int (15) DEFAULT NULL, |
| 67 | + `links_interwiki` int (15) DEFAULT NULL, |
| 68 | + `links_image` int (15) DEFAULT NULL, |
| 69 | + `links_external` int (15) DEFAULT NULL, |
| 70 | + `redirects` int (15) DEFAULT NULL, |
| 71 | + PRIMARY KEY (date,project_code,language_code) |
| 72 | +) ; |
| 73 | + |
| 74 | +CREATE TABLE `page_views` ( |
| 75 | + `date` date NOT NULL, |
| 76 | + `project_code` char (2), |
| 77 | + `language_code` char (15), |
| 78 | + `views_non_mobile_raw` bigint (15), |
| 79 | + `views_mobile_raw` bigint (15), |
| 80 | + `views_non_mobile_normalized` bigint (15), |
| 81 | + `views_mobile_normalized` bigint (15), |
| 82 | + `views_raw` bigint (15), |
| 83 | + `views_normalized` bigint (15), |
| 84 | + PRIMARY KEY (date,project_code,language_code) |
| 85 | +) ; |
| 86 | + |
| 87 | +CREATE TABLE `language_names` ( |
| 88 | + `report_language` varchar (15), |
| 89 | + `language_code` varchar (15), |
| 90 | + `language_name` varchar (50), |
| 91 | + PRIMARY KEY (report_language,language_code) |
| 92 | +) ; |
| 93 | + |
| 94 | +CREATE TABLE `binaries` ( |
| 95 | + `date` date NOT NULL, |
| 96 | + `project_code` char (2), |
| 97 | + `language_code` char (15), |
| 98 | + `extension` varchar (10), |
| 99 | + `binaries` bigint (15), |
| 100 | + PRIMARY KEY (date,project_code,language_code,extension) |
| 101 | +) ; |
| 102 | + |
| 103 | +CREATE TABLE `offline` ( |
| 104 | + `date` date NOT NULL, |
| 105 | + `readers` bigint (12), |
| 106 | + PRIMARY KEY (date,readers) |
| 107 | +) ; |
| 108 | + |
| 109 | +-- SHOW TABLES ; |
| 110 | +-- DESCRIBE comscore ; |
| 111 | +-- DESCRIBE comscore_regions ; |
| 112 | +-- DESCRIBE wikistats ; |
| 113 | +-- DESCRIBE page_views ; |
| 114 | +-- DESCRIBE language_names ; |
| 115 | +-- DESCRIBE binaries ; |
| 116 | +-- DESCRIBE offline ; |
| 117 | + |
| 118 | +-- Database Manipulation |
| 119 | +-- Obviously in real world this is a separate script |
| 120 | + |
| 121 | +LOAD DATA LOCAL INFILE 'analytics_in_comscore.csv' |
| 122 | + INTO TABLE comscore |
| 123 | + FIELDS TERMINATED BY ',' |
| 124 | + OPTIONALLY ENCLOSED BY '"' |
| 125 | + (@date,country_code,region_code,web_property,project_code,reach,visitors) |
| 126 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 127 | + |
| 128 | +LOAD DATA LOCAL INFILE 'analytics_in_comscore_regions.csv' |
| 129 | + INTO TABLE comscore_regions |
| 130 | + FIELDS TERMINATED BY ',' |
| 131 | + OPTIONALLY ENCLOSED BY '"' |
| 132 | + (report_language,region_code,region_name) ; |
| 133 | + |
| 134 | +LOAD DATA LOCAL INFILE 'analytics_in_wikistats.csv' |
| 135 | + INTO TABLE wikistats |
| 136 | + FIELDS TERMINATED BY ',' |
| 137 | + OPTIONALLY ENCLOSED BY '"' |
| 138 | + (project_code,language_code,@date,editors_all_time,editors_new,editors_ge_5,editors_ge_25,editors_ge_100,articles,articles_new_per_day,articles_over_bytes_500,articles_over_bytes_2000,edits_per_article,bytes_per_article,edits,size_in_bytes,size_in_words,links_internal,links_interwiki,links_image,links_external,redirects) |
| 139 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 140 | + |
| 141 | +LOAD DATA LOCAL INFILE 'analytics_in_page_views.csv' |
| 142 | + INTO TABLE page_views |
| 143 | + FIELDS TERMINATED BY ',' |
| 144 | + OPTIONALLY ENCLOSED BY '"' |
| 145 | + (project_code,language_code,@date,views_non_mobile_raw,views_mobile_raw,views_non_mobile_normalized,views_mobile_normalized,views_raw,views_normalized) |
| 146 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 147 | + |
| 148 | + |
| 149 | +LOAD DATA LOCAL INFILE 'analytics_in_language_names.csv' |
| 150 | + INTO TABLE language_names |
| 151 | + FIELDS TERMINATED BY ',' |
| 152 | + OPTIONALLY ENCLOSED BY '"' |
| 153 | + (report_language,language_code,language_name) ; |
| 154 | + |
| 155 | +LOAD DATA LOCAL INFILE 'analytics_in_binaries.csv' |
| 156 | + INTO TABLE binaries |
| 157 | + FIELDS TERMINATED BY ',' |
| 158 | + OPTIONALLY ENCLOSED BY '"' |
| 159 | + (project_code,language_code,@date,extension,binaries) |
| 160 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 161 | + |
| 162 | +LOAD DATA LOCAL INFILE 'analytics_in_offline.csv' |
| 163 | + INTO TABLE offline |
| 164 | + FIELDS TERMINATED BY ',' |
| 165 | + OPTIONALLY ENCLOSED BY '"' |
| 166 | + (@date,readers) |
| 167 | + SET date = last_day (str_to_date(concat (@date,'-01'),'%Y-%m-%d')) ; |
| 168 | + |
| 169 | + |
| 170 | +-- show contents (debugging only) |
| 171 | +-- SELECT * FROM comscore ; |
| 172 | +-- SELECT * FROM comscore_regions ; |
| 173 | +-- SELECT * FROM wikistats ; |
| 174 | +-- SELECT * FROM page_views ; |
| 175 | +-- SELECT * FROM language_names ; |
| 176 | +-- SELECT * FROM binaries |
| 177 | +-- WHERE project_code = 'commons' ; |
| 178 | + SELECT * FROM offline ; |
| 179 | + |
| 180 | + |
Property changes on: trunk/wikistats/analytics/analytics_create_and_load_from_csv.txt |
___________________________________________________________________ |
Added: svn:eol-style |
147 | 181 | + native |
Property changes on: trunk/wikistats/analytics/analytics_generate_csv_files.sh |
___________________________________________________________________ |
Added: svn:eol-style |
148 | 182 | + native |
Property changes on: trunk/wikistats/analytics/analytics_upd.sh |
___________________________________________________________________ |
Added: svn:eol-style |
149 | 183 | + native |
Index: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl |
— | — | @@ -1,331 +1,331 @@ |
2 | | -#!/usr/local/bin/perl
|
3 | | -
|
4 | | -# Copyright (C) 2011 Wikimedia Foundation
|
5 | | -# This program is free software; you can redistribute it and/or
|
6 | | -# modify it under the terms of the GNU General Public License version 2
|
7 | | -# as published by the Free Software Foundation.
|
8 | | -# This program is distributed in the hope that it will be useful,
|
9 | | -# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10 | | -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
11 | | -# See the GNU General Public License for more details, at
|
12 | | -# http://www.fsf.org/licenses/gpl.html
|
13 | | -
|
14 | | -# Author:
|
15 | | -# Erik Zachte, email ezachte@wikimedia.org
|
16 | | -# loosely based on predecessor
|
17 | | -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl
|
18 | | -
|
19 | | -# Functionality:
|
20 | | -# tba
|
21 | | -
|
22 | | -# Parameters:
|
23 | | -# tba
|
24 | | -
|
25 | | -# Output:
|
26 | | -# updated csv file for import in MySQL
|
27 | | -
|
28 | | -# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/
|
29 | | -
|
30 | | - use Getopt::Std ;
|
31 | | -
|
32 | | - $true = 1 ;
|
33 | | - $false = 0 ;
|
34 | | -
|
35 | | - @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ;
|
36 | | -
|
37 | | - $file_csv_monthly_data = "StatisticsMonthly.csv" ;
|
38 | | - $file_csv_user_activity_spread = "StatisticsUserActivitySpread.csv" ;
|
39 | | - $file_csv_analytics_in = "analytics_in_wikistats.csv" ;
|
40 | | -
|
41 | | - &ParseArguments ;
|
42 | | - &ReadStatisticsMonthly ;
|
43 | | - &FindLargestWikis ;
|
44 | | - &WriteMonthlyData ;
|
45 | | -
|
46 | | - print "\nReady\n\n" ;
|
47 | | - exit ;
|
48 | | -
|
49 | | -sub ParseArguments
|
50 | | -{
|
51 | | - my (@options, $arguments) ;
|
52 | | -
|
53 | | - getopt ("io", \%options) ;
|
54 | | -
|
55 | | - foreach $arg (sort keys %options)
|
56 | | - { $arguments .= " -$arg " . $options {$arg} . "\n" ; }
|
57 | | - print ("\nArguments\n$arguments\n") ;
|
58 | | -
|
59 | | - if (! -d '/mnt/') # EZ test
|
60 | | - {
|
61 | | - $path_in = "c:/\@ wikimedia/# out bayes" ;
|
62 | | - $path_out = "c:/MySQL/analytics" ;
|
63 | | - }
|
64 | | - else
|
65 | | - {
|
66 | | - die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ;
|
67 | | - die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ;
|
68 | | -
|
69 | | - $path_in = $options {"i"} ;
|
70 | | - $path_out = $options {"o"} ;
|
71 | | - }
|
72 | | -
|
73 | | - die "Input folder '$path_in' does not exist" if (! -d $path_in) ;
|
74 | | - die "Output folder '$path_out' does not exist" if (! -d $path_out) ;
|
75 | | -
|
76 | | - print "Input folder: $path_in\n" ;
|
77 | | - print "Output folder: $path_out\n\n" ;
|
78 | | -
|
79 | | - $file_csv_out = "$path_out/analytics_in_wikistats.csv" ;
|
80 | | -}
|
81 | | -
|
82 | | -sub ReadStatisticsMonthly
|
83 | | -{
|
84 | | - &ReadStatisticsMonthlyForProject ("wb") ;
|
85 | | - &ReadStatisticsMonthlyForProject ("wk") ;
|
86 | | - &ReadStatisticsMonthlyForProject ("wn") ;
|
87 | | - &ReadStatisticsMonthlyForProject ("wp") ;
|
88 | | - &ReadStatisticsMonthlyForProject ("wq") ;
|
89 | | - &ReadStatisticsMonthlyForProject ("ws") ;
|
90 | | - &ReadStatisticsMonthlyForProject ("wv") ;
|
91 | | - &ReadStatisticsMonthlyForProject ("wx") ;
|
92 | | -
|
93 | | -# &ReadStatisticsPerBinariesExtensionCommons ;
|
94 | | -}
|
95 | | -
|
96 | | -sub ReadStatisticsMonthlyForProject
|
97 | | -{
|
98 | | - my $project = shift;
|
99 | | - $all_projects = "*" ;
|
100 | | -
|
101 | | - my $file_csv_in_1 = "$path_in/csv_$project/$file_csv_monthly_data" ;
|
102 | | - my $file_csv_in_2 = "$path_in/csv_$project/$file_csv_user_activity_spread" ;
|
103 | | -
|
104 | | - if (! -e $file_csv_in_1)
|
105 | | - { &Abort ("Input file '$file_csv_in_1' not found") ; }
|
106 | | - if (! -e $file_csv_in_2)
|
107 | | - { &Abort ("Input file '$file_csv_in_2' not found") ; }
|
108 | | -
|
109 | | - my $yyyymm ;
|
110 | | -
|
111 | | - print "Read '$file_csv_in_1'\n" ;
|
112 | | - open CSV_IN, '<', $file_csv_in_1 ;
|
113 | | - while ($line = <CSV_IN>)
|
114 | | - {
|
115 | | - chomp $line ;
|
116 | | - ($language,$date,$counts) = split (',', $line, 3) ;
|
117 | | - @fields = split (',', $counts) ;
|
118 | | -
|
119 | | - next if ! &AcceptWiki ($project,$language) ;
|
120 | | -
|
121 | | - ($month,$day,$year) = split ('\/', $date) ;
|
122 | | - $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
|
123 | | -
|
124 | | - foreach $field (@fields)
|
125 | | - {
|
126 | | - if ($field eq '-')
|
127 | | - { $field = 0 ; }
|
128 | | - }
|
129 | | -
|
130 | | - $data = $fields [0] . ',' . # contributors all time
|
131 | | - $fields [1] . ',' . # new contributors
|
132 | | - 'data2,' . # place holder for more data, to be inserted later
|
133 | | - $fields [4] . ',' . # articles
|
134 | | - $fields [6] . ',' . # articles new per day
|
135 | | - $fields [9] . ',' . # larger than 0.5 kB
|
136 | | - $fields [10] . ',' . # larger than 2.0 kB
|
137 | | - $fields [7] . ',' . # mean edits per article
|
138 | | - $fields [8] . ',' . # mean bytes per article
|
139 | | - $fields [11] . ',' . # edits
|
140 | | - $fields [12] . ',' . # size in bytes
|
141 | | - $fields [13] . ',' . # size in words
|
142 | | - $fields [14] . ',' . # links internal
|
143 | | - $fields [15] . ',' . # links interwiki
|
144 | | - $fields [16] . ',' . # links images
|
145 | | - $fields [17] . ',' . # links external
|
146 | | - $fields [18] ; # redirects
|
147 | | -
|
148 | | - $data1 {"$project,$language,$yyyymm"} = $data ;
|
149 | | - }
|
150 | | - close CSV_IN ;
|
151 | | -
|
152 | | - # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv)
|
153 | | -
|
154 | | - print "Read '$file_csv_in_2'\n" ;
|
155 | | - open CSV_IN, '<', $file_csv_in_2 ;
|
156 | | - while ($line = <CSV_IN>)
|
157 | | - {
|
158 | | - chomp $line ;
|
159 | | - ($language,$date,$reguser_bot,$group,@counts) = split (',', $line) ;
|
160 | | -
|
161 | | - next if ! &AcceptWiki ($project,$language) ;
|
162 | | -
|
163 | | - if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot
|
164 | | - if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces
|
165 | | -
|
166 | | - ($month,$day,$year) = split ('\/', $date) ;
|
167 | | - $yyyymm = sprintf ("%04d-%02d", $year, $month) ;
|
168 | | - $months {$yyyymm} ++ ;
|
169 | | -# print "YYYYMM $yyyymm\n" ;
|
170 | | -
|
171 | | - # data have been collected in WikiCountsProcess.pm and been written in WikiCountsOutput.pm
|
172 | | - # count user with over x edits
|
173 | | - # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc
|
174 | | - # @thresholds = (1,3,5,10,25,32,50,100,250,316,500,1000,2500,3162,5000,10000,25000,31623,50000,100000,250000,316228,500000,1000000,2500000,3162278,500000,10000000,25000000,31622777,5000000,100000000) ;
|
175 | | - $edits_ge_5 = @counts [2] > 0 ? @counts [2] : 0 ;
|
176 | | - $edits_ge_25 = @counts [4] > 0 ? @counts [4] : 0 ;
|
177 | | - $edits_ge_100 = @counts [7] > 0 ? @counts [7] : 0 ;
|
178 | | - $data2 {"$project,$language,$yyyymm"} = "$edits_ge_5,$edits_ge_25,$edits_ge_100" ;
|
179 | | -
|
180 | | - $total_edits_ge_5 {"$project,$language"} += $edits_ge_5 ;
|
181 | | - $total_edits_ge_25 {"$project,$language"} += $edits_ge_25 ;
|
182 | | - $total_edits_ge_100 {"$project,$language"} += $edits_ge_100 ;
|
183 | | -
|
184 | | - # prep string with right number of commas
|
185 | | - if ($data2_default eq '')
|
186 | | - {
|
187 | | - $data2_default = $data2 {"$project,$language,$yyyymm"} ;
|
188 | | - $data2_default =~ s/[^,]+/0/g ;
|
189 | | - }
|
190 | | - }
|
191 | | - close CSV_IN ;
|
192 | | -}
|
193 | | -
|
194 | | -#sub ReadStatisticsPerBinariesExtensionCommons
|
195 | | -#{
|
196 | | -# my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ;
|
197 | | -# my $mmax = -1 ;
|
198 | | -
|
199 | | -# if (! -e $file_csv_in)
|
200 | | -# { &Abort ("Input file '$file_csv_in' not found") ; }
|
201 | | -
|
202 | | -# print "Read '$file_csv_in'\n" ;
|
203 | | -# open CSV_IN, '<', $file_csv_in ;
|
204 | | -# while ($line = <CSV_IN>)
|
205 | | -# {
|
206 | | -# chomp $line ;
|
207 | | -# ($language,$date,$counts) = split (',', $line, 3) ;
|
208 | | -
|
209 | | -# if ($language ne "commons") { next ; }
|
210 | | -
|
211 | | -# if ($date eq "00/0000")
|
212 | | -# {
|
213 | | -# @fields = split (',', $counts) ;
|
214 | | -# $field_ndx = 0 ;
|
215 | | -# foreach $field (@fields)
|
216 | | -# {
|
217 | | -# $ext_cnt {-1}{$field_ndx} = $field ;
|
218 | | -# # print "EXT_CNT $field_ndx : $field\n" ;
|
219 | | -# $field_ndx ++ ;
|
220 | | -# }
|
221 | | -# next ;
|
222 | | -# }
|
223 | | -
|
224 | | -# ($month,$year) = split ('\/', $date) ;
|
225 | | -# my $m = &months_since_2000_01 ($year,$month) ;
|
226 | | -# next if $m < $m_start ;
|
227 | | -
|
228 | | -# if ($m > $mmax)
|
229 | | -# { $mmax = $m ; }
|
230 | | -
|
231 | | -# @fields = split (',', $counts) ;
|
232 | | -# $field_ndx = 0 ;
|
233 | | -# foreach $field (@fields)
|
234 | | -# {
|
235 | | -# $ext_cnt {$m}{$field_ndx} = $field ;
|
236 | | -# $ext_tot {$m} += $field ;
|
237 | | -# $field_ndx ++ ;
|
238 | | -# }
|
239 | | -# }
|
240 | | -# close CSV_IN ;
|
241 | | -
|
242 | | -# %ext_cnt_mmax = %{$ext_cnt {$mmax}} ;
|
243 | | -# @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ;
|
244 | | -
|
245 | | -# $extcnt = 0 ;
|
246 | | -# foreach $extndx (@ext_cnt_mmax)
|
247 | | -# {
|
248 | | -# # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ;
|
249 | | -# push @extndxs, $extndx ;
|
250 | | -# if ($extcnt++ >= 9) { last ; }
|
251 | | -# }
|
252 | | -#}
|
253 | | -
|
254 | | -sub FindLargestWikis
|
255 | | -{
|
256 | | - print "Largest projects (most accumulated very active editors):\n";
|
257 | | - @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ;
|
258 | | - $rank = 0 ;
|
259 | | - foreach $project_language (@total_edits_ge_100)
|
260 | | - {
|
261 | | - $largest_projects {$project_language} = $rank++ ;
|
262 | | - print "$project_language," ;
|
263 | | - last if $rank > 10 ;
|
264 | | - }
|
265 | | - print "\n\n" ;
|
266 | | -
|
267 | | - foreach $yyyymm (sort keys %months)
|
268 | | - {
|
269 | | - next if $yyyymm lt '2011' ;
|
270 | | - foreach $project_language (keys %largest_projects)
|
271 | | - {
|
272 | | - ($project,$language) = split (',', $project_language) ;
|
273 | | - if ($data2 {"$project,$language,$yyyymm"} eq '')
|
274 | | - {
|
275 | | - print "No data yet for large wiki $project_language for $yyyymm-> skip month $yyyymm\n" ;
|
276 | | - $months {$yyyymm} = 0 ;
|
277 | | - }
|
278 | | - }
|
279 | | - }
|
280 | | - exit ;
|
281 | | -}
|
282 | | -
|
283 | | -sub WriteMonthlyData
|
284 | | -{
|
285 | | - my $file_csv_out = "$path_out/$file_csv_analytics_in" ;
|
286 | | - open CSV_OUT, '>', $file_csv_out ;
|
287 | | - foreach $project_wiki_month (sort keys %data1)
|
288 | | - {
|
289 | | - ($project,$wiki,$yyyymm) = split (',', $project_wiki_month) ;
|
290 | | -
|
291 | | - # recent month misses one or more large wikis?
|
292 | | - next if $months {$yyyymm} == 0 ;
|
293 | | -
|
294 | | - $data1 = $data1 {$project_wiki_month} ;
|
295 | | - $data2 = $data2 {$project_wiki_month} ;
|
296 | | - if ($data2 eq '')
|
297 | | - {
|
298 | | - print "Editor data missing for $project_wiki_month\n" ;
|
299 | | - $data2 = $data2_default ;
|
300 | | - }
|
301 | | - $data1 =~ s/data2/$data2/ ; # insert rather than append to have all editor fields close together
|
302 | | - print CSV_OUT "$project_wiki_month,$data1\n" ;
|
303 | | - }
|
304 | | - $total_edits_ge_5 {"$project,*,$yyyymm"} += $edits_ge_5 ;
|
305 | | - $total_edits_ge_25 {"$project,*,$yyyymm"} += $edits_ge_25 ;
|
306 | | - $total_edits_ge_100 {"$project,*,$yyyymm"} += $edits_ge_100 ;
|
307 | | - close CSV_OUT ;
|
308 | | -}
|
309 | | -
|
310 | | -sub AcceptWiki
|
311 | | -{
|
312 | | - my ($project,$language) = @_ ;
|
313 | | -
|
314 | | - return $false if $language eq 'commons' and $project ne 'wx' ; # commons also in wikipedia csv files (bug, hard to cleanup, just skip)
|
315 | | - return $false if $language eq 'sr' and $project eq 'wn' ; # ignore insane bot spam on
|
316 | | - return $false if $language =~ /mania|team|comcom|closed|chair|langcom|office|searchcom|sep11|nostalgia|stats|test/i ;
|
317 | | -
|
318 | | - return $false if $language =~ /^(?:dk|tlh|ru_sib)$/ ; # dk=dumps exist(ed?) but site not, tlh=Klingon, ru-sib=Siberian
|
319 | | - return $false if $project eq 'wk' and ($language eq "als" or $language eq "tlh") ;
|
320 | | -
|
321 | | - return $true ;
|
322 | | -}
|
323 | | -
|
324 | | -sub Abort
|
325 | | -{
|
326 | | - my $msg = shift ;
|
327 | | - print "$msg\nExecution aborted." ;
|
328 | | - # to do: log also to file
|
329 | | - exit ;
|
330 | | -}
|
331 | | -
|
332 | | -
|
| 2 | +#!/usr/local/bin/perl |
| 3 | + |
| 4 | +# Copyright (C) 2011 Wikimedia Foundation |
| 5 | +# This program is free software; you can redistribute it and/or |
| 6 | +# modify it under the terms of the GNU General Public License version 2 |
| 7 | +# as published by the Free Software Foundation. |
| 8 | +# This program is distributed in the hope that it will be useful, |
| 9 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 11 | +# See the GNU General Public License for more details, at |
| 12 | +# http://www.fsf.org/licenses/gpl.html |
| 13 | + |
| 14 | +# Author: |
| 15 | +# Erik Zachte, email ezachte@wikimedia.org |
| 16 | +# loosely based on predecessor |
| 17 | +# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/reportcard/ReportCardExtractWikiCountsOutput.pl |
| 18 | + |
| 19 | +# Functionality: |
| 20 | +# tba |
| 21 | + |
| 22 | +# Parameters: |
| 23 | +# tba |
| 24 | + |
| 25 | +# Output: |
| 26 | +# updated csv file for import in MySQL |
| 27 | + |
| 28 | +# http://svn.wikimedia.org/viewvc/mediawiki/trunk/wikistats/analytics/ |
| 29 | + |
| 30 | + use Getopt::Std ; |
| 31 | + |
| 32 | + $true = 1 ; |
| 33 | + $false = 0 ; |
| 34 | + |
| 35 | + @projects = ('wb','wk','wn','wp','wq','ws','wv','wx','commons','*') ; |
| 36 | + |
| 37 | + $file_csv_monthly_data = "StatisticsMonthly.csv" ; |
| 38 | + $file_csv_user_activity_spread = "StatisticsUserActivitySpread.csv" ; |
| 39 | + $file_csv_analytics_in = "analytics_in_wikistats.csv" ; |
| 40 | + |
| 41 | + &ParseArguments ; |
| 42 | + &ReadStatisticsMonthly ; |
| 43 | + &FindLargestWikis ; |
| 44 | + &WriteMonthlyData ; |
| 45 | + |
| 46 | + print "\nReady\n\n" ; |
| 47 | + exit ; |
| 48 | + |
| 49 | +sub ParseArguments |
| 50 | +{ |
| 51 | + my (@options, $arguments) ; |
| 52 | + |
| 53 | + getopt ("io", \%options) ; |
| 54 | + |
| 55 | + foreach $arg (sort keys %options) |
| 56 | + { $arguments .= " -$arg " . $options {$arg} . "\n" ; } |
| 57 | + print ("\nArguments\n$arguments\n") ; |
| 58 | + |
| 59 | + if (! -d '/mnt/') # EZ test |
| 60 | + { |
| 61 | + $path_in = "c:/\@ wikimedia/# out bayes" ; |
| 62 | + $path_out = "c:/MySQL/analytics" ; |
| 63 | + } |
| 64 | + else |
| 65 | + { |
| 66 | + die ("Specify input folder for projectcounts files as: -i path") if (! defined ($options {"i"})) ; |
| 67 | + die ("Specify output folder as: -o path'") if (! defined ($options {"o"})) ; |
| 68 | + |
| 69 | + $path_in = $options {"i"} ; |
| 70 | + $path_out = $options {"o"} ; |
| 71 | + } |
| 72 | + |
| 73 | + die "Input folder '$path_in' does not exist" if (! -d $path_in) ; |
| 74 | + die "Output folder '$path_out' does not exist" if (! -d $path_out) ; |
| 75 | + |
| 76 | + print "Input folder: $path_in\n" ; |
| 77 | + print "Output folder: $path_out\n\n" ; |
| 78 | + |
| 79 | + $file_csv_out = "$path_out/analytics_in_wikistats.csv" ; |
| 80 | +} |
| 81 | + |
| 82 | +sub ReadStatisticsMonthly |
| 83 | +{ |
| 84 | + &ReadStatisticsMonthlyForProject ("wb") ; |
| 85 | + &ReadStatisticsMonthlyForProject ("wk") ; |
| 86 | + &ReadStatisticsMonthlyForProject ("wn") ; |
| 87 | + &ReadStatisticsMonthlyForProject ("wp") ; |
| 88 | + &ReadStatisticsMonthlyForProject ("wq") ; |
| 89 | + &ReadStatisticsMonthlyForProject ("ws") ; |
| 90 | + &ReadStatisticsMonthlyForProject ("wv") ; |
| 91 | + &ReadStatisticsMonthlyForProject ("wx") ; |
| 92 | + |
| 93 | +# &ReadStatisticsPerBinariesExtensionCommons ; |
| 94 | +} |
| 95 | + |
| 96 | +sub ReadStatisticsMonthlyForProject |
| 97 | +{ |
| 98 | + my $project = shift; |
| 99 | + $all_projects = "*" ; |
| 100 | + |
| 101 | + my $file_csv_in_1 = "$path_in/csv_$project/$file_csv_monthly_data" ; |
| 102 | + my $file_csv_in_2 = "$path_in/csv_$project/$file_csv_user_activity_spread" ; |
| 103 | + |
| 104 | + if (! -e $file_csv_in_1) |
| 105 | + { &Abort ("Input file '$file_csv_in_1' not found") ; } |
| 106 | + if (! -e $file_csv_in_2) |
| 107 | + { &Abort ("Input file '$file_csv_in_2' not found") ; } |
| 108 | + |
| 109 | + my $yyyymm ; |
| 110 | + |
| 111 | + print "Read '$file_csv_in_1'\n" ; |
| 112 | + open CSV_IN, '<', $file_csv_in_1 ; |
| 113 | + while ($line = <CSV_IN>) |
| 114 | + { |
| 115 | + chomp $line ; |
| 116 | + ($language,$date,$counts) = split (',', $line, 3) ; |
| 117 | + @fields = split (',', $counts) ; |
| 118 | + |
| 119 | + next if ! &AcceptWiki ($project,$language) ; |
| 120 | + |
| 121 | + ($month,$day,$year) = split ('\/', $date) ; |
| 122 | + $yyyymm = sprintf ("%04d-%02d", $year, $month) ; |
| 123 | + |
| 124 | + foreach $field (@fields) |
| 125 | + { |
| 126 | + if ($field eq '-') |
| 127 | + { $field = 0 ; } |
| 128 | + } |
| 129 | + |
| 130 | + $data = $fields [0] . ',' . # contributors all time |
| 131 | + $fields [1] . ',' . # new contributors |
| 132 | + 'data2,' . # place holder for more data, to be inserted later |
| 133 | + $fields [4] . ',' . # articles |
| 134 | + $fields [6] . ',' . # articles new per day |
| 135 | + $fields [9] . ',' . # larger than 0.5 kB |
| 136 | + $fields [10] . ',' . # larger than 2.0 kB |
| 137 | + $fields [7] . ',' . # mean edits per article |
| 138 | + $fields [8] . ',' . # mean bytes per article |
| 139 | + $fields [11] . ',' . # edits |
| 140 | + $fields [12] . ',' . # size in bytes |
| 141 | + $fields [13] . ',' . # size in words |
| 142 | + $fields [14] . ',' . # links internal |
| 143 | + $fields [15] . ',' . # links interwiki |
| 144 | + $fields [16] . ',' . # links images |
| 145 | + $fields [17] . ',' . # links external |
| 146 | + $fields [18] ; # redirects |
| 147 | + |
| 148 | + $data1 {"$project,$language,$yyyymm"} = $data ; |
| 149 | + } |
| 150 | + close CSV_IN ; |
| 151 | + |
| 152 | + # now read (very) active editors from newer more accurate file (split data for reg users and bots, unlike StatisticsMonthly.csv) |
| 153 | + |
| 154 | + print "Read '$file_csv_in_2'\n" ; |
| 155 | + open CSV_IN, '<', $file_csv_in_2 ; |
| 156 | + while ($line = <CSV_IN>) |
| 157 | + { |
| 158 | + chomp $line ; |
| 159 | + ($language,$date,$reguser_bot,$group,@counts) = split (',', $line) ; |
| 160 | + |
| 161 | + next if ! &AcceptWiki ($project,$language) ; |
| 162 | + |
| 163 | + if ($reguser_bot ne "R") { next ; } # R: reg user, B: bot |
| 164 | + if ($group ne "A") { next ; } # A: articles, T: talk pages, O: other namespaces |
| 165 | + |
| 166 | + ($month,$day,$year) = split ('\/', $date) ; |
| 167 | + $yyyymm = sprintf ("%04d-%02d", $year, $month) ; |
| 168 | + $months {$yyyymm} ++ ; |
| 169 | +# print "YYYYMM $yyyymm\n" ; |
| 170 | + |
| 171 | + # data have been collected in WikiCountsProcess.pm and been written in WikiCountsOutput.pm |
| 172 | + # count user with over x edits |
| 173 | + # threshold starting with a 3 are 10xSQRT(10), 100xSQRT(10), 1000xSQRT(10), etc |
| 174 | + # @thresholds = (1,3,5,10,25,32,50,100,250,316,500,1000,2500,3162,5000,10000,25000,31623,50000,100000,250000,316228,500000,1000000,2500000,3162278,500000,10000000,25000000,31622777,5000000,100000000) ; |
| 175 | + $edits_ge_5 = @counts [2] > 0 ? @counts [2] : 0 ; |
| 176 | + $edits_ge_25 = @counts [4] > 0 ? @counts [4] : 0 ; |
| 177 | + $edits_ge_100 = @counts [7] > 0 ? @counts [7] : 0 ; |
| 178 | + $data2 {"$project,$language,$yyyymm"} = "$edits_ge_5,$edits_ge_25,$edits_ge_100" ; |
| 179 | + |
| 180 | + $total_edits_ge_5 {"$project,$language"} += $edits_ge_5 ; |
| 181 | + $total_edits_ge_25 {"$project,$language"} += $edits_ge_25 ; |
| 182 | + $total_edits_ge_100 {"$project,$language"} += $edits_ge_100 ; |
| 183 | + |
| 184 | + # prep string with right amount of comma's |
| 185 | + if ($data2_default eq '') |
| 186 | + { |
| 187 | + $data2_default = $data2 {"$project,$language,$yyyymm"} ; |
| 188 | + $data2_default =~ s/[^,]+/0/g ; |
| 189 | + } |
| 190 | + } |
| 191 | + close CSV_IN ; |
| 192 | +} |
| 193 | + |
| 194 | +#sub ReadStatisticsPerBinariesExtensionCommons |
| 195 | +#{ |
| 196 | +# my $file_csv_in = "$path_in/csv_wx/StatisticsPerBinariesExtension.csv" ; |
| 197 | +# my $mmax = -1 ; |
| 198 | + |
| 199 | +# if (! -e $file_csv_in) |
| 200 | +# { &Abort ("Input file '$file_csv_in' not found") ; } |
| 201 | + |
| 202 | +# print "Read '$file_csv_in'\n" ; |
| 203 | +# open CSV_IN, '<', $file_csv_in ; |
| 204 | +# while ($line = <CSV_IN>) |
| 205 | +# { |
| 206 | +# chomp $line ; |
| 207 | +# ($language,$date,$counts) = split (',', $line, 3) ; |
| 208 | + |
| 209 | +# if ($language ne "commons") { next ; } |
| 210 | + |
| 211 | +# if ($date eq "00/0000") |
| 212 | +# { |
| 213 | +# @fields = split (',', $counts) ; |
| 214 | +# $field_ndx = 0 ; |
| 215 | +# foreach $field (@fields) |
| 216 | +# { |
| 217 | +# $ext_cnt {-1}{$field_ndx} = $field ; |
| 218 | +# # print "EXT_CNT $field_ndx : $field\n" ; |
| 219 | +# $field_ndx ++ ; |
| 220 | +# } |
| 221 | +# next ; |
| 222 | +# } |
| 223 | + |
| 224 | +# ($month,$year) = split ('\/', $date) ; |
| 225 | +# my $m = &months_since_2000_01 ($year,$month) ; |
| 226 | +# next if $m < $m_start ; |
| 227 | + |
| 228 | +# if ($m > $mmax) |
| 229 | +# { $mmax = $m ; } |
| 230 | + |
| 231 | +# @fields = split (',', $counts) ; |
| 232 | +# $field_ndx = 0 ; |
| 233 | +# foreach $field (@fields) |
| 234 | +# { |
| 235 | +# $ext_cnt {$m}{$field_ndx} = $field ; |
| 236 | +# $ext_tot {$m} += $field ; |
| 237 | +# $field_ndx ++ ; |
| 238 | +# } |
| 239 | +# } |
| 240 | +# close CSV_IN ; |
| 241 | + |
| 242 | +# %ext_cnt_mmax = %{$ext_cnt {$mmax}} ; |
| 243 | +# @ext_cnt_mmax = (sort {$ext_cnt_mmax {$b} <=> $ext_cnt_mmax {$a}} keys %ext_cnt_mmax) ; |
| 244 | + |
| 245 | +# $extcnt = 0 ; |
| 246 | +# foreach $extndx (@ext_cnt_mmax) |
| 247 | +# { |
| 248 | +# # print "$extndx < ${ext_cnt {-1}{$extndx}} > : ${ext_cnt_mmax {$extndx}}\n" ; |
| 249 | +# push @extndxs, $extndx ; |
| 250 | +# if ($extcnt++ >= 9) { last ; } |
| 251 | +# } |
| 252 | +#} |
| 253 | + |
# FindLargestWikis
#
# Step 1: sort all keys of %total_edits_ge_100 ("project,language") by their
#         accumulated count, highest first, and store the leading ones in
#         %largest_projects (key => zero-based rank). The names are echoed to STDOUT.
# Step 2: for every month in %months from 2011 onwards, check that each of those
#         large wikis has an entry in %data2. If one is missing, the month is
#         flagged by setting $months {$yyyymm} to 0.
#
# Reads  : %total_edits_ge_100, %months, %data2
# Writes : @total_edits_ge_100, $rank, %largest_projects, %months
# Returns normally (does not terminate the script), so that WriteMonthlyData,
# which skips months whose $months {..} value is 0, can act on the flags set here.
sub FindLargestWikis
{
  print "Largest projects (most accumulated very active editors):\n";
  @total_edits_ge_100 = sort {$total_edits_ge_100 {$b} <=> $total_edits_ge_100 {$a}} keys %total_edits_ge_100 ;
  $rank = 0 ;
  foreach my $project_language (@total_edits_ge_100)
  {
    $largest_projects {$project_language} = $rank++ ;
    print "$project_language," ;
    # NOTE(review): this cut-off keeps ranks 0..10, i.e. 11 wikis - confirm whether a top 10 was intended
    last if $rank > 10 ;
  }
  print "\n\n" ;

  foreach my $yyyymm (sort keys %months)
  {
    next if $yyyymm lt '2011' ; # string compare: only months 2011-01 and later are checked

    foreach my $project_language (keys %largest_projects)
    {
      # lexical copies: do not overwrite the script-wide $project / $language
      my ($project,$language) = split (',', $project_language) ;
      if ($data2 {"$project,$language,$yyyymm"} eq '')
      {
        print "No data yet for large wiki $project_language for $yyyymm-> skip month $yyyymm\n" ;
        $months {$yyyymm} = 0 ; # flag month as incomplete
      }
    }
  }
}
| 282 | + |
# WriteMonthlyData
#
# Writes one csv line per key of %data1 (sorted) to "$path_out/$file_csv_analytics_in".
# Each key has the form "project,wiki,yyyy-mm". The literal place holder 'data2'
# inside the %data1 value is replaced by the matching %data2 value, or by
# $data2_default when there is none, and the line is written as "<key>,<value>".
# Months for which $months {yyyy-mm} compares equal to 0 are skipped.
#
# Reads  : $path_out, $file_csv_analytics_in, %data1, %data2, %months, $data2_default
# Writes : the output file; globals $project, $wiki, $yyyymm; %total_edits_ge_*
# Aborts (via &Abort) when the output file cannot be opened or closed.
sub WriteMonthlyData
{
  my $file_csv_out = "$path_out/$file_csv_analytics_in" ;

  # without this check a failed open would silently produce no output at all
  open (CSV_OUT, '>', $file_csv_out) || &Abort ("Output file '$file_csv_out' could not be opened: $!") ;

  foreach $project_wiki_month (sort keys %data1)
  {
    ($project,$wiki,$yyyymm) = split (',', $project_wiki_month) ;

    # recent month misses one or more large wikis?
    next if $months {$yyyymm} == 0 ;

    my $line    = $data1 {$project_wiki_month} ;
    my $editors = $data2 {$project_wiki_month} ;
    if ($editors eq '')
    {
      print "Editor data missing for $project_wiki_month\n" ;
      $editors = $data2_default ;
    }
    $line =~ s/data2/$editors/ ; # insert rather than append to have all editor fields close together
    print CSV_OUT "$project_wiki_month,$line\n" ;
  }

  # NOTE(review): these three statements run once, after the loop, with whatever
  # $project / $yyyymm the last iteration left behind and with $edits_ge_* values
  # that are not set anywhere in this sub. Kept as-is to preserve global state;
  # confirm whether they can be dropped.
  $total_edits_ge_5   {"$project,*,$yyyymm"} += $edits_ge_5 ;
  $total_edits_ge_25  {"$project,*,$yyyymm"} += $edits_ge_25 ;
  $total_edits_ge_100 {"$project,*,$yyyymm"} += $edits_ge_100 ;

  # buffered write errors only surface at close
  close (CSV_OUT) || &Abort ("Output file '$file_csv_out' could not be closed: $!") ;
}
| 309 | + |
# AcceptWiki (project code, language code)
#
# Filter used while reading the csv files: returns $true when data for this
# project/language combination should be processed, $false when it should be skipped.
sub AcceptWiki
{
  my $project  = shift ;
  my $language = shift ;

  # commons also occurs in wikipedia csv files (bug, hard to clean up): accept it for project 'wx' only
  if (($language eq 'commons') && ($project ne 'wx'))
  { return $false ; }

  # 'sr' on project 'wn' is skipped because of bot spam (per the original author's note)
  if (($language eq 'sr') && ($project eq 'wn'))
  { return $false ; }

  # any code containing one of these fragments (case-insensitive) is skipped
  # presumably these are special, non-content wikis - TODO confirm
  if ($language =~ /mania|team|comcom|closed|chair|langcom|office|searchcom|sep11|nostalgia|stats|test/i)
  { return $false ; }

  # dk = dumps exist(ed?) but the site does not, tlh = Klingon, ru_sib = Siberian
  if ($language =~ /^(?:dk|tlh|ru_sib)$/)
  { return $false ; }

  if ($project eq 'wk')
  {
    if (($language eq "als") || ($language eq "tlh"))
    { return $false ; }
  }

  return $true ;
}
| 323 | + |
# Abort (message)
#
# Prints the message followed by "Execution aborted." to STDOUT and ends the script.
# Note: the script ends with exit status 0, so a calling shell script cannot
# tell an abort from a normal run by the status alone.
sub Abort
{
  my ($msg) = @_ ;

  print $msg . "\nExecution aborted." ;
  # to do: log also to file
  exit 0 ;
}
| 331 | + |
| 332 | + |
Property changes on: trunk/wikistats/analytics/AnalyticsPrepWikiCountsOutput.pl |
___________________________________________________________________ |
Added: svn:eol-style |
333 | 333 | + native |
Property changes on: trunk/wikistats/analytics/analytics_new.sh |
___________________________________________________________________ |
Added: svn:eol-style |
334 | 334 | + native |
Property changes on: trunk/wikistats/analytics/_readme.txt |
___________________________________________________________________ |
Added: svn:eol-style |
335 | 335 | + native |
Property changes on: trunk/wikistats/analytics/AnalyticsPrepComscoreData.pl |
___________________________________________________________________ |
Added: svn:eol-style |
336 | 336 | + native |