Index: trunk/extensions/UploadWizard/languageStats.php |
— | — | @@ -4,22 +4,307 @@ |
5 | 5 | |
# Per-language translation report.
#
# Optional language codes given on the command line restrict the report to
# those languages; with no arguments every language in the size table is listed.
$langsToTest = array_slice( $argv, 1 );

$wikipediaSize = getWikipediaSize();

# Every language is measured against the English message set.
# NOTE(review): $messages is defined outside the visible part of this file;
# it is assumed to map language code => ( message key => text ) -- confirm.
$total = count( $messages['en'] );

foreach ( $wikipediaSize as $lang => $size ) {
	# Strict comparison: codes are plain strings, so avoid PHP's loose
	# numeric-string matching when filtering by the requested languages.
	if ( count( $langsToTest ) && !in_array( $lang, $langsToTest, true ) ) {
		continue;
	}

	# A language with no message array at all counts as zero translated.
	$translated = 0;
	if ( array_key_exists( $lang, $messages ) ) {
		# Number of English message keys that also exist in this language.
		$translated = count( array_intersect_key( $messages['en'], $messages[$lang] ) );
	}

	# Guard against an empty English message set (division by zero).
	$percentage = $total > 0 ? (int)( 100 * $translated / $total ) : 0;

	# Record the computed figure; previously this entry was left at 0.
	$percentComplete[$lang] = $percentage;

	echo "Language: $lang Wiki size: $size Translated: $translated Percentage: $percentage\n";
}
26 | 30 | |
| 31 | + |
/**
 * Approximate article counts per Wikipedia language edition.
 *
 * Figures taken from stats.wikimedia.org/EN/TablesArticlesTotal.htm, May 2011.
 * Entries are listed from largest to smallest, so iterating the result walks
 * the languages in descending size order.
 *
 * Keys are hyphenated language codes (e.g. 'be-x-old', 'nds-nl'), so that
 * they can be matched against hyphenated message-array keys.
 *
 * @return array Map of language code (string) => article count (int)
 */
function getWikipediaSize() {
	return array(
		'en' => 3600000,
		'de' => 1200000,
		'fr' => 1100000,
		'it' => 796000,
		'pl' => 793000,
		'ja' => 748000,
		'es' => 747000,
		'ru' => 697000,
		'pt' => 680000,
		'nl' => 680000,
		'sv' => 392000,
		// Hack: both Chinese script variants are given the same figure;
		// it is unclear what the right per-variant value would be.
		'zh-hans' => 344000,
		'zh-hant' => 344000,
		'ca' => 316000,
		'no' => 297000,
		'uk' => 272000,
		'fi' => 266000,
		'vi' => 210000,
		'cs' => 191000,
		'hu' => 187000,
		'ko' => 160000,
		'tr' => 159000,
		'ro' => 159000,
		'id' => 159000,
		'da' => 146000,
		'eo' => 143000,
		'sr' => 141000,
		'ar' => 138000,
		'lt' => 131000,
		'sk' => 122000,
		'fa' => 122000,
		'vo' => 119000,
		'he' => 118000,
		'ms' => 117000,
		'bg' => 115000,
		'sl' => 109000,
		'war' => 102000,
		'hr' => 100000,
		'et' => 83000,
		'hi' => 82000,
		'new' => 71000,
		'simple' => 70000,
		'gl' => 70000,
		'th' => 67000,
		'eu' => 66000,
		'nn' => 65000,
		'roa-rup' => 62000,
		'el' => 61000,
		'az' => 56000,
		'ht' => 54000,
		'tl' => 52000,
		'la' => 52000,
		'te' => 48000,
		'ka' => 48000,
		'mk' => 45000,
		'ceb' => 43000,
		'sh' => 41000,
		'pms' => 37000,
		'br' => 37000,
		'mr' => 34000,
		'lv' => 34000,
		'be-x-old' => 34000,
		'jv' => 33000,
		'sq' => 32000,
		'lb' => 32000,
		'cy' => 32000,
		'is' => 31000,
		'bs' => 31000,
		'ta' => 30000,
		'be' => 29000,
		'bpy' => 25000,
		'an' => 25000,
		'oc' => 24000,
		'io' => 22000,
		'bn' => 22000,
		'sw' => 21000,
		'lmo' => 20000,
		'gu' => 19000,
		'fy' => 19000,
		'ml' => 18000,
		'ur' => 17000,
		'scn' => 17000,
		'nds' => 17000,
		'af' => 17000,
		'qu' => 16000,
		'ku' => 16000,
		'zh-yue' => 15000,
		'su' => 15000,
		'ne' => 14000,
		'hy' => 14000,
		'ast' => 14000,
		'yo' => 13000,
		'nap' => 13000,
		'bat-smg' => 13000,
		'wa' => 12000,
		'ga' => 12000,
		'cv' => 12000,
		'pnb' => 11000,
		'kn' => 11000,
		'tg' => 9400,
		'yi' => 9300,
		'kk' => 9200,
		'vec' => 8900,
		'roa-tara' => 8900,
		'tt' => 8700,
		'als' => 8500,
		'zh-min-nan' => 8400,
		'gd' => 8400,
		'uz' => 8000,
		'os' => 7700,
		'pam' => 7600,
		'si' => 7500,
		'sah' => 7500,
		'arz' => 7500,
		'bug' => 7100,
		'am' => 7100,
		'mi' => 6700,
		'li' => 6600,
		'nah' => 6500,
		'hsb' => 6500,
		'sco' => 6300,
		'glk' => 6300,
		'my' => 6100,
		'mn' => 6100,
		'gan' => 6000,
		'co' => 6000,
		'ia' => 5400,
		'bcl' => 5000,
		'fiu-vro' => 4700,
		'fo' => 4700,
		'sa' => 4500,
		'nds-nl' => 4400,
		'vls' => 4300,
		'tk' => 4300,
		'bar' => 4100,
		'dv' => 4000,
		'mg' => 3900,
		'map-bms' => 3800,
		'gv' => 3800,
		'pag' => 3500,
		'nrm' => 3500,
		'ckb' => 3500,
		'ug' => 3400,
		'se' => 3200,
		'rm' => 3200,
		'mzn' => 3200,
		'diq' => 3200,
		'wuu' => 3100,
		'hif' => 3100,
		'ps' => 2900,
		'fur' => 2900,
		'bo' => 2900,
		'mt' => 2800,
		'lij' => 2800,
		'ilo' => 2800,
		'bh' => 2700,
		'sc' => 2600,
		'nov' => 2600,
		'km' => 2600,
		'csb' => 2600,
		'ang' => 2600,
		'zh-classical' => 2500,
		'lad' => 2500,
		'cbk-zam' => 2400,
		'pi' => 2300,
		'szl' => 2200,
		'stq' => 2100,
		'mrj' => 2100,
		'kw' => 2100,
		'ksh' => 2100,
		'hak' => 2100,
		'frp' => 2100,
		'so' => 2000,
		'rue' => 2000,
		'pa' => 2000,
		'nv' => 2000,
		'mhr' => 1900,
		'ky' => 1900,
		'xal' => 1800,
		'ie' => 1800,
		'haw' => 1800,
		'udm' => 1700,
		'pdc' => 1700,
		'ext' => 1700,
		'ace' => 1700,
		'to' => 1600,
		'rw' => 1600,
		'ln' => 1600,
		'kv' => 1600,
		'krc' => 1600,
		'crh' => 1600,
		'pcd' => 1500,
		'myv' => 1400,
		'gn' => 1400,
		'eml' => 1300,
		'ce' => 1300,
		'ba' => 1300,
		'pap' => 1200,
		'kab' => 1200,
		'ay' => 1200,
		'arc' => 1200,
		'wo' => 1100,
		'mwl' => 1100,
		'kl' => 1100,
		'jbo' => 1100,
		'frr' => 1100,
		'bjn' => 1100,
		'tpi' => 1000,
		'dsb' => 964,
		'lo' => 941,
		'srn' => 875,
		'ty' => 869,
		'zea' => 810,
		'ab' => 721,
		'koi' => 707,
		'or' => 706,
		'ig' => 671,
		'mdf' => 650,
		'av' => 618,
		'kg' => 608,
		'tet' => 596,
		'rmy' => 508,
		'lbe' => 500,
		'cu' => 497,
		'ks' => 467,
		'sd' => 465,
		'ltg' => 464,
		'sm' => 450,
		'kaa' => 431,
		'mo' => 401,
		'kbd' => 396,
		'bm' => 392,
		'got' => 383,
		'ik' => 356,
		'bxr' => 349,
		'iu' => 348,
		'bi' => 348,
		'as' => 343,
		'na' => 339,
		'pih' => 337,
		'chr' => 332,
		'pnt' => 331,
		'ss' => 323,
		'cdo' => 311,
		'cr' => 300,
		'ee' => 259,
		'ha' => 255,
		'rn' => 235,
		'om' => 214,
		'zu' => 213,
		'ti' => 203,
		'za' => 200,
		'ts' => 186,
		'tum' => 162,
		've' => 157,
		'sg' => 149,
		'dz' => 145,
		'ch' => 133,
		'ny' => 129,
		'fj' => 127,
		'lg' => 126,
		'st' => 120,
		'ki' => 114,
		'ff' => 111,
		'xh' => 110,
		'tn' => 101,
		'sn' => 98,
		'chy' => 60,
		'ak' => 49,
		'tw' => 48,
		'ng' => 25,
		'cho' => 20,
		'ii' => 14,
		'mh' => 11,
	);
}