Index: trunk/tools/editor_trends/config.py |
— | — | @@ -1,90 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -# -*- coding: utf-8 -*- |
4 | | -''' |
5 | | -Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
6 | | -This program is free software; you can redistribute it and/or |
7 | | -modify it under the terms of the GNU General Public License version 2 |
8 | | -as published by the Free Software Foundation. |
9 | | -This program is distributed in the hope that it will be useful, |
10 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
12 | | -See the GNU General Public License for more details, at |
13 | | -http://www.fsf.org/licenses/gpl.html |
14 | | -''' |
15 | | - |
16 | | -__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
17 | | -__email__ = 'dvanliere at gmail dot com' |
18 | | -__date__ = '2010-10-21' |
19 | | -__version__ = '0.1' |
20 | | - |
21 | | - |
22 | | -import os |
23 | | -import ConfigParser |
24 | | - |
25 | | -from utils import file_utils |
26 | | -from classes import wikiprojects |
27 | | - |
28 | | - |
29 | | -def show_choices(settings, attr): |
30 | | - choices = getattr(settings, attr).items() |
31 | | - choices.sort() |
32 | | - choices = ['%s\t%s' % (choice[0], choice[1]) for choice in choices] |
33 | | - return choices |
34 | | - |
35 | | - |
36 | | -def create_configuration(settings, args): |
37 | | - force = getattr(args, 'force', False) |
38 | | - |
39 | | - if not os.path.exists('wiki.cfg') or force: |
40 | | - config = ConfigParser.RawConfigParser() |
41 | | - project = None |
42 | | - language = None |
43 | | - dumpversion = None |
44 | | - #language_map = languages.language_map() |
45 | | - working_directory = raw_input('Please indicate where you installed Editor Trends Analytics.\nCurrent location is %s\nPress Enter to accept default.\n' % os.getcwd()) |
46 | | - input_location = raw_input('Please indicate where to store the Wikipedia dump files.\nDefault is: %s\nPress Enter to accept default.\n' % settings.input_location) |
47 | | - |
48 | | - while project not in settings.projects.keys(): |
49 | | - project = raw_input('Please indicate which project you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % settings.projects[args.project].capitalize()) |
50 | | - project = project if len(project) > 0 else args.project |
51 | | - if project not in settings.projects.keys(): |
52 | | - print 'Valid choices for a project are: %s' % ','.join(settings.projects.keys()) |
53 | | - |
54 | | - wiki = wikiprojects.Wiki(settings.encoding, project=project) |
55 | | - while language not in wiki.valid_languages: |
56 | | - language = raw_input('Please indicate which language of project %s you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % (settings.projects[project].capitalize(), language_map[args.language])) |
57 | | - if len(language) == 0: |
58 | | - language = language_map[args.language] |
59 | | - language = language if language in wiki.valid_languages else args.language |
60 | | - |
61 | | -# while dumpversion not in settings.dumpversions.keys(): |
62 | | -# choices = '\n'.join(show_choices(settings, 'dumpversions')) |
63 | | -# dumpversion = raw_input('Please indicate the version of the Wikipedia project you are analyzing.\nValid choices are:\n%s\nDefault is: 0 (%s)\nPress Enter to accept default.\n' % (choices, settings.dumpversions['0'])) |
64 | | -# if len(dumpversion) == 0: |
65 | | -# dumpversion = settings.dumpversions['0'] |
66 | | - |
67 | | - #dumpversion = settings.dumpversions[dumpversion] |
68 | | - input_location = input_location if len(input_location) > 0 else settings.input_location |
69 | | - working_directory = working_directory if len(working_directory) > 0 else os.getcwd() |
70 | | - |
71 | | - config = ConfigParser.RawConfigParser() |
72 | | - config.add_section('file_locations') |
73 | | - config.set('file_locations', 'working_directory', working_directory) |
74 | | - config.set('file_locations', 'input_location', input_location) |
75 | | - config.add_section('wiki') |
76 | | - config.set('wiki', 'project', project) |
77 | | - config.set('wiki', 'language', language) |
78 | | - #config.set('wiki', 'dumpversion', dumpversion) |
79 | | - |
80 | | - fh = file_utils.create_binary_filehandle(working_directory, 'wiki.cfg', 'wb') |
81 | | - config.write(fh) |
82 | | - fh.close() |
83 | | - |
84 | | - settings.working_directory = config.get('file_locations', 'working_directory') |
85 | | - settings.input_location = config.get('file_locations', 'input_location') |
86 | | - #settings.xml_namespace = config.get('wiki', 'dumpversion') |
87 | | - return settings |
88 | | - |
89 | | - |
90 | | -if __name__ == '__main__': |
91 | | - pass |
Index: trunk/tools/editor_trends/languages.py |
— | — | @@ -1,613 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -# coding=utf-8 |
4 | | -''' |
5 | | -Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
6 | | -This program is free software; you can redistribute it and/or |
7 | | -modify it under the terms of the GNU General Public License version 2 |
8 | | -as published by the Free Software Foundation. |
9 | | -This program is distributed in the hope that it will be useful, |
10 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
12 | | -See the GNU General Public License for more details, at |
13 | | -http://www.fsf.org/licenses/gpl.html |
14 | | -''' |
15 | | - |
16 | | -__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
17 | | -__author__email = 'dvanliere at gmail dot com' |
18 | | -__date__ = '2010-10-21' |
19 | | -__version__ = '0.1' |
20 | | - |
21 | | -''' |
22 | | -This file provides a mapping between English language names, local (native) |
23 | | -language names, and Wikipedia language codes. |
24 | | -Gothic and Burmese are not yet supported; see the entries marked "#Needs fix" (rows 455 and 559). |
25 | | -''' |
26 | | - |
27 | | -from utils import ordered_dict as odict |
28 | | -from utils import utils |
29 | | - |
30 | | -MAPPING = odict.OrderedDict([ |
31 | | -(u'English', 'en'), |
32 | | -(u'German', 'de'), |
33 | | -(u'French', 'fr'), |
34 | | -(u'Italian', 'it'), |
35 | | -(u'Polish', 'pl'), |
36 | | -(u'Japanese', 'ja'), |
37 | | -(u'Spanish', 'es'), |
38 | | -(u'Dutch', 'nl'), |
39 | | -(u'Portuguese', 'pt'), |
40 | | -(u'Russian', 'ru'), |
41 | | -(u'Swedish', 'sv'), |
42 | | -(u'Chinese', 'zh'), |
43 | | -(u'Catalan', 'ca'), |
44 | | -(u'Norwegian', 'no'), |
45 | | -(u'Bokmål', 'no'), |
46 | | -(u'Finnish', 'fi'), |
47 | | -(u'Ukrainian', 'uk'), |
48 | | -(u'Hungarian', 'hu'), |
49 | | -(u'Czech', 'cs'), |
50 | | -(u'Romanian', 'ro'), |
51 | | -(u'Turkish', 'tr'), |
52 | | -(u'Korean', 'ko'), |
53 | | -(u'Vietnamese', 'vi'), |
54 | | -(u'Danish', 'da'), |
55 | | -(u'Arabic', 'ar'), |
56 | | -(u'Esperanto', 'eo'), |
57 | | -(u'Serbian', 'sr'), |
58 | | -(u'Indonesian', 'id'), |
59 | | -(u'Lithuanian', 'lt'), |
60 | | -(u'Volapük', 'vo'), |
61 | | -(u'Slovak', 'sk'), |
62 | | -(u'Hebrew', 'he'), |
63 | | -(u'Bulgarian', 'bg'), |
64 | | -(u'Persian', 'fa'), |
65 | | -(u'Slovenian', 'sl'), |
66 | | -(u'Waray-Waray', 'war'), |
67 | | -(u'Croatian', 'hr'), |
68 | | -(u'Estonian', 'et'), |
69 | | -(u'Malay', 'ms'), |
70 | | -(u'Newar', 'new'), |
71 | | -(u'Nepal Bhasa', 'new'), |
72 | | -(u'Simple English', 'simple'), |
73 | | -(u'Galician', 'gl'), |
74 | | -(u'Thai', 'th'), |
75 | | -(u'Aromanian', 'roa-rup'), |
76 | | -(u'Nynorsk', 'nn'), |
77 | | -(u'Basque', 'eu'), |
78 | | -(u'Hindi', 'hi'), |
79 | | -(u'Greek', 'el'), |
80 | | -(u'Haitian', 'ht'), |
81 | | -(u'Latin', 'la'), |
82 | | -(u'Telugu', 'te'), |
83 | | -(u'Georgian', 'ka'), |
84 | | -(u'Cebuano', 'ceb'), |
85 | | -(u'Macedonian', 'mk'), |
86 | | -(u'Azeri', 'az'), |
87 | | -(u'Tagalog', 'tl'), |
88 | | -(u'Breton', 'br'), |
89 | | -(u'Serbo-Croatian', 'sh'), |
90 | | -(u'Marathi', 'mr'), |
91 | | -(u'Luxembourgish', 'lb'), |
92 | | -(u'Javanese', 'jv'), |
93 | | -(u'Latvian', 'lv'), |
94 | | -(u'Bosnian', 'bs'), |
95 | | -(u'Icelandic', 'is'), |
96 | | -(u'Welsh', 'cy'), |
97 | | -(u'Belarusian', 'be-x-old'), |
98 | | -(u'Taraškievica', 'be-x-old'), |
99 | | -(u'Piedmontese', 'pms'), |
100 | | -(u'Albanian', 'sq'), |
101 | | -(u'Tamil', 'ta'), |
102 | | -(u'Bishnupriya Manipuri', 'bpy'), |
103 | | -(u'Belarusian', 'be'), |
104 | | -(u'Aragonese', 'an'), |
105 | | -(u'Occitan', 'oc'), |
106 | | -(u'Bengali', 'bn'), |
107 | | -(u'Swahili', 'sw'), |
108 | | -(u'Ido', 'io'), |
109 | | -(u'Ripuarian', 'ksh'), |
110 | | -(u'Lombard', 'lmo'), |
111 | | -(u'West Frisian', 'fy'), |
112 | | -(u'Gujarati', 'gu'), |
113 | | -(u'Low Saxon', 'nds'), |
114 | | -(u'Afrikaans', 'af'), |
115 | | -(u'Sicilian', 'scn'), |
116 | | -(u'Quechua', 'qu'), |
117 | | -(u'Kurdish', 'ku'), |
118 | | -(u'Urdu', 'ur'), |
119 | | -(u'Sundanese', 'su'), |
120 | | -(u'Malayalam', 'ml'), |
121 | | -(u'Cantonese', 'zh-yue'), |
122 | | -(u'Asturian', 'ast'), |
123 | | -(u'Neapolitan', 'nap'), |
124 | | -(u'Samogitian', 'bat-smg'), |
125 | | -(u'Walloon', 'wa'), |
126 | | -(u'Chuvash', 'cv'), |
127 | | -(u'Irish', 'ga'), |
128 | | -(u'Armenian', 'hy'), |
129 | | -(u'Yoruba', 'yo'), |
130 | | -(u'Kannada', 'kn'), |
131 | | -(u'Tajik', 'tg'), |
132 | | -(u'Tarantino', 'roa-tara'), |
133 | | -(u'Venetian', 'vec'), |
134 | | -(u'Western Panjabi', 'pnb'), |
135 | | -(u'Nepali', 'ne'), |
136 | | -(u'Scottish Gaelic', 'gd'), |
137 | | -(u'Yiddish', 'yi'), |
138 | | -(u'Min Nan', 'zh-min-nan'), |
139 | | -(u'Uzbek', 'uz'), |
140 | | -(u'Tatar', 'tt'), |
141 | | -(u'Kapampangan', 'pam'), |
142 | | -(u'Ossetian', 'os'), |
143 | | -(u'Sakha', 'sah'), |
144 | | -(u'Alemannic', 'als'), |
145 | | -(u'Maori', 'mi'), |
146 | | -(u'Egyptian Arabic', 'arz'), |
147 | | -(u'Kazakh', 'kk'), |
148 | | -(u'Nahuatl', 'nah'), |
149 | | -(u'Limburgian', 'li'), |
150 | | -(u'Upper Sorbian', 'hsb'), |
151 | | -(u'Gilaki', 'glk'), |
152 | | -(u'Corsican', 'co'), |
153 | | -(u'Gan', 'gan'), |
154 | | -(u'Amharic', 'am'), |
155 | | -(u'Mongolian', 'mn'), |
156 | | -(u'Interlingua', 'ia'), |
157 | | -(u'Central Bicolano', 'bcl'), |
158 | | -(u'Võro', 'fiu-vro'), |
159 | | -(u'Dutch Low Saxon', 'nds-nl'), |
160 | | -(u'Faroese', 'fo'), |
161 | | -(u'Turkmen', 'tk'), |
162 | | -(u'Scots', 'sco'), |
163 | | -(u'West Flemish', 'vls'), |
164 | | -(u'Sinhalese', 'si'), |
165 | | -(u'Sanskrit', 'sa'), |
166 | | -(u'Bavarian', 'bar'), |
167 | | -(u'Burmese', 'my'), |
168 | | -(u'Manx', 'gv'), |
169 | | -(u'Divehi', 'dv'), |
170 | | -(u'Norman', 'nrm'), |
171 | | -(u'Pangasinan', 'pag'), |
172 | | -(u'Romansh', 'rm'), |
173 | | -(u'Banyumasan', 'map-bms'), |
174 | | -(u'Zazaki', 'diq'), |
175 | | -(u'Sorani', 'ckb'), |
176 | | -(u'Northern Sami', 'se'), |
177 | | -(u'Mazandarani', 'mzn'), |
178 | | -(u'Wu', 'wuu'), |
179 | | -(u'Uyghur', 'ug'), |
180 | | -(u'Friulian', 'fur'), |
181 | | -(u'Ligurian', 'lij'), |
182 | | -(u'Maltese', 'mt'), |
183 | | -(u'Bihari', 'bh'), |
184 | | -(u'Novial', 'nov'), |
185 | | -(u'Malagasy', 'mg'), |
186 | | -(u'Kashubian', 'csb'), |
187 | | -(u'Ilokano', 'ilo'), |
188 | | -(u'Sardinian', 'sc'), |
189 | | -(u'Classical Chinese', 'zh-classical'), |
190 | | -(u'Khmer', 'km'), |
191 | | -(u'Ladino', 'lad'), |
192 | | -(u'Pali', 'pi'), |
193 | | -(u'Anglo-Saxon', 'ang'), |
194 | | -(u'Zamboanga Chavacano', 'cbk-zam'), |
195 | | -(u'Tibetan', 'bo'), |
196 | | -(u'Fiji Hindi', 'hif'), |
197 | | -(u'Franco-Provençal', 'frp'), |
198 | | -(u'Arpitan', 'frp'), |
199 | | -(u'Hakka', 'hak'), |
200 | | -(u'Cornish', 'kw'), |
201 | | -(u'Punjabi', 'pa'), |
202 | | -(u'Pashto', 'ps'), |
203 | | -(u'Kalmyk', 'xal'), |
204 | | -(u'Silesian', 'szl'), |
205 | | -(u'Pennsylvania German', 'pdc'), |
206 | | -(u'Hawaiian', 'haw'), |
207 | | -(u'Saterland Frisian', 'stq'), |
208 | | -(u'Interlingue', 'ie'), |
209 | | -(u'Navajo', 'nv'), |
210 | | -(u'Fijian', 'fj'), |
211 | | -(u'Crimean Tatar', 'crh'), |
212 | | -(u'Komi', 'kv'), |
213 | | -(u'Tongan', 'to'), |
214 | | -(u'Acehnese', 'ace'), |
215 | | -(u'Somali', 'so'), |
216 | | -(u'Erzya', 'myv'), |
217 | | -(u'Guarani', 'gn'), |
218 | | -(u'Karachay-Balkar', 'krc'), |
219 | | -(u'Extremaduran', 'ext'), |
220 | | -(u'Lingala', 'ln'), |
221 | | -(u'Kirghiz', 'ky'), |
222 | | -(u'Meadow Mari', 'mhr'), |
223 | | -(u'Assyrian Neo-Aramaic', 'arc'), |
224 | | -(u'Emilian-Romagnol', 'eml'), |
225 | | -(u'Lojban', 'jbo'), |
226 | | -(u'Picard', 'pcd'), |
227 | | -(u'Aymara', 'ay'), |
228 | | -(u'Wolof', 'wo'), |
229 | | -(u'Tumbuka', 'tum'), |
230 | | -(u'Kabyle', 'kab'), |
231 | | -(u'Bashkir', 'ba'), |
232 | | -(u'North Frisian', 'frr'), |
233 | | -(u'Tahitian', 'ty'), |
234 | | -(u'Tok Pisin', 'tpi'), |
235 | | -(u'Papiamentu', 'pap'), |
236 | | -(u'Zealandic', 'zea'), |
237 | | -(u'Sranan', 'srn'), |
238 | | -(u'Greenlandic', 'kl'), |
239 | | -(u'Udmurt', 'udm'), |
240 | | -(u'Chechen', 'ce'), |
241 | | -(u'Igbo', 'ig'), |
242 | | -(u'Komi-Permyak', 'koi'), |
243 | | -(u'Oriya', 'or'), |
244 | | -(u'Lower Sorbian', 'dsb'), |
245 | | -(u'Kongo', 'kg'), |
246 | | -(u'Lao', 'lo'), |
247 | | -(u'Abkhazian', 'ab'), |
248 | | -(u'Moksha', 'mdf'), |
249 | | -(u'Romani', 'rmy'), |
250 | | -(u'Hill Mari', 'mrj'), |
251 | | -(u'Banjar', 'bjn'), |
252 | | -(u'Old Church Slavonic', 'cu'), |
253 | | -(u'Mirandese', 'mwl'), |
254 | | -(u'Karakalpak', 'kaa'), |
255 | | -(u'Samoan', 'sm'), |
256 | | -(u'Moldovan', 'mo'), |
257 | | -(u'Tetum', 'tet'), |
258 | | -(u'Avar', 'av'), |
259 | | -(u'Kashmiri', 'ks'), |
260 | | -(u'Gothic', 'got'), |
261 | | -(u'Sindhi', 'sd'), |
262 | | -(u'Bambara', 'bm'), |
263 | | -(u'Nauruan', 'na'), |
264 | | -(u'Norfolk', 'pih'), |
265 | | -(u'Pontic', 'pnt'), |
266 | | -(u'Inuktitut', 'iu'), |
267 | | -(u'Inupiak', 'ik'), |
268 | | -(u'Bislama', 'bi'), |
269 | | -(u'Cherokee', 'chr'), |
270 | | -(u'Assamese', 'as'), |
271 | | -(u'Min Dong', 'cdo'), |
272 | | -(u'Ewe', 'ee'), |
273 | | -(u'Swati', 'ss'), |
274 | | -(u'Oromo', 'om'), |
275 | | -(u'Zhuang', 'za'), |
276 | | -(u'Zulu', 'zu'), |
277 | | -(u'Tigrinya', 'ti'), |
278 | | -(u'Venda', 've'), |
279 | | -(u'Tsonga', 'ts'), |
280 | | -(u'Hausa', 'ha'), |
281 | | -(u'Dzongkha', 'dz'), |
282 | | -(u'Sango', 'sg'), |
283 | | -(u'Chamorro', 'ch'), |
284 | | -(u'Cree', 'cr'), |
285 | | -(u'Xhosa', 'xh'), |
286 | | -(u'Akan', 'ak'), |
287 | | -(u'Sesotho', 'st'), |
288 | | -(u'Kinyarwanda', 'rw'), |
289 | | -(u'Tswana', 'tn'), |
290 | | -(u'Kikuyu', 'ki'), |
291 | | -(u'Buryat', 'bxr'), |
292 | | -(u'Buginese', 'bug'), |
293 | | -(u'Chichewa', 'ny'), |
294 | | -(u'Lak', 'lbe'), |
295 | | -(u'Twi', 'tw'), |
296 | | -(u'Shona', 'sn'), |
297 | | -(u'Kirundi', 'rn'), |
298 | | -(u'Fula', 'ff'), |
299 | | -(u'Cheyenne', 'chy'), |
300 | | -(u'Luganda', 'lg'), |
301 | | -(u'Ndonga', 'ng'), |
302 | | -(u'Sichuan Yi', 'ii'), |
303 | | -(u'Choctaw', 'cho'), |
304 | | -(u'Marshallese', 'mh'), |
305 | | -(u'Afar', 'aa'), |
306 | | -(u'Kuanyama', 'kj'), |
307 | | -(u'Hiri Motu', 'ho'), |
308 | | -(u'Muscogee', 'mus'), |
309 | | -(u'Kanuri', 'kr'), |
310 | | -(u'Herero', 'hz'), |
311 | | -(u'English', 'en'), |
312 | | -(u'Deutsch', 'de'), |
313 | | -(u'Français', 'fr'), |
314 | | -(u'Italiano', 'it'), |
315 | | -(u'Polski', 'pl'), |
316 | | -(u'日本語', 'ja'), |
317 | | -(u'Español', 'es'), |
318 | | -(u'Nederlands', 'nl'), |
319 | | -(u'Português', 'pt'), |
320 | | -(u'Русский', 'ru'), |
321 | | -(u'Svenska', 'sv'), |
322 | | -(u'中文', 'zh'), |
323 | | -(u'Català', 'ca'), |
324 | | -(u'Norsk', 'no'), |
325 | | -(u'Bokmål', 'no'), |
326 | | -(u'Suomi', 'fi'), |
327 | | -(u'Українська', 'uk'), |
328 | | -(u'Magyar', 'hu'), |
329 | | -(u'Čeština', 'cs'), |
330 | | -(u'Română', 'ro'), |
331 | | -(u'Türkçe', 'tr'), |
332 | | -(u'한국어', 'ko'), |
333 | | -(u'Tiếng Việt', 'vi'), |
334 | | -(u'Dansk', 'da'), |
335 | | -(u'العربية', 'ar'), |
336 | | -(u'Esperanto', 'eo'), |
337 | | -(u'Српски', 'sr'), |
338 | | -(u'Srpski', 'sr'), |
339 | | -(u'Bahasa Indonesia', 'id'), |
340 | | -(u'Lietuvių', 'lt'), |
341 | | -(u'Volapük', 'vo'), |
342 | | -(u'Slovenčina', 'sk'), |
343 | | -(u'עברית', 'he'), |
344 | | -(u'Български', 'bg'), |
345 | | -(u'فارسی', 'fa'), |
346 | | -(u'Slovenščina', 'sl'), |
347 | | -(u'Winaray', 'war'), |
348 | | -(u'Hrvatski', 'hr'), |
349 | | -(u'Eesti', 'et'), |
350 | | -(u'Bahasa Melayu', 'ms'), |
351 | | -(u'नेपाल भाषा', 'new'), |
352 | | -(u'Simple English', 'simple'), |
353 | | -(u'Galego', 'gl'), |
354 | | -(u'ไทย', 'th'), |
355 | | -(u'Armãneashce', 'roa-rup'), |
356 | | -(u'Nynorsk', 'nn'), |
357 | | -(u'Euskara', 'eu'), |
358 | | -(u'हिन्दी', 'hi'), |
359 | | -(u'Ελληνικά', 'el'), |
360 | | -(u'Krèyol ayisyen', 'ht'), |
361 | | -(u'Latina', 'la'), |
362 | | -(u'తెలుగు', 'te'), |
363 | | -(u'ქართული', 'ka'), |
364 | | -(u'Sinugboanong Binisaya', 'ceb'), |
365 | | -(u'Македонски', 'mk'), |
366 | | -(u'Azərbaycan', 'az'), |
367 | | -(u'Tagalog', 'tl'), |
368 | | -(u'Brezhoneg', 'br'), |
369 | | -(u'Srpskohrvatski', 'sh'), |
370 | | -(u'Српскохрватски', 'sh'), |
371 | | -(u'मराठी', 'mr'), |
372 | | -(u'Lëtzebuergesch', 'lb'), |
373 | | -(u'Basa Jawa', 'jv'), |
374 | | -(u'Latviešu', 'lv'), |
375 | | -(u'Bosanski', 'bs'), |
376 | | -(u'Íslenska', 'is'), |
377 | | -(u'Cymraeg', 'cy'), |
378 | | -(u'Беларуская', 'be-x-old'), |
379 | | -(u'тарашкевіца', 'be-x-old'), |
380 | | -(u'Piemontèis', 'pms'), |
381 | | -(u'Shqip', 'sq'), |
382 | | -(u'தமிழ்', 'ta'), |
383 | | -(u'ইমার ঠার', 'bpy'), |
384 | | -(u'বিষ্ণুপ্রিয়া মণিপুরী', 'bpy'), |
385 | | -(u'Беларуская', 'be'), |
386 | | -(u'Aragonés', 'an'), |
387 | | -(u'Occitan', 'oc'), |
388 | | -(u'বাংলা', 'bn'), |
389 | | -(u'Kiswahili', 'sw'), |
390 | | -(u'Ido', 'io'), |
391 | | -(u'Ripoarisch', 'ksh'), |
392 | | -(u'Lumbaart', 'lmo'), |
393 | | -(u'Frysk', 'fy'), |
394 | | -(u'ગુજરાતી', 'gu'), |
395 | | -(u'Plattdüütsch', 'nds'), |
396 | | -(u'Afrikaans', 'af'), |
397 | | -(u'Sicilianu', 'scn'), |
398 | | -(u'Runa Simi', 'qu'), |
399 | | -(u'Kurdî', 'ku'), |
400 | | -(u'كوردی', 'ku'), |
401 | | -(u'اردو', 'ur'), |
402 | | -(u'Basa Sunda', 'su'), |
403 | | -(u'മലയാളം', 'ml'), |
404 | | -(u'粵語', 'zh-yue'), |
405 | | -(u'Asturianu', 'ast'), |
406 | | -(u'Nnapulitano', 'nap'), |
407 | | -(u'Žemaitėška', 'bat-smg'), |
408 | | -(u'Walon', 'wa'), |
409 | | -(u'Чăваш', 'cv'), |
410 | | -(u'Gaeilge', 'ga'), |
411 | | -(u'Հայերեն', 'hy'), |
412 | | -(u'Yorùbá', 'yo'), |
413 | | -(u'ಕನ್ನಡ', 'kn'), |
414 | | -(u'Тоҷикӣ', 'tg'), |
415 | | -(u'Tarandíne', 'roa-tara'), |
416 | | -(u'Vèneto', 'vec'), |
417 | | -(u'شاہ مکھی پنجابی', 'pnb'), |
418 | | -(u'Shāhmukhī Pañjābī', 'pnb'), |
419 | | -(u'नेपाली', 'ne'), |
420 | | -(u'Gàidhlig', 'gd'), |
421 | | -(u'ייִדיש', 'yi'), |
422 | | -(u'Bân-lâm-gú', 'zh-min-nan'), |
423 | | -(u'O‘zbek', 'uz'), |
424 | | -(u'Tatarça', 'tt'), |
425 | | -(u'Татарча', 'tt'), |
426 | | -(u'Kapampangan', 'pam'), |
427 | | -(u'Иронау', 'os'), |
428 | | -(u'Саха тыла', 'sah'), |
429 | | -(u'Saxa Tyla', 'sah'), |
430 | | -(u'Alemannisch', 'als'), |
431 | | -(u'Māori', 'mi'), |
432 | | -(u'مصرى', 'arz'), |
433 | | -(u'Maṣrī', 'arz'), |
434 | | -(u'Қазақша', 'kk'), |
435 | | -(u'Nāhuatl', 'nah'), |
436 | | -(u'Limburgs', 'li'), |
437 | | -(u'Hornjoserbsce', 'hsb'), |
438 | | -(u'گیلکی', 'glk'), |
439 | | -(u'Corsu', 'co'), |
440 | | -(u'贛語', 'gan'), |
441 | | -(u'አማርኛ', 'am'), |
442 | | -(u'Монгол', 'mn'), |
443 | | -(u'Interlingua', 'ia'), |
444 | | -(u'Bikol', 'bcl'), |
445 | | -(u'Võro', 'fiu-vro'), |
446 | | -(u'Nedersaksisch', 'nds-nl'), |
447 | | -(u'Føroyskt', 'fo'), |
448 | | -(u'تركمن ', 'tk'), |
449 | | -(u'Туркмен', 'tk'), |
450 | | -(u'Scots', 'sco'), |
451 | | -(u'West-Vlams', 'vls'), |
452 | | -(u'සිංහල', 'si'), |
453 | | -(u'संस्कृतम्', 'sa'), |
454 | | -(u'Boarisch', 'bar'), |
455 | | -(u'မ္ရန္မာစာ', 'my'), #Needs fix |
456 | | -(u'Gaelg', 'gv'), |
457 | | -(u'ދިވެހިބަސް', 'dv'), |
458 | | -(u'Nouormand', 'nrm'), |
459 | | -(u'Normaund', 'nrm'), |
460 | | -(u'Pangasinan', 'pag'), |
461 | | -(u'Rumantsch', 'rm'), |
462 | | -(u'Basa Banyumasan', 'map-bms'), |
463 | | -(u'Zazaki', 'diq'), |
464 | | -(u'Soranî', 'ckb'), |
465 | | -(u'کوردی', 'ckb'), |
466 | | -(u'Sámegiella', 'se'), |
467 | | -(u'مَزِروني', 'mzn'), |
468 | | -(u'吴语', 'wuu'), |
469 | | -(u'Oyghurque', 'ug'), |
470 | | -(u'Furlan', 'fur'), |
471 | | -(u'Líguru', 'lij'), |
472 | | -(u'Malti', 'mt'), |
473 | | -(u'भोजपुरी', 'bh'), |
474 | | -(u'Novial', 'nov'), |
475 | | -(u'Malagasy', 'mg'), |
476 | | -(u'Kaszëbsczi', 'csb'), |
477 | | -(u'Ilokano', 'ilo'), |
478 | | -(u'Sardu', 'sc'), |
479 | | -(u'古文', 'zh-classical'), |
480 | | -(u'文言文', 'zh-classical'), |
481 | | -(u'ភាសាខ្មែរ', 'km'), |
482 | | -(u'Dzhudezmo', 'lad'), |
483 | | -(u'पाऴि', 'pi'), |
484 | | -(u'Englisc', 'ang'), |
485 | | -(u'Chavacano de Zamboanga', 'cbk-zam'), |
486 | | -(u'བོད་སྐད', 'bo'), |
487 | | -(u'Fiji Hindi', 'hif'), |
488 | | -(u'Arpitan', 'frp'), |
489 | | -(u'Hak-kâ-fa', 'hak'), |
490 | | -(u'客家話', 'hak'), |
491 | | -(u'Kernewek', 'kw'), |
492 | | -(u'Karnuack', 'kw'), |
493 | | -(u'ਪੰਜਾਬੀ', 'pa'), |
494 | | -(u'پښتو', 'ps'), |
495 | | -(u'Хальмг', 'xal'), |
496 | | -(u'Ślůnski', 'szl'), |
497 | | -(u'Deitsch', 'pdc'), |
498 | | -(u'Hawai`i', 'haw'), |
499 | | -(u'Seeltersk', 'stq'), |
500 | | -(u'Interlingue', 'ie'), |
501 | | -(u'Diné bizaad', 'nv'), |
502 | | -(u'Na Vosa Vakaviti', 'fj'), |
503 | | -(u'Qırımtatarca', 'crh'), |
504 | | -(u'Коми', 'kv'), |
505 | | -(u'faka Tonga', 'to'), |
506 | | -(u'Bahsa Acèh', 'ace'), |
507 | | -(u'Soomaaliga', 'so'), |
508 | | -(u'Эрзянь', 'myv'), |
509 | | -(u'Erzjanj Kelj', 'myv'), |
510 | | -(u"Avañe'ẽ", 'gn'), |
511 | | -(u'Къарачай-Малкъар', 'krc'), |
512 | | -(u'Qarachay-Malqar', 'krc'), |
513 | | -(u'Estremeñu', 'ext'), |
514 | | -(u'Lingala', 'ln'), |
515 | | -(u'Кыргызча', 'ky'), |
516 | | -(u'Олык Марий', 'mhr'), |
517 | | -(u'Olyk Marij', 'mhr'), |
518 | | -(u'ܐܪܡܝܐ', 'arc'), |
519 | | -(u'Emiliàn e rumagnòl', 'eml'), |
520 | | -(u'Lojban', 'jbo'), |
521 | | -(u'Picard', 'pcd'), |
522 | | -(u'Aymar', 'ay'), |
523 | | -(u'Wolof', 'wo'), |
524 | | -(u'chiTumbuka', 'tum'), |
525 | | -(u'Taqbaylit', 'kab'), |
526 | | -(u'Башҡорт', 'ba'), |
527 | | -(u'Frasch', 'frr'), |
528 | | -(u'Reo Mā`ohi', 'ty'), |
529 | | -(u'Tok Pisin', 'tpi'), |
530 | | -(u'Papiamentu', 'pap'), |
531 | | -(u'Zeêuws', 'zea'), |
532 | | -(u'Sranantongo', 'srn'), |
533 | | -(u'Kalaallisut', 'kl'), |
534 | | -(u'Удмурт кыл', 'udm'), |
535 | | -(u'Нохчийн', 'ce'), |
536 | | -(u'Igbo', 'ig'), |
537 | | -(u'Перем Коми', 'koi'), |
538 | | -(u'Perem Komi', 'koi'), |
539 | | -(u'ଓଡ଼ିଆ', 'or'), |
540 | | -(u'Dolnoserbski', 'dsb'), |
541 | | -(u'KiKongo', 'kg'), |
542 | | -(u'ລາວ', 'lo'), |
543 | | -(u'Аҧсуа', 'ab'), |
544 | | -(u'Мокшень', 'mdf'), |
545 | | -(u'Mokshanj Kälj', 'mdf'), |
546 | | -(u'romani - रोमानी', 'rmy'), |
547 | | -(u'Кырык Мары', 'mrj'), |
548 | | -(u'Kyryk Mary', 'mrj'), |
549 | | -(u'Bahasa Banjar', 'bjn'), |
550 | | -(u'Словѣньскъ', 'cu'), |
551 | | -(u'Páigina Percipal', 'mwl'), |
552 | | -(u'Qaraqalpaqsha', 'kaa'), |
553 | | -(u'Gagana Samoa', 'sm'), |
554 | | -(u'Молдовеняскэ', 'mo'), |
555 | | -(u'Tetun', 'tet'), |
556 | | -(u'Авар', 'av'), |
557 | | -(u'कश्मीरी', 'ks'), |
558 | | -(u'كشميري', 'ks'), |
559 | | -(u'𐌲𐌿𐍄𐌹𐍃𐌺', 'got'), #Needs fix |
560 | | -(u'سنڌي، سندھی ، सिन्ध', 'sd'), |
561 | | -(u'Bamanankan', 'bm'), |
562 | | -(u'dorerin Naoero', 'na'), |
563 | | -(u'Norfuk', 'pih'), |
564 | | -(u'Ποντιακά', 'pnt'), |
565 | | -(u'ᐃᓄᒃᑎᑐᑦ', 'iu'), |
566 | | -(u'Iñupiak', 'ik'), |
567 | | -(u'Bislama', 'bi'), |
568 | | -(u'ᏣᎳᎩ', 'chr'), |
569 | | -(u'অসমীয়া', 'as'), |
570 | | -(u'Mìng-dĕ̤ng-ngṳ̄', 'cdo'), |
571 | | -(u'Eʋegbe', 'ee'), |
572 | | -(u'SiSwati', 'ss'), |
573 | | -(u'Oromoo', 'om'), |
574 | | -(u'Cuengh', 'za'), |
575 | | -(u'isiZulu', 'zu'), |
576 | | -(u'ትግርኛ', 'ti'), |
577 | | -(u'Tshivenda', 've'), |
578 | | -(u'Xitsonga', 'ts'), |
579 | | -(u'هَوُسَ', 'ha'), |
580 | | -(u'ཇོང་ཁ', 'dz'), |
581 | | -(u'Sängö', 'sg'), |
582 | | -(u'Chamoru', 'ch'), |
583 | | -(u'Nehiyaw', 'cr'), |
584 | | -(u'isiXhosa', 'xh'), |
585 | | -(u'Akana', 'ak'), |
586 | | -(u'Sesotho', 'st'), |
587 | | -(u'Ikinyarwanda', 'rw'), |
588 | | -(u'Setswana', 'tn'), |
589 | | -(u'Gĩkũyũ', 'ki'), |
590 | | -(u'Буряад', 'bxr'), |
591 | | -(u'Basa Ugi', 'bug'), |
592 | | -(u'Chi-Chewa', 'ny'), |
593 | | -(u'Лакку', 'lbe'), |
594 | | -(u'Twi', 'tw'), |
595 | | -(u'chiShona', 'sn'), |
596 | | -(u'Kirundi', 'rn'), |
597 | | -(u'Fulfulde', 'ff'), |
598 | | -(u'Tsetsêhestâhese', 'chy'), |
599 | | -(u'Luganda', 'lg'), |
600 | | -(u'Oshiwambo', 'ng'), |
601 | | -(u'ꆇꉙ', 'ii'), |
602 | | -(u'Choctaw', 'cho'), |
603 | | -(u'Ebon', 'mh'), |
604 | | -(u'Afar', 'aa'), |
605 | | -(u'Kuanyama', 'kj'), |
606 | | -(u'Hiri Motu', 'ho'), |
607 | | -(u'Muskogee', 'mus'), |
608 | | -(u'Kanuri', 'kr'), |
609 | | -(u'Otsiherero', 'hz'), |
610 | | -]) |
611 | | - |
612 | | - |
613 | | -def language_map(): |
614 | | - return utils.invert_dict(MAPPING) |
Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -24,6 +24,7 @@ |
25 | 25 | import datetime |
26 | 26 | from argparse import ArgumentParser |
27 | 27 | from argparse import RawTextHelpFormatter |
| 28 | +import ConfigParser |
28 | 29 | |
29 | 30 | |
30 | 31 | import configuration |
— | — | @@ -44,15 +45,61 @@ |
45 | 46 | from analyses import count_editors |
46 | 47 | |
47 | 48 | |
| 49 | +def show_choices(settings, attr): |
| 50 | + choices = getattr(settings, attr).items() |
| 51 | + choices.sort() |
| 52 | + choices = ['%s\t%s' % (choice[0], choice[1]) for choice in choices] |
| 53 | + return choices |
48 | 54 | |
49 | 55 | |
| 56 | + |
50 | 57 | def config_launcher(properties, settings, logger): |
51 | 58 | ''' |
52 | 59 | Config launcher is used to reconfigure the editor trends toolkit. |
53 | 60 | ''' |
54 | | - settings.load_configuration() |
| 61 | +# settings.load_configuration() |
| 62 | +# |
| 63 | + if not os.path.exists('wiki.cfg') or properties.force: |
| 64 | + config = ConfigParser.RawConfigParser() |
| 65 | + project = None |
| 66 | + language = None |
| 67 | + #language_map = languages.language_map() |
| 68 | + working_directory = raw_input('Please indicate where you installed Editor Trends Analytics.\nCurrent location is %s\nPress Enter to accept default.\n' % os.getcwd()) |
| 69 | + input_location = raw_input('Please indicate where to store the Wikipedia dump files.\nDefault is: %s\nPress Enter to accept default.\n' % settings.input_location) |
55 | 70 | |
| 71 | + while project not in properties.projects.keys(): |
| 72 | + project = raw_input('Please indicate which project you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % properties.projects[properties.short_project].capitalize()) |
| 73 | + project = project if len(project) > 0 else properties.short_project |
| 74 | + if project not in properties.projects.keys(): |
| 75 | + print 'Valid choices for a project are: %s' % ','.join(properties.projects.keys()) |
56 | 76 | |
| 77 | + while language not in properties.valid_languages: |
| 78 | + language = raw_input('Please indicate which language of project %s you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % (properties.projects[project].capitalize(), properties.language)) |
| 79 | + if len(language) == 0: |
| 80 | + language = properties.language_code |
| 81 | + language = language if language in properties.valid_languages else properties.language |
| 82 | + |
| 83 | + input_location = input_location if len(input_location) > 0 else settings.input_location |
| 84 | + working_directory = working_directory if len(working_directory) > 0 else os.getcwd() |
| 85 | + |
| 86 | + config = ConfigParser.RawConfigParser() |
| 87 | + config.add_section('file_locations') |
| 88 | + config.set('file_locations', 'working_directory', working_directory) |
| 89 | + config.set('file_locations', 'input_location', input_location) |
| 90 | + config.add_section('wiki') |
| 91 | + config.set('wiki', 'project', project) |
| 92 | + config.set('wiki', 'language', language) |
| 93 | + |
| 94 | + fh = file_utils.create_binary_filehandle(working_directory, 'wiki.cfg', 'wb') |
| 95 | + config.write(fh) |
| 96 | + fh.close() |
| 97 | + |
| 98 | + settings.working_directory = config.get('file_locations', 'working_directory') |
| 99 | + settings.input_location = config.get('file_locations', 'input_location') |
| 100 | + |
| 101 | + |
| 102 | + |
| 103 | + |
57 | 104 | def downloader_launcher(properties, settings, logger): |
58 | 105 | ''' |
59 | 106 | This launcher calls the dump downloader to download a Wikimedia dump file. |
Index: trunk/tools/editor_trends/classes/wikiprojects.py |
— | — | @@ -66,8 +66,9 @@ |
67 | 67 | self.short_project = 'wiki' |
68 | 68 | self.long_project = 'wikipedia' if self.short_project == 'wiki' else \ |
69 | 69 | self.projects.get(self.short_project, None) |
70 | | - self.language_code = determine_default_language() |
71 | | - self.language = self.get_english_language_name() |
| 70 | + |
| 71 | + self.language = determine_default_language() |
| 72 | + self.language_code = MAPPING[self.language] |
72 | 73 | self.valid_languages = self.project_supports_language() |
73 | 74 | |
74 | 75 | if args: |
— | — | @@ -85,7 +86,7 @@ |
86 | 87 | self.collection = self.get_value('collection') |
87 | 88 | self.ignore = self.get_value('except') |
88 | 89 | self.clean = self.get_value('new') |
89 | | - |
| 90 | + self.force = self.get_value('force') |
90 | 91 | self.project = self.get_projectname() |
91 | 92 | self.location = self.get_project_location() |
92 | 93 | self.filename = self.generate_wikidump_filename() |
— | — | @@ -317,7 +318,8 @@ |
318 | 319 | Wikipedia project is most likely of interest |
319 | 320 | ''' |
320 | 321 | language_code = locale.getdefaultlocale()[0] |
321 | | - return language_code.split('_')[0] |
| 322 | + language_code = language_code.split('_')[0] |
| 323 | + return get_language(language_code) |
322 | 324 | |
323 | 325 | |
324 | 326 | def get_language(language_code): |