Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -17,9 +17,12 @@ |
18 | 18 | __date__ = '2010-10-21' |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
| 21 | +import os |
| 22 | +import sys |
21 | 23 | from argparse import ArgumentParser |
22 | 24 | from argparse import RawTextHelpFormatter |
23 | 25 | |
| 26 | + |
24 | 27 | import progressbar |
25 | 28 | |
26 | 29 | import settings |
— | — | @@ -27,44 +30,68 @@ |
28 | 31 | from utils import utils |
29 | 32 | from utils import dump_downloader |
30 | 33 | import split_xml_file |
| 34 | +import config |
31 | 35 | |
| 36 | + |
32 | 37 | def get_value(args, key): |
33 | 38 | return getattr(args, key, None) |
34 | 39 | |
| 40 | + |
35 | 41 | def config_launcher(args): |
36 | | - pass |
| 42 | + config.load_configuration(args) |
37 | 43 | |
| 44 | + |
| 45 | +def retrieve_language(args): |
| 46 | + language = get_value(args, 'language') |
| 47 | + language = language.title() |
| 48 | + return languages.MAPPING.get(language, None) |
| 49 | + |
| 50 | +def retrieve_project(args): |
| 51 | + project = get_value(args, 'project') |
| 52 | + if project != 'wiki': |
| 53 | + project = settings.WIKIMEDIA_PROJECTS.get(project, None) |
| 54 | + return project |
| 55 | + |
| 56 | + |
38 | 57 | def dump_downloader_launcher(args): |
39 | 58 | print 'dump downloader' |
40 | | - language = get_value(args, 'language') |
41 | | - location = get_value(args, 'store') |
42 | | - filename = '%s-%s-%s' % (create_dbname(args), 'latest', get_value(args, 'file')) |
| 59 | + config.load_configuration(args) |
| 60 | + language_code = retrieve_language(args) |
| 61 | + if language_code == None: |
| 62 | + print 'Entered language: %s is not a valid Wikipedia language' % get_value(args, 'language') |
| 63 | + sys.exit(-1) |
| 64 | + project = retrieve_project(args) |
| 65 | + if project == None: |
| 66 | + print 'Entered project: %s is not valid Wikipedia project.' % get_value(args, 'project') |
| 67 | + sys.exit(-1) |
| 68 | + location = os.path.join(get_value(args, 'location'), language_code) |
| 69 | + project = language_code + project |
| 70 | + filename = '%s-%s-%s' % (project, 'latest', get_value(args, 'file')) |
43 | 71 | pbar = get_value(args, 'progress') |
44 | 72 | |
45 | 73 | domain = settings.WP_DUMP_LOCATION |
46 | | - path = '/%s/latest/' % create_dbname(args) |
| 74 | + path = '/%s/latest/' % language_code |
47 | 75 | |
48 | 76 | extension = utils.determine_file_extension(filename) |
49 | 77 | filemode = utils.determine_file_mode(extension) |
50 | 78 | |
51 | 79 | dump_downloader.download_wp_dump(domain, path, filename, location, filemode, pbar) |
52 | 80 | |
53 | | -def create_dbname(args): |
54 | | - language = get_value(args, 'language') |
55 | | - return languages.MAPPING[language] + 'wiki' |
56 | 81 | |
57 | 82 | def split_xml_file_launcher(args): |
58 | 83 | print 'split_xml_file_launcher' |
59 | 84 | dbname = create_dbname(args) |
60 | 85 | split_xml_file.split_xml(dbname) |
61 | 86 | |
| 87 | + |
62 | 88 | def mongodb_script_launcher(args): |
63 | 89 | print 'mongodb_script_launcher' |
| 90 | + config.load_configuration(args) |
64 | 91 | dbname = create_dbname(args) |
65 | | - |
66 | 92 | #map_wiki_editors.run_stand_alone(dbname) |
67 | 93 | #print args |
68 | 94 | |
| 95 | + |
69 | 96 | def all_launcher(args): |
70 | 97 | print 'all_launcher' |
71 | 98 | config_launcher(args) |
— | — | @@ -72,19 +99,30 @@ |
73 | 100 | split_xml_file_launcher(args) |
74 | 101 | mongodb_script_launcher(args) |
75 | 102 | |
76 | | -def supported_languages(first_letter=False): |
77 | | - if first_letter == False: |
78 | | - choices = languages.MAPPING.keys()[:10] |
79 | | - else: |
80 | | - choices = languages.MAPPING.keys() |
81 | | - choices = [c for c in choices if c.startswith(first_letter)] |
| 103 | + |
| 104 | +def supported_languages(): |
| 105 | + choices = languages.MAPPING.keys() |
82 | 106 | choices = [c.encode(settings.ENCODING) for c in choices] |
83 | | - |
84 | 107 | return tuple(choices) |
85 | 108 | |
86 | 109 | |
| 110 | +def show_languages(args): |
| 111 | + first = get_value(args, 'first') |
| 112 | + if first != None: |
| 113 | + first = first.title() |
| 114 | + choices = supported_languages() |
| 115 | + languages = [] |
| 116 | + for choice in choices: |
| 117 | + languages.append(choice) |
| 118 | + languages.sort() |
| 119 | + for language in languages: |
| 120 | + if first == None: |
| 121 | + print '%s' % language |
| 122 | + elif first != None and language.startswith(first): |
| 123 | + print '%s' % language |
| 124 | + |
| 125 | + |
87 | 126 | def main(): |
88 | | - |
89 | 127 | file_choices = ('stub-meta-history.xml.gz', |
90 | 128 | 'stub-meta-current.xml.gz', |
91 | 129 | 'pages-meta-history.xml.7z', |
— | — | @@ -92,64 +130,46 @@ |
93 | 131 | |
94 | 132 | parser = ArgumentParser(prog='manage', formatter_class=RawTextHelpFormatter) |
95 | 133 | subparsers = parser.add_subparsers(help='sub-command help') |
96 | | - |
97 | | - parser.add_argument('language', action='store', |
98 | | - help='Example of valid languages. To see more languages, add the first character of the language you are interested in.', |
99 | | - choices=supported_languages(), |
100 | | - default='Russian') |
101 | | - |
102 | 134 | parser.add_argument('-p', '--progress', action='store_true', default=True, |
103 | 135 | help='Indicate whether you want to have a progressbar.') |
104 | 136 | |
| 137 | + parser_languages = subparsers.add_parser('show_languages', help='Overview of all valid languages.') |
| 138 | + parser_languages.set_defaults(func=show_languages) |
| 139 | + parser_languages.add_argument('-f', '--first', action='store', help='Enter the first letter of a language to see which languages are available.') |
| 140 | + |
105 | 141 | parser_config = subparsers.add_parser('config', help='The config sub command allows you set the data location of where to store files.') |
106 | 142 | parser_config.set_defaults(func=config_launcher) |
107 | 143 | |
108 | | - |
109 | | - |
110 | 144 | parser_download = subparsers.add_parser('download', help='The download sub command allows you to download a Wikipedia dump file.') |
111 | | - |
| 145 | + parser_download.add_argument('language', action='store', |
| 146 | + help='Example of valid languages.', |
| 147 | + choices=supported_languages(), |
| 148 | + default='Russian') |
| 149 | + parser_download.add_argument('-p', '--project', action='store', help='Specify the Wikimedia project that you would like to download', |
| 150 | + choices=settings.WIKIMEDIA_PROJECTS.keys(), |
| 151 | + default='wiki') |
112 | 152 | parser_download.add_argument('-l', '--location', action='store', |
113 | 153 | help='Indicate where you want to store the downloaded file.', |
114 | 154 | default=settings.XML_FILE_LOCATION) |
115 | | - |
116 | | - |
117 | | - parser_download.add_argument('file', action='store', |
| 155 | + parser_download.add_argument('-f', '--file', action='store', |
118 | 156 | choices=file_choices, |
119 | | - help='Indicate which dump you want to download. Valid choices are:\n %s' % ''.join([f + ',\n' for f in file_choices]), |
120 | | - default='user_groups.sql.gz') |
121 | | - |
| 157 | + help='Indicate which dump you want to download. Valid choices are:\n %s' % ''.join([f + ',\n' for f in file_choices]), |
| 158 | + default='stub-meta-current.xml.gz') |
122 | 159 | parser_download.set_defaults(func=dump_downloader_launcher) |
123 | 160 | |
124 | | - |
125 | 161 | parser_split = subparsers.add_parser('split', help='The split sub command splits the downloaded file in smaller chunks to parallelize extracting information.') |
126 | 162 | parser_split.set_defaults(func=split_xml_file_launcher) |
127 | 163 | |
128 | 164 | parser_create = subparsers.add_parser('store', help='The store sub command parsers the XML chunk files, extracts the information and stores it in a MongoDB.') |
129 | 165 | parser_create.set_defaults(func=mongodb_script_launcher) |
130 | 166 | |
131 | | - |
132 | | - |
133 | 167 | parser_all = subparsers.add_parser('all', help='The all sub command runs the download, split, store and dataset commands.\n\nWARNING: THIS COULD TAKE DAYS DEPENDING ON THE CONFIGURATION OF YOUR MACHINE AND THE SIZE OF THE WIKIMEDIA DUMP FILE.') |
134 | 168 | parser_all.set_defaults(func=all_launcher) |
135 | 169 | |
136 | | - |
137 | | - |
138 | | - #parser_create.add_argument() |
139 | | - |
140 | | - |
141 | | - #('-c', '--create', |
142 | | - # help='This will start the scripts to create a dataset\\\ |
143 | | - # from the MongoDB', type=mongodb_script_launcher) |
144 | | - #.add_argument('-d', '--download', |
145 | | - # help='This will start downloading the dump file.', |
146 | | - # ) |
147 | | - |
148 | | - |
149 | 170 | args = parser.parse_args() |
150 | 171 | args.func(args) |
151 | 172 | |
152 | 173 | |
153 | | - |
154 | 174 | if __name__ == '__main__': |
155 | 175 | #args = ['download', '-l', 'Russian'] |
156 | 176 | main() |
Index: trunk/tools/editor_trends/config.py |
— | — | @@ -0,0 +1,61 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | +''' |
| 5 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 6 | +This program is free software; you can redistribute it and/or |
| 7 | +modify it under the terms of the GNU General Public License version 2 |
| 8 | +as published by the Free Software Foundation. |
| 9 | +This program is distributed in the hope that it will be useful, |
| 10 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | +See the GNU General Public License for more details, at |
| 13 | +http://www.fsf.org/licenses/gpl.html |
| 14 | +''' |
| 15 | + |
| 16 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
| 17 | +__author__email = 'dvanliere at gmail dot com' |
| 18 | +__date__ = '2010-10-21' |
| 19 | +__version__ = '0.1' |
| 20 | + |
| 21 | + |
| 22 | +import os |
| 23 | +import ConfigParser |
| 24 | + |
| 25 | +import settings |
| 26 | +from utils import utils |
| 27 | + |
| 28 | + |
| 29 | +def load_configuration(args): |
| 30 | + config = ConfigParser.RawConfigParser() |
| 31 | + if not utils.check_file_exists(settings.WORKING_DIRECTORY, 'wiki.cfg'): |
| 32 | + working_directory = raw_input('Please indicate where you installed Editor Trends Analytics.\nCurrent location is %s\nPress Enter to accept default.' % os.getcwd()) |
| 33 | + if working_directory == '': |
| 34 | + working_directory = os.getcwd() |
| 35 | + |
| 36 | + xml_file_location = raw_input('Please indicate where to store the Wikipedia dump files.\nDefault is: %s\nPress Enter to accept default.' % settings.XML_FILE_LOCATION) |
| 37 | + if xml_file_location == '': |
| 38 | + xml_file_location = settings.XML_FILE_LOCATION |
| 39 | + |
| 40 | + create_configuration(WORKING_DIRECTORY=working_directory, XML_FILE_LOCATION=xml_file_location) |
| 41 | + |
| 42 | + config.read('wiki.cfg') |
| 43 | + settings.WORKING_DIRECTORY = config.get('file_locations', 'WORKING_DIRECTORY') |
| 44 | + settings.XML_FILE_LOCATION = config.get('file_locations', 'XML_FILE_LOCATION') |
| 45 | + |
| 46 | + |
| 47 | +def create_configuration(**kwargs): |
| 48 | + working_directory = kwargs.get('WORKING_DIRECTORY', settings.WORKING_DIRECTORY) |
| 49 | + config = ConfigParser.RawConfigParser() |
| 50 | + config.add_section('file_locations') |
| 51 | + config.set('file_locations', 'WORKING_DIRECTORY', working_directory) |
| 52 | + config.set('file_locations', 'XML_FILE_LOCATION', kwargs.get('XML_FILE_LOCATION', settings.XML_FILE_LOCATION)) |
| 53 | + |
| 54 | + fh = utils.create_binary_filehandle(working_directory, 'wiki.cfg', 'wb') |
| 55 | + config.write(fh) |
| 56 | + fh.close() |
| 57 | + |
| 58 | + |
| 59 | +if __name__ == '__main__': |
| 60 | + load_configuration([]) |
| 61 | + |
| 62 | + |
Property changes on: trunk/tools/editor_trends/config.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 63 | + native |
Index: trunk/tools/editor_trends/languages.py |
— | — | @@ -1,5 +1,5 @@ |
2 | 2 | #!/usr/bin/python |
3 | | -# -*- coding: utf-8 -*- |
| 3 | +# coding=utf-8 |
4 | 4 | ''' |
5 | 5 | Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
6 | 6 | This program is free software; you can redistribute it and/or |
— | — | @@ -9,7 +9,7 @@ |
10 | 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
12 | 12 | See the GNU General Public License for more details, at |
13 | | -http://www.fsf.org/licenses/gpl.html |
| 13 | +http://www.fsf.org/licenses/gpl.html |
14 | 14 | ''' |
15 | 15 | |
16 | 16 | __author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
— | — | @@ -23,584 +23,585 @@ |
24 | 24 | Gothic and Burmese are not yet supported, see rows 450 and 554. |
25 | 25 | ''' |
26 | 26 | |
27 | | -LANGUAGES = { |
28 | | -'English':'en', |
29 | | -'German':'de', |
30 | | -'French':'fr', |
31 | | -'Italian':'it', |
32 | | -'Polish':'pl', |
33 | | -'Japanese':'ja', |
34 | | -'Spanish':'es', |
35 | | -'Dutch':'nl', |
36 | | -'Portuguese':'pt', |
37 | | -'Russian':'ru', |
38 | | -'Swedish':'sv', |
39 | | -'Chinese':'zh', |
40 | | -'Catalan':'ca', |
41 | | -'Norwegian':'no', |
42 | | -'Bokmål':'no', |
43 | | -'Finnish':'fi', |
44 | | -'Ukrainian':'uk', |
45 | | -'Hungarian':'hu', |
46 | | -'Czech':'cs', |
47 | | -'Romanian':'ro', |
48 | | -'Turkish':'tr', |
49 | | -'Korean':'ko', |
50 | | -'Vietnamese':'vi', |
51 | | -'Danish':'da', |
52 | | -'Arabic':'ar', |
53 | | -'Esperanto':'eo', |
54 | | -'Serbian':'sr', |
55 | | -'Indonesian':'id', |
56 | | -'Lithuanian':'lt', |
57 | | -'Volapük':'vo', |
58 | | -'Slovak':'sk', |
59 | | -'Hebrew':'he', |
60 | | -'Bulgarian':'bg', |
61 | | -'Persian':'fa', |
62 | | -'Slovenian':'sl', |
63 | | -'Waray-Waray':'war', |
64 | | -'Croatian':'hr', |
65 | | -'Estonian':'et', |
66 | | -'Malay':'ms', |
67 | | -'Newar':'new', |
68 | | -'Nepal Bhasa':'new', |
69 | | -'Simple English':'simple', |
70 | | -'Galician':'gl', |
71 | | -'Thai':'th', |
72 | | -'Aromanian':'roa-rup', |
73 | | -'Nynorsk':'nn', |
74 | | -'Basque':'eu', |
75 | | -'Hindi':'hi', |
76 | | -'Greek':'el', |
77 | | -'Haitian':'ht', |
78 | | -'Latin':'la', |
79 | | -'Telugu':'te', |
80 | | -'Georgian':'ka', |
81 | | -'Cebuano':'ceb', |
82 | | -'Macedonian':'mk', |
83 | | -'Azeri':'az', |
84 | | -'Tagalog':'tl', |
85 | | -'Breton':'br', |
86 | | -'Serbo-Croatian':'sh', |
87 | | -'Marathi':'mr', |
88 | | -'Luxembourgish':'lb', |
89 | | -'Javanese':'jv', |
90 | | -'Latvian':'lv', |
91 | | -'Bosnian':'bs', |
92 | | -'Icelandic':'is', |
93 | | -'Welsh':'cy', |
94 | | -'Belarusian':'be-x-old', |
95 | | -'Taraškievica':'be-x-old', |
96 | | -'Piedmontese':'pms', |
97 | | -'Albanian':'sq', |
98 | | -'Tamil':'ta', |
99 | | -'Bishnupriya Manipuri':'bpy', |
100 | | -'Belarusian':'be', |
101 | | -'Aragonese':'an', |
102 | | -'Occitan':'oc', |
103 | | -'Bengali':'bn', |
104 | | -'Swahili':'sw', |
105 | | -'Ido':'io', |
106 | | -'Ripuarian':'ksh', |
107 | | -'Lombard':'lmo', |
108 | | -'West Frisian':'fy', |
109 | | -'Gujarati':'gu', |
110 | | -'Low Saxon':'nds', |
111 | | -'Afrikaans':'af', |
112 | | -'Sicilian':'scn', |
113 | | -'Quechua':'qu', |
114 | | -'Kurdish':'ku', |
115 | | -'Urdu':'ur', |
116 | | -'Sundanese':'su', |
117 | | -'Malayalam':'ml', |
118 | | -'Cantonese':'zh-yue', |
119 | | -'Asturian':'ast', |
120 | | -'Neapolitan':'nap', |
121 | | -'Samogitian':'bat-smg', |
122 | | -'Walloon':'wa', |
123 | | -'Chuvash':'cv', |
124 | | -'Irish':'ga', |
125 | | -'Armenian':'hy', |
126 | | -'Yoruba':'yo', |
127 | | -'Kannada':'kn', |
128 | | -'Tajik':'tg', |
129 | | -'Tarantino':'roa-tara', |
130 | | -'Venetian':'vec', |
131 | | -'Western Panjabi':'pnb', |
132 | | -'Nepali':'ne', |
133 | | -'Scottish Gaelic':'gd', |
134 | | -'Yiddish':'yi', |
135 | | -'Min Nan':'zh-min-nan', |
136 | | -'Uzbek':'uz', |
137 | | -'Tatar':'tt', |
138 | | -'Kapampangan':'pam', |
139 | | -'Ossetian':'os', |
140 | | -'Sakha':'sah', |
141 | | -'Alemannic':'als', |
142 | | -'Maori':'mi', |
143 | | -'Egyptian Arabic':'arz', |
144 | | -'Kazakh':'kk', |
145 | | -'Nahuatl':'nah', |
146 | | -'Limburgian':'li', |
147 | | -'Upper Sorbian':'hsb', |
148 | | -'Gilaki':'glk', |
149 | | -'Corsican':'co', |
150 | | -'Gan':'gan', |
151 | | -'Amharic':'am', |
152 | | -'Mongolian':'mn', |
153 | | -'Interlingua':'ia', |
154 | | -'Central Bicolano':'bcl', |
155 | | -'Võro':'fiu-vro', |
156 | | -'Dutch Low Saxon':'nds-nl', |
157 | | -'Faroese':'fo', |
158 | | -'Turkmen':'tk', |
159 | | -'Scots':'sco', |
160 | | -'West Flemish':'vls', |
161 | | -'Sinhalese':'si', |
162 | | -'Sanskrit':'sa', |
163 | | -'Bavarian':'bar', |
164 | | -'Burmese':'my', |
165 | | -'Manx':'gv', |
166 | | -'Divehi':'dv', |
167 | | -'Norman':'nrm', |
168 | | -'Pangasinan':'pag', |
169 | | -'Romansh':'rm', |
170 | | -'Banyumasan':'map-bms', |
171 | | -'Zazaki':'diq', |
172 | | -'Sorani':'ckb', |
173 | | -'Northern Sami':'se', |
174 | | -'Mazandarani':'mzn', |
175 | | -'Wu':'wuu', |
176 | | -'Uyghur':'ug', |
177 | | -'Friulian':'fur', |
178 | | -'Ligurian':'lij', |
179 | | -'Maltese':'mt', |
180 | | -'Bihari':'bh', |
181 | | -'Novial':'nov', |
182 | | -'Malagasy':'mg', |
183 | | -'Kashubian':'csb', |
184 | | -'Ilokano':'ilo', |
185 | | -'Sardinian':'sc', |
186 | | -'Classical Chinese':'zh-classical', |
187 | | -'Khmer':'km', |
188 | | -'Ladino':'lad', |
189 | | -'Pali':'pi', |
190 | | -'Anglo-Saxon':'ang', |
191 | | -'Zamboanga Chavacano':'cbk-zam', |
192 | | -'Tibetan':'bo', |
193 | | -'Fiji Hindi':'hif', |
194 | | -'Franco-Provençal':'frp', |
195 | | -'Arpitan':'frp', |
196 | | -'Hakka':'hak', |
197 | | -'Cornish':'kw', |
198 | | -'Punjabi':'pa', |
199 | | -'Pashto':'ps', |
200 | | -'Kalmyk':'xal', |
201 | | -'Silesian':'szl', |
202 | | -'Pennsylvania German':'pdc', |
203 | | -'Hawaiian':'haw', |
204 | | -'Saterland Frisian':'stq', |
205 | | -'Interlingue':'ie', |
206 | | -'Navajo':'nv', |
207 | | -'Fijian':'fj', |
208 | | -'Crimean Tatar':'crh', |
209 | | -'Komi':'kv', |
210 | | -'Tongan':'to', |
211 | | -'Acehnese':'ace', |
212 | | -'Somali':'so', |
213 | | -'Erzya':'myv', |
214 | | -'Guarani':'gn', |
215 | | -'Karachay-Balkar':'krc', |
216 | | -'Extremaduran':'ext', |
217 | | -'Lingala':'ln', |
218 | | -'Kirghiz':'ky', |
219 | | -'Meadow Mari':'mhr', |
220 | | -'Assyrian Neo-Aramaic':'arc', |
221 | | -'Emilian-Romagnol':'eml', |
222 | | -'Lojban':'jbo', |
223 | | -'Picard':'pcd', |
224 | | -'Aymara':'ay', |
225 | | -'Wolof':'wo', |
226 | | -'Tumbuka':'tum', |
227 | | -'Kabyle':'kab', |
228 | | -'Bashkir':'ba', |
229 | | -'North Frisian':'frr', |
230 | | -'Tahitian':'ty', |
231 | | -'Tok Pisin':'tpi', |
232 | | -'Papiamentu':'pap', |
233 | | -'Zealandic':'zea', |
234 | | -'Sranan':'srn', |
235 | | -'Greenlandic':'kl', |
236 | | -'Udmurt':'udm', |
237 | | -'Chechen':'ce', |
238 | | -'Igbo':'ig', |
239 | | -'Komi-Permyak':'koi', |
240 | | -'Oriya':'or', |
241 | | -'Lower Sorbian':'dsb', |
242 | | -'Kongo':'kg', |
243 | | -'Lao':'lo', |
244 | | -'Abkhazian':'ab', |
245 | | -'Moksha':'mdf', |
246 | | -'Romani':'rmy', |
247 | | -'Hill Mari':'mrj', |
248 | | -'Banjar':'bjn', |
249 | | -'Old Church Slavonic':'cu', |
250 | | -'Mirandese':'mwl', |
251 | | -'Karakalpak':'kaa', |
252 | | -'Samoan':'sm', |
253 | | -'Moldovan':'mo', |
254 | | -'Tetum':'tet', |
255 | | -'Avar':'av', |
256 | | -'Kashmiri':'ks', |
257 | | -'Gothic':'got', |
258 | | -'Sindhi':'sd', |
259 | | -'Bambara':'bm', |
260 | | -'Nauruan':'na', |
261 | | -'Norfolk':'pih', |
262 | | -'Pontic':'pnt', |
263 | | -'Inuktitut':'iu', |
264 | | -'Inupiak':'ik', |
265 | | -'Bislama':'bi', |
266 | | -'Cherokee':'chr', |
267 | | -'Assamese':'as', |
268 | | -'Min Dong':'cdo', |
269 | | -'Ewe':'ee', |
270 | | -'Swati':'ss', |
271 | | -'Oromo':'om', |
272 | | -'Zhuang':'za', |
273 | | -'Zulu':'zu', |
274 | | -'Tigrinya':'ti', |
275 | | -'Venda':'ve', |
276 | | -'Tsonga':'ts', |
277 | | -'Hausa':'ha', |
278 | | -'Dzongkha':'dz', |
279 | | -'Sango':'sg', |
280 | | -'Chamorro':'ch', |
281 | | -'Cree':'cr', |
282 | | -'Xhosa':'xh', |
283 | | -'Akan':'ak', |
284 | | -'Sesotho':'st', |
285 | | -'Kinyarwanda':'rw', |
286 | | -'Tswana':'tn', |
287 | | -'Kikuyu':'ki', |
288 | | -'Buryat':'bxr', |
289 | | -'Buginese':'bug', |
290 | | -'Chichewa':'ny', |
291 | | -'Lak':'lbe', |
292 | | -'Twi':'tw', |
293 | | -'Shona':'sn', |
294 | | -'Kirundi':'rn', |
295 | | -'Fula':'ff', |
296 | | -'Cheyenne':'chy', |
297 | | -'Luganda':'lg', |
298 | | -'Ndonga':'ng', |
299 | | -'Sichuan Yi':'ii', |
300 | | -'Choctaw':'cho', |
301 | | -'Marshallese':'mh', |
302 | | -'Afar':'aa', |
303 | | -'Kuanyama':'kj', |
304 | | -'Hiri Motu':'ho', |
305 | | -'Muscogee':'mus', |
306 | | -'Kanuri':'kr', |
307 | | -'Herero':'hz', |
308 | | -'English':'en', |
309 | | -'Deutsch':'de', |
310 | | -'Français':'fr', |
311 | | -'Italiano':'it', |
312 | | -'Polski':'pl', |
313 | | -'日本語':'ja', |
314 | | -'Español':'es', |
315 | | -'Nederlands':'nl', |
316 | | -'Português':'pt', |
317 | | -'Русский':'ru', |
318 | | -'Svenska':'sv', |
319 | | -'中文':'zh', |
320 | | -'Català':'ca', |
321 | | -'Norsk':'no', |
322 | | -'Bokmål':'no', |
323 | | -'Suomi':'fi', |
324 | | -'Українська':'uk', |
325 | | -'Magyar':'hu', |
326 | | -'Čeština':'cs', |
327 | | -'Română':'ro', |
328 | | -'Türkçe':'tr', |
329 | | -'한국어':'ko', |
330 | | -'Tiếng Việt':'vi', |
331 | | -'Dansk':'da', |
332 | | -'العربية':'ar', |
333 | | -'Esperanto':'eo', |
334 | | -'Српски':'sr', |
335 | | -'Srpski':'sr', |
336 | | -'Bahasa Indonesia':'id', |
337 | | -'Lietuvių':'lt', |
338 | | -'Volapük':'vo', |
339 | | -'Slovenčina':'sk', |
340 | | -'עברית':'he', |
341 | | -'Български':'bg', |
342 | | -'فارسی':'fa', |
343 | | -'Slovenščina':'sl', |
344 | | -'Winaray':'war', |
345 | | -'Hrvatski':'hr', |
346 | | -'Eesti':'et', |
347 | | -'Bahasa Melayu':'ms', |
348 | | -'नेपाल भाषा':'new', |
349 | | -'Simple English':'simple', |
350 | | -'Galego':'gl', |
351 | | -'ไทย':'th', |
352 | | -'Armãneashce':'roa-rup', |
353 | | -'Nynorsk':'nn', |
354 | | -'Euskara':'eu', |
355 | | -'हिन्दी':'hi', |
356 | | -'Ελληνικά':'el', |
357 | | -'Krèyol ayisyen':'ht', |
358 | | -'Latina':'la', |
359 | | -'తెలుగు':'te', |
360 | | -'ქართული':'ka', |
361 | | -'Sinugboanong Binisaya':'ceb', |
362 | | -'Македонски':'mk', |
363 | | -'Azərbaycan':'az', |
364 | | -'Tagalog':'tl', |
365 | | -'Brezhoneg':'br', |
366 | | -'Srpskohrvatski':'sh', |
367 | | -'Српскохрватски':'sh', |
368 | | -'मराठी':'mr', |
369 | | -'Lëtzebuergesch':'lb', |
370 | | -'Basa Jawa':'jv', |
371 | | -'Latviešu':'lv', |
372 | | -'Bosanski':'bs', |
373 | | -'Íslenska':'is', |
374 | | -'Cymraeg':'cy', |
375 | | -'Беларуская':'be-x-old', |
376 | | -'тарашкевіца':'be-x-old', |
377 | | -'Piemontèis':'pms', |
378 | | -'Shqip':'sq', |
379 | | -'தமிழ்':'ta', |
380 | | -'ইমার ঠার':'bpy', |
381 | | -'বিষ্ণুপ্রিয়া মণিপুরী':'bpy', |
382 | | -'Беларуская':'be', |
383 | | -'Aragonés':'an', |
384 | | -'Occitan':'oc', |
385 | | -'বাংলা':'bn', |
386 | | -'Kiswahili':'sw', |
387 | | -'Ido':'io', |
388 | | -'Ripoarisch':'ksh', |
389 | | -'Lumbaart':'lmo', |
390 | | -'Frysk':'fy', |
391 | | -'ગુજરાતી':'gu', |
392 | | -'Plattdüütsch':'nds', |
393 | | -'Afrikaans':'af', |
394 | | -'Sicilianu':'scn', |
395 | | -'Runa Simi':'qu', |
396 | | -'Kurdî':'ku', |
397 | | -'كوردی':'ku', |
398 | | -'اردو':'ur', |
399 | | -'Basa Sunda':'su', |
400 | | -'മലയാളം':'ml', |
401 | | -'粵語':'zh-yue', |
402 | | -'Asturianu':'ast', |
403 | | -'Nnapulitano':'nap', |
404 | | -'Žemaitėška':'bat-smg', |
405 | | -'Walon':'wa', |
406 | | -'Чăваш':'cv', |
407 | | -'Gaeilge':'ga', |
408 | | -'Հայերեն':'hy', |
409 | | -'Yorùbá':'yo', |
410 | | -'ಕನ್ನಡ':'kn', |
411 | | -'Тоҷикӣ':'tg', |
412 | | -'Tarandíne':'roa-tara', |
413 | | -'Vèneto':'vec', |
414 | | -'شاہ مکھی پنجابی':'pnb', |
415 | | -'Shāhmukhī Pañjābī':'pnb', |
416 | | -'नेपाली':'ne', |
417 | | -'Gàidhlig':'gd', |
418 | | -'ייִדיש':'yi', |
419 | | -'Bân-lâm-gú':'zh-min-nan', |
420 | | -'O‘zbek':'uz', |
421 | | -'Tatarça':'tt', |
422 | | -'Татарча':'tt', |
423 | | -'Kapampangan':'pam', |
424 | | -'Иронау':'os', |
425 | | -'Саха тыла':'sah', |
426 | | -'Saxa Tyla':'sah', |
427 | | -'Alemannisch':'als', |
428 | | -'Māori':'mi', |
429 | | -'مصرى':'arz', |
430 | | -'Maṣrī':'arz', |
431 | | -'Қазақша':'kk', |
432 | | -'Nāhuatl':'nah', |
433 | | -'Limburgs':'li', |
434 | | -'Hornjoserbsce':'hsb', |
435 | | -'گیلکی':'glk', |
436 | | -'Corsu':'co', |
437 | | -'贛語':'gan', |
438 | | -'አማርኛ':'am', |
439 | | -'Монгол':'mn', |
440 | | -'Interlingua':'ia', |
441 | | -'Bikol':'bcl', |
442 | | -'Võro':'fiu-vro', |
443 | | -'Nedersaksisch':'nds-nl', |
444 | | -'Føroyskt':'fo', |
445 | | -'تركمن ':'tk', |
446 | | -'Туркмен':'tk', |
447 | | -'Scots':'sco', |
448 | | -'West-Vlams':'vls', |
449 | | -'සිංහල':'si', |
450 | | -'संस्कृतम्':'sa', |
451 | | -'Boarisch':'bar', |
452 | | -'မ္ရန္မာစာ':'my', #Needs fix |
453 | | -'Gaelg':'gv', |
454 | | -'ދިވެހިބަސް':'dv', |
455 | | -'Nouormand':'nrm', |
456 | | -'Normaund':'nrm', |
457 | | -'Pangasinan':'pag', |
458 | | -'Rumantsch':'rm', |
459 | | -'Basa Banyumasan':'map-bms', |
460 | | -'Zazaki':'diq', |
461 | | -'Soranî':'ckb', |
462 | | -'کوردی':'ckb', |
463 | | -'Sámegiella':'se', |
464 | | -'مَزِروني':'mzn', |
465 | | -'吴语':'wuu', |
466 | | -'Oyghurque':'ug', |
467 | | -'Furlan':'fur', |
468 | | -'Líguru':'lij', |
469 | | -'Malti':'mt', |
470 | | -'भोजपुरी':'bh', |
471 | | -'Novial':'nov', |
472 | | -'Malagasy':'mg', |
473 | | -'Kaszëbsczi':'csb', |
474 | | -'Ilokano':'ilo', |
475 | | -'Sardu':'sc', |
476 | | -'古文':'zh-classical', |
477 | | -'文言文':'zh-classical', |
478 | | -'ភាសាខ្មែរ':'km', |
479 | | -'Dzhudezmo':'lad', |
480 | | -'पाऴि':'pi', |
481 | | -'Englisc':'ang', |
482 | | -'Chavacano de Zamboanga':'cbk-zam', |
483 | | -'བོད་སྐད':'bo', |
484 | | -'Fiji Hindi':'hif', |
485 | | -'Arpitan':'frp', |
486 | | -'Hak-kâ-fa':'hak', |
487 | | -'客家話':'hak', |
488 | | -'Kernewek':'kw', |
489 | | -'Karnuack':'kw', |
490 | | -'ਪੰਜਾਬੀ':'pa', |
491 | | -'پښتو':'ps', |
492 | | -'Хальмг':'xal', |
493 | | -'Ślůnski':'szl', |
494 | | -'Deitsch':'pdc', |
495 | | -'Hawai`i':'haw', |
496 | | -'Seeltersk':'stq', |
497 | | -'Interlingue':'ie', |
498 | | -'Diné bizaad':'nv', |
499 | | -'Na Vosa Vakaviti':'fj', |
500 | | -'Qırımtatarca':'crh', |
501 | | -'Коми':'kv', |
502 | | -'faka Tonga':'to', |
503 | | -'Bahsa Acèh':'ace', |
504 | | -'Soomaaliga':'so', |
505 | | -'Эрзянь':'myv', |
506 | | -'Erzjanj Kelj':'myv', |
507 | | -"Avañe'ẽ":'gn', |
508 | | -'Къарачай-Малкъар':'krc', |
509 | | -'Qarachay-Malqar':'krc', |
510 | | -'Estremeñu':'ext', |
511 | | -'Lingala':'ln', |
512 | | -'Кыргызча':'ky', |
513 | | -'Олык Марий':'mhr', |
514 | | -'Olyk Marij':'mhr', |
515 | | -'ܐܪܡܝܐ':'arc', |
516 | | -'Emiliàn e rumagnòl':'eml', |
517 | | -'Lojban':'jbo', |
518 | | -'Picard':'pcd', |
519 | | -'Aymar':'ay', |
520 | | -'Wolof':'wo', |
521 | | -'chiTumbuka':'tum', |
522 | | -'Taqbaylit':'kab', |
523 | | -'Башҡорт':'ba', |
524 | | -'Frasch':'frr', |
525 | | -'Reo Mā`ohi':'ty', |
526 | | -'Tok Pisin':'tpi', |
527 | | -'Papiamentu':'pap', |
528 | | -'Zeêuws':'zea', |
529 | | -'Sranantongo':'srn', |
530 | | -'Kalaallisut':'kl', |
531 | | -'Удмурт кыл':'udm', |
532 | | -'Нохчийн':'ce', |
533 | | -'Igbo':'ig', |
534 | | -'Перем Коми':'koi', |
535 | | -'Perem Komi':'koi', |
536 | | -'ଓଡ଼ିଆ':'or', |
537 | | -'Dolnoserbski':'dsb', |
538 | | -'KiKongo':'kg', |
539 | | -'ລາວ':'lo', |
540 | | -'Аҧсуа':'ab', |
541 | | -'Мокшень':'mdf', |
542 | | -'Mokshanj Kälj':'mdf', |
543 | | -'romani - रोमानी':'rmy', |
544 | | -'Кырык Мары':'mrj', |
545 | | -'Kyryk Mary':'mrj', |
546 | | -'Bahasa Banjar':'bjn', |
547 | | -'Словѣньскъ':'cu', |
548 | | -'Páigina Percipal':'mwl', |
549 | | -'Qaraqalpaqsha':'kaa', |
550 | | -'Gagana Samoa':'sm', |
551 | | -'Молдовеняскэ':'mo', |
552 | | -'Tetun':'tet', |
553 | | -'Авар':'av', |
554 | | -'कश्मीरी':'ks', |
555 | | -'كشميري':'ks', |
556 | | -'𐌲𐌿𐍄𐌹𐍃𐌺':'got', #Needs fix |
557 | | -'سنڌي، سندھی ، सिन्ध':'sd', |
558 | | -'Bamanankan':'bm', |
559 | | -'dorerin Naoero':'na', |
560 | | -'Norfuk':'pih', |
561 | | -'Ποντιακά':'pnt', |
562 | | -'ᐃᓄᒃᑎᑐᑦ':'iu', |
563 | | -'Iñupiak':'ik', |
564 | | -'Bislama':'bi', |
565 | | -'ᏣᎳᎩ':'chr', |
566 | | -'অসমীয়া':'as', |
567 | | -'Mìng-dĕ̤ng-ngṳ̄':'cdo', |
568 | | -'Eʋegbe':'ee', |
569 | | -'SiSwati':'ss', |
570 | | -'Oromoo':'om', |
571 | | -'Cuengh':'za', |
572 | | -'isiZulu':'zu', |
573 | | -'ትግርኛ':'ti', |
574 | | -'Tshivenda':'ve', |
575 | | -'Xitsonga':'ts', |
576 | | -'هَوُسَ':'ha', |
577 | | -'ཇོང་ཁ':'dz', |
578 | | -'Sängö':'sg', |
579 | | -'Chamoru':'ch', |
580 | | -'Nehiyaw':'cr', |
581 | | -'isiXhosa':'xh', |
582 | | -'Akana':'ak', |
583 | | -'Sesotho':'st', |
584 | | -'Ikinyarwanda':'rw', |
585 | | -'Setswana':'tn', |
586 | | -'Gĩkũyũ':'ki', |
587 | | -'Буряад':'bxr', |
588 | | -'Basa Ugi':'bug', |
589 | | -'Chi-Chewa':'ny', |
590 | | -'Лакку':'lbe', |
591 | | -'Twi':'tw', |
592 | | -'chiShona':'sn', |
593 | | -'Kirundi':'rn', |
594 | | -'Fulfulde':'ff', |
595 | | -'Tsetsêhestâhese':'chy', |
596 | | -'Luganda':'lg', |
597 | | -'Oshiwambo':'ng', |
598 | | -'ꆇꉙ':'ii', |
599 | | -'Choctaw':'cho', |
600 | | -'Ebon':'mh', |
601 | | -'Afar':'aa', |
602 | | -'Kuanyama':'kj', |
603 | | -'Hiri Motu':'ho', |
604 | | -'Muskogee':'mus', |
605 | | -'Kanuri':'kr', |
606 | | -'Otsiherero':'hz', |
607 | | -} |
\ No newline at end of file |
| 27 | +from utils import ordered_dict as odict |
| 28 | +MAPPING = odict.OrderedDict([ |
| 29 | +(u'English','en'), |
| 30 | +(u'German','de'), |
| 31 | +(u'French','fr'), |
| 32 | +(u'Italian','it'), |
| 33 | +(u'Polish','pl'), |
| 34 | +(u'Japanese','ja'), |
| 35 | +(u'Spanish','es'), |
| 36 | +(u'Dutch','nl'), |
| 37 | +(u'Portuguese','pt'), |
| 38 | +(u'Russian','ru'), |
| 39 | +(u'Swedish','sv'), |
| 40 | +(u'Chinese','zh'), |
| 41 | +(u'Catalan','ca'), |
| 42 | +(u'Norwegian','no'), |
| 43 | +(u'Bokmål','no'), |
| 44 | +(u'Finnish','fi'), |
| 45 | +(u'Ukrainian','uk'), |
| 46 | +(u'Hungarian','hu'), |
| 47 | +(u'Czech','cs'), |
| 48 | +(u'Romanian','ro'), |
| 49 | +(u'Turkish','tr'), |
| 50 | +(u'Korean','ko'), |
| 51 | +(u'Vietnamese','vi'), |
| 52 | +(u'Danish','da'), |
| 53 | +(u'Arabic','ar'), |
| 54 | +(u'Esperanto','eo'), |
| 55 | +(u'Serbian','sr'), |
| 56 | +(u'Indonesian','id'), |
| 57 | +(u'Lithuanian','lt'), |
| 58 | +(u'Volapük','vo'), |
| 59 | +(u'Slovak','sk'), |
| 60 | +(u'Hebrew','he'), |
| 61 | +(u'Bulgarian','bg'), |
| 62 | +(u'Persian','fa'), |
| 63 | +(u'Slovenian','sl'), |
| 64 | +(u'Waray-Waray','war'), |
| 65 | +(u'Croatian','hr'), |
| 66 | +(u'Estonian','et'), |
| 67 | +(u'Malay','ms'), |
| 68 | +(u'Newar','new'), |
| 69 | +(u'Nepal Bhasa','new'), |
| 70 | +(u'Simple English','simple'), |
| 71 | +(u'Galician','gl'), |
| 72 | +(u'Thai','th'), |
| 73 | +(u'Aromanian','roa-rup'), |
| 74 | +(u'Nynorsk','nn'), |
| 75 | +(u'Basque','eu'), |
| 76 | +(u'Hindi','hi'), |
| 77 | +(u'Greek','el'), |
| 78 | +(u'Haitian','ht'), |
| 79 | +(u'Latin','la'), |
| 80 | +(u'Telugu','te'), |
| 81 | +(u'Georgian','ka'), |
| 82 | +(u'Cebuano','ceb'), |
| 83 | +(u'Macedonian','mk'), |
| 84 | +(u'Azeri','az'), |
| 85 | +(u'Tagalog','tl'), |
| 86 | +(u'Breton','br'), |
| 87 | +(u'Serbo-Croatian','sh'), |
| 88 | +(u'Marathi','mr'), |
| 89 | +(u'Luxembourgish','lb'), |
| 90 | +(u'Javanese','jv'), |
| 91 | +(u'Latvian','lv'), |
| 92 | +(u'Bosnian','bs'), |
| 93 | +(u'Icelandic','is'), |
| 94 | +(u'Welsh','cy'), |
| 95 | +(u'Belarusian','be-x-old'), |
| 96 | +(u'Taraškievica','be-x-old'), |
| 97 | +(u'Piedmontese','pms'), |
| 98 | +(u'Albanian','sq'), |
| 99 | +(u'Tamil','ta'), |
| 100 | +(u'Bishnupriya Manipuri','bpy'), |
| 101 | +(u'Belarusian','be'), |
| 102 | +(u'Aragonese','an'), |
| 103 | +(u'Occitan','oc'), |
| 104 | +(u'Bengali','bn'), |
| 105 | +(u'Swahili','sw'), |
| 106 | +(u'Ido','io'), |
| 107 | +(u'Ripuarian','ksh'), |
| 108 | +(u'Lombard','lmo'), |
| 109 | +(u'West Frisian','fy'), |
| 110 | +(u'Gujarati','gu'), |
| 111 | +(u'Low Saxon','nds'), |
| 112 | +(u'Afrikaans','af'), |
| 113 | +(u'Sicilian','scn'), |
| 114 | +(u'Quechua','qu'), |
| 115 | +(u'Kurdish','ku'), |
| 116 | +(u'Urdu','ur'), |
| 117 | +(u'Sundanese','su'), |
| 118 | +(u'Malayalam','ml'), |
| 119 | +(u'Cantonese','zh-yue'), |
| 120 | +(u'Asturian','ast'), |
| 121 | +(u'Neapolitan','nap'), |
| 122 | +(u'Samogitian','bat-smg'), |
| 123 | +(u'Walloon','wa'), |
| 124 | +(u'Chuvash','cv'), |
| 125 | +(u'Irish','ga'), |
| 126 | +(u'Armenian','hy'), |
| 127 | +(u'Yoruba','yo'), |
| 128 | +(u'Kannada','kn'), |
| 129 | +(u'Tajik','tg'), |
| 130 | +(u'Tarantino','roa-tara'), |
| 131 | +(u'Venetian','vec'), |
| 132 | +(u'Western Panjabi','pnb'), |
| 133 | +(u'Nepali','ne'), |
| 134 | +(u'Scottish Gaelic','gd'), |
| 135 | +(u'Yiddish','yi'), |
| 136 | +(u'Min Nan','zh-min-nan'), |
| 137 | +(u'Uzbek','uz'), |
| 138 | +(u'Tatar','tt'), |
| 139 | +(u'Kapampangan','pam'), |
| 140 | +(u'Ossetian','os'), |
| 141 | +(u'Sakha','sah'), |
| 142 | +(u'Alemannic','als'), |
| 143 | +(u'Maori','mi'), |
| 144 | +(u'Egyptian Arabic','arz'), |
| 145 | +(u'Kazakh','kk'), |
| 146 | +(u'Nahuatl','nah'), |
| 147 | +(u'Limburgian','li'), |
| 148 | +(u'Upper Sorbian','hsb'), |
| 149 | +(u'Gilaki','glk'), |
| 150 | +(u'Corsican','co'), |
| 151 | +(u'Gan','gan'), |
| 152 | +(u'Amharic','am'), |
| 153 | +(u'Mongolian','mn'), |
| 154 | +(u'Interlingua','ia'), |
| 155 | +(u'Central Bicolano','bcl'), |
| 156 | +(u'Võro','fiu-vro'), |
| 157 | +(u'Dutch Low Saxon','nds-nl'), |
| 158 | +(u'Faroese','fo'), |
| 159 | +(u'Turkmen','tk'), |
| 160 | +(u'Scots','sco'), |
| 161 | +(u'West Flemish','vls'), |
| 162 | +(u'Sinhalese','si'), |
| 163 | +(u'Sanskrit','sa'), |
| 164 | +(u'Bavarian','bar'), |
| 165 | +(u'Burmese','my'), |
| 166 | +(u'Manx','gv'), |
| 167 | +(u'Divehi','dv'), |
| 168 | +(u'Norman','nrm'), |
| 169 | +(u'Pangasinan','pag'), |
| 170 | +(u'Romansh','rm'), |
| 171 | +(u'Banyumasan','map-bms'), |
| 172 | +(u'Zazaki','diq'), |
| 173 | +(u'Sorani','ckb'), |
| 174 | +(u'Northern Sami','se'), |
| 175 | +(u'Mazandarani','mzn'), |
| 176 | +(u'Wu','wuu'), |
| 177 | +(u'Uyghur','ug'), |
| 178 | +(u'Friulian','fur'), |
| 179 | +(u'Ligurian','lij'), |
| 180 | +(u'Maltese','mt'), |
| 181 | +(u'Bihari','bh'), |
| 182 | +(u'Novial','nov'), |
| 183 | +(u'Malagasy','mg'), |
| 184 | +(u'Kashubian','csb'), |
| 185 | +(u'Ilokano','ilo'), |
| 186 | +(u'Sardinian','sc'), |
| 187 | +(u'Classical Chinese','zh-classical'), |
| 188 | +(u'Khmer','km'), |
| 189 | +(u'Ladino','lad'), |
| 190 | +(u'Pali','pi'), |
| 191 | +(u'Anglo-Saxon','ang'), |
| 192 | +(u'Zamboanga Chavacano','cbk-zam'), |
| 193 | +(u'Tibetan','bo'), |
| 194 | +(u'Fiji Hindi','hif'), |
| 195 | +(u'Franco-Provençal','frp'), |
| 196 | +(u'Arpitan','frp'), |
| 197 | +(u'Hakka','hak'), |
| 198 | +(u'Cornish','kw'), |
| 199 | +(u'Punjabi','pa'), |
| 200 | +(u'Pashto','ps'), |
| 201 | +(u'Kalmyk','xal'), |
| 202 | +(u'Silesian','szl'), |
| 203 | +(u'Pennsylvania German','pdc'), |
| 204 | +(u'Hawaiian','haw'), |
| 205 | +(u'Saterland Frisian','stq'), |
| 206 | +(u'Interlingue','ie'), |
| 207 | +(u'Navajo','nv'), |
| 208 | +(u'Fijian','fj'), |
| 209 | +(u'Crimean Tatar','crh'), |
| 210 | +(u'Komi','kv'), |
| 211 | +(u'Tongan','to'), |
| 212 | +(u'Acehnese','ace'), |
| 213 | +(u'Somali','so'), |
| 214 | +(u'Erzya','myv'), |
| 215 | +(u'Guarani','gn'), |
| 216 | +(u'Karachay-Balkar','krc'), |
| 217 | +(u'Extremaduran','ext'), |
| 218 | +(u'Lingala','ln'), |
| 219 | +(u'Kirghiz','ky'), |
| 220 | +(u'Meadow Mari','mhr'), |
| 221 | +(u'Assyrian Neo-Aramaic','arc'), |
| 222 | +(u'Emilian-Romagnol','eml'), |
| 223 | +(u'Lojban','jbo'), |
| 224 | +(u'Picard','pcd'), |
| 225 | +(u'Aymara','ay'), |
| 226 | +(u'Wolof','wo'), |
| 227 | +(u'Tumbuka','tum'), |
| 228 | +(u'Kabyle','kab'), |
| 229 | +(u'Bashkir','ba'), |
| 230 | +(u'North Frisian','frr'), |
| 231 | +(u'Tahitian','ty'), |
| 232 | +(u'Tok Pisin','tpi'), |
| 233 | +(u'Papiamentu','pap'), |
| 234 | +(u'Zealandic','zea'), |
| 235 | +(u'Sranan','srn'), |
| 236 | +(u'Greenlandic','kl'), |
| 237 | +(u'Udmurt','udm'), |
| 238 | +(u'Chechen','ce'), |
| 239 | +(u'Igbo','ig'), |
| 240 | +(u'Komi-Permyak','koi'), |
| 241 | +(u'Oriya','or'), |
| 242 | +(u'Lower Sorbian','dsb'), |
| 243 | +(u'Kongo','kg'), |
| 244 | +(u'Lao','lo'), |
| 245 | +(u'Abkhazian','ab'), |
| 246 | +(u'Moksha','mdf'), |
| 247 | +(u'Romani','rmy'), |
| 248 | +(u'Hill Mari','mrj'), |
| 249 | +(u'Banjar','bjn'), |
| 250 | +(u'Old Church Slavonic','cu'), |
| 251 | +(u'Mirandese','mwl'), |
| 252 | +(u'Karakalpak','kaa'), |
| 253 | +(u'Samoan','sm'), |
| 254 | +(u'Moldovan','mo'), |
| 255 | +(u'Tetum','tet'), |
| 256 | +(u'Avar','av'), |
| 257 | +(u'Kashmiri','ks'), |
| 258 | +(u'Gothic','got'), |
| 259 | +(u'Sindhi','sd'), |
| 260 | +(u'Bambara','bm'), |
| 261 | +(u'Nauruan','na'), |
| 262 | +(u'Norfolk','pih'), |
| 263 | +(u'Pontic','pnt'), |
| 264 | +(u'Inuktitut','iu'), |
| 265 | +(u'Inupiak','ik'), |
| 266 | +(u'Bislama','bi'), |
| 267 | +(u'Cherokee','chr'), |
| 268 | +(u'Assamese','as'), |
| 269 | +(u'Min Dong','cdo'), |
| 270 | +(u'Ewe','ee'), |
| 271 | +(u'Swati','ss'), |
| 272 | +(u'Oromo','om'), |
| 273 | +(u'Zhuang','za'), |
| 274 | +(u'Zulu','zu'), |
| 275 | +(u'Tigrinya','ti'), |
| 276 | +(u'Venda','ve'), |
| 277 | +(u'Tsonga','ts'), |
| 278 | +(u'Hausa','ha'), |
| 279 | +(u'Dzongkha','dz'), |
| 280 | +(u'Sango','sg'), |
| 281 | +(u'Chamorro','ch'), |
| 282 | +(u'Cree','cr'), |
| 283 | +(u'Xhosa','xh'), |
| 284 | +(u'Akan','ak'), |
| 285 | +(u'Sesotho','st'), |
| 286 | +(u'Kinyarwanda','rw'), |
| 287 | +(u'Tswana','tn'), |
| 288 | +(u'Kikuyu','ki'), |
| 289 | +(u'Buryat','bxr'), |
| 290 | +(u'Buginese','bug'), |
| 291 | +(u'Chichewa','ny'), |
| 292 | +(u'Lak','lbe'), |
| 293 | +(u'Twi','tw'), |
| 294 | +(u'Shona','sn'), |
| 295 | +(u'Kirundi','rn'), |
| 296 | +(u'Fula','ff'), |
| 297 | +(u'Cheyenne','chy'), |
| 298 | +(u'Luganda','lg'), |
| 299 | +(u'Ndonga','ng'), |
| 300 | +(u'Sichuan Yi','ii'), |
| 301 | +(u'Choctaw','cho'), |
| 302 | +(u'Marshallese','mh'), |
| 303 | +(u'Afar','aa'), |
| 304 | +(u'Kuanyama','kj'), |
| 305 | +(u'Hiri Motu','ho'), |
| 306 | +(u'Muscogee','mus'), |
| 307 | +(u'Kanuri','kr'), |
| 308 | +(u'Herero','hz'), |
| 309 | +(u'English','en'), |
| 310 | +(u'Deutsch','de'), |
| 311 | +(u'Français','fr'), |
| 312 | +(u'Italiano','it'), |
| 313 | +(u'Polski','pl'), |
| 314 | +(u'日本語','ja'), |
| 315 | +(u'Español','es'), |
| 316 | +(u'Nederlands','nl'), |
| 317 | +(u'Português','pt'), |
| 318 | +(u'Русский','ru'), |
| 319 | +(u'Svenska','sv'), |
| 320 | +(u'中文','zh'), |
| 321 | +(u'Català','ca'), |
| 322 | +(u'Norsk','no'), |
| 323 | +(u'Bokmål','no'), |
| 324 | +(u'Suomi','fi'), |
| 325 | +(u'Українська','uk'), |
| 326 | +(u'Magyar','hu'), |
| 327 | +(u'Čeština','cs'), |
| 328 | +(u'Română','ro'), |
| 329 | +(u'Türkçe','tr'), |
| 330 | +(u'한국어','ko'), |
| 331 | +(u'Tiếng Việt','vi'), |
| 332 | +(u'Dansk','da'), |
| 333 | +(u'العربية','ar'), |
| 334 | +(u'Esperanto','eo'), |
| 335 | +(u'Српски','sr'), |
| 336 | +(u'Srpski','sr'), |
| 337 | +(u'Bahasa Indonesia','id'), |
| 338 | +(u'Lietuvių','lt'), |
| 339 | +(u'Volapük','vo'), |
| 340 | +(u'Slovenčina','sk'), |
| 341 | +(u'עברית','he'), |
| 342 | +(u'Български','bg'), |
| 343 | +(u'فارسی','fa'), |
| 344 | +(u'Slovenščina','sl'), |
| 345 | +(u'Winaray','war'), |
| 346 | +(u'Hrvatski','hr'), |
| 347 | +(u'Eesti','et'), |
| 348 | +(u'Bahasa Melayu','ms'), |
| 349 | +(u'नेपाल भाषा','new'), |
| 350 | +(u'Simple English','simple'), |
| 351 | +(u'Galego','gl'), |
| 352 | +(u'ไทย','th'), |
| 353 | +(u'Armãneashce','roa-rup'), |
| 354 | +(u'Nynorsk','nn'), |
| 355 | +(u'Euskara','eu'), |
| 356 | +(u'हिन्दी','hi'), |
| 357 | +(u'Ελληνικά','el'), |
| 358 | +(u'Krèyol ayisyen','ht'), |
| 359 | +(u'Latina','la'), |
| 360 | +(u'తెలుగు','te'), |
| 361 | +(u'ქართული','ka'), |
| 362 | +(u'Sinugboanong Binisaya','ceb'), |
| 363 | +(u'Македонски','mk'), |
| 364 | +(u'Azərbaycan','az'), |
| 365 | +(u'Tagalog','tl'), |
| 366 | +(u'Brezhoneg','br'), |
| 367 | +(u'Srpskohrvatski','sh'), |
| 368 | +(u'Српскохрватски','sh'), |
| 369 | +(u'मराठी','mr'), |
| 370 | +(u'Lëtzebuergesch','lb'), |
| 371 | +(u'Basa Jawa','jv'), |
| 372 | +(u'Latviešu','lv'), |
| 373 | +(u'Bosanski','bs'), |
| 374 | +(u'Íslenska','is'), |
| 375 | +(u'Cymraeg','cy'), |
| 376 | +(u'Беларуская','be-x-old'), |
| 377 | +(u'тарашкевіца','be-x-old'), |
| 378 | +(u'Piemontèis','pms'), |
| 379 | +(u'Shqip','sq'), |
| 380 | +(u'தமிழ்','ta'), |
| 381 | +(u'ইমার ঠার','bpy'), |
| 382 | +(u'বিষ্ণুপ্রিয়া মণিপুরী','bpy'), |
| 383 | +(u'Беларуская','be'), |
| 384 | +(u'Aragonés','an'), |
| 385 | +(u'Occitan','oc'), |
| 386 | +(u'বাংলা','bn'), |
| 387 | +(u'Kiswahili','sw'), |
| 388 | +(u'Ido','io'), |
| 389 | +(u'Ripoarisch','ksh'), |
| 390 | +(u'Lumbaart','lmo'), |
| 391 | +(u'Frysk','fy'), |
| 392 | +(u'ગુજરાતી','gu'), |
| 393 | +(u'Plattdüütsch','nds'), |
| 394 | +(u'Afrikaans','af'), |
| 395 | +(u'Sicilianu','scn'), |
| 396 | +(u'Runa Simi','qu'), |
| 397 | +(u'Kurdî','ku'), |
| 398 | +(u'كوردی','ku'), |
| 399 | +(u'اردو','ur'), |
| 400 | +(u'Basa Sunda','su'), |
| 401 | +(u'മലയാളം','ml'), |
| 402 | +(u'粵語','zh-yue'), |
| 403 | +(u'Asturianu','ast'), |
| 404 | +(u'Nnapulitano','nap'), |
| 405 | +(u'Žemaitėška','bat-smg'), |
| 406 | +(u'Walon','wa'), |
| 407 | +(u'Чăваш','cv'), |
| 408 | +(u'Gaeilge','ga'), |
| 409 | +(u'Հայերեն','hy'), |
| 410 | +(u'Yorùbá','yo'), |
| 411 | +(u'ಕನ್ನಡ','kn'), |
| 412 | +(u'Тоҷикӣ','tg'), |
| 413 | +(u'Tarandíne','roa-tara'), |
| 414 | +(u'Vèneto','vec'), |
| 415 | +(u'شاہ مکھی پنجابی','pnb'), |
| 416 | +(u'Shāhmukhī Pañjābī','pnb'), |
| 417 | +(u'नेपाली','ne'), |
| 418 | +(u'Gàidhlig','gd'), |
| 419 | +(u'ייִדיש','yi'), |
| 420 | +(u'Bân-lâm-gú','zh-min-nan'), |
| 421 | +(u'O‘zbek','uz'), |
| 422 | +(u'Tatarça','tt'), |
| 423 | +(u'Татарча','tt'), |
| 424 | +(u'Kapampangan','pam'), |
| 425 | +(u'Иронау','os'), |
| 426 | +(u'Саха тыла','sah'), |
| 427 | +(u'Saxa Tyla','sah'), |
| 428 | +(u'Alemannisch','als'), |
| 429 | +(u'Māori','mi'), |
| 430 | +(u'مصرى','arz'), |
| 431 | +(u'Maṣrī','arz'), |
| 432 | +(u'Қазақша','kk'), |
| 433 | +(u'Nāhuatl','nah'), |
| 434 | +(u'Limburgs','li'), |
| 435 | +(u'Hornjoserbsce','hsb'), |
| 436 | +(u'گیلکی','glk'), |
| 437 | +(u'Corsu','co'), |
| 438 | +(u'贛語','gan'), |
| 439 | +(u'አማርኛ','am'), |
| 440 | +(u'Монгол','mn'), |
| 441 | +(u'Interlingua','ia'), |
| 442 | +(u'Bikol','bcl'), |
| 443 | +(u'Võro','fiu-vro'), |
| 444 | +(u'Nedersaksisch','nds-nl'), |
| 445 | +(u'Føroyskt','fo'), |
| 446 | +(u'تركمن ','tk'), |
| 447 | +(u'Туркмен','tk'), |
| 448 | +(u'Scots','sco'), |
| 449 | +(u'West-Vlams','vls'), |
| 450 | +(u'සිංහල','si'), |
| 451 | +(u'संस्कृतम्','sa'), |
| 452 | +(u'Boarisch','bar'), |
| 453 | +(u'မ္ရန္မာစာ','my'), #Needs fix |
| 454 | +(u'Gaelg','gv'), |
| 455 | +(u'ދިވެހިބަސް','dv'), |
| 456 | +(u'Nouormand','nrm'), |
| 457 | +(u'Normaund','nrm'), |
| 458 | +(u'Pangasinan','pag'), |
| 459 | +(u'Rumantsch','rm'), |
| 460 | +(u'Basa Banyumasan','map-bms'), |
| 461 | +(u'Zazaki','diq'), |
| 462 | +(u'Soranî','ckb'), |
| 463 | +(u'کوردی','ckb'), |
| 464 | +(u'Sámegiella','se'), |
| 465 | +(u'مَزِروني','mzn'), |
| 466 | +(u'吴语','wuu'), |
| 467 | +(u'Oyghurque','ug'), |
| 468 | +(u'Furlan','fur'), |
| 469 | +(u'Líguru','lij'), |
| 470 | +(u'Malti','mt'), |
| 471 | +(u'भोजपुरी','bh'), |
| 472 | +(u'Novial','nov'), |
| 473 | +(u'Malagasy','mg'), |
| 474 | +(u'Kaszëbsczi','csb'), |
| 475 | +(u'Ilokano','ilo'), |
| 476 | +(u'Sardu','sc'), |
| 477 | +(u'古文','zh-classical'), |
| 478 | +(u'文言文','zh-classical'), |
| 479 | +(u'ភាសាខ្មែរ','km'), |
| 480 | +(u'Dzhudezmo','lad'), |
| 481 | +(u'पाऴि','pi'), |
| 482 | +(u'Englisc','ang'), |
| 483 | +(u'Chavacano de Zamboanga','cbk-zam'), |
| 484 | +(u'བོད་སྐད','bo'), |
| 485 | +(u'Fiji Hindi','hif'), |
| 486 | +(u'Arpitan','frp'), |
| 487 | +(u'Hak-kâ-fa','hak'), |
| 488 | +(u'客家話','hak'), |
| 489 | +(u'Kernewek','kw'), |
| 490 | +(u'Karnuack','kw'), |
| 491 | +(u'ਪੰਜਾਬੀ','pa'), |
| 492 | +(u'پښتو','ps'), |
| 493 | +(u'Хальмг','xal'), |
| 494 | +(u'Ślůnski','szl'), |
| 495 | +(u'Deitsch','pdc'), |
| 496 | +(u'Hawai`i','haw'), |
| 497 | +(u'Seeltersk','stq'), |
| 498 | +(u'Interlingue','ie'), |
| 499 | +(u'Diné bizaad','nv'), |
| 500 | +(u'Na Vosa Vakaviti','fj'), |
| 501 | +(u'Qırımtatarca','crh'), |
| 502 | +(u'Коми','kv'), |
| 503 | +(u'faka Tonga','to'), |
| 504 | +(u'Bahsa Acèh','ace'), |
| 505 | +(u'Soomaaliga','so'), |
| 506 | +(u'Эрзянь','myv'), |
| 507 | +(u'Erzjanj Kelj','myv'), |
| 508 | +(u"Avañe'ẽ",'gn'), |
| 509 | +(u'Къарачай-Малкъар','krc'), |
| 510 | +(u'Qarachay-Malqar','krc'), |
| 511 | +(u'Estremeñu','ext'), |
| 512 | +(u'Lingala','ln'), |
| 513 | +(u'Кыргызча','ky'), |
| 514 | +(u'Олык Марий','mhr'), |
| 515 | +(u'Olyk Marij','mhr'), |
| 516 | +(u'ܐܪܡܝܐ','arc'), |
| 517 | +(u'Emiliàn e rumagnòl','eml'), |
| 518 | +(u'Lojban','jbo'), |
| 519 | +(u'Picard','pcd'), |
| 520 | +(u'Aymar','ay'), |
| 521 | +(u'Wolof','wo'), |
| 522 | +(u'chiTumbuka','tum'), |
| 523 | +(u'Taqbaylit','kab'), |
| 524 | +(u'Башҡорт','ba'), |
| 525 | +(u'Frasch','frr'), |
| 526 | +(u'Reo Mā`ohi','ty'), |
| 527 | +(u'Tok Pisin','tpi'), |
| 528 | +(u'Papiamentu','pap'), |
| 529 | +(u'Zeêuws','zea'), |
| 530 | +(u'Sranantongo','srn'), |
| 531 | +(u'Kalaallisut','kl'), |
| 532 | +(u'Удмурт кыл','udm'), |
| 533 | +(u'Нохчийн','ce'), |
| 534 | +(u'Igbo','ig'), |
| 535 | +(u'Перем Коми','koi'), |
| 536 | +(u'Perem Komi','koi'), |
| 537 | +(u'ଓଡ଼ିଆ','or'), |
| 538 | +(u'Dolnoserbski','dsb'), |
| 539 | +(u'KiKongo','kg'), |
| 540 | +(u'ລາວ','lo'), |
| 541 | +(u'Аҧсуа','ab'), |
| 542 | +(u'Мокшень','mdf'), |
| 543 | +(u'Mokshanj Kälj','mdf'), |
| 544 | +(u'romani - रोमानी','rmy'), |
| 545 | +(u'Кырык Мары','mrj'), |
| 546 | +(u'Kyryk Mary','mrj'), |
| 547 | +(u'Bahasa Banjar','bjn'), |
| 548 | +(u'Словѣньскъ','cu'), |
| 549 | +(u'Páigina Percipal','mwl'), |
| 550 | +(u'Qaraqalpaqsha','kaa'), |
| 551 | +(u'Gagana Samoa','sm'), |
| 552 | +(u'Молдовеняскэ','mo'), |
| 553 | +(u'Tetun','tet'), |
| 554 | +(u'Авар','av'), |
| 555 | +(u'कश्मीरी','ks'), |
| 556 | +(u'كشميري','ks'), |
| 557 | +(u'𐌲𐌿𐍄𐌹𐍃𐌺','got'), #Needs fix |
| 558 | +(u'سنڌي، سندھی ، सिन्ध','sd'), |
| 559 | +(u'Bamanankan','bm'), |
| 560 | +(u'dorerin Naoero','na'), |
| 561 | +(u'Norfuk','pih'), |
| 562 | +(u'Ποντιακά','pnt'), |
| 563 | +(u'ᐃᓄᒃᑎᑐᑦ','iu'), |
| 564 | +(u'Iñupiak','ik'), |
| 565 | +(u'Bislama','bi'), |
| 566 | +(u'ᏣᎳᎩ','chr'), |
| 567 | +(u'অসমীয়া','as'), |
| 568 | +(u'Mìng-dĕ̤ng-ngṳ̄','cdo'), |
| 569 | +(u'Eʋegbe','ee'), |
| 570 | +(u'SiSwati','ss'), |
| 571 | +(u'Oromoo','om'), |
| 572 | +(u'Cuengh','za'), |
| 573 | +(u'isiZulu','zu'), |
| 574 | +(u'ትግርኛ','ti'), |
| 575 | +(u'Tshivenda','ve'), |
| 576 | +(u'Xitsonga','ts'), |
| 577 | +(u'هَوُسَ','ha'), |
| 578 | +(u'ཇོང་ཁ','dz'), |
| 579 | +(u'Sängö','sg'), |
| 580 | +(u'Chamoru','ch'), |
| 581 | +(u'Nehiyaw','cr'), |
| 582 | +(u'isiXhosa','xh'), |
| 583 | +(u'Akana','ak'), |
| 584 | +(u'Sesotho','st'), |
| 585 | +(u'Ikinyarwanda','rw'), |
| 586 | +(u'Setswana','tn'), |
| 587 | +(u'Gĩkũyũ','ki'), |
| 588 | +(u'Буряад','bxr'), |
| 589 | +(u'Basa Ugi','bug'), |
| 590 | +(u'Chi-Chewa','ny'), |
| 591 | +(u'Лакку','lbe'), |
| 592 | +(u'Twi','tw'), |
| 593 | +(u'chiShona','sn'), |
| 594 | +(u'Kirundi','rn'), |
| 595 | +(u'Fulfulde','ff'), |
| 596 | +(u'Tsetsêhestâhese','chy'), |
| 597 | +(u'Luganda','lg'), |
| 598 | +(u'Oshiwambo','ng'), |
| 599 | +(u'ꆇꉙ','ii'), |
| 600 | +(u'Choctaw','cho'), |
| 601 | +(u'Ebon','mh'), |
| 602 | +(u'Afar','aa'), |
| 603 | +(u'Kuanyama','kj'), |
| 604 | +(u'Hiri Motu','ho'), |
| 605 | +(u'Muskogee','mus'), |
| 606 | +(u'Kanuri','kr'), |
| 607 | +(u'Otsiherero','hz'), |
| 608 | +]) |
\ No newline at end of file |
Index: trunk/tools/editor_trends/utils/dump_downloader.py |
— | — | @@ -46,7 +46,7 @@ |
47 | 47 | return - 1 |
48 | 48 | |
49 | 49 | |
50 | | -def download_wp_dump(domain, path, filename, location, filemode, pbar): |
| 50 | +def download_wiki_file(domain, path, filename, location, filemode, pbar): |
51 | 51 | ''' |
52 | 52 | This is a very simple replacement for wget and curl because Windows does |
53 | 53 | not support these tools. |
— | — | @@ -57,10 +57,13 @@ |
58 | 58 | @pbar is an instance of progressbar.ProgressBar() |
59 | 59 | ''' |
60 | 60 | chunk = 4096 |
| 61 | + result = utils.check_file_exists(location, '') |
| 62 | + if result == False: |
| 63 | + utils.create_directory(os.path.join(location)) |
61 | 64 | if filemode == 'w': |
62 | | - fh = utils.open_txt_file(location, filename, filemode, settings.ENCODING) |
| 65 | + fh = utils.create_txt_filehandle(location, filename, filemode, settings.ENCODING) |
63 | 66 | else: |
64 | | - fh = utils.open_binary_file(location, filename, filemode) |
| 67 | + fh = utils.create_binary_filehandle(location, filename, 'wb') |
65 | 68 | |
66 | 69 | filesize = determine_remote_filesize(domain, path + filename) |
67 | 70 | |
— | — | @@ -73,9 +76,12 @@ |
74 | 77 | pbar = progressbar.ProgressBar(widgets=widgets,maxval=filesize).start() |
75 | 78 | else: |
76 | 79 | pbar = False |
77 | | - |
78 | | - req = urllib2.Request(domain + path + filename) |
| 80 | + |
79 | 81 | try: |
| 82 | + if filename.endswith('json'): |
| 83 | + req = urllib2.Request(domain + path) |
| 84 | + else: |
| 85 | + req = urllib2.Request(domain + path + filename) |
80 | 86 | response = urllib2.urlopen(req) |
81 | 87 | while True: |
82 | 88 | data = response.read(chunk) |