Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -46,8 +46,12 @@ |
47 | 47 | from etl import transformer |
48 | 48 | from etl import exporter |
49 | 49 | |
# Maps the dataset names that can be chosen with the -d/--datasets option to
# the function name that is handed to exporter.dataset_launcher.
datasets = dict((
    ('forward', 'generate_cohort_dataset_forward'),
    ('backward', 'generate_cohort_dataset_backward'),
    ('backward_custom', 'generate_cohort_dataset_backward_custom'),
    ('wide', 'generate_wide_editor_dataset'),
))
50 | 55 | |
51 | | - |
52 | 56 | class Timer(object): |
53 | 57 | def __init__(self): |
54 | 58 | self.t0 = datetime.datetime.now() |
— | — | @@ -102,26 +106,31 @@ |
103 | 107 | else: |
104 | 108 | return namespaces |
105 | 109 | |
| 110 | + |
def write_message_to_log(logger, args, message=None, verb=None, **kwargs):
    '''
    Write a timestamped description of the task that is about to run, and of
    its settings, to the debug log.

    @logger is the logger whose debug method receives every line.
    @args is the parsed command line, its 'func' value is the task function.
    @message is an optional line that is logged before the settings.
    @verb, when given, is logged as the action together with the value of
    each setting; without a verb every key and value is logged, the key
    being followed by tabs.
    @kwargs are the settings to log, nothing more is logged when there are
    none.
    '''
    function = get_value(args, 'func')
    logger.debug('%s\tStarting %s task' % (datetime.datetime.now(), function.func_name))
    if message:
        logger.debug('%s\t%s' % (datetime.datetime.now(), message))

    if not kwargs:
        # max() raises a ValueError on an empty sequence, and there is
        # nothing to tabulate anyway.
        return

    tab_width = settings.tab_width
    max_length = max(len(kw) for kw in kwargs)
    # Round the longest key up to a whole number of tab stops.
    max_tabs = max_length // tab_width
    if max_length % tab_width > 0:
        max_tabs += 1
    pos = max_tabs * tab_width
    for kw in kwargs:
        if verb:
            logger.debug('%s\tAction: %s\tSetting: %s' % (datetime.datetime.now(), verb, kwargs[kw]))
        else:
            # Number of tabs after the key; always at least one so that the
            # key and 'Setting:' never run together.
            tabs = (pos - len(kw)) // tab_width
            if len(kw) % tab_width > 0 or tabs == 0:
                tabs += 1
            logger.debug('%s\t\tKey: %s%sSetting: %s' % (datetime.datetime.now(), kw, '\t' * tabs, kwargs[kw]))
123 | 133 | |
124 | 134 | |
125 | | - |
126 | 135 | def get_project(args): |
127 | 136 | project = get_value(args, 'project') |
128 | 137 | if project != 'wiki': |
— | — | @@ -142,21 +151,17 @@ |
143 | 152 | config['language_code'] = language_code |
144 | 153 | config['language'] = get_value(args, 'language') |
145 | 154 | config['location'] = os.path.join(location, language_code, project) |
146 | | - #config['chunks'] = os.path.join(config['location'], 'chunks') |
147 | 155 | config['txt'] = os.path.join(config['location'], 'txt') |
148 | 156 | config['sorted'] = os.path.join(config['location'], 'sorted') |
149 | | - config['dbready'] = os.path.join(config['location'], 'dbready') |
150 | 157 | config['project'] = project |
151 | 158 | config['full_project'] = get_projectname(args) |
152 | 159 | config['filename'] = generate_wikidump_filename(language_code, project, args) |
153 | 160 | config['collection'] = get_value(args, 'collection') |
154 | 161 | config['namespaces'] = get_namespaces(args) |
155 | | - config['directories'] = [config['location'], config['txt'], config['sorted'], config['dbready']] |
| 162 | + config['directories'] = [config['location'], config['txt'], config['sorted']] |
156 | 163 | |
157 | 164 | message = 'Settings as generated from the configuration module.' |
158 | 165 | write_message_to_log(logger, args, message, None, **config) |
159 | | - #for c in config: |
160 | | - # logger.debug('Key: %s - Setting: %s' % (c, config[c])) |
161 | 166 | return config |
162 | 167 | |
163 | 168 | |
— | — | @@ -170,10 +175,11 @@ |
171 | 176 | config['Input directory'] = '%s' % kwargs.get('location') |
172 | 177 | config['Output directory'] = '%s and subdirectories' % kwargs.get('location') |
173 | 178 | |
| 179 | + max_length_key = max([len(key) for key in config.keys()]) |
174 | 180 | message = 'Final settings after parsing command line arguments:' |
175 | 181 | write_message_to_log(logger, args, message, None, **config) |
176 | 182 | for c in config: |
177 | | - print '%s\t%s' % (c, config[c]) |
| 183 | + print '%s: %s' % (c.rjust(max_length_key), config[c]) |
178 | 184 | |
179 | 185 | |
180 | 186 | def dump_downloader_launcher(args, logger, **kwargs): |
— | — | @@ -244,10 +250,8 @@ |
245 | 251 | location = kwargs.pop('location') |
246 | 252 | input = os.path.join(location, 'txt') |
247 | 253 | output = os.path.join(location, 'sorted') |
248 | | - final_output = os.path.join(location, 'dbready') |
249 | | - write_message_to_log(logger, args, location=location, input=input, output=output, final_output=final_output) |
| 254 | + write_message_to_log(logger, args, location=location, input=input, output=output) |
250 | 255 | sort.mergesort_launcher(input, output) |
251 | | - #loader.mergesort_external_launcher(output, final_output) |
252 | 256 | timer.elapsed() |
253 | 257 | |
254 | 258 | |
— | — | @@ -264,7 +268,6 @@ |
265 | 269 | write_message_to_log(logger, args, verb='Storing', location=location, input=input, project=project, collection=collection) |
266 | 270 | store.launcher(input, project, collection) |
267 | 271 | cnt_editors = db.count_records(project, collection) |
268 | | - #assert num_editors == cnt_editors |
269 | 272 | timer.elapsed() |
270 | 273 | |
271 | 274 | |
— | — | @@ -282,6 +285,7 @@ |
def debug_launcher(args, logger, **kwargs):
    '''
    Placeholder launcher, it takes the launcher arguments and does nothing.
    '''
    return None
285 | 288 | |
| 289 | + |
286 | 290 | def exporter_launcher(args, logger, **kwargs): |
287 | 291 | print 'Start exporting dataset' |
288 | 292 | timer = Timer() |
— | — | @@ -291,6 +295,7 @@ |
292 | 296 | targets = targets.split(',') |
293 | 297 | for target in targets: |
294 | 298 | write_message_to_log(logger, args, verb='Exporting', target=target, dbname=dbname, collection=collection) |
| 299 | + target = datasets[target] |
295 | 300 | exporter.dataset_launcher(dbname, collection, target) |
296 | 301 | timer.elapsed() |
297 | 302 | |
— | — | @@ -309,8 +314,9 @@ |
310 | 315 | write_message_to_log(logger, args, verb='Deleting', file=file) |
311 | 316 | utils.delete_file(settings.binary_location, file) |
312 | 317 | |
| 318 | + |
313 | 319 | def all_launcher(args, logger, **kwargs): |
314 | | - print 'all_launcher' |
| 320 | + print 'The entire data processing chain has been called, this will take a couple of hours (at least) to complete.' |
315 | 321 | timer = Timer() |
316 | 322 | full_project = kwargs.get('full_project', None) |
317 | 323 | message = 'Start of building %s dataset.' % full_project |
— | — | @@ -323,9 +329,6 @@ |
324 | 330 | if clean: |
325 | 331 | cleanup(logger, args, **kwargs) |
326 | 332 | |
327 | | - #if format != 'xml': |
328 | | - # ignore = ignore + ',extract' |
329 | | - |
330 | 333 | functions = ordered_dict.OrderedDict(((dump_downloader_launcher, 'download'), |
331 | 334 | #(chunker_launcher, 'split'), |
332 | 335 | (extract_launcher, 'extract'), |
— | — | @@ -337,7 +340,6 @@ |
338 | 341 | for function, callname in functions.iteritems(): |
339 | 342 | if callname not in ignore: |
340 | 343 | function(args, logger, **kwargs) |
341 | | - |
342 | 344 | timer.elapsed() |
343 | 345 | |
344 | 346 | |
— | — | @@ -374,9 +376,9 @@ |
375 | 377 | |
376 | 378 | |
def about():
    '''
    Print the copyright, author and warranty notice to standard output.
    '''
    notice = (
        '\nEditor Trends Software is (c) 2010 by the Wikimedia Foundation.',
        'Written by Diederik van Liere (dvanliere@gmail.com).',
        'This software comes with ABSOLUTELY NO WARRANTY.\nThis is free software, and you are welcome to distribute it\nunder certain conditions.',
        'See the README.1ST file for more information.',
        '\n',
    )
    # Every entry is a single string, so print(entry) writes the same text
    # whether print is a statement or a function.
    for entry in notice:
        print(entry)
383 | 385 | |
— | — | @@ -384,15 +386,11 @@ |
385 | 387 | def main(): |
386 | 388 | default_language = determine_default_language() |
387 | 389 | |
388 | | - datasets = {'forward': 'generate_cohort_dataset_forward', |
389 | | - 'backward': 'generate_cohort_dataset_backward', |
390 | | - 'wide': 'generate_wide_editor_dataset', |
391 | | - } |
392 | | - |
393 | 390 | file_choices = ('stub-meta-history.xml.gz', |
394 | 391 | 'stub-meta-current.xml.gz', |
395 | 392 | 'pages-meta-history.xml.7z', |
396 | | - 'pages-meta-current.xml.bz2') |
| 393 | + 'pages-meta-current.xml.bz2', |
| 394 | + ) |
397 | 395 | |
398 | 396 | |
399 | 397 | parser = ArgumentParser(prog='manage', formatter_class=RawTextHelpFormatter) |
— | — | @@ -418,7 +416,7 @@ |
419 | 417 | parser_create = subparsers.add_parser('extract', help='The store sub command parsers the XML chunk files, extracts the information and stores it in a MongoDB.') |
420 | 418 | parser_create.set_defaults(func=extract_launcher) |
421 | 419 | |
422 | | - parser_sort = subparsers.add_parser('sort', help='By presorting the data, significant processing time reducations are achieved.') |
| 420 | + parser_sort = subparsers.add_parser('sort', help='By presorting the data, significant processing time reductions are achieved.') |
423 | 421 | parser_sort.set_defaults(func=sort_launcher) |
424 | 422 | |
425 | 423 | parser_store = subparsers.add_parser('store', help='The store sub command parsers the XML chunk files, extracts the information and stores it in a MongoDB.') |
— | — | @@ -435,59 +433,75 @@ |
436 | 434 | |
437 | 435 | parser_all = subparsers.add_parser('all', help='The all sub command runs the download, split, store and dataset commands.\n\nWARNING: THIS COULD TAKE DAYS DEPENDING ON THE CONFIGURATION OF YOUR MACHINE AND THE SIZE OF THE WIKIMEDIA DUMP FILE.') |
438 | 436 | parser_all.set_defaults(func=all_launcher) |
439 | | - parser_all.add_argument('-e', '--except', action='store', |
| 437 | + parser_all.add_argument('-e', '--except', |
| 438 | + action='store', |
440 | 439 | help='Should be a list of functions that are to be ignored when executing \'all\'.', |
441 | | - default=[]) |
| 440 | + default=[] |
| 441 | + ) |
442 | 442 | |
443 | | - parser_all.add_argument('-n', '--new', action='store_true', |
| 443 | + parser_all.add_argument('-n', '--new', |
| 444 | + action='store_true', |
444 | 445 | help='This will delete all previous output and starts from scratch. Mostly useful for debugging purposes.', |
445 | | - default=False) |
| 446 | + default=False |
| 447 | + ) |
446 | 448 | |
447 | | - parser.add_argument('-l', '--language', action='store', |
| 449 | + parser.add_argument('-l', '--language', |
| 450 | + action='store', |
448 | 451 | help='Example of valid languages.', |
449 | 452 | choices=supported_languages(), |
450 | | - default=default_language) |
| 453 | + default=default_language |
| 454 | + ) |
451 | 455 | |
452 | | - parser.add_argument('-p', '--project', action='store', |
| 456 | + parser.add_argument('-p', '--project', |
| 457 | + action='store', |
453 | 458 | help='Specify the Wikimedia project that you would like to download', |
454 | 459 | choices=settings.projects.keys(), |
455 | | - default='wiki') |
| 460 | + default='wiki' |
| 461 | + ) |
456 | 462 | |
457 | 463 | parser.add_argument('-c', '--collection', action='store', |
458 | 464 | help='Name of MongoDB collection', |
459 | 465 | default='editors') |
460 | 466 | |
461 | 467 | |
462 | | - parser.add_argument('-o', '--location', action='store', |
| 468 | + parser.add_argument('-o', '--location', |
| 469 | + action='store', |
463 | 470 | help='Indicate where you want to store the downloaded file.', |
464 | 471 | default=settings.input_location |
465 | 472 | ) |
466 | 473 | |
467 | | - parser.add_argument('-ns', '--namespace', action='store', |
| 474 | + parser.add_argument('-ns', '--namespace', |
| 475 | + action='store', |
468 | 476 | help='A list of namespaces to include for analysis.', |
469 | | - default='0') |
| 477 | + default='0' |
| 478 | + ) |
470 | 479 | |
471 | | - #parser.add_argument('-fo', '--format', action='store', |
472 | | - # help='Indicate which format the chunks should be stored. Valid options are xml and txt.', |
473 | | - # default='txt') |
474 | | - |
475 | | - parser.add_argument('-f', '--file', action='store', |
| 480 | + parser.add_argument('-f', '--file', |
| 481 | + action='store', |
476 | 482 | choices=file_choices, |
477 | 483 | help='Indicate which dump you want to download. Valid choices are:\n %s' % ''.join([f + ',\n' for f in file_choices]), |
478 | | - default='stub-meta-history.xml.gz') |
| 484 | + default='stub-meta-history.xml.gz' |
| 485 | + ) |
479 | 486 | |
480 | | - parser.add_argument('-dv', '--dumpversion', action='store', |
| 487 | + parser.add_argument('-dv', '--dumpversion', |
| 488 | + action='store', |
481 | 489 | choices=settings.dumpversions.keys(), |
482 | 490 | help='Indicate the Wikidump version that you are parsing.', |
483 | | - default=settings.dumpversions['0']) |
| 491 | + default=settings.dumpversions['0'] |
| 492 | + ) |
484 | 493 | |
485 | | - parser.add_argument('-d', '--datasets', action='store', |
| 494 | + parser.add_argument('-d', '--datasets', |
| 495 | + action='store', |
486 | 496 | choices=datasets.keys(), |
487 | 497 | help='Indicate what type of data should be exported.', |
488 | | - default=datasets['backward']) |
| 498 | + default='backward' |
| 499 | + ) |
489 | 500 | |
490 | | - parser.add_argument('-prog', '--progress', action='store_true', default=True, |
491 | | - help='Indicate whether you want to have a progressbar.') |
| 501 | + parser.add_argument('-prog', '--progress', |
| 502 | + action='store_true', |
| 503 | + default=True, \ |
| 504 | + help='Indicate whether you want to have a progressbar.' |
| 505 | + ) |
492 | 506 | |
493 | 507 | args = parser.parse_args() |
494 | 508 | #initialize logger |
Index: trunk/tools/editor_trends/analyses/aggregates.py |
— | — | @@ -31,11 +31,6 @@ |
32 | 32 | from utils import messages |
33 | 33 | |
34 | 34 | |
35 | | -class Dataset: |
36 | | - def __init__(self): |
37 | | - pass |
38 | | - |
39 | | - |
40 | 35 | def new_editor_count(editors, dbname, collection, month=12): |
41 | 36 | ''' |
42 | 37 | @month should be an integer in the range of 1-12. |
— | — | @@ -105,6 +100,7 @@ |
106 | 101 | utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True) |
107 | 102 | fh.close() |
108 | 103 | |
| 104 | + |
109 | 105 | def active_editor_count_launcher(dbname, collection): |
110 | 106 | editors = db.retrieve_distinct_keys(dbname, collection, 'editor') |
111 | 107 | tasks = multiprocessing.JoinableQueue() |
Index: trunk/tools/editor_trends/analyses/cohort_charts.py |
— | — | @@ -25,8 +25,8 @@ |
26 | 26 | settings = configuration.Settings() |
27 | 27 | from utils import utils |
28 | 28 | |
29 | | -def prepare_cohort_dataset(dbname): |
30 | | - dataset = utils.load_object(settings.binary_location, dbname + '_cohort_data.bin') |
| 29 | +def prepare_cohort_dataset(dbname, filename): |
| 30 | + dataset = utils.load_object(settings.binary_location, '%s_%s' % (dbname, filename)) |
31 | 31 | fh = utils.create_txt_filehandle(settings.dataset_location, dbname + '_cohort_data.txt', 'w', settings.encoding) |
32 | 32 | |
33 | 33 | years = dataset.keys() |
Index: trunk/tools/editor_trends/etl/exporter.py |
— | — | @@ -20,8 +20,8 @@ |
21 | 21 | import os |
22 | 22 | import sys |
23 | 23 | import datetime |
| 24 | +import calendar |
24 | 25 | from dateutil.relativedelta import * |
25 | | -import calendar |
26 | 26 | import multiprocessing |
27 | 27 | from Queue import Empty |
28 | 28 | |
— | — | @@ -51,7 +51,13 @@ |
52 | 52 | ''' |
53 | 53 | def __init__(self, var): |
54 | 54 | self.name = var |
| 55 | + self.obs = [] |
55 | 56 | self.stats = ['n', 'avg', 'sd', 'min', 'max'] |
| 57 | + |
| 58 | + def __repr__(self): |
| 59 | + return self.name |
| 60 | + |
| 61 | + def descriptives(self): |
56 | 62 | self.time = shaper.create_datacontainer() |
57 | 63 | self.time = shaper.add_months_to_datacontainer(getattr(self, 'time'), datatype='dict') |
58 | 64 | |
— | — | @@ -59,10 +65,6 @@ |
60 | 66 | setattr(self, var, shaper.create_datacontainer()) |
61 | 67 | setattr(self, var, shaper.add_months_to_datacontainer(getattr(self, var), datatype='list')) |
62 | 68 | |
63 | | - def __repr__(self): |
64 | | - return self.name |
65 | | - |
66 | | - def descriptives(self): |
67 | 69 | for year in self.time: |
68 | 70 | for month in self.time[year]: |
69 | 71 | data = [self.time[year][month][k] for k in self.time[year][month].keys()] |
— | — | @@ -78,8 +80,8 @@ |
79 | 81 | This class acts as a container for the Variable class and has some methods |
80 | 82 | to output the dataset to a csv file. |
81 | 83 | ''' |
82 | | - def __init__(self, vars): |
83 | | - self.name = 'long_dataset.tsv' |
| 84 | + def __init__(self, vars, name): |
| 85 | + self.name = name |
84 | 86 | self.vars = [] |
85 | 87 | for var in vars: |
86 | 88 | setattr(self, var, Variable(var)) |
— | — | @@ -92,8 +94,9 @@ |
93 | 95 | fh.write('_time\t') |
94 | 96 | for var in self.vars: |
95 | 97 | var = getattr(self, var) |
96 | | - for stat in var.stats: |
97 | | - fh.write('%s_%s\t' % (var.name, stat)) |
| 98 | + fh.write('%s\t' % var.name) |
| 99 | + #for stat in var.stats: |
| 100 | + # fh.write('%s_%s\t' % (var.name, stat)) |
98 | 101 | fh.write('\n') |
99 | 102 | |
100 | 103 | def convert_to_longitudinal_data(self, id, obs, vars): |
— | — | @@ -108,32 +111,43 @@ |
109 | 112 | if id not in ds.time[year][m] and obs[var][year][m] > 0: |
110 | 113 | ds.time[year][m][id] = obs[var][year][m] |
111 | 114 | |
def write_longitudinal_data(self, write_time=True):
    '''
    Write the headers and then the observations of every variable to the
    file self.name in settings.dataset_location, one observation per line.

    @write_time determines whether the first element of an observation is
    written in front of the second element, if False only the second
    element is written.
    '''
    fh = utils.create_txt_filehandle(settings.dataset_location, self.name, 'w', settings.encoding)
    try:
        self.write_headers(fh)
        for var in self.vars:
            # self.vars holds attribute names, the variable itself is an
            # attribute of the dataset.
            var = getattr(self, var)
            for o in var.obs:
                if write_time:
                    fh.write('%s\t%s\n' % (o[0], o[1]))
                else:
                    fh.write('%s\n' % (o[1]))
    finally:
        # Close the file even when writing an observation fails.
        fh.close()
136 | 126 | |
| 127 | +# windows = create_windows() |
| 128 | +# dc = shaper.create_datacontainer() |
| 129 | +# dc = shaper.add_months_to_datacontainer(dc, windows) |
| 130 | +# |
| 131 | +## for var in self.vars: |
| 132 | +## var = getattr(self, var) |
| 133 | +## var.descriptives() |
| 134 | +# years = dc.keys() |
| 135 | +# years.sort() |
| 136 | +# for year in years: |
| 137 | +# months = dc[year].keys() |
| 138 | +# months.sort() |
| 139 | +# for month in months: |
| 140 | +# d = calendar.monthrange(int(year), int(month))[1] #determines the number of days in a given month/year |
| 141 | +# date = datetime.date(int(year), int(month), d) |
| 142 | +# fh.write('%s\t' % date) |
| 143 | +# for var in self.vars: |
| 144 | +# var = getattr(self, var) |
| 145 | +# #data = ['%s_%s\t' % (var.name, getattr(var, stat)[year][month]) for stat in var.stats] |
| 146 | +# fh.write(''.join([ % s\t])) |
| 147 | +# #fh.write(''.join(['%s\t' % (getattr(var, stat)[year][month],) for stat in var.stats])) |
| 148 | +# fh.write('\n') |
137 | 149 | |
| 150 | + |
| 151 | + |
138 | 152 | def expand_edits(edits): |
139 | 153 | data = [] |
140 | 154 | for edit in edits: |
— | — | @@ -201,110 +215,113 @@ |
202 | 216 | return windows |
203 | 217 | |
204 | 218 | |
205 | | -#def generate_cohort_dataset_old(tasks, dbname, collection, **kwargs): |
206 | | -# mongo = db.init_mongo_db(dbname) |
207 | | -# editors = mongo[collection + '_dataset'] |
208 | | -# windows = create_windows() |
209 | | -# data = shaper.create_datacontainer('dict') |
210 | | -# data = shaper.add_windows_to_datacontainer(data, windows) |
211 | | -# |
212 | | -# while True: |
213 | | -# id = tasks.get(block=False) |
214 | | -# tasks.task_done() |
215 | | -# if id == None: |
216 | | -# break |
217 | | -# obs = editors.find_one({'editor': id}, {'first_edit': 1, 'final_edit': 1}) |
218 | | -# |
219 | | -# first_edit = obs['first_edit'] |
220 | | -# last_edit = obs['final_edit'] |
221 | | -# editor_dt = relativedelta(last_edit, first_edit) |
222 | | -# editor_dt = (editor_dt.years * 12) + editor_dt.months |
223 | | -# edits = [] |
224 | | -# for year in xrange(2001, datetime.datetime.now().year + 1): |
225 | | -# if first_edit.year > year or last_edit.year < year: |
226 | | -# continue |
227 | | -# window_end = datetime.datetime(year, 12, 31) |
228 | | -# for window in windows: |
229 | | -# window_start = window_end - relativedelta(months=window) |
230 | | -# if window_start < datetime.datetime(2001, 1, 1): |
231 | | -# window_start = datetime.datetime(2001, 1, 1) |
232 | | -# |
233 | | -# if editor_dt > 11: |
234 | | -# if date_falls_in_window(window_start, window_end, first_edit): |
235 | | -# edits.append(window) |
236 | | -# elif window > editor_dt: |
237 | | -# data[year][window] += 1 |
238 | | -# break |
239 | | -# |
240 | | -# if edits != []: |
241 | | -# w = min(edits) |
242 | | -# data[year][w] += 1 |
243 | | -# edits = [] |
244 | | -# |
245 | | -# |
246 | | -# print 'Storing data as %s' % os.path.join(settings.binary_location, dbname + '_cohort_data.bin') |
247 | | -# utils.store_object(data, settings.binary_location, dbname + '_cohort_data.bin') |
248 | | -# cohort_charts.prepare_cohort_dataset(dbname) |
249 | 219 | |
def diff_month(d1, d2):
    '''
    Return the number of calendar months from @d2 to @d1, the day of the
    month is ignored. The result is negative when @d1 falls in an earlier
    month than @d2.
    '''
    years = d1.year - d2.year
    months = d1.month - d2.month
    return years * 12 + months
250 | 222 | |
251 | 223 | |
def generate_cohort_dataset_raw(tasks, dbname, collection):
    '''
    For every editor id taken from @tasks, determine the number of months
    between the new_wikipedian date and the final edit of that editor and
    write these observations to <dbname>_forward_cohort.csv.

    @tasks is a queue of editor ids, a None value marks the end of the queue.
    @dbname is the name of the database that is opened.
    @collection is the name of the collection, '_dataset' is appended to it.
    '''
    mongo = db.init_mongo_db(dbname)
    editors = mongo['%s%s' % (collection, '_dataset')]
    ld = LongDataset(['experience'], '%s_forward_cohort.csv' % dbname)
    while True:
        editor_id = tasks.get(block=False)
        tasks.task_done()
        if editor_id is None:
            break
        obs = editors.find_one({'editor': editor_id},
                               {'new_wikipedian': 1,
                                'monthly_edits': 1,
                                'final_edit': 1
                               })

        new_wikipedian = obs['new_wikipedian']
        last_edit = obs['final_edit']
        dt = diff_month(last_edit, new_wikipedian)
        # The observation is dated on the last day of the month of the
        # new_wikipedian date.
        day = calendar.monthrange(new_wikipedian.year, new_wikipedian.month)[1]
        tenth_edit = datetime.date(new_wikipedian.year, new_wikipedian.month, day)
        ld.experience.obs.append([tenth_edit, dt])

    ld.write_longitudinal_data()
| 250 | + |
def generate_cohort_dataset_forward(tasks, dbname, collection, **kwargs):
    '''
    Count, for every cohort (the year and month of the new_wikipedian date)
    and every month of experience, the number of editors that made at least
    one edit in that month, and write the counts to
    <dbname>_cohort_data_forward.csv.

    The month of experience is the number of years since the cohort year
    times 12 plus the calendar month. Months of the cohort year that precede
    the month of the new_wikipedian date are skipped.

    @tasks is a queue of editor ids, a None value marks the end of the queue.
    @dbname is the name of the database that is opened.
    @collection is the name of the collection, '_dataset' is appended to it.
    @kwargs is accepted, and ignored, so that the signature matches the
    other dataset generators.
    '''
    mongo = db.init_mongo_db(dbname)
    editors = mongo[collection + '_dataset']
    final_year = datetime.datetime.now().year + 1
    data = shaper.create_datacontainer('dict')
    while True:
        editor_id = tasks.get(block=False)
        if editor_id is None:
            break

        obs = editors.find_one({'editor': editor_id}, {'new_wikipedian': 1, 'monthly_edits': 1, 'final_edit': 1})
        new_wikipedian = obs['new_wikipedian']
        edits = obs['monthly_edits']

        cohort = data[new_wikipedian.year].setdefault(new_wikipedian.month, {})
        for i, year in enumerate(xrange(new_wikipedian.year, final_year)):
            # A year without edits simply contributes no months.
            monthly = edits.get(str(year), {})
            for key, count in monthly.items():
                month = int(key)
                # Select the months of the first year by value, this also
                # works when not all twelve months are present.
                if i == 0 and month < new_wikipedian.month:
                    continue
                experience = str(i * 12 + month)
                if experience not in cohort:
                    cohort[experience] = 0
                if count > 0:
                    cohort[experience] += 1

    fh = utils.create_txt_filehandle(settings.dataset_location, '%s_cohort_data_forward.csv' % (dbname), 'w', settings.encoding)
    try:
        for year in sorted(data):
            for month in sorted(data[year]):
                cohort = data[year][month]
                # The keys are strings, sort them as numbers so that month
                # 2 is written before month 10.
                for experience in sorted(cohort, key=int):
                    utils.write_list_to_csv(['%s-%s' % (month, year), experience, cohort[experience]], fh, recursive=False, newline=True)
    finally:
        fh.close()
306 | 293 | |
307 | 294 | |
_NO_DEFAULT = object()


def _lookup_year(mapping, year, default=_NO_DEFAULT):
    '''
    Return the value of @year in @mapping, the string form of the year is
    tried before the integer form. Raises a KeyError when the year is
    missing and no @default is given.
    '''
    key = str(year)
    if key in mapping:
        return mapping[key]
    if year in mapping:
        return mapping[year]
    if default is _NO_DEFAULT:
        raise KeyError(year)
    return default


def generate_cohort_dataset_backward_custom(tasks, dbname, collection, **kwargs):
    '''
    Build the cohort data for the editors that made at least one edit in
    August 2010. For every year in which such an editor made an edit, the
    number of months between the first edit and the last edit of that year
    is determined and the first window that is large enough is incremented.
    The result is stored as a binary object and handed to
    cohort_charts.prepare_cohort_dataset.

    @tasks is a queue of editor ids, a None value marks the end of the queue.
    @dbname is the name of the database that is opened.
    @collection is the name of the collection, '_dataset' is appended to it.
    @kwargs is accepted, and ignored, so that the signature matches the
    other dataset generators.
    '''
    mongo = db.init_mongo_db(dbname)
    editors = mongo[collection + '_dataset']
    windows = create_windows()
    data = shaper.create_datacontainer('dict')
    data = shaper.add_windows_to_datacontainer(data, windows)
    final_year = datetime.datetime.now().year + 1

    while True:
        editor_id = tasks.get(block=False)
        tasks.task_done()
        if editor_id is None:
            break
        obs = editors.find_one({'editor': editor_id}, {'first_edit': 1, 'final_edit': 1, 'monthly_edits': 1, 'edits_by_year': 1, 'last_edit_by_year': 1})
        first_edit = obs['first_edit']

        if obs['monthly_edits']['2010']['8'] > 0:
            edits_by_year = obs['edits_by_year']
            last_edit_by_year = obs['last_edit_by_year']
            for year in xrange(2001, final_year):
                # The keys of a stored document are strings, looking up the
                # integer year alone would never find the edits.
                if _lookup_year(edits_by_year, year, 0) > 0:
                    last_edit = _lookup_year(last_edit_by_year, year)
                    editor_dt = relativedelta(last_edit, first_edit)
                    editor_dt = (editor_dt.years * 12) + editor_dt.months
                    for w in windows:
                        if w >= editor_dt:
                            data[int(year)][w] += 1
                            break
    filename = '_august_2010_cohort_data_.bin'
    utils.store_object(data, settings.binary_location, '%s_%s' % (dbname, filename))
    cohort_charts.prepare_cohort_dataset(dbname, filename)
| 323 | + |
| 324 | + |
| 325 | + |
309 | 326 | def generate_cohort_dataset_backward(tasks, dbname, collection, **kwargs): |
310 | 327 | mongo = db.init_mongo_db(dbname) |
311 | 328 | editors = mongo[collection + '_dataset'] |
— | — | @@ -334,15 +351,6 @@ |
335 | 352 | cohort_charts.prepare_cohort_dataset(dbname) |
336 | 353 | |
337 | 354 | |
338 | | - |
339 | | - |
340 | | -def date_falls_in_window(window_start, window_end, first_edit): |
341 | | - if first_edit >= window_start and first_edit <= window_end: |
342 | | - return True |
343 | | - else: |
344 | | - return False |
345 | | - |
346 | | - |
347 | 355 | def generate_wide_editor_dataset(tasks, dbname, collection, **kwargs): |
348 | 356 | mongo = db.init_mongo_db(dbname) |
349 | 357 | editors = mongo[collection + '_dataset'] |
Index: trunk/tools/editor_trends/etl/extracter.py |
— | — | @@ -101,13 +101,16 @@ |
102 | 102 | return revisions |
103 | 103 | |
104 | 104 | |
def verify_article_belongs_namespace(elem, namespaces):
    '''
    @elem is the title element of a page, its text is the title.
    @namespaces is a list of namespaces that should be ignored, hence if the
    title of article starts with the namespace then return False else return True.
    False is also returned when the title element is missing or has no text.
    '''
    # A page without a title element cannot be matched against a namespace.
    if elem is None:
        return False
    title = elem.text
    if title is None:
        return False
    return not any(title.startswith(namespace) for namespace in namespaces)
114 | 117 | |
— | — | @@ -249,7 +252,7 @@ |
250 | 253 | for page in wikitree.parser.read_input(fh): |
251 | 254 | title = page.find('title') |
252 | 255 | total_pages += 1 |
253 | | - if is_article_main_namespace(title, ns): |
| 256 | + if verify_article_belongs_namespace(title, ns): |
254 | 257 | #cElementTree.dump(page) |
255 | 258 | article_id = page.find('id').text |
256 | 259 | revisions = page.findall('revision') |
Index: trunk/tools/editor_trends/etl/shaper.py |
— | — | @@ -1,3 +1,16 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | +''' |
| 5 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 6 | +This program is free software; you can redistribute it and/or |
| 7 | +modify it under the terms of the GNU General Public License version 2 |
| 8 | +as published by the Free Software Foundation. |
| 9 | +This program is distributed in the hope that it will be useful, |
| 10 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | +See the GNU General Public License for more details, at |
| 13 | +http://www.fsf.org/licenses/gpl.html |
| 14 | +''' |
2 | 15 | |
3 | 16 | |
4 | 17 | __author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
— | — | @@ -27,7 +40,7 @@ |
28 | 41 | d[i] = 0.0 |
29 | 42 | return d |
30 | 43 | |
31 | | -def create_datacontainer(datatype): |
| 44 | +def create_datacontainer(datatype='dict'): |
32 | 45 | ''' |
33 | 46 | This function initializes an empty dictionary with as key the year (starting |
34 | 47 | 2001 and running through) and as value @datatype, in most cases this will |
Index: trunk/tools/editor_trends/etl/sort.py |
— | — | @@ -162,10 +162,9 @@ |
163 | 163 | |
164 | 164 | if __name__ == '__main__': |
165 | 165 | input = os.path.join(settings.input_location, 'en', 'wiki', 'txt') |
166 | | - intermediate_output = os.path.join(settings.input_location, 'en', 'wiki', 'sorted') |
167 | | - output = os.path.join(settings.input_location, 'en', 'wiki', 'dbready') |
| 166 | + output = os.path.join(settings.input_location, 'en', 'wiki', 'sorted') |
168 | 167 | dbname = 'enwiki' |
169 | 168 | collection = 'editors' |
170 | | - mergesort_launcher(input, intermediate_output) |
| 169 | + mergesort_launcher(input, output) |
171 | 170 | #mergesort_external_launcher(intermediate_output, output) |
172 | 171 | #num_editors = store_editors(output, dbname, collection) |
Index: trunk/tools/editor_trends/config.py |
— | — | @@ -24,15 +24,14 @@ |
25 | 25 | from utils import utils |
26 | 26 | import languages |
27 | 27 | |
| 28 | + |
28 | 29 | def show_choices(settings, attr): |
29 | 30 | choices = getattr(settings, attr).items() |
30 | 31 | choices.sort() |
31 | 32 | choices = ['%s\t%s' % (choice[0], choice[1]) for choice in choices] |
32 | | - #print '\n'.join(choices) |
33 | 33 | return choices |
34 | | - #for choice in choices: |
35 | | - # print '%s\t%s' % (choice[0], choice[1]) |
36 | 34 | |
| 35 | + |
37 | 36 | def create_configuration(settings, args): |
38 | 37 | force = getattr(args, 'force', False) |
39 | 38 | if not os.path.exists('wiki.cfg') or force: |
— | — | @@ -62,9 +61,6 @@ |
63 | 62 | if len(dumpversion) == 0: |
64 | 63 | dumpversion = settings.dumpversions['0'] |
65 | 64 | |
66 | | - |
67 | | - #dumpversion = dumpversion if dumpversion in settings.dumpversions.keys() else args.dumpversion |
68 | | - |
69 | 65 | dumpversion = settings.dumpversions[dumpversion] |
70 | 66 | input_location = input_location if len(input_location) > 0 else settings.input_location |
71 | 67 | working_directory = working_directory if len(working_directory) > 0 else os.getcwd() |
Index: trunk/tools/editor_trends/languages.py |
— | — | @@ -18,595 +18,596 @@ |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
21 | 21 | ''' |
22 | | -This file provides mapper between language name and locale language name and |
23 | | -Wikipedia acronym. |
24 | | -Gothic and Birmese are not yet supported, see rows 450 and 554. |
| 22 | +This file provides mapper between language name and locale language name and |
| 23 | +Wikipedia acronym. |
| 24 | +Gothic and Birmese are not yet supported, see rows 450 and 554. |
25 | 25 | ''' |
26 | 26 | |
27 | 27 | from utils import ordered_dict as odict |
28 | 28 | from utils import utils |
29 | 29 | |
30 | 30 | MAPPING = odict.OrderedDict([ |
31 | | -(u'English','en'), |
32 | | -(u'German','de'), |
33 | | -(u'French','fr'), |
34 | | -(u'Italian','it'), |
35 | | -(u'Polish','pl'), |
36 | | -(u'Japanese','ja'), |
37 | | -(u'Spanish','es'), |
38 | | -(u'Dutch','nl'), |
39 | | -(u'Portuguese','pt'), |
40 | | -(u'Russian','ru'), |
41 | | -(u'Swedish','sv'), |
42 | | -(u'Chinese','zh'), |
43 | | -(u'Catalan','ca'), |
44 | | -(u'Norwegian','no'), |
45 | | -(u'Bokmål','no'), |
46 | | -(u'Finnish','fi'), |
47 | | -(u'Ukrainian','uk'), |
48 | | -(u'Hungarian','hu'), |
49 | | -(u'Czech','cs'), |
50 | | -(u'Romanian','ro'), |
51 | | -(u'Turkish','tr'), |
52 | | -(u'Korean','ko'), |
53 | | -(u'Vietnamese','vi'), |
54 | | -(u'Danish','da'), |
55 | | -(u'Arabic','ar'), |
56 | | -(u'Esperanto','eo'), |
57 | | -(u'Serbian','sr'), |
58 | | -(u'Indonesian','id'), |
59 | | -(u'Lithuanian','lt'), |
60 | | -(u'Volapük','vo'), |
61 | | -(u'Slovak','sk'), |
62 | | -(u'Hebrew','he'), |
63 | | -(u'Bulgarian','bg'), |
64 | | -(u'Persian','fa'), |
65 | | -(u'Slovenian','sl'), |
66 | | -(u'Waray-Waray','war'), |
67 | | -(u'Croatian','hr'), |
68 | | -(u'Estonian','et'), |
69 | | -(u'Malay','ms'), |
70 | | -(u'Newar','new'), |
71 | | -(u'Nepal Bhasa','new'), |
72 | | -(u'Simple English','simple'), |
73 | | -(u'Galician','gl'), |
74 | | -(u'Thai','th'), |
75 | | -(u'Aromanian','roa-rup'), |
76 | | -(u'Nynorsk','nn'), |
77 | | -(u'Basque','eu'), |
78 | | -(u'Hindi','hi'), |
79 | | -(u'Greek','el'), |
80 | | -(u'Haitian','ht'), |
81 | | -(u'Latin','la'), |
82 | | -(u'Telugu','te'), |
83 | | -(u'Georgian','ka'), |
84 | | -(u'Cebuano','ceb'), |
85 | | -(u'Macedonian','mk'), |
86 | | -(u'Azeri','az'), |
87 | | -(u'Tagalog','tl'), |
88 | | -(u'Breton','br'), |
89 | | -(u'Serbo-Croatian','sh'), |
90 | | -(u'Marathi','mr'), |
91 | | -(u'Luxembourgish','lb'), |
92 | | -(u'Javanese','jv'), |
93 | | -(u'Latvian','lv'), |
94 | | -(u'Bosnian','bs'), |
95 | | -(u'Icelandic','is'), |
96 | | -(u'Welsh','cy'), |
97 | | -(u'Belarusian','be-x-old'), |
98 | | -(u'Taraškievica','be-x-old'), |
99 | | -(u'Piedmontese','pms'), |
100 | | -(u'Albanian','sq'), |
101 | | -(u'Tamil','ta'), |
102 | | -(u'Bishnupriya Manipuri','bpy'), |
103 | | -(u'Belarusian','be'), |
104 | | -(u'Aragonese','an'), |
105 | | -(u'Occitan','oc'), |
106 | | -(u'Bengali','bn'), |
107 | | -(u'Swahili','sw'), |
108 | | -(u'Ido','io'), |
109 | | -(u'Ripuarian','ksh'), |
110 | | -(u'Lombard','lmo'), |
111 | | -(u'West Frisian','fy'), |
112 | | -(u'Gujarati','gu'), |
113 | | -(u'Low Saxon','nds'), |
114 | | -(u'Afrikaans','af'), |
115 | | -(u'Sicilian','scn'), |
116 | | -(u'Quechua','qu'), |
117 | | -(u'Kurdish','ku'), |
118 | | -(u'Urdu','ur'), |
119 | | -(u'Sundanese','su'), |
120 | | -(u'Malayalam','ml'), |
121 | | -(u'Cantonese','zh-yue'), |
122 | | -(u'Asturian','ast'), |
123 | | -(u'Neapolitan','nap'), |
124 | | -(u'Samogitian','bat-smg'), |
125 | | -(u'Walloon','wa'), |
126 | | -(u'Chuvash','cv'), |
127 | | -(u'Irish','ga'), |
128 | | -(u'Armenian','hy'), |
129 | | -(u'Yoruba','yo'), |
130 | | -(u'Kannada','kn'), |
131 | | -(u'Tajik','tg'), |
132 | | -(u'Tarantino','roa-tara'), |
133 | | -(u'Venetian','vec'), |
134 | | -(u'Western Panjabi','pnb'), |
135 | | -(u'Nepali','ne'), |
136 | | -(u'Scottish Gaelic','gd'), |
137 | | -(u'Yiddish','yi'), |
138 | | -(u'Min Nan','zh-min-nan'), |
139 | | -(u'Uzbek','uz'), |
140 | | -(u'Tatar','tt'), |
141 | | -(u'Kapampangan','pam'), |
142 | | -(u'Ossetian','os'), |
143 | | -(u'Sakha','sah'), |
144 | | -(u'Alemannic','als'), |
145 | | -(u'Maori','mi'), |
146 | | -(u'Egyptian Arabic','arz'), |
147 | | -(u'Kazakh','kk'), |
148 | | -(u'Nahuatl','nah'), |
149 | | -(u'Limburgian','li'), |
150 | | -(u'Upper Sorbian','hsb'), |
151 | | -(u'Gilaki','glk'), |
152 | | -(u'Corsican','co'), |
153 | | -(u'Gan','gan'), |
154 | | -(u'Amharic','am'), |
155 | | -(u'Mongolian','mn'), |
156 | | -(u'Interlingua','ia'), |
157 | | -(u'Central Bicolano','bcl'), |
158 | | -(u'Võro','fiu-vro'), |
159 | | -(u'Dutch Low Saxon','nds-nl'), |
160 | | -(u'Faroese','fo'), |
161 | | -(u'Turkmen','tk'), |
162 | | -(u'Scots','sco'), |
163 | | -(u'West Flemish','vls'), |
164 | | -(u'Sinhalese','si'), |
165 | | -(u'Sanskrit','sa'), |
166 | | -(u'Bavarian','bar'), |
167 | | -(u'Burmese','my'), |
168 | | -(u'Manx','gv'), |
169 | | -(u'Divehi','dv'), |
170 | | -(u'Norman','nrm'), |
171 | | -(u'Pangasinan','pag'), |
172 | | -(u'Romansh','rm'), |
173 | | -(u'Banyumasan','map-bms'), |
174 | | -(u'Zazaki','diq'), |
175 | | -(u'Sorani','ckb'), |
176 | | -(u'Northern Sami','se'), |
177 | | -(u'Mazandarani','mzn'), |
178 | | -(u'Wu','wuu'), |
179 | | -(u'Uyghur','ug'), |
180 | | -(u'Friulian','fur'), |
181 | | -(u'Ligurian','lij'), |
182 | | -(u'Maltese','mt'), |
183 | | -(u'Bihari','bh'), |
184 | | -(u'Novial','nov'), |
185 | | -(u'Malagasy','mg'), |
186 | | -(u'Kashubian','csb'), |
187 | | -(u'Ilokano','ilo'), |
188 | | -(u'Sardinian','sc'), |
189 | | -(u'Classical Chinese','zh-classical'), |
190 | | -(u'Khmer','km'), |
191 | | -(u'Ladino','lad'), |
192 | | -(u'Pali','pi'), |
193 | | -(u'Anglo-Saxon','ang'), |
194 | | -(u'Zamboanga Chavacano','cbk-zam'), |
195 | | -(u'Tibetan','bo'), |
196 | | -(u'Fiji Hindi','hif'), |
197 | | -(u'Franco-Provençal','frp'), |
198 | | -(u'Arpitan','frp'), |
199 | | -(u'Hakka','hak'), |
200 | | -(u'Cornish','kw'), |
201 | | -(u'Punjabi','pa'), |
202 | | -(u'Pashto','ps'), |
203 | | -(u'Kalmyk','xal'), |
204 | | -(u'Silesian','szl'), |
205 | | -(u'Pennsylvania German','pdc'), |
206 | | -(u'Hawaiian','haw'), |
207 | | -(u'Saterland Frisian','stq'), |
208 | | -(u'Interlingue','ie'), |
209 | | -(u'Navajo','nv'), |
210 | | -(u'Fijian','fj'), |
211 | | -(u'Crimean Tatar','crh'), |
212 | | -(u'Komi','kv'), |
213 | | -(u'Tongan','to'), |
214 | | -(u'Acehnese','ace'), |
215 | | -(u'Somali','so'), |
216 | | -(u'Erzya','myv'), |
217 | | -(u'Guarani','gn'), |
218 | | -(u'Karachay-Balkar','krc'), |
219 | | -(u'Extremaduran','ext'), |
220 | | -(u'Lingala','ln'), |
221 | | -(u'Kirghiz','ky'), |
222 | | -(u'Meadow Mari','mhr'), |
223 | | -(u'Assyrian Neo-Aramaic','arc'), |
224 | | -(u'Emilian-Romagnol','eml'), |
225 | | -(u'Lojban','jbo'), |
226 | | -(u'Picard','pcd'), |
227 | | -(u'Aymara','ay'), |
228 | | -(u'Wolof','wo'), |
229 | | -(u'Tumbuka','tum'), |
230 | | -(u'Kabyle','kab'), |
231 | | -(u'Bashkir','ba'), |
232 | | -(u'North Frisian','frr'), |
233 | | -(u'Tahitian','ty'), |
234 | | -(u'Tok Pisin','tpi'), |
235 | | -(u'Papiamentu','pap'), |
236 | | -(u'Zealandic','zea'), |
237 | | -(u'Sranan','srn'), |
238 | | -(u'Greenlandic','kl'), |
239 | | -(u'Udmurt','udm'), |
240 | | -(u'Chechen','ce'), |
241 | | -(u'Igbo','ig'), |
242 | | -(u'Komi-Permyak','koi'), |
243 | | -(u'Oriya','or'), |
244 | | -(u'Lower Sorbian','dsb'), |
245 | | -(u'Kongo','kg'), |
246 | | -(u'Lao','lo'), |
247 | | -(u'Abkhazian','ab'), |
248 | | -(u'Moksha','mdf'), |
249 | | -(u'Romani','rmy'), |
250 | | -(u'Hill Mari','mrj'), |
251 | | -(u'Banjar','bjn'), |
252 | | -(u'Old Church Slavonic','cu'), |
253 | | -(u'Mirandese','mwl'), |
254 | | -(u'Karakalpak','kaa'), |
255 | | -(u'Samoan','sm'), |
256 | | -(u'Moldovan','mo'), |
257 | | -(u'Tetum','tet'), |
258 | | -(u'Avar','av'), |
259 | | -(u'Kashmiri','ks'), |
260 | | -(u'Gothic','got'), |
261 | | -(u'Sindhi','sd'), |
262 | | -(u'Bambara','bm'), |
263 | | -(u'Nauruan','na'), |
264 | | -(u'Norfolk','pih'), |
265 | | -(u'Pontic','pnt'), |
266 | | -(u'Inuktitut','iu'), |
267 | | -(u'Inupiak','ik'), |
268 | | -(u'Bislama','bi'), |
269 | | -(u'Cherokee','chr'), |
270 | | -(u'Assamese','as'), |
271 | | -(u'Min Dong','cdo'), |
272 | | -(u'Ewe','ee'), |
273 | | -(u'Swati','ss'), |
274 | | -(u'Oromo','om'), |
275 | | -(u'Zhuang','za'), |
276 | | -(u'Zulu','zu'), |
277 | | -(u'Tigrinya','ti'), |
278 | | -(u'Venda','ve'), |
279 | | -(u'Tsonga','ts'), |
280 | | -(u'Hausa','ha'), |
281 | | -(u'Dzongkha','dz'), |
282 | | -(u'Sango','sg'), |
283 | | -(u'Chamorro','ch'), |
284 | | -(u'Cree','cr'), |
285 | | -(u'Xhosa','xh'), |
286 | | -(u'Akan','ak'), |
287 | | -(u'Sesotho','st'), |
288 | | -(u'Kinyarwanda','rw'), |
289 | | -(u'Tswana','tn'), |
290 | | -(u'Kikuyu','ki'), |
291 | | -(u'Buryat','bxr'), |
292 | | -(u'Buginese','bug'), |
293 | | -(u'Chichewa','ny'), |
294 | | -(u'Lak','lbe'), |
295 | | -(u'Twi','tw'), |
296 | | -(u'Shona','sn'), |
297 | | -(u'Kirundi','rn'), |
298 | | -(u'Fula','ff'), |
299 | | -(u'Cheyenne','chy'), |
300 | | -(u'Luganda','lg'), |
301 | | -(u'Ndonga','ng'), |
302 | | -(u'Sichuan Yi','ii'), |
303 | | -(u'Choctaw','cho'), |
304 | | -(u'Marshallese','mh'), |
305 | | -(u'Afar','aa'), |
306 | | -(u'Kuanyama','kj'), |
307 | | -(u'Hiri Motu','ho'), |
308 | | -(u'Muscogee','mus'), |
309 | | -(u'Kanuri','kr'), |
310 | | -(u'Herero','hz'), |
311 | | -(u'English','en'), |
312 | | -(u'Deutsch','de'), |
313 | | -(u'Français','fr'), |
314 | | -(u'Italiano','it'), |
315 | | -(u'Polski','pl'), |
316 | | -(u'日本語','ja'), |
317 | | -(u'Español','es'), |
318 | | -(u'Nederlands','nl'), |
319 | | -(u'Português','pt'), |
320 | | -(u'Русский','ru'), |
321 | | -(u'Svenska','sv'), |
322 | | -(u'中文','zh'), |
323 | | -(u'Català','ca'), |
324 | | -(u'Norsk','no'), |
325 | | -(u'Bokmål','no'), |
326 | | -(u'Suomi','fi'), |
327 | | -(u'Українська','uk'), |
328 | | -(u'Magyar','hu'), |
329 | | -(u'Čeština','cs'), |
330 | | -(u'Română','ro'), |
331 | | -(u'Türkçe','tr'), |
332 | | -(u'한국어','ko'), |
333 | | -(u'Tiếng Việt','vi'), |
334 | | -(u'Dansk','da'), |
335 | | -(u'العربية','ar'), |
336 | | -(u'Esperanto','eo'), |
337 | | -(u'Српски','sr'), |
338 | | -(u'Srpski','sr'), |
339 | | -(u'Bahasa Indonesia','id'), |
340 | | -(u'Lietuvių','lt'), |
341 | | -(u'Volapük','vo'), |
342 | | -(u'Slovenčina','sk'), |
343 | | -(u'עברית','he'), |
344 | | -(u'Български','bg'), |
345 | | -(u'فارسی','fa'), |
346 | | -(u'Slovenščina','sl'), |
347 | | -(u'Winaray','war'), |
348 | | -(u'Hrvatski','hr'), |
349 | | -(u'Eesti','et'), |
350 | | -(u'Bahasa Melayu','ms'), |
351 | | -(u'नेपाल भाषा','new'), |
352 | | -(u'Simple English','simple'), |
353 | | -(u'Galego','gl'), |
354 | | -(u'ไทย','th'), |
355 | | -(u'Armãneashce','roa-rup'), |
356 | | -(u'Nynorsk','nn'), |
357 | | -(u'Euskara','eu'), |
358 | | -(u'हिन्दी','hi'), |
359 | | -(u'Ελληνικά','el'), |
360 | | -(u'Krèyol ayisyen','ht'), |
361 | | -(u'Latina','la'), |
362 | | -(u'తెలుగు','te'), |
363 | | -(u'ქართული','ka'), |
364 | | -(u'Sinugboanong Binisaya','ceb'), |
365 | | -(u'Македонски','mk'), |
366 | | -(u'Azərbaycan','az'), |
367 | | -(u'Tagalog','tl'), |
368 | | -(u'Brezhoneg','br'), |
369 | | -(u'Srpskohrvatski','sh'), |
370 | | -(u'Српскохрватски','sh'), |
371 | | -(u'मराठी','mr'), |
372 | | -(u'Lëtzebuergesch','lb'), |
373 | | -(u'Basa Jawa','jv'), |
374 | | -(u'Latviešu','lv'), |
375 | | -(u'Bosanski','bs'), |
376 | | -(u'Íslenska','is'), |
377 | | -(u'Cymraeg','cy'), |
378 | | -(u'Беларуская','be-x-old'), |
379 | | -(u'тарашкевіца','be-x-old'), |
380 | | -(u'Piemontèis','pms'), |
381 | | -(u'Shqip','sq'), |
382 | | -(u'தமிழ்','ta'), |
383 | | -(u'ইমার ঠার','bpy'), |
384 | | -(u'বিষ্ণুপ্রিয়া মণিপুরী','bpy'), |
385 | | -(u'Беларуская','be'), |
386 | | -(u'Aragonés','an'), |
387 | | -(u'Occitan','oc'), |
388 | | -(u'বাংলা','bn'), |
389 | | -(u'Kiswahili','sw'), |
390 | | -(u'Ido','io'), |
391 | | -(u'Ripoarisch','ksh'), |
392 | | -(u'Lumbaart','lmo'), |
393 | | -(u'Frysk','fy'), |
394 | | -(u'ગુજરાતી','gu'), |
395 | | -(u'Plattdüütsch','nds'), |
396 | | -(u'Afrikaans','af'), |
397 | | -(u'Sicilianu','scn'), |
398 | | -(u'Runa Simi','qu'), |
399 | | -(u'Kurdî','ku'), |
400 | | -(u'كوردی','ku'), |
401 | | -(u'اردو','ur'), |
402 | | -(u'Basa Sunda','su'), |
403 | | -(u'മലയാളം','ml'), |
404 | | -(u'粵語','zh-yue'), |
405 | | -(u'Asturianu','ast'), |
406 | | -(u'Nnapulitano','nap'), |
407 | | -(u'Žemaitėška','bat-smg'), |
408 | | -(u'Walon','wa'), |
409 | | -(u'Чăваш','cv'), |
410 | | -(u'Gaeilge','ga'), |
411 | | -(u'Հայերեն','hy'), |
412 | | -(u'Yorùbá','yo'), |
413 | | -(u'ಕನ್ನಡ','kn'), |
414 | | -(u'Тоҷикӣ','tg'), |
415 | | -(u'Tarandíne','roa-tara'), |
416 | | -(u'Vèneto','vec'), |
417 | | -(u'شاہ مکھی پنجابی','pnb'), |
418 | | -(u'Shāhmukhī Pañjābī','pnb'), |
419 | | -(u'नेपाली','ne'), |
420 | | -(u'Gàidhlig','gd'), |
421 | | -(u'ייִדיש','yi'), |
422 | | -(u'Bân-lâm-gú','zh-min-nan'), |
423 | | -(u'O‘zbek','uz'), |
424 | | -(u'Tatarça','tt'), |
425 | | -(u'Татарча','tt'), |
426 | | -(u'Kapampangan','pam'), |
427 | | -(u'Иронау','os'), |
428 | | -(u'Саха тыла','sah'), |
429 | | -(u'Saxa Tyla','sah'), |
430 | | -(u'Alemannisch','als'), |
431 | | -(u'Māori','mi'), |
432 | | -(u'مصرى','arz'), |
433 | | -(u'Maṣrī','arz'), |
434 | | -(u'Қазақша','kk'), |
435 | | -(u'Nāhuatl','nah'), |
436 | | -(u'Limburgs','li'), |
437 | | -(u'Hornjoserbsce','hsb'), |
438 | | -(u'گیلکی','glk'), |
439 | | -(u'Corsu','co'), |
440 | | -(u'贛語','gan'), |
441 | | -(u'አማርኛ','am'), |
442 | | -(u'Монгол','mn'), |
443 | | -(u'Interlingua','ia'), |
444 | | -(u'Bikol','bcl'), |
445 | | -(u'Võro','fiu-vro'), |
446 | | -(u'Nedersaksisch','nds-nl'), |
447 | | -(u'Føroyskt','fo'), |
448 | | -(u'تركمن ','tk'), |
449 | | -(u'Туркмен','tk'), |
450 | | -(u'Scots','sco'), |
451 | | -(u'West-Vlams','vls'), |
452 | | -(u'සිංහල','si'), |
453 | | -(u'संस्कृतम्','sa'), |
454 | | -(u'Boarisch','bar'), |
455 | | -(u'မ္ရန္မာစာ','my'), #Needs fix |
456 | | -(u'Gaelg','gv'), |
457 | | -(u'ދިވެހިބަސް','dv'), |
458 | | -(u'Nouormand','nrm'), |
459 | | -(u'Normaund','nrm'), |
460 | | -(u'Pangasinan','pag'), |
461 | | -(u'Rumantsch','rm'), |
462 | | -(u'Basa Banyumasan','map-bms'), |
463 | | -(u'Zazaki','diq'), |
464 | | -(u'Soranî','ckb'), |
465 | | -(u'کوردی','ckb'), |
466 | | -(u'Sámegiella','se'), |
467 | | -(u'مَزِروني','mzn'), |
468 | | -(u'吴语','wuu'), |
469 | | -(u'Oyghurque','ug'), |
470 | | -(u'Furlan','fur'), |
471 | | -(u'Líguru','lij'), |
472 | | -(u'Malti','mt'), |
473 | | -(u'भोजपुरी','bh'), |
474 | | -(u'Novial','nov'), |
475 | | -(u'Malagasy','mg'), |
476 | | -(u'Kaszëbsczi','csb'), |
477 | | -(u'Ilokano','ilo'), |
478 | | -(u'Sardu','sc'), |
479 | | -(u'古文','zh-classical'), |
480 | | -(u'文言文','zh-classical'), |
481 | | -(u'ភាសាខ្មែរ','km'), |
482 | | -(u'Dzhudezmo','lad'), |
483 | | -(u'पाऴि','pi'), |
484 | | -(u'Englisc','ang'), |
485 | | -(u'Chavacano de Zamboanga','cbk-zam'), |
486 | | -(u'བོད་སྐད','bo'), |
487 | | -(u'Fiji Hindi','hif'), |
488 | | -(u'Arpitan','frp'), |
489 | | -(u'Hak-kâ-fa','hak'), |
490 | | -(u'客家話','hak'), |
491 | | -(u'Kernewek','kw'), |
492 | | -(u'Karnuack','kw'), |
493 | | -(u'ਪੰਜਾਬੀ','pa'), |
494 | | -(u'پښتو','ps'), |
495 | | -(u'Хальмг','xal'), |
496 | | -(u'Ślůnski','szl'), |
497 | | -(u'Deitsch','pdc'), |
498 | | -(u'Hawai`i','haw'), |
499 | | -(u'Seeltersk','stq'), |
500 | | -(u'Interlingue','ie'), |
501 | | -(u'Diné bizaad','nv'), |
502 | | -(u'Na Vosa Vakaviti','fj'), |
503 | | -(u'Qırımtatarca','crh'), |
504 | | -(u'Коми','kv'), |
505 | | -(u'faka Tonga','to'), |
506 | | -(u'Bahsa Acèh','ace'), |
507 | | -(u'Soomaaliga','so'), |
508 | | -(u'Эрзянь','myv'), |
509 | | -(u'Erzjanj Kelj','myv'), |
510 | | -(u"Avañe'ẽ",'gn'), |
511 | | -(u'Къарачай-Малкъар','krc'), |
512 | | -(u'Qarachay-Malqar','krc'), |
513 | | -(u'Estremeñu','ext'), |
514 | | -(u'Lingala','ln'), |
515 | | -(u'Кыргызча','ky'), |
516 | | -(u'Олык Марий','mhr'), |
517 | | -(u'Olyk Marij','mhr'), |
518 | | -(u'ܐܪܡܝܐ','arc'), |
519 | | -(u'Emiliàn e rumagnòl','eml'), |
520 | | -(u'Lojban','jbo'), |
521 | | -(u'Picard','pcd'), |
522 | | -(u'Aymar','ay'), |
523 | | -(u'Wolof','wo'), |
524 | | -(u'chiTumbuka','tum'), |
525 | | -(u'Taqbaylit','kab'), |
526 | | -(u'Башҡорт','ba'), |
527 | | -(u'Frasch','frr'), |
528 | | -(u'Reo Mā`ohi','ty'), |
529 | | -(u'Tok Pisin','tpi'), |
530 | | -(u'Papiamentu','pap'), |
531 | | -(u'Zeêuws','zea'), |
532 | | -(u'Sranantongo','srn'), |
533 | | -(u'Kalaallisut','kl'), |
534 | | -(u'Удмурт кыл','udm'), |
535 | | -(u'Нохчийн','ce'), |
536 | | -(u'Igbo','ig'), |
537 | | -(u'Перем Коми','koi'), |
538 | | -(u'Perem Komi','koi'), |
539 | | -(u'ଓଡ଼ିଆ','or'), |
540 | | -(u'Dolnoserbski','dsb'), |
541 | | -(u'KiKongo','kg'), |
542 | | -(u'ລາວ','lo'), |
543 | | -(u'Аҧсуа','ab'), |
544 | | -(u'Мокшень','mdf'), |
545 | | -(u'Mokshanj Kälj','mdf'), |
546 | | -(u'romani - रोमानी','rmy'), |
547 | | -(u'Кырык Мары','mrj'), |
548 | | -(u'Kyryk Mary','mrj'), |
549 | | -(u'Bahasa Banjar','bjn'), |
550 | | -(u'Словѣньскъ','cu'), |
551 | | -(u'Páigina Percipal','mwl'), |
552 | | -(u'Qaraqalpaqsha','kaa'), |
553 | | -(u'Gagana Samoa','sm'), |
554 | | -(u'Молдовеняскэ','mo'), |
555 | | -(u'Tetun','tet'), |
556 | | -(u'Авар','av'), |
557 | | -(u'कश्मीरी','ks'), |
558 | | -(u'كشميري','ks'), |
559 | | -(u'𐌲𐌿𐍄𐌹𐍃𐌺','got'), #Needs fix |
560 | | -(u'سنڌي، سندھی ، सिन्ध','sd'), |
561 | | -(u'Bamanankan','bm'), |
562 | | -(u'dorerin Naoero','na'), |
563 | | -(u'Norfuk','pih'), |
564 | | -(u'Ποντιακά','pnt'), |
565 | | -(u'ᐃᓄᒃᑎᑐᑦ','iu'), |
566 | | -(u'Iñupiak','ik'), |
567 | | -(u'Bislama','bi'), |
568 | | -(u'ᏣᎳᎩ','chr'), |
569 | | -(u'অসমীয়া','as'), |
570 | | -(u'Mìng-dĕ̤ng-ngṳ̄','cdo'), |
571 | | -(u'Eʋegbe','ee'), |
572 | | -(u'SiSwati','ss'), |
573 | | -(u'Oromoo','om'), |
574 | | -(u'Cuengh','za'), |
575 | | -(u'isiZulu','zu'), |
576 | | -(u'ትግርኛ','ti'), |
577 | | -(u'Tshivenda','ve'), |
578 | | -(u'Xitsonga','ts'), |
579 | | -(u'هَوُسَ','ha'), |
580 | | -(u'ཇོང་ཁ','dz'), |
581 | | -(u'Sängö','sg'), |
582 | | -(u'Chamoru','ch'), |
583 | | -(u'Nehiyaw','cr'), |
584 | | -(u'isiXhosa','xh'), |
585 | | -(u'Akana','ak'), |
586 | | -(u'Sesotho','st'), |
587 | | -(u'Ikinyarwanda','rw'), |
588 | | -(u'Setswana','tn'), |
589 | | -(u'Gĩkũyũ','ki'), |
590 | | -(u'Буряад','bxr'), |
591 | | -(u'Basa Ugi','bug'), |
592 | | -(u'Chi-Chewa','ny'), |
593 | | -(u'Лакку','lbe'), |
594 | | -(u'Twi','tw'), |
595 | | -(u'chiShona','sn'), |
596 | | -(u'Kirundi','rn'), |
597 | | -(u'Fulfulde','ff'), |
598 | | -(u'Tsetsêhestâhese','chy'), |
599 | | -(u'Luganda','lg'), |
600 | | -(u'Oshiwambo','ng'), |
601 | | -(u'ꆇꉙ','ii'), |
602 | | -(u'Choctaw','cho'), |
603 | | -(u'Ebon','mh'), |
604 | | -(u'Afar','aa'), |
605 | | -(u'Kuanyama','kj'), |
606 | | -(u'Hiri Motu','ho'), |
607 | | -(u'Muskogee','mus'), |
608 | | -(u'Kanuri','kr'), |
609 | | -(u'Otsiherero','hz'), |
| 31 | +(u'English', 'en'), |
| 32 | +(u'German', 'de'), |
| 33 | +(u'French', 'fr'), |
| 34 | +(u'Italian', 'it'), |
| 35 | +(u'Polish', 'pl'), |
| 36 | +(u'Japanese', 'ja'), |
| 37 | +(u'Spanish', 'es'), |
| 38 | +(u'Dutch', 'nl'), |
| 39 | +(u'Portuguese', 'pt'), |
| 40 | +(u'Russian', 'ru'), |
| 41 | +(u'Swedish', 'sv'), |
| 42 | +(u'Chinese', 'zh'), |
| 43 | +(u'Catalan', 'ca'), |
| 44 | +(u'Norwegian', 'no'), |
| 45 | +(u'Bokmål', 'no'), |
| 46 | +(u'Finnish', 'fi'), |
| 47 | +(u'Ukrainian', 'uk'), |
| 48 | +(u'Hungarian', 'hu'), |
| 49 | +(u'Czech', 'cs'), |
| 50 | +(u'Romanian', 'ro'), |
| 51 | +(u'Turkish', 'tr'), |
| 52 | +(u'Korean', 'ko'), |
| 53 | +(u'Vietnamese', 'vi'), |
| 54 | +(u'Danish', 'da'), |
| 55 | +(u'Arabic', 'ar'), |
| 56 | +(u'Esperanto', 'eo'), |
| 57 | +(u'Serbian', 'sr'), |
| 58 | +(u'Indonesian', 'id'), |
| 59 | +(u'Lithuanian', 'lt'), |
| 60 | +(u'Volapük', 'vo'), |
| 61 | +(u'Slovak', 'sk'), |
| 62 | +(u'Hebrew', 'he'), |
| 63 | +(u'Bulgarian', 'bg'), |
| 64 | +(u'Persian', 'fa'), |
| 65 | +(u'Slovenian', 'sl'), |
| 66 | +(u'Waray-Waray', 'war'), |
| 67 | +(u'Croatian', 'hr'), |
| 68 | +(u'Estonian', 'et'), |
| 69 | +(u'Malay', 'ms'), |
| 70 | +(u'Newar', 'new'), |
| 71 | +(u'Nepal Bhasa', 'new'), |
| 72 | +(u'Simple English', 'simple'), |
| 73 | +(u'Galician', 'gl'), |
| 74 | +(u'Thai', 'th'), |
| 75 | +(u'Aromanian', 'roa-rup'), |
| 76 | +(u'Nynorsk', 'nn'), |
| 77 | +(u'Basque', 'eu'), |
| 78 | +(u'Hindi', 'hi'), |
| 79 | +(u'Greek', 'el'), |
| 80 | +(u'Haitian', 'ht'), |
| 81 | +(u'Latin', 'la'), |
| 82 | +(u'Telugu', 'te'), |
| 83 | +(u'Georgian', 'ka'), |
| 84 | +(u'Cebuano', 'ceb'), |
| 85 | +(u'Macedonian', 'mk'), |
| 86 | +(u'Azeri', 'az'), |
| 87 | +(u'Tagalog', 'tl'), |
| 88 | +(u'Breton', 'br'), |
| 89 | +(u'Serbo-Croatian', 'sh'), |
| 90 | +(u'Marathi', 'mr'), |
| 91 | +(u'Luxembourgish', 'lb'), |
| 92 | +(u'Javanese', 'jv'), |
| 93 | +(u'Latvian', 'lv'), |
| 94 | +(u'Bosnian', 'bs'), |
| 95 | +(u'Icelandic', 'is'), |
| 96 | +(u'Welsh', 'cy'), |
| 97 | +(u'Belarusian', 'be-x-old'), |
| 98 | +(u'Taraškievica', 'be-x-old'), |
| 99 | +(u'Piedmontese', 'pms'), |
| 100 | +(u'Albanian', 'sq'), |
| 101 | +(u'Tamil', 'ta'), |
| 102 | +(u'Bishnupriya Manipuri', 'bpy'), |
| 103 | +(u'Belarusian', 'be'), |
| 104 | +(u'Aragonese', 'an'), |
| 105 | +(u'Occitan', 'oc'), |
| 106 | +(u'Bengali', 'bn'), |
| 107 | +(u'Swahili', 'sw'), |
| 108 | +(u'Ido', 'io'), |
| 109 | +(u'Ripuarian', 'ksh'), |
| 110 | +(u'Lombard', 'lmo'), |
| 111 | +(u'West Frisian', 'fy'), |
| 112 | +(u'Gujarati', 'gu'), |
| 113 | +(u'Low Saxon', 'nds'), |
| 114 | +(u'Afrikaans', 'af'), |
| 115 | +(u'Sicilian', 'scn'), |
| 116 | +(u'Quechua', 'qu'), |
| 117 | +(u'Kurdish', 'ku'), |
| 118 | +(u'Urdu', 'ur'), |
| 119 | +(u'Sundanese', 'su'), |
| 120 | +(u'Malayalam', 'ml'), |
| 121 | +(u'Cantonese', 'zh-yue'), |
| 122 | +(u'Asturian', 'ast'), |
| 123 | +(u'Neapolitan', 'nap'), |
| 124 | +(u'Samogitian', 'bat-smg'), |
| 125 | +(u'Walloon', 'wa'), |
| 126 | +(u'Chuvash', 'cv'), |
| 127 | +(u'Irish', 'ga'), |
| 128 | +(u'Armenian', 'hy'), |
| 129 | +(u'Yoruba', 'yo'), |
| 130 | +(u'Kannada', 'kn'), |
| 131 | +(u'Tajik', 'tg'), |
| 132 | +(u'Tarantino', 'roa-tara'), |
| 133 | +(u'Venetian', 'vec'), |
| 134 | +(u'Western Panjabi', 'pnb'), |
| 135 | +(u'Nepali', 'ne'), |
| 136 | +(u'Scottish Gaelic', 'gd'), |
| 137 | +(u'Yiddish', 'yi'), |
| 138 | +(u'Min Nan', 'zh-min-nan'), |
| 139 | +(u'Uzbek', 'uz'), |
| 140 | +(u'Tatar', 'tt'), |
| 141 | +(u'Kapampangan', 'pam'), |
| 142 | +(u'Ossetian', 'os'), |
| 143 | +(u'Sakha', 'sah'), |
| 144 | +(u'Alemannic', 'als'), |
| 145 | +(u'Maori', 'mi'), |
| 146 | +(u'Egyptian Arabic', 'arz'), |
| 147 | +(u'Kazakh', 'kk'), |
| 148 | +(u'Nahuatl', 'nah'), |
| 149 | +(u'Limburgian', 'li'), |
| 150 | +(u'Upper Sorbian', 'hsb'), |
| 151 | +(u'Gilaki', 'glk'), |
| 152 | +(u'Corsican', 'co'), |
| 153 | +(u'Gan', 'gan'), |
| 154 | +(u'Amharic', 'am'), |
| 155 | +(u'Mongolian', 'mn'), |
| 156 | +(u'Interlingua', 'ia'), |
| 157 | +(u'Central Bicolano', 'bcl'), |
| 158 | +(u'Võro', 'fiu-vro'), |
| 159 | +(u'Dutch Low Saxon', 'nds-nl'), |
| 160 | +(u'Faroese', 'fo'), |
| 161 | +(u'Turkmen', 'tk'), |
| 162 | +(u'Scots', 'sco'), |
| 163 | +(u'West Flemish', 'vls'), |
| 164 | +(u'Sinhalese', 'si'), |
| 165 | +(u'Sanskrit', 'sa'), |
| 166 | +(u'Bavarian', 'bar'), |
| 167 | +(u'Burmese', 'my'), |
| 168 | +(u'Manx', 'gv'), |
| 169 | +(u'Divehi', 'dv'), |
| 170 | +(u'Norman', 'nrm'), |
| 171 | +(u'Pangasinan', 'pag'), |
| 172 | +(u'Romansh', 'rm'), |
| 173 | +(u'Banyumasan', 'map-bms'), |
| 174 | +(u'Zazaki', 'diq'), |
| 175 | +(u'Sorani', 'ckb'), |
| 176 | +(u'Northern Sami', 'se'), |
| 177 | +(u'Mazandarani', 'mzn'), |
| 178 | +(u'Wu', 'wuu'), |
| 179 | +(u'Uyghur', 'ug'), |
| 180 | +(u'Friulian', 'fur'), |
| 181 | +(u'Ligurian', 'lij'), |
| 182 | +(u'Maltese', 'mt'), |
| 183 | +(u'Bihari', 'bh'), |
| 184 | +(u'Novial', 'nov'), |
| 185 | +(u'Malagasy', 'mg'), |
| 186 | +(u'Kashubian', 'csb'), |
| 187 | +(u'Ilokano', 'ilo'), |
| 188 | +(u'Sardinian', 'sc'), |
| 189 | +(u'Classical Chinese', 'zh-classical'), |
| 190 | +(u'Khmer', 'km'), |
| 191 | +(u'Ladino', 'lad'), |
| 192 | +(u'Pali', 'pi'), |
| 193 | +(u'Anglo-Saxon', 'ang'), |
| 194 | +(u'Zamboanga Chavacano', 'cbk-zam'), |
| 195 | +(u'Tibetan', 'bo'), |
| 196 | +(u'Fiji Hindi', 'hif'), |
| 197 | +(u'Franco-Provençal', 'frp'), |
| 198 | +(u'Arpitan', 'frp'), |
| 199 | +(u'Hakka', 'hak'), |
| 200 | +(u'Cornish', 'kw'), |
| 201 | +(u'Punjabi', 'pa'), |
| 202 | +(u'Pashto', 'ps'), |
| 203 | +(u'Kalmyk', 'xal'), |
| 204 | +(u'Silesian', 'szl'), |
| 205 | +(u'Pennsylvania German', 'pdc'), |
| 206 | +(u'Hawaiian', 'haw'), |
| 207 | +(u'Saterland Frisian', 'stq'), |
| 208 | +(u'Interlingue', 'ie'), |
| 209 | +(u'Navajo', 'nv'), |
| 210 | +(u'Fijian', 'fj'), |
| 211 | +(u'Crimean Tatar', 'crh'), |
| 212 | +(u'Komi', 'kv'), |
| 213 | +(u'Tongan', 'to'), |
| 214 | +(u'Acehnese', 'ace'), |
| 215 | +(u'Somali', 'so'), |
| 216 | +(u'Erzya', 'myv'), |
| 217 | +(u'Guarani', 'gn'), |
| 218 | +(u'Karachay-Balkar', 'krc'), |
| 219 | +(u'Extremaduran', 'ext'), |
| 220 | +(u'Lingala', 'ln'), |
| 221 | +(u'Kirghiz', 'ky'), |
| 222 | +(u'Meadow Mari', 'mhr'), |
| 223 | +(u'Assyrian Neo-Aramaic', 'arc'), |
| 224 | +(u'Emilian-Romagnol', 'eml'), |
| 225 | +(u'Lojban', 'jbo'), |
| 226 | +(u'Picard', 'pcd'), |
| 227 | +(u'Aymara', 'ay'), |
| 228 | +(u'Wolof', 'wo'), |
| 229 | +(u'Tumbuka', 'tum'), |
| 230 | +(u'Kabyle', 'kab'), |
| 231 | +(u'Bashkir', 'ba'), |
| 232 | +(u'North Frisian', 'frr'), |
| 233 | +(u'Tahitian', 'ty'), |
| 234 | +(u'Tok Pisin', 'tpi'), |
| 235 | +(u'Papiamentu', 'pap'), |
| 236 | +(u'Zealandic', 'zea'), |
| 237 | +(u'Sranan', 'srn'), |
| 238 | +(u'Greenlandic', 'kl'), |
| 239 | +(u'Udmurt', 'udm'), |
| 240 | +(u'Chechen', 'ce'), |
| 241 | +(u'Igbo', 'ig'), |
| 242 | +(u'Komi-Permyak', 'koi'), |
| 243 | +(u'Oriya', 'or'), |
| 244 | +(u'Lower Sorbian', 'dsb'), |
| 245 | +(u'Kongo', 'kg'), |
| 246 | +(u'Lao', 'lo'), |
| 247 | +(u'Abkhazian', 'ab'), |
| 248 | +(u'Moksha', 'mdf'), |
| 249 | +(u'Romani', 'rmy'), |
| 250 | +(u'Hill Mari', 'mrj'), |
| 251 | +(u'Banjar', 'bjn'), |
| 252 | +(u'Old Church Slavonic', 'cu'), |
| 253 | +(u'Mirandese', 'mwl'), |
| 254 | +(u'Karakalpak', 'kaa'), |
| 255 | +(u'Samoan', 'sm'), |
| 256 | +(u'Moldovan', 'mo'), |
| 257 | +(u'Tetum', 'tet'), |
| 258 | +(u'Avar', 'av'), |
| 259 | +(u'Kashmiri', 'ks'), |
| 260 | +(u'Gothic', 'got'), |
| 261 | +(u'Sindhi', 'sd'), |
| 262 | +(u'Bambara', 'bm'), |
| 263 | +(u'Nauruan', 'na'), |
| 264 | +(u'Norfolk', 'pih'), |
| 265 | +(u'Pontic', 'pnt'), |
| 266 | +(u'Inuktitut', 'iu'), |
| 267 | +(u'Inupiak', 'ik'), |
| 268 | +(u'Bislama', 'bi'), |
| 269 | +(u'Cherokee', 'chr'), |
| 270 | +(u'Assamese', 'as'), |
| 271 | +(u'Min Dong', 'cdo'), |
| 272 | +(u'Ewe', 'ee'), |
| 273 | +(u'Swati', 'ss'), |
| 274 | +(u'Oromo', 'om'), |
| 275 | +(u'Zhuang', 'za'), |
| 276 | +(u'Zulu', 'zu'), |
| 277 | +(u'Tigrinya', 'ti'), |
| 278 | +(u'Venda', 've'), |
| 279 | +(u'Tsonga', 'ts'), |
| 280 | +(u'Hausa', 'ha'), |
| 281 | +(u'Dzongkha', 'dz'), |
| 282 | +(u'Sango', 'sg'), |
| 283 | +(u'Chamorro', 'ch'), |
| 284 | +(u'Cree', 'cr'), |
| 285 | +(u'Xhosa', 'xh'), |
| 286 | +(u'Akan', 'ak'), |
| 287 | +(u'Sesotho', 'st'), |
| 288 | +(u'Kinyarwanda', 'rw'), |
| 289 | +(u'Tswana', 'tn'), |
| 290 | +(u'Kikuyu', 'ki'), |
| 291 | +(u'Buryat', 'bxr'), |
| 292 | +(u'Buginese', 'bug'), |
| 293 | +(u'Chichewa', 'ny'), |
| 294 | +(u'Lak', 'lbe'), |
| 295 | +(u'Twi', 'tw'), |
| 296 | +(u'Shona', 'sn'), |
| 297 | +(u'Kirundi', 'rn'), |
| 298 | +(u'Fula', 'ff'), |
| 299 | +(u'Cheyenne', 'chy'), |
| 300 | +(u'Luganda', 'lg'), |
| 301 | +(u'Ndonga', 'ng'), |
| 302 | +(u'Sichuan Yi', 'ii'), |
| 303 | +(u'Choctaw', 'cho'), |
| 304 | +(u'Marshallese', 'mh'), |
| 305 | +(u'Afar', 'aa'), |
| 306 | +(u'Kuanyama', 'kj'), |
| 307 | +(u'Hiri Motu', 'ho'), |
| 308 | +(u'Muscogee', 'mus'), |
| 309 | +(u'Kanuri', 'kr'), |
| 310 | +(u'Herero', 'hz'), |
| 311 | +(u'English', 'en'), |
| 312 | +(u'Deutsch', 'de'), |
| 313 | +(u'Français', 'fr'), |
| 314 | +(u'Italiano', 'it'), |
| 315 | +(u'Polski', 'pl'), |
| 316 | +(u'日本語', 'ja'), |
| 317 | +(u'Español', 'es'), |
| 318 | +(u'Nederlands', 'nl'), |
| 319 | +(u'Português', 'pt'), |
| 320 | +(u'Русский', 'ru'), |
| 321 | +(u'Svenska', 'sv'), |
| 322 | +(u'中文', 'zh'), |
| 323 | +(u'Català', 'ca'), |
| 324 | +(u'Norsk', 'no'), |
| 325 | +(u'Bokmål', 'no'), |
| 326 | +(u'Suomi', 'fi'), |
| 327 | +(u'Українська', 'uk'), |
| 328 | +(u'Magyar', 'hu'), |
| 329 | +(u'Čeština', 'cs'), |
| 330 | +(u'Română', 'ro'), |
| 331 | +(u'Türkçe', 'tr'), |
| 332 | +(u'한국어', 'ko'), |
| 333 | +(u'Tiếng Việt', 'vi'), |
| 334 | +(u'Dansk', 'da'), |
| 335 | +(u'العربية', 'ar'), |
| 336 | +(u'Esperanto', 'eo'), |
| 337 | +(u'Српски', 'sr'), |
| 338 | +(u'Srpski', 'sr'), |
| 339 | +(u'Bahasa Indonesia', 'id'), |
| 340 | +(u'Lietuvių', 'lt'), |
| 341 | +(u'Volapük', 'vo'), |
| 342 | +(u'Slovenčina', 'sk'), |
| 343 | +(u'עברית', 'he'), |
| 344 | +(u'Български', 'bg'), |
| 345 | +(u'فارسی', 'fa'), |
| 346 | +(u'Slovenščina', 'sl'), |
| 347 | +(u'Winaray', 'war'), |
| 348 | +(u'Hrvatski', 'hr'), |
| 349 | +(u'Eesti', 'et'), |
| 350 | +(u'Bahasa Melayu', 'ms'), |
| 351 | +(u'नेपाल भाषा', 'new'), |
| 352 | +(u'Simple English', 'simple'), |
| 353 | +(u'Galego', 'gl'), |
| 354 | +(u'ไทย', 'th'), |
| 355 | +(u'Armãneashce', 'roa-rup'), |
| 356 | +(u'Nynorsk', 'nn'), |
| 357 | +(u'Euskara', 'eu'), |
| 358 | +(u'हिन्दी', 'hi'), |
| 359 | +(u'Ελληνικά', 'el'), |
| 360 | +(u'Krèyol ayisyen', 'ht'), |
| 361 | +(u'Latina', 'la'), |
| 362 | +(u'తెలుగు', 'te'), |
| 363 | +(u'ქართული', 'ka'), |
| 364 | +(u'Sinugboanong Binisaya', 'ceb'), |
| 365 | +(u'Македонски', 'mk'), |
| 366 | +(u'Azərbaycan', 'az'), |
| 367 | +(u'Tagalog', 'tl'), |
| 368 | +(u'Brezhoneg', 'br'), |
| 369 | +(u'Srpskohrvatski', 'sh'), |
| 370 | +(u'Српскохрватски', 'sh'), |
| 371 | +(u'मराठी', 'mr'), |
| 372 | +(u'Lëtzebuergesch', 'lb'), |
| 373 | +(u'Basa Jawa', 'jv'), |
| 374 | +(u'Latviešu', 'lv'), |
| 375 | +(u'Bosanski', 'bs'), |
| 376 | +(u'Íslenska', 'is'), |
| 377 | +(u'Cymraeg', 'cy'), |
| 378 | +(u'Беларуская', 'be-x-old'), |
| 379 | +(u'тарашкевіца', 'be-x-old'), |
| 380 | +(u'Piemontèis', 'pms'), |
| 381 | +(u'Shqip', 'sq'), |
| 382 | +(u'தமிழ்', 'ta'), |
| 383 | +(u'ইমার ঠার', 'bpy'), |
| 384 | +(u'বিষ্ণুপ্রিয়া মণিপুরী', 'bpy'), |
| 385 | +(u'Беларуская', 'be'), |
| 386 | +(u'Aragonés', 'an'), |
| 387 | +(u'Occitan', 'oc'), |
| 388 | +(u'বাংলা', 'bn'), |
| 389 | +(u'Kiswahili', 'sw'), |
| 390 | +(u'Ido', 'io'), |
| 391 | +(u'Ripoarisch', 'ksh'), |
| 392 | +(u'Lumbaart', 'lmo'), |
| 393 | +(u'Frysk', 'fy'), |
| 394 | +(u'ગુજરાતી', 'gu'), |
| 395 | +(u'Plattdüütsch', 'nds'), |
| 396 | +(u'Afrikaans', 'af'), |
| 397 | +(u'Sicilianu', 'scn'), |
| 398 | +(u'Runa Simi', 'qu'), |
| 399 | +(u'Kurdî', 'ku'), |
| 400 | +(u'كوردی', 'ku'), |
| 401 | +(u'اردو', 'ur'), |
| 402 | +(u'Basa Sunda', 'su'), |
| 403 | +(u'മലയാളം', 'ml'), |
| 404 | +(u'粵語', 'zh-yue'), |
| 405 | +(u'Asturianu', 'ast'), |
| 406 | +(u'Nnapulitano', 'nap'), |
| 407 | +(u'Žemaitėška', 'bat-smg'), |
| 408 | +(u'Walon', 'wa'), |
| 409 | +(u'Чăваш', 'cv'), |
| 410 | +(u'Gaeilge', 'ga'), |
| 411 | +(u'Հայերեն', 'hy'), |
| 412 | +(u'Yorùbá', 'yo'), |
| 413 | +(u'ಕನ್ನಡ', 'kn'), |
| 414 | +(u'Тоҷикӣ', 'tg'), |
| 415 | +(u'Tarandíne', 'roa-tara'), |
| 416 | +(u'Vèneto', 'vec'), |
| 417 | +(u'شاہ مکھی پنجابی', 'pnb'), |
| 418 | +(u'Shāhmukhī Pañjābī', 'pnb'), |
| 419 | +(u'नेपाली', 'ne'), |
| 420 | +(u'Gàidhlig', 'gd'), |
| 421 | +(u'ייִדיש', 'yi'), |
| 422 | +(u'Bân-lâm-gú', 'zh-min-nan'), |
| 423 | +(u'O‘zbek', 'uz'), |
| 424 | +(u'Tatarça', 'tt'), |
| 425 | +(u'Татарча', 'tt'), |
| 426 | +(u'Kapampangan', 'pam'), |
| 427 | +(u'Иронау', 'os'), |
| 428 | +(u'Саха тыла', 'sah'), |
| 429 | +(u'Saxa Tyla', 'sah'), |
| 430 | +(u'Alemannisch', 'als'), |
| 431 | +(u'Māori', 'mi'), |
| 432 | +(u'مصرى', 'arz'), |
| 433 | +(u'Maṣrī', 'arz'), |
| 434 | +(u'Қазақша', 'kk'), |
| 435 | +(u'Nāhuatl', 'nah'), |
| 436 | +(u'Limburgs', 'li'), |
| 437 | +(u'Hornjoserbsce', 'hsb'), |
| 438 | +(u'گیلکی', 'glk'), |
| 439 | +(u'Corsu', 'co'), |
| 440 | +(u'贛語', 'gan'), |
| 441 | +(u'አማርኛ', 'am'), |
| 442 | +(u'Монгол', 'mn'), |
| 443 | +(u'Interlingua', 'ia'), |
| 444 | +(u'Bikol', 'bcl'), |
| 445 | +(u'Võro', 'fiu-vro'), |
| 446 | +(u'Nedersaksisch', 'nds-nl'), |
| 447 | +(u'Føroyskt', 'fo'), |
| 448 | +(u'تركمن ', 'tk'), |
| 449 | +(u'Туркмен', 'tk'), |
| 450 | +(u'Scots', 'sco'), |
| 451 | +(u'West-Vlams', 'vls'), |
| 452 | +(u'සිංහල', 'si'), |
| 453 | +(u'संस्कृतम्', 'sa'), |
| 454 | +(u'Boarisch', 'bar'), |
| 455 | +(u'မ္ရန္မာစာ', 'my'), #Needs fix |
| 456 | +(u'Gaelg', 'gv'), |
| 457 | +(u'ދިވެހިބަސް', 'dv'), |
| 458 | +(u'Nouormand', 'nrm'), |
| 459 | +(u'Normaund', 'nrm'), |
| 460 | +(u'Pangasinan', 'pag'), |
| 461 | +(u'Rumantsch', 'rm'), |
| 462 | +(u'Basa Banyumasan', 'map-bms'), |
| 463 | +(u'Zazaki', 'diq'), |
| 464 | +(u'Soranî', 'ckb'), |
| 465 | +(u'کوردی', 'ckb'), |
| 466 | +(u'Sámegiella', 'se'), |
| 467 | +(u'مَزِروني', 'mzn'), |
| 468 | +(u'吴语', 'wuu'), |
| 469 | +(u'Oyghurque', 'ug'), |
| 470 | +(u'Furlan', 'fur'), |
| 471 | +(u'Líguru', 'lij'), |
| 472 | +(u'Malti', 'mt'), |
| 473 | +(u'भोजपुरी', 'bh'), |
| 474 | +(u'Novial', 'nov'), |
| 475 | +(u'Malagasy', 'mg'), |
| 476 | +(u'Kaszëbsczi', 'csb'), |
| 477 | +(u'Ilokano', 'ilo'), |
| 478 | +(u'Sardu', 'sc'), |
| 479 | +(u'古文', 'zh-classical'), |
| 480 | +(u'文言文', 'zh-classical'), |
| 481 | +(u'ភាសាខ្មែរ', 'km'), |
| 482 | +(u'Dzhudezmo', 'lad'), |
| 483 | +(u'पाऴि', 'pi'), |
| 484 | +(u'Englisc', 'ang'), |
| 485 | +(u'Chavacano de Zamboanga', 'cbk-zam'), |
| 486 | +(u'བོད་སྐད', 'bo'), |
| 487 | +(u'Fiji Hindi', 'hif'), |
| 488 | +(u'Arpitan', 'frp'), |
| 489 | +(u'Hak-kâ-fa', 'hak'), |
| 490 | +(u'客家話', 'hak'), |
| 491 | +(u'Kernewek', 'kw'), |
| 492 | +(u'Karnuack', 'kw'), |
| 493 | +(u'ਪੰਜਾਬੀ', 'pa'), |
| 494 | +(u'پښتو', 'ps'), |
| 495 | +(u'Хальмг', 'xal'), |
| 496 | +(u'Ślůnski', 'szl'), |
| 497 | +(u'Deitsch', 'pdc'), |
| 498 | +(u'Hawai`i', 'haw'), |
| 499 | +(u'Seeltersk', 'stq'), |
| 500 | +(u'Interlingue', 'ie'), |
| 501 | +(u'Diné bizaad', 'nv'), |
| 502 | +(u'Na Vosa Vakaviti', 'fj'), |
| 503 | +(u'Qırımtatarca', 'crh'), |
| 504 | +(u'Коми', 'kv'), |
| 505 | +(u'faka Tonga', 'to'), |
| 506 | +(u'Bahsa Acèh', 'ace'), |
| 507 | +(u'Soomaaliga', 'so'), |
| 508 | +(u'Эрзянь', 'myv'), |
| 509 | +(u'Erzjanj Kelj', 'myv'), |
| 510 | +(u"Avañe'ẽ", 'gn'), |
| 511 | +(u'Къарачай-Малкъар', 'krc'), |
| 512 | +(u'Qarachay-Malqar', 'krc'), |
| 513 | +(u'Estremeñu', 'ext'), |
| 514 | +(u'Lingala', 'ln'), |
| 515 | +(u'Кыргызча', 'ky'), |
| 516 | +(u'Олык Марий', 'mhr'), |
| 517 | +(u'Olyk Marij', 'mhr'), |
| 518 | +(u'ܐܪܡܝܐ', 'arc'), |
| 519 | +(u'Emiliàn e rumagnòl', 'eml'), |
| 520 | +(u'Lojban', 'jbo'), |
| 521 | +(u'Picard', 'pcd'), |
| 522 | +(u'Aymar', 'ay'), |
| 523 | +(u'Wolof', 'wo'), |
| 524 | +(u'chiTumbuka', 'tum'), |
| 525 | +(u'Taqbaylit', 'kab'), |
| 526 | +(u'Башҡорт', 'ba'), |
| 527 | +(u'Frasch', 'frr'), |
| 528 | +(u'Reo Mā`ohi', 'ty'), |
| 529 | +(u'Tok Pisin', 'tpi'), |
| 530 | +(u'Papiamentu', 'pap'), |
| 531 | +(u'Zeêuws', 'zea'), |
| 532 | +(u'Sranantongo', 'srn'), |
| 533 | +(u'Kalaallisut', 'kl'), |
| 534 | +(u'Удмурт кыл', 'udm'), |
| 535 | +(u'Нохчийн', 'ce'), |
| 536 | +(u'Igbo', 'ig'), |
| 537 | +(u'Перем Коми', 'koi'), |
| 538 | +(u'Perem Komi', 'koi'), |
| 539 | +(u'ଓଡ଼ିଆ', 'or'), |
| 540 | +(u'Dolnoserbski', 'dsb'), |
| 541 | +(u'KiKongo', 'kg'), |
| 542 | +(u'ລາວ', 'lo'), |
| 543 | +(u'Аҧсуа', 'ab'), |
| 544 | +(u'Мокшень', 'mdf'), |
| 545 | +(u'Mokshanj Kälj', 'mdf'), |
| 546 | +(u'romani - रोमानी', 'rmy'), |
| 547 | +(u'Кырык Мары', 'mrj'), |
| 548 | +(u'Kyryk Mary', 'mrj'), |
| 549 | +(u'Bahasa Banjar', 'bjn'), |
| 550 | +(u'Словѣньскъ', 'cu'), |
| 551 | +(u'Páigina Percipal', 'mwl'), |
| 552 | +(u'Qaraqalpaqsha', 'kaa'), |
| 553 | +(u'Gagana Samoa', 'sm'), |
| 554 | +(u'Молдовеняскэ', 'mo'), |
| 555 | +(u'Tetun', 'tet'), |
| 556 | +(u'Авар', 'av'), |
| 557 | +(u'कश्मीरी', 'ks'), |
| 558 | +(u'كشميري', 'ks'), |
| 559 | +(u'𐌲𐌿𐍄𐌹𐍃𐌺', 'got'), #Needs fix |
| 560 | +(u'سنڌي، سندھی ، सिन्ध', 'sd'), |
| 561 | +(u'Bamanankan', 'bm'), |
| 562 | +(u'dorerin Naoero', 'na'), |
| 563 | +(u'Norfuk', 'pih'), |
| 564 | +(u'Ποντιακά', 'pnt'), |
| 565 | +(u'ᐃᓄᒃᑎᑐᑦ', 'iu'), |
| 566 | +(u'Iñupiak', 'ik'), |
| 567 | +(u'Bislama', 'bi'), |
| 568 | +(u'ᏣᎳᎩ', 'chr'), |
| 569 | +(u'অসমীয়া', 'as'), |
| 570 | +(u'Mìng-dĕ̤ng-ngṳ̄', 'cdo'), |
| 571 | +(u'Eʋegbe', 'ee'), |
| 572 | +(u'SiSwati', 'ss'), |
| 573 | +(u'Oromoo', 'om'), |
| 574 | +(u'Cuengh', 'za'), |
| 575 | +(u'isiZulu', 'zu'), |
| 576 | +(u'ትግርኛ', 'ti'), |
| 577 | +(u'Tshivenda', 've'), |
| 578 | +(u'Xitsonga', 'ts'), |
| 579 | +(u'هَوُسَ', 'ha'), |
| 580 | +(u'ཇོང་ཁ', 'dz'), |
| 581 | +(u'Sängö', 'sg'), |
| 582 | +(u'Chamoru', 'ch'), |
| 583 | +(u'Nehiyaw', 'cr'), |
| 584 | +(u'isiXhosa', 'xh'), |
| 585 | +(u'Akana', 'ak'), |
| 586 | +(u'Sesotho', 'st'), |
| 587 | +(u'Ikinyarwanda', 'rw'), |
| 588 | +(u'Setswana', 'tn'), |
| 589 | +(u'Gĩkũyũ', 'ki'), |
| 590 | +(u'Буряад', 'bxr'), |
| 591 | +(u'Basa Ugi', 'bug'), |
| 592 | +(u'Chi-Chewa', 'ny'), |
| 593 | +(u'Лакку', 'lbe'), |
| 594 | +(u'Twi', 'tw'), |
| 595 | +(u'chiShona', 'sn'), |
| 596 | +(u'Kirundi', 'rn'), |
| 597 | +(u'Fulfulde', 'ff'), |
| 598 | +(u'Tsetsêhestâhese', 'chy'), |
| 599 | +(u'Luganda', 'lg'), |
| 600 | +(u'Oshiwambo', 'ng'), |
| 601 | +(u'ꆇꉙ', 'ii'), |
| 602 | +(u'Choctaw', 'cho'), |
| 603 | +(u'Ebon', 'mh'), |
| 604 | +(u'Afar', 'aa'), |
| 605 | +(u'Kuanyama', 'kj'), |
| 606 | +(u'Hiri Motu', 'ho'), |
| 607 | +(u'Muskogee', 'mus'), |
| 608 | +(u'Kanuri', 'kr'), |
| 609 | +(u'Otsiherero', 'hz'), |
610 | 610 | ]) |
611 | 611 | |
| 612 | + |
612 | 613 | def language_map(): |
613 | | - return utils.invert_dict(MAPPING) |
\ No newline at end of file |
| 614 | + return utils.invert_dict(MAPPING) |
Index: trunk/tools/editor_trends/configuration.py |
— | — | @@ -49,12 +49,19 @@ |
50 | 50 | self.debug = debug |
51 | 51 | self.progressbar = True |
52 | 52 | self.encoding = 'utf-8' |
53 | | - self.date_format = '%Y-%m-%d' #Date format as used by Erik Zachte |
54 | | - self.timestamp_format = '%Y-%m-%dT%H:%M:%SZ' # Timestamp format as generated by the MediaWiki dumps |
55 | 53 | |
56 | | - self.max_xmlfile_size = 4096 * 1024 #67108864 # ==64Mb, see http://hadoop.apache.org/common/docs/r0.20.0/hdfs_design.html#Large+Data+Setsfor reason |
| 54 | + #Date format as used by Erik Zachte |
| 55 | + self.date_format = '%Y-%m-%d' |
| 56 | + |
| 57 | + # Timestamp format as generated by the MediaWiki dumps |
| 58 | + self.timestamp_format = '%Y-%m-%dT%H:%M:%SZ' |
| 59 | + |
| 60 | + #67108864 # ==64Mb, see http://hadoop.apache.org/common/docs/r0.20.0/hdfs_design.html#Large+Data+Sets for reason |
| 61 | + self.max_xmlfile_size = 4096 * 1024 |
| 62 | + |
| 63 | + #Change this to match your computer's configuration (RAM / CPU) |
57 | 64 | self.number_of_processes = cpu_count() * process_multiplier |
58 | | - #Change this to match your computers configuration (RAM / CPU) |
| 65 | + |
59 | 66 | self.minimum_python_version = (2, 6) |
60 | 67 | self.wp_dump_location = 'http://download.wikimedia.org' |
61 | 68 | self.xml_namespace = 'http://www.mediawiki.org/xml/export-0.4/' |
— | — | @@ -97,7 +104,6 @@ |
98 | 105 | } |
99 | 106 | |
100 | 107 | |
101 | | - |
102 | 108 | def set_custom_settings(self, **kwargs): |
103 | 109 | for kw in kwargs: |
104 | 110 | setattr(self, kw, kwargs[kw]) |
— | — | @@ -119,11 +125,11 @@ |
120 | 126 | return cwd |
121 | 127 | |
122 | 128 | def determine_platform(self): |
123 | | - os = platform.system() |
124 | | - if os == 'Darwin': |
125 | | - return 'OSX' |
126 | | - else: |
127 | | - return os |
| 129 | + os = platform.system() |
| 130 | + if os == 'Darwin': |
| 131 | + return 'OSX' |
| 132 | + else: |
| 133 | + return os |
128 | 134 | |
129 | 135 | #def determine_path_ziptool(self): |
130 | 136 | # return self.detect_installed_program(self.determine_ziptool()) |
— | — | @@ -165,7 +171,7 @@ |
166 | 172 | if self.platform == 'Windows' and self.architecture == 'i386': |
167 | 173 | return win32file._getmaxstdio() |
168 | 174 | elif self.platform != 'Windows': |
169 | | - return resource.getrlimit(resource.RLIMIT_NOFILE)[0] |
| 175 | + return resource.getrlimit(resource.RLIMIT_NOFILE)[0] - 100 |
170 | 176 | else: |
171 | 177 | return 500 |
172 | 178 | |
— | — | @@ -175,11 +181,10 @@ |
176 | 182 | os.path.isdir(os.path.join(self.working_directory, name))] |
177 | 183 | for subdirname in dirs: |
178 | 184 | if not subdirname.startswith('.') and subdirname not in IGNORE_DIRS: |
179 | | - sys.path.append(os.path.join(self.working_directory, |
| 185 | + sys.path.append(os.path.join(self.working_directory, |
180 | 186 | subdirname)) |
181 | 187 | |
182 | 188 | |
183 | | - |
184 | 189 | def set_file_locations(self): |
185 | 190 | self.input_location = os.path.join(self.root, 'wikimedia') |
186 | 191 | self.input_filename = os.path.join(self.input_location, 'en', |
Index: trunk/tools/editor_trends/utils/models.py |
— | — | @@ -1,63 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -# -*- coding: utf-8 -*- |
4 | | -''' |
5 | | -Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
6 | | -This program is free software; you can redistribute it and/or |
7 | | -modify it under the terms of the GNU General Public License version 2 |
8 | | -as published by the Free Software Foundation. |
9 | | -This program is distributed in the hope that it will be useful, |
10 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
12 | | -See the GNU General Public License for more details, at |
13 | | -http://www.fsf.org/licenses/gpl.html |
14 | | -''' |
15 | | - |
16 | | -__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
17 | | -__author__email = 'dvanliere at gmail dot com' |
18 | | -__date__ = '2010-11-09' |
19 | | -__version__ = '0.1' |
20 | | - |
21 | | -import multiprocessing |
22 | | - |
23 | | - |
24 | | -class BaseConsumer(multiprocessing.Process): |
25 | | - |
26 | | - def __init__(self, task_queue, result_queue): |
27 | | - multiprocessing.Process.__init__(self) |
28 | | - self.task_queue = task_queue |
29 | | - self.result_queue = result_queue |
30 | | - |
31 | | - |
32 | | - |
33 | | - |
34 | | -# for kw in kwargs: |
35 | | -# setattr(self, kw, kwargs[kw]) |
36 | | -# |
37 | | -# def run(self): |
38 | | -# proc_name = self.name |
39 | | -# kwargs = {} |
40 | | -# IGNORE = ['input_queue', 'result_queue', 'target'] |
41 | | -# for kw in self.__dict__: |
42 | | -# if kw not in IGNORE and not kw.startswith('_'): |
43 | | -# kwargs[kw] = getattr(self, kw) |
44 | | -# self.target(self.input_queue, self.result_queue, **kwargs) |
45 | | - |
46 | | - |
47 | | -class ProcessResultQueue(multiprocessing.Process): |
48 | | - |
49 | | - def __init__(self, target, result_queue, **kwargs): |
50 | | - multiprocessing.Process.__init__(self) |
51 | | - self.result_queue = result_queue |
52 | | - self.target = target |
53 | | - for kw in kwargs: |
54 | | - setattr(self, kw, kwargs[kw]) |
55 | | - |
56 | | - |
57 | | - def run(self): |
58 | | - proc_name = self.name |
59 | | - kwargs = {} |
60 | | - IGNORE = ['result_queue', 'target'] |
61 | | - for kw in self.__dict__: |
62 | | - if kw not in IGNORE and not kw.startswith('_'): |
63 | | - kwargs[kw] = getattr(self, kw) |
64 | | - self.target(self.result_queue, **kwargs) |
Index: trunk/tools/editor_trends/utils/consumers.py |
— | — | @@ -0,0 +1,61 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | +''' |
| 5 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 6 | +This program is free software; you can redistribute it and/or |
| 7 | +modify it under the terms of the GNU General Public License version 2 |
| 8 | +as published by the Free Software Foundation. |
| 9 | +This program is distributed in the hope that it will be useful, |
| 10 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | +See the GNU General Public License for more details, at |
| 13 | +http://www.fsf.org/licenses/gpl.html |
| 14 | +''' |
| 15 | + |
| 16 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
| 17 | +__author__email = 'dvanliere at gmail dot com' |
| 18 | +__date__ = '2010-11-09' |
| 19 | +__version__ = '0.1' |
| 20 | + |
| 21 | +import multiprocessing |
| 22 | + |
| 23 | + |
| 24 | +class BaseConsumer(multiprocessing.Process): |
| 25 | + |
| 26 | + def __init__(self, task_queue, result_queue): |
| 27 | + multiprocessing.Process.__init__(self) |
| 28 | + self.task_queue = task_queue |
| 29 | + self.result_queue = result_queue |
| 30 | + |
| 31 | + |
| 32 | +# for kw in kwargs: |
| 33 | +# setattr(self, kw, kwargs[kw]) |
| 34 | +# |
| 35 | +# def run(self): |
| 36 | +# proc_name = self.name |
| 37 | +# kwargs = {} |
| 38 | +# IGNORE = ['input_queue', 'result_queue', 'target'] |
| 39 | +# for kw in self.__dict__: |
| 40 | +# if kw not in IGNORE and not kw.startswith('_'): |
| 41 | +# kwargs[kw] = getattr(self, kw) |
| 42 | +# self.target(self.input_queue, self.result_queue, **kwargs) |
| 43 | + |
| 44 | + |
| 45 | +class ProcessResultQueue(multiprocessing.Process): |
| 46 | + |
| 47 | + def __init__(self, target, result_queue, **kwargs): |
| 48 | + multiprocessing.Process.__init__(self) |
| 49 | + self.result_queue = result_queue |
| 50 | + self.target = target |
| 51 | + for kw in kwargs: |
| 52 | + setattr(self, kw, kwargs[kw]) |
| 53 | + |
| 54 | + |
| 55 | + def run(self): |
| 56 | + proc_name = self.name |
| 57 | + kwargs = {} |
| 58 | + IGNORE = ['result_queue', 'target'] |
| 59 | + for kw in self.__dict__: |
| 60 | + if kw not in IGNORE and not kw.startswith('_'): |
| 61 | + kwargs[kw] = getattr(self, kw) |
| 62 | + self.target(self.result_queue, **kwargs) |
Property changes on: trunk/tools/editor_trends/utils/consumers.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 63 | + native |
Added: svn:mime-type |
2 | 64 | + text/plain |
Index: trunk/tools/editor_trends/utils/messages.py |
— | — | @@ -26,7 +26,12 @@ |
27 | 27 |
|
28 | 28 |
|
29 | 29 | def show(func):
|
| 30 | + '''
|
| 31 | + @func should be a qsize() method belonging to a task queue. qsize() is not supported
|
| 32 | + on OSX hence this simple workaround to make sure that we can continue supporting
|
| 33 | + OSX.
|
| 34 | + '''
|
30 | 35 | try:
|
31 | | - func()
|
| 36 | + return func()
|
32 | 37 | except:
|
33 | | - print 'Calling function %s caused an error, probably your platform is not supporting this function' % func
|
| 38 | + return 'unknown'
|
Index: trunk/tools/editor_trends/database/db_settings.py |
— | — | @@ -1,38 +0,0 @@ |
2 | | -#!/usr/bin/python |
3 | | -# -*- coding: utf-8 -*- |
4 | | -''' |
5 | | -Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
6 | | -This program is free software; you can redistribute it and/or |
7 | | -modify it under the terms of the GNU General Public License version 2 |
8 | | -as published by the Free Software Foundation. |
9 | | -This program is distributed in the hope that it will be useful, |
10 | | -but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
12 | | -See the GNU General Public License for more details, at |
13 | | -http://www.fsf.org/licenses/gpl.html |
14 | | -''' |
15 | | - |
16 | | -__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
17 | | - |
18 | | -''' |
19 | | -This is a settings file that contains the layout of different tables. The main |
20 | | -key will be used as the tablename while it,s values contain tuples containing |
21 | | -fieldname and datatype This is only be used for sqlite. |
22 | | -''' |
23 | | -CONTRIBUTOR_TABLE = {'contributors': []} |
24 | | -CONTRIBUTOR_TABLE['contributors'].append(('contributor', 'VARCHAR(64)')) |
25 | | -CONTRIBUTOR_TABLE['contributors'].append(('article', 'INTEGER')) |
26 | | -CONTRIBUTOR_TABLE['contributors'].append(('timestamp', 'TEXT')) |
27 | | -CONTRIBUTOR_TABLE['contributors'].append(('bot', 'INTEGER')) |
28 | | - |
29 | | -BOT_TABLE = {'bots': []} |
30 | | -BOT_TABLE['bots'].append(('language', 'VARCHAR(12)')) |
31 | | -BOT_TABLE['bots'].append(('name', 'VARCHAR(64)')) |
32 | | -BOT_TABLE['bots'].append(('edits_namespace_a', 'INTEGER')) |
33 | | -BOT_TABLE['bots'].append(('edits_namespace_x', 'INTEGER')) |
34 | | -BOT_TABLE['bots'].append(('rank_now', 'INTEGER')) |
35 | | -BOT_TABLE['bots'].append(('rank_prev', 'INTEGER')) |
36 | | -BOT_TABLE['bots'].append(('first_date', 'TEXT')) |
37 | | -BOT_TABLE['bots'].append(('days_first', 'INTEGER')) |
38 | | -BOT_TABLE['bots'].append(('last_date', 'TEXT')) |
39 | | -BOT_TABLE['bots'].append(('days_last', 'INTEGER')) |
Index: trunk/tools/editor_trends/database/launcher.py |
— | — | @@ -27,14 +27,14 @@ |
28 | 28 | from utils import utils |
29 | 29 | |
30 | 30 | |
31 | | -def start_mongodb_server(platform, x, path): |
| 31 | +def start_mongodb_server(x, path): |
32 | 32 | default_port = 27017 |
33 | 33 | port = default_port + x |
34 | | - if platform == 'Windows': |
| 34 | + if settings.platform == 'Windows': |
35 | 35 | p = subprocess.Popen([path, '--port', str(port), '--dbpath', 'c:\data\db', '--logpath', 'c:\mongodb\logs']) |
36 | | - elif platform == 'Linux': |
| 36 | + elif settings.platform == 'Linux': |
37 | 37 | subprocess.Popen([path, '--port %s' % port]) |
38 | | - elif platform == 'OSX': |
| 38 | + elif settings.platform == 'OSX': |
39 | 39 | raise NotImplementedError |
40 | 40 | else: |
41 | 41 | raise exceptions.PlatformNotSupportedError(platform) |