r79958 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r79957 | r79958 | r79959 >
Date:	21:06, 10 January 2011
Author:	diederik
Status:	deferred
Tags:
Comment:	Numerous small fixes
Modified paths:	/trunk/tools/editor_trends/analyses/aggregates.py (modified) (history) /trunk/tools/editor_trends/analyses/cohort_charts.py (modified) (history) /trunk/tools/editor_trends/config.py (modified) (history) /trunk/tools/editor_trends/configuration.py (modified) (history) /trunk/tools/editor_trends/database/db_settings.py (deleted) (history) /trunk/tools/editor_trends/database/launcher.py (modified) (history) /trunk/tools/editor_trends/etl/exporter.py (modified) (history) /trunk/tools/editor_trends/etl/extracter.py (modified) (history) /trunk/tools/editor_trends/etl/shaper.py (modified) (history) /trunk/tools/editor_trends/etl/sort.py (modified) (history) /trunk/tools/editor_trends/languages.py (modified) (history) /trunk/tools/editor_trends/manage.py (modified) (history) /trunk/tools/editor_trends/utils/consumers.py (added) (history) /trunk/tools/editor_trends/utils/messages.py (modified) (history) /trunk/tools/editor_trends/utils/models.py (deleted) (history)

Diff [purge]

Index: trunk/tools/editor_trends/manage.py
—	—	@@ -46,8 +46,12 @@
47	47	from etl import transformer
48	48	from etl import exporter
49	49
	50	+datasets = {'forward': 'generate_cohort_dataset_forward',
	51	+ 'backward': 'generate_cohort_dataset_backward',
	52	+ 'backward_custom': 'generate_cohort_dataset_backward_custom',
	53	+ 'wide': 'generate_wide_editor_dataset',
	54	+ }
50	55
51		-
52	56	class Timer(object):
53	57	def __init__(self):
54	58	self.t0 = datetime.datetime.now()
—	—	@@ -102,26 +106,31 @@
103	107	else:
104	108	return namespaces
105	109
	110	+
106	111	def write_message_to_log(logger, args, message=None, verb=None, **kwargs):
107	112	function = get_value(args, 'func')
108		~~- logger.debug('Starting %s task' % function.func_name)~~
	113	+ logger.debug('%s\tStarting %s task' % (datetime.datetime.now(), function.func_name))
109	114	if message:
110		~~- logger.debug(message)~~
	115	+ logger.debug('%s\t%s' % (datetime.datetime.now(), message))
111	116
112	117	max_length = max([len(kw) for kw in kwargs])
113		~~- #max_tab = max_length / 4~~
	118	+ max_tabs = max_length // settings.tab_width
	119	+ res = max_length % settings.tab_width
	120	+ if res > 0:
	121	+ max_tabs += 1
	122	+ pos = max_tabs * settings.tab_width
114	123	for kw in kwargs:
115	124	if verb:
116		~~- logger.debug('Action: %s\tSetting: %s' % (verb, kwargs[kw]))~~
	125	+ logger.debug('%s\tAction: %s\tSetting: %s' % (datetime.datetime.now(), verb, kwargs[kw]))
117	126	else:
118		~~- tabs = (max_length - len(kw)) / 4~~
119		~~- if tabs == 0:~~
120		~~- tabs = 1~~
	127	+ tabs = (pos - len(kw)) // settings.tab_width
	128	+ res = len(kw) % settings.tab_width
	129	+ if res > 0 or tabs == 0:
	130	+ tabs += 1
121	131	tabs = ''.join(['\t' for t in xrange(tabs)])
122		~~- logger.debug('\tKey: %s%sSetting: %s' % (kw, tabs, kwargs[kw]))~~
	132	+ logger.debug('%s\t\tKey: %s%sSetting: %s' % (datetime.datetime.now(), kw, tabs, kwargs[kw]))
123	133
124	134
125		-
126	135	def get_project(args):
127	136	project = get_value(args, 'project')
128	137	if project != 'wiki':
—	—	@@ -142,21 +151,17 @@
143	152	config['language_code'] = language_code
144	153	config['language'] = get_value(args, 'language')
145	154	config['location'] = os.path.join(location, language_code, project)
146		~~- #config['chunks'] = os.path.join(config['location'], 'chunks')~~
147	155	config['txt'] = os.path.join(config['location'], 'txt')
148	156	config['sorted'] = os.path.join(config['location'], 'sorted')
149		~~- config['dbready'] = os.path.join(config['location'], 'dbready')~~
150	157	config['project'] = project
151	158	config['full_project'] = get_projectname(args)
152	159	config['filename'] = generate_wikidump_filename(language_code, project, args)
153	160	config['collection'] = get_value(args, 'collection')
154	161	config['namespaces'] = get_namespaces(args)
155		~~- config['directories'] = [config['location'], config['txt'], config['sorted'], config['dbready']]~~
	162	+ config['directories'] = [config['location'], config['txt'], config['sorted']]
156	163
157	164	message = 'Settings as generated from the configuration module.'
158	165	write_message_to_log(logger, args, message, None, **config)
159		~~- #for c in config:~~
160		~~- # logger.debug('Key: %s - Setting: %s' % (c, config[c]))~~
161	166	return config
162	167
163	168
—	—	@@ -170,10 +175,11 @@
171	176	config['Input directory'] = '%s' % kwargs.get('location')
172	177	config['Output directory'] = '%s and subdirectories' % kwargs.get('location')
173	178
	179	+ max_length_key = max([len(key) for key in config.keys()])
174	180	message = 'Final settings after parsing command line arguments:'
175	181	write_message_to_log(logger, args, message, None, **config)
176	182	for c in config:
177		~~- print '%s\t%s' % (c, config[c])~~
	183	+ print '%s: %s' % (c.rjust(max_length_key), config[c])
178	184
179	185
180	186	def dump_downloader_launcher(args, logger, **kwargs):
—	—	@@ -244,10 +250,8 @@
245	251	location = kwargs.pop('location')
246	252	input = os.path.join(location, 'txt')
247	253	output = os.path.join(location, 'sorted')
248		~~- final_output = os.path.join(location, 'dbready')~~
249		~~- write_message_to_log(logger, args, location=location, input=input, output=output, final_output=final_output)~~
	254	+ write_message_to_log(logger, args, location=location, input=input, output=output)
250	255	sort.mergesort_launcher(input, output)
251		~~- #loader.mergesort_external_launcher(output, final_output)~~
252	256	timer.elapsed()
253	257
254	258
—	—	@@ -264,7 +268,6 @@
265	269	write_message_to_log(logger, args, verb='Storing', location=location, input=input, project=project, collection=collection)
266	270	store.launcher(input, project, collection)
267	271	cnt_editors = db.count_records(project, collection)
268		~~- #assert num_editors == cnt_editors~~
269	272	timer.elapsed()
270	273
271	274
—	—	@@ -282,6 +285,7 @@
283	286	def debug_launcher(args, logger, **kwargs):
284	287	pass
285	288
	289	+
286	290	def exporter_launcher(args, logger, **kwargs):
287	291	print 'Start exporting dataset'
288	292	timer = Timer()
—	—	@@ -291,6 +295,7 @@
292	296	targets = targets.split(',')
293	297	for target in targets:
294	298	write_message_to_log(logger, args, verb='Exporting', target=target, dbname=dbname, collection=collection)
	299	+ target = datasets[target]
295	300	exporter.dataset_launcher(dbname, collection, target)
296	301	timer.elapsed()
297	302
—	—	@@ -309,8 +314,9 @@
310	315	write_message_to_log(logger, args, verb='Deleting', file=file)
311	316	utils.delete_file(settings.binary_location, file)
312	317
	318	+
313	319	def all_launcher(args, logger, **kwargs):
314		~~- print 'all_launcher'~~
	320	+ print 'The entire data processing chain has been called, this will take a couple of hours (at least) to complete.'
315	321	timer = Timer()
316	322	full_project = kwargs.get('full_project', None)
317	323	message = 'Start of building %s dataset.' % full_project
—	—	@@ -323,9 +329,6 @@
324	330	if clean:
325	331	cleanup(logger, args, **kwargs)
326	332
327		~~- #if format != 'xml':~~
328		~~- # ignore = ignore + ',extract'~~
329		-
330	333	functions = ordered_dict.OrderedDict(((dump_downloader_launcher, 'download'),
331	334	#(chunker_launcher, 'split'),
332	335	(extract_launcher, 'extract'),
—	—	@@ -337,7 +340,6 @@
338	341	for function, callname in functions.iteritems():
339	342	if callname not in ignore:
340	343	function(args, logger, **kwargs)
341		-
342	344	timer.elapsed()
343	345
344	346
—	—	@@ -374,9 +376,9 @@
375	377
376	378
377	379	def about():
378		~~- print 'Editor Trends Software is (c) 2010 by the Wikimedia Foundation.'~~
	380	+ print '\nEditor Trends Software is (c) 2010 by the Wikimedia Foundation.'
379	381	print 'Written by Diederik van Liere (dvanliere@gmail.com).'
380		~~- print 'This software comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to distribute it under certain conditions.'~~
	382	+ print 'This software comes with ABSOLUTELY NO WARRANTY.\nThis is free software, and you are welcome to distribute it\nunder certain conditions.'
381	383	print 'See the README.1ST file for more information.'
382	384	print '\n'
383	385
—	—	@@ -384,15 +386,11 @@
385	387	def main():
386	388	default_language = determine_default_language()
387	389
388		~~- datasets = {'forward': 'generate_cohort_dataset_forward',~~
389		~~- 'backward': 'generate_cohort_dataset_backward',~~
390		~~- 'wide': 'generate_wide_editor_dataset',~~
391		~~- }~~
392		-
393	390	file_choices = ('stub-meta-history.xml.gz',
394	391	'stub-meta-current.xml.gz',
395	392	'pages-meta-history.xml.7z',
396		~~- 'pages-meta-current.xml.bz2')~~
	393	+ 'pages-meta-current.xml.bz2',
	394	+ )
397	395
398	396
399	397	parser = ArgumentParser(prog='manage', formatter_class=RawTextHelpFormatter)
—	—	@@ -418,7 +416,7 @@
419	417	parser_create = subparsers.add_parser('extract', help='The store sub command parsers the XML chunk files, extracts the information and stores it in a MongoDB.')
420	418	parser_create.set_defaults(func=extract_launcher)
421	419
422		~~- parser_sort = subparsers.add_parser('sort', help='By presorting the data, significant processing time reducations are achieved.')~~
	420	+ parser_sort = subparsers.add_parser('sort', help='By presorting the data, significant processing time reductions are achieved.')
423	421	parser_sort.set_defaults(func=sort_launcher)
424	422
425	423	parser_store = subparsers.add_parser('store', help='The store sub command parsers the XML chunk files, extracts the information and stores it in a MongoDB.')
—	—	@@ -435,59 +433,75 @@
436	434
437	435	parser_all = subparsers.add_parser('all', help='The all sub command runs the download, split, store and dataset commands.\n\nWARNING: THIS COULD TAKE DAYS DEPENDING ON THE CONFIGURATION OF YOUR MACHINE AND THE SIZE OF THE WIKIMEDIA DUMP FILE.')
438	436	parser_all.set_defaults(func=all_launcher)
439		~~- parser_all.add_argument('-e', '--except', action='store',~~
	437	+ parser_all.add_argument('-e', '--except',
	438	+ action='store',
440	439	help='Should be a list of functions that are to be ignored when executing \'all\'.',
441		~~- default=[])~~
	440	+ default=[]
	441	+ )
442	442
443		~~- parser_all.add_argument('-n', '--new', action='store_true',~~
	443	+ parser_all.add_argument('-n', '--new',
	444	+ action='store_true',
444	445	help='This will delete all previous output and starts from scratch. Mostly useful for debugging purposes.',
445		~~- default=False)~~
	446	+ default=False
	447	+ )
446	448
447		~~- parser.add_argument('-l', '--language', action='store',~~
	449	+ parser.add_argument('-l', '--language',
	450	+ action='store',
448	451	help='Example of valid languages.',
449	452	choices=supported_languages(),
450		~~- default=default_language)~~
	453	+ default=default_language
	454	+ )
451	455
452		~~- parser.add_argument('-p', '--project', action='store',~~
	456	+ parser.add_argument('-p', '--project',
	457	+ action='store',
453	458	help='Specify the Wikimedia project that you would like to download',
454	459	choices=settings.projects.keys(),
455		~~- default='wiki')~~
	460	+ default='wiki'
	461	+ )
456	462
457	463	parser.add_argument('-c', '--collection', action='store',
458	464	help='Name of MongoDB collection',
459	465	default='editors')
460	466
461	467
462		~~- parser.add_argument('-o', '--location', action='store',~~
	468	+ parser.add_argument('-o', '--location',
	469	+ action='store',
463	470	help='Indicate where you want to store the downloaded file.',
464	471	default=settings.input_location
465	472	)
466	473
467		~~- parser.add_argument('-ns', '--namespace', action='store',~~
	474	+ parser.add_argument('-ns', '--namespace',
	475	+ action='store',
468	476	help='A list of namespaces to include for analysis.',
469		~~- default='0')~~
	477	+ default='0'
	478	+ )
470	479
471		~~- #parser.add_argument('-fo', '--format', action='store',~~
472		~~- # help='Indicate which format the chunks should be stored. Valid options are xml and txt.',~~
473		~~- # default='txt')~~
474		-
475		~~- parser.add_argument('-f', '--file', action='store',~~
	480	+ parser.add_argument('-f', '--file',
	481	+ action='store',
476	482	choices=file_choices,
477	483	help='Indicate which dump you want to download. Valid choices are:\n %s' % ''.join([f + ',\n' for f in file_choices]),
478		~~- default='stub-meta-history.xml.gz')~~
	484	+ default='stub-meta-history.xml.gz'
	485	+ )
479	486
480		~~- parser.add_argument('-dv', '--dumpversion', action='store',~~
	487	+ parser.add_argument('-dv', '--dumpversion',
	488	+ action='store',
481	489	choices=settings.dumpversions.keys(),
482	490	help='Indicate the Wikidump version that you are parsing.',
483		~~- default=settings.dumpversions['0'])~~
	491	+ default=settings.dumpversions['0']
	492	+ )
484	493
485		~~- parser.add_argument('-d', '--datasets', action='store',~~
	494	+ parser.add_argument('-d', '--datasets',
	495	+ action='store',
486	496	choices=datasets.keys(),
487	497	help='Indicate what type of data should be exported.',
488		~~- default=datasets['backward'])~~
	498	+ default='backward'
	499	+ )
489	500
490		~~- parser.add_argument('-prog', '--progress', action='store_true', default=True,~~
491		~~- help='Indicate whether you want to have a progressbar.')~~
	501	+ parser.add_argument('-prog', '--progress',
	502	+ action='store_true',
	503	+ default=True, \
	504	+ help='Indicate whether you want to have a progressbar.'
	505	+ )
492	506
493	507	args = parser.parse_args()
494	508	#initialize logger
Index: trunk/tools/editor_trends/analyses/aggregates.py
—	—	@@ -31,11 +31,6 @@
32	32	from utils import messages
33	33
34	34
35		~~-class Dataset:~~
36		~~- def __init__(self):~~
37		~~- pass~~
38		-
39		-
40	35	def new_editor_count(editors, dbname, collection, month=12):
41	36	'''
42	37	@month should be an integer in the range of 1-12.
—	—	@@ -105,6 +100,7 @@
106	101	utils.write_dict_to_csv(data, fh, keys, write_key=False, newline=True)
107	102	fh.close()
108	103
	104	+
109	105	def active_editor_count_launcher(dbname, collection):
110	106	editors = db.retrieve_distinct_keys(dbname, collection, 'editor')
111	107	tasks = multiprocessing.JoinableQueue()
Index: trunk/tools/editor_trends/analyses/cohort_charts.py
—	—	@@ -25,8 +25,8 @@
26	26	settings = configuration.Settings()
27	27	from utils import utils
28	28
29		~~-def prepare_cohort_dataset(dbname):~~
30		~~- dataset = utils.load_object(settings.binary_location, dbname + '_cohort_data.bin')~~
	29	+def prepare_cohort_dataset(dbname, filename):
	30	+ dataset = utils.load_object(settings.binary_location, '%s_%s' % (dbname, filename))
31	31	fh = utils.create_txt_filehandle(settings.dataset_location, dbname + '_cohort_data.txt', 'w', settings.encoding)
32	32
33	33	years = dataset.keys()
Index: trunk/tools/editor_trends/etl/exporter.py
—	—	@@ -20,8 +20,8 @@
21	21	import os
22	22	import sys
23	23	import datetime
	24	+import calendar
24	25	from dateutil.relativedelta import *
25		~~-import calendar~~
26	26	import multiprocessing
27	27	from Queue import Empty
28	28
—	—	@@ -51,7 +51,13 @@
52	52	'''
53	53	def __init__(self, var):
54	54	self.name = var
	55	+ self.obs = []
55	56	self.stats = ['n', 'avg', 'sd', 'min', 'max']
	57	+
	58	+ def __repr__(self):
	59	+ return self.name
	60	+
	61	+ def descriptives(self):
56	62	self.time = shaper.create_datacontainer()
57	63	self.time = shaper.add_months_to_datacontainer(getattr(self, 'time'), datatype='dict')
58	64
—	—	@@ -59,10 +65,6 @@
60	66	setattr(self, var, shaper.create_datacontainer())
61	67	setattr(self, var, shaper.add_months_to_datacontainer(getattr(self, var), datatype='list'))
62	68
63		~~- def __repr__(self):~~
64		~~- return self.name~~
65		-
66		~~- def descriptives(self):~~
67	69	for year in self.time:
68	70	for month in self.time[year]:
69	71	data = [self.time[year][month][k] for k in self.time[year][month].keys()]
—	—	@@ -78,8 +80,8 @@
79	81	This class acts as a container for the Variable class and has some methods
80	82	to output the dataset to a csv file.
81	83	'''
82		~~- def __init__(self, vars):~~
83		~~- self.name = 'long_dataset.tsv'~~
	84	+ def __init__(self, vars, name):
	85	+ self.name = name
84	86	self.vars = []
85	87	for var in vars:
86	88	setattr(self, var, Variable(var))
—	—	@@ -92,8 +94,9 @@
93	95	fh.write('_time\t')
94	96	for var in self.vars:
95	97	var = getattr(self, var)
96		~~- for stat in var.stats:~~
97		~~- fh.write('%s_%s\t' % (var.name, stat))~~
	98	+ fh.write('%s\t' % var.name)
	99	+ #for stat in var.stats:
	100	+ # fh.write('%s_%s\t' % (var.name, stat))
98	101	fh.write('\n')
99	102
100	103	def convert_to_longitudinal_data(self, id, obs, vars):
—	—	@@ -108,32 +111,43 @@
109	112	if id not in ds.time[year][m] and obs[var][year][m] > 0:
110	113	ds.time[year][m][id] = obs[var][year][m]
111	114
112		~~- def write_longitudinal_data(self):~~
	115	+ def write_longitudinal_data(self, write_time=True):
113	116	fh = utils.create_txt_filehandle(settings.dataset_location, self.name, 'w', settings.encoding)
114	117	self.write_headers(fh)
115		~~- dc = shaper.create_datacontainer()~~
116		~~- dc = shaper.add_months_to_datacontainer(dc)~~
117		-
118	118	for var in self.vars:
119	119	var = getattr(self, var)
120		~~- var.descriptives()~~
121		~~- years = dc.keys()~~
122		~~- years.sort()~~
123		~~- for year in years:~~
124		~~- months = dc[year].keys()~~
125		~~- months.sort()~~
126		~~- for month in months:~~
127		~~- d = calendar.monthrange(int(year), int(month))[1] #determines the number of days in a given month/year~~
128		~~- date = datetime.date(int(year), int(month), d)~~
129		~~- fh.write('%s\t' % date)~~
130		~~- for var in self.vars:~~
131		~~- var = getattr(self, var)~~
132		~~- #data = ['%s_%s\t' % (var.name, getattr(var, stat)[year][month]) for stat in var.stats]~~
133		~~- fh.write(''.join(['%s\t' % (getattr(var, stat)[year][month],) for stat in var.stats]))~~
134		~~- fh.write('\n')~~
	120	+ for o in var.obs:
	121	+ if write_time:
	122	+ fh.write('%s\t%s\n' % (o[0], o[1]))
	123	+ else:
	124	+ fh.write('%s\n' % (o[1]))
135	125	fh.close()
136	126
	127	+# windows = create_windows()
	128	+# dc = shaper.create_datacontainer()
	129	+# dc = shaper.add_months_to_datacontainer(dc, windows)
	130	+#
	131	+## for var in self.vars:
	132	+## var = getattr(self, var)
	133	+## var.descriptives()
	134	+# years = dc.keys()
	135	+# years.sort()
	136	+# for year in years:
	137	+# months = dc[year].keys()
	138	+# months.sort()
	139	+# for month in months:
	140	+# d = calendar.monthrange(int(year), int(month))[1] #determines the number of days in a given month/year
	141	+# date = datetime.date(int(year), int(month), d)
	142	+# fh.write('%s\t' % date)
	143	+# for var in self.vars:
	144	+# var = getattr(self, var)
	145	+# #data = ['%s_%s\t' % (var.name, getattr(var, stat)[year][month]) for stat in var.stats]
	146	+# fh.write(''.join([ % s\t]))
	147	+# #fh.write(''.join(['%s\t' % (getattr(var, stat)[year][month],) for stat in var.stats]))
	148	+# fh.write('\n')
137	149
	150	+
	151	+
138	152	def expand_edits(edits):
139	153	data = []
140	154	for edit in edits:
—	—	@@ -201,110 +215,113 @@
202	216	return windows
203	217
204	218
205		~~-#def generate_cohort_dataset_old(tasks, dbname, collection, **kwargs):~~
206		~~-# mongo = db.init_mongo_db(dbname)~~
207		~~-# editors = mongo[collection + '_dataset']~~
208		~~-# windows = create_windows()~~
209		~~-# data = shaper.create_datacontainer('dict')~~
210		~~-# data = shaper.add_windows_to_datacontainer(data, windows)~~
211		-#
212		~~-# while True:~~
213		~~-# id = tasks.get(block=False)~~
214		~~-# tasks.task_done()~~
215		~~-# if id == None:~~
216		~~-# break~~
217		~~-# obs = editors.find_one({'editor': id}, {'first_edit': 1, 'final_edit': 1})~~
218		-#
219		~~-# first_edit = obs['first_edit']~~
220		~~-# last_edit = obs['final_edit']~~
221		~~-# editor_dt = relativedelta(last_edit, first_edit)~~
222		~~-# editor_dt = (editor_dt.years * 12) + editor_dt.months~~
223		~~-# edits = []~~
224		~~-# for year in xrange(2001, datetime.datetime.now().year + 1):~~
225		~~-# if first_edit.year > year or last_edit.year < year:~~
226		~~-# continue~~
227		~~-# window_end = datetime.datetime(year, 12, 31)~~
228		~~-# for window in windows:~~
229		~~-# window_start = window_end - relativedelta(months=window)~~
230		~~-# if window_start < datetime.datetime(2001, 1, 1):~~
231		~~-# window_start = datetime.datetime(2001, 1, 1)~~
232		-#
233		~~-# if editor_dt > 11:~~
234		~~-# if date_falls_in_window(window_start, window_end, first_edit):~~
235		~~-# edits.append(window)~~
236		~~-# elif window > editor_dt:~~
237		~~-# data[year][window] += 1~~
238		~~-# break~~
239		-#
240		~~-# if edits != []:~~
241		~~-# w = min(edits)~~
242		~~-# data[year][w] += 1~~
243		~~-# edits = []~~
244		-#
245		-#
246		~~-# print 'Storing data as %s' % os.path.join(settings.binary_location, dbname + '_cohort_data.bin')~~
247		~~-# utils.store_object(data, settings.binary_location, dbname + '_cohort_data.bin')~~
248		~~-# cohort_charts.prepare_cohort_dataset(dbname)~~
249	219
	220	+def diff_month(d1, d2):
	221	+ return (d1.year - d2.year) * 12 + d1.month - d2.month
250	222
251	223
	224	+def generate_cohort_dataset_raw(tasks, dbname, collection):
	225	+ mongo = db.init_mongo_db(dbname)
	226	+ editors = mongo['%s%s' % (collection, '_dataset')]
	227	+ windows = create_windows()
	228	+ data = shaper.create_datacontainer('dict')
	229	+ final_year = datetime.datetime.now().year + 1
	230	+ ld = LongDataset(['experience'], '%s_forward_cohort.csv' % dbname)
	231	+ while True:
	232	+ id = tasks.get(block=False)
	233	+ tasks.task_done()
	234	+ if id == None:
	235	+ break
	236	+ obs = editors.find_one({'editor': id},
	237	+ {'new_wikipedian': 1,
	238	+ 'monthly_edits': 1,
	239	+ 'final_edit':1
	240	+ })
252	241
253		~~-def generate_cohort_dataset_forward(tasks, dbname, collection, **kwargs):~~
	242	+ new_wikipedian = obs['new_wikipedian']
	243	+ last_edit = obs['final_edit']
	244	+ dt = diff_month(last_edit, new_wikipedian)
	245	+ day = calendar.monthrange(new_wikipedian.year, new_wikipedian.month)[1]
	246	+ tenth_edit = datetime.date(new_wikipedian.year, new_wikipedian.month, day)
	247	+ ld.experience.obs.append([tenth_edit, dt])
	248	+
	249	+ ld.write_longitudinal_data()
	250	+
	251	+def generate_cohort_dataset_forward(tasks, dbname, collection):
254	252	mongo = db.init_mongo_db(dbname)
255	253	editors = mongo[collection + '_dataset']
	254	+ final_year = datetime.datetime.now().year + 1
256	255	windows = create_windows()
257	256	data = shaper.create_datacontainer('dict')
258		~~- final_year = datetime.datetime.now().year + 1~~
259		~~- m1 = [1, 2, 3, 4, 5, 6]~~
260		~~- m2 = [7, 8, 9, 10, 11, 12]~~
261		~~- frames = [m1, m2]~~
262	257	while True:
263	258	id = tasks.get(block=False)
264	259	if id == None:
265	260	break
	261	+
266	262	obs = editors.find_one({'editor': id}, {'new_wikipedian': 1, 'monthly_edits': 1, 'final_edit':1})
267	263	new_wikipedian = obs['new_wikipedian']
	264	+ year = new_wikipedian.year
	265	+
268	266	last_edit = obs['final_edit']
269		~~- start_year = new_wikipedian.year~~
270		~~- last_year = last_edit.year + 1~~
271		~~- if new_wikipedian.month != 1:~~
272		~~- continue~~
273		~~- for year in xrange(start_year, last_year):~~
274		~~- if year not in data[start_year]:~~
275		~~- data[start_year][year] = {}~~
276		~~- for x, frame in enumerate(frames):~~
277		~~- if x not in data[start_year][year]:~~
278		~~- data[start_year][year][x] = 0~~
279		~~- if 'n' not in data[start_year][year]:~~
280		~~- data[start_year][year]['n'] = 0~~
	267	+ edits = obs['monthly_edits']
281	268
282		~~- active = sum([obs['monthly_edits'][str(year)][str(m)] for m in frame])~~
283		~~- data[start_year][year]['n'] += 1~~
284		~~- if active > 0:~~
285		~~- data[start_year][year][x] += 1~~
286		~~- filename = '%s_cohort_forward.csv' % dbname~~
287		~~- fh = utils.create_txt_filehandle(settings.dataset_location, filename, 'w', settings.encoding)~~
288		~~- frames.append('n')~~
289		~~- headers = ["%s_%s" % (year, frame[0]) for year in xrange(2001, final_year) for frame in enumerate(frames)]~~
290		~~- headers.insert(0, '\t')~~
291		~~- utils.write_list_to_csv(headers, fh)~~
	269	+ if new_wikipedian.month not in data[new_wikipedian.year]:
	270	+ data[new_wikipedian.year][new_wikipedian.month] = {}
	271	+ for i, year in enumerate(xrange(new_wikipedian.year, final_year)):
	272	+ months = edits.get(str(year), [])
	273	+ if i == 0:
	274	+ months = months.keys()
	275	+ months = [int(m) for m in months]
	276	+ months.sort()
	277	+ months = months[new_wikipedian.month - 1:]
	278	+ months = [str(m) for m in months]
	279	+ for month in months:
	280	+ experience = str(i * 12 + int(month))
	281	+ if experience not in data[new_wikipedian.year][new_wikipedian.month]:
	282	+ data[new_wikipedian.year][new_wikipedian.month][experience] = 0
	283	+ data[new_wikipedian.year][new_wikipedian.month][experience] += 1 if edits[str(year)][month] > 0 else 0
292	284
293		~~- for obs_year in data:~~
294		~~- obs = '%s\t' % obs_year~~
295		~~- for year in xrange(2001, final_year):~~
296		~~- values = data[obs_year].get(year, None)~~
297		~~- if values != None:~~
298		~~- for value in values:~~
299		~~- obs = '%s\t%s\t' % (obs, values[value])~~
300		~~- else:~~
301		~~- obs = '%s\t.\t.\t.\t' % obs~~
302		-
303		~~- obs = '%s\n' % obs~~
304		~~- fh.write(obs)~~
	285	+ fh = utils.create_txt_filehandle(settings.dataset_location, '%s_cohort_data_forward.csv' % (dbname), 'w', settings.encoding)
	286	+ for year in data:
	287	+ for month in data[year]:
	288	+ obs = data[year][month].keys()
	289	+ obs.sort()
	290	+ for o in obs:
	291	+ utils.write_list_to_csv(['%s-%s' % (month, year), o, data[year][month][o]], fh, recursive=False, newline=True)
305	292	fh.close()
306	293
307	294
	295	+def generate_cohort_dataset_backward_custom(tasks, dbname, collection):
	296	+ mongo = db.init_mongo_db(dbname)
	297	+ editors = mongo[collection + '_dataset']
	298	+ windows = create_windows()
	299	+ data = shaper.create_datacontainer('dict')
	300	+ data = shaper.add_windows_to_datacontainer(data, windows)
308	301
	302	+ while True:
	303	+ id = tasks.get(block=False)
	304	+ tasks.task_done()
	305	+ if id == None:
	306	+ break
	307	+ obs = editors.find_one({'editor': id}, {'first_edit': 1, 'final_edit': 1, 'monthly_edits':1, 'edits_by_year': 1, 'last_edit_by_year': 1})
	308	+ first_edit = obs['first_edit']
	309	+
	310	+ if obs['monthly_edits']['2010']['8'] > 0:
	311	+ for year in xrange(2001, datetime.datetime.now().year + 1):
	312	+ if obs['edits_by_year'].get(year, 0) > 0:
	313	+ last_edit = obs['last_edit_by_year'][year]
	314	+ editor_dt = relativedelta(last_edit, first_edit)
	315	+ editor_dt = (editor_dt.years * 12) + editor_dt.months
	316	+ for w in windows:
	317	+ if w >= editor_dt:
	318	+ data[int(year)][w] += 1
	319	+ break
	320	+ filename = '_august_2010_cohort_data_.bin'
	321	+ utils.store_object(data, settings.binary_location, '%s_%s' % (dbname, filename))
	322	+ cohort_charts.prepare_cohort_dataset(dbname, filename)
	323	+
	324	+
	325	+
309	326	def generate_cohort_dataset_backward(tasks, dbname, collection, **kwargs):
310	327	mongo = db.init_mongo_db(dbname)
311	328	editors = mongo[collection + '_dataset']
—	—	@@ -334,15 +351,6 @@
335	352	cohort_charts.prepare_cohort_dataset(dbname)
336	353
337	354
338		-
339		-
340		~~-def date_falls_in_window(window_start, window_end, first_edit):~~
341		~~- if first_edit >= window_start and first_edit <= window_end:~~
342		~~- return True~~
343		~~- else:~~
344		~~- return False~~
345		-
346		-
347	355	def generate_wide_editor_dataset(tasks, dbname, collection, **kwargs):
348	356	mongo = db.init_mongo_db(dbname)
349	357	editors = mongo[collection + '_dataset']
Index: trunk/tools/editor_trends/etl/extracter.py
—	—	@@ -101,13 +101,16 @@
102	102	return revisions
103	103
104	104
105		~~-def is_article_main_namespace(elem, namespace):~~
	105	+def verify_article_belongs_namespace(elem, namespaces):
106	106	'''
107		~~- checks whether the article belongs to the main namespace~~
	107	+ @namespaces is a list of namespaces that should be ignored, hence if the
	108	+ title of article starts with the namespace then return False else return True
108	109	'''
109	110	title = elem.text
110		~~- for ns in namespace:~~
111		~~- if title.startswith(ns):~~
	111	+ if title == None:
	112	+ return False
	113	+ for namespace in namespaces:
	114	+ if title.startswith(namespace):
112	115	return False
113	116	return True
114	117
—	—	@@ -249,7 +252,7 @@
250	253	for page in wikitree.parser.read_input(fh):
251	254	title = page.find('title')
252	255	total_pages += 1
253		~~- if is_article_main_namespace(title, ns):~~
	256	+ if verify_article_belongs_namespace(title, ns):
254	257	#cElementTree.dump(page)
255	258	article_id = page.find('id').text
256	259	revisions = page.findall('revision')
Index: trunk/tools/editor_trends/etl/shaper.py
—	—	@@ -1,3 +1,16 @@
	2	+#!/usr/bin/python
	3	+# -- coding: utf-8 --
	4	+'''
	5	+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
	6	+This program is free software; you can redistribute it and/or
	7	+modify it under the terms of the GNU General Public License version 2
	8	+as published by the Free Software Foundation.
	9	+This program is distributed in the hope that it will be useful,
	10	+but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	12	+See the GNU General Public License for more details, at
	13	+http://www.fsf.org/licenses/gpl.html
	14	+'''
2	15
3	16
4	17	__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
—	—	@@ -27,7 +40,7 @@
28	41	d[i] = 0.0
29	42	return d
30	43
31		~~-def create_datacontainer(datatype):~~
	44	+def create_datacontainer(datatype='dict'):
32	45	'''
33	46	This function initializes an empty dictionary with as key the year (starting
34	47	2001 and running through) and as value @datatype, in most cases this will
Index: trunk/tools/editor_trends/etl/sort.py
—	—	@@ -162,10 +162,9 @@
163	163
164	164	if __name__ == '__main__':
165	165	input = os.path.join(settings.input_location, 'en', 'wiki', 'txt')
166		~~- intermediate_output = os.path.join(settings.input_location, 'en', 'wiki', 'sorted')~~
167		~~- output = os.path.join(settings.input_location, 'en', 'wiki', 'dbready')~~
	166	+ output = os.path.join(settings.input_location, 'en', 'wiki', 'sorted')
168	167	dbname = 'enwiki'
169	168	collection = 'editors'
170		~~- mergesort_launcher(input, intermediate_output)~~
	169	+ mergesort_launcher(input, output)
171	170	#mergesort_external_launcher(intermediate_output, output)
172	171	#num_editors = store_editors(output, dbname, collection)
Index: trunk/tools/editor_trends/config.py
—	—	@@ -24,15 +24,14 @@
25	25	from utils import utils
26	26	import languages
27	27
	28	+
28	29	def show_choices(settings, attr):
29	30	choices = getattr(settings, attr).items()
30	31	choices.sort()
31	32	choices = ['%s\t%s' % (choice[0], choice[1]) for choice in choices]
32		~~- #print '\n'.join(choices)~~
33	33	return choices
34		~~- #for choice in choices:~~
35		~~- # print '%s\t%s' % (choice[0], choice[1])~~
36	34
	35	+
37	36	def create_configuration(settings, args):
38	37	force = getattr(args, 'force', False)
39	38	if not os.path.exists('wiki.cfg') or force:
—	—	@@ -62,9 +61,6 @@
63	62	if len(dumpversion) == 0:
64	63	dumpversion = settings.dumpversions['0']
65	64
66		-
67		~~- #dumpversion = dumpversion if dumpversion in settings.dumpversions.keys() else args.dumpversion~~
68		-
69	65	dumpversion = settings.dumpversions[dumpversion]
70	66	input_location = input_location if len(input_location) > 0 else settings.input_location
71	67	working_directory = working_directory if len(working_directory) > 0 else os.getcwd()
Index: trunk/tools/editor_trends/languages.py
—	—	@@ -18,595 +18,596 @@
19	19	__version__ = '0.1'
20	20
21	21	'''
22		~~-This file provides mapper between language name and locale language name and~~
23		~~-Wikipedia acronym.~~
24		~~-Gothic and Birmese are not yet supported, see rows 450 and 554.~~
	22	+This file provides a mapping from language names (both English and native
	23	+names) to their Wikipedia language codes.
	24	+Gothic and Burmese are not yet supported, see rows 559 and 455.
25	25	'''
26	26
27	27	from utils import ordered_dict as odict
28	28	from utils import utils
29	29
30	30	MAPPING = odict.OrderedDict([
31		~~-(u'English','en'),~~
32		~~-(u'German','de'),~~
33		~~-(u'French','fr'),~~
34		~~-(u'Italian','it'),~~
35		~~-(u'Polish','pl'),~~
36		~~-(u'Japanese','ja'),~~
37		~~-(u'Spanish','es'),~~
38		~~-(u'Dutch','nl'),~~
39		~~-(u'Portuguese','pt'),~~
40		~~-(u'Russian','ru'),~~
41		~~-(u'Swedish','sv'),~~
42		~~-(u'Chinese','zh'),~~
43		~~-(u'Catalan','ca'),~~
44		~~-(u'Norwegian','no'),~~
45		~~-(u'Bokmål','no'),~~
46		~~-(u'Finnish','fi'),~~
47		~~-(u'Ukrainian','uk'),~~
48		~~-(u'Hungarian','hu'),~~
49		~~-(u'Czech','cs'),~~
50		~~-(u'Romanian','ro'),~~
51		~~-(u'Turkish','tr'),~~
52		~~-(u'Korean','ko'),~~
53		~~-(u'Vietnamese','vi'),~~
54		~~-(u'Danish','da'),~~
55		~~-(u'Arabic','ar'),~~
56		~~-(u'Esperanto','eo'),~~
57		~~-(u'Serbian','sr'),~~
58		~~-(u'Indonesian','id'),~~
59		~~-(u'Lithuanian','lt'),~~
60		~~-(u'Volapük','vo'),~~
61		~~-(u'Slovak','sk'),~~
62		~~-(u'Hebrew','he'),~~
63		~~-(u'Bulgarian','bg'),~~
64		~~-(u'Persian','fa'),~~
65		~~-(u'Slovenian','sl'),~~
66		~~-(u'Waray-Waray','war'),~~
67		~~-(u'Croatian','hr'),~~
68		~~-(u'Estonian','et'),~~
69		~~-(u'Malay','ms'),~~
70		~~-(u'Newar','new'),~~
71		~~-(u'Nepal Bhasa','new'),~~
72		~~-(u'Simple English','simple'),~~
73		~~-(u'Galician','gl'),~~
74		~~-(u'Thai','th'),~~
75		~~-(u'Aromanian','roa-rup'),~~
76		~~-(u'Nynorsk','nn'),~~
77		~~-(u'Basque','eu'),~~
78		~~-(u'Hindi','hi'),~~
79		~~-(u'Greek','el'),~~
80		~~-(u'Haitian','ht'),~~
81		~~-(u'Latin','la'),~~
82		~~-(u'Telugu','te'),~~
83		~~-(u'Georgian','ka'),~~
84		~~-(u'Cebuano','ceb'),~~
85		~~-(u'Macedonian','mk'),~~
86		~~-(u'Azeri','az'),~~
87		~~-(u'Tagalog','tl'),~~
88		~~-(u'Breton','br'),~~
89		~~-(u'Serbo-Croatian','sh'),~~
90		~~-(u'Marathi','mr'),~~
91		~~-(u'Luxembourgish','lb'),~~
92		~~-(u'Javanese','jv'),~~
93		~~-(u'Latvian','lv'),~~
94		~~-(u'Bosnian','bs'),~~
95		~~-(u'Icelandic','is'),~~
96		~~-(u'Welsh','cy'),~~
97		~~-(u'Belarusian','be-x-old'),~~
98		~~-(u'Taraškievica','be-x-old'),~~
99		~~-(u'Piedmontese','pms'),~~
100		~~-(u'Albanian','sq'),~~
101		~~-(u'Tamil','ta'),~~
102		~~-(u'Bishnupriya Manipuri','bpy'),~~
103		~~-(u'Belarusian','be'),~~
104		~~-(u'Aragonese','an'),~~
105		~~-(u'Occitan','oc'),~~
106		~~-(u'Bengali','bn'),~~
107		~~-(u'Swahili','sw'),~~
108		~~-(u'Ido','io'),~~
109		~~-(u'Ripuarian','ksh'),~~
110		~~-(u'Lombard','lmo'),~~
111		~~-(u'West Frisian','fy'),~~
112		~~-(u'Gujarati','gu'),~~
113		~~-(u'Low Saxon','nds'),~~
114		~~-(u'Afrikaans','af'),~~
115		~~-(u'Sicilian','scn'),~~
116		~~-(u'Quechua','qu'),~~
117		~~-(u'Kurdish','ku'),~~
118		~~-(u'Urdu','ur'),~~
119		~~-(u'Sundanese','su'),~~
120		~~-(u'Malayalam','ml'),~~
121		~~-(u'Cantonese','zh-yue'),~~
122		~~-(u'Asturian','ast'),~~
123		~~-(u'Neapolitan','nap'),~~
124		~~-(u'Samogitian','bat-smg'),~~
125		~~-(u'Walloon','wa'),~~
126		~~-(u'Chuvash','cv'),~~
127		~~-(u'Irish','ga'),~~
128		~~-(u'Armenian','hy'),~~
129		~~-(u'Yoruba','yo'),~~
130		~~-(u'Kannada','kn'),~~
131		~~-(u'Tajik','tg'),~~
132		~~-(u'Tarantino','roa-tara'),~~
133		~~-(u'Venetian','vec'),~~
134		~~-(u'Western Panjabi','pnb'),~~
135		~~-(u'Nepali','ne'),~~
136		~~-(u'Scottish Gaelic','gd'),~~
137		~~-(u'Yiddish','yi'),~~
138		~~-(u'Min Nan','zh-min-nan'),~~
139		~~-(u'Uzbek','uz'),~~
140		~~-(u'Tatar','tt'),~~
141		~~-(u'Kapampangan','pam'),~~
142		~~-(u'Ossetian','os'),~~
143		~~-(u'Sakha','sah'),~~
144		~~-(u'Alemannic','als'),~~
145		~~-(u'Maori','mi'),~~
146		~~-(u'Egyptian Arabic','arz'),~~
147		~~-(u'Kazakh','kk'),~~
148		~~-(u'Nahuatl','nah'),~~
149		~~-(u'Limburgian','li'),~~
150		~~-(u'Upper Sorbian','hsb'),~~
151		~~-(u'Gilaki','glk'),~~
152		~~-(u'Corsican','co'),~~
153		~~-(u'Gan','gan'),~~
154		~~-(u'Amharic','am'),~~
155		~~-(u'Mongolian','mn'),~~
156		~~-(u'Interlingua','ia'),~~
157		~~-(u'Central Bicolano','bcl'),~~
158		~~-(u'Võro','fiu-vro'),~~
159		~~-(u'Dutch Low Saxon','nds-nl'),~~
160		~~-(u'Faroese','fo'),~~
161		~~-(u'Turkmen','tk'),~~
162		~~-(u'Scots','sco'),~~
163		~~-(u'West Flemish','vls'),~~
164		~~-(u'Sinhalese','si'),~~
165		~~-(u'Sanskrit','sa'),~~
166		~~-(u'Bavarian','bar'),~~
167		~~-(u'Burmese','my'),~~
168		~~-(u'Manx','gv'),~~
169		~~-(u'Divehi','dv'),~~
170		~~-(u'Norman','nrm'),~~
171		~~-(u'Pangasinan','pag'),~~
172		~~-(u'Romansh','rm'),~~
173		~~-(u'Banyumasan','map-bms'),~~
174		~~-(u'Zazaki','diq'),~~
175		~~-(u'Sorani','ckb'),~~
176		~~-(u'Northern Sami','se'),~~
177		~~-(u'Mazandarani','mzn'),~~
178		~~-(u'Wu','wuu'),~~
179		~~-(u'Uyghur','ug'),~~
180		~~-(u'Friulian','fur'),~~
181		~~-(u'Ligurian','lij'),~~
182		~~-(u'Maltese','mt'),~~
183		~~-(u'Bihari','bh'),~~
184		~~-(u'Novial','nov'),~~
185		~~-(u'Malagasy','mg'),~~
186		~~-(u'Kashubian','csb'),~~
187		~~-(u'Ilokano','ilo'),~~
188		~~-(u'Sardinian','sc'),~~
189		~~-(u'Classical Chinese','zh-classical'),~~
190		~~-(u'Khmer','km'),~~
191		~~-(u'Ladino','lad'),~~
192		~~-(u'Pali','pi'),~~
193		~~-(u'Anglo-Saxon','ang'),~~
194		~~-(u'Zamboanga Chavacano','cbk-zam'),~~
195		~~-(u'Tibetan','bo'),~~
196		~~-(u'Fiji Hindi','hif'),~~
197		~~-(u'Franco-Provençal','frp'),~~
198		~~-(u'Arpitan','frp'),~~
199		~~-(u'Hakka','hak'),~~
200		~~-(u'Cornish','kw'),~~
201		~~-(u'Punjabi','pa'),~~
202		~~-(u'Pashto','ps'),~~
203		~~-(u'Kalmyk','xal'),~~
204		~~-(u'Silesian','szl'),~~
205		~~-(u'Pennsylvania German','pdc'),~~
206		~~-(u'Hawaiian','haw'),~~
207		~~-(u'Saterland Frisian','stq'),~~
208		~~-(u'Interlingue','ie'),~~
209		~~-(u'Navajo','nv'),~~
210		~~-(u'Fijian','fj'),~~
211		~~-(u'Crimean Tatar','crh'),~~
212		~~-(u'Komi','kv'),~~
213		~~-(u'Tongan','to'),~~
214		~~-(u'Acehnese','ace'),~~
215		~~-(u'Somali','so'),~~
216		~~-(u'Erzya','myv'),~~
217		~~-(u'Guarani','gn'),~~
218		~~-(u'Karachay-Balkar','krc'),~~
219		~~-(u'Extremaduran','ext'),~~
220		~~-(u'Lingala','ln'),~~
221		~~-(u'Kirghiz','ky'),~~
222		~~-(u'Meadow Mari','mhr'),~~
223		~~-(u'Assyrian Neo-Aramaic','arc'),~~
224		~~-(u'Emilian-Romagnol','eml'),~~
225		~~-(u'Lojban','jbo'),~~
226		~~-(u'Picard','pcd'),~~
227		~~-(u'Aymara','ay'),~~
228		~~-(u'Wolof','wo'),~~
229		~~-(u'Tumbuka','tum'),~~
230		~~-(u'Kabyle','kab'),~~
231		~~-(u'Bashkir','ba'),~~
232		~~-(u'North Frisian','frr'),~~
233		~~-(u'Tahitian','ty'),~~
234		~~-(u'Tok Pisin','tpi'),~~
235		~~-(u'Papiamentu','pap'),~~
236		~~-(u'Zealandic','zea'),~~
237		~~-(u'Sranan','srn'),~~
238		~~-(u'Greenlandic','kl'),~~
239		~~-(u'Udmurt','udm'),~~
240		~~-(u'Chechen','ce'),~~
241		~~-(u'Igbo','ig'),~~
242		~~-(u'Komi-Permyak','koi'),~~
243		~~-(u'Oriya','or'),~~
244		~~-(u'Lower Sorbian','dsb'),~~
245		~~-(u'Kongo','kg'),~~
246		~~-(u'Lao','lo'),~~
247		~~-(u'Abkhazian','ab'),~~
248		~~-(u'Moksha','mdf'),~~
249		~~-(u'Romani','rmy'),~~
250		~~-(u'Hill Mari','mrj'),~~
251		~~-(u'Banjar','bjn'),~~
252		~~-(u'Old Church Slavonic','cu'),~~
253		~~-(u'Mirandese','mwl'),~~
254		~~-(u'Karakalpak','kaa'),~~
255		~~-(u'Samoan','sm'),~~
256		~~-(u'Moldovan','mo'),~~
257		~~-(u'Tetum','tet'),~~
258		~~-(u'Avar','av'),~~
259		~~-(u'Kashmiri','ks'),~~
260		~~-(u'Gothic','got'),~~
261		~~-(u'Sindhi','sd'),~~
262		~~-(u'Bambara','bm'),~~
263		~~-(u'Nauruan','na'),~~
264		~~-(u'Norfolk','pih'),~~
265		~~-(u'Pontic','pnt'),~~
266		~~-(u'Inuktitut','iu'),~~
267		~~-(u'Inupiak','ik'),~~
268		~~-(u'Bislama','bi'),~~
269		~~-(u'Cherokee','chr'),~~
270		~~-(u'Assamese','as'),~~
271		~~-(u'Min Dong','cdo'),~~
272		~~-(u'Ewe','ee'),~~
273		~~-(u'Swati','ss'),~~
274		~~-(u'Oromo','om'),~~
275		~~-(u'Zhuang','za'),~~
276		~~-(u'Zulu','zu'),~~
277		~~-(u'Tigrinya','ti'),~~
278		~~-(u'Venda','ve'),~~
279		~~-(u'Tsonga','ts'),~~
280		~~-(u'Hausa','ha'),~~
281		~~-(u'Dzongkha','dz'),~~
282		~~-(u'Sango','sg'),~~
283		~~-(u'Chamorro','ch'),~~
284		~~-(u'Cree','cr'),~~
285		~~-(u'Xhosa','xh'),~~
286		~~-(u'Akan','ak'),~~
287		~~-(u'Sesotho','st'),~~
288		~~-(u'Kinyarwanda','rw'),~~
289		~~-(u'Tswana','tn'),~~
290		~~-(u'Kikuyu','ki'),~~
291		~~-(u'Buryat','bxr'),~~
292		~~-(u'Buginese','bug'),~~
293		~~-(u'Chichewa','ny'),~~
294		~~-(u'Lak','lbe'),~~
295		~~-(u'Twi','tw'),~~
296		~~-(u'Shona','sn'),~~
297		~~-(u'Kirundi','rn'),~~
298		~~-(u'Fula','ff'),~~
299		~~-(u'Cheyenne','chy'),~~
300		~~-(u'Luganda','lg'),~~
301		~~-(u'Ndonga','ng'),~~
302		~~-(u'Sichuan Yi','ii'),~~
303		~~-(u'Choctaw','cho'),~~
304		~~-(u'Marshallese','mh'),~~
305		~~-(u'Afar','aa'),~~
306		~~-(u'Kuanyama','kj'),~~
307		~~-(u'Hiri Motu','ho'),~~
308		~~-(u'Muscogee','mus'),~~
309		~~-(u'Kanuri','kr'),~~
310		~~-(u'Herero','hz'),~~
311		~~-(u'English','en'),~~
312		~~-(u'Deutsch','de'),~~
313		~~-(u'Français','fr'),~~
314		~~-(u'Italiano','it'),~~
315		~~-(u'Polski','pl'),~~
316		~~-(u'日本語','ja'),~~
317		~~-(u'Español','es'),~~
318		~~-(u'Nederlands','nl'),~~
319		~~-(u'Português','pt'),~~
320		~~-(u'Русский','ru'),~~
321		~~-(u'Svenska','sv'),~~
322		~~-(u'中文','zh'),~~
323		~~-(u'Català','ca'),~~
324		~~-(u'Norsk','no'),~~
325		~~-(u'Bokmål','no'),~~
326		~~-(u'Suomi','fi'),~~
327		~~-(u'Українська','uk'),~~
328		~~-(u'Magyar','hu'),~~
329		~~-(u'Čeština','cs'),~~
330		~~-(u'Română','ro'),~~
331		~~-(u'Türkçe','tr'),~~
332		~~-(u'한국어','ko'),~~
333		~~-(u'Tiếng Việt','vi'),~~
334		~~-(u'Dansk','da'),~~
335		~~-(u'العربية','ar'),~~
336		~~-(u'Esperanto','eo'),~~
337		~~-(u'Српски','sr'),~~
338		~~-(u'Srpski','sr'),~~
339		~~-(u'Bahasa Indonesia','id'),~~
340		~~-(u'Lietuvių','lt'),~~
341		~~-(u'Volapük','vo'),~~
342		~~-(u'Slovenčina','sk'),~~
343		~~-(u'עברית','he'),~~
344		~~-(u'Български','bg'),~~
345		~~-(u'فارسی','fa'),~~
346		~~-(u'Slovenščina','sl'),~~
347		~~-(u'Winaray','war'),~~
348		~~-(u'Hrvatski','hr'),~~
349		~~-(u'Eesti','et'),~~
350		~~-(u'Bahasa Melayu','ms'),~~
351		~~-(u'नेपाल भाषा','new'),~~
352		~~-(u'Simple English','simple'),~~
353		~~-(u'Galego','gl'),~~
354		~~-(u'ไทย','th'),~~
355		~~-(u'Armãneashce','roa-rup'),~~
356		~~-(u'Nynorsk','nn'),~~
357		~~-(u'Euskara','eu'),~~
358		~~-(u'हिन्दी','hi'),~~
359		~~-(u'Ελληνικά','el'),~~
360		~~-(u'Krèyol ayisyen','ht'),~~
361		~~-(u'Latina','la'),~~
362		~~-(u'తెలుగు','te'),~~
363		~~-(u'ქართული','ka'),~~
364		~~-(u'Sinugboanong Binisaya','ceb'),~~
365		~~-(u'Македонски','mk'),~~
366		~~-(u'Azərbaycan','az'),~~
367		~~-(u'Tagalog','tl'),~~
368		~~-(u'Brezhoneg','br'),~~
369		~~-(u'Srpskohrvatski','sh'),~~
370		~~-(u'Српскохрватски','sh'),~~
371		~~-(u'मराठी','mr'),~~
372		~~-(u'Lëtzebuergesch','lb'),~~
373		~~-(u'Basa Jawa','jv'),~~
374		~~-(u'Latviešu','lv'),~~
375		~~-(u'Bosanski','bs'),~~
376		~~-(u'Íslenska','is'),~~
377		~~-(u'Cymraeg','cy'),~~
378		~~-(u'Беларуская','be-x-old'),~~
379		~~-(u'тарашкевіца','be-x-old'),~~
380		~~-(u'Piemontèis','pms'),~~
381		~~-(u'Shqip','sq'),~~
382		~~-(u'தமிழ்','ta'),~~
383		~~-(u'ইমার ঠার','bpy'),~~
384		~~-(u'বিষ্ণুপ্রিয়া মণিপুরী','bpy'),~~
385		~~-(u'Беларуская','be'),~~
386		~~-(u'Aragonés','an'),~~
387		~~-(u'Occitan','oc'),~~
388		~~-(u'বাংলা','bn'),~~
389		~~-(u'Kiswahili','sw'),~~
390		~~-(u'Ido','io'),~~
391		~~-(u'Ripoarisch','ksh'),~~
392		~~-(u'Lumbaart','lmo'),~~
393		~~-(u'Frysk','fy'),~~
394		~~-(u'ગુજરાતી','gu'),~~
395		~~-(u'Plattdüütsch','nds'),~~
396		~~-(u'Afrikaans','af'),~~
397		~~-(u'Sicilianu','scn'),~~
398		~~-(u'Runa Simi','qu'),~~
399		~~-(u'Kurdî','ku'),~~
400		~~-(u'كوردی','ku'),~~
401		~~-(u'اردو','ur'),~~
402		~~-(u'Basa Sunda','su'),~~
403		~~-(u'മലയാളം','ml'),~~
404		~~-(u'粵語','zh-yue'),~~
405		~~-(u'Asturianu','ast'),~~
406		~~-(u'Nnapulitano','nap'),~~
407		~~-(u'Žemaitėška','bat-smg'),~~
408		~~-(u'Walon','wa'),~~
409		~~-(u'Чăваш','cv'),~~
410		~~-(u'Gaeilge','ga'),~~
411		~~-(u'Հայերեն','hy'),~~
412		~~-(u'Yorùbá','yo'),~~
413		~~-(u'ಕನ್ನಡ','kn'),~~
414		~~-(u'Тоҷикӣ','tg'),~~
415		~~-(u'Tarandíne','roa-tara'),~~
416		~~-(u'Vèneto','vec'),~~
417		~~-(u'شاہ مکھی پنجابی','pnb'),~~
418		~~-(u'Shāhmukhī Pañjābī','pnb'),~~
419		~~-(u'नेपाली','ne'),~~
420		~~-(u'Gàidhlig','gd'),~~
421		~~-(u'ייִדיש','yi'),~~
422		~~-(u'Bân-lâm-gú','zh-min-nan'),~~
423		~~-(u'O‘zbek','uz'),~~
424		~~-(u'Tatarça','tt'),~~
425		~~-(u'Татарча','tt'),~~
426		~~-(u'Kapampangan','pam'),~~
427		~~-(u'Иронау','os'),~~
428		~~-(u'Саха тыла','sah'),~~
429		~~-(u'Saxa Tyla','sah'),~~
430		~~-(u'Alemannisch','als'),~~
431		~~-(u'Māori','mi'),~~
432		~~-(u'مصرى','arz'),~~
433		~~-(u'Maṣrī','arz'),~~
434		~~-(u'Қазақша','kk'),~~
435		~~-(u'Nāhuatl','nah'),~~
436		~~-(u'Limburgs','li'),~~
437		~~-(u'Hornjoserbsce','hsb'),~~
438		~~-(u'گیلکی','glk'),~~
439		~~-(u'Corsu','co'),~~
440		~~-(u'贛語','gan'),~~
441		~~-(u'አማርኛ','am'),~~
442		~~-(u'Монгол','mn'),~~
443		~~-(u'Interlingua','ia'),~~
444		~~-(u'Bikol','bcl'),~~
445		~~-(u'Võro','fiu-vro'),~~
446		~~-(u'Nedersaksisch','nds-nl'),~~
447		~~-(u'Føroyskt','fo'),~~
448		~~-(u'تركمن ','tk'),~~
449		~~-(u'Туркмен','tk'),~~
450		~~-(u'Scots','sco'),~~
451		~~-(u'West-Vlams','vls'),~~
452		~~-(u'සිංහල','si'),~~
453		~~-(u'संस्कृतम्','sa'),~~
454		~~-(u'Boarisch','bar'),~~
455		~~-(u'မ္ရန္‌မာစာ','my'), #Needs fix~~
456		~~-(u'Gaelg','gv'),~~
457		~~-(u'ދިވެހިބަސް','dv'),~~
458		~~-(u'Nouormand','nrm'),~~
459		~~-(u'Normaund','nrm'),~~
460		~~-(u'Pangasinan','pag'),~~
461		~~-(u'Rumantsch','rm'),~~
462		~~-(u'Basa Banyumasan','map-bms'),~~
463		~~-(u'Zazaki','diq'),~~
464		~~-(u'Soranî','ckb'),~~
465		~~-(u'کوردی','ckb'),~~
466		~~-(u'Sámegiella','se'),~~
467		~~-(u'مَزِروني','mzn'),~~
468		~~-(u'吴语','wuu'),~~
469		~~-(u'Oyghurque','ug'),~~
470		~~-(u'Furlan','fur'),~~
471		~~-(u'Líguru','lij'),~~
472		~~-(u'Malti','mt'),~~
473		~~-(u'भोजपुरी','bh'),~~
474		~~-(u'Novial','nov'),~~
475		~~-(u'Malagasy','mg'),~~
476		~~-(u'Kaszëbsczi','csb'),~~
477		~~-(u'Ilokano','ilo'),~~
478		~~-(u'Sardu','sc'),~~
479		~~-(u'古文','zh-classical'),~~
480		~~-(u'文言文','zh-classical'),~~
481		~~-(u'ភាសាខ្មែរ','km'),~~
482		~~-(u'Dzhudezmo','lad'),~~
483		~~-(u'पाऴि','pi'),~~
484		~~-(u'Englisc','ang'),~~
485		~~-(u'Chavacano de Zamboanga','cbk-zam'),~~
486		~~-(u'བོད་སྐད','bo'),~~
487		~~-(u'Fiji Hindi','hif'),~~
488		~~-(u'Arpitan','frp'),~~
489		~~-(u'Hak-kâ-fa','hak'),~~
490		~~-(u'客家話','hak'),~~
491		~~-(u'Kernewek','kw'),~~
492		~~-(u'Karnuack','kw'),~~
493		~~-(u'ਪੰਜਾਬੀ','pa'),~~
494		~~-(u'پښتو','ps'),~~
495		~~-(u'Хальмг','xal'),~~
496		~~-(u'Ślůnski','szl'),~~
497		~~-(u'Deitsch','pdc'),~~
498		~~-(u'Hawai`i','haw'),~~
499		~~-(u'Seeltersk','stq'),~~
500		~~-(u'Interlingue','ie'),~~
501		~~-(u'Diné bizaad','nv'),~~
502		~~-(u'Na Vosa Vakaviti','fj'),~~
503		~~-(u'Qırımtatarca','crh'),~~
504		~~-(u'Коми','kv'),~~
505		~~-(u'faka Tonga','to'),~~
506		~~-(u'Bahsa Acèh','ace'),~~
507		~~-(u'Soomaaliga','so'),~~
508		~~-(u'Эрзянь','myv'),~~
509		~~-(u'Erzjanj Kelj','myv'),~~
510		~~-(u"Avañe'ẽ",'gn'),~~
511		~~-(u'Къарачай-Малкъар','krc'),~~
512		~~-(u'Qarachay-Malqar','krc'),~~
513		~~-(u'Estremeñu','ext'),~~
514		~~-(u'Lingala','ln'),~~
515		~~-(u'Кыргызча','ky'),~~
516		~~-(u'Олык Марий','mhr'),~~
517		~~-(u'Olyk Marij','mhr'),~~
518		~~-(u'ܐܪܡܝܐ','arc'),~~
519		~~-(u'Emiliàn e rumagnòl','eml'),~~
520		~~-(u'Lojban','jbo'),~~
521		~~-(u'Picard','pcd'),~~
522		~~-(u'Aymar','ay'),~~
523		~~-(u'Wolof','wo'),~~
524		~~-(u'chiTumbuka','tum'),~~
525		~~-(u'Taqbaylit','kab'),~~
526		~~-(u'Башҡорт','ba'),~~
527		~~-(u'Frasch','frr'),~~
528		~~-(u'Reo Mā`ohi','ty'),~~
529		~~-(u'Tok Pisin','tpi'),~~
530		~~-(u'Papiamentu','pap'),~~
531		~~-(u'Zeêuws','zea'),~~
532		~~-(u'Sranantongo','srn'),~~
533		~~-(u'Kalaallisut','kl'),~~
534		~~-(u'Удмурт кыл','udm'),~~
535		~~-(u'Нохчийн','ce'),~~
536		~~-(u'Igbo','ig'),~~
537		~~-(u'Перем Коми','koi'),~~
538		~~-(u'Perem Komi','koi'),~~
539		~~-(u'ଓଡ଼ିଆ','or'),~~
540		~~-(u'Dolnoserbski','dsb'),~~
541		~~-(u'KiKongo','kg'),~~
542		~~-(u'ລາວ','lo'),~~
543		~~-(u'Аҧсуа','ab'),~~
544		~~-(u'Мокшень','mdf'),~~
545		~~-(u'Mokshanj Kälj','mdf'),~~
546		~~-(u'romani - रोमानी','rmy'),~~
547		~~-(u'Кырык Мары','mrj'),~~
548		~~-(u'Kyryk Mary','mrj'),~~
549		~~-(u'Bahasa Banjar','bjn'),~~
550		~~-(u'Словѣньскъ','cu'),~~
551		~~-(u'Páigina Percipal','mwl'),~~
552		~~-(u'Qaraqalpaqsha','kaa'),~~
553		~~-(u'Gagana Samoa','sm'),~~
554		~~-(u'Молдовеняскэ','mo'),~~
555		~~-(u'Tetun','tet'),~~
556		~~-(u'Авар','av'),~~
557		~~-(u'कश्मीरी','ks'),~~
558		~~-(u'كشميري','ks'),~~
559		~~-(u'𐌲𐌿𐍄𐌹𐍃𐌺','got'), #Needs fix~~
560		~~-(u'سنڌي، سندھی ، सिन्ध','sd'),~~
561		~~-(u'Bamanankan','bm'),~~
562		~~-(u'dorerin Naoero','na'),~~
563		~~-(u'Norfuk','pih'),~~
564		~~-(u'Ποντιακά','pnt'),~~
565		~~-(u'ᐃᓄᒃᑎᑐᑦ','iu'),~~
566		~~-(u'Iñupiak','ik'),~~
567		~~-(u'Bislama','bi'),~~
568		~~-(u'ᏣᎳᎩ','chr'),~~
569		~~-(u'অসমীয়া','as'),~~
570		~~-(u'Mìng-dĕ̤ng-ngṳ̄','cdo'),~~
571		~~-(u'Eʋegbe','ee'),~~
572		~~-(u'SiSwati','ss'),~~
573		~~-(u'Oromoo','om'),~~
574		~~-(u'Cuengh','za'),~~
575		~~-(u'isiZulu','zu'),~~
576		~~-(u'ትግርኛ','ti'),~~
577		~~-(u'Tshivenda','ve'),~~
578		~~-(u'Xitsonga','ts'),~~
579		~~-(u'هَوُسَ','ha'),~~
580		~~-(u'ཇོང་ཁ','dz'),~~
581		~~-(u'Sängö','sg'),~~
582		~~-(u'Chamoru','ch'),~~
583		~~-(u'Nehiyaw','cr'),~~
584		~~-(u'isiXhosa','xh'),~~
585		~~-(u'Akana','ak'),~~
586		~~-(u'Sesotho','st'),~~
587		~~-(u'Ikinyarwanda','rw'),~~
588		~~-(u'Setswana','tn'),~~
589		~~-(u'Gĩkũyũ','ki'),~~
590		~~-(u'Буряад','bxr'),~~
591		~~-(u'Basa Ugi','bug'),~~
592		~~-(u'Chi-Chewa','ny'),~~
593		~~-(u'Лакку','lbe'),~~
594		~~-(u'Twi','tw'),~~
595		~~-(u'chiShona','sn'),~~
596		~~-(u'Kirundi','rn'),~~
597		~~-(u'Fulfulde','ff'),~~
598		~~-(u'Tsetsêhestâhese','chy'),~~
599		~~-(u'Luganda','lg'),~~
600		~~-(u'Oshiwambo','ng'),~~
601		~~-(u'ꆇꉙ','ii'),~~
602		~~-(u'Choctaw','cho'),~~
603		~~-(u'Ebon','mh'),~~
604		~~-(u'Afar','aa'),~~
605		~~-(u'Kuanyama','kj'),~~
606		~~-(u'Hiri Motu','ho'),~~
607		~~-(u'Muskogee','mus'),~~
608		~~-(u'Kanuri','kr'),~~
609		~~-(u'Otsiherero','hz'),~~
	31	+(u'English', 'en'),
	32	+(u'German', 'de'),
	33	+(u'French', 'fr'),
	34	+(u'Italian', 'it'),
	35	+(u'Polish', 'pl'),
	36	+(u'Japanese', 'ja'),
	37	+(u'Spanish', 'es'),
	38	+(u'Dutch', 'nl'),
	39	+(u'Portuguese', 'pt'),
	40	+(u'Russian', 'ru'),
	41	+(u'Swedish', 'sv'),
	42	+(u'Chinese', 'zh'),
	43	+(u'Catalan', 'ca'),
	44	+(u'Norwegian', 'no'),
	45	+(u'Bokmål', 'no'),
	46	+(u'Finnish', 'fi'),
	47	+(u'Ukrainian', 'uk'),
	48	+(u'Hungarian', 'hu'),
	49	+(u'Czech', 'cs'),
	50	+(u'Romanian', 'ro'),
	51	+(u'Turkish', 'tr'),
	52	+(u'Korean', 'ko'),
	53	+(u'Vietnamese', 'vi'),
	54	+(u'Danish', 'da'),
	55	+(u'Arabic', 'ar'),
	56	+(u'Esperanto', 'eo'),
	57	+(u'Serbian', 'sr'),
	58	+(u'Indonesian', 'id'),
	59	+(u'Lithuanian', 'lt'),
	60	+(u'Volapük', 'vo'),
	61	+(u'Slovak', 'sk'),
	62	+(u'Hebrew', 'he'),
	63	+(u'Bulgarian', 'bg'),
	64	+(u'Persian', 'fa'),
	65	+(u'Slovenian', 'sl'),
	66	+(u'Waray-Waray', 'war'),
	67	+(u'Croatian', 'hr'),
	68	+(u'Estonian', 'et'),
	69	+(u'Malay', 'ms'),
	70	+(u'Newar', 'new'),
	71	+(u'Nepal Bhasa', 'new'),
	72	+(u'Simple English', 'simple'),
	73	+(u'Galician', 'gl'),
	74	+(u'Thai', 'th'),
	75	+(u'Aromanian', 'roa-rup'),
	76	+(u'Nynorsk', 'nn'),
	77	+(u'Basque', 'eu'),
	78	+(u'Hindi', 'hi'),
	79	+(u'Greek', 'el'),
	80	+(u'Haitian', 'ht'),
	81	+(u'Latin', 'la'),
	82	+(u'Telugu', 'te'),
	83	+(u'Georgian', 'ka'),
	84	+(u'Cebuano', 'ceb'),
	85	+(u'Macedonian', 'mk'),
	86	+(u'Azeri', 'az'),
	87	+(u'Tagalog', 'tl'),
	88	+(u'Breton', 'br'),
	89	+(u'Serbo-Croatian', 'sh'),
	90	+(u'Marathi', 'mr'),
	91	+(u'Luxembourgish', 'lb'),
	92	+(u'Javanese', 'jv'),
	93	+(u'Latvian', 'lv'),
	94	+(u'Bosnian', 'bs'),
	95	+(u'Icelandic', 'is'),
	96	+(u'Welsh', 'cy'),
	97	+(u'Belarusian', 'be-x-old'),
	98	+(u'Taraškievica', 'be-x-old'),
	99	+(u'Piedmontese', 'pms'),
	100	+(u'Albanian', 'sq'),
	101	+(u'Tamil', 'ta'),
	102	+(u'Bishnupriya Manipuri', 'bpy'),
	103	+(u'Belarusian', 'be'),
	104	+(u'Aragonese', 'an'),
	105	+(u'Occitan', 'oc'),
	106	+(u'Bengali', 'bn'),
	107	+(u'Swahili', 'sw'),
	108	+(u'Ido', 'io'),
	109	+(u'Ripuarian', 'ksh'),
	110	+(u'Lombard', 'lmo'),
	111	+(u'West Frisian', 'fy'),
	112	+(u'Gujarati', 'gu'),
	113	+(u'Low Saxon', 'nds'),
	114	+(u'Afrikaans', 'af'),
	115	+(u'Sicilian', 'scn'),
	116	+(u'Quechua', 'qu'),
	117	+(u'Kurdish', 'ku'),
	118	+(u'Urdu', 'ur'),
	119	+(u'Sundanese', 'su'),
	120	+(u'Malayalam', 'ml'),
	121	+(u'Cantonese', 'zh-yue'),
	122	+(u'Asturian', 'ast'),
	123	+(u'Neapolitan', 'nap'),
	124	+(u'Samogitian', 'bat-smg'),
	125	+(u'Walloon', 'wa'),
	126	+(u'Chuvash', 'cv'),
	127	+(u'Irish', 'ga'),
	128	+(u'Armenian', 'hy'),
	129	+(u'Yoruba', 'yo'),
	130	+(u'Kannada', 'kn'),
	131	+(u'Tajik', 'tg'),
	132	+(u'Tarantino', 'roa-tara'),
	133	+(u'Venetian', 'vec'),
	134	+(u'Western Panjabi', 'pnb'),
	135	+(u'Nepali', 'ne'),
	136	+(u'Scottish Gaelic', 'gd'),
	137	+(u'Yiddish', 'yi'),
	138	+(u'Min Nan', 'zh-min-nan'),
	139	+(u'Uzbek', 'uz'),
	140	+(u'Tatar', 'tt'),
	141	+(u'Kapampangan', 'pam'),
	142	+(u'Ossetian', 'os'),
	143	+(u'Sakha', 'sah'),
	144	+(u'Alemannic', 'als'),
	145	+(u'Maori', 'mi'),
	146	+(u'Egyptian Arabic', 'arz'),
	147	+(u'Kazakh', 'kk'),
	148	+(u'Nahuatl', 'nah'),
	149	+(u'Limburgian', 'li'),
	150	+(u'Upper Sorbian', 'hsb'),
	151	+(u'Gilaki', 'glk'),
	152	+(u'Corsican', 'co'),
	153	+(u'Gan', 'gan'),
	154	+(u'Amharic', 'am'),
	155	+(u'Mongolian', 'mn'),
	156	+(u'Interlingua', 'ia'),
	157	+(u'Central Bicolano', 'bcl'),
	158	+(u'Võro', 'fiu-vro'),
	159	+(u'Dutch Low Saxon', 'nds-nl'),
	160	+(u'Faroese', 'fo'),
	161	+(u'Turkmen', 'tk'),
	162	+(u'Scots', 'sco'),
	163	+(u'West Flemish', 'vls'),
	164	+(u'Sinhalese', 'si'),
	165	+(u'Sanskrit', 'sa'),
	166	+(u'Bavarian', 'bar'),
	167	+(u'Burmese', 'my'),
	168	+(u'Manx', 'gv'),
	169	+(u'Divehi', 'dv'),
	170	+(u'Norman', 'nrm'),
	171	+(u'Pangasinan', 'pag'),
	172	+(u'Romansh', 'rm'),
	173	+(u'Banyumasan', 'map-bms'),
	174	+(u'Zazaki', 'diq'),
	175	+(u'Sorani', 'ckb'),
	176	+(u'Northern Sami', 'se'),
	177	+(u'Mazandarani', 'mzn'),
	178	+(u'Wu', 'wuu'),
	179	+(u'Uyghur', 'ug'),
	180	+(u'Friulian', 'fur'),
	181	+(u'Ligurian', 'lij'),
	182	+(u'Maltese', 'mt'),
	183	+(u'Bihari', 'bh'),
	184	+(u'Novial', 'nov'),
	185	+(u'Malagasy', 'mg'),
	186	+(u'Kashubian', 'csb'),
	187	+(u'Ilokano', 'ilo'),
	188	+(u'Sardinian', 'sc'),
	189	+(u'Classical Chinese', 'zh-classical'),
	190	+(u'Khmer', 'km'),
	191	+(u'Ladino', 'lad'),
	192	+(u'Pali', 'pi'),
	193	+(u'Anglo-Saxon', 'ang'),
	194	+(u'Zamboanga Chavacano', 'cbk-zam'),
	195	+(u'Tibetan', 'bo'),
	196	+(u'Fiji Hindi', 'hif'),
	197	+(u'Franco-Provençal', 'frp'),
	198	+(u'Arpitan', 'frp'),
	199	+(u'Hakka', 'hak'),
	200	+(u'Cornish', 'kw'),
	201	+(u'Punjabi', 'pa'),
	202	+(u'Pashto', 'ps'),
	203	+(u'Kalmyk', 'xal'),
	204	+(u'Silesian', 'szl'),
	205	+(u'Pennsylvania German', 'pdc'),
	206	+(u'Hawaiian', 'haw'),
	207	+(u'Saterland Frisian', 'stq'),
	208	+(u'Interlingue', 'ie'),
	209	+(u'Navajo', 'nv'),
	210	+(u'Fijian', 'fj'),
	211	+(u'Crimean Tatar', 'crh'),
	212	+(u'Komi', 'kv'),
	213	+(u'Tongan', 'to'),
	214	+(u'Acehnese', 'ace'),
	215	+(u'Somali', 'so'),
	216	+(u'Erzya', 'myv'),
	217	+(u'Guarani', 'gn'),
	218	+(u'Karachay-Balkar', 'krc'),
	219	+(u'Extremaduran', 'ext'),
	220	+(u'Lingala', 'ln'),
	221	+(u'Kirghiz', 'ky'),
	222	+(u'Meadow Mari', 'mhr'),
	223	+(u'Assyrian Neo-Aramaic', 'arc'),
	224	+(u'Emilian-Romagnol', 'eml'),
	225	+(u'Lojban', 'jbo'),
	226	+(u'Picard', 'pcd'),
	227	+(u'Aymara', 'ay'),
	228	+(u'Wolof', 'wo'),
	229	+(u'Tumbuka', 'tum'),
	230	+(u'Kabyle', 'kab'),
	231	+(u'Bashkir', 'ba'),
	232	+(u'North Frisian', 'frr'),
	233	+(u'Tahitian', 'ty'),
	234	+(u'Tok Pisin', 'tpi'),
	235	+(u'Papiamentu', 'pap'),
	236	+(u'Zealandic', 'zea'),
	237	+(u'Sranan', 'srn'),
	238	+(u'Greenlandic', 'kl'),
	239	+(u'Udmurt', 'udm'),
	240	+(u'Chechen', 'ce'),
	241	+(u'Igbo', 'ig'),
	242	+(u'Komi-Permyak', 'koi'),
	243	+(u'Oriya', 'or'),
	244	+(u'Lower Sorbian', 'dsb'),
	245	+(u'Kongo', 'kg'),
	246	+(u'Lao', 'lo'),
	247	+(u'Abkhazian', 'ab'),
	248	+(u'Moksha', 'mdf'),
	249	+(u'Romani', 'rmy'),
	250	+(u'Hill Mari', 'mrj'),
	251	+(u'Banjar', 'bjn'),
	252	+(u'Old Church Slavonic', 'cu'),
	253	+(u'Mirandese', 'mwl'),
	254	+(u'Karakalpak', 'kaa'),
	255	+(u'Samoan', 'sm'),
	256	+(u'Moldovan', 'mo'),
	257	+(u'Tetum', 'tet'),
	258	+(u'Avar', 'av'),
	259	+(u'Kashmiri', 'ks'),
	260	+(u'Gothic', 'got'),
	261	+(u'Sindhi', 'sd'),
	262	+(u'Bambara', 'bm'),
	263	+(u'Nauruan', 'na'),
	264	+(u'Norfolk', 'pih'),
	265	+(u'Pontic', 'pnt'),
	266	+(u'Inuktitut', 'iu'),
	267	+(u'Inupiak', 'ik'),
	268	+(u'Bislama', 'bi'),
	269	+(u'Cherokee', 'chr'),
	270	+(u'Assamese', 'as'),
	271	+(u'Min Dong', 'cdo'),
	272	+(u'Ewe', 'ee'),
	273	+(u'Swati', 'ss'),
	274	+(u'Oromo', 'om'),
	275	+(u'Zhuang', 'za'),
	276	+(u'Zulu', 'zu'),
	277	+(u'Tigrinya', 'ti'),
	278	+(u'Venda', 've'),
	279	+(u'Tsonga', 'ts'),
	280	+(u'Hausa', 'ha'),
	281	+(u'Dzongkha', 'dz'),
	282	+(u'Sango', 'sg'),
	283	+(u'Chamorro', 'ch'),
	284	+(u'Cree', 'cr'),
	285	+(u'Xhosa', 'xh'),
	286	+(u'Akan', 'ak'),
	287	+(u'Sesotho', 'st'),
	288	+(u'Kinyarwanda', 'rw'),
	289	+(u'Tswana', 'tn'),
	290	+(u'Kikuyu', 'ki'),
	291	+(u'Buryat', 'bxr'),
	292	+(u'Buginese', 'bug'),
	293	+(u'Chichewa', 'ny'),
	294	+(u'Lak', 'lbe'),
	295	+(u'Twi', 'tw'),
	296	+(u'Shona', 'sn'),
	297	+(u'Kirundi', 'rn'),
	298	+(u'Fula', 'ff'),
	299	+(u'Cheyenne', 'chy'),
	300	+(u'Luganda', 'lg'),
	301	+(u'Ndonga', 'ng'),
	302	+(u'Sichuan Yi', 'ii'),
	303	+(u'Choctaw', 'cho'),
	304	+(u'Marshallese', 'mh'),
	305	+(u'Afar', 'aa'),
	306	+(u'Kuanyama', 'kj'),
	307	+(u'Hiri Motu', 'ho'),
	308	+(u'Muscogee', 'mus'),
	309	+(u'Kanuri', 'kr'),
	310	+(u'Herero', 'hz'),
	311	+(u'English', 'en'),
	312	+(u'Deutsch', 'de'),
	313	+(u'Français', 'fr'),
	314	+(u'Italiano', 'it'),
	315	+(u'Polski', 'pl'),
	316	+(u'日本語', 'ja'),
	317	+(u'Español', 'es'),
	318	+(u'Nederlands', 'nl'),
	319	+(u'Português', 'pt'),
	320	+(u'Русский', 'ru'),
	321	+(u'Svenska', 'sv'),
	322	+(u'中文', 'zh'),
	323	+(u'Català', 'ca'),
	324	+(u'Norsk', 'no'),
	325	+(u'Bokmål', 'no'),
	326	+(u'Suomi', 'fi'),
	327	+(u'Українська', 'uk'),
	328	+(u'Magyar', 'hu'),
	329	+(u'Čeština', 'cs'),
	330	+(u'Română', 'ro'),
	331	+(u'Türkçe', 'tr'),
	332	+(u'한국어', 'ko'),
	333	+(u'Tiếng Việt', 'vi'),
	334	+(u'Dansk', 'da'),
	335	+(u'العربية', 'ar'),
	336	+(u'Esperanto', 'eo'),
	337	+(u'Српски', 'sr'),
	338	+(u'Srpski', 'sr'),
	339	+(u'Bahasa Indonesia', 'id'),
	340	+(u'Lietuvių', 'lt'),
	341	+(u'Volapük', 'vo'),
	342	+(u'Slovenčina', 'sk'),
	343	+(u'עברית', 'he'),
	344	+(u'Български', 'bg'),
	345	+(u'فارسی', 'fa'),
	346	+(u'Slovenščina', 'sl'),
	347	+(u'Winaray', 'war'),
	348	+(u'Hrvatski', 'hr'),
	349	+(u'Eesti', 'et'),
	350	+(u'Bahasa Melayu', 'ms'),
	351	+(u'नेपाल भाषा', 'new'),
	352	+(u'Simple English', 'simple'),
	353	+(u'Galego', 'gl'),
	354	+(u'ไทย', 'th'),
	355	+(u'Armãneashce', 'roa-rup'),
	356	+(u'Nynorsk', 'nn'),
	357	+(u'Euskara', 'eu'),
	358	+(u'हिन्दी', 'hi'),
	359	+(u'Ελληνικά', 'el'),
	360	+(u'Krèyol ayisyen', 'ht'),
	361	+(u'Latina', 'la'),
	362	+(u'తెలుగు', 'te'),
	363	+(u'ქართული', 'ka'),
	364	+(u'Sinugboanong Binisaya', 'ceb'),
	365	+(u'Македонски', 'mk'),
	366	+(u'Azərbaycan', 'az'),
	367	+(u'Tagalog', 'tl'),
	368	+(u'Brezhoneg', 'br'),
	369	+(u'Srpskohrvatski', 'sh'),
	370	+(u'Српскохрватски', 'sh'),
	371	+(u'मराठी', 'mr'),
	372	+(u'Lëtzebuergesch', 'lb'),
	373	+(u'Basa Jawa', 'jv'),
	374	+(u'Latviešu', 'lv'),
	375	+(u'Bosanski', 'bs'),
	376	+(u'Íslenska', 'is'),
	377	+(u'Cymraeg', 'cy'),
	378	+(u'Беларуская', 'be-x-old'),
	379	+(u'тарашкевіца', 'be-x-old'),
	380	+(u'Piemontèis', 'pms'),
	381	+(u'Shqip', 'sq'),
	382	+(u'தமிழ்', 'ta'),
	383	+(u'ইমার ঠার', 'bpy'),
	384	+(u'বিষ্ণুপ্রিয়া মণিপুরী', 'bpy'),
	385	+(u'Беларуская', 'be'),
	386	+(u'Aragonés', 'an'),
	387	+(u'Occitan', 'oc'),
	388	+(u'বাংলা', 'bn'),
	389	+(u'Kiswahili', 'sw'),
	390	+(u'Ido', 'io'),
	391	+(u'Ripoarisch', 'ksh'),
	392	+(u'Lumbaart', 'lmo'),
	393	+(u'Frysk', 'fy'),
	394	+(u'ગુજરાતી', 'gu'),
	395	+(u'Plattdüütsch', 'nds'),
	396	+(u'Afrikaans', 'af'),
	397	+(u'Sicilianu', 'scn'),
	398	+(u'Runa Simi', 'qu'),
	399	+(u'Kurdî', 'ku'),
	400	+(u'كوردی', 'ku'),
	401	+(u'اردو', 'ur'),
	402	+(u'Basa Sunda', 'su'),
	403	+(u'മലയാളം', 'ml'),
	404	+(u'粵語', 'zh-yue'),
	405	+(u'Asturianu', 'ast'),
	406	+(u'Nnapulitano', 'nap'),
	407	+(u'Žemaitėška', 'bat-smg'),
	408	+(u'Walon', 'wa'),
	409	+(u'Чăваш', 'cv'),
	410	+(u'Gaeilge', 'ga'),
	411	+(u'Հայերեն', 'hy'),
	412	+(u'Yorùbá', 'yo'),
	413	+(u'ಕನ್ನಡ', 'kn'),
	414	+(u'Тоҷикӣ', 'tg'),
	415	+(u'Tarandíne', 'roa-tara'),
	416	+(u'Vèneto', 'vec'),
	417	+(u'شاہ مکھی پنجابی', 'pnb'),
	418	+(u'Shāhmukhī Pañjābī', 'pnb'),
	419	+(u'नेपाली', 'ne'),
	420	+(u'Gàidhlig', 'gd'),
	421	+(u'ייִדיש', 'yi'),
	422	+(u'Bân-lâm-gú', 'zh-min-nan'),
	423	+(u'O‘zbek', 'uz'),
	424	+(u'Tatarça', 'tt'),
	425	+(u'Татарча', 'tt'),
	426	+(u'Kapampangan', 'pam'),
	427	+(u'Иронау', 'os'),
	428	+(u'Саха тыла', 'sah'),
	429	+(u'Saxa Tyla', 'sah'),
	430	+(u'Alemannisch', 'als'),
	431	+(u'Māori', 'mi'),
	432	+(u'مصرى', 'arz'),
	433	+(u'Maṣrī', 'arz'),
	434	+(u'Қазақша', 'kk'),
	435	+(u'Nāhuatl', 'nah'),
	436	+(u'Limburgs', 'li'),
	437	+(u'Hornjoserbsce', 'hsb'),
	438	+(u'گیلکی', 'glk'),
	439	+(u'Corsu', 'co'),
	440	+(u'贛語', 'gan'),
	441	+(u'አማርኛ', 'am'),
	442	+(u'Монгол', 'mn'),
	443	+(u'Interlingua', 'ia'),
	444	+(u'Bikol', 'bcl'),
	445	+(u'Võro', 'fiu-vro'),
	446	+(u'Nedersaksisch', 'nds-nl'),
	447	+(u'Føroyskt', 'fo'),
	448	+(u'تركمن ', 'tk'),
	449	+(u'Туркмен', 'tk'),
	450	+(u'Scots', 'sco'),
	451	+(u'West-Vlams', 'vls'),
	452	+(u'සිංහල', 'si'),
	453	+(u'संस्कृतम्', 'sa'),
	454	+(u'Boarisch', 'bar'),
	455	+(u'မ္ရန္‌မာစာ', 'my'), #Needs fix
	456	+(u'Gaelg', 'gv'),
	457	+(u'ދިވެހިބަސް', 'dv'),
	458	+(u'Nouormand', 'nrm'),
	459	+(u'Normaund', 'nrm'),
	460	+(u'Pangasinan', 'pag'),
	461	+(u'Rumantsch', 'rm'),
	462	+(u'Basa Banyumasan', 'map-bms'),
	463	+(u'Zazaki', 'diq'),
	464	+(u'Soranî', 'ckb'),
	465	+(u'کوردی', 'ckb'),
	466	+(u'Sámegiella', 'se'),
	467	+(u'مَزِروني', 'mzn'),
	468	+(u'吴语', 'wuu'),
	469	+(u'Oyghurque', 'ug'),
	470	+(u'Furlan', 'fur'),
	471	+(u'Líguru', 'lij'),
	472	+(u'Malti', 'mt'),
	473	+(u'भोजपुरी', 'bh'),
	474	+(u'Novial', 'nov'),
	475	+(u'Malagasy', 'mg'),
	476	+(u'Kaszëbsczi', 'csb'),
	477	+(u'Ilokano', 'ilo'),
	478	+(u'Sardu', 'sc'),
	479	+(u'古文', 'zh-classical'),
	480	+(u'文言文', 'zh-classical'),
	481	+(u'ភាសាខ្មែរ', 'km'),
	482	+(u'Dzhudezmo', 'lad'),
	483	+(u'पाऴि', 'pi'),
	484	+(u'Englisc', 'ang'),
	485	+(u'Chavacano de Zamboanga', 'cbk-zam'),
	486	+(u'བོད་སྐད', 'bo'),
	487	+(u'Fiji Hindi', 'hif'),
	488	+(u'Arpitan', 'frp'),
	489	+(u'Hak-kâ-fa', 'hak'),
	490	+(u'客家話', 'hak'),
	491	+(u'Kernewek', 'kw'),
	492	+(u'Karnuack', 'kw'),
	493	+(u'ਪੰਜਾਬੀ', 'pa'),
	494	+(u'پښتو', 'ps'),
	495	+(u'Хальмг', 'xal'),
	496	+(u'Ślůnski', 'szl'),
	497	+(u'Deitsch', 'pdc'),
	498	+(u'Hawai`i', 'haw'),
	499	+(u'Seeltersk', 'stq'),
	500	+(u'Interlingue', 'ie'),
	501	+(u'Diné bizaad', 'nv'),
	502	+(u'Na Vosa Vakaviti', 'fj'),
	503	+(u'Qırımtatarca', 'crh'),
	504	+(u'Коми', 'kv'),
	505	+(u'faka Tonga', 'to'),
	506	+(u'Bahsa Acèh', 'ace'),
	507	+(u'Soomaaliga', 'so'),
	508	+(u'Эрзянь', 'myv'),
	509	+(u'Erzjanj Kelj', 'myv'),
	510	+(u"Avañe'ẽ", 'gn'),
	511	+(u'Къарачай-Малкъар', 'krc'),
	512	+(u'Qarachay-Malqar', 'krc'),
	513	+(u'Estremeñu', 'ext'),
	514	+(u'Lingala', 'ln'),
	515	+(u'Кыргызча', 'ky'),
	516	+(u'Олык Марий', 'mhr'),
	517	+(u'Olyk Marij', 'mhr'),
	518	+(u'ܐܪܡܝܐ', 'arc'),
	519	+(u'Emiliàn e rumagnòl', 'eml'),
	520	+(u'Lojban', 'jbo'),
	521	+(u'Picard', 'pcd'),
	522	+(u'Aymar', 'ay'),
	523	+(u'Wolof', 'wo'),
	524	+(u'chiTumbuka', 'tum'),
	525	+(u'Taqbaylit', 'kab'),
	526	+(u'Башҡорт', 'ba'),
	527	+(u'Frasch', 'frr'),
	528	+(u'Reo Mā`ohi', 'ty'),
	529	+(u'Tok Pisin', 'tpi'),
	530	+(u'Papiamentu', 'pap'),
	531	+(u'Zeêuws', 'zea'),
	532	+(u'Sranantongo', 'srn'),
	533	+(u'Kalaallisut', 'kl'),
	534	+(u'Удмурт кыл', 'udm'),
	535	+(u'Нохчийн', 'ce'),
	536	+(u'Igbo', 'ig'),
	537	+(u'Перем Коми', 'koi'),
	538	+(u'Perem Komi', 'koi'),
	539	+(u'ଓଡ଼ିଆ', 'or'),
	540	+(u'Dolnoserbski', 'dsb'),
	541	+(u'KiKongo', 'kg'),
	542	+(u'ລາວ', 'lo'),
	543	+(u'Аҧсуа', 'ab'),
	544	+(u'Мокшень', 'mdf'),
	545	+(u'Mokshanj Kälj', 'mdf'),
	546	+(u'romani - रोमानी', 'rmy'),
	547	+(u'Кырык Мары', 'mrj'),
	548	+(u'Kyryk Mary', 'mrj'),
	549	+(u'Bahasa Banjar', 'bjn'),
	550	+(u'Словѣньскъ', 'cu'),
	551	+(u'Páigina Percipal', 'mwl'),
	552	+(u'Qaraqalpaqsha', 'kaa'),
	553	+(u'Gagana Samoa', 'sm'),
	554	+(u'Молдовеняскэ', 'mo'),
	555	+(u'Tetun', 'tet'),
	556	+(u'Авар', 'av'),
	557	+(u'कश्मीरी', 'ks'),
	558	+(u'كشميري', 'ks'),
	559	+(u'𐌲𐌿𐍄𐌹𐍃𐌺', 'got'), #Needs fix
	560	+(u'سنڌي، سندھی ، सिन्ध', 'sd'),
	561	+(u'Bamanankan', 'bm'),
	562	+(u'dorerin Naoero', 'na'),
	563	+(u'Norfuk', 'pih'),
	564	+(u'Ποντιακά', 'pnt'),
	565	+(u'ᐃᓄᒃᑎᑐᑦ', 'iu'),
	566	+(u'Iñupiak', 'ik'),
	567	+(u'Bislama', 'bi'),
	568	+(u'ᏣᎳᎩ', 'chr'),
	569	+(u'অসমীয়া', 'as'),
	570	+(u'Mìng-dĕ̤ng-ngṳ̄', 'cdo'),
	571	+(u'Eʋegbe', 'ee'),
	572	+(u'SiSwati', 'ss'),
	573	+(u'Oromoo', 'om'),
	574	+(u'Cuengh', 'za'),
	575	+(u'isiZulu', 'zu'),
	576	+(u'ትግርኛ', 'ti'),
	577	+(u'Tshivenda', 've'),
	578	+(u'Xitsonga', 'ts'),
	579	+(u'هَوُسَ', 'ha'),
	580	+(u'ཇོང་ཁ', 'dz'),
	581	+(u'Sängö', 'sg'),
	582	+(u'Chamoru', 'ch'),
	583	+(u'Nehiyaw', 'cr'),
	584	+(u'isiXhosa', 'xh'),
	585	+(u'Akana', 'ak'),
	586	+(u'Sesotho', 'st'),
	587	+(u'Ikinyarwanda', 'rw'),
	588	+(u'Setswana', 'tn'),
	589	+(u'Gĩkũyũ', 'ki'),
	590	+(u'Буряад', 'bxr'),
	591	+(u'Basa Ugi', 'bug'),
	592	+(u'Chi-Chewa', 'ny'),
	593	+(u'Лакку', 'lbe'),
	594	+(u'Twi', 'tw'),
	595	+(u'chiShona', 'sn'),
	596	+(u'Kirundi', 'rn'),
	597	+(u'Fulfulde', 'ff'),
	598	+(u'Tsetsêhestâhese', 'chy'),
	599	+(u'Luganda', 'lg'),
	600	+(u'Oshiwambo', 'ng'),
	601	+(u'ꆇꉙ', 'ii'),
	602	+(u'Choctaw', 'cho'),
	603	+(u'Ebon', 'mh'),
	604	+(u'Afar', 'aa'),
	605	+(u'Kuanyama', 'kj'),
	606	+(u'Hiri Motu', 'ho'),
	607	+(u'Muskogee', 'mus'),
	608	+(u'Kanuri', 'kr'),
	609	+(u'Otsiherero', 'hz'),
610	610	])
611	611
	612	+
612	613	def language_map():
613		~~- return utils.invert_dict(MAPPING)~~
\ No newline at end of file
	614	+ return utils.invert_dict(MAPPING)
Index: trunk/tools/editor_trends/configuration.py
—	—	@@ -49,12 +49,19 @@
50	50	self.debug = debug
51	51	self.progressbar = True
52	52	self.encoding = 'utf-8'
53		~~- self.date_format = '%Y-%m-%d' #Date format as used by Erik Zachte~~
54		~~- self.timestamp_format = '%Y-%m-%dT%H:%M:%SZ' # Timestamp format as generated by the MediaWiki dumps~~
55	53
56		~~- self.max_xmlfile_size = 4096 * 1024 #67108864 # ==64Mb, see http://hadoop.apache.org/common/docs/r0.20.0/hdfs_design.html#Large+Data+Setsfor reason~~
	54	+ #Date format as used by Erik Zachte
	55	+ self.date_format = '%Y-%m-%d'
	56	+
	57	+ # Timestamp format as generated by the MediaWiki dumps
	58	+ self.timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
	59	+
	60	+ #67108864 # ==64Mb, see http://hadoop.apache.org/common/docs/r0.20.0/hdfs_design.html#Large+Data+Sets for reason
	61	+ self.max_xmlfile_size = 4096 * 1024
	62	+
	63	+ #Change this to match your computer's configuration (RAM / CPU)
57	64	self.number_of_processes = cpu_count() * process_multiplier
58		~~- #Change this to match your computers configuration (RAM / CPU)~~
	65	+
59	66	self.minimum_python_version = (2, 6)
60	67	self.wp_dump_location = 'http://download.wikimedia.org'
61	68	self.xml_namespace = 'http://www.mediawiki.org/xml/export-0.4/'
—	—	@@ -97,7 +104,6 @@
98	105	}
99	106
100	107
101		-
102	108	def set_custom_settings(self, **kwargs):
103	109	for kw in kwargs:
104	110	setattr(self, kw, kwargs[kw])
—	—	@@ -119,11 +125,11 @@
120	126	return cwd
121	127
122	128	def determine_platform(self):
123		~~- os = platform.system()~~
124		~~- if os == 'Darwin':~~
125		~~- return 'OSX'~~
126		~~- else:~~
127		~~- return os~~
	129	+ os = platform.system()
	130	+ if os == 'Darwin':
	131	+ return 'OSX'
	132	+ else:
	133	+ return os
128	134
129	135	#def determine_path_ziptool(self):
130	136	# return self.detect_installed_program(self.determine_ziptool())
—	—	@@ -165,7 +171,7 @@
166	172	if self.platform == 'Windows' and self.architecture == 'i386':
167	173	return win32file._getmaxstdio()
168	174	elif self.platform != 'Windows':
169		~~- return resource.getrlimit(resource.RLIMIT_NOFILE)[0]~~
	175	+ return resource.getrlimit(resource.RLIMIT_NOFILE)[0] - 100
170	176	else:
171	177	return 500
172	178
—	—	@@ -175,11 +181,10 @@
176	182	os.path.isdir(os.path.join(self.working_directory, name))]
177	183	for subdirname in dirs:
178	184	if not subdirname.startswith('.') and subdirname not in IGNORE_DIRS:
179		~~- sys.path.append(os.path.join(self.working_directory,~~
	185	+ sys.path.append(os.path.join(self.working_directory,
180	186	subdirname))
181	187
182	188
183		-
184	189	def set_file_locations(self):
185	190	self.input_location = os.path.join(self.root, 'wikimedia')
186	191	self.input_filename = os.path.join(self.input_location, 'en',
Index: trunk/tools/editor_trends/utils/models.py
—	—	@@ -1,63 +0,0 @@
2		~~-#!/usr/bin/python~~
3		~~-# -- coding: utf-8 --~~
4		~~-'''~~
5		~~-Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)~~
6		~~-This program is free software; you can redistribute it and/or~~
7		~~-modify it under the terms of the GNU General Public License version 2~~
8		~~-as published by the Free Software Foundation.~~
9		~~-This program is distributed in the hope that it will be useful,~~
10		~~-but WITHOUT ANY WARRANTY; without even the implied warranty of~~
11		~~-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.~~
12		~~-See the GNU General Public License for more details, at~~
13		~~-http://www.fsf.org/licenses/gpl.html~~
14		~~-'''~~
15		-
16		~~-__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])~~
17		~~-__author__email = 'dvanliere at gmail dot com'~~
18		~~-__date__ = '2010-11-09'~~
19		~~-__version__ = '0.1'~~
20		-
21		~~-import multiprocessing~~
22		-
23		-
24		~~-class BaseConsumer(multiprocessing.Process):~~
25		-
26		~~- def __init__(self, task_queue, result_queue):~~
27		~~- multiprocessing.Process.__init__(self)~~
28		~~- self.task_queue = task_queue~~
29		~~- self.result_queue = result_queue~~
30		-
31		-
32		-
33		-
34		~~-# for kw in kwargs:~~
35		~~-# setattr(self, kw, kwargs[kw])~~
36		-#
37		~~-# def run(self):~~
38		~~-# proc_name = self.name~~
39		~~-# kwargs = {}~~
40		~~-# IGNORE = ['input_queue', 'result_queue', 'target']~~
41		~~-# for kw in self.__dict__:~~
42		~~-# if kw not in IGNORE and not kw.startswith('_'):~~
43		~~-# kwargs[kw] = getattr(self, kw)~~
44		~~-# self.target(self.input_queue, self.result_queue, **kwargs)~~
45		-
46		-
47		~~-class ProcessResultQueue(multiprocessing.Process):~~
48		-
49		~~- def __init__(self, target, result_queue, **kwargs):~~
50		~~- multiprocessing.Process.__init__(self)~~
51		~~- self.result_queue = result_queue~~
52		~~- self.target = target~~
53		~~- for kw in kwargs:~~
54		~~- setattr(self, kw, kwargs[kw])~~
55		-
56		-
57		~~- def run(self):~~
58		~~- proc_name = self.name~~
59		~~- kwargs = {}~~
60		~~- IGNORE = ['result_queue', 'target']~~
61		~~- for kw in self.__dict__:~~
62		~~- if kw not in IGNORE and not kw.startswith('_'):~~
63		~~- kwargs[kw] = getattr(self, kw)~~
64		~~- self.target(self.result_queue, **kwargs)~~
Index: trunk/tools/editor_trends/utils/consumers.py
—	—	@@ -0,0 +1,61 @@
	2	+#!/usr/bin/python
	3	+# -- coding: utf-8 --
	4	+'''
	5	+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
	6	+This program is free software; you can redistribute it and/or
	7	+modify it under the terms of the GNU General Public License version 2
	8	+as published by the Free Software Foundation.
	9	+This program is distributed in the hope that it will be useful,
	10	+but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	12	+See the GNU General Public License for more details, at
	13	+http://www.fsf.org/licenses/gpl.html
	14	+'''
	15	+
	16	+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
	17	+__author__email = 'dvanliere at gmail dot com'
	18	+__date__ = '2010-11-09'
	19	+__version__ = '0.1'
	20	+
	21	+import multiprocessing
	22	+
	23	+
	24	+class BaseConsumer(multiprocessing.Process):
	25	+
	26	+ def __init__(self, task_queue, result_queue):
	27	+ multiprocessing.Process.__init__(self)
	28	+ self.task_queue = task_queue
	29	+ self.result_queue = result_queue
	30	+
	31	+
	32	+# for kw in kwargs:
	33	+# setattr(self, kw, kwargs[kw])
	34	+#
	35	+# def run(self):
	36	+# proc_name = self.name
	37	+# kwargs = {}
	38	+# IGNORE = ['input_queue', 'result_queue', 'target']
	39	+# for kw in self.__dict__:
	40	+# if kw not in IGNORE and not kw.startswith('_'):
	41	+# kwargs[kw] = getattr(self, kw)
	42	+# self.target(self.input_queue, self.result_queue, **kwargs)
	43	+
	44	+
	45	+class ProcessResultQueue(multiprocessing.Process):
	46	+
	47	+ def __init__(self, target, result_queue, **kwargs):
	48	+ multiprocessing.Process.__init__(self)
	49	+ self.result_queue = result_queue
	50	+ self.target = target
	51	+ for kw in kwargs:
	52	+ setattr(self, kw, kwargs[kw])
	53	+
	54	+
	55	+ def run(self):
	56	+ proc_name = self.name
	57	+ kwargs = {}
	58	+ IGNORE = ['result_queue', 'target']
	59	+ for kw in self.__dict__:
	60	+ if kw not in IGNORE and not kw.startswith('_'):
	61	+ kwargs[kw] = getattr(self, kw)
	62	+ self.target(self.result_queue, **kwargs)
Property changes on: trunk/tools/editor_trends/utils/consumers.py
___________________________________________________________________
Added: svn:eol-style
1	63	+ native
Added: svn:mime-type
2	64	+ text/plain
Index: trunk/tools/editor_trends/utils/messages.py
—	—	@@ -26,7 +26,12 @@
27	27
28	28
29	29	def show(func):
	30	+ '''
	31	+ @func should be a qsize() belonging to a task queue. qsize() is not supported
	32	+ on OSX, hence this simple workaround to make sure that we can continue supporting
	33	+ OSX.
	34	+ '''
30	35	try:
31		~~- func()~~
	36	+ return func()
32	37	except:
33		~~- print 'Calling function %s caused an error, probably your platform is not supporting this function' % func~~
	38	+ return 'unknown'
Index: trunk/tools/editor_trends/database/db_settings.py
—	—	@@ -1,38 +0,0 @@
2		~~-#!/usr/bin/python~~
3		~~-# -- coding: utf-8 --~~
4		~~-'''~~
5		~~-Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)~~
6		~~-This program is free software; you can redistribute it and/or~~
7		~~-modify it under the terms of the GNU General Public License version 2~~
8		~~-as published by the Free Software Foundation.~~
9		~~-This program is distributed in the hope that it will be useful,~~
10		~~-but WITHOUT ANY WARRANTY; without even the implied warranty of~~
11		~~-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.~~
12		~~-See the GNU General Public License for more details, at~~
13		~~-http://www.fsf.org/licenses/gpl.html~~
14		~~-'''~~
15		-
16		~~-__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])~~
17		-
18		~~-'''~~
19		~~-This is a settings file that contains the layout of different tables. The main~~
20		~~-key will be used as the tablename while it,s values contain tuples containing~~
21		~~-fieldname and datatype This is only be used for sqlite.~~
22		~~-'''~~
23		~~-CONTRIBUTOR_TABLE = {'contributors': []}~~
24		~~-CONTRIBUTOR_TABLE['contributors'].append(('contributor', 'VARCHAR(64)'))~~
25		~~-CONTRIBUTOR_TABLE['contributors'].append(('article', 'INTEGER'))~~
26		~~-CONTRIBUTOR_TABLE['contributors'].append(('timestamp', 'TEXT'))~~
27		~~-CONTRIBUTOR_TABLE['contributors'].append(('bot', 'INTEGER'))~~
28		-
29		~~-BOT_TABLE = {'bots': []}~~
30		~~-BOT_TABLE['bots'].append(('language', 'VARCHAR(12)'))~~
31		~~-BOT_TABLE['bots'].append(('name', 'VARCHAR(64)'))~~
32		~~-BOT_TABLE['bots'].append(('edits_namespace_a', 'INTEGER'))~~
33		~~-BOT_TABLE['bots'].append(('edits_namespace_x', 'INTEGER'))~~
34		~~-BOT_TABLE['bots'].append(('rank_now', 'INTEGER'))~~
35		~~-BOT_TABLE['bots'].append(('rank_prev', 'INTEGER'))~~
36		~~-BOT_TABLE['bots'].append(('first_date', 'TEXT'))~~
37		~~-BOT_TABLE['bots'].append(('days_first', 'INTEGER'))~~
38		~~-BOT_TABLE['bots'].append(('last_date', 'TEXT'))~~
39		~~-BOT_TABLE['bots'].append(('days_last', 'INTEGER'))~~
Index: trunk/tools/editor_trends/database/launcher.py
—	—	@@ -27,14 +27,14 @@
28	28	from utils import utils
29	29
30	30
31		~~-def start_mongodb_server(platform, x, path):~~
	31	+def start_mongodb_server(x, path):
32	32	default_port = 27017
33	33	port = default_port + x
34		~~- if platform == 'Windows':~~
	34	+ if settings.platform == 'Windows':
35	35	p = subprocess.Popen([path, '--port', str(port), '--dbpath', 'c:\data\db', '--logpath', 'c:\mongodb\logs'])
36		~~- elif platform == 'Linux':~~
	36	+ elif settings.platform == 'Linux':
37	37	subprocess.Popen([path, '--port %s' % port])
38		~~- elif platform == 'OSX':~~
	38	+ elif settings.platform == 'OSX':
39	39	raise NotImplementedError
40	40	else:
41	41	raise exceptions.PlatformNotSupportedError(platform)

Status & tagging log

00:03, 14 January 2011 Reedy (talk | contribs) changed the status of r79958 [removed: new added: deferred]