r81061 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r81060‎ | r81061 | r81062 >
Date:22:27, 26 January 2011
Author:diederik
Status:deferred
Tags:
Comment:
Refactored the wikiprojects class; it is now cleanly separated into three parts:
1) languages.py for language-related functions and properties
2) projects.py for project-related functions and properties
3) runtime_settings.py, which contains an instance of Settings, Language and Project; this object knows what needs to happen when and where.
Modified paths:
  • /trunk/tools/editor_trends/classes/languages.py (added) (history)
  • /trunk/tools/editor_trends/classes/projects.py (added) (history)
  • /trunk/tools/editor_trends/classes/runtime_settings.py (added) (history)
  • /trunk/tools/editor_trends/configuration.py (modified) (history)
  • /trunk/tools/editor_trends/cronjobs.py (modified) (history)
  • /trunk/tools/editor_trends/manage.py (modified) (history)
  • /trunk/tools/editor_trends/wikilytics/api/forms.py (modified) (history)
  • /trunk/tools/editor_trends/wikilytics/api/tasks.py (deleted) (history)

Diff [purge]

Index: trunk/tools/editor_trends/manage.py
@@ -31,7 +31,9 @@
3232 from utils import ordered_dict
3333 from utils import log
3434 from utils import timer
35 -from classes import wikiprojects
 35+from classes import projects
 36+from classes import languages
 37+from classes import runtime_settings
3638 from database import db
3739 from etl import downloader
3840 from etl import extracter
@@ -184,10 +186,11 @@
185187 event='finish')
186188
187189
188 -def exporter_launcher(properties, settings, logger):
 190+def dataset_launcher(properties, settings, logger):
189191 print 'Start exporting dataset'
190192 stopwatch = timer.Timer()
191193 log.log_to_mongo(properties, 'dataset', 'export', stopwatch, event='start')
 194+
192195 for target in properties.targets:
193196 # write_message_to_log(logger, settings,
194197 # message=None,
@@ -195,12 +198,11 @@
196199 # target=target,
197200 # dbname=properties.full_project,
198201 # collection=properties.collection)
199 - print 'Dataset is created by: %s' % target
200 -
201 - analyzer.generate_chart_data(properties.project,
 202+ analyzer.generate_chart_data(properties.project.name,
202203 properties.collection,
203 - properties.language_code,
204 - target)
 204+ properties.language.code,
 205+ target,
 206+ **properties.keywords)
205207 stopwatch.elapsed()
206208 log.log_to_mongo(properties, 'dataset', 'export', stopwatch, event='finish')
207209
@@ -265,25 +267,7 @@
266268 log.log_to_mongo(properties, 'dataset', 'all', stopwatch, event='finish')
267269
268270
269 -def show_languages(settings, logger, properties):
270 - first = properties.get_value('startswith')
271 - if first != None:
272 - first = first.title()
273 - choices = languages.supported_languages()
274 - lang = []
275 - for choice in choices:
276 - lang.append(choice)
277 - lang.sort()
278 - for language in lang:
279 - try:
280 - if first != None and language.startswith(first):
281 - print '%s' % language.decode(settings.encoding)
282 - elif first == None:
283 - print '%s' % language.decode(settings.encoding)
284 - except UnicodeEncodeError:
285 - print '%s' % language
286271
287 -
288272 def about_statement():
289273 print ''
290274 print 'Editor Trends Software is (c) 2010-2011 by the Wikimedia Foundation.'
@@ -300,9 +284,10 @@
301285 Entry point for parsing command line and launching the needed function(s).
302286 '''
303287 settings = configuration.Settings()
304 - default_language = wikiprojects.determine_default_language()
305 - wiki = wikiprojects.Wiki(settings)
306 - projects = wiki.projects.keys()
 288+ language = languages.init()
 289+ project = projects.init()
 290+ rts = runtime_settings.RunTimeSettings(project, language, settings)
 291+
307292 #Init Argument Parser
308293 parser = ArgumentParser(prog='manage', formatter_class=RawTextHelpFormatter)
309294 subparsers = parser.add_subparsers(help='sub - command help')
@@ -356,11 +341,20 @@
357342 exported.')
358343 parser_transform.set_defaults(func=transformer_launcher)
359344
360 - #EXPORT
361 - parser_dataset = subparsers.add_parser('export',
 345+ #DATASET
 346+ parser_dataset = subparsers.add_parser('dataset',
362347 help='Create a dataset from the MongoDB and write it to a csv file.')
363 - parser_dataset.set_defaults(func=exporter_launcher)
 348+ parser_dataset.set_defaults(func=dataset_launcher)
 349+ parser_dataset.add_argument('-c', '--charts',
 350+ action='store',
 351+ help='Should be a valid function name that matches one of the plugin functions',
 352+ default=analyzer.available_analyses()['new_editor_count'])
364353
 354+ parser_dataset.add_argument('-k', '--keywords',
 355+ action='store',
 356+ help='Add additional keywords in the format keyword1=value1,keyword2=value2',
 357+ default={})
 358+
365359 #ALL
366360 parser_all = subparsers.add_parser('all',
367361 help='The all sub command runs the download, split, store and dataset \
@@ -387,12 +381,11 @@
388382 executing all.',
389383 default=[])
390384
391 -
392385 parser.add_argument('-l', '--language',
393386 action='store',
394387 help='Example of valid languages.',
395 - choices=wiki.supported_languages(),
396 - default=default_language)
 388+ choices=project.supported_languages(),
 389+ default=language.name)
397390
398391 parser.add_argument('-p', '--project',
399392 action='store',
@@ -422,18 +415,13 @@
423416 %s' % ''.join([f + ',\n' for f in settings.file_choices]),
424417 default='stub-meta-history.xml.gz')
425418
426 - parser.add_argument('-d', '--datasets',
427 - action='store',
428 - choices=analyzer.available_analyses(),
429 - help='Indicate what type of data should be exported.',
430 - default='cohort_dataset_backward_bar')
431419
432 - return parser, settings, wiki
 420+ return project, language, parser, settings
433421
434422 def main():
435 - parser, settings, wiki = init_args_parser()
 423+ project, language, parser, settings = init_args_parser()
436424 args = parser.parse_args()
437 - properties = wikiprojects.Wiki(settings, args)
 425+ properties = runtime_settings.RunTimeSettings(project, language, settings, args)
438426 #initialize logger
439427 logger = logging.getLogger('manager')
440428 logger.setLevel(logging.DEBUG)
@@ -441,19 +429,19 @@
442430 # Add the log message handler to the logger
443431 today = datetime.datetime.today()
444432 log_filename = os.path.join(settings.log_location, '%s%s_%s-%s-%s.log' \
445 - % (properties.language_code, properties.project,
 433+ % (properties.language.code, properties.project.name,
446434 today.day, today.month, today.year))
447435 handler = logging.handlers.RotatingFileHandler(log_filename,
448436 maxBytes=1024 * 1024,
449437 backupCount=3)
450438
451439 logger.addHandler(handler)
452 - logger.debug('Chosen language: \t%s' % wiki.language)
 440+ logger.debug('Chosen language: \t%s' % properties.language)
453441
454442 #start manager
455443 #detect_python_version(logger)
456444 about_statement()
457 - config.create_configuration(settings, args)
 445+ #config.create_configuration(settings, args)
458446
459447 properties.show_settings()
460448 args.func(properties, settings, logger)
Index: trunk/tools/editor_trends/wikilytics/api/tasks.py
@@ -1,57 +0,0 @@
2 -from multiprocessing import Process
3 -
4 -from celery.decorators import task
5 -from celery.registry import tasks
6 -
7 -from editor_trends.classes import wikiprojects
8 -from editor_trends import manage as manager
9 -
10 -from wikilytics.api.models import Job
11 -
12 -@task
13 -def launcher():
14 - jobs = Job.objects.filter(finished=False)
15 - n = len(jobs)
16 - if n > 0:
17 -
18 - job = jobs[0]
19 - job.in_progress = True
20 - job.save()
21 - print 'Launching %s task' % job.type
22 - if job.type == 'dataset':
23 - res = launch_editor_trends_toolkit(job.project, job.language)
24 - elif job.type == 'chart':
25 - res = launch_chart(job.project, job.language)
26 - else:
27 - print 'Unknown job type, no handler has been configured.'
28 -
29 - if res == True:
30 - job.finished = True
31 - job.in_progress = False
32 - job.save()
33 -
34 -
35 -def launch_editor_trends_toolkit(project, language):
36 - '''
37 - This function should only be called from within Django Wikilytics.
38 - '''
39 - res = False
40 - parser, settings, wiki = manager.init_args_parser()
41 - args = parser.parse_args(['dummy'])
42 - args.language = language
43 - args.project = project
44 - print args
45 - wiki = wikiprojects.Wiki(settings, args)
46 - p = Process(target=manager.all_launcher, args=(wiki, settings, None))
47 - p.start()
48 - #res = manager.all_launcher(wiki, settings, None)
49 - return res
50 -
51 -def launch_chart(project, language):
52 - res = False
53 -
54 -
55 - return False
56 -
57 -
58 -tasks.register(launcher)
Index: trunk/tools/editor_trends/wikilytics/api/forms.py
@@ -2,22 +2,22 @@
33 from django import forms
44
55 from wikilytics.api.widgets import MonthYearWidget
6 -from editor_trends.classes import wikiprojects
 6+from editor_trends.classes import languages, projects
77 from editor_trends.analyses.analyzer import available_analyses
88
9 -wiki = wikiprojects.Wiki('settings')
109
1110
1211 years = [year for year in xrange(2001, datetime.date.today().year + 1)]
13 -#print wiki.supported_languages()
14 -#print wiki.supported_projects()
1512
 13+project = projects.init()
 14+language = languages.init()
 15+
1616 class SearchForm(forms.Form):
1717 project = forms.CharField(initial='wiki',
18 - widget=forms.Select(choices=wiki.supported_projects()))
 18+ widget=forms.Select(choices=project.supported_projects()))
1919
2020 language = forms.CharField(initial='en',
21 - widget=forms.Select(choices=wiki.supported_languages(output='django')))
 21+ widget=forms.Select(choices=language.supported_languages(output='django')))
2222 #print 'Project: %s' % language
2323 #date = forms.DateField(widget=MonthYearWidget(years=years))
2424
Index: trunk/tools/editor_trends/classes/projects.py
@@ -0,0 +1,109 @@
 2+#!/usr/bin/python
 3+# -*- coding: utf-8 -*-
 4+'''
 5+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
 6+This program is free software; you can redistribute it and/or
 7+modify it under the terms of the GNU General Public License version 2
 8+as published by the Free Software Foundation.
 9+This program is distributed in the hope that it will be useful,
 10+but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 12+See the GNU General Public License for more details, at
 13+http://www.fsf.org/licenses/gpl.html
 14+'''
 15+
 16+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
 17+__author__email = 'dvanliere at gmail dot com'
 18+__date__ = '2011-01-26'
 19+__version__ = '0.1'
 20+
 21+import languages
 22+
 23+class Project:
 24+ def __init__(self, name, urlname, full_name):
 25+ self.name = name
 26+ self.full_name = full_name
 27+ self.urlname = urlname
 28+
 29+ def __repr__(self):
 30+ return '%s' % self.full_name
 31+
 32+ def supported_languages(self, output='parser'):
 33+ '''
 34+ Generate a list of tuples with currently supported languages.
 35+ '''
 36+ assert output == 'django' or output == 'parser'
 37+ lnc = languages.LanguageContainer()
 38+ d = []
 39+ if output == 'parser':
 40+ for lang in self.valid_languages:
 41+ d.append(lnc.languages.get(lang, 'Unknown language'))
 42+ else:
 43+ print 'not yet implemented'
 44+# def supported_languages(self, output='parser'):
 45+# if output == 'parser':
 46+# choices = [d.values() for d in self.match_languages()]
 47+# choices = [item for sublist in choices for item in sublist]
 48+# #print choices
 49+# return choices
 50+# else:
 51+# choices = [(d.get('lnc'), '%s' % (' | '.join(d.values()))) for d in self.match_languages()]
 52+# return tuple(choices)
 53+
 54+class ProjectContainer:
 55+ def __init__(self):
 56+ self.projects = {}
 57+ self.wikis = {'wiki': {'url':'wiki', 'full_name': 'Wikipedia'},
 58+ 'commons': {'url':'wikicommons', 'full_name': 'Wikimedia Commons'},
 59+ 'books': {'url':'wikibooks', 'full_name':'Wikibooks'},
 60+ 'news': {'url':'wikinews', 'full_name': 'Wikinews'},
 61+ 'quote': {'url':'wikiquote', 'full_name': 'Wikiquote'},
 62+ 'source': {'url':'wikisource', 'full_name': 'Wikisource'},
 63+ 'versity': {'url':'wikiversity', 'full_name':'Wikiversity'},
 64+ 'tionary': {'url':'wiktionary', 'full_name': 'Wiktionary'},
 65+ 'meta': {'url':'metawiki', 'full_name': 'Metawiki'},
 66+ 'species': {'url':'wikispecies', 'full_name': 'Wikispecies'},
 67+ 'incubator': {'url':'incubatorwiki', 'full_name': 'Wikimedia Incubator'},
 68+ 'foundation': {'url':'foundationwiki', 'full_name': 'Wikimedia Foundation'},
 69+ 'mediawiki': {'url':'mediawikiwiki', 'full_name': 'Medawiki Wiki'},
 70+ 'outreach': {'url':'outreachwiki', 'full_name': 'Outreach Wiki'},
 71+ 'strategic_planning': {'url':'strategywiki', 'full_name': 'Wikimedia Strategic Planning'},
 72+ 'usability_initiative': {'url':'usabilitywiki', 'full_name': 'Wikimedia Usability Initiative'},
 73+ }
 74+ for project in self.wikis:
 75+ props = self.wikis[project]
 76+ urlname = props['url']
 77+ full_name = props['full_name']
 78+ proj = self.projects.get(project, Project(project, urlname, full_name))
 79+ proj.valid_languages = self.project_supports_language(urlname)
 80+ self.projects[project] = proj
 81+
 82+ def get_project(self, name):
 83+ return self.projects.get(name, None)
 84+
 85+ def supported_projects(self):
 86+ choices = ([(key, value.title()) for key, value in self.wikis.iteritems()])
 87+ return tuple(choices)
 88+
 89+ def project_supports_language(self, urlname):
 90+ valid_languages_wiki = ['ace', 'af', 'als', 'an', 'roa-rup', 'ast', 'gn', 'av', 'ay', 'az', 'id', 'ms', 'bm', 'zh-min-nan', 'jv', 'map-bms', 'su', 'bug', 'bi', 'bar', 'bs', 'br', 'ca', 'cbk-zam', 'ch', 'cs', 'ny', 'sn', 'tum', 've', 'co', 'za', 'cy', 'da', 'pdc', 'de', 'nv', 'na', 'lad', 'et', 'ang', 'en', 'es', 'eo', 'ext', 'eu', 'to', 'fo', 'fr', 'frp', 'fy', 'ff', 'fur', 'ga', 'gv', 'sm', 'gd', 'gl', 'got', 'hak', 'haw', 'hsb', 'hr', 'io', 'ilo', 'ig', 'ia', 'ie', 'ik', 'xh', 'zu', 'is', 'it', 'mh', 'kl', 'pam', 'csb', 'kw', 'kg', 'ki', 'rw', 'ky', 'rn', 'sw', 'ht', 'ku', 'la', 'lv', 'lb', 'lt', 'lij', 'li', 'ln', 'jbo', 'lg', 'lmo', 'hu', 'mg', 'mt', 'mi', 'cdo', 'my', 'nah', 'fj', 'nl', 'cr', 'ne', 'nap', 'frr', 'pih', 'no', 'nn', 'nrm', 'oc', 'om', 'pag', 'pi', 'pap', 'pms', 'nds', 'pl', 'pt', 'ty', 'ksh', 'ro', 'rmy', 'rm', 'qu', 'se', 'sg', 'sc', 'sco', 'st', 'tn', 'sq', 'scn', 'simple', 'ceb', 'ss', 'sk', 'sl', 'so', 'sh', 'fi', 'sv', 'tl', 'tt', 'tet', 'vi', 'tpi', 'chy', 'tr', 'tk', 'tw', 'vec', 'vo', 'fiu-vro', 'wa', 'vls', 'war', 'wo', 'ts', 'yo', 'bat-smg', 'el', 'ab', 'ba', 'be', 'bg', 'bxr', 'cu', 'os', 'kk', 'kv', 'mk', 'mn', 'ce', 'ru', 'sr', 'tg', 'udm', 'uk', 'uz', 'xal', 'cv', 'hy', 'ka', 'he', 'yi', 'ar', 'fa', 'ha', 'ps', 'sd', 'ur', 'ug', 'arc', 'dv', 'as', 'bn', 'bpy', 'gu', 'bh', 'hi', 'ks', 'mr', 'kn', 'ne', 'new', 'sa', 'ml', 'or', 'pa', 'ta', 'te', 'bo', 'dz', 'si', 'km', 'lo', 'th', 'am', 'ti', 'iu', 'chr', 'ko', 'ja', 'zh', 'wuu', 'lzh', 'yue']
 91+ valid_languages_wiktionary = ['af', 'als', 'an', 'roa-rup', 'ast', 'gn', 'ay', 'az', 'id', 'ms', 'zh-min-nan', 'jv', 'su', 'mt', 'bs', 'br', 'ca', 'cs', 'co', 'za', 'cy', 'da', 'de', 'na', 'et', 'ang', 'en', 'es', 'eo', 'eu', 'fo', 'fr', 'fy', 'gd', 'ga', 'gv', 'sm', 'gl', 'hr', 'io', 'ia', 'ie', 'ik', 'zu', 'is', 'it', 'kl', 'csb', 'ku', 'kw', 'rw', 'sw', 'la', 'lv', 'lb', 'lt', 'li', 'ln', 'jbo', 'hu', 'mg', 'mi', 'nah', 'fj', 'nl', 'no', 'nn', 'oc', 'om', 'uz', 'nds', 'pl', 'pt', 'ro', 'qu', 'sg', 'st', 'tn', 'scn', 'simple', 'sk', 'sl', 'sq', 'ss', 'so', 'sh', 'fi', 'sv', 'tl', 'tt', 'vi', 'tpi', 'tr', 'tk', 'vo', 'wa', 'wo', 'ts', 'el', 'tsd', 'be', 'bg', 'kk', 'ky', 'mk', 'mn', 'ru', 'sr', 'tg', 'uk', 'hy', 'ka', 'he', 'yi', 'ar', 'fa', 'ha', 'ps', 'sd', 'ug', 'ur', 'dv', 'bn', 'gu', 'hi', 'ks', 'ne', 'sa', 'mr', 'kn', 'ml', 'pa', 'ta', 'te', 'km', 'lo', 'my', 'si', 'th', 'am', 'ti', 'iu', 'chr', 'ko', 'ja', 'zh']
 92+ valid_languages_wikiquote = ['af', 'als', 'id', 'bs', 'ca', 'cs', 'da', 'de', 'en', 'es', 'eo', 'eu', 'fr', 'is', 'it', 'ku', 'la', 'lb', 'lt', 'hu', 'nl', 'no', 'pl', 'pt', 'ro', 'sk', 'fi', 'sv', 'tr', 'el', 'bg', 'ru', 'sr', 'ka', 'he', 'ar', 'fa', 'gu', 'mr', 'ta', 'th', 'ko', 'ja', 'zh']
 93+ valid_languages_wikinews = ['als', 'bs', 'ca', 'cs', 'de', 'en', 'es', 'fa', 'fr', 'it', 'hu', 'nl', 'no', 'nds', 'pl', 'pt', 'ro', 'fi', 'sv', 'tr', 'bg', 'ru', 'sr', 'uk', 'he', 'ar', 'sd', 'ta', 'th', 'ko', 'ja', 'zh']
 94+ valid_languages_wikisource = ['als', 'id', 'bs', 'cs', 'cy', 'da', 'de', 'en', 'es', 'fr', 'gl', 'hr', 'is', 'it', 'la', 'lt', 'li', 'nl', 'pl', 'pt', 'ro', 'sk', 'fi', 'sv', 'vi', 'tr', 'el', 'ru', 'sr', 'he', 'yi', 'ar', 'fa', 'bn', 'ml', 'th', 'ko', 'ja', 'zh']
 95+ valid_languages_wikibooks = ['af', 'als', 'ang', 'als', 'az', 'ms', 'su', 'bs', 'cs', 'co', 'cy', 'da', 'de', 'na', 'et', 'en', 'es', 'eo', 'eu', 'fr', 'fy', 'gl', 'hr', 'ia', 'ie', 'is', 'it', 'ku', 'la', 'lt', 'mg', 'nl', 'no', 'oc', 'uz', 'nds', 'pl', 'pt', 'ro', 'qu', 'sq', 'simple', 'sk', 'sl', 'fi', 'sv', 'vi', 'tl', 'tt', 'tr', 'tk', 'vo', 'el', 'bg', 'be', 'kk', 'ky', 'mk', 'ru', 'sr', 'tg', 'uk', 'cv', 'hy', 'ka', 'he', 'ar', 'fa', 'ps', 'ur', 'bn', 'hi', 'mr', 'sa', 'kn', 'ml', 'pa', 'ta', 'te', 'km', 'ne', 'th', 'ko', 'ja', 'zh']
 96+ valid_languages_wikiversity = ['cs', 'de', 'en', 'es', 'fr', 'it', 'pt', 'fi', 'el', 'ru', 'ja']
 97+ valid_languages_wikicommons = ['en']
 98+ valid_languages_wikispecies = ['en']
 99+ try:
 100+ languages = locals()['valid_languages_%s' % urlname]
 101+ return languages
 102+ except KeyError:
 103+ return []
 104+
 105+def init():
 106+ pc = ProjectContainer()
 107+ return pc.get_project('wiki')
 108+
 109+if __name__ == '__main__':
 110+ init()
Property changes on: trunk/tools/editor_trends/classes/projects.py
___________________________________________________________________
Added: svn:eol-style
1111 + native
Index: trunk/tools/editor_trends/classes/runtime_settings.py
@@ -0,0 +1,198 @@
 2+#!/usr/bin/python
 3+# coding=utf-8
 4+'''
 5+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
 6+This program is free software; you can redistribute it and/or
 7+modify it under the terms of the GNU General Public License version 2
 8+as published by the Free Software Foundation.
 9+This program is distributed in the hope that it will be useful,
 10+but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 12+See the GNU General Public License for more details, at
 13+http,//www.fsf.org/licenses/gpl.html
 14+'''
 15+
 16+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
 17+__email__ = 'dvanliere at gmail dot com'
 18+__date__ = '2010-10-21'
 19+__version__ = '0.1'
 20+
 21+'''
 22+This file provides mapper between language name and locale language name and
 23+Wikipedia acronym.
 24+Gothic and Birmese are not yet supported, see rows 450 and 554.
 25+'''
 26+
 27+import os
 28+import sys
 29+import locale
 30+import datetime
 31+import time
 32+import re
 33+sys.path.append('..')
 34+
 35+from utils import text_utils
 36+from utils import ordered_dict as odict
 37+from classes import languages
 38+
 39+
 40+class RunTimeSettings:
 41+ '''
 42+ This class keeps track of the commands issued by the user and is used to
 43+ feed the different etl functions. Difference with configuration class is
 44+ that the configuration class are read-only settings that are always the
 45+ same for a user while these settings can change depending on the kind of
 46+ analysis requested.
 47+ '''
 48+ def __init__(self, project, language, settings, args=None):
 49+ self.project = project
 50+ self.language = language
 51+ self.settings = settings
 52+
 53+ if args:
 54+ self.args = args
 55+ self.hash = self.secs_since_epoch()
 56+ self.base_location = self.get_value('location') \
 57+ if self.get_value('location') != None else settings.input_location
 58+ self.update_project_settings()
 59+ self.update_language_settings()
 60+
 61+ self.targets = self.get_value('charts')
 62+ self.keywords = self.split_keywords(self.get_value('keywords'))
 63+ self.function = self.get_value('func')
 64+ self.collection = self.get_value('collection')
 65+ self.ignore = self.get_value('except')
 66+ self.clean = self.get_value('new')
 67+ self.force = self.get_value('force')
 68+ self.location = self.get_project_location()
 69+ self.filename = self.generate_wikidump_filename()
 70+ self.namespaces = self.get_namespaces()
 71+
 72+ self.dataset = os.path.join(settings.dataset_location,
 73+ self.project.name)
 74+ self.charts = os.path.join(settings.chart_location,
 75+ self.project.name)
 76+
 77+ self.txt = os.path.join(self.location, 'txt')
 78+ self.sorted = os.path.join(self.location, 'sorted')
 79+
 80+ self.directories = [self.location,
 81+ self.txt,
 82+ self.sorted,
 83+ self.dataset,
 84+ self.charts]
 85+ self.path = '/%s/latest/' % self.project
 86+ self.targets = self.targets.split(', ')
 87+ settings.verify_environment(self.directories)
 88+
 89+ def __str__(self):
 90+ return 'Runtime Settings for project %s%s' % (self.language.name, self.project.name)
 91+
 92+ def __iter__(self):
 93+ for item in self.__dict__:
 94+ yield item
 95+
 96+ def dict(self):
 97+ '''
 98+ Return a dictionary with all properties and their values
 99+ '''
 100+ props = {}
 101+ for prop in self:
 102+ props[prop] = getattr(self, prop)
 103+ return props
 104+
 105+ def split_keywords(self, keywords):
 106+ keywords = keywords.split(',')
 107+ d = {}
 108+ for kw in keywords:
 109+ key, value = kw.split('=')
 110+ d[key] = value
 111+ return d
 112+
 113+ def get_project_location(self):
 114+ '''
 115+ Construct the full project location
 116+ '''
 117+ return os.path.join(self.base_location, self.language.code, self.project.name)
 118+
 119+ def show_settings(self):
 120+ '''
 121+ Prints some very high level configuration settings.
 122+ '''
 123+ about = {}
 124+ about['Project'] = '%s' % self.project.full_name.title()
 125+ about['Language'] = '%s / %s / %s' % (self.language.name, self.language.locale, self.language.code)
 126+ about['Input directory'] = '%s' % self.location
 127+ about['Output directory'] = '%s and subdirectories' % self.location
 128+
 129+ max_length_key = max([len(key) for key in about.keys()])
 130+ print 'Final settings after parsing command line arguments:'
 131+ for ab in about:
 132+ print '%s: %s' % (ab.rjust(max_length_key), about[ab])
 133+
 134+
 135+ def get_value(self, key):
 136+ '''
 137+ Returns key from argument if present else None
 138+ '''
 139+ return getattr(self.args, key, None)
 140+
 141+ def generate_wikidump_filename(self):
 142+ '''
 143+ Generate the main name of the wikidump file to be downloaded.
 144+ '''
 145+ return '%s-latest-%s' % (self.project, self.get_value('file'))
 146+
 147+ def update_language_settings(self):
 148+ '''
 149+ Determine the language to be used, default is the system language
 150+ '''
 151+ lang = self.get_value('language')
 152+ lnc = languages.LanguageContainer()
 153+ default = lnc.determine_default_language()
 154+ if lang != default.name:
 155+ lang = lnc.get_language(lang)
 156+ return lang
 157+
 158+ def update_project_settings(self):
 159+ '''
 160+ Determine the project to be analyzed, default is Wikipedia
 161+ '''
 162+ proj = self.get_value('project')
 163+ if proj != 'wiki':
 164+ pc = projects.ProjectContainer()
 165+ proj = pc.get_project(proj)
 166+ return proj
 167+
 168+ def get_projectname(self):
 169+ '''
 170+ Determine the full project name based on the project acronym and language.
 171+ '''
 172+ #language_code = self.get_language()
 173+ print self.language.code, self.project.name
 174+ if self.language.code == None:
 175+ print 'Entered language: %s is not a valid Wikimedia language' \
 176+ % self.get_value('language')
 177+ sys.exit(-1)
 178+
 179+ if self.project.full_name == None:
 180+ print 'Entered project: %s is not valid Wikimedia Foundation project.' \
 181+ % self.get_value('project')
 182+ sys.exit(-1)
 183+ else:
 184+ return '%s%s' % (self.language_code, self.short_project)
 185+
 186+ def secs_since_epoch(self):
 187+ dt = datetime.datetime.now()
 188+ return time.mktime(dt.timetuple())
 189+
 190+ def get_namespaces(self):
 191+ '''
 192+ Get the list of namespaces that should be included for analysis. Default
 193+ is namespace 0 (the main namespace)
 194+ '''
 195+ namespaces = self.get_value('namespace')
 196+ if namespaces != None:
 197+ return namespaces.split(',')
 198+ else:
 199+ return namespaces
Property changes on: trunk/tools/editor_trends/classes/runtime_settings.py
___________________________________________________________________
Added: svn:eol-style
1200 + native
Added: svn:mime-type
2201 + text/plain
Index: trunk/tools/editor_trends/classes/languages.py
@@ -0,0 +1,669 @@
 2+#!/usr/bin/python
 3+# coding=utf-8
 4+
 5+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
 6+__author__email = 'dvanliere at gmail dot com'
 7+__date__ = '2011-01-26'
 8+__version__ = '0.1'
 9+
 10+import locale
 11+import sys
 12+sys.path.append('..')
 13+from utils import ordered_dict as odict
 14+
 15+class Language:
 16+ def __init__(self, name, code, locale=None):
 17+ self.name = name
 18+ self.locale = locale
 19+ self.code = code
 20+
 21+ def __repr__(self):
 22+ return '%s - %s' % (self.code, self.name)
 23+
 24+class LanguageContainer:
 25+ def __init__(self):
 26+ self.init_languages = odict.OrderedDict([
 27+ (u'English', 'en'),
 28+ (u'German', 'de'),
 29+ (u'French', 'fr'),
 30+ (u'Italian', 'it'),
 31+ (u'Polish', 'pl'),
 32+ (u'Japanese', 'ja'),
 33+ (u'Spanish', 'es'),
 34+ (u'Dutch', 'nl'),
 35+ (u'Portuguese', 'pt'),
 36+ (u'Russian', 'ru'),
 37+ (u'Swedish', 'sv'),
 38+ (u'Chinese', 'zh'),
 39+ (u'Catalan', 'ca'),
 40+ (u'Norwegian', 'no'),
 41+ (u'Bokmål', 'no'),
 42+ (u'Finnish', 'fi'),
 43+ (u'Ukrainian', 'uk'),
 44+ (u'Hungarian', 'hu'),
 45+ (u'Czech', 'cs'),
 46+ (u'Romanian', 'ro'),
 47+ (u'Turkish', 'tr'),
 48+ (u'Korean', 'ko'),
 49+ (u'Vietnamese', 'vi'),
 50+ (u'Danish', 'da'),
 51+ (u'Arabic', 'ar'),
 52+ (u'Esperanto', 'eo'),
 53+ (u'Serbian', 'sr'),
 54+ (u'Indonesian', 'id'),
 55+ (u'Lithuanian', 'lt'),
 56+ (u'Volapük', 'vo'),
 57+ (u'Slovak', 'sk'),
 58+ (u'Hebrew', 'he'),
 59+ (u'Bulgarian', 'bg'),
 60+ (u'Persian', 'fa'),
 61+ (u'Slovenian', 'sl'),
 62+ (u'Waray-Waray', 'war'),
 63+ (u'Croatian', 'hr'),
 64+ (u'Estonian', 'et'),
 65+ (u'Malay', 'ms'),
 66+ (u'Newar', 'new'),
 67+ (u'Nepal Bhasa', 'new'),
 68+ (u'Simple English', 'simple'),
 69+ (u'Galician', 'gl'),
 70+ (u'Thai', 'th'),
 71+ (u'Aromanian', 'roa-rup'),
 72+ (u'Nynorsk', 'nn'),
 73+ (u'Basque', 'eu'),
 74+ (u'Hindi', 'hi'),
 75+ (u'Greek', 'el'),
 76+ (u'Haitian', 'ht'),
 77+ (u'Latin', 'la'),
 78+ (u'Telugu', 'te'),
 79+ (u'Georgian', 'ka'),
 80+ (u'Cebuano', 'ceb'),
 81+ (u'Macedonian', 'mk'),
 82+ (u'Azeri', 'az'),
 83+ (u'Tagalog', 'tl'),
 84+ (u'Breton', 'br'),
 85+ (u'Serbo-Croatian', 'sh'),
 86+ (u'Marathi', 'mr'),
 87+ (u'Luxembourgish', 'lb'),
 88+ (u'Javanese', 'jv'),
 89+ (u'Latvian', 'lv'),
 90+ (u'Bosnian', 'bs'),
 91+ (u'Icelandic', 'is'),
 92+ (u'Welsh', 'cy'),
 93+ (u'Belarusian', 'be-x-old'),
 94+ (u'Taraškievica', 'be-x-old'),
 95+ (u'Piedmontese', 'pms'),
 96+ (u'Albanian', 'sq'),
 97+ (u'Tamil', 'ta'),
 98+ (u'Bishnupriya Manipuri', 'bpy'),
 99+ (u'Belarusian', 'be'),
 100+ (u'Aragonese', 'an'),
 101+ (u'Occitan', 'oc'),
 102+ (u'Bengali', 'bn'),
 103+ (u'Swahili', 'sw'),
 104+ (u'Ido', 'io'),
 105+ (u'Ripuarian', 'ksh'),
 106+ (u'Lombard', 'lmo'),
 107+ (u'West Frisian', 'fy'),
 108+ (u'Gujarati', 'gu'),
 109+ (u'Low Saxon', 'nds'),
 110+ (u'Afrikaans', 'af'),
 111+ (u'Sicilian', 'scn'),
 112+ (u'Quechua', 'qu'),
 113+ (u'Kurdish', 'ku'),
 114+ (u'Urdu', 'ur'),
 115+ (u'Sundanese', 'su'),
 116+ (u'Malayalam', 'ml'),
 117+ (u'Cantonese', 'zh-yue'),
 118+ (u'Asturian', 'ast'),
 119+ (u'Neapolitan', 'nap'),
 120+ (u'Samogitian', 'bat-smg'),
 121+ (u'Walloon', 'wa'),
 122+ (u'Chuvash', 'cv'),
 123+ (u'Irish', 'ga'),
 124+ (u'Armenian', 'hy'),
 125+ (u'Yoruba', 'yo'),
 126+ (u'Kannada', 'kn'),
 127+ (u'Tajik', 'tg'),
 128+ (u'Tarantino', 'roa-tara'),
 129+ (u'Venetian', 'vec'),
 130+ (u'Western Panjabi', 'pnb'),
 131+ (u'Nepali', 'ne'),
 132+ (u'Scottish Gaelic', 'gd'),
 133+ (u'Yiddish', 'yi'),
 134+ (u'Min Nan', 'zh-min-nan'),
 135+ (u'Uzbek', 'uz'),
 136+ (u'Tatar', 'tt'),
 137+ (u'Kapampangan', 'pam'),
 138+ (u'Ossetian', 'os'),
 139+ (u'Sakha', 'sah'),
 140+ (u'Alemannic', 'als'),
 141+ (u'Maori', 'mi'),
 142+ (u'Egyptian Arabic', 'arz'),
 143+ (u'Kazakh', 'kk'),
 144+ (u'Nahuatl', 'nah'),
 145+ (u'Limburgian', 'li'),
 146+ (u'Upper Sorbian', 'hsb'),
 147+ (u'Gilaki', 'glk'),
 148+ (u'Corsican', 'co'),
 149+ (u'Gan', 'gan'),
 150+ (u'Amharic', 'am'),
 151+ (u'Mongolian', 'mn'),
 152+ (u'Interlingua', 'ia'),
 153+ (u'Central Bicolano', 'bcl'),
 154+ (u'Võro', 'fiu-vro'),
 155+ (u'Dutch Low Saxon', 'nds-nl'),
 156+ (u'Faroese', 'fo'),
 157+ (u'Turkmen', 'tk'),
 158+ (u'Scots', 'sco'),
 159+ (u'West Flemish', 'vls'),
 160+ (u'Sinhalese', 'si'),
 161+ (u'Sanskrit', 'sa'),
 162+ (u'Bavarian', 'bar'),
 163+ (u'Burmese', 'my'),
 164+ (u'Manx', 'gv'),
 165+ (u'Divehi', 'dv'),
 166+ (u'Norman', 'nrm'),
 167+ (u'Pangasinan', 'pag'),
 168+ (u'Romansh', 'rm'),
 169+ (u'Banyumasan', 'map-bms'),
 170+ (u'Zazaki', 'diq'),
 171+ (u'Sorani', 'ckb'),
 172+ (u'Northern Sami', 'se'),
 173+ (u'Mazandarani', 'mzn'),
 174+ (u'Wu', 'wuu'),
 175+ (u'Uyghur', 'ug'),
 176+ (u'Friulian', 'fur'),
 177+ (u'Ligurian', 'lij'),
 178+ (u'Maltese', 'mt'),
 179+ (u'Bihari', 'bh'),
 180+ (u'Novial', 'nov'),
 181+ (u'Malagasy', 'mg'),
 182+ (u'Kashubian', 'csb'),
 183+ (u'Ilokano', 'ilo'),
 184+ (u'Sardinian', 'sc'),
 185+ (u'Classical Chinese', 'zh-classical'),
 186+ (u'Khmer', 'km'),
 187+ (u'Ladino', 'lad'),
 188+ (u'Pali', 'pi'),
 189+ (u'Anglo-Saxon', 'ang'),
 190+ (u'Zamboanga Chavacano', 'cbk-zam'),
 191+ (u'Tibetan', 'bo'),
 192+ (u'Fiji Hindi', 'hif'),
 193+ (u'Franco-Provençal', 'frp'),
 194+ (u'Arpitan', 'frp'),
 195+ (u'Hakka', 'hak'),
 196+ (u'Cornish', 'kw'),
 197+ (u'Punjabi', 'pa'),
 198+ (u'Pashto', 'ps'),
 199+ (u'Kalmyk', 'xal'),
 200+ (u'Silesian', 'szl'),
 201+ (u'Pennsylvania German', 'pdc'),
 202+ (u'Hawaiian', 'haw'),
 203+ (u'Saterland Frisian', 'stq'),
 204+ (u'Interlingue', 'ie'),
 205+ (u'Navajo', 'nv'),
 206+ (u'Fijian', 'fj'),
 207+ (u'Crimean Tatar', 'crh'),
 208+ (u'Komi', 'kv'),
 209+ (u'Tongan', 'to'),
 210+ (u'Acehnese', 'ace'),
 211+ (u'Somali', 'so'),
 212+ (u'Erzya', 'myv'),
 213+ (u'Guarani', 'gn'),
 214+ (u'Karachay-Balkar', 'krc'),
 215+ (u'Extremaduran', 'ext'),
 216+ (u'Lingala', 'ln'),
 217+ (u'Kirghiz', 'ky'),
 218+ (u'Meadow Mari', 'mhr'),
 219+ (u'Assyrian Neo-Aramaic', 'arc'),
 220+ (u'Emilian-Romagnol', 'eml'),
 221+ (u'Lojban', 'jbo'),
 222+ (u'Picard', 'pcd'),
 223+ (u'Aymara', 'ay'),
 224+ (u'Wolof', 'wo'),
 225+ (u'Tumbuka', 'tum'),
 226+ (u'Kabyle', 'kab'),
 227+ (u'Bashkir', 'ba'),
 228+ (u'North Frisian', 'frr'),
 229+ (u'Tahitian', 'ty'),
 230+ (u'Tok Pisin', 'tpi'),
 231+ (u'Papiamentu', 'pap'),
 232+ (u'Zealandic', 'zea'),
 233+ (u'Sranan', 'srn'),
 234+ (u'Greenlandic', 'kl'),
 235+ (u'Udmurt', 'udm'),
 236+ (u'Chechen', 'ce'),
 237+ (u'Igbo', 'ig'),
 238+ (u'Komi-Permyak', 'koi'),
 239+ (u'Oriya', 'or'),
 240+ (u'Lower Sorbian', 'dsb'),
 241+ (u'Kongo', 'kg'),
 242+ (u'Lao', 'lo'),
 243+ (u'Abkhazian', 'ab'),
 244+ (u'Moksha', 'mdf'),
 245+ (u'Romani', 'rmy'),
 246+ (u'Hill Mari', 'mrj'),
 247+ (u'Banjar', 'bjn'),
 248+ (u'Old Church Slavonic', 'cu'),
 249+ (u'Mirandese', 'mwl'),
 250+ (u'Karakalpak', 'kaa'),
 251+ (u'Samoan', 'sm'),
 252+ (u'Moldovan', 'mo'),
 253+ (u'Tetum', 'tet'),
 254+ (u'Avar', 'av'),
 255+ (u'Kashmiri', 'ks'),
 256+ (u'Gothic', 'got'),
 257+ (u'Sindhi', 'sd'),
 258+ (u'Bambara', 'bm'),
 259+ (u'Nauruan', 'na'),
 260+ (u'Norfolk', 'pih'),
 261+ (u'Pontic', 'pnt'),
 262+ (u'Inuktitut', 'iu'),
 263+ (u'Inupiak', 'ik'),
 264+ (u'Bislama', 'bi'),
 265+ (u'Cherokee', 'chr'),
 266+ (u'Assamese', 'as'),
 267+ (u'Min Dong', 'cdo'),
 268+ (u'Ewe', 'ee'),
 269+ (u'Swati', 'ss'),
 270+ (u'Oromo', 'om'),
 271+ (u'Zhuang', 'za'),
 272+ (u'Zulu', 'zu'),
 273+ (u'Tigrinya', 'ti'),
 274+ (u'Venda', 've'),
 275+ (u'Tsonga', 'ts'),
 276+ (u'Hausa', 'ha'),
 277+ (u'Dzongkha', 'dz'),
 278+ (u'Sango', 'sg'),
 279+ (u'Chamorro', 'ch'),
 280+ (u'Cree', 'cr'),
 281+ (u'Xhosa', 'xh'),
 282+ (u'Akan', 'ak'),
 283+ (u'Sesotho', 'st'),
 284+ (u'Kinyarwanda', 'rw'),
 285+ (u'Tswana', 'tn'),
 286+ (u'Kikuyu', 'ki'),
 287+ (u'Buryat', 'bxr'),
 288+ (u'Buginese', 'bug'),
 289+ (u'Chichewa', 'ny'),
 290+ (u'Lak', 'lbe'),
 291+ (u'Twi', 'tw'),
 292+ (u'Shona', 'sn'),
 293+ (u'Kirundi', 'rn'),
 294+ (u'Fula', 'ff'),
 295+ (u'Cheyenne', 'chy'),
 296+ (u'Luganda', 'lg'),
 297+ (u'Ndonga', 'ng'),
 298+ (u'Sichuan Yi', 'ii'),
 299+ (u'Choctaw', 'cho'),
 300+ (u'Marshallese', 'mh'),
 301+ (u'Afar', 'aa'),
 302+ (u'Kuanyama', 'kj'),
 303+ (u'Hiri Motu', 'ho'),
 304+ (u'Muscogee', 'mus'),
 305+ (u'Kanuri', 'kr'),
 306+ (u'Herero', 'hz'),
 307+ (u'English', 'en'),
 308+ (u'Deutsch', 'de'),
 309+ (u'Français', 'fr'),
 310+ (u'Italiano', 'it'),
 311+ (u'Polski', 'pl'),
 312+ (u'日本語', 'ja'),
 313+ (u'Español', 'es'),
 314+ (u'Nederlands', 'nl'),
 315+ (u'Português', 'pt'),
 316+ (u'Русский', 'ru'),
 317+ (u'Svenska', 'sv'),
 318+ (u'中文', 'zh'),
 319+ (u'Català', 'ca'),
 320+ (u'Norsk', 'no'),
 321+ (u'Bokmål', 'no'),
 322+ (u'Suomi', 'fi'),
 323+ (u'Українська', 'uk'),
 324+ (u'Magyar', 'hu'),
 325+ (u'Čeština', 'cs'),
 326+ (u'Română', 'ro'),
 327+ (u'Türkçe', 'tr'),
 328+ (u'한국어', 'ko'),
 329+ (u'Tiếng Việt', 'vi'),
 330+ (u'Dansk', 'da'),
 331+ (u'العربية', 'ar'),
 332+ (u'Esperanto', 'eo'),
 333+ (u'Српски', 'sr'),
 334+ (u'Srpski', 'sr'),
 335+ (u'Bahasa Indonesia', 'id'),
 336+ (u'Lietuvių', 'lt'),
 337+ (u'Volapük', 'vo'),
 338+ (u'Slovenčina', 'sk'),
 339+ (u'עברית', 'he'),
 340+ (u'Български', 'bg'),
 341+ (u'فارسی', 'fa'),
 342+ (u'Slovenščina', 'sl'),
 343+ (u'Winaray', 'war'),
 344+ (u'Hrvatski', 'hr'),
 345+ (u'Eesti', 'et'),
 346+ (u'Bahasa Melayu', 'ms'),
 347+ (u'नेपाल भाषा', 'new'),
 348+ (u'Simple English', 'simple'),
 349+ (u'Galego', 'gl'),
 350+ (u'ไทย', 'th'),
 351+ (u'Armãneashce', 'roa-rup'),
 352+ (u'Nynorsk', 'nn'),
 353+ (u'Euskara', 'eu'),
 354+ (u'हिन्दी', 'hi'),
 355+ (u'Ελληνικά', 'el'),
 356+ (u'Krèyol ayisyen', 'ht'),
 357+ (u'Latina', 'la'),
 358+ (u'తెలుగు', 'te'),
 359+ (u'ქართული', 'ka'),
 360+ (u'Sinugboanong Binisaya', 'ceb'),
 361+ (u'Македонски', 'mk'),
 362+ (u'Azərbaycan', 'az'),
 363+ (u'Tagalog', 'tl'),
 364+ (u'Brezhoneg', 'br'),
 365+ (u'Srpskohrvatski', 'sh'),
 366+ (u'Српскохрватски', 'sh'),
 367+ (u'मराठी', 'mr'),
 368+ (u'Lëtzebuergesch', 'lb'),
 369+ (u'Basa Jawa', 'jv'),
 370+ (u'Latviešu', 'lv'),
 371+ (u'Bosanski', 'bs'),
 372+ (u'Íslenska', 'is'),
 373+ (u'Cymraeg', 'cy'),
 374+ (u'Беларуская', 'be-x-old'),
 375+ (u'тарашкевіца', 'be-x-old'),
 376+ (u'Piemontèis', 'pms'),
 377+ (u'Shqip', 'sq'),
 378+ (u'தமிழ்', 'ta'),
 379+ (u'ইমার ঠার', 'bpy'),
 380+ (u'বিষ্ণুপ্রিয়া মণিপুরী', 'bpy'),
 381+ (u'Беларуская', 'be'),
 382+ (u'Aragonés', 'an'),
 383+ (u'Occitan', 'oc'),
 384+ (u'বাংলা', 'bn'),
 385+ (u'Kiswahili', 'sw'),
 386+ (u'Ido', 'io'),
 387+ (u'Ripoarisch', 'ksh'),
 388+ (u'Lumbaart', 'lmo'),
 389+ (u'Frysk', 'fy'),
 390+ (u'ગુજરાતી', 'gu'),
 391+ (u'Plattdüütsch', 'nds'),
 392+ (u'Afrikaans', 'af'),
 393+ (u'Sicilianu', 'scn'),
 394+ (u'Runa Simi', 'qu'),
 395+ (u'Kurdî', 'ku'),
 396+ (u'كوردی', 'ku'),
 397+ (u'اردو', 'ur'),
 398+ (u'Basa Sunda', 'su'),
 399+ (u'മലയാളം', 'ml'),
 400+ (u'粵語', 'zh-yue'),
 401+ (u'Asturianu', 'ast'),
 402+ (u'Nnapulitano', 'nap'),
 403+ (u'Žemaitėška', 'bat-smg'),
 404+ (u'Walon', 'wa'),
 405+ (u'Чăваш', 'cv'),
 406+ (u'Gaeilge', 'ga'),
 407+ (u'Հայերեն', 'hy'),
 408+ (u'Yorùbá', 'yo'),
 409+ (u'ಕನ್ನಡ', 'kn'),
 410+ (u'Тоҷикӣ', 'tg'),
 411+ (u'Tarandíne', 'roa-tara'),
 412+ (u'Vèneto', 'vec'),
 413+ (u'شاہ مکھی پنجابی', 'pnb'),
 414+ (u'Shāhmukhī Pañjābī', 'pnb'),
 415+ (u'नेपाली', 'ne'),
 416+ (u'Gàidhlig', 'gd'),
 417+ (u'ייִדיש', 'yi'),
 418+ (u'Bân-lâm-gú', 'zh-min-nan'),
 419+ (u'O‘zbek', 'uz'),
 420+ (u'Tatarça', 'tt'),
 421+ (u'Татарча', 'tt'),
 422+ (u'Kapampangan', 'pam'),
 423+ (u'Иронау', 'os'),
 424+ (u'Саха тыла', 'sah'),
 425+ (u'Saxa Tyla', 'sah'),
 426+ (u'Alemannisch', 'als'),
 427+ (u'Māori', 'mi'),
 428+ (u'مصرى', 'arz'),
 429+ (u'Maṣrī', 'arz'),
 430+ (u'Қазақша', 'kk'),
 431+ (u'Nāhuatl', 'nah'),
 432+ (u'Limburgs', 'li'),
 433+ (u'Hornjoserbsce', 'hsb'),
 434+ (u'گیلکی', 'glk'),
 435+ (u'Corsu', 'co'),
 436+ (u'贛語', 'gan'),
 437+ (u'አማርኛ', 'am'),
 438+ (u'Монгол', 'mn'),
 439+ (u'Interlingua', 'ia'),
 440+ (u'Bikol', 'bcl'),
 441+ (u'Võro', 'fiu-vro'),
 442+ (u'Nedersaksisch', 'nds-nl'),
 443+ (u'Føroyskt', 'fo'),
 444+ (u'تركمن ', 'tk'),
 445+ (u'Туркмен', 'tk'),
 446+ (u'Scots', 'sco'),
 447+ (u'West-Vlams', 'vls'),
 448+ (u'සිංහල', 'si'),
 449+ (u'संस्कृतम्', 'sa'),
 450+ (u'Boarisch', 'bar'),
 451+ (u'မ္ရန္‌မာစာ', 'my'), #Needs fix
 452+ (u'Gaelg', 'gv'),
 453+ (u'ދިވެހިބަސް', 'dv'),
 454+ (u'Nouormand', 'nrm'),
 455+ (u'Normaund', 'nrm'),
 456+ (u'Pangasinan', 'pag'),
 457+ (u'Rumantsch', 'rm'),
 458+ (u'Basa Banyumasan', 'map-bms'),
 459+ (u'Zazaki', 'diq'),
 460+ (u'Soranî', 'ckb'),
 461+ (u'کوردی', 'ckb'),
 462+ (u'Sámegiella', 'se'),
 463+ (u'مَزِروني', 'mzn'),
 464+ (u'吴语', 'wuu'),
 465+ (u'Oyghurque', 'ug'),
 466+ (u'Furlan', 'fur'),
 467+ (u'Líguru', 'lij'),
 468+ (u'Malti', 'mt'),
 469+ (u'भोजपुरी', 'bh'),
 470+ (u'Novial', 'nov'),
 471+ (u'Malagasy', 'mg'),
 472+ (u'Kaszëbsczi', 'csb'),
 473+ (u'Ilokano', 'ilo'),
 474+ (u'Sardu', 'sc'),
 475+ (u'古文', 'zh-classical'),
 476+ (u'文言文', 'zh-classical'),
 477+ (u'ភាសាខ្មែរ', 'km'),
 478+ (u'Dzhudezmo', 'lad'),
 479+ (u'पाऴि', 'pi'),
 480+ (u'Englisc', 'ang'),
 481+ (u'Chavacano de Zamboanga', 'cbk-zam'),
 482+ (u'བོད་སྐད', 'bo'),
 483+ (u'Fiji Hindi', 'hif'),
 484+ (u'Arpitan', 'frp'),
 485+ (u'Hak-kâ-fa', 'hak'),
 486+ (u'客家話', 'hak'),
 487+ (u'Kernewek', 'kw'),
 488+ (u'Karnuack', 'kw'),
 489+ (u'ਪੰਜਾਬੀ', 'pa'),
 490+ (u'پښتو', 'ps'),
 491+ (u'Хальмг', 'xal'),
 492+ (u'Ślůnski', 'szl'),
 493+ (u'Deitsch', 'pdc'),
 494+ (u'Hawai`i', 'haw'),
 495+ (u'Seeltersk', 'stq'),
 496+ (u'Interlingue', 'ie'),
 497+ (u'Diné bizaad', 'nv'),
 498+ (u'Na Vosa Vakaviti', 'fj'),
 499+ (u'Qırımtatarca', 'crh'),
 500+ (u'Коми', 'kv'),
 501+ (u'faka Tonga', 'to'),
 502+ (u'Bahsa Acèh', 'ace'),
 503+ (u'Soomaaliga', 'so'),
 504+ (u'Эрзянь', 'myv'),
 505+ (u'Erzjanj Kelj', 'myv'),
 506+ (u"Avañe'ẽ", 'gn'),
 507+ (u'Къарачай-Малкъар', 'krc'),
 508+ (u'Qarachay-Malqar', 'krc'),
 509+ (u'Estremeñu', 'ext'),
 510+ (u'Lingala', 'ln'),
 511+ (u'Кыргызча', 'ky'),
 512+ (u'Олык Марий', 'mhr'),
 513+ (u'Olyk Marij', 'mhr'),
 514+ (u'ܐܪܡܝܐ', 'arc'),
 515+ (u'Emiliàn e rumagnòl', 'eml'),
 516+ (u'Lojban', 'jbo'),
 517+ (u'Picard', 'pcd'),
 518+ (u'Aymar', 'ay'),
 519+ (u'Wolof', 'wo'),
 520+ (u'chiTumbuka', 'tum'),
 521+ (u'Taqbaylit', 'kab'),
 522+ (u'Башҡорт', 'ba'),
 523+ (u'Frasch', 'frr'),
 524+ (u'Reo Mā`ohi', 'ty'),
 525+ (u'Tok Pisin', 'tpi'),
 526+ (u'Papiamentu', 'pap'),
 527+ (u'Zeêuws', 'zea'),
 528+ (u'Sranantongo', 'srn'),
 529+ (u'Kalaallisut', 'kl'),
 530+ (u'Удмурт кыл', 'udm'),
 531+ (u'Нохчийн', 'ce'),
 532+ (u'Igbo', 'ig'),
 533+ (u'Перем Коми', 'koi'),
 534+ (u'Perem Komi', 'koi'),
 535+ (u'ଓଡ଼ିଆ', 'or'),
 536+ (u'Dolnoserbski', 'dsb'),
 537+ (u'KiKongo', 'kg'),
 538+ (u'ລາວ', 'lo'),
 539+ (u'Аҧсуа', 'ab'),
 540+ (u'Мокшень', 'mdf'),
 541+ (u'Mokshanj Kälj', 'mdf'),
 542+ (u'romani - रोमानी', 'rmy'),
 543+ (u'Кырык Мары', 'mrj'),
 544+ (u'Kyryk Mary', 'mrj'),
 545+ (u'Bahasa Banjar', 'bjn'),
 546+ (u'Словѣньскъ', 'cu'),
 547+ (u'Páigina Percipal', 'mwl'),
 548+ (u'Qaraqalpaqsha', 'kaa'),
 549+ (u'Gagana Samoa', 'sm'),
 550+ (u'Молдовеняскэ', 'mo'),
 551+ (u'Tetun', 'tet'),
 552+ (u'Авар', 'av'),
 553+ (u'कश्मीरी', 'ks'),
 554+ (u'كشميري', 'ks'),
 555+ (u'𐌲𐌿𐍄𐌹𐍃𐌺', 'got'), #Needs fix
 556+ (u'سنڌي، سندھی ، सिन्ध', 'sd'),
 557+ (u'Bamanankan', 'bm'),
 558+ (u'dorerin Naoero', 'na'),
 559+ (u'Norfuk', 'pih'),
 560+ (u'Ποντιακά', 'pnt'),
 561+ (u'ᐃᓄᒃᑎᑐᑦ', 'iu'),
 562+ (u'Iñupiak', 'ik'),
 563+ (u'Bislama', 'bi'),
 564+ (u'ᏣᎳᎩ', 'chr'),
 565+ (u'অসমীয়া', 'as'),
 566+ (u'Mìng-dĕ̤ng-ngṳ̄', 'cdo'),
 567+ (u'Eʋegbe', 'ee'),
 568+ (u'SiSwati', 'ss'),
 569+ (u'Oromoo', 'om'),
 570+ (u'Cuengh', 'za'),
 571+ (u'isiZulu', 'zu'),
 572+ (u'ትግርኛ', 'ti'),
 573+ (u'Tshivenda', 've'),
 574+ (u'Xitsonga', 'ts'),
 575+ (u'هَوُسَ', 'ha'),
 576+ (u'ཇོང་ཁ', 'dz'),
 577+ (u'Sängö', 'sg'),
 578+ (u'Chamoru', 'ch'),
 579+ (u'Nehiyaw', 'cr'),
 580+ (u'isiXhosa', 'xh'),
 581+ (u'Akana', 'ak'),
 582+ (u'Sesotho', 'st'),
 583+ (u'Ikinyarwanda', 'rw'),
 584+ (u'Setswana', 'tn'),
 585+ (u'Gĩkũyũ', 'ki'),
 586+ (u'Буряад', 'bxr'),
 587+ (u'Basa Ugi', 'bug'),
 588+ (u'Chi-Chewa', 'ny'),
 589+ (u'Лакку', 'lbe'),
 590+ (u'Twi', 'tw'),
 591+ (u'chiShona', 'sn'),
 592+ (u'Kirundi', 'rn'),
 593+ (u'Fulfulde', 'ff'),
 594+ (u'Tsetsêhestâhese', 'chy'),
 595+ (u'Luganda', 'lg'),
 596+ (u'Oshiwambo', 'ng'),
 597+ (u'ꆇꉙ', 'ii'),
 598+ (u'Choctaw', 'cho'),
 599+ (u'Ebon', 'mh'),
 600+ (u'Afar', 'aa'),
 601+ (u'Kuanyama', 'kj'),
 602+ (u'Hiri Motu', 'ho'),
 603+ (u'Muskogee', 'mus'),
 604+ (u'Kanuri', 'kr'),
 605+ (u'Otsiherero', 'hz'),
 606+ ])
 607+
 608+ self.languages = {}
 609+ for language, code in self.init_languages.iteritems():
 610+ ln = self.languages.get(code, Language(language, code))
 611+ if language != ln.name:
 612+ ln.locale = language
 613+ self.languages[code] = ln
 614+ self.default = self.determine_default_language()
 615+
 616+ def __repr__(self):
 617+ return 'contains %s languages' % (len(self.languages))
 618+
 619+ def get_language(self, code):
 620+ return self.languages.get(code, None)
 621+
 622+ def determine_default_language(self):
 623+ '''
 624+ Determines the default language to make an educated guess which
 625+ Wikipedia project is most likely of interest
 626+ '''
 627+ code = locale.getdefaultlocale()[0]
 628+ code = code.split('_')[0]
 629+ return self.languages[code]
 630+
 631+ def show_languages(settings, project, startswith=None):
 632+ if startswith != None:
 633+ startswith = startswith.title()
 634+ choices = project.supported_languages()
 635+ lang = []
 636+ for choice in choices:
 637+ lang.append(choice)
 638+ lang.sort()
 639+ for language in lang:
 640+ try:
 641+ if startswith != None and language.startswith(first):
 642+ print '%s' % language.decode(settings.encoding)
 643+ elif startswith == None:
 644+ print '%s' % language.decode(settings.encoding)
 645+ except UnicodeEncodeError:
 646+ print '%s' % language
 647+
 648+
 649+ def extract_language_code_from_wikiprojects(self):
 650+ '''
 651+ Copy and paste a string of all supported projects from
 652+ http://meta.wikimedia.org/wiki/Complete_list_of_Wikimedia_projects and use
 653+ this function to extract the language codes. This list can be used for the
 654+ Wiki class
 655+ '''
 656+ str = '''
 657+ Čeština (cs) • Deutsch (de) • English (en) • Español (es) • Français (fr) • Italiano (it) • Português (pt) • Suomi (fi) • Ελληνικά (el) • Русский (ru) • 日本語 (ja)
 658+ '''
 659+ reg = re.compile('\([\w\-]*\)')
 660+ abbr = re.findall(reg, str)
 661+ abbr = [ab.replace('(', '').replace(')', '') for ab in abbr]
 662+ print abbr
 663+ print len(abbr)
 664+
 665+def init():
 666+ lnc = LanguageContainer()
 667+ return lnc.determine_default_language()
 668+
 669+if __name__ == '__main__':
 670+ init()
Property changes on: trunk/tools/editor_trends/classes/languages.py
___________________________________________________________________
Added: svn:eol-style
1671 + native
Index: trunk/tools/editor_trends/configuration.py
@@ -197,7 +197,9 @@
198198 return 500
199199
200200 def update_python_path(self):
201 - IGNORE_DIRS = ['wikistats', 'zips']
 201+ IGNORE_DIRS = ['wikistats', 'zips', 'datasets', 'mapreduce', 'logs',
 202+ 'statistics', 'js_scripts', 'deployment',
 203+ 'documentation', 'data', 'code-snippets']
202204 dirs = [name for name in os.listdir(self.working_directory) if
203205 os.path.isdir(os.path.join(self.working_directory, name))]
204206 for subdirname in dirs:
Index: trunk/tools/editor_trends/cronjobs.py
@@ -22,7 +22,9 @@
2323 import manage as manager
2424
2525 from database import db
26 -from classes import wikiprojects
 26+from classes import languages
 27+from classes import projects
 28+from classes import runtime_settings
2729 from analyses import analyzer
2830
2931
@@ -30,13 +32,10 @@
3133 '''
3234 This function should only be called as a cronjob and not directly.
3335 '''
34 - parser, settings, wiki = manager.init_args_parser()
 36+ project, language, parser, settings = manager.init_args_parser()
3537 args = parser.parse_args(['django'])
36 - args.language = wikiprojects.get_language(task['language'])
37 - args.project = task['project']
38 - print args
39 - wiki = wikiprojects.Wiki(settings, args)
40 - res = manager.all_launcher(wiki, settings, None)
 38+ rts = runtime_settings.RunTimeSettings(project, language, settings, args)
 39+ res = manager.all_launcher(rts, settings, None)
4140 return res
4241
4342

Status & tagging log