Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -31,7 +31,9 @@ |
32 | 32 | from utils import ordered_dict |
33 | 33 | from utils import log |
34 | 34 | from utils import timer |
35 | | -from classes import wikiprojects |
| 35 | +from classes import projects |
| 36 | +from classes import languages |
| 37 | +from classes import runtime_settings |
36 | 38 | from database import db |
37 | 39 | from etl import downloader |
38 | 40 | from etl import extracter |
— | — | @@ -184,10 +186,11 @@ |
185 | 187 | event='finish') |
186 | 188 | |
187 | 189 | |
188 | | -def exporter_launcher(properties, settings, logger): |
| 190 | +def dataset_launcher(properties, settings, logger): |
189 | 191 | print 'Start exporting dataset' |
190 | 192 | stopwatch = timer.Timer() |
191 | 193 | log.log_to_mongo(properties, 'dataset', 'export', stopwatch, event='start') |
| 194 | + |
192 | 195 | for target in properties.targets: |
193 | 196 | # write_message_to_log(logger, settings, |
194 | 197 | # message=None, |
— | — | @@ -195,12 +198,11 @@ |
196 | 199 | # target=target, |
197 | 200 | # dbname=properties.full_project, |
198 | 201 | # collection=properties.collection) |
199 | | - print 'Dataset is created by: %s' % target |
200 | | - |
201 | | - analyzer.generate_chart_data(properties.project, |
| 202 | + analyzer.generate_chart_data(properties.project.name, |
202 | 203 | properties.collection, |
203 | | - properties.language_code, |
204 | | - target) |
| 204 | + properties.language.code, |
| 205 | + target, |
| 206 | + **properties.keywords) |
205 | 207 | stopwatch.elapsed() |
206 | 208 | log.log_to_mongo(properties, 'dataset', 'export', stopwatch, event='finish') |
207 | 209 | |
— | — | @@ -265,25 +267,7 @@ |
266 | 268 | log.log_to_mongo(properties, 'dataset', 'all', stopwatch, event='finish') |
267 | 269 | |
268 | 270 | |
269 | | -def show_languages(settings, logger, properties): |
270 | | - first = properties.get_value('startswith') |
271 | | - if first != None: |
272 | | - first = first.title() |
273 | | - choices = languages.supported_languages() |
274 | | - lang = [] |
275 | | - for choice in choices: |
276 | | - lang.append(choice) |
277 | | - lang.sort() |
278 | | - for language in lang: |
279 | | - try: |
280 | | - if first != None and language.startswith(first): |
281 | | - print '%s' % language.decode(settings.encoding) |
282 | | - elif first == None: |
283 | | - print '%s' % language.decode(settings.encoding) |
284 | | - except UnicodeEncodeError: |
285 | | - print '%s' % language |
286 | 271 | |
287 | | - |
288 | 272 | def about_statement(): |
289 | 273 | print '' |
290 | 274 | print 'Editor Trends Software is (c) 2010-2011 by the Wikimedia Foundation.' |
— | — | @@ -300,9 +284,10 @@ |
301 | 285 | Entry point for parsing command line and launching the needed function(s). |
302 | 286 | ''' |
303 | 287 | settings = configuration.Settings() |
304 | | - default_language = wikiprojects.determine_default_language() |
305 | | - wiki = wikiprojects.Wiki(settings) |
306 | | - projects = wiki.projects.keys() |
| 288 | + language = languages.init() |
| 289 | + project = projects.init() |
| 290 | + rts = runtime_settings.RunTimeSettings(project, language, settings) |
| 291 | + |
307 | 292 | #Init Argument Parser |
308 | 293 | parser = ArgumentParser(prog='manage', formatter_class=RawTextHelpFormatter) |
309 | 294 | subparsers = parser.add_subparsers(help='sub - command help') |
— | — | @@ -356,11 +341,20 @@ |
357 | 342 | exported.') |
358 | 343 | parser_transform.set_defaults(func=transformer_launcher) |
359 | 344 | |
360 | | - #EXPORT |
361 | | - parser_dataset = subparsers.add_parser('export', |
| 345 | + #DATASET |
| 346 | + parser_dataset = subparsers.add_parser('dataset', |
362 | 347 | help='Create a dataset from the MongoDB and write it to a csv file.') |
363 | | - parser_dataset.set_defaults(func=exporter_launcher) |
| 348 | + parser_dataset.set_defaults(func=dataset_launcher) |
| 349 | + parser_dataset.add_argument('-c', '--charts', |
| 350 | + action='store', |
| 351 | + help='Should be a valid function name that matches one of the plugin functions', |
| 352 | + default=analyzer.available_analyses()['new_editor_count']) |
364 | 353 | |
| 354 | + parser_dataset.add_argument('-k', '--keywords', |
| 355 | + action='store', |
| 356 | + help='Add additional keywords in the format keyword1=value1,keyword2=value2', |
| 357 | + default={}) |
| 358 | + |
365 | 359 | #ALL |
366 | 360 | parser_all = subparsers.add_parser('all', |
367 | 361 | help='The all sub command runs the download, split, store and dataset \ |
— | — | @@ -387,12 +381,11 @@ |
388 | 382 | executing all.', |
389 | 383 | default=[]) |
390 | 384 | |
391 | | - |
392 | 385 | parser.add_argument('-l', '--language', |
393 | 386 | action='store', |
394 | 387 | help='Example of valid languages.', |
395 | | - choices=wiki.supported_languages(), |
396 | | - default=default_language) |
| 388 | + choices=project.supported_languages(), |
| 389 | + default=language.name) |
397 | 390 | |
398 | 391 | parser.add_argument('-p', '--project', |
399 | 392 | action='store', |
— | — | @@ -422,18 +415,13 @@ |
423 | 416 | %s' % ''.join([f + ',\n' for f in settings.file_choices]), |
424 | 417 | default='stub-meta-history.xml.gz') |
425 | 418 | |
426 | | - parser.add_argument('-d', '--datasets', |
427 | | - action='store', |
428 | | - choices=analyzer.available_analyses(), |
429 | | - help='Indicate what type of data should be exported.', |
430 | | - default='cohort_dataset_backward_bar') |
431 | 419 | |
432 | | - return parser, settings, wiki |
| 420 | + return project, language, parser, settings |
433 | 421 | |
434 | 422 | def main(): |
435 | | - parser, settings, wiki = init_args_parser() |
| 423 | + project, language, parser, settings = init_args_parser() |
436 | 424 | args = parser.parse_args() |
437 | | - properties = wikiprojects.Wiki(settings, args) |
| 425 | + properties = runtime_settings.RunTimeSettings(project, language, settings, args) |
438 | 426 | #initialize logger |
439 | 427 | logger = logging.getLogger('manager') |
440 | 428 | logger.setLevel(logging.DEBUG) |
— | — | @@ -441,19 +429,19 @@ |
442 | 430 | # Add the log message handler to the logger |
443 | 431 | today = datetime.datetime.today() |
444 | 432 | log_filename = os.path.join(settings.log_location, '%s%s_%s-%s-%s.log' \ |
445 | | - % (properties.language_code, properties.project, |
| 433 | + % (properties.language.code, properties.project.name, |
446 | 434 | today.day, today.month, today.year)) |
447 | 435 | handler = logging.handlers.RotatingFileHandler(log_filename, |
448 | 436 | maxBytes=1024 * 1024, |
449 | 437 | backupCount=3) |
450 | 438 | |
451 | 439 | logger.addHandler(handler) |
452 | | - logger.debug('Chosen language: \t%s' % wiki.language) |
| 440 | + logger.debug('Chosen language: \t%s' % properties.language) |
453 | 441 | |
454 | 442 | #start manager |
455 | 443 | #detect_python_version(logger) |
456 | 444 | about_statement() |
457 | | - config.create_configuration(settings, args) |
| 445 | + #config.create_configuration(settings, args) |
458 | 446 | |
459 | 447 | properties.show_settings() |
460 | 448 | args.func(properties, settings, logger) |
Index: trunk/tools/editor_trends/wikilytics/api/tasks.py |
— | — | @@ -1,57 +0,0 @@ |
2 | | -from multiprocessing import Process |
3 | | - |
4 | | -from celery.decorators import task |
5 | | -from celery.registry import tasks |
6 | | - |
7 | | -from editor_trends.classes import wikiprojects |
8 | | -from editor_trends import manage as manager |
9 | | - |
10 | | -from wikilytics.api.models import Job |
11 | | - |
12 | | -@task |
13 | | -def launcher(): |
14 | | - jobs = Job.objects.filter(finished=False) |
15 | | - n = len(jobs) |
16 | | - if n > 0: |
17 | | - |
18 | | - job = jobs[0] |
19 | | - job.in_progress = True |
20 | | - job.save() |
21 | | - print 'Launching %s task' % job.type |
22 | | - if job.type == 'dataset': |
23 | | - res = launch_editor_trends_toolkit(job.project, job.language) |
24 | | - elif job.type == 'chart': |
25 | | - res = launch_chart(job.project, job.language) |
26 | | - else: |
27 | | - print 'Unknown job type, no handler has been configured.' |
28 | | - |
29 | | - if res == True: |
30 | | - job.finished = True |
31 | | - job.in_progress = False |
32 | | - job.save() |
33 | | - |
34 | | - |
35 | | -def launch_editor_trends_toolkit(project, language): |
36 | | - ''' |
37 | | - This function should only be called from within Django Wikilytics. |
38 | | - ''' |
39 | | - res = False |
40 | | - parser, settings, wiki = manager.init_args_parser() |
41 | | - args = parser.parse_args(['dummy']) |
42 | | - args.language = language |
43 | | - args.project = project |
44 | | - print args |
45 | | - wiki = wikiprojects.Wiki(settings, args) |
46 | | - p = Process(target=manager.all_launcher, args=(wiki, settings, None)) |
47 | | - p.start() |
48 | | - #res = manager.all_launcher(wiki, settings, None) |
49 | | - return res |
50 | | - |
51 | | -def launch_chart(project, language): |
52 | | - res = False |
53 | | - |
54 | | - |
55 | | - return False |
56 | | - |
57 | | - |
58 | | -tasks.register(launcher) |
Index: trunk/tools/editor_trends/wikilytics/api/forms.py |
— | — | @@ -2,22 +2,22 @@ |
3 | 3 | from django import forms |
4 | 4 | |
5 | 5 | from wikilytics.api.widgets import MonthYearWidget |
6 | | -from editor_trends.classes import wikiprojects |
| 6 | +from editor_trends.classes import languages, projects |
7 | 7 | from editor_trends.analyses.analyzer import available_analyses |
8 | 8 | |
9 | | -wiki = wikiprojects.Wiki('settings') |
10 | 9 | |
11 | 10 | |
12 | 11 | years = [year for year in xrange(2001, datetime.date.today().year + 1)] |
13 | | -#print wiki.supported_languages() |
14 | | -#print wiki.supported_projects() |
15 | 12 | |
| 13 | +project = projects.init() |
| 14 | +language = languages.init() |
| 15 | + |
16 | 16 | class SearchForm(forms.Form): |
17 | 17 | project = forms.CharField(initial='wiki', |
18 | | - widget=forms.Select(choices=wiki.supported_projects())) |
| 18 | + widget=forms.Select(choices=project.supported_projects())) |
19 | 19 | |
20 | 20 | language = forms.CharField(initial='en', |
21 | | - widget=forms.Select(choices=wiki.supported_languages(output='django'))) |
| 21 | + widget=forms.Select(choices=language.supported_languages(output='django'))) |
22 | 22 | #print 'Project: %s' % language |
23 | 23 | #date = forms.DateField(widget=MonthYearWidget(years=years)) |
24 | 24 | |
Index: trunk/tools/editor_trends/classes/projects.py |
— | — | @@ -0,0 +1,109 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | +''' |
| 5 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 6 | +This program is free software; you can redistribute it and/or |
| 7 | +modify it under the terms of the GNU General Public License version 2 |
| 8 | +as published by the Free Software Foundation. |
| 9 | +This program is distributed in the hope that it will be useful, |
| 10 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | +See the GNU General Public License for more details, at |
| 13 | +http://www.fsf.org/licenses/gpl.html |
| 14 | +''' |
| 15 | + |
| 16 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
| 17 | +__author__email = 'dvanliere at gmail dot com' |
| 18 | +__date__ = '2011-01-26' |
| 19 | +__version__ = '0.1' |
| 20 | + |
| 21 | +import languages |
| 22 | + |
class Project:
    '''
    Describes a single Wikimedia project (for example Wikipedia).

    Attributes:
        name: short project key as used on the command line, e.g. 'wiki'.
        urlname: name of the project as used in urls, e.g. 'wikibooks'.
        full_name: human readable name, e.g. 'Wikipedia'.
        valid_languages: list of language codes the project is available in;
            it starts out empty and is filled in by the code that creates
            the project.
    '''
    def __init__(self, name, urlname, full_name):
        self.name = name
        self.full_name = full_name
        self.urlname = urlname
        # Make sure the attribute always exists, supported_languages()
        # iterates over it.
        self.valid_languages = []

    def __repr__(self):
        return '%s' % self.full_name

    def supported_languages(self, output='parser'):
        '''
        Generate a list with the currently supported languages.

        output should be either 'parser' or 'django'. For 'parser' every
        code in self.valid_languages is looked up in the LanguageContainer,
        codes that cannot be found are reported as 'Unknown language'. The
        'django' output is not yet implemented and yields an empty list.

        Raises ValueError when output has any other value.
        '''
        if output not in ('django', 'parser'):
            raise ValueError("output should be 'django' or 'parser', got %r"
                             % (output,))
        if output == 'django':
            print('not yet implemented')
            return []

        lnc = languages.LanguageContainer()
        return [lnc.languages.get(lang, 'Unknown language')
                for lang in self.valid_languages]
| 44 | +# def supported_languages(self, output='parser'): |
| 45 | +# if output == 'parser': |
| 46 | +# choices = [d.values() for d in self.match_languages()] |
| 47 | +# choices = [item for sublist in choices for item in sublist] |
| 48 | +# #print choices |
| 49 | +# return choices |
| 50 | +# else: |
| 51 | +# choices = [(d.get('lnc'), '%s' % (' | '.join(d.values()))) for d in self.match_languages()] |
| 52 | +# return tuple(choices) |
| 53 | + |
class ProjectContainer:
    '''
    Registry of the known Wikimedia projects.

    self.wikis maps the short project key to its url name and full name,
    self.projects maps the same key to a ready-made Project instance whose
    valid_languages attribute has been filled in.
    '''
    def __init__(self):
        self.projects = {}
        # NOTE(review): 'Medawiki Wiki' looks like a typo for MediaWiki;
        # left untouched because the name may be displayed or matched
        # elsewhere.
        self.wikis = {'wiki': {'url':'wiki', 'full_name': 'Wikipedia'},
                      'commons': {'url':'wikicommons', 'full_name': 'Wikimedia Commons'},
                      'books': {'url':'wikibooks', 'full_name':'Wikibooks'},
                      'news': {'url':'wikinews', 'full_name': 'Wikinews'},
                      'quote': {'url':'wikiquote', 'full_name': 'Wikiquote'},
                      'source': {'url':'wikisource', 'full_name': 'Wikisource'},
                      'versity': {'url':'wikiversity', 'full_name':'Wikiversity'},
                      'tionary': {'url':'wiktionary', 'full_name': 'Wiktionary'},
                      'meta': {'url':'metawiki', 'full_name': 'Metawiki'},
                      'species': {'url':'wikispecies', 'full_name': 'Wikispecies'},
                      'incubator': {'url':'incubatorwiki', 'full_name': 'Wikimedia Incubator'},
                      'foundation': {'url':'foundationwiki', 'full_name': 'Wikimedia Foundation'},
                      'mediawiki': {'url':'mediawikiwiki', 'full_name': 'Medawiki Wiki'},
                      'outreach': {'url':'outreachwiki', 'full_name': 'Outreach Wiki'},
                      'strategic_planning': {'url':'strategywiki', 'full_name': 'Wikimedia Strategic Planning'},
                      'usability_initiative': {'url':'usabilitywiki', 'full_name': 'Wikimedia Usability Initiative'},
                      }
        for project in self.wikis:
            props = self.wikis[project]
            urlname = props['url']
            proj = Project(project, urlname, props['full_name'])
            proj.valid_languages = self.project_supports_language(urlname)
            self.projects[project] = proj

    def get_project(self, name):
        '''
        Return the Project registered under name or None if it is unknown.
        '''
        return self.projects.get(name, None)

    def supported_projects(self):
        '''
        Return a tuple of (project key, full project name) pairs.
        '''
        choices = [(key, props['full_name'])
                   for key, props in self.wikis.items()]
        return tuple(choices)

    def project_supports_language(self, urlname):
        '''
        Return the list of language codes in which the project with the
        given url name is available. An empty list is returned for projects
        without a language list.
        '''
        valid_languages = {
            'wiki': ['ace', 'af', 'als', 'an', 'roa-rup', 'ast', 'gn', 'av', 'ay', 'az', 'id', 'ms', 'bm', 'zh-min-nan', 'jv', 'map-bms', 'su', 'bug', 'bi', 'bar', 'bs', 'br', 'ca', 'cbk-zam', 'ch', 'cs', 'ny', 'sn', 'tum', 've', 'co', 'za', 'cy', 'da', 'pdc', 'de', 'nv', 'na', 'lad', 'et', 'ang', 'en', 'es', 'eo', 'ext', 'eu', 'to', 'fo', 'fr', 'frp', 'fy', 'ff', 'fur', 'ga', 'gv', 'sm', 'gd', 'gl', 'got', 'hak', 'haw', 'hsb', 'hr', 'io', 'ilo', 'ig', 'ia', 'ie', 'ik', 'xh', 'zu', 'is', 'it', 'mh', 'kl', 'pam', 'csb', 'kw', 'kg', 'ki', 'rw', 'ky', 'rn', 'sw', 'ht', 'ku', 'la', 'lv', 'lb', 'lt', 'lij', 'li', 'ln', 'jbo', 'lg', 'lmo', 'hu', 'mg', 'mt', 'mi', 'cdo', 'my', 'nah', 'fj', 'nl', 'cr', 'ne', 'nap', 'frr', 'pih', 'no', 'nn', 'nrm', 'oc', 'om', 'pag', 'pi', 'pap', 'pms', 'nds', 'pl', 'pt', 'ty', 'ksh', 'ro', 'rmy', 'rm', 'qu', 'se', 'sg', 'sc', 'sco', 'st', 'tn', 'sq', 'scn', 'simple', 'ceb', 'ss', 'sk', 'sl', 'so', 'sh', 'fi', 'sv', 'tl', 'tt', 'tet', 'vi', 'tpi', 'chy', 'tr', 'tk', 'tw', 'vec', 'vo', 'fiu-vro', 'wa', 'vls', 'war', 'wo', 'ts', 'yo', 'bat-smg', 'el', 'ab', 'ba', 'be', 'bg', 'bxr', 'cu', 'os', 'kk', 'kv', 'mk', 'mn', 'ce', 'ru', 'sr', 'tg', 'udm', 'uk', 'uz', 'xal', 'cv', 'hy', 'ka', 'he', 'yi', 'ar', 'fa', 'ha', 'ps', 'sd', 'ur', 'ug', 'arc', 'dv', 'as', 'bn', 'bpy', 'gu', 'bh', 'hi', 'ks', 'mr', 'kn', 'ne', 'new', 'sa', 'ml', 'or', 'pa', 'ta', 'te', 'bo', 'dz', 'si', 'km', 'lo', 'th', 'am', 'ti', 'iu', 'chr', 'ko', 'ja', 'zh', 'wuu', 'lzh', 'yue'],
            'wiktionary': ['af', 'als', 'an', 'roa-rup', 'ast', 'gn', 'ay', 'az', 'id', 'ms', 'zh-min-nan', 'jv', 'su', 'mt', 'bs', 'br', 'ca', 'cs', 'co', 'za', 'cy', 'da', 'de', 'na', 'et', 'ang', 'en', 'es', 'eo', 'eu', 'fo', 'fr', 'fy', 'gd', 'ga', 'gv', 'sm', 'gl', 'hr', 'io', 'ia', 'ie', 'ik', 'zu', 'is', 'it', 'kl', 'csb', 'ku', 'kw', 'rw', 'sw', 'la', 'lv', 'lb', 'lt', 'li', 'ln', 'jbo', 'hu', 'mg', 'mi', 'nah', 'fj', 'nl', 'no', 'nn', 'oc', 'om', 'uz', 'nds', 'pl', 'pt', 'ro', 'qu', 'sg', 'st', 'tn', 'scn', 'simple', 'sk', 'sl', 'sq', 'ss', 'so', 'sh', 'fi', 'sv', 'tl', 'tt', 'vi', 'tpi', 'tr', 'tk', 'vo', 'wa', 'wo', 'ts', 'el', 'tsd', 'be', 'bg', 'kk', 'ky', 'mk', 'mn', 'ru', 'sr', 'tg', 'uk', 'hy', 'ka', 'he', 'yi', 'ar', 'fa', 'ha', 'ps', 'sd', 'ug', 'ur', 'dv', 'bn', 'gu', 'hi', 'ks', 'ne', 'sa', 'mr', 'kn', 'ml', 'pa', 'ta', 'te', 'km', 'lo', 'my', 'si', 'th', 'am', 'ti', 'iu', 'chr', 'ko', 'ja', 'zh'],
            'wikiquote': ['af', 'als', 'id', 'bs', 'ca', 'cs', 'da', 'de', 'en', 'es', 'eo', 'eu', 'fr', 'is', 'it', 'ku', 'la', 'lb', 'lt', 'hu', 'nl', 'no', 'pl', 'pt', 'ro', 'sk', 'fi', 'sv', 'tr', 'el', 'bg', 'ru', 'sr', 'ka', 'he', 'ar', 'fa', 'gu', 'mr', 'ta', 'th', 'ko', 'ja', 'zh'],
            'wikinews': ['als', 'bs', 'ca', 'cs', 'de', 'en', 'es', 'fa', 'fr', 'it', 'hu', 'nl', 'no', 'nds', 'pl', 'pt', 'ro', 'fi', 'sv', 'tr', 'bg', 'ru', 'sr', 'uk', 'he', 'ar', 'sd', 'ta', 'th', 'ko', 'ja', 'zh'],
            'wikisource': ['als', 'id', 'bs', 'cs', 'cy', 'da', 'de', 'en', 'es', 'fr', 'gl', 'hr', 'is', 'it', 'la', 'lt', 'li', 'nl', 'pl', 'pt', 'ro', 'sk', 'fi', 'sv', 'vi', 'tr', 'el', 'ru', 'sr', 'he', 'yi', 'ar', 'fa', 'bn', 'ml', 'th', 'ko', 'ja', 'zh'],
            'wikibooks': ['af', 'als', 'ang', 'als', 'az', 'ms', 'su', 'bs', 'cs', 'co', 'cy', 'da', 'de', 'na', 'et', 'en', 'es', 'eo', 'eu', 'fr', 'fy', 'gl', 'hr', 'ia', 'ie', 'is', 'it', 'ku', 'la', 'lt', 'mg', 'nl', 'no', 'oc', 'uz', 'nds', 'pl', 'pt', 'ro', 'qu', 'sq', 'simple', 'sk', 'sl', 'fi', 'sv', 'vi', 'tl', 'tt', 'tr', 'tk', 'vo', 'el', 'bg', 'be', 'kk', 'ky', 'mk', 'ru', 'sr', 'tg', 'uk', 'cv', 'hy', 'ka', 'he', 'ar', 'fa', 'ps', 'ur', 'bn', 'hi', 'mr', 'sa', 'kn', 'ml', 'pa', 'ta', 'te', 'km', 'ne', 'th', 'ko', 'ja', 'zh'],
            'wikiversity': ['cs', 'de', 'en', 'es', 'fr', 'it', 'pt', 'fi', 'el', 'ru', 'ja'],
            'wikicommons': ['en'],
            'wikispecies': ['en'],
        }
        # The table is rebuilt on every call, so callers receive a list they
        # can modify without affecting later lookups.
        return valid_languages.get(urlname, [])
| 104 | + |
def init():
    '''
    Build a fresh ProjectContainer and hand back the project that is
    registered under the key 'wiki'.
    '''
    return ProjectContainer().get_project('wiki')


if __name__ == '__main__':
    init()
Property changes on: trunk/tools/editor_trends/classes/projects.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 111 | + native |
Index: trunk/tools/editor_trends/classes/runtime_settings.py |
— | — | @@ -0,0 +1,198 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# coding=utf-8 |
| 4 | +''' |
| 5 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 6 | +This program is free software; you can redistribute it and/or |
| 7 | +modify it under the terms of the GNU General Public License version 2 |
| 8 | +as published by the Free Software Foundation. |
| 9 | +This program is distributed in the hope that it will be useful, |
| 10 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | +See the GNU General Public License for more details, at |
| 13 | +http://www.fsf.org/licenses/gpl.html |
| 14 | +''' |
| 15 | + |
| 16 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
| 17 | +__email__ = 'dvanliere at gmail dot com' |
| 18 | +__date__ = '2010-10-21' |
| 19 | +__version__ = '0.1' |
| 20 | + |
| 21 | +''' |
| 22 | +This file provides the RunTimeSettings class. The mapper between language name, |
| 23 | +locale language name and Wikipedia acronym lives in classes/languages.py. |
| 24 | +Gothic and Burmese are not yet supported there, see rows 450 and 554. |
| 25 | +''' |
| 26 | + |
| 27 | +import os |
| 28 | +import sys |
| 29 | +import locale |
| 30 | +import datetime |
| 31 | +import time |
| 32 | +import re |
| 33 | +sys.path.append('..') |
| 34 | + |
| 35 | +from utils import text_utils |
| 36 | +from utils import ordered_dict as odict |
| 37 | +from classes import languages |
| 38 | + |
| 39 | + |
class RunTimeSettings:
    '''
    This class keeps track of the commands issued by the user and is used to
    feed the different etl functions. Difference with configuration class is
    that the configuration class are read-only settings that are always the
    same for a user while these settings can change depending on the kind of
    analysis requested.
    '''
    def __init__(self, project, language, settings, args=None):
        '''
        project and language are the default Project and Language objects,
        settings is the configuration object and args the (optional) result
        of parsing the command line. Without args only the three defaults
        are stored; with args all derived settings are calculated and the
        required directories are verified through settings.
        '''
        self.project = project
        self.language = language
        self.settings = settings
        # Always store args so that get_value() also works (and returns
        # None) when the instance was created without parsed arguments.
        self.args = args

        if args:
            self.hash = self.secs_since_epoch()
            location = self.get_value('location')
            self.base_location = location if location is not None \
                else settings.input_location
            # Apply the project / language requested on the command line.
            self.project = self.update_project_settings()
            self.language = self.update_language_settings()

            self.targets = self._split_targets(self.get_value('charts'))
            self.keywords = self.split_keywords(self.get_value('keywords'))
            self.function = self.get_value('func')
            self.collection = self.get_value('collection')
            self.ignore = self.get_value('except')
            self.clean = self.get_value('new')
            self.force = self.get_value('force')
            self.location = self.get_project_location()
            self.filename = self.generate_wikidump_filename()
            self.namespaces = self.get_namespaces()

            self.dataset = os.path.join(settings.dataset_location,
                                        self.project.name)
            self.charts = os.path.join(settings.chart_location,
                                       self.project.name)

            self.txt = os.path.join(self.location, 'txt')
            self.sorted = os.path.join(self.location, 'sorted')

            self.directories = [self.location,
                                self.txt,
                                self.sorted,
                                self.dataset,
                                self.charts]
            # NOTE(review): this formats the project object itself, so the
            # path contains whatever str(project) yields; confirm that this
            # is the intended value for the dump location.
            self.path = '/%s/latest/' % self.project
            settings.verify_environment(self.directories)

    def __str__(self):
        return 'Runtime Settings for project %s%s' % (self.language.name, self.project.name)

    def __iter__(self):
        for item in self.__dict__:
            yield item

    def dict(self):
        '''
        Return a dictionary with all properties and their values
        '''
        return dict((prop, getattr(self, prop)) for prop in self)

    def _split_targets(self, targets):
        '''
        Turn the value of the charts argument into a list of targets.
        A missing value gives an empty list, a string is split on commas and
        any other single value is wrapped in a list.
        '''
        if targets is None:
            return []
        if isinstance(targets, (list, tuple)):
            return list(targets)
        if hasattr(targets, 'split'):
            return [t.strip() for t in targets.split(',') if t.strip()]
        # The default of the charts argument is not a string.
        return [targets]

    def split_keywords(self, keywords):
        '''
        Convert 'keyword1=value1,keyword2=value2' into a dictionary.
        A dictionary is returned as a copy, None and the empty string give
        an empty dictionary. Only the first '=' of an entry separates key
        and value. Raises ValueError for an entry without '='.
        '''
        if not keywords:
            return {}
        if isinstance(keywords, dict):
            return dict(keywords)
        d = {}
        for kw in keywords.split(','):
            if not kw.strip():
                continue
            key, sep, value = kw.partition('=')
            if not sep:
                raise ValueError('Keyword %r is not in the format '
                                 'keyword=value' % kw)
            d[key.strip()] = value.strip()
        return d

    def get_project_location(self):
        '''
        Construct the full project location
        '''
        return os.path.join(self.base_location, self.language.code, self.project.name)

    def show_settings(self):
        '''
        Prints some very high level configuration settings.
        '''
        about = {}
        about['Project'] = '%s' % self.project.full_name.title()
        about['Language'] = '%s / %s / %s' % (self.language.name, self.language.locale, self.language.code)
        about['Input directory'] = '%s' % self.location
        about['Output directory'] = '%s and subdirectories' % self.location

        max_length_key = max([len(key) for key in about])
        print('Final settings after parsing command line arguments:')
        for ab in about:
            print('%s: %s' % (ab.rjust(max_length_key), about[ab]))

    def get_value(self, key):
        '''
        Returns key from argument if present else None
        '''
        return getattr(getattr(self, 'args', None), key, None)

    def generate_wikidump_filename(self):
        '''
        Generate the main name of the wikidump file to be downloaded.
        '''
        # NOTE(review): like self.path this formats the project object
        # itself; confirm that this gives the expected dump file name.
        return '%s-latest-%s' % (self.project, self.get_value('file'))

    def update_language_settings(self):
        '''
        Determine the language to be used, default is the system language.
        Returns the current language when no (other) language was requested
        or when the requested language cannot be found.
        '''
        lang = self.get_value('language')
        if lang is None or lang == getattr(self.language, 'name', None):
            return self.language
        lnc = languages.LanguageContainer()
        default = lnc.determine_default_language()
        if lang == default.name:
            return default
        chosen = lnc.get_language(lang)
        # Keep the current language instead of storing None.
        return chosen if chosen is not None else self.language

    def update_project_settings(self):
        '''
        Determine the project to be analyzed, default is Wikipedia.
        Returns the current project when no (other) project was requested
        or when the requested project cannot be found.
        '''
        proj = self.get_value('project')
        if proj is None or proj == getattr(self.project, 'name', None):
            return self.project
        # Imported here because the module does not import projects at the
        # top of the file.
        from classes import projects
        chosen = projects.ProjectContainer().get_project(proj)
        # Keep the current project instead of storing None.
        return chosen if chosen is not None else self.project

    def get_projectname(self):
        '''
        Determine the full project name based on the project acronym and language.
        Exits the program when the language code or project name is missing.
        '''
        print('%s %s' % (self.language.code, self.project.name))
        if self.language.code is None:
            print('Entered language: %s is not a valid Wikimedia language' \
                % self.get_value('language'))
            sys.exit(-1)

        if self.project.full_name is None:
            print('Entered project: %s is not valid Wikimedia Foundation project.' \
                % self.get_value('project'))
            sys.exit(-1)
        else:
            return '%s%s' % (self.language.code, self.project.name)

    def secs_since_epoch(self):
        '''
        Return the current local time as seconds since the epoch.
        '''
        dt = datetime.datetime.now()
        return time.mktime(dt.timetuple())

    def get_namespaces(self):
        '''
        Get the list of namespaces that should be included for analysis.
        Returns None when no namespace argument was given.
        '''
        namespaces = self.get_value('namespace')
        if namespaces is not None:
            return namespaces.split(',')
        else:
            return namespaces
Property changes on: trunk/tools/editor_trends/classes/runtime_settings.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 200 | + native |
Added: svn:mime-type |
2 | 201 | + text/plain |
Index: trunk/tools/editor_trends/classes/languages.py |
— | — | @@ -0,0 +1,669 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# coding=utf-8 |
| 4 | + |
| 5 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
| 6 | +__author__email = 'dvanliere at gmail dot com' |
| 7 | +__date__ = '2011-01-26' |
| 8 | +__version__ = '0.1' |
| 9 | + |
| 10 | +import locale |
| 11 | +import sys |
| 12 | +sys.path.append('..') |
| 13 | +from utils import ordered_dict as odict |
| 14 | + |
class Language:
    '''
    Simple record describing one language: its name, its code and,
    optionally, its locale.
    '''
    def __init__(self, name, code, locale=None):
        self.code = code
        self.name = name
        # locale stays None unless the caller supplies one
        self.locale = locale

    def __repr__(self):
        # Rendered as "<code> - <name>"
        parts = (self.code, self.name)
        return '%s - %s' % parts
| 23 | + |
| 24 | +class LanguageContainer: |
| 25 | + def __init__(self): |
| 26 | + self.init_languages = odict.OrderedDict([ |
| 27 | + (u'English', 'en'), |
| 28 | + (u'German', 'de'), |
| 29 | + (u'French', 'fr'), |
| 30 | + (u'Italian', 'it'), |
| 31 | + (u'Polish', 'pl'), |
| 32 | + (u'Japanese', 'ja'), |
| 33 | + (u'Spanish', 'es'), |
| 34 | + (u'Dutch', 'nl'), |
| 35 | + (u'Portuguese', 'pt'), |
| 36 | + (u'Russian', 'ru'), |
| 37 | + (u'Swedish', 'sv'), |
| 38 | + (u'Chinese', 'zh'), |
| 39 | + (u'Catalan', 'ca'), |
| 40 | + (u'Norwegian', 'no'), |
| 41 | + (u'Bokmål', 'no'), |
| 42 | + (u'Finnish', 'fi'), |
| 43 | + (u'Ukrainian', 'uk'), |
| 44 | + (u'Hungarian', 'hu'), |
| 45 | + (u'Czech', 'cs'), |
| 46 | + (u'Romanian', 'ro'), |
| 47 | + (u'Turkish', 'tr'), |
| 48 | + (u'Korean', 'ko'), |
| 49 | + (u'Vietnamese', 'vi'), |
| 50 | + (u'Danish', 'da'), |
| 51 | + (u'Arabic', 'ar'), |
| 52 | + (u'Esperanto', 'eo'), |
| 53 | + (u'Serbian', 'sr'), |
| 54 | + (u'Indonesian', 'id'), |
| 55 | + (u'Lithuanian', 'lt'), |
| 56 | + (u'Volapük', 'vo'), |
| 57 | + (u'Slovak', 'sk'), |
| 58 | + (u'Hebrew', 'he'), |
| 59 | + (u'Bulgarian', 'bg'), |
| 60 | + (u'Persian', 'fa'), |
| 61 | + (u'Slovenian', 'sl'), |
| 62 | + (u'Waray-Waray', 'war'), |
| 63 | + (u'Croatian', 'hr'), |
| 64 | + (u'Estonian', 'et'), |
| 65 | + (u'Malay', 'ms'), |
| 66 | + (u'Newar', 'new'), |
| 67 | + (u'Nepal Bhasa', 'new'), |
| 68 | + (u'Simple English', 'simple'), |
| 69 | + (u'Galician', 'gl'), |
| 70 | + (u'Thai', 'th'), |
| 71 | + (u'Aromanian', 'roa-rup'), |
| 72 | + (u'Nynorsk', 'nn'), |
| 73 | + (u'Basque', 'eu'), |
| 74 | + (u'Hindi', 'hi'), |
| 75 | + (u'Greek', 'el'), |
| 76 | + (u'Haitian', 'ht'), |
| 77 | + (u'Latin', 'la'), |
| 78 | + (u'Telugu', 'te'), |
| 79 | + (u'Georgian', 'ka'), |
| 80 | + (u'Cebuano', 'ceb'), |
| 81 | + (u'Macedonian', 'mk'), |
| 82 | + (u'Azeri', 'az'), |
| 83 | + (u'Tagalog', 'tl'), |
| 84 | + (u'Breton', 'br'), |
| 85 | + (u'Serbo-Croatian', 'sh'), |
| 86 | + (u'Marathi', 'mr'), |
| 87 | + (u'Luxembourgish', 'lb'), |
| 88 | + (u'Javanese', 'jv'), |
| 89 | + (u'Latvian', 'lv'), |
| 90 | + (u'Bosnian', 'bs'), |
| 91 | + (u'Icelandic', 'is'), |
| 92 | + (u'Welsh', 'cy'), |
| 93 | + (u'Belarusian', 'be-x-old'), |
| 94 | + (u'Taraškievica', 'be-x-old'), |
| 95 | + (u'Piedmontese', 'pms'), |
| 96 | + (u'Albanian', 'sq'), |
| 97 | + (u'Tamil', 'ta'), |
| 98 | + (u'Bishnupriya Manipuri', 'bpy'), |
| 99 | + (u'Belarusian', 'be'), |
| 100 | + (u'Aragonese', 'an'), |
| 101 | + (u'Occitan', 'oc'), |
| 102 | + (u'Bengali', 'bn'), |
| 103 | + (u'Swahili', 'sw'), |
| 104 | + (u'Ido', 'io'), |
| 105 | + (u'Ripuarian', 'ksh'), |
| 106 | + (u'Lombard', 'lmo'), |
| 107 | + (u'West Frisian', 'fy'), |
| 108 | + (u'Gujarati', 'gu'), |
| 109 | + (u'Low Saxon', 'nds'), |
| 110 | + (u'Afrikaans', 'af'), |
| 111 | + (u'Sicilian', 'scn'), |
| 112 | + (u'Quechua', 'qu'), |
| 113 | + (u'Kurdish', 'ku'), |
| 114 | + (u'Urdu', 'ur'), |
| 115 | + (u'Sundanese', 'su'), |
| 116 | + (u'Malayalam', 'ml'), |
| 117 | + (u'Cantonese', 'zh-yue'), |
| 118 | + (u'Asturian', 'ast'), |
| 119 | + (u'Neapolitan', 'nap'), |
| 120 | + (u'Samogitian', 'bat-smg'), |
| 121 | + (u'Walloon', 'wa'), |
| 122 | + (u'Chuvash', 'cv'), |
| 123 | + (u'Irish', 'ga'), |
| 124 | + (u'Armenian', 'hy'), |
| 125 | + (u'Yoruba', 'yo'), |
| 126 | + (u'Kannada', 'kn'), |
| 127 | + (u'Tajik', 'tg'), |
| 128 | + (u'Tarantino', 'roa-tara'), |
| 129 | + (u'Venetian', 'vec'), |
| 130 | + (u'Western Panjabi', 'pnb'), |
| 131 | + (u'Nepali', 'ne'), |
| 132 | + (u'Scottish Gaelic', 'gd'), |
| 133 | + (u'Yiddish', 'yi'), |
| 134 | + (u'Min Nan', 'zh-min-nan'), |
| 135 | + (u'Uzbek', 'uz'), |
| 136 | + (u'Tatar', 'tt'), |
| 137 | + (u'Kapampangan', 'pam'), |
| 138 | + (u'Ossetian', 'os'), |
| 139 | + (u'Sakha', 'sah'), |
| 140 | + (u'Alemannic', 'als'), |
| 141 | + (u'Maori', 'mi'), |
| 142 | + (u'Egyptian Arabic', 'arz'), |
| 143 | + (u'Kazakh', 'kk'), |
| 144 | + (u'Nahuatl', 'nah'), |
| 145 | + (u'Limburgian', 'li'), |
| 146 | + (u'Upper Sorbian', 'hsb'), |
| 147 | + (u'Gilaki', 'glk'), |
| 148 | + (u'Corsican', 'co'), |
| 149 | + (u'Gan', 'gan'), |
| 150 | + (u'Amharic', 'am'), |
| 151 | + (u'Mongolian', 'mn'), |
| 152 | + (u'Interlingua', 'ia'), |
| 153 | + (u'Central Bicolano', 'bcl'), |
| 154 | + (u'Võro', 'fiu-vro'), |
| 155 | + (u'Dutch Low Saxon', 'nds-nl'), |
| 156 | + (u'Faroese', 'fo'), |
| 157 | + (u'Turkmen', 'tk'), |
| 158 | + (u'Scots', 'sco'), |
| 159 | + (u'West Flemish', 'vls'), |
| 160 | + (u'Sinhalese', 'si'), |
| 161 | + (u'Sanskrit', 'sa'), |
| 162 | + (u'Bavarian', 'bar'), |
| 163 | + (u'Burmese', 'my'), |
| 164 | + (u'Manx', 'gv'), |
| 165 | + (u'Divehi', 'dv'), |
| 166 | + (u'Norman', 'nrm'), |
| 167 | + (u'Pangasinan', 'pag'), |
| 168 | + (u'Romansh', 'rm'), |
| 169 | + (u'Banyumasan', 'map-bms'), |
| 170 | + (u'Zazaki', 'diq'), |
| 171 | + (u'Sorani', 'ckb'), |
| 172 | + (u'Northern Sami', 'se'), |
| 173 | + (u'Mazandarani', 'mzn'), |
| 174 | + (u'Wu', 'wuu'), |
| 175 | + (u'Uyghur', 'ug'), |
| 176 | + (u'Friulian', 'fur'), |
| 177 | + (u'Ligurian', 'lij'), |
| 178 | + (u'Maltese', 'mt'), |
| 179 | + (u'Bihari', 'bh'), |
| 180 | + (u'Novial', 'nov'), |
| 181 | + (u'Malagasy', 'mg'), |
| 182 | + (u'Kashubian', 'csb'), |
| 183 | + (u'Ilokano', 'ilo'), |
| 184 | + (u'Sardinian', 'sc'), |
| 185 | + (u'Classical Chinese', 'zh-classical'), |
| 186 | + (u'Khmer', 'km'), |
| 187 | + (u'Ladino', 'lad'), |
| 188 | + (u'Pali', 'pi'), |
| 189 | + (u'Anglo-Saxon', 'ang'), |
| 190 | + (u'Zamboanga Chavacano', 'cbk-zam'), |
| 191 | + (u'Tibetan', 'bo'), |
| 192 | + (u'Fiji Hindi', 'hif'), |
| 193 | + (u'Franco-Provençal', 'frp'), |
| 194 | + (u'Arpitan', 'frp'), |
| 195 | + (u'Hakka', 'hak'), |
| 196 | + (u'Cornish', 'kw'), |
| 197 | + (u'Punjabi', 'pa'), |
| 198 | + (u'Pashto', 'ps'), |
| 199 | + (u'Kalmyk', 'xal'), |
| 200 | + (u'Silesian', 'szl'), |
| 201 | + (u'Pennsylvania German', 'pdc'), |
| 202 | + (u'Hawaiian', 'haw'), |
| 203 | + (u'Saterland Frisian', 'stq'), |
| 204 | + (u'Interlingue', 'ie'), |
| 205 | + (u'Navajo', 'nv'), |
| 206 | + (u'Fijian', 'fj'), |
| 207 | + (u'Crimean Tatar', 'crh'), |
| 208 | + (u'Komi', 'kv'), |
| 209 | + (u'Tongan', 'to'), |
| 210 | + (u'Acehnese', 'ace'), |
| 211 | + (u'Somali', 'so'), |
| 212 | + (u'Erzya', 'myv'), |
| 213 | + (u'Guarani', 'gn'), |
| 214 | + (u'Karachay-Balkar', 'krc'), |
| 215 | + (u'Extremaduran', 'ext'), |
| 216 | + (u'Lingala', 'ln'), |
| 217 | + (u'Kirghiz', 'ky'), |
| 218 | + (u'Meadow Mari', 'mhr'), |
| 219 | + (u'Assyrian Neo-Aramaic', 'arc'), |
| 220 | + (u'Emilian-Romagnol', 'eml'), |
| 221 | + (u'Lojban', 'jbo'), |
| 222 | + (u'Picard', 'pcd'), |
| 223 | + (u'Aymara', 'ay'), |
| 224 | + (u'Wolof', 'wo'), |
| 225 | + (u'Tumbuka', 'tum'), |
| 226 | + (u'Kabyle', 'kab'), |
| 227 | + (u'Bashkir', 'ba'), |
| 228 | + (u'North Frisian', 'frr'), |
| 229 | + (u'Tahitian', 'ty'), |
| 230 | + (u'Tok Pisin', 'tpi'), |
| 231 | + (u'Papiamentu', 'pap'), |
| 232 | + (u'Zealandic', 'zea'), |
| 233 | + (u'Sranan', 'srn'), |
| 234 | + (u'Greenlandic', 'kl'), |
| 235 | + (u'Udmurt', 'udm'), |
| 236 | + (u'Chechen', 'ce'), |
| 237 | + (u'Igbo', 'ig'), |
| 238 | + (u'Komi-Permyak', 'koi'), |
| 239 | + (u'Oriya', 'or'), |
| 240 | + (u'Lower Sorbian', 'dsb'), |
| 241 | + (u'Kongo', 'kg'), |
| 242 | + (u'Lao', 'lo'), |
| 243 | + (u'Abkhazian', 'ab'), |
| 244 | + (u'Moksha', 'mdf'), |
| 245 | + (u'Romani', 'rmy'), |
| 246 | + (u'Hill Mari', 'mrj'), |
| 247 | + (u'Banjar', 'bjn'), |
| 248 | + (u'Old Church Slavonic', 'cu'), |
| 249 | + (u'Mirandese', 'mwl'), |
| 250 | + (u'Karakalpak', 'kaa'), |
| 251 | + (u'Samoan', 'sm'), |
| 252 | + (u'Moldovan', 'mo'), |
| 253 | + (u'Tetum', 'tet'), |
| 254 | + (u'Avar', 'av'), |
| 255 | + (u'Kashmiri', 'ks'), |
| 256 | + (u'Gothic', 'got'), |
| 257 | + (u'Sindhi', 'sd'), |
| 258 | + (u'Bambara', 'bm'), |
| 259 | + (u'Nauruan', 'na'), |
| 260 | + (u'Norfolk', 'pih'), |
| 261 | + (u'Pontic', 'pnt'), |
| 262 | + (u'Inuktitut', 'iu'), |
| 263 | + (u'Inupiak', 'ik'), |
| 264 | + (u'Bislama', 'bi'), |
| 265 | + (u'Cherokee', 'chr'), |
| 266 | + (u'Assamese', 'as'), |
| 267 | + (u'Min Dong', 'cdo'), |
| 268 | + (u'Ewe', 'ee'), |
| 269 | + (u'Swati', 'ss'), |
| 270 | + (u'Oromo', 'om'), |
| 271 | + (u'Zhuang', 'za'), |
| 272 | + (u'Zulu', 'zu'), |
| 273 | + (u'Tigrinya', 'ti'), |
| 274 | + (u'Venda', 've'), |
| 275 | + (u'Tsonga', 'ts'), |
| 276 | + (u'Hausa', 'ha'), |
| 277 | + (u'Dzongkha', 'dz'), |
| 278 | + (u'Sango', 'sg'), |
| 279 | + (u'Chamorro', 'ch'), |
| 280 | + (u'Cree', 'cr'), |
| 281 | + (u'Xhosa', 'xh'), |
| 282 | + (u'Akan', 'ak'), |
| 283 | + (u'Sesotho', 'st'), |
| 284 | + (u'Kinyarwanda', 'rw'), |
| 285 | + (u'Tswana', 'tn'), |
| 286 | + (u'Kikuyu', 'ki'), |
| 287 | + (u'Buryat', 'bxr'), |
| 288 | + (u'Buginese', 'bug'), |
| 289 | + (u'Chichewa', 'ny'), |
| 290 | + (u'Lak', 'lbe'), |
| 291 | + (u'Twi', 'tw'), |
| 292 | + (u'Shona', 'sn'), |
| 293 | + (u'Kirundi', 'rn'), |
| 294 | + (u'Fula', 'ff'), |
| 295 | + (u'Cheyenne', 'chy'), |
| 296 | + (u'Luganda', 'lg'), |
| 297 | + (u'Ndonga', 'ng'), |
| 298 | + (u'Sichuan Yi', 'ii'), |
| 299 | + (u'Choctaw', 'cho'), |
| 300 | + (u'Marshallese', 'mh'), |
| 301 | + (u'Afar', 'aa'), |
| 302 | + (u'Kuanyama', 'kj'), |
| 303 | + (u'Hiri Motu', 'ho'), |
| 304 | + (u'Muscogee', 'mus'), |
| 305 | + (u'Kanuri', 'kr'), |
| 306 | + (u'Herero', 'hz'), |
| 307 | + (u'English', 'en'), |
| 308 | + (u'Deutsch', 'de'), |
| 309 | + (u'Français', 'fr'), |
| 310 | + (u'Italiano', 'it'), |
| 311 | + (u'Polski', 'pl'), |
| 312 | + (u'日本語', 'ja'), |
| 313 | + (u'Español', 'es'), |
| 314 | + (u'Nederlands', 'nl'), |
| 315 | + (u'Português', 'pt'), |
| 316 | + (u'Русский', 'ru'), |
| 317 | + (u'Svenska', 'sv'), |
| 318 | + (u'中文', 'zh'), |
| 319 | + (u'Català', 'ca'), |
| 320 | + (u'Norsk', 'no'), |
| 321 | + (u'Bokmål', 'no'), |
| 322 | + (u'Suomi', 'fi'), |
| 323 | + (u'Українська', 'uk'), |
| 324 | + (u'Magyar', 'hu'), |
| 325 | + (u'Čeština', 'cs'), |
| 326 | + (u'Română', 'ro'), |
| 327 | + (u'Türkçe', 'tr'), |
| 328 | + (u'한국어', 'ko'), |
| 329 | + (u'Tiếng Việt', 'vi'), |
| 330 | + (u'Dansk', 'da'), |
| 331 | + (u'العربية', 'ar'), |
| 332 | + (u'Esperanto', 'eo'), |
| 333 | + (u'Српски', 'sr'), |
| 334 | + (u'Srpski', 'sr'), |
| 335 | + (u'Bahasa Indonesia', 'id'), |
| 336 | + (u'Lietuvių', 'lt'), |
| 337 | + (u'Volapük', 'vo'), |
| 338 | + (u'Slovenčina', 'sk'), |
| 339 | + (u'עברית', 'he'), |
| 340 | + (u'Български', 'bg'), |
| 341 | + (u'فارسی', 'fa'), |
| 342 | + (u'Slovenščina', 'sl'), |
| 343 | + (u'Winaray', 'war'), |
| 344 | + (u'Hrvatski', 'hr'), |
| 345 | + (u'Eesti', 'et'), |
| 346 | + (u'Bahasa Melayu', 'ms'), |
| 347 | + (u'नेपाल भाषा', 'new'), |
| 348 | + (u'Simple English', 'simple'), |
| 349 | + (u'Galego', 'gl'), |
| 350 | + (u'ไทย', 'th'), |
| 351 | + (u'Armãneashce', 'roa-rup'), |
| 352 | + (u'Nynorsk', 'nn'), |
| 353 | + (u'Euskara', 'eu'), |
| 354 | + (u'हिन्दी', 'hi'), |
| 355 | + (u'Ελληνικά', 'el'), |
| 356 | + (u'Krèyol ayisyen', 'ht'), |
| 357 | + (u'Latina', 'la'), |
| 358 | + (u'తెలుగు', 'te'), |
| 359 | + (u'ქართული', 'ka'), |
| 360 | + (u'Sinugboanong Binisaya', 'ceb'), |
| 361 | + (u'Македонски', 'mk'), |
| 362 | + (u'Azərbaycan', 'az'), |
| 363 | + (u'Tagalog', 'tl'), |
| 364 | + (u'Brezhoneg', 'br'), |
| 365 | + (u'Srpskohrvatski', 'sh'), |
| 366 | + (u'Српскохрватски', 'sh'), |
| 367 | + (u'मराठी', 'mr'), |
| 368 | + (u'Lëtzebuergesch', 'lb'), |
| 369 | + (u'Basa Jawa', 'jv'), |
| 370 | + (u'Latviešu', 'lv'), |
| 371 | + (u'Bosanski', 'bs'), |
| 372 | + (u'Íslenska', 'is'), |
| 373 | + (u'Cymraeg', 'cy'), |
| 374 | + (u'Беларуская', 'be-x-old'), |
| 375 | + (u'тарашкевіца', 'be-x-old'), |
| 376 | + (u'Piemontèis', 'pms'), |
| 377 | + (u'Shqip', 'sq'), |
| 378 | + (u'தமிழ்', 'ta'), |
| 379 | + (u'ইমার ঠার', 'bpy'), |
| 380 | + (u'বিষ্ণুপ্রিয়া মণিপুরী', 'bpy'), |
| 381 | + (u'Беларуская', 'be'), |
| 382 | + (u'Aragonés', 'an'), |
| 383 | + (u'Occitan', 'oc'), |
| 384 | + (u'বাংলা', 'bn'), |
| 385 | + (u'Kiswahili', 'sw'), |
| 386 | + (u'Ido', 'io'), |
| 387 | + (u'Ripoarisch', 'ksh'), |
| 388 | + (u'Lumbaart', 'lmo'), |
| 389 | + (u'Frysk', 'fy'), |
| 390 | + (u'ગુજરાતી', 'gu'), |
| 391 | + (u'Plattdüütsch', 'nds'), |
| 392 | + (u'Afrikaans', 'af'), |
| 393 | + (u'Sicilianu', 'scn'), |
| 394 | + (u'Runa Simi', 'qu'), |
| 395 | + (u'Kurdî', 'ku'), |
| 396 | + (u'كوردی', 'ku'), |
| 397 | + (u'اردو', 'ur'), |
| 398 | + (u'Basa Sunda', 'su'), |
| 399 | + (u'മലയാളം', 'ml'), |
| 400 | + (u'粵語', 'zh-yue'), |
| 401 | + (u'Asturianu', 'ast'), |
| 402 | + (u'Nnapulitano', 'nap'), |
| 403 | + (u'Žemaitėška', 'bat-smg'), |
| 404 | + (u'Walon', 'wa'), |
| 405 | + (u'Чăваш', 'cv'), |
| 406 | + (u'Gaeilge', 'ga'), |
| 407 | + (u'Հայերեն', 'hy'), |
| 408 | + (u'Yorùbá', 'yo'), |
| 409 | + (u'ಕನ್ನಡ', 'kn'), |
| 410 | + (u'Тоҷикӣ', 'tg'), |
| 411 | + (u'Tarandíne', 'roa-tara'), |
| 412 | + (u'Vèneto', 'vec'), |
| 413 | + (u'شاہ مکھی پنجابی', 'pnb'), |
| 414 | + (u'Shāhmukhī Pañjābī', 'pnb'), |
| 415 | + (u'नेपाली', 'ne'), |
| 416 | + (u'Gàidhlig', 'gd'), |
| 417 | + (u'ייִדיש', 'yi'), |
| 418 | + (u'Bân-lâm-gú', 'zh-min-nan'), |
| 419 | + (u'O‘zbek', 'uz'), |
| 420 | + (u'Tatarça', 'tt'), |
| 421 | + (u'Татарча', 'tt'), |
| 422 | + (u'Kapampangan', 'pam'), |
| 423 | + (u'Иронау', 'os'), |
| 424 | + (u'Саха тыла', 'sah'), |
| 425 | + (u'Saxa Tyla', 'sah'), |
| 426 | + (u'Alemannisch', 'als'), |
| 427 | + (u'Māori', 'mi'), |
| 428 | + (u'مصرى', 'arz'), |
| 429 | + (u'Maṣrī', 'arz'), |
| 430 | + (u'Қазақша', 'kk'), |
| 431 | + (u'Nāhuatl', 'nah'), |
| 432 | + (u'Limburgs', 'li'), |
| 433 | + (u'Hornjoserbsce', 'hsb'), |
| 434 | + (u'گیلکی', 'glk'), |
| 435 | + (u'Corsu', 'co'), |
| 436 | + (u'贛語', 'gan'), |
| 437 | + (u'አማርኛ', 'am'), |
| 438 | + (u'Монгол', 'mn'), |
| 439 | + (u'Interlingua', 'ia'), |
| 440 | + (u'Bikol', 'bcl'), |
| 441 | + (u'Võro', 'fiu-vro'), |
| 442 | + (u'Nedersaksisch', 'nds-nl'), |
| 443 | + (u'Føroyskt', 'fo'), |
| 444 | + (u'تركمن ', 'tk'), |
| 445 | + (u'Туркмен', 'tk'), |
| 446 | + (u'Scots', 'sco'), |
| 447 | + (u'West-Vlams', 'vls'), |
| 448 | + (u'සිංහල', 'si'), |
| 449 | + (u'संस्कृतम्', 'sa'), |
| 450 | + (u'Boarisch', 'bar'), |
| 451 | + (u'မ္ရန္မာစာ', 'my'), #Needs fix |
| 452 | + (u'Gaelg', 'gv'), |
| 453 | + (u'ދިވެހިބަސް', 'dv'), |
| 454 | + (u'Nouormand', 'nrm'), |
| 455 | + (u'Normaund', 'nrm'), |
| 456 | + (u'Pangasinan', 'pag'), |
| 457 | + (u'Rumantsch', 'rm'), |
| 458 | + (u'Basa Banyumasan', 'map-bms'), |
| 459 | + (u'Zazaki', 'diq'), |
| 460 | + (u'Soranî', 'ckb'), |
| 461 | + (u'کوردی', 'ckb'), |
| 462 | + (u'Sámegiella', 'se'), |
| 463 | + (u'مَزِروني', 'mzn'), |
| 464 | + (u'吴语', 'wuu'), |
| 465 | + (u'Oyghurque', 'ug'), |
| 466 | + (u'Furlan', 'fur'), |
| 467 | + (u'Líguru', 'lij'), |
| 468 | + (u'Malti', 'mt'), |
| 469 | + (u'भोजपुरी', 'bh'), |
| 470 | + (u'Novial', 'nov'), |
| 471 | + (u'Malagasy', 'mg'), |
| 472 | + (u'Kaszëbsczi', 'csb'), |
| 473 | + (u'Ilokano', 'ilo'), |
| 474 | + (u'Sardu', 'sc'), |
| 475 | + (u'古文', 'zh-classical'), |
| 476 | + (u'文言文', 'zh-classical'), |
| 477 | + (u'ភាសាខ្មែរ', 'km'), |
| 478 | + (u'Dzhudezmo', 'lad'), |
| 479 | + (u'पाऴि', 'pi'), |
| 480 | + (u'Englisc', 'ang'), |
| 481 | + (u'Chavacano de Zamboanga', 'cbk-zam'), |
| 482 | + (u'བོད་སྐད', 'bo'), |
| 483 | + (u'Fiji Hindi', 'hif'), |
| 484 | + (u'Arpitan', 'frp'), |
| 485 | + (u'Hak-kâ-fa', 'hak'), |
| 486 | + (u'客家話', 'hak'), |
| 487 | + (u'Kernewek', 'kw'), |
| 488 | + (u'Karnuack', 'kw'), |
| 489 | + (u'ਪੰਜਾਬੀ', 'pa'), |
| 490 | + (u'پښتو', 'ps'), |
| 491 | + (u'Хальмг', 'xal'), |
| 492 | + (u'Ślůnski', 'szl'), |
| 493 | + (u'Deitsch', 'pdc'), |
| 494 | + (u'Hawai`i', 'haw'), |
| 495 | + (u'Seeltersk', 'stq'), |
| 496 | + (u'Interlingue', 'ie'), |
| 497 | + (u'Diné bizaad', 'nv'), |
| 498 | + (u'Na Vosa Vakaviti', 'fj'), |
| 499 | + (u'Qırımtatarca', 'crh'), |
| 500 | + (u'Коми', 'kv'), |
| 501 | + (u'faka Tonga', 'to'), |
| 502 | + (u'Bahsa Acèh', 'ace'), |
| 503 | + (u'Soomaaliga', 'so'), |
| 504 | + (u'Эрзянь', 'myv'), |
| 505 | + (u'Erzjanj Kelj', 'myv'), |
| 506 | + (u"Avañe'ẽ", 'gn'), |
| 507 | + (u'Къарачай-Малкъар', 'krc'), |
| 508 | + (u'Qarachay-Malqar', 'krc'), |
| 509 | + (u'Estremeñu', 'ext'), |
| 510 | + (u'Lingala', 'ln'), |
| 511 | + (u'Кыргызча', 'ky'), |
| 512 | + (u'Олык Марий', 'mhr'), |
| 513 | + (u'Olyk Marij', 'mhr'), |
| 514 | + (u'ܐܪܡܝܐ', 'arc'), |
| 515 | + (u'Emiliàn e rumagnòl', 'eml'), |
| 516 | + (u'Lojban', 'jbo'), |
| 517 | + (u'Picard', 'pcd'), |
| 518 | + (u'Aymar', 'ay'), |
| 519 | + (u'Wolof', 'wo'), |
| 520 | + (u'chiTumbuka', 'tum'), |
| 521 | + (u'Taqbaylit', 'kab'), |
| 522 | + (u'Башҡорт', 'ba'), |
| 523 | + (u'Frasch', 'frr'), |
| 524 | + (u'Reo Mā`ohi', 'ty'), |
| 525 | + (u'Tok Pisin', 'tpi'), |
| 526 | + (u'Papiamentu', 'pap'), |
| 527 | + (u'Zeêuws', 'zea'), |
| 528 | + (u'Sranantongo', 'srn'), |
| 529 | + (u'Kalaallisut', 'kl'), |
| 530 | + (u'Удмурт кыл', 'udm'), |
| 531 | + (u'Нохчийн', 'ce'), |
| 532 | + (u'Igbo', 'ig'), |
| 533 | + (u'Перем Коми', 'koi'), |
| 534 | + (u'Perem Komi', 'koi'), |
| 535 | + (u'ଓଡ଼ିଆ', 'or'), |
| 536 | + (u'Dolnoserbski', 'dsb'), |
| 537 | + (u'KiKongo', 'kg'), |
| 538 | + (u'ລາວ', 'lo'), |
| 539 | + (u'Аҧсуа', 'ab'), |
| 540 | + (u'Мокшень', 'mdf'), |
| 541 | + (u'Mokshanj Kälj', 'mdf'), |
| 542 | + (u'romani - रोमानी', 'rmy'), |
| 543 | + (u'Кырык Мары', 'mrj'), |
| 544 | + (u'Kyryk Mary', 'mrj'), |
| 545 | + (u'Bahasa Banjar', 'bjn'), |
| 546 | + (u'Словѣньскъ', 'cu'), |
| 547 | + (u'Páigina Percipal', 'mwl'), |
| 548 | + (u'Qaraqalpaqsha', 'kaa'), |
| 549 | + (u'Gagana Samoa', 'sm'), |
| 550 | + (u'Молдовеняскэ', 'mo'), |
| 551 | + (u'Tetun', 'tet'), |
| 552 | + (u'Авар', 'av'), |
| 553 | + (u'कश्मीरी', 'ks'), |
| 554 | + (u'كشميري', 'ks'), |
| 555 | + (u'𐌲𐌿𐍄𐌹𐍃𐌺', 'got'), #Needs fix |
| 556 | + (u'سنڌي، سندھی ، सिन्ध', 'sd'), |
| 557 | + (u'Bamanankan', 'bm'), |
| 558 | + (u'dorerin Naoero', 'na'), |
| 559 | + (u'Norfuk', 'pih'), |
| 560 | + (u'Ποντιακά', 'pnt'), |
| 561 | + (u'ᐃᓄᒃᑎᑐᑦ', 'iu'), |
| 562 | + (u'Iñupiak', 'ik'), |
| 563 | + (u'Bislama', 'bi'), |
| 564 | + (u'ᏣᎳᎩ', 'chr'), |
| 565 | + (u'অসমীয়া', 'as'), |
| 566 | + (u'Mìng-dĕ̤ng-ngṳ̄', 'cdo'), |
| 567 | + (u'Eʋegbe', 'ee'), |
| 568 | + (u'SiSwati', 'ss'), |
| 569 | + (u'Oromoo', 'om'), |
| 570 | + (u'Cuengh', 'za'), |
| 571 | + (u'isiZulu', 'zu'), |
| 572 | + (u'ትግርኛ', 'ti'), |
| 573 | + (u'Tshivenda', 've'), |
| 574 | + (u'Xitsonga', 'ts'), |
| 575 | + (u'هَوُسَ', 'ha'), |
| 576 | + (u'ཇོང་ཁ', 'dz'), |
| 577 | + (u'Sängö', 'sg'), |
| 578 | + (u'Chamoru', 'ch'), |
| 579 | + (u'Nehiyaw', 'cr'), |
| 580 | + (u'isiXhosa', 'xh'), |
| 581 | + (u'Akana', 'ak'), |
| 582 | + (u'Sesotho', 'st'), |
| 583 | + (u'Ikinyarwanda', 'rw'), |
| 584 | + (u'Setswana', 'tn'), |
| 585 | + (u'Gĩkũyũ', 'ki'), |
| 586 | + (u'Буряад', 'bxr'), |
| 587 | + (u'Basa Ugi', 'bug'), |
| 588 | + (u'Chi-Chewa', 'ny'), |
| 589 | + (u'Лакку', 'lbe'), |
| 590 | + (u'Twi', 'tw'), |
| 591 | + (u'chiShona', 'sn'), |
| 592 | + (u'Kirundi', 'rn'), |
| 593 | + (u'Fulfulde', 'ff'), |
| 594 | + (u'Tsetsêhestâhese', 'chy'), |
| 595 | + (u'Luganda', 'lg'), |
| 596 | + (u'Oshiwambo', 'ng'), |
| 597 | + (u'ꆇꉙ', 'ii'), |
| 598 | + (u'Choctaw', 'cho'), |
| 599 | + (u'Ebon', 'mh'), |
| 600 | + (u'Afar', 'aa'), |
| 601 | + (u'Kuanyama', 'kj'), |
| 602 | + (u'Hiri Motu', 'ho'), |
| 603 | + (u'Muskogee', 'mus'), |
| 604 | + (u'Kanuri', 'kr'), |
| 605 | + (u'Otsiherero', 'hz'), |
| 606 | + ]) |
| 607 | + |
| 608 | + self.languages = {} |
| 609 | + for language, code in self.init_languages.iteritems(): |
| 610 | + ln = self.languages.get(code, Language(language, code)) |
| 611 | + if language != ln.name: |
| 612 | + ln.locale = language |
| 613 | + self.languages[code] = ln |
| 614 | + self.default = self.determine_default_language() |
| 615 | + |
def __repr__(self):
    '''
    Short summary of the container: the number of entries currently held
    in self.languages.
    '''
    total = len(self.languages)
    return 'contains %s languages' % total
| 618 | + |
def get_language(self, code):
    '''
    Look up the entry stored in self.languages under the given language
    code. Returns None when the code is not present.
    '''
    registry = self.languages
    return registry.get(code)
| 621 | + |
def determine_default_language(self):
    '''
    Determines the default language to make an educated guess which
    Wikipedia project is most likely of interest.

    The language part of the system locale (the text before the first '_',
    e.g. 'nl' for 'nl_NL') is looked up in self.languages. When the locale
    cannot be determined, or its language code is not a key of
    self.languages, the entry for 'en' is returned instead. None is returned
    only if 'en' is missing from self.languages as well.
    '''
    fallback = self.languages.get('en')
    try:
        locale_name = locale.getdefaultlocale()[0]
    except ValueError:
        # getdefaultlocale() raises ValueError on locale settings it cannot
        # parse; treat that the same as "no locale configured".
        locale_name = None
    if not locale_name:
        # (None, None) is returned when no locale is set in the environment.
        return fallback
    code = locale_name.split('_')[0]
    # Locales such as 'C' yield a code that is not in the table.
    return self.languages.get(code, fallback)
| 630 | + |
def show_languages(settings, project, startswith=None):
    '''
    Print the languages supported by a project in sorted order, one per
    line.

    settings: object whose `encoding` attribute names the codec used to
        decode each language name before it is printed.
    project: object whose supported_languages() returns an iterable of
        language names.
    startswith: optional prefix. When given it is title-cased and only the
        names starting with it are printed; when None every name is printed.

    A name that raises UnicodeEncodeError while being decoded is printed
    as-is.
    '''
    # NOTE(review): this def appears to sit at method indentation in the
    # class but takes no `self` -- confirm how callers invoke it.
    prefix = None if startswith is None else startswith.title()
    for language in sorted(project.supported_languages()):
        # Filter on the title-cased prefix. The previous code compared
        # against an undefined name (`first`), which raised NameError
        # whenever a prefix was supplied.
        if prefix is not None and not language.startswith(prefix):
            continue
        try:
            print('%s' % language.decode(settings.encoding))
        except UnicodeEncodeError:
            print('%s' % language)
| 647 | + |
| 648 | + |
def extract_language_code_from_wikiprojects(self):
    '''
    Copy and paste a string of all supported projects from
    http://meta.wikimedia.org/wiki/Complete_list_of_Wikimedia_projects and use
    this function to extract the language codes. This list can be used for the
    Wiki class.

    The codes are the parenthesised tokens (word characters and hyphens) in
    the pasted text. The list of codes and its length are printed; nothing
    is returned.
    '''
    # Named `pasted` rather than `str` so the builtin is not shadowed.
    pasted = '''
    Čeština (cs) • Deutsch (de) • English (en) • Español (es) • Français (fr) • Italiano (it) • Português (pt) • Suomi (fi) • Ελληνικά (el) • Русский (ru) • 日本語 (ja)
    '''
    # Raw string keeps the backslashes literal. The capture group returns the
    # code without its surrounding parentheses, so no stripping pass is needed.
    pattern = re.compile(r'\(([\w\-]*)\)')
    abbr = pattern.findall(pasted)
    print(abbr)
    print(len(abbr))
| 664 | + |
def init():
    '''
    Build a fresh LanguageContainer and return whatever its
    determine_default_language() method reports.
    '''
    return LanguageContainer().determine_default_language()
| 668 | + |
# When the module is executed as a script, run init(); its return value is
# discarded.
if __name__ == '__main__':
    init()
Property changes on: trunk/tools/editor_trends/classes/languages.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 671 | + native |
Index: trunk/tools/editor_trends/configuration.py |
— | — | @@ -197,7 +197,9 @@ |
198 | 198 | return 500 |
199 | 199 | |
200 | 200 | def update_python_path(self): |
201 | | - IGNORE_DIRS = ['wikistats', 'zips'] |
| 201 | + IGNORE_DIRS = ['wikistats', 'zips', 'datasets', 'mapreduce', 'logs', |
| 202 | + 'statistics', 'js_scripts', 'deployment', |
| 203 | + 'documentation', 'data', 'code-snippets'] |
202 | 204 | dirs = [name for name in os.listdir(self.working_directory) if |
203 | 205 | os.path.isdir(os.path.join(self.working_directory, name))] |
204 | 206 | for subdirname in dirs: |
Index: trunk/tools/editor_trends/cronjobs.py |
— | — | @@ -22,7 +22,9 @@ |
23 | 23 | import manage as manager |
24 | 24 | |
25 | 25 | from database import db |
26 | | -from classes import wikiprojects |
| 26 | +from classes import languages |
| 27 | +from classes import projects |
| 28 | +from classes import runtime_settings |
27 | 29 | from analyses import analyzer |
28 | 30 | |
29 | 31 | |
— | — | @@ -30,13 +32,10 @@ |
31 | 33 | ''' |
32 | 34 | This function should only be called as a cronjob and not directly. |
33 | 35 | ''' |
34 | | - parser, settings, wiki = manager.init_args_parser() |
| 36 | + project, language, parser, settings = manager.init_args_parser() |
35 | 37 | args = parser.parse_args(['django']) |
36 | | - args.language = wikiprojects.get_language(task['language']) |
37 | | - args.project = task['project'] |
38 | | - print args |
39 | | - wiki = wikiprojects.Wiki(settings, args) |
40 | | - res = manager.all_launcher(wiki, settings, None) |
| 38 | + rts = runtime_settings.RunTimeSettings(project, language, settings, args) |
| 39 | + res = manager.all_launcher(rts, settings, None) |
41 | 40 | return res |
42 | 41 | |
43 | 42 | |