r76852 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r76851 | r76852 | r76853 >
Date: 23:35, 16 November 2010
Author: diederik
Status: deferred
Tags:
Comment:
Major refactoring of settings / configuration. Updated all property references.
Modified paths:
  • /trunk/tools/editor_trends/manage.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/manage.py
@@ -23,14 +23,16 @@
2424 from argparse import ArgumentParser
2525 from argparse import RawTextHelpFormatter
2626 import locale
27 -
2827 import progressbar
2928
30 -import settings
 29+sys.path.append('..')
 30+import configuration
 31+settings = configuration.Settings()
 32+
3133 import languages
3234 from utils import utils
3335 from utils import dump_downloader
34 -import split_xml_file
 36+from etl import chunker
3537 import map_wiki_editors
3638 import optimize_editors
3739 import construct_datasets
@@ -85,10 +87,11 @@
8688
8789 def determine_file_locations(args):
8890 locations = {}
89 - location = get_value(args, 'location') if get_value(args, 'location') != None else settings.XML_FILE_LOCATION
 91+ location = get_value(args, 'location') if get_value(args, 'location') != None else settings.input_location
9092 project = retrieve_project(args)
9193 language_code = retrieve_language(args)
9294 locations['language_code'] = language_code
 95+ locations['language'] = get_value(args, 'language')
9396 locations['location'] = os.path.join(location, language_code, project)
9497 locations['project'] = project
9598 locations['full_project'] = retrieve_projectname(args)
@@ -96,65 +99,61 @@
97100 return locations
98101
99102
100 -def prepare_file_locations(location):
101 - result = utils.check_file_exists(location, '')
102 - if result == False:
103 - utils.create_directory(os.path.join(location))
104103
105104
106 -def show_settings(args, location, filename, project, full_project, language_code):
107 - project = settings.WIKIMEDIA_PROJECTS.get(project, 'wiki')
 105+def show_settings(args, location, filename, project, full_project, language_code, language):
 106+ project = settings.projects.get(project, 'wiki')
108107 project = project.title()
109108 language_map = utils.invert_dict(languages.MAPPING)
110109 print 'Project: %s' % (project)
111 - print 'Language: %s' % language_map[language_code].decode('utf-8')
 110+ print 'Language: %s / %s' % (language_map[language_code].decode(settings.encoding), language.decode(settings.encoding))
112111 print 'Input directory: %s' % location
113112 print 'Output directory: %s and subdirectories' % location
114113
115114
116 -def dump_downloader_launcher(args, location, filename, project, full_project, language_code):
 115+def dump_downloader_launcher(args, location, filename, project, full_project, language_code, language):
117116 print 'dump downloader'
118117 pbar = get_value(args, 'progress')
119 - domain = settings.WP_DUMP_LOCATION
 118+ domain = settings.wp_dump_location
120119 path = '/%s/latest/' % project
121120 extension = utils.determine_file_extension(filename)
122121 filemode = utils.determine_file_mode(extension)
123122 dump_downloader.download_wiki_file(domain, path, filename, location, filemode, pbar)
124123
125124
126 -def split_xml_file_launcher(args, location, filename, project, full_project, language_code):
127 - print 'split_xml_file_launcher'
 125+def cruncher_launcher(args, location, filename, project, full_project, language_code, language):
 126+ print 'split_settings.input_filename_launcher'
128127 ext = utils.determine_file_extension(filename)
129 - if ext in settings.COMPRESSION_EXTENSIONS:
 128+ if ext in settings.compression_extensions:
130129 ext = '.%s' % ext
131130 file = filename.replace(ext, '')
132131 result = utils.check_file_exists(location, file)
133132 if not result:
134 - retcode = extract_xml_file(args, location, filename)
 133+ retcode = launch_zip_extractor(args, location, filename)
135134 else:
136135 retcode = 0
137136 if retcode != 0:
138137 sys.exit(retcode)
139 - split_xml_file.split_xml(location, file, project, language_code)
 138+ chunker.split_file(location, file, project, language_code, language)
140139
141140
142 -def extract_xml_file(args, location, file):
143 - path = config.detect_installed_program('7zip')
 141+def launch_zip_extractor(args, location, file):
 142+ path = settings.detect_installed_program('7zip')
144143 source = os.path.join(location, file)
145144 p = None
146145
147 - if settings.OS == 'Windows':
 146+ if settings.platform == 'Windows':
148147 p = subprocess.Popen(['%s%s' % (path, '7z.exe'), 'e', '-o%s\\' % location, '%s' % (source,)], shell=True).wait()
149 - elif settings.OS == 'Linux':
 148+ elif settings.platform == 'Linux':
150149 raise NotImplementedError
151 - elif settings.OS == 'OSX':
 150+ elif settings.platform == 'OSX':
152151 raise NotImplementedError
153152 else:
154153 raise exceptions.PlatformNotSupportedError
155154 return p
156155
157156
158 -def mongodb_script_launcher(args, location, filename, project, full_project, language_code):
 157+def mongodb_script_launcher(args, location, filename, project, full_project, language_code, language):
159158 print 'mongodb_script_launcher'
160159 map_wiki_editors.run_parse_editors(project, language_code, location)
161160
@@ -169,21 +168,21 @@
170169 construct_datasets.generate_editor_dataset_launcher(project)
171170
172171
173 -def all_launcher(args, location, filename, project, full_project, language_code):
 172+def all_launcher(args, location, filename, project, full_project, language_code, language):
174173 print 'all_launcher'
175174 dump_downloader_launcher(args, location, filename, project, language_code)
176 - split_xml_file_launcher(args, location, filename, project, language_code)
 175+ split_settings.input_filename_launcher(args, location, filename, project, language_code)
177176 mongodb_script_launcher(args, location, filename, project, language_code)
178177 dataset_launcher(args, location, filename, project, language_code)
179178
180179
181180 def supported_languages():
182181 choices = languages.MAPPING.keys()
183 - choices = [c.encode(settings.ENCODING) for c in choices]
 182+ choices = [c.encode(settings.encoding) for c in choices]
184183 return tuple(choices)
185184
186185
187 -def show_languages(args, location, filename, project, full_project, language_code):
 186+def show_languages(args, location, filename, project, full_project, language_code, language):
188187 first = get_value(args, 'startswith')
189188 if first != None:
190189 first = first.title()
@@ -195,16 +194,16 @@
196195 for language in languages:
197196 try:
198197 if first != None and language.startswith(first):
199 - print '%s' % language.decode('utf-8')
 198+ print '%s' % language.decode(settings.encoding)
200199 elif first == None:
201 - print '%s' % language.decode('utf-8')
 200+ print '%s' % language.decode(settings.encoding)
202201 except UnicodeEncodeError:
203202 print '%s' % language
204203
205204
206205 def detect_python_version():
207206 version = sys.version_info[0:2]
208 - if version < settings.MINIMUM_PYTHON_VERSION:
 207+ if version < settings.minimum_python_version:
209208 raise 'Please upgrade to Python 2.6 or higher (but not Python 3.x).'
210209
211210 def about():
@@ -238,7 +237,7 @@
239238 parser_download.set_defaults(func=dump_downloader_launcher)
240239
241240 parser_split = subparsers.add_parser('split', help='The split sub command splits the downloaded file in smaller chunks to parallelize extracting information.')
242 - parser_split.set_defaults(func=split_xml_file_launcher)
 241+ parser_split.set_defaults(func=cruncher_launcher)
243242
244243 parser_sort = subparsers.add_parser('sort', help='By presorting the data, significant processing time reducations are achieved.')
245244 parser_sort.set_defaults(func=sort_launcher)
@@ -259,12 +258,12 @@
260259
261260 parser.add_argument('-p', '--project', action='store',
262261 help='Specify the Wikimedia project that you would like to download',
263 - choices=settings.WIKIMEDIA_PROJECTS.keys(),
 262+ choices=settings.projects.keys(),
264263 default='wiki')
265264
266265 parser.add_argument('-o', '--location', action='store',
267266 help='Indicate where you want to store the downloaded file.',
268 - default=settings.XML_FILE_LOCATION)
 267+ default=settings.input_location)
269268
270269 parser.add_argument('-f', '--file', action='store',
271270 choices=file_choices,
@@ -275,11 +274,12 @@
276275 help='Indicate whether you want to have a progressbar.')
277276
278277 detect_python_version()
 278+ about()
279279 args = parser.parse_args()
280280 config.load_configuration(args)
281281 locations = determine_file_locations(args)
282 - prepare_file_locations(locations['location'])
283 - about()
 282+ #prepare_file_locations(locations['location'])
 283+ settings.verify_environment([locations['location']])
284284 show_settings(args, **locations)
285285 args.func(args, **locations)
286286

Status & tagging log