r81375 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r81374 | r81375 | r81376 >
Date: 05:10, 2 February 2011
Author: diederik
Status: deferred
Tags:
Comment:
Fixed config module.
Modified paths:
  • /trunk/tools/editor_trends/classes/runtime_settings.py (modified) (history)
  • /trunk/tools/editor_trends/configuration.py (modified) (history)
  • /trunk/tools/editor_trends/etl/sort.py (modified) (history)
  • /trunk/tools/editor_trends/etl/store.py (modified) (history)
  • /trunk/tools/editor_trends/etl/transformer.py (modified) (history)
  • /trunk/tools/editor_trends/manage.py (modified) (history)
  • /trunk/tools/editor_trends/utils/file_utils.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/manage.py
@@ -56,7 +56,7 @@
5757 Config launcher is used to reconfigure editor trends toolkit.
5858 '''
5959 # settings.load_configuration()
60 -#
 60+ pc = projects.ProjectContainer()
6161 if not os.path.exists('wiki.cfg') or properties.force:
6262 config = ConfigParser.RawConfigParser()
6363 project = None
@@ -65,17 +65,17 @@
6666 working_directory = raw_input('Please indicate where you installed Editor Trends Analytics.\nCurrent location is %s\nPress Enter to accept default.\n' % os.getcwd())
6767 input_location = raw_input('Please indicate where to store the Wikipedia dump files.\nDefault is: %s\nPress Enter to accept default.\n' % settings.input_location)
6868
69 - while project not in properties.projects.keys():
70 - project = raw_input('Please indicate which project you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % properties.projects[properties.short_project].capitalize())
71 - project = project if len(project) > 0 else properties.short_project
72 - if project not in properties.projects.keys():
73 - print 'Valid choices for a project are: %s' % ','.join(properties.projects.keys())
 69+ while project not in pc.projects.keys():
 70+ project = raw_input('Please indicate which project you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % pc.projects[properties.project.name])
 71+ project = project if len(project) > 0 else properties.project.name
 72+ if project not in pc.projects.keys():
 73+ print 'Valid choices for a project are: %s' % ','.join(pc.projects.keys())
7474
75 - while language not in properties.valid_languages:
76 - language = raw_input('Please indicate which language of project %s you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % (properties.projects[project].capitalize(), properties.language))
 75+ while language not in properties.project.valid_languages:
 76+ language = raw_input('Please indicate which language of project %s you would like to analyze.\nDefault is: %s\nPress Enter to accept default.\n' % (pc.projects[project], properties.language))
7777 if len(language) == 0:
78 - language = properties.language_code
79 - language = language if language in properties.valid_languages else properties.language
 78+ language = properties.language.code
 79+ language = language if language in properties.project.valid_languages else properties.language
8080
8181 input_location = input_location if len(input_location) > 0 else settings.input_location
8282 working_directory = working_directory if len(working_directory) > 0 else os.getcwd()
Index: trunk/tools/editor_trends/etl/store.py
@@ -20,6 +20,7 @@
2121 from Queue import Empty
2222 import multiprocessing
2323 import sys
 24+import os
2425
2526 sys.path.append('..')
2627 import configuration
@@ -31,6 +32,26 @@
3233 from database import db
3334
3435
 36+def store_articles(project, language_code):
 37+ location = os.path.join(settings.input_location, language_code, project)
 38+ fh = file_utils.create_txt_filehandle(location, 'articles.csv', 'r', settings.encoding)
 39+ headers = ['id', 'title']
 40+ data = fh.readlines()
 41+ fh.close()
 42+
 43+ dbname = '%s%s' % (language_code, project)
 44+ collection = '%s_%s' % (dbname, 'articles')
 45+ mongo = db.init_mongo_db(dbname)
 46+ collection = mongo[collection]
 47+
 48+ articles = {}
 49+ for d in data:
 50+ for header in headers:
 51+ articles[header] = d
 52+
 53+ collection.insert(articles)
 54+
 55+
3556 def store_editors(tasks, dbname, collection, source):
3657 '''
3758 This function is called by multiple consumers who each take a sorted file
Index: trunk/tools/editor_trends/etl/transformer.py
@@ -63,7 +63,7 @@
6464 return '%s' % (self.id)
6565
6666 def __call__(self):
67 -
 67+ cutoff = 9
6868 editor = self.input_db.find_one({'editor': self.id})
6969 if editor == None:
7070 return
@@ -74,7 +74,10 @@
7575 monthly_edits = db.stringify_keys(monthly_edits)
7676 edits = sort_edits(edits)
7777 edit_count = len(edits)
78 - new_wikipedian = edits[9]['date']
 78+ if len(edits) > cutoff:
 79+ new_wikipedian = edits[cutoff]['date']
 80+ else:
 81+ new_wikipedian = False
7982 first_edit = edits[0]['date']
8083 final_edit = edits[-1]['date']
8184 edits_by_year = determine_edits_by_year(edits, first_year, final_year)
@@ -83,7 +86,7 @@
8487 last_edit_by_year = db.stringify_keys(last_edit_by_year)
8588 articles_by_year = determine_articles_by_year(edits, first_year, final_year)
8689 articles_by_year = db.stringify_keys(articles_by_year)
87 - edits = edits[:10]
 90+ edits = edits[:cutoff]
8891
8992 self.output_db.insert({'editor': self.id,
9093 'edits': edits,
Index: trunk/tools/editor_trends/etl/sort.py
@@ -134,7 +134,7 @@
135135 except UnicodeDecodeError, e:
136136 print e
137137 except Empty:
138 - break
 138+ pass
139139
140140
141141 def mergesort_launcher(source, target):
Index: trunk/tools/editor_trends/classes/runtime_settings.py
@@ -52,6 +52,8 @@
5353 if args:
5454 self.args = args
5555 self.hash = self.secs_since_epoch()
 56+ print self.settings.input_location
 57+ print self.get_value('location')
5658 self.base_location = self.settings.input_location if \
5759 self.settings.input_location != None else self.get_value('location')
5860 self.project = self.update_project_settings()
@@ -84,6 +86,7 @@
8587 self.dump_filename = self.generate_wikidump_filename()
8688 self.dump_relative_path = self.set_dump_path()
8789 self.dump_absolute_path = self.set_dump_path(absolute=True)
 90+ print self.directories
8891 settings.verify_environment(self.directories)
8992
9093 def __str__(self):
@@ -138,7 +141,7 @@
139142 max_length_key = max([len(key) for key in about.keys()])
140143 print 'Final settings after parsing command line arguments:'
141144 for ab in about:
142 - print '%s: %s' % (ab.rjust(max_length_key), about[ab])
 145+ print '%s: %s' % (ab.rjust(max_length_key), about[ab].encode(self.settings.encoding))
143146
144147
145148 def get_value(self, key):
Index: trunk/tools/editor_trends/configuration.py
@@ -92,7 +92,7 @@
9393 self.working_directory = self.determine_working_directory()
9494 self.update_python_path()
9595
96 - self.root = '/' if self.platform != 'Windows' else 'c:\\'
 96+ self.root = os.path.expanduser('~') if self.platform != 'Windows' else 'c:\\'
9797 self.max_filehandles = self.determine_max_filehandles_open()
9898 self.tab_width = 4 if self.platform == 'Windows' else 8
9999
@@ -158,7 +158,7 @@
159159 try:
160160 os.makedirs(directory)
161161 except IOError:
162 - raise 'Configuration Error, could not create directory.'
 162+ print 'Configuration Error, could not create directory %s.' % directory
163163
164164 def detect_windows_program(self, program):
165165 entry = self.windows_register.get(program, None)
Index: trunk/tools/editor_trends/utils/file_utils.py
@@ -226,6 +226,7 @@
227227 print error
228228 return res
229229
 230+
230231 def determine_filesize(location, filename):
231232 path = os.path.join(location, filename)
232233 return os.path.getsize(path)

Status & tagging log