r81163 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r81162 | r81163 | r81164 >
Date: 20:51, 28 January 2011
Author: diederik
Status: deferred
Tags:
Comment:
A little bit of cleaning up.
Modified paths:
  • /trunk/tools/editor_trends/configuration.py (modified) (history)
  • /trunk/tools/editor_trends/manage.py (modified) (history)
  • /trunk/tools/editor_trends/utils/compression.py (modified) (history)
  • /trunk/tools/editor_trends/utils/file_utils.py (modified) (history)
  • /trunk/tools/editor_trends/utils/http_utils.py (modified) (history)
  • /trunk/tools/editor_trends/utils/text_utils.py (modified) (history)
  • /trunk/tools/editor_trends/wikilytics/api/views.py (modified) (history)
  • /trunk/tools/editor_trends/wikilytics/templates/chart.html (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/manage.py
@@ -153,7 +153,7 @@
154154 print 'Start storing data in MongoDB'
155155 stopwatch = timer.Timer()
156156 log.log_to_mongo(properties, 'dataset', 'store', stopwatch, event='start')
157 - db.cleanup_database(properties.project, logger)
 157+ db.cleanup_database(properties.project.name, logger)
158158 # write_message_to_log(logger, settings,
159159 # message=None,
160160 # verb='Storing',
@@ -164,7 +164,7 @@
165165 # collection=properties.collection)
166166 # for key in properties:
167167 # print key, getattr(properties, key)
168 - store.launcher(properties.sorted, properties.project, properties.collection)
 168+ store.launcher(properties.sorted, properties.project.name, properties.collection)
169169 stopwatch.elapsed()
170170 log.log_to_mongo(properties, 'dataset', 'store', stopwatch, event='finish')
171171
@@ -300,7 +300,7 @@
301301 action='store',
302302 help='Enter the first letter of a language to see which languages are \
303303 available.')
304 - parser_languages.set_defaults(func=language.show_languages)
 304+ parser_languages.set_defaults(func=language.show_languages, args=[settings, project])
305305
306306 #CONFIG
307307 parser_config = subparsers.add_parser('config',
Index: trunk/tools/editor_trends/wikilytics/api/views.py
@@ -13,6 +13,7 @@
1414 from wikilytics.api.forms import SearchForm, AnalysisForm
1515 from wikilytics.api.models import Editor, Dataset, Job, Dump
1616 import wikilytics.api.helpers as helpers
 17+from editor_trends.analyses import json_encoders
1718
1819
1920 def search(request):
@@ -76,7 +77,7 @@
7778 return HttpResponseRedirect(reverse('chart_generator', args=[project, language, chart]))
7879 elif xhr:
7980 dthandler = lambda obj:'new Date("%s")' % datetime.date.ctime(obj) if isinstance(obj, datetime.datetime) else obj
80 - data = helpers.transform_to_json(ds)
 81+ data = json_encoders.transform_to_json(ds)
8182 return HttpResponse(json.dumps(data, default=dthandler), mimetype='application/json')
8283 else:
8384
Index: trunk/tools/editor_trends/wikilytics/templates/chart.html
@@ -19,6 +19,7 @@
2020 var options = json['options'];
2121 var css_id = "#wikilytics";
2222 console.log(data);
 23+ console.log(options);
2324 $("#status > h1").hide();
2425 $.plot($(css_id), data, options);
2526 }
Index: trunk/tools/editor_trends/configuration.py
@@ -64,7 +64,7 @@
6565 class Settings:
6666 __metaclass__ = Singleton
6767
68 - def __init__(self, process_multiplier=1, **kwargs):
 68+ def __init__(self, process_multiplier=1):
6969 self.minimum_python_version = (2, 6)
7070 self.detect_python_version()
7171 self.encoding = 'utf-8'
@@ -74,7 +74,7 @@
7575
7676 # Timestamp format as generated by the MediaWiki dumps
7777 self.timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
78 -
 78+ self.timestamp_server = '%D, %d %M %Y %H:M%:%SZ'
7979 #67108864 # ==64Mb, see http://hadoop.apache.org/common/docs/r0.20.0/hdfs_design.html#Large+Data+Setsfor reason
8080 self.max_xmlfile_size = 4096 * 1024
8181
Index: trunk/tools/editor_trends/utils/file_utils.py
@@ -37,8 +37,9 @@
3838 import configuration
3939 settings = configuration.Settings()
4040
41 -import exceptions
 41+from classes import exceptions
4242 import messages
 43+import text_utils
4344
4445 try:
4546 import psyco
@@ -74,16 +75,10 @@
7576 '''
7677 for line in fh:
7778 line = line.strip()
78 - if line == '':
79 - continue
80 - else:
81 - line = line.split('\t')
82 - yield line
 79+ line = line.split('\t')
 80+ yield line
8381
8482
85 -
86 -
87 -
8883 # read / write data related functions
8984 def read_data_from_csv(location, filename, encoding):
9085 '''
@@ -237,6 +232,22 @@
238233 return os.path.getsize(path)
239234
240235
 236+def set_modified_data(mod_rem, location, filename):
 237+ '''
 238+ Mod_rem is the modified date of the remote file (the Wikimedia dump file)
 239+ Mon, 15 Mar 2010 07:07:30 GMT Example server timestamp
 240+ '''
 241+ path = os.path.join(location, filename)
 242+ print mod_rem
 243+ mod_rem = text_utils.convert_timestamp_to_datetime_naive(mod_rem, settings.timestamp_format)
 244+ os.utime(path, (mod_rem, mod_rem))
 245+ raise exceptions.NotYetImplementedError(set_modified_data)
 246+
 247+def get_modified_date(location, filename):
 248+ path = os.path.join(location, filename)
 249+ return os.stat(path).st_mtime
 250+
 251+
241252 def check_file_exists(location, filename):
242253 if hasattr(filename, '__call__'):
243254 filename = construct_filename(filename, '.bin')
Index: trunk/tools/editor_trends/utils/http_utils.py
@@ -31,8 +31,6 @@
3232 import log
3333
3434
35 -
36 -
3735 def read_data_from_http_connection(domain, path):
3836 if not domain.startswith('http://'):
3937 domain = 'http://%s' % domain
@@ -50,7 +48,6 @@
5149 return data
5250
5351
54 -
5552 def retrieve_md5_hashes(domain, project, date):
5653 path = '%s/%s/%s-%s-md5sums.txt' % (project, date, project, date)
5754 data = read_data_from_http_connection(domain, path)
@@ -68,7 +65,7 @@
6966 canonical_filename = file_utils.determine_canonical_name(filename)
7067 for x in xrange(1, 100):
7168 f = '%s%s.xml.%s' % (canonical_filename, x, ext)
72 - res = check_remote_path_exists(domain, path, f)
 69+ res = get_headers(domain, path, f)
7370 if res == None or res.status != 200:
7471 if x == 1:
7572 task_queue.put(filename)
@@ -83,8 +80,7 @@
8481 return task_queue
8582
8683
87 -
88 -def check_remote_path_exists(domain, path, filename):
 84+def get_headers(domain, path, filename):
8985 '''
9086 @path is the full path of the file to be downloaded
9187 @filename is the name of the file to be downloaded
@@ -104,11 +100,20 @@
105101
106102 except httplib.socket.error:
107103 raise httplib.NotConnected('It seems that %s is temporarily \
108 - unavailable, please try again later.' % url)
 104+ unavailable, please try again later.' % url)
109105
110106
 107+def determine_modified_date(domain, path, filename):
 108+ res = get_headers(domain, path, filename)
 109+ print res.__dict__
 110+ if res != None and res.status == 200:
 111+ return int(res.getheader('last-modified', -1))
 112+ else:
 113+ return - 1
 114+
 115+
111116 def determine_remote_filesize(domain, path, filename):
112 - res = check_remote_path_exists(domain, path, filename)
 117+ res = get_headers(domain, path, filename)
113118 if res != None and res.status == 200:
114119 return int(res.getheader('content-length', -1))
115120 else:
@@ -116,9 +121,10 @@
117122
118123
119124 def debug():
120 - domain = 'download.wikimedia.org'
121 - path = 'enwikinews'
122 - filename = None
 125+ domain = 'http://download.wikimedia.org'
 126+ path = '/enwikinews/20100315/'
 127+ filename = 'enwikinews-20100315-all-titles-in-ns0.gz'
 128+ determine_modified_date(domain, path, filename)
123129 #check_remote_path_exists(domain, path, filename)
124130 #read_directory_contents(domain, path)
125131 # download_wp_dump('http://download.wikimedia.org/enwiki/latest',
Index: trunk/tools/editor_trends/utils/compression.py
@@ -25,7 +25,7 @@
2626 import configuration
2727 settings = configuration.Settings()
2828 import file_utils
29 -import exceptions
 29+from classes import exceptions
3030 import timer
3131 import log
3232
Index: trunk/tools/editor_trends/utils/text_utils.py
@@ -30,18 +30,19 @@
3131 return datetime.datetime.strptime(timestamp[:10], settings.date_format)
3232
3333
34 -def convert_timestamp_to_datetime_naive(timestamp):
35 - return datetime.datetime.strptime(timestamp, settings.timestamp_format)
 34+def convert_timestamp_to_datetime_naive(timestamp, timestamp_format):
 35+ return datetime.datetime.strptime(timestamp, timestamp_format)
3636
3737
3838 def convert_timestamp_to_datetime_utc(timestamp):
3939 tz = datetime.tzinfo('utc')
40 - d = convert_timestamp_to_datetime_naive(timestamp)
 40+ d = convert_timestamp_to_datetime_naive(timestamp, settings.timestamp_format)
4141 #return d.replace(tzinfo=tz) #enabling this line crashes pymongo
4242 return d
4343
4444
4545
 46+
4647 def invert_dict(dictionary):
4748 '''
4849 @dictionary is a simple dictionary containing simple values, ie. no lists,

Status & tagging log