r80862 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r80861‎ | r80862 | r80863 >
Date:16:20, 24 January 2011
Author:diederik
Status:deferred
Tags:
Comment:
Split dump_downloader.py into downloader.py and http_utils.py; moved downloader.py to the etl directory and http_utils.py to the utils directory.
Modified paths:
  • /trunk/tools/editor_trends/etl/downloader.py (added) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/downloader.py
@@ -0,0 +1,104 @@
 2+#!/usr/bin/python
 3+# -*- coding: utf-8 -*-
 4+'''
 5+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
 6+This program is free software; you can redistribute it and/or
 7+modify it under the terms of the GNU General Public License version 2
 8+as published by the Free Software Foundation.
 9+This program is distributed in the hope that it will be useful,
 10+but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 12+See the GNU General Public License for more details, at
 13+http://www.fsf.org/licenses/gpl.html
 14+'''
 15+
 16+
# Module metadata. NOTE(review): `__author__email` is an unusual spelling;
# the name is kept as-is because other code may read it.
__author__ = 'Diederik van Liere (dvanliere@gmail.com)'
__author__email = 'dvanliere at gmail dot com'
__date__ = '2011-01-21'
__version__ = '0.1'
 21+
 22+import urllib2
 23+import progressbar
 24+import multiprocessing
 25+import sys
 26+
 27+sys.path.append('..')
 28+import configuration
 29+settings = configuration.Settings()
 30+
 31+from utils import file_utils
 32+from utils import http_utils
 33+from utils import log
 34+
def _download_single_file(filename, config, chunk):
    '''
    Fetch one remote file and write it below config.location.

    filename -- name of the remote file; names ending in 'json' are fetched
                from settings.wp_dump_location + config.path without the
                filename appended.
    config   -- object providing `path` and `location`.
    chunk    -- number of bytes requested per read() call.

    Returns True on success, False when urllib2 reported an error.
    '''
    extension = file_utils.determine_file_extension(filename)
    filemode = file_utils.determine_file_mode(extension)
    filesize = http_utils.determine_remote_filesize(settings.wp_dump_location,
                                                    config.path, filename)
    if filemode == 'w':
        fh = file_utils.create_txt_filehandle(config.location, filename,
                                              filemode, settings.encoding)
    else:
        fh = file_utils.create_binary_filehandle(config.location, filename,
                                                 'wb')

    # Presumably -1 means the remote size is unknown (verify against
    # http_utils); in that case no progress bar is shown.
    pbar = None
    if filesize != -1:
        widgets = log.init_progressbar_widgets(filename)
        pbar = progressbar.ProgressBar(widgets=widgets,
                                       maxval=filesize).start()

    if filename.endswith('json'):
        url = settings.wp_dump_location + config.path
    else:
        url = settings.wp_dump_location + config.path + filename

    response = None
    received = 0
    try:
        response = urllib2.urlopen(urllib2.Request(url))
        while True:
            data = response.read(chunk)
            if not data:
                print('Finished downloading %s%s%s.' %
                      (settings.wp_dump_location, config.path, filename))
                break
            fh.write(data)
            if pbar is not None:
                # Track the bytes actually received rather than rewriting
                # `chunk`; cap at maxval so a server that sends more than it
                # announced cannot push the bar out of range.
                received += len(data)
                pbar.update(min(received, filesize))
    except urllib2.HTTPError as error:
        # HTTPError is a subclass of URLError, so it must be caught first.
        print('Error: %s' % error)
        return False
    except urllib2.URLError as error:
        print('Reason: %s' % error)
        return False
    finally:
        fh.close()
        if response is not None:
            response.close()
    return True


def download_wiki_file(task_queue, config):
    '''
    Download the files named on task_queue until a poison pill arrives.

    This is a very simple replacement for wget and curl because Windows does
    not have these tools installed by default.

    task_queue -- queue of filenames supporting get() and task_done(); a
                  None entry is the poison pill that stops this worker.
    config     -- object providing `path` (appended to
                  settings.wp_dump_location) and `location` (local target
                  passed to file_utils).

    Returns True when every file was fetched without a urllib2 error,
    False otherwise.
    '''
    # Local import: the module is called Queue on Python 2, queue on Python 3.
    try:
        from Queue import Empty
    except ImportError:
        from queue import Empty

    success = True
    chunk = 1024 * 4
    while True:
        try:
            filename = task_queue.get(block=False)
        except Empty:
            # Nothing left to fetch: stop cleanly instead of dying with a
            # traceback.
            break
        try:
            if filename is None:
                print('Swallowed a poison pill')
                break
            if not _download_single_file(filename, config, chunk):
                success = False
        finally:
            # Acknowledge the task only once it has been handled, so that
            # task_queue.join() really waits for the downloads to finish.
            task_queue.task_done()

    return success
 88+
 89+
def launcher(properties, settings, logger):
    '''
    Start the download worker processes and wait until they are done.

    properties -- object providing `path` and `filename`; it is passed to
                  http_utils.create_list_dumpfiles and to every worker.
    settings   -- object providing `wp_dump_location` and
                  `number_of_processes`.
    logger     -- not used by this function.

    Returns True when every worker process exited with exit code 0.
    NOTE: multiprocessing discards the return value of the worker target, so
    download errors that the worker catches itself do not change its exit
    code.
    '''
    print('Creating list of files to be downloaded...')
    tasks = http_utils.create_list_dumpfiles(settings.wp_dump_location,
                                             properties.path,
                                             properties.filename)
    consumers = [multiprocessing.Process(target=download_wiki_file,
                                         args=(tasks, properties))
                 for _ in xrange(settings.number_of_processes)]

    print('Starting consumers to download files...')
    for consumer in consumers:
        consumer.start()

    tasks.join()
    # exitcode is None until a process has terminated, so wait for each
    # worker before inspecting it.
    for consumer in consumers:
        consumer.join()
    # Exit code 0 means success; testing the raw codes with all() would
    # report failure exactly when every worker succeeded.
    return all(consumer.exitcode == 0 for consumer in consumers)
Property changes on: trunk/tools/editor_trends/etl/downloader.py
___________________________________________________________________
Added: svn:eol-style
1106 + native

Status & tagging log