r75090 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r75089‎ | r75090 | r75091 >
Date:18:11, 20 October 2010
Author:diederik
Status:deferred
Tags:
Comment:
Added support to download dump files from download.wikimedia.org
Modified paths:
  • /trunk/tools/editor_trends/utils/dump_downloader.py (added) (history)

Diff [purge]

Index: trunk/tools/editor_trends/utils/dump_downloader.py
@@ -0,0 +1,79 @@
 2+#!/usr/bin/python
 3+# -*- coding: utf-8 -*-
 4+'''
 5+Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com)
 6+This program is free software; you can redistribute it and/or
 7+modify it under the terms of the GNU General Public License version 2
 8+as published by the Free Software Foundation.
 9+This program is distributed in the hope that it will be useful,
 10+but WITHOUT ANY WARRANTY; without even the implied warranty of
 11+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 12+See the GNU General Public License for more details, at
 13+http://www.fsf.org/licenses/gpl.html
 14+'''
 15+
 16+__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ])
 17+
 18+
 19+import os
 20+import sys
 21+import urllib2
 22+import httplib
 23+
 24+import progressbar
 25+
 26+import utils
 27+import settings
 28+
 29+
def determine_remote_filesize(url, filename):
    '''
    Ask the server for the size of a remote file with an HTTP HEAD request.

    @url is the location of the directory that holds the file, for example
    'http://download.wikimedia.org/enwiki/latest'; a 'host/path' value
    without a scheme is treated as plain http
    @filename is the name of the file to be downloaded

    Returns the value of the Content-Length header as an integer, or -1 when
    the server does not answer with status 200 or does not send a usable
    Content-Length header. Connection errors raised by httplib are not
    caught here and propagate to the caller.
    '''
    # Function-scope import: urlparse is not among the module-level imports.
    from urlparse import urlsplit

    # httplib connections take a bare host[:port], not a complete URL, so
    # the URL is split into host and path before connecting.
    if '://' not in url:
        url = 'http://' + url
    parts = urlsplit(url)
    # The HEAD request needs the absolute path of the file on the server.
    path = '%s/%s' % (parts.path.rstrip('/'), filename.lstrip('/'))

    if parts.scheme == 'https':
        conn = httplib.HTTPSConnection(parts.netloc)
    else:
        conn = httplib.HTTPConnection(parts.netloc)
    try:
        conn.request('HEAD', path)
        res = conn.getresponse()
        if res.status != 200:
            return -1
        try:
            # Header values arrive as strings; callers need a number.
            return int(res.getheader('content-length', -1))
        except (TypeError, ValueError):
            return -1
    finally:
        conn.close()
 42+
 43+
def download_wp_dump(url, filename, location, pbar):
    '''
    This is a very simple replacement for wget and curl because Windows does
    not support these tools.
    @url location of the file to be downloaded
    @filename name of the file to be downloaded
    @location indicates where to store the file locally
    @pbar is an instance of progressbar.ProgressBar(), or None to download
    without progress reporting

    The file is written to os.path.join(location, filename). Errors that
    urllib2 reports (URLError / HTTPError) are printed instead of raised.
    Returns None.
    '''
    chunk = 4096
    # Join with exactly one slash, whether or not url already ends with one.
    remote = '%s/%s' % (url.rstrip('/'), filename.lstrip('/'))

    # The size is only used to scale the progress bar, so a failed or
    # unusable size lookup must not prevent the download itself.
    try:
        filesize = int(determine_remote_filesize(url, filename))
    except (httplib.HTTPException, IOError, TypeError, ValueError):
        filesize = -1
    show_progress = pbar is not None and filesize > 0

    try:
        response = urllib2.urlopen(urllib2.Request(remote))
        try:
            # Open in binary mode: the payload is raw bytes, and a text-mode
            # or encoding-aware handle would corrupt it (notably on Windows).
            fh = open(os.path.join(location, filename), 'wb')
            try:
                if show_progress:
                    # NOTE(review): relies on ProgressBar exposing a writable
                    # maxval attribute, as progressbar 2.x does - confirm for
                    # the installed version.
                    pbar.maxval = filesize
                    pbar.start()
                received = 0
                while True:
                    data = response.read(chunk)
                    if not data:
                        break
                    fh.write(data)
                    received += len(data)
                    if show_progress:
                        # Never report more than maxval, even if the server
                        # sends more bytes than it announced.
                        pbar.update(min(received, filesize))
                if show_progress:
                    pbar.finish()
                print('Finished downloading %s.' % remote)
            finally:
                fh.close()
        finally:
            response.close()
    # HTTPError is a subclass of URLError, so it has to be handled first.
    except urllib2.HTTPError as error:
        print('Error: %s' % error.code)
    except urllib2.URLError as error:
        print('Reason: %s' % error.reason)
 76+
 77+
if __name__ == '__main__':
    # Script entry point: download 'bla.xml' from the enwiki 'latest' dump
    # directory into settings.XML_FILE_LOCATION, reporting progress on a
    # fresh progress bar.
    pbar = progressbar.ProgressBar()
    download_wp_dump(
        'http://download.wikimedia.org/enwiki/latest',
        'bla.xml',
        settings.XML_FILE_LOCATION,
        pbar
    )
Property changes on: trunk/tools/editor_trends/utils/dump_downloader.py
___________________________________________________________________
Added: svn:eol-style
181 + native
Added: svn:mime-type
282 + text/plain

Status & tagging log