Index: trunk/tools/editor_trends/utils/http_utils.py |
— | — | @@ -0,0 +1,128 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | +''' |
| 5 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 6 | +This program is free software; you can redistribute it and/or |
| 7 | +modify it under the terms of the GNU General Public License version 2 |
| 8 | +as published by the Free Software Foundation. |
| 9 | +This program is distributed in the hope that it will be useful, |
| 10 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 12 | +See the GNU General Public License for more details, at |
| 13 | +http://www.fsf.org/licenses/gpl.html |
| 14 | +''' |
| 15 | + |
| 16 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
| 17 | +__email__ = 'dvanliere at gmail dot com' |
| 18 | +__date__ = '2010-10-21' |
| 19 | +__version__ = '0.1' |
| 20 | + |
| 21 | +import os |
| 22 | +import sys |
| 23 | +import urllib2 |
| 24 | +import httplib |
| 25 | +import multiprocessing |
| 26 | + |
| 27 | + |
| 28 | +sys.path.append('..') |
| 29 | +import configuration |
| 30 | +settings = configuration.Settings() |
| 31 | +import file_utils |
| 32 | +import log |
| 33 | + |
| 34 | + |
| 35 | + |
| 36 | + |
| 37 | +def read_data_from_http_connection(domain, path): |
| 38 | + if not domain.startswith('http://'): |
| 39 | + domain = 'http://%s' % domain |
| 40 | + url = '%s/%s' % (domain, path) |
| 41 | + |
| 42 | + try: |
| 43 | + req = urllib2.Request(url) |
| 44 | + response = urllib2.urlopen(req) |
| 45 | + data = response.read() |
| 46 | + except urllib2.URLError, error: |
| 47 | + print 'Reason: %s' % error |
| 48 | + except urllib2.HTTPError, error: |
| 49 | + print 'Error: %s' % error |
| 50 | + |
| 51 | + return data |
| 52 | + |
| 53 | + |
| 54 | + |
def retrieve_md5_hashes(domain, project, date):
    '''
    Download the md5sums file published with a dump of @project on @date.

    @domain: host serving the dumps
    @project: wiki project name, e.g. 'enwiki'
    @date: dump date as used in the dump's directory layout
    Returns the raw contents of the md5sums file, or None when the
    download failed. Parsing of the individual checksums is not yet
    implemented.
    '''
    path = '%s/%s/%s-%s-md5sums.txt' % (project, date, project, date)
    data = read_data_from_http_connection(domain, path)
    # Previously the fetched data was discarded and the function always
    # returned None; return it so callers can actually use the checksums.
    return data
| 59 | + |
| 60 | + |
| 61 | +def create_list_dumpfiles(domain, path, filename): |
| 62 | + ''' |
| 63 | + Wikipedia offers the option to download one dump file in separate pieces. |
| 64 | + This function determines how many files there are for a giving dump and puts |
| 65 | + them in a queue. |
| 66 | + ''' |
| 67 | + task_queue = multiprocessing.JoinableQueue() |
| 68 | + ext = file_utils.determine_file_extension(filename) |
| 69 | + canonical_filename = file_utils.determine_canonical_name(filename) |
| 70 | + for x in xrange(1, 100): |
| 71 | + f = '%s%s.xml.%s' % (canonical_filename, x, ext) |
| 72 | + res = check_remote_path_exists(domain, path, f) |
| 73 | + if res == None or res.status != 200: |
| 74 | + if x == 1: |
| 75 | + task_queue.put(filename) |
| 76 | + break |
| 77 | + else: |
| 78 | + print 'Added chunk to download: %s' % f |
| 79 | + task_queue.put(f) |
| 80 | + for x in xrange(settings.number_of_processes): |
| 81 | + task_queue.put(None) |
| 82 | + return task_queue |
| 83 | + |
| 84 | + |
| 85 | + |
def check_remote_path_exists(domain, path, filename):
    '''
    Issue an HTTP HEAD request to find out whether a remote file exists.

    @path is the full path of the file to be downloaded
    @filename is the name of the file to be downloaded; may be None, in
    which case only @path is requested
    Returns the httplib response object (inspect its .status attribute);
    raises httplib.NotConnected when the host cannot be reached.
    '''
    if domain.startswith('http://'):
        # httplib wants a bare hostname, not a URL.
        domain = domain[7:]
    if filename is not None:
        url = '%s%s' % (path, filename)
    else:
        url = '%s' % path
    conn = httplib.HTTPConnection(domain)
    try:
        conn.request('HEAD', url)
        res = conn.getresponse()
        return res
    except httplib.socket.error:
        raise httplib.NotConnected('It seems that %s is temporarily '
                                   'unavailable, please try again later.'
                                   % url)
    finally:
        # Close on every path; the original leaked the connection when
        # request()/getresponse() raised.
        conn.close()
| 107 | + |
| 108 | + |
def determine_remote_filesize(domain, path, filename):
    '''
    Determine the size in bytes of a remote file via an HTTP HEAD request.

    @domain: host serving the file
    @path: full remote path of the file
    @filename: name of the file (may be None, see check_remote_path_exists)
    Returns the Content-Length header as an int, or -1 when the file does
    not exist, the request failed, or the header is absent.
    '''
    res = check_remote_path_exists(domain, path, filename)
    if res is not None and res.status == 200:
        return int(res.getheader('content-length', -1))
    else:
        return -1
| 115 | + |
| 116 | + |
def debug():
    '''Ad-hoc entry point for manually exercising the helpers above.'''
    domain, path, filename = 'download.wikimedia.org', 'enwikinews', None
    # Uncomment one of the calls below to probe a remote path by hand:
    #check_remote_path_exists(domain, path, filename)
    #read_directory_contents(domain, path)
#    download_wp_dump('http://download.wikimedia.org/enwiki/latest',
#                     'enwiki-latest-page_props.sql.gz',
#                     settings.input_location)
| 126 | + |
| 127 | + |
# Run the manual smoke test when executed as a script.
if __name__ == '__main__':
    debug()
Property changes on: trunk/tools/editor_trends/utils/http_utils.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 130 | + native |
Added: svn:mime-type |
2 | 131 | + text/plain |