r86069 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r86068 | r86069 | r86070 >
Date:	19:07, 14 April 2011
Author:	diederik
Status:	deferred
Tags:
Comment:	Small fixes for generating md5 hashes.
Modified paths:	/trunk/tools/editor_trends/etl/extracter.py (modified) (history) /trunk/tools/editor_trends/etl/variables.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/variables.py
—	—	@@ -50,18 +50,11 @@
51	51	return True
52	52
53	53
54		~~-def extract_revision_text(revision):~~
55		~~- dump(revision)~~
56		~~- if revision.text == None:~~
57		~~- revision.text = fix_revision_text(revision)~~
58		~~- return revision.text~~
59		~~-# rev = revision.find('ns0:text')~~
60		~~-# if rev != None:~~
61		~~-# if rev.text == None:~~
62		~~-# rev = fix_revision_text(revision)~~
63		~~-# return rev.text.encode('utf-8')~~
64		~~-# else:~~
65		~~-# return ''~~
	54	+def extract_revision_text(revision, xml_namespace):
	55	+ rev_text = revision.find('%s%s' % (xml_namespace, 'text'))
	56	+ if rev_text.text == None:
	57	+ rev_text.text = fix_revision_text(revision)
	58	+ return rev_text.text
66	59
67	60
68	61	def parse_title(title):
—	—	@@ -158,15 +151,14 @@
159	152
160	153	def fix_revision_text(revision):
161	154	if revision.text == None:
162		~~- revision.text = ''~~
163		~~- return revision~~
	155	+ return ''
164	156
165	157
166	158	def create_md5hash(text):
167	159	hash = {}
168	160	if text != None:
169	161	m = hashlib.md5()
170		~~- m.update(text)~~
	162	+ m.update(text.encode('utf-8'))
171	163	#echo m.digest()
172	164	hash['hash'] = m.hexdigest()
173	165	else:
Index: trunk/tools/editor_trends/etl/extracter.py
—	—	@@ -54,14 +54,13 @@
55	55	return md5hashes, size
56	56
57	57	revision_id = revision.find('%s%s' % (xml_namespace, 'id'))
58		~~- print revision_id~~
59	58	revision_id = variables.extract_revision_id(revision_id)
60	59	if revision_id == None:
61	60	#revision_id is missing, which is weird
62	61	return md5hashes, size
63	62
64	63	article['revision_id'] = revision_id
65		~~- text = variables.extract_revision_text(revision)~~
	64	+ text = variables.extract_revision_text(revision, xml_namespace)
66	65	article.update(contributor)
67	66
68	67	comment = variables.extract_comment_text(revision_id, revision)