r86069 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r86068 | r86069 | r86070 >
Date:19:07, 14 April 2011
Author:diederik
Status:deferred
Tags:
Comment:
Small fixes for generating md5 hashes.
Modified paths:
  • /trunk/tools/editor_trends/etl/extracter.py (modified) (history)
  • /trunk/tools/editor_trends/etl/variables.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/variables.py
@@ -50,18 +50,11 @@
51 51 return True
52 52
53 53
54 -def extract_revision_text(revision):
55 - dump(revision)
56 - if revision.text == None:
57 - revision.text = fix_revision_text(revision)
58 - return revision.text
59 -# rev = revision.find('ns0:text')
60 -# if rev != None:
61 -# if rev.text == None:
62 -# rev = fix_revision_text(revision)
63 -# return rev.text.encode('utf-8')
64 -# else:
65 -# return ''
 54+def extract_revision_text(revision, xml_namespace):
 55+ rev_text = revision.find('%s%s' % (xml_namespace, 'text'))
 56+ if rev_text.text == None:
 57+ rev_text.text = fix_revision_text(revision)
 58+ return rev_text.text
66 59
67 60
68 61 def parse_title(title):
@@ -158,15 +151,14 @@
159 152
160 153 def fix_revision_text(revision):
161 154 if revision.text == None:
162 - revision.text = ''
163 - return revision
 155+ return ''
164 156
165 157
166 158 def create_md5hash(text):
167 159 hash = {}
168 160 if text != None:
169 161 m = hashlib.md5()
170 - m.update(text)
 162+ m.update(text.encode('utf-8'))
171 163 #echo m.digest()
172 164 hash['hash'] = m.hexdigest()
173 165 else:
Index: trunk/tools/editor_trends/etl/extracter.py
@@ -54,14 +54,13 @@
55 55 return md5hashes, size
56 56
57 57 revision_id = revision.find('%s%s' % (xml_namespace, 'id'))
58 - print revision_id
59 58 revision_id = variables.extract_revision_id(revision_id)
60 59 if revision_id == None:
61 60 #revision_id is missing, which is weird
62 61 return md5hashes, size
63 62
64 63 article['revision_id'] = revision_id
65 - text = variables.extract_revision_text(revision)
 64+ text = variables.extract_revision_text(revision, xml_namespace)
66 65 article.update(contributor)
67 66
68 67 comment = variables.extract_comment_text(revision_id, revision)