r86063 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r86062 | r86063 | r86064 >
Date: 18:37, 14 April 2011
Author: diederik
Status: deferred
Tags:
Comment:
Added missing import
Modified paths:
  • /trunk/tools/editor_trends/etl/extracter.py (modified) (history)
  • /trunk/tools/editor_trends/etl/variables.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/variables.py
@@ -49,13 +49,14 @@
50 50
51 51
52 52 def extract_revision_text(revision):
53 - rev = revision.find('ns0:text')
54 - if rev != None:
55 - if rev.text == None:
56 - rev = fix_revision_text(revision)
57 - return rev.text.encode('utf-8')
58 - else:
59 - return ''
 53+ return revision.text
 54+# rev = revision.find('ns0:text')
 55+# if rev != None:
 56+# if rev.text == None:
 57+# rev = fix_revision_text(revision)
 58+# return rev.text.encode('utf-8')
 59+# else:
 60+# return ''
60 61
61 62
62 63 def parse_title(title):
Index: trunk/tools/editor_trends/etl/extracter.py
@@ -18,7 +18,7 @@
19 19 __date__ = '2011-04-10'
20 20 __version__ = '0.1'
21 21
22 -
 22+import itertools
23 23 from collections import deque
24 24 import sys
25 25 import os
@@ -54,6 +54,7 @@
55 55 return md5hashes, size
56 56
57 57 revision_id = revision.find('%s%s' % (xml_namespace, 'id'))
 58+ print revision_id
58 59 revision_id = variables.extract_revision_id(revision_id)
59 60 if revision_id == None:
60 61 #revision_id is missing, which is weird
@@ -149,7 +150,7 @@
150 151 if event is start:
151 152 clear = False
152 153 else:
153 - counts = datacompetition_parse_revision(revision, xml_namespace, bots, counts)
 154+ counts = datacompetition_parse_revision(elem, xml_namespace, bots, counts)
154 155 clear = True
155 156 if clear:
156 157 elem.clear()
@@ -160,9 +161,6 @@
161 162 id = False
162 163 parse = False
163 164
164 - else:
165 - elem.clear()
166 -
167 165 except SyntaxError, error:
168 166 print 'Encountered invalid XML tag. Error message: %s' % error
169 167 dump(elem)
@@ -175,6 +173,7 @@
176 174 print error
177 175
178 176 filename = 'counts_kaggle_%s.csv' % file_id
 177+ keys = counts.keys()
179 178 fh = file_utils.create_txt_filehandle(rts.txt, filename, 'w', 'utf-8')
180 179 file_utils.write_dict_to_csv(counts, fh, keys)
181 180 fh.close()