r86063 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r86062 | r86063 | r86064 >
Date:	18:37, 14 April 2011
Author:	diederik
Status:	deferred
Tags:
Comment:	Added missing import
Modified paths:	/trunk/tools/editor_trends/etl/extracter.py (modified) (history) /trunk/tools/editor_trends/etl/variables.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/variables.py
—	—	@@ -49,13 +49,14 @@
50	50
51	51
52	52	def extract_revision_text(revision):
53		~~- rev = revision.find('ns0:text')~~
54		~~- if rev != None:~~
55		~~- if rev.text == None:~~
56		~~- rev = fix_revision_text(revision)~~
57		~~- return rev.text.encode('utf-8')~~
58		~~- else:~~
59		~~- return ''~~
	53	+ return revision.text
	54	+# rev = revision.find('ns0:text')
	55	+# if rev != None:
	56	+# if rev.text == None:
	57	+# rev = fix_revision_text(revision)
	58	+# return rev.text.encode('utf-8')
	59	+# else:
	60	+# return ''
60	61
61	62
62	63	def parse_title(title):
Index: trunk/tools/editor_trends/etl/extracter.py
—	—	@@ -18,7 +18,7 @@
19	19	__date__ = '2011-04-10'
20	20	__version__ = '0.1'
21	21
22		-
	22	+import itertools
23	23	from collections import deque
24	24	import sys
25	25	import os
—	—	@@ -54,6 +54,7 @@
55	55	return md5hashes, size
56	56
57	57	revision_id = revision.find('%s%s' % (xml_namespace, 'id'))
	58	+ print revision_id
58	59	revision_id = variables.extract_revision_id(revision_id)
59	60	if revision_id == None:
60	61	#revision_id is missing, which is weird
—	—	@@ -149,7 +150,7 @@
150	151	if event is start:
151	152	clear = False
152	153	else:
153		~~- counts = datacompetition_parse_revision(revision, xml_namespace, bots, counts)~~
	154	+ counts = datacompetition_parse_revision(elem, xml_namespace, bots, counts)
154	155	clear = True
155	156	if clear:
156	157	elem.clear()
—	—	@@ -160,9 +161,6 @@
161	162	id = False
162	163	parse = False
163	164
164		~~- else:~~
165		~~- elem.clear()~~
166		-
167	165	except SyntaxError, error:
168	166	print 'Encountered invalid XML tag. Error message: %s' % error
169	167	dump(elem)
—	—	@@ -175,6 +173,7 @@
176	174	print error
177	175
178	176	filename = 'counts_kaggle_%s.csv' % file_id
	177	+ keys = counts.keys()
179	178	fh = file_utils.create_txt_filehandle(rts.txt, filename, 'w', 'utf-8')
180	179	file_utils.write_dict_to_csv(counts, fh, keys)
181	180	fh.close()