r85096 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r85095 | r85096 | r85097 >
Date:22:23, 31 March 2011
Author:diederik
Status:deferred
Tags:
Comment:
Fixed some edge cases with '=' sign in article title.
Modified paths:
  • /trunk/tools/editor_trends/etl/enricher.py (modified) (history)
  • /trunk/tools/editor_trends/etl/store.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/store.py
@@ -116,15 +116,15 @@
117117 line = line.strip()
118118 #print line.encode('utf-8')
119119 line = line.split('\t')
120 - title = line[-1]
121 - article_id = line[0]
122 - ns = line[1]
123 - if len(line) == 4:
124 - category = line[2]
125 - collection.insert({'id':article_id, 'title':title,
126 - 'category': category, 'ns': ns})
127 - else:
128 - collection.insert({'id':article_id, 'title':title, 'ns': ns})
 120+ data = {}
 121+ for l in line:
 122+ #print l.encode('utf-8')
 123+ try:
 124+ key, value = l.split('=')
 125+ except ValueError, error:
 126+ print l.encode('utf-8')
 127+ data[key] = value
 128+ collection.insert(data)
129129 fh.close()
130130 print 'Done...'
131131
Index: trunk/tools/editor_trends/etl/enricher.py
@@ -216,12 +216,10 @@
217217 keys = dict.keys()
218218 value = []
219219 for key in keys:
220 - #obs = '%s=%s' % (key, dict[key])
 220+ value.append(key)
221221 value.append(dict[key])
222 - #if key != 'ns' and key != 'title':
223 - # print dict['title'], obs
224 - #article_id = 'id=%s' % article_id
225222 value.insert(0, article_id)
 223+ value.insert(0, 'id')
226224 #title = title.encode('ascii')
227225 #row = '\t'.join([article_id, title]) + '\n'
228226 rows.append(value)
@@ -462,7 +460,7 @@
463461 ns = {}
464462 if title != None:
465463 for namespace in include_ns:
466 - if title.startswith(ns):
 464+ if title.startswith(namespace):
467465 ns['namespace'] = include_ns[namespace]
468466 if ns == {}:
469467 for namespace in exclude_ns.values():