r81339 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r81338 | r81339 | r81340 >
Date:22:51, 1 February 2011
Author:diederik
Status:deferred
Tags:
Comment:
Storing namespace variable in extract phase, refactored.
Modified paths:
  • /trunk/tools/editor_trends/etl/extracter.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/extracter.py
@@ -93,14 +93,14 @@
 94  94   if title == None:
 95  95   return False
 96  96   ns = title.split(':')
 97     - if len(ns) ==1 and '0' in namespaces:
     97 + if len(ns) == 1 and '0' in namespaces:
 98  98   return {'id': 0, 'name': 'main namespace'}
 99  99   else:
100 100   if ns[0] in namespaces:
101 101   return {'id': ns[0], 'name': ns[1]}
102 102   else:
103 103   return False
104     -
    104 +
105 105   # for namespace in namespaces:
106 106   # if title.startswith(namespace):
107 107   # return False
@@ -237,7 +237,13 @@
238 238   flat.append(f)
239 239   return flat
240 240
    241 + def add_namespace_to_output(output, namespace):
    242 + for x, o in enumerate(output):
    243 + o.append(namespace['id'])
    244 + output[x] = o
    245 + return output
241 246
    247 +
242 248   def parse_dumpfile(tasks, project, language_code, filehandles, lock, namespaces=['0']):
243 249   bot_ids = detector.retrieve_bots(language_code)
244 250   location = os.path.join(settings.input_location, language_code, project)
@@ -281,7 +287,7 @@
282 288   revisions = page.findall('revision')
283 289   revisions = parse_comments(revisions, remove_numeric_character_references)
284 290   output = output_editor_information(revisions, article_id, bot_ids)
285     - output = [o.append(namespace['id'] for o in output)]
    291 + output = add_namespace_to_output(output, namespace)
286 292   write_output(output, filehandles, lock)
287 293   file_utils.write_list_to_csv([article_id, title], fh2)
288 294   processed += 1

Status & tagging log