Index: trunk/tools/editor_trends/etl/extracter.py |
— | — | @@ -93,14 +93,14 @@ |
94 | 94 | if title == None: |
95 | 95 | return False |
96 | 96 | ns = title.split(':') |
97 | | - if len(ns) ==1 and '0' in namespaces: |
| 97 | + if len(ns) == 1 and '0' in namespaces: |
98 | 98 | return {'id': 0, 'name': 'main namespace'} |
99 | 99 | else: |
100 | 100 | if ns[0] in namespaces: |
101 | 101 | return {'id': ns[0], 'name': ns[1]} |
102 | 102 | else: |
103 | 103 | return False |
104 | | - |
| 104 | + |
105 | 105 | # for namespace in namespaces: |
106 | 106 | # if title.startswith(namespace): |
107 | 107 | # return False |
— | — | @@ -237,7 +237,13 @@ |
238 | 238 | flat.append(f) |
239 | 239 | return flat |
240 | 240 | |
def add_namespace_to_output(output, namespace):
    """Append the namespace id to every row of ``output``.

    Each row is extended in place with ``namespace['id']``, and the same
    (now mutated) ``output`` list is returned so the result can be used
    directly in an assignment.

    :param output: list of mutable rows; every row must support ``append``.
    :param namespace: mapping that provides an ``'id'`` key.
    :returns: the very ``output`` list that was passed in.
    """
    for row in output:
        # Rows are mutated in place, so neither an index nor a write-back
        # into ``output`` is needed. ``namespace['id']`` is looked up inside
        # the loop on purpose: an empty ``output`` never touches
        # ``namespace``.
        row.append(namespace['id'])
    return output
241 | 246 | |
| 247 | + |
242 | 248 | def parse_dumpfile(tasks, project, language_code, filehandles, lock, namespaces=['0']): |
243 | 249 | bot_ids = detector.retrieve_bots(language_code) |
244 | 250 | location = os.path.join(settings.input_location, language_code, project) |
— | — | @@ -281,7 +287,7 @@ |
282 | 288 | revisions = page.findall('revision') |
283 | 289 | revisions = parse_comments(revisions, remove_numeric_character_references) |
284 | 290 | output = output_editor_information(revisions, article_id, bot_ids) |
285 | | - output = [o.append(namespace['id'] for o in output)] |
| 291 | + output = add_namespace_to_output(output, namespace) |
286 | 292 | write_output(output, filehandles, lock) |
287 | 293 | file_utils.write_list_to_csv([article_id, title], fh2) |
288 | 294 | processed += 1 |