Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -441,8 +441,6 @@ |
442 | 442 | elif event == 'end' and elem.tag == '%s%s' % (namespace, 'page'): |
443 | 443 | yield article |
444 | 444 | elem.clear() |
445 | | - for elem in article.values(): |
446 | | - elem.clear() |
447 | 445 | article = {} |
448 | 446 | article['revisions'] = [] |
449 | 447 | id = False |
— | — | @@ -522,7 +520,7 @@ |
523 | 521 | |
524 | 522 | |
525 | 523 | def debug(): |
526 | | - path = '/media/wikipedia_dumps/batch2/' |
| 524 | + path = '/mnt/wikipedia_dumps/batch2/' |
527 | 525 | files = file_utils.retrieve_file_list(path, 'bz2') |
528 | 526 | for file in files: |
529 | 527 | filename = os.path.join(path, file) |
— | — | @@ -531,7 +529,7 @@ |
532 | 530 | |
533 | 531 | def launcher_training(): |
534 | 532 | # launcher for creating training data |
535 | | - path = '/media/wikipedia_dumps/batch2/' |
| 533 | + path = '/mnt/wikipedia_dumps/batch2/' |
536 | 534 | function = create_variables |
537 | 535 | storage = 'csv' |
538 | 536 | dataset = 'training' |