Index: trunk/tools/editor_trends/etl/store.py |
— | — | @@ -116,15 +116,15 @@ |
117 | 117 | line = line.strip() |
118 | 118 | #print line.encode('utf-8') |
119 | 119 | line = line.split('\t') |
120 | | - title = line[-1] |
121 | | - article_id = line[0] |
122 | | - ns = line[1] |
123 | | - if len(line) == 4: |
124 | | - category = line[2] |
125 | | - collection.insert({'id':article_id, 'title':title, |
126 | | - 'category': category, 'ns': ns}) |
127 | | - else: |
128 | | - collection.insert({'id':article_id, 'title':title, 'ns': ns}) |
| 120 | + data = {} |
| 121 | + for l in line: |
| 122 | + #print l.encode('utf-8') |
| 123 | + try: |
| 124 | + key, value = l.split('=') |
| 125 | + except ValueError, error: |
| 126 | + print l.encode('utf-8') |
| 127 | + data[key] = value |
| 128 | + collection.insert(data) |
129 | 129 | fh.close() |
130 | 130 | print 'Done...' |
131 | 131 | |
Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -216,12 +216,10 @@ |
217 | 217 | keys = dict.keys() |
218 | 218 | value = [] |
219 | 219 | for key in keys: |
220 | | - #obs = '%s=%s' % (key, dict[key]) |
| 220 | + value.append(key) |
221 | 221 | value.append(dict[key]) |
222 | | - #if key != 'ns' and key != 'title': |
223 | | - # print dict['title'], obs |
224 | | - #article_id = 'id=%s' % article_id |
225 | 222 | value.insert(0, article_id) |
| 223 | + value.insert(0, 'id') |
226 | 224 | #title = title.encode('ascii') |
227 | 225 | #row = '\t'.join([article_id, title]) + '\n' |
228 | 226 | rows.append(value) |
— | — | @@ -462,7 +460,7 @@ |
463 | 461 | ns = {} |
464 | 462 | if title != None: |
465 | 463 | for namespace in include_ns: |
466 | | - if title.startswith(ns): |
| 464 | + if title.startswith(namespace): |
467 | 465 | ns['namespace'] = include_ns[namespace] |
468 | 466 | if ns == {}: |
469 | 467 | for namespace in exclude_ns.values(): |