Index: trunk/tools/editor_trends/etl/extracter.py |
— | — | @@ -124,6 +124,7 @@ |
125 | 125 | id = False
|
126 | 126 | ns = False
|
127 | 127 | parse = False
|
| 128 | + count_articles = 0
|
128 | 129 |
|
129 | 130 | try:
|
130 | 131 | for event, elem in context:
|
— | — | @@ -138,8 +139,8 @@ |
139 | 140 | current_namespace = variables.determine_namespace(title, namespaces, include_ns)
|
140 | 141 | if current_namespace != False:
|
141 | 142 | parse = True
|
142 | | - cache.count_articles += 1
|
143 | | - if cache.count_articles % 10000 == 0:
|
| 143 | + count_articles += 1
|
| 144 | + if count_articles % 10000 == 0:
|
144 | | - print 'Worker %s parsed %s articles' % (process_id, cache.count_articles)
|
| 145 | + print 'Worker %s parsed %s articles' % (process_id, count_articles)
|
145 | 146 |
|
146 | 147 | elem.clear()
|
— | — | @@ -149,7 +150,6 @@ |
150 | 151 | clear = False
|
151 | 152 | else:
|
152 | 153 | counts = datacompetition_parse_revision(revision, xml_namespace, bots, counts)
|
153 | | - cache.count_revisions += 1
|
154 | 154 | clear = True
|
155 | 155 | if clear:
|
156 | 156 | elem.clear()
|