Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -199,6 +199,9 @@ |
200 | 200 | self.write_articles() |
201 | 201 | self.write_comments() |
202 | 202 | |
| 203 | + def close(self): |
| 204 | + self.store() |
| 205 | + self.filehandles = [fh.close() for fh in self.filehandles] |
203 | 206 | |
204 | 207 | def write_comments(self): |
205 | 208 | rows = [] |
— | — | @@ -242,23 +245,25 @@ |
243 | 246 | #t0 = datetime.datetime.now() |
244 | 247 | self.group_revisions_by_fileid(data) |
245 | 248 | editors = self.revisions.keys() |
246 | | - for editor in editors: |
247 | | - #lock the write around all edits of an editor for a particular page |
248 | | - for i, revision in enumerate(self.revisions[editor]): |
249 | | - if i == 0: |
250 | | - file_id = self.get_hash(revision[2]) |
251 | | - if self.lock.available(file_id): |
252 | | - fh = self.filehandles[file_id] |
253 | | - #print editor, file_id, fh |
254 | | - else: |
255 | | - break |
256 | | - try: |
257 | | - file_utils.write_list_to_csv(revision, fh) |
258 | | - self.lock.release(file_id) |
259 | | - del self.revisions[editor] |
260 | | - except Exception, error: |
261 | | - print '''Encountered the following error while writing |
262 | | - revision data to %s: %s''' % (fh, error) |
| 249 | + while len(self.revision.keys()) > 0: |
| 250 | + print len(self.revision.keys()) |
| 251 | + for editor in editors: |
| 252 | + #lock the write around all edits of an editor for a particular page |
| 253 | + for i, revision in enumerate(self.revisions[editor]): |
| 254 | + if i == 0: |
| 255 | + file_id = self.get_hash(revision[2]) |
| 256 | + if self.lock.available(file_id): |
| 257 | + fh = self.filehandles[file_id] |
| 258 | + #print editor, file_id, fh |
| 259 | + else: |
| 260 | + break |
| 261 | + try: |
| 262 | + file_utils.write_list_to_csv(revision, fh) |
| 263 | + self.lock.release(file_id) |
| 264 | + del self.revisions[editor] |
| 265 | + except Exception, error: |
| 266 | + print '''Encountered the following error while writing |
| 267 | + revision data to %s: %s''' % (fh, error) |
263 | 268 | #t1 = datetime.datetime.now() |
264 | 269 | #print '%s revisions took %s' % (len(self.revisions), (t1 - t0)) |
265 | 270 | |
— | — | @@ -720,7 +725,7 @@ |
721 | 726 | t0 = t1 |
722 | 727 | |
723 | 728 | if dataset == 'training': |
724 | | - cache.store() |
| 729 | + cache.close() |
725 | 730 | cache.summary() |
726 | 731 | else: |
727 | 732 | location = os.getcwd() |
— | — | @@ -778,8 +783,8 @@ |
779 | 784 | extracter.start() |
780 | 785 | |
781 | 786 | input_queue.join() |
782 | | - #filehandles = [fh.close() for fh in filehandles] |
783 | 787 | |
| 788 | + |
784 | 789 | def launcher_training(): |
785 | 790 | ''' |
786 | 791 | Launcher for creating training dataset for data competition |