r85692 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r85691 | r85692 | r85693 >
Date:21:17, 8 April 2011
Author:diederik
Status:deferred
Tags:
Comment:
Close files when finished.
Modified paths:
  • /trunk/tools/editor_trends/etl/enricher.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/etl/enricher.py
@@ -199,6 +199,9 @@
200200 self.write_articles()
201201 self.write_comments()
202202
 203+ def close(self):
 204+ self.store()
 205+ self.filehandles = [fh.close() for fh in self.filehandles]
203206
204207 def write_comments(self):
205208 rows = []
@@ -242,23 +245,25 @@
243246 #t0 = datetime.datetime.now()
244247 self.group_revisions_by_fileid(data)
245248 editors = self.revisions.keys()
246 - for editor in editors:
247 - #lock the write around all edits of an editor for a particular page
248 - for i, revision in enumerate(self.revisions[editor]):
249 - if i == 0:
250 - file_id = self.get_hash(revision[2])
251 - if self.lock.available(file_id):
252 - fh = self.filehandles[file_id]
253 - #print editor, file_id, fh
254 - else:
255 - break
256 - try:
257 - file_utils.write_list_to_csv(revision, fh)
258 - self.lock.release(file_id)
259 - del self.revisions[editor]
260 - except Exception, error:
261 - print '''Encountered the following error while writing
262 - revision data to %s: %s''' % (fh, error)
 249+ while len(self.revision.keys()) > 0:
 250+ print len(self.revision.keys())
 251+ for editor in editors:
 252+ #lock the write around all edits of an editor for a particular page
 253+ for i, revision in enumerate(self.revisions[editor]):
 254+ if i == 0:
 255+ file_id = self.get_hash(revision[2])
 256+ if self.lock.available(file_id):
 257+ fh = self.filehandles[file_id]
 258+ #print editor, file_id, fh
 259+ else:
 260+ break
 261+ try:
 262+ file_utils.write_list_to_csv(revision, fh)
 263+ self.lock.release(file_id)
 264+ del self.revisions[editor]
 265+ except Exception, error:
 266+ print '''Encountered the following error while writing
 267+ revision data to %s: %s''' % (fh, error)
263268 #t1 = datetime.datetime.now()
264269 #print '%s revisions took %s' % (len(self.revisions), (t1 - t0))
265270
@@ -720,7 +725,7 @@
721726 t0 = t1
722727
723728 if dataset == 'training':
724 - cache.store()
 729+ cache.close()
725730 cache.summary()
726731 else:
727732 location = os.getcwd()
@@ -778,8 +783,8 @@
779784 extracter.start()
780785
781786 input_queue.join()
782 - #filehandles = [fh.close() for fh in filehandles]
783787
 788+
784789 def launcher_training():
785790 '''
786791 Launcher for creating training dataset for data competition