Index: trunk/tools/editor_trends/etl/sort.py |
— | — | @@ -19,18 +19,19 @@ |
20 | 20 | |
21 | 21 | |
22 | 22 | import heapq |
23 | | -import sys |
24 | | -import os |
25 | 23 | import multiprocessing |
26 | 24 | import progressbar |
27 | 25 | from Queue import Empty |
28 | 26 | |
29 | 27 | from utils import file_utils |
30 | | -from utils import messages |
31 | 28 | from classes import consumers |
32 | 29 | |
33 | 30 | |
34 | 31 | class Sorter(consumers.BaseConsumer): |
| 32 | + ''' |
| 33 | + This class takes care of sorting the different csv files as they have been |
| 34 | + generated by the Extracter task. A merge sort is used for this purpose. |
| 35 | + ''' |
35 | 36 | def run(self): |
36 | 37 | ''' |
37 | 38 | The feeder function is called by the launcher and gives it a task to |
— | — | @@ -43,7 +44,8 @@ |
44 | 45 | if filename == None: |
45 | 46 | self.result.put(None) |
46 | 47 | break |
47 | | - elif filename.startswith('comments') or filename.startswith('title'): |
| 48 | + elif filename.startswith('comments') or \ |
| 49 | + filename.startswith('title'): |
48 | 50 | continue |
49 | 51 | fh = file_utils.create_txt_filehandle(self.rts.txt, |
50 | 52 | filename, |
— | — | @@ -59,10 +61,10 @@ |
60 | 62 | sorted_data = mergesort(data) |
61 | 63 | write_sorted_file(sorted_data, filename, self.rts) |
62 | 64 | self.result.put(True) |
63 | | - except UnicodeDecodeError, e: |
64 | | - print 'Error: %s, (%s)' % (e, filename) |
65 | | - except MemoryError, e: |
66 | | - print 'Error: %s, (%s)' % (e, filename) |
| 65 | + except UnicodeDecodeError, error: |
| 66 | + print 'Error: %s, (%s)' % (error, filename) |
| 67 | + except MemoryError, error: |
| 68 | + print 'Error: %s, (%s)' % (error, filename) |
67 | 69 | except Empty: |
68 | 70 | pass |
69 | 71 | |
— | — | @@ -102,11 +104,9 @@ |
103 | 105 | """Merge two sorted lists together. Returns the merged list.""" |
104 | 106 | result = [] |
105 | 107 | while front and back: |
106 | | - ''' |
107 | | - pick the smaller one from the front and stick it on |
| 108 | + '''pick the smaller one from the front and stick it on |
108 | 109 | note that list.pop(0) is a linear operation, so this gives quadratic |
109 | | - running time... |
110 | | - ''' |
| 110 | + running time...''' |
111 | 111 | result.append(front.pop(0) if front[0] <= back[0] else back.pop(0)) |
112 | 112 | # add the remaining end |
113 | 113 | result.extend(front or back) |