Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -141,7 +141,7 @@ |
142 | 142 | hashes[file_id].append(editor) |
143 | 143 | return hashes |
144 | 144 | |
145 | | - def group_revisions_by_fileid(self, revisions): |
| 145 | + def group_revisions_by_fileid(self): |
146 | 146 | ''' |
147 | 147 | This function groups observation by editor id and then by file_id, |
148 | 148 | this way we have to make fewer file opening calls and should reduce |
— | — | @@ -151,7 +151,7 @@ |
152 | 152 | editors = {} |
153 | 153 | #first, we group all revisions by editor |
154 | 154 | |
155 | | - for revision in revisions: |
| 155 | + for revision in self.revisions: |
156 | 156 | row = [] |
157 | 157 | #strip away the keys and make sure that the values are always in the same sequence |
158 | 158 | for key in self.keys: |
— | — | @@ -163,12 +163,11 @@ |
164 | 164 | |
165 | 165 | #now, we are going to group all editors by file_id |
166 | 166 | file_ids = self.invert_dictionary(editors) |
167 | | - revisions = {} |
| 167 | + self.revisions = {} |
168 | 168 | for file_id, editors in file_ids: |
169 | 169 | for editor in editors: |
170 | | - revisions.setdefault(file_id, []) |
171 | | - revisions[file_id].extend(data[editor]) |
172 | | - return revisions |
| 170 | + self.revisions.setdefault(file_id, []) |
| 171 | + self.revisions[file_id].extend(data[editor]) |
173 | 172 | |
174 | 173 | def add(self, revision): |
175 | 174 | self.stringify(revision) |
— | — | @@ -240,7 +239,7 @@ |
241 | 240 | |
242 | 241 | def write_revisions(self): |
243 | 242 | #t0 = datetime.datetime.now() |
244 | | - revisions = self.group_revisions_by_fileid() |
| 243 | + self.group_revisions_by_fileid() |
245 | 244 | file_ids = self.revisions.keys() |
246 | 245 | while len(self.revisions.keys()) > 0: |
247 | 246 | print len(self.revisions.keys()) |