Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -203,7 +203,7 @@ |
204 | 204 | valid_storage = ['mongo', 'cassandra'] |
205 | 205 | working_directory = raw_input('Please indicate where you installed Wikilytics.\nCurrent location is %s\nPress Enter to accept default.\n' % os.getcwd()) |
206 | 206 | input_location = raw_input('Please indicate where the Wikipedia dump files are or will be located.\nDefault is: %s\nPress Enter to accept default.\n' % rts.input_location) |
207 | | - output_location = raw_input('Please indicate where to store all Wikilytics project files.\nDefault is: %s\nPress Enter to accept default.\n' % rts.output_location) |
| 207 | + base_location = raw_input('Please indicate where to store all Wikilytics project files.\nDefault is: %s\nPress Enter to accept default.\n' % rts.base_location) |
208 | 208 | |
209 | 209 | while db not in valid_storage: |
210 | 210 | db = raw_input('Please indicate what database you are using for storage. \nDefault is: Mongo\n') |
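Note: the loop above only terminates once a recognized backend is typed in. A
self-contained sketch of the same validate-or-reprompt pattern (the fallback to
'mongo' on an empty answer is illustrative, not necessarily the tool's actual
behavior):

    valid_storage = ['mongo', 'cassandra']
    db = ''
    while db not in valid_storage:
        db = raw_input('Please indicate what database you are using for storage. \nDefault is: Mongo\n')
        db = db.lower() if db != '' else 'mongo'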
— | — | @@ -224,14 +224,14 @@ |
225 | 225 | language = language if language in rts.project.valid_languages else rts.language.default |
226 | 226 | |
227 | 227 | input_location = input_location if len(input_location) > 0 else rts.input_location |
228 | | - output_location = output_location if len(output_location) > 0 else rts.output_location |
| 228 | + base_location = base_location if len(base_location) > 0 else rts.base_location |
229 | 229 | working_directory = working_directory if len(working_directory) > 0 else os.getcwd() |
230 | 230 | |
231 | 231 | config = ConfigParser.RawConfigParser() |
232 | 232 | config.add_section('file_locations') |
233 | 233 | config.set('file_locations', 'working_directory', working_directory) |
234 | 234 | config.set('file_locations', 'input_location', input_location) |
235 | | - config.set('file_locations', 'output_location', output_location) |
| 235 | + config.set('file_locations', 'base_location', base_location) |
236 | 236 | config.add_section('wiki') |
237 | 237 | config.set('wiki', 'project', project) |
238 | 238 | config.set('wiki', 'language', language) |
— | — | @@ -246,7 +246,7 @@ |
247 | 247 | config_launcher, |
248 | 248 | working_directory=working_directory, |
249 | 249 | input_location=input_location, |
250 | | - output_location=output_location, |
| 250 | + base_location=base_location, |
251 | 251 | project=project, |
252 | 252 | language=language,) |
253 | 253 | |
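The rename has to stay in sync on both sides of wiki.cfg: manage.py writes the
key and classes/settings.py reads it back. A minimal sketch of that round-trip,
using a made-up path:

    import ConfigParser

    config = ConfigParser.RawConfigParser()
    config.add_section('file_locations')
    config.set('file_locations', 'base_location', '/data/wikilytics')

    fh = open('wiki.cfg', 'w')
    config.write(fh)
    fh.close()

    config = ConfigParser.RawConfigParser()
    config.read('wiki.cfg')
    print config.get('file_locations', 'base_location')   #/data/wikilytics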
Index: trunk/tools/editor_trends/etl/enricher.py |
— | — | @@ -22,9 +22,10 @@ |
23 | 23 | import hashlib |
24 | 24 | import codecs |
25 | 25 | import sys |
| 26 | +import itertools |
26 | 27 | import datetime |
27 | 28 | import progressbar |
28 | | -from multiprocessing import JoinableQueue, Process, cpu_count, current_process, RLock |
| 29 | +from multiprocessing import JoinableQueue, Process, cpu_count, RLock, Manager |
29 | 30 | from xml.etree.cElementTree import iterparse, dump |
30 | 31 | from collections import deque |
31 | 32 | |
— | — | @@ -77,33 +78,50 @@ |
78 | 79 | 108:'Book', |
79 | 80 | } |
80 | 81 | |
81 | | -class Statistics: |
82 | | - def __init__(self, process_id): |
83 | | - self.process_id = process_id |
84 | | - self.count_articles = 0 |
85 | | - self.count_revisions = 0 |
86 | 82 | |
87 | | - def summary(self): |
88 | | - print 'Worker %s: Number of articles: %s' % (self.process_id, self.count_articles) |
89 | | - print 'Worker %s: Number of revisions: %s' % (self.process_id, self.count_revisions) |
| 83 | +class CustomLock: |
| 84 | + def __init__(self, lock, open_handles): |
| 85 | + self.lock = lock |
| 86 | + self.open_handles = open_handles |
90 | 87 | |
| 88 | + def available(self, handle): |
| 89 | + self.lock.acquire() |
| 90 | + try: |
| 91 | + self.open_handles.index(handle) |
| 92 | + #print 'RETRIEVED FILEHANDLE %s' % handle |
| 93 | + return False |
| 94 | + except ValueError: |
| 95 | + self.open_handles.append(handle) |
| 96 | + #print 'ADDED FILEHANDLE %s' % handle |
| 97 | + return True |
| 98 | + finally: |
| 99 | + #print 'FIles locked: %s' % len(self.open_handles) |
| 100 | + self.lock.release() |
91 | 101 | |
| 102 | + def release(self, handle): |
| 103 | + #print 'RELEASED FILEHANDLE %s' % handle |
| 104 | + self.open_handles.remove(handle) |
| 105 | + |
| 106 | + |
92 | 107 | class Buffer: |
93 | | - def __init__(self, storage, process_id, rts=None, locks=None): |
94 | | - self.storage = storage |
| 108 | + def __init__(self, process_id, rts, lock): |
| 109 | + self.rts = rts |
| 110 | + self.lock = lock |
95 | 111 | self.revisions = {} |
96 | 112 | self.comments = {} |
97 | | - self.titles = {} |
| 113 | + self.articles = {} |
98 | 114 | self.process_id = process_id |
| 115 | + self.count_articles = 0 |
| 116 | + self.count_revisions = 0 |
| 117 | + self.filehandles = [file_utils.create_txt_filehandle(self.rts.txt, |
| 118 | + file_id, 'a', 'utf-8') for file_id in xrange(self.rts.max_filehandles)] |
99 | 119 | self.keys = ['revision_id', 'article_id', 'id', 'username', 'namespace', |
100 | 120 | 'title', 'timestamp', 'hash', 'revert', 'bot', 'cur_size', |
101 | 121 | 'delta'] |
102 | | - self.stats = Statistics(self.process_id) |
103 | | - if locks != None: |
104 | | - self.rts = rts |
105 | | - self.lock1 = locks[0] #lock for generic data |
106 | | - self.lock2 = locks[1] #lock for comment data |
107 | | - self.lock3 = locks[2] #lock for article titles |
| 122 | + self.fh_articles = file_utils.create_txt_filehandle(self.rts.txt, |
| 123 | + 'articles_%s' % self.process_id, 'w', 'utf-8') |
| 124 | + self.fh_comments = file_utils.create_txt_filehandle(self.rts.txt, |
| 125 | + 'comments_%s' % self.process_id, 'w', 'utf-8') |
108 | 126 | |
109 | 127 | def get_hash(self, id): |
110 | 128 | ''' |
— | — | @@ -116,19 +134,38 @@ |
117 | 135 | except ValueError: |
118 | 136 | return sum([ord(i) for i in id]) % self.rts.max_filehandles |
119 | 137 | |
120 | | - def group_observations(self, revisions): |
| 138 | + def invert_dictionary(self, editors): |
| 139 | + hashes = {} |
| 140 | + for editor, file_id in editors.iteritems(): |
| 141 | + hashes.setdefault(file_id, []) |
| 142 | + hashes[file_id].append(editor) |
| 143 | + return hashes |
| 144 | + |
| 145 | + def group_revisions_by_fileid(self, revisions): |
121 | 146 | ''' |
122 | | - This function groups observation by editor id, this way we have to make |
123 | | - fewer fileopening calls. |
| 147 | + This function groups observations by editor id and then by file_id; |
| 148 | + this way we make fewer file-opening calls, which should reduce |
| 149 | + processing time. |
124 | 150 | ''' |
125 | 151 | data = {} |
| 152 | + editors = {} |
| 153 | + #first, we group all revisions by editor |
126 | 154 | for revision in revisions: |
127 | 155 | id = revision[0] |
128 | 156 | if id not in data: |
129 | 157 | data[id] = [] |
| 158 | + editors[id] = self.get_hash(id) |
130 | 159 | data[id].append(revision) |
131 | | - self.revisions = data |
132 | 160 | |
| 161 | + #now, we are going to group all editors by file_id |
| 162 | + file_ids = self.invert_dictionary(editors) |
| 163 | + revisions = {} |
| 164 | + for editor_group in file_ids.values(): |
| 165 | + for editor in editor_group: |
| 166 | + revisions.setdefault(editor, []) |
| 167 | + revisions[editor].extend(data[editor]) |
| 168 | + self.revisions = revisions |
| 169 | + |
133 | 170 | def add(self, revision): |
134 | 171 | self.stringify(revision) |
135 | 172 | id = revision['revision_id'] |
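A toy walk-through of the regrouping above, with made-up editor names, dummy
revision rows and a small stand-in for rts.max_filehandles:

    MAX_FILEHANDLES = 4

    def get_hash(id):
        try:
            return int(id) % MAX_FILEHANDLES
        except ValueError:
            return sum([ord(i) for i in id]) % MAX_FILEHANDLES

    revisions = [['alice', 'r1'], ['bob', 'r2'], ['alice', 'r3']]
    data, editors = {}, {}
    for revision in revisions:
        id = revision[0]
        data.setdefault(id, []).append(revision)
        editors[id] = get_hash(id)

    file_ids = {}
    for editor, file_id in editors.iteritems():
        file_ids.setdefault(file_id, []).append(editor)
    print file_ids   #{2: ['alice'], 3: ['bob']}

All of alice's revisions can now be written with a single claim on filehandle 2,
and all of bob's with a single claim on filehandle 3.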
— | — | @@ -136,8 +173,8 @@ |
137 | 174 | if len(self.revisions) > 10000: |
138 | 175 | #print '%s: Emptying buffer %s - buffer size %s' % (datetime.datetime.now(), self.id, len(self.revisions)) |
139 | 176 | self.store() |
140 | | - self.clear() |
141 | 177 | |
| 178 | + |
142 | 179 | def stringify(self, revision): |
143 | 180 | for key, value in revision.iteritems(): |
144 | 181 | try: |
— | — | @@ -146,14 +183,10 @@ |
147 | 184 | value = value.encode('utf-8') |
148 | 185 | revision[key] = value |
149 | 186 | |
150 | | - def empty(self): |
151 | | - self.store() |
152 | | - self.clear() |
153 | 187 | |
154 | | - def clear(self): |
155 | | - self.revisions = {} |
156 | | - self.comments = {} |
157 | | - self.titles = {} |
| 188 | + def summary(self): |
| 189 | + print 'Worker %s: Number of articles: %s' % (self.process_id, self.count_articles) |
| 190 | + print 'Worker %s: Number of revisions: %s' % (self.process_id, self.count_revisions) |
158 | 191 | |
159 | 192 | def store(self): |
160 | 193 | rows = [] |
— | — | @@ -162,71 +195,73 @@ |
163 | 196 | for key in self.keys: |
164 | 197 | values.append(revision[key].decode('utf-8')) |
165 | 198 | rows.append(values) |
166 | | - self.write_output(rows) |
| 199 | + self.write_revisions(rows) |
| 200 | + self.write_articles() |
| 201 | + self.write_comments() |
167 | 202 | |
168 | | - if self.comments: |
169 | | - self.lock2.acquire() |
| 203 | + |
| 204 | + def write_comments(self): |
| 205 | + rows = [] |
| 206 | + try: |
| 207 | + for revision_id, comment in self.comments.iteritems(): |
| 208 | + #comment = comment.decode('utf-8') |
| 209 | + #row = '\t'.join([revision_id, comment]) + '\n' |
| 210 | + rows.append([revision_id, comment]) |
| 211 | + file_utils.write_list_to_csv(rows, self.fh_comments) |
| 212 | + self.comments = {} |
| 213 | + except Exception, error: |
| 214 | + print '''Encountered the following error while writing comment data |
| 215 | + to %s: %s''' % (self.fh_comments, error) |
| 216 | + |
| 217 | + def write_articles(self): |
| 218 | + #t0 = datetime.datetime.now() |
| 219 | + if len(self.articles) > 10000: |
| 220 | + rows = [] |
170 | 221 | try: |
171 | | - fh = file_utils.create_txt_filehandle(self.rts.txt, |
172 | | - 'comments.csv', 'a', 'utf-8') |
173 | | - rows = [] |
174 | | - for revision_id, comment in self.comments.iteritems(): |
175 | | - #comment = comment.decode('utf-8') |
176 | | - #row = '\t'.join([revision_id, comment]) + '\n' |
177 | | - rows.append([revision_id, comment]) |
178 | | - file_utils.write_list_to_csv(row, fh) |
179 | | - except Exception, error: |
180 | | - print 'Encountered the following error while writing data to %s: %s' % (fh, error) |
181 | | - finally: |
182 | | - fh.close() |
183 | | - self.lock2.release() |
| 222 | + for article_id, data in self.articles.iteritems(): |
| 223 | + keys = data.keys() |
| 224 | + keys.insert(0, 'id') |
184 | 225 | |
185 | | - elif self.titles: |
186 | | - self.lock3.acquire() |
187 | | - try: |
188 | | - fh = file_utils.create_txt_filehandle(self.rts.txt, |
189 | | - 'titles.csv', 'a', 'utf-8') |
190 | | - rows = [] |
191 | | - for article_id, dict in self.titles.iteritems(): |
192 | | - keys = dict.keys() |
193 | | - value = [] |
194 | | - for key in keys: |
195 | | - value.append(key) |
196 | | - value.append(dict[key]) |
197 | | - value.insert(0, article_id) |
198 | | - value.insert(0, 'id') |
| 226 | + values = data.values() |
| 227 | + values.insert(0, article_id) |
| 228 | + |
| 229 | + row = zip(keys, values) |
| 230 | + row = list(itertools.chain(*row)) |
199 | 231 | #title = title.encode('ascii') |
200 | 232 | #row = '\t'.join([article_id, title]) + '\n' |
201 | | - rows.append(value) |
202 | | - file_utils.write_list_to_csv(rows, fh, newline=False) |
| 233 | + rows.append(row) |
| 234 | + file_utils.write_list_to_csv(rows, self.fh_articles, newline=False) |
| 235 | + self.articles = {} |
203 | 236 | except Exception, error: |
204 | | - print 'Encountered the following error while writing data to %s: %s' % (fh, error) |
205 | | - finally: |
206 | | - fh.close() |
207 | | - self.lock3.release() |
| 237 | + print '''Encountered the following error while writing article |
| 238 | + data to %s: %s''' % (self.fh_articles, error) |
| 239 | + #t1 = datetime.datetime.now() |
| 240 | + #print '%s articles took %s' % (len(self.articles.keys()), (t1 - t0)) |
208 | 241 | |
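The zip/itertools.chain idiom in write_articles() interleaves the parallel key
and value lists into one flat csv row. In isolation, with illustrative field
names:

    import itertools

    keys = ['id', 'title', 'ns']
    values = ['12', 'Main Page', '0']
    row = zip(keys, values)            #[('id', '12'), ('title', 'Main Page'), ('ns', '0')]
    row = list(itertools.chain(*row))
    print row                          #['id', '12', 'title', 'Main Page', 'ns', '0']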
209 | | - |
210 | | - def write_output(self, data): |
211 | | - self.group_observations(data) |
212 | | - for editor in self.revisions: |
| 242 | + def write_revisions(self, data): |
| 243 | + #t0 = datetime.datetime.now() |
| 244 | + self.group_revisions_by_fileid(data) |
| 245 | + editors = self.revisions.keys() |
| 246 | + for editor in editors: |
213 | 247 | #lock the write around all edits of an editor for a particular page |
214 | | - self.lock1.acquire() |
215 | | - try: |
216 | | - for i, revision in enumerate(self.revisions[editor]): |
217 | | - if i == 0: |
218 | | - id = self.get_hash(revision[2]) |
219 | | - fh = file_utils.create_txt_filehandle(self.rts.txt, |
220 | | - '%s.csv' % id, 'a', 'utf-8') |
| 248 | + for i, revision in enumerate(self.revisions[editor]): |
| 249 | + if i == 0: |
| 250 | + file_id = self.get_hash(revision[2]) |
| 251 | + if self.lock.available(file_id): |
| 252 | + fh = self.filehandles[file_id] |
| 253 | + #print editor, file_id, fh |
| 254 | + else: |
| 255 | + break |
221 | 256 | try: |
222 | | - file_utils.write_list_to_csv(revision, fh, lock=self.lock1) |
| 257 | + file_utils.write_list_to_csv(revision, fh) |
223 | 260 | except Exception, error: |
224 | | - print 'Encountered the following error while writing data to %s: %s' % (fh, error) |
225 | | - finally: |
226 | | - fh.close() |
227 | | - self.lock1.release() |
| 261 | + print '''Encountered the following error while writing |
| 262 | + revision data to %s: %s''' % (fh, error) |
| 263 | + #after the loop: release the claim on the filehandle and drop the |
| 264 | + #editor from the buffer, once all revisions have been written |
| 265 | + self.lock.release(file_id) |
| 266 | + del self.revisions[editor] |
| 267 | + #t1 = datetime.datetime.now() |
| 268 | + #print '%s revisions took %s' % (len(self.revisions), (t1 - t0)) |
228 | 265 | |
229 | | - |
230 | | - |
231 | 266 | def extract_categories(): |
232 | 267 | ''' |
233 | 268 | Field 1: page id |
— | — | @@ -559,15 +594,15 @@ |
560 | 595 | namespace = determine_namespace(title, namespaces, include_ns, EXCLUDE_NAMESPACE) |
561 | 596 | title_meta = parse_title_meta_data(title, namespace) |
562 | 597 | if namespace != False: |
563 | | - cache.stats.count_articles += 1 |
| 598 | + cache.count_articles += 1 |
564 | 599 | article_id = article['id'].text |
565 | 600 | article['id'].clear() |
566 | | - cache.titles[article_id] = title_meta |
| 601 | + cache.articles[article_id] = title_meta |
567 | 602 | hashes = deque() |
568 | 603 | size = {} |
569 | 604 | revisions = article['revisions'] |
570 | 605 | for revision in revisions: |
571 | | - cache.stats.count_revisions += 1 |
| 606 | + cache.count_revisions += 1 |
572 | 607 | if revision == None: |
573 | 608 | #the entire revision is empty, weird. |
574 | 609 | continue |
— | — | @@ -639,20 +674,23 @@ |
640 | 675 | article['namespaces'] = namespaces |
641 | 676 | id = False |
642 | 677 | #elif event == 'end' and ns == True: |
643 | | - # elem.clear() |
| 678 | + # elem.clear() |
644 | 679 | except SyntaxError, error: |
645 | 680 | print 'Encountered invalid XML tag. Error message: %s' % error |
646 | 681 | dump(elem) |
647 | 682 | sys.exit(-1) |
| 683 | + except IOError, error: |
| 684 | + print '''Archive file is possibly corrupted. Please delete this archive |
| 685 | + and retry downloading. Error message: %s''' % error |
| 686 | + sys.exit(-1) |
648 | 687 | |
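The new IOError branch typically fires when a partially downloaded archive is
fed to the decompressor. A minimal reproduction of the failure mode it guards
against (the archive name is hypothetical):

    import bz2

    try:
        fh = bz2.BZ2File('enwiki-latest-stub-meta-history.xml.bz2', 'r')
        for line in fh:
            pass
    except IOError, error:
        print 'Archive file is possibly corrupted. Error message: %s' % error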
649 | | - |
650 | | -def stream_raw_xml(input_queue, storage, process_id, function, dataset, locks, rts): |
| 688 | +def stream_raw_xml(input_queue, process_id, function, dataset, lock, rts): |
651 | 689 | bots = bot_detector.retrieve_bots(rts.language.code) |
652 | 690 | |
653 | 691 | t0 = datetime.datetime.now() |
654 | 692 | i = 0 |
655 | 693 | if dataset == 'training': |
656 | | - cache = Buffer(storage, process_id, rts, locks) |
| 694 | + cache = Buffer(process_id, rts, lock) |
657 | 695 | else: |
658 | 696 | counts = {} |
659 | 697 | |
— | — | @@ -682,8 +720,8 @@ |
683 | 721 | t0 = t1 |
684 | 722 | |
685 | 723 | if dataset == 'training': |
686 | | - cache.empty() |
687 | | - cache.stats.summary() |
| 724 | + cache.store() |
| 725 | + cache.summary() |
688 | 726 | else: |
689 | 727 | location = os.getcwd() |
690 | 728 | keys = counts.keys() |
— | — | @@ -698,21 +736,26 @@ |
699 | 737 | print 'Finished parsing Wikipedia dump files.' |
700 | 738 | |
701 | 739 | |
702 | | -def setup(storage, rts=None): |
| 740 | +def setup(rts): |
703 | 741 | ''' |
704 | | - Depending on the storage system selected (cassandra, csv or mongo) some |
705 | | - preparations are made including setting up namespaces and cleaning up old |
706 | | - files. |
| 742 | + Clean up csv output files from a previous run and recreate the |
| 743 | + output directory. |
707 | 745 | ''' |
708 | | - if storage == 'csv': |
709 | | - res = file_utils.delete_file(rts.txt, None, directory=True) |
710 | | - if res: |
711 | | - res = file_utils.create_directory(rts.txt) |
| 746 | + res = file_utils.delete_file(rts.txt, None, directory=True) |
| 747 | + if res: |
| 748 | + res = file_utils.create_directory(rts.txt) |
712 | 749 | |
713 | 750 | |
714 | | -def multiprocessor_launcher(function, dataset, storage, locks, rts): |
| 751 | +def multiprocessor_launcher(function, dataset, lock, rts): |
| 752 | + mgr = Manager() |
| 753 | + open_handles = mgr.list() |
| 755 | + clock = CustomLock(lock, open_handles) |
715 | 756 | input_queue = JoinableQueue() |
| 757 | + |
716 | 758 | files = file_utils.retrieve_file_list(rts.input_location) |
| 759 | + |
717 | 760 | if len(files) > cpu_count(): |
718 | 761 | processors = cpu_count() - 1 |
719 | 762 | else: |
— | — | @@ -727,27 +770,26 @@ |
728 | 771 | print 'Inserting poison pill %s...' % x |
729 | 772 | input_queue.put(None) |
730 | 773 | |
731 | | - extracters = [Process(target=stream_raw_xml, args=[input_queue, storage, |
| 774 | + extracters = [Process(target=stream_raw_xml, args=[input_queue, |
732 | 775 | process_id, function, |
733 | | - dataset, locks, rts]) |
| 776 | + dataset, clock, rts]) |
734 | 777 | for process_id in xrange(processors)] |
735 | 778 | for extracter in extracters: |
736 | 779 | extracter.start() |
737 | 780 | |
738 | 781 | input_queue.join() |
739 | 783 | |
740 | | - |
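For reference, the launcher follows the standard JoinableQueue producer/consumer
shape: one file name per task, then one None 'poison pill' per worker so every
consumer exits cleanly. Stripped of the Wikilytics specifics (file names are
made up):

    from multiprocessing import JoinableQueue, Process, cpu_count

    def worker(input_queue):
        while True:
            task = input_queue.get()
            if task is None:             #poison pill: no more work
                input_queue.task_done()
                break
            #...process the dump file here...
            input_queue.task_done()

    if __name__ == '__main__':
        input_queue = JoinableQueue()
        files = ['dump1.xml.gz', 'dump2.xml.gz']
        processors = min(len(files), cpu_count())
        for filename in files:
            input_queue.put(filename)
        for x in xrange(processors):
            input_queue.put(None)
        workers = [Process(target=worker, args=[input_queue])
                   for x in xrange(processors)]
        for w in workers:
            w.start()
        input_queue.join()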
741 | 784 | def launcher_training(): |
742 | 785 | ''' |
743 | 786 | Launcher for creating training dataset for data competition |
744 | 787 | ''' |
745 | 788 | path = '/mnt/wikipedia_dumps/batch2/' |
746 | 789 | function = create_variables |
747 | | - storage = 'csv' |
748 | 790 | dataset = 'training' |
749 | 791 | rts = DummyRTS(path) |
750 | | - locks = [] |
| 792 | + lock = RLock() |
751 | | - multiprocessor_launcher(function, dataset, storage, locks, rts) |
| 793 | + multiprocessor_launcher(function, dataset, lock, rts) |
752 | 794 | |
753 | 795 | |
754 | 796 | def launcher_prediction(): |
— | — | @@ -756,11 +798,10 @@ |
757 | 799 | ''' |
758 | 800 | path = '/mnt/wikipedia_dumps/batch1/' |
759 | 801 | function = count_edits |
760 | | - storage = 'csv' |
761 | 802 | dataset = 'prediction' |
762 | 803 | rts = DummyRTS(path) |
763 | | - locks = [] |
| 804 | + lock = RLock() |
764 | | - multiprocessor_launcher(function, dataset, storage, locks, rts) |
| 805 | + multiprocessor_launcher(function, dataset, lock, rts) |
765 | 806 | |
766 | 807 | |
767 | 808 | def launcher(rts): |
— | — | @@ -769,14 +810,10 @@ |
770 | 811 | ''' |
771 | 812 | # launcher for creating regular mongo dataset |
772 | 813 | function = create_variables |
773 | | - storage = 'csv' |
774 | 814 | dataset = 'training' |
775 | | - lock1 = RLock() |
776 | | - lock2 = RLock() |
777 | | - lock3 = RLock() |
778 | | - locks = [lock1, lock2, lock3] |
779 | | - setup(storage, rts) |
780 | | - multiprocessor_launcher(function, dataset, storage, locks, rts) |
| 815 | + lock = RLock() |
| 816 | + setup(rts) |
| 817 | + multiprocessor_launcher(function, dataset, lock, rts) |
781 | 818 | |
782 | 819 | |
783 | 820 | if __name__ == '__main__': |
Index: trunk/tools/editor_trends/classes/settings.py |
— | — | @@ -61,7 +61,7 @@ |
62 | 62 | result = self.load_configuration() |
63 | 63 | if not result: |
64 | 64 | self.input_location = os.path.join(self.root, 'wikimedia') |
65 | | - self.output_location = os.path.join(self.root, 'wikimedia') |
| 65 | + self.base_location = os.path.join(self.root, 'wikimedia') |
66 | 66 | |
67 | 67 | #Date format as used by Erik Zachte |
68 | 68 | self.date_format = '%Y-%m-%d' |
— | — | @@ -79,11 +79,9 @@ |
80 | 80 | |
81 | 81 | self.wp_dump_location = 'http://dumps.wikimedia.org' |
82 | 82 | |
83 | | - |
84 | 83 | self.architecture = platform.machine() |
85 | 84 | self.tab_width = 4 if self.platform == 'Windows' else 8 |
86 | 85 | |
87 | | - |
88 | 86 | self.update_python_path() |
89 | 87 | |
90 | 88 | self.max_filehandles = self.determine_max_filehandles_open() |
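max_filehandles, which enricher.py uses to size the Buffer's filehandle pool,
is derived from the operating system's limit on open files. One way such a
probe can be implemented on POSIX systems (a sketch, not necessarily the actual
body of determine_max_filehandles_open()):

    import resource

    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    print 'Open filehandle limit: %s (soft), %s (hard)' % (soft, hard)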
— | — | @@ -105,7 +103,7 @@ |
106 | 104 | config.read(os.path.join(self.working_directory, 'wiki.cfg')) |
107 | 105 | self.working_directory = config.get('file_locations', 'working_directory') |
108 | 106 | self.input_location = config.get('file_locations', 'input_location') |
109 | | - self.output_location = config.get('file_locations', 'output_location') |
| 107 | + self.base_location = config.get('file_locations', 'base_location') |
110 | 108 | self.default_project = config.get('wiki', 'project') |
111 | 109 | self.default_language = config.get('wiki', 'language') |
112 | 110 | self.storage = config.get('storage', 'db') |
Index: trunk/tools/editor_trends/classes/runtime_settings.py |
— | — | @@ -62,7 +62,6 @@ |
63 | 63 | self.input_location = self.set_input_location() |
64 | 64 | self.output_location = self.set_output_location() |
65 | 65 | |
66 | | - |
67 | 66 | self.charts = self.determine_chart() |
68 | 67 | self.keywords = self.split_keywords() |
69 | 68 | self.namespaces = self.get_namespaces() |
— | — | @@ -149,19 +148,20 @@ |
150 | 149 | def set_input_location(self): |
151 | 150 | files = os.listdir(self.input_location) |
152 | 151 | extensions = ['.gz', '.7z', '.bz2'] |
| 152 | + project = '%s%s' % (self.language.code, self.project.name) |
153 | 153 | for file in files: |
154 | 154 | basename, ext = os.path.splitext(file) |
155 | | - if ext in extensions: |
| 155 | + if ext in extensions and file.startswith(project): |
156 | 156 | #ABS path case: check if files are stored here |
157 | 157 | return self.input_location |
158 | | - return os.path.join(self.input_location, self.language.code, |
| 158 | + return os.path.join(self.base_location, self.language.code, |
159 | 159 | self.project.name) |
160 | 160 | |
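With the tightened check above, a dump file only counts as already present if
its basename starts with the language-plus-project prefix, e.g. 'enwiki'. In
isolation, with made-up file names:

    import os.path

    files = ['enwiki-20110115-stub-meta-history.xml.gz', 'notes.txt',
             'dewiki-latest.xml.bz2']
    extensions = ['.gz', '.7z', '.bz2']
    project = '%s%s' % ('en', 'wiki')
    matches = [f for f in files
               if os.path.splitext(f)[1] in extensions and f.startswith(project)]
    print matches   #['enwiki-20110115-stub-meta-history.xml.gz']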
161 | 161 | def set_output_location(self): |
162 | 162 | ''' |
163 | 163 | Construct the full project location |
164 | 164 | ''' |
165 | | - return os.path.join(self.output_location, self.language.code, |
| 165 | + return os.path.join(self.base_location, self.language.code, |
166 | 166 | self.project.name) |
167 | 167 | |
168 | 168 | def show_settings(self): |
— | — | @@ -221,6 +221,7 @@ |
222 | 222 | default = lnc.languages[lnc.default] |
223 | 223 | if lang != default.name: |
224 | 224 | lang = lnc.get_language(lang, code=False) |
| 225 | + language = lang |
225 | 226 | return lang |
226 | 227 | else: |
227 | 228 | return default |
— | — | @@ -234,6 +235,7 @@ |
235 | 236 | if proj != 'wiki': |
236 | 237 | pc = projects.ProjectContainer() |
237 | 238 | proj = pc.get_project(proj) |
| 239 | + project = proj |
238 | 240 | return proj |
239 | 241 | else: |
240 | 242 | return default |
Index: trunk/tools/editor_trends/utils/file_utils.py |
— | — | @@ -234,7 +234,7 @@ |
235 | 235 | if hasattr(name, '__call__'): |
236 | 236 | return '%s%s' % (name.func_name, extension) |
237 | 237 | else: |
238 | | - return name |
| 238 | + return '%s%s' % (name, extension) |
239 | 239 | |
240 | 240 | |
241 | 241 | def delete_file(location, filename, directory=False): |