Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -79,17 +79,19 @@ |
80 | 80 | return project |
81 | 81 | |
82 | 82 | |
83 | | -def generate_wikidump_filename(args): |
84 | | - return '%s-%s-%s' % (retrieve_projectname(args), 'latest', get_value(args, 'file')) |
| 83 | +def generate_wikidump_filename(project, args): |
| 84 | + return '%s-%s-%s' % (project, 'latest', get_value(args, 'file')) |
85 | 85 | |
86 | 86 | |
87 | 87 | def determine_file_locations(args): |
88 | 88 | locations = {} |
89 | 89 | location = get_value(args, 'location') if get_value(args, 'location') != None else settings.XML_FILE_LOCATION |
90 | | - locations['language_code'] = retrieve_language(args) |
91 | | - locations['location'] = os.path.join(location, retrieve_language(args)) |
| 90 | + project = retrieve_project(args) |
| 91 | + language_code = retrieve_language(args) |
| 92 | + locations['language_code'] = language_code |
| 93 | + locations['location'] = os.path.join(location, language_code, project) |
92 | 94 | locations['project'] = retrieve_projectname(args) |
93 | | - locations['filename'] = generate_wikidump_filename(args) |
| 95 | + locations['filename'] = generate_wikidump_filename(project, args) |
94 | 96 | return locations |
95 | 97 | |
96 | 98 | |
— | — | @@ -189,6 +191,12 @@ |
190 | 192 | except UnicodeEncodeError: |
191 | 193 | print '%s' % language |
192 | 194 | |
| 195 | + |
| 196 | +def detect_python_version(): |
 | 197 | +    version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
 | 198 | +    if float(version) < settings.MINIMUM_PYTHON_VERSION:
 | 199 | +        raise RuntimeError('Please upgrade to Python 2.6 or higher (but not Python 3.x).')
| 200 | + |
193 | 201 | def about(): |
194 | 202 | print 'Editor Trends Software is (c) 2010 by the Wikimedia Foundation.' |
195 | 203 | print 'Written by Diederik van Liere (dvanliere@gmail.com).' |
— | — | @@ -253,6 +261,7 @@ |
254 | 262 | parser.add_argument('-prog', '--progress', action='store_true', default=True, |
255 | 263 | help='Indicate whether you want to have a progressbar.') |
256 | 264 | |
| 265 | + detect_python_version() |
257 | 266 | args = parser.parse_args() |
258 | 267 | config.load_configuration(args) |
259 | 268 | locations = determine_file_locations(args) |
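
A note on the version check introduced above: tuple comparison against sys.version_info is more robust than string or float arithmetic. A minimal sketch, assuming MINIMUM_PYTHON_VERSION were stored as a (major, minor) tuple rather than the float 2.6 that settings.py defines:

    import sys

    MINIMUM_PYTHON_VERSION = (2, 6)  # assumption: tuple form, not the float from settings.py

    def detect_python_version():
        # sys.version_info compares element-wise, so this stays correct
        # even for two-digit minor versions such as (2, 10)
        if tuple(sys.version_info[:2]) < MINIMUM_PYTHON_VERSION:
            raise RuntimeError('Please upgrade to Python 2.6 or higher (but not Python 3.x).')
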
Index: trunk/tools/editor_trends/map_wiki_editors.py |
— | — | @@ -88,20 +88,22 @@ |
89 | 89 | return - 1 |
90 | 90 | |
91 | 91 | |
92 | | -def output_editor_information(elem, data_queue, **kwargs): |
| 92 | +def output_editor_information(elem, output, **kwargs): |
93 | 93 | ''' |
94 | 94 | @elem is an XML element containing 1 revision from a page |
95 | | - @data_queue is where to store the data |
| 95 | + @output is where to store the data, either a queue or a filehandle |
96 | 96 | @**kwargs contains extra information |
97 | 97 | |
98 | 98 | the variable tags determines which attributes are being parsed, the values in |
99 | 99 | this dictionary are the functions used to extract the data. |
100 | 100 | ''' |
101 | | - tags = {'contributor': {'editor': extract_contributor_id, 'bot': determine_username_is_bot}, |
| 101 | + tags = {'contributor': {'editor': extract_contributor_id, |
| 102 | + 'bot': determine_username_is_bot}, |
102 | 103 | 'timestamp': {'date': xml.extract_text}, |
103 | 104 | } |
104 | 105 | vars = {} |
105 | | - |
| 106 | + headers = ['editor', 'date', 'article'] |
| 107 | + destination = kwargs.pop('destination') |
106 | 108 | revisions = elem.findall('revision') |
107 | 109 | for revision in revisions: |
108 | 110 | vars['article'] = elem.find('id').text.decode(settings.ENCODING) |
— | — | @@ -114,12 +116,19 @@ |
115 | 117 | #print '%s\t%s\t%s\t%s\t' % (vars['article'], vars['contributor'], vars['timestamp'], vars['bot']) |
116 | 118 | if vars['bot'] == 0 and vars['editor'] != -1 and vars['editor'] != None: |
117 | 119 | vars.pop('bot') |
118 | | - vars['date'] = utils.convert_timestamp_to_date(vars['date']) |
119 | | - data_queue.put(vars) |
 | 120 | +            vars['date'] = utils.convert_timestamp_to_date(vars['date'])
 | 121 | +            if destination == 'queue':
 | 122 | +                output.put(vars)
| 123 | + elif destination == 'file': |
 | 124 | +                data = []
| 125 | + for head in headers: |
| 126 | + data.append(vars[head]) |
| 127 | + utils.write_list_to_csv(data, output) |
120 | 129 | vars = {} |
121 | 130 | |
122 | 131 | |
123 | | -def parse_editors(xml_queue, data_queue, pbar, bots, **kwargs): |
| 132 | +def parse_editors(xml_queue, output, pbar, bots, **kwargs): |
124 | 133 | ''' |
125 | 134 | @xml_queue contains the filenames of the files to be parsed |
126 | 135 | @data_queue is an instance of Queue where the extracted data is stored for |
— | — | @@ -130,8 +139,10 @@ |
131 | 140 | |
132 | 141 | Output is the data_queue that will be used by store_editors() |
133 | 142 | ''' |
134 | | - file_location = os.path.join(settings.XML_FILE_LOCATION, kwargs.get('language', 'en')) |
135 | | - debug = kwargs.get('debug', None) |
| 143 | + file_location = os.path.join(settings.XML_FILE_LOCATION, kwargs.get('language', 'en'), kwargs.get('project', 'wiki')) |
| 144 | + debug = kwargs.get('debug', False) |
| 145 | + destination = kwargs.get('destination', 'file') |
| 146 | + |
136 | 147 | if settings.DEBUG: |
137 | 148 | messages = {} |
138 | 149 | vars = {} |
— | — | @@ -145,9 +156,13 @@ |
146 | 157 | if file == None: |
147 | 158 | print 'Swallowed a poison pill' |
148 | 159 | break |
| 160 | + |
149 | 161 | data = xml.read_input(utils.create_txt_filehandle(file_location, |
150 | 162 | file, 'r', |
151 | 163 | encoding=settings.ENCODING)) |
| 164 | + if destination == 'file': |
| 165 | + name = file[:-4] + '.txt' |
| 166 | + output = utils.create_txt_filehandle(file_location, name, 'w', settings.ENCODING) |
152 | 167 | for raw_data in data: |
153 | 168 | xml_buffer = cStringIO.StringIO() |
154 | 169 | raw_data.insert(0, '<?xml version="1.0" encoding="UTF-8" ?>\n') |
— | — | @@ -156,7 +171,7 @@ |
157 | 172 | raw_data = ''.join(raw_data) |
158 | 173 | xml_buffer.write(raw_data) |
159 | 174 | elem = cElementTree.XML(xml_buffer.getvalue()) |
160 | | - output_editor_information(elem, data_queue, bots=bots) |
| 175 | + output_editor_information(elem, output, bots=bots, destination=destination) |
161 | 176 | except SyntaxError, error: |
162 | 177 | print error |
163 | 178 | ''' |
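
The loop above rebuilds each raw chunk into a standalone XML document before handing it to cElementTree; the technique in miniature, as a self-contained sketch with made-up revision data:

    import cStringIO
    from xml.etree import cElementTree

    raw_data = ['<page><id>12</id><revision><id>1</id></revision></page>']
    xml_buffer = cStringIO.StringIO()
    raw_data.insert(0, '<?xml version="1.0" encoding="UTF-8" ?>\n')
    xml_buffer.write(''.join(raw_data))
    elem = cElementTree.XML(xml_buffer.getvalue())
    print elem.find('id').text    # '12'
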
— | — | @@ -176,26 +191,30 @@ |
177 | 192 | print file, error |
178 | 193 | print raw_data[:12] |
179 | 194 | print 'String was supposed to be %s characters long' % sum([len(raw) for raw in raw_data]) |
| 195 | + if destination == 'queue': |
| 196 | + output.put('NEXT') |
| 197 | + while True: |
| 198 | + if output.qsize() < 100000: |
| 199 | + break |
| 200 | + else: |
| 201 | + time.sleep(10) |
| 202 | + print 'Still sleeping, queue is %s items long' % output.qsize() |
180 | 203 | |
181 | | - data_queue.put('NEXT') |
| 204 | + else: |
| 205 | + output.close() |
| 206 | + |
182 | 207 | if pbar: |
183 | | - print file, xml_queue.qsize(), data_queue.qsize() |
| 208 | + print file, xml_queue.qsize() |
184 | 209 | #utils.update_progressbar(pbar, xml_queue) |
| 210 | + |
185 | 211 | if debug: |
186 | 212 | break |
187 | | - |
188 | | - while True: |
189 | | - if data_queue.qsize() < 100000: |
190 | | - break |
191 | | - else: |
192 | | - time.sleep(10) |
193 | | - print 'Still sleeping, queue is %s items long' % data_queue.qsize() |
194 | | - |
| 213 | + |
195 | 214 | except Empty: |
196 | 215 | break |
197 | 216 | |
198 | | - #for x in xrange(4): |
199 | | - data_queue.put(None) |
| 217 | + if destination == 'queue': |
 | 218 | +        output.put(None)
200 | 219 | |
201 | 220 | if settings.DEBUG: |
202 | 221 | utils.report_error_messages(messages, parse_editors) |
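
The 'Swallowed a poison pill' branch and the trailing put(None) follow the standard poison-pill shutdown idiom for multiprocessing consumers; a generic sketch, not the project's code:

    from multiprocessing import JoinableQueue

    def consumer(queue):
        while True:
            item = queue.get()
            if item is None:          # poison pill: producer signals end of work
                queue.task_done()
                break
            # ... process the item here ...
            queue.task_done()

    q = JoinableQueue()
    q.put('some work')
    q.put(None)                       # one pill per consumer process
    consumer(q)
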
— | — | @@ -263,9 +282,9 @@ |
264 | 283 | cache[c] = {} |
265 | 284 | editor_cache.add('NEXT', '') |
266 | 285 | cache = {} |
267 | | - |
268 | 286 | |
269 | 287 | |
| 288 | + |
270 | 289 | def load_bot_ids(): |
271 | 290 | ''' |
272 | 291 | Loader function to retrieve list of id's of known Wikipedia bots. |
— | — | @@ -279,17 +298,20 @@ |
280 | 299 | return ids |
281 | 300 | |
282 | 301 | |
283 | | -def run_parse_editors(dbname, language, location): |
| 302 | +def run_parse_editors(location, language, project): |
284 | 303 | ids = load_bot_ids() |
285 | 304 | kwargs = {'bots': ids, |
286 | | - 'dbname': dbname, |
| 305 | + 'dbname': language + project, |
| 306 | + 'language': language, |
| 307 | + 'project': project, |
287 | 308 | 'pbar': True, |
288 | | - 'nr_input_processors': 2, |
289 | | - 'nr_output_processors': 2, |
290 | | - 'language': language, |
| 309 | + 'destination': 'file', |
| 310 | + 'nr_input_processors': settings.NUMBER_OF_PROCESSES, |
| 311 | + 'nr_output_processors': settings.NUMBER_OF_PROCESSES, |
291 | 312 | } |
292 | 313 | chunks = {} |
293 | | - files = utils.retrieve_file_list(location, 'xml') |
| 314 | + source = os.path.join(location, language, project) |
| 315 | + files = utils.retrieve_file_list(source, 'xml') |
294 | 316 | parts = int(round(float(len(files)) / settings.NUMBER_OF_PROCESSES, 0)) |
295 | 317 | a = 0 |
296 | 318 | for x in xrange(settings.NUMBER_OF_PROCESSES): |
— | — | @@ -297,18 +319,18 @@ |
298 | 320 | chunks[x] = files[a:b] |
299 | 321 | a = (x + 1) * parts |
300 | 322 | |
301 | | - pc.build_scaffolding(pc.load_queue, parse_editors, chunks, store_editors, True, **kwargs) |
302 | | - search_cache_for_missed_editors(dbname) |
| 323 | + pc.build_scaffolding(pc.load_queue, parse_editors, chunks, False, False, **kwargs) |
| 324 | + #search_cache_for_missed_editors(dbname) |
303 | 325 | |
304 | 326 | |
305 | 327 | def debug_parse_editors(dbname): |
306 | 328 | q = JoinableQueue() |
307 | | - parse_editors('en\\522.xml', q, None, None, True) |
| 329 | + parse_editors('522.xml', q, None, None, debug=True, destination='file') |
308 | 330 | store_editors(q, [], dbname) |
309 | | - search_cache_for_missed_editors(dbname) |
| 331 | + #search_cache_for_missed_editors(dbname) |
310 | 332 | |
311 | 333 | |
312 | 334 | if __name__ == "__main__": |
313 | | - #debug_parse_editors('test') |
314 | | - run_parse_editors('test', 'en') |
| 335 | + #debug_parse_editors('test2') |
| 336 | + run_parse_editors(settings.XML_FILE_LOCATION, 'en', 'wiki') |
315 | 337 | pass |
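
Condensed, the dual-destination pattern this file now uses in output_editor_information looks like the sketch below; the field order is taken from the headers list in the diff, and emit is a hypothetical name:

    def emit(record, output, destination):
        '''Sketch of the queue/file dispatch; output is a Queue or a filehandle.'''
        if destination == 'queue':
            output.put(record)        # consumed downstream by store_editors()
        elif destination == 'file':
            fields = [record[h] for h in ('editor', 'date', 'article')]
            output.write('\t'.join('%s' % f for f in fields))
            output.write('\n')
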
Index: trunk/tools/editor_trends/settings.py |
— | — | @@ -41,6 +41,7 @@ |
42 | 42 | IGNORE_DIRS = ['wikistats', 'zips'] |
43 | 43 | ROOT = '/' if OS != 'Windows' else 'c:\\' |
44 | 44 | |
| 45 | +MINIMUM_PYTHON_VERSION = 2.6 |
45 | 46 | |
46 | 47 | dirs = [name for name in os.listdir(WORKING_DIRECTORY) if |
47 | 48 | os.path.isdir(os.path.join(WORKING_DIRECTORY, name))] |
Index: trunk/tools/editor_trends/utils/utils.py |
— | — | @@ -132,6 +132,11 @@ |
133 | 133 | |
134 | 134 | # read / write data related functions |
135 | 135 | def read_data_from_csv(filename, encoding): |
| 136 | + ''' |
 | 137 | +    @filename is the path (either absolute or relative) including the name
 | 138 | +    of the file
| 139 | + @encoding is usually utf-8 |
| 140 | + ''' |
136 | 141 | if hasattr(filename, '__call__'): |
137 | 142 | filename = construct_filename(filename) |
138 | 143 | |
— | — | @@ -156,6 +161,10 @@ |
157 | 162 | |
158 | 163 | |
159 | 164 | def determine_file_mode(extension): |
| 165 | + ''' |
| 166 | + Checks if a given extension is an ASCII extension or not. The settings file |
| 167 | + provides known ASCII extensions. |
| 168 | + ''' |
160 | 169 | if extension in settings.ASCII: |
161 | 170 | return 'w' |
162 | 171 | else: |
— | — | @@ -163,15 +172,30 @@ |
164 | 173 | |
165 | 174 | |
166 | 175 | def write_list_to_csv(data, fh, recursive=False): |
| 176 | + ''' |
| 177 | + @data is a list which can contain other lists that will be written as a |
| 178 | + single line to a textfile |
 | 179 | +    @fh is a handle to an open text file
| 180 | + |
 | 181 | +    The calling function is responsible for closing the filehandle;
 | 182 | +    this function writes the terminating newline.
| 184 | + ''' |
| 185 | + tab = False |
167 | 186 | if recursive: |
168 | 187 | recursive = False |
169 | | - for d in data: |
| 188 | + for x, d in enumerate(data): |
| 189 | + if tab: |
| 190 | + fh.write('\t') |
170 | 191 | if type(d) == type([]): |
171 | 192 | recursive = write_list_to_csv(d, fh, True) |
172 | 193 | else: |
173 | | - fh.write('%s\t' % d) |
| 194 | + fh.write('%s' % d) |
| 195 | + tab = True |
174 | 196 | if recursive: |
| 197 | + tab = False |
175 | 198 | return True |
| 199 | + fh.write('\n') |
176 | 200 | |
177 | 201 | |
178 | 202 | def write_dict_to_csv(data, fh): |
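
A quick illustration of the write_list_to_csv contract using an in-memory buffer; per the docstring above, the function writes the terminating newline and the caller only closes the handle:

    import StringIO

    buffer = StringIO.StringIO()
    write_list_to_csv(['editor', 'date', 'article'], buffer)
    print repr(buffer.getvalue())    # 'editor\tdate\tarticle\n' -- tab-separated, no trailing tab
    buffer.close()
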
— | — | @@ -267,31 +291,37 @@ |
268 | 292 | |
269 | 293 | |
270 | 294 | def create_dict_from_csv_file(filename, encoding): |
| 295 | + ''' |
 | 296 | +    Constructs a dictionary from a text file.
| 297 | + ''' |
271 | 298 | d = {} |
272 | 299 | for line in read_data_from_csv(filename, encoding): |
273 | 300 | line = clean_string(line) |
274 | 301 | value, key = line.split('\t') |
275 | 302 | d[key] = value |
276 | | - |
277 | 303 | return d |
278 | 304 | |
279 | 305 | |
280 | | -def retrieve_file_list(location, extension, mask=''): |
| 306 | +def retrieve_file_list(location, extension, mask=None): |
281 | 307 | ''' |
282 | 308 | Retrieve a list of files from a specified location. |
283 | 309 | @location: either an absolute or relative path |
284 | 310 | @extension: only include files with extension (optional) |
285 | | - @mask: only include files that start with mask (optional) |
 | 311 | +    @mask: only include files whose base name matches mask (optional); the
 | 312 | +    mask is interpreted as a regular expression.
286 | 313 | |
287 | 314 | @return: a list of files matching the criteria |
288 | 315 | ''' |
| 316 | + if mask: |
| 317 | + mask = re.compile(mask) |
| 318 | + else: |
| 319 | + mask = re.compile('[\w\d*]') |
289 | 320 | all_files = os.listdir(location) |
290 | | - if not extension.startswith('.'): |
291 | | - extension = '.' + extension |
292 | 321 | files = [] |
293 | 322 | for file in all_files: |
294 | | - if file.startswith(mask) and file.endswith(extension): |
295 | | - files.append(file) |
 | 323 | +        name, _, ext = file.partition('.')
 | 324 | +        if re.match(mask, name) and ext.endswith(extension):
 | 325 | +            files.append(file)
296 | 326 | return files |
297 | 327 | |
298 | 328 | |
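
Two hypothetical calls showing the new mask semantics (re.match anchors the pattern at the start of the base filename, before the first dot):

    chunk_files = retrieve_file_list('/tmp/dumps', 'xml', mask='\d+')   # 522.xml, 523.xml, ...
    all_xml = retrieve_file_list('/tmp/dumps', 'xml')                   # default mask accepts any word character
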
Index: trunk/tools/editor_trends/utils/process_constructor.py |
— | — | @@ -57,6 +57,7 @@ |
58 | 58 | nr_output_processors = kwargs.pop('nr_output_processors') |
59 | 59 | input_queues = {} |
60 | 60 | result_queues = {} |
| 61 | + |
61 | 62 | #assert len(obj) == nr_input_processors |
62 | 63 | #if result_queue: |
63 | 64 | # assert len(obj)== nr_output_processors |
Index: trunk/tools/editor_trends/utils/sort.py |
— | — | @@ -0,0 +1,119 @@ |
| 2 | +#!/usr/bin/python |
| 3 | +# -*- coding: utf-8 -*- |
| 4 | + |
| 5 | +''' |
| 6 | +Copyright (C) 2010 by Diederik van Liere (dvanliere@gmail.com) |
| 7 | +This program is free software; you can redistribute it and/or |
| 8 | +modify it under the terms of the GNU General Public License version 2 |
| 9 | +as published by the Free Software Foundation. |
| 10 | +This program is distributed in the hope that it will be useful, |
| 11 | +but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 13 | +See the GNU General Public License for more details, at |
| 14 | +http://www.fsf.org/licenses/gpl.html |
| 15 | +''' |
| 16 | + |
| 17 | +__author__ = '''\n'''.join(['Diederik van Liere (dvanliere@gmail.com)', ]) |
 | 18 | +__author_email__ = 'dvanliere at gmail dot com'
| 19 | +__date__ = '2010-11-07' |
| 20 | +__version__ = '0.1' |
| 21 | + |
| 22 | +''' |
| 23 | +This module provides a small number of sorting algorithms including mergesort, |
 | 24 | +external mergesort and quicksort. By presorting the data, considerable
 | 25 | +efficiency gains can be realized when inserting the data into MongoDB.
| 26 | +''' |
| 27 | + |
 | 28 | +import heapq
 | 29 | +import os
 | 30 | +
 | 31 | +import settings
 | 32 | +import utils
| 32 | + |
| 33 | +def quick_sort(obs): |
| 34 | + if obs == []: |
| 35 | + return [] |
| 36 | + else: |
| 37 | + pivot = obs[0] |
| 38 | + lesser = quick_sort([x for x in obs[1:] if x < pivot]) |
| 39 | + greater = quick_sort([x for x in obs[1:] if x >= pivot]) |
| 40 | + return lesser + [pivot] + greater |
| 41 | + |
| 42 | +def mergesort(n): |
| 43 | + """Recursively merge sort a list. Returns the sorted list.""" |
| 44 | + front = n[:len(n) / 2] |
| 45 | + back = n[len(n) / 2:] |
| 46 | + |
| 47 | + if len(front) > 1: |
| 48 | + front = mergesort(front) |
| 49 | + if len(back) > 1: |
| 50 | + back = mergesort(back) |
| 51 | + |
| 52 | + return merge(front, back) |
| 53 | + |
| 54 | + |
| 55 | +def merge(front, back): |
| 56 | + """Merge two sorted lists together. Returns the merged list.""" |
| 57 | + result = [] |
| 58 | + while front and back: |
| 59 | + # pick the smaller one from the front and stick it on |
| 60 | + # note that list.pop(0) is a linear operation, so this gives quadratic running time... |
| 61 | + result.append(front.pop(0) if front[0] <= back[0] else back.pop(0)) |
| 62 | + # add the remaining end |
| 63 | + result.extend(front or back) |
| 64 | + return result |
| 65 | + |
| 66 | + |
| 67 | +def readline(file): |
| 68 | + for line in file: |
 | 69 | +        if line.strip() == '':
| 70 | + continue |
| 71 | + else: |
| 72 | + line = line.replace('\n', '') |
| 73 | + line = line.split('\t') |
| 74 | + yield line |
| 75 | + |
| 76 | + |
| 77 | +def merge_sorted_files(output, files): |
| 78 | + output = utils.create_txt_filehandle(output, 'merged.txt', 'w', settings.ENCODING) |
| 79 | + lines = 0 |
| 80 | + for line in heapq.merge(*[readline(file) for file in files]): |
 | 81 | +        output.write('\t'.join(line) + '\n')
| 82 | + lines += 1 |
| 83 | + output.close() |
| 84 | + return lines |
| 85 | + |
| 86 | + |
| 87 | +def write_sorted_file(sorted_data, file, output): |
| 88 | + file = file.split('.') |
| 89 | + file[0] = file[0] + '_sorted' |
| 90 | + file = '.'.join(file) |
| 91 | + fh = utils.create_txt_filehandle(output, file, 'w', settings.ENCODING) |
| 92 | + utils.write_list_to_csv(sorted_data, fh) |
| 93 | + fh.close() |
| 94 | + |
| 95 | + |
| 96 | +def debug_merge_sorted_files(input, output): |
| 97 | + files = utils.retrieve_file_list(input, 'txt', mask='') |
| 98 | + filehandles = [utils.create_txt_filehandle(input, file, 'r', settings.ENCODING) for file in files] |
| 99 | + lines = merge_sorted_files(output, filehandles) |
| 100 | + filehandles = [fh.close() for fh in filehandles] |
| 101 | + print lines |
| 102 | + |
| 103 | + |
| 104 | +def debug_mergesort(input, output): |
| 105 | + files = utils.retrieve_file_list(input, 'txt', mask='((?!_sorted)\d)') |
| 106 | + for file in files: |
| 107 | + fh = utils.create_txt_filehandle(input, file, 'r', settings.ENCODING) |
| 108 | + data = fh.readlines() |
| 109 | + fh.close() |
| 110 | + data = [d.replace('\n', '') for d in data] |
| 111 | + data = [d.split('\t') for d in data] |
| 112 | + sorted_data = mergesort(data) |
| 113 | + write_sorted_file(sorted_data, file, output) |
| 114 | + |
| 115 | + |
| 116 | +if __name__ == '__main__': |
| 117 | + input = os.path.join(settings.XML_FILE_LOCATION, 'en', 'wiki') |
| 118 | + output = os.path.join(settings.XML_FILE_LOCATION, 'en', 'wiki', 'sorted') |
| 119 | + debug_mergesort(input, output) |
| 120 | + #debug_merge_sorted_files(input, output) |
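
Note that heapq.merge only yields a globally sorted stream when every input file is itself already sorted, which is why debug_mergesort writes the *_sorted.txt chunks first. A hypothetical end-to-end driver, with illustrative paths:

    input = os.path.join(settings.XML_FILE_LOCATION, 'en', 'wiki')
    output = os.path.join(input, 'sorted')
    debug_mergesort(input, output)              # sort each chunk in memory, write 522_sorted.txt etc.
    debug_merge_sorted_files(output, output)    # stream the sorted chunks into merged.txt
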
Property changes on: trunk/tools/editor_trends/utils/sort.py |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 121 | + native |
Index: trunk/tools/editor_trends/construct_datasets.py |
— | — | @@ -126,11 +126,11 @@ |
127 | 127 | pc.build_scaffolding(pc.load_queue, retrieve_edits_by_contributor, 'contributors') |
128 | 128 | |
129 | 129 | |
130 | | -def debug_retrieve_edits_by_contributor_launcher(): |
| 130 | +def debug_retrieve_edits_by_contributor_launcher(dbname): |
131 | 131 | kwargs = {'debug': False, |
132 | | - 'dbname': 'enwiki', |
| 132 | + 'dbname': dbname, |
133 | 133 | } |
134 | | - ids = retrieve_editor_ids_mongo('enwiki', 'editors') |
| 134 | + ids = retrieve_editor_ids_mongo(dbname, 'editors') |
135 | 135 | input_queue = pc.load_queue(ids) |
136 | 136 | q = Queue() |
137 | 137 | generate_editor_dataset(input_queue, q, False, kwargs) |
— | — | @@ -159,7 +159,6 @@ |
160 | 160 | def generate_editor_dataset_debug(dbname): |
161 | 161 | ids = retrieve_editor_ids_mongo(dbname, 'editors') |
162 | 162 | input_queue = pc.load_queue(ids) |
163 | | - #write_dataset(input_queue, [], 'enwiki') |
164 | 163 | kwargs = {'nr_input_processors': 1, |
165 | 164 | 'nr_output_processors': 1, |
166 | 165 | 'debug': True, |
Index: trunk/tools/editor_trends/database/cache.py |
— | — | @@ -86,25 +86,10 @@ |
87 | 87 | |
88 | 88 | if self.editors[key]['obs'] == self.treshold: |
89 | 89 | self.treshold_editors.add(key) |
90 | | -# self.update(key, self.editors[key]['edits']) |
91 | | -# del self.editors[key] |
92 | | -# self.n -= 10 |
93 | | -# self.number_editors -= 1 |
94 | 90 | |
95 | 91 | def update(self, editor, values): |
96 | | - #t = datetime.datetime.now() |
97 | 92 | self.collection.update({'editor': editor}, {'$pushAll': {'edits': values}}, upsert=True) |
98 | | - #print 'It took %s to store editor %s;and the cache contains %s editors and %s items' % (datetime.datetime.now() - t, editor, self.number_editors, self.n) |
99 | 93 | |
100 | | - def quick_sort(self, obs): |
101 | | - if obs == []: |
102 | | - return [] |
103 | | - else: |
104 | | - pivot = obs[0] |
105 | | - lesser = self.quick_sort([x for x in obs[1:] if x < pivot]) |
106 | | - greater = self.quick_sort([x for x in obs[1:] if x >= pivot]) |
107 | | - return lesser + [pivot] + greater |
108 | | - |
109 | 94 | def store(self): |
110 | 95 | utils.store_object(self, settings.BINARY_OBJECT_FILE_LOCATION, self.__repr__()) |
111 | 96 | |