Index: trunk/tools/editor_trends/analyses/json_encoders.py |
— | — | @@ -19,6 +19,7 @@ |
20 | 20 | |
21 | 21 | import sys |
22 | 22 | import types |
| 23 | +import re |
23 | 24 | |
24 | 25 | if '..' not in sys.path: |
25 | 26 | sys.path.append('..') |
— | — | @@ -27,35 +28,46 @@ |
28 | 29 | from classes import exceptions |
29 | 30 | from utils import data_converter |
30 | 31 | |
| 32 | +HISTOGRAM = re.compile('histogram') |
| 33 | +#BAR = re.compile('bar') |
| 34 | +STACKED_BAR = re.compile('stacked_bar') |
| 35 | +RES = [HISTOGRAM, STACKED_BAR] |
31 | 36 | |
32 | | -def available_json_encoders(): |
| 37 | +def determine_chart_type(chart): |
| 38 | + for res in RES: |
| 39 | + match = re.findall(res, chart) |
| 40 | + if len(match) > 0: |
| 41 | + return match[0] |
| 42 | + #The bar chart is the default chart type. |
| 43 | + return 'bar' |
| 44 | + |
| 45 | +def get_json_encoder(chart): |
| 46 | + chart = determine_chart_type(chart) |
33 | 47 | functions = globals() |
34 | | - d = {} |
35 | | - ignore = ['transform_to_json'] |
| 48 | + ignore = ['transform_to_json', 'available_charts', 'init_options'] |
36 | 49 | for func in functions: |
37 | | - if func.endswith('json') \ |
38 | | - and func not in ignore: |
39 | | - d[func] = func |
| 50 | + if func.endswith('json'): |
| 51 | + if func not in ignore: |
| 52 | + encoder = func.replace('to_', '') |
| 53 | + encoder = encoder.replace('_json', '') |
| 54 | + if encoder == chart: |
| 55 | + return func, chart, RES |
| 56 | + return None, chart, RES |
40 | 57 | |
41 | | - return d |
42 | 58 | |
43 | | - |
44 | 59 | def transform_to_json(ds): |
45 | 60 | analyses = analyzer.available_analyses() |
46 | | - json_encoders = available_json_encoders() |
47 | | - analysis = '%s_%s_%s' % ('transform_to', ds.encoder, 'json') |
48 | | - print analysis |
| 61 | + encoder = get_json_encoder(ds.chart) |
| 62 | + #analysis = '%s_%s_%s' % ('transform_to', ds.encoder, 'json') |
| 63 | + #print analysis |
49 | 64 | encoder = getattr(locals(), analysis, None) |
50 | 65 | if encoder == None: |
51 | 66 | encoder = to_bar_json |
52 | 67 | |
53 | 68 | data = encoder(ds) |
54 | | -# except Exception, e: |
55 | | -# print e |
56 | | -# raise exceptions.UnknownJSONEncoderError(analysis) |
57 | | - |
58 | 69 | return data |
59 | 70 | |
| 71 | + |
60 | 72 | def init_options(): |
61 | 73 | options = {} |
62 | 74 | options['xaxis'] = {} |
— | — | @@ -101,6 +113,10 @@ |
102 | 114 | return json |
103 | 115 | |
104 | 116 | |
| 117 | +def to_histogram_json(ds): |
| 118 | + pass |
| 119 | + |
| 120 | + |
105 | 121 | def to_stacked_bar_json(ds): |
106 | 122 | ''' |
107 | 123 | This function outputs data in a format that is understood by jquery |
Index: trunk/tools/editor_trends/analyses/adhoc/community_graph.py |
— | — | @@ -18,11 +18,11 @@ |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
21 | 21 | import sys |
22 | | -sys.path.append('..') |
| 22 | +if '..' not in sys.path: |
| 23 | + sys.path.append('..') |
23 | 24 | |
24 | | -import configuration |
25 | | -settings = configuration.Settings() |
26 | | - |
| 25 | +from classes import settings |
| 26 | +settings = settings.Settings() |
27 | 27 | from database import db |
28 | 28 | from utils import file_utils |
29 | 29 | |
Index: trunk/tools/editor_trends/analyses/adhoc/file_size_reduction.py |
— | — | @@ -17,17 +17,20 @@ |
18 | 18 | __date__ = '2010-11-15' |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
21 | | -import sys |
22 | | -sys.path.append('..') |
23 | 21 | |
24 | 22 | import os |
25 | 23 | import xml.etree.cElementTree as cElementTree |
26 | 24 | |
27 | | -import configuration |
| 25 | +if '..' not in sys.path: |
| 26 | + sys.path.append('..') |
| 27 | + |
| 28 | +from classes import settings |
| 29 | +settings = settings.Settings() |
| 30 | + |
28 | 31 | from utils import file_utils |
29 | | -settings = configuration.Settings() |
30 | 32 | |
31 | 33 | |
| 34 | + |
32 | 35 | class DumpStatistics(object): |
33 | 36 | ''' Simple class to keep track of XML tags, how often they occur, |
34 | 37 | and the length of strings they contain. This is used to calculate the |
Index: trunk/tools/editor_trends/analyses/adhoc/match_talkpage_article.py |
— | — | @@ -19,11 +19,11 @@ |
20 | 20 | |
21 | 21 | import sys |
22 | 22 | import os |
23 | | -sys.path.append('..') |
| 23 | +if '..' not in sys.path: |
| 24 | + sys.path.append('..') |
24 | 25 | |
25 | | -import configuration |
26 | | -settings = configuration.Settings() |
27 | | - |
| 26 | +from classes import settings |
| 27 | +settings = settings.Settings() |
28 | 28 | from etl import extracter |
29 | 29 | from utils import file_utils |
30 | 30 | import wikitree |
Index: trunk/tools/editor_trends/manage.py |
— | — | @@ -26,7 +26,6 @@ |
27 | 27 | from argparse import RawTextHelpFormatter |
28 | 28 | import ConfigParser |
29 | 29 | |
30 | | -#import configuration |
31 | 30 | from utils import file_utils |
32 | 31 | from utils import ordered_dict |
33 | 32 | from utils import log |
— | — | @@ -268,9 +267,9 @@ |
269 | 268 | |
270 | 269 | def about_statement(): |
271 | 270 | print '' |
272 | | - print 'Editor Trends Software is (c) 2010-2011 by the Wikimedia Foundation.' |
| 271 | + print 'Wikilytics is (c) 2010-2011 by the Wikimedia Foundation.' |
273 | 272 | print 'Written by Diederik van Liere (dvanliere@gmail.com).' |
274 | | - print '''This software comes with ABSOLUTELY NO WARRANTY.\nThis is |
| 273 | + print '''This software comes with ABSOLUTELY NO WARRANTY. This is |
275 | 274 | free software, and you are welcome to distribute it under certain |
276 | 275 | conditions.''' |
277 | 276 | print 'See the README.1ST file for more information.' |
— | — | @@ -281,7 +280,6 @@ |
282 | 281 | ''' |
283 | 282 | Entry point for parsing command line and launching the needed function(s). |
284 | 283 | ''' |
285 | | - #settings = configuration.Settings() |
286 | 284 | language = languages.init() |
287 | 285 | project = projects.init() |
288 | 286 | pjc = projects.ProjectContainer() |
Index: trunk/tools/editor_trends/wikitree/parser.py |
— | — | @@ -23,10 +23,11 @@ |
24 | 24 | import xml.etree.cElementTree as cElementTree |
25 | 25 | import sys |
26 | 26 | |
27 | | -sys.path.append('..') |
| 27 | +if '..' not in sys.path: |
| 28 | + sys.path.append('..') |
28 | 29 | |
29 | | -import configuration |
30 | | -settings = configuration.Settings() |
| 30 | +from classes import settings |
| 31 | +settings = settings.Settings() |
31 | 32 | from utils import file_utils |
32 | 33 | |
33 | 34 | def convert_html_entities(text): |
Index: trunk/tools/editor_trends/etl/store.py |
— | — | @@ -21,100 +21,100 @@ |
22 | 22 | import multiprocessing |
23 | 23 | import sys |
24 | 24 | import os |
| 25 | +import progressbar |
25 | 26 | |
26 | 27 | from utils import file_utils |
27 | 28 | from utils import text_utils |
28 | 29 | from database import cache |
| 30 | +from database import db |
| 31 | +from classes import consumers |
29 | 32 | from utils import messages |
30 | | -from database import db |
31 | 33 | |
32 | 34 | |
33 | | -def store_articles(rts): |
34 | | - location = os.path.join(rts.input_location, rts.language.code, rts.project.name) |
35 | | - fh = file_utils.create_txt_filehandle(location, 'articles.csv', 'r', rts.encoding) |
36 | | - headers = ['id', 'title'] |
37 | | - data = file_utils.read_unicode_text(fh) |
38 | | - fh.close() |
39 | 35 | |
40 | | - mongo = db.init_mongo_db(rts.dbname) |
41 | | - collection = mongo[rts.articles_raw] |
| 36 | +class Storer(consumers.BaseConsumer): |
| 37 | + def run(self): |
| 38 | + ''' |
| 39 | + This function is called by multiple consumers who each take a sorted |
| 40 | + file and create a cache object. If the number of edits made by an |
| 41 | + editor is above the threshold then the cache object stores the data in |
| 42 | + Mongo, else the data is discarded. |
| 43 | + The threshold is currently more than 9 edits and is not yet configurable. |
| 44 | + ''' |
| 45 | + mongo = db.init_mongo_db(self.rts.dbname) |
| 46 | + collection = mongo[self.rts.editors_raw] |
42 | 47 | |
43 | | - articles = {} |
44 | | - for x, d in enumerate(data): |
45 | | - d = d.split('\t') |
46 | | - x = str(x) |
47 | | - articles[x] = {} |
48 | | - for k, v in zip(headers, d): |
49 | | - articles[x][k] = v |
| 48 | + editor_cache = cache.EditorCache(collection) |
| 49 | + prev_contributor = -1 |
| 50 | + while True: |
| 51 | + try: |
| 52 | + filename = self.tasks.get(block=False) |
| 53 | + except Empty: |
| 54 | + break |
50 | 55 | |
51 | | - collection.insert(articles) |
| 56 | + self.tasks.task_done() |
| 57 | + if filename == None: |
| 58 | + self.result.put(None) |
| 59 | + break |
52 | 60 | |
| 61 | + fh = file_utils.create_txt_filehandle(self.rts.sorted, filename, |
| 62 | + 'r', self.rts.encoding) |
| 63 | + for line in file_utils.read_raw_data(fh): |
| 64 | + if len(line) > 1: |
| 65 | + contributor = line[0] |
| 66 | + #print 'Parsing %s' % contributor |
| 67 | + if prev_contributor != contributor and prev_contributor != -1: |
| 68 | + editor_cache.add(prev_contributor, 'NEXT') |
| 69 | + date = text_utils.convert_timestamp_to_datetime_utc(line[1]) |
| 70 | + article_id = int(line[2]) |
| 71 | + username = line[3].encode(self.rts.encoding) |
| 72 | + ns = int(line[4]) |
| 73 | + value = {'date': date, |
| 74 | + 'article': article_id, |
| 75 | + 'username': username, |
| 76 | + 'ns': ns} |
| 77 | + editor_cache.add(contributor, value) |
| 78 | + prev_contributor = contributor |
| 79 | + fh.close() |
| 80 | + self.result.put(True) |
53 | 81 | |
54 | | -def store_editors(tasks, rts): |
55 | | - ''' |
56 | | - This function is called by multiple consumers who each take a sorted file |
57 | | - and create a cache object. If the number of edits made by an editor is above |
58 | | - the treshold then the cache object stores the data in Mongo, else the data |
59 | | - is discarded. |
60 | | - The treshold is currently more than 9 edits and is not yet configurable. |
61 | | - ''' |
| 82 | + |
| 83 | +def store_articles(rts): |
62 | 84 | mongo = db.init_mongo_db(rts.dbname) |
63 | | - collection = mongo[rts.editors_raw] |
| 85 | + collection = mongo[rts.articles_raw] |
64 | 86 | |
65 | | - editor_cache = cache.EditorCache(collection) |
66 | | - prev_contributor = -1 |
67 | | - while True: |
68 | | - try: |
69 | | - filename = tasks.get(block=False) |
70 | | - except Empty: |
71 | | - break |
| 87 | + location = os.path.join(rts.input_location, rts.language.code, rts.project.name) |
| 88 | + fh = file_utils.create_txt_filehandle(location, 'articles.csv', 'r', rts.encoding) |
| 89 | + print 'Storing article titles...' |
| 90 | + for line in fh: |
| 91 | + line = line.strip() |
| 92 | + id, title = line.split('\t') |
| 93 | + collection.insert({'id':id, 'title':title}) |
| 94 | + fh.close() |
| 95 | + print 'Done...' |
72 | 96 | |
73 | | - tasks.task_done() |
74 | | - if filename == None: |
75 | | - print 'Swallowing a poison pill.' |
76 | | - break |
77 | | - print '%s files left in the queue.' % messages.show(tasks.qsize) |
78 | 97 | |
79 | | - fh = file_utils.create_txt_filehandle(rts.sorted, filename, 'r', rts.encoding) |
80 | | - for line in file_utils.read_raw_data(fh): |
81 | | - if len(line) > 1: |
82 | | - contributor = line[0] |
83 | | - #print 'Parsing %s' % contributor |
84 | | - if prev_contributor != contributor and prev_contributor != -1: |
85 | | - editor_cache.add(prev_contributor, 'NEXT') |
86 | | - date = text_utils.convert_timestamp_to_datetime_utc(line[1]) |
87 | | - article_id = int(line[2]) |
88 | | - username = line[3].encode(rts.encoding) |
89 | | - ns = int(line[4]) |
90 | | - value = {'date': date, |
91 | | - 'article': article_id, |
92 | | - 'username': username, |
93 | | - 'ns': ns} |
94 | | - editor_cache.add(contributor, value) |
95 | | - prev_contributor = contributor |
96 | | - fh.close() |
97 | | - #print editor_cache.n |
98 | | - |
99 | | - |
100 | 98 | def launcher(rts): |
101 | 99 | ''' |
102 | 100 | This is the main entry point and creates a number of workers and launches |
103 | 101 | them. |
104 | 102 | ''' |
105 | | - #rts.sorted, rts.dbname, rts.collection |
| 103 | + store_articles(rts) |
| 104 | + print 'Input directory is: %s ' % rts.sorted |
106 | 105 | mongo = db.init_mongo_db(rts.dbname) |
107 | 106 | coll = mongo[rts.editors_raw] |
108 | 107 | coll.ensure_index('editor') |
109 | 108 | coll.create_index('editor') |
110 | 109 | |
111 | 110 | files = file_utils.retrieve_file_list(rts.sorted, 'csv') |
| 111 | + pbar = progressbar.ProgressBar(maxval=len(files)).start() |
112 | 112 | |
113 | | - print 'Input directory is: %s ' % rts.sorted |
114 | 113 | tasks = multiprocessing.JoinableQueue() |
115 | | - consumers = [multiprocessing.Process(target=store_editors, |
116 | | - args=(tasks, rts)) |
117 | | - for i in xrange(rts.number_of_processes)] |
| 114 | + result = multiprocessing.JoinableQueue() |
118 | 115 | |
| 116 | + consumers = [Storer(rts, tasks, result) for |
| 117 | + x in xrange(rts.number_of_processes)] |
| 118 | + |
119 | 119 | for filename in files: |
120 | 120 | tasks.put(filename) |
121 | 121 | |
— | — | @@ -124,8 +124,20 @@ |
125 | 125 | for w in consumers: |
126 | 126 | w.start() |
127 | 127 | |
| 128 | + ppills = rts.number_of_processes |
| 129 | + while True: |
| 130 | + while ppills > 0: |
| 131 | + try: |
| 132 | + res = result.get(block=True) |
| 133 | + if res == True: |
| 134 | + pbar.update(pbar.currval + 1) |
| 135 | + else: |
| 136 | + ppills -= 1 |
| 137 | + except Empty: |
| 138 | + pass |
| 139 | + break |
| 140 | + |
128 | 141 | tasks.join() |
129 | | - store_articles(rts) |
130 | 142 | |
131 | 143 | |
132 | 144 | def debug(): |
Index: trunk/tools/editor_trends/etl/downloader.py |
— | — | @@ -22,15 +22,12 @@ |
23 | 23 | import multiprocessing |
24 | 24 | import sys |
25 | 25 | |
26 | | -#sys.path.append('..') |
27 | | -#import configuration |
28 | | -#settings = configuration.Settings() |
29 | | - |
30 | 26 | from utils import file_utils |
31 | 27 | from utils import http_utils |
32 | 28 | from utils import text_utils |
33 | 29 | from utils import log |
34 | 30 | |
| 31 | + |
35 | 32 | def download_wiki_file(task_queue, properties): |
36 | 33 | ''' |
37 | 34 | This is a very simple replacement for wget and curl because Windows does |
Index: trunk/tools/editor_trends/etl/extracter.py |
— | — | @@ -380,7 +380,6 @@ |
381 | 381 | no data pollution and finally it will start the parser to actually extract |
382 | 382 | the variables from the different dump files. |
383 | 383 | ''' |
384 | | - result = True |
385 | 384 | tasks = unzip(rts) |
386 | 385 | if not tasks: |
387 | 386 | return False |
— | — | @@ -413,10 +412,14 @@ |
414 | 413 | tasks.join() |
415 | 414 | filehandles = [fh.close() for fh in filehandles] |
416 | 415 | |
417 | | - result = all([consumer.exitcode for consumer in consumers]) |
418 | | - return result |
| 416 | + result = sum([consumer.exitcode for consumer in consumers]) |
419 | 417 | |
| 418 | + if restult == 0: |
| 419 | + return True |
| 420 | + else: |
| 421 | + return False |
420 | 422 | |
| 423 | + |
421 | 424 | def debug(): |
422 | 425 | project = 'wiki' |
423 | 426 | language_code = 'sv' |
Index: trunk/tools/editor_trends/etl/sort.py |
— | — | @@ -22,12 +22,45 @@ |
23 | 23 | import sys |
24 | 24 | import os |
25 | 25 | import multiprocessing |
| 26 | +import progressbar |
26 | 27 | from Queue import Empty |
27 | 28 | |
28 | 29 | from utils import file_utils |
29 | 30 | from utils import messages |
| 31 | +from classes import consumers |
30 | 32 | |
31 | 33 | |
| 34 | +class Sorter(consumers.BaseConsumer): |
| 35 | + def run(self): |
| 36 | + ''' |
| 37 | + Each Sorter takes filenames from the task queue, sorts the file with |
| 38 | + mergesort and writes the sorted file; a None task ends the consumer. |
| 39 | + ''' |
| 40 | + while True: |
| 41 | + try: |
| 42 | + filename = self.tasks.get(block=False) |
| 43 | + self.tasks.task_done() |
| 44 | + if filename == None: |
| 45 | + self.result.put(None) |
| 46 | + break |
| 47 | + |
| 48 | + fh = file_utils.create_txt_filehandle(self.rts.txt, |
| 49 | + filename, |
| 50 | + 'r', |
| 51 | + self.rts.encoding) |
| 52 | + data = file_utils.read_unicode_text(fh) |
| 53 | + fh.close() |
| 54 | + data = [d.strip() for d in data] |
| 55 | + data = [d.split('\t') for d in data] |
| 56 | + sorted_data = mergesort(data) |
| 57 | + write_sorted_file(sorted_data, filename, self.rts) |
| 58 | + self.result.put(True) |
| 59 | + except UnicodeDecodeError, e: |
| 60 | + print e |
| 61 | + except Empty: |
| 62 | + pass |
| 63 | + |
| 64 | + |
32 | 65 | def quick_sort(obs): |
33 | 66 | ''' |
34 | 67 | Quicksort is a sorting algorithm developed by C. A. R. Hoare that, on \ |
— | — | @@ -74,7 +107,6 @@ |
75 | 108 | return result |
76 | 109 | |
77 | 110 | |
78 | | - |
79 | 111 | def merge_sorted_files(target, files, iteration, rts): |
80 | 112 | ''' |
81 | 113 | Merges smaller sorted files in one big file, Only used for creating |
— | — | @@ -105,48 +137,20 @@ |
106 | 138 | fh.close() |
107 | 139 | |
108 | 140 | |
109 | | -def mergesort_feeder(tasks, rts): |
110 | | - ''' |
111 | | - The feeder function is called by the launcher and gives it a task to |
112 | | - complete. |
113 | | - ''' |
114 | | - while True: |
115 | | - try: |
116 | | - filename = tasks.get(block=False) |
117 | | - tasks.task_done() |
118 | | - if filename == None: |
119 | | - print 'Swallowed a poison pill' |
120 | | - print tasks.qsize() |
121 | | - break |
122 | | - |
123 | | - fh = file_utils.create_txt_filehandle(rts.txt, |
124 | | - filename, |
125 | | - 'r', |
126 | | - rts.encoding) |
127 | | - #print fh |
128 | | - #data = fh.readlines() |
129 | | - data = file_utils.read_unicode_text(fh) |
130 | | - fh.close() |
131 | | - data = [d.strip() for d in data] |
132 | | - data = [d.split('\t') for d in data] |
133 | | - sorted_data = mergesort(data) |
134 | | - write_sorted_file(sorted_data, filename, rts) |
135 | | - print filename, messages.show(tasks.qsize) |
136 | | - except UnicodeDecodeError, e: |
137 | | - print e |
138 | | - except Empty: |
139 | | - pass |
140 | | - |
141 | | - |
142 | 141 | def launcher(rts): |
143 | 142 | ''' |
144 | 143 | rts is an instance of RunTimeSettings |
145 | 144 | ''' |
146 | 145 | files = file_utils.retrieve_file_list(rts.txt, 'csv') |
| 146 | + #files = files[0:6] |
| 147 | + |
| 148 | + pbar = progressbar.ProgressBar(maxval=len(files)).start() |
147 | 149 | tasks = multiprocessing.JoinableQueue() |
148 | | - consumers = [multiprocessing.Process(target=mergesort_feeder, |
149 | | - args=(tasks, rts)) |
150 | | - for x in xrange(rts.number_of_processes)] |
| 150 | + result = multiprocessing.JoinableQueue() |
| 151 | + |
| 152 | + consumers = [Sorter(rts, tasks, result) for |
| 153 | + x in xrange(rts.number_of_processes)] |
| 154 | + |
151 | 155 | for filename in files: |
152 | 156 | tasks.put(filename) |
153 | 157 | |
— | — | @@ -156,17 +160,17 @@ |
157 | 161 | for w in consumers: |
158 | 162 | w.start() |
159 | 163 | |
| 164 | + ppills = rts.number_of_processes |
| 165 | + while True: |
| 166 | + while ppills > 0: |
| 167 | + try: |
| 168 | + res = result.get(block=True) |
| 169 | + if res == True: |
| 170 | + pbar.update(pbar.currval + 1) |
| 171 | + else: |
| 172 | + ppills -= 1 |
| 173 | + except Empty: |
| 174 | + pass |
| 175 | + break |
| 176 | + |
160 | 177 | tasks.join() |
161 | | - |
162 | | - |
163 | | -def debug(): |
164 | | - ''' |
165 | | - Simple test function |
166 | | - ''' |
167 | | - source = os.path.join(settings.input_location, 'en', 'wiki', 'txt') |
168 | | - target = os.path.join(settings.input_location, 'en', 'wiki', 'sorted') |
169 | | - mergesort_launcher(source, target) |
170 | | - |
171 | | - |
172 | | -if __name__ == '__main__': |
173 | | - debug() |
Index: trunk/tools/editor_trends/classes/consumers.py |
— | — | @@ -19,153 +19,12 @@ |
20 | 20 | |
21 | 21 | import multiprocessing |
22 | 22 | |
23 | | -''' |
24 | | -This needs a cleanup |
25 | | -''' |
26 | 23 | class BaseConsumer(multiprocessing.Process): |
27 | 24 | |
28 | | - def __init__(self, task_queue, result_queue): |
| 25 | + def __init__(self, rts, tasks, result=None): |
29 | 26 | multiprocessing.Process.__init__(self) |
30 | | - self.task_queue = task_queue |
31 | | - self.result_queue = result_queue |
| 27 | + self.rts = rts |
| 28 | + self.tasks = tasks |
| 29 | + self.result = result |
32 | 30 | |
33 | 31 | |
34 | | -# for kw in kwargs: |
35 | | -# setattr(self, kw, kwargs[kw]) |
36 | | -# |
37 | | -# def run(self): |
38 | | -# proc_name = self.name |
39 | | -# kwargs = {} |
40 | | -# IGNORE = ['input_queue', 'result_queue', 'target'] |
41 | | -# for kw in self.__dict__: |
42 | | -# if kw not in IGNORE and not kw.startswith('_'): |
43 | | -# kwargs[kw] = getattr(self, kw) |
44 | | -# self.target(self.input_queue, self.result_queue, **kwargs) |
45 | | - |
46 | | - |
47 | | -class ProcessResultQueue(multiprocessing.Process): |
48 | | - |
49 | | - def __init__(self, target, result_queue, **kwargs): |
50 | | - multiprocessing.Process.__init__(self) |
51 | | - self.result_queue = result_queue |
52 | | - self.target = target |
53 | | - for kw in kwargs: |
54 | | - setattr(self, kw, kwargs[kw]) |
55 | | - |
56 | | - |
57 | | - def run(self): |
58 | | - proc_name = self.name |
59 | | - kwargs = {} |
60 | | - IGNORE = ['result_queue', 'target'] |
61 | | - for kw in self.__dict__: |
62 | | - if kw not in IGNORE and not kw.startswith('_'): |
63 | | - kwargs[kw] = getattr(self, kw) |
64 | | - self.target(self.result_queue, **kwargs) |
65 | | - |
66 | | -class TXTFile(object): |
67 | | - |
68 | | - def __init__(self, file, location, output, output_file, target, **kwargs): |
69 | | - self.file = file |
70 | | - self.location = location |
71 | | - self.target = target |
72 | | - self.output = output |
73 | | - self.output_file = output_file |
74 | | - for kw in kwargs: |
75 | | - setattr(self, kw, kwargs[kw]) |
76 | | - |
77 | | - def __str__(self): |
78 | | - return '%s' % (self.file) |
79 | | - |
80 | | - def __call__(self, bots): |
81 | | - self.bots = bots |
82 | | - self.fr = file_utils.create_txt_filehandle(self.location, self.file, 'r', settings.encoding) |
83 | | - self.fw = file_utils.create_txt_filehandle(self.output, self.output_file, 'w', settings.encoding) |
84 | | - for line in self.fr: |
85 | | - line = line.strip() |
86 | | - if line == '': |
87 | | - continue |
88 | | - line = line.split('\t') |
89 | | - self.bots = self.target(line, self.fw, self.bots, self.keys) |
90 | | - if self.bots == {}: |
91 | | - break |
92 | | - self.fr.close() |
93 | | - self.fw.close() |
94 | | - return self.bots |
95 | | - |
96 | | - |
97 | | -class XMLFileConsumer(BaseConsumer): |
98 | | - |
99 | | - def run(self): |
100 | | - while True: |
101 | | - new_xmlfile = self.task_queue.get() |
102 | | - self.task_queue.task_done() |
103 | | - if new_xmlfile == None: |
104 | | - print 'Swallowed a poison pill' |
105 | | - break |
106 | | - print 'Queue is %s files long...' % (messages.show(self.task_queue.qsize) - settings.number_of_processes) |
107 | | - new_xmlfile() |
108 | | - |
109 | | - |
110 | | -class XMLFile(object): |
111 | | - def __init__(self, file, location, output, output_file, target, ** kwargs): |
112 | | - self.file = file |
113 | | - self.location = location |
114 | | - self.output = output |
115 | | - self.target = target |
116 | | - self.output_file = output_file |
117 | | - for kw in kwargs: |
118 | | - setattr(self, kw, kwargs[kw]) |
119 | | - |
120 | | - def create_file_handle(self): |
121 | | - self.mode = 'a' |
122 | | - if self.output_file == None: |
123 | | - self.mode = 'w' |
124 | | - self.output_file = self.file[:-4] + '.txt' |
125 | | - |
126 | | - self.fh = file_utils.create_txt_filehandle(self.output, self.output_file, self.mode, settings.encoding) |
127 | | - |
128 | | - def __str__(self): |
129 | | - return '%s' % (self.file) |
130 | | - |
131 | | - def __call__(self, bots=None): |
132 | | - if bots != {} and bots != None: |
133 | | - self.bots = bots |
134 | | - if settings.debug: |
135 | | - messages = {} |
136 | | - vars = {} |
137 | | - |
138 | | - data = xml.read_input(utils.create_txt_filehandle(self.location, |
139 | | - self.file, 'r', |
140 | | - encoding=settings.encoding)) |
141 | | - self.create_file_handle() |
142 | | - for raw_data in data: |
143 | | - xml_buffer = cStringIO.StringIO() |
144 | | - raw_data.insert(0, '<?xml version="1.0" encoding="UTF-8" ?>\n') |
145 | | - try: |
146 | | - raw_data = ''.join(raw_data) |
147 | | - xml_buffer.write(raw_data) |
148 | | - elem = cElementTree.XML(xml_buffer.getvalue()) |
149 | | - bots = self.target(elem, fh=self.fh, bots=self.bots) |
150 | | - except SyntaxError, error: |
151 | | - print error |
152 | | - ''' |
153 | | - There are few cases with invalid tokens, they are ignored |
154 | | - ''' |
155 | | - if settings.debug: |
156 | | - file_utils.track_errors(xml_buffer, error, self.file, messages) |
157 | | - except UnicodeEncodeError, error: |
158 | | - print error |
159 | | - if settings.debug: |
160 | | - file_utils.track_errors(xml_buffer, error, self.file, messages) |
161 | | - except MemoryError, error: |
162 | | - print self.file, error |
163 | | - print raw_data[:12] |
164 | | - print 'String was supposed to be %s characters long' % sum([len(raw) for raw in raw_data]) |
165 | | - else: |
166 | | - self.fh.close() |
167 | | - |
168 | | - if settings.debug: |
169 | | - file_utils.report_error_messages(messages, self.target) |
170 | | - |
171 | | - return bots |
172 | | - |
Index: trunk/tools/editor_trends/classes/settings.py |
— | — | @@ -143,7 +143,8 @@ |
144 | 144 | try: |
145 | 145 | os.makedirs(directory) |
146 | 146 | except IOError: |
147 | | - print 'Configuration Error, could not create directory %s.' % directory |
| 147 | + print 'Configuration Error, could not create directory %s.'\ |
| 148 | + % directory |
148 | 149 | |
149 | 150 | def detect_windows_program(self, program): |
150 | 151 | entry = self.windows_register.get(program, None) |
— | — | @@ -154,7 +155,8 @@ |
155 | 156 | return None |
156 | 157 | |
157 | 158 | def detect_linux_program(self, program): |
158 | | - path = subprocess.Popen(['which', '%s' % program], stdout=subprocess.PIPE).communicate()[0] |
| 159 | + path = subprocess.Popen(['which', '%s' % program], |
| 160 | + stdout=subprocess.PIPE).communicate()[0] |
159 | 161 | return path.strip() |
160 | 162 | |
161 | 163 | def detect_installed_program(self, program): |
Index: trunk/tools/editor_trends/classes/exceptions.py |
— | — | @@ -62,6 +62,15 @@ |
63 | 63 | return 'There is no JSON encoder called %s, please make sure that you \ |
64 | 64 | entered the right name' % self.func |
65 | 65 | |
| 66 | +class UnknownChartError(Error): |
| 67 | + def __init__(self, chart, charts): |
| 68 | + self.chart = chart |
| 69 | + self.charts = charts |
| 70 | + |
| 71 | + def __str__(self): |
| 72 | + return 'Currently, chart type %s is not supported. Please choose one of \ |
| 73 | + the following charts: %s' % (self.chart, self.charts) |
| 74 | + |
66 | 75 | class NotYetImplementedError(Error): |
67 | 76 | def __init__(self, func): |
68 | 77 | self.func = func |
Index: trunk/tools/editor_trends/classes/runtime_settings.py |
— | — | @@ -20,7 +20,6 @@ |
21 | 21 | ''' |
22 | 22 | This file provides a mapper between language names, locale language names and |
23 | 23 | Wikipedia acronyms. |
24 | | -Gothic and Birmese are not yet supported, see rows 450 and 554. |
25 | 24 | ''' |
26 | 25 | |
27 | 26 | import os |
— | — | @@ -33,7 +32,8 @@ |
34 | 33 | from settings import Settings |
35 | 34 | from utils import text_utils |
36 | 35 | from utils import ordered_dict as odict |
37 | | -from classes import languages |
| 36 | +import languages |
| 37 | +import projects |
38 | 38 | |
39 | 39 | |
40 | 40 | class RunTimeSettings(Settings): |
— | — | @@ -129,7 +129,8 @@ |
130 | 130 | ''' |
131 | 131 | Construct the full project location |
132 | 132 | ''' |
133 | | - return os.path.join(self.input_location, self.language.code, self.project.name) |
| 133 | + return os.path.join(self.input_location, self.language.code, |
| 134 | + self.project.name) |
134 | 135 | |
135 | 136 | def show_settings(self): |
136 | 137 | ''' |
— | — | @@ -137,7 +138,9 @@ |
138 | 139 | ''' |
139 | 140 | about = {} |
140 | 141 | about['Project'] = '%s' % self.project.full_name.title() |
141 | | - about['Language'] = '%s / %s / %s' % (self.language.name, self.language.locale, self.language.code) |
| 142 | + about['Language'] = '%s / %s / %s' % (self.language.name, |
| 143 | + self.language.locale, |
| 144 | + self.language.code) |
142 | 145 | about['Input directory'] = '%s' % self.location |
143 | 146 | about['Output directory'] = '%s and subdirectories' % self.location |
144 | 147 | |
— | — | @@ -155,7 +158,9 @@ |
156 | 159 | |
157 | 160 | def set_dump_path(self, absolute=False): |
158 | 161 | if absolute: |
159 | | - return '%s/%s%s/latest/' % (self.wp_dump_location, self.language.code, self.project.name) |
| 162 | + return '%s/%s%s/latest/' % (self.wp_dump_location, |
| 163 | + self.language.code, |
| 164 | + self.project.name) |
160 | 165 | else: |
161 | 166 | return '/%s%s/latest/' % (self.language.code, self.project.name) |
162 | 167 | |
— | — | @@ -163,7 +168,8 @@ |
164 | 169 | ''' |
165 | 170 | Generate the main name of the wikidump file to be downloaded. |
166 | 171 | ''' |
167 | | - return '%s%s-latest-%s' % (self.language.code, self.project.name, self.get_value('file')) |
| 172 | + return '%s%s-latest-%s' % (self.language.code, self.project.name, |
| 173 | + self.get_value('file')) |
168 | 174 | |
169 | 175 | def update_language_settings(self): |
170 | 176 | ''' |
— | — | @@ -193,7 +199,8 @@ |
194 | 200 | |
195 | 201 | def get_projectname(self): |
196 | 202 | ''' |
197 | | - Determine the full project name based on the project acronym and language. |
| 203 | + Determine the full project name based on the project acronym |
| 204 | + and language. |
198 | 205 | ''' |
199 | 206 | #language_code = self.get_language() |
200 | 207 | print self.language.code, self.project.name |
— | — | @@ -223,3 +230,15 @@ |
224 | 231 | return namespaces.split(',') |
225 | 232 | else: |
226 | 233 | return ['0'] #Assume that the mainspace is of interest |
| 234 | + |
| 235 | + |
| 236 | +def init_environment(project, language_code, args): |
| 237 | + pjc = projects.ProjectContainer() |
| 238 | + project = pjc.get_project(project) |
| 239 | + lnc = languages.LanguageContainer() |
| 240 | + language = lnc.get_language(language_code) |
| 241 | + |
| 242 | + args.language = language.name |
| 243 | + args.project = project.name |
| 244 | + rts = RunTimeSettings(project, language, args) |
| 245 | + return rts |
Index: trunk/tools/editor_trends/classes/dataset.py |
— | — | @@ -23,18 +23,23 @@ |
24 | 24 | import math |
25 | 25 | import operator |
26 | 26 | import sys |
| 27 | +import hashlib |
27 | 28 | from pymongo.son_manipulator import SONManipulator |
28 | 29 | from multiprocessing import Lock |
29 | 30 | from texttable import Texttable |
30 | 31 | |
31 | | -sys.path.append('..') |
32 | | -import configuration |
33 | | -settings = configuration.Settings() |
34 | 32 | |
| 33 | +if '..' not in sys.path: |
| 34 | + sys.path.append('..') |
| 35 | + |
| 36 | +from classes import settings |
| 37 | +settings = settings.Settings() |
| 38 | + |
35 | 39 | from utils import file_utils |
36 | 40 | from utils import data_converter |
37 | 41 | from database import db |
38 | 42 | from analyses import json_encoders |
| 43 | +from classes import exceptions |
39 | 44 | |
40 | 45 | class Transform(SONManipulator): |
41 | 46 | ''' |
— | — | @@ -82,10 +87,14 @@ |
83 | 88 | def __hash__(self, vars): |
84 | 89 | ''' |
85 | 90 | This is a generic hash function that expects a list of variables, used |
86 | | - to lookup an observation or Variable. |
| 91 | + to look up an Observation or Variable. |
87 | 92 | ''' |
88 | | - id = ''.join([str(var) for var in vars]) |
89 | | - return hash(id) |
| 93 | + id = '_'.join([str(var) for var in vars]) |
| 94 | + m = hashlib.md5() |
| 95 | + m.update(id) |
| 96 | + #print id, m.hexdigest() |
| 97 | + return m.hexdigest() |
| 98 | + #return ''.join([str(var) for var in vars]) |
90 | 99 | |
91 | 100 | def encode_to_bson(self, data=None): |
92 | 101 | ''' |
— | — | @@ -176,9 +185,6 @@ |
177 | 186 | |
178 | 187 | def add(self, value): |
179 | 188 | ''' |
180 | | - If update == True then data[i] will be incremented else data[i] will be |
181 | | - created, in that case make sure that i is unique. Update is useful for |
182 | | - tallying a variable. |
183 | 189 | ''' |
184 | 190 | self.lock.acquire() |
185 | 191 | try: |
— | — | @@ -192,6 +198,7 @@ |
193 | 199 | self.count += 1 |
194 | 200 | self.lock.release() |
195 | 201 | |
| 202 | + |
196 | 203 | def get_date_range(self): |
197 | 204 | return '%s-%s-%s:%s-%s-%s' % (self.t0.month, self.t0.day, self.t0.year, \ |
198 | 205 | self.t1.month, self.t1.day, self.t1.year) |
— | — | @@ -200,9 +207,10 @@ |
201 | 208 | ''' |
202 | 209 | This class constructs a time-based variable. |
203 | 210 | ''' |
204 | | - lock = Lock() |
| 211 | + |
205 | 212 | def __init__(self, name, time_unit, **kwargs): |
206 | 213 | self.name = name |
| 214 | + self.lock = Lock() |
207 | 215 | self.obs = {} |
208 | 216 | self.time_unit = time_unit |
209 | 217 | self.groupbys = [] |
— | — | @@ -278,11 +286,16 @@ |
279 | 287 | values.insert(0, end) |
280 | 288 | values.insert(0, start) |
281 | 289 | id = self.__hash__(values) |
| 290 | +# print values |
| 291 | + self.lock.acquire() |
| 292 | + try: |
| 293 | + obs = self.get_observation(id, date, meta) |
| 294 | + obs.add(value) |
| 295 | + self.obs[id] = obs |
| 296 | + finally: |
| 297 | + self.lock.release() |
| 298 | + print len(self.obs) |
282 | 299 | |
283 | | - obs = self.get_observation(id, date, meta) |
284 | | - obs.add(value) |
285 | | - self.obs[id] = obs |
286 | | - |
287 | 300 | def number_of_obs(self): |
288 | 301 | n = 0 |
289 | 302 | for obs in self.obs: |
— | — | @@ -327,19 +340,19 @@ |
328 | 341 | to output the dataset to a csv file, mongodb and display statistics. |
329 | 342 | ''' |
330 | 343 | |
331 | | - def __init__(self, name, project, collection, language_code, encoder, vars=None, **kwargs): |
332 | | - encoders = json_encoders.available_json_encoders() |
333 | | - if encoder not in encoders: |
334 | | - raise exception.UnknownJSONEncoderError(encoder) |
335 | | - else: |
336 | | - self.encoder = encoder |
337 | | - self.name = name |
338 | | - self.project = project |
339 | | - self.collection = collection |
340 | | - self.language_code = language_code |
| 344 | + def __init__(self, chart, rts, vars=None, **kwargs): |
| 345 | + #project, collection, language_code |
| 346 | + self.encoder, chart, charts = json_encoders.get_json_encoder(chart) |
| 347 | + if self.encoder == None: |
| 348 | + raise exceptions.UnknownChartError(chart, charts) |
| 349 | + self.chart = chart |
| 350 | + self.name = 'Dataset to construct %s' % self.chart |
| 351 | + self.project = rts.project.name |
| 352 | + self.collection = rts.editors_dataset |
| 353 | + self.language_code = rts.language.code |
341 | 354 | self.hash = self.name |
342 | 355 | self._type = 'dataset' |
343 | | - self.created = datetime.datetime.now() |
| 356 | + self.created = datetime.datetime.today() |
344 | 357 | self.format = 'long' |
345 | 358 | for kw in kwargs: |
346 | 359 | setattr(self, kw, kwargs[kw]) |
Index: trunk/tools/editor_trends/classes/languages.py |
— | — | @@ -16,6 +16,8 @@ |
17 | 17 | __date__ = '2011-01-26' |
18 | 18 | __version__ = '0.1' |
19 | 19 | |
| 20 | +'Gothic and Burmese are not yet supported, see rows 450 and 554.' |
| 21 | + |
20 | 22 | import locale |
21 | 23 | import sys |
22 | 24 | sys.path.append('..') |
Index: trunk/tools/editor_trends/classes/bots.py |
— | — | @@ -20,11 +20,13 @@ |
21 | 21 | |
22 | 22 | import datetime |
23 | 23 | import sys |
24 | | -sys.path.append('..') |
25 | 24 | |
26 | | -import configuration |
27 | | -settings = configuration.Settings() |
| 25 | +if '..' not in sys.path: |
| 26 | + sys.path.append('..') |
28 | 27 | |
| 28 | +from classes import settings |
| 29 | +settings = settings.Settings() |
| 30 | + |
29 | 31 | from etl import shaper |
30 | 32 | from utils import file_utils |
31 | 33 | |
Index: trunk/tools/editor_trends/utils/inventory.py |
— | — | @@ -21,10 +21,11 @@ |
22 | 22 | import sys |
23 | 23 | from threading import Thread |
24 | 24 | from HTMLParser import HTMLParser |
25 | | -sys.path.append('..') |
| 25 | +if '..' not in sys.path: |
| 26 | + sys.path.append('..') |
26 | 27 | |
27 | | -import configuration |
28 | | -settings = configuration.Settings() |
| 28 | +from classes import settings |
| 29 | +settings = settings.Settings() |
29 | 30 | |
30 | 31 | from database import db |
31 | 32 | from utils import http_utils |
Index: trunk/tools/editor_trends/utils/file_utils.py |
— | — | @@ -35,12 +35,12 @@ |
36 | 36 | import shutil |
37 | 37 | import multiprocessing |
38 | 38 | |
39 | | -sys.path.append('..') |
| 39 | +if '..' not in sys.path: |
| 40 | + sys.path.append('..') |
40 | 41 | |
| 42 | +from classes import settings |
| 43 | +settings = settings.Settings() |
41 | 44 | |
42 | | -import configuration |
43 | | -settings = configuration.Settings() |
44 | | - |
45 | 45 | from classes import exceptions |
46 | 46 | import messages |
47 | 47 | import text_utils |
Index: trunk/tools/editor_trends/utils/http_utils.py |
— | — | @@ -23,10 +23,11 @@ |
24 | 24 | import httplib |
25 | 25 | import multiprocessing |
26 | 26 | |
| 27 | +if '..' not in sys.path: |
| 28 | + sys.path.append('..') |
27 | 29 | |
28 | | -sys.path.append('..') |
29 | | -import configuration |
30 | | -settings = configuration.Settings() |
| 30 | +from classes import settings |
| 31 | +settings = settings.Settings() |
31 | 32 | import file_utils |
32 | 33 | import text_utils |
33 | 34 | import log |
Index: trunk/tools/editor_trends/utils/timer.py |
— | — | @@ -33,6 +33,7 @@ |
34 | 34 | self.stop() |
35 | 35 | print 'Processing time: %s' % (self.t1 - self.t0) |
36 | 36 | |
| 37 | + |
37 | 38 | def humanize_time_difference(seconds_elapsed): |
38 | 39 | """ |
39 | 40 | Returns a humanized string representing time difference. |
Index: trunk/tools/editor_trends/utils/compression.py |
— | — | @@ -20,10 +20,9 @@ |
21 | 21 | import sys |
22 | 22 | import subprocess |
23 | 23 | import os |
24 | | -sys.path.append('..') |
| 24 | +if '..' not in sys.path: |
| 25 | + sys.path.append('..') |
25 | 26 | |
26 | | -#import configuration |
27 | | -#settings = configuration.Settings() |
28 | 27 | from classes import settings |
29 | 28 | settings = settings.Settings() |
30 | 29 | from classes import exceptions |
Index: trunk/tools/editor_trends/utils/log.py |
— | — | @@ -20,10 +20,11 @@ |
21 | 21 | import datetime |
22 | 22 | import sys |
23 | 23 | import progressbar |
24 | | -sys.path.append('..') |
| 24 | +if '..' not in sys.path: |
| 25 | + sys.path.append('..') |
25 | 26 | |
26 | | -import configuration |
27 | | -settings = configuration.Settings() |
| 27 | +from classes import settings |
| 28 | +settings = settings.Settings() |
28 | 29 | |
29 | 30 | from database import db |
30 | 31 | |
— | — | @@ -47,8 +48,10 @@ |
48 | 49 | elif jobtype == 'chart': |
49 | 50 | _id = coll.save({'hash': hash, 'created': created, |
50 | 51 | 'jobtype': jobtype, |
51 | | - 'project': rts.project, |
52 | | - 'language_code': rts.language_code, |
| 52 | + 'finished': True, |
| 53 | + 'in_progress': True, |
| 54 | + 'project': rts.project.name, |
| 55 | + 'language_code': rts.language.code, |
53 | 56 | 'tasks': {}}) |
54 | 57 | |
55 | 58 | job = coll.find_one({'_id': _id}) |
Index: trunk/tools/editor_trends/utils/text_utils.py |
— | — | @@ -21,11 +21,13 @@ |
22 | 22 | import time |
23 | 23 | import sys |
24 | 24 | |
25 | | -sys.path.append('..') |
26 | | -import configuration |
27 | | -settings = configuration.Settings() |
| 25 | +if '..' not in sys.path: |
| 26 | + sys.path.append('..') |
28 | 27 | |
| 28 | +from classes import settings |
| 29 | +settings = settings.Settings() |
29 | 30 | |
| 31 | + |
30 | 32 | def convert_timestamp_to_date(timestamp): |
31 | 33 | return datetime.datetime.strptime(timestamp[:10], settings.date_format) |
32 | 34 | |
Index: trunk/tools/editor_trends/utils/messages.py |
— | — | @@ -17,14 +17,7 @@ |
18 | 18 | __date__ = '2011-01-05' |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
21 | | -#import sys |
22 | | -#sys.path.append('..') |
23 | | -# |
24 | | -#import configuration |
25 | | -#settings = configuration.Settings() |
26 | 21 | |
27 | | - |
28 | | - |
29 | 22 | def show(func): |
30 | 23 | ''' |
31 | 24 | @func should be an qsize() belonging to a task queue. qsize() is not supported |
Index: trunk/tools/editor_trends/database/launcher.py |
— | — | @@ -21,8 +21,9 @@ |
22 | 22 | import subprocess |
23 | 23 | import os |
24 | 24 | |
25 | | -import configuration |
26 | | -settings = configuration.Settings() |
| 25 | + |
| 26 | +from classes import settings |
| 27 | +settings = settings.Settings() |
27 | 28 | from classes import exceptions |
28 | 29 | from utils import file_utils |
29 | 30 | |
Index: trunk/tools/editor_trends/database/cache.py |
— | — | @@ -20,12 +20,14 @@ |
21 | 21 | |
22 | 22 | import datetime |
23 | 23 | import sys |
24 | | -sys.path.append('..') |
25 | 24 | import bson |
26 | 25 | |
27 | | -import configuration |
28 | | -settings = configuration.Settings() |
| 26 | +if '..' not in sys.path: |
| 27 | + sys.path.append('..') |
29 | 28 | |
| 29 | +from classes import settings |
| 30 | +settings = settings.Settings() |
| 31 | + |
30 | 32 | import db |
31 | 33 | from utils import file_utils |
32 | 34 | from etl import shaper |
Index: trunk/tools/editor_trends/database/db.py |
— | — | @@ -17,15 +17,18 @@ |
18 | 18 | __date__ = '2010-10-21' |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
| 21 | +import sys |
21 | 22 | import pymongo |
22 | 23 | from bson.code import Code |
23 | | -import sys |
24 | | -sys.path.append('..') |
25 | 24 | |
26 | | -import configuration |
27 | | -settings = configuration.Settings() |
| 25 | +if '..' not in sys.path: |
| 26 | + sys.path.append('..') |
| 27 | + |
| 28 | +from classes import settings |
| 29 | +settings = settings.Settings() |
28 | 30 | import file_utils |
29 | 31 | |
| 32 | + |
30 | 33 | def init_mongo_db(dbname): |
31 | 34 | connection = pymongo.Connection() |
32 | 35 | db = connection[dbname] |
Index: trunk/tools/editor_trends/mapreduce/xml2pig.py |
— | — | @@ -17,13 +17,15 @@ |
18 | 18 | __date__ = '2010-11-15' |
19 | 19 | __version__ = '0.1' |
20 | 20 | |
21 | | -import sys |
22 | | -sys.path.append('..') |
23 | 21 | |
24 | 22 | import os |
25 | 23 | import xml.etree.cElementTree as cElementTree |
26 | 24 | |
27 | | -import configuration |
28 | | -settings = configuration.Settings() |
| 25 | +if '..' not in sys.path: |
| 26 | + sys.path.append('..') |
29 | 27 | |
| 28 | +from classes import settings |
| 29 | +settings = settings.Settings() |
30 | 30 | |
| 31 | + |
| 32 | + |
Index: trunk/tools/editor_trends/cronjobs.py |
— | — | @@ -43,22 +43,22 @@ |
44 | 44 | return rts |
45 | 45 | |
46 | 46 | |
47 | | -def launch_editor_trends_toolkit(task): |
| 47 | +def launch_editor_trends_toolkit(task, args): |
48 | 48 | ''' |
49 | 49 | This function should only be called as a cronjob and not directly. |
50 | 50 | ''' |
51 | | - rts = init_environment(task) |
| 51 | + rts = runtime_settings.init_environment(task['project'], task['language_code'], args) |
52 | 52 | res = manager.all_launcher(rts, None) |
53 | 53 | return res |
54 | 54 | |
55 | 55 | |
56 | | -def launch_chart(task): |
| 56 | +def launch_chart(task, args): |
57 | 57 | ''' |
58 | 58 | This function should only be called as a cronjob and not directly. |
59 | 59 | ''' |
60 | 60 | res = True |
61 | 61 | try: |
62 | | - rts = init_environment(task) |
| 62 | + rts = runtime_settings.init_environment(task['project'], task['language_code'], args) |
63 | 63 | func = task['jobtype'] |
64 | 64 | time_unit = 'month' #FIXME hardcoded string |
65 | 65 | cutoff = 1 #FIXME hardcoded string |
— | — | @@ -89,6 +89,8 @@ |
90 | 90 | mongo = db.init_mongo_db('wikilytics') |
91 | 91 | coll = mongo['jobs'] |
92 | 92 | tasks = [] |
| 93 | + project, language, parser = manager.init_args_parser() |
| 94 | + args = parser.parse_args(['django']) |
93 | 95 | jobs = coll.find({'finished': False, 'in_progress': False, 'error': False}) |
94 | 96 | for job in jobs: |
95 | 97 | tasks.append(job) |
— | — | @@ -96,11 +98,11 @@ |
97 | 99 | for task in tasks: |
98 | 100 | if task['jobtype'] == 'dataset': |
99 | 101 | print 'Launching the Editor Trends Analytics Toolkit.' |
100 | | - res = launch_editor_trends_toolkit(task) |
| 102 | + res = launch_editor_trends_toolkit(task, args) |
101 | 103 | #res = False |
102 | 104 | else: |
103 | 105 | print 'Launching %s.' % task['jobtype'] |
104 | | - res = launch_chart(task) |
| 106 | + res = launch_chart(task, args) |
105 | 107 | |
106 | 108 | if res: |
107 | 109 | coll.update({'_id': task['_id']}, {'$set': {'finished': True}}) |
Index: trunk/tools/editor_trends/bots/detector.py |
— | — | @@ -24,10 +24,14 @@ |
25 | 25 | import xml.etree.cElementTree as cElementTree |
26 | 26 | import sys |
27 | 27 | from Queue import Empty |
28 | | -sys.path.append('..') |
29 | 28 | |
30 | | -import configuration |
31 | | -settings = configuration.Settings() |
| 29 | +if '..' not in sys.path: |
| 30 | + sys.path.append('..') |
| 31 | + |
| 32 | +from classes import settings |
| 33 | +settings = settings.Settings() |
| 34 | + |
| 35 | + |
32 | 36 | import wikitree |
33 | 37 | from database import db |
34 | 38 | from utils import file_utils |