Index: trunk/tools/editor_trends/bots/detector.py |
— | — | @@ -32,7 +32,6 @@ |
33 | 33 | settings = settings.Settings() |
34 | 34 | |
35 | 35 | |
36 | | -import wikitree |
37 | 36 | from database import db |
38 | 37 | from utils import file_utils |
39 | 38 | from utils import messages |
— | — | @@ -40,8 +39,6 @@ |
41 | 40 | from classes import consumers |
42 | 41 | from classes import bots |
43 | 42 | |
44 | | -import cProfile |
45 | | - |
46 | 43 | try: |
47 | 44 | import psyco |
48 | 45 | psyco.full() |
— | — | @@ -76,7 +73,7 @@ |
77 | 74 | |
78 | 75 | def retrieve_bots(language_code): |
79 | 76 | ''' |
80 | | - Loader function to retrieve list of id's of known Wikipedia bots. |
| 77 | + Loader function to retrieve list of id's of known Wikipedia bots. |
81 | 78 | ''' |
82 | 79 | ids = [] |
83 | 80 | mongo = db.init_mongo_db('bots') |
— | — | @@ -91,10 +88,13 @@ |
92 | 89 | def store_bots(): |
93 | 90 | ''' |
94 | 91 | This function reads the results from the lookup_bot_userid function and stores |
95 | | - it in a MongoDB collection. |
| 92 | + it in a MongoDB collection. |
96 | 93 | ''' |
97 | 94 | keys = ['name', 'verified', 'projects'] |
98 | | - bots = file_utils.create_dict_from_csv_file(settings.csv_location, 'bots_ids.csv', settings.encoding, keys) |
| 95 | + bots = file_utils.create_dict_from_csv_file(settings.csv_location, |
| 96 | + 'bots_ids.csv', |
| 97 | + settings.encoding, |
| 98 | + keys) |
99 | 99 | mongo = db.init_mongo_db('bots') |
100 | 100 | collection = mongo['ids'] |
101 | 101 | db.remove_documents_from_mongo_db(collection, None) |
— | — | @@ -103,7 +103,6 @@ |
104 | 104 | bot = bots[id] |
105 | 105 | data = dict([(k, bot[k]) for k in keys]) |
106 | 106 | data['id'] = id |
107 | | - #{'id': int(id), 'name': name, 'verified': verified, 'projects': projects} |
108 | 107 | collection.insert(data) |
109 | 108 | |
110 | 109 | print 'Stored %s bots' % collection.count() |
— | — | @@ -112,7 +111,7 @@ |
113 | 112 | def convert_object_to_dict(obj, exclude=[]): |
114 | 113 | ''' |
115 | 114 | @obj is an arbitrary object where the properties need to be translated to |
116 | | - keys and values to ease writing to a csv file. |
| 115 | + keys and values to ease writing to a csv file. |
117 | 116 | ''' |
118 | 117 | d = {} |
119 | 118 | for kw in obj.__dict__.keys(): |
— | — | @@ -122,11 +121,13 @@ |
123 | 122 | |
124 | 123 | |
125 | 124 | def write_bot_list_to_csv(bots, keys): |
126 | | - fh = file_utils.create_txt_filehandle(settings.csv_location, 'bots_ids.csv', 'w', settings.encoding) |
| 125 | + fh = file_utils.create_txt_filehandle(settings.csv_location, 'bots_ids.csv', |
| 126 | + 'w', settings.encoding) |
127 | 127 | bot_dict = convert_object_to_dict(bots, exclude=['time', 'written']) |
128 | 128 | for bot in bot_dict: |
129 | 129 | bot = bot_dict[bot] |
130 | | - file_utils.write_dict_to_csv(bot, fh, keys, write_key=False, newline=True) |
| 130 | + file_utils.write_dict_to_csv(bot, fh, keys, write_key=False, |
| 131 | + newline=True) |
131 | 132 | fh.close() |
132 | 133 | |
133 | 134 | |
— | — | @@ -178,14 +179,13 @@ |
179 | 180 | def bot_launcher(language_code, project, target, action, single=False, manager=False): |
180 | 181 | ''' |
181 | 182 | This function sets the stage to launch bot id detection and collecting data |
182 | | - to discover new bots. |
| 183 | + to discover new bots. |
183 | 184 | ''' |
184 | 185 | file_utils.delete_file(settings.csv_location, 'bots_ids.csv') |
185 | 186 | location = os.path.join(settings.input_location, language_code, project) |
186 | 187 | input_xml = os.path.join(location, 'chunks') |
187 | 188 | input_txt = os.path.join(location, 'txt') |
188 | 189 | |
189 | | - |
190 | 190 | tasks = multiprocessing.JoinableQueue() |
191 | 191 | mgr = multiprocessing.Manager() |
192 | 192 | keys = ['id', 'name', 'verified', 'projects'] |
— | — | @@ -209,8 +209,6 @@ |
210 | 210 | if manager: |
211 | 211 | manager = mgr |
212 | 212 | |
213 | | - |
214 | | - |
215 | 213 | tracker = {} |
216 | 214 | if single: |
217 | 215 | while True: |
— | — | @@ -239,7 +237,6 @@ |
240 | 238 | #write_bot_list_to_csv(bots, keys) |
241 | 239 | |
242 | 240 | |
243 | | - |
244 | 241 | def bot_training_dataset(bots): |
245 | 242 | fh = file_utils.create_txt_filehandle(settings.csv_location, 'training_bots.csv', 'w', settings.encoding) |
246 | 243 | keys = bots.keys() |
— | — | @@ -254,7 +251,7 @@ |
255 | 252 | |
256 | 253 | def bot_launcher_multi(tasks): |
257 | 254 | ''' |
258 | | - This is the launcher that uses multiprocesses. |
| 255 | + This is the launcher that uses multiprocesses. |
259 | 256 | ''' |
260 | 257 | consumers = [consumers.XMLFileConsumer(tasks, None) for i in xrange(settings.number_of_processes)] |
261 | 258 | for x in xrange(settings.number_of_processes): |
— | — | @@ -265,6 +262,7 @@ |
266 | 263 | |
267 | 264 | tasks.join() |
268 | 265 | |
| 266 | + |
269 | 267 | def debug_bots_dict(): |
270 | 268 | bots = file_utils.load_object(settings.binary_location, 'bots.bin') |
271 | 269 | for bot in bots: |
— | — | @@ -274,6 +272,7 @@ |
275 | 273 | print 'done' |
276 | 274 | return bots |
277 | 275 | |
| 276 | + |
278 | 277 | if __name__ == '__main__': |
279 | 278 | language_code = 'en' |
280 | 279 | project = 'wiki' |
— | — | @@ -282,4 +281,3 @@ |
283 | 282 | #write_bot_list_to_csv(bots) |
284 | 283 | #language_code, project, lookup_bot_userid, single = False, manager = False |
285 | 284 | bot_launcher(language_code, project, create_bot_validation_dataset, action='training', single=True, manager=False) |
286 | | - #cProfile.run(bot_launcher(language_code, project, single=True), 'profile') |