r85070 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r85069 | r85070 | r85071 >
Date:14:58, 31 March 2011
Author:diederik
Status:deferred
Tags:
Comment:
Incorporated fixes suggested by pylint
Modified paths:
  • /trunk/tools/editor_trends/bots/detector.py (modified) (history)

Diff [purge]

Index: trunk/tools/editor_trends/bots/detector.py
@@ -32,7 +32,6 @@
3333 settings = settings.Settings()
3434
3535
36 -import wikitree
3736 from database import db
3837 from utils import file_utils
3938 from utils import messages
@@ -40,8 +39,6 @@
4140 from classes import consumers
4241 from classes import bots
4342
44 -import cProfile
45 -
4643 try:
4744 import psyco
4845 psyco.full()
@@ -76,7 +73,7 @@
7774
7875 def retrieve_bots(language_code):
7976 '''
80 - Loader function to retrieve list of id's of known Wikipedia bots.
 77+ Loader function to retrieve list of id's of known Wikipedia bots.
8178 '''
8279 ids = []
8380 mongo = db.init_mongo_db('bots')
@@ -91,10 +88,13 @@
9289 def store_bots():
9390 '''
9491 This file reads the results from the lookup_bot_userid function and stores
95 - it in a MongoDB collection.
 92+ it in a MongoDB collection.
9693 '''
9794 keys = ['name', 'verified', 'projects']
98 - bots = file_utils.create_dict_from_csv_file(settings.csv_location, 'bots_ids.csv', settings.encoding, keys)
 95+ bots = file_utils.create_dict_from_csv_file(settings.csv_location,
 96+ 'bots_ids.csv',
 97+ settings.encoding,
 98+ keys)
9999 mongo = db.init_mongo_db('bots')
100100 collection = mongo['ids']
101101 db.remove_documents_from_mongo_db(collection, None)
@@ -103,7 +103,6 @@
104104 bot = bots[id]
105105 data = dict([(k, bot[k]) for k in keys])
106106 data['id'] = id
107 - #{'id': int(id), 'name': name, 'verified': verified, 'projects': projects}
108107 collection.insert(data)
109108
110109 print 'Stored %s bots' % collection.count()
@@ -112,7 +111,7 @@
113112 def convert_object_to_dict(obj, exclude=[]):
114113 '''
115114 @obj is an arbitray object where the properties need to be translated to
116 - keys and values to ease writing to a csv file.
 115+ keys and values to ease writing to a csv file.
117116 '''
118117 d = {}
119118 for kw in obj.__dict__.keys():
@@ -122,11 +121,13 @@
123122
124123
125124 def write_bot_list_to_csv(bots, keys):
126 - fh = file_utils.create_txt_filehandle(settings.csv_location, 'bots_ids.csv', 'w', settings.encoding)
 125+ fh = file_utils.create_txt_filehandle(settings.csv_location, 'bots_ids.csv',
 126+ 'w', settings.encoding)
127127 bot_dict = convert_object_to_dict(bots, exclude=['time', 'written'])
128128 for bot in bot_dict:
129129 bot = bot_dict[bot]
130 - file_utils.write_dict_to_csv(bot, fh, keys, write_key=False, newline=True)
 130+ file_utils.write_dict_to_csv(bot, fh, keys, write_key=False,
 131+ newline=True)
131132 fh.close()
132133
133134
@@ -178,14 +179,13 @@
179180 def bot_launcher(language_code, project, target, action, single=False, manager=False):
180181 '''
181182 This function sets the stage to launch bot id detection and collecting data
182 - to discover new bots.
 183+ to discover new bots.
183184 '''
184185 file_utils.delete_file(settings.csv_location, 'bots_ids.csv')
185186 location = os.path.join(settings.input_location, language_code, project)
186187 input_xml = os.path.join(location, 'chunks')
187188 input_txt = os.path.join(location, 'txt')
188189
189 -
190190 tasks = multiprocessing.JoinableQueue()
191191 mgr = multiprocessing.Manager()
192192 keys = ['id', 'name', 'verified', 'projects']
@@ -209,8 +209,6 @@
210210 if manager:
211211 manager = mgr
212212
213 -
214 -
215213 tracker = {}
216214 if single:
217215 while True:
@@ -239,7 +237,6 @@
240238 #write_bot_list_to_csv(bots, keys)
241239
242240
243 -
244241 def bot_training_dataset(bots):
245242 fh = file_utils.create_txt_filehandle(settings.csv_location, 'training_bots.csv', 'w', settings.encoding)
246243 keys = bots.keys()
@@ -254,7 +251,7 @@
255252
256253 def bot_launcher_multi(tasks):
257254 '''
258 - This is the launcher that uses multiprocesses.
 255+ This is the launcher that uses multiprocesses.
259256 '''
260257 consumers = [consumers.XMLFileConsumer(tasks, None) for i in xrange(settings.number_of_processes)]
261258 for x in xrange(settings.number_of_processes):
@@ -265,6 +262,7 @@
266263
267264 tasks.join()
268265
 266+
269267 def debug_bots_dict():
270268 bots = file_utils.load_object(settings.binary_location, 'bots.bin')
271269 for bot in bots:
@@ -274,6 +272,7 @@
275273 print 'done'
276274 return bots
277275
 276+
278277 if __name__ == '__main__':
279278 language_code = 'en'
280279 project = 'wiki'
@@ -282,4 +281,3 @@
283282 #write_bot_list_to_csv(bots)
284283 #language_code, project, lookup_bot_userid, single = False, manager = False
285284 bot_launcher(language_code, project, create_bot_validation_dataset, action='training', single=True, manager=False)
286 - #cProfile.run(bot_launcher(language_code, project, single=True), 'profile')