r56008 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r56007 | r56008 | r56009 >
Date: 01:11, 8 September 2009
Author: mrzman
Status: ok
Tags:
Comment:
Various code cleanups for the captcha generating script
* Use optparse instead of getopt
* Replace deprecated md5 module
* Replace deprecated string module functions with string methods
* More graceful failure
* Allow users to set the font size
* Don't run forever if no valid word combinations can be found
Modified paths:
  • /trunk/extensions/ConfirmEdit/captcha.py (modified) (history)

Diff [purge]

Index: trunk/extensions/ConfirmEdit/captcha.py
@@ -26,17 +26,24 @@
2727 # 2008-01-06: Add regex check to skip words containing other than a-z
2828
2929 import random
30 -import Image
31 -import ImageFont
32 -import ImageDraw
33 -import ImageEnhance
34 -import ImageOps
35 -import math, string, md5
36 -import getopt
 30+import math
 31+import hashlib
 32+from optparse import OptionParser
3733 import os
3834 import sys
3935 import re
4036
 37+try:
 38+ import Image
 39+ import ImageFont
 40+ import ImageDraw
 41+ import ImageEnhance
 42+ import ImageOps
 43+except:
 44+ sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
 45+
 46+nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
 47+
4148 # Does X-axis wobbly copy, sandwiched between two rotates
4249 def wobbly_copy(src, wob, col, scale, ang):
4350 x, y = src.size
@@ -48,7 +55,6 @@
4956 # Do a cheap bounding-box op here to try to limit work below
5057 bbx = rot.getbbox()
5158 if bbx == None:
52 - print "whoops"
5359 return src
5460 else:
5561 l, t, r, b= bbx
@@ -102,13 +108,13 @@
103109 # save the image, in format determined from filename
104110 im.save(file_name)
105111
106 -def gen_subdir(basedir, hash, levels):
 112+def gen_subdir(basedir, md5hash, levels):
107113 """Generate a subdirectory path out of the first _levels_
108114 characters of _hash_, and ensure the directories exist
109115 under _basedir_."""
110116 subdir = None
111117 for i in range(0, levels):
112 - char = hash[i]
 118+ char = md5hash[i]
113119 if subdir:
114120 subdir = os.path.join(subdir, char)
115121 else:
@@ -124,9 +130,9 @@
125131 word = word1+word2
126132 if verbose:
127133 print "word is %s" % word
128 - r = re.compile('[^a-z]');
129 - if r.search(word):
130 - print "skipping word pair '%s' because it contains non-alphabetic characters" % word
 134+ if nonalpha.search(word):
 135+ if verbose:
 136+ print "skipping word pair '%s' because it contains non-alphabetic characters" % word
131137 return None
132138
133139 for naughty in blacklist:
@@ -137,13 +143,14 @@
138144 return word
139145
140146 def pick_word(words, blacklist, verbose):
141 - while True:
 147+ for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
142148 word = try_pick_word(words, blacklist, verbose)
143149 if word:
144150 return word
 151+ sys.exit("Unable to find valid word combinations")
145152
146153 def read_wordlist(filename):
147 - return [string.lower(x.strip()) for x in open(wordlist).readlines()]
 154+ return [x.strip().lower() for x in open(wordlist).readlines()]
148155
149156 if __name__ == '__main__':
150157 """This grabs random words from the dictionary 'words' (one
@@ -153,47 +160,51 @@
154161 To check a reply, hash it in the same way with the same salt and
155162 secret key, then compare with the hash value given.
156163 """
157 - font = "VeraBd.ttf"
158 - wordlist = "awordlist.txt"
159 - blacklistfile = None
160 - key = "CHANGE_THIS_SECRET!"
161 - output = "."
162 - count = 20
163 - fill = 0
164 - dirs = 0
165 - verbose = False
 164+ parser = OptionParser()
 165+ parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
 166+ parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
 167+ parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
 168+ parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
 169+ parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
 170+ parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
 171+ parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE")
 172+ parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
 173+ parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
 174+ parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
166175
167 - opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "blacklist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
168 - for o, a in opts:
169 - if o == "--font":
170 - font = a
171 - if o == "--wordlist":
172 - wordlist = a
173 - if o == "--blacklist":
174 - blacklistfile = a
175 - if o == "--key":
176 - key = a
177 - if o == "--output":
178 - output = a
179 - if o == "--count":
180 - count = int(a)
181 - if o == "--fill":
182 - fill = int(a)
183 - if o == "--dirs":
184 - dirs = int(a)
185 - if o == "--verbose":
186 - verbose = True
 176+ opts, args = parser.parse_args()
 177+
 178+ if opts.wordlist:
 179+ wordlist = opts.wordlist
 180+ else:
 181+ sys.exit("Need to specify a wordlist")
 182+ if opts.key:
 183+ key = opts.key
 184+ else:
 185+ sys.exit("Need to specify a key")
 186+ if opts.output:
 187+ output = opts.output
 188+ else:
 189+ sys.exit("Need to specify an output directory")
 190+ if opts.font and os.path.exists(opts.font):
 191+ font = opts.font
 192+ else:
 193+ sys.exit("Need to specify the location of a font")
187194
 195+ blacklistfile = opts.blacklist
 196+ count = opts.count
 197+ fill = opts.fill
 198+ dirs = opts.dirs
 199+ verbose = opts.verbose
 200+ fontsize = opts.font_size
 201+
188202 if fill:
189 - # Option processing order is not guaranteed, so count the output
190 - # files after...
191203 count = max(0, fill - len(os.listdir(output)))
192204
193205 words = read_wordlist(wordlist)
194206 words = [x for x in words
195 - if len(x) <= 5 and len(x) >= 4 and x[0] != "f"
196 - and x[0] != x[1] and x[-1] != x[-2]
197 - and (not "'" in x)]
 207+ if len(x) in (4,5) and x[0] != "f"
 208+ and x[0] != x[1] and x[-1] != x[-2]]
198209
199210 if blacklistfile:
200211 blacklist = read_wordlist(blacklistfile)
@@ -204,11 +215,12 @@
205216 word = pick_word(words, blacklist, verbose)
206217 salt = "%08x" % random.randrange(2**32)
207218 # 64 bits of hash is plenty for this purpose
208 - hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
209 - filename = "image_%s_%s.png" % (salt, hash)
 219+ md5hash = hashlib.md5(key+salt+word+key+salt).hexdigest()[:16]
 220+ filename = "image_%s_%s.png" % (salt, md5hash)
210221 if dirs:
211 - subdir = gen_subdir(output, hash, dirs)
 222+ subdir = gen_subdir(output, md5hash, dirs)
212223 filename = os.path.join(subdir, filename)
213224 if verbose:
214225 print filename
215 - gen_captcha(word, font, 40, os.path.join(output, filename))
 226+ gen_captcha(word, font, fontsize, os.path.join(output, filename))
 227+

Status & tagging log