r64581 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r64580 | r64581 | r64582 >
Date:22:57, 3 April 2010
Author:siebrand
Status:deferred
Tags:
Comment:
stylize.php, trailing whitespace removed
Modified paths:
  • /trunk/extensions/AntiSpoof/AntiSpoof.i18n.php (modified) (history)
  • /trunk/extensions/AntiSpoof/AntiSpoof.php (modified) (history)
  • /trunk/extensions/AntiSpoof/AntiSpoof_body.php (modified) (history)
  • /trunk/extensions/AntiSpoof/SpoofUser.php (modified) (history)
  • /trunk/extensions/AntiSpoof/batchAntiSpoof.php (modified) (history)
  • /trunk/extensions/AntiSpoof/equivset.in (modified) (history)
  • /trunk/extensions/AntiSpoof/generateEquivset.php (modified) (history)
  • /trunk/extensions/AntiSpoof/sql/patch-antispoof.mysql.sql (modified) (history)
  • /trunk/extensions/AntiSpoof/sql/patch-antispoof.postgres.sql (modified) (history)

Diff [purge]

Index: trunk/extensions/AntiSpoof/sql/patch-antispoof.mysql.sql
@@ -3,19 +3,19 @@
44 -- Do a join against user_name to confirm that an account hasn't
55 -- been renamed or deleted away.
66 su_name VARCHAR(255),
7 -
 7+
88 -- Normalized form of name for similarity-spoofing checks
99 su_normalized VARCHAR(255),
10 -
 10+
1111 -- ok/not-ok according to the looks-like-a-valid-name check
1212 su_legal BOOL,
13 -
 13+
1414 -- error message that came out of the unicode check, if any
1515 su_error TEXT,
1616
1717 -- unique record per username
1818 PRIMARY KEY (su_name),
19 -
 19+
2020 -- for checking matching possible spoofs
2121 KEY(su_normalized, su_name)
2222 ) /*$wgDBTableOptions*/;
Index: trunk/extensions/AntiSpoof/sql/patch-antispoof.postgres.sql
@@ -14,4 +14,4 @@
1515 su_error text
1616 );
1717
18 -CREATE INDEX su_normname_idx ON spoofuser (su_normalized,su_name);
\ No newline at end of file
 18+CREATE INDEX su_normname_idx ON spoofuser (su_normalized,su_name);
Index: trunk/extensions/AntiSpoof/AntiSpoof_body.php
@@ -1,5 +1,4 @@
22 <?php
3 -
43 # AntiSpoof.php
54 # Username spoofing prevention for MediaWiki
65 # Version 0.04
@@ -30,9 +29,7 @@
3130 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
3231 # USA
3332
34 -
3533 class AntiSpoof {
36 -
3734 # Define script tag codes for various Unicode codepoint ranges
3835 # If it does not have a code here, it does not have a script assignment
3936 # NB: Braille is not in this list since it is a transliteration system, not a script;
@@ -43,93 +40,93 @@
4441 # are commented out: these are either not in modern use, or only used for specialized
4542 # religious purposes, or only of literary interest
4643 private static $script_ranges = array(
47 - array( 0x0020, 0x002F, "SCRIPT_ASCII_PUNCTUATION" ), # ASCII Punctuation 1, Hyphen, ASCII Punctuation 2
48 - array( 0x0030, 0x0039, "SCRIPT_ASCII_DIGITS" ), # ASCII Digits
49 - array( 0x003A, 0x0040, "SCRIPT_ASCII_PUNCTUATION" ), # Colon, ASCII Punctuation 3
50 - array( 0x0041, 0x005A, "SCRIPT_LATIN" ), # ASCII Uppercase
51 - array( 0x005B, 0x0060, "SCRIPT_ASCII_PUNCTUATION" ), # ASCII Punctuation 4, Underscore, ASCII Punctuation 5
52 - array( 0x0061, 0x007A, "SCRIPT_LATIN" ), # ASCII Lowercase
53 - array( 0x007B, 0x007E, "SCRIPT_ASCII_PUNCTUATION" ), # ASCII Punctuation 5
54 - array( 0x00B7, 0x00B7, "SCRIPT_LATIN" ), # Middle Dot
55 - array( 0x00C0, 0x00D6, "SCRIPT_LATIN" ), # Latin-1 Letters 1
56 - array( 0x00D8, 0x00F6, "SCRIPT_LATIN" ), # Latin-1 Letters 2
57 - array( 0x00F8, 0x02AF, "SCRIPT_LATIN" ), # Latin-1 Letters 3, Latin Extended-A, Latin Extended-B, IPA Extensions
58 - array( 0x0300, 0x036F, "SCRIPT_COMBINING_MARKS" ), # Combining Diacritical Marks
59 - array( 0x0370, 0x03E1, "SCRIPT_GREEK" ), # Greek and Coptic (Greek)
60 - array( 0x03E2, 0x03EF, "SCRIPT_COPTIC_EXTRAS" ), # Greek and Coptic (Coptic-unique)
61 - array( 0x03F0, 0x03FF, "SCRIPT_GREEK" ), # Greek and Coptic (Greek)
62 - array( 0x0400, 0x052F, "SCRIPT_CYRILLIC" ), # Cyrillic, Cyrillic Supplement
63 - array( 0x0530, 0x058F, "SCRIPT_ARMENIAN" ), # Armenian
64 - array( 0x0590, 0x05FF, "SCRIPT_HEBREW" ), # Hebrew
65 - array( 0x0600, 0x06FF, "SCRIPT_ARABIC" ), # Arabic
66 - array( 0x0700, 0x074F, "SCRIPT_SYRIAC" ), # Syriac
67 - array( 0x0750, 0x077F, "SCRIPT_ARABIC" ), # Arabic Supplement
68 - array( 0x0780, 0x07BF, "SCRIPT_THAANA" ), # Thaana
69 - array( 0x0900, 0x097F, "SCRIPT_DEVANAGARI" ), # Devanagari
70 - array( 0x0980, 0x09FF, "SCRIPT_BENGALI" ), # Bengali
71 - array( 0x0A00, 0x0A7F, "SCRIPT_GURMUKHI" ), # Gurmukhi
72 - array( 0x0A80, 0x0AFF, "SCRIPT_GUJARATI" ), # Gujarati
73 - array( 0x0B00, 0x0B7F, "SCRIPT_ORIYA" ), # Oriya
74 - array( 0x0B80, 0x0BFF, "SCRIPT_TAMIL" ), # Tamil
75 - array( 0x0C00, 0x0C7F, "SCRIPT_TELUGU" ), # Telugu
76 - array( 0x0C80, 0x0CFF, "SCRIPT_KANNADA" ), # Kannada
77 - array( 0x0D00, 0x0D7F, "SCRIPT_MALAYALAM" ), # Malayalam
78 - array( 0x0D80, 0x0DFF, "SCRIPT_SINHALA" ), # Sinhala
79 - array( 0x0E00, 0x0E7F, "SCRIPT_THAI" ), # Thai
80 - array( 0x0E80, 0x0EFF, "SCRIPT_LAO" ), # Lao
81 - array( 0x0F00, 0x0FFF, "SCRIPT_TIBETAN" ), # Tibetan
82 - array( 0x1000, 0x109F, "SCRIPT_MYANMAR" ), # Myanmar
83 - array( 0x10A0, 0x10FF, "SCRIPT_GEORGIAN" ), # Georgian
 44+ array( 0x0020, 0x002F, "SCRIPT_ASCII_PUNCTUATION" ), # ASCII Punctuation 1, Hyphen, ASCII Punctuation 2
 45+ array( 0x0030, 0x0039, "SCRIPT_ASCII_DIGITS" ), # ASCII Digits
 46+ array( 0x003A, 0x0040, "SCRIPT_ASCII_PUNCTUATION" ), # Colon, ASCII Punctuation 3
 47+ array( 0x0041, 0x005A, "SCRIPT_LATIN" ), # ASCII Uppercase
 48+ array( 0x005B, 0x0060, "SCRIPT_ASCII_PUNCTUATION" ), # ASCII Punctuation 4, Underscore, ASCII Punctuation 5
 49+ array( 0x0061, 0x007A, "SCRIPT_LATIN" ), # ASCII Lowercase
 50+ array( 0x007B, 0x007E, "SCRIPT_ASCII_PUNCTUATION" ), # ASCII Punctuation 5
 51+ array( 0x00B7, 0x00B7, "SCRIPT_LATIN" ), # Middle Dot
 52+ array( 0x00C0, 0x00D6, "SCRIPT_LATIN" ), # Latin-1 Letters 1
 53+ array( 0x00D8, 0x00F6, "SCRIPT_LATIN" ), # Latin-1 Letters 2
 54+ array( 0x00F8, 0x02AF, "SCRIPT_LATIN" ), # Latin-1 Letters 3, Latin Extended-A, Latin Extended-B, IPA Extensions
 55+ array( 0x0300, 0x036F, "SCRIPT_COMBINING_MARKS" ), # Combining Diacritical Marks
 56+ array( 0x0370, 0x03E1, "SCRIPT_GREEK" ), # Greek and Coptic (Greek)
 57+ array( 0x03E2, 0x03EF, "SCRIPT_COPTIC_EXTRAS" ), # Greek and Coptic (Coptic-unique)
 58+ array( 0x03F0, 0x03FF, "SCRIPT_GREEK" ), # Greek and Coptic (Greek)
 59+ array( 0x0400, 0x052F, "SCRIPT_CYRILLIC" ), # Cyrillic, Cyrillic Supplement
 60+ array( 0x0530, 0x058F, "SCRIPT_ARMENIAN" ), # Armenian
 61+ array( 0x0590, 0x05FF, "SCRIPT_HEBREW" ), # Hebrew
 62+ array( 0x0600, 0x06FF, "SCRIPT_ARABIC" ), # Arabic
 63+ array( 0x0700, 0x074F, "SCRIPT_SYRIAC" ), # Syriac
 64+ array( 0x0750, 0x077F, "SCRIPT_ARABIC" ), # Arabic Supplement
 65+ array( 0x0780, 0x07BF, "SCRIPT_THAANA" ), # Thaana
 66+ array( 0x0900, 0x097F, "SCRIPT_DEVANAGARI" ), # Devanagari
 67+ array( 0x0980, 0x09FF, "SCRIPT_BENGALI" ), # Bengali
 68+ array( 0x0A00, 0x0A7F, "SCRIPT_GURMUKHI" ), # Gurmukhi
 69+ array( 0x0A80, 0x0AFF, "SCRIPT_GUJARATI" ), # Gujarati
 70+ array( 0x0B00, 0x0B7F, "SCRIPT_ORIYA" ), # Oriya
 71+ array( 0x0B80, 0x0BFF, "SCRIPT_TAMIL" ), # Tamil
 72+ array( 0x0C00, 0x0C7F, "SCRIPT_TELUGU" ), # Telugu
 73+ array( 0x0C80, 0x0CFF, "SCRIPT_KANNADA" ), # Kannada
 74+ array( 0x0D00, 0x0D7F, "SCRIPT_MALAYALAM" ), # Malayalam
 75+ array( 0x0D80, 0x0DFF, "SCRIPT_SINHALA" ), # Sinhala
 76+ array( 0x0E00, 0x0E7F, "SCRIPT_THAI" ), # Thai
 77+ array( 0x0E80, 0x0EFF, "SCRIPT_LAO" ), # Lao
 78+ array( 0x0F00, 0x0FFF, "SCRIPT_TIBETAN" ), # Tibetan
 79+ array( 0x1000, 0x109F, "SCRIPT_MYANMAR" ), # Myanmar
 80+ array( 0x10A0, 0x10FF, "SCRIPT_GEORGIAN" ), # Georgian
8481 array( 0x1100, 0x11FF, "SCRIPT_HANGUL" ), # Hangul Jamo
85 - array( 0x1200, 0x139F, "SCRIPT_ETHIOPIC" ), # Ethiopic, Ethiopic Supplement
86 - array( 0x13A0, 0x13FF, "SCRIPT_CHEROKEE" ), # Cherokee
 82+ array( 0x1200, 0x139F, "SCRIPT_ETHIOPIC" ), # Ethiopic, Ethiopic Supplement
 83+ array( 0x13A0, 0x13FF, "SCRIPT_CHEROKEE" ), # Cherokee
8784 array( 0x1400, 0x167F, "SCRIPT_CANADIAN_ABORIGINAL" ), # Unified Canadian Aboriginal Syllabics
88 - # array( 0x1680, 0x169F, "SCRIPT_OGHAM" ), # Ogham
89 - # array( 0x16A0, 0x16FF, "SCRIPT_RUNIC" ), # Runic
90 - array( 0x1700, 0x171F, "SCRIPT_TAGALOG" ), # Tagalog
91 - array( 0x1720, 0x173F, "SCRIPT_HANUNOO" ), # Hanunoo
92 - array( 0x1740, 0x175F, "SCRIPT_BUHID" ), # Buhid
93 - array( 0x1760, 0x177F, "SCRIPT_TAGBANWA" ), # Tagbanwa
94 - array( 0x1780, 0x17FF, "SCRIPT_KHMER" ), # Khmer
95 - array( 0x1800, 0x18AF, "SCRIPT_MONGOLIAN" ), # Mongolian
96 - array( 0x1900, 0x194F, "SCRIPT_LIMBU" ), # Limbu
97 - array( 0x1950, 0x197F, "SCRIPT_TAI_LE" ), # Tai Le
98 - array( 0x1980, 0x19DF, "SCRIPT_NEW_TAI_LUE" ), # New Tai Lue
99 - array( 0x1A00, 0x1A1F, "SCRIPT_BUGINESE" ), # Buginese
100 - array( 0x1E00, 0x1EFF, "SCRIPT_LATIN" ), # Latin Extended Additional
101 - array( 0x1F00, 0x1FFF, "SCRIPT_GREEK" ), # Greek Extended
102 - # array( 0x2C00, 0x2C5F, "SCRIPT_GLAGOLITIC" ), # Glagolitic
103 - array( 0x2C80, 0x2CFF, "SCRIPT_COPTIC" ), # Coptic
104 - array( 0x2D00, 0x2D2F, "SCRIPT_GEORGIAN" ), # Georgian Supplement
105 - array( 0x2D30, 0x2D7F, "SCRIPT_TIFINAGH" ), # Tifinagh
106 - array( 0x2D80, 0x2DDF, "SCRIPT_ETHIOPIC" ), # Ethiopic Extended
107 - array( 0x2E80, 0x2FDF, "SCRIPT_DEPRECATED" ), # CJK Radicals Supplement, Kangxi Radicals
108 - array( 0x3040, 0x309F, "SCRIPT_HIRAGANA" ), # Hiragana
109 - array( 0x30A0, 0x30FF, "SCRIPT_KATAKANA" ), # Katakana
110 - array( 0x3100, 0x312F, "SCRIPT_BOPOMOFO" ), # Bopomofo
 85+ # array( 0x1680, 0x169F, "SCRIPT_OGHAM" ), # Ogham
 86+ # array( 0x16A0, 0x16FF, "SCRIPT_RUNIC" ), # Runic
 87+ array( 0x1700, 0x171F, "SCRIPT_TAGALOG" ), # Tagalog
 88+ array( 0x1720, 0x173F, "SCRIPT_HANUNOO" ), # Hanunoo
 89+ array( 0x1740, 0x175F, "SCRIPT_BUHID" ), # Buhid
 90+ array( 0x1760, 0x177F, "SCRIPT_TAGBANWA" ), # Tagbanwa
 91+ array( 0x1780, 0x17FF, "SCRIPT_KHMER" ), # Khmer
 92+ array( 0x1800, 0x18AF, "SCRIPT_MONGOLIAN" ), # Mongolian
 93+ array( 0x1900, 0x194F, "SCRIPT_LIMBU" ), # Limbu
 94+ array( 0x1950, 0x197F, "SCRIPT_TAI_LE" ), # Tai Le
 95+ array( 0x1980, 0x19DF, "SCRIPT_NEW_TAI_LUE" ), # New Tai Lue
 96+ array( 0x1A00, 0x1A1F, "SCRIPT_BUGINESE" ), # Buginese
 97+ array( 0x1E00, 0x1EFF, "SCRIPT_LATIN" ), # Latin Extended Additional
 98+ array( 0x1F00, 0x1FFF, "SCRIPT_GREEK" ), # Greek Extended
 99+ # array( 0x2C00, 0x2C5F, "SCRIPT_GLAGOLITIC" ), # Glagolitic
 100+ array( 0x2C80, 0x2CFF, "SCRIPT_COPTIC" ), # Coptic
 101+ array( 0x2D00, 0x2D2F, "SCRIPT_GEORGIAN" ), # Georgian Supplement
 102+ array( 0x2D30, 0x2D7F, "SCRIPT_TIFINAGH" ), # Tifinagh
 103+ array( 0x2D80, 0x2DDF, "SCRIPT_ETHIOPIC" ), # Ethiopic Extended
 104+ array( 0x2E80, 0x2FDF, "SCRIPT_DEPRECATED" ), # CJK Radicals Supplement, Kangxi Radicals
 105+ array( 0x3040, 0x309F, "SCRIPT_HIRAGANA" ), # Hiragana
 106+ array( 0x30A0, 0x30FF, "SCRIPT_KATAKANA" ), # Katakana
 107+ array( 0x3100, 0x312F, "SCRIPT_BOPOMOFO" ), # Bopomofo
111108 array( 0x3130, 0x318F, "SCRIPT_HANGUL" ), # Hangul Compatibility Jamo
112 - array( 0x31A0, 0x31BF, "SCRIPT_BOPOMOFO" ), # Bopomofo Extended
113 - array( 0x3400, 0x4DBF, "SCRIPT_HAN" ), # CJK Unified Ideographs Extension A
114 - array( 0x4E00, 0x9FFF, "SCRIPT_HAN" ), # CJK Unified Ideographs
115 - array( 0xA000, 0xA4CF, "SCRIPT_YI" ), # Yi Syllables, Yi Radicals
116 - array( 0xA800, 0xA82F, "SCRIPT_SYLOTI_NAGRI" ), # Syloti Nagri
117 - array( 0xAC00, 0xD7AF, "SCRIPT_HANGUL" ), # Hangul Syllables
118 - array( 0xF900, 0xFAFF, "SCRIPT_DEPRECATED" ), # CJK Compatibility Ideographs
119 - # array( 0x10000, 0x100FF, "SCRIPT_LINEAR_B" ), # Linear B Syllabary, Linear B Ideograms
120 - # array( 0x10140, 0x1018F, "SCRIPT_GREEK" ), # Ancient Greek Numbers
121 - # array( 0x10300, 0x1032F, "SCRIPT_OLD_ITALIC" ), # Old Italic
122 - array( 0x10330, 0x1034F, "SCRIPT_GOTHIC" ), # Gothic
123 - # array( 0x10380, 0x1039F, "SCRIPT_UGARITIC" ), # Ugaritic
124 - # array( 0x103A0, 0x103DF, "SCRIPT_OLD_PERSIAN" ), # Old Persian
125 - # array( 0x10400, 0x1044F, "SCRIPT_DESERET" ), # Deseret
126 - # array( 0x10450, 0x1047F, "SCRIPT_SHAVIAN" ), # Shavian
127 - # array( 0x10480, 0x104AF, "SCRIPT_OSMANYA" ), # Osmanya
128 - # array( 0x10800, 0x1083F, "SCRIPT_CYPRIOT" ), # Cypriot Syllabary
129 - array( 0x10A00, 0x10A5F, "SCRIPT_KHAROSHTHI" ), # Kharoshthi
130 - array( 0x20000, 0x2A6DF, "SCRIPT_HAN" ), # CJK Unified Ideographs Extension B
131 - array( 0x2F800, 0x2FA1F, "SCRIPT_DEPRECATED" ) # CJK Compatibility Ideographs Supplement
 109+ array( 0x31A0, 0x31BF, "SCRIPT_BOPOMOFO" ), # Bopomofo Extended
 110+ array( 0x3400, 0x4DBF, "SCRIPT_HAN" ), # CJK Unified Ideographs Extension A
 111+ array( 0x4E00, 0x9FFF, "SCRIPT_HAN" ), # CJK Unified Ideographs
 112+ array( 0xA000, 0xA4CF, "SCRIPT_YI" ), # Yi Syllables, Yi Radicals
 113+ array( 0xA800, 0xA82F, "SCRIPT_SYLOTI_NAGRI" ), # Syloti Nagri
 114+ array( 0xAC00, 0xD7AF, "SCRIPT_HANGUL" ), # Hangul Syllables
 115+ array( 0xF900, 0xFAFF, "SCRIPT_DEPRECATED" ), # CJK Compatibility Ideographs
 116+ # array( 0x10000, 0x100FF, "SCRIPT_LINEAR_B" ), # Linear B Syllabary, Linear B Ideograms
 117+ # array( 0x10140, 0x1018F, "SCRIPT_GREEK" ), # Ancient Greek Numbers
 118+ # array( 0x10300, 0x1032F, "SCRIPT_OLD_ITALIC" ), # Old Italic
 119+ array( 0x10330, 0x1034F, "SCRIPT_GOTHIC" ), # Gothic
 120+ # array( 0x10380, 0x1039F, "SCRIPT_UGARITIC" ), # Ugaritic
 121+ # array( 0x103A0, 0x103DF, "SCRIPT_OLD_PERSIAN" ), # Old Persian
 122+ # array( 0x10400, 0x1044F, "SCRIPT_DESERET" ), # Deseret
 123+ # array( 0x10450, 0x1047F, "SCRIPT_SHAVIAN" ), # Shavian
 124+ # array( 0x10480, 0x104AF, "SCRIPT_OSMANYA" ), # Osmanya
 125+ # array( 0x10800, 0x1083F, "SCRIPT_CYPRIOT" ), # Cypriot Syllabary
 126+ array( 0x10A00, 0x10A5F, "SCRIPT_KHAROSHTHI" ), # Kharoshthi
 127+ array( 0x20000, 0x2A6DF, "SCRIPT_HAN" ), # CJK Unified Ideographs Extension B
 128+ array( 0x2F800, 0x2FA1F, "SCRIPT_DEPRECATED" ) # CJK Compatibility Ideographs Supplement
132129 );
133 -
 130+
134131 # Specially naughty characters we don't ever want to see...
135132 private static $character_blacklist = array(
136133 0x0337,
@@ -143,21 +140,21 @@
144141 0x2AFD,
145142 0xFF0F
146143 );
147 -
 144+
148145 # Equivalence sets
149146 private static $equivset = null;
150147
151148 static function initEquivSet() {
152149 if ( is_null( self::$equivset ) ) {
153 - self::$equivset = unserialize( file_get_contents(
 150+ self::$equivset = unserialize( file_get_contents(
154151 dirname( __FILE__ ) . '/equivset.ser' ) );
155152 }
156153 }
157154
158155 private static function getScriptCode( $ch ) {
159156 # Linear search: binary chop would be faster...
160 - foreach( self::$script_ranges as $range ) {
161 - if( $ch >= $range[0] && $ch <= $range[1] ) {
 157+ foreach ( self::$script_ranges as $range ) {
 158+ if ( $ch >= $range[0] && $ch <= $range[1] ) {
162159 return $range[2];
163160 }
164161 }
@@ -170,19 +167,19 @@
171168 private static function getScriptTag( $name ) {
172169 $name = "SCRIPT_" . strtoupper( trim( $name ) );
173170 # Linear search
174 - foreach( self::$script_ranges as $range ) {
175 - if( $name == $range[2] ) {
 171+ foreach ( self::$script_ranges as $range ) {
 172+ if ( $name == $range[2] ) {
176173 return $range[2];
177174 }
178175 }
179176 # Otherwise...
180177 return null;
181178 }
182 -
 179+
183180 private static function isSubsetOf( $aList, $bList ) {
184181 return count( array_diff( $aList, $bList ) ) == 0;
185182 }
186 -
 183+
187184 # Is this an allowed script mixture?
188185 private static function isAllowedScriptCombination( $scriptList ) {
189186 $allowedScriptCombinations = array(
@@ -192,14 +189,14 @@
193190 array( "SCRIPT_HAN", "SCRIPT_HANGUL" ), # Korean
194191 array( "SCRIPT_HAN", "SCRIPT_KATAKANA", "SCRIPT_HIRAGANA" ) # Japanese
195192 );
196 - foreach( $allowedScriptCombinations as $allowedCombo ) {
197 - if( self::isSubsetOf( $scriptList, $allowedCombo ) ) {
 193+ foreach ( $allowedScriptCombinations as $allowedCombo ) {
 194+ if ( self::isSubsetOf( $scriptList, $allowedCombo ) ) {
198195 return true;
199196 }
200197 }
201198 return false;
202199 }
203 -
 200+
204201 /**
205202 * Convert string into array of Unicode code points as integers
206203 */
@@ -213,24 +210,24 @@
214211 }
215212 return $out;
216213 }
217 -
 214+
218215 public static function listToString( $list ) {
219216 $out = '';
220 - foreach( $list as $cp ) {
 217+ foreach ( $list as $cp ) {
221218 $out .= codepointToUtf8( $cp );
222219 }
223220 return $out;
224221 }
225 -
 222+
226223 private static function hardjoin( $a_list ) {
227224 return implode( '', $a_list );
228225 }
229 -
 226+
230227 public static function equivString( $testName ) {
231228 $out = array();
232229 self::initEquivSet();
233 - foreach( $testName as $codepoint ) {
234 - if( isset( self::$equivset[$codepoint] ) ) {
 230+ foreach ( $testName as $codepoint ) {
 231+ if ( isset( self::$equivset[$codepoint] ) ) {
235232 $out[] = self::$equivset[$codepoint];
236233 } else {
237234 $out[] = $codepoint;
@@ -238,11 +235,11 @@
239236 }
240237 return $out;
241238 }
242 -
 239+
243240 private static function mergePairs( $text, $pair, $result ) {
244241 $out = array();
245 - for( $i = 0; $i < count( $text ); $i++ ) {
246 - if( $text[$i] == $pair[0] && @$text[$i+1] == $pair[1] ) {
 242+ for ( $i = 0; $i < count( $text ); $i++ ) {
 243+ if ( $text[$i] == $pair[0] && @$text[$i + 1] == $pair[1] ) {
247244 $out[] = $result[0];
248245 $i++;
249246 } else {
@@ -251,75 +248,75 @@
252249 }
253250 return $out;
254251 }
255 -
 252+
256253 private static function stripScript( $text, $script ) {
257254 $scripts = array_map( array( 'AntiSpoof', 'getScriptCode' ), $text );
258255 $out = array();
259 - foreach( $text as $index => $char ) {
260 - if( $scripts[$index] !== $script ) {
 256+ foreach ( $text as $index => $char ) {
 257+ if ( $scripts[$index] !== $script ) {
261258 $out[] = $char;
262259 }
263260 }
264261 return $out;
265262 }
266 -
 263+
267264 # TODO: does too much in one routine, refactor...
268265 public static function checkUnicodeString( $testName ) {
269266 wfLoadExtensionMessages( 'AntiSpoof' );
270267 # Start with some sanity checking
271 - if( !is_string( $testName ) ) {
272 - return array( "ERROR", wfMsg('antispoof-badtype') );
 268+ if ( !is_string( $testName ) ) {
 269+ return array( "ERROR", wfMsg( 'antispoof-badtype' ) );
273270 }
274 -
275 - if( strlen( $testName ) == 0 ) {
276 - return array("ERROR", wfMsg('antispoof-empty') );
 271+
 272+ if ( strlen( $testName ) == 0 ) {
 273+ return array( "ERROR", wfMsg( 'antispoof-empty' ) );
277274 }
278 -
279 - if( array_intersect( self::stringToList( $testName ), self::$character_blacklist ) ) {
280 - return array( "ERROR", wfMsg('antispoof-blacklisted') );
 275+
 276+ if ( array_intersect( self::stringToList( $testName ), self::$character_blacklist ) ) {
 277+ return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) );
281278 }
282 -
 279+
283280 # Perform Unicode _compatibility_ decomposition
284281 $testName = UtfNormal::toNFKD( $testName );
285282 $testChars = self::stringToList( $testName );
286 -
 283+
287284 # Be paranoid: check again, just in case Unicode normalization code changes...
288 - if( array_intersect( $testChars, self::$character_blacklist ) ) {
289 - return array( "ERROR", wfMsg('antispoof-blacklisted') );
 285+ if ( array_intersect( $testChars, self::$character_blacklist ) ) {
 286+ return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) );
290287 }
291 -
 288+
292289 # Check for this: should not happen in any valid Unicode string
293 - if( self::getScriptCode( $testChars[0] ) == "SCRIPT_COMBINING_MARKS" ) {
294 - return array( "ERROR", wfMsg('antispoof-combining') );
 290+ if ( self::getScriptCode( $testChars[0] ) == "SCRIPT_COMBINING_MARKS" ) {
 291+ return array( "ERROR", wfMsg( 'antispoof-combining' ) );
295292 }
296 -
 293+
297294 # Strip all combining characters in order to crudely strip accents
298295 # Note: NFKD normalization should have decomposed all accented chars earlier
299296 $testChars = self::stripScript( $testChars, "SCRIPT_COMBINING_MARKS" );
300 -
 297+
301298 $testScripts = array_unique( array_map( array( 'AntiSpoof', 'getScriptCode' ), $testChars ) );
302 - if( in_array( "SCRIPT_UNASSIGNED", $testScripts ) || in_array( "SCRIPT_DEPRECATED", $testScripts ) ) {
303 - return array( "ERROR", wfMsg('antispoof-unassigned') );
 299+ if ( in_array( "SCRIPT_UNASSIGNED", $testScripts ) || in_array( "SCRIPT_DEPRECATED", $testScripts ) ) {
 300+ return array( "ERROR", wfMsg( 'antispoof-unassigned' ) );
304301 }
305 -
 302+
306303 # We don't mind ASCII punctuation or digits
307304 $testScripts = array_diff( $testScripts,
308305 array( "SCRIPT_ASCII_PUNCTUATION", "SCRIPT_ASCII_DIGITS" ) );
309 -
310 - if( !$testScripts ) {
311 - return array( "ERROR", wfMsg('antispoof-noletters') );
 306+
 307+ if ( !$testScripts ) {
 308+ return array( "ERROR", wfMsg( 'antispoof-noletters' ) );
312309 }
313 -
314 - if( count( $testScripts ) > 1 && !self::isAllowedScriptCombination( $testScripts ) ) {
315 - return array( "ERROR", wfMsg('antispoof-mixedscripts') );
 310+
 311+ if ( count( $testScripts ) > 1 && !self::isAllowedScriptCombination( $testScripts ) ) {
 312+ return array( "ERROR", wfMsg( 'antispoof-mixedscripts' ) );
316313 }
317 -
 314+
318315 # At this point, we should probably check for BiDi violations if they aren't
319316 # caught above...
320 -
 317+
321318 # Replace characters in confusables set with equivalence chars
322319 $testChars = self::equivString( $testChars );
323 -
 320+
324321 # Do very simple sequence processing: "vv" -> "w", "rn" -> "m"...
325322 # Not exhaustive, but ups the ante...
326323 # Do this _after_ canonicalization: looks weird, but needed for consistency
@@ -329,30 +326,29 @@
330327 $testChars = self::mergePairs( $testChars,
331328 self::equivString( self::stringToList( "RN" ) ),
332329 self::equivString( self::stringToList( "M" ) ) );
333 -
 330+
334331 # Squeeze out all punctuation chars
335332 # TODO: almost the same code occurs twice, refactor into own routine
336333 $testChars = self::stripScript( $testChars, "SCRIPT_ASCII_PUNCTUATION" );
337 -
 334+
338335 $testName = self::listToString( $testChars );
339 -
 336+
340337 # Remove all remaining spaces, just in case any have snuck through...
341338 $testName = self::hardjoin( explode( " ", $testName ) );
342 -
 339+
343340 # Reduce repeated char sequences to single character
344341 # BUG: TODO: implement this
345 -
346 - if( strlen( $testName ) < 1 ) {
347 - return array("ERROR", wfMsg('antispoof-tooshort') );
 342+
 343+ if ( strlen( $testName ) < 1 ) {
 344+ return array( "ERROR", wfMsg( 'antispoof-tooshort' ) );
348345 }
349 -
 346+
350347 # Don't ASCIIfy: we assume we are UTF-8 capable on output
351 -
 348+
352349 # Prepend version string, for futureproofing if this algorithm changes
353350 $testName = "v2:" . $testName;
354 -
 351+
355352 # And return the canonical version of the name
356353 return array( "OK", $testName );
357354 }
358 -
359355 }
Index: trunk/extensions/AntiSpoof/AntiSpoof.i18n.php
@@ -3,7 +3,7 @@
44 * Internationalisation file for extension AntiSpoof.
55 *
66 * @addtogroup Extensions
7 -*/
 7+ */
88
99 $messages = array();
1010
@@ -956,7 +956,7 @@
957957 'antispoof-desc' => 'Menggak nggawé akun utawa rékening mawa jeneng panganggo aksara campuran, mbingungaké lan sing mèmper',
958958 'antispoof-conflict-top' => 'Jeneng "$1" mèmper banget karo {{PLURAL:$2|akun sing wis ana|$2 akun iki}}:',
959959 'antispoof-conflict-bottom' => 'Mangga milih jeneng liya',
960 - 'antispoof-name-illegal' => 'Jeneng "$1" ora diidinaké supaya wong ora bingung utawa menggak ngapi-api jeneng panganggo sing wis ana: $2.
 960+ 'antispoof-name-illegal' => 'Jeneng "$1" ora diidinaké supaya wong ora bingung utawa menggak ngapi-api jeneng panganggo sing wis ana: $2.
961961 Mangga pilihen jeneng liya.',
962962 'antispoof-badtype' => 'Tipe data salah',
963963 'antispoof-empty' => 'Data kosong',
@@ -1331,7 +1331,7 @@
13321332 'antispoof-desc' => 'Blokkerer for oppretting av konti med liknande eller forvirrande brukarnamn, eller brukarnamn som inneheld forskjellige alfabettypar',
13331333 'antispoof-conflict-top' => 'Namnet «$1» er for likt følgjande {{PLURAL:$2|konto|kontoar}}:',
13341334 'antispoof-conflict-bottom' => 'Vel eit anna namn.',
1335 - 'antispoof-name-illegal' => 'Namnet «$1» er ikkje tillate for å hindra samanblanding: $2.
 1335+ 'antispoof-name-illegal' => 'Namnet «$1» er ikkje tillate for å hindra samanblanding: $2.
13361336 Ver venleg og vel eit anna namn.',
13371337 'antispoof-badtype' => 'Ugyldig datatype',
13381338 'antispoof-empty' => 'Tom streng',
@@ -1502,7 +1502,7 @@
15031503 'antispoof-desc' => 'Impede a criação de contas com escrita mista, e nomes de utilizador confusos e semelhantes',
15041504 'antispoof-conflict-top' => 'O nome "$1" é demasiado semelhante {{PLURAL:$2|ao da seguinte conta já existente|aos das seguintes $2 contas}}',
15051505 'antispoof-conflict-bottom' => 'Por favor, escolha outro nome.',
1506 - 'antispoof-name-illegal' => 'O nome "$1" não é permitido para prevenir que seja confundido com outro (ou que seja feito algum trocadilho): já existe $2.
 1506+ 'antispoof-name-illegal' => 'O nome "$1" não é permitido para prevenir que seja confundido com outro (ou que seja feito algum trocadilho): já existe $2.
15071507 Por favor, escolha outro nome.',
15081508 'antispoof-badtype' => 'Formato de dados incorreto',
15091509 'antispoof-empty' => 'Linha vazia',
Index: trunk/extensions/AntiSpoof/generateEquivset.php
@@ -1,7 +1,7 @@
22 <?php
33
4 -require_once ( getenv('MW_INSTALL_PATH') !== false
5 - ? getenv('MW_INSTALL_PATH')."/maintenance/commandLine.inc"
 4+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
 5+ ? getenv( 'MW_INSTALL_PATH' ) . "/maintenance/commandLine.inc"
66 : dirname( __FILE__ ) . '/../../maintenance/commandLine.inc' );
77
88 $dir = dirname( __FILE__ );
Index: trunk/extensions/AntiSpoof/AntiSpoof.php
@@ -1,4 +1,7 @@
22 <?php
 3+if ( !defined( 'MEDIAWIKI' ) ) {
 4+ exit( 1 );
 5+}
36
47 $wgExtensionCredits['other'][] = array(
58 'path' => __FILE__,
@@ -56,10 +59,10 @@
5760 global $wgAntiSpoofAccounts, $wgUser, $wgRequest;
5861 wfLoadExtensionMessages( 'AntiSpoof' );
5962
60 - if( !$wgAntiSpoofAccounts ) {
 63+ if ( !$wgAntiSpoofAccounts ) {
6164 $mode = 'LOGGING ';
6265 $active = false;
63 - } elseif( $wgRequest->getCheck('wpIgnoreAntiSpoof') &&
 66+ } elseif ( $wgRequest->getCheck( 'wpIgnoreAntiSpoof' ) &&
6467 $wgUser->isAllowed( 'override-antispoof' ) ) {
6568 $mode = 'OVERRIDE ';
6669 $active = false;
@@ -70,18 +73,18 @@
7174
7275 $name = $user->getName();
7376 $spoof = new SpoofUser( $name );
74 - if( $spoof->isLegal() ) {
 77+ if ( $spoof->isLegal() ) {
7578 $normalized = $spoof->getNormalized();
7679 $conflicts = $spoof->getConflicts();
77 - if( empty($conflicts) ) {
 80+ if ( empty( $conflicts ) ) {
7881 wfDebugLog( 'antispoof', "{$mode}PASS new account '$name' [$normalized]" );
7982 } else {
8083 wfDebugLog( 'antispoof', "{$mode}CONFLICT new account '$name' [$normalized] spoofs " . implode( ',', $conflicts ) );
81 - if( $active ) {
 84+ if ( $active ) {
8285 $numConflicts = count( $conflicts );
83 - $message = wfMsgExt( 'antispoof-conflict-top', array('parsemag'), htmlspecialchars( $name ), $numConflicts );
 86+ $message = wfMsgExt( 'antispoof-conflict-top', array( 'parsemag' ), htmlspecialchars( $name ), $numConflicts );
8487 $message .= '<ul>';
85 - foreach( $conflicts as $simUser ) {
 88+ foreach ( $conflicts as $simUser ) {
8689 $message .= '<li>' . wfMsg( 'antispoof-conflict-item', $simUser ) . '</li>';
8790 }
8891 $message .= '</ul>' . wfMsg( 'antispoof-conflict-bottom' );
@@ -91,7 +94,7 @@
9295 } else {
9396 $error = $spoof->getError();
9497 wfDebugLog( 'antispoof', "{$mode}ILLEGAL new account '$name' $error" );
95 - if( $active ) {
 98+ if ( $active ) {
9699 $message = wfMsg( 'antispoof-name-illegal', $name, $error );
97100 return false;
98101 }
@@ -107,9 +110,9 @@
108111
109112 wfLoadExtensionMessages( 'AntiSpoof' );
110113
111 - if( $wgAntiSpoofAccounts && $wgUser->isAllowed( 'override-antispoof' ) )
 114+ if ( $wgAntiSpoofAccounts && $wgUser->isAllowed( 'override-antispoof' ) )
112115 $template->addInputItem( 'wpIgnoreAntiSpoof',
113 - $wgRequest->getCheck('wpIgnoreAntiSpoof'),
 116+ $wgRequest->getCheck( 'wpIgnoreAntiSpoof' ),
114117 'checkbox', 'antispoof-ignore' );
115118 return true;
116119 }
Index: trunk/extensions/AntiSpoof/batchAntiSpoof.php
@@ -1,5 +1,4 @@
22 <?php
3 -
43 // Go through all usernames and calculate and record spoof thingies
54
65 $base = dirname( dirname( dirname( __FILE__ ) ) );
@@ -13,14 +12,14 @@
1413
1514 $result = $dbw->select( 'user', 'user_name', null, 'batchAntiSpoof.php' );
1615 $n = 0;
17 -while( $row = $dbw->fetchObject( $result ) ) {
18 - if( $n++ % $batchSize == 0 ) {
 16+while ( $row = $dbw->fetchObject( $result ) ) {
 17+ if ( $n++ % $batchSize == 0 ) {
1918 echo "$wgDBname $n\n";
2019 }
2120
2221 $items[] = new SpoofUser( $row->user_name );
2322
24 - if( $n % $batchSize == 0 ) {
 23+ if ( $n % $batchSize == 0 ) {
2524 SpoofUser::batchRecord( $items );
2625 $items = array();
2726 }
@@ -29,4 +28,3 @@
3029 SpoofUser::batchRecord( $items );
3130 echo "$wgDBname $n done.\n";
3231 $dbw->freeResult( $result );
33 -
Index: trunk/extensions/AntiSpoof/equivset.in
@@ -1,37 +1,36 @@
2 -# There is a publically editable copy of this file at
 2+# There is a publically editable copy of this file at
33 # http://www.mediawiki.org/wiki/AntiSpoof/Equivalence_sets
44
5 -
65 # This is the input file for generateEquivset.php
76 # The format is:
87 #
98 # <hexadecimal codepoint> <character> => [<hexadecimal codepoint>] <character>
109 #
11 -# If the codepoint is given, it must match the character, or else a warning
 10+# If the codepoint is given, it must match the character, or else a warning
1211 # will be issued and the line will be ignored.
1312 #
1413 # The effect of such a line is to conflate the two identified character, i.e.
15 -# to put them in the same set. If two sets share a member, then they will be
 14+# to put them in the same set. If two sets share a member, then they will be
1615 # merged into a single larger set.
1716 #
1817 # We have attempted to include the following types of equivalence:
19 -# * Case folding. Although letters of different cases are often visually
 18+# * Case folding. Although letters of different cases are often visually
2019 # distinct, they can easily be confused by people who are familiar with
21 -# the alphabet. Two words with a different case may be read as the same
 20+# the alphabet. Two words with a different case may be read as the same
2221 # word. This is a popular technique for impersonation.
2322 #
2423 # * Visually similar characters. Cross-script pairs are included, but these
25 -# tend to produce false conflations within scripts, and so should be
 24+# tend to produce false conflations within scripts, and so should be
2625 # avoided. The software implements a blanket restriction against cross-
27 -# script strings, which makes cross-script pairs mostly redundant.
 26+# script strings, which makes cross-script pairs mostly redundant.
2827 #
29 -# * Chinese Simplified/Traditional pairs.
 28+# * Chinese Simplified/Traditional pairs.
3029 #
3130 # The list is based on one by Neil Harris, which was derived by unknown methods.
3231 # That list also contained transliteration pairs, which we considered excessive
33 -# and have attempted to remove. For example, the latin E and H were considered
34 -# equivalent, because the latin transliteration of the cyrillic "Н" (which
35 -# looks like latin H) is "E".
 32+# and have attempted to remove. For example, the latin E and H were considered
 33+# equivalent, because the latin transliteration of the cyrillic "Н" (which
 34+# looks like latin H) is "E".
3635
3736 49 I => 31 1
3837 4C L => 31 1
@@ -5183,4 +5182,3 @@
51845183 FFDA ᅳ => 3161 ㅡ
51855184 FFDB ᅴ => 3162 ㅢ
51865185 FFDC ᅵ => 3163 ㅣ
5187 -
Index: trunk/extensions/AntiSpoof/SpoofUser.php
@@ -5,7 +5,7 @@
66 $this->mName = strval( $name );
77 list( $ok, $normalized ) = AntiSpoof::checkUnicodeString( $this->mName );
88 $this->mLegal = ( $ok == 'OK' );
9 - if( $this->mLegal ) {
 9+ if ( $this->mLegal ) {
1010 $this->mNormalized = $normalized;
1111 $this->mError = null;
1212 } else {
@@ -59,7 +59,7 @@
6060 ) );
6161
6262 $spoofs = array();
63 - while( $row = $dbr->fetchObject( $spoofedUsers ) ) {
 63+ while ( $row = $dbr->fetchObject( $spoofedUsers ) ) {
6464 array_push( $spoofs, $row->user_name );
6565 }
6666 return $spoofs;
@@ -87,9 +87,9 @@
8888 * @param $items array of SpoofUser
8989 */
9090 public function batchRecord( $items ) {
91 - if( count( $items ) ) {
 91+ if ( count( $items ) ) {
9292 $fields = array();
93 - foreach( $items as $item ) {
 93+ foreach ( $items as $item ) {
9494 $fields[] = $item->insertFields();
9595 }
9696 $dbw = wfGetDB( DB_MASTER );

Status & tagging log