Index: trunk/extensions/AntiSpoof/AntiSpoof_body.php |
— | — | @@ -306,6 +306,29 @@ |
307 | 307 | return $out; |
308 | 308 | } |
309 | 309 | |
| 310 | + /** |
| 311 | + * Helper function for checkUnicodeString: return an error for a bad character. |
| 312 | + * TODO: It would be nice to show the Unicode character name, but it is not clear how to get it. |
| 313 | + * @param $msgId -- string, identifier of the message that takes the character designator as $1. |
| 314 | + * @param $point -- number, codepoint of the bad character. |
| 315 | + * @return array( "ERROR", formatted error message ). |
| 316 | + */ |
| 317 | + private static function badCharErr( $msgId, $point ) { |
| 318 | + $symbol = codepointToUtf8( $point ); |
| 319 | + // Combining marks attach to the previous character. If the offending character is a |
| 320 | + // combining mark, prepend a space to it so that it is displayed correctly. |
| 321 | + if ( self::getScriptCode( $point ) == "SCRIPT_COMBINING_MARKS" ) { |
| 322 | + $symbol = ' ' . $symbol; |
| 323 | + } |
| 324 | + $code = sprintf( 'U+%04X', $point ); |
| 325 | + if ( preg_match( '/\A\p{C}\z/u', $symbol ) ) { |
| 326 | + $char = wfMsg( 'antispoof-bad-char-non-printable', $code ); |
| 327 | + } else { |
| 328 | + $char = wfMsg( 'antispoof-bad-char', $symbol, $code ); |
| 329 | + } |
| 330 | + return array( "ERROR", wfMsg( $msgId, $char ) ); |
| 331 | + } |
| 332 | + |
310 | 333 | /** |
311 | 334 | * TODO: does too much in one routine, refactor... |
312 | 335 | * @param $testName |
— | — | @@ -321,8 +344,10 @@ |
322 | 345 | return array( "ERROR", wfMsg( 'antispoof-empty' ) ); |
323 | 346 | } |
324 | 347 | |
325 | | - if ( array_intersect( self::stringToList( $testName ), self::$character_blacklist ) ) { |
326 | | - return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) ); |
| 348 | + foreach ( self::stringToList( $testName ) as $char ) { |
| 349 | + if ( in_array( $char, self::$character_blacklist ) ) { |
| 350 | + return self::badCharErr( 'antispoof-blacklisted', $char ); |
| 351 | + } |
327 | 352 | } |
328 | 353 | |
329 | 354 | # Perform Unicode _compatibility_ decomposition |
— | — | @@ -330,23 +355,31 @@ |
331 | 356 | $testChars = self::stringToList( $testName ); |
332 | 357 | |
333 | 358 | # Be paranoid: check again, just in case Unicode normalization code changes... |
334 | | - if ( array_intersect( $testChars, self::$character_blacklist ) ) { |
335 | | - return array( "ERROR", wfMsg( 'antispoof-blacklisted' ) ); |
| 359 | + foreach ( $testChars as $char ) { |
| 360 | + if ( in_array( $char, self::$character_blacklist ) ) { |
| 361 | + return self::badCharErr( 'antispoof-blacklisted', $char ); |
| 362 | + } |
336 | 363 | } |
337 | 364 | |
338 | 365 | # Check for this: should not happen in any valid Unicode string |
339 | 366 | if ( self::getScriptCode( $testChars[0] ) == "SCRIPT_COMBINING_MARKS" ) { |
340 | | - return array( "ERROR", wfMsg( 'antispoof-combining' ) ); |
| 367 | + return self::badCharErr( 'antispoof-combining', $testChars[0] ); |
341 | 368 | } |
342 | 369 | |
343 | 370 | # Strip all combining characters in order to crudely strip accents |
344 | 371 | # Note: NFKD normalization should have decomposed all accented chars earlier |
345 | 372 | $testChars = self::stripScript( $testChars, "SCRIPT_COMBINING_MARKS" ); |
346 | 373 | |
347 | | - $testScripts = array_unique( array_map( array( 'AntiSpoof', 'getScriptCode' ), $testChars ) ); |
348 | | - if ( in_array( "SCRIPT_UNASSIGNED", $testScripts ) || in_array( "SCRIPT_DEPRECATED", $testScripts ) ) { |
349 | | - return array( "ERROR", wfMsg( 'antispoof-unassigned' ) ); |
| 374 | + $testScripts = array_map( array( 'AntiSpoof', 'getScriptCode' ), $testChars ); |
| 375 | + $unassigned = array_search( "SCRIPT_UNASSIGNED", $testScripts ); |
| 376 | + if ( $unassigned !== False ) { |
| 377 | + return self::badCharErr( 'antispoof-unassigned', $testChars[$unassigned] ); |
350 | 378 | } |
| 379 | + $deprecated = array_search( "SCRIPT_DEPRECATED", $testScripts ); # index of first deprecated char, or false |
| 380 | + if ( $deprecated !== False ) { |
| 381 | + return self::badCharErr( 'antispoof-deprecated', $testChars[$deprecated] ); |
| 382 | + } |
| 383 | + $testScripts = array_unique( $testScripts ); |
351 | 384 | |
352 | 385 | # We don't mind ASCII punctuation or digits |
353 | 386 | $testScripts = array_diff( $testScripts, |
Index: trunk/extensions/AntiSpoof/AntiSpoof.i18n.php |
— | — | @@ -15,11 +15,14 @@ |
16 | 16 | 'antispoof-conflict-bottom' => 'Please choose another name.', |
17 | 17 | 'antispoof-name-illegal' => 'The name "$1" is not allowed to prevent confusing or spoofed usernames: $2. |
18 | 18 | Please choose another name.', |
| 19 | + 'antispoof-bad-char' => '"$1" ($2)', |
| 20 | + 'antispoof-bad-char-non-printable' => '$1', |
19 | 21 | 'antispoof-badtype' => 'Bad data type', |
20 | 22 | 'antispoof-empty' => 'Empty string', |
21 | | - 'antispoof-blacklisted' => 'Contains blacklisted character', |
22 | | - 'antispoof-combining' => 'Begins with combining mark', |
23 | | - 'antispoof-unassigned' => 'Contains unassigned or deprecated character', |
| 23 | + 'antispoof-blacklisted' => 'Contains blacklisted character $1', |
| 24 | + 'antispoof-combining' => 'Begins with combining mark $1', |
| 25 | + 'antispoof-unassigned' => 'Contains unassigned character $1', |
| 26 | + 'antispoof-deprecated' => 'Contains deprecated character $1', |
24 | 27 | 'antispoof-noletters' => 'Does not contain any letters', |
25 | 28 | 'antispoof-mixedscripts' => 'Contains incompatible mixed scripts', |
26 | 29 | 'antispoof-tooshort' => 'Canonicalized name too short', |
— | — | @@ -43,11 +46,21 @@ |
44 | 47 | 'antispoof-name-illegal' => 'Account creation error message because a user account creation rule was violated. Parameters: |
45 | 48 | * $1 is the username that someone wanted to create |
46 | 49 | * $2 is the error message. One of {{msg-mw|antispoof-badtype}}, {{msg-mw|antispoof-empty}}, {{msg-mw|antispoof-blacklisted}} and others.', |
| 50 | + 'antispoof-bad-char' => 'This is not a complete message but a template for the designator of a bad character, so that localizations can format it properly. Parameters: |
| 51 | +* $1 is the bad character itself. |
| 52 | +* $2 is the Unicode code point of bad character ("U+" followed by hex number).', |
| 53 | + 'antispoof-bad-char-non-printable' => 'The same as antispoof-bad-char, but for non-printable characters. Since non-printable characters do not have a visual representation, the template has only one parameter: |
| 54 | +* $1 is the Unicode code point of bad character ("U+" followed by hex number).', |
47 | 55 | 'antispoof-badtype' => 'Reason for failed account creation.', |
48 | 56 | 'antispoof-empty' => 'Reason for failed account creation.', |
49 | | - 'antispoof-blacklisted' => 'Reason for failed account creation.', |
50 | | - 'antispoof-combining' => 'Reason for failed account creation.', |
51 | | - 'antispoof-unassigned' => 'Reason for failed account creation.', |
| 57 | + 'antispoof-blacklisted' => 'Reason for failed account creation. Parameters: |
| 58 | +* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).', |
| 59 | + 'antispoof-combining' => 'Reason for failed account creation. Parameters: |
| 60 | +* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).', |
| 61 | + 'antispoof-unassigned' => 'Reason for failed account creation. Parameters: |
| 62 | +* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).', |
| 63 | + 'antispoof-deprecated' => 'Reason for failed account creation. Parameters: |
| 64 | +* $1 — bad character designator (built with either antispoof-bad-char or …-non-printable).', |
52 | 65 | 'antispoof-noletters' => 'Reason for failed account creation.', |
53 | 66 | 'antispoof-mixedscripts' => 'Reason for failed account creation.', |
54 | 67 | 'antispoof-tooshort' => 'Reason for failed account creation.', |