Index: trunk/extensions/AntiSpoof/equivset.head |
— | — | @@ -0,0 +1,34 @@ |
| 2 | +# There is a publicly editable copy of this file at |
| 3 | +# http://www.mediawiki.org/wiki/AntiSpoof/Equivalence_sets |
| 4 | + |
| 5 | +# This is the input file for generateEquivset.php |
| 6 | +# The format is: |
| 7 | +# |
| 8 | +# <hexadecimal codepoint> <character> => [<hexadecimal codepoint>] <character> |
| 9 | +# |
| 10 | +# If the codepoint is given, it must match the character, or else a warning |
| 11 | +# will be issued and the line will be ignored. |
| 12 | +# |
| 13 | +# The effect of such a line is to conflate the two identified characters, i.e. |
| 14 | +# to put them in the same set. If two sets share a member, then they will be |
| 15 | +# merged into a single larger set. |
| 16 | +# |
| 17 | +# We have attempted to include the following types of equivalence: |
| 18 | +# * Case folding. Although letters of different cases are often visually |
| 19 | +# distinct, they can easily be confused by people who are familiar with |
| 20 | +# the alphabet. Two words with a different case may be read as the same |
| 21 | +# word. This is a popular technique for impersonation. |
| 22 | +# |
| 23 | +# * Visually similar characters. Cross-script pairs are included, but these |
| 24 | +# tend to produce false conflations within scripts, and so should be |
| 25 | +# avoided. The software implements a blanket restriction against cross- |
| 26 | +# script strings, which makes cross-script pairs mostly redundant. |
| 27 | +# |
| 28 | +# * Chinese Simplified/Traditional pairs. |
| 29 | +# |
| 30 | +# The list is based on one by Neil Harris, which was derived by unknown methods. |
| 31 | +# That list also contained transliteration pairs, which we considered excessive |
| 32 | +# and have attempted to remove. For example, the Latin E and H were considered |
| 33 | +# equivalent, because the Latin transliteration of the Cyrillic "Н" (which |
| 34 | +# looks like Latin H) is "E". |
| 35 | + |
Index: trunk/extensions/AntiSpoof/generateEquivset.php |
— | — | @@ -59,6 +59,7 @@ |
60 | 60 | $lineNum = 0; |
61 | 61 | $setsByChar = array(); |
62 | 62 | $sets = array(); |
| 63 | +$exitStatus = 0; |
63 | 64 | foreach ( $lines as $line ) { |
64 | 65 | ++$lineNum; |
65 | 66 | $line = trim( $line ); |
— | — | @@ -73,6 +74,7 @@ |
74 | 75 | "/^(?P<hexleft> [A-F0-9]+) $sp+ (?P<charleft> .+?) $sp+ => $sp+ (?:(?P<hexright> [A-F0-9]+) $sp+|) (?P<charright> .+?) $sp* (?: \#.*|) $ /x", $line, $m ) ) |
75 | 76 | { |
76 | 77 | print "Error: invalid entry at line $lineNum: $line\n"; |
| 78 | + $exitStatus = 1; |
77 | 79 | continue; |
78 | 80 | } |
79 | 81 | $error = false; |
— | — | @@ -101,6 +103,7 @@ |
102 | 104 | $error = true; |
103 | 105 | } |
104 | 106 | if ( $error ) { |
| 107 | + $exitStatus = 1; |
105 | 108 | continue; |
106 | 109 | } |
107 | 110 | |
— | — | @@ -139,3 +142,6 @@ |
140 | 143 | fclose( $setsFile ); |
141 | 144 | fclose( $outputFile ); |
142 | 145 | fclose( $serializedFile ); |
| 146 | + |
| 147 | +exit( $exitStatus ); |
| 148 | + |
Index: trunk/extensions/AntiSpoof/Makefile |
— | — | @@ -0,0 +1,15 @@ |
| 2 | +# Rebuilds the AntiSpoof equivalence-set data files from equivset.head plus the downloaded wiki pages. |
| 3 | +equivset.txt equivset.php equivset.ser: equivset.in |
| 4 | + php generateEquivset.php |
| 5 | +# equivset.in is the static header followed by the downloaded pages, minus lines starting with <pre> or </pre>. |
| 6 | +equivset.in: equivset.head equivset_1 equivset_2 equivset_3 |
| 7 | + cat equivset.head > equivset.in |
| 8 | + grep -v -h "^</*pre>" equivset_1 equivset_2 equivset_3 >> equivset.in |
| 9 | + @echo "Regenerated $@. Remember to run 'svn diff equivset.in' before committing" |
| 10 | +# Fetch one wiki subpage as raw text; wget -O creates the file even on failure, so remove it then to force a retry. |
| 11 | +equivset_%: |
| 12 | + wget --user-agent="Extension AntiSpoof equivset.in rebuild" -O $@ "http://www.mediawiki.org/w/index.php?action=raw&title=Extension:AntiSpoof/Equivalence_sets/$@" || { rm -f $@; exit 1; } |
| 13 | +.PHONY: clean |
| 14 | +clean: |
| 15 | + rm -f equivset.in equivset_1 equivset_2 equivset_3 equivset.txt equivset.php equivset.ser |
| 16 | + |
Property changes on: trunk/extensions/AntiSpoof/Makefile |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 17 | + native |
Property changes on: trunk/extensions/AntiSpoof |
___________________________________________________________________ |
Added: svn:ignore |
2 | 18 | + equivset_1 |
equivset_2 |
equivset_3 |