r106821 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r106820 | r106821 | r106822 >
Date:17:29, 20 December 2011
Author:reedy
Status:ok
Tags:
Comment:
Refactor GenerateEquivset into new Maintenance subclassed script
Modified paths:
  • /trunk/extensions/AntiSpoof/generateEquivset.php (modified) (history)

Diff [purge]

Index: trunk/extensions/AntiSpoof/generateEquivset.php
@@ -1,27 +1,28 @@
22 <?php
33
4 -require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
5 - ? getenv( 'MW_INSTALL_PATH' ) . "/maintenance/commandLine.inc"
6 - : dirname( __FILE__ ) . '/../../maintenance/commandLine.inc' );
 4+$IP = getenv( 'MW_INSTALL_PATH' );
 5+if ( $IP === false ) {
 6+ $IP = dirname( __FILE__ ) . '/../..';
 7+}
 8+require( "$IP/maintenance/Maintenance.php" );
79
8 -$dir = dirname( __FILE__ );
 10+class GenerateEquivset extends Maintenance {
 11+ public function execute() {
 12+ $dir = dirname( __FILE__ );
913
10 -$endl = '
11 -';
 14+ $endl = "\n";
1215
13 -$lines = file( "$dir/equivset.in" );
14 -if ( !$lines ) {
15 - print "Unable to open equivset.in\n";
16 - exit( 1 );
17 -}
 16+ $lines = file( "$dir/equivset.in" );
 17+ if ( !$lines ) {
 18+ $this->error( "Unable to open equivset.in\n", 1 );
 19+ }
1820
19 -$setsFile = fopen( "$dir/equivset.txt", 'w' );
20 -if ( !$setsFile ) {
21 - print "Unable to open equivset.txt for writing\n";
22 - exit( 1 );
23 -}
 21+ $setsFile = fopen( "$dir/equivset.txt", 'w' );
 22+ if ( !$setsFile ) {
 23+ $this->error( "Unable to open equivset.txt for writing\n", 1 );
 24+ }
2425
25 -fwrite( $setsFile, <<<EOT
 26+ fwrite( $setsFile, <<<EOT
2627 # This file is generated by generateEquivset.php
2728 # It shows sets of equivalent characters, one set per line, with characters
2829 # separated by whitespace. This file is not used by MediaWiki, rather it is
@@ -29,14 +30,14 @@
3031 # review purposes.
3132
3233 EOT
33 -);
 34+ );
3435
35 -$outputFile = fopen( "$dir/equivset.php", 'w' );
36 -if ( !$outputFile ) {
37 - print "Unable to open equivset.php for writing\n";
38 - exit( 1 );
39 -}
40 -fwrite( $outputFile, "<?" . "php$endl" . <<<EOT
 36+ $outputFile = fopen( "$dir/equivset.php", 'w' );
 37+ if ( !$outputFile ) {
 38+ $this->error( "Unable to open equivset.php for writing\n", 1 );
 39+ }
 40+
 41+ fwrite( $outputFile, "<?" . "php$endl" . <<<EOT
4142 # This file is generated by generateEquivset.php
4243 # It contains a map of characters, encoded in UTF-8, such that running strtr()
4344 # on a string with this map will cause confusable characters to be reduced to
@@ -44,104 +45,109 @@
4546 # form, in equivset.ser.
4647
4748 EOT
48 -);
 49+ );
4950
50 -$serializedFile = fopen( "$dir/equivset.ser", 'w' );
51 -if ( !$serializedFile ) {
52 - print "Unable to open equivset.ser for writing\n";
53 - exit( 1 );
54 -}
 51+ $serializedFile = fopen( "$dir/equivset.ser", 'w' );
 52+ if ( !$serializedFile ) {
 53+ $this->error( "Unable to open equivset.ser for writing\n", 1 );
 54+ }
5555
56 -# \s matches \xa0 in non-unicode mode, which is not what we want
57 -# So we need to make our own whitespace class
58 -$sp = '[\ \t]';
 56+ # \s matches \xa0 in non-unicode mode, which is not what we want
 57+ # So we need to make our own whitespace class
 58+ $sp = '[\ \t]';
5959
60 -$lineNum = 0;
61 -$setsByChar = array();
62 -$sets = array();
63 -$exitStatus = 0;
64 -foreach ( $lines as $line ) {
65 - ++$lineNum;
66 - $line = trim( $line );
 60+ $lineNum = 0;
 61+ $setsByChar = array();
 62+ $sets = array();
 63+ $exitStatus = 0;
6764
68 - # Filter comments
69 - if ( !$line || $line[0] == '#' ) {
70 - continue;
71 - }
 65+ foreach ( $lines as $line ) {
 66+ ++$lineNum;
 67+ $line = trim( $line );
7268
73 - # Process line
74 - if ( !preg_match(
75 -"/^(?P<hexleft> [A-F0-9]+) $sp+ (?P<charleft> .+?) $sp+ => $sp+ (?:(?P<hexright> [A-F0-9]+) $sp+|) (?P<charright> .+?) $sp* (?: \#.*|) $ /x", $line, $m ) )
76 - {
77 - print "Error: invalid entry at line $lineNum: $line\n";
78 - $exitStatus = 1;
79 - continue;
80 - }
81 - $error = false;
82 - if ( codepointToUtf8( hexdec( $m['hexleft'] ) ) != $m['charleft'] ) {
83 - $actual = utf8ToCodepoint( $m['charleft'] );
84 - if ( $actual === false ) {
85 - print "Bytes: " . strlen( $m['charleft'] ) . "\n";
86 - print bin2hex( $line ) . "\n";
87 - $hexForm = bin2hex( $m['charleft'] );
88 - print "Invalid UTF-8 character \"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n";
89 - } else {
90 - print "Error: left number ({$m['hexleft']}) does not match left character ($actual) " .
91 - "at line $lineNum: $line\n";
 69+ # Filter comments
 70+ if ( !$line || $line[0] == '#' ) {
 71+ continue;
 72+ }
 73+
 74+ # Process line
 75+ if ( !preg_match(
 76+ "/^(?P<hexleft> [A-F0-9]+) $sp+ (?P<charleft> .+?) $sp+ => $sp+ (?:(?P<hexright> [A-F0-9]+) $sp+|) (?P<charright> .+?) $sp* (?: \#.*|) $ /x", $line, $m ) )
 77+ {
 78+ $this->output( "Error: invalid entry at line $lineNum: $line\n" );
 79+ $exitStatus = 1;
 80+ continue;
 81+ }
 82+ $error = false;
 83+ if ( codepointToUtf8( hexdec( $m['hexleft'] ) ) != $m['charleft'] ) {
 84+ $actual = utf8ToCodepoint( $m['charleft'] );
 85+ if ( $actual === false ) {
 86+ $this->output( "Bytes: " . strlen( $m['charleft'] ) . "\n" );
 87+ $this->output( bin2hex( $line ) . "\n" );
 88+ $hexForm = bin2hex( $m['charleft'] );
 89+ $this->output( "Invalid UTF-8 character \"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n" );
 90+ } else {
 91+ $this->output( "Error: left number ({$m['hexleft']}) does not match left character ($actual) " .
 92+ "at line $lineNum: $line\n" );
 93+ }
 94+ $error = true;
 95+ }
 96+ if ( !empty( $m['hexright'] ) && codepointToUtf8( hexdec( $m['hexright'] ) ) != $m['charright'] ) {
 97+ $actual = utf8ToCodepoint( $m['charright'] );
 98+ if ( $actual === false ) {
 99+ $hexForm = bin2hex( $m['charright'] );
 100+ $this->output( "Invalid UTF-8 character \"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n" );
 101+ } else {
 102+ $this->output( "Error: right number ({$m['hexright']}) does not match right character ($actual) " .
 103+ "at line $lineNum: $line\n" );
 104+ }
 105+ $error = true;
 106+ }
 107+ if ( $error ) {
 108+ $exitStatus = 1;
 109+ continue;
 110+ }
 111+
 112+ # Find the set for the right character, add a new one if necessary
 113+ if ( isset( $setsByChar[$m['charright']] ) ) {
 114+ $setName = $setsByChar[$m['charright']];
 115+ } else {
 116+ # New set
 117+ $setName = $m['charright'];
 118+ $sets[$setName] = array( $m['charright'] );
 119+ $setsByChar[$setName] = $setName;
 120+ }
 121+
 122+ # Add the left character to the set
 123+ $sets[$setName][] = $m['charleft'];
 124+ $setsByChar[$m['charleft']] = $setName;
92125 }
93 - $error = true;
94 - }
95 - if ( !empty( $m['hexright'] ) && codepointToUtf8( hexdec( $m['hexright'] ) ) != $m['charright'] ) {
96 - $actual = utf8ToCodepoint( $m['charright'] );
97 - if ( $actual === false ) {
98 - $hexForm = bin2hex( $m['charright'] );
99 - print "Invalid UTF-8 character \"{$m['charleft']}\" ($hexForm) at line $lineNum: $line\n";
100 - } else {
101 - print "Error: right number ({$m['hexright']}) does not match right character ($actual) " .
102 - "at line $lineNum: $line\n";
 126+
 127+ # Sets output
 128+ foreach ( $sets as $members ) {
 129+ fwrite( $setsFile, implode( ' ', $members ) . $endl );
103130 }
104 - $error = true;
105 - }
106 - if ( $error ) {
107 - $exitStatus = 1;
108 - continue;
109 - }
110131
111 - # Find the set for the right character, add a new one if necessary
112 - if ( isset( $setsByChar[$m['charright']] ) ) {
113 - $setName = $setsByChar[$m['charright']];
114 - } else {
115 - # New set
116 - $setName = $m['charright'];
117 - $sets[$setName] = array( $m['charright'] );
118 - $setsByChar[$setName] = $setName;
119 - }
 132+ # Map output
 133+ $output = var_export( $setsByChar, true );
 134+ $output = str_replace( "\n", $endl, $output );
 135+ fwrite( $outputFile, '$equivset = ' . "$output;$endl" );
120136
121 - # Add the left character to the set
122 - $sets[$setName][] = $m['charleft'];
123 - $setsByChar[$m['charleft']] = $setName;
124 -}
 137+ # Serialized codepoint map
 138+ $codepointMap = array();
 139+ foreach ( $setsByChar as $char => $setName ) {
 140+ $codepointMap[ utf8ToCodepoint( $char ) ] = utf8ToCodepoint( $setName );
 141+ }
 142+ fwrite( $serializedFile, serialize( $codepointMap ) );
125143
126 -# Sets output
127 -foreach ( $sets as $setName => $members ) {
128 - fwrite( $setsFile, implode( ' ', $members ) . $endl );
129 -}
 144+ fclose( $setsFile );
 145+ fclose( $outputFile );
 146+ fclose( $serializedFile );
130147
131 -# Map output
132 -$output = var_export( $setsByChar, true );
133 -$output = str_replace( "\n", $endl, $output );
134 -fwrite( $outputFile, '$equivset = ' . "$output;$endl" );
135 -
136 -# Serialized codepoint map
137 -$codepointMap = array();
138 -foreach ( $setsByChar as $char => $setName ) {
139 - $codepointMap[ utf8ToCodepoint( $char ) ] = utf8ToCodepoint( $setName );
 148+ $this->error( '', $exitStatus );
 149+ }
140150 }
141 -fwrite( $serializedFile, serialize( $codepointMap ) );
142151
143 -fclose( $setsFile );
144 -fclose( $outputFile );
145 -fclose( $serializedFile );
 152+$maintClass = "GenerateEquivset";
 153+require_once( DO_MAINTENANCE );
146154
147 -exit( $exitStatus );
148 -

Status & tagging log