Index: trunk/phase3/includes/OutputPage.php |
— | — | @@ -901,8 +901,6 @@ |
902 | 902 | $this->addScriptFile( 'rightclickedit.js' ); |
903 | 903 | } |
904 | 904 | |
905 | | - $this->mBodytext = StringUtils::cleanForCharset( $this->mBodytext, $wgOutputEncoding ); |
906 | | - |
907 | 905 | # Buffer output; final headers may depend on later processing |
908 | 906 | ob_start(); |
909 | 907 | |
Index: trunk/phase3/includes/StringUtils.php |
— | — | @@ -179,86 +179,6 @@ |
180 | 180 | return new ArrayIterator( explode( $separator, $subject ) ); |
181 | 181 | } |
182 | 182 | } |
183 | | - |
184 | | - /** |
185 | | - * Clean characters that are invalid in the given character set |
186 | | - * from a given string. Only 'UTF-8' (exact spelling) is actually cleaned; |
187 | | - * for any other character set the string is returned unchanged. |
188 | | - * @param $string \type{$string} String to clean |
189 | | - * @param $charset \type{$string} Character set (if empty, assume $wgOutputEncoding) |
190 | | - * @return \type{$string} Cleaned string |
191 | | - */ |
192 | | - public static function cleanForCharset( $string, $charset='' ) { |
193 | | - global $wgOutputEncoding; |
194 | | - switch ( $charset ? $charset : $wgOutputEncoding ) { |
195 | | - # Only UTF-8 has a cleaner; every other encoding passes through as-is. |
196 | | - case 'UTF-8': |
197 | | - return self::cleanUtf8( $string ); |
198 | | - default: |
199 | | - return $string; |
200 | | - } |
201 | | - } |
202 | | - |
203 | | - /** |
204 | | - * Clean invalid UTF-8 byte sequences from a string, replacing each one with U+FFFD. |
205 | | - * Replaced: high-bit bytes that do not start a sequence, sequences cut short by a non-continuation byte, overlong 2-4 byte forms, sequences longer than 4 bytes, values above 0x10FFFF, and the code points listed in $illegal. |
206 | | - * Intended to be RFC 3629 compliant. NOTE(review): $illegal is matched by exact value, so surrogates between the listed values pass, and a sequence left incomplete at the end of the string is not replaced -- confirm. |
207 | | - * |
208 | | - * @param $str \type{$string} String to clean |
209 | | - * @return \type{$string} Cleaned string |
210 | | - */ |
211 | | - private static function cleanUtf8( $str ) { |
212 | | - # Byte-wise scan. $left = continuation bytes still expected, $bytes = length of |
213 | | - # the sequence being read, $sum = code point accumulated from its payload bits. |
214 | | - |
215 | | - $illegal = array( 0xD800, 0xDB7F, 0xDB80, 0xDBFF, |
216 | | - 0xDC00, 0xDF80, 0xDFFF, 0xFFFE, 0xFFFF ); |
217 | | - $len = strlen( $str ); |
218 | | - $left = $bytes = 0; |
219 | | - for ( $i = 0; $i < $len; $i++ ) { |
220 | | - $ch = ord( $str[$i] ); |
221 | | - if ( !$left ) { |
222 | | - if ( !($ch & 0x80 ) ) |
223 | | - continue; |
224 | | - $left = (( $ch & 0xFE ) == 0xFC ? 5 : |
225 | | - (( $ch & 0xFC ) == 0xF8 ? 4 : |
226 | | - (( $ch & 0xF8 ) == 0xF0 ? 3 : |
227 | | - (( $ch & 0xF0 ) == 0xE0 ? 2 : |
228 | | - (( $ch & 0xE0 ) == 0xC0 ? 1 : |
229 | | - 0 ))))); |
230 | | - if ( $left ) { |
231 | | - $bytes = $left + 1; |
232 | | - $sum = $ch & ( 0xFF >> $bytes + 1 ); |
233 | | - continue; |
234 | | - } else if ( $ch & 0x80 ) { |
235 | | - $bytes = 1; |
236 | | - } |
237 | | - } else if ( ( $ch & 0xC0 ) == 0x80 ) { |
238 | | - $sum <<= 6; |
239 | | - $sum += $ch & 0x3F; |
240 | | - if ( --$left ) continue; |
241 | | - if ( ( $bytes == 2 && $sum < 0x80 ) || |
242 | | - ( $bytes == 3 && $sum < 0x800 ) || |
243 | | - ( $bytes == 4 && $sum < 0x10000 ) || |
244 | | - ( $bytes > 4 || $sum > 0x10FFFF ) || |
245 | | - in_array( $sum, $illegal ) ) { |
246 | | - } else continue; |
247 | | - |
248 | | - } else { |
249 | | - $bytes -= $left; |
250 | | - $i--; |
251 | | - } |
252 | | - |
253 | | - $str = ( substr( $str, 0, $i - $bytes + 1 ) . |
254 | | - "\xEF\xBF\xBD" . |
255 | | - substr( $str, $i + 1 ) ); |
256 | | - $i += 3 - $bytes; |
257 | | - $len += 3 - $bytes; |
258 | | - $left = 0; |
259 | | - } |
260 | | - |
261 | | - return $str; |
262 | | - } |
263 | 183 | } |
264 | 184 | |
265 | 185 | /** |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -132,7 +132,6 @@ |
133 | 133 | |
134 | 134 | === Bug fixes in 1.14 === |
135 | 135 | |
136 | | -* (bug 332) Clean invalid UTF-8 to ensure output is RFC 3629 compliant |
137 | 136 | * (bug 14907) DatabasePostgres::fieldType now defined. |
138 | 137 | * (bug 14659) Passing the default limit param to Special:Recentchanges no more |
139 | 138 | falls back to the user option |