Index: trunk/phase3/includes/WebRequest.php |
— | — | @@ -220,13 +220,10 @@ |
221 | 221 | */ |
222 | 222 | function getGPCVal( $arr, $name, $default ) { |
223 | 223 | if( isset( $arr[$name] ) ) { |
224 | | - global $wgContLang; |
225 | 224 | $data = $arr[$name]; |
226 | 225 | if( isset( $_GET[$name] ) && !is_array( $data ) ) { |
227 | 226 | # Check for alternate/legacy character encoding. |
228 | | - if( isset( $wgContLang ) ) { |
229 | | - $data = $wgContLang->checkTitleEncoding( $data ); |
230 | | - } |
| 227 | + $data = $this->checkTitleEncoding( $data ); |
231 | 228 | } |
232 | 229 | $data = $this->normalizeUnicode( $data ); |
233 | 230 | return $data; |
— | — | @@ -234,7 +231,21 @@ |
235 | 232 | return $default; |
236 | 233 | } |
237 | 234 | } |
| 235 | + |
/**
 * Return $s unchanged when it is plain ASCII or well-formed UTF-8;
 * otherwise hand it to the content language object to be converted
 * from its fallback 8-bit encoding to UTF-8.
 *
 * If $wgContLang is not set yet, the input is returned untouched.
 *
 * @param string $s Raw request value (callers pass scalars only).
 * @return string The input itself, or the result of $wgContLang->iconv().
 */
protected function checkTitleEncoding( $s ) {
	global $wgContLang;
	if( !isset( $wgContLang ) ) {
		return $s;
	}

	# Check for non-UTF-8 URLs: a string with no high-bit bytes is
	# plain ASCII and needs no conversion.
	if( !preg_match( '/[\x80-\xff]/', $s ) ) {
		return $s;
	}

	# Let PCRE validate the subject as UTF-8 instead of matching a
	# hand-written "(...)+$" pattern. The repeated group could exhaust
	# the PCRE backtrack/recursion limits on long values, in which case
	# preg_match() returns false and a perfectly valid UTF-8 string
	# would have been re-encoded (and corrupted) below. The empty
	# pattern with /u only succeeds when the whole subject is
	# well-formed UTF-8, which also rejects overlong sequences and
	# invalid lead bytes that the old pattern let through.
	if( preg_match( '//u', $s ) === 1 ) {
		return $s;
	}

	# Do the heavy lifting by unstubbing $wgContLang
	return $wgContLang->iconv( $wgContLang->fallback8bitEncoding(), "utf-8", $s );
}
| 249 | + |
239 | 250 | /** |
240 | 251 | * Fetch a scalar from the input or return $default if it's not set. |
241 | 252 | * Returns a string. Arrays are discarded. Useful for |