Index: trunk/phase3/includes/normal/UtfNormal.php |
— | — | @@ -45,6 +45,7 @@ |
46 | 46 | define( 'UNORM_FCD', 6 ); |
47 | 47 | |
48 | 48 | define( 'NORMALIZE_ICU', function_exists( 'utf8_normalize' ) ); |
| 49 | +define( 'NORMALIZE_INTL', function_exists( 'normalizer_normalize' ) ); |
49 | 50 | |
50 | 51 | /** |
51 | 52 | * Unicode normalization routines for working with UTF-8 strings. |
— | — | @@ -79,7 +80,7 @@ |
80 | 81 | return $ret; |
81 | 82 | } |
82 | 83 | |
83 | | - if( NORMALIZE_ICU ) { |
| 84 | + if( NORMALIZE_ICU || NORMALIZE_INTL ) { |
84 | 85 | # We exclude a few chars that ICU would not. |
85 | 86 | $string = preg_replace( |
86 | 87 | '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', |
— | — | @@ -90,7 +91,8 @@ |
91 | 92 | |
92 | 93 | # UnicodeString constructor fails if the string ends with a |
93 | 94 | # head byte. Add a junk char at the end, we'll strip it off. |
94 | | - return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" ); |
| 95 | + if ( NORMALIZE_ICU ) return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" ); |
| 96 | + if ( NORMALIZE_INTL ) return normalizer_normalize( $string, Normalizer::FORM_C ); |
95 | 97 | } elseif( UtfNormal::quickIsNFCVerify( $string ) ) { |
96 | 98 | # Side effect -- $string has had UTF-8 errors cleaned up. |
97 | 99 | return $string; |
— | — | @@ -108,7 +110,9 @@ |
109 | 111 | * @return string a UTF-8 string in normal form C |
110 | 112 | */ |
111 | 113 | static function toNFC( $string ) { |
112 | | - if( NORMALIZE_ICU ) |
| 114 | + if( NORMALIZE_INTL ) |
| 115 | + return normalizer_normalize( $string, Normalizer::FORM_C ); |
| 116 | + elseif( NORMALIZE_ICU ) |
113 | 117 | return utf8_normalize( $string, UNORM_NFC ); |
114 | 118 | elseif( UtfNormal::quickIsNFC( $string ) ) |
115 | 119 | return $string; |
— | — | @@ -124,7 +128,9 @@ |
125 | 129 | * @return string a UTF-8 string in normal form D |
126 | 130 | */ |
127 | 131 | static function toNFD( $string ) { |
128 | | - if( NORMALIZE_ICU ) |
| 132 | + if( NORMALIZE_INTL ) |
| 133 | + return normalizer_normalize( $string, Normalizer::FORM_D ); |
| 134 | + elseif( NORMALIZE_ICU ) |
129 | 135 | return utf8_normalize( $string, UNORM_NFD ); |
130 | 136 | elseif( preg_match( '/[\x80-\xff]/', $string ) ) |
131 | 137 | return UtfNormal::NFD( $string ); |
— | — | @@ -141,7 +147,9 @@ |
142 | 148 | * @return string a UTF-8 string in normal form KC |
143 | 149 | */ |
144 | 150 | static function toNFKC( $string ) { |
145 | | - if( NORMALIZE_ICU ) |
| 151 | + if( NORMALIZE_INTL ) |
| 152 | + return normalizer_normalize( $string, Normalizer::FORM_KC ); |
| 153 | + elseif( NORMALIZE_ICU ) |
146 | 154 | return utf8_normalize( $string, UNORM_NFKC ); |
147 | 155 | elseif( preg_match( '/[\x80-\xff]/', $string ) ) |
148 | 156 | return UtfNormal::NFKC( $string ); |
— | — | @@ -158,7 +166,9 @@ |
159 | 167 | * @return string a UTF-8 string in normal form KD |
160 | 168 | */ |
161 | 169 | static function toNFKD( $string ) { |
162 | | - if( NORMALIZE_ICU ) |
| 170 | + if( NORMALIZE_INTL ) |
| 171 | + return normalizer_normalize( $string, Normalizer::FORM_KD ); |
| 172 | + elseif( NORMALIZE_ICU ) |
163 | 173 | return utf8_normalize( $string, UNORM_NFKD ); |
164 | 174 | elseif( preg_match( '/[\x80-\xff]/', $string ) ) |
165 | 175 | return UtfNormal::NFKD( $string ); |