r69626 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r69625 | r69626 | r69627 >
Date: 15:41, 20 July 2010
Author: mah
Status: ok
Tags:
Comment:
Prefer the intl PECL extension for ICU Unicode
Modified paths:
  • /trunk/phase3/includes/normal/UtfNormal.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/normal/UtfNormal.php
@@ -45,6 +45,7 @@
4646 define( 'UNORM_FCD', 6 );
4747
4848 define( 'NORMALIZE_ICU', function_exists( 'utf8_normalize' ) );
 49+define( 'NORMALIZE_INTL', function_exists( 'normalizer_normalize' ) );
4950
5051 /**
5152 * Unicode normalization routines for working with UTF-8 strings.
@@ -79,7 +80,7 @@
8081 return $ret;
8182 }
8283
83 - if( NORMALIZE_ICU ) {
 84+ if( NORMALIZE_ICU || NORMALIZE_INTL ) {
8485 # We exclude a few chars that ICU would not.
8586 $string = preg_replace(
8687 '/[\x00-\x08\x0b\x0c\x0e-\x1f]/',
@@ -90,7 +91,8 @@
9192
9293 # UnicodeString constructor fails if the string ends with a
9394 # head byte. Add a junk char at the end, we'll strip it off.
94 - return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" );
 95+ if ( NORMALIZE_ICU ) return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" );
 96+ if ( NORMALIZE_INTL ) return normalizer_normalize( $string, Normalizer::FORM_C );
9597 } elseif( UtfNormal::quickIsNFCVerify( $string ) ) {
9698 # Side effect -- $string has had UTF-8 errors cleaned up.
9799 return $string;
@@ -108,7 +110,9 @@
109111 * @return string a UTF-8 string in normal form C
110112 */
111113 static function toNFC( $string ) {
112 - if( NORMALIZE_ICU )
 114+ if( NORMALIZE_INTL )
 115+ return normalizer_normalize( $string, Normalizer::FORM_C );
 116+ elseif( NORMALIZE_ICU )
113117 return utf8_normalize( $string, UNORM_NFC );
114118 elseif( UtfNormal::quickIsNFC( $string ) )
115119 return $string;
@@ -124,7 +128,9 @@
125129 * @return string a UTF-8 string in normal form D
126130 */
127131 static function toNFD( $string ) {
128 - if( NORMALIZE_ICU )
 132+ if( NORMALIZE_INTL )
 133+ return normalizer_normalize( $string, Normalizer::FORM_D );
 134+ elseif( NORMALIZE_ICU )
129135 return utf8_normalize( $string, UNORM_NFD );
130136 elseif( preg_match( '/[\x80-\xff]/', $string ) )
131137 return UtfNormal::NFD( $string );
@@ -141,7 +147,9 @@
142148 * @return string a UTF-8 string in normal form KC
143149 */
144150 static function toNFKC( $string ) {
145 - if( NORMALIZE_ICU )
 151+ if( NORMALIZE_INTL )
 152+ return normalizer_normalize( $string, Normalizer::FORM_KC );
 153+ elseif( NORMALIZE_ICU )
146154 return utf8_normalize( $string, UNORM_NFKC );
147155 elseif( preg_match( '/[\x80-\xff]/', $string ) )
148156 return UtfNormal::NFKC( $string );
@@ -158,7 +166,9 @@
159167 * @return string a UTF-8 string in normal form KD
160168 */
161169 static function toNFKD( $string ) {
162 - if( NORMALIZE_ICU )
 170+ if( NORMALIZE_INTL )
 171+ return normalizer_normalize( $string, Normalizer::FORM_KD );
 172+ elseif( NORMALIZE_ICU )
163173 return utf8_normalize( $string, UNORM_NFKD );
164174 elseif( preg_match( '/[\x80-\xff]/', $string ) )
165175 return UtfNormal::NFKD( $string );

Follow-up revisions

Revision | Commit summary | Author | Date
r86130 | (follow-up r69626) Make it so the intl normalizer_normalize function is not... | bawolff | 18:39, 15 April 2011

Status & tagging log