r79463 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r79462 | r79463 | r79464 >
Date:01:29, 2 January 2011
Author:soxred93
Status:resolved (Comments)
Tags:
Comment:
Move fallback function creation out of function_exists() conditionals.
This allows for unit testing of the fallback functions to ensure that
they work like the real functions do
Modified paths:
  • /trunk/phase3/includes/GlobalFunctions.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/GlobalFunctions.php
@@ -20,170 +20,206 @@
2121 * Re-implementations of newer functions or functions in non-standard
2222 * PHP extensions may be included here.
2323 */
 24+
 25+# iconv support is not in the default configuration and so may not be present.
 26+# Assume will only ever use utf-8 and iso-8859-1.
 27+# This will *not* work in all circumstances.
 28+function fallback_iconv( $from, $to, $string ) {
 29+ if ( substr( $to, -8 ) == '//IGNORE' ) {
 30+ $to = substr( $to, 0, strlen( $to ) - 8 );
 31+ }
 32+ if( strcasecmp( $from, $to ) == 0 ) {
 33+ return $string;
 34+ }
 35+ if( strcasecmp( $from, 'utf-8' ) == 0 ) {
 36+ return utf8_decode( $string );
 37+ }
 38+ if( strcasecmp( $to, 'utf-8' ) == 0 ) {
 39+ return utf8_encode( $string );
 40+ }
 41+ return $string;
 42+}
 43+
2444 if( !function_exists( 'iconv' ) ) {
25 - # iconv support is not in the default configuration and so may not be present.
26 - # Assume will only ever use utf-8 and iso-8859-1.
27 - # This will *not* work in all circumstances.
2845 function iconv( $from, $to, $string ) {
29 - if ( substr( $to, -8 ) == '//IGNORE' ) {
30 - $to = substr( $to, 0, strlen( $to ) - 8 );
31 - }
32 - if( strcasecmp( $from, $to ) == 0 ) {
33 - return $string;
34 - }
35 - if( strcasecmp( $from, 'utf-8' ) == 0 ) {
36 - return utf8_decode( $string );
37 - }
38 - if( strcasecmp( $to, 'utf-8' ) == 0 ) {
39 - return utf8_encode( $string );
40 - }
41 - return $string;
 46+ return fallback_iconv( $from, $to, $string )
4247 }
4348 }
4449
45 -if ( !function_exists( 'mb_substr' ) ) {
46 - /**
47 - * Fallback implementation for mb_substr, hardcoded to UTF-8.
48 - * Attempts to be at least _moderately_ efficient; best optimized
49 - * for relatively small offset and count values -- about 5x slower
50 - * than native mb_string in my testing.
51 - *
52 - * Larger offsets are still fairly efficient for Latin text, but
53 - * can be up to 100x slower than native if the text is heavily
54 - * multibyte and we have to slog through a few hundred kb.
55 - */
56 - function mb_substr( $str, $start, $count='end' ) {
57 - if( $start != 0 ) {
58 - $split = mb_substr_split_unicode( $str, intval( $start ) );
59 - $str = substr( $str, $split );
60 - }
6150
62 - if( $count !== 'end' ) {
63 - $split = mb_substr_split_unicode( $str, intval( $count ) );
64 - $str = substr( $str, 0, $split );
65 - }
6651
67 - return $str;
 52+
 53+/**
 54+ * Fallback implementation for mb_substr, hardcoded to UTF-8.
 55+ * Attempts to be at least _moderately_ efficient; best optimized
 56+ * for relatively small offset and count values -- about 5x slower
 57+ * than native mb_string in my testing.
 58+ *
 59+ * Larger offsets are still fairly efficient for Latin text, but
 60+ * can be up to 100x slower than native if the text is heavily
 61+ * multibyte and we have to slog through a few hundred kb.
 62+ */
 63+function fallback_mb_substr( $str, $start, $count='end' ) {
 64+ if( $start != 0 ) {
 65+ $split = fallback_mb_substr_split_unicode( $str, intval( $start ) );
 66+ $str = substr( $str, $split );
6867 }
6968
70 - function mb_substr_split_unicode( $str, $splitPos ) {
71 - if( $splitPos == 0 ) {
72 - return 0;
73 - }
 69+ if( $count !== 'end' ) {
 70+ $split = fallback_mb_substr_split_unicode( $str, intval( $count ) );
 71+ $str = substr( $str, 0, $split );
 72+ }
7473
75 - $byteLen = strlen( $str );
 74+ return $str;
 75+}
7676
77 - if( $splitPos > 0 ) {
78 - if( $splitPos > 256 ) {
79 - // Optimize large string offsets by skipping ahead N bytes.
80 - // This will cut out most of our slow time on Latin-based text,
81 - // and 1/2 to 1/3 on East European and Asian scripts.
82 - $bytePos = $splitPos;
83 - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
84 - ++$bytePos;
85 - }
86 - $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
87 - } else {
88 - $charPos = 0;
89 - $bytePos = 0;
 77+function fallback_mb_substr_split_unicode( $str, $splitPos ) {
 78+ if( $splitPos == 0 ) {
 79+ return 0;
 80+ }
 81+
 82+ $byteLen = strlen( $str );
 83+
 84+ if( $splitPos > 0 ) {
 85+ if( $splitPos > 256 ) {
 86+ // Optimize large string offsets by skipping ahead N bytes.
 87+ // This will cut out most of our slow time on Latin-based text,
 88+ // and 1/2 to 1/3 on East European and Asian scripts.
 89+ $bytePos = $splitPos;
 90+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
 91+ ++$bytePos;
9092 }
 93+ $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
 94+ } else {
 95+ $charPos = 0;
 96+ $bytePos = 0;
 97+ }
9198
92 - while( $charPos++ < $splitPos ) {
 99+ while( $charPos++ < $splitPos ) {
 100+ ++$bytePos;
 101+ // Move past any tail bytes
 102+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
93103 ++$bytePos;
94 - // Move past any tail bytes
95 - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
96 - ++$bytePos;
97 - }
98104 }
99 - } else {
100 - $splitPosX = $splitPos + 1;
101 - $charPos = 0; // relative to end of string; we don't care about the actual char position here
102 - $bytePos = $byteLen;
103 - while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
 105+ }
 106+ } else {
 107+ $splitPosX = $splitPos + 1;
 108+ $charPos = 0; // relative to end of string; we don't care about the actual char position here
 109+ $bytePos = $byteLen;
 110+ while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
 111+ --$bytePos;
 112+ // Move past any tail bytes
 113+ while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
104114 --$bytePos;
105 - // Move past any tail bytes
106 - while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
107 - --$bytePos;
108 - }
109115 }
110116 }
 117+ }
111118
112 - return $bytePos;
 119+ return $bytePos;
 120+}
 121+
 122+if ( !function_exists( 'mb_substr' ) ) {
 123+ function mb_substr( $str, $start, $count='end' ) {
 124+ return fallback_mb_substr( $str, $start, $count )
113125 }
 126+
 127+ function mb_substr_split_unicode( $str, $splitPos ) {
 128+ return fallback_mb_substr_split_unicode( $str, $splitPos );
 129+ }
114130 }
115131
 132+
 133+
 134+/**
 135+ * Fallback implementation of mb_strlen, hardcoded to UTF-8.
 136+ * @param string $str
 137+ * @param string $enc optional encoding; ignored
 138+ * @return int
 139+ */
 140+function fallback_mb_strlen( $str, $enc = '' ) {
 141+ $counts = count_chars( $str );
 142+ $total = 0;
 143+
 144+ // Count ASCII bytes
 145+ for( $i = 0; $i < 0x80; $i++ ) {
 146+ $total += $counts[$i];
 147+ }
 148+
 149+ // Count multibyte sequence heads
 150+ for( $i = 0xc0; $i < 0xff; $i++ ) {
 151+ $total += $counts[$i];
 152+ }
 153+ return $total;
 154+}
 155+
116156 if ( !function_exists( 'mb_strlen' ) ) {
117 - /**
118 - * Fallback implementation of mb_strlen, hardcoded to UTF-8.
119 - * @param string $str
120 - * @param string $enc optional encoding; ignored
121 - * @return int
122 - */
123157 function mb_strlen( $str, $enc = '' ) {
124 - $counts = count_chars( $str );
125 - $total = 0;
 158+ return fallback_mb_strlen( $str, $enc );
 159+ }
 160+}
126161
127 - // Count ASCII bytes
128 - for( $i = 0; $i < 0x80; $i++ ) {
129 - $total += $counts[$i];
130 - }
131162
132 - // Count multibyte sequence heads
133 - for( $i = 0xc0; $i < 0xff; $i++ ) {
134 - $total += $counts[$i];
135 - }
136 - return $total;
 163+
 164+/**
 165+ * Fallback implementation of mb_strpos, hardcoded to UTF-8.
 166+ * @param $haystack String
 167+ * @param $needle String
 168+ * @param $offset String: optional start position
 169+ * @param $encoding String: optional encoding; ignored
 170+ * @return int
 171+ */
 172+function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 173+ $needle = preg_quote( $needle, '/' );
 174+
 175+ $ar = array();
 176+ preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 177+
 178+ if( isset( $ar[0][1] ) ) {
 179+ return $ar[0][1];
 180+ } else {
 181+ return false;
137182 }
138183 }
139184
140 -
141185 if( !function_exists( 'mb_strpos' ) ) {
142 - /**
143 - * Fallback implementation of mb_strpos, hardcoded to UTF-8.
144 - * @param $haystack String
145 - * @param $needle String
146 - * @param $offset String: optional start position
147 - * @param $encoding String: optional encoding; ignored
148 - * @return int
149 - */
 186+
150187 function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
151 - $needle = preg_quote( $needle, '/' );
 188+ return fallback_mb_strpos( $haystack, $needle, $offset, $encoding );
 189+ }
 190+
 191+}
152192
153 - $ar = array();
154 - preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
155193
156 - if( isset( $ar[0][1] ) ) {
157 - return $ar[0][1];
158 - } else {
159 - return false;
160 - }
 194+
 195+/**
 196+ * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
 197+ * @param $haystack String
 198+ * @param $needle String
 199+ * @param $offset String: optional start position
 200+ * @param $encoding String: optional encoding; ignored
 201+ * @return int
 202+ */
 203+function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 204+ $needle = preg_quote( $needle, '/' );
 205+
 206+ $ar = array();
 207+ preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 208+
 209+ if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
 210+ isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
 211+ return $ar[0][count( $ar[0] ) - 1][1];
 212+ } else {
 213+ return false;
161214 }
162215 }
163216
164217 if( !function_exists( 'mb_strrpos' ) ) {
165 - /**
166 - * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
167 - * @param $haystack String
168 - * @param $needle String
169 - * @param $offset String: optional start position
170 - * @param $encoding String: optional encoding; ignored
171 - * @return int
172 - */
173218 function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
174 - $needle = preg_quote( $needle, '/' );
175 -
176 - $ar = array();
177 - preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
178 -
179 - if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
180 - isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
181 - return $ar[0][count( $ar[0] ) - 1][1];
182 - } else {
183 - return false;
184 - }
 219+ return fallback_mb_strrpos( $haystack, $needle, $offset, $encoding );
185220 }
186221 }
187222
 223+
188224 // Support for Wietse Venema's taint feature
189225 if ( !function_exists( 'istainted' ) ) {
190226 function istainted( $var ) {
@@ -200,6 +236,7 @@
201237 /// @endcond
202238
203239
 240+
204241 /**
205242 * Like array_diff( $a, $b ) except that it works with two-dimensional arrays.
206243 */

Follow-up revisions

Revision | Commit summary | Author | Date
r79465 | Fix r79463 and r79464: Syntax error | soxred93 | 01:35, 2 January 2011
r79494 | Followup to r79463: Move fallback functions to new Fallback class | soxred93 | 15:54, 2 January 2011

Comments

#Comment by Nikerabbit (talk | contribs)   08:19, 2 January 2011

Whii, can we now move them away from GlobalFunctions.php?

#Comment by Platonides (talk | contribs)   14:37, 2 January 2011

Why not? Fallback::mb_strrpos()...

#Comment by X! (talk | contribs)   14:39, 2 January 2011

That's on the to-do list for today. :)

#Comment by X! (talk | contribs)   15:55, 2 January 2011

Done in r79494.

Status & tagging log