Index: trunk/phase3/includes/GlobalFunctions.php |
— | — | @@ -20,170 +20,206 @@ |
21 | 21 | * Re-implementations of newer functions or functions in non-standard |
22 | 22 | * PHP extensions may be included here. |
23 | 23 | */ |
| 24 | + |
| 25 | +# iconv support is not in the default configuration and so may not be present. |
| 26 | +# Assumes that only utf-8 and iso-8859-1 will ever be used. |
| 27 | +# This will *not* work in all circumstances. |
| 28 | +function fallback_iconv( $from, $to, $string ) { |
| 29 | + if ( substr( $to, -8 ) == '//IGNORE' ) { |
| 30 | + $to = substr( $to, 0, strlen( $to ) - 8 ); |
| 31 | + } |
| 32 | + if( strcasecmp( $from, $to ) == 0 ) { |
| 33 | + return $string; |
| 34 | + } |
| 35 | + if( strcasecmp( $from, 'utf-8' ) == 0 ) { |
| 36 | + return utf8_decode( $string ); |
| 37 | + } |
| 38 | + if( strcasecmp( $to, 'utf-8' ) == 0 ) { |
| 39 | + return utf8_encode( $string ); |
| 40 | + } |
| 41 | + return $string; |
| 42 | +} |
| 43 | + |
24 | 44 | if( !function_exists( 'iconv' ) ) { |
25 | | - # iconv support is not in the default configuration and so may not be present. |
26 | | - # Assume will only ever use utf-8 and iso-8859-1. |
27 | | - # This will *not* work in all circumstances. |
28 | 45 | function iconv( $from, $to, $string ) { |
29 | | - if ( substr( $to, -8 ) == '//IGNORE' ) { |
30 | | - $to = substr( $to, 0, strlen( $to ) - 8 ); |
31 | | - } |
32 | | - if( strcasecmp( $from, $to ) == 0 ) { |
33 | | - return $string; |
34 | | - } |
35 | | - if( strcasecmp( $from, 'utf-8' ) == 0 ) { |
36 | | - return utf8_decode( $string ); |
37 | | - } |
38 | | - if( strcasecmp( $to, 'utf-8' ) == 0 ) { |
39 | | - return utf8_encode( $string ); |
40 | | - } |
41 | | - return $string; |
| 46 | + return fallback_iconv( $from, $to, $string ); |
42 | 47 | } |
43 | 48 | } |
44 | 49 | |
45 | | -if ( !function_exists( 'mb_substr' ) ) { |
46 | | - /** |
47 | | - * Fallback implementation for mb_substr, hardcoded to UTF-8. |
48 | | - * Attempts to be at least _moderately_ efficient; best optimized |
49 | | - * for relatively small offset and count values -- about 5x slower |
50 | | - * than native mb_string in my testing. |
51 | | - * |
52 | | - * Larger offsets are still fairly efficient for Latin text, but |
53 | | - * can be up to 100x slower than native if the text is heavily |
54 | | - * multibyte and we have to slog through a few hundred kb. |
55 | | - */ |
56 | | - function mb_substr( $str, $start, $count='end' ) { |
57 | | - if( $start != 0 ) { |
58 | | - $split = mb_substr_split_unicode( $str, intval( $start ) ); |
59 | | - $str = substr( $str, $split ); |
60 | | - } |
61 | 50 | |
62 | | - if( $count !== 'end' ) { |
63 | | - $split = mb_substr_split_unicode( $str, intval( $count ) ); |
64 | | - $str = substr( $str, 0, $split ); |
65 | | - } |
66 | 51 | |
67 | | - return $str; |
| 52 | + |
| 53 | +/** |
| 54 | + * Fallback implementation for mb_substr, hardcoded to UTF-8. |
| 55 | + * Attempts to be at least _moderately_ efficient; best optimized |
| 56 | + * for relatively small offset and count values -- about 5x slower |
| 57 | + * than native mbstring in my testing. |
| 58 | + * |
| 59 | + * Larger offsets are still fairly efficient for Latin text, but |
| 60 | + * can be up to 100x slower than native if the text is heavily |
| 61 | + * multibyte and we have to slog through a few hundred kb. |
| 62 | + */ |
| 63 | +function fallback_mb_substr( $str, $start, $count='end' ) { |
| 64 | + if( $start != 0 ) { |
| 65 | + $split = fallback_mb_substr_split_unicode( $str, intval( $start ) ); |
| 66 | + $str = substr( $str, $split ); |
68 | 67 | } |
69 | 68 | |
70 | | - function mb_substr_split_unicode( $str, $splitPos ) { |
71 | | - if( $splitPos == 0 ) { |
72 | | - return 0; |
73 | | - } |
| 69 | + if( $count !== 'end' ) { |
| 70 | + $split = fallback_mb_substr_split_unicode( $str, intval( $count ) ); |
| 71 | + $str = substr( $str, 0, $split ); |
| 72 | + } |
74 | 73 | |
75 | | - $byteLen = strlen( $str ); |
| 74 | + return $str; |
| 75 | +} |
76 | 76 | |
77 | | - if( $splitPos > 0 ) { |
78 | | - if( $splitPos > 256 ) { |
79 | | - // Optimize large string offsets by skipping ahead N bytes. |
80 | | - // This will cut out most of our slow time on Latin-based text, |
81 | | - // and 1/2 to 1/3 on East European and Asian scripts. |
82 | | - $bytePos = $splitPos; |
83 | | - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { |
84 | | - ++$bytePos; |
85 | | - } |
86 | | - $charPos = mb_strlen( substr( $str, 0, $bytePos ) ); |
87 | | - } else { |
88 | | - $charPos = 0; |
89 | | - $bytePos = 0; |
| 77 | +function fallback_mb_substr_split_unicode( $str, $splitPos ) { |
| 78 | + if( $splitPos == 0 ) { |
| 79 | + return 0; |
| 80 | + } |
| 81 | + |
| 82 | + $byteLen = strlen( $str ); |
| 83 | + |
| 84 | + if( $splitPos > 0 ) { |
| 85 | + if( $splitPos > 256 ) { |
| 86 | + // Optimize large string offsets by skipping ahead N bytes. |
| 87 | + // This will cut out most of our slow time on Latin-based text, |
| 88 | + // and 1/2 to 1/3 on East European and Asian scripts. |
| 89 | + $bytePos = $splitPos; |
| 90 | + while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { |
| 91 | + ++$bytePos; |
90 | 92 | } |
| 93 | + $charPos = mb_strlen( substr( $str, 0, $bytePos ) ); |
| 94 | + } else { |
| 95 | + $charPos = 0; |
| 96 | + $bytePos = 0; |
| 97 | + } |
91 | 98 | |
92 | | - while( $charPos++ < $splitPos ) { |
| 99 | + while( $charPos++ < $splitPos ) { |
| 100 | + ++$bytePos; |
| 101 | + // Move past any tail bytes |
| 102 | + while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { |
93 | 103 | ++$bytePos; |
94 | | - // Move past any tail bytes |
95 | | - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { |
96 | | - ++$bytePos; |
97 | | - } |
98 | 104 | } |
99 | | - } else { |
100 | | - $splitPosX = $splitPos + 1; |
101 | | - $charPos = 0; // relative to end of string; we don't care about the actual char position here |
102 | | - $bytePos = $byteLen; |
103 | | - while( $bytePos > 0 && $charPos-- >= $splitPosX ) { |
| 105 | + } |
| 106 | + } else { |
| 107 | + $splitPosX = $splitPos + 1; |
| 108 | + $charPos = 0; // relative to end of string; we don't care about the actual char position here |
| 109 | + $bytePos = $byteLen; |
| 110 | + while( $bytePos > 0 && $charPos-- >= $splitPosX ) { |
| 111 | + --$bytePos; |
| 112 | + // Move past any tail bytes |
| 113 | + while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { |
104 | 114 | --$bytePos; |
105 | | - // Move past any tail bytes |
106 | | - while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) { |
107 | | - --$bytePos; |
108 | | - } |
109 | 115 | } |
110 | 116 | } |
| 117 | + } |
111 | 118 | |
112 | | - return $bytePos; |
| 119 | + return $bytePos; |
| 120 | +} |
| 121 | + |
| 122 | +if ( !function_exists( 'mb_substr' ) ) { |
| 123 | + function mb_substr( $str, $start, $count='end' ) { |
| 124 | + return fallback_mb_substr( $str, $start, $count ); |
113 | 125 | } |
| 126 | + |
| 127 | + function mb_substr_split_unicode( $str, $splitPos ) { |
| 128 | + return fallback_mb_substr_split_unicode( $str, $splitPos ); |
| 129 | + } |
114 | 130 | } |
115 | 131 | |
| 132 | + |
| 133 | + |
| 134 | +/** |
| 135 | + * Fallback implementation of mb_strlen, hardcoded to UTF-8. |
| 136 | + * @param string $str |
| 137 | + * @param string $enc optional encoding; ignored |
| 138 | + * @return int |
| 139 | + */ |
| 140 | +function fallback_mb_strlen( $str, $enc = '' ) { |
| 141 | + $counts = count_chars( $str ); |
| 142 | + $total = 0; |
| 143 | + |
| 144 | + // Count ASCII bytes |
| 145 | + for( $i = 0; $i < 0x80; $i++ ) { |
| 146 | + $total += $counts[$i]; |
| 147 | + } |
| 148 | + |
| 149 | + // Count multibyte sequence heads |
| 150 | + for( $i = 0xc0; $i < 0xff; $i++ ) { |
| 151 | + $total += $counts[$i]; |
| 152 | + } |
| 153 | + return $total; |
| 154 | +} |
| 155 | + |
116 | 156 | if ( !function_exists( 'mb_strlen' ) ) { |
117 | | - /** |
118 | | - * Fallback implementation of mb_strlen, hardcoded to UTF-8. |
119 | | - * @param string $str |
120 | | - * @param string $enc optional encoding; ignored |
121 | | - * @return int |
122 | | - */ |
123 | 157 | function mb_strlen( $str, $enc = '' ) { |
124 | | - $counts = count_chars( $str ); |
125 | | - $total = 0; |
| 158 | + return fallback_mb_strlen( $str, $enc ); |
| 159 | + } |
| 160 | +} |
126 | 161 | |
127 | | - // Count ASCII bytes |
128 | | - for( $i = 0; $i < 0x80; $i++ ) { |
129 | | - $total += $counts[$i]; |
130 | | - } |
131 | 162 | |
132 | | - // Count multibyte sequence heads |
133 | | - for( $i = 0xc0; $i < 0xff; $i++ ) { |
134 | | - $total += $counts[$i]; |
135 | | - } |
136 | | - return $total; |
| 163 | + |
| 164 | +/** |
| 165 | + * Fallback implementation of mb_strpos, hardcoded to UTF-8. |
| 166 | + * @param $haystack String |
| 167 | + * @param $needle String |
| 168 | + * @param $offset Int: optional start position |
| 169 | + * @param $encoding String: optional encoding; ignored |
| 170 | + * @return int|false offset of the first match as reported by preg_match, or false if there is no match |
| 171 | + */ |
| 172 | +function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) { |
| 173 | + $needle = preg_quote( $needle, '/' ); |
| 174 | + |
| 175 | + $ar = array(); |
| 176 | + preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); |
| 177 | + |
| 178 | + if( isset( $ar[0][1] ) ) { |
| 179 | + return $ar[0][1]; |
| 180 | + } else { |
| 181 | + return false; |
137 | 182 | } |
138 | 183 | } |
139 | 184 | |
140 | | - |
141 | 185 | if( !function_exists( 'mb_strpos' ) ) { |
142 | | - /** |
143 | | - * Fallback implementation of mb_strpos, hardcoded to UTF-8. |
144 | | - * @param $haystack String |
145 | | - * @param $needle String |
146 | | - * @param $offset String: optional start position |
147 | | - * @param $encoding String: optional encoding; ignored |
148 | | - * @return int |
149 | | - */ |
| 186 | + |
150 | 187 | function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) { |
151 | | - $needle = preg_quote( $needle, '/' ); |
| 188 | + return fallback_mb_strpos( $haystack, $needle, $offset, $encoding ); |
| 189 | + } |
| 190 | + |
| 191 | +} |
152 | 192 | |
153 | | - $ar = array(); |
154 | | - preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); |
155 | 193 | |
156 | | - if( isset( $ar[0][1] ) ) { |
157 | | - return $ar[0][1]; |
158 | | - } else { |
159 | | - return false; |
160 | | - } |
| 194 | + |
| 195 | +/** |
| 196 | + * Fallback implementation of mb_strrpos, hardcoded to UTF-8. |
| 197 | + * @param $haystack String |
| 198 | + * @param $needle String |
| 199 | + * @param $offset Int: optional start position |
| 200 | + * @param $encoding String: optional encoding; ignored |
| 201 | + * @return int|false offset of the last match as reported by preg_match_all, or false if there is no match |
| 202 | + */ |
| 203 | +function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) { |
| 204 | + $needle = preg_quote( $needle, '/' ); |
| 205 | + |
| 206 | + $ar = array(); |
| 207 | + preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); |
| 208 | + |
| 209 | + if( isset( $ar[0] ) && count( $ar[0] ) > 0 && |
| 210 | + isset( $ar[0][count( $ar[0] ) - 1][1] ) ) { |
| 211 | + return $ar[0][count( $ar[0] ) - 1][1]; |
| 212 | + } else { |
| 213 | + return false; |
161 | 214 | } |
162 | 215 | } |
163 | 216 | |
164 | 217 | if( !function_exists( 'mb_strrpos' ) ) { |
165 | | - /** |
166 | | - * Fallback implementation of mb_strrpos, hardcoded to UTF-8. |
167 | | - * @param $haystack String |
168 | | - * @param $needle String |
169 | | - * @param $offset String: optional start position |
170 | | - * @param $encoding String: optional encoding; ignored |
171 | | - * @return int |
172 | | - */ |
173 | 218 | function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) { |
174 | | - $needle = preg_quote( $needle, '/' ); |
175 | | - |
176 | | - $ar = array(); |
177 | | - preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset ); |
178 | | - |
179 | | - if( isset( $ar[0] ) && count( $ar[0] ) > 0 && |
180 | | - isset( $ar[0][count( $ar[0] ) - 1][1] ) ) { |
181 | | - return $ar[0][count( $ar[0] ) - 1][1]; |
182 | | - } else { |
183 | | - return false; |
184 | | - } |
| 219 | + return fallback_mb_strrpos( $haystack, $needle, $offset, $encoding ); |
185 | 220 | } |
186 | 221 | } |
187 | 222 | |
| 223 | + |
188 | 224 | // Support for Wietse Venema's taint feature |
189 | 225 | if ( !function_exists( 'istainted' ) ) { |
190 | 226 | function istainted( $var ) { |
— | — | @@ -200,6 +236,7 @@ |
201 | 237 | /// @endcond |
202 | 238 | |
203 | 239 | |
| 240 | + |
204 | 241 | /** |
205 | 242 | * Like array_diff( $a, $b ) except that it works with two-dimensional arrays. |
206 | 243 | */ |