r79494 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r79493‎ | r79494 | r79495 >
Date:15:54, 2 January 2011
Author:soxred93
Status:resolved (Comments)
Tags:
Comment:
Followup to r79463: Move fallback functions to new Fallback class
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/Fallback.php (added) (history)
  • /trunk/phase3/includes/GlobalFunctions.php (modified) (history)
  • /trunk/phase3/tests/phpunit/includes/GlobalTest.php (modified) (history)

Diff [purge]

Index: trunk/phase3/tests/phpunit/includes/GlobalTest.php
@@ -422,7 +422,7 @@
423423
424424 $this->assertEquals(
425425 call_user_func_array( 'mb_substr', $param_set ),
426 - call_user_func_array( 'fallback_mb_substr', $param_set ),
 426+ call_user_func_array( array( 'Fallback', 'fallback_mb_substr' ), $param_set ),
427427 'Fallback mb_substr with params ' . implode( ', ', $old_param_set )
428428 );
429429 }
@@ -431,7 +431,7 @@
432432 //mb_strlen
433433 $this->assertEquals(
434434 mb_strlen( $sampleUTF ),
435 - fallback_mb_strlen( $sampleUTF ),
 435+ Fallback::fallback_mb_strlen( $sampleUTF ),
436436 'Fallback mb_strlen'
437437 );
438438
@@ -452,13 +452,13 @@
453453
454454 $this->assertEquals(
455455 call_user_func_array( 'mb_strpos', $param_set ),
456 - call_user_func_array( 'fallback_mb_strpos', $param_set ),
 456+ call_user_func_array( array( 'Fallback', 'fallback_mb_strpos' ), $param_set ),
457457 'Fallback mb_strpos with params ' . implode( ', ', $old_param_set )
458458 );
459459
460460 $this->assertEquals(
461461 call_user_func_array( 'mb_strrpos', $param_set ),
462 - call_user_func_array( 'fallback_mb_strrpos', $param_set ),
 462+ call_user_func_array( array( 'Fallback', 'fallback_mb_strrpos' ), $param_set ),
463463 'Fallback mb_strrpos with params ' . implode( ', ', $old_param_set )
464464 );
465465 }
Index: trunk/phase3/includes/GlobalFunctions.php
@@ -21,201 +21,39 @@
2222 * PHP extensions may be included here.
2323 */
2424
25 -# iconv support is not in the default configuration and so may not be present.
26 -# Assume will only ever use utf-8 and iso-8859-1.
27 -# This will *not* work in all circumstances.
28 -function fallback_iconv( $from, $to, $string ) {
29 - if ( substr( $to, -8 ) == '//IGNORE' ) {
30 - $to = substr( $to, 0, strlen( $to ) - 8 );
31 - }
32 - if( strcasecmp( $from, $to ) == 0 ) {
33 - return $string;
34 - }
35 - if( strcasecmp( $from, 'utf-8' ) == 0 ) {
36 - return utf8_decode( $string );
37 - }
38 - if( strcasecmp( $to, 'utf-8' ) == 0 ) {
39 - return utf8_encode( $string );
40 - }
41 - return $string;
42 -}
43 -
4425 if( !function_exists( 'iconv' ) ) {
4526 function iconv( $from, $to, $string ) {
46 - return fallback_iconv( $from, $to, $string );
 27+ return Fallback::fallback_iconv( $from, $to, $string );
4728 }
4829 }
4930
50 -
51 -
52 -
53 -/**
54 - * Fallback implementation for mb_substr, hardcoded to UTF-8.
55 - * Attempts to be at least _moderately_ efficient; best optimized
56 - * for relatively small offset and count values -- about 5x slower
57 - * than native mb_string in my testing.
58 - *
59 - * Larger offsets are still fairly efficient for Latin text, but
60 - * can be up to 100x slower than native if the text is heavily
61 - * multibyte and we have to slog through a few hundred kb.
62 - */
63 -function fallback_mb_substr( $str, $start, $count='end' ) {
64 - if( $start != 0 ) {
65 - $split = fallback_mb_substr_split_unicode( $str, intval( $start ) );
66 - $str = substr( $str, $split );
67 - }
68 -
69 - if( $count !== 'end' ) {
70 - $split = fallback_mb_substr_split_unicode( $str, intval( $count ) );
71 - $str = substr( $str, 0, $split );
72 - }
73 -
74 - return $str;
75 -}
76 -
77 -function fallback_mb_substr_split_unicode( $str, $splitPos ) {
78 - if( $splitPos == 0 ) {
79 - return 0;
80 - }
81 -
82 - $byteLen = strlen( $str );
83 -
84 - if( $splitPos > 0 ) {
85 - if( $splitPos > 256 ) {
86 - // Optimize large string offsets by skipping ahead N bytes.
87 - // This will cut out most of our slow time on Latin-based text,
88 - // and 1/2 to 1/3 on East European and Asian scripts.
89 - $bytePos = $splitPos;
90 - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
91 - ++$bytePos;
92 - }
93 - $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
94 - } else {
95 - $charPos = 0;
96 - $bytePos = 0;
97 - }
98 -
99 - while( $charPos++ < $splitPos ) {
100 - ++$bytePos;
101 - // Move past any tail bytes
102 - while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
103 - ++$bytePos;
104 - }
105 - }
106 - } else {
107 - $splitPosX = $splitPos + 1;
108 - $charPos = 0; // relative to end of string; we don't care about the actual char position here
109 - $bytePos = $byteLen;
110 - while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
111 - --$bytePos;
112 - // Move past any tail bytes
113 - while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
114 - --$bytePos;
115 - }
116 - }
117 - }
118 -
119 - return $bytePos;
120 -}
121 -
12231 if ( !function_exists( 'mb_substr' ) ) {
12332 function mb_substr( $str, $start, $count='end' ) {
124 - return fallback_mb_substr( $str, $start, $count );
 33+ return Fallback::fallback_mb_substr( $str, $start, $count );
12534 }
12635
12736 function mb_substr_split_unicode( $str, $splitPos ) {
128 - return fallback_mb_substr_split_unicode( $str, $splitPos );
 37+ return Fallback::fallback_mb_substr_split_unicode( $str, $splitPos );
12938 }
13039 }
13140
132 -
133 -
134 -/**
135 - * Fallback implementation of mb_strlen, hardcoded to UTF-8.
136 - * @param string $str
137 - * @param string $enc optional encoding; ignored
138 - * @return int
139 - */
140 -function fallback_mb_strlen( $str, $enc = '' ) {
141 - $counts = count_chars( $str );
142 - $total = 0;
143 -
144 - // Count ASCII bytes
145 - for( $i = 0; $i < 0x80; $i++ ) {
146 - $total += $counts[$i];
147 - }
148 -
149 - // Count multibyte sequence heads
150 - for( $i = 0xc0; $i < 0xff; $i++ ) {
151 - $total += $counts[$i];
152 - }
153 - return $total;
154 -}
155 -
15641 if ( !function_exists( 'mb_strlen' ) ) {
15742 function mb_strlen( $str, $enc = '' ) {
158 - return fallback_mb_strlen( $str, $enc );
 43+ return Fallback::fallback_mb_strlen( $str, $enc );
15944 }
16045 }
16146
162 -
163 -
164 -/**
165 - * Fallback implementation of mb_strpos, hardcoded to UTF-8.
166 - * @param $haystack String
167 - * @param $needle String
168 - * @param $offset String: optional start position
169 - * @param $encoding String: optional encoding; ignored
170 - * @return int
171 - */
172 -function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
173 - $needle = preg_quote( $needle, '/' );
174 -
175 - $ar = array();
176 - preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
177 -
178 - if( isset( $ar[0][1] ) ) {
179 - return $ar[0][1];
180 - } else {
181 - return false;
182 - }
183 -}
184 -
18547 if( !function_exists( 'mb_strpos' ) ) {
18648
18749 function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
188 - return fallback_mb_strpos( $haystack, $needle, $offset, $encoding );
 50+ return Fallback::fallback_mb_strpos( $haystack, $needle, $offset, $encoding );
18951 }
19052
19153 }
19254
193 -
194 -
195 -/**
196 - * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
197 - * @param $haystack String
198 - * @param $needle String
199 - * @param $offset String: optional start position
200 - * @param $encoding String: optional encoding; ignored
201 - * @return int
202 - */
203 -function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
204 - $needle = preg_quote( $needle, '/' );
205 -
206 - $ar = array();
207 - preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
208 -
209 - if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
210 - isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
211 - return $ar[0][count( $ar[0] ) - 1][1];
212 - } else {
213 - return false;
214 - }
215 -}
216 -
21755 if( !function_exists( 'mb_strrpos' ) ) {
21856 function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
219 - return fallback_mb_strrpos( $haystack, $needle, $offset, $encoding );
 57+ return Fallback::fallback_mb_strrpos( $haystack, $needle, $offset, $encoding );
22058 }
22159 }
22260
Index: trunk/phase3/includes/AutoLoader.php
@@ -86,6 +86,7 @@
8787 'FatalError' => 'includes/Exception.php',
8888 'FakeTitle' => 'includes/FakeTitle.php',
8989 'FakeMemCachedClient' => 'includes/ObjectCache.php',
 90+ 'Fallback' => 'includes/Fallback.php',
9091 'FauxRequest' => 'includes/WebRequest.php',
9192 'FauxResponse' => 'includes/WebResponse.php',
9293 'FeedItem' => 'includes/Feed.php',
Index: trunk/phase3/includes/Fallback.php
@@ -0,0 +1,177 @@
 2+<?php
 3+
 4+/**
 5+ * This program is free software; you can redistribute it and/or modify
 6+ * it under the terms of the GNU General Public License as published by
 7+ * the Free Software Foundation; either version 2 of the License, or
 8+ * (at your option) any later version.
 9+ *
 10+ * This program is distributed in the hope that it will be useful,
 11+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13+ * GNU General Public License for more details.
 14+ *
 15+ * You should have received a copy of the GNU General Public License along
 16+ * with this program; if not, write to the Free Software Foundation, Inc.,
 17+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 18+ * http://www.gnu.org/copyleft/gpl.html
 19+ *
 20+ */
 21+
 22+/**
 23+ * Fallback functions for PHP installed without mbstring support
 24+ */
 25+class Fallback {
 26+
 27+ public static function fallback_iconv( $from, $to, $string ) {
 28+ if ( substr( $to, -8 ) == '//IGNORE' ) {
 29+ $to = substr( $to, 0, strlen( $to ) - 8 );
 30+ }
 31+ if( strcasecmp( $from, $to ) == 0 ) {
 32+ return $string;
 33+ }
 34+ if( strcasecmp( $from, 'utf-8' ) == 0 ) {
 35+ return utf8_decode( $string );
 36+ }
 37+ if( strcasecmp( $to, 'utf-8' ) == 0 ) {
 38+ return utf8_encode( $string );
 39+ }
 40+ return $string;
 41+ }
 42+
 43+ /**
 44+ * Fallback implementation for mb_substr, hardcoded to UTF-8.
 45+ * Attempts to be at least _moderately_ efficient; best optimized
 46+ * for relatively small offset and count values -- about 5x slower
 47+ * than native mb_string in my testing.
 48+ *
 49+ * Larger offsets are still fairly efficient for Latin text, but
 50+ * can be up to 100x slower than native if the text is heavily
 51+ * multibyte and we have to slog through a few hundred kb.
 52+ */
 53+ public static function fallback_mb_substr( $str, $start, $count='end' ) {
 54+ if( $start != 0 ) {
 55+ $split = self::fallback_mb_substr_split_unicode( $str, intval( $start ) );
 56+ $str = substr( $str, $split );
 57+ }
 58+
 59+ if( $count !== 'end' ) {
 60+ $split = self::fallback_mb_substr_split_unicode( $str, intval( $count ) );
 61+ $str = substr( $str, 0, $split );
 62+ }
 63+
 64+ return $str;
 65+ }
 66+
 67+ public static function fallback_mb_substr_split_unicode( $str, $splitPos ) {
 68+ if( $splitPos == 0 ) {
 69+ return 0;
 70+ }
 71+
 72+ $byteLen = strlen( $str );
 73+
 74+ if( $splitPos > 0 ) {
 75+ if( $splitPos > 256 ) {
 76+ // Optimize large string offsets by skipping ahead N bytes.
 77+ // This will cut out most of our slow time on Latin-based text,
 78+ // and 1/2 to 1/3 on East European and Asian scripts.
 79+ $bytePos = $splitPos;
 80+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
 81+ ++$bytePos;
 82+ }
 83+ $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
 84+ } else {
 85+ $charPos = 0;
 86+ $bytePos = 0;
 87+ }
 88+
 89+ while( $charPos++ < $splitPos ) {
 90+ ++$bytePos;
 91+ // Move past any tail bytes
 92+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
 93+ ++$bytePos;
 94+ }
 95+ }
 96+ } else {
 97+ $splitPosX = $splitPos + 1;
 98+ $charPos = 0; // relative to end of string; we don't care about the actual char position here
 99+ $bytePos = $byteLen;
 100+ while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
 101+ --$bytePos;
 102+ // Move past any tail bytes
 103+ while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
 104+ --$bytePos;
 105+ }
 106+ }
 107+ }
 108+
 109+ return $bytePos;
 110+ }
 111+
 112+ /**
 113+ * Fallback implementation of mb_strlen, hardcoded to UTF-8.
 114+ * @param string $str
 115+ * @param string $enc optional encoding; ignored
 116+ * @return int
 117+ */
 118+ public static function fallback_mb_strlen( $str, $enc = '' ) {
 119+ $counts = count_chars( $str );
 120+ $total = 0;
 121+
 122+ // Count ASCII bytes
 123+ for( $i = 0; $i < 0x80; $i++ ) {
 124+ $total += $counts[$i];
 125+ }
 126+
 127+ // Count multibyte sequence heads
 128+ for( $i = 0xc0; $i < 0xff; $i++ ) {
 129+ $total += $counts[$i];
 130+ }
 131+ return $total;
 132+ }
 133+
 134+
 135+ /**
 136+ * Fallback implementation of mb_strpos, hardcoded to UTF-8.
 137+ * @param $haystack String
 138+ * @param $needle String
 139+ * @param $offset String: optional start position
 140+ * @param $encoding String: optional encoding; ignored
 141+ * @return int
 142+ */
 143+ public static function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 144+ $needle = preg_quote( $needle, '/' );
 145+
 146+ $ar = array();
 147+ preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 148+
 149+ if( isset( $ar[0][1] ) ) {
 150+ return $ar[0][1];
 151+ } else {
 152+ return false;
 153+ }
 154+ }
 155+
 156+ /**
 157+ * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
 158+ * @param $haystack String
 159+ * @param $needle String
 160+ * @param $offset String: optional start position
 161+ * @param $encoding String: optional encoding; ignored
 162+ * @return int
 163+ */
 164+ public static function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 165+ $needle = preg_quote( $needle, '/' );
 166+
 167+ $ar = array();
 168+ preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 169+
 170+ if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
 171+ isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
 172+ return $ar[0][count( $ar[0] ) - 1][1];
 173+ } else {
 174+ return false;
 175+ }
 176+ }
 177+
 178+}
\ No newline at end of file
Property changes on: trunk/phase3/includes/Fallback.php
___________________________________________________________________
Added: svn:eol-style
1179 + native

Sign-offs

User | Flag | Date
Happy-melon | inspected | 22:15, 23 March 2011

Follow-up revisions

Revision | Commit summary | Author | Date
r79546 | Fix r79494: Don't prefix functions now that they're in their own class | soxred93 | 01:44, 4 January 2011

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r79463 | Move fallback function creation out of function_exists() conditionals.... | soxred93 | 01:29, 2 January 2011

Comments

#Comment by Bryan (talk | contribs)   08:02, 3 January 2011

You don't need to prefix the functions if they're in their own class.

#Comment by X! (talk | contribs)   01:44, 4 January 2011

Fixed in r79546

Status & tagging log