r79494 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r79493‎ \| r79494 \| r79495 >
Date:	15:54, 2 January 2011
Author:	soxred93
Status:	resolved (Comments)
Tags:
Comment:	Followup to r79463: Move fallback functions to new Fallback class
Modified paths:	/trunk/phase3/includes/AutoLoader.php (modified) (history) /trunk/phase3/includes/Fallback.php (added) (history) /trunk/phase3/includes/GlobalFunctions.php (modified) (history) /trunk/phase3/tests/phpunit/includes/GlobalTest.php (modified) (history)

Diff [purge]

Index: trunk/phase3/tests/phpunit/includes/GlobalTest.php
—	—	@@ -422,7 +422,7 @@
423	423
424	424	$this->assertEquals(
425	425	call_user_func_array( 'mb_substr', $param_set ),
426		~~- call_user_func_array( 'fallback_mb_substr', $param_set ),~~
	426	+ call_user_func_array( array( 'Fallback', 'fallback_mb_substr' ), $param_set ),
427	427	'Fallback mb_substr with params ' . implode( ', ', $old_param_set )
428	428	);
429	429	}
—	—	@@ -431,7 +431,7 @@
432	432	//mb_strlen
433	433	$this->assertEquals(
434	434	mb_strlen( $sampleUTF ),
435		~~- fallback_mb_strlen( $sampleUTF ),~~
	435	+ Fallback::fallback_mb_strlen( $sampleUTF ),
436	436	'Fallback mb_strlen'
437	437	);
438	438
—	—	@@ -452,13 +452,13 @@
453	453
454	454	$this->assertEquals(
455	455	call_user_func_array( 'mb_strpos', $param_set ),
456		~~- call_user_func_array( 'fallback_mb_strpos', $param_set ),~~
	456	+ call_user_func_array( array( 'Fallback', 'fallback_mb_strpos' ), $param_set ),
457	457	'Fallback mb_strpos with params ' . implode( ', ', $old_param_set )
458	458	);
459	459
460	460	$this->assertEquals(
461	461	call_user_func_array( 'mb_strrpos', $param_set ),
462		~~- call_user_func_array( 'fallback_mb_strrpos', $param_set ),~~
	462	+ call_user_func_array( array( 'Fallback', 'fallback_mb_strrpos' ), $param_set ),
463	463	'Fallback mb_strrpos with params ' . implode( ', ', $old_param_set )
464	464	);
465	465	}
Index: trunk/phase3/includes/GlobalFunctions.php
—	—	@@ -21,201 +21,39 @@
22	22	* PHP extensions may be included here.
23	23	*/
24	24
25		~~-# iconv support is not in the default configuration and so may not be present.~~
26		~~-# Assume will only ever use utf-8 and iso-8859-1.~~
27		~~-# This will not work in all circumstances.~~
28		~~-function fallback_iconv( $from, $to, $string ) {~~
29		~~- if ( substr( $to, -8 ) == '//IGNORE' ) {~~
30		~~- $to = substr( $to, 0, strlen( $to ) - 8 );~~
31		~~- }~~
32		~~- if( strcasecmp( $from, $to ) == 0 ) {~~
33		~~- return $string;~~
34		~~- }~~
35		~~- if( strcasecmp( $from, 'utf-8' ) == 0 ) {~~
36		~~- return utf8_decode( $string );~~
37		~~- }~~
38		~~- if( strcasecmp( $to, 'utf-8' ) == 0 ) {~~
39		~~- return utf8_encode( $string );~~
40		~~- }~~
41		~~- return $string;~~
42		-}
43		-
44	25	if( !function_exists( 'iconv' ) ) {
45	26	function iconv( $from, $to, $string ) {
46		~~- return fallback_iconv( $from, $to, $string );~~
	27	+ return Fallback::fallback_iconv( $from, $to, $string );
47	28	}
48	29	}
49	30
50		-
51		-
52		-
53		-/**
54		~~- * Fallback implementation for mb_substr, hardcoded to UTF-8.~~
55		~~- * Attempts to be at least _moderately_ efficient; best optimized~~
56		~~- * for relatively small offset and count values -- about 5x slower~~
57		~~- * than native mb_string in my testing.~~
58		- *
59		~~- * Larger offsets are still fairly efficient for Latin text, but~~
60		~~- * can be up to 100x slower than native if the text is heavily~~
61		~~- * multibyte and we have to slog through a few hundred kb.~~
62		~~- */~~
63		~~-function fallback_mb_substr( $str, $start, $count='end' ) {~~
64		~~- if( $start != 0 ) {~~
65		~~- $split = fallback_mb_substr_split_unicode( $str, intval( $start ) );~~
66		~~- $str = substr( $str, $split );~~
67		~~- }~~
68		-
69		~~- if( $count !== 'end' ) {~~
70		~~- $split = fallback_mb_substr_split_unicode( $str, intval( $count ) );~~
71		~~- $str = substr( $str, 0, $split );~~
72		~~- }~~
73		-
74		~~- return $str;~~
75		-}
76		-
77		~~-function fallback_mb_substr_split_unicode( $str, $splitPos ) {~~
78		~~- if( $splitPos == 0 ) {~~
79		~~- return 0;~~
80		~~- }~~
81		-
82		~~- $byteLen = strlen( $str );~~
83		-
84		~~- if( $splitPos > 0 ) {~~
85		~~- if( $splitPos > 256 ) {~~
86		~~- // Optimize large string offsets by skipping ahead N bytes.~~
87		~~- // This will cut out most of our slow time on Latin-based text,~~
88		~~- // and 1/2 to 1/3 on East European and Asian scripts.~~
89		~~- $bytePos = $splitPos;~~
90		~~- while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {~~
91		~~- ++$bytePos;~~
92		~~- }~~
93		~~- $charPos = mb_strlen( substr( $str, 0, $bytePos ) );~~
94		~~- } else {~~
95		~~- $charPos = 0;~~
96		~~- $bytePos = 0;~~
97		~~- }~~
98		-
99		~~- while( $charPos++ < $splitPos ) {~~
100		~~- ++$bytePos;~~
101		~~- // Move past any tail bytes~~
102		~~- while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {~~
103		~~- ++$bytePos;~~
104		~~- }~~
105		~~- }~~
106		~~- } else {~~
107		~~- $splitPosX = $splitPos + 1;~~
108		~~- $charPos = 0; // relative to end of string; we don't care about the actual char position here~~
109		~~- $bytePos = $byteLen;~~
110		~~- while( $bytePos > 0 && $charPos-- >= $splitPosX ) {~~
111		~~- --$bytePos;~~
112		~~- // Move past any tail bytes~~
113		~~- while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {~~
114		~~- --$bytePos;~~
115		~~- }~~
116		~~- }~~
117		~~- }~~
118		-
119		~~- return $bytePos;~~
120		-}
121		-
122	31	if ( !function_exists( 'mb_substr' ) ) {
123	32	function mb_substr( $str, $start, $count='end' ) {
124		~~- return fallback_mb_substr( $str, $start, $count );~~
	33	+ return Fallback::fallback_mb_substr( $str, $start, $count );
125	34	}
126	35
127	36	function mb_substr_split_unicode( $str, $splitPos ) {
128		~~- return fallback_mb_substr_split_unicode( $str, $splitPos );~~
	37	+ return Fallback::fallback_mb_substr_split_unicode( $str, $splitPos );
129	38	}
130	39	}
131	40
132		-
133		-
134		-/**
135		~~- * Fallback implementation of mb_strlen, hardcoded to UTF-8.~~
136		~~- * @param string $str~~
137		~~- * @param string $enc optional encoding; ignored~~
138		~~- * @return int~~
139		~~- */~~
140		~~-function fallback_mb_strlen( $str, $enc = '' ) {~~
141		~~- $counts = count_chars( $str );~~
142		~~- $total = 0;~~
143		-
144		~~- // Count ASCII bytes~~
145		~~- for( $i = 0; $i < 0x80; $i++ ) {~~
146		~~- $total += $counts[$i];~~
147		~~- }~~
148		-
149		~~- // Count multibyte sequence heads~~
150		~~- for( $i = 0xc0; $i < 0xff; $i++ ) {~~
151		~~- $total += $counts[$i];~~
152		~~- }~~
153		~~- return $total;~~
154		-}
155		-
156	41	if ( !function_exists( 'mb_strlen' ) ) {
157	42	function mb_strlen( $str, $enc = '' ) {
158		~~- return fallback_mb_strlen( $str, $enc );~~
	43	+ return Fallback::fallback_mb_strlen( $str, $enc );
159	44	}
160	45	}
161	46
162		-
163		-
164		-/**
165		~~- * Fallback implementation of mb_strpos, hardcoded to UTF-8.~~
166		~~- * @param $haystack String~~
167		~~- * @param $needle String~~
168		~~- * @param $offset String: optional start position~~
169		~~- * @param $encoding String: optional encoding; ignored~~
170		~~- * @return int~~
171		~~- */~~
172		~~-function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {~~
173		~~- $needle = preg_quote( $needle, '/' );~~
174		-
175		~~- $ar = array();~~
176		~~- preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );~~
177		-
178		~~- if( isset( $ar[0][1] ) ) {~~
179		~~- return $ar[0][1];~~
180		~~- } else {~~
181		~~- return false;~~
182		~~- }~~
183		-}
184		-
185	47	if( !function_exists( 'mb_strpos' ) ) {
186	48
187	49	function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
188		~~- return fallback_mb_strpos( $haystack, $needle, $offset, $encoding );~~
	50	+ return Fallback::fallback_mb_strpos( $haystack, $needle, $offset, $encoding );
189	51	}
190	52
191	53	}
192	54
193		-
194		-
195		-/**
196		~~- * Fallback implementation of mb_strrpos, hardcoded to UTF-8.~~
197		~~- * @param $haystack String~~
198		~~- * @param $needle String~~
199		~~- * @param $offset String: optional start position~~
200		~~- * @param $encoding String: optional encoding; ignored~~
201		~~- * @return int~~
202		~~- */~~
203		~~-function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {~~
204		~~- $needle = preg_quote( $needle, '/' );~~
205		-
206		~~- $ar = array();~~
207		~~- preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );~~
208		-
209		~~- if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&~~
210		~~- isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {~~
211		~~- return $ar[0][count( $ar[0] ) - 1][1];~~
212		~~- } else {~~
213		~~- return false;~~
214		~~- }~~
215		-}
216		-
217	55	if( !function_exists( 'mb_strrpos' ) ) {
218	56	function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
219		~~- return fallback_mb_strrpos( $haystack, $needle, $offset, $encoding );~~
	57	+ return Fallback::fallback_mb_strrpos( $haystack, $needle, $offset, $encoding );
220	58	}
221	59	}
222	60
Index: trunk/phase3/includes/AutoLoader.php
—	—	@@ -86,6 +86,7 @@
87	87	'FatalError' => 'includes/Exception.php',
88	88	'FakeTitle' => 'includes/FakeTitle.php',
89	89	'FakeMemCachedClient' => 'includes/ObjectCache.php',
	90	+ 'Fallback' => 'includes/Fallback.php',
90	91	'FauxRequest' => 'includes/WebRequest.php',
91	92	'FauxResponse' => 'includes/WebResponse.php',
92	93	'FeedItem' => 'includes/Feed.php',
Index: trunk/phase3/includes/Fallback.php
—	—	@@ -0,0 +1,177 @@
	2	+<?php
	3	+
	4	+/**
	5	+ * This program is free software; you can redistribute it and/or modify
	6	+ * it under the terms of the GNU General Public License as published by
	7	+ * the Free Software Foundation; either version 2 of the License, or
	8	+ * (at your option) any later version.
	9	+ *
	10	+ * This program is distributed in the hope that it will be useful,
	11	+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	+ * GNU General Public License for more details.
	14	+ *
	15	+ * You should have received a copy of the GNU General Public License along
	16	+ * with this program; if not, write to the Free Software Foundation, Inc.,
	17	+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
	18	+ * http://www.gnu.org/copyleft/gpl.html
	19	+ *
	20	+ */
	21	+
	22	+/**
	23	+ * Fallback functions for PHP installed without mbstring support
	24	+ */
	25	+class Fallback {
	26	+
	27	+ public static function fallback_iconv( $from, $to, $string ) {
	28	+ if ( substr( $to, -8 ) == '//IGNORE' ) {
	29	+ $to = substr( $to, 0, strlen( $to ) - 8 );
	30	+ }
	31	+ if( strcasecmp( $from, $to ) == 0 ) {
	32	+ return $string;
	33	+ }
	34	+ if( strcasecmp( $from, 'utf-8' ) == 0 ) {
	35	+ return utf8_decode( $string );
	36	+ }
	37	+ if( strcasecmp( $to, 'utf-8' ) == 0 ) {
	38	+ return utf8_encode( $string );
	39	+ }
	40	+ return $string;
	41	+ }
	42	+
	43	+ /**
	44	+ * Fallback implementation for mb_substr, hardcoded to UTF-8.
	45	+ * Attempts to be at least _moderately_ efficient; best optimized
	46	+ * for relatively small offset and count values -- about 5x slower
	47	+ * than native mb_string in my testing.
	48	+ *
	49	+ * Larger offsets are still fairly efficient for Latin text, but
	50	+ * can be up to 100x slower than native if the text is heavily
	51	+ * multibyte and we have to slog through a few hundred kb.
	52	+ */
	53	+ public static function fallback_mb_substr( $str, $start, $count='end' ) {
	54	+ if( $start != 0 ) {
	55	+ $split = self::fallback_mb_substr_split_unicode( $str, intval( $start ) );
	56	+ $str = substr( $str, $split );
	57	+ }
	58	+
	59	+ if( $count !== 'end' ) {
	60	+ $split = self::fallback_mb_substr_split_unicode( $str, intval( $count ) );
	61	+ $str = substr( $str, 0, $split );
	62	+ }
	63	+
	64	+ return $str;
	65	+ }
	66	+
	67	+ public static function fallback_mb_substr_split_unicode( $str, $splitPos ) {
	68	+ if( $splitPos == 0 ) {
	69	+ return 0;
	70	+ }
	71	+
	72	+ $byteLen = strlen( $str );
	73	+
	74	+ if( $splitPos > 0 ) {
	75	+ if( $splitPos > 256 ) {
	76	+ // Optimize large string offsets by skipping ahead N bytes.
	77	+ // This will cut out most of our slow time on Latin-based text,
	78	+ // and 1/2 to 1/3 on East European and Asian scripts.
	79	+ $bytePos = $splitPos;
	80	+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
	81	+ ++$bytePos;
	82	+ }
	83	+ $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
	84	+ } else {
	85	+ $charPos = 0;
	86	+ $bytePos = 0;
	87	+ }
	88	+
	89	+ while( $charPos++ < $splitPos ) {
	90	+ ++$bytePos;
	91	+ // Move past any tail bytes
	92	+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
	93	+ ++$bytePos;
	94	+ }
	95	+ }
	96	+ } else {
	97	+ $splitPosX = $splitPos + 1;
	98	+ $charPos = 0; // relative to end of string; we don't care about the actual char position here
	99	+ $bytePos = $byteLen;
	100	+ while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
	101	+ --$bytePos;
	102	+ // Move past any tail bytes
	103	+ while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
	104	+ --$bytePos;
	105	+ }
	106	+ }
	107	+ }
	108	+
	109	+ return $bytePos;
	110	+ }
	111	+
	112	+ /**
	113	+ * Fallback implementation of mb_strlen, hardcoded to UTF-8.
	114	+ * @param string $str
	115	+ * @param string $enc optional encoding; ignored
	116	+ * @return int
	117	+ */
	118	+ public static function fallback_mb_strlen( $str, $enc = '' ) {
	119	+ $counts = count_chars( $str );
	120	+ $total = 0;
	121	+
	122	+ // Count ASCII bytes
	123	+ for( $i = 0; $i < 0x80; $i++ ) {
	124	+ $total += $counts[$i];
	125	+ }
	126	+
	127	+ // Count multibyte sequence heads
	128	+ for( $i = 0xc0; $i < 0xff; $i++ ) {
	129	+ $total += $counts[$i];
	130	+ }
	131	+ return $total;
	132	+ }
	133	+
	134	+
	135	+ /**
	136	+ * Fallback implementation of mb_strpos, hardcoded to UTF-8.
	137	+ * @param $haystack String
	138	+ * @param $needle String
	139	+ * @param $offset String: optional start position
	140	+ * @param $encoding String: optional encoding; ignored
	141	+ * @return int
	142	+ */
	143	+ public static function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
	144	+ $needle = preg_quote( $needle, '/' );
	145	+
	146	+ $ar = array();
	147	+ preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
	148	+
	149	+ if( isset( $ar[0][1] ) ) {
	150	+ return $ar[0][1];
	151	+ } else {
	152	+ return false;
	153	+ }
	154	+ }
	155	+
	156	+ /**
	157	+ * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
	158	+ * @param $haystack String
	159	+ * @param $needle String
	160	+ * @param $offset String: optional start position
	161	+ * @param $encoding String: optional encoding; ignored
	162	+ * @return int
	163	+ */
	164	+ public static function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
	165	+ $needle = preg_quote( $needle, '/' );
	166	+
	167	+ $ar = array();
	168	+ preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
	169	+
	170	+ if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
	171	+ isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
	172	+ return $ar[0][count( $ar[0] ) - 1][1];
	173	+ } else {
	174	+ return false;
	175	+ }
	176	+ }
	177	+
	178	+}
\ No newline at end of file
Property changes on: trunk/phase3/includes/Fallback.php
___________________________________________________________________
Added: svn:eol-style
1	179	+ native

Sign-offs

User	Flag	Date
Happy-melon	inspected	22:15, 23 March 2011

Follow-up revisions

Revision	Commit summary	Author	Date
r79546	Fix r79494: Don't prefix functions now that they're in their own class	soxred93	01:44, 4 January 2011

Past revisions this follows-up on

Revision	Commit summary	Author	Date
r79463	Move fallback function creation out of function_exists() conditionals....	soxred93	01:29, 2 January 2011

Comments

#Comment by Bryan (talk | contribs) 08:02, 3 January 2011

You don't need to prefix the functions if they're in their own class.

#Comment by X! (talk | contribs) 01:44, 4 January 2011

Fixed in r79546

Status & tagging log

13:44, 8 June 2011 Reedy (talk | contribs) changed the status of r79494 [removed: new added: resolved]
19:51, 9 February 2011 IAlex (talk | contribs) changed the status of r79494 [removed: fixme added: new]
08:02, 3 January 2011 Bryan (talk | contribs) changed the status of r79494 [removed: new added: fixme]