r79463 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r79462 | r79463 | r79464 >
Date:	01:29, 2 January 2011
Author:	soxred93
Status:	resolved (Comments)
Tags:
Comment:	Move fallback function creation out of function_exists() conditionals. This allows for unit testing of the fallback functions to ensure that they work like the real functions do.
Modified paths:	/trunk/phase3/includes/GlobalFunctions.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/GlobalFunctions.php
—	—	@@ -20,170 +20,206 @@
21	21	* Re-implementations of newer functions or functions in non-standard
22	22	* PHP extensions may be included here.
23	23	*/
	24	+
	25	+# iconv support is not in the default configuration and so may not be present.
	26	+# Assume will only ever use utf-8 and iso-8859-1.
	27	+# This will not work in all circumstances.
	28	+function fallback_iconv( $from, $to, $string ) {
	29	+ if ( substr( $to, -8 ) == '//IGNORE' ) {
	30	+ $to = substr( $to, 0, strlen( $to ) - 8 );
	31	+ }
	32	+ if( strcasecmp( $from, $to ) == 0 ) {
	33	+ return $string;
	34	+ }
	35	+ if( strcasecmp( $from, 'utf-8' ) == 0 ) {
	36	+ return utf8_decode( $string );
	37	+ }
	38	+ if( strcasecmp( $to, 'utf-8' ) == 0 ) {
	39	+ return utf8_encode( $string );
	40	+ }
	41	+ return $string;
	42	+}
	43	+
24	44	if( !function_exists( 'iconv' ) ) {
25		~~- # iconv support is not in the default configuration and so may not be present.~~
26		~~- # Assume will only ever use utf-8 and iso-8859-1.~~
27		~~- # This will not work in all circumstances.~~
28	45	function iconv( $from, $to, $string ) {
29		~~- if ( substr( $to, -8 ) == '//IGNORE' ) {~~
30		~~- $to = substr( $to, 0, strlen( $to ) - 8 );~~
31		~~- }~~
32		~~- if( strcasecmp( $from, $to ) == 0 ) {~~
33		~~- return $string;~~
34		~~- }~~
35		~~- if( strcasecmp( $from, 'utf-8' ) == 0 ) {~~
36		~~- return utf8_decode( $string );~~
37		~~- }~~
38		~~- if( strcasecmp( $to, 'utf-8' ) == 0 ) {~~
39		~~- return utf8_encode( $string );~~
40		~~- }~~
41		~~- return $string;~~
	46	+ return fallback_iconv( $from, $to, $string )
42	47	}
43	48	}
44	49
45		~~-if ( !function_exists( 'mb_substr' ) ) {~~
46		- /**
47		~~- * Fallback implementation for mb_substr, hardcoded to UTF-8.~~
48		~~- * Attempts to be at least _moderately_ efficient; best optimized~~
49		~~- * for relatively small offset and count values -- about 5x slower~~
50		~~- * than native mb_string in my testing.~~
51		- *
52		~~- * Larger offsets are still fairly efficient for Latin text, but~~
53		~~- * can be up to 100x slower than native if the text is heavily~~
54		~~- * multibyte and we have to slog through a few hundred kb.~~
55		~~- */~~
56		~~- function mb_substr( $str, $start, $count='end' ) {~~
57		~~- if( $start != 0 ) {~~
58		~~- $split = mb_substr_split_unicode( $str, intval( $start ) );~~
59		~~- $str = substr( $str, $split );~~
60		~~- }~~
61	50
62		~~- if( $count !== 'end' ) {~~
63		~~- $split = mb_substr_split_unicode( $str, intval( $count ) );~~
64		~~- $str = substr( $str, 0, $split );~~
65		~~- }~~
66	51
67		~~- return $str;~~
	52	+
	53	+/**
	54	+ * Fallback implementation for mb_substr, hardcoded to UTF-8.
	55	+ * Attempts to be at least _moderately_ efficient; best optimized
	56	+ * for relatively small offset and count values -- about 5x slower
	57	+ * than native mb_string in my testing.
	58	+ *
	59	+ * Larger offsets are still fairly efficient for Latin text, but
	60	+ * can be up to 100x slower than native if the text is heavily
	61	+ * multibyte and we have to slog through a few hundred kb.
	62	+ */
	63	+function fallback_mb_substr( $str, $start, $count='end' ) {
	64	+ if( $start != 0 ) {
	65	+ $split = fallback_mb_substr_split_unicode( $str, intval( $start ) );
	66	+ $str = substr( $str, $split );
68	67	}
69	68
70		~~- function mb_substr_split_unicode( $str, $splitPos ) {~~
71		~~- if( $splitPos == 0 ) {~~
72		~~- return 0;~~
73		~~- }~~
	69	+ if( $count !== 'end' ) {
	70	+ $split = fallback_mb_substr_split_unicode( $str, intval( $count ) );
	71	+ $str = substr( $str, 0, $split );
	72	+ }
74	73
75		~~- $byteLen = strlen( $str );~~
	74	+ return $str;
	75	+}
76	76
77		~~- if( $splitPos > 0 ) {~~
78		~~- if( $splitPos > 256 ) {~~
79		~~- // Optimize large string offsets by skipping ahead N bytes.~~
80		~~- // This will cut out most of our slow time on Latin-based text,~~
81		~~- // and 1/2 to 1/3 on East European and Asian scripts.~~
82		~~- $bytePos = $splitPos;~~
83		~~- while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {~~
84		~~- ++$bytePos;~~
85		~~- }~~
86		~~- $charPos = mb_strlen( substr( $str, 0, $bytePos ) );~~
87		~~- } else {~~
88		~~- $charPos = 0;~~
89		~~- $bytePos = 0;~~
	77	+function fallback_mb_substr_split_unicode( $str, $splitPos ) {
	78	+ if( $splitPos == 0 ) {
	79	+ return 0;
	80	+ }
	81	+
	82	+ $byteLen = strlen( $str );
	83	+
	84	+ if( $splitPos > 0 ) {
	85	+ if( $splitPos > 256 ) {
	86	+ // Optimize large string offsets by skipping ahead N bytes.
	87	+ // This will cut out most of our slow time on Latin-based text,
	88	+ // and 1/2 to 1/3 on East European and Asian scripts.
	89	+ $bytePos = $splitPos;
	90	+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
	91	+ ++$bytePos;
90	92	}
	93	+ $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
	94	+ } else {
	95	+ $charPos = 0;
	96	+ $bytePos = 0;
	97	+ }
91	98
92		~~- while( $charPos++ < $splitPos ) {~~
	99	+ while( $charPos++ < $splitPos ) {
	100	+ ++$bytePos;
	101	+ // Move past any tail bytes
	102	+ while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
93	103	++$bytePos;
94		~~- // Move past any tail bytes~~
95		~~- while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {~~
96		~~- ++$bytePos;~~
97		~~- }~~
98	104	}
99		~~- } else {~~
100		~~- $splitPosX = $splitPos + 1;~~
101		~~- $charPos = 0; // relative to end of string; we don't care about the actual char position here~~
102		~~- $bytePos = $byteLen;~~
103		~~- while( $bytePos > 0 && $charPos-- >= $splitPosX ) {~~
	105	+ }
	106	+ } else {
	107	+ $splitPosX = $splitPos + 1;
	108	+ $charPos = 0; // relative to end of string; we don't care about the actual char position here
	109	+ $bytePos = $byteLen;
	110	+ while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
	111	+ --$bytePos;
	112	+ // Move past any tail bytes
	113	+ while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
104	114	--$bytePos;
105		~~- // Move past any tail bytes~~
106		~~- while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {~~
107		~~- --$bytePos;~~
108		~~- }~~
109	115	}
110	116	}
	117	+ }
111	118
112		~~- return $bytePos;~~
	119	+ return $bytePos;
	120	+}
	121	+
	122	+if ( !function_exists( 'mb_substr' ) ) {
	123	+ function mb_substr( $str, $start, $count='end' ) {
	124	+ return fallback_mb_substr( $str, $start, $count )
113	125	}
	126	+
	127	+ function mb_substr_split_unicode( $str, $splitPos ) {
	128	+ return fallback_mb_substr_split_unicode( $str, $splitPos );
	129	+ }
114	130	}
115	131
	132	+
	133	+
	134	+/**
	135	+ * Fallback implementation of mb_strlen, hardcoded to UTF-8.
	136	+ * @param string $str
	137	+ * @param string $enc optional encoding; ignored
	138	+ * @return int
	139	+ */
	140	+function fallback_mb_strlen( $str, $enc = '' ) {
	141	+ $counts = count_chars( $str );
	142	+ $total = 0;
	143	+
	144	+ // Count ASCII bytes
	145	+ for( $i = 0; $i < 0x80; $i++ ) {
	146	+ $total += $counts[$i];
	147	+ }
	148	+
	149	+ // Count multibyte sequence heads
	150	+ for( $i = 0xc0; $i < 0xff; $i++ ) {
	151	+ $total += $counts[$i];
	152	+ }
	153	+ return $total;
	154	+}
	155	+
116	156	if ( !function_exists( 'mb_strlen' ) ) {
117		- /**
118		~~- * Fallback implementation of mb_strlen, hardcoded to UTF-8.~~
119		~~- * @param string $str~~
120		~~- * @param string $enc optional encoding; ignored~~
121		~~- * @return int~~
122		~~- */~~
123	157	function mb_strlen( $str, $enc = '' ) {
124		~~- $counts = count_chars( $str );~~
125		~~- $total = 0;~~
	158	+ return fallback_mb_strlen( $str, $enc );
	159	+ }
	160	+}
126	161
127		~~- // Count ASCII bytes~~
128		~~- for( $i = 0; $i < 0x80; $i++ ) {~~
129		~~- $total += $counts[$i];~~
130		~~- }~~
131	162
132		~~- // Count multibyte sequence heads~~
133		~~- for( $i = 0xc0; $i < 0xff; $i++ ) {~~
134		~~- $total += $counts[$i];~~
135		~~- }~~
136		~~- return $total;~~
	163	+
	164	+/**
	165	+ * Fallback implementation of mb_strpos, hardcoded to UTF-8.
	166	+ * @param $haystack String
	167	+ * @param $needle String
	168	+ * @param $offset String: optional start position
	169	+ * @param $encoding String: optional encoding; ignored
	170	+ * @return int
	171	+ */
	172	+function fallback_mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
	173	+ $needle = preg_quote( $needle, '/' );
	174	+
	175	+ $ar = array();
	176	+ preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
	177	+
	178	+ if( isset( $ar[0][1] ) ) {
	179	+ return $ar[0][1];
	180	+ } else {
	181	+ return false;
137	182	}
138	183	}
139	184
140		-
141	185	if( !function_exists( 'mb_strpos' ) ) {
142		- /**
143		~~- * Fallback implementation of mb_strpos, hardcoded to UTF-8.~~
144		~~- * @param $haystack String~~
145		~~- * @param $needle String~~
146		~~- * @param $offset String: optional start position~~
147		~~- * @param $encoding String: optional encoding; ignored~~
148		~~- * @return int~~
149		~~- */~~
	186	+
150	187	function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
151		~~- $needle = preg_quote( $needle, '/' );~~
	188	+ return fallback_mb_strpos( $haystack, $needle, $offset, $encoding );
	189	+ }
	190	+
	191	+}
152	192
153		~~- $ar = array();~~
154		~~- preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );~~
155	193
156		~~- if( isset( $ar[0][1] ) ) {~~
157		~~- return $ar[0][1];~~
158		~~- } else {~~
159		~~- return false;~~
160		~~- }~~
	194	+
	195	+/**
	196	+ * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
	197	+ * @param $haystack String
	198	+ * @param $needle String
	199	+ * @param $offset String: optional start position
	200	+ * @param $encoding String: optional encoding; ignored
	201	+ * @return int
	202	+ */
	203	+function fallback_mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
	204	+ $needle = preg_quote( $needle, '/' );
	205	+
	206	+ $ar = array();
	207	+ preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
	208	+
	209	+ if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
	210	+ isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
	211	+ return $ar[0][count( $ar[0] ) - 1][1];
	212	+ } else {
	213	+ return false;
161	214	}
162	215	}
163	216
164	217	if( !function_exists( 'mb_strrpos' ) ) {
165		- /**
166		~~- * Fallback implementation of mb_strrpos, hardcoded to UTF-8.~~
167		~~- * @param $haystack String~~
168		~~- * @param $needle String~~
169		~~- * @param $offset String: optional start position~~
170		~~- * @param $encoding String: optional encoding; ignored~~
171		~~- * @return int~~
172		~~- */~~
173	218	function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
174		~~- $needle = preg_quote( $needle, '/' );~~
175		-
176		~~- $ar = array();~~
177		~~- preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );~~
178		-
179		~~- if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&~~
180		~~- isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {~~
181		~~- return $ar[0][count( $ar[0] ) - 1][1];~~
182		~~- } else {~~
183		~~- return false;~~
184		~~- }~~
	219	+ return fallback_mb_strrpos( $haystack, $needle, $offset, $encoding );
185	220	}
186	221	}
187	222
	223	+
188	224	// Support for Wietse Venema's taint feature
189	225	if ( !function_exists( 'istainted' ) ) {
190	226	function istainted( $var ) {
—	—	@@ -200,6 +236,7 @@
201	237	/// @endcond
202	238
203	239
	240	+
204	241	/**
205	242	* Like array_diff( $a, $b ) except that it works with two-dimensional arrays.
206	243	*/

Follow-up revisions

Revision	Commit summary	Author	Date
r79465	Fix r79463 and r79464: Syntax error	soxred93	01:35, 2 January 2011
r79494	Followup to r79463: Move fallback functions to new Fallback class	soxred93	15:54, 2 January 2011

Comments

#Comment by Nikerabbit (talk | contribs) 08:19, 2 January 2011

Whii, can we now move them away from GlobalFunctions.php?

#Comment by Platonides (talk | contribs) 14:37, 2 January 2011

Why not? Fallback::mb_strrpos()...

#Comment by X! (talk | contribs) 14:39, 2 January 2011

That's on the to-do list for today. :)

#Comment by X! (talk | contribs) 15:55, 2 January 2011

Done in r79494.

Status & tagging log

13:44, 8 June 2011 Reedy (talk | contribs) changed the status of r79463 [removed: new added: resolved]