Index: trunk/phase3/languages/Language.php |
— | — | @@ -2155,42 +2155,26 @@ |
2156 | 2156 | */ |
2157 | 2157 | function truncate( $string, $length, $ellipsis = '...' ) { |
2158 | 2158 | # Use the localized ellipsis character |
2159 | | - if( $ellipsis == '...' ) { |
| 2159 | + if ( $ellipsis == '...' ) { |
2160 | 2160 | $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) ); |
2161 | 2161 | } |
2162 | | - |
2163 | | - if( $length == 0 ) { |
| 2162 | + # Check if there is no need to truncate |
| 2163 | + if ( $length == 0 ) { |
2164 | 2164 | return $ellipsis; |
2165 | | - } |
2166 | | - if ( strlen( $string ) <= abs( $length ) ) { |
| 2165 | + } elseif ( strlen( $string ) <= abs( $length ) ) { |
2167 | 2166 | return $string; |
2168 | 2167 | } |
2169 | 2168 | $stringOriginal = $string; |
2170 | | - if( $length > 0 ) { |
2171 | | - $string = substr( $string, 0, $length ); |
2172 | | - $char = ord( $string[strlen( $string ) - 1] ); |
2173 | | - $m = array(); |
2174 | | - if ($char >= 0xc0) { |
2175 | | - # We got the first byte only of a multibyte char; remove it. |
2176 | | - $string = substr( $string, 0, -1 ); |
2177 | | - } elseif( $char >= 0x80 && |
2178 | | - preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . |
2179 | | - '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) { |
2180 | | - # We chopped in the middle of a character; remove it |
2181 | | - $string = $m[1]; |
2182 | | - } |
| 2169 | + if ( $length > 0 ) { |
| 2170 | + $string = substr( $string, 0, $length ); // xyz... |
| 2171 | + $string = self::removeBadCharLast( $string ); |
2183 | 2172 | $string = $string . $ellipsis; |
2184 | | - |
2185 | 2173 | } else { |
2186 | | - $string = substr( $string, $length ); |
2187 | | - $char = ord( $string[0] ); |
2188 | | - if( $char >= 0x80 && $char < 0xc0 ) { |
2189 | | - # We chopped in the middle of a character; remove the whole thing |
2190 | | - $string = preg_replace( '/^[\x80-\xbf]+/', '', $string ); |
2191 | | - } |
| 2174 | + $string = substr( $string, $length ); // ...xyz |
| 2175 | + $string = self::removeBadCharFirst( $string ); |
2192 | 2176 | $string = $ellipsis . $string; |
2193 | 2177 | } |
2194 | | - # Do not truncate if the ellipsis actually make the string longer. Bug 22181 |
| 2178 | + # Do not truncate if the ellipsis makes the string longer (bug 22181) |
2195 | 2179 | if ( strlen( $string ) < strlen( $stringOriginal ) ) { |
2196 | 2180 | return $string; |
2197 | 2181 | } else { |
— | — | @@ -2199,6 +2183,176 @@ |
2200 | 2184 | } |
2201 | 2185 | |
2202 | 2186 | /** |
| 2187 | + * Remove bytes that represent an incomplete multibyte (UTF-8) character |
| 2188 | + * at the end of string (e.g. the string was cut and bytes of the char are missing) |
| 2189 | + * |
| 2190 | + * @param $string String Byte string to clean; its last byte is inspected, so it must be non-empty |
| 2191 | + * @return string The input without a trailing partial character |
| 2192 | + */ |
| 2193 | + protected function removeBadCharLast( $string ) { |
| 2194 | + $char = ord( $string[strlen( $string ) - 1] ); // value of the final byte |
| 2195 | + $m = array(); |
| 2196 | + if ( $char >= 0xc0 ) { |
| 2197 | + # We got the first byte only of a multibyte char; remove it. |
| 2198 | + $string = substr( $string, 0, -1 ); |
| 2199 | + } elseif ( $char >= 0x80 && |
| 2200 | + preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . |
| 2201 | + '[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) ) |
| 2202 | + { |
| 2203 | + # We chopped in the middle of a character; remove it (/s lets .* span newlines) |
| 2204 | + $string = $m[1]; |
| 2205 | + } |
| 2206 | + return $string; |
| 2207 | + } |
| 2208 | + |
| 2209 | + /** |
| 2210 | + * Remove leading continuation bytes (0x80-0xbf) that represent an incomplete |
| 2211 | + * Unicode character at the start of string (e.g. bytes of the char are missing) |
| 2212 | + * |
| 2213 | + * @param $string String Byte string to clean; its first byte is inspected |
| 2214 | + * @return string The input without the leading run of continuation bytes |
| 2215 | + */ |
| 2216 | + protected function removeBadCharFirst( $string ) { |
| 2217 | + $char = ord( $string[0] ); // value of the first byte |
| 2218 | + if ( $char >= 0x80 && $char < 0xc0 ) { |
| 2219 | + # We chopped in the middle of a character; remove the whole thing |
| 2220 | + $string = preg_replace( '/^[\x80-\xbf]+/', '', $string ); |
| 2221 | + } |
| 2222 | + return $string; |
| 2223 | + } |
| 2224 | + |
| 2225 | + /** |
| 2226 | + * Truncate a string of valid HTML to a specified length in bytes, |
| 2227 | + * appending an optional string (e.g. for ellipses), and return valid HTML |
| 2228 | + * |
| 2229 | + * This is only intended for styled/linked text, such as HTML with |
| 2230 | + * tags like <span> and <a>, where the tags are self-contained (valid HTML) |
| 2231 | + * |
| 2232 | + * Note: tries to fix broken HTML with MWTidy |
| 2233 | + * |
| 2234 | + * @param string $text String to truncate |
| 2235 | + * @param int $length (zero/positive) Maximum length (excluding ellipses); values <= 0 return only the ellipsis |
| 2236 | + * @param string $ellipsis String to append to the truncated text |
| 2237 | + * @return string Truncated HTML with any still-open tags closed |
| 2238 | + */ |
| 2239 | + function truncateHtml( $text, $length, $ellipsis = '...' ) { |
| 2240 | + # Use the localized ellipsis character |
| 2241 | + if ( $ellipsis == '...' ) { |
| 2242 | + $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) ); |
| 2243 | + } |
| 2244 | + # Check if there is no need to truncate |
| 2245 | + if ( $length <= 0 ) { |
| 2246 | + return $ellipsis; // no text shown, nothing to format |
| 2247 | + } elseif ( strlen($text) <= $length ) { |
| 2248 | + return $text; // string short enough even *with* HTML |
| 2249 | + } |
| 2250 | + $text = MWTidy::tidy( $text ); // fix tags |
| 2251 | + $displayLen = 0; // innerHTML length so far |
| 2252 | + $doTruncate = true; // truncated string plus '...' shorter than original? |
| 2253 | + $tagType = 0; // 0-open, 1-close |
| 2254 | + $bracketState = 0; // 1-tag start, 2-tag name, 0-neither |
| 2255 | + $entityState = 0; // 0-not entity, 1-entity |
| 2256 | + $tag = $ret = $ch = ''; |
| 2257 | + $openTags = array(); |
| 2258 | + $textLen = strlen($text); |
| 2259 | + for( $pos = 0; $pos < $textLen; ++$pos ) { |
| 2260 | + $ch = $text[$pos]; |
| 2261 | + $lastCh = $pos ? $text[$pos-1] : ''; |
| 2262 | + $ret .= $ch; // add to result string |
| 2263 | + if ( $ch == '<' ) { |
| 2264 | + self::onEndBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML |
| 2265 | + $entityState = 0; // for bad HTML |
| 2266 | + $bracketState = 1; // tag started (checking for slash) |
| 2267 | + } elseif ( $ch == '>' ) { |
| 2268 | + self::onEndBracket( $tag, $tagType, $lastCh, $openTags ); |
| 2269 | + $entityState = 0; // for bad HTML |
| 2270 | + $bracketState = 0; // out of brackets |
| 2271 | + } elseif ( $bracketState == 1 ) { |
| 2272 | + if ( $ch == '/' ) { |
| 2273 | + $tagType = 1; // close tag (e.g. "</span>") |
| 2274 | + } else { |
| 2275 | + $tagType = 0; // open tag (e.g. "<span>") |
| 2276 | + $tag .= $ch; |
| 2277 | + } |
| 2278 | + $bracketState = 2; // building tag name |
| 2279 | + } elseif ( $bracketState == 2 ) { |
| 2280 | + if ( $ch != ' ' ) { |
| 2281 | + $tag .= $ch; |
| 2282 | + } else { |
| 2283 | + // Name found (e.g. "<a href=..."), add on tag attributes... |
| 2284 | + $pos += self::skipAndAppend( $ret, $text, "<>", $pos + 1 ); |
| 2285 | + } |
| 2286 | + } elseif ( $bracketState == 0 ) { |
| 2287 | + if ( $entityState ) { |
| 2288 | + if ( $ch == ';' ) { |
| 2289 | + $entityState = 0; |
| 2290 | + $displayLen++; // entity is one displayed char |
| 2291 | + } |
| 2292 | + } else { |
| 2293 | + if ( $ch == '&' ) { |
| 2294 | + $entityState = 1; // entity found, (e.g. "&nbsp;") |
| 2295 | + } else { |
| 2296 | + $displayLen++; // this char is displayed |
| 2297 | + // Add on the other display text after this... |
| 2298 | + $skipped = self::skipAndAppend( |
| 2299 | + $ret, $text, "<>&", $pos + 1, $length - $displayLen ); |
| 2300 | + $displayLen += $skipped; |
| 2301 | + $pos += $skipped; |
| 2302 | + } |
| 2303 | + } |
| 2304 | + } |
| 2305 | + if( !$doTruncate ) continue; |
| 2306 | + # Truncate if not in the middle of a bracket/entity... |
| 2307 | + if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) { |
| 2308 | + $remaining = substr( $text, $pos + 1 ); // remaining string |
| 2309 | + $remaining = StringUtils::delimiterReplace( '<', '>', '', $remaining ); // rm tags |
| 2310 | + $remaining = StringUtils::delimiterReplace( '&', ';', '', $remaining ); // rm entities |
| 2311 | + $doTruncate = ( strlen($remaining) > strlen($ellipsis) ); |
| 2312 | + if ( $doTruncate ) { |
| 2313 | + $ret = self::removeBadCharLast( $ret ) . $ellipsis; |
| 2314 | + break; |
| 2315 | + } |
| 2316 | + } |
| 2317 | + } |
| 2318 | + if ( $displayLen == 0 ) { |
| 2319 | + return ''; // no text shown, nothing to format |
| 2320 | + } |
| 2321 | + self::onEndBracket( $tag, $tagType, $text[$textLen-1], $openTags ); // for bad HTML; args in signature order (tag type, then last char) |
| 2322 | + while ( count( $openTags ) > 0 ) { |
| 2323 | + $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags |
| 2324 | + } |
| 2325 | + return $ret; |
| 2326 | + } |
| 2327 | + |
| 2328 | + // truncateHtml() helper function |
| 2329 | + // like strcspn() but adds the skipped chars to $ret; returns the number of chars skipped (a negative $len means no limit) |
| 2330 | + private function skipAndAppend( &$ret, $text, $search, $start, $len = -1 ) { |
| 2331 | + $skipCount = 0; |
| 2332 | + if( $start < strlen($text) ) { |
| 2333 | + $skipCount = ( $len < 0 ) ? strcspn( $text, $search, $start ) : strcspn( $text, $search, $start, $len ); // a negative length given to strcspn() would stop short of the end of $text |
| 2334 | + $ret .= substr( $text, $start, $skipCount ); |
| 2335 | + } |
| 2336 | + return $skipCount; |
| 2337 | + } |
| 2338 | + |
| 2339 | + // truncateHtml() helper function, called when a '<' or '>' is reached and once after the scan |
| 2340 | + // (a) push or pop $tag from $openTags as needed ($tagType: 0-open, 1-close; $lastCh: char before the bracket) |
| 2341 | + // (b) clear $tag value |
| 2342 | + private function onEndBracket( &$tag, $tagType, $lastCh, &$openTags ) { |
| 2343 | + $tag = ltrim( $tag ); // drop leading whitespace from the collected name |
| 2344 | + if( $tag != '' ) { |
| 2345 | + if( $tagType == 0 && $lastCh != '/' ) { |
| 2346 | + $openTags[] = $tag; // tag opened (didn't close itself) |
| 2347 | + } else if( $tagType == 1 ) { |
| 2348 | + if( $openTags && $tag == $openTags[count($openTags)-1] ) { |
| 2349 | + array_pop( $openTags ); // tag closed (only when it matches the most recently opened tag) |
| 2350 | + } |
| 2351 | + } |
| 2352 | + $tag = ''; |
| 2353 | + } |
| 2354 | + } |
| 2355 | + |
| 2356 | + /** |
2203 | 2357 | * Grammatical transformations, needed for inflected languages |
2204 | 2358 | * Invoked by putting {{grammar:case|word}} in a message |
2205 | 2359 | * |
Index: trunk/extensions/CodeReview/backend/CodeCommentLinker.php |
— | — | @@ -19,128 +19,6 @@ |
20 | 20 | array( $this, 'messageBugLink' ), $text ); |
21 | 21 | return $text; |
22 | 22 | } |
23 | | - |
24 | | - /* |
25 | | - * Truncate a valid HTML string with self-contained tags only. |
26 | | - * Intended for styled/linked text (tags like <span> and <a>). |
27 | | - * Note: tries to fix broken HTML with MWTidy |
28 | | - * @TODO: cleanup and move to language.php |
29 | | - * @param string $text |
30 | | - * @param int $maxLen (zero/positive) |
31 | | - * @param string $ellipsis |
32 | | - * @returns string |
33 | | - */ |
34 | | - function truncateHtml( $text, $maxLen, $ellipsis = '...' ) { |
35 | | - global $wgLang; |
36 | | - if( strlen($text) <= $maxLen ) { |
37 | | - return $text; // string short enough even *with* HTML |
38 | | - } elseif ( $maxLen <= 0 ) { |
39 | | - return ''; // no text shown, nothing to format |
40 | | - } |
41 | | - $text = MWTidy::tidy( $text ); // fix tags |
42 | | - $displayLen = 0; // innerHTML legth so far |
43 | | - $doTruncate = true; // truncated string plus '...' shorter than original? |
44 | | - $tagType = 0; // 0-open, 1-close |
45 | | - $bracketState = 0; // 1-tag start, 2-tag name, 0-neither |
46 | | - $entityState = 0; // 0-not entity, 1-entity |
47 | | - $tag = $ret = $ch = ''; |
48 | | - $openTags = array(); |
49 | | - $textLen = strlen($text); |
50 | | - for( $pos = 0; $pos < $textLen; ++$pos ) { |
51 | | - $ch = $text[$pos]; |
52 | | - $lastCh = $pos ? $text[$pos-1] : ''; |
53 | | - $ret .= $ch; // add to result string |
54 | | - if ( $ch == '<' ) { |
55 | | - self::onEndBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML |
56 | | - $entityState = 0; // for bad HTML |
57 | | - $bracketState = 1; // tag started (checking for backslash) |
58 | | - } elseif ( $ch == '>' ) { |
59 | | - self::onEndBracket( $tag, $tagType, $lastCh, $openTags ); |
60 | | - $entityState = 0; // for bad HTML |
61 | | - $bracketState = 0; // out of brackets |
62 | | - } elseif ( $bracketState == 1 ) { |
63 | | - if ( $ch == '/' ) { |
64 | | - $tagType = 1; // close tag (e.g. "</span>") |
65 | | - } else { |
66 | | - $tagType = 0; // open tag (e.g. "<span>") |
67 | | - $tag .= $ch; |
68 | | - } |
69 | | - $bracketState = 2; // building tag name |
70 | | - } elseif ( $bracketState == 2 ) { |
71 | | - if ( $ch != ' ' ) { |
72 | | - $tag .= $ch; |
73 | | - } else { |
74 | | - // Name found (e.g. "<a href=..."), add on tag attributes... |
75 | | - $pos += self::skipAndAppend( $ret, $text, "<>", $pos + 1 ); |
76 | | - } |
77 | | - } elseif ( $bracketState == 0 ) { |
78 | | - if ( $entityState ) { |
79 | | - if ( $ch == ';' ) { |
80 | | - $entityState = 0; |
81 | | - $displayLen++; // entity is one displayed char |
82 | | - } |
83 | | - } else { |
84 | | - if ( $ch == '&' ) { |
85 | | - $entityState = 1; // entity found, (e.g. " ") |
86 | | - } else { |
87 | | - $displayLen++; // this char is displayed |
88 | | - // Add on the other display text after this... |
89 | | - $skipped = self::skipAndAppend( |
90 | | - $ret, $text, "<>&", $pos + 1, $maxLen - $displayLen ); |
91 | | - $displayLen += $skipped; |
92 | | - $pos += $skipped; |
93 | | - } |
94 | | - } |
95 | | - } |
96 | | - if( !$doTruncate ) continue; |
97 | | - # Truncate if not in the middle of a bracket/entity... |
98 | | - if ( $bracketState == 0 && $entityState == 0 && $displayLen >= $maxLen ) { |
99 | | - $remaining = substr( $text, $pos + 1 ); // remaining string |
100 | | - $remaining = StringUtils::delimiterReplace( '<', '>', '', $remaining ); // rm tags |
101 | | - $remaining = StringUtils::delimiterReplace( '&', ';', '', $remaining ); // rm entities |
102 | | - $doTruncate = ( strlen($remaining) > strlen($ellipsis) ); |
103 | | - if ( $doTruncate ) { |
104 | | - # Hack: go one char over so truncate() will handle multi-byte chars |
105 | | - $ret = $wgLang->truncate( $ret . 'x', strlen($ret), '' ) . $ellipsis; |
106 | | - break; |
107 | | - } |
108 | | - } |
109 | | - } |
110 | | - if( $displayLen == 0 ) { |
111 | | - return ''; // no text shown, nothing to format |
112 | | - } |
113 | | - self::onEndBracket( $tag, $text[$textLen-1], $tagType, $openTags ); // for bad HTML |
114 | | - while ( count( $openTags ) > 0 ) { |
115 | | - $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags |
116 | | - } |
117 | | - return $ret; |
118 | | - } |
119 | | - |
120 | | - // like strcspn() but adds the skipped chars to $ret |
121 | | - private function skipAndAppend( &$ret, $text, $search, $start, $len = -1 ) { |
122 | | - $skipCount = 0; |
123 | | - if( $start < strlen($text) ) { |
124 | | - $skipCount = strcspn( $text, $search, $start, $len ); |
125 | | - $ret .= substr( $text, $start, $skipCount ); |
126 | | - } |
127 | | - return $skipCount; |
128 | | - } |
129 | | - |
130 | | - // (a) push or pop $tag from $openTags as needed |
131 | | - // (b) clear $tag value |
132 | | - private function onEndBracket( &$tag, $tagType, $lastCh, &$openTags ) { |
133 | | - $tag = ltrim( $tag ); |
134 | | - if( $tag != '' ) { |
135 | | - if( $tagType == 0 && $lastCh != '/' ) { |
136 | | - $openTags[] = $tag; // tag opened (didn't close itself) |
137 | | - } else if( $tagType == 1 ) { |
138 | | - if( $openTags && $tag == $openTags[count($openTags)-1] ) { |
139 | | - array_pop( $openTags ); // tag closed |
140 | | - } |
141 | | - } |
142 | | - $tag = ''; |
143 | | - } |
144 | | - } |
145 | 23 | |
146 | 24 | function generalLink( $arr ) { |
147 | 25 | $url = $arr[2] . $arr[3]; |
Index: trunk/extensions/CodeReview/ui/SpecialCode.php |
— | — | @@ -154,8 +154,7 @@ |
155 | 155 | $lines = explode( "\n", $message, 2 ); |
156 | 156 | $first = $lines[0]; |
157 | 157 | $html = $this->formatMessage( $first ); |
158 | | - $linker = new CodeCommentLinkerHtml( $this->mRepo ); |
159 | | - return $linker->truncateHtml( $html, 80 ); |
| 158 | + return $wgLang->truncateHtml( $html, 80 ); |
160 | 159 | } |
161 | 160 | /* |
162 | 161 | * Formatted HTML array for properties display |