r62907 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r62906‎ | r62907 | r62908 >
Date:04:14, 24 February 2010
Author:aaron
Status:ok (Comments)
Tags:
Comment:
* Moved truncateHtml() to language.php
* Renamed $maxLen -> $length
* Made $length=0 case match truncate()
Modified paths:
  • /trunk/extensions/CodeReview/backend/CodeCommentLinker.php (modified) (history)
  • /trunk/extensions/CodeReview/ui/SpecialCode.php (modified) (history)
  • /trunk/phase3/languages/Language.php (modified) (history)

Diff [purge]

Index: trunk/phase3/languages/Language.php
@@ -2155,42 +2155,26 @@
21562156 */
21572157 function truncate( $string, $length, $ellipsis = '...' ) {
21582158 # Use the localized ellipsis character
2159 - if( $ellipsis == '...' ) {
 2159+ if ( $ellipsis == '...' ) {
21602160 $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
21612161 }
2162 -
2163 - if( $length == 0 ) {
 2162+ # Check if there is no need to truncate
 2163+ if ( $length == 0 ) {
21642164 return $ellipsis;
2165 - }
2166 - if ( strlen( $string ) <= abs( $length ) ) {
 2165+ } elseif ( strlen( $string ) <= abs( $length ) ) {
21672166 return $string;
21682167 }
21692168 $stringOriginal = $string;
2170 - if( $length > 0 ) {
2171 - $string = substr( $string, 0, $length );
2172 - $char = ord( $string[strlen( $string ) - 1] );
2173 - $m = array();
2174 - if ($char >= 0xc0) {
2175 - # We got the first byte only of a multibyte char; remove it.
2176 - $string = substr( $string, 0, -1 );
2177 - } elseif( $char >= 0x80 &&
2178 - preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
2179 - '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
2180 - # We chopped in the middle of a character; remove it
2181 - $string = $m[1];
2182 - }
 2169+ if ( $length > 0 ) {
 2170+ $string = substr( $string, 0, $length ); // xyz...
 2171+ $string = self::removeBadCharLast( $string );
21832172 $string = $string . $ellipsis;
2184 -
21852173 } else {
2186 - $string = substr( $string, $length );
2187 - $char = ord( $string[0] );
2188 - if( $char >= 0x80 && $char < 0xc0 ) {
2189 - # We chopped in the middle of a character; remove the whole thing
2190 - $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
2191 - }
 2174+ $string = substr( $string, $length ); // ...xyz
 2175+ $string = self::removeBadCharFirst( $string );
21922176 $string = $ellipsis . $string;
21932177 }
2194 - # Do not truncate if the ellipsis actually make the string longer. Bug 22181
 2178+ # Do not truncate if the ellipsis makes the string longer (bug 22181)
21952179 if ( strlen( $string ) < strlen( $stringOriginal ) ) {
21962180 return $string;
21972181 } else {
@@ -2199,6 +2183,176 @@
22002184 }
22012185
22022186 /**
 2187+ * Remove bytes that represent an incomplete Unicode character
 2188+ * at the end of string (e.g. bytes of the char are missing)
 2189+ *
 2190+ * @param $string String
 2191+ * @return string
 2192+ */
 2193+ protected function removeBadCharLast( $string ) {
 2194+ $char = ord( $string[strlen( $string ) - 1] );
 2195+ $m = array();
 2196+ if ( $char >= 0xc0 ) {
 2197+ # We got the first byte only of a multibyte char; remove it.
 2198+ $string = substr( $string, 0, -1 );
 2199+ } elseif ( $char >= 0x80 &&
 2200+ preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
 2201+ '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
 2202+ {
 2203+ # We chopped in the middle of a character; remove it
 2204+ $string = $m[1];
 2205+ }
 2206+ return $string;
 2207+ }
 2208+
 2209+ /**
 2210+ * Remove bytes that represent an incomplete Unicode character
 2211+ * at the start of string (e.g. bytes of the char are missing)
 2212+ *
 2213+ * @param $string String
 2214+ * @return string
 2215+ */
 2216+ protected function removeBadCharFirst( $string ) {
 2217+ $char = ord( $string[0] );
 2218+ if ( $char >= 0x80 && $char < 0xc0 ) {
 2219+ # We chopped in the middle of a character; remove the whole thing
 2220+ $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
 2221+ }
 2222+ return $string;
 2223+ }
 2224+
 2225+ /*
 2226+ * Truncate a string of valid HTML to a specified length in bytes,
 2227+ * appending an optional string (e.g. for ellipses), and return valid HTML
 2228+ *
 2229+ * This is only intended for styled/linked text, such as HTML with
 2230+ * tags like <span> and <a>, were the tags are self-contained (valid HTML)
 2231+ *
 2232+ * Note: tries to fix broken HTML with MWTidy
 2233+ *
 2234+ * @param string $text String to truncate
 2235+ * @param int $length (zero/positive) Maximum length (excluding ellipses)
 2236+ * @param string $ellipsis String to append to the truncated text
 2237+ * @returns string
 2238+ */
 2239+ function truncateHtml( $text, $length, $ellipsis = '...' ) {
 2240+ # Use the localized ellipsis character
 2241+ if ( $ellipsis == '...' ) {
 2242+ $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
 2243+ }
 2244+ # Check if there is no need to truncate
 2245+ if ( $length <= 0 ) {
 2246+ return $ellipsis; // no text shown, nothing to format
 2247+ } elseif ( strlen($text) <= $length ) {
 2248+ return $text; // string short enough even *with* HTML
 2249+ }
 2250+ $text = MWTidy::tidy( $text ); // fix tags
 2251+ $displayLen = 0; // innerHTML legth so far
 2252+ $doTruncate = true; // truncated string plus '...' shorter than original?
 2253+ $tagType = 0; // 0-open, 1-close
 2254+ $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
 2255+ $entityState = 0; // 0-not entity, 1-entity
 2256+ $tag = $ret = $ch = '';
 2257+ $openTags = array();
 2258+ $textLen = strlen($text);
 2259+ for( $pos = 0; $pos < $textLen; ++$pos ) {
 2260+ $ch = $text[$pos];
 2261+ $lastCh = $pos ? $text[$pos-1] : '';
 2262+ $ret .= $ch; // add to result string
 2263+ if ( $ch == '<' ) {
 2264+ self::onEndBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
 2265+ $entityState = 0; // for bad HTML
 2266+ $bracketState = 1; // tag started (checking for backslash)
 2267+ } elseif ( $ch == '>' ) {
 2268+ self::onEndBracket( $tag, $tagType, $lastCh, $openTags );
 2269+ $entityState = 0; // for bad HTML
 2270+ $bracketState = 0; // out of brackets
 2271+ } elseif ( $bracketState == 1 ) {
 2272+ if ( $ch == '/' ) {
 2273+ $tagType = 1; // close tag (e.g. "</span>")
 2274+ } else {
 2275+ $tagType = 0; // open tag (e.g. "<span>")
 2276+ $tag .= $ch;
 2277+ }
 2278+ $bracketState = 2; // building tag name
 2279+ } elseif ( $bracketState == 2 ) {
 2280+ if ( $ch != ' ' ) {
 2281+ $tag .= $ch;
 2282+ } else {
 2283+ // Name found (e.g. "<a href=..."), add on tag attributes...
 2284+ $pos += self::skipAndAppend( $ret, $text, "<>", $pos + 1 );
 2285+ }
 2286+ } elseif ( $bracketState == 0 ) {
 2287+ if ( $entityState ) {
 2288+ if ( $ch == ';' ) {
 2289+ $entityState = 0;
 2290+ $displayLen++; // entity is one displayed char
 2291+ }
 2292+ } else {
 2293+ if ( $ch == '&' ) {
 2294+ $entityState = 1; // entity found, (e.g. "&nbsp;")
 2295+ } else {
 2296+ $displayLen++; // this char is displayed
 2297+ // Add on the other display text after this...
 2298+ $skipped = self::skipAndAppend(
 2299+ $ret, $text, "<>&", $pos + 1, $length - $displayLen );
 2300+ $displayLen += $skipped;
 2301+ $pos += $skipped;
 2302+ }
 2303+ }
 2304+ }
 2305+ if( !$doTruncate ) continue;
 2306+ # Truncate if not in the middle of a bracket/entity...
 2307+ if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
 2308+ $remaining = substr( $text, $pos + 1 ); // remaining string
 2309+ $remaining = StringUtils::delimiterReplace( '<', '>', '', $remaining ); // rm tags
 2310+ $remaining = StringUtils::delimiterReplace( '&', ';', '', $remaining ); // rm entities
 2311+ $doTruncate = ( strlen($remaining) > strlen($ellipsis) );
 2312+ if ( $doTruncate ) {
 2313+ $ret = self::removeBadCharLast( $ret ) . $ellipsis;
 2314+ break;
 2315+ }
 2316+ }
 2317+ }
 2318+ if ( $displayLen == 0 ) {
 2319+ return ''; // no text shown, nothing to format
 2320+ }
 2321+ self::onEndBracket( $tag, $text[$textLen-1], $tagType, $openTags ); // for bad HTML
 2322+ while ( count( $openTags ) > 0 ) {
 2323+ $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
 2324+ }
 2325+ return $ret;
 2326+ }
 2327+
 2328+ // truncateHtml() helper function
 2329+ // like strcspn() but adds the skipped chars to $ret
 2330+ private function skipAndAppend( &$ret, $text, $search, $start, $len = -1 ) {
 2331+ $skipCount = 0;
 2332+ if( $start < strlen($text) ) {
 2333+ $skipCount = strcspn( $text, $search, $start, $len );
 2334+ $ret .= substr( $text, $start, $skipCount );
 2335+ }
 2336+ return $skipCount;
 2337+ }
 2338+
 2339+ // truncateHtml() helper function
 2340+ // (a) push or pop $tag from $openTags as needed
 2341+ // (b) clear $tag value
 2342+ private function onEndBracket( &$tag, $tagType, $lastCh, &$openTags ) {
 2343+ $tag = ltrim( $tag );
 2344+ if( $tag != '' ) {
 2345+ if( $tagType == 0 && $lastCh != '/' ) {
 2346+ $openTags[] = $tag; // tag opened (didn't close itself)
 2347+ } else if( $tagType == 1 ) {
 2348+ if( $openTags && $tag == $openTags[count($openTags)-1] ) {
 2349+ array_pop( $openTags ); // tag closed
 2350+ }
 2351+ }
 2352+ $tag = '';
 2353+ }
 2354+ }
 2355+
 2356+ /**
22032357 * Grammatical transformations, needed for inflected languages
22042358 * Invoked by putting {{grammar:case|word}} in a message
22052359 *
Index: trunk/extensions/CodeReview/backend/CodeCommentLinker.php
@@ -19,128 +19,6 @@
2020 array( $this, 'messageBugLink' ), $text );
2121 return $text;
2222 }
23 -
24 - /*
25 - * Truncate a valid HTML string with self-contained tags only.
26 - * Intended for styled/linked text (tags like <span> and <a>).
27 - * Note: tries to fix broken HTML with MWTidy
28 - * @TODO: cleanup and move to language.php
29 - * @param string $text
30 - * @param int $maxLen (zero/positive)
31 - * @param string $ellipsis
32 - * @returns string
33 - */
34 - function truncateHtml( $text, $maxLen, $ellipsis = '...' ) {
35 - global $wgLang;
36 - if( strlen($text) <= $maxLen ) {
37 - return $text; // string short enough even *with* HTML
38 - } elseif ( $maxLen <= 0 ) {
39 - return ''; // no text shown, nothing to format
40 - }
41 - $text = MWTidy::tidy( $text ); // fix tags
42 - $displayLen = 0; // innerHTML legth so far
43 - $doTruncate = true; // truncated string plus '...' shorter than original?
44 - $tagType = 0; // 0-open, 1-close
45 - $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
46 - $entityState = 0; // 0-not entity, 1-entity
47 - $tag = $ret = $ch = '';
48 - $openTags = array();
49 - $textLen = strlen($text);
50 - for( $pos = 0; $pos < $textLen; ++$pos ) {
51 - $ch = $text[$pos];
52 - $lastCh = $pos ? $text[$pos-1] : '';
53 - $ret .= $ch; // add to result string
54 - if ( $ch == '<' ) {
55 - self::onEndBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
56 - $entityState = 0; // for bad HTML
57 - $bracketState = 1; // tag started (checking for backslash)
58 - } elseif ( $ch == '>' ) {
59 - self::onEndBracket( $tag, $tagType, $lastCh, $openTags );
60 - $entityState = 0; // for bad HTML
61 - $bracketState = 0; // out of brackets
62 - } elseif ( $bracketState == 1 ) {
63 - if ( $ch == '/' ) {
64 - $tagType = 1; // close tag (e.g. "</span>")
65 - } else {
66 - $tagType = 0; // open tag (e.g. "<span>")
67 - $tag .= $ch;
68 - }
69 - $bracketState = 2; // building tag name
70 - } elseif ( $bracketState == 2 ) {
71 - if ( $ch != ' ' ) {
72 - $tag .= $ch;
73 - } else {
74 - // Name found (e.g. "<a href=..."), add on tag attributes...
75 - $pos += self::skipAndAppend( $ret, $text, "<>", $pos + 1 );
76 - }
77 - } elseif ( $bracketState == 0 ) {
78 - if ( $entityState ) {
79 - if ( $ch == ';' ) {
80 - $entityState = 0;
81 - $displayLen++; // entity is one displayed char
82 - }
83 - } else {
84 - if ( $ch == '&' ) {
85 - $entityState = 1; // entity found, (e.g. "&nbsp;")
86 - } else {
87 - $displayLen++; // this char is displayed
88 - // Add on the other display text after this...
89 - $skipped = self::skipAndAppend(
90 - $ret, $text, "<>&", $pos + 1, $maxLen - $displayLen );
91 - $displayLen += $skipped;
92 - $pos += $skipped;
93 - }
94 - }
95 - }
96 - if( !$doTruncate ) continue;
97 - # Truncate if not in the middle of a bracket/entity...
98 - if ( $bracketState == 0 && $entityState == 0 && $displayLen >= $maxLen ) {
99 - $remaining = substr( $text, $pos + 1 ); // remaining string
100 - $remaining = StringUtils::delimiterReplace( '<', '>', '', $remaining ); // rm tags
101 - $remaining = StringUtils::delimiterReplace( '&', ';', '', $remaining ); // rm entities
102 - $doTruncate = ( strlen($remaining) > strlen($ellipsis) );
103 - if ( $doTruncate ) {
104 - # Hack: go one char over so truncate() will handle multi-byte chars
105 - $ret = $wgLang->truncate( $ret . 'x', strlen($ret), '' ) . $ellipsis;
106 - break;
107 - }
108 - }
109 - }
110 - if( $displayLen == 0 ) {
111 - return ''; // no text shown, nothing to format
112 - }
113 - self::onEndBracket( $tag, $text[$textLen-1], $tagType, $openTags ); // for bad HTML
114 - while ( count( $openTags ) > 0 ) {
115 - $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
116 - }
117 - return $ret;
118 - }
119 -
120 - // like strcspn() but adds the skipped chars to $ret
121 - private function skipAndAppend( &$ret, $text, $search, $start, $len = -1 ) {
122 - $skipCount = 0;
123 - if( $start < strlen($text) ) {
124 - $skipCount = strcspn( $text, $search, $start, $len );
125 - $ret .= substr( $text, $start, $skipCount );
126 - }
127 - return $skipCount;
128 - }
129 -
130 - // (a) push or pop $tag from $openTags as needed
131 - // (b) clear $tag value
132 - private function onEndBracket( &$tag, $tagType, $lastCh, &$openTags ) {
133 - $tag = ltrim( $tag );
134 - if( $tag != '' ) {
135 - if( $tagType == 0 && $lastCh != '/' ) {
136 - $openTags[] = $tag; // tag opened (didn't close itself)
137 - } else if( $tagType == 1 ) {
138 - if( $openTags && $tag == $openTags[count($openTags)-1] ) {
139 - array_pop( $openTags ); // tag closed
140 - }
141 - }
142 - $tag = '';
143 - }
144 - }
14523
14624 function generalLink( $arr ) {
14725 $url = $arr[2] . $arr[3];
Index: trunk/extensions/CodeReview/ui/SpecialCode.php
@@ -154,8 +154,7 @@
155155 $lines = explode( "\n", $message, 2 );
156156 $first = $lines[0];
157157 $html = $this->formatMessage( $first );
158 - $linker = new CodeCommentLinkerHtml( $this->mRepo );
159 - return $linker->truncateHtml( $html, 80 );
 158+ return $wgLang->truncateHtml( $html, 80 );
160159 }
161160 /*
162161 * Formatted HTML array for properties display

Follow-up revisions

Revision | Commit summary | Author | Date
r62997 | r62907: removed static calls, renamed helper functions | aaron | 07:11, 26 February 2010
r74634 | 1.16wmf4: MFT r62907 to fix fatals in CodeReview | catrope | 17:06, 11 October 2010
r74637 | 1.16wmf4: Revert r74634: take r62907 back out, causing issues with truncation. | catrope | 17:22, 11 October 2010
r74638 | Back out r62907 for now, doesn't play nicely with Language.php | demon | 17:23, 11 October 2010
r74639 | 1.16wmf4: Revert r74637, r74638: misguided attempts at fixing what really was... | catrope | 17:32, 11 October 2010

Comments

#Comment by Nikerabbit (talk | contribs)   19:40, 24 February 2010
  • Is Language the best place for this, except that truncate() already is? How about OutputPage or Html?
  • How about adding @since 1.17 to method documentation?

Status & tagging log