r62488 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r62487‎ | r62488 | r62489 >
Date:03:07, 15 February 2010
Author:aaron
Status:ok (Comments)
Tags:
Comment:
* Fixed bug 22326: "Truncated urls in summaries lead to different places"
* Broke long lines
Modified paths:
  • /trunk/extensions/CodeReview/backend/CodeCommentLinker.php (modified) (history)
  • /trunk/extensions/CodeReview/ui/SpecialCode.php (modified) (history)

Diff [purge]

Index: trunk/extensions/CodeReview/backend/CodeCommentLinker.php
@@ -11,11 +11,103 @@
1212 function link( $text ) {
1313 # Catch links like http://www.mediawiki.org/wiki/Special:Code/MediaWiki/44245#c829
1414 # Ended by space or brackets (like those pesky <br /> tags)
15 - $text = preg_replace_callback( '/(^|[^\w[])(' . wfUrlProtocols() . ')([^ <>]+)(\b)/', array( $this, 'generalLink' ), $text );
16 - $text = preg_replace_callback( '/\br(\d+)\b/', array( $this, 'messageRevLink' ), $text );
17 - $text = preg_replace_callback( '/\bbug #?(\d+)\b/i', array( $this, 'messageBugLink' ), $text );
 15+ $text = preg_replace_callback( '/(^|[^\w[])(' . wfUrlProtocols() . ')([^ <>]+)(\b)/',
 16+ array( $this, 'generalLink' ), $text );
 17+ $text = preg_replace_callback( '/\br(\d+)\b/',
 18+ array( $this, 'messageRevLink' ), $text );
 19+ $text = preg_replace_callback( '/\bbug #?(\d+)\b/i',
 20+ array( $this, 'messageBugLink' ), $text );
1821 return $text;
1922 }
 23+
 24+ // truncate() for valid HTML with self-contained tags only
 25+ // Note: tries to fix broken HTML with MWTidy
 26+ // @TODO: cleanup and move to language.php
 27+ function truncateHtml( $text, $maxLen, $ellipsis = '...' ) {
 28+ global $wgLang;
 29+ if( strlen($text) <= $maxLen ) {
 30+ return $text; // string short enough even *with* HTML
 31+ }
 32+ $text = MWTidy::tidy( $text ); // fix tags
 33+ $displayLen = 0;
 34+ $doTruncate = true; // truncated string plus '...' shorter than original?
 35+ $tagType = 0; // 0-open, 1-close
 36+ $bracketState = 0; // 1-tag start, 2-tag name, 3-tag params, 0-neither
 37+ $entityState = 0; // 0-not entity, 1-entity
 38+ $tag = $ret = $ch = $lastCh = '';
 39+ $openTags = array();
 40+ for( $pos = 0; $pos < strlen($text); $pos++ ) {
 41+ $lastCh = $ch;
 42+ $ch = $text[$pos];
 43+ if ( $ch == '<' ) {
 44+ self::onEndBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
 45+ $entityState = 0; // for bad HTML
 46+ $bracketState = 1; // tag started (checking for backslash)
 47+ } elseif ( $ch == '>' ) {
 48+ self::onEndBracket( $tag, $tagType, $lastCh, $openTags );
 49+ $entityState = 0; // for bad HTML
 50+ $bracketState = 0; // out of brackets
 51+ } elseif ( $bracketState == 1 ) {
 52+ if ( $ch == '/' ) {
 53+ $tagType = 1; // close tag
 54+ } else {
 55+ $tagType = 0; // open tag
 56+ $tag .= $ch;
 57+ }
 58+ $bracketState = 2; // building tag name
 59+ } elseif ( $bracketState == 2 ) {
 60+ if ( $ch != ' ' ) {
 61+ $tag .= $ch;
 62+ } else {
 63+ $bracketState = 3; // name found (e.g. "<a href=...")
 64+ }
 65+ } elseif ( $bracketState == 0 ) {
 66+ if ( $entityState ) {
 67+ if ( $ch == ';' ) {
 68+ $entityState = 0;
 69+ $displayLen++; // entity is one char
 70+ }
 71+ } elseif ( $ch == '&' ) {
 72+ $entityState = 1; // entity found, (e.g. "&nbsp;")
 73+ } else {
 74+ $displayLen++; // not in brackets
 75+ }
 76+ }
 77+ $ret .= $ch;
 78+ if( !$doTruncate ) continue;
 79+ # Truncate if not in the middle of a bracket/entity...
 80+ if ( $bracketState == 0 && $entityState == 0 && $displayLen >= $maxLen ) {
 81+ $left = substr( $text, $pos + 1 ); // remaining string
 82+ $left = StringUtils::delimiterReplace( '<', '>', '', $left ); // rm tags
 83+ $left = StringUtils::delimiterReplace( '&', ';', '', $left ); // rm entities
 84+ $doTruncate = ( strlen($left) > strlen($ellipsis) );
 85+ if ( $doTruncate ) {
 86+ # Hack: go one char over so truncate() will handle multi-byte chars
 87+ $ret = $wgLang->truncate( $ret . 'x', strlen($ret), '' ) . $ellipsis;
 88+ break;
 89+ }
 90+ }
 91+ }
 92+ self::onEndBracket( $tag, $lastCh, $tagType, $openTags ); // for bad HTML
 93+ while ( count( $openTags ) > 0 ) {
 94+ $ret .= '</' . array_pop($openTags) . '>'; // close open tags
 95+ }
 96+ return $ret;
 97+ }
 98+
 99+ protected function onEndBracket( &$tag, $tagType, $lastCh, &$openTags ) {
 100+ $tag = ltrim( $tag );
 101+ if( $tag != '' ) {
 102+ if( $tagType == 0 && $lastCh != '/' ) {
 103+ $openTags[] = $tag; // tag opened (didn't close itself)
 104+ } else if( $tagType == 1 ) {
 105+ if( $openTags && $tag == $openTags[count($openTags)-1] ) {
 106+ array_pop( $openTags ); // tag closed
 107+ }
 108+ }
 109+ $tag = '';
 110+ }
 111+ }
20112
21113 function generalLink( $arr ) {
22114 $url = $arr[2] . $arr[3];
Index: trunk/extensions/CodeReview/ui/SpecialCode.php
@@ -151,8 +151,9 @@
152152 $message = trim( $value );
153153 $lines = explode( "\n", $message, 2 );
154154 $first = $lines[0];
155 - $trimmed = $wgLang->truncate( $first, 80 );
156 - return $this->formatMessage( $trimmed );
 155+ $html = $this->formatMessage( $first );
 156+ $linker = new CodeCommentLinkerHtml( $this->mRepo );
 157+ return $linker->truncateHtml( $html, 80 );
157158 }
158159 /*
159160 * Formatted HTML array for properties display

Follow-up revisions

RevisionCommit summaryAuthorDate
r62506CodeReview: MWTidy was only introduced in v1.15, so I've fixed the new CodeCo...happydog09:51, 15 February 2010
r62588Follow up r62488, r62538: consistency fix for $maxLen=0 case; no display text...aaron19:48, 16 February 2010

Comments

#Comment by Tim Starling (talk | contribs)   08:02, 19 February 2010

Very nice. You'll be rewriting Parser.php in no time.

Next time you write one of these, you should try using strcspn() to skip across long stretches of non-markup characters, it'll be faster for typical test cases.

I probably would have used $remaining instead of $left. It took me a while to figure out that you didn't mean the left hand side of the string.

# Hack: go one char over so truncate() will handle multi-byte chars
$ret = $wgLang->truncate( $ret . 'x', strlen($ret),  ) . $ellipsis;

Obviously this could be done in Language::truncate() itself.

#Comment by Tim Starling (talk | contribs)   08:14, 19 February 2010

By the way, since you're in this area, there's a bug in the general link regex:

> print $c->link("http://foo\nbar")
[http://foo
bar http://foo
bar]

I wish CodeCommentLinkerWiki could just leave URLs alone, letting the parser link them, so that things like <nowiki> would work.

#Comment by Aaron Schulz (talk | contribs)   23:38, 19 February 2010

strcspn() looks like a great way to speed up + simplifiy...working on this now.

#Comment by Aaron Schulz (talk | contribs)   23:39, 19 February 2010
  • simplify

Status & tagging log