Index: trunk/phase3/languages/Language.php |
— | — | @@ -2393,31 +2393,34 @@ |
2394 | 2394 | if ( $ellipsis == '...' ) { |
2395 | 2395 | $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) ); |
2396 | 2396 | } |
2397 | | - $eLength = $adjustLength ? strlen( $ellipsis ) : 0; |
2398 | 2397 | # Check if there is no need to truncate |
2399 | 2398 | if ( $length == 0 ) { |
2400 | | - return $ellipsis; |
| 2399 | + return $ellipsis; // convention |
2401 | 2400 | } elseif ( strlen( $string ) <= abs( $length ) ) { |
2402 | | - return $string; |
2403 | | - } elseif ( strlen( $ellipsis ) >= abs( $length ) ) { |
2404 | | - // Not combined with first (length == 0) if statement, since |
2405 | | - // we want to return the string instead of ellipsis if both |
2406 | | - // this and the proceeding elseif are true. |
2407 | | - return $ellipsis; |
| 2401 | + return $string; // no need to truncate |
2408 | 2402 | } |
2409 | 2403 | $stringOriginal = $string; |
2410 | | - if ( $length > 0 ) { |
2411 | | - $length -= $eLength; |
2412 | | - $string = substr( $string, 0, $length ); // xyz... |
2413 | | - $string = $this->removeBadCharLast( $string ); |
2414 | | - $string = $string . $ellipsis; |
| 2404 | + # If ellipsis length is >= $length then we can't apply $adjustLength |
| 2405 | + if ( $adjustLength && strlen( $ellipsis ) >= abs( $length ) ) { |
| 2406 | + $string = $ellipsis; // this can be slightly unexpected |
| 2407 | + # Otherwise, truncate and add ellipsis... |
2415 | 2408 | } else { |
2416 | | - $length += $eLength; |
2417 | | - $string = substr( $string, $length ); // ...xyz |
2418 | | - $string = $this->removeBadCharFirst( $string ); |
2419 | | - $string = $ellipsis . $string; |
| 2409 | + $eLength = $adjustLength ? strlen( $ellipsis ) : 0; |
| 2410 | + if ( $length > 0 ) { |
| 2411 | + $length -= $eLength; |
| 2412 | + $string = substr( $string, 0, $length ); // xyz... |
| 2413 | + $string = $this->removeBadCharLast( $string ); |
| 2414 | + $string = $string . $ellipsis; |
| 2415 | + } else { |
| 2416 | + $length += $eLength; |
| 2417 | + $string = substr( $string, $length ); // ...xyz |
| 2418 | + $string = $this->removeBadCharFirst( $string ); |
| 2419 | + $string = $ellipsis . $string; |
| 2420 | + } |
2420 | 2421 | } |
2421 | | - # Do not truncate if the ellipsis makes the string longer/equal (bug 22181) |
| 2422 | + # Do not truncate if the ellipsis makes the string longer/equal (bug 22181). |
| 2423 | + # This check is *not* redundant if $adjustLength, due to the single case where |
| 2424 | + # LEN($ellipsis) > ABS($limit arg); $stringOriginal could be shorter than $string. |
2422 | 2425 | if ( strlen( $string ) < strlen( $stringOriginal ) ) { |
2423 | 2426 | return $string; |
2424 | 2427 | } else { |
— | — | @@ -2433,17 +2436,19 @@ |
2434 | 2437 | * @return string |
2435 | 2438 | */ |
2436 | 2439 | protected function removeBadCharLast( $string ) { |
2437 | | - $char = ord( $string[strlen( $string ) - 1] ); |
2438 | | - $m = array(); |
2439 | | - if ( $char >= 0xc0 ) { |
2440 | | - # We got the first byte only of a multibyte char; remove it. |
2441 | | - $string = substr( $string, 0, -1 ); |
2442 | | - } elseif ( $char >= 0x80 && |
2443 | | - preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . |
2444 | | - '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) |
2445 | | - { |
2446 | | - # We chopped in the middle of a character; remove it |
2447 | | - $string = $m[1]; |
| 2440 | + if ( $string != '' ) { |
| 2441 | + $char = ord( $string[strlen( $string ) - 1] ); |
| 2442 | + $m = array(); |
| 2443 | + if ( $char >= 0xc0 ) { |
| 2444 | + # We got the first byte only of a multibyte char; remove it. |
| 2445 | + $string = substr( $string, 0, -1 ); |
| 2446 | + } elseif ( $char >= 0x80 && |
| 2447 | + preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . |
| 2448 | + '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) |
| 2449 | + { |
| 2450 | + # We chopped in the middle of a character; remove it |
| 2451 | + $string = $m[1]; |
| 2452 | + } |
2448 | 2453 | } |
2449 | 2454 | return $string; |
2450 | 2455 | } |
— | — | @@ -2456,10 +2461,12 @@ |
2457 | 2462 | * @return string |
2458 | 2463 | */ |
2459 | 2464 | protected function removeBadCharFirst( $string ) { |
2460 | | - $char = ord( $string[0] ); |
2461 | | - if ( $char >= 0x80 && $char < 0xc0 ) { |
2462 | | - # We chopped in the middle of a character; remove the whole thing |
2463 | | - $string = preg_replace( '/^[\x80-\xbf]+/', '', $string ); |
| 2465 | + if ( $string != '' ) { |
| 2466 | + $char = ord( $string[0] ); |
| 2467 | + if ( $char >= 0x80 && $char < 0xc0 ) { |
| 2468 | + # We chopped in the middle of a character; remove the whole thing |
| 2469 | + $string = preg_replace( '/^[\x80-\xbf]+/', '', $string ); |
| 2470 | + } |
2464 | 2471 | } |
2465 | 2472 | return $string; |
2466 | 2473 | } |
— | — | @@ -2469,10 +2476,9 @@ |
2470 | 2477 | * appending an optional string (e.g. for ellipses), and return valid HTML |
2471 | 2478 | * |
2472 | 2479 | * This is only intended for styled/linked text, such as HTML with |
2473 | | - * tags like <span> and <a>, were the tags are self-contained (valid HTML) |
| 2480 | + * tags like <span> and <a>, were the tags are self-contained (valid HTML). |
| 2481 | + * Also, this will not detect things like "display:none" CSS. |
2474 | 2482 | * |
2475 | | - * Note: tries to fix broken HTML with MWTidy |
2476 | | - * |
2477 | 2483 | * Note: since 1.18 you do not need to leave extra room in $length for ellipses. |
2478 | 2484 | * |
2479 | 2485 | * @param string $text HTML string to truncate |
— | — | @@ -2485,23 +2491,44 @@ |
2486 | 2492 | if ( $ellipsis == '...' ) { |
2487 | 2493 | $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) ); |
2488 | 2494 | } |
2489 | | - $length -= strlen( $ellipsis ); |
2490 | | - # Check if there is no need to truncate |
| 2495 | + # Check if there is clearly no need to truncate |
2491 | 2496 | if ( $length <= 0 ) { |
2492 | | - return $ellipsis; // no text shown, nothing to format |
| 2497 | + return $ellipsis; // no text shown, nothing to format (convention) |
2493 | 2498 | } elseif ( strlen( $text ) <= $length ) { |
2494 | | - return $text; // string short enough even *with* HTML |
| 2499 | + return $text; // string short enough even *with* HTML (short-circuit) |
2495 | 2500 | } |
2496 | | - $text = MWTidy::tidy( $text ); // fix tags |
| 2501 | + |
2497 | 2502 | $displayLen = 0; // innerHTML legth so far |
2498 | 2503 | $testingEllipsis = false; // checking if ellipses will make string longer/equal? |
2499 | 2504 | $tagType = 0; // 0-open, 1-close |
2500 | 2505 | $bracketState = 0; // 1-tag start, 2-tag name, 0-neither |
2501 | 2506 | $entityState = 0; // 0-not entity, 1-entity |
2502 | | - $tag = $ret = ''; |
| 2507 | + $tag = $ret = ''; // accumulated tag name, accumulated result string |
2503 | 2508 | $openTags = array(); // open tag stack |
| 2509 | + |
2504 | 2510 | $textLen = strlen( $text ); |
2505 | | - for ( $pos = 0; $pos < $textLen; ++$pos ) { |
| 2511 | + $neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated |
| 2512 | + for ( $pos = 0; true; ++$pos ) { |
| 2513 | + # Consider truncation once the display length has reached the maximim. |
| 2514 | + # Check that we're not in the middle of a bracket/entity... |
| 2515 | + if ( $displayLen >= $neLength && $bracketState == 0 && $entityState == 0 ) { |
| 2516 | + if ( !$testingEllipsis ) { |
| 2517 | + $testingEllipsis = true; |
| 2518 | + # Save where we are; we will truncate here unless there turn out to |
| 2519 | + # be so few remaining characters that truncation is not necessary. |
| 2520 | + $pOpenTags = $openTags; // save state |
| 2521 | + $pRet = $ret; // save state |
| 2522 | + } elseif ( $displayLen > $length && $displayLen > strlen( $ellipsis ) ) { |
| 2523 | + # String in fact does need truncation, the truncation point was OK. |
| 2524 | + $openTags = $pOpenTags; // reload state |
| 2525 | + $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix |
| 2526 | + $ret .= $ellipsis; // add ellipsis |
| 2527 | + break; |
| 2528 | + } |
| 2529 | + } |
| 2530 | + if ( $pos >= $textLen ) break; // extra iteration just for above checks |
| 2531 | + |
| 2532 | + # Read the next char... |
2506 | 2533 | $ch = $text[$pos]; |
2507 | 2534 | $lastCh = $pos ? $text[$pos - 1] : ''; |
2508 | 2535 | $ret .= $ch; // add to result string |
— | — | @@ -2539,31 +2566,14 @@ |
2540 | 2567 | $entityState = 1; // entity found, (e.g. " ") |
2541 | 2568 | } else { |
2542 | 2569 | $displayLen++; // this char is displayed |
2543 | | - // Add on the other display text after this... |
2544 | | - $skipped = $this->truncate_skip( |
2545 | | - $ret, $text, "<>&", $pos + 1, $length - $displayLen ); |
| 2570 | + // Add the next $max display text chars after this in one swoop... |
| 2571 | + $max = ( $testingEllipsis ? $length : $neLength ) - $displayLen; |
| 2572 | + $skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max ); |
2546 | 2573 | $displayLen += $skipped; |
2547 | 2574 | $pos += $skipped; |
2548 | 2575 | } |
2549 | 2576 | } |
2550 | 2577 | } |
2551 | | - # Consider truncation once the display length has reached the maximim. |
2552 | | - # Double-check that we're not in the middle of a bracket/entity... |
2553 | | - if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) { |
2554 | | - if ( !$testingEllipsis ) { |
2555 | | - $testingEllipsis = true; |
2556 | | - # Save where we are; we will truncate here unless |
2557 | | - # the ellipsis actually makes the string longer. |
2558 | | - $pOpenTags = $openTags; // save state |
2559 | | - $pRet = $ret; // save state |
2560 | | - } elseif ( $displayLen > ( $length + strlen( $ellipsis ) ) ) { |
2561 | | - # Ellipsis won't make string longer/equal, the truncation point was OK. |
2562 | | - $openTags = $pOpenTags; // reload state |
2563 | | - $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix |
2564 | | - $ret .= $ellipsis; // add ellipsis |
2565 | | - break; |
2566 | | - } |
2567 | | - } |
2568 | 2578 | } |
2569 | 2579 | if ( $displayLen == 0 ) { |
2570 | 2580 | return ''; // no text shown, nothing to format |
— | — | @@ -2578,7 +2588,12 @@ |
2579 | 2589 | |
2580 | 2590 | // truncateHtml() helper function |
2581 | 2591 | // like strcspn() but adds the skipped chars to $ret |
2582 | | - private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) { |
| 2592 | + private function truncate_skip( &$ret, $text, $search, $start, $len = null ) { |
| 2593 | + if ( $len === null ) { |
| 2594 | + $len = -1; // -1 means "no limit" for strcspn |
| 2595 | + } elseif ( $len < 0 ) { |
| 2596 | + $len = 0; // sanity |
| 2597 | + } |
2583 | 2598 | $skipCount = 0; |
2584 | 2599 | if ( $start < strlen( $text ) ) { |
2585 | 2600 | $skipCount = strcspn( $text, $search, $start, $len ); |