Index: trunk/phase3/includes/parser/Parser.php |
— | — | @@ -3448,7 +3448,7 @@ |
3449 | 3449 | * @private |
3450 | 3450 | */ |
3451 | 3451 | function formatHeadings( $text, $isMain=true ) { |
3452 | | - global $wgMaxTocLevel, $wgContLang; |
| 3452 | + global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds; |
3453 | 3453 | |
3454 | 3454 | $doNumberHeadings = $this->mOptions->getNumberHeadings(); |
3455 | 3455 | $showEditLink = $this->mOptions->getEditSection(); |
— | — | @@ -3593,17 +3593,71 @@ |
3594 | 3594 | } |
3595 | 3595 | } |
3596 | 3596 | |
3597 | | - list( $anchor, $legacyAnchor, $tocline, $headlineHint ) = |
3598 | | - $this->processHeadingText( $headline ); |
| 3597 | + # The safe header is a version of the header text safe to use for links |
| 3598 | + # Avoid insertion of weird stuff like <math> by expanding the relevant sections |
| 3599 | + $safeHeadline = $this->mStripState->unstripBoth( $headline ); |
3599 | 3600 | |
| 3601 | + # Replace link placeholders with the link text. |
| 3602 | + # <!--LINK number--> |
| 3603 | + # turns into |
| 3604 | + # link text with suffix |
| 3605 | + $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); |
| 3606 | + |
| 3607 | + # Strip out HTML (other than plain <sup> and <sub>: bug 8393) |
| 3608 | + $tocline = preg_replace( |
| 3609 | + array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), |
| 3610 | + array( '', '<$1>'), |
| 3611 | + $safeHeadline |
| 3612 | + ); |
| 3613 | + $tocline = trim( $tocline ); |
| 3614 | + |
| 3615 | + # For the anchor, strip out HTML-y stuff period |
| 3616 | + $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); |
| 3617 | + $safeHeadline = trim( $safeHeadline ); |
| 3618 | + |
| 3619 | + # Save headline for section edit hint before it's escaped |
| 3620 | + $headlineHint = $safeHeadline; |
| 3621 | + |
| 3622 | + if ( $wgEnforceHtmlIds ) { |
| 3623 | + $legacyHeadline = false; |
| 3624 | + $safeHeadline = Sanitizer::escapeId( $safeHeadline, |
| 3625 | + 'noninitial' ); |
| 3626 | + } else { |
| 3627 | + # For backward compatibility, provide an id that's |
| 3628 | + # HTML4-compatible, like we used to. |
| 3629 | + # |
| 3630 | + # It may be worth noting, academically, that it's possible for |
| 3631 | + # the legacy anchor to conflict with a non-legacy headline |
| 3632 | + # anchor on the page. In this case likely the "correct" thing |
| 3633 | + # would be to either drop the legacy anchors or make sure |
| 3634 | + # they're numbered first. However, this would require people |
| 3635 | + # to type in section names like "abc_.D7.93.D7.90.D7.A4" |
| 3636 | + # manually, so let's not bother worrying about it. |
| 3637 | + $legacyHeadline = Sanitizer::escapeId( $safeHeadline, |
| 3638 | + 'noninitial' ); |
| 3639 | + $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); |
| 3640 | + |
| 3641 | + if ( $legacyHeadline == $safeHeadline ) { |
| 3642 | + # No reason to have both (in fact, we can't) |
| 3643 | + $legacyHeadline = false; |
| 3644 | + } elseif ( $legacyHeadline != Sanitizer::escapeId( |
| 3645 | + $legacyHeadline, 'xml' ) ) { |
| 3646 | + # The legacy id is invalid XML. We used to allow this, but |
| 3647 | + # there's no reason to do so anymore. Backward |
| 3648 | + # compatibility will fail slightly in this case, but it's |
| 3649 | + # no big deal. |
| 3650 | + $legacyHeadline = false; |
| 3651 | + } |
| 3652 | + } |
| 3653 | + |
3600 | 3654 | # HTML names must be case-insensitively unique (bug 10721). FIXME: |
3601 | 3655 | # Does this apply to Unicode characters? Because we aren't |
3602 | 3656 | # handling those here. |
3603 | | - $arrayKey = strtolower( $anchor ); |
3604 | | - if ( $legacyAnchor === false ) { |
| 3657 | + $arrayKey = strtolower( $safeHeadline ); |
| 3658 | + if ( $legacyHeadline === false ) { |
3605 | 3659 | $legacyArrayKey = false; |
3606 | 3660 | } else { |
3607 | | - $legacyArrayKey = strtolower( $legacyAnchor ); |
| 3661 | + $legacyArrayKey = strtolower( $legacyHeadline ); |
3608 | 3662 | } |
3609 | 3663 | |
3610 | 3664 | # count how many in assoc. array so we can track dupes in anchors |
— | — | @@ -3625,10 +3679,12 @@ |
3626 | 3680 | } |
3627 | 3681 | |
3628 | 3682 | # Create the anchor for linking from the TOC to the section |
| 3683 | + $anchor = $safeHeadline; |
| 3684 | + $legacyAnchor = $legacyHeadline; |
3629 | 3685 | if ( $refers[$arrayKey] > 1 ) { |
3630 | 3686 | $anchor .= '_' . $refers[$arrayKey]; |
3631 | 3687 | } |
3632 | | - if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) { |
| 3688 | + if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { |
3633 | 3689 | $legacyAnchor .= '_' . $refers[$legacyArrayKey]; |
3634 | 3690 | } |
3635 | 3691 | if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { |
— | — | @@ -3700,70 +3756,6 @@ |
3701 | 3757 | } |
3702 | 3758 | } |
3703 | 3759 | |
3704 | | - private function processHeadingText( $headline ) { |
3705 | | - global $wgEnforceHtmlIds; |
3706 | | - |
3707 | | - # The safe header is a version of the header text safe to use for links |
3708 | | - # Avoid insertion of weird stuff like <math> by expanding the relevant sections |
3709 | | - $safeHeadline = $this->mStripState->unstripBoth( $headline ); |
3710 | | - |
3711 | | - # Remove link placeholders by the link text. |
3712 | | - # <!--LINK number--> |
3713 | | - # turns into |
3714 | | - # link text with suffix |
3715 | | - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); |
3716 | | - |
3717 | | - # Strip out HTML (other than plain <sup> and <sub>: bug 8393) |
3718 | | - $tocline = preg_replace( |
3719 | | - array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), |
3720 | | - array( '', '<$1>'), |
3721 | | - $safeHeadline |
3722 | | - ); |
3723 | | - $tocline = trim( $tocline ); |
3724 | | - |
3725 | | - # For the anchor, strip out HTML-y stuff period |
3726 | | - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); |
3727 | | - $safeHeadline = trim( $safeHeadline ); |
3728 | | - |
3729 | | - # Save headline for section edit hint before it's escaped |
3730 | | - $headlineHint = $safeHeadline; |
3731 | | - |
3732 | | - if ( $wgEnforceHtmlIds ) { |
3733 | | - $legacyHeadline = false; |
3734 | | - $safeHeadline = Sanitizer::escapeId( $safeHeadline, |
3735 | | - 'noninitial' ); |
3736 | | - } else { |
3737 | | - # For reverse compatibility, provide an id that's |
3738 | | - # HTML4-compatible, like we used to. |
3739 | | - # |
3740 | | - # It may be worth noting, academically, that it's possible for |
3741 | | - # the legacy anchor to conflict with a non-legacy headline |
3742 | | - # anchor on the page. In this case likely the "correct" thing |
3743 | | - # would be to either drop the legacy anchors or make sure |
3744 | | - # they're numbered first. However, this would require people |
3745 | | - # to type in section names like "abc_.D7.93.D7.90.D7.A4" |
3746 | | - # manually, so let's not bother worrying about it. |
3747 | | - $legacyHeadline = Sanitizer::escapeId( $safeHeadline, |
3748 | | - 'noninitial' ); |
3749 | | - $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); |
3750 | | - |
3751 | | - if ( $legacyHeadline == $safeHeadline ) { |
3752 | | - # No reason to have both (in fact, we can't) |
3753 | | - $legacyHeadline = false; |
3754 | | - } elseif ( $legacyHeadline != Sanitizer::escapeId( |
3755 | | - $legacyHeadline, 'xml' ) ) { |
3756 | | - # The legacy id is invalid XML. We used to allow this, but |
3757 | | - # there's no reason to do so anymore. Backward |
3758 | | - # compatibility will fail slightly in this case, but it's |
3759 | | - # no big deal. |
3760 | | - $legacyHeadline = false; |
3761 | | - } |
3762 | | - } |
3763 | | - |
3764 | | - return array( $safeHeadline, $legacyHeadline, $tocline, |
3765 | | - $headlineHint ); |
3766 | | - } |
3767 | | - |
3768 | 3760 | /** |
3769 | 3761 | * Transform wiki markup when saving a page by doing \r\n -> \n |
3770 | 3762 | * conversion, substitting signatures, {{subst:}} templates, etc. |
— | — | @@ -4744,9 +4736,21 @@ |
4745 | 4737 | * "== Header ==". |
4746 | 4738 | */ |
4747 | 4739 | public function guessSectionNameFromWikiText( $text ) { |
| 4740 | + # Strip out wikitext links (they break the anchor) |
4748 | 4741 | $text = $this->stripSectionName( $text ); |
4749 | | - list( $text, /* unneeded here */ ) = $this->processHeadingText( $text ); |
4750 | | - return "#$text"; |
| 4742 | + $headline = Sanitizer::decodeCharReferences( $text ); |
| 4743 | + # strip out HTML |
| 4744 | + $headline = StringUtils::delimiterReplace( '<', '>', '', $headline ); |
| 4745 | + $headline = trim( $headline ); |
| 4746 | + $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) ); |
| 4747 | + $replacearray = array( |
| 4748 | + '%3A' => ':', |
| 4749 | + '%' => '.' |
| 4750 | + ); |
| 4751 | + return str_replace( |
| 4752 | + array_keys( $replacearray ), |
| 4753 | + array_values( $replacearray ), |
| 4754 | + $sectionanchor ); |
4751 | 4755 | } |
4752 | 4756 | |
4753 | 4757 | /** |