Index: trunk/phase3/includes/parser/Parser.php |
— | — | @@ -3448,7 +3448,7 @@ |
3449 | 3449 | * @private |
3450 | 3450 | */ |
3451 | 3451 | function formatHeadings( $text, $isMain=true ) { |
3452 | | - global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds; |
| 3452 | + global $wgMaxTocLevel, $wgContLang; |
3453 | 3453 | |
3454 | 3454 | $doNumberHeadings = $this->mOptions->getNumberHeadings(); |
3455 | 3455 | $showEditLink = $this->mOptions->getEditSection(); |
— | — | @@ -3593,71 +3593,17 @@ |
3594 | 3594 | } |
3595 | 3595 | } |
3596 | 3596 | |
3597 | | - # The safe header is a version of the header text safe to use for links |
3598 | | - # Avoid insertion of weird stuff like <math> by expanding the relevant sections |
3599 | | - $safeHeadline = $this->mStripState->unstripBoth( $headline ); |
| 3597 | + list( $anchor, $legacyAnchor, $tocline, $headlineHint ) = |
| 3598 | + $this->processHeadingText( $headline ); |
3600 | 3599 | |
3601 | | - # Remove link placeholders by the link text. |
3602 | | - # <!--LINK number--> |
3603 | | - # turns into |
3604 | | - # link text with suffix |
3605 | | - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); |
3606 | | - |
3607 | | - # Strip out HTML (other than plain <sup> and <sub>: bug 8393) |
3608 | | - $tocline = preg_replace( |
3609 | | - array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), |
3610 | | - array( '', '<$1>'), |
3611 | | - $safeHeadline |
3612 | | - ); |
3613 | | - $tocline = trim( $tocline ); |
3614 | | - |
3615 | | - # For the anchor, strip out HTML-y stuff period |
3616 | | - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); |
3617 | | - $safeHeadline = trim( $safeHeadline ); |
3618 | | - |
3619 | | - # Save headline for section edit hint before it's escaped |
3620 | | - $headlineHint = $safeHeadline; |
3621 | | - |
3622 | | - if ( $wgEnforceHtmlIds ) { |
3623 | | - $legacyHeadline = false; |
3624 | | - $safeHeadline = Sanitizer::escapeId( $safeHeadline, |
3625 | | - 'noninitial' ); |
3626 | | - } else { |
3627 | | - # For reverse compatibility, provide an id that's |
3628 | | - # HTML4-compatible, like we used to. |
3629 | | - # |
3630 | | - # It may be worth noting, academically, that it's possible for |
3631 | | - # the legacy anchor to conflict with a non-legacy headline |
3632 | | - # anchor on the page. In this case likely the "correct" thing |
3633 | | - # would be to either drop the legacy anchors or make sure |
3634 | | - # they're numbered first. However, this would require people |
3635 | | - # to type in section names like "abc_.D7.93.D7.90.D7.A4" |
3636 | | - # manually, so let's not bother worrying about it. |
3637 | | - $legacyHeadline = Sanitizer::escapeId( $safeHeadline, |
3638 | | - 'noninitial' ); |
3639 | | - $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); |
3640 | | - |
3641 | | - if ( $legacyHeadline == $safeHeadline ) { |
3642 | | - # No reason to have both (in fact, we can't) |
3643 | | - $legacyHeadline = false; |
3644 | | - } elseif ( $legacyHeadline != Sanitizer::escapeId( |
3645 | | - $legacyHeadline, 'xml' ) ) { |
3646 | | - # The legacy id is invalid XML. We used to allow this, but |
3647 | | - # there's no reason to do so anymore. Backward |
3648 | | - # compatibility will fail slightly in this case, but it's |
3649 | | - # no big deal. |
3650 | | - $legacyHeadline = false; |
3651 | | - } |
3652 | | - } |
3653 | | - |
3654 | 3600 | # HTML names must be case-insensitively unique (bug 10721). FIXME: |
3655 | 3601 | # Does this apply to Unicode characters? Because we aren't |
3656 | 3602 | # handling those here. |
3657 | | - $arrayKey = strtolower( $safeHeadline ); |
3658 | | - if ( $legacyHeadline === false ) { |
| 3603 | + $arrayKey = strtolower( $anchor ); |
| 3604 | + if ( $legacyAnchor === false ) { |
3659 | 3605 | $legacyArrayKey = false; |
3660 | 3606 | } else { |
3661 | | - $legacyArrayKey = strtolower( $legacyHeadline ); |
| 3607 | + $legacyArrayKey = strtolower( $legacyAnchor ); |
3662 | 3608 | } |
3663 | 3609 | |
3664 | 3610 | # count how many in assoc. array so we can track dupes in anchors |
— | — | @@ -3679,12 +3625,10 @@ |
3680 | 3626 | } |
3681 | 3627 | |
3682 | 3628 | # Create the anchor for linking from the TOC to the section |
3683 | | - $anchor = $safeHeadline; |
3684 | | - $legacyAnchor = $legacyHeadline; |
3685 | 3629 | if ( $refers[$arrayKey] > 1 ) { |
3686 | 3630 | $anchor .= '_' . $refers[$arrayKey]; |
3687 | 3631 | } |
3688 | | - if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { |
| 3632 | + if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) { |
3689 | 3633 | $legacyAnchor .= '_' . $refers[$legacyArrayKey]; |
3690 | 3634 | } |
3691 | 3635 | if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { |
— | — | @@ -3756,6 +3700,70 @@ |
3757 | 3701 | } |
3758 | 3702 | } |
3759 | 3703 | |
| 3704 | + private function processHeadingText( $headline ) { |
| 3705 | + global $wgEnforceHtmlIds; |
| 3706 | + |
| 3707 | + # Returns array( anchor, legacy anchor or false, TOC line, edit-section hint ) for a heading. The safe header is a version of the header text safe to use for links. |
| 3708 | + # Avoid insertion of weird stuff like <math> by expanding the stripped sections first |
| 3709 | + $safeHeadline = $this->mStripState->unstripBoth( $headline ); |
| 3710 | + |
| 3711 | + # Replace link placeholders with the link text. |
| 3712 | + # <!--LINK number--> |
| 3713 | + # turns into |
| 3714 | + # link text with suffix |
| 3715 | + $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline ); |
| 3716 | + |
| 3717 | + # For the TOC line, strip out HTML other than <sup> and <sub> (bug 8393); those two are kept but reduced to the bare tag, dropping anything after the tag name |
| 3718 | + $tocline = preg_replace( |
| 3719 | + array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ), |
| 3720 | + array( '', '<$1>'), |
| 3721 | + $safeHeadline |
| 3722 | + ); |
| 3723 | + $tocline = trim( $tocline ); |
| 3724 | + |
| 3725 | + # For the anchor, strip out every HTML-like tag, with no exceptions |
| 3726 | + $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); |
| 3727 | + $safeHeadline = trim( $safeHeadline ); |
| 3728 | + |
| 3729 | + # Save the tag-free, trimmed headline for the section edit hint before it's escaped |
| 3730 | + $headlineHint = $safeHeadline; |
| 3731 | + |
| 3732 | + if ( $wgEnforceHtmlIds ) { |
| 3733 | + $legacyHeadline = false; |
| 3734 | + $safeHeadline = Sanitizer::escapeId( $safeHeadline, |
| 3735 | + 'noninitial' ); |
| 3736 | + } else { |
| 3737 | + # For backward compatibility, provide an id that's |
| 3738 | + # HTML4-compatible, like we used to. |
| 3739 | + # |
| 3740 | + # It may be worth noting, academically, that it's possible for |
| 3741 | + # the legacy anchor to conflict with a non-legacy headline |
| 3742 | + # anchor on the page. In this case likely the "correct" thing |
| 3743 | + # would be to either drop the legacy anchors or make sure |
| 3744 | + # they're numbered first. However, this would require people |
| 3745 | + # to type in section names like "abc_.D7.93.D7.90.D7.A4" |
| 3746 | + # manually, so let's not bother worrying about it. |
| 3747 | + $legacyHeadline = Sanitizer::escapeId( $safeHeadline, |
| 3748 | + 'noninitial' ); |
| 3749 | + $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' ); |
| 3750 | + |
| 3751 | + if ( $legacyHeadline == $safeHeadline ) { |
| 3752 | + # Both escapings agree, so there is no reason to have both (in fact, we can't) |
| 3753 | + $legacyHeadline = false; |
| 3754 | + } elseif ( $legacyHeadline != Sanitizer::escapeId( |
| 3755 | + $legacyHeadline, 'xml' ) ) { |
| 3756 | + # The legacy id is invalid XML. We used to allow this, but |
| 3757 | + # there's no reason to do so anymore. Backward |
| 3758 | + # compatibility will fail slightly in this case, but it's |
| 3759 | + # no big deal. |
| 3760 | + $legacyHeadline = false; |
| 3761 | + } |
| 3762 | + } |
| 3763 | + |
| 3764 | + return array( $safeHeadline, $legacyHeadline, $tocline, |
| 3765 | + $headlineHint ); |
| 3766 | + } |
| 3767 | + |
3760 | 3768 | /** |
3761 | 3769 | * Transform wiki markup when saving a page by doing \r\n -> \n |
3762 | 3770 | * conversion, substitting signatures, {{subst:}} templates, etc. |
— | — | @@ -4736,21 +4744,9 @@ |
4737 | 4745 | * "== Header ==". |
4738 | 4746 | */ |
4739 | 4747 | public function guessSectionNameFromWikiText( $text ) { |
4740 | | - # Strip out wikitext links(they break the anchor) |
4741 | 4748 | $text = $this->stripSectionName( $text ); |
4742 | | - $headline = Sanitizer::decodeCharReferences( $text ); |
4743 | | - # strip out HTML |
4744 | | - $headline = StringUtils::delimiterReplace( '<', '>', '', $headline ); |
4745 | | - $headline = trim( $headline ); |
4746 | | - $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) ); |
4747 | | - $replacearray = array( |
4748 | | - '%3A' => ':', |
4749 | | - '%' => '.' |
4750 | | - ); |
4751 | | - return str_replace( |
4752 | | - array_keys( $replacearray ), |
4753 | | - array_values( $replacearray ), |
4754 | | - $sectionanchor ); |
| 4749 | + list( $text, /* legacy anchor, TOC line and edit hint are unneeded here */ ) = $this->processHeadingText( $text ); |
| 4750 | + return "#$text"; # NOTE(review): processHeadingText() reads $this->mStripState and picks its escaping from $wgEnforceHtmlIds, unlike the old urlencode-based code; presumably this needs state set up by a parse - confirm it is safe and intended for callers of this public method |
4755 | 4751 | } |
4756 | 4752 | |
4757 | 4753 | /** |