r45487 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r45486‎ | r45487 | r45488 >
Date:04:23, 7 January 2009
Author:brion
Status:ok
Tags:
Comment:
Pulling r45473 back for now "Reduce code duplication correctly this time"
Let's hold off on further section anchor generation changes until we have decent test cases covering the different ways we we stuff through...
Modified paths:
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/Parser.php
@@ -3448,7 +3448,7 @@
34493449 * @private
34503450 */
34513451 function formatHeadings( $text, $isMain=true ) {
3452 - global $wgMaxTocLevel, $wgContLang;
 3452+ global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds;
34533453
34543454 $doNumberHeadings = $this->mOptions->getNumberHeadings();
34553455 $showEditLink = $this->mOptions->getEditSection();
@@ -3593,17 +3593,71 @@
35943594 }
35953595 }
35963596
3597 - list( $anchor, $legacyAnchor, $tocline, $headlineHint ) =
3598 - $this->processHeadingText( $headline );
 3597+ # The safe header is a version of the header text safe to use for links
 3598+ # Avoid insertion of weird stuff like <math> by expanding the relevant sections
 3599+ $safeHeadline = $this->mStripState->unstripBoth( $headline );
35993600
 3601+ # Remove link placeholders by the link text.
 3602+ # <!--LINK number-->
 3603+ # turns into
 3604+ # link text with suffix
 3605+ $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
 3606+
 3607+ # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
 3608+ $tocline = preg_replace(
 3609+ array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
 3610+ array( '', '<$1>'),
 3611+ $safeHeadline
 3612+ );
 3613+ $tocline = trim( $tocline );
 3614+
 3615+ # For the anchor, strip out HTML-y stuff period
 3616+ $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
 3617+ $safeHeadline = trim( $safeHeadline );
 3618+
 3619+ # Save headline for section edit hint before it's escaped
 3620+ $headlineHint = $safeHeadline;
 3621+
 3622+ if ( $wgEnforceHtmlIds ) {
 3623+ $legacyHeadline = false;
 3624+ $safeHeadline = Sanitizer::escapeId( $safeHeadline,
 3625+ 'noninitial' );
 3626+ } else {
 3627+ # For reverse compatibility, provide an id that's
 3628+ # HTML4-compatible, like we used to.
 3629+ #
 3630+ # It may be worth noting, academically, that it's possible for
 3631+ # the legacy anchor to conflict with a non-legacy headline
 3632+ # anchor on the page. In this case likely the "correct" thing
 3633+ # would be to either drop the legacy anchors or make sure
 3634+ # they're numbered first. However, this would require people
 3635+ # to type in section names like "abc_.D7.93.D7.90.D7.A4"
 3636+ # manually, so let's not bother worrying about it.
 3637+ $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
 3638+ 'noninitial' );
 3639+ $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
 3640+
 3641+ if ( $legacyHeadline == $safeHeadline ) {
 3642+ # No reason to have both (in fact, we can't)
 3643+ $legacyHeadline = false;
 3644+ } elseif ( $legacyHeadline != Sanitizer::escapeId(
 3645+ $legacyHeadline, 'xml' ) ) {
 3646+ # The legacy id is invalid XML. We used to allow this, but
 3647+ # there's no reason to do so anymore. Backward
 3648+ # compatibility will fail slightly in this case, but it's
 3649+ # no big deal.
 3650+ $legacyHeadline = false;
 3651+ }
 3652+ }
 3653+
36003654 # HTML names must be case-insensitively unique (bug 10721). FIXME:
36013655 # Does this apply to Unicode characters? Because we aren't
36023656 # handling those here.
3603 - $arrayKey = strtolower( $anchor );
3604 - if ( $legacyAnchor === false ) {
 3657+ $arrayKey = strtolower( $safeHeadline );
 3658+ if ( $legacyHeadline === false ) {
36053659 $legacyArrayKey = false;
36063660 } else {
3607 - $legacyArrayKey = strtolower( $legacyAnchor );
 3661+ $legacyArrayKey = strtolower( $legacyHeadline );
36083662 }
36093663
36103664 # count how many in assoc. array so we can track dupes in anchors
@@ -3625,10 +3679,12 @@
36263680 }
36273681
36283682 # Create the anchor for linking from the TOC to the section
 3683+ $anchor = $safeHeadline;
 3684+ $legacyAnchor = $legacyHeadline;
36293685 if ( $refers[$arrayKey] > 1 ) {
36303686 $anchor .= '_' . $refers[$arrayKey];
36313687 }
3632 - if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) {
 3688+ if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
36333689 $legacyAnchor .= '_' . $refers[$legacyArrayKey];
36343690 }
36353691 if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
@@ -3700,70 +3756,6 @@
37013757 }
37023758 }
37033759
3704 - private function processHeadingText( $headline ) {
3705 - global $wgEnforceHtmlIds;
3706 -
3707 - # The safe header is a version of the header text safe to use for links
3708 - # Avoid insertion of weird stuff like <math> by expanding the relevant sections
3709 - $safeHeadline = $this->mStripState->unstripBoth( $headline );
3710 -
3711 - # Remove link placeholders by the link text.
3712 - # <!--LINK number-->
3713 - # turns into
3714 - # link text with suffix
3715 - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
3716 -
3717 - # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
3718 - $tocline = preg_replace(
3719 - array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
3720 - array( '', '<$1>'),
3721 - $safeHeadline
3722 - );
3723 - $tocline = trim( $tocline );
3724 -
3725 - # For the anchor, strip out HTML-y stuff period
3726 - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
3727 - $safeHeadline = trim( $safeHeadline );
3728 -
3729 - # Save headline for section edit hint before it's escaped
3730 - $headlineHint = $safeHeadline;
3731 -
3732 - if ( $wgEnforceHtmlIds ) {
3733 - $legacyHeadline = false;
3734 - $safeHeadline = Sanitizer::escapeId( $safeHeadline,
3735 - 'noninitial' );
3736 - } else {
3737 - # For reverse compatibility, provide an id that's
3738 - # HTML4-compatible, like we used to.
3739 - #
3740 - # It may be worth noting, academically, that it's possible for
3741 - # the legacy anchor to conflict with a non-legacy headline
3742 - # anchor on the page. In this case likely the "correct" thing
3743 - # would be to either drop the legacy anchors or make sure
3744 - # they're numbered first. However, this would require people
3745 - # to type in section names like "abc_.D7.93.D7.90.D7.A4"
3746 - # manually, so let's not bother worrying about it.
3747 - $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
3748 - 'noninitial' );
3749 - $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
3750 -
3751 - if ( $legacyHeadline == $safeHeadline ) {
3752 - # No reason to have both (in fact, we can't)
3753 - $legacyHeadline = false;
3754 - } elseif ( $legacyHeadline != Sanitizer::escapeId(
3755 - $legacyHeadline, 'xml' ) ) {
3756 - # The legacy id is invalid XML. We used to allow this, but
3757 - # there's no reason to do so anymore. Backward
3758 - # compatibility will fail slightly in this case, but it's
3759 - # no big deal.
3760 - $legacyHeadline = false;
3761 - }
3762 - }
3763 -
3764 - return array( $safeHeadline, $legacyHeadline, $tocline,
3765 - $headlineHint );
3766 - }
3767 -
37683760 /**
37693761 * Transform wiki markup when saving a page by doing \r\n -> \n
37703762 * conversion, substitting signatures, {{subst:}} templates, etc.
@@ -4744,9 +4736,21 @@
47454737 * "== Header ==".
47464738 */
47474739 public function guessSectionNameFromWikiText( $text ) {
 4740+ # Strip out wikitext links(they break the anchor)
47484741 $text = $this->stripSectionName( $text );
4749 - list( $text, /* unneeded here */ ) = $this->processHeadingText( $text );
4750 - return "#$text";
 4742+ $headline = Sanitizer::decodeCharReferences( $text );
 4743+ # strip out HTML
 4744+ $headline = StringUtils::delimiterReplace( '<', '>', '', $headline );
 4745+ $headline = trim( $headline );
 4746+ $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) );
 4747+ $replacearray = array(
 4748+ '%3A' => ':',
 4749+ '%' => '.'
 4750+ );
 4751+ return str_replace(
 4752+ array_keys( $replacearray ),
 4753+ array_values( $replacearray ),
 4754+ $sectionanchor );
47514755 }
47524756
47534757 /**

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r45473Reduce code duplication correctly this time...simetrical00:41, 7 January 2009

Status & tagging log