r45473 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r45472‎ | r45473 | r45474 >
Date:00:41, 7 January 2009
Author:simetrical
Status:reverted (Comments)
Tags:
Comment:
Reduce code duplication correctly this time

This reverts r45470 and fixes the problems it identified. Things should
now work as they always used to, but with less code duplication, and
with $wgEnforceHtmlIds = false working correctly as well.
Modified paths:
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/Parser.php
@@ -3448,7 +3448,7 @@
34493449 * @private
34503450 */
34513451 function formatHeadings( $text, $isMain=true ) {
3452 - global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds;
 3452+ global $wgMaxTocLevel, $wgContLang;
34533453
34543454 $doNumberHeadings = $this->mOptions->getNumberHeadings();
34553455 $showEditLink = $this->mOptions->getEditSection();
@@ -3593,71 +3593,17 @@
35943594 }
35953595 }
35963596
3597 - # The safe header is a version of the header text safe to use for links
3598 - # Avoid insertion of weird stuff like <math> by expanding the relevant sections
3599 - $safeHeadline = $this->mStripState->unstripBoth( $headline );
 3597+ list( $anchor, $legacyAnchor, $tocline, $headlineHint ) =
 3598+ $this->processHeadingText( $headline );
36003599
3601 - # Remove link placeholders by the link text.
3602 - # <!--LINK number-->
3603 - # turns into
3604 - # link text with suffix
3605 - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
3606 -
3607 - # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
3608 - $tocline = preg_replace(
3609 - array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
3610 - array( '', '<$1>'),
3611 - $safeHeadline
3612 - );
3613 - $tocline = trim( $tocline );
3614 -
3615 - # For the anchor, strip out HTML-y stuff period
3616 - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
3617 - $safeHeadline = trim( $safeHeadline );
3618 -
3619 - # Save headline for section edit hint before it's escaped
3620 - $headlineHint = $safeHeadline;
3621 -
3622 - if ( $wgEnforceHtmlIds ) {
3623 - $legacyHeadline = false;
3624 - $safeHeadline = Sanitizer::escapeId( $safeHeadline,
3625 - 'noninitial' );
3626 - } else {
3627 - # For reverse compatibility, provide an id that's
3628 - # HTML4-compatible, like we used to.
3629 - #
3630 - # It may be worth noting, academically, that it's possible for
3631 - # the legacy anchor to conflict with a non-legacy headline
3632 - # anchor on the page. In this case likely the "correct" thing
3633 - # would be to either drop the legacy anchors or make sure
3634 - # they're numbered first. However, this would require people
3635 - # to type in section names like "abc_.D7.93.D7.90.D7.A4"
3636 - # manually, so let's not bother worrying about it.
3637 - $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
3638 - 'noninitial' );
3639 - $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
3640 -
3641 - if ( $legacyHeadline == $safeHeadline ) {
3642 - # No reason to have both (in fact, we can't)
3643 - $legacyHeadline = false;
3644 - } elseif ( $legacyHeadline != Sanitizer::escapeId(
3645 - $legacyHeadline, 'xml' ) ) {
3646 - # The legacy id is invalid XML. We used to allow this, but
3647 - # there's no reason to do so anymore. Backward
3648 - # compatibility will fail slightly in this case, but it's
3649 - # no big deal.
3650 - $legacyHeadline = false;
3651 - }
3652 - }
3653 -
36543600 # HTML names must be case-insensitively unique (bug 10721). FIXME:
36553601 # Does this apply to Unicode characters? Because we aren't
36563602 # handling those here.
3657 - $arrayKey = strtolower( $safeHeadline );
3658 - if ( $legacyHeadline === false ) {
 3603+ $arrayKey = strtolower( $anchor );
 3604+ if ( $legacyAnchor === false ) {
36593605 $legacyArrayKey = false;
36603606 } else {
3661 - $legacyArrayKey = strtolower( $legacyHeadline );
 3607+ $legacyArrayKey = strtolower( $legacyAnchor );
36623608 }
36633609
36643610 # count how many in assoc. array so we can track dupes in anchors
@@ -3679,12 +3625,10 @@
36803626 }
36813627
36823628 # Create the anchor for linking from the TOC to the section
3683 - $anchor = $safeHeadline;
3684 - $legacyAnchor = $legacyHeadline;
36853629 if ( $refers[$arrayKey] > 1 ) {
36863630 $anchor .= '_' . $refers[$arrayKey];
36873631 }
3688 - if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
 3632+ if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) {
36893633 $legacyAnchor .= '_' . $refers[$legacyArrayKey];
36903634 }
36913635 if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
@@ -3756,6 +3700,70 @@
37573701 }
37583702 }
37593703
 3704+ private function processHeadingText( $headline ) {
 3705+ global $wgEnforceHtmlIds;
 3706+
 3707+ # The safe header is a version of the header text safe to use for links
 3708+ # Avoid insertion of weird stuff like <math> by expanding the relevant sections
 3709+ $safeHeadline = $this->mStripState->unstripBoth( $headline );
 3710+
 3711+ # Remove link placeholders by the link text.
 3712+ # <!--LINK number-->
 3713+ # turns into
 3714+ # link text with suffix
 3715+ $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
 3716+
 3717+ # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
 3718+ $tocline = preg_replace(
 3719+ array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
 3720+ array( '', '<$1>'),
 3721+ $safeHeadline
 3722+ );
 3723+ $tocline = trim( $tocline );
 3724+
 3725+ # For the anchor, strip out HTML-y stuff period
 3726+ $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
 3727+ $safeHeadline = trim( $safeHeadline );
 3728+
 3729+ # Save headline for section edit hint before it's escaped
 3730+ $headlineHint = $safeHeadline;
 3731+
 3732+ if ( $wgEnforceHtmlIds ) {
 3733+ $legacyHeadline = false;
 3734+ $safeHeadline = Sanitizer::escapeId( $safeHeadline,
 3735+ 'noninitial' );
 3736+ } else {
 3737+ # For reverse compatibility, provide an id that's
 3738+ # HTML4-compatible, like we used to.
 3739+ #
 3740+ # It may be worth noting, academically, that it's possible for
 3741+ # the legacy anchor to conflict with a non-legacy headline
 3742+ # anchor on the page. In this case likely the "correct" thing
 3743+ # would be to either drop the legacy anchors or make sure
 3744+ # they're numbered first. However, this would require people
 3745+ # to type in section names like "abc_.D7.93.D7.90.D7.A4"
 3746+ # manually, so let's not bother worrying about it.
 3747+ $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
 3748+ 'noninitial' );
 3749+ $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
 3750+
 3751+ if ( $legacyHeadline == $safeHeadline ) {
 3752+ # No reason to have both (in fact, we can't)
 3753+ $legacyHeadline = false;
 3754+ } elseif ( $legacyHeadline != Sanitizer::escapeId(
 3755+ $legacyHeadline, 'xml' ) ) {
 3756+ # The legacy id is invalid XML. We used to allow this, but
 3757+ # there's no reason to do so anymore. Backward
 3758+ # compatibility will fail slightly in this case, but it's
 3759+ # no big deal.
 3760+ $legacyHeadline = false;
 3761+ }
 3762+ }
 3763+
 3764+ return array( $safeHeadline, $legacyHeadline, $tocline,
 3765+ $headlineHint );
 3766+ }
 3767+
37603768 /**
37613769 * Transform wiki markup when saving a page by doing \r\n -> \n
37623770 * conversion, substitting signatures, {{subst:}} templates, etc.
@@ -4736,21 +4744,9 @@
47374745 * "== Header ==".
47384746 */
47394747 public function guessSectionNameFromWikiText( $text ) {
4740 - # Strip out wikitext links(they break the anchor)
47414748 $text = $this->stripSectionName( $text );
4742 - $headline = Sanitizer::decodeCharReferences( $text );
4743 - # strip out HTML
4744 - $headline = StringUtils::delimiterReplace( '<', '>', '', $headline );
4745 - $headline = trim( $headline );
4746 - $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) );
4747 - $replacearray = array(
4748 - '%3A' => ':',
4749 - '%' => '.'
4750 - );
4751 - return str_replace(
4752 - array_keys( $replacearray ),
4753 - array_values( $replacearray ),
4754 - $sectionanchor );
 4749+ list( $text, /* unneeded here */ ) = $this->processHeadingText( $text );
 4750+ return "#$text";
47554751 }
47564752
47574753 /**

Follow-up revisions

RevisionCommit summaryAuthorDate
r45487Pulling r45473 back for now "Reduce code duplication correctly this time"...brion04:23, 7 January 2009

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r45470Reverting r45341 " Use Sanitizer::escapeId() in another place Reduce code dup...brion00:00, 7 January 2009

Comments

#Comment by Brion VIBBER (talk | contribs)   04:24, 7 January 2009

Reverted in r45487

Let's hold off on further section anchor generation changes until we have decent test cases covering the different ways we we stuff through...

Status & tagging log