r45646 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r45645‎ | r45646 | r45647 >
Date:17:16, 10 January 2009
Author:nikerabbit
Status:ok
Tags:
Comment:
* Reverting r45588, causes fatal errors when saving new sections
Modified paths:
  • /trunk/phase3/includes/parser/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/Parser.php
@@ -3449,7 +3449,7 @@
34503450 * @private
34513451 */
34523452 function formatHeadings( $text, $isMain=true ) {
3453 - global $wgMaxTocLevel, $wgContLang;
 3453+ global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds;
34543454
34553455 $doNumberHeadings = $this->mOptions->getNumberHeadings();
34563456 $showEditLink = $this->mOptions->getEditSection();
@@ -3594,17 +3594,71 @@
35953595 }
35963596 }
35973597
3598 - list( $anchor, $legacyAnchor, $tocline, $headlineHint ) =
3599 - $this->processHeadingText( $headline );
 3598+ # The safe header is a version of the header text safe to use for links
 3599+ # Avoid insertion of weird stuff like <math> by expanding the relevant sections
 3600+ $safeHeadline = $this->mStripState->unstripBoth( $headline );
36003601
 3602+ # Remove link placeholders by the link text.
 3603+ # <!--LINK number-->
 3604+ # turns into
 3605+ # link text with suffix
 3606+ $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
 3607+
 3608+ # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
 3609+ $tocline = preg_replace(
 3610+ array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
 3611+ array( '', '<$1>'),
 3612+ $safeHeadline
 3613+ );
 3614+ $tocline = trim( $tocline );
 3615+
 3616+ # For the anchor, strip out HTML-y stuff period
 3617+ $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
 3618+ $safeHeadline = trim( $safeHeadline );
 3619+
 3620+ # Save headline for section edit hint before it's escaped
 3621+ $headlineHint = $safeHeadline;
 3622+
 3623+ if ( $wgEnforceHtmlIds ) {
 3624+ $legacyHeadline = false;
 3625+ $safeHeadline = Sanitizer::escapeId( $safeHeadline,
 3626+ 'noninitial' );
 3627+ } else {
 3628+ # For reverse compatibility, provide an id that's
 3629+ # HTML4-compatible, like we used to.
 3630+ #
 3631+ # It may be worth noting, academically, that it's possible for
 3632+ # the legacy anchor to conflict with a non-legacy headline
 3633+ # anchor on the page. In this case likely the "correct" thing
 3634+ # would be to either drop the legacy anchors or make sure
 3635+ # they're numbered first. However, this would require people
 3636+ # to type in section names like "abc_.D7.93.D7.90.D7.A4"
 3637+ # manually, so let's not bother worrying about it.
 3638+ $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
 3639+ 'noninitial' );
 3640+ $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
 3641+
 3642+ if ( $legacyHeadline == $safeHeadline ) {
 3643+ # No reason to have both (in fact, we can't)
 3644+ $legacyHeadline = false;
 3645+ } elseif ( $legacyHeadline != Sanitizer::escapeId(
 3646+ $legacyHeadline, 'xml' ) ) {
 3647+ # The legacy id is invalid XML. We used to allow this, but
 3648+ # there's no reason to do so anymore. Backward
 3649+ # compatibility will fail slightly in this case, but it's
 3650+ # no big deal.
 3651+ $legacyHeadline = false;
 3652+ }
 3653+ }
 3654+
36013655 # HTML names must be case-insensitively unique (bug 10721). FIXME:
36023656 # Does this apply to Unicode characters? Because we aren't
36033657 # handling those here.
3604 - $arrayKey = strtolower( $anchor );
3605 - if ( $legacyAnchor === false ) {
 3658+ $arrayKey = strtolower( $safeHeadline );
 3659+ if ( $legacyHeadline === false ) {
36063660 $legacyArrayKey = false;
36073661 } else {
3608 - $legacyArrayKey = strtolower( $legacyAnchor );
 3662+ $legacyArrayKey = strtolower( $legacyHeadline );
36093663 }
36103664
36113665 # count how many in assoc. array so we can track dupes in anchors
@@ -3626,10 +3680,12 @@
36273681 }
36283682
36293683 # Create the anchor for linking from the TOC to the section
 3684+ $anchor = $safeHeadline;
 3685+ $legacyAnchor = $legacyHeadline;
36303686 if ( $refers[$arrayKey] > 1 ) {
36313687 $anchor .= '_' . $refers[$arrayKey];
36323688 }
3633 - if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) {
 3689+ if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
36343690 $legacyAnchor .= '_' . $refers[$legacyArrayKey];
36353691 }
36363692 if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
@@ -3701,70 +3757,6 @@
37023758 }
37033759 }
37043760
3705 - private function processHeadingText( $headline ) {
3706 - global $wgEnforceHtmlIds;
3707 -
3708 - # The safe header is a version of the header text safe to use for links
3709 - # Avoid insertion of weird stuff like <math> by expanding the relevant sections
3710 - $safeHeadline = $this->mStripState->unstripBoth( $headline );
3711 -
3712 - # Remove link placeholders by the link text.
3713 - # <!--LINK number-->
3714 - # turns into
3715 - # link text with suffix
3716 - $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
3717 -
3718 - # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
3719 - $tocline = preg_replace(
3720 - array( '#<(?!/?(sup|sub)).*?'.'>#', '#<(/?(sup|sub)).*?'.'>#' ),
3721 - array( '', '<$1>'),
3722 - $safeHeadline
3723 - );
3724 - $tocline = trim( $tocline );
3725 -
3726 - # For the anchor, strip out HTML-y stuff period
3727 - $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
3728 - $safeHeadline = trim( $safeHeadline );
3729 -
3730 - # Save headline for section edit hint before it's escaped
3731 - $headlineHint = $safeHeadline;
3732 -
3733 - if ( $wgEnforceHtmlIds ) {
3734 - $legacyHeadline = false;
3735 - $safeHeadline = Sanitizer::escapeId( $safeHeadline,
3736 - 'noninitial' );
3737 - } else {
3738 - # For reverse compatibility, provide an id that's
3739 - # HTML4-compatible, like we used to.
3740 - #
3741 - # It may be worth noting, academically, that it's possible for
3742 - # the legacy anchor to conflict with a non-legacy headline
3743 - # anchor on the page. In this case likely the "correct" thing
3744 - # would be to either drop the legacy anchors or make sure
3745 - # they're numbered first. However, this would require people
3746 - # to type in section names like "abc_.D7.93.D7.90.D7.A4"
3747 - # manually, so let's not bother worrying about it.
3748 - $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
3749 - 'noninitial' );
3750 - $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
3751 -
3752 - if ( $legacyHeadline == $safeHeadline ) {
3753 - # No reason to have both (in fact, we can't)
3754 - $legacyHeadline = false;
3755 - } elseif ( $legacyHeadline != Sanitizer::escapeId(
3756 - $legacyHeadline, 'xml' ) ) {
3757 - # The legacy id is invalid XML. We used to allow this, but
3758 - # there's no reason to do so anymore. Backward
3759 - # compatibility will fail slightly in this case, but it's
3760 - # no big deal.
3761 - $legacyHeadline = false;
3762 - }
3763 - }
3764 -
3765 - return array( $safeHeadline, $legacyHeadline, $tocline,
3766 - $headlineHint );
3767 - }
3768 -
37693761 /**
37703762 * Transform wiki markup when saving a page by doing \r\n -> \n
37713763 * conversion, substitting signatures, {{subst:}} templates, etc.
@@ -4745,9 +4737,21 @@
47464738 * "== Header ==".
47474739 */
47484740 public function guessSectionNameFromWikiText( $text ) {
 4741+ # Strip out wikitext links(they break the anchor)
47494742 $text = $this->stripSectionName( $text );
4750 - list( $text, /* unneeded here */ ) = $this->processHeadingText( $text );
4751 - return "#$text";
 4743+ $headline = Sanitizer::decodeCharReferences( $text );
 4744+ # strip out HTML
 4745+ $headline = StringUtils::delimiterReplace( '<', '>', '', $headline );
 4746+ $headline = trim( $headline );
 4747+ $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) );
 4748+ $replacearray = array(
 4749+ '%3A' => ':',
 4750+ '%' => '.'
 4751+ );
 4752+ return str_replace(
 4753+ array_keys( $replacearray ),
 4754+ array_values( $replacearray ),
 4755+ $sectionanchor );
47524756 }
47534757
47544758 /**

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r45588 | Reduce code duplication correctly this time, again... | simetrical | 23:59, 8 January 2009

Status & tagging log