r45473 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r45472‎ \| r45473 \| r45474 >
Date:	00:41, 7 January 2009
Author:	simetrical
Status:	reverted (Comments)
Tags:
Comment:	Reduce code duplication correctly this time This reverts r45470 and fixes the problems it identified. Things should now work as they always used to, but with less code duplication, and with $wgEnforceHtmlIds = false working correctly as well.
Modified paths:	/trunk/phase3/includes/parser/Parser.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/Parser.php
—	—	@@ -3448,7 +3448,7 @@
3449	3449	* @private
3450	3450	*/
3451	3451	function formatHeadings( $text, $isMain=true ) {
3452		~~- global $wgMaxTocLevel, $wgContLang, $wgEnforceHtmlIds;~~
	3452	+ global $wgMaxTocLevel, $wgContLang;
3453	3453
3454	3454	$doNumberHeadings = $this->mOptions->getNumberHeadings();
3455	3455	$showEditLink = $this->mOptions->getEditSection();
—	—	@@ -3593,71 +3593,17 @@
3594	3594	}
3595	3595	}
3596	3596
3597		~~- # The safe header is a version of the header text safe to use for links~~
3598		~~- # Avoid insertion of weird stuff like <math> by expanding the relevant sections~~
3599		~~- $safeHeadline = $this->mStripState->unstripBoth( $headline );~~
	3597	+ list( $anchor, $legacyAnchor, $tocline, $headlineHint ) =
	3598	+ $this->processHeadingText( $headline );
3600	3599
3601		~~- # Remove link placeholders by the link text.~~
3602		~~- # <!--LINK number-->~~
3603		~~- # turns into~~
3604		~~- # link text with suffix~~
3605		~~- $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );~~
3606		-
3607		~~- # Strip out HTML (other than plain <sup> and <sub>: bug 8393)~~
3608		~~- $tocline = preg_replace(~~
3609		~~- array( '#<(?!/?(sup\|sub)).?'.'>#', '#<(/?(sup\|sub)).?'.'>#' ),~~
3610		~~- array( '', '<$1>'),~~
3611		~~- $safeHeadline~~
3612		~~- );~~
3613		~~- $tocline = trim( $tocline );~~
3614		-
3615		~~- # For the anchor, strip out HTML-y stuff period~~
3616		~~- $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );~~
3617		~~- $safeHeadline = trim( $safeHeadline );~~
3618		-
3619		~~- # Save headline for section edit hint before it's escaped~~
3620		~~- $headlineHint = $safeHeadline;~~
3621		-
3622		~~- if ( $wgEnforceHtmlIds ) {~~
3623		~~- $legacyHeadline = false;~~
3624		~~- $safeHeadline = Sanitizer::escapeId( $safeHeadline,~~
3625		~~- 'noninitial' );~~
3626		~~- } else {~~
3627		~~- # For reverse compatibility, provide an id that's~~
3628		~~- # HTML4-compatible, like we used to.~~
3629		~~- #~~
3630		~~- # It may be worth noting, academically, that it's possible for~~
3631		~~- # the legacy anchor to conflict with a non-legacy headline~~
3632		~~- # anchor on the page. In this case likely the "correct" thing~~
3633		~~- # would be to either drop the legacy anchors or make sure~~
3634		~~- # they're numbered first. However, this would require people~~
3635		~~- # to type in section names like "abc_.D7.93.D7.90.D7.A4"~~
3636		~~- # manually, so let's not bother worrying about it.~~
3637		~~- $legacyHeadline = Sanitizer::escapeId( $safeHeadline,~~
3638		~~- 'noninitial' );~~
3639		~~- $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );~~
3640		-
3641		~~- if ( $legacyHeadline == $safeHeadline ) {~~
3642		~~- # No reason to have both (in fact, we can't)~~
3643		~~- $legacyHeadline = false;~~
3644		~~- } elseif ( $legacyHeadline != Sanitizer::escapeId(~~
3645		~~- $legacyHeadline, 'xml' ) ) {~~
3646		~~- # The legacy id is invalid XML. We used to allow this, but~~
3647		~~- # there's no reason to do so anymore. Backward~~
3648		~~- # compatibility will fail slightly in this case, but it's~~
3649		~~- # no big deal.~~
3650		~~- $legacyHeadline = false;~~
3651		~~- }~~
3652		~~- }~~
3653		-
3654	3600	# HTML names must be case-insensitively unique (bug 10721). FIXME:
3655	3601	# Does this apply to Unicode characters? Because we aren't
3656	3602	# handling those here.
3657		~~- $arrayKey = strtolower( $safeHeadline );~~
3658		~~- if ( $legacyHeadline === false ) {~~
	3603	+ $arrayKey = strtolower( $anchor );
	3604	+ if ( $legacyAnchor === false ) {
3659	3605	$legacyArrayKey = false;
3660	3606	} else {
3661		~~- $legacyArrayKey = strtolower( $legacyHeadline );~~
	3607	+ $legacyArrayKey = strtolower( $legacyAnchor );
3662	3608	}
3663	3609
3664	3610	# count how many in assoc. array so we can track dupes in anchors
—	—	@@ -3679,12 +3625,10 @@
3680	3626	}
3681	3627
3682	3628	# Create the anchor for linking from the TOC to the section
3683		~~- $anchor = $safeHeadline;~~
3684		~~- $legacyAnchor = $legacyHeadline;~~
3685	3629	if ( $refers[$arrayKey] > 1 ) {
3686	3630	$anchor .= '_' . $refers[$arrayKey];
3687	3631	}
3688		~~- if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {~~
	3632	+ if ( $legacyAnchor !== false && $refers[$legacyArrayKey] > 1 ) {
3689	3633	$legacyAnchor .= '_' . $refers[$legacyArrayKey];
3690	3634	}
3691	3635	if( $enoughToc && ( !isset($wgMaxTocLevel) \|\| $toclevel<$wgMaxTocLevel ) ) {
—	—	@@ -3756,6 +3700,70 @@
3757	3701	}
3758	3702	}
3759	3703
	3704	+ private function processHeadingText( $headline ) {
	3705	+ global $wgEnforceHtmlIds;
	3706	+
	3707	+ # The safe header is a version of the header text safe to use for links
	3708	+ # Avoid insertion of weird stuff like <math> by expanding the relevant sections
	3709	+ $safeHeadline = $this->mStripState->unstripBoth( $headline );
	3710	+
	3711	+ # Remove link placeholders by the link text.
	3712	+ # <!--LINK number-->
	3713	+ # turns into
	3714	+ # link text with suffix
	3715	+ $safeHeadline = $this->replaceLinkHoldersText( $safeHeadline );
	3716	+
	3717	+ # Strip out HTML (other than plain <sup> and <sub>: bug 8393)
	3718	+ $tocline = preg_replace(
	3719	+ array( '#<(?!/?(sup\|sub)).?'.'>#', '#<(/?(sup\|sub)).?'.'>#' ),
	3720	+ array( '', '<$1>'),
	3721	+ $safeHeadline
	3722	+ );
	3723	+ $tocline = trim( $tocline );
	3724	+
	3725	+ # For the anchor, strip out HTML-y stuff period
	3726	+ $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline );
	3727	+ $safeHeadline = trim( $safeHeadline );
	3728	+
	3729	+ # Save headline for section edit hint before it's escaped
	3730	+ $headlineHint = $safeHeadline;
	3731	+
	3732	+ if ( $wgEnforceHtmlIds ) {
	3733	+ $legacyHeadline = false;
	3734	+ $safeHeadline = Sanitizer::escapeId( $safeHeadline,
	3735	+ 'noninitial' );
	3736	+ } else {
	3737	+ # For reverse compatibility, provide an id that's
	3738	+ # HTML4-compatible, like we used to.
	3739	+ #
	3740	+ # It may be worth noting, academically, that it's possible for
	3741	+ # the legacy anchor to conflict with a non-legacy headline
	3742	+ # anchor on the page. In this case likely the "correct" thing
	3743	+ # would be to either drop the legacy anchors or make sure
	3744	+ # they're numbered first. However, this would require people
	3745	+ # to type in section names like "abc_.D7.93.D7.90.D7.A4"
	3746	+ # manually, so let's not bother worrying about it.
	3747	+ $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
	3748	+ 'noninitial' );
	3749	+ $safeHeadline = Sanitizer::escapeId( $safeHeadline, 'xml' );
	3750	+
	3751	+ if ( $legacyHeadline == $safeHeadline ) {
	3752	+ # No reason to have both (in fact, we can't)
	3753	+ $legacyHeadline = false;
	3754	+ } elseif ( $legacyHeadline != Sanitizer::escapeId(
	3755	+ $legacyHeadline, 'xml' ) ) {
	3756	+ # The legacy id is invalid XML. We used to allow this, but
	3757	+ # there's no reason to do so anymore. Backward
	3758	+ # compatibility will fail slightly in this case, but it's
	3759	+ # no big deal.
	3760	+ $legacyHeadline = false;
	3761	+ }
	3762	+ }
	3763	+
	3764	+ return array( $safeHeadline, $legacyHeadline, $tocline,
	3765	+ $headlineHint );
	3766	+ }
	3767	+
3760	3768	/**
3761	3769	* Transform wiki markup when saving a page by doing \r\n -> \n
3762	3770	* conversion, substitting signatures, {{subst:}} templates, etc.
—	—	@@ -4736,21 +4744,9 @@
4737	4745	* "== Header ==".
4738	4746	*/
4739	4747	public function guessSectionNameFromWikiText( $text ) {
4740		~~- # Strip out wikitext links(they break the anchor)~~
4741	4748	$text = $this->stripSectionName( $text );
4742		~~- $headline = Sanitizer::decodeCharReferences( $text );~~
4743		~~- # strip out HTML~~
4744		~~- $headline = StringUtils::delimiterReplace( '<', '>', '', $headline );~~
4745		~~- $headline = trim( $headline );~~
4746		~~- $sectionanchor = '#' . urlencode( str_replace( ' ', '_', $headline ) );~~
4747		~~- $replacearray = array(~~
4748		~~- '%3A' => ':',~~
4749		~~- '%' => '.'~~
4750		~~- );~~
4751		~~- return str_replace(~~
4752		~~- array_keys( $replacearray ),~~
4753		~~- array_values( $replacearray ),~~
4754		~~- $sectionanchor );~~
	4749	+ list( $text, /* unneeded here */ ) = $this->processHeadingText( $text );
	4750	+ return "#$text";
4755	4751	}
4756	4752
4757	4753	/**

Follow-up revisions

Revision	Commit summary	Author	Date
r45487	Pulling r45473 back for now "Reduce code duplication correctly this time"...	brion	04:23, 7 January 2009

Past revisions this follows-up on

Revision	Commit summary	Author	Date
r45470	Reverting r45341 " Use Sanitizer::escapeId() in another place Reduce code dup...	brion	00:00, 7 January 2009

Comments

#Comment by Brion VIBBER (talk | contribs) 04:24, 7 January 2009

Reverted in r45487

Let's hold off on further section anchor generation changes until we have decent test cases covering the different ways we we stuff through...

Status & tagging log

04:24, 7 January 2009 Brion VIBBER (talk | contribs) changed the status of r45473 [removed: new added: reverted]