r62069 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r62068‎ \| r62069 \| r62070 >
Date:	15:00, 6 February 2010
Author:	conrad
Status:	reverted
Tags:
Comment:	Allow pipe trick to work after PST. Fixes bug 4099, bug 8785, partially bug 16714, bug 2700.
Modified paths:	/trunk/phase3/RELEASE-NOTES (modified) (history) /trunk/phase3/includes/parser/Parser.php (modified) (history) /trunk/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.txt
—	—	@@ -3112,7 +3112,66 @@
3113	3113	[[Ns:Article, Context\|Article]]
3114	3114	!! end
3115	3115
	3116	+!! test
	3117	+pre-save transform: context links ("pipe trick") with url escaped page names
	3118	+!! options
	3119	+pst
	3120	+!! input
	3121	+[[Hello wo%52ld\|]]
	3122	+[[Hello wo%52ld (again)\|]]
	3123	+!! result
	3124	+[[Hello wo%52ld\|Hello woRld]]
	3125	+[[Hello wo%52ld (again)\|Hello woRld]]
	3126	+!! end
3116	3127
	3128	+!! test
	3129	+pre-save transform: context links ("pipe trick") with variables are not pre-empted
	3130	+!! options
	3131	+pst title=[[Test (page)]]
	3132	+!! input
	3133	+[[{{{1\|}}}\|]]
	3134	+[[\|{{{1\|}}}]]
	3135	+[[{{subst:PAGENAME}}\|]]
	3136	+!! result
	3137	+[[{{{1\|}}}\|]]
	3138	+[[\|{{{1\|}}}]]
	3139	+[[Test (page)\|Test]]
	3140	+!! end
	3141	+
	3142	+!! article
	3143	+Template:pipetest
	3144	+!! text
	3145	+[[{{{1}}}\|]]
	3146	+!! endarticle
	3147	+
	3148	+!! article
	3149	+Template:testpipe
	3150	+!! text
	3151	+[[\|{{{1}}}]]
	3152	+!! endarticle
	3153	+
	3154	+!! test
	3155	+("pipe trick") should work outside PST
	3156	+!!options
	3157	+title=[[Help:hello (world)]]
	3158	+!! input
	3159	+{{pipetest\|hi (world)}}
	3160	+{{pipetest\|hi (world), world}}
	3161	+{{pipetest\|Help:hi (world), world}}
	3162	+{{pipetest\|:Help:hi (world), world}}
	3163	+{{testpipe\|hi}}
	3164	+[[{{PAGENAME}}\|]]
	3165	+!! result
	3166	+<p><a href="https://www.mediawiki.org/index.php?title=Hi_(world)&action=edit&redlink=1" class="new" title="Hi (world) (page does not exist)">hi</a>
	3167	+<a href="https://www.mediawiki.org/index.php?title=Hi_(world),_world&action=edit&redlink=1" class="new" title="Hi (world), world (page does not exist)">hi</a>
	3168	+<a href="https://www.mediawiki.org/index.php?title=Help:Hi_(world),_world&action=edit&redlink=1" class="new" title="Help:Hi (world), world (page does not exist)">hi</a>
	3169	+<a href="https://www.mediawiki.org/index.php?title=Help:Hi_(world),_world&action=edit&redlink=1" class="new" title="Help:Hi (world), world (page does not exist)">hi</a>
	3170	+<a href="https://www.mediawiki.org/index.php?title=Hi_(world)&action=edit&redlink=1" class="new" title="Hi (world) (page does not exist)">hi</a>
	3171	+<a href="https://www.mediawiki.org/index.php?title=Hello_(world)&action=edit&redlink=1" class="new" title="Hello (world) (page does not exist)">Hello</a>
	3172	+</p>
	3173	+!! end
	3174	+
	3175	+
3117	3176	###
3118	3177	### Message transform tests
3119	3178	###
Index: trunk/phase3/includes/parser/Parser.php
—	—	@@ -1511,7 +1511,7 @@
1512	1512	if ( !$tc ) {
1513	1513	$tc = Title::legalChars() . '#%';
1514	1514	# Match a link having the form [[namespace:link\|alternate]]trail
1515		~~- $e1 = "/^([{$tc}]+)(?:\\\|(.+?))?]](.*)\$/sD";~~
	1515	+ $e1 = "/^([{$tc}]*)(\\\|.*?)?]](.*)\$/sD";
1516	1516	# Match cases where there is no "]]", which might still be images
1517	1517	$e1_img = "/^([{$tc}]+)\\\|(.*)\$/sD";
1518	1518	}
—	—	@@ -1591,7 +1591,15 @@
1592	1592
1593	1593	wfProfileIn( __METHOD__."-e1" );
1594	1594	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1595		~~- $text = $m[2];~~
	1595	+
	1596	+ if( $m[2] === '' ) {
	1597	+ $text = '';
	1598	+ } elseif( $m[2] === '\|' ) {
	1599	+ $text = $this->getPipeTrickText( $m[1] );
	1600	+ } else {
	1601	+ $text = substr( $m[2], 1 );
	1602	+ }
	1603	+
1596	1604	# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
1597	1605	# [[Image:Foo.jpg\|[http://example.com desc]]] <- having three ] in a row fucks up,
1598	1606	# the real problem is with the $e1 regex
—	—	@@ -1608,18 +1616,20 @@
1609	1617	$text .= ']'; # so that replaceExternalLinks($text) works later
1610	1618	$m[3] = substr( $m[3], 1 );
1611	1619	}
	1620	+
	1621	+ # Handle pipe-trick for [[\|<blah>]]
	1622	+ $lnk = $m[1] === '' ? $this->getPipeTrickLink( $text ) : $m[1];
1612	1623	# fix up urlencoded title texts
1613		~~- if( strpos( $m[1], '%' ) !== false ) {~~
	1624	+ if( strpos( $lnk, '%' ) !== false ) {
1614	1625	# Should anchors '#' also be rejected?
1615		~~- $m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]) );~~
	1626	+ $lnk = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($lnk) );
1616	1627	}
	1628	+
1617	1629	$trail = $m[3];
1618	1630	} elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
1619	1631	$might_be_img = true;
1620	1632	$text = $m[2];
1621		~~- if ( strpos( $m[1], '%' ) !== false ) {~~
1622		~~- $m[1] = urldecode($m[1]);~~
1623		~~- }~~
	1633	+ $lnk = strpos( $m[1], '%' ) === false ? $m[1] : urldecode( $m[1] );
1624	1634	$trail = "";
1625	1635	} else { # Invalid form; output directly
1626	1636	$s .= $prefix . '[[' . $line ;
—	—	@@ -1632,7 +1642,7 @@
1633	1643	# Don't allow internal links to pages containing
1634	1644	# PROTO: where PROTO is a valid URL protocol; these
1635	1645	# should be external links.
1636		~~- if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $m[1] ) ) {~~
	1646	+ if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $lnk ) ) {
1637	1647	$s .= $prefix . '[[' . $line ;
1638	1648	wfProfileOut( __METHOD__."-misc" );
1639	1649	continue;
—	—	@@ -1640,12 +1650,12 @@
1641	1651
1642	1652	# Make subpage if necessary
1643	1653	if ( $useSubpages ) {
1644		~~- $link = $this->maybeDoSubpageLink( $m[1], $text );~~
	1654	+ $link = $this->maybeDoSubpageLink( $lnk, $text );
1645	1655	} else {
1646		~~- $link = $m[1];~~
	1656	+ $link = $lnk;
1647	1657	}
1648	1658
1649		~~- $noforce = (substr( $m[1], 0, 1 ) !== ':');~~
	1659	+ $noforce = (substr( $lnk, 0, 1 ) !== ':');
1650	1660	if (!$noforce) {
1651	1661	# Strip off leading ':'
1652	1662	$link = substr( $link, 1 );
—	—	@@ -1893,6 +1903,71 @@
1894	1904	return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
1895	1905	}
1896	1906
	1907	+ /**
	1908	+ * Returns valid title characters and namespace characters for pipe trick.
	1909	+ *
	1910	+ * FIXME: the namespace characters should not be specified like this...
	1911	+ */
	1912	+ static function getPipeTrickCharacterClasses() {
	1913	+ global $wgLegalTitleChars;
	1914	+ return array( "[$wgLegalTitleChars]", '[ _0-9A-Za-z\x80-\xff-]' );
	1915	+ }
	1916	+
	1917	+ /**
	1918	+ * From the [[title\|]] return link-text as though the user typed [[title\|link-text]]
	1919	+ *
	1920	+ * For most links this will be as though the user typed [[ns:title\|title]]
	1921	+ * However [[ns:title (context)]], [[ns:title, context]] and [[ns:title (context), context]]
	1922	+ * all return the \|title]] with no context or indicative punctuation.
	1923	+ */
	1924	+ function getPipeTrickText( $link ) {
	1925	+ static $rexps = FALSE;
	1926	+ if( !$rexps ) {
	1927	+ list( $tc, $nc ) = Parser::getPipeTrickCharacterClasses();
	1928	+ $rexps = array (
	1929	+ # try this first, to turn "[[A, B (C)\|]]" into "A, B"
	1930	+ "/^(:?$nc+:\|:\|)($tc+?)( \\($tc+\\)\| （$tc+）)$/", # [[ns:page (context)\|]]
	1931	+ "/^(:?$nc+:\|:\|)($tc+?)( \\($tc+\\)\|)(, $tc+\|)$/" # [[ns:page (context), context\|]]
	1932	+ );
	1933	+ }
	1934	+ $text = urldecode( $link );
	1935	+
	1936	+ for( $i = 0; $i < count( $rexps ); $i++) {
	1937	+ if( preg_match( $rexps[$i], $text, $m ) )
	1938	+ return $m[2];
	1939	+ }
	1940	+ return $text;
	1941	+ }
	1942	+
	1943	+ /**
	1944	+ * From the [[\|link-text]] return the title as though the user typed [[title\|link-text]]
	1945	+ *
	1946	+ * On most pages this will return link-text or "" if the link-text is not a valid title
	1947	+ * On pages like [[ns:title (context)]] and [[ns:title, context]] it will act like
	1948	+ * [[ns:link-text (context)\|link-text]] and [[ns:link-text, context\|link-text]]
	1949	+ */
	1950	+ function getPipeTrickLink( $text ) {
	1951	+ static $rexps = FALSE, $tc;
	1952	+ if( !$rexps ) {
	1953	+ list( $tc, $nc ) = Parser::getPipeTrickCharacterClasses();
	1954	+ $rexps = array (
	1955	+ "/^($nc+:\|)$tc+?( \\($tc+\\))$/", # [[ns:page (context)]]
	1956	+ "/^($nc+:\|)$tc+?(, $tc+\|)$/" # [[ns:page, context]]
	1957	+ );
	1958	+ }
	1959	+
	1960	+ if( !preg_match( "/^$tc+$/", $text ) )
	1961	+ return '';
	1962	+
	1963	+ $t = $this->mTitle->getText();
	1964	+
	1965	+ for( $i = 0; $i < count( $rexps ); $i++) {
	1966	+ if( preg_match( $rexps[$i], $t, $m ) )
	1967	+ return "$m[1]$text$m[2]";
	1968	+ }
	1969	+ return $text;
	1970	+ }
	1971	+
1897	1972	/**#@+
1898	1973	* Used by doBlockLevels()
1899	1974	* @private
—	—	@@ -3986,33 +4061,11 @@
3987	4062	'~~~' => $sigText
3988	4063	) );
3989	4064
3990		~~- # Context links: [[\|name]] and [[name (context)\|]]~~
3991		~~- #~~
3992		~~- global $wgLegalTitleChars;~~
3993		~~- $tc = "[$wgLegalTitleChars]";~~
3994		~~- $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!~~
	4065	+ # Links of the form [[\|<blah>]] or [[<blah>\|]] perform pipe tricks
	4066	+ list( $tc, $nc ) = Parser::getPipeTrickCharacterClasses();
	4067	+ $pipeTrickRe = "/\[\[(?:(\\\|)($tc+)\|($tc+)\\\|)\]\]/";
	4068	+ $text = preg_replace_callback( $pipeTrickRe, array( $this, 'pstPipeTrickCallback' ), $text);
3995	4069
3996		~~- $p1 = "/\[\[(:?$nc+:\|:\|)($tc+?)( \\($tc+\\))\\\|]]/"; # [[ns:page (context)\|]]~~
3997		~~- $p4 = "/\[\[(:?$nc+:\|:\|)($tc+?)(（$tc+）)\\\|]]/"; # [[ns:page（context）\|]]~~
3998		~~- $p3 = "/\[\[(:?$nc+:\|:\|)($tc+?)( \\($tc+\\)\|)(, $tc+\|)\\\|]]/"; # [[ns:page (context), context\|]]~~
3999		~~- $p2 = "/\[\[\\\|($tc+)]]/"; # [[\|page]]~~
4000		-
4001		~~- # try $p1 first, to turn "[[A, B (C)\|]]" into "[[A, B (C)\|A, B]]"~~
4002		~~- $text = preg_replace( $p1, '[[\\1\\2\\3\|\\2]]', $text );~~
4003		~~- $text = preg_replace( $p4, '[[\\1\\2\\3\|\\2]]', $text );~~
4004		~~- $text = preg_replace( $p3, '[[\\1\\2\\3\\4\|\\2]]', $text );~~
4005		-
4006		~~- $t = $this->mTitle->getText();~~
4007		~~- $m = array();~~
4008		~~- if ( preg_match( "/^($nc+:\|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {~~
4009		~~- $text = preg_replace( $p2, "[[$m[1]\\1$m[2]\|\\1]]", $text );~~
4010		~~- } elseif ( preg_match( "/^($nc+:\|)$tc+?(, $tc+\|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {~~
4011		~~- $text = preg_replace( $p2, "[[$m[1]\\1$m[2]\|\\1]]", $text );~~
4012		~~- } else {~~
4013		~~- # if there's no context, don't bother duplicating the title~~
4014		~~- $text = preg_replace( $p2, '[[\\1]]', $text );~~
4015		~~- }~~
4016		-
4017	4070	# Trim trailing whitespace
4018	4071	$text = rtrim( $text );
4019	4072
—	—	@@ -4020,6 +4073,25 @@
4021	4074	}
4022	4075
4023	4076	/**
	4077	+ * Called from pstPass2 to perform the pipe trick on links.
	4078	+ * Original was either [[\|text]] or [[link\|]]
	4079	+ *
	4080	+ * @param Array ("\|" or "", text, link) $m
	4081	+ */
	4082	+ function pstPipeTrickCallback($m)
	4083	+ {
	4084	+ if( $m[1] ) { # [[\|<blah>]]
	4085	+ $text = $m[2];
	4086	+ $link = $this->getPipeTrickLink( $text );
	4087	+ } else { # [[<blah>\|]]
	4088	+ $link = $m[3];
	4089	+ $text = $this->getPipeTrickText( $link );
	4090	+ }
	4091	+
	4092	+ return $link === $text ? "[[$link]]" : "[[$link\|$text]]";
	4093	+ }
	4094	+
	4095	+ /**
4024	4096	* Fetch the user's signature text, if any, and normalize to
4025	4097	* validated, ready-to-insert wikitext.
4026	4098	* If you have pre-fetched the nickname or the fancySig option, you can
Index: trunk/phase3/RELEASE-NOTES
—	—	@@ -830,6 +830,8 @@
831	831	* (bug 20809) Expose EditFormPreloadText via the API
832	832	* (bug 18427) Comment (edit summary) parser option for API
833	833	* (bug 5210) preload parser should parse <noinclude> (as well as <includeonly>)
	834	+* (bug 8785) Pipe trick should work with colon functions
	835	+* (bug 4099) Pipe trick doesn't work when emptiness is only provided by empty template parameter
834	836
835	837	=== Languages updated in 1.16 ===
836	838

Follow-up revisions

Revision	Commit summary	Author	Date
r62085	Fix bug 20339 allow pipe-trick in log reasons...	conrad	14:50, 7 February 2010
r62194	Pretty sure that...	reedy	20:46, 9 February 2010
r62689	Moving Conrad's recent parser work out to a branch. Reverted r62434, r62416, ...	tstarling	05:19, 19 February 2010

Past revisions this follows-up on

Revision	Commit summary	Author	Date
r61710	bug 22297 - "syntax for substitution that doesn't break transclusion"...	conrad	11:58, 30 January 2010

Status & tagging log

05:27, 19 February 2010 Tim Starling (talk | contribs) changed the status of r62069 [removed: new added: reverted]