r86897 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r86896‎ | r86897 | r86898 >
Date:21:00, 25 April 2011
Author:brion
Status:resolved
Tags:
Comment:
* (bug 28627) External link normalization now handles file: URL cases without throwing notice warnings.

Added some test cases for wfMakeUrlIndex() to GlobalTests (tweaks $wgUrlProtocols to toss in file:// support so it can test them).
Needs more cases for other URL styles probably; some of the more pathological file: URL cases still won't normalize really cleanly but will go through the function without exploding. The most-needed variants will be the Windows/IE-compatible ones I think -- so file:///c:/foo or file://server/foo.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/GlobalFunctions.php (modified) (history)
  • /trunk/phase3/tests/phpunit/includes/GlobalTest.php (modified) (history)

Diff [purge]

Index: trunk/phase3/tests/phpunit/includes/GlobalTest.php
@@ -2,19 +2,22 @@
33
44 class GlobalTest extends MediaWikiTestCase {
55 function setUp() {
6 - global $wgReadOnlyFile, $wgContLang, $wgLang;
 6+ global $wgReadOnlyFile, $wgContLang, $wgLang, $wgUrlProtocols;
77 $this->originals['wgReadOnlyFile'] = $wgReadOnlyFile;
 8+ $this->originals['wgUrlProtocols'] = $wgUrlProtocols;
89 $wgReadOnlyFile = tempnam( wfTempDir(), "mwtest_readonly" );
 10+ $wgUrlProtocols[] = 'file://';
911 unlink( $wgReadOnlyFile );
1012 $wgContLang = $wgLang = Language::factory( 'en' );
1113 }
1214
1315 function tearDown() {
14 - global $wgReadOnlyFile;
 16+ global $wgReadOnlyFile, $wgUrlProtocols;
1517 if ( file_exists( $wgReadOnlyFile ) ) {
1618 unlink( $wgReadOnlyFile );
1719 }
1820 $wgReadOnlyFile = $this->originals['wgReadOnlyFile'];
 21+ $wgUrlProtocols = $this->originals['wgUrlProtocols'];
1922 }
2023
2124 /** @dataProvider provideForWfArrayDiff2 */
@@ -815,6 +818,54 @@
816819 */);
817820 }
818821
 822+ /**
 823+ * @dataProvider provideMakeUrlIndex()
 824+ */
 825+ function testMakeUrlIndex( $url, $expected ) {
 826+ $index = wfMakeUrlIndex( $url );
 827+ $this->assertEquals( $expected, $index, "wfMakeUrlIndex(\"$url\")" );
 828+ }
 829+
 830+ function provideMakeUrlIndex() {
 831+ return array(
 832+ array(
 833+ // just a regular :)
 834+ 'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627',
 835+ 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627'
 836+ ),
 837+ array(
 838+ // mailtos are handled special
 839+ // is this really right though? that final . probably belongs earlier?
 840+ 'mailto:wiki@wikimedia.org',
 841+ 'mailto:org.wikimedia@wiki.',
 842+ ),
 843+
 844+ // file URL cases per bug 28627...
 845+ array(
 846+ // three slashes: local filesystem path Unix-style
 847+ 'file:///whatever/you/like.txt',
 848+ 'file://./whatever/you/like.txt'
 849+ ),
 850+ array(
 851+ // three slashes: local filesystem path Windows-style
 852+ 'file:///c:/whatever/you/like.txt',
 853+ 'file://./c:/whatever/you/like.txt'
 854+ ),
 855+ array(
 856+ // two slashes: UNC filesystem path Windows-style
 857+ 'file://intranet/whatever/you/like.txt',
 858+ 'file://intranet./whatever/you/like.txt'
 859+ ),
 860+ // Multiple-slash cases that can sorta work on Mozilla
 861+ // if you hack it just right are kinda pathological,
 862+ // and unreliable cross-platform or on IE which means they're
 863+ // unlikely to appear on intranets.
 864+ //
 865+ // Those will survive the algorithm but with results that
 866+ // are less consistent.
 867+ );
 868+ }
 869+
819870 /* TODO: many more! */
820871 }
821872
Index: trunk/phase3/includes/GlobalFunctions.php
@@ -2750,8 +2750,12 @@
27512751 $domainpart = '';
27522752 }
27532753 $reversedHost = $domainpart . '@' . $mailparts[0];
 2754+ } else if ( isset( $bits['host'] ) ) {
 2755+ $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
27542756 } else {
2755 - $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
 2757+ // In file: URIs for instance it's common to have an empty host,
 2758+ // which turns up as not getting a 'host' member from parse_url.
 2759+ $reversedHost = '.';
27562760 }
27572761 // Add an extra dot to the end
27582762 // Why? Is it in wrong place in mailto links?
@@ -2766,6 +2770,13 @@
27672771 $index .= ':' . $bits['port'];
27682772 }
27692773 if ( isset( $bits['path'] ) ) {
 2774+ // parse_url() removes the initial '/' from the path
 2775+ // for file: URLs with Windows-style paths, such as
 2776+ // file:///c:/windows/stuff. We need to add it back
 2777+ // to keep our division between host and path properly.
 2778+ if ( strlen( $bits['path'] ) > 0 && substr( $bits['path'], 0, 1 ) !== '/' ) {
 2779+ $index .= '/';
 2780+ }
27702781 $index .= $bits['path'];
27712782 } else {
27722783 $index .= '/';
Index: trunk/phase3/RELEASE-NOTES
@@ -243,6 +243,8 @@
244244 * (bug 28532) wfMsgExt() and wfMsgWikiHtml() use $wgOut->parse()
245245 * (bug 16129) Transcluded special pages expose strip markers when they output parsed messages
246246 * (bug 27249) "Installed software" table in Special:Version should always be left-to-right
 247+* (bug 28627) External link normalization now handles file: URL cases without
 248+ throwing notice warnings.
247249
248250 === API changes in 1.18 ===
249251 * (bug 26339) Throw warning when truncating an overlarge API result

Follow-up revisions

RevisionCommit summaryAuthorDate
r90084Revert r86897 in wfMakeUrlIndex() and solve the issue in wfParseUrl()platonides20:57, 14 June 2011
r92434MFT to REL1_18...hashar15:05, 18 July 2011

Status & tagging log