Index: trunk/phase3/maintenance/fixExtLinksProtocolRelative.php |
— | — | @@ -0,0 +1,81 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * Fixes any entries for protocol-relative URLs in the externallinks table, |
| 5 | + * replacing each protocol-relative entry with two entries, one for http |
| 6 | + * and one for https. |
| 7 | + * |
| 8 | + * This program is free software; you can redistribute it and/or modify |
| 9 | + * it under the terms of the GNU General Public License as published by |
| 10 | + * the Free Software Foundation; either version 2 of the License, or |
| 11 | + * (at your option) any later version. |
| 12 | + * |
| 13 | + * This program is distributed in the hope that it will be useful, |
| 14 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | + * GNU General Public License for more details. |
| 17 | + * |
| 18 | + * You should have received a copy of the GNU General Public License along |
| 19 | + * with this program; if not, write to the Free Software Foundation, Inc., |
| 20 | + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 21 | + * http://www.gnu.org/copyleft/gpl.html |
| 22 | + * |
| 23 | + * @ingroup Maintenance |
| 24 | + */ |
| 25 | + |
| 26 | +require_once( dirname( __FILE__ ) . '/Maintenance.php' ); |
| 27 | + |
| 28 | +class FixExtLinksProtocolRelative extends LoggedUpdateMaintenance { |
| 29 | + public function __construct() { |
| 30 | + parent::__construct(); |
| 31 | + $this->mDescription = "Fixes any entries in the externallinks table containing protocol-relative URLs"; |
| 32 | + } |
| 33 | + |
| 34 | + protected function getUpdateKey() { |
| 35 | + return 'fix protocol-relative URLs in externallinks'; |
| 36 | + } |
| 37 | + |
| 38 | + protected function updateSkippedMessage() { |
| 39 | + return 'protocol-relative URLs in externallinks table already fixed.'; |
| 40 | + } |
| 41 | + |
| 42 | + /** |
| 43 | + * Replaces every externallinks row whose el_index starts with '//' by two rows, |
| 44 | + * one indexed under http: and one indexed under https:. |
| 45 | + * |
| 46 | + * @return bool False if the externallinks table does not exist, true otherwise |
| 47 | + */ |
| 48 | + protected function doDBUpdates() { |
| 49 | + $db = wfGetDB( DB_MASTER ); |
| 50 | + if ( !$db->tableExists( 'externallinks' ) ) { |
| 51 | + $this->error( "externallinks table does not exist" ); |
| 52 | + return false; |
| 53 | + } |
| 54 | + $this->output( "Fixing protocol-relative entries in the externallinks table...\n" ); |
| 55 | + $res = $db->select( 'externallinks', array( 'el_from', 'el_to', 'el_index' ), |
| 56 | + array( 'el_index' . $db->buildLike( '//', $db->anyString() ) ), __METHOD__ ); |
| 57 | + $count = 0; |
| 58 | + foreach ( $res as $row ) { |
| 59 | + $count++; |
| 60 | + if ( $count % 100 == 0 ) { |
| 61 | + // End the progress counter with a newline so successive counts do not run together |
| 62 | + $this->output( "$count\n" ); |
| 63 | + wfWaitForSlaves(); |
| 64 | + } |
| 65 | + // Columns that identify exactly this row; the same el_index can belong to several pages |
| 66 | + $link = array( 'el_from' => $row->el_from, 'el_to' => $row->el_to ); |
| 67 | + $db->insert( 'externallinks', |
| 68 | + array( |
| 69 | + $link + array( 'el_index' => "http:{$row->el_index}" ), |
| 70 | + $link + array( 'el_index' => "https:{$row->el_index}" ), |
| 71 | + ), __METHOD__, array( 'IGNORE' ) |
| 72 | + ); |
| 73 | + // Delete only the converted row; an interrupted run must not drop other pages' unconverted rows |
| 74 | + $db->delete( 'externallinks', $link + array( 'el_index' => $row->el_index ), __METHOD__ ); |
| 75 | + } |
| 76 | + $this->output( "Done, $count rows updated.\n" ); |
| 77 | + return true; |
| 78 | + } |
| 79 | +} |
| 80 | + |
| 81 | +$maintClass = "FixExtLinksProtocolRelative"; |
| 82 | +require_once( RUN_MAINTENANCE_IF_MAIN ); |
Property changes on: trunk/phase3/maintenance/fixExtLinksProtocolRelative.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 83 | + native |
Index: trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php |
— | — | @@ -831,42 +831,42 @@ |
832 | 832 | } |
833 | 833 | |
834 | 834 | /** |
835 | | - * @dataProvider provideMakeUrlIndex() |
| 835 | + * @dataProvider provideMakeUrlIndexes() |
836 | 836 | */ |
837 | | - function testMakeUrlIndex( $url, $expected ) { |
838 | | - $index = wfMakeUrlIndex( $url ); |
839 | | - $this->assertEquals( $expected, $index, "wfMakeUrlIndex(\"$url\")" ); |
| 837 | + function testMakeUrlIndexes( $url, $expected ) { |
| 838 | + $index = wfMakeUrlIndexes( $url ); |
| 839 | + $this->assertEquals( $expected, $index, "wfMakeUrlIndexes(\"$url\")" ); |
840 | 840 | } |
841 | 841 | |
842 | | - function provideMakeUrlIndex() { |
| 842 | + function provideMakeUrlIndexes() { |
843 | 843 | return array( |
844 | 844 | array( |
845 | 845 | // just a regular :) |
846 | 846 | 'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627', |
847 | | - 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' |
| 847 | + array( 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' ) |
848 | 848 | ), |
849 | 849 | array( |
850 | 850 | // mailtos are handled special |
851 | 851 | // is this really right though? that final . probably belongs earlier? |
852 | 852 | 'mailto:wiki@wikimedia.org', |
853 | | - 'mailto:org.wikimedia@wiki.', |
| 853 | + array( 'mailto:org.wikimedia@wiki.' ) |
854 | 854 | ), |
855 | 855 | |
856 | 856 | // file URL cases per bug 28627... |
857 | 857 | array( |
858 | 858 | // three slashes: local filesystem path Unix-style |
859 | 859 | 'file:///whatever/you/like.txt', |
860 | | - 'file://./whatever/you/like.txt' |
| 860 | + array( 'file://./whatever/you/like.txt' ) |
861 | 861 | ), |
862 | 862 | array( |
863 | 863 | // three slashes: local filesystem path Windows-style |
864 | 864 | 'file:///c:/whatever/you/like.txt', |
865 | | - 'file://./c:/whatever/you/like.txt' |
| 865 | + array( 'file://./c:/whatever/you/like.txt' ) |
866 | 866 | ), |
867 | 867 | array( |
868 | 868 | // two slashes: UNC filesystem path Windows-style |
869 | 869 | 'file://intranet/whatever/you/like.txt', |
870 | | - 'file://intranet./whatever/you/like.txt' |
| 870 | + array( 'file://intranet./whatever/you/like.txt' ) |
871 | 871 | ), |
872 | 872 | // Multiple-slash cases that can sorta work on Mozilla |
873 | 873 | // if you hack it just right are kinda pathological, |
— | — | @@ -875,6 +875,15 @@ |
876 | 876 | // |
877 | 877 | // Those will survive the algorithm but with results that |
878 | 878 | // are less consistent. |
| 879 | + |
| 880 | + // protocol-relative URL cases per bug 29854... |
| 881 | + array( |
| 882 | + '//bugzilla.wikimedia.org/show_bug.cgi?id=28627', |
| 883 | + array( |
| 884 | + 'http://org.wikimedia.bugzilla./show_bug.cgi?id=28627', |
| 885 | + 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' |
| 886 | + ) |
| 887 | + ), |
879 | 888 | ); |
880 | 889 | } |
881 | 890 | |
Index: trunk/phase3/includes/GlobalFunctions.php |
— | — | @@ -647,12 +647,12 @@ |
648 | 648 | } |
649 | 649 | |
650 | 650 | /** |
651 | | - * Make a URL index, appropriate for the el_index field of externallinks. |
| 651 | + * Make URL indexes, appropriate for the el_index field of externallinks. |
652 | 652 | * |
653 | 653 | * @param $url String |
654 | | - * @return String |
| 654 | + * @return array |
655 | 655 | */ |
656 | | -function wfMakeUrlIndex( $url ) { |
| 656 | +function wfMakeUrlIndexes( $url ) { |
657 | 657 | $bits = wfParseUrl( $url ); |
658 | 658 | |
659 | 659 | // Reverse the labels in the hostname, convert to lower case |
— | — | @@ -692,7 +692,12 @@ |
693 | 693 | if ( isset( $bits['fragment'] ) ) { |
694 | 694 | $index .= '#' . $bits['fragment']; |
695 | 695 | } |
696 | | - return $index; |
| 696 | + |
| 697 | + if ( $prot == '' ) { |
| 698 | + return array( "http:$index", "https:$index" ); |
| 699 | + } else { |
| 700 | + return array( $index ); |
| 701 | + } |
697 | 702 | } |
698 | 703 | |
699 | 704 | /** |
Index: trunk/phase3/includes/installer/DatabaseUpdater.php |
— | — | @@ -43,7 +43,8 @@ |
44 | 44 | 'DeleteDefaultMessages', |
45 | 45 | 'PopulateRevisionLength', |
46 | 46 | 'PopulateRevisionSha1', |
47 | | - 'PopulateImageSha1' |
| 47 | + 'PopulateImageSha1', |
| 48 | + 'FixExtLinksProtocolRelative', |
48 | 49 | ); |
49 | 50 | |
50 | 51 | /** |
Index: trunk/phase3/includes/LinksUpdate.php |
— | — | @@ -456,11 +456,13 @@ |
457 | 457 | $arr = array(); |
458 | 458 | $diffs = array_diff_key( $this->mExternals, $existing ); |
459 | 459 | foreach( $diffs as $url => $dummy ) { |
460 | | - $arr[] = array( |
461 | | - 'el_from' => $this->mId, |
462 | | - 'el_to' => $url, |
463 | | - 'el_index' => wfMakeUrlIndex( $url ), |
464 | | - ); |
| 460 | + foreach( wfMakeUrlIndexes( $url ) as $index ) { |
| 461 | + $arr[] = array( |
| 462 | + 'el_from' => $this->mId, |
| 463 | + 'el_to' => $url, |
| 464 | + 'el_index' => $index, |
| 465 | + ); |
| 466 | + } |
465 | 467 | } |
466 | 468 | return $arr; |
467 | 469 | } |
Index: trunk/phase3/includes/api/ApiQueryExternalLinks.php |
— | — | @@ -69,6 +69,11 @@ |
70 | 70 | $this->addOption( 'ORDER BY', 'el_from' ); |
71 | 71 | } |
72 | 72 | |
| 73 | + // If we're querying all protocols, use DISTINCT to avoid repeating protocol-relative links twice |
| 74 | + if ( $protocol === null ) { |
| 75 | + $this->addOption( 'DISTINCT' ); |
| 76 | + } |
| 77 | + |
73 | 78 | $this->addOption( 'LIMIT', $params['limit'] + 1 ); |
74 | 79 | $offset = isset( $params['offset'] ) ? $params['offset'] : 0; |
75 | 80 | if ( $offset ) { |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -861,6 +861,7 @@ |
862 | 862 | 'FakeMaintenance' => 'maintenance/Maintenance.php', |
863 | 863 | 'LoggedUpdateMaintenance' => 'maintenance/Maintenance.php', |
864 | 864 | 'Maintenance' => 'maintenance/Maintenance.php', |
| 865 | + 'FixExtLinksProtocolRelative' => 'maintenance/fixExtLinksProtocolRelative.php', |
865 | 866 | 'PopulateCategory' => 'maintenance/populateCategory.php', |
866 | 867 | 'PopulateImageSha1' => 'maintenance/populateImageSha1.php', |
867 | 868 | 'PopulateLogSearch' => 'maintenance/populateLogSearch.php', |