r72394 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r72393 | r72394 | r72395 >
Date:21:32, 4 September 2010
Author:platonides
Status:ok
Tags:
Comment:
Converting dumpInterwiki.php into new Maintenance. In two steps to get better diffs.
dumpInterwiki.inc is copied from rebuildInterwiki.inc + diverging changes.
Prepare rebuildInterwiki.php to use the specials db list like dumpInterwiki (r19198, bug 3529). We still need to get, from somewhere, where they are really hosted.
Modified paths:
  • /trunk/phase3/maintenance/dumpInterwiki.inc (modified) (history)
  • /trunk/phase3/maintenance/rebuildInterwiki.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/rebuildInterwiki.php
@@ -33,7 +33,7 @@
3434 class RebuildInterwiki extends Maintenance {
3535 public function __construct() {
3636 parent::__construct();
37 - $this->mDescription = "Protect or unprotect an article from the command line.";
 37+ $this->mDescription = "Rebuild the interwiki table using the file on meta and the language list.";
3838 $this->addOption( 'langlist', 'File with one language code per line', false, true );
3939 $this->addOption( 'dblist', 'File with one db per line', false, true );
4040 $this->addOption( 'd', 'Output folder', false, true );
@@ -46,6 +46,9 @@
4747 # List of all database names
4848 $this->dblist = array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) );
4949
 50+ # Special-case databases
 51+ //$this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) );
 52+
5053 $this->makeInterwikiSQL( $this->getOption( 'd', '/home/wikipedia/conf/interwiki/sql' ) );
5154 }
5255
@@ -66,7 +69,7 @@
6770 );
6871
6972 # Special-case hostnames
70 - $specials = array(
 73+ $this->specials = array(
7174 'sourceswiki' => 'sources.wikipedia.org',
7275 'quotewiki' => 'wikiquote.org',
7376 'textbookwiki' => 'wikibooks.org',
@@ -117,7 +120,7 @@
118121 $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
119122
120123 if ( !$lines || count( $lines ) < 2 ) {
121 - wfDie( "m:Interwiki_map not found" );
 124+ $this->error( "m:Interwiki_map not found", true );
122125 }
123126
124127 $iwArray = array();
@@ -141,11 +144,11 @@
142145
143146 foreach ( $this->dblist as $db ) {
144147 $sql = "-- Generated by rebuildInterwiki.php";
145 - if ( isset( $specials[$db] ) ) {
 148+ if ( isset( $this->specials[$db] ) ) {
146149 # Special wiki
147150 # Has interwiki links and interlanguage links to wikipedia
148151
149 - $host = $specials[$db];
 152+ $host = $this->specials[$db];
150153 $sql .= "\n--$host\n\n";
151154 $sql .= "USE $db;\n" .
152155 "TRUNCATE TABLE interwiki;\n" .
Index: trunk/phase3/maintenance/dumpInterwiki.inc
@@ -1,6 +1,6 @@
22 <?php
33 /**
4 - * Rebuild interwiki table using the file on meta and the language list
 4+ * Build constant slightly compact database of interwiki prefixes
55 * Wikimedia specific!
66 *
77 * @file
@@ -28,184 +28,214 @@
2929 }
3030 }
3131
32 -function getRebuildInterwikiDump() {
33 - global $langlist, $languageAliases, $prefixRewrites, $wgContLang;
 32+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
3433
35 - # Multi-language sites
36 - # db suffix => db suffix, iw prefix, hostname
37 - $sites = array(
38 - 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
39 - 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
40 - 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
41 - 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
42 - 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
43 - 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
44 - 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
45 - 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
46 - );
 34+class DumpInterwiki extends Maintenance {
4735
48 - # List of language prefixes likely to be found in multi-language sites
49 - $langlist = array_map( "trim", file( "/home/wikipedia/common/langlist" ) );
 36+ public function __construct() {
 37+ parent::__construct();
 38+ $this->mDescription = "Build constant slightly compact database of interwiki prefixes.";
 39+ $this->addOption( 'langlist', 'File with one language code per line', false, true );
 40+ $this->addOption( 'dblist', 'File with one db per line', false, true );
 41+ $this->addOption( 'specialdbs', "File with one 'special' db per line", false, true );
 42+ $this->addOption( 'o', 'Cdb output file', false, true );
 43+ }
5044
51 - # List of all database names
52 - $dblist = array_map( "trim", file( "/home/wikipedia/common/all.dblist" ) );
 45+ function execute() {
 46+ # List of language prefixes likely to be found in multi-language sites
 47+ $this->langlist = array_map( "trim", file( $this->getOption( 'langlist', "/home/wikipedia/common/langlist" ) ) );
5348
54 - # Special-case databases
55 - $specials = array_flip(
56 - array_map( "trim",
57 - file( "/home/wikipedia/common/special.dblist" ) ) );
 49+ # List of all database names
 50+ $this->dblist = array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) );
5851
59 - # Extra interwiki links that can't be in the intermap for some reason
60 - $extraLinks = array(
61 - array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
62 - array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
63 - array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
64 - );
 52+ # Special-case databases
 53+ $this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) );
6554
66 - # Language aliases, usually configured as redirects to the real wiki in apache
67 - # Interlanguage links are made directly to the real wiki
68 - # Something horrible happens if you forget to list an alias here, I can't
69 - # remember what
70 - $languageAliases = array(
71 - 'zh-cn' => 'zh',
72 - 'zh-tw' => 'zh',
73 - 'dk' => 'da',
74 - 'nb' => 'no',
75 - );
 55+ if ( $this->hasOption( 'o' ) ) {
 56+ $this->dbFile = CdbWriter::open( $this->getOption( 'o' ) ) ;
 57+ } else {
 58+ $this->dbFile = false;
 59+ }
7660
77 - # Special case prefix rewrites, for the benefit of Swedish which uses s:t
78 - # as an abbreviation for saint
79 - $prefixRewrites = array(
80 - 'svwiki' => array ( 's' => 'src' ),
81 - );
82 -
83 - # Construct a list of reserved prefixes
84 - $reserved = array();
85 - foreach ( $langlist as $lang ) {
86 - $reserved[$lang] = 1;
 61+ $this->getRebuildInterwikiDump();
8762 }
88 - foreach ( $languageAliases as $alias => $lang ) {
89 - $reserved[$alias] = 1;
90 - }
91 - foreach ( $sites as $site ) {
92 - $reserved[$site->lateral] = 1;
93 - }
9463
95 - # Extract the intermap from meta
96 - $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
97 - $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
 64+ function getRebuildInterwikiDump() {
 65+ global $wgContLang;
9866
99 - if ( !$lines || count( $lines ) < 2 ) {
100 - wfDie( "m:Interwiki_map not found" );
101 - }
 67+ # Multi-language sites
 68+ # db suffix => db suffix, iw prefix, hostname
 69+ $sites = array(
 70+ 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
 71+ 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
 72+ 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
 73+ 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
 74+ 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
 75+ 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
 76+ 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
 77+ 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
 78+ );
10279
103 - # Global iterwiki map
104 - foreach ( $lines as $line ) {
105 - if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(.*?)\s*$/', $line, $matches ) ) {
106 - $prefix = $wgContLang->lc( $matches[1] );
107 - $prefix = str_replace( ' ', '_', $prefix );
108 - $prefix = strtolower( $matches[1] );
109 - $url = $matches[2];
110 - if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
111 - $local = 1;
112 - } else {
113 - $local = 0;
114 - }
 80+ # Extra interwiki links that can't be in the intermap for some reason
 81+ $extraLinks = array(
 82+ array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
 83+ array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
 84+ array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
 85+ );
11586
116 - if ( empty( $reserved[$prefix] ) ) {
117 - $imap = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
118 - makeLink ( $imap, "__global" );
119 - }
 87+ # Language aliases, usually configured as redirects to the real wiki in apache
 88+ # Interlanguage links are made directly to the real wiki
 89+ # Something horrible happens if you forget to list an alias here, I can't
 90+ # remember what
 91+ $this->languageAliases = array(
 92+ 'zh-cn' => 'zh',
 93+ 'zh-tw' => 'zh',
 94+ 'dk' => 'da',
 95+ 'nb' => 'no',
 96+ );
 97+
 98+ # Special case prefix rewrites, for the benefit of Swedish which uses s:t
 99+ # as an abbreviation for saint
 100+ $this->prefixRewrites = array(
 101+ 'svwiki' => array( 's' => 'src' ),
 102+ );
 103+
 104+ # Construct a list of reserved prefixes
 105+ $reserved = array();
 106+ foreach ( $this->langlist as $lang ) {
 107+ $reserved[$lang] = 1;
120108 }
121 - }
 109+ foreach ( $this->languageAliases as $alias => $lang ) {
 110+ $reserved[$alias] = 1;
 111+ }
 112+ foreach ( $sites as $site ) {
 113+ $reserved[$site->lateral] = 1;
 114+ }
122115
123 - # Exclude Wikipedia for Wikipedia
124 - makeLink ( array ( 'iw_prefix' => 'wikipedia', 'is_url' => null ), "_wiki" );
125 -
126 - # Multilanguage sites
127 - foreach ( $sites as $site )
128 - makeLanguageLinks ( $site, "_" . $site->suffix );
 116+ # Extract the intermap from meta
 117+ $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
 118+ $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
129119
 120+ if ( !$lines || count( $lines ) < 2 ) {
 121+ $this->error( "m:Interwiki_map not found", true );
 122+ }
130123
131 - foreach ( $dblist as $db ) {
132 - if ( isset( $specials[$db] ) ) {
133 - # Special wiki
134 - # Has interwiki links and interlanguage links to wikipedia
 124+ # Global iterwiki map
 125+ foreach ( $lines as $line ) {
 126+ if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(.*?)\s*$/', $line, $matches ) ) {
 127+ $prefix = $wgContLang->lc( $matches[1] );
 128+ $prefix = str_replace( ' ', '_', $prefix );
 129+ $prefix = strtolower( $matches[1] );
 130+ $url = $matches[2];
 131+ if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
 132+ $local = 1;
 133+ } else {
 134+ $local = 0;
 135+ }
135136
136 - makeLink( array( 'iw_prefix' => $db, 'iw_url' => "wiki" ), "__sites" );
137 - # Links to multilanguage sites
138 - foreach ( $sites as $targetSite ) {
139 - makeLink( array( 'iw_prefix' => $targetSite->lateral,
140 - 'iw_url' => $targetSite->getURL( 'en' ),
141 - 'iw_local' => 1 ), $db );
 137+ if ( empty( $reserved[$prefix] ) ) {
 138+ $imap = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
 139+ $this->makeLink ( $imap, "__global" );
 140+ }
142141 }
 142+ }
143143
144 - } else {
145 - # Find out which site this DB belongs to
146 - $site = false;
147 - foreach ( $sites as $candidateSite ) {
148 - $suffix = $candidateSite->suffix;
149 - if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
150 - $site = $candidateSite;
151 - break;
 144+ # Exclude Wikipedia for Wikipedia
 145+ $this->makeLink ( array ( 'iw_prefix' => 'wikipedia', 'is_url' => null ), "_wiki" );
 146+
 147+ # Multilanguage sites
 148+ foreach ( $sites as $site ) {
 149+ $this->makeLanguageLinks ( $site, "_" . $site->suffix );
 150+ }
 151+
 152+
 153+ foreach ( $dblist as $db ) {
 154+ if ( isset( $this->specials[$db] ) ) {
 155+ # Special wiki
 156+ # Has interwiki links and interlanguage links to wikipedia
 157+
 158+ $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => "wiki" ), "__sites" );
 159+ # Links to multilanguage sites
 160+ foreach ( $sites as $targetSite ) {
 161+ $this->makeLink( array( 'iw_prefix' => $targetSite->lateral,
 162+ 'iw_url' => $targetSite->getURL( 'en' ),
 163+ 'iw_local' => 1 ), $db );
152164 }
153 - }
154 - makeLink( array( 'iw_prefix' => $db, 'iw_url' => $site->suffix ), "__sites" );
155 - if ( !$site ) {
156 - print "Invalid database $db\n";
157 - continue;
158 - }
159 - $lang = $matches[1];
 165+ } else {
 166+ # Find out which site this DB belongs to
 167+ $site = false;
 168+ foreach ( $sites as $candidateSite ) {
 169+ $suffix = $candidateSite->suffix;
 170+ if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
 171+ $site = $candidateSite;
 172+ break;
 173+ }
 174+ }
 175+ $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => $site->suffix ), "__sites" );
 176+ if ( !$site ) {
 177+ $this->error( "Invalid database $db\n" );
 178+ continue;
 179+ }
 180+ $lang = $matches[1];
160181
161 - # Lateral links
162 - foreach ( $sites as $targetSite ) {
163 - if ( $targetSite->suffix != $site->suffix ) {
164 - makeLink( array( 'iw_prefix' => $targetSite->lateral,
165 - 'iw_url' => $targetSite->getURL( $lang ),
166 - 'iw_local' => 1 ), $db );
 182+ # Lateral links
 183+ foreach ( $sites as $targetSite ) {
 184+ if ( $targetSite->suffix != $site->suffix ) {
 185+ $this->makeLink( array( 'iw_prefix' => $targetSite->lateral,
 186+ 'iw_url' => $targetSite->getURL( $lang ),
 187+ 'iw_local' => 1 ), $db );
 188+ }
167189 }
168 - }
169190
170 - if ( $site->suffix == "wiki" ) {
171 - makeLink( array( 'iw_prefix' => 'w',
172 - 'iw_url' => "http://en.wikipedia.org/wiki/$1",
173 - 'iw_local' => 1 ), $db );
 191+ if ( $site->suffix == "wiki" ) {
 192+ $this->makeLink( array( 'iw_prefix' => 'w',
 193+ 'iw_url' => "http://en.wikipedia.org/wiki/$1",
 194+ 'iw_local' => 1 ), $db );
 195+ }
 196+
174197 }
175 -
176198 }
 199+ foreach ( $extraLinks as $link ) {
 200+ $this->makeLink( $link, "__global" );
 201+ }
177202 }
178 - foreach ( $extraLinks as $link )
179 - makeLink( $link, "__global" );
180 -}
181203
182 -# ------------------------------------------------------------------------------------------
 204+ # ------------------------------------------------------------------------------------------
183205
184 -# Executes part of an INSERT statement, corresponding to all interlanguage links to a particular site
185 -function makeLanguageLinks( &$site, $source ) {
186 - global $langlist, $languageAliases;
187 - # Actual languages with their own databases
188 - foreach ( $langlist as $targetLang ) {
189 - makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $source );
 206+ # Executes part of an INSERT statement, corresponding to all interlanguage links to a particular site
 207+ function makeLanguageLinks( &$site, $source ) {
 208+ # Actual languages with their own databases
 209+ foreach ( $this->langlist as $targetLang ) {
 210+ $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $source );
 211+ }
 212+
 213+ # Language aliases
 214+ foreach ( $this->languageAliases as $alias => $lang ) {
 215+ $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $source );
 216+ }
190217 }
191218
192 - # Language aliases
193 - foreach ( $languageAliases as $alias => $lang ) {
194 - makeLink( array( $alias, $site->getURL( $lang ), 1 ), $source );
 219+ function makeLink( $entry, $source ) {
 220+ global $prefixRewrites, $dbFile;
 221+ if ( isset( $this->prefixRewrites[$source] ) && isset( $this->prefixRewrites[$source][$entry[0]] ) )
 222+ $entry[0] = $this->prefixRewrites[$source][$entry[0]];
 223+
 224+ if ( !array_key_exists( "iw_prefix", $entry ) ) {
 225+ $entry = array( "iw_prefix" => $entry[0], "iw_url" => $entry[1], "iw_local" => $entry[2] );
 226+ }
 227+ if ( array_key_exists( $source, $this->prefixRewrites ) &&
 228+ array_key_exists( $entry['iw_prefix'], $this->prefixRewrites[$source] ) ) {
 229+ $entry['iw_prefix'] = $this->prefixRewrites[$source][$entry['iw_prefix']];
 230+ }
 231+
 232+ if ( $this->dbFile ) {
 233+ $this->dbFile->set( "{$source}:{$entry['iw_prefix']}", trim( "{$entry['iw_local']} {$entry['iw_url']}" ) );
 234+ } else {
 235+ $this->output( "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n" );
 236+ }
195237 }
196238 }
197239
198 -function makeLink( $entry, $source ) {
199 - global $prefixRewrites, $dbFile;
200 - if ( isset( $prefixRewrites[$source] ) && isset( $prefixRewrites[$source][$entry[0]] ) )
201 - $entry[0] = $prefixRewrites[$source][$entry[0]];
202 - if ( !array_key_exists( "iw_prefix", $entry ) )
203 - $entry = array( "iw_prefix" => $entry[0], "iw_url" => $entry[1], "iw_local" => $entry[2] );
204 - if ( array_key_exists( $source, $prefixRewrites ) &&
205 - array_key_exists( $entry['iw_prefix'], $prefixRewrites[$source] ) )
206 - $entry['iw_prefix'] = $prefixRewrites[$source][$entry['iw_prefix']];
207 - if ( $dbFile )
208 - $dbFile->set( "{$source}:{$entry['iw_prefix']}", trim( "{$entry['iw_local']} {$entry['iw_url']}" ) );
209 - else
210 - print "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n";
 240+$maintClass = "DumpInterwiki";
 241+require_once( DO_MAINTENANCE );
211242
212 - }

Follow-up revisions

Revision | Commit summary | Author | Date
r72396 | Follow up r72394. Remove global line. | platonides | 21:35, 4 September 2010

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r19198 | use full specials list; fixes bug 3529 | brion | 05:54, 13 January 2007

Status & tagging log