r72395 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r72394‎ | r72395 | r72396 >
Date:21:33, 4 September 2010
Author:platonides
Status:ok
Tags:
Comment:
Move dumpInterwiki.inc over dumpInterwiki.php
Modified paths:
  • /trunk/phase3/maintenance/dumpInterwiki.inc (deleted) (history)
  • /trunk/phase3/maintenance/dumpInterwiki.php (replaced) (history)

Diff [purge]

Index: trunk/phase3/maintenance/dumpInterwiki.inc
@@ -1,241 +0,0 @@
2 -<?php
3 -/**
4 - * Build constant slightly compact database of interwiki prefixes
5 - * Wikimedia specific!
6 - *
7 - * @file
8 - * @todo document
9 - * @ingroup Maintenance
10 - * @ingroup Wikimedia
11 - */
12 -
13 -/**
14 - * @todo document
15 - * @ingroup Maintenance
16 - */
17 -class Site {
18 - var $suffix, $lateral, $url;
19 -
20 - function __construct( $s, $l, $u ) {
21 - $this->suffix = $s;
22 - $this->lateral = $l;
23 - $this->url = $u;
24 - }
25 -
26 - function getURL( $lang ) {
27 - $xlang = str_replace( '_', '-', $lang );
28 - return "http://$xlang.{$this->url}/wiki/\$1";
29 - }
30 -}
31 -
32 -require_once( dirname( __FILE__ ) . '/Maintenance.php' );
33 -
34 -class DumpInterwiki extends Maintenance {
35 -
36 - public function __construct() {
37 - parent::__construct();
38 - $this->mDescription = "Build constant slightly compact database of interwiki prefixes.";
39 - $this->addOption( 'langlist', 'File with one language code per line', false, true );
40 - $this->addOption( 'dblist', 'File with one db per line', false, true );
41 - $this->addOption( 'specialdbs', "File with one 'special' db per line", false, true );
42 - $this->addOption( 'o', 'Cdb output file', false, true );
43 - }
44 -
45 - function execute() {
46 - # List of language prefixes likely to be found in multi-language sites
47 - $this->langlist = array_map( "trim", file( $this->getOption( 'langlist', "/home/wikipedia/common/langlist" ) ) );
48 -
49 - # List of all database names
50 - $this->dblist = array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) );
51 -
52 - # Special-case databases
53 - $this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) );
54 -
55 - if ( $this->hasOption( 'o' ) ) {
56 - $this->dbFile = CdbWriter::open( $this->getOption( 'o' ) ) ;
57 - } else {
58 - $this->dbFile = false;
59 - }
60 -
61 - $this->getRebuildInterwikiDump();
62 - }
63 -
64 - function getRebuildInterwikiDump() {
65 - global $wgContLang;
66 -
67 - # Multi-language sites
68 - # db suffix => db suffix, iw prefix, hostname
69 - $sites = array(
70 - 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
71 - 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
72 - 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
73 - 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
74 - 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
75 - 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
76 - 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
77 - 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
78 - );
79 -
80 - # Extra interwiki links that can't be in the intermap for some reason
81 - $extraLinks = array(
82 - array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
83 - array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
84 - array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
85 - );
86 -
87 - # Language aliases, usually configured as redirects to the real wiki in apache
88 - # Interlanguage links are made directly to the real wiki
89 - # Something horrible happens if you forget to list an alias here, I can't
90 - # remember what
91 - $this->languageAliases = array(
92 - 'zh-cn' => 'zh',
93 - 'zh-tw' => 'zh',
94 - 'dk' => 'da',
95 - 'nb' => 'no',
96 - );
97 -
98 - # Special case prefix rewrites, for the benefit of Swedish which uses s:t
99 - # as an abbreviation for saint
100 - $this->prefixRewrites = array(
101 - 'svwiki' => array( 's' => 'src' ),
102 - );
103 -
104 - # Construct a list of reserved prefixes
105 - $reserved = array();
106 - foreach ( $this->langlist as $lang ) {
107 - $reserved[$lang] = 1;
108 - }
109 - foreach ( $this->languageAliases as $alias => $lang ) {
110 - $reserved[$alias] = 1;
111 - }
112 - foreach ( $sites as $site ) {
113 - $reserved[$site->lateral] = 1;
114 - }
115 -
116 - # Extract the intermap from meta
117 - $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
118 - $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
119 -
120 - if ( !$lines || count( $lines ) < 2 ) {
121 - $this->error( "m:Interwiki_map not found", true );
122 - }
123 -
124 - # Global interwiki map
125 - foreach ( $lines as $line ) {
126 - if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(.*?)\s*$/', $line, $matches ) ) {
127 - $prefix = $wgContLang->lc( $matches[1] );
128 - $prefix = str_replace( ' ', '_', $prefix );
129 - $prefix = strtolower( $matches[1] );
130 - $url = $matches[2];
131 - if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
132 - $local = 1;
133 - } else {
134 - $local = 0;
135 - }
136 -
137 - if ( empty( $reserved[$prefix] ) ) {
138 - $imap = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
139 - $this->makeLink ( $imap, "__global" );
140 - }
141 - }
142 - }
143 -
144 - # Exclude Wikipedia for Wikipedia
145 - $this->makeLink ( array ( 'iw_prefix' => 'wikipedia', 'is_url' => null ), "_wiki" );
146 -
147 - # Multilanguage sites
148 - foreach ( $sites as $site ) {
149 - $this->makeLanguageLinks ( $site, "_" . $site->suffix );
150 - }
151 -
152 -
153 - foreach ( $dblist as $db ) {
154 - if ( isset( $this->specials[$db] ) ) {
155 - # Special wiki
156 - # Has interwiki links and interlanguage links to wikipedia
157 -
158 - $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => "wiki" ), "__sites" );
159 - # Links to multilanguage sites
160 - foreach ( $sites as $targetSite ) {
161 - $this->makeLink( array( 'iw_prefix' => $targetSite->lateral,
162 - 'iw_url' => $targetSite->getURL( 'en' ),
163 - 'iw_local' => 1 ), $db );
164 - }
165 - } else {
166 - # Find out which site this DB belongs to
167 - $site = false;
168 - foreach ( $sites as $candidateSite ) {
169 - $suffix = $candidateSite->suffix;
170 - if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
171 - $site = $candidateSite;
172 - break;
173 - }
174 - }
175 - $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => $site->suffix ), "__sites" );
176 - if ( !$site ) {
177 - $this->error( "Invalid database $db\n" );
178 - continue;
179 - }
180 - $lang = $matches[1];
181 -
182 - # Lateral links
183 - foreach ( $sites as $targetSite ) {
184 - if ( $targetSite->suffix != $site->suffix ) {
185 - $this->makeLink( array( 'iw_prefix' => $targetSite->lateral,
186 - 'iw_url' => $targetSite->getURL( $lang ),
187 - 'iw_local' => 1 ), $db );
188 - }
189 - }
190 -
191 - if ( $site->suffix == "wiki" ) {
192 - $this->makeLink( array( 'iw_prefix' => 'w',
193 - 'iw_url' => "http://en.wikipedia.org/wiki/$1",
194 - 'iw_local' => 1 ), $db );
195 - }
196 -
197 - }
198 - }
199 - foreach ( $extraLinks as $link ) {
200 - $this->makeLink( $link, "__global" );
201 - }
202 - }
203 -
204 - # ------------------------------------------------------------------------------------------
205 -
206 - # Executes part of an INSERT statement, corresponding to all interlanguage links to a particular site
207 - function makeLanguageLinks( &$site, $source ) {
208 - # Actual languages with their own databases
209 - foreach ( $this->langlist as $targetLang ) {
210 - $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $source );
211 - }
212 -
213 - # Language aliases
214 - foreach ( $this->languageAliases as $alias => $lang ) {
215 - $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $source );
216 - }
217 - }
218 -
219 - function makeLink( $entry, $source ) {
220 - global $prefixRewrites, $dbFile;
221 - if ( isset( $this->prefixRewrites[$source] ) && isset( $this->prefixRewrites[$source][$entry[0]] ) )
222 - $entry[0] = $this->prefixRewrites[$source][$entry[0]];
223 -
224 - if ( !array_key_exists( "iw_prefix", $entry ) ) {
225 - $entry = array( "iw_prefix" => $entry[0], "iw_url" => $entry[1], "iw_local" => $entry[2] );
226 - }
227 - if ( array_key_exists( $source, $this->prefixRewrites ) &&
228 - array_key_exists( $entry['iw_prefix'], $this->prefixRewrites[$source] ) ) {
229 - $entry['iw_prefix'] = $this->prefixRewrites[$source][$entry['iw_prefix']];
230 - }
231 -
232 - if ( $this->dbFile ) {
233 - $this->dbFile->set( "{$source}:{$entry['iw_prefix']}", trim( "{$entry['iw_local']} {$entry['iw_url']}" ) );
234 - } else {
235 - $this->output( "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n" );
236 - }
237 - }
238 -}
239 -
240 -$maintClass = "DumpInterwiki";
241 -require_once( DO_MAINTENANCE );
242 -
Index: trunk/phase3/maintenance/dumpInterwiki.php
@@ -1,6 +1,6 @@
22 <?php
33 /**
4 - * Rebuild interwiki table using the file on meta and the language list
 4+ * Build constant slightly compact database of interwiki prefixes
55 * Wikimedia specific!
66 *
77 * @file
@@ -9,19 +9,233 @@
1010 * @ingroup Wikimedia
1111 */
1212
13 -/** */
14 -$oldCwd = getcwd();
 13+/**
 14+ * @todo document
 15+ * @ingroup Maintenance
 16+ */
 17+class Site {
 18+ var $suffix, $lateral, $url;
1519
16 -$optionsWithArgs = array( "o" );
17 -require_once( dirname( __FILE__ ) . '/commandLine.inc' );
18 -require( dirname( __FILE__ ) . "/dumpInterwiki.inc" );
19 -chdir( $oldCwd );
 20+ function __construct( $s, $l, $u ) {
 21+ $this->suffix = $s;
 22+ $this->lateral = $l;
 23+ $this->url = $u;
 24+ }
2025
21 -# Output
22 -if ( isset( $options['o'] ) ) {
23 - # To database specified with -o
24 - $dbFile = CdbWriter::open( $options['o'] );
 26+ function getURL( $lang ) {
 27+ $xlang = str_replace( '_', '-', $lang );
 28+ return "http://$xlang.{$this->url}/wiki/\$1";
 29+ }
2530 }
2631
27 -getRebuildInterwikiDump();
 32+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
2833
 34+class DumpInterwiki extends Maintenance {
 35+
 36+ public function __construct() {
 37+ parent::__construct();
 38+ $this->mDescription = "Build constant slightly compact database of interwiki prefixes.";
 39+ $this->addOption( 'langlist', 'File with one language code per line', false, true );
 40+ $this->addOption( 'dblist', 'File with one db per line', false, true );
 41+ $this->addOption( 'specialdbs', "File with one 'special' db per line", false, true );
 42+ $this->addOption( 'o', 'Cdb output file', false, true );
 43+ }
 44+
 45+ function execute() {
 46+ # List of language prefixes likely to be found in multi-language sites
 47+ $this->langlist = array_map( "trim", file( $this->getOption( 'langlist', "/home/wikipedia/common/langlist" ) ) );
 48+
 49+ # List of all database names
 50+ $this->dblist = array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) );
 51+
 52+ # Special-case databases
 53+ $this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) );
 54+
 55+ if ( $this->hasOption( 'o' ) ) {
 56+ $this->dbFile = CdbWriter::open( $this->getOption( 'o' ) ) ;
 57+ } else {
 58+ $this->dbFile = false;
 59+ }
 60+
 61+ $this->getRebuildInterwikiDump();
 62+ }
 63+
 64+ function getRebuildInterwikiDump() {
 65+ global $wgContLang;
 66+
 67+ # Multi-language sites
 68+ # db suffix => db suffix, iw prefix, hostname
 69+ $sites = array(
 70+ 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
 71+ 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
 72+ 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
 73+ 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
 74+ 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
 75+ 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
 76+ 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
 77+ 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
 78+ );
 79+
 80+ # Extra interwiki links that can't be in the intermap for some reason
 81+ $extraLinks = array(
 82+ array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
 83+ array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
 84+ array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
 85+ );
 86+
 87+ # Language aliases, usually configured as redirects to the real wiki in apache
 88+ # Interlanguage links are made directly to the real wiki
 89+ # Something horrible happens if you forget to list an alias here, I can't
 90+ # remember what
 91+ $this->languageAliases = array(
 92+ 'zh-cn' => 'zh',
 93+ 'zh-tw' => 'zh',
 94+ 'dk' => 'da',
 95+ 'nb' => 'no',
 96+ );
 97+
 98+ # Special case prefix rewrites, for the benefit of Swedish which uses s:t
 99+ # as an abbreviation for saint
 100+ $this->prefixRewrites = array(
 101+ 'svwiki' => array( 's' => 'src' ),
 102+ );
 103+
 104+ # Construct a list of reserved prefixes
 105+ $reserved = array();
 106+ foreach ( $this->langlist as $lang ) {
 107+ $reserved[$lang] = 1;
 108+ }
 109+ foreach ( $this->languageAliases as $alias => $lang ) {
 110+ $reserved[$alias] = 1;
 111+ }
 112+ foreach ( $sites as $site ) {
 113+ $reserved[$site->lateral] = 1;
 114+ }
 115+
 116+ # Extract the intermap from meta
 117+ $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
 118+ $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
 119+
 120+ if ( !$lines || count( $lines ) < 2 ) {
 121+ $this->error( "m:Interwiki_map not found", true );
 122+ }
 123+
 124+ # Global interwiki map
 125+ foreach ( $lines as $line ) {
 126+ if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(.*?)\s*$/', $line, $matches ) ) {
 127+ $prefix = $wgContLang->lc( $matches[1] );
 128+ $prefix = str_replace( ' ', '_', $prefix );
 129+ $prefix = strtolower( $matches[1] );
 130+ $url = $matches[2];
 131+ if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
 132+ $local = 1;
 133+ } else {
 134+ $local = 0;
 135+ }
 136+
 137+ if ( empty( $reserved[$prefix] ) ) {
 138+ $imap = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
 139+ $this->makeLink ( $imap, "__global" );
 140+ }
 141+ }
 142+ }
 143+
 144+ # Exclude Wikipedia for Wikipedia
 145+ $this->makeLink ( array ( 'iw_prefix' => 'wikipedia', 'is_url' => null ), "_wiki" );
 146+
 147+ # Multilanguage sites
 148+ foreach ( $sites as $site ) {
 149+ $this->makeLanguageLinks ( $site, "_" . $site->suffix );
 150+ }
 151+
 152+
 153+ foreach ( $dblist as $db ) {
 154+ if ( isset( $this->specials[$db] ) ) {
 155+ # Special wiki
 156+ # Has interwiki links and interlanguage links to wikipedia
 157+
 158+ $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => "wiki" ), "__sites" );
 159+ # Links to multilanguage sites
 160+ foreach ( $sites as $targetSite ) {
 161+ $this->makeLink( array( 'iw_prefix' => $targetSite->lateral,
 162+ 'iw_url' => $targetSite->getURL( 'en' ),
 163+ 'iw_local' => 1 ), $db );
 164+ }
 165+ } else {
 166+ # Find out which site this DB belongs to
 167+ $site = false;
 168+ foreach ( $sites as $candidateSite ) {
 169+ $suffix = $candidateSite->suffix;
 170+ if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
 171+ $site = $candidateSite;
 172+ break;
 173+ }
 174+ }
 175+ $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => $site->suffix ), "__sites" );
 176+ if ( !$site ) {
 177+ $this->error( "Invalid database $db\n" );
 178+ continue;
 179+ }
 180+ $lang = $matches[1];
 181+
 182+ # Lateral links
 183+ foreach ( $sites as $targetSite ) {
 184+ if ( $targetSite->suffix != $site->suffix ) {
 185+ $this->makeLink( array( 'iw_prefix' => $targetSite->lateral,
 186+ 'iw_url' => $targetSite->getURL( $lang ),
 187+ 'iw_local' => 1 ), $db );
 188+ }
 189+ }
 190+
 191+ if ( $site->suffix == "wiki" ) {
 192+ $this->makeLink( array( 'iw_prefix' => 'w',
 193+ 'iw_url' => "http://en.wikipedia.org/wiki/$1",
 194+ 'iw_local' => 1 ), $db );
 195+ }
 196+
 197+ }
 198+ }
 199+ foreach ( $extraLinks as $link ) {
 200+ $this->makeLink( $link, "__global" );
 201+ }
 202+ }
 203+
 204+ # ------------------------------------------------------------------------------------------
 205+
 206+ # Executes part of an INSERT statement, corresponding to all interlanguage links to a particular site
 207+ function makeLanguageLinks( &$site, $source ) {
 208+ # Actual languages with their own databases
 209+ foreach ( $this->langlist as $targetLang ) {
 210+ $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $source );
 211+ }
 212+
 213+ # Language aliases
 214+ foreach ( $this->languageAliases as $alias => $lang ) {
 215+ $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $source );
 216+ }
 217+ }
 218+
 219+ function makeLink( $entry, $source ) {
 220+ global $prefixRewrites, $dbFile;
 221+ if ( isset( $this->prefixRewrites[$source] ) && isset( $this->prefixRewrites[$source][$entry[0]] ) )
 222+ $entry[0] = $this->prefixRewrites[$source][$entry[0]];
 223+
 224+ if ( !array_key_exists( "iw_prefix", $entry ) ) {
 225+ $entry = array( "iw_prefix" => $entry[0], "iw_url" => $entry[1], "iw_local" => $entry[2] );
 226+ }
 227+ if ( array_key_exists( $source, $this->prefixRewrites ) &&
 228+ array_key_exists( $entry['iw_prefix'], $this->prefixRewrites[$source] ) ) {
 229+ $entry['iw_prefix'] = $this->prefixRewrites[$source][$entry['iw_prefix']];
 230+ }
 231+
 232+ if ( $this->dbFile ) {
 233+ $this->dbFile->set( "{$source}:{$entry['iw_prefix']}", trim( "{$entry['iw_local']} {$entry['iw_url']}" ) );
 234+ } else {
 235+ $this->output( "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n" );
 236+ }
 237+ }
 238+}
 239+
 240+$maintClass = "DumpInterwiki";
 241+require_once( DO_MAINTENANCE );
 242+

Status & tagging log