r72389 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r72388‎ | r72389 | r72390 >
Date:19:15, 4 September 2010
Author:platonides
Status:deferred (Comments)
Tags:
Comment:
Port rebuildInterwiki to the new Maintenance system.
Modified paths:
  • /trunk/phase3/maintenance/rebuildInterwiki.inc (deleted) (history)
  • /trunk/phase3/maintenance/rebuildInterwiki.php (replaced) (history)

Diff [purge]

Index: trunk/phase3/maintenance/rebuildInterwiki.inc
@@ -1,259 +0,0 @@
2 -<?php
3 -/**
4 - * Rebuild interwiki table using the file on meta and the language list
5 - * Wikimedia specific!
6 - *
7 - * @file
8 - * @todo document
9 - * @ingroup Maintenance
10 - * @ingroup Wikimedia
11 - */
12 -
13 -/**
14 - * @todo document
15 - * @ingroup Maintenance
16 - */
17 -class Site {
18 - var $suffix, $lateral, $url;
19 -
20 - function __construct( $s, $l, $u ) {
21 - $this->suffix = $s;
22 - $this->lateral = $l;
23 - $this->url = $u;
24 - }
25 -
26 - function getURL( $lang ) {
27 - $xlang = str_replace( '_', '-', $lang );
28 - return "http://$xlang.{$this->url}/wiki/\$1";
29 - }
30 -}
31 -
32 -function makeInterwikiSQL( $destDir ) {
33 - global $langlist, $languageAliases, $prefixRewrites;
34 -
35 - # Multi-language sites
36 - # db suffix => db suffix, iw prefix, hostname
37 - $sites = array(
38 - 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
39 - 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
40 - 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
41 - 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
42 - 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
43 - 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
44 - 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
45 - 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
46 - );
47 -
48 - # List of language prefixes likely to be found in multi-language sites
49 - $langlist = array_map( "trim", file( "/home/wikipedia/common/langlist" ) );
50 -
51 - # List of all database names
52 - $dblist = array_map( "trim", file( "/home/wikipedia/common/all.dblist" ) );
53 -
54 - # Special-case hostnames
55 - $specials = array(
56 - 'sourceswiki' => 'sources.wikipedia.org',
57 - 'quotewiki' => 'wikiquote.org',
58 - 'textbookwiki' => 'wikibooks.org',
59 - 'sep11wiki' => 'sep11.wikipedia.org',
60 - 'metawiki' => 'meta.wikimedia.org',
61 - 'commonswiki' => 'commons.wikimedia.org',
62 - 'specieswiki' => 'species.wikimedia.org',
63 - );
64 -
65 - # Extra interwiki links that can't be in the intermap for some reason
66 - $extraLinks = array(
67 - array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
68 - array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
69 - array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
70 - );
71 -
72 - # Language aliases, usually configured as redirects to the real wiki in apache
73 - # Interlanguage links are made directly to the real wiki
74 - # Something horrible happens if you forget to list an alias here, I can't
75 - # remember what
76 - $languageAliases = array(
77 - 'zh-cn' => 'zh',
78 - 'zh-tw' => 'zh',
79 - 'dk' => 'da',
80 - 'nb' => 'no',
81 - );
82 -
83 - # Special case prefix rewrites, for the benefit of Swedish which uses s:t
84 - # as an abbreviation for saint
85 - $prefixRewrites = array(
86 - 'svwiki' => array( 's' => 'src' ),
87 - );
88 -
89 - # Construct a list of reserved prefixes
90 - $reserved = array();
91 - foreach ( $langlist as $lang ) {
92 - $reserved[$lang] = 1;
93 - }
94 - foreach ( $languageAliases as $alias => $lang ) {
95 - $reserved[$alias] = 1;
96 - }
97 - foreach ( $sites as $site ) {
98 - $reserved[$site->lateral] = 1;
99 - }
100 -
101 - # Extract the intermap from meta
102 - $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
103 - $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
104 -
105 - if ( !$lines || count( $lines ) < 2 ) {
106 - wfDie( "m:Interwiki_map not found" );
107 - }
108 -
109 - $iwArray = array();
110 -
111 - foreach ( $lines as $line ) {
112 - $matches = array();
113 - if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', $line, $matches ) ) {
114 - $prefix = strtolower( $matches[1] );
115 - $url = $matches[2];
116 - if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
117 - $local = 1;
118 - } else {
119 - $local = 0;
120 - }
121 -
122 - if ( empty( $reserved[$prefix] ) ) {
123 - $iwArray[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
124 - }
125 - }
126 - }
127 -
128 -
129 -
130 - foreach ( $dblist as $db ) {
131 - $sql = "-- Generated by rebuildInterwiki.php";
132 - if ( isset( $specials[$db] ) ) {
133 - # Special wiki
134 - # Has interwiki links and interlanguage links to wikipedia
135 -
136 - $host = $specials[$db];
137 - $sql .= "\n--$host\n\n";
138 - $sql .= "USE $db;\n" .
139 - "TRUNCATE TABLE interwiki;\n" .
140 - "INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";
141 - $first = true;
142 -
143 - # Intermap links
144 - foreach ( $iwArray as $iwEntry ) {
145 - $sql .= makeLink( $iwEntry, $first, $db );
146 - }
147 -
148 - # Links to multilanguage sites
149 - foreach ( $sites as $targetSite ) {
150 - $sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( 'en' ), 1 ), $first, $db );
151 - }
152 -
153 - # Interlanguage links to wikipedia
154 - $sql .= makeLanguageLinks( $sites['wiki'], $first, $db );
155 -
156 - # Extra links
157 - foreach ( $extraLinks as $link ) {
158 - $sql .= makeLink( $link, $first, $db );
159 - }
160 -
161 - $sql .= ";\n";
162 - } else {
163 - # Find out which site this DB belongs to
164 - $site = false;
165 - foreach ( $sites as $candidateSite ) {
166 - $suffix = $candidateSite->suffix;
167 - if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
168 - $site = $candidateSite;
169 - break;
170 - }
171 - }
172 - if ( !$site ) {
173 - print "Invalid database $db\n";
174 - continue;
175 - }
176 - $lang = $matches[1];
177 - $host = "$lang." . $site->url;
178 - $sql .= "\n--$host\n\n";
179 -
180 - $sql .= "USE $db;\n" .
181 - "TRUNCATE TABLE interwiki;\n" .
182 - "INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";
183 - $first = true;
184 -
185 - # Intermap links
186 - foreach ( $iwArray as $iwEntry ) {
187 - # Suppress links with the same name as the site
188 - if ( ( $suffix == 'wiki' && $iwEntry['iw_prefix'] != 'wikipedia' ) ||
189 - ( $suffix != 'wiki' && $suffix != $iwEntry['iw_prefix'] ) )
190 - {
191 - $sql .= makeLink( $iwEntry, $first, $db );
192 - }
193 - }
194 -
195 - # Lateral links
196 - foreach ( $sites as $targetSite ) {
197 - # Suppress link to self
198 - if ( $targetSite->suffix != $site->suffix ) {
199 - $sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( $lang ), 1 ), $first, $db );
200 - }
201 - }
202 -
203 - # Interlanguage links
204 - $sql .= makeLanguageLinks( $site, $first, $db );
205 -
206 - # w link within wikipedias
207 - # Other sites already have it as a lateral link
208 - if ( $site->suffix == "wiki" ) {
209 - $sql .= makeLink( array( "w", "http://en.wikipedia.org/wiki/$1", 1 ), $first, $db );
210 - }
211 -
212 - # Extra links
213 - foreach ( $extraLinks as $link ) {
214 - $sql .= makeLink( $link, $first, $db );
215 - }
216 - $sql .= ";\n";
217 - }
218 - file_put_contents( "$destDir/$db.sql", $sql );
219 - }
220 -}
221 -
222 -# ------------------------------------------------------------------------------------------
223 -
224 -# Returns part of an INSERT statement, corresponding to all interlanguage links to a particular site
225 -function makeLanguageLinks( &$site, &$first, $source ) {
226 - global $langlist, $languageAliases;
227 -
228 - $sql = "";
229 -
230 - # Actual languages with their own databases
231 - foreach ( $langlist as $targetLang ) {
232 - $sql .= makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
233 - }
234 -
235 - # Language aliases
236 - foreach ( $languageAliases as $alias => $lang ) {
237 - $sql .= makeLink( array( $alias, $site->getURL( $lang ), 1 ), $first, $source );
238 - }
239 - return $sql;
240 -}
241 -
242 -# Make SQL for a single link from an array
243 -function makeLink( $entry, &$first, $source ) {
244 - global $prefixRewrites;
245 -
246 - if ( isset( $prefixRewrites[$source] ) && isset( $prefixRewrites[$source][$entry[0]] ) ) {
247 - $entry[0] = $prefixRewrites[$source][$entry[0]];
248 - }
249 -
250 - $sql = "";
251 - # Add comma
252 - if ( $first ) {
253 - $first = false;
254 - } else {
255 - $sql .= ",\n";
256 - }
257 - $dbr = wfGetDB( DB_SLAVE );
258 - $sql .= "(" . $dbr->makeList( $entry ) . ")";
259 - return $sql;
260 -}
Index: trunk/phase3/maintenance/rebuildInterwiki.php
@@ -1,29 +0,0 @@
2 -<?php
3 -/**
4 - * Rebuild interwiki table using the file on meta and the language list
5 - * Wikimedia specific!
6 - *
7 - * @file
8 - * @todo document
9 - * @ingroup Maintenance
10 - * @ingroup Wikimedia
11 - */
12 -
13 -/** */
14 -$oldCwd = getcwd();
15 -
16 -$optionsWithArgs = array( "d" );
17 -require_once( dirname( __FILE__ ) . '/commandLine.inc' );
18 -require( "rebuildInterwiki.inc" );
19 -chdir( $oldCwd );
20 -
21 -# Output
22 -if ( isset( $options['d'] ) ) {
23 - $destDir = $options['d'];
24 -} else {
25 - $destDir = '/home/wikipedia/conf/interwiki/sql';
26 -}
27 -
28 -echo "Making new interwiki SQL files in $destDir\n";
29 -makeInterwikiSQL( $destDir );
30 -
Index: trunk/phase3/maintenance/rebuildInterwiki.php
@@ -0,0 +1,274 @@
 2+<?php
 3+/**
 4+ * Rebuild interwiki table using the file on meta and the language list
 5+ * Wikimedia specific!
 6+ *
 7+ * @file
 8+ * @todo document
 9+ * @ingroup Maintenance
 10+ * @ingroup Wikimedia
 11+ */
 12+
 13+/**
 14+ * @todo document
 15+ * @ingroup Maintenance
 16+ */
 17+class Site {
 18+ var $suffix, $lateral, $url;
 19+
 20+ function __construct( $s, $l, $u ) {
 21+ $this->suffix = $s;
 22+ $this->lateral = $l;
 23+ $this->url = $u;
 24+ }
 25+
 26+ function getURL( $lang ) {
 27+ $xlang = str_replace( '_', '-', $lang );
 28+ return "http://$xlang.{$this->url}/wiki/\$1";
 29+ }
 30+}
 31+
 32+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
 33+
 34+class RebuildInterwiki extends Maintenance {
 35+ public function __construct() {
 36+ parent::__construct();
 37+ $this->mDescription = "Protect or unprotect an article from the command line.";
 38+ $this->addOption( 'langlist', 'File with one language code per line', false, true );
 39+ $this->addOption( 'dblist', 'File with one db per line', false, true );
 40+ $this->addOption( 'd', 'Output folder', false, true );
 41+ }
 42+
 43+ function execute() {
 44+ # List of language prefixes likely to be found in multi-language sites
 45+ $this->langlist = array_map( "trim", file( $this->getOption( 'langlist', "/home/wikipedia/common/langlist" ) ) );
 46+
 47+ # List of all database names
 48+ $this->dblist = array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) );
 49+
 50+ $this->makeInterwikiSQL( $this->getOption( 'd', '/home/wikipedia/conf/interwiki/sql' ) );
 51+ }
 52+
 53+ function makeInterwikiSQL( $destDir ) {
 54+ $this->output( "Making new interwiki SQL files in $destDir\n" );
 55+
 56+ # Multi-language sites
 57+ # db suffix => db suffix, iw prefix, hostname
 58+ $sites = array(
 59+ 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
 60+ 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
 61+ 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
 62+ 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
 63+ 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
 64+ 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
 65+ 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
 66+ 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
 67+ );
 68+
 69+ # Special-case hostnames
 70+ $specials = array(
 71+ 'sourceswiki' => 'sources.wikipedia.org',
 72+ 'quotewiki' => 'wikiquote.org',
 73+ 'textbookwiki' => 'wikibooks.org',
 74+ 'sep11wiki' => 'sep11.wikipedia.org',
 75+ 'metawiki' => 'meta.wikimedia.org',
 76+ 'commonswiki' => 'commons.wikimedia.org',
 77+ 'specieswiki' => 'species.wikimedia.org',
 78+ );
 79+
 80+ # Extra interwiki links that can't be in the intermap for some reason
 81+ $extraLinks = array(
 82+ array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
 83+ array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
 84+ array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
 85+ );
 86+
 87+ # Language aliases, usually configured as redirects to the real wiki in apache
 88+ # Interlanguage links are made directly to the real wiki
 89+ # Something horrible happens if you forget to list an alias here, I can't
 90+ # remember what
 91+ $this->languageAliases = array(
 92+ 'zh-cn' => 'zh',
 93+ 'zh-tw' => 'zh',
 94+ 'dk' => 'da',
 95+ 'nb' => 'no',
 96+ );
 97+
 98+ # Special case prefix rewrites, for the benefit of Swedish which uses s:t
 99+ # as an abbreviation for saint
 100+ $this->prefixRewrites = array(
 101+ 'svwiki' => array( 's' => 'src' ),
 102+ );
 103+
 104+ # Construct a list of reserved prefixes
 105+ $reserved = array();
 106+ foreach ( $this->langlist as $lang ) {
 107+ $reserved[$lang] = 1;
 108+ }
 109+ foreach ( $this->languageAliases as $alias => $lang ) {
 110+ $reserved[$alias] = 1;
 111+ }
 112+ foreach ( $sites as $site ) {
 113+ $reserved[$site->lateral] = 1;
 114+ }
 115+
 116+ # Extract the intermap from meta
 117+ $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
 118+ $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );
 119+
 120+ if ( !$lines || count( $lines ) < 2 ) {
 121+ wfDie( "m:Interwiki_map not found" );
 122+ }
 123+
 124+ $iwArray = array();
 125+
 126+ foreach ( $lines as $line ) {
 127+ $matches = array();
 128+ if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', $line, $matches ) ) {
 129+ $prefix = strtolower( $matches[1] );
 130+ $url = $matches[2];
 131+ if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
 132+ $local = 1;
 133+ } else {
 134+ $local = 0;
 135+ }
 136+
 137+ if ( empty( $reserved[$prefix] ) ) {
 138+ $iwArray[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
 139+ }
 140+ }
 141+ }
 142+
 143+ foreach ( $this->dblist as $db ) {
 144+ $sql = "-- Generated by rebuildInterwiki.php";
 145+ if ( isset( $specials[$db] ) ) {
 146+ # Special wiki
 147+ # Has interwiki links and interlanguage links to wikipedia
 148+
 149+ $host = $specials[$db];
 150+ $sql .= "\n--$host\n\n";
 151+ $sql .= "USE $db;\n" .
 152+ "TRUNCATE TABLE interwiki;\n" .
 153+ "INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";
 154+ $first = true;
 155+
 156+ # Intermap links
 157+ foreach ( $iwArray as $iwEntry ) {
 158+ $sql .= $this->makeLink( $iwEntry, $first, $db );
 159+ }
 160+
 161+ # Links to multilanguage sites
 162+ foreach ( $sites as $targetSite ) {
 163+ $sql .= $this->makeLink( array( $targetSite->lateral, $targetSite->getURL( 'en' ), 1 ), $first, $db );
 164+ }
 165+
 166+ # Interlanguage links to wikipedia
 167+ $sql .= $this->makeLanguageLinks( $sites['wiki'], $first, $db );
 168+
 169+ # Extra links
 170+ foreach ( $extraLinks as $link ) {
 171+ $sql .= $this->makeLink( $link, $first, $db );
 172+ }
 173+
 174+ $sql .= ";\n";
 175+ } else {
 176+ # Find out which site this DB belongs to
 177+ $site = false;
 178+ foreach ( $sites as $candidateSite ) {
 179+ $suffix = $candidateSite->suffix;
 180+ if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
 181+ $site = $candidateSite;
 182+ break;
 183+ }
 184+ }
 185+ if ( !$site ) {
 186+ print "Invalid database $db\n";
 187+ continue;
 188+ }
 189+ $lang = $matches[1];
 190+ $host = "$lang." . $site->url;
 191+ $sql .= "\n--$host\n\n";
 192+
 193+ $sql .= "USE $db;\n" .
 194+ "TRUNCATE TABLE interwiki;\n" .
 195+ "INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";
 196+ $first = true;
 197+
 198+ # Intermap links
 199+ foreach ( $iwArray as $iwEntry ) {
 200+ # Suppress links with the same name as the site
 201+ if ( ( $suffix == 'wiki' && $iwEntry['iw_prefix'] != 'wikipedia' ) ||
 202+ ( $suffix != 'wiki' && $suffix != $iwEntry['iw_prefix'] ) )
 203+ {
 204+ $sql .= $this->makeLink( $iwEntry, $first, $db );
 205+ }
 206+ }
 207+
 208+ # Lateral links
 209+ foreach ( $sites as $targetSite ) {
 210+ # Suppress link to self
 211+ if ( $targetSite->suffix != $site->suffix ) {
 212+ $sql .= $this->makeLink( array( $targetSite->lateral, $targetSite->getURL( $lang ), 1 ), $first, $db );
 213+ }
 214+ }
 215+
 216+ # Interlanguage links
 217+ $sql .= $this->makeLanguageLinks( $site, $first, $db );
 218+
 219+ # w link within wikipedias
 220+ # Other sites already have it as a lateral link
 221+ if ( $site->suffix == "wiki" ) {
 222+ $sql .= $this->makeLink( array( "w", "http://en.wikipedia.org/wiki/$1", 1 ), $first, $db );
 223+ }
 224+
 225+ # Extra links
 226+ foreach ( $extraLinks as $link ) {
 227+ $sql .= $this->makeLink( $link, $first, $db );
 228+ }
 229+ $sql .= ";\n";
 230+ }
 231+ file_put_contents( "$destDir/$db.sql", $sql );
 232+ }
 233+ }
 234+
 235+ # ------------------------------------------------------------------------------------------
 236+
 237+ # Returns part of an INSERT statement, corresponding to all interlanguage links to a particular site
 238+ function makeLanguageLinks( &$site, &$first, $source ) {
 239+ $sql = "";
 240+
 241+ # Actual languages with their own databases
 242+ foreach ( $this->langlist as $targetLang ) {
 243+ $sql .= $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
 244+ }
 245+
 246+ # Language aliases
 247+ foreach ( $this->languageAliases as $alias => $lang ) {
 248+ $sql .= $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $first, $source );
 249+ }
 250+ return $sql;
 251+ }
 252+
 253+ # Make SQL for a single link from an array
 254+ function makeLink( $entry, &$first, $source ) {
 255+
 256+ if ( isset( $this->prefixRewrites[$source] ) && isset($entry[0]) && isset( $this->prefixRewrites[$source][$entry[0]] ) ) {
 257+ $entry[0] = $this->prefixRewrites[$source][$entry[0]];
 258+ }
 259+
 260+ $sql = "";
 261+ # Add comma
 262+ if ( $first ) {
 263+ $first = false;
 264+ } else {
 265+ $sql .= ",\n";
 266+ }
 267+ $dbr = wfGetDB( DB_SLAVE );
 268+ $sql .= "(" . $dbr->makeList( $entry ) . ")";
 269+ return $sql;
 270+ }
 271+}
 272+
 273+$maintClass = "RebuildInterwiki";
 274+require_once( DO_MAINTENANCE );
 275+
Property changes on: trunk/phase3/maintenance/rebuildInterwiki.php
___________________________________________________________________
Added: svn:eol-style
1276 + native
Added: svn:keywords
2277 + Author Date Id Revision

Comments

#Comment by Platonides (talk | contribs)   19:53, 4 September 2010

It didn't produce a nice diff. :(

You should compare the old rebuildInterwiki.inc with the new rebuildInterwiki.php, ignoring whitespace.

Other than the change from globals to properties and from functions to methods, the file locations have been added as parameters (so it can be used outside Wikimedia), and an isset($entry[0]) check was added in makeLink() because $entry[0] is not always set (the entry may be an $iwEntry, which is keyed by column name rather than by index) and accessing it produced a notice.

Status & tagging log