Index: trunk/phase3/maintenance/rebuildInterwiki.php |
— | — | @@ -33,7 +33,7 @@ |
34 | 34 | class RebuildInterwiki extends Maintenance { |
35 | 35 | public function __construct() { |
36 | 36 | parent::__construct(); |
37 | | - $this->mDescription = "Protect or unprotect an article from the command line."; |
| 37 | + $this->mDescription = "Rebuild the interwiki table using the file on meta and the language list."; |
38 | 38 | $this->addOption( 'langlist', 'File with one language code per line', false, true ); |
39 | 39 | $this->addOption( 'dblist', 'File with one db per line', false, true ); |
40 | 40 | $this->addOption( 'd', 'Output folder', false, true ); |
— | — | @@ -46,6 +46,9 @@ |
47 | 47 | # List of all database names |
48 | 48 | $this->dblist = array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) ); |
49 | 49 | |
| 50 | + # Special-case databases |
| 51 | + //$this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) ); |
| 52 | + |
50 | 53 | $this->makeInterwikiSQL( $this->getOption( 'd', '/home/wikipedia/conf/interwiki/sql' ) ); |
51 | 54 | } |
52 | 55 | |
— | — | @@ -66,7 +69,7 @@ |
67 | 70 | ); |
68 | 71 | |
69 | 72 | # Special-case hostnames |
70 | | - $specials = array( |
| 73 | + $this->specials = array( |
71 | 74 | 'sourceswiki' => 'sources.wikipedia.org', |
72 | 75 | 'quotewiki' => 'wikiquote.org', |
73 | 76 | 'textbookwiki' => 'wikibooks.org', |
— | — | @@ -117,7 +120,7 @@ |
118 | 121 | $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) ); |
119 | 122 | |
120 | 123 | if ( !$lines || count( $lines ) < 2 ) { |
121 | | - wfDie( "m:Interwiki_map not found" ); |
| 124 | + $this->error( "m:Interwiki_map not found", true ); |
122 | 125 | } |
123 | 126 | |
124 | 127 | $iwArray = array(); |
— | — | @@ -141,11 +144,11 @@ |
142 | 145 | |
143 | 146 | foreach ( $this->dblist as $db ) { |
144 | 147 | $sql = "-- Generated by rebuildInterwiki.php"; |
145 | | - if ( isset( $specials[$db] ) ) { |
| 148 | + if ( isset( $this->specials[$db] ) ) { |
146 | 149 | # Special wiki |
147 | 150 | # Has interwiki links and interlanguage links to wikipedia |
148 | 151 | |
149 | | - $host = $specials[$db]; |
| 152 | + $host = $this->specials[$db]; |
150 | 153 | $sql .= "\n--$host\n\n"; |
151 | 154 | $sql .= "USE $db;\n" . |
152 | 155 | "TRUNCATE TABLE interwiki;\n" . |
Index: trunk/phase3/maintenance/dumpInterwiki.inc |
— | — | @@ -1,6 +1,6 @@ |
2 | 2 | <?php |
3 | 3 | /** |
4 | | - * Rebuild interwiki table using the file on meta and the language list |
| 4 | + * Build a constant, slightly compact database of interwiki prefixes |
5 | 5 | * Wikimedia specific! |
6 | 6 | * |
7 | 7 | * @file |
— | — | @@ -28,184 +28,214 @@ |
29 | 29 | } |
30 | 30 | } |
31 | 31 | |
32 | | -function getRebuildInterwikiDump() { |
33 | | - global $langlist, $languageAliases, $prefixRewrites, $wgContLang; |
| 32 | +require_once( dirname( __FILE__ ) . '/Maintenance.php' ); |
34 | 33 | |
35 | | - # Multi-language sites |
36 | | - # db suffix => db suffix, iw prefix, hostname |
37 | | - $sites = array( |
38 | | - 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ), |
39 | | - 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ), |
40 | | - 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ), |
41 | | - 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ), |
42 | | - 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ), |
43 | | - 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ), |
44 | | - 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ), |
45 | | - 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ), |
46 | | - ); |
| 34 | +class DumpInterwiki extends Maintenance { |
47 | 35 | |
48 | | - # List of language prefixes likely to be found in multi-language sites |
49 | | - $langlist = array_map( "trim", file( "/home/wikipedia/common/langlist" ) ); |
| 36 | + public function __construct() { |
| 37 | + parent::__construct(); |
| 38 | + $this->mDescription = "Build constant slightly compact database of interwiki prefixes."; |
| 39 | + $this->addOption( 'langlist', 'File with one language code per line', false, true ); |
| 40 | + $this->addOption( 'dblist', 'File with one db per line', false, true ); |
| 41 | + $this->addOption( 'specialdbs', "File with one 'special' db per line", false, true ); |
| 42 | + $this->addOption( 'o', 'Cdb output file', false, true ); |
| 43 | + } |
50 | 44 | |
51 | | - # List of all database names |
52 | | - $dblist = array_map( "trim", file( "/home/wikipedia/common/all.dblist" ) ); |
| 45 | + function execute() { |
| 46 | + # List of language prefixes likely to be found in multi-language sites |
| 47 | + $this->langlist = array_map( "trim", file( $this->getOption( 'langlist', "/home/wikipedia/common/langlist" ) ) ); |
53 | 48 | |
54 | | - # Special-case databases |
55 | | - $specials = array_flip( |
56 | | - array_map( "trim", |
57 | | - file( "/home/wikipedia/common/special.dblist" ) ) ); |
| 49 | + # List of all database names |
| 50 | + $this->dblist = array_map( "trim", file( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) ) ); |
58 | 51 | |
59 | | - # Extra interwiki links that can't be in the intermap for some reason |
60 | | - $extraLinks = array( |
61 | | - array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ), |
62 | | - array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ), |
63 | | - array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ), |
64 | | - ); |
| 52 | + # Special-case databases |
| 53 | + $this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) ); |
65 | 54 | |
66 | | - # Language aliases, usually configured as redirects to the real wiki in apache |
67 | | - # Interlanguage links are made directly to the real wiki |
68 | | - # Something horrible happens if you forget to list an alias here, I can't |
69 | | - # remember what |
70 | | - $languageAliases = array( |
71 | | - 'zh-cn' => 'zh', |
72 | | - 'zh-tw' => 'zh', |
73 | | - 'dk' => 'da', |
74 | | - 'nb' => 'no', |
75 | | - ); |
| 55 | + if ( $this->hasOption( 'o' ) ) { |
| 56 | + $this->dbFile = CdbWriter::open( $this->getOption( 'o' ) ) ; |
| 57 | + } else { |
| 58 | + $this->dbFile = false; |
| 59 | + } |
76 | 60 | |
77 | | - # Special case prefix rewrites, for the benefit of Swedish which uses s:t |
78 | | - # as an abbreviation for saint |
79 | | - $prefixRewrites = array( |
80 | | - 'svwiki' => array ( 's' => 'src' ), |
81 | | - ); |
82 | | - |
83 | | - # Construct a list of reserved prefixes |
84 | | - $reserved = array(); |
85 | | - foreach ( $langlist as $lang ) { |
86 | | - $reserved[$lang] = 1; |
| 61 | + $this->getRebuildInterwikiDump(); |
87 | 62 | } |
88 | | - foreach ( $languageAliases as $alias => $lang ) { |
89 | | - $reserved[$alias] = 1; |
90 | | - } |
91 | | - foreach ( $sites as $site ) { |
92 | | - $reserved[$site->lateral] = 1; |
93 | | - } |
94 | 63 | |
95 | | - # Extract the intermap from meta |
96 | | - $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 ); |
97 | | - $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) ); |
| 64 | + function getRebuildInterwikiDump() { |
| 65 | + global $wgContLang; |
98 | 66 | |
99 | | - if ( !$lines || count( $lines ) < 2 ) { |
100 | | - wfDie( "m:Interwiki_map not found" ); |
101 | | - } |
| 67 | + # Multi-language sites |
| 68 | + # db suffix => db suffix, iw prefix, hostname |
| 69 | + $sites = array( |
| 70 | + 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ), |
| 71 | + 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ), |
| 72 | + 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ), |
| 73 | + 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ), |
| 74 | + 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ), |
| 75 | + 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ), |
| 76 | + 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ), |
| 77 | + 'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ), |
| 78 | + ); |
102 | 79 | |
103 | | - # Global iterwiki map |
104 | | - foreach ( $lines as $line ) { |
105 | | - if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(.*?)\s*$/', $line, $matches ) ) { |
106 | | - $prefix = $wgContLang->lc( $matches[1] ); |
107 | | - $prefix = str_replace( ' ', '_', $prefix ); |
108 | | - $prefix = strtolower( $matches[1] ); |
109 | | - $url = $matches[2]; |
110 | | - if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) { |
111 | | - $local = 1; |
112 | | - } else { |
113 | | - $local = 0; |
114 | | - } |
| 80 | + # Extra interwiki links that can't be in the intermap for some reason |
| 81 | + $extraLinks = array( |
| 82 | + array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ), |
| 83 | + array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ), |
| 84 | + array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ), |
| 85 | + ); |
115 | 86 | |
116 | | - if ( empty( $reserved[$prefix] ) ) { |
117 | | - $imap = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local ); |
118 | | - makeLink ( $imap, "__global" ); |
119 | | - } |
| 87 | + # Language aliases, usually configured as redirects to the real wiki in apache |
| 88 | + # Interlanguage links are made directly to the real wiki |
| 89 | + # Something horrible happens if you forget to list an alias here, I can't |
| 90 | + # remember what |
| 91 | + $this->languageAliases = array( |
| 92 | + 'zh-cn' => 'zh', |
| 93 | + 'zh-tw' => 'zh', |
| 94 | + 'dk' => 'da', |
| 95 | + 'nb' => 'no', |
| 96 | + ); |
| 97 | + |
| 98 | + # Special case prefix rewrites, for the benefit of Swedish which uses s:t |
| 99 | + # as an abbreviation for saint |
| 100 | + $this->prefixRewrites = array( |
| 101 | + 'svwiki' => array( 's' => 'src' ), |
| 102 | + ); |
| 103 | + |
| 104 | + # Construct a list of reserved prefixes |
| 105 | + $reserved = array(); |
| 106 | + foreach ( $this->langlist as $lang ) { |
| 107 | + $reserved[$lang] = 1; |
120 | 108 | } |
121 | | - } |
| 109 | + foreach ( $this->languageAliases as $alias => $lang ) { |
| 110 | + $reserved[$alias] = 1; |
| 111 | + } |
| 112 | + foreach ( $sites as $site ) { |
| 113 | + $reserved[$site->lateral] = 1; |
| 114 | + } |
122 | 115 | |
123 | | - # Exclude Wikipedia for Wikipedia |
124 | | - makeLink ( array ( 'iw_prefix' => 'wikipedia', 'is_url' => null ), "_wiki" ); |
125 | | - |
126 | | - # Multilanguage sites |
127 | | - foreach ( $sites as $site ) |
128 | | - makeLanguageLinks ( $site, "_" . $site->suffix ); |
| 116 | + # Extract the intermap from meta |
| 117 | + $intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 ); |
| 118 | + $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) ); |
129 | 119 | |
| 120 | + if ( !$lines || count( $lines ) < 2 ) { |
| 121 | + $this->error( "m:Interwiki_map not found", true ); |
| 122 | + } |
130 | 123 | |
131 | | - foreach ( $dblist as $db ) { |
132 | | - if ( isset( $specials[$db] ) ) { |
133 | | - # Special wiki |
134 | | - # Has interwiki links and interlanguage links to wikipedia |
| 124 | + # Global interwiki map |
| 125 | + foreach ( $lines as $line ) { |
| 126 | + if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(.*?)\s*$/', $line, $matches ) ) { |
| 127 | + $prefix = $wgContLang->lc( $matches[1] ); |
| 128 | + $prefix = str_replace( ' ', '_', $prefix ); |
| 129 | + $prefix = strtolower( $matches[1] ); |
| 130 | + $url = $matches[2]; |
| 131 | + if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) { |
| 132 | + $local = 1; |
| 133 | + } else { |
| 134 | + $local = 0; |
| 135 | + } |
135 | 136 | |
136 | | - makeLink( array( 'iw_prefix' => $db, 'iw_url' => "wiki" ), "__sites" ); |
137 | | - # Links to multilanguage sites |
138 | | - foreach ( $sites as $targetSite ) { |
139 | | - makeLink( array( 'iw_prefix' => $targetSite->lateral, |
140 | | - 'iw_url' => $targetSite->getURL( 'en' ), |
141 | | - 'iw_local' => 1 ), $db ); |
| 137 | + if ( empty( $reserved[$prefix] ) ) { |
| 138 | + $imap = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local ); |
| 139 | + $this->makeLink ( $imap, "__global" ); |
| 140 | + } |
142 | 141 | } |
| 142 | + } |
143 | 143 | |
144 | | - } else { |
145 | | - # Find out which site this DB belongs to |
146 | | - $site = false; |
147 | | - foreach ( $sites as $candidateSite ) { |
148 | | - $suffix = $candidateSite->suffix; |
149 | | - if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) { |
150 | | - $site = $candidateSite; |
151 | | - break; |
| 144 | + # Exclude Wikipedia for Wikipedia |
| 145 | + $this->makeLink ( array ( 'iw_prefix' => 'wikipedia', 'is_url' => null ), "_wiki" ); |
| 146 | + |
| 147 | + # Multilanguage sites |
| 148 | + foreach ( $sites as $site ) { |
| 149 | + $this->makeLanguageLinks ( $site, "_" . $site->suffix ); |
| 150 | + } |
| 151 | + |
| 152 | + |
| 153 | + foreach ( $dblist as $db ) { |
| 154 | + if ( isset( $this->specials[$db] ) ) { |
| 155 | + # Special wiki |
| 156 | + # Has interwiki links and interlanguage links to wikipedia |
| 157 | + |
| 158 | + $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => "wiki" ), "__sites" ); |
| 159 | + # Links to multilanguage sites |
| 160 | + foreach ( $sites as $targetSite ) { |
| 161 | + $this->makeLink( array( 'iw_prefix' => $targetSite->lateral, |
| 162 | + 'iw_url' => $targetSite->getURL( 'en' ), |
| 163 | + 'iw_local' => 1 ), $db ); |
152 | 164 | } |
153 | | - } |
154 | | - makeLink( array( 'iw_prefix' => $db, 'iw_url' => $site->suffix ), "__sites" ); |
155 | | - if ( !$site ) { |
156 | | - print "Invalid database $db\n"; |
157 | | - continue; |
158 | | - } |
159 | | - $lang = $matches[1]; |
| 165 | + } else { |
| 166 | + # Find out which site this DB belongs to |
| 167 | + $site = false; |
| 168 | + foreach ( $sites as $candidateSite ) { |
| 169 | + $suffix = $candidateSite->suffix; |
| 170 | + if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) { |
| 171 | + $site = $candidateSite; |
| 172 | + break; |
| 173 | + } |
| 174 | + } |
| 175 | + $this->makeLink( array( 'iw_prefix' => $db, 'iw_url' => $site->suffix ), "__sites" ); |
| 176 | + if ( !$site ) { |
| 177 | + $this->error( "Invalid database $db\n" ); |
| 178 | + continue; |
| 179 | + } |
| 180 | + $lang = $matches[1]; |
160 | 181 | |
161 | | - # Lateral links |
162 | | - foreach ( $sites as $targetSite ) { |
163 | | - if ( $targetSite->suffix != $site->suffix ) { |
164 | | - makeLink( array( 'iw_prefix' => $targetSite->lateral, |
165 | | - 'iw_url' => $targetSite->getURL( $lang ), |
166 | | - 'iw_local' => 1 ), $db ); |
| 182 | + # Lateral links |
| 183 | + foreach ( $sites as $targetSite ) { |
| 184 | + if ( $targetSite->suffix != $site->suffix ) { |
| 185 | + $this->makeLink( array( 'iw_prefix' => $targetSite->lateral, |
| 186 | + 'iw_url' => $targetSite->getURL( $lang ), |
| 187 | + 'iw_local' => 1 ), $db ); |
| 188 | + } |
167 | 189 | } |
168 | | - } |
169 | 190 | |
170 | | - if ( $site->suffix == "wiki" ) { |
171 | | - makeLink( array( 'iw_prefix' => 'w', |
172 | | - 'iw_url' => "http://en.wikipedia.org/wiki/$1", |
173 | | - 'iw_local' => 1 ), $db ); |
| 191 | + if ( $site->suffix == "wiki" ) { |
| 192 | + $this->makeLink( array( 'iw_prefix' => 'w', |
| 193 | + 'iw_url' => "http://en.wikipedia.org/wiki/$1", |
| 194 | + 'iw_local' => 1 ), $db ); |
| 195 | + } |
| 196 | + |
174 | 197 | } |
175 | | - |
176 | 198 | } |
| 199 | + foreach ( $extraLinks as $link ) { |
| 200 | + $this->makeLink( $link, "__global" ); |
| 201 | + } |
177 | 202 | } |
178 | | - foreach ( $extraLinks as $link ) |
179 | | - makeLink( $link, "__global" ); |
180 | | -} |
181 | 203 | |
182 | | -# ------------------------------------------------------------------------------------------ |
| 204 | + # ------------------------------------------------------------------------------------------ |
183 | 205 | |
184 | | -# Executes part of an INSERT statement, corresponding to all interlanguage links to a particular site |
185 | | -function makeLanguageLinks( &$site, $source ) { |
186 | | - global $langlist, $languageAliases; |
187 | | - # Actual languages with their own databases |
188 | | - foreach ( $langlist as $targetLang ) { |
189 | | - makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $source ); |
| 206 | + # Executes part of an INSERT statement, corresponding to all interlanguage links to a particular site |
| 207 | + function makeLanguageLinks( &$site, $source ) { |
| 208 | + # Actual languages with their own databases |
| 209 | + foreach ( $this->langlist as $targetLang ) { |
| 210 | + $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $source ); |
| 211 | + } |
| 212 | + |
| 213 | + # Language aliases |
| 214 | + foreach ( $this->languageAliases as $alias => $lang ) { |
| 215 | + $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $source ); |
| 216 | + } |
190 | 217 | } |
191 | 218 | |
192 | | - # Language aliases |
193 | | - foreach ( $languageAliases as $alias => $lang ) { |
194 | | - makeLink( array( $alias, $site->getURL( $lang ), 1 ), $source ); |
| 219 | + function makeLink( $entry, $source ) { |
| 220 | + global $prefixRewrites, $dbFile; |
| 221 | + if ( isset( $this->prefixRewrites[$source] ) && isset( $this->prefixRewrites[$source][$entry[0]] ) ) |
| 222 | + $entry[0] = $this->prefixRewrites[$source][$entry[0]]; |
| 223 | + |
| 224 | + if ( !array_key_exists( "iw_prefix", $entry ) ) { |
| 225 | + $entry = array( "iw_prefix" => $entry[0], "iw_url" => $entry[1], "iw_local" => $entry[2] ); |
| 226 | + } |
| 227 | + if ( array_key_exists( $source, $this->prefixRewrites ) && |
| 228 | + array_key_exists( $entry['iw_prefix'], $this->prefixRewrites[$source] ) ) { |
| 229 | + $entry['iw_prefix'] = $this->prefixRewrites[$source][$entry['iw_prefix']]; |
| 230 | + } |
| 231 | + |
| 232 | + if ( $this->dbFile ) { |
| 233 | + $this->dbFile->set( "{$source}:{$entry['iw_prefix']}", trim( "{$entry['iw_local']} {$entry['iw_url']}" ) ); |
| 234 | + } else { |
| 235 | + $this->output( "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n" ); |
| 236 | + } |
195 | 237 | } |
196 | 238 | } |
197 | 239 | |
198 | | -function makeLink( $entry, $source ) { |
199 | | - global $prefixRewrites, $dbFile; |
200 | | - if ( isset( $prefixRewrites[$source] ) && isset( $prefixRewrites[$source][$entry[0]] ) ) |
201 | | - $entry[0] = $prefixRewrites[$source][$entry[0]]; |
202 | | - if ( !array_key_exists( "iw_prefix", $entry ) ) |
203 | | - $entry = array( "iw_prefix" => $entry[0], "iw_url" => $entry[1], "iw_local" => $entry[2] ); |
204 | | - if ( array_key_exists( $source, $prefixRewrites ) && |
205 | | - array_key_exists( $entry['iw_prefix'], $prefixRewrites[$source] ) ) |
206 | | - $entry['iw_prefix'] = $prefixRewrites[$source][$entry['iw_prefix']]; |
207 | | - if ( $dbFile ) |
208 | | - $dbFile->set( "{$source}:{$entry['iw_prefix']}", trim( "{$entry['iw_local']} {$entry['iw_url']}" ) ); |
209 | | - else |
210 | | - print "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n"; |
| 240 | +$maintClass = "DumpInterwiki"; |
| 241 | +require_once( DO_MAINTENANCE ); |
211 | 242 | |
212 | | - } |