r100128 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r100127 | r100128 | r100129 >
Date:17:29, 18 October 2011
Author:reedy
Status:ok (Comments)
Tags:
Comment:
Kill dead/rotting importUseModWiki(pedia)? maintenance scripts
Modified paths:
  • /trunk/phase3/maintenance/importUseModWiki.php (deleted) (history)
  • /trunk/phase3/maintenance/importUseModWikipedia.php (deleted) (history)

Diff [purge]

Index: trunk/phase3/maintenance/importUseModWiki.php
@@ -1,375 +0,0 @@
2 -<?php
3 -/**
4 - * Import data from a UseModWiki into a MediaWiki wiki
5 - * 2003-02-09 Brion VIBBER <brion@pobox.com>
6 - * Based loosely on Magnus's code from 2001-2002
7 - *
8 - * Updated limited version to get something working temporarily
9 - * 2003-10-09
10 - * Be sure to run the link & index rebuilding scripts!
11 - *
12 - * Some more munging for charsets etc
13 - * 2003-11-28
14 - *
15 - * Partial fix for pages starting with lowercase letters (??)
16 - * and CamelCase and /Subpage link conversion
17 - * 2004-11-17
18 - *
19 - * Rewrite output to create Special:Export format for import
20 - * instead of raw SQL. Should be 'future-proof' against future
21 - * schema changes.
22 - * 2005-03-14
23 - *
24 - * This program is free software; you can redistribute it and/or modify
25 - * it under the terms of the GNU General Public License as published by
26 - * the Free Software Foundation; either version 2 of the License, or
27 - * (at your option) any later version.
28 - *
29 - * This program is distributed in the hope that it will be useful,
30 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 - * GNU General Public License for more details.
33 - *
34 - * You should have received a copy of the GNU General Public License along
35 - * with this program; if not, write to the Free Software Foundation, Inc.,
36 - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
37 - * http://www.gnu.org/copyleft/gpl.html
38 - *
39 - * @todo document
40 - * @file
41 - * @ingroup Maintenance
42 - */
43 -
44 -require_once( "Maintenance.php" );
45 -
/**
 * Maintenance script that reads the page database of a UseModWiki
 * installation and prints it on standard output as a MediaWiki
 * Special:Export XML document.
 *
 * @ingroup Maintenance
 */
class ImportUseModWiki extends Maintenance {

	/**
	 * Encoding of the imported text (--encoding option)
	 * @var String
	 */
	private $encoding = '';

	/**
	 * Path to the UseMod wiki (first argument)
	 * @var String
	 */
	private $rootDirectory = '';

	/**
	 * Field separators
	 * @var String
	 */
	private $FS1 = '', $FS2 = '', $FS3 = '';

	/**
	 * Known accounts, user name => user id. Nothing in this class fills
	 * it, so every contributor currently resolves to user id 0.
	 * @var Array
	 */
	private $usercache = array();

	/**
	 * Text fragments parked by mungeFormat() while links are rewritten
	 * @var Array
	 */
	private $nowiki = array();

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Import pages from UseMod wikis";
		$this->addOption( 'encoding', 'Encoding of the imported text, default CP1252', false, true );
		/**
		 * If UseModWiki's New File System is used:
		 * $NewFS = 1; # 1 = new multibyte $FS, 0 = old $FS
		 * Use "\xb3"; for the Old File System
		 * Changed with UTF-8 UseModWiki
		 * http://www.usemod.com/cgi-bin/wiki.pl?SupportForUtf8
		 * http://www.usemod.com/cgi-bin/wiki.pl?WikiBugs/NewFieldSeparatorWronglyTreated
		 * http://www.meatballwiki.org/wiki/WikiEngine#Q_amp_A
		 */
		$this->addOption( 'separator', 'Field separator to use, default \x1E\xFF\xFE\x1E', false, true );
		$this->addArg( 'path', 'Path to your UseMod wiki' );
	}

	/**
	 * Read the options, then print the XML header, every page found
	 * below <path>/page/{A..Z,other}, and the XML footer.
	 */
	public function execute() {
		$this->rootDirectory = $this->getArg();
		$this->encoding = $this->getOption( 'encoding', 'CP1252' );
		$sep = $this->getOption( 'separator', "\x1E\xFF\xFE\x1E" );
		$this->FS1 = "{$sep}1";
		$this->FS2 = "{$sep}2";
		$this->FS3 = "{$sep}3";

		echo <<<XML
<?xml version="1.0" encoding="UTF-8" ?>
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.1/
	http://www.mediawiki.org/xml/export-0.1.xsd"
	version="0.1"
	xml:lang="en">
<!-- generated by importUseModWiki.php -->

XML;
		$letters = array_merge( range( 'A', 'Z' ), array( 'other' ) );
		foreach ( $letters as $letter ) {
			$dir = "{$this->rootDirectory}/page/$letter";
			if ( is_dir( $dir ) ) {
				$this->importPageDirectory( $dir );
			}
		}
		echo "</mediawiki>\n";
	}

	/**
	 * Print every *.db page found in a directory, descending into
	 * subdirectories.
	 *
	 * @param $dir String Directory to scan
	 * @param $prefix String Title prefix accumulated from parent directories
	 */
	private function importPageDirectory( $dir, $prefix = "" ) {
		echo "\n<!-- Checking page directory " . $this->xmlCommentSafe( $dir ) . " -->\n";
		$mydir = opendir( $dir );
		if ( $mydir === false ) {
			echo "<!-- Unable to open the directory. Skipping. -->\n";
			return;
		}
		# Compare against false explicitly: an entry named "0" is falsy
		# and would otherwise end the scan early.
		while ( ( $entry = readdir( $mydir ) ) !== false ) {
			$m = array();
			if ( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
				echo $this->importPage( $prefix . $m[1] );
			} elseif ( is_dir( "$dir/$entry" ) ) {
				if ( $entry != '.' && $entry != '..' ) {
					# Keep the parent prefix so nesting deeper than one level
					# still yields the full title.
					$this->importPageDirectory( "$dir/$entry", "$prefix$entry/" );
				}
			} else {
				echo "<!-- File '" . $this->xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n";
			}
		}
		closedir( $mydir );
	}

	/**
	 * Map a title to its path below the page/ or keep/ directory:
	 * "<first letter, uppercased>/<title>", or "other/<title>" when the
	 * title does not start with an ASCII letter.
	 *
	 * @param $title String
	 * @return String
	 */
	private function useModFilename( $title ) {
		$c = substr( $title, 0, 1 );
		if ( preg_match( '/[A-Z]/i', $c ) ) {
			return strtoupper( $c ) . "/$title";
		}
		return "other/$title";
	}

	/**
	 * Load the current revision of a page. Terminates the script when the
	 * page file does not exist.
	 *
	 * @param $title String
	 * @return Object Revision object, see makeRevision()
	 */
	private function fetchPage( $title ) {
		$fname = $this->rootDirectory . "/page/" . $this->useModFilename( $title ) . ".db";
		if ( !file_exists( $fname ) ) {
			echo "Couldn't open file '$fname' for page '$title'.\n";
			die( -1 );
		}

		$page = $this->splitHash( $this->FS1, file_get_contents( $fname ) );
		$section = $this->splitHash( $this->FS2, $page["text_default"] );
		$text = $this->splitHash( $this->FS3, $section["data"] );

		return $this->makeRevision( $section, $text );
	}

	/**
	 * Load the old revisions of a page from its keep file.
	 *
	 * @param $title String
	 * @return Array of revision objects; empty when there is no keep file
	 */
	private function fetchKeptPages( $title ) {
		$fname = $this->rootDirectory . "/keep/" . $this->useModFilename( $title ) . ".kp";
		if ( !file_exists( $fname ) ) {
			return array();
		}

		$keptlist = explode( $this->FS1, file_get_contents( $fname ) );
		array_shift( $keptlist ); # Drop the junk at beginning of file

		$revisions = array();
		foreach ( $keptlist as $rev ) {
			$section = $this->splitHash( $this->FS2, $rev );
			$text = $this->splitHash( $this->FS3, $section["data"] );
			if ( $text["text"] && $text["minor"] != "" && ( $section["ts"] * 1 > 0 ) ) {
				$revisions[] = $this->makeRevision( $section, $text );
			} else {
				echo "<!-- skipped a bad old revision -->\n";
			}
		}
		return $revisions;
	}

	/**
	 * Build a revision object from a decoded section hash and its decoded
	 * text hash.
	 *
	 * @param $section Array Needs the keys ts, username and host
	 * @param $text Array Needs the keys text, summary and minor
	 * @return Object with text, summary, minor, ts, username and host
	 */
	private function makeRevision( $section, $text ) {
		return $this->array2object( array(
			'text' => $text['text'],
			'summary' => $text['summary'],
			'minor' => $text['minor'],
			'ts' => $section['ts'],
			'username' => $section['username'],
			'host' => $section['host'],
		) );
	}

	/**
	 * Decode a "key SEP value SEP key SEP value ..." string into an
	 * associative array. A trailing key without a value is dropped.
	 *
	 * @param $sep String
	 * @param $str String
	 * @return Array
	 */
	private function splitHash( $sep, $str ) {
		$temp = explode( $sep, $str );
		$last = count( $temp ) - 1;
		$ret = array();
		for ( $i = 0; $i < $last; $i += 2 ) {
			$ret[$temp[$i]] = $temp[$i + 1];
		}
		return $ret;
	}

	/**
	 * Work out the contributor of a revision.
	 *
	 * @param $name String UseMod user name, may be empty
	 * @param $host String Host recorded for the edit, used when there is no name
	 * @return Array ( user id, display name )
	 */
	private function checkUserCache( $name, $host ) {
		if ( !$name ) {
			return array( 0, $host );
		}
		# The cache is keyed by name, so test the key; in_array() would
		# have searched the stored ids instead.
		# User id 0: If we haven't imported user accounts
		$userid = isset( $this->usercache[$name] ) ? $this->usercache[$name] : 0;
		return array( $userid, str_replace( '_', ' ', $name ) );
	}

	/**
	 * Render one page, with all of its revisions, as a <page> element.
	 *
	 * @param $title String UseMod page title
	 * @return String XML
	 */
	private function importPage( $title ) {
		echo "\n<!-- Importing page " . $this->xmlCommentSafe( $title ) . " -->\n";
		$page = $this->fetchPage( $title );

		$newtitle = $this->xmlsafe( str_replace( '_', ' ', $this->recodeText( $title ) ) );

		/**
		 * Current revision:
		 */
		$revisions = array( $page );

		$munged = $this->mungeFormat( $page->text );
		if ( $munged != $page->text ) {
			/**
			 * Save a *new* revision with the conversion, and put the
			 * previous last version into the history.
			 */
			$revisions[] = $this->array2object( array(
				'text' => $munged,
				'minor' => 1,
				'username' => 'Conversion script',
				'host' => '127.0.0.1',
				'ts' => time(),
				'summary' => 'link fix',
			) );
		}

		# History
		$revisions = array_merge( $revisions, $this->fetchKeptPages( $title ) );

		$xml = <<<XML
	<page>
	<title>$newtitle</title>

XML;

		foreach ( $revisions as $rev ) {
			$text = $this->xmlsafe( $this->recodeText( $rev->text ) );
			$minor = ( $rev->minor ? '<minor/>' : '' );
			list( /* $userid */, $username ) = $this->checkUserCache( $rev->username, $rev->host );
			$username = $this->xmlsafe( $this->recodeText( $username ) );
			$timestamp = $this->xmlsafe( $this->timestamp2ISO8601( $rev->ts ) );
			$comment = $this->xmlsafe( $this->recodeText( $rev->summary ) );

			$xml .= <<<XML
	<revision>
	<timestamp>$timestamp</timestamp>
	<contributor><username>$username</username></contributor>
	$minor
	<comment>$comment</comment>
	<text>$text</text>
	</revision>

XML;
		}
		$xml .= "</page>\n\n";
		return $xml;
	}

	/**
	 * Normalise line endings, convert from the source encoding to UTF-8
	 * and expand numeric character references.
	 *
	 * @param $string String
	 * @return String
	 */
	private function recodeText( $string ) {
		# For currently latin-1 wikis
		$string = str_replace( "\r\n", "\n", $string );
		# Warnings are suppressed because they would end up in the XML on
		# stdout. If the conversion fails outright, keep the unconverted
		# text rather than replacing it with nothing.
		$converted = @iconv( $this->encoding, "UTF-8", $string );
		if ( $converted !== false ) {
			$string = $converted;
		}
		return $this->mungeToUtf8( $string ); # Any old &#1234; stuff
	}

	/**
	 * Replace decimal and hexadecimal character references with the
	 * output of wfUtf8Sequence().
	 *
	 * @param $string String
	 * @return String
	 */
	private function mungeToUtf8( $string ) {
		$string = preg_replace_callback( '/&#([0-9]+);/',
			array( $this, 'decimalEntityCallback' ), $string );
		$string = preg_replace_callback( '/&#x([0-9a-f]+);/i',
			array( $this, 'hexEntityCallback' ), $string );
		# Should also do named entities here
		return $string;
	}

	/**
	 * preg_replace_callback() handler for &#NNNN; references. The digits
	 * are always read as decimal, including ones with leading zeros.
	 *
	 * @param $matches Array
	 * @return String
	 */
	public function decimalEntityCallback( $matches ) {
		return wfUtf8Sequence( intval( $matches[1], 10 ) );
	}

	/**
	 * preg_replace_callback() handler for &#xHHHH; references.
	 *
	 * @param $matches Array
	 * @return String
	 */
	public function hexEntityCallback( $matches ) {
		return wfUtf8Sequence( hexdec( $matches[1] ) );
	}

	/**
	 * @param $ts Integer Unix timestamp
	 * @return String UTC time such as 2003-08-05T18:30:02Z
	 */
	private function timestamp2ISO8601( $ts ) {
		return gmdate( 'Y-m-d\TH:i:s\Z', $ts );
	}

	/**
	 * The page may contain old data which has not been properly normalized.
	 * Invalid UTF-8 sequences or forbidden control characters will make our
	 * XML output invalid, so be sure to strip them out.
	 * @param String $string Text to clean up
	 * @return String
	 */
	private function xmlsafe( $string ) {
		return htmlspecialchars( UtfNormal::cleanUp( $string ) );
	}

	/**
	 * Recode and escape text for use inside an XML comment, where "--"
	 * is not allowed.
	 *
	 * @param $text String
	 * @return String
	 */
	private function xmlCommentSafe( $text ) {
		return str_replace( '--', '\\-\\-', $this->xmlsafe( $this->recodeText( $text ) ) );
	}

	/**
	 * Turn an associative array into an object with one property per key.
	 *
	 * @param $arr Array
	 * @return Object
	 */
	private function array2object( $arr ) {
		# A direct cast avoids the stray "scalar" property that starting
		# from (object)0 would add.
		return (object)$arr;
	}

	/**
	 * Make CamelCase and /Talk links work
	 *
	 * @param $text String UseMod wikitext
	 * @return String Text with bare CamelCase and /Subpage words wrapped in
	 *   [[ ]]; the input is returned unchanged if a regex step fails
	 */
	private function mungeFormat( $text ) {
		# Park <nowiki> sections, URLs and existing [[links]] so the link
		# rewriting below cannot touch them.
		$this->nowiki = array();
		$staged = preg_replace_callback(
			'/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',
			array( $this, 'nowikiPlaceholder' ), $text );
		if ( $staged === null ) {
			return $text;
		}

		# This is probably not 100% correct, I'm just
		# glancing at the UseModWiki code.
		$upper = "[A-Z]";
		$lower = "[a-z_0-9]";
		$any = "[A-Za-z_0-9]";
		$camel = "(?:$upper+$lower+$upper+$any*)";
		$subpage = "(?:\\/$any+)";
		$substart = "(?:\\/$upper$any*)";

		$munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/",
			'[[$1]]', $staged );
		if ( $munged === null ) {
			return $text;
		}

		# Put the parked fragments back in their original order. This needs
		# the callback variant; preg_replace() cannot take a callable.
		$final = preg_replace_callback(
			'/' . preg_quote( $this->placeholder(), '/' ) . '/s',
			array( $this, 'nowikiShift' ), $munged );
		return $final === null ? $text : $final;
	}

	/**
	 * Marker that stands in for a parked fragment. The string is
	 * single-quoted, so it holds the literal characters \xff, not the
	 * byte 0xFF.
	 *
	 * @param $x Mixed Unused
	 * @return String
	 */
	private function placeholder( $x = null ) {
		return '\xffplaceholder\xff';
	}

	/**
	 * preg_replace_callback() handler: park the matched fragment and
	 * return the placeholder.
	 *
	 * @param $matches Array
	 * @return String
	 */
	public function nowikiPlaceholder( $matches ) {
		$this->nowiki[] = $matches[1];
		return $this->placeholder();
	}

	/**
	 * preg_replace_callback() handler: return the oldest parked fragment.
	 *
	 * @return String
	 */
	public function nowikiShift() {
		return array_shift( $this->nowiki );
	}
}
351 -
/**
 * Encode a Unicode code point as a UTF-8 byte sequence.
 *
 * @param $codepoint Integer Code point, 0 to 0x10FFFF
 * @return String The UTF-8 bytes, or a decimal character reference
 *   ("&#N;") for values above 0x10FFFF
 */
function wfUtf8Sequence( $codepoint ) {
	if ( $codepoint < 0x80 ) {
		return chr( $codepoint );
	}
	if ( $codepoint < 0x800 ) {
		return chr( $codepoint >> 6 & 0x3f | 0xc0 ) .
			chr( $codepoint & 0x3f | 0x80 );
	}
	if ( $codepoint < 0x10000 ) {
		return chr( $codepoint >> 12 & 0x0f | 0xe0 ) .
			chr( $codepoint >> 6 & 0x3f | 0x80 ) .
			chr( $codepoint & 0x3f | 0x80 );
	}
	# The four-byte form covers everything up to U+10FFFF; the previous
	# bound of 0x100000 left out the last plane.
	if ( $codepoint < 0x110000 ) {
		return chr( $codepoint >> 18 & 0x07 | 0xf0 ) .
			chr( $codepoint >> 12 & 0x3f | 0x80 ) .
			chr( $codepoint >> 6 & 0x3f | 0x80 ) .
			chr( $codepoint & 0x3f | 0x80 );
	}
	# Not a valid code point, leave it as a character reference
	return "&#$codepoint;";
}
374 -
375 -$maintClass = 'ImportUseModWiki';
376 -require_once( RUN_MAINTENANCE_IF_MAIN );
Index: trunk/phase3/maintenance/importUseModWikipedia.php
@@ -1,892 +0,0 @@
2 -<?php
3 -
4 -/**
5 - * A script to read a dump of the English Wikipedia from the UseModWiki period, and to
6 - * generate an XML dump in MediaWiki format.
7 - *
8 - * Some relevant code was ported from UseModWiki 0.92.
9 - *
10 - */
11 -
12 -require_once( dirname( __FILE__ ) . '/Maintenance.php' );
13 -require_once( dirname( __FILE__ ) .'/../includes/normal/UtfNormalUtil.php' );
14 -
15 -
16 -class ImportUseModWikipedia extends Maintenance {
17 - var $encodeMap, $decodeMap;
18 -
19 - var $deepRenames = array(
20 - 'JimboWales' => 983862286,
21 - 'TexaS' => 983918410,
22 - 'HistoryOfUnitedStatesTalk' => 984795423,
23 - 'MetallicA' => 985128533,
24 - 'PythagoreanTheorem' => 985225545,
25 - 'TheCanonofScripture' => 985368223,
26 - 'TaoTehChing' => 985368222,
27 - //'TheMostRemarkableFormulaInTheWorld' => 985368221,
28 - 'TheRecorder' => 985368220,
29 - 'GladstoneOregon' => 985368219,
30 - 'PacificBeach' => '?',
31 - 'AaRiver' => '?',
32 - );
33 -
34 - var $replacements = array();
35 -
36 - var $renameTextLinksOps = array(
37 - 983846265 => array(
38 - 'TestIgnore' => 'IgnoreTest',
39 - ),
40 - 983848080 => array(
41 - 'UnitedLocomotiveWorks' => 'Atlas Shrugged/United Locomotive Works'
42 - ),
43 - 983856376 => array(
44 - 'WikiPedia' => 'Wikipedia',
45 - ),
46 - 983896152 => array(
47 - 'John_F_Kennedy' => 'John_F._Kennedy',
48 - ),
49 - 983905871 => array(
50 - 'LarrySanger' => 'Larry_Sanger'
51 - ),
52 - 984697068 => array(
53 - 'UnitedStates' => 'United States',
54 - ),
55 - 984792748 => array(
56 - 'LibertarianisM' => 'Libertarianism'
57 - ),
58 - 985327832 => array(
59 - 'AnarchisM' => 'Anarchism',
60 - ),
61 - 985290063 => array(
62 - 'HistoryOfUnitedStatesDiscussion' => 'History_Of_United_States_Discussion'
63 - ),
64 - 985290091 => array(
65 - 'BritishEmpire' => 'British Empire'
66 - ),
67 - /*
68 - 985468958 => array(
69 - 'ScienceFiction' => 'Science fiction',
70 - ),*/
71 - );
72 -
73 - /**
74 - * Hack for observed substitution issues
75 - */
76 - var $skipSelfSubstitution = array(
77 - 'Pythagorean_Theorem',
78 - 'The_Most_Remarkable_Formula_In_The_World',
79 - 'Wine',
80 - );
81 -
82 - var $unixLineEndingsOps = array(
83 - 987743732 => 'Wikipedia_FAQ'
84 - );
85 -
86 - var $replacementsDone = array();
87 -
88 - var $moveLog = array();
89 - var $moveDests = array();
90 - var $revId;
91 -
92 - var $rc = array();
93 - var $textCache = array();
94 - var $blacklist = array();
95 -
96 - var $FS, $FS1, $FS2, $FS3;
97 - var $FreeLinkPattern, $UrlPattern, $LinkPattern, $InterLinkPattern;
98 -
99 - var $cp1252Table = array(
100 -0x80 => 0x20ac,
101 -0x81 => 0x0081,
102 -0x82 => 0x201a,
103 -0x83 => 0x0192,
104 -0x84 => 0x201e,
105 -0x85 => 0x2026,
106 -0x86 => 0x2020,
107 -0x87 => 0x2021,
108 -0x88 => 0x02c6,
109 -0x89 => 0x2030,
110 -0x8a => 0x0160,
111 -0x8b => 0x2039,
112 -0x8c => 0x0152,
113 -0x8d => 0x008d,
114 -0x8e => 0x017d,
115 -0x8f => 0x008f,
116 -0x90 => 0x0090,
117 -0x91 => 0x2018,
118 -0x92 => 0x2019,
119 -0x93 => 0x201c,
120 -0x94 => 0x201d,
121 -0x95 => 0x2022,
122 -0x96 => 0x2013,
123 -0x97 => 0x2014,
124 -0x98 => 0x02dc,
125 -0x99 => 0x2122,
126 -0x9a => 0x0161,
127 -0x9b => 0x203a,
128 -0x9c => 0x0153,
129 -0x9d => 0x009d,
130 -0x9e => 0x017e,
131 -0x9f => 0x0178);
132 -
/**
 * Register the command line options, set up the link patterns and
 * build the byte <-> UTF-8 translation tables. Bytes listed in
 * $cp1252Table map to the code point given there; every other byte
 * maps to the code point with the same number.
 */
public function __construct() {
	parent::__construct();
	$this->addOption( 'datadir', 'the value of $DataDir from wiki.cgi', true, true );
	$this->addOption( 'outfile', 'the name of the output XML file', true, true );
	$this->initLinkPatterns();

	$toUtf8 = array();
	$fromUtf8 = array();
	foreach ( range( 0, 0xff ) as $byte ) {
		$codepoint = isset( $this->cp1252Table[$byte] )
			? $this->cp1252Table[$byte]
			: $byte;
		$rawChar = chr( $byte );
		$utf8Char = codepointToUtf8( $codepoint );
		$toUtf8[$rawChar] = $utf8Char;
		$fromUtf8[$utf8Char] = $rawChar;
	}
	$this->encodeMap = $toUtf8;
	$this->decodeMap = $fromUtf8;
}
153 -
/**
 * Set the field separators ($FS, $FS1..$FS3) and the regular expression
 * fragments $LinkPattern, $InterLinkPattern, $FreeLinkPattern and
 * $UrlPattern.
 */
function initLinkPatterns() {
	# Field separators are used in the URL-style patterns below.
	$this->FS = "\xb3"; # The FS character is a superscript "3"
	$this->FS1 = $this->FS . "1"; # The FS values are used to separate fields
	$this->FS2 = $this->FS . "2"; # in stored hashtables and other data structures.
	$this->FS3 = $this->FS . "3"; # The FS character is not allowed in user data.

	$UpperLetter = "[A-Z]";
	$LowerLetter = "[a-z]";
	$AnyLetter = "[A-Za-z_0-9]";

	# Main link pattern: lowercase between uppercase, then anything
	$LpA = $UpperLetter . "+" . $LowerLetter . "+" . $UpperLetter
		. $AnyLetter . "*";
	# Optional subpage link pattern: uppercase, lowercase, then anything
	$LpB = $UpperLetter . "+" . $LowerLetter . "+" . $AnyLetter . "*";

	# Loose pattern: If subpage is used, subpage may be simple name
	$QDelim = '(?:"")?'; # Optional quote delimiter (not in output)
	$this->LinkPattern = "((?:(?:$LpA)?\\/$LpB)|$LpA)" . $QDelim;

	# Inter-site convention: sites must start with uppercase letter
	# (Uppercase letter avoids confusion with URLs)
	$InterSitePattern = $UpperLetter . $AnyLetter . "+";
	$this->InterLinkPattern = "((?:$InterSitePattern:[^\\]\\s\"<>{$this->FS}]+)$QDelim)";

	# Free links allow a wider character set and an optional
	# "page/" part in front of the name.
	$AnyLetter = "[-,. _0-9A-Za-z]";
	$this->FreeLinkPattern = "((?:(?:$AnyLetter+)?\\/)?$AnyLetter+)" . $QDelim;

	# Url-style links are delimited by one of:
	# 1. Whitespace (kept in output)
	# 2. Left or right angle-bracket (< or >) (kept in output)
	# 3. Right square-bracket (]) (kept in output)
	# 4. A single double-quote (") (kept in output)
	# 5. A $FS (field separator) character (kept in output)
	# 6. A double double-quote ("") (removed from output)

	$UrlProtocols = "http|https|ftp|afs|news|nntp|mid|cid|mailto|wais|"
		. "prospero|telnet|gopher";
	$UrlProtocols .= '|file';
	$this->UrlPattern = "((?:(?:$UrlProtocols):[^\\]\\s\"<>{$this->FS}]+)$QDelim)";
}
204 -
/**
 * Run the conversion: open the output file, create the two scratch
 * files used for patching, then write the XML header, the move log,
 * the revisions and the footer.
 *
 * @return Integer 0 on success, 1 when the output file or the scratch
 *   files cannot be created
 */
function execute() {
	$this->dataDir = $this->getOption( 'datadir' );
	$this->outFile = fopen( $this->getOption( 'outfile' ), 'w' );
	if ( !$this->outFile ) {
		echo "Unable to open output file\n";
		return 1;
	}

	# tempnam() creates the files itself, so the names cannot collide
	# with one another or with an existing file, and the unlink() calls
	# below always have something to remove.
	$tmpDir = sys_get_temp_dir();
	$this->articleFileName = tempnam( $tmpDir, 'importUseMod.' );
	$this->patchFileName = tempnam( $tmpDir, 'importUseMod.' );
	if ( $this->articleFileName === false || $this->patchFileName === false ) {
		echo "Unable to create temporary files\n";
		if ( $this->articleFileName !== false ) {
			unlink( $this->articleFileName );
		}
		if ( $this->patchFileName !== false ) {
			unlink( $this->patchFileName );
		}
		fclose( $this->outFile );
		return 1;
	}

	$this->writeXmlHeader();
	$this->readRclog();
	$this->writeMoveLog();
	$this->writeRevisions();
	$this->reconcileCurrentRevs();
	$this->writeXmlFooter();

	fclose( $this->outFile );
	unlink( $this->articleFileName );
	unlink( $this->patchFileName );
	return 0;
}
224 -
/**
 * Write the opening <mediawiki> tag and the <siteinfo> section to the
 * output file.
 */
function writeXmlHeader() {
	$header = <<<EOT
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
 <siteinfo>
 <sitename>Wikipedia</sitename>
 <base>http://www.wikipedia.com/</base>
 <generator>MediaWiki 1.18alpha importUseModWikipedia.php</generator>
 <case>case-sensitive</case>
 <namespaces>
 <namespace key="0" />
 </namespaces>
 </siteinfo>

EOT;
	fwrite( $this->outFile, $header );
}
241 -
/**
 * Write the closing </mediawiki> tag to the output file.
 */
function writeXmlFooter() {
	$footer = "</mediawiki>\n";
	fwrite( $this->outFile, $footer );
}
245 -
/**
 * Read <datadir>/rclog into $this->rc, grouped by timestamp.
 *
 * Each line holds seven $FS3-separated fields: timestamp, title,
 * summary, minor flag, host, kind and an extra list. The extra list is
 * a $FS2-separated sequence of alternating keys and values.
 *
 * Reading stops at the first line that does not have seven fields;
 * entries read up to that point are kept.
 */
function readRclog() {
	$rcFile = fopen( "{$this->dataDir}/rclog", 'r' );
	if ( !$rcFile ) {
		echo "Error reading rclog\n";
		return;
	}

	# Compare against false explicitly so that a line consisting of "0"
	# is not mistaken for the end of the file.
	while ( ( $line = fgets( $rcFile ) ) !== false ) {
		$bits = explode( $this->FS3, $line );
		if ( count( $bits ) !== 7 ) {
			echo "Error reading rclog\n";
			break;
		}
		$params = array(
			'timestamp' => $bits[0],
			'rctitle' => $bits[1],
			'summary' => $bits[2],
			'minor' => $bits[3],
			'host' => $bits[4],
			'kind' => $bits[5],
			'extra' => array()
		);

		# NOTE(review): $bits[6] still carries the line's trailing newline,
		# so the last extra value does too; confirm that consumers expect it.
		$extraList = explode( $this->FS2, $bits[6] );
		$numExtra = count( $extraList );
		for ( $i = 0; $i < $numExtra; $i += 2 ) {
			# A key without a value is stored as null
			$params['extra'][$extraList[$i]] =
				isset( $extraList[$i + 1] ) ? $extraList[$i + 1] : null;
		}
		$this->rc[$params['timestamp']][] = $params;
	}
	fclose( $rcFile );
}
271 -
/**
 * Build the move log from the diff file, pick a timestamp for every
 * move and write one <logitem> per move to the output file.
 *
 * A move whose old title has a usable time in $deepRenames gets that
 * time and is flagged 'deep'. Any other move is dated one second after
 * its 'startTime'; it is also flagged 'deep' when its $deepRenames
 * entry is '?'.
 */
function writeMoveLog() {
	$this->moveLog = array();
	$unusedRenames = $this->deepRenames;
	echo "Calculating move log...\n";
	$this->processDiffFile( array( $this, 'moveLogCallback' ) );

	// We have the timestamp intervals, now make a guess at the actual timestamp
	foreach ( $this->moveLog as $newTitle => $params ) {
		$oldTitle = $params['old'];
		$hasEnd = isset( $params['endTime'] );

		// Is there a time specified? Either way the entry counts as used.
		$hasRename = isset( $unusedRenames[$oldTitle] );
		$drTime = false;
		if ( $hasRename ) {
			$drTime = $unusedRenames[$oldTitle];
			unset( $unusedRenames[$oldTitle] );
		}

		if ( $hasRename && $drTime !== '?' ) {
			$inWindow = $drTime > $params['startTime']
				&& ( !$hasEnd || $drTime < $params['endTime'] );
			if ( $inWindow ) {
				$this->moveLog[$newTitle]['timestamp'] = $drTime;
				$this->moveLog[$newTitle]['deep'] = true;

				echo "{$oldTitle} -> $newTitle at $drTime\n";
				continue;
			}
			echo "WARNING: deep rename time invalid: {$oldTitle}\n";
		}

		// Guess that it is one second after the last edit to the page before it was moved
		$this->moveLog[$newTitle]['timestamp'] = $params['startTime'] + 1;
		if ( $drTime === '?' ) {
			$this->moveLog[$newTitle]['deep'] = true;
		}

		$interval = $hasEnd
			? "{$params['startTime']} and {$params['endTime']}\n"
			: "{$params['startTime']}\n";
		$relation = $hasEnd ? 'between' : 'after';
		$this->printLatin1( "{$oldTitle} -> $newTitle $relation " . $interval );
	}

	// Write the move log to the XML file
	$nextId = 1;
	foreach ( $this->moveLog as $newTitle => $params ) {
		$item = "<logitem>\n";
		$item .= $this->element( 'id', $nextId );
		$item .= $this->element( 'timestamp', wfTimestamp( TS_ISO_8601, $params['timestamp'] ) );
		$item .= "<contributor>\n";
		$item .= $this->element( 'username', 'UseModWiki admin' );
		$item .= "</contributor>";
		$item .= $this->element( 'type', 'move' );
		$item .= $this->element( 'action', 'move' );
		$item .= $this->element( 'logtitle', $params['old'] );
		$item .= "<params xml:space=\"preserve\">";
		$item .= htmlspecialchars( $this->encode( "{$newTitle}\n1" ) );
		$item .= "</params>\n";
		$item .= "</logitem>\n";
		fwrite( $this->outFile, $item );
		$nextId++;
	}

	// Check for remaining deep rename entries
	if ( $unusedRenames ) {
		echo "WARNING: the following entries in \$this->deepRenames are " .
			"invalid, since no such move exists:\n" .
			implode( "\n", array_keys( $unusedRenames ) ) .
			"\n\n";
	}
}
344 -
/**
 * Build an XML element followed by a newline. The value is passed
 * through encode() and HTML-escaped.
 *
 * @param $name String Tag name
 * @param $value String Element content
 * @return String
 */
function element( $name, $value ) {
	$content = htmlspecialchars( $this->encode( $value ) );
	return "<$name>" . $content . "</$name>\n";
}
348 -
/**
 * Diff file callback that collects the time window of each page move.
 *
 * When the entry's 'rctitle' differs from its 'title', the entry is
 * filed under the 'rctitle' with 'title' as the old name, and its
 * timestamp becomes the 'startTime'. The first later entry where both
 * titles match supplies the 'endTime'.
 *
 * @param $entry Array Needs the keys rctitle, title and timestamp
 */
function moveLogCallback( $entry ) {
	$loggedTitle = $entry['rctitle'];
	$diffTitle = $entry['title'];
	$this->moveDests[$loggedTitle] = $diffTitle;

	$known = isset( $this->moveLog[$loggedTitle] );

	if ( $loggedTitle !== $diffTitle ) {
		if ( !$known ) {
			// Initialise the move log entry
			$this->moveLog[$loggedTitle] = array( 'old' => $diffTitle );
		}
		// Update the earliest time the page could have been moved
		$this->moveLog[$loggedTitle]['startTime'] = $entry['timestamp'];
		return;
	}

	if ( $known && !isset( $this->moveLog[$loggedTitle]['endTime'] ) ) {
		// This is the latest time that the page could have been moved
		$this->moveLog[$loggedTitle]['endTime'] = $entry['timestamp'];
	}
}
372 -
/**
 * Reset the revision counters, run the diff file through
 * revisionCallback() and print how many revisions were imported.
 */
function writeRevisions() {
	$this->revId = 1;
	$this->numGoodRevs = 0;
	$this->processDiffFile( array( $this, 'revisionCallback' ) );
	$summary = "\n\nImported {$this->numGoodRevs} out of {$this->numRevs}\n";
	echo $summary;
}
379 -
/**
 * Diff file callback that applies one diff to the stored page text and
 * saves the result as a revision.
 *
 * If the diff does not apply, link substitutions are derived from the
 * mismatch, applied, and the diff is tried once more. A title whose
 * diff cannot be applied is blacklisted, and every later diff for it
 * is ignored.
 *
 * @param $params Array Needs the keys rctitle, timestamp and diff
 */
function revisionCallback( $params ) {
	$title = $params['rctitle'];
	$editTime = $params['timestamp'];
	$diff = $params['diff'];

	if ( isset( $this->blacklist[$title] ) ) {
		return;
	}
	$this->doPendingOps( $editTime );

	$origText = $this->getText( $title );
	$text = $this->patch( $origText, $diff );

	if ( $text === false ) {
		echo "$editTime $title attempting resolution...\n";
		$substitutes = $this->resolveFailedDiff( $origText, $diff );
		if ( !$substitutes ) {
			$this->printLatin1( "$editTime $title DIFF FAILED\n" );
			$this->blacklist[$title] = true;
			return;
		}

		$this->printLatin1( "$editTime $title requires substitutions:\n" );
		// Each substitution is dated one second before the previous one,
		// starting one second before the edit.
		$when = $editTime - 1;
		foreach ( $substitutes as $from => $to ) {
			$this->printLatin1( "SUBSTITUTE $from -> $to\n" );
			$this->renameTextLinks( $from, $to, $when );
			$when--;
		}

		$text = $this->patch( $this->getText( $title ), $diff );
		if ( $text === false ) {
			$this->printLatin1( "$editTime $title STILL FAILS!\n" );
			$this->blacklist[$title] = true;
			return;
		}

		echo "\n";
	}

	$params['text'] = $text;
	$this->saveRevision( $params );
	$this->numGoodRevs++;
}
421 -
/**
 * Carry out every scheduled operation that is due at or before the
 * given edit time, removing each one from its list: page moves (with a
 * link rename when flagged 'deep'), the fixed link renames and the
 * line ending fixes.
 *
 * @param $editTime Integer Timestamp of the edit about to be processed
 */
function doPendingOps( $editTime ) {
	foreach ( $this->moveLog as $newTitle => $move ) {
		if ( $move['timestamp'] > $editTime ) {
			continue;
		}
		unset( $this->moveLog[$newTitle] );
		if ( isset( $move['deep'] ) ) {
			$this->renameTextLinks( $move['old'], $newTitle, $move['timestamp'] );
		}
	}

	foreach ( $this->renameTextLinksOps as $renameTime => $pairs ) {
		if ( $editTime < $renameTime ) {
			continue;
		}
		foreach ( $pairs as $from => $to ) {
			$this->printLatin1( "SUBSTITUTE $from -> $to\n" );
			$this->renameTextLinks( $from, $to, $renameTime );
		}
		unset( $this->renameTextLinksOps[$renameTime] );
	}

	foreach ( $this->unixLineEndingsOps as $fixTime => $title ) {
		if ( $editTime < $fixTime ) {
			continue;
		}
		$this->printLatin1( "$fixTime $title FIXING LINE ENDINGS\n" );
		$fixed = str_replace( "\r", '', $this->getText( $title ) );
		$revision = array(
			'rctitle' => $title,
			'timestamp' => $fixTime,
			'extra' => array( 'name' => 'UseModWiki admin' ),
			'text' => $fixed,
			'summary' => 'Fixing line endings',
		);
		$this->saveRevision( $revision );
		unset( $this->unixLineEndingsOps[$fixTime] );
	}
}
458 -
/**
 * Apply a normal-format diff to a text with the external "patch"
 * program, using the two scratch files.
 *
 * @param $source String Text to patch
 * @param $diff String Diff to apply
 * @return String|false The patched text, or false when a scratch file
 *   cannot be written or read, or when patch exits with a non-zero status
 */
function patch( $source, $diff ) {
	# Do not run patch on leftovers from an earlier call if either
	# scratch file could not be written.
	if ( file_put_contents( $this->articleFileName, $source ) === false
		|| file_put_contents( $this->patchFileName, $diff ) === false )
	{
		return false;
	}

	$status = 0;
	# The program's output is not needed, only its exit status
	wfShellExec(
		wfEscapeShellArg(
			'patch',
			'-n',
			'-r', '-',
			'--no-backup-if-mismatch',
			'--binary',
			$this->articleFileName,
			$this->patchFileName
		) . ' 2>&1',
		$status
	);
	if ( $status ) {
		return false;
	}
	return file_get_contents( $this->articleFileName );
}
481 -
/**
 * Work out which link renames would make a failed diff apply.
 *
 * The "<" lines of each hunk give the text the diff expects at a given
 * source line. Every expected line that differs from the actual text is
 * handed to resolveTextChange().
 *
 * @param $origText String Text the diff failed to apply to
 * @param $diff String Normal-format diff
 * @return Array Old link => new link; empty when nothing was resolved
 */
function resolveFailedDiff( $origText, $diff ) {
	$lines = explode( "\n", $diff );
	$total = count( $lines );

	// Expected source lines, keyed by zero-based line index
	$expected = array();
	$pos = 0;
	while ( $pos < $total ) {
		$header = $lines[$pos];
		$pos++;
		if ( !preg_match( '/^(\d+)(?:,\d+)?[acd]\d+(?:,\d+)?$/', $header, $m ) ) {
			continue;
		}

		$sourceIndex = intval( $m[1] );
		while ( $pos < $total && substr( $lines[$pos], 0, 1 ) === '<' ) {
			$expected[$sourceIndex - 1] = substr( $lines[$pos], 2 );
			$sourceIndex++;
			$pos++;
		}
	}

	$actual = explode( "\n", $origText );
	$changedLinks = array();
	foreach ( $expected as $index => $wanted ) {
		$have = isset( $actual[$index] ) ? $actual[$index] : '';
		if ( $wanted === $have ) {
			continue;
		}
		$result = $this->resolveTextChange( $have, $wanted );
		if ( !is_array( $result ) ) {
			echo "Resolution failure on line " . ( $index + 1 ) . "\n";
			$this->printLatin1( $result );
			continue;
		}
		// Renames found on earlier lines take precedence
		$changedLinks += $result;
	}

	return $changedLinks;
}
519 -
/**
 * Try to explain the difference between two lines as a set of link
 * renames.
 *
 * Each link found only in $dest is paired with the closest (by
 * Levenshtein distance) still unpaired link found only in $source. The
 * pairing is accepted only if applying it to $source gives exactly
 * $dest.
 *
 * @param $source String Line as currently stored
 * @param $dest String Line as the diff expects it
 * @return Array|String Old link => new link on success, otherwise a
 *   message describing the failed attempt
 */
function resolveTextChange( $source, $dest ) {
	$changedLinks = array();
	$sourceLinks = $this->getLinkList( $source );
	$destLinks = $this->getLinkList( $dest );
	$newLinks = array_diff( $destLinks, $sourceLinks );
	$removedLinks = array_diff( $sourceLinks, $destLinks );

	// Match up the removed links with the new links
	foreach ( $newLinks as $newLink ) {
		$minDistance = 100000000;
		$bestRemovedLink = false;
		foreach ( $removedLinks as $removedLink ) {
			$editDistance = levenshtein( $newLink, $removedLink );
			if ( $editDistance < 0 ) {
				// Before PHP 8, levenshtein() returns -1 when an argument
				// is longer than 255 characters; that is not a match.
				continue;
			}
			if ( $editDistance < $minDistance ) {
				$minDistance = $editDistance;
				$bestRemovedLink = $removedLink;
			}
		}
		if ( $bestRemovedLink !== false ) {
			$changedLinks[$bestRemovedLink] = $newLink;
			// A removed link can only be paired once
			$removedLinks = array_diff( $removedLinks, array( $bestRemovedLink ) );
		}
	}

	$proposal = $source;
	foreach ( $changedLinks as $removedLink => $newLink ) {
		$proposal = $this->substituteTextLinks( $removedLink, $newLink, $proposal );
	}
	if ( $proposal !== $dest ) {
		// Resolution failed
		$msg = "Source line: $source\n" .
			"Source links: " . implode( ', ', $sourceLinks ) . "\n" .
			"Context line: $dest\n" .
			"Context links: " . implode( ', ', $destLinks ) . "\n" .
			"Proposal: $proposal\n";
		return $msg;
	}
	return $changedLinks;
}
560 -
/**
 * Read {dataDir}/diff_log record by record and pass each revision to a callback.
 *
 * The file must start with a "------" delimiter line. Each record consists
 * of a "title|timestamp" header line followed by diff text up to the next
 * delimiter line. A record is looked up in $this->rc by its edit time (and by
 * title when several entries share that time); when found, the entry is
 * handed to $callback with 'diff', 'title' and 'diffStartLine' added.
 * $this->numRevs counts every record read, including skipped ones.
 *
 * @param callback $callback Invoked once per resolved revision with the parameter array
 * @return bool False when the file cannot be opened or does not start with
 *   the delimiter; true otherwise, including when reading stops early at a
 *   malformed header line
 */
function processDiffFile( $callback ) {
	$delimiter = "------\n";
	file_put_contents( $this->articleFileName, "Describe the new page here.\n" );

	$diffPath = "{$this->dataDir}/diff_log";
	$diffFile = fopen( $diffPath, 'r' );
	if ( $diffFile === false ) {
		// Without this check every later fgets()/feof() call would operate on false
		echo "Unable to open $diffPath\n";
		return false;
	}

	$line = fgets( $diffFile );
	$lineNum = 1;
	if ( $line !== $delimiter ) {
		echo "Invalid diff file\n";
		fclose( $diffFile );
		return false;
	}
	$lastReportLine = 0;
	$this->numRevs = 0;

	while ( true ) {
		$line = fgets( $diffFile );
		$lineNum++;
		if ( $line === false ) {
			break;
		}
		// Progress indicator on stderr, roughly every 1000 lines
		if ( $lineNum > $lastReportLine + 1000 ) {
			$lastReportLine = $lineNum;
			fwrite( STDERR, "$lineNum \r" );
			fflush( STDERR );
		}
		$line = trim( $line );
		if ( !preg_match( '/^([^|]+)\|(\d+)$/', $line, $matches ) ) {
			echo "Invalid header on line $lineNum\n";
			fclose( $diffFile );
			return true;
		}
		list( , $title, $editTime ) = $matches;

		// Accumulate the diff body up to the next delimiter line
		$diff = '';
		$diffStartLine = $lineNum;
		while ( true ) {
			$line = fgets( $diffFile );
			$lineNum++;
			if ( $line === $delimiter ) {
				break;
			}
			if ( $line === false ) {
				// End of file inside a record: the unterminated record is dropped
				break 2;
			}
			$diff .= $line;
		}

		$this->numRevs++;

		if ( !isset( $this->rc[$editTime] ) ) {
			$this->printLatin1( "$editTime $title DELETED, skipping\n" );
			continue;
		}

		if ( count( $this->rc[$editTime] ) == 1 ) {
			$params = $this->rc[$editTime][0];
		} else {
			// Several log entries share this timestamp: pick the one with a matching title
			$params = false;
			$candidates = '';
			foreach ( $this->rc[$editTime] as $rc ) {
				if ( $rc['rctitle'] === $title ) {
					$params = $rc;
					break;
				}
				if ( $candidates === '' ) {
					$candidates = $rc['rctitle'];
				} else {
					$candidates .= ', ' . $rc['rctitle'];
				}
			}
			if ( !$params ) {
				$this->printLatin1( "$editTime $title ERROR cannot resolve rclog\n" );
				$this->printLatin1( "$editTime $title CANDIDATES: $candidates\n" );
				continue;
			}
		}
		$params['diff'] = $diff;
		$params['title'] = $title;
		$params['diffStartLine'] = $diffStartLine;
		call_user_func( $callback, $params );
	}
	echo "\n";

	if ( !feof( $diffFile ) ) {
		echo "Stopped at line $lineNum\n";
	}
	fclose( $diffFile );
	return true;
}
649 -
/**
 * Compare every cached page text with the text stored in that page's .db
 * file below {dataDir}/page/ and print a report of the differences.
 *
 * Nothing is modified; each mismatch is reported through printLatin1().
 */
function reconcileCurrentRevs() {
	foreach ( $this->textCache as $title => $text ) {
		// Page files live in a directory named after their leading capital
		// letter; everything else is kept under "other"
		$bucket = preg_match( '/^[A-Z]/', $title, $m ) ? $m[0] : 'other';
		$fileName = "{$this->dataDir}/page/" . $bucket . "/$title.db";

		if ( !file_exists( $fileName ) ) {
			$this->printLatin1( "ERROR: Cannot find page file for {$title}\n" );
			continue;
		}

		// The page file is three nested key/value records, one per separator
		$page = $this->unserializeUseMod( file_get_contents( $fileName ), $this->FS1 );
		$section = $this->unserializeUseMod( $page['text_default'], $this->FS2 );
		$data = $this->unserializeUseMod( $section['data'], $this->FS3 );
		$pageText = $data['text'];
		if ( $text === $pageText ) {
			continue;
		}

		$substs = $this->resolveTextChange( $text, $pageText );
		if ( !is_array( $substs ) ) {
			// The difference cannot be explained by link renames: dump a diff
			$this->printLatin1( "ERROR: unresolved diff in $title:\n" );
			wfSuppressWarnings();
			$diff = xdiff_string_diff( $text, $pageText ) . '';
			wfRestoreWarnings();
			$this->printLatin1( "$diff\n" );
			continue;
		}

		foreach ( $substs as $source => $dest ) {
			$report = isset( $this->moveLog[$dest] )
				? "ERROR: need deep rename: $source\n"
				: "ERROR: need substitute: $source -> $dest\n";
			$this->printLatin1( $report );
		}
	}
}
690 -
/**
 * Build a Title from a title string after passing it through encode().
 *
 * @param string $titleText
 * @return mixed Whatever Title::newFromText() returns for the encoded text
 */
function makeTitle( $titleText ) {
	$encodedTitle = $this->encode( $titleText );
	return Title::newFromText( $encodedTitle );
}
694 -
/**
 * Fetch the cached text of a page.
 *
 * @param string $titleText Key into $this->textCache
 * @return string The cached text, or the placeholder
 *   "Describe the new page here.\n" when the title is not cached
 */
function getText( $titleText ) {
	if ( isset( $this->textCache[$titleText] ) ) {
		return $this->textCache[$titleText];
	}
	return "Describe the new page here.\n";
}
702 -
/**
 * Record a revision: update the text cache and append a <page> element
 * holding a single <revision> to $this->outFile.
 *
 * Keys read from $params: 'rctitle', 'text', 'timestamp', 'summary' and,
 * when present, 'extra'['name'], 'extra'['id'] and 'host'.
 * NOTE(review): callers in this file also pass 'minor', but it is never
 * written to the output here.
 *
 * @param array $params Revision data as described above
 */
function saveRevision( $params ) {
	$this->textCache[$params['rctitle']] = $params['text'];

	$out = "<page>\n";
	$out .= $this->element( 'title', $params['rctitle'] );
	$out .= "<revision>\n";
	// Each written revision consumes the next revision id
	$out .= $this->element( 'id', $this->revId ++ );
	$isoTimestamp = wfTimestamp( TS_ISO_8601, $params['timestamp'] );
	$out .= $this->element( 'timestamp', $isoTimestamp );

	$out .= "<contributor>\n";
	if ( isset( $params['extra']['name'] ) ) {
		$out .= $this->element( 'username', $params['extra']['name'] );
	}
	if ( isset( $params['extra']['id'] ) ) {
		$out .= $this->element( 'id', $params['extra']['id'] );
	}
	if ( isset( $params['host'] ) ) {
		$out .= $this->element( 'ip', $params['host'] );
	}
	$out .= "</contributor>\n";

	$out .= $this->element( 'comment', $params['summary'] );
	$escapedText = htmlspecialchars( $this->encode( $params['text'] ) );
	$out .= "<text xml:space=\"preserve\">" . $escapedText . "</text>\n";
	$out .= "</revision>\n";
	$out .= "</page>\n";

	fwrite( $this->outFile, $out );
}
731 -
/**
 * Rewrite links to a renamed page in every cached text and save a new
 * revision for each page whose text changed as a result.
 *
 * Underscores in both names are turned into spaces before matching.
 *
 * @param string $old Previous page name
 * @param string $new New page name
 * @param mixed $timestamp Timestamp stored on the generated revisions
 */
function renameTextLinks( $old, $new, $timestamp ) {
	$newWithUnderscores = $new;
	$oldSpaced = str_replace( '_', ' ', $old );
	$newSpaced = str_replace( '_', ' ', $new );

	foreach ( $this->textCache as $title => $oldText ) {
		// Hack to make Pythagorean_Theorem etc. work
		$skipSelf = $newWithUnderscores === $title
			&& in_array( $title, $this->skipSelfSubstitution );
		if ( $skipSelf ) {
			continue;
		}

		$newText = $this->substituteTextLinks( $oldSpaced, $newSpaced, $oldText );
		if ( $oldText === $newText ) {
			// No link to the old name on this page
			continue;
		}

		$revision = array(
			'rctitle' => $title,
			'timestamp' => $timestamp,
			'text' => $newText,
			'extra' => array( 'name' => 'Page move link fixup script' ),
			'summary' => '',
			'minor' => true
		);
		$this->saveRevision( $revision );
	}
}
758 -
/**
 * Replace links to $old with links to $new inside $text.
 *
 * The text is processed by an ordered list of regex passes. Each pass
 * swaps its matches for numbered placeholders delimited by $this->FS (via
 * storeRaw(), directly or through subFreeLink()/subWikiLink()), so later
 * passes cannot touch text that was already handled. The final pass puts
 * the stored text back.
 *
 * @param string $old Link target to look for
 * @param string $new Link target to insert instead
 * @param string $text Text to process
 * @return string Text with the matching links rewritten
 */
function substituteTextLinks( $old, $new, $text ) {
	$this->saveUrl = array();
	$this->old = $old;
	$this->new = $new;

	$text = str_replace( $this->FS, '', $text ); # Remove separators (paranoia)

	// Order matters: every pass hides what it matched from the following ones
	$passes = array(
		// Literal sections
		array( '/(<pre>(.*?)<\/pre>)/is', 'storeRaw' ),
		array( '/(<code>(.*?)<\/code>)/is', 'storeRaw' ),
		array( '/(<nowiki>(.*?)<\/nowiki>)/s', 'storeRaw' ),
		// Free links, with and without a display name
		array( "/\[\[{$this->FreeLinkPattern}\|([^\]]+)\]\]/", 'subFreeLink' ),
		array( "/\[\[{$this->FreeLinkPattern}\]\]/", 'subFreeLink' ),
		// URL and inter-wiki links, bracketed-with-text first, then the rest
		array( "/(\[{$this->UrlPattern}\s+([^\]]+?)\])/", 'storeRaw' ),
		array( "/(\[{$this->InterLinkPattern}\s+([^\]]+?)\])/", 'storeRaw' ),
		array( "/(\[?{$this->UrlPattern}\]?)/", 'storeRaw' ),
		array( "/(\[?{$this->InterLinkPattern}\]?)/", 'storeRaw' ),
		// Remaining LinkPattern matches
		array( "/{$this->LinkPattern}/", 'subWikiLink' ),
		// Restore saved text
		array( "/{$this->FS}(\d+){$this->FS}/", 'restoreRaw' ),
	);

	foreach ( $passes as $pass ) {
		list( $pattern, $method ) = $pass;
		$text = preg_replace_callback( $pattern, array( $this, $method ), $text );
	}
	return $text;
}
791 -
/**
 * Collect the link targets found in $text.
 *
 * Runs the same ordered regex passes as substituteTextLinks(), but free
 * links and LinkPattern matches are recorded through storeLink() instead
 * of being rewritten, and the placeholders are never restored because the
 * processed text is discarded.
 *
 * @param string $text Text to scan
 * @return array List of first capture groups of the link matches, in the
 *   order the passes encountered them
 */
function getLinkList( $text ) {
	$this->saveUrl = array();
	$this->linkList = array();

	$text = str_replace( $this->FS, '', $text ); # Remove separators (paranoia)

	// Order matters: every pass hides what it matched from the following ones
	$passes = array(
		// Literal sections
		array( '/(<pre>(.*?)<\/pre>)/is', 'storeRaw' ),
		array( '/(<code>(.*?)<\/code>)/is', 'storeRaw' ),
		array( '/(<nowiki>(.*?)<\/nowiki>)/s', 'storeRaw' ),
		// Free links, with and without a display name
		array( "/\[\[{$this->FreeLinkPattern}\|([^\]]+)\]\]/", 'storeLink' ),
		array( "/\[\[{$this->FreeLinkPattern}\]\]/", 'storeLink' ),
		// URL and inter-wiki links are hidden, not collected
		array( "/(\[{$this->UrlPattern}\s+([^\]]+?)\])/", 'storeRaw' ),
		array( "/(\[{$this->InterLinkPattern}\s+([^\]]+?)\])/", 'storeRaw' ),
		array( "/(\[?{$this->UrlPattern}\]?)/", 'storeRaw' ),
		array( "/(\[?{$this->InterLinkPattern}\]?)/", 'storeRaw' ),
		// Remaining LinkPattern matches
		array( "/{$this->LinkPattern}/", 'storeLink' ),
	);

	foreach ( $passes as $pass ) {
		list( $pattern, $method ) = $pass;
		$text = preg_replace_callback( $pattern, array( $this, $method ), $text );
	}

	return $this->linkList;
}
821 -
/**
 * Stash a piece of text and return the placeholder that stands in for it.
 *
 * @param array $m Match array; element 1 is the text to save
 * @return string The index of the saved text wrapped in $this->FS on both sides
 */
function storeRaw( $m ) {
	// The new entry is appended, so its index equals the current size
	$index = count( $this->saveUrl );
	$this->saveUrl[] = $m[1];
	return $this->FS . $index . $this->FS;
}
826 -
/**
 * preg_replace_callback() handler for free links ([[target]] or
 * [[target|name]]): swaps the target for $this->new when it equals
 * $this->old, then hides the resulting link behind a placeholder.
 *
 * @param array $m Match array; element 1 is the link target, element 2
 *   (optional) the display name
 * @return string Placeholder returned by storeRaw() for the rebuilt link
 */
function subFreeLink( $m ) {
	$oldlink = $m[1];
	$name = isset( $m[2] ) ? $m[2] : '';

	// Surrounding whitespace is ignored for the comparison only
	$link = preg_replace( '/^\s+/', '', $oldlink );
	$link = preg_replace( '/\s+$/', '', $link );

	// Compare as strings, strictly. A loose == treats numeric-looking names
	// such as "007" and "7" as equal. The cast is needed because $this->old
	// may be an integer when it originated from an array key.
	if ( $link === (string)$this->old ) {
		$link = $this->new;
	} else {
		$link = $oldlink; # Preserve spaces if no match
	}

	$link = "[[$link";
	if ( $name !== "" ) {
		$link .= "|$name";
	}
	$link .= "]]";
	return $this->storeRaw( array( 1 => $link ) );
}
849 -
/**
 * preg_replace_callback() handler for LinkPattern matches: swaps the link
 * for $this->new when it equals $this->old, then hides the result behind a
 * placeholder.
 *
 * @param array $m Match array; element 1 is the matched link
 * @return string Placeholder returned by storeRaw() for the resulting link
 */
function subWikiLink( $m ) {
	$link = $m[1];
	// Strict string comparison, consistent with subFreeLink(): a loose ==
	// would compare numeric-looking names by value, and $this->old may be
	// an integer when it originated from an array key.
	if ( $link === (string)$this->old ) {
		$link = $this->new;
		if ( !preg_match( "/^{$this->LinkPattern}$/", $this->new ) ) {
			// The new name does not match LinkPattern on its own, so it
			// needs explicit free-link brackets
			$link = "[[$link]]";
		}
	}
	return $this->storeRaw( array( 1 => $link ) );
}
860 -
/**
 * preg_replace_callback() handler that turns a placeholder back into the
 * text saved by storeRaw().
 *
 * @param array $m Match array; element 1 is the index into $this->saveUrl
 * @return string The saved text
 */
function restoreRaw( $m ) {
	$index = $m[1];
	return $this->saveUrl[$index];
}
864 -
/**
 * preg_replace_callback() handler that records a link in $this->linkList
 * and then hides the match behind a placeholder.
 *
 * @param array $m Match array; element 1 is appended to the link list
 * @return string Placeholder returned by storeRaw()
 */
function storeLink( $m ) {
	$target = $m[1];
	$this->linkList[] = $target;
	return $this->storeRaw( $m );
}
869 -
/**
 * Translate a string using the $this->encodeMap replacement table.
 *
 * @param string $s
 * @return string Result of strtr() with the encode map
 */
function encode( $s ) {
	$map = $this->encodeMap;
	return strtr( $s, $map );
}
873 -
/**
 * Translate a string using the $this->decodeMap replacement table.
 *
 * @param string $s
 * @return string Result of strtr() with the decode map
 */
function decode( $s ) {
	$map = $this->decodeMap;
	return strtr( $s, $map );
}
877 -
/**
 * Print a string to standard output after passing it through encode().
 *
 * @param string $s
 */
function printLatin1( $s ) {
	$encoded = $this->encode( $s );
	echo $encoded;
}
881 -
/**
 * Split a separator-delimited "key, value, key, value, ..." string into an
 * associative array.
 *
 * @param string $s Serialized data
 * @param string $sep Separator between consecutive keys and values
 * @return array Map of key => value. A trailing key without a value (odd
 *   number of parts, which includes the empty string) maps to null.
 */
function unserializeUseMod( $s, $sep ) {
	$parts = explode( $sep, $s );
	$numParts = count( $parts );
	$result = array();
	for ( $i = 0; $i < $numParts; $i += 2 ) {
		// Guard the value lookup: with an odd number of parts the last key
		// has no partner, and reading it would raise an undefined-offset error
		$result[$parts[$i]] = isset( $parts[$i + 1] ) ? $parts[$i + 1] : null;
	}
	return $result;
}
890 -}
891 -
// Name the maintenance class, then include the runner file.
// NOTE(review): presumably the included file instantiates $maintClass — confirm.
$maintClass = 'ImportUseModWikipedia';
require_once RUN_MAINTENANCE_IF_MAIN;

Comments

#Comment by Hashar (talk | contribs)   12:23, 27 October 2011

Fine! Should we get that UseMod script out of REL1_18 too?

#Comment by Reedy (talk | contribs)   17:17, 27 October 2011

Could do. Wouldn't do any harm being left (though I'd be surprised if it actually works)

Status & tagging log