r88016 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r88015 | r88016 | r88017 >
Date:	19:04, 13 May 2011
Author:	demon
Status:	deferred (Comments)
Tags:
Comment:	Rewrite importUseModWiki to subclass maintenance, general cleanup, etc. Also fixing bug 22287, change default field separator
Modified paths:	/trunk/phase3/maintenance/importUseModWiki.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/importUseModWiki.php
—	—	@@ -40,40 +40,49 @@
41	41	* @ingroup Maintenance
42	42	*/
43	43
44		~~-if ( php_sapi_name() != 'cli' ) {~~
45		~~- echo "Please customize the settings and run me from the command line.";~~
46		~~- die( -1 );~~
47		-}
	44	+require_once( "Maintenance.php" );
48	45
49		~~-/** Set these correctly! */~~
50		~~-$wgImportEncoding = "CP1252"; /* We convert all to UTF-8 */~~
51		~~-$wgRootDirectory = "/kalman/Projects/wiki2002/wiki/lib-http/db/wiki";~~
	46	+class ImportUseModWiki extends Maintenance {
52	47
53		~~-/* On a large wiki, you might run out of memory */~~
54		~~-@ini_set( 'memory_limit', '40M' );~~
	48	+ private $encoding, $rootDirectory = '';
55	49
56		~~-/* globals */~~
57		~~-$wgFieldSeparator = "\xb3"; # Some wikis may use different char~~
58		~~- $FS = $wgFieldSeparator ;~~
59		~~- $FS1 = $FS . "1" ;~~
60		~~- $FS2 = $FS . "2" ;~~
61		~~- $FS3 = $FS . "3" ;~~
	50	+ /**
	51	+ * Field separators
	52	+ * @var String
	53	+ */
	54	+ private $FS1, $FS2, $FS3 = '';
62	55
63		~~-# Unicode sanitization tools~~
64		~~-require_once( dirname( dirname( __FILE__ ) ) . '/includes/normal/UtfNormal.php' );~~
	56	+ /**
	57	+ * @var Array
	58	+ */
	59	+ private $usercache, $nowiki = array();
65	60
66		~~-$usercache = array();~~
	61	+ public function __construct() {
	62	+ parent::__construct();
	63	+ $this->mDescription = "Import pages from UseMod wikis";
	64	+ $this->addOption( 'encoding', 'Encoding of the imported text, default CP1252', false, true );
	65	+ /**
	66	+ * If UseModWiki's New File System is used:
	67	+ * $NewFS = 1; # 1 = new multibyte $FS, 0 = old $FS
	68	+ * Use "\xb3"; for the Old File System
	69	+ * Changed with UTF-8 UseModWiki
	70	+ * http://www.usemod.com/cgi-bin/wiki.pl?SupportForUtf8
	71	+ * http://www.usemod.com/cgi-bin/wiki.pl?WikiBugs/NewFieldSeparatorWronglyTreated
	72	+ * http://www.meatballwiki.org/wiki/WikiEngine#Q_amp_A
	73	+ */
	74	+ $this->addOption( 'separator', 'Field separator to use, default \x1E\xFF\xFE\x1E', false, true );
	75	+ $this->addArg( 'path', 'Path to your UseMod wiki' );
	76	+ }
67	77
68		~~-importPages();~~
	78	+ public function execute() {
	79	+ $this->rootDirectory = $this->getArg();
	80	+ $this->encoding = $this->getOption( 'encoding', 'CP1252' );
	81	+ $sep = $this->getOption( 'separator', "\x1E\xFF\xFE\x1E" );
	82	+ $this->FS1 = "{$sep}1";
	83	+ $this->FS2 = "{$sep}2";
	84	+ $this->FS3 = "{$sep}3";
69	85
70		~~-# ------------------------------------------------------------------------------~~
71		-
72		~~-function importPages()~~
73		-{
74		~~- global $wgRootDirectory;~~
75		-
76		~~- $gt = '>';~~
77		~~- echo <<<XML~~
	86	+ echo <<<XML
78	87	<?xml version="1.0" encoding="UTF-8" ?>
79	88	<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/"
80	89	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
—	—	@@ -89,290 +98,278 @@
90	99	'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
91	100	'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' );
92	101	foreach ( $letters as $letter ) {
93		~~- $dir = "$wgRootDirectory/page/$letter";~~
	102	+ $dir = "{$this->rootDirectory}/page/$letter";
94	103	if ( is_dir( $dir ) )
95		~~- importPageDirectory( $dir );~~
	104	+ $this->importPageDirectory( $dir );
96	105	}
97	106	echo <<<XML
98	107	</mediawiki>
99	108
100	109	XML;
101		-}
	110	+ }
102	111
103		~~-function importPageDirectory( $dir, $prefix = "" )~~
104		-{
105		~~- echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n";~~
106		~~- $mydir = opendir( $dir );~~
107		~~- while ( $entry = readdir( $mydir ) ) {~~
108		~~- $m = array();~~
109		~~- if ( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {~~
110		~~- echo importPage( $prefix . $m[1] );~~
111		~~- } else {~~
112		~~- if ( is_dir( "$dir/$entry" ) ) {~~
113		~~- if ( $entry != '.' && $entry != '..' ) {~~
114		~~- importPageDirectory( "$dir/$entry", "$entry/" );~~
115		~~- }~~
	112	+ private function importPageDirectory( $dir, $prefix = "" ) {
	113	+ echo "\n<!-- Checking page directory " . $this->xmlCommentSafe( $dir ) . " -->\n";
	114	+ $mydir = opendir( $dir );
	115	+ while ( $entry = readdir( $mydir ) ) {
	116	+ $m = array();
	117	+ if ( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
	118	+ echo $this->importPage( $prefix . $m[1] );
116	119	} else {
117		~~- echo "<!-- File '" . xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n";~~
	120	+ if ( is_dir( "$dir/$entry" ) ) {
	121	+ if ( $entry != '.' && $entry != '..' ) {
	122	+ $this->importPageDirectory( "$dir/$entry", "$entry/" );
	123	+ }
	124	+ } else {
	125	+ echo "<!-- File '" . $this->xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n";
	126	+ }
118	127	}
119	128	}
120	129	}
121		-}
122	130
	131	+ private function useModFilename( $title ) {
	132	+ $c = substr( $title, 0, 1 );
	133	+ if ( preg_match( '/[A-Z]/i', $c ) ) {
	134	+ return strtoupper( $c ) . "/$title";
	135	+ }
	136	+ return "other/$title";
	137	+ }
123	138
124		~~-# ------------------------------------------------------------------------------~~
	139	+ private function fetchPage( $title ) {
	140	+ $fname = $this->rootDirectory . "/page/" . $this->useModFilename( $title ) . ".db";
	141	+ if ( !file_exists( $fname ) ) {
	142	+ echo "Couldn't open file '$fname' for page '$title'.\n";
	143	+ die( -1 );
	144	+ }
125	145
126		~~-/* fetch_ functions~~
127		~~- Grab a given item from the database~~
128		~~- */~~
	146	+ $page = $this->splitHash( $this->FS1, file_get_contents( $fname ) );
	147	+ $section = $this->splitHash( $this->FS2, $page["text_default"] );
	148	+ $text = $this->splitHash( $this->FS3, $section["data"] );
129	149
130		~~-function useModFilename( $title ) {~~
131		~~- $c = substr( $title, 0, 1 );~~
132		~~- if ( preg_match( '/[A-Z]/i', $c ) ) {~~
133		~~- return strtoupper( $c ) . "/$title";~~
	150	+ return $this->array2object( array( "text" => $text["text"] , "summary" => $text["summary"] ,
	151	+ "minor" => $text["minor"] , "ts" => $section["ts"] ,
	152	+ "username" => $section["username"] , "host" => $section["host"] ) );
134	153	}
135		~~- return "other/$title";~~
136		-}
137	154
138		~~-function fetchPage( $title )~~
139		-{
140		~~- global $FS1, $FS2, $FS3, $wgRootDirectory;~~
	155	+ private function fetchKeptPages( $title ) {
	156	+ $fname = $this->rootDirectory . "/keep/" . $this->useModFilename( $title ) . ".kp";
	157	+ if ( !file_exists( $fname ) ) return array();
141	158
142		~~- $fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";~~
143		~~- if ( !file_exists( $fname ) ) {~~
144		~~- echo "Couldn't open file '$fname' for page '$title'.\n";~~
145		~~- die( -1 );~~
146		~~- }~~
	159	+ $keptlist = explode( $this->FS1, file_get_contents( $fname ) );
	160	+ array_shift( $keptlist ); # Drop the junk at beginning of file
147	161
148		~~- $page = splitHash( $FS1, file_get_contents( $fname ) );~~
149		~~- $section = splitHash( $FS2, $page["text_default"] );~~
150		~~- $text = splitHash( $FS3, $section["data"] );~~
151		-
152		~~- return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] ,~~
153		~~- "minor" => $text["minor"] , "ts" => $section["ts"] ,~~
154		~~- "username" => $section["username"] , "host" => $section["host"] ) );~~
155		-}
156		-
157		~~-function fetchKeptPages( $title )~~
158		-{
159		~~- global $FS1, $FS2, $FS3, $wgRootDirectory;~~
160		-
161		~~- $fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";~~
162		~~- if ( !file_exists( $fname ) ) return array();~~
163		-
164		~~- $keptlist = explode( $FS1, file_get_contents( $fname ) );~~
165		~~- array_shift( $keptlist ); # Drop the junk at beginning of file~~
166		-
167		~~- $revisions = array();~~
168		~~- foreach ( $keptlist as $rev ) {~~
169		~~- $section = splitHash( $FS2, $rev );~~
170		~~- $text = splitHash( $FS3, $section["data"] );~~
171		~~- if ( $text["text"] && $text["minor"] != "" && ( $section["ts"] * 1 > 0 ) ) {~~
172		~~- array_push( $revisions, array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] ,~~
173		~~- "minor" => $text["minor"] , "ts" => $section["ts"] ,~~
174		~~- "username" => $section["username"] , "host" => $section["host"] ) ) );~~
175		~~- } else {~~
176		~~- echo "<!-- skipped a bad old revision -->\n";~~
	162	+ $revisions = array();
	163	+ foreach ( $keptlist as $rev ) {
	164	+ $section = $this->splitHash( $this->FS2, $rev );
	165	+ $text = $this->splitHash( $this->FS3, $section["data"] );
	166	+ if ( $text["text"] && $text["minor"] != "" && ( $section["ts"] * 1 > 0 ) ) {
	167	+ array_push( $revisions, $this->array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] ,
	168	+ "minor" => $text["minor"] , "ts" => $section["ts"] ,
	169	+ "username" => $section["username"] , "host" => $section["host"] ) ) );
	170	+ } else {
	171	+ echo "<!-- skipped a bad old revision -->\n";
	172	+ }
177	173	}
	174	+ return $revisions;
178	175	}
179		~~- return $revisions;~~
180		-}
181	176
182		~~-function splitHash ( $sep , $str ) {~~
183		~~- $temp = explode ( $sep , $str ) ;~~
184		~~- $ret = array () ;~~
185		~~- for ( $i = 0; $i + 1 < count ( $temp ) ; $i++ ) {~~
186		~~- $ret[$temp[$i]] = $temp[++$i] ;~~
187		~~- }~~
188		~~- return $ret ;~~
	177	+ private function splitHash( $sep , $str ) {
	178	+ $temp = explode ( $sep , $str ) ;
	179	+ $ret = array () ;
	180	+ for ( $i = 0; $i + 1 < count ( $temp ) ; $i++ ) {
	181	+ $ret[$temp[$i]] = $temp[++$i] ;
	182	+ }
	183	+ return $ret ;
189	184	}
190	185
191		-
192		~~-/* import_ functions~~
193		~~- Take a fetched item and produce SQL~~
194		~~- */~~
195		-
196		~~-function checkUserCache( $name, $host )~~
197		-{
198		~~- global $usercache;~~
199		-
200		~~- if ( $name ) {~~
201		~~- if ( in_array( $name, $usercache ) ) {~~
202		~~- $userid = $usercache[$name];~~
	186	+ private function checkUserCache( $name, $host ) {
	187	+ if ( $name ) {
	188	+ if ( in_array( $name, $this->usercache ) ) {
	189	+ $userid = $this->usercache[$name];
	190	+ } else {
	191	+ # If we haven't imported user accounts
	192	+ $userid = 0;
	193	+ }
	194	+ $username = str_replace( '_', ' ', $name );
203	195	} else {
204		~~- # If we haven't imported user accounts~~
205	196	$userid = 0;
	197	+ $username = $host;
206	198	}
207		~~- $username = str_replace( '_', ' ', $name );~~
208		~~- } else {~~
209		~~- $userid = 0;~~
210		~~- $username = $host;~~
	199	+ return array( $userid, $username );
211	200	}
212		~~- return array( $userid, $username );~~
213		-}
214	201
215		~~-function importPage( $title )~~
216		-{
217		~~- echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n";~~
218		~~- $page = fetchPage( $title );~~
	202	+ private function importPage( $title ) {
	203	+ echo "\n<!-- Importing page " . $this->xmlCommentSafe( $title ) . " -->\n";
	204	+ $page = $this->fetchPage( $title );
219	205
220		~~- $newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) );~~
	206	+ $newtitle = $this->xmlsafe( str_replace( '_', ' ', $this->recodeText( $title ) ) );
221	207
222		~~- $munged = mungeFormat( $page->text );~~
223		~~- if ( $munged != $page->text ) {~~
224		- /**
225		~~- * Save a new revision with the conversion, and put the~~
226		~~- * previous last version into the history.~~
227		~~- */~~
228		~~- $next = array2object( array(~~
229		~~- 'text' => $munged,~~
230		~~- 'minor' => 1,~~
231		~~- 'username' => 'Conversion script',~~
232		~~- 'host' => '127.0.0.1',~~
233		~~- 'ts' => time(),~~
234		~~- 'summary' => 'link fix',~~
235		~~- ) );~~
236		~~- $revisions = array( $page, $next );~~
237		~~- } else {~~
238		- /**
239		~~- * Current revision:~~
240		~~- */~~
241		~~- $revisions = array( $page );~~
242		~~- }~~
243		~~- $xml = <<<XML~~
244		~~- <page>~~
245		~~- <title>$newtitle</title>~~
	208	+ $munged = $this->mungeFormat( $page->text );
	209	+ if ( $munged != $page->text ) {
	210	+ /**
	211	+ * Save a new revision with the conversion, and put the
	212	+ * previous last version into the history.
	213	+ */
	214	+ $next = $this->array2object( array(
	215	+ 'text' => $munged,
	216	+ 'minor' => 1,
	217	+ 'username' => 'Conversion script',
	218	+ 'host' => '127.0.0.1',
	219	+ 'ts' => time(),
	220	+ 'summary' => 'link fix',
	221	+ ) );
	222	+ $revisions = array( $page, $next );
	223	+ } else {
	224	+ /**
	225	+ * Current revision:
	226	+ */
	227	+ $revisions = array( $page );
	228	+ }
	229	+ $xml = <<<XML
	230	+ <page>
	231	+ <title>$newtitle</title>
246	232
247	233	XML;
248	234
249		~~- # History~~
250		~~- $revisions = array_merge( $revisions, fetchKeptPages( $title ) );~~
251		~~- if ( count( $revisions ) == 0 ) {~~
252		~~- return NULL; // Was "$sql", which does not appear to be defined.~~
253		~~- }~~
	235	+ # History
	236	+ $revisions = array_merge( $revisions, $this->fetchKeptPages( $title ) );
	237	+ if ( count( $revisions ) == 0 ) {
	238	+ return NULL; // Was "$sql", which does not appear to be defined.
	239	+ }
254	240
255		~~- foreach ( $revisions as $rev ) {~~
256		~~- $text = xmlsafe( recodeText( $rev->text ) );~~
257		~~- $minor = ( $rev->minor ? '<minor/>' : '' );~~
258		~~- list( /* $userid */ , $username ) = checkUserCache( $rev->username, $rev->host );~~
259		~~- $username = xmlsafe( recodeText( $username ) );~~
260		~~- $timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) );~~
261		~~- $comment = xmlsafe( recodeText( $rev->summary ) );~~
	241	+ foreach ( $revisions as $rev ) {
	242	+ $text = $this->xmlsafe( $this->recodeText( $rev->text ) );
	243	+ $minor = ( $rev->minor ? '<minor/>' : '' );
	244	+ list( /* $userid */ , $username ) = $this->checkUserCache( $rev->username, $rev->host );
	245	+ $username = $this->xmlsafe( $this->recodeText( $username ) );
	246	+ $timestamp = $this->xmlsafe( $this->timestamp2ISO8601( $rev->ts ) );
	247	+ $comment = $this->xmlsafe( $this->recodeText( $rev->summary ) );
262	248
263		~~- $xml .= <<<XML~~
264		~~- <revision>~~
265		~~- <timestamp>$timestamp</timestamp>~~
266		~~- <contributor><username>$username</username></contributor>~~
267		~~- $minor~~
268		~~- <comment>$comment</comment>~~
269		~~- <text>$text</text>~~
270		~~- </revision>~~
	249	+ $xml .= <<<XML
	250	+ <revision>
	251	+ <timestamp>$timestamp</timestamp>
	252	+ <contributor><username>$username</username></contributor>
	253	+ $minor
	254	+ <comment>$comment</comment>
	255	+ <text>$text</text>
	256	+ </revision>
271	257
272	258	XML;
	259	+ }
	260	+ $xml .= "</page>\n\n";
	261	+ return $xml;
273	262	}
274		~~- $xml .= "</page>\n\n";~~
275		~~- return $xml;~~
276		-}
277	263
278		~~-# Whee!~~
279		~~-function recodeText( $string ) {~~
280		~~- global $wgImportEncoding;~~
281		~~- # For currently latin-1 wikis~~
282		~~- $string = str_replace( "\r\n", "\n", $string );~~
283		~~- $string = @iconv( $wgImportEncoding, "UTF-8", $string );~~
284		~~- $string = wfMungeToUtf8( $string ); # Any old &#1234; stuff~~
285		~~- return $string;~~
286		-}
	264	+ private function recodeText( $string ) {
	265	+ # For currently latin-1 wikis
	266	+ $string = str_replace( "\r\n", "\n", $string );
	267	+ $string = @iconv( $this->encoding, "UTF-8", $string );
	268	+ $string = $this->mungeToUtf8( $string ); # Any old &#1234; stuff
	269	+ return $string;
	270	+ }
287	271
288		~~-function wfUtf8Sequence( $codepoint ) {~~
289		~~- if ( $codepoint < 0x80 ) return chr( $codepoint );~~
290		~~- if ( $codepoint < 0x800 ) return chr( $codepoint >> 6 & 0x3f | 0xc0 ) .~~
291		~~- chr( $codepoint & 0x3f | 0x80 );~~
292		~~- if ( $codepoint < 0x10000 ) return chr( $codepoint >> 12 & 0x0f | 0xe0 ) .~~
293		~~- chr( $codepoint >> 6 & 0x3f | 0x80 ) .~~
294		~~- chr( $codepoint & 0x3f | 0x80 );~~
295		~~- if ( $codepoint < 0x100000 ) return chr( $codepoint >> 18 & 0x07 | 0xf0 ) . # Double-check this~~
296		~~- chr( $codepoint >> 12 & 0x3f | 0x80 ) .~~
297		~~- chr( $codepoint >> 6 & 0x3f | 0x80 ) .~~
298		~~- chr( $codepoint & 0x3f | 0x80 );~~
299		~~- # Doesn't yet handle outside the BMP~~
300		~~- return "&#$codepoint;";~~
301		-}
	272	+ /**
	273	+ * @fixme don't use /e
	274	+ */
	275	+ private function mungeToUtf8( $string ) {
	276	+ $string = preg_replace ( '/&#([0-9]+);/e', 'wfUtf8Sequence($1)', $string );
	277	+ $string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string );
	278	+ # Should also do named entities here
	279	+ return $string;
	280	+ }
302	281
303		~~-function wfMungeToUtf8( $string ) {~~
304		~~- $string = preg_replace ( '/&#([0-9]+);/e', 'wfUtf8Sequence($1)', $string );~~
305		~~- $string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string );~~
306		~~- # Should also do named entities here~~
307		~~- return $string;~~
308		-}
	282	+ private function timestamp2ISO8601( $ts ) {
	283	+ # 2003-08-05T18:30:02Z
	284	+ return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z';
	285	+ }
309	286
310		~~-function timestamp2ISO8601( $ts ) {~~
311		~~- # 2003-08-05T18:30:02Z~~
312		~~- return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z';~~
313		-}
314		-
315		~~-function xmlsafe( $string ) {~~
316	287	/**
317	288	* The page may contain old data which has not been properly normalized.
318	289	* Invalid UTF-8 sequences or forbidden control characters will make our
319	290	* XML output invalid, so be sure to strip them out.
	291	+ * @param String $string Text to clean up
	292	+ * @return String
320	293	*/
321		~~- $string = UtfNormal::cleanUp( $string );~~
	294	+ private function xmlsafe( $string ) {
	295	+ $string = UtfNormal::cleanUp( $string );
	296	+ $string = htmlspecialchars( $string );
	297	+ return $string;
	298	+ }
322	299
323		~~- $string = htmlspecialchars( $string );~~
324		~~- return $string;~~
325		-}
	300	+ private function xmlCommentSafe( $text ) {
	301	+ return str_replace( '--', '\\-\\-', $this->xmlsafe( $this->recodeText( $text ) ) );
	302	+ }
326	303
327		~~-function xmlCommentSafe( $text ) {~~
328		~~- return str_replace( '--', '\\-\\-', xmlsafe( recodeText( $text ) ) );~~
329		-}
330		-
331		-
332		~~-function array2object( $arr ) {~~
333		~~- $o = (object)0;~~
334		~~- foreach ( $arr as $x => $y ) {~~
335		~~- $o->$x = $y;~~
	304	+ private function array2object( $arr ) {
	305	+ $o = (object)0;
	306	+ foreach ( $arr as $x => $y ) {
	307	+ $o->$x = $y;
	308	+ }
	309	+ return $o;
336	310	}
337		~~- return $o;~~
338		-}
339	311
	312	+ /**
	313	+ * Make CamelCase and /Talk links work
	314	+ */
	315	+ private function mungeFormat( $text ) {
	316	+ $this->nowiki = array();
	317	+ $staged = preg_replace_callback(
	318	+ '/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',
	319	+ array( $this, 'nowikiPlaceholder' ), $text );
340	320
341		-/**
342		~~- * Make CamelCase and /Talk links work~~
343		~~- */~~
344		~~-function mungeFormat( $text ) {~~
345		~~- global $nowiki;~~
346		~~- $nowiki = array();~~
347		~~- $staged = preg_replace_callback(~~
348		~~- '/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',~~
349		~~- 'nowikiPlaceholder', $text );~~
	321	+ # This is probably not 100% correct, I'm just
	322	+ # glancing at the UseModWiki code.
	323	+ $upper = "[A-Z]";
	324	+ $lower = "[a-z_0-9]";
	325	+ $any = "[A-Za-z_0-9]";
	326	+ $camel = "(?:$upper+$lower+$upper+$any*)";
	327	+ $subpage = "(?:\\/$any+)";
	328	+ $substart = "(?:\\/$upper$any*)";
350	329
351		~~- # This is probably not 100% correct, I'm just~~
352		~~- # glancing at the UseModWiki code.~~
353		~~- $upper = "[A-Z]";~~
354		~~- $lower = "[a-z_0-9]";~~
355		~~- $any = "[A-Za-z_0-9]";~~
356		~~- $camel = "(?:$upper+$lower+$upper+$any*)";~~
357		~~- $subpage = "(?:\\/$any+)";~~
358		~~- $substart = "(?:\\/$upper$any*)";~~
	330	+ $munged = preg_replace( "/(?!\\[\\[)($camel$subpage|$substart$subpage)\\b(?!\\]\\]|>)/",
	331	+ '[[$1]]', $staged );
359	332
360		~~- $munged = preg_replace( "/(?!\\[\\[)($camel$subpage|$substart$subpage)\\b(?!\\]\\]|>)/",~~
361		~~- '[[$1]]', $staged );~~
	333	+ $final = preg_replace( '/' . preg_quote( $this->placeholder() ) . '/s',
	334	+ array( $this, 'nowikiShift' ), $munged );
	335	+ return $final;
	336	+ }
362	337
363		~~- $final = preg_replace( '/' . preg_quote( placeholder() ) . '/es',~~
364		~~- 'array_shift( $nowiki )', $munged );~~
365		~~- return $final;~~
366		-}
	338	+ private function placeholder( $x = null ) {
	339	+ return '\xffplaceholder\xff';
	340	+ }
367	341
	342	+ public function nowikiPlaceholder( $matches ) {
	343	+ $this->nowiki[] = $matches[1];
	344	+ return $this->placeholder();
	345	+ }
368	346
369		~~-function placeholder( $x = null ) {~~
370		~~- return '\xffplaceholder\xff';~~
	347	+ public function nowikiShift() {
	348	+ return array_shift( $this->nowiki );
	349	+ }
371	350	}
372	351
373		~~-function nowikiPlaceholder( $matches ) {~~
374		~~- global $nowiki;~~
375		~~- $nowiki[] = $matches[1];~~
376		~~- return placeholder();~~
	352	+function wfUtf8Sequence( $codepoint ) {
	353	+ if ( $codepoint < 0x80 ) {
	354	+ return chr( $codepoint );
	355	+ }
	356	+ if ( $codepoint < 0x800 ) {
	357	+ return chr( $codepoint >> 6 & 0x3f | 0xc0 ) .
	358	+ chr( $codepoint & 0x3f | 0x80 );
	359	+ }
	360	+ if ( $codepoint < 0x10000 ) {
	361	+ return chr( $codepoint >> 12 & 0x0f | 0xe0 ) .
	362	+ chr( $codepoint >> 6 & 0x3f | 0x80 ) .
	363	+ chr( $codepoint & 0x3f | 0x80 );
	364	+ }
	365	+ if ( $codepoint < 0x100000 ) {
	366	+ return chr( $codepoint >> 18 & 0x07 | 0xf0 ) . # Double-check this
	367	+ chr( $codepoint >> 12 & 0x3f | 0x80 ) .
	368	+ chr( $codepoint >> 6 & 0x3f | 0x80 ) .
	369	+ chr( $codepoint & 0x3f | 0x80 );
	370	+ }
	371	+ # Doesn't yet handle outside the BMP
	372	+ return "&#$codepoint;";
377	373	}
378	374
379		-
	375	+$maintClass = 'ImportUseModWiki';
	376	+require_once( RUN_MAINTENANCE_IF_MAIN );

Follow-up revisions

Revision	Commit summary	Author	Date
r89390	Minor tweaks:...	demon	03:48, 3 June 2011

Comments

#Comment by 😂 (talk | contribs) 15:11, 15 May 2011

Deferred? This is a core change :)

Status & tagging log

08:35, 14 May 2011 Hashar (talk | contribs) changed the status of r88016 [removed: new added: deferred]