Index: trunk/phase3/maintenance/importUseModWiki.php |
— | — | @@ -40,40 +40,49 @@ |
41 | 41 | * @ingroup Maintenance |
42 | 42 | */ |
43 | 43 | |
44 | | -if ( php_sapi_name() != 'cli' ) { |
45 | | - echo "Please customize the settings and run me from the command line."; |
46 | | - die( -1 ); |
47 | | -} |
| 44 | +require_once( "Maintenance.php" ); |
48 | 45 | |
49 | | -/** Set these correctly! */ |
50 | | -$wgImportEncoding = "CP1252"; /* We convert all to UTF-8 */ |
51 | | -$wgRootDirectory = "/kalman/Projects/wiki2002/wiki/lib-http/db/wiki"; |
| 46 | +class ImportUseModWiki extends Maintenance { |
52 | 47 | |
53 | | -/* On a large wiki, you might run out of memory */ |
54 | | -@ini_set( 'memory_limit', '40M' ); |
| 48 | + private $encoding, $rootDirectory = ''; |
55 | 49 | |
56 | | -/* globals */ |
57 | | -$wgFieldSeparator = "\xb3"; # Some wikis may use different char |
58 | | - $FS = $wgFieldSeparator ; |
59 | | - $FS1 = $FS . "1" ; |
60 | | - $FS2 = $FS . "2" ; |
61 | | - $FS3 = $FS . "3" ; |
| 50 | + /** |
| 51 | + * Field separators |
| 52 | + * @var String |
| 53 | + */ |
| 54 | + private $FS1, $FS2, $FS3 = ''; |
62 | 55 | |
63 | | -# Unicode sanitization tools |
64 | | -require_once( dirname( dirname( __FILE__ ) ) . '/includes/normal/UtfNormal.php' ); |
| 56 | + /** |
| 57 | + * @var Array |
| 58 | + */ |
| 59 | + private $usercache, $nowiki = array(); |
65 | 60 | |
66 | | -$usercache = array(); |
| 61 | + public function __construct() { |
| 62 | + parent::__construct(); |
| 63 | + $this->mDescription = "Import pages from UseMod wikis"; |
| 64 | + $this->addOption( 'encoding', 'Encoding of the imported text, default CP1252', false, true ); |
| 65 | + /** |
| 66 | + * If UseModWiki's New File System is used: |
| 67 | + * $NewFS = 1; # 1 = new multibyte $FS, 0 = old $FS |
| 68 | + * Use "\xb3"; for the Old File System |
| 69 | + * Changed with UTF-8 UseModWiki |
| 70 | + * http://www.usemod.com/cgi-bin/wiki.pl?SupportForUtf8 |
| 71 | + * http://www.usemod.com/cgi-bin/wiki.pl?WikiBugs/NewFieldSeparatorWronglyTreated |
| 72 | + * http://www.meatballwiki.org/wiki/WikiEngine#Q_amp_A |
| 73 | + */ |
| 74 | + $this->addOption( 'separator', 'Field separator to use, default \x1E\xFF\xFE\x1E', false, true ); |
| 75 | + $this->addArg( 'path', 'Path to your UseMod wiki' ); |
| 76 | + } |
67 | 77 | |
68 | | -importPages(); |
| 78 | + public function execute() { |
| 79 | + $this->rootDirectory = $this->getArg(); |
| 80 | + $this->encoding = $this->getOption( 'encoding', 'CP1252' ); |
| 81 | + $sep = $this->getOption( 'separator', "\x1E\xFF\xFE\x1E" ); |
| 82 | + $this->FS1 = "{$sep}1"; |
| 83 | + $this->FS2 = "{$sep}2"; |
| 84 | + $this->FS3 = "{$sep}3"; |
69 | 85 | |
70 | | -# ------------------------------------------------------------------------------ |
71 | | - |
72 | | -function importPages() |
73 | | -{ |
74 | | - global $wgRootDirectory; |
75 | | - |
76 | | - $gt = '>'; |
77 | | - echo <<<XML |
| 86 | + echo <<<XML |
78 | 87 | <?xml version="1.0" encoding="UTF-8" ?> |
79 | 88 | <mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/" |
80 | 89 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
— | — | @@ -89,290 +98,278 @@ |
90 | 99 | 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', |
91 | 100 | 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' ); |
92 | 101 | foreach ( $letters as $letter ) { |
93 | | - $dir = "$wgRootDirectory/page/$letter"; |
| 102 | + $dir = "{$this->rootDirectory}/page/$letter"; |
94 | 103 | if ( is_dir( $dir ) ) |
95 | | - importPageDirectory( $dir ); |
| 104 | + $this->importPageDirectory( $dir ); |
96 | 105 | } |
97 | 106 | echo <<<XML |
98 | 107 | </mediawiki> |
99 | 108 | |
100 | 109 | XML; |
101 | | -} |
| 110 | + } |
102 | 111 | |
103 | | -function importPageDirectory( $dir, $prefix = "" ) |
104 | | -{ |
105 | | - echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n"; |
106 | | - $mydir = opendir( $dir ); |
107 | | - while ( $entry = readdir( $mydir ) ) { |
108 | | - $m = array(); |
109 | | - if ( preg_match( '/^(.+)\.db$/', $entry, $m ) ) { |
110 | | - echo importPage( $prefix . $m[1] ); |
111 | | - } else { |
112 | | - if ( is_dir( "$dir/$entry" ) ) { |
113 | | - if ( $entry != '.' && $entry != '..' ) { |
114 | | - importPageDirectory( "$dir/$entry", "$entry/" ); |
115 | | - } |
| 112 | + private function importPageDirectory( $dir, $prefix = "" ) { |
| 113 | + echo "\n<!-- Checking page directory " . $this->xmlCommentSafe( $dir ) . " -->\n"; |
| 114 | + $mydir = opendir( $dir ); |
| 115 | + while ( $entry = readdir( $mydir ) ) { |
| 116 | + $m = array(); |
| 117 | + if ( preg_match( '/^(.+)\.db$/', $entry, $m ) ) { |
| 118 | + echo $this->importPage( $prefix . $m[1] ); |
116 | 119 | } else { |
117 | | - echo "<!-- File '" . xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n"; |
| 120 | + if ( is_dir( "$dir/$entry" ) ) { |
| 121 | + if ( $entry != '.' && $entry != '..' ) { |
| 122 | + $this->importPageDirectory( "$dir/$entry", "$entry/" ); |
| 123 | + } |
| 124 | + } else { |
| 125 | + echo "<!-- File '" . $this->xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n"; |
| 126 | + } |
118 | 127 | } |
119 | 128 | } |
120 | 129 | } |
121 | | -} |
122 | 130 | |
| 131 | + private function useModFilename( $title ) { |
| 132 | + $c = substr( $title, 0, 1 ); |
| 133 | + if ( preg_match( '/[A-Z]/i', $c ) ) { |
| 134 | + return strtoupper( $c ) . "/$title"; |
| 135 | + } |
| 136 | + return "other/$title"; |
| 137 | + } |
123 | 138 | |
124 | | -# ------------------------------------------------------------------------------ |
| 139 | + private function fetchPage( $title ) { |
| 140 | + $fname = $this->rootDirectory . "/page/" . $this->useModFilename( $title ) . ".db"; |
| 141 | + if ( !file_exists( $fname ) ) { |
| 142 | + echo "Couldn't open file '$fname' for page '$title'.\n"; |
| 143 | + die( -1 ); |
| 144 | + } |
125 | 145 | |
126 | | -/* fetch_ functions |
127 | | - Grab a given item from the database |
128 | | - */ |
| 146 | + $page = $this->splitHash( $this->FS1, file_get_contents( $fname ) ); |
| 147 | + $section = $this->splitHash( $this->FS2, $page["text_default"] ); |
| 148 | + $text = $this->splitHash( $this->FS3, $section["data"] ); |
129 | 149 | |
130 | | -function useModFilename( $title ) { |
131 | | - $c = substr( $title, 0, 1 ); |
132 | | - if ( preg_match( '/[A-Z]/i', $c ) ) { |
133 | | - return strtoupper( $c ) . "/$title"; |
| 150 | + return $this->array2object( array( "text" => $text["text"] , "summary" => $text["summary"] , |
| 151 | + "minor" => $text["minor"] , "ts" => $section["ts"] , |
| 152 | + "username" => $section["username"] , "host" => $section["host"] ) ); |
134 | 153 | } |
135 | | - return "other/$title"; |
136 | | -} |
137 | 154 | |
138 | | -function fetchPage( $title ) |
139 | | -{ |
140 | | - global $FS1, $FS2, $FS3, $wgRootDirectory; |
| 155 | + private function fetchKeptPages( $title ) { |
| 156 | + $fname = $this->rootDirectory . "/keep/" . $this->useModFilename( $title ) . ".kp"; |
| 157 | + if ( !file_exists( $fname ) ) return array(); |
141 | 158 | |
142 | | - $fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db"; |
143 | | - if ( !file_exists( $fname ) ) { |
144 | | - echo "Couldn't open file '$fname' for page '$title'.\n"; |
145 | | - die( -1 ); |
146 | | - } |
| 159 | + $keptlist = explode( $this->FS1, file_get_contents( $fname ) ); |
| 160 | + array_shift( $keptlist ); # Drop the junk at beginning of file |
147 | 161 | |
148 | | - $page = splitHash( $FS1, file_get_contents( $fname ) ); |
149 | | - $section = splitHash( $FS2, $page["text_default"] ); |
150 | | - $text = splitHash( $FS3, $section["data"] ); |
151 | | - |
152 | | - return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] , |
153 | | - "minor" => $text["minor"] , "ts" => $section["ts"] , |
154 | | - "username" => $section["username"] , "host" => $section["host"] ) ); |
155 | | -} |
156 | | - |
157 | | -function fetchKeptPages( $title ) |
158 | | -{ |
159 | | - global $FS1, $FS2, $FS3, $wgRootDirectory; |
160 | | - |
161 | | - $fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp"; |
162 | | - if ( !file_exists( $fname ) ) return array(); |
163 | | - |
164 | | - $keptlist = explode( $FS1, file_get_contents( $fname ) ); |
165 | | - array_shift( $keptlist ); # Drop the junk at beginning of file |
166 | | - |
167 | | - $revisions = array(); |
168 | | - foreach ( $keptlist as $rev ) { |
169 | | - $section = splitHash( $FS2, $rev ); |
170 | | - $text = splitHash( $FS3, $section["data"] ); |
171 | | - if ( $text["text"] && $text["minor"] != "" && ( $section["ts"] * 1 > 0 ) ) { |
172 | | - array_push( $revisions, array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] , |
173 | | - "minor" => $text["minor"] , "ts" => $section["ts"] , |
174 | | - "username" => $section["username"] , "host" => $section["host"] ) ) ); |
175 | | - } else { |
176 | | - echo "<!-- skipped a bad old revision -->\n"; |
| 162 | + $revisions = array(); |
| 163 | + foreach ( $keptlist as $rev ) { |
| 164 | + $section = $this->splitHash( $this->FS2, $rev ); |
| 165 | + $text = $this->splitHash( $this->FS3, $section["data"] ); |
| 166 | + if ( $text["text"] && $text["minor"] != "" && ( $section["ts"] * 1 > 0 ) ) { |
| 167 | + array_push( $revisions, $this->array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] , |
| 168 | + "minor" => $text["minor"] , "ts" => $section["ts"] , |
| 169 | + "username" => $section["username"] , "host" => $section["host"] ) ) ); |
| 170 | + } else { |
| 171 | + echo "<!-- skipped a bad old revision -->\n"; |
| 172 | + } |
177 | 173 | } |
| 174 | + return $revisions; |
178 | 175 | } |
179 | | - return $revisions; |
180 | | -} |
181 | 176 | |
182 | | -function splitHash ( $sep , $str ) { |
183 | | - $temp = explode ( $sep , $str ) ; |
184 | | - $ret = array () ; |
185 | | - for ( $i = 0; $i + 1 < count ( $temp ) ; $i++ ) { |
186 | | - $ret[$temp[$i]] = $temp[++$i] ; |
187 | | - } |
188 | | - return $ret ; |
| 177 | + private function splitHash( $sep , $str ) { |
| 178 | + $temp = explode ( $sep , $str ) ; |
| 179 | + $ret = array () ; |
| 180 | + for ( $i = 0; $i + 1 < count ( $temp ) ; $i++ ) { |
| 181 | + $ret[$temp[$i]] = $temp[++$i] ; |
| 182 | + } |
| 183 | + return $ret ; |
189 | 184 | } |
190 | 185 | |
191 | | - |
192 | | -/* import_ functions |
193 | | - Take a fetched item and produce SQL |
194 | | - */ |
195 | | - |
196 | | -function checkUserCache( $name, $host ) |
197 | | -{ |
198 | | - global $usercache; |
199 | | - |
200 | | - if ( $name ) { |
201 | | - if ( in_array( $name, $usercache ) ) { |
202 | | - $userid = $usercache[$name]; |
| 186 | + private function checkUserCache( $name, $host ) { |
| 187 | + if ( $name ) { |
| 188 | + if ( in_array( $name, $this->usercache ) ) { |
| 189 | + $userid = $this->usercache[$name]; |
| 190 | + } else { |
| 191 | + # If we haven't imported user accounts |
| 192 | + $userid = 0; |
| 193 | + } |
| 194 | + $username = str_replace( '_', ' ', $name ); |
203 | 195 | } else { |
204 | | - # If we haven't imported user accounts |
205 | 196 | $userid = 0; |
| 197 | + $username = $host; |
206 | 198 | } |
207 | | - $username = str_replace( '_', ' ', $name ); |
208 | | - } else { |
209 | | - $userid = 0; |
210 | | - $username = $host; |
| 199 | + return array( $userid, $username ); |
211 | 200 | } |
212 | | - return array( $userid, $username ); |
213 | | -} |
214 | 201 | |
215 | | -function importPage( $title ) |
216 | | -{ |
217 | | - echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n"; |
218 | | - $page = fetchPage( $title ); |
| 202 | + private function importPage( $title ) { |
| 203 | + echo "\n<!-- Importing page " . $this->xmlCommentSafe( $title ) . " -->\n"; |
| 204 | + $page = $this->fetchPage( $title ); |
219 | 205 | |
220 | | - $newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) ); |
| 206 | + $newtitle = $this->xmlsafe( str_replace( '_', ' ', $this->recodeText( $title ) ) ); |
221 | 207 | |
222 | | - $munged = mungeFormat( $page->text ); |
223 | | - if ( $munged != $page->text ) { |
224 | | - /** |
225 | | - * Save a *new* revision with the conversion, and put the |
226 | | - * previous last version into the history. |
227 | | - */ |
228 | | - $next = array2object( array( |
229 | | - 'text' => $munged, |
230 | | - 'minor' => 1, |
231 | | - 'username' => 'Conversion script', |
232 | | - 'host' => '127.0.0.1', |
233 | | - 'ts' => time(), |
234 | | - 'summary' => 'link fix', |
235 | | - ) ); |
236 | | - $revisions = array( $page, $next ); |
237 | | - } else { |
238 | | - /** |
239 | | - * Current revision: |
240 | | - */ |
241 | | - $revisions = array( $page ); |
242 | | - } |
243 | | - $xml = <<<XML |
244 | | - <page> |
245 | | - <title>$newtitle</title> |
| 208 | + $munged = $this->mungeFormat( $page->text ); |
| 209 | + if ( $munged != $page->text ) { |
| 210 | + /** |
| 211 | + * Save a *new* revision with the conversion, and put the |
| 212 | + * previous last version into the history. |
| 213 | + */ |
| 214 | + $next = $this->array2object( array( |
| 215 | + 'text' => $munged, |
| 216 | + 'minor' => 1, |
| 217 | + 'username' => 'Conversion script', |
| 218 | + 'host' => '127.0.0.1', |
| 219 | + 'ts' => time(), |
| 220 | + 'summary' => 'link fix', |
| 221 | + ) ); |
| 222 | + $revisions = array( $page, $next ); |
| 223 | + } else { |
| 224 | + /** |
| 225 | + * Current revision: |
| 226 | + */ |
| 227 | + $revisions = array( $page ); |
| 228 | + } |
| 229 | + $xml = <<<XML |
| 230 | + <page> |
| 231 | + <title>$newtitle</title> |
246 | 232 | |
247 | 233 | XML; |
248 | 234 | |
249 | | - # History |
250 | | - $revisions = array_merge( $revisions, fetchKeptPages( $title ) ); |
251 | | - if ( count( $revisions ) == 0 ) { |
252 | | - return NULL; // Was "$sql", which does not appear to be defined. |
253 | | - } |
| 235 | + # History |
| 236 | + $revisions = array_merge( $revisions, $this->fetchKeptPages( $title ) ); |
| 237 | + if ( count( $revisions ) == 0 ) { |
| 238 | + return NULL; // Was "$sql", which does not appear to be defined. |
| 239 | + } |
254 | 240 | |
255 | | - foreach ( $revisions as $rev ) { |
256 | | - $text = xmlsafe( recodeText( $rev->text ) ); |
257 | | - $minor = ( $rev->minor ? '<minor/>' : '' ); |
258 | | - list( /* $userid */ , $username ) = checkUserCache( $rev->username, $rev->host ); |
259 | | - $username = xmlsafe( recodeText( $username ) ); |
260 | | - $timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) ); |
261 | | - $comment = xmlsafe( recodeText( $rev->summary ) ); |
| 241 | + foreach ( $revisions as $rev ) { |
| 242 | + $text = $this->xmlsafe( $this->recodeText( $rev->text ) ); |
| 243 | + $minor = ( $rev->minor ? '<minor/>' : '' ); |
| 244 | + list( /* $userid */ , $username ) = $this->checkUserCache( $rev->username, $rev->host ); |
| 245 | + $username = $this->xmlsafe( $this->recodeText( $username ) ); |
| 246 | + $timestamp = $this->xmlsafe( $this->timestamp2ISO8601( $rev->ts ) ); |
| 247 | + $comment = $this->xmlsafe( $this->recodeText( $rev->summary ) ); |
262 | 248 | |
263 | | - $xml .= <<<XML |
264 | | - <revision> |
265 | | - <timestamp>$timestamp</timestamp> |
266 | | - <contributor><username>$username</username></contributor> |
267 | | - $minor |
268 | | - <comment>$comment</comment> |
269 | | - <text>$text</text> |
270 | | - </revision> |
| 249 | + $xml .= <<<XML |
| 250 | + <revision> |
| 251 | + <timestamp>$timestamp</timestamp> |
| 252 | + <contributor><username>$username</username></contributor> |
| 253 | + $minor |
| 254 | + <comment>$comment</comment> |
| 255 | + <text>$text</text> |
| 256 | + </revision> |
271 | 257 | |
272 | 258 | XML; |
| 259 | + } |
| 260 | + $xml .= "</page>\n\n"; |
| 261 | + return $xml; |
273 | 262 | } |
274 | | - $xml .= "</page>\n\n"; |
275 | | - return $xml; |
276 | | -} |
277 | 263 | |
278 | | -# Whee! |
279 | | -function recodeText( $string ) { |
280 | | - global $wgImportEncoding; |
281 | | - # For currently latin-1 wikis |
282 | | - $string = str_replace( "\r\n", "\n", $string ); |
283 | | - $string = @iconv( $wgImportEncoding, "UTF-8", $string ); |
284 | | - $string = wfMungeToUtf8( $string ); # Any old Ӓ stuff |
285 | | - return $string; |
286 | | -} |
| 264 | + private function recodeText( $string ) { |
| 265 | + # For currently latin-1 wikis |
| 266 | + $string = str_replace( "\r\n", "\n", $string ); |
| 267 | + $string = @iconv( $this->encoding, "UTF-8", $string ); |
| 268 | + $string = $this->mungeToUtf8( $string ); # Any old Ӓ stuff |
| 269 | + return $string; |
| 270 | + } |
287 | 271 | |
288 | | -function wfUtf8Sequence( $codepoint ) { |
289 | | - if ( $codepoint < 0x80 ) return chr( $codepoint ); |
290 | | - if ( $codepoint < 0x800 ) return chr( $codepoint >> 6 & 0x3f | 0xc0 ) . |
291 | | - chr( $codepoint & 0x3f | 0x80 ); |
292 | | - if ( $codepoint < 0x10000 ) return chr( $codepoint >> 12 & 0x0f | 0xe0 ) . |
293 | | - chr( $codepoint >> 6 & 0x3f | 0x80 ) . |
294 | | - chr( $codepoint & 0x3f | 0x80 ); |
295 | | - if ( $codepoint < 0x100000 ) return chr( $codepoint >> 18 & 0x07 | 0xf0 ) . # Double-check this |
296 | | - chr( $codepoint >> 12 & 0x3f | 0x80 ) . |
297 | | - chr( $codepoint >> 6 & 0x3f | 0x80 ) . |
298 | | - chr( $codepoint & 0x3f | 0x80 ); |
299 | | - # Doesn't yet handle outside the BMP |
300 | | - return "&#$codepoint;"; |
301 | | -} |
| 272 | + /** |
| 273 | + * @fixme don't use /e |
| 274 | + */ |
| 275 | + private function mungeToUtf8( $string ) { |
| 276 | + $string = preg_replace ( '/&#([0-9]+);/e', 'wfUtf8Sequence($1)', $string ); |
| 277 | + $string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string ); |
| 278 | + # Should also do named entities here |
| 279 | + return $string; |
| 280 | + } |
302 | 281 | |
303 | | -function wfMungeToUtf8( $string ) { |
304 | | - $string = preg_replace ( '/&#([0-9]+);/e', 'wfUtf8Sequence($1)', $string ); |
305 | | - $string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string ); |
306 | | - # Should also do named entities here |
307 | | - return $string; |
308 | | -} |
| 282 | + private function timestamp2ISO8601( $ts ) { |
| 283 | + # 2003-08-05T18:30:02Z |
| 284 | + return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z'; |
| 285 | + } |
309 | 286 | |
310 | | -function timestamp2ISO8601( $ts ) { |
311 | | - # 2003-08-05T18:30:02Z |
312 | | - return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z'; |
313 | | -} |
314 | | - |
315 | | -function xmlsafe( $string ) { |
316 | 287 | /** |
317 | 288 | * The page may contain old data which has not been properly normalized. |
318 | 289 | * Invalid UTF-8 sequences or forbidden control characters will make our |
319 | 290 | * XML output invalid, so be sure to strip them out. |
| 291 | + * @param String $string Text to clean up |
| 292 | + * @return String |
320 | 293 | */ |
321 | | - $string = UtfNormal::cleanUp( $string ); |
| 294 | + private function xmlsafe( $string ) { |
| 295 | + $string = UtfNormal::cleanUp( $string ); |
| 296 | + $string = htmlspecialchars( $string ); |
| 297 | + return $string; |
| 298 | + } |
322 | 299 | |
323 | | - $string = htmlspecialchars( $string ); |
324 | | - return $string; |
325 | | -} |
| 300 | + private function xmlCommentSafe( $text ) { |
| 301 | + return str_replace( '--', '\\-\\-', $this->xmlsafe( $this->recodeText( $text ) ) ); |
| 302 | + } |
326 | 303 | |
327 | | -function xmlCommentSafe( $text ) { |
328 | | - return str_replace( '--', '\\-\\-', xmlsafe( recodeText( $text ) ) ); |
329 | | -} |
330 | | - |
331 | | - |
332 | | -function array2object( $arr ) { |
333 | | - $o = (object)0; |
334 | | - foreach ( $arr as $x => $y ) { |
335 | | - $o->$x = $y; |
| 304 | + private function array2object( $arr ) { |
| 305 | + $o = (object)0; |
| 306 | + foreach ( $arr as $x => $y ) { |
| 307 | + $o->$x = $y; |
| 308 | + } |
| 309 | + return $o; |
336 | 310 | } |
337 | | - return $o; |
338 | | -} |
339 | 311 | |
| 312 | + /** |
| 313 | + * Make CamelCase and /Talk links work |
| 314 | + */ |
| 315 | + private function mungeFormat( $text ) { |
| 316 | + $this->nowiki = array(); |
| 317 | + $staged = preg_replace_callback( |
| 318 | + '/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s', |
| 319 | + array( $this, 'nowikiPlaceholder' ), $text ); |
340 | 320 | |
341 | | -/** |
342 | | - * Make CamelCase and /Talk links work |
343 | | - */ |
344 | | -function mungeFormat( $text ) { |
345 | | - global $nowiki; |
346 | | - $nowiki = array(); |
347 | | - $staged = preg_replace_callback( |
348 | | - '/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s', |
349 | | - 'nowikiPlaceholder', $text ); |
| 321 | + # This is probably not 100% correct, I'm just |
| 322 | + # glancing at the UseModWiki code. |
| 323 | + $upper = "[A-Z]"; |
| 324 | + $lower = "[a-z_0-9]"; |
| 325 | + $any = "[A-Za-z_0-9]"; |
| 326 | + $camel = "(?:$upper+$lower+$upper+$any*)"; |
| 327 | + $subpage = "(?:\\/$any+)"; |
| 328 | + $substart = "(?:\\/$upper$any*)"; |
350 | 329 | |
351 | | - # This is probably not 100% correct, I'm just |
352 | | - # glancing at the UseModWiki code. |
353 | | - $upper = "[A-Z]"; |
354 | | - $lower = "[a-z_0-9]"; |
355 | | - $any = "[A-Za-z_0-9]"; |
356 | | - $camel = "(?:$upper+$lower+$upper+$any*)"; |
357 | | - $subpage = "(?:\\/$any+)"; |
358 | | - $substart = "(?:\\/$upper$any*)"; |
| 330 | + $munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/", |
| 331 | + '[[$1]]', $staged ); |
359 | 332 | |
360 | | - $munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/", |
361 | | - '[[$1]]', $staged ); |
| 333 | + $final = preg_replace( '/' . preg_quote( $this->placeholder() ) . '/s', |
| 334 | + array( $this, 'nowikiShift' ), $munged ); |
| 335 | + return $final; |
| 336 | + } |
362 | 337 | |
363 | | - $final = preg_replace( '/' . preg_quote( placeholder() ) . '/es', |
364 | | - 'array_shift( $nowiki )', $munged ); |
365 | | - return $final; |
366 | | -} |
| 338 | + private function placeholder( $x = null ) { |
| 339 | + return '\xffplaceholder\xff'; |
| 340 | + } |
367 | 341 | |
| 342 | + public function nowikiPlaceholder( $matches ) { |
| 343 | + $this->nowiki[] = $matches[1]; |
| 344 | + return $this->placeholder(); |
| 345 | + } |
368 | 346 | |
369 | | -function placeholder( $x = null ) { |
370 | | - return '\xffplaceholder\xff'; |
| 347 | + public function nowikiShift() { |
| 348 | + return array_shift( $this->nowiki ); |
| 349 | + } |
371 | 350 | } |
372 | 351 | |
373 | | -function nowikiPlaceholder( $matches ) { |
374 | | - global $nowiki; |
375 | | - $nowiki[] = $matches[1]; |
376 | | - return placeholder(); |
| 352 | +function wfUtf8Sequence( $codepoint ) { |
| 353 | + if ( $codepoint < 0x80 ) { |
| 354 | + return chr( $codepoint ); |
| 355 | + } |
| 356 | + if ( $codepoint < 0x800 ) { |
| 357 | + return chr( $codepoint >> 6 & 0x3f | 0xc0 ) . |
| 358 | + chr( $codepoint & 0x3f | 0x80 ); |
| 359 | + } |
| 360 | + if ( $codepoint < 0x10000 ) { |
| 361 | + return chr( $codepoint >> 12 & 0x0f | 0xe0 ) . |
| 362 | + chr( $codepoint >> 6 & 0x3f | 0x80 ) . |
| 363 | + chr( $codepoint & 0x3f | 0x80 ); |
| 364 | + } |
| 365 | + if ( $codepoint < 0x100000 ) { |
| 366 | + return chr( $codepoint >> 18 & 0x07 | 0xf0 ) . # Double-check this |
| 367 | + chr( $codepoint >> 12 & 0x3f | 0x80 ) . |
| 368 | + chr( $codepoint >> 6 & 0x3f | 0x80 ) . |
| 369 | + chr( $codepoint & 0x3f | 0x80 ); |
| 370 | + } |
| 371 | + # Doesn't yet handle outside the BMP |
| 372 | + return "&#$codepoint;"; |
377 | 373 | } |
378 | 374 | |
379 | | - |
| 375 | +$maintClass = 'ImportUseModWiki'; |
| 376 | +require_once( RUN_MAINTENANCE_IF_MAIN ); |