r81238 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r81237 | r81238 | r81239 >
Date:07:07, 31 January 2011
Author:tstarling
Status:ok (Comments)
Tags:
Comment:
Cleanup for r66268, r66267: merge WikiImporter back into Import.php, where it started. It doesn't really make sense to have a class called WikiImporter in a file called ImportXMLReader.php, and a few little helper classes for it in a file called Import.php.
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/Import.php (modified) (history)
  • /trunk/phase3/includes/ImportXMLReader.php (deleted) (history)

Diff [purge]

Index: trunk/phase3/includes/ImportXMLReader.php
@@ -1,737 +0,0 @@
2 -<?php
3 -/**
4 - * XML file reader for the page data importer
5 - *
6 - * @file
7 - */
8 -
9 -/**
10 - * implements Special:Import
11 - * @ingroup SpecialPage
12 - */
13 -class WikiImporter {
14 - private $reader = null;
15 - private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
16 - private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
17 - private $mDebug;
18 -
19 - /**
20 - * Creates an ImportXMLReader drawing from the source provided
21 - */
22 - function __construct( $source ) {
23 - $this->reader = new XMLReader2();
24 -
25 - stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
26 - $id = UploadSourceAdapter::registerSource( $source );
27 - $this->reader->open( "uploadsource://$id" );
28 -
29 - // Default callbacks
30 - $this->setRevisionCallback( array( $this, "importRevision" ) );
31 - $this->setUploadCallback( array( $this, 'importUpload' ) );
32 - $this->setLogItemCallback( array( $this, 'importLogItem' ) );
33 - $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
34 - }
35 -
36 - private function throwXmlError( $err ) {
37 - $this->debug( "FAILURE: $err" );
38 - wfDebug( "WikiImporter XML error: $err\n" );
39 - }
40 -
41 - private function debug( $data ) {
42 - if( $this->mDebug ) {
43 - wfDebug( "IMPORT: $data\n" );
44 - }
45 - }
46 -
47 - private function warn( $data ) {
48 - wfDebug( "IMPORT: $data\n" );
49 - }
50 -
51 - private function notice( $data ) {
52 - global $wgCommandLineMode;
53 - if( $wgCommandLineMode ) {
54 - print "$data\n";
55 - } else {
56 - global $wgOut;
57 - $wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
58 - }
59 - }
60 -
61 - /**
62 - * Set debug mode...
63 - */
64 - function setDebug( $debug ) {
65 - $this->mDebug = $debug;
66 - }
67 -
68 - /**
69 - * Sets the action to perform as each new page in the stream is reached.
70 - * @param $callback callback
71 - * @return callback
72 - */
73 - public function setPageCallback( $callback ) {
74 - $previous = $this->mPageCallback;
75 - $this->mPageCallback = $callback;
76 - return $previous;
77 - }
78 -
79 - /**
80 - * Sets the action to perform as each page in the stream is completed.
81 - * Callback accepts the page title (as a Title object), a second object
82 - * with the original title form (in case it's been overridden into a
83 - * local namespace), and a count of revisions.
84 - *
85 - * @param $callback callback
86 - * @return callback
87 - */
88 - public function setPageOutCallback( $callback ) {
89 - $previous = $this->mPageOutCallback;
90 - $this->mPageOutCallback = $callback;
91 - return $previous;
92 - }
93 -
94 - /**
95 - * Sets the action to perform as each page revision is reached.
96 - * @param $callback callback
97 - * @return callback
98 - */
99 - public function setRevisionCallback( $callback ) {
100 - $previous = $this->mRevisionCallback;
101 - $this->mRevisionCallback = $callback;
102 - return $previous;
103 - }
104 -
105 - /**
106 - * Sets the action to perform as each file upload version is reached.
107 - * @param $callback callback
108 - * @return callback
109 - */
110 - public function setUploadCallback( $callback ) {
111 - $previous = $this->mUploadCallback;
112 - $this->mUploadCallback = $callback;
113 - return $previous;
114 - }
115 -
116 - /**
117 - * Sets the action to perform as each log item reached.
118 - * @param $callback callback
119 - * @return callback
120 - */
121 - public function setLogItemCallback( $callback ) {
122 - $previous = $this->mLogItemCallback;
123 - $this->mLogItemCallback = $callback;
124 - return $previous;
125 - }
126 -
127 - /**
128 - * Sets the action to perform when site info is encountered
129 - * @param $callback callback
130 - * @return callback
131 - */
132 - public function setSiteInfoCallback( $callback ) {
133 - $previous = $this->mSiteInfoCallback;
134 - $this->mSiteInfoCallback = $callback;
135 - return $previous;
136 - }
137 -
138 - /**
139 - * Set a target namespace to override the defaults
140 - */
141 - public function setTargetNamespace( $namespace ) {
142 - if( is_null( $namespace ) ) {
143 - // Don't override namespaces
144 - $this->mTargetNamespace = null;
145 - } elseif( $namespace >= 0 ) {
146 - // FIXME: Check for validity
147 - $this->mTargetNamespace = intval( $namespace );
148 - } else {
149 - return false;
150 - }
151 - }
152 -
153 - /**
154 - * Default per-revision callback, performs the import.
155 - * @param $revision WikiRevision
156 - */
157 - public function importRevision( $revision ) {
158 - $dbw = wfGetDB( DB_MASTER );
159 - return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
160 - }
161 -
162 - /**
163 - * Default per-revision callback, performs the import.
164 - * @param $rev WikiRevision
165 - */
166 - public function importLogItem( $rev ) {
167 - $dbw = wfGetDB( DB_MASTER );
168 - return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
169 - }
170 -
171 - /**
172 - * Dummy for now...
173 - */
174 - public function importUpload( $revision ) {
175 - //$dbw = wfGetDB( DB_MASTER );
176 - //return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
177 - return false;
178 - }
179 -
180 - /**
181 - * Mostly for hook use
182 - */
183 - public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
184 - $args = func_get_args();
185 - return wfRunHooks( 'AfterImportPage', $args );
186 - }
187 -
188 - /**
189 - * Alternate per-revision callback, for debugging.
190 - * @param $revision WikiRevision
191 - */
192 - public function debugRevisionHandler( &$revision ) {
193 - $this->debug( "Got revision:" );
194 - if( is_object( $revision->title ) ) {
195 - $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
196 - } else {
197 - $this->debug( "-- Title: <invalid>" );
198 - }
199 - $this->debug( "-- User: " . $revision->user_text );
200 - $this->debug( "-- Timestamp: " . $revision->timestamp );
201 - $this->debug( "-- Comment: " . $revision->comment );
202 - $this->debug( "-- Text: " . $revision->text );
203 - }
204 -
205 - /**
206 - * Notify the callback function when a new <page> is reached.
207 - * @param $title Title
208 - */
209 - function pageCallback( $title ) {
210 - if( isset( $this->mPageCallback ) ) {
211 - call_user_func( $this->mPageCallback, $title );
212 - }
213 - }
214 -
215 - /**
216 - * Notify the callback function when a </page> is closed.
217 - * @param $title Title
218 - * @param $origTitle Title
219 - * @param $revCount Integer
220 - * @param $sucCount Int: number of revisions for which callback returned true
221 - * @param $pageInfo Array: associative array of page information
222 - */
223 - private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
224 - if( isset( $this->mPageOutCallback ) ) {
225 - $args = func_get_args();
226 - call_user_func_array( $this->mPageOutCallback, $args );
227 - }
228 - }
229 -
230 - /**
231 - * Notify the callback function of a revision
232 - * @param $revision A WikiRevision object
233 - */
234 - private function revisionCallback( $revision ) {
235 - if ( isset( $this->mRevisionCallback ) ) {
236 - return call_user_func_array( $this->mRevisionCallback,
237 - array( $revision, $this ) );
238 - } else {
239 - return false;
240 - }
241 - }
242 -
243 - /**
244 - * Notify the callback function of a new log item
245 - * @param $revision A WikiRevision object
246 - */
247 - private function logItemCallback( $revision ) {
248 - if ( isset( $this->mLogItemCallback ) ) {
249 - return call_user_func_array( $this->mLogItemCallback,
250 - array( $revision, $this ) );
251 - } else {
252 - return false;
253 - }
254 - }
255 -
256 - /**
257 - * Shouldn't something like this be built-in to XMLReader?
258 - * Fetches text contents of the current element, assuming
259 - * no sub-elements or such scary things.
260 - * @return string
261 - * @access private
262 - */
263 - private function nodeContents() {
264 - return $this->reader->nodeContents();
265 - }
266 -
267 - # --------------
268 -
269 - /** Left in for debugging */
270 - private function dumpElement() {
271 - static $lookup = null;
272 - if (!$lookup) {
273 - $xmlReaderConstants = array(
274 - "NONE",
275 - "ELEMENT",
276 - "ATTRIBUTE",
277 - "TEXT",
278 - "CDATA",
279 - "ENTITY_REF",
280 - "ENTITY",
281 - "PI",
282 - "COMMENT",
283 - "DOC",
284 - "DOC_TYPE",
285 - "DOC_FRAGMENT",
286 - "NOTATION",
287 - "WHITESPACE",
288 - "SIGNIFICANT_WHITESPACE",
289 - "END_ELEMENT",
290 - "END_ENTITY",
291 - "XML_DECLARATION",
292 - );
293 - $lookup = array();
294 -
295 - foreach( $xmlReaderConstants as $name ) {
296 - $lookup[constant("XmlReader::$name")] = $name;
297 - }
298 - }
299 -
300 - print( var_dump(
301 - $lookup[$this->reader->nodeType],
302 - $this->reader->name,
303 - $this->reader->value
304 - )."\n\n" );
305 - }
306 -
307 - /**
308 - * Primary entry point
309 - */
310 - public function doImport() {
311 - $this->reader->read();
312 -
313 - if ( $this->reader->name != 'mediawiki' ) {
314 - throw new MWException( "Expected <mediawiki> tag, got ".
315 - $this->reader->name );
316 - }
317 - $this->debug( "<mediawiki> tag is correct." );
318 -
319 - $this->debug( "Starting primary dump processing loop." );
320 -
321 - $keepReading = $this->reader->read();
322 - $skip = false;
323 - while ( $keepReading ) {
324 - $tag = $this->reader->name;
325 - $type = $this->reader->nodeType;
326 -
327 - if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', $this->reader ) ) {
328 - // Do nothing
329 - } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
330 - break;
331 - } elseif ( $tag == 'siteinfo' ) {
332 - $this->handleSiteInfo();
333 - } elseif ( $tag == 'page' ) {
334 - $this->handlePage();
335 - } elseif ( $tag == 'logitem' ) {
336 - $this->handleLogItem();
337 - } elseif ( $tag != '#text' ) {
338 - $this->warn( "Unhandled top-level XML tag $tag" );
339 -
340 - $skip = true;
341 - }
342 -
343 - if ($skip) {
344 - $keepReading = $this->reader->next();
345 - $skip = false;
346 - $this->debug( "Skip" );
347 - } else {
348 - $keepReading = $this->reader->read();
349 - }
350 - }
351 -
352 - return true;
353 - }
354 -
355 - private function handleSiteInfo() {
356 - // Site info is useful, but not actually used for dump imports.
357 - // Includes a quick short-circuit to save performance.
358 - if ( ! $this->mSiteInfoCallback ) {
359 - $this->reader->next();
360 - return true;
361 - }
362 - throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
363 - }
364 -
365 - private function handleLogItem() {
366 - $this->debug( "Enter log item handler." );
367 - $logInfo = array();
368 -
369 - // Fields that can just be stuffed in the pageInfo object
370 - $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
371 - 'logtitle', 'params' );
372 -
373 - while ( $this->reader->read() ) {
374 - if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
375 - $this->reader->name == 'logitem') {
376 - break;
377 - }
378 -
379 - $tag = $this->reader->name;
380 -
381 - if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
382 - $this->reader, $logInfo ) ) {
383 - // Do nothing
384 - } elseif ( in_array( $tag, $normalFields ) ) {
385 - $logInfo[$tag] = $this->nodeContents();
386 - } elseif ( $tag == 'contributor' ) {
387 - $logInfo['contributor'] = $this->handleContributor();
388 - } elseif ( $tag != '#text' ) {
389 - $this->warn( "Unhandled log-item XML tag $tag" );
390 - }
391 - }
392 -
393 - $this->processLogItem( $logInfo );
394 - }
395 -
396 - private function processLogItem( $logInfo ) {
397 - $revision = new WikiRevision;
398 -
399 - $revision->setID( $logInfo['id'] );
400 - $revision->setType( $logInfo['type'] );
401 - $revision->setAction( $logInfo['action'] );
402 - $revision->setTimestamp( $logInfo['timestamp'] );
403 - $revision->setParams( $logInfo['params'] );
404 - $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
405 -
406 - if ( isset( $logInfo['comment'] ) ) {
407 - $revision->setComment( $logInfo['comment'] );
408 - }
409 -
410 - if ( isset( $logInfo['contributor']['ip'] ) ) {
411 - $revision->setUserIP( $logInfo['contributor']['ip'] );
412 - }
413 - if ( isset( $logInfo['contributor']['username'] ) ) {
414 - $revision->setUserName( $logInfo['contributor']['username'] );
415 - }
416 -
417 - return $this->logItemCallback( $revision );
418 - }
419 -
420 - private function handlePage() {
421 - // Handle page data.
422 - $this->debug( "Enter page handler." );
423 - $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
424 -
425 - // Fields that can just be stuffed in the pageInfo object
426 - $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
427 -
428 - $skip = false;
429 - $badTitle = false;
430 -
431 - while ( $skip ? $this->reader->next() : $this->reader->read() ) {
432 - if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
433 - $this->reader->name == 'page') {
434 - break;
435 - }
436 -
437 - $tag = $this->reader->name;
438 -
439 - if ( $badTitle ) {
440 - // The title is invalid, bail out of this page
441 - $skip = true;
442 - } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this->reader,
443 - &$pageInfo ) ) ) {
444 - // Do nothing
445 - } elseif ( in_array( $tag, $normalFields ) ) {
446 - $pageInfo[$tag] = $this->nodeContents();
447 - if ( $tag == 'title' ) {
448 - $title = $this->processTitle( $pageInfo['title'] );
449 -
450 - if ( !$title ) {
451 - $badTitle = true;
452 - $skip = true;
453 - }
454 -
455 - $this->pageCallback( $title );
456 - list( $pageInfo['_title'], $origTitle ) = $title;
457 - }
458 - } elseif ( $tag == 'revision' ) {
459 - $this->handleRevision( $pageInfo );
460 - } elseif ( $tag == 'upload' ) {
461 - $this->handleUpload( $pageInfo );
462 - } elseif ( $tag != '#text' ) {
463 - $this->warn( "Unhandled page XML tag $tag" );
464 - $skip = true;
465 - }
466 - }
467 -
468 - $this->pageOutCallback( $pageInfo['_title'], $origTitle,
469 - $pageInfo['revisionCount'],
470 - $pageInfo['successfulRevisionCount'],
471 - $pageInfo );
472 - }
473 -
474 - private function handleRevision( &$pageInfo ) {
475 - $this->debug( "Enter revision handler" );
476 - $revisionInfo = array();
477 -
478 - $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );
479 -
480 - $skip = false;
481 -
482 - while ( $skip ? $this->reader->next() : $this->reader->read() ) {
483 - if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
484 - $this->reader->name == 'revision') {
485 - break;
486 - }
487 -
488 - $tag = $this->reader->name;
489 -
490 - if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', $this->reader,
491 - $pageInfo, $revisionInfo ) ) {
492 - // Do nothing
493 - } elseif ( in_array( $tag, $normalFields ) ) {
494 - $revisionInfo[$tag] = $this->nodeContents();
495 - } elseif ( $tag == 'contributor' ) {
496 - $revisionInfo['contributor'] = $this->handleContributor();
497 - } elseif ( $tag != '#text' ) {
498 - $this->warn( "Unhandled revision XML tag $tag" );
499 - $skip = true;
500 - }
501 - }
502 -
503 - $pageInfo['revisionCount']++;
504 - if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
505 - $pageInfo['successfulRevisionCount']++;
506 - }
507 - }
508 -
509 - private function processRevision( $pageInfo, $revisionInfo ) {
510 - $revision = new WikiRevision;
511 -
512 - $revision->setID( $revisionInfo['id'] );
513 - $revision->setText( $revisionInfo['text'] );
514 - $revision->setTitle( $pageInfo['_title'] );
515 - $revision->setTimestamp( $revisionInfo['timestamp'] );
516 -
517 - if ( isset( $revisionInfo['comment'] ) ) {
518 - $revision->setComment( $revisionInfo['comment'] );
519 - }
520 -
521 - if ( isset( $revisionInfo['minor'] ) )
522 - $revision->setMinor( true );
523 -
524 - if ( isset( $revisionInfo['contributor']['ip'] ) ) {
525 - $revision->setUserIP( $revisionInfo['contributor']['ip'] );
526 - }
527 - if ( isset( $revisionInfo['contributor']['username'] ) ) {
528 - $revision->setUserName( $revisionInfo['contributor']['username'] );
529 - }
530 -
531 - return $this->revisionCallback( $revision );
532 - }
533 -
534 - private function handleUpload( &$pageInfo ) {
535 - $this->debug( "Enter upload handler" );
536 - $uploadInfo = array();
537 -
538 - $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
539 - 'src', 'size' );
540 -
541 - $skip = false;
542 -
543 - while ( $skip ? $this->reader->next() : $this->reader->read() ) {
544 - if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
545 - $this->reader->name == 'upload') {
546 - break;
547 - }
548 -
549 - $tag = $this->reader->name;
550 -
551 - if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this->reader,
552 - $pageInfo ) ) {
553 - // Do nothing
554 - } elseif ( in_array( $tag, $normalFields ) ) {
555 - $uploadInfo[$tag] = $this->nodeContents();
556 - } elseif ( $tag == 'contributor' ) {
557 - $uploadInfo['contributor'] = $this->handleContributor();
558 - } elseif ( $tag != '#text' ) {
559 - $this->warn( "Unhandled upload XML tag $tag" );
560 - $skip = true;
561 - }
562 - }
563 -
564 - return $this->processUpload( $pageInfo, $uploadInfo );
565 - }
566 -
567 - private function processUpload( $pageInfo, $uploadInfo ) {
568 - $revision = new WikiRevision;
569 -
570 - $revision->setTitle( $pageInfo['_title'] );
571 - $revision->setID( $uploadInfo['id'] );
572 - $revision->setTimestamp( $uploadInfo['timestamp'] );
573 - $revision->setText( $uploadInfo['text'] );
574 - $revision->setFilename( $uploadInfo['filename'] );
575 - $revision->setSrc( $uploadInfo['src'] );
576 - $revision->setSize( intval( $uploadInfo['size'] ) );
577 - $revision->setComment( $uploadInfo['comment'] );
578 -
579 - if ( isset( $uploadInfo['contributor']['ip'] ) ) {
580 - $revision->setUserIP( $uploadInfo['contributor']['ip'] );
581 - }
582 - if ( isset( $uploadInfo['contributor']['username'] ) ) {
583 - $revision->setUserName( $uploadInfo['contributor']['username'] );
584 - }
585 -
586 - return $this->uploadCallback( $revision );
587 - }
588 -
589 - private function handleContributor() {
590 - $fields = array( 'id', 'ip', 'username' );
591 - $info = array();
592 -
593 - while ( $this->reader->read() ) {
594 - if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
595 - $this->reader->name == 'contributor') {
596 - break;
597 - }
598 -
599 - $tag = $this->reader->name;
600 -
601 - if ( in_array( $tag, $fields ) ) {
602 - $info[$tag] = $this->nodeContents();
603 - }
604 - }
605 -
606 - return $info;
607 - }
608 -
609 - private function processTitle( $text ) {
610 - $workTitle = $text;
611 - $origTitle = Title::newFromText( $workTitle );
612 -
613 - if( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
614 - $title = Title::makeTitle( $this->mTargetNamespace,
615 - $origTitle->getDBkey() );
616 - } else {
617 - $title = Title::newFromText( $workTitle );
618 - }
619 -
620 - if( is_null( $title ) ) {
621 - // Invalid page title? Ignore the page
622 - $this->notice( "Skipping invalid page title '$workTitle'" );
623 - return false;
624 - } elseif( $title->getInterwiki() != '' ) {
625 - $this->notice( "Skipping interwiki page title '$workTitle'" );
626 - return false;
627 - }
628 -
629 - return array( $origTitle, $title );
630 - }
631 -}
632 -
633 -/** This is a horrible hack used to keep source compatibility */
634 -class UploadSourceAdapter {
635 - static $sourceRegistrations = array();
636 -
637 - private $mSource;
638 - private $mBuffer;
639 - private $mPosition;
640 -
641 - static function registerSource( $source ) {
642 - $id = wfGenerateToken();
643 -
644 - self::$sourceRegistrations[$id] = $source;
645 -
646 - return $id;
647 - }
648 -
649 - function stream_open( $path, $mode, $options, &$opened_path ) {
650 - $url = parse_url($path);
651 - $id = $url['host'];
652 -
653 - if ( !isset( self::$sourceRegistrations[$id] ) ) {
654 - return false;
655 - }
656 -
657 - $this->mSource = self::$sourceRegistrations[$id];
658 -
659 - return true;
660 - }
661 -
662 - function stream_read( $count ) {
663 - $return = '';
664 - $leave = false;
665 -
666 - while ( !$leave && !$this->mSource->atEnd() &&
667 - strlen($this->mBuffer) < $count ) {
668 - $read = $this->mSource->readChunk();
669 -
670 - if ( !strlen($read) ) {
671 - $leave = true;
672 - }
673 -
674 - $this->mBuffer .= $read;
675 - }
676 -
677 - if ( strlen($this->mBuffer) ) {
678 - $return = substr( $this->mBuffer, 0, $count );
679 - $this->mBuffer = substr( $this->mBuffer, $count );
680 - }
681 -
682 - $this->mPosition += strlen($return);
683 -
684 - return $return;
685 - }
686 -
687 - function stream_write( $data ) {
688 - return false;
689 - }
690 -
691 - function stream_tell() {
692 - return $this->mPosition;
693 - }
694 -
695 - function stream_eof() {
696 - return $this->mSource->atEnd();
697 - }
698 -
699 - function url_stat() {
700 - $result = array();
701 -
702 - $result['dev'] = $result[0] = 0;
703 - $result['ino'] = $result[1] = 0;
704 - $result['mode'] = $result[2] = 0;
705 - $result['nlink'] = $result[3] = 0;
706 - $result['uid'] = $result[4] = 0;
707 - $result['gid'] = $result[5] = 0;
708 - $result['rdev'] = $result[6] = 0;
709 - $result['size'] = $result[7] = 0;
710 - $result['atime'] = $result[8] = 0;
711 - $result['mtime'] = $result[9] = 0;
712 - $result['ctime'] = $result[10] = 0;
713 - $result['blksize'] = $result[11] = 0;
714 - $result['blocks'] = $result[12] = 0;
715 -
716 - return $result;
717 - }
718 -}
719 -
720 -class XMLReader2 extends XMLReader {
721 - function nodeContents() {
722 - if( $this->isEmptyElement ) {
723 - return "";
724 - }
725 - $buffer = "";
726 - while( $this->read() ) {
727 - switch( $this->nodeType ) {
728 - case XmlReader::TEXT:
729 - case XmlReader::SIGNIFICANT_WHITESPACE:
730 - $buffer .= $this->value;
731 - break;
732 - case XmlReader::END_ELEMENT:
733 - return $buffer;
734 - }
735 - }
736 - return $this->close();
737 - }
738 -}
Index: trunk/phase3/includes/AutoLoader.php
@@ -681,7 +681,7 @@
682 682 'WantedFilesPage' => 'includes/specials/SpecialWantedfiles.php',
683 683 'WantedPagesPage' => 'includes/specials/SpecialWantedpages.php',
684 684 'WantedTemplatesPage' => 'includes/specials/SpecialWantedtemplates.php',
685 - 'WikiImporter' => 'includes/ImportXMLReader.php',
 685+ 'WikiImporter' => 'includes/Import.php',
686 686 'WikiRevision' => 'includes/Import.php',
687 687 'WithoutInterwikiPage' => 'includes/specials/SpecialWithoutinterwiki.php',
688 688
Index: trunk/phase3/includes/Import.php
@@ -25,6 +25,739 @@
26 26 */
27 27
28 28 /**
 29+ * XML file reader for the page data importer
 30+ *
 31+ * implements Special:Import
 32+ * @ingroup SpecialPage
 33+ */
 34+class WikiImporter {
 35+ private $reader = null;
 36+ private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
 37+ private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
 38+ private $mDebug;
 39+
 40+ /**
 41+ * Creates an ImportXMLReader drawing from the source provided
 42+ */
 43+ function __construct( $source ) {
 44+ $this->reader = new XMLReader2();
 45+
 46+ stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
 47+ $id = UploadSourceAdapter::registerSource( $source );
 48+ $this->reader->open( "uploadsource://$id" );
 49+
 50+ // Default callbacks
 51+ $this->setRevisionCallback( array( $this, "importRevision" ) );
 52+ $this->setUploadCallback( array( $this, 'importUpload' ) );
 53+ $this->setLogItemCallback( array( $this, 'importLogItem' ) );
 54+ $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
 55+ }
 56+
 57+ private function throwXmlError( $err ) {
 58+ $this->debug( "FAILURE: $err" );
 59+ wfDebug( "WikiImporter XML error: $err\n" );
 60+ }
 61+
 62+ private function debug( $data ) {
 63+ if( $this->mDebug ) {
 64+ wfDebug( "IMPORT: $data\n" );
 65+ }
 66+ }
 67+
 68+ private function warn( $data ) {
 69+ wfDebug( "IMPORT: $data\n" );
 70+ }
 71+
 72+ private function notice( $data ) {
 73+ global $wgCommandLineMode;
 74+ if( $wgCommandLineMode ) {
 75+ print "$data\n";
 76+ } else {
 77+ global $wgOut;
 78+ $wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
 79+ }
 80+ }
 81+
 82+ /**
 83+ * Set debug mode...
 84+ */
 85+ function setDebug( $debug ) {
 86+ $this->mDebug = $debug;
 87+ }
 88+
 89+ /**
 90+ * Sets the action to perform as each new page in the stream is reached.
 91+ * @param $callback callback
 92+ * @return callback
 93+ */
 94+ public function setPageCallback( $callback ) {
 95+ $previous = $this->mPageCallback;
 96+ $this->mPageCallback = $callback;
 97+ return $previous;
 98+ }
 99+
 100+ /**
 101+ * Sets the action to perform as each page in the stream is completed.
 102+ * Callback accepts the page title (as a Title object), a second object
 103+ * with the original title form (in case it's been overridden into a
 104+ * local namespace), and a count of revisions.
 105+ *
 106+ * @param $callback callback
 107+ * @return callback
 108+ */
 109+ public function setPageOutCallback( $callback ) {
 110+ $previous = $this->mPageOutCallback;
 111+ $this->mPageOutCallback = $callback;
 112+ return $previous;
 113+ }
 114+
 115+ /**
 116+ * Sets the action to perform as each page revision is reached.
 117+ * @param $callback callback
 118+ * @return callback
 119+ */
 120+ public function setRevisionCallback( $callback ) {
 121+ $previous = $this->mRevisionCallback;
 122+ $this->mRevisionCallback = $callback;
 123+ return $previous;
 124+ }
 125+
 126+ /**
 127+ * Sets the action to perform as each file upload version is reached.
 128+ * @param $callback callback
 129+ * @return callback
 130+ */
 131+ public function setUploadCallback( $callback ) {
 132+ $previous = $this->mUploadCallback;
 133+ $this->mUploadCallback = $callback;
 134+ return $previous;
 135+ }
 136+
 137+ /**
 138+ * Sets the action to perform as each log item reached.
 139+ * @param $callback callback
 140+ * @return callback
 141+ */
 142+ public function setLogItemCallback( $callback ) {
 143+ $previous = $this->mLogItemCallback;
 144+ $this->mLogItemCallback = $callback;
 145+ return $previous;
 146+ }
 147+
 148+ /**
 149+ * Sets the action to perform when site info is encountered
 150+ * @param $callback callback
 151+ * @return callback
 152+ */
 153+ public function setSiteInfoCallback( $callback ) {
 154+ $previous = $this->mSiteInfoCallback;
 155+ $this->mSiteInfoCallback = $callback;
 156+ return $previous;
 157+ }
 158+
 159+ /**
 160+ * Set a target namespace to override the defaults
 161+ */
 162+ public function setTargetNamespace( $namespace ) {
 163+ if( is_null( $namespace ) ) {
 164+ // Don't override namespaces
 165+ $this->mTargetNamespace = null;
 166+ } elseif( $namespace >= 0 ) {
 167+ // FIXME: Check for validity
 168+ $this->mTargetNamespace = intval( $namespace );
 169+ } else {
 170+ return false;
 171+ }
 172+ }
 173+
 174+ /**
 175+ * Default per-revision callback, performs the import.
 176+ * @param $revision WikiRevision
 177+ */
 178+ public function importRevision( $revision ) {
 179+ $dbw = wfGetDB( DB_MASTER );
 180+ return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
 181+ }
 182+
 183+ /**
 184+ * Default per-revision callback, performs the import.
 185+ * @param $rev WikiRevision
 186+ */
 187+ public function importLogItem( $rev ) {
 188+ $dbw = wfGetDB( DB_MASTER );
 189+ return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
 190+ }
 191+
 192+ /**
 193+ * Dummy for now...
 194+ */
 195+ public function importUpload( $revision ) {
 196+ //$dbw = wfGetDB( DB_MASTER );
 197+ //return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
 198+ return false;
 199+ }
 200+
 201+ /**
 202+ * Mostly for hook use
 203+ */
 204+ public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
 205+ $args = func_get_args();
 206+ return wfRunHooks( 'AfterImportPage', $args );
 207+ }
 208+
 209+ /**
 210+ * Alternate per-revision callback, for debugging.
 211+ * @param $revision WikiRevision
 212+ */
 213+ public function debugRevisionHandler( &$revision ) {
 214+ $this->debug( "Got revision:" );
 215+ if( is_object( $revision->title ) ) {
 216+ $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
 217+ } else {
 218+ $this->debug( "-- Title: <invalid>" );
 219+ }
 220+ $this->debug( "-- User: " . $revision->user_text );
 221+ $this->debug( "-- Timestamp: " . $revision->timestamp );
 222+ $this->debug( "-- Comment: " . $revision->comment );
 223+ $this->debug( "-- Text: " . $revision->text );
 224+ }
 225+
 226+ /**
 227+ * Notify the callback function when a new <page> is reached.
 228+ * @param $title Title
 229+ */
 230+ function pageCallback( $title ) {
 231+ if( isset( $this->mPageCallback ) ) {
 232+ call_user_func( $this->mPageCallback, $title );
 233+ }
 234+ }
 235+
 236+ /**
 237+ * Notify the callback function when a </page> is closed.
 238+ * @param $title Title
 239+ * @param $origTitle Title
 240+ * @param $revCount Integer
 241+ * @param $sucCount Int: number of revisions for which callback returned true
 242+ * @param $pageInfo Array: associative array of page information
 243+ */
 244+ private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
 245+ if( isset( $this->mPageOutCallback ) ) {
 246+ $args = func_get_args();
 247+ call_user_func_array( $this->mPageOutCallback, $args );
 248+ }
 249+ }
 250+
 251+ /**
 252+ * Notify the callback function of a revision
 253+ * @param $revision A WikiRevision object
 254+ */
 255+ private function revisionCallback( $revision ) {
 256+ if ( isset( $this->mRevisionCallback ) ) {
 257+ return call_user_func_array( $this->mRevisionCallback,
 258+ array( $revision, $this ) );
 259+ } else {
 260+ return false;
 261+ }
 262+ }
 263+
 264+ /**
 265+ * Notify the callback function of a new log item
 266+ * @param $revision A WikiRevision object
 267+ */
 268+ private function logItemCallback( $revision ) {
 269+ if ( isset( $this->mLogItemCallback ) ) {
 270+ return call_user_func_array( $this->mLogItemCallback,
 271+ array( $revision, $this ) );
 272+ } else {
 273+ return false;
 274+ }
 275+ }
 276+
 277+ /**
 278+ * Shouldn't something like this be built-in to XMLReader?
 279+ * Fetches text contents of the current element, assuming
 280+ * no sub-elements or such scary things.
 281+ * @return string
 282+ * @access private
 283+ */
 284+ private function nodeContents() {
 285+ return $this->reader->nodeContents();
 286+ }
 287+
 288+ # --------------
 289+
 290+ /** Left in for debugging */
 291+ private function dumpElement() {
 292+ static $lookup = null;
 293+ if (!$lookup) {
 294+ $xmlReaderConstants = array(
 295+ "NONE",
 296+ "ELEMENT",
 297+ "ATTRIBUTE",
 298+ "TEXT",
 299+ "CDATA",
 300+ "ENTITY_REF",
 301+ "ENTITY",
 302+ "PI",
 303+ "COMMENT",
 304+ "DOC",
 305+ "DOC_TYPE",
 306+ "DOC_FRAGMENT",
 307+ "NOTATION",
 308+ "WHITESPACE",
 309+ "SIGNIFICANT_WHITESPACE",
 310+ "END_ELEMENT",
 311+ "END_ENTITY",
 312+ "XML_DECLARATION",
 313+ );
 314+ $lookup = array();
 315+
 316+ foreach( $xmlReaderConstants as $name ) {
 317+ $lookup[constant("XmlReader::$name")] = $name;
 318+ }
 319+ }
 320+
 321+ print( var_dump(
 322+ $lookup[$this->reader->nodeType],
 323+ $this->reader->name,
 324+ $this->reader->value
 325+ )."\n\n" );
 326+ }
 327+
 328+ /**
 329+ * Primary entry point
 330+ */
 331+ public function doImport() {
 332+ $this->reader->read();
 333+
 334+ if ( $this->reader->name != 'mediawiki' ) {
 335+ throw new MWException( "Expected <mediawiki> tag, got ".
 336+ $this->reader->name );
 337+ }
 338+ $this->debug( "<mediawiki> tag is correct." );
 339+
 340+ $this->debug( "Starting primary dump processing loop." );
 341+
 342+ $keepReading = $this->reader->read();
 343+ $skip = false;
 344+ while ( $keepReading ) {
 345+ $tag = $this->reader->name;
 346+ $type = $this->reader->nodeType;
 347+
 348+ if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', $this->reader ) ) {
 349+ // Do nothing
 350+ } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
 351+ break;
 352+ } elseif ( $tag == 'siteinfo' ) {
 353+ $this->handleSiteInfo();
 354+ } elseif ( $tag == 'page' ) {
 355+ $this->handlePage();
 356+ } elseif ( $tag == 'logitem' ) {
 357+ $this->handleLogItem();
 358+ } elseif ( $tag != '#text' ) {
 359+ $this->warn( "Unhandled top-level XML tag $tag" );
 360+
 361+ $skip = true;
 362+ }
 363+
 364+ if ($skip) {
 365+ $keepReading = $this->reader->next();
 366+ $skip = false;
 367+ $this->debug( "Skip" );
 368+ } else {
 369+ $keepReading = $this->reader->read();
 370+ }
 371+ }
 372+
 373+ return true;
 374+ }
 375+
 376+ private function handleSiteInfo() {
 377+ // Site info is useful, but not actually used for dump imports.
 378+ // Includes a quick short-circuit to save performance.
 379+ if ( ! $this->mSiteInfoCallback ) {
 380+ $this->reader->next();
 381+ return true;
 382+ }
 383+ throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
 384+ }
 385+
 386+ private function handleLogItem() {
 387+ $this->debug( "Enter log item handler." );
 388+ $logInfo = array();
 389+
 390+ // Fields that can just be stuffed in the pageInfo object
 391+ $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
 392+ 'logtitle', 'params' );
 393+
 394+ while ( $this->reader->read() ) {
 395+ if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
 396+ $this->reader->name == 'logitem') {
 397+ break;
 398+ }
 399+
 400+ $tag = $this->reader->name;
 401+
 402+ if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
 403+ $this->reader, $logInfo ) ) {
 404+ // Do nothing
 405+ } elseif ( in_array( $tag, $normalFields ) ) {
 406+ $logInfo[$tag] = $this->nodeContents();
 407+ } elseif ( $tag == 'contributor' ) {
 408+ $logInfo['contributor'] = $this->handleContributor();
 409+ } elseif ( $tag != '#text' ) {
 410+ $this->warn( "Unhandled log-item XML tag $tag" );
 411+ }
 412+ }
 413+
 414+ $this->processLogItem( $logInfo );
 415+ }
 416+
 417+ private function processLogItem( $logInfo ) {
 418+ $revision = new WikiRevision;
 419+
 420+ $revision->setID( $logInfo['id'] );
 421+ $revision->setType( $logInfo['type'] );
 422+ $revision->setAction( $logInfo['action'] );
 423+ $revision->setTimestamp( $logInfo['timestamp'] );
 424+ $revision->setParams( $logInfo['params'] );
 425+ $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
 426+
 427+ if ( isset( $logInfo['comment'] ) ) {
 428+ $revision->setComment( $logInfo['comment'] );
 429+ }
 430+
 431+ if ( isset( $logInfo['contributor']['ip'] ) ) {
 432+ $revision->setUserIP( $logInfo['contributor']['ip'] );
 433+ }
 434+ if ( isset( $logInfo['contributor']['username'] ) ) {
 435+ $revision->setUserName( $logInfo['contributor']['username'] );
 436+ }
 437+
 438+ return $this->logItemCallback( $revision );
 439+ }
 440+
 441+ private function handlePage() {
 442+ // Handle page data.
 443+ $this->debug( "Enter page handler." );
 444+ $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
 445+
 446+ // Fields that can just be stuffed in the pageInfo object
 447+ $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
 448+
 449+ $skip = false;
 450+ $badTitle = false;
 451+
 452+ while ( $skip ? $this->reader->next() : $this->reader->read() ) {
 453+ if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
 454+ $this->reader->name == 'page') {
 455+ break;
 456+ }
 457+
 458+ $tag = $this->reader->name;
 459+
 460+ if ( $badTitle ) {
 461+ // The title is invalid, bail out of this page
 462+ $skip = true;
 463+ } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this->reader,
 464+ &$pageInfo ) ) ) {
 465+ // Do nothing
 466+ } elseif ( in_array( $tag, $normalFields ) ) {
 467+ $pageInfo[$tag] = $this->nodeContents();
 468+ if ( $tag == 'title' ) {
 469+ $title = $this->processTitle( $pageInfo['title'] );
 470+
 471+ if ( !$title ) {
 472+ $badTitle = true;
 473+ $skip = true;
 474+ }
 475+
 476+ $this->pageCallback( $title );
 477+ list( $pageInfo['_title'], $origTitle ) = $title;
 478+ }
 479+ } elseif ( $tag == 'revision' ) {
 480+ $this->handleRevision( $pageInfo );
 481+ } elseif ( $tag == 'upload' ) {
 482+ $this->handleUpload( $pageInfo );
 483+ } elseif ( $tag != '#text' ) {
 484+ $this->warn( "Unhandled page XML tag $tag" );
 485+ $skip = true;
 486+ }
 487+ }
 488+
 489+ $this->pageOutCallback( $pageInfo['_title'], $origTitle,
 490+ $pageInfo['revisionCount'],
 491+ $pageInfo['successfulRevisionCount'],
 492+ $pageInfo );
 493+ }
 494+
 495+ private function handleRevision( &$pageInfo ) {
 496+ $this->debug( "Enter revision handler" );
 497+ $revisionInfo = array();
 498+
 499+ $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );
 500+
 501+ $skip = false;
 502+
 503+ while ( $skip ? $this->reader->next() : $this->reader->read() ) {
 504+ if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
 505+ $this->reader->name == 'revision') {
 506+ break;
 507+ }
 508+
 509+ $tag = $this->reader->name;
 510+
 511+ if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', $this->reader,
 512+ $pageInfo, $revisionInfo ) ) {
 513+ // Do nothing
 514+ } elseif ( in_array( $tag, $normalFields ) ) {
 515+ $revisionInfo[$tag] = $this->nodeContents();
 516+ } elseif ( $tag == 'contributor' ) {
 517+ $revisionInfo['contributor'] = $this->handleContributor();
 518+ } elseif ( $tag != '#text' ) {
 519+ $this->warn( "Unhandled revision XML tag $tag" );
 520+ $skip = true;
 521+ }
 522+ }
 523+
 524+ $pageInfo['revisionCount']++;
 525+ if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
 526+ $pageInfo['successfulRevisionCount']++;
 527+ }
 528+ }
 529+
 530+ private function processRevision( $pageInfo, $revisionInfo ) {
 531+ $revision = new WikiRevision;
 532+
 533+ $revision->setID( $revisionInfo['id'] );
 534+ $revision->setText( $revisionInfo['text'] );
 535+ $revision->setTitle( $pageInfo['_title'] );
 536+ $revision->setTimestamp( $revisionInfo['timestamp'] );
 537+
 538+ if ( isset( $revisionInfo['comment'] ) ) {
 539+ $revision->setComment( $revisionInfo['comment'] );
 540+ }
 541+
 542+ if ( isset( $revisionInfo['minor'] ) )
 543+ $revision->setMinor( true );
 544+
 545+ if ( isset( $revisionInfo['contributor']['ip'] ) ) {
 546+ $revision->setUserIP( $revisionInfo['contributor']['ip'] );
 547+ }
 548+ if ( isset( $revisionInfo['contributor']['username'] ) ) {
 549+ $revision->setUserName( $revisionInfo['contributor']['username'] );
 550+ }
 551+
 552+ return $this->revisionCallback( $revision );
 553+ }
 554+
 555+ private function handleUpload( &$pageInfo ) {
 556+ $this->debug( "Enter upload handler" );
 557+ $uploadInfo = array();
 558+
 559+ $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
 560+ 'src', 'size' );
 561+
 562+ $skip = false;
 563+
 564+ while ( $skip ? $this->reader->next() : $this->reader->read() ) {
 565+ if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
 566+ $this->reader->name == 'upload') {
 567+ break;
 568+ }
 569+
 570+ $tag = $this->reader->name;
 571+
 572+ if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this->reader,
 573+ $pageInfo ) ) {
 574+ // Do nothing
 575+ } elseif ( in_array( $tag, $normalFields ) ) {
 576+ $uploadInfo[$tag] = $this->nodeContents();
 577+ } elseif ( $tag == 'contributor' ) {
 578+ $uploadInfo['contributor'] = $this->handleContributor();
 579+ } elseif ( $tag != '#text' ) {
 580+ $this->warn( "Unhandled upload XML tag $tag" );
 581+ $skip = true;
 582+ }
 583+ }
 584+
 585+ return $this->processUpload( $pageInfo, $uploadInfo );
 586+ }
 587+
 588+ private function processUpload( $pageInfo, $uploadInfo ) {
 589+ $revision = new WikiRevision;
 590+
 591+ $revision->setTitle( $pageInfo['_title'] );
 592+ $revision->setID( $uploadInfo['id'] );
 593+ $revision->setTimestamp( $uploadInfo['timestamp'] );
 594+ $revision->setText( $uploadInfo['text'] );
 595+ $revision->setFilename( $uploadInfo['filename'] );
 596+ $revision->setSrc( $uploadInfo['src'] );
 597+ $revision->setSize( intval( $uploadInfo['size'] ) );
 598+ $revision->setComment( $uploadInfo['comment'] );
 599+
 600+ if ( isset( $uploadInfo['contributor']['ip'] ) ) {
 601+ $revision->setUserIP( $uploadInfo['contributor']['ip'] );
 602+ }
 603+ if ( isset( $uploadInfo['contributor']['username'] ) ) {
 604+ $revision->setUserName( $uploadInfo['contributor']['username'] );
 605+ }
 606+
 607+ return $this->uploadCallback( $revision );
 608+ }
 609+
 610+ private function handleContributor() {
 611+ $fields = array( 'id', 'ip', 'username' );
 612+ $info = array();
 613+
 614+ while ( $this->reader->read() ) {
 615+ if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
 616+ $this->reader->name == 'contributor') {
 617+ break;
 618+ }
 619+
 620+ $tag = $this->reader->name;
 621+
 622+ if ( in_array( $tag, $fields ) ) {
 623+ $info[$tag] = $this->nodeContents();
 624+ }
 625+ }
 626+
 627+ return $info;
 628+ }
 629+
 630+ private function processTitle( $text ) {
 631+ $workTitle = $text;
 632+ $origTitle = Title::newFromText( $workTitle );
 633+
 634+ if( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
 635+ $title = Title::makeTitle( $this->mTargetNamespace,
 636+ $origTitle->getDBkey() );
 637+ } else {
 638+ $title = Title::newFromText( $workTitle );
 639+ }
 640+
 641+ if( is_null( $title ) ) {
 642+ // Invalid page title? Ignore the page
 643+ $this->notice( "Skipping invalid page title '$workTitle'" );
 644+ return false;
 645+ } elseif( $title->getInterwiki() != '' ) {
 646+ $this->notice( "Skipping interwiki page title '$workTitle'" );
 647+ return false;
 648+ }
 649+
 650+ return array( $origTitle, $title );
 651+ }
 652+}
 653+
/**
 * This is a horrible hack used to keep source compatibility.
 *
 * Stream wrapper class: a source object is registered under a generated
 * id, and opening a URL whose host part is that id makes reads pull data
 * from the source's readChunk()/atEnd() methods.
 */
class UploadSourceAdapter {
	/** Registered source objects, keyed by id */
	static $sourceRegistrations = array();

	/** Source object selected by stream_open() */
	private $mSource;
	/** Data fetched from the source but not yet handed to the reader */
	private $mBuffer = '';
	/** Number of bytes returned by stream_read() so far */
	private $mPosition = 0;

	/**
	 * Register a source object and return the id to open it with.
	 *
	 * @param $source Object providing atEnd() and readChunk()
	 * @return string The generated id
	 */
	static function registerSource( $source ) {
		$id = wfGenerateToken();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Open the stream: look up the source registered under the URL's
	 * host part.
	 *
	 * @return bool False if the path has no host or no such source exists
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );

		// parse_url() yields false or an array without 'host' for
		// malformed paths
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Return up to $count bytes, refilling the buffer from the source
	 * as needed.
	 *
	 * @param $count Integer: maximum number of bytes to return
	 * @return string Possibly empty
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
				strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			// An empty chunk ends the refill loop for this call
			if ( !strlen( $read ) ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		if ( strlen( $this->mBuffer ) ) {
			$return = substr( $this->mBuffer, 0, $count );
			// substr() returns false on PHP < 8 when nothing is left;
			// keep the buffer a string
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/** Writing is not supported. */
	function stream_write( $data ) {
		return false;
	}

	/** @return int Number of bytes read so far */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool True once the source is exhausted AND the buffer has
	 *   been drained; data still buffered is not end-of-file
	 */
	function stream_eof() {
		return $this->mSource->atEnd() && !strlen( $this->mBuffer );
	}

	/**
	 * Return a stat array with every field set to zero, under both its
	 * numeric and its named key.
	 *
	 * @return array
	 */
	function url_stat() {
		$fields = array( 'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
			'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks' );
		$result = array();

		foreach ( $fields as $index => $field ) {
			$result[$index] = 0;
			$result[$field] = 0;
		}

		return $result;
	}
}
 740+
/**
 * XMLReader with a helper for fetching the text of the current element.
 */
class XMLReader2 extends XMLReader {
	/**
	 * Fetch the text contents of the current element, assuming it has no
	 * sub-elements: reading stops at the first closing tag encountered.
	 *
	 * Text, CDATA and significant-whitespace nodes are concatenated;
	 * other node types are ignored.
	 *
	 * @return string The collected text, or "" for an empty element. If
	 *   the input ends before a closing tag is seen, the reader is closed
	 *   and the result of close() is returned instead.
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
			case XMLReader::TEXT:
			// CDATA sections are character data too; without this case
			// their content is silently dropped
			case XMLReader::CDATA:
			case XMLReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->value;
				break;
			case XMLReader::END_ELEMENT:
				return $buffer;
			}
		}
		return $this->close();
	}
}
 760+
 761+/**
29762 * @todo document (e.g. one-sentence class description).
30763 * @ingroup SpecialPage
31764 */

Follow-up revisions

Revision | Commit summary | Author | Date
r81411 | 1.17: MFT r81186, r81187, r81197, r81209, r81210, r81211, r81215, r81238, r81... | catrope | 20:23, 2 February 2011

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r66267 | Rewrite of XML Dump Processing:... | werdna | 13:28, 12 May 2010
r66268 | Function accessibility changes, documentation, revert function rename for r66267 | werdna | 13:37, 12 May 2010

Comments

#Comment by Reedy (talk | contribs)   16:43, 31 January 2011

Are we 1.17 backporting this?

#Comment by Catrope (talk | contribs)   16:46, 31 January 2011

If all this revision does is move a class from one file to the other, I see no reason to.

#Comment by 😂 (talk | contribs)   16:53, 31 January 2011

Roan wanted me to paste what I said on IRC:

^demon: RoanKattouw: (re: tim's import move) I can already see that causing issues if it isn't put in 1.17
^demon: RoanKattouw: When a user updates their wiki to 1.18 and the classes have moved, they now have this extra file on their disk with duplicate class names.
^demon: And if an extension explicitly included it (which does happen), it would explode on the next upgrade.
^demon: Also less likelihood of merge conflict if someone makes another change to Import.php before release that we *do* want to backport.
^demon: </donenow>
#Comment by Tim Starling (talk | contribs)   20:20, 31 January 2011

Luckily, no extension explicitly includes it.

#Comment by 😂 (talk | contribs)   13:47, 2 February 2011

But it wouldn't be the first time, and we don't know what out of tree users might be doing :)

Status & tagging log