r66267 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r66266‎ | r66267 | r66268 >
Date:13:28, 12 May 2010
Author:werdna
Status:resolved (Comments)
Tags:
Comment:
Rewrite of XML Dump Processing:
* Full rewrite of the WikiImporter class as a new class, ImportXMLReader, which uses XMLReader instead of the deprecated PHP SAX interface.
* Fixes for the following bugs:
** XML Dump Processor chokes on unrecognised tags.
** Special:Import does not allow you to import a dump including only log entries.
* Added hooks to allow extensions to handle tags in any part of the XML document.
* Fully source-compatible with the previous interface.
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/Import.php (modified) (history)
  • /trunk/phase3/includes/ImportXMLReader.php (added) (history)
  • /trunk/phase3/includes/api/ApiImport.php (modified) (history)
  • /trunk/phase3/includes/specials/SpecialImport.php (modified) (history)
  • /trunk/phase3/languages/messages/MessagesEn.php (modified) (history)
  • /trunk/phase3/maintenance/importDump.php (modified) (history)
  • /trunk/phase3/maintenance/renderDump.php (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/importDump.php
@@ -140,7 +140,7 @@
141141 $this->startTime = wfTime();
142142
143143 $source = new ImportStreamSource( $handle );
144 - $importer = new WikiImporter( $source );
 144+ $importer = new ImportXMLReader( $source );
145145
146146 $importer->setDebug( $this->debug );
147147 $importer->setPageCallback( array( &$this, 'reportPage' ) );
Index: trunk/phase3/maintenance/renderDump.php
@@ -46,7 +46,7 @@
4747 $this->startTime = wfTime();
4848
4949 $source = new ImportStreamSource( $this->getStdin() );
50 - $importer = new WikiImporter( $source );
 50+ $importer = new ImportXMLReader( $source );
5151
5252 $importer->setRevisionCallback(
5353 array( &$this, 'handleRevision' ) );
Index: trunk/phase3/includes/api/ApiImport.php
@@ -74,7 +74,7 @@
7575 $this->dieUsageMsg( array( 'import-unknownerror', $source->getMessage() ) );
7676 }
7777
78 - $importer = new WikiImporter( $source );
 78+ $importer = new ImportXMLReader( $source );
7979 if ( isset( $params['namespace'] ) ) {
8080 $importer->setTargetNamespace( $params['namespace'] );
8181 }
@@ -193,4 +193,4 @@
194194 function getData() {
195195 return $this->mResultArr;
196196 }
197 -}
\ No newline at end of file
 197+}
Index: trunk/phase3/includes/AutoLoader.php
@@ -605,7 +605,7 @@
606606 'WantedPagesPage' => 'includes/specials/SpecialWantedpages.php',
607607 'WantedTemplatesPage' => 'includes/specials/SpecialWantedtemplates.php',
608608 'WhatLinksHerePage' => 'includes/specials/SpecialWhatlinkshere.php',
609 - 'WikiImporter' => 'includes/Import.php',
 609+ 'ImportXMLReader' => 'includes/ImportXMLReader.php',
610610 'WikiRevision' => 'includes/Import.php',
611611 'WithoutInterwikiPage' => 'includes/specials/SpecialWithoutinterwiki.php',
612612
Index: trunk/phase3/includes/ImportXMLReader.php
@@ -0,0 +1,703 @@
 2+<?php
/**
 * XML dump importer built on PHP's pull parser, XMLReader.
 *
 * Walks a MediaWiki XML dump (<mediawiki> containing <siteinfo>, <page> and
 * <logitem> elements) and hands every page, revision, upload and log item to
 * a configurable callback. The default callbacks write revisions and log
 * items to the database; uploads are ignored by default.
 *
 * @ingroup SpecialPage
 */
class ImportXMLReader {
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
	private $mDebug;

	/**
	 * @param $source Import source; it is handed to
	 *   UploadSourceAdapter::registerSource() and read through the
	 *   "uploadsource://" stream wrapper.
	 */
	function __construct( $source ) {
		$this->reader = new XMLReader();

		// Registering a protocol twice raises a warning, so only register the
		// wrapper the first time an importer is constructed in this request.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		$this->reader->open( "uploadsource://$id" );

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
	}

	/**
	 * Log an XML error to the debug log (and to the import debug channel
	 * when debug mode is on). Despite the name, nothing is thrown.
	 */
	function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Write a message to the debug log, but only when debug mode is enabled.
	 */
	function debug( $data ) {
		if( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Write a message to the debug log regardless of debug mode.
	 */
	function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Show a message to the user: plain text on the command line, an
	 * HTML-escaped list item otherwise.
	 */
	function notice( $data ) {
		global $wgCommandLineMode;
		if( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
		}
	}

	/**
	 * Set debug mode...
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the action to perform as each new page in the stream is reached.
	 * @param $callback callback
	 * @return callback The previously registered callback
	 */
	function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page in the stream is completed.
	 * Callback accepts the page title (as a Title object), a second object
	 * with the original title form (in case it's been overridden into a
	 * local namespace), a count of revisions, and a count of revisions
	 * whose import succeeded.
	 *
	 * @param $callback callback
	 * @return callback The previously registered callback
	 */
	function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page revision is reached.
	 * @param $callback callback
	 * @return callback The previously registered callback
	 */
	function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each file upload version is reached.
	 * @param $callback callback
	 * @return callback The previously registered callback
	 */
	function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each log item reached.
	 * @param $callback callback
	 * @return callback The previously registered callback
	 */
	function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform when site info is encountered
	 * @param $callback callback
	 * @return callback The previously registered callback
	 */
	function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Set a target namespace to override the defaults
	 * @param $namespace Mixed: null to keep the namespaces from the dump,
	 *   or a non-negative namespace index
	 * @return Mixed: false when $namespace is negative, nothing otherwise
	 */
	function setTargetNamespace( $namespace ) {
		if( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
		} elseif( $namespace >= 0 ) {
			// FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
		} else {
			return false;
		}
	}

	/**
	 * Default per-revision callback, performs the import.
	 * @param $revision WikiRevision
	 * @private
	 */
	function importRevision( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
	}

	/**
	 * Default per-log-item callback, performs the import.
	 * @param $rev WikiRevision
	 * @private
	 */
	function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Dummy for now...
	 */
	function importUpload( $revision ) {
		//$dbw = wfGetDB( DB_MASTER );
		//return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
		return false;
	}

	/**
	 * Alternate per-revision callback, for debugging.
	 * @param $revision WikiRevision
	 * @private
	 */
	function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notify the callback function when a new <page> is reached.
	 * @param $title Title
	 * @private
	 */
	function pageCallback( $title ) {
		if( is_callable( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notify the callback function when a </page> is closed.
	 * @param $title Title
	 * @param $origTitle Title
	 * @param $revisionCount int
	 * @param $successCount Int: number of revisions for which callback returned true
	 * @private
	 */
	function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
		if( is_callable( $this->mPageOutCallback ) ) {
			call_user_func_array( $this->mPageOutCallback,
				array( $title, $origTitle, $revisionCount, $successCount ) );
		}
	}

	/**
	 * Pass a revision to the revision callback.
	 * @param $revision WikiRevision
	 * @return Mixed: the callback's return value, or false if none is set
	 */
	function revisionCallback( $revision ) {
		if ( is_callable( $this->mRevisionCallback ) ) {
			return call_user_func_array( $this->mRevisionCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Pass a log item to the log item callback.
	 * @param $revision WikiRevision
	 * @return Mixed: the callback's return value, or false if none is set
	 */
	function logItemCallback( $revision ) {
		if ( is_callable( $this->mLogItemCallback ) ) {
			return call_user_func_array( $this->mLogItemCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Pass an upload to the upload callback. processUpload() depends on
	 * this method; it mirrors revisionCallback() and logItemCallback().
	 * @param $revision WikiRevision
	 * @return Mixed: the callback's return value, or false if none is set
	 */
	function uploadCallback( $revision ) {
		if ( is_callable( $this->mUploadCallback ) ) {
			return call_user_func_array( $this->mUploadCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Shouldn't something like this be built-in to XMLReader?
	 * Fetches text contents of the current element, assuming
	 * no sub-elements or such scary things.
	 * Leaves the reader on the element's end tag.
	 * @return string Text content; "" for an empty element or when the
	 *   input ends before the element is closed
	 * @access private
	 */
	function nodeContents() {
		if( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while( $this->reader->read() ) {
			switch( $this->reader->nodeType ) {
			case XMLReader::TEXT:
			case XMLReader::CDATA:
			case XMLReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->reader->value;
				break;
			case XMLReader::END_ELEMENT:
				return $buffer;
			}
		}
		// read() failed before the closing tag: the input is truncated or
		// malformed. Don't hand back a partial value.
		return "";
	}

	# --------------

	/**
	 * Debugging aid: dump the type, name and value of the current node.
	 */
	function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			// Map each numeric node type back to its constant name
			foreach( $xmlReaderConstants as $name ) {
				$lookup[constant( "XMLReader::$name" )] = $name;
			}
		}

		// var_dump() prints by itself and returns nothing
		var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		);
		print "\n\n";
	}

	/**
	 * Run the import: check the root element, then dispatch each top-level
	 * element to its handler until </mediawiki> or the end of input.
	 * @return bool Always true
	 * @throws MWException if the first node is not <mediawiki>
	 */
	function doImport() {
		$this->reader->read();

		if ( $this->reader->name != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got ".
				$this->reader->name );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			$tag = $this->reader->name;
			$type = $this->reader->nodeType;
			$skip = false;

			// Hook arguments must be passed as an array
			if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this->reader ) ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				if ( $this->mSiteInfoCallback ) {
					$this->handleSiteInfo();
				} else {
					// Skip the subtree here rather than via handleSiteInfo():
					// that method already advances past <siteinfo>, and the
					// read() below would then drop the following node too.
					$skip = true;
				}
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() jumps over the current element's whole subtree
				$keepReading = $this->reader->next();
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}

		return true;
	}

	/**
	 * Handle <siteinfo>. Without a site info callback the element is simply
	 * skipped and the reader is left on the node that follows it.
	 * @return bool true when the element was skipped
	 * @throws MWException if a site info callback has been set
	 */
	function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( ! $this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Parse a <logitem> element and pass it to processLogItem().
	 * Leaves the reader on </logitem>.
	 */
	function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the logInfo array
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			$skip = false;

			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
					array( $this->reader, &$logInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
				// Jump over the subtree so its children aren't mistaken for
				// log item fields, as the other handlers do.
				$skip = true;
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Build a WikiRevision from parsed log item fields and pass it to the
	 * log item callback.
	 * @param $logInfo Array: fields collected by handleLogItem()
	 * @return Mixed: result of logItemCallback()
	 */
	function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Parse a <page> element: collect the simple fields, resolve the title
	 * and dispatch <revision> and <upload> children. Pages whose title is
	 * invalid are skipped entirely and trigger no page callbacks.
	 * Leaves the reader on </page>.
	 */
	function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			$skip = false;

			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'page' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag',
					array( $this->reader, &$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$pageInfo[$tag] = $this->nodeContents();
				if ( $tag == 'title' ) {
					// processTitle() returns array( original, effective )
					list( $origTitle, $title ) =
						$this->processTitle( $pageInfo['title'] );

					if ( is_null( $title ) ) {
						$badTitle = true;
						$skip = true;
					} else {
						$pageInfo['_title'] = $title;
						$this->pageCallback( $title );
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		// No valid title means pageCallback() was never called, so there is
		// nothing to close.
		if ( isset( $pageInfo['_title'] ) ) {
			$this->pageOutCallback( $pageInfo['_title'], $origTitle,
				$pageInfo['revisionCount'],
				$pageInfo['successfulRevisionCount'] );
		}
	}

	/**
	 * Parse a <revision> element, import it, and update the revision
	 * counters in $pageInfo. Leaves the reader on </revision>.
	 * @param $pageInfo Array: page state from handlePage(), updated in place
	 */
	function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			$skip = false;

			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'revision' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag',
					array( $this->reader, &$pageInfo, &$revisionInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Build a WikiRevision from parsed revision fields and pass it to the
	 * revision callback.
	 * @param $pageInfo Array: page state; '_title' is the effective Title
	 * @param $revisionInfo Array: fields collected by handleRevision()
	 * @return Mixed: result of revisionCallback()
	 */
	function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $revisionInfo['id'] );
		$revision->setText( $revisionInfo['text'] );
		$revision->setTitle( $pageInfo['_title'] );
		$revision->setTimestamp( $revisionInfo['timestamp'] );

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// The mere presence of <minor /> marks the edit as minor
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}

		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}

		return $this->revisionCallback( $revision );
	}

	/**
	 * Parse an <upload> element and pass it to processUpload().
	 * Leaves the reader on </upload>.
	 * @param $pageInfo Array: page state from handlePage()
	 * @return Mixed: result of processUpload()
	 */
	function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			$skip = false;

			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'upload' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleUploadXMLTag',
					array( $this->reader, &$pageInfo, &$uploadInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		return $this->processUpload( $pageInfo, $uploadInfo );
	}

	/**
	 * Build a WikiRevision from parsed upload fields and pass it to the
	 * upload callback.
	 * @param $pageInfo Array: page state; '_title' is the effective Title
	 * @param $uploadInfo Array: fields collected by handleUpload()
	 * @return Mixed: result of uploadCallback()
	 */
	function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;

		$revision->setTitle( $pageInfo['_title'] );
		// handleUpload() does not collect an 'id' field itself; one is only
		// present if a hook supplied it.
		if ( isset( $uploadInfo['id'] ) ) {
			$revision->setID( $uploadInfo['id'] );
		}
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $uploadInfo['text'] );
		$revision->setFilename( $uploadInfo['filename'] );
		$revision->setSrc( $uploadInfo['src'] );
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}

		return $this->uploadCallback( $revision );
	}

	/**
	 * Parse a <contributor> element.
	 * Leaves the reader on </contributor> (or on the element itself when it
	 * is empty).
	 * @return Array with whichever of 'id', 'ip' and 'username' were present
	 */
	function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		// An empty <contributor /> has no end tag to wait for; reading on
		// would swallow the rest of the enclosing element.
		if ( $this->reader->isEmptyElement ) {
			return $info;
		}

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
					$this->reader->name == 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Turn the title text from the dump into Title objects, applying the
	 * target namespace override if one is set.
	 * @param $text String: title text from the dump
	 * @return Array: array( $origTitle, $title ). $origTitle is the title as
	 *   written in the dump, $title the title to import to. $title is null
	 *   when the title is invalid or points to another wiki.
	 */
	function processTitle( $text ) {
		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );
		$title = null;

		if( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			$title = Title::makeTitle( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			$title = Title::newFromText( $workTitle );
		}

		if( is_null( $title ) ) {
			// Invalid page title? Ignore the page
			$this->notice( "Skipping invalid page title '$workTitle'" );
		} elseif( $title->getInterwiki() != '' ) {
			$this->notice( "Skipping interwiki page title '$workTitle'" );
			$title = null;
		}

		return array( $origTitle, $title );
	}
}
 619+
/**
 * Read-only stream wrapper that exposes an import source as a PHP stream,
 * so that XMLReader::open() can read from it via "uploadsource://<id>".
 *
 * The wrapped source only needs two methods: atEnd() and readChunk().
 */
class UploadSourceAdapter {
	/** Registered sources, keyed by the id used as the URL host */
	static $sourceRegistrations = array();

	/** Stream context; PHP sets this on stream wrapper instances */
	public $context;

	private $mSource;
	/** Bytes fetched from the source but not yet handed to the reader */
	private $mBuffer = '';
	/** Number of bytes returned by stream_read() so far */
	private $mPosition = 0;

	/**
	 * Register a source so that it can be opened through the wrapper.
	 * @param $source Object providing atEnd() and readChunk()
	 * @return String: id to use as the host part of the stream URL
	 */
	static function registerSource( $source ) {
		$id = wfGenerateToken();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Open the source whose id is the host part of $path.
	 * @return bool false if the URL has no host or the id is unknown
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );

		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Return up to $count bytes, pulling chunks from the source until
	 * enough are buffered or the source runs dry.
	 * @param $count Integer: maximum number of bytes to return
	 * @return String
	 */
	function stream_read( $count ) {
		// The buffer is a string, so its size is strlen(), not count()
		while ( !$this->mSource->atEnd() && strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			if ( $read === false || $read === null || $read === '' ) {
				// The source has nothing more to give right now
				break;
			}

			$this->mBuffer .= $read;
		}

		// Older PHP versions return false from substr() past the end
		$return = (string)substr( $this->mBuffer, 0, $count );
		$this->mBuffer = (string)substr( $this->mBuffer, $count );

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * The stream is read-only.
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return Integer: number of bytes read so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool true once the source is exhausted and every buffered
	 *   byte has been delivered
	 */
	function stream_eof() {
		// The source may hit its end while bytes are still waiting in the
		// buffer; reporting EOF then would lose them.
		return $this->mSource->atEnd() && $this->mBuffer === '';
	}

	/**
	 * Return a dummy stat array with every field set to zero.
	 * @return Array indexed both by name and by position
	 */
	function url_stat() {
		$fields = array( 'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
			'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks' );
		$result = array();

		foreach ( $fields as $index => $name ) {
			$result[$index] = 0;
			$result[$name] = 0;
		}

		return $result;
	}
}
Index: trunk/phase3/includes/specials/SpecialImport.php
@@ -107,7 +107,7 @@
108108 } else {
109109 $wgOut->addWikiMsg( "importstart" );
110110
111 - $importer = new WikiImporter( $source );
 111+ $importer = new ImportXMLReader( $source );
112112 if( !is_null( $this->namespace ) ) {
113113 $importer->setTargetNamespace( $this->namespace );
114114 }
@@ -274,9 +274,13 @@
275275 */
276276 class ImportReporter {
277277 private $reason=false;
 278+ private $mOriginalLogCallback = null;
 279+ private $mLogItemCount = 0;
278280
279281 function __construct( $importer, $upload, $interwiki , $reason=false ) {
280282 $importer->setPageOutCallback( array( $this, 'reportPage' ) );
 283+ $this->mOriginalLogCallback =
 284+ $importer->setLogItemCallback( array( $this, 'reportLogItem' ) );
281285 $this->mPageCount = 0;
282286 $this->mIsUpload = $upload;
283287 $this->mInterwiki = $interwiki;
@@ -287,6 +291,13 @@
288292 global $wgOut;
289293 $wgOut->addHTML( "<ul>\n" );
290294 }
 295+
 296+ function reportLogItem( /* ... */ ) {
 297+ $this->mLogItemCount++;
 298+ if ( is_callable( $this->mOriginalLogCallback ) ) {
 299+ call_user_func_array( $this->mOriginalLogCallback, func_get_args() );
 300+ }
 301+ }
291302
292303 function reportPage( $title, $origTitle, $revisionCount, $successCount ) {
293304 global $wgOut, $wgUser, $wgLang, $wgContLang;
@@ -340,7 +351,12 @@
341352
342353 function close() {
343354 global $wgOut;
344 - if( $this->mPageCount == 0 ) {
 355+
 356+ if ( $this->mLogItemCount > 0 ) {
 357+ $msg = wfMsgExt( 'imported-log-entries', 'parseinline',
 358+ $this->mLogItemCount );
 359+ $wgOut->addHTML( Xml::tags( 'li', null, $msg ) );
 360+ } elseif( $this->mPageCount == 0 && $this->mLogItemCount == 0 ) {
345361 $wgOut->addHTML( "</ul>\n" );
346362 return new WikiErrorMsg( "importnopages" );
347363 }
Index: trunk/phase3/includes/Import.php
@@ -371,658 +371,6 @@
372372 }
373373
374374 /**
375 - * implements Special:Import
376 - * @ingroup SpecialPage
377 - */
378 -class WikiImporter {
379 - var $mDebug = false;
380 - var $mSource = null;
381 - var $mPageCallback = null;
382 - var $mPageOutCallback = null;
383 - var $mRevisionCallback = null;
384 - var $mLogItemCallback = null;
385 - var $mUploadCallback = null;
386 - var $mTargetNamespace = null;
387 - var $mXmlNamespace = false;
388 - var $lastfield;
389 - var $tagStack = array();
390 -
391 - function __construct( $source ) {
392 - $this->setRevisionCallback( array( $this, "importRevision" ) );
393 - $this->setUploadCallback( array( $this, "importUpload" ) );
394 - $this->setLogItemCallback( array( $this, "importLogItem" ) );
395 - $this->mSource = $source;
396 - }
397 -
398 - function throwXmlError( $err ) {
399 - $this->debug( "FAILURE: $err" );
400 - wfDebug( "WikiImporter XML error: $err\n" );
401 - }
402 -
403 - function handleXmlNamespace ( $parser, $data, $prefix=false, $uri=false ) {
404 - if( preg_match( '/www.mediawiki.org/',$prefix ) ) {
405 - $prefix = str_replace( '/','\/',$prefix );
406 - $this->mXmlNamespace='/^'.$prefix.':/';
407 - }
408 - }
409 -
410 - function stripXmlNamespace($name) {
411 - if( $this->mXmlNamespace ) {
412 - return(preg_replace($this->mXmlNamespace,'',$name,1));
413 - }
414 - else {
415 - return($name);
416 - }
417 - }
418 -
419 - # --------------
420 -
421 - function doImport() {
422 - if( empty( $this->mSource ) ) {
423 - return new WikiErrorMsg( "importnotext" );
424 - }
425 -
426 - $parser = xml_parser_create_ns( "UTF-8" );
427 -
428 - # case folding violates XML standard, turn it off
429 - xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
430 -
431 - xml_set_object( $parser, $this );
432 - xml_set_element_handler( $parser, "in_start", "" );
433 - xml_set_start_namespace_decl_handler( $parser, "handleXmlNamespace" );
434 -
435 - $offset = 0; // for context extraction on error reporting
436 - do {
437 - $chunk = $this->mSource->readChunk();
438 - if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
439 - wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
440 - return new WikiXmlError( $parser, wfMsgHtml( 'import-parse-failure' ), $chunk, $offset );
441 - }
442 - $offset += strlen( $chunk );
443 - } while( $chunk !== false && !$this->mSource->atEnd() );
444 - xml_parser_free( $parser );
445 -
446 - return true;
447 - }
448 -
449 - function debug( $data ) {
450 - if( $this->mDebug ) {
451 - wfDebug( "IMPORT: $data\n" );
452 - }
453 - }
454 -
455 - function notice( $data ) {
456 - global $wgCommandLineMode;
457 - if( $wgCommandLineMode ) {
458 - print "$data\n";
459 - } else {
460 - global $wgOut;
461 - $wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
462 - }
463 - }
464 -
465 - /**
466 - * Set debug mode...
467 - */
468 - function setDebug( $debug ) {
469 - $this->mDebug = $debug;
470 - }
471 -
472 - /**
473 - * Sets the action to perform as each new page in the stream is reached.
474 - * @param $callback callback
475 - * @return callback
476 - */
477 - function setPageCallback( $callback ) {
478 - $previous = $this->mPageCallback;
479 - $this->mPageCallback = $callback;
480 - return $previous;
481 - }
482 -
483 - /**
484 - * Sets the action to perform as each page in the stream is completed.
485 - * Callback accepts the page title (as a Title object), a second object
486 - * with the original title form (in case it's been overridden into a
487 - * local namespace), and a count of revisions.
488 - *
489 - * @param $callback callback
490 - * @return callback
491 - */
492 - function setPageOutCallback( $callback ) {
493 - $previous = $this->mPageOutCallback;
494 - $this->mPageOutCallback = $callback;
495 - return $previous;
496 - }
497 -
498 - /**
499 - * Sets the action to perform as each page revision is reached.
500 - * @param $callback callback
501 - * @return callback
502 - */
503 - function setRevisionCallback( $callback ) {
504 - $previous = $this->mRevisionCallback;
505 - $this->mRevisionCallback = $callback;
506 - return $previous;
507 - }
508 -
509 - /**
510 - * Sets the action to perform as each file upload version is reached.
511 - * @param $callback callback
512 - * @return callback
513 - */
514 - function setUploadCallback( $callback ) {
515 - $previous = $this->mUploadCallback;
516 - $this->mUploadCallback = $callback;
517 - return $previous;
518 - }
519 -
520 - /**
521 - * Sets the action to perform as each log item reached.
522 - * @param $callback callback
523 - * @return callback
524 - */
525 - function setLogItemCallback( $callback ) {
526 - $previous = $this->mLogItemCallback;
527 - $this->mLogItemCallback = $callback;
528 - return $previous;
529 - }
530 -
531 - /**
532 - * Set a target namespace to override the defaults
533 - */
534 - function setTargetNamespace( $namespace ) {
535 - if( is_null( $namespace ) ) {
536 - // Don't override namespaces
537 - $this->mTargetNamespace = null;
538 - } elseif( $namespace >= 0 ) {
539 - // FIXME: Check for validity
540 - $this->mTargetNamespace = intval( $namespace );
541 - } else {
542 - return false;
543 - }
544 - }
545 -
546 - /**
547 - * Default per-revision callback, performs the import.
548 - * @param $revision WikiRevision
549 - * @private
550 - */
551 - function importRevision( $revision ) {
552 - $dbw = wfGetDB( DB_MASTER );
553 - return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
554 - }
555 -
556 - /**
557 - * Default per-revision callback, performs the import.
558 - * @param $rev WikiRevision
559 - * @private
560 - */
561 - function importLogItem( $rev ) {
562 - $dbw = wfGetDB( DB_MASTER );
563 - return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
564 - }
565 -
566 - /**
567 - * Dummy for now...
568 - */
569 - function importUpload( $revision ) {
570 - //$dbw = wfGetDB( DB_MASTER );
571 - //return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
572 - return false;
573 - }
574 -
575 - /**
576 - * Alternate per-revision callback, for debugging.
577 - * @param $revision WikiRevision
578 - * @private
579 - */
580 - function debugRevisionHandler( &$revision ) {
581 - $this->debug( "Got revision:" );
582 - if( is_object( $revision->title ) ) {
583 - $this->debug( "-- Title: " . $revision->title->getPrefixedText() );
584 - } else {
585 - $this->debug( "-- Title: <invalid>" );
586 - }
587 - $this->debug( "-- User: " . $revision->user_text );
588 - $this->debug( "-- Timestamp: " . $revision->timestamp );
589 - $this->debug( "-- Comment: " . $revision->comment );
590 - $this->debug( "-- Text: " . $revision->text );
591 - }
592 -
593 - /**
594 - * Notify the callback function when a new <page> is reached.
595 - * @param $title Title
596 - * @private
597 - */
598 - function pageCallback( $title ) {
599 - if( is_callable( $this->mPageCallback ) ) {
600 - call_user_func( $this->mPageCallback, $title );
601 - }
602 - }
603 -
604 - /**
605 - * Notify the callback function when a </page> is closed.
606 - * @param $title Title
607 - * @param $origTitle Title
608 - * @param $revisionCount int
609 - * @param $successCount Int: number of revisions for which callback returned true
610 - * @private
611 - */
612 - function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
613 - if( is_callable( $this->mPageOutCallback ) ) {
614 - call_user_func( $this->mPageOutCallback, $title, $origTitle,
615 - $revisionCount, $successCount );
616 - }
617 - }
618 -
619 - # XML parser callbacks from here out -- beware!
620 - function donothing( $parser, $x, $y="" ) {
621 - #$this->debug( "donothing" );
622 - }
623 -
624 - function in_start( $parser, $name, $attribs ) {
625 - $name = $this->stripXmlNamespace($name);
626 - $this->debug( "in_start $name" );
627 - if( $name != "mediawiki" ) {
628 - return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
629 - }
630 - xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
631 - }
632 -
633 - function in_mediawiki( $parser, $name, $attribs ) {
634 - $name = $this->stripXmlNamespace($name);
635 - $this->debug( "in_mediawiki $name" );
636 - if( $name == 'siteinfo' ) {
637 - xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
638 - } elseif( $name == 'page' ) {
639 - $this->push( $name );
640 - $this->workRevisionCount = 0;
641 - $this->workSuccessCount = 0;
642 - $this->uploadCount = 0;
643 - $this->uploadSuccessCount = 0;
644 - xml_set_element_handler( $parser, "in_page", "out_page" );
645 - } elseif( $name == 'logitem' ) {
646 - $this->push( $name );
647 - $this->workRevision = new WikiRevision;
648 - xml_set_element_handler( $parser, "in_logitem", "out_logitem" );
649 - } else {
650 - return $this->throwXMLerror( "Expected <page>, got <$name>" );
651 - }
652 - }
653 - function out_mediawiki( $parser, $name ) {
654 - $name = $this->stripXmlNamespace($name);
655 - $this->debug( "out_mediawiki $name" );
656 - if( $name != "mediawiki" ) {
657 - return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
658 - }
659 - xml_set_element_handler( $parser, "donothing", "donothing" );
660 - }
661 -
662 -
663 - function in_siteinfo( $parser, $name, $attribs ) {
664 - // no-ops for now
665 - $name = $this->stripXmlNamespace($name);
666 - $this->debug( "in_siteinfo $name" );
667 - switch( $name ) {
668 - case "sitename":
669 - case "base":
670 - case "generator":
671 - case "case":
672 - case "namespaces":
673 - case "namespace":
674 - break;
675 - default:
676 - return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
677 - }
678 - }
679 -
680 - function out_siteinfo( $parser, $name ) {
681 - $name = $this->stripXmlNamespace($name);
682 - if( $name == "siteinfo" ) {
683 - xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
684 - }
685 - }
686 -
687 -
688 - function in_page( $parser, $name, $attribs ) {
689 - $name = $this->stripXmlNamespace($name);
690 - $this->debug( "in_page $name" );
691 - switch( $name ) {
692 - case "id":
693 - case "title":
694 - case "redirect":
695 - case "restrictions":
696 - $this->appendfield = $name;
697 - $this->appenddata = "";
698 - xml_set_element_handler( $parser, "in_nothing", "out_append" );
699 - xml_set_character_data_handler( $parser, "char_append" );
700 - break;
701 - case "revision":
702 - $this->push( "revision" );
703 - if( is_object( $this->pageTitle ) ) {
704 - $this->workRevision = new WikiRevision;
705 - $this->workRevision->setTitle( $this->pageTitle );
706 - $this->workRevisionCount++;
707 - } else {
708 - // Skipping items due to invalid page title
709 - $this->workRevision = null;
710 - }
711 - xml_set_element_handler( $parser, "in_revision", "out_revision" );
712 - break;
713 - case "upload":
714 - $this->push( "upload" );
715 - if( is_object( $this->pageTitle ) ) {
716 - $this->workRevision = new WikiRevision;
717 - $this->workRevision->setTitle( $this->pageTitle );
718 - $this->uploadCount++;
719 - } else {
720 - // Skipping items due to invalid page title
721 - $this->workRevision = null;
722 - }
723 - xml_set_element_handler( $parser, "in_upload", "out_upload" );
724 - break;
725 - default:
726 - return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
727 - }
728 - }
729 -
730 - function out_page( $parser, $name ) {
731 - $name = $this->stripXmlNamespace($name);
732 - $this->debug( "out_page $name" );
733 - $this->pop();
734 - if( $name != "page" ) {
735 - return $this->throwXMLerror( "Expected </page>, got </$name>" );
736 - }
737 - xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
738 -
739 - $this->pageOutCallback( $this->pageTitle, $this->origTitle,
740 - $this->workRevisionCount, $this->workSuccessCount );
741 -
742 - $this->workTitle = null;
743 - $this->workRevision = null;
744 - $this->workRevisionCount = 0;
745 - $this->workSuccessCount = 0;
746 - $this->pageTitle = null;
747 - $this->origTitle = null;
748 - }
749 -
750 - function in_nothing( $parser, $name, $attribs ) {
751 - $name = $this->stripXmlNamespace($name);
752 - $this->debug( "in_nothing $name" );
753 - return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
754 - }
755 -
756 - function char_append( $parser, $data ) {
757 - $this->debug( "char_append '$data'" );
758 - $this->appenddata .= $data;
759 - }
760 -
761 - function out_append( $parser, $name ) {
762 - $name = $this->stripXmlNamespace($name);
763 - $this->debug( "out_append $name" );
764 - if( $name != $this->appendfield ) {
765 - return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
766 - }
767 -
768 - switch( $this->appendfield ) {
769 - case "title":
770 - $this->workTitle = $this->appenddata;
771 - $this->origTitle = Title::newFromText( $this->workTitle );
772 - if( !is_null( $this->mTargetNamespace ) && !is_null( $this->origTitle ) ) {
773 - $this->pageTitle = Title::makeTitle( $this->mTargetNamespace,
774 - $this->origTitle->getDBkey() );
775 - } else {
776 - $this->pageTitle = Title::newFromText( $this->workTitle );
777 - }
778 - if( is_null( $this->pageTitle ) ) {
779 - // Invalid page title? Ignore the page
780 - $this->notice( "Skipping invalid page title '$this->workTitle'" );
781 - } elseif( $this->pageTitle->getInterwiki() != '' ) {
782 - $this->notice( "Skipping interwiki page title '$this->workTitle'" );
783 - $this->pageTitle = null;
784 - } else {
785 - $this->pageCallback( $this->workTitle );
786 - }
787 - break;
788 - case "id":
789 - if ( $this->parentTag() == 'revision' || $this->parentTag() == 'logitem' ) {
790 - if( $this->workRevision )
791 - $this->workRevision->setID( $this->appenddata );
792 - }
793 - break;
794 - case "text":
795 - if( $this->workRevision )
796 - $this->workRevision->setText( $this->appenddata );
797 - break;
798 - case "username":
799 - if( $this->workRevision )
800 - $this->workRevision->setUsername( $this->appenddata );
801 - break;
802 - case "ip":
803 - if( $this->workRevision )
804 - $this->workRevision->setUserIP( $this->appenddata );
805 - break;
806 - case "timestamp":
807 - if( $this->workRevision )
808 - $this->workRevision->setTimestamp( $this->appenddata );
809 - break;
810 - case "comment":
811 - if( $this->workRevision )
812 - $this->workRevision->setComment( $this->appenddata );
813 - break;
814 - case "type":
815 - if( $this->workRevision )
816 - $this->workRevision->setType( $this->appenddata );
817 - break;
818 - case "action":
819 - if( $this->workRevision )
820 - $this->workRevision->setAction( $this->appenddata );
821 - break;
822 - case "logtitle":
823 - if( $this->workRevision )
824 - $this->workRevision->setTitle( Title::newFromText( $this->appenddata ) );
825 - break;
826 - case "params":
827 - if( $this->workRevision )
828 - $this->workRevision->setParams( $this->appenddata );
829 - break;
830 - case "minor":
831 - if( $this->workRevision )
832 - $this->workRevision->setMinor( true );
833 - break;
834 - case "filename":
835 - if( $this->workRevision )
836 - $this->workRevision->setFilename( $this->appenddata );
837 - break;
838 - case "src":
839 - if( $this->workRevision )
840 - $this->workRevision->setSrc( $this->appenddata );
841 - break;
842 - case "size":
843 - if( $this->workRevision )
844 - $this->workRevision->setSize( intval( $this->appenddata ) );
845 - break;
846 - default:
847 - $this->debug( "Bad append: {$this->appendfield}" );
848 - }
849 - $this->appendfield = "";
850 - $this->appenddata = "";
851 -
852 - $parent = $this->parentTag();
853 - xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
854 - xml_set_character_data_handler( $parser, "donothing" );
855 - }
856 -
857 - function in_revision( $parser, $name, $attribs ) {
858 - $name = $this->stripXmlNamespace($name);
859 - $this->debug( "in_revision $name" );
860 - switch( $name ) {
861 - case "id":
862 - case "timestamp":
863 - case "comment":
864 - case "minor":
865 - case "text":
866 - $this->appendfield = $name;
867 - xml_set_element_handler( $parser, "in_nothing", "out_append" );
868 - xml_set_character_data_handler( $parser, "char_append" );
869 - break;
870 - case "contributor":
871 - $this->push( "contributor" );
872 - xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
873 - break;
874 - default:
875 - return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
876 - }
877 - }
878 -
879 - function out_revision( $parser, $name ) {
880 - $name = $this->stripXmlNamespace($name);
881 - $this->debug( "out_revision $name" );
882 - $this->pop();
883 - if( $name != "revision" ) {
884 - return $this->throwXMLerror( "Expected </revision>, got </$name>" );
885 - }
886 - xml_set_element_handler( $parser, "in_page", "out_page" );
887 -
888 - if( $this->workRevision ) {
889 - $ok = call_user_func_array( $this->mRevisionCallback,
890 - array( $this->workRevision, $this ) );
891 - if( $ok ) {
892 - $this->workSuccessCount++;
893 - }
894 - }
895 - }
896 -
897 - function in_logitem( $parser, $name, $attribs ) {
898 - $name = $this->stripXmlNamespace($name);
899 - $this->debug( "in_logitem $name" );
900 - switch( $name ) {
901 - case "id":
902 - case "timestamp":
903 - case "comment":
904 - case "type":
905 - case "action":
906 - case "logtitle":
907 - case "params":
908 - $this->appendfield = $name;
909 - xml_set_element_handler( $parser, "in_nothing", "out_append" );
910 - xml_set_character_data_handler( $parser, "char_append" );
911 - break;
912 - case "contributor":
913 - $this->push( "contributor" );
914 - xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
915 - break;
916 - default:
917 - return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
918 - }
919 - }
920 -
921 - function out_logitem( $parser, $name ) {
922 - $name = $this->stripXmlNamespace($name);
923 - $this->debug( "out_logitem $name" );
924 - $this->pop();
925 - if( $name != "logitem" ) {
926 - return $this->throwXMLerror( "Expected </logitem>, got </$name>" );
927 - }
928 - xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
929 -
930 - if( $this->workRevision ) {
931 - $ok = call_user_func_array( $this->mLogItemCallback,
932 - array( $this->workRevision, $this ) );
933 - if( $ok ) {
934 - $this->workSuccessCount++;
935 - }
936 - }
937 - }
938 -
939 - function in_upload( $parser, $name, $attribs ) {
940 - $name = $this->stripXmlNamespace($name);
941 - $this->debug( "in_upload $name" );
942 - switch( $name ) {
943 - case "timestamp":
944 - case "comment":
945 - case "text":
946 - case "filename":
947 - case "src":
948 - case "size":
949 - $this->appendfield = $name;
950 - xml_set_element_handler( $parser, "in_nothing", "out_append" );
951 - xml_set_character_data_handler( $parser, "char_append" );
952 - break;
953 - case "contributor":
954 - $this->push( "contributor" );
955 - xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
956 - break;
957 - default:
958 - return $this->throwXMLerror( "Element <$name> not allowed in an <upload>." );
959 - }
960 - }
961 -
962 - function out_upload( $parser, $name ) {
963 - $name = $this->stripXmlNamespace($name);
964 - $this->debug( "out_revision $name" );
965 - $this->pop();
966 - if( $name != "upload" ) {
967 - return $this->throwXMLerror( "Expected </upload>, got </$name>" );
968 - }
969 - xml_set_element_handler( $parser, "in_page", "out_page" );
970 -
971 - if( $this->workRevision ) {
972 - $ok = call_user_func_array( $this->mUploadCallback,
973 - array( $this->workRevision, $this ) );
974 - if( $ok ) {
975 - $this->workUploadSuccessCount++;
976 - }
977 - }
978 - }
979 -
980 - function in_contributor( $parser, $name, $attribs ) {
981 - $name = $this->stripXmlNamespace($name);
982 - $this->debug( "in_contributor $name" );
983 - switch( $name ) {
984 - case "username":
985 - case "ip":
986 - case "id":
987 - $this->appendfield = $name;
988 - xml_set_element_handler( $parser, "in_nothing", "out_append" );
989 - xml_set_character_data_handler( $parser, "char_append" );
990 - break;
991 - default:
992 - $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
993 - }
994 - }
995 -
996 - function out_contributor( $parser, $name ) {
997 - $name = $this->stripXmlNamespace($name);
998 - $this->debug( "out_contributor $name" );
999 - $this->pop();
1000 - if( $name != "contributor" ) {
1001 - return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
1002 - }
1003 - $parent = $this->parentTag();
1004 - xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
1005 - }
1006 -
1007 - private function push( $name ) {
1008 - array_push( $this->tagStack, $name );
1009 - $this->debug( "PUSH $name" );
1010 - }
1011 -
1012 - private function pop() {
1013 - $name = array_pop( $this->tagStack );
1014 - $this->debug( "POP $name" );
1015 - return $name;
1016 - }
1017 -
1018 - private function parentTag() {
1019 - $name = $this->tagStack[count( $this->tagStack ) - 1];
1020 - $this->debug( "PARENT $name" );
1021 - return $name;
1022 - }
1023 -
1024 -}
1025 -
1026 -/**
1027375 * @todo document (e.g. one-sentence class description).
1028376 * @ingroup SpecialPage
1029377 */
Index: trunk/phase3/languages/messages/MessagesEn.php
@@ -3203,6 +3203,7 @@
32043204 'importstart' => 'Importing pages...',
32053205 'import-revision-count' => '$1 {{PLURAL:$1|revision|revisions}}',
32063206 'importnopages' => 'No pages to import.',
 3207+'imported-log-entries' => 'Imported $1 {{PLURAL:$1|log entry|log entries}}.',
32073208 'importfailed' => 'Import failed: <nowiki>$1</nowiki>',
32083209 'importunknownsource' => 'Unknown import source type',
32093210 'importcantopen' => 'Could not open import file',

Follow-up revisions

Revision | Commit summary | Author | Date
r66268 | Function accessibility changes, documentation, revert function rename for r66267 | werdna | 13:37, 12 May 2010
r66269 | Missing commits from r66268, revert class rename in r66267 | werdna | 13:38, 12 May 2010
r66271 | Follow-up r66267: Use formatNum for the number... | raymond | 14:02, 12 May 2010
r68512 | Follow up r66267.... | platonides | 12:19, 24 June 2010
r68513 | Follow up r54225. DumpRenderer::handleRevision needs to be public in order to... | platonides | 12:44, 24 June 2010
r79838 | Followup r66267, usage of $revisionInfo is undefined, but isset is on $upload... | reedy | 19:51, 7 January 2011
r80507 | Fix r66267, make hook code consistent with documentation, fix E_NOTICE | werdna | 18:33, 18 January 2011
r81238 | Cleanup for r66268, r66267: merge WikiImporter back into Import.php, where it... | tstarling | 07:07, 31 January 2011
r82482 | All functions of the importer have $title, origTitle, but processTitle() retu... | hartman | 20:35, 19 February 2011
r82912 | Partial merge r82461, close() the reader when we're done with it.... | demon | 21:06, 27 February 2011

Comments

#Comment by Reedy (talk | contribs)   20:00, 7 January 2011

~Line 550 of includes/ImportXMLReader.php

			if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this->reader,
						$pageInfo, $revisionInfo ) ) {
				// Do nothing

$revisionInfo is undefined


Documentation added by IAlex in r68945

'ImportHandleUploadXMLTag': When parsing a XML tag in a file upload
$reader: XMLReader object
$revisionInfo: Array of information
Return false to stop further processing of the tag

One doesn't match the other

Will ping on documentation

#Comment by Werdna (talk | contribs)   00:23, 19 January 2011

Fixed.

#Comment by Tim Starling (talk | contribs)   08:01, 31 January 2011

Marking fixme due to "return $this->close();" in nodeContents(), this is probably unintended. Also noted on CR r68587. Otherwise good, this can be marked OK or resolved once nodeContents() is fixed.

#Comment by Tim Starling (talk | contribs)   04:19, 3 February 2011

Fixed in r81437.

Status & tagging log