r44246 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r44245‎ | r44246 | r44247 >
Date:02:16, 5 December 2008
Author:ariel
Status:ok (Comments)
Tags:
Comment:
handle xmlns for imports (bug #4520)
Modified paths:
  • /trunk/phase3/includes/Import.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/Import.php
@@ -383,6 +383,7 @@
384384 var $mLogItemCallback = null;
385385 var $mUploadCallback = null;
386386 var $mTargetNamespace = null;
 387+ var $mXmlNamespace = false;
387388 var $lastfield;
388389 var $tagStack = array();
389390
@@ -398,6 +399,22 @@
399400 wfDebug( "WikiImporter XML error: $err\n" );
400401 }
401402
 403+ function handleXmlNamespace ( $parser, $data, $prefix=false, $uri=false ) {
 404+ if( preg_match( '/www.mediawiki.org/',$prefix ) ) {
 405+ $prefix = str_replace( '/','\/',$prefix );
 406+ $this->mXmlNamespace='/^'.$prefix.':/';
 407+ }
 408+ }
 409+
 410+ function stripXmlNamespace($name) {
 411+ if( $this->mXmlNamespace ) {
 412+ return(preg_replace($this->mXmlNamespace,'',$name,1));
 413+ }
 414+ else {
 415+ return($name);
 416+ }
 417+ }
 418+
402419 # --------------
403420
404421 function doImport() {
@@ -405,13 +422,14 @@
406423 return new WikiErrorMsg( "importnotext" );
407424 }
408425
409 - $parser = xml_parser_create( "UTF-8" );
 426+ $parser = xml_parser_create_ns( "UTF-8" );
410427
411428 # case folding violates XML standard, turn it off
412429 xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
413430
414431 xml_set_object( $parser, $this );
415432 xml_set_element_handler( $parser, "in_start", "" );
 433+ xml_set_start_namespace_decl_handler( $parser, "handleXmlNamespace" );
416434
417435 $offset = 0; // for context extraction on error reporting
418436 do {
@@ -603,6 +621,7 @@
604622 }
605623
606624 function in_start( $parser, $name, $attribs ) {
 625+ $name = $this->stripXmlNamespace($name);
607626 $this->debug( "in_start $name" );
608627 if( $name != "mediawiki" ) {
609628 return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
@@ -611,6 +630,7 @@
612631 }
613632
614633 function in_mediawiki( $parser, $name, $attribs ) {
 634+ $name = $this->stripXmlNamespace($name);
615635 $this->debug( "in_mediawiki $name" );
616636 if( $name == 'siteinfo' ) {
617637 xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
@@ -630,6 +650,7 @@
631651 }
632652 }
633653 function out_mediawiki( $parser, $name ) {
 654+ $name = $this->stripXmlNamespace($name);
634655 $this->debug( "out_mediawiki $name" );
635656 if( $name != "mediawiki" ) {
636657 return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
@@ -640,6 +661,7 @@
641662
642663 function in_siteinfo( $parser, $name, $attribs ) {
643664 // no-ops for now
 665+ $name = $this->stripXmlNamespace($name);
644666 $this->debug( "in_siteinfo $name" );
645667 switch( $name ) {
646668 case "sitename":
@@ -655,6 +677,7 @@
656678 }
657679
658680 function out_siteinfo( $parser, $name ) {
 681+ $name = $this->stripXmlNamespace($name);
659682 if( $name == "siteinfo" ) {
660683 xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
661684 }
@@ -662,6 +685,7 @@
663686
664687
665688 function in_page( $parser, $name, $attribs ) {
 689+ $name = $this->stripXmlNamespace($name);
666690 $this->debug( "in_page $name" );
667691 switch( $name ) {
668692 case "id":
@@ -702,6 +726,7 @@
703727 }
704728
705729 function out_page( $parser, $name ) {
 730+ $name = $this->stripXmlNamespace($name);
706731 $this->debug( "out_page $name" );
707732 $this->pop();
708733 if( $name != "page" ) {
@@ -721,6 +746,7 @@
722747 }
723748
724749 function in_nothing( $parser, $name, $attribs ) {
 750+ $name = $this->stripXmlNamespace($name);
725751 $this->debug( "in_nothing $name" );
726752 return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
727753 }
@@ -731,6 +757,7 @@
732758 }
733759
734760 function out_append( $parser, $name ) {
 761+ $name = $this->stripXmlNamespace($name);
735762 $this->debug( "out_append $name" );
736763 if( $name != $this->appendfield ) {
737764 return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
@@ -823,6 +850,7 @@
824851 }
825852
826853 function in_revision( $parser, $name, $attribs ) {
 854+ $name = $this->stripXmlNamespace($name);
827855 $this->debug( "in_revision $name" );
828856 switch( $name ) {
829857 case "id":
@@ -844,6 +872,7 @@
845873 }
846874
847875 function out_revision( $parser, $name ) {
 876+ $name = $this->stripXmlNamespace($name);
848877 $this->debug( "out_revision $name" );
849878 $this->pop();
850879 if( $name != "revision" ) {
@@ -861,6 +890,7 @@
862891 }
863892
864893 function in_logitem( $parser, $name, $attribs ) {
 894+ $name = $this->stripXmlNamespace($name);
865895 $this->debug( "in_logitem $name" );
866896 switch( $name ) {
867897 case "id":
@@ -884,6 +914,7 @@
885915 }
886916
887917 function out_logitem( $parser, $name ) {
 918+ $name = $this->stripXmlNamespace($name);
888919 $this->debug( "out_logitem $name" );
889920 $this->pop();
890921 if( $name != "logitem" ) {
@@ -901,6 +932,7 @@
902933 }
903934
904935 function in_upload( $parser, $name, $attribs ) {
 936+ $name = $this->stripXmlNamespace($name);
905937 $this->debug( "in_upload $name" );
906938 switch( $name ) {
907939 case "timestamp":
@@ -923,6 +955,7 @@
924956 }
925957
926958 function out_upload( $parser, $name ) {
 959+ $name = $this->stripXmlNamespace($name);
927960 $this->debug( "out_revision $name" );
928961 $this->pop();
929962 if( $name != "upload" ) {
@@ -940,6 +973,7 @@
941974 }
942975
943976 function in_contributor( $parser, $name, $attribs ) {
 977+ $name = $this->stripXmlNamespace($name);
944978 $this->debug( "in_contributor $name" );
945979 switch( $name ) {
946980 case "username":
@@ -955,6 +989,7 @@
956990 }
957991
958992 function out_contributor( $parser, $name ) {
 993+ $name = $this->stripXmlNamespace($name);
959994 $this->debug( "out_contributor $name" );
960995 $this->pop();
961996 if( $name != "contributor" ) {

Comments

#Comment by Brion VIBBER (talk | contribs)   22:55, 5 December 2008

Couple of things...

First, I'd recommend doing a full check to match the namespace URLs against the prefix "http://www.mediawiki.org/xml/export", to make sure we don't get false positives with any other NSs we might happen to define.

Second, rather than saving a single matching namespace declaration and stripping only that one, just go ahead and strip all matching URL prefixes from the names whenever they come in, since in theory we might have multiple such declarations.

Which reminds me, we really shouldn't be using the version numbers in the namespace URL here, I think. :) We'll want to fix that up separately, and use a non-versioned namespace in the future. (Versions should be indicated separately, say with a 'version=' attribute.)

And for future reference -- the 'str_replace( '/','\/',$prefix );' bit can be done more consistently with preg_quote($prefix, '/').

Status & tagging log