Index: trunk/phase3/includes/MimeMagic.php |
— | — | @@ -455,71 +455,20 @@ |
456 | 456 | /* |
457 | 457 | * look for XML formats (XHTML and SVG) |
458 | 458 | */ |
459 | | - $xml_type = NULL; |
460 | | - if ( substr( $head, 0, 5 ) == "<?xml" ) { |
461 | | - $xml_type = "ASCII"; |
462 | | - } elseif ( substr( $head, 0, 8 ) == "\xef\xbb\xbf<?xml") { |
463 | | - $xml_type = "UTF-8"; |
464 | | - } elseif ( substr( $head, 0, 12 ) == "\xfe\xff\x00<\x00?\x00x\x00m\x00l" ) { |
465 | | - $xml_type = "UTF-16BE"; |
466 | | - } elseif ( substr( $head, 0, 12 ) == "\xff\xfe<\x00?\x00x\x00m\x00l\x00") { |
467 | | - $xml_type = "UTF-16LE"; |
468 | | - } else { |
469 | | - /* |
470 | | - echo "WARNING: Undetected xml_type ...\n"; |
471 | | - for( $i = 0; $i < 10; $i++ ) { |
472 | | - $c = ord( $head{$i} ); |
473 | | - if( $c < 32 || $c > 126 ) { |
474 | | - printf( "\\x%02x", $c ); |
475 | | - } else { |
476 | | - print $head{$i}; |
477 | | - } |
478 | | - } |
479 | | - echo "\n"; |
480 | | - */ |
481 | | - } |
482 | | - |
483 | | - if( $xml_type == 'UTF-16BE' || $xml_type == 'UTF-16LE' ) { |
484 | | - // Quick and dirty fold down to ASCII! |
485 | | - $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ); |
486 | | - $chars = unpack( $pack[$xml_type], substr( $head, 2 ) ); |
487 | | - $head = ''; |
488 | | - foreach( $chars as $codepoint ) { |
489 | | - if( $codepoint < 128 ) { |
490 | | - $head .= chr( $codepoint ); |
491 | | - } else { |
492 | | - $head .= '?'; |
493 | | - } |
494 | | - } |
495 | | - } |
496 | | - |
497 | | - $match = array(); |
498 | | - $doctype = ""; |
499 | | - $tag = ""; |
500 | | - |
501 | | - if ( preg_match( '%<!DOCTYPE\s+[\w-]+\s+PUBLIC\s+["'."'".'"](.*?)["'."'".'"].*>%siD', |
502 | | - $head, $match ) ) { |
503 | | - $doctype = $match[1]; |
504 | | - } |
505 | | - |
506 | | - if( $xml_type || $doctype ) { |
507 | | - if ( preg_match( '%<(\w+)\b%si', $head, $match ) ) { |
508 | | - $tag = $match[1]; |
509 | | - } |
510 | | - |
511 | | - #print "<br>ANALYSING $file: doctype= $doctype; tag= $tag<br>"; |
512 | | - |
513 | | - if ( strpos( $doctype, "-//W3C//DTD SVG" ) === 0 ) { |
514 | | - return "image/svg+xml"; |
515 | | - } elseif ( $tag === "svg" ) { |
516 | | - return "image/svg+xml"; |
517 | | - } elseif ( strpos( $doctype, "-//W3C//DTD XHTML" ) === 0 ) { |
518 | | - return "text/html"; |
519 | | - } elseif ( $tag === "html" ) { |
520 | | - return "text/html"; |
| 459 | + $xml = new XmlTypeCheck( $file ); |
| 460 | + if( $xml->wellFormed ) { |
| 461 | + $types = array( |
| 462 | + 'http://www.w3.org/2000/svg:svg' => 'image/svg+xml', |
| 463 | + 'svg' => 'image/svg+xml', |
| 464 | + 'http://www.w3.org/1999/xhtml:html' => 'text/html', // application/xhtml+xml? |
| 465 | + 'html' => 'text/html', // application/xhtml+xml? |
| 466 | + ); |
| 467 | + if( isset( $types[$xml->rootElement] ) ) { |
| 468 | + $mime = $types[$xml->rootElement]; |
| 469 | + return $mime; |
521 | 470 | } else { |
522 | 471 | /// Fixme -- this would be the place to allow additional XML type checks |
523 | | - return "application/xml"; |
| 472 | + return 'application/xml'; |
524 | 473 | } |
525 | 474 | } |
526 | 475 | |
— | — | @@ -541,7 +490,17 @@ |
542 | 491 | |
543 | 492 | if ( $script_type ) { |
544 | 493 | if ( $script_type !== "UTF-8" && $script_type !== "ASCII") { |
545 | | - $head = iconv( $script_type, "ASCII//IGNORE", $head); |
| 494 | + // Quick and dirty fold down to ASCII! |
| 495 | + $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ); |
| 496 | + $chars = unpack( $pack[$script_type], substr( $head, 2 ) ); |
| 497 | + $head = ''; |
| 498 | + foreach( $chars as $codepoint ) { |
| 499 | + if( $codepoint < 128 ) { |
| 500 | + $head .= chr( $codepoint ); |
| 501 | + } else { |
| 502 | + $head .= '?'; |
| 503 | + } |
| 504 | + } |
546 | 505 | } |
547 | 506 | |
548 | 507 | $match = array(); |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -271,6 +271,7 @@ |
272 | 272 | 'WikiErrorMsg' => 'includes/WikiError.php', |
273 | 273 | 'WikiXmlError' => 'includes/WikiError.php', |
274 | 274 | 'Xml' => 'includes/Xml.php', |
| 275 | + 'XmlTypeCheck' => 'includes/XmlTypeCheck.php', |
275 | 276 | 'ZhClient' => 'includes/ZhClient.php', |
276 | 277 | 'memcached' => 'includes/memcached-client.php', |
277 | 278 | 'EmaillingJob' => 'includes/JobQueue.php', |
Index: trunk/phase3/includes/XmlTypeCheck.php |
— | — | @@ -0,0 +1,93 @@ |
| 2 | +<?php |
| 3 | + |
/**
 * Checks whether a file is well-formed XML and records the name of its
 * root element, so callers can map the root element to a MIME type.
 *
 * All work happens in the constructor; afterwards read the public
 * $wellFormed and $rootElement properties.
 */
class XmlTypeCheck {
	/**
	 * True when the whole file was read and parsed as well-formed XML.
	 * False otherwise, including when the file could not be opened or read.
	 * Note that this doesn't check schema validity.
	 */
	public $wellFormed = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL, in the form "<namespace URL>:<local name>"
	 * (e.g. "http://www.w3.org/2000/svg:svg"). Elements without a
	 * namespace are reported by their bare name. Stays '' when no
	 * element was seen. It may be set even when $wellFormed is false,
	 * if the root element was reached before the parse error.
	 */
	public $rootElement = '';

	/** Whether undeclared namespace prefixes are tolerated. */
	private $softNamespaces;

	/** Map of namespace prefix ('' for the default namespace) => URL; soft mode only. */
	private $namespaces = array();

	/**
	 * @param $file string filename
	 * @param $softNamespaces bool
	 *        If set to true, use of undeclared XML namespaces will be ignored.
	 *        This matches the behavior of rsvg, but more compliant consumers
	 *        such as Firefox will reject such files.
	 *        Leave off for the default, stricter checks.
	 */
	public function __construct( $file, $softNamespaces = false ) {
		$this->softNamespaces = $softNamespaces;
		$this->run( $file );
	}

	/**
	 * Parse the given file and fill in $wellFormed / $rootElement.
	 *
	 * @param $fname string filename
	 */
	private function run( $fname ) {
		// In soft mode namespaces are resolved by hand in elementOpen(),
		// so a namespace-unaware parser is used on purpose.
		if( $this->softNamespaces ) {
			$parser = xml_parser_create( 'UTF-8' );
		} else {
			$parser = xml_parser_create_ns( 'UTF-8' );
		}

		// case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_element_handler( $parser, array( $this, 'elementOpen' ), null );

		$file = fopen( $fname, 'rb' );
		if( $file !== false ) {
			$this->wellFormed = $this->parseStream( $parser, $file );
			fclose( $file );
		}
		// else: the file is missing or unreadable. Treat it as "not
		// well-formed" rather than reading from an invalid handle, which
		// would never reach EOF.

		// Parsers are resources only before PHP 8; as objects they are
		// released automatically and need no explicit free.
		if( is_resource( $parser ) ) {
			xml_parser_free( $parser );
		}
	}

	/**
	 * Feed an open stream to the parser in 32 KiB chunks.
	 *
	 * @param $parser XML parser created by run()
	 * @param $file resource open, readable stream
	 * @return bool true if the whole stream parsed as well-formed XML
	 */
	private function parseStream( $parser, $file ) {
		do {
			$chunk = fread( $file, 32768 );
			if( $chunk === false ) {
				// Read error: we cannot vouch for the document.
				return false;
			}

			// Tell the parser when this is the final chunk so it can
			// report truncated documents.
			$atEnd = feof( $file );
			if( !xml_parse( $parser, $chunk, $atEnd ) ) {
				// XML isn't well-formed!
				return false;
			}
		} while( !$atEnd );

		return true;
	}

	/**
	 * Start-element callback: records the first (root) element name and
	 * then detaches itself, since no later element is of interest.
	 *
	 * @param $parser XML parser invoking the callback
	 * @param $name string element name as reported by the parser
	 * @param $attribs array attribute name => value
	 */
	private function elementOpen( $parser, $name, $attribs ) {
		if( $this->softNamespaces ) {
			// Check namespaces manually, so expat doesn't throw
			// errors on use of undeclared namespaces.
			foreach( $attribs as $attrib => $val ) {
				if( $attrib === 'xmlns' ) {
					$this->namespaces[''] = $val;
				} elseif( strncmp( $attrib, 'xmlns:', 6 ) === 0 ) {
					$this->namespaces[substr( $attrib, 6 )] = $val;
				}
			}

			if( strpos( $name, ':' ) === false ) {
				$ns = '';
				$subname = $name;
			} else {
				list( $ns, $subname ) = explode( ':', $name, 2 );
			}

			if( isset( $this->namespaces[$ns] ) ) {
				$name = $this->namespaces[$ns] . ':' . $subname;
			}
			// Otherwise the prefix is undeclared. Technically this is
			// invalid for XML with Namespaces, but we let it slide in
			// soft mode and keep the name as written.
		}

		// We only need the first open element
		$this->rootElement = $name;
		xml_set_element_handler( $parser, null, null );
	}
}
Property changes on: trunk/phase3/includes/XmlTypeCheck.php |
___________________________________________________________________ |
Name: svn:eol-style |
1 | 95 | + native |