Index: trunk/phase3/includes/upload/UploadBase.php |
— | — | @@ -302,7 +302,7 @@ |
303 | 303 | * @param $mime string representing the mime |
304 | 304 | * @return mixed true if the file is verified, an array otherwise |
305 | 305 | */ |
306 | | - protected function verifyMimeType( $magic, $mime ) { |
| 306 | + protected function verifyMimeType( $mime ) { |
307 | 307 | global $wgVerifyMimeType; |
308 | 308 | if ( $wgVerifyMimeType ) { |
309 | 309 | wfDebug ( "\n\nmime: <$mime> extension: <{$this->mFinalExtension}>\n\n"); |
— | — | @@ -319,6 +319,8 @@ |
320 | 320 | $fp = fopen( $this->mTempPath, 'rb' ); |
321 | 321 | $chunk = fread( $fp, 256 ); |
322 | 322 | fclose( $fp ); |
| 323 | + |
| 324 | + $magic = MimeMagic::singleton(); |
323 | 325 | $extMime = $magic->guessTypesForExtension( $this->mFinalExtension ); |
324 | 326 | $ieTypes = $magic->getIEMimeTypes( $this->mTempPath, $chunk, $extMime ); |
325 | 327 | foreach ( $ieTypes as $ieType ) { |
— | — | @@ -344,12 +346,9 @@ |
345 | 347 | $this->mFileProps = File::getPropsFromPath( $this->mTempPath, $this->mFinalExtension ); |
346 | 348 | $this->checkMacBinary(); |
347 | 349 | |
348 | | - # magically determine mime type |
349 | | - $magic = MimeMagic::singleton(); |
350 | | - $mime = $magic->guessMimeType( $this->mTempPath, false ); |
351 | | - |
352 | 350 | # check mime type, if desired |
353 | | - $status = $this->verifyMimeType( $magic, $mime ); |
| 351 | + $mime = $this->mFileProps[ 'file-mime' ]; |
| 352 | + $status = $this->verifyMimeType( $mime ); |
354 | 353 | if ( $status !== true ) { |
355 | 354 | return $status; |
356 | 355 | } |
Index: trunk/phase3/includes/MimeMagic.php |
— | — | @@ -409,18 +409,70 @@ |
410 | 410 | return in_array( strtolower( $extension ), $types ); |
411 | 411 | } |
412 | 412 | |
| 413 | + /** Improves a mime type using the file extension. Some file formats are very generic, |
| 414 | + * so their mime type is not very meaningful. A more useful mime type can be derived |
| 415 | + * by looking at the file extension. Typically, this method would be called on the |
| 416 | + * result of guessMimeType(). |
| 417 | + * |
| 418 | + * Currently, this method does the following: |
| 419 | + * |
| 420 | + * If $mime is "unknown/unknown" and isRecognizableExtension( $ext ) returns false, |
| 421 | + * use the result of guessTypesForExtension($ext); otherwise $mime is left unchanged. |
| 422 | + * |
| 423 | + * If $mime is "application/x-opc+zip" and isMatchingExtension( $ext, $mime ) |
| 424 | + * gives true, use the result of guessTypesForExtension($ext); otherwise use "application/zip". |
| 425 | + * Finally, a type that has an entry in mMimeTypeAliases is replaced by that entry's value. |
| 426 | + * @param $mime String: the mime type, typically guessed from a file's content. |
| 427 | + * @param $ext String: the file extension, as taken from the file name |
| 428 | + * |
| 429 | + * @return string the mime type; NOTE(review): values from guessTypesForExtension() are passed through as-is, confirm it yields a single type |
| 430 | + */ |
| 431 | + function improveTypeFromExtension( $mime, $ext ) { |
| 432 | + if ( $mime === "unknown/unknown" ) { |
| 433 | + if( $this->isRecognizableExtension( $ext ) ) { |
| 434 | + wfDebug( __METHOD__. ": refusing to guess mime type for .$ext file, we should have recognized it\n" ); |
| 435 | + } else { |
| 436 | + /* Not something we can detect, so simply |
| 437 | + * trust the file extension */ |
| 438 | + $mime = $this->guessTypesForExtension( $ext ); |
| 439 | + } |
| 440 | + } |
| 441 | + else if ( $mime === "application/x-opc+zip" ) { |
| 442 | + if ( $this->isMatchingExtension( $ext, $mime ) ) { |
| 443 | + /* A known file extension for an OPC file, |
| 444 | + * find the proper mime type for that file extension */ |
| 445 | + $mime = $this->guessTypesForExtension( $ext ); |
| 446 | + } else { |
| 447 | + wfDebug( __METHOD__. ": refusing to guess better type for $mime file, .$ext is not a known OPC extension.\n" ); |
| 448 | + $mime = "application/zip"; |
| 449 | + } |
| 450 | + } |
413 | 451 | |
| 452 | + if ( isset( $this->mMimeTypeAliases[$mime] ) ) { |
| 453 | + $mime = $this->mMimeTypeAliases[$mime]; |
| 454 | + } |
| 455 | + |
| 456 | + wfDebug(__METHOD__.": improved mime type for .$ext: $mime\n"); |
| 457 | + return $mime; |
| 458 | + } |
| 459 | + |
414 | 460 | /** mime type detection. This uses detectMimeType to detect the mime type of the file, |
415 | 461 | * but applies additional checks to determine some well known file formats that may be missed |
416 | | - * or misinterpreter by the default mime detection (namely xml based formats like XHTML or SVG). |
| 462 | + * or misinterpreted by the default mime detection (namely XML based formats like XHTML or SVG, |
| 463 | + * as well as ZIP based formats like OPC/ODF files). |
417 | 464 | * |
418 | 465 | * @param $file String: the file to check |
419 | | - * @param $ext Mixed: the file extension, or true to extract it from the filename. |
420 | | - * Set it to false to ignore the extension. |
| 466 | + * @param $ext Mixed: the file extension, or true (default) to extract it from the filename. |
| 467 | + * Set it to false to ignore the extension. DEPRECATED: pass false and call |
| 468 | + * improveTypeFromExtension($mime, $ext) afterwards to improve the mime type. |
421 | 469 | * |
422 | 470 | * @return string the mime type of $file |
423 | 471 | */ |
424 | 472 | function guessMimeType( $file, $ext = true ) { |
| 473 | + if( $ext ) { # TODO: make $ext default to false. Or better, remove it. |
| 474 | + wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. Use improveTypeFromExtension(\$mime, \$ext) instead.\n" ); |
| 475 | + } |
| 476 | + |
425 | 477 | $mime = $this->doGuessMimeType( $file, $ext ); |
426 | 478 | |
427 | 479 | if( !$mime ) { |
— | — | @@ -432,11 +484,11 @@ |
433 | 485 | $mime = $this->mMimeTypeAliases[$mime]; |
434 | 486 | } |
435 | 487 | |
436 | | - wfDebug(__METHOD__.": final mime type of $file: $mime\n"); |
| 488 | + wfDebug(__METHOD__.": guessed mime type of $file: $mime\n"); |
437 | 489 | return $mime; |
438 | 490 | } |
439 | 491 | |
440 | | - function doGuessMimeType( $file, $ext = true ) { |
| 492 | + private function doGuessMimeType( $file, $ext ) { # TODO: remove $ext param |
441 | 493 | // Read a chunk of the file |
442 | 494 | wfSuppressWarnings(); |
443 | 495 | $f = fopen( $file, "rt" ); |
— | — | @@ -447,6 +499,8 @@ |
448 | 500 | $tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR |
449 | 501 | fclose( $f ); |
450 | 502 | |
| 503 | + wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" ); |
| 504 | + |
451 | 505 | // Hardcode a few magic number checks... |
452 | 506 | $headers = array( |
453 | 507 | // Multimedia... |
— | — | @@ -602,11 +656,16 @@ |
603 | 657 | * @param $header String: some reasonably-sized chunk of file header |
604 | 658 | * @param $tail String: the tail of the file |
605 | 659 | * @param $ext Mixed: the file extension, or true to extract it from the filename. |
606 | | - * Set it to false to ignore the extension. |
| 660 | + * Set it to false (default) to ignore the extension. DEPRECATED: pass false and |
| 661 | + * call improveTypeFromExtension($mime, $ext) afterwards to improve the mime type. |
607 | 662 | * |
608 | 663 | * @return string |
609 | 664 | */ |
610 | 665 | function detectZipType( $header, $tail = null, $ext = false ) { |
| 666 | + if( $ext ) { # TODO: remove $ext param |
| 667 | + wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. Use improveTypeFromExtension(\$mime, \$ext) instead.\n" ); |
| 668 | + } |
| 669 | + |
611 | 670 | $mime = 'application/zip'; |
612 | 671 | $opendocTypes = array( |
613 | 672 | 'chart-template', |
— | — | @@ -637,7 +696,8 @@ |
638 | 697 | wfDebug( __METHOD__.": detected $mime from ZIP archive\n" ); |
639 | 698 | } elseif( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) { |
640 | 699 | $mime = "application/x-opc+zip"; |
641 | | - if( $ext !== true && $ext !== false ) { |
| 700 | + # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere |
| 701 | + if( $ext !== true && $ext !== false ) { |
642 | 702 | /** This is the mode used by getPropsFromPath |
643 | 703 | * These mime's are stored in the database, where we don't really want |
644 | 704 | * x-opc+zip, because we use it only for internal purposes |
— | — | @@ -695,15 +755,20 @@ |
696 | 756 | * If no mime type can be determined, this function returns "unknown/unknown". |
697 | 757 | * |
698 | 758 | * @param $file String: the file to check |
699 | | - * @param $ext Mixed: the file extension, or true to extract it from the filename. |
700 | | - * Set it to false to ignore the extension. |
| 759 | + * @param $ext Mixed: the file extension, or true (default) to extract it from the filename. |
| 760 | + * Set it to false to ignore the extension. DEPRECATED: pass false and call |
| 761 | + * improveTypeFromExtension($mime, $ext) afterwards to improve the mime type. |
701 | 762 | * |
702 | 763 | * @return string the mime type of $file |
703 | 764 | * @access private |
704 | 765 | */ |
705 | | - function detectMimeType( $file, $ext = true ) { |
| 766 | + private function detectMimeType( $file, $ext = true ) { |
706 | 767 | global $wgMimeDetectorCommand; |
707 | 768 | |
| 769 | + if( $ext ) { # TODO: make $ext default to false. Or better, remove it. |
| 770 | + wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. Use improveTypeFromExtension(\$mime, \$ext) instead.\n" ); |
| 771 | + } |
| 772 | + |
708 | 773 | $m = null; |
709 | 774 | if ( $wgMimeDetectorCommand ) { |
710 | 775 | $fn = wfEscapeShellArg( $file ); |
Index: trunk/phase3/includes/filerepo/File.php |
— | — | @@ -1186,7 +1186,14 @@ |
1187 | 1187 | if ( $info['fileExists'] ) { |
1188 | 1188 | $magic = MimeMagic::singleton(); |
1189 | 1189 | |
1190 | | - $info['mime'] = $magic->guessMimeType( $path, $ext ); |
| 1190 | + if ( $ext === true ) { |
| 1191 | + $i = strrpos( $path, '.' ); |
| 1192 | + $ext = strtolower( $i ? substr( $path, $i + 1 ) : '' ); |
| 1193 | + } |
| 1194 | + |
| 1195 | + $info['file-mime'] = $magic->guessMimeType( $path, false ); # mime type according to file contents |
| 1196 | + $info['mime'] = $magic->improveTypeFromExtension( $info['file-mime'], $ext ); # logical mime type |
| 1197 | + |
1191 | 1198 | list( $info['major_mime'], $info['minor_mime'] ) = self::splitMime( $info['mime'] ); |
1192 | 1199 | $info['media_type'] = $magic->getMediaType( $path, $info['mime'] ); |
1193 | 1200 | |