Index: trunk/phase3/includes/media/BitmapMetadataHandler.php |
— | — | @@ -40,16 +40,16 @@ |
41 | 41 | |
42 | 42 | |
43 | 43 | /** |
44 | | - * |
45 | | - * get exif info using exif class. |
| 44 | + * Get exif info using exif class. |
46 | 45 | * Basically what used to be in BitmapHandler::getMetadata(). |
47 | 46 | * Just calls stuff in the Exif class. |
48 | 47 | * |
49 | 48 | * @param $filename string |
50 | 49 | */ |
51 | | - function getExif ( $filename ) { |
52 | | - if ( file_exists( $filename ) ) { |
53 | | - $exif = new Exif( $filename ); |
| 50 | + function getExif ( $filename, $byteOrder ) { |
| 51 | + global $wgShowEXIF; |
| 52 | + if ( file_exists( $filename ) && $wgShowEXIF ) { |
| 53 | + $exif = new Exif( $filename, $byteOrder ); |
54 | 54 | $data = $exif->getFilteredData(); |
55 | 55 | if ( $data ) { |
56 | 56 | $this->addMetadata( $data, 'exif' ); |
— | — | @@ -117,7 +117,6 @@ |
118 | 118 | static function Jpeg ( $filename ) { |
119 | 119 | $showXMP = function_exists( 'xml_parser_create_ns' ); |
120 | 120 | $meta = new self(); |
121 | | - $meta->getExif( $filename ); |
122 | 121 | |
123 | 122 | $seg = JpegMetadataExtractor::segmentSplitter( $filename ); |
124 | 123 | if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) { |
— | — | @@ -141,6 +140,9 @@ |
142 | 141 | $meta->addMetadata( $array, $type ); |
143 | 142 | } |
144 | 143 | } |
| 144 | + if ( isset( $seg['byteOrder'] ) ) { |
| 145 | + $meta->getExif( $filename, $seg['byteOrder'] ); |
| 146 | + } |
145 | 147 | return $meta->getMetadataArray(); |
146 | 148 | } |
147 | 149 | |
— | — | @@ -208,4 +210,60 @@ |
209 | 211 | return $baseArray; |
210 | 212 | } |
211 | 213 | |
| 214 | + /** |
| 215 | + * Extract the Exif metadata of a TIFF file. |
| 216 | + * |
| 217 | + * Checks that the file exists, looks up its byte order with |
| 218 | + * getTiffByteOrder(), hands both to the Exif class and returns |
| 219 | + * the filtered data with MEDIAWIKI_EXIF_VERSION added. |
| 220 | + * |
| 221 | + * @todo Add XMP support, so this function actually makes |
| 222 | + * sense to put here. |
| 223 | + * |
| 224 | + * @param $filename String |
| 225 | + * @return Array The metadata. |
| 226 | + * @throws MWException if the file does not exist, its byte order |
| 227 | + * cannot be determined, or no data could be extracted. |
| 228 | + */ |
| 229 | + static public function Tiff ( $filename ) { |
| 230 | + if ( !file_exists( $filename ) ) { |
| 231 | + throw new MWException( "File doesn't exist - $filename" ); |
| 232 | + } |
| 233 | + $order = self::getTiffByteOrder( $filename ); |
| 234 | + if ( !$order ) { |
| 235 | + throw new MWException( "Error determining byte order of $filename" ); |
| 236 | + } |
| 237 | + $reader = new Exif( $filename, $order ); |
| 238 | + $metadata = $reader->getFilteredData(); |
| 239 | + if ( !$metadata ) { |
| 240 | + throw new MWException( "Could not extract data from tiff file $filename" ); |
| 241 | + } |
| 242 | + $metadata['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); |
| 243 | + return $metadata; |
| 244 | + } |
| 245 | + /** |
| 246 | + * Work out the byte order of a tiff file from the two-byte |
| 247 | + * marker at the very start of the file. Needed for exif stuff. |
| 248 | + * |
| 249 | + * @param $filename String Path of the file to inspect |
| 250 | + * @return String 'LE' or 'BE'; false if the file could not be |
| 251 | + * opened or does not start with 'II' or 'MM'. |
| 252 | + */ |
| 253 | + static function getTiffByteOrder( $filename ) { |
| 254 | + $handle = fopen( $filename, 'rb' ); |
| 255 | + if ( !$handle ) { |
| 256 | + return false; |
| 257 | + } |
| 258 | + $marker = fread( $handle, 2 ); |
| 259 | + fclose( $handle ); |
| 260 | + |
| 261 | + if ( $marker === 'II' ) { |
| 262 | + return 'LE'; // 'II' stands for Intel. |
| 263 | + } elseif ( $marker === 'MM' ) { |
| 264 | + return 'BE'; // 'MM' stands for Motorola. |
| 265 | + } |
| 266 | + return false; // Short read or unknown marker. |
| 267 | + } |
| 268 | + |
| 269 | + |
212 | 270 | } |
Index: trunk/phase3/includes/media/JpegMetadataExtractor.php |
— | — | @@ -28,7 +28,10 @@ |
29 | 29 | |
30 | 30 | $segmentCount = 0; |
31 | 31 | |
32 | | - $segments = array( 'XMP_ext' => array(), 'COM' => array() ); |
| 32 | + $segments = array( |
| 33 | + 'XMP_ext' => array(), |
| 34 | + 'COM' => array(), |
| 35 | + ); |
33 | 36 | |
34 | 37 | if ( !$filename ) { |
35 | 38 | throw new MWException( "No filename specified for " . __METHOD__ ); |
— | — | @@ -82,23 +85,34 @@ |
83 | 86 | wfDebug( __METHOD__ . ' Ignoring JPEG comment as is garbage.' ); |
84 | 87 | } |
85 | 88 | |
86 | | - } elseif ( $buffer === "\xE1" && $showXMP ) { |
| 89 | + } elseif ( $buffer === "\xE1" ) { |
87 | 90 | // APP1 section (Exif, XMP, and XMP extended) |
88 | 91 | // XMP parts are only extracted if XMP is enabled; the Exif byte order always is. |
89 | 92 | $temp = self::jpegExtractMarker( $fh ); |
90 | | - |
91 | 93 | // check what type of app segment this is. |
92 | | - if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" ) { |
| 94 | + if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" && $showXMP ) { |
93 | 95 | $segments["XMP"] = substr( $temp, 29 ); |
94 | | - } elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" ) { |
| 96 | + } elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" && $showXMP ) { |
95 | 97 | $segments["XMP_ext"][] = substr( $temp, 35 ); |
96 | | - } elseif ( substr( $temp, 0, 29 ) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" ) { |
| 98 | + } elseif ( substr( $temp, 0, 29 ) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP ) { |
97 | 99 | // Some images (especially flickr images) seem to have this. |
98 | 100 | // I really have no idea what the deal is with them, but |
99 | 101 | // whatever... |
100 | 102 | $segments["XMP"] = substr( $temp, 29 ); |
101 | 103 | wfDebug( __METHOD__ . ' Found XMP section with wrong app identifier ' |
102 | 104 | . "Using anyways.\n" ); |
| 105 | + } elseif ( substr( $temp, 0, 6 ) === "Exif\0\0" ) { |
| 106 | + // Just need to find out what the byte order is, |
| 107 | + // because PHP's exif extension doesn't fully process some properties. |
| 108 | + // This is a II for little Endian, MM for big. Not a unicode BOM. |
| 109 | + $byteOrderMarker = substr( $temp, 6, 2 ); |
| 110 | + if ( $byteOrderMarker === 'MM' ) { |
| 111 | + $segments['byteOrder'] = 'BE'; |
| 112 | + } elseif ( $byteOrderMarker === 'II' ) { |
| 113 | + $segments['byteOrder'] = 'LE'; |
| 114 | + } else { |
| 115 | + wfDebug( __METHOD__ . ' Invalid byte ordering?!' ); |
| 116 | + } |
103 | 117 | } |
104 | 118 | } elseif ( $buffer === "\xED" ) { |
105 | 119 | // APP13 - PSIR. IPTC and some photoshop stuff |
Index: trunk/phase3/includes/media/Tiff.php |
— | — | @@ -56,13 +56,20 @@ |
57 | 57 | */ |
58 | 58 | function getMetadata( $image, $filename ) { |
59 | 59 | global $wgShowEXIF; |
60 | | - if ( $wgShowEXIF && file_exists( $filename ) ) { |
61 | | - $exif = new Exif( $filename ); |
62 | | - $data = $exif->getFilteredData(); |
63 | | - if ( $data ) { |
64 | | - $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); |
65 | | - return serialize( $data ); |
66 | | - } else { |
| 60 | + if ( $wgShowEXIF ) { |
| 61 | + try { |
| 62 | + $meta = BitmapMetadataHandler::Tiff( $filename ); |
| 63 | + if ( !is_array( $meta ) ) { |
| 64 | + // This should never happen, but doesn't hurt to be paranoid. |
| 65 | + throw new MWException('Metadata array is not an array'); |
| 66 | + } |
| 67 | + $meta['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); |
| 68 | + return serialize( $meta ); |
| 69 | + } |
| 70 | + catch ( MWException $e ) { |
| 71 | + // BitmapMetadataHandler throws an exception in certain exceptional |
| 72 | + // cases like if file does not exist. |
| 73 | + wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" ); |
67 | 74 | return ExifBitmapHandler::BROKEN_FILE; |
68 | 75 | } |
69 | 76 | } else { |
Index: trunk/phase3/includes/media/Exif.php |
— | — | @@ -90,6 +90,11 @@ |
91 | 91 | */ |
92 | 92 | var $log = false; |
93 | 93 | |
| 94 | + /** |
| 95 | + * The byte order of the file. Needed because php's |
| 96 | + * extension doesn't fully process some obscure props. |
| 97 | + */ |
| 98 | + private $byteOrder; |
94 | 99 | //@} |
95 | 100 | |
96 | 101 | /** |
— | — | @@ -102,7 +107,7 @@ |
103 | 108 | * DigitalZoomRatio = 0/0 is rejected. need to determine if that's valid. |
104 | 109 | * possibly should treat 0/0 = 0. need to read exif spec on that. |
105 | 110 | */ |
106 | | - function __construct( $file ) { |
| 111 | + function __construct( $file, $byteOrder = '' ) { |
107 | 112 | /** |
108 | 113 | * Page numbers here refer to pages in the EXIF 2.2 standard |
109 | 114 | * |
— | — | @@ -275,6 +280,16 @@ |
276 | 281 | |
277 | 282 | $this->file = $file; |
278 | 283 | $this->basename = wfBaseName( $this->file ); |
| 284 | + if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) { |
| 285 | + $this->byteOrder = $byteOrder; |
| 286 | + } else { |
| 287 | + // Only give a warning for b/c, since originally we didn't |
| 288 | + // require this. The number of things affected by this is |
| 289 | + // rather small. |
| 290 | + wfWarn( 'Exif class did not have byte order specified. ' |
| 291 | + . 'Some properties may be decoded incorrectly.' ); |
| 292 | + $this->byteOrder = 'BE'; // BE seems about twice as popular as LE in jpg's. |
| 293 | + } |
279 | 294 | |
280 | 295 | $this->debugFile( $this->basename, __FUNCTION__, true ); |
281 | 296 | if( function_exists( 'exif_read_data' ) ) { |
— | — | @@ -394,7 +409,16 @@ |
395 | 410 | } |
396 | 411 | $newVal .= ord( substr($val, $i, 1) ); |
397 | 412 | } |
398 | | - $this->mFilteredExifData['GPSVersionID'] = $newVal; |
| 413 | + if ( $this->byteOrder === 'LE' ) { |
| 414 | + // Need to reverse the string |
| 415 | + $newVal2 = ''; |
| 416 | + for ( $i = strlen( $newVal ) - 1; $i >= 0; $i-- ) { |
| 417 | + $newVal2 .= substr( $newVal, $i, 1 ); |
| 418 | + } |
| 419 | + $this->mFilteredExifData['GPSVersionID'] = $newVal2; |
| 420 | + } else { |
| 421 | + $this->mFilteredExifData['GPSVersionID'] = $newVal; |
| 422 | + } |
399 | 423 | unset( $this->mFilteredExifData['GPSVersion'] ); |
400 | 424 | } |
401 | 425 | |
— | — | @@ -415,7 +439,6 @@ |
416 | 440 | unset($this->mFilteredExifData[$prop]); |
417 | 441 | return; |
418 | 442 | } |
419 | | - |
420 | 443 | $charCode = substr( $this->mFilteredExifData[$prop], 0, 8); |
421 | 444 | $val = substr( $this->mFilteredExifData[$prop], 8); |
422 | 445 | |
— | — | @@ -426,7 +449,7 @@ |
427 | 450 | $charset = "Shift-JIS"; |
428 | 451 | break; |
429 | 452 | case "UNICODE\x00": |
430 | | - $charset = "UTF-16"; |
| 453 | + $charset = "UTF-16" . $this->byteOrder; |
431 | 454 | break; |
432 | 455 | default: //ascii or undefined. |
433 | 456 | $charset = ""; |
Index: trunk/phase3/includes/media/GIFMetadataExtractor.php |
— | — | @@ -126,14 +126,14 @@ |
127 | 127 | |
128 | 128 | // The standard says this should be ASCII, however it's unclear if |
129 | 129 | // that's true in practice. Check to see if it's valid utf-8, if so |
130 | | - // assume its that, otherwise assume its iso-8859-1 |
| 130 | + // assume it's that, otherwise assume it's windows-1252 (extends iso-8859-1's printable characters) |
131 | 131 | $dataCopy = $data; |
132 | 132 | // quickIsNFCVerify has the side effect of replacing any invalid characters |
133 | 133 | UtfNormal::quickIsNFCVerify( $dataCopy ); |
134 | 134 | |
135 | 135 | if ( $dataCopy !== $data ) { |
136 | 136 | wfSuppressWarnings(); |
137 | | - $data = iconv( 'ISO-8859-1', 'UTF-8', $data ); |
| 137 | + $data = iconv( 'windows-1252', 'UTF-8', $data ); |
138 | 138 | wfRestoreWarnings(); |
139 | 139 | } |
140 | 140 | |