r69646 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r69645‎ | r69646 | r69647 >
Date:22:55, 20 July 2010
Author:bawolff
Status:ok
Tags:
Comment:
Misc. minor fixes.

* Improve formatting of GPSDOP, ycbcrpositioning, GPSTimeStamp, GPSAltitude
* some changes to how pre-1902 dates are handled.
* fix broken GPSVersion extraction
* minor fixes to how JPEGComments are extracted
* fix broken extraction of UserComment tag
* minor fixes to how iptc charset is detected (follow-up r68176)
* Make latitude, longitude, artist, copyright, imageDescription,
and altitude be displayed by default.
Modified paths:
  • /branches/img_metadata/phase3/includes/Exif.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/BitmapMetadataHandler.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/IPTC.php (modified) (history)
  • /branches/img_metadata/phase3/languages/messages/MessagesEn.php (modified) (history)
  • /branches/img_metadata/phase3/maintenance/language/messageTypes.inc (modified) (history)
  • /branches/img_metadata/phase3/maintenance/language/messages.inc (modified) (history)

Diff [purge]

Index: branches/img_metadata/phase3/maintenance/language/messages.inc
@@ -2923,6 +2923,13 @@
29242924 'exif-gpsdestdistance-m',
29252925 'exif-gpsdestdistance-n',
29262926 ),
 2927+ 'exif-gdop' => array(
 2928+ 'exif-gpsdop-excellent',
 2929+ 'exif-gpsdop-good',
 2930+ 'exif-gpsdop-moderate',
 2931+ 'exif-gpsdop-fair',
 2932+ 'exif-gpsdop-poor',
 2933+ ),
29272934 'exif-objectcycle' => array(
29282935 'exif-objectcycle-a',
29292936 'exif-objectcycle-p',
@@ -2932,6 +2939,10 @@
29332940 'exif-gpsdirection-t',
29342941 'exif-gpsdirection-m',
29352942 ),
 2943+ 'exif-ycbcrpositioning' => array(
 2944+ 'exif-ycbcrpositioning-1',
 2945+ 'exif-ycbcrpositioning-2',
 2946+ ),
29362947 'edit-externally' => array(
29372948 'edit-externally',
29382949 'edit-externally-help',
Index: branches/img_metadata/phase3/maintenance/language/messageTypes.inc
@@ -599,6 +599,13 @@
600600 'exif-gpsdestdistance-n',
601601 'exif-gpsdirection-t',
602602 'exif-gpsdirection-m',
 603+ 'exif-gpsdop-excellent',
 604+ 'exif-gpsdop-good',
 605+ 'exif-gpsdop-moderate',
 606+ 'exif-gpsdop-fair',
 607+ 'exif-gpsdop-poor',
 608+ 'exif-ycbcrpositioning-1',
 609+ 'exif-ycbcrpositioning-2',
603610 //non-exif metadata that is still image metadata
604611 'exif-jpegfilecomment',
605612 'exif-keywords',
Index: branches/img_metadata/phase3/includes/Exif.php
@@ -24,7 +24,7 @@
2525 */
2626
2727 /**
28 - * @todo document (e.g. one-sentence class-overview description)
 28+ * Class to extract and validate Exif data from jpeg (and possibly tiff) files.
2929 * @ingroup Media
3030 */
3131 class Exif {
@@ -376,7 +376,7 @@
377377
378378 //GPSVersion(ID) is treated as the wrong type by php exif support.
379379 //Go through each byte turning it into a version string.
380 - //For example: "\x00\x00\x02\x02" -> "2.2.0.0"
 380+ //For example: "\x02\x02\x00\x00" -> "2.2.0.0"
381381
382382 //Also change exif tag name from GPSVersion (what php exif thinks it is)
383383 //to GPSVersionID (what the exif standard thinks it is).
@@ -388,7 +388,7 @@
389389 if ( $i !== 0 ) {
390390 $newVal .= '.';
391391 }
392 - $newVal .= ord( substr($val, strlen($val) - $i - 1, 1) );
 392+ $newVal .= ord( substr($val, $i, 1) );
393393 }
394394 $this->mFilteredExifData['GPSVersionID'] = $newVal;
395395 unset( $this->mFilteredExifData['GPSVersion'] );
@@ -396,12 +396,10 @@
397397
398398 }
399399 /**
400 - * do userComment and similar. pg. 34 of exif standard.
 400+ * Do userComment tags and similar. See pg. 34 of exif standard.
401401 * basically first 8 bytes is charset, rest is value.
402 - * Needs more testing, as its unclear from exif standard if they mean
403 - * utf-8, utf-16, or something else by 'unicode'.
 402+ * This has not been tested on any shift-JIS strings.
404403 * @param $prop String prop name.
405 - * @todo this is in need of testing.
406404 */
407405 private function charCodeString ( $prop ) {
408406 if ( isset( $this->mFilteredExifData[$prop] ) ) {
@@ -421,11 +419,27 @@
422420 switch ($charCode) {
423421 case "\x4A\x49\x53\x00\x00\x00\x00\x00":
424422 //JIS
425 - $val = iconv("Shift-JIS", "UTF-8//IGNORE", $val);
 423+ $charset = "Shift-JIS";
426424 break;
427 - default: //unicode or undefined. leave as is.
 425+ case "UNICODE\x00":
 426+ $charset = "UTF-16";
428427 break;
 428+ default: //ascii or undefined.
 429+ $charset = "";
 430+ break;
429431 }
 432+ // This could possibly check to see if iconv is really installed
 433+ // or if we're using the compatability wraper in globalFunctions.php
 434+ if ($charset) {
 435+ $val = iconv($charset, 'UTF-8//IGNORE', $val);
 436+ } else {
 437+ // if valid utf-8, assume that, otherwise assume windows-1252
 438+ $valCopy = $val;
 439+ UtfNormal::quickIsNFCVerify( $valCopy ); //validates $valCopy.
 440+ if ( $valCopy !== $val ) {
 441+ $val = iconv('Windows-1252', 'UTF-8//IGNORE', $val);
 442+ }
 443+ }
430444
431445 //trim and check to make sure not only whitespace.
432446 $val = trim($val);
@@ -437,7 +451,7 @@
438452 }
439453
440454 //all's good.
441 - $this->mFilteredData[$prop] = $val;
 455+ $this->mFilteredExifData[$prop] = $val;
442456 }
443457 }
444458 /**
@@ -765,7 +779,10 @@
766780 }
767781
768782 /**
769 - * @todo document (e.g. one-sentence class-overview description)
 783+ * Format Image metadata values into a human readable form.
 784+ * Note despite the name, this formats more than just exif
 785+ * values.
 786+ * @todo Perhaps rename to FormatMetadata
770787 * @ingroup Media
771788 */
772789 class FormatExif {
@@ -818,6 +835,34 @@
819836 } else {
820837 $type = 'ul'; // default unorcdered list.
821838 }
 839+
 840+ //This is done differently as the tag is an array.
 841+ if ($tag == 'GPSTimeStamp' && count($vals) === 3) {
 842+ //hour min sec array
 843+
 844+ $h = explode('/', $vals[0]);
 845+ $m = explode('/', $vals[1]);
 846+ $s = explode('/', $vals[2]);
 847+
 848+ // this should already be validated
 849+ // when loaded from file, but it could
 850+ // come from a foreign repo, so be
 851+ // paranoid.
 852+ if ( !isset($h[1])
 853+ || !isset($m[1])
 854+ || !isset($s[1])
 855+ || $h[1] == 0
 856+ || $m[1] == 0
 857+ || $s[1] == 0
 858+ ) {
 859+ continue;
 860+ }
 861+ $tags[$tag] = intval( $h[0] / $h[1] )
 862+ . ':' . intval( $m[0] / $m[1] )
 863+ . ':' . str_pad( intval( $s[0] / $s[1] ), 2, '0', STR_PAD_LEFT );
 864+ continue;
 865+ }
 866+
822867
823868 foreach ( $vals as &$val ) {
824869
@@ -867,7 +912,17 @@
868913 break;
869914
870915 // TODO: YCbCrSubSampling
871 - // TODO: YCbCrPositioning
 916+ case 'YCbCrPositioning':
 917+ switch ( $val ) {
 918+ case 1:
 919+ case 2:
 920+ $val = $this->msg( $tag, $val );
 921+ break;
 922+ default:
 923+ $val = $val;
 924+ break;
 925+ }
 926+ break;
872927
873928 case 'XResolution':
874929 case 'YResolution':
@@ -914,12 +969,21 @@
915970 case 'DateTime':
916971 case 'DateTimeOriginal':
917972 case 'DateTimeDigitized':
918 - if ( $val == '0000:00:00 00:00:00' ) {
 973+ case 'GPSDateStamp':
 974+ if ( $val == '0000:00:00 00:00:00' || $val == ' : : : : ' ) {
919975 $val = wfMsg( 'exif-unknowndate' );
920976 } elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/', $val ) ) {
921977 $val = $wgLang->timeanddate( wfTimestamp( TS_MW, $val ) );
922978 } elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/', $val ) ) {
923 - $val = $wgLang->date( wfTimestamp( TS_MW, $val . ' 00:00:00' ) );
 979+ // avoid using wfTimestamp here for the pre-1902 photos
 980+ // due to reverse y2k38 bug. $wgLang->timeanddate() is also
 981+ // broken on dates from before 1902 so don't worry about it
 982+ // in the above case (not to mention that most photos from the
 983+ // 1800's don't have a time recorded anyways).
 984+ $val = $wgLang->date( substr( $val, 0, 4 )
 985+ . substr( $val, 5, 2 )
 986+ . substr( $val, 8, 2 )
 987+ . '000000' );
924988 }
925989 // else it will just output $val without formatting it.
926990 break;
@@ -1197,10 +1261,6 @@
11981262 }
11991263 break;
12001264
1201 - case 'GPSDateStamp':
1202 - $val = $wgLang->date( substr( $val, 0, 4 ) . substr( $val, 5, 2 ) . substr( $val, 8, 2 ) . '000000' );
1203 - break;
1204 -
12051265 case 'GPSLatitude':
12061266 case 'GPSDestLatitude':
12071267 $val = $this->formatCoords( $val, 'latitude' );
@@ -1213,10 +1273,10 @@
12141274 case 'GPSSpeedRef':
12151275 switch( $val ) {
12161276 case 'K': case 'M': case 'N':
1217 - $tags[$tag] = $this->msg( 'GPSSpeed', $val );
 1277+ $val = $this->msg( 'GPSSpeed', $val );
12181278 break;
12191279 default:
1220 - $tags[$tag] = $val;
 1280+ $val = $val;
12211281 break;
12221282 }
12231283 break;
@@ -1224,15 +1284,31 @@
12251285 case 'GPSDestDistanceRef':
12261286 switch( $val ) {
12271287 case 'K': case 'M': case 'N':
1228 - $tags[$tag] = $this->msg( 'GPSDestDistance', $val );
 1288+ $val = $this->msg( 'GPSDestDistance', $val );
12291289 break;
12301290 default:
1231 - $tags[$tag] = $val;
 1291+ $val = $val;
12321292 break;
12331293 }
12341294 break;
12351295
 1296+ case 'GPSDOP':
 1297+ // See http://en.wikipedia.org/wiki/Dilution_of_precision_(GPS)
 1298+ if ( $val <= 2 ) {
 1299+ $val = $this->msg( $tag, 'excellent', $this->formatNum( $val ) );
 1300+ } elseif ( $val <= 5 ) {
 1301+ $val = $this->msg( $tag, 'good', $this->formatNum( $val ) );
 1302+ } elseif ( $val <= 10 ) {
 1303+ $val = $this->msg( $tag, 'moderate', $this->formatNum( $val ) );
 1304+ } elseif ( $val <= 20 ) {
 1305+ $val = $this->msg( $tag, 'fair', $this->formatNum( $val ) );
 1306+ } else {
 1307+ $val = $this->msg( $tag, 'poor', $this->formatNum( $val ) );
 1308+ }
 1309+ break;
12361310
 1311+
 1312+
12371313 // This is not in the Exif standard, just a special
12381314 // case for our purposes which enables wikis to wikify
12391315 // the make, model and software name to link to their articles.
@@ -1450,8 +1526,8 @@
14511527 /**
14521528 * Calculate the greatest common divisor of two integers.
14531529 *
1454 - * @param $a Integer: FIXME
1455 - * @param $b Integer: FIXME
 1530+ * @param $a Integer: Numerator
 1531+ * @param $b Integer: Denominator
14561532 * @return int
14571533 * @private
14581534 */
Index: branches/img_metadata/phase3/includes/media/BitmapMetadataHandler.php
@@ -8,6 +8,10 @@
99 @todo other image formats.
1010 */
1111 class BitmapMetadataHandler {
 12+ const MAX_JPEG_SEGMENTS = 200;
 13+ // the max segment is a sanity check.
 14+ // A jpeg file should never even remotely have
 15+ // that many segments. Your average file has about 10.
1216 private $filetype;
1317 private $filename;
1418 private $metadata = Array();
@@ -36,9 +40,10 @@
3741 */
3842 function jpegSegmentSplitter () {
3943 $filename = $this->filename;
 44+ $segmentCount = 0;
4045 if ( $this->filetype !== 'image/jpeg' ) throw new MWException( "jpegSegmentSplitter called on non-jpeg" );
4146
42 - $segments = Array( 'XMP_ext' => Array() );
 47+ $segments = Array( 'XMP_ext' => array(), 'COM' => array() );
4348
4449 if ( !$filename ) throw new MWException( "No filename specified for BitmapMetadataHandler" );
4550 if ( !file_exists( $filename ) || is_dir( $filename ) ) throw new MWException( "Invalid file $filename passed to BitmapMetadataHandler" );
@@ -51,6 +56,11 @@
5257 if ( $buffer !== "\xFF\xD8" ) throw new MWException( "Not a jpeg, no SOI" );
5358 while ( !feof( $fh ) ) {
5459 $buffer = fread( $fh, 1 );
 60+ $segmentCount++;
 61+ if ( $segmentCount > self::MAX_JPEG_SEGMENTS ) {
 62+ // this is just a sanity check
 63+ throw new MWException('Too many jpeg segments. Aborting');
 64+ }
5565 if ( $buffer !== "\xFF" ) {
5666 throw new MWException( "Error reading jpeg file marker" );
5767 }
@@ -58,7 +68,17 @@
5969 $buffer = fread( $fh, 1 );
6070 if ( $buffer === "\xFE" ) {
6171 // COM section -- file comment
62 - $segments["COM"] = self::jpegExtractMarker( $fh );
 72+ // First see if valid utf-8,
 73+ // if not try to convert it to windows-1252.
 74+ $com = $oldCom = trim( self::jpegExtractMarker( $fh ) );
 75+ UtfNormal::quickIsNFCVerify( $com );
 76+ //turns $com to valid utf-8.
 77+ //thus if no change, its utf-8, otherwise its something else.
 78+ if ( $com !== $oldCom ) {
 79+ $oldCom = iconv( 'windows-1252', 'UTF-8//IGNORE', $oldCom );
 80+ }
 81+ $segments["COM"][] = $oldCom;
 82+
6383 } elseif ( $buffer === "\xE1" ) {
6484 // APP1 section (Exif, XMP, and XMP extended)
6585 $temp = self::jpegExtractMarker( $fh );
@@ -67,8 +87,6 @@
6888 if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" ) {
6989 $segments["XMP"] = $temp;
7090 } elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" ) {
71 - // fixme - put some limit on this? what if someone
72 - // uploaded a file with 100mb worth of metadata.
7391 $segments["XMP_ext"][] = $temp;
7492 }
7593 } elseif ( $buffer === "\xED" ) {
@@ -288,7 +306,7 @@
289307 $meta->getExif();
290308 $seg = Array();
291309 $seg = $meta->JpegSegmentSplitter();
292 - if ( isset( $seg['COM'] ) ) {
 310+ if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) {
293311 $meta->addMetadata( Array( 'JPEGFileComment' => $seg['COM'] ), 'file-comment' );
294312 }
295313 if ( isset( $seg['PSIR'] ) ) {
Index: branches/img_metadata/phase3/includes/media/IPTC.php
@@ -25,14 +25,24 @@
2626 return $data;
2727 }
2828
29 - $c = '?';
 29+ $c = '';
3030 //charset info contained in tag 1:90.
3131 if (isset($parsed['1#090']) && isset($parsed['1#090'][0])) {
3232 $c = self::getCharset($parsed['1#090'][0]);
 33+ if ($c === false) {
 34+ //Unknown charset. refuse to parse.
 35+ //note: There is a different between
 36+ //unknown and no charset specified.
 37+ return array();
 38+ }
3339 unset( $parsed['1#090'] );
3440 }
3541
3642 foreach ( $parsed as $tag => $val ) {
 43+ if ( isset( $val[0] ) && trim($val[0]) == '' ) {
 44+ wfDebugLog('iptc', "IPTC tag $tag had only whitespace as its value.");
 45+ continue;
 46+ }
3747 switch( $tag ) {
3848 case '2#120': /*IPTC caption. mapped with exif ImageDescription*/
3949 $data['ImageDescription'] = self::convIPTC( $val, $c );
@@ -265,10 +275,11 @@
266276 }
267277
268278 if ( substr($date, 0, 4) < "1902" ) {
269 - //We run into the reverse y2k38 bug.
270 - //might do something better later...
271 - wfDebugLog( 'iptc', "IPTC: date is too early (reverse y2k38 bug) ( $date )");
272 - return null;
 279+ // We run into the reverse y2k38 bug.
 280+ // Avoid using wfTimestamp as it doesn't work for pre-1902 dates
 281+ return substr( $date, 0, 4 ) . ':'
 282+ . substr( $date, 4, 2 ) . ':'
 283+ . substr( $date, 6, 2 );
273284 }
274285
275286 $unixTS = wfTimestamp( TS_UNIX, $date . substr( $time, 0, 6 ));
@@ -300,10 +311,10 @@
301312 foreach ($data as &$val) {
302313 $val = self::convIPTCHelper( $val, $charset );
303314 }
304 -
305315 } else {
306316 $data = self::convIPTCHelper ( $data, $charset );
307317 }
 318+
308319 return $data;
309320 }
310321 /**
@@ -312,21 +323,21 @@
313324 * @param $charset String: The charset
314325 */
315326 private static function convIPTCHelper ( $data, $charset ) {
316 - if ( $charset !== '?' ) {
 327+ if ( $charset ) {
317328 $data = iconv($charset, "UTF-8//IGNORE", $data);
318329 if ($data === false) {
319330 $data = "";
320 - wfDebug(__METHOD__ . " Error converting iptc data charset $charset to utf-8");
 331+ wfDebugLog('iptc', __METHOD__ . " Error converting iptc data charset $charset to utf-8");
321332 }
322333 } else {
323 - //treat as utf-8 if is valid utf-8. otherwise pretend its iso-8859-1
 334+ //treat as utf-8 if is valid utf-8. otherwise pretend its windows-1252
324335 // most of the time if there is no 1:90 tag, it is either ascii, latin1, or utf-8
325336 $oldData = $data;
326337 UtfNormal::quickIsNFCVerify( $data ); //make $data valid utf-8
327 - if ($data === $oldData) return $data;
328 - else return self::convIPTCHelper ( $oldData, 'ISO-8859-1' ); //should this be windows-1252?
 338+ if ($data === $oldData) return $data; //if validation didn't change $data
 339+ else return self::convIPTCHelper ( $oldData, 'Windows-1252' );
329340 }
330 - return $data;
 341+ return trim( $data );
331342 }
332343
333344 /**
@@ -455,9 +466,9 @@
456467 $c = 'CSN_369103';
457468 break;
458469 default:
459 - wfDebug(__METHOD__ . 'Unkown charset in iptc 1:90: ' . bin2hex( $tag ) );
460 - //at this point should we give up and refuse to parse iptc?
461 - $c = '?';
 470+ wfDebugLog('iptc', __METHOD__ . 'Unkown charset in iptc 1:90: ' . bin2hex( $tag ) );
 471+ //at this point just give up and refuse to parse iptc?
 472+ $c = false;
462473 }
463474 return $c;
464475 }
Index: branches/img_metadata/phase3/languages/messages/MessagesEn.php
@@ -3634,7 +3634,13 @@
36353635 * exposuretime
36363636 * fnumber
36373637 * isospeedratings
3638 -* focallength',
 3638+* focallength
 3639+* artist
 3640+* copyright
 3641+* imagedescription
 3642+* gpslatitude
 3643+* gpslongitude
 3644+* gpsaltitude',
36393645
36403646 # EXIF tags
36413647 'exif-imagewidth' => 'Width',
@@ -3938,8 +3944,8 @@
39393945 'exif-gpslongitude-w' => 'West longitude',
39403946
39413947 # Pseudotags used for GPSAltitude
3942 -'exif-gpsaltitude-above-sealevel' => '$1 meters above sea level',
3943 -'exif-gpsaltitude-below-sealevel' => '$1 meters below sea level',
 3948+'exif-gpsaltitude-above-sealevel' => '$1 {{plural:$1|meters|meter}} above sea level',
 3949+'exif-gpsaltitude-below-sealevel' => '$1 {{plural:$1|meters|meter}} below sea level',
39443950
39453951 'exif-gpsstatus-a' => 'Measurement in progress',
39463952 'exif-gpsstatus-v' => 'Measurement interoperability',
@@ -3961,10 +3967,19 @@
39623968 'exif-gpsdirection-t' => 'True direction',
39633969 'exif-gpsdirection-m' => 'Magnetic direction',
39643970
 3971+'exif-gpsdop-excellent' => 'Excellent ($1)',
 3972+'exif-gpsdop-good' => 'Good ($1)',
 3973+'exif-gpsdop-moderate' => 'Moderate ($1)',
 3974+'exif-gpsdop-fair' => 'Fair ($1)',
 3975+'exif-gpsdop-poor' => 'Poor ($1)',
 3976+
39653977 'exif-objectcycle-a' => 'Morning only',
39663978 'exif-objectcycle-p' => 'Evening only',
39673979 'exif-objectcycle-b' => 'Both morning and evening',
39683980
 3981+'exif-ycbcrpositioning-1' => 'Centered',
 3982+'exif-ycbcrpositioning-2' => 'Co-sited',
 3983+
39693984 # External editor support
39703985 'edit-externally' => 'Edit this file using an external application',
39713986 'edit-externally-help' => '(See the [http://www.mediawiki.org/wiki/Manual:External_editors setup instructions] for more information)',

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r68176some stuff related to iptc charset detectionbawolff17:58, 17 June 2010

Status & tagging log