r71326 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r71325‎ | r71326 | r71327 >
Date:04:18, 20 August 2010
Author:bawolff
Status:deferred
Tags:
Comment:
Misc. fixes.
*Handle unexpected input in XMP better.
*Make png keywords and xmp language names case-insensitive
*Move some jpeg specific methods to the jpeg subclass.
*Use class constants for value when error extracting metadata (per comment on r71168)
Modified paths:
  • /branches/img_metadata/phase3/includes/media/Bitmap.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/BitmapMetadataHandler.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/GIF.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/Generic.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/Jpeg.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/PNG.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/PNGMetadataExtractor.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/XMP.php (modified) (history)

Diff [purge]

Index: branches/img_metadata/phase3/includes/media/XMP.php
@@ -321,10 +321,7 @@
322322 * @param $elm String Namespace of element followed by a space and then tag name of element.
323323 */
324324 private function endElementModeIgnore ( $elm ) {
325 - if ( count( $this->curItem ) == 0 ) {
326 - // just to be paranoid.
327 - throw new MWException( ' In ignore mode with no curItem' );
328 - }
 325+
329326 if ( $this->curItem[0] === $elm ) {
330327 array_shift( $this->curItem );
331328 array_shift( $this->mode );
@@ -466,7 +463,8 @@
467464 */
468465 function endElement( $parser, $elm ) {
469466 if ( $elm === ( self::NS_RDF . ' RDF' )
470 - || $elm === 'adobe:ns:meta/ xmpmeta' )
 467+ || $elm === 'adobe:ns:meta/ xmpmeta'
 468+ || $elm === 'adobe:ns:meta/ xapmeta' )
471469 {
472470 // ignore these.
473471 return;
@@ -477,6 +475,23 @@
478476 wfDebugLog( 'XMP', __METHOD__ . ' encoutered <rdf:type>' );
479477 }
480478
 479+ if ( strpos( $elm, ' ' ) === false ) {
 480+ // This probably shouldn't happen.
 481+ wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
 482+ return;
 483+ }
 484+
 485+ if ( count( $this->mode[0] ) === 0 ) {
 486+ // This should never ever happen.
 487+ throw new MWException( 'Encountered end element with no mode' );
 488+ }
 489+
 490+ if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
 491+ // just to be paranoid. Should always have a curItem, except for initially
 492+ // (aka during MODE_INITAL).
 493+ throw new MWException( "Hit end element </$elm> but no curItem" );
 494+ }
 495+
481496 switch( $this->mode[0] ) {
482497 case self::MODE_IGNORE:
483498 $this->endElementModeIgnore( $elm );
@@ -721,7 +736,8 @@
722737 . " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
723738 }
724739
725 - $this->itemLang = $attribs[ self::NS_XML . ' lang' ];
 740+ // Lang is case-insensitive.
 741+ $this->itemLang = strtolower( $attribs[ self::NS_XML . ' lang' ] );
726742
727743 // need to add curItem[0] on again since one is for the specific item
728744 // and one is for the entire group.
@@ -741,9 +757,10 @@
742758 function startElement( $parser, $elm, $attribs ) {
743759
744760 if ( $elm === self::NS_RDF . ' RDF'
745 - || $elm === 'adobe:ns:meta/ xmpmeta' )
 761+ || $elm === 'adobe:ns:meta/ xmpmeta'
 762+ || $elm === 'adobe:ns:meta/ xapmeta')
746763 {
747 - /* ignore */
 764+ /* ignore. */
748765 return;
749766 } elseif ( $elm === self::NS_RDF . ' Description' ) {
750767 if ( count( $this->mode ) === 0 ) {
@@ -761,9 +778,20 @@
762779 wfDebugLog( 'XMP', __METHOD__ . ' Encoutered <rdf:type> which isn\'t currently supported' );
763780 }
764781
 782+ if ( strpos( $elm, ' ' ) === false ) {
 783+ // This probably shouldn't happen.
 784+ wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
 785+ return;
 786+ }
765787
766788 list( $ns, $tag ) = explode( ' ', $elm, 2 );
767789
 790+ if ( count( $this->mode ) === 0 ) {
 791+ // This should not happen.
 792+ throw new MWException('Error extracting XMP, '
 793+ . "encountered <$elm> with no mode" );
 794+ }
 795+
768796 switch( $this->mode[0] ) {
769797 case self::MODE_IGNORE:
770798 $this->startElementModeIgnore( $elm );
@@ -822,6 +850,13 @@
823851 $this->mode[0] = self::MODE_QDESC;
824852 }
825853
 854+ if ( strpos( $name, ' ' ) === false ) {
 855+ // This shouldn't happen, but so far some old software forgets namespace
 856+ // on rdf:about.
 857+ wfDebugLog( 'XMP', __METHOD__ . ' Encoutered non-namespaced attribute: '
 858+ . " $name=\"$val\". Skipping. " );
 859+ continue;
 860+ }
826861 list( $ns, $tag ) = explode( ' ', $name, 2 );
827862 if ( $ns === self::NS_RDF ) {
828863 if ( $tag === 'value' || $tag === 'resource' ) {
Index: branches/img_metadata/phase3/includes/media/GIF.php
@@ -10,6 +10,8 @@
1111 * @ingroup Media
1212 */
1313 class GIFHandler extends BitmapHandler {
 14+
 15+ const BROKEN_FILE = '0'; // value to store in img_metadata if error extracting metadata.
1416
1517 function getMetadata( $image, $filename ) {
1618 try {
@@ -17,7 +19,7 @@
1820 } catch( Exception $e ) {
1921 // Broken file?
2022 wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
21 - return '0';
 23+ return self::BROKEN_FILE;
2224 }
2325
2426 return serialize($parsedGIFMetadata);
@@ -64,7 +66,7 @@
6567 }
6668
6769 function isMetadataValid( $image, $metadata ) {
68 - if ( $metadata === '0' ) {
 70+ if ( $metadata === self::BROKEN_FILE ) {
6971 // Do not repetitivly regenerate metadata on broken file.
7072 return self::METADATA_GOOD;
7173 }
Index: branches/img_metadata/phase3/includes/media/BitmapMetadataHandler.php
@@ -10,7 +10,6 @@
1111 */
1212 class BitmapMetadataHandler {
1313
14 - private $filename;
1514 private $metadata = Array();
1615 private $metaPriority = Array(
1716 20 => Array( 'other' ),
@@ -44,9 +43,9 @@
4544 * Basically what used to be in BitmapHandler::getMetadata().
4645 * Just calls stuff in the Exif class.
4746 */
48 - function getExif () {
49 - if ( file_exists( $this->filename ) ) {
50 - $exif = new Exif( $this->filename );
 47+ function getExif ( $filename ) {
 48+ if ( file_exists( $filename ) ) {
 49+ $exif = new Exif( $filename );
5150 $data = $exif->getFilteredData();
5251 if ( $data ) {
5352 $this->addMetadata( $data, 'exif' );
@@ -105,15 +104,6 @@
106105 return $temp;
107106 }
108107
109 - /** constructor.
110 - * This generally shouldn't be called directly
111 - * instead one of the static methods should be used
112 - *
113 - * @param string $file - full path to file
114 - */
115 - function __construct ( $file ) {
116 - $this->filename = $file;
117 - }
118108 /** Main entry point for jpeg's.
119109 *
120110 * @param string $file filename (with full path)
@@ -122,8 +112,8 @@
123113 */
124114 static function Jpeg ( $filename ) {
125115 $showXMP = function_exists( 'xml_parser_create_ns' );
126 - $meta = new self( $filename );
127 - $meta->getExif();
 116+ $meta = new self();
 117+ $meta->getExif( $filename );
128118 $seg = Array();
129119 $seg = JpegMetadataExtractor::segmentSplitter( $filename );
130120 if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) {
@@ -160,7 +150,7 @@
161151 static public function PNG ( $filename ) {
162152 $showXMP = function_exists( 'xml_parser_create_ns' );
163153
164 - $meta = new self( $filename );
 154+ $meta = new self();
165155 $array = PNGMetadataExtractor::getMetadata( $filename );
166156 if ( isset( $array['text']['xmp']['x-default'] ) && $array['text']['xmp']['x-default'] !== '' && $showXMP ) {
167157 $xmp = new XMPReader();
@@ -188,7 +178,7 @@
189179 */
190180 static public function GIF ( $filename ) {
191181
192 - $meta = new self( $filename );
 182+ $meta = new self();
193183 $baseArray = GIFMetadataExtractor::getMetadata( $filename );
194184
195185 if ( count( $baseArray['comment'] ) > 0 ) {
Index: branches/img_metadata/phase3/includes/media/Bitmap.php
@@ -373,80 +373,4 @@
374374 return 'exif';
375375 }
376376
377 - /**
378 - * In practise this is only used by jpegs...
379 - *
380 - * It should perhaps be moved to Jpeg.php.
381 - */
382 - function isMetadataValid( $image, $metadata ) {
383 - global $wgShowEXIF;
384 - if ( !$wgShowEXIF ) {
385 - # Metadata disabled and so an empty field is expected
386 - return self::METADATA_GOOD;
387 - }
388 - if ( $metadata === '0' ) {
389 - # Old special value indicating that there is no EXIF data in the file.
390 - # or that there was an error well extracting the metadata.
391 - wfDebug( __METHOD__ . ": back-compat version\n");
392 - return self::METADATA_COMPATIBLE;
393 - }
394 - if ( $metadata === '-1' ) {
395 - return self::METADATA_GOOD;
396 - }
397 - wfSuppressWarnings();
398 - $exif = unserialize( $metadata );
399 - wfRestoreWarnings();
400 - if ( !isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) ||
401 - $exif['MEDIAWIKI_EXIF_VERSION'] != Exif::version() )
402 - {
403 - if ( isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) &&
404 - $exif['MEDIAWIKI_EXIF_VERSION'] == 1 )
405 - {
406 - //back-compatible but old
407 - wfDebug( __METHOD__.": back-compat version\n" );
408 - return self::METADATA_COMPATIBLE;
409 - }
410 - # Wrong (non-compatible) version
411 - wfDebug( __METHOD__.": wrong version\n" );
412 - return self::METADATA_BAD;
413 - }
414 - return self::METADATA_GOOD;
415 - }
416 -
417 - /**
418 - * Get a list of EXIF metadata items which should be displayed when
419 - * the metadata table is collapsed.
420 - *
421 - * @return array of strings
422 - * @access private
423 - */
424 - function visibleMetadataFields() {
425 - $fields = array();
426 - $lines = explode( "\n", wfMsgForContent( 'metadata-fields' ) );
427 - foreach( $lines as $line ) {
428 - $matches = array();
429 - if( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) {
430 - $fields[] = $matches[1];
431 - }
432 - }
433 - $fields = array_map( 'strtolower', $fields );
434 - return $fields;
435 - }
436 -
437 - function formatMetadata( $image ) {
438 - $metadata = $image->getMetadata();
439 - if ( !$metadata || $metadata == '-1' ) {
440 - return false;
441 - }
442 - $exif = unserialize( $metadata );
443 - if ( !$exif ) {
444 - return false;
445 - }
446 - unset( $exif['MEDIAWIKI_EXIF_VERSION'] );
447 - if ( count( $exif ) == 0 ) {
448 - return false;
449 - }
450 - return $this->formatMetadataHelper( $exif );
451 - }
452 -
453377 }
Index: branches/img_metadata/phase3/includes/media/Generic.php
@@ -297,6 +297,27 @@
298298 }
299299
300300 /**
 301+ * Get a list of metadata items which should be displayed when
 302+ * the metadata table is collapsed.
 303+ *
 304+ * @return array of strings
 305+ * @access private
 306+ */
 307+ function visibleMetadataFields() {
 308+ $fields = array();
 309+ $lines = explode( "\n", wfMsgForContent( 'metadata-fields' ) );
 310+ foreach( $lines as $line ) {
 311+ $matches = array();
 312+ if( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) {
 313+ $fields[] = $matches[1];
 314+ }
 315+ }
 316+ $fields = array_map( 'strtolower', $fields );
 317+ return $fields;
 318+ }
 319+
 320+
 321+ /**
301322 * @todo Fixme: document this!
302323 * 'value' thingy goes into a wikitext table; it used to be escaped but
303324 * that was incompatible with previous practice of customized display
Index: branches/img_metadata/phase3/includes/media/PNGMetadataExtractor.php
@@ -20,26 +20,26 @@
2121 * and http://www.w3.org/TR/PNG/#11keywords
2222 */
2323 self::$text_chunks = array(
24 - 'XML:com.adobe.xmp' => 'xmp',
 24+ 'xml:com.adobe.xmp' => 'xmp',
2525 # Artist is unofficial. Author is the recommended
2626 # keyword in the PNG spec. However some people output
2727 # Artist so support both.
28 - 'Artist' => 'Artist',
29 - 'Model' => 'Model',
30 - 'Make' => 'Make',
31 - 'Author' => 'Artist',
32 - 'Comment' => 'PNGFileComment',
33 - 'Description' => 'ImageDescription',
34 - 'Title' => 'ObjectName',
35 - 'Copyright' => 'Copyright',
 28+ 'artist' => 'Artist',
 29+ 'model' => 'Model',
 30+ 'make' => 'Make',
 31+ 'author' => 'Artist',
 32+ 'comment' => 'PNGFileComment',
 33+ 'description' => 'ImageDescription',
 34+ 'title' => 'ObjectName',
 35+ 'copyright' => 'Copyright',
3636 # Source as in original device used to make image
3737 # not as in who gave you the image
38 - 'Source' => 'Model',
39 - 'Software' => 'Software',
40 - 'Disclaimer' => 'Disclaimer',
41 - 'Warning' => 'ContentWarning',
42 - 'URL' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
43 - 'Label' => 'Label',
 38+ 'source' => 'Model',
 39+ 'software' => 'Software',
 40+ 'disclaimer' => 'Disclaimer',
 41+ 'warning' => 'ContentWarning',
 42+ 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
 43+ 'label' => 'Label',
4444 /* Other potentially useful things - Creation Time, Document */
4545 );
4646
@@ -106,6 +106,8 @@
107107 * $items[5] = content
108108 */
109109
 110+ // Theoretically should be case-sensitive, but in practise...
 111+ $items[1] = strtolower( $items[1] );
110112 if ( !isset( self::$text_chunks[$items[1]] ) ) {
111113 // Only extract textual chunks on our list.
112114 fseek( $fh, self::$CRC_size, SEEK_CUR );
@@ -158,6 +160,9 @@
159161 throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
160162 return;
161163 }
 164+
 165+ // Theoretically should be case-sensitive, but in practise...
 166+ $keyword = strtolower( $keyword );
162167 if ( !isset( self::$text_chunks[ $keyword ] ) ) {
163168 // Don't recognize chunk, so skip.
164169 fseek( $fh, self::$CRC_size, SEEK_CUR );
@@ -187,6 +192,9 @@
188193 throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
189194 return;
190195 }
 196+ // Theoretically should be case-sensitive, but in practise...
 197+ $keyword = strtolower( $keyword );
 198+
191199 if ( !isset( self::$text_chunks[ $keyword ] ) ) {
192200 // Don't recognize chunk, so skip.
193201 fseek( $fh, self::$CRC_size, SEEK_CUR );
Index: branches/img_metadata/phase3/includes/media/PNG.php
@@ -10,6 +10,8 @@
1111 * @ingroup Media
1212 */
1313 class PNGHandler extends BitmapHandler {
 14+
 15+ const BROKEN_FILE = '0';
1416
1517 function getMetadata( $image, $filename ) {
1618 try {
@@ -17,7 +19,7 @@
1820 } catch( Exception $e ) {
1921 // Broken file?
2022 wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
21 - return '0';
 23+ return self::BROKEN_FILE;
2224 }
2325
2426 return serialize($metadata);
@@ -55,7 +57,7 @@
5658
5759 function isMetadataValid( $image, $metadata ) {
5860
59 - if ( $metadata === '0' ) {
 61+ if ( $metadata === self::BROKEN_FILE ) {
6062 // Do not repetitivly regenerate metadata on broken file.
6163 return self::METADATA_GOOD;
6264 }
Index: branches/img_metadata/phase3/includes/media/Jpeg.php
@@ -10,6 +10,9 @@
1111 */
1212 class JpegHandler extends BitmapHandler {
1313
 14+ const BROKEN_FILE = '-1'; // error extracting metadata
 15+ const OLD_BROKEN_FILE = '0'; // outdated error extracting metadata.
 16+
1417 function getMetadata ( $image, $filename ) {
1518 try {
1619 $meta = BitmapMetadataHandler::Jpeg( $filename );
@@ -24,7 +27,7 @@
2528 // BitmapMetadataHandler throws an exception in certain exceptional cases like if file does not exist.
2629 wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" );
2730
28 - /* This used to use 0 for the cases
 31+ /* This used to use 0 (self::OLD_BROKEN_FILE) for the cases
2932 * * No metadata in the file
3033 * * Something is broken in the file.
3134 * However, if the metadata support gets expanded then you can't tell if the 0 is from
@@ -33,7 +36,7 @@
3437 * Thus switch to using -1 to denote only a broken file, and use an array with only
3538 * MEDIAWIKI_EXIF_VERSION to denote no props.
3639 */
37 - return '-1';
 40+ return self::BROKEN_FILE;
3841 }
3942 }
4043
@@ -72,4 +75,57 @@
7376 $metadata['MEDIAWIKI_EXIF_VERSION'] = 1;
7477 return $metadata;
7578 }
 79+
 80+ function isMetadataValid( $image, $metadata ) {
 81+ global $wgShowEXIF;
 82+ if ( !$wgShowEXIF ) {
 83+ # Metadata disabled and so an empty field is expected
 84+ return self::METADATA_GOOD;
 85+ }
 86+ if ( $metadata === self::OLD_BROKEN_FILE ) {
 87+ # Old special value indicating that there is no EXIF data in the file.
 88+ # or that there was an error well extracting the metadata.
 89+ wfDebug( __METHOD__ . ": back-compat version\n");
 90+ return self::METADATA_COMPATIBLE;
 91+ }
 92+ if ( $metadata === self::BROKEN_FILE ) {
 93+ return self::METADATA_GOOD;
 94+ }
 95+ wfSuppressWarnings();
 96+ $exif = unserialize( $metadata );
 97+ wfRestoreWarnings();
 98+ if ( !isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) ||
 99+ $exif['MEDIAWIKI_EXIF_VERSION'] != Exif::version() )
 100+ {
 101+ if ( isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) &&
 102+ $exif['MEDIAWIKI_EXIF_VERSION'] == 1 )
 103+ {
 104+ //back-compatible but old
 105+ wfDebug( __METHOD__.": back-compat version\n" );
 106+ return self::METADATA_COMPATIBLE;
 107+ }
 108+ # Wrong (non-compatible) version
 109+ wfDebug( __METHOD__.": wrong version\n" );
 110+ return self::METADATA_BAD;
 111+ }
 112+ return self::METADATA_GOOD;
 113+ }
 114+
 115+ function formatMetadata( $image ) {
 116+ $metadata = $image->getMetadata();
 117+ if ( !$metadata || $metadata == self::BROKEN_FILE ) {
 118+ return false;
 119+ }
 120+ $exif = unserialize( $metadata );
 121+ if ( !$exif ) {
 122+ return false;
 123+ }
 124+ unset( $exif['MEDIAWIKI_EXIF_VERSION'] );
 125+ if ( count( $exif ) == 0 ) {
 126+ return false;
 127+ }
 128+ return $this->formatMetadataHelper( $exif );
 129+ }
 130+
 131+
76132 }

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r71168Add support for extracting XMP and file comments from GIF images....bawolff20:58, 16 August 2010

Status & tagging log