r70530 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r70529‎ | r70530 | r70531 >
Date:23:34, 5 August 2010
Author:bawolff
Status:deferred
Tags:
Comment:
More XMP stuff
* add stuff for multi-lingual values
* add stuff for validating values
* make the XMPInfo::$items not be public per code review on r70481
* also fix exif handling of negative rationals that I happened to stumble upon
Modified paths:
  • /branches/img_metadata/phase3/includes/AutoLoader.php (modified) (history)
  • /branches/img_metadata/phase3/includes/Exif.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/XMP.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/XMPInfo.php (modified) (history)
  • /branches/img_metadata/phase3/includes/media/XMPValidate.php (added) (history)

Diff [purge]

Index: branches/img_metadata/phase3/includes/Exif.php
@@ -649,7 +649,7 @@
650650
651651 private function isSrational( $in ) {
652652 $m = array();
653 - if ( !is_array( $in ) && preg_match( '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m ) ) { # Avoid division by zero
 653+ if ( !is_array( $in ) && preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m ) ) { # Avoid division by zero
654654 return $this->isSlong( $m[0] ) && $this->isSlong( $m[1] );
655655 } else {
656656 $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );
@@ -1396,7 +1396,7 @@
13971397 $val = htmlspecialchars( $val );
13981398 }
13991399 break;
1400 -
 1400+
14011401 default:
14021402 $val = $this->formatNum( $val );
14031403 break;
@@ -1500,7 +1500,7 @@
15011501 }
15021502 return $wgLang->commaList( $out );
15031503 }
1504 - if ( preg_match( '/^(\d+)\/(\d+)$/', $num, $m ) )
 1504+ if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) )
15051505 return $wgLang->formatNum( $m[2] != 0 ? $m[1] / $m[2] : $num );
15061506 else
15071507 return $wgLang->formatNum( $num );
@@ -1516,10 +1516,10 @@
15171517 */
15181518 function formatFraction( $num ) {
15191519 $m = array();
1520 - if ( preg_match( '/^(\d+)\/(\d+)$/', $num, $m ) ) {
 1520+ if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) ) {
15211521 $numerator = intval( $m[1] );
15221522 $denominator = intval( $m[2] );
1523 - $gcd = $this->gcd( $numerator, $denominator );
 1523+ $gcd = $this->gcd( abs( $numerator ), $denominator );
15241524 if( $gcd != 0 ) {
15251525 // 0 shouldn't happen! ;)
15261526 return $this->formatNum( $numerator / $gcd ) . '/' . $this->formatNum( $denominator / $gcd );
Index: branches/img_metadata/phase3/includes/media/XMP.php
@@ -20,15 +20,18 @@
2121 */
2222 class XMPReader {
2323
24 - private $curItem = array();
25 - private $ancestorStruct = false;
26 - private $charContent = false;
27 - private $mode = array();
28 - private $results = array();
29 - private $processingArray = false;
 24+ private $curItem = array(); // array to hold the current element (and previous element, and so on)
 25+ private $ancestorStruct = false; // the structure name when processing nested structures.
 26+ private $charContent = false; // temporary holder for character data that appears in xmp doc.
 27+ private $mode = array(); // stores the state the xmpreader is in (see MODE_FOO constants)
 28+ private $results = array(); // array to hold results
 29+ private $processingArray = false; // if we're doing a seq or bag.
 30+ private $itemLang = false; // used for lang alts only
3031
3132 private $xmlParser;
3233
 34+ protected $items;
 35+
3336 /*
3437 * These are various mode constants.
3538 * they are used to figure out what to do
@@ -41,18 +44,20 @@
4245 const MODE_INITIAL = 0;
4346 const MODE_IGNORE = 1;
4447 const MODE_LI = 2;
45 - const MODE_QDESC = 9;
 48+ const MODE_LI_LANG = 3;
 49+ const MODE_QDESC = 4;
4650
4751 // The following MODE constants are also used in the
4852 // $items array to denote what type of property the item is.
49 - const MODE_SIMPLE = 3;
50 - const MODE_STRUCT = 4; // structure (associative array)
51 - const MODE_SEQ = 5; // orderd list
52 - const MODE_BAG = 6; // unordered list
53 - const MODE_LANG = 7; // lang alt. TODO: implement
54 - const MODE_ALT = 8; // non-language alt. Currently unused
 53+ const MODE_SIMPLE = 10;
 54+ const MODE_STRUCT = 11; // structure (associative array)
 55+ const MODE_SEQ = 12; // ordered list
 56+ const MODE_BAG = 13; // unordered list
 57+ const MODE_LANG = 14; // lang alt. TODO: implement
 58+ const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
5559
5660 const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
 61+ const NS_XML = 'http://www.w3.org/XML/1998/namespace';
5762
5863
5964 /** Constructor.
@@ -66,6 +71,8 @@
6772 throw new MWException( 'XMP support requires XML Parser' );
6873 }
6974
 75+ $this->items = XMPInfo::getItems();
 76+
7077 $this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' );
7178 xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 );
7279 xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 );
@@ -228,10 +235,38 @@
229236 ) {
230237 throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
231238 }
 239+
 240+ // Validate structures.
 241+ list( $ns, $tag ) = explode( ' ', $elm, 2 );
 242+ if ( isset( $this->items[$ns][$tag]['validate'] ) ) {
 243+
 244+ $info =& $this->items[$ns][$tag];
 245+ $finalName = isset( $info['map_name'] )
 246+ ? $info['map_name'] : $tag;
 247+
 248+ $validate = is_array( $info['validate'] ) ? $info['validate']
 249+ : array( 'XMPValidate', $info['validate'] );
 250+
 251+ if ( is_callable( $validate ) ) {
 252+ $val =& $this->results['xmp-' . $info['map_group']][$finalName];
 253+ call_user_func_array( $validate, array( $info, &$val, false ) );
 254+ if ( is_null( $val ) ) {
 255+ // the idea being the validation function will unset the variable if
 256+ // it's invalid.
 257+ wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
 258+ unset( $this->results['xmp-' . $info['map_group']][$finalName] );
 259+ }
 260+ } else {
 261+ wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
 262+ . $validate[0] . '::' . $validate[1] . '()) is not callable.' );
 263+ }
 264+ }
 265+
232266 array_shift( $this->curItem );
233267 array_shift( $this->mode );
234268 $this->ancestorStruct = false;
235269 $this->processingArray = false;
 270+ $this->itemLang = false;
236271 }
237272 /** Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag )
238273 * Just resets some private variables
@@ -241,11 +276,25 @@
242277 * @param $elm String namespace . ' ' . element name
243278 */
244279 private function endElementModeLi( $elm ) {
 280+
 281+ list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
 282+ $info = $this->items[$ns][$tag];
 283+ $finalName = isset( $info['map_name'] )
 284+ ? $info['map_name'] : $tag;
 285+
245286 if ( $elm === self::NS_RDF . ' Seq' ) {
246 - /* fixme, record _format*/
247287 array_shift( $this->mode );
 288+ $this->results['xmp-' . $info['map_group']][$finalName]['_format'] = 'ol';
248289 } elseif ( $elm === self::NS_RDF . ' Bag' ) {
249290 array_shift( $this->mode );
 291+ $this->results['xmp-' . $info['map_group']][$finalName]['_format'] = 'ul';
 292+ } elseif ( $elm === self::NS_RDF . ' Alt' ) {
 293+ array_shift( $this->mode );
 294+ // extra if needed as you could theoretically have a non-language alt.
 295+ if ( $info['mode'] === self::MODE_LANG ) {
 296+ $this->results['xmp-' . $info['map_group']][$finalName]['_format'] = 'lang';
 297+ }
 298+
250299 } else {
251300 throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
252301 }
@@ -299,6 +348,7 @@
300349 case self::MODE_STRUCT:
301350 case self::MODE_SEQ:
302351 case self::MODE_BAG:
 352+ case self::MODE_LANG:
303353 $this->endElementNested( $elm );
304354 break;
305355 case self::MODE_INITIAL:
@@ -309,6 +359,7 @@
310360 }
311361 break;
312362 case self::MODE_LI:
 363+ case self::MODE_LI_LANG:
313364 $this->endElementModeLi( $elm );
314365 break;
315366 case self::MODE_QDESC:
@@ -361,6 +412,20 @@
362413 }
363414
364415 }
 416+ /* Start element in MODE_LANG (language alternative)
 417+ * this should always be <rdf:Alt>
 418+ *
 419+ * @param $elm String namespace . ' ' . tag
 420+ * @throws MWException if we have an element that's not <rdf:Alt>
 421+ */
 422+ private function startElementModeLang( $elm ) {
 423+ if ( $elm === self::NS_RDF . ' Alt' ) {
 424+ array_unshift( $this->mode, self::MODE_LI_LANG );
 425+ } else {
 426+ throw new MWException( "Expected <rdf:Seq> but got $elm." );
 427+ }
 428+
 429+ }
365430 /** Handle an opening element when in MODE_SIMPLE
366431 * This should not happen often. This is for if a simple element
367432 * already opened has a child element. Could happen for a
@@ -420,13 +485,13 @@
421486 private function startElementModeInitial( $ns, $tag, $attribs ) {
422487 if ( $ns !== self::NS_RDF ) {
423488
424 - if ( isset( XMPInfo::$items[$ns][$tag] ) ) {
425 - $mode = XMPInfo::$items[$ns][$tag]['mode'];
 489+ if ( isset( $this->items[$ns][$tag] ) ) {
 490+ $mode = $this->items[$ns][$tag]['mode'];
426491 array_unshift( $this->mode, $mode );
427492 array_unshift( $this->curItem, $ns . ' ' . $tag );
428493 if ( $mode === self::MODE_STRUCT ) {
429 - $this->ancestorStruct = isset( XMPInfo::$items[$ns][$tag]['map_name'] )
430 - ? XMPInfo::$items[$ns][$tag]['map_name'] : $tag;
 494+ $this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] )
 495+ ? $this->items[$ns][$tag]['map_name'] : $tag;
431496 }
432497 if ( $this->charContent !== false ) {
433498 // Something weird.
@@ -453,16 +518,16 @@
454519 private function startElementModeStruct( $ns, $tag, $attribs ) {
455520 if ( $ns !== self::NS_RDF ) {
456521
457 - if ( isset( XMPInfo::$items[$ns][$tag] ) ) {
458 - if ( isset( XMPInfo::$items[$ns][$this->ancestorStruct]['children'] )
459 - && !isset( XMPInfo::$items[$ns][$this->ancestorStruct]['children'][$tag] ) )
 522+ if ( isset( $this->items[$ns][$tag] ) ) {
 523+ if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
 524+ && !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) )
460525 {
461526 // This assumes that we don't have inter-namespace nesting
462527 // which we don't in all the properties we're interested in.
463528 throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
464529 . "> where it is not allowed." );
465530 }
466 - array_unshift( $this->mode, XMPInfo::$items[$ns][$tag]['mode'] );
 531+ array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
467532 array_unshift( $this->curItem, $ns . ' ' . $tag );
468533 if ( $this->charContent !== false ) {
469534 // Something weird.
@@ -484,7 +549,7 @@
485550 }
486551 }
487552 /** opening element in MODE_LI
488 - * process elements of array's
 553+ * process elements of arrays
489554 *
490555 * @param $elm String namespace . ' ' . tag
491556 * @throws MWException if gets a tag other than <rdf:li>
@@ -499,7 +564,33 @@
500565 array_unshift( $this->curItem, $this->curItem[0] );
501566 $this->processingArray = true;
502567 }
 568+ /** opening element in MODE_LI_LANG
 569+ * process elements of language alternatives
 570+ *
 571+ * @param $elm String namespace . ' ' . tag
 572+ * @param $attribs array array of attributes (most importantly xml:lang)
 573+ * @throws MWException if gets a tag other than <rdf:li> or if no xml:lang
 574+ */
 575+ private function startElementModeLiLang( $elm, $attribs ) {
 576+ if ( $elm !== self::NS_RDF . ' li' ) {
 577+ throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
 578+ }
 579+ if ( !isset( $attribs[ self::NS_XML . ' lang'] )
 580+ || !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[ self::NS_XML . ' lang' ] ) )
 581+ {
 582+ throw new MWException( __METHOD__
 583+ . " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
 584+ }
503585
 586+ $this->itemLang = $attribs[ self::NS_XML . ' lang' ];
 587+
 588+ // need to add curItem[0] on again since one is for the specific item
 589+ // and one is for the entire group.
 590+ array_unshift( $this->curItem, $this->curItem[0] );
 591+ array_unshift( $this->mode, self::MODE_SIMPLE );
 592+ $this->processingArray = true;
 593+ }
 594+
504595 /** Hits an opening element.
505596 * Generally just calls a helper based on what MODE we're in.
506597 * Also does some initial set up for the wrapper element
@@ -550,6 +641,12 @@
551642 case self::MODE_SEQ:
552643 $this->startElementModeSeq( $elm );
553644 break;
 645+ case self::MODE_LANG:
 646+ $this->startElementModeLang( $elm );
 647+ break;
 648+ case self::MODE_LI_LANG:
 649+ $this->startElementModeLiLang( $elm, $attribs );
 650+ break;
554651 case self::MODE_LI:
555652 $this->startElementModeLi( $elm );
556653 break;
@@ -590,7 +687,7 @@
591688 // value attribute is a weird way of just putting the contents.
592689 $this->char( $val );
593690 }
594 - } elseif ( isset( XMPInfo::$items[$ns][$tag] ) ) {
 691+ } elseif ( isset( $this->items[$ns][$tag] ) ) {
595692 if ( $this->mode[0] === self::MODE_SIMPLE ) {
596693 throw new MWException( __METHOD__
597694 . " $ns:$tag found as attribute where not allowed" );
@@ -611,17 +708,37 @@
612709 */
613710 private function saveValue( $ns, $tag, $val ) {
614711
615 - $info =& XMPInfo::$items[$ns][$tag];
 712+ $info =& $this->items[$ns][$tag];
616713 $finalName = isset( $info['map_name'] )
617714 ? $info['map_name'] : $tag;
618715 if ( isset( $info['validate'] ) ) {
619 - // FIXME
 716+ $validate = is_array( $info['validate'] ) ? $info['validate']
 717+ : array( 'XMPValidate', $info['validate'] );
 718+
 719+ if ( is_callable( $validate ) ) {
 720+ call_user_func_array( $validate, array( $info, &$val, true ) );
 721+ // the reasoning behind using &$val instead of using the return value
 722+ // is to be consistent between here and validating structures.
 723+ if ( is_null( $val ) ) {
 724+ wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
 725+ return;
 726+ }
 727+ } else {
 728+ wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
 729+ . $validate[0] . '::' . $validate[1] . '()) is not callable.' );
 730+ }
620731 }
621732
622733 if ( $this->ancestorStruct ) {
623734 $this->results['xmp-' . $info['map_group']][$this->ancestorStruct][$finalName] = $val;
624735 } elseif ( $this->processingArray ) {
625 - $this->results['xmp-' . $info['map_group']][$finalName][] = $val;
 736+ if ( $this->itemLang === false ) {
 737+ // normal array
 738+ $this->results['xmp-' . $info['map_group']][$finalName][] = $val;
 739+ } else {
 740+ // lang array.
 741+ $this->results['xmp-' . $info['map_group']][$finalName][$this->itemLang] = $val;
 742+ }
626743 } else {
627744 $this->results['xmp-' . $info['map_group']][$finalName] = $val;
628745 }
Index: branches/img_metadata/phase3/includes/media/XMPValidate.php
@@ -0,0 +1,105 @@
 2+<?php
 3+/**
 4+* This contains some static methods for
 5+* validating XMP properties. See XMPInfo and XMPReader classes.
 6+*
 7+* Each of these functions takes the same parameters
 8+* * an info array which is a subset of the XMPInfo::items array
 9+* * A value (passed as reference) to validate. This can be either a
 10+* simple value or an array
 11+* * A boolean to determine if this is validating a simple or complex values
 12+*
 13+* It should be noted that when an array is being validated, typically the validation
 14+* function is called once for each value, and then once at the end for the entire array.
 15+*
 16+* These validation functions can also be used to modify the data. See the gps and flash ones
 17+* for example.
 18+*
 19+* @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart1.pdf starting at pg 28
 20+* @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf starting at pg 11
 21+*/
 22+class XMPValidate {
 23+ /**
 24+ * function to validate boolean properties ( True or False )
 25+ *
 26+ * @param $info Array information about current property
 27+ * @param &$val Mixed current value to validate
 28+ * @param $standalone Boolean if this is a simple property or array
 29+ */
 30+ public static function validateBoolean( $info, &$val, $standalone ) {
 31+ if ( !$standalone ) {
 32+ // this only validates standalone properties, not arrays, etc
 33+ return;
 34+ }
 35+ if ( $val !== 'True' && $val !== 'False' ) {
 36+ wfDebugLog( 'XMP', __METHOD__ . " Expected True or False but got $val" );
 37+ $val = null;
 38+ }
 39+
 40+ }
 41+ /**
 42+ * function to validate rational properties ( 12/10 )
 43+ *
 44+ * @param $info Array information about current property
 45+ * @param &$val Mixed current value to validate
 46+ * @param $standalone Boolean if this is a simple property or array
 47+ */
 48+ public static function validateRational( $info, &$val, $standalone ) {
 49+ if ( !$standalone ) {
 50+ // this only validates standalone properties, not arrays, etc
 51+ return;
 52+ }
 53+ if ( !preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $val ) ) {
 54+ wfDebugLog( 'XMP', __METHOD__ . " Expected rational but got $val" );
 55+ $val = null;
 56+ }
 57+
 58+ }
 59+ /**
 60+ * function to validate properties with a fixed number of allowed
 61+ * choices. (closed choice)
 62+ *
 63+ * @param $info Array information about current property
 64+ * @param &$val Mixed current value to validate
 65+ * @param $standalone Boolean if this is a simple property or array
 66+ */
 67+ public static function validateClosed( $info, &$val, $standalone ) {
 68+ if ( !$standalone ) {
 69+ // this only validates standalone properties, not arrays, etc
 70+ return;
 71+ }
 72+ if ( !isset( $info['choices'][$val] ) ) {
 73+ wfDebugLog( 'XMP', __METHOD__ . " Expected closed choice, but got $val" );
 74+ $val = null;
 75+ }
 76+ }
 77+ /**
 78+ * function to validate and modify flash structure
 79+ *
 80+ * @param $info Array information about current property
 81+ * @param &$val Mixed current value to validate
 82+ * @param $standalone Boolean if this is a simple property or array
 83+ */
 84+ public static function validateFlash( $info, &$val, $standalone ) {
 85+ if ( $standalone ) {
 86+ // this only validates flash structs, not individual properties
 87+ return;
 88+ }
 89+ if ( !( isset( $val['Fired'] )
 90+ && isset( $val['Function'] )
 91+ && isset( $val['Mode'] )
 92+ && isset( $val['RedEyeMode'] )
 93+ && isset( $val['Return'] )
 94+ ) ) {
 95+ wfDebugLog( 'XMP', __METHOD__ . " Flash structure did not have all the required compoenents" );
 96+ $val = null;
 97+ } else {
 98+ $val = ( "\0" | ( $val['Fired'] === 'True' )
 99+ | ( intval( $val['Return'] ) << 1 )
 100+ | ( intval( $val['Mode'] ) << 3 )
 101+ | ( ( $val['Function'] === 'True' ) << 5 )
 102+ | ( ( $val['RedEyeMode'] === 'True' ) << 6 ) );
 103+ }
 104+ }
 105+
 106+}
Property changes on: branches/img_metadata/phase3/includes/media/XMPValidate.php
___________________________________________________________________
Added: svn:eol-style
1107 + native
Index: branches/img_metadata/phase3/includes/media/XMPInfo.php
@@ -6,6 +6,13 @@
77 */
88 class XMPInfo {
99
 10+ /** get the items array
 11+ * @return Array XMP item configuration array.
 12+ */
 13+ public static function getItems ( ) {
 14+ return self::$items;
 15+ }
 16+
1017 /**
1118 * XMPInfo::$items keeps a list of all the items
1219 * we are interested to extract, as well as
@@ -25,7 +32,7 @@
2633 * currently this just has a bunch of exif values as this class is only half-done
2734 */
2835
29 - static public $items = array(
 36+ static private $items = array(
3037 'http://ns.adobe.com/exif/1.0/' => array(
3138 'ApertureValue' => array(
3239 'map_group' => 'exif',
@@ -143,6 +150,8 @@
144151 'RedEyeMode' => true,
145152 'Return' => true,
146153 ),
 154+ 'validate' => 'validateFlash',
 155+ 'map_group' => 'exif',
147156 ),
148157 'Fired' => array(
149158 'map_group' => 'exif',
@@ -174,10 +183,23 @@
175184 'mode' => XMPReader::MODE_SIMPLE,
176185 ),
177186 /* End Flash */
178 - 'ISOSpeedRatings' => array(
 187+ 'ISOSpeedRatings' => array(
179188 'map_group' => 'exif',
180189 'mode' => XMPReader::MODE_SEQ,
181190 ),
182191 ),
 192+ 'http://purl.org/dc/elements/1.1/' => array(
 193+ 'title' => array(
 194+ 'map_group' => 'general',
 195+ 'map_name' => 'Headline',
 196+ 'mode' => XMPReader::MODE_LANG
 197+ ),
 198+ 'description' => array(
 199+ 'map_group' => 'general',
 200+ 'map_name' => 'ImageDescription',
 201+ 'mode' => XMPReader::MODE_LANG
 202+ ),
 203+
 204+ ),
183205 );
184206 }
Index: branches/img_metadata/phase3/includes/AutoLoader.php
@@ -462,6 +462,7 @@
463463 'TransformParameterError' => 'includes/media/MediaTransformOutput.php',
464464 'XMPReader' => 'includes/media/XMP.php',
465465 'XMPInfo' => 'includes/media/XMPInfo.php',
 466+ 'XMPValidate' => 'includes/media/XMPValidate.php',
466467
467468 # includes/normal
468469 'UtfNormal' => 'includes/normal/UtfNormal.php',

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r70481XMP stuff. Follow up to r70232....bawolff21:38, 4 August 2010

Status & tagging log