Index: branches/img_metadata/phase3/includes/media/XMP.php |
— | — | @@ -0,0 +1,734 @@ |
| 2 | +<?php |
| 3 | +/** Class for reading xmp data containing properties relevant to |
| 4 | +* images, and spitting out an array that FormatExif accepts. |
| 5 | +* |
| 6 | +* It should be noted this is not done yet |
| 7 | +* |
| 8 | +* Note, this is not meant to recognize every possible thing you can |
| 9 | +* encode in XMP. It should recognize all the properties we want. |
| 10 | +* For example it doesn't have support for structures with multiple |
| 11 | +* nesting levels, as none of the properties we're supporting use that |
| 12 | +* feature. If it comes across properties it doesn't recognize, it should |
| 13 | +* ignore them. |
| 14 | +* |
| 15 | +* The main methods one would call in this class are |
| 16 | +* - parse( $content ) |
| 17 | +* Reads in xmp content. |
| 18 | +* - getResults |
| 19 | +* Outputs a results array. |
| 20 | +* |
| 21 | +*/ |
| 22 | +class XMPReader { |
| 23 | + |
| 24 | + private $curItem = array(); // stack of "namespace tag" names of the elements being processed; [0] is the innermost |
| 25 | + private $ancestorStruct = false; // name of the struct property currently being read, or false |
| 26 | + private $charContent = false; // text collected by char() for the current value, or false if none |
| 27 | + private $mode = array(); // stack of MODE_* values; [0] is the current mode |
| 28 | + private $results = array(); // what getResults() returns |
| 29 | + private $processingArray = false; // set once an <rdf:li> is opened; cleared by endElementNested() |
| 30 | + |
| 31 | + private $xmlParser; // parser created with xml_parser_create_ns() in the constructor |
| 32 | + |
| 33 | + protected $items; // Contains an array of all properties we try to extract. |
| 34 | + |
| 35 | + /* |
| 36 | + * These are various mode constants. |
| 37 | + * They are used to figure out what to do |
| 38 | + * with an element when it's encountered. |
| 39 | + * |
| 40 | + * For example, MODE_IGNORE is used when processing |
| 41 | + * a property we're not interested in. So if a new |
| 42 | + * element pops up when we're in that mode, we ignore it. |
| 43 | + */ |
| 44 | + const MODE_INITIAL = 0; // pushed when the outer rdf:Description opens |
| 45 | + const MODE_IGNORE = 1; |
| 46 | + const MODE_LI = 2; // inside rdf:Seq / rdf:Bag, where only rdf:li is accepted |
| 47 | + |
| 48 | + // The following MODE constants are also used in the |
| 49 | + // $items array to denote what type of property the item is. |
| 50 | + const MODE_SIMPLE = 3; |
| 51 | + const MODE_STRUCT = 4; // structure (associative array) |
| 52 | + const MODE_SEQ = 5; // ordered list |
| 53 | + const MODE_BAG = 6; // unordered list |
| 54 | + const MODE_LANG = 7; // lang alt. TODO: implement |
| 55 | + const MODE_ALT = 8; // non-language alt. Currently unused |
| 56 | + |
| 57 | + const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; |
| 58 | + |
| 59 | + |
| 60 | + /** Constructor. |
| 61 | + * |
| 62 | + * Primary job is to initialize the items array |
| 63 | + * which is used to determine which props to extract. |
| 64 | + */ |
| 65 | + function __construct() { |
| 66 | + |
| 67 | + /* |
| 68 | + * $this->items keeps a list of all the items |
| 69 | + * we are interested to extract, as well as |
| 70 | + * information about the item like what type |
| 71 | + * it is. |
| 72 | + * |
| 73 | + * Format is an array of namespaces, |
| 74 | + * each containing an array of tags |
| 75 | + * each tag is an array of information about the |
| 76 | + * tag, including: |
| 77 | + * * map_group - what group (used for precedence during conflicts) |
| 78 | + * * mode - What type of item (self::MODE_SIMPLE usually, see above for all values) |
| 79 | + * * validate - method to validate input. Could also post-process the input. (TODO: implement this) |
| 80 | + * * choices - array of potential values (format of 'value' => true ) |
| 81 | + * * children - for MODE_STRUCT items, allowed children. |
| 82 | + * * map_name - optional; name to use in the results instead of the tag name (read by saveValue(), unused below) |
| 83 | + * currently this just has a bunch of exif values as this class is only half-done |
| 84 | + */ |
| 85 | + |
| 86 | + $this->items = array( |
| 87 | + 'http://ns.adobe.com/exif/1.0/' => array( |
| 88 | + 'ApertureValue' => array( |
| 89 | + 'map_group' => 'exif', |
| 90 | + 'mode' => self::MODE_SIMPLE, |
| 91 | + 'validate' => 'validateRational' |
| 92 | + ), |
| 93 | + 'BrightnessValue' => array( |
| 94 | + 'map_group' => 'exif', |
| 95 | + 'mode' => self::MODE_SIMPLE, |
| 96 | + 'validate' => 'validateRational' |
| 97 | + ), |
| 98 | + 'CompressedBitsPerPixel' => array( |
| 99 | + 'map_group' => 'exif', |
| 100 | + 'mode' => self::MODE_SIMPLE, |
| 101 | + 'validate' => 'validateRational' |
| 102 | + ), |
| 103 | + 'DigitalZoomRatio' => array( |
| 104 | + 'map_group' => 'exif', |
| 105 | + 'mode' => self::MODE_SIMPLE, |
| 106 | + 'validate' => 'validateRational' |
| 107 | + ), |
| 108 | + 'ExposureBiasValue' => array( |
| 109 | + 'map_group' => 'exif', |
| 110 | + 'mode' => self::MODE_SIMPLE, |
| 111 | + 'validate' => 'validateRational' |
| 112 | + ), |
| 113 | + 'ExposureIndex' => array( |
| 114 | + 'map_group' => 'exif', |
| 115 | + 'mode' => self::MODE_SIMPLE, |
| 116 | + 'validate' => 'validateRational' |
| 117 | + ), |
| 118 | + 'ExposureTime' => array( |
| 119 | + 'map_group' => 'exif', |
| 120 | + 'mode' => self::MODE_SIMPLE, |
| 121 | + 'validate' => 'validateRational' |
| 122 | + ), |
| 123 | + 'FlashEnergy' => array( |
| 124 | + 'map_group' => 'exif', |
| 125 | + 'mode' => self::MODE_SIMPLE, |
| 126 | + 'validate' => 'validateRational' |
| 127 | + ), |
| 128 | + 'FNumber' => array( |
| 129 | + 'map_group' => 'exif', |
| 130 | + 'mode' => self::MODE_SIMPLE, |
| 131 | + 'validate' => 'validateRational' |
| 132 | + ), |
| 133 | + 'FocalLength' => array( |
| 134 | + 'map_group' => 'exif', |
| 135 | + 'mode' => self::MODE_SIMPLE, |
| 136 | + 'validate' => 'validateRational' |
| 137 | + ), |
| 138 | + 'FocalPlaneXResolution' => array( |
| 139 | + 'map_group' => 'exif', |
| 140 | + 'mode' => self::MODE_SIMPLE, |
| 141 | + 'validate' => 'validateRational' |
| 142 | + ), |
| 143 | + 'FocalPlaneYResolution' => array( |
| 144 | + 'map_group' => 'exif', |
| 145 | + 'mode' => self::MODE_SIMPLE, |
| 146 | + 'validate' => 'validateRational' |
| 147 | + ), |
| 148 | + /* FIXME GPSAltitude */ |
| 149 | + 'GPSDestBearing' => array( |
| 150 | + 'map_group' => 'exif', |
| 151 | + 'mode' => self::MODE_SIMPLE, |
| 152 | + 'validate' => 'validateRational' |
| 153 | + ), |
| 154 | + 'GPSDestDistance' => array( |
| 155 | + 'map_group' => 'exif', |
| 156 | + 'mode' => self::MODE_SIMPLE, |
| 157 | + 'validate' => 'validateRational' |
| 158 | + ), |
| 159 | + 'GPSDOP' => array( |
| 160 | + 'map_group' => 'exif', |
| 161 | + 'mode' => self::MODE_SIMPLE, |
| 162 | + 'validate' => 'validateRational' |
| 163 | + ), |
| 164 | + 'GPSImgDirection' => array( |
| 165 | + 'map_group' => 'exif', |
| 166 | + 'mode' => self::MODE_SIMPLE, |
| 167 | + 'validate' => 'validateRational' |
| 168 | + ), |
| 169 | + 'GPSSpeed' => array( |
| 170 | + 'map_group' => 'exif', |
| 171 | + 'mode' => self::MODE_SIMPLE, |
| 172 | + 'validate' => 'validateRational' |
| 173 | + ), |
| 174 | + 'GPSTrack' => array( |
| 175 | + 'map_group' => 'exif', |
| 176 | + 'mode' => self::MODE_SIMPLE, |
| 177 | + 'validate' => 'validateRational' |
| 178 | + ), |
| 179 | + 'MaxApertureValue' => array( |
| 180 | + 'map_group' => 'exif', |
| 181 | + 'mode' => self::MODE_SIMPLE, |
| 182 | + 'validate' => 'validateRational' |
| 183 | + ), |
| 184 | + 'ShutterSpeedValue' => array( |
| 185 | + 'map_group' => 'exif', |
| 186 | + 'mode' => self::MODE_SIMPLE, |
| 187 | + 'validate' => 'validateRational' |
| 188 | + ), |
| 189 | + 'SubjectDistance' => array( |
| 190 | + 'map_group' => 'exif', |
| 191 | + 'mode' => self::MODE_SIMPLE, |
| 192 | + 'validate' => 'validateRational' |
| 193 | + ), |
| 194 | + |
| 195 | + /* Flash: a struct, followed by the entries for each of its fields */ |
| 196 | + 'Flash' => array( |
| 197 | + 'mode' => self::MODE_STRUCT, // no map_group here: saveValue() takes it from the field's own entry |
| 198 | + 'children' => array( |
| 199 | + 'Fired' => true, |
| 200 | + 'Function' => true, |
| 201 | + 'Mode' => true, |
| 202 | + 'RedEyeMode' => true, |
| 203 | + 'Return' => true, |
| 204 | + ), |
| 205 | + ), |
| 206 | + 'Fired' => array( |
| 207 | + 'map_group' => 'exif', |
| 208 | + 'validate' => 'validateBoolean', |
| 209 | + 'mode' => self::MODE_SIMPLE |
| 210 | + ), |
| 211 | + 'Function' => array( |
| 212 | + 'map_group' => 'exif', |
| 213 | + 'validate' => 'validateBoolean', |
| 214 | + 'mode' => self::MODE_SIMPLE, |
| 215 | + ), |
| 216 | + 'Mode' => array( |
| 217 | + 'map_group' => 'exif', |
| 218 | + 'validate' => 'validateClosed', |
| 219 | + 'mode' => self::MODE_SIMPLE, |
| 220 | + 'choices' => array( '0' => true, '1' => true, |
| 221 | + '2' => true, '3' => true ), |
| 222 | + ), |
| 223 | + 'Return' => array( |
| 224 | + 'map_group' => 'exif', |
| 225 | + 'validate' => 'validateClosed', |
| 226 | + 'mode' => self::MODE_SIMPLE, |
| 227 | + 'choices' => array( '0' => true, |
| 228 | + '2' => true, '3' => true ), |
| 229 | + ), |
| 230 | + 'RedEyeMode' => array( |
| 231 | + 'map_group' => 'exif', |
| 232 | + 'validate' => 'validateBoolean', |
| 233 | + 'mode' => self::MODE_SIMPLE, |
| 234 | + ), |
| 235 | + /* End Flash */ |
| 236 | + 'ISOSpeedRatings' => array( |
| 237 | + 'map_group' => 'exif', |
| 238 | + 'mode' => self::MODE_SEQ, |
| 239 | + ), |
| 240 | + ), |
| 241 | + ); |
| 242 | + |
| 243 | + if ( !function_exists('xml_parser_create_ns') ) { |
| 244 | + // this should already be checked by this point |
| 245 | + throw new MWException('XMP support requires XML Parser'); |
| 246 | + } |
| 247 | + |
| 248 | + $this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' ); // ' ' separates namespace and tag in the names the handlers receive |
| 249 | + xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 ); // keep names as written; the handlers compare them case-sensitively |
| 250 | + xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 ); |
| 251 | + |
| 252 | + xml_set_element_handler( $this->xmlParser, |
| 253 | + array( $this, 'startElement' ), |
| 254 | + array( $this, 'endElement' ) ); |
| 255 | + |
| 256 | + xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) ); |
| 257 | + } |
| 258 | + |
| 259 | + /** Destroy the xml parser |
| 260 | + * |
| 261 | + * not sure if this is actually needed. |
| 262 | + */ |
| 263 | + function __destruct() { |
| 264 | + // Release the parser handle created in the constructor. Not sure if this is needed. |
| 265 | + xml_parser_free( $this->xmlParser ); |
| 266 | + } |
| 267 | + |
| 268 | + /** Get the result array |
| 269 | + * @return Array array of results as an array of arrays suitable for |
| 270 | + * FormatExif. |
| 271 | + */ |
| 272 | + public function getResults() { |
| 273 | + return $this->results; // an empty array if parse() failed or nothing has been saved |
| 274 | + } |
| 275 | + |
| 276 | + /** |
| 277 | + * Main function to call to parse XMP. Use getResults to |
| 278 | + * get results. |
| 279 | + * |
| 280 | + * Also catches any errors during processing, writes them to |
| 281 | + * debug log, blanks result array and returns false. |
| 282 | + * |
| 283 | + * @param String: $content XMP data |
| 284 | + * @return Boolean success. |
| 285 | + * @todo charset detection (usually UTF-8, but UTF-16 or 32 is allowed). |
| 286 | + */ |
| 287 | + public function parse( $content ) { |
| 288 | +     try { |
| 289 | +         if ( xml_parse( $this->xmlParser, $content, true ) ) { |
| 290 | +             return true; |
| 291 | +         } |
| 292 | +         // The XML parser rejected the document: describe where, for the debug log. |
| 293 | +         $parser = $this->xmlParser; |
| 294 | +         $error = xml_error_string( xml_get_error_code( $parser ) ); |
| 295 | +         $where = 'line: ' . xml_get_current_line_number( $parser ) |
| 296 | +             . ' column: ' . xml_get_current_column_number( $parser ) |
| 297 | +             . ' byte offset: ' . xml_get_current_byte_index( $parser ); |
| 298 | +         wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)"); |
| 299 | +     } catch (MWException $e) { |
| 300 | +         // Raised by this class's own handlers when the XMP is not what they expect. |
| 301 | +         wfDebugLog( 'XMP', 'XMP parse error: ' . $e ); |
| 302 | +     } |
| 303 | +     // Both failure paths end up here: throw away any partial results. |
| 304 | +     $this->results = array(); |
| 305 | +     return false; |
| 306 | + } |
| 307 | + |
| 308 | + /** Character data handler |
| 309 | + * Called whenever character data is found in the xmp document. |
| 310 | + * |
| 311 | + * does nothing if we're in MODE_IGNORE or if the data is whitespace |
| 312 | + * throws an error if we're not in MODE_SIMPLE (as we're not allowed to have character |
| 313 | + * data in the other modes). |
| 314 | + * |
| 315 | + * @param $parser XMLParser reference to the xml parser |
| 316 | + * @param $data String Character data |
| 317 | + * @throws MWException on invalid data |
| 318 | + */ |
| 319 | + function char( $parser, $data ) { |
| 320 | +     // Trim once; trimming the already-trimmed string again has no effect. |
| 321 | +     $data = trim( $data ); |
| 322 | +     if ( $data === '' ) { |
| 323 | +         return; // whitespace-only chunk, e.g. indentation between tags |
| 324 | +     } |
| 325 | + |
| 326 | +     if ( !isset( $this->mode[0] ) ) { |
| 327 | +         throw new MWException('Unexpected character data before first rdf:Description element'); |
| 328 | +     } |
| 329 | + |
| 330 | +     $curMode = $this->mode[0]; |
| 331 | +     if ( $curMode === self::MODE_IGNORE ) { |
| 332 | +         return; |
| 333 | +     } |
| 334 | +     if ( $curMode !== self::MODE_SIMPLE ) { |
| 335 | +         throw new MWException('character data where not expected. (mode ' . $curMode . ')'); |
| 336 | +     } |
| 337 | + |
| 338 | +     if ( $this->charContent === false ) { |
| 339 | +         $this->charContent = $data; |
| 340 | +         return; |
| 341 | +     } |
| 342 | +     // Second chunk for the same value: append it and note it in the log. |
| 343 | +     // NOTE(review): each chunk is trimmed above, so whitespace that falls on a |
| 344 | +     // chunk boundary is lost -- confirm whether that matters for any property. |
| 345 | +     $this->charContent .= $data; |
| 346 | +     wfDebugLog( 'XMP', 'XMP: Consecuitive CDATA'); |
| 347 | + } |
| 348 | + /** When we hit a closing element in MODE_IGNORE |
| 349 | + * Check to see if this is the element we started to ignore, |
| 350 | + * in which case we get out of MODE_IGNORE |
| 351 | + * |
| 352 | + * @param $elm String Namespace of element followed by a space and then tag name of element. |
| 353 | + */ |
| 354 | + private function endElementModeIgnore ( $elm ) { |
| 355 | +     if ( empty( $this->curItem ) ) { |
| 356 | +         // Should be impossible: every place that pushes MODE_IGNORE |
| 357 | +         // also pushes a curItem entry. |
| 358 | +         throw new MWException(' In ignore mode with no curItem'); |
| 359 | +     } |
| 360 | +     if ( $this->curItem[0] !== $elm ) { |
| 361 | +         return; // a descendant of the ignored element closed; keep ignoring |
| 362 | +     } |
| 363 | +     array_shift( $this->mode ); |
| 364 | +     array_shift( $this->curItem ); |
| 365 | + } |
| 366 | + /** Hit a closing element when in MODE_SIMPLE. |
| 367 | + * This generally means that we finished processing a |
| 368 | + * property value, and now have to save the result to the |
| 369 | + * results array |
| 370 | + * |
| 371 | + * @param $elm String namespace, space, and tag name. |
| 372 | + */ |
| 373 | + private function endElementModeSimple ( $elm ) { |
| 374 | +     if ( $this->charContent !== false ) { |
| 375 | +         // File the value under the property on top of the stack rather than |
| 376 | +         // under $elm. For a plain property the two are the same; for an |
| 377 | +         // <rdf:li> or <rdf:value> wrapper the top of the stack is the property |
| 378 | +         // that owns the wrapper, whereas $elm is the wrapper's own rdf: name, |
| 379 | +         // which has no entry in $this->items. |
| 380 | +         list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 ); |
| 381 | +         $this->saveValue( $ns, $tag, $this->charContent ); |
| 382 | + |
| 383 | +         $this->charContent = false; // ready for the next value |
| 384 | +     } |
| 385 | + |
| 386 | +     // This element is done: leave MODE_SIMPLE. |
| 387 | +     array_shift( $this->curItem ); |
| 388 | +     array_shift( $this->mode ); |
| 389 | + } |
| 390 | + /** Hit a closing element in MODE_STRUCT, MODE_SEQ, MODE_BAG |
| 391 | + * generally means we've finished processing a nested structure. |
| 392 | + * resets some internal variables to indicate that. |
| 393 | + * |
| 394 | + * Note this means we hit the </closing element> not the </rdf:Seq>. |
| 395 | + * |
| 396 | + * @param $elm String namespace . space . tag name. |
| 397 | + */ |
| 398 | + private function endElementNested( $elm ) { |
| 399 | +     $expected = $this->curItem[0]; |
| 400 | +     if ( $expected !== $elm ) { |
| 401 | +         throw new MWException("nesting mismatch. got a </$elm> but expected a </" . $expected . '>'); |
| 402 | +     } |
| 403 | +     array_shift( $this->mode ); |
| 404 | +     array_shift( $this->curItem ); |
| 405 | +     $this->processingArray = $this->ancestorStruct = false; // no longer inside a struct or an array |
| 406 | + } |
| 407 | + /** Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag ) |
| 408 | + * Just resets some private variables |
| 409 | + * |
| 410 | + * note we still have to hit the outer </property> |
| 411 | + * |
| 412 | + * @param $elm String namespace . ' ' . element name |
| 413 | + */ |
| 414 | + private function endElementModeLi( $elm ) { |
| 415 | +     $closesSeq = ( $elm === self::NS_RDF . ' Seq' ); |
| 416 | +     $closesBag = ( $elm === self::NS_RDF . ' Bag' ); |
| 417 | +     if ( !$closesSeq && !$closesBag ) { |
| 418 | +         throw new MWException( __METHOD__ . " expected <rdf:seq> or <rdf:bag> but instead got $elm." ); |
| 419 | +     } |
| 420 | +     // Both containers close the same way: drop MODE_LI so the mode of the |
| 421 | +     // property holding the list is current again. (fixme: record _format for Seq) |
| 422 | +     array_shift( $this->mode ); |
| 423 | + } |
| 424 | + /** Handler for hitting a closing element. |
| 425 | + * |
| 426 | + * generally just calls a helper function depending on what mode we're in. |
| 427 | + * Ignores the outer wrapping elements that are optional in xmp and have no meaning. |
| 428 | + * @param $parser XMLParser |
| 429 | + * @param $elm String namespace . ' ' . element name |
| 430 | + */ |
| 431 | + function endElement( $parser, $elm ) { |
| 432 | + if ( $elm === (self::NS_RDF . ' RDF') |
| 433 | + || $elm === 'adobe:ns:meta/ xmpmeta' ) |
| 434 | + { |
| 435 | + //ignore these: startElement() pushes nothing for them, so there is nothing to pop. |
| 436 | + return; |
| 437 | + } |
| 438 | + |
| 439 | + switch( $this->mode[0] ) { // NOTE(review): reads an unset offset when the stack is empty; switch compares loosely, so null would match MODE_INITIAL (0) -- confirm intended |
| 440 | + case self::MODE_IGNORE: |
| 441 | + $this->endElementModeIgnore( $elm ); |
| 442 | + break; |
| 443 | + case self::MODE_SIMPLE: |
| 444 | + $this->endElementModeSimple( $elm ); |
| 445 | + break; |
| 446 | + case self::MODE_STRUCT: |
| 447 | + case self::MODE_SEQ: |
| 448 | + case self::MODE_BAG: |
| 449 | + $this->endElementNested( $elm ); |
| 450 | + break; |
| 451 | + case self::MODE_INITIAL: |
| 452 | + if ( $elm === self::NS_RDF . ' Description' ) { |
| 453 | + array_shift( $this->mode ); // pop the MODE_INITIAL entry |
| 454 | + } else { |
| 455 | + throw new MWException('Element ended unexpected while in MODE_INITIAL'); |
| 456 | + } |
| 457 | + break; |
| 458 | + case self::MODE_LI: |
| 459 | + $this->endElementModeLi( $elm ); |
| 460 | + break; |
| 461 | + default: |
| 462 | + wfDebugLog( 'XMP', __METHOD__ ." no mode (elm = $elm)"); |
| 463 | + break; |
| 464 | + } |
| 465 | + } |
| 466 | + |
| 467 | + |
| 468 | + /** Hit an opening element while in MODE_IGNORE |
| 469 | + * |
| 470 | + * Mostly ignores, unless we encounter the element that we are ignoring. |
| 471 | + * |
| 472 | + * @param $elm String namespace . ' ' . tag name |
| 473 | + */ |
| 474 | + private function startElementModeIgnore( $elm ) { |
| 475 | +     if ( $elm !== $this->curItem[0] ) return; // some other descendant: nothing to track |
| 476 | +     // Nested element with the ignored name: stack it so its close tag pops this entry. |
| 477 | +     array_unshift( $this->mode, self::MODE_IGNORE ); |
| 478 | +     array_unshift( $this->curItem, $elm ); |
| 479 | + } |
| 480 | + /* Start element in MODE_BAG |
| 481 | + * this should always be <rdf:Bag> |
| 482 | + * |
| 483 | + * @param $elm String namespace . ' ' . tag |
| 484 | + * @throws MWException if we have an element thats not <rdf:Bag> |
| 485 | + */ |
| 486 | + private function startElementModeBag( $elm ) { |
| 487 | +     if ( $elm !== self::NS_RDF . ' Bag' ) { |
| 488 | +         throw new MWException("Expected <rdf:Bag> but got $elm."); |
| 489 | +     } |
| 490 | + |
| 491 | +     // Container opened: switch to MODE_LI, where only <rdf:li> is accepted. |
| 492 | +     array_unshift( $this->mode, self::MODE_LI ); |
| 493 | + } |
| 494 | + /* Start element in MODE_SEQ |
| 495 | + * this should always be <rdf:Seq> |
| 496 | + * |
| 497 | + * @param $elm String namespace . ' ' . tag |
| 498 | + * @throws MWException if we have an element thats not <rdf:Seq> |
| 499 | + */ |
| 500 | + private function startElementModeSeq( $elm ) { |
| 501 | +     if ( $elm !== self::NS_RDF . ' Seq' ) { |
| 502 | +         throw new MWException("Expected <rdf:Seq> but got $elm."); |
| 503 | +     } |
| 504 | + |
| 505 | +     // Container opened: switch to MODE_LI, where only <rdf:li> is accepted. |
| 506 | +     array_unshift( $this->mode, self::MODE_LI ); |
| 507 | + } |
| 508 | + /** Handle an opening element when in MODE_SIMPLE |
| 509 | + * This should not happen often. This is for if a simple element |
| 510 | + * already opened has a child element. Could happen for a |
| 511 | + * qualified element, or if using overly verbose syntax. |
| 512 | + * |
| 513 | + * @param $elm String namespace and tag names separated by space. |
| 514 | + */ |
| 515 | + private function startElementModeSimple( $elm ) { |
| 516 | +     $isWrapper = ( $elm === self::NS_RDF . ' Description' |
| 517 | +         || $elm === self::NS_RDF . ' value' ); |
| 518 | + |
| 519 | +     // A wrapper keeps us in MODE_SIMPLE and repeats the current property |
| 520 | +     // name (fixme: better handling of value). Anything else -- a |
| 521 | +     // qualifier, maybe -- is ignored until its closing tag. |
| 522 | +     // curItem[0] is read here, before anything is pushed onto the stack. |
| 523 | +     $nextMode = $isWrapper ? self::MODE_SIMPLE : self::MODE_IGNORE; |
| 524 | +     $nextItem = $isWrapper ? $this->curItem[0] : $elm; |
| 525 | + |
| 526 | +     array_unshift( $this->mode, $nextMode ); |
| 527 | +     array_unshift( $this->curItem, $nextItem ); |
| 528 | + |
| 529 | + } |
| 530 | + /** Starting an element when in MODE_INITIAL |
| 531 | + * This usually happens when we hit an element inside |
| 532 | + * the outer rdf:Description |
| 533 | + * |
| 534 | + * This is generally where most props start |
| 535 | + * |
| 536 | + * @param $ns String Namespace |
| 537 | + * @param $tag String tag name (without namespace prefix) |
| 538 | + * @param $attribs Array array of attributes |
| 539 | + */ |
| 540 | + private function startElementModeInitial( $ns, $tag, $attribs ) { |
| 541 | +     if ( $ns === self::NS_RDF ) { |
| 542 | +         // rdf: elements are not looked up in $this->items; only their |
| 543 | +         // attributes are processed. |
| 544 | +         $this->doAttribs( $attribs ); |
| 545 | +         return; |
| 546 | +     } |
| 547 | + |
| 548 | +     $known = isset( $this->items[$ns][$tag] ); |
| 549 | +     array_unshift( $this->mode, $known ? $this->items[$ns][$tag]['mode'] : self::MODE_IGNORE ); |
| 550 | +     array_unshift( $this->curItem, $ns . ' ' . $tag ); |
| 551 | +     if ( !$known ) { |
| 552 | +         return; // not a property we extract: skip it and everything inside it |
| 553 | +     } |
| 554 | + |
| 555 | +     if ( $this->mode[0] === self::MODE_STRUCT ) { |
| 556 | +         // Remember the name under which saveValue() groups the struct's fields. |
| 557 | +         $info = $this->items[$ns][$tag]; |
| 558 | +         $this->ancestorStruct = isset( $info['map_name'] ) ? $info['map_name'] : $tag; |
| 559 | +     } |
| 560 | +     if ( $this->charContent !== false ) { |
| 561 | +         // Leftover text from an earlier value; should not happen in valid XMP. |
| 562 | +         throw new MWException('tag nested in non-whitespace characters.'); |
| 563 | +     } |
| 564 | +     $this->doAttribs( $attribs ); |
| 565 | + } |
| 566 | + /** Hit an opening element when in a Struct (MODE_STRUCT) |
| 567 | + * This is generally for fields of a compound property |
| 568 | + * |
| 569 | + * @param $ns String namespace |
| 570 | + * @param $tag String tag name (no ns) |
| 571 | + * @param $attribs Array array of attribs w/ values. |
| 572 | + */ |
| 573 | + private function startElementModeStruct( $ns, $tag, $attribs ) { |
| 574 | +     if ( $ns === self::NS_RDF ) { |
| 575 | +         if ( $tag === 'Description' ) { |
| 576 | +             $this->doAttribs( $attribs ); |
| 577 | +         } |
| 578 | +         return; |
| 579 | +     } |
| 580 | +     // This method receives $ns and $tag separately and has no $elm parameter, |
| 581 | +     // so build the "namespace tag" name that the curItem stack stores. Pushing |
| 582 | +     // an undefined variable instead would leave a null entry that |
| 583 | +     // endElementModeIgnore() can never match, so ignore mode would never end. |
| 584 | +     $elm = $ns . ' ' . $tag; |
| 585 | +     if ( !isset( $this->items[$ns][$tag] ) ) { |
| 586 | +         // Unknown field: skip it and whatever it contains. |
| 587 | +         array_unshift( $this->mode, self::MODE_IGNORE ); |
| 588 | +         array_unshift( $this->curItem, $elm ); |
| 589 | +         return; |
| 590 | +     } |
| 591 | +     $parent = $this->ancestorStruct; |
| 592 | +     if ( isset( $this->items[$ns][$parent]['children'] ) |
| 593 | +         && !isset( $this->items[$ns][$parent]['children'][$tag] ) ) |
| 594 | +     { |
| 595 | +         // Assumes a struct and its fields share a namespace, true for all properties we support. |
| 596 | +         throw new MWException(" <$tag> appeared nested in <" . $parent . "> where it is not allowed."); |
| 597 | +     } |
| 598 | +     array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] ); |
| 599 | +     array_unshift( $this->curItem, $elm ); |
| 600 | +     if ( $this->charContent !== false ) { |
| 601 | +         // Leftover text from an earlier value; should not happen in valid XMP. |
| 602 | +         throw new MWException("tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")."); |
| 603 | +     } |
| 604 | + } |
| 604 | + /** opening element in MODE_LI |
| 605 | + * process elements of array's |
| 606 | + * |
| 607 | + * @param $elm String namespace . ' ' . tag |
| 608 | + * @throws MWException if gets a tag other than <rdf:li> |
| 609 | + */ |
| 610 | + private function startElementModeLi( $elm ) { |
| 611 | +     $expected = self::NS_RDF . ' li'; |
| 612 | +     if ( $elm !== $expected ) { |
| 613 | +         throw new MWException("<rdf:li> expected but got $elm."); |
| 614 | +     } |
| 615 | +     // The item is read as a simple value; push the list property's name again for it. |
| 616 | +     $this->processingArray = true; |
| 617 | +     array_unshift( $this->curItem, $this->curItem[0] ); |
| 618 | +     array_unshift( $this->mode, self::MODE_SIMPLE ); |
| 619 | + } |
| 620 | + |
| 621 | + /** Hits an opening element. |
| 622 | + * Generally just calls a helper based on what MODE we're in. |
| 623 | + * Also does some initial set up for the wrapper element |
| 624 | + * |
| 625 | + * @param $parser XMLParser |
| 626 | + * @param $elm String namespace <space> element |
| 627 | + * @param $attribs Array attribute name => value |
| 628 | + */ |
| 629 | + function startElement( $parser, $elm, $attribs ) { |
| 630 | + |
| 631 | + |
| 632 | + if ($elm === self::NS_RDF . ' RDF' |
| 633 | + || $elm === 'adobe:ns:meta/ xmpmeta' ) |
| 634 | + { |
| 635 | + /* ignore: nothing is pushed for these wrapper elements */ |
| 636 | + return; |
| 637 | + } |
| 638 | + |
| 639 | + if ( $elm === self::NS_RDF . ' Description' ) { |
| 640 | + if ( count( $this->mode ) === 0 ) { |
| 641 | + //outer rdf:desc: start the mode stack, then fall through to the switch below |
| 642 | + array_unshift( $this->mode, self::MODE_INITIAL ); |
| 643 | + } else { |
| 644 | + //inner rdf:desc |
| 645 | + // fixme this doesn't handle qualifiers right. |
| 646 | + $this->doAttribs( $attribs ); |
| 647 | + return; // NOTE(review): nothing is pushed here, yet endElement() still dispatches the matching close tag on the current mode -- confirm |
| 648 | + } |
| 649 | + } |
| 650 | + |
| 651 | + list($ns, $tag) = explode( ' ', $elm, 2 ); |
| 652 | + |
| 653 | + switch( $this->mode[0] ) { // NOTE(review): unset offset when the stack is empty; switch compares loosely, so null would match MODE_INITIAL (0) -- confirm intended |
| 654 | + case self::MODE_IGNORE: |
| 655 | + $this->startElementModeIgnore( $elm ); |
| 656 | + break; |
| 657 | + case self::MODE_SIMPLE: |
| 658 | + $this->startElementModeSimple( $elm ); |
| 659 | + break; |
| 660 | + case self::MODE_INITIAL: |
| 661 | + $this->startElementModeInitial( $ns, $tag, $attribs ); |
| 662 | + break; |
| 663 | + case self::MODE_STRUCT: |
| 664 | + $this->startElementModeStruct( $ns, $tag, $attribs ); |
| 665 | + break; |
| 666 | + case self::MODE_BAG: |
| 667 | + $this->startElementModeBag( $elm ); |
| 668 | + break; |
| 669 | + case self::MODE_SEQ: |
| 670 | + $this->startElementModeSeq( $elm ); |
| 671 | + break; |
| 672 | + case self::MODE_LI: |
| 673 | + $this->startElementModeLi( $elm ); |
| 674 | + break; |
| 675 | + default: |
| 676 | + throw new MWException('StartElement in unknown mode: ' . $this->mode[0] ); |
| 677 | + break; // not reached: the line above throws |
| 678 | + } |
| 679 | + |
| 680 | + |
| 681 | + |
| 682 | + } |
| 683 | + /** process attributes. |
| 684 | + * Simple values can be stored as either a tag or attribute |
| 685 | + * |
| 686 | + * @param $attribs Array attribute=>value array. |
| 687 | + */ |
| 688 | + private function doAttribs( $attribs ) { |
| 689 | +     foreach( $attribs as $name => $val ) { |
| 690 | +         if ( strpos( $name, ' ' ) === false ) continue; // no namespace part: not one of our properties |
| 691 | +         list($ns, $tag) = explode(' ', $name, 2); |
| 692 | +         if ( $ns === self::NS_RDF ) { |
| 693 | +             if ( $tag === 'value' || $tag === 'resource' ) { |
| 694 | +                 // rdf:resource is for a url; rdf:value is the content written as an |
| 695 | +                 // attribute. char() is the parser callback: its first argument is the parser. |
| 696 | +                 $this->char( $this->xmlParser, $val ); |
| 697 | +             } |
| 698 | +         } elseif ( isset( $this->items[$ns][$tag] ) ) { |
| 699 | +             if ( $this->mode[0] === self::MODE_SIMPLE ) { |
| 700 | +                 throw new MWException( __METHOD__ . " $ns:$tag found as attribute where not allowed" ); |
| 701 | +             } |
| 702 | +             $this->saveValue( $ns, $tag, $val ); |
| 703 | +         } |
| 704 | +     } |
| 705 | + } |
| 706 | + /** Given a value, save it to results array |
| 707 | + * |
| 708 | + * note also uses $this->ancestorStruct and |
| 709 | + * $this->processingArray to determine what name to |
| 710 | + * save the value under. (in addition to $tag). |
| 711 | + * |
| 712 | + * @param $ns String namespace of tag this is for |
| 713 | + * @param $tag String tag name |
| 714 | + * @param $val String value to save |
| 715 | + */ |
| 716 | + private function saveValue( $ns, $tag, $val ) { |
| 717 | +     if ( !isset( $this->items[$ns][$tag]['map_group'] ) ) { |
| 718 | +         return; // unknown tag, or an entry with no result group (e.g. the Flash struct itself) |
| 719 | +     } |
| 720 | +     $info = $this->items[$ns][$tag]; // a copy: a `=&` reference would create missing keys in $this->items |
| 721 | +     $group = 'xmp-' . $info['map_group']; |
| 722 | +     $finalName = isset( $info['map_name'] ) ? $info['map_name'] : $tag; |
| 723 | +     // FIXME: $info['validate'] / $info['choices'] are not applied yet. |
| 724 | + |
| 725 | +     if ( $this->ancestorStruct ) { |
| 726 | +         $this->results[$group][$this->ancestorStruct][$finalName] = $val; |
| 727 | +     } elseif ( $this->processingArray ) { |
| 728 | +         $this->results[$group][$finalName][] = $val; |
| 729 | +     } else { |
| 730 | +         $this->results[$group][$finalName] = $val; |
| 731 | +     } |
| 732 | + } |
| 733 | + |
| 734 | + |
| 735 | +} |
Property changes on: branches/img_metadata/phase3/includes/media/XMP.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 736 | + native |
Index: branches/img_metadata/phase3/includes/AutoLoader.php |
— | — | @@ -460,6 +460,7 @@ |
461 | 461 | 'ThumbnailImage' => 'includes/media/MediaTransformOutput.php', |
462 | 462 | 'TiffHandler' => 'includes/media/Tiff.php', |
463 | 463 | 'TransformParameterError' => 'includes/media/MediaTransformOutput.php', |
| 464 | + 'XMPReader' => 'includes/media/XMP.php', |
464 | 465 | |
465 | 466 | # includes/normal |
466 | 467 | 'UtfNormal' => 'includes/normal/UtfNormal.php', |