Index: branches/img_metadata/phase3/includes/media/XMP.php |
— | — | @@ -1,9 +1,8 @@ |
2 | 2 | <?php |
3 | | -/** Class for reading xmp data containing properties relevant to |
| 3 | +/** |
| 4 | +* Class for reading xmp data containing properties relevant to |
4 | 5 | * images, and spitting out an array that FormatExif accepts. |
5 | 6 | * |
6 | | -* It should be noted this is not done yet |
7 | | -* |
8 | 7 | * Note, this is not meant to recognize every possible thing you can |
9 | 8 | * encode in XMP. It should recognize all the properties we want. |
10 | 9 | * For example it doesn't have support for structures with multiple |
— | — | @@ -20,6 +19,10 @@ |
21 | 20 | * - getResults |
22 | 21 | * Outputs a results array. |
23 | 22 | * |
| 23 | +* Note that XMP resembles RDF, but they are not the same thing - XMP is |
| 24 | +* encoded as a specific subset of RDF. This class can read XMP. It cannot |
| 25 | +* read arbitrary RDF. |
| 26 | +* |
24 | 27 | */ |
25 | 28 | class XMPReader { |
26 | 29 | |
— | — | @@ -66,7 +69,8 @@ |
67 | 70 | const NS_XML = 'http://www.w3.org/XML/1998/namespace'; |
68 | 71 | |
69 | 72 | |
70 | | - /** Constructor. |
| 73 | + /** |
| 74 | + * Constructor. |
71 | 75 | * |
72 | 76 | * Primary job is to initialize the XMLParser |
73 | 77 | */ |
— | — | @@ -108,7 +112,7 @@ |
109 | 113 | |
110 | 114 | /** Destroy the xml parser |
111 | 115 | * |
112 | | - * not sure if this is actually needed. |
| 116 | + * Not sure if this is actually needed. |
113 | 117 | */ |
114 | 118 | function __destruct() { |
115 | 119 | // not sure if this is needed. |
— | — | @@ -338,13 +342,18 @@ |
339 | 343 | return $this->parse( $actualContent, $atEnd ); |
340 | 344 | } |
341 | 345 | |
342 | | - /** Character data handler |
| 346 | + /** |
| 347 | + * Character data handler |
343 | 348 | * Called whenever character data is found in the xmp document. |
344 | 349 | * |
345 | 350 | * does nothing if we're in MODE_IGNORE or if the data is whitespace |
346 | 351 | * throws an error if we're not in MODE_SIMPLE (as we're not allowed to have character |
347 | 352 | * data in the other modes). |
348 | 353 | * |
| 354 | + * As an example, this happens when we encounter XMP like: |
| 355 | + * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio> |
| 356 | + * and are processing the 0/10 bit. |
| 357 | + * |
349 | 358 | * @param $parser XMLParser reference to the xml parser |
350 | 359 | * @param $data String Character data |
351 | 360 | * @throws MWException on invalid data |
— | — | @@ -391,11 +400,19 @@ |
392 | 401 | return; |
393 | 402 | |
394 | 403 | } |
395 | | - /** Hit a closing element when in MODE_SIMPLE. |
| 404 | + /** |
| 405 | + * Hit a closing element when in MODE_SIMPLE. |
396 | 406 | * This generally means that we finished processing a |
397 | 407 | * property value, and now have to save the result to the |
398 | 408 | * results array |
399 | 409 | * |
| 410 | + * For example, when processing: |
| 411 | + * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio> |
| 412 | + * this deals with when we hit </exif:DigitalZoomRatio>. |
| 413 | + * |
| 414 | + * Or it could be if we hit the end element of a property |
| 415 | + * of a compound data structure (like a member of an array). |
| 416 | + * |
400 | 417 | * @param $elm String namespace, space, and tag name. |
401 | 418 | */ |
402 | 419 | private function endElementModeSimple ( $elm ) { |
— | — | @@ -415,12 +432,19 @@ |
416 | 433 | array_shift( $this->mode ); |
417 | 434 | |
418 | 435 | } |
419 | | - /** Hit a closing element in MODE_STRUCT, MODE_SEQ, MODE_BAG |
| 436 | + /** |
| 437 | + * Hit a closing element in MODE_STRUCT, MODE_SEQ, MODE_BAG |
420 | 438 | * generally means we've finished processing a nested structure. |
421 | 439 | * resets some internal variables to indicate that. |
422 | 440 | * |
423 | 441 | * Note this means we hit the </closing element> not the </rdf:Seq>. |
424 | 442 | * |
| 443 | + * For example, when processing: |
| 444 | + * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li> |
| 445 | + * </rdf:Seq> </exif:ISOSpeedRatings> |
| 446 | + * |
| 447 | + * This method is called when we hit the </exif:ISOSpeedRatings> tag. |
| 448 | + * |
425 | 449 | * @param $elm String namespace . space . tag name. |
426 | 450 | */ |
427 | 451 | private function endElementNested( $elm ) { |
— | — | @@ -470,11 +494,21 @@ |
471 | 495 | $this->processingArray = false; |
472 | 496 | $this->itemLang = false; |
473 | 497 | } |
474 | | - /** Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag ) |
| 498 | + |
| 499 | + /** |
| 500 | + * Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag ) |
475 | 501 | * Add information about what type of element this is. |
476 | 502 | * |
477 | | - * note we still have to hit the outer </property> |
| 503 | + * Note we still have to hit the outer </property> |
478 | 504 | * |
| 505 | + * For example, when processing: |
| 506 | + * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li> |
| 507 | + * </rdf:Seq> </exif:ISOSpeedRatings> |
| 508 | + * |
| 509 | + * This method is called when we hit the </rdf:Seq>. |
| 510 | + * (For comparison, we call endElementModeSimple when we |
| 511 | + * hit the </rdf:li>) |
| 512 | + * |
479 | 513 | * @param $elm String namespace . ' ' . element name |
480 | 514 | */ |
481 | 515 | private function endElementModeLi( $elm ) { |
— | — | @@ -505,10 +539,14 @@ |
506 | 540 | throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." ); |
507 | 541 | } |
508 | 542 | } |
509 | | - /** end element while in MODE_QDESC |
| 543 | + /** |
| 544 | + * End element while in MODE_QDESC |
510 | 545 | * mostly when ending an element when we have a simple value |
511 | | - * that has qualifiers |
| 546 | + * that has qualifiers. |
512 | 547 | * |
| 548 | + * Qualifiers aren't all that common, and we don't do anything |
| 549 | + * with them. |
| 550 | + * |
513 | 551 | * @param $elm String namespace and element |
514 | 552 | */ |
515 | 553 | private function endElementModeQDesc( $elm ) { |
— | — | @@ -524,10 +562,15 @@ |
525 | 563 | |
526 | 564 | |
527 | 565 | } |
528 | | - /** Handler for hitting a closing element. |
| 566 | + /** |
| 567 | + * Handler for hitting a closing element. |
529 | 568 | * |
530 | | - * generally just calls a helper function depending on what mode we're in. |
531 | | - * Ignores the outer wrapping elements that are optional in xmp and have no meaning. |
| 569 | + * generally just calls a helper function depending on what |
| 570 | + * mode we're in. |
| 571 | + * |
| 572 | + * Ignores the outer wrapping elements that are optional in |
| 573 | + * xmp and have no meaning. |
| 574 | + * |
532 | 575 | * @param $parser XMLParser |
533 | 576 | * @param $elm String namespace . ' ' . element name |
534 | 577 | */ |
— | — | @@ -542,17 +585,22 @@ |
543 | 586 | |
544 | 587 | if ( $elm === self::NS_RDF . ' type' ) { |
545 | 588 | // these aren't really supported properly yet. |
| 589 | + // However, it appears they are almost never used. |
546 | 590 | wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' ); |
547 | 591 | } |
548 | 592 | |
549 | 593 | if ( strpos( $elm, ' ' ) === false ) { |
550 | 594 | // This probably shouldn't happen. |
| 595 | + // However, there is a bug in an Adobe product |
| 596 | + // that forgets the namespace on some things. |
| 597 | + // (Luckily they are unimportant things). |
551 | 598 | wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." ); |
552 | 599 | return; |
553 | 600 | } |
554 | 601 | |
555 | 602 | if ( count( $this->mode[0] ) === 0 ) { |
556 | | - // This should never ever happen. |
| 603 | + // This should never ever happen and means |
| 604 | + // there is a pretty major bug in this class. |
557 | 605 | throw new MWException( 'Encountered end element with no mode' ); |
558 | 606 | } |
559 | 607 | |
— | — | @@ -580,7 +628,7 @@ |
581 | 629 | if ( $elm === self::NS_RDF . ' Description' ) { |
582 | 630 | array_shift( $this->mode ); |
583 | 631 | } else { |
584 | | - throw new MWException( 'Element ended unexpected while in MODE_INITIAL' ); |
| 632 | + throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' ); |
585 | 633 | } |
586 | 634 | break; |
587 | 635 | case self::MODE_LI: |
— | — | @@ -597,9 +645,14 @@ |
598 | 646 | } |
599 | 647 | |
600 | 648 | |
601 | | - /** Hit an opening element while in MODE_IGNORE |
| 649 | + /** |
| 650 | + * Hit an opening element while in MODE_IGNORE |
602 | 651 | * |
| 652 | + * XMP is extensible, so ignore any tag we don't understand. |
| 653 | + * |
603 | 654 | * Mostly ignores, unless we encounter the element that we are ignoring. |
| 655 | + * In that case we add it to the item stack, so that we can |
| 656 | + * correctly ignore things that are nested. |
604 | 657 | * |
605 | 658 | * @param $elm String namespace . ' ' . tag name |
606 | 659 | */ |
— | — | @@ -609,7 +662,8 @@ |
610 | 663 | array_unshift( $this->mode, self::MODE_IGNORE ); |
611 | 664 | } |
612 | 665 | } |
613 | | - /* Start element in MODE_BAG |
| 666 | + /** |
| 667 | + * Start element in MODE_BAG (unordered array) |
614 | 668 | * this should always be <rdf:Bag> |
615 | 669 | * |
616 | 670 | * @param $elm String namespace . ' ' . tag |
— | — | @@ -623,7 +677,8 @@ |
624 | 678 | } |
625 | 679 | |
626 | 680 | } |
627 | | - /* Start element in MODE_SEQ |
| 681 | + /** |
| 682 | + * Start element in MODE_SEQ (ordered array) |
628 | 683 | * this should always be <rdf:Seq> |
629 | 684 | * |
630 | 685 | * @param $elm String namespace . ' ' . tag |
— | — | @@ -642,9 +697,17 @@ |
643 | 698 | } |
644 | 699 | |
645 | 700 | } |
646 | | - /* Start element in MODE_LANG (language alternative) |
| 701 | + /** |
| 702 | + * Start element in MODE_LANG (language alternative) |
647 | 703 | * this should always be <rdf:Alt> |
648 | 704 | * |
| 705 | + * This tag tends to be used for metadata such as a description of the |
| 706 | + * picture, which can be translated into multiple languages. |
| 707 | + * |
| 708 | + * XMP supports non-linguistic alternative selections, |
| 709 | + * which are really only used for thumbnails, which |
| 710 | + * we don't care about. |
| 711 | + * |
649 | 712 | * @param $elm String namespace . ' ' . tag |
650 | 713 | * @throws MWException if we have an element that's not <rdf:Alt> |
651 | 714 | */ |
— | — | @@ -656,12 +719,22 @@ |
657 | 720 | } |
658 | 721 | |
659 | 722 | } |
660 | | - /** Handle an opening element when in MODE_SIMPLE |
| 723 | + /** |
| 724 | + * Handle an opening element when in MODE_SIMPLE |
| 725 | + * |
661 | 726 | * This should not happen often. This is for if a simple element |
662 | 727 | * already opened has a child element. Could happen for a |
663 | 728 | * qualified element. |
664 | 729 | * |
| 730 | + * For example: |
| 731 | + * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value> |
| 732 | + * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description> |
| 733 | + * </exif:DigitalZoomRatio> |
| 734 | + * |
| 735 | + * This method is called when processing the <rdf:Description> element |
| 736 | + * |
665 | 737 | * @param $elm String namespace and tag names separated by space. |
| 738 | + * @param $attribs Array Attributes of the element. |
666 | 739 | */ |
667 | 740 | private function startElementModeSimple( $elm, $attribs ) { |
668 | 741 | if ( $elm === self::NS_RDF . ' Description' ) { |
— | — | @@ -686,10 +759,17 @@ |
687 | 760 | } |
688 | 761 | |
689 | 762 | } |
690 | | - /** Start an element when in MODE_QDESC. |
| 763 | + /** |
| 764 | + * Start an element when in MODE_QDESC. |
691 | 765 | * This generally happens when a simple element has an inner |
692 | 766 | * rdf:Description to hold qualifier elements. |
693 | 767 | * |
| 768 | + * For example in: |
| 769 | + * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value> |
| 770 | + * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description> |
| 771 | + * </exif:DigitalZoomRatio> |
| 772 | + * Called when processing the <rdf:value> or <foo:someQualifier>. |
| 773 | + * |
694 | 774 | * @param $elm String namespace and tag name separated by a space. |
695 | 775 | * |
696 | 776 | */ |
— | — | @@ -702,11 +782,12 @@ |
703 | 783 | array_unshift( $this->curItem, $elm ); |
704 | 784 | } |
705 | 785 | } |
706 | | - /** Starting an element when in MODE_INITIAL |
| 786 | + /** |
| 787 | + * Starting an element when in MODE_INITIAL |
707 | 788 | * This usually happens when we hit an element inside |
708 | 789 | * the outer rdf:Description |
709 | 790 | * |
710 | | - * This is generally where most props start |
| 791 | + * This is generally where most properties start. |
711 | 792 | * |
712 | 793 | * @param $ns String Namespace |
713 | 794 | * @param $tag String tag name (without namespace prefix) |
— | — | @@ -753,9 +834,20 @@ |
754 | 835 | // process attributes |
755 | 836 | $this->doAttribs( $attribs ); |
756 | 837 | } |
757 | | - /** Hit an opening element when in a Struct (MODE_STRUCT) |
758 | | - * This is generally for fields of a compound property |
| 838 | + /** |
| 839 | + * Hit an opening element when in a Struct (MODE_STRUCT) |
| 840 | + * This is generally for fields of a compound property. |
759 | 841 | * |
| 842 | + * Example of a struct (abbreviated; flash has more properties): |
| 843 | + * |
| 844 | + * <exif:Flash> <rdf:Description> <exif:Fired>True</exif:Fired> |
| 845 | + * <exif:Mode>1</exif:Mode></rdf:Description></exif:Flash> |
| 846 | + * |
| 847 | + * or: |
| 848 | + * |
| 849 | + * <exif:Flash rdf:parseType='Resource'> <exif:Fired>True</exif:Fired> |
| 850 | + * <exif:Mode>1</exif:Mode></exif:Flash> |
| 851 | + * |
760 | 852 | * @param $ns String namespace |
761 | 853 | * @param $tag String tag name (no ns) |
762 | 854 | * @param $attribs Array array of attribs w/ values. |
— | — | @@ -793,9 +885,15 @@ |
794 | 886 | array_unshift( $this->curItem, $this->curItem[0] ); |
795 | 887 | } |
796 | 888 | } |
797 | | - /** opening element in MODE_LI |
798 | | - * process elements of arrays |
| 889 | + /** |
| 890 | + * Opening element in MODE_LI. |
| 891 | + * Process elements of arrays. |
799 | 892 | * |
| 893 | + * Example: |
| 894 | + * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li> |
| 895 | + * </rdf:Seq> </exif:ISOSpeedRatings> |
| 896 | + * This method is called when we hit the <rdf:li> element. |
| 897 | + * |
800 | 898 | * @param $elm String: namespace . ' ' . tagname |
801 | 899 | * @param $attribs Array: Attributes. (needed for BAGSTRUCTS) |
802 | 900 | * @throws MWException if gets a tag other than <rdf:li> |
— | — | @@ -837,9 +935,16 @@ |
838 | 936 | } |
839 | 937 | |
840 | 938 | } |
841 | | - /** opening element in MODE_LI_LANG |
| 939 | + /** |
| 940 | + * Opening element in MODE_LI_LANG. |
842 | 941 | * process elements of language alternatives |
843 | 942 | * |
| 943 | + * Example: |
| 944 | + * <dc:title> <rdf:Alt> <rdf:li xml:lang="x-default">My house |
| 945 | + * </rdf:li> </rdf:Alt> </dc:title> |
| 946 | + * |
| 947 | + * This method is called when we hit the <rdf:li> element. |
| 948 | + * |
844 | 949 | * @param $elm String namespace . ' ' . tag |
845 | 950 | * @param $attribs array array of elements (most importantly xml:lang) |
846 | 951 | * @throws MWException if gets a tag other than <rdf:li> or if no xml:lang |
— | — | @@ -865,7 +970,8 @@ |
866 | 971 | $this->processingArray = true; |
867 | 972 | } |
868 | 973 | |
869 | | - /** Hits an opening element. |
| 974 | + /** |
| 975 | + * Hits an opening element. |
870 | 976 | * Generally just calls a helper based on what MODE we're in. |
871 | 977 | * Also does some initial set up for the wrapper element |
872 | 978 | * |
— | — | @@ -951,9 +1057,16 @@ |
952 | 1058 | |
953 | 1059 | |
954 | 1060 | } |
955 | | - /** process attributes. |
| 1061 | + /** |
| 1062 | + * Process attributes. |
956 | 1063 | * Simple values can be stored as either a tag or attribute |
957 | 1064 | * |
| 1065 | + * Often the initial <rdf:Description> tag just has all the simple |
| 1066 | + * properties as attributes. |
| 1067 | + * |
| 1068 | + * Example: |
| 1069 | + * <rdf:Description rdf:about="" xmlns:exif="http://ns.adobe.com/exif/1.0/" exif:DigitalZoomRatio="0/10"> |
| 1070 | + * |
958 | 1071 | * @param $attribs Array attribute=>value array. |
959 | 1072 | */ |
960 | 1073 | private function doAttribs( $attribs ) { |
— | — | @@ -996,7 +1109,8 @@ |
997 | 1110 | } |
998 | 1111 | } |
999 | 1112 | } |
1000 | | - /** Given a value, save it to results array |
| 1113 | + /** |
| 1114 | + * Given an extracted value, save it to results array |
1001 | 1115 | * |
1002 | 1116 | * note also uses $this->ancestorStruct and |
1003 | 1117 | * $this->processingArray to determine what name to |