Index: branches/img_metadata/phase3/includes/media/XMP.php |
— | — | @@ -30,6 +30,7 @@ |
31 | 31 | |
32 | 32 | private $xmlParser; |
33 | 33 | private $charset = false; |
| 34 | + private $extendedXMPOffset = 0; |
34 | 35 | |
35 | 36 | protected $items; |
36 | 37 | |
— | — | @@ -74,6 +75,20 @@ |
75 | 76 | |
76 | 77 | $this->items = XMPInfo::getItems(); |
77 | 78 | |
| 79 | + $this->resetXMLParser(); |
| 80 | + |
| 81 | + } |
| 82 | + /** |
| 83 | + * Main use is if a single item has multiple xmp documents describing it. |
| 84 | + * For example in jpeg's with extendedXMP |
| 85 | + */ |
| 86 | + private function resetXMLParser() { |
| 87 | + |
| 88 | + if ($this->xmlParser) { |
| 89 | + //is this needed? |
| 90 | + xml_parser_free( $this->xmlParser ); |
| 91 | + } |
| 92 | + |
78 | 93 | $this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' ); |
79 | 94 | xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 ); |
80 | 95 | xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 ); |
— | — | @@ -83,6 +98,8 @@ |
84 | 99 | array( $this, 'endElement' ) ); |
85 | 100 | |
86 | 101 | xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) ); |
| 102 | + |
| 103 | + |
87 | 104 | } |
88 | 105 | |
89 | 106 | /** Destroy the xml parser |
— | — | @@ -99,6 +116,9 @@ |
100 | 117 | * FormatExif. |
101 | 118 | */ |
102 | 119 | public function getResults() { |
| 120 | + // xmp-special only holds values that steer extraction (for example |
| 121 | + // xmpNote:HasExtendedXMP). It is removed for good, so parseExtended() must run first. |
| 122 | + unset( $this->results['xmp-special'] ); |
103 | 123 | return $this->results; |
104 | 124 | } |
105 | 125 | |
— | — | @@ -110,13 +130,17 @@ |
111 | 131 | * debug log, blanks result array and returns false. |
112 | 132 | * |
113 | 133 | * @param String: $content XMP data |
| 134 | + * @param Boolean: $allOfIt If this is all the data (true) or if it's split up (false). Default true |
| 135 | + * @param Boolean: $reset - does xml parser need to be reset. Default false |
114 | 136 | * @return Boolean success. |
115 | | - * @todo charset detection (usually UTF-8, but UTF-16 or 32 is allowed). |
116 | 137 | */ |
117 | | - public function parse( $content ) { |
| 138 | + public function parse( $content, $allOfIt = true, $reset = false ) { |
| 139 | + if ( $reset ) { |
| 140 | + $this->resetXMLParser(); |
| 141 | + } |
118 | 142 | try { |
119 | 143 | |
120 | | - // detect encoding by looking for BOM |
| 144 | + // detect encoding by looking for BOM which is supposed to be in processing instruction. |
121 | 145 | // see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf |
122 | 146 | if ( !$this->charset ) { |
123 | 147 | $bom = array(); |
— | — | @@ -147,6 +171,7 @@ |
148 | 172 | } |
149 | 173 | |
150 | 174 | } else { |
| 175 | + // the standard specifically says to assume UTF-8 if there is no BOM |
151 | 176 | $this->charset = 'UTF-8'; |
152 | 177 | } |
153 | 178 | } |
— | — | @@ -155,7 +180,7 @@ |
156 | 181 | $content = iconv( $this->charset, 'UTF-8//IGNORE', $content ); |
157 | 182 | } |
158 | 183 | |
159 | | - $ok = xml_parse( $this->xmlParser, $content, true ); |
| 184 | + $ok = xml_parse( $this->xmlParser, $content, $allOfIt ); |
160 | 185 | if ( !$ok ) { |
161 | 186 | $error = xml_error_string( xml_get_error_code( $this->xmlParser ) ); |
162 | 187 | $where = 'line: ' . xml_get_current_line_number( $this->xmlParser ) |
— | — | @@ -174,6 +199,66 @@ |
175 | 200 | return true; |
176 | 201 | } |
177 | 202 | |
| 203 | + /** Entry point for XMPExtended blocks in jpeg files |
| 204 | + * |
| 205 | + * A block is laid out as a 32 byte GUID, the full length of the extended |
| 206 | + * XMP (4 bytes), the offset of this portion (4 bytes), then the portion itself. |
| 207 | + * Blocks must be passed in order, after the standard XMP has been parsed. |
| 208 | + * |
| 209 | + * @todo In serious need of testing; example files are hard to find. |
| 210 | + * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20 |
| 211 | + * @param String $content XMPExtended block minus the namespace signature |
| 212 | + * @return Boolean If it succeeded. |
| 213 | + */ |
| 214 | + public function parseExtended( $content ) { |
| 215 | + // The GUID must match the xmpNote:HasExtendedXMP value of the standard XMP. |
| 216 | + $guid = substr( $content, 0, 32 ); |
| 217 | + if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] ) |
| 218 | + || $this->results['xmp-special']['HasExtendedXMP'] !== $guid ) |
| 219 | + { |
| 220 | + wfDebugLog('XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid' )"); |
| 221 | + return false; |
| 222 | + } |
| 223 | + // Both values are 32 bit big-endian unsigned integers. |
| 224 | + $len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) ); |
| 225 | + |
| 226 | + if (!$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) { |
| 227 | + wfDebugLog('XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.'); |
| 228 | + return false; |
| 229 | + } |
| 230 | + |
| 231 | + // we're not very robust here. we should accept it in the wrong order. To quote |
| 232 | + // the xmp standard: |
| 233 | + // "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the |
| 234 | + // StandardXMP. However, the JPEG standard does not require preservation of marker segment order. A |
| 235 | + // robust JPEG reader should tolerate the marker segments in any order." |
| 236 | + // |
| 237 | + // otoh the probability that an image will have more than 128k of metadata is rather low... |
| 238 | + // so the probability that it will have > 128k, and be in the wrong order is very low... |
| 239 | + |
| 240 | + if ( $len['offset'] !== $this->extendedXMPOffset ) { |
| 241 | + wfDebugLog('XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was ' |
| 242 | + . $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')'); |
| 243 | + return false; |
| 244 | + } |
| 245 | + |
| 246 | + if ( $len['offset'] === 0 ) { |
| 247 | + // if we're starting the extended block, we've probably already |
| 248 | + // done the XMPStandard block, so reset. |
| 249 | + $this->resetXMLParser(); |
| 250 | + } |
| 251 | + |
| 252 | + $actualContent = substr( $content, 40 ); |
| 253 | + |
| 254 | + // 'length' is the size of the whole extended XMP, so advance by the bytes |
| 255 | + // actually present in this portion; we are done once they add up to it. |
| 256 | + $this->extendedXMPOffset += strlen( $actualContent ); |
| 257 | + $atEnd = ( $this->extendedXMPOffset >= $len['length'] ); |
| 258 | + |
| 259 | + wfDebugLog('XMP', __METHOD__ . 'Parsing a XMPExtended block'); |
| 260 | + return $this->parse( $actualContent, $atEnd ); |
| 261 | + } |
| 262 | + |
178 | 263 | /** Character data handler |
179 | 264 | * Called whenever character data is found in the xmp document. |
180 | 265 | * |
— | — | @@ -657,6 +742,9 @@ |
658 | 743 | // In practise I have yet to see a file that |
659 | 744 | // uses this element, however it is mentioned |
660 | 745 | // on page 25 of part 1 of the xmp standard. |
| 746 | + // |
| 747 | + // also it seems as if exiv2 and exiftool do not support |
| 748 | + // this either (That or I misunderstand the standard) |
661 | 749 | wfDebugLog( 'XMP', __METHOD__ . ' Encoutered <rdf:type> which isn\'t currently supported' ); |
662 | 750 | } |
663 | 751 | |
Index: branches/img_metadata/phase3/includes/media/BitmapMetadataHandler.php |
— | — | @@ -88,7 +88,7 @@ |
89 | 89 | if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" ) { |
90 | 90 | $segments["XMP"] = substr( $temp, 29 ); |
91 | 91 | } elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" ) { |
92 | | - $segments["XMP_ext"][] = $temp; |
| 92 | + $segments["XMP_ext"][] = substr( $temp, 35 ); |
93 | 93 | } |
94 | 94 | } elseif ( $buffer === "\xED" ) { |
95 | 95 | // APP13 - PSIR. IPTC and some photoshop stuff |
— | — | @@ -316,8 +316,11 @@ |
317 | 317 | if ( isset( $seg['XMP'] ) ) { |
318 | 318 | $xmp = new XMPReader(); |
319 | 319 | $xmp->parse( $seg['XMP'] ); |
320 | | - if ( isset( $seg['XMP_ext'] ) ) { |
321 | | - /* FIXME!! */ |
| 320 | + if ( isset( $seg['XMP_ext'] ) ) { |
| 321 | + // Extended xmp support in jpeg files is not well tested and a bit fragile; the key may be absent when the file has no such segments. |
| 322 | + foreach ( $seg['XMP_ext'] as $xmpExt ) { |
| 323 | + $xmp->parseExtended( $xmpExt ); |
| 324 | + } |
322 | 325 | |
323 | 326 | } |
324 | 327 | $res = $xmp->getResults(); |
Index: branches/img_metadata/phase3/includes/media/XMPInfo.php |
— | — | @@ -201,5 +201,12 @@ |
202 | 202 | ), |
203 | 203 | |
204 | 204 | ), |
| 205 | + //Note, this property affects how jpeg metadata is extracted. |
| 206 | + 'http://ns.adobe.com/xmp/note/' => array( |
| 207 | + 'HasExtendedXMP' => array( |
| 208 | + 'map_group' => 'special', |
| 209 | + 'mode' => XMPReader::MODE_SIMPLE, |
| 210 | + ), |
| 211 | + ), |
205 | 212 | ); |
206 | 213 | } |