r72706 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r72705 | r72706 | r72707 >
Date:09:47, 10 September 2010
Author:daniel
Status:ok (Comments)
Tags:
Comment:
introduced PagedTiffInfoParserState for parsing output of identify and tiffinfo
Modified paths:
  • /trunk/extensions/PagedTiffHandler/PagedTiffHandler.image.php (modified) (history)

Diff [purge]

Index: trunk/extensions/PagedTiffHandler/PagedTiffHandler.image.php
@@ -186,31 +186,6 @@
187187 return $this->_meta;
188188 }
189189
190 - private function addPageEntry( &$entry, &$metadata, &$prevPage ) {
191 - if ( !isset( $entry['page'] ) ) {
192 - $entry['page'] = $prevPage +1;
193 - } else {
194 - if ( $prevPage >= $entry['page'] ) {
195 - $metadata['errors'][] = "inconsistent page numbering in TIFF directory";
196 - return false;
197 - }
198 - }
199 -
200 - if ( isset( $entry['width'] ) && isset( $entry['height'] ) ) {
201 - $prevPage = max($prevPage, $entry['page']);
202 -
203 - if ( !isset( $entry['alpha'] ) ) {
204 - $entry['alpha'] = 'false';
205 - }
206 -
207 - $entry['pixels'] = $entry['height'] * $entry['width'];
208 - $metadata['page_data'][$entry['page']] = $entry;
209 - }
210 -
211 - $entry = array();
212 - return true;
213 - }
214 -
215190 /**
216191 * helper function of retrieveMetaData().
217192 * parses shell return from tiffinfo-command into an array.
@@ -221,27 +196,25 @@
222197 $dump = preg_replace( '/ Image Length:/', "\n Image Length:", $dump ); #HACK: width and length are given on a single line...
223198 $rows = preg_split('/[\r\n]+\s*/', $dump);
224199
225 - $data = array();
226 - $data['page_data'] = array();
 200+ $state = new PagedTiffInfoParserState();
227201
228202 $ignoreIFDs = array();
229 - $entry = array();
230 -
231203 $ignore = false;
232 - $prevPage = 0;
233204
234205 foreach ( $rows as $row ) {
235206 $row = trim( $row );
236207
237 - if ( preg_match('/^<|^$/', $row) ) {
 208+ # ignore XML rows
 209+ if ( preg_match('/^<|^$/', $row) ) {
238210 continue;
239211 }
240212
241213 $error = false;
242214
 215+ # handle fatal errors
243216 foreach ( $wgTiffTiffinfoRejectMessages as $pattern ) {
244217 if ( preg_match( $pattern, trim( $row ) ) ) {
245 - $data['errors'][] = $row;
 218+ $state->addError( $row );
246219 $error = true;
247220 break;
248221 }
@@ -250,19 +223,25 @@
251224 if ( $error ) continue;
252225
253226 if ( preg_match('/^TIFF Directory at offset 0x[a-f0-9]+ \((\d+)\)/', $row, $m) ) {
 227+ # new IFD starting, flush previous page
 228+
254229 if ( $ignore ) {
255 - $entry = array();
256 - } else if ( $entry ) {
257 - $ok = $this->addPageEntry($entry, $data, $prevPage);
 230+ $state->resetPage();
 231+ } else {
 232+ $ok = $state->finishPage();
 233+
258234 if ( !$ok ) {
259235 $error = true;
260236 continue;
261237 }
262238 }
263239
 240+ # check if the next IFD is to be ignored
264241 $offset = (int)$m[1];
265242 $ignore = !empty( $ignoreIFDs[ $offset ] );
266243 } else if ( preg_match('#^(TIFF.*?Directory): (.*?/.*?): (.*)#i', $row, $m) ) {
 244+ # handle warnings
 245+
267246 $bypass = false;
268247 $msg = $m[3];
269248
@@ -274,23 +253,25 @@
275254 }
276255
277256 if ( !$bypass ) {
278 - $data['warnings'][] = $msg;
 257+ $state->addWarning( $msg );
279258 }
280259 } else if ( preg_match('/^\s*(.*?)\s*:\s*(.*?)\s*$/', $row, $m) ) {
 260+ # handle key/value pair
 261+
281262 $key = $m[1];
282263 $value = $m[2];
283264
284265 if ( $key == 'Page Number' && preg_match('/(\d+)-(\d+)/', $value, $m) ) {
285 - $data['page_amount'] = (int)$m[2];
286 - $entry['page'] = (int)$m[1] +1;
 266+ $state->setFileProperty('page_amount', (int)$m[2]);
 267+ $state->setPageProperty('page', (int)$m[1] +1);
287268 } else if ( $key == 'Samples/Pixel' ) {
288 - if ($value == '4') $entry['alpha'] = 'true';
 269+ if ($value == '4') $state->setPageProperty('alpha', 'true');
289270 } else if ( $key == 'Extra samples' ) {
290 - if (preg_match('.*alpha.*', $value)) $entry['alpha'] = 'true';
 271+ if (preg_match('.*alpha.*', $value)) $state->setPageProperty('alpha', 'true');
291272 } else if ( $key == 'Image Width' || $key == 'PixelXDimension' ) {
292 - $entry['width'] = (int)$value;
 273+ $state->setPageProperty('width', (int)$value);
293274 } else if ( $key == 'Image Length' || $key == 'PixelYDimension' ) {
294 - $entry['height'] = (int)$value;
 275+ $state->setPageProperty('height', (int)$value);
295276 } else if ( preg_match('/.*IFDOffset/', $key) ) {
296277 # ignore extra IFDs, see <http://www.awaresystems.be/imaging/tiff/tifftags/exififd.html>
297278 # Note: we assume that we will always see the reference before the actual IFD, so we know which IFDs to ignore
@@ -303,18 +284,9 @@
304285
305286 }
306287
307 - if ( $entry && !$ignore ) {
308 - $ok = $this->addPageEntry($entry, $data, $prevPage);
309 - }
 288+ $state->finish( !$ignore );
310289
311 - if ( !isset( $data['page_amount'] ) ) {
312 - $data['page_amount'] = count( $data['page_data'] );
313 - }
314 -
315 - if ( ! $data['page_data'] ) {
316 - $data['errors'][] = 'no page data found in tiff directory!';
317 - }
318 -
 290+ $data = $state->getMetadata();
319291 return $data;
320292 }
321293
@@ -342,18 +314,17 @@
343315 protected function parseIdentifyOutput( $dump ) {
344316 global $wgTiffIdentifyRejectMessages, $wgTiffIdentifyBypassMessages;
345317
346 - $data = array();
 318+ $state = new PagedTiffInfoParserState();
 319+
347320 if ( strval( $dump ) == '' ) {
348 - $data['errors'][] = "no metadata";
349 - return $data;
 321+ $state->addError( "no metadata" );
 322+ return $state->getMetadata();
350323 }
351324
352325 $infos = null;
353326 preg_match_all( '/\[BEGIN\](.+?)\[END\]/si', $dump, $infos, PREG_SET_ORDER );
354 - $data['page_amount'] = count( $infos );
355 - $data['page_data'] = array();
356327 foreach ( $infos as $info ) {
357 - $entry = array();
 328+ $state->resetPage();
358329 $lines = explode( "\n", $info[1] );
359330 foreach ( $lines as $line ) {
360331 if ( trim( $line ) == '' ) {
@@ -363,22 +334,21 @@
364335 if ( trim( $parts[0] ) == 'alpha' && trim( $parts[1] ) == '%A' ) {
365336 continue;
366337 }
367 - if ( trim( $parts[0] ) == 'alpha2' && !isset( $entry['alpha'] ) ) {
 338+ if ( trim( $parts[0] ) == 'alpha2' && !$state->hasPageProperty( 'alpha' ) ) {
368339 switch( trim( $parts[1] ) ) {
369340 case 'DirectClassRGBMatte':
370341 case 'DirectClassRGBA':
371 - $entry['alpha'] = 'true';
 342+ $state->setPageProperty('alpha', 'true');
372343 break;
373344 default:
374 - $entry['alpha'] = 'false';
 345+ $state->setPageProperty('alpha', 'false');
375346 break;
376347 }
377348 continue;
378349 }
379 - $entry[trim( $parts[0] )] = trim( $parts[1] );
 350+ $state->setPageProperty( trim( $parts[0] ), trim( $parts[1] ) );
380351 }
381 - $entry['pixels'] = $entry['height'] * $entry['width'];
382 - $data['page_data'][$entry['page']] = $entry;
 352+ $state->finishPage();
383353 }
384354
385355
@@ -393,7 +363,7 @@
394364 $knownError = false;
395365 foreach ( $wgTiffIdentifyRejectMessages as $msg ) {
396366 if ( preg_match( $msg, trim( $error ) ) ) {
397 - $data['errors'][] = $error;
 367+ $state->addError( $error );
398368 $knownError = true;
399369 break;
400370 }
@@ -402,17 +372,109 @@
403373 // ignore messages that match $wgTiffIdentifyBypassMessages
404374 foreach ( $wgTiffIdentifyBypassMessages as $msg ) {
405375 if ( preg_match( $msg, trim( $error ) ) ) {
406 - // $data['warnings'][] = $error;
407376 $knownError = true;
408377 break;
409378 }
410379 }
411380 }
412381 if ( !$knownError ) {
413 - $data['warnings'][] = $error;
 382+ $state->addWarning( $error );
414383 }
415384 }
416385 }
 386+
 387+ $state->finish();
 388+
 389+ $data = $state->getMetadata();
417390 return $data;
418391 }
419392 }
 393+
 394+class PagedTiffInfoParserState {
 395+ var $metadata; # all data
 396+ var $page; # current page
 397+ var $prevPage;
 398+
 399+ function __construct() {
 400+ $this->metadata = array();
 401+ $this->page = array();
 402+ $this->prevPage = 0;
 403+
 404+ $this->metadata['page_data'] = array();
 405+ }
 406+
 407+ function finish( $finishPage = true ) {
 408+ if ( $finishPage ) {
 409+ $this->finishPage( );
 410+ }
 411+
 412+ if ( !isset( $this->metadata['page_amount'] ) ) {
 413+ $this->metadata['page_amount'] = count( $this->metadata['page_data'] );
 414+ }
 415+
 416+ if ( ! $this->metadata['page_data'] ) {
 417+ $this->metadata['errors'][] = 'no page data found in tiff directory!';
 418+ }
 419+ }
 420+
 421+ function resetPage( ) {
 422+ $this->page = array();
 423+ }
 424+
 425+ function finishPage( ) {
 426+ if ( !isset( $this->page['page'] ) ) {
 427+ $this->page['page'] = $this->prevPage +1;
 428+ } else {
 429+ if ( $this->prevPage >= $this->page['page'] ) {
 430+ $this->metadata['errors'][] = "inconsistent page numbering in TIFF directory";
 431+ return false;
 432+ }
 433+ }
 434+
 435+ if ( isset( $this->page['width'] ) && isset( $this->page['height'] ) ) {
 436+ $this->prevPage = max($this->prevPage, $this->page['page']);
 437+
 438+ if ( !isset( $this->page['alpha'] ) ) {
 439+ $this->page['alpha'] = 'false';
 440+ }
 441+
 442+ $this->page['pixels'] = $this->page['height'] * $this->page['width'];
 443+ $this->metadata['page_data'][$this->page['page']] = $this->page;
 444+ }
 445+
 446+ $this->page = array();
 447+ return true;
 448+ }
 449+
 450+ function setPageProperty( $key, $value ) {
 451+ $this->page[$key] = $value;
 452+ }
 453+
 454+ function hasPageProperty( $key ) {
 455+ return isset( $this->page[$key] ) && ! is_null( $this->page[$key] );
 456+ }
 457+
 458+ function setFileProperty( $key, $value ) {
 459+ $this->metadata[$key] = $value;
 460+ }
 461+
 462+ function hasFileProperty( $key, $value ) {
 463+ return isset( $this->metadata[$key] ) && ! is_null( $this->metadata[$key] );
 464+ }
 465+
 466+ function addError( $message ) {
 467+ $this->metadata['errors'][] = $message;
 468+ }
 469+
 470+ function addWarning( $message ) {
 471+ $this->metadata['warnings'][] = $message;
 472+ }
 473+
 474+ function getMetadata( ) {
 475+ return $this->metadata;
 476+ }
 477+
 478+ function hasErrors() {
 479+ return !empty( $this->metadata['errors'] );
 480+ }
 481+}
\ No newline at end of file

Comments

#Comment by Duesentrieb (talk | contribs)   09:50, 10 September 2010

This tries to address the concerns raised about r72278.

#Comment by Duesentrieb (talk | contribs)   09:12, 13 September 2010

A patch combining r72371 with r72706 might be useful, but I don't know how to do it :)

Status & tagging log