r100786 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r100785 | r100786 | r100787 >
Date: 04:22, 26 October 2011
Author: reedy
Status: ok
Tags:
Comment:
Move DjVuImage to media/DjVuImage.php
Modified paths:
  • /trunk/phase3/includes/AutoLoader.php (modified) (history)
  • /trunk/phase3/includes/DjVuImage.php (deleted) (history)
  • /trunk/phase3/includes/media/DjVuImage.php (added) (history)

Diff [purge]

Index: trunk/phase3/includes/DjVuImage.php
@@ -1,379 +0,0 @@
2 -<?php
3 -/**
4 - * DjVu image handler
5 - *
6 - * Copyright © 2006 Brion Vibber <brion@pobox.com>
7 - * http://www.mediawiki.org/
8 - *
9 - * This program is free software; you can redistribute it and/or modify
10 - * it under the terms of the GNU General Public License as published by
11 - * the Free Software Foundation; either version 2 of the License, or
12 - * (at your option) any later version.
13 - *
14 - * This program is distributed in the hope that it will be useful,
15 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 - * GNU General Public License for more details.
18 - *
19 - * You should have received a copy of the GNU General Public License along
20 - * with this program; if not, write to the Free Software Foundation, Inc.,
21 - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 - * http://www.gnu.org/copyleft/gpl.html
23 - *
24 - * @file
25 - */
26 -
27 -/**
28 - * Support for detecting/validating DjVu image files and getting
29 - * some basic file metadata (resolution etc)
30 - *
31 - * File format docs are available in source package for DjVuLibre:
32 - * http://djvulibre.djvuzone.org/
33 - *
34 - * @ingroup Media
35 - */
36 -class DjVuImage {
37 - function __construct( $filename ) {
38 - $this->mFilename = $filename;
39 - }
40 -
41 - /**
42 - * Check if the given file is indeed a valid DjVu image file
43 - * @return bool
44 - */
45 - public function isValid() {
46 - $info = $this->getInfo();
47 - return $info !== false;
48 - }
49 -
50 -
51 - /**
52 - * Return data in the style of getimagesize()
53 - * @return array or false on failure
54 - */
55 - public function getImageSize() {
56 - $data = $this->getInfo();
57 -
58 - if( $data !== false ) {
59 - $width = $data['width'];
60 - $height = $data['height'];
61 -
62 - return array( $width, $height, 'DjVu',
63 - "width=\"$width\" height=\"$height\"" );
64 - }
65 - return false;
66 - }
67 -
68 - // ---------
69 -
70 - /**
71 - * For debugging; dump the IFF chunk structure
72 - */
73 - function dump() {
74 - $file = fopen( $this->mFilename, 'rb' );
75 - $header = fread( $file, 12 );
76 - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
77 - extract( unpack( 'a4magic/a4chunk/NchunkLength', $header ) );
78 - echo "$chunk $chunkLength\n";
79 - $this->dumpForm( $file, $chunkLength, 1 );
80 - fclose( $file );
81 - }
82 -
83 - private function dumpForm( $file, $length, $indent ) {
84 - $start = ftell( $file );
85 - $secondary = fread( $file, 4 );
86 - echo str_repeat( ' ', $indent * 4 ) . "($secondary)\n";
87 - while( ftell( $file ) - $start < $length ) {
88 - $chunkHeader = fread( $file, 8 );
89 - if( $chunkHeader == '' ) {
90 - break;
91 - }
92 - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
93 - extract( unpack( 'a4chunk/NchunkLength', $chunkHeader ) );
94 - echo str_repeat( ' ', $indent * 4 ) . "$chunk $chunkLength\n";
95 -
96 - if( $chunk == 'FORM' ) {
97 - $this->dumpForm( $file, $chunkLength, $indent + 1 );
98 - } else {
99 - fseek( $file, $chunkLength, SEEK_CUR );
100 - if( $chunkLength & 1 == 1 ) {
101 - // Padding byte between chunks
102 - fseek( $file, 1, SEEK_CUR );
103 - }
104 - }
105 - }
106 - }
107 -
108 - function getInfo() {
109 - wfSuppressWarnings();
110 - $file = fopen( $this->mFilename, 'rb' );
111 - wfRestoreWarnings();
112 - if( $file === false ) {
113 - wfDebug( __METHOD__ . ": missing or failed file read\n" );
114 - return false;
115 - }
116 -
117 - $header = fread( $file, 16 );
118 - $info = false;
119 -
120 - if( strlen( $header ) < 16 ) {
121 - wfDebug( __METHOD__ . ": too short file header\n" );
122 - } else {
123 - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
124 - extract( unpack( 'a4magic/a4form/NformLength/a4subtype', $header ) );
125 -
126 - if( $magic != 'AT&T' ) {
127 - wfDebug( __METHOD__ . ": not a DjVu file\n" );
128 - } elseif( $subtype == 'DJVU' ) {
129 - // Single-page document
130 - $info = $this->getPageInfo( $file, $formLength );
131 - } elseif( $subtype == 'DJVM' ) {
132 - // Multi-page document
133 - $info = $this->getMultiPageInfo( $file, $formLength );
134 - } else {
135 - wfDebug( __METHOD__ . ": unrecognized DJVU file type '$formType'\n" );
136 - }
137 - }
138 - fclose( $file );
139 - return $info;
140 - }
141 -
142 - private function readChunk( $file ) {
143 - $header = fread( $file, 8 );
144 - if( strlen( $header ) < 8 ) {
145 - return array( false, 0 );
146 - } else {
147 - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
148 - extract( unpack( 'a4chunk/Nlength', $header ) );
149 - return array( $chunk, $length );
150 - }
151 - }
152 -
153 - private function skipChunk( $file, $chunkLength ) {
154 - fseek( $file, $chunkLength, SEEK_CUR );
155 -
156 - if( $chunkLength & 0x01 == 1 && !feof( $file ) ) {
157 - // padding byte
158 - fseek( $file, 1, SEEK_CUR );
159 - }
160 - }
161 -
162 - private function getMultiPageInfo( $file, $formLength ) {
163 - // For now, we'll just look for the first page in the file
164 - // and report its information, hoping others are the same size.
165 - $start = ftell( $file );
166 - do {
167 - list( $chunk, $length ) = $this->readChunk( $file );
168 - if( !$chunk ) {
169 - break;
170 - }
171 -
172 - if( $chunk == 'FORM' ) {
173 - $subtype = fread( $file, 4 );
174 - if( $subtype == 'DJVU' ) {
175 - wfDebug( __METHOD__ . ": found first subpage\n" );
176 - return $this->getPageInfo( $file, $length );
177 - }
178 - $this->skipChunk( $file, $length - 4 );
179 - } else {
180 - wfDebug( __METHOD__ . ": skipping '$chunk' chunk\n" );
181 - $this->skipChunk( $file, $length );
182 - }
183 - } while( $length != 0 && !feof( $file ) && ftell( $file ) - $start < $formLength );
184 -
185 - wfDebug( __METHOD__ . ": multi-page DJVU file contained no pages\n" );
186 - return false;
187 - }
188 -
189 - private function getPageInfo( $file, $formLength ) {
190 - list( $chunk, $length ) = $this->readChunk( $file );
191 - if( $chunk != 'INFO' ) {
192 - wfDebug( __METHOD__ . ": expected INFO chunk, got '$chunk'\n" );
193 - return false;
194 - }
195 -
196 - if( $length < 9 ) {
197 - wfDebug( __METHOD__ . ": INFO should be 9 or 10 bytes, found $length\n" );
198 - return false;
199 - }
200 - $data = fread( $file, $length );
201 - if( strlen( $data ) < $length ) {
202 - wfDebug( __METHOD__ . ": INFO chunk cut off\n" );
203 - return false;
204 - }
205 -
206 - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
207 - extract( unpack(
208 - 'nwidth/' .
209 - 'nheight/' .
210 - 'Cminor/' .
211 - 'Cmajor/' .
212 - 'vresolution/' .
213 - 'Cgamma', $data ) );
214 - # Newer files have rotation info in byte 10, but we don't use it yet.
215 -
216 - return array(
217 - 'width' => $width,
218 - 'height' => $height,
219 - 'version' => "$major.$minor",
220 - 'resolution' => $resolution,
221 - 'gamma' => $gamma / 10.0 );
222 - }
223 -
224 - /**
225 - * Return an XML string describing the DjVu image
226 - * @return string
227 - */
228 - function retrieveMetaData() {
229 - global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt;
230 - wfProfileIn( __METHOD__ );
231 -
232 - if ( isset( $wgDjvuDump ) ) {
233 - # djvudump is faster as of version 3.5
234 - # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583
235 - wfProfileIn( 'djvudump' );
236 - $cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename );
237 - $dump = wfShellExec( $cmd );
238 - $xml = $this->convertDumpToXML( $dump );
239 - wfProfileOut( 'djvudump' );
240 - } elseif ( isset( $wgDjvuToXML ) ) {
241 - wfProfileIn( 'djvutoxml' );
242 - $cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' .
243 - wfEscapeShellArg( $this->mFilename );
244 - $xml = wfShellExec( $cmd );
245 - wfProfileOut( 'djvutoxml' );
246 - } else {
247 - $xml = null;
248 - }
249 - # Text layer
250 - if ( isset( $wgDjvuTxt ) ) {
251 - wfProfileIn( 'djvutxt' );
252 - $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ;
253 - wfDebug( __METHOD__.": $cmd\n" );
254 - $retval = '';
255 - $txt = wfShellExec( $cmd, $retval );
256 - wfProfileOut( 'djvutxt' );
257 - if( $retval == 0) {
258 - # Strip some control characters
259 - $txt = preg_replace( "/[\013\035\037]/", "", $txt );
260 - $reg = <<<EOR
261 - /\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*"
262 - ((?> # Text to match is composed of atoms of either:
263 - \\\\. # - any escaped character
264 - | # - any character different from " and \
265 - [^"\\\\]+
266 - )*?)
267 - "\s*\)
268 - | # Or page can be empty ; in this case, djvutxt dumps ()
269 - \(\s*()\)/sx
270 -EOR;
271 - $txt = preg_replace_callback( $reg, array( $this, 'pageTextCallback' ), $txt );
272 - $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n";
273 - $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml, 1 );
274 - $xml = $xml . $txt. '</mw-djvu>' ;
275 - }
276 - }
277 - wfProfileOut( __METHOD__ );
278 - return $xml;
279 - }
280 -
281 - function pageTextCallback( $matches ) {
282 - # Get rid of invalid UTF-8, strip control characters
283 - return '<PAGE value="' . htmlspecialchars( UtfNormal::cleanUp( $matches[1] ) ) . '" />';
284 - }
285 -
286 - /**
287 - * Hack to temporarily work around djvutoxml bug
288 - */
289 - function convertDumpToXML( $dump ) {
290 - if ( strval( $dump ) == '' ) {
291 - return false;
292 - }
293 -
294 - $xml = <<<EOT
295 -<?xml version="1.0" ?>
296 -<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd">
297 -<DjVuXML>
298 -<HEAD></HEAD>
299 -<BODY>
300 -EOT;
301 -
302 - $dump = str_replace( "\r", '', $dump );
303 - $line = strtok( $dump, "\n" );
304 - $m = false;
305 - $good = false;
306 - if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) {
307 - # Single-page
308 - if ( $this->parseFormDjvu( $line, $xml ) ) {
309 - $good = true;
310 - } else {
311 - return false;
312 - }
313 - } elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) {
314 - # Multi-page
315 - $parentLevel = strlen( $m[1] );
316 - # Find DIRM
317 - $line = strtok( "\n" );
318 - while ( $line !== false ) {
319 - $childLevel = strspn( $line, ' ' );
320 - if ( $childLevel <= $parentLevel ) {
321 - # End of chunk
322 - break;
323 - }
324 -
325 - if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) {
326 - wfDebug( "Indirect multi-page DjVu document, bad for server!\n" );
327 - return false;
328 - }
329 - if ( preg_match( '/^ *FORM:DJVU/', $line ) ) {
330 - # Found page
331 - if ( $this->parseFormDjvu( $line, $xml ) ) {
332 - $good = true;
333 - } else {
334 - return false;
335 - }
336 - }
337 - $line = strtok( "\n" );
338 - }
339 - }
340 - if ( !$good ) {
341 - return false;
342 - }
343 -
344 - $xml .= "</BODY>\n</DjVuXML>\n";
345 - return $xml;
346 - }
347 -
348 - function parseFormDjvu( $line, &$xml ) {
349 - $parentLevel = strspn( $line, ' ' );
350 - $line = strtok( "\n" );
351 -
352 - # Find INFO
353 - while ( $line !== false ) {
354 - $childLevel = strspn( $line, ' ' );
355 - if ( $childLevel <= $parentLevel ) {
356 - # End of chunk
357 - break;
358 - }
359 -
360 - if ( preg_match( '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/', $line, $m ) ) {
361 - $xml .= Xml::tags( 'OBJECT',
362 - array(
363 - #'data' => '',
364 - #'type' => 'image/x.djvu',
365 - 'height' => $m[2],
366 - 'width' => $m[1],
367 - #'usemap' => '',
368 - ),
369 - "\n" .
370 - Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" .
371 - Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n"
372 - ) . "\n";
373 - return true;
374 - }
375 - $line = strtok( "\n" );
376 - }
377 - # Not found
378 - return false;
379 - }
380 -}
Index: trunk/phase3/includes/media/DjVuImage.php
@@ -0,0 +1,379 @@
 2+<?php
 3+/**
 4+ * DjVu image handler
 5+ *
 6+ * Copyright © 2006 Brion Vibber <brion@pobox.com>
 7+ * http://www.mediawiki.org/
 8+ *
 9+ * This program is free software; you can redistribute it and/or modify
 10+ * it under the terms of the GNU General Public License as published by
 11+ * the Free Software Foundation; either version 2 of the License, or
 12+ * (at your option) any later version.
 13+ *
 14+ * This program is distributed in the hope that it will be useful,
 15+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 17+ * GNU General Public License for more details.
 18+ *
 19+ * You should have received a copy of the GNU General Public License along
 20+ * with this program; if not, write to the Free Software Foundation, Inc.,
 21+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 22+ * http://www.gnu.org/copyleft/gpl.html
 23+ *
 24+ * @file
 25+ */
 26+
 27+/**
 28+ * Support for detecting/validating DjVu image files and getting
 29+ * some basic file metadata (resolution etc)
 30+ *
 31+ * File format docs are available in source package for DjVuLibre:
 32+ * http://djvulibre.djvuzone.org/
 33+ *
 34+ * @ingroup Media
 35+ */
 36+class DjVuImage {
 37+ function __construct( $filename ) {
 38+ $this->mFilename = $filename;
 39+ }
 40+
 41+ /**
 42+ * Check if the given file is indeed a valid DjVu image file
 43+ * @return bool
 44+ */
 45+ public function isValid() {
 46+ $info = $this->getInfo();
 47+ return $info !== false;
 48+ }
 49+
 50+
 51+ /**
 52+ * Return data in the style of getimagesize()
 53+ * @return array or false on failure
 54+ */
 55+ public function getImageSize() {
 56+ $data = $this->getInfo();
 57+
 58+ if( $data !== false ) {
 59+ $width = $data['width'];
 60+ $height = $data['height'];
 61+
 62+ return array( $width, $height, 'DjVu',
 63+ "width=\"$width\" height=\"$height\"" );
 64+ }
 65+ return false;
 66+ }
 67+
 68+ // ---------
 69+
 70+ /**
 71+ * For debugging; dump the IFF chunk structure
 72+ */
 73+ function dump() {
 74+ $file = fopen( $this->mFilename, 'rb' );
 75+ $header = fread( $file, 12 );
 76+ // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
 77+ extract( unpack( 'a4magic/a4chunk/NchunkLength', $header ) );
 78+ echo "$chunk $chunkLength\n";
 79+ $this->dumpForm( $file, $chunkLength, 1 );
 80+ fclose( $file );
 81+ }
 82+
 83+ private function dumpForm( $file, $length, $indent ) {
 84+ $start = ftell( $file );
 85+ $secondary = fread( $file, 4 );
 86+ echo str_repeat( ' ', $indent * 4 ) . "($secondary)\n";
 87+ while( ftell( $file ) - $start < $length ) {
 88+ $chunkHeader = fread( $file, 8 );
 89+ if( $chunkHeader == '' ) {
 90+ break;
 91+ }
 92+ // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
 93+ extract( unpack( 'a4chunk/NchunkLength', $chunkHeader ) );
 94+ echo str_repeat( ' ', $indent * 4 ) . "$chunk $chunkLength\n";
 95+
 96+ if( $chunk == 'FORM' ) {
 97+ $this->dumpForm( $file, $chunkLength, $indent + 1 );
 98+ } else {
 99+ fseek( $file, $chunkLength, SEEK_CUR );
 100+ if( $chunkLength & 1 == 1 ) {
 101+ // Padding byte between chunks
 102+ fseek( $file, 1, SEEK_CUR );
 103+ }
 104+ }
 105+ }
 106+ }
 107+
 108+ function getInfo() {
 109+ wfSuppressWarnings();
 110+ $file = fopen( $this->mFilename, 'rb' );
 111+ wfRestoreWarnings();
 112+ if( $file === false ) {
 113+ wfDebug( __METHOD__ . ": missing or failed file read\n" );
 114+ return false;
 115+ }
 116+
 117+ $header = fread( $file, 16 );
 118+ $info = false;
 119+
 120+ if( strlen( $header ) < 16 ) {
 121+ wfDebug( __METHOD__ . ": too short file header\n" );
 122+ } else {
 123+ // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
 124+ extract( unpack( 'a4magic/a4form/NformLength/a4subtype', $header ) );
 125+
 126+ if( $magic != 'AT&T' ) {
 127+ wfDebug( __METHOD__ . ": not a DjVu file\n" );
 128+ } elseif( $subtype == 'DJVU' ) {
 129+ // Single-page document
 130+ $info = $this->getPageInfo( $file, $formLength );
 131+ } elseif( $subtype == 'DJVM' ) {
 132+ // Multi-page document
 133+ $info = $this->getMultiPageInfo( $file, $formLength );
 134+ } else {
 135+ wfDebug( __METHOD__ . ": unrecognized DJVU file type '$formType'\n" );
 136+ }
 137+ }
 138+ fclose( $file );
 139+ return $info;
 140+ }
 141+
 142+ private function readChunk( $file ) {
 143+ $header = fread( $file, 8 );
 144+ if( strlen( $header ) < 8 ) {
 145+ return array( false, 0 );
 146+ } else {
 147+ // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
 148+ extract( unpack( 'a4chunk/Nlength', $header ) );
 149+ return array( $chunk, $length );
 150+ }
 151+ }
 152+
 153+ private function skipChunk( $file, $chunkLength ) {
 154+ fseek( $file, $chunkLength, SEEK_CUR );
 155+
 156+ if( $chunkLength & 0x01 == 1 && !feof( $file ) ) {
 157+ // padding byte
 158+ fseek( $file, 1, SEEK_CUR );
 159+ }
 160+ }
 161+
 162+ private function getMultiPageInfo( $file, $formLength ) {
 163+ // For now, we'll just look for the first page in the file
 164+ // and report its information, hoping others are the same size.
 165+ $start = ftell( $file );
 166+ do {
 167+ list( $chunk, $length ) = $this->readChunk( $file );
 168+ if( !$chunk ) {
 169+ break;
 170+ }
 171+
 172+ if( $chunk == 'FORM' ) {
 173+ $subtype = fread( $file, 4 );
 174+ if( $subtype == 'DJVU' ) {
 175+ wfDebug( __METHOD__ . ": found first subpage\n" );
 176+ return $this->getPageInfo( $file, $length );
 177+ }
 178+ $this->skipChunk( $file, $length - 4 );
 179+ } else {
 180+ wfDebug( __METHOD__ . ": skipping '$chunk' chunk\n" );
 181+ $this->skipChunk( $file, $length );
 182+ }
 183+ } while( $length != 0 && !feof( $file ) && ftell( $file ) - $start < $formLength );
 184+
 185+ wfDebug( __METHOD__ . ": multi-page DJVU file contained no pages\n" );
 186+ return false;
 187+ }
 188+
 189+ private function getPageInfo( $file, $formLength ) {
 190+ list( $chunk, $length ) = $this->readChunk( $file );
 191+ if( $chunk != 'INFO' ) {
 192+ wfDebug( __METHOD__ . ": expected INFO chunk, got '$chunk'\n" );
 193+ return false;
 194+ }
 195+
 196+ if( $length < 9 ) {
 197+ wfDebug( __METHOD__ . ": INFO should be 9 or 10 bytes, found $length\n" );
 198+ return false;
 199+ }
 200+ $data = fread( $file, $length );
 201+ if( strlen( $data ) < $length ) {
 202+ wfDebug( __METHOD__ . ": INFO chunk cut off\n" );
 203+ return false;
 204+ }
 205+
 206+ // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
 207+ extract( unpack(
 208+ 'nwidth/' .
 209+ 'nheight/' .
 210+ 'Cminor/' .
 211+ 'Cmajor/' .
 212+ 'vresolution/' .
 213+ 'Cgamma', $data ) );
 214+ # Newer files have rotation info in byte 10, but we don't use it yet.
 215+
 216+ return array(
 217+ 'width' => $width,
 218+ 'height' => $height,
 219+ 'version' => "$major.$minor",
 220+ 'resolution' => $resolution,
 221+ 'gamma' => $gamma / 10.0 );
 222+ }
 223+
 224+ /**
 225+ * Return an XML string describing the DjVu image
 226+ * @return string
 227+ */
 228+ function retrieveMetaData() {
 229+ global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt;
 230+ wfProfileIn( __METHOD__ );
 231+
 232+ if ( isset( $wgDjvuDump ) ) {
 233+ # djvudump is faster as of version 3.5
 234+ # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583
 235+ wfProfileIn( 'djvudump' );
 236+ $cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename );
 237+ $dump = wfShellExec( $cmd );
 238+ $xml = $this->convertDumpToXML( $dump );
 239+ wfProfileOut( 'djvudump' );
 240+ } elseif ( isset( $wgDjvuToXML ) ) {
 241+ wfProfileIn( 'djvutoxml' );
 242+ $cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' .
 243+ wfEscapeShellArg( $this->mFilename );
 244+ $xml = wfShellExec( $cmd );
 245+ wfProfileOut( 'djvutoxml' );
 246+ } else {
 247+ $xml = null;
 248+ }
 249+ # Text layer
 250+ if ( isset( $wgDjvuTxt ) ) {
 251+ wfProfileIn( 'djvutxt' );
 252+ $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ;
 253+ wfDebug( __METHOD__.": $cmd\n" );
 254+ $retval = '';
 255+ $txt = wfShellExec( $cmd, $retval );
 256+ wfProfileOut( 'djvutxt' );
 257+ if( $retval == 0) {
 258+ # Strip some control characters
 259+ $txt = preg_replace( "/[\013\035\037]/", "", $txt );
 260+ $reg = <<<EOR
 261+ /\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*"
 262+ ((?> # Text to match is composed of atoms of either:
 263+ \\\\. # - any escaped character
 264+ | # - any character different from " and \
 265+ [^"\\\\]+
 266+ )*?)
 267+ "\s*\)
 268+ | # Or page can be empty ; in this case, djvutxt dumps ()
 269+ \(\s*()\)/sx
 270+EOR;
 271+ $txt = preg_replace_callback( $reg, array( $this, 'pageTextCallback' ), $txt );
 272+ $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n";
 273+ $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml, 1 );
 274+ $xml = $xml . $txt. '</mw-djvu>' ;
 275+ }
 276+ }
 277+ wfProfileOut( __METHOD__ );
 278+ return $xml;
 279+ }
 280+
 281+ function pageTextCallback( $matches ) {
 282+ # Get rid of invalid UTF-8, strip control characters
 283+ return '<PAGE value="' . htmlspecialchars( UtfNormal::cleanUp( $matches[1] ) ) . '" />';
 284+ }
 285+
 286+ /**
 287+ * Hack to temporarily work around djvutoxml bug
 288+ */
 289+ function convertDumpToXML( $dump ) {
 290+ if ( strval( $dump ) == '' ) {
 291+ return false;
 292+ }
 293+
 294+ $xml = <<<EOT
 295+<?xml version="1.0" ?>
 296+<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd">
 297+<DjVuXML>
 298+<HEAD></HEAD>
 299+<BODY>
 300+EOT;
 301+
 302+ $dump = str_replace( "\r", '', $dump );
 303+ $line = strtok( $dump, "\n" );
 304+ $m = false;
 305+ $good = false;
 306+ if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) {
 307+ # Single-page
 308+ if ( $this->parseFormDjvu( $line, $xml ) ) {
 309+ $good = true;
 310+ } else {
 311+ return false;
 312+ }
 313+ } elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) {
 314+ # Multi-page
 315+ $parentLevel = strlen( $m[1] );
 316+ # Find DIRM
 317+ $line = strtok( "\n" );
 318+ while ( $line !== false ) {
 319+ $childLevel = strspn( $line, ' ' );
 320+ if ( $childLevel <= $parentLevel ) {
 321+ # End of chunk
 322+ break;
 323+ }
 324+
 325+ if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) {
 326+ wfDebug( "Indirect multi-page DjVu document, bad for server!\n" );
 327+ return false;
 328+ }
 329+ if ( preg_match( '/^ *FORM:DJVU/', $line ) ) {
 330+ # Found page
 331+ if ( $this->parseFormDjvu( $line, $xml ) ) {
 332+ $good = true;
 333+ } else {
 334+ return false;
 335+ }
 336+ }
 337+ $line = strtok( "\n" );
 338+ }
 339+ }
 340+ if ( !$good ) {
 341+ return false;
 342+ }
 343+
 344+ $xml .= "</BODY>\n</DjVuXML>\n";
 345+ return $xml;
 346+ }
 347+
 348+ function parseFormDjvu( $line, &$xml ) {
 349+ $parentLevel = strspn( $line, ' ' );
 350+ $line = strtok( "\n" );
 351+
 352+ # Find INFO
 353+ while ( $line !== false ) {
 354+ $childLevel = strspn( $line, ' ' );
 355+ if ( $childLevel <= $parentLevel ) {
 356+ # End of chunk
 357+ break;
 358+ }
 359+
 360+ if ( preg_match( '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/', $line, $m ) ) {
 361+ $xml .= Xml::tags( 'OBJECT',
 362+ array(
 363+ #'data' => '',
 364+ #'type' => 'image/x.djvu',
 365+ 'height' => $m[2],
 366+ 'width' => $m[1],
 367+ #'usemap' => '',
 368+ ),
 369+ "\n" .
 370+ Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" .
 371+ Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n"
 372+ ) . "\n";
 373+ return true;
 374+ }
 375+ $line = strtok( "\n" );
 376+ }
 377+ # Not found
 378+ return false;
 379+ }
 380+}
Property changes on: trunk/phase3/includes/media/DjVuImage.php
___________________________________________________________________
Added: svn:eol-style
1381 + native
Index: trunk/phase3/includes/AutoLoader.php
@@ -51,7 +51,7 @@
5252 'DeferrableUpdate' => 'includes/DeferredUpdates.php',
5353 'DeferredUpdates' => 'includes/DeferredUpdates.php',
5454 'DiffHistoryBlob' => 'includes/HistoryBlob.php',
55 - 'DjVuImage' => 'includes/DjVuImage.php',
 55+
5656 'DoubleReplacer' => 'includes/StringUtils.php',
5757 'DummyLinker' => 'includes/Linker.php',
5858 'Dump7ZipOutput' => 'includes/Export.php',
@@ -560,6 +560,7 @@
561561 'BitmapHandler_ClientOnly' => 'includes/media/Bitmap_ClientOnly.php',
562562 'BitmapMetadataHandler' => 'includes/media/BitmapMetadataHandler.php',
563563 'BmpHandler' => 'includes/media/BMP.php',
 564+ 'DjVuImage' => 'includes/media/DjVuImage.php',
564565 'DjVuHandler' => 'includes/media/DjVu.php',
565566 'Exif' => 'includes/media/Exif.php',
566567 'FormatExif' => 'includes/media/FormatMetadata.php',

Status & tagging log