Index: trunk/phase3/includes/DjVuImage.php |
— | — | @@ -1,379 +0,0 @@ |
2 | | -<?php |
3 | | -/** |
4 | | - * DjVu image handler |
5 | | - * |
6 | | - * Copyright © 2006 Brion Vibber <brion@pobox.com> |
7 | | - * http://www.mediawiki.org/ |
8 | | - * |
9 | | - * This program is free software; you can redistribute it and/or modify |
10 | | - * it under the terms of the GNU General Public License as published by |
11 | | - * the Free Software Foundation; either version 2 of the License, or |
12 | | - * (at your option) any later version. |
13 | | - * |
14 | | - * This program is distributed in the hope that it will be useful, |
15 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | | - * GNU General Public License for more details. |
18 | | - * |
19 | | - * You should have received a copy of the GNU General Public License along |
20 | | - * with this program; if not, write to the Free Software Foundation, Inc., |
21 | | - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
22 | | - * http://www.gnu.org/copyleft/gpl.html |
23 | | - * |
24 | | - * @file |
25 | | - */ |
26 | | - |
27 | | -/** |
28 | | - * Support for detecting/validating DjVu image files and getting |
29 | | - * some basic file metadata (resolution etc) |
30 | | - * |
31 | | - * File format docs are available in source package for DjVuLibre: |
32 | | - * http://djvulibre.djvuzone.org/ |
33 | | - * |
34 | | - * @ingroup Media |
35 | | - */ |
36 | | -class DjVuImage { |
37 | | - function __construct( $filename ) { |
38 | | - $this->mFilename = $filename; |
39 | | - } |
40 | | - |
41 | | - /** |
42 | | - * Check if the given file is indeed a valid DjVu image file |
43 | | - * @return bool |
44 | | - */ |
45 | | - public function isValid() { |
46 | | - $info = $this->getInfo(); |
47 | | - return $info !== false; |
48 | | - } |
49 | | - |
50 | | - |
51 | | - /** |
52 | | - * Return data in the style of getimagesize() |
53 | | - * @return array or false on failure |
54 | | - */ |
55 | | - public function getImageSize() { |
56 | | - $data = $this->getInfo(); |
57 | | - |
58 | | - if( $data !== false ) { |
59 | | - $width = $data['width']; |
60 | | - $height = $data['height']; |
61 | | - |
62 | | - return array( $width, $height, 'DjVu', |
63 | | - "width=\"$width\" height=\"$height\"" ); |
64 | | - } |
65 | | - return false; |
66 | | - } |
67 | | - |
68 | | - // --------- |
69 | | - |
70 | | - /** |
71 | | - * For debugging; dump the IFF chunk structure |
72 | | - */ |
73 | | - function dump() { |
74 | | - $file = fopen( $this->mFilename, 'rb' ); |
75 | | - $header = fread( $file, 12 ); |
76 | | - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
77 | | - extract( unpack( 'a4magic/a4chunk/NchunkLength', $header ) ); |
78 | | - echo "$chunk $chunkLength\n"; |
79 | | - $this->dumpForm( $file, $chunkLength, 1 ); |
80 | | - fclose( $file ); |
81 | | - } |
82 | | - |
83 | | - private function dumpForm( $file, $length, $indent ) { |
84 | | - $start = ftell( $file ); |
85 | | - $secondary = fread( $file, 4 ); |
86 | | - echo str_repeat( ' ', $indent * 4 ) . "($secondary)\n"; |
87 | | - while( ftell( $file ) - $start < $length ) { |
88 | | - $chunkHeader = fread( $file, 8 ); |
89 | | - if( $chunkHeader == '' ) { |
90 | | - break; |
91 | | - } |
92 | | - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
93 | | - extract( unpack( 'a4chunk/NchunkLength', $chunkHeader ) ); |
94 | | - echo str_repeat( ' ', $indent * 4 ) . "$chunk $chunkLength\n"; |
95 | | - |
96 | | - if( $chunk == 'FORM' ) { |
97 | | - $this->dumpForm( $file, $chunkLength, $indent + 1 ); |
98 | | - } else { |
99 | | - fseek( $file, $chunkLength, SEEK_CUR ); |
100 | | - if( $chunkLength & 1 == 1 ) { |
101 | | - // Padding byte between chunks |
102 | | - fseek( $file, 1, SEEK_CUR ); |
103 | | - } |
104 | | - } |
105 | | - } |
106 | | - } |
107 | | - |
108 | | - function getInfo() { |
109 | | - wfSuppressWarnings(); |
110 | | - $file = fopen( $this->mFilename, 'rb' ); |
111 | | - wfRestoreWarnings(); |
112 | | - if( $file === false ) { |
113 | | - wfDebug( __METHOD__ . ": missing or failed file read\n" ); |
114 | | - return false; |
115 | | - } |
116 | | - |
117 | | - $header = fread( $file, 16 ); |
118 | | - $info = false; |
119 | | - |
120 | | - if( strlen( $header ) < 16 ) { |
121 | | - wfDebug( __METHOD__ . ": too short file header\n" ); |
122 | | - } else { |
123 | | - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
124 | | - extract( unpack( 'a4magic/a4form/NformLength/a4subtype', $header ) ); |
125 | | - |
126 | | - if( $magic != 'AT&T' ) { |
127 | | - wfDebug( __METHOD__ . ": not a DjVu file\n" ); |
128 | | - } elseif( $subtype == 'DJVU' ) { |
129 | | - // Single-page document |
130 | | - $info = $this->getPageInfo( $file, $formLength ); |
131 | | - } elseif( $subtype == 'DJVM' ) { |
132 | | - // Multi-page document |
133 | | - $info = $this->getMultiPageInfo( $file, $formLength ); |
134 | | - } else { |
135 | | - wfDebug( __METHOD__ . ": unrecognized DJVU file type '$formType'\n" ); |
136 | | - } |
137 | | - } |
138 | | - fclose( $file ); |
139 | | - return $info; |
140 | | - } |
141 | | - |
142 | | - private function readChunk( $file ) { |
143 | | - $header = fread( $file, 8 ); |
144 | | - if( strlen( $header ) < 8 ) { |
145 | | - return array( false, 0 ); |
146 | | - } else { |
147 | | - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
148 | | - extract( unpack( 'a4chunk/Nlength', $header ) ); |
149 | | - return array( $chunk, $length ); |
150 | | - } |
151 | | - } |
152 | | - |
153 | | - private function skipChunk( $file, $chunkLength ) { |
154 | | - fseek( $file, $chunkLength, SEEK_CUR ); |
155 | | - |
156 | | - if( $chunkLength & 0x01 == 1 && !feof( $file ) ) { |
157 | | - // padding byte |
158 | | - fseek( $file, 1, SEEK_CUR ); |
159 | | - } |
160 | | - } |
161 | | - |
162 | | - private function getMultiPageInfo( $file, $formLength ) { |
163 | | - // For now, we'll just look for the first page in the file |
164 | | - // and report its information, hoping others are the same size. |
165 | | - $start = ftell( $file ); |
166 | | - do { |
167 | | - list( $chunk, $length ) = $this->readChunk( $file ); |
168 | | - if( !$chunk ) { |
169 | | - break; |
170 | | - } |
171 | | - |
172 | | - if( $chunk == 'FORM' ) { |
173 | | - $subtype = fread( $file, 4 ); |
174 | | - if( $subtype == 'DJVU' ) { |
175 | | - wfDebug( __METHOD__ . ": found first subpage\n" ); |
176 | | - return $this->getPageInfo( $file, $length ); |
177 | | - } |
178 | | - $this->skipChunk( $file, $length - 4 ); |
179 | | - } else { |
180 | | - wfDebug( __METHOD__ . ": skipping '$chunk' chunk\n" ); |
181 | | - $this->skipChunk( $file, $length ); |
182 | | - } |
183 | | - } while( $length != 0 && !feof( $file ) && ftell( $file ) - $start < $formLength ); |
184 | | - |
185 | | - wfDebug( __METHOD__ . ": multi-page DJVU file contained no pages\n" ); |
186 | | - return false; |
187 | | - } |
188 | | - |
189 | | - private function getPageInfo( $file, $formLength ) { |
190 | | - list( $chunk, $length ) = $this->readChunk( $file ); |
191 | | - if( $chunk != 'INFO' ) { |
192 | | - wfDebug( __METHOD__ . ": expected INFO chunk, got '$chunk'\n" ); |
193 | | - return false; |
194 | | - } |
195 | | - |
196 | | - if( $length < 9 ) { |
197 | | - wfDebug( __METHOD__ . ": INFO should be 9 or 10 bytes, found $length\n" ); |
198 | | - return false; |
199 | | - } |
200 | | - $data = fread( $file, $length ); |
201 | | - if( strlen( $data ) < $length ) { |
202 | | - wfDebug( __METHOD__ . ": INFO chunk cut off\n" ); |
203 | | - return false; |
204 | | - } |
205 | | - |
206 | | - // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
207 | | - extract( unpack( |
208 | | - 'nwidth/' . |
209 | | - 'nheight/' . |
210 | | - 'Cminor/' . |
211 | | - 'Cmajor/' . |
212 | | - 'vresolution/' . |
213 | | - 'Cgamma', $data ) ); |
214 | | - # Newer files have rotation info in byte 10, but we don't use it yet. |
215 | | - |
216 | | - return array( |
217 | | - 'width' => $width, |
218 | | - 'height' => $height, |
219 | | - 'version' => "$major.$minor", |
220 | | - 'resolution' => $resolution, |
221 | | - 'gamma' => $gamma / 10.0 ); |
222 | | - } |
223 | | - |
224 | | - /** |
225 | | - * Return an XML string describing the DjVu image |
226 | | - * @return string |
227 | | - */ |
228 | | - function retrieveMetaData() { |
229 | | - global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt; |
230 | | - wfProfileIn( __METHOD__ ); |
231 | | - |
232 | | - if ( isset( $wgDjvuDump ) ) { |
233 | | - # djvudump is faster as of version 3.5 |
234 | | - # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583 |
235 | | - wfProfileIn( 'djvudump' ); |
236 | | - $cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename ); |
237 | | - $dump = wfShellExec( $cmd ); |
238 | | - $xml = $this->convertDumpToXML( $dump ); |
239 | | - wfProfileOut( 'djvudump' ); |
240 | | - } elseif ( isset( $wgDjvuToXML ) ) { |
241 | | - wfProfileIn( 'djvutoxml' ); |
242 | | - $cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' . |
243 | | - wfEscapeShellArg( $this->mFilename ); |
244 | | - $xml = wfShellExec( $cmd ); |
245 | | - wfProfileOut( 'djvutoxml' ); |
246 | | - } else { |
247 | | - $xml = null; |
248 | | - } |
249 | | - # Text layer |
250 | | - if ( isset( $wgDjvuTxt ) ) { |
251 | | - wfProfileIn( 'djvutxt' ); |
252 | | - $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ; |
253 | | - wfDebug( __METHOD__.": $cmd\n" ); |
254 | | - $retval = ''; |
255 | | - $txt = wfShellExec( $cmd, $retval ); |
256 | | - wfProfileOut( 'djvutxt' ); |
257 | | - if( $retval == 0) { |
258 | | - # Strip some control characters |
259 | | - $txt = preg_replace( "/[\013\035\037]/", "", $txt ); |
260 | | - $reg = <<<EOR |
261 | | - /\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*" |
262 | | - ((?> # Text to match is composed of atoms of either: |
263 | | - \\\\. # - any escaped character |
264 | | - | # - any character different from " and \ |
265 | | - [^"\\\\]+ |
266 | | - )*?) |
267 | | - "\s*\) |
268 | | - | # Or page can be empty ; in this case, djvutxt dumps () |
269 | | - \(\s*()\)/sx |
270 | | -EOR; |
271 | | - $txt = preg_replace_callback( $reg, array( $this, 'pageTextCallback' ), $txt ); |
272 | | - $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n"; |
273 | | - $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml, 1 ); |
274 | | - $xml = $xml . $txt. '</mw-djvu>' ; |
275 | | - } |
276 | | - } |
277 | | - wfProfileOut( __METHOD__ ); |
278 | | - return $xml; |
279 | | - } |
280 | | - |
281 | | - function pageTextCallback( $matches ) { |
282 | | - # Get rid of invalid UTF-8, strip control characters |
283 | | - return '<PAGE value="' . htmlspecialchars( UtfNormal::cleanUp( $matches[1] ) ) . '" />'; |
284 | | - } |
285 | | - |
286 | | - /** |
287 | | - * Hack to temporarily work around djvutoxml bug |
288 | | - */ |
289 | | - function convertDumpToXML( $dump ) { |
290 | | - if ( strval( $dump ) == '' ) { |
291 | | - return false; |
292 | | - } |
293 | | - |
294 | | - $xml = <<<EOT |
295 | | -<?xml version="1.0" ?> |
296 | | -<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd"> |
297 | | -<DjVuXML> |
298 | | -<HEAD></HEAD> |
299 | | -<BODY> |
300 | | -EOT; |
301 | | - |
302 | | - $dump = str_replace( "\r", '', $dump ); |
303 | | - $line = strtok( $dump, "\n" ); |
304 | | - $m = false; |
305 | | - $good = false; |
306 | | - if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) { |
307 | | - # Single-page |
308 | | - if ( $this->parseFormDjvu( $line, $xml ) ) { |
309 | | - $good = true; |
310 | | - } else { |
311 | | - return false; |
312 | | - } |
313 | | - } elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) { |
314 | | - # Multi-page |
315 | | - $parentLevel = strlen( $m[1] ); |
316 | | - # Find DIRM |
317 | | - $line = strtok( "\n" ); |
318 | | - while ( $line !== false ) { |
319 | | - $childLevel = strspn( $line, ' ' ); |
320 | | - if ( $childLevel <= $parentLevel ) { |
321 | | - # End of chunk |
322 | | - break; |
323 | | - } |
324 | | - |
325 | | - if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) { |
326 | | - wfDebug( "Indirect multi-page DjVu document, bad for server!\n" ); |
327 | | - return false; |
328 | | - } |
329 | | - if ( preg_match( '/^ *FORM:DJVU/', $line ) ) { |
330 | | - # Found page |
331 | | - if ( $this->parseFormDjvu( $line, $xml ) ) { |
332 | | - $good = true; |
333 | | - } else { |
334 | | - return false; |
335 | | - } |
336 | | - } |
337 | | - $line = strtok( "\n" ); |
338 | | - } |
339 | | - } |
340 | | - if ( !$good ) { |
341 | | - return false; |
342 | | - } |
343 | | - |
344 | | - $xml .= "</BODY>\n</DjVuXML>\n"; |
345 | | - return $xml; |
346 | | - } |
347 | | - |
348 | | - function parseFormDjvu( $line, &$xml ) { |
349 | | - $parentLevel = strspn( $line, ' ' ); |
350 | | - $line = strtok( "\n" ); |
351 | | - |
352 | | - # Find INFO |
353 | | - while ( $line !== false ) { |
354 | | - $childLevel = strspn( $line, ' ' ); |
355 | | - if ( $childLevel <= $parentLevel ) { |
356 | | - # End of chunk |
357 | | - break; |
358 | | - } |
359 | | - |
360 | | - if ( preg_match( '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/', $line, $m ) ) { |
361 | | - $xml .= Xml::tags( 'OBJECT', |
362 | | - array( |
363 | | - #'data' => '', |
364 | | - #'type' => 'image/x.djvu', |
365 | | - 'height' => $m[2], |
366 | | - 'width' => $m[1], |
367 | | - #'usemap' => '', |
368 | | - ), |
369 | | - "\n" . |
370 | | - Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" . |
371 | | - Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n" |
372 | | - ) . "\n"; |
373 | | - return true; |
374 | | - } |
375 | | - $line = strtok( "\n" ); |
376 | | - } |
377 | | - # Not found |
378 | | - return false; |
379 | | - } |
380 | | -} |
Index: trunk/phase3/includes/media/DjVuImage.php |
— | — | @@ -0,0 +1,379 @@ |
| 2 | +<?php |
| 3 | +/** |
| 4 | + * DjVu image handler |
| 5 | + * |
| 6 | + * Copyright © 2006 Brion Vibber <brion@pobox.com> |
| 7 | + * http://www.mediawiki.org/ |
| 8 | + * |
| 9 | + * This program is free software; you can redistribute it and/or modify |
| 10 | + * it under the terms of the GNU General Public License as published by |
| 11 | + * the Free Software Foundation; either version 2 of the License, or |
| 12 | + * (at your option) any later version. |
| 13 | + * |
| 14 | + * This program is distributed in the hope that it will be useful, |
| 15 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | + * GNU General Public License for more details. |
| 18 | + * |
| 19 | + * You should have received a copy of the GNU General Public License along |
| 20 | + * with this program; if not, write to the Free Software Foundation, Inc., |
| 21 | + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 22 | + * http://www.gnu.org/copyleft/gpl.html |
| 23 | + * |
| 24 | + * @file |
| 25 | + */ |
| 26 | + |
| 27 | +/** |
| 28 | + * Support for detecting/validating DjVu image files and getting |
| 29 | + * some basic file metadata (resolution etc) |
| 30 | + * |
| 31 | + * File format docs are available in source package for DjVuLibre: |
| 32 | + * http://djvulibre.djvuzone.org/ |
| 33 | + * |
| 34 | + * @ingroup Media |
| 35 | + */ |
| 36 | +class DjVuImage { |
| 37 | + function __construct( $filename ) { |
| 38 | + $this->mFilename = $filename; |
| 39 | + } |
| 40 | + |
| 41 | + /** |
| 42 | + * Check if the given file is indeed a valid DjVu image file |
| 43 | + * @return bool |
| 44 | + */ |
| 45 | + public function isValid() { |
| 46 | + $info = $this->getInfo(); |
| 47 | + return $info !== false; |
| 48 | + } |
| 49 | + |
| 50 | + |
| 51 | + /** |
| 52 | + * Return data in the style of getimagesize() |
| 53 | + * @return array or false on failure |
| 54 | + */ |
| 55 | + public function getImageSize() { |
| 56 | + $data = $this->getInfo(); |
| 57 | + |
| 58 | + if( $data !== false ) { |
| 59 | + $width = $data['width']; |
| 60 | + $height = $data['height']; |
| 61 | + |
| 62 | + return array( $width, $height, 'DjVu', |
| 63 | + "width=\"$width\" height=\"$height\"" ); |
| 64 | + } |
| 65 | + return false; |
| 66 | + } |
| 67 | + |
| 68 | + // --------- |
| 69 | + |
| 70 | + /** |
| 71 | + * For debugging; dump the IFF chunk structure |
| 72 | + */ |
| 73 | + function dump() { |
| 74 | + $file = fopen( $this->mFilename, 'rb' ); |
| 75 | + $header = fread( $file, 12 ); |
| 76 | + // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
| 77 | + extract( unpack( 'a4magic/a4chunk/NchunkLength', $header ) ); |
| 78 | + echo "$chunk $chunkLength\n"; |
| 79 | + $this->dumpForm( $file, $chunkLength, 1 ); |
| 80 | + fclose( $file ); |
| 81 | + } |
| 82 | + |
| 83 | + private function dumpForm( $file, $length, $indent ) { |
| 84 | + $start = ftell( $file ); |
| 85 | + $secondary = fread( $file, 4 ); |
| 86 | + echo str_repeat( ' ', $indent * 4 ) . "($secondary)\n"; |
| 87 | + while( ftell( $file ) - $start < $length ) { |
| 88 | + $chunkHeader = fread( $file, 8 ); |
| 89 | + if( $chunkHeader == '' ) { |
| 90 | + break; |
| 91 | + } |
| 92 | + // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
| 93 | + extract( unpack( 'a4chunk/NchunkLength', $chunkHeader ) ); |
| 94 | + echo str_repeat( ' ', $indent * 4 ) . "$chunk $chunkLength\n"; |
| 95 | + |
| 96 | + if( $chunk == 'FORM' ) { |
| 97 | + $this->dumpForm( $file, $chunkLength, $indent + 1 ); |
| 98 | + } else { |
| 99 | + fseek( $file, $chunkLength, SEEK_CUR ); |
| 100 | + if( $chunkLength & 1 == 1 ) { |
| 101 | + // Padding byte between chunks |
| 102 | + fseek( $file, 1, SEEK_CUR ); |
| 103 | + } |
| 104 | + } |
| 105 | + } |
| 106 | + } |
| 107 | + |
| 108 | + function getInfo() { |
| 109 | + wfSuppressWarnings(); |
| 110 | + $file = fopen( $this->mFilename, 'rb' ); |
| 111 | + wfRestoreWarnings(); |
| 112 | + if( $file === false ) { |
| 113 | + wfDebug( __METHOD__ . ": missing or failed file read\n" ); |
| 114 | + return false; |
| 115 | + } |
| 116 | + |
| 117 | + $header = fread( $file, 16 ); |
| 118 | + $info = false; |
| 119 | + |
| 120 | + if( strlen( $header ) < 16 ) { |
| 121 | + wfDebug( __METHOD__ . ": too short file header\n" ); |
| 122 | + } else { |
| 123 | + // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
| 124 | + extract( unpack( 'a4magic/a4form/NformLength/a4subtype', $header ) ); |
| 125 | + |
| 126 | + if( $magic != 'AT&T' ) { |
| 127 | + wfDebug( __METHOD__ . ": not a DjVu file\n" ); |
| 128 | + } elseif( $subtype == 'DJVU' ) { |
| 129 | + // Single-page document |
| 130 | + $info = $this->getPageInfo( $file, $formLength ); |
| 131 | + } elseif( $subtype == 'DJVM' ) { |
| 132 | + // Multi-page document |
| 133 | + $info = $this->getMultiPageInfo( $file, $formLength ); |
| 134 | + } else { |
| 135 | + wfDebug( __METHOD__ . ": unrecognized DJVU file type '$formType'\n" ); |
| 136 | + } |
| 137 | + } |
| 138 | + fclose( $file ); |
| 139 | + return $info; |
| 140 | + } |
| 141 | + |
| 142 | + private function readChunk( $file ) { |
| 143 | + $header = fread( $file, 8 ); |
| 144 | + if( strlen( $header ) < 8 ) { |
| 145 | + return array( false, 0 ); |
| 146 | + } else { |
| 147 | + // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
| 148 | + extract( unpack( 'a4chunk/Nlength', $header ) ); |
| 149 | + return array( $chunk, $length ); |
| 150 | + } |
| 151 | + } |
| 152 | + |
| 153 | + private function skipChunk( $file, $chunkLength ) { |
| 154 | + fseek( $file, $chunkLength, SEEK_CUR ); |
| 155 | + |
| 156 | + if( $chunkLength & 0x01 == 1 && !feof( $file ) ) { |
| 157 | + // padding byte |
| 158 | + fseek( $file, 1, SEEK_CUR ); |
| 159 | + } |
| 160 | + } |
| 161 | + |
| 162 | + private function getMultiPageInfo( $file, $formLength ) { |
| 163 | + // For now, we'll just look for the first page in the file |
| 164 | + // and report its information, hoping others are the same size. |
| 165 | + $start = ftell( $file ); |
| 166 | + do { |
| 167 | + list( $chunk, $length ) = $this->readChunk( $file ); |
| 168 | + if( !$chunk ) { |
| 169 | + break; |
| 170 | + } |
| 171 | + |
| 172 | + if( $chunk == 'FORM' ) { |
| 173 | + $subtype = fread( $file, 4 ); |
| 174 | + if( $subtype == 'DJVU' ) { |
| 175 | + wfDebug( __METHOD__ . ": found first subpage\n" ); |
| 176 | + return $this->getPageInfo( $file, $length ); |
| 177 | + } |
| 178 | + $this->skipChunk( $file, $length - 4 ); |
| 179 | + } else { |
| 180 | + wfDebug( __METHOD__ . ": skipping '$chunk' chunk\n" ); |
| 181 | + $this->skipChunk( $file, $length ); |
| 182 | + } |
| 183 | + } while( $length != 0 && !feof( $file ) && ftell( $file ) - $start < $formLength ); |
| 184 | + |
| 185 | + wfDebug( __METHOD__ . ": multi-page DJVU file contained no pages\n" ); |
| 186 | + return false; |
| 187 | + } |
| 188 | + |
| 189 | + private function getPageInfo( $file, $formLength ) { |
| 190 | + list( $chunk, $length ) = $this->readChunk( $file ); |
| 191 | + if( $chunk != 'INFO' ) { |
| 192 | + wfDebug( __METHOD__ . ": expected INFO chunk, got '$chunk'\n" ); |
| 193 | + return false; |
| 194 | + } |
| 195 | + |
| 196 | + if( $length < 9 ) { |
| 197 | + wfDebug( __METHOD__ . ": INFO should be 9 or 10 bytes, found $length\n" ); |
| 198 | + return false; |
| 199 | + } |
| 200 | + $data = fread( $file, $length ); |
| 201 | + if( strlen( $data ) < $length ) { |
| 202 | + wfDebug( __METHOD__ . ": INFO chunk cut off\n" ); |
| 203 | + return false; |
| 204 | + } |
| 205 | + |
| 206 | + // @todo FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables. |
| 207 | + extract( unpack( |
| 208 | + 'nwidth/' . |
| 209 | + 'nheight/' . |
| 210 | + 'Cminor/' . |
| 211 | + 'Cmajor/' . |
| 212 | + 'vresolution/' . |
| 213 | + 'Cgamma', $data ) ); |
| 214 | + # Newer files have rotation info in byte 10, but we don't use it yet. |
| 215 | + |
| 216 | + return array( |
| 217 | + 'width' => $width, |
| 218 | + 'height' => $height, |
| 219 | + 'version' => "$major.$minor", |
| 220 | + 'resolution' => $resolution, |
| 221 | + 'gamma' => $gamma / 10.0 ); |
| 222 | + } |
| 223 | + |
| 224 | + /** |
| 225 | + * Return an XML string describing the DjVu image |
| 226 | + * @return string |
| 227 | + */ |
| 228 | + function retrieveMetaData() { |
| 229 | + global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt; |
| 230 | + wfProfileIn( __METHOD__ ); |
| 231 | + |
| 232 | + if ( isset( $wgDjvuDump ) ) { |
| 233 | + # djvudump is faster as of version 3.5 |
| 234 | + # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583 |
| 235 | + wfProfileIn( 'djvudump' ); |
| 236 | + $cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename ); |
| 237 | + $dump = wfShellExec( $cmd ); |
| 238 | + $xml = $this->convertDumpToXML( $dump ); |
| 239 | + wfProfileOut( 'djvudump' ); |
| 240 | + } elseif ( isset( $wgDjvuToXML ) ) { |
| 241 | + wfProfileIn( 'djvutoxml' ); |
| 242 | + $cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' . |
| 243 | + wfEscapeShellArg( $this->mFilename ); |
| 244 | + $xml = wfShellExec( $cmd ); |
| 245 | + wfProfileOut( 'djvutoxml' ); |
| 246 | + } else { |
| 247 | + $xml = null; |
| 248 | + } |
| 249 | + # Text layer |
| 250 | + if ( isset( $wgDjvuTxt ) ) { |
| 251 | + wfProfileIn( 'djvutxt' ); |
| 252 | + $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ; |
| 253 | + wfDebug( __METHOD__.": $cmd\n" ); |
| 254 | + $retval = ''; |
| 255 | + $txt = wfShellExec( $cmd, $retval ); |
| 256 | + wfProfileOut( 'djvutxt' ); |
| 257 | + if( $retval == 0) { |
| 258 | + # Strip some control characters |
| 259 | + $txt = preg_replace( "/[\013\035\037]/", "", $txt ); |
| 260 | + $reg = <<<EOR |
| 261 | + /\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*" |
| 262 | + ((?> # Text to match is composed of atoms of either: |
| 263 | + \\\\. # - any escaped character |
| 264 | + | # - any character different from " and \ |
| 265 | + [^"\\\\]+ |
| 266 | + )*?) |
| 267 | + "\s*\) |
| 268 | + | # Or page can be empty ; in this case, djvutxt dumps () |
| 269 | + \(\s*()\)/sx |
| 270 | +EOR; |
| 271 | + $txt = preg_replace_callback( $reg, array( $this, 'pageTextCallback' ), $txt ); |
| 272 | + $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n"; |
| 273 | + $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml, 1 ); |
| 274 | + $xml = $xml . $txt. '</mw-djvu>' ; |
| 275 | + } |
| 276 | + } |
| 277 | + wfProfileOut( __METHOD__ ); |
| 278 | + return $xml; |
| 279 | + } |
| 280 | + |
| 281 | + function pageTextCallback( $matches ) { |
| 282 | + # Get rid of invalid UTF-8, strip control characters |
| 283 | + return '<PAGE value="' . htmlspecialchars( UtfNormal::cleanUp( $matches[1] ) ) . '" />'; |
| 284 | + } |
| 285 | + |
| 286 | + /** |
| 287 | + * Hack to temporarily work around djvutoxml bug |
| 288 | + */ |
| 289 | + function convertDumpToXML( $dump ) { |
| 290 | + if ( strval( $dump ) == '' ) { |
| 291 | + return false; |
| 292 | + } |
| 293 | + |
| 294 | + $xml = <<<EOT |
| 295 | +<?xml version="1.0" ?> |
| 296 | +<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd"> |
| 297 | +<DjVuXML> |
| 298 | +<HEAD></HEAD> |
| 299 | +<BODY> |
| 300 | +EOT; |
| 301 | + |
| 302 | + $dump = str_replace( "\r", '', $dump ); |
| 303 | + $line = strtok( $dump, "\n" ); |
| 304 | + $m = false; |
| 305 | + $good = false; |
| 306 | + if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) { |
| 307 | + # Single-page |
| 308 | + if ( $this->parseFormDjvu( $line, $xml ) ) { |
| 309 | + $good = true; |
| 310 | + } else { |
| 311 | + return false; |
| 312 | + } |
| 313 | + } elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) { |
| 314 | + # Multi-page |
| 315 | + $parentLevel = strlen( $m[1] ); |
| 316 | + # Find DIRM |
| 317 | + $line = strtok( "\n" ); |
| 318 | + while ( $line !== false ) { |
| 319 | + $childLevel = strspn( $line, ' ' ); |
| 320 | + if ( $childLevel <= $parentLevel ) { |
| 321 | + # End of chunk |
| 322 | + break; |
| 323 | + } |
| 324 | + |
| 325 | + if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) { |
| 326 | + wfDebug( "Indirect multi-page DjVu document, bad for server!\n" ); |
| 327 | + return false; |
| 328 | + } |
| 329 | + if ( preg_match( '/^ *FORM:DJVU/', $line ) ) { |
| 330 | + # Found page |
| 331 | + if ( $this->parseFormDjvu( $line, $xml ) ) { |
| 332 | + $good = true; |
| 333 | + } else { |
| 334 | + return false; |
| 335 | + } |
| 336 | + } |
| 337 | + $line = strtok( "\n" ); |
| 338 | + } |
| 339 | + } |
| 340 | + if ( !$good ) { |
| 341 | + return false; |
| 342 | + } |
| 343 | + |
| 344 | + $xml .= "</BODY>\n</DjVuXML>\n"; |
| 345 | + return $xml; |
| 346 | + } |
| 347 | + |
| 348 | + function parseFormDjvu( $line, &$xml ) { |
| 349 | + $parentLevel = strspn( $line, ' ' ); |
| 350 | + $line = strtok( "\n" ); |
| 351 | + |
| 352 | + # Find INFO |
| 353 | + while ( $line !== false ) { |
| 354 | + $childLevel = strspn( $line, ' ' ); |
| 355 | + if ( $childLevel <= $parentLevel ) { |
| 356 | + # End of chunk |
| 357 | + break; |
| 358 | + } |
| 359 | + |
| 360 | + if ( preg_match( '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/', $line, $m ) ) { |
| 361 | + $xml .= Xml::tags( 'OBJECT', |
| 362 | + array( |
| 363 | + #'data' => '', |
| 364 | + #'type' => 'image/x.djvu', |
| 365 | + 'height' => $m[2], |
| 366 | + 'width' => $m[1], |
| 367 | + #'usemap' => '', |
| 368 | + ), |
| 369 | + "\n" . |
| 370 | + Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" . |
| 371 | + Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n" |
| 372 | + ) . "\n"; |
| 373 | + return true; |
| 374 | + } |
| 375 | + $line = strtok( "\n" ); |
| 376 | + } |
| 377 | + # Not found |
| 378 | + return false; |
| 379 | + } |
| 380 | +} |
Property changes on: trunk/phase3/includes/media/DjVuImage.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 381 | + native |
Index: trunk/phase3/includes/AutoLoader.php |
— | — | @@ -51,7 +51,7 @@ |
52 | 52 | 'DeferrableUpdate' => 'includes/DeferredUpdates.php', |
53 | 53 | 'DeferredUpdates' => 'includes/DeferredUpdates.php', |
54 | 54 | 'DiffHistoryBlob' => 'includes/HistoryBlob.php', |
55 | | - 'DjVuImage' => 'includes/DjVuImage.php', |
| 55 | + |
56 | 56 | 'DoubleReplacer' => 'includes/StringUtils.php', |
57 | 57 | 'DummyLinker' => 'includes/Linker.php', |
58 | 58 | 'Dump7ZipOutput' => 'includes/Export.php', |
— | — | @@ -560,6 +560,7 @@ |
561 | 561 | 'BitmapHandler_ClientOnly' => 'includes/media/Bitmap_ClientOnly.php', |
562 | 562 | 'BitmapMetadataHandler' => 'includes/media/BitmapMetadataHandler.php', |
563 | 563 | 'BmpHandler' => 'includes/media/BMP.php', |
| 564 | + 'DjVuImage' => 'includes/media/DjVuImage.php', |
564 | 565 | 'DjVuHandler' => 'includes/media/DjVu.php', |
565 | 566 | 'Exif' => 'includes/media/Exif.php', |
566 | 567 | 'FormatExif' => 'includes/media/FormatMetadata.php', |