Index: trunk/phase3/includes/media/DjVu.php |
— | — | @@ -52,6 +52,8 @@ |
53 | 53 | $m = false; |
54 | 54 | if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) { |
55 | 55 | return array( 'width' => $m[2], 'page' => $m[1] ); |
| 56 | + } else if ( preg_match( '/^page(\d+)-djvutxt$/', $str, $m ) ) { |
| 57 | + return array( 'djvutxt' => 1, 'page' => $m[1] ); |
56 | 58 | } else { |
57 | 59 | return false; |
58 | 60 | } |
— | — | @@ -64,8 +66,22 @@ |
65 | 67 | ); |
66 | 68 | } |
67 | 69 | |
| 70 | + /** Normalise $params in place: djvutxt text requests bypass the image-size rules, everything else defers to the parent. Returns true for text requests, otherwise the parent's result. */ |
| 71 | + function normaliseParams( $image, &$params ) { |
| 72 | + global $wgDjvuTxt; |
| 73 | + # empty() tolerates a missing 'djvutxt' key (e.g. params parsed from a "pageN-Mpx" string) without raising a notice. |
| 74 | + if ( empty( $params['djvutxt'] ) || !$wgDjvuTxt ) { |
| 75 | + return parent::normaliseParams( $image, $params ); |
| 76 | + } |
| 77 | + # Text extraction renders no image: default to the first page and zero the dimensions. |
| 78 | + if ( !isset( $params['page'] ) ) $params['page'] = 1; |
| 79 | + $params['width'] = 0; |
| 80 | + $params['height'] = 0; |
| 81 | + return true; |
| 82 | + } |
| 83 | + |
68 | 84 | function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { |
69 | | - global $wgDjvuRenderer, $wgDjvuPostProcessor; |
| 85 | + global $wgDjvuRenderer, $wgDjvuPostProcessor, $wgDjvuTxt, $wgSed; |
70 | 86 | |
71 | 87 | // Fetch XML and check it, to give a more informative error message than the one which |
72 | 88 | // normaliseParams will inevitably give. |
— | — | @@ -96,12 +112,22 @@ |
97 | 113 | |
98 | 114 | # Use a subshell (brackets) to aggregate stderr from both pipeline commands |
99 | 115 | # before redirecting it to the overall stdout. This works in both Linux and Windows XP. |
100 | | - $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " . |
101 | | - wfEscapeShellArg( $srcPath ); |
102 | | - if ( $wgDjvuPostProcessor ) { |
103 | | - $cmd .= " | {$wgDjvuPostProcessor}"; |
| 116 | + |
| 117 | + if( $params['djvutxt'] && $wgDjvuTxt && $wgSed ) { |
| 118 | + #Read text from djvu |
| 119 | + $cmd = '(' . wfEscapeShellArg( $wgDjvuTxt ) . " --page={$page} " . wfEscapeShellArg( $srcPath ); |
| 120 | + #Escape < > & characters |
| 121 | + $cmd .= ' | ' . wfEscapeShellArg( $wgSed ) . ' "s/\&/\&amp;/g ; s/</\&lt;/g ; s/>/\&gt;/g ; s/\"/\&quot;/g "'; |
| 122 | + $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1'; |
104 | 123 | } |
105 | | - $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1'; |
| 124 | + else { |
| 125 | + $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " . |
| 126 | + wfEscapeShellArg( $srcPath ); |
| 127 | + if ( $wgDjvuPostProcessor ) { |
| 128 | + $cmd .= " | {$wgDjvuPostProcessor}"; |
| 129 | + } |
| 130 | + $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1'; |
| 131 | + } |
106 | 132 | wfProfileIn( 'ddjvu' ); |
107 | 133 | wfDebug( __METHOD__.": $cmd\n" ); |
108 | 134 | $err = wfShellExec( $cmd, $retval ); |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -1898,6 +1898,11 @@ |
1899 | 1899 | $wgDiff = '/usr/bin/diff'; |
1900 | 1900 | |
1901 | 1901 | /** |
| 1902 | + * Path to the GNU sed utility. |
| 1903 | + */ |
| 1904 | +$wgSed = '/bin/sed'; |
| 1905 | + |
| 1906 | +/** |
1902 | 1907 | * We can also compress text stored in the 'text' table. If this is set on, new |
1903 | 1908 | * revisions will be compressed on page save if zlib support is available. Any |
1904 | 1909 | * compressed revisions will be decompressed on load regardless of this setting |
— | — | @@ -3533,6 +3538,13 @@ |
3534 | 3539 | $wgDjvuRenderer = null; |
3535 | 3540 | |
3536 | 3541 | /** |
| 3542 | + * Path of the djvutxt DjVu text extraction utility. |
| 3543 | + * Set this and $wgDjvuDump to enable text layer extraction from DjVu files; $wgSed must also be set. |
| 3544 | + */ |
| 3545 | +# $wgDjvuTxt = 'djvutxt'; |
| 3546 | +$wgDjvuTxt = null; |
| 3547 | + |
| 3548 | +/** |
3537 | 3549 | * Path of the djvutoxml executable |
3538 | 3550 | * This works like djvudump except much, much slower as of version 3.5. |
3539 | 3551 | * |