Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -3535,6 +3535,13 @@ |
3536 | 3536 | $wgDjvuRenderer = null; |
3537 | 3537 | |
3538 | 3538 | /** |
| 3539 | + * Path of the djvutxt DjVu text extraction utility. |
| 3540 | + * Set this and $wgDjvuDump to enable text layer extraction from DjVu files; the default null leaves it disabled. |
| 3541 | + */ |
| 3542 | +# $wgDjvuTxt = 'djvutxt'; |
| 3543 | +$wgDjvuTxt = null; |
| 3544 | + |
| 3545 | +/** |
3539 | 3546 | * Path of the djvutoxml executable |
3540 | 3547 | * This works like djvudump except much, much slower as of version 3.5. |
3541 | 3548 | * |
Index: trunk/phase3/includes/media/DjVu.php |
— | — | @@ -52,6 +52,8 @@ |
53 | 53 | $m = false; |
54 | 54 | if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) { |
55 | 55 | return array( 'width' => $m[2], 'page' => $m[1] ); |
| 56 | + } else if ( preg_match( '/^page(\d+)-djvutxt$/', $str, $m ) ) { |
| 57 | + return array( 'djvutxt' => 1, 'page' => $m[1] ); |
56 | 58 | } else { |
57 | 59 | return false; |
58 | 60 | } |
— | — | @@ -64,8 +66,21 @@ |
65 | 67 | ); |
66 | 68 | } |
67 | 69 | |
| 70 | + function normaliseParams( $image, &$params ) { // Normalise $params in place; text-extraction requests skip the parent's image sizing |
| 71 | + global $wgDjvuTxt; |
| 72 | + if( !empty( $params['djvutxt'] ) && $wgDjvuTxt ) { // !empty(): the 'djvutxt' key is absent for ordinary thumbnail params, avoid an undefined-index notice |
| 73 | + if ( !isset( $params['page'] ) ) { |
| 74 | + $params['page'] = 1; // no page requested: default to the first page |
| 75 | + } |
| 76 | + $params['width'] = 0; // zero size: the text-extraction path does not use image dimensions |
| 77 | + $params['height'] = 0; |
| 78 | + return true; |
| 79 | + } |
| 80 | + else return parent::normaliseParams( $image, $params ); // any other request: defer to the parent handler's validation |
| 81 | + } |
| 82 | + |
68 | 83 | function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { |
69 | | - global $wgDjvuRenderer, $wgDjvuPostProcessor; |
| 84 | + global $wgDjvuRenderer, $wgDjvuPostProcessor, $wgDjvuTxt; |
70 | 85 | |
71 | 86 | // Fetch XML and check it, to give a more informative error message than the one which |
72 | 87 | // normaliseParams will inevitably give. |
— | — | @@ -94,18 +109,36 @@ |
95 | 110 | return new MediaTransformError( 'thumbnail_error', $width, $height, wfMsg( 'thumbnail_dest_directory' ) ); |
96 | 111 | } |
97 | 112 | |
98 | | - # Use a subshell (brackets) to aggregate stderr from both pipeline commands |
99 | | - # before redirecting it to the overall stdout. This works in both Linux and Windows XP. |
100 | | - $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " . |
101 | | - wfEscapeShellArg( $srcPath ); |
102 | | - if ( $wgDjvuPostProcessor ) { |
103 | | - $cmd .= " | {$wgDjvuPostProcessor}"; |
| 113 | + if( $params['djvutxt'] && $wgDjvuTxt ) { |
| 114 | + # Extract djvu text |
| 115 | + $cmd = wfEscapeShellArg( $wgDjvuTxt ) . " --page={$page} " . wfEscapeShellArg( $srcPath ) ; |
| 116 | + wfProfileIn( 'djvutxt' ); |
| 117 | + wfDebug( __METHOD__.": $cmd\n" ); |
| 118 | + $err = wfShellExec( $cmd, $retval ); |
| 119 | + wfProfileOut( 'djvutxt' ); |
| 120 | + # Escape html characters |
| 121 | + $txt = htmlspecialchars( $err ); |
| 122 | + # Write result to file |
| 123 | + if($retval == 0) { |
| 124 | + $f = fopen($dstPath, 'w'); |
| 125 | + fwrite($f, $txt); |
| 126 | + fclose($f); |
| 127 | + } |
104 | 128 | } |
105 | | - $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1'; |
106 | | - wfProfileIn( 'ddjvu' ); |
107 | | - wfDebug( __METHOD__.": $cmd\n" ); |
108 | | - $err = wfShellExec( $cmd, $retval ); |
109 | | - wfProfileOut( 'ddjvu' ); |
| 129 | + else { |
| 130 | + # Use a subshell (brackets) to aggregate stderr from both pipeline commands |
| 131 | + # before redirecting it to the overall stdout. This works in both Linux and Windows XP. |
| 132 | + $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " . |
| 133 | + wfEscapeShellArg( $srcPath ); |
| 134 | + if ( $wgDjvuPostProcessor ) { |
| 135 | + $cmd .= " | {$wgDjvuPostProcessor}"; |
| 136 | + } |
| 137 | + $cmd .= ' > ' . wfEscapeShellArg($dstPath) . ') 2>&1'; |
| 138 | + wfProfileIn( 'ddjvu' ); |
| 139 | + wfDebug( __METHOD__.": $cmd\n" ); |
| 140 | + $err = wfShellExec( $cmd, $retval ); |
| 141 | + wfProfileOut( 'ddjvu' ); |
| 142 | + } |
110 | 143 | |
111 | 144 | $removed = $this->removeBadFile( $dstPath, $retval ); |
112 | 145 | if ( $retval != 0 || $removed ) { |