Index: trunk/phase3/includes/media/DjVu.php |
— | — | @@ -135,7 +135,7 @@ |
136 | 136 | /** |
137 | 137 | * Cache a document tree for the DjVu XML metadata |
138 | 138 | */ |
139 | | - function getMetaTree( $image ) { |
| 139 | + function getMetaTree( $image , $gettext = false ) { |
140 | 140 | if ( isset( $image->dejaMetaTree ) ) { |
141 | 141 | return $image->dejaMetaTree; |
142 | 142 | } |
— | — | @@ -149,15 +149,32 @@ |
150 | 150 | |
151 | 151 | wfSuppressWarnings(); |
152 | 152 | try { |
153 | | - $image->dejaMetaTree = new SimpleXMLElement( $metadata ); |
| 153 | + // Set to false rather than null to avoid further attempts |
| 154 | + $image->dejaMetaTree = false; |
| 155 | + $image->djvuTextTree = false; |
| 156 | + $tree = new SimpleXMLElement( $metadata ); |
| 157 | + if( $tree->getName() == 'mw-djvu' ) { |
| 158 | + foreach($tree->children() as $b){ |
| 159 | + if( $b->getName() == 'DjVuTxt' ) { |
| 160 | + $image->djvuTextTree = $b; |
| 161 | + } |
| 162 | + else if ( $b->getName() == 'DjVuXML' ) { |
| 163 | + $image->dejaMetaTree = $b; |
| 164 | + } |
| 165 | + } |
| 166 | + } else { |
| 167 | + $image->dejaMetaTree = $tree; |
| 168 | + } |
154 | 169 | } catch( Exception $e ) { |
155 | 170 | wfDebug( "Bogus multipage XML metadata on '$image->name'\n" ); |
156 | | - // Set to false rather than null to avoid further attempts |
157 | | - $image->dejaMetaTree = false; |
158 | 171 | } |
159 | 172 | wfRestoreWarnings(); |
160 | 173 | wfProfileOut( __METHOD__ ); |
161 | | - return $image->dejaMetaTree; |
| 174 | + if( $gettext ) { |
| 175 | + return $image->djvuTextTree; |
| 176 | + } else { |
| 177 | + return $image->dejaMetaTree; |
| 178 | + } |
162 | 179 | } |
163 | 180 | |
164 | 181 | function getImageSize( $image, $path ) { |
— | — | @@ -211,4 +228,21 @@ |
212 | 229 | return false; |
213 | 230 | } |
214 | 231 | } |
| 232 | + |
| 233 | + function getPageText( $image, $page ){ |
| 234 | + $tree = $this->getMetaTree( $image, true ); |
| 235 | + if ( !$tree ) { |
| 236 | + return false; |
| 237 | + } |
| 238 | + |
| 239 | + $o = $tree->BODY[0]->PAGE[$page-1]; |
| 240 | + if ( $o ) { |
| 241 | + $txt = $o['value']; |
| 242 | + return $txt; |
| 243 | + } else { |
| 244 | + return false; |
| 245 | + } |
| 246 | + |
| 247 | + } |
| 248 | + |
215 | 249 | } |
Index: trunk/phase3/includes/DjVuImage.php |
— | — | @@ -224,7 +224,7 @@ |
225 | 225 | * @return string |
226 | 226 | */ |
227 | 227 | function retrieveMetaData() { |
228 | | - global $wgDjvuToXML, $wgDjvuDump; |
| 228 | + global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt; |
229 | 229 | if ( isset( $wgDjvuDump ) ) { |
230 | 230 | # djvudump is faster as of version 3.5 |
231 | 231 | # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583 |
— | — | @@ -242,6 +242,22 @@ |
243 | 243 | } else { |
244 | 244 | $xml = null; |
245 | 245 | } |
| 246 | + # Text layer |
| 247 | + if ( isset( $wgDjvuTxt ) ) { |
| 248 | + wfProfileIn( 'djvutxt' ); |
| 249 | + $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ; |
| 250 | + wfDebug( __METHOD__.": $cmd\n" ); |
| 251 | + $txt = wfShellExec( $cmd, $retval ); |
| 252 | + wfProfileOut( 'djvutxt' ); |
| 253 | + if( $retval == 0) { |
| 254 | + $txt = htmlspecialchars($txt); |
| 255 | + $txt = preg_replace( "/\(page\s\d*\s\d*\s\d*\s\d*\s*\"(.*?)\"\s*\)/s", "<PAGE value=\"$1\" />", $txt ); |
| 256 | + $txt = preg_replace( "/\(\)/", "<PAGE value=\"\" />", $txt ); |
| 257 | + $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n"; |
| 258 | + $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml ); |
| 259 | + $xml = $xml . $txt. '</mw-djvu>' ; |
| 260 | + } |
| 261 | + } |
246 | 262 | return $xml; |
247 | 263 | } |
248 | 264 | |
Index: trunk/extensions/ProofreadPage/ProofreadPage.php |
— | — | @@ -741,14 +741,12 @@ |
742 | 742 | |
743 | 743 | $image = wfFindFile( $imageTitle ); |
744 | 744 | if ( $image && $image->exists() && $image->getMimeType() == 'image/vnd.djvu' ) { |
745 | | - $name = $image->thumbName( array( 'width' => '##WIDTH##', 'page' => $m[2] ) ); |
746 | | - $name = str_replace( '##WIDTH##px', 'djvutxt', $name ); |
747 | | - $name = str_replace( '.jpg', '.txt', $name ); |
748 | | - $url = $image->getThumbUrl( $name ); |
749 | | - |
750 | | - if ( $url[0] == '/' ) $url = "http://localhost" . $url; |
751 | | - $text = Http::get( $url ); |
752 | | - if ( $text ) $textbox1 = $text; |
| 745 | + $text = $image->handler->getPageText($image, $m[2]); |
| 746 | + if ( $text ) { |
| 747 | + $text = preg_replace( "/(\\\\n)/", "\n", $text ); |
| 748 | + $text = preg_replace( "/(\\\\\d*)/", "", $text ); |
| 749 | + $textbox1 = $text; |
| 750 | + } |
753 | 751 | } |
754 | 752 | } |
755 | 753 | return true; |