r51458 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r51457 | r51458 | r51459 >
Date:09:16, 4 June 2009
Author:thomasv
Status:ok
Tags:
Comment:
Store the DjVu text layer in img_metadata. Fetch it in ProofreadPage.
Modified paths:
  • /trunk/extensions/ProofreadPage/ProofreadPage.php (modified) (history)
  • /trunk/phase3/includes/DjVuImage.php (modified) (history)
  • /trunk/phase3/includes/media/DjVu.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/media/DjVu.php
@@ -135,7 +135,7 @@
136136 /**
137137 * Cache a document tree for the DjVu XML metadata
138138 */
139 - function getMetaTree( $image ) {
 139+ function getMetaTree( $image , $gettext = false ) {
140140 if ( isset( $image->dejaMetaTree ) ) {
141141 return $image->dejaMetaTree;
142142 }
@@ -149,15 +149,32 @@
150150
151151 wfSuppressWarnings();
152152 try {
153 - $image->dejaMetaTree = new SimpleXMLElement( $metadata );
 153+ // Set to false rather than null to avoid further attempts
 154+ $image->dejaMetaTree = false;
 155+ $image->djvuTextTree = false;
 156+ $tree = new SimpleXMLElement( $metadata );
 157+ if( $tree->getName() == 'mw-djvu' ) {
 158+ foreach($tree->children() as $b){
 159+ if( $b->getName() == 'DjVuTxt' ) {
 160+ $image->djvuTextTree = $b;
 161+ }
 162+ else if ( $b->getName() == 'DjVuXML' ) {
 163+ $image->dejaMetaTree = $b;
 164+ }
 165+ }
 166+ } else {
 167+ $image->dejaMetaTree = $tree;
 168+ }
154169 } catch( Exception $e ) {
155170 wfDebug( "Bogus multipage XML metadata on '$image->name'\n" );
156 - // Set to false rather than null to avoid further attempts
157 - $image->dejaMetaTree = false;
158171 }
159172 wfRestoreWarnings();
160173 wfProfileOut( __METHOD__ );
161 - return $image->dejaMetaTree;
 174+ if( $gettext ) {
 175+ return $image->djvuTextTree;
 176+ } else {
 177+ return $image->dejaMetaTree;
 178+ }
162179 }
163180
164181 function getImageSize( $image, $path ) {
@@ -211,4 +228,21 @@
212229 return false;
213230 }
214231 }
 232+
 233+ function getPageText( $image, $page ){
 234+ $tree = $this->getMetaTree( $image, true );
 235+ if ( !$tree ) {
 236+ return false;
 237+ }
 238+
 239+ $o = $tree->BODY[0]->PAGE[$page-1];
 240+ if ( $o ) {
 241+ $txt = $o['value'];
 242+ return $txt;
 243+ } else {
 244+ return false;
 245+ }
 246+
 247+ }
 248+
215249 }
Index: trunk/phase3/includes/DjVuImage.php
@@ -224,7 +224,7 @@
225225 * @return string
226226 */
227227 function retrieveMetaData() {
228 - global $wgDjvuToXML, $wgDjvuDump;
 228+ global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt;
229229 if ( isset( $wgDjvuDump ) ) {
230230 # djvudump is faster as of version 3.5
231231 # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583
@@ -242,6 +242,22 @@
243243 } else {
244244 $xml = null;
245245 }
 246+ # Text layer
 247+ if ( isset( $wgDjvuTxt ) ) {
 248+ wfProfileIn( 'djvutxt' );
 249+ $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ;
 250+ wfDebug( __METHOD__.": $cmd\n" );
 251+ $txt = wfShellExec( $cmd, $retval );
 252+ wfProfileOut( 'djvutxt' );
 253+ if( $retval == 0) {
 254+ $txt = htmlspecialchars($txt);
 255+ $txt = preg_replace( "/\(page\s\d*\s\d*\s\d*\s\d*\s*\&quot;(.*?)\&quot;\s*\)/s", "<PAGE value=\"$1\" />", $txt );
 256+ $txt = preg_replace( "/\(\)/", "<PAGE value=\"\" />", $txt );
 257+ $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n";
 258+ $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml );
 259+ $xml = $xml . $txt. '</mw-djvu>' ;
 260+ }
 261+ }
246262 return $xml;
247263 }
248264
Index: trunk/extensions/ProofreadPage/ProofreadPage.php
@@ -741,14 +741,12 @@
742742
743743 $image = wfFindFile( $imageTitle );
744744 if ( $image && $image->exists() && $image->getMimeType() == 'image/vnd.djvu' ) {
745 - $name = $image->thumbName( array( 'width' => '##WIDTH##', 'page' => $m[2] ) );
746 - $name = str_replace( '##WIDTH##px', 'djvutxt', $name );
747 - $name = str_replace( '.jpg', '.txt', $name );
748 - $url = $image->getThumbUrl( $name );
749 -
750 - if ( $url[0] == '/' ) $url = "http://localhost" . $url;
751 - $text = Http::get( $url );
752 - if ( $text ) $textbox1 = $text;
 745+ $text = $image->handler->getPageText($image, $m[2]);
 746+ if ( $text ) {
 747+ $text = preg_replace( "/(\\\\n)/", "\n", $text );
 748+ $text = preg_replace( "/(\\\\\d*)/", "", $text );
 749+ $textbox1 = $text;
 750+ }
753751 }
754752 }
755753 return true;

Status & tagging log