r57267 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r57266 | r57267 | r57268 >
Date:23:35, 1 October 2009
Author:brion
Status:ok
Tags:
Comment:
update PdfHandler from trunk
Modified paths:
  • /branches/wmf-deployment-2009-10-01/extensions/PdfHandler (modified) (history)
  • /branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.i18n.php (modified) (history)
  • /branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.image.php (modified) (history)
  • /branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.php (modified) (history)
  • /branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler_body.php (modified) (history)

Diff [purge]

Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler_body.php
@@ -202,4 +202,19 @@
203203 $data = $this->getMetaArray( $image );
204204 return PdfImage::getPageSize( $data, $page );
205205 }
 206+
 207+ function getPageText( $image, $page ){
 208+ $data = $this->getMetaArray( $image, true );
 209+ if ( !$data ) {
 210+ return false;
 211+ }
 212+ if( ! isset( $data['text'] ) ) {
 213+ return false;
 214+ }
 215+ if( ! isset( $data['text'][$page-1] ) ) {
 216+ return false;
 217+ }
 218+ return $data['text'][$page-1];
 219+ }
 220+
206221 }
Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.i18n.php
@@ -322,11 +322,12 @@
323323
324324 /** Macedonian (Македонски)
325325 * @author Bjankuloski06
 326+ * @author Brest
326327 */
327328 $messages['mk'] = array(
328 - 'pdf-desc' => 'Помагало за гледање на PDF податотеки во сликовен режим',
329 - 'pdf_no_metadata' => 'Не можам да земам метаподатоци од PDF податотеката',
330 - 'pdf_page_error' => 'Бројот на страницата не е во опсегот',
 329+ 'pdf-desc' => 'Ракувач за прегледување PDF податотеки во сликовен режим',
 330+ 'pdf_no_metadata' => 'Не може да се земат метаподатоци од PDF',
 331+ 'pdf_page_error' => 'Бројот на страница е надвор од опсег',
331332 );
332333
333334 /** Malayalam (മലയാളം)
Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.php
@@ -39,6 +39,7 @@
4040 $wgPdfProcessor = 'gs';
4141 $wgPdfPostProcessor = 'convert';
4242 $wgPdfInfo = 'pdfinfo';
 43+$wgPdftoText = 'pdftotext';
4344
4445 $wgPdfOutputExtension = "jpg";
4546 $wgPdfHandlerDpi = 150;
Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.image.php
@@ -79,7 +79,7 @@
8080 }
8181
8282 public function retrieveMetaData() {
83 - global $wgPdfInfo;
 83+ global $wgPdfInfo, $wgPdftoText;
8484
8585 if ( $wgPdfInfo ) {
8686 wfProfileIn( 'pdfinfo' );
@@ -93,6 +93,25 @@
9494 } else {
9595 $data = null;
9696 }
 97+
 98+ # Read text layer
 99+ if ( isset( $wgPdftoText ) ) {
 100+ wfProfileIn( 'pdftotext' );
 101+ $cmd = wfEscapeShellArg( $wgPdftoText ) . ' '. wfEscapeShellArg( $this->mFilename ) . ' - ';
 102+ wfDebug( __METHOD__.": $cmd\n" );
 103+ $txt = wfShellExec( $cmd, $retval );
 104+ wfProfileOut( 'pdftotext' );
 105+ if( $retval == 0 ) {
 106+ $txt = str_replace( "\r\n", "\n", $txt );
 107+ $pages = explode( "\f", $txt );
 108+ foreach( $pages as $page => $pageText ) {
 109+ # Get rid of invalid UTF-8, strip control characters
 110+ # Note we need to do this per page, as \f page feed would be stripped.
 111+ $pages[$page] = UtfNormal::cleanUp( $pageText );
 112+ }
 113+ $data['text'] = $pages;
 114+ }
 115+ }
97116 return $data;
98117 }
99118
Property changes on: branches/wmf-deployment-2009-10-01/extensions/PdfHandler
___________________________________________________________________
Name: svn:mergeinfo
100119 + /branches/REL1_15/phase3/extensions/PdfHandler:51646
/trunk/extensions/PdfHandler:56151-57266
/trunk/phase3/extensions/PdfHandler:56213,56215-56216,56218,56325,56334-56336,56338,56340,56343,56345,56347,56350

Status & tagging log