Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler_body.php |
— | — | @@ -202,4 +202,19 @@ |
203 | 203 | $data = $this->getMetaArray( $image ); |
204 | 204 | return PdfImage::getPageSize( $data, $page ); |
205 | 205 | } |
| 206 | + |
| 207 | + /** |
| 208 | + * Look up the stored text of one page; false when no text is available for it. |
| 209 | + */ |
| 210 | + function getPageText( $image, $page ){ |
| 211 | + 	$meta = $this->getMetaArray( $image, true ); |
| 212 | + 	# Page N is looked up at offset N-1 of the 'text' list |
| 213 | + 	$offset = $page - 1; |
| 214 | + 	# The nested isset() also fails when the 'text' key itself is absent |
| 215 | + 	if ( !$meta || !isset( $meta['text'][$offset] ) ) { |
| 216 | + 		return false; |
| 217 | + 	} |
| 218 | + 	return $meta['text'][$offset]; |
| 219 | + } |
| 220 | + |
206 | 221 | } |
Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.i18n.php |
— | — | @@ -322,11 +322,12 @@ |
323 | 323 | |
324 | 324 | /** Macedonian (Македонски) |
325 | 325 | * @author Bjankuloski06 |
| 326 | + * @author Brest |
326 | 327 | */ |
327 | 328 | $messages['mk'] = array( |
328 | | - 'pdf-desc' => 'Помагало за гледање на PDF податотеки во сликовен режим', |
329 | | - 'pdf_no_metadata' => 'Не можам да земам метаподатоци од PDF податотеката', |
330 | | - 'pdf_page_error' => 'Бројот на страницата не е во опсегот', |
| 329 | + 'pdf-desc' => 'Ракувач за прегледување PDF податотеки во сликовен режим', |
| 330 | + 'pdf_no_metadata' => 'Не може да се земат метаподатоци од PDF', |
| 331 | + 'pdf_page_error' => 'Бројот на страница е надвор од опсег', |
331 | 332 | ); |
332 | 333 | |
333 | 334 | /** Malayalam (മലയാളം) |
Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.php |
— | — | @@ -39,6 +39,7 @@ |
40 | 40 | $wgPdfProcessor = 'gs'; |
41 | 41 | $wgPdfPostProcessor = 'convert'; |
42 | 42 | $wgPdfInfo = 'pdfinfo'; |
| 43 | +$wgPdftoText = 'pdftotext'; |
43 | 44 | |
44 | 45 | $wgPdfOutputExtension = "jpg"; |
45 | 46 | $wgPdfHandlerDpi = 150; |
Index: branches/wmf-deployment-2009-10-01/extensions/PdfHandler/PdfHandler.image.php |
— | — | @@ -79,7 +79,7 @@ |
80 | 80 | } |
81 | 81 | |
82 | 82 | public function retrieveMetaData() { |
83 | | - global $wgPdfInfo; |
| 83 | + global $wgPdfInfo, $wgPdftoText; |
84 | 84 | |
85 | 85 | if ( $wgPdfInfo ) { |
86 | 86 | wfProfileIn( 'pdfinfo' ); |
— | — | @@ -93,6 +93,25 @@ |
94 | 94 | } else { |
95 | 95 | $data = null; |
96 | 96 | } |
| 97 | + |
| 98 | + # Read text layer |
| 99 | + if ( isset( $wgPdftoText ) ) { |
| 100 | + wfProfileIn( 'pdftotext' ); |
| 101 | + $cmd = wfEscapeShellArg( $wgPdftoText ) . ' '. wfEscapeShellArg( $this->mFilename ) . ' - '; |
| 102 | + wfDebug( __METHOD__.": $cmd\n" ); |
| 103 | + $txt = wfShellExec( $cmd, $retval ); |
| 104 | + wfProfileOut( 'pdftotext' ); |
| 105 | + if( $retval == 0 ) { |
| 106 | + $txt = str_replace( "\r\n", "\n", $txt ); |
| 107 | + $pages = explode( "\f", $txt ); |
| 108 | + foreach( $pages as $page => $pageText ) { |
| 109 | + # Get rid of invalid UTF-8, strip control characters |
| 110 | + # Note we need to do this per page, as \f page feed would be stripped. |
| 111 | + $pages[$page] = UtfNormal::cleanUp( $pageText ); |
| 112 | + } |
| 113 | + $data['text'] = $pages; |
| 114 | + } |
| 115 | + } |
97 | 116 | return $data; |
98 | 117 | } |
99 | 118 | |
Property changes on: branches/wmf-deployment-2009-10-01/extensions/PdfHandler |
___________________________________________________________________ |
Name: svn:mergeinfo |
100 | 119 | + /branches/REL1_15/phase3/extensions/PdfHandler:51646 |
/trunk/extensions/PdfHandler:56151-57266 |
/trunk/phase3/extensions/PdfHandler:56213,56215-56216,56218,56325,56334-56336,56338,56340,56343,56345,56347,56350 |