Index: trunk/extensions/PdfHandler/PdfHandler.image.php |
— | — | @@ -101,14 +101,14 @@ |
102 | 102 | wfDebug( __METHOD__.": $cmd\n" ); |
103 | 103 | $txt = wfShellExec( $cmd, $retval ); |
104 | 104 | wfProfileOut( 'pdftotext' ); |
105 | | - if( $retval == 0) { |
106 | | - # Get rid of invalid UTF-8, strip control characters |
107 | | - wfSuppressWarnings(); |
108 | | - $txt = iconv( "UTF-8","UTF-8//IGNORE", $txt ); |
109 | | - wfRestoreWarnings(); |
110 | | - $txt = preg_replace( "/[\013\035\037]/", "", $txt ); |
111 | | - $txt = htmlspecialchars($txt); |
112 | | - $pages = preg_split("/\f/s", $txt ); |
| 105 | + if( $retval == 0 ) { |
| 106 | + $txt = str_replace( "\r\n", "\n", $txt ); |
| 107 | + $pages = explode( "\f", $txt ); |
| 108 | + foreach( $pages as $page => $pageText ) { |
| 109 | + # Get rid of invalid UTF-8, strip control characters |
| 110 | + # Note: this must be done per page, because the \f form feed separating pages would otherwise be stripped. |
| 111 | + $pages[$page] = UtfNormal::cleanUp( $pageText ); |
| 112 | + } |
113 | 113 | $data['text'] = $pages; |
114 | 114 | } |
115 | 115 | } |