Index: trunk/phase3/includes/parser/Tidy.php |
— | — | @@ -40,6 +40,7 @@ |
41 | 41 | $this->mUniqPrefix = "\x7fUNIQ" . |
42 | 42 | dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); |
43 | 43 | $this->mMarkerIndex = 0; |
| 44 | + |
44 | 45 | $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, |
45 | 46 | array( &$this, 'replaceEditSectionLinksCallback' ), $text ); |
46 | 47 | |
— | — | @@ -82,7 +83,6 @@ |
83 | 84 | * @ingroup Parser |
84 | 85 | */ |
85 | 86 | class MWTidy { |
86 | | - |
87 | 87 | /** |
88 | 88 | * Interface with html tidy, used if $wgUseTidy = true. |
89 | 89 | * If tidy isn't able to correct the markup, the original will be |
— | — | @@ -97,12 +97,17 @@ |
98 | 98 | $wrapper = new MWTidyWrapper; |
99 | 99 | $wrappedtext = $wrapper->getWrapped( $text ); |
100 | 100 | |
101 | | - if( $wgTidyInternal ) { |
102 | | - $correctedtext = self::execInternalTidy( $wrappedtext ); |
| 101 | + $retVal = null; |
| 102 | + if ( $wgTidyInternal ) { |
| 103 | + $correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal ); |
103 | 104 | } else { |
104 | | - $correctedtext = self::execExternalTidy( $wrappedtext ); |
| 105 | + $correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal ); |
105 | 106 | } |
106 | | - if( is_null( $correctedtext ) ) { |
| 107 | + |
| 108 | + if ( $retVal < 0 ) { |
| 109 | + wfDebug( "Possible tidy configuration error!\n" ); |
| 110 | + return $text . "\n<!-- Tidy was unable to run -->\n"; |
| 111 | + } elseif ( is_null( $correctedtext ) ) { |
107 | 112 | wfDebug( "Tidy error detected!\n" ); |
108 | 113 | return $text . "\n<!-- Tidy found serious XHTML errors -->\n"; |
109 | 114 | } |
— | — | @@ -132,6 +137,7 @@ |
133 | 138 | } else { |
134 | 139 | $errorStr = self::execExternalTidy( $text, true, $retval ); |
135 | 140 | } |
| 141 | + |
136 | 142 | return ( $retval < 0 && $errorStr == '' ) || $retval == 0; |
137 | 143 | } |
138 | 144 | |
— | — | @@ -140,7 +146,7 @@ |
141 | 147 | * Also called in OutputHandler.php for full page validation |
142 | 148 | * |
143 | 149 | * @param $text String: HTML to check |
144 | | - * @param $stderr Boolean: Whether to read from STDERR rather than STDOUT |
| 150 | + * @param $stderr Boolean: Whether to read result from STDERR rather than STDOUT |
145 | 151 | * @param &$retval Exit code (-1 on internal error) |
146 | 152 | * @return mixed String or null |
147 | 153 | */ |
— | — | @@ -151,7 +157,7 @@ |
152 | 158 | $cleansource = ''; |
153 | 159 | $opts = ' -utf8'; |
154 | 160 | |
155 | | - if( $stderr ) { |
| 161 | + if ( $stderr ) { |
156 | 162 | $descriptorspec = array( |
157 | 163 | 0 => array( 'pipe', 'r' ), |
158 | 164 | 1 => array( 'file', wfGetNull(), 'a' ), |
— | — | @@ -168,79 +174,82 @@ |
169 | 175 | $readpipe = $stderr ? 2 : 1; |
170 | 176 | $pipes = array(); |
171 | 177 | |
172 | | - if( function_exists( 'proc_open' ) ) { |
173 | | - $process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes ); |
174 | | - if ( is_resource( $process ) ) { |
175 | | - // Theoretically, this style of communication could cause a deadlock |
176 | | - // here. If the stdout buffer fills up, then writes to stdin could |
177 | | - // block. This doesn't appear to happen with tidy, because tidy only |
178 | | - // writes to stdout after it's finished reading from stdin. Search |
179 | | - // for tidyParseStdin and tidySaveStdout in console/tidy.c |
180 | | - fwrite( $pipes[0], $text ); |
181 | | - fclose( $pipes[0] ); |
182 | | - while ( !feof( $pipes[$readpipe] ) ) { |
183 | | - $cleansource .= fgets( $pipes[$readpipe], 1024 ); |
184 | | - } |
185 | | - fclose( $pipes[$readpipe] ); |
186 | | - $retval = proc_close( $process ); |
187 | | - } else { |
188 | | - $retval = -1; |
| 178 | + $process = proc_open( |
| 179 | + "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes ); |
| 180 | + |
| 181 | + if ( is_resource( $process ) ) { |
| 182 | + // Theoretically, this style of communication could cause a deadlock |
| 183 | + // here. If the stdout buffer fills up, then writes to stdin could |
| 184 | + // block. This doesn't appear to happen with tidy, because tidy only |
| 185 | + // writes to stdout after it's finished reading from stdin. Search |
| 186 | + // for tidyParseStdin and tidySaveStdout in console/tidy.c |
| 187 | + fwrite( $pipes[0], $text ); |
| 188 | + fclose( $pipes[0] ); |
| 189 | + while ( !feof( $pipes[$readpipe] ) ) { |
| 190 | + $cleansource .= fgets( $pipes[$readpipe], 1024 ); |
189 | 191 | } |
| 192 | + fclose( $pipes[$readpipe] ); |
| 193 | + $retval = proc_close( $process ); |
190 | 194 | } else { |
191 | | - $retval = -1; |
| 195 | + wfWarn( "Unable to start external tidy process" ); |
| 196 | + $retval = -1; |
192 | 197 | } |
193 | 198 | |
194 | | - if( !$stderr && $cleansource == '' && $text != '' ) { |
| 199 | + if ( !$stderr && $cleansource == '' && $text != '' ) { |
195 | 200 | // Some kind of error happened, so we couldn't get the corrected text. |
196 | 201 | // Just give up; we'll use the source text and append a warning. |
197 | 202 | $cleansource = null; |
198 | 203 | } |
| 204 | + |
199 | 205 | wfProfileOut( __METHOD__ ); |
200 | 206 | return $cleansource; |
201 | 207 | } |
202 | 208 | |
203 | 209 | /** |
204 | | - * Use the HTML tidy PECL extension to use the tidy library in-process, |
| 210 | + * Use the HTML tidy extension to use the tidy library in-process, |
205 | 211 | * saving the overhead of spawning a new process. |
206 | 212 | * |
207 | | - * 'pear install tidy' should be able to compile the extension module. |
208 | | - * |
209 | | - * @param $text |
210 | | - * @param $stderr |
211 | | - * @param $retval |
212 | | - * |
213 | | - * @return string |
| 213 | + * @param $text String: HTML to check |
| 214 | + * @param $stderr Boolean: Whether to read result from error status instead of output |
| 215 | + * @param &$retval Exit code (-1 on internal error) |
| 216 | + * @return mixed String or null |
214 | 217 | */ |
215 | 218 | private static function execInternalTidy( $text, $stderr = false, &$retval = null ) { |
216 | 219 | global $wgTidyConf, $wgDebugTidy; |
217 | 220 | wfProfileIn( __METHOD__ ); |
218 | 221 | |
| 222 | + if ( !MWInit::classExists( 'tidy' ) ) { |
| 223 | + wfWarn( "Unable to load internal tidy class." ); |
| 224 | + $retval = -1; |
| 225 | + return null; |
| 226 | + } |
| 227 | + |
219 | 228 | $tidy = new tidy; |
220 | 229 | $tidy->parseString( $text, $wgTidyConf, 'utf8' ); |
221 | 230 | |
222 | | - if( $stderr ) { |
| 231 | + if ( $stderr ) { |
223 | 232 | $retval = $tidy->getStatus(); |
| 233 | + |
224 | 234 | wfProfileOut( __METHOD__ ); |
225 | 235 | return $tidy->errorBuffer; |
226 | 236 | } else { |
227 | 237 | $tidy->cleanRepair(); |
228 | 238 | $retval = $tidy->getStatus(); |
229 | | - if( $retval == 2 ) { |
| 239 | + if ( $retval == 2 ) { |
230 | 240 | // 2 is magic number for fatal error |
231 | 241 | // http://www.php.net/manual/en/function.tidy-get-status.php |
232 | 242 | $cleansource = null; |
233 | 243 | } else { |
234 | 244 | $cleansource = tidy_get_output( $tidy ); |
| 245 | + if ( $wgDebugTidy && $retval > 0 ) { |
| 246 | + $cleansource .= "<!--\nTidy reports:\n" . |
| 247 | str_replace( '-->', '--&gt;', $tidy->errorBuffer ) . |
| 248 | + "\n-->"; |
| 249 | + } |
235 | 250 | } |
236 | | - if ( $wgDebugTidy && $retval > 0 ) { |
237 | | - $cleansource .= "<!--\nTidy reports:\n" . |
237 | | - str_replace( '-->', '--&gt;', $tidy->errorBuffer ) . |
239 | | - "\n-->"; |
240 | | - } |
241 | 251 | |
242 | 252 | wfProfileOut( __METHOD__ ); |
243 | 253 | return $cleansource; |
244 | 254 | } |
245 | 255 | } |
246 | | - |
247 | 256 | } |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -2856,8 +2856,9 @@ |
2857 | 2857 | * - $wgTidyBin should be set to the path of the binary and |
2858 | 2858 | * - $wgTidyConf to the path of the configuration file. |
2859 | 2859 | * - $wgTidyOpts can include any number of parameters. |
2860 | | - * - $wgTidyInternal controls the use of the PECL extension to use an in- |
2861 | | - * process tidy library instead of spawning a separate program. |
| 2860 | + * - $wgTidyInternal controls the use of the PECL extension or the |
| 2861 | + * libtidy (PHP >= 5) extension to use an in-process tidy library instead |
| 2862 | + * of spawning a separate program. |
2862 | 2863 | * Normally you shouldn't need to override the setting except for |
2863 | 2864 | * debugging. To install, use 'pear install tidy' and add a line |
2864 | 2865 | * 'extension=tidy.so' to php.ini. |