r92106 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r92105‎ | r92106 | r92107 >
Date:22:10, 13 July 2011
Author:aaron
Status:resolved
Tags:
Comment:
* Removed function_exists check in MWTidy, proc_open is on any PHP at out minimum PHP version
* Show warnings if we can't run tidy for some reason (bug 27889)
* Moved $wgDebugTidy check (changing the value from null would break MWTidy::tidy())
* Fixed some outdated comment
* Made a few w/s changes
Modified paths:
  • /trunk/phase3/includes/DefaultSettings.php (modified) (history)
  • /trunk/phase3/includes/parser/Tidy.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/parser/Tidy.php
@@ -40,6 +40,7 @@
4141 $this->mUniqPrefix = "\x7fUNIQ" .
4242 dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
4343 $this->mMarkerIndex = 0;
 44+
4445 $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
4546 array( &$this, 'replaceEditSectionLinksCallback' ), $text );
4647
@@ -82,7 +83,6 @@
8384 * @ingroup Parser
8485 */
8586 class MWTidy {
86 -
8787 /**
8888 * Interface with html tidy, used if $wgUseTidy = true.
8989 * If tidy isn't able to correct the markup, the original will be
@@ -97,12 +97,17 @@
9898 $wrapper = new MWTidyWrapper;
9999 $wrappedtext = $wrapper->getWrapped( $text );
100100
101 - if( $wgTidyInternal ) {
102 - $correctedtext = self::execInternalTidy( $wrappedtext );
 101+ $retVal = null;
 102+ if ( $wgTidyInternal ) {
 103+ $correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal );
103104 } else {
104 - $correctedtext = self::execExternalTidy( $wrappedtext );
 105+ $correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal );
105106 }
106 - if( is_null( $correctedtext ) ) {
 107+
 108+ if ( $retVal < 0 ) {
 109+ wfDebug( "Possible tidy configuration error!\n" );
 110+ return $text . "\n<!-- Tidy was unable to run -->\n";
 111+ } elseif ( is_null( $correctedtext ) ) {
107112 wfDebug( "Tidy error detected!\n" );
108113 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
109114 }
@@ -132,6 +137,7 @@
133138 } else {
134139 $errorStr = self::execExternalTidy( $text, true, $retval );
135140 }
 141+
136142 return ( $retval < 0 && $errorStr == '' ) || $retval == 0;
137143 }
138144
@@ -140,7 +146,7 @@
141147 * Also called in OutputHandler.php for full page validation
142148 *
143149 * @param $text String: HTML to check
144 - * @param $stderr Boolean: Whether to read from STDERR rather than STDOUT
 150+ * @param $stderr Boolean: Whether to read result from STDERR rather than STDOUT
145151 * @param &$retval Exit code (-1 on internal error)
146152 * @return mixed String or null
147153 */
@@ -151,7 +157,7 @@
152158 $cleansource = '';
153159 $opts = ' -utf8';
154160
155 - if( $stderr ) {
 161+ if ( $stderr ) {
156162 $descriptorspec = array(
157163 0 => array( 'pipe', 'r' ),
158164 1 => array( 'file', wfGetNull(), 'a' ),
@@ -168,79 +174,82 @@
169175 $readpipe = $stderr ? 2 : 1;
170176 $pipes = array();
171177
172 - if( function_exists( 'proc_open' ) ) {
173 - $process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
174 - if ( is_resource( $process ) ) {
175 - // Theoretically, this style of communication could cause a deadlock
176 - // here. If the stdout buffer fills up, then writes to stdin could
177 - // block. This doesn't appear to happen with tidy, because tidy only
178 - // writes to stdout after it's finished reading from stdin. Search
179 - // for tidyParseStdin and tidySaveStdout in console/tidy.c
180 - fwrite( $pipes[0], $text );
181 - fclose( $pipes[0] );
182 - while ( !feof( $pipes[$readpipe] ) ) {
183 - $cleansource .= fgets( $pipes[$readpipe], 1024 );
184 - }
185 - fclose( $pipes[$readpipe] );
186 - $retval = proc_close( $process );
187 - } else {
188 - $retval = -1;
 178+ $process = proc_open(
 179+ "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );
 180+
 181+ if ( is_resource( $process ) ) {
 182+ // Theoretically, this style of communication could cause a deadlock
 183+ // here. If the stdout buffer fills up, then writes to stdin could
 184+ // block. This doesn't appear to happen with tidy, because tidy only
 185+ // writes to stdout after it's finished reading from stdin. Search
 186+ // for tidyParseStdin and tidySaveStdout in console/tidy.c
 187+ fwrite( $pipes[0], $text );
 188+ fclose( $pipes[0] );
 189+ while ( !feof( $pipes[$readpipe] ) ) {
 190+ $cleansource .= fgets( $pipes[$readpipe], 1024 );
189191 }
 192+ fclose( $pipes[$readpipe] );
 193+ $retval = proc_close( $process );
190194 } else {
191 - $retval = -1;
 195+ wfWarn( "Unable to start external tidy process" );
 196+ $retval = -1;
192197 }
193198
194 - if( !$stderr && $cleansource == '' && $text != '' ) {
 199+ if ( !$stderr && $cleansource == '' && $text != '' ) {
195200 // Some kind of error happened, so we couldn't get the corrected text.
196201 // Just give up; we'll use the source text and append a warning.
197202 $cleansource = null;
198203 }
 204+
199205 wfProfileOut( __METHOD__ );
200206 return $cleansource;
201207 }
202208
203209 /**
204 - * Use the HTML tidy PECL extension to use the tidy library in-process,
 210+ * Use the HTML tidy extension to use the tidy library in-process,
205211 * saving the overhead of spawning a new process.
206212 *
207 - * 'pear install tidy' should be able to compile the extension module.
208 - *
209 - * @param $text
210 - * @param $stderr
211 - * @param $retval
212 - *
213 - * @return string
 213+ * @param $text String: HTML to check
 214+ * @param $stderr Boolean: Whether to read result from error status instead of output
 215+ * @param &$retval Exit code (-1 on internal error)
 216+ * @return mixed String or null
214217 */
215218 private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
216219 global $wgTidyConf, $wgDebugTidy;
217220 wfProfileIn( __METHOD__ );
218221
 222+ if ( !MWInit::classExists( 'tidy' ) ) {
 223+ wfWarn( "Unable to load internal tidy class." );
 224+ $retval = -1;
 225+ return null;
 226+ }
 227+
219228 $tidy = new tidy;
220229 $tidy->parseString( $text, $wgTidyConf, 'utf8' );
221230
222 - if( $stderr ) {
 231+ if ( $stderr ) {
223232 $retval = $tidy->getStatus();
 233+
224234 wfProfileOut( __METHOD__ );
225235 return $tidy->errorBuffer;
226236 } else {
227237 $tidy->cleanRepair();
228238 $retval = $tidy->getStatus();
229 - if( $retval == 2 ) {
 239+ if ( $retval == 2 ) {
230240 // 2 is magic number for fatal error
231241 // http://www.php.net/manual/en/function.tidy-get-status.php
232242 $cleansource = null;
233243 } else {
234244 $cleansource = tidy_get_output( $tidy );
 245+ if ( $wgDebugTidy && $retval > 0 ) {
 246+ $cleansource .= "<!--\nTidy reports:\n" .
 247+ str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
 248+ "\n-->";
 249+ }
235250 }
236 - if ( $wgDebugTidy && $retval > 0 ) {
237 - $cleansource .= "<!--\nTidy reports:\n" .
238 - str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
239 - "\n-->";
240 - }
241251
242252 wfProfileOut( __METHOD__ );
243253 return $cleansource;
244254 }
245255 }
246 -
247256 }
Index: trunk/phase3/includes/DefaultSettings.php
@@ -2856,8 +2856,9 @@
28572857 * - $wgTidyBin should be set to the path of the binary and
28582858 * - $wgTidyConf to the path of the configuration file.
28592859 * - $wgTidyOpts can include any number of parameters.
2860 - * - $wgTidyInternal controls the use of the PECL extension to use an in-
2861 - * process tidy library instead of spawning a separate program.
 2860+ * - $wgTidyInternal controls the use of the PECL extension or the
 2861+ * libtidy (PHP >= 5) extension to use an in-process tidy library instead
 2862+ * of spawning a separate program.
28622863 * Normally you shouldn't need to override the setting except for
28632864 * debugging. To install, use 'pear install tidy' and add a line
28642865 * 'extension=tidy.so' to php.ini.

Follow-up revisions

RevisionCommit summaryAuthorDate
r92552Profiling errors. r92106 missed a wfProfileOut...platonides19:05, 19 July 2011

Status & tagging log