Index: trunk/phase3/includes/OutputHandler.php |
— | — | @@ -123,48 +123,10 @@ |
124 | 124 | * Replace the output with an error if the HTML is not valid |
125 | 125 | */ |
126 | 126 | function wfHtmlValidationHandler( $s ) { |
127 | | - global $IP, $wgTidyInternal, $wgTidyConf; |
128 | | - if ( $wgTidyInternal ) { |
129 | | - $tidy = new tidy; |
130 | | - |
131 | | - $tidy->parseString( $s, $wgTidyConf, 'utf8' ); |
132 | | - if ( $tidy->getStatus() == 0 ) { |
133 | | - return $s; |
134 | | - } |
135 | 127 | |
136 | | - $errors = $tidy->errorBuffer; |
137 | | - } else { |
138 | | - // Copied from Parser::externalTidy(); |
139 | | - global $wgTidyBin, $wgTidyOpts; |
140 | | - |
141 | | - $cleansource = ''; |
142 | | - $opts = ' -utf8'; |
143 | | - |
144 | | - $descriptorspec = array( |
145 | | - 0 => array( 'pipe', 'r' ), |
146 | | - 1 => array( 'file', wfGetNull(), 'a' ), |
147 | | - 2 => array( 'pipe', 'w' ) |
148 | | - ); |
149 | | - $pipes = array(); |
150 | | - if( function_exists( 'proc_open' ) ) { |
151 | | - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes ); |
152 | | - if ( is_resource( $process ) ) { |
153 | | - fwrite( $pipes[0], $s ); |
154 | | - fclose( $pipes[0] ); |
155 | | - while( !feof( $pipes[2] ) ) { |
156 | | - $errors .= fgets( $pipes[2], 1024 ); |
157 | | - } |
158 | | - fclose( $pipes[2] ); |
159 | | - $ret = proc_close( $process ); |
160 | | - if( ( $ret < 0 && $errors == '' ) || $ret == 0 ) |
161 | | - return $s; |
162 | | - } else { |
163 | | - return $s; |
164 | | - } |
165 | | - |
166 | | - } else { |
167 | | - return $s; |
168 | | - } |
| 128 | + $errors = ''; |
| 129 | + if ( MWTidy::checkErrors( $s, $errors ) ) { |
| 130 | + return $s; |
169 | 131 | } |
170 | 132 | |
171 | 133 | header( 'Cache-Control: no-cache' ); |
— | — | @@ -197,7 +159,7 @@ |
198 | 160 | |
199 | 161 | $out .= '</ul>'; |
200 | 162 | $out .= '<pre>' . htmlspecialchars( $errors ) . '</pre>'; |
201 | | - $out .= '<ol>'; |
| 163 | + $out .= "<ol>\n"; |
202 | 164 | $line = strtok( $s, "\n" ); |
203 | 165 | $i = 1; |
204 | 166 | while ( $line !== false ) { |
— | — | @@ -206,7 +168,7 @@ |
207 | 169 | } else { |
208 | 170 | $out .= '<li>'; |
209 | 171 | } |
210 | | - $out .= htmlspecialchars( $line ) . '</li>'; |
| 172 | + $out .= htmlspecialchars( $line ) . "</li>\n"; |
211 | 173 | $line = strtok( "\n" ); |
212 | 174 | $i++; |
213 | 175 | } |
Index: trunk/phase3/includes/parser/Parser.php |
— | — | @@ -374,8 +374,8 @@ |
375 | 375 | |
376 | 376 | $text = Sanitizer::normalizeCharReferences( $text ); |
377 | 377 | |
378 | | - if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) { |
379 | | - $text = self::tidy($text); |
| 378 | + if ( ( $wgUseTidy && $this->mOptions->mTidy ) || $wgAlwaysUseTidy ) { |
| 379 | + $text = MWTidy::tidy( $text ); |
380 | 380 | } else { |
381 | 381 | # attempt to sanitize at least some nesting problems |
382 | 382 | # (bug #2702 and quite a few others) |
— | — | @@ -648,129 +648,17 @@ |
649 | 649 | $this->mStripState->general->setPair( $rnd, $text ); |
650 | 650 | return $rnd; |
651 | 651 | } |
652 | | - |
| 652 | + |
653 | 653 | /** |
654 | | - * Interface with html tidy, used if $wgUseTidy = true. |
655 | | - * If tidy isn't able to correct the markup, the original will be |
656 | | - * returned in all its glory with a warning comment appended. |
657 | | - * |
658 | | - * Either the external tidy program or the in-process tidy extension |
659 | | - * will be used depending on availability. Override the default |
660 | | - * $wgTidyInternal setting to disable the internal if it's not working. |
661 | | - * |
662 | | - * @param string $text Hideous HTML input |
663 | | - * @return string Corrected HTML output |
664 | | - * @public |
665 | | - * @static |
| 654 | + * Interface with html tidy |
| 655 | + * @deprecated Use MWTidy::tidy() |
666 | 656 | */ |
667 | | - function tidy( $text ) { |
668 | | - global $wgTidyInternal; |
669 | | - |
670 | | - $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. |
671 | | -' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. |
672 | | -'<head><title>test</title></head><body>'.$text.'</body></html>'; |
673 | | - |
674 | | - # Tidy is known to clobber tabs; convert 'em to entities |
675 | | - $wrappedtext = str_replace("\t", '	', $wrappedtext); |
676 | | - |
677 | | - if( $wgTidyInternal ) { |
678 | | - $correctedtext = self::internalTidy( $wrappedtext ); |
679 | | - } else { |
680 | | - $correctedtext = self::externalTidy( $wrappedtext ); |
681 | | - } |
682 | | - if( is_null( $correctedtext ) ) { |
683 | | - wfDebug( "Tidy error detected!\n" ); |
684 | | - return $text . "\n<!-- Tidy found serious XHTML errors -->\n"; |
685 | | - } |
686 | | - |
687 | | - # Convert the tabs back from entities |
688 | | - $correctedtext = str_replace('	', "\t", $correctedtext); |
689 | | - |
690 | | - return $correctedtext; |
| 657 | + public static function tidy( $text ) { |
| 658 | + wfDeprecated( __METHOD__ ); |
| 659 | + return MWTidy::tidy( $text ); |
691 | 660 | } |
692 | 661 | |
693 | 662 | /** |
694 | | - * Spawn an external HTML tidy process and get corrected markup back from it. |
695 | | - * |
696 | | - * @private |
697 | | - * @static |
698 | | - */ |
699 | | - function externalTidy( $text ) { |
700 | | - global $wgTidyConf, $wgTidyBin, $wgTidyOpts; |
701 | | - wfProfileIn( __METHOD__ ); |
702 | | - |
703 | | - $cleansource = ''; |
704 | | - $opts = ' -utf8'; |
705 | | - |
706 | | - $descriptorspec = array( |
707 | | - 0 => array('pipe', 'r'), |
708 | | - 1 => array('pipe', 'w'), |
709 | | - 2 => array('file', wfGetNull(), 'a') |
710 | | - ); |
711 | | - $pipes = array(); |
712 | | - if( function_exists('proc_open') ) { |
713 | | - $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes); |
714 | | - if (is_resource($process)) { |
715 | | - // Theoretically, this style of communication could cause a deadlock |
716 | | - // here. If the stdout buffer fills up, then writes to stdin could |
717 | | - // block. This doesn't appear to happen with tidy, because tidy only |
718 | | - // writes to stdout after it's finished reading from stdin. Search |
719 | | - // for tidyParseStdin and tidySaveStdout in console/tidy.c |
720 | | - fwrite($pipes[0], $text); |
721 | | - fclose($pipes[0]); |
722 | | - while (!feof($pipes[1])) { |
723 | | - $cleansource .= fgets($pipes[1], 1024); |
724 | | - } |
725 | | - fclose($pipes[1]); |
726 | | - proc_close($process); |
727 | | - } |
728 | | - } |
729 | | - |
730 | | - wfProfileOut( __METHOD__ ); |
731 | | - |
732 | | - if( $cleansource == '' && $text != '') { |
733 | | - // Some kind of error happened, so we couldn't get the corrected text. |
734 | | - // Just give up; we'll use the source text and append a warning. |
735 | | - return null; |
736 | | - } else { |
737 | | - return $cleansource; |
738 | | - } |
739 | | - } |
740 | | - |
741 | | - /** |
742 | | - * Use the HTML tidy PECL extension to use the tidy library in-process, |
743 | | - * saving the overhead of spawning a new process. |
744 | | - * |
745 | | - * 'pear install tidy' should be able to compile the extension module. |
746 | | - * |
747 | | - * @private |
748 | | - * @static |
749 | | - */ |
750 | | - function internalTidy( $text ) { |
751 | | - global $wgTidyConf, $IP, $wgDebugTidy; |
752 | | - wfProfileIn( __METHOD__ ); |
753 | | - |
754 | | - $tidy = new tidy; |
755 | | - $tidy->parseString( $text, $wgTidyConf, 'utf8' ); |
756 | | - $tidy->cleanRepair(); |
757 | | - if( $tidy->getStatus() == 2 ) { |
758 | | - // 2 is magic number for fatal error |
759 | | - // http://www.php.net/manual/en/function.tidy-get-status.php |
760 | | - $cleansource = null; |
761 | | - } else { |
762 | | - $cleansource = tidy_get_output( $tidy ); |
763 | | - } |
764 | | - if ( $wgDebugTidy && $tidy->getStatus() > 0 ) { |
765 | | - $cleansource .= "<!--\nTidy reports:\n" . |
766 | | - str_replace( '-->', '-->', $tidy->errorBuffer ) . |
767 | | - "\n-->"; |
768 | | - } |
769 | | - |
770 | | - wfProfileOut( __METHOD__ ); |
771 | | - return $cleansource; |
772 | | - } |
773 | | - |
774 | | - /** |
775 | 663 | * parse the wiki syntax used to render tables |
776 | 664 | * |
777 | 665 | * @private |
Index: trunk/phase3/includes/parser/Tidy.php |
— | — | @@ -0,0 +1,170 @@ |
| 2 | +<?php |
| 3 | + |
| 4 | +/** |
| 5 | + * Class to interact with HTML tidy |
| 6 | + * |
| 7 | + * Either the external tidy program or the in-process tidy extension |
| 8 | + * will be used depending on availability. Override the default |
| 9 | + * $wgTidyInternal setting to disable the internal if it's not working. |
| 10 | + * |
| 11 | + * @ingroup Parser |
| 12 | + */ |
| 13 | +class MWTidy { |
| 14 | + |
| 15 | + /** |
| 16 | + * Interface with html tidy, used if $wgUseTidy = true. |
| 17 | + * If tidy isn't able to correct the markup, the original will be |
| 18 | + * returned in all its glory with a warning comment appended. |
| 19 | + * |
| 20 | + * @param string $text Hideous HTML input |
| 21 | + * @return string Corrected HTML output |
| 22 | + */ |
| 23 | + public static function tidy( $text ) { |
| 24 | + global $wgTidyInternal; |
| 25 | + |
| 26 | + $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. |
| 27 | +' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. |
| 28 | +'<head><title>test</title></head><body>'.$text.'</body></html>'; |
| 29 | + |
| 30 | + # Tidy is known to clobber tabs; convert them to entities |
| 31 | + $wrappedtext = str_replace( "\t", '	', $wrappedtext ); |
| 32 | + |
| 33 | + if( $wgTidyInternal ) { |
| 34 | + $correctedtext = self::execInternalTidy( $wrappedtext ); |
| 35 | + } else { |
| 36 | + $correctedtext = self::execExternalTidy( $wrappedtext ); |
| 37 | + } |
| 38 | + if( is_null( $correctedtext ) ) { |
| 39 | + wfDebug( "Tidy error detected!\n" ); |
| 40 | + return $text . "\n<!-- Tidy found serious XHTML errors -->\n"; |
| 41 | + } |
| 42 | + |
| 43 | + # Convert the tabs back from entities |
| 44 | + $correctedtext = str_replace( '	', "\t", $correctedtext ); |
| 45 | + |
| 46 | + return $correctedtext; |
| 47 | + } |
| 48 | + |
| 49 | + /** |
| 50 | + * Check HTML for errors, used if $wgValidateAllHtml = true. |
| 51 | + * |
| 52 | + * @param $text String |
| 53 | + * @param &$errorStr String: return the error string |
| 54 | + * @return Boolean: whether the HTML is valid |
| 55 | + */ |
| 56 | + public static function checkErrors( $text, &$errorStr = null ) { |
| 57 | + global $wgTidyInternal; |
| 58 | + |
| 59 | + $retval = 0; |
| 60 | + if( $wgTidyInternal ) { |
| 61 | + $errorStr = self::execInternalTidy( $text, true, $retval ); |
| 62 | + } else { |
| 63 | + $errorStr = self::execExternalTidy( $text, true, $retval ); |
| 64 | + } |
| 65 | + return ( $retval < 0 && $errorStr == '' ) || $retval == 0; |
| 66 | + } |
| 67 | + |
| 68 | + /** |
| 69 | + * Spawn an external HTML tidy process and get corrected markup back from it. |
| 70 | + * Also called in OutputHandler.php for full page validation |
| 71 | + * |
| 72 | + * @param $text String: HTML to check |
| 73 | + * @param $stderr Boolean: Whether to read from STDERR rather than STDOUT |
| 74 | + * @param &$retval Exit code (-1 on internal error) |
| 75 | + * @retrun mixed String or null |
| 76 | + */ |
| 77 | + private static function execExternalTidy( $text, $stderr = false, &$retval = null ) { |
| 78 | + global $wgTidyConf, $wgTidyBin, $wgTidyOpts; |
| 79 | + wfProfileIn( __METHOD__ ); |
| 80 | + |
| 81 | + $cleansource = ''; |
| 82 | + $opts = ' -utf8'; |
| 83 | + |
| 84 | + if( $stderr ) { |
| 85 | + $descriptorspec = array( |
| 86 | + 0 => array( 'pipe', 'r' ), |
| 87 | + 1 => array( 'file', wfGetNull(), 'a' ), |
| 88 | + 2 => array( 'pipe', 'w' ) |
| 89 | + ); |
| 90 | + } else { |
| 91 | + $descriptorspec = array( |
| 92 | + 0 => array( 'pipe', 'r' ), |
| 93 | + 1 => array( 'pipe', 'w' ), |
| 94 | + 2 => array( 'file', wfGetNull(), 'a' ) |
| 95 | + ); |
| 96 | + } |
| 97 | + |
| 98 | + $readpipe = $stderr ? 2 : 1; |
| 99 | + $pipes = array(); |
| 100 | + |
| 101 | + if( function_exists( 'proc_open' ) ) { |
| 102 | + $process = proc_open( "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes ); |
| 103 | + if ( is_resource( $process ) ) { |
| 104 | + // Theoretically, this style of communication could cause a deadlock |
| 105 | + // here. If the stdout buffer fills up, then writes to stdin could |
| 106 | + // block. This doesn't appear to happen with tidy, because tidy only |
| 107 | + // writes to stdout after it's finished reading from stdin. Search |
| 108 | + // for tidyParseStdin and tidySaveStdout in console/tidy.c |
| 109 | + fwrite( $pipes[0], $text ); |
| 110 | + fclose( $pipes[0] ); |
| 111 | + while ( !feof( $pipes[$readpipe] ) ) { |
| 112 | + $cleansource .= fgets( $pipes[$readpipe], 1024 ); |
| 113 | + } |
| 114 | + fclose( $pipes[$readpipe] ); |
| 115 | + $retval = proc_close( $process ); |
| 116 | + } else { |
| 117 | + $retval = -1; |
| 118 | + } |
| 119 | + } else { |
| 120 | + $retval = -1; |
| 121 | + } |
| 122 | + |
| 123 | + wfProfileOut( __METHOD__ ); |
| 124 | + |
| 125 | + if( !$stderr && $cleansource == '' && $text != '' ) { |
| 126 | + // Some kind of error happened, so we couldn't get the corrected text. |
| 127 | + // Just give up; we'll use the source text and append a warning. |
| 128 | + return null; |
| 129 | + } else { |
| 130 | + return $cleansource; |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + /** |
| 135 | + * Use the HTML tidy PECL extension to use the tidy library in-process, |
| 136 | + * saving the overhead of spawning a new process. |
| 137 | + * |
| 138 | + * 'pear install tidy' should be able to compile the extension module. |
| 139 | + */ |
| 140 | + private static function execInternalTidy( $text, $stderr = false, &$retval = null ) { |
| 141 | + global $wgTidyConf, $IP, $wgDebugTidy; |
| 142 | + wfProfileIn( __METHOD__ ); |
| 143 | + |
| 144 | + $tidy = new tidy; |
| 145 | + $tidy->parseString( $text, $wgTidyConf, 'utf8' ); |
| 146 | + |
| 147 | + if( $stderr ) { |
| 148 | + $retval = $tidy->getStatus(); |
| 149 | + return $tidy->errorBuffer; |
| 150 | + } else { |
| 151 | + $tidy->cleanRepair(); |
| 152 | + $retval = $tidy->getStatus(); |
| 153 | + if( $retval == 2 ) { |
| 154 | + // 2 is magic number for fatal error |
| 155 | + // http://www.php.net/manual/en/function.tidy-get-status.php |
| 156 | + $cleansource = null; |
| 157 | + } else { |
| 158 | + $cleansource = tidy_get_output( $tidy ); |
| 159 | + } |
| 160 | + if ( $wgDebugTidy && $retval > 0 ) { |
| 161 | + $cleansource .= "<!--\nTidy reports:\n" . |
| 162 | + str_replace( '-->', '-->', $tidy->errorBuffer ) . |
| 163 | + "\n-->"; |
| 164 | + } |
| 165 | + |
| 166 | + wfProfileOut( __METHOD__ ); |
| 167 | + return $cleansource; |
| 168 | + } |
| 169 | + } |
| 170 | + |
| 171 | +} |
Property changes on: trunk/phase3/includes/parser/Tidy.php |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 172 | + native |
Index: trunk/phase3/RELEASE-NOTES |
— | — | @@ -1,4 +1,4 @@ |
2 | | -= MediaWiki release notes = |
| 2 | + = MediaWiki release notes = |
3 | 3 | |
4 | 4 | Security reminder: MediaWiki does not require PHP's register_globals |
5 | 5 | setting since version 1.2.0. If you have it on, turn it *off* if you can. |
— | — | @@ -92,10 +92,12 @@ |
93 | 93 | * (bug 14423) Check block flag validity for block logging |
94 | 94 | * DB transaction and slave-lag avoidance tweaks for Email Notifications |
95 | 95 | * (bug 17104) Removed [Mark as patrolled] link for already patrolled revisions |
96 | | -* (bug 17106) Added 'redirect=no' and 'mw-redirect' class to redirects at "user contributions" |
| 96 | +* (bug 17106) Added 'redirect=no' and 'mw-redirect' class to redirects at |
| 97 | + "user contributions" |
97 | 98 | * Rollback links on new pages removed from "user contributions" |
98 | | -* (bug 15811) Re-upload form tweaks: license fields removed, destination locked, comment |
99 | | - label uses better message |
| 99 | +* (bug 15811) Re-upload form tweaks: license fields removed, destination locked, |
| 100 | + comment label uses better message |
| 101 | +* Whole HTML validation ($wgValidateAllHtml) now works with external tidy |
100 | 102 | |
101 | 103 | == API changes in 1.15 == |
102 | 104 | * (bug 16858) Revamped list=deletedrevs to make listing deleted contributions |