r85857 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r85856‎ | r85857 | r85858 >
Date:02:10, 12 April 2011
Author:tstarling
Status:ok
Tags:
Comment:
MFT r85856: Fix for bug 28450: escaped CSS comments
Modified paths:
  • /branches/REL1_16/phase3/includes/Sanitizer.php (modified) (history)

Diff [purge]

Index: branches/REL1_16/phase3/includes/Sanitizer.php
@@ -726,29 +726,35 @@
727727
728728 /**
729729 * Pick apart some CSS and check it for forbidden or unsafe structures.
730 - * Returns a sanitized string, or false if it was just too evil.
 730+ * Returns a sanitized string. This sanitized string will have
 731+ * character references and escape sequences decoded, and comments
 732+ * stripped. If the input is just too evil, only a comment complaining
 733+ * about evilness will be returned.
731734 *
732735 * Currently URL references, 'expression', 'tps' are forbidden.
733736 *
 737+ * NOTE: Despite the fact that character references are decoded, the
 738+ * returned string may contain character references given certain
 739+ * clever input strings. These character references must
 740+ * be escaped before the return value is embedded in HTML.
 741+ *
734742 * @param $value String
735 - * @return Mixed
 743+ * @return String
736744 */
737745 static function checkCss( $value ) {
 746+ // Decode character references like {
738747 $value = Sanitizer::decodeCharReferences( $value );
739748
740 - // Remove any comments; IE gets token splitting wrong
741 - $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
742 -
743 - // Remove anything after a comment-start token, to guard against
744 - // incorrect client implementations.
745 - $commentPos = strpos( $value, '/*' );
746 - if ( $commentPos !== false ) {
747 - $value = substr( $value, 0, $commentPos );
748 - }
749 -
750749 // Decode escape sequences and line continuation
751750 // See the grammar in the CSS 2 spec, appendix D.
752 - static $decodeRegex, $reencodeTable;
 751+ // This has to be done AFTER decoding character references.
 752+ // This means it isn't possible for this function to return
 753+ // unsanitized escape sequences. It is possible to manufacture
 754+ // input that contains character references that decode to
 755+ // escape sequences that decode to character references, but
 756+ // it's OK for the return value to contain character references
 757+ // because the caller is supposed to escape those anyway.
 758+ static $decodeRegex;
753759 if ( !$decodeRegex ) {
754760 $space = '[\\x20\\t\\r\\n\\f]';
755761 $nl = '(?:\\n|\\r\\n|\\r|\\f)';
@@ -763,7 +769,22 @@
764770 }
765771 $value = preg_replace_callback( $decodeRegex,
766772 array( __CLASS__, 'cssDecodeCallback' ), $value );
 773+
 774+ // Remove any comments; IE gets token splitting wrong
 775+ // This must be done AFTER decoding character references and
 776+ // escape sequences, because those steps can introduce comments
 777+ // This step cannot introduce character references or escape
 778+ // sequences, because it replaces comments with spaces rather
 779+ // than removing them completely.
 780+ $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
767781
 782+ // Remove anything after a comment-start token, to guard against
 783+ // incorrect client implementations.
 784+ $commentPos = strpos( $value, '/*' );
 785+ if ( $commentPos !== false ) {
 786+ $value = substr( $value, 0, $commentPos );
 787+ }
 788+
768789 // Reject problematic keywords and control characters
769790 if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) {
770791 return '/* invalid control char */';

Follow-up revisions

RevisionCommit summaryAuthorDate
r85858MFT r85856: Fix for bug 28450: escaped CSS commentststarling02:11, 12 April 2011
r85859MFT r85856: Fix for bug 28450: escaped CSS commentststarling02:11, 12 April 2011

Past revisions this follows-up on

RevisionCommit summaryAuthorDate
r85856Fix for bug 28450: escaped CSS commentststarling02:10, 12 April 2011

Status & tagging log