Index: trunk/phase3/includes/Sanitizer.php |
— | — | @@ -722,28 +722,34 @@ |
723 | 723 | |
724 | 724 | /** |
725 | 725 | * Pick apart some CSS and check it for forbidden or unsafe structures. |
726 | | - * Returns a sanitized string, or false if it was just too evil. |
| 726 | + * Returns a sanitized string. This sanitized string will have |
| 727 | + * character references and escape sequences decoded, and comments |
| 728 | + * stripped. If the input is just too evil, only a comment complaining |
| 729 | + * about evilness will be returned. |
727 | 730 | * |
728 | 731 | * Currently URL references, 'expression', 'tps' are forbidden. |
729 | 732 | * |
| 733 | + * NOTE: Despite the fact that character references are decoded, the |
| 734 | + * returned string may contain character references given certain |
| 735 | + * clever input strings. These character references must |
| 736 | + * be escaped before the return value is embedded in HTML. |
| 737 | + * |
730 | 738 | * @param $value String |
731 | | - * @return Mixed |
| 739 | + * @return String |
732 | 740 | */ |
733 | 741 | static function checkCss( $value ) { |
| 742 | + // Decode character references like { |
734 | 743 | $value = Sanitizer::decodeCharReferences( $value ); |
735 | 744 | |
736 | | - // Remove any comments; IE gets token splitting wrong |
737 | | - $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); |
738 | | - |
739 | | - // Remove anything after a comment-start token, to guard against |
740 | | - // incorrect client implementations. |
741 | | - $commentPos = strpos( $value, '/*' ); |
742 | | - if ( $commentPos !== false ) { |
743 | | - $value = substr( $value, 0, $commentPos ); |
744 | | - } |
745 | | - |
746 | 745 | // Decode escape sequences and line continuation |
747 | 746 | // See the grammar in the CSS 2 spec, appendix D. |
| 747 | + // This has to be done AFTER decoding character references. |
| 748 | + // This means it isn't possible for this function to return |
| 749 | + // unsanitized escape sequences. It is possible to manufacture |
| 750 | + // input that contains character references that decode to |
| 751 | + // escape sequences that decode to character references, but |
| 752 | + // it's OK for the return value to contain character references |
| 753 | + // because the caller is supposed to escape those anyway. |
748 | 754 | static $decodeRegex; |
749 | 755 | if ( !$decodeRegex ) { |
750 | 756 | $space = '[\\x20\\t\\r\\n\\f]'; |
— | — | @@ -759,7 +765,22 @@ |
760 | 766 | } |
761 | 767 | $value = preg_replace_callback( $decodeRegex, |
762 | 768 | array( __CLASS__, 'cssDecodeCallback' ), $value ); |
| 769 | + |
| 770 | + // Remove any comments; IE gets token splitting wrong |
| 771 | + // This must be done AFTER decoding character references and |
| 772 | + // escape sequences, because those steps can introduce comments |
| 773 | + // This step cannot introduce character references or escape |
| 774 | + // sequences, because it replaces comments with spaces rather |
| 775 | + // than removing them completely. |
| 776 | + $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); |
763 | 777 | |
| 778 | + // Remove anything after a comment-start token, to guard against |
| 779 | + // incorrect client implementations. |
| 780 | + $commentPos = strpos( $value, '/*' ); |
| 781 | + if ( $commentPos !== false ) { |
| 782 | + $value = substr( $value, 0, $commentPos ); |
| 783 | + } |
| 784 | + |
764 | 785 | // Reject problematic keywords and control characters |
765 | 786 | if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) { |
766 | 787 | return '/* invalid control char */'; |