Index: branches/wmf/1.17wmf1/includes/Sanitizer.php |
— | — | @@ -735,28 +735,34 @@ |
736 | 736 | |
737 | 737 | /** |
738 | 738 | * Pick apart some CSS and check it for forbidden or unsafe structures. |
739 | | - * Returns a sanitized string, or false if it was just too evil. |
| 739 | + * Returns a sanitized string. This sanitized string will have |
| 740 | + * character references and escape sequences decoded, and comments |
| 741 | + * stripped. If the input is just too evil, only a comment complaining |
| 742 | + * about evilness will be returned. |
740 | 743 | * |
741 | 744 | * Currently URL references, 'expression', 'tps' are forbidden. |
742 | 745 | * |
| 746 | + * NOTE: Despite the fact that character references are decoded, the |
| 747 | + * returned string may contain character references given certain |
| 748 | + * clever input strings. These character references must |
| 749 | + * be escaped before the return value is embedded in HTML. |
| 750 | + * |
743 | 751 | * @param $value String |
744 | | - * @return Mixed |
| 752 | + * @return String |
745 | 753 | */ |
746 | 754 | static function checkCss( $value ) { |
| 755 | + // Decode character references like &#123; |
747 | 756 | $value = Sanitizer::decodeCharReferences( $value ); |
748 | 757 | |
749 | | - // Remove any comments; IE gets token splitting wrong |
750 | | - $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); |
751 | | - |
752 | | - // Remove anything after a comment-start token, to guard against |
753 | | - // incorrect client implementations. |
754 | | - $commentPos = strpos( $value, '/*' ); |
755 | | - if ( $commentPos !== false ) { |
756 | | - $value = substr( $value, 0, $commentPos ); |
757 | | - } |
758 | | - |
759 | 758 | // Decode escape sequences and line continuation |
760 | 759 | // See the grammar in the CSS 2 spec, appendix D. |
| 760 | + // This has to be done AFTER decoding character references. |
| 761 | + // This means it isn't possible for this function to return |
| 762 | + // unsanitized escape sequences. It is possible to manufacture |
| 763 | + // input that contains character references that decode to |
| 764 | + // escape sequences that decode to character references, but |
| 765 | + // it's OK for the return value to contain character references |
| 766 | + // because the caller is supposed to escape those anyway. |
761 | 767 | static $decodeRegex; |
762 | 768 | if ( !$decodeRegex ) { |
763 | 769 | $space = '[\\x20\\t\\r\\n\\f]'; |
— | — | @@ -772,7 +778,22 @@ |
773 | 779 | } |
774 | 780 | $value = preg_replace_callback( $decodeRegex, |
775 | 781 | array( __CLASS__, 'cssDecodeCallback' ), $value ); |
| 782 | + |
| 783 | + // Remove any comments; IE gets token splitting wrong |
| 784 | + // This must be done AFTER decoding character references and |
| 785 | + // escape sequences, because those steps can introduce comments. |
| 786 | + // This step cannot introduce character references or escape |
| 787 | + // sequences, because it replaces comments with spaces rather |
| 788 | + // than removing them completely. |
| 789 | + $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); |
776 | 790 | |
| 791 | + // Remove anything after a comment-start token, to guard against |
| 792 | + // incorrect client implementations. |
| 793 | + $commentPos = strpos( $value, '/*' ); |
| 794 | + if ( $commentPos !== false ) { |
| 795 | + $value = substr( $value, 0, $commentPos ); |
| 796 | + } |
| 797 | + |
777 | 798 | // Reject problematic keywords and control characters |
778 | 799 | if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) { |
779 | 800 | return '/* invalid control char */'; |