Index: branches/REL1_16/phase3/includes/Sanitizer.php |
— | — | @@ -726,29 +726,35 @@ |
727 | 727 | |
728 | 728 | /** |
729 | 729 | * Pick apart some CSS and check it for forbidden or unsafe structures. |
730 | | - * Returns a sanitized string, or false if it was just too evil. |
| 730 | + * Returns a sanitized string. This sanitized string will have |
| 731 | + * character references and escape sequences decoded, and comments |
| 732 | + * stripped. If the input is just too evil, only a comment complaining |
| 733 | + * about evilness will be returned. |
731 | 734 | * |
732 | 735 | * Currently URL references, 'expression', 'tps' are forbidden. |
733 | 736 | * |
| 737 | + * NOTE: Despite the fact that character references are decoded, the |
| 738 | + * returned string may contain character references given certain |
| 739 | + * clever input strings. These character references must |
| 740 | + * be escaped before the return value is embedded in HTML. |
| 741 | + * |
734 | 742 | * @param $value String |
735 | | - * @return Mixed |
| 743 | + * @return String |
736 | 744 | */ |
737 | 745 | static function checkCss( $value ) { |
| 746 | + // Decode character references like { |
738 | 747 | $value = Sanitizer::decodeCharReferences( $value ); |
739 | 748 | |
740 | | - // Remove any comments; IE gets token splitting wrong |
741 | | - $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); |
742 | | - |
743 | | - // Remove anything after a comment-start token, to guard against |
744 | | - // incorrect client implementations. |
745 | | - $commentPos = strpos( $value, '/*' ); |
746 | | - if ( $commentPos !== false ) { |
747 | | - $value = substr( $value, 0, $commentPos ); |
748 | | - } |
749 | | - |
750 | 749 | // Decode escape sequences and line continuation |
751 | 750 | // See the grammar in the CSS 2 spec, appendix D. |
752 | | - static $decodeRegex, $reencodeTable; |
| 751 | + // This has to be done AFTER decoding character references. |
| 752 | + // This means it isn't possible for this function to return |
| 753 | + // unsanitized escape sequences. It is possible to manufacture |
| 754 | + // input that contains character references that decode to |
| 755 | + // escape sequences that decode to character references, but |
| 756 | + // it's OK for the return value to contain character references |
| 757 | + // because the caller is supposed to escape those anyway. |
| 758 | + static $decodeRegex; |
753 | 759 | if ( !$decodeRegex ) { |
754 | 760 | $space = '[\\x20\\t\\r\\n\\f]'; |
755 | 761 | $nl = '(?:\\n|\\r\\n|\\r|\\f)'; |
— | — | @@ -763,7 +769,22 @@ |
764 | 770 | } |
765 | 771 | $value = preg_replace_callback( $decodeRegex, |
766 | 772 | array( __CLASS__, 'cssDecodeCallback' ), $value ); |
| 773 | + |
| 774 | + // Remove any comments; IE gets token splitting wrong |
| 775 | + // This must be done AFTER decoding character references and |
| 776 | + // escape sequences, because those steps can introduce comments |
| 777 | + // This step cannot introduce character references or escape |
| 778 | + // sequences, because it replaces comments with spaces rather |
| 779 | + // than removing them completely. |
| 780 | + $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value ); |
767 | 781 | |
| 782 | + // Remove anything after a comment-start token, to guard against |
| 783 | + // incorrect client implementations. |
| 784 | + $commentPos = strpos( $value, '/*' ); |
| 785 | + if ( $commentPos !== false ) { |
| 786 | + $value = substr( $value, 0, $commentPos ); |
| 787 | + } |
| 788 | + |
768 | 789 | // Reject problematic keywords and control characters |
769 | 790 | if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) { |
770 | 791 | return '/* invalid control char */'; |