Index: trunk/phase3/includes/Sanitizer.php |
— | — | @@ -43,7 +43,7 @@ |
44 | 44 | $attrib = '[A-Za-z0-9]'; |
45 | 45 | $space = '[\x09\x0a\x0d\x20]'; |
46 | 46 | define( 'MW_ATTRIBS_REGEX', |
47 | | - "/(?:^|$space)($attrib+) |
| 47 | + "/(?:^|$space)((?:xml:|xmlns:)?$attrib+) |
48 | 48 | ($space*=$space* |
49 | 49 | (?: |
50 | 50 | # The attribute value: quoted or alone |
— | — | @@ -59,9 +59,14 @@ |
60 | 60 | /** |
61 | 61 | * Regular expression to match URIs that could trigger script execution |
62 | 62 | */ |
63 | | -define( 'MW_SCRIPT_URL_PATTERN', '/(^|\s)(javascript|vbscript)[^\w]/i' ); |
| 63 | +define( 'MW_EVIL_URI_PATTERN', '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i' ); |
64 | 64 | |
65 | 65 | /** |
| 66 | + * Regular expression to match XML namespace declaration attributes (xmlns:prefix)
| 67 | + */ |
| 68 | +define( 'MW_XMLNS_ATTRIBUTE_PATTRN', "/^xmlns:$attrib+$/" ); |
| 69 | + |
| 70 | +/** |
66 | 71 | * List of all named character entities defined in HTML 4.01 |
67 | 72 | * http://www.w3.org/TR/html4/sgml/entities.html |
68 | 73 | * @private |
— | — | @@ -614,9 +619,21 @@ |
615 | 620 | |
616 | 621 | $out = array(); |
617 | 622 | foreach( $attribs as $attribute => $value ) { |
| 623 | + # Allow XML namespace declarations (xmlns:prefix="uri"), useful
| 624 | + # especially with RDFa. These bypass the per-element whitelist check below.
| 625 | +
| 626 | + if ( preg_match( MW_XMLNS_ATTRIBUTE_PATTRN, $attribute ) ) {
| 627 | + if ( !preg_match( MW_EVIL_URI_PATTERN, $value ) ) {
| 628 | + $out[$attribute] = $value;
| 629 | + }
| 630 | + # Kept or dropped, the namespace attribute is fully handled here.
| 631 | + continue;
| 632 | + }
| 633 | + |
618 | 634 | if( !isset( $whitelist[$attribute] ) ) { |
619 | 635 | continue; |
620 | 636 | } |
| 637 | + |
621 | 638 | # Strip javascript "expression" from stylesheets. |
622 | 639 | # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp |
623 | 640 | if( $attribute == 'style' ) { |
— | — | @@ -633,12 +650,14 @@ |
634 | 651 | $wgEnforceHtmlIds ? 'noninitial' : 'xml' ); |
635 | 652 | } |
636 | 653 | |
637 | | - //RDFa properties allow URIs. check them |
| 654 | + //RDFa and microdata properties allow URIs. check them |
638 | 655 | if ( $attribute === 'rel' || $attribute === 'rev' || |
639 | 656 | $attribute === 'about' || $attribute === 'property' || $attribute === 'resource' || |
640 | | - $attribute === 'datatype' || $attribute === 'typeof' ) { |
| 657 | + $attribute === 'datatype' || $attribute === 'typeof' || |
| 658 | + $attribute === 'item' || $attribute === 'itemprop' || $attribute === 'subject' ) { |
| 659 | + |
641 | 660 | //Paranoia. Allow "simple" values but suppress javascript |
642 | | - if ( preg_match( MW_SCRIPT_URL_PATTERN, $value ) ) { |
| 661 | + if ( preg_match( MW_EVIL_URI_PATTERN, $value ) ) { |
643 | 662 | continue; |
644 | 663 | } |
645 | 664 | } |
— | — | @@ -1180,11 +1199,24 @@ |
1181 | 1200 | * @return Array |
1182 | 1201 | */ |
1183 | 1202 | static function setupAttributeWhitelist() { |
1184 | | - $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style', |
1185 | | - #RDFa attributes as specified in section 9 of http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014 |
1186 | | - 'about', 'property', 'resource', 'datatype', 'typeof', |
1187 | | - ); |
| 1203 | + global $wgAllowRdfaAttributes, $wgHtml5, $wgAllowItemAttributes; |
1188 | 1204 | |
| 1205 | + $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style', 'xml:lang' ); |
| 1206 | + |
| 1207 | + if ( $wgAllowRdfaAttributes ) { |
| 1208 | + #RDFa attributes as specified in section 9 of http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014 |
| 1209 | + $common = array_merge( $common, array( |
| 1210 | + 'about', 'property', 'resource', 'datatype', 'typeof', |
| 1211 | + ) ); |
| 1212 | + } |
| 1213 | + |
| 1214 | + if ( $wgHtml5 && $wgAllowItemAttributes ) { |
| 1215 | + # Add HTML5 microdata attributes as specified by http://www.w3.org/TR/html5/microdata.html
| 1216 | + $common = array_merge( $common, array( |
| 1217 | + 'item', 'itemprop', 'subject' |
| 1218 | + ) ); |
| 1219 | + } |
| 1220 | + |
1189 | 1221 | $block = array_merge( $common, array( 'align' ) ); |
1190 | 1222 | $tablealign = array( 'align', 'char', 'charoff', 'valign' ); |
1191 | 1223 | $tablecell = array( 'abbr', |
Index: trunk/phase3/includes/DefaultSettings.php |
— | — | @@ -915,6 +915,16 @@ |
916 | 916 | $wgHtml5 = true; |
917 | 917 | |
918 | 918 | /** |
| 919 | + * Enable RDFa attributes for use in wikitext.
| 920 | + */ |
| 921 | +$wgAllowRdfaAttributes = true; |
| 922 | + |
| 923 | +/** |
| 924 | + * Enable HTML 5 microdata attributes (item, itemprop, subject) for use in wikitext, if $wgHtml5 is also true.
| 925 | + */ |
| 926 | +$wgAllowItemAttributes = true; |
| 927 | + |
| 928 | +/** |
919 | 929 | * Should we try to make our HTML output well-formed XML? If set to false, |
920 | 930 | * output will be a few bytes shorter, and the HTML will arguably be more |
921 | 931 | * readable. If set to true, life will be much easier for the authors of |