Index: trunk/phase3/includes/Sanitizer.php |
— | — | @@ -353,7 +353,7 @@ |
354 | 354 | if ( !$staticInitialised ) { |
355 | 355 | |
356 | 356 | $htmlpairsStatic = array( # Tags that must be closed |
357 | | - 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', |
| 357 | + 'a', 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', |
358 | 358 | 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', |
359 | 359 | 'strike', 'strong', 'tt', 'var', 'div', 'center', |
360 | 360 | 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', |
— | — | @@ -605,6 +605,8 @@ |
606 | 606 | */ |
607 | 607 | static function validateAttributes( $attribs, $whitelist ) { |
608 | 608 | $whitelist = array_flip( $whitelist ); |
| 609 | + $hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/'; |
| 610 | + |
609 | 611 | $out = array(); |
610 | 612 | foreach( $attribs as $attribute => $value ) { |
611 | 613 | if( !isset( $whitelist[$attribute] ) ) { |
— | — | @@ -626,6 +628,23 @@ |
627 | 629 | $wgEnforceHtmlIds ? 'noninitial' : 'xml' ); |
628 | 630 | } |
629 | 631 | |
| 632 | + if ( $attribute === 'href' || $attribute === 'src' ) { |
| 633 | + if ( !preg_match( $hrefExp, $value ) ) { |
| 634 | + continue; //drop any href or src attributes not using an allowed protocol. |
| 635 | + //NOTE: this also drops all relative URLs |
| 636 | + } |
| 637 | + } |
| 638 | + |
| 639 | + //RDFa properties allow URIs. check them |
| 640 | + if ( $attribute === 'rel' || $attribute === 'rev' || |
| 641 | + $attribute === 'about' || $attribute === 'property' || $attribute === 'resource' || |
| 642 | + $attribute === 'datatype' || $attribute === 'typeof' ) { |
| 643 | + //Paranoia. Allow "simple" values but suppress javascript |
| 644 | + if ( preg_match( '/(^|\s)javascript\s*:/i', $value ) ) { |
| 645 | + continue; |
| 646 | + } |
| 647 | + } |
| 648 | + |
630 | 649 | // If this attribute was previously set, override it. |
631 | 650 | // Output should only have one attribute of each name. |
632 | 651 | $out[$attribute] = $value; |
— | — | @@ -1154,7 +1173,11 @@ |
1155 | 1174 | * @return Array |
1156 | 1175 | */ |
1157 | 1176 | static function setupAttributeWhitelist() { |
1158 | | - $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' ); |
| 1177 | + $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style', |
| 1178 | + #RDFa attributes as specified in section 9 of http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014 |
| 1179 | + 'about', 'property', 'resource', 'datatype', 'typeof', |
| 1180 | + ); |
| 1181 | + |
1159 | 1182 | $block = array_merge( $common, array( 'align' ) ); |
1160 | 1183 | $tablealign = array( 'align', 'char', 'charoff', 'valign' ); |
1161 | 1184 | $tablecell = array( 'abbr', |
— | — | @@ -1260,6 +1283,9 @@ |
1261 | 1284 | 'td' => array_merge( $common, $tablecell, $tablealign ), |
1262 | 1285 | 'th' => array_merge( $common, $tablecell, $tablealign ), |
1263 | 1286 | |
| 1287 | + # 12.2 |
| 1288 | + 'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa |
| 1289 | + |
1264 | 1290 | # 13.2 |
1265 | 1291 | # Not usually allowed, but may be used for extension-style hooks |
1266 | 1292 | # such as <math> when it is rasterized |