r14624 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r14623 | r14624 | r14625 >
Date: 22:56, 6 June 2006
Author: brion
Status: old
Tags:
Comment:
* (bug 3202) Attributes now allowed on <pre> tags
* Sanitizer::validateTagAttributes now available to discard illegal/unsafe
attribute values from an array.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/Parser.php (modified) (history)
  • /trunk/phase3/includes/Sanitizer.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/Parser.php
@@ -132,6 +132,7 @@
133133 $this->mTagHooks = array();
134134 $this->mFunctionHooks = array();
135135 $this->clearState();
 136+ $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
136137 }
137138
138139 /**
@@ -422,7 +423,7 @@
423424 $commentState = array();
424425
425426 $elements = array_merge(
426 - array( 'nowiki', 'pre', 'gallery' ),
 427+ array( 'nowiki', 'gallery' ),
427428 array_keys( $this->mTagHooks ) );
428429 global $wgRawHtml;
429430 if( $wgRawHtml ) {
@@ -463,11 +464,6 @@
464465 case 'math':
465466 $output = MathRenderer::renderMath( $content );
466467 break;
467 - case 'pre':
468 - // Backwards-compatibility hack
469 - $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $content );
470 - $output = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
471 - break;
472468 case 'gallery':
473469 $output = $this->renderImageGallery( $content );
474470 break;
@@ -4032,6 +4028,19 @@
40334029 }
40344030
40354031 /**
 4032+ * Tag hook handler for 'pre'.
 4033+ */
 4034+ function renderPreTag( $text, $attribs, $parser ) {
 4035+ // Backwards-compatibility hack
 4036+ $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $text );
 4037+
 4038+ $attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
 4039+ return wfOpenElement( 'pre', $attribs ) .
 4040+ wfEscapeHTMLTagsOnly( $content ) .
 4041+ '</pre>';
 4042+ }
 4043+
 4044+ /**
40364045 * Renders an image gallery from a text with one line per image.
40374046 * text labels may be given by using |-style alternative text. E.g.
40384047 * Image:one.jpg|The number "1"
Index: trunk/phase3/includes/Sanitizer.php
@@ -538,53 +538,26 @@
539539 }
540540
541541 /**
542 - * Take a tag soup fragment listing an HTML element's attributes
543 - * and normalize it to well-formed XML, discarding unwanted attributes.
 542+ * Take an array of attribute names and values and normalize or discard
 543+ * illegal values for the given element type.
544544 *
545 - * - Normalizes attribute names to lowercase
546545 * - Discards attributes not on a whitelist for the given element
547 - * - Turns broken or invalid entities into plaintext
548 - * - Double-quotes all attribute values
549 - * - Attributes without values are given the name as attribute
550 - * - Double attributes are discarded
551546 * - Unsafe style attributes are discarded
552 - * - Prepends space if there are attributes.
553547 *
554 - * @param string $text
 548+ * @param array $attribs
555549 * @param string $element
556 - * @return string
 550+ * @return array
557551 *
558552 * @todo Check for legal values where the DTD limits things.
559553 * @todo Check for unique id attribute :P
560554 */
561 - function fixTagAttributes( $text, $element ) {
562 - if( trim( $text ) == '' ) {
563 - return '';
564 - }
565 -
566 - # Unquoted attribute
567 - # Since we quote this later, this can be anything distinguishable
568 - # from the end of the attribute
569 - $pairs = array();
570 - if( !preg_match_all(
571 - MW_ATTRIBS_REGEX,
572 - $text,
573 - $pairs,
574 - PREG_SET_ORDER ) ) {
575 - return '';
576 - }
577 -
 555+ function validateTagAttributes( $attribs, $element ) {
578556 $whitelist = array_flip( Sanitizer::attributeWhitelist( $element ) );
579 - $attribs = array();
580 - foreach( $pairs as $set ) {
581 - $attribute = strtolower( $set[1] );
 557+ $out = array();
 558+ foreach( $attribs as $attribute => $value ) {
582559 if( !isset( $whitelist[$attribute] ) ) {
583560 continue;
584561 }
585 -
586 - $raw = Sanitizer::getTagAttributeCallback( $set );
587 - $value = Sanitizer::normalizeAttributeValue( $raw );
588 -
589562 # Strip javascript "expression" from stylesheets.
590563 # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
591564 if( $attribute == 'style' ) {
@@ -592,7 +565,7 @@
593566
594567 // Remove any comments; IE gets token splitting wrong
595568 $stripped = preg_replace( '!/\\*.*?\\*/!S', ' ', $stripped );
596 - $value = htmlspecialchars( $stripped );
 569+ $value = $stripped;
597570
598571 // ... and continue checks
599572 $stripped = preg_replace( '!\\\\([0-9A-Fa-f]{1,6})[ \\n\\r\\t\\f]?!e',
@@ -608,9 +581,48 @@
609582 if ( $attribute === 'id' )
610583 $value = Sanitizer::escapeId( $value );
611584
 585+ // If this attribute was previously set, override it.
 586+ // Output should only have one attribute of each name.
 587+ $out[$attribute] = $value;
 588+ }
 589+ return $out;
 590+ }
 591+
 592+ /**
 593+ * Take a tag soup fragment listing an HTML element's attributes
 594+ * and normalize it to well-formed XML, discarding unwanted attributes.
 595+ * Output is safe for further wikitext processing, with escaping of
 596+ * values that could trigger problems.
 597+ *
 598+ * - Normalizes attribute names to lowercase
 599+ * - Discards attributes not on a whitelist for the given element
 600+ * - Turns broken or invalid entities into plaintext
 601+ * - Double-quotes all attribute values
 602+ * - Attributes without values are given the name as attribute
 603+ * - Double attributes are discarded
 604+ * - Unsafe style attributes are discarded
 605+ * - Prepends space if there are attributes.
 606+ *
 607+ * @param string $text
 608+ * @param string $element
 609+ * @return string
 610+ */
 611+ function fixTagAttributes( $text, $element ) {
 612+ if( trim( $text ) == '' ) {
 613+ return '';
 614+ }
 615+
 616+ $stripped = Sanitizer::validateTagAttributes(
 617+ Sanitizer::decodeTagAttributes( $text ), $element );
 618+
 619+ $attribs = array();
 620+ foreach( $stripped as $attribute => $value ) {
 621+ $encAttribute = htmlspecialchars( $attribute );
 622+
 623+ $encValue = htmlspecialchars( $value );
612624 # Templates and links may be expanded in later parsing,
613625 # creating invalid or dangerous output. Suppress this.
614 - $value = strtr( $value, array(
 626+ $encValue = strtr( $encValue, array(
615627 '<' => '&lt;', // This should never happen,
616628 '>' => '&gt;', // we've received invalid input
617629 '"' => '&quot;', // which should have been escaped.
@@ -625,16 +637,13 @@
626638 ) );
627639
628640 # Stupid hack
629 - $value = preg_replace_callback(
 641+ $encValue = preg_replace_callback(
630642 '/(' . wfUrlProtocols() . ')/',
631643 array( 'Sanitizer', 'armorLinksCallback' ),
632 - $value );
633 -
634 - // If this attribute was previously set, override it.
635 - // Output should only have one attribute of each name.
636 - $attribs[$attribute] = "$attribute=\"$value\"";
 644+ $encValue );
 645+
 646+ $attribs[] = "$encAttribute=\"$encValue\"";
637647 }
638 -
639648 return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
640649 }
641650
Index: trunk/phase3/RELEASE-NOTES
@@ -443,6 +443,9 @@
444444 * (bug 6175) Improvement to German translation (de)
445445 * Redirect Special:Logs to Special:Log
446446 * (bug 6206) Linktrail for Swedish localization (se)
 447+* (bug 3202) Attributes now allowed on <pre> tags
 448+* Sanitizer::validateTagAttributes now available to discard illegal/unsafe
 449+ attribute values from an array.
447450
448451
449452 == Compatibility ==

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r14623 | Parser tests for <pre> tag with attributes (bug 3202) | brion | 22:50, 6 June 2006

Status & tagging log