r14689 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r14688 | r14689 | r14690 >
Date: 21:21, 9 June 2006
Author: brion
Status: old
Tags:
Comment:
* Whitespace now normalized more or less properly in HTML attributes
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/Sanitizer.php (modified) (history)
  • /trunk/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.txt
@@ -3497,7 +3497,7 @@
34983498 !! input
34993499 {{div style|" ><script>alert(document.cookie)</script>}}
35003500 !! result
3501 -<div style="float: right; ">Magic div</div>
 3501+<div style="float: right;">Magic div</div>
35023502
35033503 !! end
35043504
@@ -3668,6 +3668,42 @@
36693669
36703670 !! end
36713671
 3672+
 3673+!! article
 3674+Template:Identity
 3675+!! text
 3676+{{{1}}}
 3677+!! endarticle
 3678+
 3679+!! test
 3680+Expansion of multi-line templates in attribute values (bug 6255)
 3681+!! input
 3682+<div style="background: {{identity|#00FF00}}">-</div>
 3683+!! result
 3684+<div style="background: #00FF00">-</div>
 3685+
 3686+!! end
 3687+
 3688+
 3689+!! test
 3690+Expansion of multi-line templates in attribute values (bug 6255 sanity check)
 3691+!! input
 3692+<div style="background:
 3693+#00FF00">-</div>
 3694+!! result
 3695+<div style="background: #00FF00">-</div>
 3696+
 3697+!! end
 3698+
 3699+!! test
 3700+Expansion of multi-line templates in attribute values (bug 6255 sanity check)
 3701+!! input
 3702+<div style="background: &#10;#00FF00">-</div>
 3703+!! result
 3704+<div style="background: &#10;#00FF00">-</div>
 3705+
 3706+!! end
 3707+
36723708 ###
36733709 ### Parser hooks (see maintenance/parserTestsParserHook.php for the <tag> extension)
36743710 ###
@@ -4290,7 +4326,7 @@
42914327 <table>
42924328
42934329 <u class="&#124;">} &gt;
4294 -<br style="onmouseover='alert(document.cookie);' " />
 4330+<br style="onmouseover='alert(document.cookie);'" />
42954331
42964332 MOVE YOUR MOUSE CURSOR OVER THIS TEXT
42974333 <tr>
Index: trunk/phase3/includes/Sanitizer.php
@@ -618,36 +618,67 @@
619619 $attribs = array();
620620 foreach( $stripped as $attribute => $value ) {
621621 $encAttribute = htmlspecialchars( $attribute );
 622+ $encValue = Sanitizer::safeEncodeAttribute( $value );
622623
623 - $encValue = htmlspecialchars( $value );
624 - # Templates and links may be expanded in later parsing,
625 - # creating invalid or dangerous output. Suppress this.
626 - $encValue = strtr( $encValue, array(
627 - '<' => '&lt;', // This should never happen,
628 - '>' => '&gt;', // we've received invalid input
629 - '"' => '&quot;', // which should have been escaped.
630 - '{' => '&#123;',
631 - '[' => '&#91;',
632 - "''" => '&#39;&#39;',
633 - 'ISBN' => '&#73;SBN',
634 - 'RFC' => '&#82;FC',
635 - 'PMID' => '&#80;MID',
636 - '|' => '&#124;',
637 - '__' => '&#95;_',
638 - ) );
639 -
640 - # Stupid hack
641 - $encValue = preg_replace_callback(
642 - '/(' . wfUrlProtocols() . ')/',
643 - array( 'Sanitizer', 'armorLinksCallback' ),
644 - $encValue );
645 -
646624 $attribs[] = "$encAttribute=\"$encValue\"";
647625 }
648626 return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
649627 }
650628
651629 /**
 630+ * Encode an attribute value for HTML output.
 631+ * @param $text
 632+ * @return HTML-encoded text fragment
 633+ */
 634+ function encodeAttribute( $text ) {
 635+ $encValue = htmlspecialchars( $text );
 636+
 637+ // Whitespace is normalized during attribute decoding,
 638+ // so if we've been passed non-spaces we must encode them
 639+ // ahead of time or they won't be preserved.
 640+ $encValue = strtr( $encValue, array(
 641+ "\n" => '&#10;',
 642+ "\r" => '&#13;',
 643+ "\t" => '&#9;',
 644+ ) );
 645+
 646+ return $encValue;
 647+ }
 648+
 649+ /**
 650+ * Encode an attribute value for HTML tags, with extra armoring
 651+ * against further wiki processing.
 652+ * @param $text
 653+ * @return HTML-encoded text fragment
 654+ */
 655+ function safeEncodeAttribute( $text ) {
 656+ $encValue = Sanitizer::encodeAttribute( $text );
 657+
 658+ # Templates and links may be expanded in later parsing,
 659+ # creating invalid or dangerous output. Suppress this.
 660+ $encValue = strtr( $encValue, array(
 661+ '<' => '&lt;', // This should never happen,
 662+ '>' => '&gt;', // we've received invalid input
 663+ '"' => '&quot;', // which should have been escaped.
 664+ '{' => '&#123;',
 665+ '[' => '&#91;',
 666+ "''" => '&#39;&#39;',
 667+ 'ISBN' => '&#73;SBN',
 668+ 'RFC' => '&#82;FC',
 669+ 'PMID' => '&#80;MID',
 670+ '|' => '&#124;',
 671+ '__' => '&#95;_',
 672+ ) );
 673+
 674+ # Stupid hack
 675+ $encValue = preg_replace_callback(
 676+ '/(' . wfUrlProtocols() . ')/',
 677+ array( 'Sanitizer', 'armorLinksCallback' ),
 678+ $encValue );
 679+ return $encValue;
 680+ }
 681+
 682+ /**
652683 * Given a value escape it so that it can be used in an id attribute and
653684 * return it, this does not validate the value however (see first link)
654685 *
@@ -711,6 +742,12 @@
712743 foreach( $pairs as $set ) {
713744 $attribute = strtolower( $set[1] );
714745 $value = Sanitizer::getTagAttributeCallback( $set );
 746+
 747+ // Normalize whitespace
 748+ $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
 749+ $value = trim( $value );
 750+
 751+ // Decode character references
715752 $attribs[$attribute] = Sanitizer::decodeCharReferences( $value );
716753 }
717754 return $attribs;
Index: trunk/phase3/RELEASE-NOTES
@@ -467,7 +467,10 @@
468468 * (bug 2069) Merge the LanguageUtf8 class into the Language class
469469 * Update to Yiddish localization (yi)
470470 * (bug 6254) Update to Indonesian translation (id) #20
 471+* (bug 6255) Fix transclusions starting with "#" or "*" in HTML attributes
 472+* Whitespace now normalized more or less properly in HTML attributes
471473
 474+
472475 == Compatibility ==
473476
474477 MediaWiki 1.7 requires PHP 5 (5.1 recommended). PHP 4 is no longer supported.

Status & tagging log