r14543 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r14542 | r14543 | r14544 >
Date:00:01, 3 June 2006
Author:wegge
Status:old
Tags:
Comment:
Bug 6171: Sanitizing of HTML-elements with optional end tags.
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/Sanitizer.php (modified) (history)
  • /trunk/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.txt
@@ -4293,6 +4293,49 @@
42944294
42954295 !!end
42964296
 4297+!! test
 4298+Parsing optional HTML elements (Bug 6171)
 4299+!! options
 4300+!! input
 4301+<table>
 4302+ <tr>
 4303+ <td> Some tabular data</td>
 4304+ <td> More tabular data ...
 4305+ <td> And yet som tabular data</td>
 4306+ </tr>
 4307+</table>
 4308+!! result
 4309+<table>
 4310+ <tr>
 4311+ <td> Some tabular data</td>
 4312+ <td> More tabular data ...</td>
 4313+ <td> And yet som tabular data</td>
 4314+ </tr>
 4315+</table>
 4316+
 4317+!! end
 4318+
 4319+!! test
 4320+Correct handling of <td>, <tr> (Bug 6171)
 4321+!! options
 4322+!! input
 4323+<table>
 4324+ <tr>
 4325+ <td> Some tabular data</td>
 4326+ <td> More tabular data ...</td>
 4327+ <td> And yet som tabular data</td>
 4328+ </tr>
 4329+</table>
 4330+!! result
 4331+<table>
 4332+ <tr>
 4333+ <td> Some tabular data</td>
 4334+ <td> More tabular data ...</td>
 4335+ <td> And yet som tabular data</td>
 4336+ </tr>
 4337+</table>
 4338+
 4339+!! end
42974340 #
42984341 #
42994342 #
Index: trunk/phase3/includes/Sanitizer.php
@@ -367,8 +367,8 @@
368368 $tabletags = array();
369369 }
370370
371 - $htmlsingle = array_merge( $tabletags, $htmlsingle );
372 - $htmlelements = array_merge( $htmlsingle, $htmlpairs );
 371+ $htmlsingleallowed = array_merge( $htmlsingle, $tabletags );
 372+ $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest );
373373
374374 # Remove HTML comments
375375 $text = Sanitizer::removeHTMLcomments( $text );
@@ -391,10 +391,28 @@
392392 if( in_array( $t, $htmlsingleonly ) ) {
393393 $badtag = 1;
394394 } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) {
395 - @array_push( $tagstack, $ot );
396 - # <li> can be nested in <ul> or <ol>, skip those cases:
397 - if(!(in_array($ot, $htmllist) && in_array($t, $listtags) )) {
398 - $badtag = 1;
 395+ if ( in_array($ot, $htmlsingleallowed) ) {
 396+ # Pop all elements with an optional close tag
 397+ # and see if we find a match below them
 398+ $optstack = array();
 399+ array_push ($optstack, $ot);
 400+ while ( ( ( $ot = @array_pop( $tagstack ) ) != $t ) &&
 401+ in_array($ot, $htmlsingleallowed) ) {
 402+ array_push ($optstack, $ot);
 403+ }
 404+ if ( $t != $ot ) {
 405+ # No match. Push the optional elements back again
 406+ $badtag = 1;
 407+ while ( $ot = @array_pop( $optstack ) ) {
 408+ array_push( $tagstack, $ot );
 409+ }
 410+ }
 411+ } else {
 412+ @array_push( $tagstack, $ot );
 413+ # <li> can be nested in <ul> or <ol>, skip those cases:
 414+ if(!(in_array($ot, $htmllist) && in_array($t, $listtags) )) {
 415+ $badtag = 1;
 416+ }
399417 }
400418 } else {
401419 if ( $t == 'table' ) {
Index: trunk/phase3/RELEASE-NOTES
@@ -412,8 +412,10 @@
413413 * New message sp-newimages-showfrom replaces rclistfrom on special:newimages
414414 * Improve handling of ;: definition list construct with overlapping or
415415 nested HTML tags
 416+* (bug 6171) Fix sanitizing of HTML-elements with an optional closing
 417+ tag. The sanitizer still needs to learn how to make well-formed XML
 418+ in this case.
416419
417 -
418420 == Compatibility ==
419421
420422 MediaWiki 1.7 requires PHP 5 (5.1 recommended). PHP 4 is no longer supported.

Status & tagging log