Index: branches/hashar/includes/Sanitizer.php |
— | — | @@ -328,7 +328,7 @@ |
329 | 329 | * @return string |
330 | 330 | */ |
331 | 331 | function removeHTMLtags( $text, $processCallback = null, $args = array() ) { |
332 | | - global $wgUseTidy, $wgUserHtml; |
| 332 | + global $wgUseTidy, $wgUserHtml, $wgDebugSanitizer; |
333 | 333 | $fname = 'Parser::removeHTMLtags'; |
334 | 334 | wfProfileIn( $fname ); |
335 | 335 | |
— | — | @@ -375,8 +375,10 @@ |
376 | 376 | $bits = explode( '<', $text ); |
377 | 377 | $text = array_shift( $bits ); |
378 | 378 | if(!$wgUseTidy) { |
| 379 | + if($wgDebugSanitizer) { wfDebug("\nSanitizer: BEGIN removeHTMLtags without tidy\n\n"); } |
379 | 380 | $tagstack = array(); $tablestack = array(); |
380 | 381 | foreach ( $bits as $x ) { |
| 382 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: NEW BIT: '$x'\n"); } |
381 | 383 | $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); |
382 | 384 | preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', |
383 | 385 | $x, $regs ); |
— | — | @@ -387,13 +389,17 @@ |
388 | 390 | if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { |
389 | 391 | # Check our stack |
390 | 392 | if ( $slash ) { |
| 393 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: slash: $t\n"); } |
391 | 394 | # Closing a tag... |
392 | 395 | if( in_array( $t, $htmlsingleonly ) ) { |
| 396 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: htmlsingleonly: $t\n"); } |
393 | 397 | $badtag = 1; |
394 | 398 | } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) { |
| 399 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: diff: $t != $ot\n"); } |
395 | 400 | @array_push( $tagstack, $ot ); |
396 | 401 | # <li> can be nested in <ul> or <ol>, skip those cases: |
397 | 402 | if(!(in_array($ot, $htmllist) && in_array($t, $listtags) )) { |
| 403 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: $t FUN => badtag\n"); } |
398 | 404 | $badtag = 1; |
399 | 405 | } |
400 | 406 | } else { |
— | — | @@ -403,21 +409,27 @@ |
404 | 410 | $newparams = ''; |
405 | 411 | } |
406 | 412 | } else { |
| 413 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: $t NO slash\n"); } |
407 | 414 | # Keep track for later |
408 | 415 | if ( in_array( $t, $tabletags ) && |
409 | 416 | ! in_array( 'table', $tagstack ) ) { |
| 417 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: $t out of table? => badtag\n"); } |
410 | 418 | $badtag = 1; |
411 | 419 | } else if ( in_array( $t, $tagstack ) && |
412 | 420 | ! in_array ( $t , $htmlnest ) ) { |
| 421 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: $t not a nest => badtag\n"); } |
413 | 422 | $badtag = 1 ; |
414 | 423 | # Is it a self closed htmlpair ? (bug 5487) |
415 | 424 | } else if( $brace == '/>' && |
416 | 425 | in_array($t, $htmlpairs) ) { |
| 426 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: $t not a self closed pair => badtag\n"); } |
417 | 427 | $badtag = 1; |
418 | 428 | } elseif( in_array( $t, $htmlsingleonly ) ) { |
| 429 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: $t htmlsingleonly close forced\n"); } |
419 | 430 | # Hack to force empty tag for uncloseable elements |
420 | 431 | $brace = '/>'; |
421 | 432 | } else if( in_array( $t, $htmlsingle ) ) { |
| 433 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: $t htmlsingle noclose forced\n"); } |
422 | 434 | # Hack to not close $htmlsingle tags |
423 | 435 | $brace = NULL; |
424 | 436 | } else { |
— | — | @@ -440,16 +452,28 @@ |
441 | 453 | if ( ! $badtag ) { |
441 | 453 | $rest = str_replace( '>', '&gt;', $rest ); |
443 | 455 | $close = ( $brace == '/>' ) ? ' /' : ''; |
444 | | - $text .= "<$slash$t$newparams$close>$rest"; |
| 456 | + $toadd = "<$slash$t$newparams$close>$rest"; |
| 457 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: RESULT: '$toadd'\n"); } |
| 458 | + $text .= $toadd; |
445 | 459 | continue; |
446 | 460 | } |
447 | 461 | } |
| 462 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: RESULT: escaping '$x'\n"); } |
448 | 463 | $text .= '&lt;' . str_replace( '>', '&gt;', $x); |
449 | 464 | } |
450 | | - # Close off any remaining tags |
451 | | - while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) { |
452 | | - $text .= "</$t>\n"; |
453 | | - if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); } |
| 465 | + |
| 466 | + if( is_array( $tagstack ) ) { |
| 467 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: start closing remaining tags:\n");} |
| 468 | + # Close off any remaining tags |
| 469 | + while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) { |
| 470 | + if($wgDebugSanitizer) { wfDebug("Sanitizer: closing $t\n");} |
| 471 | + $text .= "</$t>\n"; |
| 472 | + if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); } |
| 473 | + } |
| 474 | + if($wgDebugSanitizer) { |
| 475 | + wfDebug("Sanitizer: closed all remainingtags.\n"); |
| 476 | + wfDebug("\nSanitizer: END OF removeHTMLtags without tidy\n"); |
| 477 | + } |
454 | 478 | } |
455 | 479 | } else { |
456 | 480 | # this might be possible using tidy itself |
Index: branches/hashar/includes/DefaultSettings.php |
— | — | @@ -703,6 +703,7 @@ |
704 | 704 | $wgDebugRedirects = false; |
705 | 705 | $wgDebugRawPage = false; # Avoid overlapping debug entries by leaving out CSS |
706 | 706 | |
| 707 | +$wgDebugSanitizer = false; |
707 | 708 | $wgDebugComments = false; |
708 | 709 | $wgReadOnly = null; |
709 | 710 | $wgLogQueries = false; |