r13441 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r13440‎ | r13441 | r13442 >
Date:08:36, 3 April 2006
Author:brion
Status:old
Tags:
Comment:
* Additional protections against HTML breakage in table parsing
Avoid splitting table cells on "||" appearing in HTML tag attributes.
Bad tag nesting is still possible, but this should keep things from
being split unexpectedly in the middle of an element, and might
avoid some potential injection points.
See http://mail.wikipedia.org/pipermail/wikitech-l/2006-April/034637.html
Modified paths:
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/GlobalFunctions.php (modified) (history)
  • /trunk/phase3/includes/Parser.php (modified) (history)
  • /trunk/phase3/maintenance/parserTests.txt (modified) (history)

Diff [purge]

Index: trunk/phase3/maintenance/parserTests.txt
@@ -1035,6 +1035,23 @@
10361036 !! end
10371037
10381038
 1039+# FIXME: this one has incorrect tag nesting still.
 1040+!! test
 1041+Table security: embedded pipes (http://mail.wikipedia.org/pipermail/wikitech-l/2006-April/034637.html)
 1042+!! input
 1043+{|
 1044+| |[ftp://|x||]" onmouseover="alert(document.cookie)">test
 1045+!! result
 1046+<table>
 1047+<tr>
 1048+<td><a href="ftp://|x||" class='external free' title="ftp://|x||" rel="nofollow">ftp://|x</td><td></a>" onmouseover="alert(document.cookie)">test
 1049+</td>
 1050+</tr>
 1051+</table>
 1052+
 1053+!! end
 1054+
 1055+
10391056 ###
10401057 ### Internal links
10411058 ###
Index: trunk/phase3/includes/GlobalFunctions.php
@@ -1749,4 +1749,40 @@
17501750 $wgPostCommitUpdateList = array();
17511751 }
17521752
 1753+/**
 1754+ * More or less "markup-safe" explode()
 1755+ * Ignores any instances of the separator inside <...>
 1756+ * @param string $separator
 1757+ * @param string $text
 1758+ * @return array
 1759+ */
 1760+function wfExplodeMarkup( $separator, $text ) {
 1761+ $placeholder = "\x00";
 1762+
 1763+ // Just in case...
 1764+ $text = str_replace( $placeholder, '', $text );
 1765+
 1766+ // Trim stuff
 1767+ $replacer = new ReplacerCallback( $separator, $placeholder );
 1768+ $cleaned = preg_replace_callback( '/(<.*?>)/', array( $replacer, 'go' ), $text );
 1769+
 1770+ $items = explode( $separator, $cleaned );
 1771+ foreach( $items as $i => $str ) {
 1772+ $items[$i] = str_replace( $placeholder, $separator, $str );
 1773+ }
 1774+
 1775+ return $items;
 1776+}
 1777+
 1778+class ReplacerCallback {
 1779+ function ReplacerCallback( $from, $to ) {
 1780+ $this->from = $from;
 1781+ $this->to = $to;
 1782+ }
 1783+
 1784+ function go( $matches ) {
 1785+ return str_replace( $this->from, $this->to, $matches[1] );
 1786+ }
 1787+}
 1788+
17531789 ?>
Index: trunk/phase3/includes/Parser.php
@@ -783,7 +783,13 @@
784784 }
785785 $after = substr ( $x , 1 ) ;
786786 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
787 - $after = explode ( '||' , $after ) ;
 787+
 788+ // Split up multiple cells on the same line.
 789+ // FIXME: This can result in improper nesting of tags processed
 790+ // by earlier parser steps, but should avoid splitting up eg
 791+ // attribute values containing literal "||".
 792+ $after = wfExplodeMarkup( '||', $after );
 793+
788794 $t[$k] = '' ;
789795
790796 # Loop through each table cell
Index: trunk/phase3/RELEASE-NOTES
@@ -733,6 +733,7 @@
734734 * (bug 5277) Use audio/midi rather that audio/mid
735735 * (bug 5410) Use namespace name when a custom namespace's nstab-NS message is nonexistent
736736 * (bug 5432) Fix inconsistencies in cookie names when using table prefixes
 737+* Additional protections against HTML breakage in table parsing
737738
738739
739740 === Caveats ===

Status & tagging log