r109278 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r109277‎ | r109278 | r109279 >
Date:01:42, 18 January 2012
Author:gwicke
Status:deferred
Tags:
Comment:
Add the start of a minimal sanitizer stage that, for now, only strips
IDN-ignored characters from the host portion of link hrefs. This module needs
to be filled up with pretty much everything Sanitizer.php does, including tag
and attribute whitelists and attribute value sanitization (especially for style
attributes).

We'll also need to think about round-tripping of sanitized tokens.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.Sanitizer.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/ext.core.Sanitizer.js
@@ -0,0 +1,82 @@
 2+/*
 3+ * General token sanitizer. Strips out (or encapsulates) unsafe and disallowed
 4+ * tag types and attributes. Should run last in the third, synchronous
 5+ * expansion stage. Tokens from extensions which should not be sanitized
 6+ * can bypass sanitation by setting their rank to 3.
 7+ *
 8+ * @author Gabriel Wicke <gwicke@wikimedia.org>
 9+ */
 10+
 11+// Include general utilities
 12+var Util = require('./ext.Util.js').Util,
 13+ u = new Util();
 14+
 15+
/**
 * Token sanitizer stage. Constructing an instance immediately registers its
 * handlers with the given token transform manager.
 *
 * @param {Object} manager Token transform manager; must provide
 *   addTransform() and an env object exposing lookupKV().
 */
function Sanitizer ( manager ) {
	this.register( manager );
}

// constants
// Rank passed to the manager when registering the anchor handler.
Sanitizer.prototype.handledRank = 2.99;
// NOTE(review): not referenced anywhere in this file; presumably the rank
// intended for handlers that match any token -- confirm before relying on it.
Sanitizer.prototype.anyRank = 2.9901;

// Matches an optional scheme ("http:") followed by "//" and the authority
// (host) part of a URL. The match is anchored at the start of the string and
// the authority ends at the first "/", "?" or "#", so path, query and
// fragment are never treated as part of the host.
Sanitizer.prototype._authorityRegexp = /^([^:\/?#]*:)?\/\/([^\/?#]*)/;

/**
 * Register this transformer with the TokenTransformer.
 *
 * @param {Object} manager Token transform manager (see constructor).
 */
Sanitizer.prototype.register = function ( manager ) {
	this.manager = manager;
	manager.addTransform( this.onAnchor.bind( this ), this.handledRank, 'tag', 'a' );
};

/**
 * Handler for anchor tag tokens; performs something similar to
 * Sanitizer::cleanUrl. Strips IDN-ignored characters from the host portion
 * of the href attribute. The attribute's key/value pair is modified in place.
 *
 * @param {Object} token Anchor token; tokens whose type is 'ENDTAG' are
 *   passed through untouched.
 * @return {Object} Object of the form { token: token }.
 */
Sanitizer.prototype.onAnchor = function ( token ) {
	var hrefKV, href, bits, hostStart;

	if ( token.type === 'ENDTAG' ) {
		return { token: token };
	}

	hrefKV = this.manager.env.lookupKV( token.attribs, 'href' );
	if ( hrefKV ) {
		href = hrefKV[1];
		bits = href.match( this._authorityRegexp );
		if ( bits ) {
			// Replace only the host span and keep everything before and
			// after it verbatim, so nothing else in the href is altered
			// or lost.
			hostStart = bits[0].length - bits[2].length;
			hrefKV[1] = href.slice( 0, hostStart ) +
				this._stripIDNs( bits[2] ) +
				href.slice( bits[0].length );
		}
		// Without an authority part there is no host to clean; the href is
		// left as it is.
	}
	return { token: token };
};

// XXX: We actually need to strip IDN ignored characters in the link text as
// well, so that readers are not misled. This should perhaps happen at an
// earlier stage, while converting links to html.
Sanitizer.prototype._IDNRegexp = new RegExp(
	"[\t ]|" + // general whitespace
	"\u00ad|" + // 00ad SOFT HYPHEN
	"\u1806|" + // 1806 MONGOLIAN TODO SOFT HYPHEN
	"\u200b|" + // 200b ZERO WIDTH SPACE
	"\u2060|" + // 2060 WORD JOINER
	"\ufeff|" + // feff ZERO WIDTH NO-BREAK SPACE
	"\u034f|" + // 034f COMBINING GRAPHEME JOINER
	"\u180b|" + // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
	"\u180c|" + // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
	"\u180d|" + // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
	"\u200c|" + // 200c ZERO WIDTH NON-JOINER
	"\u200d|" + // 200d ZERO WIDTH JOINER
	"[\ufe00-\ufe0f]" // fe00-fe0f VARIATION SELECTOR-1-16
	, 'g'
);

/**
 * Remove every character matched by _IDNRegexp from a host string.
 *
 * @param {string} host Host portion of a URL.
 * @return {string} The host with all IDN-ignored characters removed.
 */
Sanitizer.prototype._stripIDNs = function ( host ) {
	return host.replace( this._IDNRegexp, '' );
};

// Export for CommonJS-style loaders; skipped where no module object exists.
if ( typeof module === "object" ) {
	module.exports.Sanitizer = Sanitizer;
}
Property changes on: trunk/extensions/VisualEditor/modules/parser/ext.core.Sanitizer.js
___________________________________________________________________
Added: svn:eol-style
184 + native
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js
@@ -18,6 +18,7 @@
1919 QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer,
2020 PostExpandParagraphHandler = require('./ext.core.PostExpandParagraphHandler.js')
2121 .PostExpandParagraphHandler,
 22+ Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer,
2223 TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler,
2324 Cite = require('./ext.Cite.js').Cite,
2425 FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5,
@@ -71,6 +72,7 @@
7273 // Add token transformations..
7374 new QuoteTransformer( this.tokenPostProcessor );
7475 new PostExpandParagraphHandler( this.tokenPostProcessor );
 76+ new Sanitizer( this.tokenPostProcessor );
7577
7678 //var citeExtension = new Cite( this.tokenTransformer );
7779
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
@@ -14,12 +14,15 @@
1515 };
1616
1717 MWParserEnvironment.prototype.lookupKV = function ( kvs, key ) {
 18+ if ( ! kvs ) {
 19+ return null;
 20+ }
1821 var kv;
1922 for ( var i = 0, l = kvs.length; i < l; i++ ) {
2023 kv = kvs[i];
2124 if ( kv[0] === key ) {
2225 // found, return it.
23 - return kv[1];
 26+ return kv;
2427 }
2528 }
2629 // nothing found!

Status & tagging log