Index: trunk/extensions/VisualEditor/modules/parser/ext.core.Sanitizer.js |
— | — | @@ -0,0 +1,82 @@ |
| 2 | +/* |
| 3 | + * General token sanitizer. Strips out (or encapsulates) unsafe and disallowed |
| 4 | + * tag types and attributes. Should run last in the third, synchronous |
| 5 | + * expansion stage. Tokens from extensions which should not be sanitized |
| 6 | + * can bypass sanitization by setting their rank to 3.
| 7 | + * |
| 8 | + * @author Gabriel Wicke <gwicke@wikimedia.org> |
| 9 | + */ |
| 10 | + |
| 11 | +// Include general utilities |
| 12 | +var Util = require('./ext.Util.js').Util, |
| 13 | + u = new Util(); |
| 14 | + |
| 15 | +// Constructor: registers the new sanitizer's transforms with the given manager.
| 16 | +function Sanitizer ( manager ) {
| 17 | + this.register( manager );
| 18 | +}
| 19 | + |
| 20 | +// Transform ranks; both sit just below 3, the bypass rank named in the file header.
| 21 | +Sanitizer.prototype.handledRank = 2.99; // rank passed to addTransform for onAnchor
| 22 | +Sanitizer.prototype.anyRank = 2.9901; // not referenced by the code shown in this file
| 23 | + |
| 24 | + |
| 25 | +// Register this transformer with the TokenTransformer |
| 26 | +Sanitizer.prototype.register = function ( manager ) { |
| 27 | + this.manager = manager; |
| 28 | + manager.addTransform( this.onAnchor.bind(this), this.handledRank, 'tag', 'a' ); |
| 29 | +}; |
| 30 | + |
| 31 | +Sanitizer.prototype.onAnchor = function ( token ) { |
| 32 | + // perform something similar to Sanitizer::cleanUrl |
| 33 | + if ( token.type === 'ENDTAG' ) { |
| 34 | + return { token: token }; |
| 35 | + } |
| 36 | + var hrefKV = this.manager.env.lookupKV( token.attribs, 'href' ); |
| 37 | + if ( hrefKV !== null ) { |
| 38 | + var bits = hrefKV[1].match( /(.*?\/\/)([^\/]+)(\/?.*)/ ); |
| 39 | + if ( bits ) { |
| 40 | + proto = bits[1]; |
| 41 | + host = bits[2]; |
| 42 | + path = bits[3]; |
| 43 | + } else { |
| 44 | + proto = ''; |
| 45 | + host = ''; |
| 46 | + path = hrefKV[1]; |
| 47 | + } |
| 48 | + host = this._stripIDNs( host ); |
| 49 | + hrefKV[1] = proto + host + path; |
| 50 | + } |
| 51 | + return { token: token }; |
| 52 | +}; |
| 53 | + |
| 54 | +// XXX: We actually need to strip IDN ignored characters in the link text as
| 55 | +// well, so that readers are not misled. This should perhaps happen at an
| 56 | +// earlier stage, while converting links to html.
| 57 | +Sanitizer.prototype._IDNRegexp = new RegExp( // global pattern; _stripIDNs deletes every match
| 58 | + "[\t ]|" + // general whitespace
| 59 | + "\u00ad|" + // 00ad SOFT HYPHEN
| 60 | + "\u1806|" + // 1806 MONGOLIAN TODO SOFT HYPHEN
| 61 | + "\u200b|" + // 200b ZERO WIDTH SPACE
| 62 | + "\u2060|" + // 2060 WORD JOINER
| 63 | + "\ufeff|" + // feff ZERO WIDTH NO-BREAK SPACE
| 64 | + "\u034f|" + // 034f COMBINING GRAPHEME JOINER
| 65 | + "\u180b|" + // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
| 66 | + "\u180c|" + // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
| 67 | + "\u180d|" + // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
| 68 | + "\u200c|" + // 200c ZERO WIDTH NON-JOINER
| 69 | + "\u200d|" + // 200d ZERO WIDTH JOINER
| 70 | + "[\ufe00-\ufe0f]" // fe00-fe0f VARIATION SELECTOR-1 through VARIATION SELECTOR-16
| 71 | + , 'g'
| 72 | + );
| 73 | +// Return host with every character matched by _IDNRegexp removed.
| 74 | +Sanitizer.prototype._stripIDNs = function ( host ) {
| 75 | + return host.replace( this._IDNRegexp, '' );
| 76 | +};
| 77 | + |
| 78 | + |
| 79 | + |
| 80 | + |
| 81 | +if (typeof module == "object") { |
| 82 | + module.exports.Sanitizer = Sanitizer; |
| 83 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/ext.core.Sanitizer.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 84 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js |
— | — | @@ -18,6 +18,7 @@ |
19 | 19 | QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer, |
20 | 20 | PostExpandParagraphHandler = require('./ext.core.PostExpandParagraphHandler.js') |
21 | 21 | .PostExpandParagraphHandler, |
| 22 | + Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer, |
22 | 23 | TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler, |
23 | 24 | Cite = require('./ext.Cite.js').Cite, |
24 | 25 | FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5, |
— | — | @@ -71,6 +72,7 @@ |
72 | 73 | // Add token transformations.. |
73 | 74 | new QuoteTransformer( this.tokenPostProcessor ); |
74 | 75 | new PostExpandParagraphHandler( this.tokenPostProcessor ); |
| 76 | + new Sanitizer( this.tokenPostProcessor ); |
75 | 77 | |
76 | 78 | //var citeExtension = new Cite( this.tokenTransformer ); |
77 | 79 | |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js |
— | — | @@ -14,12 +14,15 @@ |
15 | 15 | }; |
16 | 16 | |
17 | 17 | MWParserEnvironment.prototype.lookupKV = function ( kvs, key ) { |
| 18 | + if ( ! kvs ) { |
| 19 | + return null; |
| 20 | + } |
18 | 21 | var kv; |
19 | 22 | for ( var i = 0, l = kvs.length; i < l; i++ ) { |
20 | 23 | kv = kvs[i]; |
21 | 24 | if ( kv[0] === key ) { |
22 | 25 | // found, return it. |
23 | | - return kv[1]; |
| 26 | + return kv; |
24 | 27 | } |
25 | 28 | } |
26 | 29 | // nothing found! |