Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js |
— | — | @@ -46,36 +46,24 @@ |
47 | 47 | |
48 | 48 | /* Missing token transform functionality */ |
49 | 49 | |
50 | | -// We don't implement percent encoding for URIs yet. |
51 | | -testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a data-mw-type=\"internal\" href=\"/wiki/Lista d''e paise d''o munno\">Lista d''e paise d''o munno</a></p>"; |
| 50 | +// Single quotes are legal in HTML5 URIs. See |
| 51 | +// http://www.whatwg.org/specs/web-apps/current-work/multipage/urls.html#url-manipulation-and-creation |
| 52 | +testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a href=\"/wiki/Lista_d''e_paise_d''o_munno\" data-mw-type=\"internal\">Lista d''e paise d''o munno</a></p>"; |
52 | 53 | |
53 | | -testWhiteList["Link containing \"<#\" and \">#\" as a hex sequences"] = "<p><a data-mw-type=\"internal\" href=\"/wiki/<%23\"><%23</a><a data-mw-type=\"internal\" href=\"/wiki/>%23\">>%23</a></p>"; |
54 | 54 | |
55 | | - |
56 | 55 | // Sanitizer |
57 | 56 | testWhiteList["Invalid attributes in table cell (bug 1830)"] = "<table><tbody><tr><td Cell:=\"\">broken</td></tr></tbody></table>"; |
58 | 57 | testWhiteList["Table security: embedded pipes (http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)"] = "<table><tbody><tr><td> |<a href=\"ftp://|x||\">[1]</a>\" onmouseover=\"alert(document.cookie)\">test</td></tr></tbody></table>"; |
59 | 58 | |
60 | | -// Sanitizer, but UTF8 in link might actually be ok in HTML5 |
| 59 | +// Sanitizer, but UTF8 in link is ok in HTML5 |
61 | 60 | testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\" data-mw-type=\"external\" data-mw-rt=\"{"sourcePos":[0,146]}\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a href=\"/wiki/Museo_Picasso_(París)\" data-mw-type=\"internal\">Museo Picasso</a>.</p>"; |
62 | 61 | |
63 | | -// plain percent sign is also valid in HTML5 |
64 | | -testWhiteList["Bug 4781, 5267: %28, %29 in URL"] = "<p><a href=\"http://www.example.com/?title=Ben-Hur_(1959_film)\" data-mw-sourcePos=\"0:53\">http://www.example.com/?title=Ben-Hur_(1959_film)</a></p>"; |
65 | | - |
66 | 62 | testWhiteList["External links: wiki links within external link (Bug 3695)"] = "<p><a href=\"http://example.com\" data-mw-type=\"external\" data-mw-sourcePos=\"0:54\"></a><a data-mw-type=\"internal\" href=\"/wiki/Wikilink\">wikilink</a> embedded in ext link</p>"; |
67 | 63 | |
68 | | -testWhiteList["Bug 4781, 5267: %25 in URL"] = "<p><a href=\"http://www.example.com/?title=100%_Bran\" data-mw-sourcePos=\"0:41\">http://www.example.com/?title=100%_Bran</a></p>"; |
69 | | - |
70 | 64 | testWhiteList["<pre> with forbidden attribute values (bug 3202)"] = "<pre width=\"8\" style=\"\">Narrow screen goodies</pre>"; |
71 | 65 | |
72 | | -testWhiteList["Link containing % (not as a hex sequence)"] = "<p><a href=\"/wiki/7%_Solution\" data-mw-type=\"internal\">7% Solution</a></p>"; |
| 66 | +//testWhiteList["Piped link to URL"] = "<p>Piped link to URL: [<a href=\"http://www.example.com|an\" data-mw-type=\"external\">example URL</a>]</p>"; |
73 | 67 | |
74 | | -testWhiteList["Link containing % as a single hex sequence interpreted to char"] = "<p><a href=\"/wiki/7%_Solution\" data-mw-type=\"internal\">7% Solution</a></p>"; |
75 | | - |
76 | | -testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a href=\"/wiki/Lista_d''e_paise_d''o_munno\" data-mw-type=\"internal\">Lista d''e paise d''o munno</a></p>"; |
77 | | - |
78 | | -testWhiteList["Brackets in urls"] = "<p><a href=\"http://example.com/index.php?foozoid[]=bar\">http://example.com/index.php?foozoid[]=bar</a></p><p><a href=\"http://example.com/index.php?foozoid[]=bar\">http://example.com/index.php?foozoid[]=bar</a></p>"; |
79 | | - |
80 | 68 | if (typeof module == "object") { |
81 | 69 | module.exports.testWhiteList = testWhiteList; |
82 | 70 | } |
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -645,7 +645,7 @@ |
646 | 646 | ) |
647 | 647 | / s:[.:,] !(space / eolf) { return s } |
648 | 648 | / htmlentity |
649 | | - / urlencoded_char |
| 649 | + /// urlencoded_char |
650 | 650 | / [&%] )+ |
651 | 651 | { |
652 | 652 | return proto + addr + rest.join(''); |
— | — | @@ -1674,17 +1674,19 @@ |
1675 | 1675 | |
1676 | 1676 | |
1677 | 1677 | wikilink_preprocessor_text |
1678 | | - = r:( t:[^%<~[{\n\r\t|!\]} &=]+ { return t.join(''); } |
1679 | | - / urlencoded_char |
| 1678 | + = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); } |
| 1679 | + /// urlencoded_char |
1680 | 1680 | / directive |
1681 | 1681 | / !inline_breaks !"|" !"]]" text_char )+ { |
1682 | 1682 | return flatten_stringlist ( r ); |
1683 | 1683 | } |
1684 | 1684 | |
1685 | 1685 | extlink_preprocessor_text |
| 1686 | + // added special separator character class inline: separates url from |
| 1687 | + // description / text |
1686 | 1688 | = r:( t:[^'<~[{\n\r|!\]}\t&="' \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); } |
1687 | 1689 | / directive |
1688 | | - / urlencoded_char |
| 1690 | + /// urlencoded_char |
1689 | 1691 | / !inline_breaks no_punctuation_char |
1690 | 1692 | / s:[.:,] !(space / eolf) { return s } |
1691 | 1693 | / [&%] )+ { |
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js |
— | — | @@ -54,7 +54,7 @@ |
55 | 55 | } |
56 | 56 | content = out; |
57 | 57 | } else { |
58 | | - content = href; |
| 58 | + content = [ env.decodeURI( env.tokensToString( href ) ) ]; |
59 | 59 | } |
60 | 60 | if ( tail ) { |
61 | 61 | content.push( tail ); |
— | — | @@ -105,19 +105,19 @@ |
106 | 106 | |
107 | 107 | var content = token.attribs.slice(1, -1); |
108 | 108 | |
109 | | - // XXX: get /wiki from config! |
| 109 | + // TODO: get /wiki from config! |
110 | 110 | var a = new TagTk( 'a', [ new KV( 'href', '/wiki' + title.makeLink() ) ] ); |
111 | 111 | a.dataAttribs = token.dataAttribs; |
112 | 112 | |
113 | 113 | var MD5 = new jshashes.MD5(), |
114 | 114 | hash = MD5.hex( title.key ), |
115 | | - // XXX: Hackhack.. |
| 115 | + // TODO: Hackhack.. Move to proper test harness setup! |
116 | 116 | path = 'http://example.com/images/' + |
117 | 117 | [ hash[0], hash.substr(0, 2) ].join('/') + '/' + title.key; |
118 | 118 | |
119 | 119 | |
120 | 120 | |
121 | | - // XXX: extract options |
| 121 | + // extract options |
122 | 122 | var options = [], |
123 | 123 | caption = null; |
124 | 124 | for( var i = 0, l = content.length; i<l; i++ ) { |
— | — | @@ -132,7 +132,7 @@ |
133 | 133 | } |
134 | 134 | } else { |
135 | 135 | var bits = oText[0].split( '=', 2 ); |
136 | | - if ( bits.length > 1 && this._prefixImageOptions[ bits[0].strip ] ) { |
| 136 | + if ( bits.length > 1 && this._prefixImageOptions[ bits[0].trim() ] ) { |
137 | 137 | console.log('handle prefix ' + bits ); |
138 | 138 | } else { |
139 | 139 | caption = oContent; |
— | — | @@ -217,7 +217,9 @@ |
218 | 218 | }; |
219 | 219 | |
220 | 220 | ExternalLinkHandler.prototype.onUrlLink = function ( token, manager, cb ) { |
221 | | - var href = this.manager.env.lookupKV( token.attribs, 'href' ).v; |
| 221 | + var href = this.manager.env.sanitizeURI( |
| 222 | + this.manager.env.lookupKV( token.attribs, 'href' ).v |
| 223 | + ); |
222 | 224 | if ( this._isImageLink( href ) ) { |
223 | 225 | return { token: new SelfclosingTagTk( 'img', |
224 | 226 | [ |
— | — | @@ -241,6 +243,8 @@ |
242 | 244 | ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) { |
243 | 245 | var href = this.manager.env.lookupKV( token.attribs, 'href' ).v, |
244 | 246 | content= this.manager.env.lookupKV( token.attribs, 'content' ).v; |
| 247 | + href = this.manager.env.sanitizeURI( href ); |
| 248 | + console.warn('extlink href: ' + href ); |
245 | 249 | //console.warn( 'content: ' + JSON.stringify( content, null, 2 ) ); |
246 | 250 | // validate the href |
247 | 251 | if ( this.imageParser.parseURL( href ) ) { |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.Title.js |
— | — | @@ -10,7 +10,7 @@ |
11 | 11 | Title.prototype.makeLink = function () { |
12 | 12 | // XXX: links always point to the canonical namespace name. |
13 | 13 | if ( false && this.nskey ) { |
14 | | - return this.env.wgScriptPath + this.nskey + ':' + this.key; |
| 14 | + return this.env.sanitizeURI( this.env.wgScriptPath + this.nskey + ':' + this.key ); |
15 | 15 | } else { |
16 | 16 | var l = this.env.wgScriptPath, |
17 | 17 | ns = this.ns.getDefaultName(); |
— | — | @@ -18,7 +18,7 @@ |
19 | 19 | if ( ns ) { |
20 | 20 | l += ns + ':'; |
21 | 21 | } |
22 | | - return l + this.key; |
| 22 | + return this.env.sanitizeURI( l + this.key ); |
23 | 23 | } |
24 | 24 | }; |
25 | 25 | |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js |
— | — | @@ -208,7 +208,45 @@ |
209 | 209 | return out.join(''); |
210 | 210 | }; |
211 | 211 | |
/**
 * Decode percent-escapes in a string, leaving anything that cannot be
 * decoded untouched.
 *
 * Consecutive escapes are decoded together so that multi-byte UTF-8
 * sequences (for example '%C3%A9') are handled, and hex digits are accepted
 * in either case. Decoding is done with the global decodeURI, so escapes of
 * reserved URI characters (such as '%23' for '#') are deliberately kept.
 *
 * @param {string} s Text that may contain percent-escapes.
 * @returns {string} The text with all decodable escapes replaced.
 */
MWParserEnvironment.prototype.decodeURI = function ( s ) {
	// Decode a chunk of escapes; hand back the chunk itself if it is not
	// valid UTF-8 (decodeURI signals that with a URIError).
	function tryDecode( chunk ) {
		try {
			return decodeURI( chunk );
		} catch ( e ) {
			if ( e instanceof URIError ) {
				return null;
			}
			throw e;
		}
	}

	return s.replace( /(?:%[0-9a-fA-F]{2})+/g, function ( run ) {
		var decoded = tryDecode( run );
		if ( decoded !== null ) {
			return decoded;
		}
		// The run as a whole is malformed: salvage the escapes that are
		// decodable on their own and keep the rest verbatim.
		return run.replace( /%[0-9a-fA-F]{2}/g, function ( esc ) {
			var single = tryDecode( esc );
			return single === null ? esc : single;
		} );
	} );
};
212 | 221 | |
/**
 * Percent-encode characters in a URI that must not appear literally.
 *
 * The string is split into three parts:
 *  - an optional 'scheme://authority' prefix (plus one trailing '/'); the
 *    authority ends at the first '/', '?' or '#',
 *  - the path/query part,
 *  - an optional anchor: everything after the LAST '#'.
 *
 * In the prefix, '|' and any '%' that does not start a valid two-digit hex
 * escape are encoded. In the path/query part, '[', ']', '#' and '|' are
 * encoded as well. The anchor is appended unchanged.
 *
 * @param {string} s URI or URI path to sanitize.
 * @returns {string} The sanitized URI.
 */
MWParserEnvironment.prototype.sanitizeURI = function ( s ) {
	function encode( m ) {
		return encodeURIComponent( m );
	}

	var hostMatch = s.match( /^[a-zA-Z]+:\/\/[^\/?#]+\/?/ ),
		host = hostMatch ? hostMatch[0] : '',
		path = s.slice( host.length ),
		anchor = null,
		hashPos = path.lastIndexOf( '#' );

	// Only the last '#' introduces the anchor; earlier ones stay in the
	// path and are encoded below.
	if ( hashPos !== -1 ) {
		anchor = path.slice( hashPos + 1 );
		path = path.slice( 0, hashPos );
	}

	// A '%' followed by two hex digits is an existing escape and is kept.
	host = host.replace( /%(?![0-9a-fA-F][0-9a-fA-F])|[|]/g, encode );
	path = path.replace( /%(?![0-9a-fA-F][0-9a-fA-F])|[\[\]#|]/g, encode );

	s = host + path;
	if ( anchor !== null ) {
		s += '#' + anchor;
	}
	return s;
};
| 250 | + |
213 | 251 | /** |
214 | 252 | * Simple debug helper |
215 | 253 | */ |