Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -202,7 +202,7 @@ |
203 | 203 | fetchTemplates: false, |
204 | 204 | debug: this.argv.debug, |
205 | 205 | trace: this.argv.trace, |
206 | | - wgScriptPath: '/' |
| 206 | + wgScriptPath: '/wiki/' |
207 | 207 | }); |
208 | 208 | } |
209 | 209 | |
— | — | @@ -308,7 +308,7 @@ |
309 | 309 | return this.htmlparser.document.getElementsByTagName('body')[0] |
310 | 310 | .innerHTML |
311 | 311 | // a few things we ignore for now.. |
312 | | - .replace(/\/wiki\/Main_Page/g, 'Main Page') |
| 312 | + //.replace(/\/wiki\/Main_Page/g, 'Main Page') |
313 | 313 | // do not expect a toc for now |
314 | 314 | .replace(/<table[^>]+?id="toc"[^>]*>.+?<\/table>/mg, '') |
315 | 315 | // do not expect section editing for now |
— | — | @@ -316,7 +316,7 @@ |
317 | 317 | // general class and titles, typically on links |
318 | 318 | .replace(/(title|class|rel)="[^"]+"/g, '') |
319 | 319 | // strip red link markup, we do not check if a page exists yet |
320 | | - .replace(/\/index.php\?title=([^']+)&action=edit&redlink=1/g, '$1') |
| 320 | + .replace(/\/index.php\?title=([^']+?)&action=edit&redlink=1/g, '/wiki/$1') |
321 | 321 | // the expected html has some extra space in tags, strip it |
322 | 322 | .replace(/<a +href/g, '<a href') |
323 | 323 | .replace(/" +>/g, '">'); |
Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js |
— | — | @@ -13,7 +13,7 @@ |
14 | 14 | testWhiteList["Unclosed and unmatched quotes"] = "<p data-mw-sourcePos=\"0:66\"><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>"; |
15 | 15 | |
16 | 16 | // The expected result for this test is really broken html. |
17 | | -testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p data-mw-sourcePos=\"0:45\"><i>Some <a data-mw-type=\"internal\" href=\"Link\">pretty </a></i><a data-mw-type=\"internal\" href=\"Link\">italics<i> and stuff</i></a><i>!</i></p>"; |
| 17 | +testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p data-mw-sourcePos=\"0:45\"><i>Some <a data-mw-type=\"internal\" href=\"/wiki/Link\">pretty </a></i><a data-mw-type=\"internal\" href=\"/wiki/Link\">italics<i> and stuff</i></a><i>!</i></p>"; |
18 | 18 | |
19 | 19 | testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>"; |
20 | 20 | |
— | — | @@ -47,9 +47,9 @@ |
48 | 48 | /* Missing token transform functionality */ |
49 | 49 | |
50 | 50 | // We don't implement percent encoding for URIs yet. |
51 | | -testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a data-mw-type=\"internal\" href=\"Lista d''e paise d''o munno\">Lista d''e paise d''o munno</a></p>"; |
| 51 | +testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a data-mw-type=\"internal\" href=\"/wiki/Lista d''e paise d''o munno\">Lista d''e paise d''o munno</a></p>"; |
52 | 52 | |
53 | | -testWhiteList["Link containing \"<#\" and \">#\" as a hex sequences"] = "<p><a data-mw-type=\"internal\" href=\"<%23\"><%23</a><a data-mw-type=\"internal\" href=\">%23\">>%23</a></p>"; |
| 53 | +testWhiteList["Link containing \"<#\" and \">#\" as a hex sequences"] = "<p><a data-mw-type=\"internal\" href=\"/wiki/<%23\"><%23</a><a data-mw-type=\"internal\" href=\"/wiki/>%23\">>%23</a></p>"; |
54 | 54 | |
55 | 55 | |
56 | 56 | // Sanitizer |
— | — | @@ -57,17 +57,23 @@ |
58 | 58 | testWhiteList["Table security: embedded pipes (http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)"] = "<table><tbody><tr><td> |<a href=\"ftp://|x||\">[1]</a>\" onmouseover=\"alert(document.cookie)\">test</td></tr></tbody></table>"; |
59 | 59 | |
60 | 60 | // Sanitizer, but UTF8 in link might actually be ok in HTML5 |
61 | | -testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a data-mw-type=\"internal\" href=\"Museo Picasso (París)\">Museo Picasso</a>.</p>"; |
| 61 | +testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\" data-mw-type=\"external\" data-mw-rt=\"{"sourcePos":[0,146]}\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a href=\"/wiki/Museo_Picasso_(París)\" data-mw-type=\"internal\">Museo Picasso</a>.</p>"; |
62 | 62 | |
63 | 63 | // plain percent sign is also valid in HTML5 |
64 | 64 | testWhiteList["Bug 4781, 5267: %28, %29 in URL"] = "<p><a href=\"http://www.example.com/?title=Ben-Hur_(1959_film)\" data-mw-sourcePos=\"0:53\">http://www.example.com/?title=Ben-Hur_(1959_film)</a></p>"; |
65 | 65 | |
66 | | -testWhiteList["External links: wiki links within external link (Bug 3695)"] = "<p><a href=\"http://example.com\" data-mw-type=\"external\" data-mw-sourcePos=\"0:54\"></a><a data-mw-type=\"internal\" href=\"wikilink\">wikilink</a> embedded in ext link</p>"; |
| 66 | +testWhiteList["External links: wiki links within external link (Bug 3695)"] = "<p><a href=\"http://example.com\" data-mw-type=\"external\" data-mw-sourcePos=\"0:54\"></a><a data-mw-type=\"internal\" href=\"/wiki/Wikilink\">wikilink</a> embedded in ext link</p>"; |
67 | 67 | |
68 | 68 | testWhiteList["Bug 4781, 5267: %25 in URL"] = "<p><a href=\"http://www.example.com/?title=100%_Bran\" data-mw-sourcePos=\"0:41\">http://www.example.com/?title=100%_Bran</a></p>"; |
69 | 69 | |
70 | 70 | testWhiteList["<pre> with forbidden attribute values (bug 3202)"] = "<pre width=\"8\" style=\"\">Narrow screen goodies</pre>"; |
71 | 71 | |
| 72 | +testWhiteList["Link containing % (not as a hex sequence)"] = "<p><a href=\"/wiki/7%_Solution\" data-mw-type=\"internal\">7% Solution</a></p>"; |
| 73 | + |
| 74 | +testWhiteList["Link containing % as a single hex sequence interpreted to char"] = "<p><a href=\"/wiki/7%_Solution\" data-mw-type=\"internal\">7% Solution</a></p>"; |
| 75 | + |
| 76 | +testWhiteList["Link containing double-single-quotes '' (bug 4598)"] = "<p><a href=\"/wiki/Lista_d''e_paise_d''o_munno\" data-mw-type=\"internal\">Lista d''e paise d''o munno</a></p>"; |
| 77 | + |
72 | 78 | if (typeof module == "object") { |
73 | 79 | module.exports.testWhiteList = testWhiteList; |
74 | 80 | } |
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js |
— | — | @@ -366,7 +366,11 @@ |
367 | 367 | }; |
368 | 368 | |
369 | 369 | ParserFunctions.prototype['pf_localurl'] = function ( target, argList, argDict ) { |
370 | | - return ( this.manager.env.wgScriptPath + 'index' + |
| 370 | + return ( |
| 371 | + '/' + |
| 372 | + // FIXME! Figure out correct prefix to use |
| 373 | + //this.manager.env.wgScriptPath + |
| 374 | + 'index' + |
371 | 375 | this.manager.env.wgScriptExtension + '?title=' + |
372 | 376 | this.manager.env.normalizeTitle( target ) + '&' + |
373 | 377 | argList.map( |
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -770,14 +770,14 @@ |
771 | 771 | } |
772 | 772 | |
773 | 773 | / { |
774 | | - return { pos: posStack.pop('lcontent' , pos), content: null }; |
| 774 | + return { pos: posStack.pop('lcontent' , pos), content: [] }; |
775 | 775 | } |
776 | 776 | ) |
777 | 777 | "]]" |
778 | 778 | // XXX In real MediaWiki, this is a language-dependent positive character |
779 | 779 | // class. Can we work out a static negative class instead? |
780 | 780 | // XXX: Exclude uppercase chars from non-latin languages too! |
781 | | - trail:( ![A-Z \t(),.:-] tc:text_char { return tc } )* { |
| 781 | + tail:( ![A-Z \t(),.:-] tc:text_char { return tc } )* { |
782 | 782 | var obj = new SelfclosingTagTk( 'wikilink' ), |
783 | 783 | textTokens = []; |
784 | 784 | obj.attribs.push( new KV('href', target) ); |
— | — | @@ -788,22 +788,12 @@ |
789 | 789 | // XXX: Point to object with path, revision and input information |
790 | 790 | obj.source = input; |
791 | 791 | |
| 792 | + //console.warn('lcontent: ' + JSON.stringify( lcontent, null, 2 ) ); |
792 | 793 | // Deal with content. XXX: Properly support pipe-trick etc |
793 | | - if (lcontent.content && lcontent.content.length) { |
794 | | - textTokens = lcontent.content; |
795 | | - if (trail) { |
796 | | - textTokens.push( trail.join('') ); |
797 | | - } |
798 | | - } else { |
799 | | - if (trail) { |
800 | | - textTokens = $.extend(true, [], target).concat( [ trail.join('') ] ); |
801 | | - } else { |
802 | | - // copy list |
803 | | - textTokens = $.extend(true, [], target); |
804 | | - } |
805 | | - } |
| 794 | + lcontent.tail = tail && tail.join('') || ''; |
806 | 795 | |
807 | | - obj.attribs.push( new KV( 'content', flatten( textTokens ) ) ); |
| 796 | + obj.attribs.push( new KV( 'content', lcontent.content ) ); |
| 797 | + obj.attribs.push( new KV( 'tail', lcontent.tail ) ); |
808 | 798 | //console.warn( "XXX:" + pp([obj].concat(textTokens, [new EndTagTk( 'a' )])) ); |
809 | 799 | return [obj]; |
810 | 800 | } |
— | — | @@ -826,6 +816,24 @@ |
827 | 817 | } |
828 | 818 | / & { return clearFlag('linkdesc'); } |
829 | 819 | |
| 820 | +link_option |
| 821 | + = & { setFlag('pipe'); return setFlag('linkdesc'); } |
| 822 | + h:inline |
| 823 | + // 'equal' syntaxFlag is set for links in template parameters. Consume the |
| 824 | + // '=' here. |
| 825 | + hs:( '=' inline)? |
| 826 | + { |
| 827 | + //console.warn('link_text' + pp(h) + pp(hs)); |
| 828 | + clearFlag('pipe'); |
| 829 | + clearFlag('linkdesc'); |
| 830 | + if( hs !== '' ) { |
| 831 | + return h.concat(hs); |
| 832 | + } else { |
| 833 | + return h; |
| 834 | + } |
| 835 | + } |
| 836 | + / & { clearFlag('pipe'); return clearFlag('linkdesc'); } |
| 837 | + |
830 | 838 | link_end = "]]" |
831 | 839 | |
832 | 840 | /* Generic quote production for italic and bold, further processed in a token |
— | — | @@ -1674,7 +1682,7 @@ |
1675 | 1683 | = r:( t:[^%<~[{\n\r\t|!\]} &=]+ { return t.join(''); } |
1676 | 1684 | / urlencoded_char |
1677 | 1685 | / directive |
1678 | | - / !inline_breaks !"]]" text_char )+ { |
| 1686 | + / !inline_breaks !"|" !"]]" text_char )+ { |
1679 | 1687 | return flatten_stringlist ( r ); |
1680 | 1688 | } |
1681 | 1689 | |
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js |
— | — | @@ -24,12 +24,12 @@ |
25 | 25 | WikiLinkHandler.prototype.rank = 1.15; // after AttributeExpander |
26 | 26 | |
27 | 27 | WikiLinkHandler.prototype.onWikiLink = function ( token, manager, cb ) { |
28 | | - var env = this.manager.env; |
| 28 | + var env = this.manager.env, |
| 29 | + href = env.lookupKV( token.attribs, 'href' ).v, |
| 30 | + tail = env.lookupKV( token.attribs, 'tail' ).v; |
29 | 31 | var title = this.manager.env.makeTitleFromPrefixedText( |
30 | | - env.tokensToString( |
31 | | - env.lookupKV( token.attribs, 'href' ).v |
32 | | - ) |
33 | | - ); |
| 32 | + env.tokensToString( href ) |
| 33 | + ); |
34 | 34 | |
35 | 35 | if ( title.ns.isFile() ) { |
36 | 36 | return this.renderFile( token, manager, cb, title ); |
— | — | @@ -39,16 +39,65 @@ |
40 | 40 | } else { |
41 | 41 | // Check if page exists |
42 | 42 | // |
43 | | - var obj = new TagTk( 'a', [ this.manager.env.lookupKV( token.attribs, 'href' ) ] ); |
| 43 | + //console.warn( 'title: ' + JSON.stringify( title ) ); |
| 44 | + var obj = new TagTk( 'a', [ new KV( 'href', title.makeLink() ) ] ), |
| 45 | + content = this.manager.env.lookupKV( token.attribs, 'content' ).v; |
| 46 | + //console.warn('content: ' + JSON.stringify( content, null, 2 ) ); |
| 47 | + // XXX: handle trail |
| 48 | + if ( content.length ) { |
| 49 | + var out = [] |
| 50 | + for ( var i = 0, l = content.length; i < l ; i++ ) { |
| 51 | + out = out.concat( content[i] ); |
| 52 | + if ( i < l - 1 ) { |
| 53 | + out.push( '|' ); |
| 54 | + } |
| 55 | + } |
| 56 | + content = out; |
| 57 | + } else { |
| 58 | + content = href; |
| 59 | + } |
| 60 | + if ( tail ) { |
| 61 | + content.push( tail ); |
| 62 | + } |
| 63 | + |
44 | 64 | obj.attribs.push( new KV('data-mw-type', 'internal') ); |
45 | | - var out = [obj].concat( this.manager.env.lookupKV( token.attribs, 'content' ).v, |
46 | | - new EndTagTk( 'a' ) ); |
| 65 | + var out = [obj].concat( content, new EndTagTk( 'a' ) ); |
47 | 66 | //console.warn( JSON.stringify( out, null, 2 ) ); |
48 | 67 | return { tokens: out }; |
49 | 68 | } |
50 | 69 | }; |
51 | 70 | |
| 71 | +WikiLinkHandler.prototype._simpleImageOptions = { |
| 72 | + // halign |
| 73 | + 'left': 'halign', |
| 74 | + 'right': 'halign', |
| 75 | + 'center': 'halign', |
| 76 | + 'none': 'halign', |
| 77 | + // valign |
| 78 | + 'baseline': 'valign', |
| 79 | + 'sub': 'valign', |
| 80 | + 'super': 'valign', |
| 81 | + 'top': 'valign', |
| 82 | + 'text-top': 'valign', |
| 83 | + 'middle': 'valign', |
| 84 | + 'bottom': 'valign', |
| 85 | + 'text-bottom': 'valign', |
| 86 | + // format |
| 87 | + 'border': 'format', |
| 88 | + 'frameless': 'format', |
| 89 | + 'frame': 'format', |
| 90 | + 'thumbnail': 'format', |
| 91 | + 'thumb': 'format' |
| 92 | +}; |
52 | 93 | |
| 94 | +WikiLinkHandler.prototype._prefixImageOptions = { |
| 95 | + 'link': 'link', |
| 96 | + 'alt': 'alt', |
| 97 | + 'page': 'page', |
| 98 | + 'thumbnail': 'thumb', |
| 99 | + 'thumb': 'thumb' |
| 100 | +}; |
| 101 | + |
53 | 102 | WikiLinkHandler.prototype.renderFile = function ( token, manager, cb, title ) { |
54 | 103 | var env = manager.env; |
55 | 104 | // distinguish media types |
— | — | @@ -67,15 +116,39 @@ |
68 | 117 | [ hash[0], hash.substr(0, 2) ].join('/') + '/' + title.key; |
69 | 118 | |
70 | 119 | |
71 | | - // XXX: parse options |
72 | | - var contentPos = token.dataAttribs.contentPos; |
73 | | - var optionSource = token.source.substr( contentPos[0], contentPos[1] - contentPos[0] ); |
74 | | - console.log( 'optionSource: ' + optionSource ); |
| 120 | + |
| 121 | + // XXX: extract options |
| 122 | + var options = [], |
| 123 | + caption = null; |
| 124 | + for( var i = 0, l = content.length; i<l; i++ ) { |
| 125 | + var oContent = content[i], |
| 126 | + oText = manager.env.tokensToString( oContent, true ); |
| 127 | + if ( oText.constructor === String ) { |
| 128 | + var oText = oText.trim(); |
| 129 | + if ( this._simpleImageOptions[ oText ] ) { |
| 130 | + options.push( new KV( this._simpleImageOptions[ oText ], |
| 131 | + oText ) ); |
| 132 | + continue; |
| 133 | + } |
| 134 | + } else { |
| 135 | + var bits = oText[0].split( '=', 2 ); |
| 136 | + if ( bits.length > 1 && this._prefixImageOptions[ bits[0].strip ] ) { |
| 137 | + console.log('handle prefix ' + bits ); |
| 138 | + } else { |
| 139 | + caption = oContent; |
| 140 | + } |
| 141 | + } |
| 142 | + } |
| 143 | + |
| 144 | + |
| 145 | + //var contentPos = token.dataAttribs.contentPos; |
| 146 | + //var optionSource = token.source.substr( contentPos[0], contentPos[1] - contentPos[0] ); |
| 147 | + //console.log( 'optionSource: ' + optionSource ); |
75 | 148 | // XXX: The trouble with re-parsing is the need to re-expand templates. |
76 | | - // Figure out often non-image links contain image-like parameters! |
77 | | - var options = this.imageParser.processImageOptions( optionSource ); |
| 149 | + // Figure out how often non-image links contain image-like parameters! |
| 150 | + //var options = this.imageParser.processImageOptions( optionSource ); |
78 | 151 | //console.log( JSON.stringify( options, null, 2 ) ); |
79 | | - // XXX: check if the file exists, generate thumbnail |
| 152 | + // XXX: check if the file exists, generate thumbnail, get size |
80 | 153 | // XXX: render according to mode (inline, thumb, framed etc) |
81 | 154 | var img = new SelfclosingTagTk( 'img', |
82 | 155 | [ |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.Title.js |
— | — | @@ -12,7 +12,13 @@ |
13 | 13 | if ( false && this.nskey ) { |
14 | 14 | return this.env.wgScriptPath + this.nskey + ':' + this.key; |
15 | 15 | } else { |
16 | | - return this.env.wgScriptPath + [this.ns.getDefaultName(), this.key].join(':'); |
| 16 | + var l = this.env.wgScriptPath, |
| 17 | + ns = this.ns.getDefaultName(); |
| 18 | + |
| 19 | + if ( ns ) { |
| 20 | + l += ns + ':'; |
| 21 | + } |
| 22 | + return l + this.key; |
17 | 23 | } |
18 | 24 | }; |
19 | 25 | |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js |
— | — | @@ -132,6 +132,7 @@ |
133 | 133 | }, |
134 | 134 | '|': function ( input, pos, syntaxFlags ) { |
135 | 135 | return syntaxFlags.template || |
| 136 | + syntaxFlags.linkdesc || |
136 | 137 | ( syntaxFlags.table && |
137 | 138 | ( input[pos + 1].match(/[|}]/) !== null || |
138 | 139 | syntaxFlags.tableCellArg |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js |
— | — | @@ -117,7 +117,7 @@ |
118 | 118 | return new Title( text, 0, '', this ); |
119 | 119 | } |
120 | 120 | } else { |
121 | | - return new Title( text, 0, this ); |
| 121 | + return new Title( text, 0, '', this ); |
122 | 122 | } |
123 | 123 | }; |
124 | 124 | |
— | — | @@ -174,7 +174,7 @@ |
175 | 175 | return name; |
176 | 176 | }; |
177 | 177 | |
178 | | -MWParserEnvironment.prototype.tokensToString = function ( tokens ) { |
| 178 | +MWParserEnvironment.prototype.tokensToString = function ( tokens, strict ) { |
179 | 179 | var out = []; |
180 | 180 | //console.warn( 'MWParserEnvironment.tokensToString, tokens: ' + JSON.stringify( tokens ) ); |
181 | 181 | // XXX: quick hack, track down non-array sources later! |
— | — | @@ -195,6 +195,9 @@ |
196 | 196 | } else if ( token.type === 'COMMENT' || token.type === 'NEWLINE' ) { |
197 | 197 | // strip comments and newlines |
198 | 198 | } else { |
| 199 | + if ( strict ) { |
| 200 | + return [out.join(''), null]; |
| 201 | + } |
199 | 202 | var tstring = JSON.stringify( token ); |
200 | 203 | this.dp ( 'MWParserEnvironment.tokensToString, non-text token: ' + |
201 | 204 | tstring + JSON.stringify( tokens, null, 2 ) ); |