Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js |
— | — | @@ -23,6 +23,7 @@ |
24 | 24 | // Empty tables, and tables with only a caption, are legal in HTML5. |
25 | 25 | testWhiteList["A table with no data."] = "<table></table>"; |
26 | 26 | testWhiteList["A table with nothing but a caption"] = "<table><caption> caption</caption></table>"; |
| 27 | +testWhiteList["Fuzz testing: Parser22"] = "<p data-sourcePos=\"0:23\"><a href=\"http://===r:::https://b\">http://===r:::https://b</a></p><table></table>"; |
27 | 28 | |
28 | 29 | // MediaWiki changes the order of attributes in tables, ignore that |
29 | 30 | testWhiteList["Multiplication table"] = "<table border=\"1\" cellpadding=\"2\"><caption>Multiplication table</caption><tbody><tr><th> × </th><th> 1 </th><th> 2 </th><th> 3</th></tr><tr><th> 1</th><td> 1 </td><td> 2 </td><td> 3</td></tr><tr><th> 2</th><td> 2 </td><td> 4 </td><td> 6</td></tr><tr><th> 3</th><td> 3 </td><td> 6 </td><td> 9</td></tr><tr><th> 4</th><td> 4 </td><td> 8 </td><td> 12</td></tr><tr><th> 5</th><td> 5 </td><td> 10 </td><td> 15</td></tr></tbody></table>"; |
— | — | @@ -51,7 +52,7 @@ |
52 | 53 | testWhiteList["Table security: embedded pipes (http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)"] = "<table><tbody><tr><td> |<a href=\"ftp://|x||\">[1]</a>\" onmouseover=\"alert(document.cookie)\">test</td></tr></tbody></table>"; |
53 | 54 | |
54 | 55 | // Sanitizer, but UTF-8 in a link might actually be OK in HTML5 |
55 | | -testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a data-type=\"internal\" href=\"Museo Picasso (París)\">Museo Picasso</a></p>"; |
| 56 | +testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a data-type=\"internal\" href=\"Museo Picasso (París)\">Museo Picasso</a>.</p>"; |
56 | 57 | |
57 | 58 | |
58 | 59 | if (typeof module == "object") { |
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -653,10 +653,13 @@ |
654 | 654 | } |
655 | 655 | |
656 | 656 | //[^][<>"\\x00-\\x20\\x7F\p{Zs}] |
| 657 | + |
| 658 | +no_punctuation_char = [^ :\]\[\n"'<>\x00-\x20\x7f,.&%\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000] |
| 659 | + |
657 | 660 | url |
658 | 661 | = proto:url_protocol |
659 | 662 | rest:( ( !inline_breaks |
660 | | - c:[^ :\]\[\n"'<>\x00-\x20\x7f,.&%\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000] |
| 663 | + c:no_punctuation_char |
661 | 664 | { return c } |
662 | 665 | ) |
663 | 666 | / s:[.:,] !(space / eolf) { return s } |
— | — | @@ -735,13 +738,16 @@ |
736 | 739 | } |
737 | 740 | / & { clearFlag('template'); return false; } |
738 | 741 | |
| 742 | +// TODO: handle link prefixes as in al[[Razi]] |
739 | 743 | wikilink |
740 | 744 | = "[[" |
741 | 745 | ! url |
742 | 746 | target:link_target |
743 | 747 | lcontent:( "|" lt:link_text { return lt } )* |
744 | 748 | "]]" |
745 | | - suffix:(![ \]] tc:text_char { return tc })* { |
| 749 | + // XXX In real MediaWiki, this is a language-dependent positive character |
| 750 | + // class. Can we work out a static negative class instead? |
| 751 | + trail:(! [ \t(),.:-] tc:text_char { return tc })* { |
746 | 752 | var obj = { |
747 | 753 | type: 'TAG', |
748 | 754 | name: 'a', |
— | — | @@ -749,21 +755,20 @@ |
750 | 756 | ['data-type', 'internal'] |
751 | 757 | ] |
752 | 758 | }, |
753 | | - suffixTokens = [], |
754 | 759 | textTokens = []; |
755 | 760 | obj.attribs.push(['href', target]); |
756 | 761 | if (lcontent && lcontent.length) { |
757 | 762 | textTokens = lcontent; |
758 | | - if (suffix) { |
759 | | - suffixTokens = [{ type: 'TEXT', value: suffix.join('') }]; |
| 763 | + if (trail) { |
| 764 | + textTokens.push( { type: 'TEXT', value: trail.join('') } ); |
760 | 765 | } |
761 | 766 | } else { |
762 | | - if (suffix) { |
763 | | - target += suffix.join(''); |
| 767 | + if (trail) { |
| 768 | + target += trail.join(''); |
764 | 769 | } |
765 | 770 | textTokens = [{type: 'TEXT', value: target}]; |
766 | 771 | } |
767 | | - return [obj].concat(textTokens, [{type: 'ENDTAG', name: 'a'}], suffixTokens); |
| 772 | + return [obj].concat(textTokens, [{type: 'ENDTAG', name: 'a'}]); |
768 | 773 | } |
769 | 774 | |
770 | 775 | link_target |