Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -321,13 +321,14 @@ |
322 | 322 | // known-ok differences. |
323 | 323 | function normalizeOut ( out ) { |
324 | 324 | // TODO: Do not strip newlines in pre and nowiki blocks! |
325 | | - return out.replace(/\n| data-[a-zA-Z]+="[^">]+"/g, '') |
| 325 | + return out.replace(/\n| data-[a-zA-Z]+="[^">]*"/g, '') |
326 | 326 | .replace(/<!--.*?-->\n?/gm, ''); |
327 | 327 | } |
328 | 328 | |
329 | 329 | function formatHTML ( source ) { |
330 | 330 | // Quick hack to insert newlines before some block-level start tags |
331 | | - return source.replace(/(?!^)<((div|dd|dt|li|p|table|tr|td|tbody|dl|ol|ul)[^>]*)>/g, |
| 331 | + return source.replace( |
| 332 | + /(?!^)<((div|dd|dt|li|p|table|tr|td|tbody|dl|ol|ul|h1|h2|h3|h4|h5|h6)[^>]*)>/g, |
332 | 333 | '\n<$1>'); |
333 | 334 | } |
334 | 335 | |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMPostProcessor.js |
— | — | @@ -18,6 +18,12 @@ |
19 | 19 | case 'pre': |
20 | 20 | case 'center': |
21 | 21 | case 'blockquote': |
| 22 | + case 'h1': |
| 23 | + case 'h2': |
| 24 | + case 'h3': |
| 25 | + case 'h4': |
| 26 | + case 'h5': |
| 27 | + case 'h6': |
22 | 28 | return true; |
23 | 29 | default: |
24 | 30 | return false; |
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt |
— | — | @@ -370,7 +370,7 @@ |
371 | 371 | * structures to avoid unnecessarily leaving the text production on plain |
372 | 372 | * content. */ |
373 | 373 | |
374 | | -text_char = [^'<~[{\n\r:\]}] |
| 374 | +text_char = [^'<~[{\n\r:\]}|!=] |
375 | 375 | |
376 | 376 | text = t:text_char+ { return t.join(''); } |
377 | 377 | |
— | — | @@ -386,6 +386,9 @@ |
387 | 387 | * n nntp(s) urls |
388 | 388 | * m mailto urls |
389 | 389 | * |
| 390 | + * ! and | table cell delimiters, might be better to specialize those |
| 391 | + * = headings - also specialize those! |
| 392 | + * |
390 | 393 | * The following chars are also included for now, but only apply in some |
391 | 394 | * contexts and should probably be enabled only in those: |
392 | 395 | * : separate definition in ; term : definition |
— | — | @@ -393,7 +396,7 @@ |
394 | 397 | * } end of parser func/transclusion/template arg |
395 | 398 | */ |
396 | 399 | |
397 | | -urltext = ( t:[^'<~[{\n\rfghimnstw:\]} &]+ { return t.join(''); } |
| 400 | +urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); } |
398 | 401 | // XXX: use general entity decode! |
399 | 402 | / "&" { return "&"; } // decode ampersand in text |
400 | 403 | / urllink |
— | — | @@ -456,7 +459,7 @@ |
457 | 460 | = & { blockStart = pos; return true; } b:block { |
458 | 461 | b = flatten(b); |
459 | 462 | var bs = b[0]; |
460 | | - dp('toplevelblock:' + pp(b)); |
| 463 | + //dp('toplevelblock:' + pp(b)); |
461 | 464 | if (bs.attribs === undefined) { |
462 | 465 | bs.attribs = []; |
463 | 466 | } |
— | — | @@ -498,6 +501,8 @@ |
499 | 502 | = h |
500 | 503 | / table |
501 | 504 | / lists |
| 505 | + // tag-only lines should not trigger pre |
| 506 | + / space* bt:block_tag space* &eolf { return bt } |
502 | 507 | / pre_indent |
503 | 508 | / pre |
504 | 509 | |
— | — | @@ -514,21 +519,26 @@ |
515 | 520 | |
516 | 521 | // Syntax stops to limit inline expansion depending on syntactic context |
517 | 522 | inline_breaks |
518 | | - = //& { console.log(pp(syntaxFlags)); return true; } |
| 523 | + = |
| 524 | + & { // Important hack: disable caching for this production, as the default |
| 525 | + // cache key does not take into account flag states! |
| 526 | + cacheKey = ''; |
| 527 | + return true; |
| 528 | + } |
519 | 529 | & { return syntaxFlags['table']; } |
520 | | - a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a)); return true; } |
| 530 | + a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; } |
521 | 531 | / & { return (syntaxFlags['colon'] && |
522 | 532 | ! syntaxFlags.extlink && |
523 | 533 | ! syntaxFlags.linkdesk); } ":" { return true; } |
524 | 534 | / & { return syntaxFlags['extlink']; } "]" { return true; } |
525 | 535 | / & { return syntaxFlags['linkdesc']; } link_end { return true; } |
526 | 536 | / & { return syntaxFlags['h']; } |
527 | | - ( & { return syntaxFlags['h1'] } '=' newline { return true; } |
528 | | - / & { return syntaxFlags['h2'] } '==' newline { return true; } |
529 | | - / & { return syntaxFlags['h3'] } '===' newline { return true; } |
530 | | - / & { return syntaxFlags['h4'] } '====' newline { return true; } |
531 | | - / & { return syntaxFlags['h5'] } '=====' newline { return true; } |
532 | | - / & { return syntaxFlags['h6'] } '======' newline { return true; } |
| 537 | + ( & { return syntaxFlags['h1'] } '=' space* newline { return true; } |
| 538 | + / & { return syntaxFlags['h2'] } '==' space* newline { return true; } |
| 539 | + / & { return syntaxFlags['h3'] } '===' space* newline { return true; } |
| 540 | + / & { return syntaxFlags['h4'] } '====' space* newline { return true; } |
| 541 | + / & { return syntaxFlags['h5'] } '=====' space* newline { return true; } |
| 542 | + / & { return syntaxFlags['h6'] } '======' space* newline { return true; } |
533 | 543 | ) |
534 | 544 | |
535 | 545 | inline |
— | — | @@ -553,6 +563,7 @@ |
554 | 564 | return out; |
555 | 565 | } |
556 | 566 | |
| 567 | + |
557 | 568 | inlineline |
558 | 569 | = c:(urltext / !inline_breaks (inline_element / [^\n]))+ { |
559 | 570 | var out = []; |
— | — | @@ -572,7 +583,7 @@ |
573 | 584 | if (text.length) { |
574 | 585 | out.push({type: 'TEXT', value: text.join('')}); |
575 | 586 | } |
576 | | - //dp('inlineline out:', pp(out)); |
| 587 | + dp('inlineline out:', pp(out)); |
577 | 588 | return out; |
578 | 589 | } |
579 | 590 | |
— | — | @@ -580,7 +591,8 @@ |
581 | 592 | * -> need (start, end) offsets within block |
582 | 593 | */ |
583 | 594 | inline_element |
584 | | - = comment |
| 595 | + = & { dp('inline_element enter' + input.substr(pos, 10)); return true; } |
| 596 | + comment |
585 | 597 | // Can actually also be block-level elements, we don't really try to enforce |
586 | 598 | // a content model in the tokenizer. The HTML tree builder and DOM |
587 | 599 | // transformations are better equipped to deal with it. |
— | — | @@ -590,35 +602,43 @@ |
591 | 603 | / extlink |
592 | 604 | / quote |
593 | 605 | |
594 | | -/* Headings */ |
595 | | -h = h1 / h2 / h3 / h4 / h5 / h6 |
| 606 | +/* Headings |
| 607 | + * |
| 608 | + * Listed in reverse order on purpose: alternatives are tried in order, so the longest '=' run has to come first. */ |
| 609 | +h = h6 / h5 / h4 / h3 / h2 / h1 |
596 | 610 | |
597 | | -h1 = sol '=' |
598 | | - ( |
| 611 | +/* We might want to consider using a single rule for all headings, and |
| 612 | + * figuring out the level in the action. This saves quite some backtracking, |
| 613 | + * and the conversion of equal signs into text should not be a problem as |
| 614 | + * equals are not part of other syntax. */ |
| 615 | +h1 = '=' |
| 616 | + r:( |
599 | 617 | & { setFlag('h'); return setFlag('h1') } |
600 | | - c:inlineline '=' comment? &newline { |
| 618 | + c:inlineline '=' (space / comment)* |
| 619 | + &eolf |
| 620 | + { |
601 | 621 | clearFlag('h'); |
602 | 622 | clearFlag('h1'); |
603 | 623 | return [{type: 'TAG', name: 'h1'}] |
604 | 624 | .concat(c, [{type: 'ENDTAG', name: 'h1'}]); |
605 | 625 | } |
606 | | - / { clearFlag('h'); clearFlag('h1'); return null } |
607 | | - ) |
| 626 | + / & { dp('nomatch exit h1'); clearFlag('h'); clearFlag('h1'); return false } { return null } |
| 627 | + ) { return r } |
608 | 628 | |
609 | | -h2 = sol '==' |
610 | | - ( |
| 629 | +h2 = '==' |
| 630 | + r:( |
611 | 631 | & { setFlag('h'); return setFlag('h2') } |
612 | | - c:inlineline '==' comment? &newline { |
| 632 | + c:inlineline '==' (space / comment)* &newline { |
613 | 633 | clearFlag('h'); |
614 | 634 | clearFlag('h2'); |
615 | 635 | return [{type: 'TAG', name: 'h2'}] |
616 | 636 | .concat(c, [{type: 'ENDTAG', name: 'h2'}]); |
617 | 637 | } |
618 | | - / { clearFlag('h'); clearFlag('h2'); return null } |
619 | | - ) |
| 638 | + / & { clearFlag('h'); clearFlag('h2'); return false } |
| 639 | + ) { return r } |
620 | 640 | |
621 | | -h3 = sol '===' |
622 | | - ( |
| 641 | +h3 = '===' |
| 642 | + r:( |
623 | 643 | & { setFlag('h'); return setFlag('h3') } |
624 | 644 | c:inlineline '===' comment? &newline { |
625 | 645 | clearFlag('h'); |
— | — | @@ -626,11 +646,11 @@ |
627 | 647 | return [{type: 'TAG', name: 'h3'}] |
628 | 648 | .concat(c, [{type: 'ENDTAG', name: 'h3'}]); |
629 | 649 | } |
630 | | - / { clearFlag('h'); clearFlag('h3'); return null } |
631 | | - ) |
| 650 | + / & { clearFlag('h'); clearFlag('h3'); return false } |
| 651 | + ) { return r } |
632 | 652 | |
633 | | -h4 = sol '====' |
634 | | - ( |
| 653 | +h4 = '====' |
| 654 | + r:( |
635 | 655 | & { setFlag('h'); return setFlag('h4') } |
636 | 656 | c:inlineline '====' comment? &newline { |
637 | 657 | clearFlag('h'); |
— | — | @@ -638,37 +658,32 @@ |
639 | 659 | return [{type: 'TAG', name: 'h4'}] |
640 | 660 | .concat(c, [{type: 'ENDTAG', name: 'h4'}]); |
641 | 661 | } |
642 | | - / { clearFlag('h'); clearFlag('h4'); return null } |
643 | | - ) |
| 662 | + / & { clearFlag('h'); clearFlag('h4'); return false } |
| 663 | + ) { return r } |
644 | 664 | |
645 | | -h5 = sol '=====' |
646 | | - (& { setFlag('h'); return setFlag('h5') } |
| 665 | +h5 = '=====' |
| 666 | + r:(& { setFlag('h'); return setFlag('h5') } |
647 | 667 | c:inlineline '=====' comment? &newline { |
648 | 668 | clearFlag('h'); |
649 | 669 | clearFlag('h5'); |
650 | 670 | return [{type: 'TAG', name: 'h5'}] |
651 | 671 | .concat(c, [{type: 'ENDTAG', name: 'h5'}]); |
652 | 672 | } |
653 | | - / { clearFlag('h'); clearFlag('h5'); return null } |
654 | | - ) |
| 673 | + / & { clearFlag('h'); clearFlag('h5'); return false } |
| 674 | + ) { return r } |
655 | 675 | |
656 | | -h6 = sol '======' |
657 | | - (& { setFlag('h'); return setFlag('h6') } |
| 676 | +h6 = '======' |
| 677 | + r:(& { setFlag('h'); return setFlag('h6') } |
658 | 678 | c:inlineline '======' comment? &newline { |
659 | 679 | clearFlag('h'); |
660 | 680 | clearFlag('h6'); |
661 | 681 | return [{type: 'TAG', name: 'h6'}] |
662 | 682 | .concat(c, [{type: 'ENDTAG', name: 'h6'}]); |
663 | 683 | } |
664 | | - / { clearFlag('h'); clearFlag('h6'); return null } |
665 | | - ) |
| 684 | + / & { clearFlag('h'); clearFlag('h6'); return false } |
| 685 | + ) { return r } |
666 | 686 | |
667 | | -heading_marker |
668 | | - = '=' '='* |
669 | 687 | |
670 | | -heading_text |
671 | | - = h:( !(heading_marker newline) x:inlineline { return x } )* { return h.join(''); } |
672 | | - |
673 | 688 | pre_indent |
674 | 689 | = l:pre_indent_line ls:(sol pre_indent_line)* { |
675 | 690 | return [{type: 'TAG', name: 'pre'}] |
— | — | @@ -741,11 +756,12 @@ |
742 | 757 | / 'telnet://' // Well if we're going to support the above.. -ævar |
743 | 758 | / 'worldwind://' |
744 | 759 | |
| 760 | +// JavaScript regexps have no Unicode character classes, so the separator spaces are listed explicitly. |
745 | 761 | unicode_separator_space = [ \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000] |
746 | 762 | |
747 | 763 | url |
748 | 764 | = proto:url_protocol |
749 | | - rest:( [^ :\]\[\n<>\x00-\x20\x7f,.&] |
| 765 | + rest:( [^ :\]\[\n"'<>\x00-\x20\x7f,.&\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000] |
750 | 766 | / s:[.:,] !(space / eolf) { return s } |
751 | 767 | // XXX: use general entity decode! |
752 | 768 | / '&' { return '&' } |
— | — | @@ -807,7 +823,7 @@ |
808 | 824 | / !"}}" x:([^|\n]) { return x } |
809 | 825 | |
810 | 826 | wikilink |
811 | | - = "[[" target:link_target text:("|" link_text)* "]]" { |
| 827 | + = "[[" target:link_target text:("|" lt:link_text { return lt })* "]]" suffix:text? { |
812 | 828 | var obj = { |
813 | 829 | type: 'TAG', |
814 | 830 | name: 'a', |
— | — | @@ -815,8 +831,11 @@ |
816 | 832 | }; |
817 | 833 | obj.attribs.push(['href', target]); |
818 | 834 | if (text && text.length) { |
819 | | - var textTokens = text[0][1]; // XXX |
| 835 | + var textTokens = text; // XXX |
820 | 836 | } else { |
| 837 | + if (suffix !== '') { |
| 838 | + target += suffix; |
| 839 | + } |
821 | 840 | var textTokens = [{type: 'TEXT', value: target}]; |
822 | 841 | } |
823 | 842 | return [obj].concat(textTokens, [{type: 'ENDTAG', name: 'a'}]); |
— | — | @@ -1120,7 +1139,6 @@ |
1121 | 1140 | |
1122 | 1141 | |
1123 | 1142 | /* Tables */ |
1124 | | - |
1125 | 1143 | table |
1126 | 1144 | = tas:table_start c:table_caption? b:table_body? table_end { |
1127 | 1145 | var res = {type: 'TAG', name: 'table'} |
— | — | @@ -1144,18 +1162,19 @@ |
1145 | 1163 | } |
1146 | 1164 | |
1147 | 1165 | table_start |
1148 | | - = sol |
1149 | | - "{|" |
1150 | | - & { setFlag('table'); return true; } |
1151 | | - ta:table_attribs* |
1152 | | - space* { |
1153 | | - //dp("table_start " + pp(ta) + ", pos:" + pos); |
1154 | | - return ta; |
1155 | | - } |
1156 | | - / sol "{|" { clearFlag('table'); return null; } |
| 1166 | + = "{|" |
| 1167 | + res:( |
| 1168 | + & { setFlag('table'); return true; } |
| 1169 | + ta:table_attribs* |
| 1170 | + { |
| 1171 | + dp("table_start " + pp(ta) + ", pos:" + pos); |
| 1172 | + return ta; |
| 1173 | + } |
| 1174 | + / & { clearFlag('table'); return false; } { return null; } |
| 1175 | + ) { return res } |
1157 | 1176 | |
1158 | 1177 | table_attribs |
1159 | | - = text / ! inline_breaks !newline . |
| 1178 | + = text / ! inline_breaks !newline ![|] c:. { return c } |
1160 | 1179 | |
1161 | 1180 | table_caption |
1162 | 1181 | = newline |
— | — | @@ -1190,11 +1209,14 @@ |
1191 | 1210 | } |
1192 | 1211 | |
1193 | 1212 | table_data |
1194 | | - = & { dp("table_data enter, pos=" + pos); return true; } |
| 1213 | + = & { dp("table_data enter, pos=" + pos + input.substr(pos,10)); return true; } |
1195 | 1214 | ("||" / newline "|") |
1196 | 1215 | ! [}+-] |
1197 | 1216 | a:thtd_attribs? |
1198 | | - td:(!inline_breaks block)* { |
| 1217 | + // use inline_breaks to break on tr etc |
| 1218 | + td:(!inline_breaks |
| 1219 | + & { dp("table_data 2, pos=" + pos + input.substr(pos,10)); return true; } |
| 1220 | + b:block { return b })* { |
1199 | 1221 | dp("table data result: " + pp(td) + ", attribts: " + pp(a)); |
1200 | 1222 | return [{ type: 'TAG', name: 'td', attribs: [['data-unparsed', a]]}] |
1201 | 1223 | .concat(td, [{type: 'ENDTAG', name: 'td'}]); |
— | — | @@ -1210,8 +1232,8 @@ |
1211 | 1233 | |
1212 | 1234 | thtd_attribs |
1213 | 1235 | // In particular, do not match [|\n] |
1214 | | - = a:(text / ! inline_breaks [="':;/,.-] )+ "|" ! [|}+-] { |
1215 | | - return a; |
| 1236 | + = a:(text / ! inline_breaks c:[="':;/,. -] { return c } )+ "|" ! "|" { |
| 1237 | + return a; |
1216 | 1238 | } |
1217 | 1239 | |
1218 | 1240 | |