Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -133,9 +133,10 @@ |
134 | 134 | console.log(item); |
135 | 135 | throw new Error('Missing input from test case ' + item.title); |
136 | 136 | } |
| 137 | + console.log('====================================================='); |
137 | 138 | console.log(item.title); |
138 | | - console.log("INPUT:"); |
139 | | - console.log(item.input + "\n"); |
| 139 | + console.log("INPUT:"); |
| 140 | + console.log(item.input + "\n"); |
140 | 141 | |
141 | 142 | |
142 | 143 | parser.parseToTree(item.input + "\n", function(tree, err) { |
— | — | @@ -161,7 +162,10 @@ |
162 | 163 | var out = tokenizer.parser.document |
163 | 164 | .getElementsByTagName('body')[0] |
164 | 165 | .innerHTML |
165 | | - .replace(/<li>/g, '\n<li>'); |
| 166 | + // Hack: add some line breaks for |
| 167 | + // block-levels |
| 168 | + .replace(/(.)<((dd|dt|li|p|table|dl|ol|ul)[^>]*)>/g, |
| 169 | + '$1\n<$2>'); |
166 | 170 | console.log(out); |
167 | 171 | } |
168 | 172 | } |
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt |
— | — | @@ -343,6 +343,11 @@ |
344 | 344 | // Start position of top-level block |
345 | 345 | // Could also provide positions for lower-level blocks using a stack. |
346 | 346 | var blockStart = 0; |
| 347 | + |
| 348 | + var isEOF = function (pos) { |
| 349 | + // XXX: cache the length.. |
| 350 | + return pos === input.length; |
| 351 | + }; |
347 | 352 | } |
348 | 353 | |
349 | 354 | start |
— | — | @@ -350,6 +355,7 @@ |
351 | 356 | return flatten(e); |
352 | 357 | } |
353 | 358 | |
| 359 | + |
354 | 360 | anyblock = block / inline |
355 | 361 | anyblockline = block / inlineline |
356 | 362 | |
— | — | @@ -363,14 +369,18 @@ |
364 | 370 | |
365 | 371 | // Start of line |
366 | 372 | sol = (newline / & { return pos === 0; } { return true; }) |
367 | | - cn:(c:comment n:newline? { return [c, n] })? { |
| 373 | + cn:(c:comment n:newline? { return [c, {type: 'TEXT', value: n}] })* { |
368 | 374 | return [{type: 'NEWLINE'}].concat(cn); |
369 | 375 | } |
370 | 376 | |
| 377 | +eof = & { return isEOF(pos); } { return true; } |
371 | 378 | |
| 379 | + |
372 | 380 | newline |
373 | 381 | = '\n' / '\r\n' |
374 | 382 | |
| 383 | +eolf = newline / eof |
| 384 | + |
375 | 385 | toplevelblock |
376 | 386 | = & { blockStart = pos; return true; } b:block { |
377 | 387 | b = flatten(b); |
— | — | @@ -387,9 +397,10 @@ |
388 | 398 | } |
389 | 399 | |
390 | 400 | block |
391 | | - = (sol space* &newline)? bl:block_lines { return [{type: 'NEWLINE'}].concat(bl); } |
| 401 | + = block_lines |
| 402 | + / pre |
| 403 | + / comment &eolf |
392 | 404 | / para |
393 | | - / comment |
394 | 405 | / (s:sol { |
395 | 406 | if (s) { |
396 | 407 | return [s, {type: 'NEWLINE'}]; |
— | — | @@ -399,12 +410,16 @@ |
400 | 411 | } |
401 | 412 | ) |
402 | 413 | |
| 414 | +block_lines |
| 415 | + = s:sol (space* newline)? |
| 416 | + bl:block_line { return s.concat(bl); } |
403 | 417 | |
404 | 418 | // Block structures with start-of-line wiki syntax |
405 | | -block_lines |
| 419 | +block_line |
406 | 420 | = h |
407 | 421 | / table |
408 | 422 | / lists |
| 423 | + / space* generic_tag space* &eolf |
409 | 424 | / pre_indent |
410 | 425 | |
411 | 426 | |
— | — | @@ -493,11 +508,12 @@ |
494 | 509 | = (sol br)? pl:para_lines { return pl; } |
495 | 510 | |
496 | 511 | para_lines |
497 | | - = s:sol c:inlineline cs:(!block_lines para_lines)* { |
498 | | - var res = [{type: 'TAG', name: 'p'}]; |
| 512 | + = s:sol? c:inlineline cs:(!block_lines para_lines)* { |
| 513 | + var res = []; |
499 | 514 | if (s !== '') { |
500 | | - res.push(s) |
| 515 | + res.push(s); |
501 | 516 | } |
| 517 | + res.push({type: 'TAG', name: 'p'}); |
502 | 518 | //console.log('paralines' + pp(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]))); |
503 | 519 | return res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]); |
504 | 520 | } |
— | — | @@ -505,12 +521,14 @@ |
506 | 522 | br = space* &newline { return {type: 'SELFCLOSINGTAG', name: 'br'} } |
507 | 523 | |
508 | 524 | pre_indent |
509 | | - = l:pre_indent_line+ { |
| 525 | + = l:pre_indent_line ls:(sol pre_indent_line)* { |
510 | 526 | return [{type: 'TAG', name: 'pre'}] |
511 | | - .concat( l |
| 527 | + .concat( [l], ls |
512 | 528 | , [{type: 'ENDTAG', name: 'pre'}]); |
513 | 529 | } |
514 | | -pre_indent_line = sol space l:inlineline { return l } |
| 530 | +pre_indent_line = space l:inlineline { |
| 531 | + return [{type: 'TEXT', value: '\n'}].concat(l); |
| 532 | +} |
515 | 533 | |
516 | 534 | // Syntax that stops inline expansion |
517 | 535 | inline_breaks |
— | — | @@ -586,7 +604,7 @@ |
587 | 605 | / quote |
588 | 606 | |
589 | 607 | comment |
590 | | - = '<!--' c:comment_chars* '-->' |
| 608 | + = '<!--' c:comment_chars* ('-->' / eof) |
591 | 609 | (space* newline space* comment)* { |
592 | 610 | return [{ type: 'COMMENT', value: c.join('') }]; |
593 | 611 | } |
— | — | @@ -704,13 +722,29 @@ |
705 | 723 | /* Will need to check anything xmlish against known/allowed HTML tags and |
706 | 724 | * registered extensions, otherwise fail the match. Should ref be treated as a |
707 | 725 | * regular extension? */ |
708 | | -xmlish_tag = nowiki / pre / ref / references / generic_tag |
| 726 | +xmlish_tag = nowiki / ref / references / generic_tag |
709 | 727 | |
710 | | -nowiki = "<nowiki>" ts:(t:[^<]+ { return t.join('') } / !"</nowiki>" .)+ { |
711 | | - // return nowiki tags as well? |
712 | | - return [{type: 'TEXT', value: ts.join('')}]; |
713 | | -} |
| 728 | +pre |
| 729 | + = "<pre" |
| 730 | + attribs:generic_attribute* |
| 731 | + ">" |
| 732 | + ts:(t:[^<]+ { return t.join('') } / !"</pre>" t2:. {return t2})+ |
| 733 | + ("</pre>" / eof) { |
| 734 | + // return nowiki tags as well? |
| 735 | + return [ {type: 'TAG', name: 'pre', attribs: attribs} |
| 736 | + , {type: 'TEXT', value: ts.join('')} |
| 737 | + , {type: 'ENDTAG', name: 'pre'} |
| 738 | + ]; |
| 739 | + } |
714 | 740 | |
| 741 | +nowiki |
| 742 | + = "<nowiki>" |
| 743 | + ts:(t:[^<]+ { return t.join('') } / !"</nowiki>" .)+ |
| 744 | + "</nowiki>" { |
| 745 | + // return nowiki tags as well? |
| 746 | + return [{type: 'TEXT', value: ts.join('')}]; |
| 747 | + } |
| 748 | + |
715 | 749 | // See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and |
716 | 750 | // following paragraphs |
717 | 751 | generic_tag |
— | — | @@ -719,9 +753,9 @@ |
720 | 754 | selfclose:"/"? |
721 | 755 | ">" { |
722 | 756 | var res = {name: name.join(''), attribs: attribs}; |
723 | | - if ( end !== '' ) { |
| 757 | + if ( end != '' ) { |
724 | 758 | res.type = 'ENDTAG'; |
725 | | - } else if ( selfclose !== '' ) { |
| 759 | + } else if ( selfclose != '' ) { |
726 | 760 | res.type = 'SELFCLOSINGTAG'; |
727 | 761 | } else { |
728 | 762 | res.type = 'TAG'; |
— | — | @@ -862,26 +896,24 @@ |
863 | 897 | / "'" t:[^'>]+ "'" { return [null, unquote("'", t.join(''))]; } |
864 | 898 | / '"' t:[^">]+ '"' { return [null, unquote('"', t.join(''))]; } |
865 | 899 | |
866 | | -lists = es:(dtdd / li)+ |
| 900 | +lists = e:(dtdd / li) es:(sol (dtdd / li))* |
867 | 901 | { |
868 | 902 | return annotateList( [ { type: 'TAG', name: 'list'} ] |
869 | | - .concat(flatten(es) |
| 903 | + .concat(flatten([e].concat(es)) |
870 | 904 | ,[{ type: 'ENDTAG', name: 'list' }])); |
871 | 905 | } |
872 | 906 | |
873 | | -li = s:sol |
874 | | - bullets:list_char+ |
| 907 | +li = bullets:list_char+ |
875 | 908 | c:inlineline |
876 | 909 | &newline |
877 | 910 | { |
878 | | - return s.concat([ { type: 'TAG', |
| 911 | + return [ { type: 'TAG', |
879 | 912 | name: 'listItem', |
880 | 913 | bullets: bullets } |
881 | | - , c ]); |
| 914 | + , c ]; |
882 | 915 | } |
883 | 916 | |
884 | | -dtdd = s:sol |
885 | | - bullets:list_char+ |
| 917 | +dtdd = bullets:list_char+ |
886 | 918 | c:(inline_element / (n:[^:\n] { return {type: 'TEXT', value: n}; }))+ |
887 | 919 | ":" |
888 | 920 | d:(inline_element / (n:[^\n] { return {type: 'TEXT', value: n}; }))+ |
— | — | @@ -893,7 +925,7 @@ |
894 | 926 | } else { |
895 | 927 | var dtbullets = bullets.slice(0, bullets.length - 1); |
896 | 928 | dtbullets.push(':'); |
897 | | - return s.concat([ { type: 'TAG', name: 'listItem', bullets: bullets } ]) |
| 929 | + return [ { type: 'TAG', name: 'listItem', bullets: bullets } ] |
898 | 930 | .concat( c |
899 | 931 | ,[{ type: 'TAG', name: 'listItem', bullets: dtbullets } ] |
900 | 932 | , d ); |