Index: trunk/extensions/ParserPlayground/tests/parserTests.js |
— | — | @@ -58,7 +58,12 @@ |
59 | 59 | console.log(testFileName); |
60 | 60 | } |
61 | 61 | |
62 | | -var testParser = PEG.buildParser(fs.readFileSync('parserTests.pegjs', 'utf8')); |
| 62 | +try { |
| 63 | + var testParser = PEG.buildParser(fs.readFileSync('parserTests.pegjs', 'utf8')); |
| 64 | +} catch (e) { |
| 65 | + console.log(e); |
| 66 | +} |
| 67 | + |
63 | 68 | var testFile = fs.readFileSync(testFileName, 'utf8'); |
64 | 69 | |
65 | 70 | |
Index: trunk/extensions/ParserPlayground/modules/pegParser.pegjs.txt |
— | — | @@ -1,7 +1,50 @@ |
2 | 2 | /* Produces output more or less compatible with FakeParser; plug it into FP's output and see */ |
3 | 3 | |
4 | 4 | { |
| 5 | + var dp = function ( msg ) { |
| 6 | + if ( false ) { |
| 7 | + console.log(msg); |
| 8 | + } |
| 9 | + } |
| 10 | + |
5 | 11 | |
| 12 | + // Patterns that text matched by the 'anything' / 'anyline' rules must not run into |
| 13 | + var forbiddenThingRegexp = { |
| 14 | + // key: regexp source string |
| 15 | + // value: nesting counter; the regexp is removed once the counter would drop to zero |
| 16 | + kvs: {}, |
| 17 | + regexp: "", |
| 18 | + rebuild: function () { |
| 19 | + var keys = []; |
| 20 | + for (var key in this.kvs) { |
| 21 | + keys.push(key); |
| 22 | + } |
| 23 | + this.regexp = keys.join('|'); |
| 24 | + }, |
| 25 | + push: function (key) { |
| 26 | + if (this.kvs[key] !== undefined) { |
| 27 | + this.kvs[key]++; |
| 28 | + } else { |
| 29 | + this.kvs[key] = 1; |
| 30 | + this.rebuild(); |
| 31 | + } |
| 32 | + }, |
| 33 | + pop: function (key) { |
| 34 | + if (this.kvs[key] !== undefined) { |
| 35 | + if(this.kvs[key] == 1) { |
| 36 | + delete this.kvs[key]; |
| 37 | + this.rebuild(); |
| 38 | + } else { |
| 39 | + this.kvs[key]--; |
| 40 | + } |
| 41 | + } else { |
| 42 | + throw "Trying to pop non-existing forbiddenThingRegexp"; |
| 43 | + } |
| 44 | + } |
| 45 | + } |
| 46 | + |
| 47 | + |
| 48 | + |
6 | 49 | /* Temporary debugging help. Is there anything similar in JS or a library? */ |
7 | 50 | var print_r = function (arr, level) { |
8 | 51 | |
— | — | @@ -26,7 +69,7 @@ |
27 | 70 | dumped_text += level_padding + "[" + item + "] => "; |
28 | 71 | dumped_text += print_r(value, level + 2); |
29 | 72 | } else { |
30 | | - dumped_text += level_padding + "[" + item + "] => " + value + "\n"; |
| 73 | + dumped_text += level_padding + "[" + item + "] => '" + value + "'\n"; |
31 | 74 | } |
32 | 75 | |
33 | 76 | } |
— | — | @@ -50,18 +93,73 @@ |
51 | 94 | else |
52 | 95 | es.push(ei); |
53 | 96 | }); |
54 | | - //console.log(print_r(es, 10)); |
| 97 | + //dp(print_r(es)); |
55 | 98 | return { |
56 | 99 | type: 'page', |
57 | 100 | content: es |
58 | 101 | } |
59 | 102 | } |
60 | 103 | |
61 | | -anyblock = block / inline_element |
| 104 | +anyblock = block / inline |
| 105 | +anyblockline = block / inlineline |
62 | 106 | |
63 | 107 | anything |
64 | | - = text / [^\n] |
| 108 | + = text |
| 109 | + / s:.+ { |
| 110 | + // truncate the match where forbiddenThingRegexp first matches; reject it if that is at position 0 |
| 111 | + var str = s.join(''); |
| 112 | + dp("anything: " +print_r(str)); |
| 113 | + if (forbiddenThingRegexp.regexp !== '') { |
| 114 | + var m = str.search(forbiddenThingRegexp.regexp) |
| 115 | + if ( m > 0 ) { |
| 116 | + dp("anything reverse " + (str.length - m) |
| 117 | + + ", matched: " + str.substr(0,m)); |
| 118 | + // rewind the parser position to where the forbidden match starts |
| 119 | + pos -= str.length - m; |
| 120 | + return str.substr(0,m); |
| 121 | + } else { |
| 122 | + if (m == 0) { |
| 123 | + pos -= str.length; |
| 124 | + return null; |
| 125 | + } else { |
| 126 | + return str; |
| 127 | + } |
| 128 | + } |
| 129 | + } else { |
| 130 | + return str; |
| 131 | + } |
| 132 | + } |
65 | 133 | |
| 134 | +anyline |
| 135 | + = text |
| 136 | + / s:[^\n]+ { |
| 137 | + // truncate the match where forbiddenThingRegexp first matches; reject it if that is at position 0 |
| 138 | + var str = s.join(''); |
| 139 | + dp("anyline: " + print_r(str) + ", pos:" + pos); |
| 140 | + if (forbiddenThingRegexp.regexp !== '') { |
| 141 | + var m = str.search(forbiddenThingRegexp.regexp) |
| 142 | + if ( m > 0 ) { |
| 143 | + // rewind the parser position to where the forbidden match starts |
| 144 | + pos -= str.length - m; |
| 145 | + dp("anyline reverse " + (str.length - m) |
| 146 | + + ", matched: " + str.substr(0,m)); |
| 147 | + return str.substr(0,m); |
| 148 | + } else { |
| 149 | + if (m == 0) { |
| 150 | + pos -= str.length; |
| 151 | + return null; |
| 152 | + } else { |
| 153 | + return str; |
| 154 | + } |
| 155 | + } |
| 156 | + } else { |
| 157 | + return str; |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + |
| 162 | + |
| 163 | + |
66 | 164 | // All chars that cannot start syntactic structures |
67 | 165 | text = t:[A-Za-z0-9,._ -]+ { return t.join('') } |
68 | 166 | |
— | — | @@ -132,14 +230,14 @@ |
133 | 231 | = '=' '='* |
134 | 232 | |
135 | 233 | heading_text |
136 | | - = h:( !heading_marker x:(anything) { return x } )* { return h.join(''); } |
| 234 | + = h:( !heading_marker x:(anyline) { return x } )* { return h.join(''); } |
137 | 235 | |
138 | 236 | br |
139 | 237 | = newline { return {type: 'br'} } |
140 | 238 | |
141 | 239 | para |
142 | | - = c:inline newline { return {type: 'para', content: c } } / |
143 | | - c:anything |
| 240 | + = c:inlineline newline { return {type: 'para', content: c } } / |
| 241 | + c:anyline |
144 | 242 | |
145 | 243 | inline |
146 | 244 | = c:(inline_element / anything)+ { |
— | — | @@ -168,6 +266,33 @@ |
169 | 267 | return out; |
170 | 268 | } |
171 | 269 | |
| 270 | +inlineline |
| 271 | + = c:(inline_element / anyline)+ { |
| 272 | + var out = []; |
| 273 | + var text = ''; |
| 274 | + for (var i = 0; i < c.length; i++) { |
| 275 | + if (typeof c[i] == 'string') { |
| 276 | + text += c[i]; |
| 277 | + } else { |
| 278 | + if (text.length) { |
| 279 | + out.push({ |
| 280 | + type: 'text', |
| 281 | + text: text |
| 282 | + }); |
| 283 | + text = ''; |
| 284 | + } |
| 285 | + out.push(c[i]); |
| 286 | + } |
| 287 | + } |
| 288 | + if (text.length) { |
| 289 | + out.push({ |
| 290 | + type: 'text', |
| 291 | + text: text |
| 292 | + }); |
| 293 | + } |
| 294 | + return out; |
| 295 | +} |
| 296 | + |
172 | 297 | inline_element |
173 | 298 | = comment |
174 | 299 | / xmlish_tag |
— | — | @@ -277,7 +402,7 @@ |
278 | 403 | = h:( !"]]" x:([^|]) { return x } )* { return h.join(''); } |
279 | 404 | |
280 | 405 | link_text |
281 | | - = h:( !"]]" x:(anything) { return x } )* { return h.join(''); } |
| 406 | + = h:( !"]]" x:(anyline) { return x } )* { return h.join(''); } |
282 | 407 | |
283 | 408 | bold |
284 | 409 | = bold_marker c:bold_text bold_marker { |
— | — | @@ -291,7 +416,7 @@ |
292 | 417 | = "'''" |
293 | 418 | |
294 | 419 | bold_text |
295 | | - = h:( !bold_marker x:(anything) { return x } )+ { return h.join(''); } |
| 420 | + = h:( !bold_marker x:(anyline) { return x } )+ { return h.join(''); } |
296 | 421 | |
297 | 422 | italic |
298 | 423 | = italic_marker c:italic_text italic_marker { |
— | — | @@ -305,7 +430,7 @@ |
306 | 431 | = "''" |
307 | 432 | |
308 | 433 | italic_text |
309 | | - = h:( !italic_marker x:(anything) { return x } )+ { return h.join(''); } |
| 434 | + = h:( !italic_marker x:(anyline) { return x } )+ { return h.join(''); } |
310 | 435 | |
311 | 436 | /* Will need to check anything xmlish against known/allowed HTML tags and |
312 | 437 | * registered extensions, otherwise fail the match. Should ref be treated as a |
— | — | @@ -354,7 +479,7 @@ |
355 | 480 | } |
356 | 481 | |
357 | 482 | ref_content |
358 | | - = !ref_end a:(inline_element / anything) { |
| 483 | + = !ref_end a:(inline_element / anyline) { |
359 | 484 | return a; |
360 | 485 | } |
361 | 486 | |
— | — | @@ -398,7 +523,7 @@ |
399 | 524 | } |
400 | 525 | |
401 | 526 | references_content |
402 | | - = !references_end a:(inline_element / anything) { |
| 527 | + = !references_end a:(inline_element / anyline) { |
403 | 528 | return a; |
404 | 529 | } |
405 | 530 | |
— | — | @@ -481,12 +606,12 @@ |
482 | 607 | } |
483 | 608 | bstack = bnext; |
484 | 609 | }); |
485 | | - //console.log("out: " + print_r(out, 5)); |
| 610 | + //dp("out: " + print_r(out, 5)); |
486 | 611 | return out; |
487 | 612 | } |
488 | 613 | |
489 | 614 | li = bullets:list_char+ |
490 | | - c:(inline / anything) |
| 615 | + c:(inlineline / anyline) |
491 | 616 | newline |
492 | 617 | { |
493 | 618 | var type; |
— | — | @@ -507,7 +632,7 @@ |
508 | 633 | dtdd = bullets:list_char+ |
509 | 634 | c:(inline_element / [^:\n])+ |
510 | 635 | ":" |
511 | | - d:(inline / anything) |
| 636 | + d:(inline / anyline) |
512 | 637 | newline |
513 | 638 | { |
514 | 639 | // reject rule if bullets do not end in semicolon |
— | — | @@ -551,16 +676,23 @@ |
552 | 677 | // FIXME: actually parse and build structure |
553 | 678 | res.attributes = { unparsed: tas } |
554 | 679 | } |
555 | | - console.log(print_r(res)); |
| 680 | + dp(print_r(res)); |
556 | 681 | return res; |
557 | 682 | } |
558 | 683 | |
559 | | -table_start = "{|" ta:table_attribs* space* newline? { return ta } |
| 684 | +table_start |
| 685 | + = "{|" |
| 686 | + & { forbiddenThingRegexp.push('(\n|^)\\||\\|[|\x7d+]'); return true; } |
| 687 | + ta:table_attribs* space* newline? { |
| 688 | + dp("table_start " + print_r(ta) + ", pos:" + pos); |
| 689 | + return ta; |
| 690 | + } |
| 691 | + / "{|" { forbiddenThingRegexp.pop('(\n|^)\\||\\|[|\x7d+]'); return null; } |
560 | 692 | |
561 | | -table_attribs = anything |
| 693 | +table_attribs = anyline |
562 | 694 | |
563 | 695 | table_caption |
564 | | - = "|+" c:(inline_element / anything / [^\n])* newline? { |
| 696 | + = "|+" c:inline* newline? { |
565 | 697 | return { |
566 | 698 | type: 'tableCaption', |
567 | 699 | content: c |
— | — | @@ -568,13 +700,14 @@ |
569 | 701 | } |
570 | 702 | |
571 | 703 | table_body |
572 | | - = firstrow:table_firstrow otherrows:table_row* { |
573 | | - /* console.log('table first and otherrows: ' |
| 704 | + = & { dp("table_body enter"); return true; } |
| 705 | + firstrow:table_firstrow otherrows:table_row* { |
| 706 | + /* dp('table first and otherrows: ' |
574 | 707 | * + print_r([firstrow].concat(otherrows))); */ |
575 | 708 | return [firstrow].concat(otherrows); |
576 | 709 | } |
577 | 710 | / otherrows:table_row* { |
578 | | - //console.log('table otherrows: ' + print_r(otherrows)); |
| 711 | + //dp('table otherrows: ' + print_r(otherrows)); |
579 | 712 | return otherrows; |
580 | 713 | } |
581 | 714 | |
— | — | @@ -600,43 +733,43 @@ |
601 | 734 | * starting with a pipe might be used. Checking this requires access to the |
602 | 735 | * matched source string. */ |
603 | 736 | table_data |
604 | | - = t:table { |
605 | | - //console.log("recursive table result:" + print_r(t)); |
606 | | - return { |
607 | | - type: 'tableCell', |
608 | | - content: [t] |
609 | | - } |
610 | | - } |
611 | | - / "||" td_attr? td:[^|]* newline? { |
| 737 | + = & { dp("table_data enter, pos=" + pos); return true; } |
| 738 | + "||" td_attr? td:anyblock* newline? { |
| 739 | + //dp("table || result:" + print_r(td)); |
612 | 740 | return { |
613 | 741 | type: 'tableCell', |
614 | | - content: td.join('') |
| 742 | + content: td |
615 | 743 | }; |
616 | 744 | } |
617 | | - / "|" ![}+-] td_attr? td:[^|\n]* newline? { |
| 745 | + / & { dp("table_data : | enter pos=" + pos); return true; } |
| 746 | + "|" ![}+-] td_attr? td:anyblock* newline? { |
| 747 | + //dp("table | result:" + print_r(td)); |
618 | 748 | return { |
619 | 749 | type: 'tableCell', |
620 | | - content: td.join('') |
| 750 | + content: td |
621 | 751 | }; |
622 | 752 | } |
623 | 753 | |
624 | | -td_attr = a:[^\n|]+ "|" !"|" { return a.join('') } |
| 754 | +td_attr = a:[^\n|]+ "|" !"|" { |
| 755 | + dp("td_attr: " + a.join('')); |
| 756 | + return a.join(''); |
| 757 | +} |
625 | 758 | |
626 | 759 | table_header |
627 | | - = "!!" c:(block/ inline_element / text / ("!" !"!") / [^!\n])* newline? { |
| 760 | + = "!!" c:(inline / ("!" !"!") / [^!\n])* newline? { |
628 | 761 | return { |
629 | 762 | type: 'tableHeader', |
630 | 763 | content: c |
631 | 764 | } |
632 | 765 | } |
633 | | - / "!" c:(block/ inline_element / text / '!' !'!' / [^!\n])* newline? { |
| 766 | + / "!" c:(inline / text / '!' !'!' / [^!\n])* newline? { |
634 | 767 | return { |
635 | 768 | type: 'tableHeader', |
636 | 769 | content: c |
637 | 770 | } |
638 | 771 | } |
639 | 772 | |
640 | | -table_end = "|}" newline? |
| 773 | +table_end = "|}" newline? { forbiddenThingRegexp.pop('(\n|^)\\||\\|[|\x7d+]'); } |
641 | 774 | |
642 | 775 | |
643 | 776 | /* Wikidom TODO: |