Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt |
— | — | @@ -1,11 +1,47 @@ |
2 | 2 | /* Produces output more or less compatible with FakeParser; plug it into FP's output and see */ |
3 | 3 | { |
| 4 | + /* FIXME: move the static functions to a separate module! Unfortunately, this |
| 5 | + * does not work: |
| 6 | + * var tu = require('./mediawiki.tokenizer.utils.js'); |
| 7 | + * console.log(tu.flatten([])); |
| 8 | + * Using exports in the module gets a bit further, but accesses to |
| 9 | + * tu.flatten in productions still fail. Thus, I just moved the functions |
| 10 | + * here until a solution is found: |
| 11 | + */ |
| 12 | + |
| 13 | + /* Static utilities */ |
| 14 | + |
| 15 | + // Recursively flatten nested arrays into a single flat list. |
| 16 | + var flatten = function ( e ) { |
| 17 | + var es = []; |
| 18 | + // flatten sub-arrays |
| 19 | + for(var i = 0, length = e.length; i < length; i++) { |
| 20 | + var ei = e[i]; |
| 21 | + if ($.isArray(ei)) |
| 22 | + es = es.concat(flatten(ei)); |
| 23 | + else |
| 24 | + es.push(ei); |
| 25 | + }; |
| 26 | + return es; |
| 27 | + }; |
| 28 | + |
| 29 | + // Unescape a backslash-escaped quote in attributes etc. (first occurrence only) |
| 30 | + var unquote = function (quotec, text) { |
| 31 | + return text.replace('\\' + quotec, quotec); |
| 32 | + }; |
| 33 | + |
| 34 | + |
| 35 | + // Debug print with global switch |
4 | 36 | var dp = function ( msg ) { |
5 | 37 | if ( false ) { |
6 | 38 | console.log(msg); |
7 | 39 | } |
8 | 40 | }; |
9 | 41 | |
| 42 | + var pp = function ( s ) { return JSON.stringify(s, null, 2); } |
| 43 | + |
| 44 | + /* End static utilities */ |
| 45 | + |
10 | 46 | /* |
11 | 47 | * Flags for specific parse environments (inside tables, links etc). Flags |
12 | 48 | * trigger syntactic stops in the inline_breaks production, which |
— | — | @@ -27,89 +63,9 @@ |
28 | 64 | syntaxFlags[flag]--; |
29 | 65 | }; |
30 | 66 | |
31 | | - |
32 | | - var pp = function ( s ) { return JSON.stringify(s, null, 2); } |
33 | | - |
34 | | - // Convert list prefixes to a list of WikiDom list styles |
35 | | - var bulletsToTypes = function (bullets) { |
36 | | - var bTypes = []; |
37 | | - var blen = bullets.length; |
38 | | - for (var i = 0; i < bullets.length; i++) { |
39 | | - switch (bullets[i]) { |
40 | | - case '*': |
41 | | - bTypes.push('bullet'); break; |
42 | | - case '#': |
43 | | - bTypes.push('number'); break; |
44 | | - case ';': |
45 | | - bTypes.push('term'); break; |
46 | | - case ':': |
47 | | - bTypes.push('description'); break; |
48 | | - } |
49 | | - } |
50 | | - return bTypes; |
51 | | - }; |
52 | | - |
53 | | - /*var extractInline = function ( node ) { |
54 | | - return { text: extractText(node, 0) }; |
55 | | - }; |
56 | | - |
57 | | - |
58 | | - // return [text [annotations]] |
59 | | - var extractText = function ( node, offset ) { |
60 | | - dp("extract: " + pp(node)); |
61 | | - if (typeof node === 'string') { |
62 | | - return [node, []]; |
63 | | - } else if ($.isArray(node)) { |
64 | | - var texts = [], |
65 | | - annotations = []; |
66 | | - for (var i = 0, length = node.length; i < length; i++) { |
67 | | - var res = extractText(node[i], offset); |
68 | | - texts.push(res[0]); |
69 | | - annotations.concat(res[1]); |
70 | | - offset += res[0].length; |
71 | | - } |
72 | | - return [texts.join(''), annotations]; |
73 | | - } else if ( 'text' in node ) { |
74 | | - var res = extractText(node, offset); |
75 | | - if ('annotations' in node) { |
76 | | - return [res[0], node.annotations.concat(res[1])]; |
77 | | - } else { |
78 | | - return res; |
79 | | - } |
80 | | - } else if ( 'content' in node ) { |
81 | | - return extractText(node.content, offset); |
82 | | - } else if ( 'children' in node ) { |
83 | | - var texts = []; |
84 | | - for (var i = 0, length = node.children.length; i < length; i++) { |
85 | | - texts.push(extractText(node.children[i])); |
86 | | - } |
87 | | - return texts.join(''); |
88 | | - } else { |
89 | | - throw ("extract failed: " + pp(node)); |
90 | | - } |
91 | | - }; |
92 | | - */ |
93 | | - |
94 | 67 | // Start position of top-level block |
95 | 68 | // Could also provide positions for lower-level blocks using a stack. |
96 | 69 | var blockStart = 0; |
97 | | - |
98 | | - var unquote = function (quotec, text) { |
99 | | - return text.replace('\\' + quotec, quotec); |
100 | | - }; |
101 | | - |
102 | | - var flatten = function ( e ) { |
103 | | - var es = []; |
104 | | - // flatten sub-arrays |
105 | | - for(var i = 0, length = e.length; i < length; i++) { |
106 | | - var ei = e[i]; |
107 | | - if ($.isArray(ei)) |
108 | | - es = es.concat(flatten(ei)); |
109 | | - else |
110 | | - es.push(ei); |
111 | | - }; |
112 | | - return es; |
113 | | - }; |
114 | 70 | } |
115 | 71 | |
116 | 72 | start |
— | — | @@ -455,26 +411,33 @@ |
456 | 412 | |
457 | 413 | link_end = "]]" |
458 | 414 | |
| 415 | +/* This implementation of bold and italic is very basic so far, and misses the |
| 416 | + * finer points of doQuotes in the parser. A rough plan to get closer: |
| 417 | + * - '''' -> ' ''' |
| 418 | + * - last ''''' in a row of ' is used |
| 419 | + * - if *both* italics and bolds are unbalanced, check for prefix |
| 420 | + * - convert single-letter or multi-letter non-space prefixed tick back to |
| 421 | + * text |
| 422 | + */ |
459 | 423 | bold |
460 | 424 | = bold_marker |
461 | 425 | & { dp('benter:' + pos); return setFlag('bold'); } |
462 | 426 | c:inlineline |
463 | | - bold_marker { |
| 427 | + (bold_marker / &newline) { |
464 | 428 | clearFlag('bold'); |
465 | 429 | return [{ type: 'TAG', name: 'b' }] |
466 | 430 | .concat(c, [{type: 'ENDTAG', name: 'b'}]); |
467 | 431 | } |
468 | 432 | / bold_marker { clearFlag('bold'); return null } |
469 | 433 | |
470 | | -bold_marker |
471 | | - = "'''" |
| 434 | +bold_marker = "'''" |
472 | 435 | |
473 | 436 | |
474 | 437 | italic |
475 | 438 | = italic_marker |
476 | 439 | & { dp('ienter:' + pos); return setFlag('italic'); } |
477 | 440 | c:inlineline |
478 | | - italic_marker { |
| 441 | + (italic_marker / &newline) { |
479 | 442 | clearFlag('italic'); |
480 | 443 | dp('ileave:' + pos); |
481 | 444 | return [{ type: 'TAG', name: 'i' }] |
— | — | @@ -482,8 +445,7 @@ |
483 | 446 | } |
484 | 447 | / italic_marker { clearFlag('italic'); return null } |
485 | 448 | |
486 | | -italic_marker |
487 | | - = "''" |
| 449 | +italic_marker = "''" |
488 | 450 | |
489 | 451 | /* Will need to check anything xmlish against known/allowed HTML tags and |
490 | 452 | * registered extensions, otherwise fail the match. Should ref be treated as a |