Index: trunk/extensions/VisualEditor/modules/es/serializers/es.HtmlSerializer.js |
— | — | @@ -69,9 +69,15 @@ |
70 | 70 | }; |
71 | 71 | |
72 | 72 | es.HtmlSerializer.prototype.comment = function( node ) { |
73 | | - return '<!--' + node.text + '-->'; |
| 73 | + return '<!--(' + node.text + ')-->'; |
74 | 74 | }; |
75 | 75 | |
| 76 | +es.HtmlSerializer.prototype.pre = function( node ) { |
| 77 | + return es.Html.makeTag( |
| 78 | + 'pre', {}, this.document( node, true ) |
| 79 | + ); |
| 80 | +}; |
| 81 | + |
76 | 82 | es.HtmlSerializer.prototype.horizontalRule = function( node ) { |
77 | 83 | return es.Html.makeTag( 'hr', {}, false ); |
78 | 84 | }; |
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt |
— | — | @@ -6,45 +6,30 @@ |
7 | 7 | console.log(msg); |
8 | 8 | } |
9 | 9 | } |
10 | | - |
11 | 10 | |
12 | | - // Forbidden chars in anything content |
13 | | - var forbiddenThingRegexp = { |
14 | | - // key: regexp |
15 | | - // value: nesting counter, regexp removed if zero reached |
16 | | - kvs: {}, |
17 | | - regexp: "", |
18 | | - rebuild: function () { |
19 | | - var keys = []; |
20 | | - for (var key in this.kvs) { |
21 | | - keys.push(key); |
22 | | - } |
23 | | - this.regexp = keys.join('|'); |
24 | | - }, |
25 | | - push: function (key) { |
26 | | - if (this.kvs[key] !== undefined) { |
27 | | - this.kvs[key]++; |
28 | | - } else { |
29 | | - this.kvs[key] = 1; |
30 | | - this.rebuild(); |
31 | | - } |
32 | | - }, |
33 | | - pop: function (key) { |
34 | | - if (this.kvs[key] !== undefined) { |
35 | | - if(this.kvs[key] == 1) { |
36 | | - delete this.kvs[key]; |
37 | | - this.rebuild(); |
38 | | - } else { |
39 | | - this.kvs[key]--; |
40 | | - } |
41 | | - } else { |
42 | | - throw "Trying to pop non-existing forbiddenThingRegexp"; |
43 | | - } |
| 11 | + /* |
| 12 | + * Flags for specific parse environments (inside tables, links etc). Flags |
| 13 | + * trigger syntactic stops in the inline_breaks production, which |
| 14 | + * terminates inline and attribute matches. Flags merely reduce the number |
| 15 | + * of productions needed: The grammar is still context-free as the |
| 16 | + * productions can just be unrolled for all combinations of environments |
| 17 | + * at the cost of a much larger grammar. |
| 18 | + */ |
| 19 | + var syntaxFlags = {}; |
| 20 | + var setFlag = function(flag) { |
| 21 | + if (syntaxFlags[flag] !== undefined) { |
| 22 | + syntaxFlags[flag]++; |
| 23 | + } else { |
| 24 | + syntaxFlags[flag] = 1; |
44 | 25 | } |
| 26 | + return true; |
45 | 27 | } |
46 | | - |
| 28 | + var clearFlag = function(flag) { |
| 29 | + syntaxFlags[flag]--; |
| 30 | + } |
47 | 31 | |
48 | 32 | |
| 33 | + |
49 | 34 | /* Temporary debugging help. Is there anything similar in JS or a library? */ |
50 | 35 | var print_r = function (arr, level) { |
51 | 36 | |
— | — | @@ -106,13 +91,13 @@ |
107 | 92 | = e:block* { |
108 | 93 | var es = []; |
109 | 94 | // flatten sub-arrays, as a list block can contain multiple lists |
110 | | - $.each(e, function(i, ei) { |
111 | | - if (ei.constructor == Array) |
| 95 | + for(var i = 0, length = e.length; i < length; i++) { |
| 96 | + var ei = e[i]; |
| 97 | + if ($.isArray(ei)) |
112 | 98 | es = es.concat(ei); |
113 | 99 | else |
114 | 100 | es.push(ei); |
115 | | - }); |
116 | | - dp(es); |
| 101 | + }; |
117 | 102 | return { |
118 | 103 | type: 'page', |
119 | 104 | children: es |
— | — | @@ -122,126 +107,88 @@ |
123 | 108 | anyblock = block / inline |
124 | 109 | anyblockline = block / inlineline |
125 | 110 | |
126 | | -anything |
127 | | - = text |
128 | | - / s:.+ { |
129 | | - // reject match if forbiddenThingRegexp matches |
130 | | - var str = s.join(''); |
131 | | - dp("anything: " +print_r(str)); |
132 | | - if (forbiddenThingRegexp.regexp !== '') { |
133 | | - var m = str.search(forbiddenThingRegexp.regexp) |
134 | | - if ( m > 0 ) { |
135 | | - dp("anything reverse " + (str.length - m) |
136 | | - + ", matched: " + str.substr(0,m)); |
137 | | - // reverse parser position |
138 | | - pos -= str.length - m; |
139 | | - return {text: str.substr(0,m)}; |
140 | | - } else { |
141 | | - if (m == 0) { |
142 | | - pos -= str.length; |
143 | | - return null; |
144 | | - } else { |
145 | | - return {text: str}; |
146 | | - } |
147 | | - } |
148 | | - } else { |
149 | | - return {text: str}; |
150 | | - } |
151 | | - } |
152 | 111 | |
153 | | -anyline |
154 | | - = text |
155 | | - / s:[^\n]+ { |
156 | | - // reject match if forbiddenThingRegexp matches |
157 | | - var str = s.join(''); |
158 | | - dp("anyline: " + print_r(str) + ", pos:" + pos); |
159 | | - if (forbiddenThingRegexp.regexp !== '') { |
160 | | - var m = str.search(forbiddenThingRegexp.regexp) |
161 | | - if ( m > 0 ) { |
162 | | - // reverse parser position |
163 | | - pos -= str.length - m; |
164 | | - dp("anyline reverse " + (str.length - m) |
165 | | - + ", matched: " + str.substr(0,m)); |
166 | | - return {text: str.substr(0,m)}; |
167 | | - } else { |
168 | | - if (m == 0) { |
169 | | - pos -= str.length; |
170 | | - return null; |
171 | | - } else { |
172 | | - return {text : str}; |
173 | | - } |
174 | | - } |
175 | | - } else { |
176 | | - return {text: str}; |
177 | | - } |
178 | | - } |
179 | | - |
180 | | - |
181 | | - |
182 | | - |
183 | 112 | // All chars that cannot start syntactic structures |
184 | 113 | text = t:[A-Za-z0-9,._ -]+ { return t.join('') } |
185 | 114 | |
186 | 115 | space |
187 | | - = s:[ ]+ { return s.join(''); } |
| 116 | + = s:[ \t]+ { return s.join(''); } |
188 | 117 | |
| 118 | + |
| 119 | +// Start of line |
| 120 | +sol = (newline / & { return pos === 0; } { return true; }) comment? |
| 121 | + |
189 | 122 | newline |
190 | | - = [\n] |
| 123 | + = '\n' / '\r\n' |
191 | 124 | |
192 | 125 | block |
193 | | - = br |
194 | | - / h |
| 126 | + = block_lines |
| 127 | + / para |
| 128 | + / br |
| 129 | + / newline &newline |
| 130 | + / comment |
| 131 | + |
| 132 | +block_lines |
| 133 | + = h |
195 | 134 | / table |
196 | 135 | / lists |
197 | | - / para |
| 136 | + / pre_indent |
198 | 137 | |
| 138 | + |
| 139 | +/* Headings */ |
199 | 140 | h = h1 / h2 / h3 / h4 / h5 / h6 |
200 | 141 | |
201 | | -h1 = '=' c:heading_text '=' newline { |
| 142 | +h1 = sol |
| 143 | + '=' c:heading_text '=' &newline { |
202 | 144 | return { |
203 | 145 | type: 'heading', |
204 | | - level: 1, |
| 146 | + attributes: {level: 1}, |
205 | 147 | text: c |
206 | 148 | } |
207 | 149 | } |
208 | 150 | |
209 | | -h2 = '==' c:heading_text '==' newline { |
| 151 | +h2 = sol |
| 152 | + '==' c:heading_text '==' &newline { |
210 | 153 | return { |
211 | 154 | type: 'heading', |
212 | | - level: 2, |
213 | | - text: c |
| 155 | + attributes: {level: 2}, |
| 156 | + content: c |
214 | 157 | } |
215 | 158 | } |
216 | 159 | |
217 | | -h3 = '===' c:heading_text '===' newline { |
| 160 | +h3 = sol |
| 161 | + '===' c:heading_text '===' &newline { |
218 | 162 | return { |
219 | 163 | type: 'heading', |
220 | | - level: 3, |
221 | | - text: c |
| 164 | + attributes: {level: 3}, |
| 165 | + content: c |
222 | 166 | } |
223 | 167 | } |
224 | 168 | |
225 | | -h4 = '====' c:heading_text '====' newline { |
| 169 | +h4 = sol |
| 170 | + '====' c:heading_text '====' &newline { |
226 | 171 | return { |
227 | 172 | type: 'heading', |
228 | | - level: 4, |
229 | | - text: c |
| 173 | + attributes: {level: 4}, |
| 174 | + content: c |
230 | 175 | } |
231 | 176 | } |
232 | 177 | |
233 | | -h5 = '=====' c:heading_text '=====' newline { |
| 178 | +h5 = sol |
| 179 | + '=====' c:heading_text '=====' &newline { |
234 | 180 | return { |
235 | 181 | type: 'heading', |
236 | | - level: 5, |
237 | | - text: c |
| 182 | + attributes: {level: 5}, |
| 183 | + content: c |
238 | 184 | } |
239 | 185 | } |
240 | 186 | |
241 | | -h6 = '======' c:heading_text '======' newline { |
| 187 | +h6 = sol |
| 188 | + '======' c:heading_text '======' &newline { |
242 | 189 | return { |
243 | 190 | type: 'heading', |
244 | | - level: 6, |
245 | | - text: c |
| 191 | + attributes: {level: 6}, |
| 192 | + content: c |
246 | 193 | } |
247 | 194 | } |
248 | 195 | |
— | — | @@ -249,18 +196,36 @@ |
250 | 197 | = '=' '='* |
251 | 198 | |
252 | 199 | heading_text |
253 | | - = h:( !heading_marker x:(anyline) { return x } )* { return h.join(''); } |
| 200 | + = h:( !heading_marker x:(inlineline) { return x } )* { return h.join(''); } |
254 | 201 | |
255 | 202 | br |
256 | | - = newline { return {type: 'br'} } |
| 203 | + = newline !newline { return {type: 'br'} } |
257 | 204 | |
258 | 205 | // TODO: convert inline content to annotations! |
259 | 206 | para |
260 | | - = c:inlineline newline { return {type: 'paragraph', content: c[0] } } / |
261 | | - c:anyline |
| 207 | + = sol c:inlineline cs:(!block_lines para)* { return {type: 'paragraph', content: c[0] } } |
262 | 208 | |
| 209 | +pre_indent |
| 210 | + = l:pre_indent_line+ { |
| 211 | + return { |
| 212 | + type: 'pre', |
| 213 | + children: l |
| 214 | + } |
| 215 | + } |
| 216 | +pre_indent_line = sol space l:inlineline { return l } |
| 217 | + |
| 218 | +// Syntax that stops inline expansion |
| 219 | +inline_breaks |
| 220 | + = //& { console.log(print_r(syntaxFlags)); return true; } |
| 221 | + & { return syntaxFlags['table']; } |
| 222 | + a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + print_r(a)); return true; } |
| 223 | + / & { return syntaxFlags['italic']; } italic_marker { return true; } |
| 224 | + / & { return syntaxFlags['bold']; } bold_marker { return true; } |
| 225 | + / & { return syntaxFlags['linkdesc']; } link_end { return true; } |
| 226 | + |
| 227 | + |
263 | 228 | inline |
264 | | - = c:(inline_element / anything)+ { |
| 229 | + = c:(text / inline_element / (!inline_breaks ch:. { return ch; }))+ { |
265 | 230 | var out = []; |
266 | 231 | var text = ''; |
267 | 232 | for (var i = 0; i < c.length; i++) { |
— | — | @@ -287,9 +252,10 @@ |
288 | 253 | } |
289 | 254 | |
290 | 255 | inlineline |
291 | | - = c:(inline_element / anyline)+ { |
| 256 | + = c:(text / !inline_breaks (inline_element / [^\n]))+ { |
292 | 257 | var out = []; |
293 | 258 | var text = ''; |
| 259 | + dp("inlineline: " + print_r(c)); |
294 | 260 | for (var i = 0; i < c.length; i++) { |
295 | 261 | if (typeof c[i] == 'string') { |
296 | 262 | text += c[i]; |
— | — | @@ -313,7 +279,9 @@ |
314 | 280 | return out; |
315 | 281 | } |
316 | 282 | |
317 | | -// TODO: convert all these to annotations! |
| 283 | +/* TODO: convert all these to annotations! |
| 284 | + * -> need (start, end) offsets within block |
| 285 | + */ |
318 | 286 | inline_element |
319 | 287 | = comment |
320 | 288 | / xmlish_tag |
— | — | @@ -333,11 +301,8 @@ |
334 | 302 | |
335 | 303 | comment_chars |
336 | 304 | = c:[^-] { return c; } |
337 | | - / c:'-' !'-' { return c; } |
| 305 | + / c:'-' !'->' { return c; } |
338 | 306 | |
339 | | -inline_text_run |
340 | | - = c:[^\n]+ { return c.join(''); } |
341 | | - |
342 | 307 | extlink |
343 | 308 | = "[" target:url " " text:extlink_text "]" { |
344 | 309 | return { |
— | — | @@ -423,36 +388,50 @@ |
424 | 389 | = h:( !"]]" x:([^|]) { return x } )* { return h.join(''); } |
425 | 390 | |
426 | 391 | link_text |
427 | | - = h:( !"]]" x:(anyline) { return x } )* { return h.join(''); } |
| 392 | + = h:( & { return setFlag('linkdesc'); } |
| 393 | + x:inlineline { return x } |
| 394 | + )* { |
| 395 | + clearFlag('linkdesc') |
| 396 | + return h.join(''); |
| 397 | + } |
| 398 | + / & { clearFlag('linkdesc') } { return null; } |
428 | 399 | |
| 400 | +link_end = "]]" |
| 401 | + |
429 | 402 | bold |
430 | | - = bold_marker c:bold_text bold_marker { |
431 | | - return { |
432 | | - type: 'b', |
433 | | - text: c, |
434 | | - } |
435 | | -} |
| 403 | + = bold_marker |
| 404 | + & { dp('benter:' + pos); return setFlag('bold'); } |
| 405 | + c:inlineline |
| 406 | + bold_marker { |
| 407 | + clearFlag('bold'); |
| 408 | + return { |
| 409 | + type: 'b', |
| 410 | + text: c, |
| 411 | + } |
| 412 | + } |
| 413 | + / bold_marker { clearFlag('bold'); return null } |
436 | 414 | |
437 | 415 | bold_marker |
438 | 416 | = "'''" |
439 | 417 | |
440 | | -bold_text |
441 | | - = h:( !bold_marker x:(anyline) { return x } )+ { return h.join(''); } |
442 | 418 | |
443 | 419 | italic |
444 | | - = italic_marker c:italic_text italic_marker { |
445 | | - return { |
446 | | - type: 'i', |
447 | | - text: c |
448 | | - } |
449 | | -} |
| 420 | + = italic_marker |
| 421 | + & { dp('ienter:' + pos); return setFlag('italic'); } |
| 422 | + c:inlineline |
| 423 | + italic_marker { |
| 424 | + clearFlag('italic'); |
| 425 | + dp('ileave:' + pos); |
| 426 | + return { |
| 427 | + type: 'i', |
| 428 | + text: c |
| 429 | + } |
| 430 | + } |
| 431 | + / italic_marker { clearFlag('italic'); return null } |
450 | 432 | |
451 | 433 | italic_marker |
452 | 434 | = "''" |
453 | 435 | |
454 | | -italic_text |
455 | | - = h:( !italic_marker x:(anyline) { return x } )+ { return h.join(''); } |
456 | | - |
457 | 436 | /* Will need to check anything xmlish against known/allowed HTML tags and |
458 | 437 | * registered extensions, otherwise fail the match. Should ref be treated as a |
459 | 438 | * regular extension? */ |
— | — | @@ -500,7 +479,7 @@ |
501 | 480 | } |
502 | 481 | |
503 | 482 | ref_content |
504 | | - = !ref_end a:(inline_element / anyline) { |
| 483 | + = !ref_end a:(inline) { |
505 | 484 | return a; |
506 | 485 | } |
507 | 486 | |
— | — | @@ -544,7 +523,7 @@ |
545 | 524 | } |
546 | 525 | |
547 | 526 | references_content |
548 | | - = !references_end a:(inline_element / anyline) { |
| 527 | + = !references_end a:(inline) { |
549 | 528 | return a; |
550 | 529 | } |
551 | 530 | |
— | — | @@ -584,9 +563,10 @@ |
585 | 564 | } |
586 | 565 | } |
587 | 566 | |
588 | | -li = bullets:list_char+ |
589 | | - c:(inlineline / anyline) |
590 | | - newline |
| 567 | +li = sol |
| 568 | + bullets:list_char+ |
| 569 | + c:inlineline |
| 570 | + &newline |
591 | 571 | { |
592 | 572 | return { |
593 | 573 | type: 'listItem', |
— | — | @@ -597,11 +577,12 @@ |
598 | 578 | }; |
599 | 579 | } |
600 | 580 | |
601 | | -dtdd = bullets:list_char+ |
| 581 | +dtdd = sol |
| 582 | + bullets:list_char+ |
602 | 583 | c:(inline_element / [^:\n])+ |
603 | 584 | ":" |
604 | 585 | d:(inline_element / [^\n])+ |
605 | | - newline |
| 586 | + &newline |
606 | 587 | { |
607 | 588 | // reject rule if bullets do not end in semicolon |
608 | 589 | if (bullets[bullets.length - 1] != ';') { |
— | — | @@ -641,23 +622,27 @@ |
642 | 623 | // FIXME: actually parse and build structure |
643 | 624 | res.attributes = { unparsed: tas } |
644 | 625 | } |
645 | | - dp(print_r(res)); |
| 626 | + //dp(print_r(res)); |
646 | 627 | return res; |
647 | 628 | } |
648 | 629 | |
649 | 630 | table_start |
650 | | - = "{|" |
651 | | - & { forbiddenThingRegexp.push('(\n|^)\\||\\|[|\x7d+]'); return true; } |
652 | | - ta:table_attribs* space* newline? { |
653 | | - dp("table_start " + print_r(ta) + ", pos:" + pos); |
| 631 | + = sol |
| 632 | + "{|" |
| 633 | + & { setFlag('table'); return true; } |
| 634 | + ta:table_attribs* |
| 635 | + space* { |
| 636 | + //dp("table_start " + print_r(ta) + ", pos:" + pos); |
654 | 637 | return ta; |
655 | 638 | } |
656 | | - / "{|" { forbiddenThingRegexp.pop('(\n|^)\\||\\|[|\x7d+]'); return null; } |
| 639 | + / sol "{|" { clearFlag('table'); return null; } |
657 | 640 | |
658 | | -table_attribs = anyline |
| 641 | +table_attribs |
| 642 | + = text / ! inline_breaks !newline . |
659 | 643 | |
660 | 644 | table_caption |
661 | | - = "|+" c:inline* newline? { |
| 645 | + = newline |
| 646 | + "|+" c:inline* { |
662 | 647 | return { |
663 | 648 | type: 'tableCaption', |
664 | 649 | content: c[0] |
— | — | @@ -685,7 +670,9 @@ |
686 | 671 | } |
687 | 672 | |
688 | 673 | table_row |
689 | | - = "|-" space* newline? td:(table_data / table_header)* { |
| 674 | + = & { dp("table row enter"); return true; } |
| 675 | + newline |
| 676 | + "|-" thtd_attribs? space* td:(table_data / table_header)* { |
690 | 677 | return { |
691 | 678 | type: 'tableRow', |
692 | 679 | children: td |
— | — | @@ -694,44 +681,39 @@ |
695 | 682 | |
696 | 683 | table_data |
697 | 684 | = & { dp("table_data enter, pos=" + pos); return true; } |
698 | | - "||" td_attr? td:anyblock* newline? { |
699 | | - //dp("table || result:" + print_r(td)); |
| 685 | + ("||" / newline "|") |
| 686 | + ! [}+-] |
| 687 | + a:thtd_attribs? |
| 688 | + td:(!inline_breaks anyblock)* { |
| 689 | + dp("table data result: " + print_r(td) + ", attribts: " + print_r(a)); |
700 | 690 | return { |
701 | 691 | type: 'tableCell', |
| 692 | + attributes: { unparsed: a }, |
702 | 693 | children: td |
703 | 694 | }; |
704 | 695 | } |
705 | | - / & { dp("table_data : | enter pos=" + pos); return true; } |
706 | | - "|" ![}+-] td_attr? td:anyblock* newline? { |
707 | | - //dp("table | result:" + print_r(td)); |
708 | | - return { |
709 | | - type: 'tableCell', |
710 | | - children: td |
711 | | - }; |
712 | | - } |
713 | 696 | |
714 | | -td_attr = a:[^\n|]+ "|" !"|" { |
715 | | - dp("td_attr: " + a.join('')); |
716 | | - return a.join(''); |
717 | | -} |
| 697 | +table_header |
| 698 | + = ("!!" / newline "!") |
| 699 | + a:thtd_attribs? |
| 700 | + c:inline { |
| 701 | + return { |
| 702 | + type: 'tableHeading', |
| 703 | + attributes: { unparsed: a }, |
| 704 | + children: c |
| 705 | + } |
| 706 | + } |
718 | 707 | |
719 | | -table_header |
720 | | - = "!!" c:(inline / ("!" !"!") / [^!\n])* newline? { |
721 | | - return { |
722 | | - type: 'tableHeading', |
723 | | - children: c |
724 | | - } |
725 | | - } |
726 | | - / "!" c:(inline / text / '!' !'!' / [^!\n])* newline? { |
727 | | - return { |
728 | | - type: 'tableHeading', |
729 | | - children: c |
730 | | - } |
731 | | - } |
| 708 | +thtd_attribs |
| 709 | + // In particular, do not match [|\n] |
| 710 | + = a:(text / ! inline_breaks [="':;/,.-] )+ "|" ! [|}+-] { |
| 711 | + return a; |
| 712 | + } |
732 | 713 | |
733 | | -table_end = "|}" newline? { forbiddenThingRegexp.pop('(\n|^)\\||\\|[|\x7d+]'); } |
734 | 714 | |
| 715 | +table_end = newline? "|}" { clearFlag('table'); } |
735 | 716 | |
| 717 | + |
736 | 718 | /* Wikidom TODO: |
737 | 719 | * split off text into content nodes |
738 | 720 | * convert inlines into annotations |