Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -109,7 +109,43 @@ |
110 | 110 | function nodeToHtml(node) { |
111 | 111 | return $('<div>').append(node).html(); |
112 | 112 | } |
| 113 | + /* Temporary debugging help. Is there anything similar in JS or a library? */ |
| 114 | + var print_r = function (arr, level) { |
113 | 115 | |
| 116 | + var dumped_text = ""; |
| 117 | + if (!level) level = 0; |
| 118 | + |
| 119 | + //The padding given at the beginning of the line. |
| 120 | + var level_padding = ""; |
| 121 | + var bracket_level_padding = ""; |
| 122 | + |
| 123 | + for (var j = 0; j < level + 1; j++) level_padding += " "; |
| 124 | + for (var b = 0; b < level; b++) bracket_level_padding += " "; |
| 125 | + |
| 126 | + if (typeof(arr) == 'object') { //Array/Hashes/Objects |
| 127 | + dumped_text += "Array\n"; |
| 128 | + dumped_text += bracket_level_padding + "(\n"; |
| 129 | + for (var item in arr) { |
| 130 | + |
| 131 | + var value = arr[item]; |
| 132 | + |
| 133 | + if (typeof(value) == 'object') { //If it is an array, |
| 134 | + dumped_text += level_padding + "[" + item + "] => "; |
| 135 | + dumped_text += print_r(value, level + 2); |
| 136 | + } else { |
| 137 | + dumped_text += level_padding + "[" + item + "] => '" + value + "'\n"; |
| 138 | + } |
| 139 | + |
| 140 | + } |
| 141 | + dumped_text += bracket_level_padding + ")\n\n"; |
| 142 | + } else { //Strings/Chars/Numbers etc. |
| 143 | + dumped_text = "=>" + arr + "<=(" + typeof(arr) + ")"; |
| 144 | + } |
| 145 | + |
| 146 | + return dumped_text; |
| 147 | + |
| 148 | + }; |
| 149 | + |
114 | 150 | function processTest(item) { |
115 | 151 | if (!('title' in item)) { |
116 | 152 | console.log(item); |
— | — | @@ -137,16 +173,16 @@ |
138 | 174 | 'references': MWReferencesTagHook |
139 | 175 | } |
140 | 176 | }); |
141 | | - var res = es.HtmlSerializer.stringify(tree,environment); |
142 | | - if (err) { |
143 | | - console.log('RENDER FAIL', err); |
144 | | - } else { |
145 | | - console.log('EXPECTED:'); |
146 | | - console.log(item.result + "\n"); |
| 177 | + //var res = es.HtmlSerializer.stringify(tree,environment); |
| 178 | + if (err) { |
| 179 | + console.log('RENDER FAIL', err); |
| 180 | + } else { |
| 181 | + console.log('EXPECTED:'); |
| 182 | + console.log(item.result + "\n"); |
147 | 183 | |
148 | | - console.log('RENDERED:'); |
149 | | - console.log(res + "\n"); |
150 | | - } |
| 184 | + console.log('RENDERED:'); |
| 185 | + console.log(print_r(tree)); |
| 186 | + } |
151 | 187 | } |
152 | 188 | }); |
153 | 189 | } |
Index: trunk/extensions/VisualEditor/modules/parser/lib.pegjs.js |
— | — | @@ -3823,6 +3823,7 @@ |
3824 | 3824 | } |
3825 | 3825 | |
3826 | 3826 | var source = this.emitter(ast); |
| 3827 | + //console.log(source); |
3827 | 3828 | var result = eval(source); |
3828 | 3829 | result._source = source; |
3829 | 3830 | |
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt |
— | — | @@ -1,11 +1,10 @@ |
2 | 2 | /* Produces output more or less compatible with FakeParser; plug it into FP's output and see */ |
3 | | - |
4 | 3 | { |
5 | 4 | var dp = function ( msg ) { |
6 | 5 | if ( false ) { |
7 | 6 | console.log(msg); |
8 | 7 | } |
9 | | - } |
| 8 | + }; |
10 | 9 | |
11 | 10 | /* |
12 | 11 | * Flags for specific parse environments (inside tables, links etc). Flags |
— | — | @@ -23,10 +22,10 @@ |
24 | 23 | syntaxFlags[flag] = 1; |
25 | 24 | } |
26 | 25 | return true; |
27 | | - } |
| 26 | + }; |
28 | 27 | var clearFlag = function(flag) { |
29 | 28 | syntaxFlags[flag]--; |
30 | | - } |
| 29 | + }; |
31 | 30 | |
32 | 31 | |
33 | 32 | |
— | — | @@ -65,7 +64,7 @@ |
66 | 65 | |
67 | 66 | return dumped_text; |
68 | 67 | |
69 | | - } |
| 68 | + }; |
70 | 69 | |
71 | 70 | // Convert list prefixes to a list of WikiDom list styles |
72 | 71 | var bulletsToTypes = function (bullets) { |
— | — | @@ -86,26 +85,35 @@ |
87 | 86 | return bTypes; |
88 | 87 | }; |
89 | 88 | |
90 | | - var extractInline = function ( node ) { |
91 | | - return { text: extractText(node) |
92 | | - } |
| 89 | + /*var extractInline = function ( node ) { |
| 90 | + return { text: extractText(node, 0) }; |
93 | 91 | }; |
94 | 92 | |
95 | 93 | |
96 | | - var extractText = function ( node ) { |
| 94 | + // return [text [annotations]] |
| 95 | + var extractText = function ( node, offset ) { |
97 | 96 | dp("extract: " + print_r(node)); |
98 | 97 | if (typeof node === 'string') { |
99 | | - return node; |
| 98 | + return [node, []]; |
100 | 99 | } else if ($.isArray(node)) { |
101 | | - var texts = []; |
| 100 | + var texts = [], |
| 101 | + annotations = []; |
102 | 102 | for (var i = 0, length = node.length; i < length; i++) { |
103 | | - texts.push(extractText(node[i])); |
| 103 | + var res = extractText(node[i], offset); |
| 104 | + texts.push(res[0]); |
| 105 | + annotations.concat(res[1]); |
| 106 | + offset += res[0].length; |
104 | 107 | } |
105 | | - return texts.join(''); |
| 108 | + return [texts.join(''), annotations]; |
106 | 109 | } else if ( 'text' in node ) { |
107 | | - return extractText(node.text); |
| 110 | + var res = extractText(node, offset); |
| 111 | + if ('annotations' in node) { |
| 112 | + return [res[0], node.annotations.concat(res[1])]; |
| 113 | + } else { |
| 114 | + return res; |
| 115 | + } |
108 | 116 | } else if ( 'content' in node ) { |
109 | | - return extractText(node.content); |
| 117 | + return extractText(node.content, offset); |
110 | 118 | } else if ( 'children' in node ) { |
111 | 119 | var texts = []; |
112 | 120 | for (var i = 0, length = node.children.length; i < length; i++) { |
— | — | @@ -113,27 +121,35 @@ |
114 | 122 | } |
115 | 123 | return texts.join(''); |
116 | 124 | } else { |
117 | | - console.log("extract failed!" + print_r(node)); |
118 | 125 | throw ("extract failed: " + print_r(node)); |
119 | 126 | } |
120 | 127 | }; |
| 128 | + */ |
| 129 | + |
| 130 | + // Start position of top-level block |
| 131 | + var blockStart = 0; |
| 132 | + |
| 133 | + var unquote = function (quotec, text) { |
| 134 | + return text.replace('\\' + quotec, quotec); |
| 135 | + }; |
| 136 | + |
| 137 | + var flatten = function ( e ) { |
| 138 | + var es = []; |
| 139 | + // flatten sub-arrays |
| 140 | + for(var i = 0, length = e.length; i < length; i++) { |
| 141 | + var ei = e[i]; |
| 142 | + if ($.isArray(ei)) |
| 143 | + es = es.concat(flatten(ei)); |
| 144 | + else |
| 145 | + es.push(ei); |
| 146 | + }; |
| 147 | + return es; |
| 148 | + }; |
121 | 149 | } |
122 | 150 | |
123 | 151 | start |
124 | | - = e:block* newline* { |
125 | | - var es = []; |
126 | | - // flatten sub-arrays, as a list block can contain multiple lists |
127 | | - for(var i = 0, length = e.length; i < length; i++) { |
128 | | - var ei = e[i]; |
129 | | - if ($.isArray(ei)) |
130 | | - es = es.concat(ei); |
131 | | - else |
132 | | - es.push(ei); |
133 | | - }; |
134 | | - return { |
135 | | - type: 'page', |
136 | | - children: es |
137 | | - } |
| 152 | + = e:toplevelblock* newline* { |
| 153 | + return flatten(e); |
138 | 154 | } |
139 | 155 | |
140 | 156 | anyblock = block / inline |
— | — | @@ -157,12 +173,26 @@ |
158 | 174 | newline |
159 | 175 | = '\n' / '\r\n' |
160 | 176 | |
| 177 | +toplevelblock
| 178 | + = & { blockStart = pos; return true; } b:block {
| 179 | + b = flatten([b]); // wrap first: block can yield a single token object (comment), which flatten() alone would turn into []
| 180 | + var bs = b[0]; // first token of the block carries the source range
| 181 | + dp('toplevelblock:' + print_r(b) + bs);
| 182 | + if (bs.attribs === undefined) {
| 183 | + bs.attribs = [];
| 184 | + }
| 185 | + bs.attribs.push(['startPos', blockStart]);
| 186 | + bs.attribs.push(['endPos', pos]);
| 187 | + return b;
| 188 | + }
| 189 | + |
161 | 190 | block |
162 | | - = (sol space* &newline)? block_lines |
| 191 | + = (sol space* &newline)? bl:block_lines { return bl; } |
163 | 192 | / para |
164 | 193 | / comment |
165 | | - / sol |
| 194 | + / (s:sol { return [{type: 'TEXT', value: s}]; }) |
166 | 195 | |
| 196 | +// Block structures with start-of-line wiki syntax |
167 | 197 | block_lines |
168 | 198 | = h |
169 | 199 | / table |
— | — | @@ -176,14 +206,11 @@ |
177 | 207 | h1 = sol '=' |
178 | 208 | ( |
179 | 209 | & { setFlag('h'); return setFlag('h1') } |
180 | | - c:inlineline '=' &newline { |
| 210 | + c:inlineline '=' comment? &newline { |
181 | 211 | clearFlag('h'); |
182 | 212 | clearFlag('h1'); |
183 | | - return { |
184 | | - type: 'heading', |
185 | | - attributes: {level: 1}, |
186 | | - content: extractInline(c) |
187 | | - } |
| 213 | + return [{type: 'TAG', name: 'h1'}] |
| 214 | + .concat(c, [{type: 'ENDTAG', name: 'h1'}]); |
188 | 215 | } |
189 | 216 | / { clearFlag('h'); clearFlag('h1'); return null } |
190 | 217 | ) |
— | — | @@ -191,14 +218,11 @@ |
192 | 219 | h2 = sol '==' |
193 | 220 | ( |
194 | 221 | & { setFlag('h'); return setFlag('h2') } |
195 | | - c:inlineline '==' &newline { |
| 222 | + c:inlineline '==' comment? &newline { |
196 | 223 | clearFlag('h'); |
197 | 224 | clearFlag('h2'); |
198 | | - return { |
199 | | - type: 'heading', |
200 | | - attributes: {level: 2}, |
201 | | - content: extractInline(c) |
202 | | - } |
| 225 | + return [{type: 'TAG', name: 'h2'}] |
| 226 | + .concat(c, [{type: 'ENDTAG', name: 'h2'}]); |
203 | 227 | } |
204 | 228 | / { clearFlag('h'); clearFlag('h2'); return null } |
205 | 229 | ) |
— | — | @@ -206,57 +230,45 @@ |
207 | 231 | h3 = sol '===' |
208 | 232 | ( |
209 | 233 | & { setFlag('h'); return setFlag('h3') } |
210 | | - c:inlineline '===' &newline { |
| 234 | + c:inlineline '===' comment? &newline { |
211 | 235 | clearFlag('h'); |
212 | 236 | clearFlag('h3'); |
213 | | - return { |
214 | | - type: 'heading', |
215 | | - attributes: {level: 3}, |
216 | | - content: extractInline(c) |
217 | | - } |
218 | | - } |
| 237 | + return [{type: 'TAG', name: 'h3'}] |
| 238 | + .concat(c, [{type: 'ENDTAG', name: 'h3'}]); |
| 239 | + } |
219 | 240 | / { clearFlag('h'); clearFlag('h3'); return null } |
220 | 241 | ) |
221 | 242 | |
222 | 243 | h4 = sol '====' |
223 | 244 | ( |
224 | 245 | & { setFlag('h'); return setFlag('h4') } |
225 | | - c:inlineline '====' &newline { |
| 246 | + c:inlineline '====' comment? &newline { |
226 | 247 | clearFlag('h'); |
227 | 248 | clearFlag('h4'); |
228 | | - return { |
229 | | - type: 'heading', |
230 | | - attributes: {level: 4}, |
231 | | - content: extractInline(c) |
232 | | - } |
233 | | - } |
| 249 | + return [{type: 'TAG', name: 'h4'}] |
| 250 | + .concat(c, [{type: 'ENDTAG', name: 'h4'}]); |
| 251 | + } |
234 | 252 | / { clearFlag('h'); clearFlag('h4'); return null } |
235 | 253 | ) |
236 | 254 | |
237 | 255 | h5 = sol '=====' |
238 | 256 | (& { setFlag('h'); return setFlag('h5') } |
239 | | - c:inlineline '=====' &newline { |
| 257 | + c:inlineline '=====' comment? &newline { |
240 | 258 | clearFlag('h'); |
241 | 259 | clearFlag('h5'); |
242 | | - return { |
243 | | - type: 'heading', |
244 | | - attributes: {level: 5}, |
245 | | - content: extractInline(c) |
246 | | - } |
| 260 | + return [{type: 'TAG', name: 'h5'}] |
| 261 | + .concat(c, [{type: 'ENDTAG', name: 'h5'}]); |
247 | 262 | } |
248 | 263 | / { clearFlag('h'); clearFlag('h5'); return null } |
249 | 264 | ) |
250 | 265 | |
251 | 266 | h6 = sol '======' |
252 | 267 | (& { setFlag('h'); return setFlag('h6') } |
253 | | - c:inlineline '======' &newline { |
| 268 | + c:inlineline '======' comment? &newline { |
254 | 269 | clearFlag('h'); |
255 | 270 | clearFlag('h6'); |
256 | | - return { |
257 | | - type: 'heading', |
258 | | - attributes: {level: 6}, |
259 | | - content: extractInline(c) |
260 | | - } |
| 271 | + return [{type: 'TAG', name: 'h6'}] |
| 272 | + .concat(c, [{type: 'ENDTAG', name: 'h6'}]); |
261 | 273 | } |
262 | 274 | / { clearFlag('h'); clearFlag('h6'); return null } |
263 | 275 | ) |
— | — | @@ -270,24 +282,25 @@ |
271 | 283 | |
272 | 284 | // TODO: convert inline content to annotations! |
273 | 285 | para |
274 | | - = (sol br)? para_lines |
| 286 | + = (sol br)? pl:para_lines { return pl; } |
275 | 287 | |
276 | 288 | para_lines |
277 | 289 | = s:sol c:inlineline cs:(!block_lines para_lines)* { |
278 | | - return { |
279 | | - type: 'paragraph', |
280 | | - content: extractInline([s].concat([c]).concat(cs)) |
281 | | - } |
| 290 | + var res = [{type: 'TAG', name: 'p'}]; |
| 291 | + if (s !== '') { |
| 292 | + res.push(s) |
| 293 | + } |
| 294 | + //console.log('paralines' + print_r(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]))); |
| 295 | + return res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]); |
282 | 296 | } |
283 | 297 | |
284 | | -br = space* &newline { return {type: 'br'} } |
| 298 | +br = space* &newline { return {type: 'SELFCLOSINGTAG', name: 'br'} } |
285 | 299 | |
286 | 300 | pre_indent |
287 | 301 | = l:pre_indent_line+ { |
288 | | - return { |
289 | | - type: 'pre', |
290 | | - content: extractInline(l) |
291 | | - } |
| 302 | + return [{type: 'TAG', name: 'pre'}] |
| 303 | + .concat( l |
| 304 | + , [{type: 'ENDTAG', name: 'pre'}]); |
292 | 305 | } |
293 | 306 | pre_indent_line = sol space l:inlineline { return l } |
294 | 307 | |
— | — | @@ -319,20 +332,14 @@ |
320 | 333 | text += c[i]; |
321 | 334 | } else { |
322 | 335 | if (text.length) { |
323 | | - out.push({ |
324 | | - type: 'text', |
325 | | - text: text |
326 | | - }); |
| 336 | + out.push({ type: "TEXT", value: text }); |
327 | 337 | text = ''; |
328 | 338 | } |
329 | | - out.push(c[i]); |
| 339 | + out = out.concat(c[i]); // concat() returns a new array; without the assignment the token(s) in c[i] are dropped
330 | 340 | } |
331 | 341 | } |
332 | 342 | if (text.length) { |
333 | | - out.push({ |
334 | | - type: 'text', |
335 | | - text: text |
336 | | - }); |
| 343 | + out.push({ type: 'TEXT', value: text }); |
337 | 344 | } |
338 | 345 | return out; |
339 | 346 | } |
— | — | @@ -347,21 +354,16 @@ |
348 | 355 | text += c[i]; |
349 | 356 | } else { |
350 | 357 | if (text.length) { |
351 | | - out.push({ |
352 | | - type: 'text', |
353 | | - text: text |
354 | | - }); |
| 358 | + out.push({type: 'TEXT', value: text}); |
355 | 359 | text = ''; |
356 | 360 | } |
357 | 361 | out.push(c[i]); |
358 | 362 | } |
359 | 363 | } |
360 | 364 | if (text.length) { |
361 | | - out.push({ |
362 | | - text: text, |
363 | | - //annotations: [] |
364 | | - }); |
| 365 | + out.push({type: 'TEXT', value: text}); |
365 | 366 | } |
| 367 | + //dp('inlineline out:', print_r(out)); |
366 | 368 | return out; |
367 | 369 | } |
368 | 370 | |
— | — | @@ -380,10 +382,7 @@ |
381 | 383 | comment |
382 | 384 | = '<!--' c:comment_chars* '-->' |
383 | 385 | (space* newline space* comment)* { |
384 | | - return { |
385 | | - type: 'comment', |
386 | | - text: c.join('') |
387 | | - } |
| 386 | + return { type: 'COMMENT', value: c.join('') }; |
388 | 387 | } |
389 | 388 | |
390 | 389 | comment_chars |
— | — | @@ -392,11 +391,11 @@ |
393 | 392 | |
394 | 393 | extlink |
395 | 394 | = "[" target:url " " text:extlink_text "]" { |
396 | | - return { |
397 | | - type: 'extlink', |
398 | | - target: target, |
399 | | - text: text |
400 | | - } |
| 395 | + return [ { type: 'TAG', |
| 396 | + name: 'a', |
| 397 | + attribs: [['href', target]] } |
| 398 | + , {type: 'TEXT', value: text} |
| 399 | + , {type: 'ENDTAG', name: 'a'}]; |
401 | 400 | } |
402 | 401 | |
403 | 402 | // = "[" target:url text:extlink_text "]" { return { type: 'extlink', target: target, text: text } } |
— | — | @@ -409,36 +408,29 @@ |
410 | 409 | |
411 | 410 | template |
412 | 411 | = "{{" target:link_target params:("|" p:template_param { return p })* "}}" { |
413 | | - var obj = { |
414 | | - type: 'template', |
415 | | - target: target |
416 | | - }; |
| 412 | + var obj = { type: 'SELFCLOSINGTAG', name: 'template', attribs: [['target', target]] } |
417 | 413 | if (params && params.length) { |
418 | | - obj.params = params; |
| 414 | + obj.attribs.push(params); |
419 | 415 | } |
420 | 416 | return obj; |
421 | 417 | } |
422 | 418 | |
423 | 419 | template_param |
424 | 420 | = name:template_param_name "=" c:template_param_text { |
425 | | - return { |
426 | | - name: name, |
427 | | - content: c |
428 | | - }; |
| 421 | + return [name, c]; |
429 | 422 | } / c:template_param_text { |
430 | | - return { |
431 | | - content: c |
432 | | - }; |
| 423 | + return [null, c]; |
433 | 424 | } |
434 | 425 | |
435 | 426 | tplarg |
436 | 427 | = "{{{" name:link_target params:("|" p:template_param { return p })* "}}}" { |
437 | | - var obj = { |
438 | | - type: 'tplarg', |
439 | | - name: name |
| 428 | + var obj = { |
| 429 | + type: 'SELFCLOSINGTAG', |
| 430 | + name: 'templatearg', |
| 431 | + attribs: [['argname', name]] |
440 | 432 | }; |
441 | 433 | if (params && params.length) { |
442 | | - obj.params = params; |
| 434 | + obj.attribs.push(params); |
443 | 435 | } |
444 | 436 | return obj; |
445 | 437 | } |
— | — | @@ -463,13 +455,14 @@ |
464 | 456 | link
465 | 457 | = "[[" target:link_target text:("|" link_text)* "]]" {
466 | 458 | var obj = {
467 | | - type: 'link',
468 | | - target: target
| 459 | + type: 'TAG',
| 460 | + name: 'a',
| 461 | + attribs: [['data-type', 'internal'], ['href', target]] // the link target is the href, not the label
469 | 462 | };
470 | 463 | if (text && text.length) {
471 | | - obj.text = text[0][1]; // ehhhh
| 464 | + var label = text[0][1]; // text[0] is the ["|", link_text] pair of the first piped section; NOTE(review): shape of link_text not visible here
472 | 465 | }
473 | | - return obj;
| 466 | + return [obj].concat(label === undefined ? [] : (typeof label === 'string' ? [{type: 'TEXT', value: label}] : label), [{type: 'ENDTAG', name: 'a'}]);
474 | 467 | }
475 | 468 | |
476 | 469 | link_target |
— | — | @@ -492,10 +485,8 @@ |
493 | 486 | c:inlineline |
494 | 487 | bold_marker { |
495 | 488 | clearFlag('bold'); |
496 | | - return { |
497 | | - type: 'b', |
498 | | - content: {text: c} |
499 | | - } |
| 489 | + return [{ type: 'TAG', name: 'b' }] |
| 490 | + .concat(c, [{type: 'ENDTAG', name: 'b'}]); |
500 | 491 | } |
501 | 492 | / bold_marker { clearFlag('bold'); return null } |
502 | 493 | |
— | — | @@ -510,11 +501,9 @@ |
511 | 502 | italic_marker { |
512 | 503 | clearFlag('italic'); |
513 | 504 | dp('ileave:' + pos); |
514 | | - return { |
515 | | - type: 'i', |
516 | | - content: {text: c} |
517 | | - } |
518 | | - } |
| 505 | + return [{ type: 'TAG', name: 'i' }] |
| 506 | + .concat(c, [{ type: 'ENDTAG', name: 'i'}]); |
| 507 | + } |
519 | 508 | / italic_marker { clearFlag('italic'); return null } |
520 | 509 | |
521 | 510 | italic_marker |
— | — | @@ -530,25 +519,24 @@ |
531 | 520 | /* Can we do backreferences to genericize this? */ |
532 | 521 | ref_full
533 | 522 | = start:ref_start ">" content:ref_content* close:ref_end {
534 | | - return {
535 | | - type: 'ext',
536 | | - name: 'ref',
537 | | - params: start.params,
538 | | - ws: start.ws,
539 | | - content: content,
540 | | - close: close
541 | | - }
| 523 | + return [
| 524 | + { type: 'TAG',
| 525 | + name: 'ext',
| 526 | + attribs: [['data-extname', 'ref']]
| 527 | + .concat(start.params, [['data-startws', start.ws]])},
| 528 | + content,
| 529 | + {type: 'ENDTAG', name: 'ext'} // must close the 'ext' tag opened above, as references_full does
| 530 | + ];
542 | 531 | }
543 | 532 | |
544 | 533 | ref_empty |
545 | 534 | = start:ref_start close:(space* "/>") { |
546 | | - return { |
547 | | - type: 'ext', |
548 | | - name: 'ref', |
549 | | - ws: start.ws, |
550 | | - params: start.params, |
551 | | - close: close |
552 | | - } |
| 535 | + return [{ type: 'SELFCLOSINGTAG', |
| 536 | + name: 'ext', |
| 537 | + attribs: [['data-extname', 'ref']] |
| 538 | + .concat(start.params |
| 539 | + ,[['data-startws', start.ws]]) |
| 540 | + }]; |
553 | 541 | } |
554 | 542 | |
555 | 543 | ref_start |
— | — | @@ -565,7 +553,7 @@ |
566 | 554 | } |
567 | 555 | |
568 | 556 | ref_content |
569 | | - = !ref_end a:(inline) { |
| 557 | + = !ref_end a:inline { // XXX: ineffective syntactic stop |
570 | 558 | return a; |
571 | 559 | } |
572 | 560 | |
— | — | @@ -574,25 +562,27 @@ |
575 | 563 | |
576 | 564 | references_full |
577 | 565 | = start:references_start ">" content:references_content* close:references_end { |
578 | | - return { |
579 | | - type: 'ext', |
580 | | - name: 'references', |
581 | | - params: start.params, |
582 | | - ws: start.ws, |
583 | | - content: content, |
584 | | - close: close |
585 | | - } |
| 566 | + return [ |
| 567 | + { type: 'TAG', |
| 568 | + name: 'ext', |
| 569 | + attribs: [['data-extname', 'references']] |
| 570 | + .concat(start.params |
| 571 | + ,[['data-startws', start.ws]]) |
| 572 | + }, |
| 573 | + content, |
| 574 | + { type: 'ENDTAG', name: 'ext' } |
| 575 | + ]; |
586 | 576 | } |
587 | 577 | |
588 | 578 | references_empty
589 | 579 | = start:references_start close:(space* "/>") {
590 | | - return {
591 | | - type: 'ext',
592 | | - name: 'references',
593 | | - ws: start.ws,
594 | | - params: start.params,
595 | | - close: close
596 | | - }
| 580 | + return [{ // literal must start on the return line: a bare "return" is ended by ASI and yields undefined
| 581 | + type: 'SELFCLOSINGTAG',
| 582 | + name: 'ext',
| 583 | + attribs: [['data-extname', 'references']]
| 584 | + .concat(start.params
| 585 | + ,[['data-startws', start.ws]])
| 586 | + }];
597 | 587 | }
598 | 588 | |
599 | 589 | references_start |
— | — | @@ -609,14 +599,14 @@ |
610 | 600 | } |
611 | 601 | |
612 | 602 | references_content |
613 | | - = !references_end a:(inline) { |
| 603 | + = !references_end a:inline { |
614 | 604 | return a; |
615 | 605 | } |
616 | 606 | |
617 | 607 | |
618 | 608 | ext_param |
619 | 609 | = space* name:ext_param_name "=" val:ext_param_val { |
620 | | - val.name = name; |
| 610 | + val[0] = name; |
621 | 611 | return val; |
622 | 612 | } |
623 | 613 | |
— | — | @@ -626,9 +616,9 @@ |
627 | 617 | } |
628 | 618 | |
629 | 619 | ext_param_val |
630 | | - = t:[0-9A-Za-z]+ { return {text: t.join('') } } |
631 | | - / "'" t:[^'>]+ "'" { return { quote: "'", text: t.join('') } } |
632 | | - / '"' t:[^">]+ '"' { return { quote: '"', text: t.join('') } } |
| 620 | + = t:[0-9A-Za-z]+ { return [null, t.join('')]; } |
| 621 | + / "'" t:[^'>]+ "'" { return [null, unquote("'", t.join(''))]; } |
| 622 | + / '"' t:[^">]+ '"' { return [null, unquote('"', t.join(''))]; } |
633 | 623 | |
634 | 624 | lists = es:(dtdd / li)+ |
635 | 625 | { |
— | — | @@ -643,10 +633,10 @@ |
644 | 634 | flatEs.push(ei); |
645 | 635 | } |
646 | 636 | } |
647 | | - return { |
648 | | - type: 'list', |
649 | | - children: flatEs |
650 | | - } |
| 637 | + return [ { type: 'TAG', |
| 638 | + name: 'ul'} ] // XXX!! |
| 639 | + .concat(flatEs |
| 640 | + ,[{ type: 'ENDTAG', name: 'ul' }]); |
651 | 641 | } |
652 | 642 | |
653 | 643 | li = sol |
— | — | @@ -654,38 +644,33 @@ |
655 | 645 | c:inlineline |
656 | 646 | &newline |
657 | 647 | { |
658 | | - return { |
659 | | - type: 'listItem', |
660 | | - attributes: { |
661 | | - styles: bulletsToTypes(bullets) |
662 | | - }, |
663 | | - content: extractInline(c) |
664 | | - }; |
| 648 | + return [ { type: 'TAG', |
| 649 | + name: 'li', |
| 650 | + attribs: [['data-styles', bullets]] } |
| 651 | + , c |
| 652 | + , { type: 'ENDTAG', name: 'li' } |
| 653 | + ]; |
665 | 654 | } |
666 | 655 | |
667 | 656 | dtdd = sol |
668 | 657 | bullets:list_char+ |
669 | | - c:(inline_element / [^:\n])+ |
| 658 | + c:(inline_element / (n:[^:\n] { return {type: 'TEXT', value: n}; }))+ |
670 | 659 | ":" |
671 | | - d:(inline_element / [^\n])+ |
| 660 | + d:(inline_element / (n:[^\n] { return {type: 'TEXT', value: n}; }))+ |
672 | 661 | &newline |
673 | 662 | { |
674 | 663 | // reject rule if bullets do not end in semicolon |
675 | 664 | if (bullets[bullets.length - 1] != ';') { |
676 | 665 | return null; |
677 | 666 | } else { |
678 | | - return [ |
679 | | - { |
680 | | - type: 'listItem', |
681 | | - attributes: {styles: bulletsToTypes(bullets)}, |
682 | | - content: extractInline(c) |
683 | | - }, { |
684 | | - type: 'listItem', |
685 | | - attributes: {styles: bulletsToTypes( |
686 | | - bullets.slice(0, bullets.length - 1) + ':')}, |
687 | | - content: extractInline(d) |
688 | | - } |
689 | | - ] |
| 667 | + return [ { type: 'TAG', name: 'dl', attribs: [['data-styles', bullets]] } |
| 668 | + , { type: 'TAG', name: 'dt' } ] |
| 669 | + .concat( c |
| 670 | + , [ {type: 'ENDTAG', name: 'dt'} |
| 671 | + , {type: 'TAG', name: 'dd'} ] |
| 672 | + , d |
| 673 | + , [ {type: 'ENDTAG', name: 'dd'} |
| 674 | + , {type: 'ENDTAG', name: 'dl'} ]); |
690 | 675 | } |
691 | 676 | } |
692 | 677 | |
— | — | @@ -697,19 +682,23 @@ |
698 | 683 | |
699 | 684 | table
700 | 685 | = tas:table_start c:table_caption? b:table_body? table_end {
701 | | - var res = {type: 'table'}
| 686 | + var res = {type: 'TAG', name: 'table'};
702 | 687 | var body = b !== '' ? b : [];
703 | | - if (c !== '') {
704 | | - res.children = [c].concat(body);
705 | | - } else {
706 | | - res.children = body;
707 | | - }
708 | 688 | if (tas.length > 0) {
709 | 689 | // FIXME: actually parse and build structure
710 | | - res.attributes = { unparsed: tas }
| 690 | + res.attribs = [['data-unparsed', tas.join('')]];
711 | 691 | }
| 692 | +
| 693 | + if (c !== '') {
| 694 | + var caption = [{type: 'TAG', name: 'caption'}]
| 695 | + .concat(c, [{type: 'ENDTAG', name: 'caption'}]);
| 696 | + } else {
| 697 | + var caption = [];
712 | 698 | //dp(print_r(res));
713 | | - return res;
| 699 | + } // close the else here so the return below runs for tables with and without a caption
| 700 | + return [res].concat(caption, body,
| 701 | + [{type: 'ENDTAG', name: 'table'}]);
| 702 | +
714 | 703 | }
715 | 704 | |
716 | 705 | table_start |
— | — | @@ -729,10 +718,7 @@ |
730 | 719 | table_caption |
731 | 720 | = newline |
732 | 721 | "|+" c:inline* { |
733 | | - return { |
734 | | - type: 'tableCaption', |
735 | | - content: c[0] |
736 | | - } |
| 722 | + return c; |
737 | 723 | } |
738 | 724 | |
739 | 725 | table_body |
— | — | @@ -749,20 +735,16 @@ |
750 | 736 | |
751 | 737 | table_firstrow |
752 | 738 | = td:table_data+ { |
753 | | - return { |
754 | | - type: 'tableRow', |
755 | | - children: td |
756 | | - }; |
| 739 | + return [{ type: 'TAG', name: 'tr' }] |
| 740 | + .concat(td, [{type: 'ENDTAG', name: 'tr'}]); |
757 | 741 | } |
758 | 742 | |
759 | 743 | table_row |
760 | 744 | = & { dp("table row enter"); return true; } |
761 | 745 | newline |
762 | 746 | "|-" thtd_attribs? space* td:(table_data / table_header)* { |
763 | | - return { |
764 | | - type: 'tableRow', |
765 | | - children: td |
766 | | - }; |
| 747 | + return [{type: 'TAG', name: 'tr'}] |
| 748 | + .concat(td, [{type: 'ENDTAG', name: 'tr'}]); |
767 | 749 | } |
768 | 750 | |
769 | 751 | table_data |
— | — | @@ -772,22 +754,16 @@ |
773 | 755 | a:thtd_attribs? |
774 | 756 | td:(!inline_breaks anyblock)* { |
775 | 757 | dp("table data result: " + print_r(td) + ", attribts: " + print_r(a)); |
776 | | - return { |
777 | | - type: 'tableCell', |
778 | | - attributes: { unparsed: a }, |
779 | | - children: td |
780 | | - }; |
| 758 | + return [{ type: 'TAG', name: 'td', attribs: [['data-unparsed', a]]}] |
| 759 | + .concat(td, [{type: 'ENDTAG', name: 'td'}]); |
781 | 760 | } |
782 | 761 | |
783 | 762 | table_header |
784 | 763 | = ("!!" / newline "!") |
785 | 764 | a:thtd_attribs? |
786 | 765 | c:inline { |
787 | | - return { |
788 | | - type: 'tableHeading', |
789 | | - attributes: { unparsed: a }, |
790 | | - children: c |
791 | | - } |
| 766 | + return [{type: 'TAG', name: 'th', attribs: [['data-unparsed', a]]}] |
| 767 | + .concat(c, [{type: 'ENDTAG', name: 'th'}]); |
792 | 768 | } |
793 | 769 | |
794 | 770 | thtd_attribs |
— | — | @@ -804,7 +780,34 @@ |
805 | 781 | * split off text into content nodes |
806 | 782 | * convert inlines into annotations |
807 | 783 | * change contents into children |
| 784 | + * |
| 785 | + * { text: text, |
| 786 | + * annotations: [(normal annotations)], |
| 787 | + * maybeannotations: [ |
| 788 | + * { type: 'something', |
| 789 | + * side: MA_START, |
| 790 | + * tag: { start: x, length: y } |
| 791 | + * } |
| 792 | + * ] |
| 793 | + * } |
| 794 | + * offsets in annotations: presume maybeannotations are actually text |
| 795 | + * -> need to transform annotations if match found |
| 796 | + * -> format annotations, comments can run to the end (re-opened after |
| 797 | + * block-level tags); only closed on table cells, object,? |
| 798 | + * -> other annotations (images, templates etc) are limited by block-level |
| 799 | + * elements, tightly bound |
| 800 | + * |
| 801 | + * Block-level elements |
| 802 | + * -------------------- |
| 803 | + * - Need some early clean-up to provide structure and offsets |
| 804 | + * - Establish scope limits for some inlines |
| 805 | + * - Line-based balanced by construction |
| 806 | + * - HTML tags need balancing/ matching / implicit close |
| 807 | + * - content in illegal places (e.g. between table and td tags) needs foster |
| 808 | + * parenting |
| 809 | + * - grammar will match outermost pair if unmatched pairs are recognized as |
| 810 | + * tokens (or as text) |
| 811 | + * - post-processing needed, but has to be limited by scope |
808 | 812 | */ |
809 | | - |
810 | 813 | /* Tabs do not mix well with the hybrid production syntax */ |
811 | 814 | /* vim: et:ts=4:sw=4:cindent */ |