r100271 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r100270‎ | r100271 | r100272 >
Date:20:19, 19 October 2011
Author:gwicke
Status:deferred
Tags:parserplayground 
Comment:
List handling in parser

List handling is pushed directly into the rule actions for now. Probably not
the most elegant way to do this, will revisit this later.
Modified paths:
  • /trunk/extensions/ParserPlayground/modules/ext.parserPlayground.renderer.js (modified) (history)
  • /trunk/extensions/ParserPlayground/modules/ext.parserPlayground.serializer.js (modified) (history)
  • /trunk/extensions/ParserPlayground/modules/pegParser.pegjs.txt (modified) (history)
  • /trunk/extensions/ParserPlayground/tests/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/ParserPlayground/tests/parserTests.js
@@ -128,6 +128,8 @@
129129 if (err) {
130130 console.log('RENDER FAIL', err);
131131 } else {
 132+ console.log('INPUT:');
 133+ console.log(item.input + "\n");
132134 console.log('EXPECTED:');
133135 console.log(item.result + "\n");
134136
Index: trunk/extensions/ParserPlayground/modules/pegParser.pegjs.txt
@@ -1,7 +1,61 @@
22 /* Produces output more or less compatible with FakeParser; plug it into FP's output and see */
33
 4+{
 5+
 6+ /* Temporary debugging help */
 7+ var print_r = function (arr, level) {
 8+
 9+ var dumped_text = "";
 10+ if (!level) level = 0;
 11+
 12+ //The padding given at the beginning of the line.
 13+ var level_padding = "";
 14+ var bracket_level_padding = "";
 15+
 16+ for (var j = 0; j < level + 1; j++) level_padding += " ";
 17+ for (var b = 0; b < level; b++) bracket_level_padding += " ";
 18+
 19+ if (typeof(arr) == 'object') { //Array/Hashes/Objects
 20+ dumped_text += "Array\n";
 21+ dumped_text += bracket_level_padding + "(\n";
 22+ for (var item in arr) {
 23+
 24+ var value = arr[item];
 25+
 26+ if (typeof(value) == 'object') { //If it is an array or object, recurse
 27+ dumped_text += level_padding + "[" + item + "] => ";
 28+ dumped_text += print_r(value, level + 2);
 29+ } else {
 30+ dumped_text += level_padding + "[" + item + "] => " + value + "\n";
 31+ }
 32+
 33+ }
 34+ dumped_text += bracket_level_padding + ")\n\n";
 35+ } else { //Strings/Chars/Numbers etc.
 36+ dumped_text = "===>" + arr + "<===(" + typeof(arr) + ")";
 37+ }
 38+
 39+ return dumped_text;
 40+
 41+ }
 42+}
 43+
444 start
5 - = e:block* { return {type: 'page', content: e } }
 45+ = e:block* {
 46+ var es = [];
 47+ // flatten sub-arrays, as a list block can contain multiple lists
 48+ $.each(e, function(i, ei) {
 49+ if (ei.constructor == Array)
 50+ es = es.concat(ei);
 51+ else
 52+ es.push(ei);
 53+ });
 54+ //console.log(print_r(es, 10));
 55+ return {
 56+ type: 'page',
 57+ content: es
 58+ }
 59+ }
660
761 anything
862 = a:[A-Za-z0-9,._ -]+ { return a.join('') } / [^\n]
@@ -15,7 +69,7 @@
1670 block
1771 = br
1872 / h
19 - / li
 73+ / lists
2074 / para
2175
2276 h = h1 / h2 / h3 / h4 / h5 / h6
@@ -82,7 +136,7 @@
83137 c:anything
84138
85139 inline
86 - = c:inline_element+ {
 140+ = c:(inline_element / anything)+ {
87141 var out = [];
88142 var text = '';
89143 for (var i = 0; i < c.length; i++) {
@@ -116,7 +170,6 @@
117171 / link
118172 / bold
119173 / italic
120 - / anything
121174
122175 comment
123176 = '<!--' c:comment_chars+ '-->' {
@@ -293,7 +346,7 @@
294347 }
295348
296349 ref_content
297 - = !ref_end a:inline_element {
 350+ = !ref_end a:(inline_element / anything) {
298351 return a;
299352 }
300353
@@ -337,7 +390,7 @@
338391 }
339392
340393 references_content
341 - = !references_end a:inline_element {
 394+ = !references_end a:(inline_element / anything) {
342395 return a;
343396 }
344397
@@ -358,20 +411,120 @@
359412 / "'" t:[^'>]+ "'" { return { quote: "'", text: t.join('') } }
360413 / '"' t:[^">]+ '"' { return { quote: '"', text: t.join('') } }
361414
 415+lists = es:(dtdd / li)+
 416+{
 417+ var out = [], // List of list nodes
 418+ bstack = "", // Bullet stack, previous element's listStyle
 419+ bnext = "", // Next element's listStyle
 420+ nodes = []; // Stack of currently active, nested list nodes
 421+
 422+ var commonPrefixLength = function (x, y) {
 423+ var minLength = Math.min(x.length, y.length);
 424+ for(var i = 0; i < minLength; i++) {
 425+ if (x[i] != y[i])
 426+ break;
 427+ }
 428+ return i;
 429+ }
 430+
 431+ var pushN = function ( n ) {
 432+ if (nodes.length > 0) {
 433+ nodes[nodes.length - 1].content.push(n);
 434+ } else {
 435+ out.push(n);
 436+ nodes.push(n);
 437+ }
362438
 439+ }
 440+
 441+ var openLists = function ( bs, bn ) {
 442+ var prefix = commonPrefixLength (bs, bn);
 443+ nodes = nodes.slice(0, prefix);
 444+ $.each(bn.slice(prefix, bn.length), function (i, c) {
 445+ switch (c) {
 446+ case '*':
 447+ pushN({type: 'ul', content: []});
 448+ break;
 449+ case '#':
 450+ pushN({type: 'ol', content: []});
 451+ break;
 452+ case ';':
 453+ case ':':
 454+ pushN({type: 'dl', content: []});
 455+ break;
 456+ default:
 457+ throw("Unknown node prefix " + c);
 458+ }
 459+ });
 460+ }
 461+
 462+
 463+ $.each(es, function(i, e) {
 464+ if (e.type == 'dtdd') {
 465+ bnext = e.content[0].listStyle;
 466+ lnode = openLists( bstack, bnext );
 467+
 468+ nodes[nodes.length - 1].content =
 469+ nodes[nodes.length - 1].content.concat(e.content);
 470+ } else {
 471+ bnext = e.listStyle;
 472+ openLists( bstack, bnext, nodes );
 473+ nodes[nodes.length - 1].content.push(e);
 474+ }
 475+ bstack = bnext;
 476+ });
 477+ //console.log("out: " + print_r(out, 5));
 478+ return out;
 479+
 480+
 481+
 482+}
 483+
363484 li = bullets:list_char+
364485 c:(inline / anything)
365486 newline
366487 {
 488+ var type;
 489+ switch (bullets[bullets.length - 1]) {
 490+ case '#':
 491+ case '*':
 492+ type = 'li'; break;
 493+ case ';': type = 'dt'; break;
 494+ case ':': type = 'dd'; break;
 495+ }
367496 return {
368 - type: 'li',
 497+ type: type,
369498 listStyle: bullets,
370499 content: c
371500 };
372501 }
373502
374 -list_char =
375 - '*' /
376 - '#' /
377 - ':' /
378 - ';'
 503+dtdd = bullets:list_char+
 504+ c:(inline_element / [^:\n])+
 505+ ":"
 506+ d:(inline / anything)
 507+ newline
 508+{
 509+ // reject rule if bullets do not end in colon
 510+ if (bullets[bullets.length - 1] != ';')
 511+ return null;
 512+ else
 513+ return { type: 'dtdd',
 514+ content: [
 515+ {
 516+ type: 'dt',
 517+ listStyle: bullets,
 518+ content: c
 519+ },
 520+ {
 521+ type: 'dd',
 522+ listStyle: bullets.slice(0, bullets.length - 1) + ':',
 523+ content: d
 524+ }
 525+
 526+ ]
 527+ };
 528+}
 529+
 530+
 531+list_char = [*#:;]
Index: trunk/extensions/ParserPlayground/modules/ext.parserPlayground.renderer.js
@@ -123,6 +123,9 @@
124124 case 'span':
125125 case 'ol':
126126 case 'ul':
 127+ case 'dl':
 128+ case 'dt':
 129+ case 'dd':
127130 case 'li':
128131 var $span = $('<' + tree.type + '>');
129132 if ('attrs' in tree) {
Index: trunk/extensions/ParserPlayground/modules/ext.parserPlayground.serializer.js
@@ -119,7 +119,14 @@
120120 // @fixme validate that text doesn't contain '-->'
121121 src = '<!--' + tree.text + '-->';
122122 break;
 123+ case 'ul':
 124+ case 'ol':
 125+ case 'dl':
 126+ src = subParseArray(tree.content);
 127+ break;
123128 case 'li':
 129+ case 'dt':
 130+ case 'dd':
124131 src = tree.listStyle.join('');
125132 src += subParseArray(tree.content) + '\n';
126133 break;

Status & tagging log