r104235 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r104234‎ | r104235 | r104236 >
Date:12:47, 25 November 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Better HTML, pre, and block-level handling. Hackish source formatting for easier
comparison with parserTest results.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -133,9 +133,10 @@
134134 console.log(item);
135135 throw new Error('Missing input from test case ' + item.title);
136136 }
 137+ console.log('=====================================================');
137138 console.log(item.title);
138 - console.log("INPUT:");
139 - console.log(item.input + "\n");
 139+ console.log("INPUT:");
 140+ console.log(item.input + "\n");
140141
141142
142143 parser.parseToTree(item.input + "\n", function(tree, err) {
@@ -161,7 +162,10 @@
162163 var out = tokenizer.parser.document
163164 .getElementsByTagName('body')[0]
164165 .innerHTML
165 - .replace(/<li>/g, '\n<li>');
 166+ // Hack: add some line breaks for
 167+ // block-levels
 168+ .replace(/(.)<((dd|dt|li|p|table|dl|ol|ul)[^>]*)>/g,
 169+ '$1\n<$2>');
166170 console.log(out);
167171 }
168172 }
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt
@@ -343,6 +343,11 @@
344344 // Start position of top-level block
345345 // Could also provide positions for lower-level blocks using a stack.
346346 var blockStart = 0;
 347+
 348+ var isEOF = function (pos) {
 349+ // XXX: cache the length..
 350+ return pos === input.length;
 351+ };
347352 }
348353
349354 start
@@ -350,6 +355,7 @@
351356 return flatten(e);
352357 }
353358
 359+
354360 anyblock = block / inline
355361 anyblockline = block / inlineline
356362
@@ -363,14 +369,18 @@
364370
365371 // Start of line
366372 sol = (newline / & { return pos === 0; } { return true; })
367 - cn:(c:comment n:newline? { return [c, n] })? {
 373+ cn:(c:comment n:newline? { return [c, {type: 'TEXT', value: n}] })* {
368374 return [{type: 'NEWLINE'}].concat(cn);
369375 }
370376
 377+eof = & { return isEOF(pos); } { return true; }
371378
 379+
372380 newline
373381 = '\n' / '\r\n'
374382
 383+eolf = newline / eof
 384+
375385 toplevelblock
376386 = & { blockStart = pos; return true; } b:block {
377387 b = flatten(b);
@@ -387,9 +397,10 @@
388398 }
389399
390400 block
391 - = (sol space* &newline)? bl:block_lines { return [{type: 'NEWLINE'}].concat(bl); }
 401+ = block_lines
 402+ / pre
 403+ / comment &eolf
392404 / para
393 - / comment
394405 / (s:sol {
395406 if (s) {
396407 return [s, {type: 'NEWLINE'}];
@@ -399,12 +410,16 @@
400411 }
401412 )
402413
 414+block_lines
 415+ = s:sol (space* newline)?
 416+ bl:block_line { return s.concat(bl); }
403417
404418 // Block structures with start-of-line wiki syntax
405 -block_lines
 419+block_line
406420 = h
407421 / table
408422 / lists
 423+ / space* generic_tag space* &eolf
409424 / pre_indent
410425
411426
@@ -493,11 +508,12 @@
494509 = (sol br)? pl:para_lines { return pl; }
495510
496511 para_lines
497 - = s:sol c:inlineline cs:(!block_lines para_lines)* {
498 - var res = [{type: 'TAG', name: 'p'}];
 512+ = s:sol? c:inlineline cs:(!block_lines para_lines)* {
 513+ var res = [];
499514 if (s !== '') {
500 - res.push(s)
 515+ res.push(s);
501516 }
 517+ res.push({type: 'TAG', name: 'p'});
502518 //console.log('paralines' + pp(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}])));
503519 return res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]);
504520 }
@@ -505,12 +521,14 @@
506522 br = space* &newline { return {type: 'SELFCLOSINGTAG', name: 'br'} }
507523
508524 pre_indent
509 - = l:pre_indent_line+ {
 525+ = l:pre_indent_line ls:(sol pre_indent_line)* {
510526 return [{type: 'TAG', name: 'pre'}]
511 - .concat( l
 527+ .concat( [l], ls
512528 , [{type: 'ENDTAG', name: 'pre'}]);
513529 }
514 -pre_indent_line = sol space l:inlineline { return l }
 530+pre_indent_line = space l:inlineline {
 531+ return [{type: 'TEXT', value: '\n'}].concat(l);
 532+}
515533
516534 // Syntax that stops inline expansion
517535 inline_breaks
@@ -586,7 +604,7 @@
587605 / quote
588606
589607 comment
590 - = '<!--' c:comment_chars* '-->'
 608+ = '<!--' c:comment_chars* ('-->' / eof)
591609 (space* newline space* comment)* {
592610 return [{ type: 'COMMENT', value: c.join('') }];
593611 }
@@ -704,13 +722,29 @@
705723 /* Will need to check anything xmlish against known/allowed HTML tags and
706724 * registered extensions, otherwise fail the match. Should ref be treated as a
707725 * regular extension? */
708 -xmlish_tag = nowiki / pre / ref / references / generic_tag
 726+xmlish_tag = nowiki / ref / references / generic_tag
709727
710 -nowiki = "<nowiki>" ts:(t:[^<]+ { return t.join('') } / !"</nowiki>" .)+ {
711 - // return nowiki tags as well?
712 - return [{type: 'TEXT', value: ts.join('')}];
713 -}
 728+pre
 729+ = "<pre"
 730+ attribs:generic_attribute*
 731+ ">"
 732+ ts:(t:[^<]+ { return t.join('') } / !"</pre>" t2:. {return t2})+
 733+ ("</pre>" / eof) {
 734+ // return nowiki tags as well?
 735+ return [ {type: 'TAG', name: 'pre', attribs: attribs}
 736+ , {type: 'TEXT', value: ts.join('')}
 737+ , {type: 'ENDTAG', name: 'pre'}
 738+ ];
 739+ }
714740
 741+nowiki
 742+ = "<nowiki>"
 743+ ts:(t:[^<]+ { return t.join('') } / !"</nowiki>" .)+
 744+ "</nowiki>" {
 745+ // return nowiki tags as well?
 746+ return [{type: 'TEXT', value: ts.join('')}];
 747+ }
 748+
715749 // See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and
716750 // following paragraphs
717751 generic_tag
@@ -719,9 +753,9 @@
720754 selfclose:"/"?
721755 ">" {
722756 var res = {name: name.join(''), attribs: attribs};
723 - if ( end !== '' ) {
 757+ if ( end != '' ) {
724758 res.type = 'ENDTAG';
725 - } else if ( selfclose !== '' ) {
 759+ } else if ( selfclose != '' ) {
726760 res.type = 'SELFCLOSINGTAG';
727761 } else {
728762 res.type = 'TAG';
@@ -862,26 +896,24 @@
863897 / "'" t:[^'>]+ "'" { return [null, unquote("'", t.join(''))]; }
864898 / '"' t:[^">]+ '"' { return [null, unquote('"', t.join(''))]; }
865899
866 -lists = es:(dtdd / li)+
 900+lists = e:(dtdd / li) es:(sol (dtdd / li))*
867901 {
868902 return annotateList( [ { type: 'TAG', name: 'list'} ]
869 - .concat(flatten(es)
 903+ .concat(flatten([e].concat(es))
870904 ,[{ type: 'ENDTAG', name: 'list' }]));
871905 }
872906
873 -li = s:sol
874 - bullets:list_char+
 907+li = bullets:list_char+
875908 c:inlineline
876909 &newline
877910 {
878 - return s.concat([ { type: 'TAG',
 911+ return [ { type: 'TAG',
879912 name: 'listItem',
880913 bullets: bullets }
881 - , c ]);
 914+ , c ];
882915 }
883916
884 -dtdd = s:sol
885 - bullets:list_char+
 917+dtdd = bullets:list_char+
886918 c:(inline_element / (n:[^:\n] { return {type: 'TEXT', value: n}; }))+
887919 ":"
888920 d:(inline_element / (n:[^\n] { return {type: 'TEXT', value: n}; }))+
@@ -893,7 +925,7 @@
894926 } else {
895927 var dtbullets = bullets.slice(0, bullets.length - 1);
896928 dtbullets.push(':');
897 - return s.concat([ { type: 'TAG', name: 'listItem', bullets: bullets } ])
 929+ return [ { type: 'TAG', name: 'listItem', bullets: bullets } ]
898930 .concat( c
899931 ,[{ type: 'TAG', name: 'listItem', bullets: dtbullets } ]
900932 , d );

Status & tagging log