r105125 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r105124‎ | r105125 | r105126 >
Date:19:23, 4 December 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Fixes to tables, headings and misc smaller stuff. Tracked down an issue caused
by improper caching of production results, which interfered with the
flag-dependent inline_break production.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMPostProcessor.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -321,13 +321,14 @@
322322 // known-ok differences.
323323 function normalizeOut ( out ) {
324324 // TODO: Do not strip newlines in pre and nowiki blocks!
325 - return out.replace(/\n| data-[a-zA-Z]+="[^">]+"/g, '')
 325+ return out.replace(/\n| data-[a-zA-Z]+="[^">]*"/g, '')
326326 .replace(/<!--.*?-->\n?/gm, '');
327327 }
328328
329329 function formatHTML ( source ) {
330330 // Quick hack to insert newlines before some block level start tags
331 - return source.replace(/(?!^)<((div|dd|dt|li|p|table|tr|td|tbody|dl|ol|ul)[^>]*)>/g,
 331+ return source.replace(
 332+ /(?!^)<((div|dd|dt|li|p|table|tr|td|tbody|dl|ol|ul|h1|h2|h3|h4|h5|h6)[^>]*)>/g,
332333 '\n<$1>');
333334 }
334335
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMPostProcessor.js
@@ -18,6 +18,12 @@
1919 case 'pre':
2020 case 'center':
2121 case 'blockquote':
 22+ case 'h1':
 23+ case 'h2':
 24+ case 'h3':
 25+ case 'h4':
 26+ case 'h5':
 27+ case 'h6':
2228 return true;
2329 default:
2430 return false;
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt
@@ -370,7 +370,7 @@
371371 * structures to avoid unnecessarily leaving the text production on plain
372372 * content. */
373373
374 -text_char = [^'<~[{\n\r:\]}]
 374+text_char = [^'<~[{\n\r:\]}|!=]
375375
376376 text = t:text_char+ { return t.join(''); }
377377
@@ -386,6 +386,9 @@
387387 * n nntp(s) urls
388388 * m mailto urls
389389 *
 390+ * ! and | table cell delimiters, might be better to specialize those
 391+ * = headings - also specialize those!
 392+ *
390393 * The following chars are also included for now, but only apply in some
391394 * contexts and should probably be enabled only in those:
392395 * : separate definition in ; term : definition
@@ -393,7 +396,7 @@
394397 * } end of parser func/transclusion/template arg
395398 */
396399
397 -urltext = ( t:[^'<~[{\n\rfghimnstw:\]} &]+ { return t.join(''); }
 400+urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); }
398401 // XXX: use general entity decode!
399402 / "&amp;" { return "&"; } // decode ampersand in text
400403 / urllink
@@ -456,7 +459,7 @@
457460 = & { blockStart = pos; return true; } b:block {
458461 b = flatten(b);
459462 var bs = b[0];
460 - dp('toplevelblock:' + pp(b));
 463+ //dp('toplevelblock:' + pp(b));
461464 if (bs.attribs === undefined) {
462465 bs.attribs = [];
463466 }
@@ -498,6 +501,8 @@
499502 = h
500503 / table
501504 / lists
 505+ // tag-only lines should not trigger pre
 506+ / space* bt:block_tag space* &eolf { return bt }
502507 / pre_indent
503508 / pre
504509
@@ -514,21 +519,26 @@
515520
516521 // Syntax stops to limit inline expansion depending on syntactic context
517522 inline_breaks
518 - = //& { console.log(pp(syntaxFlags)); return true; }
 523+ =
 524+ & { // Important hack: disable caching for this production, as the default
 525+ // cache key does not take into account flag states!
 526+ cacheKey = '';
 527+ return true;
 528+ }
519529 & { return syntaxFlags['table']; }
520 - a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a)); return true; }
 530+ a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; }
521531 / & { return (syntaxFlags['colon'] &&
522532 ! syntaxFlags.extlink &&
523533 ! syntaxFlags.linkdesk); } ":" { return true; }
524534 / & { return syntaxFlags['extlink']; } "]" { return true; }
525535 / & { return syntaxFlags['linkdesc']; } link_end { return true; }
526536 / & { return syntaxFlags['h']; }
527 - ( & { return syntaxFlags['h1'] } '=' newline { return true; }
528 - / & { return syntaxFlags['h2'] } '==' newline { return true; }
529 - / & { return syntaxFlags['h3'] } '===' newline { return true; }
530 - / & { return syntaxFlags['h4'] } '====' newline { return true; }
531 - / & { return syntaxFlags['h5'] } '=====' newline { return true; }
532 - / & { return syntaxFlags['h6'] } '======' newline { return true; }
 537+ ( & { return syntaxFlags['h1'] } '=' space* newline { return true; }
 538+ / & { return syntaxFlags['h2'] } '==' space* newline { return true; }
 539+ / & { return syntaxFlags['h3'] } '===' space* newline { return true; }
 540+ / & { return syntaxFlags['h4'] } '====' space* newline { return true; }
 541+ / & { return syntaxFlags['h5'] } '=====' space* newline { return true; }
 542+ / & { return syntaxFlags['h6'] } '======' space* newline { return true; }
533543 )
534544
535545 inline
@@ -553,6 +563,7 @@
554564 return out;
555565 }
556566
 567+
557568 inlineline
558569 = c:(urltext / !inline_breaks (inline_element / [^\n]))+ {
559570 var out = [];
@@ -572,7 +583,7 @@
573584 if (text.length) {
574585 out.push({type: 'TEXT', value: text.join('')});
575586 }
576 - //dp('inlineline out:', pp(out));
 587+ dp('inlineline out:', pp(out));
577588 return out;
578589 }
579590
@@ -580,7 +591,8 @@
581592 * -> need (start, end) offsets within block
582593 */
583594 inline_element
584 - = comment
 595+ = & { dp('inline_element enter' + input.substr(pos, 10)); return true; }
 596+ comment
585597 // Can actually also be block-level elements, we don't really try to enforce
586598 // a content model in the tokenizer. The HTML tree builder and DOM
587599 // transformations are better equipped to deal with it.
@@ -590,35 +602,43 @@
591603 / extlink
592604 / quote
593605
594 -/* Headings */
595 -h = h1 / h2 / h3 / h4 / h5 / h6
 606+/* Headings
 607+ *
 608+ * Listed in reverse order on purpose ;) */
 609+h = h6 / h5 / h4 / h3 / h2 / h1
596610
597 -h1 = sol '='
598 - (
 611+/* We might want to consider using a single rule for all headings, and
 612+ * figuring out the level in the action. This saves quite some backtracking,
 613+ * and the conversion of equal signs into text should not be a problem as
 614+ * equals are not part of other syntax. */
 615+h1 = '='
 616+ r:(
599617 & { setFlag('h'); return setFlag('h1') }
600 - c:inlineline '=' comment? &newline {
 618+ c:inlineline '=' (space / comment)*
 619+ &eolf
 620+ {
601621 clearFlag('h');
602622 clearFlag('h1');
603623 return [{type: 'TAG', name: 'h1'}]
604624 .concat(c, [{type: 'ENDTAG', name: 'h1'}]);
605625 }
606 - / { clearFlag('h'); clearFlag('h1'); return null }
607 - )
 626+ / & { dp('nomatch exit h1'); clearFlag('h'); clearFlag('h1'); return false } { return null }
 627+ ) { return r }
608628
609 -h2 = sol '=='
610 - (
 629+h2 = '=='
 630+ r:(
611631 & { setFlag('h'); return setFlag('h2') }
612 - c:inlineline '==' comment? &newline {
 632+ c:inlineline '==' (space / comment)* &newline {
613633 clearFlag('h');
614634 clearFlag('h2');
615635 return [{type: 'TAG', name: 'h2'}]
616636 .concat(c, [{type: 'ENDTAG', name: 'h2'}]);
617637 }
618 - / { clearFlag('h'); clearFlag('h2'); return null }
619 - )
 638+ / & { clearFlag('h'); clearFlag('h2'); return false }
 639+ ) { return r }
620640
621 -h3 = sol '==='
622 - (
 641+h3 = '==='
 642+ r:(
623643 & { setFlag('h'); return setFlag('h3') }
624644 c:inlineline '===' comment? &newline {
625645 clearFlag('h');
@@ -626,11 +646,11 @@
627647 return [{type: 'TAG', name: 'h3'}]
628648 .concat(c, [{type: 'ENDTAG', name: 'h3'}]);
629649 }
630 - / { clearFlag('h'); clearFlag('h3'); return null }
631 - )
 650+ / & { clearFlag('h'); clearFlag('h3'); return false }
 651+ ) { return r }
632652
633 -h4 = sol '===='
634 - (
 653+h4 = '===='
 654+ r:(
635655 & { setFlag('h'); return setFlag('h4') }
636656 c:inlineline '====' comment? &newline {
637657 clearFlag('h');
@@ -638,37 +658,32 @@
639659 return [{type: 'TAG', name: 'h4'}]
640660 .concat(c, [{type: 'ENDTAG', name: 'h4'}]);
641661 }
642 - / { clearFlag('h'); clearFlag('h4'); return null }
643 - )
 662+ / & { clearFlag('h'); clearFlag('h4'); return false }
 663+ ) { return r }
644664
645 -h5 = sol '====='
646 - (& { setFlag('h'); return setFlag('h5') }
 665+h5 = '====='
 666+ r:(& { setFlag('h'); return setFlag('h5') }
647667 c:inlineline '=====' comment? &newline {
648668 clearFlag('h');
649669 clearFlag('h5');
650670 return [{type: 'TAG', name: 'h5'}]
651671 .concat(c, [{type: 'ENDTAG', name: 'h5'}]);
652672 }
653 - / { clearFlag('h'); clearFlag('h5'); return null }
654 - )
 673+ / & { clearFlag('h'); clearFlag('h5'); return false }
 674+ ) { return r }
655675
656 -h6 = sol '======'
657 - (& { setFlag('h'); return setFlag('h6') }
 676+h6 = '======'
 677+ r:(& { setFlag('h'); return setFlag('h6') }
658678 c:inlineline '======' comment? &newline {
659679 clearFlag('h');
660680 clearFlag('h6');
661681 return [{type: 'TAG', name: 'h6'}]
662682 .concat(c, [{type: 'ENDTAG', name: 'h6'}]);
663683 }
664 - / { clearFlag('h'); clearFlag('h6'); return null }
665 - )
 684+ / & { clearFlag('h'); clearFlag('h6'); return false }
 685+ ) { return r }
666686
667 -heading_marker
668 - = '=' '='*
669687
670 -heading_text
671 - = h:( !(heading_marker newline) x:inlineline { return x } )* { return h.join(''); }
672 -
673688 pre_indent
674689 = l:pre_indent_line ls:(sol pre_indent_line)* {
675690 return [{type: 'TAG', name: 'pre'}]
@@ -741,11 +756,12 @@
742757 / 'telnet://' // Well if we're going to support the above.. -ævar
743758 / 'worldwind://'
744759
 760+// JavaScript character classes do not support Unicode categories, so the separator spaces are listed explicitly.
745761 unicode_separator_space = [ \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
746762
747763 url
748764 = proto:url_protocol
749 - rest:( [^ :\]\[\n<>\x00-\x20\x7f,.&]
 765+ rest:( [^ :\]\[\n"'<>\x00-\x20\x7f,.&\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
750766 / s:[.:,] !(space / eolf) { return s }
751767 // XXX: use general entity decode!
752768 / '&amp;' { return '&' }
@@ -807,7 +823,7 @@
808824 / !"}}" x:([^|\n]) { return x }
809825
810826 wikilink
811 - = "[[" target:link_target text:("|" link_text)* "]]" {
 827+ = "[[" target:link_target text:("|" lt:link_text { return lt })* "]]" suffix:text? {
812828 var obj = {
813829 type: 'TAG',
814830 name: 'a',
@@ -815,8 +831,11 @@
816832 };
817833 obj.attribs.push(['href', target]);
818834 if (text && text.length) {
819 - var textTokens = text[0][1]; // XXX
 835+ var textTokens = text; // XXX
820836 } else {
 837+ if (suffix !== '') {
 838+ target += suffix;
 839+ }
821840 var textTokens = [{type: 'TEXT', value: target}];
822841 }
823842 return [obj].concat(textTokens, [{type: 'ENDTAG', name: 'a'}]);
@@ -1120,7 +1139,6 @@
11211140
11221141
11231142 /* Tables */
1124 -
11251143 table
11261144 = tas:table_start c:table_caption? b:table_body? table_end {
11271145 var res = {type: 'TAG', name: 'table'}
@@ -1144,18 +1162,19 @@
11451163 }
11461164
11471165 table_start
1148 - = sol
1149 - "{|"
1150 - & { setFlag('table'); return true; }
1151 - ta:table_attribs*
1152 - space* {
1153 - //dp("table_start " + pp(ta) + ", pos:" + pos);
1154 - return ta;
1155 - }
1156 - / sol "{|" { clearFlag('table'); return null; }
 1166+ = "{|"
 1167+ res:(
 1168+ & { setFlag('table'); return true; }
 1169+ ta:table_attribs*
 1170+ {
 1171+ dp("table_start " + pp(ta) + ", pos:" + pos);
 1172+ return ta;
 1173+ }
 1174+ / & { clearFlag('table'); return false; } { return null; }
 1175+ ) { return res }
11571176
11581177 table_attribs
1159 - = text / ! inline_breaks !newline .
 1178+ = text / ! inline_breaks !newline ![|] c:. { return c }
11601179
11611180 table_caption
11621181 = newline
@@ -1190,11 +1209,14 @@
11911210 }
11921211
11931212 table_data
1194 - = & { dp("table_data enter, pos=" + pos); return true; }
 1213+ = & { dp("table_data enter, pos=" + pos + input.substr(pos,10)); return true; }
11951214 ("||" / newline "|")
11961215 ! [}+-]
11971216 a:thtd_attribs?
1198 - td:(!inline_breaks block)* {
 1217+ // use inline_breaks to break on tr etc
 1218+ td:(!inline_breaks
 1219+ & { dp("table_data 2, pos=" + pos + input.substr(pos,10)); return true; }
 1220+ b:block { return b })* {
11991221 dp("table data result: " + pp(td) + ", attribts: " + pp(a));
12001222 return [{ type: 'TAG', name: 'td', attribs: [['data-unparsed', a]]}]
12011223 .concat(td, [{type: 'ENDTAG', name: 'td'}]);
@@ -1210,8 +1232,8 @@
12111233
12121234 thtd_attribs
12131235 // In particular, do not match [|\n]
1214 - = a:(text / ! inline_breaks [="':;/,.-] )+ "|" ! [|}+-] {
1215 - return a;
 1236+ = a:(text / ! inline_breaks c:[="':;/,. -] { return c } )+ "|" ! "|" {
 1237+ return a;
12161238 }
12171239
12181240

Status & tagging log