r107059 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r107058‎ | r107059 | r107060 >
Date:11:43, 22 December 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Refactor table productions to support table fragments in templates (table
start / row / table end). The old productions are not deleted yet to make it
easy to compare the output on more complex articles. 181 tests passing after
adding two table tests with whitespace-only differences to the whitelist.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js
@@ -29,8 +29,15 @@
3030
3131 testWhiteList["Nested table"] = "<table border=\"1\"><tbody><tr><td> α</td><td><table bgcolor=\"#ABCDEF\" border=\"2\"><tbody><tr><td>nested</td></tr><tr><td>table</td></tr></tbody></table></td><td>the original table again</td></tr></tbody></table>";
3232
 33+// Very minor whitespace difference at end of cell (MediaWiki inserts a
 34+// newline before the close tag even if there was no trailing space in the cell)
 35+testWhiteList["Table rowspan"] = "<table border=\"1\" data-sourcePos=\"0:121\"><tbody><tr><td> Cell 1, row 1 </td><td rowspan=\"2\"> Cell 2, row 1 (and 2) </td><td> Cell 3, row 1 </td></tr><tr><td> Cell 1, row 2 </td><td> Cell 3, row 2 </td></tr></tbody></table>";
3336
 37+// Inter-element whitespace only
 38+testWhiteList["Indented table markup mixed with indented pre content (proposed in bug 6200)"] = " \n\n<table><tbody><tr><td><pre>\nText that should be rendered preformatted\n</pre></td></tr></tbody></table>";
3439
 40+
 41+
3542 /* Missing token transform functionality */
3643
3744 // We don't implement percent encoding for URIs yet.
Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -351,7 +351,7 @@
352352 */
353353 ParserTests.prototype.normalizeHTML = function (source) {
354354 // TODO: Do not strip newlines in pre and nowiki blocks!
355 - source = source.replace(/\n/g, '');
 355+ source = source.replace(/[\r\n]/g, '');
356356 try {
357357 this.htmlparser.parse('<body>' + source + '</body>');
358358 return this.htmlparser.document
@@ -383,7 +383,7 @@
384384 // known-ok differences.
385385 ParserTests.prototype.normalizeOut = function ( out ) {
386386 // TODO: Do not strip newlines in pre and nowiki blocks!
387 - return out.replace(/\n| data-[a-zA-Z]+="[^">]*"/g, '')
 387+ return out.replace(/[\r\n]| data-[a-zA-Z]+="[^">]*"/g, '')
388388 .replace(/<!--.*?-->\n?/gm, '');
389389 };
390390
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js
@@ -178,7 +178,7 @@
179179 return tokenCTX;
180180 };
181181
182 -/* Call all transformers on other token types.
 182+/* Call all transformers on non-tag token types.
183183 *
184184 * @param tokenCTX {TokenContext} The current token and its context.
185185 * @param ts List of token transformers for this token type.
@@ -227,7 +227,7 @@
228228 var tokenCTX = new TokenContext(undefined, accum, this, undefined);
229229 var origLen = tokens.length;
230230 for ( var i = 0; i < tokens.length; i++ ) {
231 - tokenCTX.lastToken = tokenCTX.token; // XXX: Fix for re-entrant case!
 231+ tokenCTX.lastToken = tokenCTX.token; // FIXME: Fix re-entrant case!
232232 tokenCTX.token = tokens[i];
233233 tokenCTX.pos = i;
234234 tokenCTX.accum = accum;
@@ -253,6 +253,7 @@
254254 tokenCTX = this._transformToken( tokenCTX, this.transformers.martian );
255255 break;
256256 }
 257+ // add special DELAYED value
257258 if( $.isArray(tokenCTX.token) ) {
258259 // Splice in the returned tokens (while replacing the original
259260 // token), and process them next.
@@ -372,3 +373,4 @@
373374 if (typeof module == "object") {
374375 module.exports.TokenTransformDispatcher = TokenTransformDispatcher;
375376 }
 377+
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
@@ -73,7 +73,7 @@
7474 break;
7575 case "NEWLINE":
7676 //this.emit('end');
77 - this.emit('token', {type: 'Characters', data: "\n"});
 77+ //this.emit('token', {type: 'Characters', data: "\n"});
7878 break;
7979 default:
8080 console.log("Unhandled token: " + JSON.stringify(token));
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -205,6 +205,7 @@
206206 };
207207 var clearFlag = function(flag) {
208208 syntaxFlags[flag]--;
 209+ return false;
209210 };
210211
211212 // Start position of top-level block
@@ -341,16 +342,16 @@
342343
343344
344345 // Start of line
345 -sol = (newline / & { return pos === 0; } { return true; })
 346+sol = nl:(newlineToken / & { return pos === 0; } { return [] })
346347 // Eat multi-line comments, so that syntax after still matches as if it
347348 // was actually preceded by a newline
348 - cn:(c:comment n:newline? {
 349+ cn:( c:comment n:newline? {
349350 if ( n !== '' ) {
350351 return [c, {type: 'TEXT', value: n}];
351352 } else {
352353 return [c];
353354 }
354 - }
 355+ }
355356 )*
356357 // Eat includeonly/noinclude at start of line, so that start-of-line
357358 // syntax after it still matches
@@ -365,7 +366,7 @@
366367 }
367368 }
368369
369 - return [{type: 'NEWLINE'}].concat(cn, niToken);
 370+ return nl.concat(cn, niToken);
370371 }
371372
372373 eof = & { return isEOF(pos); } { return true; }
@@ -402,13 +403,13 @@
403404 / bt:block_tag { return [bt] } // avoid a paragraph if we know that the line starts with a block tag
404405 / para
405406 / inlineline // includes generic tags; wrapped into paragraphs in DOM postprocessor
406 - / s:sol {
 407+ / s:sol /*{
407408 if (s) {
408409 return [s, {type: 'NEWLINE'}];
409410 } else {
410411 return [{type: 'NEWLINE'}];
411412 }
412 - }
 413+ }*/
413414
414415 block_lines
415416 = s:sol
@@ -422,7 +423,8 @@
423424 // Block structures with start-of-line wiki syntax
424425 block_line
425426 = h
426 - / table
 427+ /// table
 428+ / & [{}|] tl:table_lines { return tl; }
427429 / lists
428430 // tag-only lines should not trigger pre
429431 / st:optionalSpaceToken
@@ -450,7 +452,10 @@
451453 return true;
452454 }
453455 & { return syntaxFlags['table']; }
454 - a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; }
 456+ ( a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; }
 457+ / & { return syntaxFlags['tableCellArg'] }
 458+ "|" { return true }
 459+ )
455460 / & { return (syntaxFlags['colon'] &&
456461 ! syntaxFlags.extlink && // example: ; [[Link:Term]] : Definition
457462 ! syntaxFlags.linkdesc); } ":" { return true; }
@@ -910,11 +915,15 @@
911916 }
912917
913918 generic_attribute_value
914 - = "=" space* v:att_value {return v}
 919+ = "=" space* v:att_value {
 920+ return v;
 921+ }
915922
916923 // XXX: attributes can contain templates and template args!!
917924 att_value
918 - = t:[^ \t'"<>='\n]+ { return t.join(''); }
 925+ = t:(!inline_breaks c:[^ \t'"<>='\n] { return c } )+ {
 926+ return t.join('');
 927+ }
919928 // XXX: is "\"" also valid html? or just Wikitext?
920929 / "'" t:[^'>]* "'" { return unquote("'", t.join('')); }
921930 / '"' t:[^">]* '"' { return unquote('"', t.join('')); }
@@ -972,8 +981,125 @@
973982
974983 list_char = [*#:;]
975984
 985+/* split up table handling into individual tokens */
976986
977 -/* Tables */
 987+table_lines
 988+ = & { return setFlag('table'); }
 989+ tl:table_line
 990+ tls:( s:sol tl2:table_line { return s.concat(tl2); } )* {
 991+ clearFlag('table');
 992+ //console.log('table_lines: ' + pp(tl.concat(tls)));
 993+ return tl.concat( tls );
 994+ }
 995+ / & { return clearFlag('table'); }
 996+
 997+// Make sure this is only active at start-of-line!
 998+table_line
 999+ = table_start_tag
 1000+ / table_row_tag
 1001+ / table_data_tags
 1002+ / table_heading_tags
 1003+ / table_caption_tag
 1004+ / table_end_tag
 1005+
 1006+table_start_tag
 1007+ = "{|"
 1008+ ta:generic_attribute*
 1009+ space*
 1010+ te:table_end_tag? // can occur somewhere in the middle of the line too
 1011+ {
 1012+ var tok = [{type: 'TAG', name: 'table'}];
 1013+ if ( ta )
 1014+ tok[0].attribs = ta;
 1015+ if ( te )
 1016+ tok = tok.concat(te);
 1017+ return tok;
 1018+ }
 1019+
 1020+table_caption_tag
 1021+ = "|+"
 1022+ c:inline* {
 1023+ return [{type: 'TAG', name: 'caption'}]
 1024+ .concat( c, [{type: 'ENDTAG', name: 'caption'}]);
 1025+ }
 1026+
 1027+
 1028+// Only add table rows directly to start-of-line productions, so we get
 1029+// implicit rows!
 1030+table_row_tag
 1031+ = //& { console.log("table row enter"); return true; }
 1032+ "|-"
 1033+ a:generic_attribute*
 1034+ space*
 1035+ {
 1036+ // We rely on our tree builder to close the row as needed. This is
 1037+ // needed to support building tables from fragment templates with
 1038+ // individual cells or rows.
 1039+ return [{type: 'TAG', name: 'tr', attribs: a}];
 1040+ }
 1041+
 1042+table_data_tags
 1043+ = //("|" / !'!') // move the missing-'|' case to table_row_tag!
 1044+ "|"
 1045+ td:table_data_tag
 1046+ tds:( "||" tdt:table_data_tag { return tdt } )* {
 1047+ return td.concat(tds);
 1048+ }
 1049+
 1050+table_data_tag
 1051+ = //& { dp("table_data enter, pos=" + pos + input.substr(pos,10)); return true; }
 1052+ ! [}+-]
 1053+ //& { dp('before attrib, pos=' + pos); return true; }
 1054+ a:table_cell_args?
 1055+ //& { dp('past attrib, pos=' + pos); return true; }
 1056+ //& { console.log("past attrib, pos=" + pos + input.substr(pos,10)); return true; }
 1057+ // use inline_breaks to break on tr etc
 1058+ td:( !inline_breaks
 1059+ //& { dp("table_data 2, pos=" + pos + input.substr(pos,10)); return true; }
 1060+ b:block { return b } )*
 1061+ {
 1062+ if ( a == '' ) {
 1063+ a = [];
 1064+ }
 1065+ //dp("table data result: " + pp(td) + ", attribts: " + pp(a));
 1066+ return [{ type: 'TAG', name: 'td', attribs: a}]
 1067+ .concat( td, [{type: 'ENDTAG', name: 'td'}] );
 1068+ }
 1069+
 1070+table_heading_tags
 1071+ = "!"
 1072+ th:table_heading_tag
 1073+ ths:( "!!" tht:table_heading_tag { return tht } )* {
 1074+ return th.concat(ths);
 1075+ }
 1076+
 1077+table_heading_tag
 1078+ = a:table_cell_args?
 1079+ c:inline {
 1080+ if ( a == '' ) {
 1081+ a = [];
 1082+ }
 1083+ return [{type: 'TAG', name: 'th', attribs: a}]
 1084+ .concat( c, [{type: 'ENDTAG', name: 'th'}] );
 1085+ }
 1086+
 1087+table_end_tag
 1088+ = "|}" {
 1089+ var tok = [{type: 'ENDTAG', name: 'table'}];
 1090+ return tok;
 1091+ }
 1092+
 1093+table_cell_args
 1094+ = & { return setFlag('tableCellArg'); }
 1095+ as:generic_attribute+ space* "|" !"|" {
 1096+ clearFlag('tableCellArg');
 1097+ return as;
 1098+ }
 1099+ / & { return clearFlag('tableCellArg'); }
 1100+
 1101+
 1102+
 1103+/* Tables, full-table version (no support for table fragments from templates) */
9781104 table
9791105 = tas:table_start space* c:table_caption? b:table_body? te:table_end {
9801106 var res = {type: 'TAG', name: 'table'}
@@ -1009,9 +1135,6 @@
10101136 / & { clearFlag('table'); return false; } { return null; }
10111137 ) { return res }
10121138
1013 -table_attribs
1014 - = text / ! inline_breaks !newline ![|] c:. { return c }
1015 -
10161139 table_caption
10171140 = n:newlineToken
10181141 "|+" c:inline* {
@@ -1040,7 +1163,10 @@
10411164 table_row
10421165 = //& { dp("table row enter"); return true; }
10431166 n:newlineToken
1044 - "|-" thtd_attribs? space* td:(table_data / table_heading)* {
 1167+ "|-"
 1168+ a:(as:generic_attribute+ space* "|" !"|" { return as } )?
 1169+ space*
 1170+ td:(table_data / table_heading)* {
10451171 return n.concat([{type: 'TAG', name: 'tr'}]
10461172 , td, [{type: 'ENDTAG', name: 'tr'}]);
10471173 }

Status & tagging log