Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js |
— | — | @@ -29,8 +29,15 @@ |
30 | 30 | |
31 | 31 | testWhiteList["Nested table"] = "<table border=\"1\"><tbody><tr><td> α</td><td><table bgcolor=\"#ABCDEF\" border=\"2\"><tbody><tr><td>nested</td></tr><tr><td>table</td></tr></tbody></table></td><td>the original table again</td></tr></tbody></table>"; |
32 | 32 | |
| 33 | +// Very minor whitespace difference at end of cell (MediaWiki inserts a |
| 34 | +// newline before the close tag even if there was no trailing space in the cell) |
| 35 | +testWhiteList["Table rowspan"] = "<table border=\"1\" data-sourcePos=\"0:121\"><tbody><tr><td> Cell 1, row 1 </td><td rowspan=\"2\"> Cell 2, row 1 (and 2) </td><td> Cell 3, row 1 </td></tr><tr><td> Cell 1, row 2 </td><td> Cell 3, row 2 </td></tr></tbody></table>"; |
33 | 36 | |
| 37 | +// Inter-element whitespace only |
| 38 | +testWhiteList["Indented table markup mixed with indented pre content (proposed in bug 6200)"] = " \n\n<table><tbody><tr><td><pre>\nText that should be rendered preformatted\n</pre></td></tr></tbody></table>"; |
34 | 39 | |
| 40 | + |
| 41 | + |
35 | 42 | /* Missing token transform functionality */ |
36 | 43 | |
37 | 44 | // We don't implement percent encoding for URIs yet. |
Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -351,7 +351,7 @@ |
352 | 352 | */ |
353 | 353 | ParserTests.prototype.normalizeHTML = function (source) { |
354 | 354 | // TODO: Do not strip newlines in pre and nowiki blocks! |
355 | | - source = source.replace(/\n/g, ''); |
| 355 | + source = source.replace(/[\r\n]/g, ''); |
356 | 356 | try { |
357 | 357 | this.htmlparser.parse('<body>' + source + '</body>'); |
358 | 358 | return this.htmlparser.document |
— | — | @@ -383,7 +383,7 @@ |
384 | 384 | // known-ok differences. |
385 | 385 | ParserTests.prototype.normalizeOut = function ( out ) { |
386 | 386 | // TODO: Do not strip newlines in pre and nowiki blocks! |
387 | | - return out.replace(/\n| data-[a-zA-Z]+="[^">]*"/g, '') |
| 387 | + return out.replace(/[\r\n]| data-[a-zA-Z]+="[^">]*"/g, '') |
388 | 388 | .replace(/<!--.*?-->\n?/gm, ''); |
389 | 389 | }; |
390 | 390 | |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js |
— | — | @@ -178,7 +178,7 @@ |
179 | 179 | return tokenCTX; |
180 | 180 | }; |
181 | 181 | |
182 | | -/* Call all transformers on other token types. |
| 182 | +/* Call all transformers on non-tag token types. |
183 | 183 | * |
184 | 184 | * @param tokenCTX {TokenContext} The current token and its context. |
185 | 185 | * @param ts List of token transformers for this token type. |
— | — | @@ -227,7 +227,7 @@ |
228 | 228 | var tokenCTX = new TokenContext(undefined, accum, this, undefined); |
229 | 229 | var origLen = tokens.length; |
230 | 230 | for ( var i = 0; i < tokens.length; i++ ) { |
231 | | - tokenCTX.lastToken = tokenCTX.token; // XXX: Fix for re-entrant case! |
| 231 | + tokenCTX.lastToken = tokenCTX.token; // FIXME: Fix re-entrant case! |
232 | 232 | tokenCTX.token = tokens[i]; |
233 | 233 | tokenCTX.pos = i; |
234 | 234 | tokenCTX.accum = accum; |
— | — | @@ -253,6 +253,7 @@ |
254 | 254 | tokenCTX = this._transformToken( tokenCTX, this.transformers.martian ); |
255 | 255 | break; |
256 | 256 | } |
| 257 | + // add special DELAYED value |
257 | 258 | if( $.isArray(tokenCTX.token) ) { |
258 | 259 | // Splice in the returned tokens (while replacing the original |
259 | 260 | // token), and process them next. |
— | — | @@ -372,3 +373,4 @@ |
373 | 374 | if (typeof module == "object") { |
374 | 375 | module.exports.TokenTransformDispatcher = TokenTransformDispatcher; |
375 | 376 | } |
| 377 | + |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js |
— | — | @@ -73,7 +73,7 @@ |
74 | 74 | break; |
75 | 75 | case "NEWLINE": |
76 | 76 | //this.emit('end'); |
77 | | - this.emit('token', {type: 'Characters', data: "\n"}); |
| 77 | + //this.emit('token', {type: 'Characters', data: "\n"}); |
78 | 78 | break; |
79 | 79 | default: |
80 | 80 | console.log("Unhandled token: " + JSON.stringify(token)); |
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -205,6 +205,7 @@ |
206 | 206 | }; |
207 | 207 | var clearFlag = function(flag) { |
208 | 208 | syntaxFlags[flag]--; |
| 209 | + return false; |
209 | 210 | }; |
210 | 211 | |
211 | 212 | // Start position of top-level block |
— | — | @@ -341,16 +342,16 @@ |
342 | 343 | |
343 | 344 | |
344 | 345 | // Start of line |
345 | | -sol = (newline / & { return pos === 0; } { return true; }) |
| 346 | +sol = nl:(newlineToken / & { return pos === 0; } { return [] }) |
346 | 347 | // Eat multi-line comments, so that syntax after still matches as if it |
347 | 348 | // was actually preceded by a newline |
348 | | - cn:(c:comment n:newline? { |
| 349 | + cn:( c:comment n:newline? { |
349 | 350 | if ( n !== '' ) { |
350 | 351 | return [c, {type: 'TEXT', value: n}]; |
351 | 352 | } else { |
352 | 353 | return [c]; |
353 | 354 | } |
354 | | - } |
| 355 | + } |
355 | 356 | )* |
356 | 357 | // Eat includeonly/noinclude at start of line, so that start-of-line |
357 | 358 | // syntax after it still matches |
— | — | @@ -365,7 +366,7 @@ |
366 | 367 | } |
367 | 368 | } |
368 | 369 | |
369 | | - return [{type: 'NEWLINE'}].concat(cn, niToken); |
| 370 | + return nl.concat(cn, niToken); |
370 | 371 | } |
371 | 372 | |
372 | 373 | eof = & { return isEOF(pos); } { return true; } |
— | — | @@ -402,13 +403,13 @@ |
403 | 404 | / bt:block_tag { return [bt] } // avoid a paragraph if we know that the line starts with a block tag |
404 | 405 | / para |
405 | 406 | / inlineline // includes generic tags; wrapped into paragraphs in DOM postprocessor |
406 | | - / s:sol { |
| 407 | + / s:sol /*{ |
407 | 408 | if (s) { |
408 | 409 | return [s, {type: 'NEWLINE'}]; |
409 | 410 | } else { |
410 | 411 | return [{type: 'NEWLINE'}]; |
411 | 412 | } |
412 | | - } |
| 413 | + }*/ |
413 | 414 | |
414 | 415 | block_lines |
415 | 416 | = s:sol |
— | — | @@ -422,7 +423,8 @@ |
423 | 424 | // Block structures with start-of-line wiki syntax |
424 | 425 | block_line |
425 | 426 | = h |
426 | | - / table |
| 427 | + /// table |
| 428 | + / & [{}|] tl:table_lines { return tl; } |
427 | 429 | / lists |
428 | 430 | // tag-only lines should not trigger pre |
429 | 431 | / st:optionalSpaceToken |
— | — | @@ -450,7 +452,10 @@ |
451 | 453 | return true; |
452 | 454 | } |
453 | 455 | & { return syntaxFlags['table']; } |
454 | | - a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; } |
| 456 | + ( a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a) + pos); return true; } |
| 457 | + / & { return syntaxFlags['tableCellArg'] } |
| 458 | + "|" { return true } |
| 459 | + ) |
455 | 460 | / & { return (syntaxFlags['colon'] && |
456 | 461 | ! syntaxFlags.extlink && // example: ; [[Link:Term]] : Definition |
457 | 462 | ! syntaxFlags.linkdesc); } ":" { return true; } |
— | — | @@ -910,11 +915,15 @@ |
911 | 916 | } |
912 | 917 | |
913 | 918 | generic_attribute_value |
914 | | - = "=" space* v:att_value {return v} |
| 919 | + = "=" space* v:att_value { |
| 920 | + return v; |
| 921 | + } |
915 | 922 | |
916 | 923 | // XXX: attributes can contain templates and template args!! |
917 | 924 | att_value |
918 | | - = t:[^ \t'"<>='\n]+ { return t.join(''); } |
| 925 | + = t:(!inline_breaks c:[^ \t'"<>='\n] { return c } )+ { |
| 926 | + return t.join(''); |
| 927 | + } |
919 | 928 | // XXX: is "\"" also valid html? or just Wikitext? |
920 | 929 | / "'" t:[^'>]* "'" { return unquote("'", t.join('')); } |
921 | 930 | / '"' t:[^">]* '"' { return unquote('"', t.join('')); } |
— | — | @@ -972,8 +981,125 @@ |
973 | 982 | |
974 | 983 | list_char = [*#:;] |
975 | 984 | |
| 985 | +/* split up table handling into individual tokens */ |
976 | 986 | |
977 | | -/* Tables */ |
| 987 | +table_lines |
| 988 | + = & { return setFlag('table'); } |
| 989 | + tl:table_line |
| 990 | + tls:( s:sol tl2:table_line { return s.concat(tl2); } )* { |
| 991 | + clearFlag('table'); |
| 992 | + //console.log('table_lines: ' + pp(tl.concat(tls))); |
| 993 | + return tl.concat( tls ); |
| 994 | + } |
| 995 | + / & { return clearFlag('table'); } |
| 996 | + |
| 997 | +// Make sure this is only active at start-of-line! |
| 998 | +table_line |
| 999 | + = table_start_tag |
| 1000 | + / table_row_tag |
| 1001 | + / table_data_tags |
| 1002 | + / table_heading_tags |
| 1003 | + / table_caption_tag |
| 1004 | + / table_end_tag |
| 1005 | + |
| 1006 | +table_start_tag |
| 1007 | + = "{|" |
| 1008 | + ta:generic_attribute* |
| 1009 | + space* |
| 1010 | + te:table_end_tag? // can occur somewhere in the middle of the line too |
| 1011 | + { |
| 1012 | + var tok = [{type: 'TAG', name: 'table'}]; |
| 1013 | + if ( ta ) |
| 1014 | + tok[0].attribs = ta; |
| 1015 | + if ( te ) |
| 1016 | + tok = tok.concat(te); |
| 1017 | + return tok; |
| 1018 | + } |
| 1019 | + |
| 1020 | +table_caption_tag |
| 1021 | + = "|+" |
| 1022 | + c:inline* { |
| 1023 | + return [{type: 'TAG', name: 'caption'}] |
| 1024 | + .concat( c, [{type: 'ENDTAG', name: 'caption'}]); |
| 1025 | + } |
| 1026 | + |
| 1027 | + |
| 1028 | +// Only add table rows directly to start-of-line productions, so we get |
| 1029 | +// implicit rows! |
| 1030 | +table_row_tag |
| 1031 | + = //& { console.log("table row enter"); return true; } |
| 1032 | + "|-" |
| 1033 | + a:generic_attribute* |
| 1034 | + space* |
| 1035 | + { |
| 1036 | + // We rely on our tree builder to close the row as needed. This is |
| 1037 | + // needed to support building tables from fragment templates with |
| 1038 | + // individual cells or rows. |
| 1039 | + return [{type: 'TAG', name: 'tr', attribs: a}]; |
| 1040 | + } |
| 1041 | + |
| 1042 | +table_data_tags |
| 1043 | + = //("|" / !'!') // move the missing-'|' case to table_row_tag! |
| 1044 | + "|" |
| 1045 | + td:table_data_tag |
| 1046 | + tds:( "||" tdt:table_data_tag { return tdt } )* { |
| 1047 | + return td.concat(tds); |
| 1048 | + } |
| 1049 | + |
| 1050 | +table_data_tag |
| 1051 | + = //& { dp("table_data enter, pos=" + pos + input.substr(pos,10)); return true; } |
| 1052 | + ! [}+-] |
| 1053 | + //& { dp('before attrib, pos=' + pos); return true; } |
| 1054 | + a:table_cell_args? |
| 1055 | + //& { dp('past attrib, pos=' + pos); return true; } |
| 1056 | + //& { console.log("past attrib, pos=" + pos + input.substr(pos,10)); return true; } |
| 1057 | + // use inline_breaks to break on tr etc |
| 1058 | + td:( !inline_breaks |
| 1059 | + //& { dp("table_data 2, pos=" + pos + input.substr(pos,10)); return true; } |
| 1060 | + b:block { return b } )* |
| 1061 | + { |
| 1062 | + if ( a == '' ) { |
| 1063 | + a = []; |
| 1064 | + } |
| 1065 | + //dp("table data result: " + pp(td) + ", attribts: " + pp(a)); |
| 1066 | + return [{ type: 'TAG', name: 'td', attribs: a}] |
| 1067 | + .concat( td, [{type: 'ENDTAG', name: 'td'}] ); |
| 1068 | + } |
| 1069 | + |
| 1070 | +table_heading_tags |
| 1071 | + = "!" |
| 1072 | + th:table_heading_tag |
| 1073 | + ths:( "!!" tht:table_heading_tag { return tht } )* { |
| 1074 | + return th.concat(ths); |
| 1075 | + } |
| 1076 | + |
| 1077 | +table_heading_tag |
| 1078 | + = a:table_cell_args? |
| 1079 | + c:inline { |
| 1080 | + if ( a == '' ) { |
| 1081 | + a = []; |
| 1082 | + } |
| 1083 | + return [{type: 'TAG', name: 'th', attribs: a}] |
| 1084 | + .concat( c, [{type: 'ENDTAG', name: 'th'}] ); |
| 1085 | + } |
| 1086 | + |
| 1087 | +table_end_tag |
| 1088 | + = "|}" { |
| 1089 | + var tok = [{type: 'ENDTAG', name: 'table'}]; |
| 1090 | + return tok; |
| 1091 | + } |
| 1092 | + |
| 1093 | +table_cell_args |
| 1094 | + = & { return setFlag('tableCellArg'); } |
| 1095 | + as:generic_attribute+ space* "|" !"|" { |
| 1096 | + clearFlag('tableCellArg'); |
| 1097 | + return as; |
| 1098 | + } |
| 1099 | + / & { return clearFlag('tableCellArg'); } |
| 1100 | + |
| 1101 | + |
| 1102 | + |
| 1103 | +/* Tables, full-table version (no support for table fragments from templates) */ |
978 | 1104 | table |
979 | 1105 | = tas:table_start space* c:table_caption? b:table_body? te:table_end { |
980 | 1106 | var res = {type: 'TAG', name: 'table'} |
— | — | @@ -1009,9 +1135,6 @@ |
1010 | 1136 | / & { clearFlag('table'); return false; } { return null; } |
1011 | 1137 | ) { return res } |
1012 | 1138 | |
1013 | | -table_attribs |
1014 | | - = text / ! inline_breaks !newline ![|] c:. { return c } |
1015 | | - |
1016 | 1139 | table_caption |
1017 | 1140 | = n:newlineToken |
1018 | 1141 | "|+" c:inline* { |
— | — | @@ -1040,7 +1163,10 @@ |
1041 | 1164 | table_row |
1042 | 1165 | = //& { dp("table row enter"); return true; } |
1043 | 1166 | n:newlineToken |
1044 | | - "|-" thtd_attribs? space* td:(table_data / table_heading)* { |
| 1167 | + "|-" |
| 1168 | + a:(as:generic_attribute+ space* "|" !"|" { return as } )? |
| 1169 | + space* |
| 1170 | + td:(table_data / table_heading)* { |
1045 | 1171 | return n.concat([{type: 'TAG', name: 'tr'}] |
1046 | 1172 | , td, [{type: 'ENDTAG', name: 'tr'}]); |
1047 | 1173 | } |