Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -11,7 +11,7 @@ |
12 | 12 | */ |
13 | 13 | |
14 | 14 | (function() { |
15 | | -"use strict"; |
| 15 | +//"use strict"; |
16 | 16 | |
17 | 17 | var fs = require('fs'), |
18 | 18 | path = require('path'), |
— | — | @@ -54,7 +54,8 @@ |
55 | 55 | _import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']); |
56 | 56 | _import(pj('parser', 'ext.cite.taghook.ref.js'), ['MWRefTagHook']); |
57 | 57 | |
58 | | -_require(pj('parser', 'mediawiki.html5TokenEmitter.js')); |
| 58 | +_import(pj('parser', 'mediawiki.html5TokenEmitter.js'), ['FauxHTML5']); |
| 59 | +_import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']); |
59 | 60 | |
60 | 61 | // WikiDom and serializers |
61 | 62 | _require(pj('es', 'es.js')); |
— | — | @@ -131,21 +132,25 @@ |
132 | 133 | return $('<div>').append(node).html(); |
133 | 134 | } |
134 | 135 | |
| 136 | +var htmlparser = new HTML5.Parser(); |
| 137 | + |
135 | 138 | /* Normalize the expected parser output by parsing it using a HTML5 parser and |
136 | 139 | * re-serializing it to HTML. Ideally, the parser would normalize inter-tag |
137 | 140 | * whitespace for us. For now, we fake that by simply stripping all newlines. |
138 | 141 | */ |
139 | 142 | function normalizeHTML(source) { |
140 | | - var parser = new HTML5.Parser(); |
141 | 143 | // TODO: Do not strip newlines in pre and nowiki blocks! |
142 | 144 | source = source.replace(/\n/g, ''); |
143 | 145 | try { |
144 | | - parser.parse('<body>' + source + '</body>'); |
145 | | - return parser.document |
| 146 | + htmlparser.parse('<body>' + source + '</body>'); |
| 147 | + return htmlparser.document |
146 | 148 | .getElementsByTagName('body')[0] |
147 | | - .innerHTML; |
| 149 | + .innerHTML |
| 150 | + // a few things we ignore for now.. |
| 151 | + .replace(/(title|class|rel)="[^"]+"/g, ''); |
148 | 152 | } catch(e) { |
149 | | - console.log("normalizeHTML failed:" + e); |
| 153 | + console.log("normalizeHTML failed on" + |
| 154 | + source + " with the following error: " + e); |
150 | 155 | console.trace(); |
151 | 156 | return source; |
152 | 157 | } |
— | — | @@ -172,7 +177,8 @@ |
173 | 178 | failOutputTests = 0; |
174 | 179 | |
175 | 180 | function processTest(item) { |
176 | | - var tokenizer = new FauxHTML5.Tokenizer(); |
| 181 | + var tokenizer = new FauxHTML5.Tokenizer(), |
| 182 | + postProcessor = new DOMPostProcessor(); |
177 | 183 | if (!('title' in item)) { |
178 | 184 | console.log(item); |
179 | 185 | throw new Error('Missing title from test case.'); |
— | — | @@ -208,7 +214,15 @@ |
209 | 215 | }); |
210 | 216 | //var res = es.HtmlSerializer.stringify(tokens,environment); |
211 | 217 | //console.log(JSON.stringify(tokens)); |
| 218 | + |
| 219 | + // Build a DOM tree from tokens using the HTML tree |
| 220 | + // builder/parser. |
212 | 221 | processTokens(tokens, tokenizer); |
| 222 | + |
| 223 | + // Perform post-processing on DOM. |
| 224 | + postProcessor.doPostProcess(tokenizer.parser.document); |
| 225 | + |
| 226 | + // And serialize the result. |
213 | 227 | var out = tokenizer.parser.document |
214 | 228 | .getElementsByTagName('body')[0] |
215 | 229 | .innerHTML; |
— | — | @@ -217,7 +231,12 @@ |
218 | 232 | printTitle(); |
219 | 233 | failTreeTests++; |
220 | 234 | console.log('RENDER FAIL', err); |
221 | | - } else if ( normalizeOut(out) !== normalizeHTML(item.result) ) { |
| 235 | + return; |
| 236 | + } |
| 237 | + |
| 238 | + var normalizedOut = normalizeOut(out); |
| 239 | + var normalizedExpected = normalizeHTML(item.result); |
| 240 | + if ( normalizedOut !== normalizedExpected ) { |
222 | 241 | printTitle(); |
223 | 242 | failOutputTests++; |
224 | 243 | console.log('RAW EXPECTED:'); |
— | — | @@ -226,12 +245,12 @@ |
227 | 246 | console.log('RAW RENDERED:'); |
228 | 247 | console.log(formatHTML(out) + "\n"); |
229 | 248 | |
230 | | - var a = formatHTML(normalizeHTML( item.result )); |
| 249 | + var a = formatHTML(normalizedExpected); |
231 | 250 | |
232 | 251 | console.log('NORMALIZED EXPECTED:'); |
233 | 252 | console.log(a + "\n"); |
234 | 253 | |
235 | | - var b = formatHTML(normalizeOut( out )); |
| 254 | + var b = formatHTML(normalizedOut); |
236 | 255 | |
237 | 256 | console.log('NORMALIZED RENDERED:') |
238 | 257 | console.log(formatHTML(normalizeOut(out)) + "\n"); |
— | — | @@ -241,7 +260,7 @@ |
242 | 261 | console.log(patch.replace(/^[^\n]*\n[^\n]*\n[^\n]*\n[^\n]*\n/, '')); |
243 | 262 | } else { |
244 | 263 | passedTests++; |
245 | | - console.log( 'PASS: ' + item.title ); |
| 264 | + console.log( 'PASSED: ' + item.title ); |
246 | 265 | } |
247 | 266 | } |
248 | 267 | }); |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.html5TokenEmitter.js |
— | — | @@ -3,16 +3,16 @@ |
4 | 4 | |
5 | 5 | var HTML5 = require('./html5/index'); |
6 | 6 | |
7 | | -FauxHTML5 = {} |
| 7 | +FauxHTML5 = {}; |
8 | 8 | |
9 | 9 | |
10 | 10 | FauxHTML5.Tokenizer = function ( ) { |
11 | 11 | this.parser = new HTML5.Parser(); |
12 | 12 | this.parser.parse(this); |
13 | 13 | return this; |
14 | | -} |
| 14 | +}; |
15 | 15 | |
16 | | -FauxHTML5.Tokenizer.prototype = new events.EventEmitter; |
| 16 | +FauxHTML5.Tokenizer.prototype = new events.EventEmitter(); |
17 | 17 | |
18 | 18 | FauxHTML5.Tokenizer.prototype.processToken = function (token) { |
19 | 19 | var att = function (maybeAttribs) { |
— | — | @@ -43,7 +43,7 @@ |
44 | 44 | data: att(token.attribs)}); |
45 | 45 | break; |
46 | 46 | case "SELFCLOSINGTAG": |
47 | | - this.emit('token', {type: 'EmptyTag', |
| 47 | + this.emit('token', {type: 'StartTag', |
48 | 48 | name: token.name, |
49 | 49 | data: att(token.attribs)}); |
50 | 50 | break; |
— | — | @@ -61,4 +61,8 @@ |
62 | 62 | console.log("Unhandled token: " + JSON.stringify(token)); |
63 | 63 | break; |
64 | 64 | } |
| 65 | +}; |
| 66 | + |
| 67 | +if (typeof module == "object") { |
| 68 | + module.exports.FauxHTML5 = FauxHTML5; |
65 | 69 | } |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMPostProcessor.js |
— | — | @@ -0,0 +1,92 @@ |
| 2 | +/* Perform post-processing steps on an already-built HTML DOM. */ |
| 3 | + /* isBlock(name): lower-cases the given tag name and returns true if it is one of the tags listed in the switch below, false for any other name. */ |
| 4 | +var isBlock = function isBlock (name) { |
| 5 | + switch (name.toLowerCase()) { |
| 6 | + case 'div': |
| 7 | + case 'table': |
| 8 | + case 'td': |
| 9 | + case 'tr': |
| 10 | + case 'tbody': |
| 11 | + case 'p': |
| 12 | + case 'ul': |
| 13 | + case 'ol': |
| 14 | + case 'li': |
| 15 | + case 'dl': |
| 16 | + case 'dt': |
| 17 | + case 'dd': |
| 18 | + case 'img': // NOTE(review): <img> is normally inline content in HTML, yet it is listed as block here (the original author flagged it with "hmm!") - confirm this is intended. |
| 19 | + case 'pre': |
| 20 | + case 'center': |
| 21 | + case 'blockquote': |
| 22 | + return true; |
| 23 | + default: |
| 24 | + return false; |
| 25 | + } |
| 26 | +}; |
| 27 | + |
| 28 | +var process_inlines_in_p = function ( document ) { /* Wrap each run of top-level inline nodes of <body> in a new <p>; block elements, comments and whitespace-only text outside a run stay direct children of <body>. Modifies the document in place and returns nothing. */ |
| 29 | + // document.body does not always work in jsdom |
| 30 | + var body = document.getElementsByTagName('body')[0], |
| 31 | + children = body.cloneNode(false), |
| 32 | + cnodes = body.childNodes, |
| 33 | + inlineStack = []; |
| 34 | + // Move the collected inline nodes into a fresh <p>, append it to body and start a new, empty run. |
| 35 | + function wrapInlines (inlines) { |
| 36 | + var newp = document.createElement('p'); |
| 37 | + for(var i = 0, length = inlines.length; i < length; i++) { |
| 38 | + newp.appendChild(inlines[i]); |
| 39 | + } |
| 40 | + body.appendChild(newp); |
| 41 | + inlineStack = []; |
| 42 | + } |
| 43 | + var i, |
| 44 | + length = cnodes.length; |
| 45 | + // Clear body: move every child into the detached shallow clone, preserving order. |
| 46 | + for(i = 0; i < length; i++) { |
| 47 | + var cnode = body.firstChild; |
| 48 | + children.appendChild(cnode); |
| 49 | + } |
| 50 | + // True when the text node's data consists only of spaces, CR, LF or tabs (or is empty). |
| 51 | + function isElementContentWhitespace ( e ) { |
| 52 | + return (e.data.match(/^[ \r\n\t]*$/) !== null); |
| 53 | + } |
| 54 | + |
| 55 | + // Now re-append all block elements and inline elements wrapped in |
| 56 | + // paragraphs. |
| 57 | + for(i = 0; i < length; i++) { |
| 58 | + var child = children.firstChild, |
| 59 | + ctype = child.nodeType; |
| 60 | + // Inline means: text that is non-blank or continues an open run, or any non-comment node whose name is not a block tag. |
| 61 | + if ((ctype === 3 && (inlineStack.length || !isElementContentWhitespace(child))) || |
| 62 | + (ctype !== 3 && // text |
| 63 | + ctype !== 8 && // comment |
| 64 | + !isBlock(child.nodeName))) { |
| 65 | + // Inline node: detach it from the holding container so the next iteration sees the next sibling (a bare push left it as firstChild forever). |
| 66 | + inlineStack.push(children.removeChild(child)); |
| 67 | + } else if (inlineStack.length) { |
| 68 | + wrapInlines(inlineStack); |
| 69 | + body.appendChild(child); |
| 70 | + } else { |
| 71 | + body.appendChild(child); |
| 72 | + } |
| 73 | + } |
| 74 | + // Flush a run of inline nodes that reaches the end of the body. |
| 75 | + if (inlineStack.length) { |
| 76 | + wrapInlines(inlineStack); |
| 77 | + } |
| 78 | +}; |
| 79 | + /* DOMPostProcessor: holds an ordered array of post-processing passes; each pass is a function that is called with the DOM document. */ |
| 80 | +function DOMPostProcessor () { |
| 81 | + this.processors = [process_inlines_in_p]; |
| 82 | +} |
| 83 | + /* doPostProcess(document): call every pass in this.processors, in array order, with the given document. Return values of the passes are ignored. */ |
| 84 | +DOMPostProcessor.prototype.doPostProcess = function ( document ) { |
| 85 | + for(var i = 0; i < this.processors.length; i++) { |
| 86 | + this.processors[i](document); |
| 87 | + } |
| 88 | +}; |
| 89 | + /* Export the constructor only when a 'module' object is present. */ |
| 90 | + |
| 91 | +if (typeof module == "object") { |
| 92 | + module.exports.DOMPostProcessor = DOMPostProcessor; |
| 93 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMPostProcessor.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 94 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/treebuilder.js |
— | — | @@ -1,4 +1,4 @@ |
2 | | -"use strict"; |
| 2 | +//"use strict"; |
3 | 3 | |
4 | 4 | var HTML5 = require('../html5'); |
5 | 5 | var assert = require('assert'); |
Index: trunk/extensions/VisualEditor/modules/parser/html5/serializer.js |
— | — | @@ -1,4 +1,4 @@ |
2 | | -"use strict"; |
| 2 | +//"use strict"; |
3 | 3 | var HTML5 = require('../html5'); |
4 | 4 | var events = require('events'); |
5 | 5 | |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser.js |
— | — | @@ -1,4 +1,4 @@ |
2 | | -"use strict"; |
| 2 | +//"use strict"; |
3 | 3 | |
4 | 4 | var HTML5 = exports.HTML5 = require('../html5'); |
5 | 5 | |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_head_phase.js |
— | — | @@ -1,4 +1,4 @@ |
2 | | -"use strict"; |
| 2 | +//"use strict"; |
3 | 3 | var Phase = require('./phase').Phase; |
4 | 4 | var HTML5 = require('../../html5'); |
5 | 5 | |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_head_phase.js |
— | — | @@ -1,4 +1,4 @@ |
2 | | -"use strict"; |
| 2 | +//"use strict"; |
3 | 3 | var Phase = require('./phase').Phase; |
4 | 4 | var HTML5 = require('../../html5'); |
5 | 5 | |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_body_phase.js |
— | — | @@ -1,4 +1,4 @@ |
2 | | -"use strict"; |
| 2 | +//"use strict"; |
3 | 3 | var HTML5 = require('../../html5'); |
4 | 4 | var Phase = require('./phase').Phase; |
5 | 5 | var assert = require('assert') |
— | — | @@ -599,7 +599,7 @@ |
600 | 600 | } |
601 | 601 | |
602 | 602 | p.prototype.endTagHeading = function(name) { |
603 | | - for(i in HTML5.HEADING_ELEMENTS) { |
| 603 | + for(var i in HTML5.HEADING_ELEMENTS) { |
604 | 604 | var el = HTML5.HEADING_ELEMENTS[i]; |
605 | 605 | if(this.inScope(el)) { |
606 | 606 | this.tree.generateImpliedEndTags(); |
— | — | @@ -610,7 +610,7 @@ |
611 | 611 | if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) |
612 | 612 | this.parse_error('end-tag-too-early', {name: name}); |
613 | 613 | |
614 | | - for(i in HTML5.HEADING_ELEMENTS) { |
| 614 | + for(var i in HTML5.HEADING_ELEMENTS) { |
615 | 615 | var el = HTML5.HEADING_ELEMENTS[i]; |
616 | 616 | if(this.inScope(el)) { |
617 | 617 | this.tree.remove_open_elements_until(function(e) { |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_head_phase.js |
— | — | @@ -1,4 +1,4 @@ |
2 | | -"use strict"; |
| 2 | +//"use strict"; |
3 | 3 | var Phase = require('./phase').Phase; |
4 | 4 | |
5 | 5 | var start_tag_handlers = { |
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt |
— | — | @@ -403,9 +403,9 @@ |
404 | 404 | = block_lines |
405 | 405 | / pre |
406 | 406 | / comment &eolf |
| 407 | + / pre |
407 | 408 | / para |
408 | | - / pre |
409 | | - / block_tag // TODO: handle nesting of inline content for these! |
| 409 | + / inline // includes generic tags; wrapped into paragraphs in DOM postprocessor |
410 | 410 | / (s:sol { |
411 | 411 | if (s) { |
412 | 412 | return [s, {type: 'NEWLINE'}]; |
— | — | @@ -511,9 +511,8 @@ |
512 | 512 | |
513 | 513 | // TODO: convert inline content to annotations! |
514 | 514 | para |
515 | | - = (sol br)? pl:para_line pls:(!block_lines para_line)* { |
516 | | - return [{type: 'TAG', name: 'p'}] |
517 | | - .concat([pl], pls, [{type: 'ENDTAG', name: 'p'}]); |
| 515 | + = s1:sol s2:sol c:inlineline { |
| 516 | + return s1.concat(s2, [{type: 'TAG', name: 'p'}], c); |
518 | 517 | } |
519 | 518 | |
520 | 519 | para_line |
— | — | @@ -795,6 +794,7 @@ |
796 | 795 | generic_tag |
797 | 796 | = "<" end:"/"? name:[0-9a-zA-Z]+ |
798 | 797 | attribs:generic_attribute* |
| 798 | + space* |
799 | 799 | selfclose:"/"? |
800 | 800 | ">" { |
801 | 801 | var res = {name: name.join(''), attribs: attribs}; |
— | — | @@ -970,6 +970,13 @@ |
971 | 971 | } else { |
972 | 972 | var dtbullets = bullets.slice(0, bullets.length - 1); |
973 | 973 | dtbullets.push(':'); |
| 974 | + |
| 975 | + // convert a trailing single-space text token into a non-breaking space (U+00A0) |
| 976 | + var clen = c.length; |
| 977 | + if (clen && c[clen - 1].type === 'TEXT' && c[clen - 1].value == ' ') { |
| 978 | + c[clen - 1].value = "\u00a0"; |
| 979 | + } |
| 980 | + |
974 | 981 | return [ { type: 'TAG', name: 'listItem', bullets: bullets } ] |
975 | 982 | .concat( c |
976 | 983 | ,[{ type: 'TAG', name: 'listItem', bullets: dtbullets } ] |