Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -63,6 +63,8 @@ |
64 | 64 | _import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']); |
65 | 65 | _import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']); |
66 | 66 | |
| 67 | +_import(pj('parser', 'mediawiki.DOMConverter'), ['DOMConverter']); |
| 68 | + |
67 | 69 | _import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']); |
68 | 70 | |
69 | 71 | _import(pj('parser', 'ext.Cite.js'), ['Cite']); |
— | — | @@ -121,6 +123,11 @@ |
122 | 124 | description: 'Print out a whitelist entry for failing tests. Default false.', |
123 | 125 | default: false, |
124 | 126 | boolean: true |
| 127 | + }, |
| 128 | + 'wikidom': { |
| 129 | + description: 'Print out a WikiDom conversion of the HTML DOM', |
| 130 | + default: false, |
| 131 | + boolean: true |
125 | 132 | } |
126 | 133 | } |
127 | 134 | ).check( function(argv) { |
— | — | @@ -181,6 +188,8 @@ |
182 | 189 | |
183 | 190 | this.postProcessor = new DOMPostProcessor(); |
184 | 191 | |
| 192 | + this.DOMConverter = new DOMConverter(); |
| 193 | + |
185 | 194 | var pt = this; |
186 | 195 | |
187 | 196 | // Set up the TokenTransformDispatcher with a callback for the remaining |
— | — | @@ -196,13 +205,18 @@ |
197 | 206 | pt.buildTree( tokens, treeBuilder ); |
198 | 207 | |
199 | 208 | // Perform post-processing on DOM. |
200 | | - pt.postProcessor.doPostProcess(treeBuilder.parser.document); |
| 209 | + pt.postProcessor.doPostProcess(treeBuilder.document); |
201 | 210 | |
202 | 211 | // And serialize the result. |
203 | 212 | var out = treeBuilder.document.body.innerHTML; |
204 | 213 | |
205 | 214 | // Finally, check the result vs. the expected result. |
206 | 215 | pt.checkResult( pt.currentItem, out ); |
| 216 | + |
| 217 | + if ( pt.argv.wikidom ) { |
| 218 | + // Test HTML DOM -> WikiDOM conversion |
| 219 | + pt.printWikiDom( treeBuilder.document.body ); |
| 220 | + } |
207 | 221 | }); |
208 | 222 | |
209 | 223 | // Add token transformations.. |
— | — | @@ -225,8 +239,6 @@ |
226 | 240 | } |
227 | 241 | |
228 | 242 | |
229 | | - |
230 | | - |
231 | 243 | /** |
232 | 244 | * Get an object holding our tests cases. Eventually from a cache file |
233 | 245 | */ |
— | — | @@ -520,6 +532,21 @@ |
521 | 533 | }; |
522 | 534 | |
523 | 535 | |
/**
 * Dump the WikiDom conversion of an HTML DOM subtree to the console.
 *
 * A colored 'WikiDom:' heading is written first; the subtree is then run
 * through this.DOMConverter.HTMLtoWiki() and the result is printed as JSON
 * indented by two spaces, followed by an empty line.
 *
 * @param {Object} body Root of the HTML DOM to convert (the caller passes
 *   the document body)
 */
ParserTests.prototype.printWikiDom = function ( body ) {
	// Emit the heading before converting: the converter may log on its own,
	// and that output has to appear below the heading.
	console.log( 'WikiDom'.cyan + ':' );

	var wikiDom = this.DOMConverter.HTMLtoWiki( body );
	var serialized = JSON.stringify( wikiDom, null, 2 );

	console.log( serialized + "\n" );
};
| 549 | + |
| 550 | + |
524 | 551 | ParserTests.prototype.buildTree = function ( tokens, treeBuilder ) { |
525 | 552 | // push a body element, just to be sure to have one |
526 | 553 | treeBuilder.processToken({type: 'TAG', name: 'body'}); |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js |
— | — | @@ -62,8 +62,8 @@ |
63 | 63 | break; |
64 | 64 | case "END": |
65 | 65 | this.emit('end'); |
| 66 | + this.document = this.parser.document; |
66 | 67 | // HACK: This should not be needed really. |
67 | | - this.document = this.parser.document; |
68 | 68 | this.document.body = this.document.getElementsByTagName('body')[0]; |
69 | 69 | break; |
70 | 70 | case "NEWLINE": |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMPostProcessor.js |
— | — | @@ -30,12 +30,18 @@ |
31 | 31 | } |
32 | 32 | }; |
33 | 33 | |
| 34 | +// Quick HACK: define Node constants |
| 35 | +// https://developer.mozilla.org/en/nodeType |
| 36 | +var Node = { |
| 37 | + TEXT_NODE: 3, |
| 38 | + COMMENT_NODE: 8 |
| 39 | +}; |
| 40 | + |
34 | 41 | // Wrap all top-level inline elements in paragraphs. This should also be |
35 | 42 | // applied inside block-level elements, but in that case the first paragraph |
36 | 43 | // usually remains plain inline. |
37 | 44 | var process_inlines_in_p = function ( document ) { |
38 | | - // document.body does not always work in jsdom, so work around it. |
39 | | - var body = document.getElementsByTagName('body')[0], |
| 45 | + var body = document.body, |
40 | 46 | newP = document.createElement('p'), |
41 | 47 | cnodes = body.childNodes, |
42 | 48 | haveInlines = false, |
— | — | @@ -50,8 +56,8 @@ |
51 | 57 | ctype = child.nodeType; |
52 | 58 | //console.log(child + ctype); |
53 | 59 | if ((ctype === 3 && (haveInlines || !isElementContentWhitespace(child))) || |
54 | | - (ctype !== 3 && // text |
55 | | - ctype !== 8 && // comment |
| 60 | + (ctype !== Node.TEXT_NODE && |
| 61 | + ctype !== Node.COMMENT_NODE && |
56 | 62 | !isBlock(child.nodeName))) { |
57 | 63 | // text node |
58 | 64 | newP.appendChild(child); |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js |
— | — | @@ -0,0 +1,306 @@ |
/**
 * Conversions between HTML DOM and WikiDom
 *
 * @class
 * @constructor
 */
function DOMConverter () {
}

// Quick HACK: define Node constants locally instead of relying on a global
// Node object. Values follow the DOM nodeType numbering.
// https://developer.mozilla.org/en/nodeType
var Node = {
	ELEMENT_NODE: 1,
	ATTRIBUTE_NODE: 2,
	TEXT_NODE: 3,
	CDATA_SECTION_NODE: 4,
	ENTITY_REFERENCE_NODE: 5,
	ENTITY_NODE: 6,
	PROCESSING_INSTRUCTION_NODE: 7,
	COMMENT_NODE: 8,
	DOCUMENT_NODE: 9,
	DOCUMENT_TYPE_NODE: 10,
	DOCUMENT_FRAGMENT_NODE: 11,
	NOTATION_NODE: 12
};

/**
 * Look up the conversion handler and WikiDom type for a block-level HTML
 * element.
 *
 * Unknown element names are logged and converted as branch nodes whose
 * WikiDom type is the lower-cased element name.
 *
 * @method
 * @param {String} nodeName HTML element name (matched case-insensitively)
 * @returns {Object} Object with a 'handler' function (to be called with the
 *   converter as 'this') and a WikiDom 'type' string
 */
DOMConverter.prototype.getHTMLHandlerInfo = function ( nodeName ) {
	switch ( nodeName.toLowerCase() ) {
		case 'p':
			return {
				handler: this._convertHTMLLeaf,
				type: 'paragraph'
			};
		case 'li':
		case 'dt': // was a second 'dl', which shadowed the list case below
		case 'dd':
			return {
				handler: this._convertHTMLLeaf,
				type: 'listItem'
			};
		case 'pre':
			return {
				handler: this._convertHTMLLeaf,
				type: 'pre'
			};
		case 'ul':
		case 'ol':
		case 'dl':
			return {
				handler: this._convertHTMLBranch,
				type: 'list'
			};
		default:
			console.log( 'HTML to Wiki DOM conversion error. Unsupported node name ' +
					nodeName );
			return {
				handler: this._convertHTMLBranch,
				type: nodeName.toLowerCase()
			};
	}
};

/**
 * Map an inline HTML element name to a WikiDom annotation type.
 *
 * @method
 * @param {String} nodeName HTML element name (matched case-insensitively)
 * @returns {String|undefined} Annotation type, or undefined (after logging)
 *   when the element is not a supported annotation
 */
DOMConverter.prototype.getHTMLAnnotationType = function ( nodeName ) {
	switch ( nodeName.toLowerCase() ) {
		case 'i':
			return 'textStyle/italic';
		case 'b':
			return 'textStyle/bold';
		case 'span':
			return 'textStyle/span';
		case 'a':
			return 'link/unknown'; // XXX: distinguish internal / external etc
		default:
			console.log( 'HTML to Wiki DOM conversion error. Unsupported html annotation ' +
					nodeName );
			return undefined;
	}
};

/**
 * Private helper: log a child node whose nodeType none of the converters
 * handle.
 *
 * The node type and name are reported rather than innerHTML, since this is
 * only reached for non-element nodes.
 *
 * @param {Object} cnode HTML DOM node
 */
DOMConverter.prototype._reportUnhandledNode = function ( cnode ) {
	console.log( 'HTML to Wiki DOM conversion error. Unhandled node type ' +
			cnode.nodeType + ' (' + cnode.nodeName + ')' );
};

/**
 * Convert a HTML DOM to WikiDom
 *
 * Only element children of the root are converted; top-level text and
 * comment nodes are skipped.
 *
 * @method
 * @param {Object} root of HTML DOM (usually the body element)
 * @returns {Object} WikiDom version
 */
DOMConverter.prototype.HTMLtoWiki = function ( node ) {
	var children = node.childNodes,
		out = {
			type: 'document',
			children: []
		};
	for ( var i = 0, l = children.length; i < l; i++ ) {
		var cnode = children[i];
		switch ( cnode.nodeType ) {
			case Node.ELEMENT_NODE:
				// Call a handler for the particular node type; every
				// top-level block starts at offset 0.
				var hi = this.getHTMLHandlerInfo( cnode.nodeName );
				var res = hi.handler.call( this, cnode, 0, hi.type );
				out.children.push( res.node );
				break;
			case Node.TEXT_NODE:
				// Add text as content, and increment offset
				// BUT: Should not appear at toplevel!
				break;
			case Node.COMMENT_NODE:
				// Add a comment annotation to which text? Not clear how this
				// can be represented in WikiDom.
				break;
			default:
				this._reportUnhandledNode( cnode );
				break;
		}
	}
	return out;
};

/**
 * Private HTML branch node handler
 *
 * Converts an element whose element children are themselves block nodes.
 * Text and comment children are currently ignored.
 *
 * @param {Object} HTML DOM element
 * @param {Int} WikiDom offset within a block
 * @param {String} WikiDom type to assign to the resulting node
 * @returns {Object} Object with the resulting 'offset' and the WikiDom 'node'
 */
DOMConverter.prototype._convertHTMLBranch = function ( node, offset, type ) {
	var children = node.childNodes,
		wnode = {
			type: type,
			attributes: this._HTMLPropertiesToWikiAttributes( node ),
			children: []
		};
	for ( var i = 0, l = children.length; i < l; i++ ) {
		var cnode = children[i];
		switch ( cnode.nodeType ) {
			case Node.ELEMENT_NODE:
				// Call a handler for the particular node type. The child
				// starts one past the running offset, and the running offset
				// continues from wherever the child ended.
				var hi = this.getHTMLHandlerInfo( cnode.nodeName );
				var res = hi.handler.call( this, cnode, offset + 1, hi.type );
				wnode.children.push( res.node );
				offset = res.offset;
				break;
			case Node.TEXT_NODE:
				// Create a paragraph and add it to children?
				break;
			case Node.COMMENT_NODE:
				// add a comment node.
				break;
			default:
				this._reportUnhandledNode( cnode );
				break;
		}
	}
	return {
		offset: offset,
		node: wnode
	};
};

/**
 * Private HTML leaf node handler
 *
 * Flattens the element's inline content into a text string plus a list of
 * annotations with offset ranges.
 *
 * @param {Object} HTML DOM element
 * @param {Int} WikiDom offset within a block
 * @param {String} WikiDom type to assign to the resulting node
 * @returns {Object} Object with the resulting 'offset' and the WikiDom 'node'
 */
DOMConverter.prototype._convertHTMLLeaf = function ( node, offset, type ) {
	var children = node.childNodes,
		wnode = {
			type: type,
			attributes: this._HTMLPropertiesToWikiAttributes( node ),
			content: {
				text: '',
				annotations: []
			}
		};
	for ( var i = 0, l = children.length; i < l; i++ ) {
		var cnode = children[i];
		switch ( cnode.nodeType ) {
			case Node.ELEMENT_NODE:
				// Call a handler for the particular annotation node type
				// NOTE(review): elements without a known annotation type are
				// dropped together with their text content - confirm this is
				// intended.
				var annotationtype = this.getHTMLAnnotationType( cnode.nodeName );
				if ( annotationtype ) {
					var res = this._convertHTMLAnnotation( cnode, offset, annotationtype );
					offset += res.text.length;
					wnode.content.text += res.text;
					wnode.content.annotations = wnode.content.annotations
						.concat( res.annotations );
				}
				break;
			case Node.TEXT_NODE:
				// Add text as content, and increment offset
				wnode.content.text += cnode.data;
				offset += cnode.data.length;
				break;
			case Node.COMMENT_NODE:
				// add a comment annotation?
				break;
			default:
				this._reportUnhandledNode( cnode );
				break;
		}
	}
	return {
		offset: offset,
		node: wnode
	};
};

/**
 * Private HTML annotation (inline element) handler
 *
 * Collects the element's text and produces one annotation for the element
 * itself, followed by the annotations of any nested inline elements.
 *
 * @param {Object} HTML DOM element
 * @param {Int} WikiDom offset at which the element's text starts
 * @param {String} WikiDom annotation type for this element
 * @returns {Object} Object with the collected 'text' and the 'annotations'
 *   array (this element's annotation first)
 */
DOMConverter.prototype._convertHTMLAnnotation = function ( node, offset, type ) {
	var children = node.childNodes,
		text = '',
		annotations = [
			{
				type: type,
				data: this._HTMLPropertiesToWikiData( node ),
				range: {
					start: offset,
					// patched below once the covered text length is known
					end: offset
				}
			}
		];
	for ( var i = 0, l = children.length; i < l; i++ ) {
		var cnode = children[i];
		switch ( cnode.nodeType ) {
			case Node.ELEMENT_NODE:
				// Call a handler for the particular annotation node type
				var annotationtype = this.getHTMLAnnotationType( cnode.nodeName );
				if ( annotationtype ) {
					var res = this._convertHTMLAnnotation( cnode, offset, annotationtype );
					text += res.text;
					offset += res.text.length;
					annotations = annotations.concat( res.annotations );
				}
				break;
			case Node.TEXT_NODE:
				// Add text as content, and increment offset
				text += cnode.data;
				offset += cnode.data.length;
				break;
			case Node.COMMENT_NODE:
				// add a comment annotation?
				break;
			default:
				this._reportUnhandledNode( cnode );
				break;
		}
	}
	annotations[0].range.end = offset;
	return {
		text: text,
		annotations: annotations
	};
};

/**
 * Private: convert the HTML attributes of a block element to WikiDom
 * attributes.
 *
 * data-* attributes lose their 'data-' prefix; every other attribute is kept
 * under an 'html/' prefixed key.
 *
 * @param {Object} HTML DOM element
 * @returns {Object} Map of WikiDom attribute names to values
 */
DOMConverter.prototype._HTMLPropertiesToWikiAttributes = function ( elem ) {
	var attribs = elem.attributes,
		out = {};
	for ( var i = 0, l = attribs.length; i < l; i++ ) {
		var attrib = attribs.item(i),
			key = attrib.name;
		if ( /^data-/.test( key ) ) {
			// strip data- prefix from data-*
			out[key.replace( /^data-/, '' )] = attrib.value;
		} else {
			// prefix html properties with html/
			out['html/' + key] = attrib.value;
		}
	}
	return out;
};

/**
 * Private: convert the HTML attributes of an inline element to WikiDom
 * annotation data.
 *
 * data-* attributes lose their 'data-' prefix; of the remaining attributes
 * only the whitelisted ones (currently 'title') are passed through.
 *
 * @param {Object} HTML DOM element
 * @returns {Object} Map of annotation data keys to values
 */
DOMConverter.prototype._HTMLPropertiesToWikiData = function ( elem ) {
	var attribs = elem.attributes,
		out = {};
	for ( var i = 0, l = attribs.length; i < l; i++ ) {
		var attrib = attribs.item(i),
			key = attrib.name;
		if ( /^data-/.test( key ) ) {
			// strip data- prefix from data-*
			out[key.replace( /^data-/, '' )] = attrib.value;
		} else {
			// pass through a few whitelisted keys
			// XXX: This subsets html DOM
			if ( ['title'].indexOf( key ) !== -1 ) {
				out[key] = attrib.value;
			}
		}
	}
	return out;
};


// Export for CommonJS environments; elsewhere DOMConverter stays a plain
// top-level function.
if (typeof module === "object") {
	module.exports.DOMConverter = DOMConverter;
}
Property changes on: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 308 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js |
— | — | @@ -257,7 +257,8 @@ |
258 | 258 | type: 'TAG', |
259 | 259 | name: 'ol', |
260 | 260 | attribs: [ |
261 | | - ['class', 'references'] |
| 261 | + ['class', 'references'], |
| 262 | + ['data-object', 'references'] // Object type |
262 | 263 | ] |
263 | 264 | } |
264 | 265 | ].concat( listItems, { type: 'ENDTAG', name: 'ol' } ); |