Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -10,6 +10,8 @@ |
11 | 11 | * 2011-07-20 <brion@pobox.com> |
12 | 12 | */ |
13 | 13 | |
| 14 | +"use strict"; |
| 15 | + |
14 | 16 | var fs = require('fs'), |
15 | 17 | path = require('path'); |
16 | 18 | |
— | — | @@ -29,6 +31,9 @@ |
30 | 32 | }) |
31 | 33 | } |
32 | 34 | |
| 35 | +// needed for html5 parser adapter |
| 36 | +//var events = require('events'); |
| 37 | + |
33 | 38 | // For now most modules only need this for $.extend and $.each :) |
34 | 39 | global.$ = require('jquery'); |
35 | 40 | |
— | — | @@ -40,11 +45,14 @@ |
41 | 46 | // Local CommonJS-friendly libs |
42 | 47 | global.PEG = _require(pj('parser', 'lib.pegjs.js')); |
43 | 48 | |
| 49 | + |
44 | 50 | // Our code... |
45 | 51 | _import(pj('parser', 'mediawiki.parser.peg.js'), ['PegParser']); |
46 | 52 | _import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']); |
47 | 53 | _import(pj('parser', 'ext.cite.taghook.ref.js'), ['MWRefTagHook']); |
48 | 54 | |
| 55 | +_require(pj('parser', 'mediawiki.html5TokenEmitter.js')); |
| 56 | + |
49 | 57 | // WikiDom and serializers |
50 | 58 | _require(pj('es', 'es.js')); |
51 | 59 | _require(pj('es', 'es.Html.js')); |
— | — | @@ -109,44 +117,10 @@ |
110 | 118 | function nodeToHtml(node) { // Return the markup of node: it is appended to a fresh <div> and the div's html() is read back. |
111 | 119 | return $('<div>').append(node).html(); |
112 | 120 | } |
113 | | - /* Temporary debugging help. Is there anything similar in JS or a library? */ |
114 | | - var print_r = function (arr, level) { |
115 | 121 | |
116 | | - var dumped_text = ""; |
117 | | - if (!level) level = 0; |
118 | 122 | |
119 | | - //The padding given at the beginning of the line. |
120 | | - var level_padding = ""; |
121 | | - var bracket_level_padding = ""; |
122 | | - |
123 | | - for (var j = 0; j < level + 1; j++) level_padding += " "; |
124 | | - for (var b = 0; b < level; b++) bracket_level_padding += " "; |
125 | | - |
126 | | - if (typeof(arr) == 'object') { //Array/Hashes/Objects |
127 | | - dumped_text += "Array\n"; |
128 | | - dumped_text += bracket_level_padding + "(\n"; |
129 | | - for (var item in arr) { |
130 | | - |
131 | | - var value = arr[item]; |
132 | | - |
133 | | - if (typeof(value) == 'object') { //If it is an array, |
134 | | - dumped_text += level_padding + "[" + item + "] => "; |
135 | | - dumped_text += print_r(value, level + 2); |
136 | | - } else { |
137 | | - dumped_text += level_padding + "[" + item + "] => '" + value + "'\n"; |
138 | | - } |
139 | | - |
140 | | - } |
141 | | - dumped_text += bracket_level_padding + ")\n\n"; |
142 | | - } else { //Strings/Chars/Numbers etc. |
143 | | - dumped_text = "=>" + arr + "<=(" + typeof(arr) + ")"; |
144 | | - } |
145 | | - |
146 | | - return dumped_text; |
147 | | - |
148 | | - }; |
149 | | - |
150 | 123 | function processTest(item) { |
| 124 | + var tokenizer = new FauxHTML5.Tokenizer(); |
151 | 125 | if (!('title' in item)) { |
152 | 126 | console.log(item); |
153 | 127 | throw new Error('Missing title from test case.'); |
— | — | @@ -163,6 +137,7 @@ |
164 | 138 | console.log("INPUT:"); |
165 | 139 | console.log(item.input + "\n"); |
166 | 140 | |
| 141 | + |
167 | 142 | parser.parseToTree(item.input + "\n", function(tree, err) { |
168 | 143 | if (err) { |
169 | 144 | console.log('PARSE FAIL', err); |
— | — | @@ -174,6 +149,7 @@ |
175 | 150 | } |
176 | 151 | }); |
177 | 152 | //var res = es.HtmlSerializer.stringify(tree,environment); |
| 153 | + processTokens(tree, tokenizer); |
178 | 154 | if (err) { |
179 | 155 | console.log('RENDER FAIL', err); |
180 | 156 | } else { |
— | — | @@ -181,12 +157,24 @@ |
182 | 158 | console.log(item.result + "\n"); |
183 | 159 | |
184 | 160 | console.log('RENDERED:'); |
185 | | - console.log(print_r(tree)); |
| 161 | + //console.log(JSON.stringify(tree, null, 2)); |
| 162 | + console.log(tokenizer.parser.document.innerHTML); |
186 | 163 | } |
187 | 164 | } |
188 | 165 | }); |
189 | 166 | } |
190 | 167 | |
| 168 | +function processTokens ( tokens, tokenizer ) { // Push a token list through tokenizer.processToken(), framed by a leading body TAG token and a final END token. |
| 169 | + tokenizer.processToken({type: 'TAG', name: 'body'}); // synthetic opening token sent before any parsed token |
| 170 | + // Feed every token to the tokenizer, in list order |
| 171 | + for (var i = 0, length = tokens.length; i < length; i++) { |
| 172 | + tokenizer.processToken(tokens[i]); |
| 173 | + } |
| 174 | + // And signal the end of input with an END token |
| 175 | + tokenizer.processToken({type: 'END'}); |
| 176 | +} |
| 177 | + |
| 178 | + |
191 | 179 | cases.forEach(function(item) { |
192 | 180 | if (typeof item == 'object') { |
193 | 181 | if (item.type == 'article') { |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.peg.js |
— | — | @@ -23,6 +23,7 @@ |
24 | 24 | out = parser.parse(text); |
25 | 25 | } catch (e) { |
26 | 26 | err = e; |
| 27 | + console.trace(); |
27 | 28 | } finally { |
28 | 29 | callback(out, err); |
29 | 30 | } |
Index: trunk/extensions/VisualEditor/modules/parser/html5/treewalker.js |
— | — | @@ -0,0 +1,95 @@ |
| 2 | +var HTML5 = require('../html5'); |
| 3 | +var events = require('events'); |
| 4 | + |
| 5 | +function error(msg) { // Build a SerializeError token whose data is msg.
| 6 | + return {type: 'SerializeError', data: msg};
| 7 | +}
| 8 | + |
| 9 | +function empty_tag(node) { // Build an EmptyTag token for node, or a SerializeError token if node has children.
| 10 | + if(node.hasChildNodes()) return error(_("Void element has children"));
| 11 | + return {type: 'EmptyTag', name: node.tagName, data: node.attributes, namespace: node.namespace};
| 12 | +}
| 13 | + |
| 14 | +function start_tag(node) { // Build a StartTag token carrying node's tagName, attributes and namespace.
| 15 | + return {type: 'StartTag', name: node.tagName, data: node.attributes, namespace: node.namespace};
| 16 | +}
| 17 | + |
| 18 | +function end_tag(node) { // Build an EndTag token carrying node's tagName and namespace.
| 19 | + return {type: 'EndTag', name: node.tagName, namespace: node.namespace };
| 20 | +}
| 21 | + |
| 22 | +function text(data, target) { // Emit the string data on target as 'token' events: leading whitespace (SpaceCharacters), content (Characters), trailing whitespace (SpaceCharacters).
| 23 | + var m = new RegExp("^[" + HTML5.SPACE_CHARACTERS + "]+").exec(data); // leading whitespace run; m is now local instead of an implicit global
| 24 | + if(m) {
| 25 | + target.emit('token', {type: 'SpaceCharacters', data: m[0]});
| 26 | + data = data.slice(m[0].length, data.length);
| 27 | + if(data.length == 0) return; // the text was whitespace only
| 28 | + }
| 29 | + if(m = new RegExp("["+HTML5.SPACE_CHARACTERS + "]+$").exec(data)) { // trailing whitespace run
| 30 | + target.emit('token', {type: 'Characters', data: data.slice(0, m.index)}); // everything before the trailing run; m.length is the size of the match array, not an offset
| 31 | + target.emit('token', {type: 'SpaceCharacters', data: data.slice(m.index, data.length)});
| 32 | + } else {
| 33 | + target.emit('token', {type: 'Characters', data: data});
| 34 | + }
| 35 | +}
| 36 | + |
| 37 | +function comment(data) { // Build a Comment token whose data is the comment text.
| 38 | + return {type: 'Comment', data: data};
| 39 | +}
| 40 | + |
| 41 | +function doctype(node) { // Build a Doctype token from node's nodeName, publicId, systemId and correct properties.
| 42 | + return {type: 'Doctype', name: node.nodeName, publicId: node.publicId, systemId: node.systemId, correct: node.correct};
| 43 | +}
| 44 | + |
| 45 | +function unknown(node) { // Build a SerializeError token describing a node of an unhandled type (node is JSON-stringified into the message).
| 46 | + return error(_("unknown node: ")+ JSON.stringify(node));
| 47 | +}
| 48 | + |
| 49 | +function _(str) { // Identity wrapper around message strings; returns str unchanged (presumably a translation hook - confirm).
| 50 | + return str;
| 51 | +}
| 52 | + |
| 53 | +HTML5.TreeWalker = function(document, dest) { // Constructor: walks document immediately, emitting one 'token' event per token on this.
| 54 | + if(dest instanceof Function) this.addListener('token', dest); // a function dest is subscribed before the walk starts
| 55 | + walk(document, this);
| 56 | +};
| 57 | + |
| 58 | +function walk(node, dest) { // Depth-first traversal of node that emits 'token' events on dest according to node.nodeType.
| 59 | + switch(node.nodeType) {
| 60 | + case node.DOCUMENT_FRAGMENT_NODE:
| 61 | + case node.DOCUMENT_NODE: // containers emit nothing themselves; only their children are walked
| 62 | + for(var child = 0; child < node.childNodes.length; ++child) {
| 63 | + walk(node.childNodes[child], dest);
| 64 | + }
| 65 | + break;
| 66 | +
| 67 | + case node.ELEMENT_NODE:
| 68 | + if(HTML5.VOID_ELEMENTS.indexOf(node.tagName.toLowerCase()) != -1) { // void elements: a single token, children are not walked
| 69 | + dest.emit('token', empty_tag(node));
| 70 | + } else {
| 71 | + dest.emit('token', start_tag(node));
| 72 | + for(var child = 0; child < node.childNodes.length; ++child) {
| 73 | + walk(node.childNodes[child], dest);
| 74 | + }
| 75 | + dest.emit('token', end_tag(node));
| 76 | + }
| 77 | + break;
| 78 | +
| 79 | + case node.TEXT_NODE: // text() emits its own tokens on dest
| 80 | + text(node.nodeValue, dest);
| 81 | + break;
| 82 | +
| 83 | + case node.COMMENT_NODE:
| 84 | + dest.emit('token', comment(node.nodeValue));
| 85 | + break;
| 86 | +
| 87 | + case node.DOCUMENT_TYPE_NODE:
| 88 | + dest.emit('token', doctype(node));
| 89 | + break;
| 90 | +
| 91 | + default: // any other node type is reported as a SerializeError token
| 92 | + dest.emit('token', unknown(node));
| 93 | + }
| 94 | +}
| 95 | + |
| 96 | +HTML5.TreeWalker.prototype = new events.EventEmitter; |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/treewalker.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 97 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/debug.js |
— | — | @@ -0,0 +1,30 @@ |
| 2 | +var HTML5 = require('../html5'); |
| 3 | + |
| 4 | +var debugFlags = {any: true} |
| 5 | + |
| 6 | +HTML5.debug = function() { // Dump all arguments with console.dir when the section named by the first argument (or its prefix before the first '.') is enabled in debugFlags.
| 7 | + var section = arguments[0]; // local; the bare assignment used to create a global named section
| 8 | + if(debugFlags[section] || debugFlags[section.split('.')[0]]) {
| 9 | + var out = [];
| 10 | + for(var i = 0; i < arguments.length; i++) { // index loop: arguments is array-like, not a plain object
| 11 | + out.push(arguments[i])
| 12 | + }
| 13 | + console.dir(out)
| 14 | + }
| 15 | +}
| 16 | + |
| 17 | +HTML5.enableDebug = function(section) { // Turn on debug output for section by setting its flag in debugFlags.
| 18 | + debugFlags[section] = true;
| 19 | +}
| 20 | + |
| 21 | +HTML5.disableDebug = function(section) { // Turn off debug output for section by clearing its flag in debugFlags.
| 22 | + debugFlags[section] = false;
| 23 | +}
| 24 | + |
| 25 | +HTML5.dumpTagStack = function(tags) { // Return the tagName of every entry in tags, joined with ', '.
| 26 | + var r = [];
| 27 | + for(var i in tags) { // i is declared locally; it used to leak as an implicit global
| 28 | + r.push(tags[i].tagName);
| 29 | + }
| 30 | + return r.join(', ');
| 31 | +}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/debug.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 32 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/treebuilder.js |
— | — | @@ -0,0 +1,301 @@ |
| 2 | +"use strict"; |
| 3 | + |
| 4 | +var HTML5 = require('../html5'); |
| 5 | +var assert = require('assert'); |
| 6 | +//if(!Array.prototype.last) { |
| 7 | +// Array.prototype.last = function() { return this[this.length - 1] }; |
| 8 | +//} |
| 9 | + |
| 10 | +HTML5.TreeBuilder = function TreeBuilder(document) { // Constructor: builds nodes in document, starting with empty element and formatting lists.
| 11 | + this.open_elements = []; // stack of open elements; the last entry is the current insertion parent
| 12 | + this.document = document;
| 13 | + this.activeFormattingElements = [];
| 14 | +}
| 15 | + |
| 16 | +var b = HTML5.TreeBuilder; |
| 17 | + |
| 18 | +b.prototype.reset = function() {
| 19 | + // Currently a no-op.
| 20 | +}
| 21 | + |
| 22 | +b.prototype.copyAttributeToElement = function(element, attribute) { // Copy one attribute (a DOM attribute node or a plain nodeName/nodeValue object) onto element.
| 23 | + if(attribute.nodeType && attribute.nodeType == attribute.ATTRIBUTE_NODE) {
| 24 | + element.setAttributeNode(attribute.cloneNode()); // real attribute nodes are cloned, not moved
| 25 | + } else {
| 26 | + try {
| 27 | + element.setAttribute(attribute.nodeName, attribute.nodeValue)
| 28 | + } catch(e) {
| 29 | + console.log("Can't set attribute '" + attribute.nodeName + "' to value '" + attribute.nodeValue + "': (" + e + ')');
| 30 | + }
| 31 | + if(attribute.namespace) {
| 32 | + var at = element.getAttributeNode(attribute.nodeName);
| 33 | + if(at) at.namespace = attribute.namespace; // no node exists if setAttribute failed above (already logged)
| 34 | + }
| 35 | + }
| 36 | +}
| 37 | + |
| 38 | +b.prototype.createElement = function (name, attributes, namespace) {
| 39 | + // Create an element called name in this.document, record namespace on it and
| 40 | + // copy attributes onto it. Returns the new element; rethrows if creation fails.
| 41 | + var el;
| 42 | + try {
| 43 | + el = this.document.createElement(name);
| 44 | + } catch(e) {
| 45 | + console.log("Can't create element '"+ name + "' (" + e + ")")
| 46 | + // Nothing to continue with: surface the real cause, not the TypeError el.namespace would raise.
| 47 | + throw e;
| 48 | + }
| 49 | + el.namespace = namespace;
| 50 | + if(attributes) {
| 51 | + // attributes is either a collection exposing item(i) or a plain indexable list.
| 52 | + for(var i = 0; i < attributes.length; i++) {
| 53 | + var attribute = attributes.item ? attributes.item(i) : attributes[i];
| 54 | +HTML5.debug('treebuilder.copyAttributes', attribute);
| 55 | + this.copyAttributeToElement(el, attribute);
| 56 | + }
| 57 | + }
| 58 | + return el;
| 59 | +}
| 60 | + |
| 61 | +b.prototype.insert_element = function(name, attributes, namespace) { // Create and insert an element, choosing the strategy from this.insert_from_table; returns the element.
| 62 | + HTML5.debug('treebuilder.insert_element', name)
| 63 | + if(this.insert_from_table) {
| 64 | + return this.insert_element_from_table(name, attributes, namespace)
| 65 | + } else {
| 66 | + return this.insert_element_normal(name, attributes, namespace)
| 67 | + }
| 68 | +}
| 69 | + |
| 70 | +b.prototype.insert_foreign_element = function(name, attributes, namespace) { // Currently identical to insert_element.
| 71 | + return this.insert_element(name, attributes, namespace);
| 72 | +}
| 73 | + |
| 74 | +b.prototype.insert_element_normal = function(name, attributes, namespace) { // Append a new element to the top of open_elements and push it onto that stack.
| 75 | + var element = this.createElement(name, attributes, namespace);
| 76 | + this.open_elements[this.open_elements.length - 1].appendChild(element); // requires a non-empty open_elements stack
| 77 | + this.open_elements.push(element);
| 78 | + return element;
| 79 | +}
| 80 | + |
| 81 | +b.prototype.insert_element_from_table = function(name, attributes, namespace) { // Insert an element while in table mode, relocating it when the current element is a table-insert-mode element.
| 82 | + var element = this.createElement(name, attributes, namespace)
| 83 | + if(HTML5.TABLE_INSERT_MODE_ELEMENTS.indexOf(this.open_elements[this.open_elements.length - 1].tagName.toLowerCase()) != -1) {
| 84 | + // We should be in the InTable mode. This means we want to do
| 85 | + // special magic element rearranging
| 86 | + var t = this.getTableMisnestedNodePosition()
| 87 | + if(!t.insertBefore) { // no reference node: append to the foster parent
| 88 | + t.parent.appendChild(element)
| 89 | + } else {
| 90 | + t.parent.insertBefore(element, t.insertBefore)
| 91 | + }
| 92 | + this.open_elements.push(element)
| 93 | + } else {
| 94 | + return this.insert_element_normal(name, attributes, namespace); // note: creates a second element; the one built above is discarded
| 95 | + }
| 96 | + return element;
| 97 | +}
| 98 | + |
| 99 | +b.prototype.insert_comment = function(data, parent) { // Append a comment node with text data to parent (default: the top of open_elements).
| 100 | + try {
| 101 | + var c = this.document.createComment(data);
| 102 | + if(!parent) parent = this.open_elements[this.open_elements.length - 1];
| 103 | + parent.appendChild(c);
| 104 | + } catch(e) { // failures are logged and otherwise ignored
| 105 | + console.log("Can't create comment ("+ data + ")")
| 106 | + }
| 107 | +}
| 108 | + |
| 109 | +b.prototype.insert_doctype = function (name, publicId, systemId) { // Create a document type node and append it to this.document.
| 110 | + try {
| 111 | + var doctype = this.document.implementation.createDocumentType(name, publicId, systemId);
| 112 | + this.document.appendChild(doctype);
| 113 | + } catch(e) { // failures are logged and otherwise ignored
| 114 | + console.log("Can't create doctype ("+ name + " / " + publicId + " / " + systemId + ")")
| 115 | + }
| 116 | +}
| 117 | + |
| 118 | + |
| 119 | +b.prototype.insert_text = function(data, parent) { // Insert text data under parent (default: the top of open_elements), merging with an adjacent text node where possible.
| 120 | + if(!parent) parent = this.open_elements[this.open_elements.length - 1];
| 121 | + if(!this.insert_from_table || HTML5.TABLE_INSERT_MODE_ELEMENTS.indexOf(this.open_elements[this.open_elements.length - 1].tagName.toLowerCase()) == -1) {
| 122 | + if(parent.lastChild && parent.lastChild.nodeType == parent.TEXT_NODE) {
| 123 | + parent.lastChild.appendData(data); // extend the existing trailing text node instead of adding a sibling
| 124 | + } else {
| 125 | + try {
| 126 | + var tn = this.document.createTextNode(data);
| 127 | + parent.appendChild(tn);
| 128 | + } catch(e) { // failures are logged and otherwise ignored
| 129 | + console.log("Can't create text node (" + data + ")");
| 130 | + }
| 131 | + }
| 132 | + } else {
| 133 | + // We should be in the inTable phase. This means we want to do special
| 134 | + // magic element rearranging.
| 135 | + var t = this.getTableMisnestedNodePosition();
| 136 | + insertText(t.parent, data, t.insertBefore)
| 137 | + }
| 138 | +}
| 139 | + |
| 140 | +b.prototype.remove_open_elements_until = function(nameOrCb) { // Pop open elements until one matches; returns the last element popped.
| 141 | + HTML5.debug('treebuilder.remove_open_elements_until', nameOrCb)
| 142 | + var finished = false;
| 143 | + while(!finished) {
| 144 | + var element = this.pop_element();
| 145 | + finished = (typeof nameOrCb == 'function' ? nameOrCb(element) : element.tagName.toLowerCase() == nameOrCb); // a function is used as a predicate; anything else is compared to the lower-cased tag name
| 146 | + }
| 147 | + return element;
| 148 | +}
| 149 | + |
| 150 | +b.prototype.pop_element = function() { // Pop and return the top of open_elements; throws if the stack is empty.
| 151 | + var el = this.open_elements.pop()
| 152 | + HTML5.debug('treebuilder.pop_element', el.tagName) // tagName is the property the rest of this file reads from elements
| 153 | + return el
| 154 | +}
| 155 | + |
| 156 | +function insertText(node, data, before) { // Insert text data into node, in front of before when given, otherwise at the end.
| 157 | + var t = node.ownerDocument.createTextNode(data)
| 158 | + if(before) {
| 159 | + if(before.previousSibling && before.previousSibling.nodeType == before.previousSibling.TEXT_NODE) {
| 160 | + before.previousSibling.nodeValue += data; // merge into the text node already preceding before; t is left unused
| 161 | + } else {
| 162 | + node.insertBefore(t, before)
| 163 | + }
| 164 | + } else {
| 165 | + node.appendChild(t)
| 166 | + }
| 167 | +}
| 168 | + |
| 169 | +b.prototype.getTableMisnestedNodePosition = function() { // Returns {parent, insertBefore}: where misnested table content should be inserted.
| 170 | + // The foster parent element is the one which comes before the most
| 171 | + // recently opened table element
| 172 | + // XXX - this is really inelegant
| 173 | + var lastTable, fosterParent, insertBefore
| 174 | +
| 175 | + for(var i = this.open_elements.length - 1; i >= 0; i--) { // search from the top of the stack for the nearest open table
| 176 | + var element = this.open_elements[i]
| 177 | + if(element.tagName.toLowerCase() == 'table') {
| 178 | + lastTable = element
| 179 | + break
| 180 | + }
| 181 | + }
| 182 | +
| 183 | + if(lastTable) {
| 184 | + // XXX - we should check that the parent really is a node here
| 185 | + if(lastTable.parentNode) { // attached table: insert into its parent, immediately before the table
| 186 | + fosterParent = lastTable.parentNode
| 187 | + insertBefore = lastTable
| 188 | + } else { // detached table: use the stack entry below it; insertBefore stays undefined
| 189 | + fosterParent = this.open_elements[this.open_elements.indexOf(lastTable) - 1]
| 190 | + }
| 191 | + } else { // no open table: fall back to the bottom of the stack
| 192 | + fosterParent = this.open_elements[0]
| 193 | + }
| 194 | +
| 195 | + return {parent: fosterParent, insertBefore: insertBefore}
| 196 | +}
| 197 | + |
| 198 | +b.prototype.elementInScope = function(name, tableVariant) { // True if an open element with lower-cased tag name `name` is found, scanning from the top of the stack, before a scope boundary.
| 199 | + if(this.open_elements.length == 0) return false
| 200 | + for(var i = this.open_elements.length - 1; i >= 0; i--) {
| 201 | + if (this.open_elements[i].tagName == undefined) return false // entries without a tagName end the search
| 202 | + else if(this.open_elements[i].tagName.toLowerCase() == name) return true
| 203 | + else if(this.open_elements[i].tagName.toLowerCase() == 'table') return false // a table is always a boundary
| 204 | + else if(!tableVariant && HTML5.SCOPING_ELEMENTS.indexOf(this.open_elements[i].tagName.toLowerCase()) != -1) return false // scoping elements are boundaries unless tableVariant is set
| 205 | + else if(this.open_elements[i].tagName.toLowerCase() == 'html') return false;
| 206 | + }
| 207 | + return false;
| 208 | +}
| 209 | + |
| 210 | +b.prototype.generateImpliedEndTags = function(exclude) { // Pop dd/dt/li/p/td/th/tr elements off the top of the stack until another element, or one named exclude, is on top.
| 211 | + if(exclude) exclude = exclude.toLowerCase()
| 212 | + if(this.open_elements.length == 0) {
| 213 | + HTML5.debug('treebuilder.generateImpliedEndTags', 'no open elements')
| 214 | + return
| 215 | + }
| 216 | + var name = this.open_elements[this.open_elements.length - 1].tagName.toLowerCase();
| 217 | + if(['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].indexOf(name) != -1 && name != exclude) {
| 218 | + this.pop_element(); // the popped element is not needed here
| 219 | + this.generateImpliedEndTags(exclude); // recurse to re-examine the new top of the stack
| 220 | + }
| 221 | +}
| 222 | + |
| 223 | +b.prototype.reconstructActiveFormattingElements = function() { // Re-insert clones of the trailing formatting entries that are no longer open, replacing those entries with the clones.
| 224 | + // Within this algorithm the order of steps described in the specification
| 225 | + // is not quite the same as the order of steps in the code. It should still
| 226 | + // do the same though.
| 227 | +
| 228 | + // Step 1: stop if there's nothing to do
| 229 | + if(this.activeFormattingElements.length == 0) return;
| 230 | +
| 231 | + // Step 2 and 3: start with the last element
| 232 | + var i = this.activeFormattingElements.length - 1;
| 233 | + var entry = this.activeFormattingElements[i];
| 234 | + if(entry == HTML5.Marker || this.open_elements.indexOf(entry) != -1) return; // last entry is a marker or still open: nothing to rebuild
| 235 | +
| 236 | + while(entry != HTML5.Marker && this.open_elements.indexOf(entry) == -1) { // walk back to the nearest marker or still-open entry
| 237 | + i -= 1;
| 238 | + entry = this.activeFormattingElements[i];
| 239 | + if(!entry) break; // ran off the start of the list
| 240 | + }
| 241 | +
| 242 | + while(true) { // walk forward again, re-creating each entry after that point
| 243 | + i += 1;
| 244 | + var clone = this.activeFormattingElements[i].cloneNode();
| 245 | +
| 246 | + var element = this.insert_element(clone.tagName, clone.attributes);
| 247 | +
| 248 | + this.activeFormattingElements[i] = element;
| 249 | +
| 250 | + if(element == this.activeFormattingElements[this.activeFormattingElements.length - 1]) break; // the last entry has just been replaced
| 251 | + }
| 252 | +
| 253 | +}
| 254 | + |
| 255 | +b.prototype.elementInActiveFormattingElements = function(name) { // Return the most recent formatting entry after the last marker whose lower-cased tag name is name, or false.
| 256 | + var els = this.activeFormattingElements;
| 257 | + for(var i = els.length - 1; i >= 0; i--) {
| 258 | + if(els[i] == HTML5.Marker) break; // entries before the last marker are not considered
| 259 | + if(els[i].tagName.toLowerCase() == name) return els[i];
| 260 | + }
| 261 | + return false;
| 262 | +}
| 263 | + |
| 264 | +b.prototype.reparentChildren = function(o, n) { // Move every child of o to the end of n, preserving order.
| 265 | + while(o.childNodes.length > 0) {
| 266 | + var el = o.removeChild(o.childNodes[0]);
| 267 | + n.appendChild(el);
| 268 | + }
| 269 | +}
| 270 | + |
| 271 | +b.prototype.clearActiveFormattingElements = function() { // Pop formatting entries until the list is empty or a marker has been popped.
| 272 | + while(!(this.activeFormattingElements.length == 0 || this.activeFormattingElements.pop() == HTML5.Marker)); // the pop inside the condition does all the work; the loop body is empty
| 273 | +}
| 274 | + |
| 275 | +b.prototype.getFragment = function() { // Move the children of this.root_pointer into a new DocumentFragment and return it.
| 276 | + // assert.ok(this.parser.inner_html)
| 277 | + var fragment = this.document.createDocumentFragment()
| 278 | + this.reparentChildren(this.root_pointer, fragment) // NOTE(review): root_pointer is not assigned in this file's visible code - presumably set by the parser; confirm
| 279 | + return fragment
| 280 | +}
| 281 | + |
| 282 | +b.prototype.create_structure_elements = function(container) { // Ensure html, head and body elements exist in the document, stopping early after 'html' or 'head' when container names it.
| 283 | + this.html_pointer = this.document.getElementsByTagName('html')[0]
| 284 | + if(!this.html_pointer) {
| 285 | + this.html_pointer = this.createElement('html');
| 286 | + this.document.appendChild(this.html_pointer);
| 287 | + }
| 288 | + if(container == 'html') return;
| 289 | + if(!this.head_pointer) { // unlike html and body, an existing head_pointer is reused without a new lookup
| 290 | + this.head_pointer = this.document.getElementsByTagName('head')[0]
| 291 | + if(!this.head_pointer) {
| 292 | + this.head_pointer = this.createElement('head');
| 293 | + this.html_pointer.appendChild(this.head_pointer);
| 294 | + }
| 295 | + }
| 296 | + if(container == 'head') return;
| 297 | + this.body_pointer = this.document.getElementsByTagName('body')[0]
| 298 | + if(!this.body_pointer) {
| 299 | + this.body_pointer = this.createElement('body');
| 300 | + this.html_pointer.appendChild(this.body_pointer);
| 301 | + }
| 302 | +}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/treebuilder.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 303 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/index.js |
— | — | @@ -0,0 +1,13 @@ |
| 2 | +exports.HTML5 = exports; |
| 3 | + |
| 4 | +exports.HTML5.moduleName = 'HTML5'; |
| 5 | + |
| 6 | +require('./constants'); |
| 7 | +require('./tokenizer'); |
| 8 | +require('./treebuilder'); |
| 9 | +require('./treewalker'); |
| 10 | +require('./serializer'); |
| 11 | +require('./parser'); |
| 12 | +require('./debug'); |
| 13 | + |
| 14 | + |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/index.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 15 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/buffer.js |
— | — | @@ -0,0 +1,109 @@ |
| 2 | +var buffer = require('buffer'); |
| 3 | +var HTML5 = require('../html5'); |
| 4 | + |
| 5 | +function Buffer() { // Constructor: an appendable string buffer with a read position (start) and a committed position.
| 6 | + this.data = '';
| 7 | + this.start = 0;
| 8 | + this.committed = 0;
| 9 | + var eof; // backing store for the eof accessor defined below
| 10 | + this.__defineSetter__('eof', function(f) { // every assignment to eof is reported through HTML5.debug
| 11 | + eof = f
| 12 | + HTML5.debug('buffer.eof=', f)
| 13 | + })
| 14 | + this.__defineGetter__('eof', function() { return eof })
| 15 | + this.eof = false;
| 16 | +}
| 17 | + |
| 18 | +exports.Buffer = Buffer; |
| 19 | + |
| 20 | +Buffer.prototype = { // Methods throw HTML5.DRAIN when more data is needed before eof, and return HTML5.EOF once input is exhausted.
| 21 | + slice: function() { // Return the unread data from the read position to the end, without consuming it.
| 22 | + HTML5.debug('buffer.slice')
| 23 | + if(this.start >= this.data.length) {
| 24 | + if(!this.eof) throw HTML5.DRAIN
| 25 | + return HTML5.EOF;
| 26 | + }
| 27 | + return this.data.slice(this.start, this.data.length);
| 28 | + },
| 29 | + char: function() { // Consume and return one character.
| 30 | + HTML5.debug('buffer.char')
| 31 | + if(!this.eof && this.start >= this.data.length - 1) throw HTML5.DRAIN; // before eof, the last buffered character is not handed out
| 32 | + if(this.start >= this.data.length) {
| 33 | + return HTML5.EOF;
| 34 | + }
| 35 | + return this.data[this.start++];
| 36 | + },
| 37 | + advance: function(amount) { // Move the read position forward by amount characters.
| 38 | + HTML5.debug('buffer.advance', amount)
| 39 | + this.start += amount;
| 40 | + if(this.start >= this.data.length) {
| 41 | + if(!this.eof) throw HTML5.DRAIN;
| 42 | + return HTML5.EOF;
| 43 | + } else {
| 44 | + if(this.committed > this.data.length / 2) { // more than half the buffer is committed: drop that prefix
| 45 | + // Sliiiide
| 46 | + this.data = this.data.slice(this.committed);
| 47 | + this.start = this.start - this.committed;
| 48 | + this.committed = 0;
| 49 | + }
| 50 | + }
| 51 | + },
| 52 | + matchWhile: function(re) { // Consume and return the run at the read position matching regex source re repeated one or more times; '' if there is none.
| 53 | + HTML5.debug('buffer.matchWhile', re);
| 54 | + if(this.eof && this.data.length >= this.start) return HTML5.EOF; // NOTE(review): as written this returns EOF whenever eof is set, even with unread data left - confirm intent
| 55 | + var r = new RegExp("^"+re+"+");
| 56 | + var m = r.exec(this.slice()); // local match result; it used to leak as an implicit global
| 57 | + if(m) {
| 58 | + if(!this.eof && m[0].length == this.data.length - this.start) throw HTML5.DRAIN; // the run reaches the end of the buffered data and may continue
| 59 | + this.advance(m[0].length);
| 60 | + return m[0];
| 61 | + }
| 62 | + return '';
| 63 | + },
| 64 | + matchUntil: function(re) { // Consume and return the text before the first match of regex source re.
| 65 | + HTML5.debug('buffer.matchUntil', re);
| 66 | + var m = new RegExp(re + (this.eof ? "|\0" : "")).exec(this.slice()); // local match result; it used to leak as an implicit global
| 67 | + if(m) {
| 68 | + var t = this.data.slice(this.start, this.start + m.index);
| 69 | + this.advance(m.index);
| 70 | + return t.toString();
| 71 | + }
| 72 | + if(this.eof) return HTML5.EOF;
| 73 | + throw HTML5.DRAIN;
| 74 | + },
| 75 | + append: function(data) { // Add data to the end of the buffer.
| 76 | + HTML5.debug('buffer.append', data);
| 77 | + this.data += data
| 78 | + },
| 79 | + shift: function(n) { // Consume and return up to n characters.
| 80 | + HTML5.debug('buffer.shift', n);
| 81 | + if(!this.eof && this.start + n >= this.data.length) throw HTML5.DRAIN;
| 82 | + if(this.eof && this.start >= this.data.length) return HTML5.EOF;
| 83 | + var d = this.data.slice(this.start, this.start + n).toString();
| 84 | + this.advance(Math.min(n, this.data.length - this.start));
| 85 | + return d;
| 86 | + },
| 87 | + peek: function(n) { // Return up to n characters without consuming them.
| 88 | + HTML5.debug('buffer.peek', n)
| 89 | + if(!this.eof && this.start + n >= this.data.length) throw HTML5.DRAIN;
| 90 | + if(this.eof && this.start >= this.data.length) return HTML5.EOF;
| 91 | + return this.data.slice(this.start, Math.min(this.start + n, this.data.length)).toString();
| 92 | + },
| 93 | + length: function() { // Return data.length - start - 1, i.e. one less than the number of unread characters.
| 94 | + HTML5.debug('buffer.length')
| 95 | + return this.data.length - this.start - 1;
| 96 | + },
| 97 | + unget: function(d) { // Move the read position back by the length of d; HTML5.EOF is ignored.
| 98 | + HTML5.debug('buffer.unget', d);
| 99 | + if(d == HTML5.EOF) return;
| 100 | + this.start -= (d.length);
| 101 | + },
| 102 | + undo: function() { // Rewind the read position to the last committed position.
| 103 | + HTML5.debug('buffer.undo')
| 104 | + this.start = this.committed;
| 105 | + },
| 106 | + commit: function() { // Mark everything before the read position as committed.
| 107 | + HTML5.debug('buffer.commit')
| 108 | + this.committed = this.start;
| 109 | + }
| 110 | +}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/buffer.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 111 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/serializer.js |
— | — | @@ -0,0 +1,152 @@ |
| 2 | +"use strict"; |
| 3 | +var HTML5 = require('../html5'); |
| 4 | +var events = require('events'); |
| 5 | + |
| 6 | +function keys(o) { // Return an array of the property names enumerated by for-in over o (inherited enumerable names included).
| 7 | + var r = [];
| 8 | + for(var k in o) {
| 9 | + r.push(k);
| 10 | + }
| 11 | + return r;
| 12 | +}
| 13 | + |
| 14 | +function hescape(s) { // Escape text for HTML output: & first (so the entities added next are not re-escaped), then < and >.
| 15 | + return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
| 16 | +}
| 17 | + |
| 18 | +var default_opts = { // Serializer options used by HTML5.serialize when the caller gives no override, and as the base that overrides are layered onto.
| 19 | + lowercase: true, // lower-case tag names in the output
| 20 | + minimize_boolean_attributes: true,
| 21 | + quote_attr_values: true, // always quote attribute values
| 22 | + use_best_quote_char: true, // choose the quote character from the quotes present in the value
| 23 | + use_trailing_solidus: true, // write "/" before ">" on void elements
| 24 | + escape_lt_in_attrs: true, // escape "<" inside attribute values
| 25 | + space_before_trailing_solidus: true // write " /" rather than "/"
| 26 | +}
| 27 | + |
| 28 | +HTML5.serialize = function(src, target, override) { // Serialize DOM tree src to HTML text, emitted as 'data' chunks followed by 'end'.
| 29 | + // override: optional map of option names to values, layered over default_opts.
| 30 | + // options and k are declared with var: this file is in strict mode, where assigning to an undeclared name throws.
| 31 | + var options = {}, k;
| 32 | + for(k in default_opts) options[k] = default_opts[k]
| 33 | + if(override) {
| 34 | + for(k in override) options[k] = override[k]
| 35 | + }
| 36 | + var dest; // emitter that receives the output
| 37 | + if(target instanceof Function) { // a function target is subscribed as the 'data' listener
| 38 | + dest = new events.EventEmitter();
| 39 | + dest.addListener('data', target);
| 40 | + } else if(!target) { // no target: accumulate the output and return it at the end
| 41 | + dest = new events.EventEmitter();
| 42 | + var ret = '';
| 43 | + dest.addListener('data', function(d) {
| 44 | + ret += d;
| 45 | + });
| 46 | + } else { // any other target is used as the emitter itself
| 47 | + dest = target;
| 48 | + }
| 49 | + var strict = false; // when true, serialize_error throws instead of only recording
| 50 | + var errors = [];
| 51 | +
| 52 | + function serialize_error(data) {
| 53 | + errors.push(data);
| 54 | + if(strict) throw(data);
| 55 | + }
| 56 | +
| 57 | + var in_cdata = false; // true between the start and end tag of an element listed in HTML5.RCDATA_ELEMENTS
| 58 | + //TODO: Filters
| 59 | + var doctype;
| 60 | + var escape_rcdata = false;
| 61 | + var w = new HTML5.TreeWalker(src, function(tok) { // the walker runs in its constructor, so every token is handled before the next statement
| 62 | + if(tok.type == "Doctype") {
| 63 | + doctype = "<!DOCTYPE " + tok.name + ">";
| 64 | + dest.emit('data', doctype);
| 65 | + } else if(tok.type == 'Characters' || tok.type == 'SpaceCharacters') {
| 66 | + if(in_cdata || tok.type == 'SpaceCharacters') { // written verbatim, without escaping
| 67 | + if(in_cdata && tok.data.indexOf("</") != -1) {
| 68 | + serialize_error("Unexpected </ in CDATA")
| 69 | + }
| 70 | + dest.emit('data', tok.data);
| 71 | + } else {
| 72 | + if(tok.data) dest.emit('data', hescape(tok.data));
| 73 | + }
| 74 | + } else if(tok.type == "StartTag" || tok.type == 'EmptyTag') {
| 75 | + if(HTML5.RCDATA_ELEMENTS.indexOf(tok.name.toLowerCase()) != -1 && !escape_rcdata) {
| 76 | + in_cdata = true;
| 77 | + } else if (in_cdata) {
| 78 | + serialize_error("Unexpected child element of a CDATA element");
| 79 | + }
| 80 | + var attributes = "";
| 81 | + var attrs= [];
| 82 | + for(var ki = 0; ki < tok.data.length; ki++) {
| 83 | + attrs.push(tok.data.item(ki));
| 84 | + }
| 85 | + attrs = attrs.sort(); // NOTE(review): no comparator, so the order depends on the nodes' string form - confirm whether sorting by nodeName was intended
| 86 | + for(var ki in attrs) {
| 87 | + var quote_attr = false;
| 88 | + var v = tok.data.getNamedItem(attrs[ki].nodeName).nodeValue; // declared locally (was an undeclared assignment)
| 89 | + attributes += " "+attrs[ki].nodeName;
| 90 | + if(!options.minimize_boolean_attributes || ((HTML5.BOOLEAN_ATTRIBUTES[tok.name] || []).indexOf(ki) == -1 && (HTML5.BOOLEAN_ATTRIBUTES["_global"].indexOf(ki) == -1))) { // NOTE(review): ki is the loop index, not the attribute name - confirm against HTML5.BOOLEAN_ATTRIBUTES
| 91 | + attributes += "=";
| 92 | + if(options.quote_attr_values || v.length == 0) {
| 93 | + quote_attr = true;
| 94 | + } else {
| 95 | + quote_attr = new RegExp("[" + HTML5.SPACE_CHARACTERS_IN + "<=>'\"" + "]").test(v)
| 96 | + }
| 97 | +
| 98 | + v = v.replace(/&/g, '&amp;'); // & first, so the entities added below are not escaped again
| 99 | + if(options.escape_lt_in_attrs) v = v.replace(/</g, '&lt;');
| 100 | + if(quote_attr) {
| 101 | + var the_quote_char = '"';
| 102 | + if(options.use_best_quote_char) {
| 103 | + if(v.indexOf("'") != -1 && v.indexOf('"') == -1) {
| 104 | + the_quote_char = '"';
| 105 | + } else if(v.indexOf('"') != -1 && v.indexOf("'") == -1) {
| 106 | + the_quote_char = "'"
| 107 | + }
| 108 | + }
| 109 | + if(the_quote_char == '"') { // escape whichever quote character delimits the value
| 110 | + v = v.replace(/"/g, '&quot;');
| 111 | + } else {
| 112 | + v = v.replace(/'/g, '&#39;');
| 113 | + }
| 114 | + attributes += the_quote_char + v + the_quote_char;
| 115 | + } else {
| 116 | + attributes += v;
| 117 | + }
| 118 | + }
| 119 | + }
| 120 | +
| 121 | + if(HTML5.VOID_ELEMENTS.indexOf(tok.name.toLowerCase()) != -1 && options.use_trailing_solidus) {
| 122 | + if(options.space_before_trailing_solidus) {
| 123 | + attributes += " /";
| 124 | + } else {
| 125 | + attributes += "/";
| 126 | + }
| 127 | + }
| 128 | +
| 129 | + if(options.lowercase) tok.name = tok.name.toLowerCase()
| 130 | +
| 131 | + dest.emit('data', "<" + tok.name + attributes + ">");
| 132 | +
| 133 | + } else if(tok.type == 'EndTag') {
| 134 | + if(HTML5.RCDATA_ELEMENTS.indexOf(tok.name.toLowerCase()) != -1) {
| 135 | + in_cdata = false;
| 136 | + } else if(in_cdata) {
| 137 | + serialize_error("Unexpected child element of a CDATA element");
| 138 | + }
| 139 | +
| 140 | + if(options.lowercase) tok.name = tok.name.toLowerCase()
| 141 | + dest.emit('data', '</' + tok.name + '>');
| 142 | + } else if(tok.type == 'Comment') {
| 143 | + if(tok.data.match(/--/)) serialize_error("Comment contains --");
| 144 | + dest.emit('data', '<!--' + tok.data + '-->');
| 145 | + } else { // SerializeError tokens from the walker (and anything unrecognised) are recorded
| 146 | + serialize_error(tok.data);
| 147 | + }
| 148 | + });
| 149 | +
| 150 | + dest.emit('end')
| 151 | +
| 152 | + if(ret) return ret; // only set (and only non-empty) when no target was given and something was written
| 153 | +}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/serializer.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 154 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/tokenizer.js |
— | — | @@ -0,0 +1,865 @@ |
| 2 | +//require('../core-upgrade'); |
| 3 | +var HTML5 = require('../html5'); |
| 4 | +var events = require('events'); |
| 5 | +var Buffer = require('./buffer').Buffer; |
| 6 | +var Models = HTML5.Models; |
| 7 | + |
| 8 | +function keys(h) { /* Return an array holding every property name of h that a for-in loop visits. */ |
| 9 | + var r = []; |
| 10 | + for(var k in h) { /* note: for-in also visits inherited enumerable properties, not only own ones */ |
| 11 | + r.push(k); |
| 12 | + } |
| 13 | + return r; |
| 14 | +} |
| 15 | + |
| 16 | +HTML5.Tokenizer = t = function HTML5Tokenizer(input, document) { /* Tokenizer constructor. input is either an EventEmitter that produces 'data'/'end' events or a value that is fed through an internal emitter; throws when input is falsy. */ |
| 17 | + if(!input) throw(new Error("No input given")); |
| 18 | + var content_model; |
| 19 | + this.document = document; |
| 20 | + this.__defineSetter__('content_model', function(model) { /* content_model is an accessor pair so that every change is logged through HTML5.debug */ |
| 21 | + HTML5.debug('tokenizer.content_model=', model) |
| 22 | + content_model = model |
| 23 | + }) |
| 24 | + this.__defineGetter__('content_model', function() { |
| 25 | + return content_model |
| 26 | + }) |
| 27 | + this.content_model = Models.PCDATA; |
| 28 | + var state; |
| 29 | + var buffer = this.buffer = new Buffer(); |
| 30 | + this.__defineSetter__('state', function(newstate) { /* only a setter is defined here; the listeners below read the closure variable `state` directly */ |
| 31 | + HTML5.debug('tokenizer.state=', newstate) |
| 32 | + state = newstate; |
| 33 | + buffer.commit(); /* every state change also commits the buffer */ |
| 34 | + }); |
| 35 | + this.state = 'data_state'; |
| 36 | + this.escapeFlag = false; |
| 37 | + this.lastFourChars = ''; |
| 38 | + this.current_token = null; |
| 39 | + |
| 40 | + if(input instanceof events.EventEmitter) { |
| 41 | + source = input; /* assigns the function-scoped `source` declared with var in the else branch (var hoisting) */ |
| 42 | + this.pump = null; |
| 43 | + } else { |
| 44 | + var source = new events.EventEmitter(); |
| 45 | + this.pump = function() { /* pump replays the whole input as one 'data' event followed by 'end' */ |
| 46 | + source.emit('data', input); |
| 47 | + source.emit('end'); |
| 48 | + } |
| 49 | + } |
| 50 | + |
| 51 | + this.commit = function() { |
| 52 | + buffer.commit(); |
| 53 | + }; |
| 54 | + |
| 55 | + var tokenizer = this; |
| 56 | + source.addListener('data', function(data) { /* append the chunk, then run state functions until one returns a falsy value */ |
| 57 | + if(typeof data !== 'string') data = data.toString(); |
| 58 | + buffer.append(data); |
| 59 | + try { |
| 60 | + while(tokenizer[state](buffer)); |
| 61 | + } catch(e) { |
| 62 | + if(e != HTML5.DRAIN) { |
| 63 | + throw(e); |
| 64 | + } else { /* HTML5.DRAIN is caught here and answered with buffer.undo(); presumably it signals that the buffer ran out of data mid-token - confirm in buffer.js */ |
| 65 | + HTML5.debug('tokenizer.drain', 'Drain') |
| 66 | + buffer.undo(); |
| 67 | + } |
| 68 | + } |
| 69 | + }); |
| 70 | + source.addListener('end', function(t) { return function() { /* t is this tokenizer instance, passed in by the immediately-invoked wrapper */ |
| 71 | + buffer.eof = true; |
| 72 | + while(tokenizer[state](buffer)); /* NOTE(review): unlike the 'data' listener, HTML5.DRAIN is not caught here */ |
| 73 | + t.emit('end'); |
| 74 | + }}(this)); |
| 75 | +} |
| 76 | + |
| 77 | +t.prototype = new events.EventEmitter; /* tokenizer instances inherit from an EventEmitter instance, which is how this.emit(...) is available in the methods below */ |
| 78 | + |
| 79 | +t.prototype.tokenize = function() { /* Start tokenizing by running the pump set up in the constructor; a no-op when pump is null (input was already an EventEmitter). */ |
| 80 | + if(this.pump) this.pump(); |
| 81 | +} |
| 82 | + |
| 83 | +t.prototype.emitToken = function(tok) { /* Normalize tok and emit it as a 'token' event, except for character data seen while the content model is SCRIPT_CDATA. */ |
| 84 | + tok = this.normalize_token(tok); |
| 85 | + HTML5.debug('tokenizer.token', tok) |
| 86 | + if (this.content_model == Models.SCRIPT_CDATA && (tok.type == 'Characters' || tok.type == 'SpaceCharacters')) { |
| 87 | + this.script_buffer += tok.data; /* script text is accumulated on this.script_buffer instead of being emitted */ |
| 88 | + } else { |
| 89 | + this.emit('token', tok); |
| 90 | + } |
| 91 | +} |
| 92 | + |
| 93 | +t.prototype.consume_entity = function(buffer, from_attr) { |
| 94 | + var char = null; |
| 95 | + var chars = buffer.char(); |
| 96 | + if(chars == HTML5.EOF) return false; |
| 97 | + if(chars.match(HTML5.SPACE_CHARACTERS) || chars == '<' || chars == '&') { |
| 98 | + buffer.unget(chars); |
| 99 | + } else if(chars[0] == '#') { // Maybe a numeric entity |
| 100 | + var c = buffer.shift(2); |
| 101 | + if(c == HTML5.EOF) { |
| 102 | + buffer.unget(chars); |
| 103 | + return false; |
| 104 | + } |
| 105 | + chars += c; |
| 106 | + if(chars[1] && chars[1].toLowerCase() == 'x' && HTML5.HEX_DIGITS_R.test(chars[2])) { |
| 107 | + // Hex entity |
| 108 | + buffer.unget(chars[2]); |
| 109 | + char = this.consume_numeric_entity(buffer, true); |
| 110 | + } else if(chars[1] && HTML5.DIGITS_R.test(chars[1])) { |
| 111 | + // Decimal entity |
| 112 | + buffer.unget(chars.slice(1)); |
| 113 | + char = this.consume_numeric_entity(buffer, false); |
| 114 | + } else { |
| 115 | + // Not numeric |
| 116 | + buffer.unget(chars); |
| 117 | + this.parse_error("expected-numeric-entity"); |
| 118 | + } |
| 119 | + } else { |
| 120 | + var filteredEntityList = keys(HTML5.ENTITIES).filter(function(e) { |
| 121 | + return e[0] == chars[0]; |
| 122 | + }); |
| 123 | + var entityName = null; |
| 124 | + while(true) { |
| 125 | + if(filteredEntityList.some(function(e) { |
| 126 | + return e.indexOf(chars) == 0; |
| 127 | + })) { |
| 128 | + filteredEntityList = filteredEntityList.filter(function(e) { |
| 129 | + return e.indexOf(chars) == 0; |
| 130 | + }); |
| 131 | + chars += buffer.char() |
| 132 | + } else { |
| 133 | + break; |
| 134 | + } |
| 135 | + |
| 136 | + if(HTML5.ENTITIES[chars]) { |
| 137 | + entityName = chars; |
| 138 | + if(entityName[entityName.length - 1] == ';') break; |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + if(entityName) { |
| 143 | + char = HTML5.ENTITIES[entityName]; |
| 144 | + |
| 145 | + if(entityName[entityName.length - 1] != ';' && this.from_attribute && (HTML5.ASCII_LETTERS_R.test(chars.substr(entityName.length, 1) || HTML5.DIGITS.test(chars.substr(entityName.length, 1))))) { |
| 146 | + buffer.unget(chars); |
| 147 | + char = '&'; |
| 148 | + } else { |
| 149 | + buffer.unget(chars.slice(entityName.length)); |
| 150 | + } |
| 151 | + } else { |
| 152 | + this.parse_error("expected-named-entity"); |
| 153 | + buffer.unget(chars); |
| 154 | + } |
| 155 | + } |
| 156 | + |
| 157 | + return char; |
| 158 | +} |
| 159 | + |
| 160 | +t.prototype.consume_numeric_entity = function(buffer, hex) { |
| 161 | + if(hex) { |
| 162 | + var allowed = HTML5.HEX_DIGITS_R; |
| 163 | + var radix = 16; |
| 164 | + } else { |
| 165 | + var allowed = HTML5.DIGITS_R; |
| 166 | + var radix = 10; |
| 167 | + } |
| 168 | + |
| 169 | + chars = ''; |
| 170 | + |
| 171 | + var c = buffer.char(); |
| 172 | + while(allowed.test(c)) { |
| 173 | + chars = chars + c; |
| 174 | + c = buffer.char(); |
| 175 | + } |
| 176 | + |
| 177 | + var charAsInt = parseInt(chars, radix); |
| 178 | + |
| 179 | + if(charAsInt == 13) { |
| 180 | + this.parse_error("incorrect-cr-newline-entity"); |
| 181 | + charAsInt = 10; |
| 182 | + } else if(charAsInt >= 128 && charAsInt <= 159) { |
| 183 | + this.parse_error("illegal-windows-1252-entity"); |
| 184 | + charAsInt = HTML5.ENTITIES_WINDOWS1252[charAsInt - 128]; |
| 185 | + } |
| 186 | + |
| 187 | + if(0 < charAsInt && charAsInt <= 1114111 && !(55296 <= charAsInt && charAsInt <= 57343)) { |
| 188 | + char = String.fromCharCode(charAsInt); |
| 189 | + } else { |
| 190 | + char = String.fromCharCode(0xFFFD); |
| 191 | + this.parse_error("cant-convert-numeric-entity"); |
| 192 | + } |
| 193 | + |
| 194 | + if(c != ';') { |
| 195 | + this.parse_error("numeric-entity-without-semicolon"); |
| 196 | + buffer.unget(c); |
| 197 | + } |
| 198 | + |
| 199 | + return char; |
| 200 | +} |
| 201 | + |
| 202 | +t.prototype.process_entity_in_attribute = function(buffer) { |
| 203 | + var entity = this.consume_entity(buffer); |
| 204 | + if(entity) { |
| 205 | + this.current_token.data.last().nodeValue += entity; |
| 206 | + } else { |
| 207 | + this.current_token.data.last().nodeValue += '&'; |
| 208 | + } |
| 209 | +} |
| 210 | + |
| 211 | +t.prototype.process_solidus_in_tag = function(buffer) { /* Handle a slash seen inside a tag: when the current token is a StartTag and the next character is a closing angle bracket, retype the token as EmptyTag and return true; otherwise report a parse error and return false. */ |
| 212 | + var data = buffer.peek(1); /* peek is used so the following character is not consumed here (presumably - confirm in buffer.js) */ |
| 213 | + if(this.current_token.type == 'StartTag' && data == '>') { |
| 214 | + this.current_token.type = 'EmptyTag'; |
| 215 | + return true; |
| 216 | + } else { |
| 217 | + this.parse_error("incorrectly-placed-solidus"); |
| 218 | + return false; |
| 219 | + } |
| 220 | +} |
| 221 | + |
| 222 | +t.prototype.data_state = function(buffer) { |
| 223 | + var c = buffer.char() |
| 224 | + if(c != HTML5.EOF && this.content_model == Models.CDATA || this.content_model == Models.RCDATA || this.content_model == Models.SCRIPT_CDATA) { |
| 225 | + this.lastFourChars += c; |
| 226 | + if(this.lastFourChars.length >= 4) { |
| 227 | + this.lastFourChars = this.lastFourChars.substr(-4) |
| 228 | + } |
| 229 | + } |
| 230 | + |
| 231 | + if (this.content_model == Models.SCRIPT_CDATA) { |
| 232 | + if (this.script_buffer == null) { |
| 233 | + this.script_buffer = ''; |
| 234 | + } |
| 235 | + } |
| 236 | + |
| 237 | + if(c == HTML5.EOF) { |
| 238 | + this.emitToken(HTML5.EOF_TOK); |
| 239 | + this.commit(); |
| 240 | + return false; |
| 241 | + } else if(c == '&' && (this.content_model == Models.PCDATA || this.content_model == Models.RCDATA) && !this.escapeFlag) { |
| 242 | + this.state = 'entity_data_state'; |
| 243 | + } else if(c == '-' && (this.content_model == Models.CDATA || this.content_model == Models.RCDATA || this.content_model == Models.SCRIPT_CDATA) && !this.escapeFlag && this.lastFourChars == '<!--') { |
| 244 | + this.escapeFlag = true; |
| 245 | + this.emitToken({type: 'Characters', data: c}); |
| 246 | + this.commit(); |
| 247 | + } else if(c == '<' && !this.escapeFlag && (this.content_model == Models.PCDATA || this.content_model == Models.RCDATA || this.content_model == Models.CDATA || this.content_model == Models.SCRIPT_CDATA)) { |
| 248 | + this.state = 'tag_open_state'; |
| 249 | + } else if(c == '>' && this.escapeFlag && (this.content_model == Models.CDATA || this.content_model == Models.RCDATA || this.content_model == Models.SCRIPT_CDATA) && this.lastFourChars.match(/-->$/)) { |
| 250 | + this.escapeFlag = false; |
| 251 | + this.emitToken({type: 'Characters', data: c}); |
| 252 | + this.commit(); |
| 253 | + } else if(HTML5.SPACE_CHARACTERS_R.test(c)) { |
| 254 | + this.emitToken({type: 'SpaceCharacters', data: c + buffer.matchWhile(HTML5.SPACE_CHARACTERS)}); |
| 255 | + this.commit(); |
| 256 | + } else { |
| 257 | + var o = buffer.matchUntil("[&<>-]") |
| 258 | + this.emitToken({type: 'Characters', data: c + o}); |
| 259 | + this.lastFourChars += c+o |
| 260 | + this.lastFourChars = this.lastFourChars.slice(-4) |
| 261 | + this.commit(); |
| 262 | + } |
| 263 | + return true; |
| 264 | +} |
| 265 | + |
| 266 | +t.prototype.entity_data_state = function(buffer) { /* Entered after an ampersand in text: emits the decoded entity, or a literal ampersand when consume_entity returns a falsy value, then returns to the data state. */ |
| 267 | + var entity = this.consume_entity(buffer); |
| 268 | + if(entity) { |
| 269 | + this.emitToken({type: 'Characters', data: entity}); |
| 270 | + } else { |
| 271 | + this.emitToken({type: 'Characters', data: '&'}); |
| 272 | + } |
| 273 | + this.state = 'data_state'; |
| 274 | + return true; |
| 275 | +} |
| 276 | + |
| 277 | +t.prototype.tag_open_state = function(buffer) { /* Entered after an opening angle bracket: decides between markup declaration, end tag, start tag, bogus comment, or plain text. Always returns true. */ |
| 278 | + var data = buffer.char(); |
| 279 | + if(this.content_model == Models.PCDATA) { |
| 280 | + if(data == '!') { |
| 281 | + this.state = 'markup_declaration_open_state'; |
| 282 | + } else if (data == '/') { |
| 283 | + this.state = 'close_tag_open_state'; |
| 284 | + } else if (data != HTML5.EOF && HTML5.ASCII_LETTERS_R.test(data)) { |
| 285 | + this.current_token = {type: 'StartTag', name: data, data: []}; /* data will collect the attribute objects as nodeName/nodeValue pairs */ |
| 286 | + this.state = 'tag_name_state'; |
| 287 | + } else if (data == '>') { |
| 288 | + // XXX In theory it could be something besides a tag name. But |
| 289 | + // do we really care? |
| 290 | + this.parse_error("expected-tag-name-but-got-right-bracket"); |
| 291 | + this.emitToken({type: 'Characters', data: "<>"}); |
| 292 | + this.state = 'data_state'; |
| 293 | + } else if (data == '?') { |
| 294 | + // XXX In theory it could be something besides a tag name. But |
| 295 | + // do we really care? |
| 296 | + this.parse_error("expected-tag-name-but-got-question-mark"); |
| 297 | + buffer.unget(data); |
| 298 | + this.state = 'bogus_comment_state'; |
| 299 | + } else { |
| 300 | + // XXX |
| 301 | + this.parse_error("expected-tag-name"); |
| 302 | + this.emitToken({type: 'Characters', data: "<"}); |
| 303 | + buffer.unget(data); /* the character is put back so the data state sees it again */ |
| 304 | + this.state = 'data_state'; |
| 305 | + } |
| 306 | + } else { |
| 307 | + // We know the content model flag is set to either RCDATA or CDATA or SCRIPT_CDATA |
| 308 | + // now because this state can never be entered with the PLAINTEXT |
| 309 | + // flag. |
| 310 | + if (data == '/') { |
| 311 | + this.state = 'close_tag_open_state'; |
| 312 | + } else { |
| 313 | + this.emitToken({type: 'Characters', data: "<"}); |
| 314 | + buffer.unget(data); |
| 315 | + this.state = 'data_state'; |
| 316 | + } |
| 317 | + } |
| 318 | + return true |
| 319 | +} |
| 320 | + |
| 321 | +t.prototype.close_tag_open_state = function(buffer) { |
| 322 | + if(this.content_model == Models.RCDATA || this.content_model == Models.CDATA || this.content_model == Models.SCRIPT_CDATA) { |
| 323 | + var chars = ''; |
| 324 | + if(this.current_token) { |
| 325 | + for(var i = 0; i <= this.current_token.name.length; i++) { |
| 326 | + var c = buffer.char(); |
| 327 | + if(c == HTML5.EOF) break; |
| 328 | + chars += c; |
| 329 | + } |
| 330 | + buffer.unget(chars); |
| 331 | + } |
| 332 | + |
| 333 | + if(this.current_token |
| 334 | + && this.current_token.name.toLowerCase() == chars.slice(0, this.current_token.name.length).toLowerCase() |
| 335 | + && (chars.length > this.current_token.name.length ? new RegExp('[' + HTML5.SPACE_CHARACTERS_IN + '></\0]').test(chars.substr(-1)) : true) |
| 336 | + ) { |
| 337 | + this.content_model = Models.PCDATA; |
| 338 | + } else { |
| 339 | + this.emitToken({type: 'Characters', data: '</'}); |
| 340 | + this.state = 'data_state'; |
| 341 | + return true |
| 342 | + } |
| 343 | + } |
| 344 | + |
| 345 | + data = buffer.char() |
| 346 | + if (data == HTML5.EOF) { |
| 347 | + this.parse_error("expected-closing-tag-but-got-eof"); |
| 348 | + this.emitToken({type: 'Characters', data: '</'}); |
| 349 | + buffer.unget(data); |
| 350 | + this.state = 'data_state' |
| 351 | + } else if (HTML5.ASCII_LETTERS_R.test(data)) { |
| 352 | + this.current_token = {type: 'EndTag', name: data, data: []} |
| 353 | + this.state = 'tag_name_state'; |
| 354 | + } else if (data == '>') { |
| 355 | + this.parse_error("expected-closing-tag-but-got-right-bracket"); |
| 356 | + this.state = 'data_state'; |
| 357 | + } else { |
| 358 | + this.parse_error("expected-closing-tag-but-got-char", {data: data}); // param 1 is datavars: |
| 359 | + buffer.unget(data); |
| 360 | + this.state = 'bogus_comment_state'; |
| 361 | + } |
| 362 | + return true; |
| 363 | +} |
| 364 | + |
| 365 | +t.prototype.tag_name_state = function(buffer) { |
| 366 | + data = buffer.char(); |
| 367 | + if(data == HTML5.EOF) { |
| 368 | + this.parse_error('eof-in-tag-name'); |
| 369 | + this.emit_current_token(); |
| 370 | + } else if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 371 | + this.state = 'before_attribute_name_state'; |
| 372 | + } else if(HTML5.ASCII_LETTERS_R.test(data)) { |
| 373 | + this.current_token.name += data + buffer.matchWhile(HTML5.ASCII_LETTERS); |
| 374 | + } else if(data == '>') { |
| 375 | + this.emit_current_token(); |
| 376 | + } else if(data == '/') { |
| 377 | + this.process_solidus_in_tag(buffer) |
| 378 | + this.state = 'self_closing_tag_state'; |
| 379 | + } else { |
| 380 | + this.current_token.name += data; |
| 381 | + } |
| 382 | + this.commit(); |
| 383 | + |
| 384 | + return true; |
| 385 | +} |
| 386 | + |
| 387 | +t.prototype.before_attribute_name_state = function(buffer) { /* Skips whitespace inside a tag and starts a new attribute object on current_token.data at the first other character. Always returns true. */ |
| 388 | + var data = buffer.shift(1); |
| 389 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 390 | + buffer.matchWhile(HTML5.SPACE_CHARACTERS); /* result discarded: the whitespace run is simply skipped */ |
| 391 | + } else if (data == HTML5.EOF) { |
| 392 | + this.parse_error("expected-attribute-name-but-got-eof"); |
| 393 | + this.emit_current_token(); |
| 394 | + } else if (HTML5.ASCII_LETTERS_R.test(data)) { |
| 395 | + this.current_token.data.push({nodeName: data, nodeValue: ""}); |
| 396 | + this.state = 'attribute_name_state'; |
| 397 | + } else if(data == '>') { |
| 398 | + this.emit_current_token(); |
| 399 | + } else if(data == '/') { |
| 400 | + this.state = 'self_closing_tag_state'; |
| 401 | + } else if(data == "'" || data == '"' || data == '=') { |
| 402 | + this.parse_error("invalid-character-in-attribute-name"); |
| 403 | + this.current_token.data.push({nodeName: data, nodeValue: ""}); /* reported as an error but still used as the first character of the name */ |
| 404 | + this.state = 'attribute_name_state'; |
| 405 | + } else { |
| 406 | + this.current_token.data.push({nodeName: data, nodeValue: ""}); |
| 407 | + this.state = 'attribute_name_state'; |
| 408 | + } |
| 409 | + return true; |
| 410 | +} |
| 411 | + |
| 412 | +t.prototype.attribute_name_state = function(buffer) { |
| 413 | + var data = buffer.shift(1); |
| 414 | + var leavingThisState = true; |
| 415 | + var emitToken = false; |
| 416 | + if(data == '=') { |
| 417 | + this.state = 'before_attribute_value_state'; |
| 418 | + } else if(data == HTML5.EOF) { |
| 419 | + this.parse_error("eof-in-attribute-name"); |
| 420 | + this.state = 'data_state'; |
| 421 | + emitToken = true; |
| 422 | + } else if(HTML5.ASCII_LETTERS_R.test(data)) { |
| 423 | + this.current_token.data.last().nodeName += data + buffer.matchWhile(HTML5.ASCII_LETTERS); |
| 424 | + leavingThisState = false; |
| 425 | + } else if(data == '>') { |
| 426 | + // XXX If we emit here the attributes are converted to a dict |
| 427 | + // without being checked and when the code below runs we error |
| 428 | + // because data is a dict not a list |
| 429 | + emitToken = true; |
| 430 | + } else if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 431 | + this.state = 'after_attribute_name_state'; |
| 432 | + } else if(data == '/') { |
| 433 | + if(!this.process_solidus_in_tag(buffer)) { |
| 434 | + this.state = 'before_attribute_name_state'; |
| 435 | + } |
| 436 | + } else if(data == "'" || data == '"') { |
| 437 | + this.parse_error("invalid-character-in-attribute-name"); |
| 438 | + this.current_token.data.last().nodeName += data; |
| 439 | + leavingThisState = false; |
| 440 | + } else { |
| 441 | + this.current_token.data.last().nodeName += data; |
| 442 | + leavingThisState = false; |
| 443 | + } |
| 444 | + |
| 445 | + if(leavingThisState) { |
| 446 | + // Attributes are not dropped at this stage. That happens when the |
| 447 | + // start tag token is emitted so values can still be safely appended |
| 448 | + // to attributes, but we do want to report the parse error in time. |
| 449 | + if(this.lowercase_attr_name) { |
| 450 | + this.current_token.data.last().nodeName = this.current_token.data.last().nodeName.toLowerCase(); |
| 451 | + } |
| 452 | + for (k in this.current_token.data.slice(0, -1)) { |
| 453 | + // FIXME this is a fucking mess. |
| 454 | + if(this.current_token.data.slice(-1)[0] == this.current_token.data.slice(0, -1)[k].name) { |
| 455 | + this.parse_error("duplicate-attribute"); |
| 456 | + break; // Don't emit more than one of these errors |
| 457 | + } |
| 458 | + } |
| 459 | + if(emitToken) this.emit_current_token(); |
| 460 | + } else { |
| 461 | + this.commit() |
| 462 | + } |
| 463 | + return true; |
| 464 | +} |
| 465 | + |
| 466 | +t.prototype.after_attribute_name_state = function(buffer) { /* Entered after whitespace following an attribute name: expects an equals sign, the end of the tag, or the start of another attribute. Always returns true. */ |
| 467 | + var data = buffer.shift(1); |
| 468 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 469 | + buffer.matchWhile(HTML5.SPACE_CHARACTERS); |
| 470 | + } else if(data == '=') { |
| 471 | + this.state = 'before_attribute_value_state'; |
| 472 | + } else if(data == '>') { |
| 473 | + this.emit_current_token(); |
| 474 | + } else if(data == HTML5.EOF) { |
| 475 | + this.parse_error("expected-end-of-tag-but-got-eof"); |
| 476 | + this.emit_current_token(); |
| 477 | + } else if(HTML5.ASCII_LETTERS_R.test(data)) { |
| 478 | + this.current_token.data.push({nodeName: data, nodeValue: ""}); /* previous attribute had no value; a new attribute starts */ |
| 479 | + this.state = 'attribute_name_state'; |
| 480 | + } else if(data == '/') { |
| 481 | + this.state = 'self_closing_tag_state'; |
| 482 | + } else { |
| 483 | + this.current_token.data.push({nodeName: data, nodeValue: ""}); |
| 484 | + this.state = 'attribute_name_state'; |
| 485 | + } |
| 486 | + return true; |
| 487 | +} |
| 488 | + |
| 489 | +t.prototype.before_attribute_value_state = function(buffer) { /* Entered after the equals sign of an attribute: picks the double-quoted, single-quoted or unquoted value state. Always returns true. */ |
| 490 | + var data = buffer.shift(1); |
| 491 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 492 | + buffer.matchWhile(HTML5.SPACE_CHARACTERS); |
| 493 | + } else if(data == '"') { |
| 494 | + this.state = 'attribute_value_double_quoted_state'; |
| 495 | + } else if(data == '&') { |
| 496 | + this.state = 'attribute_value_unquoted_state'; |
| 497 | + buffer.unget(data); /* the ampersand is put back so the unquoted state handles the entity */ |
| 498 | + } else if(data == "'") { |
| 499 | + this.state = 'attribute_value_single_quoted_state'; |
| 500 | + } else if(data == '>') { |
| 501 | + this.emit_current_token(); |
| 502 | + } else if(data == '=') { |
| 503 | + this.parse_error("equals-in-unquoted-attribute-value"); |
| 504 | + this.current_token.data.last().nodeValue += data; |
| 505 | + this.state = 'attribute_value_unquoted_state'; |
| 506 | + } else if(data == HTML5.EOF) { |
| 507 | + this.parse_error("expected-attribute-value-but-got-eof"); |
| 508 | + this.emit_current_token(); |
| 509 | + this.state = 'attribute_value_unquoted_state'; /* NOTE(review): the state is set after the token has been emitted - confirm this is intended */ |
| 510 | + } else { |
| 511 | + this.current_token.data.last().nodeValue += data |
| 512 | + this.state = 'attribute_value_unquoted_state' |
| 513 | + } |
| 514 | + |
| 515 | + return true; |
| 516 | +} |
| 517 | + |
| 518 | +t.prototype.attribute_value_double_quoted_state = function(buffer) { /* Reads a double-quoted attribute value into the last attribute's nodeValue until the closing quote. Always returns true. */ |
| 519 | + var data = buffer.shift(1); |
| 520 | + if(data == '"') { |
| 521 | + this.state = 'after_attribute_value_state'; |
| 522 | + } else if(data == '&') { |
| 523 | + this.process_entity_in_attribute(buffer); |
| 524 | + } else if(data == HTML5.EOF) { |
| 525 | + this.parse_error("eof-in-attribute-value-double-quote"); |
| 526 | + this.emit_current_token(); |
| 527 | + } else { |
| 528 | + this.current_token.data.last().nodeValue += data + buffer.matchUntil('["&]'); /* take everything up to the next quote or ampersand in one go */ |
| 529 | + } |
| 530 | + return true; |
| 531 | +} |
| 532 | + |
| 533 | +t.prototype.attribute_value_single_quoted_state = function(buffer) { /* Reads a single-quoted attribute value into the last attribute's nodeValue until the closing quote. Always returns true. */ |
| 534 | + var data = buffer.shift(1); |
| 535 | + if(data == "'") { |
| 536 | + this.state = 'after_attribute_value_state'; |
| 537 | + } else if(data == '&') { |
| 538 | + this.process_entity_in_attribute(buffer); |
| 539 | + } else if(data == HTML5.EOF) { |
| 540 | + this.parse_error("eof-in-attribute-value-single-quote"); |
| 541 | + this.emit_current_token(); |
| 542 | + } else { |
| 543 | + this.current_token.data.last().nodeValue += data + buffer.matchUntil("['&]"); /* take everything up to the next quote or ampersand in one go */ |
| 544 | + } |
| 545 | + return true; |
| 546 | +} |
| 547 | + |
| 548 | +t.prototype.attribute_value_unquoted_state = function(buffer) { /* Reads an unquoted attribute value into the last attribute's nodeValue; whitespace or a closing angle bracket ends it. Always returns true. */ |
| 549 | + var data = buffer.shift(1); |
| 550 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 551 | + this.state = 'before_attribute_name_state'; |
| 552 | + } else if(data == '&') { |
| 553 | + this.process_entity_in_attribute(buffer); |
| 554 | + } else if(data == '>') { |
| 555 | + this.emit_current_token(); |
| 556 | + } else if(data == '"' || data == "'" || data == '=') { |
| 557 | + this.parse_error("unexpected-character-in-unquoted-attribute-value"); |
| 558 | + this.current_token.data.last().nodeValue += data; /* reported as an error but still kept in the value */ |
| 559 | + } else if(data == HTML5.EOF) { |
| 560 | + this.parse_error("eof-in-attribute-value-no-quotes"); |
| 561 | + this.emit_current_token(); |
| 562 | + } else { |
| 563 | + var o = buffer.matchUntil("["+ HTML5.SPACE_CHARACTERS_IN + '&<>' +"]") |
| 564 | + this.current_token.data.last().nodeValue += data + o |
| 565 | + } |
| 566 | + return true; |
| 567 | +} |
| 568 | + |
| 569 | +t.prototype.after_attribute_value_state = function(buffer) { /* Entered after the closing quote of an attribute value: expects whitespace, a slash or the end of the tag. Always returns true. */ |
| 570 | + var data = buffer.shift(1); |
| 571 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 572 | + this.state = 'before_attribute_name_state'; |
| 573 | + } else if(data == '>') { |
| 574 | + this.emit_current_token(); |
| 575 | + this.state = 'data_state'; |
| 576 | + } else if(data == '/') { |
| 577 | + this.state = 'self_closing_tag_state'; |
| 578 | + } else if(data == HTML5.EOF) { |
| 579 | + this.parse_error( "unexpected-EOF-after-attribute-value"); |
| 580 | + this.emit_current_token(); |
| 581 | + buffer.unget(data); |
| 582 | + this.state = 'data_state'; |
| 583 | + } else { |
| 584 | + this.emitToken({type: 'ParseError', data: "unexpected-character-after-attribute-value"}); /* emits the ParseError token directly instead of going through this.parse_error */ |
| 585 | + buffer.unget(data); |
| 586 | + this.state = 'before_attribute_name_state'; |
| 587 | + } |
| 588 | + return true; |
| 589 | +} |
| 590 | + |
| 591 | +t.prototype.self_closing_tag_state = function(buffer) { /* Entered after a slash inside a tag: a closing angle bracket marks the token self_closing and emits it; anything else is a parse error. Always returns true. */ |
| 592 | + var c = buffer.shift(1); |
| 593 | + if(c == '>') { |
| 594 | + this.current_token.self_closing = true; |
| 595 | + this.emit_current_token(); |
| 596 | + this.state = 'data_state'; |
| 597 | + } else if(c == HTML5.EOF) { |
| 598 | + this.parse_error("eof-in-tag-name"); |
| 599 | + buffer.unget(c); |
| 600 | + this.state = 'data_state'; |
| 601 | + } else { |
| 602 | + this.parse_error("expected-self-closing-tag"); |
| 603 | + buffer.unget(c); /* the character is reprocessed as the possible start of an attribute */ |
| 604 | + this.state = 'before_attribute_name_state'; |
| 605 | + } |
| 606 | + return true; |
| 607 | +} |
| 608 | + |
| 609 | +t.prototype.bogus_comment_state = function(buffer) { /* Emits everything up to the next closing angle bracket as a Comment token and returns to the data state. Always returns true. */ |
| 610 | + var tok = {type: 'Comment', data: buffer.matchUntil('>')} |
| 611 | + buffer.char() /* consume one more character, presumably the closing angle bracket itself */ |
| 612 | + this.emitToken(tok); |
| 613 | + this.state = 'data_state'; |
| 614 | + return true; |
| 615 | +} |
| 616 | + |
| 617 | +t.prototype.markup_declaration_open_state = function(buffer) { /* Entered after the opening angle bracket and exclamation mark: recognises a comment start (two dashes) or the DOCTYPE keyword, otherwise falls back to a bogus comment. Always returns true. */ |
| 618 | + var chars = buffer.shift(2); |
| 619 | + if(chars == '--') { |
| 620 | + this.current_token = {type: 'Comment', data: ''}; |
| 621 | + this.state = 'comment_start_state'; |
| 622 | + } else { |
| 623 | + var newchars = buffer.shift(5); /* 2 + 5 characters are the length of the word DOCTYPE */ |
| 624 | + if(newchars == HTML5.EOF || chars == HTML5.EOF) { |
| 625 | + this.parse_error("expected-dashes-or-doctype"); |
| 626 | + this.state = 'bogus_comment_state' |
| 627 | + if(chars != HTML5.EOF) buffer.unget(chars); |
| 628 | + return true; |
| 629 | + } |
| 630 | + |
| 631 | + // Check for EOF better -- FIXME |
| 632 | + chars += newchars; |
| 633 | + if(chars.toUpperCase() == 'DOCTYPE') { |
| 634 | + this.current_token = {type: 'Doctype', name: '', publicId: null, systemId: null, correct: true}; |
| 635 | + this.state = 'doctype_state'; |
| 636 | + } else { |
| 637 | + this.parse_error("expected-dashes-or-doctype"); |
| 638 | + buffer.unget(chars); |
| 639 | + this.state = 'bogus_comment_state'; |
| 640 | + } |
| 641 | + } |
| 642 | + return true; |
| 643 | +} |
| 644 | + |
| 645 | +t.prototype.comment_start_state = function(buffer) { |
| 646 | + var data = buffer.shift(1); |
| 647 | + if(data == '-') { |
| 648 | + this.state = 'comment_start_dash_state'; |
| 649 | + } else if(data == '>') { |
| 650 | + this.parse_error("incorrect comment"); |
| 651 | + this.emitToken(this.current_token); |
| 652 | + this.state = 'data_state'; |
| 653 | + } else if(data == HTML5.EOF) { |
| 654 | + this.parse_error("eof-in-comment"); |
| 655 | + this.emitToken(this.current_token); |
| 656 | + this.state = 'data_state'; |
| 657 | + } else { |
| 658 | + this.current_token.data += data + buffer.matchUntil('-'); |
| 659 | + this.state = 'comment_state'; |
| 660 | + } |
| 661 | + return true; |
| 662 | +} |
| 663 | + |
| 664 | +t.prototype.comment_start_dash_state = function(buffer) { /* Entered when a comment's first character is a dash. Always returns true. */ |
| 665 | + var data = buffer.shift(1); |
| 666 | + if(data == '-') { |
| 667 | + this.state = 'comment_end_state' |
| 668 | + } else if(data == '>') { |
| 669 | + this.parse_error("incorrect-comment"); |
| 670 | + this.emitToken(this.current_token); |
| 671 | + this.state = 'data_state'; |
| 672 | + } else if(data == HTML5.EOF) { |
| 673 | + this.parse_error("eof-in-comment"); |
| 674 | + this.emitToken(this.current_token); |
| 675 | + this.state = 'data_state'; |
| 676 | + } else { |
| 677 | + this.current_token.data += '-' + data + buffer.matchUntil('-'); /* the dash seen on entry turned out to be comment text, so it is added back */ |
| 678 | + this.state = 'comment_state'; |
| 679 | + } |
| 680 | + return true; |
| 681 | +} |
| 682 | + |
| 683 | +t.prototype.comment_state = function(buffer) { /* Accumulates comment text on current_token.data until a dash or EOF is seen. Always returns true. */ |
| 684 | + var data = buffer.shift(1); |
| 685 | + if(data == '-') { |
| 686 | + this.state = 'comment_end_dash_state'; |
| 687 | + } else if(data == HTML5.EOF) { |
| 688 | + this.parse_error("eof-in-comment"); |
| 689 | + this.emitToken(this.current_token); |
| 690 | + this.state = 'data_state'; |
| 691 | + } else { |
| 692 | + this.current_token.data += data + buffer.matchUntil('-'); |
| 693 | + } |
| 694 | + return true; |
| 695 | +} |
| 696 | + |
| 697 | +t.prototype.comment_end_dash_state = function(buffer) { /* Entered after one dash inside a comment: a second dash moves to the comment end state, anything else is comment text. Always returns true. */ |
| 698 | + var data = buffer.char(); |
| 699 | + if(data == '-') { |
| 700 | + this.state = 'comment_end_state'; |
| 701 | + } else if (data == HTML5.EOF) { |
| 702 | + this.parse_error("eof-in-comment-end-dash"); |
| 703 | + this.emitToken(this.current_token); |
| 704 | + this.state = 'data_state'; |
| 705 | + } else { |
| 706 | + this.current_token.data += '-' + data + buffer.matchUntil('-'); |
| 707 | + // Consume the next character which is either a "-" or an :EOF as |
| 708 | + // well so if there's a "-" directly after the "-" we go nicely to |
| 709 | + // the "comment end state" without emitting a ParseError there. |
| 710 | + buffer.char(); /* the state is deliberately left unchanged: having consumed a dash, we are again one dash into a possible comment end */ |
| 711 | + } |
| 712 | + return true; |
| 713 | +} |
| 714 | + |
| 715 | +t.prototype.comment_end_state = function(buffer) { /* Entered after two dashes inside a comment: a closing angle bracket emits the Comment token. Always returns true. */ |
| 716 | + var data = buffer.shift(1); |
| 717 | + if(data == '>') { |
| 718 | + this.emitToken(this.current_token); |
| 719 | + this.state = 'data_state'; |
| 720 | + } else if(data == '-') { |
| 721 | + this.parse_error("unexpected-dash-after-double-dash-in-comment"); |
| 722 | + this.current_token.data += data; /* extra dash is kept as text; the state stays comment_end_state */ |
| 723 | + } else if (data == HTML5.EOF) { |
| 724 | + this.parse_error("eof-in-comment-double-dash"); |
| 725 | + this.emitToken(this.current_token); |
| 726 | + this.state = 'data_state'; |
| 727 | + } else { |
| 728 | + // XXX |
| 729 | + this.parse_error("unexpected-char-in-comment"); |
| 730 | + this.current_token.data += '--' + data; /* the two dashes were not a terminator, so they are restored as text */ |
| 731 | + this.state = 'comment_state'; |
| 732 | + } |
| 733 | + return true; |
| 734 | +} |
| 735 | + |
| 736 | +t.prototype.doctype_state = function(buffer) { /* Entered right after the DOCTYPE keyword: expects whitespace; either way the next state is before_doctype_name_state. Always returns true. */ |
| 737 | + var data = buffer.shift(1); |
| 738 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { |
| 739 | + this.state = 'before_doctype_name_state'; |
| 740 | + } else { |
| 741 | + this.parse_error("need-space-after-doctype"); |
| 742 | + buffer.unget(data); /* not whitespace: put it back so it becomes part of the name */ |
| 743 | + this.state = 'before_doctype_name_state'; |
| 744 | + } |
| 745 | + return true; |
| 746 | +} |
| 747 | + |
| 748 | +t.prototype.before_doctype_name_state = function(buffer) { /* Skips whitespace before the doctype name and starts the name at the first other character. Always returns true. */ |
| 749 | + var data = buffer.shift(1); |
| 750 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { /* empty branch: a whitespace character is consumed and the state is unchanged */ |
| 751 | + } else if(data == '>') { |
| 752 | + this.parse_error("expected-doctype-name-but-got-right-bracket"); |
| 753 | + this.current_token.correct = false; |
| 754 | + this.emit_current_token(); |
| 755 | + this.state = 'data_state'; |
| 756 | + } else if(data == HTML5.EOF) { |
| 757 | + this.parse_error("expected-doctype-name-but-got-eof"); |
| 758 | + this.current_token.correct = false; |
| 759 | + this.emit_current_token(); |
| 760 | + this.state = 'data_state'; |
| 761 | + } else { |
| 762 | + this.current_token.name = data; |
| 763 | + this.state = 'doctype_name_state'; |
| 764 | + } |
| 765 | + return true |
| 766 | +} |
| 767 | + |
| 768 | +t.prototype.doctype_name_state = function(buffer) { /* DOCTYPE name state: consumes one character and appends it to current_token.name unless it is whitespace, '>' or EOF. Always returns true. */ |
| 769 | + var data = buffer.shift(1); |
| 770 | + if(HTML5.SPACE_CHARACTERS_R.test(data)) { /* whitespace ends the name; whatever follows is left to bogus_doctype_state */ |
| 771 | + this.state = 'bogus_doctype_state'; |
| 772 | + } else if(data == '>') { /* end of the DOCTYPE: emit the token */ |
| 773 | + this.emit_current_token(); |
| 774 | + this.state = 'data_state'; |
| 775 | + } else if(data == HTML5.EOF) { /* input ended inside the DOCTYPE: flag the token, push EOF back, report and emit */ |
| 776 | + this.current_token.correct = false; |
| 777 | + buffer.unget(data); |
| 778 | + this.parse_error("eof-in-doctype"); |
| 779 | + this.emit_current_token(); |
| 780 | + this.state = 'data_state'; |
| 781 | + } else { /* any other character extends the name */ |
| 782 | + this.current_token.name += data; |
| 783 | + } |
| 784 | + return true; |
| 785 | +} |
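| 786 | +/* Disabled code: an incomplete fragment that dispatched on the PUBLIC / SYSTEM keywords inside a DOCTYPE. It is not a whole function and is kept for reference only. |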
| 787 | + data += buffer.shift(5); |
| 788 | + var token = data.toLowerCase(); |
| 789 | + if(token == 'public') { |
| 790 | + this.state = 'before_doctype_public_identifier_state'; |
| 791 | + } else if(token == 'system') { |
| 792 | + this.state = 'before_doctype_system_identifier_state'; |
| 793 | + } else { |
| 794 | + buffer.unget(data); |
| 795 | + this.parse_error("expected-space-or-right-bracket-in-doctype", {data: data}); |
| 796 | + this.state = 'bogus_doctype_state'; |
| 797 | + } |
| 798 | + } |
| 799 | + return true |
| 800 | +} |
| 801 | +*/ |
| 802 | + |
| 803 | +t.prototype.bogus_doctype_state = function(buffer) { |
| 804 | + var data = buffer.shift(1); |
| 805 | + this.current_token.correct = false; |
| 806 | + if(data == '>') { |
| 807 | + this.emit_current_token(); |
| 808 | + this.state = 'data_state'; |
| 809 | + } else if(data == HTML5.EOF) { |
| 810 | + throw(new Error("Unimplemented!")) |
| 811 | + } |
| 812 | + return true; |
| 813 | +} |
| 814 | + |
| 815 | +t.prototype.parse_error = function(message) { /* Reports a parse error by emitting a token of type 'ParseError' whose data is the given message; callers in this file pass error keys such as "eof-in-doctype". */ |
| 816 | + this.emitToken({type: 'ParseError', data: message}); |
| 817 | +} |
| 818 | + |
| 819 | +t.prototype.emit_current_token = function() { |
| 820 | + var tok = this.current_token; |
| 821 | + switch(tok.type) { |
| 822 | + case 'StartTag': |
| 823 | + case 'EndTag': |
| 824 | + case 'EmptyTag': |
| 825 | + if(tok.type == 'EndTag' && tok.self_closing) { |
| 826 | + this.parse_error('self-closing-end-tag'); |
| 827 | + } |
| 828 | + break; |
| 829 | + } |
| 830 | + if (this.current_token.name == "script" && tok.type == 'EndTag') { |
| 831 | + this.emitToken({ type: 'Characters', data: this.script_buffer }); |
| 832 | + this.script_buffer = null; |
| 833 | + } |
| 834 | + this.emitToken(tok); |
| 835 | + this.state = 'data_state'; |
| 836 | +} |
| 837 | + |
| 838 | +t.prototype.normalize_token = function(token) { |
| 839 | + if(token.type == 'EmptyTag') { |
| 840 | + if(HTML5.VOID_ELEMENTS.indexOf(token.name) == -1) { |
| 841 | + this.parse_error('incorrectly-placed-solidus'); |
| 842 | + } |
| 843 | + token.type = 'StartTag'; |
| 844 | + } |
| 845 | + |
| 846 | + if(token.type == 'StartTag') { |
| 847 | + token.name = token.name.toLowerCase(); |
| 848 | + if(token.data.length != 0) { |
| 849 | + var data = {}; |
| 850 | + token.data.reverse(); |
| 851 | + token.data.forEach(function(e) { |
| 852 | + data[e.nodeName.toLowerCase()] = e.nodeValue; |
| 853 | + }); |
| 854 | + token.data = []; |
| 855 | + for(var k in data) { |
| 856 | + token.data.push({nodeName: k, nodeValue: data[k]}); |
| 857 | + } |
| 858 | + } |
| 859 | + } else if(token.type == 'EndTag') { |
| 860 | + if(token.data.length != 0) this.parse_error('attributes-in-end-tag'); |
| 861 | + token.name = token.name.toLowerCase(); |
| 862 | + } |
| 863 | + |
| 864 | + return token; |
| 865 | +} |
| 866 | + |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/tokenizer.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 867 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/constants.js |
— | — | @@ -0,0 +1,1101 @@ |
| 2 | +var HTML5 = require('../html5'); |
| 3 | + |
| 4 | +HTML5.CONTENT_MODEL_FLAGS = [ |
| 5 | + 'PCDATA', |
| 6 | + 'RCDATA', |
| 7 | + 'CDATA', |
| 8 | + 'SCRIPT_CDATA', |
| 9 | + 'PLAINTEXT' |
| 10 | +]; |
| 11 | + |
| 12 | +HTML5.Marker = {type: 'Marker', data: 'this is a marker token'}; |
| 13 | + |
| 14 | +HTML5.EOF = -1 |
| 15 | +HTML5.EOF_TOK = {type: 'EOF', data: 'End of File' }; |
| 16 | +HTML5.DRAIN = -2; |
| 17 | + |
| 18 | +HTML5.SCOPING_ELEMENTS = [ /* Lower-case tag names grouped as "scoping" elements; the code that consumes the list is not in this part of the file. */ |
| 19 | + 'applet', |
| 20 | + 'button', |
| 21 | + 'caption', |
| 22 | + 'html', |
| 23 | + 'marquee', |
| 24 | + 'object', |
| 25 | + 'table', |
| 26 | + 'td', |
| 27 | + 'th' |
| 28 | +]; |
| 29 | +HTML5.FORMATTING_ELEMENTS = [ /* Lower-case tag names grouped as "formatting" elements. */ |
| 30 | + 'a', |
| 31 | + 'b', |
| 32 | + 'big', |
| 33 | + 'code', |
| 34 | + 'em', |
| 35 | + 'font', |
| 36 | + 'i', |
| 37 | + 'nobr', |
| 38 | + 's', |
| 39 | + 'small', |
| 40 | + 'strike', |
| 41 | + 'strong', |
| 42 | + 'tt', |
| 43 | + 'u' |
| 44 | +]; |
| 45 | +HTML5.SPECIAL_ELEMENTS = [ /* Lower-case tag names grouped as "special" elements, in alphabetical order. */ |
| 46 | + 'address', |
| 47 | + 'area', |
| 48 | + 'base', |
| 49 | + 'basefont', |
| 50 | + 'bgsound', |
| 51 | + 'blockquote', |
| 52 | + 'body', |
| 53 | + 'br', |
| 54 | + 'center', |
| 55 | + 'col', |
| 56 | + 'colgroup', |
| 57 | + 'dd', |
| 58 | + 'dir', |
| 59 | + 'div', |
| 60 | + 'dl', |
| 61 | + 'dt', |
| 62 | + 'embed', |
| 63 | + 'fieldset', |
| 64 | + 'form', |
| 65 | + 'frame', |
| 66 | + 'frameset', |
| 67 | + 'h1', |
| 68 | + 'h2', |
| 69 | + 'h3', |
| 70 | + 'h4', |
| 71 | + 'h5', |
| 72 | + 'h6', |
| 73 | + 'head', |
| 74 | + 'hr', |
| 75 | + 'iframe', |
| 76 | + 'image', |
| 77 | + 'img', |
| 78 | + 'input', |
| 79 | + 'isindex', |
| 80 | + 'li', |
| 81 | + 'link', |
| 82 | + 'listing', |
| 83 | + 'menu', |
| 84 | + 'meta', |
| 85 | + 'noembed', |
| 86 | + 'noframes', |
| 87 | + 'noscript', |
| 88 | + 'ol', |
| 89 | + 'optgroup', |
| 90 | + 'option', |
| 91 | + 'p', |
| 92 | + 'param', |
| 93 | + 'plaintext', |
| 94 | + 'pre', |
| 95 | + 'script', |
| 96 | + 'select', |
| 97 | + 'spacer', |
| 98 | + 'style', |
| 99 | + 'tbody', |
| 100 | + 'textarea', |
| 101 | + 'tfoot', |
| 102 | + 'thead', |
| 103 | + 'title', |
| 104 | + 'tr', |
| 105 | + 'ul', |
| 106 | + 'wbr' |
| 107 | +]; |
| 108 | +HTML5.SPACE_CHARACTERS_IN = "\t\n\x0B\x0C\x20\u0012\r"; |
| 109 | +HTML5.SPACE_CHARACTERS = "[\t\n\x0B\x0C\x20\r]"; |
| 110 | +HTML5.SPACE_CHARACTERS_R = /^[\t\n\x0B\x0C \r]/; |
| 111 | + |
| 112 | +HTML5.TABLE_INSERT_MODE_ELEMENTS = [ /* Lower-case names of table-structure elements; presumably the elements that trigger table insertion handling - confirm against the consumer. */ |
| 113 | + 'table', |
| 114 | + 'tbody', |
| 115 | + 'tfoot', |
| 116 | + 'thead', |
| 117 | + 'tr' |
| 118 | +]; |
| 119 | + |
| 120 | +HTML5.ASCII_LOWERCASE = 'abcdefghijklmnopqrstuvwxyz'; /* the 26 ASCII lower-case letters */ |
| 121 | +HTML5.ASCII_UPPERCASE = HTML5.ASCII_LOWERCASE.toUpperCase(); /* derived from the lower-case string */ |
| 122 | +HTML5.ASCII_LETTERS = "[a-zA-Z]"; /* character-class source text (a string, not a RegExp) */ |
| 123 | +HTML5.ASCII_LETTERS_R = /^[a-zA-Z]/; /* anchored: matches only when the input starts with a letter */ |
| 124 | +HTML5.DIGITS = '0123456789'; |
| 125 | +HTML5.DIGITS_R = new RegExp('^[0123456789]'); /* anchored: matches only when the input starts with a digit */ |
| 126 | +HTML5.HEX_DIGITS = HTML5.DIGITS + 'abcdefABCDEF'; |
| 127 | +HTML5.HEX_DIGITS_R = new RegExp('^[' + HTML5.DIGITS + 'abcdefABCDEF' +']' ); /* same characters as HEX_DIGITS, anchored at the start */ |
| 128 | + |
| 129 | +// Heading elements need to be ordered (listed here from h1 to h6) |
| 130 | +HTML5.HEADING_ELEMENTS = [ |
| 131 | + 'h1', |
| 132 | + 'h2', |
| 133 | + 'h3', |
| 134 | + 'h4', |
| 135 | + 'h5', |
| 136 | + 'h6' |
| 137 | +]; |
| 138 | + |
| 139 | +HTML5.VOID_ELEMENTS = [ /* Element names that may carry a trailing solidus without a parse error: normalize_token reports 'incorrectly-placed-solidus' for an EmptyTag whose name is not listed here. */ |
| 140 | + 'base', |
| 141 | + 'link', |
| 142 | + 'meta', |
| 143 | + 'hr', |
| 144 | + 'br', |
| 145 | + 'img', |
| 146 | + 'embed', |
| 147 | + 'param', |
| 148 | + 'area', |
| 149 | + 'col', |
| 150 | + 'input' |
| 151 | +]; |
| 152 | + |
| 153 | +HTML5.CDATA_ELEMENTS = [ /* NOTE(review): the HTML spec treats title and textarea as RCDATA, so this name and RCDATA_ELEMENTS look swapped - confirm against the code that reads them before renaming. */ |
| 154 | + 'title', |
| 155 | + 'textarea' |
| 156 | +]; |
| 157 | + |
| 158 | +HTML5.RCDATA_ELEMENTS = [ /* NOTE(review): see CDATA_ELEMENTS; these are the raw-text style elements in the HTML spec - confirm intended meaning. */ |
| 159 | + 'style', |
| 160 | + 'script', |
| 161 | + 'xmp', |
| 162 | + 'iframe', |
| 163 | + 'noembed', |
| 164 | + 'noframes', |
| 165 | + 'noscript' |
| 166 | +]; |
| 167 | + |
| 168 | +HTML5.BOOLEAN_ATTRIBUTES = { |
| 169 | + '_global': ['irrelevant'], |
| 170 | + // Fixme? |
| 171 | + 'style': ['scoped'], |
| 172 | + 'img': ['ismap'], |
| 173 | + 'audio': ['autoplay', 'controls'], |
| 174 | + 'video': ['autoplay', 'controls'], |
| 175 | + 'script': ['defer', 'async'], |
| 176 | + 'details': ['open'], |
| 177 | + 'datagrid': ['multiple', 'disabled'], |
| 178 | + 'command': ['hidden', 'disabled', 'checked', 'default'], |
| 179 | + 'menu': ['autosubmit'], |
| 180 | + 'fieldset': ['disabled', 'readonly'], |
| 181 | + 'option': ['disabled', 'readonly', 'selected'], |
| 182 | + 'optgroup': ['disabled', 'readonly'], |
| 183 | + 'button': ['disabled', 'autofocus'], |
| 184 | + 'input': ['disabled', 'readonly', 'required', 'autofocus', 'checked', 'ismap'], |
| 185 | + 'select': ['disabled', 'readonly', 'autofocus', 'multiple'], |
| 186 | + 'output': ['disabled', 'readonly'] |
| 187 | +} |
| 188 | + |
| 189 | +// ENTITIES_WINDOWS1252 has to stay _ordered_: entry i is the Unicode code point for windows-1252 byte 0x80 + i (65533 marks bytes with no mapping). |
| 190 | +HTML5.ENTITIES_WINDOWS1252 = [ |
| 191 | + 8364, // 0x80 0x20AC EURO SIGN |
| 192 | + 65533, // 0x81 UNDEFINED |
| 193 | + 8218, // 0x82 0x201A SINGLE LOW-9 QUOTATION MARK |
| 194 | + 402, // 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK |
| 195 | + 8222, // 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK |
| 196 | + 8230, // 0x85 0x2026 HORIZONTAL ELLIPSIS |
| 197 | + 8224, // 0x86 0x2020 DAGGER |
| 198 | + 8225, // 0x87 0x2021 DOUBLE DAGGER |
| 199 | + 710, // 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT |
| 200 | + 8240, // 0x89 0x2030 PER MILLE SIGN |
| 201 | + 352, // 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON |
| 202 | + 8249, // 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK |
| 203 | + 338, // 0x8C 0x0152 LATIN CAPITAL LIGATURE OE |
| 204 | + 65533, // 0x8D UNDEFINED |
| 205 | + 381, // 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON |
| 206 | + 65533, // 0x8F UNDEFINED |
| 207 | + 65533, // 0x90 UNDEFINED |
| 208 | + 8216, // 0x91 0x2018 LEFT SINGLE QUOTATION MARK |
| 209 | + 8217, // 0x92 0x2019 RIGHT SINGLE QUOTATION MARK |
| 210 | + 8220, // 0x93 0x201C LEFT DOUBLE QUOTATION MARK |
| 211 | + 8221, // 0x94 0x201D RIGHT DOUBLE QUOTATION MARK |
| 212 | + 8226, // 0x95 0x2022 BULLET |
| 213 | + 8211, // 0x96 0x2013 EN DASH |
| 214 | + 8212, // 0x97 0x2014 EM DASH |
| 215 | + 732, // 0x98 0x02DC SMALL TILDE |
| 216 | + 8482, // 0x99 0x2122 TRADE MARK SIGN |
| 217 | + 353, // 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON |
| 218 | + 8250, // 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK |
| 219 | + 339, // 0x9C 0x0153 LATIN SMALL LIGATURE OE |
| 220 | + 65533, // 0x9D UNDEFINED |
| 221 | + 382, // 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON |
| 222 | + 376 // 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS |
| 223 | +]; |
| 224 | + |
| 225 | +HTML5.ENTITIES = { |
| 226 | + 'AElig': "\u00C6", |
| 227 | + 'AElig;': "\u00C6", |
| 228 | + 'AMP': '&', |
| 229 | + 'AMP;': '&', |
| 230 | + 'Aacute': "\u00C1", |
| 231 | + 'Aacute;': "\u00C1", |
| 232 | + 'Acirc': "\u00C2", |
| 233 | + 'Acirc;': "\u00C2", |
| 234 | + 'Agrave': "\u00C0", |
| 235 | + 'Agrave;': "\u00C0", |
| 236 | + 'Alpha;': "\u0391", |
| 237 | + 'Aring': "\u00C5", |
| 238 | + 'Aring;': "\u00C5", |
| 239 | + 'Atilde': "\u00C3", |
| 240 | + 'Atilde;': "\u00C3", |
| 241 | + 'Auml': "\u00C4", |
| 242 | + 'Auml;': "\u00C4", |
| 243 | + 'Beta;': "\u0392", |
| 244 | + 'COPY': "\u00A9", |
| 245 | + 'COPY;': "\u00A9", |
| 246 | + 'Ccedil': "\u00C7", |
| 247 | + 'Ccedil;': "\u00C7", |
| 248 | + 'Chi;': "\u03A7", |
| 249 | + 'Dagger;': "\u2021", |
| 250 | + 'Delta;': "\u0394", |
| 251 | + 'ETH': "\u00D0", |
| 252 | + 'ETH;': "\u00D0", |
| 253 | + 'Eacute': "\u00C9", |
| 254 | + 'Eacute;': "\u00C9", |
| 255 | + 'Ecirc': "\u00CA", |
| 256 | + 'Ecirc;': "\u00CA", |
| 257 | + 'Egrave': "\u00C8", |
| 258 | + 'Egrave;': "\u00C8", |
| 259 | + 'Epsilon;': "\u0395", |
| 260 | + 'Eta;': "\u0397", |
| 261 | + 'Euml': "\u00CB", |
| 262 | + 'Euml;': "\u00CB", |
| 263 | + 'GT': '>', |
| 264 | + 'GT;': '>', |
| 265 | + 'Gamma;': "\u0393", |
| 266 | + 'Iacute': "\u00CD", |
| 267 | + 'Iacute;': "\u00CD", |
| 268 | + 'Icirc': "\u00CE", |
| 269 | + 'Icirc;': "\u00CE", |
| 270 | + 'Igrave': "\u00CC", |
| 271 | + 'Igrave;': "\u00CC", |
| 272 | + 'Iota;': "\u0399", |
| 273 | + 'Iuml': "\u00CF", |
| 274 | + 'Iuml;': "\u00CF", |
| 275 | + 'Kappa;': "\u039A", |
| 276 | + 'LT': '<', |
| 277 | + 'LT;': '<', |
| 278 | + 'Lambda;': "\u039B", |
| 279 | + 'Mu;': "\u039C", |
| 280 | + 'Ntilde': "\u00D1", |
| 281 | + 'Ntilde;': "\u00D1", |
| 282 | + 'Nu;': "\u039D", |
| 283 | + 'OElig;': "\u0152", |
| 284 | + 'Oacute': "\u00D3", |
| 285 | + 'Oacute;': "\u00D3", |
| 286 | + 'Ocirc': "\u00D4", |
| 287 | + 'Ocirc;': "\u00D4", |
| 288 | + 'Ograve': "\u00D2", |
| 289 | + 'Ograve;': "\u00D2", |
| 290 | + 'Omega;': "\u03A9", |
| 291 | + 'Omicron;': "\u039F", |
| 292 | + 'Oslash': "\u00D8", |
| 293 | + 'Oslash;': "\u00D8", |
| 294 | + 'Otilde': "\u00D5", |
| 295 | + 'Otilde;': "\u00D5", |
| 296 | + 'Ouml': "\u00D6", |
| 297 | + 'Ouml;': "\u00D6", |
| 298 | + 'Phi;': "\u03A6", |
| 299 | + 'Pi;': "\u03A0", |
| 300 | + 'Prime;': "\u2033", |
| 301 | + 'Psi;': "\u03A8", |
| 302 | + 'QUOT': '"', |
| 303 | + 'QUOT;': '"', |
| 304 | + 'REG': "\u00AE", |
| 305 | + 'REG;': "\u00AE", |
| 306 | + 'Rho;': "\u03A1", |
| 307 | + 'Scaron;': "\u0160", |
| 308 | + 'Sigma;': "\u03A3", |
| 309 | + 'THORN': "\u00DE", |
| 310 | + 'THORN;': "\u00DE", |
| 311 | + 'TRADE;': "\u2122", |
| 312 | + 'Tau;': "\u03A4", |
| 313 | + 'Theta;': "\u0398", |
| 314 | + 'Uacute': "\u00DA", |
| 315 | + 'Ucirc': "\u00DB", |
| 316 | + 'Ucirc;': "\u00DB", |
| 317 | + 'Ugrave': "\u00D9", |
| 318 | + 'Ugrave;': "\u00D9", |
| 319 | + 'Upsilon;': "\u03A5", |
| 320 | + 'Uuml': "\u00DC", |
| 321 | + 'Uuml;': "\u00DC", |
| 322 | + 'Xi;': "\u039E", |
| 323 | + 'Yacute': "\u00DD", |
| 324 | + 'Yacute;': "\u00DD", |
| 325 | + 'Yuml;': "\u0178", |
| 326 | + 'Zeta;': "\u0396", |
| 327 | + 'aacute': "\u00E1", |
| 328 | + 'aacute;': "\u00E1", |
| 329 | + 'acirc': "\u00E2", |
| 330 | + 'acirc;': "\u00E2", |
| 331 | + 'acute': "\u00B4", |
| 332 | + 'acute;': "\u00B4", |
| 333 | + 'aelig': "\u00E6", |
| 334 | + 'aelig;': "\u00E6", |
| 335 | + 'agrave': "\u00E0", |
| 336 | + 'agrave;': "\u00E0", |
| 337 | + 'alefsym;': "\u2135", |
| 338 | + 'alpha;': "\u03B1", |
| 339 | + 'amp': '&', |
| 340 | + 'amp;': '&', |
| 341 | + 'and;': "\u2227", |
| 342 | + 'ang;': "\u2220", |
| 343 | + 'apos;': "'", |
| 344 | + 'aring': "\u00E5", |
| 345 | + 'aring;': "\u00E5", |
| 346 | + 'asymp;': "\u2248", |
| 347 | + 'atilde': "\u00E3", |
| 348 | + 'atilde;': "\u00E3", |
| 349 | + 'auml': "\u00E4", |
| 350 | + 'auml;': "\u00E4", |
| 351 | + 'bdquo;': "\u201E", |
| 352 | + 'beta;': "\u03B2", |
| 353 | + 'brvbar': "\u00A6", |
| 354 | + 'brvbar;': "\u00A6", |
| 355 | + 'bull;': "\u2022", |
| 356 | + 'cap;': "\u2229", |
| 357 | + 'ccedil': "\u00E7", |
| 358 | + 'ccedil;': "\u00E7", |
| 359 | + 'cedil': "\u00B8", |
| 360 | + 'cent': "\u00A2", |
| 361 | + 'cent;': "\u00A2", |
| 362 | + 'chi;': "\u03C7", |
| 363 | + 'circ;': "\u02C6", |
| 364 | + 'clubs;': "\u2663", |
| 365 | + 'cong;': "\u2245", |
| 366 | + 'copy': "\u00A9", |
| 367 | + 'copy;': "\u00A9", |
| 368 | + 'crarr;': "\u21B5", |
| 369 | + 'cup;': "\u222A", |
| 370 | + 'curren': "\u00A4", |
| 371 | + 'curren;': "\u00A4", |
| 372 | + 'dArr;': "\u21D3", |
| 373 | + 'dagger;': "\u2020", |
| 374 | + 'darr;': "\u2193", |
| 375 | + 'deg': "\u00B0", |
| 376 | + 'deg;': "\u00B0", |
| 377 | + 'delta;': "\u03B4", |
| 378 | + 'diams;': "\u2666", |
| 379 | + 'divide': "\u00F7", |
| 380 | + 'divide;': "\u00F7", |
| 381 | + 'eacute': "\u00E9", |
| 382 | + 'eacute;': "\u00E9", |
| 383 | + 'ecirc': "\u00EA", |
| 384 | + 'ecirc;': "\u00EA", |
| 385 | + 'egrave': "\u00E8", |
| 386 | + 'egrave;': "\u00E8", |
| 387 | + 'empty;': "\u2205", |
| 388 | + 'emsp;': "\u2003", |
| 389 | + 'ensp;': "\u2002", |
| 390 | + 'epsilon;': "\u03B5", |
| 391 | + 'equiv;': "\u2261", |
| 392 | + 'eta;': "\u03B7", |
| 393 | + 'eth': "\u00F0", |
| 394 | + 'eth;': "\u00F0", |
| 395 | + 'euml': "\u00EB", |
| 396 | + 'euml;': "\u00EB", |
| 397 | + 'euro;': "\u20AC", |
| 398 | + 'exist;': "\u2203", |
| 399 | + 'fnof;': "\u0192", |
| 400 | + 'forall;': "\u2200", |
| 401 | + 'frac12': "\u00BD", |
| 402 | + 'frac12;': "\u00BD", |
| 403 | + 'frac14': "\u00BC", |
| 404 | + 'frac14;': "\u00BC", |
| 405 | + 'frac34': "\u00BE", |
| 406 | + 'frac34;': "\u00BE", |
| 407 | + 'frasl;': "\u2044", |
| 408 | + 'gamma;': "\u03B3", |
| 409 | + 'ge;': "\u2265", |
| 410 | + 'gt': '>', |
| 411 | + 'gt;': '>', |
| 412 | + 'hArr;': "\u21D4", |
| 413 | + 'harr;': "\u2194", |
| 414 | + 'hearts;': "\u2665", |
| 415 | + 'hellip;': "\u2026", |
| 416 | + 'iacute': "\u00ED", |
| 417 | + 'iacute;': "\u00ED", |
| 418 | + 'icirc': "\u00EE", |
| 419 | + 'icirc;': "\u00EE", |
| 420 | + 'iexcl': "\u00A1", |
| 421 | + 'iexcl;': "\u00A1", |
| 422 | + 'igrave': "\u00EC", |
| 423 | + 'igrave;': "\u00EC", |
| 424 | + 'image;': "\u2111", |
| 425 | + 'infin;': "\u221E", |
| 426 | + 'int;': "\u222B", |
| 427 | + 'iota;': "\u03B9", |
| 428 | + 'iquest': "\u00BF", |
| 429 | + 'iquest;': "\u00BF", |
| 430 | + 'isin;': "\u2208", |
| 431 | + 'iuml': "\u00EF", |
| 432 | + 'iuml;': "\u00EF", |
| 433 | + 'kappa;': "\u03BA", |
| 434 | + 'lArr;': "\u21D0", |
| 435 | + 'lambda;': "\u03BB", |
| 436 | + 'lang;': "\u27E8", |
| 437 | + 'laquo': "\u00AB", |
| 438 | + 'laquo;': "\u00AB", |
| 439 | + 'larr;': "\u2190", |
| 440 | + 'lceil;': "\u2308", |
| 441 | + 'ldquo;': "\u201C", |
| 442 | + 'le;': "\u2264", |
| 443 | + 'lfloor;': "\u230A", |
| 444 | + 'lowast;': "\u2217", |
| 445 | + 'loz;': "\u25CA", |
| 446 | + 'lrm;': "\u200E", |
| 447 | + 'lsaquo;': "\u2039", |
| 448 | + 'lsquo;': "\u2018", |
| 449 | + 'lt': '<', |
| 450 | + 'lt;': '<', |
| 451 | + 'macr': "\u00AF", |
| 452 | + 'macr;': "\u00AF", |
| 453 | + 'mdash;': "\u2014", |
| 454 | + 'micro': "\u00B5", |
| 455 | + 'micro;': "\u00B5", |
| 456 | + 'middot': "\u00B7", |
| 457 | + 'middot;': "\u00B7", |
| 458 | + 'minus;': "\u2212", |
| 459 | + 'mu;': "\u03BC", |
| 460 | + 'nabla;': "\u2207", |
| 461 | + 'nbsp': "\u00A0", |
| 462 | + 'nbsp;': "\u00A0", |
| 463 | + 'ndash;': "\u2013", |
| 464 | + 'ne;': "\u2260", |
| 465 | + 'ni;': "\u220B", |
| 466 | + 'not': "\u00AC", |
| 467 | + 'not;': "\u00AC", |
| 468 | + 'notin;': "\u2209", |
| 469 | + 'nsub;': "\u2284", |
| 470 | + 'ntilde': "\u00F1", |
| 471 | + 'ntilde;': "\u00F1", |
| 472 | + 'nu;': "\u03BD", |
| 473 | + 'oacute': "\u00F3", |
| 474 | + 'oacute;': "\u00F3", |
| 475 | + 'ocirc': "\u00F4", |
| 476 | + 'ocirc;': "\u00F4", |
| 477 | + 'oelig;': "\u0153", |
| 478 | + 'ograve': "\u00F2", |
| 479 | + 'ograve;': "\u00F2", |
| 480 | + 'oline;': "\u203E", |
| 481 | + 'omega;': "\u03C9", |
| 482 | + 'omicron;': "\u03BF", |
| 483 | + 'oplus;': "\u2295", |
| 484 | + 'or;': "\u2228", |
| 485 | + 'ordf': "\u00AA", |
| 486 | + 'ordf;': "\u00AA", |
| 487 | + 'ordm': "\u00BA", |
| 488 | + 'ordm;': "\u00BA", |
| 489 | + 'oslash': "\u00F8", |
| 490 | + 'oslash;': "\u00F8", |
| 491 | + 'otilde': "\u00F5", |
| 492 | + 'otilde;': "\u00F5", |
| 493 | + 'otimes;': "\u2297", |
| 494 | + 'ouml': "\u00F6", |
| 495 | + 'ouml;': "\u00F6", |
| 496 | + 'para': "\u00B6", |
| 497 | + 'para;': "\u00B6", |
| 498 | + 'part;': "\u2202", |
| 499 | + 'permil;': "\u2030", |
| 500 | + 'perp;': "\u22A5", |
| 501 | + 'phi;': "\u03C6", |
| 502 | + 'pi;': "\u03C0", |
| 503 | + 'piv;': "\u03D6", |
| 504 | + 'plusmn': "\u00B1", |
| 505 | + 'plusmn;': "\u00B1", |
| 506 | + 'pound': "\u00A3", |
| 507 | + 'pound;': "\u00A3", |
| 508 | + 'prime;': "\u2032", |
| 509 | + 'prod;': "\u220F", |
| 510 | + 'prop;': "\u221D", |
| 511 | + 'psi;': "\u03C8", |
| 512 | + 'quot': '"', |
| 513 | + 'quot;': '"', |
| 514 | + 'rArr;': "\u21D2", |
| 515 | + 'radic;': "\u221A", |
| 516 | + 'rang;': "\u27E9", |
| 517 | + 'raquo': "\u00BB", |
| 518 | + 'raquo;': "\u00BB", |
| 519 | + 'rarr;': "\u2192", |
| 520 | + 'rceil;': "\u2309", |
| 521 | + 'rdquo;': "\u201D", |
| 522 | + 'real;': "\u211C", |
| 523 | + 'reg': "\u00AE", |
| 524 | + 'reg;': "\u00AE", |
| 525 | + 'rfloor;': "\u230B", |
| 526 | + 'rho;': "\u03C1", |
| 527 | + 'rlm;': "\u200F", |
| 528 | + 'rsaquo;': "\u203A", |
| 529 | + 'rsquo;': "\u2019", |
| 530 | + 'sbquo;': "\u201A", |
| 531 | + 'scaron;': "\u0161", |
| 532 | + 'sdot;': "\u22C5", |
| 533 | + 'sect': "\u00A7", |
| 534 | + 'sect;': "\u00A7", |
| 535 | + 'shy': "\u00AD", |
| 536 | + 'shy;': "\u00AD", |
| 537 | + 'sigma;': "\u03C3", |
| 538 | + 'sigmaf;': "\u03C2", |
| 539 | + 'sim;': "\u223C", |
| 540 | + 'spades;': "\u2660", |
| 541 | + 'sub;': "\u2282", |
| 542 | + 'sube;': "\u2286", |
| 543 | + 'sum;': "\u2211", |
| 544 | + 'sup1': "\u00B9", |
| 545 | + 'sup1;': "\u00B9", |
| 546 | + 'sup2': "\u00B2", |
| 547 | + 'sup2;': "\u00B2", |
| 548 | + 'sup3': "\u00B3", |
| 549 | + 'sup3;': "\u00B3", |
| 550 | + 'sup;': "\u2283", |
| 551 | + 'supe;': "\u2287", |
| 552 | + 'szlig': "\u00DF", |
| 553 | + 'szlig;': "\u00DF", |
| 554 | + 'tau;': "\u03C4", |
| 555 | + 'there4;': "\u2234", |
| 556 | + 'theta;': "\u03B8", |
| 557 | + 'thetasym;': "\u03D1", |
| 558 | + 'thinsp;': "\u2009", |
| 559 | + 'thorn': "\u00FE", |
| 560 | + 'thorn;': "\u00FE", |
| 561 | + 'tilde;': "\u02DC", |
| 562 | + 'times': "\u00D7", |
| 563 | + 'times;': "\u00D7", |
| 564 | + 'trade;': "\u2122", |
| 565 | + 'uArr;': "\u21D1", |
| 566 | + 'uacute': "\u00FA", |
| 567 | + 'uacute;': "\u00FA", |
| 568 | + 'uarr;': "\u2191", |
| 569 | + 'ucirc': "\u00FB", |
| 570 | + 'ucirc;': "\u00FB", |
| 571 | + 'ugrave': "\u00F9", |
| 572 | + 'ugrave;': "\u00F9", |
| 573 | + 'uml': "\u00A8", |
| 574 | + 'uml;': "\u00A8", |
| 575 | + 'upsih;': "\u03D2", |
| 576 | + 'upsilon;': "\u03C5", |
| 577 | + 'uuml': "\u00FC", |
| 578 | + 'uuml;': "\u00FC", |
| 579 | + 'weierp;': "\u2118", |
| 580 | + 'xi;': "\u03BE", |
| 581 | + 'yacute': "\u00FD", |
| 582 | + 'yacute;': "\u00FD", |
| 583 | + 'yen': "\u00A5", |
| 584 | + 'yen;': "\u00A5", |
| 585 | + 'yuml': "\u00FF", |
| 586 | + 'yuml;': "\u00FF", |
| 587 | + 'zeta;': "\u03B6", |
| 588 | + 'zwj;': "\u200D", |
| 589 | + 'zwnj;': "\u200C" |
| 590 | +} |
| 591 | + |
| 592 | +HTML5.ENCODINGS = [ /* Character-encoding labels, all lower case; the code that consumes the list is not in this part of the file. */ |
| 593 | + 'ansi_x3.4-1968', |
| 594 | + 'iso-ir-6', |
| 595 | + 'ansi_x3.4-1986', |
| 596 | + 'iso_646.irv:1991', |
| 597 | + 'ascii', |
| 598 | + 'iso646-us', |
| 599 | + 'us-ascii', |
| 600 | + 'us', |
| 601 | + 'ibm367', |
| 602 | + 'cp367', |
| 603 | + 'csascii', |
| 604 | + 'ks_c_5601-1987', |
| 605 | + 'korean', |
| 606 | + 'iso-2022-kr', |
| 607 | + 'csiso2022kr', |
| 608 | + 'euc-kr', |
| 609 | + 'iso-2022-jp', |
| 610 | + 'csiso2022jp', |
| 611 | + 'iso-2022-jp-2', |
| 612 | + '', /* NOTE(review): empty label - possibly a dropped entry; confirm before relying on it */ |
| 613 | + 'iso-ir-58', |
| 614 | + 'chinese', |
| 615 | + 'csiso58gb231280', |
| 616 | + 'iso_8859-1:1987', |
| 617 | + 'iso-ir-100', |
| 618 | + 'iso_8859-1', |
| 619 | + 'iso-8859-1', |
| 620 | + 'latin1', |
| 621 | + 'l1', |
| 622 | + 'ibm819', |
| 623 | + 'cp819', |
| 624 | + 'csisolatin1', |
| 625 | + 'iso_8859-2:1987', |
| 626 | + 'iso-ir-101', |
| 627 | + 'iso_8859-2', |
| 628 | + 'iso-8859-2', |
| 629 | + 'latin2', |
| 630 | + 'l2', |
| 631 | + 'csisolatin2', |
| 632 | + 'iso_8859-3:1988', |
| 633 | + 'iso-ir-109', |
| 634 | + 'iso_8859-3', |
| 635 | + 'iso-8859-3', |
| 636 | + 'latin3', |
| 637 | + 'l3', |
| 638 | + 'csisolatin3', |
| 639 | + 'iso_8859-4:1988', |
| 640 | + 'iso-ir-110', |
| 641 | + 'iso_8859-4', |
| 642 | + 'iso-8859-4', |
| 643 | + 'latin4', |
| 644 | + 'l4', |
| 645 | + 'csisolatin4', |
| 646 | + 'iso_8859-6:1987', |
| 647 | + 'iso-ir-127', |
| 648 | + 'iso_8859-6', |
| 649 | + 'iso-8859-6', |
| 650 | + 'ecma-114', |
| 651 | + 'asmo-708', |
| 652 | + 'arabic', |
| 653 | + 'csisolatinarabic', |
| 654 | + 'iso_8859-7:1987', |
| 655 | + 'iso-ir-126', |
| 656 | + 'iso_8859-7', |
| 657 | + 'iso-8859-7', |
| 658 | + 'elot_928', |
| 659 | + 'ecma-118', |
| 660 | + 'greek', |
| 661 | + 'greek8', |
| 662 | + 'csisolatingreek', |
| 663 | + 'iso_8859-8:1988', |
| 664 | + 'iso-ir-138', |
| 665 | + 'iso_8859-8', |
| 666 | + 'iso-8859-8', |
| 667 | + 'hebrew', |
| 668 | + 'csisolatinhebrew', |
| 669 | + 'iso_8859-5:1988', |
| 670 | + 'iso-ir-144', |
| 671 | + 'iso_8859-5', |
| 672 | + 'iso-8859-5', |
| 673 | + 'cyrillic', |
| 674 | + 'csisolatincyrillic', |
| 675 | + 'iso_8859-9:1989', |
| 676 | + 'iso-ir-148', |
| 677 | + 'iso_8859-9', |
| 678 | + 'iso-8859-9', |
| 679 | + 'latin5', |
| 680 | + 'l5', |
| 681 | + 'csisolatin5', |
| 682 | + 'iso-8859-10', |
| 683 | + 'iso-ir-157', |
| 684 | + 'l6', |
| 685 | + 'iso_8859-10:1992', |
| 686 | + 'csisolatin6', |
| 687 | + 'latin6', |
| 688 | + 'hp-roman8', |
| 689 | + 'roman8', |
| 690 | + 'r8', |
| 691 | + 'ibm037', |
| 692 | + 'cp037', |
| 693 | + 'csibm037', |
| 694 | + 'ibm424', |
| 695 | + 'cp424', |
| 696 | + 'csibm424', |
| 697 | + 'ibm437', |
| 698 | + 'cp437', |
| 699 | + '437', |
| 700 | + 'cspc8codepage437', |
| 701 | + 'ibm500', |
| 702 | + 'cp500', |
| 703 | + 'csibm500', |
| 704 | + 'ibm775', |
| 705 | + 'cp775', |
| 706 | + 'cspc775baltic', |
| 707 | + 'ibm850', |
| 708 | + 'cp850', |
| 709 | + '850', |
| 710 | + 'cspc850multilingual', |
| 711 | + 'ibm852', |
| 712 | + 'cp852', |
| 713 | + '852', |
| 714 | + 'cspcp852', |
| 715 | + 'ibm855', |
| 716 | + 'cp855', |
| 717 | + '855', |
| 718 | + 'csibm855', |
| 719 | + 'ibm857', |
| 720 | + 'cp857', |
| 721 | + '857', |
| 722 | + 'csibm857', |
| 723 | + 'ibm860', |
| 724 | + 'cp860', |
| 725 | + '860', |
| 726 | + 'csibm860', |
| 727 | + 'ibm861', |
| 728 | + 'cp861', |
| 729 | + '861', |
| 730 | + 'cp-is', |
| 731 | + 'csibm861', |
| 732 | + 'ibm862', |
| 733 | + 'cp862', |
| 734 | + '862', |
| 735 | + 'cspc862latinhebrew', |
| 736 | + 'ibm863', |
| 737 | + 'cp863', |
| 738 | + '863', |
| 739 | + 'csibm863', |
| 740 | + 'ibm864', |
| 741 | + 'cp864', |
| 742 | + 'csibm864', |
| 743 | + 'ibm865', |
| 744 | + 'cp865', |
| 745 | + '865', |
| 746 | + 'csibm865', |
| 747 | + 'ibm866', |
| 748 | + 'cp866', |
| 749 | + '866', |
| 750 | + 'csibm866', |
| 751 | + 'ibm869', |
| 752 | + 'cp869', |
| 753 | + '869', |
| 754 | + 'cp-gr', |
| 755 | + 'csibm869', |
| 756 | + 'ibm1026', |
| 757 | + 'cp1026', |
| 758 | + 'csibm1026', |
| 759 | + 'koi8-r', |
| 760 | + 'cskoi8r', |
| 761 | + 'koi8-u', |
| 762 | + 'big5-hkscs', |
| 763 | + 'ptcp154', |
| 764 | + 'csptcp154', |
| 765 | + 'pt154', |
| 766 | + 'cp154', |
| 767 | + 'utf-7', |
| 768 | + 'utf-16be', |
| 769 | + 'utf-16le', |
| 770 | + 'utf-16', |
| 771 | + 'utf-8', |
| 772 | + 'iso-8859-13', |
| 773 | + 'iso-8859-14', |
| 774 | + 'iso-ir-199', |
| 775 | + 'iso_8859-14:1998', |
| 776 | + 'iso_8859-14', |
| 777 | + 'latin8', |
| 778 | + 'iso-celtic', |
| 779 | + 'l8', |
| 780 | + 'iso-8859-15', |
| 781 | + 'iso_8859-15', |
| 782 | + 'iso-8859-16', |
| 783 | + 'iso-ir-226', |
| 784 | + 'iso_8859-16:2001', |
| 785 | + 'iso_8859-16', |
| 786 | + 'latin10', |
| 787 | + 'l10', |
| 788 | + 'gbk', |
| 789 | + 'cp936', |
| 790 | + 'ms936', |
| 791 | + 'gb18030', |
| 792 | + 'shift_jis', |
| 793 | + 'ms_kanji', |
| 794 | + 'csshiftjis', |
| 795 | + 'euc-jp', |
| 796 | + 'gb2312', |
| 797 | + 'big5', |
| 798 | + 'csbig5', |
| 799 | + 'windows-1250', |
| 800 | + 'windows-1251', |
| 801 | + 'windows-1252', |
| 802 | + 'windows-1253', |
| 803 | + 'windows-1254', |
| 804 | + 'windows-1255', |
| 805 | + 'windows-1256', |
| 806 | + 'windows-1257', |
| 807 | + 'windows-1258', |
| 808 | + 'tis-620', |
| 809 | + 'hz-gb-2312' |
| 810 | +]; |
| 811 | + |
| 812 | +HTML5.E = { |
| 813 | + "null-character": |
| 814 | + "Null character in input stream, replaced with U+FFFD.", |
| 815 | + "incorrectly-placed-solidus": |
| 816 | + "Solidus (/) incorrectly placed in tag.", |
| 817 | + "incorrect-cr-newline-entity": |
| 818 | + "Incorrect CR newline entity, replaced with LF.", |
| 819 | + "illegal-windows-1252-entity": |
| 820 | + "Entity used with illegal number (windows-1252 reference).", |
| 821 | + "cant-convert-numeric-entity": |
| 822 | + "Numeric entity couldn't be converted to character " + |
| 823 | + "(codepoint U+%(charAsInt)08x).", |
| 824 | + "illegal-codepoint-for-numeric-entity": |
| 825 | + "Numeric entity represents an illegal codepoint=> " + |
| 826 | + "U+%(charAsInt)08x.", |
| 827 | + "numeric-entity-without-semicolon": |
| 828 | + "Numeric entity didn't end with ';'.", |
| 829 | + "expected-numeric-entity-but-got-eof": |
| 830 | + "Numeric entity expected. Got end of file instead.", |
| 831 | + "expected-numeric-entity": |
| 832 | + "Numeric entity expected but none found.", |
| 833 | + "named-entity-without-semicolon": |
| 834 | + "Named entity didn't end with ';'.", |
| 835 | + "expected-named-entity": |
| 836 | + "Named entity expected. Got none.", |
| 837 | + "attributes-in-end-tag": |
| 838 | + "End tag contains unexpected attributes.", |
| 839 | + "expected-tag-name-but-got-right-bracket": |
| 840 | + "Expected tag name. Got '>' instead.", |
| 841 | + "expected-tag-name-but-got-question-mark": |
| 842 | + "Expected tag name. Got '?' instead. (HTML doesn't " + |
| 843 | + "support processing instructions.)", |
| 844 | + "expected-tag-name": |
| 845 | + "Expected tag name. Got something else instead", |
| 846 | + "expected-closing-tag-but-got-right-bracket": |
| 847 | + "Expected closing tag. Got '>' instead. Ignoring '</>'.", |
| 848 | + "expected-closing-tag-but-got-eof": |
| 849 | + "Expected closing tag. Unexpected end of file.", |
| 850 | + "expected-closing-tag-but-got-char": |
| 851 | + "Expected closing tag. Unexpected character '%(data)' found.", |
| 852 | + "eof-in-tag-name": |
| 853 | + "Unexpected end of file in the tag name.", |
| 854 | + "expected-attribute-name-but-got-eof": |
| 855 | + "Unexpected end of file. Expected attribute name instead.", |
| 856 | + "eof-in-attribute-name": |
| 857 | + "Unexpected end of file in attribute name.", |
| 858 | + "duplicate-attribute": |
| 859 | + "Dropped duplicate attribute on tag.", |
| 860 | + "expected-end-of-tag-name-but-got-eof": |
| 861 | + "Unexpected end of file. Expected = or end of tag.", |
| 862 | + "expected-attribute-value-but-got-eof": |
| 863 | + "Unexpected end of file. Expected attribute value.", |
| 864 | + "eof-in-attribute-value-double-quote": |
| 865 | + "Unexpected end of file in attribute value (\").", |
| 866 | + "eof-in-attribute-value-single-quote": |
| 867 | + "Unexpected end of file in attribute value (').", |
| 868 | + "eof-in-attribute-value-no-quotes": |
| 869 | + "Unexpected end of file in attribute value.", |
| 870 | + "expected-dashes-or-doctype": |
| 871 | + "Expected '--' or 'DOCTYPE'. Not found.", |
| 872 | + "incorrect-comment": |
| 873 | + "Incorrect comment.", |
| 874 | + "eof-in-comment": |
| 875 | + "Unexpected end of file in comment.", |
| 876 | + "eof-in-comment-end-dash": |
| 877 | + "Unexpected end of file in comment (-)", |
| 878 | + "unexpected-dash-after-double-dash-in-comment": |
| 879 | + "Unexpected '-' after '--' found in comment.", |
| 880 | + "eof-in-comment-double-dash": |
| 881 | + "Unexpected end of file in comment (--).", |
| 882 | + "unexpected-char-in-comment": |
| 883 | + "Unexpected character in comment found.", |
| 884 | + "need-space-after-doctype": |
| 885 | + "No space after literal string 'DOCTYPE'.", |
| 886 | + "expected-doctype-name-but-got-right-bracket": |
| 887 | + "Unexpected > character. Expected DOCTYPE name.", |
| 888 | + "expected-doctype-name-but-got-eof": |
| 889 | + "Unexpected end of file. Expected DOCTYPE name.", |
| 890 | + "eof-in-doctype-name": |
| 891 | + "Unexpected end of file in DOCTYPE name.", |
| 892 | + "eof-in-doctype": |
| 893 | + "Unexpected end of file in DOCTYPE.", |
| 894 | + "expected-space-or-right-bracket-in-doctype": |
| 895 | + "Expected space or '>'. Got '%(data)'", |
| 896 | + "unexpected-end-of-doctype": |
| 897 | + "Unexpected end of DOCTYPE.", |
| 898 | + "unexpected-char-in-doctype": |
| 899 | + "Unexpected character in DOCTYPE.", |
| 900 | + "eof-in-bogus-doctype": |
| 901 | + "Unexpected end of file in bogus doctype.", |
| 902 | + "eof-in-innerhtml": |
| 903 | + "Unexpected EOF in inner html mode.", |
| 904 | + "unexpected-doctype": |
| 905 | + "Unexpected DOCTYPE. Ignored.", |
| 906 | + "non-html-root": |
| 907 | + "html needs to be the first start tag.", |
| 908 | + "expected-doctype-but-got-eof": |
| 909 | + "Unexpected End of file. Expected DOCTYPE.", |
| 910 | + "unknown-doctype": |
| 911 | + "Erroneous DOCTYPE.", |
| 912 | + "expected-doctype-but-got-chars": |
| 913 | + "Unexpected non-space characters. Expected DOCTYPE.", |
| 914 | + "expected-doctype-but-got-start-tag": |
| 915 | + "Unexpected start tag (%(name)). Expected DOCTYPE.", |
| 916 | + "expected-doctype-but-got-end-tag": |
| 917 | + "Unexpected end tag (%(name)). Expected DOCTYPE.", |
| 918 | + "end-tag-after-implied-root": |
| 919 | + "Unexpected end tag (%(name)) after the (implied) root element.", |
| 920 | + "expected-named-closing-tag-but-got-eof": |
| 921 | + "Unexpected end of file. Expected end tag (%(name)).", |
| 922 | + "two-heads-are-not-better-than-one": |
| 923 | + "Unexpected start tag head in existing head. Ignored.", |
| 924 | + "unexpected-end-tag": |
| 925 | + "Unexpected end tag (%(name)). Ignored.", |
| 926 | + "unexpected-start-tag-out-of-my-head": |
| 927 | + "Unexpected start tag (%(name)) that can be in head. Moved.", |
| 928 | + "unexpected-start-tag": |
| 929 | + "Unexpected start tag (%(name)).", |
| 930 | + "missing-end-tag": |
| 931 | + "Missing end tag (%(name)).", |
| 932 | + "missing-end-tags": |
| 933 | + "Missing end tags (%(name)).", |
| 934 | + "unexpected-start-tag-implies-end-tag": |
| 935 | + "Unexpected start tag (%(startName)) " + |
| 936 | + "implies end tag (%(endName)).", |
| 937 | + "unexpected-start-tag-treated-as": |
| 938 | + "Unexpected start tag (%(originalName)). Treated as %(newName).", |
| 939 | + "deprecated-tag": |
| 940 | + "Unexpected start tag %(name). Don't use it!", |
| 941 | + "unexpected-start-tag-ignored": |
| 942 | + "Unexpected start tag %(name). Ignored.", |
| 943 | + "expected-one-end-tag-but-got-another": |
| 944 | + "Unexpected end tag (%(gotName). " + |
| 945 | + "Missing end tag (%(expectedName)).", |
| 946 | + "end-tag-too-early": |
| 947 | + "End tag (%(name)) seen too early. Expected other end tag.", |
| 948 | + "end-tag-too-early-named": |
| 949 | + "Unexpected end tag (%(gotName)). Expected end tag (%(expectedName).", |
| 950 | + "end-tag-too-early-ignored": |
| 951 | + "End tag (%(name)) seen too early. Ignored.", |
| 952 | + "adoption-agency-1.1": |
| 953 | + "End tag (%(name) violates step 1, " + |
| 954 | + "paragraph 1 of the adoption agency algorithm.", |
| 955 | + "adoption-agency-1.2": |
| 956 | + "End tag (%(name) violates step 1, " + |
| 957 | + "paragraph 2 of the adoption agency algorithm.", |
| 958 | + "adoption-agency-1.3": |
| 959 | + "End tag (%(name) violates step 1, " + |
| 960 | + "paragraph 3 of the adoption agency algorithm.", |
| 961 | + "unexpected-end-tag-treated-as": |
| 962 | + "Unexpected end tag (%(originalName)). Treated as %(newName).", |
| 963 | + "no-end-tag": |
| 964 | + "This element (%(name)) has no end tag.", |
| 965 | + "unexpected-implied-end-tag-in-table": |
| 966 | + "Unexpected implied end tag (%(name)) in the table phase.", |
| 967 | + "unexpected-implied-end-tag-in-table-body": |
| 968 | + "Unexpected implied end tag (%(name)) in the table body phase.", |
| 969 | + "unexpected-char-implies-table-voodoo": |
| 970 | + "Unexpected non-space characters in " + |
| 971 | + "table context caused voodoo mode.", |
| 972 | + "unpexted-hidden-input-in-table": |
| 973 | + "Unexpected input with type hidden in table context.", |
| 974 | + "unexpected-start-tag-implies-table-voodoo": |
| 975 | + "Unexpected start tag (%(name)) in " + |
| 976 | + "table context caused voodoo mode.", |
| 977 | + "unexpected-end-tag-implies-table-voodoo": |
| 978 | + "Unexpected end tag (%(name)) in " + |
| 979 | + "table context caused voodoo mode.", |
| 980 | + "unexpected-cell-in-table-body": |
| 981 | + "Unexpected table cell start tag (%(name)) " + |
| 982 | + "in the table body phase.", |
| 983 | + "unexpected-cell-end-tag": |
| 984 | + "Got table cell end tag (%(name)) " + |
| 985 | + "while required end tags are missing.", |
| 986 | + "unexpected-end-tag-in-table-body": |
| 987 | + "Unexpected end tag (%(name)) in the table body phase. Ignored.", |
| 988 | + "unexpected-implied-end-tag-in-table-row": |
| 989 | + "Unexpected implied end tag (%(name)) in the table row phase.", |
| 990 | + "unexpected-end-tag-in-table-row": |
| 991 | + "Unexpected end tag (%(name)) in the table row phase. Ignored.", |
| 992 | + "unexpected-select-in-select": |
| 993 | + "Unexpected select start tag in the select phase " + |
| 994 | + "treated as select end tag.", |
| 995 | + "unexpected-input-in-select": |
| 996 | + "Unexpected input start tag in the select phase.", |
| 997 | + "unexpected-start-tag-in-select": |
| 998 | + "Unexpected start tag token (%(name)) in the select phase. " + |
| 999 | + "Ignored.", |
| 1000 | + "unexpected-end-tag-in-select": |
| 1001 | + "Unexpected end tag (%(name)) in the select phase. Ignored.", |
| 1002 | + "unexpected-table-element-start-tag-in-select-in-table": |
| 1003 | + "Unexpected table element start tag (%(name))s in the select in table phase.", |
| 1004 | + "unexpected-table-element-end-tag-in-select-in-table": |
| 1005 | + "Unexpected table element end tag (%(name))s in the select in table phase.", |
| 1006 | + "unexpected-char-after-body": |
| 1007 | + "Unexpected non-space characters in the after body phase.", |
| 1008 | + "unexpected-start-tag-after-body": |
| 1009 | + "Unexpected start tag token (%(name))" + |
| 1010 | + "in the after body phase.", |
| 1011 | + "unexpected-end-tag-after-body": |
| 1012 | + "Unexpected end tag token (%(name))" + |
| 1013 | + " in the after body phase.", |
| 1014 | + "unexpected-char-in-frameset": |
| 1015 | + "Unepxected characters in the frameset phase. Characters ignored.", |
| 1016 | + "unexpected-start-tag-in-frameset": |
| 1017 | + "Unexpected start tag token (%(name))" + |
| 1018 | + " in the frameset phase. Ignored.", |
| 1019 | + "unexpected-frameset-in-frameset-innerhtml": |
| 1020 | + "Unexpected end tag token (frameset " + |
| 1021 | + "in the frameset phase (innerHTML).", |
| 1022 | + "unexpected-end-tag-in-frameset": |
| 1023 | + "Unexpected end tag token (%(name))" + |
| 1024 | + " in the frameset phase. Ignored.", |
| 1025 | + "unexpected-char-after-frameset": |
| 1026 | + "Unexpected non-space characters in the " + |
| 1027 | + "after frameset phase. Ignored.", |
| 1028 | + "unexpected-start-tag-after-frameset": |
| 1029 | + "Unexpected start tag (%(name))" + |
| 1030 | + " in the after frameset phase. Ignored.", |
| 1031 | + "unexpected-end-tag-after-frameset": |
| 1032 | + "Unexpected end tag (%(name))" + |
| 1033 | + " in the after frameset phase. Ignored.", |
| 1034 | + "expected-eof-but-got-char": |
| 1035 | + "Unexpected non-space characters. Expected end of file.", |
| 1036 | + "expected-eof-but-got-char": |
| 1037 | + "Unexpected non-space characters. Expected end of file.", |
| 1038 | + "expected-eof-but-got-start-tag": |
| 1039 | + "Unexpected start tag (%(name))" + |
| 1040 | + ". Expected end of file.", |
| 1041 | + "expected-eof-but-got-end-tag": |
| 1042 | + "Unexpected end tag (%(name))" + |
| 1043 | + ". Expected end of file.", |
| 1044 | + "unexpected-end-table-in-caption": |
| 1045 | + "Unexpected end table tag in caption. Generates implied end caption.", |
| 1046 | + "end-html-in-innerhtml": |
| 1047 | + "Unexpected html end tag in inner html mode.", |
| 1048 | + "expected-self-closing-tag": |
| 1049 | + "Expected a > after the /.", |
| 1050 | + "self-closing-end-tag": |
| 1051 | + "Self closing end tag.", |
| 1052 | + "eof-in-table": |
| 1053 | + "Unexpected end of file. Expected table content.", |
| 1054 | + "html-in-foreign-content": |
| 1055 | + "HTML start tag \"%(name)\" in a foreign namespace context.", |
| 1056 | + "unexpected-start-tag-in-table": |
| 1057 | + "Unexpected %(name). Expected table content." |
| 1058 | +}; |
| 1059 | + |
/**
 * Numeric identifiers for the tokenizer content models.
 *
 * NOTE(review): no PLAINTEXT member is defined here, so any lookup of
 * HTML5.Models.PLAINTEXT yields undefined -- confirm whether one is needed.
 */
HTML5.Models = {
	PCDATA: 0,
	RCDATA: 1,
	CDATA: 2,
	SCRIPT_CDATA: 3
};
| 1061 | + |
// Registry of parser phase constructors, keyed by phase name. Every entry is
// the `Phase` export of the matching ./parser/*_phase module, and the modules
// are loaded in the order listed.
// NOTE(review): `PHASES` is assigned without a declaration, so it also becomes
// an implicit global; other modules may read that bare global -- confirm
// before declaring it locally.
HTML5.PHASES = PHASES = {
	initial: require('./parser/initial_phase').Phase,
	beforeHTML: require('./parser/before_html_phase').Phase,
	beforeHead: require('./parser/before_head_phase').Phase,
	inHead: require('./parser/in_head_phase').Phase,
	afterHead: require('./parser/after_head_phase').Phase,
	inBody: require('./parser/in_body_phase').Phase,
	inTable: require('./parser/in_table_phase').Phase,
	inCaption: require('./parser/in_caption_phase').Phase,
	inColumnGroup: require('./parser/in_column_group_phase').Phase,
	inTableBody: require('./parser/in_table_body_phase').Phase,
	inRow: require('./parser/in_row_phase').Phase,
	inCell: require('./parser/in_cell_phase').Phase,
	inSelect: require('./parser/in_select_phase').Phase,
	inSelectInTable: require('./parser/in_select_in_table_phase').Phase,
	afterBody: require('./parser/after_body_phase').Phase,
	inFrameset: require('./parser/in_frameset_phase').Phase,
	afterFrameset: require('./parser/after_frameset_phase').Phase,
	afterAfterBody: require('./parser/after_after_body_phase').Phase,
	afterAfterFrameset: require('./parser/after_after_frameset_phase').Phase,
	inForeignContent: require('./parser/in_foreign_content_phase').Phase,
	trailingEnd: require('./parser/trailing_end_phase').Phase,
	rootElement: require('./parser/root_element_phase').Phase,
};
| 1086 | + |
/**
 * Maps a lower-case element name to the name of the parser phase that
 * Parser#reset_insertion_mode switches to when it meets that element on the
 * stack of open elements. Values are keys of HTML5.PHASES.
 */
HTML5.TAGMODES = {
	'select': 'inSelect',
	'td': 'inCell',
	'th': 'inCell',
	'tr': 'inRow',
	'tbody': 'inTableBody',
	'thead': 'inTableBody',
	'tfoot': 'inTableBody',
	'caption': 'inCaption',
	'colgroup': 'inColumnGroup',
	'table': 'inTable',
	// "head" deliberately resolves to the body phase, exactly like "body".
	'head': 'inBody',
	'body': 'inBody',
	'frameset': 'inFrameset'
};
| 1102 | + |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/constants.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 1103 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser.js |
— | — | @@ -0,0 +1,226 @@ |
| 2 | +"use strict"; |
| 3 | + |
| 4 | +var HTML5 = exports.HTML5 = require('../html5'); |
| 5 | + |
| 6 | +var events = require('events'); |
| 7 | + |
| 8 | +require('./treebuilder'); |
| 9 | +require('../mediawiki.html5TokenEmitter'); |
| 10 | + |
| 11 | +var Phase = require('./parser/phase').Phase; |
| 12 | + |
/**
 * HTML5 tree-construction parser. Inherits from events.EventEmitter.
 *
 * A new instance starts with `strict` set to false and an empty `errors`
 * list, exposes the current phase through a validated `phase` accessor, and
 * owns an HTML5.TreeBuilder bound to `this.document`.
 *
 * @constructor
 * @param {Object} [options] Every enumerable property is copied onto the
 *   instance (for example `document` or `strict`). When no `document` ends
 *   up on the instance, a jsdom level-3 document is created.
 */
var Parser = HTML5.Parser = function HTML5Parser(options) {
	events.EventEmitter.call(this);
	this.strict = false;
	this.errors = [];

	// The current phase is kept in this closure and is only reachable through
	// the accessor below.
	var phase;
	Object.defineProperty(this, 'phase', {
		enumerable: true,
		configurable: true,
		get: function() {
			return phase;
		},
		set: function(p) {
			// Validate before storing, so a rejected assignment cannot
			// clobber the phase that is currently active.
			if(!p) throw new Error("Can't leave phase undefined");
			// The former `!p instanceof Function` test parsed as
			// `(!p) instanceof Function`, which is always false, so it never
			// fired. It is not "corrected" because newPhase() stores phase
			// instances, which are objects rather than functions.
			phase = p;
		}
	});

	if(options) {
		// `o` has to be declared: this file runs in strict mode, where
		// assigning to an undeclared loop variable throws a ReferenceError.
		for(var o in options) {
			this[o] = options[o];
		}
	}

	if(!this.document) {
		// No document supplied by the caller: build one with jsdom.
		var jsdom = require('jsdom');
		var DOM = jsdom.browserAugmentation(jsdom.dom.level3.core);
		this.document = new DOM.Document('html');
	}

	this.tree = new HTML5.TreeBuilder(this.document);
};

Parser.prototype = new events.EventEmitter;
| 45 | + |
/**
 * Attach a tokenizer and prepare the parser for a whole-document parse.
 *
 * Only the wiring is done here (see setup()); this method does not start
 * the tokenizer itself.
 *
 * @param {Object} tokenizer Token source; stored as `this.tokenizer`.
 */
Parser.prototype.parse = function(tokenizer) {
	var self = this;
	self.tokenizer = tokenizer;
	self.setup();
	//self.tokenizer.tokenize();
};
| 51 | + |
/**
 * Prepare the parser for fragment (innerHTML-style) parsing.
 *
 * `this.tokenizer` must already be set, because setup() attaches listeners
 * to it. This method does not start the tokenizer itself.
 *
 * @param {*} source Only forwarded to HTML5.debug; not otherwise used here.
 * @param {Object|string} [element] Either a node (anything with an
 *   `ownerDocument`), which becomes both the open element and the root
 *   pointer, or a container name passed to setup(). When omitted, 'div' is
 *   used. In the last two cases the tree's html and body pointers are
 *   opened and the body becomes the root pointer.
 */
Parser.prototype.parse_fragment = function(source, element) {
	HTML5.debug('parser.parse_fragment', source, element);
	// FIXME: Check to make sure element is inside document
	var tree;

	if(element && element.ownerDocument) {
		// Real node: parse in the context of that node.
		this.setup(element.tagName, null);
		tree = this.tree;
		tree.open_elements.push(element);
		tree.root_pointer = element;
		return;
	}

	// Container given by name, or nothing given at all (defaults to 'div').
	this.setup(element ? element : 'div', null);
	tree = this.tree;
	tree.open_elements.push(tree.html_pointer);
	tree.open_elements.push(tree.body_pointer);
	tree.root_pointer = tree.body_pointer;
};
| 73 | + |
// Read-only `fragment` accessor on every parser: each read asks the tree
// builder for its fragment via tree.getFragment().
Object.defineProperty(Parser.prototype, 'fragment', {
	get: function() {
		var tree = this.tree;
		return tree.getFragment();
	}
});
| 79 | + |
/**
 * Switch the parser to a freshly constructed phase.
 *
 * The constructor is looked up on HTML5.PHASES rather than on a bare
 * `PHASES` global: this file is in strict mode and never declares `PHASES`,
 * so the bare name only resolves if another module leaked it globally.
 *
 * @param {string} name Key of HTML5.PHASES, e.g. 'inBody'.
 * @throws {TypeError} If no phase constructor is registered under `name`.
 */
Parser.prototype.newPhase = function(name) {
	var PhaseConstructor = HTML5.PHASES[name];
	if(typeof PhaseConstructor !== 'function') {
		// Same error type as calling `new undefined(...)`, but naming the culprit.
		throw new TypeError("Unknown parser phase: " + name);
	}
	this.phase = new PhaseConstructor(this, this.tree);
	HTML5.debug('parser.newPhase', name);
	this.phaseName = name;
};
| 85 | + |
/**
 * Dispatch one tokenizer token to the current phase.
 *
 * The handler name is 'process' + token.type. Character, whitespace and
 * comment tokens pass `token.data`; start tags pass name, data and the
 * self-closing flag; end tags pass the name; doctypes pass name, publicId,
 * systemId and correct; EOF passes nothing. Any other token type is
 * recorded through parse_error(token.data, token.datavars).
 *
 * `inScript` is raised (and `scriptBuffer` cleared) before a "script" start
 * tag is processed, and lowered after a "script" end tag has been processed.
 *
 * @param {Object} token Token emitted by the tokenizer.
 */
Parser.prototype.do_token = function(token) {
	var type = token.type;
	var handler = 'process' + type;

	if(type === 'Characters' || type === 'SpaceCharacters' || type === 'Comment') {
		this.phase[handler](token.data);
	} else if(type === 'StartTag') {
		if(token.name == "script") {
			this.inScript = true;
			this.scriptBuffer = '';
		}
		this.phase[handler](token.name, token.data, token.self_closing);
	} else if(type === 'EndTag') {
		this.phase[handler](token.name);
		if(token.name == "script") {
			this.inScript = false;
		}
	} else if(type === 'Doctype') {
		this.phase[handler](token.name, token.publicId, token.systemId, token.correct);
	} else if(type === 'EOF') {
		this.phase[handler]();
	} else {
		this.parse_error(token.data, token.datavars);
	}
};
| 118 | + |
/**
 * Wire the parser to `this.tokenizer` and reset all per-parse state.
 *
 * Every call adds a 'token' listener (forwarding to do_token) and an 'end'
 * listener (re-emitting 'end' on the parser) to the tokenizer, then emits
 * 'setup' with the parser as argument.
 *
 * Without `container` this is a whole-document parse starting in the
 * 'initial' phase. With `container` it is a fragment parse: the tokenizer
 * content model is chosen from the lower-cased container name and the
 * phase is set before reset_insertion_mode() is consulted.
 *
 * @param {string} [container] Name of the fragment's context element.
 * @param {*} [encoding] Not used by this method.
 */
Parser.prototype.setup = function(container, encoding) {
	var self = this;
	this.tokenizer.addListener('token', function(token) {
		self.do_token(token);
	});
	this.tokenizer.addListener('end', function() {
		self.emit('end');
	});
	this.emit('setup', this);

	var inner_html = !!container;
	container = container || 'div';

	this.tree.reset();
	this.first_start_tag = false;
	this.errors = [];

	// FIXME: instantiate tokenizer and plumb. Pass lowercasing options.

	if(!inner_html) {
		this.inner_html = false;
		this.newPhase('initial');
	} else {
		// Content model per context element; anything not listed is PCDATA.
		// NOTE(review): 'plaintext' resolves to HTML5.Models.PLAINTEXT, which
		// looks undefined in constants.js -- confirm.
		var content_models = {
			title: 'RCDATA',
			textarea: 'RCDATA',
			script: 'SCRIPT_CDATA',
			style: 'CDATA',
			xmp: 'CDATA',
			iframe: 'CDATA',
			noembed: 'CDATA',
			noframes: 'CDATA',
			noscript: 'CDATA',
			plaintext: 'PLAINTEXT'
		};
		this.inner_html = container.toLowerCase();
		var model_name = Object.prototype.hasOwnProperty.call(content_models, this.inner_html)
			? content_models[this.inner_html]
			: 'PCDATA';
		this.tokenizer.content_model = HTML5.Models[model_name];

		this.tree.create_structure_elements(inner_html);
		// NOTE(review): this switches on the boolean `inner_html`, not on the
		// container name, so the 'html' and 'head' cases can never match and
		// 'inBody' is always chosen. 'afterHtml' is also not a key of
		// HTML5.PHASES. Kept as is -- confirm the intended behaviour before
		// switching on this.inner_html.
		switch(inner_html) {
		case 'html':
			this.newPhase('afterHtml');
			break;
		case 'head':
			this.newPhase('inHead');
			break;
		default:
			this.newPhase('inBody');
		}
		this.reset_insertion_mode(this.inner_html);
	}

	this.last_phase = null;
};
| 181 | + |
/**
 * Record a parse error and, in strict mode, abort by throwing it.
 *
 * The entry is thrown directly instead of being re-read through
 * `this.errors.last()`: `last` is not a standard Array method, so the old
 * form only worked where Array.prototype had been extended.
 *
 * @param {string} code Error code (a key of the message table).
 * @param {Object} [data] Values for the placeholders of the message.
 * @throws {Array} The `[code, data]` entry, when `this.strict` is truthy.
 */
Parser.prototype.parse_error = function(code, data) {
	// FIXME: this.errors.push([this.tokenizer.position, code, data]);
	var entry = [code, data];
	this.errors.push(entry);
	if(this.strict) throw entry;
};
| 187 | + |
/**
 * Choose the parser phase that matches the current stack of open elements.
 *
 * The stack is walked from the innermost element outwards; the first
 * element whose name selects a phase wins. Nothing happens when the stack
 * is empty.
 *
 * HTML5.TAGMODES is consulted with an own-property check. A plain
 * truthiness test also matches inherited keys, so an element called
 * "constructor" used to select Object's constructor as the "phase name".
 *
 * @param {Object} context Object whose `tagName` replaces the name of the
 *   outermost open element, unless that element is a th or td.
 */
Parser.prototype.reset_insertion_mode = function(context) {
	var open_elements = this.tree.open_elements;
	var has_own = Object.prototype.hasOwnProperty;
	var last = false;

	for(var i = open_elements.length - 1; i >= 0; i--) {
		var node = open_elements[i];
		var node_name = node.tagName.toLowerCase();

		if(node == open_elements[0]) {
			last = true;
			if(node_name != 'th' && node_name != 'td') {
				// XXX
				// assert.ok(this.inner_html);
				// NOTE(review): context.tagName is used verbatim (not
				// lower-cased), and setup() passes a plain string whose
				// tagName is undefined -- confirm what callers should pass.
				node_name = context.tagName;
			}
		}

		// XXX unless node_name is select, colgroup, head or frameset:
		// assert.ok(this.inner_html)

		if(has_own.call(HTML5.TAGMODES, node_name)) {
			this.newPhase(HTML5.TAGMODES[node_name]);
			return;
		}
		if(node_name == 'html') {
			this.newPhase(this.tree.head_pointer ? 'afterHead' : 'beforeHead');
			return;
		}
		if(last) {
			this.newPhase('inBody');
			return;
		}
	}
};
| 224 | + |
/**
 * Identity hook: hands back its argument untouched.
 * NOTE(review): presumably a placeholder for message translation -- confirm.
 *
 * @param {*} str Value to return.
 * @returns {*} `str`, unchanged.
 */
Parser.prototype._ = function(str) {
	var unchanged = str;
	return unchanged;
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 228 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/COPYING |
— | — | @@ -0,0 +1,19 @@ |
| 2 | +Copyright (c) 2010 Aria Stewart <aredridel@nbtsc.org> |
| 3 | + |
| 4 | +Permission is hereby granted, free of charge, to any person obtaining a copy |
| 5 | +of this software and associated documentation files (the "Software"), to deal |
| 6 | +in the Software without restriction, including without limitation the rights |
| 7 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 8 | +copies of the Software, and to permit persons to whom the Software is |
| 9 | +furnished to do so, subject to the following conditions: |
| 10 | + |
| 11 | +The above copyright notice and this permission notice shall be included in |
| 12 | +all copies or substantial portions of the Software. |
| 13 | + |
| 14 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 17 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 18 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 20 | +THE SOFTWARE. |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_html_phase.js |
— | — | @@ -0,0 +1,52 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var HTML5 = require('../../html5'); |
| 4 | + |
| 5 | +exports.Phase = p = function BeforeHtmlPhase(parser, tree) { |
| 6 | + Phase.call(this, parser, tree); |
| 7 | + this.name = 'before_html_phase' |
| 8 | +} |
| 9 | + |
| 10 | +p.prototype = new Phase; |
| 11 | + |
| 12 | +p.prototype.processEOF = function() { |
| 13 | + this.insert_html_element(); |
| 14 | + this.parser.phase.processEOF(); |
| 15 | +} |
| 16 | + |
| 17 | +p.prototype.processComment = function(data) { |
| 18 | + this.tree.insert_comment(data, this.tree.document); |
| 19 | +} |
| 20 | + |
| 21 | +p.prototype.processSpaceCharacters = function(data) { |
| 22 | +} |
| 23 | + |
| 24 | +p.prototype.processCharacters = function(data) { |
| 25 | + this.insert_html_element(); |
| 26 | + this.parser.phase.processCharacters(data); |
| 27 | +} |
| 28 | + |
| 29 | +p.prototype.processStartTag = function(name, attributes, self_closing) { |
| 30 | + if(name == 'html') this.parser.first_start_tag = true; |
| 31 | + this.insert_html_element(); |
| 32 | + this.parser.phase.processStartTag(name, attributes); |
| 33 | +} |
| 34 | + |
| 35 | +p.prototype.processEndTag = function(name) { |
| 36 | + this.insert_html_element(); |
| 37 | + this.parser.phase.processEndTag(name); |
| 38 | +} |
| 39 | + |
| 40 | +p.prototype.insert_html_element = function() { |
| 41 | + var de |
| 42 | + if(de = this.tree.document.documentElement) { |
| 43 | + if(de.tagName != 'HTML') |
| 44 | + HTML5.debug('parser.before_html_phase', 'Non-HTML root element!') |
| 45 | + this.tree.open_elements.push(de) |
| 46 | + while(de.childNodes.length >= 1) de.removeChild(de.firstChild) |
| 47 | + } else { |
| 48 | + var element = this.tree.createElement('html', []); |
| 49 | + this.tree.open_elements.push(element); |
| 50 | + this.tree.document.appendChild(element); |
| 51 | + } |
| 52 | + this.parser.newPhase('beforeHead'); |
| 53 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_html_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 54 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_frameset_phase.js |
— | — | @@ -0,0 +1,42 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var inBody = require('./in_body_phase').Phase; |
| 4 | + |
| 5 | +var start_tag_handlers = { |
| 6 | + html: 'startTagHtml', |
| 7 | + noframes: 'startTagNoframes', |
| 8 | + '-default': 'startTagOther', |
| 9 | +} |
| 10 | + |
| 11 | +var end_tag_handlers = { |
| 12 | + html: 'endTagHtml', |
| 13 | + '-default': 'endTagOther', |
| 14 | +} |
| 15 | + |
| 16 | +exports.Phase = p = function AfterFramesetPhase(parser, tree) { |
| 17 | + Phase.call(this, parser, tree); |
| 18 | + this.start_tag_handlers = start_tag_handlers; |
| 19 | + this.end_tag_handlers = end_tag_handlers; |
| 20 | +} |
| 21 | + |
| 22 | +p.prototype = new Phase; |
| 23 | + |
| 24 | +p.prototype.processCharacters = function(data) { |
| 25 | + this.parse_error("unexpected-char-after-frameset"); |
| 26 | +} |
| 27 | + |
| 28 | +p.prototype.startTagNoframes = function(name, attributes) { |
| 29 | + new inBody(this.parser, this.tree).processStartTag(name, attributes); |
| 30 | +} |
| 31 | + |
| 32 | +p.prototype.startTagOther = function(name, attributes) { |
| 33 | + this.parse_error("unexpected-start-tag-after-frameset", {name: name}); |
| 34 | +} |
| 35 | + |
| 36 | +p.prototype.endTagHtml = function(name) { |
| 37 | + this.parser.last_phase = this.parser.phase; |
| 38 | + this.parser.newPhase('trailingEnd'); |
| 39 | +} |
| 40 | + |
| 41 | +p.prototype.endTagOther = function(name) { |
| 42 | + this.parse_error("unexpected-end-tag-after-frameset", {name: name}); |
| 43 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_frameset_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 44 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_head_phase.js |
— | — | @@ -0,0 +1,167 @@ |
| 2 | +"use strict"; |
| 3 | +var Phase = require('./phase').Phase; |
| 4 | +var HTML5 = require('../../html5'); |
| 5 | + |
// Dispatch tables: tag name -> name of the handler method on the phase.
// "-default" names the handler for every tag that is not listed.
// NOTE(review): neither 'startTagHtml' nor 'startTagType' is defined in this
// module; presumably they come from Phase (and 'type' is not an HTML
// element name) -- confirm.
var start_tag_handlers = {
	html: 'startTagHtml',
	head: 'startTagHead',
	title: 'startTagTitle',
	type: 'startTagType',
	style: 'startTagStyle',
	script: 'startTagScript',
	noscript: 'startTagNoScript',
	base: 'startTagBaseLinkMeta',
	link: 'startTagBaseLinkMeta',
	meta: 'startTagBaseLinkMeta',
	"-default": 'startTagOther'
};

var end_tag_handlers = {
	head: 'endTagHead',
	html: 'endTagImplyAfterHead',
	body: 'endTagImplyAfterHead',
	p: 'endTagImplyAfterHead',
	br: 'endTagImplyAfterHead',
	title: 'endTagTitleStyleScriptNoscript',
	style: 'endTagTitleStyleScriptNoscript',
	script: 'endTagTitleStyleScriptNoscript',
	noscript: 'endTagTitleStyleScriptNoscript',
	"-default": 'endTagOther'
};

// Innermost element on the stack of open elements.
function current_node(tree) {
	return tree.open_elements[tree.open_elements.length - 1];
}

// Lower-cased tag name of the innermost open element.
function current_tag_name(tree) {
	return current_node(tree).tagName.toLowerCase();
}

// Append `element` to the head while the parser really is in the 'inHead'
// phase with a head element; otherwise append it to the innermost open element.
function append_in_head_context(phase, element) {
	if(phase.tree.head_pointer && phase.parser.phaseName == 'inHead') {
		phase.appendToHead(element);
	} else {
		current_node(phase.tree).appendChild(element);
	}
}

/**
 * "In head" phase: handles the elements that live inside head and closes
 * the head as soon as anything else shows up.
 *
 * `p` is declared with `var`: this module is in strict mode, where the
 * former bare `p = ...` assignment throws a ReferenceError unless some
 * other module has already leaked a global `p`.
 *
 * @constructor
 * @param {Object} parser Owning parser.
 * @param {Object} tree Tree builder of that parser.
 */
var p = exports.Phase = function InHeadPhase(parser, tree) {
	Phase.call(this, parser, tree);
	this.name = 'in_head_phase';
	this.start_tag_handlers = start_tag_handlers;
	this.end_tag_handlers = end_tag_handlers;
};

p.prototype = new Phase;

/**
 * End of input: report and pop a title, style or script that is still
 * open, leave the head, and let the following phase see the EOF.
 */
p.prototype.processEOF = function() {
	var name = current_tag_name(this.tree);
	if(['title', 'style', 'script'].indexOf(name) != -1) {
		this.parse_error("expected-named-closing-tag-but-got-eof", {name: name});
		this.tree.pop_element();
	}

	this.anything_else();

	this.parser.phase.processEOF();
};

/**
 * Text inside title, style, script or noscript is inserted as is; any
 * other text ends the head and is re-dispatched to the following phase.
 */
p.prototype.processCharacters = function(data) {
	var name = current_tag_name(this.tree);
	if(['title', 'style', 'script', 'noscript'].indexOf(name) != -1) {
		this.tree.insert_text(data);
	} else {
		this.anything_else();
		this.parser.phase.processCharacters(data);
	}
};

/** A second head start tag is reported and otherwise ignored. */
p.prototype.startTagHead = function(name, attributes) {
	this.parse_error('two-heads-are-not-better-than-one');
};

/** title: insert into the head and switch the tokenizer to RCDATA. */
p.prototype.startTagTitle = function(name, attributes) {
	var element = this.tree.createElement(name, attributes);
	this.appendToHead(element);
	this.tree.open_elements.push(element);
	this.parser.tokenizer.content_model = HTML5.Models.RCDATA;
};

/**
 * style: insert into the head while in the 'inHead' phase, otherwise let
 * the tree insert it normally; the tokenizer switches to CDATA.
 */
p.prototype.startTagStyle = function(name, attributes) {
	if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
		var element = this.tree.createElement(name, attributes);
		this.appendToHead(element);
		this.tree.open_elements.push(element);
	} else {
		this.tree.insert_element(name, attributes);
	}
	this.parser.tokenizer.content_model = HTML5.Models.CDATA;
};

/** noscript: insert, open it, and switch the tokenizer to CDATA. */
p.prototype.startTagNoScript = function(name, attributes) {
	// XXX Need to decide whether to implement the scripting disabled case
	var element = this.tree.createElement(name, attributes);
	append_in_head_context(this, element);
	this.tree.open_elements.push(element);
	this.parser.tokenizer.content_model = HTML5.Models.CDATA;
};

/** script: insert, open it, and switch the tokenizer to SCRIPT_CDATA. */
p.prototype.startTagScript = function(name, attributes) {
	// XXX Inner HTML case may be wrong
	var element = this.tree.createElement(name, attributes);
	//element.flags.push('parser-inserted');
	append_in_head_context(this, element);
	this.tree.open_elements.push(element);
	this.parser.tokenizer.content_model = HTML5.Models.SCRIPT_CDATA;
};

/** base, link and meta are inserted but never pushed as open elements. */
p.prototype.startTagBaseLinkMeta = function(name, attributes) {
	var element = this.tree.createElement(name, attributes);
	append_in_head_context(this, element);
};

/** Any other start tag ends the head and is re-dispatched. */
p.prototype.startTagOther = function(name, attributes) {
	this.anything_else();
	this.parser.phase.processStartTag(name, attributes);
};

/**
 * </head>: pop the head if it is the innermost open element, otherwise
 * report the tag; either way continue in the 'afterHead' phase.
 */
p.prototype.endTagHead = function(name) {
	if(current_tag_name(this.tree) == 'head') {
		this.tree.pop_element();
	} else {
		this.parse_error('unexpected-end-tag', {name: 'head'});
	}
	this.parser.newPhase('afterHead');
};

/** </html>, </body>, </p>, </br>: end the head, then re-dispatch the tag. */
p.prototype.endTagImplyAfterHead = function(name) {
	this.anything_else();
	this.parser.phase.processEndTag(name);
};

/** Closes title/style/script/noscript if it is the innermost open element. */
p.prototype.endTagTitleStyleScriptNoscript = function(name) {
	if(current_tag_name(this.tree) == name.toLowerCase()) {
		this.tree.pop_element();
	} else {
		this.parse_error('unexpected-end-tag', {name: name});
	}
};

/** Any other end tag ends the head; the tag itself is not re-dispatched. */
p.prototype.endTagOther = function(name) {
	this.anything_else();
};

/**
 * Leave the head: behave like </head> when the head is the innermost open
 * element, otherwise just switch to the 'afterHead' phase.
 */
p.prototype.anything_else = function() {
	if(current_tag_name(this.tree) == 'head') {
		this.endTagHead('head');
	} else {
		this.parser.newPhase('afterHead');
	}
};

// protected

/**
 * Append `element` to the head element, or to the innermost open element
 * when the tree has no head pointer.
 */
p.prototype.appendToHead = function(element) {
	if(!this.tree.head_pointer) {
		// FIXME assert(this.parser.inner_html)
		current_node(this.tree).appendChild(element);
	} else {
		this.tree.head_pointer.appendChild(element);
	}
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_head_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 169 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/initial_phase.js |
— | — | @@ -0,0 +1,133 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | + |
| 4 | +exports.Phase = p = function InitialPhase(parser, tree) { |
| 5 | + Phase.call(this, parser, tree); |
| 6 | + this.name = 'initial_phase'; |
| 7 | +} |
| 8 | + |
| 9 | +p.prototype = new Phase; |
| 10 | + |
/**
 * End of input before any doctype: report the missing doctype, switch to
 * the 'beforeHTML' phase and let that phase handle the EOF.
 */
p.prototype.processEOF = function() {
	this.parse_error("expected-doctype-but-got-eof");
	var parser = this.parser;
	parser.newPhase('beforeHTML');
	parser.phase.processEOF();
};
| 16 | + |
| 17 | +p.prototype.processComment = function(data) { |
| 18 | + this.tree.insert_comment(data, this.tree.document); |
| 19 | +} |
| 20 | + |
| 21 | +p.prototype.processDoctype = function(name, publicId, systemId, correct) { |
| 22 | + if(name.toLowerCase() != 'html' || publicId || systemId) { |
| 23 | + this.parse_error("unknown-doctype"); |
| 24 | + } |
| 25 | + |
| 26 | + // XXX need to update DOCTYPE tokens |
| 27 | + this.tree.insert_doctype(name, publicId, systemId); |
| 28 | + |
| 29 | + publicId = (publicId || '').toString().toUpperCase(); |
| 30 | + |
| 31 | + if(name.toLowerCase() != 'html') { |
| 32 | + // XXX quirks mode |
| 33 | + } else { |
| 34 | + if((["+//silmaril//dtd html pro v0r11 19970101//en", |
| 35 | + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en", |
| 36 | + "-//as//dtd html 3.0 aswedit + extensions//en", |
| 37 | + "-//ietf//dtd html 2.0 level 1//en", |
| 38 | + "-//ietf//dtd html 2.0 level 2//en", |
| 39 | + "-//ietf//dtd html 2.0 strict level 1//en", |
| 40 | + "-//ietf//dtd html 2.0 strict level 2//en", |
| 41 | + "-//ietf//dtd html 2.0 strict//en", |
| 42 | + "-//ietf//dtd html 2.0//en", |
| 43 | + "-//ietf//dtd html 2.1e//en", |
| 44 | + "-//ietf//dtd html 3.0//en", |
| 45 | + "-//ietf//dtd html 3.0//en//", |
| 46 | + "-//ietf//dtd html 3.2 final//en", |
| 47 | + "-//ietf//dtd html 3.2//en", |
| 48 | + "-//ietf//dtd html 3//en", |
| 49 | + "-//ietf//dtd html level 0//en", |
| 50 | + "-//ietf//dtd html level 0//en//2.0", |
| 51 | + "-//ietf//dtd html level 1//en", |
| 52 | + "-//ietf//dtd html level 1//en//2.0", |
| 53 | + "-//ietf//dtd html level 2//en", |
| 54 | + "-//ietf//dtd html level 2//en//2.0", |
| 55 | + "-//ietf//dtd html level 3//en", |
| 56 | + "-//ietf//dtd html level 3//en//3.0", |
| 57 | + "-//ietf//dtd html strict level 0//en", |
| 58 | + "-//ietf//dtd html strict level 0//en//2.0", |
| 59 | + "-//ietf//dtd html strict level 1//en", |
| 60 | + "-//ietf//dtd html strict level 1//en//2.0", |
| 61 | + "-//ietf//dtd html strict level 2//en", |
| 62 | + "-//ietf//dtd html strict level 2//en//2.0", |
| 63 | + "-//ietf//dtd html strict level 3//en", |
| 64 | + "-//ietf//dtd html strict level 3//en//3.0", |
| 65 | + "-//ietf//dtd html strict//en", |
| 66 | + "-//ietf//dtd html strict//en//2.0", |
| 67 | + "-//ietf//dtd html strict//en//3.0", |
| 68 | + "-//ietf//dtd html//en", |
| 69 | + "-//ietf//dtd html//en//2.0", |
| 70 | + "-//ietf//dtd html//en//3.0", |
| 71 | + "-//metrius//dtd metrius presentational//en", |
| 72 | + "-//microsoft//dtd internet explorer 2.0 html strict//en", |
| 73 | + "-//microsoft//dtd internet explorer 2.0 html//en", |
| 74 | + "-//microsoft//dtd internet explorer 2.0 tables//en", |
| 75 | + "-//microsoft//dtd internet explorer 3.0 html strict//en", |
| 76 | + "-//microsoft//dtd internet explorer 3.0 html//en", |
| 77 | + "-//microsoft//dtd internet explorer 3.0 tables//en", |
| 78 | + "-//netscape comm. corp.//dtd html//en", |
| 79 | + "-//netscape comm. corp.//dtd strict html//en", |
| 80 | + "-//o'reilly and associates//dtd html 2.0//en", |
| 81 | + "-//o'reilly and associates//dtd html extended 1.0//en", |
| 82 | + "-//spyglass//dtd html 2.0 extended//en", |
| 83 | + "-//sq//dtd html 2.0 hotmetal + extensions//en", |
| 84 | + "-//sun microsystems corp.//dtd hotjava html//en", |
| 85 | + "-//sun microsystems corp.//dtd hotjava strict html//en", |
| 86 | + "-//w3c//dtd html 3 1995-03-24//en", |
| 87 | + "-//w3c//dtd html 3.2 draft//en", |
| 88 | + "-//w3c//dtd html 3.2 final//en", |
| 89 | + "-//w3c//dtd html 3.2//en", |
| 90 | + "-//w3c//dtd html 3.2s draft//en", |
| 91 | + "-//w3c//dtd html 4.0 frameset//en", |
| 92 | + "-//w3c//dtd html 4.0 transitional//en", |
| 93 | + "-//w3c//dtd html experimental 19960712//en", |
| 94 | + "-//w3c//dtd html experimental 970421//en", |
| 95 | + "-//w3c//dtd w3 html//en", |
| 96 | + "-//w3o//dtd w3 html 3.0//en", |
| 97 | + "-//w3o//dtd w3 html 3.0//en//", |
| 98 | + "-//w3o//dtd w3 html strict 3.0//en//", |
| 99 | + "-//webtechs//dtd mozilla html 2.0//en", |
| 100 | + "-//webtechs//dtd mozilla html//en", |
| 101 | + "-/w3c/dtd html 4.0 transitional/en", |
| 102 | + "html"].indexOf(publicId) != -1) || |
| 103 | + (systemId == null && ["-//w3c//dtd html 4.01 frameset//EN", |
| 104 | + "-//w3c//dtd html 4.01 transitional//EN"].indexOf(publicId) != -1) || |
| 105 | + (systemId == |
| 106 | + "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) { |
| 107 | + // XXX quirks mode |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + this.parser.newPhase('beforeHTML'); |
| 112 | +} |
| 113 | + |
| 114 | +p.prototype.processSpaceCharacters = function(data) { |
| 115 | + |
| 116 | +} |
| 117 | + |
| 118 | +p.prototype.processCharacters = function(data) { |
| 119 | + this.parse_error('expected-doctype-but-got-chars'); |
| 120 | + this.parser.newPhase('beforeHTML'); |
| 121 | + this.parser.phase.processCharacters(data); |
| 122 | +} |
| 123 | + |
| 124 | +p.prototype.processStartTag = function(name, attributes, self_closing) { |
| 125 | + this.parse_error('expected-doctype-but-got-start-tag', {name: name}); |
| 126 | + this.parser.newPhase('beforeHTML'); |
| 127 | + this.parser.phase.processStartTag(name, attributes); |
| 128 | +} |
| 129 | + |
| 130 | +p.prototype.processEndTag = function(name) { |
| 131 | + this.parse_error('expected-doctype-but-got-end-tag', {name: name}); |
| 132 | + this.parser.newPhase('beforeHTML'); |
| 133 | + this.parser.phase.processEndTag(name); |
| 134 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/initial_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 135 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_body_phase.js |
— | — | @@ -0,0 +1,108 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var inTable = require('./in_table_phase').Phase |
| 4 | + |
| 5 | +var starts = { |
| 6 | + html: 'startTagHtml', |
| 7 | + tr: 'startTagTr', |
| 8 | + td: 'startTagTableCell', |
| 9 | + th: 'startTagTableCell', |
| 10 | + caption: 'startTagTableOther', |
| 11 | + col: 'startTagTableOther', |
| 12 | + colgroup: 'startTagTableOther', |
| 13 | + tbody: 'startTagTableOther', |
| 14 | + tfoot: 'startTagTableOther', |
| 15 | + thead: 'startTagTableOther', |
| 16 | + '-default': 'startTagOther', |
| 17 | +} |
| 18 | + |
| 19 | +var ends = { |
| 20 | + table: 'endTagTable', |
| 21 | + tbody: 'endTagTableRowGroup', |
| 22 | + tfoot: 'endTagTableRowGroup', |
| 23 | + thead: 'endTagTableRowGroup', |
| 24 | + body: 'endTagIgnore', |
| 25 | + caption: 'endTagIgnore', |
| 26 | + col: 'endTagIgnore', |
| 27 | + colgroup: 'endTagIgnore', |
| 28 | + html: 'endTagIgnore', |
| 29 | + td: 'endTagIgnore', |
| 30 | + th: 'endTagIgnore', |
| 31 | + tr: 'endTagIgnore', |
| 32 | + '-default': 'endTagOther', |
| 33 | +} |
| 34 | + |
| 35 | +exports.Phase = function InTableBodyPhase(parser, tree) { |
| 36 | + Phase.call(this, parser, tree); |
| 37 | + this.start_tag_handlers = starts; |
| 38 | + this.end_tag_handlers = ends; |
| 39 | +} |
| 40 | + |
| 41 | +var p = exports.Phase.prototype = new Phase; |
| 42 | + |
| 43 | +p.processCharacters = function(data) { |
| 44 | + new inTable(this.parser, this.tree).processCharacters(data); |
| 45 | +} |
| 46 | + |
| 47 | +p.startTagTr = function(name, attributes) { |
| 48 | + this.clearStackToTableBodyContext(); |
| 49 | + this.tree.insert_element(name, attributes); |
| 50 | + this.parser.newPhase('inRow'); |
| 51 | +} |
| 52 | + |
| 53 | +p.startTagTableCell = function(name, attributes) { |
| 54 | + this.parse_error("unexpected-cell-in-table-body", {name: name}) |
| 55 | + this.startTagTr('tr', {}) |
| 56 | + this.parser.phase.processStartTag(name, attributes); |
| 57 | +} |
| 58 | + |
| 59 | +p.startTagTableOther = function(name, attributes) { |
| 60 | + // XXX any ideas on how to share this with endTagTable |
| 61 | + if(this.inScope('tbody', true) || this.inScope('thead', true) || this.inScope('tfoot', true)) { |
| 62 | + this.clearStackToTableBodyContext(); |
| 63 | + this.endTagTableRowGroup(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()); |
| 64 | + this.parser.phase.processStartTag(name, attributes); |
| 65 | + } else { |
| 66 | + // inner_html case |
| 67 | + this.parse_error |
| 68 | + } |
| 69 | +} |
| 70 | + |
| 71 | +p.startTagOther = function(name, attributes) { |
| 72 | + new inTable(this.parser, this.tree).processStartTag(name, attributes); |
| 73 | +} |
| 74 | + |
| 75 | +p.endTagTableRowGroup = function(name) { |
| 76 | + if(this.inScope(name, true)) { |
| 77 | + this.clearStackToTableBodyContext(); |
| 78 | + this.tree.pop_element(); |
| 79 | + this.parser.newPhase('inTable'); |
| 80 | + } else { |
| 81 | + this.parse_error('unexpected-end-tag-in-table-body', {name: name}) |
| 82 | + } |
| 83 | +} |
| 84 | + |
| 85 | +p.endTagTable = function(name) { |
| 86 | + if(this.inScope('tbody', true) || this.inScope('thead', true) || this.inScope('tfoot', true)) { |
| 87 | + this.clearStackToTableBodyContext(); |
| 88 | + this.endTagTableRowGroup(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()) |
| 89 | + this.parser.phase.processEndTag(name) |
| 90 | + } else { |
| 91 | + // inner_html case |
| 92 | + this.parse_error(); |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +p.endTagIgnore = function(name) { |
| 97 | + this.parse_error("unexpected-end-tag-in-table-body", {name: name}); |
| 98 | +} |
| 99 | + |
| 100 | +p.endTagOther = function(name) { |
| 101 | + new inTable(this.parser, this.tree).processEndTag(name); |
| 102 | +} |
| 103 | + |
| 104 | +p.clearStackToTableBodyContext = function() { |
| 105 | + while(name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase(), name != 'tbody' && name != 'tfoot' && name != 'thead' && name != 'html') { |
| 106 | + this.parse_error("unexpected-implied-end-tag-in-table", {name: name}) |
| 107 | + this.tree.pop_element(); |
| 108 | + } |
| 109 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_body_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 110 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/root_element_phase.js |
— | — | @@ -0,0 +1,42 @@ |
| 2 | +var Phase = require('./phase').Phase |
| 3 | + |
| 4 | +exports.Phase = function rootElementPhase(parser, tree) { |
| 5 | + Phase.call(this, parser, tree) |
| 6 | +} |
| 7 | + |
| 8 | +var p = exports.Phase.prototype = new Phase; |
| 9 | + |
| 10 | +p.processEOF = function() { |
| 11 | + this.insert_html_element() |
| 12 | + this.parser.phase.processEOF() |
| 13 | +} |
| 14 | + |
| 15 | +p.processComment = function(data) { |
| 16 | + this.tree.insert_comment(data, this.tree.document) |
| 17 | +} |
| 18 | + |
| 19 | +p.processSpaceCharacters = function(data) { |
| 20 | +} |
| 21 | + |
| 22 | +p.processCharacters = function(data) { |
| 23 | + this.insert_html_element() |
| 24 | + this.parser.phase.processCharacters(data) |
| 25 | +} |
| 26 | + |
| 27 | +p.processStartTag = function(name, attributes) { |
| 28 | + if(name == 'html') this.parser.first_start_tag = true |
| 29 | + this.insert_html_element() |
| 30 | + this.parser.phase.processStartTag(name, attributes) |
| 31 | +} |
| 32 | + |
| 33 | +p.processEndTag = function(name) { |
| 34 | + this.insert_html_element() |
| 35 | + this.parser.phase.processEndTag(name) |
| 36 | +} |
| 37 | + |
| 38 | +p.insert_html_element = function() { |
| 39 | + var element = this.tree.createElement('html', {}) |
| 40 | + this.tree.open_elements.push(element) |
| 41 | + this.tree.document.appendChild(element) |
| 42 | + this.parser.newPhase('beforeHead') |
| 43 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/root_element_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 44 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_frameset_phase.js |
— | — | @@ -0,0 +1,67 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var inBody = require('./in_body_phase').Phase; |
| 4 | + |
| 5 | +var start_tag_handlers = { |
| 6 | + html: 'startTagHtml', |
| 7 | + frameset: 'startTagFrameset', |
| 8 | + frame: 'startTagFrame', |
| 9 | + noframes: 'startTagNoframes', |
| 10 | + "-default": 'startTagOther' |
| 11 | +} |
| 12 | + |
| 13 | +var end_tag_handlers = { |
| 14 | + frameset: 'endTagFrameset', |
| 15 | + noframes: 'endTagNoframes', |
| 16 | + '-default': 'endTagOther', |
| 17 | +} |
| 18 | + |
| 19 | +exports.Phase = p = function InFramesetPhase(parser, tree) { |
| 20 | + Phase.call(this, parser, tree); |
| 21 | + this.start_tag_handlers = start_tag_handlers; |
| 22 | + this.end_tag_handlers = end_tag_handlers; |
| 23 | +} |
| 24 | + |
| 25 | +p.prototype = new Phase; |
| 26 | + |
| 27 | +p.prototype.processCharacters = function(data) { |
| 28 | + this.parse_error("unexpected-char-in-frameset"); |
| 29 | +} |
| 30 | + |
| 31 | +p.prototype.startTagFrameset = function(name, attributes) { |
| 32 | + this.tree.insert_element(name, attributes); |
| 33 | +} |
| 34 | + |
| 35 | +p.prototype.startTagFrame = function(name, attributes) { |
| 36 | + this.tree.insert_element(name, attributes); |
| 37 | + this.tree.pop_element(); |
| 38 | +} |
| 39 | + |
| 40 | +p.prototype.startTagNoframes = function(name, attributes) { |
| 41 | + new inBody(this.parser, this.tree).processStartTag(name, attributes); |
| 42 | +} |
| 43 | + |
| 44 | +p.prototype.startTagOther = function(name, attributes) { |
| 45 | + this.parse_error("unexpected-start-tag-in-frameset", {name: name}); |
| 46 | +} |
| 47 | + |
| 48 | +p.prototype.endTagFrameset = function(name, attributes) { |
| 49 | + if(this.tree.open_elements.last().tagName.toLowerCase() == 'html') { |
| 50 | + // inner_html case |
| 51 | + this.parse_error("unexpected-frameset-in-frameset-innerhtml"); |
| 52 | + } else { |
| 53 | + this.tree.pop_element(); |
| 54 | + } |
| 55 | + |
| 56 | + if(!this.parser.inner_html && this.tree.open_elements.last().tagName.toLowerCase() != 'frameset') { |
| 57 | + // If we're not in inner_html mode an the current node is not a "frameset" element (anymore) then switch |
| 58 | + this.parser.newPhase('afterFrameset'); |
| 59 | + } |
| 60 | +} |
| 61 | + |
| 62 | +p.prototype.endTagNoframes = function(name) { |
| 63 | + new inBody(this.parser, this.tree).processEndTag(name); |
| 64 | +} |
| 65 | + |
| 66 | +p.prototype.endTagOther = function(name) { |
| 67 | + this.parse_error("unexpected-end-tag-in-frameset", {name: name}); |
| 68 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_frameset_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 69 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_row_phase.js |
— | — | @@ -0,0 +1,113 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var HTML5 = require('../../html5') |
| 4 | +var inTable = require('./in_table_phase').Phase; |
| 5 | +var assert = require('assert'); |
| 6 | + |
| 7 | +var starts = { |
| 8 | + html: 'startTagHtml', |
| 9 | + td: 'startTagTableCell', |
| 10 | + th: 'startTagTableCell', |
| 11 | + caption: 'startTagTableOther', |
| 12 | + col: 'startTagTableOther', |
| 13 | + colgroup: 'startTagTableOther', |
| 14 | + tbody: 'startTagTableOther', |
| 15 | + tfoot: 'startTagTableOther', |
| 16 | + thead: 'startTagTableOther', |
| 17 | + tr: 'startTagTableOther', |
| 18 | + '-default': 'startTagOther', |
| 19 | +} |
| 20 | + |
| 21 | +var ends = { |
| 22 | + tr: 'endTagTr', |
| 23 | + table: 'endTagTable', |
| 24 | + tbody: 'endTagTableRowGroup', |
| 25 | + tfoot: 'endTagTableRowGroup', |
| 26 | + thead: 'endTagTableRowGroup', |
| 27 | + body: 'endTagIgnore', |
| 28 | + caption: 'endTagIgnore', |
| 29 | + col: 'endTagIgnore', |
| 30 | + colgroup: 'endTagIgnore', |
| 31 | + html: 'endTagIgnore', |
| 32 | + td: 'endTagIgnore', |
| 33 | + th: 'endTagIgnore', |
| 34 | + '-default': 'endTagOther', |
| 35 | +} |
| 36 | + |
| 37 | +exports.Phase = function InRowPhase(parser, tree) { |
| 38 | + Phase.call(this, parser, tree); |
| 39 | + this.start_tag_handlers = starts; |
| 40 | + this.end_tag_handlers = ends; |
| 41 | +} |
| 42 | + |
| 43 | +var p = exports.Phase.prototype = new Phase; |
| 44 | + |
| 45 | +p.processCharacters = function(data) { |
| 46 | + new inTable(this.parser, this.tree).processCharacters(data); |
| 47 | +} |
| 48 | + |
| 49 | +p.startTagTableCell = function(name, attributes) { |
| 50 | + this.clearStackToTableRowContext(); |
| 51 | + this.tree.insert_element(name, attributes); |
| 52 | + this.parser.newPhase('inCell'); |
| 53 | + this.tree.activeFormattingElements.push(HTML5.Marker); |
| 54 | +} |
| 55 | + |
| 56 | +p.startTagTableOther = function(name, attributes) { |
| 57 | + var ignoreEndTag = this.ignoreEndTagTr(); |
| 58 | + this.endTagTr('tr'); |
| 59 | + // XXX how are we sure it's always ignored in the inner_html case? |
| 60 | + if(!ignoreEndTag) this.parser.phase.processStartTag(name, attributes); |
| 61 | +} |
| 62 | + |
| 63 | +p.startTagOther = function(name, attributes) { |
| 64 | + new inTable(this.parser, this.tree).processStartTag(name, attributes); |
| 65 | +} |
| 66 | + |
| 67 | +p.endTagTr = function(name) { |
| 68 | + if(this.ignoreEndTagTr()) { |
| 69 | + assert.ok(this.parser.inner_html); |
| 70 | + this.parse_error |
| 71 | + } else { |
| 72 | + this.clearStackToTableRowContext(); |
| 73 | + this.tree.pop_element(); |
| 74 | + this.parser.newPhase('inTableBody'); |
| 75 | + } |
| 76 | +} |
| 77 | + |
| 78 | +p.endTagTable = function(name) { |
| 79 | + var ignoreEndTag = this.ignoreEndTagTr(); |
| 80 | + this.endTagTr('tr'); |
| 81 | + // Reprocess the current tag if the tr end tag was not ignored |
| 82 | + // XXX how are we sure it's always ignored in the inner_html case? |
| 83 | + if(!ignoreEndTag) this.parser.phase.processEndTag(name) |
| 84 | +} |
| 85 | + |
| 86 | +p.endTagTableRowGroup = function(name) { |
| 87 | + if(this.inScope(name, true)) { |
| 88 | + this.endTagTr('tr'); |
| 89 | + this.parser.phase.processEndTag(name); |
| 90 | + } else { |
| 91 | + // inner_html case |
| 92 | + this.parse_error(); |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +p.endTagIgnore = function(name) { |
| 97 | + this.parse_error("unexpected-end-tag-in-table-row", {name: name}) |
| 98 | +} |
| 99 | + |
| 100 | +p.endTagOther = function(name) { |
| 101 | + new inTable(this.parser, this.tree).processEndTag(name); |
| 102 | +} |
| 103 | + |
| 104 | +p.clearStackToTableRowContext = function() { |
| 105 | + var name; |
| 106 | + while(name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase(), (name != 'tr' && name != 'html')) { |
| 107 | + this.parse_error("unexpected-implied-end-tag-in-table-row", {name: name}) |
| 108 | + this.tree.pop_element(); |
| 109 | + } |
| 110 | +} |
| 111 | + |
| 112 | +p.ignoreEndTagTr = function() { |
| 113 | + return !this.inScope('tr', true); |
| 114 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_row_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 115 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/trailing_end_phase.js |
— | — | @@ -0,0 +1,35 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | + |
| 4 | +exports.Phase = p = function TrailingEndPhase(parser, tree) { |
| 5 | + Phase.call(this, parser, tree); |
| 6 | +} |
| 7 | + |
| 8 | +p.prototype = new Phase; |
| 9 | + |
| 10 | +p.prototype.processEOF = function() {}; |
| 11 | + |
| 12 | +p.prototype.processComment = function(data) { |
| 13 | + this.tree.insert_comment(data); |
| 14 | +} |
| 15 | + |
| 16 | +p.prototype.processSpaceCharacters = function(data) { |
| 17 | + this.parser.last_phase.processSpaceCharacters(data); |
| 18 | +} |
| 19 | + |
| 20 | +p.prototype.processCharacters = function(data) { |
| 21 | + this.parse_error('expected-eof-but-got-char'); |
| 22 | + this.parser.phase = this.parser.last_phase; |
| 23 | + this.parser.phase.processCharacters(data); |
| 24 | +} |
| 25 | + |
| 26 | +p.prototype.processStartTag = function(name, attributes) { |
| 27 | + this.parse_error('expected-eof-but-got-start-tag'); |
| 28 | + this.parser.phase = this.parser.last_phase; |
| 29 | + this.parser.phase.processStartTag(name, attributes); |
| 30 | +} |
| 31 | + |
| 32 | +p.prototype.processEndTag = function(name, attributes) { |
| 33 | + this.parse_error('expected-eof-but-got-end-tag'); |
| 34 | + this.parser.phase = this.parser.last_phase; |
| 35 | + this.parser.phase.processEndTag(name); |
| 36 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/trailing_end_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 37 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_body_phase.js |
— | — | @@ -0,0 +1,48 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var inBody = require('./in_body_phase').Phase; |
| 4 | + |
| 5 | +var start_tag_handlers = { |
| 6 | + html: 'startTagHtml', |
| 7 | + '-default': 'startTagOther', |
| 8 | +} |
| 9 | + |
| 10 | +exports.Phase = p = function AfterAfterBodyPhase(parser, tree) { |
| 11 | + Phase.call(this, parser, tree); |
| 12 | + this.start_tag_handlers = start_tag_handlers; |
| 13 | +} |
| 14 | + |
| 15 | +p.prototype = new Phase; |
| 16 | + |
| 17 | +p.prototype.processComment = function(data) { |
| 18 | + this.tree.insert_comment(data); |
| 19 | +} |
| 20 | + |
| 21 | +p.prototype.processDoctype = function(data) { |
| 22 | + new inBody(this.parser, this.tree).processDoctype(data); |
| 23 | +} |
| 24 | + |
| 25 | +p.prototype.processSpaceCharacters = function(data) { |
| 26 | + new inBody(this.parser, this.tree).processSpaceCharacters(data); |
| 27 | +} |
| 28 | + |
| 29 | +p.prototype.startTagHtml = function(data) { |
| 30 | + new inBody(this.parser, this.tree).startTagHtml(data); |
| 31 | +} |
| 32 | + |
| 33 | +p.prototype.startTagOther = function(name, attributes) { |
| 34 | + this.parse_error('unexpected-start-tag', {name: name}); |
| 35 | + this.parser.newPhase('inBody'); |
| 36 | + this.parser.phase.processStartTag(name, attributes); |
| 37 | +} |
| 38 | + |
| 39 | +p.prototype.endTagOther = function(name) { |
| 40 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 41 | + this.parser.newPhase('inBody'); |
| 42 | + this.parser.phase.processEndTag(name); |
| 43 | +} |
| 44 | + |
| 45 | +p.prototype.processCharacters = function(data) { |
| 46 | + this.parse_error('unexpected-char-after-body'); |
| 47 | + this.parser.newPhase('inBody'); |
| 48 | + this.parser.phase.processCharacters(data); |
| 49 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_body_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 50 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_column_group_phase.js |
— | — | @@ -0,0 +1,65 @@ |
| 2 | +var Phase = require('./phase').Phase |
| 3 | +var HTML5 = require('../../html5') |
| 4 | +var assert = require('assert') |
| 5 | + |
| 6 | +var start = { |
| 7 | + html: 'startTagHtml', |
| 8 | + col: 'startTagCol', |
| 9 | + '-default': 'startTagOther', |
| 10 | +} |
| 11 | + |
| 12 | +var end = { |
| 13 | + colgroup: 'endTagColgroup', |
| 14 | + col: 'endTagCol', |
| 15 | + '-default': 'endTagOther', |
| 16 | +} |
| 17 | + |
| 18 | +exports.Phase = function InColgroupPhase(parser, tree) { |
| 19 | + Phase.call(this, parser, tree) |
| 20 | + this.start_tag_handlers = start |
| 21 | + this.end_tag_handlers = end |
| 22 | +} |
| 23 | + |
| 24 | +var p = exports.Phase.prototype = new Phase; |
| 25 | + |
| 26 | +p.ignoreEndTagColgroup = function() { |
| 27 | + return this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() == 'html' |
| 28 | +} |
| 29 | + |
| 30 | +p.processCharacters = function(data) { |
| 31 | + var ignoreEndTag = this.ignoreEndTagColgroup() |
| 32 | + this.endTagColgroup('colgroup') |
| 33 | + if(!ignoreEndTag) this.parser.phase.processCharacters(data) |
| 34 | +} |
| 35 | + |
| 36 | +p.startTagCol = function(name, attributes) { |
| 37 | + this.tree.insert_element(name, attributes) |
| 38 | + this.tree.pop_element() |
| 39 | +} |
| 40 | + |
| 41 | +p.startTagOther = function(name, attributes) { |
| 42 | + var ignoreEndTag = this.ignoreEndTagColgroup() |
| 43 | + this.endTagColgroup('colgroup') |
| 44 | + if(!ignoreEndTag) this.parser.phase.processStartTag(name, attributes) |
| 45 | +} |
| 46 | + |
| 47 | +p.endTagColgroup = function(name) { |
| 48 | + if(this.ignoreEndTagColgroup()) { |
| 49 | + // inner_html case |
| 50 | + assert.ok(this.parser.inner_html) |
| 51 | + this.parse_error() |
| 52 | + } else { |
| 53 | + this.tree.pop_element() |
| 54 | + this.parser.newPhase('inTable') |
| 55 | + } |
| 56 | +} |
| 57 | + |
| 58 | +p.endTagCol = function(name) { |
| 59 | + this.parse_error("no-end-tag", {name: 'col'}) |
| 60 | +} |
| 61 | + |
| 62 | +p.endTagOther = function(name) { |
| 63 | + var ignoreEndTag = this.ignoreEndTagColgroup() |
| 64 | + this.endTagColgroup('colgroup') |
| 65 | + if(!ignoreEndTag) this.parser.phase.processEndTag(name) |
| 66 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_column_group_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 67 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_in_table_phase.js |
— | — | @@ -0,0 +1,62 @@ |
| 2 | +var HTML5 = require('../../html5') |
| 3 | +var Phase = require('./phase').Phase; |
| 4 | +var inSelect = require('./in_select_phase').Phase; |
| 5 | + |
| 6 | +var start_tag_handlers = { |
| 7 | + caption: 'startTagTable', |
| 8 | + table: 'startTagTable', |
| 9 | + tbody: 'startTagTable', |
| 10 | + tfoot: 'startTagTable', |
| 11 | + thead: 'startTagTable', |
| 12 | + tr: 'startTagTable', |
| 13 | + td: 'startTagTable', |
| 14 | + th: 'startTagTable', |
| 15 | + '-default': 'startTagOther' |
| 16 | +} |
| 17 | + |
| 18 | +var end_tag_handlers = { |
| 19 | + caption: 'endTagTable', |
| 20 | + table: 'endTagTable', |
| 21 | + tbody: 'endTagTable', |
| 22 | + tfoot: 'endTagTable', |
| 23 | + thead: 'endTagTable', |
| 24 | + tr: 'endTagTable', |
| 25 | + td: 'endTagTable', |
| 26 | + th: 'endTagTable', |
| 27 | + '-default': 'endTagOther' |
| 28 | +} |
| 29 | + |
| 30 | +exports.Phase = function InSelectInTablePhase(parser, tree) { |
| 31 | + Phase.call(this, parser, tree); |
| 32 | + this.start_tag_handlers = start_tag_handlers; |
| 33 | + this.end_tag_handlers = end_tag_handlers; |
| 34 | + this.name = 'in_select_in_table'; |
| 35 | +} |
| 36 | + |
| 37 | +var p = exports.Phase.prototype = new Phase; |
| 38 | + |
| 39 | +p.processCharacters = function(data) { |
| 40 | + new inSelect(this.parser, this.tree).processCharacters(data) |
| 41 | +} |
| 42 | + |
| 43 | +p.startTagTable = function(name, attributes) { |
| 44 | + this.parse_error("unexpected-table-element-start-tag-in-select-in-table", {name: name}) |
| 45 | + this.endTagOther("select") |
| 46 | + this.parser.phase.processStartTag(name, attributes) |
| 47 | +} |
| 48 | + |
| 49 | +p.startTagOther = function(name, attributes) { |
| 50 | + new inSelect(this.parser, this.tree).processStartTag(name, attributes) |
| 51 | +} |
| 52 | + |
| 53 | +p.endTagTable = function(name) { |
| 54 | + this.parse_error("unexpected-table-element-end-tag-in-select-in-table", {name: name}) |
| 55 | + if(this.tree.elementInScope(name, true)) { |
| 56 | + this.endTagOther("select") |
| 57 | + this.parser.phase.processEndTag(name) |
| 58 | + } |
| 59 | +} |
| 60 | + |
| 61 | +p.endTagOther = function(name) { |
| 62 | + new inSelect(this.parser, this.tree).processEndTag(name) |
| 63 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_in_table_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 64 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_phase.js |
— | — | @@ -0,0 +1,104 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | + |
| 4 | +var starts = { |
| 5 | + html: 'startTagHtml', |
| 6 | + option: 'startTagOption', |
| 7 | + optgroup: 'startTagOptgroup', |
| 8 | + select: 'startTagSelect', |
| 9 | + '-default': 'startTagOther', |
| 10 | +} |
| 11 | + |
| 12 | +var ends = { |
| 13 | + option: 'endTagOption', |
| 14 | + optgroup: 'endTagOptgroup', |
| 15 | + select: 'endTagSelect', |
| 16 | + caption: 'endTagTableElements', |
| 17 | + table: 'endTagTableElements', |
| 18 | + tbody: 'endTagTableElements', |
| 19 | + tfoot: 'endTagTableElements', |
| 20 | + thead: 'endTagTableElements', |
| 21 | + tr: 'endTagTableElements', |
| 22 | + td: 'endTagTableElements', |
| 23 | + th: 'endTagTableElements', |
| 24 | + '-default': 'endTagOther', |
| 25 | +} |
| 26 | + |
| 27 | +exports.Phase = function InSelectPhase(parser, tree) { |
| 28 | + Phase.call(this, parser, tree); |
| 29 | + this.start_tag_handlers = starts; |
| 30 | + this.end_tag_handlers = ends; |
| 31 | +} |
| 32 | + |
| 33 | +var p = exports.Phase.prototype = new Phase; |
| 34 | + |
| 35 | +p.processCharacters = function(data) { |
| 36 | + this.tree.insert_text(data); |
| 37 | +} |
| 38 | + |
| 39 | +p.startTagOption = function(name, attributes) { |
| 40 | + // we need to imply </option> if <option> is the current node |
| 41 | + if(this.tree.open_elements.last().tagName.toLowerCase() == 'option') this.tree.pop_element(); |
| 42 | + this.tree.insert_element(name, attributes); |
| 43 | +} |
| 44 | + |
| 45 | +p.startTagOptgroup = function(name, attributes) { |
| 46 | + if(this.tree.open_elements.last().tagName.toLowerCase() == 'option') this.tree.pop_element(); |
| 47 | + if(this.tree.open_elements.last().tagName.toLowerCase() == 'optgroup') this.tree.pop_element(); |
| 48 | + this.tree.insert_element(name, attributes); |
| 49 | +} |
| 50 | + |
| 51 | +p.endTagOption = function(name) { |
| 52 | + if(this.tree.open_elements.last().tagName.toLowerCase() == 'option') { |
| 53 | + this.tree.pop_element(); |
| 54 | + } else { |
| 55 | + this.parse_error('unexpected-end-tag-in-select', {name: 'option'}); |
| 56 | + } |
| 57 | +} |
| 58 | + |
| 59 | +p.endTagOptgroup = function(name) { |
| 60 | + // </optgroup> implicitly closes <option> |
| 61 | + if(this.tree.open_elements.last().tagName.toLowerCase() == 'option' && this.tree.open_elements[this.tree.open_elements.length - 2].tagName.toLowerCase() == 'optgroup') { |
| 62 | + this.tree.pop_element(); |
| 63 | + } |
| 64 | + |
| 65 | + // it also closes </optgroup> |
| 66 | + if(this.tree.open_elements.last().tagName.toLowerCase() == 'optgroup') { |
| 67 | + this.tree.pop_element(); |
| 68 | + } else { |
| 69 | + // But nothing else |
| 70 | + this.parse_error('unexpected-end-tag-in-select', {name: 'optgroup'}); |
| 71 | + } |
| 72 | +} |
| 73 | + |
| 74 | +p.startTagSelect = function(name) { |
| 75 | + this.parse_error("unexpected-select-in-select"); |
| 76 | + this.endTagSelect('select'); |
| 77 | +} |
| 78 | + |
| 79 | +p.endTagSelect = function(name) { |
| 80 | + if(this.inScope('select', true)) { |
| 81 | + this.tree.remove_open_elements_until('select'); |
| 82 | + this.parser.reset_insertion_mode(this.tree.open_elements.last()); |
| 83 | + } else { |
| 84 | + // inner_html case |
| 85 | + this.parse_error(); |
| 86 | + } |
| 87 | +} |
| 88 | + |
| 89 | +p.endTagTableElements = function(name) { /* End tag of a table element seen inside <select>: always a parse error; if that element is in scope, close the select and re-dispatch the end tag to the parser's current phase. */ |
| 90 | + this.parse_error('unexpected-end-tag-in-select', {name: name}); |
| 91 | + |
| 92 | + if(this.inScope(name, true)) { |
| 93 | + this.endTagSelect('select'); |
| 94 | + this.parser.phase.processEndTag(name); |
| 95 | + } |
| 96 | +} |
| 97 | + |
| 98 | +p.startTagOther = function(name, attributes) { /* Any other start tag inside <select>: report a parse error; nothing is inserted. */ |
| 99 | + this.parse_error("unexpected-start-tag-in-select", {name: name}) |
| 100 | +} |
| 101 | + |
| 102 | +p.endTagOther = function(name) { /* Any other end tag inside <select>: report a parse error; the stack is left untouched. */ |
| 103 | + this.parse_error('unexpected-end-tag-in-select', {name: name}); |
| 104 | +} |
| 105 | + |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 106 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_cell_phase.js |
— | — | @@ -0,0 +1,102 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var inBody = require('./in_body_phase').Phase; |
| 4 | + |
| 5 | +var starts = { /* Start-tag dispatch table for the in-cell phase: tag name -> name of the handler method; '-default' covers every tag not listed. */ |
| 6 | + html: 'startTagHtml', |
| 7 | + caption: 'startTagTableOther', |
| 8 | + col: 'startTagTableOther', |
| 9 | + colgroup: 'startTagTableOther', |
| 10 | + tbody: 'startTagTableOther', |
| 11 | + td: 'startTagTableOther', |
| 12 | + tfoot: 'startTagTableOther', |
| 13 | + th: 'startTagTableOther', |
| 14 | + thead: 'startTagTableOther', |
| 15 | + tr: 'startTagTableOther', |
| 16 | + '-default': 'startTagOther', |
| 17 | +} |
| 18 | + |
| 19 | +var ends = { /* End-tag dispatch table for the in-cell phase, same layout as the start-tag table. */ |
| 20 | + td: 'endTagTableCell', |
| 21 | + th: 'endTagTableCell', |
| 22 | + body: 'endTagIgnore', |
| 23 | + caption: 'endTagIgnore', |
| 24 | + col: 'endTagIgnore', |
| 25 | + colgroup: 'endTagIgnore', |
| 26 | + html: 'endTagIgnore', |
| 27 | + table: 'endTagImply', |
| 28 | + tbody: 'endTagImply', |
| 29 | + tfoot: 'endTagImply', |
| 30 | + thead: 'endTagImply', |
| 31 | + tr: 'endTagImply', |
| 32 | + '-default': 'endTagOther', |
| 33 | +} |
| 34 | + |
| 35 | +exports.Phase = function InCellPhase(parser, tree) { |
| 36 | + Phase.call(this, parser, tree); |
| 37 | + this.start_tag_handlers = starts; |
| 38 | + this.end_tag_handlers = ends; |
| 39 | +} |
| 40 | + |
| 41 | +exports.Phase.prototype = new Phase; |
| 42 | + |
| 43 | +p = exports.Phase.prototype; |
| 44 | + |
| 45 | +p.processCharacters = function(data) { /* Character data inside a cell is handled by a freshly constructed in-body phase. */ |
| 46 | + new inBody(this.parser, this.tree).processCharacters(data); |
| 47 | +} |
| 48 | + |
| 49 | +p.startTagTableOther = function(name, attributes) { /* Table-structure start tag inside a cell: if a td or th is in scope, close the cell and re-dispatch the tag to the parser's current phase; otherwise report a parse error. */ |
| 50 | + if(this.inScope('td', true) || this.inScope('th', true)) { |
| 51 | + this.closeCell(); |
| 52 | + this.parser.phase.processStartTag(name, attributes); |
| 53 | + } else { |
| 54 | + // inner_html case: no cell in scope, so only report the error (no error code is passed) |
| 55 | + this.parse_error(); |
| 56 | + } |
| 57 | +} |
| 58 | + |
| 59 | +p.startTagOther = function(name, attributes) { /* Every other start tag is handled by a freshly constructed in-body phase. */ |
| 60 | + new inBody(this.parser, this.tree).processStartTag(name, attributes); |
| 61 | +} |
| 62 | + |
| 63 | +p.endTagTableCell = function(name) { /* </td> or </th>: when the named cell is in scope, close it (reporting an error if it was not the current node), clear the active formatting elements and switch to the 'inRow' phase; otherwise report a parse error. */ |
| 64 | + if(this.inScope(name, true)) { |
| 65 | + this.tree.generateImpliedEndTags(name); |
| 66 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name.toLowerCase()) { |
| 67 | + this.parse_error('unexpected-cell-end-tag', {name: name}); |
| 68 | + this.tree.remove_open_elements_until(name); |
| 69 | + } else { |
| 70 | + this.tree.pop_element(); |
| 71 | + } |
| 72 | + this.tree.clearActiveFormattingElements(); |
| 73 | + this.parser.newPhase('inRow'); |
| 74 | + } else { |
| 75 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 76 | + } |
| 77 | +} |
| 78 | + |
| 79 | +p.endTagIgnore = function(name) { /* End tags that are ignored inside a cell: report a parse error and do nothing else. */ |
| 80 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 81 | +} |
| 82 | + |
| 83 | +p.endTagImply = function(name) { |
| 84 | + if(this.inScope(name, true)) { |
| 85 | + this.closeCell(); |
| 86 | + this.parser.phase.processEndTag(name); |
| 87 | + } else { |
| 88 | + // sometimes inner_html case |
| 89 | + this.parse_error |
| 90 | + } |
| 91 | +} |
| 92 | + |
| 93 | +p.endTagOther = function(name) { /* Every other end tag is handled by a freshly constructed in-body phase. */ |
| 94 | + new inBody(this.parser, this.tree).processEndTag(name); |
| 95 | +} |
| 96 | + |
| 97 | +p.closeCell = function() { /* Close whichever cell is in scope by running the cell end-tag handler for it; td is checked before th, and nothing happens when neither is in scope. */ |
| 98 | + if(this.inScope('td', true)) { |
| 99 | + this.endTagTableCell('td'); |
| 100 | + } else if(this.inScope('th', true)) { |
| 101 | + this.endTagTableCell('th'); |
| 102 | + } |
| 103 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_cell_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 104 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_frameset_phase.js |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_frameset_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
2 | 105 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_foreign_content_phase.js |
— | — | @@ -0,0 +1,58 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | + |
| 4 | +var starts = { /* Start-tag dispatch table for the foreign-content phase: every start tag goes to startTagOther. */ |
| 5 | + '-default': 'startTagOther' |
| 6 | +} |
| 7 | + |
| 8 | +var ends = { /* End-tag dispatch table for the foreign-content phase: every end tag goes to endTagOther. */ |
| 9 | + '-default': 'endTagOther' |
| 10 | +} |
| 11 | + |
| 12 | +exports.Phase = function InForeignContentPhase(parser, tree) { /* Phase used for foreign content; the handlers below test for the 'math' namespace. Stores parser and tree via the base Phase constructor, names the phase and installs its dispatch tables. */ |
| 13 | + Phase.call(this, parser, tree); |
| 14 | + this.name = 'in_foreign_content_phase'; |
| 15 | + this.start_tag_handlers = starts; |
| 16 | + this.end_tag_handlers = ends; |
| 17 | +} |
| 18 | + |
| 19 | +var p = exports.Phase.prototype = new Phase; /* inherit the shared Phase behaviour; p is a module-local alias for attaching handlers */ |
| 20 | + |
| 21 | +p.startTagOther = function(name, attributes, self_closing) { /* Start tag in foreign content. Three cases: (1) mglyph/malignmark inside a MathML text element is delegated to the secondary phase; (2) a listed HTML element name is a parse error that pops all namespaced elements and is reprocessed by the secondary phase; (3) anything else is inserted as a foreign element in the current node's namespace. */ |
| 22 | + if(['mglyph', 'malignmark'].indexOf(name) != -1 |
| 23 | + && ['mi', 'mo', 'mn', 'ms', 'mtext'].indexOf(this.tree.open_elements[this.tree.open_elements.length - 1].tagName) != -1 /* NOTE(review): tagName is compared here without toLowerCase(), unlike the other phases in this changeset; confirm the case tagName is stored in */ |
| 24 | + && this.tree.open_elements[this.tree.open_elements.length - 1].namespace == 'math') { |
| 25 | + this.parser.secondary_phase.processStartTag(name, attributes); |
| 26 | + if(this.parser.phase == 'inForeignContent') { /* NOTE(review): parser.phase is used as an object elsewhere (parser.phase.processStartTag); confirm this comparison with a string can ever be true */ |
| 27 | + if(this.tree.open_elements.any(function(e) { return e.namespace })) { /* any() is not a standard Array method; presumably added elsewhere in the library (verify) */ |
| 28 | + this.parser.phase = this.parser.secondary_phase; |
| 29 | + } |
| 30 | + } |
| 31 | + } else if(['b', 'big', 'blockquote', 'body', 'br', 'center', 'code', 'dd', 'div', 'dl', 'dt', 'em', 'embed', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i', 'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby', 's', 'small', 'span', 'strong', 'strike', 'sub', 'sup', 'table', 'tt', 'u', 'ul', 'var'].indexOf(name) != -1) { |
| 32 | + this.parse_error('html-in-foreign-content', {name: name}); |
| 33 | + while(this.tree.open_elements[this.tree.open_elements.length - 1].namespace) { /* pop every namespaced element off the top of the stack */ |
| 34 | + this.tree.open_elements.pop(); |
| 35 | + } |
| 36 | + this.parser.phase = this.parser.secondary_phase; |
| 37 | + this.parser.phase.processStartTag(name, attributes); |
| 38 | + } else { |
| 39 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].namespace == 'math') { |
| 40 | + attributes = this.adjust_mathml_attributes(attributes) |
| 41 | + } |
| 42 | + attributes = this.adjust_foreign_attributes(attributes) |
| 43 | + this.tree.insert_foreign_element(name, attributes, this.tree.open_elements[this.tree.open_elements.length - 1].namespace); |
| 44 | + if(self_closing) this.tree.open_elements.pop() /* a self-closing tag does not stay on the stack */ |
| 45 | + } |
| 46 | +} |
| 47 | + |
| 48 | +p.endTagOther = function(name) { /* End tag in foreign content: delegate to the secondary phase, then conditionally hand control back to it. */ |
| 49 | + this.parser.secondary_phase.processEndTag(name) |
| 50 | + if(this.parser.phase == 'inForeignContent') { /* NOTE(review): parser.phase is used as an object elsewhere; confirm this comparison with a string can ever be true */ |
| 51 | + if(this.tree.open_elements.any(function(e) { return e.namespace })) { |
| 52 | + this.parser.phase = this.parser.secondary_phase; |
| 53 | + } |
| 54 | + } |
| 55 | +} |
| 56 | + |
| 57 | +p.processCharacters = function(characters) { /* Character data in foreign content is inserted as text via the tree builder. */ |
| 58 | + this.tree.insert_text(characters); |
| 59 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_foreign_content_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 60 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_phase.js |
— | — | @@ -0,0 +1,129 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var inBody = require('./in_body_phase').Phase; |
| 4 | +var HTML5 = require('../../html5') |
| 5 | + |
| 6 | +var start_tag_handlers = { /* Start-tag dispatch table for the in-table phase: tag name -> name of the handler method; '-default' covers every tag not listed. */ |
| 7 | + html: 'startTagHtml', |
| 8 | + caption: 'startTagCaption', |
| 9 | + colgroup: 'startTagColgroup', |
| 10 | + col: 'startTagCol', |
| 11 | + table: 'startTagTable', |
| 12 | + tbody: 'startTagRowGroup', |
| 13 | + tfoot: 'startTagRowGroup', |
| 14 | + thead: 'startTagRowGroup', |
| 15 | + td: 'startTagImplyTbody', |
| 16 | + th: 'startTagImplyTbody', |
| 17 | + tr: 'startTagImplyTbody', |
| 18 | + '-default': 'startTagOther', |
| 19 | +} |
| 20 | + |
| 21 | +var end_tag_handlers = { /* End-tag dispatch table for the in-table phase, same layout as the start-tag table. */ |
| 22 | + table: 'endTagTable', |
| 23 | + body: 'endTagIgnore', |
| 24 | + caption: 'endTagIgnore', |
| 25 | + col: 'endTagIgnore', |
| 26 | + colgroup: 'endTagIgnore', |
| 27 | + html: 'endTagIgnore', |
| 28 | + tbody: 'endTagIgnore', |
| 29 | + td: 'endTagIgnore', |
| 30 | + tfoot: 'endTagIgnore', |
| 31 | + th: 'endTagIgnore', |
| 32 | + thead: 'endTagIgnore', |
| 33 | + tr: 'endTagIgnore', |
| 34 | + '-default': 'endTagOther', |
| 35 | +} |
| 36 | + |
| 37 | +exports.Phase = p = function InTablePhase(parser, tree) { |
| 38 | + Phase.call(this, parser, tree); |
| 39 | + this.start_tag_handlers = start_tag_handlers; |
| 40 | + this.end_tag_handlers = end_tag_handlers; |
| 41 | +}; |
| 42 | + |
| 43 | +p.prototype = new Phase; |
| 44 | + |
| 45 | +p.prototype.processCharacters = function(data) { /* Character data directly inside a table: report a parse error and let a fresh in-body phase handle it while tree.insert_from_table is set; the flag is cleared again afterwards. */ |
| 46 | + this.parse_error("unexpected-char-implies-table-voodoo"); |
| 47 | + this.tree.insert_from_table = true; |
| 48 | + new inBody(this.parser, this.tree).processCharacters(data); |
| 49 | + this.tree.insert_from_table = false; |
| 50 | +} |
| 51 | + |
| 52 | +p.prototype.startTagCaption = function(name, attributes) { /* <caption>: clear the stack back to the table context, push a marker onto the active formatting elements, insert the element and switch to the 'inCaption' phase. */ |
| 53 | + this.clearStackToTableContext(); |
| 54 | + this.tree.activeFormattingElements.push(HTML5.Marker); |
| 55 | + this.tree.insert_element(name, attributes); |
| 56 | + this.parser.newPhase('inCaption'); |
| 57 | +} |
| 58 | + |
| 59 | +p.prototype.startTagColgroup = function(name, attributes) { /* <colgroup>: clear the stack back to the table context, insert the element and switch to the 'inColumnGroup' phase. */ |
| 60 | + this.clearStackToTableContext(); |
| 61 | + this.tree.insert_element(name, attributes); |
| 62 | + this.parser.newPhase('inColumnGroup'); |
| 63 | +} |
| 64 | + |
| 65 | +p.prototype.startTagCol = function(name, attributes) { /* <col> directly inside a table: act as if an attribute-less <colgroup> had been seen, then re-dispatch the tag to the parser's current phase. */ |
| 66 | + this.startTagColgroup('colgroup', {}); |
| 67 | + this.parser.phase.processStartTag(name, attributes); |
| 68 | +} |
| 69 | + |
| 70 | +p.prototype.startTagRowGroup = function(name, attributes) { /* <tbody>, <tfoot> or <thead>: clear the stack back to the table context, insert the element and switch to the 'inTableBody' phase. */ |
| 71 | + this.clearStackToTableContext(); |
| 72 | + this.tree.insert_element(name, attributes); |
| 73 | + this.parser.newPhase('inTableBody'); |
| 74 | +} |
| 75 | + |
| 76 | +p.prototype.startTagImplyTbody = function(name, attributes) { /* <td>, <th> or <tr> directly inside a table: act as if an attribute-less <tbody> had been seen, then re-dispatch the tag to the parser's current phase. */ |
| 77 | + this.startTagRowGroup('tbody', {}); |
| 78 | + this.parser.phase.processStartTag(name, attributes); |
| 79 | +} |
| 80 | + |
| 81 | +p.prototype.startTagTable = function(name, attributes) { /* Nested <table> start tag: report a parse error, process an implied </table>, and, unless parsing inner_html, re-dispatch the start tag to the parser's current phase. */ |
| 82 | + this.parse_error("unexpected-start-tag-implies-end-tag", |
| 83 | + {startName: "table", endName: "table"}); |
| 84 | + this.parser.phase.processEndTag('table'); |
| 85 | + if(!this.parser.inner_html) this.parser.phase.processStartTag(name, attributes); |
| 86 | +} |
| 87 | + |
| 88 | +p.prototype.startTagOther = function(name, attributes) { /* Any other start tag directly inside a table: report a parse error and let a fresh in-body phase handle it while tree.insert_from_table is set; the flag is cleared again afterwards. */ |
| 89 | + this.parse_error("unexpected-start-tag-implies-table-voodoo", {name: name}); |
| 90 | + this.tree.insert_from_table = true; |
| 91 | + new inBody(this.parser, this.tree).processStartTag(name, attributes); |
| 92 | + this.tree.insert_from_table = false; |
| 93 | +} |
| 94 | + |
| 95 | +p.prototype.endTagTable = function(name) { |
| 96 | + if(this.inScope(name, true)) { |
| 97 | + this.tree.generateImpliedEndTags(); |
| 98 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) { |
| 99 | + this.parse_error("end-tag-too-early-named", {gotName: 'table', expectedName: this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()}); |
| 100 | + } |
| 101 | + |
| 102 | + this.tree.remove_open_elements_until('table'); |
| 103 | + this.parser.reset_insertion_mode(this.tree.open_elements[this.tree.open_elements.length - 1]); |
| 104 | + } else { |
| 105 | + assert.ok(this.parser.inner_html); |
| 106 | + this.parse_error(); |
| 107 | + } |
| 108 | +} |
| 109 | + |
| 110 | +p.prototype.endTagIgnore = function(name) { /* End tags that are ignored directly inside a table: report a parse error and do nothing else. */ |
| 111 | + this.parse_error("unexpected-end-tag", {name: name}); |
| 112 | +} |
| 113 | + |
| 114 | +p.prototype.endTagOther = function(name) { /* Any other end tag directly inside a table: report a parse error and let a fresh in-body phase handle it with tree.insert_from_table set. */ |
| 115 | + this.parse_error("unexpected-end-tag-implies-table-voodoo", {name: name}) |
| 116 | + // Make all the special element rearranging voodoo kick in |
| 117 | + this.tree.insert_from_table = true |
| 118 | + // Process the end tag in the "in body" mode |
| 119 | + new inBody(this.parser, this.tree).processEndTag(name) |
| 120 | + this.tree.insert_from_table = false |
| 121 | +} |
| 122 | + |
| 123 | +p.prototype.clearStackToTableContext = function() { /* Pop open elements, reporting a parse error for each, until the current node is a <table> or the <html> element. */ |
| 124 | + var name; |
| 125 | + while(name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase(), (name != 'table' && name != 'html')) { /* comma expression: refresh name from the current node, then test it */ |
| 126 | + this.parse_error("unexpected-implied-end-tag-in-table", {name: name}) |
| 127 | + this.tree.pop_element() |
| 128 | + } |
| 129 | + // When the current node is <html> it's an inner_html case |
| 130 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 131 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_body_phase.js |
— | — | @@ -0,0 +1,51 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | + |
| 4 | +var end_tag_handlers = { /* End-tag dispatch table for the after-body phase: tag name -> name of the handler method; '-default' covers every tag not listed. */ |
| 5 | + html: 'endTagHtml', |
| 6 | + '-default': 'endTagOther', |
| 7 | +} |
| 8 | + |
| 9 | +exports.Phase = p = function AfterBodyPhase(parser, tree) { |
| 10 | + Phase.call(this, parser, tree); |
| 11 | + this.end_tag_handlers = end_tag_handlers; |
| 12 | +} |
| 13 | + |
| 14 | +p.prototype = new Phase; |
| 15 | + |
| 16 | +p.prototype.processComment = function(data) { /* Comment after the body: attach it to the first open element rather than the current node. */ |
| 17 | + // This is needed because data is to be appended to the html element here |
| 18 | + // and not to whatever is currently open. |
| 19 | + this.tree.insert_comment(data, this.tree.open_elements[0]); |
| 20 | +} |
| 21 | + |
| 22 | +p.prototype.processCharacters = function(data) { /* Character data after the body: report a parse error, switch back to the 'inBody' phase and let it process the data. */ |
| 23 | + this.parse_error('unexpected-char-after-body') |
| 24 | + this.parser.newPhase('inBody') |
| 25 | + this.parser.phase.processCharacters(data) |
| 26 | +} |
| 27 | + |
| 28 | +p.prototype.processStartTag = function(name, attributes, self_closing) { /* Any start tag after the body: report a parse error, switch back to the 'inBody' phase and let it process the tag. Overrides the table-driven dispatch of the base Phase. */ |
| 29 | + this.parse_error('unexpected-start-tag-after-body', {name: name}); |
| 30 | + this.parser.newPhase('inBody'); |
| 31 | + this.parser.phase.processStartTag(name, attributes, self_closing); |
| 32 | +} |
| 33 | + |
| 34 | +p.prototype.endTagHtml = function(name) { /* </html> after the body: a parse error when parsing inner_html; otherwise remember the current phase in parser.last_phase and switch to 'afterAfterBody'. */ |
| 35 | + if(this.parser.inner_html) { |
| 36 | + this.parse_error('end-html-in-innerhtml'); |
| 37 | + } else { |
| 38 | + // XXX This may need to be done, not sure |
| 39 | + // Don't set last_phase to the current phase but to the inBody phase |
| 40 | + // instead. No need for extra parse_errors if there's something after |
| 41 | + // </html>. |
| 42 | + // Try <!doctype html>X</html>X for instance |
| 43 | + this.parser.last_phase = this.parser.phase; |
| 44 | + this.parser.newPhase('afterAfterBody'); |
| 45 | + } |
| 46 | +} |
| 47 | + |
| 48 | +p.prototype.endTagOther = function(name) { /* Any other end tag after the body: report a parse error, switch back to the 'inBody' phase and let it process the tag. */ |
| 49 | + this.parse_error('unexpected-end-tag-after-body', {name: name}); |
| 50 | + this.parser.newPhase('inBody'); |
| 51 | + this.parser.phase.processEndTag(name); |
| 52 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_body_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 53 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/phase.js |
— | — | @@ -0,0 +1,161 @@ |
| 2 | +var HTML5 = require('../parser').HTML5; |
| 3 | +var assert = require('assert'); |
| 4 | + |
| 5 | +exports.Phase = function Phase(parser, tree) { /* Base constructor for all phases: stores the tree and parser and installs dispatch tables that contain only the '-default' entries. */ |
| 6 | + this.tree = tree; |
| 7 | + this.parser = parser; |
| 8 | + this.end_tag_handlers = {"-default": 'endTagOther'}; |
| 9 | + this.start_tag_handlers = {"-default": 'startTagOther'}; |
| 10 | +} |
| 11 | + |
| 12 | +exports.Phase.prototype = { |
| 13 | + parse_error: function(code, options) { |
| 14 | + this.parser.parse_error(code, options); |
| 15 | + }, |
| 16 | + processEOF: function() { |
| 17 | + this.tree.generateImpliedEndTags(); |
| 18 | + if(this.tree.open_elements.length > 2) { |
| 19 | + this.parse_error('expected-closing-tag-but-got-eof'); |
| 20 | + } else if(this.tree.open_elements.length == 2 |
| 21 | + && this.tree.open_elements[1].tagName.toLowerCase() != 'body') { |
| 22 | + // This happens for framesets or something? |
| 23 | + this.parse_error('expected-closing-tag-but-got-eof'); |
| 24 | + } else if(this.parser.inner_html && this.tree.open_elements.length > 1) { |
| 25 | + // XXX This is not what the specification says. Not sure what to do here. |
| 26 | + this.parse_error('eof-in-innerhtml'); |
| 27 | + } |
| 28 | + }, |
| 29 | + processComment: function(data) { |
| 30 | + // For most phases the following is correct. Where it's not it will be |
| 31 | + // overridden. |
| 32 | + this.tree.insert_comment(data, this.tree.open_elements.last()); |
| 33 | + }, |
| 34 | + processDoctype: function(name, publicId, systemId, correct) { |
| 35 | + this.parse_error('unexpected-doctype'); |
| 36 | + }, |
| 37 | + processSpaceCharacters: function(data) { |
| 38 | + this.tree.insert_text(data); |
| 39 | + }, |
| 40 | + processStartTag: function(name, attributes, self_closing) { |
| 41 | + if(this[this.start_tag_handlers[name]]) { |
| 42 | + this[this.start_tag_handlers[name]](name, attributes, self_closing); |
| 43 | + } else if(this[this.start_tag_handlers["-default"]]) { |
| 44 | + this[this.start_tag_handlers["-default"]](name, attributes, self_closing); |
| 45 | + } else { |
| 46 | + throw(new Error("No handler found for "+name)); |
| 47 | + } |
| 48 | + }, |
| 49 | + processEndTag: function(name) { |
| 50 | + if(this[this.end_tag_handlers[name]]) { |
| 51 | + this[this.end_tag_handlers[name]](name); |
| 52 | + } else if(this[this.end_tag_handlers["-default"]]) { |
| 53 | + this[this.end_tag_handlers["-default"]](name); |
| 54 | + } else { |
| 55 | + throw(new Error("No handler found for "+name)); |
| 56 | + } |
| 57 | + }, |
| 58 | + inScope: function(name, treeVariant) { |
| 59 | + return this.tree.elementInScope(name, treeVariant); |
| 60 | + }, |
| 61 | + startTagHtml: function(name, attributes) { |
| 62 | + if(this.parser.first_start_tag == false && name == 'html') { |
| 63 | + this.parse_error('non-html-root') |
| 64 | + } |
| 65 | + // XXX Need a check here to see if the first start tag token emitted is this token. . . if it's not, invoke parse_error. |
| 66 | + for(var i = 0; i < attributes.length; i++) { |
| 67 | + if(!this.tree.open_elements[0].getAttribute(attributes[i].nodeName)) { |
| 68 | + this.tree.open_elements[0].setAttribute(attributes[i].nodeName, attributes[i].nodeValue) |
| 69 | + } |
| 70 | + } |
| 71 | + this.parser.first_start_tag = false; |
| 72 | + }, |
| 73 | + adjust_mathml_attributes: function(attributes) { |
| 74 | + return attributes.map(function(a) { |
| 75 | + if(a[0] =='definitionurl') { |
| 76 | + return ['definitionURL', a[1]] |
| 77 | + } else { |
| 78 | + return a; |
| 79 | + } |
| 80 | + }); |
| 81 | + }, |
| 82 | + adjust_svg_attributes: function(attributes) { |
| 83 | + return attributes.map(function(a) { |
| 84 | + return SVGAttributeMap[a] ? SVGAttributeMap[a] : a; |
| 85 | + }); |
| 86 | + }, |
| 87 | + adjust_foreign_attributes: function (attributes) { |
| 88 | + for(var i = 0; i < attributes.length; i++) { |
| 89 | + if(attributes[i].nodeName.indexOf(':') != -1) { |
| 90 | + var t = attributes[i].nodeName.split(/:/); |
| 91 | + attributes[i].namespace = t[0]; |
| 92 | + attributes[i].nodeName = t[1]; |
| 93 | + } |
| 94 | + } |
| 95 | + return attributes; |
| 96 | + } |
| 97 | +} |
| 98 | + |
| 99 | +var SVGAttributeMap = { /* Lower-cased SVG attribute name -> its mixed-case spelling; read by adjust_svg_attributes. */ |
| 100 | + attributename: 'attributeName', |
| 101 | + attributetype: 'attributeType', |
| 102 | + basefrequency: 'baseFrequency', |
| 103 | + baseprofile: 'baseProfile', |
| 104 | + calcmode: 'calcMode', |
| 105 | + clippathunits: 'clipPathUnits', |
| 106 | + contentscripttype: 'contentScriptType', |
| 107 | + contentstyletype: 'contentStyleType', |
| 108 | + diffuseconstant: 'diffuseConstant', |
| 109 | + edgemode: 'edgeMode', |
| 110 | + externalresourcesrequired: 'externalResourcesRequired', |
| 111 | + filterres: 'filterRes', |
| 112 | + filterunits: 'filterUnits', |
| 113 | + glyphref: 'glyphRef', |
| 114 | + gradienttransform: 'gradientTransform', |
| 115 | + gradientunits: 'gradientUnits', |
| 116 | + kernelmatrix: 'kernelMatrix', |
| 117 | + kernelunitlength: 'kernelUnitLength', |
| 118 | + keypoints: 'keyPoints', |
| 119 | + keysplines: 'keySplines', |
| 120 | + keytimes: 'keyTimes', |
| 121 | + lengthadjust: 'lengthAdjust', |
| 122 | + limitingconeangle: 'limitingConeAngle', |
| 123 | + markerheight: 'markerHeight', |
| 124 | + markerunits: 'markerUnits', |
| 125 | + markerwidth: 'markerWidth', |
| 126 | + maskcontentunits: 'maskContentUnits', |
| 127 | + maskunits: 'maskUnits', |
| 128 | + numoctaves: 'numOctaves', |
| 129 | + pathlength: 'pathLength', |
| 130 | + patterncontentunits: 'patternContentUnits', |
| 131 | + patterntransform: 'patternTransform', |
| 132 | + patternunits: 'patternUnits', |
| 133 | + pointsatx: 'pointsAtX', |
| 134 | + pointsaty: 'pointsAtY', |
| 135 | + pointsatz: 'pointsAtZ', |
| 136 | + preservealpha: 'preserveAlpha', |
| 137 | + preserveaspectratio: 'preserveAspectRatio', |
| 138 | + primitiveunits: 'primitiveUnits', |
| 139 | + refx: 'refX', |
| 140 | + refy: 'refY', |
| 141 | + repeatcount: 'repeatCount', |
| 142 | + repeatdur: 'repeatDur', |
| 143 | + requiredextensions: 'requiredExtensions', |
| 144 | + requiredfeatures: 'requiredFeatures', |
| 145 | + specularconstant: 'specularConstant', |
| 146 | + specularexponent: 'specularExponent', |
| 147 | + spreadmethod: 'spreadMethod', |
| 148 | + startoffset: 'startOffset', |
| 149 | + stddeviation: 'stdDeviation', |
| 150 | + stitchtiles: 'stitchTiles', |
| 151 | + surfacescale: 'surfaceScale', |
| 152 | + systemlanguage: 'systemLanguage', |
| 153 | + tablevalues: 'tableValues', |
| 154 | + targetx: 'targetX', |
| 155 | + targety: 'targetY', |
| 156 | + textlength: 'textLength', |
| 157 | + viewbox: 'viewBox', |
| 158 | + viewtarget: 'viewTarget', |
| 159 | + xchannelselector: 'xChannelSelector', |
| 160 | + ychannelselector: 'yChannelSelector', |
| 161 | + zoomandpan: 'zoomAndPan' |
| 162 | +}; |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 163 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_head_phase.js |
— | — | @@ -0,0 +1,84 @@ |
| 2 | +"use strict"; |
| 3 | +var Phase = require('./phase').Phase; |
| 4 | +var HTML5 = require('../../html5'); |
| 5 | + |
| 6 | +var start_tag_handlers = { /* Start-tag dispatch table for the after-head phase: tag name -> name of the handler method; "-default" covers every tag not listed. */ |
| 7 | + html: 'startTagHtml', |
| 8 | + body: 'startTagBody', |
| 9 | + frameset: 'startTagFrameset', |
| 10 | + base: 'startTagFromHead', |
| 11 | + link: 'startTagFromHead', |
| 12 | + meta: 'startTagFromHead', |
| 13 | + script: 'startTagFromHead', |
| 14 | + style: 'startTagFromHead', |
| 15 | + title: 'startTagFromHead', |
| 16 | + "-default": 'startTagOther', |
| 17 | +}; |
| 18 | + |
| 19 | +var end_tag_handlers = { /* End-tag dispatch table for the after-head phase, same layout as the start-tag table. */ |
| 20 | + body: 'endTagBodyHtmlBr', |
| 21 | + html: 'endTagBodyHtmlBr', |
| 22 | + br: 'endTagBodyHtmlBr', |
| 23 | + "-default": 'endTagOther', |
| 24 | +}; |
| 25 | + |
| 26 | +exports.Phase = p = function AfterHeadPhase(parser, tree) { |
| 27 | + Phase.call(this, parser, tree); |
| 28 | + this.start_tag_handlers = start_tag_handlers; |
| 29 | + this.end_tag_handlers = end_tag_handlers; |
| 30 | + |
| 31 | + this.name = 'after_head_phase'; |
| 32 | +} |
| 33 | + |
| 34 | +p.prototype = new Phase; |
| 35 | + |
| 36 | + |
| 37 | +p.prototype.processEOF = function() { /* End of input after the head: insert a body and switch phase via anything_else, then let the new phase process EOF. */ |
| 38 | + this.anything_else(); |
| 39 | + this.parser.phase.processEOF(); |
| 40 | +} |
| 41 | + |
| 42 | +p.prototype.processCharacters = function(data) { /* Character data after the head: insert a body and switch phase via anything_else, then let the new phase process the data. */ |
| 43 | + this.anything_else(); |
| 44 | + this.parser.phase.processCharacters(data); |
| 45 | +} |
| 46 | + |
| 47 | +p.prototype.startTagBody = function(name, attributes) { /* <body>: insert the element and switch to the 'inBody' phase. */ |
| 48 | + this.tree.insert_element(name, attributes); |
| 49 | + this.parser.newPhase('inBody'); |
| 50 | +} |
| 51 | + |
| 52 | +p.prototype.startTagFrameset = function(name, attributes) { /* <frameset>: insert the element and switch to the 'inFrameset' phase. */ |
| 53 | + this.tree.insert_element(name, attributes); |
| 54 | + this.parser.newPhase('inFrameset'); |
| 55 | +} |
| 56 | + |
| 57 | +p.prototype.startTagFromHead = function(name, attributes) { /* A head-only element seen after </head>: report a parse error, switch back to the 'inHead' phase and let it process the tag. */ |
| 58 | + this.parse_error("unexpected-start-tag-out-of-my-head", {name: name}); |
| 59 | + this.parser.newPhase('inHead'); |
| 60 | + this.parser.phase.processStartTag(name, attributes); |
| 61 | +} |
| 62 | + |
| 63 | +p.prototype.startTagOther = function(name, attributes) { /* Any other start tag: insert a body and switch phase via anything_else, then let the new phase process the tag. */ |
| 64 | + this.anything_else(); |
| 65 | + this.parser.phase.processStartTag(name, attributes); |
| 66 | +} |
| 67 | + |
| 68 | +p.prototype.endTagBodyHtmlBr = function(name) { /* </body>, </html> or </br>: insert a body and switch phase via anything_else, then let the new phase process the end tag. */ |
| 69 | + this.anything_else(); |
| 70 | + this.parser.phase.processEndTag(name); |
| 71 | +} |
| 72 | + |
| 73 | +p.prototype.endTagOther = function(name) { /* Any other end tag after the head: report a parse error and ignore the tag. */ |
| 74 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 75 | +} |
| 76 | + |
| 77 | +p.prototype.anything_else = function() { /* Fallback shared by the handlers above: insert a body element with no attributes and switch to the 'inBody' phase. */ |
| 78 | + this.tree.insert_element('body', []); |
| 79 | + this.parser.newPhase('inBody'); |
| 80 | +} |
| 81 | + |
| 82 | +p.prototype.processEndTag = function(name) { |
| 83 | + this.anything_else() |
| 84 | + this.parser.phase.processEndTag(name) |
| 85 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_head_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 86 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_caption_phase.js |
— | — | @@ -0,0 +1,94 @@ |
| 2 | +var Phase = require('./phase').Phase; |
| 3 | +var inBody = require('./in_body_phase').Phase; |
| 4 | +var assert = require('assert'); |
| 5 | + |
| 6 | +var starts = { |
| 7 | + html: 'startTagHtml', |
| 8 | + caption: 'startTagTableElement', |
| 9 | + col: 'startTagTableElement', |
| 10 | + colgroup: 'startTagTableElement', |
| 11 | + tbody: 'startTagTableElement', |
| 12 | + td: 'startTagTableElement', |
| 13 | + tfoot: 'startTagTableElement', |
| 14 | + thead: 'startTagTableElement', |
| 15 | + tr: 'startTagTableElement', |
| 16 | + '-default': 'startTagOther' |
| 17 | +} |
| 18 | + |
| 19 | +var ends = { |
| 20 | + caption: 'endTagCaption', |
| 21 | + table: 'endTagTable', |
| 22 | + body: 'endTagIgnore', |
| 23 | + col: 'endTagIgnore', |
| 24 | + colgroup: 'endTagIgnore', |
| 25 | + html: 'endTagIgnore', |
| 26 | + tbody: 'endTagIgnore', |
| 27 | + td: 'endTagIgnore', |
| 28 | + tfood: 'endTagIgnore', |
| 29 | + thead: 'endTagIgnore', |
| 30 | + tr: 'endTagIgnore', |
| 31 | + '-default': 'endTagOther' |
| 32 | +} |
| 33 | + |
| 34 | +exports.Phase = p = function InCaptionPhase(parser, tree) { |
| 35 | + Phase.call(this, parser, tree); |
| 36 | + this.start_tag_handlers = starts; |
| 37 | + this.end_tag_handlers = ends; |
| 38 | +} |
| 39 | + |
| 40 | +p.prototype = new Phase; |
| 41 | + |
| 42 | +p.prototype.ignoreEndTagCaption = function() { /* True when no caption is in scope, i.e. when a </caption> end tag would be ignored. */ |
| 43 | + return !this.inScope('caption', true); |
| 44 | +} |
| 45 | + |
| 46 | +p.prototype.processCharacters = function(data) { /* Character data inside a caption is handled by a freshly constructed in-body phase. */ |
| 47 | + new inBody(this.parser, this.tree).processCharacters(data); |
| 48 | +} |
| 49 | + |
| 50 | +p.prototype.startTagTableElement = function(name, attributes) { /* Table-structure start tag inside a caption: report a parse error, process an implied </caption>, and re-dispatch the start tag unless that end tag was ignored. */ |
| 51 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 52 | + var ignoreEndTag = this.ignoreEndTagCaption(); /* sampled before the end tag is processed, because processing it changes what is in scope */ |
| 53 | + this.parser.phase.processEndTag('caption'); |
| 54 | + if(!ignoreEndTag) this.parser.phase.processStartTag(name, attributes) |
| 55 | +} |
| 56 | + |
| 57 | +p.prototype.startTagOther = function(name, attributes) { /* Every other start tag is handled by a freshly constructed in-body phase. */ |
| 58 | + new inBody(this.parser, this.tree).processStartTag(name, attributes); |
| 59 | +} |
| 60 | + |
| 61 | +p.prototype.endTagCaption = function(name) { /* </caption>: ignored with a parse error when no caption is in scope; otherwise close the caption, clear the active formatting elements and switch to the 'inTable' phase. */ |
| 62 | + if(this.ignoreEndTagCaption()) { |
| 63 | + // inner_html case |
| 64 | + assert.ok(this.parser.inner_html); |
| 65 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 66 | + } else { |
| 67 | + // AT this code is quite similar to endTagTable in inTable |
| 68 | + this.tree.generateImpliedEndTags(); |
| 69 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != 'caption') { |
| 70 | + this.parse_error('expected-one-end-tag-but-got-another', |
| 71 | + {gotName: "caption", expectedName: this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()}); |
| 72 | + } |
| 73 | + |
| 74 | + this.tree.remove_open_elements_until('caption'); |
| 75 | + |
| 76 | + this.tree.clearActiveFormattingElements(); |
| 77 | + |
| 78 | + this.parser.newPhase('inTable'); |
| 79 | + } |
| 80 | +} |
| 81 | + |
| 82 | +p.prototype.endTagTable = function(name) { /* </table> inside a caption: report a parse error, process an implied </caption>, and re-dispatch the end tag unless that implied end tag was ignored. */ |
| 83 | + this.parse_error("unexpected-end-table-in-caption"); |
| 84 | + var ignoreEndTag = this.ignoreEndTagCaption(); /* sampled before the end tag is processed, because processing it changes what is in scope */ |
| 85 | + this.parser.phase.processEndTag('caption') |
| 86 | + if(!ignoreEndTag) this.parser.phase.processEndTag(name); |
| 87 | +} |
| 88 | + |
| 89 | +p.prototype.endTagIgnore = function(name) { /* End tags that are ignored inside a caption: report a parse error and do nothing else. */ |
| 90 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 91 | +} |
| 92 | + |
| 93 | +p.prototype.endTagOther = function(name) { /* Every other end tag is handled by a freshly constructed in-body phase. */ |
| 94 | + new inBody(this.parser, this.tree).processEndTag(name); |
| 95 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_caption_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 96 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_body_phase.js |
— | — | @@ -0,0 +1,722 @@ |
| 2 | +"use strict"; |
| 3 | +var HTML5 = require('../../html5'); |
| 4 | +var Phase = require('./phase').Phase; |
| 5 | +var assert = require('assert') |
| 6 | + |
| 7 | +var start_tag_handlers = { |
| 8 | + html: 'startTagHtml', |
| 9 | + head: 'startTagHead', |
| 10 | + base: 'startTagProcessInHead', |
| 11 | + link: 'startTagProcessInHead', |
| 12 | + meta: 'startTagProcessInHead', |
| 13 | + script: 'startTagProcessInHead', |
| 14 | + style: 'startTagProcessInHead', |
| 15 | + title: 'startTagProcessInHead', |
| 16 | + body: 'startTagBody', |
| 17 | + form: 'startTagForm', |
| 18 | + plaintext: 'startTagPlaintext', |
| 19 | + a: 'startTagA', |
| 20 | + button: 'startTagButton', |
| 21 | + xmp: 'startTagXmp', |
| 22 | + table: 'startTagTable', |
| 23 | + hr: 'startTagHr', |
| 24 | + image: 'startTagImage', |
| 25 | + input: 'startTagInput', |
| 26 | + textarea: 'startTagTextarea', |
| 27 | + select: 'startTagSelect', |
| 28 | + isindex: 'startTagIsindex', |
| 29 | + applet: 'startTagAppletMarqueeObject', |
| 30 | + marquee: 'startTagAppletMarqueeObject', |
| 31 | + object: 'startTagAppletMarqueeObject', |
| 32 | + li: 'startTagListItem', |
| 33 | + dd: 'startTagListItem', |
| 34 | + dt: 'startTagListItem', |
| 35 | + address: 'startTagCloseP', |
| 36 | + blockquote: 'startTagCloseP', |
| 37 | + center: 'startTagCloseP', |
| 38 | + dir: 'startTagCloseP', |
| 39 | + div: 'startTagCloseP', |
| 40 | + dl: 'startTagCloseP', |
| 41 | + fieldset: 'startTagCloseP', |
| 42 | + listing: 'startTagCloseP', |
| 43 | + menu: 'startTagCloseP', |
| 44 | + ol: 'startTagCloseP', |
| 45 | + p: 'startTagCloseP', |
| 46 | + pre: 'startTagCloseP', |
| 47 | + ul: 'startTagCloseP', |
| 48 | + b: 'startTagFormatting', |
| 49 | + big: 'startTagFormatting', |
| 50 | + em: 'startTagFormatting', |
| 51 | + font: 'startTagFormatting', |
| 52 | + i: 'startTagFormatting', |
| 53 | + s: 'startTagFormatting', |
| 54 | + small: 'startTagFormatting', |
| 55 | + strike: 'startTagFormatting', |
| 56 | + strong: 'startTagFormatting', |
| 57 | + tt: 'startTagFormatting', |
| 58 | + u: 'startTagFormatting', |
| 59 | + nobr: 'startTagNobr', |
| 60 | + area: 'startTagVoidFormatting', |
| 61 | + basefont: 'startTagVoidFormatting', |
| 62 | + bgsound: 'startTagVoidFormatting', |
| 63 | + br: 'startTagVoidFormatting', |
| 64 | + embed: 'startTagVoidFormatting', |
| 65 | + img: 'startTagVoidFormatting', |
| 66 | + param: 'startTagVoidFormatting', |
| 67 | + spacer: 'startTagVoidFormatting', |
| 68 | + wbr: 'startTagVoidFormatting', |
| 69 | + iframe: 'startTagCdata', |
| 70 | + noembed: 'startTagCdata', |
| 71 | + noframes: 'startTagCdata', |
| 72 | + noscript: 'startTagCdata', |
| 73 | + h1: 'startTagHeading', |
| 74 | + h2: 'startTagHeading', |
| 75 | + h3: 'startTagHeading', |
| 76 | + h4: 'startTagHeading', |
| 77 | + h5: 'startTagHeading', |
| 78 | + h6: 'startTagHeading', |
| 79 | + caption: 'startTagMisplaced', |
| 80 | + col: 'startTagMisplaced', |
| 81 | + colgroup: 'startTagMisplaced', |
| 82 | + frame: 'startTagMisplaced', |
| 83 | + frameset: 'startTagMisplaced', |
| 84 | + //head: 'startTagMisplaced', |
| 85 | + tbody: 'startTagMisplaced', |
| 86 | + td: 'startTagMisplaced', |
| 87 | + tfoot: 'startTagMisplaced', |
| 88 | + th: 'startTagMisplaced', |
| 89 | + thead: 'startTagMisplaced', |
| 90 | + tr: 'startTagMisplaced', |
| 91 | + option: 'startTagMisplaced', |
| 92 | + optgroup: 'startTagMisplaced', |
| 93 | + 'event-source': 'startTagNew', |
| 94 | + section: 'startTagNew', |
| 95 | + nav: 'startTagNew', |
| 96 | + article: 'startTagNew', |
| 97 | + aside: 'startTagNew', |
| 98 | + header: 'startTagNew', |
| 99 | + footer: 'startTagNew', |
| 100 | + datagrid: 'startTagNew', |
| 101 | + command: 'startTagNew', |
| 102 | + math: 'startTagMath', |
| 103 | + svg: 'startTagSVG', |
| 104 | + "-default": 'startTagOther', |
| 105 | +} |
| 106 | + |
| 107 | +var end_tag_handlers = { |
| 108 | + p: 'endTagP', |
| 109 | + body: 'endTagBody', |
| 110 | + html: 'endTagHtml', |
| 111 | + form: 'endTagForm', |
| 112 | + applet: 'endTagAppletButtonMarqueeObject', |
| 113 | + button: 'endTagAppletButtonMarqueeObject', |
| 114 | + marquee: 'endTagAppletButtonMarqueeObject', |
| 115 | + object: 'endTagAppletButtonMarqueeObject', |
| 116 | + dd: 'endTagListItem', |
| 117 | + dt: 'endTagListItem', |
| 118 | + li: 'endTagListItem', |
| 119 | + address: 'endTagBlock', |
| 120 | + blockquote: 'endTagBlock', |
| 121 | + center: 'endTagBlock', |
| 122 | + div: 'endTagBlock', |
| 123 | + dl: 'endTagBlock', |
| 124 | + fieldset: 'endTagBlock', |
| 125 | + listing: 'endTagBlock', |
| 126 | + menu: 'endTagBlock', |
| 127 | + ol: 'endTagBlock', |
| 128 | + pre: 'endTagBlock', |
| 129 | + ul: 'endTagBlock', |
| 130 | + h1: 'endTagHeading', |
| 131 | + h2: 'endTagHeading', |
| 132 | + h3: 'endTagHeading', |
| 133 | + h4: 'endTagHeading', |
| 134 | + h5: 'endTagHeading', |
| 135 | + h6: 'endTagHeading', |
| 136 | + a: 'endTagFormatting', |
| 137 | + b: 'endTagFormatting', |
| 138 | + big: 'endTagFormatting', |
| 139 | + em: 'endTagFormatting', |
| 140 | + font: 'endTagFormatting', |
| 141 | + i: 'endTagFormatting', |
| 142 | + nobr: 'endTagFormatting', |
| 143 | + s: 'endTagFormatting', |
| 144 | + small: 'endTagFormatting', |
| 145 | + strike: 'endTagFormatting', |
| 146 | + strong: 'endTagFormatting', |
| 147 | + tt: 'endTagFormatting', |
| 148 | + u: 'endTagFormatting', |
| 149 | + head: 'endTagMisplaced', |
| 150 | + frameset: 'endTagMisplaced', |
| 151 | + select: 'endTagMisplaced', |
| 152 | + optgroup: 'endTagMisplaced', |
| 153 | + option: 'endTagMisplaced', |
| 154 | + table: 'endTagMisplaced', |
| 155 | + caption: 'endTagMisplaced', |
| 156 | + colgroup: 'endTagMisplaced', |
| 157 | + col: 'endTagMisplaced', |
| 158 | + thead: 'endTagMisplaced', |
| 159 | + tfoot: 'endTagMisplaced', |
| 160 | + tbody: 'endTagMisplaced', |
| 161 | + tr: 'endTagMisplaced', |
| 162 | + td: 'endTagMisplaced', |
| 163 | + th: 'endTagMisplaced', |
| 164 | + br: 'endTagBr', |
| 165 | + area: 'endTagNone', |
| 166 | + basefont: 'endTagNone', |
| 167 | + bgsound: 'endTagNone', |
| 168 | + embed: 'endTagNone', |
| 169 | + hr: 'endTagNone', |
| 170 | + image: 'endTagNone', |
| 171 | + img: 'endTagNone', |
| 172 | + input: 'endTagNone', |
| 173 | + isindex: 'endTagNone', |
| 174 | + param: 'endTagNone', |
| 175 | + spacer: 'endTagNone', |
| 176 | + wbr: 'endTagNone', |
| 177 | + frame: 'endTagNone', |
| 178 | + noframes: 'endTagCdataTextAreaXmp', |
| 179 | + noscript: 'endTagCdataTextAreaXmp', |
| 180 | + noembed: 'endTagCdataTextAreaXmp', |
| 181 | + textarea: 'endTagCdataTextAreaXmp', |
| 182 | + xmp: 'endTagCdataTextAreaXmp', |
| 183 | + iframe: 'endTagCdataTextAreaXmp', |
| 184 | + 'event-source': 'endTagNew', |
| 185 | + section: 'endTagNew', |
| 186 | + nav: 'endTagNew', |
| 187 | + article: 'endTagNew', |
| 188 | + aside: 'endTagNew', |
| 189 | + header: 'endTagNew', |
| 190 | + footer: 'endTagNew', |
| 191 | + datagrid: 'endTagNew', |
| 192 | + command: 'endTagNew', |
| 193 | + "-default": 'endTagOther', |
| 194 | +} |
| 195 | + |
| 196 | +var p = exports.Phase = function InBodyPhase(parser, tree) { /* In-body phase constructor. `p` is declared with `var` because this file is strict-mode code, where assigning to an undeclared name throws a ReferenceError. */ |
| 197 | + Phase.call(this, parser, tree); /* run the base Phase constructor on this instance */ |
| 198 | + this.start_tag_handlers = start_tag_handlers; /* tag name -> method name table defined at the top of this file */ |
| 199 | + this.end_tag_handlers = end_tag_handlers; |
| 200 | + this.name = 'in_body_phase'; |
| 201 | +} |
| 202 | + |
| 203 | +p.prototype = new Phase; |
| 204 | + |
| 205 | +p.prototype.processSpaceCharactersDropNewline = function(data) { /* Inserts whitespace `data`, first dropping one leading "\n" when the current node is a childless <pre> or <textarea>. Always clears the dropNewline flag. */ |
| 206 | + this.dropNewline = false |
| 207 | + var lastTag = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() |
| 208 | + if(data.length > 0 && data[0] == "\n" && ('pre' == lastTag || 'textarea' == lastTag) && !this.tree.open_elements[this.tree.open_elements.length - 1].hasChildNodes()) { |
| 209 | + data = data.slice(1) |
| 210 | + } |
| 211 | + |
| 212 | + if(data.length > 0) { /* nothing is inserted when the dropped newline was the only character */ |
| 213 | + this.tree.reconstructActiveFormattingElements() |
| 214 | + this.tree.insert_text(data) |
| 215 | + } |
| 216 | +} |
| 217 | + |
| 218 | +p.prototype.processSpaceCharacters = function(data) { /* Routes whitespace to the newline-dropping variant while dropNewline is set, otherwise to the plain variant. */ |
| 219 | + if(this.dropNewline) { /* set by startTagCloseP for <pre> and by startTagTextarea */ |
| 220 | + this.processSpaceCharactersDropNewline(data) |
| 221 | + } else { |
| 222 | + this.processSpaceCharactersNonPre(data) |
| 223 | + } |
| 224 | +} |
| 225 | + |
| 226 | +p.prototype.processSpaceCharactersNonPre = function(data) { /* Reconstructs the active formatting elements, then inserts `data` as text unchanged. */ |
| 227 | + this.tree.reconstructActiveFormattingElements(); |
| 228 | + this.tree.insert_text(data); |
| 229 | +} |
| 230 | + |
| 231 | +p.prototype.processCharacters = function(data) { |
| 232 | + // XXX The specification says to do this for every character at the moment, |
| 233 | + // but apparently that doesn't match the real world so we don't do it for |
| 234 | + // space characters. |
| 235 | + this.tree.reconstructActiveFormattingElements(); |
| 236 | + this.tree.insert_text(data); |
| 237 | +} |
| 238 | + |
| 239 | +p.prototype.startTagProcessInHead = function(name, attributes) { /* Handles head-level tags (base, link, meta, script, style, title per the handler table) by delegating to a temporary in-head phase. */ |
| 240 | + new PHASES.inHead(this.parser, this.tree).processStartTag(name, attributes); /* NOTE(review): `PHASES` is not declared or required in this file - confirm where it comes from; under "use strict" an undeclared name throws */ |
| 241 | +} |
| 242 | + |
| 243 | +p.prototype.startTagBody = function(name, attributes) { /* A <body> start tag while already in body: always a parse error; its attributes are merged into the existing body element. */ |
| 244 | + this.parse_error('unexpected-start-tag', {name: 'body'}); |
| 245 | + if(this.tree.open_elements.length == 1 |
| 246 | + || this.tree.open_elements[1].tagName.toLowerCase() != 'body') { |
| 247 | + assert.ok(this.parser.inner_html) /* no body element at stack index 1: asserted to happen only when parser.inner_html is set */ |
| 248 | + } else { |
| 249 | + for(var i = 0; i < attributes.length; i++) { |
| 250 | + if(!this.tree.open_elements[1].getAttribute(attributes[i].nodeName)) { /* only attributes whose current value is falsy are copied */ |
| 251 | + this.tree.open_elements[1].setAttribute(attributes[i].nodeName, attributes[i].nodeValue); |
| 252 | + } |
| 253 | + } |
| 254 | + } |
| 255 | +} |
| 256 | + |
| 257 | +p.prototype.startTagCloseP = function(name, attributes) { /* Closes an in-scope <p> (via endTagP), then inserts the element. */ |
| 258 | + if(this.inScope('p')) this.endTagP('p'); |
| 259 | + this.tree.insert_element(name, attributes); |
| 260 | + if(name == 'pre') { |
| 261 | + this.dropNewline = true /* makes processSpaceCharacters take the newline-dropping path next */ |
| 262 | + } |
| 263 | +} |
| 264 | + |
| 265 | +p.prototype.startTagForm = function(name, attributes) { /* Inserts a <form> unless tree.formPointer is already set, in which case the tag is reported and ignored. */ |
| 266 | + if(this.tree.formPointer) { |
| 267 | + this.parse_error('unexpected-start-tag', {name: name}); |
| 268 | + } else { |
| 269 | + if(this.inScope('p')) this.endTagP('p'); |
| 270 | + this.tree.insert_element(name, attributes); |
| 271 | + this.tree.formPointer = this.tree.open_elements[this.tree.open_elements.length - 1]; /* the top of the open-element stack after the insert is recorded as the form pointer */ |
| 272 | + } |
| 273 | +} |
| 274 | + |
| 275 | +p.prototype.startTagListItem = function(name, attributes) { /* Handles <li>, <dd>, <dt>: closes an in-scope <p>, pops the stack down to a still-open sibling item if one is reachable, then inserts the new element. */ |
| 276 | + if(this.inScope('p')) this.endTagP('p'); |
| 277 | + var stopNames = {li: ['li'], dd: ['dd', 'dt'], dt: ['dd', 'dt']}; /* tag names that the new item implicitly closes */ |
| 278 | + var stopName = stopNames[name]; |
| 279 | + |
| 280 | + var els = this.tree.open_elements; |
| 281 | + for(var i = els.length - 1; i >= 0; i--) { /* walk the open-element stack from the top */ |
| 282 | + var node = els[i]; |
| 283 | + if(stopName.indexOf(node.tagName.toLowerCase()) != -1) { |
| 284 | + var poppedNodes = []; |
| 285 | + while(els.length - 1 >= i) { /* pop everything above and including the matched item */ |
| 286 | + poppedNodes.push(els.pop()); |
| 287 | + } |
| 288 | + if(poppedNodes.length >= 1) { |
| 289 | + this.parse_error(poppedNodes.length == 1 ? "missing-end-tag" : "missing-end-tags", |
| 290 | + {name: poppedNodes.map(function (n) { return n.tagName.toLowerCase() }).join(', ')}); /* was `n.name`; nodes are read through tagName everywhere else in this file */ |
| 291 | + } |
| 292 | + break; |
| 293 | + } |
| 294 | + |
| 295 | + // Phrasing elements are all non special, non scoping, non |
| 296 | + // formatting elements |
| 297 | + if(HTML5.SPECIAL_ELEMENTS.concat(HTML5.SCOPING_ELEMENTS).indexOf(node.tagName.toLowerCase()) != -1 && (node.tagName.toLowerCase() != 'address' && node.tagName.toLowerCase() != 'div')) break; |
| 298 | + } |
| 299 | + |
| 300 | + // Always insert an <li> element |
| 301 | + this.tree.insert_element(name, attributes); |
| 302 | +} |
| 303 | + |
| 304 | +p.prototype.startTagPlaintext = function(name, attributes) { /* Closes an in-scope <p>, inserts the element and switches the tokenizer content model to PLAINTEXT. */ |
| 305 | + if(this.inScope('p')) this.endTagP('p'); |
| 306 | + this.tree.insert_element(name, attributes); |
| 307 | + this.parser.tokenizer.content_model = HTML5.Models.PLAINTEXT; |
| 308 | +} |
| 309 | + |
| 310 | +p.prototype.startTagHeading = function(name, attributes) { /* h1-h6 start tag: closes an in-scope <p>, then inserts the heading element. */ |
| 311 | + if(this.inScope('p')) this.endTagP('p'); |
| 312 | + this.tree.insert_element(name, attributes); |
| 313 | +} |
| 314 | + |
| 315 | +p.prototype.startTagA = function(name, attributes) { /* <a> start tag: an <a> still in the active formatting list is closed first, then the new one is added as a formatting element. */ |
| 316 | + var afeAElement; |
| 317 | + if(afeAElement = this.tree.elementInActiveFormattingElements('a')) { /* assignment is intentional: the lookup result is kept and tested for truthiness */ |
| 318 | + this.parse_error("unexpected-start-tag-implies-end-tag", {startName: "a", endName: "a"}); |
| 319 | + this.endTagFormatting('a'); |
| 320 | + var pos; |
| 321 | + pos = this.tree.open_elements.indexOf(afeAElement); /* remove the old element from both lists if endTagFormatting left it there */ |
| 322 | + if(pos != -1) this.tree.open_elements.splice(pos, 1); |
| 323 | + pos = this.tree.activeFormattingElements.indexOf(afeAElement); |
| 324 | + if(pos != -1) this.tree.activeFormattingElements.splice(pos, 1); |
| 325 | + } |
| 326 | + this.tree.reconstructActiveFormattingElements(); |
| 327 | + this.addFormattingElement(name, attributes); |
| 328 | +} |
| 329 | + |
| 330 | +p.prototype.startTagFormatting = function(name, attributes) { /* Formatting start tag (b, i, em, ... per the handler table): reconstructs the active formatting elements, then inserts and registers the new element. */ |
| 331 | + this.tree.reconstructActiveFormattingElements(); |
| 332 | + this.addFormattingElement(name, attributes); |
| 333 | +} |
| 334 | + |
| 335 | +p.prototype.startTagNobr = function(name, attributes) { /* <nobr> start tag: an in-scope <nobr> is closed first (with a parse error), then the new one is added as a formatting element. */ |
| 336 | + this.tree.reconstructActiveFormattingElements(); |
| 337 | + if(this.inScope('nobr')) { |
| 338 | + this.parse_error("unexpected-start-tag-implies-end-tag", {startName: 'nobr', endName: 'nobr'}); |
| 339 | + this.processEndTag('nobr'); /* dispatched through the end-tag table, where nobr maps to endTagFormatting */ |
| 340 | + } |
| 341 | + this.addFormattingElement(name, attributes); |
| 342 | +} |
| 343 | + |
| 344 | +p.prototype.startTagButton = function(name, attributes) { /* <button> start tag: an in-scope <button> is closed and the token reprocessed; otherwise the button is inserted and a marker pushed on the active formatting list. */ |
| 345 | + if(this.inScope('button')) { |
| 346 | + this.parse_error('unexpected-start-tag-implies-end-tag', {startName: 'button', endName: 'button'}); |
| 347 | + this.processEndTag('button'); |
| 348 | + this.parser.phase.processStartTag(name, attributes); /* re-dispatch the same start tag through the parser's current phase */ |
| 349 | + } else { |
| 350 | + this.tree.reconstructActiveFormattingElements(); |
| 351 | + this.tree.insert_element(name, attributes); |
| 352 | + this.tree.activeFormattingElements.push(HTML5.Marker); |
| 353 | + } |
| 354 | +} |
| 355 | + |
| 356 | +p.prototype.startTagAppletMarqueeObject = function(name, attributes) { /* applet/marquee/object start tag: inserts the element and pushes HTML5.Marker onto the active formatting list. */ |
| 357 | + this.tree.reconstructActiveFormattingElements(); |
| 358 | + this.tree.insert_element(name, attributes) |
| 359 | + this.tree.activeFormattingElements.push(HTML5.Marker); |
| 360 | +} |
| 361 | + |
| 362 | +p.prototype.endTagAppletButtonMarqueeObject = function(name) { /* End tag for applet/button/marquee/object: closes the element if it is in scope and clears the active formatting list. */ |
| 363 | + if(this.inScope(name)) this.tree.generateImpliedEndTags() |
| 364 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) { /* the current node is not the element being closed */ |
| 365 | + this.parse_error('end-tag-too-early', {name: name}) |
| 366 | + } |
| 367 | + if(this.inScope(name)) { |
| 368 | + this.tree.remove_open_elements_until(name) |
| 369 | + this.tree.clearActiveFormattingElements() |
| 370 | + } |
| 371 | +} |
| 372 | + |
| 373 | +p.prototype.startTagXmp = function(name, attributes) { /* <xmp> start tag: inserts the element and switches the tokenizer content model to CDATA. */ |
| 374 | + this.tree.reconstructActiveFormattingElements(); |
| 375 | + this.tree.insert_element(name, attributes); |
| 376 | + this.parser.tokenizer.content_model = HTML5.Models.CDATA; |
| 377 | +} |
| 378 | + |
| 379 | +p.prototype.startTagTable = function(name, attributes) { /* <table> start tag: closes an in-scope <p>, inserts the table and switches the parser to the 'inTable' phase. */ |
| 380 | + if(this.inScope('p')) this.processEndTag('p'); |
| 381 | + this.tree.insert_element(name, attributes); |
| 382 | + this.parser.newPhase('inTable'); |
| 383 | +} |
| 384 | + |
| 385 | +p.prototype.startTagVoidFormatting = function(name, attributes) { /* Void elements (br, img, embed, ... per the handler table): inserted and immediately popped so they never stay open. */ |
| 386 | + this.tree.reconstructActiveFormattingElements(); |
| 387 | + this.tree.insert_element(name, attributes); |
| 388 | + this.tree.pop_element(); |
| 389 | +} |
| 390 | + |
| 391 | +p.prototype.startTagHr = function(name, attributes) { /* <hr> start tag: closes an in-scope <p>, then inserts and immediately pops the element. */ |
| 392 | + if(this.inScope('p')) this.endTagP('p'); |
| 393 | + this.tree.insert_element(name, attributes); |
| 394 | + this.tree.pop_element(); |
| 395 | +} |
| 396 | + |
| 397 | +p.prototype.startTagImage = function(name, attributes) { /* <image> is reported as a parse error and reprocessed as an <img> start tag with the same attributes. */ |
| 398 | + // No, really... |
| 399 | + this.parse_error('unexpected-start-tag-treated-as', {originalName: 'image', newName: 'img'}); |
| 400 | + this.processStartTag('img', attributes); |
| 401 | +} |
| 402 | + |
| 403 | +p.prototype.startTagInput = function(name, attributes) { /* <input> start tag: inserted and immediately popped. The form-pointer branch is an empty placeholder. */ |
| 404 | + this.tree.reconstructActiveFormattingElements(); |
| 405 | + this.tree.insert_element(name, attributes); |
| 406 | + if(this.tree.formPointer) { |
| 407 | + // XXX Not sure what to do here |
| 408 | + } |
| 409 | + this.tree.pop_element(); |
| 410 | +} |
| 411 | + |
| 412 | +p.prototype.startTagIsindex = function(name, attributes) { /* Expands the deprecated <isindex> into form > hr, p > label > (prompt text, input), hr by replaying synthetic tokens through this phase. */ |
| 413 | + this.parse_error('deprecated-tag', {name: 'isindex'}); |
| 414 | + if(this.tree.formPointer) return; /* ignored entirely when a form pointer is already set */ |
| 415 | + this.processStartTag('form'); /* NOTE(review): called without an attributes argument - confirm the handlers tolerate undefined */ |
| 416 | + this.processStartTag('hr'); |
| 417 | + this.processStartTag('p'); |
| 418 | + this.processStartTag('label'); |
| 419 | + this.processCharacters("This is a searchable index. Insert your search keywords here: "); |
| 420 | + attributes.push({nodeName: 'name', nodeValue: 'isindex'}) /* mutates the caller's attributes array in place */ |
| 421 | + this.processStartTag('input', attributes); |
| 422 | + this.processEndTag('label'); |
| 423 | + this.processEndTag('p'); |
| 424 | + this.processStartTag('hr'); |
| 425 | + this.processEndTag('form'); |
| 426 | +} |
| 427 | + |
| 428 | +p.prototype.startTagTextarea = function(name, attributes) { /* <textarea> start tag: inserts the element, switches the tokenizer to RCDATA and arms the leading-newline drop. */ |
| 429 | + // XXX Form element pointer checking here as well... |
| 430 | + this.tree.insert_element(name, attributes) |
| 431 | + this.parser.tokenizer.content_model = HTML5.Models.RCDATA; |
| 432 | + this.dropNewline = true /* consumed by processSpaceCharacters */ |
| 433 | +} |
| 434 | + |
| 435 | +p.prototype.startTagCdata = function(name, attributes) { /* iframe/noembed/noframes/noscript start tag: inserts the element and switches the tokenizer content model to CDATA. */ |
| 436 | + this.tree.insert_element(name, attributes) |
| 437 | + this.parser.tokenizer.content_model = HTML5.Models.CDATA; |
| 438 | +} |
| 439 | + |
| 440 | +p.prototype.startTagSelect = function(name, attributes) { /* <select> start tag: inserts the element, then switches to 'inSelectInTable' or 'inSelect' depending on the parser's current phase name. */ |
| 441 | + this.tree.reconstructActiveFormattingElements(); |
| 442 | + this.tree.insert_element(name, attributes); |
| 443 | + |
| 444 | + var phaseName = this.parser.phaseName; |
| 445 | + if(phaseName == 'inTable' || phaseName == 'inCaption' |
| 446 | + || phaseName == 'inColumnGroup' |
| 447 | + || phaseName == 'inTableBody' |
| 448 | + || phaseName == 'inRow' |
| 449 | + || phaseName == 'inCell') { /* any table-related phase */ |
| 450 | + this.parser.newPhase('inSelectInTable'); |
| 451 | + } else { |
| 452 | + this.parser.newPhase('inSelect'); |
| 453 | + } |
| 454 | +} |
| 455 | + |
| 456 | +p.prototype.startTagMisplaced = function(name, attributes) { /* Start tags that are handled in other phases (caption, col, td, ... per the handler table): reported and ignored. */ |
| 457 | + this.parse_error('unexpected-start-tag-ignored', {name: name}); |
| 458 | +} |
| 459 | + |
| 460 | +p.prototype.endTagMisplaced = function(name) { /* Reports 'unexpected-end-tag' for `name`; the tree is left unchanged. */ |
| 461 | + // This handles elements with end tags in other insertion modes. |
| 462 | + this.parse_error("unexpected-end-tag", {name: name}) |
| 463 | +} |
| 464 | + |
| 465 | +p.prototype.endTagBr = function(name) { /* A </br> end tag is reported and then treated as a <br> element with no attributes. */ |
| 466 | + this.parse_error("unexpected-end-tag-treated-as", {originalName: "br", newName: "br element"}) |
| 467 | + this.tree.reconstructActiveFormattingElements() |
| 468 | + this.tree.insert_element(name, []) |
| 469 | + this.tree.pop_element() /* popped right away so the element never stays open */ |
| 470 | + |
| 471 | +} |
| 472 | + |
| 473 | +p.prototype.startTagOptionOptgroup = function(name, attributes) { /* option/optgroup start tag: closes an in-scope <option>, then inserts the new element. Not referenced by the handler table in this file. */ |
| 474 | + if(this.inScope('option')) this.endTagOther('option'); /* was a bare endTagOther(...): that identifier is not in scope here and threw a ReferenceError */ |
| 475 | + this.tree.reconstructActiveFormattingElements(); |
| 476 | + this.tree.insert_element(name, attributes); |
| 477 | +} |
| 478 | + |
| 479 | +p.prototype.startTagNew = function(name, attributes) { /* section, nav, article, ... (per the handler table): handled exactly like any other start tag. */ |
| 480 | + this.startTagOther(name, attributes); |
| 481 | +} |
| 482 | + |
| 483 | +p.prototype.startTagOther = function(name, attributes) { /* Default start-tag handler: reconstructs the active formatting elements and inserts the element. */ |
| 484 | + this.tree.reconstructActiveFormattingElements(); |
| 485 | + this.tree.insert_element(name, attributes); |
| 486 | +} |
| 487 | + |
| 488 | +p.prototype.endTagOther = function endTagOther(name) { /* Default end-tag handler: searches the open-element stack from the top for `name` and closes up to it, stopping with an error at the first special or scoping element. */ |
| 489 | + var nodes = this.tree.open_elements; |
| 490 | + for(var eli = nodes.length - 1; eli > 0; eli--) { /* index 0 (the bottom of the stack) is never examined */ |
| 491 | + var currentNode = nodes[eli]; |
| 492 | + if(nodes[eli].tagName.toLowerCase() == name) { |
| 493 | + this.tree.generateImpliedEndTags(); |
| 494 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) { |
| 495 | + this.parse_error('unexpected-end-tag', {name: name}); |
| 496 | + } |
| 497 | + |
| 498 | + this.tree.remove_open_elements_until(function(el) { /* predicate form: matches this exact node rather than a tag name */ |
| 499 | + return el == currentNode; |
| 500 | + }); |
| 501 | + |
| 502 | + break; |
| 503 | + } else { |
| 504 | + |
| 505 | + if(HTML5.SPECIAL_ELEMENTS.concat(HTML5.SCOPING_ELEMENTS).indexOf(nodes[eli].tagName.toLowerCase()) != -1) { |
| 506 | + this.parse_error('unexpected-end-tag', {name: name}); /* end tag ignored: the search does not continue past this element */ |
| 507 | + break; |
| 508 | + } |
| 509 | + } |
| 510 | + } |
| 511 | +} |
| 512 | + |
| 513 | +p.prototype.startTagMath = function(name, attributes) { /* <math> start tag: adjusts attributes, inserts a foreign element tagged 'math' and switches to the 'inForeignContent' phase. */ |
| 514 | + this.tree.reconstructActiveFormattingElements(); |
| 515 | + attributes = this.adjust_mathml_attributes(attributes); |
| 516 | + attributes = this.adjust_foreign_attributes(attributes); |
| 517 | + this.tree.insert_foreign_element(name, attributes, 'math'); |
| 518 | + if(false) { /* placeholder: self-closing handling is not implemented, so the else branch always runs */ |
| 519 | + // If the token has its self-closing flag set, pop the current node off |
| 520 | + // the stack of open elements and acknowledge the token's self-closing flag |
| 521 | + } else { |
| 522 | + this.parser.secondary_phase = this.parser.phase; /* the phase current at this point is saved on the parser before switching */ |
| 523 | + this.parser.newPhase('inForeignContent'); |
| 524 | + } |
| 525 | +} |
| 526 | + |
| 527 | +p.prototype.startTagSVG = function(name, attributes) { /* <svg> start tag: adjusts attributes, inserts a foreign element tagged 'svg' and switches to the 'inForeignContent' phase. */ |
| 528 | + this.tree.reconstructActiveFormattingElements(); |
| 529 | + attributes = this.adjust_svg_attributes(attributes); |
| 530 | + attributes = this.adjust_foreign_attributes(attributes); |
| 531 | + this.tree.insert_foreign_element(name, attributes, 'svg'); |
| 532 | + if(false) { /* placeholder: self-closing handling is not implemented, so the else branch always runs */ |
| 533 | + // If the token has its self-closing flag set, pop the current node off |
| 534 | + // the stack of open elements and acknowledge the token's self-closing flag |
| 535 | + } else { |
| 536 | + this.parser.secondary_phase = this.parser.phase; /* the phase current at this point is saved on the parser before switching */ |
| 537 | + this.parser.newPhase('inForeignContent'); |
| 538 | + } |
| 539 | +} |
| 540 | + |
| 541 | +p.prototype.endTagP = function(name) { /* </p> end tag: closes the in-scope <p>; when none is in scope, reports an error, synthesizes an empty <p> and closes that. */ |
| 542 | + if(this.inScope('p')) this.tree.generateImpliedEndTags('p'); |
| 543 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != 'p') /* was `!x == 'p'`: `!` binds tighter than `==`, so the test was always false and the error never fired */ |
| 544 | + this.parse_error('unexpected-end-tag', {name: 'p'}); |
| 545 | + if(this.inScope('p')) { |
| 546 | + while(this.inScope('p')) this.tree.pop_element(); /* pop until no <p> remains in scope */ |
| 547 | + } else { |
| 548 | + this.startTagCloseP('p', {}); /* no <p> open: insert one so the end tag has something to close */ |
| 549 | + this.endTagP('p'); |
| 550 | + } |
| 551 | +} |
| 552 | + |
| 553 | +p.prototype.endTagBody = function(name) { /* </body> end tag: switches to the 'afterBody' phase, unless no body element sits at stack index 1 (the inner_html case), where the tag is reported and ignored. */ |
| 554 | + if(this.tree.open_elements.length < 2 || this.tree.open_elements[1].tagName.toLowerCase() != 'body') { /* length guard added, mirroring startTagBody: index 1 may not exist */ |
| 555 | + // inner_html case |
| 556 | + this.parse_error('unexpected-end-tag', {name: 'body'}); |
| 557 | + return; |
| 558 | + } |
| 559 | + |
| 560 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != 'body') { /* other elements are still open above body */ |
| 561 | + this.parse_error('expected-one-end-tag-but-got-another', { |
| 562 | + expectedName: 'body', |
| 563 | + gotName: this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() |
| 564 | + }); |
| 565 | + } |
| 566 | + this.parser.newPhase('afterBody'); |
| 567 | +} |
| 568 | + |
| 569 | +p.prototype.endTagHtml = function(name) { /* </html> end tag: acts as </body>, then re-dispatches the token to the parser's current phase. */ |
| 570 | + this.endTagBody(name); |
| 571 | + if(!this.parser.inner_html) this.parser.phase.processEndTag(name); /* was this.inner_html; the flag is read from the parser in startTagBody. In the inner_html case endTagBody leaves the phase unchanged, so re-dispatching would re-enter this handler without end. */ |
| 572 | +} |
| 573 | + |
| 574 | +p.prototype.endTagBlock = function(name) { /* End tag for block elements (div, ul, pre, ... per the handler table): closes the element if it is in scope, reporting an error when it is not the current node. */ |
| 575 | + if(this.inScope(name)) this.tree.generateImpliedEndTags(); |
| 576 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) { /* was `!x == 'name'`: always false, and compared against the literal string 'name' instead of the variable */ |
| 577 | + this.parse_error('end-tag-too-early', {name: name}); |
| 578 | + } |
| 579 | + if(this.inScope(name)) this.tree.remove_open_elements_until(name); |
| 580 | +} |
| 581 | + |
| 582 | +p.prototype.endTagForm = function(name) { /* </form> end tag: pops the form only when it is the current node; the form pointer is cleared in every case. */ |
| 583 | + if(this.inScope(name)) { |
| 584 | + this.tree.generateImpliedEndTags(); |
| 585 | + } |
| 586 | + |
| 587 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) { |
| 588 | + this.parse_error('end-tag-too-early-ignored', {name: 'form'}); /* the open-element stack is left untouched in this branch */ |
| 589 | + } else { |
| 590 | + this.tree.pop_element(); |
| 591 | + } |
| 592 | + this.tree.formPointer = null; |
| 593 | +} |
| 594 | + |
| 595 | +p.prototype.endTagListItem = function(name) { /* End tag for li/dd/dt: closes the item if it is in scope, reporting an error when it is not the current node. */ |
| 596 | + if(this.inScope(name)) this.tree.generateImpliedEndTags(name); /* `name` is passed through to generateImpliedEndTags, as 'p' is in endTagP */ |
| 597 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) |
| 598 | + this.parse_error('end-tag-too-early', {name: name}); |
| 599 | + if(this.inScope(name)) this.tree.remove_open_elements_until(name); |
| 600 | +} |
| 601 | + |
| 602 | +p.prototype.endTagHeading = function(name) { /* h1-h6 end tag: if any heading is in scope, pops the stack up to the nearest heading element, whatever its level. */ |
| 603 | + for(var i = 0; i < HTML5.HEADING_ELEMENTS.length; i++) { /* was `for(i in ...)`: undeclared `i` throws in strict mode, and for-in is the wrong loop for an array */ |
| 604 | + var el = HTML5.HEADING_ELEMENTS[i]; |
| 605 | + if(this.inScope(el)) { |
| 606 | + this.tree.generateImpliedEndTags(); /* run once, as soon as any heading is found in scope */ |
| 607 | + break; |
| 608 | + } |
| 609 | + } |
| 610 | + |
| 611 | + if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) |
| 612 | + this.parse_error('end-tag-too-early', {name: name}); |
| 613 | + |
| 614 | + for(i = 0; i < HTML5.HEADING_ELEMENTS.length; i++) { |
| 615 | + el = HTML5.HEADING_ELEMENTS[i]; |
| 616 | + if(this.inScope(el)) { |
| 617 | + this.tree.remove_open_elements_until(function(e) { /* stop at the first heading of any level */ |
| 618 | + return HTML5.HEADING_ELEMENTS.indexOf(e.tagName.toLowerCase()) != -1 |
| 619 | + }); |
| 620 | + break; |
| 621 | + } |
| 622 | + } |
| 623 | +} |
| 624 | + |
| 625 | +p.prototype.endTagFormatting = function(name) { /* End tag for a formatting element (a, b, i, ...): the "adoption agency" handling for mis-nested formatting. Repeats until one of the return statements below is reached. */ |
| 626 | + while(true) { |
| 627 | + var afeElement = this.tree.elementInActiveFormattingElements(name); |
| 628 | + if(!afeElement || (this.tree.open_elements.indexOf(afeElement) != -1 |
| 629 | + && !this.inScope(afeElement.tagName.toLowerCase()))) { /* not in the active list, or open but not in scope: report and ignore */ |
| 630 | + this.parse_error('adoption-agency-1.1', {name: name}); |
| 631 | + return; |
| 632 | + } else if(this.tree.open_elements.indexOf(afeElement) == -1) { /* in the active list but no longer open: drop the stale entry */ |
| 633 | + this.parse_error('adoption-agency-1.2', {name: name}); |
| 634 | + this.tree.activeFormattingElements.splice(this.tree.activeFormattingElements.indexOf(afeElement), 1); |
| 635 | + return; |
| 636 | + } |
| 637 | + |
| 638 | + if(afeElement != this.tree.open_elements[this.tree.open_elements.length - 1]) { |
| 639 | + this.parse_error('adoption-agency-1.3', {name: name}); |
| 640 | + } |
| 641 | + |
| 642 | + // Start of the adoption agency algorithm proper |
| 643 | + var afeIndex = this.tree.open_elements.indexOf(afeElement); |
| 644 | + var furthestBlock = null; |
| 645 | + var els = this.tree.open_elements.slice(afeIndex); |
| 646 | + var len = els.length; |
| 647 | + for(var i = 0; i < len; i++) { /* first special or scoping element at or above the formatting element */ |
| 648 | + var element = els[i]; |
| 649 | + if(HTML5.SPECIAL_ELEMENTS.concat(HTML5.SCOPING_ELEMENTS).indexOf(element.tagName.toLowerCase()) != -1) { |
| 650 | + furthestBlock = element; |
| 651 | + break; |
| 652 | + } |
| 653 | + } |
| 654 | + |
| 655 | + if(!furthestBlock) { /* simple case: pop up to the formatting element and drop it from the active list */ |
| 656 | + var element = this.tree.remove_open_elements_until(function(el) { |
| 657 | + return el == afeElement; |
| 658 | + }); |
| 659 | + this.tree.activeFormattingElements.splice(this.tree.activeFormattingElements.indexOf(element), 1); |
| 660 | + return; |
| 661 | + } |
| 662 | + |
| 663 | + |
| 664 | + var commonAncestor = this.tree.open_elements[afeIndex - 1]; |
| 665 | + |
| 666 | + var bookmark = this.tree.activeFormattingElements.indexOf(afeElement); |
| 667 | + |
| 668 | + var lastNode; |
| 669 | + var node; |
| 670 | + lastNode = node = furthestBlock; |
| 671 | + |
| 672 | + while(true) { |
| 673 | + node = this.tree.open_elements[this.tree.open_elements.indexOf(node) - 1]; |
| 674 | + while(this.tree.activeFormattingElements.indexOf(node) == -1) { /* unlink open elements that are not active formatting elements */ |
| 675 | + var tmpNode = node; |
| 676 | + node = this.tree.open_elements[this.tree.open_elements.indexOf(node) - 1]; |
| 677 | + this.tree.open_elements.splice(this.tree.open_elements.indexOf(tmpNode), 1); |
| 678 | + } |
| 679 | + |
| 680 | + if(node == afeElement) break; |
| 681 | + |
| 682 | + if(lastNode == furthestBlock) { |
| 683 | + bookmark = this.tree.activeFormattingElements.indexOf(node) + 1; |
| 684 | + } |
| 685 | + |
| 686 | + |
| 687 | + |
| 688 | + if(node.hasChildNodes()) { /* swap in a clone in both lists so the original keeps its children */ |
| 689 | + var clone = node.cloneNode(); |
| 690 | + this.tree.activeFormattingElements[this.tree.activeFormattingElements.indexOf(node)] = clone; |
| 691 | + this.tree.open_elements[this.tree.open_elements.indexOf(node)] = clone; |
| 692 | + node = clone; |
| 693 | + } |
| 694 | + |
| 695 | + if(lastNode.parentNode) lastNode.parentNode.removeChild(lastNode); /* was `.parent`; the DOM property is parentNode, so the detach never ran */ |
| 696 | + node.appendChild(lastNode); |
| 697 | + |
| 698 | + lastNode = node |
| 699 | + |
| 700 | + } |
| 701 | + |
| 702 | + if(lastNode.parentNode) lastNode.parentNode.removeChild(lastNode); |
| 703 | + commonAncestor.appendChild(lastNode); |
| 704 | + |
| 705 | + clone = afeElement.cloneNode(); /* `clone` is function-scoped through the `var` inside the loop above */ |
| 706 | + |
| 707 | + this.tree.reparentChildren(furthestBlock, clone); |
| 708 | + |
| 709 | + furthestBlock.appendChild(clone); |
| 710 | + |
| 711 | + this.tree.activeFormattingElements.splice(this.tree.activeFormattingElements.indexOf(afeElement), 1); |
| 712 | + this.tree.activeFormattingElements.splice(Math.min(bookmark, this.tree.activeFormattingElements.length), 0, clone); |
| 713 | + |
| 714 | + this.tree.open_elements.splice(this.tree.open_elements.indexOf(afeElement), 1); |
| 715 | + this.tree.open_elements.splice(this.tree.open_elements.indexOf(furthestBlock) + 1, 0, clone); |
| 716 | + |
| 717 | + } |
| 718 | +} |
| 719 | + |
| 720 | +p.prototype.addFormattingElement = function(name, attributes) { /* Inserts the element and pushes it (the new top of the open-element stack) onto the active formatting list. */ |
| 721 | + this.tree.insert_element(name, attributes); |
| 722 | + this.tree.activeFormattingElements.push(this.tree.open_elements[this.tree.open_elements.length - 1]); |
| 723 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_body_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 724 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_head_phase.js |
— | — | @@ -0,0 +1,59 @@ |
| 2 | +"use strict"; |
| 3 | +var Phase = require('./phase').Phase; |
| 4 | + |
| 5 | +var start_tag_handlers = { |
| 6 | + html: 'startTagHtml', |
| 7 | + head: 'startTagHead', |
| 8 | + '-default': 'startTagOther', |
| 9 | +} |
| 10 | + |
| 11 | +var end_tag_handlers = { |
| 12 | + html: 'endTagImplyHead', |
| 13 | + head: 'endTagImplyHead', |
| 14 | + body: 'endTagImplyHead', |
| 15 | + br: 'endTagImplyHead', |
| 16 | + p: 'endTagImplyHead', |
| 17 | + '-default': 'endTagOther', |
| 18 | +} |
| 19 | + |
| 20 | +var p = exports.Phase = function (parser, tree) { /* Before-head phase constructor. `p` is declared with `var` because this file is strict-mode code, where assigning to an undeclared name throws a ReferenceError. */ |
| 21 | + Phase.call(this, parser, tree); /* run the base Phase constructor on this instance */ |
| 22 | + this.start_tag_handlers = start_tag_handlers; /* tag name -> method name table defined at the top of this file */ |
| 23 | + this.end_tag_handlers = end_tag_handlers; |
| 24 | + this.name = 'before_head_phase'; |
| 25 | +} |
| 26 | + |
| 27 | +p.prototype = new Phase; |
| 28 | + |
| 29 | +p.prototype.processEOF = function() { /* EOF before <head>: implies an empty <head>, then forwards EOF to the parser's current phase. */ |
| 30 | + this.startTagHead('head', {}); /* startTagHead switches the parser to 'inHead' */ |
| 31 | + this.parser.phase.processEOF(); |
| 32 | +} |
| 33 | + |
| 34 | +p.prototype.processCharacters = function(data) { /* Characters before <head>: implies an empty <head>, then forwards `data` to the parser's current phase. */ |
| 35 | + this.startTagHead('head', {}); |
| 36 | + this.parser.phase.processCharacters(data); |
| 37 | +} |
| 38 | + |
| 39 | +p.prototype.processSpaceCharacters = function(data) { /* Empty body: space characters are dropped in this phase. */ |
| 40 | +} |
| 41 | + |
| 42 | +p.prototype.startTagHead = function(name, attributes) { /* Inserts the head element, records it as tree.head_pointer and switches the parser to the 'inHead' phase. */ |
| 43 | + this.tree.insert_element(name, attributes); |
| 44 | + this.tree.head_pointer = this.tree.open_elements[this.tree.open_elements.length - 1]; /* top of the open-element stack after the insert */ |
| 45 | + this.parser.newPhase('inHead'); |
| 46 | +} |
| 47 | + |
| 48 | +p.prototype.startTagOther = function(name, attributes) { /* Any other start tag: implies an empty <head>, then reprocesses the tag in the parser's current phase. */ |
| 49 | + this.startTagHead('head', {}); |
| 50 | + this.parser.phase.processStartTag(name, attributes); |
| 51 | +} |
| 52 | + |
| 53 | +p.prototype.endTagImplyHead = function(name) { /* html/head/body/br/p end tags (per the handler table): imply an empty <head>, then reprocess the end tag in the parser's current phase. */ |
| 54 | + this.startTagHead('head', {}); |
| 55 | + this.parser.phase.processEndTag(name); |
| 56 | +} |
| 57 | + |
| 58 | +p.prototype.endTagOther = function(name) { /* Any other end tag: only an 'end-tag-after-implied-root' parse error is reported. */ |
| 59 | + this.parse_error('end-tag-after-implied-root', {name: name}); |
| 60 | +} |
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_head_phase.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 61 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt |
— | — | @@ -28,44 +28,8 @@ |
29 | 29 | }; |
30 | 30 | |
31 | 31 | |
| 32 | + var pp = function ( s ) { return JSON.stringify(s, null, 2); } |
32 | 33 | |
33 | | - /* Temporary debugging help. Is there anything similar in JS or a library? */ |
34 | | - var print_r = function (arr, level) { |
35 | | - |
36 | | - var dumped_text = ""; |
37 | | - if (!level) level = 0; |
38 | | - |
39 | | - //The padding given at the beginning of the line. |
40 | | - var level_padding = ""; |
41 | | - var bracket_level_padding = ""; |
42 | | - |
43 | | - for (var j = 0; j < level + 1; j++) level_padding += " "; |
44 | | - for (var b = 0; b < level; b++) bracket_level_padding += " "; |
45 | | - |
46 | | - if (typeof(arr) == 'object') { //Array/Hashes/Objects |
47 | | - dumped_text += "Array\n"; |
48 | | - dumped_text += bracket_level_padding + "(\n"; |
49 | | - for (var item in arr) { |
50 | | - |
51 | | - var value = arr[item]; |
52 | | - |
53 | | - if (typeof(value) == 'object') { //If it is an array, |
54 | | - dumped_text += level_padding + "[" + item + "] => "; |
55 | | - dumped_text += print_r(value, level + 2); |
56 | | - } else { |
57 | | - dumped_text += level_padding + "[" + item + "] => '" + value + "'\n"; |
58 | | - } |
59 | | - |
60 | | - } |
61 | | - dumped_text += bracket_level_padding + ")\n\n"; |
62 | | - } else { //Strings/Chars/Numbers etc. |
63 | | - dumped_text = "=>" + arr + "<=(" + typeof(arr) + ")"; |
64 | | - } |
65 | | - |
66 | | - return dumped_text; |
67 | | - |
68 | | - }; |
69 | | - |
70 | 34 | // Convert list prefixes to a list of WikiDom list styles |
71 | 35 | var bulletsToTypes = function (bullets) { |
72 | 36 | var bTypes = []; |
— | — | @@ -92,7 +56,7 @@ |
93 | 57 | |
94 | 58 | // return [text [annotations]] |
95 | 59 | var extractText = function ( node, offset ) { |
96 | | - dp("extract: " + print_r(node)); |
| 60 | + dp("extract: " + pp(node)); |
97 | 61 | if (typeof node === 'string') { |
98 | 62 | return [node, []]; |
99 | 63 | } else if ($.isArray(node)) { |
— | — | @@ -121,7 +85,7 @@ |
122 | 86 | } |
123 | 87 | return texts.join(''); |
124 | 88 | } else { |
125 | | - throw ("extract failed: " + print_r(node)); |
| 89 | + throw ("extract failed: " + pp(node)); |
126 | 90 | } |
127 | 91 | }; |
128 | 92 | */ |
— | — | @@ -165,7 +129,7 @@ |
166 | 130 | |
167 | 131 | // Start of line |
168 | 132 | sol = (newline / & { return pos === 0; } { return true; }) /* a newline, or a zero-width match when pos === 0 */ |
169 | | - cn:(comment n:newline? { return n })? { |
| 133 | + cn:(c:comment n:newline? { return [c, n] })? { /* optional comment followed by an optional newline, captured as the pair [c, n] */ |
170 | 134 | return cn; /* the rule's result is only this optional capture, not the leading newline match */ |
171 | 135 | } |
172 | 136 | |
— | — | @@ -177,7 +141,7 @@ |
178 | 142 | = & { blockStart = pos; return true; } b:block { |
179 | 143 | b = flatten(b); |
180 | 144 | var bs = b[0]; |
181 | | - dp('toplevelblock:' + print_r(b) + bs); |
| 145 | + dp('toplevelblock:' + pp(b)); |
182 | 146 | if (bs.attribs === undefined) { |
183 | 147 | bs.attribs = []; |
184 | 148 | } |
— | — | @@ -190,8 +154,16 @@ |
191 | 155 | = (sol space* &newline)? bl:block_lines { return bl; } |
192 | 156 | / para |
193 | 157 | / comment |
194 | | - / (s:sol { return [{type: 'TEXT', value: s}]; }) |
| 158 | + / (s:sol { |
| 159 | + if (s) { |
| 160 | + return [s, {type: 'NEWLINE'}]; |
| 161 | + } else { |
| 162 | + return [{type: 'NEWLINE'}]; |
| 163 | + } |
| 164 | + } |
| 165 | + ) |
195 | 166 | |
| 167 | + |
196 | 168 | // Block structures with start-of-line wiki syntax |
197 | 169 | block_lines |
198 | 170 | = h |
— | — | @@ -290,7 +262,7 @@ |
291 | 263 | if (s !== '') { |
292 | 264 | res.push(s) |
293 | 265 | } |
294 | | - //console.log('paralines' + print_r(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]))); |
| 266 | + //console.log('paralines' + pp(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]))); |
295 | 267 | return res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]); |
296 | 268 | } |
297 | 269 | |
— | — | @@ -306,9 +278,9 @@ |
307 | 279 | |
308 | 280 | // Syntax that stops inline expansion |
309 | 281 | inline_breaks |
310 | | - = //& { console.log(print_r(syntaxFlags)); return true; } |
| 282 | + = //& { console.log(pp(syntaxFlags)); return true; } |
311 | 283 | & { return syntaxFlags['table']; } |
312 | | - a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + print_r(a)); return true; } |
| 284 | + a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a)); return true; } |
313 | 285 | / & { return syntaxFlags['italic']; } italic_marker { return true; } |
314 | 286 | / & { return syntaxFlags['bold']; } bold_marker { return true; } |
315 | 287 | / & { return syntaxFlags['linkdesc']; } link_end { return true; } |
— | — | @@ -326,21 +298,21 @@ |
327 | 299 | inline /* one or more text / inline_element / single-character matches, returned as a token list in which consecutive strings are merged into TEXT tokens */ |
328 | 300 | = c:(text / inline_element / (!inline_breaks ch:. { return ch; }))+ { |
329 | 301 | var out = []; |
330 | | - var text = ''; |
| 302 | + var text = []; |
331 | 303 | c = flatten(c); /* flatten is defined elsewhere in the grammar; presumably un-nests the match result - confirm */ |
332 | 304 | for (var i = 0; i < c.length; i++) { |
333 | 305 | if (typeof c[i] == 'string') { |
334 | | - text += c[i]; |
| 306 | + text.push(c[i]); |
335 | 307 | } else { |
336 | 308 | if (text.length) { |
337 | | - out.push({ type: "TEXT", value: text }); |
338 | | - text = ''; |
| 309 | + out.push({ type: "TEXT", value: text.join('') }); |
| 310 | + text = []; |
339 | 311 | } |
340 | 312 | out.push(c[i]); /* was out.concat(c[i]): concat returns a new array and its result was discarded, so non-string tokens never reached out; push matches the inlineline rule */ |
341 | 313 | } |
342 | 314 | } |
343 | 315 | if (text.length) { |
344 | | - out.push({ type: 'TEXT', value: text }); |
| 316 | + out.push({ type: 'TEXT', value: text.join('') }); |
345 | 317 | } |
346 | 318 | return out; |
347 | 319 | } |
— | — | @@ -348,23 +320,23 @@ |
349 | 321 | inlineline /* token list for a run of inline content; the raw-character alternative is [^\n], so that alternative never consumes a newline */ |
350 | 322 | = c:(text / !inline_breaks (inline_element / [^\n]))+ { |
351 | 323 | var out = []; |
352 | | - var text = ''; |
| 324 | + var text = []; /* pending string pieces, joined when flushed into a TEXT token */ |
353 | 325 | c = flatten(c); /* flatten is defined elsewhere in the grammar */ |
354 | 326 | for (var i = 0; i < c.length; i++) { |
355 | 327 | if (typeof c[i] == 'string') { |
356 | | - text += c[i]; |
| 328 | + text.push(c[i]); |
357 | 329 | } else { |
358 | 330 | if (text.length) { |
359 | | - out.push({type: 'TEXT', value: text}); |
360 | | - text = ''; |
| 331 | + out.push({type: 'TEXT', value: text.join('')}); |
| 332 | + text = []; |
361 | 333 | } |
362 | 334 | out.push(c[i]); /* non-string results are appended unchanged, after any pending text has been flushed */ |
363 | 335 | } |
364 | 336 | } |
365 | 337 | if (text.length) { /* flush text still pending after the last element */ |
366 | | - out.push({type: 'TEXT', value: text}); |
| 338 | + out.push({type: 'TEXT', value: text.join('')}); |
367 | 339 | } |
368 | | - //dp('inlineline out:', print_r(out)); |
| 340 | + //dp('inlineline out:', pp(out)); |
369 | 341 | return out; |
370 | 342 | } |
371 | 343 | |
— | — | @@ -383,7 +355,7 @@ |
384 | 356 | comment |
385 | 357 | = '<!--' c:comment_chars* '-->' |
386 | 358 | (space* newline space* comment)* { /* comments on following lines are consumed as well, but they are unlabelled: only the first comment's characters (c) reach the result */ |
387 | | - return { type: 'COMMENT', value: c.join('') }; |
| 359 | + return [{ type: 'COMMENT', value: c.join('') }]; /* a single COMMENT token, wrapped in an array */ |
388 | 360 | } |
389 | 361 | |
390 | 362 | comment_chars |
— | — | @@ -623,20 +595,9 @@ |
624 | 596 | |
625 | 597 | lists = es:(dtdd / li)+ /* one or more dtdd / li matches, returned between a ul TAG token and a ul ENDTAG token */ |
626 | 598 | { |
627 | | - // Flatten es |
628 | | - var esLen = es.length; |
629 | | - var flatEs = []; |
630 | | - for (var i = 0; i < esLen; i++) { |
631 | | - var ei = es[i]; |
632 | | - if ($.isArray(ei)) { |
633 | | - flatEs = flatEs.concat(ei); |
634 | | - } else { |
635 | | - flatEs.push(ei); |
636 | | - } |
637 | | - } |
638 | 599 | return [ { type: 'TAG', |
639 | 600 | name: 'ul'} ] // XXX!! |
640 | | - .concat(flatEs |
| 601 | + .concat(flatten(es) /* flatten is defined elsewhere in the grammar; it replaces the removed inline flattening loop */ |
641 | 602 | ,[{ type: 'ENDTAG', name: 'ul' }]); |
642 | 603 | } |
643 | 604 | |
— | — | @@ -685,21 +646,22 @@ |
686 | 647 | = tas:table_start c:table_caption? b:table_body? table_end { |
687 | 648 | var res = {type: 'TAG', name: 'table'} |
688 | 649 | var body = b !== '' ? b : []; |
| 650 | + dp("body: " + pp(body)); |
689 | 651 | if (tas.length > 0) { |
690 | 652 | // FIXME: actually parse and build structure |
691 | 653 | res.attribs = [['data-unparsed', tas.join('')]]; |
692 | 654 | } |
693 | 655 | |
694 | | - if (c !== '') { |
| 656 | + if (c != '') { |
695 | 657 | var caption = [{type: 'TAG', name: 'caption'}] |
696 | 658 | .concat(c, [{type: 'ENDTAG', name: 'caption'}]); |
697 | 659 | } else { |
698 | 660 | var caption = []; |
699 | | - //dp(print_r(res)); |
700 | | - |
701 | | - return [res].concat(caption, body, |
702 | | - [{type: 'ENDTAG', name: 'table'}]); |
703 | 661 | } |
| 662 | + //dp(pp(res)); |
| 663 | + |
| 664 | + return [res].concat(caption, body, |
| 665 | + [{type: 'ENDTAG', name: 'table'}]); |
704 | 666 | } |
705 | 667 | |
706 | 668 | table_start |
— | — | @@ -708,7 +670,7 @@ |
709 | 671 | & { setFlag('table'); return true; } |
710 | 672 | ta:table_attribs* |
711 | 673 | space* { |
712 | | - //dp("table_start " + print_r(ta) + ", pos:" + pos); |
| 674 | + //dp("table_start " + pp(ta) + ", pos:" + pos); |
713 | 675 | return ta; |
714 | 676 | } |
715 | 677 | / sol "{|" { clearFlag('table'); return null; } |
— | — | @@ -726,11 +688,11 @@ |
727 | 689 | = & { dp("table_body enter"); return true; } |
728 | 690 | firstrow:table_firstrow otherrows:table_row* { |
729 | 691 | /* dp('table first and otherrows: ' |
730 | | - * + print_r([firstrow].concat(otherrows))); */ |
| 692 | + * + pp([firstrow].concat(otherrows))); */ |
731 | 693 | return [firstrow].concat(otherrows); |
732 | 694 | } |
733 | 695 | / otherrows:table_row* { |
734 | | - //dp('table otherrows: ' + print_r(otherrows)); |
| 696 | + //dp('table otherrows: ' + pp(otherrows)); |
735 | 697 | return otherrows; |
736 | 698 | } |
737 | 699 | |
— | — | @@ -754,7 +716,7 @@ |
755 | 717 | ! [}+-] |
756 | 718 | a:thtd_attribs? |
757 | 719 | td:(!inline_breaks anyblock)* { |
758 | | - dp("table data result: " + print_r(td) + ", attribts: " + print_r(a)); |
| 720 | + dp("table data result: " + pp(td) + ", attribts: " + pp(a)); |
759 | 721 | return [{ type: 'TAG', name: 'td', attribs: [['data-unparsed', a]]}] |
760 | 722 | .concat(td, [{type: 'ENDTAG', name: 'td'}]); |
761 | 723 | } |