r103585 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r103584‎ | r103585 | r103586 >
Date:13:57, 18 November 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Add node.js-compatible HTML5 parser and hook it up to the PEG tokenizer.
Builds a DOM tree (jsdom) from the tokens and then serializes that using
document.innerHTML. This is all very experimental, so don't be surprised by
rough edges.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/html5 (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/COPYING (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/buffer.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/constants.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/debug.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/index.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_body_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_frameset_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/after_body_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/after_frameset_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/after_head_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/before_head_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/before_html_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_body_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_caption_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_cell_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_column_group_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_foreign_content_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_frameset_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_head_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_row_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_in_table_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_body_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/initial_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/root_element_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/parser/trailing_end_phase.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/serializer.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/tokenizer.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/treebuilder.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/html5/treewalker.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.peg.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -10,6 +10,8 @@
1111 * 2011-07-20 <brion@pobox.com>
1212 */
1313
 14+"use strict";
 15+
1416 var fs = require('fs'),
1517 path = require('path');
1618
@@ -29,6 +31,9 @@
3032 })
3133 }
3234
 35+// needed for html5 parser adapter
 36+//var events = require('events');
 37+
3338 // For now most modules only need this for $.extend and $.each :)
3439 global.$ = require('jquery');
3540
@@ -40,11 +45,14 @@
4146 // Local CommonJS-friendly libs
4247 global.PEG = _require(pj('parser', 'lib.pegjs.js'));
4348
 49+
4450 // Our code...
4551 _import(pj('parser', 'mediawiki.parser.peg.js'), ['PegParser']);
4652 _import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']);
4753 _import(pj('parser', 'ext.cite.taghook.ref.js'), ['MWRefTagHook']);
4854
 55+_require(pj('parser', 'mediawiki.html5TokenEmitter.js'));
 56+
4957 // WikiDom and serializers
5058 _require(pj('es', 'es.js'));
5159 _require(pj('es', 'es.Html.js'));
@@ -109,44 +117,10 @@
110118 function nodeToHtml(node) {
111119 return $('<div>').append(node).html();
112120 }
113 - /* Temporary debugging help. Is there anything similar in JS or a library? */
114 - var print_r = function (arr, level) {
115121
116 - var dumped_text = "";
117 - if (!level) level = 0;
118122
119 - //The padding given at the beginning of the line.
120 - var level_padding = "";
121 - var bracket_level_padding = "";
122 -
123 - for (var j = 0; j < level + 1; j++) level_padding += " ";
124 - for (var b = 0; b < level; b++) bracket_level_padding += " ";
125 -
126 - if (typeof(arr) == 'object') { //Array/Hashes/Objects
127 - dumped_text += "Array\n";
128 - dumped_text += bracket_level_padding + "(\n";
129 - for (var item in arr) {
130 -
131 - var value = arr[item];
132 -
133 - if (typeof(value) == 'object') { //If it is an array,
134 - dumped_text += level_padding + "[" + item + "] => ";
135 - dumped_text += print_r(value, level + 2);
136 - } else {
137 - dumped_text += level_padding + "[" + item + "] => '" + value + "'\n";
138 - }
139 -
140 - }
141 - dumped_text += bracket_level_padding + ")\n\n";
142 - } else { //Strings/Chars/Numbers etc.
143 - dumped_text = "=>" + arr + "<=(" + typeof(arr) + ")";
144 - }
145 -
146 - return dumped_text;
147 -
148 - };
149 -
150123 function processTest(item) {
 124+ var tokenizer = new FauxHTML5.Tokenizer();
151125 if (!('title' in item)) {
152126 console.log(item);
153127 throw new Error('Missing title from test case.');
@@ -163,6 +137,7 @@
164138 console.log("INPUT:");
165139 console.log(item.input + "\n");
166140
 141+
167142 parser.parseToTree(item.input + "\n", function(tree, err) {
168143 if (err) {
169144 console.log('PARSE FAIL', err);
@@ -174,6 +149,7 @@
175150 }
176151 });
177152 //var res = es.HtmlSerializer.stringify(tree,environment);
 153+ processTokens(tree, tokenizer);
178154 if (err) {
179155 console.log('RENDER FAIL', err);
180156 } else {
@@ -181,12 +157,24 @@
182158 console.log(item.result + "\n");
183159
184160 console.log('RENDERED:');
185 - console.log(print_r(tree));
 161+ //console.log(JSON.stringify(tree, null, 2));
 162+ console.log(tokenizer.parser.document.innerHTML);
186163 }
187164 }
188165 });
189166 }
190167
 168+function processTokens ( tokens, tokenizer ) {
 169+ tokenizer.processToken({type: 'TAG', name: 'body'});
 170+ // Process all tokens
 171+ for (var i = 0, length = tokens.length; i < length; i++) {
 172+ tokenizer.processToken(tokens[i]);
 173+ }
 174+ // And signal the end
 175+ tokenizer.processToken({type: 'END'});
 176+}
 177+
 178+
191179 cases.forEach(function(item) {
192180 if (typeof item == 'object') {
193181 if (item.type == 'article') {
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.peg.js
@@ -23,6 +23,7 @@
2424 out = parser.parse(text);
2525 } catch (e) {
2626 err = e;
 27+ console.trace();
2728 } finally {
2829 callback(out, err);
2930 }
Index: trunk/extensions/VisualEditor/modules/parser/html5/treewalker.js
@@ -0,0 +1,95 @@
 2+var HTML5 = require('../html5');
 3+var events = require('events');
 4+
 5+function error(msg) {
 6+ return {type: 'SerializeError', data: msg};
 7+}
 8+
 9+function empty_tag(node) {
 10+ if(node.hasChildNodes()) return error(_("Void element has children"));
 11+ return {type: 'EmptyTag', name: node.tagName, data: node.attributes, namespace: node.namespace};
 12+}
 13+
 14+function start_tag(node) {
 15+ return {type: 'StartTag', name: node.tagName, data: node.attributes, namespace: node.namespace};
 16+}
 17+
 18+function end_tag(node) {
 19+ return {type: 'EndTag', name: node.tagName, namespace: node.namespace };
 20+}
 21+
/**
 * Emit the contents of a text node on `target` as 'token' events, separating
 * leading and trailing whitespace from the rest of the text.
 *
 * Emission order: an optional leading SpaceCharacters token, then one
 * Characters token, then an optional trailing SpaceCharacters token. If the
 * data consists only of whitespace, a single SpaceCharacters token is emitted.
 *
 * @param {string} data   Text node value.
 * @param {Object} target Emitter; must provide emit(eventName, token).
 */
function text(data, target) {
	var space = HTML5.SPACE_CHARACTERS;

	// Declared locally: the match result used to leak into the global scope.
	var m = new RegExp("^[" + space + "]+").exec(data);
	if(m) {
		target.emit('token', {type: 'SpaceCharacters', data: m[0]});
		data = data.slice(m[0].length, data.length);
		if(data.length == 0) return;
	}

	m = new RegExp("[" + space + "]+$").exec(data);
	if(m) {
		// m.index is where the trailing whitespace run starts. (m.length is
		// the length of the match array, not a position in the string.)
		target.emit('token', {type: 'Characters', data: data.slice(0, m.index)});
		target.emit('token', {type: 'SpaceCharacters', data: data.slice(m.index, data.length)});
	} else {
		target.emit('token', {type: 'Characters', data: data});
	}
}
 36+
 37+function comment(data) {
 38+ return {type: 'Comment', data: data};
 39+}
 40+
 41+function doctype(node) {
 42+ return {type: 'Doctype', name: node.nodeName, publicId: node.publicId, systemId: node.systemId, correct: node.correct};
 43+}
 44+
 45+function unknown(node) {
 46+ return error(_("unknown node: ")+ JSON.stringify(node));
 47+}
 48+
 49+function _(str) {
 50+ return str;
 51+}
 52+
 53+HTML5.TreeWalker = function(document, dest) {
 54+ if(dest instanceof Function) this.addListener('token', dest);
 55+ walk(document, this);
 56+};
 57+
 58+function walk(node, dest) {
 59+ switch(node.nodeType) {
 60+ case node.DOCUMENT_FRAGMENT_NODE:
 61+ case node.DOCUMENT_NODE:
 62+ for(var child = 0; child < node.childNodes.length; ++child) {
 63+ walk(node.childNodes[child], dest);
 64+ }
 65+ break;
 66+
 67+ case node.ELEMENT_NODE:
 68+ if(HTML5.VOID_ELEMENTS.indexOf(node.tagName.toLowerCase()) != -1) {
 69+ dest.emit('token', empty_tag(node));
 70+ } else {
 71+ dest.emit('token', start_tag(node));
 72+ for(var child = 0; child < node.childNodes.length; ++child) {
 73+ walk(node.childNodes[child], dest);
 74+ }
 75+ dest.emit('token', end_tag(node));
 76+ }
 77+ break;
 78+
 79+ case node.TEXT_NODE:
 80+ text(node.nodeValue, dest);
 81+ break;
 82+
 83+ case node.COMMENT_NODE:
 84+ dest.emit('token', comment(node.nodeValue));
 85+ break;
 86+
 87+ case node.DOCUMENT_TYPE_NODE:
 88+ dest.emit('token', doctype(node));
 89+ break;
 90+
 91+ default:
 92+ dest.emit('token', unknown(node));
 93+ }
 94+}
 95+
 96+HTML5.TreeWalker.prototype = new events.EventEmitter;
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/treewalker.js
___________________________________________________________________
Added: svn:eol-style
197 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/debug.js
@@ -0,0 +1,30 @@
 2+var HTML5 = require('../html5');
 3+
 4+var debugFlags = {any: true}
 5+
/**
 * Print the call's arguments with console.dir when debugging is enabled for
 * the given section.
 *
 * The first argument is the section name (e.g. 'treebuilder.insert_element').
 * Output is produced when debugFlags has a truthy entry either for the full
 * section name or for the part before the first '.'.
 */
HTML5.debug = function() {
	var section = arguments[0];
	// Only strings can be split into a prefix; anything else is looked up as-is
	// instead of throwing on .split().
	var prefix = typeof section == 'string' ? section.split('.')[0] : section;
	if(debugFlags[section] || debugFlags[prefix]) {
		// Copy the arguments object into a real array for console.dir.
		console.dir(Array.prototype.slice.call(arguments));
	}
}
 16+
 17+HTML5.enableDebug = function(section) {
 18+ debugFlags[section] = true;
 19+}
 20+
 21+HTML5.disableDebug = function(section) {
 22+ debugFlags[section] = false;
 23+}
 24+
/**
 * Build a comma-separated list of the tagName of every entry in `tags`.
 *
 * @param {Array} tags Array (or array-like) of objects with a tagName property.
 * @returns {string} The tag names joined with ', '.
 */
HTML5.dumpTagStack = function(tags) {
	var r = [];
	// Index loop with a local counter: the previous for-in used an undeclared
	// (global) loop variable and would also visit non-index properties.
	for(var i = 0; i < tags.length; i++) {
		r.push(tags[i].tagName);
	}
	return r.join(', ');
}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/debug.js
___________________________________________________________________
Added: svn:eol-style
132 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/treebuilder.js
@@ -0,0 +1,301 @@
 2+"use strict";
 3+
 4+var HTML5 = require('../html5');
 5+var assert = require('assert');
 6+//if(!Array.prototype.last) {
 7+// Array.prototype.last = function() { return this[this.length - 1] };
 8+//}
 9+
 10+HTML5.TreeBuilder = function TreeBuilder(document) {
 11+ this.open_elements = [];
 12+ this.document = document;
 13+ this.activeFormattingElements = [];
 14+}
 15+
 16+var b = HTML5.TreeBuilder;
 17+
 18+b.prototype.reset = function() {
 19+
 20+}
 21+
 22+b.prototype.copyAttributeToElement = function(element, attribute) {
 23+ if(attribute.nodeType && attribute.nodeType == attribute.ATTRIBUTE_NODE) {
 24+ element.setAttributeNode(attribute.cloneNode());
 25+ } else {
 26+ try {
 27+ element.setAttribute(attribute.nodeName, attribute.nodeValue)
 28+ } catch(e) {
 29+ console.log("Can't set attribute '" + attribute.nodeName + "' to value '" + attribute.nodeValue + "': (" + e + ')');
 30+ }
 31+ if(attribute.namespace) {
 32+ var at = element.getAttributeNode(attribute.nodeName);
 33+ at.namespace = attribute.namespace;
 34+ }
 35+ }
 36+}
 37+
/**
 * Create an element in this builder's document and copy attributes onto it.
 *
 * @param {string} name         Tag name passed to document.createElement.
 * @param {Object} [attributes] Either an object exposing item(i) and length,
 *                              or a plain array of attribute-like objects;
 *                              each entry is handed to copyAttributeToElement.
 * @param {*} [namespace]       Stored verbatim on the element's `namespace`
 *                              property.
 * @returns {Object} The newly created element.
 * @throws Whatever document.createElement throws; the failure is logged first.
 */
b.prototype.createElement = function (name, attributes, namespace) {
	var el;
	try {
		el = this.document.createElement(name);
	} catch(e) {
		console.log("Can't create element '"+ name + "' (" + e + ")");
		// Re-throw the real error: continuing would only fail on the next
		// line with an unrelated "cannot set property of undefined".
		throw e;
	}
	el.namespace = namespace;
	if(attributes) {
		for(var i = 0; i < attributes.length; i++) {
			// Collections exposing item() are read through it, plain arrays
			// by index.
			var attribute = attributes.item ? attributes.item(i) : attributes[i];
			HTML5.debug('treebuilder.copyAttributes', attribute);
			this.copyAttributeToElement(el, attribute);
		}
	}
	return el;
}
 60+
 61+b.prototype.insert_element = function(name, attributes, namespace) {
 62+ HTML5.debug('treebuilder.insert_element', name)
 63+ if(this.insert_from_table) {
 64+ return this.insert_element_from_table(name, attributes, namespace)
 65+ } else {
 66+ return this.insert_element_normal(name, attributes, namespace)
 67+ }
 68+}
 69+
 70+b.prototype.insert_foreign_element = function(name, attributes, namespace) {
 71+ return this.insert_element(name, attributes, namespace);
 72+}
 73+
 74+b.prototype.insert_element_normal = function(name, attributes, namespace) {
 75+ var element = this.createElement(name, attributes, namespace);
 76+ this.open_elements[this.open_elements.length - 1].appendChild(element);
 77+ this.open_elements.push(element);
 78+ return element;
 79+}
 80+
 81+b.prototype.insert_element_from_table = function(name, attributes, namespace) {
 82+ var element = this.createElement(name, attributes, namespace)
 83+ if(HTML5.TABLE_INSERT_MODE_ELEMENTS.indexOf(this.open_elements[this.open_elements.length - 1].tagName.toLowerCase()) != -1) {
 84+ // We should be in the InTable mode. This means we want to do
 85+ // special magic element rearranging
 86+ var t = this.getTableMisnestedNodePosition()
 87+ if(!t.insertBefore) {
 88+ t.parent.appendChild(element)
 89+ } else {
 90+ t.parent.insertBefore(element, t.insertBefore)
 91+ }
 92+ this.open_elements.push(element)
 93+ } else {
 94+ return this.insert_element_normal(name, attributes, namespace);
 95+ }
 96+ return element;
 97+}
 98+
 99+b.prototype.insert_comment = function(data, parent) {
 100+ try {
 101+ var c = this.document.createComment(data);
 102+ if(!parent) parent = this.open_elements[this.open_elements.length - 1];
 103+ parent.appendChild(c);
 104+ } catch(e) {
 105+ console.log("Can't create comment ("+ data + ")")
 106+ }
 107+}
 108+
 109+b.prototype.insert_doctype = function (name, publicId, systemId) {
 110+ try {
 111+ var doctype = this.document.implementation.createDocumentType(name, publicId, systemId);
 112+ this.document.appendChild(doctype);
 113+ } catch(e) {
 114+ console.log("Can't create doctype ("+ name + " / " + publicId + " / " + systemId + ")")
 115+ }
 116+}
 117+
 118+
 119+b.prototype.insert_text = function(data, parent) {
 120+ if(!parent) parent = this.open_elements[this.open_elements.length - 1];
 121+ if(!this.insert_from_table || HTML5.TABLE_INSERT_MODE_ELEMENTS.indexOf(this.open_elements[this.open_elements.length - 1].tagName.toLowerCase()) == -1) {
 122+ if(parent.lastChild && parent.lastChild.nodeType == parent.TEXT_NODE) {
 123+ parent.lastChild.appendData(data);
 124+ } else {
 125+ try {
 126+ var tn = this.document.createTextNode(data);
 127+ parent.appendChild(tn);
 128+ } catch(e) {
 129+ console.log("Can't create tex node (" + data + ")");
 130+ }
 131+ }
 132+ } else {
 133+ // We should be in the inTable phase. This means we want to do special
 134+ // magic element rearranging.
 135+ var t = this.getTableMisnestedNodePosition();
 136+ insertText(t.parent, data, t.insertBefore)
 137+ }
 138+}
 139+
 140+b.prototype.remove_open_elements_until = function(nameOrCb) {
 141+ HTML5.debug('treebuilder.remove_open_elements_until', nameOrCb)
 142+ var finished = false;
 143+ while(!finished) {
 144+ var element = this.pop_element();
 145+ finished = (typeof nameOrCb == 'function' ? nameOrCb(element) : element.tagName.toLowerCase() == nameOrCb);
 146+ }
 147+ return element;
 148+}
 149+
 150+b.prototype.pop_element = function() {
 151+ var el = this.open_elements.pop()
 152+ HTML5.debug('treebuilder.pop_element', el.name)
 153+ return el
 154+}
 155+
/**
 * Insert text into `node`, either before a given child or at the end.
 *
 * When `before` is given and its previous sibling is a text node, the data is
 * appended to that sibling's nodeValue instead of creating a new node.
 *
 * @param {Object} node     Parent node; must provide ownerDocument,
 *                          insertBefore and appendChild.
 * @param {string} data     Text to insert.
 * @param {Object} [before] Child of `node` in front of which the text goes.
 */
function insertText(node, data, before) {
	if(before) {
		var prev = before.previousSibling;
		if(prev && prev.nodeType == prev.TEXT_NODE) {
			// Merge into the adjacent text node; no new node is needed.
			prev.nodeValue += data;
			return;
		}
		node.insertBefore(node.ownerDocument.createTextNode(data), before);
	} else {
		node.appendChild(node.ownerDocument.createTextNode(data));
	}
}
 168+
 169+b.prototype.getTableMisnestedNodePosition = function() {
 170+ // The foster parent element is the one which comes before the most
 171+ // recently opened table element
 172+ // XXX - this is really inelegant
 173+ var lastTable, fosterParent, insertBefore
 174+
 175+ for(var i = this.open_elements.length - 1; i >= 0; i--) {
 176+ var element = this.open_elements[i]
 177+ if(element.tagName.toLowerCase() == 'table') {
 178+ lastTable = element
 179+ break
 180+ }
 181+ }
 182+
 183+ if(lastTable) {
 184+ // XXX - we should check that the parent really is a node here
 185+ if(lastTable.parentNode) {
 186+ fosterParent = lastTable.parentNode
 187+ insertBefore = lastTable
 188+ } else {
 189+ fosterParent = this.open_elements[this.open_elements.indexOf(lastTable) - 1]
 190+ }
 191+ } else {
 192+ fosterParent = this.open_elements[0]
 193+ }
 194+
 195+ return {parent: fosterParent, insertBefore: insertBefore}
 196+}
 197+
 198+b.prototype.elementInScope = function(name, tableVariant) {
 199+ if(this.open_elements.length == 0) return false
 200+ for(var i = this.open_elements.length - 1; i >= 0; i--) {
 201+ if (this.open_elements[i].tagName == undefined) return false
 202+ else if(this.open_elements[i].tagName.toLowerCase() == name) return true
 203+ else if(this.open_elements[i].tagName.toLowerCase() == 'table') return false
 204+ else if(!tableVariant && HTML5.SCOPING_ELEMENTS.indexOf(this.open_elements[i].tagName.toLowerCase()) != -1) return false
 205+ else if(this.open_elements[i].tagName.toLowerCase() == 'html') return false;
 206+ }
 207+ return false;
 208+}
 209+
 210+b.prototype.generateImpliedEndTags = function(exclude) {
 211+ if(exclude) exclude = exclude.toLowerCase()
 212+ if(this.open_elements.length == 0) {
 213+ HTML5.debug('treebuilder.generateImpliedEndTags', 'no open elements')
 214+ return
 215+ }
 216+ var name = this.open_elements[this.open_elements.length - 1].tagName.toLowerCase();
 217+ if(['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].indexOf(name) != -1 && name != exclude) {
 218+ var p = this.pop_element();
 219+ this.generateImpliedEndTags(exclude);
 220+ }
 221+}
 222+
 223+b.prototype.reconstructActiveFormattingElements = function() {
 224+ // Within this algorithm the order of steps described in the specification
 225+ // is not quite the same as the order of steps in the code. It should still
 226+ // do the same though.
 227+
 228+ // Step 1: stop if there's nothing to do
 229+ if(this.activeFormattingElements.length == 0) return;
 230+
 231+ // Step 2 and 3: start with the last element
 232+ var i = this.activeFormattingElements.length - 1;
 233+ var entry = this.activeFormattingElements[i];
 234+ if(entry == HTML5.Marker || this.open_elements.indexOf(entry) != -1) return;
 235+
 236+ while(entry != HTML5.Marker && this.open_elements.indexOf(entry) == -1) {
 237+ i -= 1;
 238+ entry = this.activeFormattingElements[i];
 239+ if(!entry) break;
 240+ }
 241+
 242+ while(true) {
 243+ i += 1;
 244+ var clone = this.activeFormattingElements[i].cloneNode();
 245+
 246+ var element = this.insert_element(clone.tagName, clone.attributes);
 247+
 248+ this.activeFormattingElements[i] = element;
 249+
 250+ if(element == this.activeFormattingElements[this.activeFormattingElements.length - 1]) break;
 251+ }
 252+
 253+}
 254+
 255+b.prototype.elementInActiveFormattingElements = function(name) {
 256+ var els = this.activeFormattingElements;
 257+ for(var i = els.length - 1; i >= 0; i--) {
 258+ if(els[i] == HTML5.Marker) break;
 259+ if(els[i].tagName.toLowerCase() == name) return els[i];
 260+ }
 261+ return false;
 262+}
 263+
 264+b.prototype.reparentChildren = function(o, n) {
 265+ while(o.childNodes.length > 0) {
 266+ var el = o.removeChild(o.childNodes[0]);
 267+ n.appendChild(el);
 268+ }
 269+}
 270+
 271+b.prototype.clearActiveFormattingElements = function() {
 272+ while(!(this.activeFormattingElements.length == 0 || this.activeFormattingElements.pop() == HTML5.Marker));
 273+}
 274+
 275+b.prototype.getFragment = function() {
 276+ // assert.ok(this.parser.inner_html)
 277+ var fragment = this.document.createDocumentFragment()
 278+ this.reparentChildren(this.root_pointer, fragment)
 279+ return fragment
 280+}
 281+
 282+b.prototype.create_structure_elements = function(container) {
 283+ this.html_pointer = this.document.getElementsByTagName('html')[0]
 284+ if(!this.html_pointer) {
 285+ this.html_pointer = this.createElement('html');
 286+ this.document.appendChild(this.html_pointer);
 287+ }
 288+ if(container == 'html') return;
 289+ if(!this.head_pointer) {
 290+ this.head_pointer = this.document.getElementsByTagName('head')[0]
 291+ if(!this.head_pointer) {
 292+ this.head_pointer = this.createElement('head');
 293+ this.html_pointer.appendChild(this.head_pointer);
 294+ }
 295+ }
 296+ if(container == 'head') return;
 297+ this.body_pointer = this.document.getElementsByTagName('body')[0]
 298+ if(!this.body_pointer) {
 299+ this.body_pointer = this.createElement('body');
 300+ this.html_pointer.appendChild(this.body_pointer);
 301+ }
 302+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/treebuilder.js
___________________________________________________________________
Added: svn:eol-style
1303 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/index.js
@@ -0,0 +1,13 @@
 2+exports.HTML5 = exports;
 3+
 4+exports.HTML5.moduleName = 'HTML5';
 5+
 6+require('./constants');
 7+require('./tokenizer');
 8+require('./treebuilder');
 9+require('./treewalker');
 10+require('./serializer');
 11+require('./parser');
 12+require('./debug');
 13+
 14+
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/index.js
___________________________________________________________________
Added: svn:eol-style
115 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/buffer.js
@@ -0,0 +1,109 @@
 2+var buffer = require('buffer');
 3+var HTML5 = require('../html5');
 4+
 5+function Buffer() {
 6+ this.data = '';
 7+ this.start = 0;
 8+ this.committed = 0;
 9+ var eof;
 10+ this.__defineSetter__('eof', function(f) {
 11+ eof = f
 12+ HTML5.debug('buffer.eof=', f)
 13+ })
 14+ this.__defineGetter__('eof', function() { return eof })
 15+ this.eof = false;
 16+}
 17+
 18+exports.Buffer = Buffer;
 19+
/**
 * Read operations over the accumulated input string.
 *
 * `data` holds the text appended so far, `start` is the read position and
 * `committed` is the position undo() rewinds to. Methods throw HTML5.DRAIN
 * when they run out of data before `eof` has been set, and return HTML5.EOF
 * when they run out after it has been set.
 */
Buffer.prototype = {
	// Return everything from the read position to the end of the data.
	slice: function() {
		HTML5.debug('buffer.slice')
		if(this.start >= this.data.length) {
			if(!this.eof) throw HTML5.DRAIN
			return HTML5.EOF;
		}
		return this.data.slice(this.start, this.data.length);
	},
	// Consume and return a single character.
	char: function() {
		HTML5.debug('buffer.char')
		// Before eof the last buffered character is held back (drain instead).
		if(!this.eof && this.start >= this.data.length - 1) throw HTML5.DRAIN;
		if(this.start >= this.data.length) {
			return HTML5.EOF;
		}
		return this.data[this.start++];
	},
	// Move the read position forward by `amount` characters.
	advance: function(amount) {
		HTML5.debug('buffer.advance', amount)
		this.start += amount;
		if(this.start >= this.data.length) {
			if(!this.eof) throw HTML5.DRAIN;
			return HTML5.EOF;
		} else {
			if(this.committed > this.data.length / 2) {
				// Sliiiide
				// Drop the committed prefix and rebase both positions.
				this.data = this.data.slice(this.committed);
				this.start = this.start - this.committed;
				this.committed = 0;
			}
		}
	},
	// Consume and return the longest run at the read position matching the
	// regex source fragment `re` (one or more repetitions); '' if none.
	matchWhile: function(re) {
		HTML5.debug('buffer.matchWhile', re);
		// NOTE(review): data.length >= start looks like it holds whenever eof
		// is set, so this returns EOF even with unread data; the comparison
		// may have been meant the other way round. Left unchanged - confirm.
		if(this.eof && this.data.length >= this.start) return HTML5.EOF;
		var r = new RegExp("^"+re+"+");
		// Local variable: the match used to be assigned to an implicit global.
		var m = r.exec(this.slice());
		if(m) {
			// A match reaching the end of the buffer may continue in the next
			// chunk, so wait for more data unless eof has been set.
			if(!this.eof && m[0].length == this.data.length - this.start) throw HTML5.DRAIN;
			this.advance(m[0].length);
			return m[0];
		} else {
			return '';
		}
	},
	// Consume and return everything up to (not including) the first match of
	// the regex source `re`. Once eof is set a NUL character also terminates.
	matchUntil: function(re) {
		HTML5.debug('buffer.matchUntil', re);
		// Local variable: the match used to be assigned to an implicit global.
		var m = new RegExp(re + (this.eof ? "|\0" : "")).exec(this.slice());
		if(m) {
			var t = this.data.slice(this.start, this.start + m.index);
			this.advance(m.index);
			return t.toString();
		} else {
			if(this.eof) return HTML5.EOF;
			throw HTML5.DRAIN;
		}
	},
	// Add more input to the end of the buffer.
	append: function(data) {
		HTML5.debug('buffer.append', data);
		this.data += data
	},
	// Consume and return up to `n` characters.
	shift: function(n) {
		HTML5.debug('buffer.shift', n);
		if(!this.eof && this.start + n >= this.data.length) throw HTML5.DRAIN;
		if(this.eof && this.start >= this.data.length) return HTML5.EOF;
		var d = this.data.slice(this.start, this.start + n).toString();
		this.advance(Math.min(n, this.data.length - this.start));
		return d;
	},
	// Return up to `n` characters without consuming them.
	peek: function(n) {
		HTML5.debug('buffer.peek', n)
		if(!this.eof && this.start + n >= this.data.length) throw HTML5.DRAIN;
		if(this.eof && this.start >= this.data.length) return HTML5.EOF;
		return this.data.slice(this.start, Math.min(this.start + n, this.data.length)).toString();
	},
	// Number of unread characters, minus one.
	length: function() {
		HTML5.debug('buffer.length')
		return this.data.length - this.start - 1;
	},
	// Push previously read text `d` back by rewinding the read position by
	// its length; HTML5.EOF is ignored.
	unget: function(d) {
		HTML5.debug('buffer.unget', d);
		if(d == HTML5.EOF) return;
		this.start -= (d.length);
	},
	// Rewind the read position to the last committed position.
	undo: function() {
		HTML5.debug('buffer.undo')
		this.start = this.committed;
	},
	// Mark the current read position as committed.
	commit: function() {
		HTML5.debug('buffer.commit')
		this.committed = this.start;
	}
}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/buffer.js
___________________________________________________________________
Added: svn:eol-style
1111 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/serializer.js
@@ -0,0 +1,152 @@
 2+"use strict";
 3+var HTML5 = require('../html5');
 4+var events = require('events');
 5+
 6+function keys(o) {
 7+ var r = [];
 8+ for(var k in o) {
 9+ r.push(k);
 10+ }
 11+ return r;
 12+}
 13+
 14+function hescape(s) {
 15+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
 16+}
 17+
 18+var default_opts = {
 19+ lowercase: true,
 20+ minimize_boolean_attributes: true,
 21+ quote_attr_values: true,
 22+ use_best_quote_char: true,
 23+ use_trailing_solidus: true,
 24+ escape_lt_in_attrs: true,
 25+ space_before_trailing_solidus: true
 26+}
 27+
/**
 * Serialize a DOM tree to HTML text by walking it with HTML5.TreeWalker and
 * emitting the markup piece by piece as 'data' events, followed by 'end'.
 *
 * @param {Object} src        Node handed to HTML5.TreeWalker.
 * @param {Function|Object} [target]
 *        - a function: called with every chunk of output;
 *        - an emitter object: receives the 'data' and 'end' events directly;
 *        - omitted: the output is collected and returned as a string.
 * @param {Object} [override] Option values that replace entries of
 *                            default_opts for this call.
 * @returns {string|undefined} The serialized markup when no target was given,
 *                             otherwise undefined.
 */
HTML5.serialize = function(src, target, override) {
	// All locals are declared explicitly: this file is in strict mode, where
	// assigning to an undeclared name throws a ReferenceError.
	var options, k;
	if(!override) {
		options = default_opts;
	} else {
		options = {};
		for(k in default_opts) options[k] = default_opts[k];
		for(k in override) options[k] = override[k];
	}

	var dest;
	var ret = '';
	if(target instanceof Function) {
		dest = new events.EventEmitter();
		dest.addListener('data', target);
	} else if(!target) {
		dest = new events.EventEmitter();
		dest.addListener('data', function(d) {
			ret += d;
		});
	} else {
		dest = target;
	}
	var strict = false;
	var errors = [];

	function serialize_error(data) {
		errors.push(data);
		if(strict) throw(data);
	}

	// Orders attribute nodes by name so the output is deterministic.
	function by_node_name(a, b) {
		if(a.nodeName < b.nodeName) return -1;
		if(a.nodeName > b.nodeName) return 1;
		return 0;
	}

	var in_cdata = false;
	//TODO: Filters
	var doctype;
	var escape_rcdata = false;
	// The walker emits its tokens from inside the constructor, so the whole
	// tree has been serialized by the time this statement completes.
	new HTML5.TreeWalker(src, function(tok) {
		if(tok.type == "Doctype") {
			doctype = "<!DOCTYPE " + tok.name + ">";
			dest.emit('data', doctype);
		} else if(tok.type == 'Characters' || tok.type == 'SpaceCharacters') {
			if(in_cdata || tok.type == 'SpaceCharacters') {
				if(in_cdata && tok.data.indexOf("</") != -1) {
					serialize_error("Unexpected </ in CDATA");
				}
				dest.emit('data', tok.data);
			} else {
				if(tok.data) dest.emit('data', hescape(tok.data));
			}
		} else if(tok.type == "StartTag" || tok.type == 'EmptyTag') {
			var tag_name = tok.name.toLowerCase();
			if(HTML5.RCDATA_ELEMENTS.indexOf(tag_name) != -1 && !escape_rcdata) {
				in_cdata = true;
			} else if (in_cdata) {
				serialize_error("Unexpected child element of a CDATA element");
			}
			var attributes = "";
			var attrs = [];
			for(var ki = 0; ki < tok.data.length; ki++) {
				attrs.push(tok.data.item(ki));
			}
			// A bare sort() would compare the nodes' string conversions.
			attrs.sort(by_node_name);

			var tag_booleans = HTML5.BOOLEAN_ATTRIBUTES[tag_name] || [];
			var global_booleans = HTML5.BOOLEAN_ATTRIBUTES["_global"] || [];
			for(var ai = 0; ai < attrs.length; ai++) {
				var quote_attr = false;
				var attr_name = attrs[ai].nodeName;
				var v = tok.data.getNamedItem(attr_name).nodeValue;
				attributes += " " + attr_name;
				// Boolean attributes are looked up by attribute name (the old
				// code passed the loop index, which never matched).
				var lookup_name = attr_name.toLowerCase();
				var is_boolean = tag_booleans.indexOf(lookup_name) != -1 || global_booleans.indexOf(lookup_name) != -1;
				if(!options.minimize_boolean_attributes || !is_boolean) {
					attributes += "=";
					if(options.quote_attr_values || v.length == 0) {
						quote_attr = true;
					} else {
						quote_attr = new RegExp("[" + HTML5.SPACE_CHARACTERS_IN + "<=>'\"" + "]").test(v);
					}

					v = v.replace(/&/g, '&amp;');
					if(options.escape_lt_in_attrs) v = v.replace(/</g, '&lt;');
					if(quote_attr) {
						var the_quote_char = '"';
						if(options.use_best_quote_char) {
							// Pick the quote character that does not occur in
							// the value, when only one kind is present.
							if(v.indexOf("'") != -1 && v.indexOf('"') == -1) {
								the_quote_char = '"';
							} else if(v.indexOf('"') != -1 && v.indexOf("'") == -1) {
								the_quote_char = "'";
							}
						}
						if(the_quote_char == '"') {
							v = v.replace(/"/g, '&quot;');
						} else {
							v = v.replace(/'/g, '&#39;');
						}
						attributes += the_quote_char + v + the_quote_char;
					} else {
						attributes += v;
					}
				}
			}

			if(HTML5.VOID_ELEMENTS.indexOf(tag_name) != -1 && options.use_trailing_solidus) {
				if(options.space_before_trailing_solidus) {
					attributes += " /";
				} else {
					attributes += "/";
				}
			}

			if(options.lowercase) tok.name = tag_name;

			dest.emit('data', "<" + tok.name + attributes + ">");

		} else if(tok.type == 'EndTag') {
			if(HTML5.RCDATA_ELEMENTS.indexOf(tok.name.toLowerCase()) != -1) {
				in_cdata = false;
			} else if(in_cdata) {
				serialize_error("Unexpected child element of a CDATA element");
			}

			if(options.lowercase) tok.name = tok.name.toLowerCase();
			dest.emit('data', '</' + tok.name + '>');
		} else if(tok.type == 'Comment') {
			if(tok.data.match(/--/)) serialize_error("Comment contains --");
			dest.emit('data', '<!--' + tok.data + '-->');
		} else {
			serialize_error(tok.data);
		}
	});

	dest.emit('end');

	// With no target the collected markup is the result, even when empty.
	if(!target) return ret;
}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/serializer.js
___________________________________________________________________
Added: svn:eol-style
1154 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/tokenizer.js
@@ -0,0 +1,865 @@
 2+//require('../core-upgrade');
 3+var HTML5 = require('../html5');
 4+var events = require('events');
 5+var Buffer = require('./buffer').Buffer;
 6+var Models = HTML5.Models;
 7+
/**
 * List the enumerable property names of an object, in for-in order.
 * Inherited enumerable properties are included, exactly as for-in reports
 * them.
 *
 * @param {Object} h  Object to inspect.
 * @returns {Array} Property names.
 */
function keys(h) {
    var names = [];
    var name;
    for (name in h) {
        names[names.length] = name;
    }
    return names;
}
 15+
/**
 * HTML5 tokenizer. Feeds incoming text into a Buffer and repeatedly runs
 * the method named by the current state until that method returns a falsy
 * value; tokens are reported through 'token' events and the end of input
 * through an 'end' event.
 *
 * @param {String|events.EventEmitter} input  Either the markup itself, or
 *     an emitter that delivers it through 'data' and 'end' events. For a
 *     plain string nothing happens until tokenize() is called.
 * @param document  Stored as `this.document`; not otherwise used here.
 * @throws {Error} if `input` is falsy.
 */
var t = HTML5.Tokenizer = function HTML5Tokenizer(input, document) {
    if (!input) throw(new Error("No input given"));

    var tokenizer = this;
    var content_model;
    var state;
    var source;

    this.document = document;

    // content_model is an accessor so that every change gets logged.
    Object.defineProperty(this, 'content_model', {
        enumerable: true,
        configurable: true,
        get: function() {
            return content_model;
        },
        set: function(model) {
            HTML5.debug('tokenizer.content_model=', model);
            content_model = model;
        }
    });
    this.content_model = Models.PCDATA;

    var buffer = this.buffer = new Buffer();

    // Every state change commits the buffer. The getter is new: the state
    // used to be write-only from the outside.
    Object.defineProperty(this, 'state', {
        enumerable: true,
        configurable: true,
        get: function() {
            return state;
        },
        set: function(newstate) {
            HTML5.debug('tokenizer.state=', newstate);
            state = newstate;
            buffer.commit();
        }
    });
    this.state = 'data_state';
    this.escapeFlag = false;
    this.lastFourChars = '';
    this.current_token = null;

    if (input instanceof events.EventEmitter) {
        source = input;
        this.pump = null;
    } else {
        // Wrap a plain string in a private emitter; tokenize() triggers it.
        source = new events.EventEmitter();
        this.pump = function() {
            source.emit('data', input);
            source.emit('end');
        };
    }

    this.commit = function() {
        buffer.commit();
    };

    source.addListener('data', function(data) {
        if (typeof data !== 'string') data = data.toString();
        buffer.append(data);
        try {
            while (tokenizer[state](buffer));
        } catch (e) {
            // HTML5.DRAIN is the only exception handled here: roll the
            // buffer back with undo() and wait for the next 'data' event.
            if (e != HTML5.DRAIN) {
                throw(e);
            }
            HTML5.debug('tokenizer.drain', 'Drain');
            buffer.undo();
        }
    });

    source.addListener('end', function() {
        buffer.eof = true;
        while (tokenizer[state](buffer));
        tokenizer.emit('end');
    });
};

t.prototype = new events.EventEmitter;
 78+
/**
 * Start tokenizing input that was supplied as a string. Does nothing when
 * no pump function is set on the instance.
 */
t.prototype.tokenize = function() {
    if (!this.pump) {
        return;
    }
    this.pump();
};
 82+
/**
 * Normalize a token and deliver it. While the content model is
 * SCRIPT_CDATA, Characters and SpaceCharacters tokens are appended to
 * `this.script_buffer` instead of being emitted; every other token goes out
 * as a 'token' event.
 *
 * @param {Object} tok  Token to emit.
 */
t.prototype.emitToken = function(tok) {
    var token = this.normalize_token(tok);
    HTML5.debug('tokenizer.token', token);

    var buffering = this.content_model == Models.SCRIPT_CDATA &&
        (token.type == 'Characters' || token.type == 'SpaceCharacters');

    if (!buffering) {
        this.emit('token', token);
        return;
    }
    this.script_buffer += token.data;
};
 92+
/**
 * Try to consume a character reference; the leading '&' has already been
 * read by the caller.
 *
 * Fixes over the previous version:
 *  - the `from_attr` argument is honoured (only `this.from_attribute`,
 *    which nothing visible in this file sets, was consulted before);
 *  - the "followed by a letter or digit" test really checks digits (the
 *    `||` used to sit inside the `.test()` call and referenced
 *    HTML5.DIGITS rather than HTML5.DIGITS_R);
 *  - EOF is no longer concatenated into the candidate name as "-1";
 *  - the two literal characters "-1" after "&#" are no longer mistaken for
 *    the EOF sentinel by a loose comparison.
 *
 * @param buffer  Input buffer (char / shift / unget).
 * @param {Boolean} from_attr  True when the reference is inside an
 *     attribute value. A named reference without a trailing ';' that is
 *     directly followed by a letter or digit is then left undecoded.
 * @returns The replacement text; '&' when an attribute reference was left
 *     undecoded; null when nothing was recognised (all input is pushed
 *     back); false when EOF was hit before the kind of reference could be
 *     determined.
 */
t.prototype.consume_entity = function(buffer, from_attr) {
    var result = null;
    var chars = buffer.char();
    if (chars == HTML5.EOF) return false;

    if (chars.match(HTML5.SPACE_CHARACTERS) || chars == '<' || chars == '&') {
        // Not a reference at all.
        buffer.unget(chars);
    } else if (chars[0] == '#') { // Maybe a numeric entity
        var c = buffer.shift(2);
        // Strict comparison: the string "-1" is loosely equal to EOF (-1).
        if (c === HTML5.EOF) {
            buffer.unget(chars);
            return false;
        }
        chars += c;
        if (chars[1] && chars[1].toLowerCase() == 'x' && HTML5.HEX_DIGITS_R.test(chars[2])) {
            // Hex entity: keep the 'x' consumed, push the first digit back.
            buffer.unget(chars[2]);
            result = this.consume_numeric_entity(buffer, true);
        } else if (chars[1] && HTML5.DIGITS_R.test(chars[1])) {
            // Decimal entity: push back everything after the '#'.
            buffer.unget(chars.slice(1));
            result = this.consume_numeric_entity(buffer, false);
        } else {
            // Not numeric
            buffer.unget(chars);
            this.parse_error("expected-numeric-entity");
        }
    } else {
        var first = chars[0];
        var candidates = keys(HTML5.ENTITIES).filter(function(e) {
            return e[0] == first;
        });
        var startsWithChars = function(e) {
            return e.indexOf(chars) == 0;
        };
        var entityName = null;

        // Extend `chars` one character at a time for as long as it is
        // still a prefix of some entity name, remembering the longest
        // complete name seen.
        while (candidates.some(startsWithChars)) {
            candidates = candidates.filter(startsWithChars);
            var next = buffer.char();
            if (next === HTML5.EOF) break;
            chars += next;

            if (HTML5.ENTITIES[chars]) {
                entityName = chars;
                if (entityName[entityName.length - 1] == ';') break;
            }
        }

        if (entityName) {
            result = HTML5.ENTITIES[entityName];

            var following = chars.substr(entityName.length, 1);
            var in_attribute = from_attr || this.from_attribute;
            var unterminated = entityName[entityName.length - 1] != ';';

            if (unterminated && in_attribute &&
                (HTML5.ASCII_LETTERS_R.test(following) || HTML5.DIGITS_R.test(following))) {
                buffer.unget(chars);
                result = '&';
            } else {
                // Push back whatever was read beyond the entity name.
                buffer.unget(chars.slice(entityName.length));
            }
        } else {
            this.parse_error("expected-named-entity");
            buffer.unget(chars);
        }
    }

    return result;
};
 159+
/**
 * Consume the digits of a numeric character reference and decode them.
 *
 * Fixes over the previous version: the working variables are real locals
 * (they used to leak as globals), reading digits stops at EOF, and code
 * points above U+FFFF are returned as a surrogate pair instead of being
 * truncated to 16 bits by String.fromCharCode.
 *
 * @param buffer  Input buffer positioned at the first digit.
 * @param {Boolean} hex  True for hexadecimal digits, false for decimal.
 * @returns {String} The decoded character, or U+FFFD when the number is
 *     zero, out of range or a surrogate.
 */
t.prototype.consume_numeric_entity = function(buffer, hex) {
    var allowed = hex ? HTML5.HEX_DIGITS_R : HTML5.DIGITS_R;
    var radix = hex ? 16 : 10;
    var digits = '';
    var result;

    var c = buffer.char();
    while (c !== HTML5.EOF && allowed.test(c)) {
        digits = digits + c;
        c = buffer.char();
    }

    var charAsInt = parseInt(digits, radix);

    if (charAsInt == 13) {
        // A carriage return reference is replaced by a line feed.
        this.parse_error("incorrect-cr-newline-entity");
        charAsInt = 10;
    } else if (charAsInt >= 128 && charAsInt <= 159) {
        // 128..159 are remapped through the ENTITIES_WINDOWS1252 table.
        this.parse_error("illegal-windows-1252-entity");
        charAsInt = HTML5.ENTITIES_WINDOWS1252[charAsInt - 128];
    }

    if (0 < charAsInt && charAsInt <= 1114111 && !(55296 <= charAsInt && charAsInt <= 57343)) {
        if (charAsInt > 0xFFFF) {
            // Outside the BMP: encode as a UTF-16 surrogate pair.
            var offset = charAsInt - 0x10000;
            result = String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        } else {
            result = String.fromCharCode(charAsInt);
        }
    } else {
        result = String.fromCharCode(0xFFFD);
        this.parse_error("cant-convert-numeric-entity");
    }

    if (c != ';') {
        // The terminator was something else; it belongs to the following text.
        this.parse_error("numeric-entity-without-semicolon");
        buffer.unget(c);
    }

    return result;
};
 201+
/**
 * Handle an '&' inside an attribute value: append the decoded reference to
 * the value of the attribute currently being built, or a literal '&' when
 * consume_entity() recognises nothing.
 *
 * consume_entity() is now told that it is running inside an attribute
 * (second argument), which its `from_attr` parameter exists for.
 *
 * @param buffer  Input buffer positioned just after the '&'.
 */
t.prototype.process_entity_in_attribute = function(buffer) {
    var entity = this.consume_entity(buffer, true);
    var attribute = this.current_token.data.last();
    attribute.nodeValue += entity ? entity : '&';
};
 210+
/**
 * Decide what a '/' inside a tag means by peeking at the next character.
 * A start tag whose next character is '>' is retyped as 'EmptyTag';
 * anything else is reported as a misplaced solidus.
 *
 * @param buffer  Input buffer; only peeked, nothing is consumed.
 * @returns {Boolean} true when the token was turned into an EmptyTag.
 */
t.prototype.process_solidus_in_tag = function(buffer) {
    var upcoming = buffer.peek(1);
    var closes_start_tag = this.current_token.type == 'StartTag' && upcoming == '>';

    if (!closes_start_tag) {
        this.parse_error("incorrectly-placed-solidus");
        return false;
    }

    this.current_token.type = 'EmptyTag';
    return true;
};
 221+
/**
 * Data state: the default state between tags.
 *
 * In the CDATA, RCDATA and SCRIPT_CDATA content models the last four
 * characters read are tracked in `this.lastFourChars` so that "<!--" can
 * set `this.escapeFlag` and "-->" can clear it.
 *
 * Fixes over the previous version:
 *  - `&&` binds tighter than `||`, so the EOF sentinel used to be appended
 *    to lastFourChars in the RCDATA and SCRIPT_CDATA models;
 *  - the plain-text branch appended the current character a second time,
 *    so the tracker read "<!!-" rather than "<!--" and the escape flag was
 *    never set.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} false once EOF has been emitted, true otherwise.
 */
t.prototype.data_state = function(buffer) {
    var c = buffer.char();
    var model = this.content_model;
    var raw_text = model == Models.CDATA || model == Models.RCDATA || model == Models.SCRIPT_CDATA;

    if (c != HTML5.EOF && raw_text) {
        this.lastFourChars = (this.lastFourChars + c).slice(-4);
    }

    if (model == Models.SCRIPT_CDATA && this.script_buffer == null) {
        // Script text is collected here by emitToken().
        this.script_buffer = '';
    }

    if (c == HTML5.EOF) {
        this.emitToken(HTML5.EOF_TOK);
        this.commit();
        return false;
    } else if (c == '&' && (model == Models.PCDATA || model == Models.RCDATA) && !this.escapeFlag) {
        this.state = 'entity_data_state';
    } else if (c == '-' && raw_text && !this.escapeFlag && this.lastFourChars == '<!--') {
        this.escapeFlag = true;
        this.emitToken({type: 'Characters', data: c});
        this.commit();
    } else if (c == '<' && !this.escapeFlag && (model == Models.PCDATA || raw_text)) {
        this.state = 'tag_open_state';
    } else if (c == '>' && this.escapeFlag && raw_text && this.lastFourChars.match(/-->$/)) {
        this.escapeFlag = false;
        this.emitToken({type: 'Characters', data: c});
        this.commit();
    } else if (HTML5.SPACE_CHARACTERS_R.test(c)) {
        this.emitToken({type: 'SpaceCharacters', data: c + buffer.matchWhile(HTML5.SPACE_CHARACTERS)});
        this.commit();
    } else {
        var o = buffer.matchUntil("[&<>-]");
        this.emitToken({type: 'Characters', data: c + o});
        // `c` is already tracked by the block at the top; add only the rest.
        this.lastFourChars = (this.lastFourChars + o).slice(-4);
        this.commit();
    }
    return true;
};
 265+
/**
 * Entity data state: entered after '&' in text. Emits the decoded
 * reference as a Characters token, or a literal '&' when consume_entity()
 * yields nothing, then returns to the data state.
 *
 * @param buffer  Input buffer positioned just after the '&'.
 * @returns {Boolean} always true.
 */
t.prototype.entity_data_state = function(buffer) {
    var decoded = this.consume_entity(buffer);
    var text = decoded ? decoded : '&';
    this.emitToken({type: 'Characters', data: text});
    this.state = 'data_state';
    return true;
};
 276+
/**
 * Tag open state: entered from the data state once a '<' has been read.
 *
 * In PCDATA the next character selects between a markup declaration, a
 * closing tag, a start tag, or one of several error recoveries. In every
 * other content model only "</" is significant; any other character means
 * the '<' was ordinary text.
 *
 * @param buffer  Input buffer positioned just after the '<'.
 * @returns {Boolean} always true.
 */
t.prototype.tag_open_state = function(buffer) {
    var next = buffer.char();

    if (this.content_model != Models.PCDATA) {
        // RCDATA, CDATA or SCRIPT_CDATA: this state is never entered with
        // the PLAINTEXT flag.
        if (next == '/') {
            this.state = 'close_tag_open_state';
            return true;
        }
        this.emitToken({type: 'Characters', data: "<"});
        buffer.unget(next);
        this.state = 'data_state';
        return true;
    }

    if (next == '!') {
        this.state = 'markup_declaration_open_state';
        return true;
    }
    if (next == '/') {
        this.state = 'close_tag_open_state';
        return true;
    }
    if (next != HTML5.EOF && HTML5.ASCII_LETTERS_R.test(next)) {
        this.current_token = {type: 'StartTag', name: next, data: []};
        this.state = 'tag_name_state';
        return true;
    }
    if (next == '>') {
        // "<>" is passed through as text.
        this.parse_error("expected-tag-name-but-got-right-bracket");
        this.emitToken({type: 'Characters', data: "<>"});
        this.state = 'data_state';
        return true;
    }
    if (next == '?') {
        // "<?" starts a bogus comment; the '?' stays part of it.
        this.parse_error("expected-tag-name-but-got-question-mark");
        buffer.unget(next);
        this.state = 'bogus_comment_state';
        return true;
    }

    // Anything else: the '<' was text and the character is re-read.
    this.parse_error("expected-tag-name");
    this.emitToken({type: 'Characters', data: "<"});
    buffer.unget(next);
    this.state = 'data_state';
    return true;
};
 320+
/**
 * Close tag open state: entered after "</".
 *
 * In the RCDATA, CDATA and SCRIPT_CDATA models the "</" only counts as a
 * closing tag when it is followed by the name of the current token and
 * then by whitespace, '>', '/', '<' or NUL (or by nothing more at all);
 * otherwise "</" is emitted as text. On a match the content model is
 * switched back to PCDATA and the tag is parsed normally.
 *
 * `data` is now a local variable; it used to leak as an implicit global.
 *
 * @param buffer  Input buffer positioned just after the "</".
 * @returns {Boolean} always true.
 */
t.prototype.close_tag_open_state = function(buffer) {
    var model = this.content_model;
    if (model == Models.RCDATA || model == Models.CDATA || model == Models.SCRIPT_CDATA) {
        var chars = '';
        if (this.current_token) {
            // Look ahead name.length + 1 characters, then put them back.
            for (var i = 0; i <= this.current_token.name.length; i++) {
                var c = buffer.char();
                if (c == HTML5.EOF) break;
                chars += c;
            }
            buffer.unget(chars);
        }

        if (this.current_token
            && this.current_token.name.toLowerCase() == chars.slice(0, this.current_token.name.length).toLowerCase()
            && (chars.length > this.current_token.name.length ? new RegExp('[' + HTML5.SPACE_CHARACTERS_IN + '></\0]').test(chars.substr(-1)) : true)
        ) {
            this.content_model = Models.PCDATA;
        } else {
            this.emitToken({type: 'Characters', data: '</'});
            this.state = 'data_state';
            return true;
        }
    }

    var data = buffer.char();
    if (data == HTML5.EOF) {
        this.parse_error("expected-closing-tag-but-got-eof");
        this.emitToken({type: 'Characters', data: '</'});
        buffer.unget(data);
        this.state = 'data_state';
    } else if (HTML5.ASCII_LETTERS_R.test(data)) {
        this.current_token = {type: 'EndTag', name: data, data: []};
        this.state = 'tag_name_state';
    } else if (data == '>') {
        this.parse_error("expected-closing-tag-but-got-right-bracket");
        this.state = 'data_state';
    } else {
        this.parse_error("expected-closing-tag-but-got-char", {data: data}); // param 1 is datavars:
        buffer.unget(data);
        this.state = 'bogus_comment_state';
    }
    return true;
};
 364+
/**
 * Tag name state: accumulates the name of the current start or end tag.
 *
 * `data` is now a local variable; it used to leak as an implicit global.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.tag_name_state = function(buffer) {
    var data = buffer.char();
    if (data == HTML5.EOF) {
        this.parse_error('eof-in-tag-name');
        this.emit_current_token();
    } else if (HTML5.SPACE_CHARACTERS_R.test(data)) {
        this.state = 'before_attribute_name_state';
    } else if (HTML5.ASCII_LETTERS_R.test(data)) {
        // Take the whole run of letters in one go.
        this.current_token.name += data + buffer.matchWhile(HTML5.ASCII_LETTERS);
    } else if (data == '>') {
        this.emit_current_token();
    } else if (data == '/') {
        this.process_solidus_in_tag(buffer);
        this.state = 'self_closing_tag_state';
    } else {
        this.current_token.name += data;
    }
    this.commit();

    return true;
};
 386+
/**
 * Before attribute name state: skips whitespace inside a tag and starts a
 * new attribute on the first character that is not whitespace, EOF, '>'
 * or '/'. A quote or '=' in that position is a parse error but still
 * begins an attribute name.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.before_attribute_name_state = function(buffer) {
    var self = this;
    var ch = buffer.shift(1);

    // Open a new attribute whose name starts with `first`.
    function begin_attribute(first) {
        self.current_token.data.push({nodeName: first, nodeValue: ""});
        self.state = 'attribute_name_state';
    }

    if (HTML5.SPACE_CHARACTERS_R.test(ch)) {
        buffer.matchWhile(HTML5.SPACE_CHARACTERS);
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("expected-attribute-name-but-got-eof");
        this.emit_current_token();
        return true;
    }
    if (HTML5.ASCII_LETTERS_R.test(ch)) {
        begin_attribute(ch);
        return true;
    }
    if (ch == '>') {
        this.emit_current_token();
        return true;
    }
    if (ch == '/') {
        this.state = 'self_closing_tag_state';
        return true;
    }
    if (ch == "'" || ch == '"' || ch == '=') {
        this.parse_error("invalid-character-in-attribute-name");
    }
    begin_attribute(ch);
    return true;
};
 411+
/**
 * Attribute name state: extends the name of the attribute being built
 * until a character ends it.
 *
 * When the name is complete it is compared against the names of the
 * attributes already collected on the token, and "duplicate-attribute" is
 * reported at most once. The duplicate itself is not removed here.
 *
 * Fixes over the previous version: the duplicate check compared the
 * attribute object with a non-existent `.name` property and so never
 * fired, and it iterated the array with for-in through an implicit global
 * `k`. Names are compared case-insensitively because normalize_token()
 * merges attributes by lower-cased name.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.attribute_name_state = function(buffer) {
    var data = buffer.shift(1);
    var leavingThisState = true;
    var emitToken = false;
    if (data == '=') {
        this.state = 'before_attribute_value_state';
    } else if (data == HTML5.EOF) {
        this.parse_error("eof-in-attribute-name");
        this.state = 'data_state';
        emitToken = true;
    } else if (HTML5.ASCII_LETTERS_R.test(data)) {
        this.current_token.data.last().nodeName += data + buffer.matchWhile(HTML5.ASCII_LETTERS);
        leavingThisState = false;
    } else if (data == '>') {
        // Emission is deferred until after the duplicate check below.
        emitToken = true;
    } else if (HTML5.SPACE_CHARACTERS_R.test(data)) {
        this.state = 'after_attribute_name_state';
    } else if (data == '/') {
        if (!this.process_solidus_in_tag(buffer)) {
            this.state = 'before_attribute_name_state';
        }
    } else if (data == "'" || data == '"') {
        this.parse_error("invalid-character-in-attribute-name");
        this.current_token.data.last().nodeName += data;
        leavingThisState = false;
    } else {
        this.current_token.data.last().nodeName += data;
        leavingThisState = false;
    }

    if (leavingThisState) {
        // Attributes are not dropped at this stage. That happens when the
        // start tag token is emitted so values can still be safely appended
        // to attributes, but we do want to report the parse error in time.
        var attributes = this.current_token.data;
        var current = attributes.last();
        if (this.lowercase_attr_name) {
            current.nodeName = current.nodeName.toLowerCase();
        }
        var name = current.nodeName.toLowerCase();
        for (var i = 0; i < attributes.length - 1; i++) {
            if (attributes[i].nodeName.toLowerCase() == name) {
                this.parse_error("duplicate-attribute");
                break; // Don't emit more than one of these errors
            }
        }
        if (emitToken) this.emit_current_token();
    } else {
        this.commit();
    }
    return true;
};
 465+
/**
 * After attribute name state: whitespace followed an attribute name.
 * Decides whether a value follows ('='), the tag ends, or another
 * attribute begins.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.after_attribute_name_state = function(buffer) {
    var self = this;
    var ch = buffer.shift(1);

    // Open a new attribute whose name starts with `first`.
    function begin_attribute(first) {
        self.current_token.data.push({nodeName: first, nodeValue: ""});
        self.state = 'attribute_name_state';
    }

    if (HTML5.SPACE_CHARACTERS_R.test(ch)) {
        buffer.matchWhile(HTML5.SPACE_CHARACTERS);
        return true;
    }
    if (ch == '=') {
        this.state = 'before_attribute_value_state';
        return true;
    }
    if (ch == '>') {
        this.emit_current_token();
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("expected-end-of-tag-but-got-eof");
        this.emit_current_token();
        return true;
    }
    if (HTML5.ASCII_LETTERS_R.test(ch)) {
        begin_attribute(ch);
        return true;
    }
    if (ch == '/') {
        this.state = 'self_closing_tag_state';
        return true;
    }
    begin_attribute(ch);
    return true;
};
 488+
/**
 * Before attribute value state: an '=' has been read; chooses between the
 * double-quoted, single-quoted and unquoted value states.
 *
 * Fix over the previous version: at EOF the state used to be forced to
 * 'attribute_value_unquoted_state' after the token had already been
 * emitted. That state would then see EOF again and emit the same token a
 * second time. The state chosen by emit_current_token() is now kept.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.before_attribute_value_state = function(buffer) {
    var data = buffer.shift(1);
    if (HTML5.SPACE_CHARACTERS_R.test(data)) {
        buffer.matchWhile(HTML5.SPACE_CHARACTERS);
    } else if (data == '"') {
        this.state = 'attribute_value_double_quoted_state';
    } else if (data == '&') {
        // Let the unquoted state handle the character reference.
        this.state = 'attribute_value_unquoted_state';
        buffer.unget(data);
    } else if (data == "'") {
        this.state = 'attribute_value_single_quoted_state';
    } else if (data == '>') {
        this.emit_current_token();
    } else if (data == '=') {
        this.parse_error("equals-in-unquoted-attribute-value");
        this.current_token.data.last().nodeValue += data;
        this.state = 'attribute_value_unquoted_state';
    } else if (data == HTML5.EOF) {
        this.parse_error("expected-attribute-value-but-got-eof");
        this.emit_current_token();
    } else {
        this.current_token.data.last().nodeValue += data;
        this.state = 'attribute_value_unquoted_state';
    }

    return true;
};
 517+
/**
 * Attribute value (double-quoted) state: collects value text up to the
 * closing '"', resolving character references on the way.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.attribute_value_double_quoted_state = function(buffer) {
    var ch = buffer.shift(1);

    if (ch == '"') {
        this.state = 'after_attribute_value_state';
        return true;
    }
    if (ch == '&') {
        this.process_entity_in_attribute(buffer);
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("eof-in-attribute-value-double-quote");
        this.emit_current_token();
        return true;
    }

    // Ordinary text: take everything up to the next quote or ampersand.
    var attribute = this.current_token.data.last();
    attribute.nodeValue += ch + buffer.matchUntil('["&]');
    return true;
};
 532+
/**
 * Attribute value (single-quoted) state: collects value text up to the
 * closing "'", resolving character references on the way.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.attribute_value_single_quoted_state = function(buffer) {
    var ch = buffer.shift(1);

    if (ch == "'") {
        this.state = 'after_attribute_value_state';
        return true;
    }
    if (ch == '&') {
        this.process_entity_in_attribute(buffer);
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("eof-in-attribute-value-single-quote");
        this.emit_current_token();
        return true;
    }

    // Ordinary text: take everything up to the next quote or ampersand.
    var attribute = this.current_token.data.last();
    attribute.nodeValue += ch + buffer.matchUntil("['&]");
    return true;
};
 547+
/**
 * Attribute value (unquoted) state: collects value text until whitespace
 * or '>' ends it. Quotes and '=' are parse errors but are kept as part of
 * the value.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.attribute_value_unquoted_state = function(buffer) {
    var ch = buffer.shift(1);

    if (HTML5.SPACE_CHARACTERS_R.test(ch)) {
        this.state = 'before_attribute_name_state';
        return true;
    }
    if (ch == '&') {
        this.process_entity_in_attribute(buffer);
        return true;
    }
    if (ch == '>') {
        this.emit_current_token();
        return true;
    }
    if (ch == '"' || ch == "'" || ch == '=') {
        this.parse_error("unexpected-character-in-unquoted-attribute-value");
        this.current_token.data.last().nodeValue += ch;
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("eof-in-attribute-value-no-quotes");
        this.emit_current_token();
        return true;
    }

    // Ordinary text: take everything up to whitespace, '&', '<' or '>'.
    var stop_pattern = "[" + HTML5.SPACE_CHARACTERS_IN + '&<>' + "]";
    var tail = buffer.matchUntil(stop_pattern);
    this.current_token.data.last().nodeValue += ch + tail;
    return true;
};
 568+
/**
 * After attribute value state: a quoted value has just been closed.
 * Whitespace, '>' and '/' are expected; any other character is a parse
 * error and is re-read as the start of the next attribute.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.after_attribute_value_state = function(buffer) {
    var ch = buffer.shift(1);

    if (HTML5.SPACE_CHARACTERS_R.test(ch)) {
        this.state = 'before_attribute_name_state';
        return true;
    }
    if (ch == '>') {
        this.emit_current_token();
        this.state = 'data_state';
        return true;
    }
    if (ch == '/') {
        this.state = 'self_closing_tag_state';
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error( "unexpected-EOF-after-attribute-value");
        this.emit_current_token();
        buffer.unget(ch);
        this.state = 'data_state';
        return true;
    }

    this.emitToken({type: 'ParseError', data: "unexpected-character-after-attribute-value"});
    buffer.unget(ch);
    this.state = 'before_attribute_name_state';
    return true;
};
 590+
/**
 * Self-closing tag state: a '/' was seen inside a tag. Only '>' may
 * follow; it marks the current token as self-closing and emits it. Any
 * other character is pushed back and re-read in another state.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.self_closing_tag_state = function(buffer) {
    var ch = buffer.shift(1);

    if (ch == '>') {
        this.current_token.self_closing = true;
        this.emit_current_token();
        this.state = 'data_state';
        return true;
    }

    var at_eof = ch == HTML5.EOF;
    this.parse_error(at_eof ? "eof-in-tag-name" : "expected-self-closing-tag");
    buffer.unget(ch);
    this.state = at_eof ? 'data_state' : 'before_attribute_name_state';
    return true;
};
 608+
/**
 * Bogus comment state: everything up to the next '>' becomes the text of
 * a Comment token, then tokenizing resumes in the data state.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.bogus_comment_state = function(buffer) {
    var text = buffer.matchUntil('>');
    // Read one more character, i.e. whatever stopped matchUntil().
    buffer.char();
    this.emitToken({type: 'Comment', data: text});
    this.state = 'data_state';
    return true;
};
 616+
/**
 * Markup declaration open state: entered after "<!". Recognises "--"
 * (comment) and "DOCTYPE" in any letter case; everything else becomes a
 * bogus comment.
 *
 * Fix over the previous version: the EOF checks use strict comparison.
 * HTML5.EOF is the number -1, and with `==` the two literal characters
 * "-1" (as in "<!-1 ...>") compared equal to it, so that input was treated
 * as end of file and silently dropped.
 *
 * @param buffer  Input buffer positioned just after the "<!".
 * @returns {Boolean} always true.
 */
t.prototype.markup_declaration_open_state = function(buffer) {
    var chars = buffer.shift(2);
    if (chars == '--') {
        this.current_token = {type: 'Comment', data: ''};
        this.state = 'comment_start_state';
        return true;
    }

    var newchars = buffer.shift(5);
    if (newchars === HTML5.EOF || chars === HTML5.EOF) {
        this.parse_error("expected-dashes-or-doctype");
        this.state = 'bogus_comment_state';
        if (chars !== HTML5.EOF) buffer.unget(chars);
        return true;
    }

    // Check for EOF better -- FIXME
    chars += newchars;
    if (chars.toUpperCase() == 'DOCTYPE') {
        this.current_token = {type: 'Doctype', name: '', publicId: null, systemId: null, correct: true};
        this.state = 'doctype_state';
    } else {
        // Not a declaration we know: hand all seven characters back.
        this.parse_error("expected-dashes-or-doctype");
        buffer.unget(chars);
        this.state = 'bogus_comment_state';
    }
    return true;
};
 644+
/**
 * Comment start state: the first character after "<!--".
 *
 * Fix over the previous version: the error code for "<!-->" was spelled
 * "incorrect comment"; it is now "incorrect-comment", the code the comment
 * start dash state reports for the same kind of malformed comment.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.comment_start_state = function(buffer) {
    var data = buffer.shift(1);
    if (data == '-') {
        this.state = 'comment_start_dash_state';
    } else if (data == '>') {
        this.parse_error("incorrect-comment");
        this.emitToken(this.current_token);
        this.state = 'data_state';
    } else if (data == HTML5.EOF) {
        this.parse_error("eof-in-comment");
        this.emitToken(this.current_token);
        this.state = 'data_state';
    } else {
        // Comment text: take everything up to the next '-'.
        this.current_token.data += data + buffer.matchUntil('-');
        this.state = 'comment_state';
    }
    return true;
};
 663+
/**
 * Comment start dash state: a '-' directly followed "<!--".
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.comment_start_dash_state = function(buffer) {
    var ch = buffer.shift(1);

    if (ch == '-') {
        this.state = 'comment_end_state';
        return true;
    }

    var malformed = ch == '>';
    if (malformed || ch == HTML5.EOF) {
        this.parse_error(malformed ? "incorrect-comment" : "eof-in-comment");
        this.emitToken(this.current_token);
        this.state = 'data_state';
        return true;
    }

    // The pending '-' turned out to be comment text.
    this.current_token.data += '-' + ch + buffer.matchUntil('-');
    this.state = 'comment_state';
    return true;
};
 682+
/**
 * Comment state: inside the body of a comment. Text is appended to the
 * current token in runs that end at the next '-'.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.comment_state = function(buffer) {
    var ch = buffer.shift(1);

    if (ch == '-') {
        this.state = 'comment_end_dash_state';
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("eof-in-comment");
        this.emitToken(this.current_token);
        this.state = 'data_state';
        return true;
    }

    this.current_token.data += ch + buffer.matchUntil('-');
    return true;
};
 696+
/**
 * Comment end dash state: a single '-' was seen inside a comment.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.comment_end_dash_state = function(buffer) {
    var ch = buffer.char();

    if (ch == '-') {
        this.state = 'comment_end_state';
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("eof-in-comment-end-dash");
        this.emitToken(this.current_token);
        this.state = 'data_state';
        return true;
    }

    // The dash was ordinary text; keep it together with what follows.
    this.current_token.data += '-' + ch + buffer.matchUntil('-');
    // Consume the next character which is either a "-" or an :EOF as
    // well so if there's a "-" directly after the "-" we go nicely to
    // the "comment end state" without emitting a ParseError there.
    buffer.char();
    return true;
};
 714+
/**
 * Comment end state: "--" was seen inside a comment. '>' finishes the
 * comment; anything else is a parse error and is kept as comment text.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.comment_end_state = function(buffer) {
    var ch = buffer.shift(1);

    if (ch == '>') {
        this.emitToken(this.current_token);
        this.state = 'data_state';
        return true;
    }
    if (ch == '-') {
        // A third (or later) dash: keep it and stay in this state.
        this.parse_error("unexpected-dash-after-double-dash-in-comment");
        this.current_token.data += ch;
        return true;
    }
    if (ch == HTML5.EOF) {
        this.parse_error("eof-in-comment-double-dash");
        this.emitToken(this.current_token);
        this.state = 'data_state';
        return true;
    }

    // XXX
    this.parse_error("unexpected-char-in-comment");
    this.current_token.data += '--' + ch;
    this.state = 'comment_state';
    return true;
};
 735+
/**
 * Doctype state: the character right after "<!DOCTYPE". Whitespace is
 * expected; any other character is a parse error and is re-read by the
 * next state.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.doctype_state = function(buffer) {
    var ch = buffer.shift(1);
    if (!HTML5.SPACE_CHARACTERS_R.test(ch)) {
        this.parse_error("need-space-after-doctype");
        buffer.unget(ch);
    }
    this.state = 'before_doctype_name_state';
    return true;
};
 747+
/**
 * Before doctype name state: skips whitespace and starts the doctype name
 * with the first other character. A '>' or EOF at this point produces a
 * doctype token flagged as not correct.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.before_doctype_name_state = function(buffer) {
    var ch = buffer.shift(1);

    if (HTML5.SPACE_CHARACTERS_R.test(ch)) {
        // Whitespace before the name: nothing to do, read the next one.
        return true;
    }

    var closed_early = ch == '>';
    if (closed_early || ch == HTML5.EOF) {
        this.parse_error(closed_early ?
            "expected-doctype-name-but-got-right-bracket" :
            "expected-doctype-name-but-got-eof");
        this.current_token.correct = false;
        this.emit_current_token();
        this.state = 'data_state';
        return true;
    }

    this.current_token.name = ch;
    this.state = 'doctype_name_state';
    return true;
};
 767+
/**
 * Doctype name state: accumulates the doctype name one character at a
 * time. Whitespace after the name hands over to the bogus doctype state;
 * '>' emits the token.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.doctype_name_state = function(buffer) {
    var ch = buffer.shift(1);

    if (HTML5.SPACE_CHARACTERS_R.test(ch)) {
        this.state = 'bogus_doctype_state';
        return true;
    }
    if (ch == '>') {
        this.emit_current_token();
        this.state = 'data_state';
        return true;
    }
    if (ch == HTML5.EOF) {
        this.current_token.correct = false;
        buffer.unget(ch);
        this.parse_error("eof-in-doctype");
        this.emit_current_token();
        this.state = 'data_state';
        return true;
    }

    this.current_token.name += ch;
    return true;
};
 786+/*
 787+ data += buffer.shift(5);
 788+ var token = data.toLowerCase();
 789+ if(token == 'public') {
 790+ this.state = 'before_doctype_public_identifier_state';
 791+ } else if(token == 'system') {
 792+ this.state = 'before_doctype_system_identifier_state';
 793+ } else {
 794+ buffer.unget(data);
 795+ this.parse_error("expected-space-or-right-bracket-in-doctype", {data: data});
 796+ this.state = 'bogus_doctype_state';
 797+ }
 798+ }
 799+ return true
 800+}
 801+*/
 802+
/**
 * Bogus doctype state: skips the rest of a doctype after its name. The
 * token is marked as not correct and is emitted at the closing '>'.
 *
 * Fix over the previous version: end of input inside the doctype used to
 * throw Error("Unimplemented!"). It is now handled the same way the
 * doctype name state handles EOF: push it back, report "eof-in-doctype",
 * emit the token and return to the data state.
 *
 * @param buffer  Input buffer.
 * @returns {Boolean} always true.
 */
t.prototype.bogus_doctype_state = function(buffer) {
    var data = buffer.shift(1);
    this.current_token.correct = false;
    if (data == '>') {
        this.emit_current_token();
        this.state = 'data_state';
    } else if (data == HTML5.EOF) {
        buffer.unget(data);
        this.parse_error("eof-in-doctype");
        this.emit_current_token();
        this.state = 'data_state';
    }
    return true;
};
 814+
/**
 * Report a parse error by emitting a ParseError token.
 *
 * @param {String} message  Error code, stored as the token's `data`.
 */
t.prototype.parse_error = function(message) {
    var error_token = {type: 'ParseError', data: message};
    this.emitToken(error_token);
};
 818+
/**
 * Emit the token being built and return to the data state.
 *
 * For a script end tag, the script text collected in `this.script_buffer`
 * is emitted first as a single Characters token and the buffer is reset.
 *
 * Fixes over the previous version:
 *  - the tag name is compared case-insensitively; it is only lower-cased
 *    later by normalize_token(), so "</SCRIPT>" never flushed the buffer;
 *  - nothing is flushed when no script text was ever buffered, so a stray
 *    "</script>" no longer produces a Characters token with null or
 *    undefined data.
 */
t.prototype.emit_current_token = function() {
    var tok = this.current_token;

    if (tok.type == 'EndTag') {
        if (tok.self_closing) {
            this.parse_error('self-closing-end-tag');
        }
        if (tok.name.toLowerCase() == "script" && this.script_buffer != null) {
            this.emitToken({ type: 'Characters', data: this.script_buffer });
            this.script_buffer = null;
        }
    }

    this.emitToken(tok);
    this.state = 'data_state';
};
 837+
/**
 * Bring a token into canonical form before it is emitted.
 *
 *  - 'EmptyTag' becomes 'StartTag'; a parse error is reported when the
 *    element is not listed in HTML5.VOID_ELEMENTS.
 *  - Start tag names and attribute names are lower-cased. Of several
 *    attributes with the same name only the first is kept.
 *  - End tag names are lower-cased; attributes on an end tag are a parse
 *    error.
 *
 * Fixes over the previous version: the void-element check is done on the
 * lower-cased name (so "<BR/>" no longer triggers a bogus error), and the
 * surviving attributes keep their source order. De-duplicating through a
 * reversed array and a plain object returned them back to front.
 *
 * @param {Object} token  Token to normalize; modified in place.
 * @returns {Object} The same token.
 */
t.prototype.normalize_token = function(token) {
    if (token.type == 'EmptyTag') {
        if (HTML5.VOID_ELEMENTS.indexOf(token.name.toLowerCase()) == -1) {
            this.parse_error('incorrectly-placed-solidus');
        }
        token.type = 'StartTag';
    }

    if (token.type == 'StartTag') {
        token.name = token.name.toLowerCase();
        if (token.data.length != 0) {
            var seen = {};
            var unique = [];
            token.data.forEach(function(attr) {
                var name = attr.nodeName.toLowerCase();
                // First occurrence wins; later duplicates are dropped.
                if (Object.prototype.hasOwnProperty.call(seen, name)) return;
                seen[name] = true;
                unique.push({nodeName: name, nodeValue: attr.nodeValue});
            });
            token.data = unique;
        }
    } else if (token.type == 'EndTag') {
        if (token.data.length != 0) this.parse_error('attributes-in-end-tag');
        token.name = token.name.toLowerCase();
    }

    return token;
};
 866+
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/tokenizer.js
___________________________________________________________________
Added: svn:eol-style
1867 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/constants.js
@@ -0,0 +1,1101 @@
 2+var HTML5 = require('../html5');
 3+
 4+HTML5.CONTENT_MODEL_FLAGS = [
 5+ 'PCDATA',
 6+ 'RCDATA',
 7+ 'CDATA',
 8+ 'SCRIPT_CDATA',
 9+ 'PLAINTEXT'
 10+];
 11+
 12+HTML5.Marker = {type: 'Marker', data: 'this is a marker token'};
 13+
 14+HTML5.EOF = -1
 15+HTML5.EOF_TOK = {type: 'EOF', data: 'End of File' };
 16+HTML5.DRAIN = -2;
 17+
 18+HTML5.SCOPING_ELEMENTS = [
 19+ 'applet',
 20+ 'button',
 21+ 'caption',
 22+ 'html',
 23+ 'marquee',
 24+ 'object',
 25+ 'table',
 26+ 'td',
 27+ 'th'
 28+];
 29+HTML5.FORMATTING_ELEMENTS = [
 30+ 'a',
 31+ 'b',
 32+ 'big',
 33+ 'code',
 34+ 'em',
 35+ 'font',
 36+ 'i',
 37+ 'nobr',
 38+ 's',
 39+ 'small',
 40+ 'strike',
 41+ 'strong',
 42+ 'tt',
 43+ 'u'
 44+];
 45+HTML5.SPECIAL_ELEMENTS = [
 46+ 'address',
 47+ 'area',
 48+ 'base',
 49+ 'basefont',
 50+ 'bgsound',
 51+ 'blockquote',
 52+ 'body',
 53+ 'br',
 54+ 'center',
 55+ 'col',
 56+ 'colgroup',
 57+ 'dd',
 58+ 'dir',
 59+ 'div',
 60+ 'dl',
 61+ 'dt',
 62+ 'embed',
 63+ 'fieldset',
 64+ 'form',
 65+ 'frame',
 66+ 'frameset',
 67+ 'h1',
 68+ 'h2',
 69+ 'h3',
 70+ 'h4',
 71+ 'h5',
 72+ 'h6',
 73+ 'head',
 74+ 'hr',
 75+ 'iframe',
 76+ 'image',
 77+ 'img',
 78+ 'input',
 79+ 'isindex',
 80+ 'li',
 81+ 'link',
 82+ 'listing',
 83+ 'menu',
 84+ 'meta',
 85+ 'noembed',
 86+ 'noframes',
 87+ 'noscript',
 88+ 'ol',
 89+ 'optgroup',
 90+ 'option',
 91+ 'p',
 92+ 'param',
 93+ 'plaintext',
 94+ 'pre',
 95+ 'script',
 96+ 'select',
 97+ 'spacer',
 98+ 'style',
 99+ 'tbody',
 100+ 'textarea',
 101+ 'tfoot',
 102+ 'thead',
 103+ 'title',
 104+ 'tr',
 105+ 'ul',
 106+ 'wbr'
 107+];
 108+HTML5.SPACE_CHARACTERS_IN = "\t\n\x0B\x0C\x20\u0012\r";
 109+HTML5.SPACE_CHARACTERS = "[\t\n\x0B\x0C\x20\r]";
 110+HTML5.SPACE_CHARACTERS_R = /^[\t\n\x0B\x0C \r]/;
 111+
 112+HTML5.TABLE_INSERT_MODE_ELEMENTS = [
 113+ 'table',
 114+ 'tbody',
 115+ 'tfoot',
 116+ 'thead',
 117+ 'tr'
 118+];
 119+
 120+HTML5.ASCII_LOWERCASE = 'abcdefghijklmnopqrstuvwxyz';
 121+HTML5.ASCII_UPPERCASE = HTML5.ASCII_LOWERCASE.toUpperCase();
 122+HTML5.ASCII_LETTERS = "[a-zA-Z]";
 123+HTML5.ASCII_LETTERS_R = /^[a-zA-Z]/;
 124+HTML5.DIGITS = '0123456789';
 125+HTML5.DIGITS_R = new RegExp('^[0123456789]');
 126+HTML5.HEX_DIGITS = HTML5.DIGITS + 'abcdefABCDEF';
 127+HTML5.HEX_DIGITS_R = new RegExp('^[' + HTML5.DIGITS + 'abcdefABCDEF' +']' );
 128+
 129+// Heading elements need to be ordered
 130+HTML5.HEADING_ELEMENTS = [
 131+ 'h1',
 132+ 'h2',
 133+ 'h3',
 134+ 'h4',
 135+ 'h5',
 136+ 'h6'
 137+];
 138+
 139+HTML5.VOID_ELEMENTS = [
 140+ 'base',
 141+ 'link',
 142+ 'meta',
 143+ 'hr',
 144+ 'br',
 145+ 'img',
 146+ 'embed',
 147+ 'param',
 148+ 'area',
 149+ 'col',
 150+ 'input'
 151+];
 152+
 153+HTML5.CDATA_ELEMENTS = [
 154+ 'title',
 155+ 'textarea'
 156+];
 157+
 158+HTML5.RCDATA_ELEMENTS = [
 159+ 'style',
 160+ 'script',
 161+ 'xmp',
 162+ 'iframe',
 163+ 'noembed',
 164+ 'noframes',
 165+ 'noscript'
 166+];
 167+
 168+HTML5.BOOLEAN_ATTRIBUTES = {
 169+ '_global': ['irrelevant'],
 170+ // Fixme?
 171+ 'style': ['scoped'],
 172+ 'img': ['ismap'],
 173+ 'audio': ['autoplay', 'controls'],
 174+ 'video': ['autoplay', 'controls'],
 175+ 'script': ['defer', 'async'],
 176+ 'details': ['open'],
 177+ 'datagrid': ['multiple', 'disabled'],
 178+ 'command': ['hidden', 'disabled', 'checked', 'default'],
 179+ 'menu': ['autosubmit'],
 180+ 'fieldset': ['disabled', 'readonly'],
 181+ 'option': ['disabled', 'readonly', 'selected'],
 182+ 'optgroup': ['disabled', 'readonly'],
 183+ 'button': ['disabled', 'autofocus'],
 184+ 'input': ['disabled', 'readonly', 'required', 'autofocus', 'checked', 'ismap'],
 185+ 'select': ['disabled', 'readonly', 'autofocus', 'multiple'],
 186+ 'output': ['disabled', 'readonly']
 187+}
 188+
 189+// entitiesWindows1252 has to be _ordered_ and needs to have an index.
 190+HTML5.ENTITIES_WINDOWS1252 = [
 191+ 8364, // 0x80 0x20AC EURO SIGN
 192+ 65533, // 0x81 UNDEFINED
 193+ 8218, // 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
 194+ 402, // 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
 195+ 8222, // 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
 196+ 8230, // 0x85 0x2026 HORIZONTAL ELLIPSIS
 197+ 8224, // 0x86 0x2020 DAGGER
 198+ 8225, // 0x87 0x2021 DOUBLE DAGGER
 199+ 710, // 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
 200+ 8240, // 0x89 0x2030 PER MILLE SIGN
 201+ 352, // 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
 202+ 8249, // 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 203+ 338, // 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
 204+ 65533, // 0x8D UNDEFINED
 205+ 381, // 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
 206+ 65533, // 0x8F UNDEFINED
 207+ 65533, // 0x90 UNDEFINED
 208+ 8216, // 0x91 0x2018 LEFT SINGLE QUOTATION MARK
 209+ 8217, // 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
 210+ 8220, // 0x93 0x201C LEFT DOUBLE QUOTATION MARK
 211+ 8221, // 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
 212+ 8226, // 0x95 0x2022 BULLET
 213+ 8211, // 0x96 0x2013 EN DASH
 214+ 8212, // 0x97 0x2014 EM DASH
 215+ 732, // 0x98 0x02DC SMALL TILDE
 216+ 8482, // 0x99 0x2122 TRADE MARK SIGN
 217+ 353, // 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
 218+ 8250, // 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 219+ 339, // 0x9C 0x0153 LATIN SMALL LIGATURE OE
 220+ 65533, // 0x9D UNDEFINED
 221+ 382, // 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
 222+ 376 // 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
 223+];
 224+
 225+HTML5.ENTITIES = {
 226+ 'AElig': "\u00C6",
 227+ 'AElig;': "\u00C6",
 228+ 'AMP': '&',
 229+ 'AMP;': '&',
 230+ 'Aacute': "\u00C1",
 231+ 'Aacute;': "\u00C1",
 232+ 'Acirc': "\u00C2",
 233+ 'Acirc;': "\u00C2",
 234+ 'Agrave': "\u00C0",
 235+ 'Agrave;': "\u00C0",
 236+ 'Alpha;': "\u0391",
 237+ 'Aring': "\u00C5",
 238+ 'Aring;': "\u00C5",
 239+ 'Atilde': "\u00C3",
 240+ 'Atilde;': "\u00C3",
 241+ 'Auml': "\u00C4",
 242+ 'Auml;': "\u00C4",
 243+ 'Beta;': "\u0392",
 244+ 'COPY': "\u00A9",
 245+ 'COPY;': "\u00A9",
 246+ 'Ccedil': "\u00C7",
 247+ 'Ccedil;': "\u00C7",
 248+ 'Chi;': "\u03A7",
 249+ 'Dagger;': "\u2021",
 250+ 'Delta;': "\u0394",
 251+ 'ETH': "\u00D0",
 252+ 'ETH;': "\u00D0",
 253+ 'Eacute': "\u00C9",
 254+ 'Eacute;': "\u00C9",
 255+ 'Ecirc': "\u00CA",
 256+ 'Ecirc;': "\u00CA",
 257+ 'Egrave': "\u00C8",
 258+ 'Egrave;': "\u00C8",
 259+ 'Epsilon;': "\u0395",
 260+ 'Eta;': "\u0397",
 261+ 'Euml': "\u00CB",
 262+ 'Euml;': "\u00CB",
 263+ 'GT': '>',
 264+ 'GT;': '>',
 265+ 'Gamma;': "\u0393",
 266+ 'Iacute': "\u00CD",
 267+ 'Iacute;': "\u00CD",
 268+ 'Icirc': "\u00CE",
 269+ 'Icirc;': "\u00CE",
 270+ 'Igrave': "\u00CC",
 271+ 'Igrave;': "\u00CC",
 272+ 'Iota;': "\u0399",
 273+ 'Iuml': "\u00CF",
 274+ 'Iuml;': "\u00CF",
 275+ 'Kappa;': "\u039A",
 276+ 'LT': '<',
 277+ 'LT;': '<',
 278+ 'Lambda;': "\u039B",
 279+ 'Mu;': "\u039C",
 280+ 'Ntilde': "\u00D1",
 281+ 'Ntilde;': "\u00D1",
 282+ 'Nu;': "\u039D",
 283+ 'OElig;': "\u0152",
 284+ 'Oacute': "\u00D3",
 285+ 'Oacute;': "\u00D3",
 286+ 'Ocirc': "\u00D4",
 287+ 'Ocirc;': "\u00D4",
 288+ 'Ograve': "\u00D2",
 289+ 'Ograve;': "\u00D2",
 290+ 'Omega;': "\u03A9",
 291+ 'Omicron;': "\u039F",
 292+ 'Oslash': "\u00D8",
 293+ 'Oslash;': "\u00D8",
 294+ 'Otilde': "\u00D5",
 295+ 'Otilde;': "\u00D5",
 296+ 'Ouml': "\u00D6",
 297+ 'Ouml;': "\u00D6",
 298+ 'Phi;': "\u03A6",
 299+ 'Pi;': "\u03A0",
 300+ 'Prime;': "\u2033",
 301+ 'Psi;': "\u03A8",
 302+ 'QUOT': '"',
 303+ 'QUOT;': '"',
 304+ 'REG': "\u00AE",
 305+ 'REG;': "\u00AE",
 306+ 'Rho;': "\u03A1",
 307+ 'Scaron;': "\u0160",
 308+ 'Sigma;': "\u03A3",
 309+ 'THORN': "\u00DE",
 310+ 'THORN;': "\u00DE",
 311+ 'TRADE;': "\u2122",
 312+ 'Tau;': "\u03A4",
 313+ 'Theta;': "\u0398",
 314+ 'Uacute': "\u00DA",
 315+ 'Ucirc': "\u00DB",
 316+ 'Ucirc;': "\u00DB",
 317+ 'Ugrave': "\u00D9",
 318+ 'Ugrave;': "\u00D9",
 319+ 'Upsilon;': "\u03A5",
 320+ 'Uuml': "\u00DC",
 321+ 'Uuml;': "\u00DC",
 322+ 'Xi;': "\u039E",
 323+ 'Yacute': "\u00DD",
 324+ 'Yacute;': "\u00DD",
 325+ 'Yuml;': "\u0178",
 326+ 'Zeta;': "\u0396",
 327+ 'aacute': "\u00E1",
 328+ 'aacute;': "\u00E1",
 329+ 'acirc': "\u00E2",
 330+ 'acirc;': "\u00E2",
 331+ 'acute': "\u00B4",
 332+ 'acute;': "\u00B4",
 333+ 'aelig': "\u00E6",
 334+ 'aelig;': "\u00E6",
 335+ 'agrave': "\u00E0",
 336+ 'agrave;': "\u00E0",
 337+ 'alefsym;': "\u2135",
 338+ 'alpha;': "\u03B1",
 339+ 'amp': '&',
 340+ 'amp;': '&',
 341+ 'and;': "\u2227",
 342+ 'ang;': "\u2220",
 343+ 'apos;': "'",
 344+ 'aring': "\u00E5",
 345+ 'aring;': "\u00E5",
 346+ 'asymp;': "\u2248",
 347+ 'atilde': "\u00E3",
 348+ 'atilde;': "\u00E3",
 349+ 'auml': "\u00E4",
 350+ 'auml;': "\u00E4",
 351+ 'bdquo;': "\u201E",
 352+ 'beta;': "\u03B2",
 353+ 'brvbar': "\u00A6",
 354+ 'brvbar;': "\u00A6",
 355+ 'bull;': "\u2022",
 356+ 'cap;': "\u2229",
 357+ 'ccedil': "\u00E7",
 358+ 'ccedil;': "\u00E7",
 359+ 'cedil': "\u00B8",
 360+ 'cent': "\u00A2",
 361+ 'cent;': "\u00A2",
 362+ 'chi;': "\u03C7",
 363+ 'circ;': "\u02C6",
 364+ 'clubs;': "\u2663",
 365+ 'cong;': "\u2245",
 366+ 'copy': "\u00A9",
 367+ 'copy;': "\u00A9",
 368+ 'crarr;': "\u21B5",
 369+ 'cup;': "\u222A",
 370+ 'curren': "\u00A4",
 371+ 'curren;': "\u00A4",
 372+ 'dArr;': "\u21D3",
 373+ 'dagger;': "\u2020",
 374+ 'darr;': "\u2193",
 375+ 'deg': "\u00B0",
 376+ 'deg;': "\u00B0",
 377+ 'delta;': "\u03B4",
 378+ 'diams;': "\u2666",
 379+ 'divide': "\u00F7",
 380+ 'divide;': "\u00F7",
 381+ 'eacute': "\u00E9",
 382+ 'eacute;': "\u00E9",
 383+ 'ecirc': "\u00EA",
 384+ 'ecirc;': "\u00EA",
 385+ 'egrave': "\u00E8",
 386+ 'egrave;': "\u00E8",
 387+ 'empty;': "\u2205",
 388+ 'emsp;': "\u2003",
 389+ 'ensp;': "\u2002",
 390+ 'epsilon;': "\u03B5",
 391+ 'equiv;': "\u2261",
 392+ 'eta;': "\u03B7",
 393+ 'eth': "\u00F0",
 394+ 'eth;': "\u00F0",
 395+ 'euml': "\u00EB",
 396+ 'euml;': "\u00EB",
 397+ 'euro;': "\u20AC",
 398+ 'exist;': "\u2203",
 399+ 'fnof;': "\u0192",
 400+ 'forall;': "\u2200",
 401+ 'frac12': "\u00BD",
 402+ 'frac12;': "\u00BD",
 403+ 'frac14': "\u00BC",
 404+ 'frac14;': "\u00BC",
 405+ 'frac34': "\u00BE",
 406+ 'frac34;': "\u00BE",
 407+ 'frasl;': "\u2044",
 408+ 'gamma;': "\u03B3",
 409+ 'ge;': "\u2265",
 410+ 'gt': '>',
 411+ 'gt;': '>',
 412+ 'hArr;': "\u21D4",
 413+ 'harr;': "\u2194",
 414+ 'hearts;': "\u2665",
 415+ 'hellip;': "\u2026",
 416+ 'iacute': "\u00ED",
 417+ 'iacute;': "\u00ED",
 418+ 'icirc': "\u00EE",
 419+ 'icirc;': "\u00EE",
 420+ 'iexcl': "\u00A1",
 421+ 'iexcl;': "\u00A1",
 422+ 'igrave': "\u00EC",
 423+ 'igrave;': "\u00EC",
 424+ 'image;': "\u2111",
 425+ 'infin;': "\u221E",
 426+ 'int;': "\u222B",
 427+ 'iota;': "\u03B9",
 428+ 'iquest': "\u00BF",
 429+ 'iquest;': "\u00BF",
 430+ 'isin;': "\u2208",
 431+ 'iuml': "\u00EF",
 432+ 'iuml;': "\u00EF",
 433+ 'kappa;': "\u03BA",
 434+ 'lArr;': "\u21D0",
 435+ 'lambda;': "\u03BB",
 436+ 'lang;': "\u27E8",
 437+ 'laquo': "\u00AB",
 438+ 'laquo;': "\u00AB",
 439+ 'larr;': "\u2190",
 440+ 'lceil;': "\u2308",
 441+ 'ldquo;': "\u201C",
 442+ 'le;': "\u2264",
 443+ 'lfloor;': "\u230A",
 444+ 'lowast;': "\u2217",
 445+ 'loz;': "\u25CA",
 446+ 'lrm;': "\u200E",
 447+ 'lsaquo;': "\u2039",
 448+ 'lsquo;': "\u2018",
 449+ 'lt': '<',
 450+ 'lt;': '<',
 451+ 'macr': "\u00AF",
 452+ 'macr;': "\u00AF",
 453+ 'mdash;': "\u2014",
 454+ 'micro': "\u00B5",
 455+ 'micro;': "\u00B5",
 456+ 'middot': "\u00B7",
 457+ 'middot;': "\u00B7",
 458+ 'minus;': "\u2212",
 459+ 'mu;': "\u03BC",
 460+ 'nabla;': "\u2207",
 461+ 'nbsp': "\u00A0",
 462+ 'nbsp;': "\u00A0",
 463+ 'ndash;': "\u2013",
 464+ 'ne;': "\u2260",
 465+ 'ni;': "\u220B",
 466+ 'not': "\u00AC",
 467+ 'not;': "\u00AC",
 468+ 'notin;': "\u2209",
 469+ 'nsub;': "\u2284",
 470+ 'ntilde': "\u00F1",
 471+ 'ntilde;': "\u00F1",
 472+ 'nu;': "\u03BD",
 473+ 'oacute': "\u00F3",
 474+ 'oacute;': "\u00F3",
 475+ 'ocirc': "\u00F4",
 476+ 'ocirc;': "\u00F4",
 477+ 'oelig;': "\u0153",
 478+ 'ograve': "\u00F2",
 479+ 'ograve;': "\u00F2",
 480+ 'oline;': "\u203E",
 481+ 'omega;': "\u03C9",
 482+ 'omicron;': "\u03BF",
 483+ 'oplus;': "\u2295",
 484+ 'or;': "\u2228",
 485+ 'ordf': "\u00AA",
 486+ 'ordf;': "\u00AA",
 487+ 'ordm': "\u00BA",
 488+ 'ordm;': "\u00BA",
 489+ 'oslash': "\u00F8",
 490+ 'oslash;': "\u00F8",
 491+ 'otilde': "\u00F5",
 492+ 'otilde;': "\u00F5",
 493+ 'otimes;': "\u2297",
 494+ 'ouml': "\u00F6",
 495+ 'ouml;': "\u00F6",
 496+ 'para': "\u00B6",
 497+ 'para;': "\u00B6",
 498+ 'part;': "\u2202",
 499+ 'permil;': "\u2030",
 500+ 'perp;': "\u22A5",
 501+ 'phi;': "\u03C6",
 502+ 'pi;': "\u03C0",
 503+ 'piv;': "\u03D6",
 504+ 'plusmn': "\u00B1",
 505+ 'plusmn;': "\u00B1",
 506+ 'pound': "\u00A3",
 507+ 'pound;': "\u00A3",
 508+ 'prime;': "\u2032",
 509+ 'prod;': "\u220F",
 510+ 'prop;': "\u221D",
 511+ 'psi;': "\u03C8",
 512+ 'quot': '"',
 513+ 'quot;': '"',
 514+ 'rArr;': "\u21D2",
 515+ 'radic;': "\u221A",
 516+ 'rang;': "\u27E9",
 517+ 'raquo': "\u00BB",
 518+ 'raquo;': "\u00BB",
 519+ 'rarr;': "\u2192",
 520+ 'rceil;': "\u2309",
 521+ 'rdquo;': "\u201D",
 522+ 'real;': "\u211C",
 523+ 'reg': "\u00AE",
 524+ 'reg;': "\u00AE",
 525+ 'rfloor;': "\u230B",
 526+ 'rho;': "\u03C1",
 527+ 'rlm;': "\u200F",
 528+ 'rsaquo;': "\u203A",
 529+ 'rsquo;': "\u2019",
 530+ 'sbquo;': "\u201A",
 531+ 'scaron;': "\u0161",
 532+ 'sdot;': "\u22C5",
 533+ 'sect': "\u00A7",
 534+ 'sect;': "\u00A7",
 535+ 'shy': "\u00AD",
 536+ 'shy;': "\u00AD",
 537+ 'sigma;': "\u03C3",
 538+ 'sigmaf;': "\u03C2",
 539+ 'sim;': "\u223C",
 540+ 'spades;': "\u2660",
 541+ 'sub;': "\u2282",
 542+ 'sube;': "\u2286",
 543+ 'sum;': "\u2211",
 544+ 'sup1': "\u00B9",
 545+ 'sup1;': "\u00B9",
 546+ 'sup2': "\u00B2",
 547+ 'sup2;': "\u00B2",
 548+ 'sup3': "\u00B3",
 549+ 'sup3;': "\u00B3",
 550+ 'sup;': "\u2283",
 551+ 'supe;': "\u2287",
 552+ 'szlig': "\u00DF",
 553+ 'szlig;': "\u00DF",
 554+ 'tau;': "\u03C4",
 555+ 'there4;': "\u2234",
 556+ 'theta;': "\u03B8",
 557+ 'thetasym;': "\u03D1",
 558+ 'thinsp;': "\u2009",
 559+ 'thorn': "\u00FE",
 560+ 'thorn;': "\u00FE",
 561+ 'tilde;': "\u02DC",
 562+ 'times': "\u00D7",
 563+ 'times;': "\u00D7",
 564+ 'trade;': "\u2122",
 565+ 'uArr;': "\u21D1",
 566+ 'uacute': "\u00FA",
 567+ 'uacute;': "\u00FA",
 568+ 'uarr;': "\u2191",
 569+ 'ucirc': "\u00FB",
 570+ 'ucirc;': "\u00FB",
 571+ 'ugrave': "\u00F9",
 572+ 'ugrave;': "\u00F9",
 573+ 'uml': "\u00A8",
 574+ 'uml;': "\u00A8",
 575+ 'upsih;': "\u03D2",
 576+ 'upsilon;': "\u03C5",
 577+ 'uuml': "\u00FC",
 578+ 'uuml;': "\u00FC",
 579+ 'weierp;': "\u2118",
 580+ 'xi;': "\u03BE",
 581+ 'yacute': "\u00FD",
 582+ 'yacute;': "\u00FD",
 583+ 'yen': "\u00A5",
 584+ 'yen;': "\u00A5",
 585+ 'yuml': "\u00FF",
 586+ 'yuml;': "\u00FF",
 587+ 'zeta;': "\u03B6",
 588+ 'zwj;': "\u200D",
 589+ 'zwnj;': "\u200C"
 590+}
 591+
 592+HTML5.ENCODINGS = [
 593+ 'ansi_x3.4-1968',
 594+ 'iso-ir-6',
 595+ 'ansi_x3.4-1986',
 596+ 'iso_646.irv:1991',
 597+ 'ascii',
 598+ 'iso646-us',
 599+ 'us-ascii',
 600+ 'us',
 601+ 'ibm367',
 602+ 'cp367',
 603+ 'csascii',
 604+ 'ks_c_5601-1987',
 605+ 'korean',
 606+ 'iso-2022-kr',
 607+ 'csiso2022kr',
 608+ 'euc-kr',
 609+ 'iso-2022-jp',
 610+ 'csiso2022jp',
 611+ 'iso-2022-jp-2',
 612+ '',
 613+ 'iso-ir-58',
 614+ 'chinese',
 615+ 'csiso58gb231280',
 616+ 'iso_8859-1:1987',
 617+ 'iso-ir-100',
 618+ 'iso_8859-1',
 619+ 'iso-8859-1',
 620+ 'latin1',
 621+ 'l1',
 622+ 'ibm819',
 623+ 'cp819',
 624+ 'csisolatin1',
 625+ 'iso_8859-2:1987',
 626+ 'iso-ir-101',
 627+ 'iso_8859-2',
 628+ 'iso-8859-2',
 629+ 'latin2',
 630+ 'l2',
 631+ 'csisolatin2',
 632+ 'iso_8859-3:1988',
 633+ 'iso-ir-109',
 634+ 'iso_8859-3',
 635+ 'iso-8859-3',
 636+ 'latin3',
 637+ 'l3',
 638+ 'csisolatin3',
 639+ 'iso_8859-4:1988',
 640+ 'iso-ir-110',
 641+ 'iso_8859-4',
 642+ 'iso-8859-4',
 643+ 'latin4',
 644+ 'l4',
 645+ 'csisolatin4',
 646+ 'iso_8859-6:1987',
 647+ 'iso-ir-127',
 648+ 'iso_8859-6',
 649+ 'iso-8859-6',
 650+ 'ecma-114',
 651+ 'asmo-708',
 652+ 'arabic',
 653+ 'csisolatinarabic',
 654+ 'iso_8859-7:1987',
 655+ 'iso-ir-126',
 656+ 'iso_8859-7',
 657+ 'iso-8859-7',
 658+ 'elot_928',
 659+ 'ecma-118',
 660+ 'greek',
 661+ 'greek8',
 662+ 'csisolatingreek',
 663+ 'iso_8859-8:1988',
 664+ 'iso-ir-138',
 665+ 'iso_8859-8',
 666+ 'iso-8859-8',
 667+ 'hebrew',
 668+ 'csisolatinhebrew',
 669+ 'iso_8859-5:1988',
 670+ 'iso-ir-144',
 671+ 'iso_8859-5',
 672+ 'iso-8859-5',
 673+ 'cyrillic',
 674+ 'csisolatincyrillic',
 675+ 'iso_8859-9:1989',
 676+ 'iso-ir-148',
 677+ 'iso_8859-9',
 678+ 'iso-8859-9',
 679+ 'latin5',
 680+ 'l5',
 681+ 'csisolatin5',
 682+ 'iso-8859-10',
 683+ 'iso-ir-157',
 684+ 'l6',
 685+ 'iso_8859-10:1992',
 686+ 'csisolatin6',
 687+ 'latin6',
 688+ 'hp-roman8',
 689+ 'roman8',
 690+ 'r8',
 691+ 'ibm037',
 692+ 'cp037',
 693+ 'csibm037',
 694+ 'ibm424',
 695+ 'cp424',
 696+ 'csibm424',
 697+ 'ibm437',
 698+ 'cp437',
 699+ '437',
 700+ 'cspc8codepage437',
 701+ 'ibm500',
 702+ 'cp500',
 703+ 'csibm500',
 704+ 'ibm775',
 705+ 'cp775',
 706+ 'cspc775baltic',
 707+ 'ibm850',
 708+ 'cp850',
 709+ '850',
 710+ 'cspc850multilingual',
 711+ 'ibm852',
 712+ 'cp852',
 713+ '852',
 714+ 'cspcp852',
 715+ 'ibm855',
 716+ 'cp855',
 717+ '855',
 718+ 'csibm855',
 719+ 'ibm857',
 720+ 'cp857',
 721+ '857',
 722+ 'csibm857',
 723+ 'ibm860',
 724+ 'cp860',
 725+ '860',
 726+ 'csibm860',
 727+ 'ibm861',
 728+ 'cp861',
 729+ '861',
 730+ 'cp-is',
 731+ 'csibm861',
 732+ 'ibm862',
 733+ 'cp862',
 734+ '862',
 735+ 'cspc862latinhebrew',
 736+ 'ibm863',
 737+ 'cp863',
 738+ '863',
 739+ 'csibm863',
 740+ 'ibm864',
 741+ 'cp864',
 742+ 'csibm864',
 743+ 'ibm865',
 744+ 'cp865',
 745+ '865',
 746+ 'csibm865',
 747+ 'ibm866',
 748+ 'cp866',
 749+ '866',
 750+ 'csibm866',
 751+ 'ibm869',
 752+ 'cp869',
 753+ '869',
 754+ 'cp-gr',
 755+ 'csibm869',
 756+ 'ibm1026',
 757+ 'cp1026',
 758+ 'csibm1026',
 759+ 'koi8-r',
 760+ 'cskoi8r',
 761+ 'koi8-u',
 762+ 'big5-hkscs',
 763+ 'ptcp154',
 764+ 'csptcp154',
 765+ 'pt154',
 766+ 'cp154',
 767+ 'utf-7',
 768+ 'utf-16be',
 769+ 'utf-16le',
 770+ 'utf-16',
 771+ 'utf-8',
 772+ 'iso-8859-13',
 773+ 'iso-8859-14',
 774+ 'iso-ir-199',
 775+ 'iso_8859-14:1998',
 776+ 'iso_8859-14',
 777+ 'latin8',
 778+ 'iso-celtic',
 779+ 'l8',
 780+ 'iso-8859-15',
 781+ 'iso_8859-15',
 782+ 'iso-8859-16',
 783+ 'iso-ir-226',
 784+ 'iso_8859-16:2001',
 785+ 'iso_8859-16',
 786+ 'latin10',
 787+ 'l10',
 788+ 'gbk',
 789+ 'cp936',
 790+ 'ms936',
 791+ 'gb18030',
 792+ 'shift_jis',
 793+ 'ms_kanji',
 794+ 'csshiftjis',
 795+ 'euc-jp',
 796+ 'gb2312',
 797+ 'big5',
 798+ 'csbig5',
 799+ 'windows-1250',
 800+ 'windows-1251',
 801+ 'windows-1252',
 802+ 'windows-1253',
 803+ 'windows-1254',
 804+ 'windows-1255',
 805+ 'windows-1256',
 806+ 'windows-1257',
 807+ 'windows-1258',
 808+ 'tis-620',
 809+ 'hz-gb-2312'
 810+];
 811+
 812+HTML5.E = {
 813+ "null-character":
 814+ "Null character in input stream, replaced with U+FFFD.",
 815+ "incorrectly-placed-solidus":
 816+ "Solidus (/) incorrectly placed in tag.",
 817+ "incorrect-cr-newline-entity":
 818+ "Incorrect CR newline entity, replaced with LF.",
 819+ "illegal-windows-1252-entity":
 820+ "Entity used with illegal number (windows-1252 reference).",
 821+ "cant-convert-numeric-entity":
 822+ "Numeric entity couldn't be converted to character " +
 823+ "(codepoint U+%(charAsInt)08x).",
 824+ "illegal-codepoint-for-numeric-entity":
 825+ "Numeric entity represents an illegal codepoint=> " +
 826+ "U+%(charAsInt)08x.",
 827+ "numeric-entity-without-semicolon":
 828+ "Numeric entity didn't end with ';'.",
 829+ "expected-numeric-entity-but-got-eof":
 830+ "Numeric entity expected. Got end of file instead.",
 831+ "expected-numeric-entity":
 832+ "Numeric entity expected but none found.",
 833+ "named-entity-without-semicolon":
 834+ "Named entity didn't end with ';'.",
 835+ "expected-named-entity":
 836+ "Named entity expected. Got none.",
 837+ "attributes-in-end-tag":
 838+ "End tag contains unexpected attributes.",
 839+ "expected-tag-name-but-got-right-bracket":
 840+ "Expected tag name. Got '>' instead.",
 841+ "expected-tag-name-but-got-question-mark":
 842+ "Expected tag name. Got '?' instead. (HTML doesn't " +
 843+ "support processing instructions.)",
 844+ "expected-tag-name":
 845+ "Expected tag name. Got something else instead",
 846+ "expected-closing-tag-but-got-right-bracket":
 847+ "Expected closing tag. Got '>' instead. Ignoring '</>'.",
 848+ "expected-closing-tag-but-got-eof":
 849+ "Expected closing tag. Unexpected end of file.",
 850+ "expected-closing-tag-but-got-char":
 851+ "Expected closing tag. Unexpected character '%(data)' found.",
 852+ "eof-in-tag-name":
 853+ "Unexpected end of file in the tag name.",
 854+ "expected-attribute-name-but-got-eof":
 855+ "Unexpected end of file. Expected attribute name instead.",
 856+ "eof-in-attribute-name":
 857+ "Unexpected end of file in attribute name.",
 858+ "duplicate-attribute":
 859+ "Dropped duplicate attribute on tag.",
 860+ "expected-end-of-tag-name-but-got-eof":
 861+ "Unexpected end of file. Expected = or end of tag.",
 862+ "expected-attribute-value-but-got-eof":
 863+ "Unexpected end of file. Expected attribute value.",
 864+ "eof-in-attribute-value-double-quote":
 865+ "Unexpected end of file in attribute value (\").",
 866+ "eof-in-attribute-value-single-quote":
 867+ "Unexpected end of file in attribute value (').",
 868+ "eof-in-attribute-value-no-quotes":
 869+ "Unexpected end of file in attribute value.",
 870+ "expected-dashes-or-doctype":
 871+ "Expected '--' or 'DOCTYPE'. Not found.",
 872+ "incorrect-comment":
 873+ "Incorrect comment.",
 874+ "eof-in-comment":
 875+ "Unexpected end of file in comment.",
 876+ "eof-in-comment-end-dash":
 877+ "Unexpected end of file in comment (-)",
 878+ "unexpected-dash-after-double-dash-in-comment":
 879+ "Unexpected '-' after '--' found in comment.",
 880+ "eof-in-comment-double-dash":
 881+ "Unexpected end of file in comment (--).",
 882+ "unexpected-char-in-comment":
 883+ "Unexpected character in comment found.",
 884+ "need-space-after-doctype":
 885+ "No space after literal string 'DOCTYPE'.",
 886+ "expected-doctype-name-but-got-right-bracket":
 887+ "Unexpected > character. Expected DOCTYPE name.",
 888+ "expected-doctype-name-but-got-eof":
 889+ "Unexpected end of file. Expected DOCTYPE name.",
 890+ "eof-in-doctype-name":
 891+ "Unexpected end of file in DOCTYPE name.",
 892+ "eof-in-doctype":
 893+ "Unexpected end of file in DOCTYPE.",
 894+ "expected-space-or-right-bracket-in-doctype":
 895+ "Expected space or '>'. Got '%(data)'",
 896+ "unexpected-end-of-doctype":
 897+ "Unexpected end of DOCTYPE.",
 898+ "unexpected-char-in-doctype":
 899+ "Unexpected character in DOCTYPE.",
 900+ "eof-in-bogus-doctype":
 901+ "Unexpected end of file in bogus doctype.",
 902+ "eof-in-innerhtml":
 903+ "Unexpected EOF in inner html mode.",
 904+ "unexpected-doctype":
 905+ "Unexpected DOCTYPE. Ignored.",
 906+ "non-html-root":
 907+ "html needs to be the first start tag.",
 908+ "expected-doctype-but-got-eof":
 909+ "Unexpected End of file. Expected DOCTYPE.",
 910+ "unknown-doctype":
 911+ "Erroneous DOCTYPE.",
 912+ "expected-doctype-but-got-chars":
 913+ "Unexpected non-space characters. Expected DOCTYPE.",
 914+ "expected-doctype-but-got-start-tag":
 915+ "Unexpected start tag (%(name)). Expected DOCTYPE.",
 916+ "expected-doctype-but-got-end-tag":
 917+ "Unexpected end tag (%(name)). Expected DOCTYPE.",
 918+ "end-tag-after-implied-root":
 919+ "Unexpected end tag (%(name)) after the (implied) root element.",
 920+ "expected-named-closing-tag-but-got-eof":
 921+ "Unexpected end of file. Expected end tag (%(name)).",
 922+ "two-heads-are-not-better-than-one":
 923+ "Unexpected start tag head in existing head. Ignored.",
 924+ "unexpected-end-tag":
 925+ "Unexpected end tag (%(name)). Ignored.",
 926+ "unexpected-start-tag-out-of-my-head":
 927+ "Unexpected start tag (%(name)) that can be in head. Moved.",
 928+ "unexpected-start-tag":
 929+ "Unexpected start tag (%(name)).",
 930+ "missing-end-tag":
 931+ "Missing end tag (%(name)).",
 932+ "missing-end-tags":
 933+ "Missing end tags (%(name)).",
 934+ "unexpected-start-tag-implies-end-tag":
 935+ "Unexpected start tag (%(startName)) " +
 936+ "implies end tag (%(endName)).",
 937+ "unexpected-start-tag-treated-as":
 938+ "Unexpected start tag (%(originalName)). Treated as %(newName).",
 939+ "deprecated-tag":
 940+ "Unexpected start tag %(name). Don't use it!",
 941+ "unexpected-start-tag-ignored":
 942+ "Unexpected start tag %(name). Ignored.",
 943+ "expected-one-end-tag-but-got-another":
 944+ "Unexpected end tag (%(gotName). " +
 945+ "Missing end tag (%(expectedName)).",
 946+ "end-tag-too-early":
 947+ "End tag (%(name)) seen too early. Expected other end tag.",
 948+ "end-tag-too-early-named":
 949+ "Unexpected end tag (%(gotName)). Expected end tag (%(expectedName).",
 950+ "end-tag-too-early-ignored":
 951+ "End tag (%(name)) seen too early. Ignored.",
 952+ "adoption-agency-1.1":
 953+ "End tag (%(name) violates step 1, " +
 954+ "paragraph 1 of the adoption agency algorithm.",
 955+ "adoption-agency-1.2":
 956+ "End tag (%(name) violates step 1, " +
 957+ "paragraph 2 of the adoption agency algorithm.",
 958+ "adoption-agency-1.3":
 959+ "End tag (%(name) violates step 1, " +
 960+ "paragraph 3 of the adoption agency algorithm.",
 961+ "unexpected-end-tag-treated-as":
 962+ "Unexpected end tag (%(originalName)). Treated as %(newName).",
 963+ "no-end-tag":
 964+ "This element (%(name)) has no end tag.",
 965+ "unexpected-implied-end-tag-in-table":
 966+ "Unexpected implied end tag (%(name)) in the table phase.",
 967+ "unexpected-implied-end-tag-in-table-body":
 968+ "Unexpected implied end tag (%(name)) in the table body phase.",
 969+ "unexpected-char-implies-table-voodoo":
 970+ "Unexpected non-space characters in " +
 971+ "table context caused voodoo mode.",
 972+ "unpexted-hidden-input-in-table":
 973+ "Unexpected input with type hidden in table context.",
 974+ "unexpected-start-tag-implies-table-voodoo":
 975+ "Unexpected start tag (%(name)) in " +
 976+ "table context caused voodoo mode.",
 977+ "unexpected-end-tag-implies-table-voodoo":
 978+ "Unexpected end tag (%(name)) in " +
 979+ "table context caused voodoo mode.",
 980+ "unexpected-cell-in-table-body":
 981+ "Unexpected table cell start tag (%(name)) " +
 982+ "in the table body phase.",
 983+ "unexpected-cell-end-tag":
 984+ "Got table cell end tag (%(name)) " +
 985+ "while required end tags are missing.",
 986+ "unexpected-end-tag-in-table-body":
 987+ "Unexpected end tag (%(name)) in the table body phase. Ignored.",
 988+ "unexpected-implied-end-tag-in-table-row":
 989+ "Unexpected implied end tag (%(name)) in the table row phase.",
 990+ "unexpected-end-tag-in-table-row":
 991+ "Unexpected end tag (%(name)) in the table row phase. Ignored.",
 992+ "unexpected-select-in-select":
 993+ "Unexpected select start tag in the select phase " +
 994+ "treated as select end tag.",
 995+ "unexpected-input-in-select":
 996+ "Unexpected input start tag in the select phase.",
 997+ "unexpected-start-tag-in-select":
 998+ "Unexpected start tag token (%(name)) in the select phase. " +
 999+ "Ignored.",
 1000+ "unexpected-end-tag-in-select":
 1001+ "Unexpected end tag (%(name)) in the select phase. Ignored.",
 1002+ "unexpected-table-element-start-tag-in-select-in-table":
 1003+ "Unexpected table element start tag (%(name))s in the select in table phase.",
 1004+ "unexpected-table-element-end-tag-in-select-in-table":
 1005+ "Unexpected table element end tag (%(name))s in the select in table phase.",
 1006+ "unexpected-char-after-body":
 1007+ "Unexpected non-space characters in the after body phase.",
 1008+ "unexpected-start-tag-after-body":
 1009+ "Unexpected start tag token (%(name))" +
 1010+ "in the after body phase.",
 1011+ "unexpected-end-tag-after-body":
 1012+ "Unexpected end tag token (%(name))" +
 1013+ " in the after body phase.",
 1014+ "unexpected-char-in-frameset":
 1015+ "Unepxected characters in the frameset phase. Characters ignored.",
 1016+ "unexpected-start-tag-in-frameset":
 1017+ "Unexpected start tag token (%(name))" +
 1018+ " in the frameset phase. Ignored.",
 1019+ "unexpected-frameset-in-frameset-innerhtml":
 1020+ "Unexpected end tag token (frameset " +
 1021+ "in the frameset phase (innerHTML).",
 1022+ "unexpected-end-tag-in-frameset":
 1023+ "Unexpected end tag token (%(name))" +
 1024+ " in the frameset phase. Ignored.",
 1025+ "unexpected-char-after-frameset":
 1026+ "Unexpected non-space characters in the " +
 1027+ "after frameset phase. Ignored.",
 1028+ "unexpected-start-tag-after-frameset":
 1029+ "Unexpected start tag (%(name))" +
 1030+ " in the after frameset phase. Ignored.",
 1031+ "unexpected-end-tag-after-frameset":
 1032+ "Unexpected end tag (%(name))" +
 1033+ " in the after frameset phase. Ignored.",
 1034+ "expected-eof-but-got-char":
 1035+ "Unexpected non-space characters. Expected end of file.",
 1036+ "expected-eof-but-got-char":
 1037+ "Unexpected non-space characters. Expected end of file.",
 1038+ "expected-eof-but-got-start-tag":
 1039+ "Unexpected start tag (%(name))" +
 1040+ ". Expected end of file.",
 1041+ "expected-eof-but-got-end-tag":
 1042+ "Unexpected end tag (%(name))" +
 1043+ ". Expected end of file.",
 1044+ "unexpected-end-table-in-caption":
 1045+ "Unexpected end table tag in caption. Generates implied end caption.",
 1046+ "end-html-in-innerhtml":
 1047+ "Unexpected html end tag in inner html mode.",
 1048+ "expected-self-closing-tag":
 1049+ "Expected a > after the /.",
 1050+ "self-closing-end-tag":
 1051+ "Self closing end tag.",
 1052+ "eof-in-table":
 1053+ "Unexpected end of file. Expected table content.",
 1054+ "html-in-foreign-content":
 1055+ "HTML start tag \"%(name)\" in a foreign namespace context.",
 1056+ "unexpected-start-tag-in-table":
 1057+ "Unexpected %(name). Expected table content."
 1058+};
 1059+
 1060+HTML5.Models = {PCDATA: 0, RCDATA: 1, CDATA: 2, SCRIPT_CDATA: 3};
 1061+
 1062+HTML5.PHASES = PHASES = {
 1063+ initial: require('./parser/initial_phase').Phase,
 1064+ beforeHTML: require('./parser/before_html_phase').Phase,
 1065+ beforeHead: require('./parser/before_head_phase').Phase,
 1066+ inHead: require('./parser/in_head_phase').Phase,
 1067+ afterHead: require('./parser/after_head_phase').Phase,
 1068+ inBody: require('./parser/in_body_phase').Phase,
 1069+ inTable: require('./parser/in_table_phase').Phase,
 1070+ inCaption: require('./parser/in_caption_phase').Phase,
 1071+ inColumnGroup: require('./parser/in_column_group_phase').Phase,
 1072+ inTableBody: require('./parser/in_table_body_phase').Phase,
 1073+ inRow: require('./parser/in_row_phase').Phase,
 1074+ inCell: require('./parser/in_cell_phase').Phase,
 1075+ inSelect: require('./parser/in_select_phase').Phase,
 1076+ inSelectInTable: require('./parser/in_select_in_table_phase').Phase,
 1077+ afterBody: require('./parser/after_body_phase').Phase,
 1078+ inFrameset: require('./parser/in_frameset_phase').Phase,
 1079+ afterFrameset: require('./parser/after_frameset_phase').Phase,
 1080+ afterAfterBody: require('./parser/after_after_body_phase').Phase,
 1081+ afterAfterFrameset: require('./parser/after_after_frameset_phase').Phase,
 1082+ inForeignContent: require('./parser/in_foreign_content_phase').Phase,
 1083+ trailingEnd: require('./parser/trailing_end_phase').Phase,
 1084+ rootElement: require('./parser/root_element_phase').Phase,
 1085+};
 1086+
 1087+HTML5.TAGMODES = {
 1088+ select: 'inSelect',
 1089+ td: 'inCell',
 1090+ th: 'inCell',
 1091+ tr: 'inRow',
 1092+ tbody: 'inTableBody',
 1093+ thead: 'inTableBody',
 1094+ tfoot: 'inTableBody',
 1095+ caption: 'inCaption',
 1096+ colgroup: 'inColumnGroup',
 1097+ table: 'inTable',
 1098+ head: 'inBody',
 1099+ body: 'inBody',
 1100+ frameset: 'inFrameset'
 1101+};
 1102+
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/constants.js
___________________________________________________________________
Added: svn:eol-style
11103 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser.js
@@ -0,0 +1,226 @@
 2+"use strict";
 3+
 4+var HTML5 = exports.HTML5 = require('../html5');
 5+
 6+var events = require('events');
 7+
 8+require('./treebuilder');
 9+require('../mediawiki.html5TokenEmitter');
 10+
 11+var Phase = require('./parser/phase').Phase;
 12+
 13+var Parser = HTML5.Parser = function HTML5Parser(options) {
 14+ events.EventEmitter.apply(this);
 15+ this.strict = false;
 16+ this.errors = [];
 17+ var phase;
 18+
 19+ this.__defineSetter__('phase', function(p) {
 20+ phase = p;
 21+ if(!p) throw( new Error("Can't leave phase undefined"));
 22+ if(!p instanceof Function) throw( new Error("Not a function"));
 23+ });
 24+
 25+ this.__defineGetter__('phase', function() {
 26+ return phase;
 27+ });
 28+
 29+ if(options) for(o in options) {
 30+ this[o] = options[o];
 31+ }
 32+
 33+ if(!this.document) {
 34+ var l3, jsdom
 35+ jsdom = require('jsdom')
 36+ l3 = jsdom.dom.level3.core
 37+ var DOM = jsdom.browserAugmentation(l3)
 38+ this.document = new DOM.Document('html');
 39+ }
 40+
 41+ this.tree = new HTML5.TreeBuilder(this.document);
 42+}
 43+
 44+Parser.prototype = new events.EventEmitter;
 45+
 46+Parser.prototype.parse = function(tokenizer) {
 47+ this.tokenizer = tokenizer;
 48+ this.setup();
 49+ //this.tokenizer.tokenize();
 50+}
 51+
 52+Parser.prototype.parse_fragment = function(source, element) {
 53+ HTML5.debug('parser.parse_fragment', source, element)
 54+ // FIXME: Check to make sure element is inside document
 55+ //this.tokenizer = new HTML5.Tokenizer(source, this.document);
 56+ if(element && element.ownerDocument) {
 57+ this.setup(element.tagName, null);
 58+ this.tree.open_elements.push(element);
 59+ this.tree.root_pointer = element;
 60+ } else if(element) {
 61+ this.setup(element, null);
 62+ this.tree.open_elements.push(this.tree.html_pointer);
 63+ this.tree.open_elements.push(this.tree.body_pointer);
 64+ this.tree.root_pointer = this.tree.body_pointer;
 65+ } else {
 66+ this.setup('div', null);
 67+ this.tree.open_elements.push(this.tree.html_pointer);
 68+ this.tree.open_elements.push(this.tree.body_pointer);
 69+ this.tree.root_pointer = this.tree.body_pointer;
 70+ }
 71+ //this.tokenizer.tokenize();
 72+}
 73+
 74+Object.defineProperty(Parser.prototype, 'fragment', {
 75+ get: function() {
 76+ return this.tree.getFragment();
 77+ }
 78+});
 79+
 80+Parser.prototype.newPhase = function(name) {
 81+ this.phase = new PHASES[name](this, this.tree);
 82+ HTML5.debug('parser.newPhase', name)
 83+ this.phaseName = name;
 84+}
 85+
 86+Parser.prototype.do_token = function(token) {
 87+ var method = 'process' + token.type;
 88+
 89+ switch(token.type) {
 90+ case 'Characters':
 91+ case 'SpaceCharacters':
 92+ case 'Comment':
 93+ this.phase[method](token.data);
 94+ break;
 95+ case 'StartTag':
 96+ if (token.name == "script") {
 97+ this.inScript = true;
 98+ this.scriptBuffer = '';
 99+ }
 100+ this.phase[method](token.name, token.data, token.self_closing);
 101+ break;
 102+ case 'EndTag':
 103+ this.phase[method](token.name);
 104+ if (token.name == "script") {
 105+ this.inScript = false;
 106+ }
 107+ break;
 108+ case 'Doctype':
 109+ this.phase[method](token.name, token.publicId, token.systemId, token.correct);
 110+ break;
 111+ case 'EOF':
 112+ this.phase[method]();
 113+ break;
 114+ default:
 115+ this.parse_error(token.data, token.datavars)
 116+ }
 117+}
 118+
 119+Parser.prototype.setup = function(container, encoding) {
 120+ this.tokenizer.addListener('token', function(t) {
 121+ return function(token) { t.do_token(token); };
 122+ }(this));
 123+ this.tokenizer.addListener('end', function(t) {
 124+ return function() { t.emit('end'); };
 125+ }(this));
 126+ this.emit('setup', this);
 127+
 128+ var inner_html = !!container;
 129+ container = container || 'div';
 130+
 131+ this.tree.reset();
 132+ this.first_start_tag = false;
 133+ this.errors = [];
 134+
 135+ // FIXME: instantiate tokenizer and plumb. Pass lowercasing options.
 136+
 137+ if(inner_html) {
 138+ this.inner_html = container.toLowerCase();
 139+ switch(this.inner_html) {
 140+ case 'title':
 141+ case 'textarea':
 142+ this.tokenizer.content_model = HTML5.Models.RCDATA;
 143+ break;
 144+ case 'script':
 145+ this.tokenizer.content_model = HTML5.Models.SCRIPT_CDATA;
 146+ break;
 147+ case 'style':
 148+ case 'xmp':
 149+ case 'iframe':
 150+ case 'noembed':
 151+ case 'noframes':
 152+ case 'noscript':
 153+ this.tokenizer.content_model = HTML5.Models.CDATA;
 154+ break;
 155+ case 'plaintext':
 156+ this.tokenizer.content_model = HTML5.Models.PLAINTEXT;
 157+ break;
 158+ default:
 159+ this.tokenizer.content_model = HTML5.Models.PCDATA;
 160+ }
 161+ this.tree.create_structure_elements(inner_html);
 162+ switch(inner_html) {
 163+ case 'html':
 164+ this.newPhase('afterHtml')
 165+ break;
 166+ case 'head':
 167+ this.newPhase('inHead')
 168+ break;
 169+ default:
 170+ this.newPhase('inBody')
 171+ }
 172+ this.reset_insertion_mode(this.inner_html);
 173+ } else {
 174+ this.inner_html = false;
 175+ this.newPhase('initial');
 176+ }
 177+
 178+ this.last_phase = null;
 179+
 180+}
 181+
 182+Parser.prototype.parse_error = function(code, data) {
 183+ // FIXME: this.errors.push([this.tokenizer.position, code, data]);
 184+ this.errors.push([code, data]);
 185+ if(this.strict) throw(this.errors.last());
 186+}
 187+
 188+Parser.prototype.reset_insertion_mode = function(context) {
 189+ var last = false;
 190+
 191+ var node_name;
 192+
 193+ for(var i = this.tree.open_elements.length - 1; i >= 0; i--) {
 194+ var node = this.tree.open_elements[i]
 195+ node_name = node.tagName.toLowerCase()
 196+ if(node == this.tree.open_elements[0]) {
 197+ last = true
 198+ if(node_name != 'th' && node_name != 'td') {
 199+ // XXX
 200+ // assert.ok(this.inner_html);
 201+ node_name = context.tagName;
 202+ }
 203+ }
 204+
 205+ if(!(node_name == 'select' || node_name == 'colgroup' || node_name == 'head' || node_name == 'frameset')) {
 206+ // XXX
 207+ // assert.ok(this.inner_html)
 208+ }
 209+
 210+
 211+ if(HTML5.TAGMODES[node_name]) {
 212+ this.newPhase(HTML5.TAGMODES[node_name]);
 213+ } else if(node_name == 'html') {
 214+ this.newPhase(this.tree.head_pointer ? 'afterHead' : 'beforeHead');
 215+ } else if(last) {
 216+ this.newPhase('inBody');
 217+ } else {
 218+ continue;
 219+ }
 220+
 221+ break;
 222+ }
 223+}
 224+
 225+Parser.prototype._ = function(str) {
 226+ return(str);
 227+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser.js
___________________________________________________________________
Added: svn:eol-style
1228 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/COPYING
@@ -0,0 +1,19 @@
 2+Copyright (c) 2010 Aria Stewart <aredridel@nbtsc.org>
 3+
 4+Permission is hereby granted, free of charge, to any person obtaining a copy
 5+of this software and associated documentation files (the "Software"), to deal
 6+in the Software without restriction, including without limitation the rights
 7+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8+copies of the Software, and to permit persons to whom the Software is
 9+furnished to do so, subject to the following conditions:
 10+
 11+The above copyright notice and this permission notice shall be included in
 12+all copies or substantial portions of the Software.
 13+
 14+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20+THE SOFTWARE.
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_html_phase.js
@@ -0,0 +1,52 @@
 2+var Phase = require('./phase').Phase;
 3+var HTML5 = require('../../html5');
 4+
 5+exports.Phase = p = function BeforeHtmlPhase(parser, tree) {
 6+ Phase.call(this, parser, tree);
 7+ this.name = 'before_html_phase'
 8+}
 9+
 10+p.prototype = new Phase;
 11+
 12+p.prototype.processEOF = function() {
 13+ this.insert_html_element();
 14+ this.parser.phase.processEOF();
 15+}
 16+
 17+p.prototype.processComment = function(data) {
 18+ this.tree.insert_comment(data, this.tree.document);
 19+}
 20+
 21+p.prototype.processSpaceCharacters = function(data) {
 22+}
 23+
 24+p.prototype.processCharacters = function(data) {
 25+ this.insert_html_element();
 26+ this.parser.phase.processCharacters(data);
 27+}
 28+
 29+p.prototype.processStartTag = function(name, attributes, self_closing) {
 30+ if(name == 'html') this.parser.first_start_tag = true;
 31+ this.insert_html_element();
 32+ this.parser.phase.processStartTag(name, attributes);
 33+}
 34+
 35+p.prototype.processEndTag = function(name) {
 36+ this.insert_html_element();
 37+ this.parser.phase.processEndTag(name);
 38+}
 39+
 40+p.prototype.insert_html_element = function() {
 41+ var de
 42+ if(de = this.tree.document.documentElement) {
 43+ if(de.tagName != 'HTML')
 44+ HTML5.debug('parser.before_html_phase', 'Non-HTML root element!')
 45+ this.tree.open_elements.push(de)
 46+ while(de.childNodes.length >= 1) de.removeChild(de.firstChild)
 47+ } else {
 48+ var element = this.tree.createElement('html', []);
 49+ this.tree.open_elements.push(element);
 50+ this.tree.document.appendChild(element);
 51+ }
 52+ this.parser.newPhase('beforeHead');
 53+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_html_phase.js
___________________________________________________________________
Added: svn:eol-style
154 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_frameset_phase.js
@@ -0,0 +1,42 @@
 2+var Phase = require('./phase').Phase;
 3+var inBody = require('./in_body_phase').Phase;
 4+
 5+var start_tag_handlers = {
 6+ html: 'startTagHtml',
 7+ noframes: 'startTagNoframes',
 8+ '-default': 'startTagOther',
 9+}
 10+
 11+var end_tag_handlers = {
 12+ html: 'endTagHtml',
 13+ '-default': 'endTagOther',
 14+}
 15+
 16+exports.Phase = p = function AfterFramesetPhase(parser, tree) {
 17+ Phase.call(this, parser, tree);
 18+ this.start_tag_handlers = start_tag_handlers;
 19+ this.end_tag_handlers = end_tag_handlers;
 20+}
 21+
 22+p.prototype = new Phase;
 23+
 24+p.prototype.processCharacters = function(data) {
 25+ this.parse_error("unexpected-char-after-frameset");
 26+}
 27+
 28+p.prototype.startTagNoframes = function(name, attributes) {
 29+ new inBody(this.parser, this.tree).processStartTag(name, attributes);
 30+}
 31+
 32+p.prototype.startTagOther = function(name, attributes) {
 33+ this.parse_error("unexpected-start-tag-after-frameset", {name: name});
 34+}
 35+
 36+p.prototype.endTagHtml = function(name) {
 37+ this.parser.last_phase = this.parser.phase;
 38+ this.parser.newPhase('trailingEnd');
 39+}
 40+
 41+p.prototype.endTagOther = function(name) {
 42+ this.parse_error("unexpected-end-tag-after-frameset", {name: name});
 43+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_frameset_phase.js
___________________________________________________________________
Added: svn:eol-style
144 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_head_phase.js
@@ -0,0 +1,167 @@
 2+"use strict";
 3+var Phase = require('./phase').Phase;
 4+var HTML5 = require('../../html5');
 5+
 6+var start_tag_handlers = {
 7+ html: 'startTagHtml',
 8+ head: 'startTagHead',
 9+ title: 'startTagTitle',
 10+ type: 'startTagType',
 11+ style: 'startTagStyle',
 12+ script: 'startTagScript',
 13+ noscript: 'startTagNoScript',
 14+ base: 'startTagBaseLinkMeta',
 15+ link: 'startTagBaseLinkMeta',
 16+ meta: 'startTagBaseLinkMeta',
 17+ "-default": 'startTagOther',
 18+}
 19+
 20+var end_tag_handlers = {
 21+ head: 'endTagHead',
 22+ html: 'endTagImplyAfterHead',
 23+ body: 'endTagImplyAfterHead',
 24+ p: 'endTagImplyAfterHead',
 25+ br: 'endTagImplyAfterHead',
 26+ title: 'endTagTitleStyleScriptNoscript',
 27+ style: 'endTagTitleStyleScriptNoscript',
 28+ script: 'endTagTitleStyleScriptNoscript',
 29+ noscript: 'endTagTitleStyleScriptNoscript',
 30+ "-default": 'endTagOther',
 31+}
 32+
 33+exports.Phase = p = function InHeadPhase(parser, tree) {
 34+ Phase.call(this, parser, tree);
 35+ this.name = 'in_head_phase';
 36+ this.start_tag_handlers = start_tag_handlers;
 37+ this.end_tag_handlers = end_tag_handlers;
 38+}
 39+
 40+p.prototype = new Phase;
 41+
 42+p.prototype.processEOF = function() {
 43+ var name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()
 44+ if(['title', 'style', 'script'].indexOf(name) != -1) {
 45+ this.parse_error("expected-named-closing-tag-but-got-eof", {name: name});
 46+ this.tree.pop_element();
 47+ }
 48+
 49+ this.anything_else();
 50+
 51+ this.parser.phase.processEOF();
 52+}
 53+
 54+p.prototype.processCharacters = function(data) {
 55+ var name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()
 56+ if(['title', 'style', 'script', 'noscript'].indexOf(name) != -1) {
 57+ this.tree.insert_text(data);
 58+ } else {
 59+ this.anything_else();
 60+ this.parser.phase.processCharacters(data);
 61+ }
 62+}
 63+
 64+p.prototype.startTagHead = function(name, attributes) {
 65+ this.parse_error('two-heads-are-not-better-than-one');
 66+}
 67+
 68+p.prototype.startTagTitle = function(name, attributes) {
 69+ var element = this.tree.createElement(name, attributes);
 70+ this.appendToHead(element);
 71+ this.tree.open_elements.push(element);
 72+ this.parser.tokenizer.content_model = HTML5.Models.RCDATA;
 73+}
 74+
 75+p.prototype.startTagStyle = function(name, attributes) {
 76+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
 77+ var element = this.tree.createElement(name, attributes);
 78+ this.appendToHead(element);
 79+ this.tree.open_elements.push(element);
 80+ } else {
 81+ this.tree.insert_element(name, attributes);
 82+ }
 83+ this.parser.tokenizer.content_model = HTML5.Models.CDATA;
 84+}
 85+
 86+p.prototype.startTagNoScript = function(name, attributes) {
 87+ // XXX Need to decide whether to implement the scripting disabled case
 88+ var element = this.tree.createElement(name, attributes);
 89+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
 90+ this.appendToHead(element);
 91+ } else {
 92+ this.tree.open_elements[this.tree.open_elements.length - 1].appendChild(element);
 93+ }
 94+ this.tree.open_elements.push(element);
 95+ this.parser.tokenizer.content_model = HTML5.Models.CDATA;
 96+}
 97+
 98+p.prototype.startTagScript = function(name, attributes) {
 99+ // XXX Inner HTML case may be wrong
 100+ var element = this.tree.createElement(name, attributes);
 101+ //element.flags.push('parser-inserted');
 102+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
 103+ this.appendToHead(element);
 104+ } else {
 105+ this.tree.open_elements[this.tree.open_elements.length - 1].appendChild(element);
 106+ }
 107+ this.tree.open_elements.push(element);
 108+ this.parser.tokenizer.content_model = HTML5.Models.SCRIPT_CDATA;
 109+}
 110+
 111+p.prototype.startTagBaseLinkMeta = function(name, attributes) {
 112+ var element = this.tree.createElement(name, attributes);
 113+ if(this.tree.head_pointer && this.parser.phaseName == 'inHead') {
 114+ this.appendToHead(element);
 115+ } else {
 116+ this.tree.open_elements[this.tree.open_elements.length - 1].appendChild(element);
 117+ }
 118+}
 119+
 120+p.prototype.startTagOther = function(name, attributes) {
 121+ this.anything_else();
 122+ this.parser.phase.processStartTag(name, attributes);
 123+}
 124+
 125+p.prototype.endTagHead = function(name) {
 126+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() == 'head') {
 127+ this.tree.pop_element()
 128+ } else {
 129+ this.parse_error('unexpected-end-tag', {name: 'head'});
 130+ }
 131+ this.parser.newPhase('afterHead');
 132+}
 133+
 134+p.prototype.endTagImplyAfterHead = function(name) {
 135+ this.anything_else();
 136+ this.parser.phase.processEndTag(name);
 137+}
 138+
 139+p.prototype.endTagTitleStyleScriptNoscript = function(name) {
 140+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() == name.toLowerCase()) {
 141+ this.tree.pop_element()
 142+ } else {
 143+ this.parse_error('unexpected-end-tag', {name: name});
 144+ }
 145+}
 146+
 147+p.prototype.endTagOther = function(name) {
 148+ this.anything_else();
 149+}
 150+
 151+p.prototype.anything_else = function() {
 152+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() == 'head') {
 153+ this.endTagHead('head')
 154+ } else {
 155+ this.parser.newPhase('afterHead');
 156+ }
 157+}
 158+
 159+// protected
 160+
 161+p.prototype.appendToHead = function(element) {
 162+ if(!this.tree.head_pointer) {
 163+ // FIXME assert(this.parser.inner_html)
 164+ this.tree.open_elements[this.tree.open_elements.length - 1].appendChild(element);
 165+ } else {
 166+ this.tree.head_pointer.appendChild(element);
 167+ }
 168+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_head_phase.js
___________________________________________________________________
Added: svn:eol-style
1169 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/initial_phase.js
@@ -0,0 +1,133 @@
 2+var Phase = require('./phase').Phase;
 3+
 4+exports.Phase = p = function InitialPhase(parser, tree) {
 5+ Phase.call(this, parser, tree);
 6+ this.name = 'initial_phase';
 7+}
 8+
 9+p.prototype = new Phase;
 10+
 11+p.prototype.processEOF = function() {
 12+ this.parse_error("expected-doctype-but-got-eof");
 13+ this.parser.newPhase('beforeHTML');
 14+ this.parser.phase.processEOF();
 15+}
 16+
 17+p.prototype.processComment = function(data) {
 18+ this.tree.insert_comment(data, this.tree.document);
 19+}
 20+
 21+p.prototype.processDoctype = function(name, publicId, systemId, correct) {
 22+ if(name.toLowerCase() != 'html' || publicId || systemId) {
 23+ this.parse_error("unknown-doctype");
 24+ }
 25+
 26+ // XXX need to update DOCTYPE tokens
 27+ this.tree.insert_doctype(name, publicId, systemId);
 28+
 29+ publicId = (publicId || '').toString().toUpperCase();
 30+
 31+ if(name.toLowerCase() != 'html') {
 32+ // XXX quirks mode
 33+ } else {
 34+ if((["+//silmaril//dtd html pro v0r11 19970101//en",
 35+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
 36+ "-//as//dtd html 3.0 aswedit + extensions//en",
 37+ "-//ietf//dtd html 2.0 level 1//en",
 38+ "-//ietf//dtd html 2.0 level 2//en",
 39+ "-//ietf//dtd html 2.0 strict level 1//en",
 40+ "-//ietf//dtd html 2.0 strict level 2//en",
 41+ "-//ietf//dtd html 2.0 strict//en",
 42+ "-//ietf//dtd html 2.0//en",
 43+ "-//ietf//dtd html 2.1e//en",
 44+ "-//ietf//dtd html 3.0//en",
 45+ "-//ietf//dtd html 3.0//en//",
 46+ "-//ietf//dtd html 3.2 final//en",
 47+ "-//ietf//dtd html 3.2//en",
 48+ "-//ietf//dtd html 3//en",
 49+ "-//ietf//dtd html level 0//en",
 50+ "-//ietf//dtd html level 0//en//2.0",
 51+ "-//ietf//dtd html level 1//en",
 52+ "-//ietf//dtd html level 1//en//2.0",
 53+ "-//ietf//dtd html level 2//en",
 54+ "-//ietf//dtd html level 2//en//2.0",
 55+ "-//ietf//dtd html level 3//en",
 56+ "-//ietf//dtd html level 3//en//3.0",
 57+ "-//ietf//dtd html strict level 0//en",
 58+ "-//ietf//dtd html strict level 0//en//2.0",
 59+ "-//ietf//dtd html strict level 1//en",
 60+ "-//ietf//dtd html strict level 1//en//2.0",
 61+ "-//ietf//dtd html strict level 2//en",
 62+ "-//ietf//dtd html strict level 2//en//2.0",
 63+ "-//ietf//dtd html strict level 3//en",
 64+ "-//ietf//dtd html strict level 3//en//3.0",
 65+ "-//ietf//dtd html strict//en",
 66+ "-//ietf//dtd html strict//en//2.0",
 67+ "-//ietf//dtd html strict//en//3.0",
 68+ "-//ietf//dtd html//en",
 69+ "-//ietf//dtd html//en//2.0",
 70+ "-//ietf//dtd html//en//3.0",
 71+ "-//metrius//dtd metrius presentational//en",
 72+ "-//microsoft//dtd internet explorer 2.0 html strict//en",
 73+ "-//microsoft//dtd internet explorer 2.0 html//en",
 74+ "-//microsoft//dtd internet explorer 2.0 tables//en",
 75+ "-//microsoft//dtd internet explorer 3.0 html strict//en",
 76+ "-//microsoft//dtd internet explorer 3.0 html//en",
 77+ "-//microsoft//dtd internet explorer 3.0 tables//en",
 78+ "-//netscape comm. corp.//dtd html//en",
 79+ "-//netscape comm. corp.//dtd strict html//en",
 80+ "-//o'reilly and associates//dtd html 2.0//en",
 81+ "-//o'reilly and associates//dtd html extended 1.0//en",
 82+ "-//spyglass//dtd html 2.0 extended//en",
 83+ "-//sq//dtd html 2.0 hotmetal + extensions//en",
 84+ "-//sun microsystems corp.//dtd hotjava html//en",
 85+ "-//sun microsystems corp.//dtd hotjava strict html//en",
 86+ "-//w3c//dtd html 3 1995-03-24//en",
 87+ "-//w3c//dtd html 3.2 draft//en",
 88+ "-//w3c//dtd html 3.2 final//en",
 89+ "-//w3c//dtd html 3.2//en",
 90+ "-//w3c//dtd html 3.2s draft//en",
 91+ "-//w3c//dtd html 4.0 frameset//en",
 92+ "-//w3c//dtd html 4.0 transitional//en",
 93+ "-//w3c//dtd html experimental 19960712//en",
 94+ "-//w3c//dtd html experimental 970421//en",
 95+ "-//w3c//dtd w3 html//en",
 96+ "-//w3o//dtd w3 html 3.0//en",
 97+ "-//w3o//dtd w3 html 3.0//en//",
 98+ "-//w3o//dtd w3 html strict 3.0//en//",
 99+ "-//webtechs//dtd mozilla html 2.0//en",
 100+ "-//webtechs//dtd mozilla html//en",
 101+ "-/w3c/dtd html 4.0 transitional/en",
 102+ "html"].indexOf(publicId) != -1) ||
 103+ (systemId == null && ["-//w3c//dtd html 4.01 frameset//EN",
 104+ "-//w3c//dtd html 4.01 transitional//EN"].indexOf(publicId) != -1) ||
 105+ (systemId ==
 106+ "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
 107+ // XXX quirks mode
 108+ }
 109+ }
 110+
 111+ this.parser.newPhase('beforeHTML');
 112+}
 113+
 114+p.prototype.processSpaceCharacters = function(data) {
 115+
 116+}
 117+
 118+p.prototype.processCharacters = function(data) {
 119+ this.parse_error('expected-doctype-but-got-chars');
 120+ this.parser.newPhase('beforeHTML');
 121+ this.parser.phase.processCharacters(data);
 122+}
 123+
 124+p.prototype.processStartTag = function(name, attributes, self_closing) {
 125+ this.parse_error('expected-doctype-but-got-start-tag', {name: name});
 126+ this.parser.newPhase('beforeHTML');
 127+ this.parser.phase.processStartTag(name, attributes);
 128+}
 129+
 130+p.prototype.processEndTag = function(name) {
 131+ this.parse_error('expected-doctype-but-got-end-tag', {name: name});
 132+ this.parser.newPhase('beforeHTML');
 133+ this.parser.phase.processEndTag(name);
 134+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/initial_phase.js
___________________________________________________________________
Added: svn:eol-style
1135 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_body_phase.js
@@ -0,0 +1,108 @@
 2+var Phase = require('./phase').Phase;
 3+var inTable = require('./in_table_phase').Phase
 4+
 5+var starts = {
 6+ html: 'startTagHtml',
 7+ tr: 'startTagTr',
 8+ td: 'startTagTableCell',
 9+ th: 'startTagTableCell',
 10+ caption: 'startTagTableOther',
 11+ col: 'startTagTableOther',
 12+ colgroup: 'startTagTableOther',
 13+ tbody: 'startTagTableOther',
 14+ tfoot: 'startTagTableOther',
 15+ thead: 'startTagTableOther',
 16+ '-default': 'startTagOther',
 17+}
 18+
 19+var ends = {
 20+ table: 'endTagTable',
 21+ tbody: 'endTagTableRowGroup',
 22+ tfoot: 'endTagTableRowGroup',
 23+ thead: 'endTagTableRowGroup',
 24+ body: 'endTagIgnore',
 25+ caption: 'endTagIgnore',
 26+ col: 'endTagIgnore',
 27+ colgroup: 'endTagIgnore',
 28+ html: 'endTagIgnore',
 29+ td: 'endTagIgnore',
 30+ th: 'endTagIgnore',
 31+ tr: 'endTagIgnore',
 32+ '-default': 'endTagOther',
 33+}
 34+
 35+exports.Phase = function InTableBodyPhase(parser, tree) {
 36+ Phase.call(this, parser, tree);
 37+ this.start_tag_handlers = starts;
 38+ this.end_tag_handlers = ends;
 39+}
 40+
 41+var p = exports.Phase.prototype = new Phase;
 42+
 43+p.processCharacters = function(data) {
 44+ new inTable(this.parser, this.tree).processCharacters(data);
 45+}
 46+
 47+p.startTagTr = function(name, attributes) {
 48+ this.clearStackToTableBodyContext();
 49+ this.tree.insert_element(name, attributes);
 50+ this.parser.newPhase('inRow');
 51+}
 52+
 53+p.startTagTableCell = function(name, attributes) {
 54+ this.parse_error("unexpected-cell-in-table-body", {name: name})
 55+ this.startTagTr('tr', {})
 56+ this.parser.phase.processStartTag(name, attributes);
 57+}
 58+
 59+p.startTagTableOther = function(name, attributes) {
 60+ // XXX any ideas on how to share this with endTagTable
 61+ if(this.inScope('tbody', true) || this.inScope('thead', true) || this.inScope('tfoot', true)) {
 62+ this.clearStackToTableBodyContext();
 63+ this.endTagTableRowGroup(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase());
 64+ this.parser.phase.processStartTag(name, attributes);
 65+ } else {
 66+ // inner_html case
 67+ this.parse_error
 68+ }
 69+}
 70+
 71+p.startTagOther = function(name, attributes) {
 72+ new inTable(this.parser, this.tree).processStartTag(name, attributes);
 73+}
 74+
 75+p.endTagTableRowGroup = function(name) {
 76+ if(this.inScope(name, true)) {
 77+ this.clearStackToTableBodyContext();
 78+ this.tree.pop_element();
 79+ this.parser.newPhase('inTable');
 80+ } else {
 81+ this.parse_error('unexpected-end-tag-in-table-body', {name: name})
 82+ }
 83+}
 84+
 85+p.endTagTable = function(name) {
 86+ if(this.inScope('tbody', true) || this.inScope('thead', true) || this.inScope('tfoot', true)) {
 87+ this.clearStackToTableBodyContext();
 88+ this.endTagTableRowGroup(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase())
 89+ this.parser.phase.processEndTag(name)
 90+ } else {
 91+ // inner_html case
 92+ this.parse_error();
 93+ }
 94+}
 95+
 96+p.endTagIgnore = function(name) {
 97+ this.parse_error("unexpected-end-tag-in-table-body", {name: name});
 98+}
 99+
 100+p.endTagOther = function(name) {
 101+ new inTable(this.parser, this.tree).processEndTag(name);
 102+}
 103+
 104+p.clearStackToTableBodyContext = function() {
 105+ while(name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase(), name != 'tbody' && name != 'tfoot' && name != 'thead' && name != 'html') {
 106+ this.parse_error("unexpected-implied-end-tag-in-table", {name: name})
 107+ this.tree.pop_element();
 108+ }
 109+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_body_phase.js
___________________________________________________________________
Added: svn:eol-style
1110 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/root_element_phase.js
@@ -0,0 +1,42 @@
 2+var Phase = require('./phase').Phase
 3+
 4+exports.Phase = function rootElementPhase(parser, tree) {
 5+ Phase.call(this, parser, tree)
 6+}
 7+
 8+var p = exports.Phase.prototype = new Phase;
 9+
 10+p.processEOF = function() {
 11+ this.insert_html_element()
 12+ this.parser.phase.processEOF()
 13+}
 14+
 15+p.processComment = function(data) {
 16+ this.tree.insert_comment(data, this.tree.document)
 17+}
 18+
 19+p.processSpaceCharacters = function(data) {
 20+}
 21+
 22+p.processCharacters = function(data) {
 23+ this.insert_html_element()
 24+ this.parser.phase.processCharacters(data)
 25+}
 26+
 27+p.processStartTag = function(name, attributes) {
 28+ if(name == 'html') this.parser.first_start_tag = true
 29+ this.insert_html_element()
 30+ this.parser.phase.processStartTag(name, attributes)
 31+}
 32+
 33+p.processEndTag = function(name) {
 34+ this.insert_html_element()
 35+ this.parser.phase.processEndTag(name)
 36+}
 37+
 38+p.insert_html_element = function() {
 39+ var element = this.tree.createElement('html', {})
 40+ this.tree.open_elements.push(element)
 41+ this.tree.document.appendChild(element)
 42+ this.parser.newPhase('beforeHead')
 43+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/root_element_phase.js
___________________________________________________________________
Added: svn:eol-style
144 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_frameset_phase.js
@@ -0,0 +1,67 @@
 2+var Phase = require('./phase').Phase;
 3+var inBody = require('./in_body_phase').Phase;
 4+
 5+var start_tag_handlers = {
 6+ html: 'startTagHtml',
 7+ frameset: 'startTagFrameset',
 8+ frame: 'startTagFrame',
 9+ noframes: 'startTagNoframes',
 10+ "-default": 'startTagOther'
 11+}
 12+
 13+var end_tag_handlers = {
 14+ frameset: 'endTagFrameset',
 15+ noframes: 'endTagNoframes',
 16+ '-default': 'endTagOther',
 17+}
 18+
 19+exports.Phase = p = function InFramesetPhase(parser, tree) {
 20+ Phase.call(this, parser, tree);
 21+ this.start_tag_handlers = start_tag_handlers;
 22+ this.end_tag_handlers = end_tag_handlers;
 23+}
 24+
 25+p.prototype = new Phase;
 26+
 27+p.prototype.processCharacters = function(data) {
 28+ this.parse_error("unexpected-char-in-frameset");
 29+}
 30+
 31+p.prototype.startTagFrameset = function(name, attributes) {
 32+ this.tree.insert_element(name, attributes);
 33+}
 34+
 35+p.prototype.startTagFrame = function(name, attributes) {
 36+ this.tree.insert_element(name, attributes);
 37+ this.tree.pop_element();
 38+}
 39+
 40+p.prototype.startTagNoframes = function(name, attributes) {
 41+ new inBody(this.parser, this.tree).processStartTag(name, attributes);
 42+}
 43+
 44+p.prototype.startTagOther = function(name, attributes) {
 45+ this.parse_error("unexpected-start-tag-in-frameset", {name: name});
 46+}
 47+
 48+p.prototype.endTagFrameset = function(name, attributes) {
 49+ if(this.tree.open_elements.last().tagName.toLowerCase() == 'html') {
 50+ // inner_html case
 51+ this.parse_error("unexpected-frameset-in-frameset-innerhtml");
 52+ } else {
 53+ this.tree.pop_element();
 54+ }
 55+
 56+ if(!this.parser.inner_html && this.tree.open_elements.last().tagName.toLowerCase() != 'frameset') {
 57+ // If we're not in inner_html mode an the current node is not a "frameset" element (anymore) then switch
 58+ this.parser.newPhase('afterFrameset');
 59+ }
 60+}
 61+
 62+p.prototype.endTagNoframes = function(name) {
 63+ new inBody(this.parser, this.tree).processEndTag(name);
 64+}
 65+
 66+p.prototype.endTagOther = function(name) {
 67+ this.parse_error("unexpected-end-tag-in-frameset", {name: name});
 68+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_frameset_phase.js
___________________________________________________________________
Added: svn:eol-style
169 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_row_phase.js
@@ -0,0 +1,113 @@
 2+var Phase = require('./phase').Phase;
 3+var HTML5 = require('../../html5')
 4+var inTable = require('./in_table_phase').Phase;
 5+var assert = require('assert');
 6+
 7+var starts = {
 8+ html: 'startTagHtml',
 9+ td: 'startTagTableCell',
 10+ th: 'startTagTableCell',
 11+ caption: 'startTagTableOther',
 12+ col: 'startTagTableOther',
 13+ colgroup: 'startTagTableOther',
 14+ tbody: 'startTagTableOther',
 15+ tfoot: 'startTagTableOther',
 16+ thead: 'startTagTableOther',
 17+ tr: 'startTagTableOther',
 18+ '-default': 'startTagOther',
 19+}
 20+
 21+var ends = {
 22+ tr: 'endTagTr',
 23+ table: 'endTagTable',
 24+ tbody: 'endTagTableRowGroup',
 25+ tfoot: 'endTagTableRowGroup',
 26+ thead: 'endTagTableRowGroup',
 27+ body: 'endTagIgnore',
 28+ caption: 'endTagIgnore',
 29+ col: 'endTagIgnore',
 30+ colgroup: 'endTagIgnore',
 31+ html: 'endTagIgnore',
 32+ td: 'endTagIgnore',
 33+ th: 'endTagIgnore',
 34+ '-default': 'endTagOther',
 35+}
 36+
 37+exports.Phase = function InRowPhase(parser, tree) {
 38+ Phase.call(this, parser, tree);
 39+ this.start_tag_handlers = starts;
 40+ this.end_tag_handlers = ends;
 41+}
 42+
 43+var p = exports.Phase.prototype = new Phase;
 44+
 45+p.processCharacters = function(data) {
 46+ new inTable(this.parser, this.tree).processCharacters(data);
 47+}
 48+
 49+p.startTagTableCell = function(name, attributes) {
 50+ this.clearStackToTableRowContext();
 51+ this.tree.insert_element(name, attributes);
 52+ this.parser.newPhase('inCell');
 53+ this.tree.activeFormattingElements.push(HTML5.Marker);
 54+}
 55+
 56+p.startTagTableOther = function(name, attributes) {
 57+ var ignoreEndTag = this.ignoreEndTagTr();
 58+ this.endTagTr('tr');
 59+ // XXX how are we sure it's always ignored in the inner_html case?
 60+ if(!ignoreEndTag) this.parser.phase.processStartTag(name, attributes);
 61+}
 62+
 63+p.startTagOther = function(name, attributes) {
 64+ new inTable(this.parser, this.tree).processStartTag(name, attributes);
 65+}
 66+
 67+p.endTagTr = function(name) {
 68+ if(this.ignoreEndTagTr()) {
 69+ assert.ok(this.parser.inner_html);
 70+ this.parse_error
 71+ } else {
 72+ this.clearStackToTableRowContext();
 73+ this.tree.pop_element();
 74+ this.parser.newPhase('inTableBody');
 75+ }
 76+}
 77+
 78+p.endTagTable = function(name) {
 79+ var ignoreEndTag = this.ignoreEndTagTr();
 80+ this.endTagTr('tr');
 81+ // Reprocess the current tag if the tr end tag was not ignored
 82+ // XXX how are we sure it's always ignored in the inner_html case?
 83+ if(!ignoreEndTag) this.parser.phase.processEndTag(name)
 84+}
 85+
 86+p.endTagTableRowGroup = function(name) {
 87+ if(this.inScope(name, true)) {
 88+ this.endTagTr('tr');
 89+ this.parser.phase.processEndTag(name);
 90+ } else {
 91+ // inner_html case
 92+ this.parse_error();
 93+ }
 94+}
 95+
 96+p.endTagIgnore = function(name) {
 97+ this.parse_error("unexpected-end-tag-in-table-row", {name: name})
 98+}
 99+
 100+p.endTagOther = function(name) {
 101+ new inTable(this.parser, this.tree).processEndTag(name);
 102+}
 103+
 104+p.clearStackToTableRowContext = function() {
 105+ var name;
 106+ while(name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase(), (name != 'tr' && name != 'html')) {
 107+ this.parse_error("unexpected-implied-end-tag-in-table-row", {name: name})
 108+ this.tree.pop_element();
 109+ }
 110+}
 111+
 112+p.ignoreEndTagTr = function() {
 113+ return !this.inScope('tr', true);
 114+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_row_phase.js
___________________________________________________________________
Added: svn:eol-style
1115 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/trailing_end_phase.js
@@ -0,0 +1,35 @@
 2+var Phase = require('./phase').Phase;
 3+
/**
 * Phase object used after the end of the document: comments are kept,
 * whitespace goes to the previous phase, and anything else is a parse error
 * that reactivates `parser.last_phase`.
 *
 * @param parser  Parser instance (provides `last_phase` and `phase`).
 * @param tree    Tree builder used for comment insertion.
 */
// `var` keeps `p` module-local instead of leaking an implicit global.
var p = exports.Phase = function TrailingEndPhase(parser, tree) {
  Phase.call(this, parser, tree);
};

p.prototype = new Phase;

// End of input is expected here; nothing to do.
p.prototype.processEOF = function() {};

p.prototype.processComment = function(data) {
  this.tree.insert_comment(data);
};

// Whitespace is handled by the previous phase without switching back to it.
p.prototype.processSpaceCharacters = function(data) {
  this.parser.last_phase.processSpaceCharacters(data);
};

// Non-space characters: report, switch back to the previous phase, reprocess.
p.prototype.processCharacters = function(data) {
  this.parse_error('expected-eof-but-got-char');
  this.parser.phase = this.parser.last_phase;
  this.parser.phase.processCharacters(data);
};

// Start tag: report, switch back to the previous phase, reprocess.
p.prototype.processStartTag = function(name, attributes) {
  this.parse_error('expected-eof-but-got-start-tag');
  this.parser.phase = this.parser.last_phase;
  this.parser.phase.processStartTag(name, attributes);
};

// End tag: report, switch back to the previous phase, reprocess.
p.prototype.processEndTag = function(name, attributes) {
  this.parse_error('expected-eof-but-got-end-tag');
  this.parser.phase = this.parser.last_phase;
  this.parser.phase.processEndTag(name);
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/trailing_end_phase.js
___________________________________________________________________
Added: svn:eol-style
137 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_body_phase.js
@@ -0,0 +1,48 @@
 2+var Phase = require('./phase').Phase;
 3+var inBody = require('./in_body_phase').Phase;
 4+
// Start-tag name -> name of the handler method on this phase.
var start_tag_handlers = {
  html: 'startTagHtml',
  '-default': 'startTagOther'
};

/**
 * Phase object used after </html> has been seen following the body.
 *
 * @param parser  Parser instance (phase switching, error reporting).
 * @param tree    Tree builder used for comment insertion.
 */
// `var` keeps `p` module-local instead of leaking an implicit global.
var p = exports.Phase = function AfterAfterBodyPhase(parser, tree) {
  Phase.call(this, parser, tree);
  this.start_tag_handlers = start_tag_handlers;
};

p.prototype = new Phase;

p.prototype.processComment = function(data) {
  this.tree.insert_comment(data);
};

// Doctype tokens are delegated to a throw-away "in body" phase. All arguments
// are forwarded so the delegate sees the same token the caller passed in.
p.prototype.processDoctype = function(name, publicId, systemId, correct) {
  new inBody(this.parser, this.tree).processDoctype(name, publicId, systemId, correct);
};

p.prototype.processSpaceCharacters = function(data) {
  new inBody(this.parser, this.tree).processSpaceCharacters(data);
};

// <html>: delegated to "in body". The original forwarded only the first
// argument, so the delegate received no attribute list.
p.prototype.startTagHtml = function(name, attributes) {
  new inBody(this.parser, this.tree).startTagHtml(name, attributes);
};

// Any other start tag: report, fall back to "in body", reprocess.
p.prototype.startTagOther = function(name, attributes) {
  this.parse_error('unexpected-start-tag', {name: name});
  this.parser.newPhase('inBody');
  this.parser.phase.processStartTag(name, attributes);
};

// Any end tag: report, fall back to "in body", reprocess.
p.prototype.endTagOther = function(name) {
  this.parse_error('unexpected-end-tag', {name: name});
  this.parser.newPhase('inBody');
  this.parser.phase.processEndTag(name);
};

// Non-space characters: report, fall back to "in body", reprocess.
p.prototype.processCharacters = function(data) {
  this.parse_error('unexpected-char-after-body');
  this.parser.newPhase('inBody');
  this.parser.phase.processCharacters(data);
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_body_phase.js
___________________________________________________________________
Added: svn:eol-style
150 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_column_group_phase.js
@@ -0,0 +1,65 @@
 2+var Phase = require('./phase').Phase
 3+var HTML5 = require('../../html5')
 4+var assert = require('assert')
 5+
// Tag name -> handler method name, for start tags.
var startHandlers = {
  '-default': 'startTagOther',
  html: 'startTagHtml',
  col: 'startTagCol'
};

// Tag name -> handler method name, for end tags.
var endHandlers = {
  '-default': 'endTagOther',
  colgroup: 'endTagColgroup',
  col: 'endTagCol'
};

/**
 * Phase object handling tokens while the parser is inside a <colgroup>.
 *
 * @param parser  Parser instance (phase switching, error reporting).
 * @param tree    Tree builder whose open-element stack is manipulated.
 */
function InColgroupPhase(parser, tree) {
  Phase.call(this, parser, tree);
  this.start_tag_handlers = startHandlers;
  this.end_tag_handlers = endHandlers;
}

exports.Phase = InColgroupPhase;

var p = InColgroupPhase.prototype = new Phase;

// An implied </colgroup> has to be ignored when the current node is <html>.
p.ignoreEndTagColgroup = function() {
  var stack = this.tree.open_elements;
  var current = stack[stack.length - 1];
  return current.tagName.toLowerCase() == 'html';
};

// Characters imply </colgroup>; they are reprocessed by the new phase unless
// that implied end tag was ignored.
p.processCharacters = function(data) {
  var wasIgnored = this.ignoreEndTagColgroup();
  this.endTagColgroup('colgroup');
  if (wasIgnored) return;
  this.parser.phase.processCharacters(data);
};

// <col> is inserted and popped straight away.
p.startTagCol = function(name, attributes) {
  var tree = this.tree;
  tree.insert_element(name, attributes);
  tree.pop_element();
};

// Other start tags imply </colgroup> and are then reprocessed.
p.startTagOther = function(name, attributes) {
  var wasIgnored = this.ignoreEndTagColgroup();
  this.endTagColgroup('colgroup');
  if (wasIgnored) return;
  this.parser.phase.processStartTag(name, attributes);
};

// </colgroup>: close the column group and return to "in table", or report a
// parse error when the end tag has to be ignored.
p.endTagColgroup = function(name) {
  if (!this.ignoreEndTagColgroup()) {
    this.tree.pop_element();
    this.parser.newPhase('inTable');
    return;
  }
  // inner_html case
  assert.ok(this.parser.inner_html);
  this.parse_error();
};

// </col> is always an error.
p.endTagCol = function(name) {
  this.parse_error("no-end-tag", {name: 'col'});
};

// Other end tags imply </colgroup> and are then reprocessed.
p.endTagOther = function(name) {
  var wasIgnored = this.ignoreEndTagColgroup();
  this.endTagColgroup('colgroup');
  if (wasIgnored) return;
  this.parser.phase.processEndTag(name);
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_column_group_phase.js
___________________________________________________________________
Added: svn:eol-style
167 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_in_table_phase.js
@@ -0,0 +1,62 @@
 2+var HTML5 = require('../../html5')
 3+var Phase = require('./phase').Phase;
 4+var inSelect = require('./in_select_phase').Phase;
 5+
// Table-related tags that get special treatment in this phase; every other
// tag goes to the '-default' handler.
var TABLE_TAGS = ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'];

var start_tag_handlers = {'-default': 'startTagOther'};
var end_tag_handlers = {'-default': 'endTagOther'};

TABLE_TAGS.forEach(function(tag) {
  start_tag_handlers[tag] = 'startTagTable';
  end_tag_handlers[tag] = 'endTagTable';
});

/**
 * Phase object handling tokens inside a <select> that is itself inside a
 * table.
 *
 * @param parser  Parser instance (phase switching, error reporting).
 * @param tree    Tree builder whose open-element stack is consulted.
 */
function InSelectInTablePhase(parser, tree) {
  Phase.call(this, parser, tree);
  this.start_tag_handlers = start_tag_handlers;
  this.end_tag_handlers = end_tag_handlers;
  this.name = 'in_select_in_table';
}

exports.Phase = InSelectInTablePhase;

var p = InSelectInTablePhase.prototype = new Phase;

// Characters are handled by a throw-away "in select" phase.
p.processCharacters = function(data) {
  var delegate = new inSelect(this.parser, this.tree);
  delegate.processCharacters(data);
};

// A table start tag is an error: act as if </select> was seen, then let the
// resulting phase reprocess the tag.
p.startTagTable = function(name, attributes) {
  this.parse_error("unexpected-table-element-start-tag-in-select-in-table", {name: name});
  this.endTagOther("select");
  this.parser.phase.processStartTag(name, attributes);
};

// Other start tags are handled by a throw-away "in select" phase.
p.startTagOther = function(name, attributes) {
  var delegate = new inSelect(this.parser, this.tree);
  delegate.processStartTag(name, attributes);
};

// A table end tag is always an error; it only closes the select (and is
// reprocessed) when the named element is in table scope.
p.endTagTable = function(name) {
  this.parse_error("unexpected-table-element-end-tag-in-select-in-table", {name: name});
  if (!this.tree.elementInScope(name, true)) return;
  this.endTagOther("select");
  this.parser.phase.processEndTag(name);
};

// Other end tags are handled by a throw-away "in select" phase.
p.endTagOther = function(name) {
  var delegate = new inSelect(this.parser, this.tree);
  delegate.processEndTag(name);
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_in_table_phase.js
___________________________________________________________________
Added: svn:eol-style
164 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_phase.js
@@ -0,0 +1,104 @@
 2+var Phase = require('./phase').Phase;
 3+
// Start-tag name -> handler method name.
var starts = {
  '-default': 'startTagOther',
  html: 'startTagHtml',
  option: 'startTagOption',
  optgroup: 'startTagOptgroup',
  select: 'startTagSelect'
};

// End-tag name -> handler method name.
var ends = {
  '-default': 'endTagOther',
  option: 'endTagOption',
  optgroup: 'endTagOptgroup',
  select: 'endTagSelect'
};

// End tags of table elements all share one handler.
['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'].forEach(function(tag) {
  ends[tag] = 'endTagTableElements';
});

// True when the current node (last open element) of `phase` has tag `tag`.
function currentNodeIs(phase, tag) {
  return phase.tree.open_elements.last().tagName.toLowerCase() == tag;
}

/**
 * Phase object handling tokens while the parser is inside a <select>.
 *
 * @param parser  Parser instance (insertion-mode reset, error reporting).
 * @param tree    Tree builder whose open-element stack is manipulated.
 */
function InSelectPhase(parser, tree) {
  Phase.call(this, parser, tree);
  this.start_tag_handlers = starts;
  this.end_tag_handlers = ends;
}

exports.Phase = InSelectPhase;

var p = InSelectPhase.prototype = new Phase;

// Text is inserted as-is.
p.processCharacters = function(data) {
  this.tree.insert_text(data);
};

// <option> implies </option> when an option is the current node.
p.startTagOption = function(name, attributes) {
  if (currentNodeIs(this, 'option')) {
    this.tree.pop_element();
  }
  this.tree.insert_element(name, attributes);
};

// <optgroup> implies </option> and then </optgroup> for the current node.
p.startTagOptgroup = function(name, attributes) {
  if (currentNodeIs(this, 'option')) {
    this.tree.pop_element();
  }
  if (currentNodeIs(this, 'optgroup')) {
    this.tree.pop_element();
  }
  this.tree.insert_element(name, attributes);
};

// </option> only closes an option that is the current node.
p.endTagOption = function(name) {
  if (!currentNodeIs(this, 'option')) {
    this.parse_error('unexpected-end-tag-in-select', {name: 'option'});
    return;
  }
  this.tree.pop_element();
};

p.endTagOptgroup = function(name) {
  // </optgroup> implicitly closes an <option> sitting directly inside an
  // <optgroup>.
  var stack = this.tree.open_elements;
  if (currentNodeIs(this, 'option') && stack[stack.length - 2].tagName.toLowerCase() == 'optgroup') {
    this.tree.pop_element();
  }

  // After that it closes the optgroup itself, and nothing else.
  if (!currentNodeIs(this, 'optgroup')) {
    this.parse_error('unexpected-end-tag-in-select', {name: 'optgroup'});
    return;
  }
  this.tree.pop_element();
};

// A nested <select> is an error and is treated like </select>.
p.startTagSelect = function(name) {
  this.parse_error("unexpected-select-in-select");
  this.endTagSelect('select');
};

// </select>: pop up to the select and reset the insertion mode, or report a
// parse error when no select is in table scope.
p.endTagSelect = function(name) {
  if (!this.inScope('select', true)) {
    // inner_html case
    this.parse_error();
    return;
  }
  this.tree.remove_open_elements_until('select');
  this.parser.reset_insertion_mode(this.tree.open_elements.last());
};

// End tags of table elements are always an error; when the element is in
// table scope the select is closed and the tag is reprocessed.
p.endTagTableElements = function(name) {
  this.parse_error('unexpected-end-tag-in-select', {name: name});

  if (!this.inScope(name, true)) return;
  this.endTagSelect('select');
  this.parser.phase.processEndTag(name);
};

// Any other start tag is reported and ignored.
p.startTagOther = function(name, attributes) {
  this.parse_error("unexpected-start-tag-in-select", {name: name});
};

// Any other end tag is reported and ignored.
p.endTagOther = function(name) {
  this.parse_error('unexpected-end-tag-in-select', {name: name});
};
 105+
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_select_phase.js
___________________________________________________________________
Added: svn:eol-style
1106 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_cell_phase.js
@@ -0,0 +1,102 @@
 2+var Phase = require('./phase').Phase;
 3+var inBody = require('./in_body_phase').Phase;
 4+
 5+var starts = {
 6+ html: 'startTagHtml',
 7+ caption: 'startTagTableOther',
 8+ col: 'startTagTableOther',
 9+ colgroup: 'startTagTableOther',
 10+ tbody: 'startTagTableOther',
 11+ td: 'startTagTableOther',
 12+ tfoot: 'startTagTableOther',
 13+ th: 'startTagTableOther',
 14+ thead: 'startTagTableOther',
 15+ tr: 'startTagTableOther',
 16+ '-default': 'startTagOther',
 17+}
 18+
 19+var ends = {
 20+ td: 'endTagTableCell',
 21+ th: 'endTagTableCell',
 22+ body: 'endTagIgnore',
 23+ caption: 'endTagIgnore',
 24+ col: 'endTagIgnore',
 25+ colgroup: 'endTagIgnore',
 26+ html: 'endTagIgnore',
 27+ table: 'endTagImply',
 28+ tbody: 'endTagImply',
 29+ tfoot: 'endTagImply',
 30+ thead: 'endTagImply',
 31+ tr: 'endTagImply',
 32+ '-default': 'endTagOther',
 33+}
 34+
 35+exports.Phase = function InCellPhase(parser, tree) {
 36+ Phase.call(this, parser, tree);
 37+ this.start_tag_handlers = starts;
 38+ this.end_tag_handlers = ends;
 39+}
 40+
 41+exports.Phase.prototype = new Phase;
 42+
 43+p = exports.Phase.prototype;
 44+
 45+p.processCharacters = function(data) {
 46+ new inBody(this.parser, this.tree).processCharacters(data);
 47+}
 48+
 49+p.startTagTableOther = function(name, attributes) {
 50+ if(this.inScope('td', true) || this.inScope('th', true)) {
 51+ this.closeCell();
 52+ this.parser.phase.processStartTag(name, attributes);
 53+ } else {
 54+ // inner_html case
 55+ this.parse_error();
 56+ }
 57+}
 58+
 59+p.startTagOther = function(name, attributes) {
 60+ new inBody(this.parser, this.tree).processStartTag(name, attributes);
 61+}
 62+
 63+p.endTagTableCell = function(name) {
 64+ if(this.inScope(name, true)) {
 65+ this.tree.generateImpliedEndTags(name);
 66+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name.toLowerCase()) {
 67+ this.parse_error('unexpected-cell-end-tag', {name: name});
 68+ this.tree.remove_open_elements_until(name);
 69+ } else {
 70+ this.tree.pop_element();
 71+ }
 72+ this.tree.clearActiveFormattingElements();
 73+ this.parser.newPhase('inRow');
 74+ } else {
 75+ this.parse_error('unexpected-end-tag', {name: name});
 76+ }
 77+}
 78+
 79+p.endTagIgnore = function(name) {
 80+ this.parse_error('unexpected-end-tag', {name: name});
 81+}
 82+
 83+p.endTagImply = function(name) {
 84+ if(this.inScope(name, true)) {
 85+ this.closeCell();
 86+ this.parser.phase.processEndTag(name);
 87+ } else {
 88+ // sometimes inner_html case
 89+ this.parse_error
 90+ }
 91+}
 92+
 93+p.endTagOther = function(name) {
 94+ new inBody(this.parser, this.tree).processEndTag(name);
 95+}
 96+
 97+p.closeCell = function() {
 98+ if(this.inScope('td', true)) {
 99+ this.endTagTableCell('td');
 100+ } else if(this.inScope('th', true)) {
 101+ this.endTagTableCell('th');
 102+ }
 103+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_cell_phase.js
___________________________________________________________________
Added: svn:eol-style
1104 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_frameset_phase.js
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_after_frameset_phase.js
___________________________________________________________________
Added: svn:eol-style
2105 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_foreign_content_phase.js
@@ -0,0 +1,58 @@
 2+var Phase = require('./phase').Phase;
 3+
 4+var starts = {
 5+ '-default': 'startTagOther'
 6+}
 7+
 8+var ends = {
 9+ '-default': 'endTagOther'
 10+}
 11+
 12+exports.Phase = function InForeignContentPhase(parser, tree) {
 13+ Phase.call(this, parser, tree);
 14+ this.name = 'in_foreign_content_phase';
 15+ this.start_tag_handlers = starts;
 16+ this.end_tag_handlers = ends;
 17+}
 18+
 19+var p = exports.Phase.prototype = new Phase;
 20+
 21+p.startTagOther = function(name, attributes, self_closing) {
 22+ if(['mglyph', 'malignmark'].indexOf(name) != -1
 23+ && ['mi', 'mo', 'mn', 'ms', 'mtext'].indexOf(this.tree.open_elements[this.tree.open_elements.length - 1].tagName) != -1
 24+ && this.tree.open_elements[this.tree.open_elements.length - 1].namespace == 'math') {
 25+ this.parser.secondary_phase.processStartTag(name, attributes);
 26+ if(this.parser.phase == 'inForeignContent') {
 27+ if(this.tree.open_elements.any(function(e) { return e.namespace })) {
 28+ this.parser.phase = this.parser.secondary_phase;
 29+ }
 30+ }
 31+ } else if(['b', 'big', 'blockquote', 'body', 'br', 'center', 'code', 'dd', 'div', 'dl', 'dt', 'em', 'embed', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i', 'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby', 's', 'small', 'span', 'strong', 'strike', 'sub', 'sup', 'table', 'tt', 'u', 'ul', 'var'].indexOf(name) != -1) {
 32+ this.parse_error('html-in-foreign-content', {name: name});
 33+ while(this.tree.open_elements[this.tree.open_elements.length - 1].namespace) {
 34+ this.tree.open_elements.pop();
 35+ }
 36+ this.parser.phase = this.parser.secondary_phase;
 37+ this.parser.phase.processStartTag(name, attributes);
 38+ } else {
 39+ if(this.tree.open_elements[this.tree.open_elements.length - 1].namespace == 'math') {
 40+ attributes = this.adjust_mathml_attributes(attributes)
 41+ }
 42+ attributes = this.adjust_foreign_attributes(attributes)
 43+ this.tree.insert_foreign_element(name, attributes, this.tree.open_elements[this.tree.open_elements.length - 1].namespace);
 44+ if(self_closing) this.tree.open_elements.pop()
 45+ }
 46+}
 47+
 48+p.endTagOther = function(name) {
 49+ this.parser.secondary_phase.processEndTag(name)
 50+ if(this.parser.phase == 'inForeignContent') {
 51+ if(this.tree.open_elements.any(function(e) { return e.namespace })) {
 52+ this.parser.phase = this.parser.secondary_phase;
 53+ }
 54+ }
 55+}
 56+
 57+p.processCharacters = function(characters) {
 58+ this.tree.insert_text(characters);
 59+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_foreign_content_phase.js
___________________________________________________________________
Added: svn:eol-style
160 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_phase.js
@@ -0,0 +1,129 @@
 2+var Phase = require('./phase').Phase;
 3+var inBody = require('./in_body_phase').Phase;
 4+var HTML5 = require('../../html5')
 5+
// Start-tag name -> name of the handler method on this phase.
var start_tag_handlers = {
  html: 'startTagHtml',
  caption: 'startTagCaption',
  colgroup: 'startTagColgroup',
  col: 'startTagCol',
  table: 'startTagTable',
  tbody: 'startTagRowGroup',
  tfoot: 'startTagRowGroup',
  thead: 'startTagRowGroup',
  td: 'startTagImplyTbody',
  th: 'startTagImplyTbody',
  tr: 'startTagImplyTbody',
  '-default': 'startTagOther'
};

// End-tag name -> name of the handler method on this phase.
var end_tag_handlers = {
  table: 'endTagTable',
  body: 'endTagIgnore',
  caption: 'endTagIgnore',
  col: 'endTagIgnore',
  colgroup: 'endTagIgnore',
  html: 'endTagIgnore',
  tbody: 'endTagIgnore',
  td: 'endTagIgnore',
  tfoot: 'endTagIgnore',
  th: 'endTagIgnore',
  thead: 'endTagIgnore',
  tr: 'endTagIgnore',
  '-default': 'endTagOther'
};

/**
 * Phase object handling tokens while the parser is directly inside a <table>.
 *
 * @param parser  Parser instance (phase switching, error reporting).
 * @param tree    Tree builder whose open-element stack is manipulated.
 */
// `var` keeps `p` module-local instead of leaking an implicit global.
var p = exports.Phase = function InTablePhase(parser, tree) {
  Phase.call(this, parser, tree);
  this.start_tag_handlers = start_tag_handlers;
  this.end_tag_handlers = end_tag_handlers;
};

p.prototype = new Phase;

// Characters are an error; they are processed by "in body" while
// tree.insert_from_table is set.
p.prototype.processCharacters = function(data) {
  this.parse_error("unexpected-char-implies-table-voodoo");
  this.tree.insert_from_table = true;
  new inBody(this.parser, this.tree).processCharacters(data);
  this.tree.insert_from_table = false;
};

// <caption>: pop back to the table, push a formatting marker, open the
// caption and switch to "in caption".
p.prototype.startTagCaption = function(name, attributes) {
  this.clearStackToTableContext();
  this.tree.activeFormattingElements.push(HTML5.Marker);
  this.tree.insert_element(name, attributes);
  this.parser.newPhase('inCaption');
};

// <colgroup>: pop back to the table, open it and switch to "in column group".
p.prototype.startTagColgroup = function(name, attributes) {
  this.clearStackToTableContext();
  this.tree.insert_element(name, attributes);
  this.parser.newPhase('inColumnGroup');
};

// <col> implies a <colgroup>, then is reprocessed by the new phase.
p.prototype.startTagCol = function(name, attributes) {
  this.startTagColgroup('colgroup', {});
  this.parser.phase.processStartTag(name, attributes);
};

// <tbody>/<tfoot>/<thead>: pop back to the table, open the row group and
// switch to "in table body".
p.prototype.startTagRowGroup = function(name, attributes) {
  this.clearStackToTableContext();
  this.tree.insert_element(name, attributes);
  this.parser.newPhase('inTableBody');
};

// <td>/<th>/<tr> imply a <tbody>, then are reprocessed by the new phase.
p.prototype.startTagImplyTbody = function(name, attributes) {
  this.startTagRowGroup('tbody', {});
  this.parser.phase.processStartTag(name, attributes);
};

// A nested <table> implies </table>; the start tag is then reprocessed unless
// the parser is in inner_html mode.
p.prototype.startTagTable = function(name, attributes) {
  this.parse_error("unexpected-start-tag-implies-end-tag",
    {startName: "table", endName: "table"});
  this.parser.phase.processEndTag('table');
  if (!this.parser.inner_html) this.parser.phase.processStartTag(name, attributes);
};

// Other start tags are an error; they are processed by "in body" while
// tree.insert_from_table is set.
p.prototype.startTagOther = function(name, attributes) {
  this.parse_error("unexpected-start-tag-implies-table-voodoo", {name: name});
  this.tree.insert_from_table = true;
  new inBody(this.parser, this.tree).processStartTag(name, attributes);
  this.tree.insert_from_table = false;
};

/**
 * Handle </table>: close everything up to and including the table and reset
 * the insertion mode. When no table is in table scope the tag is ignored with
 * a parse error.
 */
p.prototype.endTagTable = function(name) {
  if (this.inScope(name, true)) {
    this.tree.generateImpliedEndTags();
    if (this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) {
      this.parse_error("end-tag-too-early-named", {gotName: 'table', expectedName: this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()});
    }

    this.tree.remove_open_elements_until('table');
    this.parser.reset_insertion_mode(this.tree.open_elements[this.tree.open_elements.length - 1]);
  } else {
    // This module never required `assert`, so the original call threw a
    // ReferenceError on this path; load it locally.
    require('assert').ok(this.parser.inner_html);
    this.parse_error();
  }
};

// End tags that are reported and otherwise ignored inside a table.
p.prototype.endTagIgnore = function(name) {
  this.parse_error("unexpected-end-tag", {name: name});
};

p.prototype.endTagOther = function(name) {
  this.parse_error("unexpected-end-tag-implies-table-voodoo", {name: name});
  // Make all the special element rearranging voodoo kick in
  this.tree.insert_from_table = true;
  // Process the end tag in the "in body" mode
  new inBody(this.parser, this.tree).processEndTag(name);
  this.tree.insert_from_table = false;
};

// Pop open elements (reporting each one) until the current node is <table>
// or the root <html> element.
p.prototype.clearStackToTableContext = function() {
  var name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase();
  while (name != 'table' && name != 'html') {
    this.parse_error("unexpected-implied-end-tag-in-table", {name: name});
    this.tree.pop_element();
    name = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase();
  }
  // When the current node is <html> it's an inner_html case
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_table_phase.js
___________________________________________________________________
Added: svn:eol-style
1131 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_body_phase.js
@@ -0,0 +1,51 @@
 2+var Phase = require('./phase').Phase;
 3+
// End-tag name -> name of the handler method on this phase.
var end_tag_handlers = {
  html: 'endTagHtml',
  '-default': 'endTagOther'
};

/**
 * Phase object used once </body> has been seen.
 *
 * @param parser  Parser instance (phase switching, error reporting).
 * @param tree    Tree builder used for comment insertion.
 */
// `var` keeps `p` module-local instead of leaking an implicit global.
var p = exports.Phase = function AfterBodyPhase(parser, tree) {
  Phase.call(this, parser, tree);
  this.end_tag_handlers = end_tag_handlers;
};

p.prototype = new Phase;

p.prototype.processComment = function(data) {
  // This is needed because data is to be appended to the html element here
  // and not to whatever is currently open.
  this.tree.insert_comment(data, this.tree.open_elements[0]);
};

// Non-space characters: report, fall back to "in body", reprocess.
p.prototype.processCharacters = function(data) {
  this.parse_error('unexpected-char-after-body');
  this.parser.newPhase('inBody');
  this.parser.phase.processCharacters(data);
};

// Any start tag: report, fall back to "in body", reprocess. This overrides
// the table-driven dispatch of the base Phase.
p.prototype.processStartTag = function(name, attributes, self_closing) {
  this.parse_error('unexpected-start-tag-after-body', {name: name});
  this.parser.newPhase('inBody');
  this.parser.phase.processStartTag(name, attributes, self_closing);
};

// </html>: an error in inner_html mode; otherwise remember this phase as
// last_phase and move on to "after after body".
p.prototype.endTagHtml = function(name) {
  if (this.parser.inner_html) {
    this.parse_error('end-html-in-innerhtml');
  } else {
    // XXX This may need to be done, not sure
    // Don't set last_phase to the current phase but to the inBody phase
    // instead. No need for extra parse_errors if there's something after
    // </html>.
    // Try <!doctype html>X</html>X for instance
    this.parser.last_phase = this.parser.phase;
    this.parser.newPhase('afterAfterBody');
  }
};

// Any other end tag: report, fall back to "in body", reprocess.
p.prototype.endTagOther = function(name) {
  this.parse_error('unexpected-end-tag-after-body', {name: name});
  this.parser.newPhase('inBody');
  this.parser.phase.processEndTag(name);
};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_body_phase.js
___________________________________________________________________
Added: svn:eol-style
153 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/phase.js
@@ -0,0 +1,161 @@
 2+var HTML5 = require('../parser').HTML5;
 3+var assert = require('assert');
 4+
 5+exports.Phase = function Phase(parser, tree) {
 6+ this.tree = tree;
 7+ this.parser = parser;
 8+ this.end_tag_handlers = {"-default": 'endTagOther'};
 9+ this.start_tag_handlers = {"-default": 'startTagOther'};
 10+}
 11+
 12+exports.Phase.prototype = {
 13+ parse_error: function(code, options) {
 14+ this.parser.parse_error(code, options);
 15+ },
 16+ processEOF: function() {
 17+ this.tree.generateImpliedEndTags();
 18+ if(this.tree.open_elements.length > 2) {
 19+ this.parse_error('expected-closing-tag-but-got-eof');
 20+ } else if(this.tree.open_elements.length == 2
 21+ && this.tree.open_elements[1].tagName.toLowerCase() != 'body') {
 22+ // This happens for framesets or something?
 23+ this.parse_error('expected-closing-tag-but-got-eof');
 24+ } else if(this.parser.inner_html && this.tree.open_elements.length > 1) {
 25+ // XXX This is not what the specification says. Not sure what to do here.
 26+ this.parse_error('eof-in-innerhtml');
 27+ }
 28+ },
 29+ processComment: function(data) {
 30+ // For most phases the following is correct. Where it's not it will be
 31+ // overridden.
 32+ this.tree.insert_comment(data, this.tree.open_elements.last());
 33+ },
 34+ processDoctype: function(name, publicId, systemId, correct) {
 35+ this.parse_error('unexpected-doctype');
 36+ },
 37+ processSpaceCharacters: function(data) {
 38+ this.tree.insert_text(data);
 39+ },
 40+ processStartTag: function(name, attributes, self_closing) {
 41+ if(this[this.start_tag_handlers[name]]) {
 42+ this[this.start_tag_handlers[name]](name, attributes, self_closing);
 43+ } else if(this[this.start_tag_handlers["-default"]]) {
 44+ this[this.start_tag_handlers["-default"]](name, attributes, self_closing);
 45+ } else {
 46+ throw(new Error("No handler found for "+name));
 47+ }
 48+ },
 49+ processEndTag: function(name) {
 50+ if(this[this.end_tag_handlers[name]]) {
 51+ this[this.end_tag_handlers[name]](name);
 52+ } else if(this[this.end_tag_handlers["-default"]]) {
 53+ this[this.end_tag_handlers["-default"]](name);
 54+ } else {
 55+ throw(new Error("No handler found for "+name));
 56+ }
 57+ },
 58+ inScope: function(name, treeVariant) {
 59+ return this.tree.elementInScope(name, treeVariant);
 60+ },
 61+ startTagHtml: function(name, attributes) {
 62+ if(this.parser.first_start_tag == false && name == 'html') {
 63+ this.parse_error('non-html-root')
 64+ }
 65+ // XXX Need a check here to see if the first start tag token emitted is this token. . . if it's not, invoke parse_error.
 66+ for(var i = 0; i < attributes.length; i++) {
 67+ if(!this.tree.open_elements[0].getAttribute(attributes[i].nodeName)) {
 68+ this.tree.open_elements[0].setAttribute(attributes[i].nodeName, attributes[i].nodeValue)
 69+ }
 70+ }
 71+ this.parser.first_start_tag = false;
 72+ },
 73+ adjust_mathml_attributes: function(attributes) {
 74+ return attributes.map(function(a) {
 75+ if(a[0] =='definitionurl') {
 76+ return ['definitionURL', a[1]]
 77+ } else {
 78+ return a;
 79+ }
 80+ });
 81+ },
 82+ adjust_svg_attributes: function(attributes) {
 83+ return attributes.map(function(a) {
 84+ return SVGAttributeMap[a] ? SVGAttributeMap[a] : a;
 85+ });
 86+ },
 87+ adjust_foreign_attributes: function (attributes) {
 88+ for(var i = 0; i < attributes.length; i++) {
 89+ if(attributes[i].nodeName.indexOf(':') != -1) {
 90+ var t = attributes[i].nodeName.split(/:/);
 91+ attributes[i].namespace = t[0];
 92+ attributes[i].nodeName = t[1];
 93+ }
 94+ }
 95+ return attributes;
 96+ }
 97+}
 98+
 99+var SVGAttributeMap = {
 100+ attributename: 'attributeName',
 101+ attributetype: 'attributeType',
 102+ basefrequency: 'baseFrequency',
 103+ baseprofile: 'baseProfile',
 104+ calcmode: 'calcMode',
 105+ clippathunits: 'clipPathUnits',
 106+ contentscripttype: 'contentScriptType',
 107+ contentstyletype: 'contentStyleType',
 108+ diffuseconstant: 'diffuseConstant',
 109+ edgemode: 'edgeMode',
 110+ externalresourcesrequired: 'externalResourcesRequired',
 111+ filterres: 'filterRes',
 112+ filterunits: 'filterUnits',
 113+ glyphref: 'glyphRef',
 114+ gradienttransform: 'gradientTransform',
 115+ gradientunits: 'gradientUnits',
 116+ kernelmatrix: 'kernelMatrix',
 117+ kernelunitlength: 'kernelUnitLength',
 118+ keypoints: 'keyPoints',
 119+ keysplines: 'keySplines',
 120+ keytimes: 'keyTimes',
 121+ lengthadjust: 'lengthAdjust',
 122+ limitingconeangle: 'limitingConeAngle',
 123+ markerheight: 'markerHeight',
 124+ markerunits: 'markerUnits',
 125+ markerwidth: 'markerWidth',
 126+ maskcontentunits: 'maskContentUnits',
 127+ maskunits: 'maskUnits',
 128+ numoctaves: 'numOctaves',
 129+ pathlength: 'pathLength',
 130+ patterncontentunits: 'patternContentUnits',
 131+ patterntransform: 'patternTransform',
 132+ patternunits: 'patternUnits',
 133+ pointsatx: 'pointsAtX',
 134+ pointsaty: 'pointsAtY',
 135+ pointsatz: 'pointsAtZ',
 136+ preservealpha: 'preserveAlpha',
 137+ preserveaspectratio: 'preserveAspectRatio',
 138+ primitiveunits: 'primitiveUnits',
 139+ refx: 'refX',
 140+ refy: 'refY',
 141+ repeatcount: 'repeatCount',
 142+ repeatdur: 'repeatDur',
 143+ requiredextensions: 'requiredExtensions',
 144+ requiredfeatures: 'requiredFeatures',
 145+ specularconstant: 'specularConstant',
 146+ specularexponent: 'specularExponent',
 147+ spreadmethod: 'spreadMethod',
 148+ startoffset: 'startOffset',
 149+ stddeviation: 'stdDeviation',
 150+ stitchtiles: 'stitchTiles',
 151+ surfacescale: 'surfaceScale',
 152+ systemlanguage: 'systemLanguage',
 153+ tablevalues: 'tableValues',
 154+ targetx: 'targetX',
 155+ targety: 'targetY',
 156+ textlength: 'textLength',
 157+ viewbox: 'viewBox',
 158+ viewtarget: 'viewTarget',
 159+ xchannelselector: 'xChannelSelector',
 160+ ychannelselector: 'yChannelSelector',
 161+ zoomandpan: 'zoomAndPan'
 162+};
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/phase.js
___________________________________________________________________
Added: svn:eol-style
1163 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_head_phase.js
@@ -0,0 +1,84 @@
 2+"use strict";
 3+var Phase = require('./phase').Phase;
 4+var HTML5 = require('../../html5');
 5+
 6+var start_tag_handlers = { // start-tag name -> name of the handler method used by the after-head phase
 7+ html: 'startTagHtml',
 8+ body: 'startTagBody',
 9+ frameset: 'startTagFrameset',
 10+ base: 'startTagFromHead',
 11+ link: 'startTagFromHead',
 12+ meta: 'startTagFromHead',
 13+ script: 'startTagFromHead',
 14+ style: 'startTagFromHead',
 15+ title: 'startTagFromHead',
 16+ "-default": 'startTagOther', // presumably the fallback for unlisted tag names; the lookup is not in this file - confirm
 17+};
 18+
 19+var end_tag_handlers = { // end-tag name -> name of the handler method used by the after-head phase
 20+ body: 'endTagBodyHtmlBr',
 21+ html: 'endTagBodyHtmlBr',
 22+ br: 'endTagBodyHtmlBr',
 23+ "-default": 'endTagOther', // presumably the fallback for unlisted tag names - confirm
 24+};
 25+
 26+exports.Phase = p = function AfterHeadPhase(parser, tree) {
 27+ Phase.call(this, parser, tree);
 28+ this.start_tag_handlers = start_tag_handlers;
 29+ this.end_tag_handlers = end_tag_handlers;
 30+
 31+ this.name = 'after_head_phase';
 32+}
 33+
 34+p.prototype = new Phase;
 35+
 36+
 37+p.prototype.processEOF = function() { // EOF in after-head: insert the implied body, then forward EOF to the new phase
 38+ this.anything_else();
 39+ this.parser.phase.processEOF(); // parser.phase is re-read after anything_else() requested 'inBody'
 40+}
 41+
 42+p.prototype.processCharacters = function(data) { // character data: insert the implied body, then forward data to the new phase
 43+ this.anything_else();
 44+ this.parser.phase.processCharacters(data);
 45+}
 46+
 47+p.prototype.startTagBody = function(name, attributes) { // explicit <body>: insert it with its attributes and switch to 'inBody'
 48+ this.tree.insert_element(name, attributes);
 49+ this.parser.newPhase('inBody');
 50+}
 51+
 52+p.prototype.startTagFrameset = function(name, attributes) { // <frameset>: insert it and switch to 'inFrameset'
 53+ this.tree.insert_element(name, attributes);
 54+ this.parser.newPhase('inFrameset');
 55+}
 56+
 57+p.prototype.startTagFromHead = function(name, attributes) { // head-only tag seen after </head>: report it, then process it in 'inHead'
 58+ this.parse_error("unexpected-start-tag-out-of-my-head", {name: name});
 59+ this.parser.newPhase('inHead');
 60+ this.parser.phase.processStartTag(name, attributes); // NOTE(review): nothing here switches back out of 'inHead' afterwards - confirm that is handled elsewhere
 61+}
 62+
 63+p.prototype.startTagOther = function(name, attributes) { // any other start tag: insert the implied body, then re-dispatch the tag
 64+ this.anything_else();
 65+ this.parser.phase.processStartTag(name, attributes);
 66+}
 67+
 68+p.prototype.endTagBodyHtmlBr = function(name) { // </body>, </html>, </br>: insert the implied body, then re-dispatch the end tag
 69+ this.anything_else();
 70+ this.parser.phase.processEndTag(name);
 71+}
 72+
 73+p.prototype.endTagOther = function(name) { // any other end tag: only report a parse error
 74+ this.parse_error('unexpected-end-tag', {name: name});
 75+}
 76+
 77+p.prototype.anything_else = function() { // shared fallback: insert a body element with no attributes and switch to 'inBody'
 78+ this.tree.insert_element('body', []);
 79+ this.parser.newPhase('inBody');
 80+}
 81+
 82+p.prototype.processEndTag = function(name) { // NOTE(review): defined on this prototype, so it appears to bypass end_tag_handlers for every end tag - confirm against Phase
 83+ this.anything_else()
 84+ this.parser.phase.processEndTag(name)
 85+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/after_head_phase.js
___________________________________________________________________
Added: svn:eol-style
186 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_caption_phase.js
@@ -0,0 +1,94 @@
 2+var Phase = require('./phase').Phase;
 3+var inBody = require('./in_body_phase').Phase;
 4+var assert = require('assert');
 5+
 6+var starts = {
 7+ html: 'startTagHtml',
 8+ caption: 'startTagTableElement',
 9+ col: 'startTagTableElement',
 10+ colgroup: 'startTagTableElement',
 11+ tbody: 'startTagTableElement',
 12+ td: 'startTagTableElement',
 13+ tfoot: 'startTagTableElement',
 14+ thead: 'startTagTableElement',
 15+ tr: 'startTagTableElement',
 16+ '-default': 'startTagOther'
 17+}
 18+
 19+var ends = {
 20+ caption: 'endTagCaption',
 21+ table: 'endTagTable',
 22+ body: 'endTagIgnore',
 23+ col: 'endTagIgnore',
 24+ colgroup: 'endTagIgnore',
 25+ html: 'endTagIgnore',
 26+ tbody: 'endTagIgnore',
 27+ td: 'endTagIgnore',
 28+ tfood: 'endTagIgnore',
 29+ thead: 'endTagIgnore',
 30+ tr: 'endTagIgnore',
 31+ '-default': 'endTagOther'
 32+}
 33+
 34+exports.Phase = p = function InCaptionPhase(parser, tree) {
 35+ Phase.call(this, parser, tree);
 36+ this.start_tag_handlers = starts;
 37+ this.end_tag_handlers = ends;
 38+}
 39+
 40+p.prototype = new Phase;
 41+
 42+p.prototype.ignoreEndTagCaption = function() { // true when inScope('caption', true) fails, i.e. an end-caption should be ignored
 43+ return !this.inScope('caption', true); // second argument presumably selects table scope - confirm against inScope
 44+}
 45+
 46+p.prototype.processCharacters = function(data) { // character data is handled by a fresh in-body phase on the same parser and tree
 47+ new inBody(this.parser, this.tree).processCharacters(data);
 48+}
 49+
 50+p.prototype.startTagTableElement = function(name, attributes) { // table-structure start tag inside a caption: close the caption first, then reprocess the tag
 51+ this.parse_error('unexpected-end-tag', {name: name}); // NOTE(review): an end-tag error code is reported for a start tag - confirm this is intended
 52+ var ignoreEndTag = this.ignoreEndTagCaption(); // sampled before the implied end tag changes the open-element stack
 53+ this.parser.phase.processEndTag('caption');
 54+ if(!ignoreEndTag) this.parser.phase.processStartTag(name, attributes)
 55+}
 56+
 57+p.prototype.startTagOther = function(name, attributes) { // any other start tag is handled by a fresh in-body phase
 58+ new inBody(this.parser, this.tree).processStartTag(name, attributes);
 59+}
 60+
 61+p.prototype.endTagCaption = function(name) { // </caption>: close the caption and return to 'inTable', or report an error if no caption is in scope
 62+ if(this.ignoreEndTagCaption()) {
 63+ // inner_html case
 64+ assert.ok(this.parser.inner_html); // throws if a caption is out of scope while not parsing a fragment
 65+ this.parse_error('unexpected-end-tag', {name: name});
 66+ } else {
 67+ // AT this code is quite similar to endTagTable in inTable
 68+ this.tree.generateImpliedEndTags();
 69+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != 'caption') { // current node is not the caption: report the mismatch
 70+ this.parse_error('expected-one-end-tag-but-got-another',
 71+ {gotName: "caption", expectedName: this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()});
 72+ }
 73+
 74+ this.tree.remove_open_elements_until('caption');
 75+
 76+ this.tree.clearActiveFormattingElements();
 77+
 78+ this.parser.newPhase('inTable');
 79+ }
 80+}
 81+
 82+p.prototype.endTagTable = function(name) { // </table> inside a caption: imply </caption>, then reprocess </table> unless the caption end was ignored
 83+ this.parse_error("unexpected-end-table-in-caption");
 84+ var ignoreEndTag = this.ignoreEndTagCaption(); // sampled before the implied end tag changes the open-element stack
 85+ this.parser.phase.processEndTag('caption')
 86+ if(!ignoreEndTag) this.parser.phase.processEndTag(name);
 87+}
 88+
 89+p.prototype.endTagIgnore = function(name) { // end tags listed as endTagIgnore are dropped after reporting a parse error
 90+ this.parse_error('unexpected-end-tag', {name: name});
 91+}
 92+
 93+p.prototype.endTagOther = function(name) { // any other end tag is handled by a fresh in-body phase
 94+ new inBody(this.parser, this.tree).processEndTag(name);
 95+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_caption_phase.js
___________________________________________________________________
Added: svn:eol-style
196 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_body_phase.js
@@ -0,0 +1,722 @@
 2+"use strict";
 3+var HTML5 = require('../../html5');
 4+var Phase = require('./phase').Phase;
 5+var assert = require('assert')
 6+
 7+var start_tag_handlers = { // start-tag name -> name of the handler method used by the in-body phase
 8+ html: 'startTagHtml',
 9+ head: 'startTagHead',
 10+ base: 'startTagProcessInHead',
 11+ link: 'startTagProcessInHead',
 12+ meta: 'startTagProcessInHead',
 13+ script: 'startTagProcessInHead',
 14+ style: 'startTagProcessInHead',
 15+ title: 'startTagProcessInHead',
 16+ body: 'startTagBody',
 17+ form: 'startTagForm',
 18+ plaintext: 'startTagPlaintext',
 19+ a: 'startTagA',
 20+ button: 'startTagButton',
 21+ xmp: 'startTagXmp',
 22+ table: 'startTagTable',
 23+ hr: 'startTagHr',
 24+ image: 'startTagImage',
 25+ input: 'startTagInput',
 26+ textarea: 'startTagTextarea',
 27+ select: 'startTagSelect',
 28+ isindex: 'startTagIsindex',
 29+ applet: 'startTagAppletMarqueeObject',
 30+ marquee: 'startTagAppletMarqueeObject',
 31+ object: 'startTagAppletMarqueeObject',
 32+ li: 'startTagListItem',
 33+ dd: 'startTagListItem',
 34+ dt: 'startTagListItem',
 35+ address: 'startTagCloseP',
 36+ blockquote: 'startTagCloseP',
 37+ center: 'startTagCloseP',
 38+ dir: 'startTagCloseP',
 39+ div: 'startTagCloseP',
 40+ dl: 'startTagCloseP',
 41+ fieldset: 'startTagCloseP',
 42+ listing: 'startTagCloseP',
 43+ menu: 'startTagCloseP',
 44+ ol: 'startTagCloseP',
 45+ p: 'startTagCloseP',
 46+ pre: 'startTagCloseP',
 47+ ul: 'startTagCloseP',
 48+ b: 'startTagFormatting',
 49+ big: 'startTagFormatting',
 50+ em: 'startTagFormatting',
 51+ font: 'startTagFormatting',
 52+ i: 'startTagFormatting',
 53+ s: 'startTagFormatting',
 54+ small: 'startTagFormatting',
 55+ strike: 'startTagFormatting',
 56+ strong: 'startTagFormatting',
 57+ tt: 'startTagFormatting',
 58+ u: 'startTagFormatting',
 59+ nobr: 'startTagNobr',
 60+ area: 'startTagVoidFormatting',
 61+ basefont: 'startTagVoidFormatting',
 62+ bgsound: 'startTagVoidFormatting',
 63+ br: 'startTagVoidFormatting',
 64+ embed: 'startTagVoidFormatting',
 65+ img: 'startTagVoidFormatting',
 66+ param: 'startTagVoidFormatting',
 67+ spacer: 'startTagVoidFormatting',
 68+ wbr: 'startTagVoidFormatting',
 69+ iframe: 'startTagCdata',
 70+ noembed: 'startTagCdata',
 71+ noframes: 'startTagCdata',
 72+ noscript: 'startTagCdata',
 73+ h1: 'startTagHeading',
 74+ h2: 'startTagHeading',
 75+ h3: 'startTagHeading',
 76+ h4: 'startTagHeading',
 77+ h5: 'startTagHeading',
 78+ h6: 'startTagHeading',
 79+ caption: 'startTagMisplaced',
 80+ col: 'startTagMisplaced',
 81+ colgroup: 'startTagMisplaced',
 82+ frame: 'startTagMisplaced',
 83+ frameset: 'startTagMisplaced',
 84+ //head: 'startTagMisplaced',
 85+ tbody: 'startTagMisplaced',
 86+ td: 'startTagMisplaced',
 87+ tfoot: 'startTagMisplaced',
 88+ th: 'startTagMisplaced',
 89+ thead: 'startTagMisplaced',
 90+ tr: 'startTagMisplaced',
 91+ option: 'startTagMisplaced', // NOTE(review): startTagOptionOptgroup is defined below but no entry maps to it
 92+ optgroup: 'startTagMisplaced',
 93+ 'event-source': 'startTagNew',
 94+ section: 'startTagNew',
 95+ nav: 'startTagNew',
 96+ article: 'startTagNew',
 97+ aside: 'startTagNew',
 98+ header: 'startTagNew',
 99+ footer: 'startTagNew',
 100+ datagrid: 'startTagNew',
 101+ command: 'startTagNew',
 102+ math: 'startTagMath',
 103+ svg: 'startTagSVG',
 104+ "-default": 'startTagOther', // presumably the fallback for unlisted tag names; the lookup is not in this file - confirm
 105+}
 106+
 107+var end_tag_handlers = { // end-tag name -> name of the handler method used by the in-body phase
 108+ p: 'endTagP',
 109+ body: 'endTagBody',
 110+ html: 'endTagHtml',
 111+ form: 'endTagForm',
 112+ applet: 'endTagAppletButtonMarqueeObject',
 113+ button: 'endTagAppletButtonMarqueeObject',
 114+ marquee: 'endTagAppletButtonMarqueeObject',
 115+ object: 'endTagAppletButtonMarqueeObject',
 116+ dd: 'endTagListItem',
 117+ dt: 'endTagListItem',
 118+ li: 'endTagListItem',
 119+ address: 'endTagBlock',
 120+ blockquote: 'endTagBlock',
 121+ center: 'endTagBlock',
 122+ div: 'endTagBlock',
 123+ dl: 'endTagBlock',
 124+ fieldset: 'endTagBlock',
 125+ listing: 'endTagBlock',
 126+ menu: 'endTagBlock',
 127+ ol: 'endTagBlock',
 128+ pre: 'endTagBlock',
 129+ ul: 'endTagBlock',
 130+ h1: 'endTagHeading',
 131+ h2: 'endTagHeading',
 132+ h3: 'endTagHeading',
 133+ h4: 'endTagHeading',
 134+ h5: 'endTagHeading',
 135+ h6: 'endTagHeading',
 136+ a: 'endTagFormatting',
 137+ b: 'endTagFormatting',
 138+ big: 'endTagFormatting',
 139+ em: 'endTagFormatting',
 140+ font: 'endTagFormatting',
 141+ i: 'endTagFormatting',
 142+ nobr: 'endTagFormatting',
 143+ s: 'endTagFormatting',
 144+ small: 'endTagFormatting',
 145+ strike: 'endTagFormatting',
 146+ strong: 'endTagFormatting',
 147+ tt: 'endTagFormatting',
 148+ u: 'endTagFormatting',
 149+ head: 'endTagMisplaced',
 150+ frameset: 'endTagMisplaced',
 151+ select: 'endTagMisplaced',
 152+ optgroup: 'endTagMisplaced',
 153+ option: 'endTagMisplaced',
 154+ table: 'endTagMisplaced',
 155+ caption: 'endTagMisplaced',
 156+ colgroup: 'endTagMisplaced',
 157+ col: 'endTagMisplaced',
 158+ thead: 'endTagMisplaced',
 159+ tfoot: 'endTagMisplaced',
 160+ tbody: 'endTagMisplaced',
 161+ tr: 'endTagMisplaced',
 162+ td: 'endTagMisplaced',
 163+ th: 'endTagMisplaced',
 164+ br: 'endTagBr',
 165+ area: 'endTagNone', // NOTE(review): no endTagNone / endTagCdataTextAreaXmp / endTagNew method is defined in this file - presumably inherited from Phase; confirm
 166+ basefont: 'endTagNone',
 167+ bgsound: 'endTagNone',
 168+ embed: 'endTagNone',
 169+ hr: 'endTagNone',
 170+ image: 'endTagNone',
 171+ img: 'endTagNone',
 172+ input: 'endTagNone',
 173+ isindex: 'endTagNone',
 174+ param: 'endTagNone',
 175+ spacer: 'endTagNone',
 176+ wbr: 'endTagNone',
 177+ frame: 'endTagNone',
 178+ noframes: 'endTagCdataTextAreaXmp',
 179+ noscript: 'endTagCdataTextAreaXmp',
 180+ noembed: 'endTagCdataTextAreaXmp',
 181+ textarea: 'endTagCdataTextAreaXmp',
 182+ xmp: 'endTagCdataTextAreaXmp',
 183+ iframe: 'endTagCdataTextAreaXmp',
 184+ 'event-source': 'endTagNew',
 185+ section: 'endTagNew',
 186+ nav: 'endTagNew',
 187+ article: 'endTagNew',
 188+ aside: 'endTagNew',
 189+ header: 'endTagNew',
 190+ footer: 'endTagNew',
 191+ datagrid: 'endTagNew',
 192+ command: 'endTagNew',
 193+ "-default": 'endTagOther', // presumably the fallback for unlisted tag names - confirm
 194+}
 195+
 196+exports.Phase = p = function InBodyPhase(parser, tree) {
 197+ Phase.call(this, parser, tree);
 198+ this.start_tag_handlers = start_tag_handlers;
 199+ this.end_tag_handlers = end_tag_handlers;
 200+ this.name = 'in_body_phase';
 201+}
 202+
 203+p.prototype = new Phase;
 204+
 205+p.prototype.processSpaceCharactersDropNewline = function(data) { // whitespace right after <pre>/<textarea>: drop one leading newline, insert the rest
 206+ this.dropNewline = false // one-shot flag: only the first whitespace run is treated specially
 207+ var lastTag = this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()
 208+ if(data.length > 0 && data[0] == "\n" && ('pre' == lastTag || 'textarea' == lastTag) && !this.tree.open_elements[this.tree.open_elements.length - 1].hasChildNodes()) { // only when the current pre/textarea is still empty
 209+ data = data.slice(1)
 210+ }
 211+
 212+ if(data.length > 0) {
 213+ this.tree.reconstructActiveFormattingElements()
 214+ this.tree.insert_text(data)
 215+ }
 216+}
 217+
 218+p.prototype.processSpaceCharacters = function(data) { // route whitespace depending on whether dropNewline was set by startTagCloseP/startTagTextarea
 219+ if(this.dropNewline) {
 220+ this.processSpaceCharactersDropNewline(data)
 221+ } else {
 222+ this.processSpaceCharactersNonPre(data)
 223+ }
 224+}
 225+
 226+p.prototype.processSpaceCharactersNonPre = function(data) { // ordinary whitespace: reconstruct formatting elements, then insert the text
 227+ this.tree.reconstructActiveFormattingElements();
 228+ this.tree.insert_text(data);
 229+}
 230+
 231+p.prototype.processCharacters = function(data) { // non-whitespace text: reconstruct formatting elements, then insert the text
 232+ // XXX The specification says to do this for every character at the moment,
 233+ // but apparently that doesn't match the real world so we don't do it for
 234+ // space characters.
 235+ this.tree.reconstructActiveFormattingElements();
 236+ this.tree.insert_text(data);
 237+}
 238+
 239+p.prototype.startTagProcessInHead = function(name, attributes) { // head-level tags are handled by a fresh in-head phase
 240+ new PHASES.inHead(this.parser, this.tree).processStartTag(name, attributes); // NOTE(review): PHASES is not declared in this file - presumably a global set elsewhere; confirm, since this file is strict
 241+}
 242+
 243+p.prototype.startTagBody = function(name, attributes) { // stray <body>: report it and copy attributes the existing body does not already have
 244+ this.parse_error('unexpected-start-tag', {name: 'body'});
 245+ if(this.tree.open_elements.length == 1
 246+ || this.tree.open_elements[1].tagName.toLowerCase() != 'body') { // second open element is not a body
 247+ assert.ok(this.parser.inner_html) // only expected when parsing a fragment
 248+ } else {
 249+ for(var i = 0; i < attributes.length; i++) {
 250+ if(!this.tree.open_elements[1].getAttribute(attributes[i].nodeName)) { // falsy existing value counts as "not set"
 251+ this.tree.open_elements[1].setAttribute(attributes[i].nodeName, attributes[i].nodeValue);
 252+ }
 253+ }
 254+ }
 255+}
 256+
 257+p.prototype.startTagCloseP = function(name, attributes) { // block-level start tag: close an open <p> in scope, then insert the element
 258+ if(this.inScope('p')) this.endTagP('p');
 259+ this.tree.insert_element(name, attributes);
 260+ if(name == 'pre') {
 261+ this.dropNewline = true // makes processSpaceCharacters drop a leading newline inside the new <pre>
 262+ }
 263+}
 264+
 265+p.prototype.startTagForm = function(name, attributes) { // <form>: ignored with an error if a form pointer is already set, otherwise inserted and remembered
 266+ if(this.tree.formPointer) {
 267+ this.parse_error('unexpected-start-tag', {name: name});
 268+ } else {
 269+ if(this.inScope('p')) this.endTagP('p');
 270+ this.tree.insert_element(name, attributes);
 271+ this.tree.formPointer = this.tree.open_elements[this.tree.open_elements.length - 1]; // the element just pushed by insert_element
 272+ }
 273+}
 274+
 275+p.prototype.startTagListItem = function(name, attributes) {
 276+ if(this.inScope('p')) this.endTagP('p');
 277+ var stopNames = {li: ['li'], dd: ['dd', 'dt'], dt: ['dd', 'dt']};
 278+ var stopName = stopNames[name];
 279+
 280+ var els = this.tree.open_elements;
 281+ for(var i = els.length - 1; i >= 0; i--) {
 282+ var node = els[i];
 283+ if(stopName.indexOf(node.tagName.toLowerCase()) != -1) {
 284+ var poppedNodes = [];
 285+ while(els.length - 1 >= i) {
 286+ poppedNodes.push(els.pop());
 287+ }
 288+ if(poppedNodes.length >= 1) {
 289+ this.parse_error(poppedNodes.length == 1 ? "missing-end-tag" : "missing-end-tags",
 290+ {name: poppedNodes.slice(0).map(function (n) { return n.name }).join(', ')});
 291+ }
 292+ break;
 293+ }
 294+
 295+ // Phrasing eliments are all non special, non scoping, non
 296+ // formatting elements
 297+ if(HTML5.SPECIAL_ELEMENTS.concat(HTML5.SCOPING_ELEMENTS).indexOf(node.tagName.toLowerCase()) != -1 && (node.tagName.toLowerCase() != 'address' && node.tagName.toLowerCase() != 'div')) break;
 298+ }
 299+
 300+ // Always insert an <li> element
 301+ this.tree.insert_element(name, attributes);
 302+}
 303+
 304+p.prototype.startTagPlaintext = function(name, attributes) { // <plaintext>: close an open <p>, insert, and switch the tokenizer to the PLAINTEXT content model
 305+ if(this.inScope('p')) this.endTagP('p');
 306+ this.tree.insert_element(name, attributes);
 307+ this.parser.tokenizer.content_model = HTML5.Models.PLAINTEXT;
 308+}
 309+
 310+p.prototype.startTagHeading = function(name, attributes) { // <h1>-<h6>: close an open <p> in scope, then insert the heading
 311+ if(this.inScope('p')) this.endTagP('p');
 312+ this.tree.insert_element(name, attributes);
 313+}
 314+
 315+p.prototype.startTagA = function(name, attributes) { // <a>: an <a> still in the active formatting list is closed and removed before the new one is added
 316+ var afeAElement;
 317+ if(afeAElement = this.tree.elementInActiveFormattingElements('a')) { // assignment is intentional: keep the found element for the cleanup below
 318+ this.parse_error("unexpected-start-tag-implies-end-tag", {startName: "a", endName: "a"});
 319+ this.endTagFormatting('a');
 320+ var pos;
 321+ pos = this.tree.open_elements.indexOf(afeAElement); // remove it from both lists if endTagFormatting left it behind
 322+ if(pos != -1) this.tree.open_elements.splice(pos, 1);
 323+ pos = this.tree.activeFormattingElements.indexOf(afeAElement);
 324+ if(pos != -1) this.tree.activeFormattingElements.splice(pos, 1);
 325+ }
 326+ this.tree.reconstructActiveFormattingElements();
 327+ this.addFormattingElement(name, attributes);
 328+}
 329+
 330+p.prototype.startTagFormatting = function(name, attributes) { // formatting start tag: reconstruct formatting elements, then insert and record the new one
 331+ this.tree.reconstructActiveFormattingElements();
 332+ this.addFormattingElement(name, attributes);
 333+}
 334+
 335+p.prototype.startTagNobr = function(name, attributes) { // <nobr>: a nobr already in scope is closed (with an error) before the new one is added
 336+ this.tree.reconstructActiveFormattingElements();
 337+ if(this.inScope('nobr')) {
 338+ this.parse_error("unexpected-start-tag-implies-end-tag", {startName: 'nobr', endName: 'nobr'});
 339+ this.processEndTag('nobr');
 340+ }
 341+ this.addFormattingElement(name, attributes);
 342+}
 343+
 344+p.prototype.startTagButton = function(name, attributes) { // <button>: a button in scope is closed first and the tag reprocessed; otherwise insert and push a marker
 345+ if(this.inScope('button')) {
 346+ this.parse_error('unexpected-start-tag-implies-end-tag', {startName: 'button', endName: 'button'});
 347+ this.processEndTag('button');
 348+ this.parser.phase.processStartTag(name, attributes); // reprocess through the parser's current phase
 349+ } else {
 350+ this.tree.reconstructActiveFormattingElements();
 351+ this.tree.insert_element(name, attributes);
 352+ this.tree.activeFormattingElements.push(HTML5.Marker);
 353+ }
 354+}
 355+
 356+p.prototype.startTagAppletMarqueeObject = function(name, attributes) { // <applet>/<marquee>/<object>: insert the element and push a marker onto the active formatting list
 357+ this.tree.reconstructActiveFormattingElements();
 358+ this.tree.insert_element(name, attributes)
 359+ this.tree.activeFormattingElements.push(HTML5.Marker);
 360+}
 361+
 362+p.prototype.endTagAppletButtonMarqueeObject = function(name) { // end tag for applet/button/marquee/object: pop to the element and clear formatting entries if it is in scope
 363+ if(this.inScope(name)) this.tree.generateImpliedEndTags()
 364+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) { // current node is not the element being closed
 365+ this.parse_error('end-tag-too-early', {name: name})
 366+ }
 367+ if(this.inScope(name)) {
 368+ this.tree.remove_open_elements_until(name)
 369+ this.tree.clearActiveFormattingElements()
 370+ }
 371+}
 372+
 373+p.prototype.startTagXmp = function(name, attributes) { // <xmp>: insert and switch the tokenizer to the CDATA content model
 374+ this.tree.reconstructActiveFormattingElements();
 375+ this.tree.insert_element(name, attributes);
 376+ this.parser.tokenizer.content_model = HTML5.Models.CDATA;
 377+}
 378+
 379+p.prototype.startTagTable = function(name, attributes) { // <table>: close an open <p> in scope, insert the table and switch to 'inTable'
 380+ if(this.inScope('p')) this.processEndTag('p');
 381+ this.tree.insert_element(name, attributes);
 382+ this.parser.newPhase('inTable');
 383+}
 384+
 385+p.prototype.startTagVoidFormatting = function(name, attributes) { // void elements (br, img, ...): insert and immediately pop, since they have no content
 386+ this.tree.reconstructActiveFormattingElements();
 387+ this.tree.insert_element(name, attributes);
 388+ this.tree.pop_element();
 389+}
 390+
 391+p.prototype.startTagHr = function(name, attributes) { // <hr>: close an open <p> in scope, insert the element and pop it straight away
 392+ if(this.inScope('p')) this.endTagP('p');
 393+ this.tree.insert_element(name, attributes);
 394+ this.tree.pop_element();
 395+}
 396+
 397+p.prototype.startTagImage = function(name, attributes) { // <image> is reported as an error and reprocessed as <img>
 398+ // No, really...
 399+ this.parse_error('unexpected-start-tag-treated-as', {originalName: 'image', newName: 'img'});
 400+ this.processStartTag('img', attributes);
 401+}
 402+
 403+p.prototype.startTagInput = function(name, attributes) { // <input>: insert and immediately pop; form association is left unimplemented
 404+ this.tree.reconstructActiveFormattingElements();
 405+ this.tree.insert_element(name, attributes);
 406+ if(this.tree.formPointer) {
 407+ // XXX Not sure what to do here
 408+ }
 409+ this.tree.pop_element();
 410+}
 411+
 412+p.prototype.startTagIsindex = function(name, attributes) { // <isindex>: expanded into form/hr/p/label/input tokens unless a form pointer is already set
 413+ this.parse_error('deprecated-tag', {name: 'isindex'});
 414+ if(this.tree.formPointer) return;
 415+ this.processStartTag('form'); // NOTE(review): the synthetic start tags are dispatched without an attributes argument - confirm handlers tolerate undefined
 416+ this.processStartTag('hr');
 417+ this.processStartTag('p');
 418+ this.processStartTag('label');
 419+ this.processCharacters("This is a searchable index. Insert your search keywords here: ");
 420+ attributes.push({nodeName: 'name', nodeValue: 'isindex'}) // mutates the caller's attribute list
 421+ this.processStartTag('input', attributes);
 422+ this.processEndTag('label');
 423+ this.processEndTag('p');
 424+ this.processStartTag('hr');
 425+ this.processEndTag('form');
 426+}
 427+
 428+p.prototype.startTagTextarea = function(name, attributes) { // <textarea>: insert, switch the tokenizer to RCDATA and arm the leading-newline drop
 429+ // XXX Form element pointer checking here as well...
 430+ this.tree.insert_element(name, attributes)
 431+ this.parser.tokenizer.content_model = HTML5.Models.RCDATA;
 432+ this.dropNewline = true
 433+}
 434+
 435+p.prototype.startTagCdata = function(name, attributes) { // iframe/noembed/noframes/noscript: insert and switch the tokenizer to the CDATA content model
 436+ this.tree.insert_element(name, attributes)
 437+ this.parser.tokenizer.content_model = HTML5.Models.CDATA;
 438+}
 439+
 440+p.prototype.startTagSelect = function(name, attributes) { // <select>: insert, then pick 'inSelectInTable' or 'inSelect' from the parser's current phase name
 441+ this.tree.reconstructActiveFormattingElements();
 442+ this.tree.insert_element(name, attributes);
 443+
 444+ var phaseName = this.parser.phaseName;
 445+ if(phaseName == 'inTable' || phaseName == 'inCaption'
 446+ || phaseName == 'inColumnGroup'
 447+ || phaseName == 'inTableBody'
 448+ || phaseName == 'inRow'
 449+ || phaseName == 'inCell') { // any table-related phase
 450+ this.parser.newPhase('inSelectInTable');
 451+ } else {
 452+ this.parser.newPhase('inSelect');
 453+ }
 454+}
 455+
 456+p.prototype.startTagMisplaced = function(name, attributes) { // start tags that do not belong in the body are reported and otherwise ignored
 457+ this.parse_error('unexpected-start-tag-ignored', {name: name});
 458+}
 459+
 460+p.prototype.endTagMisplaced = function(name) { // report the end tag as unexpected; nothing is popped
 461+ // This handles elements with end tags in other insertion modes.
 462+ this.parse_error("unexpected-end-tag", {name: name})
 463+}
 464+
 465+p.prototype.endTagBr = function(name) { // </br> is reported and then treated as a <br> element with no attributes
 466+ this.parse_error("unexpected-end-tag-treated-as", {originalName: "br", newName: "br element"})
 467+ this.tree.reconstructActiveFormattingElements()
 468+ this.tree.insert_element(name, [])
 469+ this.tree.pop_element() // void element: pop it straight away
 470+
 471+}
 472+
 473+p.prototype.startTagOptionOptgroup = function(name, attributes) {
 474+ if(this.inScope('option')) endTagOther('option');
 475+ this.tree.reconstructActiveFormattingElements();
 476+ this.tree.insert_element(name, attributes);
 477+}
 478+
 479+p.prototype.startTagNew = function(name, attributes) { // newer HTML5 elements are currently handled exactly like any other start tag
 480+ this.startTagOther(name, attributes);
 481+}
 482+
 483+p.prototype.startTagOther = function(name, attributes) { // default start tag: reconstruct formatting elements, then insert the element
 484+ this.tree.reconstructActiveFormattingElements();
 485+ this.tree.insert_element(name, attributes);
 486+}
 487+
 488+p.prototype.endTagOther = function endTagOther(name) { // default end tag: pop to the nearest matching open element, stopping at a special/scoping element
 489+ var nodes = this.tree.open_elements;
 490+ for(var eli = nodes.length - 1; eli > 0; eli--) { // index 0 (the bottom of the stack) is never examined
 491+ var currentNode = nodes[eli];
 492+ if(nodes[eli].tagName.toLowerCase() == name) {
 493+ this.tree.generateImpliedEndTags();
 494+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) { // something else is still open above the match
 495+ this.parse_error('unexpected-end-tag', {name: name});
 496+ }
 497+
 498+ this.tree.remove_open_elements_until(function(el) {
 499+ return el == currentNode; // match by identity, not by tag name
 500+ });
 501+
 502+ break;
 503+ } else {
 504+
 505+ if(HTML5.SPECIAL_ELEMENTS.concat(HTML5.SCOPING_ELEMENTS).indexOf(nodes[eli].tagName.toLowerCase()) != -1) { // hit a special/scoping element first: ignore the end tag
 506+ this.parse_error('unexpected-end-tag', {name: name});
 507+ break;
 508+ }
 509+ }
 510+ }
 511+}
 512+
 513+p.prototype.startTagMath = function(name, attributes) { // <math>: adjust attributes, insert as a foreign 'math' element and enter 'inForeignContent'
 514+ this.tree.reconstructActiveFormattingElements();
 515+ attributes = this.adjust_mathml_attributes(attributes);
 516+ attributes = this.adjust_foreign_attributes(attributes);
 517+ this.tree.insert_foreign_element(name, attributes, 'math');
 518+ if(false) { // self-closing handling is stubbed out, so the else branch always runs
 519+ // If the token has its self-closing flag set, pop the current node off
 520+ // the stack of open elements and acknowledge the token's self-closing flag
 521+ } else {
 522+ this.parser.secondary_phase = this.parser.phase; // remember the phase in effect before entering foreign content
 523+ this.parser.newPhase('inForeignContent');
 524+ }
 525+}
 526+
 527+p.prototype.startTagSVG = function(name, attributes) { // <svg>: adjust attributes, insert as a foreign 'svg' element and enter 'inForeignContent'
 528+ this.tree.reconstructActiveFormattingElements();
 529+ attributes = this.adjust_svg_attributes(attributes);
 530+ attributes = this.adjust_foreign_attributes(attributes);
 531+ this.tree.insert_foreign_element(name, attributes, 'svg');
 532+ if(false) { // self-closing handling is stubbed out, so the else branch always runs
 533+ // If the token has its self-closing flag set, pop the current node off
 534+ // the stack of open elements and acknowledge the token's self-closing flag
 535+ } else {
 536+ this.parser.secondary_phase = this.parser.phase; // remember the phase in effect before entering foreign content
 537+ this.parser.newPhase('inForeignContent');
 538+ }
 539+}
 540+
 541+p.prototype.endTagP = function(name) {
 542+ if(this.inScope('p')) this.tree.generateImpliedEndTags('p');
 543+ if(!this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() == 'p')
 544+ this.parse_error('unexpected-end-tag', {name: 'p'});
 545+ if(this.inScope('p')) {
 546+ while(this.inScope('p')) this.tree.pop_element();
 547+ } else {
 548+ this.startTagCloseP('p', {});
 549+ this.endTagP('p');
 550+ }
 551+}
 552+
 553+p.prototype.endTagBody = function(name) { // </body>: switch to 'afterBody', or only report an error when the second open element is not a body
 554+ if(this.tree.open_elements[1].tagName.toLowerCase() != 'body') { // NOTE(review): assumes at least two open elements - confirm for fragment parsing
 555+ // inner_html case
 556+ this.parse_error('unexpected-end-tag', {name: 'body'});
 557+ return;
 558+ }
 559+
 560+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != 'body') { // other elements are still open inside the body
 561+ this.parse_error('expected-one-end-tag-but-got-another', {
 562+ expectedName: 'body',
 563+ gotName: this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase()
 564+ });
 565+ }
 566+ this.parser.newPhase('afterBody');
 567+}
 568+
 569+p.prototype.endTagHtml = function(name) {
 570+ this.endTagBody(name);
 571+ if(!this.inner_html) this.parser.phase.processEndTag(name);
 572+}
 573+
 574+p.prototype.endTagBlock = function(name) {
 575+ if(this.inScope(name)) this.tree.generateImpliedEndTags();
 576+ if(!this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() == 'name') {
 577+ this.parse_error('end-tag-too-early', {name: name});
 578+ }
 579+ if(this.inScope(name)) this.tree.remove_open_elements_until(name);
 580+}
 581+
 582+p.prototype.endTagForm = function(name) { // </form>: pop the form only when it is the current node; the form pointer is always cleared
 583+ if(this.inScope(name)) {
 584+ this.tree.generateImpliedEndTags();
 585+ }
 586+
 587+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name) {
 588+ this.parse_error('end-tag-too-early-ignored', {name: 'form'});
 589+ } else {
 590+ this.tree.pop_element();
 591+ }
 592+ this.tree.formPointer = null;
 593+}
 594+
 595+p.prototype.endTagListItem = function(name) { // </li>, </dd>, </dt>: pop to the named item if it is in scope, reporting a mismatch first
 596+ if(this.inScope(name)) this.tree.generateImpliedEndTags(name);
 597+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name)
 598+ this.parse_error('end-tag-too-early', {name: name});
 599+ if(this.inScope(name)) this.tree.remove_open_elements_until(name);
 600+}
 601+
 602+p.prototype.endTagHeading = function(name) {
 603+ for(i in HTML5.HEADING_ELEMENTS) {
 604+ var el = HTML5.HEADING_ELEMENTS[i];
 605+ if(this.inScope(el)) {
 606+ this.tree.generateImpliedEndTags();
 607+ break;
 608+ }
 609+ }
 610+
 611+ if(this.tree.open_elements[this.tree.open_elements.length - 1].tagName.toLowerCase() != name)
 612+ this.parse_error('end-tag-too-early', {name: name});
 613+
 614+ for(i in HTML5.HEADING_ELEMENTS) {
 615+ var el = HTML5.HEADING_ELEMENTS[i];
 616+ if(this.inScope(el)) {
 617+ this.tree.remove_open_elements_until(function(e) {
 618+ return HTML5.HEADING_ELEMENTS.indexOf(e.tagName.toLowerCase()) != -1
 619+ });
 620+ break;
 621+ }
 622+ }
 623+}
 624+
 625+p.prototype.endTagFormatting = function(name) {
 626+ while(true) {
 627+ var afeElement = this.tree.elementInActiveFormattingElements(name);
 628+ if(!afeElement || (this.tree.open_elements.indexOf(afeElement) != -1
 629+ && !this.inScope(afeElement.tagName.toLowerCase()))) {
 630+ this.parse_error('adoption-agency-1.1', {name: name});
 631+ return;
 632+ } else if(this.tree.open_elements.indexOf(afeElement) == -1) {
 633+ this.parse_error('adoption-agency-1.2', {name: name});
 634+ this.tree.activeFormattingElements.splice(this.tree.activeFormattingElements.indexOf(afeElement), 1);
 635+ return;
 636+ }
 637+
 638+ if(afeElement != this.tree.open_elements[this.tree.open_elements.length - 1]) {
 639+ this.parse_error('adoption-agency-1.3', {name: name});
 640+ }
 641+
 642+ // Start of the adoption agency algorithm proper
 643+ var afeIndex = this.tree.open_elements.indexOf(afeElement);
 644+ var furthestBlock = null;
 645+ var els = this.tree.open_elements.slice(afeIndex);
 646+ var len = els.length;
 647+ for(var i = 0; i < len; i++) {
 648+ var element = els[i];
 649+ if(HTML5.SPECIAL_ELEMENTS.concat(HTML5.SCOPING_ELEMENTS).indexOf(element.tagName.toLowerCase()) != -1) {
 650+ furthestBlock = element;
 651+ break;
 652+ }
 653+ }
 654+
 655+ if(!furthestBlock) {
 656+ var element = this.tree.remove_open_elements_until(function(el) {
 657+ return el == afeElement;
 658+ });
 659+ this.tree.activeFormattingElements.splice(this.tree.activeFormattingElements.indexOf(element), 1);
 660+ return;
 661+ }
 662+
 663+
 664+ var commonAncestor = this.tree.open_elements[afeIndex - 1];
 665+
 666+ var bookmark = this.tree.activeFormattingElements.indexOf(afeElement);
 667+
 668+ var lastNode;
 669+ var node;
 670+ lastNode = node = furthestBlock;
 671+
 672+ while(true) {
 673+ node = this.tree.open_elements[this.tree.open_elements.indexOf(node) - 1];
 674+ while(this.tree.activeFormattingElements.indexOf(node) == -1) {
 675+ var tmpNode = node;
 676+ node = this.tree.open_elements[this.tree.open_elements.indexOf(node) - 1];
 677+ this.tree.open_elements.splice(this.tree.open_elements.indexOf(tmpNode), 1);
 678+ }
 679+
 680+ if(node == afeElement) break;
 681+
 682+ if(lastNode == furthestBlock) {
 683+ bookmark = this.tree.activeFormattingElements.indexOf(node) + 1;
 684+ }
 685+
 686+ var cite = node.parentNode;
 687+
 688+ if(node.hasChildNodes()) {
 689+ var clone = node.cloneNode();
 690+ this.tree.activeFormattingElements[this.tree.activeFormattingElements.indexOf(node)] = clone;
 691+ this.tree.open_elements[this.tree.open_elements.indexOf(node)] = clone;
 692+ node = clone;
 693+ }
 694+
 695+ if(lastNode.parent) lastNode.parent.removeChild(lastNode);
 696+ node.appendChild(lastNode);
 697+5
 698+ lastNode = node
 699+
 700+ }
 701+
 702+ if(lastNode.parent) lastNode.parent.removeChild(lastNode);
 703+ commonAncestor.appendChild(lastNode);
 704+
 705+ clone = afeElement.cloneNode();
 706+
 707+ this.tree.reparentChildren(furthestBlock, clone);
 708+
 709+ furthestBlock.appendChild(clone);
 710+
 711+ this.tree.activeFormattingElements.splice(this.tree.activeFormattingElements.indexOf(afeElement), 1);
 712+ this.tree.activeFormattingElements.splice(Math.min(bookmark, this.tree.activeFormattingElements.length), 0, clone);
 713+
 714+ this.tree.open_elements.splice(this.tree.open_elements.indexOf(afeElement), 1);
 715+ this.tree.open_elements.splice(this.tree.open_elements.indexOf(furthestBlock) + 1, 0, clone);
 716+
 717+ }
 718+}
 719+
 720+p.prototype.addFormattingElement = function(name, attributes) { // insert the element and record it in the active formatting list
 721+ this.tree.insert_element(name, attributes);
 722+ this.tree.activeFormattingElements.push(this.tree.open_elements[this.tree.open_elements.length - 1]); // the element just pushed by insert_element
 723+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/in_body_phase.js
___________________________________________________________________
Added: svn:eol-style
1724 + native
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_head_phase.js
@@ -0,0 +1,59 @@
 2+"use strict";
 3+var Phase = require('./phase').Phase;
 4+
 5+var start_tag_handlers = {
 6+ html: 'startTagHtml',
 7+ head: 'startTagHead',
 8+ '-default': 'startTagOther',
 9+}
 10+
 11+var end_tag_handlers = {
 12+ html: 'endTagImplyHead',
 13+ head: 'endTagImplyHead',
 14+ body: 'endTagImplyHead',
 15+ br: 'endTagImplyHead',
 16+ p: 'endTagImplyHead',
 17+ '-default': 'endTagOther',
 18+}
 19+
 20+exports.Phase = p = function (parser, tree) {
 21+ Phase.call(this, parser, tree);
 22+ this.start_tag_handlers = start_tag_handlers;
 23+ this.end_tag_handlers = end_tag_handlers;
 24+ this.name = 'before_head_phase';
 25+}
 26+
 27+p.prototype = new Phase;
 28+
 29+p.prototype.processEOF = function() {
 30+ this.startTagHead('head', {});
 31+ this.parser.phase.processEOF();
 32+}
 33+
 34+p.prototype.processCharacters = function(data) {
 35+ this.startTagHead('head', {});
 36+ this.parser.phase.processCharacters(data);
 37+}
 38+
 39+p.prototype.processSpaceCharacters = function(data) {
 40+}
 41+
 42+p.prototype.startTagHead = function(name, attributes) {
 43+ this.tree.insert_element(name, attributes);
 44+ this.tree.head_pointer = this.tree.open_elements[this.tree.open_elements.length - 1];
 45+ this.parser.newPhase('inHead');
 46+}
 47+
 48+p.prototype.startTagOther = function(name, attributes) {
 49+ this.startTagHead('head', {});
 50+ this.parser.phase.processStartTag(name, attributes);
 51+}
 52+
 53+p.prototype.endTagImplyHead = function(name) {
 54+ this.startTagHead('head', {});
 55+ this.parser.phase.processEndTag(name);
 56+}
 57+
 58+p.prototype.endTagOther = function(name) {
 59+ this.parse_error('end-tag-after-implied-root', {name: name});
 60+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/html5/parser/before_head_phase.js
___________________________________________________________________
Added: svn:eol-style
161 + native
Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt
@@ -28,44 +28,8 @@
2929 };
3030
3131
 // Debug helper: serialise a value with JSON.stringify using 2-space indentation.
 32+ var pp = function ( s ) { return JSON.stringify(s, null, 2); }
3233
33 - /* Temporary debugging help. Is there anything similar in JS or a library? */
34 - var print_r = function (arr, level) {
35 -
36 - var dumped_text = "";
37 - if (!level) level = 0;
38 -
39 - //The padding given at the beginning of the line.
40 - var level_padding = "";
41 - var bracket_level_padding = "";
42 -
43 - for (var j = 0; j < level + 1; j++) level_padding += " ";
44 - for (var b = 0; b < level; b++) bracket_level_padding += " ";
45 -
46 - if (typeof(arr) == 'object') { //Array/Hashes/Objects
47 - dumped_text += "Array\n";
48 - dumped_text += bracket_level_padding + "(\n";
49 - for (var item in arr) {
50 -
51 - var value = arr[item];
52 -
53 - if (typeof(value) == 'object') { //If it is an array,
54 - dumped_text += level_padding + "[" + item + "] => ";
55 - dumped_text += print_r(value, level + 2);
56 - } else {
57 - dumped_text += level_padding + "[" + item + "] => '" + value + "'\n";
58 - }
59 -
60 - }
61 - dumped_text += bracket_level_padding + ")\n\n";
62 - } else { //Strings/Chars/Numbers etc.
63 - dumped_text = "=>" + arr + "<=(" + typeof(arr) + ")";
64 - }
65 -
66 - return dumped_text;
67 -
68 - };
69 -
7034 // Convert list prefixes to a list of WikiDom list styles
7135 var bulletsToTypes = function (bullets) {
7236 var bTypes = [];
@@ -92,7 +56,7 @@
9357
9458 // return [text [annotations]]
9559 var extractText = function ( node, offset ) {
96 - dp("extract: " + print_r(node));
 60+ dp("extract: " + pp(node));
9761 if (typeof node === 'string') {
9862 return [node, []];
9963 } else if ($.isArray(node)) {
@@ -121,7 +85,7 @@
12286 }
12387 return texts.join('');
12488 } else {
125 - throw ("extract failed: " + print_r(node));
 89+ throw ("extract failed: " + pp(node));
12690 }
12791 };
12892 */
@@ -165,7 +129,7 @@
166130
167131 // Start of line
168132 sol = (newline / & { return pos === 0; } { return true; })
169 - cn:(comment n:newline? { return n })? {
 133+ cn:(c:comment n:newline? { return [c, n] })? {
170134 return cn;
171135 }
172136
@@ -177,7 +141,7 @@
178142 = & { blockStart = pos; return true; } b:block {
179143 b = flatten(b);
180144 var bs = b[0];
181 - dp('toplevelblock:' + print_r(b) + bs);
 145+ dp('toplevelblock:' + pp(b));
182146 if (bs.attribs === undefined) {
183147 bs.attribs = [];
184148 }
@@ -190,8 +154,16 @@
191155 = (sol space* &newline)? bl:block_lines { return bl; }
192156 / para
193157 / comment
194 - / (s:sol { return [{type: 'TEXT', value: s}]; })
 158+ / (s:sol {
 159+ if (s) {
 160+ return [s, {type: 'NEWLINE'}];
 161+ } else {
 162+ return [{type: 'NEWLINE'}];
 163+ }
 164+ }
 165+ )
195166
 167+
196168 // Block structures with start-of-line wiki syntax
197169 block_lines
198170 = h
@@ -290,7 +262,7 @@
291263 if (s !== '') {
292264 res.push(s)
293265 }
294 - //console.log('paralines' + print_r(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}])));
 266+ //console.log('paralines' + pp(res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}])));
295267 return res.concat(c, cs, [{type: 'ENDTAG', name: 'p'}]);
296268 }
297269
@@ -306,9 +278,9 @@
307279
308280 // Syntax that stops inline expansion
309281 inline_breaks
310 - = //& { console.log(print_r(syntaxFlags)); return true; }
 282+ = //& { console.log(pp(syntaxFlags)); return true; }
311283 & { return syntaxFlags['table']; }
312 - a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + print_r(a)); return true; }
 284+ a:(newline [!|] / '||' / '!!' / '|}') { dp("table break" + pp(a)); return true; }
313285 / & { return syntaxFlags['italic']; } italic_marker { return true; }
314286 / & { return syntaxFlags['bold']; } bold_marker { return true; }
315287 / & { return syntaxFlags['linkdesc']; } link_end { return true; }
@@ -326,21 +298,21 @@
// inline: one or more of text / inline_element / any single character that is
// not at an inline break. After flatten(), consecutive string pieces are
// joined into one TEXT token; every non-string item is appended as-is.
327299 inline
328300 = c:(text / inline_element / (!inline_breaks ch:. { return ch; }))+ {
329301 var out = [];
330 - var text = '';
 302+ var text = [];
331303 c = flatten(c);
332304 for (var i = 0; i < c.length; i++) {
333305 if (typeof c[i] == 'string') {
334 - text += c[i];
 306+ text.push(c[i]);
335307 } else {
336308 if (text.length) {
337 - out.push({ type: "TEXT", value: text });
338 - text = '';
 309+ out.push({ type: "TEXT", value: text.join('') });
 310+ text = [];
339311 }
340313 out.push(c[i]); // concat() returns a new array and left `out` unchanged, dropping the token
341314 }
342315 }
343316 if (text.length) {
344 - out.push({ type: 'TEXT', value: text });
 317+ out.push({ type: 'TEXT', value: text.join('') });
345318 }
346319 return out;
347320 }
@@ -348,23 +320,23 @@
// inlineline: one or more items, each either `text` or (when not at an inline
// break) an inline_element or a single non-newline character. After
// flatten(), consecutive string pieces are joined into one TEXT token; every
// non-string item is pushed to the output unchanged.
349321 inlineline
350322 = c:(text / !inline_breaks (inline_element / [^\n]))+ {
351323 var out = [];
352 - var text = '';
 324+ var text = [];
353325 c = flatten(c);
354326 for (var i = 0; i < c.length; i++) {
355327 if (typeof c[i] == 'string') {
356 - text += c[i];
 328+ text.push(c[i]);
357329 } else {
358330 if (text.length) {
359 - out.push({type: 'TEXT', value: text});
360 - text = '';
 331+ out.push({type: 'TEXT', value: text.join('')});
 332+ text = [];
361333 }
362334 out.push(c[i]);
363335 }
364336 }
365337 if (text.length) {
366 - out.push({type: 'TEXT', value: text});
 338+ out.push({type: 'TEXT', value: text.join('')});
367339 }
368 - //dp('inlineline out:', print_r(out));
 340+ //dp('inlineline out:', pp(out));
369341 return out;
370342 }
371343
@@ -383,7 +355,7 @@
// comment: an HTML-style comment, optionally followed by further comments on
// subsequent lines. Only the first comment's characters are labelled (`c`), so
// the result is a one-element array holding a single COMMENT token built from
// them; the trailing comments are matched but not part of the returned value.
384356 comment
385357 = '<!--' c:comment_chars* '-->'
386358 (space* newline space* comment)* {
387 - return { type: 'COMMENT', value: c.join('') };
 359+ return [{ type: 'COMMENT', value: c.join('') }];
388360 }
389361
390362 comment_chars
@@ -623,20 +595,9 @@
624596
// lists: one or more dtdd / li items. The flattened item tokens are wrapped
// between a TAG and an ENDTAG token whose name is hard-coded to 'ul' (see the
// XXX marker below).
625597 lists = es:(dtdd / li)+
626598 {
627 - // Flatten es
628 - var esLen = es.length;
629 - var flatEs = [];
630 - for (var i = 0; i < esLen; i++) {
631 - var ei = es[i];
632 - if ($.isArray(ei)) {
633 - flatEs = flatEs.concat(ei);
634 - } else {
635 - flatEs.push(ei);
636 - }
637 - }
638599 return [ { type: 'TAG',
639600 name: 'ul'} ] // XXX!!
640 - .concat(flatEs
 601+ .concat(flatten(es)
641602 ,[{ type: 'ENDTAG', name: 'ul' }]);
642603 }
643604
@@ -685,21 +646,22 @@
686647 = tas:table_start c:table_caption? b:table_body? table_end {
687648 var res = {type: 'TAG', name: 'table'}
688649 var body = b !== '' ? b : [];
 650+ dp("body: " + pp(body));
689651 if (tas.length > 0) {
690652 // FIXME: actually parse and build structure
691653 res.attribs = [['data-unparsed', tas.join('')]];
692654 }
693655
694 - if (c !== '') {
 656+ if (c != '') {
695657 var caption = [{type: 'TAG', name: 'caption'}]
696658 .concat(c, [{type: 'ENDTAG', name: 'caption'}]);
697659 } else {
698660 var caption = [];
699 - //dp(print_r(res));
700 -
701 - return [res].concat(caption, body,
702 - [{type: 'ENDTAG', name: 'table'}]);
703661 }
 662+ //dp(pp(res));
 663+
 664+ return [res].concat(caption, body,
 665+ [{type: 'ENDTAG', name: 'table'}]);
704666 }
705667
706668 table_start
@@ -708,7 +670,7 @@
709671 & { setFlag('table'); return true; }
710672 ta:table_attribs*
711673 space* {
712 - //dp("table_start " + print_r(ta) + ", pos:" + pos);
 674+ //dp("table_start " + pp(ta) + ", pos:" + pos);
713675 return ta;
714676 }
715677 / sol "{|" { clearFlag('table'); return null; }
@@ -726,11 +688,11 @@
727689 = & { dp("table_body enter"); return true; }
728690 firstrow:table_firstrow otherrows:table_row* {
729691 /* dp('table first and otherrows: '
730 - * + print_r([firstrow].concat(otherrows))); */
 692+ * + pp([firstrow].concat(otherrows))); */
731693 return [firstrow].concat(otherrows);
732694 }
733695 / otherrows:table_row* {
734 - //dp('table otherrows: ' + print_r(otherrows));
 696+ //dp('table otherrows: ' + pp(otherrows));
735697 return otherrows;
736698 }
737699
@@ -754,7 +716,7 @@
755717 ! [}+-]
756718 a:thtd_attribs?
757719 td:(!inline_breaks anyblock)* {
758 - dp("table data result: " + print_r(td) + ", attribts: " + print_r(a));
 720+ dp("table data result: " + pp(td) + ", attribts: " + pp(a));
759721 return [{ type: 'TAG', name: 'td', attribs: [['data-unparsed', a]]}]
760722 .concat(td, [{type: 'ENDTAG', name: 'td'}]);
761723 }

Status & tagging log