r106213 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r106212 | r106213 | r106214 >
Date:17:25, 14 December 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Add ability to pass through JSON data to WikiDom in data-json-* attributes,
and fix parser to actually parse the Barack Obama article except for one table
with nested templates at the start-of-line.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -203,7 +203,7 @@
204204
205205 // Build a DOM tree from tokens using the HTML tree builder/parser.
206206 pt.buildTree( tokens, treeBuilder );
207 -
 207+
208208 // Perform post-processing on DOM.
209209 pt.postProcessor.doPostProcess(treeBuilder.document);
210210
@@ -447,9 +447,8 @@
448448 //});
449449 //var res = es.HtmlSerializer.stringify(tokens,environment);
450450
451 - //console.log(JSON.stringify(tokens));
452451 //Slightly better token output debugging:
453 - //console.log( util.inspect( tokens, false, null ).yellow);
 452+ //console.log( util.inspect( res.tokens, false, null ).yellow);
454453
455454 // Transform tokens using the TokenTransformDispatcher. When done, the
456455 // TokenTransformDispatcher calls buildTree() and checkResult() with the
@@ -457,6 +456,9 @@
458457
459458 // Append the end
460459 res.tokens.push({type: 'END'});
 460+
 461+ //console.log(JSON.stringify(res.tokens, null, 2));
 462+
461463 this.tokenDispatcher.transformTokens( res.tokens );
462464 }
463465 };
@@ -554,6 +556,12 @@
555557 for (var i = 0, length = tokens.length; i < length; i++) {
556558 treeBuilder.processToken(tokens[i]);
557559 }
 560+
 561+ // FIXME HACK: For some reason the end token is not processed sometimes,
 562+ // which normally fixes the body reference up.
 563+ treeBuilder.document.body = treeBuilder.parser
 564+ .document.getElementsByTagName('body')[0];
 565+
558566 };
559567
560568 /**
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
@@ -25,7 +25,7 @@
2626 // html tree builder by emitting the token.
2727 FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
2828 var att = function (maybeAttribs) {
29 - if ( $.isArray(maybeAttribs) ) {
 29+ if ( $.isArray( maybeAttribs ) ) {
3030 var atts = [];
3131 for(var i = 0, length = maybeAttribs.length; i < length; i++) {
3232 var att = maybeAttribs[i];
@@ -62,12 +62,16 @@
6363 break;
6464 case "END":
6565 this.emit('end');
 66+ console.log("at end..");
6667 this.document = this.parser.document;
67 - // HACK: This should not be needed really.
68 - this.document.body = this.document.getElementsByTagName('body')[0];
 68+ if ( ! this.document.body ) {
 69+ // HACK: This should not be needed really.
 70+ this.document.body = this.parser.document.getElementsByTagName('body')[0];
 71+ }
6972 break;
7073 case "NEWLINE":
7174 //this.emit('end');
 75+ this.emit('token', {type: 'Characters', data: "\n"});
7276 break;
7377 default:
7478 console.log("Unhandled token: " + JSON.stringify(token));
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js
@@ -395,8 +395,11 @@
396396 var attrib = attribs.item(i),
397397 key = attrib.name;
398398 console.log('key: ' + key);
399 - if ( key.match( /^data-/ ) ) {
 399+ if ( key.match( /^data-json-/ ) ) {
400400 // strip data-json- prefix from data-json-* and parse the value as JSON
 401+ out[key.replace( /^data-json-/, '' )] = JSON.parse(attrib.value);
 402+ } else if ( key.match( /^data-/ ) ) {
 403+ // strip data- prefix from data-*
401404 out[key.replace( /^data-/, '' )] = attrib.value;
402405 } else {
403406 // prefix html properties with html/
@@ -412,8 +415,11 @@
413416 for ( var i = 0, l = attribs.length; i < l; i++ ) {
414417 var attrib = attribs.item(i),
415418 key = attrib.name;
416 - if ( key.match( /^data-/ ) ) {
 419+ if ( key.match( /^data-json-/ ) ) {
417420 // strip data-json- prefix from data-json-* and parse the value as JSON
 421+ out[key.replace( /^data-json-/, '' )] = JSON.parse(attrib.value);
 422+ } else if ( key.match( /^data-/ ) ) {
 423+ // strip data- prefix from data-*
418424 out[key.replace( /^data-/, '' )] = attrib.value;
419425 } else {
420426 // pass through a few whitelisted keys
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -644,7 +644,7 @@
645645 params:(newline? "|" newline? p:template_param { return p })*
646646 "}}" {
647647 var obj = { type: 'TAG', name: 'template',
648 - attribs: [['target', target]],
 648+ attribs: [['data-target', target]],
649649 args: {}}
650650 if (params && params.length) {
651651 var position = 1;
@@ -659,7 +659,7 @@
660660 }
661661 // HACK: temporarily also push the args into an attribute
662662 // (just for debugging)
663 - obj.attribs.push(['data-args', JSON.stringify(obj.args)]);
 663+ obj.attribs.push(['data-json-args', JSON.stringify(obj.args)]);
664664 }
665665 // Should actually use a self-closing tag here, but the Node HTML5
666666 // parser only recognizes known self-closing tags for now, so use an
@@ -687,7 +687,7 @@
688688 };
689689 if (params && params.length) {
690690 // HACK, not final.
691 - obj.attribs.push(['data-args', JSON.stringify(params)]);
 691+ obj.attribs.push(['data-json-args', JSON.stringify(params)]);
692692 }
693693 return obj;
694694 }

Status & tagging log