r106281 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r106280 | r106281 | r106282 >
Date:23:38, 14 December 2011
Author:gwicke
Status:deferred
Tags:
Comment:
A collection of small bug fixes to the grammar, Cite, the Token format
converter and the HTML DOM -> WikiDom converter. The tokenizer now digests all
parserTests.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.Cite.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -63,7 +63,7 @@
6464 _import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']);
6565 _import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']);
6666
67 -_import(pj('parser', 'mediawiki.DOMConverter'), ['DOMConverter']);
 67+_import(pj('parser', 'mediawiki.DOMConverter.js'), ['DOMConverter']);
6868
6969 _import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']);
7070
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
@@ -55,6 +55,9 @@
5656 this.emit('token', {type: 'StartTag',
5757 name: token.name,
5858 data: att(token.attribs)});
 59+ this.emit('token', {type: 'EndTag',
 60+ name: token.name,
 61+ data: att(token.attribs)});
5962 break;
6063 case "COMMENT":
6164 this.emit('token', {type: 'Comment',
@@ -62,7 +65,6 @@
6366 break;
6467 case "END":
6568 this.emit('end');
66 - console.log("at end..");
6769 this.document = this.parser.document;
6870 if ( ! this.document.body ) {
6971 // HACK: This should not be needed really.
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.DOMConverter.js
@@ -178,7 +178,7 @@
179179 case Node.ELEMENT_NODE:
180180 // Call a handler for the particular node type
181181 var hi = this.getHTMLHandlerInfo( cnode.nodeName );
182 - var res = hi.handler.call(this, cnode, 0, hi.type );
 182+ var res = hi.handler.call(this, cnode, hi.type );
183183 if ( hi.attribs ) {
184184 $.extend( res.node.attributes, hi.attribs );
185185 }
@@ -208,15 +208,17 @@
209209 * @param {Int} WikiDom offset within a block
210210 * @returns {Object} WikiDom object
211211 */
212 -DOMConverter.prototype._convertHTMLBranch = function ( node, offset, type ) {
 212+DOMConverter.prototype._convertHTMLBranch = function ( node, type ) {
 213+
213214 var children = node.childNodes,
214215 wnode = {
215216 type: type,
216217 attributes: this._HTMLPropertiesToWikiAttributes( node ),
217218 children: []
218 - };
219 -
220 - var parNode = null;
 219+ },
 220+ parNode = null,
 221+ offset = 0,
 222+ res;
221223
222224 function newPara () {
223225 offset = 0;
@@ -238,29 +240,29 @@
239241 var annotationtype = this.getHTMLAnnotationType( cnode.nodeName );
240242 if ( annotationtype ) {
241243 if ( !parNode ) {
242 - newPara()
 244+ newPara();
243245 }
244 - var res = this._convertHTMLAnnotation( cnode, offset, annotationtype );
 246+ offset = 0;
 247+ res = this._convertHTMLAnnotation( cnode, 0, annotationtype );
245248 //console.log( 'res leaf: ' + JSON.stringify(res, null, 2));
246249 offset += res.text.length;
247250 parNode.content.text += res.text;
248251 //console.log( 'res annotations: ' + JSON.stringify(res, null, 2));
249252 parNode.content.annotations = parNode.content.annotations
250253 .concat( res.annotations );
251 - break;
252254 } else {
253255 // Close last paragraph, if still open.
254256 parNode = null;
255257 // Call a handler for the particular node type
256258 var hi = this.getHTMLHandlerInfo( cnode.nodeName );
257 - var res = hi.handler.call(this, cnode, 0, hi.type );
 259+ res = hi.handler.call(this, cnode, hi.type );
258260 if ( hi.attribs ) {
259261 $.extend( res.node.attributes, hi.attribs );
260262 }
261263 wnode.children.push( res.node );
262264 offset = res.offset;
263 - break;
264265 }
 266+ break;
265267 case Node.TEXT_NODE:
266268 if ( !parNode ) {
267269 newPara();
@@ -290,9 +292,8 @@
291293 * @param {Int} WikiDom offset within a block
292294 * @returns {Object} WikiDom object
293295 */
294 -DOMConverter.prototype._convertHTMLLeaf = function ( node, offset, type ) {
295 - // XXX Does the offset in every leaf start at zero?
296 - offset = 0;
 296+DOMConverter.prototype._convertHTMLLeaf = function ( node, type ) {
 297+ var offset = 0;
297298
298299 var children = node.childNodes,
299300 wnode = {
@@ -394,7 +395,6 @@
395396 for ( var i = 0, l = attribs.length; i < l; i++ ) {
396397 var attrib = attribs.item(i),
397398 key = attrib.name;
398 - console.log('key: ' + key);
399399 if ( key.match( /^data-json-/ ) ) {
400400 // strip data- prefix from data-*
401401 out[key.replace( /^data-json-/, '' )] = JSON.parse(attrib.value);
@@ -426,6 +426,9 @@
427427 // XXX: This subsets html DOM
428428 if ( ['title'].indexOf(key) != -1 ) {
429429 out[key] = attrib.value;
 430+ } else {
 431+ // prefix key with 'html/'
 432+ out['html/' + key] = attrib.value;
430433 }
431434 }
432435 }
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -643,9 +643,12 @@
644644 = "{{" target:template_target
645645 params:(newline? "|" newline? p:template_param { return p })*
646646 "}}" {
647 - var obj = { type: 'TAG', name: 'template',
648 - attribs: [['data-target', target]],
649 - args: {}}
 647+ var obj = {
 648+ type: 'SELFCLOSINGTAG',
 649+ name: 'template',
 650+ attribs: [['data-target', target]],
 651+ args: {}
 652+ };
650653 if (params && params.length) {
651654 var position = 1;
652655 for ( var i = 0, l = params.length; i < l; i++ ) {
@@ -706,18 +709,19 @@
707710 wikilink
708711 = "[["
709712 ! url
710 - target:link_target text:("|" lt:link_text { return lt })* "]]" suffix:text? {
 713+ target:link_target ltext:("|" lt:link_text { return lt })* "]]"
 714+ suffix:(![ \]] text_char)* {
711715 var obj = {
712716 type: 'TAG',
713717 name: 'a',
714718 attribs: [['data-type', 'internal']]
715719 };
716720 obj.attribs.push(['href', target]);
717 - if (text && text.length) {
718 - var textTokens = text;
 721+ if (ltext && ltext.length) {
 722+ var textTokens = ltext;
719723 } else {
720 - if (suffix !== '') {
721 - target += suffix;
 724+ if (suffix) {
 725+ target += suffix.join('');
722726 }
723727 var textTokens = [{type: 'TEXT', value: target}];
724728 }
@@ -735,13 +739,13 @@
736740 )* { return h.join(''); }
737741
738742 link_text
739 - = h:( & { return setFlag('linkdesc'); }
740 - x:inlineline { return x }
741 - )* {
742 - clearFlag('linkdesc')
743 - return h;
744 - }
745 - / & { clearFlag('linkdesc') } { return null; }
 743+ = & { return setFlag('linkdesc'); }
 744+ h:inlineline
 745+ {
 746+ clearFlag('linkdesc');
 747+ return h;
 748+ }
 749+ / & { clearFlag('linkdesc'); return false }
746750
747751 link_end = "]]"
748752
@@ -1002,7 +1006,7 @@
10031007 }
10041008
10051009 table_firstrow
1006 - = td:table_data+ {
 1010+ = td:(table_data / table_header)+ {
10071011 //dp('firstrow: ' + pp(td));
10081012 return [{ type: 'TAG', name: 'tr' }]
10091013 .concat(td, [{type: 'ENDTAG', name: 'tr'}]);
Index: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
@@ -7,6 +7,8 @@
88 function Cite () {
99 this.refGroups = {};
1010 this.refTokens = [];
 11+ // Within ref block
 12+ this.isActive = false;
1113 }
1214
1315 /**
@@ -111,17 +113,27 @@
112114
113115 var token = tokenCTX.token;
114116 // Collect all tokens between ref start and endtag
115 - if ( token.type === 'TAG' && token.name.toLowerCase() === 'ref' ) {
 117+ if ( ! this.isActive &&
 118+ token.type === 'TAG' &&
 119+ token.name.toLowerCase() === 'ref' ) {
116120 this.curRef = tokenCTX.token;
117121 // Prepend self for 'any' token type
118122 tokenCTX.dispatcher.prependListener(this.onRefCB, 'any' );
119123 tokenCTX.token = null;
 124+ this.isActive = true;
120125 return tokenCTX;
121 - } else if ( token.type === 'ENDTAG' && token.name.toLowerCase() === 'ref' ) {
 126+ } else if ( this.isActive &&
 127+ // Also accept really broken ref close tags..
 128+ ['TAG', 'ENDTAG', 'SELFCLOSINGTAG'].indexOf(token.type) >= 0 &&
 129+ token.name.toLowerCase() === 'ref'
 130+ )
 131+ {
 132+ this.isActive = false;
122133 tokenCTX.dispatcher.removeListener(this.onRefCB, 'any' );
123134 // fall through for further processing!
124135 } else {
125136 // Inside ref block: Collect all other tokens in refTokens and abort
 137+ console.log(JSON.stringify(tokenCTX.token, null, 2));
126138 this.refTokens.push(tokenCTX.token);
127139 tokenCTX.token = null;
128140 return tokenCTX;
@@ -287,8 +299,9 @@
288300 // Clean up
289301 this.refGroups = {};
290302 this.refTokens = [];
 303+ this.isActive = false;
291304 return tokenCTX;
292 -}
 305+};
293306
294307 if (typeof module == "object") {
295308 module.exports.Cite = Cite;

Status & tagging log