r110495 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r110494‎ | r110495 | r110496 >
Date:16:30, 1 February 2012
Author:gwicke
Status:deferred
Tags:
Comment:
Change token format to plain strings for text tokens, and specific objects for
other tokens. This is only the first half of the conversion. The next step is
to drop the type attribute on most tokens and match on the constructor in the
token transform machinery.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.Cite.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.PostExpandParagraphHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.Sanitizer.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformManager.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
@@ -88,7 +88,7 @@
8989 this.bolds.push(this.currentChunk);
9090 break;
9191 case 4:
92 - this.currentChunk.push( {type: 'TEXT', value: "'"} );
 92+ this.currentChunk.push( "'" );
9393 this._startNewChunk();
9494 this.currentChunk.push(ctx);
9595 this.bolds.push(this.currentChunk);
@@ -108,7 +108,7 @@
109109 break;
110110 default: // longer than 5, only use the last 5 ticks
111111 var newvalue = token.value.substr(0, qlen - 5 );
112 - this.currentChunk.push ( {type: 'TEXT', value: newvalue} );
 112+ this.currentChunk.push ( newvalue );
113113 this._startNewChunk();
114114 this.currentChunk.push(ctx);
115115 this.italics.push(this.currentChunk);
@@ -153,9 +153,9 @@
154154 var ctx = this.bolds[j][0];
155155 //console.log("balancing!" + JSON.stringify(ctx.prevToken, null, 2));
156156 if (ctx.prevToken) {
157 - if (ctx.prevToken.type === 'TEXT') {
158 - var lastchar = prevToken.value[ctx.prevToken.value.length - 1],
159 - secondtolastchar = ctx.prevToken.value[ctx.prevToken.value.length - 2];
 157+ if (ctx.prevToken.constructor === String) {
 158+ var lastchar = prevToken[ctx.prevToken.length - 1],
 159+ secondtolastchar = ctx.prevToken[ctx.prevToken.length - 2];
160160 if (lastchar === ' ' && firstspace === -1) {
161161 firstspace = j;
162162 } else if (lastchar !== ' ') {
@@ -215,7 +215,7 @@
216216 // italic tags. In the process, one quote needs to be converted back to text.
217217 QuoteTransformer.prototype.convertBold = function ( i ) {
218218 var chunk = this.bolds[i],
219 - textToken = { type: 'TEXT', value: "'" };
 219+ textToken = "'";
220220 //console.log('convertbold!');
221221 if ( chunk.pos ) {
222222 this.chunks[chunk.pos - 1].push( textToken );
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
@@ -61,7 +61,7 @@
6262 if ( maybeAttribs && $.isArray( maybeAttribs ) ) {
6363 for(var i = 0, length = maybeAttribs.length; i < length; i++) {
6464 var att = maybeAttribs[i];
65 - atts.push({nodeName: att[0], nodeValue: att[1]});
 65+ atts.push({nodeName: att.k, nodeValue: att.v});
6666 }
6767 }
6868 return atts;
@@ -71,54 +71,55 @@
7272 // html tree builder by emitting the token.
7373 FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
7474
75 - switch (token.type) {
76 - case "TEXT":
77 - this.emit('token', {type: 'Characters', data: token.value});
78 - break;
79 - case "TAG":
80 - this.emit('token', {type: 'StartTag',
81 - name: token.name,
82 - data: this._att(token.attribs)});
83 - break;
84 - case "ENDTAG":
85 - this.emit('token', {type: 'EndTag',
86 - name: token.name,
87 - data: this._att(token.attribs)});
88 - break;
89 - case "SELFCLOSINGTAG":
90 - this.emit('token', {type: 'StartTag',
91 - name: token.name,
92 - data: this._att(token.attribs)});
93 - if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) {
94 - // VOID_ELEMENTS are automagically treated as self-closing by
95 - // the tree builder
 75+ if ( token.constructor === String ) {
 76+ this.emit('token', {type: 'Characters', data: token});
 77+ } else {
 78+ switch (token.type) {
 79+ case "TAG":
 80+ this.emit('token', {type: 'StartTag',
 81+ name: token.name,
 82+ data: this._att(token.attribs)});
 83+ break;
 84+ case "ENDTAG":
9685 this.emit('token', {type: 'EndTag',
9786 name: token.name,
9887 data: this._att(token.attribs)});
99 - }
100 - break;
101 - case "COMMENT":
102 - this.emit('token', {type: 'Comment',
103 - data: token.value});
104 - break;
105 - case "END":
106 - this.emit('end');
107 - this.emit('token', { type: 'EOF' } );
108 - this.document = this.parser.document;
109 - if ( ! this.document.body ) {
110 - // HACK: This should not be needed really.
111 - this.document.body = this.parser.document.getElementsByTagName('body')[0];
112 - }
113 - // Emit the document to consumers
114 - //this.emit('document', this.document);
115 - break;
116 - case "NEWLINE":
117 - //this.emit('end');
118 - //this.emit('token', {type: 'Characters', data: "\n"});
119 - break;
120 - default:
121 - console.log("Unhandled token: " + JSON.stringify(token));
122 - break;
 88+ break;
 89+ case "SELFCLOSINGTAG":
 90+ this.emit('token', {type: 'StartTag',
 91+ name: token.name,
 92+ data: this._att(token.attribs)});
 93+ if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) {
 94+ // VOID_ELEMENTS are automagically treated as self-closing by
 95+ // the tree builder
 96+ this.emit('token', {type: 'EndTag',
 97+ name: token.name,
 98+ data: this._att(token.attribs)});
 99+ }
 100+ break;
 101+ case "COMMENT":
 102+ this.emit('token', {type: 'Comment',
 103+ data: token.value});
 104+ break;
 105+ case "END":
 106+ this.emit('end');
 107+ this.emit('token', { type: 'EOF' } );
 108+ this.document = this.parser.document;
 109+ if ( ! this.document.body ) {
 110+ // HACK: This should not be needed really.
 111+ this.document.body = this.parser.document.getElementsByTagName('body')[0];
 112+ }
 113+ // Emit the document to consumers
 114+ //this.emit('document', this.document);
 115+ break;
 116+ case "NEWLINE":
 117+ //this.emit('end');
 118+ //this.emit('token', {type: 'Characters', data: "\n"});
 119+ break;
 120+ default:
 121+ console.log("Unhandled token: " + JSON.stringify(token));
 122+ break;
 123+ }
123124 }
124125 };
125126
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformManager.js
@@ -151,9 +151,16 @@
152152 * fully processed). The token type change case still needs to be covered
153153 * though.
154154 */
 155+TokenTransformManager.prototype._setTokenRank = function ( token ) {
 156+};
155157 TokenTransformManager.prototype._resetTokenRank = function ( res, transformer ) {
156158 if ( res.token ) {
157159 // reset rank after type or name change
 160+
 161+ // Convert String literal to String object
 162+ if ( res.token.constructor === String && res.token.rank === undefined ) {
 163+ res.token = new String( res.token );
 164+ }
158165 if ( transformer.rank < 1 ) {
159166 res.token.rank = 0;
160167 } else {
@@ -161,6 +168,11 @@
162169 }
163170 } else if ( res.tokens && transformer.rank > 2 ) {
164171 for ( var i = 0; i < res.tokens.length; i++ ) {
 172+ var token = res.tokens[i];
 173+ // convert string literal to string object
 174+ if ( token.constructor === String && token.rank === undefined ) {
 175+ res.tokens[i] = new String( token );
 176+ }
165177 if ( res.tokens[i].rank === undefined ) {
166178 // Do not run phase 0 on newly created tokens from
167179 // phase 1.
@@ -229,10 +241,16 @@
230242 break;
231243 }
232244 // track progress on token
 245+ if ( res.token.rank === undefined && res.token.constructor === String ) {
 246+ res.token = new String ( res.token );
 247+ }
233248 res.token.rank = transformer.rank;
234249 }
235250 if ( ! aborted ) {
236251 // Mark token as fully processed.
 252+ if ( res.token.rank === undefined && res.token.constructor === String ) {
 253+ res.token = new String ( res.token );
 254+ }
237255 res.token.rank = phaseEndRank;
238256 }
239257 }
@@ -279,10 +297,16 @@
280298 aborted = true;
281299 break;
282300 }
 301+ if ( res.token.rank === undefined && res.token.constructor === String ) {
 302+ res.token = new String ( res.token );
 303+ }
283304 res.token.rank = transformer.rank;
284305 }
285306 if ( ! aborted ) {
286307 // mark token as completely processed
 308+ if ( res.token.rank === undefined && res.token.constructor === String ) {
 309+ res.token = new String ( res.token );
 310+ }
287311 res.token.rank = phaseEndRank; // need phase passed in!
288312 }
289313
@@ -418,6 +442,8 @@
419443 AsyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
420444 // Set top-level callback to next transform phase
421445 var res = this.transformTokens ( tokens, this.tokenCB );
 446+ this.env.dp('AsyncTokenTransformManager onChunk res.async=' + res.async +
 447+ ' tokens=' + JSON.stringify( tokens ) );
422448
423449 if ( ! this.tailAccumulator ) {
424450 this.emit( 'chunk', res.tokens );
@@ -429,7 +455,6 @@
430456 this.tailAccumulator = res.async;
431457 this.tokenCB = res.async.getParentCB ( 'sibling' );
432458 }
433 - this.env.dp('AsyncTokenTransformManager onChunk res.async=' + res.async);
434459 //this.phase2TailCB( tokens, true );
435460
436461 // The next processed chunk should call back as a sibling to last
@@ -462,27 +487,29 @@
463488 for ( var i = 0; i < tokensLength; i++ ) {
464489 token = tokens[i];
465490
466 - switch( token.type ) {
467 - case 'TAG':
468 - case 'ENDTAG':
469 - case 'SELFCLOSINGTAG':
470 - res = this._transformTagToken( token, cb, phaseEndRank );
471 - break;
472 - case 'TEXT':
473 - res = this._transformToken( token, cb, phaseEndRank, ts.text );
474 - break;
475 - case 'COMMENT':
476 - res = this._transformToken( token, cb, phaseEndRank, ts.comment);
477 - break;
478 - case 'NEWLINE':
479 - res = this._transformToken( token, cb, phaseEndRank, ts.newline );
480 - break;
481 - case 'END':
482 - res = this._transformToken( token, cb, phaseEndRank, ts.end );
483 - break;
484 - default:
485 - res = this._transformToken( token, cb, phaseEndRank, ts.martian );
486 - break;
 491+ if ( token.constructor === String ) {
 492+ res = this._transformToken( token, cb, phaseEndRank, ts.text );
 493+ //console.log( 'transform string ' + token + ' res:' + JSON.stringify( res ) );
 494+ } else {
 495+ switch( token.type ) {
 496+ case 'TAG':
 497+ case 'ENDTAG':
 498+ case 'SELFCLOSINGTAG':
 499+ res = this._transformTagToken( token, cb, phaseEndRank );
 500+ break;
 501+ case 'COMMENT':
 502+ res = this._transformToken( token, cb, phaseEndRank, ts.comment);
 503+ break;
 504+ case 'NEWLINE':
 505+ res = this._transformToken( token, cb, phaseEndRank, ts.newline );
 506+ break;
 507+ case 'END':
 508+ res = this._transformToken( token, cb, phaseEndRank, ts.end );
 509+ break;
 510+ default:
 511+ res = this._transformToken( token, cb, phaseEndRank, ts.martian );
 512+ break;
 513+ }
487514 }
488515
489516 if( res.tokens ) {
@@ -638,29 +665,31 @@
639666
640667 for ( var i = 0; i < tokensLength; i++ ) {
641668 token = tokens[i];
 669+
 670+ if ( token.constructor === String ) {
 671+ res = this._transformToken( token, cb, this.phaseEndRank,
 672+ ts.text );
 673+ } else {
642674
643 - switch( token.type ) {
644 - case 'TAG':
645 - case 'ENDTAG':
646 - case 'SELFCLOSINGTAG':
647 - res = this._transformTagToken( token, cb, this.phaseEndRank );
648 - break;
649 - case 'TEXT':
650 - res = this._transformToken( token, cb, this.phaseEndRank,
651 - ts.text );
652 - break;
653 - case 'COMMENT':
654 - res = this._transformToken( token, cb, this.phaseEndRank, ts.comment );
655 - break;
656 - case 'NEWLINE':
657 - res = this._transformToken( token, cb, this.phaseEndRank, ts.newline );
658 - break;
659 - case 'END':
660 - res = this._transformToken( token, cb, this.phaseEndRank, ts.end );
661 - break;
662 - default:
663 - res = this._transformToken( token, cb, this.phaseEndRank, ts.martian );
664 - break;
 675+ switch( token.type ) {
 676+ case 'TAG':
 677+ case 'ENDTAG':
 678+ case 'SELFCLOSINGTAG':
 679+ res = this._transformTagToken( token, cb, this.phaseEndRank );
 680+ break;
 681+ case 'COMMENT':
 682+ res = this._transformToken( token, cb, this.phaseEndRank, ts.comment );
 683+ break;
 684+ case 'NEWLINE':
 685+ res = this._transformToken( token, cb, this.phaseEndRank, ts.newline );
 686+ break;
 687+ case 'END':
 688+ res = this._transformToken( token, cb, this.phaseEndRank, ts.end );
 689+ break;
 690+ default:
 691+ res = this._transformToken( token, cb, this.phaseEndRank, ts.martian );
 692+ break;
 693+ }
665694 }
666695
667696 if( res.tokens ) {
@@ -744,7 +773,7 @@
745774 pipe.addListener( 'end',
746775 this.onEnd.bind( this, this._returnAttributeKey.bind( this, i ) )
747776 );
748 - pipe.process( attributes[i][0].concat([{type:'END'}]) );
 777+ pipe.process( attributes[i].k.concat([{type:'END'}]) );
749778
750779 // transform the value
751780 pipe = this.manager.getAttributePipeline( this.manager.args );
@@ -754,11 +783,11 @@
755784 pipe.addListener( 'end',
756785 this.onEnd.bind( this, this._returnAttributeValue.bind( this, i ) )
757786 );
758 - //console.log('starting attribute transform of ' + JSON.stringify( attributes[i][1] ) );
759 - pipe.process( attributes[i][1].concat([{type:'END'}]) );
 787+ //console.log('starting attribute transform of ' + JSON.stringify( attributes[i].v ) );
 788+ pipe.process( attributes[i].v.concat([{type:'END'}]) );
760789 }
761790 this.outstanding--;
762 - if ( this.outstanding == 0 ) {
 791+ if ( this.outstanding === 0 ) {
763792 this._returnAttributes();
764793 }
765794 };
@@ -768,7 +797,7 @@
769798 var out = [];
770799 for ( var i = 0, l = this.kvs.length; i < l; i++ ) {
771800 var kv = this.kvs[i];
772 - out.push( [kv.key, kv.value] );
 801+ out.push( new KV(kv.key, kv.value) );
773802 }
774803
775804 // and call the callback with the result
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
@@ -56,19 +56,16 @@
5757 };
5858
5959 ParserFunctions.prototype['pf_lc'] = function ( target, argList, argDict ) {
60 - return [{type: 'TEXT', value: target.toLowerCase()}];
 60+ return [ target.toLowerCase() ];
6161 };
6262
6363 ParserFunctions.prototype['pf_uc'] = function ( target, argList, argDict ) {
64 - return [{type: 'TEXT', value: target.toUpperCase()}];
 64+ return [ target.toUpperCase() ];
6565 };
6666
6767 ParserFunctions.prototype['pf_ucfirst'] = function ( target, argList, argDict ) {
6868 if ( target ) {
69 - return [{
70 - type: 'TEXT',
71 - value: target[0].toUpperCase() + target.substr(1)
72 - }];
 69+ return [ target[0].toUpperCase() + target.substr(1) ];
7370 } else {
7471 return [];
7572 }
@@ -76,10 +73,7 @@
7774
7875 ParserFunctions.prototype['pf_lcfirst'] = function ( target, argList, argDict ) {
7976 if ( target ) {
80 - return [{
81 - type: 'TEXT',
82 - value: target[0].toLowerCase() + target.substr(1)
83 - }];
 77+ return [ target[0].toLowerCase() + target.substr(1) ];
8478 } else {
8579 return [];
8680 }
@@ -93,20 +87,19 @@
9488 // Based on http://jacwright.com/projects/javascript/date_format/ for now, MIT
9589 // licensed.
9690 ParserFunctions.prototype['pf_#time'] = function ( target, argList, argDict ) {
97 - //return [{type: 'TEXT', value: 'January 22, 2012'}];
9891 var res,
9992 tpl = target.trim();
10093 //try {
10194 // var date = new Date( this.manager.env.tokensToString( argList[0][1] ) );
102 - // res = [{type: 'TEXT', value: date.format( target ) }];
 95+ // res = [ date.format( target ) ];
10396 //} catch ( e ) {
10497 // this.manager.env.dp( 'ERROR: #time ' + e );
10598
10699 try {
107 - res = [{type: 'TEXT', value: new Date().format ( tpl ) }];
 100+ res = [ new Date().format ( tpl ) ];
108101 } catch ( e2 ) {
109102 this.manager.env.dp( 'ERROR: #time ' + e2 );
110 - res = [{type: 'TEXT', value: new Date().toString() }];
 103+ res = [ new Date().toString() ];
111104 }
112105 //}
113106 return res;
@@ -130,6 +123,7 @@
131124 return returnStr;
132125 };
133126
 127+// XXX: support localization
134128 Date.replaceChars = {
135129 shortMonths: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
136130 longMonths: ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
@@ -190,7 +184,7 @@
191185 var f = new Function ( 'return (' + target + ')' );
192186 res = f();
193187 } catch ( e ) {
194 - return [{type: 'TEXT', value: 'class="error" in expression ' + target }];
 188+ return [ 'class="error" in expression ' + target ];
195189 }
196190 if ( res ) {
197191 return ( argList[0] && argList[0][1] ) || [];
@@ -211,9 +205,9 @@
212206 var f = new Function ( 'return (' + target + ')' );
213207 res = f();
214208 } catch ( e ) {
215 - return [{type: 'TEXT', value: 'class="error" in expression ' + target }];
 209+ return [ 'class="error" in expression ' + target ];
216210 }
217 - return [{type: 'TEXT', value: res.toString()}];
 211+ return [ res.toString() ];
218212 };
219213
220214
@@ -227,51 +221,51 @@
228222 return ( argList[0] && argList[0][1] ) || [];
229223 };
230224 ParserFunctions.prototype['pf_formatnum'] = function ( target, argList, argDict ) {
231 - return [{type: 'TEXT', value: target}];
 225+ return [ target ];
232226 };
233227 ParserFunctions.prototype['pf_currentpage'] = function ( target, argList, argDict ) {
234 - return [{type: 'TEXT', value: target}];
 228+ return [ target ];
235229 };
236230 ParserFunctions.prototype['pf_pagename'] = function ( target, argList, argDict ) {
237 - return [{type: 'TEXT', value: target}];
 231+ return [ target ];
238232 };
239233 ParserFunctions.prototype['pf_pagesize'] = function ( target, argList, argDict ) {
240 - return [{type: 'TEXT', value: '100'}];
 234+ return [ '100' ];
241235 };
242236 ParserFunctions.prototype['pf_pagename'] = function ( target, argList, argDict ) {
243 - return [{type: 'TEXT', value: target}];
 237+ return [ target ];
244238 };
245239 ParserFunctions.prototype['pf_fullpagename'] = function ( target, argList, argDict ) {
246 - return [{type: 'TEXT', value: target}];
 240+ return [target];
247241 };
248242 ParserFunctions.prototype['pf_fullpagenamee'] = function ( target, argList, argDict ) {
249 - return [{type: 'TEXT', value: target}];
 243+ return [target];
250244 };
251245 ParserFunctions.prototype['pf_fullurl'] = function ( target, argList, argDict ) {
252 - return [{type: 'TEXT', value: target}];
 246+ return [target];
253247 };
254248 ParserFunctions.prototype['pf_urlencode'] = function ( target, argList, argDict ) {
255249 this.manager.env.tp( 'urlencode: ' + target );
256 - return [{type: 'TEXT', value: target.trim()}];
 250+ return [target.trim()];
257251 };
258252 ParserFunctions.prototype['pf_anchorencode'] = function ( target, argList, argDict ) {
259 - return [{type: 'TEXT', value: target}];
 253+ return [target];
260254 };
261255 ParserFunctions.prototype['pf_namespace'] = function ( target, argList, argDict ) {
262 - return [{type: 'TEXT', value: 'Main'}];
 256+ return ['Main'];
263257 };
264258 ParserFunctions.prototype['pf_protectionlevel'] = function ( target, argList, argDict ) {
265 - return [{type: 'TEXT', value: ''}];
 259+ return [''];
266260 };
267261 ParserFunctions.prototype['pf_ns'] = function ( target, argList, argDict ) {
268 - return [{type: 'TEXT', value: target}];
 262+ return [target];
269263 };
270264
271265 ParserFunctions.prototype['pf_subjectspace'] = function ( target, argList, argDict ) {
272 - return [{type: 'TEXT', value: 'Main'}];
 266+ return ['Main'];
273267 };
274268 ParserFunctions.prototype['pf_talkspace'] = function ( target, argList, argDict ) {
275 - return [{type: 'TEXT', value: 'Talk'}];
 269+ return ['Talk'];
276270 };
277271
278272 if (typeof module == "object") {
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -75,10 +75,10 @@
7676 };
7777
7878 var pushList = function ( listName, itemName ) {
79 - out.push({type: 'TAG', name: listName});
80 - out.push({type: 'TAG', name: itemName});
81 - endtags.push({type: 'ENDTAG', name: listName});
82 - endtags.push({type: 'ENDTAG', name: itemName});
 79+ out.push( new TagTk( listName ));
 80+ out.push( new TagTk( itemName ));
 81+ endtags.push( new EndTagTk( listName ));
 82+ endtags.push( new EndTagTk( itemName ));
8383 };
8484
8585 var popTags = function ( n ) {
@@ -103,8 +103,8 @@
104104 if (changeLen === 0) {
105105 var itemToken = endtags.pop();
106106 out.push(itemToken);
107 - out.push({type: 'TAG', name: itemToken.name});
108 - endtags.push({type: 'ENDTAG', name: itemToken.name});
 107+ out.push(new TagTk( itemToken.name ));
 108+ endtags.push(new EndTagTk( itemToken.name ));
109109 } else if ( bs.length == bn.length
110110 && changeLen == 1
111111 && isDlDd( bs[prefixLen], bn[prefixLen] ) ) {
@@ -115,16 +115,16 @@
116116 } else {
117117 var newName = 'dd';
118118 }
119 - out.push({type: 'TAG', name: newName});
120 - endtags.push({type: 'ENDTAG', name: newName});
 119+ out.push(new TagTk( newName ));
 120+ endtags.push(new EndTagTk( newName ));
121121 } else {
122122 popTags(bs.length - prefixLen);
123123
124124 if (prefixLen > 0 && bn.length == prefixLen ) {
125125 var itemToken = endtags.pop();
126126 out.push(itemToken);
127 - out.push({type: 'TAG', name: itemToken.name});
128 - endtags.push({type: 'ENDTAG', name: itemToken.name});
 127+ out.push(new TagTk( itemToken.name ));
 128+ endtags.push(new EndTagTk( itemToken.name ));
129129 }
130130
131131 for(var i = prefixLen; i < bn.length; i++) {
@@ -264,7 +264,7 @@
265265 // texts)
266266 //console.log( pp( flatten ( e ) ) );
267267 cache = {};
268 - __parseArgs[2]( [ { type: 'END' } ] );
 268+ __parseArgs[2]( [ new EOFTk( ) ] );
269269 return []; //flatten(e);
270270 }
271271
@@ -350,7 +350,7 @@
351351 optionalSpaceToken
352352 = s:space* {
353353 if ( s.length ) {
354 - return [{type: 'TEXT', value: s.join('')}];
 354+ return [s.join('')];
355355 } else {
356356 return [];
357357 }
@@ -363,7 +363,7 @@
364364 // was actually preceded by a newline
365365 cn:( c:comment n:newline? {
366366 if ( n !== '' ) {
367 - return [c, {type: 'TEXT', value: n}];
 367+ return [c, n];
368368 } else {
369369 return [c];
370370 }
@@ -376,9 +376,9 @@
377377 var niToken = [];
378378 if ( ni !== '') {
379379 if ( ni[0] === '/' ) {
380 - niToken = [{type: 'ENDTAG', name: ni[1]}];
 380+ niToken = [new EndTagTk( ni[1] )];
381381 } else {
382 - niToken = [{type: 'TAG', name: ni[1]}];
 382+ niToken = [new TagTk( ni[1] )];
383383 }
384384 }
385385
@@ -391,20 +391,25 @@
392392 newline
393393 = '\n' / '\r\n'
394394
395 -newlineToken = newline { return [{type: 'NEWLINE'}] }
 395+newlineToken = newline { return [new NlTk()] }
396396
397397 eolf = newline / eof
398398
399399 toplevelblock
400400 = & { blockStart = pos; return true; } b:block {
401401 b = flatten(b);
402 - var bs = b[0];
403402 if ( b.length ) {
 403+ var bs = b[0];
 404+ if ( bs.constructor === String && bs.attribs === undefined ) {
 405+ b[0] = new String( bs );
 406+ bs = b[0];
 407+ }
404408 //dp('toplevelblock:' + pp(b));
405409 if (bs.attribs === undefined) {
406410 bs.attribs = [];
407411 }
408 - bs.attribs.push(['data-sourcePos', blockStart + ':' + pos]);
 412+ bs.attribs.push(new KV('data-sourcePos', blockStart + ':' + pos));
 413+ //console.log( 'toplevelblock: ' + pp( bs ));
409414 }
410415 // XXX: only run this for lines that actually need it!
411416 //b.push({type: 'NEWLINE'});
@@ -461,10 +466,10 @@
462467
463468 para
464469 = s1:sol s2:sol c:inlineline {
465 - return s1.concat(s2, /* [{type: 'TAG', name: 'p'}],*/ c);
 470+ return s1.concat(s2, /* [new TagTk('p')],*/ c);
466471 }
467472
468 -br = space* &newline { return {type: 'SELFCLOSINGTAG', name: 'br'} }
 473+br = space* &newline { return new SelfclosingTagTk( 'br' ) }
469474
470475 // Syntax stops to limit inline expansion depending on syntactic context
471476 inline_breaks
@@ -504,14 +509,14 @@
505510 }
506511 } else {
507512 if (text.length) {
508 - out.push({ type: "TEXT", value: text.join('') });
 513+ out.push( text.join('') );
509514 text = [];
510515 }
511516 out.push(ci);
512517 }
513518 }
514519 if (text.length) {
515 - out.push({ type: 'TEXT', value: text.join('') });
 520+ out.push( text.join('') );
516521 }
517522 //dp('inline out:' + pp(out));
518523 return out;
@@ -531,14 +536,14 @@
532537 }
533538 } else {
534539 if (text.length) {
535 - out.push({type: 'TEXT', value: text.join('')});
 540+ out.push( text.join('') );
536541 text = [];
537542 }
538543 out.push(ci);
539544 }
540545 }
541546 if (text.length) {
542 - out.push({type: 'TEXT', value: text.join('')});
 547+ out.push( text.join('') );
543548 }
544549 //dp('inlineline out:' + pp(out));
545550 return out;
@@ -550,7 +555,7 @@
551556 / & '{' ( & '{{{{{' template / tplarg / template )
552557 /// & '{' ( tplarg / template )
553558 // Eat three opening brackets as text.
554 - / '[[[' { return { type: 'TEXT', value: '[[[' } }
 559+ / '[[[' { return '[[[' }
555560 / & '[' ( wikilink / extlink )
556561 / & "'" quote
557562
@@ -563,7 +568,7 @@
564569 & { return setFlag('h'); }
565570 c:inlineline
566571 e:'='+
567 - spc:(sp:space+ { return {type: 'TEXT', value: sp.join('') } } / comment)*
 572+ spc:(sp:space+ { return sp.join('') } / comment)*
568573 &eolf
569574 {
570575 clearFlag('h');
@@ -571,24 +576,24 @@
572577 // convert surplus equals into text
573578 if(s.length > level) {
574579 var extras = s.substr(0, s.length - level);
575 - if(c[0].type == 'TEXT') {
576 - c[0].value = extras + c[0].value;
 580+ if(c[0].constructor === String) {
 581+ c[0] = extras + c[0];
577582 } else {
578 - c.unshift({type: 'TEXT', value: extras});
 583+ c.unshift( extras );
579584 }
580585 }
581586 if(e.length > level) {
582587 var extras = e.substr(0, e.length - level),
583588 lastElem = c[c.length - 1];
584 - if(lastElem.type == 'TEXT') {
585 - lastElem.value = lastElem.value + extras;
 589+ if(lastElem.constructor === String) {
 590+ lastElem += extras;
586591 } else {
587 - c.push({type: 'TEXT', value: extras});
 592+ c.push( extras );
588593 }
589594 }
590595
591 - return [{type: 'TAG', name: 'h' + level}]
592 - .concat(c, [{type: 'ENDTAG', name: 'h' + level}, spc]);
 596+ return [new TagTk( 'h' + level )]
 597+ .concat(c, [new EndTagTk( 'h' + level ), spc]);
593598 }
594599 / & { dp('nomatch exit h'); clearFlag('h'); return false } { return null }
595600 ) { return r }
@@ -596,12 +601,12 @@
597602
598603 pre_indent
599604 = l:pre_indent_line ls:(sol pre_indent_line)* {
600 - return [{type: 'TAG', name: 'pre'}]
 605+ return [new TagTk( 'pre' )]
601606 .concat( [l], ls
602 - , [{type: 'ENDTAG', name: 'pre'}]);
 607+ , [new EndTagTk( 'pre' )]);
603608 }
604609 pre_indent_line = space l:inlineline {
605 - return [{type: 'TEXT', value: '\n'}].concat(l);
 610+ return [ '\n' ].concat(l);
606611 }
607612
608613
@@ -618,11 +623,9 @@
619624
620625 urllink
621626 = target:url {
622 - return [ { type: 'TAG',
623 - name: 'a',
624 - attribs: [['href', target]] }
625 - , {type: 'TEXT', value: target}
626 - , {type: 'ENDTAG', name: 'a'}
 627+ return [ new TagTk( 'a', [new KV('href', target)] )
 628+ , target
 629+ , new EndTagTk( 'a' )
627630 ];
628631 }
629632
@@ -636,20 +639,16 @@
637640 clearFlag('extlink');
638641 if ( text == '' ) {
639642 // XXX: Link numbering should be implemented in post-processor.
640 - text = [{type: 'TEXT', value: "[" + linkCount + "]"}];
 643+ text = [ "[" + linkCount + "]" ];
641644 linkCount++;
642645 }
643646 return [
644 - {
645 - type: 'TAG',
646 - name: 'a',
647 - attribs: [
648 - ['href', target],
649 - ['data-type', 'external']
650 - ],
651 - }
 647+ new TagTk( 'a', [
 648+ new KV('href', target),
 649+ new KV('data-type', 'external')
 650+ ] ),
652651 ].concat( text
653 - , [{type: 'ENDTAG', name: 'a'}]);
 652+ , [ new EndTagTk( 'a' )]);
654653 }
655654 / "[" & { clearFlag('extlink'); return false; }
656655
@@ -719,42 +718,16 @@
720719 params:(newline? "|" newline? p:template_param { return p })*
721720 "}}" {
722721 target = flatten( target );
723 - var obj = {
724 - type: 'SELFCLOSINGTAG',
725 - name: 'template',
726 - attribs: [], //[['data-target', JSON.stringify(target)]],
727 - orderedArgs: params,
728 - //args: {},
729 - target: target
730 - };
731 - // XXX: this is kind of broken, as arg keys need to be expanded
732 - //if (params && params.length) {
733 - // var position = 1;
734 - // for ( var i = 0, l = params.length; i < l; i++ ) {
735 - // var param = params[i];
736 - // if ( param[0] === null ) {
737 - // obj.args[position] = param[1];
738 - // position++;
739 - // } else {
740 - // // Last value wins for duplicates.
741 - // obj.args[param[0]] = param[1];
742 - // }
743 - // }
744 - // // HACK: temporarily also push the args into an attribute
745 - // // (just for debugging)
746 - // obj.attribs.push(['data-json-args', JSON.stringify(obj.args)]);
747 - //}
748 - // Should actually use a self-closing tag here, but the Node HTML5
749 - // parser only recognizes known self-closing tags for now, so use an
750 - // explicit end tag for now.
751 - //console.log(pp(obj));
 722+ var obj = new SelfclosingTagTk( 'template' );
 723+ obj.orderedArgs = params;
 724+ obj.target = target;
752725 //console.log( 'tokenizer template ' + JSON.stringify( target ));
753726 return obj;
754727 }
755728
756729 // XXX: support template and args in target!
757730 //template_target
758 -// = h:( !"}}" x:([^|\n]) { return x } )* { return { type: 'TEXT', value: h.join('') } }
 731+// = h:( !"}}" x:([^|\n]) { return x } )* { return h.join('') }
759732
760733 tplarg
761734 = "{{{"
@@ -762,20 +735,17 @@
763736 params:( newline? "|" newline? p:template_param { return p })*
764737 "}}}" {
765738 name = flatten( name );
766 - var obj = {
767 - type: 'SELFCLOSINGTAG',
768 - name: 'templatearg',
769 - attribs: [],
770 - argname: name,
771 - defaultvalue: []
772 - };
 739+ var obj = new SelfclosingTagTk( 'templatearg', [] );
 740+ obj.argname= name;
773741 if (params && params.length) {
774742 // HACK, not final.
775 - //obj.attribs.push(['data-defaultvalue', params[0][1]]);
 743+ //obj.attribs.push(new KV('data-defaultvalue', params[0][1]));
776744 //console.log(JSON.stringify(params, null, 2));
777 - //obj.attribs.push(['data-json-args', JSON.stringify(params)]);
778 - obj.defaultvalue = params[0][1].length ? params[0][1] :
779 - [{type: 'TEXT', value: ''}];
 745+ //obj.attribs.push(new KV('data-json-args', JSON.stringify(params)));
 746+ obj.defaultvalue = params[0].v.length ? params[0].v :
 747+ [ '' ];
 748+ } else {
 749+ obj.defaultvalue = [];
780750 }
781751 //console.log( 'tokenizer tplarg ' + JSON.stringify( obj, null, 2 ));
782752 return obj;
@@ -785,14 +755,14 @@
786756 = name:template_param_name space* "=" space* c:template_param_text? {
787757 //console.log( 'named template_param matched' + pp([name, flatten( c )]) );
788758 if ( c !== '' ) {
789 - return [name, flatten( c )];
 759+ return new KV(name, flatten( c ));
790760 } else {
791 - return [name, []];
 761+ return new KV(name, []);
792762 }
793763 } / c:template_param_text {
794 - return [[], flatten( c ) ];
 764+ return new KV([], flatten( c ) );
795765 }
796 - / & [|}] { return [[], []]; }
 766+ / & [|}] { return new KV([], []); }
797767
798768
799769 // FIXME: handle template args and templates in key! (or even parser functions?)
@@ -827,27 +797,24 @@
828798 // class. Can we work out a static negative class instead?
829799 // XXX: Exclude uppercase chars from non-latin languages too!
830800 trail:(! [A-Z \t(),.:-] tc:text_char { return tc })* {
831 - var obj = {
832 - type: 'TAG',
833 - name: 'a',
834 - attribs: [
835 - ['data-type', 'internal']
836 - ]
837 - },
 801+ var obj = new TagTk( 'a',
 802+ [
 803+ new KV('data-type', 'internal')
 804+ ] ),
838805 textTokens = [];
839 - obj.attribs.push(['href', target]);
 806+ obj.attribs.push( new KV('href', target) );
840807 if (lcontent && lcontent.length) {
841808 textTokens = lcontent;
842809 if (trail) {
843 - textTokens.push( { type: 'TEXT', value: trail.join('') } );
 810+ textTokens.push( trail.join('') );
844811 }
845812 } else {
846813 if (trail) {
847814 target += trail.join('');
848815 }
849 - textTokens = [{type: 'TEXT', value: target}];
 816+ textTokens = [ target ];
850817 }
851 - return [obj].concat(textTokens, [{type: 'ENDTAG', name: 'a'}]);
 818+ return [obj].concat(textTokens, [new EndTagTk( 'a' )]);
852819 }
853820
854821 link_target
@@ -872,18 +839,16 @@
873840 link_end = "]]"
874841
875842 /* Generic quote production for italic and bold, further processed in a token
876 - * stream transformation in doQuotes. Relies on NEWLINE tokens being emitted
 843+ * stream transformation in doQuotes. Relies on NlTk tokens being emitted
877844 * for each line of text to balance quotes per line.
878845 *
879846 * We are not using a simple pair rule here as we need to support mis-nested
880847 * bolds/italics and MediaWiki's special heuristics for apostrophes, which are
881848 * all not context free. */
882849 quote = "''" x:"'"* {
883 - return {
884 - type: 'TAG',
885 - name : 'mw-quote', // Will be consumed in token transforms
886 - value: "''" + x.join('')
887 - }
 850+ var res = new TagTk( 'mw-quote' ); // Will be consumed in token transforms
 851+ res.value = "''" + x.join('');
 852+ return res;
888853 }
889854
890855 /* XXX: Extension tags can require a change in the tokenizer mode, which
@@ -901,16 +866,16 @@
902867 = "<pre"
903868 attribs:generic_attribute*
904869 ">"
905 - ts:(t1:[^<]+ { return {type:'TEXT',value:t1.join('')} }
 870+ ts:(t1:[^<]+ { return t1.join('') }
906871 / nowiki
907 - / !"</pre>" t2:. {return {type:'TEXT',value:t2}})+
 872+ / !"</pre>" t2:. { return t2 })+
908873 ("</pre>" / eof) {
909874 // return nowiki tags as well?
910875 //console.log('inpre');
911 - return [ {type: 'TAG', name: 'pre', attribs: attribs} ]
912 - .concat(ts, [{type: 'ENDTAG', name: 'pre'}]);
 876+ return [ new TagTk( 'pre', attribs ) ]
 877+ .concat(ts, [ new EndTagTk( 'pre' ) ]);
913878 }
914 - / "</pre>" { return {type: 'TEXT', value: "</pre>"}; }
 879+ / "</pre>" { return "</pre>"; }
915880
916881 nowiki
917882 = "<nowiki>" nc:nowiki_content "</nowiki>" {
@@ -919,9 +884,9 @@
920885 }
921886 / "<nowiki>" {
922887 //console.log('nowiki fallback');
923 - return [{type: 'TEXT', value: '<nowiki>'}];
 888+ return ['<nowiki>'];
924889 }
925 - / "</nowiki>" { return [{type: 'TEXT', value: '</nowiki>'}]; }
 890+ / "</nowiki>" { return ['</nowiki>']; }
926891
927892 nowiki_content
928893 = ts:( t:[^<]+ { return t.join('') }
@@ -932,7 +897,7 @@
933898 / (!("</nowiki>" / "</pre>") c:. {return c})
934899 )* {
935900 // return nowiki tags as well?
936 - return [{type: 'TEXT', value: ts.join('')}];
 901+ return [ts.join('')];
937902 }
938903
939904 // See http://dev.w3.org/html5/spec/Overview.html#syntax-tag-name and
@@ -946,13 +911,13 @@
947912 // abort match if tag is not block-level
948913 return null;
949914 }
950 - var res = {name: name, attribs: attribs};
 915+ var res;
951916 if ( end != '' ) {
952 - res.type = 'ENDTAG';
 917+ res = new EndTagTk( name, attribs );
953918 } else if ( selfclose != '' ) {
954 - res.type = 'SELFCLOSINGTAG';
 919+ res = new SelfclosingTagTk( name, attribs );
955920 } else {
956 - res.type = 'TAG';
 921+ res = new TagTk( name, attribs );
957922 }
958923 return [res];
959924 }
@@ -974,15 +939,16 @@
975940 space*
976941 selfclose:"/"?
977942 ">" {
978 - var res = {name: name.join(''), attribs: attribs};
 943+ name = name.join('');
 944+ var res;
979945 if ( end != '' ) {
980 - res.type = 'ENDTAG';
 946+ res = new EndTagTk( name, attribs );
981947 } else if ( selfclose != '' ) {
982 - res.type = 'SELFCLOSINGTAG';
 948+ res = new SelfclosingTagTk( name, attribs );
983949 } else {
984 - res.type = 'TAG';
 950+ res = new TagTk( name, attribs );
985951 }
986 - res.attribs.push(['data-sourceTagPos', (tagStartPos - 1) + ":" + pos]);
 952+ res.attribs.push(new KV('data-sourceTagPos', (tagStartPos - 1) + ":" + pos));
987953 return res;
988954 }
989955
@@ -993,9 +959,9 @@
994960 v:generic_attribute_value { return v })?
995961 {
996962 if ( value !== '' ) {
997 - return [name, value];
 963+ return new KV( name, value );
998964 } else {
999 - return [name,''];
 965+ return new KV( name, '' );
1000966 }
1001967 }
1002968
@@ -1024,9 +990,9 @@
1025991 /* Lists */
1026992 lists = e:(dtdd / li) es:(sol (dtdd / li))*
1027993 {
1028 - return annotateList( [ { type: 'TAG', name: 'list'} ]
 994+ return annotateList( [ new TagTk( 'list' ) ]
1029995 .concat(flatten([e].concat(es))
1030 - ,[{ type: 'ENDTAG', name: 'list' }]));
 996+ ,[ new EndTagTk( 'list' ) ]));
1031997 }
1032998
1033999 li = bullets:list_char+
@@ -1035,10 +1001,9 @@
10361002 {
10371003 if ( c == '' )
10381004 c = [];
1039 - return [ { type: 'TAG',
1040 - name: 'listItem',
1041 - bullets: bullets }
1042 - , c ];
 1005+ var li = new TagTk( 'listItem' );
 1006+ li.bullets = bullets;
 1007+ return [ li, c ];
10431008 }
10441009
10451010 dtdd
@@ -1055,17 +1020,18 @@
10561021 // Convert trailing space into &nbsp;
10571022 // XXX: This should be moved to a serializer
10581023 //var clen = c.length;
1059 - //if (clen && c[clen - 1].type === 'TEXT') {
 1024+ //if (clen && c[clen - 1].constructor === String) {
10601025 // var val = c[clen - 1].value;
10611026 // if(val.length && val[val.length - 1] == ' ') {
10621027 // c[clen - 1].value = val.substr(0, val.length - 1) + "\u00a0";
10631028 // }
10641029 //}
10651030
1066 - return [ { type: 'TAG', name: 'listItem', bullets: bullets + ";" } ]
1067 - .concat( c
1068 - ,[{ type: 'TAG', name: 'listItem', bullets: bullets + ":" } ]
1069 - , d );
 1031+ var li = new TagTk( 'listItem' );
 1032+ li.bullets = bullets + ";";
 1033+ var li2 = new TagTk( 'listItem' );
 1034+ li2.bullets = bullets + ":";
 1035+ return [ li ].concat( c, [ li2 ], d );
10701036 }
10711037 // Fall-back case to clear the colon flag
10721038 / & { return true; } { syntaxFlags['colon'] = 0; return null; }
@@ -1111,19 +1077,19 @@
11121078 space*
11131079 te:table_end_tag? // can occur somewhere in the middle of the line too
11141080 {
1115 - var tok = [{type: 'TAG', name: 'table'}];
 1081+ var toks = [ new TagTk( 'table' ) ];
11161082 if ( ta )
1117 - tok[0].attribs = ta;
 1083+ toks[0].attribs = ta;
11181084 if ( te )
1119 - tok = tok.concat(te);
1120 - return tok;
 1085+ toks = toks.concat( te );
 1086+ return toks;
11211087 }
11221088
11231089 table_caption_tag
11241090 = "|+"
11251091 c:inline* {
1126 - return [{type: 'TAG', name: 'caption'}]
1127 - .concat( c, [{type: 'ENDTAG', name: 'caption'}]);
 1092+ return [ new TagTk( 'caption' )]
 1093+ .concat( c, [ new EndTagTk( 'caption' ) ]);
11281094 }
11291095
11301096
@@ -1138,7 +1104,7 @@
11391105 // We rely on our tree builder to close the row as needed. This is
11401106 // needed to support building tables from fragment templates with
11411107 // individual cells or rows.
1142 - var trToken = [{type: 'TAG', name: 'tr', attribs: a}];
 1108+ var trToken = [ new TagTk( 'tr', a ) ];
11431109 if ( !td ) {
11441110 return trToken;
11451111 } else {
@@ -1167,8 +1133,8 @@
11681134 a = [];
11691135 }
11701136 //dp("table data result: " + pp(td) + ", attribts: " + pp(a));
1171 - return [{ type: 'TAG', name: 'td', attribs: a}]
1172 - .concat( td, [{type: 'ENDTAG', name: 'td'}] );
 1137+ return [ new TagTk( 'td', a )]
 1138+ .concat( td, [ new EndTagTk ( 'td' ) ] );
11731139 }
11741140
11751141 table_heading_tags
@@ -1184,13 +1150,13 @@
11851151 if ( a == '' ) {
11861152 a = [];
11871153 }
1188 - return [{type: 'TAG', name: 'th', attribs: a}]
1189 - .concat( c, [{type: 'ENDTAG', name: 'th'}] );
 1154+ return [ new TagTk( 'th', a ) ]
 1155+ .concat( c, [new EndTagTk( 'th' )] );
11901156 }
11911157
11921158 table_end_tag
11931159 = "|}" {
1194 - var tok = [{type: 'ENDTAG', name: 'table'}];
 1160+ var tok = [new EndTagTk( 'table' )];
11951161 return tok;
11961162 }
11971163
@@ -1209,25 +1175,24 @@
12101176 * when those work as intended! */
12111177 table
12121178 = tas:table_start space* c:table_caption? b:table_body? te:table_end {
1213 - var res = {type: 'TAG', name: 'table'}
 1179+ var res = new TagTk( 'table' );
12141180 var body = b !== '' ? b : [];
12151181 dp("body: " + pp(body));
12161182 if (tas.length > 0) {
12171183 // FIXME: actually parse and build structure
1218 - //res.attribs = [['data-unparsed', tas.join('')]];
 1184+ //res.attribs = [new KV('data-unparsed', tas.join(''))];
12191185 res.attribs = tas;
12201186 }
12211187
12221188 if (c != '') {
1223 - var caption = [{type: 'TAG', name: 'caption'}]
1224 - .concat(c, [{type: 'ENDTAG', name: 'caption'}], te);
 1189+ var caption = [ new TagTk( 'caption' ) ]
 1190+ .concat(c, [new EndTagTk( 'caption' )], te);
12251191 } else {
12261192 var caption = [];
12271193 }
12281194 //dp(pp(res));
12291195
1230 - return [res].concat(caption, body,
1231 - [{type: 'ENDTAG', name: 'table'}]);
 1196+ return [res].concat(caption, body, [new EndTagTk( 'table' )]);
12321197 }
12331198
12341199 table_start
@@ -1263,8 +1228,8 @@
12641229 table_firstrow
12651230 = td:(table_data / table_heading)+ {
12661231 //dp('firstrow: ' + pp(td));
1267 - return [{ type: 'TAG', name: 'tr' }]
1268 - .concat(td, [{type: 'ENDTAG', name: 'tr'}]);
 1232+ return [ new TagTk( 'tr' )]
 1233+ .concat(td, [ new EndTagTk( 'tr' )]);
12691234 }
12701235
12711236 table_row
@@ -1274,8 +1239,8 @@
12751240 a:(as:generic_attribute+ space* "|" !"|" { return as } )?
12761241 space*
12771242 td:(table_data / table_heading)* {
1278 - return n.concat([{type: 'TAG', name: 'tr'}]
1279 - , td, [{type: 'ENDTAG', name: 'tr'}]);
 1243+ return n.concat([ new TagTk( 'tr' )]
 1244+ , td, [ new EndTagTk( 'tr' )]);
12801245 }
12811246
12821247 table_data
@@ -1293,8 +1258,8 @@
12941259 a = [];
12951260 }
12961261 //dp("table data result: " + pp(td) + ", attribts: " + pp(a));
1297 - return n.concat( [{ type: 'TAG', name: 'td', attribs: a}]
1298 - , td, [{type: 'ENDTAG', name: 'td'}] );
 1262+ return n.concat( [ new TagTk( 'td', a ) ]
 1263+ , td, [ new EndTagTk( 'td' ) ] );
12991264 }
13001265
13011266 table_heading
@@ -1304,8 +1269,8 @@
13051270 if ( a == '' ) {
13061271 a = [];
13071272 }
1308 - return n.concat( [{type: 'TAG', name: 'th', attribs: a}]
1309 - , c, [{type: 'ENDTAG', name: 'th'}]);
 1273+ return n.concat( [ new TagTk( 'th', a )]
 1274+ , c, [ new EndTagTk( 'th' ) ]);
13101275 }
13111276
13121277 thtd_attribs
Index: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
@@ -178,11 +178,8 @@
179179 // XXX: Add round-trip info here?
180180 ]
181181 },
 182+ '[' + bits.join(' ') + ']',
182183 {
183 - type: 'TEXT',
184 - value: '[' + bits.join(' ') + ']'
185 - },
186 - {
187184 type: 'ENDTAG',
188185 name: 'a'
189186 },
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
@@ -10,13 +10,16 @@
1111 path = require('path'),
1212 fs = require('fs'),
1313 $ = require('jquery'),
14 - events = require('events');
 14+ events = require('events'),
 15+ defines = require('./mediawiki.parser.defines.js');
1516
1617 function PegTokenizer() {
1718 var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
1819 this.src = fs.readFileSync( pegSrcPath, 'utf8' );
1920 }
2021
 22+
 23+
2124 // Inherit from EventEmitter
2225 PegTokenizer.prototype = new events.EventEmitter();
2326 PegTokenizer.prototype.constructor = PegTokenizer;
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.Sanitizer.js
@@ -34,7 +34,7 @@
3535 }
3636 var hrefKV = this.manager.env.lookupKV( token.attribs, 'href' );
3737 if ( hrefKV !== null ) {
38 - var bits = hrefKV[1].match( /(.*?\/\/)([^\/]+)(\/?.*)/ );
 38+ var bits = hrefKV.v.match( /(.*?\/\/)([^\/]+)(\/?.*)/ );
3939 if ( bits ) {
4040 proto = bits[1];
4141 host = bits[2];
@@ -42,10 +42,10 @@
4343 } else {
4444 proto = '';
4545 host = '';
46 - path = hrefKV[1];
 46+ path = hrefKV.v;
4747 }
4848 host = this._stripIDNs( host );
49 - hrefKV[1] = proto + host + path;
 49+ hrefKV.v = proto + host + path;
5050 }
5151 return { token: token };
5252 };
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
@@ -16,7 +16,8 @@
1717 qs = require('querystring'),
1818 ParserFunctions = require('./ext.core.ParserFunctions.js').ParserFunctions,
1919 AttributeTransformManager = require('./mediawiki.TokenTransformManager.js')
20 - .AttributeTransformManager;
 20+ .AttributeTransformManager,
 21+ defines = require('./mediawiki.parser.defines.js');
2122
2223
2324 function TemplateHandler ( manager ) {
@@ -70,11 +71,9 @@
7172 },
7273 transformCB,
7374 i = 0,
74 - kvs = [],
75 - res,
76 - kv;
 75+ res;
7776
78 - var attributes = [[[{ type: 'TEXT', value: '' }] , token.target ]]
 77+ var attributes = [ {k: [''] , v: token.target} ]
7978 .concat( this._nameArgs( token.orderedArgs ) );
8079
8180 //this.manager.env.dp( 'before AttributeTransformManager: ' +
@@ -109,8 +108,8 @@
110109 out = [];
111110 for ( var i = 0, l = orderedArgs.length; i < l; i++ ) {
112111 // FIXME: Also check for whitespace-only named args!
113 - if ( ! orderedArgs[i][0].length ) {
114 - out.push( [[{ type: 'TEXT', value: n }], orderedArgs[i][1]]);
 112+ if ( ! orderedArgs[i].k.length ) {
 113+ out.push( {k: [ n.toString() ], v: orderedArgs[i].v } );
115114 n++;
116115 } else {
117116 out.push( orderedArgs[i] );
@@ -129,7 +128,7 @@
130129 this.manager.env.dp( 'TemplateHandler._returnAttributes: ' + JSON.stringify(attributes) );
131130 // Remove the target from the attributes
132131 tplExpandData.attribsAsync = false;
133 - tplExpandData.target = attributes[0][1];
 132+ tplExpandData.target = attributes[0].v;
134133 attributes.shift();
135134 tplExpandData.expandedArgs = attributes;
136135 if ( tplExpandData.overallAsync ) {
@@ -195,23 +194,10 @@
196195 if( checkRes ) {
197196 // Loop detected or depth limit exceeded, abort!
198197 res = [
199 - {
200 - type: 'TEXT',
201 - value: checkRes
202 - },
203 - {
204 - type: 'TAG',
205 - name: 'a',
206 - attrib: [['href', target]]
207 - },
208 - {
209 - type: 'TEXT',
210 - value: target
211 - },
212 - {
213 - type: 'ENDTAG',
214 - name: 'a'
215 - }
 198+ checkRes,
 199+ new TagTk( 'a', [{k: 'href', v: target}] ),
 200+ target,
 201+ new EndTagTk( 'a' )
216202 ];
217203 if ( tplExpandData.overallAsync ) {
218204 return tplExpandData.cb( res, false );
@@ -367,7 +353,7 @@
368354 */
369355 TemplateHandler.prototype.onTemplateArg = function ( token, cb, frame ) {
370356
371 - var attributes = [[token.argname, token.defaultvalue]];
 357+ var attributes = [{k: token.argname, v: token.defaultvalue}];
372358
373359 token.resultTokens = false;
374360
@@ -391,8 +377,8 @@
392378
393379 TemplateHandler.prototype._returnArgAttributes = function ( token, cb, frame, attributes ) {
394380 //console.log( '_returnArgAttributes: ' + JSON.stringify( attributes ));
395 - var argName = this.manager.env.tokensToString( attributes[0][0] ).trim(),
396 - defaultValue = attributes[0][1],
 381+ var argName = this.manager.env.tokensToString( attributes[0].k ).trim(),
 382+ defaultValue = attributes[0].v,
397383 res;
398384 if ( argName in this.manager.args ) {
399385 // return tokens for argument
@@ -405,7 +391,7 @@
406392 if ( defaultValue.length ) {
407393 res = defaultValue;
408394 } else {
409 - res = [{ type: 'TEXT', value: '{{{' + argName + '}}}' }];
 395+ res = [ '{{{' + argName + '}}}' ];
410396 }
411397 }
412398 if ( token.resultTokens !== false ) {
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.PostExpandParagraphHandler.js
@@ -74,7 +74,7 @@
7575 //console.log( 'PostExpandParagraphHandler.onAny' );
7676 this.tokens.push( token );
7777 if ( token.type === 'COMMENT' ||
78 - ( token.type === 'TEXT' && token.value.match( /^[\t ]+$/ ) )
 78+ ( token.constructor === String && token.match( /^[\t ]+$/ ) )
7979 )
8080 {
8181 // Continue with collection..
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
@@ -25,7 +25,7 @@
2626 var kv;
2727 for ( var i = 0, l = kvs.length; i < l; i++ ) {
2828 kv = kvs[i];
29 - if ( kv[0] === key ) {
 29+ if ( kv.k === key ) {
3030 // found, return it.
3131 return kv;
3232 }
@@ -42,9 +42,9 @@
4343 var res = {};
4444 for ( var i = 0, l = kvs.length; i < l; i++ ) {
4545 var kv = kvs[i],
46 - key = this.tokensToString( kv[0] ).trim();
 46+ key = this.tokensToString( kv.k ).trim();
4747 if( res[key] === undefined ) {
48 - res[key] = kv[1];
 48+ res[key] = kv.v;
4949 }
5050 }
5151 //console.log( 'KVtoHash: ' + JSON.stringify( res ));
@@ -122,8 +122,8 @@
123123 JSON.stringify( token ) );
124124 continue;
125125 }
126 - if ( token.type === 'TEXT' ) {
127 - out.push( token.value );
 126+ if ( token.constructor === String ) {
 127+ out.push( token );
128128 } else if ( token.type === 'COMMENT' || token.type === 'NEWLINE' ) {
129129 // strip comments and newlines
130130 } else {

Follow-up revisions

Revision | Commit summary | Author | Date
r110657 | Fix nowiki tokenization regression introduced r110495 | gwicke | 13:10, 3 February 2012

Status & tagging log