r113285 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r113284 | r113285 | r113286 >
Date: 20:06, 7 March 2012
Author: gwicke
Status: deferred
Tags:
Comment:
Token representation clean-up. Now all tokens are differentiated using
constructors instead of type attributes.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.Cite.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.NoIncludeOnly.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.PostExpandParagraphHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.util.TokenCollector.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformManager.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.defines.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js
@@ -110,28 +110,25 @@
111111 name: token.name,
112112 data: this._att(token.attribs)});
113113 break;
114 - default:
115 - switch (token.type) {
116 - case "COMMENT":
117 - this.emit('token', {type: 'Comment',
118 - data: token.value});
119 - break;
120 - case "END":
121 - this.emit('end');
122 - this.emit('token', { type: 'EOF' } );
123 - this.document = this.parser.document;
124 - if ( ! this.document.body ) {
125 - // HACK: This should not be needed really.
126 - this.document.body = this.parser.document.getElementsByTagName('body')[0];
127 - }
128 - // Emit the document to consumers
129 - //this.emit('document', this.document);
130 - break;
131 - default:
132 - console.warn("Unhandled token: " + JSON.stringify(token));
133 - break;
 114+ case CommentTk:
 115+ this.emit('token', {type: 'Comment',
 116+ data: token.value});
 117+ break;
 118+ case EOFTk:
 119+ this.emit('end');
 120+ this.emit('token', { type: 'EOF' } );
 121+ this.document = this.parser.document;
 122+ if ( ! this.document.body ) {
 123+ // HACK: This should not be needed really.
 124+ this.document.body = this.parser.document.getElementsByTagName('body')[0];
134125 }
 126+ // Emit the document to consumers
 127+ //this.emit('document', this.document);
135128 break;
 129+ default:
 130+ console.warn("Unhandled token: " + JSON.stringify(token));
 131+ break;
 132+ break;
136133 }
137134 };
138135
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.NoIncludeOnly.js
@@ -36,7 +36,7 @@
3737
3838 OnlyInclude.prototype.onAnyInclude = function ( token, manager ) {
3939 //this.manager.env.dp( 'onAnyInclude', token, this );
40 - if ( token.type === 'END' ) {
 40+ if ( token.constructor === EOFTk ) {
4141 this.inOnlyInclude = false;
4242 if ( this.accum.length && ! this.foundOnlyInclude ) {
4343 var res = this.accum;
@@ -92,7 +92,7 @@
9393 } else {
9494 tokens.shift();
9595 if ( tokens.length &&
96 - tokens[tokens.length - 1].type !== 'END' ) {
 96+ tokens[tokens.length - 1].constructor !== EOFTk ) {
9797 tokens.pop();
9898 }
9999 return { tokens: tokens };
@@ -114,7 +114,7 @@
115115 if ( isInclude ) {
116116 tokens.shift();
117117 if ( tokens.length &&
118 - tokens[tokens.length - 1].type !== 'END' ) {
 118+ tokens[tokens.length - 1].constructor !== EOFTk ) {
119119 tokens.pop();
120120 }
121121 return { tokens: tokens };
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformManager.js
@@ -37,7 +37,7 @@
3838
3939 TokenTransformManager.prototype._construct = function () {
4040 this.transformers = {
41 - tag: {}, // for TAG, ENDTAG, SELFCLOSINGTAG, keyed on name
 41+ tag: {}, // for TagTk, EndTagTk, SelfclosingTagTk, keyed on name
4242 text: [],
4343 newline: [],
4444 comment: [],
@@ -452,19 +452,16 @@
453453 case SelfclosingTagTk:
454454 res = this._transformTagToken( token, cb );
455455 break;
 456+ case CommentTk:
 457+ res = this._transformToken( token, ts.comment, cb );
 458+ break;
 459+ case EOFTk:
 460+ res = this._transformToken( token, ts.end, cb );
 461+ break;
456462 default:
457 - switch( token.type ) {
458 - case 'COMMENT':
459 - res = this._transformToken( token, ts.comment, cb );
460 - break;
461 - case 'END':
462 - res = this._transformToken( token, ts.end, cb );
463 - break;
464 - default:
465 - res = this._transformToken( token, ts.martian, cb );
466 - break;
467 - }
 463+ res = this._transformToken( token, ts.martian, cb );
468464 break;
 465+ break;
469466 }
470467
471468 if( res.tokens ) {
@@ -526,15 +523,9 @@
527524 function ( tokens, notYetDone, allTokensProcessed ) {
528525 //tokens = this._transformPhase2( this.frame, tokens, this.parentCB );
529526
530 - //if ( tokens.length && tokens[tokens.length - 1].type === 'END' ) {
531 - // this.env.dp( 'AsyncTokenTransformManager, stripping end ' );
532 - // tokens.pop();
533 - //}
534 -
535527 this.env.dp( 'AsyncTokenTransformManager._returnTokens, emitting chunk: ',
536528 tokens );
537529
538 -
539530 if( !allTokensProcessed ) {
540531 var res = this.transformTokens( tokens, this._returnTokens.bind(this) );
541532 this.emit( 'chunk', res.tokens );
@@ -559,13 +550,7 @@
560551 this.emit( 'chunk', tokens );
561552
562553 if ( ! notYetDone ) {
563 - //console.warn('AsyncTokenTransformManager._returnTokens done. tokens:' +
564 - // JSON.stringify( tokens, null, 2 ) + ', listeners: ' +
565 - // JSON.stringify( this.listeners( 'chunk' ), null, 2 ) );
566554 // signal our done-ness to consumers.
567 - //if ( this.atTopLevel ) {
568 - // this.emit( 'chunk', [{type: 'END'}]);
569 - //}
570555 this.emit( 'end' );
571556 // and reset internal state.
572557 this._reset();
@@ -663,18 +648,15 @@
664649 case SelfclosingTagTk:
665650 res = this._transformTagToken( token, this.prevToken );
666651 break;
 652+ case CommentTk:
 653+ res = this._transformToken( token, ts.comment, this.prevToken );
 654+ break;
 655+ case EOFTk:
 656+ res = this._transformToken( token, ts.end, this.prevToken );
 657+ break;
667658 default:
668 - switch( token.type ) {
669 - case 'COMMENT':
670 - res = this._transformToken( token, ts.comment, this.prevToken );
671 - break;
672 - case 'END':
673 - res = this._transformToken( token, ts.end, this.prevToken );
674 - break;
675 - default:
676 - res = this._transformToken( token, ts.martian, this.prevToken );
677 - break;
678 - }
 659+ res = this._transformToken( token, ts.martian, this.prevToken );
 660+ break;
679661 }
680662
681663 if( res.tokens ) {
@@ -766,7 +748,7 @@
767749 pipe.on( 'end',
768750 this.onEnd.bind( this, this._returnAttributeKey.bind( this, i ) )
769751 );
770 - pipe.process( attributes[i].k.concat([{type:'END'}]) );
 752+ pipe.process( attributes[i].k.concat([ new EOFTk() ]) );
771753 } else {
772754 kv.key = cur.k;
773755 }
@@ -785,7 +767,7 @@
786768 this.onEnd.bind( this, this._returnAttributeValue.bind( this, i ) )
787769 );
788770 //console.warn('starting attribute transform of ' + JSON.stringify( attributes[i].v ) );
789 - pipe.process( cur.v.concat([{type:'END'}]) );
 771+ pipe.process( cur.v.concat([ new EOFTk() ]) );
790772 } else {
791773 kv.value = cur.v;
792774 }
@@ -820,7 +802,7 @@
821803 * Collect chunks returned from the pipeline
822804 */
823805 AttributeTransformManager.prototype.onChunk = function ( cb, chunk ) {
824 - if ( chunk.length && chunk[chunk.length - 1].type === 'END' ) {
 806+ if ( chunk.length && chunk[chunk.length - 1].constructor === EOFTk ) {
825807 chunk.pop();
826808 }
827809 cb( chunk, true );
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.defines.js
@@ -18,7 +18,6 @@
1919 TagTk.prototype.toString = toString;
2020
2121 function EndTagTk( name, attribs ) {
22 - //this.type = 'ENDTAG';
2322 this.name = name;
2423 this.attribs = attribs || [];
2524 }
@@ -52,7 +51,6 @@
5352 NlTk.prototype.toString = toString;
5453
5554 function CommentTk( value ) {
56 - this.type = 'COMMENT';
5755 this.value = value;
5856 }
5957 CommentTk.prototype = new Object();
@@ -62,9 +60,7 @@
6361 CommentTk.prototype.constructor = CommentTk;
6462 CommentTk.prototype.toString = toString;
6563
66 -function EOFTk( ) {
67 - this.type = 'END';
68 -}
 64+function EOFTk( ) { }
6965 EOFTk.prototype = new Object();
7066 EOFTk.prototype.toJSON = function () {
7167 return $.extend( { type: 'EOFTk' }, this );
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -354,16 +354,6 @@
355355 start
356356 = e:toplevelblock* newline* {
357357 // end is passed inline as a token, as well as a separate event for now.
358 -
359 - // this does not work yet.
360 - //console.warn('about to emit' + pp(self));
361 - //self._tokenizer.emit('chunk', [ { type: 'END' } ] );
362 - //self._tokenizer.emit('end');
363 - // Append the end (for obvious reasons this should not
364 - // be part of a stream, only when tokenizing complete
365 - // texts)
366 - //console.warn( pp( flatten ( e ) ) );
367 - cache = {};
368358 __parseArgs[2]( [ new EOFTk( ) ] );
369359 return []; //flatten(e);
370360 }
@@ -550,7 +540,7 @@
551541 comment
552542 = '<!--' c:comment_chars* ('-->' / eof)
553543 cs:(space* newline space* cn:comment { return cn })* {
554 - return [{ type: 'COMMENT', value: c.join('') }].concat(cs);
 544+ return [new CommentTk( c.join('') )].concat(cs);
555545 }
556546
557547 comment_chars
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js
@@ -170,9 +170,9 @@
171171 var token = tokens[i];
172172 if ( token.constructor === String ) {
173173 s += token;
174 - } else if ( token.type === 'NEWLINE' ) {
 174+ } else if ( token.constructor === NlTk ) {
175175 s += '\n'; // XXX: preserve original newline
176 - } else if ( token.type === 'COMMENT' ) {
 176+ } else if ( token.constructor === CommentTk ) {
177177 // strip it
178178 } else {
179179 var res = this.imageParser.processImageOptions( s, 'img_options' ),
Index: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
@@ -125,7 +125,7 @@
126126 return tokenCTX;
127127 } else if ( this.isActive &&
128128 // Also accept really broken ref close tags..
129 - ['TAG', 'ENDTAG', 'SELFCLOSINGTAG'].indexOf(token.type) >= 0 &&
 129+ [TagTk, EndTagTk, SelfclosingTagTk].indexOf(token.constructor) >= 0 &&
130130 token.name.toLowerCase() === 'ref'
131131 )
132132 {
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
@@ -280,7 +280,7 @@
281281 // Strip 'end' tokens and trailing newlines
282282 var l = res[res.length - 1];
283283 while ( res.length &&
284 - ( l.type === 'END' || l.constructor === NlTk )
 284+ ( l.constructor === EOFTk || l.constructor === NlTk )
285285 )
286286 {
287287 this.manager.env.dp( 'TemplateHandler, stripping end or whitespace tokens' );
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.PostExpandParagraphHandler.js
@@ -73,7 +73,7 @@
7474 PostExpandParagraphHandler.prototype.onAny = function ( token, frame, cb ) {
7575 //console.warn( 'PostExpandParagraphHandler.onAny' );
7676 this.tokens.push( token );
77 - if ( token.type === 'COMMENT' ||
 77+ if ( token.constructor === CommentTk ||
7878 ( token.constructor === String && token.match( /^[\t ]+$/ ) )
7979 )
8080 {
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
@@ -194,7 +194,7 @@
195195 }
196196 if ( token.constructor === String ) {
197197 out.push( token );
198 - } else if ( token.type === 'COMMENT' || token.type === 'NEWLINE' ) {
 198+ } else if ( token.constructor === CommentTk || token.constructor === NlTk ) {
199199 // strip comments and newlines
200200 } else {
201201 if ( strict ) {
Index: trunk/extensions/VisualEditor/modules/parser/ext.util.TokenCollector.js
@@ -61,7 +61,7 @@
6262 this.tokens.push ( token );
6363 this.isActive = false;
6464 this.manager.removeTransform( this.rank + this._anyDelta, 'any' );
65 - if ( token.type !== 'END' || this.toEnd ) {
 65+ if ( token.constructor !== EOFTk || this.toEnd ) {
6666 // end token
6767 res = this.transformation ( this.tokens, this.cb, this.manager );
6868 this.tokens = [];
@@ -75,7 +75,7 @@
7676 this.tokens = [];
7777 return { tokens: res };
7878 }
79 - } else if ( token.type !== 'END' ) {
 79+ } else if ( token.constructor !== EOFTk ) {
8080 this.manager.env.dp( 'starting collection on ', token );
8181 // start collection
8282 this.tokens.push ( token );

Status & tagging log