r113351 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r113350 | r113351 | r113352 >
Date:09:00, 8 March 2012
Author:gwicke
Status:deferred
Tags:
Comment:
A bit more documentation and naming cleanup in the tokenizer wrapper.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js
@@ -249,10 +249,10 @@
250250 //console.warn('extlink href: ' + href );
251251 //console.warn( 'content: ' + JSON.stringify( content, null, 2 ) );
252252 // validate the href
253 - if ( this.imageParser.parseURL( href ) ) {
 253+ if ( this.imageParser.tokenizeURL( href ) ) {
254254 if ( content.length === 1 &&
255255 content[0].constructor === String &&
256 - this.imageParser.parseURL( content[0] ) &&
 256+ this.imageParser.tokenizeURL( content[0] ) &&
257257 this._isImageLink( content[0] ) )
258258 {
259259 var src = content[0];
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
@@ -32,16 +32,31 @@
3333 */
3434 PegTokenizer.prototype.process = function( text ) {
3535 var out, err;
36 - if ( !this.parser ) {
 36+ if ( !this.tokenizer ) {
 37+ // Construct a singleton static tokenizer.
3738 var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
3839 this.src = fs.readFileSync( pegSrcPath, 'utf8' );
39 - // Only create a single parser, as parse() is a static method.
40 - var parserSource = PEG.buildParser(this.src).toSource();
41 - //console.warn( parserSource );
42 - parserSource = parserSource.replace( 'parse: function(input, startRule) {',
 40+ var tokenizerSource = PEG.buildParser(this.src).toSource();
 41+
 42+ /* We patch the generated source to assign the arguments array for the
 43+ * parse function to a function-scoped variable. We use this to pass
 44+ * in callbacks and other information, which can be used from actions
 45+ * run when matching a production. In particular, we pass in a
 46+ * callback called for a chunk of tokens in toplevelblock. Setting this
 47+ * callback per call to parse() keeps the tokenizer reentrant, so that it
 48+ * can be reused to expand templates while a main parse is ongoing.
 49+ * PEG tokenizer construction is very expensive, so having a single
 50+ * reentrant tokenizer is a big win.
 51+ *
 52+ * We could also make modules available to the tokenizer by prepending
 53+ * requires to the source.
 54+ */
 55+ tokenizerSource = tokenizerSource.replace( 'parse: function(input, startRule) {',
4356 'parse: function(input, startRule) { var __parseArgs = arguments;' );
44 - //console.warn( parserSource );
45 - PegTokenizer.prototype.parser = eval( parserSource );
 57+ //console.warn( tokenizerSource );
 58+ PegTokenizer.prototype.tokenizer = eval( tokenizerSource );
 59+ // alias the parse method
 60+ this.tokenizer.tokenize = this.tokenizer.parse;
4661 }
4762
4863 // Some input normalization: force a trailing newline
@@ -52,7 +67,7 @@
5368 // XXX: Commented out exception handling during development to get
5469 // reasonable traces.
5570 //try {
56 - this.parser.parse(text, 'start',
 71+ this.tokenizer.tokenize(text, 'start',
5772 // callback
5873 this.emit.bind( this, 'chunk' ),
5974 // inline break test
@@ -68,12 +83,15 @@
6984 };
7085
7186 PegTokenizer.prototype.processImageOptions = function( text ) {
72 - return this.parser.parse(text, 'img_options', null, this );
 87+ return this.tokenizer.tokenize(text, 'img_options', null, this );
7388 };
7489
75 -PegTokenizer.prototype.parseURL = function( text ) {
 90+/**
 91+ * Tokenize a URL
 92+ */
 93+PegTokenizer.prototype.tokenizeURL = function( text ) {
7694 try {
77 - return this.parser.parse(text, 'url', null, this );
 95+ return this.tokenizer.tokenize(text, 'url', null, this );
7896 } catch ( e ) {
7997 return false;
8098 }

Status & tagging log