r113351 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r113350 | r113351 | r113352 >
Date:	09:00, 8 March 2012
Author:	gwicke
Status:	deferred
Tags:
Comment:	A bit more documentation and naming cleanup in the tokenizer wrapper.
Modified paths:	/trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js (modified) (history) /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js
—	—	@@ -249,10 +249,10 @@
250	250	//console.warn('extlink href: ' + href );
251	251	//console.warn( 'content: ' + JSON.stringify( content, null, 2 ) );
252	252	// validate the href
253		~~- if ( this.imageParser.parseURL( href ) ) {~~
	253	+ if ( this.imageParser.tokenizeURL( href ) ) {
254	254	if ( content.length === 1 &&
255	255	content[0].constructor === String &&
256		~~- this.imageParser.parseURL( content[0] ) &&~~
	256	+ this.imageParser.tokenizeURL( content[0] ) &&
257	257	this._isImageLink( content[0] ) )
258	258	{
259	259	var src = content[0];
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
—	—	@@ -32,16 +32,31 @@
33	33	*/
34	34	PegTokenizer.prototype.process = function( text ) {
35	35	var out, err;
36		~~- if ( !this.parser ) {~~
	36	+ if ( !this.tokenizer ) {
	37	+ // Construct a singleton static tokenizer.
37	38	var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
38	39	this.src = fs.readFileSync( pegSrcPath, 'utf8' );
39		~~- // Only create a single parser, as parse() is a static method.~~
40		~~- var parserSource = PEG.buildParser(this.src).toSource();~~
41		~~- //console.warn( parserSource );~~
42		~~- parserSource = parserSource.replace( 'parse: function(input, startRule) {',~~
	40	+ var tokenizerSource = PEG.buildParser(this.src).toSource();
	41	+
	42	+ /* We patch the generated source to assign the arguments array for the
	43	+ * parse function to a function-scoped variable. We use this to pass
	44	+ * in callbacks and other information, which can be used from actions
	45	+ * run when matching a production. In particular, we pass in a
	46	+ * callback called for a chunk of tokens in toplevelblock. Setting this
	47	+ * callback per call to parse() keeps the tokenizer reentrant, so that it
	48	+ * can be reused to expand templates while a main parse is ongoing.
	49	+ * PEG tokenizer construction is very expensive, so having a single
	50	+ * reentrant tokenizer is a big win.
	51	+ *
	52	+ * We could also make modules available to the tokenizer by prepending
	53	+ * requires to the source.
	54	+ */
	55	+ tokenizerSource = tokenizerSource.replace( 'parse: function(input, startRule) {',
43	56	'parse: function(input, startRule) { var __parseArgs = arguments;' );
44		~~- //console.warn( parserSource );~~
45		~~- PegTokenizer.prototype.parser = eval( parserSource );~~
	57	+ //console.warn( tokenizerSource );
	58	+ PegTokenizer.prototype.tokenizer = eval( tokenizerSource );
	59	+ // alias the parse method
	60	+ this.tokenizer.tokenize = this.tokenizer.parse;
46	61	}
47	62
48	63	// Some input normalization: force a trailing newline
—	—	@@ -52,7 +67,7 @@
53	68	// XXX: Commented out exception handling during development to get
54	69	// reasonable traces.
55	70	//try {
56		~~- this.parser.parse(text, 'start',~~
	71	+ this.tokenizer.tokenize(text, 'start',
57	72	// callback
58	73	this.emit.bind( this, 'chunk' ),
59	74	// inline break test
—	—	@@ -68,12 +83,15 @@
69	84	};
70	85
71	86	PegTokenizer.prototype.processImageOptions = function( text ) {
72		~~- return this.parser.parse(text, 'img_options', null, this );~~
	87	+ return this.tokenizer.tokenize(text, 'img_options', null, this );
73	88	};
74	89
75		~~-PegTokenizer.prototype.parseURL = function( text ) {~~
	90	+/**
	91	+ * Tokenize a URL
	92	+ */
	93	+PegTokenizer.prototype.tokenizeURL = function( text ) {
76	94	try {
77		~~- return this.parser.parse(text, 'url', null, this );~~
	95	+ return this.tokenizer.tokenize(text, 'url', null, this );
78	96	} catch ( e ) {
79	97	return false;
80	98	}

Status & tagging log

10:04, 8 March 2012 GWicke (talk | contribs) changed the status of r113351 [removed: new; added: deferred]