r108462 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r108461 | r108462 | r108463 >
Date:	19:33, 9 January 2012
Author:	gwicke
Status:	deferred
Tags:
Comment:	A bit of cleanup in ParserPipeline, with better and more consistent support for multiple input types.
Modified paths:	/trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js (modified) (history) /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js (modified) (history) /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
—	—	@@ -23,10 +23,10 @@
24	24
25	25	PegTokenizer.src = false;
26	26
27		~~-PegTokenizer.prototype.tokenize = function( text ) {~~
	27	+PegTokenizer.prototype.process = function( text ) {
28	28	var out, err;
29	29	if ( !this.parser ) {
30		~~- // Only create a single parser, as it is fully static.~~
	30	+ // Only create a single parser, as parse() is a static method.
31	31	PegTokenizer.prototype.parser = PEG.buildParser(this.src);
32	32	// add reference to this for event emission
33	33	// XXX: pass a cb into parse() instead, but need to modify pegjs a bit
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js
—	—	@@ -20,19 +20,35 @@
21	21	DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
22	22	DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter;
23	23
24		~~-function ParserPipeline( env ) {~~
25		~~- // Set up a simple parser pipeline.~~
	24	+/**
	25	+ * Set up a simple parser pipeline. There will be a single pipeline overall,
	26	+ * but there can be multiple sub-pipelines for template expansions etc, which
	27	+ * in turn differ by input type. The main input type will be fixed at
	28	+ * construction time though.
	29	+ *
	30	+ * @class
	31	+ * @constructor
	32	+ * @param {Object} Environment.
	33	+ */
	34	+function ParserPipeline( env, inputType ) {
26	35
27		~~- // XXX: create a full-fledged environment~~
	36	+ if ( ! inputType ) {
	37	+ // Actually the only one supported for now, but could also create
	38	+ // others for serialized tokens etc
	39	+ inputType = 'text/wiki';
	40	+ }
	41	+
	42	+
	43	+ // XXX: create a full-fledged environment based on
	44	+ // mediawiki.parser.environment.js.
28	45	if ( !env ) {
29	46	this.env = {};
30	47	} else {
31	48	this.env = env;
32	49	}
33	50
34		~~- // Create an input pipeline for the given input (for now fixed to~~
35		~~- // text/wiki).~~
36		~~- this.inputPipeline = this.makeInputPipeline( 'text/wiki', {} );~~
	51	+ // Create an input pipeline for the given input type.
	52	+ this.inputPipeline = this.makeInputPipeline ( inputType );
37	53
38	54
39	55	this.tokenPostProcessor = new TokenTransformManager.SyncTokenTransformManager ( env );
—	—	@@ -56,10 +72,11 @@
57	73	* Final processing on the HTML DOM.
58	74	*/
59	75
60		~~- // Generic DOM transformer.~~
61		~~- // This currently performs minor tree-dependent clean up like wrapping~~
62		~~- // plain text in paragraphs. For HTML output, it would also be configured~~
63		~~- // to perform more aggressive nesting cleanup.~~
	76	+ /* Generic DOM transformer.
	77	+ * This currently performs minor tree-dependent clean up like wrapping
	78	+ * plain text in paragraphs. For HTML output, it would also be configured
	79	+ * to perform more aggressive nesting cleanup.
	80	+ */
64	81	this.postProcessor = new DOMPostProcessor();
65	82	this.postProcessor.listenForDocumentFrom( this.treeBuilder );
66	83
—	—	@@ -80,6 +97,20 @@
81	98	this.postProcessor.addListener( 'document', this.setDocumentProperty.bind( this ) );
82	99	}
83	100
	101	+/**
	102	+ * Factory method for the input (up to async token transforms / phase two)
	103	+ * parts of the parser pipeline.
	104	+ *
	105	+ * @method
	106	+ * @param {String} Input type. Try 'text/wiki'.
	107	+ * @param {Object} Expanded template arguments to pass to the
	108	+ * AsyncTokenTransformManager.
	109	+ * @returns {Object} { first: <first stage>, last: AsyncTokenTransformManager }
	110	+ * First stage is supposed to implement a process() function
	111	+ * that can accept all input at once. The wikitext tokenizer for example
	112	+ * accepts the wiki text this way. The last stage of the input pipeline is
	113	+ * always an AsyncTokenTransformManager, which emits its output in events.
	114	+ */
84	115	ParserPipeline.prototype.makeInputPipeline = function ( inputType, args ) {
85	116	if ( inputType === 'text/wiki' ) {
86	117	var wikiTokenizer = new PegTokenizer();
—	—	@@ -100,11 +131,21 @@
101	132	} else {
102	133	throw "ParserPipeline.makeInputPipeline: Unsupported input type " + inputType;
103	134	}
104		~~-}~~
	135	+};
105	136
106		~~-ParserPipeline.prototype.parse = function ( text ) {~~
107		~~- // Set the pipeline in motion by feeding the tokenizer~~
108		~~- this.inputPipeline.first.tokenize( text );~~
	137	+
	138	+/**
	139	+ * Parse an input
	140	+ *
	141	+ * @method
	142	+ * @param {Mixed} All arguments are passed through to the underlying input
	143	+ * pipeline's first element's process() method. For a wikitext pipeline (the
	144	+ * default), this would be the wikitext to tokenize.
	145	+ */
	146	+ParserPipeline.prototype.parse = function ( ) {
	147	+ // Set the pipeline in motion by feeding the first element with the given
	148	+ // arguments.
	149	+ this.inputPipeline.first.process.apply( this.inputPipeline.first , arguments );
109	150	};
110	151
111	152	// XXX: Lame hack: set document property. Instead, emit events
—	—	@@ -115,7 +156,7 @@
116	157
117	158
118	159	// XXX: remove JSON serialization here, that should only be performed when
119		~~-// needed.~~
	160	+// needed (and normally without pretty-printing).
120	161	ParserPipeline.prototype.getWikiDom = function () {
121	162	return JSON.stringify(
122	163	this.DOMConverter.HTMLtoWiki( this.document.body ),
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
—	—	@@ -1,6 +1,11 @@
2	2	/**
3		~~- * Template and template argument handling.~~
	3	+ * Template and template argument handling, first cut.
4	4	*
	5	+ * AsyncTokenTransformManager objects provide preprocessor-frame-like
	6	+ * functionality once template args etc are fully expanded, and isolate
	7	+ * individual transforms from concurrency issues. Template argument expansion
	8	+ * is performed using a structure managed in this extension.
	9	+ *
5	10	* @author Gabriel Wicke <gwicke@wikimedia.org>
6	11	* @author Brion Vibber <brion@wikimedia.org>
7	12	*/
—	—	@@ -45,11 +50,7 @@
46	51	// check for msg, msgnw, raw magics
47	52	// check for parser functions
48	53
49		~~- // create a new frame~~
50		~~- // XXX FIXME: create a new AsyncTokenTransformManager with default~~
51		~~- // transformations!~~
52		~~- //~~
53		~~- // nestedAsyncTokenTransformManager = this.manager.newChildPipeline( inputType, args );~~
	54	+ // create a new frame for argument and title expansions
54	55	var newFrame = {
55	56	args: {},
56	57	env: frame.env,
—	—	@@ -118,6 +119,13 @@
119	120	* target were expanded in frame.
120	121	*/
121	122	TemplateHandler.prototype._expandTemplate = function ( frame ) {
	123	+ // Create a new nested transformation pipeline for the input type
	124	+ // (includes the tokenizer and synchronous stage-1 transforms for
	125	+ // 'text/wiki' input).
	126	+ // Returned pipe (for now):
	127	+ // { first: tokenizer, last: AsyncTokenTransformManager }
	128	+ var pipe = this.manager.newChildPipeline( inputType, args );
	129	+
122	130	// Set up a pipeline:
123	131	// fetch template source -> tokenizer
124	132	// getInputPipeline( inputType )

Status & tagging log

23:50, 10 January 2012 GWicke (talk | contribs) changed the status of r108462 [removed: new added: deferred]