r108462 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r108461 | r108462 | r108463 >
Date:19:33, 9 January 2012
Author:gwicke
Status:deferred
Tags:
Comment:
A bit of cleanup in ParserPipeline, with better and more consistent support
for multiple input types.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
@@ -23,10 +23,10 @@
2424
2525 PegTokenizer.src = false;
2626
27 -PegTokenizer.prototype.tokenize = function( text ) {
 27+PegTokenizer.prototype.process = function( text ) {
2828 var out, err;
2929 if ( !this.parser ) {
30 - // Only create a single parser, as it is fully static.
 30+ // Only create a single parser, as parse() is a static method.
3131 PegTokenizer.prototype.parser = PEG.buildParser(this.src);
3232 // add reference to this for event emission
3333 // XXX: pass a cb into parse() instead, but need to modify pegjs a bit
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js
@@ -20,19 +20,35 @@
2121 DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
2222 DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter;
2323
24 -function ParserPipeline( env ) {
25 - // Set up a simple parser pipeline.
 24+/**
 25+ * Set up a simple parser pipeline. There will be a single pipeline overall,
 26+ * but there can be multiple sub-pipelines for template expansions etc, which
 27+ * in turn differ by input type. The main input type will be fixed at
 28+ * construction time though.
 29+ *
 30+ * @class
 31+ * @constructor
 32+ * @param {Object} Environment.
 33+ */
 34+function ParserPipeline( env, inputType ) {
2635
27 - // XXX: create a full-fledged environment
 36+ if ( ! inputType ) {
 37+ // Actually the only one supported for now, but could also create
 38+ // others for serialized tokens etc
 39+ inputType = 'text/wiki';
 40+ }
 41+
 42+
 43+ // XXX: create a full-fledged environment based on
 44+ // mediawiki.parser.environment.js.
2845 if ( !env ) {
2946 this.env = {};
3047 } else {
3148 this.env = env;
3249 }
3350
34 - // Create an input pipeline for the given input (for now fixed to
35 - // text/wiki).
36 - this.inputPipeline = this.makeInputPipeline( 'text/wiki', {} );
 51+ // Create an input pipeline for the given input type.
 52+ this.inputPipeline = this.makeInputPipeline ( inputType );
3753
3854
3955 this.tokenPostProcessor = new TokenTransformManager.SyncTokenTransformManager ( env );
@@ -56,10 +72,11 @@
5773 * Final processing on the HTML DOM.
5874 */
5975
60 - // Generic DOM transformer.
61 - // This currently performs minor tree-dependent clean up like wrapping
62 - // plain text in paragraphs. For HTML output, it would also be configured
63 - // to perform more aggressive nesting cleanup.
 76+ /* Generic DOM transformer.
 77+ * This currently performs minor tree-dependent clean up like wrapping
 78+ * plain text in paragraphs. For HTML output, it would also be configured
 79+ * to perform more aggressive nesting cleanup.
 80+ */
6481 this.postProcessor = new DOMPostProcessor();
6582 this.postProcessor.listenForDocumentFrom( this.treeBuilder );
6683
@@ -80,6 +97,20 @@
8198 this.postProcessor.addListener( 'document', this.setDocumentProperty.bind( this ) );
8299 }
83100
 101+/**
 102+ * Factory method for the input (up to async token transforms / phase two)
 103+ * parts of the parser pipeline.
 104+ *
 105+ * @method
 106+ * @param {String} Input type. Try 'text/wiki'.
 107+ * @param {Object} Expanded template arguments to pass to the
 108+ * AsyncTokenTransformManager.
 109+ * @returns {Object} { first: <first stage>, last: AsyncTokenTransformManager }
 110+ * First stage is supposed to implement a process() function
 111+ * that can accept all input at once. The wikitext tokenizer for example
 112+ * accepts the wiki text this way. The last stage of the input pipeline is
 113+ * always an AsyncTokenTransformManager, which emits its output in events.
 114+ */
84115 ParserPipeline.prototype.makeInputPipeline = function ( inputType, args ) {
85116 if ( inputType === 'text/wiki' ) {
86117 var wikiTokenizer = new PegTokenizer();
@@ -100,11 +131,21 @@
101132 } else {
102133 throw "ParserPipeline.makeInputPipeline: Unsupported input type " + inputType;
103134 }
104 -}
 135+};
105136
106 -ParserPipeline.prototype.parse = function ( text ) {
107 - // Set the pipeline in motion by feeding the tokenizer
108 - this.inputPipeline.first.tokenize( text );
 137+
 138+/**
 139+ * Parse an input
 140+ *
 141+ * @method
 142+ * @param {Mixed} All arguments are passed through to the underlying input
 143+ * pipeline's first element's process() method. For a wikitext pipeline (the
 144+ * default), this would be the wikitext to tokenize.
 145+ */
 146+ParserPipeline.prototype.parse = function ( ) {
 147+ // Set the pipeline in motion by feeding the first element with the given
 148+ // arguments.
 149+ this.inputPipeline.first.process.apply( this.inputPipeline.first , arguments );
109150 };
110151
111152 // XXX: Lame hack: set document property. Instead, emit events
@@ -115,7 +156,7 @@
116157
117158
118159 // XXX: remove JSON serialization here, that should only be performed when
119 -// needed.
 160+// needed (and normally without pretty-printing).
120161 ParserPipeline.prototype.getWikiDom = function () {
121162 return JSON.stringify(
122163 this.DOMConverter.HTMLtoWiki( this.document.body ),
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
@@ -1,6 +1,11 @@
22 /**
3 - * Template and template argument handling.
 3+ * Template and template argument handling, first cut.
44 *
 5+ * AsyncTokenTransformManager objects provide preprocessor-frame-like
 6+ * functionality once template args etc are fully expanded, and isolate
 7+ * individual transforms from concurrency issues. Template argument expansion
 8+ * is performed using a structure managed in this extension.
 9+ *
510 * @author Gabriel Wicke <gwicke@wikimedia.org>
611 * @author Brion Vibber <brion@wikimedia.org>
712 */
@@ -45,11 +50,7 @@
4651 // check for msg, msgnw, raw magics
4752 // check for parser functions
4853
49 - // create a new frame
50 - // XXX FIXME: create a new AsyncTokenTransformManager with default
51 - // transformations!
52 - //
53 - // nestedAsyncTokenTransformManager = this.manager.newChildPipeline( inputType, args );
 54+ // create a new frame for argument and title expansions
5455 var newFrame = {
5556 args: {},
5657 env: frame.env,
@@ -118,6 +119,13 @@
119120 * target were expanded in frame.
120121 */
121122 TemplateHandler.prototype._expandTemplate = function ( frame ) {
 123+ // Create a new nested transformation pipeline for the input type
 124+ // (includes the tokenizer and synchronous stage-1 transforms for
 125+ // 'text/wiki' input).
 126+ // Returned pipe (for now):
 127+ // { first: tokenizer, last: AsyncTokenTransformManager }
 128+ var pipe = this.manager.newChildPipeline( inputType, args );
 129+
122130 // Set up a pipeline:
123131 // fetch template source -> tokenizer
124132 // getInputPipeline( inputType )

Status & tagging log