Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js |
— | — | @@ -23,10 +23,10 @@ |
24 | 24 | |
25 | 25 | PegTokenizer.src = false; |
26 | 26 | |
27 | | -PegTokenizer.prototype.tokenize = function( text ) { |
| 27 | +PegTokenizer.prototype.process = function( text ) { |
28 | 28 | var out, err; |
29 | 29 | if ( !this.parser ) { |
30 | | - // Only create a single parser, as it is fully static. |
| 30 | + // Only create a single parser, as parse() is a static method. |
31 | 31 | PegTokenizer.prototype.parser = PEG.buildParser(this.src); |
32 | 32 | // add reference to this for event emission |
33 | 33 | // XXX: pass a cb into parse() instead, but need to modify pegjs a bit |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js |
— | — | @@ -20,19 +20,35 @@ |
21 | 21 | DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor, |
22 | 22 | DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter; |
23 | 23 | |
24 | | -function ParserPipeline( env ) { |
25 | | - // Set up a simple parser pipeline. |
| 24 | +/** |
| 25 | + * Set up a simple parser pipeline. There will be a single pipeline overall, |
| 26 | + * but there can be multiple sub-pipelines for template expansions etc, which |
| 27 | + * in turn differ by input type. The main input type will be fixed at |
| 28 | + * construction time though. |
| 29 | + * |
| 30 | + * @class |
| 31 | + * @constructor |
| 32 | + * @param {Object} env The environment object. |
| 33 | + */ |
| 34 | +function ParserPipeline( env, inputType ) { |
26 | 35 | |
27 | | - // XXX: create a full-fledged environment |
| 36 | + if ( ! inputType ) { |
| 37 | + // Actually the only one supported for now, but could also create |
| 38 | + // others for serialized tokens etc |
| 39 | + inputType = 'text/wiki'; |
| 40 | + } |
| 41 | + |
| 42 | + |
| 43 | + // XXX: create a full-fledged environment based on |
| 44 | + // mediawiki.parser.environment.js. |
28 | 45 | if ( !env ) { |
29 | 46 | this.env = {}; |
30 | 47 | } else { |
31 | 48 | this.env = env; |
32 | 49 | } |
33 | 50 | |
34 | | - // Create an input pipeline for the given input (for now fixed to |
35 | | - // text/wiki). |
36 | | - this.inputPipeline = this.makeInputPipeline( 'text/wiki', {} ); |
| 51 | + // Create an input pipeline for the given input type. |
| 52 | + this.inputPipeline = this.makeInputPipeline ( inputType ); |
37 | 53 | |
38 | 54 | |
39 | 55 | this.tokenPostProcessor = new TokenTransformManager.SyncTokenTransformManager ( env ); |
— | — | @@ -56,10 +72,11 @@ |
57 | 73 | * Final processing on the HTML DOM. |
58 | 74 | */ |
59 | 75 | |
60 | | - // Generic DOM transformer. |
61 | | - // This currently performs minor tree-dependent clean up like wrapping |
62 | | - // plain text in paragraphs. For HTML output, it would also be configured |
63 | | - // to perform more aggressive nesting cleanup. |
| 76 | + /* Generic DOM transformer. |
| 77 | + * This currently performs minor tree-dependent clean up like wrapping |
| 78 | + * plain text in paragraphs. For HTML output, it would also be configured |
| 79 | + * to perform more aggressive nesting cleanup. |
| 80 | + */ |
64 | 81 | this.postProcessor = new DOMPostProcessor(); |
65 | 82 | this.postProcessor.listenForDocumentFrom( this.treeBuilder ); |
66 | 83 | |
— | — | @@ -80,6 +97,20 @@ |
81 | 98 | this.postProcessor.addListener( 'document', this.setDocumentProperty.bind( this ) ); |
82 | 99 | } |
83 | 100 | |
| 101 | +/** |
| 102 | + * Factory method for the input (up to async token transforms / phase two) |
| 103 | + * parts of the parser pipeline. |
| 104 | + * |
| 105 | + * @method |
| 106 | + * @param {String} inputType Input type. Try 'text/wiki'. |
| 107 | + * @param {Object} args Expanded template arguments to pass to the |
| 108 | + * AsyncTokenTransformManager. |
| 109 | + * @returns {Object} { first: <first stage>, last: AsyncTokenTransformManager } |
| 110 | + * First stage is supposed to implement a process() function |
| 111 | + * that can accept all input at once. The wikitext tokenizer for example |
| 112 | + * accepts the wiki text this way. The last stage of the input pipeline is |
| 113 | + * always an AsyncTokenTransformManager, which emits its output in events. |
| 114 | + */ |
84 | 115 | ParserPipeline.prototype.makeInputPipeline = function ( inputType, args ) { |
85 | 116 | if ( inputType === 'text/wiki' ) { |
86 | 117 | var wikiTokenizer = new PegTokenizer(); |
— | — | @@ -100,11 +131,21 @@ |
101 | 132 | } else { |
102 | 133 | throw "ParserPipeline.makeInputPipeline: Unsupported input type " + inputType; |
103 | 134 | } |
104 | | -} |
| 135 | +}; |
105 | 136 | |
106 | | -ParserPipeline.prototype.parse = function ( text ) { |
107 | | - // Set the pipeline in motion by feeding the tokenizer |
108 | | - this.inputPipeline.first.tokenize( text ); |
| 137 | + |
| 138 | +/** |
| 139 | + * Parse an input. |
| 140 | + * |
| 141 | + * @method |
| 142 | + * @param {Mixed} All arguments are passed through to the underlying input |
| 143 | + * pipeline's first element's process() method. For a wikitext pipeline (the |
| 144 | + * default), this would be the wikitext to tokenize. |
| 145 | + */ |
| 146 | +ParserPipeline.prototype.parse = function ( ) { |
| 147 | + // Set the pipeline in motion by feeding the first element with the given |
| 148 | + // arguments. |
| 149 | + this.inputPipeline.first.process.apply( this.inputPipeline.first , arguments ); |
109 | 150 | }; |
110 | 151 | |
111 | 152 | // XXX: Lame hack: set document property. Instead, emit events |
— | — | @@ -115,7 +156,7 @@ |
116 | 157 | |
117 | 158 | |
118 | 159 | // XXX: remove JSON serialization here, that should only be performed when |
119 | | -// needed. |
| 160 | +// needed (and normally without pretty-printing). |
120 | 161 | ParserPipeline.prototype.getWikiDom = function () { |
121 | 162 | return JSON.stringify( |
122 | 163 | this.DOMConverter.HTMLtoWiki( this.document.body ), |
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js |
— | — | @@ -1,6 +1,11 @@ |
2 | 2 | /** |
3 | | - * Template and template argument handling. |
| 3 | + * Template and template argument handling, first cut. |
4 | 4 | * |
| 5 | + * AsyncTokenTransformManager objects provide preprocessor-frame-like |
| 6 | + * functionality once template args etc are fully expanded, and isolate |
| 7 | + * individual transforms from concurrency issues. Template argument expansion |
| 8 | + * is performed using a structure managed in this extension. |
| 9 | + * |
5 | 10 | * @author Gabriel Wicke <gwicke@wikimedia.org> |
6 | 11 | * @author Brion Vibber <brion@wikimedia.org> |
7 | 12 | */ |
— | — | @@ -45,11 +50,7 @@ |
46 | 51 | // check for msg, msgnw, raw magics |
47 | 52 | // check for parser functions |
48 | 53 | |
49 | | - // create a new frame |
50 | | - // XXX FIXME: create a new AsyncTokenTransformManager with default |
51 | | - // transformations! |
52 | | - // |
53 | | - // nestedAsyncTokenTransformManager = this.manager.newChildPipeline( inputType, args ); |
| 54 | + // create a new frame for argument and title expansions |
54 | 55 | var newFrame = { |
55 | 56 | args: {}, |
56 | 57 | env: frame.env, |
— | — | @@ -118,6 +119,13 @@ |
119 | 120 | * target were expanded in frame. |
120 | 121 | */ |
121 | 122 | TemplateHandler.prototype._expandTemplate = function ( frame ) { |
| 123 | + // Create a new nested transformation pipeline for the input type |
| 124 | + // (includes the tokenizer and synchronous stage-1 transforms for |
| 125 | + // 'text/wiki' input). |
| 126 | + // Returned pipe (for now): |
| 127 | + // { first: tokenizer, last: AsyncTokenTransformManager } |
| 128 | + var pipe = this.manager.newChildPipeline( inputType, args ); |
| 129 | + |
122 | 130 | // Set up a pipeline: |
123 | 131 | // fetch template source -> tokenizer |
124 | 132 | // getInputPipeline( inputType ) |