r108435 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r108434‎ | r108435 | r108436 >
Date:17:49, 9 January 2012
Author:gwicke
Status:deferred
Tags:
Comment:
Two batteries worth of token transform manager refactoring.

* TokenTransformDispatcher is now renamed to TokenTransformManager, and is
also turned into a base class
* SyncTokenTransformManager and AsyncTokenTransformManager subclass
TokenTransformManager and implement synchronous (phase 1,3) and asynchronous
(phase 2) transformation stages.
* Communication between stages uses the same chunk / end events as all the
other token stages.
* The AsyncTokenTransformManager now supports the creation of nested
AsyncTokenTransformManagers for template expansion.
The AsyncTokenTransformManager object takes on the responsibilities of a
preprocessor frame. Transforms are newly created (or potentially resurrected
from a cache), so that transforms do not have to worry about concurrency.
* The environment is pushed through to all transform managers and the
individual transforms.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.Cite.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js (deleted) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformManager.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js
@@ -1,625 +0,0 @@
2 -/* Generic token transformation dispatcher with support for asynchronous token
3 - * expansion. Individual transformations register for the token types they are
4 - * interested in and are called on each matching token.
5 - *
6 - * See
7 - * https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
8 - * for more documentation.
9 - *
10 - * @author Gabriel Wicke <gwicke@wikimedia.org>
11 - */
12 -
13 -var events = require('events');
14 -
15 -/**
16 - * Central dispatcher and manager for potentially asynchronous token
17 - * transformations.
18 - *
19 - * @class
20 - * @constructor
21 - * @param {Function} callback, a callback function accepting a token list as
22 - * its only argument.
23 - */
24 -function TokenTransformDispatcher( ) {
25 - this.transformers = {
26 - // phase 0 and 1, rank 2 marks tokens as fully processed for these
27 - // phases.
28 - 2: {
29 - tag: {}, // for TAG, ENDTAG, SELFCLOSINGTAG, keyed on name
30 - text: [],
31 - newline: [],
32 - comment: [],
33 - end: [], // eof
34 - martian: [], // none of the above (unknown token type)
35 - any: [] // all tokens, before more specific handlers are run
36 - },
37 - // phase 3, with ranks >= 2 but < 3. 3 marks tokens as fully
38 - // processed.
39 - 3: {
40 - tag: {}, // for TAG, ENDTAG, SELFCLOSINGTAG, keyed on name
41 - text: [],
42 - newline: [],
43 - comment: [],
44 - end: [], // eof
45 - martian: [], // none of the above (unknown token type)
46 - any: [] // all tokens, before more specific handlers are run
47 - }
48 - };
49 - this._reset();
50 -}
51 -
52 -// Inherit from EventEmitter
53 -TokenTransformDispatcher.prototype = new events.EventEmitter();
54 -TokenTransformDispatcher.prototype.constructor = TokenTransformDispatcher;
55 -
56 -/**
57 - * Register to a token source, normally the tokenizer.
58 - * The event emitter emits a 'chunk' event with a chunk of tokens,
59 - * and signals the end of tokens by triggering the 'end' event.
60 - *
61 - * @param {Object} EventEmitter token event emitter.
62 - */
63 -TokenTransformDispatcher.prototype.listenForTokensFrom = function ( tokenEmitter ) {
64 - tokenEmitter.addListener('chunk', this.transformTokens.bind( this ) );
65 - tokenEmitter.addListener('end', this.onEndEvent.bind( this ) );
66 -};
67 -
68 -
69 -/**
70 - * Reset the internal token and outstanding-callback state of the
71 - * TokenTransformDispatcher, but keep registrations untouched.
72 - *
73 - * @method
74 - */
75 -TokenTransformDispatcher.prototype._reset = function ( env ) {
76 - this.tailAccumulator = undefined;
77 - this.phase2TailCB = this._returnTokens01.bind( this );
78 - this.accum = new TokenAccumulator(null);
79 - this.firstaccum = this.accum;
80 - this.prevToken = undefined;
81 - this.frame = {
82 - args: {}, // no arguments at the top level
83 - env: this.env
84 - };
85 - // Should be as static as possible re this and frame
86 - // This is circular, but that should not really matter for non-broken GCs
87 - // that handle pure JS ref loops.
88 - this.frame.transformPhase = this._transformPhase01.bind( this, this.frame );
89 -};
90 -
91 -TokenTransformDispatcher.prototype._rankToPhase = function ( rank ) {
92 - if ( rank < 0 || rank > 3 ) {
93 - throw "TransformDispatcher error: Invalid transformation rank " + rank;
94 - }
95 - if ( rank <= 2 ) {
96 - return 2;
97 - } else {
98 - return 3;
99 - }
100 -};
101 -
102 -/**
103 - * Add a transform registration.
104 - *
105 - * @method
106 - * @param {Function} transform.
107 - * @param {Number} rank, [0,3) with [0,1) in-order on input token stream,
108 - * [1,2) out-of-order and [2,3) in-order on output token stream
109 - * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
110 - * 'martian' (unknown token), 'any' (any token, matched before other matches).
111 - * @param {String} tag name for tags, omitted for non-tags
112 - */
113 -TokenTransformDispatcher.prototype.addTransform = function ( transformation, rank, type, name ) {
114 - var phase = this._rankToPhase( rank ),
115 - transArr,
116 - transformer = {
117 - transform: transformation,
118 - rank: rank
119 - };
120 - if ( type === 'tag' ) {
121 - name = name.toLowerCase();
122 - transArr = this.transformers[phase].tag[name];
123 - if ( ! transArr ) {
124 - transArr = this.transformers[phase].tag[name] = [];
125 - }
126 - } else {
127 - transArr = this.transformers[phase][type];
128 - }
129 - transArr.push(transformer);
130 - // sort ascending by rank
131 - transArr.sort( this._cmpTransformations );
132 -};
133 -
134 -/**
135 - * Remove a transform registration
136 - *
137 - * @method
138 - * @param {Function} transform.
139 - * @param {Number} rank, [0,3) with [0,1) in-order on input token stream,
140 - * [1,2) out-of-order and [2,3) in-order on output token stream
141 - * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
142 - * 'martian' (unknown token), 'any' (any token, matched before other matches).
143 - * @param {String} tag name for tags, omitted for non-tags
144 - */
145 -TokenTransformDispatcher.prototype.removeTransform = function ( rank, type, name ) {
146 - var i = -1,
147 - phase = this._rankToPhase( rank ),
148 - ts;
149 -
150 - function rankUnEqual ( i ) {
151 - return i.rank !== rank;
152 - }
153 -
154 - if ( type === 'tag' ) {
155 - name = name.toLowerCase();
156 - var maybeTransArr = this.transformers[phase].tag.name;
157 - if ( maybeTransArr ) {
158 - this.transformers[phase].tag.name = maybeTransArr.filter( rankUnEqual );
159 - }
160 - } else {
161 - this.transformers[phase][type] = this.transformers[phase][type].filter( rankUnEqual ) ;
162 - }
163 -};
164 -
165 -/**
166 - * Enforce separation between phases when token types or tag names have
167 - * changed, or when multiple tokens were returned. Processing will restart
168 - * with the new rank.
169 - */
170 -TokenTransformDispatcher.prototype._resetTokenRank = function ( res, transformer ) {
171 - if ( res.token ) {
172 - // reset rank after type or name change
173 - if ( transformer.rank < 1 ) {
174 - res.token.rank = 0;
175 - } else {
176 - res.token.rank = 1;
177 - }
178 - } else if ( res.tokens && transformer.rank > 2 ) {
179 - for ( var i = 0; i < res.tokens.length; i++ ) {
180 - if ( res.tokens[i].rank === undefined ) {
181 - // Do not run phase 0 on newly created tokens from
182 - // phase 1.
183 - res.tokens[i].rank = 2;
184 - }
185 - }
186 - }
187 -};
188 -
189 -/**
190 - * Comparison for sorting transformations by ascending rank.
191 - */
192 -TokenTransformDispatcher.prototype._cmpTransformations = function ( a, b ) {
193 - return a.rank - b.rank;
194 -};
195 -
196 -/* Call all transformers on a tag.
197 - *
198 - * @method
199 - * @param {Object} The current token.
200 - * @param {Function} Completion callback for async processing.
201 - * @param {Number} Rank of phase end, both key for transforms and rank for
202 - * processed tokens.
203 - * @param {Object} The frame, contains a reference to the environment.
204 - * @returns {Object} Token(s) and async indication.
205 - */
206 -TokenTransformDispatcher.prototype._transformTagToken = function ( token, cb, phaseEndRank, frame ) {
207 - // prepend 'any' transformers
208 - var ts = this.transformers[phaseEndRank].any,
209 - res = { token: token },
210 - transform,
211 - l, i,
212 - aborted = false,
213 - tName = token.name.toLowerCase(),
214 - tagts = this.transformers[phaseEndRank].tag[tName];
215 -
216 - if ( tagts && tagts.length ) {
217 - // could cache this per tag type to avoid re-sorting each time
218 - ts = ts.concat(tagts);
219 - ts.sort( this._cmpTransformations );
220 - }
221 - //console.log(JSON.stringify(ts, null, 2));
222 - if ( ts ) {
223 - for ( i = 0, l = ts.length; i < l; i++ ) {
224 - transformer = ts[i];
225 - if ( res.token.rank && transformer.rank <= res.token.rank ) {
226 - // skip transformation, was already applied.
227 - continue;
228 - }
229 - // Transform token with side effects
230 - res = transformer.transform( res.token, cb, frame, this.prevToken );
231 - // if multiple tokens or null token: process returned tokens (in parent)
232 - if ( !res.token || // async implies tokens instead of token, so no
233 - // need to check explicitly
234 - res.token.type !== token.type ||
235 - res.token.name !== token.name ) {
236 - this._resetTokenRank ( res, transformer );
237 - aborted = true;
238 - break;
239 - }
240 - // track progress on token
241 - res.token.rank = transformer.rank;
242 - }
243 - if ( ! aborted ) {
244 - // Mark token as fully processed.
245 - res.token.rank = phaseEndRank;
246 - }
247 - }
248 - return res;
249 -};
250 -
251 -/* Call all transformers on non-tag token types.
252 - *
253 - * @method
254 - * @param {Object} The current token.
255 - * @param {Function} Completion callback for async processing.
256 - * @param {Number} Rank of phase end, both key for transforms and rank for
257 - * processed tokens.
258 - * @param {Object} The frame, contains a reference to the environment.
259 - * @param {Array} ts List of token transformers for this token type.
260 - * @returns {Object} Token(s) and async indication.
261 - */
262 -TokenTransformDispatcher.prototype._transformToken = function ( token, cb, phaseEndRank, frame, ts ) {
263 - // prepend 'any' transformers
264 - var anyTrans = this.transformers[phaseEndRank].any;
265 - if ( anyTrans.length ) {
266 - ts = this.transformers[phaseEndRank].any.concat(ts);
267 - ts.sort( this._cmpTransformations );
268 - }
269 - var transformer,
270 - res = { token: token },
271 - aborted = false;
272 - if ( ts ) {
273 - for (var i = 0, l = ts.length; i < l; i++ ) {
274 - transformer = ts[i];
275 - if ( res.token.rank && transformer.rank <= res.token.rank ) {
276 - // skip transformation, was already applied.
277 - continue;
278 - }
279 - // Transform the token.
280 - // XXX: consider moving the rank out of the token itself to avoid
281 - // transformations messing with it in broken ways. Not sure if
282 - // some transformations need to manipulate it though. gwicke
283 - res = transformer.transform( res.token, cb, frame, this.prevToken );
284 - if ( !res.token ||
285 - res.token.type !== token.type ) {
286 - this._resetTokenRank ( res, transformer );
287 - aborted = true;
288 - break;
289 - }
290 - res.token.rank = transformer.rank;
291 - }
292 - if ( ! aborted ) {
293 - // mark token as completely processed
294 - res.token.rank = phaseEndRank; // need phase passed in!
295 - }
296 -
297 - }
298 - return res;
299 -};
300 -
301 -/**
302 - * Transform and expand tokens.
303 - *
304 - * Callback for token chunks emitted from the tokenizer.
305 - */
306 -TokenTransformDispatcher.prototype.transformTokens = function ( tokens ) {
307 - //console.log('TokenTransformDispatcher transformTokens');
308 - var res = this._transformPhase01 ( this.frame, tokens, this.phase2TailCB );
309 - this.phase2TailCB( tokens, true );
310 - if ( res.async ) {
311 - this.tailAccumulator = res.async;
312 - this.phase2TailCB = res.async.getParentCB ( 'sibling' );
313 - }
314 -};
315 -
316 -/**
317 - * Callback for the end event emitted from the tokenizer.
318 - * Either signals the end of input to the tail of an ongoing asynchronous
319 - * processing pipeline, or directly emits 'end' if the processing was fully
320 - * synchronous.
321 - */
322 -TokenTransformDispatcher.prototype.onEndEvent = function () {
323 - if ( this.tailAccumulator ) {
324 - this.tailAccumulator.siblingDone();
325 - } else {
326 - // nothing was asynchronous, so we'll have to emit end here.
327 - this.emit('end');
328 - this._reset();
329 - }
330 -};
331 -
332 -
333 -/**
334 - * Run transformations from phases 0 and 1. This includes starting and
335 - * managing asynchronous transformations.
336 - *
337 - * return protocol for transform*Token:
338 - * { tokens: [tokens], async: true }: async expansion -> outstanding++ in parent
339 - * { tokens: [tokens] }: fully expanded, tokens will be reprocessed
340 - * { token: token }: single-token return
341 - */
342 -TokenTransformDispatcher.prototype._transformPhase01 = function ( frame, tokens, parentCB ) {
343 -
344 - //console.log('_transformPhase01: ' + JSON.stringify(tokens) );
345 -
346 - var res,
347 - phaseEndRank = 2,
348 - // Prepare a new accumulator, to be used by async children (if any)
349 - localAccum = [],
350 - accum = new TokenAccumulator( parentCB ),
351 - cb = accum.getParentCB( 'child' ),
352 - activeAccum = null,
353 - tokensLength = tokens.length,
354 - token,
355 - ts = this.transformers[phaseEndRank];
356 -
357 - for ( var i = 0; i < tokensLength; i++ ) {
358 - token = tokens[i];
359 -
360 - switch( token.type ) {
361 - case 'TAG':
362 - case 'ENDTAG':
363 - case 'SELFCLOSINGTAG':
364 - res = this._transformTagToken( token, cb, phaseEndRank, frame );
365 - break;
366 - case 'TEXT':
367 - res = this._transformToken( token, cb, phaseEndRank, frame, ts.text );
368 - break;
369 - case 'COMMENT':
370 - res = this._transformToken( token, cb, phaseEndRank, frame, ts.comment);
371 - break;
372 - case 'NEWLINE':
373 - res = this._transformToken( token, cb, phaseEndRank, frame, ts.newline );
374 - break;
375 - case 'END':
376 - res = this._transformToken( token, cb, phaseEndRank, frame, ts.end );
377 - break;
378 - default:
379 - res = this._transformToken( token, cb, phaseEndRank, frame, ts.martian );
380 - break;
381 - }
382 -
383 - if( res.tokens ) {
384 - // Splice in the returned tokens (while replacing the original
385 - // token), and process them next.
386 - [].splice.apply( tokens, [i, 1].concat(res.tokens) );
387 - tokensLength = tokens.length;
388 - i--; // continue at first inserted token
389 - } else if ( res.token ) {
390 - if ( res.token.rank === 2 ) {
391 - // token is done.
392 - if ( activeAccum ) {
393 - // push to accumulator
394 - activeAccum.push( res.token );
395 - } else {
396 - // If there is no accumulator yet, then directly return the
397 - // token to the parent. Collect them in localAccum for this
398 - // purpose.
399 - localAccum.push(res.token);
400 - }
401 - } else {
402 - // re-process token.
403 - tokens[i] = res.token;
404 - i--;
405 - }
406 - } else if ( res.async ) {
407 - // The child now switched to activeAccum, we have to create a new
408 - // accumulator for the next potential child.
409 - activeAccum = accum;
410 - accum = new TokenAccumulator( activeAccum.getParentCB( 'sibling' ) );
411 - cb = accum.getParentCB( 'child' );
412 - }
413 - }
414 -
415 - // Return finished tokens directly to caller, and indicate if further
416 - // async actions are outstanding. The caller needs to point a sibling to
417 - // the returned accumulator, or call .siblingDone() to mark the end of a
418 - // chain.
419 - return { tokens: localAccum, async: activeAccum };
420 -};
421 -
422 -/**
423 - * Callback from tokens fully processed for phase 0 and 1, which are now ready
424 - * for synchronous and globally in-order phase 2 processing.
425 - */
426 -TokenTransformDispatcher.prototype._returnTokens01 = function ( tokens, notYetDone ) {
427 - // FIXME: store frame in object?
428 - tokens = this._transformPhase2( this.frame, tokens, this.parentCB );
429 - //console.log('_returnTokens01, after _transformPhase2.');
430 -
431 - this.emit( 'chunk', tokens );
432 -
433 - if ( ! notYetDone ) {
434 - console.log('_returnTokens01 done.');
435 - // signal our done-ness to consumers.
436 - this.emit( 'end' );
437 - // and reset internal state.
438 - this._reset();
439 - }
440 -};
441 -
442 -
443 -/**
444 - * Phase 3 (rank [2,3))
445 - *
446 - * Global in-order traversal on expanded token stream (after async phase 1).
447 - * Very similar to _transformPhase01, but without async handling.
448 - */
449 -TokenTransformDispatcher.prototype._transformPhase2 = function ( frame, tokens, cb ) {
450 - var res,
451 - phaseEndRank = 3,
452 - localAccum = [],
453 - localAccumLength = 0,
454 - tokensLength = tokens.length,
455 - token,
456 - ts = this.transformers[phaseEndRank];
457 -
458 - for ( var i = 0; i < tokensLength; i++ ) {
459 - token = tokens[i];
460 -
461 - switch( token.type ) {
462 - case 'TAG':
463 - case 'ENDTAG':
464 - case 'SELFCLOSINGTAG':
465 - res = this._transformTagToken( token, cb, phaseEndRank,
466 - frame );
467 - break;
468 - case 'TEXT':
469 - res = this._transformToken( token, cb, phaseEndRank, frame,
470 - ts.text );
471 - break;
472 - case 'COMMENT':
473 - res = this._transformToken( token, cb, phaseEndRank, frame,
474 - ts.comment );
475 - break;
476 - case 'NEWLINE':
477 - res = this._transformToken( token, cb, phaseEndRank, frame,
478 - ts.newline );
479 - break;
480 - case 'END':
481 - res = this._transformToken( token, cb, phaseEndRank, frame,
482 - ts.end );
483 - break;
484 - default:
485 - res = this._transformToken( token, cb, phaseEndRank, frame,
486 - ts.martian );
487 - break;
488 - }
489 -
490 - if( res.tokens ) {
491 - // Splice in the returned tokens (while replacing the original
492 - // token), and process them next.
493 - [].splice.apply( tokens, [i, 1].concat(res.tokens) );
494 - tokensLength = tokens.length;
495 - i--; // continue at first inserted token
496 - } else if ( res.token ) {
497 - if ( res.token.rank === phaseEndRank ) {
498 - // token is done.
499 - localAccum.push(res.token);
500 - this.prevToken = res.token;
501 - } else {
502 - // re-process token.
503 - tokens[i] = res.token;
504 - i--;
505 - }
506 - }
507 - }
508 - return localAccum;
509 -};
510 -
511 -
512 -/**
513 - * Token accumulators buffer tokens between asynchronous processing points,
514 - * and return fully processed token chunks in-order and as soon as possible.
515 - *
516 - * @class
517 - * @constructor
518 - * @param {Object} next TokenAccumulator to link to
519 - * @param {Array} (optional) tokens, init accumulator with tokens or []
520 - */
521 -function TokenAccumulator ( parentCB ) {
522 - this.parentCB = parentCB;
523 - this.accum = [];
524 - // Wait for child and sibling by default
525 - // Note: Need to decrement outstanding on last accum
526 - // in a chain.
527 - this.outstanding = 2;
528 -}
529 -
530 -/**
531 - * Curry a parentCB with the object and reference.
532 - *
533 - * @method
534 - * @param {Object} TokenAccumulator
535 - * @param {misc} Reference / key for callback
536 - * @returns {Function}
537 - */
538 -TokenAccumulator.prototype.getParentCB = function ( reference ) {
539 - return this._returnTokens01.bind( this, reference );
540 -};
541 -
542 -/**
543 - * Pass tokens to an accumulator
544 - *
545 - * @method
546 - * @param {Object} token
547 - */
548 -TokenAccumulator.prototype._returnTokens01 = function ( reference, tokens, notYetDone ) {
549 - var res,
550 - cb,
551 - returnTokens = [];
552 -
553 - if ( ! notYetDone ) {
554 - this.outstanding--;
555 - }
556 -
557 - if ( reference === 'child' ) {
558 - // XXX: Use some marker to avoid re-transforming token chunks several
559 - // times?
560 - res = this._transformPhase01( this.frame, tokens, this.parentCB );
561 -
562 - if ( res.async ) {
563 - // new asynchronous expansion started, chain of accumulators
564 - // created
565 - if ( this.outstanding === 0 ) {
566 - // Last accum in chain should only wait for child
567 - res.async.outstanding--;
568 - cb = this.parentCB;
569 - } else {
570 - cb = this.parentCB;
571 - // set own callback to new sibling, the end of accumulator chain
572 - this.parentCB = res.async.getParentCB( 'sibling' );
573 - }
574 - }
575 - if ( ! notYetDone ) {
576 - // Child is done, return accumulator from sibling. Siblings
577 - // process tokens themselves, so we concat those to the result of
578 - // processing tokens from the child.
579 - tokens = res.tokens.concat( this.accum );
580 - this.accum = [];
581 - }
582 - this.cb( res.tokens, res.async );
583 - return null;
584 - } else {
585 - // sibling
586 - if ( this.outstanding === 0 ) {
587 - tokens = this.accum.concat( tokens );
588 - // A sibling will transform tokens, so we don't have to do this
589 - // again.
590 - this.parentCB( res.tokens, false );
591 - return null;
592 - } else if ( this.outstanding === 1 && notYetDone ) {
593 - // Sibling is not yet done, but child is. Return own parentCB to
594 - // allow the sibling to go direct, and call back parent with
595 - // tokens. The internal accumulator is empty at this stage, as its
596 - // tokens are passed to the parent when the child is done.
597 - return this.parentCB( tokens, true);
598 - }
599 -
600 -
601 - }
602 -};
603 -
604 -/**
605 - * Mark the sibling as done (normally at the tail of a chain).
606 - */
607 -TokenAccumulator.prototype.siblingDone = function () {
608 - this._returnTokens01 ( 'sibling', [], false );
609 -};
610 -
611 -
612 -/**
613 - * Push a token into the accumulator
614 - *
615 - * @method
616 - * @param {Object} token
617 - */
618 -TokenAccumulator.prototype.push = function ( token ) {
619 - return this.accum.push(token);
620 -};
621 -
622 -
623 -
624 -if (typeof module == "object") {
625 - module.exports.TokenTransformDispatcher = TokenTransformDispatcher;
626 -}
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
@@ -4,8 +4,9 @@
55 * @author Gabriel Wicke <gwicke@wikimedia.org>
66 */
77
8 -function QuoteTransformer ( ) {
 8+function QuoteTransformer ( dispatcher ) {
99 this.reset();
 10+ this.register( dispatcher );
1011 }
1112
1213 // constants
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
@@ -26,9 +26,13 @@
2727 PegTokenizer.prototype.tokenize = function( text ) {
2828 var out, err;
2929 if ( !this.parser ) {
30 - this.parser = PEG.buildParser(this.src);
 30+ // Only create a single parser, as it is fully static.
 31+ PegTokenizer.prototype.parser = PEG.buildParser(this.src);
3132 // add reference to this for event emission
32 - this.parser._tokenizer = this;
 33+ // XXX: pass a cb into parse() instead, but need to modify pegjs a bit
 34+ // for that.
 35+ //PegTokenizer.prototype.parser._tokenizer = undefined;
 36+
3337 // Print the generated parser source
3438 //console.log(this.parser.toSource());
3539 }
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformManager.js
@@ -0,0 +1,736 @@
 2+/**
 3+ * Token transformation managers with a (mostly) abstract
 4+ * TokenTransformManager base class and AsyncTokenTransformManager and
 5+ * SyncTokenTransformManager implementation subclasses. Individual
 6+ * transformations register for the token types they are interested in and are
 7+ * called on each matching token.
 8+ *
 9+ * Async token transformations are supported by the TokenAccumulator class,
 10+ * that manages as-early-as-possible and in-order return of tokens including
 11+ * buffering.
 12+ *
 13+ * See
 14+ * https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
 15+ * for more documentation.
 16+ *
 17+ * @author Gabriel Wicke <gwicke@wikimedia.org>
 18+ */
 19+
 20+var events = require('events');
 21+
 22+/**
 23+ * Base class for token transform managers
 24+ *
 25+ * @class
 26+ * @constructor
 27+ * @param {Function} callback, a callback function accepting a token list as
 28+ * its only argument.
 29+ */
 30+function TokenTransformManager( ) {
 31+ // Separate the constructor, so that we can call it from subclasses.
 32+ this._construct();
 33+}
 34+
 35+// Inherit from EventEmitter
 36+TokenTransformManager.prototype = new events.EventEmitter();
 37+TokenTransformManager.prototype.constructor = TokenTransformManager;
 38+
 39+TokenTransformManager.prototype._construct = function () {
 40+ this.transformers = {
 41+ tag: {}, // for TAG, ENDTAG, SELFCLOSINGTAG, keyed on name
 42+ text: [],
 43+ newline: [],
 44+ comment: [],
 45+ end: [], // eof
 46+ martian: [], // none of the above (unknown token type)
 47+ any: [] // all tokens, before more specific handlers are run
 48+ };
 49+};
 50+
 51+/**
 52+ * Register to a token source, normally the tokenizer.
 53+ * The event emitter emits a 'chunk' event with a chunk of tokens,
 54+ * and signals the end of tokens by triggering the 'end' event.
 55+ * XXX: Perform registration directly in the constructor?
 56+ *
 57+ * @method
 58+ * @param {Object} EventEmitter token event emitter.
 59+ */
 60+TokenTransformManager.prototype.listenForTokensFrom = function ( tokenEmitter ) {
 61+ tokenEmitter.addListener('chunk', this.onChunk.bind( this ) );
 62+ tokenEmitter.addListener('end', this.onEndEvent.bind( this ) );
 63+};
 64+
 65+
 66+
 67+/**
 68+ * Map a rank to a phase.
 69+ *
 70+ * XXX: Might not be needed anymore, as phases are now subclassed and
 71+ * registrations are separated.
 72+ */
 73+TokenTransformManager.prototype._rankToPhase = function ( rank ) {
 74+ if ( rank < 0 || rank > 3 ) {
 75+ throw "TransformManager error: Invalid transformation rank " + rank;
 76+ }
 77+ if ( rank <= 2 ) {
 78+ return 2;
 79+ } else {
 80+ return 3;
 81+ }
 82+};
 83+
 84+/**
 85+ * Add a transform registration.
 86+ *
 87+ * @method
 88+ * @param {Function} transform.
 89+ * @param {Number} rank, [0,3) with [0,1) in-order on input token stream,
 90+ * [1,2) out-of-order and [2,3) in-order on output token stream
 91+ * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
 92+ * 'martian' (unknown token), 'any' (any token, matched before other matches).
 93+ * @param {String} tag name for tags, omitted for non-tags
 94+ */
 95+TokenTransformManager.prototype.addTransform = function ( transformation, rank, type, name ) {
 96+ var transArr,
 97+ transformer = {
 98+ transform: transformation,
 99+ rank: rank
 100+ };
 101+ if ( type === 'tag' ) {
 102+ name = name.toLowerCase();
 103+ transArr = this.transformers.tag[name];
 104+ if ( ! transArr ) {
 105+ transArr = this.transformers.tag[name] = [];
 106+ }
 107+ } else {
 108+ transArr = this.transformers[type];
 109+ }
 110+ transArr.push(transformer);
 111+ // sort ascending by rank
 112+ transArr.sort( this._cmpTransformations );
 113+};
 114+
 115+/**
 116+ * Remove a transform registration
 117+ *
 118+ * @method
 119+ * @param {Function} transform.
 120+ * @param {Number} rank, [0,3) with [0,1) in-order on input token stream,
 121+ * [1,2) out-of-order and [2,3) in-order on output token stream
 122+ * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
 123+ * 'martian' (unknown token), 'any' (any token, matched before other matches).
 124+ * @param {String} tag name for tags, omitted for non-tags
 125+ */
 126+TokenTransformManager.prototype.removeTransform = function ( rank, type, name ) {
 127+ var i = -1,
 128+ ts;
 129+
 130+ function rankUnEqual ( i ) {
 131+ return i.rank !== rank;
 132+ }
 133+
 134+ if ( type === 'tag' ) {
 135+ name = name.toLowerCase();
 136+ var maybeTransArr = this.transformers.tag.name;
 137+ if ( maybeTransArr ) {
 138+ this.transformers.tag.name = maybeTransArr.filter( rankUnEqual );
 139+ }
 140+ } else {
 141+ this.transformers[type] = this.transformers[type].filter( rankUnEqual ) ;
 142+ }
 143+};
 144+
 145+/**
 146+ * Enforce separation between phases when token types or tag names have
 147+ * changed, or when multiple tokens were returned. Processing will restart
 148+ * with the new rank.
 149+ *
 150+ * XXX: This should also be moved to the subclass (actually partially implicit if
 151+ * _transformTagToken and _transformToken are subclassed and set the rank when
 152+ * fully processed). The token type change case still needs to be covered
 153+ * though.
 154+ */
 155+TokenTransformManager.prototype._resetTokenRank = function ( res, transformer ) {
 156+ if ( res.token ) {
 157+ // reset rank after type or name change
 158+ if ( transformer.rank < 1 ) {
 159+ res.token.rank = 0;
 160+ } else {
 161+ res.token.rank = 1;
 162+ }
 163+ } else if ( res.tokens && transformer.rank > 2 ) {
 164+ for ( var i = 0; i < res.tokens.length; i++ ) {
 165+ if ( res.tokens[i].rank === undefined ) {
 166+ // Do not run phase 0 on newly created tokens from
 167+ // phase 1.
 168+ res.tokens[i].rank = 2;
 169+ }
 170+ }
 171+ }
 172+};
 173+
 174+/**
 175+ * Comparison for sorting transformations by ascending rank.
 176+ */
 177+TokenTransformManager.prototype._cmpTransformations = function ( a, b ) {
 178+ return a.rank - b.rank;
 179+};
 180+
 181+/* Call all transformers on a tag.
 182+ * XXX: Move to subclasses and use a different signature?
 183+ *
 184+ * @method
 185+ * @param {Object} The current token.
 186+ * @param {Function} Completion callback for async processing.
 187+ * @param {Number} Rank of phase end, both key for transforms and rank for
 188+ * processed tokens.
 189+ * @returns {Object} Token(s) and async indication.
 190+ */
 191+TokenTransformManager.prototype._transformTagToken = function ( token, cb, phaseEndRank ) {
 192+ // prepend 'any' transformers
 193+ var ts = this.transformers.any,
 194+ res = { token: token },
 195+ transform,
 196+ l, i,
 197+ aborted = false,
 198+ tName = token.name.toLowerCase(),
 199+ tagts = this.transformers.tag[tName];
 200+
 201+ if ( tagts && tagts.length ) {
 202+ // could cache this per tag type to avoid re-sorting each time
 203+ ts = ts.concat(tagts);
 204+ ts.sort( this._cmpTransformations );
 205+ }
 206+ //console.log(JSON.stringify(ts, null, 2));
 207+ if ( ts ) {
 208+ for ( i = 0, l = ts.length; i < l; i++ ) {
 209+ transformer = ts[i];
 210+ if ( res.token.rank && transformer.rank <= res.token.rank ) {
 211+ // skip transformation, was already applied.
 212+ continue;
 213+ }
 214+ // Transform token with side effects
 215+ res = transformer.transform( res.token, cb, this, this.prevToken );
 216+ // if multiple tokens or null token: process returned tokens (in parent)
 217+ if ( !res.token || // async implies tokens instead of token, so no
 218+ // need to check explicitly
 219+ res.token.type !== token.type ||
 220+ res.token.name !== token.name ) {
 221+ this._resetTokenRank ( res, transformer );
 222+ aborted = true;
 223+ break;
 224+ }
 225+ // track progress on token
 226+ res.token.rank = transformer.rank;
 227+ }
 228+ if ( ! aborted ) {
 229+ // Mark token as fully processed.
 230+ res.token.rank = phaseEndRank;
 231+ }
 232+ }
 233+ return res;
 234+};
 235+
 236+
 237+/* Call all transformers on non-tag token types.
 238+ * XXX: different signature for sync vs. async, move to subclass?
 239+ *
 240+ * @method
 241+ * @param {Object} The current token.
 242+ * @param {Function} Completion callback for async processing.
 243+ * @param {Number} Rank of phase end, both key for transforms and rank for
 244+ * processed tokens.
 245+ * @param {Array} ts List of token transformers for this token type.
 246+ * @returns {Object} Token(s) and async indication.
 247+ */
 248+TokenTransformManager.prototype._transformToken = function ( token, cb, phaseEndRank, ts ) {
 249+ // prepend 'any' transformers
 250+ var anyTrans = this.transformers.any;
 251+ if ( anyTrans.length ) {
 252+ ts = this.transformers.any.concat(ts);
 253+ ts.sort( this._cmpTransformations );
 254+ }
 255+ var transformer,
 256+ res = { token: token },
 257+ aborted = false;
 258+ if ( ts ) {
 259+ for (var i = 0, l = ts.length; i < l; i++ ) {
 260+ transformer = ts[i];
 261+ if ( res.token.rank && transformer.rank <= res.token.rank ) {
 262+ // skip transformation, was already applied.
 263+ continue;
 264+ }
 265+ // Transform the token.
 266+ // XXX: consider moving the rank out of the token itself to avoid
 267+ // transformations messing with it in broken ways. Not sure if
 268+ // some transformations need to manipulate it though. gwicke
 269+ res = transformer.transform( res.token, cb, this, this.prevToken );
 270+ if ( !res.token ||
 271+ res.token.type !== token.type ) {
 272+ this._resetTokenRank ( res, transformer );
 273+ aborted = true;
 274+ break;
 275+ }
 276+ res.token.rank = transformer.rank;
 277+ }
 278+ if ( ! aborted ) {
 279+ // mark token as completely processed
 280+ res.token.rank = phaseEndRank; // need phase passed in!
 281+ }
 282+
 283+ }
 284+ return res;
 285+};
 286+
 287+
 288+
 289+/******************** Async token transforms: Phase 2 **********************/
 290+
 291+/**
 292+ * Asynchronous and potentially out-of-order token transformations, used in phase 2.
 293+ *
 294+ * return protocol for individual transforms:
 295+ * { tokens: [tokens], async: true }: async expansion -> outstanding++ in parent
 296+ * { tokens: [tokens] }: fully expanded, tokens will be reprocessed
 297+ * { token: token }: single-token return
 298+ *
 299+ * @class
 300+ * @constructor
 301+ * @param {Function} childFactory: A function that can be used to create a
 302+ * new, nested transform manager:
 303+ * nestedAsyncTokenTransformManager = manager.newChildPipeline( inputType, args );
 304+ * @param {Object} args, the argument map for templates
 305+ * @param {Object} env, the environment.
 306+ */
 307+function AsyncTokenTransformManager ( childFactory, args, env ) {
 308+ // Factory function for new AsyncTokenTransformManager creation with
 309+ // default transforms enabled
 310+ // Also sets up a tokenizer and phase-1-transform depending on the input format
 311+ // nestedAsyncTokenTransformManager = manager.newChildPipeline( inputType, args );
 312+ this.childFactory = childFactory;
 313+ this._construct();
 314+ this._reset( args, env );
 315+}
 316+
 317+// Inherit from TokenTransformManager, and thus also from EventEmitter.
 318+AsyncTokenTransformManager.prototype = new TokenTransformManager();
 319+AsyncTokenTransformManager.prototype.constructor = AsyncTokenTransformManager;
 320+
 321+/**
 322+ * Create a new child pipeline.
 323+ *
 324+ * @method
 325+ * @param {String} Input type, currently only support 'text/wiki'.
 326+ * @param {Object} Template arguments
 327+ * @returns {Object} Pipeline, which is an object with 'first' pointing to the
 328+ * first stage of the pipeline, and 'last' pointing to the last stage.
 329+ */
 330+AsyncTokenTransformManager.prototype.newChildPipeline = function ( inputType, args ) {
 331+ var pipe = this.childFactory( inputType, args );
 332+ return pipe;
 333+};
 334+
 335+/**
 336+ * Reset the internal token and outstanding-callback state of the
 337+ * TokenTransformManager, but keep registrations untouched.
 338+ *
 339+ * @method
 340+ * @param {Object} args, template arguments
 341+ * @param {Object} The environment.
 342+ */
 343+AsyncTokenTransformManager.prototype._reset = function ( args, env ) {
 344+ // Note: Much of this is frame-like.
 345+ this.tailAccumulator = undefined;
 346+ // eventize: bend to event emitter callback
 347+ this.tokenCB = this._returnTokens.bind( this );
 348+ this.accum = new TokenAccumulator(null);
 349+ this.firstaccum = this.accum;
 350+ this.prevToken = undefined;
 351+ if ( ! args ) {
 352+ this.args = {}; // no arguments at the top level
 353+ } else {
 354+ this.args = args;
 355+ }
 356+ if ( ! env ) {
 357+ if ( !this.env ) {
 358+ throw "AsyncTokenTransformManager: environment needed!" + env;
 359+ }
 360+ } else {
 361+ this.env = env;
 362+ }
 363+};
 364+
 365+
 366+/**
 367+ * Transform and expand tokens. Transformed token chunks will be emitted in
 368+ * the 'chunk' event.
 369+ *
 370+ * @method
 371+ * @param {Array} chunk of tokens
 372+ */
 373+AsyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
 374+ //console.log('TokenTransformManager onChunk');
 375+ // Set top-level callback to next transform phase
 376+ var res = this.transformTokens ( tokens, this.tokenCB );
 377+ this.tailAccumulator = res.async;
 378+ this.emit( 'chunk', tokens );
 379+ //this.phase2TailCB( tokens, true );
 380+ if ( res.async ) {
 381+ this.tokenCB = res.async.getParentCB ( 'sibling' );
 382+ }
 383+};
 384+
 385+/**
 386+ * Run transformations from phases 0 and 1. This includes starting and
 387+ * managing asynchronous transformations.
 388+ *
 389+ */
 390+AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parentCB ) {
 391+
 392+ //console.log('_transformPhase01: ' + JSON.stringify(tokens) );
 393+
 394+ var res,
 395+ phaseEndRank = 2, // parametrize!
 396+ // Prepare a new accumulator, to be used by async children (if any)
 397+ localAccum = [],
 398+ accum = new TokenAccumulator( parentCB ),
 399+ cb = accum.getParentCB( 'child' ),
 400+ activeAccum = null,
 401+ tokensLength = tokens.length,
 402+ token,
 403+ ts = this.transformers;
 404+
 405+ for ( var i = 0; i < tokensLength; i++ ) {
 406+ token = tokens[i];
 407+
 408+ switch( token.type ) {
 409+ case 'TAG':
 410+ case 'ENDTAG':
 411+ case 'SELFCLOSINGTAG':
 412+ res = this._transformTagToken( token, cb, phaseEndRank );
 413+ break;
 414+ case 'TEXT':
 415+ res = this._transformToken( token, cb, phaseEndRank, ts.text );
 416+ break;
 417+ case 'COMMENT':
 418+ res = this._transformToken( token, cb, phaseEndRank, ts.comment);
 419+ break;
 420+ case 'NEWLINE':
 421+ res = this._transformToken( token, cb, phaseEndRank, ts.newline );
 422+ break;
 423+ case 'END':
 424+ res = this._transformToken( token, cb, phaseEndRank, ts.end );
 425+ break;
 426+ default:
 427+ res = this._transformToken( token, cb, phaseEndRank, ts.martian );
 428+ break;
 429+ }
 430+
 431+ if( res.tokens ) {
 432+ // Splice in the returned tokens (while replacing the original
 433+ // token), and process them next.
 434+ [].splice.apply( tokens, [i, 1].concat(res.tokens) );
 435+ tokensLength = tokens.length;
 436+ i--; // continue at first inserted token
 437+ } else if ( res.token ) {
 438+ if ( res.token.rank === 2 ) {
 439+ // token is done.
 440+ if ( activeAccum ) {
 441+ // push to accumulator
 442+ activeAccum.push( res.token );
 443+ } else {
 444+ // If there is no accumulator yet, then directly return the
 445+ // token to the parent. Collect them in localAccum for this
 446+ // purpose.
 447+ localAccum.push(res.token);
 448+ }
 449+ } else {
 450+ // re-process token.
 451+ tokens[i] = res.token;
 452+ i--;
 453+ }
 454+ } else if ( res.async ) {
 455+ // The child now switched to activeAccum, we have to create a new
 456+ // accumulator for the next potential child.
 457+ activeAccum = accum;
 458+ accum = new TokenAccumulator( activeAccum.getParentCB( 'sibling' ) );
 459+ cb = accum.getParentCB( 'child' );
 460+ }
 461+ }
 462+
 463+ // Return finished tokens directly to caller, and indicate if further
 464+ // async actions are outstanding. The caller needs to point a sibling to
 465+ // the returned accumulator, or call .siblingDone() to mark the end of a
 466+ // chain.
 467+ return { tokens: localAccum, async: activeAccum };
 468+};
 469+
 470+/**
 471+ * Callback from tokens fully processed for phase 0 and 1, which are now ready
 472+ * for synchronous and globally in-order phase 2 processing.
 473+ *
 474+ * @method
 475+ * @param {Array} chunk of tokens
 476+ * @param {Mixed} Either a falsy value if this is the last callback
 477+ * (everything is done), or a truish value if not yet done.
 478+ */
 479+AsyncTokenTransformManager.prototype._returnTokens = function ( tokens, notYetDone ) {
 480+ // FIXME: store frame in object?
 481+ this.emit('chunk', tokens);
 482+ //tokens = this._transformPhase2( this.frame, tokens, this.parentCB );
 483+ //console.log('AsyncTokenTransformManager._returnTokens, after _transformPhase2.');
 484+
 485+ //this.emit( 'chunk', tokens );
 486+
 487+ if ( ! notYetDone ) {
 488+ console.log('AsyncTokenTransformManager._returnTokens done.');
 489+ // signal our done-ness to consumers.
 490+ this.emit( 'end' );
 491+ // and reset internal state.
 492+ this._reset();
 493+ }
 494+};
 495+
 496+/**
 497+ * Callback for the end event emitted from the tokenizer.
 498+ * Either signals the end of input to the tail of an ongoing asynchronous
 499+ * processing pipeline, or directly emits 'end' if the processing was fully
 500+ * synchronous.
 501+ */
 502+AsyncTokenTransformManager.prototype.onEndEvent = function () {
 503+ if ( this.tailAccumulator ) {
 504+ this.tailAccumulator.siblingDone();
 505+ } else {
 506+ // nothing was asynchronous, so we'll have to emit end here.
 507+ this.emit('end');
 508+ this._reset();
 509+ }
 510+};
 511+
 512+
 513+
 514+
 515+
 516+
 517+/*************** In-order, synchronous transformer (phase 1 and 3) ***************/
 518+
 519+/**
 520+ * Subclass for phase 3, in-order and synchronous processing.
 521+ *
 522+ * @class
 523+ * @constructor
 524+ * @param {Object} environment.
 525+ */
 526+function SyncTokenTransformManager ( env ) {
 527+ // both inherited
 528+ this._construct();
 529+ this.args = {}; // no arguments at the top level
 530+ this.env = env;
 531+}
 532+
 533+// Inherit from TokenTransformManager, and thus also from EventEmitter.
 534+SyncTokenTransformManager.prototype = new TokenTransformManager();
 535+SyncTokenTransformManager.prototype.constructor = SyncTokenTransformManager;
 536+
 537+
 538+/**
 539+ * Global in-order and synchronous traversal on token stream. Emits
 540+ * transformed chunks of tokens in the 'chunk' event.
 541+ *
 542+ * @method
 543+ * @param {Array} Token chunk.
 544+ */
 545+SyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
 546+ var res,
 547+ phaseEndRank = 3,
 548+ localAccum = [],
 549+ localAccumLength = 0,
 550+ tokensLength = tokens.length,
 551+ cb = undefined, // XXX: not meaningful for purely synchronous processing!
 552+ token,
 553+ // Top-level frame only in phase 3, as everything is already expanded.
 554+ ts = this.transformers;
 555+
 556+ for ( var i = 0; i < tokensLength; i++ ) {
 557+ token = tokens[i];
 558+
 559+ switch( token.type ) {
 560+ case 'TAG':
 561+ case 'ENDTAG':
 562+ case 'SELFCLOSINGTAG':
 563+ res = this._transformTagToken( token, cb, phaseEndRank );
 564+ break;
 565+ case 'TEXT':
 566+ res = this._transformToken( token, cb, phaseEndRank,
 567+ ts.text );
 568+ break;
 569+ case 'COMMENT':
 570+ res = this._transformToken( token, cb, phaseEndRank, ts.comment );
 571+ break;
 572+ case 'NEWLINE':
 573+ res = this._transformToken( token, cb, phaseEndRank, ts.newline );
 574+ break;
 575+ case 'END':
 576+ res = this._transformToken( token, cb, phaseEndRank, ts.end );
 577+ break;
 578+ default:
 579+ res = this._transformToken( token, cb, phaseEndRank, ts.martian );
 580+ break;
 581+ }
 582+
 583+ if( res.tokens ) {
 584+ // Splice in the returned tokens (while replacing the original
 585+ // token), and process them next.
 586+ [].splice.apply( tokens, [i, 1].concat(res.tokens) );
 587+ tokensLength = tokens.length;
 588+ i--; // continue at first inserted token
 589+ } else if ( res.token ) {
 590+ if ( res.token.rank === phaseEndRank ) {
 591+ // token is done.
 592+ localAccum.push(res.token);
 593+ this.prevToken = res.token;
 594+ } else {
 595+ // re-process token.
 596+ tokens[i] = res.token;
 597+ i--;
 598+ }
 599+ }
 600+ }
 601+ this.emit( 'chunk', localAccum );
 602+};
 603+
 604+/**
 605+ * Callback for the end event emitted from the tokenizer.
 606+ * Either signals the end of input to the tail of an ongoing asynchronous
 607+ * processing pipeline, or directly emits 'end' if the processing was fully
 608+ * synchronous.
 609+ */
 610+SyncTokenTransformManager.prototype.onEndEvent = function () {
 611+ // This phase is fully synchronous, so just pass the end along and prepare
 612+ // for the next round.
 613+ this.emit('end');
 614+};
 615+
 616+
 617+
 618+
 619+
 620+
 621+/******************************* TokenAccumulator *************************/
 622+/**
 623+ * Token accumulators buffer tokens between asynchronous processing points,
 624+ * and return fully processed token chunks in-order and as soon as possible.
 625+ * They support the AsyncTokenTransformManager.
 626+ *
 627+ * @class
 628+ * @constructor
 629+ * @param {Object} next TokenAccumulator to link to
 630+ * @param {Array} (optional) tokens, init accumulator with tokens or []
 631+ */
 632+function TokenAccumulator ( parentCB ) {
 633+ this.parentCB = parentCB;
 634+ this.accum = [];
 635+ // Wait for child and sibling by default
 636+ // Note: Need to decrement outstanding on last accum
 637+ // in a chain.
 638+ this.outstanding = 2;
 639+}
 640+
 641+/**
 642+ * Curry a parentCB with the object and reference.
 643+ *
 644+ * @method
 645+ * @param {Object} TokenAccumulator
 646+ * @param {misc} Reference / key for callback
 647+ * @returns {Function}
 648+ */
 649+TokenAccumulator.prototype.getParentCB = function ( reference ) {
 650+ return this._returnTokens.bind( this, reference );
 651+};
 652+
 653+/**
 654+ * Pass tokens to an accumulator
 655+ *
 656+ * @method
 657+ * @param {Object} token
 658+ */
 659+TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetDone ) {
 660+ var res,
 661+ cb,
 662+ returnTokens = [];
 663+
 664+ if ( ! notYetDone ) {
 665+ this.outstanding--;
 666+ }
 667+
 668+ if ( reference === 'child' ) {
 669+ // XXX: Use some marker to avoid re-transforming token chunks several
 670+ // times?
 671+ res = this.transformTokens( tokens, this.parentCB );
 672+
 673+ if ( res.async ) {
 674+ // new asynchronous expansion started, chain of accumulators
 675+ // created
 676+ if ( this.outstanding === 0 ) {
 677+ // Last accum in chain should only wait for child
 678+ res.async.outstanding--;
 679+ cb = this.parentCB;
 680+ } else {
 681+ cb = this.parentCB;
 682+ // set own callback to new sibling, the end of accumulator chain
 683+ this.parentCB = res.async.getParentCB( 'sibling' );
 684+ }
 685+ }
 686+ if ( ! notYetDone ) {
 687+ // Child is done, return accumulator from sibling. Siblings
 688+ // process tokens themselves, so we concat those to the result of
 689+ // processing tokens from the child.
 690+ tokens = res.tokens.concat( this.accum );
 691+ this.accum = [];
 692+ }
 693+ this.cb( res.tokens, res.async );
 694+ return null;
 695+ } else {
 696+ // sibling
 697+ if ( this.outstanding === 0 ) {
 698+ tokens = this.accum.concat( tokens );
 699+ // A sibling will transform tokens, so we don't have to do this
 700+ // again.
 701+ this.parentCB( res.tokens, false );
 702+ return null;
 703+ } else if ( this.outstanding === 1 && notYetDone ) {
 704+ // Sibling is not yet done, but child is. Return own parentCB to
 705+ // allow the sibling to go direct, and call back parent with
 706+ // tokens. The internal accumulator is empty at this stage, as its
 707+ // tokens are passed to the parent when the child is done.
 708+ return this.parentCB( tokens, true);
 709+ }
 710+
 711+
 712+ }
 713+};
 714+
 715+/**
 716+ * Mark the sibling as done (normally at the tail of a chain).
 717+ */
 718+TokenAccumulator.prototype.siblingDone = function () {
 719+ this._returnTokens01 ( 'sibling', [], false );
 720+};
 721+
 722+
 723+/**
 724+ * Push a token into the accumulator
 725+ *
 726+ * @method
 727+ * @param {Object} token
 728+ */
 729+TokenAccumulator.prototype.push = function ( token ) {
 730+ return this.accum.push(token);
 731+};
 732+
 733+
 734+if (typeof module == "object") {
 735+ module.exports.AsyncTokenTransformManager = AsyncTokenTransformManager;
 736+ module.exports.SyncTokenTransformManager = SyncTokenTransformManager;
 737+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformManager.js
___________________________________________________________________
Added: svn:eol-style
1738 + native
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js
@@ -13,44 +13,44 @@
1414 var fs = require('fs'),
1515 path = require('path'),
1616 PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer,
17 - TokenTransformDispatcher = require('./mediawiki.TokenTransformDispatcher.js').TokenTransformDispatcher,
 17+ TokenTransformManager = require('./mediawiki.TokenTransformManager.js'),
1818 QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer,
1919 Cite = require('./ext.Cite.js').Cite,
2020 FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5,
2121 DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
2222 DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter;
2323
24 -function ParserPipeline( config ) {
 24+function ParserPipeline( env ) {
2525 // Set up a simple parser pipeline.
2626
27 - if ( !config ) {
28 - config = {};
 27+ // XXX: create a full-fledged environment
 28+ if ( !env ) {
 29+ this.env = {};
 30+ } else {
 31+ this.env = env;
2932 }
3033
31 - this.wikiTokenizer = new PegTokenizer();
 34+ // Create an input pipeline for the given input (for now fixed to
 35+ // text/wiki).
 36+ this.inputPipeline = this.makeInputPipeline( 'text/wiki', {} );
3237
33 - /**
34 - * Token stream transformations.
35 - * This is where all the wiki-specific functionality is implemented.
36 - * See https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
37 - */
38 - this.tokenTransformer = new TokenTransformDispatcher ();
3938
 39+ this.tokenPostProcessor = new TokenTransformManager.SyncTokenTransformManager ( env );
 40+ this.tokenPostProcessor.listenForTokensFrom ( this.inputPipeline.last );
 41+
 42+
4043 // Add token transformations..
41 - var qt = new QuoteTransformer();
42 - qt.register(this.tokenTransformer);
 44+ var qt = new QuoteTransformer( this.tokenPostProcessor );
4345
44 - //var citeExtension = new Cite();
45 - //citeExtension.register(this.tokenDispatcher);
 46+ //var citeExtension = new Cite( this.tokenTransformer );
4647
47 - this.tokenTransformer.listenForTokensFrom( this.wikiTokenizer );
4848
4949 /**
5050 * The tree builder creates a DOM tree from the token soup emitted from
5151 * the TokenTransformDispatcher.
5252 */
5353 this.treeBuilder = new FauxHTML5.TreeBuilder();
54 - this.treeBuilder.listenForTokensFrom( this.tokenTransformer );
 54+ this.treeBuilder.listenForTokensFrom( this.tokenPostProcessor );
5555
5656 /**
5757 * Final processing on the HTML DOM.
@@ -80,9 +80,31 @@
8181 this.postProcessor.addListener( 'document', this.setDocumentProperty.bind( this ) );
8282 }
8383
 84+ParserPipeline.prototype.makeInputPipeline = function ( inputType, args ) {
 85+ if ( inputType === 'text/wiki' ) {
 86+ var wikiTokenizer = new PegTokenizer();
 87+
 88+ /**
 89+ * Token stream transformations.
 90+ * This is where all the wiki-specific functionality is implemented.
 91+ * See https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
 92+ */
 93+ var tokenPreProcessor = new TokenTransformManager.SyncTokenTransformManager ( this.env );
 94+ tokenPreProcessor.listenForTokensFrom ( wikiTokenizer );
 95+
 96+ var tokenExpander = new TokenTransformManager.AsyncTokenTransformManager (
 97+ this.makeInputPipeline.bind( this ), args, this.env );
 98+ tokenExpander.listenForTokensFrom ( tokenPreProcessor );
 99+
 100+ return { first: wikiTokenizer, last: tokenExpander };
 101+ } else {
 102+ throw "ParserPipeline.makeInputPipeline: Unsupported input type " + inputType;
 103+ }
 104+}
 105+
84106 ParserPipeline.prototype.parse = function ( text ) {
85107 // Set the pipeline in motion by feeding the tokenizer
86 - this.wikiTokenizer.tokenize( text );
 108+ this.inputPipeline.first.tokenize( text );
87109 };
88110
89111 // XXX: Lame hack: set document property. Instead, emit events
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
@@ -18,16 +18,16 @@
1919 // constants
2020 TemplateHandler.prototype.rank = 1.1;
2121
22 -TemplateHandler.prototype.register = function ( dispatcher ) {
23 - this.dispatcher = dispatcher;
 22+TemplateHandler.prototype.register = function ( manager ) {
 23+ this.manager = manager;
2424 // Register for template and templatearg tag tokens
25 - dispatcher.addTransform( this.onTemplate.bind(this),
 25+ manager.addTransform( this.onTemplate.bind(this),
2626 this.rank, 'tag', 'template' );
27 - dispatcher.addTransform( this.onTemplateArg.bind(this),
 27+ manager.addTransform( this.onTemplateArg.bind(this),
2828 this.rank, 'tag', 'templatearg' );
2929
3030 // Reset internal state when the parser pipeline is done
31 - dispatcher.addTransform( this.reset.bind(this),
 31+ manager.addTransform( this.reset.bind(this),
3232 this.rank, 'end' );
3333 };
3434
@@ -39,13 +39,17 @@
4040 * calls or sets up the callback to _expandTemplate, which then fetches and
4141 * processes the template.
4242 */
43 -TemplateHandler.prototype.onTemplate = function ( token, cb, frame ) {
 43+TemplateHandler.prototype.onTemplate = function ( token, cb ) {
4444 // check for 'subst:'
4545 // check for variable magic names
4646 // check for msg, msgnw, raw magics
4747 // check for parser functions
4848
4949 // create a new frame
 50+ // XXX FIXME: create a new AsyncTokenTransformManager with default
 51+ // transformations!
 52+ //
 53+ // nestedAsyncTokenTransformManager = this.manager.newChildPipeline( inputType, args );
5054 var newFrame = {
5155 args: {},
5256 env: frame.env,
@@ -69,7 +73,7 @@
7074 kv = { key: [], value: [] };
7175 // transform the value
7276 argCB = this._returnArgValue.bind( this, { index: i, frame: newFrame } );
73 - res = frame.transformPhase( frame, args[key], argCB );
 77+ res = frame.transformTokens( frame, args[key], argCB );
7478 if ( res.async ) {
7579 newFrame.outstanding++;
7680 }
@@ -116,6 +120,12 @@
117121 TemplateHandler.prototype._expandTemplate = function ( frame ) {
118122 // Set up a pipeline:
119123 // fetch template source -> tokenizer
 124+ // getInputPipeline( inputType )
 125+ // normally tokenizer -> transforms 1/2
 126+ // encapsulation by default, generic de-encapsulation in phase 3
 127+ // { type: 'object', name: 'template', value: [tokens] }
 128+ // -> then un-wrap and replace with contents in phase 3 if for-viewing
 129+ // mode
120130 // -> TokenTransformDispatcher (phase 1/2 only, with frame passed in)
121131 // -> frame.cb( tokens )
122132
@@ -177,6 +187,9 @@
178188 };
179189
180190
 191+/**
 192+ * Expand template arguments with tokens from the containing frame.
 193+ */
181194 TemplateHandler.prototype.onTemplateArg = function ( token, cb, frame ) {
182195 var argName = token.attribs[0][1]; // XXX: do this properly!
183196 if ( argName in frame.args ) {
Index: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
@@ -4,11 +4,12 @@
55 * @class
66 * @constructor
77 */
8 -function Cite () {
 8+function Cite ( dispatcher ) {
99 this.refGroups = {};
1010 this.refTokens = [];
1111 // Within ref block
1212 this.isActive = false;
 13+ this.register( dispatcher );
1314 }
1415
1516 /**

Status & tagging log