Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -382,33 +382,18 @@ |
383 | 383 | this.currentItem = item; |
384 | 384 | |
385 | 385 | // Tokenize the input |
386 | | - var res = pThingy.wikiTokenizer.tokenize(item.input); |
| 386 | + pThingy.parse(item.input); |
| 387 | + var doc = pThingy.document; |
387 | 388 | |
388 | 389 | // Check for errors |
389 | | - if (res.err) { |
| 390 | + if (doc.err) { |
390 | 391 | this.printTitle(item); |
391 | 392 | this.failParseTests++; |
392 | 393 | console.log('PARSE FAIL', res.err); |
393 | 394 | } else { |
394 | | - //var res = es.HtmlSerializer.stringify(tokens,environment); |
| 395 | + // Check the result vs. the expected result. |
| 396 | + this.checkResult( this.currentItem, doc.body.innerHTML ); |
395 | 397 | |
396 | | - //Slightly better token output debugging: |
397 | | - //console.log( util.inspect( res.tokens, false, null ).yellow); |
398 | | - |
399 | | - // Transform tokens using the TokenTransformDispatcher. When done, the |
400 | | - // TokenTransformDispatcher calls buildTree() and checkResult() with the |
401 | | - // transformed tokens. |
402 | | - |
403 | | - //console.log(JSON.stringify(res.tokens, null, 2)); |
404 | | - |
405 | | - pThingy.tokenDispatcher.transformTokens( res.tokens ); |
406 | | - |
407 | | - // XXX make this NOT a property |
408 | | - var out = pThingy.document.body.innerHTML; |
409 | | - |
410 | | - // Finally, check the result vs. the expected result. |
411 | | - this.checkResult( this.currentItem, out ); |
412 | | - |
413 | 398 | if ( this.argv.wikidom ) { |
414 | 399 | // Test HTML DOM -> WikiDOM conversion |
415 | 400 | this.printWikiDom( pThingy.getWikiDom() ); |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js |
— | — | @@ -2,27 +2,14 @@ |
3 | 3 | * expansion. Individual transformations register for the token types they are |
4 | 4 | * interested in and are called on each matching token. |
5 | 5 | * |
6 | | - * A transformer might set TokenContext.token to null, a single token, or an |
7 | | - * array of tokens before returning it. |
8 | | - * - Null removes the token and stops further processing for this token. |
9 | | - * - A single token is further processed using the remaining transformations |
10 | | - * registered for this token, and finally placed in the output token list. |
11 | | - * - A list of tokens stops the processing for this token. Instead, processing |
12 | | - * restarts with the first returned token. |
13 | | - * |
14 | | - * Additionally, transformers performing asynchronous actions on a token can |
15 | | - * create a new TokenAccumulator using .newAccumulator(). This creates a new |
16 | | - * accumulator for each asynchronous result, with the asynchronously processed |
17 | | - * token last in its internal accumulator. This setup avoids the need to apply |
18 | | - * operational-transform-like index transformations when parallel expansions |
19 | | - * insert tokens in front of other ongoing expansion tasks. |
| 6 | + * See |
| 7 | + * https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations |
| 8 | + * for more documentation. |
20 | 9 | * |
21 | | - * XXX: I am not completely happy with the mutable TokenContext construct. At |
22 | | - * least the token should probably be passed as a separate argument. Also, |
23 | | - * integrate the general environment (configuration, cache etc). (gwicke) |
24 | | - * */ |
| 10 | + * @author Gabriel Wicke <gwicke@wikimedia.org> |
| 11 | + */ |
25 | 12 | |
26 | | -$ = require('jquery'); |
| 13 | +var events = require('events'); |
27 | 14 | |
28 | 15 | /** |
29 | 16 | * Central dispatcher for potentially asynchronous token transformations. |
— | — | @@ -32,347 +19,603 @@ |
33 | 20 | * @param {Function} callback, a callback function accepting a token list as |
34 | 21 | * its only argument. |
35 | 22 | */ |
36 | | -function TokenTransformDispatcher( callback ) { |
37 | | - this.cb = callback; // Called with transformed token list when done |
| 23 | +function TokenTransformDispatcher( ) { |
38 | 24 | this.transformers = { |
39 | | - tag: {}, // for TAG, ENDTAG, SELFCLOSINGTAG, keyed on name |
40 | | - text: [], |
41 | | - newline: [], |
42 | | - comment: [], |
43 | | - end: [], // eof |
44 | | - martian: [], // none of the above (unknown token type) |
45 | | - any: [] // all tokens, before more specific handlers are run |
| 25 | + // phase 0 and 1, rank 2 marks tokens as fully processed for these |
| 26 | + // phases. |
| 27 | + 2: { |
| 28 | + tag: {}, // for TAG, ENDTAG, SELFCLOSINGTAG, keyed on name |
| 29 | + text: [], |
| 30 | + newline: [], |
| 31 | + comment: [], |
| 32 | + end: [], // eof |
| 33 | + martian: [], // none of the above (unknown token type) |
| 34 | + any: [] // all tokens, before more specific handlers are run |
| 35 | + }, |
| 36 | + // phase 2, with ranks > 2 and <= 3. Rank 3 marks tokens as fully |
| 37 | + // processed. |
| 38 | + 3: { |
| 39 | + tag: {}, // for TAG, ENDTAG, SELFCLOSINGTAG, keyed on name |
| 40 | + text: [], |
| 41 | + newline: [], |
| 42 | + comment: [], |
| 43 | + end: [], // eof |
| 44 | + martian: [], // none of the above (unknown token type) |
| 45 | + any: [] // all tokens, before more specific handlers are run |
| 46 | + } |
46 | 47 | }; |
47 | 48 | this.reset(); |
48 | 49 | } |
49 | 50 | |
| 51 | +// Inherit from EventEmitter |
| 52 | +TokenTransformDispatcher.prototype = new events.EventEmitter(); |
| 53 | + |
50 | 54 | /** |
| 55 | + * Register to a token source, normally the tokenizer. |
| 56 | + * The event emitter emits a 'chunk' event which contains a chunk of tokens, |
| 57 | + * and signals the end of tokens by triggering the 'end' event. |
| 58 | + * |
| 59 | + * @param {Object} EventEmitter token event emitter. |
| 60 | + */ |
| 61 | +TokenTransformDispatcher.prototype.subscribeToTokenEmitter = function ( tokenEmitter ) { |
| 62 | + tokenEmitter.addListener('chunk', this.transformTokens.bind( this ) ); |
| 63 | + tokenEmitter.addListener('end', this.onEndEvent.bind( this ) ); |
| 64 | +}; |
| 65 | + |
| 66 | + |
| 67 | +/** |
51 | 68 | * Reset the internal token and outstanding-callback state of the |
52 | 69 | * TokenTransformDispatcher, but keep registrations untouched. |
53 | 70 | * |
54 | 71 | * @method |
55 | 72 | */ |
56 | | -TokenTransformDispatcher.prototype.reset = function () { |
| 73 | +TokenTransformDispatcher.prototype.reset = function ( env ) { |
| 74 | + this.tailAccumulator = undefined; |
| 75 | + this.phase2TailCB = this.returnTokens01.bind( this ); |
57 | 76 | this.accum = new TokenAccumulator(null); |
58 | 77 | this.firstaccum = this.accum; |
59 | | - this.outstanding = 1; // Number of outstanding processing steps |
60 | | - // (e.g., async template fetches/expansions) |
| 78 | + this.prevToken = undefined; |
| 79 | + this.frame = { |
| 80 | + args: {}, // no arguments at the top level |
| 81 | + env: this.env |
| 82 | + }; |
| 83 | + // Should be as static as possible re this and frame |
| 84 | + // This is circular, but that should not really matter for non-broken GCs |
| 85 | + // that handle pure JS ref loops. |
| 86 | + this.frame.transformPhase = this.transformPhase01.bind( this, this.frame ); |
61 | 87 | }; |
62 | 88 | |
| 89 | +TokenTransformDispatcher.prototype._rankToPhase = function ( rank ) { |
| 90 | + if ( rank < 0 || rank > 3 ) { |
| 91 | + throw "TransformDispatcher error: Invalid transformation rank " + rank; |
| 92 | + } |
| 93 | + if ( rank <= 2 ) { |
| 94 | + return 2; |
| 95 | + } else { |
| 96 | + return 3; |
| 97 | + } |
| 98 | +}; |
| 99 | + |
63 | 100 | /** |
64 | | - * Append a listener registration. The new listener will be executed after |
65 | | - * other listeners for the same token have been called. |
| 101 | + * Add a transform registration. |
66 | 102 | * |
67 | 103 | * @method |
68 | | - * @param {Function} listener, a function accepting a TokenContext and |
69 | | - * returning a TokenContext. |
| 104 | + * @param {Function} transform. |
70 | 105 | * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end', |
71 | 106 | * 'martian' (unknown token), 'any' (any token, matched before other matches). |
72 | 107 | * @param {String} tag name for tags, omitted for non-tags |
73 | 108 | */ |
74 | | -TokenTransformDispatcher.prototype.appendListener = function ( listener, type, name ) { |
| 109 | +TokenTransformDispatcher.prototype.addTransform = function ( transformation, rank, type, name ) { |
| 110 | + var phase = this._rankToPhase( rank ), |
| 111 | + transArr, |
| 112 | + transformer = { |
| 113 | + transform: transformation, |
| 114 | + rank: rank |
| 115 | + }; |
75 | 116 | if ( type === 'tag' ) { |
76 | 117 | name = name.toLowerCase(); |
77 | | - if ( $.isArray(this.transformers.tag.name) ) { |
78 | | - this.transformers.tag[name].push(listener); |
79 | | - } else { |
80 | | - this.transformers.tag[name] = [listener]; |
| 118 | + transArr = this.transformers[phase].tag[name]; |
| 119 | + if ( ! transArr ) { |
| 120 | + transArr = this.transformers[phase].tag[name] = []; |
81 | 121 | } |
82 | 122 | } else { |
83 | | - this.transformers[type].push(listener); |
| 123 | + transArr = this.transformers[phase][type]; |
84 | 124 | } |
| 125 | + transArr.push(transformer); |
| 126 | + // sort ascending by rank |
| 127 | + transArr.sort( function ( t1, t2 ) { return t1.rank - t2.rank; } ); |
85 | 128 | }; |
86 | 129 | |
87 | 130 | /** |
88 | | - * Prepend a listener registration. The new listener will be called before |
89 | | - * other listeners for the same token have been called. |
| 131 | + * Remove a transform registration |
90 | 132 | * |
91 | 133 | * @method |
92 | | - * @param {Function} listener, a function accepting a TokenContext and |
93 | | - * returning a TokenContext. |
| 134 | + * @param {Number} rank, the numeric rank of the handler. |
94 | 135 | * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end', |
95 | 136 | * 'martian' (unknown token), 'any' (any token, matched before other matches). |
96 | 137 | * @param {String} tag name for tags, omitted for non-tags |
97 | 138 | */ |
98 | | -TokenTransformDispatcher.prototype.prependListener = function ( listener, type, name ) { |
| 139 | +TokenTransformDispatcher.prototype.removeTransform = function ( rank, type, name ) { |
| 140 | + var i = -1, |
| 141 | + phase = this._rankToPhase( rank ), |
| 142 | + ts; |
| 143 | + |
| 144 | + function rankUnEqual ( i ) { |
| 145 | + return i.rank !== rank; |
| 146 | + } |
| 147 | + |
99 | 148 | if ( type === 'tag' ) { |
100 | 149 | name = name.toLowerCase(); |
101 | | - if ( $.isArray(this.transformers.tag.name) ) { |
102 | | - this.transformers.tag[name].unshift(listener); |
103 | | - } else { |
104 | | - this.transformers.tag[name] = [listener]; |
| 150 | + var maybeTransArr = this.transformers[phase].tag[name]; // keyed by the lowercased tag name, as in addTransform |
| 151 | + if ( maybeTransArr ) { |
| 152 | + this.transformers[phase].tag[name] = maybeTransArr.filter( rankUnEqual ); |
105 | 153 | } |
106 | 154 | } else { |
107 | | - this.transformers[type].unshift(listener); |
| 155 | + this.transformers[phase][type] = this.transformers[phase][type].filter( rankUnEqual ) ; |
108 | 156 | } |
109 | 157 | }; |
110 | 158 | |
111 | 159 | /** |
112 | | - * Remove a listener registration |
113 | | - * |
114 | | - * XXX: matching the function for equality is not ideal. Use a string key |
115 | | - * instead? |
116 | | - * |
117 | | - * @method |
118 | | - * @param {Function} listener, a function accepting a TokenContext and |
119 | | - * returning a TokenContext. |
120 | | - * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end', |
121 | | - * 'martian' (unknown token), 'any' (any token, matched before other matches). |
122 | | - * @param {String} tag name for tags, omitted for non-tags |
| 160 | + * Enforce separation between phases when token types or tag names have |
| 161 | + * changed, or when multiple tokens were returned. Processing will restart |
| 162 | + * with the new rank. |
123 | 163 | */ |
124 | | -TokenTransformDispatcher.prototype.removeListener = function ( listener, type, name ) { |
125 | | - var i = -1; |
126 | | - var ts; |
127 | | - if ( type === 'tag' ) { |
128 | | - name = name.toLowerCase(); |
129 | | - if ( $.isArray(this.transformers.tag.name) ) { |
130 | | - ts = this.transformers.tag[name]; |
131 | | - i = ts.indexOf(listener); |
| 164 | +TokenTransformDispatcher.prototype._resetTokenRank = function ( res, transformer ) { |
| 165 | + if ( res.token ) { |
| 166 | + // reset rank after type or name change |
| 167 | + if ( transformer.rank < 1 ) { |
| 168 | + res.token.rank = 0; |
| 169 | + } else { |
| 170 | + res.token.rank = 1; |
132 | 171 | } |
133 | | - } else { |
134 | | - ts = this.transformers[type]; |
135 | | - i = ts.indexOf(listener); |
| 172 | + } else if ( res.tokens && transformer.rank > 2 ) { |
| 173 | + for ( var i = 0; i < res.tokens.length; i++ ) { |
| 174 | + if ( res.tokens[i].rank === undefined ) { |
| 175 | + // Do not run phase 0 on newly created tokens from |
| 176 | + // phase 1. |
| 177 | + res.tokens[i].rank = 2; |
| 178 | + } |
| 179 | + } |
136 | 180 | } |
137 | | - if ( i >= 0 ) { |
138 | | - ts.splice(i, 1); |
139 | | - } |
140 | 181 | }; |
141 | 182 | |
142 | | -/* Constructor for information context relevant to token transformers |
143 | | - * |
144 | | - * @param token The token to precess |
145 | | - * @param accum {TokenAccumulator} The active TokenAccumulator. |
146 | | - * @param processor {TokenTransformDispatcher} The TokenTransformDispatcher object. |
147 | | - * @param lastToken Last returned token or {undefined}. |
148 | | - * @returns {TokenContext}. |
149 | | - */ |
150 | | -function TokenContext ( token, accum, dispatcher, lastToken ) { |
151 | | - this.token = token; |
152 | | - this.accum = accum; |
153 | | - this.dispatcher = dispatcher; |
154 | | - this.lastToken = lastToken; |
155 | | - return this; |
156 | | -} |
157 | 183 | |
158 | 184 | /* Call all transformers on a tag. |
159 | 185 | * |
160 | | - * @param {TokenContext} The current token and its context. |
161 | | - * @returns {TokenContext} Context with updated token and/or accum. |
| 186 | + * @param {Object} The current token. |
| 187 | + * @param {Function} Completion callback for async processing. |
| 188 | + * @param {Number} Rank of phase end, both key for transforms and rank for |
| 189 | + * processed tokens. |
| 190 | + * @param {Object} The frame, contains a reference to the environment. |
| 191 | + * @returns {Object} Token(s) and async indication. |
162 | 192 | */ |
163 | | -TokenTransformDispatcher.prototype._transformTagToken = function ( tokenCTX ) { |
| 193 | +TokenTransformDispatcher.prototype._transformTagToken = function ( token, cb, phaseEndRank, frame ) { |
164 | 194 | // prepend 'any' transformers |
165 | | - var ts = this.transformers.any; |
166 | | - var tagts = this.transformers.tag[tokenCTX.token.name.toLowerCase()]; |
| 195 | + var ts = this.transformers[phaseEndRank].any, |
| 196 | + res = { token: token }, |
| 197 | + transformer, // declared locally so the loop below does not create an implicit global |
| 198 | + l, i, |
| 199 | + aborted = false, |
| 200 | + tName = token.name.toLowerCase(), |
| 201 | + tagts = this.transformers[phaseEndRank].tag[tName]; |
| 202 | + |
167 | 203 | if ( tagts ) { |
168 | 204 | ts = ts.concat(tagts); |
169 | 205 | } |
170 | 206 | //console.log(JSON.stringify(ts, null, 2)); |
171 | 207 | if ( ts ) { |
172 | | - for (var i = 0, l = ts.length; i < l; i++ ) { |
| 208 | + for ( i = 0, l = ts.length; i < l; i++ ) { |
| 209 | + transformer = ts[i]; |
| 210 | + if ( res.token.rank && transformer.rank <= res.token.rank ) { |
| 211 | + // skip transformation, was already applied. |
| 212 | + continue; |
| 213 | + } |
173 | 214 | // Transform token with side effects |
174 | | - tokenCTX = ts[i]( tokenCTX ); |
175 | | - if ( tokenCTX.token === null || $.isArray(tokenCTX.token) ) { |
| 215 | + res = transformer.transform( res.token, cb, frame, this.prevToken ); |
| 216 | + // if multiple tokens or null token: process returned tokens (in parent) |
| 217 | + if ( !res.token || // async implies tokens instead of token, so no |
| 218 | + // need to check explicitly |
| 219 | + res.token.type !== token.type || |
| 220 | + res.token.name !== token.name ) { |
| 221 | + this._resetTokenRank ( res, transformer ); |
| 222 | + aborted = true; |
176 | 223 | break; |
177 | 224 | } |
178 | | - |
| 225 | + // track progress on token |
| 226 | + res.token.rank = transformer.rank; |
179 | 227 | } |
| 228 | + if ( ! aborted ) { |
| 229 | + // Mark token as fully processed. |
| 230 | + res.token.rank = phaseEndRank; |
| 231 | + } |
180 | 232 | } |
181 | | - return tokenCTX; |
| 233 | + return res; |
182 | 234 | }; |
183 | 235 | |
184 | 236 | /* Call all transformers on non-tag token types. |
185 | 237 | * |
186 | | - * @param tokenCTX {TokenContext} The current token and its context. |
187 | | - * @param ts List of token transformers for this token type. |
188 | | - * @returns {TokenContext} Context with updated token and/or accum. |
| 238 | + * @param {Object} The current token. |
| 239 | + * @param {Function} Completion callback for async processing. |
| 240 | + * @param {Number} Rank of phase end, both key for transforms and rank for |
| 241 | + * processed tokens. |
| 242 | + * @param {Object} The frame, contains a reference to the environment. |
| 243 | + * @param {Array} ts List of token transformers for this token type. |
| 244 | + * @returns {Object} Token(s) and async indication. |
189 | 245 | */ |
190 | | -TokenTransformDispatcher.prototype._transformToken = function ( tokenCTX, ts ) { |
| 246 | +TokenTransformDispatcher.prototype._transformToken = function ( token, cb, phaseEndRank, frame, ts ) { |
191 | 247 | // prepend 'any' transformers |
192 | | - ts = this.transformers.any.concat(ts); |
| 248 | + ts = this.transformers[phaseEndRank].any.concat(ts); |
| 249 | + var transformer, |
| 250 | + res = { token: token }, |
| 251 | + aborted = false; |
193 | 252 | if ( ts ) { |
194 | 253 | for (var i = 0, l = ts.length; i < l; i++ ) { |
| 254 | + transformer = ts[i]; |
| 255 | + if ( res.token.rank && transformer.rank <= res.token.rank ) { |
| 256 | + // skip transformation, was already applied. |
| 257 | + continue; |
| 258 | + } |
195 | 259 | // Transform token with side effects |
196 | | - tokenCTX = ts[i]( tokenCTX ); |
197 | | - if ( tokenCTX.token === null || $.isArray(tokenCTX.token) ) { |
| 260 | + // XXX: it should be a better idea to move the token.rank out of |
| 261 | + // token and into a wrapper object to ensure that transformations |
| 262 | + // don't mess with it! |
| 263 | + res = transformer.transform( res.token, cb, frame, this.prevToken ); |
| 264 | + if ( !res.token || |
| 265 | + res.token.type !== token.type ) { |
| 266 | + this._resetTokenRank ( res, transformer ); |
| 267 | + aborted = true; |
198 | 268 | break; |
199 | 269 | } |
| 270 | + res.token.rank = transformer.rank; |
200 | 271 | } |
| 272 | + if ( ! aborted ) { |
| 273 | + // mark token as completely processed |
| 274 | + res.token.rank = phaseEndRank; // need phase passed in! |
| 275 | + } |
| 276 | + |
201 | 277 | } |
202 | | - return tokenCTX; |
| 278 | + return res; |
203 | 279 | }; |
204 | 280 | |
205 | 281 | /** |
206 | 282 | * Transform and expand tokens. |
207 | 283 | * |
208 | | - * Normally called with undefined accum. Asynchronous expansions will call |
209 | | - * this with their known accum, which allows expanded tokens to be spliced in |
210 | | - * at the appropriate location in the token list, which is always at the tail |
211 | | - * end of the current accumulator. Calls back registered callback if there are |
212 | | - * no more outstanding asynchronous expansions. |
213 | | - * |
214 | | - * @param {Array} Tokens to process. |
215 | | - * @param {Object} TokenAccumulator object. Undefined for first call, set to |
216 | | - * accumulator with expanded token at tail for asynchronous expansions. |
217 | | - * @param {Int} delta, default 1. Decrement the outstanding async callback |
218 | | - * count by this much to determine when all outstanding actions are done. |
219 | | - * Main use of this argument is to avoid counting some extra callbacks from |
220 | | - * actions before they are done. |
| 284 | + * Callback for token chunks emitted from the tokenizer. |
221 | 285 | */ |
222 | | -TokenTransformDispatcher.prototype.transformTokens = function ( tokens, accum, delta ) { |
223 | | - if ( accum === undefined ) { |
224 | | - this.reset(); |
225 | | - accum = this.accum; |
| 286 | +TokenTransformDispatcher.prototype.transformTokens = function ( tokens ) { |
| 287 | + //console.log('TokenTransformDispatcher transformTokens'); |
| 288 | + var res = this.transformPhase01 ( this.frame, tokens, this.phase2TailCB ); |
| 289 | + this.phase2TailCB( res.tokens, true ); // forward only the tokens transformPhase01 returned as finished |
| 290 | + if ( res.async ) { |
| 291 | + this.tailAccumulator = res.async; |
| 292 | + this.phase2TailCB = res.async.getParentCB ( 'sibling' ); |
226 | 293 | } |
| 294 | +}; |
227 | 295 | |
228 | | - //console.log('transformTokens: ' + JSON.stringify(tokens) + JSON.stringify(accum.accum) ); |
| 296 | +/** |
| 297 | + * Callback for the 'end' event emitted from the tokenizer. |
| 298 | + * |
| 299 | + * Calls siblingDone() on the tail accumulator if there is one, else emits 'end'. |
| 300 | + */ |
| 301 | +TokenTransformDispatcher.prototype.onEndEvent = function () { |
| 302 | + if ( this.tailAccumulator ) { |
| 303 | + this.tailAccumulator.siblingDone(); |
| 304 | + } else { |
| 305 | + // nothing was asynchronous, so we'll have to emit end here. |
| 306 | + this.emit('end'); |
| 307 | + } |
| 308 | +}; |
229 | 309 | |
230 | | - var tokenCTX = new TokenContext(undefined, accum, this, undefined); |
231 | | - var origLen = tokens.length; |
232 | | - for ( var i = 0; i < tokens.length; i++ ) { |
233 | | - tokenCTX.lastToken = tokenCTX.token; // FIXME: Fix re-entrant case! |
234 | | - tokenCTX.token = tokens[i]; |
235 | | - tokenCTX.pos = i; |
236 | | - tokenCTX.accum = accum; |
237 | | - switch(tokenCTX.token.type) { |
| 310 | +/** |
| 311 | + * add parent, parentref args |
| 312 | + * return |
| 313 | + * {tokens: [tokens], async: true}: async expansion -> outstanding++ in parent |
| 314 | + * {tokens: [tokens], async: false}: fully expanded |
| 315 | + * {token: {token}}: single-token return |
| 316 | + * child after first expand (example: template expanded) |
| 317 | + * return some finished tokens, reuse parent accumulator |
| 318 | + * if new accumulator: set parent, ref |
| 319 | + */ |
| 320 | + |
| 321 | +TokenTransformDispatcher.prototype.transformPhase01 = function ( frame, tokens, parentCB ) { |
| 322 | + |
| 323 | + //console.log('transformPhase01: ' + JSON.stringify(tokens) ); |
| 324 | + |
| 325 | + var res, |
| 326 | + phaseEndRank = 2, |
| 327 | + // Prepare a new accumulator, to be used by async children (if any) |
| 328 | + localAccum = [], |
| 329 | + accum = new TokenAccumulator( parentCB ), |
| 330 | + cb = accum.getParentCB( 'child' ), |
| 331 | + activeAccum = null, |
| 332 | + tokensLength = tokens.length, |
| 333 | + token, |
| 334 | + ts = this.transformers[phaseEndRank]; |
| 335 | + |
| 336 | + for ( var i = 0; i < tokensLength; i++ ) { |
| 337 | + token = tokens[i]; |
| 338 | + |
| 339 | + switch( token.type ) { |
238 | 340 | case 'TAG': |
239 | 341 | case 'ENDTAG': |
240 | 342 | case 'SELFCLOSINGTAG': |
241 | | - tokenCTX = this._transformTagToken( tokenCTX ); |
| 343 | + res = this._transformTagToken( token, cb, phaseEndRank, frame ); |
242 | 344 | break; |
243 | 345 | case 'TEXT': |
244 | | - tokenCTX = this._transformToken( tokenCTX, this.transformers.text ); |
| 346 | + res = this._transformToken( token, cb, phaseEndRank, frame, ts.text ); |
245 | 347 | break; |
246 | 348 | case 'COMMENT': |
247 | | - tokenCTX = this._transformToken( tokenCTX, this.transformers.comment); |
| 349 | + res = this._transformToken( token, cb, phaseEndRank, frame, ts.comment); |
248 | 350 | break; |
249 | 351 | case 'NEWLINE': |
250 | | - tokenCTX = this._transformToken( tokenCTX, this.transformers.newline ); |
| 352 | + res = this._transformToken( token, cb, phaseEndRank, frame, ts.newline ); |
251 | 353 | break; |
252 | 354 | case 'END': |
253 | | - tokenCTX = this._transformToken( tokenCTX, this.transformers.end ); |
| 355 | + res = this._transformToken( token, cb, phaseEndRank, frame, ts.end ); |
254 | 356 | break; |
255 | 357 | default: |
256 | | - tokenCTX = this._transformToken( tokenCTX, this.transformers.martian ); |
| 358 | + res = this._transformToken( token, cb, phaseEndRank, frame, ts.martian ); |
257 | 359 | break; |
258 | 360 | } |
259 | | - // add special DELAYED value |
260 | | - if( $.isArray(tokenCTX.token) ) { |
| 361 | + |
| 362 | + if( res.tokens ) { |
261 | 363 | // Splice in the returned tokens (while replacing the original |
262 | 364 | // token), and process them next. |
263 | | - [].splice.apply(tokens, [i, 1].concat(tokenCTX.token)); |
264 | | - //l += tokenCTX.token.length - 1; |
| 365 | + [].splice.apply( tokens, [i, 1].concat(res.tokens) ); |
| 366 | + tokensLength = tokens.length; |
265 | 367 | i--; // continue at first inserted token |
266 | | - } else if (tokenCTX.token) { |
267 | | - // push to accumulator |
268 | | - accum.push(tokenCTX.token); |
| 368 | + } else if ( res.token ) { |
| 369 | + if ( res.token.rank === 2 ) { |
| 370 | + // token is done. |
| 371 | + if ( activeAccum ) { |
| 372 | + // push to accumulator |
| 373 | + activeAccum.push( res.token ); |
| 374 | + } else { |
| 375 | + // If there is no accumulator yet, then directly return the |
| 376 | + // token to the parent. Collect them in localAccum for this |
| 377 | + // purpose. |
| 378 | + localAccum.push(res.token); |
| 379 | + } |
| 380 | + } else { |
| 381 | + // re-process token. |
| 382 | + tokens[i] = res.token; |
| 383 | + i--; |
| 384 | + } |
| 385 | + } else if ( res.async ) { |
| 386 | + // The child now switched to activeAccum, we have to create a new |
| 387 | + // accumulator for the next potential child. |
| 388 | + activeAccum = accum; |
| 389 | + accum = new TokenAccumulator( activeAccum.getParentCB( 'sibling' ) ); |
| 390 | + cb = accum.getParentCB( 'child' ); |
269 | 391 | } |
270 | | - // Update current accum, in case a new one was spliced in by a |
271 | | - // transformation starting asynch work. |
272 | | - accum = tokenCTX.accum; |
273 | 392 | } |
274 | 393 | |
275 | | - if ( delta === undefined ) { |
276 | | - delta = 1; |
277 | | - } |
278 | | - |
279 | | - this.finish( delta ); |
| 394 | + // Return finished tokens directly to caller, and indicate if further |
| 395 | + // async actions are outstanding. The caller needs to point a sibling to |
| 396 | + // the returned accumulator, or call .siblingDone() to mark the end of a |
| 397 | + // chain. |
| 398 | + return { tokens: localAccum, async: activeAccum }; |
280 | 399 | }; |
281 | 400 | |
282 | 401 | /** |
283 | | - * Decrement the number of outstanding async actions by delta and call the |
284 | | - * callback with a list of tokens if none are remaining. |
285 | | - * |
286 | | - * @method |
287 | | - * @param {Int} delta, how much to decrement the number of outstanding async |
288 | | - * actions. |
| 402 | + * Callback from tokens fully processed for phase 0 and 1, which are now ready |
| 403 | + * for synchronous and globally in-order phase 2 processing. |
289 | 404 | */ |
290 | | -TokenTransformDispatcher.prototype.finish = function ( delta ) { |
291 | | - this.outstanding -= delta; |
292 | | - if ( this.outstanding === 0 ) { |
293 | | - // Join the token accumulators back into a single token list |
294 | | - var a = this.firstaccum; |
295 | | - var tokens = a.accum; |
296 | | - while ( a.next !== null ) { |
297 | | - a = a.next; |
298 | | - tokens = tokens.concat(a.accum); |
299 | | - } |
300 | | - //console.log('TOKENS: ' + JSON.stringify(tokens, null, 2)); |
301 | | - // Call our callback with the flattened token list |
302 | | - this.cb(tokens); |
| 405 | +TokenTransformDispatcher.prototype.returnTokens01 = function ( tokens, notYetDone ) { |
| 406 | + // FIXME: store frame in object? |
| 407 | + tokens = this.transformPhase2( this.frame, tokens, this.parentCB ); |
| 408 | + //console.log('returnTokens01, after transformPhase2.'); |
| 409 | + |
| 410 | + this.emit( 'chunk', tokens ); |
| 411 | + |
| 412 | + if ( ! notYetDone ) { |
| 413 | + console.log('returnTokens01 done.'); |
| 414 | + // signal our done-ness to consumers. |
| 415 | + this.emit( 'end' ); |
| 416 | + // and reset internal state. |
| 417 | + this.reset(); |
303 | 418 | } |
304 | 419 | }; |
305 | 420 | |
| 421 | + |
306 | 422 | /** |
307 | | - * Start a new accumulator for asynchronous work. |
| 423 | + * Phase 2 |
308 | 424 | * |
309 | | - * @param {Object} TokenAccumulator object after which to insert a new |
310 | | - * accumulator |
311 | | - * @count {Int} (optional, default 1) The number of callbacks to expect before |
312 | | - * considering the asynch work on the new accumulator done. |
313 | | - * */ |
314 | | -TokenTransformDispatcher.prototype.newAccumulator = function ( accum, count ) { |
315 | | - if ( count !== undefined ) { |
316 | | - this.outstanding += count; |
317 | | - } else { |
318 | | - this.outstanding++; |
| 425 | + * Global in-order traversal on expanded token stream (after async phase 1). |
| 426 | + * Very similar to transformPhase01, but without async handling. |
| 427 | + */ |
| 428 | +TokenTransformDispatcher.prototype.transformPhase2 = function ( frame, tokens, cb ) { |
| 429 | + var res, |
| 430 | + phaseEndRank = 3, |
| 431 | + localAccum = [], |
| 432 | + localAccumLength = 0, |
| 433 | + tokensLength = tokens.length, |
| 434 | + token, |
| 435 | + ts = this.transformers[phaseEndRank]; |
| 436 | + |
| 437 | + for ( var i = 0; i < tokensLength; i++ ) { |
| 438 | + token = tokens[i]; |
| 439 | + |
| 440 | + switch( token.type ) { |
| 441 | + case 'TAG': |
| 442 | + case 'ENDTAG': |
| 443 | + case 'SELFCLOSINGTAG': |
| 444 | + res = this._transformTagToken( token, cb, phaseEndRank, |
| 445 | + frame ); |
| 446 | + break; |
| 447 | + case 'TEXT': |
| 448 | + res = this._transformToken( token, cb, phaseEndRank, frame, |
| 449 | + ts.text ); |
| 450 | + break; |
| 451 | + case 'COMMENT': |
| 452 | + res = this._transformToken( token, cb, phaseEndRank, frame, |
| 453 | + ts.comment ); |
| 454 | + break; |
| 455 | + case 'NEWLINE': |
| 456 | + res = this._transformToken( token, cb, phaseEndRank, frame, |
| 457 | + ts.newline ); |
| 458 | + break; |
| 459 | + case 'END': |
| 460 | + res = this._transformToken( token, cb, phaseEndRank, frame, |
| 461 | + ts.end ); |
| 462 | + break; |
| 463 | + default: |
| 464 | + res = this._transformToken( token, cb, phaseEndRank, frame, |
| 465 | + ts.martian ); |
| 466 | + break; |
| 467 | + } |
| 468 | + |
| 469 | + if( res.tokens ) { |
| 470 | + // Splice in the returned tokens (while replacing the original |
| 471 | + // token), and process them next. |
| 472 | + [].splice.apply( tokens, [i, 1].concat(res.tokens) ); |
| 473 | + tokensLength = tokens.length; |
| 474 | + i--; // continue at first inserted token |
| 475 | + } else if ( res.token ) { |
| 476 | + if ( res.token.rank === phaseEndRank ) { |
| 477 | + // token is done. |
| 478 | + localAccum.push(res.token); |
| 479 | + this.prevToken = res.token; |
| 480 | + } else { |
| 481 | + // re-process token. |
| 482 | + tokens[i] = res.token; |
| 483 | + i--; |
| 484 | + } |
| 485 | + } |
319 | 486 | } |
320 | | - if ( accum === undefined ) { |
321 | | - accum = this.accum; |
322 | | - } |
323 | | - return accum.insertAccumulator( ); |
| 487 | + return localAccum; |
324 | 488 | }; |
325 | 489 | |
| 490 | + |
326 | 491 | /** |
327 | | - * Token accumulators in a linked list. Using a linked list simplifies async |
328 | | - * callbacks for template expansions as it avoids stable references to chunks. |
| 492 | + * Token accumulators buffer tokens between asynchronous processing points, |
| 493 | + * and return fully processed token chunks in-order and as soon as possible. |
329 | 494 | * |
330 | 495 | * @class |
331 | 496 | * @constructor |
332 | 497 | * @param {Function} parentCB Callback stored on the accumulator and called |
333 | 498 | * with token chunks; a new accumulator starts with two outstanding completions |
334 | 499 | */ |
335 | | -function TokenAccumulator ( next, tokens ) { |
336 | | - this.next = next; |
337 | | - if ( tokens ) { |
338 | | - this.accum = tokens; |
339 | | - } else { |
340 | | - this.accum = []; |
341 | | - } |
342 | | - return this; |
| 500 | +function TokenAccumulator ( parentCB ) { |
| 501 | + this.parentCB = parentCB; |
| 502 | + this.accum = []; |
| 503 | + // Wait for child and sibling by default |
| 504 | + // Note: Need to decrement outstanding on last accum |
| 505 | + // in a chain. |
| 506 | + this.outstanding = 2; |
343 | 507 | } |
344 | 508 | |
345 | 509 | /** |
346 | | - * Push a token into the accumulator |
| 510 | + * Curry a parentCB with the object and reference. |
347 | 511 | * |
348 | | - * @method |
349 | | - * @param {Object} token |
| 512 | + * @param {Object} TokenAccumulator |
| 513 | + * @param {misc} Reference / key for callback |
| 514 | + * @returns {Function} |
350 | 515 | */ |
351 | | -TokenAccumulator.prototype.push = function ( token ) { |
352 | | - return this.accum.push(token); |
| 516 | +TokenAccumulator.prototype.getParentCB = function ( reference ) { |
| 517 | + return this.returnTokens01.bind( this, reference ); |
353 | 518 | }; |
354 | 519 | |
355 | 520 | /** |
356 | | - * Pop a token from the accumulator |
| 521 | + * Pass tokens to an accumulator |
357 | 522 | * |
358 | 523 | * @method |
359 | | - * @returns {Object} token |
| 524 | + * @param {Object} token |
360 | 525 | */ |
361 | | -TokenAccumulator.prototype.pop = function ( ) { |
362 | | - return this.accum.pop(); |
| 526 | +TokenAccumulator.prototype.returnTokens01 = function ( reference, tokens, notYetDone ) { |
| 527 | + var res, |
| 528 | + cb, |
| 529 | + returnTokens = []; |
| 530 | + |
| 531 | + if ( ! notYetDone ) { |
| 532 | + this.outstanding--; |
| 533 | + } |
| 534 | + |
| 535 | + if ( reference === 'child' ) { |
| 536 | + // XXX: Use some marker to avoid re-transforming token chunks several |
| 537 | + // times? |
| 538 | + res = this.transformPhase01( this.frame, tokens, this.parentCB ); |
| 539 | + |
| 540 | + if ( res.async ) { |
| 541 | + // new asynchronous expansion started, chain of accumulators |
| 542 | + // created |
| 543 | + if ( this.outstanding === 0 ) { |
| 544 | + // Last accum in chain should only wait for child |
| 545 | + res.async.outstanding--; |
| 546 | + cb = this.parentCB; |
| 547 | + } else { |
| 548 | + cb = this.parentCB; |
| 549 | + // set own callback to new sibling, the end of accumulator chain |
| 550 | + this.parentCB = res.async.getParentCB( 'sibling' ); |
| 551 | + } |
| 552 | + } |
| 553 | + if ( ! notYetDone ) { |
| 554 | + // Child is done, return accumulator from sibling. Siblings |
| 555 | + // process tokens themselves, so we concat those to the result of |
| 556 | + // processing tokens from the child. |
| 557 | + tokens = res.tokens.concat( this.accum ); |
| 558 | + this.accum = []; |
| 559 | + } |
| 560 | + this.cb( res.tokens, res.async ); |
| 561 | + return null; |
| 562 | + } else { |
| 563 | + // sibling |
| 564 | + if ( this.outstanding === 0 ) { |
| 565 | + tokens = this.accum.concat( tokens ); |
| 566 | + // A sibling will transform tokens, so we don't have to do this |
| 567 | + // again. |
| 568 | + this.parentCB( res.tokens, false ); |
| 569 | + return null; |
| 570 | + } else if ( this.outstanding === 1 && notYetDone ) { |
| 571 | + // Sibling is not yet done, but child is. Return own parentCB to |
| 572 | + // allow the sibling to go direct, and call back parent with |
| 573 | + // tokens. The internal accumulator is empty at this stage, as its |
| 574 | + // tokens are passed to the parent when the child is done. |
| 575 | + return this.parentCB( tokens, true); |
| 576 | + } |
| 577 | + |
| 578 | + |
| 579 | + } |
363 | 580 | }; |
364 | 581 | |
365 | 582 | /** |
366 | | - * Insert an accumulator after this one. |
| 583 | + * Mark the sibling as done (normally at the tail of a chain). |
| 584 | + */ |
| 585 | +TokenAccumulator.prototype.siblingDone = function () { |
| 586 | + this.returnTokens01 ( 'sibling', [], false ); |
| 587 | +}; |
| 588 | + |
| 589 | + |
| 590 | +/** |
| 591 | + * Push a token into the accumulator |
367 | 592 | * |
368 | 593 | * @method |
369 | | - * @returns {Object} created TokenAccumulator |
| 594 | + * @param {Object} token |
370 | 595 | */ |
371 | | -TokenAccumulator.prototype.insertAccumulator = function ( ) { |
372 | | - this.next = new TokenAccumulator(this.next); |
373 | | - return this.next; |
| 596 | +TokenAccumulator.prototype.push = function ( token ) { |
| 597 | + return this.accum.push(token); |
374 | 598 | }; |
375 | 599 | |
| 600 | + |
| 601 | + |
| 602 | +/* TODO list |
| 603 | + * |
| 604 | + * transformPhase01 called first for phase 0-1 (in-order per source file) |
| 605 | + * then only phase 2 (order independent, if 2 <= token phase < 3, 3 ~ done) |
| 606 | + * -> don't execute order-dependent transforms in this phase! |
| 607 | + * * enforce phase on tokens, but not priority within phase |
| 608 | + * -> cycles possible in async phase |
| 609 | + * final transform (phase 2) globally in-order and synchronous in root returnTokens01 |
| 610 | + * |
| 611 | + * |
| 612 | + * Transformation phases |
| 613 | + * [0,2) |
| 614 | + * [2,3] (and 1..2 in templates etc, but clamp phase on *returned* tokens to 2) |
| 615 | + * 3 |
| 616 | + * |
| 617 | + */ |
| 618 | + |
| 619 | + |
376 | 620 | if (typeof module == "object") { |
377 | 621 | module.exports.TokenTransformDispatcher = TokenTransformDispatcher; |
378 | 622 | } |
379 | | - |
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js |
— | — | @@ -1,70 +1,92 @@ |
2 | 2 | /* |
3 | | - * Italic/Bold handling. |
| 3 | + * MediaWiki-compatible italic/bold handling as a token stream transformation. |
4 | 4 | * |
5 | | - * - list of tokens |
6 | | - * - NEWLINE |
7 | | - * - ticks (2+) -> list with link in line token list? |
8 | | - * - process on newline |
9 | | - * - need access to text nodes before for conversion back to text |
| 5 | + * @author Gabriel Wicke <gwicke@wikimedia.org> |
10 | 6 | */ |
11 | 7 | |
12 | 8 | function QuoteTransformer ( ) { |
13 | 9 | // Bold and italic tokens are collected in these lists, and then processed |
14 | 10 | // in onNewLine. |
| 11 | + this.quoteAndNewlineRank = 2.1; |
| 12 | + this.anyRank = 2.101; // Just after regular quote and newline |
| 13 | + this.reset(); |
| 14 | +} |
| 15 | + |
| 16 | +QuoteTransformer.prototype.reset = function ( ) { |
15 | 17 | this.italics = []; |
16 | 18 | this.bolds = []; |
17 | | -} |
| 19 | + this.currentChunk = []; |
| 20 | + // List of chunks, each starting with a (potentially) bold or italic token |
| 21 | + // and followed by plain tokens. |
| 22 | + this.chunks = []; |
| 23 | +}; |
18 | 24 | |
| 25 | + |
19 | 26 | // Register this transformer with the TokenTransformer |
20 | 27 | QuoteTransformer.prototype.register = function ( dispatcher ) { |
| 28 | + this.dispatcher = dispatcher; |
21 | 29 | // Register for NEWLINE and QUOTE tag tokens |
22 | | - var self = this; |
23 | | - dispatcher.appendListener( function (ctx) { |
24 | | - return self.onNewLine(ctx); |
25 | | - }, 'newline' ); |
26 | | - dispatcher.appendListener( function (ctx) { |
27 | | - return self.onQuote(ctx); |
28 | | - }, 'tag', 'mw-quote' ); |
| 30 | + dispatcher.addTransform( this.onNewLine.bind(this), |
| 31 | + this.quoteAndNewlineRank, 'newline' ); |
| 32 | + dispatcher.addTransform( this.onQuote.bind(this), |
| 33 | + this.quoteAndNewlineRank, 'tag', 'mw-quote' ); |
| 34 | + // Reset internal state when we are done |
| 35 | + dispatcher.addTransform( this.reset.bind(this), |
| 36 | + this.quoteAndNewlineRank, 'end' ); |
29 | 37 | }; |
30 | 38 | |
31 | 39 | // Close the current chunk: record its index, append it to this.chunks, and start an empty chunk |
32 | | -QuoteTransformer.prototype.ctx = function ( tokenCTX ) { |
33 | | - return $.extend({}, tokenCTX); |
| 40 | +QuoteTransformer.prototype._startNewChunk = function ( ) { |
| 41 | + this.currentChunk.pos = this.chunks.length; |
| 42 | + this.chunks.push( this.currentChunk ); |
| 43 | + this.currentChunk = []; |
34 | 44 | }; |
35 | 45 | |
36 | 46 | // Handle QUOTE tags. These are collected in italic/bold lists depending on |
37 | 47 | // the length of quote string. Actual analysis and conversion to the |
38 | 48 | // appropriate tag tokens is deferred until the next NEWLINE token triggers |
39 | 49 | // onNewLine. |
40 | | -QuoteTransformer.prototype.onQuote = function ( tokenCTX ) { |
41 | | - var token = tokenCTX.token, |
42 | | - qlen = token.value.length, |
43 | | - out = null, |
44 | | - lastToken = tokenCTX.lastToken, |
45 | | - ctx = this.ctx(tokenCTX), |
46 | | - ctx2, |
47 | | - accum = tokenCTX.accum; |
| 50 | +// |
| 51 | +// XXX: Cannot use async stuff here, need to buffer things locally instead! |
| 52 | +// FIXME: Convert to internal buffering! -> return all tokens with rank set to |
| 53 | +// own rank to avoid reprocessing |
| 54 | +QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) { |
| 55 | + var qlen = token.value.length, |
| 56 | + tokens = [], // output tokens |
| 57 | + ctx = { |
| 58 | + token: token, |
| 59 | + cb: cb, |
| 60 | + frame: frame, |
| 61 | + prevToken: prevToken |
| 62 | + }, |
| 63 | + ctx2 = { |
| 64 | + cb: cb, |
| 65 | + frame: frame, |
| 66 | + prevToken: prevToken |
| 67 | + }; |
| 68 | + |
48 | 69 | |
| 70 | + if ( this.chunks.length === 0 ) { |
| 71 | + // register for any token if not yet active |
| 72 | + this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'tag', 'mw-quote' ); |
| 73 | + } |
| 74 | + |
| 75 | + this._startNewChunk(); |
| 76 | + |
49 | 77 | switch (qlen) { |
50 | 78 | case 2: |
51 | | - // Start a new accumulator, so we can later go back using the |
52 | | - // reference to this accumulator and append our tags at the end of |
53 | | - // it. |
54 | | - accum = tokenCTX.dispatcher.newAccumulator(accum); |
55 | | - this.italics.push(ctx); |
| 79 | + this.currentChunk.push(ctx); |
| 80 | + this.italics.push(this.currentChunk); |
56 | 81 | break; |
57 | 82 | case 3: |
58 | | - accum = tokenCTX.dispatcher.newAccumulator(accum); |
59 | | - this.bolds.push(ctx); |
| 83 | + this.currentChunk.push(ctx); |
| 84 | + this.bolds.push(this.currentChunk); |
60 | 85 | break; |
61 | 86 | case 4: |
62 | | - if (lastToken && lastToken.type === 'TEXT') { |
63 | | - lastToken.value += "'"; |
64 | | - } else { |
65 | | - out = {type: 'TEXT', value: "'"}; |
66 | | - } |
67 | | - accum = tokenCTX.dispatcher.newAccumulator(accum); |
68 | | - this.bolds.push(ctx); |
| 87 | + this.currentChunk.push( {type: 'TEXT', value: "'"} ); |
| 88 | + this._startNewChunk(); |
| 89 | + this.currentChunk.push(ctx); |
| 90 | + this.bolds.push(this.currentChunk); |
69 | 91 | break; |
70 | 92 | case 5: |
71 | 93 | // The order of italic vs. bold does not matter. Those are |
— | — | @@ -72,39 +94,50 @@ |
73 | 95 | // by the HTML 5 tree builder. This does not always result in the |
74 | 96 | // prettiest result, but at least it is always correct and very |
75 | 97 | // convenient. |
76 | | - accum = tokenCTX.dispatcher.newAccumulator(accum, 2); |
77 | | - this.italics.push(ctx); |
78 | | - ctx2 = this.ctx(tokenCTX); |
79 | | - ctx2.token = {attribs: ctx.token.attribs}; |
80 | | - this.bolds.push(ctx2); |
| 98 | + this.currentChunk.push(ctx); |
| 99 | + this.italics.push(this.currentChunk); |
| 100 | + this._startNewChunk(); |
| 101 | + ctx2.token = { attribs: token.attribs }; |
| 102 | + this.currentChunk.push(ctx2); |
| 103 | + this.bolds.push(this.currentChunk); |
81 | 104 | break; |
82 | 105 | default: // longer than 5, only use the last 5 ticks |
83 | 106 | var newvalue = token.value.substr(0, qlen - 5 ); |
84 | | - if (lastToken && lastToken.type === 'TEXT') { |
85 | | - lastToken.value += newvalue; |
86 | | - } else { |
87 | | - out = {type: 'TEXT', value: newvalue}; |
88 | | - } |
89 | | - accum = tokenCTX.dispatcher.newAccumulator(accum, 2); |
90 | | - this.italics.push(ctx); |
91 | | - ctx2 = this.ctx(tokenCTX); |
92 | | - ctx2.token = {attribs: ctx.token.attribs}; |
93 | | - this.bolds.push(ctx2); |
| 107 | + this.currentChunk.push ( {type: 'TEXT', value: newvalue} ); |
| 108 | + this._startNewChunk(); |
| 109 | + this.currentChunk.push(ctx); |
| 110 | + this.italics.push(this.currentChunk); |
| 111 | + this._startNewChunk(); |
| 112 | + ctx2.token = { attribs: ctx.token.attribs }; |
| 113 | + this.currentChunk.push(ctx2); |
| 114 | + this.bolds.push(this.currentChunk); |
94 | 115 | break; |
95 | 116 | } |
96 | 117 | |
97 | | - tokenCTX.token = out; |
98 | | - tokenCTX.accum = accum; |
99 | | - return tokenCTX; |
| 118 | + return { token: null }; |
100 | 119 | }; |
101 | 120 | |
| 121 | +QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) { |
| 122 | + //console.log('qt onAny: ' + JSON.stringify(token, null, 2)); |
| 123 | + this.currentChunk.push( token ); |
| 124 | + return {}; |
| 125 | +}; |
| 126 | + |
102 | 127 | // Handle NEWLINE tokens, which trigger the actual quote analysis on the |
103 | 128 | // collected quote tokens so far. |
104 | | -QuoteTransformer.prototype.onNewLine = function ( tokenCTX ) { |
105 | | - if(!this.bolds && !this.italics) { |
| 129 | +QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken ) { |
| 130 | + var res; |
| 131 | + |
| 132 | + if( ! this.chunks.length ) { |
106 | 133 | // Nothing to do, quick abort. |
107 | | - return tokenCTX; |
| 134 | + return { token: token }; |
108 | 135 | } |
| 136 | + |
| 137 | + |
| 138 | + token.rank = this.quoteAndNewlineRank; |
| 139 | + this.currentChunk.push( token ); |
| 140 | + this._startNewChunk(); |
| 141 | + |
109 | 142 | //console.log("onNewLine: " + this.italics + this.bolds); |
110 | 143 | // balance out tokens, convert placeholders into tags |
111 | 144 | if (this.italics.length % 2 && this.bolds.length % 2) { |
— | — | @@ -113,11 +146,11 @@ |
114 | 147 | firstspace = -1; |
115 | 148 | for (var j = 0; j < this.bolds.length; j++) { |
116 | 149 | var ctx = this.bolds[j]; |
117 | | - //console.log("balancing!" + JSON.stringify(ctx.lastToken, null, 2)); |
118 | | - if (ctx.lastToken) { |
119 | | - if (ctx.lastToken.type === 'TEXT') { |
120 | | - var lastchar = ctx.lastToken.value[ctx.lastToken.value.length - 1], |
121 | | - secondtolastchar = ctx.lastToken.value[ctx.lastToken.value.length - 2]; |
| 150 | + //console.log("balancing!" + JSON.stringify(ctx.prevToken, null, 2)); |
| 151 | + if (ctx.prevToken) { |
| 152 | + if (ctx.prevToken.type === 'TEXT') { |
| 153 | + var lastchar = ctx.prevToken.value[ctx.prevToken.value.length - 1], |
| 154 | + secondtolastchar = ctx.prevToken.value[ctx.prevToken.value.length - 2]; |
122 | 155 | if (lastchar === ' ' && firstspace === -1) { |
123 | 156 | firstspace = j; |
124 | 157 | } else if (lastchar !== ' ') { |
— | — | @@ -129,8 +162,8 @@ |
130 | 163 | firstmultiletterword = j; |
131 | 164 | } |
132 | 165 | } |
133 | | - } else if ( ( ctx.lastToken.type === 'NEWLINE' || |
134 | | - ctx.lastToken.type === 'TAG' ) && |
| 166 | + } else if ( ( ctx.prevToken.type === 'NEWLINE' || |
| 167 | + ctx.prevToken.type === 'TAG' ) && |
135 | 168 | firstmultiletterword == -1 ) { |
136 | 169 | // This is an approximation, as the original doQuotes |
137 | 170 | // operates on the source and just looks at space vs. |
— | — | @@ -153,51 +186,55 @@ |
154 | 187 | } |
155 | 188 | } |
156 | 189 | |
157 | | - this.quotesToTags(this.italics, 'i', tokenCTX.dispatcher); |
158 | | - this.quotesToTags(this.bolds, 'b', tokenCTX.dispatcher); |
| 190 | + this.quotesToTags( this.italics, 'i' ); |
| 191 | + this.quotesToTags( this.bolds, 'b' ); |
159 | 192 | |
160 | | - this.bolds = []; |
161 | | - this.italics = []; |
| 193 | + //console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) ); |
162 | 194 | |
163 | | - // Pass through the NEWLINE token unchanged |
164 | | - return tokenCTX; |
| 195 | + // return all collected tokens including the newline |
| 196 | + res = { tokens: [].concat.apply([], this.chunks) }; |
| 197 | + |
| 198 | + // prepare for next session |
| 199 | + this.reset(); |
| 200 | + |
| 201 | + // remove 'any' registration |
| 202 | + this.dispatcher.removeTransform( this.anyRank, 'any' ); |
| 203 | + |
| 204 | + return res; |
| 205 | + |
165 | 206 | }; |
166 | 207 | |
167 | 208 | // Convert a bold token to italic to balance an uneven number of both bold and |
168 | 209 | // italic tags. In the process, one quote needs to be converted back to text. |
169 | 210 | QuoteTransformer.prototype.convertBold = function ( i ) { |
170 | | - var ctx = this.bolds[i]; |
| 211 | + var chunk = this.bolds[i], |
| 212 | + textToken = { type: 'TEXT', value: "'" }; |
171 | 213 | //console.log('convertbold!'); |
172 | | - if ( ctx.lastToken && ctx.lastToken.type === 'TEXT' ) { |
173 | | - ctx.lastToken.value += "'"; |
| 214 | + if ( chunk.pos ) { |
| 215 | + this.chunks[chunk.pos - 1].push( textToken ); |
174 | 216 | } else { |
175 | | - // Add a text token! |
176 | | - ctx.token = [{type: 'TEXT', value: "'"}, ctx.token]; |
| 217 | + // prepend another chunk |
| 218 | + this.chunks.unshift( [ textToken ] ); |
177 | 219 | } |
178 | 220 | |
| 221 | + // delete from bolds |
179 | 222 | this.bolds.splice(i, 1); |
180 | 223 | |
181 | | - this.italics.push(ctx); |
| 224 | + this.italics.push(chunk); |
182 | 225 | this.italics.sort(function(a,b) { return a.pos - b.pos; } ); |
183 | | - //console.log(this.italics.map(function(a) { return a.pos })); |
184 | | - //console.log(this.bolds.map(function(a) { return a.pos })); |
185 | 226 | }; |
186 | 227 | |
187 | 228 | // Convert italics/bolds into tags |
188 | | -QuoteTransformer.prototype.quotesToTags = function ( contexts, name, dispatcher ) { |
| 229 | +QuoteTransformer.prototype.quotesToTags = function ( chunks, name ) { |
189 | 230 | var toggle = true, |
190 | 231 | t, |
| 232 | + j, |
191 | 233 | out = []; |
192 | | - for (var j = 0; j < contexts.length; j++) { |
193 | | - t = contexts[j].token; |
194 | 234 | |
195 | | - if ( $.isArray(t) ) { |
196 | | - // Slip in a text token from bold to italic rebalancing. Don't |
197 | | - // count this callback towards completion. |
198 | | - var realToken = t.pop(); |
199 | | - dispatcher.transformTokens( t, contexts[j].accum, 0 ); |
200 | | - t = realToken; |
201 | | - } |
| 235 | + for (j = 0; j < chunks.length; j++) { |
| 236 | + //console.log( 'quotesToTags ' + name + ': ' + JSON.stringify( chunks, null, 2 ) ); |
| 237 | + t = chunks[j][0].token; |
| 238 | + //console.log( 'quotesToTags t: ' + JSON.stringify( t, null, 2)); |
202 | 239 | |
203 | 240 | if(toggle) { |
204 | 241 | t.type = 'TAG'; |
— | — | @@ -206,21 +243,13 @@ |
207 | 244 | } |
208 | 245 | t.name = name; |
209 | 246 | delete t.value; |
| 247 | + chunks[j][0] = t; |
210 | 248 | toggle = !toggle; |
211 | | - // Re-add and process the new token with the original accumulator, but |
212 | | - // don't yet count this callback towards callback completion. |
213 | | - dispatcher.transformTokens( [t], contexts[j].accum, 0 ); |
214 | 249 | } |
215 | | - var l = contexts.length; |
216 | 250 | if (!toggle) { |
217 | 251 | // Odd number of quote tokens: push a closing tag onto the current chunk. |
218 | | - dispatcher.transformTokens( [{type: 'ENDTAG', name: name}], |
219 | | - contexts[contexts.length - 1].accum, 0 ); |
| 252 | + this.currentChunk.push( {type: 'ENDTAG', name: name} ); |
220 | 253 | } |
221 | | - // Now finally count the number of contexts towards completion, which |
222 | | - // causes the dispatcher to call its own callback if no more asynch |
223 | | - // callbacks are outstanding. |
224 | | - dispatcher.finish( contexts.length ); |
225 | 254 | }; |
226 | 255 | |
227 | 256 | if (typeof module == "object") { |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js |
— | — | @@ -15,12 +15,38 @@ |
16 | 16 | |
17 | 17 | // Sets up the parser |
18 | 18 | this.parser.parse(this); |
19 | | - this.document = this.parser.document; |
20 | | - return this; |
| 19 | + |
| 20 | + // implicitly start a new document |
| 21 | + this.processToken({type: 'TAG', name: 'body'}); |
21 | 22 | }; |
22 | 23 | |
23 | 24 | FauxHTML5.TreeBuilder.prototype = new events.EventEmitter(); |
24 | 25 | |
| 26 | +FauxHTML5.TreeBuilder.prototype.subscribeToTokenEmitter = function ( emitter ) { |
| 27 | + emitter.addListener('chunk', this.onChunk.bind( this ) ); |
| 28 | + emitter.addListener('end', this.onEnd.bind( this ) ); |
| 29 | +}; |
| 30 | + |
| 31 | +FauxHTML5.TreeBuilder.prototype.onChunk = function ( tokens ) { |
| 32 | + for (var i = 0, length = tokens.length; i < length; i++) { |
| 33 | + this.processToken(tokens[i]); |
| 34 | + } |
| 35 | +}; |
| 36 | + |
| 37 | +FauxHTML5.TreeBuilder.prototype.onEnd = function ( ) { |
| 38 | + //console.log('Fauxhtml5 onEnd'); |
| 39 | + // FIXME HACK: For some reason the end token is not processed sometimes, |
| 40 | + // which normally fixes the body reference up. |
| 41 | + this.document = this.parser.document; |
| 42 | + this.document.body = this.parser |
| 43 | + .document.getElementsByTagName('body')[0]; |
| 44 | + |
| 45 | + // XXX: more clean up to allow reuse. |
| 46 | + this.parser.setup(); |
| 47 | + this.processToken({type: 'TAG', name: 'body'}); |
| 48 | +}; |
| 49 | + |
| 50 | + |
25 | 51 | // Adapt the token format to internal HTML tree builder format, call the actual |
26 | 52 | // html tree builder by emitting the token. |
27 | 53 | FauxHTML5.TreeBuilder.prototype.processToken = function (token) { |
— | — | @@ -65,6 +91,7 @@ |
66 | 92 | break; |
67 | 93 | case "END": |
68 | 94 | this.emit('end'); |
| 95 | + this.emit('token', { type: 'EOF' } ); |
69 | 96 | this.document = this.parser.document; |
70 | 97 | if ( ! this.document.body ) { |
71 | 98 | // HACK: This should not be needed really. |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js |
— | — | @@ -8,19 +8,25 @@ |
9 | 9 | |
10 | 10 | var PEG = require('pegjs'), |
11 | 11 | path = require('path'), |
12 | | - fs = require('fs'); |
| 12 | + fs = require('fs'), |
| 13 | + events = require('events'); |
13 | 14 | |
14 | 15 | function PegTokenizer() { |
15 | 16 | var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' ); |
16 | 17 | this.src = fs.readFileSync( pegSrcPath, 'utf8' ); |
17 | 18 | } |
18 | 19 | |
| 20 | +// Inherit from EventEmitter |
| 21 | +PegTokenizer.prototype = new events.EventEmitter(); |
| 22 | + |
19 | 23 | PegTokenizer.src = false; |
20 | 24 | |
21 | 25 | PegTokenizer.prototype.tokenize = function( text ) { |
22 | 26 | var out, err; |
23 | 27 | if ( !this.parser ) { |
24 | 28 | this.parser = PEG.buildParser(this.src); |
| 29 | + // add reference to this for event emission |
| 30 | + this.parser._tokenizer = this; |
25 | 31 | } |
26 | 32 | |
27 | 33 | // some normalization |
— | — | @@ -28,21 +34,30 @@ |
29 | 35 | text += "\n"; |
30 | 36 | } |
31 | 37 | |
32 | | - try { |
| 38 | + // XXX: Commented out exception handling during development to get |
| 39 | + // reasonable traces. Calling a trace on the extension does not really cut |
| 40 | + // it. |
| 41 | + //try { |
33 | 42 | out = this.parser.parse(text); |
34 | | - } catch (e) { |
35 | | - err = e; |
36 | | - console.trace(); |
37 | | - } finally { |
| 43 | + // emit tokens here until we get that to work per toplevelblock in the |
| 44 | + // actual tokenizer |
| 45 | + this.emit('chunk', out); |
| 46 | + this.emit('end'); |
| 47 | + //} catch (e) { |
| 48 | + //err = e; |
| 49 | + //console.trace(); |
| 50 | + //} finally { |
| 51 | + return { err: err }; |
| 52 | + //} |
| 53 | +}; |
38 | 54 | |
39 | | - // Append the end (for obvious reasons this should not |
40 | | - // be part of a stream, only when tokenizing complete |
41 | | - // texts) |
42 | | - out.push({type: 'END'}); |
| 55 | +/***************************************************************************** |
| 56 | + * LEGACY stuff |
| 57 | + * |
| 58 | + * This is kept around as a template for the ongoing template expansion work! |
| 59 | + * It won't work with the token infrastructure. |
| 60 | + */ |
43 | 61 | |
44 | | - return {tokens: out, err: err}; |
45 | | - } |
46 | | -} |
47 | 62 | |
48 | 63 | /** |
49 | 64 | * @param {object} tree |
— | — | @@ -91,7 +106,7 @@ |
92 | 107 | content: self.env.expandTemplateArgs( templateTree, tree.params ) |
93 | 108 | }); |
94 | 109 | } |
95 | | - }) |
| 110 | + }); |
96 | 111 | } ); |
97 | 112 | // Wait for async... |
98 | 113 | return; |
— | — | @@ -123,7 +138,7 @@ |
124 | 139 | PegTokenizer.src = page.revisions[0]['*']; |
125 | 140 | } |
126 | 141 | }); |
127 | | - callback() |
| 142 | + callback(); |
128 | 143 | }, |
129 | 144 | dataType: 'json', |
130 | 145 | cache: false |
Index: trunk/extensions/VisualEditor/modules/parser/html5/parser.js |
— | — | @@ -44,6 +44,14 @@ |
45 | 45 | |
46 | 46 | Parser.prototype.parse = function(tokenizer) { |
47 | 47 | this.tokenizer = tokenizer; |
| 48 | + |
| 49 | + this.tokenizer.addListener('token', function(t) { |
| 50 | + return function(token) { t.do_token(token); }; |
| 51 | + }(this)); |
| 52 | + this.tokenizer.addListener('end', function(t) { |
| 53 | + return function() { t.emit('end'); }; |
| 54 | + }(this)); |
| 55 | + |
48 | 56 | this.setup(); |
49 | 57 | //this.tokenizer.tokenize(); |
50 | 58 | } |
— | — | @@ -116,12 +124,6 @@ |
117 | 125 | } |
118 | 126 | |
119 | 127 | Parser.prototype.setup = function(container, encoding) { |
120 | | - this.tokenizer.addListener('token', function(t) { |
121 | | - return function(token) { t.do_token(token); }; |
122 | | - }(this)); |
123 | | - this.tokenizer.addListener('end', function(t) { |
124 | | - return function() { t.emit('end'); }; |
125 | | - }(this)); |
126 | 128 | this.emit('setup', this); |
127 | 129 | |
128 | 130 | var inner_html = !!container; |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js |
— | — | @@ -10,103 +10,73 @@ |
11 | 11 | path = require('path'), |
12 | 12 | PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer, |
13 | 13 | TokenTransformDispatcher = require('./mediawiki.TokenTransformDispatcher.js').TokenTransformDispatcher, |
14 | | - DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor, |
15 | | - DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter, |
16 | 14 | QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer, |
17 | 15 | Cite = require('./ext.Cite.js').Cite, |
18 | | - MWRefTagHook = require('./ext.cite.taghook.ref.js').MWRefTagHook, |
19 | | - FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5; |
| 16 | + FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5, |
| 17 | + DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor, |
| 18 | + DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter; |
20 | 19 | |
21 | 20 | function ParseThingy( config ) { |
22 | | - // XXX: move the actual parsing to separate method, only perform pipeline |
23 | | - // setup in the constructor! |
| 21 | + // Set up a simple parser pipeline. |
24 | 22 | |
25 | 23 | if ( !config ) { |
26 | 24 | config = {}; |
27 | 25 | } |
28 | 26 | |
29 | | - |
30 | 27 | this.wikiTokenizer = new PegTokenizer(); |
31 | 28 | |
32 | | - this.postProcessor = new DOMPostProcessor(); |
| 29 | + this.tokenDispatcher = new TokenTransformDispatcher (); |
33 | 30 | |
34 | | - this.DOMConverter = new DOMConverter(); |
| 31 | + // Add token transformations.. |
| 32 | + var qt = new QuoteTransformer(); |
| 33 | + qt.register(this.tokenDispatcher); |
35 | 34 | |
36 | | - var pthingy = this; |
| 35 | + //var citeExtension = new Cite(); |
| 36 | + //citeExtension.register(this.tokenDispatcher); |
37 | 37 | |
38 | | - // Set up the TokenTransformDispatcher with a callback for the remaining |
39 | | - // processing. |
40 | | - // XXX: convert to event listener (listening for token chunks from |
41 | | - // tokenizer) and event emitter (emitting token chunks) |
42 | | - // XXX: A parser environment and configuration will be added here to the |
43 | | - // token transform dispatcher. |
44 | | - this.tokenDispatcher = new TokenTransformDispatcher ( function ( tokens ) { |
45 | | - |
46 | | - //console.log("TOKENS: " + JSON.stringify(tokens, null, 2)); |
47 | | - |
48 | | - // Create a new tree builder, which also creates a new document. |
49 | | - // XXX: implicitly clean up old state after processing end token, so |
50 | | - // that we can reuse the tree builder. |
51 | | - // XXX: convert to event listener listening for token chunks from the |
52 | | - // token transformer and and emitting an additional 'done' event after |
53 | | - // processing the 'end' token. |
54 | | - var treeBuilder = new FauxHTML5.TreeBuilder(); |
| 38 | + this.tokenDispatcher.subscribeToTokenEmitter( this.wikiTokenizer ); |
55 | 39 | |
56 | | - // Build a DOM tree from tokens using the HTML tree builder/parser. |
57 | | - // XXX: convert to event listener (token chunks from |
58 | | - // TokenTransformDispatcher) and event emitter (DOM tree to |
59 | | - // DOMPostProcessor) |
60 | | - pthingy.buildTree( tokens, treeBuilder ); |
61 | | - |
62 | | - // Perform post-processing on DOM. |
63 | | - // XXX: convert to event listener (listening on treeBuilder 'finished' |
64 | | - // event) |
65 | | - pthingy.postProcessor.doPostProcess(treeBuilder.document); |
| 40 | + // Create a new tree builder, which also creates a new document. |
| 41 | + // XXX: implicitly clean up old state after processing end token, so |
| 42 | + // that we can reuse the tree builder. |
| 43 | + // XXX: convert to event listener listening for token chunks from the |
| 44 | + // token transformer and emitting an additional 'done' event after |
| 45 | + // processing the 'end' token. |
| 46 | + this.treeBuilder = new FauxHTML5.TreeBuilder(); |
| 47 | + this.treeBuilder.subscribeToTokenEmitter( this.tokenDispatcher ); |
66 | 48 | |
67 | | - // FIXME: move HTML serialization to separate pipeline! |
68 | | - pthingy.document = treeBuilder.document; |
| 49 | + // Prepare these two, but only call them from parse and getWikiDom for |
| 50 | + // now. These will be called in a callback later, when the full pipeline |
| 51 | + // is used asynchronously. |
| 52 | + this.postProcessor = new DOMPostProcessor(); |
69 | 53 | |
70 | | - // XXX: emit event with result |
71 | | - pthingy.getWikiDom = function() { |
72 | | - return JSON.stringify( |
73 | | - pthingy.DOMConverter.HTMLtoWiki( treeBuilder.document.body ), |
74 | | - null, |
75 | | - 2 |
76 | | - ) + "\n"; |
77 | | - }; |
| 54 | + this.DOMConverter = new DOMConverter(); |
| 55 | +} |
78 | 56 | |
79 | | - }); |
| 57 | +ParseThingy.prototype.parse = function ( text ) { |
| 58 | + // Set the pipeline in motion by feeding the tokenizer |
| 59 | + this.wikiTokenizer.tokenize( text ); |
80 | 60 | |
81 | | - // Add token transformations.. |
82 | | - var qt = new QuoteTransformer(); |
83 | | - qt.register(this.tokenDispatcher); |
| 61 | + // XXX: this will have to happen in a callback! |
| 62 | + this.document = this.treeBuilder.document; |
84 | 63 | |
85 | | - var citeExtension = new Cite(); |
86 | | - citeExtension.register(this.tokenDispatcher); |
| 64 | + //console.log(this.document.body.innerHTML); |
87 | 65 | |
88 | | -} |
| 66 | + // Perform synchronous post-processing on DOM. |
| 67 | + // XXX: convert to event listener (listening on treeBuilder 'finished' |
| 68 | + // event) |
| 69 | + this.postProcessor.doPostProcess( this.document ); |
| 70 | +}; |
89 | 71 | |
90 | | - |
91 | | -ParseThingy.prototype = { |
92 | | - //XXX: This will be moved to the treeBuilder event listener callback, |
93 | | - //where it will process each received chunk. |
94 | | - buildTree: function ( tokens, treeBuilder ) { |
95 | | - // push a body element, just to be sure to have one |
96 | | - treeBuilder.processToken({type: 'TAG', name: 'body'}); |
97 | | - // Process all tokens |
98 | | - for (var i = 0, length = tokens.length; i < length; i++) { |
99 | | - treeBuilder.processToken(tokens[i]); |
100 | | - } |
101 | | - |
102 | | - // FIXME HACK: For some reason the end token is not processed sometimes, |
103 | | - // which normally fixes the body reference up. |
104 | | - treeBuilder.document.body = treeBuilder.parser |
105 | | - .document.getElementsByTagName('body')[0]; |
106 | | - |
107 | | - } |
| 72 | +ParseThingy.prototype.getWikiDom = function () { |
| 73 | + return JSON.stringify( |
| 74 | + pthingy.DOMConverter.HTMLtoWiki( this.document.body ), |
| 75 | + null, |
| 76 | + 2 |
| 77 | + ); |
108 | 78 | }; |
109 | 79 | |
| 80 | + |
110 | 81 | if (typeof module == "object") { // Export ParseThingy only when a CommonJS-style `module` object is defined.
111 | 82 | module.exports.ParseThingy = ParseThingy;
112 | 83 | }
113 | | - |
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -246,11 +246,22 @@ |
247 | 247 | return bnames; |
248 | 248 | })(); |
249 | 249 | |
| 250 | + var self = this; |
| 251 | + |
250 | 252 | |
251 | 253 | } |
252 | 254 | |
253 | 255 | start
254 | 256 | = e:toplevelblock* newline* {
 | 257 | + // The end is meant to be passed inline as a token, as well as via a separate event, for now.
 | 258 | + 
 | 259 | + // NOTE: the emit calls below are disabled because this does not work yet.
 | 260 | + //console.log('about to emit' + pp(self));
 | 261 | + //self._tokenizer.emit('chunk', [ { type: 'END' } ] );
 | 262 | + //self._tokenizer.emit('end');
 | 263 | + // Intended: append the end token (for obvious reasons this should not
 | 264 | + // be part of a stream, only when tokenizing complete texts).
 | 265 | + // NOTE(review): as written, this action only returns flatten(e); no END token is appended here.
255 | 266 | return flatten(e);
256 | 267 | }
257 | 268 | |
— | — | @@ -393,6 +404,10 @@ |
394 | 405 | // XXX: only run this for lines that actually need it! |
395 | 406 | //b.push({type: 'NEWLINE'}); |
396 | 407 | // Move this to a token stream transform! |
| 408 | + //console.log('about to emit' + pp(self)); |
| 409 | + //self._tokenizer.emit('chunk', b); |
| 410 | + //console.log('emitted chunk' + pp(b)); |
| 411 | + //return []; |
397 | 412 | return b; |
398 | 413 | } |
399 | 414 | |