Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js |
— | — | @@ -6,16 +6,18 @@ |
7 | 7 | |
8 | 8 | // The nesting of italic/bold tags is changed in this test, but the resulting |
9 | 9 | // formatting is identical |
10 | | -testWhiteList['Italics and bold'] = "<ul><li> plain</li><li> plain<i>italic</i>plain</li><li> plain<i>italic</i>plain<i>italic</i>plain</li><li> plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<b>bold</b>plain</li><li> plain<i>italic</i>plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<i>italic</i>plain</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain</li><li> plain<i><b>bold-italic</b>italic</i>plain</li><li> plain<i><b>bold-italic</b></i><b>bold</b>plain</li><li> plain<i>italic<b>bold-italic</b></i>plain</li><li> plain<b>bold<i>bold-italic</i></b>plain</li><li> plain l'<i>italic</i>plain</li><li> plain l'<b>bold</b> plain</li></ul>"; |
| 10 | +testWhiteList["Italics and bold"] = "<ul><li> plain</li><li> plain<i>italic</i>plain</li><li> plain<i>italic</i>plain<i>italic</i>plain</li><li> plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<b>bold</b>plain</li><li> plain<i>italic</i>plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<i>italic</i>plain</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain</li><li> plain<i><b>bold-italic</b>italic</i>plain</li><li> plain<i><b>bold-italic</b></i><b>bold</b>plain</li><li> plain<i>italic<b>bold-italic</b></i>plain</li><li> plain<b>bold<i>bold-italic</i></b>plain</li><li> plain l'<i>italic</i>plain</li><li> plain l'<b>bold</b> plain</li></ul>"; |
11 | 11 | |
12 | | -testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\"><b>text</b></a><b></b><i>Something <a href=\"http://example.com\">in italic</a></i><i>Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><b></b><i><b>Now <a href=\"http://example.com\">both</a></b></i></p>"; |
| 12 | +testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\" data-sourcePos=\"30:61\"><b>text</b></a><i data-sourcePos=\"62:106\">Something <a href=\"http://example.com\">in italic</a></i><i data-sourcePos=\"107:164\">Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i data-sourcePos=\"165:204\"><b data-sourcePos=\"165:204\">Now <a href=\"http://example.com\">both</a></b></i></p>"; |
13 | 13 | |
14 | | -testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>"; |
| 14 | +testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b></b>Bold text..</p><p>..spanning two paragraphs (should not work).<b></b></p><p><b></b>Bold tag left open</p><p><i></i>Italic tag left open</p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>"; |
15 | 15 | |
16 | | -testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i> and stuff</i></a><i>!</i></p>"; |
| 16 | +testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i></i> and stuff</a>!</p>"; |
17 | 17 | |
18 | 18 | testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>"; |
19 | 19 | |
| 20 | +// This is a rare edge case, and the new behavior is arguably more consistent |
| 21 | +testWhiteList["5 quotes, code coverage +1 line"] = "<p><i>'</i></p>"; |
20 | 22 | |
21 | 23 | |
22 | 24 | // empty table tags / with only a caption are legal in HTML5. |
Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -63,6 +63,8 @@ |
64 | 64 | _import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']); |
65 | 65 | _import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']); |
66 | 66 | |
| 67 | +_import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']); |
| 68 | + |
67 | 69 | // WikiDom and serializers |
68 | 70 | _require(pj('es', 'es.js')); |
69 | 71 | _require(pj('es', 'es.Html.js')); |
— | — | @@ -178,13 +180,17 @@ |
179 | 181 | this.postProcessor = new DOMPostProcessor(); |
180 | 182 | |
181 | 183 | var pt = this; |
| 184 | + |
| 185 | + // Set up the TokenTransformer with a callback for the remaining |
| 186 | + // processing. |
182 | 187 | this.tokenTransformer = new TokenTransformer ( function ( tokens ) { |
| 188 | + |
183 | 189 | //console.log("TOKENS: " + JSON.stringify(tokens, null, 2)); |
| 190 | + |
184 | 191 | // Create a new tree builder, which also creates a new document. |
185 | 192 | var treeBuilder = new FauxHTML5.TreeBuilder(); |
186 | 193 | |
187 | | - // Build a DOM tree from tokens using the HTML tree |
188 | | - // builder/parser. |
| 194 | + // Build a DOM tree from tokens using the HTML tree builder/parser. |
189 | 195 | pt.buildTree( tokens, treeBuilder ); |
190 | 196 | |
191 | 197 | // Perform post-processing on DOM. |
— | — | @@ -193,9 +199,14 @@ |
194 | 200 | // And serialize the result. |
195 | 201 | var out = treeBuilder.body().innerHTML; |
196 | 202 | |
| 203 | + // Finally, check the result vs. the expected result. |
197 | 204 | pt.checkResult( pt.currentItem, out ); |
198 | 205 | }); |
199 | 206 | |
| 207 | + // Add token transformations.. |
| 208 | + var qt = new QuoteTransformer(); |
| 209 | + qt.register(this.tokenTransformer); |
| 210 | + |
200 | 211 | // Test statistics |
201 | 212 | this.passedTests = 0; |
202 | 213 | this.passedTestsManual = 0; |
— | — | @@ -610,9 +621,8 @@ |
611 | 622 | this.reportSummary(); |
612 | 623 | }; |
613 | 624 | |
614 | | -var pt = new ParserTests(); |
615 | | -console.log(pt.processArticle); |
616 | | -pt.main(); |
| 625 | +// Construct the ParserTests object and run the parser tests |
| 626 | +new ParserTests().main(); |
617 | 627 | |
618 | 628 | |
619 | 629 | })(); |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformer.js |
— | — | @@ -32,7 +32,7 @@ |
33 | 33 | } |
34 | 34 | |
35 | 35 | TokenTransformer.prototype.reset = function () { |
36 | | - this.accum = new TokenAccumulator(); |
| 36 | + this.accum = new TokenAccumulator(null); |
37 | 37 | this.firstaccum = this.accum; |
38 | 38 | this.outstanding = 1; // Number of outstanding processing steps |
39 | 39 | // (e.g., async template fetches/expansions) |
— | — | @@ -147,18 +147,21 @@ |
148 | 148 | * @returns nothing: Calls back registered callback if there are no more |
149 | 149 | * outstanding asynchronous expansions. |
150 | 150 | * */ |
151 | | -TokenTransformer.prototype.transformTokens = function ( tokens, accum ) { |
| 151 | +TokenTransformer.prototype.transformTokens = function ( tokens, accum, delta ) { |
152 | 152 | if ( accum === undefined ) { |
153 | 153 | this.reset(); |
154 | 154 | accum = this.accum; |
155 | | - } else { |
156 | | - // Prepare to replace the last token in the current accumulator. |
157 | | - accum.pop(); |
158 | 155 | } |
| 156 | + |
| 157 | + //console.log('transformTokens: ' + JSON.stringify(tokens) + JSON.stringify(accum.accum) ); |
| 158 | + |
159 | 159 | var tokenCTX = new TokenContext(undefined, accum, this, undefined); |
160 | | - for ( var i = 0, l = tokens.length; i < l; i++ ) { |
161 | | - tokenCTX.lastToken = tokenCTX.token; |
| 160 | + var origLen = tokens.length; |
| 161 | + for ( var i = 0; i < tokens.length; i++ ) { |
| 162 | + tokenCTX.lastToken = tokenCTX.token; // XXX: Fix for re-entrant case! |
162 | 163 | tokenCTX.token = tokens[i]; |
| 164 | + tokenCTX.pos = i; |
| 165 | + tokenCTX.accum = accum; |
163 | 166 | var ts; |
164 | 167 | switch(tokenCTX.token.type) { |
165 | 168 | case 'TAG': |
— | — | @@ -186,52 +189,67 @@ |
187 | 190 | // Splice in the returned tokens (while replacing the original |
188 | 191 | // token), and process them next. |
189 | 192 | [].splice.apply(tokens, [i, 1].concat(tokenCTX.token)); |
190 | | - l += res.token.length - 1; |
| 193 | + //l += tokenCTX.token.length - 1; |
191 | 194 | i--; // continue at first inserted token |
192 | 195 | } else if (tokenCTX.token) { |
193 | | - // push to accumulator (not necessarily the last one) |
| 196 | + // push to accumulator |
194 | 197 | accum.push(tokenCTX.token); |
195 | 198 | } |
196 | 199 | // Update current accum, in case a new one was spliced in by a |
197 | 200 | // transformation starting asynch work. |
198 | 201 | accum = tokenCTX.accum; |
199 | 202 | } |
200 | | - this.finish(); |
| 203 | + |
| 204 | + if ( delta === undefined ) { |
| 205 | + delta = 1; |
| 206 | + } |
| 207 | + |
| 208 | + this.finish( delta ); |
201 | 209 | }; |
202 | 210 | |
203 | | -TokenTransformer.prototype.finish = function ( ) { |
204 | | - this.outstanding--; |
| 211 | +TokenTransformer.prototype.finish = function ( delta ) { |
| 212 | + this.outstanding -= delta; |
205 | 213 | if ( this.outstanding === 0 ) { |
206 | 214 | // Join the token accumulators back into a single token list |
207 | 215 | var a = this.firstaccum; |
208 | 216 | var tokens = a.accum; |
209 | | - while ( a.next !== undefined ) { |
| 217 | + while ( a.next !== null ) { |
210 | 218 | a = a.next; |
211 | | - tokens.concat(a.accum); |
| 219 | + tokens = tokens.concat(a.accum); |
212 | 220 | } |
| 221 | + //console.log('TOKENS: ' + JSON.stringify(tokens, null, 2)); |
213 | 222 | // Call our callback with the flattened token list |
214 | 223 | this.cb(tokens); |
215 | 224 | } |
216 | 225 | }; |
217 | 226 | |
218 | 227 | /* Start a new accumulator for asynchronous work. */ |
219 | | -TokenTransformer.prototype.newAccumulator = function ( ) { |
220 | | - this.outstanding++; |
221 | | - return this.accum.insertAccumulator( ); |
| 228 | +TokenTransformer.prototype.newAccumulator = function ( accum, count ) { |
| 229 | + if ( count !== undefined ) { |
| 230 | + this.outstanding += count; |
| 231 | + } else { |
| 232 | + this.outstanding++; |
| 233 | + } |
| 234 | + if ( accum === undefined ) { |
| 235 | + accum = this.accum; |
| 236 | + } |
| 237 | + return accum.insertAccumulator( ); |
222 | 238 | }; |
223 | 239 | |
224 | 240 | // Token accumulators in a linked list. Using a linked list simplifies async |
225 | 241 | // callbacks for template expansions. |
226 | 242 | function TokenAccumulator ( next, tokens ) { |
227 | 243 | this.next = next; |
228 | | - if ( tokens ) |
| 244 | + if ( tokens ) { |
229 | 245 | this.accum = tokens; |
230 | | - else |
| 246 | + } else { |
231 | 247 | this.accum = []; |
| 248 | + } |
| 249 | + return this; |
232 | 250 | } |
233 | 251 | |
234 | 252 | TokenAccumulator.prototype.push = function ( token ) { |
235 | | - this.accum.push(token); |
| 253 | + return this.accum.push(token); |
236 | 254 | }; |
237 | 255 | |
238 | 256 | TokenAccumulator.prototype.pop = function ( ) { |
— | — | @@ -239,7 +257,7 @@ |
240 | 258 | }; |
241 | 259 | |
242 | 260 | TokenAccumulator.prototype.insertAccumulator = function ( ) { |
243 | | - this.next = new TokenAccumulator(this.next, tokens); |
| 261 | + this.next = new TokenAccumulator(this.next); |
244 | 262 | return this.next; |
245 | 263 | }; |
246 | 264 | |
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js |
— | — | @@ -0,0 +1,211 @@ |
/*
 * Italic/Bold handling.
 *
 * The tokenizer emits every run of two or more ticks as a TAG token named
 * 'QUOTE' whose value is the raw tick string. Whether such a run opens or
 * closes an <i>/<b> tag can only be decided once the whole line has been
 * seen, so this transformer works in two phases:
 *
 * - onQuote: swallow the QUOTE token, remember its context (accumulator,
 *   preceding token, position) in the italics/bolds lists and reserve a
 *   fresh accumulator so that the final tag can later be emitted at the
 *   original position.
 * - onNewLine: balance the collected quotes (possibly turning one bold into
 *   an apostrophe plus an italic), convert them into TAG/ENDTAG tokens and
 *   hand those back to the token transformer.
 *
 * The transformer object passed in through the token context is expected to
 * provide appendListener(), newAccumulator( accum, count ),
 * transformTokens( tokens, accum, delta ) and finish( delta ).
 */

/**
 * Collects the quote tokens of the current line.
 *
 * @constructor
 */
function QuoteTransformer ( ) {
	// Contexts (see QuoteTransformer.prototype.ctx) of the pending quotes of
	// the current line that are to become <i> and <b> tags respectively.
	this.italics = [];
	this.bolds = [];
	// Not read anywhere in this file; kept so the object shape is unchanged.
	this.inserted = 0;
}

/**
 * Hook this transformer into a token transformer: onNewLine is registered
 * for 'newline' tokens and onQuote for 'tag' tokens named 'QUOTE'.
 *
 * @param {Object} tokenTransformer object providing appendListener()
 */
QuoteTransformer.prototype.register = function ( tokenTransformer ) {
	var self = this;
	tokenTransformer.appendListener( function ( ctx ) {
		return self.onNewLine( ctx );
	}, 'newline' );
	tokenTransformer.appendListener( function ( ctx ) {
		return self.onQuote( ctx );
	}, 'tag', 'QUOTE' );
};

/**
 * Take a snapshot of the parts of a token context needed later on. The
 * token context itself is reused and overwritten by the caller for each
 * token, so it cannot be stored directly.
 *
 * @param {Object} tokenCTX token context
 * @returns {Object} shallow copy with accum, token, lastToken and pos
 */
QuoteTransformer.prototype.ctx = function ( tokenCTX ) {
	return {
		accum: tokenCTX.accum,
		token: tokenCTX.token,
		lastToken: tokenCTX.lastToken,
		pos: tokenCTX.pos
	};
};

/**
 * Handle a QUOTE token.
 *
 * Depending on the number of ticks the token is recorded as a pending
 * italic (2), bold (3 or 4) or both (5 and more). Surplus ticks (one for a
 * run of 4, everything but the last five for longer runs) are literal text:
 * they are appended to the preceding TEXT token if there is one, otherwise
 * returned as a new TEXT token. The QUOTE token itself is removed from the
 * stream; its replacement is emitted by quotesToTags on the next newline.
 *
 * @param {Object} tokenCTX token context; token and accum are overwritten
 * @returns {Object} the same tokenCTX, with token set to the surplus TEXT
 *   token or null, and accum set to the newly reserved accumulator
 */
QuoteTransformer.prototype.onQuote = function ( tokenCTX ) {
	var token = tokenCTX.token,
		qlen = token.value.length,
		lastToken = tokenCTX.lastToken,
		ctx = this.ctx( tokenCTX ),
		accum = tokenCTX.accum,
		out = null,
		// Literal tick text in front of the actual quote; null if none.
		surplus = null,
		italic = false,
		bold = false,
		boldCtx;

	switch ( qlen ) {
		case 2:
			italic = true;
			break;
		case 3:
			bold = true;
			break;
		case 4:
			surplus = "'";
			bold = true;
			break;
		case 5:
			italic = true;
			bold = true;
			break;
		default: // longer than 5, only use the last 5 ticks
			surplus = token.value.slice( 0, qlen - 5 );
			italic = true;
			bold = true;
			break;
	}

	if ( surplus !== null ) {
		if ( lastToken && lastToken.type === 'TEXT' ) {
			lastToken.value += surplus;
		} else {
			out = { type: 'TEXT', value: surplus };
		}
	}

	if ( italic && bold ) {
		// Two tags will be emitted for this token, so two outstanding
		// steps are reserved. The order of the two tags does not matter
		// here, it will be fixed by the HTML tree builder.
		accum = tokenCTX.transformer.newAccumulator( accum, 2 );
		this.italics.push( ctx );
		boldCtx = this.ctx( tokenCTX );
		// The bold needs a token object of its own, as quotesToTags
		// rewrites the token in place.
		boldCtx.token = { attribs: ctx.token.attribs };
		this.bolds.push( boldCtx );
	} else {
		accum = tokenCTX.transformer.newAccumulator( accum );
		if ( italic ) {
			this.italics.push( ctx );
		} else {
			this.bolds.push( ctx );
		}
	}

	tokenCTX.token = out;
	tokenCTX.accum = accum;
	return tokenCTX;
};

/**
 * Handle a NEWLINE token: resolve all quotes collected on the line that
 * just ended and reset the per-line state.
 *
 * If both the number of italics and the number of bolds are odd, one bold
 * is converted into an apostrophe followed by an italic. Preference order
 * for the bold to convert: first one preceded by a single-letter word,
 * then first one preceded by a multi-letter word, then first one preceded
 * by a space.
 *
 * @param {Object} tokenCTX token context of the NEWLINE token
 * @returns {Object} tokenCTX, unchanged (the NEWLINE token passes through)
 */
QuoteTransformer.prototype.onNewLine = function ( tokenCTX ) {
	// Empty arrays are truthy, so the lengths have to be tested explicitly
	// for the quick abort to take effect on lines without quotes.
	if ( this.bolds.length === 0 && this.italics.length === 0 ) {
		// Nothing to do, quick abort.
		return tokenCTX;
	}

	// balance out tokens, convert placeholders into tags
	if ( this.italics.length % 2 && this.bolds.length % 2 ) {
		var firstsingleletterword = -1,
			firstmultiletterword = -1,
			firstspace = -1;
		for ( var j = 0; j < this.bolds.length; j++ ) {
			var last = this.bolds[j].lastToken;
			if ( !last ) {
				continue;
			}
			if ( last.type === 'TEXT' ) {
				var lastchar = last.value[last.value.length - 1],
					secondtolastchar = last.value[last.value.length - 2];
				if ( lastchar === ' ' && firstspace === -1 ) {
					firstspace = j;
				} else if ( lastchar !== ' ' ) {
					if ( secondtolastchar === ' ' &&
							firstsingleletterword === -1 ) {
						firstsingleletterword = j;
					} else if ( firstmultiletterword === -1 ) {
						firstmultiletterword = j;
					}
				}
			} else if ( ( last.type === 'NEWLINE' || last.type === 'TAG' ) &&
					firstspace === -1 ) {
				// NOTE(review): this branch is guarded by firstspace
				// and overwrites an earlier firstmultiletterword --
				// kept as is, confirm that this is intended.
				firstmultiletterword = j;
			}
		}

		// now see if we can convert a bold to an italic and
		// an apostrophe
		if ( firstsingleletterword > -1 ) {
			this.convertBold( firstsingleletterword );
		} else if ( firstmultiletterword > -1 ) {
			this.convertBold( firstmultiletterword );
		} else if ( firstspace > -1 ) {
			this.convertBold( firstspace );
		}
	}

	this.quotesToTags( this.italics, 'i', tokenCTX.transformer );
	this.quotesToTags( this.bolds, 'b', tokenCTX.transformer );

	this.bolds = [];
	this.italics = [];

	// Pass through the NEWLINE token unchanged
	return tokenCTX;
};

/**
 * Turn the pending bold at index i into an apostrophe plus an italic.
 *
 * The apostrophe is appended to the preceding TEXT token if there is one.
 * Otherwise the context's token is replaced by the pair
 * [ TEXT token, original token ], which quotesToTags unpacks again.
 *
 * @param {number} i index into this.bolds
 */
QuoteTransformer.prototype.convertBold = function ( i ) {
	var ctx = this.bolds[i];
	if ( ctx.lastToken && ctx.lastToken.type === 'TEXT' ) {
		ctx.lastToken.value += "'";
	} else {
		// Add a text token!
		ctx.token = [ { type: 'TEXT', value: "'" }, ctx.token ];
	}

	this.bolds.splice( i, 1 );

	// Keep the italics ordered by their position in the token stream, as
	// quotesToTags alternates between start and end tags in list order.
	this.italics.push( ctx );
	this.italics.sort( function ( a, b ) { return a.pos - b.pos; } );
};

/**
 * Convert the collected quote contexts into alternating start and end tags
 * and feed them back into the transformer, each with the accumulator that
 * was current when the quote was encountered.
 *
 * All transformTokens calls pass a delta of 0 so that they do not count
 * towards the transformer's outstanding steps; the steps reserved in
 * onQuote are released in one go by the final finish() call.
 *
 * @param {Array} contexts this.italics or this.bolds
 * @param {string} name tag name, 'i' or 'b'
 * @param {Object} transformer providing transformTokens() and finish()
 */
QuoteTransformer.prototype.quotesToTags = function ( contexts, name, transformer ) {
	var toggle = true,
		t;
	for ( var j = 0; j < contexts.length; j++ ) {
		t = contexts[j].token;

		if ( Array.isArray( t ) ) {
			// Slip in a text token from bold to italic rebalancing
			var realToken = t.pop();
			transformer.transformTokens( t, contexts[j].accum, 0 );
			t = realToken;
		}

		t.type = toggle ? 'TAG' : 'ENDTAG';
		t.name = name;
		delete t.value;
		toggle = !toggle;
		// Re-add and process the new token with the original accumulator
		transformer.transformTokens( [ t ], contexts[j].accum, 0 );
	}
	if ( !toggle ) {
		// A tag is still open at the end of the line: add the end tag,
		// but don't count it towards the finish
		transformer.transformTokens( [ { type: 'ENDTAG', name: name } ],
				contexts[contexts.length - 1].accum, 0 );
	}
	// now allow the transformer to finish
	transformer.finish( contexts.length );
};

if ( typeof module === 'object' ) {
	module.exports.QuoteTransformer = QuoteTransformer;
}
Property changes on: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js |
___________________________________________________________________ |
Added: svn:eol-style |
1 | 213 | + native |
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -183,163 +183,7 @@ |
184 | 184 | return out; |
185 | 185 | }; |
186 | 186 | |
187 | | - /* |
188 | | - * Italic/Bold handling. |
189 | | - * |
190 | | - * - list of tokens |
191 | | - * - NEWLINE |
192 | | - * - ticks (2+) -> list with link in line token list? |
193 | | - * - process on newline |
194 | | - * - need access to text nodes before/after for conversion back to text |
195 | | - */ |
196 | | - var doQuotes = function ( tokens ) { |
197 | 187 | |
198 | | - var italics = [], |
199 | | - bolds = [], |
200 | | - out = [], |
201 | | - inserted = 0; |
202 | | - |
203 | | - var convertBold = function ( i ) { |
204 | | - var index = bolds[i]; |
205 | | - var txt = out[index - 1]; |
206 | | - txt.value += "'"; |
207 | | - if ( i > 0 ) { |
208 | | - bolds = bolds.slice(0, i) |
209 | | - .concat(bolds.slice(i + 1, bolds.length - i - 1)); |
210 | | - } else { |
211 | | - bolds.shift(); |
212 | | - } |
213 | | - |
214 | | - italics.push(index); |
215 | | - italics.sort(function(a,b) { return a - b }); |
216 | | - }; |
217 | | - |
218 | | - // convert italics/bolds into tags |
219 | | - var quotesToTags = function ( offsets, name ) { |
220 | | - var toggle = true; |
221 | | - for (var j = 0; j < offsets.length; j++) { |
222 | | - var t = out[offsets[j]]; |
223 | | - if(toggle) { |
224 | | - t.type = 'TAG'; |
225 | | - } else { |
226 | | - t.type = 'ENDTAG'; |
227 | | - } |
228 | | - t.name = name; |
229 | | - delete t.value; |
230 | | - toggle = !toggle; |
231 | | - } |
232 | | - if (!toggle) { |
233 | | - // add end tag |
234 | | - out.push({type: 'ENDTAG', name: name}); |
235 | | - inserted++; |
236 | | - } |
237 | | - toggle = true; |
238 | | - }; |
239 | | - |
240 | | - for (var i = 0, length = tokens.length; i < length; i++) { |
241 | | - var token = tokens[i]; |
242 | | - switch (token.type) { |
243 | | - case 'QUOTE': |
244 | | - // depending on length, add starting 's to preceding text node |
245 | | - // (if any) |
246 | | - // add token index to italic/bold lists |
247 | | - // add placeholder for token |
248 | | - var qlen = token.value.length; |
249 | | - switch (qlen) { |
250 | | - case 2: italics.push(i + inserted); out.push(token); break; |
251 | | - case 3: bolds.push(i + inserted); out.push(token); break; |
252 | | - case 4: |
253 | | - token.value = "'''"; |
254 | | - if (i > 0 && tokens[i-1].type === 'TEXT') { |
255 | | - tokens[i-1].value += "'"; |
256 | | - } else { |
257 | | - out.push({type: 'TEXT', value: "'"}); |
258 | | - inserted++; |
259 | | - } |
260 | | - bolds.push(i + inserted); |
261 | | - out.push(token); |
262 | | - break; |
263 | | - case 5: |
264 | | - // order does not matter here, will be fixed |
265 | | - // by HTML parser backend |
266 | | - italics.push(i + inserted); |
267 | | - out.push({type: 'QUOTE', value: "''"}); |
268 | | - inserted++; |
269 | | - bolds.push(i + inserted); |
270 | | - out.push({type: 'QUOTE', value: "'''"}); |
271 | | - break; |
272 | | - default: // longer than 5, only use the last 5 ticks |
273 | | - token.value = "'''''"; |
274 | | - var newvalue = token.value.substr(0, qlen - 5 ); |
275 | | - if (i > 0 && tokens[i-1].type === 'TEXT') { |
276 | | - tokens[i-1].value += newvalue; |
277 | | - } else { |
278 | | - out.push({type: 'TEXT', value: newvalue}); |
279 | | - inserted++; |
280 | | - } |
281 | | - italics.push(i + inserted); |
282 | | - out.push({type: 'QUOTE', value: "''"}); |
283 | | - inserted++; |
284 | | - bolds.push(i + inserted); |
285 | | - out.push({type: 'QUOTE', value: "'''"}); |
286 | | - break; |
287 | | - } |
288 | | - break; |
289 | | - |
290 | | - case 'NEWLINE': |
291 | | - // balance out tokens, convert placeholders into tags |
292 | | - if (italics.length % 2 && bolds.length % 2) { |
293 | | - dp("balancing!"); |
294 | | - var firstsingleletterword = -1, |
295 | | - firstmultiletterword = -1, |
296 | | - firstspace = -1; |
297 | | - for (var j = 0; j < bolds.length; j++) { |
298 | | - var ticki = bolds[j]; |
299 | | - if (ticki > 0 && out[ticki - 1].type === 'TEXT') { |
300 | | - var txt = out[ticki - 1], |
301 | | - lastchar = txt.value[txt.value.length - 1], |
302 | | - secondtolastchar = txt.value[txt.value.length - 2]; |
303 | | - dp('txt: ' + pp(txt)); |
304 | | - if (lastchar === ' ' && firstspace === -1) { |
305 | | - firstspace = j; |
306 | | - } else if (lastchar !== ' ') { |
307 | | - if ( secondtolastchar === ' ' && |
308 | | - firstsingleletterword === -1) |
309 | | - { |
310 | | - firstsingleletterword = j; |
311 | | - } else if ( firstmultiletterword == -1) { |
312 | | - firstmultiletterword = j; |
313 | | - } |
314 | | - } |
315 | | - } |
316 | | - } |
317 | | - |
318 | | - |
319 | | - // now see if we can convert a bold to an italic and |
320 | | - // an apostrophe |
321 | | - if (firstsingleletterword > -1) { |
322 | | - convertBold(firstsingleletterword); |
323 | | - } else if (firstmultiletterword > -1) { |
324 | | - convertBold(firstmultiletterword); |
325 | | - } else if (firstspace > -1) { |
326 | | - convertBold(firstspace); |
327 | | - } |
328 | | - } |
329 | | - |
330 | | - quotesToTags(bolds, 'b'); |
331 | | - quotesToTags(italics, 'i'); |
332 | | - bolds = []; |
333 | | - italics = []; |
334 | | - out.push(token); |
335 | | - break; |
336 | | - default: |
337 | | - out.push(token); |
338 | | - } |
339 | | - } |
340 | | - return out; |
341 | | - }; |
342 | | - |
343 | | - |
344 | 188 | /* End static utilities */ |
345 | 189 | |
346 | 190 | /* |
— | — | @@ -404,8 +248,7 @@ |
405 | 249 | |
406 | 250 | start |
407 | 251 | = e:toplevelblock* newline* { |
408 | | - // XXX: move doQuotes out to general token stream transformer |
409 | | - return doQuotes(flatten(e)); |
| 252 | + return flatten(e); |
410 | 253 | } |
411 | 254 | |
412 | 255 | |
— | — | @@ -499,7 +342,14 @@ |
500 | 343 | |
501 | 344 | // Start of line |
502 | 345 | sol = (newline / & { return pos === 0; } { return true; }) |
503 | | - cn:(c:comment n:newline? { return [c, {type: 'TEXT', value: n}] })* { |
| 346 | + cn:(c:comment n:newline? { |
| 347 | + if ( n !== '' ) { |
| 348 | + return [c, {type: 'TEXT', value: n}]; |
| 349 | + } else { |
| 350 | + return [c]; |
| 351 | + } |
| 352 | + } |
| 353 | + )* { |
504 | 354 | return [{type: 'NEWLINE'}].concat(cn); |
505 | 355 | } |
506 | 356 | |
— | — | @@ -548,7 +398,7 @@ |
549 | 399 | block_lines |
550 | 400 | = s:sol |
551 | 401 | // eat an empty line before the block |
552 | | - s2:(ss:space* so:sol { return [{type: 'TEXT', value: ss.join('')}].concat(so) })? |
| 402 | + s2:(os:optionalSpaceToken so:sol { return os.concat(so) })? |
553 | 403 | bl:block_line { |
554 | 404 | var s2_ = (s2 !== '') ? s2 : []; |
555 | 405 | return s.concat(s2_, bl); |
— | — | @@ -605,7 +455,9 @@ |
606 | 456 | for (var i = 0, l = c.length; i < l; i++) { |
607 | 457 | var ci = c[i]; |
608 | 458 | if (typeof ci == 'string') { |
609 | | - text.push(ci); |
| 459 | + if(ci !== '') { |
| 460 | + text.push(ci); |
| 461 | + } |
610 | 462 | } else { |
611 | 463 | if (text.length) { |
612 | 464 | out.push({ type: "TEXT", value: text.join('') }); |
— | — | @@ -630,7 +482,9 @@ |
631 | 483 | for (var i = 0; i < c.length; i++) { |
632 | 484 | var ci = c[i] |
633 | 485 | if (typeof ci == 'string') { |
634 | | - text.push(ci); |
| 486 | + if(ci !== '') { |
| 487 | + text.push(ci); |
| 488 | + } |
635 | 489 | } else { |
636 | 490 | if (text.length) { |
637 | 491 | out.push({type: 'TEXT', value: text.join('')}); |
— | — | @@ -900,7 +754,8 @@ |
901 | 755 | * all not context free. */ |
902 | 756 | quote = "''" x:"'"* { |
903 | 757 | return { |
904 | | - type : 'QUOTE', |
| 758 | + type: 'TAG', |
| 759 | + name : 'QUOTE', |
905 | 760 | value: "''" + x.join('') |
906 | 761 | } |
907 | 762 | } |