Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js |
— | — | @@ -10,14 +10,15 @@ |
11 | 11 | |
12 | 12 | testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\" data-sourcePos=\"30:61\"><b>text</b></a><i data-sourcePos=\"62:106\">Something <a href=\"http://example.com\">in italic</a></i><i data-sourcePos=\"107:164\">Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i data-sourcePos=\"165:204\"><b data-sourcePos=\"165:204\">Now <a href=\"http://example.com\">both</a></b></i></p>"; |
13 | 13 | |
14 | | -testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b></b>Bold text..</p><p>..spanning two paragraphs (should not work).<b></b></p><p><b></b>Bold tag left open</p><p><i></i>Italic tag left open</p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>"; |
| 14 | +testWhiteList["Unclosed and unmatched quotes"] = "<p data-sourcePos=\"0:66\"><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>"; |
15 | 15 | |
16 | | -testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i></i> and stuff</a>!</p>"; |
| 16 | +// The expected result for this test is really broken html. |
| 17 | +testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p data-sourcePos=\"0:45\"><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i> and stuff</i></a><i>!</i></p>"; |
17 | 18 | |
18 | 19 | testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>"; |
19 | 20 | |
20 | 21 | // This is a rare edge case, and the new behavior is arguably more consistent |
21 | | -testWhiteList["5 quotes, code coverage +1 line"] = "<p><i>'</i></p>"; |
| 22 | +testWhiteList["5 quotes, code coverage +1 line"] = "<p>'<i></i></p>"; |
22 | 23 | |
23 | 24 | |
24 | 25 | // empty table tags / with only a caption are legal in HTML5. |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js |
— | — | @@ -127,7 +127,7 @@ |
128 | 128 | } |
129 | 129 | transArr.push(transformer); |
130 | 130 | // sort ascending by rank |
131 | | - transArr.sort( function ( t1, t2 ) { return t1.rank - t2.rank; } ); |
| 131 | + transArr.sort( this._cmpTransformations ); |
132 | 132 | }; |
133 | 133 | |
134 | 134 | /** |
— | — | @@ -185,6 +185,12 @@ |
186 | 186 | } |
187 | 187 | }; |
188 | 188 | |
| 189 | +/** |
| 190 | + * Comparison for sorting transformations by ascending rank. |
| 191 | + */ |
| 192 | +TokenTransformDispatcher.prototype._cmpTransformations = function ( a, b ) { |
| 193 | + return a.rank - b.rank; |
| 194 | +}; |
189 | 195 | |
190 | 196 | /* Call all transformers on a tag. |
191 | 197 | * |
— | — | @@ -206,8 +212,10 @@ |
207 | 213 | tName = token.name.toLowerCase(), |
208 | 214 | tagts = this.transformers[phaseEndRank].tag[tName]; |
209 | 215 | |
210 | | - if ( tagts ) { |
| 216 | + if ( tagts && tagts.length ) { |
| 217 | + // could cache this per tag type to avoid re-sorting each time |
211 | 218 | ts = ts.concat(tagts); |
| 219 | + ts.sort( this._cmpTransformations ); |
212 | 220 | } |
213 | 221 | //console.log(JSON.stringify(ts, null, 2)); |
214 | 222 | if ( ts ) { |
— | — | @@ -252,7 +260,11 @@ |
253 | 261 | */ |
254 | 262 | TokenTransformDispatcher.prototype._transformToken = function ( token, cb, phaseEndRank, frame, ts ) { |
255 | 263 | // prepend 'any' transformers |
256 | | - ts = this.transformers[phaseEndRank].any.concat(ts); |
| 264 | + var anyTrans = this.transformers[phaseEndRank].any; |
| 265 | + if ( anyTrans.length ) { |
| 266 | + ts = this.transformers[phaseEndRank].any.concat(ts); |
| 267 | + ts.sort( this._cmpTransformations ); |
| 268 | + } |
257 | 269 | var transformer, |
258 | 270 | res = { token: token }, |
259 | 271 | aborted = false; |
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js |
— | — | @@ -5,20 +5,25 @@ |
6 | 6 | */ |
7 | 7 | |
8 | 8 | function QuoteTransformer ( ) { |
9 | | - // Bold and italic tokens are collected in these lists, and then processed |
10 | | - // in onNewLine. |
11 | 9 | this.quoteAndNewlineRank = 2.1; |
12 | 10 | this.anyRank = 2.101; // Just after regular quote and newline |
13 | 11 | this.reset(); |
14 | 12 | } |
15 | 13 | |
16 | 14 | QuoteTransformer.prototype.reset = function ( ) { |
17 | | - this.italics = []; |
18 | | - this.bolds = []; |
| 15 | + // A chunk starts with a token context around a quote token and is |
| 16 | + // (optionally) followed by non-quote tokens. The quote token and its |
| 17 | + // context is later replaced with the actual tag token for italic or bold. |
19 | 18 | this.currentChunk = []; |
20 | 19 | // List of chunks, each starting with a (potentially) bold or italic token |
21 | 20 | // and followed by plain tokens. |
22 | 21 | this.chunks = []; |
| 22 | + // References to chunks in which the first token context / quote token |
| 23 | + // should be converted to italic or bold tokens. |
| 24 | + this.italics = []; |
| 25 | + this.bolds = []; |
| 26 | + |
| 27 | + this.isActive = false; |
23 | 28 | }; |
24 | 29 | |
25 | 30 | |
— | — | @@ -37,19 +42,15 @@ |
38 | 43 | |
39 | 44 | // Make a copy of the token context |
40 | 45 | QuoteTransformer.prototype._startNewChunk = function ( ) { |
41 | | - this.currentChunk.pos = this.chunks.length; |
42 | 46 | this.chunks.push( this.currentChunk ); |
43 | 47 | this.currentChunk = []; |
| 48 | + this.currentChunk.pos = this.chunks.length - 1; |
44 | 49 | }; |
45 | 50 | |
46 | 51 | // Handle QUOTE tags. These are collected in italic/bold lists depending on |
47 | 52 | // the length of quote string. Actual analysis and conversion to the |
48 | 53 | // appropriate tag tokens is deferred until the next NEWLINE token triggers |
49 | 54 | // onNewLine. |
50 | | -// |
51 | | -// XXX: Cannot use async stuff here, need to buffer things locally instead! |
52 | | -// FIXME: Convert to internal buffering! -> return all tokens with rank set to |
53 | | -// own rank to avoid reprocessing |
54 | 55 | QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) { |
55 | 56 | var qlen = token.value.length, |
56 | 57 | tokens = [], // output tokens |
— | — | @@ -66,9 +67,10 @@ |
67 | 68 | }; |
68 | 69 | |
69 | 70 | |
70 | | - if ( this.chunks.length === 0 ) { |
| 71 | + if ( ! this.isActive ) { |
71 | 72 | // register for any token if not yet active |
72 | | - this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'tag', 'mw-quote' ); |
| 73 | + this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'any' ); |
| 74 | + this.isActive = true; |
73 | 75 | } |
74 | 76 | |
75 | 77 | this._startNewChunk(); |
— | — | @@ -114,7 +116,7 @@ |
115 | 117 | break; |
116 | 118 | } |
117 | 119 | |
118 | | - return { token: null }; |
| 120 | + return {}; |
119 | 121 | }; |
120 | 122 | |
121 | 123 | QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) { |
— | — | @@ -128,28 +130,28 @@ |
129 | 131 | QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken ) { |
130 | 132 | var res; |
131 | 133 | |
132 | | - if( ! this.chunks.length ) { |
| 134 | + if( ! this.isActive ) { |
133 | 135 | // Nothing to do, quick abort. |
134 | 136 | return { token: token }; |
135 | 137 | } |
136 | 138 | |
137 | 139 | |
138 | 140 | token.rank = this.quoteAndNewlineRank; |
139 | | - this.currentChunk.push( token ); |
140 | | - this._startNewChunk(); |
141 | 141 | |
142 | | - //console.log("onNewLine: " + this.italics + this.bolds); |
| 142 | + //console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) ); |
| 143 | + |
| 144 | + //console.log("onNewLine: " + this.italics.length + 'i/b' + this.bolds.length); |
143 | 145 | // balance out tokens, convert placeholders into tags |
144 | 146 | if (this.italics.length % 2 && this.bolds.length % 2) { |
145 | 147 | var firstsingleletterword = -1, |
146 | 148 | firstmultiletterword = -1, |
147 | 149 | firstspace = -1; |
148 | 150 | for (var j = 0; j < this.bolds.length; j++) { |
149 | | - var ctx = this.bolds[j]; |
| 151 | + var ctx = this.bolds[j][0]; |
150 | 152 | //console.log("balancing!" + JSON.stringify(ctx.prevToken, null, 2)); |
151 | 153 | if (ctx.prevToken) { |
152 | 154 | if (ctx.prevToken.type === 'TEXT') { |
153 | | - var lastchar = ctx.prevToken.value[ctx.prevToken.value.length - 1], |
| 155 | + var lastchar = prevToken.value[ctx.prevToken.value.length - 1], |
154 | 156 | secondtolastchar = ctx.prevToken.value[ctx.prevToken.value.length - 2]; |
155 | 157 | if (lastchar === ' ' && firstspace === -1) { |
156 | 158 | firstspace = j; |
— | — | @@ -189,12 +191,15 @@ |
190 | 192 | this.quotesToTags( this.italics, 'i' ); |
191 | 193 | this.quotesToTags( this.bolds, 'b' ); |
192 | 194 | |
| 195 | + this.currentChunk.push( token ); |
| 196 | + this._startNewChunk(); |
| 197 | + |
193 | 198 | //console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) ); |
194 | 199 | |
195 | 200 | // return all collected tokens including the newline |
196 | 201 | res = { tokens: [].concat.apply([], this.chunks) }; |
197 | 202 | |
198 | | - // prepare for next session |
| 203 | + // prepare for next line |
199 | 204 | this.reset(); |
200 | 205 | |
201 | 206 | // remove 'any' registration |
— | — | @@ -246,7 +251,7 @@ |
247 | 252 | toggle = !toggle; |
248 | 253 | } |
249 | 254 | if (!toggle) { |
250 | | - // Add end tag, but don't count it towards completion. |
| 255 | + // Add end tag |
251 | 256 | this.currentChunk.push( {type: 'ENDTAG', name: name} ); |
252 | 257 | } |
253 | 258 | }; |