r108025 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r108024‎ | r108025 | r108026 >
Date:14:09, 4 January 2012
Author:gwicke
Status:deferred
Tags:
Comment:
Fix quote handling and tweak the whitelist a bit. 'any' token registrations
are now merged with specific registrations by rank. Not yet clear if that is a
good idea overall, need to check use cases when implementing template expansion
and other functionality.

183 parser test now passing.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js
@@ -10,14 +10,15 @@
1111
1212 testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\" data-sourcePos=\"30:61\"><b>text</b></a><i data-sourcePos=\"62:106\">Something <a href=\"http://example.com\">in italic</a></i><i data-sourcePos=\"107:164\">Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i data-sourcePos=\"165:204\"><b data-sourcePos=\"165:204\">Now <a href=\"http://example.com\">both</a></b></i></p>";
1313
14 -testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b></b>Bold text..</p><p>..spanning two paragraphs (should not work).<b></b></p><p><b></b>Bold tag left open</p><p><i></i>Italic tag left open</p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
 14+testWhiteList["Unclosed and unmatched quotes"] = "<p data-sourcePos=\"0:66\"><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
1515
16 -testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i></i> and stuff</a>!</p>";
 16+// The expected result for this test is really broken html.
 17+testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p data-sourcePos=\"0:45\"><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i> and stuff</i></a><i>!</i></p>";
1718
1819 testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>";
1920
2021 // This is a rare edge case, and the new behavior is arguably more consistent
21 -testWhiteList["5 quotes, code coverage +1 line"] = "<p><i>'</i></p>";
 22+testWhiteList["5 quotes, code coverage +1 line"] = "<p>'<i></i></p>";
2223
2324
2425 // empty table tags / with only a caption are legal in HTML5.
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js
@@ -127,7 +127,7 @@
128128 }
129129 transArr.push(transformer);
130130 // sort ascending by rank
131 - transArr.sort( function ( t1, t2 ) { return t1.rank - t2.rank; } );
 131+ transArr.sort( this._cmpTransformations );
132132 };
133133
134134 /**
@@ -185,6 +185,12 @@
186186 }
187187 };
188188
 189+/**
 190+ * Comparison for sorting transformations by ascending rank.
 191+ */
 192+TokenTransformDispatcher.prototype._cmpTransformations = function ( a, b ) {
 193+ return a.rank - b.rank;
 194+};
189195
190196 /* Call all transformers on a tag.
191197 *
@@ -206,8 +212,10 @@
207213 tName = token.name.toLowerCase(),
208214 tagts = this.transformers[phaseEndRank].tag[tName];
209215
210 - if ( tagts ) {
 216+ if ( tagts && tagts.length ) {
 217+ // could cache this per tag type to avoid re-sorting each time
211218 ts = ts.concat(tagts);
 219+ ts.sort( this._cmpTransformations );
212220 }
213221 //console.log(JSON.stringify(ts, null, 2));
214222 if ( ts ) {
@@ -252,7 +260,11 @@
253261 */
254262 TokenTransformDispatcher.prototype._transformToken = function ( token, cb, phaseEndRank, frame, ts ) {
255263 // prepend 'any' transformers
256 - ts = this.transformers[phaseEndRank].any.concat(ts);
 264+ var anyTrans = this.transformers[phaseEndRank].any;
 265+ if ( anyTrans.length ) {
 266+ ts = this.transformers[phaseEndRank].any.concat(ts);
 267+ ts.sort( this._cmpTransformations );
 268+ }
257269 var transformer,
258270 res = { token: token },
259271 aborted = false;
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
@@ -5,20 +5,25 @@
66 */
77
88 function QuoteTransformer ( ) {
9 - // Bold and italic tokens are collected in these lists, and then processed
10 - // in onNewLine.
119 this.quoteAndNewlineRank = 2.1;
1210 this.anyRank = 2.101; // Just after regular quote and newline
1311 this.reset();
1412 }
1513
1614 QuoteTransformer.prototype.reset = function ( ) {
17 - this.italics = [];
18 - this.bolds = [];
 15+ // A chunk starts with a token context around a quote token and is
 16+ // (optionally) followed by non-quote tokens. The quote token and its
 17+ // context is later replaced with the actual tag token for italic or bold.
1918 this.currentChunk = [];
2019 // List of chunks, each starting with a (potentially) bold or italic token
2120 // and followed by plain tokens.
2221 this.chunks = [];
 22+ // References to chunks in which the first token context / quote token
 23+ // should be converted to italic or bold tokens.
 24+ this.italics = [];
 25+ this.bolds = [];
 26+
 27+ this.isActive = false;
2328 };
2429
2530
@@ -37,19 +42,15 @@
3843
3944 // Make a copy of the token context
4045 QuoteTransformer.prototype._startNewChunk = function ( ) {
41 - this.currentChunk.pos = this.chunks.length;
4246 this.chunks.push( this.currentChunk );
4347 this.currentChunk = [];
 48+ this.currentChunk.pos = this.chunks.length - 1;
4449 };
4550
4651 // Handle QUOTE tags. These are collected in italic/bold lists depending on
4752 // the length of quote string. Actual analysis and conversion to the
4853 // appropriate tag tokens is deferred until the next NEWLINE token triggers
4954 // onNewLine.
50 -//
51 -// XXX: Cannot use async stuff here, need to buffer things locally instead!
52 -// FIXME: Convert to internal buffering! -> return all tokens with rank set to
53 -// own rank to avoid reprocessing
5455 QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) {
5556 var qlen = token.value.length,
5657 tokens = [], // output tokens
@@ -66,9 +67,10 @@
6768 };
6869
6970
70 - if ( this.chunks.length === 0 ) {
 71+ if ( ! this.isActive ) {
7172 // register for any token if not yet active
72 - this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'tag', 'mw-quote' );
 73+ this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'any' );
 74+ this.isActive = true;
7375 }
7476
7577 this._startNewChunk();
@@ -114,7 +116,7 @@
115117 break;
116118 }
117119
118 - return { token: null };
 120+ return {};
119121 };
120122
121123 QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) {
@@ -128,28 +130,28 @@
129131 QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken ) {
130132 var res;
131133
132 - if( ! this.chunks.length ) {
 134+ if( ! this.isActive ) {
133135 // Nothing to do, quick abort.
134136 return { token: token };
135137 }
136138
137139
138140 token.rank = this.quoteAndNewlineRank;
139 - this.currentChunk.push( token );
140 - this._startNewChunk();
141141
142 - //console.log("onNewLine: " + this.italics + this.bolds);
 142+ //console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) );
 143+
 144+ //console.log("onNewLine: " + this.italics.length + 'i/b' + this.bolds.length);
143145 // balance out tokens, convert placeholders into tags
144146 if (this.italics.length % 2 && this.bolds.length % 2) {
145147 var firstsingleletterword = -1,
146148 firstmultiletterword = -1,
147149 firstspace = -1;
148150 for (var j = 0; j < this.bolds.length; j++) {
149 - var ctx = this.bolds[j];
 151+ var ctx = this.bolds[j][0];
150152 //console.log("balancing!" + JSON.stringify(ctx.prevToken, null, 2));
151153 if (ctx.prevToken) {
152154 if (ctx.prevToken.type === 'TEXT') {
153 - var lastchar = ctx.prevToken.value[ctx.prevToken.value.length - 1],
 155+ var lastchar = prevToken.value[ctx.prevToken.value.length - 1],
154156 secondtolastchar = ctx.prevToken.value[ctx.prevToken.value.length - 2];
155157 if (lastchar === ' ' && firstspace === -1) {
156158 firstspace = j;
@@ -189,12 +191,15 @@
190192 this.quotesToTags( this.italics, 'i' );
191193 this.quotesToTags( this.bolds, 'b' );
192194
 195+ this.currentChunk.push( token );
 196+ this._startNewChunk();
 197+
193198 //console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) );
194199
195200 // return all collected tokens including the newline
196201 res = { tokens: [].concat.apply([], this.chunks) };
197202
198 - // prepare for next session
 203+ // prepare for next line
199204 this.reset();
200205
201206 // remove 'any' registration
@@ -246,7 +251,7 @@
247252 toggle = !toggle;
248253 }
249254 if (!toggle) {
250 - // Add end tag, but don't count it towards completion.
 255+ // Add end tag
251256 this.currentChunk.push( {type: 'ENDTAG', name: name} );
252257 }
253258 };

Status & tagging log