r105926 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r105925‎ | r105926 | r105927 >
Date:20:53, 12 December 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Convert quote handling (italic/bold) to a core extension operating on the
token stream. This is the first token transformation exercising the
TokenTransformer class as its dispatcher. Template expansions, wiki link
formatting, tag sanitization and extensions should be able to use the same
dispatcher by registering for specific token types.

The parser performance is very slightly improved as the token stream is only
traversed once.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformer.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js
@@ -6,16 +6,18 @@
77
88 // The nesting of italic/bold tags is changed in this test, but the resulting
99 // formatting is identical
10 -testWhiteList['Italics and bold'] = "<ul><li> plain</li><li> plain<i>italic</i>plain</li><li> plain<i>italic</i>plain<i>italic</i>plain</li><li> plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<b>bold</b>plain</li><li> plain<i>italic</i>plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<i>italic</i>plain</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain</li><li> plain<i><b>bold-italic</b>italic</i>plain</li><li> plain<i><b>bold-italic</b></i><b>bold</b>plain</li><li> plain<i>italic<b>bold-italic</b></i>plain</li><li> plain<b>bold<i>bold-italic</i></b>plain</li><li> plain l'<i>italic</i>plain</li><li> plain l'<b>bold</b> plain</li></ul>";
 10+testWhiteList["Italics and bold"] = "<ul><li> plain</li><li> plain<i>italic</i>plain</li><li> plain<i>italic</i>plain<i>italic</i>plain</li><li> plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<b>bold</b>plain</li><li> plain<i>italic</i>plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<i>italic</i>plain</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain</li><li> plain<i><b>bold-italic</b>italic</i>plain</li><li> plain<i><b>bold-italic</b></i><b>bold</b>plain</li><li> plain<i>italic<b>bold-italic</b></i>plain</li><li> plain<b>bold<i>bold-italic</i></b>plain</li><li> plain l'<i>italic</i>plain</li><li> plain l'<b>bold</b> plain</li></ul>";
1111
12 -testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\"><b>text</b></a><b></b><i>Something <a href=\"http://example.com\">in italic</a></i><i>Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><b></b><i><b>Now <a href=\"http://example.com\">both</a></b></i></p>";
 12+testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\" data-sourcePos=\"30:61\"><b>text</b></a><i data-sourcePos=\"62:106\">Something <a href=\"http://example.com\">in italic</a></i><i data-sourcePos=\"107:164\">Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i data-sourcePos=\"165:204\"><b data-sourcePos=\"165:204\">Now <a href=\"http://example.com\">both</a></b></i></p>";
1313
14 -testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
 14+testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b></b>Bold text..</p><p>..spanning two paragraphs (should not work).<b></b></p><p><b></b>Bold tag left open</p><p><i></i>Italic tag left open</p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
1515
16 -testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i> and stuff</i></a><i>!</i></p>";
 16+testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i></i> and stuff</a>!</p>";
1717
1818 testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>";
1919
 20+// This is a rare edge case, and the new behavior is arguably more consistent
 21+testWhiteList["5 quotes, code coverage +1 line"] = "<p><i>'</i></p>";
2022
2123
2224 // empty table tags / with only a caption are legal in HTML5.
Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -63,6 +63,8 @@
6464 _import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']);
6565 _import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']);
6666
 67+_import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']);
 68+
6769 // WikiDom and serializers
6870 _require(pj('es', 'es.js'));
6971 _require(pj('es', 'es.Html.js'));
@@ -178,13 +180,17 @@
179181 this.postProcessor = new DOMPostProcessor();
180182
181183 var pt = this;
 184+
 185+ // Set up the TokenTransformer with a callback for the remaining
 186+ // processing.
182187 this.tokenTransformer = new TokenTransformer ( function ( tokens ) {
 188+
183189 //console.log("TOKENS: " + JSON.stringify(tokens, null, 2));
 190+
184191 // Create a new tree builder, which also creates a new document.
185192 var treeBuilder = new FauxHTML5.TreeBuilder();
186193
187 - // Build a DOM tree from tokens using the HTML tree
188 - // builder/parser.
 194+ // Build a DOM tree from tokens using the HTML tree builder/parser.
189195 pt.buildTree( tokens, treeBuilder );
190196
191197 // Perform post-processing on DOM.
@@ -193,9 +199,14 @@
194200 // And serialize the result.
195201 var out = treeBuilder.body().innerHTML;
196202
 203+ // Finally, check the result vs. the expected result.
197204 pt.checkResult( pt.currentItem, out );
198205 });
199206
 207+ // Add token transformations..
 208+ var qt = new QuoteTransformer();
 209+ qt.register(this.tokenTransformer);
 210+
200211 // Test statistics
201212 this.passedTests = 0;
202213 this.passedTestsManual = 0;
@@ -610,9 +621,8 @@
611622 this.reportSummary();
612623 };
613624
614 -var pt = new ParserTests();
615 -console.log(pt.processArticle);
616 -pt.main();
 625+// Construct the ParserTests object and run the parser tests
 626+new ParserTests().main();
617627
618628
619629 })();
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformer.js
@@ -32,7 +32,7 @@
3333 }
3434
3535 TokenTransformer.prototype.reset = function () {
36 - this.accum = new TokenAccumulator();
 36+ this.accum = new TokenAccumulator(null);
3737 this.firstaccum = this.accum;
3838 this.outstanding = 1; // Number of outstanding processing steps
3939 // (e.g., async template fetches/expansions)
@@ -147,18 +147,21 @@
148148 * @returns nothing: Calls back registered callback if there are no more
149149 * outstanding asynchronous expansions.
150150 * */
151 -TokenTransformer.prototype.transformTokens = function ( tokens, accum ) {
 151+TokenTransformer.prototype.transformTokens = function ( tokens, accum, delta ) {
152152 if ( accum === undefined ) {
153153 this.reset();
154154 accum = this.accum;
155 - } else {
156 - // Prepare to replace the last token in the current accumulator.
157 - accum.pop();
158155 }
 156+
 157+ //console.log('transformTokens: ' + JSON.stringify(tokens) + JSON.stringify(accum.accum) );
 158+
159159 var tokenCTX = new TokenContext(undefined, accum, this, undefined);
160 - for ( var i = 0, l = tokens.length; i < l; i++ ) {
161 - tokenCTX.lastToken = tokenCTX.token;
 160+ var origLen = tokens.length;
 161+ for ( var i = 0; i < tokens.length; i++ ) {
 162+ tokenCTX.lastToken = tokenCTX.token; // XXX: Fix for re-entrant case!
162163 tokenCTX.token = tokens[i];
 164+ tokenCTX.pos = i;
 165+ tokenCTX.accum = accum;
163166 var ts;
164167 switch(tokenCTX.token.type) {
165168 case 'TAG':
@@ -186,52 +189,67 @@
187190 // Splice in the returned tokens (while replacing the original
188191 // token), and process them next.
189192 [].splice.apply(tokens, [i, 1].concat(tokenCTX.token));
190 - l += res.token.length - 1;
 193+ //l += tokenCTX.token.length - 1;
191194 i--; // continue at first inserted token
192195 } else if (tokenCTX.token) {
193 - // push to accumulator (not necessarily the last one)
 196+ // push to accumulator
194197 accum.push(tokenCTX.token);
195198 }
196199 // Update current accum, in case a new one was spliced in by a
197200 // transformation starting asynch work.
198201 accum = tokenCTX.accum;
199202 }
200 - this.finish();
 203+
 204+ if ( delta === undefined ) {
 205+ delta = 1;
 206+ }
 207+
 208+ this.finish( delta );
201209 };
202210
203 -TokenTransformer.prototype.finish = function ( ) {
204 - this.outstanding--;
 211+TokenTransformer.prototype.finish = function ( delta ) {
 212+ this.outstanding -= delta;
205213 if ( this.outstanding === 0 ) {
206214 // Join the token accumulators back into a single token list
207215 var a = this.firstaccum;
208216 var tokens = a.accum;
209 - while ( a.next !== undefined ) {
 217+ while ( a.next !== null ) {
210218 a = a.next;
211 - tokens.concat(a.accum);
 219+ tokens = tokens.concat(a.accum);
212220 }
 221+ //console.log('TOKENS: ' + JSON.stringify(tokens, null, 2));
213222 // Call our callback with the flattened token list
214223 this.cb(tokens);
215224 }
216225 };
217226
218227 /* Start a new accumulator for asynchronous work. */
219 -TokenTransformer.prototype.newAccumulator = function ( ) {
220 - this.outstanding++;
221 - return this.accum.insertAccumulator( );
 228+TokenTransformer.prototype.newAccumulator = function ( accum, count ) {
 229+ if ( count !== undefined ) {
 230+ this.outstanding += count;
 231+ } else {
 232+ this.outstanding++;
 233+ }
 234+ if ( accum === undefined ) {
 235+ accum = this.accum;
 236+ }
 237+ return accum.insertAccumulator( );
222238 };
223239
224240 // Token accumulators in a linked list. Using a linked list simplifies async
225241 // callbacks for template expansions.
226242 function TokenAccumulator ( next, tokens ) {
227243 this.next = next;
228 - if ( tokens )
 244+ if ( tokens ) {
229245 this.accum = tokens;
230 - else
 246+ } else {
231247 this.accum = [];
 248+ }
 249+ return this;
232250 }
233251
234252 TokenAccumulator.prototype.push = function ( token ) {
235 - this.accum.push(token);
 253+ return this.accum.push(token);
236254 };
237255
238256 TokenAccumulator.prototype.pop = function ( ) {
@@ -239,7 +257,7 @@
240258 };
241259
242260 TokenAccumulator.prototype.insertAccumulator = function ( ) {
243 - this.next = new TokenAccumulator(this.next, tokens);
 261+ this.next = new TokenAccumulator(this.next);
244262 return this.next;
245263 };
246264
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
@@ -0,0 +1,211 @@
 2+/*
 3+ * Italic/Bold handling.
 4+ *
 5+ * - list of tokens
 6+ * - NEWLINE
 7+ * - ticks (2+) -> list with link in line token list?
 8+ * - process on newline
 9+ * - need access to text nodes before for conversion back to text
 10+ */
 11+
 12+function QuoteTransformer ( ) {
 13+ this.italics = [];
 14+ this.bolds = [];
 15+ this.inserted = 0;
 16+}
 17+
 18+QuoteTransformer.prototype.register = function ( tokenTransformer ) {
 19+ // Register for NEWLINE and QUOTE tag tokens
 20+ var self = this;
 21+ tokenTransformer.appendListener( function (ctx) {
 22+ return self.onNewLine(ctx);
 23+ }, 'newline' );
 24+ tokenTransformer.appendListener( function (ctx) {
 25+ return self.onQuote(ctx);
 26+ }, 'tag', 'QUOTE' );
 27+};
 28+
 29+// Extract a copy of the token context with the info we need
 30+QuoteTransformer.prototype.ctx = function ( tokenCTX ) {
 31+ return {
 32+ accum: tokenCTX.accum,
 33+ token: tokenCTX.token,
 34+ lastToken: tokenCTX.lastToken,
 35+ pos: tokenCTX.pos
 36+ };
 37+};
 38+
 39+QuoteTransformer.prototype.onQuote = function ( tokenCTX ) {
 40+ // depending on length, add starting 's to preceding text node
 41+ // (if any)
 42+ // add token index to italic/bold lists
 43+ // add placeholder for token
 44+ var token = tokenCTX.token,
 45+ qlen = token.value.length,
 46+ out = null,
 47+ lastToken = tokenCTX.lastToken,
 48+ ctx = this.ctx(tokenCTX),
 49+ ctx2,
 50+ accum = tokenCTX.accum;
 51+ switch (qlen) {
 52+ case 2:
 53+ accum = tokenCTX.transformer.newAccumulator(accum);
 54+ this.italics.push(ctx);
 55+ break;
 56+ case 3:
 57+ accum = tokenCTX.transformer.newAccumulator(accum);
 58+ this.bolds.push(ctx);
 59+ break;
 60+ case 4:
 61+ if (lastToken && lastToken.type === 'TEXT') {
 62+ lastToken.value += "'";
 63+ } else {
 64+ out = {type: 'TEXT', value: "'"};
 65+ }
 66+ accum = tokenCTX.transformer.newAccumulator(accum);
 67+ this.bolds.push(ctx);
 68+ break;
 69+ case 5:
 70+ // order does not matter here, will be fixed
 71+ // by HTML tree builder
 72+ accum = tokenCTX.transformer.newAccumulator(accum, 2);
 73+ this.italics.push(ctx);
 74+ ctx2 = this.ctx(tokenCTX);
 75+ ctx2.token = {attribs: ctx.token.attribs};
 76+ this.bolds.push(ctx2);
 77+ break;
 78+ default: // longer than 5, only use the last 5 ticks
 79+ var newvalue = token.value.substr(0, qlen - 5 );
 80+ if (lastToken && lastToken.type === 'TEXT') {
 81+ lastToken.value += newvalue;
 82+ } else {
 83+ out = {type: 'TEXT', value: newvalue};
 84+ }
 85+ accum = tokenCTX.transformer.newAccumulator(accum, 2);
 86+ this.italics.push(ctx);
 87+ ctx2 = this.ctx(tokenCTX);
 88+ ctx2.token = {attribs: ctx.token.attribs};
 89+ this.bolds.push(ctx2);
 90+ break;
 91+ }
 92+ tokenCTX.token = out;
 93+ tokenCTX.accum = accum;
 94+ return tokenCTX;
 95+};
 96+
 97+QuoteTransformer.prototype.onNewLine = function ( tokenCTX ) {
 98+ if(!this.bolds && !this.italics) {
 99+ // Nothing to do, quick abort.
 100+ return tokenCTX;
 101+ }
 102+ //console.log("onNewLine: " + this.italics + this.bolds);
 103+ // balance out tokens, convert placeholders into tags
 104+ if (this.italics.length % 2 && this.bolds.length % 2) {
 105+ var firstsingleletterword = -1,
 106+ firstmultiletterword = -1,
 107+ firstspace = -1;
 108+ for (var j = 0; j < this.bolds.length; j++) {
 109+ var ctx = this.bolds[j];
 110+ //console.log("balancing!" + JSON.stringify(ctx.lastToken, null, 2));
 111+ if (ctx.lastToken) {
 112+ if (ctx.lastToken.type === 'TEXT') {
 113+ var lastchar = ctx.lastToken.value[ctx.lastToken.value.length - 1],
 114+ secondtolastchar = ctx.lastToken.value[ctx.lastToken.value.length - 2];
 115+ if (lastchar === ' ' && firstspace === -1) {
 116+ firstspace = j;
 117+ } else if (lastchar !== ' ') {
 118+ if ( secondtolastchar === ' ' &&
 119+ firstsingleletterword === -1)
 120+ {
 121+ firstsingleletterword = j;
 122+ } else if ( firstmultiletterword == -1) {
 123+ firstmultiletterword = j;
 124+ }
 125+ }
 126+ } else if ( ( ctx.lastToken.type === 'NEWLINE' ||
 127+ ctx.lastToken.type === 'TAG' ) &&
 128+ firstspace == -1 ) {
 129+ firstmultiletterword = j;
 130+ }
 131+ }
 132+ }
 133+
 134+
 135+ // now see if we can convert a bold to an italic and
 136+ // an apostrophe
 137+ if (firstsingleletterword > -1) {
 138+ this.convertBold(firstsingleletterword);
 139+ } else if (firstmultiletterword > -1) {
 140+ this.convertBold(firstmultiletterword);
 141+ } else if (firstspace > -1) {
 142+ this.convertBold(firstspace);
 143+ }
 144+ }
 145+
 146+ this.quotesToTags(this.italics, 'i', tokenCTX.transformer);
 147+ this.quotesToTags(this.bolds, 'b', tokenCTX.transformer);
 148+
 149+ this.bolds = [];
 150+ this.italics = [];
 151+
 152+ // Pass through the NEWLINE token unchanged
 153+ return tokenCTX;
 154+};
 155+
 156+QuoteTransformer.prototype.convertBold = function ( i ) {
 157+ var ctx = this.bolds[i];
 158+ //console.log('convertbold!');
 159+ if ( ctx.lastToken && ctx.lastToken.type === 'TEXT' ) {
 160+ ctx.lastToken.value += "'";
 161+ } else {
 162+ // Add a text token!
 163+ ctx.token = [{type: 'TEXT', value: "'"}, ctx.token];
 164+ }
 165+
 166+ this.bolds.splice(i, 1);
 167+
 168+ this.italics.push(ctx);
 169+ this.italics.sort(function(a,b) { return a.pos - b.pos; } );
 170+ //console.log(this.italics.map(function(a) { return a.pos }));
 171+ //console.log(this.bolds.map(function(a) { return a.pos }));
 172+};
 173+
 174+// convert italics/bolds into tags
 175+QuoteTransformer.prototype.quotesToTags = function ( contexts, name, transformer ) {
 176+ var toggle = true,
 177+ t,
 178+ out = [];
 179+ for (var j = 0; j < contexts.length; j++) {
 180+ t = contexts[j].token;
 181+
 182+ if ( $.isArray(t) ) {
 183+ // Slip in a text token from bold to italic rebalancing
 184+ var realToken = t.pop();
 185+ transformer.transformTokens( t, contexts[j].accum, 0 );
 186+ t = realToken;
 187+ }
 188+
 189+ if(toggle) {
 190+ t.type = 'TAG';
 191+ } else {
 192+ t.type = 'ENDTAG';
 193+ }
 194+ t.name = name;
 195+ delete t.value;
 196+ toggle = !toggle;
 197+ // Re-add and process the new token with the original accumulator
 198+ transformer.transformTokens( [t], contexts[j].accum, 0 );
 199+ }
 200+ var l = contexts.length;
 201+ if (!toggle) {
 202+ // add end tag, but don't count it towards the finish
 203+ transformer.transformTokens( [{type: 'ENDTAG', name: name}],
 204+ contexts[contexts.length - 1].accum, 0 );
 205+ }
 206+ // now allow the transformer to finish
 207+ transformer.finish( contexts.length );
 208+};
 209+
 210+if (typeof module == "object") {
 211+ module.exports.QuoteTransformer = QuoteTransformer;
 212+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
___________________________________________________________________
Added: svn:eol-style
1213 + native
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -183,163 +183,7 @@
184184 return out;
185185 };
186186
187 - /*
188 - * Italic/Bold handling.
189 - *
190 - * - list of tokens
191 - * - NEWLINE
192 - * - ticks (2+) -> list with link in line token list?
193 - * - process on newline
194 - * - need access to text nodes before/after for conversion back to text
195 - */
196 - var doQuotes = function ( tokens ) {
197187
198 - var italics = [],
199 - bolds = [],
200 - out = [],
201 - inserted = 0;
202 -
203 - var convertBold = function ( i ) {
204 - var index = bolds[i];
205 - var txt = out[index - 1];
206 - txt.value += "'";
207 - if ( i > 0 ) {
208 - bolds = bolds.slice(0, i)
209 - .concat(bolds.slice(i + 1, bolds.length - i - 1));
210 - } else {
211 - bolds.shift();
212 - }
213 -
214 - italics.push(index);
215 - italics.sort(function(a,b) { return a - b });
216 - };
217 -
218 - // convert italics/bolds into tags
219 - var quotesToTags = function ( offsets, name ) {
220 - var toggle = true;
221 - for (var j = 0; j < offsets.length; j++) {
222 - var t = out[offsets[j]];
223 - if(toggle) {
224 - t.type = 'TAG';
225 - } else {
226 - t.type = 'ENDTAG';
227 - }
228 - t.name = name;
229 - delete t.value;
230 - toggle = !toggle;
231 - }
232 - if (!toggle) {
233 - // add end tag
234 - out.push({type: 'ENDTAG', name: name});
235 - inserted++;
236 - }
237 - toggle = true;
238 - };
239 -
240 - for (var i = 0, length = tokens.length; i < length; i++) {
241 - var token = tokens[i];
242 - switch (token.type) {
243 - case 'QUOTE':
244 - // depending on length, add starting 's to preceding text node
245 - // (if any)
246 - // add token index to italic/bold lists
247 - // add placeholder for token
248 - var qlen = token.value.length;
249 - switch (qlen) {
250 - case 2: italics.push(i + inserted); out.push(token); break;
251 - case 3: bolds.push(i + inserted); out.push(token); break;
252 - case 4:
253 - token.value = "'''";
254 - if (i > 0 && tokens[i-1].type === 'TEXT') {
255 - tokens[i-1].value += "'";
256 - } else {
257 - out.push({type: 'TEXT', value: "'"});
258 - inserted++;
259 - }
260 - bolds.push(i + inserted);
261 - out.push(token);
262 - break;
263 - case 5:
264 - // order does not matter here, will be fixed
265 - // by HTML parser backend
266 - italics.push(i + inserted);
267 - out.push({type: 'QUOTE', value: "''"});
268 - inserted++;
269 - bolds.push(i + inserted);
270 - out.push({type: 'QUOTE', value: "'''"});
271 - break;
272 - default: // longer than 5, only use the last 5 ticks
273 - token.value = "'''''";
274 - var newvalue = token.value.substr(0, qlen - 5 );
275 - if (i > 0 && tokens[i-1].type === 'TEXT') {
276 - tokens[i-1].value += newvalue;
277 - } else {
278 - out.push({type: 'TEXT', value: newvalue});
279 - inserted++;
280 - }
281 - italics.push(i + inserted);
282 - out.push({type: 'QUOTE', value: "''"});
283 - inserted++;
284 - bolds.push(i + inserted);
285 - out.push({type: 'QUOTE', value: "'''"});
286 - break;
287 - }
288 - break;
289 -
290 - case 'NEWLINE':
291 - // balance out tokens, convert placeholders into tags
292 - if (italics.length % 2 && bolds.length % 2) {
293 - dp("balancing!");
294 - var firstsingleletterword = -1,
295 - firstmultiletterword = -1,
296 - firstspace = -1;
297 - for (var j = 0; j < bolds.length; j++) {
298 - var ticki = bolds[j];
299 - if (ticki > 0 && out[ticki - 1].type === 'TEXT') {
300 - var txt = out[ticki - 1],
301 - lastchar = txt.value[txt.value.length - 1],
302 - secondtolastchar = txt.value[txt.value.length - 2];
303 - dp('txt: ' + pp(txt));
304 - if (lastchar === ' ' && firstspace === -1) {
305 - firstspace = j;
306 - } else if (lastchar !== ' ') {
307 - if ( secondtolastchar === ' ' &&
308 - firstsingleletterword === -1)
309 - {
310 - firstsingleletterword = j;
311 - } else if ( firstmultiletterword == -1) {
312 - firstmultiletterword = j;
313 - }
314 - }
315 - }
316 - }
317 -
318 -
319 - // now see if we can convert a bold to an italic and
320 - // an apostrophe
321 - if (firstsingleletterword > -1) {
322 - convertBold(firstsingleletterword);
323 - } else if (firstmultiletterword > -1) {
324 - convertBold(firstmultiletterword);
325 - } else if (firstspace > -1) {
326 - convertBold(firstspace);
327 - }
328 - }
329 -
330 - quotesToTags(bolds, 'b');
331 - quotesToTags(italics, 'i');
332 - bolds = [];
333 - italics = [];
334 - out.push(token);
335 - break;
336 - default:
337 - out.push(token);
338 - }
339 - }
340 - return out;
341 - };
342 -
343 -
344188 /* End static utilities */
345189
346190 /*
@@ -404,8 +248,7 @@
405249
406250 start
407251 = e:toplevelblock* newline* {
408 - // XXX: move doQuotes out to general token stream transformer
409 - return doQuotes(flatten(e));
 252+ return flatten(e);
410253 }
411254
412255
@@ -499,7 +342,14 @@
500343
501344 // Start of line
502345 sol = (newline / & { return pos === 0; } { return true; })
503 - cn:(c:comment n:newline? { return [c, {type: 'TEXT', value: n}] })* {
 346+ cn:(c:comment n:newline? {
 347+ if ( n !== '' ) {
 348+ return [c, {type: 'TEXT', value: n}];
 349+ } else {
 350+ return [c];
 351+ }
 352+ }
 353+ )* {
504354 return [{type: 'NEWLINE'}].concat(cn);
505355 }
506356
@@ -548,7 +398,7 @@
549399 block_lines
550400 = s:sol
551401 // eat an empty line before the block
552 - s2:(ss:space* so:sol { return [{type: 'TEXT', value: ss.join('')}].concat(so) })?
 402+ s2:(os:optionalSpaceToken so:sol { return os.concat(so) })?
553403 bl:block_line {
554404 var s2_ = (s2 !== '') ? s2 : [];
555405 return s.concat(s2_, bl);
@@ -605,7 +455,9 @@
606456 for (var i = 0, l = c.length; i < l; i++) {
607457 var ci = c[i];
608458 if (typeof ci == 'string') {
609 - text.push(ci);
 459+ if(ci !== '') {
 460+ text.push(ci);
 461+ }
610462 } else {
611463 if (text.length) {
612464 out.push({ type: "TEXT", value: text.join('') });
@@ -630,7 +482,9 @@
631483 for (var i = 0; i < c.length; i++) {
632484 var ci = c[i]
633485 if (typeof ci == 'string') {
634 - text.push(ci);
 486+ if(ci !== '') {
 487+ text.push(ci);
 488+ }
635489 } else {
636490 if (text.length) {
637491 out.push({type: 'TEXT', value: text.join('')});
@@ -900,7 +754,8 @@
901755 * all not context free. */
902756 quote = "''" x:"'"* {
903757 return {
904 - type : 'QUOTE',
 758+ type: 'TAG',
 759+ name : 'QUOTE',
905760 value: "''" + x.join('')
906761 }
907762 }

Status & tagging log