r105926 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r105925 | r105926 | r105927 >
Date:	20:53, 12 December 2011
Author:	gwicke
Status:	deferred
Tags:
Comment:	Convert quote handling (italic/bold) to a core extension operating on the token stream. This is the first token transformation exercising the TokenTransformer class as its dispatcher. Template expansions, wiki link formatting, tag sanitation and extensions should be able to use the same dispatcher by registering for specific token types. The parser performance is very slightly improved as the token stream is only traversed once.
Modified paths:	/trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js (added) (history) /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformer.js (modified) (history) /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history) /trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js (modified) (history) /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests-whitelist.js
—	—	@@ -6,16 +6,18 @@
7	7
8	8	// The nesting of italic/bold tags is changed in this test, but the resulting
9	9	// formatting is identical
10		-testWhiteList['Italics and bold'] = "<ul><li> plain</li><li> plain<i>italic</i>plain</li><li> plain<i>italic</i>plain<i>italic</i>plain</li><li> plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<b>bold</b>plain</li><li> plain<i>italic</i>plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<i>italic</i>plain</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain</li><li> plain<i><b>bold-italic</b>italic</i>plain</li><li> plain<i><b>bold-italic</b></i><b>bold</b>plain</li><li> plain<i>italic<b>bold-italic</b></i>plain</li><li> plain<b>bold<i>bold-italic</i></b>plain</li><li> plain l'<i>italic</i>plain</li><li> plain l'<b>bold</b> plain</li></ul>";
	10	+testWhiteList["Italics and bold"] = "<ul><li> plain</li><li> plain<i>italic</i>plain</li><li> plain<i>italic</i>plain<i>italic</i>plain</li><li> plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<b>bold</b>plain</li><li> plain<i>italic</i>plain<b>bold</b>plain</li><li> plain<b>bold</b>plain<i>italic</i>plain</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain</li><li> plain<i><b>bold-italic</b>italic</i>plain</li><li> plain<i><b>bold-italic</b></i><b>bold</b>plain</li><li> plain<i>italic<b>bold-italic</b></i>plain</li><li> plain<b>bold<i>bold-italic</i></b>plain</li><li> plain l'<i>italic</i>plain</li><li> plain l'<b>bold</b> plain</li></ul>";
11	11
12		-testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\"><b>text</b></a><b></b><i>Something <a href=\"http://example.com\">in italic</a></i><i>Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><b></b><i><b>Now <a href=\"http://example.com\">both</a></b></i></p>";
	12	+testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\" data-sourcePos=\"30:61\"><b>text</b></a><i data-sourcePos=\"62:106\">Something <a href=\"http://example.com\">in italic</a></i><i data-sourcePos=\"107:164\">Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i data-sourcePos=\"165:204\"><b data-sourcePos=\"165:204\">Now <a href=\"http://example.com\">both</a></b></i></p>";
13	13
14		-testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
	14	+testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b></b>Bold text..</p><p>..spanning two paragraphs (should not work).<b></b></p><p><b></b>Bold tag left open</p><p><i></i>Italic tag left open</p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
15	15
16		-testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i> and stuff</i></a><i>!</i></p>";
	16	+testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i></i> and stuff</a>!</p>";
17	17
18	18	testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>";
19	19
	20	+// This is a rare edge case, and the new behavior is arguably more consistent
	21	+testWhiteList["5 quotes, code coverage +1 line"] = "<p><i>'</i></p>";
20	22
21	23
22	24	// empty table tags / with only a caption are legal in HTML5.
Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
—	—	@@ -63,6 +63,8 @@
64	64	_import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']);
65	65	_import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']);
66	66
	67	+_import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']);
	68	+
67	69	// WikiDom and serializers
68	70	_require(pj('es', 'es.js'));
69	71	_require(pj('es', 'es.Html.js'));
—	—	@@ -178,13 +180,17 @@
179	181	this.postProcessor = new DOMPostProcessor();
180	182
181	183	var pt = this;
	184	+
	185	+ // Set up the TokenTransformer with a callback for the remaining
	186	+ // processing.
182	187	this.tokenTransformer = new TokenTransformer ( function ( tokens ) {
	188	+
183	189	//console.log("TOKENS: " + JSON.stringify(tokens, null, 2));
	190	+
184	191	// Create a new tree builder, which also creates a new document.
185	192	var treeBuilder = new FauxHTML5.TreeBuilder();
186	193
187		~~- // Build a DOM tree from tokens using the HTML tree~~
188		~~- // builder/parser.~~
	194	+ // Build a DOM tree from tokens using the HTML tree builder/parser.
189	195	pt.buildTree( tokens, treeBuilder );
190	196
191	197	// Perform post-processing on DOM.
—	—	@@ -193,9 +199,14 @@
194	200	// And serialize the result.
195	201	var out = treeBuilder.body().innerHTML;
196	202
	203	+ // Finally, check the result vs. the expected result.
197	204	pt.checkResult( pt.currentItem, out );
198	205	});
199	206
	207	+ // Add token transformations..
	208	+ var qt = new QuoteTransformer();
	209	+ qt.register(this.tokenTransformer);
	210	+
200	211	// Test statistics
201	212	this.passedTests = 0;
202	213	this.passedTestsManual = 0;
—	—	@@ -610,9 +621,8 @@
611	622	this.reportSummary();
612	623	};
613	624
614		~~-var pt = new ParserTests();~~
615		~~-console.log(pt.processArticle);~~
616		~~-pt.main();~~
	625	+// Construct the ParserTests object and run the parser tests
	626	+new ParserTests().main();
617	627
618	628
619	629	})();
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformer.js
—	—	@@ -32,7 +32,7 @@
33	33	}
34	34
35	35	TokenTransformer.prototype.reset = function () {
36		~~- this.accum = new TokenAccumulator();~~
	36	+ this.accum = new TokenAccumulator(null);
37	37	this.firstaccum = this.accum;
38	38	this.outstanding = 1; // Number of outstanding processing steps
39	39	// (e.g., async template fetches/expansions)
—	—	@@ -147,18 +147,21 @@
148	148	* @returns nothing: Calls back registered callback if there are no more
149	149	* outstanding asynchronous expansions.
150	150	* */
151		~~-TokenTransformer.prototype.transformTokens = function ( tokens, accum ) {~~
	151	+TokenTransformer.prototype.transformTokens = function ( tokens, accum, delta ) {
152	152	if ( accum === undefined ) {
153	153	this.reset();
154	154	accum = this.accum;
155		~~- } else {~~
156		~~- // Prepare to replace the last token in the current accumulator.~~
157		~~- accum.pop();~~
158	155	}
	156	+
	157	+ //console.log('transformTokens: ' + JSON.stringify(tokens) + JSON.stringify(accum.accum) );
	158	+
159	159	var tokenCTX = new TokenContext(undefined, accum, this, undefined);
160		~~- for ( var i = 0, l = tokens.length; i < l; i++ ) {~~
161		~~- tokenCTX.lastToken = tokenCTX.token;~~
	160	+ var origLen = tokens.length;
	161	+ for ( var i = 0; i < tokens.length; i++ ) {
	162	+ tokenCTX.lastToken = tokenCTX.token; // XXX: Fix for re-entrant case!
162	163	tokenCTX.token = tokens[i];
	164	+ tokenCTX.pos = i;
	165	+ tokenCTX.accum = accum;
163	166	var ts;
164	167	switch(tokenCTX.token.type) {
165	168	case 'TAG':
—	—	@@ -186,52 +189,67 @@
187	190	// Splice in the returned tokens (while replacing the original
188	191	// token), and process them next.
189	192	[].splice.apply(tokens, [i, 1].concat(tokenCTX.token));
190		~~- l += res.token.length - 1;~~
	193	+ //l += tokenCTX.token.length - 1;
191	194	i--; // continue at first inserted token
192	195	} else if (tokenCTX.token) {
193		~~- // push to accumulator (not necessarily the last one)~~
	196	+ // push to accumulator
194	197	accum.push(tokenCTX.token);
195	198	}
196	199	// Update current accum, in case a new one was spliced in by a
197	200	// transformation starting asynch work.
198	201	accum = tokenCTX.accum;
199	202	}
200		~~- this.finish();~~
	203	+
	204	+ if ( delta === undefined ) {
	205	+ delta = 1;
	206	+ }
	207	+
	208	+ this.finish( delta );
201	209	};
202	210
203		~~-TokenTransformer.prototype.finish = function ( ) {~~
204		~~- this.outstanding--;~~
	211	+TokenTransformer.prototype.finish = function ( delta ) {
	212	+ this.outstanding -= delta;
205	213	if ( this.outstanding === 0 ) {
206	214	// Join the token accumulators back into a single token list
207	215	var a = this.firstaccum;
208	216	var tokens = a.accum;
209		~~- while ( a.next !== undefined ) {~~
	217	+ while ( a.next !== null ) {
210	218	a = a.next;
211		~~- tokens.concat(a.accum);~~
	219	+ tokens = tokens.concat(a.accum);
212	220	}
	221	+ //console.log('TOKENS: ' + JSON.stringify(tokens, null, 2));
213	222	// Call our callback with the flattened token list
214	223	this.cb(tokens);
215	224	}
216	225	};
217	226
218	227	/* Start a new accumulator for asynchronous work. */
219		~~-TokenTransformer.prototype.newAccumulator = function ( ) {~~
220		~~- this.outstanding++;~~
221		~~- return this.accum.insertAccumulator( );~~
	228	+TokenTransformer.prototype.newAccumulator = function ( accum, count ) {
	229	+ if ( count !== undefined ) {
	230	+ this.outstanding += count;
	231	+ } else {
	232	+ this.outstanding++;
	233	+ }
	234	+ if ( accum === undefined ) {
	235	+ accum = this.accum;
	236	+ }
	237	+ return accum.insertAccumulator( );
222	238	};
223	239
224	240	// Token accumulators in a linked list. Using a linked list simplifies async
225	241	// callbacks for template expansions.
226	242	function TokenAccumulator ( next, tokens ) {
227	243	this.next = next;
228		~~- if ( tokens )~~
	244	+ if ( tokens ) {
229	245	this.accum = tokens;
230		~~- else~~
	246	+ } else {
231	247	this.accum = [];
	248	+ }
	249	+ return this;
232	250	}
233	251
234	252	TokenAccumulator.prototype.push = function ( token ) {
235		~~- this.accum.push(token);~~
	253	+ return this.accum.push(token);
236	254	};
237	255
238	256	TokenAccumulator.prototype.pop = function ( ) {
—	—	@@ -239,7 +257,7 @@
240	258	};
241	259
242	260	TokenAccumulator.prototype.insertAccumulator = function ( ) {
243		~~- this.next = new TokenAccumulator(this.next, tokens);~~
	261	+ this.next = new TokenAccumulator(this.next);
244	262	return this.next;
245	263	};
246	264
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
—	—	@@ -0,0 +1,211 @@
	2	+/*
	3	+ * Italic/Bold handling.
	4	+ *
	5	+ * - list of tokens
	6	+ * - NEWLINE
	7	+ * - ticks (2+) -> list with link in line token list?
	8	+ * - process on newline
	9	+ * - need access to text nodes before for conversion back to text
	10	+ */
	11	+
	12	+function QuoteTransformer ( ) {
	13	+ this.italics = [];
	14	+ this.bolds = [];
	15	+ this.inserted = 0;
	16	+}
	17	+
	18	+QuoteTransformer.prototype.register = function ( tokenTransformer ) {
	19	+ // Register for NEWLINE and QUOTE tag tokens
	20	+ var self = this;
	21	+ tokenTransformer.appendListener( function (ctx) {
	22	+ return self.onNewLine(ctx);
	23	+ }, 'newline' );
	24	+ tokenTransformer.appendListener( function (ctx) {
	25	+ return self.onQuote(ctx);
	26	+ }, 'tag', 'QUOTE' );
	27	+};
	28	+
	29	+// Extract a copy of the token context with the info we need
	30	+QuoteTransformer.prototype.ctx = function ( tokenCTX ) {
	31	+ return {
	32	+ accum: tokenCTX.accum,
	33	+ token: tokenCTX.token,
	34	+ lastToken: tokenCTX.lastToken,
	35	+ pos: tokenCTX.pos
	36	+ };
	37	+};
	38	+
	39	+QuoteTransformer.prototype.onQuote = function ( tokenCTX ) {
	40	+ // depending on length, add starting 's to preceding text node
	41	+ // (if any)
	42	+ // add token index to italic/bold lists
	43	+ // add placeholder for token
	44	+ var token = tokenCTX.token,
	45	+ qlen = token.value.length,
	46	+ out = null,
	47	+ lastToken = tokenCTX.lastToken,
	48	+ ctx = this.ctx(tokenCTX),
	49	+ ctx2,
	50	+ accum = tokenCTX.accum;
	51	+ switch (qlen) {
	52	+ case 2:
	53	+ accum = tokenCTX.transformer.newAccumulator(accum);
	54	+ this.italics.push(ctx);
	55	+ break;
	56	+ case 3:
	57	+ accum = tokenCTX.transformer.newAccumulator(accum);
	58	+ this.bolds.push(ctx);
	59	+ break;
	60	+ case 4:
	61	+ if (lastToken && lastToken.type === 'TEXT') {
	62	+ lastToken.value += "'";
	63	+ } else {
	64	+ out = {type: 'TEXT', value: "'"};
	65	+ }
	66	+ accum = tokenCTX.transformer.newAccumulator(accum);
	67	+ this.bolds.push(ctx);
	68	+ break;
	69	+ case 5:
	70	+ // order does not matter here, will be fixed
	71	+ // by HTML tree builder
	72	+ accum = tokenCTX.transformer.newAccumulator(accum, 2);
	73	+ this.italics.push(ctx);
	74	+ ctx2 = this.ctx(tokenCTX);
	75	+ ctx2.token = {attribs: ctx.token.attribs};
	76	+ this.bolds.push(ctx2);
	77	+ break;
	78	+ default: // longer than 5, only use the last 5 ticks
	79	+ var newvalue = token.value.substr(0, qlen - 5 );
	80	+ if (lastToken && lastToken.type === 'TEXT') {
	81	+ lastToken.value += newvalue;
	82	+ } else {
	83	+ out = {type: 'TEXT', value: newvalue};
	84	+ }
	85	+ accum = tokenCTX.transformer.newAccumulator(accum, 2);
	86	+ this.italics.push(ctx);
	87	+ ctx2 = this.ctx(tokenCTX);
	88	+ ctx2.token = {attribs: ctx.token.attribs};
	89	+ this.bolds.push(ctx2);
	90	+ break;
	91	+ }
	92	+ tokenCTX.token = out;
	93	+ tokenCTX.accum = accum;
	94	+ return tokenCTX;
	95	+};
	96	+
	97	+QuoteTransformer.prototype.onNewLine = function ( tokenCTX ) {
	98	+ if(!this.bolds && !this.italics) {
	99	+ // Nothing to do, quick abort.
	100	+ return tokenCTX;
	101	+ }
	102	+ //console.log("onNewLine: " + this.italics + this.bolds);
	103	+ // balance out tokens, convert placeholders into tags
	104	+ if (this.italics.length % 2 && this.bolds.length % 2) {
	105	+ var firstsingleletterword = -1,
	106	+ firstmultiletterword = -1,
	107	+ firstspace = -1;
	108	+ for (var j = 0; j < this.bolds.length; j++) {
	109	+ var ctx = this.bolds[j];
	110	+ //console.log("balancing!" + JSON.stringify(ctx.lastToken, null, 2));
	111	+ if (ctx.lastToken) {
	112	+ if (ctx.lastToken.type === 'TEXT') {
	113	+ var lastchar = ctx.lastToken.value[ctx.lastToken.value.length - 1],
	114	+ secondtolastchar = ctx.lastToken.value[ctx.lastToken.value.length - 2];
	115	+ if (lastchar === ' ' && firstspace === -1) {
	116	+ firstspace = j;
	117	+ } else if (lastchar !== ' ') {
	118	+ if ( secondtolastchar === ' ' &&
	119	+ firstsingleletterword === -1)
	120	+ {
	121	+ firstsingleletterword = j;
	122	+ } else if ( firstmultiletterword == -1) {
	123	+ firstmultiletterword = j;
	124	+ }
	125	+ }
	126	+ } else if ( ( ctx.lastToken.type === 'NEWLINE' ||
	127	+ ctx.lastToken.type === 'TAG' ) &&
	128	+ firstspace == -1 ) {
	129	+ firstmultiletterword = j;
	130	+ }
	131	+ }
	132	+ }
	133	+
	134	+
	135	+ // now see if we can convert a bold to an italic and
	136	+ // an apostrophe
	137	+ if (firstsingleletterword > -1) {
	138	+ this.convertBold(firstsingleletterword);
	139	+ } else if (firstmultiletterword > -1) {
	140	+ this.convertBold(firstmultiletterword);
	141	+ } else if (firstspace > -1) {
	142	+ this.convertBold(firstspace);
	143	+ }
	144	+ }
	145	+
	146	+ this.quotesToTags(this.italics, 'i', tokenCTX.transformer);
	147	+ this.quotesToTags(this.bolds, 'b', tokenCTX.transformer);
	148	+
	149	+ this.bolds = [];
	150	+ this.italics = [];
	151	+
	152	+ // Pass through the NEWLINE token unchanged
	153	+ return tokenCTX;
	154	+};
	155	+
	156	+QuoteTransformer.prototype.convertBold = function ( i ) {
	157	+ var ctx = this.bolds[i];
	158	+ //console.log('convertbold!');
	159	+ if ( ctx.lastToken && ctx.lastToken.type === 'TEXT' ) {
	160	+ ctx.lastToken.value += "'";
	161	+ } else {
	162	+ // Add a text token!
	163	+ ctx.token = [{type: 'TEXT', value: "'"}, ctx.token];
	164	+ }
	165	+
	166	+ this.bolds.splice(i, 1);
	167	+
	168	+ this.italics.push(ctx);
	169	+ this.italics.sort(function(a,b) { return a.pos - b.pos; } );
	170	+ //console.log(this.italics.map(function(a) { return a.pos }));
	171	+ //console.log(this.bolds.map(function(a) { return a.pos }));
	172	+};
	173	+
	174	+// convert italics/bolds into tags
	175	+QuoteTransformer.prototype.quotesToTags = function ( contexts, name, transformer ) {
	176	+ var toggle = true,
	177	+ t,
	178	+ out = [];
	179	+ for (var j = 0; j < contexts.length; j++) {
	180	+ t = contexts[j].token;
	181	+
	182	+ if ( $.isArray(t) ) {
	183	+ // Slip in a text token from bold to italic rebalancing
	184	+ var realToken = t.pop();
	185	+ transformer.transformTokens( t, contexts[j].accum, 0 );
	186	+ t = realToken;
	187	+ }
	188	+
	189	+ if(toggle) {
	190	+ t.type = 'TAG';
	191	+ } else {
	192	+ t.type = 'ENDTAG';
	193	+ }
	194	+ t.name = name;
	195	+ delete t.value;
	196	+ toggle = !toggle;
	197	+ // Re-add and process the new token with the original accumulator
	198	+ transformer.transformTokens( [t], contexts[j].accum, 0 );
	199	+ }
	200	+ var l = contexts.length;
	201	+ if (!toggle) {
	202	+ // add end tag, but don't count it towards the finish
	203	+ transformer.transformTokens( [{type: 'ENDTAG', name: name}],
	204	+ contexts[contexts.length - 1].accum, 0 );
	205	+ }
	206	+ // now allow the transformer to finish
	207	+ transformer.finish( contexts.length );
	208	+};
	209	+
	210	+if (typeof module == "object") {
	211	+ module.exports.QuoteTransformer = QuoteTransformer;
	212	+}
Property changes on: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
___________________________________________________________________
Added: svn:eol-style
1	213	+ native
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
—	—	@@ -183,163 +183,7 @@
184	184	return out;
185	185	};
186	186
187		- /*
188		~~- * Italic/Bold handling.~~
189		- *
190		~~- * - list of tokens~~
191		~~- * - NEWLINE~~
192		~~- * - ticks (2+) -> list with link in line token list?~~
193		~~- * - process on newline~~
194		~~- * - need access to text nodes before/after for conversion back to text~~
195		~~- */~~
196		~~- var doQuotes = function ( tokens ) {~~
197	187
198		~~- var italics = [],~~
199		~~- bolds = [],~~
200		~~- out = [],~~
201		~~- inserted = 0;~~
202		-
203		~~- var convertBold = function ( i ) {~~
204		~~- var index = bolds[i];~~
205		~~- var txt = out[index - 1];~~
206		~~- txt.value += "'";~~
207		~~- if ( i > 0 ) {~~
208		~~- bolds = bolds.slice(0, i)~~
209		~~- .concat(bolds.slice(i + 1, bolds.length - i - 1));~~
210		~~- } else {~~
211		~~- bolds.shift();~~
212		~~- }~~
213		-
214		~~- italics.push(index);~~
215		~~- italics.sort(function(a,b) { return a - b });~~
216		~~- };~~
217		-
218		~~- // convert italics/bolds into tags~~
219		~~- var quotesToTags = function ( offsets, name ) {~~
220		~~- var toggle = true;~~
221		~~- for (var j = 0; j < offsets.length; j++) {~~
222		~~- var t = out[offsets[j]];~~
223		~~- if(toggle) {~~
224		~~- t.type = 'TAG';~~
225		~~- } else {~~
226		~~- t.type = 'ENDTAG';~~
227		~~- }~~
228		~~- t.name = name;~~
229		~~- delete t.value;~~
230		~~- toggle = !toggle;~~
231		~~- }~~
232		~~- if (!toggle) {~~
233		~~- // add end tag~~
234		~~- out.push({type: 'ENDTAG', name: name});~~
235		~~- inserted++;~~
236		~~- }~~
237		~~- toggle = true;~~
238		~~- };~~
239		-
240		~~- for (var i = 0, length = tokens.length; i < length; i++) {~~
241		~~- var token = tokens[i];~~
242		~~- switch (token.type) {~~
243		~~- case 'QUOTE':~~
244		~~- // depending on length, add starting 's to preceding text node~~
245		~~- // (if any)~~
246		~~- // add token index to italic/bold lists~~
247		~~- // add placeholder for token~~
248		~~- var qlen = token.value.length;~~
249		~~- switch (qlen) {~~
250		~~- case 2: italics.push(i + inserted); out.push(token); break;~~
251		~~- case 3: bolds.push(i + inserted); out.push(token); break;~~
252		~~- case 4:~~
253		~~- token.value = "'''";~~
254		~~- if (i > 0 && tokens[i-1].type === 'TEXT') {~~
255		~~- tokens[i-1].value += "'";~~
256		~~- } else {~~
257		~~- out.push({type: 'TEXT', value: "'"});~~
258		~~- inserted++;~~
259		~~- }~~
260		~~- bolds.push(i + inserted);~~
261		~~- out.push(token);~~
262		~~- break;~~
263		~~- case 5:~~
264		~~- // order does not matter here, will be fixed~~
265		~~- // by HTML parser backend~~
266		~~- italics.push(i + inserted);~~
267		~~- out.push({type: 'QUOTE', value: "''"});~~
268		~~- inserted++;~~
269		~~- bolds.push(i + inserted);~~
270		~~- out.push({type: 'QUOTE', value: "'''"});~~
271		~~- break;~~
272		~~- default: // longer than 5, only use the last 5 ticks~~
273		~~- token.value = "'''''";~~
274		~~- var newvalue = token.value.substr(0, qlen - 5 );~~
275		~~- if (i > 0 && tokens[i-1].type === 'TEXT') {~~
276		~~- tokens[i-1].value += newvalue;~~
277		~~- } else {~~
278		~~- out.push({type: 'TEXT', value: newvalue});~~
279		~~- inserted++;~~
280		~~- }~~
281		~~- italics.push(i + inserted);~~
282		~~- out.push({type: 'QUOTE', value: "''"});~~
283		~~- inserted++;~~
284		~~- bolds.push(i + inserted);~~
285		~~- out.push({type: 'QUOTE', value: "'''"});~~
286		~~- break;~~
287		~~- }~~
288		~~- break;~~
289		-
290		~~- case 'NEWLINE':~~
291		~~- // balance out tokens, convert placeholders into tags~~
292		~~- if (italics.length % 2 && bolds.length % 2) {~~
293		~~- dp("balancing!");~~
294		~~- var firstsingleletterword = -1,~~
295		~~- firstmultiletterword = -1,~~
296		~~- firstspace = -1;~~
297		~~- for (var j = 0; j < bolds.length; j++) {~~
298		~~- var ticki = bolds[j];~~
299		~~- if (ticki > 0 && out[ticki - 1].type === 'TEXT') {~~
300		~~- var txt = out[ticki - 1],~~
301		~~- lastchar = txt.value[txt.value.length - 1],~~
302		~~- secondtolastchar = txt.value[txt.value.length - 2];~~
303		~~- dp('txt: ' + pp(txt));~~
304		~~- if (lastchar === ' ' && firstspace === -1) {~~
305		~~- firstspace = j;~~
306		~~- } else if (lastchar !== ' ') {~~
307		~~- if ( secondtolastchar === ' ' &&~~
308		~~- firstsingleletterword === -1)~~
309		~~- {~~
310		~~- firstsingleletterword = j;~~
311		~~- } else if ( firstmultiletterword == -1) {~~
312		~~- firstmultiletterword = j;~~
313		~~- }~~
314		~~- }~~
315		~~- }~~
316		~~- }~~
317		-
318		-
319		~~- // now see if we can convert a bold to an italic and~~
320		~~- // an apostrophe~~
321		~~- if (firstsingleletterword > -1) {~~
322		~~- convertBold(firstsingleletterword);~~
323		~~- } else if (firstmultiletterword > -1) {~~
324		~~- convertBold(firstmultiletterword);~~
325		~~- } else if (firstspace > -1) {~~
326		~~- convertBold(firstspace);~~
327		~~- }~~
328		~~- }~~
329		-
330		~~- quotesToTags(bolds, 'b');~~
331		~~- quotesToTags(italics, 'i');~~
332		~~- bolds = [];~~
333		~~- italics = [];~~
334		~~- out.push(token);~~
335		~~- break;~~
336		~~- default:~~
337		~~- out.push(token);~~
338		~~- }~~
339		~~- }~~
340		~~- return out;~~
341		~~- };~~
342		-
343		-
344	188	/* End static utilities */
345	189
346	190	/*
—	—	@@ -404,8 +248,7 @@
405	249
406	250	start
407	251	= e:toplevelblock* newline* {
408		~~- // XXX: move doQuotes out to general token stream transformer~~
409		~~- return doQuotes(flatten(e));~~
	252	+ return flatten(e);
410	253	}
411	254
412	255
—	—	@@ -499,7 +342,14 @@
500	343
501	344	// Start of line
502	345	sol = (newline / & { return pos === 0; } { return true; })
503		~~- cn:(c:comment n:newline? { return [c, {type: 'TEXT', value: n}] })* {~~
	346	+ cn:(c:comment n:newline? {
	347	+ if ( n !== '' ) {
	348	+ return [c, {type: 'TEXT', value: n}];
	349	+ } else {
	350	+ return [c];
	351	+ }
	352	+ }
	353	+ )* {
504	354	return [{type: 'NEWLINE'}].concat(cn);
505	355	}
506	356
—	—	@@ -548,7 +398,7 @@
549	399	block_lines
550	400	= s:sol
551	401	// eat an empty line before the block
552		~~- s2:(ss:space* so:sol { return [{type: 'TEXT', value: ss.join('')}].concat(so) })?~~
	402	+ s2:(os:optionalSpaceToken so:sol { return os.concat(so) })?
553	403	bl:block_line {
554	404	var s2_ = (s2 !== '') ? s2 : [];
555	405	return s.concat(s2_, bl);
—	—	@@ -605,7 +455,9 @@
606	456	for (var i = 0, l = c.length; i < l; i++) {
607	457	var ci = c[i];
608	458	if (typeof ci == 'string') {
609		~~- text.push(ci);~~
	459	+ if(ci !== '') {
	460	+ text.push(ci);
	461	+ }
610	462	} else {
611	463	if (text.length) {
612	464	out.push({ type: "TEXT", value: text.join('') });
—	—	@@ -630,7 +482,9 @@
631	483	for (var i = 0; i < c.length; i++) {
632	484	var ci = c[i]
633	485	if (typeof ci == 'string') {
634		~~- text.push(ci);~~
	486	+ if(ci !== '') {
	487	+ text.push(ci);
	488	+ }
635	489	} else {
636	490	if (text.length) {
637	491	out.push({type: 'TEXT', value: text.join('')});
—	—	@@ -900,7 +754,8 @@
901	755	* all not context free. */
902	756	quote = "''" x:"'"* {
903	757	return {
904		~~- type : 'QUOTE',~~
	758	+ type: 'TAG',
	759	+ name : 'QUOTE',
905	760	value: "''" + x.join('')
906	761	}
907	762	}

Status & tagging log

20:55, 12 December 2011 GWicke (talk | contribs) changed the status of r105926 [removed: new; added: deferred]