r90596 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r90595 | r90596 | r90597 >
Date:	19:31, 22 June 2011
Author:	tparscal
Status:	deferred
Tags:
Comment:	Optimization, cleanup and documentation.
Modified paths:	/trunk/parsers/wikidom/demos/es/index.html (modified) (history) /trunk/parsers/wikidom/lib/es/es.TextFlow.js (modified) (history)

Diff [purge]

Index: trunk/parsers/wikidom/lib/es/es.TextFlow.js
—	—	@@ -5,86 +5,140 @@
6	6	this.$ = $container;
7	7	}
8	8
9		~~-TextFlow.encodeHtml = function( text ) {~~
	9	+TextFlow.prototype.htmlEncode = function( text, trim ) {
	10	+ if ( trim ) {
	11	+ // Trailing whitespace
	12	+ text = text.replace( /\s+$/, '' );
	13	+ }
10	14	return text
	15	+ // Tags
11	16	.replace( /&/g, '&amp;' )
12		~~- .replace( / /g, '&nbsp;' )~~
13	17	.replace( /</g, '&lt;' )
14	18	.replace( />/g, '&gt;' )
15		~~- .replace( /'/g, '&#039;' )~~
16		~~- .replace( /"/g, '&quot;' )~~
	19	+ // Quotes - probably not needed
	20	+ //.replace( /'/g, '&#039;' )
	21	+ //.replace( /"/g, '&quot;' )
	22	+ // Whitespace
	23	+ .replace( / /g, '&nbsp;' )
17	24	.replace( /\n/g, '<span class="editSurface-whitespace">\\n</span>' )
18	25	.replace( /\t/g, '<span class="editSurface-whitespace">\\t</span>' );
19	26	};
20	27
	28	+/**
	29	+ * Renders text into a series of div elements, each a single line of wrapped text.
	30	+ *
	31	+ * TODO: Allow re-flowing from a given offset on to make re-flow faster when modifying the text
	32	+ *
	33	+ * @param text {String} Text to render
	34	+ */
21	35	TextFlow.prototype.render = function( text ) {
22		~~- //console.time( 'TextFlow.render' );~~
23	36
24		~~- // Clear all lines -- FIXME: This should adaptively re-use/cleanup existing lines~~
25		~~- this.$.empty();~~
26		-
27	37	// Measure the container width
28	38	var $ruler = $( '<div>&nbsp;</div>' ).appendTo( this.$ );
29	39	var width = $ruler.innerWidth()
30	40	$ruler.remove();
31	41
32		~~- // Build list of line break offsets~~
	42	+ /*
	43	+ * Word boundary scan
	44	+ *
	45	+ * To perform binary-search on words, rather than characters, we need to collect word boundary
	46	+ * offsets into an array. This list of offsets always starts with 0 and ends with the length of
	47	+ * the text, e.g. [0, ..., text.length]. The offset of the right side of the breaking character
	48	+ * is stored, so the gaps between stored offsets always include the breaking character at the
	49	+ * end.
	50	+ *
	51	+ * To avoid encoding the same words as HTML over and over while fitting text to lines, we also
	52	+ * build a list of HTML encoded strings for each gap between the offsets stored in the "words"
	53	+ * array. Slices of the "html" array can be joined, producing the encoded HTML of the words. In
	54	+ * the final pass, each line will get encoded 1 more time, to allow for whitespace trimming.
	55	+ */
33	56	var words = [0],
34		~~- boundary = /[ \.\,\;\:\-\t\r\n\f]/,~~
35		~~- left = 0,~~
36		~~- right = 0,~~
37		~~- search = 0;~~
38		~~- while ( ( search = text.substr( right ).search( boundary ) ) >= 0 ) {~~
39		~~- right += search;~~
40		~~- words.push( ++right );~~
	57	+ html = [],
	58	+ boundary = /([ \.\,\;\:\-\t\r\n\f])/g,
	59	+ match,
	60	+ right,
	61	+ left = 0;
	62	+ while ( match = boundary.exec( text ) ) {
	63	+ // Include the boundary character in the range
	64	+ right = match.index + 1;
	65	+ // Store the boundary offset
	66	+ words.push( right );
	67	+ // Store the word's encoded HTML
	68	+ html.push( this.htmlEncode( text.substring( left, right ) ) );
	69	+ // Remember the previous match
41	70	left = right;
42	71	}
43		~~- words.push( right );~~
44		~~- words.push( text.length );~~
	72	+ // Ensure the words array ends in a boundary, which may automatically happen if the text ends
	73	+ // in a period, for instance, but may not in other cases
	74	+ if ( right !== text.length ) {
	75	+ words.push( text.length );
	76	+ }
45	77
46		~~- // Create lines from text~~
47		~~- var pos = 0,~~
48		~~- index = 0,~~
49		~~- metrics = [];~~
50		~~- while ( pos < words.length ) {~~
51		~~- // Create line~~
52		~~- var $line = $( '<div class="editSurface-line"></div>' )~~
53		~~- .attr( 'line-index', index )~~
54		~~- .appendTo( this.$ ),~~
55		~~- line = $line[0];~~
56		-
57		~~- // Use binary search-like technique for efficiency~~
58		~~- var l = pos,~~
59		~~- r = words.length,~~
60		~~- m;~~
	78	+ /*
	79	+ * Line wrapping
	80	+ *
	81	+ * Now that we have linear access to the offsets around non-breakable areas within the text, we
	82	+ * can perform a binary-search for the best fit of words within a line.
	83	+ *
	84	+ * TODO: It may be possible to improve the efficiency of this code by making a best guess and
	85	+ * working from there, rather than always starting with [i .. words.length], which results in
	86	+ * reducing the right position in all but the last line, and in most cases 2 or 3 times.
	87	+ */
	88	+ var lineOffset = 0,
	89	+ lines = [],
	90	+ $lineRuler = $( '<div class="editSurface-line"></div>' ).appendTo( this.$ ),
	91	+ lineRuler = $lineRuler[0];
	92	+ while ( lineOffset < words.length ) {
	93	+ var left = lineOffset,
	94	+ right = words.length,
	95	+ middle,
	96	+ clampedLeft;
61	97	do {
62		~~- m = Math.ceil( ( l + r ) / 2 );~~
63		~~- line.innerHTML = TextFlow.encodeHtml( text.substring( words[pos], words[m] ) );~~
64		~~- if ( line.clientWidth > width ) {~~
65		~~- // Text is too long~~
66		~~- r = m - 1;~~
	98	+ // Place "middle" directly in the center of "left" and "right"
	99	+ middle = Math.ceil( ( left + right ) / 2 );
	100	+ // Prepare the line for measurement using pre-encoded HTML
	101	+ lineRuler.innerHTML = html.slice( lineOffset, middle ).join( '' );
	102	+ // Test for over/under using width of the rendered line
	103	+ if ( lineRuler.clientWidth > width ) {
	104	+ // Words after "middle" won't fit
	105	+ right = middle - 1;
67	106	} else {
68		~~- l = m;~~
	107	+ // Words before "middle" will fit
	108	+ left = middle;
69	109	}
70		~~- } while ( l < r );~~
71		~~- line.innerHTML = TextFlow.encodeHtml( text.substring( words[pos], words[l] ) );~~
	110	+ } while ( left < right );
72	111
73	112	// TODO: Check if it fits yet, if not, do binary search within the really long word
74	113
75		~~- metrics.push({~~
76		~~- 'text': text.substring( words[pos], words[l] ),~~
77		~~- 'offset': words[pos],~~
78		~~- 'length': words[l] - words[pos],~~
79		~~- 'width': line.clientWidth,~~
80		~~- 'index': index~~
	114	+ // On the last line, l and r will both equal words.length, which is not a valid index
	115	+ clampedLeft = left === words.length ? left - 1 : left;
	116	+
	117	+ // Collect line information
	118	+ lines.push({
	119	+ 'text': text.substring( words[lineOffset], words[clampedLeft] ),
	120	+ 'start': words[lineOffset],
	121	+ 'end': words[clampedLeft],
	122	+ 'width': lineRuler.clientWidth
81	123	});
82	124
83	125	// Step forward
84		~~- index++;~~
85		~~- pos = l;~~
	126	+ lineOffset = left;
86	127	}
	128	+ // Cleanup - technically this will get removed by the empty() call below, but if that changes
	129	+ // we don't want to accidentally introduce any bugs, so explicit removal is preferred
	130	+ $lineRuler.remove();
87	131
88		~~- //console.timeEnd( 'TextFlow.render' );~~
	132	+ // TODO: It may be more efficient to re-use existing lines
89	133
90		~~- return metrics;~~
	134	+ // Make way for the new lines
	135	+ this.$.empty();
	136	+ for ( var i = 0; i < lines.length; i++ ) {
	137	+ this.$.append(
	138	+ $( '<div class="editSurface-line"></div>' )
	139	+ .attr( 'line-index', i )
	140	+ .html( this.htmlEncode( text.substring( lines[i].start, lines[i].end ), true ) )
	141	+ );
	142	+ }
	143	+
	144	+ return lines;
91	145	};
Index: trunk/parsers/wikidom/demos/es/index.html
—	—	@@ -31,7 +31,7 @@
32	32	{ 'text': "Word wrap following hyphens is sometimes not desired, and can be avoided by using a so-called non-breaking hyphen instead of a regular hyphen. On the other hand, when using word processors, invisible hyphens, called soft hyphens, can also be inserted inside words so that word wrap can occur following the soft hyphens." },
33	33	{ 'text': "Sometimes, word wrap is not desirable between words. In such cases, word wrap can usually be avoided by using a hard space or non-breaking space between the words, instead of regular spaces." },
34	34	//{ 'text': "OccasionallyThereAreWordsThatAreSoLongTheyExceedTheWidthOfTheLineAndEndUpWrappingBetweenMultipleLines." },
35		~~- { 'text': "Text might have \ttabs\t in it too." }~~
	35	+ { 'text': "Text might have \ttabs\t in it too. Not all text will end in a line breaking character" }
36	36	])
37	37	]);
38	38	var surface = new Surface( $('#es'), doc );

Status & tagging log

22:00, 28 June 2011 Reedy (talk | contribs) changed the status of r90596 [removed: new added: deferred]