Index: trunk/parsers/wikidom/lib/es/es.TextFlow.js |
— | — | @@ -5,86 +5,140 @@ |
6 | 6 | this.$ = $container; |
7 | 7 | } |
8 | 8 | |
9 | | -TextFlow.encodeHtml = function( text ) { |
| 9 | +TextFlow.prototype.htmlEncode = function( text, trim ) { |
| 10 | + if ( trim ) { |
| 11 | + // Trailing whitespace |
| 12 | + text = text.replace( /\s+$/, '' ); |
| 13 | + } |
10 | 14 | return text |
| 15 | + // Tags |
11 | 16 | .replace( /&/g, '&amp;' ) |
12 | | - .replace( / /g, '&nbsp;' ) |
13 | 17 | .replace( /</g, '&lt;' ) |
14 | 18 | .replace( />/g, '&gt;' ) |
15 | | - .replace( /'/g, '&#039;' ) |
16 | | - .replace( /"/g, '&quot;' ) |
| 19 | + // Quotes - probably not needed |
| 20 | + //.replace( /'/g, '&#039;' ) |
| 21 | + //.replace( /"/g, '&quot;' ) |
| 22 | + // Whitespace |
| 23 | + .replace( / /g, '&nbsp;' ) |
17 | 24 | .replace( /\n/g, '<span class="editSurface-whitespace">\\n</span>' ) |
18 | 25 | .replace( /\t/g, '<span class="editSurface-whitespace">\\t</span>' ); |
19 | 26 | }; |
20 | 27 | |
| 28 | +/** |
| 29 | + * Renders text into a series of div elements, each a single line of wrapped text. |
| 30 | + * |
| 31 | + * TODO: Allow re-flowing from a given offset on to make re-flow faster when modifying the text |
| 32 | + * |
| 33 | + * @param text {String} Text to render |
| 34 | + */ |
21 | 35 | TextFlow.prototype.render = function( text ) { |
22 | | - //console.time( 'TextFlow.render' ); |
23 | 36 | |
24 | | - // Clear all lines -- FIXME: This should adaptively re-use/cleanup existing lines |
25 | | - this.$.empty(); |
26 | | - |
27 | 37 | // Measure the container width |
28 | 38 | var $ruler = $( '<div>&nbsp;</div>' ).appendTo( this.$ ); |
29 | 39 | var width = $ruler.innerWidth() |
30 | 40 | $ruler.remove(); |
31 | 41 | |
32 | | - // Build list of line break offsets |
| 42 | + /* |
| 43 | + * Word boundary scan |
| 44 | + * |
| 45 | + * To perform binary-search on words, rather than characters, we need to collect word boundary |
| 46 | + * offsets into an array. This list of offsets always starts with 0 and ends with the length of |
| 47 | + * the text, e.g. [0, ..., text.length]. The offset of the right side of the breaking character |
| 48 | + * is stored, so the gaps between stored offsets always include the breaking character at the |
| 49 | + * end. |
| 50 | + * |
| 51 | + * To avoid encoding the same words as HTML over and over while fitting text to lines, we also |
| 52 | + * build a list of HTML encoded strings for each gap between the offsets stored in the "words" |
| 53 | + * array. Slices of the "html" array can be joined, producing the encoded HTML of the words. In |
| 54 | + * the final pass, each line will get encoded 1 more time, to allow for whitespace trimming. |
| 55 | + */ |
33 | 56 | var words = [0], |
34 | | - boundary = /[ \.\,\;\:\-\t\r\n\f]/, |
35 | | - left = 0, |
36 | | - right = 0, |
37 | | - search = 0; |
38 | | - while ( ( search = text.substr( right ).search( boundary ) ) >= 0 ) { |
39 | | - right += search; |
40 | | - words.push( ++right ); |
| 57 | + html = [], |
| 58 | + boundary = /([ \.\,\;\:\-\t\r\n\f])/g, |
| 59 | + match, |
| 60 | + right, |
| 61 | + left = 0; |
| 62 | + while ( match = boundary.exec( text ) ) { |
| 63 | + // Include the boundary character in the range |
| 64 | + right = match.index + 1; |
| 65 | + // Store the boundary offset |
| 66 | + words.push( right ); |
| 67 | + // Store the word's encoded HTML |
| 68 | + html.push( this.htmlEncode( text.substring( left, right ) ) ); |
| 69 | + // Remember the previous match |
41 | 70 | left = right; |
42 | 71 | } |
43 | | - words.push( right ); |
44 | | - words.push( text.length ); |
| 72 | + // Ensure the words array ends in a boundary, which may automatically happen if the text ends |
| 73 | + // in a period, for instance, but may not in other cases |
| 74 | + if ( right !== text.length ) { |
| 75 | + words.push( text.length ); |
| 76 | + } |
45 | 77 | |
46 | | - // Create lines from text |
47 | | - var pos = 0, |
48 | | - index = 0, |
49 | | - metrics = []; |
50 | | - while ( pos < words.length ) { |
51 | | - // Create line |
52 | | - var $line = $( '<div class="editSurface-line"></div>' ) |
53 | | - .attr( 'line-index', index ) |
54 | | - .appendTo( this.$ ), |
55 | | - line = $line[0]; |
56 | | - |
57 | | - // Use binary search-like technique for efficiency |
58 | | - var l = pos, |
59 | | - r = words.length, |
60 | | - m; |
| 78 | + /* |
| 79 | + * Line wrapping |
| 80 | + * |
| 81 | + * Now that we have linear access to the offsets around non-breakable areas within the text, we |
| 82 | + * can perform a binary-search for the best fit of words within a line. |
| 83 | + * |
| 84 | + * TODO: It may be possible to improve the efficiency of this code by making a best guess and |
| 85 | + * working from there, rather than always starting with [i .. words.length], which results in |
| 86 | + * reducing the right position in all but the last line, and in most cases 2 or 3 times. |
| 87 | + */ |
| 88 | + var lineOffset = 0, |
| 89 | + lines = [], |
| 90 | + $lineRuler = $( '<div class="editSurface-line"></div>' ).appendTo( this.$ ), |
| 91 | + lineRuler = $lineRuler[0]; |
| 92 | + while ( lineOffset < words.length ) { |
| 93 | + var left = lineOffset, |
| 94 | + right = words.length, |
| 95 | + middle, |
| 96 | + clampedLeft; |
61 | 97 | do { |
62 | | - m = Math.ceil( ( l + r ) / 2 ); |
63 | | - line.innerHTML = TextFlow.encodeHtml( text.substring( words[pos], words[m] ) ); |
64 | | - if ( line.clientWidth > width ) { |
65 | | - // Text is too long |
66 | | - r = m - 1; |
| 98 | + // Place "middle" directly in the center of "left" and "right" |
| 99 | + middle = Math.ceil( ( left + right ) / 2 ); |
| 100 | + // Prepare the line for measurement using pre-encoded HTML |
| 101 | + lineRuler.innerHTML = html.slice( lineOffset, middle ).join( '' ); |
| 102 | + // Test for over/under using width of the rendered line |
| 103 | + if ( lineRuler.clientWidth > width ) { |
| 104 | + // Words after "middle" won't fit |
| 105 | + right = middle - 1; |
67 | 106 | } else { |
68 | | - l = m; |
| 107 | + // Words before "middle" will fit |
| 108 | + left = middle; |
69 | 109 | } |
70 | | - } while ( l < r ); |
71 | | - line.innerHTML = TextFlow.encodeHtml( text.substring( words[pos], words[l] ) ); |
| 110 | + } while ( left < right ); |
72 | 111 | |
73 | 112 | // TODO: Check if it fits yet, if not, do binary search within the really long word |
74 | 113 | |
75 | | - metrics.push({ |
76 | | - 'text': text.substring( words[pos], words[l] ), |
77 | | - 'offset': words[pos], |
78 | | - 'length': words[l] - words[pos], |
79 | | - 'width': line.clientWidth, |
80 | | - 'index': index |
| 114 | + // On the last line, "left" and "right" will both equal words.length, which is not a valid index |
| 115 | + clampedLeft = left === words.length ? left - 1 : left; |
| 116 | + |
| 117 | + // Collect line information |
| 118 | + lines.push({ |
| 119 | + 'text': text.substring( words[lineOffset], words[clampedLeft] ), |
| 120 | + 'start': words[lineOffset], |
| 121 | + 'end': words[clampedLeft], |
| 122 | + 'width': lineRuler.clientWidth |
81 | 123 | }); |
82 | 124 | |
83 | 125 | // Step forward |
84 | | - index++; |
85 | | - pos = l; |
| 126 | + lineOffset = left; |
86 | 127 | } |
| 128 | + // Cleanup - technically this will get removed by the empty() call below, but if that changes |
| 129 | + // we don't want to accidentally introduce any bugs, so explicit removal is preferred |
| 130 | + $lineRuler.remove(); |
87 | 131 | |
88 | | - //console.timeEnd( 'TextFlow.render' ); |
| 132 | + // TODO: It may be more efficient to re-use existing lines |
89 | 133 | |
90 | | - return metrics; |
| 134 | + // Make way for the new lines |
| 135 | + this.$.empty(); |
| 136 | + for ( var i = 0; i < lines.length; i++ ) { |
| 137 | + this.$.append( |
| 138 | + $( '<div class="editSurface-line"></div>' ) |
| 139 | + .attr( 'line-index', i ) |
| 140 | + .html( this.htmlEncode( text.substring( lines[i].start, lines[i].end ), true ) ) |
| 141 | + ); |
| 142 | + } |
| 143 | + |
| 144 | + return lines; |
91 | 145 | }; |
Index: trunk/parsers/wikidom/demos/es/index.html |
— | — | @@ -31,7 +31,7 @@ |
32 | 32 | { 'text': "Word wrap following hyphens is sometimes not desired, and can be avoided by using a so-called non-breaking hyphen instead of a regular hyphen. On the other hand, when using word processors, invisible hyphens, called soft hyphens, can also be inserted inside words so that word wrap can occur following the soft hyphens." }, |
33 | 33 | { 'text': "Sometimes, word wrap is not desirable between words. In such cases, word wrap can usually be avoided by using a hard space or non-breaking space between the words, instead of regular spaces." }, |
34 | 34 | //{ 'text': "OccasionallyThereAreWordsThatAreSoLongTheyExceedTheWidthOfTheLineAndEndUpWrappingBetweenMultipleLines." }, |
35 | | - { 'text': "Text might have \ttabs\t in it too." } |
| 35 | + { 'text': "Text might have \ttabs\t in it too. Not all text will end in a line breaking character" } |
36 | 36 | ]) |
37 | 37 | ]); |
38 | 38 | var surface = new Surface( $('#es'), doc ); |