r90596 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r90595‎ | r90596 | r90597 >
Date:19:31, 22 June 2011
Author:tparscal
Status:deferred
Tags:
Comment:
Optimization, cleanup and documentation.
Modified paths:
  • /trunk/parsers/wikidom/demos/es/index.html (modified) (history)
  • /trunk/parsers/wikidom/lib/es/es.TextFlow.js (modified) (history)

Diff [purge]

Index: trunk/parsers/wikidom/lib/es/es.TextFlow.js
@@ -5,86 +5,140 @@
66 this.$ = $container;
77 }
88
9 -TextFlow.encodeHtml = function( text ) {
 9+TextFlow.prototype.htmlEncode = function( text, trim ) {
 10+ if ( trim ) {
 11+ // Trailing whitespace
 12+ text = text.replace( /\s+$/, '' );
 13+ }
1014 return text
 15+ // Tags
1116 .replace( /&/g, '&amp;' )
12 - .replace( / /g, '&nbsp;' )
1317 .replace( /</g, '&lt;' )
1418 .replace( />/g, '&gt;' )
15 - .replace( /'/g, '&apos;' )
16 - .replace( /"/g, '&quot;' )
 19+ // Quotes - probably not needed
 20+ //.replace( /'/g, '&#039;' )
 21+ //.replace( /"/g, '&quot;' )
 22+ // Whitespace
 23+ .replace( / /g, '&nbsp;' )
1724 .replace( /\n/g, '<span class="editSurface-whitespace">\\n</span>' )
1825 .replace( /\t/g, '<span class="editSurface-whitespace">\\t</span>' );
1926 };
2027
 28+/**
 29+ * Renders text into a series of div elements, each a single line of wrapped text.
 30+ *
 31+ * TODO: Allow re-flowing from a given offset on to make re-flow faster when modifying the text
 32+ *
 33+ * @param text {String} Text to render
 34+ */
2135 TextFlow.prototype.render = function( text ) {
22 - //console.time( 'TextFlow.render' );
2336
24 - // Clear all lines -- FIXME: This should adaptively re-use/cleanup existing lines
25 - this.$.empty();
26 -
2737 // Measure the container width
2838 var $ruler = $( '<div>&nbsp;</div>' ).appendTo( this.$ );
2939 var width = $ruler.innerWidth()
3040 $ruler.remove();
3141
32 - // Build list of line break offsets
 42+ /*
 43+ * Word boundary scan
 44+ *
 45+ * To perform binary-search on words, rather than characters, we need to collect word boundary
 46+ * offsets into an array. This list of offsets always starts with 0 and ends with the length of
 47+ * the text, e.g. [0, ..., text.length]. The offset of the right side of the breaking character
 48+ * is stored, so the gaps between stored offsets always include the breaking character at the
 49+ * end.
 50+ *
 51+ * To avoid encoding the same words as HTML over and over while fitting text to lines, we also
 52+ * build a list of HTML encoded strings for each gap between the offsets stored in the "words"
 53+ * array. Slices of the "html" array can be joined, producing the encoded HTML of the words. In
 54+ * the final pass, each line will get encoded 1 more time, to allow for whitespace trimming.
 55+ */
3356 var words = [0],
34 - boundary = /[ \.\,\;\:\-\t\r\n\f]/,
35 - left = 0,
36 - right = 0,
37 - search = 0;
38 - while ( ( search = text.substr( right ).search( boundary ) ) >= 0 ) {
39 - right += search;
40 - words.push( ++right );
 57+ html = [],
 58+ boundary = /([ \.\,\;\:\-\t\r\n\f])/g,
 59+ match,
 60+ right,
 61+ left = 0;
 62+ while ( match = boundary.exec( text ) ) {
 63+ // Include the boundary character in the range
 64+ right = match.index + 1;
 65+ // Store the boundary offset
 66+ words.push( right );
 67+ // Store the word's encoded HTML
 68+ html.push( this.htmlEncode( text.substring( left, right ) ) );
 69+ // Remember the previous match
4170 left = right;
4271 }
43 - words.push( right );
44 - words.push( text.length );
 72+ // Ensure the words array ends in a boundary, which may automatically happen if the text ends
 73+ // in a period, for instance, but may not in other cases
 74+ if ( right !== text.length ) {
 75+ words.push( text.length );
 76+ }
4577
46 - // Create lines from text
47 - var pos = 0,
48 - index = 0,
49 - metrics = [];
50 - while ( pos < words.length ) {
51 - // Create line
52 - var $line = $( '<div class="editSurface-line"></div>' )
53 - .attr( 'line-index', index )
54 - .appendTo( this.$ ),
55 - line = $line[0];
56 -
57 - // Use binary search-like technique for efficiency
58 - var l = pos,
59 - r = words.length,
60 - m;
 78+ /*
 79+ * Line wrapping
 80+ *
 81+ * Now that we have linear access to the offsets around non-breakable areas within the text, we
 82+ * can perform a binary-search for the best fit of words within a line.
 83+ *
 84+ * TODO: It may be possible to improve the efficiency of this code by making a best guess and
 85+ * working from there, rather than always starting with [i .. words.length], which results in
 86+ * reducing the right position in all but the last line, and in most cases 2 or 3 times.
 87+ */
 88+ var lineOffset = 0,
 89+ lines = [],
 90+ $lineRuler = $( '<div class="editSurface-line"></div>' ).appendTo( this.$ ),
 91+ lineRuler = $lineRuler[0];
 92+ while ( lineOffset < words.length ) {
 93+ var left = lineOffset,
 94+ right = words.length,
 95+ middle,
 96+ clampedLeft;
6197 do {
62 - m = Math.ceil( ( l + r ) / 2 );
63 - line.innerHTML = TextFlow.encodeHtml( text.substring( words[pos], words[m] ) );
64 - if ( line.clientWidth > width ) {
65 - // Text is too long
66 - r = m - 1;
 98+ // Place "middle" directly in the center of "left" and "right"
 99+ middle = Math.ceil( ( left + right ) / 2 );
 100+ // Prepare the line for measurement using pre-encoded HTML
 101+ lineRuler.innerHTML = html.slice( lineOffset, middle ).join( '' );
 102+ // Test for over/under using width of the rendered line
 103+ if ( lineRuler.clientWidth > width ) {
 104+ // Words after "middle" won't fit
 105+ right = middle - 1;
67106 } else {
68 - l = m;
 107+ // Words before "middle" will fit
 108+ left = middle;
69109 }
70 - } while ( l < r );
71 - line.innerHTML = TextFlow.encodeHtml( text.substring( words[pos], words[l] ) );
 110+ } while ( left < right );
72111
73112 // TODO: Check if it fits yet, if not, do binary search within the really long word
74113
75 - metrics.push({
76 - 'text': text.substring( words[pos], words[l] ),
77 - 'offset': words[pos],
78 - 'length': words[l] - words[pos],
79 - 'width': line.clientWidth,
80 - 'index': index
 114+ // On the last line, l and r will both equal words.length, which is not a valid index
 115+ clampedLeft = left === words.length ? left - 1 : left;
 116+
 117+ // Collect line information
 118+ lines.push({
 119+ 'text': text.substring( words[lineOffset], words[clampedLeft] ),
 120+ 'start': words[lineOffset],
 121+ 'end': words[clampedLeft],
 122+ 'width': lineRuler.clientWidth
81123 });
82124
83125 // Step forward
84 - index++;
85 - pos = l;
 126+ lineOffset = left;
86127 }
 128+ // Cleanup - technically this will get removed by the empty() call below, but if that changes
 129+ // we don't want to accidentally introduce any bugs, so explicit removal is preferred
 130+ $lineRuler.remove();
87131
88 - //console.timeEnd( 'TextFlow.render' );
 132+ // TODO: It may be more efficient to re-use existing lines
89133
90 - return metrics;
 134+ // Make way for the new lines
 135+ this.$.empty();
 136+ for ( var i = 0; i < lines.length; i++ ) {
 137+ this.$.append(
 138+ $( '<div class="editSurface-line"></div>' )
 139+ .attr( 'line-index', i )
 140+ .html( this.htmlEncode( text.substring( lines[i].start, lines[i].end ), true ) )
 141+ );
 142+ }
 143+
 144+ return lines;
91145 };
Index: trunk/parsers/wikidom/demos/es/index.html
@@ -31,7 +31,7 @@
3232 { 'text': "Word wrap following hyphens is sometimes not desired, and can be avoided by using a so-called non-breaking hyphen instead of a regular hyphen. On the other hand, when using word processors, invisible hyphens, called soft hyphens, can also be inserted inside words so that word wrap can occur following the soft hyphens." },
3333 { 'text': "Sometimes, word wrap is not desirable between words. In such cases, word wrap can usually be avoided by using a hard space or non-breaking space between the words, instead of regular spaces." },
3434 //{ 'text': "OccasionallyThereAreWordsThatAreSoLongTheyExceedTheWidthOfTheLineAndEndUpWrappingBetweenMultipleLines." },
35 - { 'text': "Text might have \ttabs\t in it too." }
 35+ { 'text': "Text might have \ttabs\t in it too. Not all text will end in a line breaking character" }
3636 ])
3737 ]);
3838 var surface = new Surface( $('#es'), doc );

Status & tagging log