r101439 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r101438‎ | r101439 | r101440 >
Date:23:49, 31 October 2011
Author:catrope
Status:deferred
Tags:
Comment:
Rewrite selectNodes() from scratch so it actually works. Comes with a shitload of tests but needs even more
Modified paths:
  • /trunk/parsers/wikidom/lib/hype/bases/es.DocumentNode.js (modified) (history)
  • /trunk/parsers/wikidom/tests/hype/es.DocumentNode.test.js (modified) (history)

Diff [purge]

Index: trunk/parsers/wikidom/tests/hype/es.DocumentNode.test.js
@@ -6,6 +6,10 @@
77 return es.extendObject( new es.DocumentNode( items ), this );
88 }
99
 10+DocumentNodeStub.prototype.getContentLength = function() {
 11+ return this.size;
 12+};
 13+
1014 DocumentNodeStub.prototype.getElementLength = function() {
1115 // Mimic document data which has an opening and closing around the content
1216 return this.size + 2;
@@ -103,96 +107,182 @@
104108 'getOffsetFromNode finds the right offset or returns -1 when node is not found'
105109 );
106110 }
 111+} );
107112
 113+test( 'es.DocumentNode.selectNodes', function() {
 114+
108115 // selectNodes tests
109116
 117+ // <f> a b c d e f g h </f> <g> a b c d e f g h </g> <h> a b c d e f g h </h>
 118+ //^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
 119+ //0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0
 120+ // 0 1 2 3 4 5 6 7 8 0 1 2 3 4 5 6 7 8 0 1 2 3 4 5 6 7 8
110121 var f = new DocumentNodeStub( [], 'f', 8 ),
111122 g = new DocumentNodeStub( [], 'g', 8 ),
112123 h = new DocumentNodeStub( [], 'h', 8 ),
113124 root2 = new DocumentNodeStub( [f, g, h], 'root2', 30 );
 125+ // FIXME: QUnit thinks f == g because both are empty arrays. Rawr.
 126+ // TODO make sure there is a test case for everything that is special-cased in the code
 127+ // TODO also nest with a more complicated nested structure, like the one from es.DocumentModel.test.js
 128+
 129+ // Possible positions are:
 130+ // * before beginning
 131+ // * at beginning
 132+ // * middle
 133+ // * at end
 134+ // * past end
114135 var selectNodesTests = [
115 - /*
 136+ // Complete set of combinations within the same node:
116137 {
117 - 'input': new es.Range( 0, 5 ),
118 - 'output': [{ 'node': f, 'range': new es.Range( 1, 4 ) }]
 138+ 'node': root2,
 139+ 'input': new es.Range( 0, 0 ),
 140+ 'output': [],
 141+ 'desc': 'Zero-length range before the beginning of a node'
119142 },
120143 {
121 - 'input': new es.Range( 11, 16 ),
122 - 'output': [{ 'node': g, 'range': new es.Range( 1, 4 ) }]
 144+ 'node': root2,
 145+ 'input': new es.Range( 0, 1 ),
 146+ 'output': [{ 'node': f, 'range': new es.Range( 0, 0 ) }],
 147+ 'desc': 'Range starting before the beginning of a node and ending at the beginning'
123148 },
124149 {
125 - 'input': new es.Range( 22, 27 ),
126 - 'output': [{ 'node': h, 'range': new es.Range( 1, 4 ) }]
 150+ 'node': root2,
 151+ 'input': new es.Range( 10, 15 ),
 152+ 'output': [{ 'node': g, 'range': new es.Range( 0, 4 ) }],
 153+ 'desc': 'Range starting before the beginning of a node and ending in the middle'
127154 },
128155 {
129 - 'input': new es.Range( 0, 33 ),
130 - 'output': [
131 - { 'node': f, 'range': new es.Range( 0, 8 ) },
132 - { 'node': g, 'range': new es.Range( 0, 8 ) },
133 - { 'node': h, 'range': new es.Range( 0, 8 ) }
134 - ]
 156+ 'node': root2,
 157+ 'input': new es.Range( 20, 29 ),
 158+ 'output': [{ 'node': h, 'range': new es.Range( 0, 8 ) }],
 159+ 'desc': 'Range starting before the beginning of a node and ending at the end'
135160 },
136161 {
 162+ 'node': root2,
 163+ 'input': new es.Range( 0, 10 ),
 164+ 'output': [{ 'node': f } ],
 165+ 'desc': 'Range starting before the beginning of a node and ending past the end'
 166+ },
 167+ {
 168+ 'node': root2,
 169+ 'input': new es.Range( 11, 11 ),
 170+ 'output': [{ 'node': g, 'range': new es.Range( 0, 0 ) }],
 171+ 'desc': 'Zero-length range at the beginning of a node'
 172+ },
 173+ {
 174+ 'node': root2,
 175+ 'input': new es.Range( 21, 26 ),
 176+ 'output': [{ 'node': h, 'range': new es.Range( 0, 5 ) }],
 177+ 'desc': 'Range starting at the beginning of a node and ending in the middle'
 178+ },
 179+ {
 180+ 'node': root2,
 181+ 'input': new es.Range( 1, 9 ),
 182+ 'output': [{ 'node': f, 'range': new es.Range( 0, 8 ) }],
 183+ 'desc': 'Range starting at the beginning of a node and ending at the end'
 184+ },
 185+ {
 186+ 'node': root2,
 187+ 'input': new es.Range( 11, 20 ),
 188+ 'output': [{ 'node': g, 'range': new es.Range( 0, 8 ) }],
 189+ 'desc': 'Range starting at the beginning of a node and ending past the end'
 190+ },
 191+ {
 192+ 'node': root2,
 193+ 'input': new es.Range( 22, 22 ),
 194+ 'output': [{ 'node': h, 'range': new es.Range( 1, 1 ) }],
 195+ 'desc': 'Zero-length range in the middle of a node'
 196+ },
 197+ {
 198+ 'node': root2,
 199+ 'input': new es.Range( 2, 7 ),
 200+ 'output': [{ 'node': f, 'range': new es.Range( 1, 6 ) }],
 201+ 'desc': 'Range starting and ending in the middle of the same node'
 202+ },
 203+ {
 204+ 'node': root2,
 205+ 'input': new es.Range( 13, 19 ),
 206+ 'output': [{ 'node': g, 'range': new es.Range( 2, 8 ) }],
 207+ 'desc': 'Range starting in the middle of a node and ending at the end'
 208+ },
 209+ {
 210+ 'node': root2,
 211+ 'input': new es.Range( 24, 30 ),
 212+ 'output': [{ 'node': h, 'range': new es.Range( 3, 8 ) }],
 213+ 'desc': 'Range starting in the middle of a node and ending past the end'
 214+ },
 215+ {
 216+ 'node': root2,
 217+ 'input': new es.Range( 9, 9 ),
 218+ 'output': [{ 'node': f, 'range': new es.Range( 8, 8 ) }],
 219+ 'desc': 'Zero-length range at the end of a node'
 220+ },
 221+ {
 222+ 'node': root2,
 223+ 'input': new es.Range( 19, 20 ),
 224+ 'output': [{ 'node': g, 'range': new es.Range( 8, 8 ) }],
 225+ 'desc': 'Range starting at the end of a node and ending past the end'
 226+ },
 227+ {
 228+ 'node': root2,
 229+ 'input': new es.Range( 30, 30 ),
 230+ 'output': [],
 231+ 'desc': 'Zero-length range past the end of a node'
 232+ },
 233+ // TODO add a complete set of combinations for cross-node ranges
 234+ {
 235+ 'node': root2,
137236 'input': new es.Range( 5, 25 ),
138237 'output': [
139238 { 'node': f, 'range': new es.Range( 4, 8 ) },
140 - { 'node': g, 'range': new es.Range( 0, 8 ) },
 239+ { 'node': g },
141240 { 'node': h, 'range': new es.Range( 0, 4 ) }
142 - ]
 241+ ],
 242+ 'desc': 'Range starting in the middle of the first node and ending in the middle of the third'
143243 },
144244 {
145 - 'input': new es.Range( 5, 9 ),
146 - 'output': [{ 'node': f, 'range': new es.Range( 5, 9 ) }]
147 - },
148 - {
149 - 'input': new es.Range( 5, 10 ),
150 - 'output': [{ 'node': f, 'range': new es.Range( 5, 10 ) }]
151 - },
152 - {
 245+ 'node': root2,
153246 'input': new es.Range( 5, 11 ),
154 - 'output': [{ 'node': f, 'range': new es.Range( 5, 10 ) }]
 247+ 'output': [
 248+ { 'node': f, 'range': new es.Range( 4, 8 ) },
 249+ { 'node': g, 'range': new es.Range( 0, 0 ) }
 250+ ],
 251+ 'desc': 'Range starting in the middle of a node and ending at the beginning of the second'
155252 },
156253 {
 254+ 'node': root2,
157255 'input': new es.Range( 5, 12 ),
158256 'output': [
159 - { 'node': f, 'range': new es.Range( 5, 10 ) },
 257+ { 'node': f, 'range': new es.Range( 4, 8 ) },
160258 { 'node': g, 'range': new es.Range( 0, 1 ) }
161 - ]
 259+ ],
 260+ 'desc': 'Range starting in the middle of a node and ending after the first character of the second'
162261 },
163262 {
 263+ 'node': root2,
164264 'input': new es.Range( 8, 16 ),
165265 'output': [
166 - { 'node': f, 'range': new es.Range( 8, 10 ) },
 266+ { 'node': f, 'range': new es.Range( 7, 8 ) },
167267 { 'node': g, 'range': new es.Range( 0, 5 ) }
168 - ]
 268+ ],
 269+ 'desc': 'Range starting before the last character of a node and ending in the middle of the next node'
169270 },
170271 {
 272+ 'node': root2,
171273 'input': new es.Range( 9, 16 ),
172274 'output': [
173 - { 'node': f, 'range': new es.Range( 9, 10 ) },
 275+ { 'node': f, 'range': new es.Range( 8, 8 ) },
174276 { 'node': g, 'range': new es.Range( 0, 5 ) }
175 - ]
 277+ ],
 278+ 'desc': 'Range starting at the end of a node and ending in the middle of the next node'
176279 },
177 - {
178 - 'input': new es.Range( 10, 16 ),
179 - 'output': [{ 'node': g, 'range': new es.Range( 0, 5 ) }]
180 - },
181 - {
182 - 'input': new es.Range( 11, 16 ),
183 - 'output': [{ 'node': g, 'range': new es.Range( 0, 5 ) }]
184 - },
185 - {
186 - 'input': new es.Range( 12, 16 ),
187 - 'output': [{ 'node': g, 'range': new es.Range( 1, 5 ) }]
188 - }
189 - */
190280 ];
191281
192 - for ( i = 0; i < selectNodesTests.length; i++ ) {
 282+ for ( var i = 0; i < selectNodesTests.length; i++ ) {
193283 deepEqual(
194284 root2.selectNodes( selectNodesTests[i].input ),
195285 selectNodesTests[i].output,
196 - 'selectNodes returns the correct items and ranges.'
 286+ selectNodesTests[i].desc
197287 );
198288 }
199289 } );
Index: trunk/parsers/wikidom/lib/hype/bases/es.DocumentNode.js
@@ -106,36 +106,87 @@
107107 * covered by the range and the range within the node that is covered
108108 */
109109 es.DocumentNode.prototype.selectNodes = function( range ) {
 110+ var nodes = [], i, left, right, start, end, startInside, endInside;
110111 range.normalize();
111 - var nodes = [];
112 - for ( var i = 0, length = this.length, left = 0, right; i < length; i++ ) {
113 - right = left + this[i].getElementLength();
114 - if ( range.start >= left && range.start < right ) {
115 - if ( range.end < right ) {
116 - nodes.push( {
117 - 'node': this[i],
118 - 'range': new es.Range( range.start - left, range.end - left )
119 - } );
120 - break;
121 - } else {
122 - nodes.push( {
123 - 'node': this[i],
124 - 'range': new es.Range( range.start - left, right - left - 1 )
125 - } );
126 - }
127 - } else if ( range.end >= left && range.end < right ) {
128 - nodes.push( {
129 - 'node': this[i],
130 - 'range': new es.Range( 0, range.end - left )
131 - } );
132 - break;
133 - } else if ( left >= range.start && right <= range.end ) {
134 - nodes.push( {
135 - 'node': this[i],
136 - 'range': new es.Range( 0, right - left - 1 )
137 - } );
 112+ start = range.start;
 113+ end = range.end;
 114+
 115+ if ( start < 0 ) {
 116+ throw 'The start offset of the range is negative';
 117+ }
 118+
 119+
 120+ if ( this.length == 0 ) {
 121+ // Special case: this node doesn't have any children
 122+ // The return value is simply the range itself, if it is not out of bounds
 123+ if ( end > this.getContentLength() ) {
 124+ throw 'The end offset of the range is past the end of the node';
138125 }
139 - left = right;
 126+ return [ { 'node': this, 'range': new es.Range( start, end ) } ];
140127 }
 128+
 129+ // This node has children, loop over them
 130+ left = 1; // First offset inside the first child. Offset 0 is before the first child
 131+ for ( i = 0; i < this.length; i++ ) {
 132+ // left <= any offset inside this[i] <= right
 133+ right = left + this[i].getContentLength();
 134+
 135+ if ( start == end && ( start == left - 1 || start == right + 1 ) ) {
 136+ // Empty range outside of any node
 137+ return [];
 138+ }
 139+ if ( start == left - 1 && end == right + 1 ) {
 140+ // The range covers the entire node, including its opening and closing elements
 141+ return [ { 'node': this[i] } ];
 142+ }
 143+ if ( start == left - 1 ) {
 144+ // start is between this[i-1] and this[i], move it to left for convenience
 145+ // We don't need to check for start < end here because we already have start != end and start <= end
 146+ start = left;
 147+ }
 148+ if ( end == right + 1 ) {
 149+ // end is between this[i] and this[i+1], move it to right for convenience
 150+ // We don't need to check for start < end here because we already have start != end and start <= end
 151+ end = right;
 152+ }
 153+
 154+ startInside = start >= left && start <= right; // is the start inside this[i]?
 155+ endInside = end >= left && end <= right; // is the end inside this[i]?
 156+
 157+ if ( startInside && endInside ) {
 158+ // The range is entirely inside this[i]
 159+ // Recurse into this[i]
 160+ // Since the start and end are both inside this[i], we know for sure that we're done, so return
 161+ return this[i].selectNodes( new es.Range( start - left, end - left ) );
 162+ } else if ( startInside ) {
 163+ // The start is inside this[i] but the end isn't
 164+ // Add a range from the start of the range to the end of this[i]
 165+ nodes.push( { 'node': this[i], 'range': new es.Range( start - left, right - left ) } );
 166+ } else if ( endInside ) {
 167+ // The end is inside this[i] but the start isn't
 168+ // Add a range from the start of this[i] to the end of the range
 169+ nodes.push( { 'node': this[i], 'range': new es.Range( 0, end - left ) } );
 170+ // We've found the end, so we're done
 171+ return nodes;
 172+ } else if ( nodes.length > 0 ) {
 173+ // Neither the start nor the end is inside this[i], but nodes is non-empty,
 174+ // so this[i] must be between the start and the end
 175+ // Add the entire node, so no range property
 176+ nodes.push( { 'node': this[i] } );
 177+ }
 178+
 179+ // Move left to the start of this[i+1] for the next iteration
 180+ // +2 because we need to jump over the offset between this[i] and this[i+1]
 181+ left = right + 2;
 182+ }
 183+
 184+ // If we got here, that means that at least some part of the range is out of bounds
 185+ // This is an error
 186+ if ( nodes.length == 0 ) {
 187+ throw 'The start offset of the range is past the end of the node';
 188+ } else {
 189+ // Apparently the start was inside this node, but the end wasn't
 190+ throw 'The end offset of the range is past the end of the node';
 191+ }
141192 return nodes;
142193 };

Status & tagging log