Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js |
— | — | @@ -23,6 +23,7 @@ |
24 | 24 | jsDiff = require('diff'), |
25 | 25 | colors = require('colors'), |
26 | 26 | util = require( 'util' ), |
| 27 | + jsdom = require( 'jsdom' ), |
27 | 28 | HTML5 = require('html5').HTML5, //TODO is this fixup for tests only, or part of real parsing... |
28 | 29 | PEG = require('pegjs'), |
29 | 30 | // Handle options/arguments with optimist module |
— | — | @@ -165,14 +166,16 @@ |
166 | 167 | |
167 | 168 | try { |
168 | 169 | this.testParser = PEG.buildParser(fs.readFileSync('parserTests.pegjs', 'utf8')); |
169 | | - } catch (e) { |
170 | | - console.log(e); |
| 170 | + } catch (e2) { |
| 171 | + console.log(e2); |
171 | 172 | } |
172 | 173 | |
173 | 174 | this.cases = this.getTests(); |
174 | 175 | |
175 | 176 | this.articles = {}; |
176 | 177 | |
| 178 | + //this.htmlwindow = jsdom.jsdom(null, null, {parser: HTML5}).createWindow(); |
| 179 | + //this.htmlparser = new HTML5.Parser({document: this.htmlwindow.document}); |
177 | 180 | this.htmlparser = new HTML5.Parser(); |
178 | 181 | |
179 | 182 | // Test statistics |
— | — | @@ -207,7 +210,7 @@ |
208 | 211 | testFile = fs.readFileSync( this.testFileName2, 'utf8' ); |
209 | 212 | fileDependencies.push( this.testFileName2 ); |
210 | 213 | } |
211 | | - catch(e) { console.log(e); } |
| 214 | + catch( e3 ) { console.log( e3 ); } |
212 | 215 | } |
213 | 216 | } |
214 | 217 | if( !this.argv.cache ) { |
— | — | @@ -232,7 +235,7 @@ |
233 | 236 | cache_content = fs.readFileSync( this.cache_file, 'utf8' ); |
234 | 237 | // Fetch previous digest |
235 | 238 | cache_file_digest = cache_content.match( /^CACHE: (\w+)\n/ )[1]; |
236 | | - } catch(e) { |
| 239 | + } catch( e4 ) { |
237 | 240 | // cache file does not exist |
238 | 241 | } |
239 | 242 | |
— | — | @@ -288,8 +291,7 @@ |
289 | 292 | source = source.replace(/[\r\n]/g, ''); |
290 | 293 | try { |
291 | 294 | this.htmlparser.parse('<body>' + source + '</body>'); |
292 | | - return this.htmlparser.document |
293 | | - .getElementsByTagName('body')[0] |
| 295 | + return this.htmlparser.document.getElementsByTagName('body')[0] |
294 | 296 | .innerHTML |
295 | 297 | // a few things we ignore for now.. |
296 | 298 | .replace(/\/wiki\/Main_Page/g, 'Main Page') |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.HTML5TreeBuilder.node.js |
— | — | @@ -56,22 +56,20 @@ |
57 | 57 | this.processToken({type: 'TAG', name: 'body'}); |
58 | 58 | }; |
59 | 59 | |
| 60 | +FauxHTML5.TreeBuilder.prototype._att = function (maybeAttribs) { |
| 61 | + var atts = []; |
| 62 | + if ( maybeAttribs && $.isArray( maybeAttribs ) ) { |
| 63 | + for(var i = 0, length = maybeAttribs.length; i < length; i++) { |
| 64 | + var att = maybeAttribs[i]; |
| 65 | + atts.push({nodeName: att[0], nodeValue: att[1]}); |
| 66 | + } |
| 67 | + } |
| 68 | + return atts; |
| 69 | +}; |
60 | 70 | |
61 | 71 | // Adapt the token format to internal HTML tree builder format, call the actual |
62 | 72 | // html tree builder by emitting the token. |
63 | 73 | FauxHTML5.TreeBuilder.prototype.processToken = function (token) { |
64 | | - var att = function (maybeAttribs) { |
65 | | - if ( $.isArray( maybeAttribs ) ) { |
66 | | - var atts = []; |
67 | | - for(var i = 0, length = maybeAttribs.length; i < length; i++) { |
68 | | - var att = maybeAttribs[i]; |
69 | | - atts.push({nodeName: att[0], nodeValue: att[1]}); |
70 | | - } |
71 | | - return atts; |
72 | | - } else { |
73 | | - return []; |
74 | | - } |
75 | | - }; |
76 | 74 | |
77 | 75 | switch (token.type) { |
78 | 76 | case "TEXT": |
— | — | @@ -80,23 +78,23 @@ |
81 | 79 | case "TAG": |
82 | 80 | this.emit('token', {type: 'StartTag', |
83 | 81 | name: token.name, |
84 | | - data: att(token.attribs)}); |
| 82 | + data: this._att(token.attribs)}); |
85 | 83 | break; |
86 | 84 | case "ENDTAG": |
87 | 85 | this.emit('token', {type: 'EndTag', |
88 | 86 | name: token.name, |
89 | | - data: att(token.attribs)}); |
| 87 | + data: this._att(token.attribs)}); |
90 | 88 | break; |
91 | 89 | case "SELFCLOSINGTAG": |
92 | 90 | this.emit('token', {type: 'StartTag', |
93 | 91 | name: token.name, |
94 | | - data: att(token.attribs)}); |
| 92 | + data: this._att(token.attribs)}); |
95 | 93 | if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) { |
96 | 94 | // VOID_ELEMENTS are automagically treated as self-closing by |
97 | 95 | // the tree builder |
98 | 96 | this.emit('token', {type: 'EndTag', |
99 | 97 | name: token.name, |
100 | | - data: att(token.attribs)}); |
| 98 | + data: this._att(token.attribs)}); |
101 | 99 | } |
102 | 100 | break; |
103 | 101 | case "COMMENT": |
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -263,6 +263,7 @@ |
264 | 264 | // be part of a stream, only when tokenizing complete |
265 | 265 | // texts) |
266 | 266 | //console.log( pp( flatten ( e ) ) ); |
| 267 | + cache = {}; |
267 | 268 | __parseArgs[2]( [ { type: 'END' } ] ); |
268 | 269 | return []; //flatten(e); |
269 | 270 | } |
— | — | @@ -300,8 +301,8 @@ |
301 | 302 | */ |
302 | 303 | |
303 | 304 | urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); } |
| 305 | + / & url_chars urllink |
304 | 306 | / htmlentity |
305 | | - / urllink |
306 | 307 | // Convert trailing space into a non-breaking space (&nbsp;) |
307 | 308 | // XXX: This should be moved to a serializer |
308 | 309 | / ' ' & ':' { return "\u00a0"; } |
— | — | @@ -411,6 +412,7 @@ |
412 | 413 | //console.log('about to emit' + pp(self)); |
413 | 414 | //console.log( '__parseArgs call: ' + pp( b )); |
414 | 415 | __parseArgs[2]( flatten( b ) ); |
| 416 | + |
415 | 417 | //return []; |
416 | 418 | return true; |
417 | 419 | } |
— | — | @@ -653,6 +655,9 @@ |
654 | 656 | |
655 | 657 | /* Default URL protocols in MediaWiki (see DefaultSettings). Normally these can |
656 | 658 | * be configured dynamically. */ |
| 659 | + |
| 660 | +url_chars = [/fghimnstw] |
| 661 | + |
657 | 662 | url_protocol |
658 | 663 | = '//' // for protocol-relative URLs |
659 | 664 | / 'ftp://' |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js |
— | — | @@ -28,6 +28,7 @@ |
29 | 29 | if ( !this.parser ) { |
30 | 30 | // Only create a single parser, as parse() is a static method. |
31 | 31 | var parserSource = PEG.buildParser(this.src).toSource(); |
| 32 | + //console.log( parserSource ); |
32 | 33 | parserSource = parserSource.replace( 'parse: function(input, startRule) {', |
33 | 34 | 'parse: function(input, startRule) { var __parseArgs = arguments;' ); |
34 | 35 | //console.log( parserSource ); |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.js |
— | — | @@ -233,7 +233,7 @@ |
234 | 234 | |
235 | 235 | ParserPipeline.prototype.cachePipeline = function ( inputType, pipelinePart, pipe ) { |
236 | 236 | var cache = this.pipelineCache[inputType][pipelinePart]; |
237 | | - if ( cache && cache.length < 50 ) { |
| 237 | + if ( cache && cache.length < 5 ) { |
238 | 238 | cache.push( pipe ); |
239 | 239 | } |
240 | 240 | }; |