Index: trunk/extensions/VisualEditor/modules/parser/ext.core.LinkHandler.js |
— | — | @@ -249,10 +249,10 @@ |
250 | 250 | //console.warn('extlink href: ' + href ); |
251 | 251 | //console.warn( 'content: ' + JSON.stringify( content, null, 2 ) ); |
252 | 252 | // validate the href |
253 | | - if ( this.imageParser.parseURL( href ) ) { |
| 253 | + if ( this.imageParser.tokenizeURL( href ) ) { |
254 | 254 | if ( content.length === 1 && |
255 | 255 | content[0].constructor === String && |
256 | | - this.imageParser.parseURL( content[0] ) && |
| 256 | + this.imageParser.tokenizeURL( content[0] ) && |
257 | 257 | this._isImageLink( content[0] ) ) |
258 | 258 | { |
259 | 259 | var src = content[0]; |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js |
— | — | @@ -32,16 +32,31 @@ |
33 | 33 | */ |
34 | 34 | PegTokenizer.prototype.process = function( text ) { |
35 | 35 | var out, err; |
36 | | - if ( !this.parser ) { |
| 36 | + if ( !this.tokenizer ) { |
| 37 | + // Construct a singleton static tokenizer. |
37 | 38 | var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' ); |
38 | 39 | this.src = fs.readFileSync( pegSrcPath, 'utf8' ); |
39 | | - // Only create a single parser, as parse() is a static method. |
40 | | - var parserSource = PEG.buildParser(this.src).toSource(); |
41 | | - //console.warn( parserSource ); |
42 | | - parserSource = parserSource.replace( 'parse: function(input, startRule) {', |
| 40 | + var tokenizerSource = PEG.buildParser(this.src).toSource(); |
| 41 | + |
| 42 | + /* We patch the generated source to assign the arguments object of the |
| 43 | + * parse function to a function-scoped variable (__parseArgs). We use this to pass |
| 44 | + * in callbacks and other information, which can be used from actions |
| 45 | + * run when matching a production. In particular, we pass in a |
| 46 | + * callback called for a chunk of tokens in toplevelblock. Setting this |
| 47 | + * callback per call to parse() keeps the tokenizer reentrant, so that it |
| 48 | + * can be reused to expand templates while a main parse is ongoing. |
| 49 | + * PEG tokenizer construction is very expensive, so having a single |
| 50 | + * reentrant tokenizer is a big win. |
| 51 | + * |
| 52 | + * We could also make modules available to the tokenizer by prepending |
| 53 | + * requires to the source. |
| 54 | + */ |
| 55 | + tokenizerSource = tokenizerSource.replace( 'parse: function(input, startRule) {', |
43 | 56 | 'parse: function(input, startRule) { var __parseArgs = arguments;' ); |
44 | | - //console.warn( parserSource ); |
45 | | - PegTokenizer.prototype.parser = eval( parserSource ); |
| 57 | + //console.warn( tokenizerSource ); |
| 58 | + PegTokenizer.prototype.tokenizer = eval( tokenizerSource ); |
| 59 | + // alias the generated parse method as tokenize |
| 60 | + this.tokenizer.tokenize = this.tokenizer.parse; |
46 | 61 | } |
47 | 62 | |
48 | 63 | // Some input normalization: force a trailing newline |
— | — | @@ -52,7 +67,7 @@ |
53 | 68 | // XXX: Commented out exception handling during development to get |
54 | 69 | // reasonable traces. |
55 | 70 | //try { |
56 | | - this.parser.parse(text, 'start', |
| 71 | + this.tokenizer.tokenize(text, 'start', |
57 | 72 | // callback |
58 | 73 | this.emit.bind( this, 'chunk' ), |
59 | 74 | // inline break test |
— | — | @@ -68,12 +83,15 @@ |
69 | 84 | }; |
70 | 85 | |
71 | 86 | PegTokenizer.prototype.processImageOptions = function( text ) { |
72 | | - return this.parser.parse(text, 'img_options', null, this ); |
| 87 | + return this.tokenizer.tokenize(text, 'img_options', null, this ); |
73 | 88 | }; |
74 | 89 | |
75 | | -PegTokenizer.prototype.parseURL = function( text ) { |
| 90 | +/** |
| 91 | + * Tokenize a URL using the 'url' start rule. Returns the tokenizer result, or false if tokenizing throws. |
| 92 | + */ |
| 93 | +PegTokenizer.prototype.tokenizeURL = function( text ) { |
76 | 94 | try { |
77 | | - return this.parser.parse(text, 'url', null, this ); |
| 95 | + return this.tokenizer.tokenize(text, 'url', null, this ); |
78 | 96 | } catch ( e ) { |
79 | 97 | return false; |
80 | 98 | } |