Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt |
— | — | @@ -413,57 +413,14 @@ |
414 | 414 | * Syntax stops: Avoid eating significant tokens for higher-level productions |
415 | 415 | * in nested inline productions. |
416 | 416 | * |
417 | | - * XXX: Repeated testing of flags is not terribly efficient. |
| 417 | + * Repeated testing of flags is not terribly efficient. See new and faster |
| 418 | + * version below. |
418 | 419 | */ |
419 | 420 | |
420 | | -inline_breaks_ = |
421 | | - & [=|!}:\r\n\]<] |
422 | | - & { cacheKey = ''; ilbpos = pos; return true; } |
423 | | - res:inline_breaks_o |
424 | | -{ |
425 | | - console.warn( 'ilbo res: ' + JSON.stringify( [ res, input.substr( ilbpos, 4 ) ] ) ); |
426 | | - return res; |
427 | | -} |
428 | | - |
429 | | - |
430 | | -inline_breaks_o |
431 | | - = & [=|!}:\r\n\]<] // don't check further if char cannot match |
432 | | - res:( |
433 | | - & { // Important hack: disable caching for this production, as the default |
434 | | - // cache key does not take into account flag states! |
435 | | - cacheKey = ''; |
436 | | - //console.warn('ilb: ' + input.substr(pos, 5) ); |
437 | | - return true; |
438 | | - } |
439 | | - |
440 | | - & { return syntaxFlags['table']; } |
441 | | - ( a:(newline [!|] / '||' / '!!' / '|}') { |
442 | | - //console.warn("table break" + pp(a) + pos); |
443 | | - return true; |
444 | | - } |
445 | | - / & { return syntaxFlags['tableCellArg'] } |
446 | | - "|" { return true } |
447 | | - ) |
448 | | - / & { return (syntaxFlags['colon'] && |
449 | | - ! syntaxFlags.extlink && // example: ; [[Link:Term]] : Definition |
450 | | - ! syntaxFlags.linkdesc); } ":" { return true; } |
451 | | - / & { return syntaxFlags['extlink']; } "]" { return true; } |
452 | | - / & { return syntaxFlags['linkdesc']; } link_end { return true; } |
453 | | - / & { return syntaxFlags['h']; } '='+ space* newline { return true; } |
454 | | - / & { return syntaxFlags['template']; } ('|' / '}}' ) { |
455 | | - //console.warn( 'template break @' + pos + input.substr(pos-1, 4) ); |
456 | | - return true; |
457 | | - } |
458 | | - / & { return syntaxFlags['equal']; } '=' { |
459 | | - //console.warn( 'equal stop @' + pos + input.substr(pos-1, 4) ); |
460 | | - return true; |
461 | | - } |
462 | | - / & { return syntaxFlags['pre']; } '</pre>' { |
463 | | - //console.warn( 'pre stop @' + pos + input.substr(pos-1, 4) ); |
464 | | - return true; |
465 | | - } |
466 | | - ) { return res } |
467 | | - |
| 421 | +/* |
| 422 | + * Syntax stops: Avoid eating significant tokens for higher-level productions |
| 423 | + * in nested inline productions. |
| 424 | + */ |
468 | 425 | inline_breaks |
469 | 426 | = & [=|!}:\r\n\]<] |
470 | 427 | & { // Important hack: disable caching for this production, as the default |
— | — | @@ -661,6 +618,14 @@ |
662 | 619 | * Templates, -arguments and wikilinks |
663 | 620 | **************************************************************/ |
664 | 621 | |
| 622 | +/* |
| 623 | + * Precedence: template arguments win over templates. See |
| 624 | + * http://www.mediawiki.org/wiki/Preprocessor_ABNF#Ideal_precedence |
| 625 | + * 4: {{{{·}}}} → {·{{{·}}}·} |
| 626 | + * 5: {{{{{·}}}}} → {{·{{{·}}}·}} |
| 627 | + * 6: {{{{{{·}}}}}} → {{{·{{{·}}}·}}} |
| 628 | + * 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·} |
| 629 | + */ |
665 | 630 | tplarg_or_template = & '{{{{{' template / tplarg / template |
666 | 631 | |
667 | 632 | template |
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js |
— | — | @@ -4,6 +4,7 @@ |
5 | 5 | * |
6 | 6 | * Use along with a HTML5TreeBuilder and the DOMPostProcessor(s) for HTML |
7 | 7 | * output. |
| 8 | + * |
8 | 9 | */ |
9 | 10 | |
10 | 11 | var PEG = require('pegjs'), |
— | — | @@ -26,6 +27,11 @@ |
27 | 28 | |
28 | 29 | PegTokenizer.src = false; |
29 | 30 | |
| 31 | +/* |
| 32 | + * The main worker. Sets up event emission ('chunk' and 'end' events). |
| 33 | + * Consumers are supposed to register with PegTokenizer before calling |
| 34 | + * process(). |
| 35 | + */ |
30 | 36 | PegTokenizer.prototype.process = function( text ) { |
31 | 37 | var out, err; |
32 | 38 | if ( !this.parser ) { |
— | — | @@ -36,23 +42,15 @@ |
37 | 43 | 'parse: function(input, startRule) { var __parseArgs = arguments;' ); |
38 | 44 | //console.warn( parserSource ); |
39 | 45 | PegTokenizer.prototype.parser = eval( parserSource ); |
40 | | - // add reference to this for event emission |
41 | | - // XXX: pass a cb into parse() instead, but need to modify pegjs a bit |
42 | | - // for that. |
43 | | - //PegTokenizer.prototype.parser._tokenizer = undefined; |
44 | | - |
45 | | - // Print the generated parser source |
46 | | - //console.warn(this.parser.toSource()); |
47 | 46 | } |
48 | 47 | |
49 | | - // some normalization |
| 48 | + // Some input normalization: force a trailing newline |
50 | 49 | if ( text.substring(text.length - 1) !== "\n" ) { |
51 | 50 | text += "\n"; |
52 | 51 | } |
53 | 52 | |
54 | 53 | // XXX: Commented out exception handling during development to get |
55 | | - // reasonable traces. Calling a trace on the extension does not really cut |
56 | | - // it. |
| 54 | + // reasonable traces. |
57 | 55 | //try { |
58 | 56 | this.parser.parse(text, 'start', |
59 | 57 | // callback |
— | — | @@ -60,9 +58,6 @@ |
61 | 59 | // inline break test |
62 | 60 | this |
63 | 61 | ); |
64 | | - // emit tokens here until we get that to work per toplevelblock in the |
65 | | - // actual tokenizer |
66 | | - //this.emit('chunk', out.concat( [{ type: 'END' }] ) ); |
67 | 62 | this.emit('end'); |
68 | 63 | //} catch (e) { |
69 | 64 | //err = e; |
— | — | @@ -72,6 +67,58 @@ |
73 | 68 | //} |
74 | 69 | }; |
75 | 70 | |
| 71 | + |
/**
 * Inline breaks: flag-enabled test which detects end positions for active
 * higher-level productions while inside inline and other nested productions.
 * Those inner productions are then exited, so that the outer production can
 * handle the end marker.
 *
 * Only the character at `pos` (plus a short lookahead) is inspected; nothing
 * is consumed.
 *
 * @param {string} input The text being tokenized.
 * @param {number} pos Offset in `input` of the candidate break character.
 * @param {Object} syntaxFlags Currently active syntax flags. The keys read
 *   here are: equal, h, template, table, tableCellArg, colon, extlink,
 *   linkdesc and pre.
 * @return {*} A truthy value (true, or the value of the deciding flag) if
 *   the character at `pos` ends an active outer production, null otherwise.
 */
PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) {
    // May be undefined at the end of the input; only ever compared with ===
    // so that a trailing break character cannot raise a TypeError.
    var next = input[pos + 1];

    switch( input[pos] ) {
        case '=':
            // Heading end: the rest of the line may only hold further '='
            // followed by optional spaces/tabs. The pattern is anchored so
            // that an '=' in the middle of a heading line is not reported.
            return syntaxFlags.equal ||
                ( syntaxFlags.h &&
                  /^=*[ \t]*[\r\n]/.test( input.substr( pos + 1, 200 ) ) ) ||
                null;
        case '|':
            return syntaxFlags.template ||
                ( syntaxFlags.table &&
                  ( next === '|' || next === '}' ||
                    syntaxFlags.tableCellArg ) ) ||
                null;
        case '!':
            return syntaxFlags.table && next === '!' || null;
        case '}':
            return syntaxFlags.template && next === '}' || null;
        case ':':
            return syntaxFlags.colon &&
                ! syntaxFlags.extlink &&
                ! syntaxFlags.linkdesc || null;
        case '\r':
            // Anchored: the line break under test is the one starting at pos.
            return syntaxFlags.table &&
                /^\r\n?[!|]/.test( input.substr( pos, 3 ) ) ||
                null;
        case '\n':
            // Both row/cell markers are only significant inside a table; the
            // parentheses keep the '|' test behind the table flag.
            return syntaxFlags.table &&
                ( next === '!' || next === '|' ) ||
                null;
        case ']':
            return syntaxFlags.extlink ||
                ( syntaxFlags.linkdesc && next === ']' ) ||
                null;
        case '<':
            return syntaxFlags.pre &&
                input.substr( pos, 6 ) === '</pre>' || null;
        default:
            return null;
    }
};
| 120 | + |
| 121 | +// Alternate version of the above. The hash is likely faster, but the nested |
| 122 | +// function calls seem to cancel that out. |
76 | 123 | PegTokenizer.prototype.breakMap = { |
77 | 124 | '=': function(input, pos, syntaxFlags) { |
78 | 125 | return syntaxFlags.equal || |
— | — | @@ -120,163 +167,14 @@ |
121 | 168 | } |
122 | 169 | }; |
123 | 170 | |
124 | | -PegTokenizer.prototype.inline_breaks_ = function (input, pos, syntaxFlags ) { |
/**
 * Hash-dispatch variant of the inline break test: looks up a handler for the
 * character at `pos` in this.breakMap and delegates to it.
 *
 * @param {string} input The text being tokenized.
 * @param {number} pos Offset in `input` of the candidate break character.
 * @param {Object} syntaxFlags Currently active syntax flags, passed through
 *   to the handler unchanged.
 * @return {*} Whatever the handler returns, or null when breakMap has no
 *   handler for the character (including `pos` being past the end of input).
 */
PegTokenizer.prototype.inline_breaks_hash = function (input, pos, syntaxFlags ) {
    var handler = this.breakMap[ input[pos] ];
    if ( typeof handler !== 'function' ) {
        // Characters without an entry can never be a break; report that
        // instead of throwing a TypeError on the missing handler.
        return null;
    }
    // Keep breakMap as the receiver, exactly as a direct
    // this.breakMap[c]( ... ) call would.
    return handler.call( this.breakMap, input, pos, syntaxFlags );
};
129 | 176 | |
130 | | -PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) { |
131 | | - switch( input[pos] ) { |
132 | | - case '=': |
133 | | - return syntaxFlags.equal || |
134 | | - ( syntaxFlags.h && |
135 | | - input.substr( pos + 1, 200) |
136 | | - .match(/[ \t]*[\r\n]/) !== null ) || null; |
137 | | - case '|': |
138 | | - return syntaxFlags.template || |
139 | | - ( syntaxFlags.table && |
140 | | - ( input[pos + 1].match(/[|}]/) !== null || |
141 | | - syntaxFlags.tableCellArg |
142 | | - ) |
143 | | - ) || null; |
144 | | - case "!": |
145 | | - return syntaxFlags.table && input[pos + 1] === "!" || |
146 | | - null; |
147 | | - case "}": |
148 | | - return syntaxFlags.template && input[pos + 1] === "}" || null; |
149 | | - case ":": |
150 | | - return syntaxFlags.colon && |
151 | | - ! syntaxFlags.extlink && |
152 | | - ! syntaxFlags.linkdesc || null; |
153 | | - case "\r": |
154 | | - return syntaxFlags.table && |
155 | | - input.substr(pos, 4).match(/\r\n?[!|]/) !== null || |
156 | | - null; |
157 | | - case "\n": |
158 | | - return syntaxFlags.table && |
159 | | - input[pos + 1] === '!' || |
160 | | - input[pos + 1] === '|' || |
161 | | - null; |
162 | | - case "]": |
163 | | - return syntaxFlags.extlink || |
164 | | - ( syntaxFlags.linkdesc && input[pos + 1] === ']' ) || |
165 | | - null; |
166 | | - case "<": |
167 | | - return syntaxFlags.pre && input.substr( pos, 6 ) === '</pre>' || null; |
168 | | - default: |
169 | | - return null; |
170 | | - } |
171 | | -}; |
172 | 177 | |
173 | 178 | |
174 | | -/***************************************************************************** |
175 | | - * LEGACY stuff |
176 | | - * |
177 | | - * This is kept around as a template for the ongoing template expansion work! |
178 | | - * It won't work with the token infrastructure. |
179 | | - */ |
180 | | - |
181 | | - |
182 | | -/** |
183 | | - * @param {object} tree |
184 | | - * @param {function(tree, error)} callback |
185 | | - */ |
186 | | -PegTokenizer.prototype.expandTree = function(tree, callback) { |
187 | | - var self = this; |
188 | | - var subParseArray = function(listOfTrees) { |
189 | | - var content = []; |
190 | | - $.each(listOfTrees, function(i, subtree) { |
191 | | - self.expandTree(subtree, function(substr, err) { |
192 | | - content.push(tree); |
193 | | - }); |
194 | | - }); |
195 | | - return content; |
196 | | - }; |
197 | | - var src; |
198 | | - if (typeof tree === "string") { |
199 | | - callback(tree); |
200 | | - return; |
201 | | - } |
202 | | - if (tree.type == 'template') { |
203 | | - // expand a template node! |
204 | | - |
205 | | - // Resolve a possibly relative link |
206 | | - var templateName = this.env.resolveTitle( tree.target, 'Template' ); |
207 | | - this.env.fetchTemplate( tree.target, tree.params || {}, function( templateSrc, error ) { |
208 | | - // @fixme should pre-parse/cache these too? |
209 | | - self.parseToTree( templateSrc, function( templateTree, error ) { |
210 | | - if ( error ) { |
211 | | - callback({ |
212 | | - type: 'placeholder', |
213 | | - orig: tree, |
214 | | - content: [ |
215 | | - { |
216 | | - // @fixme broken link? |
217 | | - type: 'link', |
218 | | - target: templateName |
219 | | - } |
220 | | - ] |
221 | | - }); |
222 | | - } else { |
223 | | - callback({ |
224 | | - type: 'placeholder', |
225 | | - orig: tree, |
226 | | - content: self.env.expandTemplateArgs( templateTree, tree.params ) |
227 | | - }); |
228 | | - } |
229 | | - }); |
230 | | - } ); |
231 | | - // Wait for async... |
232 | | - return; |
233 | | - } |
234 | | - var out = $.extend( tree ); // @fixme prefer a deep copy? |
235 | | - if (tree.content) { |
236 | | - out.content = subParseArray(tree.content); |
237 | | - } |
238 | | - callback(out); |
239 | | -}; |
240 | | - |
241 | | -PegTokenizer.prototype.initSource = function(callback) { |
242 | | - if (PegTokenizer.src) { |
243 | | - callback(); |
244 | | - } else { |
245 | | - if ( typeof parserPlaygroundPegPage !== 'undefined' ) { |
246 | | - $.ajax({ |
247 | | - url: wgScriptPath + '/api' + wgScriptExtension, |
248 | | - data: { |
249 | | - format: 'json', |
250 | | - action: 'query', |
251 | | - prop: 'revisions', |
252 | | - rvprop: 'content', |
253 | | - titles: parserPlaygroundPegPage |
254 | | - }, |
255 | | - success: function(data, xhr) { |
256 | | - $.each(data.query.pages, function(i, page) { |
257 | | - if (page.revisions && page.revisions.length) { |
258 | | - PegTokenizer.src = page.revisions[0]['*']; |
259 | | - } |
260 | | - }); |
261 | | - callback(); |
262 | | - }, |
263 | | - dataType: 'json', |
264 | | - cache: false |
265 | | - }, 'json'); |
266 | | - } else { |
267 | | - $.ajax({ |
268 | | - url: mw.config.get('wgParserPlaygroundAssetsPath', mw.config.get('wgExtensionAssetsPath')) + '/ParserPlayground/modules/pegParser.pegjs.txt', |
269 | | - success: function(data) { |
270 | | - PegTokenizer.src = data; |
271 | | - callback(); |
272 | | - }, |
273 | | - dataType: 'text', |
274 | | - cache: false |
275 | | - }); |
276 | | - } |
277 | | - } |
278 | | -}; |
279 | | - |
280 | | - |
281 | 179 | if (typeof module == "object") { |
282 | 180 | module.exports.PegTokenizer = PegTokenizer; |
283 | 181 | } |