r112031 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r112030‎ | r112031 | r112032 >
Date:18:26, 21 February 2012
Author:gwicke
Status:deferred
Tags:
Comment:
Remove some more unused code and tidy up some more.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -413,57 +413,14 @@
414414 * Syntax stops: Avoid eating significant tokens for higher-level productions
415415 * in nested inline productions.
416416 *
417 - * XXX: Repeated testing of flags is not terribly efficient.
 417+ * Repeated testing of flags is not terribly efficient. See new and faster
 418+ * version below.
418419 */
419420
420 -inline_breaks_ =
421 - & [=|!}:\r\n\]<]
422 - & { cacheKey = ''; ilbpos = pos; return true; }
423 - res:inline_breaks_o
424 -{
425 - console.warn( 'ilbo res: ' + JSON.stringify( [ res, input.substr( ilbpos, 4 ) ] ) );
426 - return res;
427 -}
428 -
429 -
430 -inline_breaks_o
431 - = & [=|!}:\r\n\]<] // don't check further if char cannot match
432 - res:(
433 - & { // Important hack: disable caching for this production, as the default
434 - // cache key does not take into account flag states!
435 - cacheKey = '';
436 - //console.warn('ilb: ' + input.substr(pos, 5) );
437 - return true;
438 - }
439 -
440 - & { return syntaxFlags['table']; }
441 - ( a:(newline [!|] / '||' / '!!' / '|}') {
442 - //console.warn("table break" + pp(a) + pos);
443 - return true;
444 - }
445 - / & { return syntaxFlags['tableCellArg'] }
446 - "|" { return true }
447 - )
448 - / & { return (syntaxFlags['colon'] &&
449 - ! syntaxFlags.extlink && // example: ; [[Link:Term]] : Definition
450 - ! syntaxFlags.linkdesc); } ":" { return true; }
451 - / & { return syntaxFlags['extlink']; } "]" { return true; }
452 - / & { return syntaxFlags['linkdesc']; } link_end { return true; }
453 - / & { return syntaxFlags['h']; } '='+ space* newline { return true; }
454 - / & { return syntaxFlags['template']; } ('|' / '}}' ) {
455 - //console.warn( 'template break @' + pos + input.substr(pos-1, 4) );
456 - return true;
457 - }
458 - / & { return syntaxFlags['equal']; } '=' {
459 - //console.warn( 'equal stop @' + pos + input.substr(pos-1, 4) );
460 - return true;
461 - }
462 - / & { return syntaxFlags['pre']; } '</pre>' {
463 - //console.warn( 'pre stop @' + pos + input.substr(pos-1, 4) );
464 - return true;
465 - }
466 - ) { return res }
467 -
 421+/*
 422+ * Syntax stops: Avoid eating significant tokens for higher-level productions
 423+ * in nested inline productions.
 424+ */
468425 inline_breaks
469426 = & [=|!}:\r\n\]<]
470427 & { // Important hack: disable caching for this production, as the default
@@ -661,6 +618,14 @@
662619 * Templates, -arguments and wikilinks
663620 **************************************************************/
664621
 622+/*
 623+ * Precedence: template arguments win over templates. See
 624+ * http://www.mediawiki.org/wiki/Preprocessor_ABNF#Ideal_precedence
 625+ * 4: {{{{·}}}} → {·{{{·}}}·}
 626+ * 5: {{{{{·}}}}} → {{·{{{·}}}·}}
 627+ * 6: {{{{{{·}}}}}} → {{{·{{{·}}}·}}}
 628+ * 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·}
 629+ */
665630 tplarg_or_template = & '{{{{{' template / tplarg / template
666631
667632 template
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
@@ -4,6 +4,7 @@
55 *
66 * Use along with a HTML5TreeBuilder and the DOMPostProcessor(s) for HTML
77 * output.
 8+ *
89 */
910
1011 var PEG = require('pegjs'),
@@ -26,6 +27,11 @@
2728
2829 PegTokenizer.src = false;
2930
 31+/*
 32+ * The main worker. Sets up event emission ('chunk' and 'end' events).
 33+ * Consumers are supposed to register with PegTokenizer before calling
 34+ * process().
 35+ */
3036 PegTokenizer.prototype.process = function( text ) {
3137 var out, err;
3238 if ( !this.parser ) {
@@ -36,23 +42,15 @@
3743 'parse: function(input, startRule) { var __parseArgs = arguments;' );
3844 //console.warn( parserSource );
3945 PegTokenizer.prototype.parser = eval( parserSource );
40 - // add reference to this for event emission
41 - // XXX: pass a cb into parse() instead, but need to modify pegjs a bit
42 - // for that.
43 - //PegTokenizer.prototype.parser._tokenizer = undefined;
44 -
45 - // Print the generated parser source
46 - //console.warn(this.parser.toSource());
4746 }
4847
49 - // some normalization
 48+ // Some input normalization: force a trailing newline
5049 if ( text.substring(text.length - 1) !== "\n" ) {
5150 text += "\n";
5251 }
5352
5453 // XXX: Commented out exception handling during development to get
55 - // reasonable traces. Calling a trace on the extension does not really cut
56 - // it.
 54+ // reasonable traces.
5755 //try {
5856 this.parser.parse(text, 'start',
5957 // callback
@@ -60,9 +58,6 @@
6159 // inline break test
6260 this
6361 );
64 - // emit tokens here until we get that to work per toplevelblock in the
65 - // actual tokenizer
66 - //this.emit('chunk', out.concat( [{ type: 'END' }] ) );
6762 this.emit('end');
6863 //} catch (e) {
6964 //err = e;
@@ -72,6 +67,58 @@
7368 //}
7469 };
7570
 71+
 72+/*
 73+ * Inline breaks, flag-enabled production which detects end positions for
 74+ * active higher-level productions in inline and other nested productions.
 75+ * Those inner productions are then exited, so that the outer production can
 76+ * handle the end marker.
 77+ */
 78+PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) {
 79+ switch( input[pos] ) {
 80+ case '=':
 81+ return syntaxFlags.equal ||
 82+ ( syntaxFlags.h &&
 83+ input.substr( pos + 1, 200)
 84+ .match(/[ \t]*[\r\n]/) !== null ) || null;
 85+ case '|':
 86+ return syntaxFlags.template ||
 87+ ( syntaxFlags.table &&
 88+ ( input[pos + 1].match(/[|}]/) !== null ||
 89+ syntaxFlags.tableCellArg
 90+ )
 91+ ) || null;
 92+ case "!":
 93+ return syntaxFlags.table && input[pos + 1] === "!" ||
 94+ null;
 95+ case "}":
 96+ return syntaxFlags.template && input[pos + 1] === "}" || null;
 97+ case ":":
 98+ return syntaxFlags.colon &&
 99+ ! syntaxFlags.extlink &&
 100+ ! syntaxFlags.linkdesc || null;
 101+ case "\r":
 102+ return syntaxFlags.table &&
 103+ input.substr(pos, 4).match(/\r\n?[!|]/) !== null ||
 104+ null;
 105+ case "\n":
 106+ return syntaxFlags.table &&
 107+ input[pos + 1] === '!' ||
 108+ input[pos + 1] === '|' ||
 109+ null;
 110+ case "]":
 111+ return syntaxFlags.extlink ||
 112+ ( syntaxFlags.linkdesc && input[pos + 1] === ']' ) ||
 113+ null;
 114+ case "<":
 115+ return syntaxFlags.pre && input.substr( pos, 6 ) === '</pre>' || null;
 116+ default:
 117+ return null;
 118+ }
 119+};
 120+
 121+// Alternate version of the above. The hash is likely faster, but the nested
 122+// function calls seem to cancel that out.
76123 PegTokenizer.prototype.breakMap = {
77124 '=': function(input, pos, syntaxFlags) {
78125 return syntaxFlags.equal ||
@@ -120,163 +167,14 @@
121168 }
122169 };
123170
124 -PegTokenizer.prototype.inline_breaks_ = function (input, pos, syntaxFlags ) {
 171+PegTokenizer.prototype.inline_breaks_hash = function (input, pos, syntaxFlags ) {
125172 return this.breakMap[ input[pos] ]( input, pos, syntaxFlags);
126173 //console.warn( 'ilbn res: ' + JSON.stringify( [ res, input.substr( pos, 4 ) ] ) );
127174 //return res;
128175 };
129176
130 -PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) {
131 - switch( input[pos] ) {
132 - case '=':
133 - return syntaxFlags.equal ||
134 - ( syntaxFlags.h &&
135 - input.substr( pos + 1, 200)
136 - .match(/[ \t]*[\r\n]/) !== null ) || null;
137 - case '|':
138 - return syntaxFlags.template ||
139 - ( syntaxFlags.table &&
140 - ( input[pos + 1].match(/[|}]/) !== null ||
141 - syntaxFlags.tableCellArg
142 - )
143 - ) || null;
144 - case "!":
145 - return syntaxFlags.table && input[pos + 1] === "!" ||
146 - null;
147 - case "}":
148 - return syntaxFlags.template && input[pos + 1] === "}" || null;
149 - case ":":
150 - return syntaxFlags.colon &&
151 - ! syntaxFlags.extlink &&
152 - ! syntaxFlags.linkdesc || null;
153 - case "\r":
154 - return syntaxFlags.table &&
155 - input.substr(pos, 4).match(/\r\n?[!|]/) !== null ||
156 - null;
157 - case "\n":
158 - return syntaxFlags.table &&
159 - input[pos + 1] === '!' ||
160 - input[pos + 1] === '|' ||
161 - null;
162 - case "]":
163 - return syntaxFlags.extlink ||
164 - ( syntaxFlags.linkdesc && input[pos + 1] === ']' ) ||
165 - null;
166 - case "<":
167 - return syntaxFlags.pre && input.substr( pos, 6 ) === '</pre>' || null;
168 - default:
169 - return null;
170 - }
171 -};
172177
173178
174 -/*****************************************************************************
175 - * LEGACY stuff
176 - *
177 - * This is kept around as a template for the ongoing template expansion work!
178 - * It won't work with the token infrastructure.
179 - */
180 -
181 -
182 -/**
183 - * @param {object} tree
184 - * @param {function(tree, error)} callback
185 - */
186 -PegTokenizer.prototype.expandTree = function(tree, callback) {
187 - var self = this;
188 - var subParseArray = function(listOfTrees) {
189 - var content = [];
190 - $.each(listOfTrees, function(i, subtree) {
191 - self.expandTree(subtree, function(substr, err) {
192 - content.push(tree);
193 - });
194 - });
195 - return content;
196 - };
197 - var src;
198 - if (typeof tree === "string") {
199 - callback(tree);
200 - return;
201 - }
202 - if (tree.type == 'template') {
203 - // expand a template node!
204 -
205 - // Resolve a possibly relative link
206 - var templateName = this.env.resolveTitle( tree.target, 'Template' );
207 - this.env.fetchTemplate( tree.target, tree.params || {}, function( templateSrc, error ) {
208 - // @fixme should pre-parse/cache these too?
209 - self.parseToTree( templateSrc, function( templateTree, error ) {
210 - if ( error ) {
211 - callback({
212 - type: 'placeholder',
213 - orig: tree,
214 - content: [
215 - {
216 - // @fixme broken link?
217 - type: 'link',
218 - target: templateName
219 - }
220 - ]
221 - });
222 - } else {
223 - callback({
224 - type: 'placeholder',
225 - orig: tree,
226 - content: self.env.expandTemplateArgs( templateTree, tree.params )
227 - });
228 - }
229 - });
230 - } );
231 - // Wait for async...
232 - return;
233 - }
234 - var out = $.extend( tree ); // @fixme prefer a deep copy?
235 - if (tree.content) {
236 - out.content = subParseArray(tree.content);
237 - }
238 - callback(out);
239 -};
240 -
241 -PegTokenizer.prototype.initSource = function(callback) {
242 - if (PegTokenizer.src) {
243 - callback();
244 - } else {
245 - if ( typeof parserPlaygroundPegPage !== 'undefined' ) {
246 - $.ajax({
247 - url: wgScriptPath + '/api' + wgScriptExtension,
248 - data: {
249 - format: 'json',
250 - action: 'query',
251 - prop: 'revisions',
252 - rvprop: 'content',
253 - titles: parserPlaygroundPegPage
254 - },
255 - success: function(data, xhr) {
256 - $.each(data.query.pages, function(i, page) {
257 - if (page.revisions && page.revisions.length) {
258 - PegTokenizer.src = page.revisions[0]['*'];
259 - }
260 - });
261 - callback();
262 - },
263 - dataType: 'json',
264 - cache: false
265 - }, 'json');
266 - } else {
267 - $.ajax({
268 - url: mw.config.get('wgParserPlaygroundAssetsPath', mw.config.get('wgExtensionAssetsPath')) + '/ParserPlayground/modules/pegParser.pegjs.txt',
269 - success: function(data) {
270 - PegTokenizer.src = data;
271 - callback();
272 - },
273 - dataType: 'text',
274 - cache: false
275 - });
276 - }
277 - }
278 -};
279 -
280 -
281179 if (typeof module == "object") {
282180 module.exports.PegTokenizer = PegTokenizer;
283181 }

Status & tagging log