r112031 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r112030 | r112031 | r112032 >
Date:	18:26, 21 February 2012
Author:	gwicke
Status:	deferred
Tags:
Comment:	Remove some more unused code and tidy up some more.
Modified paths:	/trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history) /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
—	—	@@ -413,57 +413,14 @@
414	414	* Syntax stops: Avoid eating significant tokens for higher-level productions
415	415	* in nested inline productions.
416	416	*
417		~~- * XXX: Repeated testing of flags is not terribly efficient.~~
	417	+ * Repeated testing of flags is not terribly efficient. See new and faster
	418	+ * version below.
418	419	*/
419	420
420		~~-inline_breaks_ =~~
421		~~- & [=\|!}:\r\n\]<]~~
422		~~- & { cacheKey = ''; ilbpos = pos; return true; }~~
423		~~- res:inline_breaks_o~~
424		-{
425		~~- console.warn( 'ilbo res: ' + JSON.stringify( [ res, input.substr( ilbpos, 4 ) ] ) );~~
426		~~- return res;~~
427		-}
428		-
429		-
430		~~-inline_breaks_o~~
431		~~- = & [=\|!}:\r\n\]<] // don't check further if char cannot match~~
432		~~- res:(~~
433		~~- & { // Important hack: disable caching for this production, as the default~~
434		~~- // cache key does not take into account flag states!~~
435		~~- cacheKey = '';~~
436		~~- //console.warn('ilb: ' + input.substr(pos, 5) );~~
437		~~- return true;~~
438		~~- }~~
439		-
440		~~- & { return syntaxFlags['table']; }~~
441		~~- ( a:(newline [!\|] / '\|\|' / '!!' / '\|}') {~~
442		~~- //console.warn("table break" + pp(a) + pos);~~
443		~~- return true;~~
444		~~- }~~
445		~~- / & { return syntaxFlags['tableCellArg'] }~~
446		~~- "\|" { return true }~~
447		~~- )~~
448		~~- / & { return (syntaxFlags['colon'] &&~~
449		~~- ! syntaxFlags.extlink && // example: ; [[Link:Term]] : Definition~~
450		~~- ! syntaxFlags.linkdesc); } ":" { return true; }~~
451		~~- / & { return syntaxFlags['extlink']; } "]" { return true; }~~
452		~~- / & { return syntaxFlags['linkdesc']; } link_end { return true; }~~
453		~~- / & { return syntaxFlags['h']; } '='+ space* newline { return true; }~~
454		~~- / & { return syntaxFlags['template']; } ('\|' / '}}' ) {~~
455		~~- //console.warn( 'template break @' + pos + input.substr(pos-1, 4) );~~
456		~~- return true;~~
457		~~- }~~
458		~~- / & { return syntaxFlags['equal']; } '=' {~~
459		~~- //console.warn( 'equal stop @' + pos + input.substr(pos-1, 4) );~~
460		~~- return true;~~
461		~~- }~~
462		~~- / & { return syntaxFlags['pre']; } '</pre>' {~~
463		~~- //console.warn( 'pre stop @' + pos + input.substr(pos-1, 4) );~~
464		~~- return true;~~
465		~~- }~~
466		~~- ) { return res }~~
467		-
	421	+/*
	422	+ * Syntax stops: Avoid eating significant tokens for higher-level productions
	423	+ * in nested inline productions.
	424	+ */
468	425	inline_breaks
469	426	= & [=\|!}:\r\n\]<]
470	427	& { // Important hack: disable caching for this production, as the default
—	—	@@ -661,6 +618,14 @@
662	619	* Templates, -arguments and wikilinks
663	620	**************************************************************/
664	621
	622	+/*
	623	+ * Precedence: template arguments win over templates. See
	624	+ * http://www.mediawiki.org/wiki/Preprocessor_ABNF#Ideal_precedence
	625	+ * 4: {{{{·}}}} → {·{{{·}}}·}
	626	+ * 5: {{{{{·}}}}} → {{·{{{·}}}·}}
	627	+ * 6: {{{{{{·}}}}}} → {{{·{{{·}}}·}}}
	628	+ * 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·}
	629	+ */
665	630	tplarg_or_template = & '{{{{{' template / tplarg / template
666	631
667	632	template
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
—	—	@@ -4,6 +4,7 @@
5	5	*
6	6	* Use along with a HTML5TreeBuilder and the DOMPostProcessor(s) for HTML
7	7	* output.
	8	+ *
8	9	*/
9	10
10	11	var PEG = require('pegjs'),
—	—	@@ -26,6 +27,11 @@
27	28
28	29	PegTokenizer.src = false;
29	30
	31	+/*
	32	+ * The main worker. Sets up event emission ('chunk' and 'end' events).
	33	+ * Consumers are supposed to register with PegTokenizer before calling
	34	+ * process().
	35	+ */
30	36	PegTokenizer.prototype.process = function( text ) {
31	37	var out, err;
32	38	if ( !this.parser ) {
—	—	@@ -36,23 +42,15 @@
37	43	'parse: function(input, startRule) { var __parseArgs = arguments;' );
38	44	//console.warn( parserSource );
39	45	PegTokenizer.prototype.parser = eval( parserSource );
40		~~- // add reference to this for event emission~~
41		~~- // XXX: pass a cb into parse() instead, but need to modify pegjs a bit~~
42		~~- // for that.~~
43		~~- //PegTokenizer.prototype.parser._tokenizer = undefined;~~
44		-
45		~~- // Print the generated parser source~~
46		~~- //console.warn(this.parser.toSource());~~
47	46	}
48	47
49		~~- // some normalization~~
	48	+ // Some input normalization: force a trailing newline
50	49	if ( text.substring(text.length - 1) !== "\n" ) {
51	50	text += "\n";
52	51	}
53	52
54	53	// XXX: Commented out exception handling during development to get
55		~~- // reasonable traces. Calling a trace on the extension does not really cut~~
56		~~- // it.~~
	54	+ // reasonable traces.
57	55	//try {
58	56	this.parser.parse(text, 'start',
59	57	// callback
—	—	@@ -60,9 +58,6 @@
61	59	// inline break test
62	60	this
63	61	);
64		~~- // emit tokens here until we get that to work per toplevelblock in the~~
65		~~- // actual tokenizer~~
66		~~- //this.emit('chunk', out.concat( [{ type: 'END' }] ) );~~
67	62	this.emit('end');
68	63	//} catch (e) {
69	64	//err = e;
—	—	@@ -72,6 +67,58 @@
73	68	//}
74	69	};
75	70
	71	+
	72	+/*
	73	+ * Inline breaks, flag-enabled production which detects end positions for
	74	+ * active higher-level productions in inline and other nested productions.
	75	+ * Those inner productions are then exited, so that the outer production can
	76	+ * handle the end marker.
	77	+ */
	78	+PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) {
	79	+ switch( input[pos] ) {
	80	+ case '=':
	81	+ return syntaxFlags.equal \|\|
	82	+ ( syntaxFlags.h &&
	83	+ input.substr( pos + 1, 200)
	84	+ .match(/[ \t]*[\r\n]/) !== null ) \|\| null;
	85	+ case '\|':
	86	+ return syntaxFlags.template \|\|
	87	+ ( syntaxFlags.table &&
	88	+ ( input[pos + 1].match(/[\|}]/) !== null \|\|
	89	+ syntaxFlags.tableCellArg
	90	+ )
	91	+ ) \|\| null;
	92	+ case "!":
	93	+ return syntaxFlags.table && input[pos + 1] === "!" \|\|
	94	+ null;
	95	+ case "}":
	96	+ return syntaxFlags.template && input[pos + 1] === "}" \|\| null;
	97	+ case ":":
	98	+ return syntaxFlags.colon &&
	99	+ ! syntaxFlags.extlink &&
	100	+ ! syntaxFlags.linkdesc \|\| null;
	101	+ case "\r":
	102	+ return syntaxFlags.table &&
	103	+ input.substr(pos, 4).match(/\r\n?[!\|]/) !== null \|\|
	104	+ null;
	105	+ case "\n":
	106	+ return syntaxFlags.table &&
	107	+ input[pos + 1] === '!' \|\|
	108	+ input[pos + 1] === '\|' \|\|
	109	+ null;
	110	+ case "]":
	111	+ return syntaxFlags.extlink \|\|
	112	+ ( syntaxFlags.linkdesc && input[pos + 1] === ']' ) \|\|
	113	+ null;
	114	+ case "<":
	115	+ return syntaxFlags.pre && input.substr( pos, 6 ) === '</pre>' \|\| null;
	116	+ default:
	117	+ return null;
	118	+ }
	119	+};
	120	+
	121	+// Alternate version of the above. The hash is likely faster, but the nested
	122	+// function calls seem to cancel that out.
76	123	PegTokenizer.prototype.breakMap = {
77	124	'=': function(input, pos, syntaxFlags) {
78	125	return syntaxFlags.equal \|\|
—	—	@@ -120,163 +167,14 @@
121	168	}
122	169	};
123	170
124		~~-PegTokenizer.prototype.inline_breaks_ = function (input, pos, syntaxFlags ) {~~
	171	+PegTokenizer.prototype.inline_breaks_hash = function (input, pos, syntaxFlags ) {
125	172	return this.breakMap[ input[pos] ]( input, pos, syntaxFlags);
126	173	//console.warn( 'ilbn res: ' + JSON.stringify( [ res, input.substr( pos, 4 ) ] ) );
127	174	//return res;
128	175	};
129	176
130		~~-PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) {~~
131		~~- switch( input[pos] ) {~~
132		~~- case '=':~~
133		~~- return syntaxFlags.equal \|\|~~
134		~~- ( syntaxFlags.h &&~~
135		~~- input.substr( pos + 1, 200)~~
136		~~- .match(/[ \t]*[\r\n]/) !== null ) \|\| null;~~
137		~~- case '\|':~~
138		~~- return syntaxFlags.template \|\|~~
139		~~- ( syntaxFlags.table &&~~
140		~~- ( input[pos + 1].match(/[\|}]/) !== null \|\|~~
141		~~- syntaxFlags.tableCellArg~~
142		~~- )~~
143		~~- ) \|\| null;~~
144		~~- case "!":~~
145		~~- return syntaxFlags.table && input[pos + 1] === "!" \|\|~~
146		~~- null;~~
147		~~- case "}":~~
148		~~- return syntaxFlags.template && input[pos + 1] === "}" \|\| null;~~
149		~~- case ":":~~
150		~~- return syntaxFlags.colon &&~~
151		~~- ! syntaxFlags.extlink &&~~
152		~~- ! syntaxFlags.linkdesc \|\| null;~~
153		~~- case "\r":~~
154		~~- return syntaxFlags.table &&~~
155		~~- input.substr(pos, 4).match(/\r\n?[!\|]/) !== null \|\|~~
156		~~- null;~~
157		~~- case "\n":~~
158		~~- return syntaxFlags.table &&~~
159		~~- input[pos + 1] === '!' \|\|~~
160		~~- input[pos + 1] === '\|' \|\|~~
161		~~- null;~~
162		~~- case "]":~~
163		~~- return syntaxFlags.extlink \|\|~~
164		~~- ( syntaxFlags.linkdesc && input[pos + 1] === ']' ) \|\|~~
165		~~- null;~~
166		~~- case "<":~~
167		~~- return syntaxFlags.pre && input.substr( pos, 6 ) === '</pre>' \|\| null;~~
168		~~- default:~~
169		~~- return null;~~
170		~~- }~~
171		~~-};~~
172	177
173	178
174		-/*****************************************************************************
175		~~- * LEGACY stuff~~
176		- *
177		~~- * This is kept around as a template for the ongoing template expansion work!~~
178		~~- * It won't work with the token infrastructure.~~
179		~~- */~~
180		-
181		-
182		-/**
183		~~- * @param {object} tree~~
184		~~- * @param {function(tree, error)} callback~~
185		~~- */~~
186		~~-PegTokenizer.prototype.expandTree = function(tree, callback) {~~
187		~~- var self = this;~~
188		~~- var subParseArray = function(listOfTrees) {~~
189		~~- var content = [];~~
190		~~- $.each(listOfTrees, function(i, subtree) {~~
191		~~- self.expandTree(subtree, function(substr, err) {~~
192		~~- content.push(tree);~~
193		~~- });~~
194		~~- });~~
195		~~- return content;~~
196		~~- };~~
197		~~- var src;~~
198		~~- if (typeof tree === "string") {~~
199		~~- callback(tree);~~
200		~~- return;~~
201		~~- }~~
202		~~- if (tree.type == 'template') {~~
203		~~- // expand a template node!~~
204		-
205		~~- // Resolve a possibly relative link~~
206		~~- var templateName = this.env.resolveTitle( tree.target, 'Template' );~~
207		~~- this.env.fetchTemplate( tree.target, tree.params \|\| {}, function( templateSrc, error ) {~~
208		~~- // @fixme should pre-parse/cache these too?~~
209		~~- self.parseToTree( templateSrc, function( templateTree, error ) {~~
210		~~- if ( error ) {~~
211		~~- callback({~~
212		~~- type: 'placeholder',~~
213		~~- orig: tree,~~
214		~~- content: [~~
215		~~- {~~
216		~~- // @fixme broken link?~~
217		~~- type: 'link',~~
218		~~- target: templateName~~
219		~~- }~~
220		~~- ]~~
221		~~- });~~
222		~~- } else {~~
223		~~- callback({~~
224		~~- type: 'placeholder',~~
225		~~- orig: tree,~~
226		~~- content: self.env.expandTemplateArgs( templateTree, tree.params )~~
227		~~- });~~
228		~~- }~~
229		~~- });~~
230		~~- } );~~
231		~~- // Wait for async...~~
232		~~- return;~~
233		~~- }~~
234		~~- var out = $.extend( tree ); // @fixme prefer a deep copy?~~
235		~~- if (tree.content) {~~
236		~~- out.content = subParseArray(tree.content);~~
237		~~- }~~
238		~~- callback(out);~~
239		~~-};~~
240		-
241		~~-PegTokenizer.prototype.initSource = function(callback) {~~
242		~~- if (PegTokenizer.src) {~~
243		~~- callback();~~
244		~~- } else {~~
245		~~- if ( typeof parserPlaygroundPegPage !== 'undefined' ) {~~
246		~~- $.ajax({~~
247		~~- url: wgScriptPath + '/api' + wgScriptExtension,~~
248		~~- data: {~~
249		~~- format: 'json',~~
250		~~- action: 'query',~~
251		~~- prop: 'revisions',~~
252		~~- rvprop: 'content',~~
253		~~- titles: parserPlaygroundPegPage~~
254		~~- },~~
255		~~- success: function(data, xhr) {~~
256		~~- $.each(data.query.pages, function(i, page) {~~
257		~~- if (page.revisions && page.revisions.length) {~~
258		~~- PegTokenizer.src = page.revisions[0]['*'];~~
259		~~- }~~
260		~~- });~~
261		~~- callback();~~
262		~~- },~~
263		~~- dataType: 'json',~~
264		~~- cache: false~~
265		~~- }, 'json');~~
266		~~- } else {~~
267		~~- $.ajax({~~
268		~~- url: mw.config.get('wgParserPlaygroundAssetsPath', mw.config.get('wgExtensionAssetsPath')) + '/ParserPlayground/modules/pegParser.pegjs.txt',~~
269		~~- success: function(data) {~~
270		~~- PegTokenizer.src = data;~~
271		~~- callback();~~
272		~~- },~~
273		~~- dataType: 'text',~~
274		~~- cache: false~~
275		~~- });~~
276		~~- }~~
277		~~- }~~
278		~~-};~~
279		-
280		-
281	179	if (typeof module == "object") {
282	180	module.exports.PegTokenizer = PegTokenizer;
283	181	}

Status & tagging log

18:30, 21 February 2012 GWicke (talk | contribs) changed the status of r112031 [removed: new added: deferred]