r103923 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r103922 | r103923 | r103924 >
Date:	16:51, 22 November 2011
Author:	gwicke
Status:	deferred
Tags:
Comment:	Minor improvement to italic/bold, documentation on failed modularization of static parser functions.
Modified paths:	/trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt
—	—	@@ -1,11 +1,47 @@
2	2	/* Produces output more or less compatible with FakeParser; plug it into FP's output and see */
3	3	{
	4	+ /* Fixme: move the static functions to a separate module! Unfortunately, this
	5	+ * does not work:
	6	+ * var tu = require('./mediawiki.tokenizer.utils.js');
	7	+ * console.log(tu.flatten([]));
	8	+ * Using exports in the module gets a bit further, but accesses to
	9	+ * tu.flatten in productions still fail. Thus, I just moved the functions
	10	+ * here until a solution is found:
	11	+ */
	12	+
	13	+ /* Static utilities */
	14	+
	15	+ // Flatten a list of lists.
	16	+ var flatten = function ( e ) {
	17	+ var es = [];
	18	+ // flatten sub-arrays
	19	+ for(var i = 0, length = e.length; i < length; i++) {
	20	+ var ei = e[i];
	21	+ if ($.isArray(ei))
	22	+ es = es.concat(flatten(ei));
	23	+ else
	24	+ es.push(ei);
	25	+ };
	26	+ return es;
	27	+ };
	28	+
	29	+ // Remove escaped quotes from attributes etc
	30	+ var unquote = function (quotec, text) {
	31	+ return text.replace('\\' + quotec, quotec);
	32	+ };
	33	+
	34	+
	35	+ // Debug print with global switch
4	36	var dp = function ( msg ) {
5	37	if ( false ) {
6	38	console.log(msg);
7	39	}
8	40	};
9	41
	42	+ var pp = function ( s ) { return JSON.stringify(s, null, 2); }
	43	+
	44	+ /* End static utilities */
	45	+
10	46	/*
11	47	* Flags for specific parse environments (inside tables, links etc). Flags
12	48	* trigger syntactic stops in the inline_breaks production, which
—	—	@@ -27,89 +63,9 @@
28	64	syntaxFlags[flag]--;
29	65	};
30	66
31		-
32		~~- var pp = function ( s ) { return JSON.stringify(s, null, 2); }~~
33		-
34		~~- // Convert list prefixes to a list of WikiDom list styles~~
35		~~- var bulletsToTypes = function (bullets) {~~
36		~~- var bTypes = [];~~
37		~~- var blen = bullets.length;~~
38		~~- for (var i = 0; i < bullets.length; i++) {~~
39		~~- switch (bullets[i]) {~~
40		~~- case '*':~~
41		~~- bTypes.push('bullet'); break;~~
42		~~- case '#':~~
43		~~- bTypes.push('number'); break;~~
44		~~- case ';':~~
45		~~- bTypes.push('term'); break;~~
46		~~- case ':':~~
47		~~- bTypes.push('description'); break;~~
48		~~- }~~
49		~~- }~~
50		~~- return bTypes;~~
51		~~- };~~
52		-
53		~~- /*var extractInline = function ( node ) {~~
54		~~- return { text: extractText(node, 0) };~~
55		~~- };~~
56		-
57		-
58		~~- // return [text [annotations]]~~
59		~~- var extractText = function ( node, offset ) {~~
60		~~- dp("extract: " + pp(node));~~
61		~~- if (typeof node === 'string') {~~
62		~~- return [node, []];~~
63		~~- } else if ($.isArray(node)) {~~
64		~~- var texts = [],~~
65		~~- annotations = [];~~
66		~~- for (var i = 0, length = node.length; i < length; i++) {~~
67		~~- var res = extractText(node[i], offset);~~
68		~~- texts.push(res[0]);~~
69		~~- annotations.concat(res[1]);~~
70		~~- offset += res[0].length;~~
71		~~- }~~
72		~~- return [texts.join(''), annotations];~~
73		~~- } else if ( 'text' in node ) {~~
74		~~- var res = extractText(node, offset);~~
75		~~- if ('annotations' in node) {~~
76		~~- return [res[0], node.annotations.concat(res[1])];~~
77		~~- } else {~~
78		~~- return res;~~
79		~~- }~~
80		~~- } else if ( 'content' in node ) {~~
81		~~- return extractText(node.content, offset);~~
82		~~- } else if ( 'children' in node ) {~~
83		~~- var texts = [];~~
84		~~- for (var i = 0, length = node.children.length; i < length; i++) {~~
85		~~- texts.push(extractText(node.children[i]));~~
86		~~- }~~
87		~~- return texts.join('');~~
88		~~- } else {~~
89		~~- throw ("extract failed: " + pp(node));~~
90		~~- }~~
91		~~- };~~
92		~~- */~~
93		-
94	67	// Start position of top-level block
95	68	// Could also provide positions for lower-level blocks using a stack.
96	69	var blockStart = 0;
97		-
98		~~- var unquote = function (quotec, text) {~~
99		~~- return text.replace('\\' + quotec, quotec);~~
100		~~- };~~
101		-
102		~~- var flatten = function ( e ) {~~
103		~~- var es = [];~~
104		~~- // flatten sub-arrays~~
105		~~- for(var i = 0, length = e.length; i < length; i++) {~~
106		~~- var ei = e[i];~~
107		~~- if ($.isArray(ei))~~
108		~~- es = es.concat(flatten(ei));~~
109		~~- else~~
110		~~- es.push(ei);~~
111		~~- };~~
112		~~- return es;~~
113		~~- };~~
114	70	}
115	71
116	72	start
—	—	@@ -455,26 +411,33 @@
456	412
457	413	link_end = "]]"
458	414
	415	+/* This implementation of bold and italic is very basic so far, and misses the
	416	+ * finer points of doQuotes in the parser. A rough plan to get closer:
	417	+ * - '''' -> ' '''
	418	+ * - last ''''' in a row of ' is used
	419	+ * - if both italics and bolds are unbalanced, check for prefix
	420	+ * - convert single-letter or multi-letter non-space prefixed tick back to
	421	+ * text
	422	+ */
459	423	bold
460	424	= bold_marker
461	425	& { dp('benter:' + pos); return setFlag('bold'); }
462	426	c:inlineline
463		~~- bold_marker {~~
	427	+ (bold_marker / &newline) {
464	428	clearFlag('bold');
465	429	return [{ type: 'TAG', name: 'b' }]
466	430	.concat(c, [{type: 'ENDTAG', name: 'b'}]);
467	431	}
468	432	/ bold_marker { clearFlag('bold'); return null }
469	433
470		~~-bold_marker~~
471		~~- = "'''"~~
	434	+bold_marker = "'''"
472	435
473	436
474	437	italic
475	438	= italic_marker
476	439	& { dp('ienter:' + pos); return setFlag('italic'); }
477	440	c:inlineline
478		~~- italic_marker {~~
	441	+ (italic_marker / &newline) {
479	442	clearFlag('italic');
480	443	dp('ileave:' + pos);
481	444	return [{ type: 'TAG', name: 'i' }]
—	—	@@ -482,8 +445,7 @@
483	446	}
484	447	/ italic_marker { clearFlag('italic'); return null }
485	448
486		~~-italic_marker~~
487		~~- = "''"~~
	449	+italic_marker = "''"
488	450
489	451	/* Will need to check anything xmlish against known/allowed HTML tags and
490	452	* registered extensions, otherwise fail the match. Should ref be treated as a

Status & tagging log

13:58, 25 November 2011 Hashar (talk | contribs) changed the status of r103923 [removed: new added: deferred]