r103923 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r103922‎ | r103923 | r103924 >
Date:16:51, 22 November 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Minor improvement to italic/bold, documentation on failed modularization of
static parser functions.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/pegParser.pegjs.txt
@@ -1,11 +1,47 @@
22 /* Produces output more or less compatible with FakeParser; plug it into FP's output and see */
33 {
 4+ /* Fixme: move static functions to a separate module! Unfortunately, this
 5+ * does not work:
 6+ * var tu = require('./mediawiki.tokenizer.utils.js');
 7+ * console.log(tu.flatten([]));
 8+ * Using exports in the module gets a bit further, but accesses to
 9+ * tu.flatten in productions still fail. Thus, I just moved the functions
 10+ * here until a solution is found:
 11+ */
 12+
 13+ /* Static utilities */
 14+
 15+ // Flatten a list of lists.
 16+ var flatten = function ( e ) {
 17+ var es = [];
 18+ // flatten sub-arrays
 19+ for(var i = 0, length = e.length; i < length; i++) {
 20+ var ei = e[i];
 21+ if ($.isArray(ei))
 22+ es = es.concat(flatten(ei));
 23+ else
 24+ es.push(ei);
 25+ };
 26+ return es;
 27+ };
 28+
 29+ // Remove escaped quotes from attributes etc
 30+ var unquote = function (quotec, text) {
 31+ return text.replace('\\' + quotec, quotec);
 32+ };
 33+
 34+
 35+ // Debug print with global switch
436 var dp = function ( msg ) {
537 if ( false ) {
638 console.log(msg);
739 }
840 };
941
 42+ var pp = function ( s ) { return JSON.stringify(s, null, 2); }
 43+
 44+ /* End static utilities */
 45+
1046 /*
1147 * Flags for specific parse environments (inside tables, links etc). Flags
1248 * trigger syntactic stops in the inline_breaks production, which
@@ -27,89 +63,9 @@
2864 syntaxFlags[flag]--;
2965 };
3066
31 -
32 - var pp = function ( s ) { return JSON.stringify(s, null, 2); }
33 -
34 - // Convert list prefixes to a list of WikiDom list styles
35 - var bulletsToTypes = function (bullets) {
36 - var bTypes = [];
37 - var blen = bullets.length;
38 - for (var i = 0; i < bullets.length; i++) {
39 - switch (bullets[i]) {
40 - case '*':
41 - bTypes.push('bullet'); break;
42 - case '#':
43 - bTypes.push('number'); break;
44 - case ';':
45 - bTypes.push('term'); break;
46 - case ':':
47 - bTypes.push('description'); break;
48 - }
49 - }
50 - return bTypes;
51 - };
52 -
53 - /*var extractInline = function ( node ) {
54 - return { text: extractText(node, 0) };
55 - };
56 -
57 -
58 - // return [text [annotations]]
59 - var extractText = function ( node, offset ) {
60 - dp("extract: " + pp(node));
61 - if (typeof node === 'string') {
62 - return [node, []];
63 - } else if ($.isArray(node)) {
64 - var texts = [],
65 - annotations = [];
66 - for (var i = 0, length = node.length; i < length; i++) {
67 - var res = extractText(node[i], offset);
68 - texts.push(res[0]);
69 - annotations.concat(res[1]);
70 - offset += res[0].length;
71 - }
72 - return [texts.join(''), annotations];
73 - } else if ( 'text' in node ) {
74 - var res = extractText(node, offset);
75 - if ('annotations' in node) {
76 - return [res[0], node.annotations.concat(res[1])];
77 - } else {
78 - return res;
79 - }
80 - } else if ( 'content' in node ) {
81 - return extractText(node.content, offset);
82 - } else if ( 'children' in node ) {
83 - var texts = [];
84 - for (var i = 0, length = node.children.length; i < length; i++) {
85 - texts.push(extractText(node.children[i]));
86 - }
87 - return texts.join('');
88 - } else {
89 - throw ("extract failed: " + pp(node));
90 - }
91 - };
92 - */
93 -
9467 // Start position of top-level block
9568 // Could also provide positions for lower-level blocks using a stack.
9669 var blockStart = 0;
97 -
98 - var unquote = function (quotec, text) {
99 - return text.replace('\\' + quotec, quotec);
100 - };
101 -
102 - var flatten = function ( e ) {
103 - var es = [];
104 - // flatten sub-arrays
105 - for(var i = 0, length = e.length; i < length; i++) {
106 - var ei = e[i];
107 - if ($.isArray(ei))
108 - es = es.concat(flatten(ei));
109 - else
110 - es.push(ei);
111 - };
112 - return es;
113 - };
11470 }
11571
11672 start
@@ -455,26 +411,33 @@
456412
457413 link_end = "]]"
458414
 415+/* This implementation of bold and italic is very basic so far, and misses the
 416+ * finer points of doQuotes in the parser. A rough plan to get closer:
 417+ * - '''' -> ' ''' (the first tick is literal text, the other three mark bold)
 418+ * - in a longer row of ', only the last ''''' is used as markup
 419+ * - if *both* italics and bolds are unbalanced, check for prefix
 420+ * - convert single-letter or multi-letter non-space prefixed tick back to
 421+ * text
 422+ */
459423 bold
460424 = bold_marker
461425 & { dp('benter:' + pos); return setFlag('bold'); }
462426 c:inlineline
463 - bold_marker {
 427+ (bold_marker / &newline) {
464428 clearFlag('bold');
465429 return [{ type: 'TAG', name: 'b' }]
466430 .concat(c, [{type: 'ENDTAG', name: 'b'}]);
467431 }
468432 / bold_marker { clearFlag('bold'); return null }
469433
470 -bold_marker
471 - = "'''"
 434+bold_marker = "'''"
472435
473436
474437 italic
475438 = italic_marker
476439 & { dp('ienter:' + pos); return setFlag('italic'); }
477440 c:inlineline
478 - italic_marker {
 441+ (italic_marker / &newline) {
479442 clearFlag('italic');
480443 dp('ileave:' + pos);
481444 return [{ type: 'TAG', name: 'i' }]
@@ -482,8 +445,7 @@
483446 }
484447 / italic_marker { clearFlag('italic'); return null }
485448
486 -italic_marker
487 - = "''"
 449+italic_marker = "''"
488450
489451 /* Will need to check anything xmlish against known/allowed HTML tags and
490452 * registered extensions, otherwise fail the match. Should ref be treated as a

Status & tagging log