r113620 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r113619‎ | r113620 | r113621 >
Date:13:08, 12 March 2012
Author:gwicke
Status:deferred
Tags:
Comment:
Refactor syntactic stops into an object and add a stack variant for option
values.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -271,20 +271,58 @@
272272 * productions can just be unrolled for all combinations of environments
273273 * at the cost of a much larger grammar.
274274 */
275 - var syntaxFlags = {};
276 - var setFlag = function(flag) {
277 - if (syntaxFlags[flag] !== undefined) {
278 - syntaxFlags[flag]++;
 275+ function SyntaxStops () {
 276+ this.counters = {};
 277+ this.stacks = {};
 278+ }
 279+ SyntaxStops.prototype.inc = function(flag) {
 280+ if (this.counters[flag] !== undefined) {
 281+ this.counters[flag]++;
279282 } else {
280 - syntaxFlags[flag] = 1;
 283+ this.counters[flag] = 1;
281284 }
282285 return true;
283286 };
284 - var clearFlag = function(flag) {
285 - syntaxFlags[flag]--;
 287+ SyntaxStops.prototype.dec = function(flag) {
 288+ this.counters[flag]--;
286289 return false;
287290 };
 291+ SyntaxStops.prototype.onCount = function ( name ) {
 292+ return this.counters[name];
 293+ };
288294
 295+ /**
 296+ * A stack for nested, but not cumulative syntactic stops.
 297+ * Example: '=' is allowed in values of template arguments, even if those
 298+ * are nested in attribute names.
 299+ */
 300+ SyntaxStops.prototype.push = function ( name, value ) {
 301+ if( this.stacks[name] === undefined ) {
 302+ this.stacks[name] = [value];
 303+ } else {
 304+ this.stacks[name].push( value );
 305+ }
 306+ return true;
 307+ };
 308+ SyntaxStops.prototype.pop = function ( name ) {
 309+ if( this.stacks[name] !== undefined ) {
 310+ this.stacks[name].pop();
 311+ } else {
 312+ throw "SyntaxStops.pop: unknown stop for " + name;
 313+ }
 314+ return false;
 315+ };
 316+ SyntaxStops.prototype.onStack = function ( name ) {
 317+ var stack = this.stacks[name];
 318+ if ( stack === undefined || stack.length === 0 ) {
 319+ return false;
 320+ } else {
 321+ return stack[stack.length - 1];
 322+ }
 323+ };
 324+
 325+ var stops = new SyntaxStops();
 326+
289327 // Start position of top-level block
290328 // Could also provide positions for lower-level blocks using a stack.
291329 var blockStart = 0;
@@ -472,7 +510,7 @@
473511 // cache key does not take into account flag states!
474512 cacheKey = '';
475513 //console.warn('ilbf: ' + input.substr(pos, 5) );
476 - return null !== __parseArgs[3].inline_breaks( input, pos, syntaxFlags )
 514+ return null !== __parseArgs[3].inline_breaks( input, pos, stops )
477515 }
478516
479517 inline
@@ -504,13 +542,13 @@
505543 // XXX: Also check to end to avoid inline parsing?
506544 r:(
507545 s:'='+ // moved in here to make s accessible to inner action
508 - & { return setFlag('h'); }
 546+ & { return stops.inc('h'); }
509547 c:inlineline
510548 e:'='+
511549 spc:(sp:space+ { return sp.join('') } / comment)*
512550 &eolf
513551 {
514 - clearFlag('h');
 552+ stops.dec('h');
515553 var level = Math.min(s.length, e.length);
516554 // convert surplus equals into text
517555 if(s.length > level) {
@@ -534,7 +572,7 @@
535573 return [new TagTk( 'h' + level )]
536574 .concat(c, [new EndTagTk( 'h' + level ), spc]);
537575 }
538 - / & { /* dp('nomatch exit h'); */ clearFlag('h'); return false } { return null }
 576+ / & { /* dp('nomatch exit h'); */ stops.dec('h'); return false } { return null }
539577 ) { return r }
540578
541579 comment
@@ -554,22 +592,22 @@
555593 **************************************************************/
556594
557595 urllink
558 - = ! { return syntaxFlags['extlink'] }
 596+ = ! { return stops.onCount('extlink') }
559597 target:url {
560598 return [ new TagTk( 'urllink', [new KV('href', target)] ) ];
561599 }
562600
563601 extlink
564 - = ! { return syntaxFlags['extlink'] } // extlink cannot be nested
 602+ = ! { return stops.onCount('extlink') } // extlink cannot be nested
565603 (
566604 "["
567 - & { return setFlag('extlink'); }
 605+ & { return stops.inc('extlink'); }
568606 //target:urllink
569607 target:extlink_preprocessor_text
570608 text:(( space / [\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000] )*
571609 t:inlineline { return t } )?
572610 "]" {
573 - clearFlag('extlink');
 611+ stops.dec('extlink');
574612 if ( text === '' ) {
575613 // XXX: Link numbering should be implemented in post-processor.
576614 text = [ "[" + linkCount + "]" ];
@@ -583,7 +621,7 @@
584622 ] )
585623 ];
586624 }
587 - / "[" & { clearFlag('extlink'); return false; }
 625+ / "[" & { stops.dec('extlink'); return false; }
588626 )
589627
590628 /* Default URL protocols in MediaWiki (see DefaultSettings). Normally these can
@@ -706,7 +744,7 @@
707745 s0:space*
708746 eq:"="?
709747 s1:space*
710 - value:template_param_text?
 748+ value:template_param_value?
711749
712750 {
713751 //console.warn( 'named template_param matched' + pp([name, value ]) );
@@ -724,25 +762,35 @@
725763
726764 // FIXME: handle template args and templates in key! (or even parser functions?)
727765 template_param_name
728 - = & { return setFlag( 'equalTemplate' ) }
 766+ = & { return stops.push( 'equal', true ) }
729767 tpt:template_param_text
730768 {
731 - clearFlag( 'equalTemplate' );
 769+ stops.pop( 'equal' );
732770 //console.warn( 'template param name matched: ' + pp( tpt ) );
733771 return tpt;
734772 }
735773
736 - / & { return clearFlag( 'equalTemplate' ) }
 774+ / & { return stops.pop( 'equal' ) }
737775 //= h:( !"}}" x:([^=|\n]) { return x } )* { return h.join(''); }
738776
 777+template_param_value
 778+ = & { return stops.push( 'equal', false ) }
 779+ tpt:template_param_text
 780+ {
 781+ stops.pop( 'equal' );
 782+ //console.warn( 'template param value matched: ' + pp( tpt ) );
 783+ return tpt;
 784+ }
 785+ / & { return stops.pop( 'equal' ) }
 786+
739787 template_param_text
740 - = & { return setFlag('template') }
 788+ = & { return stops.inc('template') }
741789 il:inline {
742 - clearFlag('template');
 790+ stops.dec('template');
743791 //console.warn( 'tpt match: ' + pp (il));
744792 return il;
745793 }
746 - / & { return clearFlag('template'); }
 794+ / & { return stops.dec('template'); }
747795
748796
749797 // TODO: handle link prefixes as in al[[Razi]]
@@ -790,39 +838,39 @@
791839 / ! { return posStack.pop( 'wikilink', pos ); }
792840
793841 link_text
794 - = & { return setFlag('linkdesc'); }
 842+ = & { return stops.inc('linkdesc'); }
795843 h:inline
796844 // 'equal' syntaxFlag is set for links in template parameters. Consume the
797845 // '=' here.
798846 hs:( '=' inline)?
799847 {
800848 //console.warn('link_text' + pp(h) + pp(hs));
801 - clearFlag('linkdesc');
 849+ stops.dec('linkdesc');
802850 if( hs !== '' ) {
803851 return h.concat(hs);
804852 } else {
805853 return h;
806854 }
807855 }
808 - / & { return clearFlag('linkdesc'); }
 856+ / & { return stops.dec('linkdesc'); }
809857
810858 link_option
811 - = & { setFlag('pipe'); return setFlag('linkdesc'); }
 859+ = & { stops.inc('pipe'); return stops.inc('linkdesc'); }
812860 h:inline
813861 // 'equal' syntaxFlag is set for links in template parameters. Consume the
814862 // '=' here.
815863 hs:( '=' inline)?
816864 {
817865 //console.warn('link_text' + pp(h) + pp(hs));
818 - clearFlag('pipe');
819 - clearFlag('linkdesc');
 866+ stops.dec('pipe');
 867+ stops.dec('linkdesc');
820868 if( hs !== '' ) {
821869 return h.concat(hs);
822870 } else {
823871 return h;
824872 }
825873 }
826 - / & { clearFlag('pipe'); return clearFlag('linkdesc'); }
 874+ / & { stops.dec('pipe'); return stops.dec('linkdesc'); }
827875
828876 link_end = "]]"
829877
@@ -845,9 +893,9 @@
846894 * transformer, and only for images.
847895 */
848896 img_options =
849 - & { return setFlag( 'pipe' ); }
 897+ & { return stops.inc( 'pipe' ); }
850898 os:img_option* {
851 - clearFlag( 'pipe' );
 899+ stops.dec( 'pipe' );
852900 var options = {};
853901 os = flatten( os );
854902 for ( var i = 0, l = os.length; i < l; i++ ) {
@@ -857,7 +905,7 @@
858906 options._options = os;
859907 return options;
860908 }
861 -/ & { return clearFlag( 'pipe' ); }
 909+/ & { return stops.dec( 'pipe' ); }
862910
863911 img_option
864912 = "|" space*
@@ -909,10 +957,10 @@
910958 = 'link=' space*
911959 u:(
912960 t:url {
913 - clearFlag( 'pipe' );
 961+ stops.dec( 'pipe' );
914962 return t;
915963 }
916 - / & { return clearFlag( 'pipe' ); }
 964+ / & { return stops.dec( 'pipe' ); }
917965 )
918966 {
919967 return new KV( 'link', u );
@@ -946,16 +994,16 @@
947995 "<pre"
948996 attribs:generic_attribute*
949997 ">"
950 - & { return setFlag('pre'); }
 998+ & { return stops.inc('pre'); }
951999 l:inlineline
9521000 ls:(sol pre_indent_line)*
9531001 "</pre>"
9541002 {
955 - clearFlag('pre');
 1003+ stops.dec('pre');
9561004 return [ new TagTk( 'pre', attribs ) ]
9571005 .concat( l, flatten( ls ), [ new EndTagTk( 'pre' ) ] );
9581006 }
959 - / & { return clearFlag('pre'); }
 1007+ / & { return stops.dec('pre'); }
9601008
9611009 pre_indent_line = space l:inlineline {
9621010 return [ '\n' ].concat(l);
@@ -1140,15 +1188,15 @@
11411189 // }
11421190
11431191 generic_attribute_name
1144 - = & { return setFlag( 'equalAttrib' ) }
 1192+ = & { return stops.push( 'equal', true ) }
11451193 ! '/>'
11461194 name:attribute_preprocessor_text_line
11471195 {
1148 - clearFlag( 'equalAttrib' );
 1196+ stops.pop( 'equal' );
11491197 //console.warn( 'generic attribute name: ' + pp( name ) );
11501198 return name;
11511199 }
1152 - / & { return clearFlag( 'equalAttrib' ) }
 1200+ / & { return stops.pop( 'equal' ) }
11531201
11541202 // A generic attribute, possibly spanning multiple lines.
11551203 generic_attribute_newline_value
@@ -1226,12 +1274,12 @@
12271275 dtdd
12281276 = bullets:(!(";" !list_char) list_char)*
12291277 ";"
1230 - & {return setFlag('colon');}
 1278+ & {return stops.inc('colon');}
12311279 c:inlineline
12321280 ":"
12331281 // Fortunately dtdds cannot be nested, so we can simply set the flag
12341282 // back to 0 to disable it.
1235 - & {syntaxFlags['colon'] = 0; return true;}
 1283+ & { stops.counters['colon'] = 0; return true;}
12361284 d:inlineline
12371285 &eolf {
12381286 // Convert trailing space into &nbsp;
@@ -1251,7 +1299,7 @@
12521300 return [ li ].concat( c, [ li2 ], d );
12531301 }
12541302 // Fall-back case to clear the colon flag
1255 - / & { return true; } { syntaxFlags['colon'] = 0; return null; }
 1303+ / & { return true; } { stops.counters['colon'] = 0; return null; }
12561304
12571305
12581306 list_char = [*#:;]
@@ -1272,14 +1320,14 @@
12731321 *********************************************************************/
12741322
12751323 table_lines
1276 - = & { return setFlag('table'); }
 1324+ = & { return stops.inc('table'); }
12771325 tl:table_line
12781326 tls:( s:sol tl2:table_line { return s.concat(tl2); } )* {
1279 - clearFlag('table');
 1327+ stops.dec('table');
12801328 //console.warn('table_lines: ' + pp(tl.concat(tls)));
12811329 return tl.concat( tls );
12821330 }
1283 - / & { return clearFlag('table'); }
 1331+ / & { return stops.dec('table'); }
12841332
12851333 // This production assumes start-of-line position!
12861334 table_line
@@ -1380,12 +1428,12 @@
13811429 }
13821430
13831431 table_cell_args
1384 - = & { return setFlag('tableCellArg'); }
 1432+ = & { return stops.inc('tableCellArg'); }
13851433 as:generic_attribute* space* "|" !"|" {
1386 - clearFlag('tableCellArg');
 1434+ stops.dec('tableCellArg');
13871435 return as;
13881436 }
1389 - / & { return clearFlag('tableCellArg'); }
 1437+ / & { return stops.dec('tableCellArg'); }
13901438
13911439
13921440
@@ -1417,13 +1465,13 @@
14181466 table_start
14191467 = "{" pipe
14201468 res:(
1421 - & { setFlag('table'); return true; }
 1469+ & { stops.inc('table'); return true; }
14221470 ta:generic_attribute*
14231471 {
14241472 //dp("table_start " + pp(ta) + ", pos:" + pos);
14251473 return ta;
14261474 }
1427 - / & { clearFlag('table'); return false; } { return null; }
 1475+ / & { stops.dec('table'); return false; } { return null; }
14281476 ) { return res }
14291477
14301478 table_caption
@@ -1502,7 +1550,7 @@
15031551
15041552 table_end
15051553 = nt:newlineToken? ( pipe "}" / eof ) {
1506 - clearFlag('table');
 1554+ stops.dec('table');
15071555 if(nt)
15081556 return nt;
15091557 else
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
@@ -103,48 +103,46 @@
104104 * Those inner productions are then exited, so that the outer production can
105105 * handle the end marker.
106106 */
107 -PegTokenizer.prototype.inline_breaks = function (input, pos, syntaxFlags ) {
 107+PegTokenizer.prototype.inline_breaks = function (input, pos, stops ) {
 108+ var counters = stops.counters;
108109 switch( input[pos] ) {
109110 case '=':
110 - return ( syntaxFlags.equalAttrib &&
111 - (syntaxFlags.equalTemplate || ! syntaxFlags.template ) ) ||
112 - (syntaxFlags.equalTemplate &&
113 - (syntaxFlags.equalAttrib || syntaxFlags.template)) ||
114 - ( syntaxFlags.h &&
 111+ return stops.onStack( 'equal' ) ||
 112+ ( counters.h &&
115113 input.substr( pos + 1, 200)
116114 .match(/[ \t]*[\r\n]/) !== null ) || null;
117115 case '|':
118 - return syntaxFlags.pipe ||
119 - syntaxFlags.template ||
120 - ( syntaxFlags.table &&
 116+ return counters.pipe ||
 117+ counters.template ||
 118+ ( counters.table &&
121119 ( input[pos + 1].match(/[|}]/) !== null ||
122 - syntaxFlags.tableCellArg
 120+ counters.tableCellArg
123121 )
124122 ) || null;
125123 case "!":
126 - return syntaxFlags.table && input[pos + 1] === "!" ||
 124+ return counters.table && input[pos + 1] === "!" ||
127125 null;
128126 case "}":
129 - return syntaxFlags.template && input[pos + 1] === "}" || null;
 127+ return counters.template && input[pos + 1] === "}" || null;
130128 case ":":
131 - return syntaxFlags.colon &&
132 - ! syntaxFlags.extlink &&
133 - ! syntaxFlags.linkdesc || null;
 129+ return counters.colon &&
 130+ ! counters.extlink &&
 131+ ! counters.linkdesc || null;
134132 case "\r":
135 - return syntaxFlags.table &&
 133+ return counters.table &&
136134 input.substr(pos, 4).match(/\r\n?[!|]/) !== null ||
137135 null;
138136 case "\n":
139 - return syntaxFlags.table &&
 137+ return counters.table &&
140138 input[pos + 1] === '!' ||
141139 input[pos + 1] === '|' ||
142140 null;
143141 case "]":
144 - return syntaxFlags.extlink ||
145 - ( syntaxFlags.linkdesc && input[pos + 1] === ']' ) ||
 142+ return counters.extlink ||
 143+ ( counters.linkdesc && input[pos + 1] === ']' ) ||
146144 null;
147145 case "<":
148 - return syntaxFlags.pre && input.substr( pos, 6 ) === '</pre>' || null;
 146+ return counters.pre && input.substr( pos, 6 ) === '</pre>' || null;
149147 default:
150148 return null;
151149 }

Status & tagging log