r113639 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r113638 | r113639 | r113640 >
Date: 17:31, 12 March 2012
Author: gwicke
Status: deferred
Tags:
Comment:
Improved template tokenization. The parser can now template-expand
[[:en:Barack Obama]] without exceeding 1.7 GB of memory (which is the
Node.js memory limit).
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/modules/parser/ext.core.ParserFunctions.js
@@ -429,7 +429,7 @@
430430 return [ "MediaWiki" ];
431431 };
432432 ParserFunctions.prototype['pf_anchorencode'] = function ( target, argList, argDict ) {
433 - return [target];
 433+ return [ target.trim() ];
434434 };
435435 ParserFunctions.prototype['pf_protectionlevel'] = function ( target, argList, argDict ) {
436436 return [''];
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -505,7 +505,7 @@
506506 * in nested inline productions.
507507 */
508508 inline_breaks
509 - = & [=|!}:\r\n\]<]
 509+ = & [=|!}{:\r\n\]<]
510510 & { // Important hack: disable caching for this production, as the default
511511 // cache key does not take into account flag states!
512512 cacheKey = '';
@@ -515,7 +515,7 @@
516516
517517 inline
518518 = c:(urltext / (! inline_breaks (inline_element / . )))+ {
519 - //console.warn('inline out:' + pp(out));
 519+ //console.warn('inline out:' + pp(c));
520520 return flatten_stringlist( c );
521521 }
522522
@@ -708,12 +708,19 @@
709709 * 6: {{{{{{·}}}}}} → {{{·{{{·}}}·}}}
710710 * 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·}
711711 */
712 -tplarg_or_template = & '{{{{{' template / tplarg / template
 712+tplarg_or_template
 713+ =
 714+ ! '{{{{{{{' (
 715+ &'{{{{{{' tplarg
 716+ / & '{{{{{' template
 717+ / tplarg
 718+ / template
 719+ )
713720
714721 template
715722 = "{{" (newline / space)* target:template_param_text
716 - params:(( newline / space )* "|"
717 - r:( &"|" { return new KV( '', '') } // empty argument
 723+ params:(( newline / space )* pipe
 724+ r:( &pipe { return new KV( '', '') } // empty argument
718725 / ( newline / space )* p:template_param { return p }
719726 ) { return r }
720727 )*
@@ -734,8 +741,8 @@
735742
736743 tplarg
737744 = "{{{"
738 - name:template_param_text
739 - params:( ( space / newline )* "|" ( space / newline )* p:template_param { return p })*
 745+ name:template_param_text?
 746+ params:( ( space / newline )* pipe ( space / newline )* p:template_param { return p })*
740747 ( space / newline )*
741748 "}}}" {
742749 name = flatten( name );
@@ -815,7 +822,7 @@
816823 target:wikilink_preprocessor_text
817824 lcontent:(
818825 & { return posStack.push('lcontent' , pos); }
819 - lcs:( "|" lt:link_text { return new KV( '', lt ); } )+ {
 826+ lcs:( pipe lt:link_text { return new KV( '', lt ); } )+ {
820827 return { pos: posStack.pop('lcontent' , pos), content: lcs };
821828 }
822829
@@ -920,7 +927,7 @@
921928 / & { return stops.dec( 'pipe' ); }
922929
923930 img_option
924 - = "|" space*
 931+ = pipe space*
925932 o:(
926933 img_attribute
927934 / img_format
@@ -1441,7 +1448,7 @@
14421449
14431450 table_cell_args
14441451 = & { return stops.inc('tableCellArg'); }
1445 - as:generic_attribute* space* "|" !"|" {
 1452+ as:generic_attribute* space* pipe !pipe {
14461453 stops.dec('tableCellArg');
14471454 return as;
14481455 }
@@ -1715,35 +1722,39 @@
17161723 // Returns either a list of tokens, or a plain string (if nothing is to be
17171724 // processed).
17181725 preprocessor_text
1719 - = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
1720 - / directive
1721 - / !inline_breaks text_char )+ {
 1726+ = r:( t:[^<~[{\n\r\t|!\]}{ &=]+ { return t.join(''); }
 1727+ / !inline_breaks (
 1728+ directive
 1729+ / text_char )
 1730+ )+ {
17221731 return flatten ( r );
17231732 }
17241733
17251734 spaceless_preprocessor_text
1726 - = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
1727 - / directive
1728 - / !inline_breaks !' ' text_char )+ {
 1735+ = r:( t:[^'<~[{\n\r|!\]}{\t &=]+ { return t.join(''); }
 1736+ / !inline_breaks (
 1737+ directive
 1738+ / !' ' text_char )
 1739+ )+ {
17291740 return flatten_string ( r );
17301741 }
17311742
17321743
17331744 wikilink_preprocessor_text
1734 - = r:( t:[^<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
1735 - /// urlencoded_char
1736 - / directive
1737 - / !inline_breaks !"|" !"]]" text_char )+ {
 1745+ = r:( t:[^<~[{\n\r\t|!\]}{ &=]+ { return t.join(''); }
 1746+ /// urlencoded_char
 1747+ / !inline_breaks ( directive / !"]]" text_char )
 1748+ )+ {
17381749 return flatten_stringlist ( r );
17391750 }
17401751
17411752 extlink_preprocessor_text
17421753 // added special separator character class inline: separates url from
17431754 // description / text
1744 - = r:( t:[^'<~[{\n\r|!\]}\t&="' \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); }
1745 - / directive
 1755+ = r:( t:[^'<~[{\n\r|!\]}{\t&="' \u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]+ { return t.join(''); }
 1756+ / !inline_breaks ( directive / no_punctuation_char )
17461757 /// urlencoded_char
1747 - / !inline_breaks no_punctuation_char
 1758+ // !inline_breaks no_punctuation_char
17481759 / s:[.:,] !(space / eolf) { return s }
17491760 / [&%] )+ {
17501761 return flatten_string ( r );
@@ -1752,21 +1763,32 @@
17531764 // Attribute values with preprocessor support
17541765 attribute_preprocessor_text
17551766 = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return ts.join(''); }
1756 - / directive
1757 - / !inline_breaks [&%] )+ {
 1767+ / !inline_breaks (
 1768+ directive
 1769+ / !inline_breaks [&%]
 1770+ )
 1771+ )+
 1772+ {
17581773 //console.warn('prep');
17591774 return flatten_string ( r );
17601775 }
 1776+
17611777 attribute_preprocessor_text_single
17621778 = r:( t:[^{&']+ { return t.join(''); }
1763 - / directive
1764 - / !inline_breaks [{&] )* {
 1779+ / !inline_breaks (
 1780+ directive
 1781+ / [{&] )
 1782+ )*
 1783+ {
17651784 return flatten_string ( r );
17661785 }
17671786 attribute_preprocessor_text_double
17681787 = r:( t:[^{&"]+ { return t.join(''); }
1769 - / directive
1770 - / !inline_breaks [{&] )* {
 1788+ / !inline_breaks (
 1789+ directive
 1790+ / [{&] )
 1791+ )*
 1792+ {
17711793 //console.warn( 'double:' + pp(r) );
17721794 return flatten_string ( r );
17731795 }
@@ -1774,21 +1796,28 @@
17751797 // Variants with the entire attribute on a single line
17761798 attribute_preprocessor_text_line
17771799 = r:( ts:(!inline_breaks t:[^=<>{\n\r&'"\t ] {return t})+ { return ts.join(''); }
1778 - / directive
1779 - / !inline_breaks !'\n' [&%] )+ {
 1800+ / !inline_breaks (
 1801+ directive
 1802+ / !'\n' [&%] )
 1803+ )+ {
17801804 //console.warn('prep');
17811805 return flatten_string ( r );
17821806 }
 1807+
17831808 attribute_preprocessor_text_single_line
17841809 = r:( t:[^{&']+ { return t.join(''); }
1785 - / directive
1786 - / !inline_breaks !'\n' [{&] )* {
 1810+ / !inline_breaks (
 1811+ directive
 1812+ / !'\n' [{&] )
 1813+ )* {
17871814 return flatten_string ( r );
17881815 }
17891816 attribute_preprocessor_text_double_line
17901817 = r:( t:[^{&"]+ { return t.join(''); }
1791 - / directive
1792 - / !inline_breaks !'\n' [{&] )* {
 1818+ / !inline_breaks (
 1819+ directive
 1820+ / !'\n' [{&] )
 1821+ )* {
17931822 //console.warn( 'double:' + pp(r) );
17941823 return flatten_string ( r );
17951824 }
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.tokenizer.peg.js
@@ -119,6 +119,12 @@
120120 counters.tableCellArg
121121 )
122122 ) || null;
 123+ case '{':
 124+ return (
 125+ counters.pipe ||
 126+ counters.template
 127+ ) && input.substr( pos, 5 ) === '{{!}}'
 128+ || null;
123129 case "!":
124130 return counters.table && input[pos + 1] === "!" ||
125131 null;
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.TemplateHandler.js
@@ -168,9 +168,9 @@
169169 var prefix = target.split(':', 1)[0].toLowerCase().trim();
170170 if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
171171 var funcArg = target.substr( prefix.length + 1 );
172 - this.manager.env.tp( 'func prefix: ', prefix,
173 - ' args=', tplExpandData.expandedArgs,
174 - ' funcArg=', funcArg);
 172+ this.manager.env.tp( 'func prefix/args: ', prefix,
 173+ tplExpandData.expandedArgs,
 174+ 'funcArg:', funcArg);
175175 //this.manager.env.dp( 'entering prefix', funcArg, args );
176176 res = this.parserFunctions[ 'pf_' + prefix ]( funcArg,
177177 tplExpandData.expandedArgs, args, tplExpandData.origToken.attribs );
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.parser.environment.js
@@ -177,8 +177,12 @@
178178 forceNS = '';
179179 }
180180
 181+
181182 name = name.trim().replace(/[\s_]+/g, '_');
182183
 184+ // XXX: strip subst for now..
 185+ name = name.replace( /^subst:/, '' );
 186+
183187 // Implement int: as alias for MediaWiki:
184188 if ( name.substr( 0, 4 ) === 'int:' ) {
185189 name = 'MediaWiki:' + name.substr( 4 );

Status & tagging log