r106023 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r106022‎ | r106023 | r106024 >
Date:14:48, 13 December 2011
Author:gwicke
Status:deferred
Tags:
Comment:
Convert the Cite extension to a token stream transformer.

This required a few further additions to the TokenTransformDispatcher. In
particular, there is now an 'any' token match whose callbacks are executed
before more specific callbacks. This is used by the Cite extension to eat all
tokens between ref and /ref tags. This need is very common, so it should be
broken out to an intermediate layer in the future.

In general, the requirements for the TokenTransformDispatcher API are now
clearer, and the API should likely be cleaned up / simplified.
Modified paths:
  • /trunk/extensions/VisualEditor/modules/parser/ext.Cite.js (added) (history)
  • /trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js (modified) (history)
  • /trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt (modified) (history)
  • /trunk/extensions/VisualEditor/tests/parser/parserTests.js (modified) (history)

Diff [purge]

Index: trunk/extensions/VisualEditor/tests/parser/parserTests.js
@@ -65,6 +65,8 @@
6666
6767 _import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']);
6868
 69+_import(pj('parser', 'ext.Cite.js'), ['Cite']);
 70+
6971 // WikiDom and serializers
7072 //_require(pj('es', 'es.js'));
7173 //_require(pj('es', 'es.Html.js'));
@@ -207,6 +209,9 @@
208210 var qt = new QuoteTransformer();
209211 qt.register(this.tokenDispatcher);
210212
 213+ var citeExtension = new Cite();
 214+ citeExtension.register(this.tokenDispatcher);
 215+
211216 // Test statistics
212217 this.passedTests = 0;
213218 this.passedTestsManual = 0;
@@ -437,6 +442,9 @@
438443 // Transform tokens using the TokenTransformDispatcher. When done, the
439444 // TokenTransformDispatcher calls buildTree() and checkResult() with the
440445 // transformed tokens.
 446+
 447+ // Append the end
 448+ res.tokens.push({type: 'END'});
441449 this.tokenDispatcher.transformTokens( res.tokens );
442450 }
443451 };
@@ -519,8 +527,6 @@
520528 for (var i = 0, length = tokens.length; i < length; i++) {
521529 treeBuilder.processToken(tokens[i]);
522530 }
523 - // And signal the end
524 - treeBuilder.processToken({type: 'END'});
525531 };
526532
527533 /**
Index: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
@@ -0,0 +1,249 @@
/**
 * The ref / references tags don't do any fancy HTML, so we can actually
 * implement this in terms of parse tree manipulations, skipping the need
 * for renderer-specific plugins as well.
 *
 * Pretty neat huh!
 */

/**
 * Token stream transformer implementing the <ref> and <references> tags.
 *
 * @constructor
 */
function Cite () {
	// Reference groups collected so far, keyed by group name (a missing
	// group attribute is stored under the key 'null').
	this.refGroups = {};
	// Content tokens of the ref that is currently being collected.
	this.refTokens = [];
	// Start tag token of the ref being collected; null while outside a ref.
	this.curRef = null;
}

/**
 * Register the ref, references and end-of-input handlers with a dispatcher.
 *
 * @param {Object} dispatcher Object providing appendListener( listener, type, name ).
 */
Cite.prototype.register = function ( dispatcher ) {
	// Register for ref and references tag tokens
	var self = this;
	// Kept on the instance: onRef() registers / removes this very function
	// as an 'any' listener, so the same reference is needed later on.
	this.onRefCB = function ( ctx ) {
		return self.onRef( ctx );
	};
	dispatcher.appendListener( this.onRefCB, 'tag', 'ref' );
	dispatcher.appendListener( function ( ctx ) {
		return self.onReferences( ctx );
	}, 'tag', 'references' );
	dispatcher.appendListener( function ( ctx ) {
		return self.onEnd( ctx );
	}, 'end' );
};


/**
 * Convert list of key-value pairs to object, with first entry for a key
 * winning.
 * XXX: Move to general util module
 *
 * @param {Array} [attribs] List of [key, value] pairs.
 * @returns {Object} Map from key to the first value seen for that key.
 */
Cite.prototype.attribsToObject = function ( attribs ) {
	var obj = {};
	if ( !attribs ) {
		return obj;
	}
	for ( var i = 0, l = attribs.length; i < l; i++ ) {
		var kv = attribs[i];
		// Own-property test: a bare `! key in obj` negates the key before
		// the `in` test, and `in` alone would also see inherited names
		// such as 'constructor'.
		if ( !Object.prototype.hasOwnProperty.call( obj, kv[0] ) ) {
			obj[kv[0]] = kv[1];
		}
	}
	return obj;
};

/**
 * Extract the options understood by ref / references from a token's
 * attribute list.
 *
 * @param {Array} [attribs] List of [key, value] pairs.
 * @returns {Object} Object with 'name' and 'group' members, null when absent.
 */
Cite.prototype.getRefOptions = function ( attribs ) {
	var attrs = this.attribsToObject( attribs );
	return {
		name: attrs.name === undefined ? null : attrs.name,
		group: attrs.group === undefined ? null : attrs.group
	};
};

/**
 * Fetch the bookkeeping object for a reference group, creating it on first
 * use.
 *
 * @param {string|null} group Group name.
 * @returns {Object} Group with members refs (array), byName (object) and
 * add( tokens, options ), which records one use of a ref and returns the
 * ref object.
 */
Cite.prototype.getRefGroup = function ( group ) {
	var refGroups = this.refGroups,
		hasOwn = Object.prototype.hasOwnProperty;
	if ( !hasOwn.call( refGroups, group ) ) {
		var refs = [],
			byName = {};
		refGroups[group] = {
			refs: refs,
			byName: byName,
			add: function ( tokens, options ) {
				var ref;
				if ( options.name && hasOwn.call( byName, options.name ) ) {
					// Re-use of a named ref: only a new linkback is added
					ref = byName[options.name];
				} else {
					var n = refs.length;
					var key = n + '';
					if ( options.name ) {
						key = options.name + '-' + key;
					}
					ref = {
						tokens: tokens,
						index: n,
						groupIndex: n, // @fixme
						name: options.name,
						group: options.group,
						key: key,
						target: 'cite_note-' + key,
						linkbacks: []
					};
					refs[n] = ref;
					if ( options.name ) {
						byName[options.name] = ref;
					}
				}
				ref.linkbacks.push(
					'cite_ref-' + ref.key + '-' + ref.linkbacks.length
				);
				return ref;
			}
		};
	}
	return refGroups[group];
};

/**
 * Handle ref tag tokens and, while inside a ref, every other token.
 *
 * The start tag is dropped and this handler is registered for 'any' token;
 * all tokens up to the end tag are collected and dropped; the end tag is
 * replaced by the tokens of a linked reference marker.
 *
 * @param {Object} tokenCTX Context with members token and dispatcher.
 * @returns {Object} The context; token is null (dropped), an array
 * (replacement tokens) or left unchanged.
 */
Cite.prototype.onRef = function ( tokenCTX ) {
	var token = tokenCTX.token;
	var isRefTag = function () {
		return token.name.toLowerCase() === 'ref';
	};

	// Collect all tokens between ref start and endtag
	if ( token.type === 'TAG' && isRefTag() ) {
		if ( !this.curRef ) {
			// Prepend self for 'any' token type. Only once: a second
			// registration for a nested start tag would survive the end
			// tag and keep swallowing tokens.
			tokenCTX.dispatcher.prependListener( this.onRefCB, 'any' );
		}
		this.curRef = token;
		tokenCTX.token = null;
		return tokenCTX;
	} else if ( token.type === 'ENDTAG' && isRefTag() ) {
		if ( !this.curRef ) {
			// Stray end tag without a start tag: nothing was collected,
			// leave the token alone.
			return tokenCTX;
		}
		tokenCTX.dispatcher.removeListener( this.onRefCB, 'any' );
		// fall through for further processing!
	} else if ( token.type === 'SELFCLOSINGTAG' && !this.curRef && isRefTag() ) {
		// Self-closing ref outside a ref block: a ref without content.
		// NOTE(review): assumes the dispatcher routes SELFCLOSINGTAG tokens
		// to 'tag' listeners; confirm against the dispatcher.
		this.curRef = token;
		// fall through for further processing!
	} else if ( token.type === 'END' ) {
		// Unclosed ref at end of input: stop collecting and let the END
		// token through so that later stages still see it.
		// XXX: Emit an error message for the unclosed ref!
		tokenCTX.dispatcher.removeListener( this.onRefCB, 'any' );
		this.curRef = null;
		this.refTokens = [];
		return tokenCTX;
	} else {
		// Inside ref block: Collect all other tokens in refTokens and abort
		this.refTokens.push( token );
		tokenCTX.token = null;
		return tokenCTX;
	}

	var options = this.getRefOptions( this.curRef.attribs );
	var group = this.getRefGroup( options.group );
	var ref = group.add( this.refTokens, options );
	this.refTokens = [];
	this.curRef = null;
	var linkback = ref.linkbacks[ref.linkbacks.length - 1];

	var bits = [];
	if ( options.group ) {
		bits.push( options.group );
	}
	//bits.push(env.formatNum( ref.groupIndex + 1 ));
	bits.push( ref.groupIndex + 1 );

	tokenCTX.token = [
		{
			type: 'TAG',
			name: 'span',
			attribs: [
				['id', linkback],
				['class', 'reference'],
				// ignore element when serializing back to wikitext
				['data-nosource', '']
			]
		},
		{
			type: 'TAG',
			name: 'a',
			attribs: [
				['data-type', 'hashlink'],
				['href', '#' + ref.target]
				// XXX: Add round-trip info here?
			]
		},
		{
			type: 'TEXT',
			value: '[' + bits.join(' ') + ']'
		},
		{
			type: 'ENDTAG',
			name: 'a'
		},
		{
			type: 'ENDTAG',
			name: 'span'
		}
	];
	return tokenCTX;
};

/**
 * Build the list item tokens for one collected ref.
 *
 * @param {Object} ref Ref object as returned by a group's add().
 * @returns {Array} Tokens: li start tag, linkback(s), then the ref content.
 */
Cite.prototype.renderReferenceLine = function ( ref ) {
	var arrow = '↑';
	var out = [ {
		type: 'TAG',
		name: 'li',
		attribs: [['id', ref.target]]
	} ];
	if ( ref.linkbacks.length === 1 ) {
		// Single use: the arrow itself links back
		out.push(
			{
				type: 'TAG',
				name: 'a',
				attribs: [
					['data-type', 'hashlink'],
					['href', '#' + ref.linkbacks[0]]
				]
			},
			{ type: 'TEXT', value: arrow },
			{ type: 'ENDTAG', name: 'a' }
		);
	} else {
		// Several uses: plain arrow followed by one numbered link per use
		out.push( { type: 'TEXT', value: arrow } );
		for ( var i = 0, l = ref.linkbacks.length; i < l; i++ ) {
			out.push(
				{
					type: 'TAG',
					name: 'a',
					attribs: [
						['data-type', 'hashlink'],
						['href', '#' + ref.linkbacks[i]]
					]
				},
				// XXX: make formatNum available!
				//{type: 'TEXT', value: env.formatNum( ref.groupIndex + '.' + i)},
				{ type: 'TEXT', value: ref.groupIndex + '.' + i },
				{ type: 'ENDTAG', name: 'a' }
			);
		}
	}
	// The original content tokens, once per list item
	return out.concat( ref.tokens );
};

/**
 * Handle references tag tokens: replace the tag with an ordered list of the
 * refs collected for its group, or with a placeholder if the group has no
 * refs.
 *
 * @param {Object} tokenCTX Context with member token.
 * @returns {Object} The context with the replaced token.
 */
Cite.prototype.onReferences = function ( tokenCTX ) {
	var token = tokenCTX.token;

	if ( token.type === 'ENDTAG' ) {
		// The list was already emitted for the start tag; rendering it
		// again for the end tag would duplicate it.
		tokenCTX.token = null;
		return tokenCTX;
	}

	var options = this.getRefOptions( token.attribs );

	if ( Object.prototype.hasOwnProperty.call( this.refGroups, options.group ) ) {
		var refs = this.refGroups[options.group].refs;
		var list = [ {
			type: 'TAG',
			name: 'ol',
			attribs: [['class', 'references']]
		} ];
		for ( var i = 0, l = refs.length; i < l; i++ ) {
			list = list.concat( this.renderReferenceLine( refs[i] ) );
		}
		list.push( { type: 'ENDTAG', name: 'ol' } );
		tokenCTX.token = list;
	} else {
		tokenCTX.token = {
			type: 'SELFCLOSINGTAG',
			name: 'placeholder',
			attribs: [['data-origNode', JSON.stringify( token )]]
		};
	}

	return tokenCTX;
};

/**
 * Handle the end of input: reset all collected state.
 *
 * @param {Object} tokenCTX Token context, returned unchanged.
 * @returns {Object} The context.
 */
Cite.prototype.onEnd = function ( tokenCTX ) {
	// XXX: Emit error messages if references tag was missing!
	// Clean up
	this.refGroups = {};
	this.refTokens = [];
	this.curRef = null;
	return tokenCTX;
};

if ( typeof module === "object" ) {
	module.exports.Cite = Cite;
}
Property changes on: trunk/extensions/VisualEditor/modules/parser/ext.Cite.js
___________________________________________________________________
Added: svn:eol-style
1251 + native
Index: trunk/extensions/VisualEditor/modules/parser/mediawiki.TokenTransformDispatcher.js
@@ -25,8 +25,8 @@
2626 newline: [],
2727 comment: [],
2828 end: [], // eof
29 - martian: [] // none of the above
30 - // XXX: Add an any registration that always matches?
 29+ martian: [], // none of the above
 30+ any: [] // all tokens, before more specific handlers are run
3131 };
3232 this.reset();
3333 return this;
@@ -41,6 +41,7 @@
4242
4343 TokenTransformDispatcher.prototype.appendListener = function ( listener, type, name ) {
4444 if ( type === 'tag' ) {
 45+ name = name.toLowerCase();
4546 if ( $.isArray(this.transformers.tag.name) ) {
4647 this.transformers.tag[name].push(listener);
4748 } else {
@@ -53,6 +54,7 @@
5455
5556 TokenTransformDispatcher.prototype.prependListener = function ( listener, type, name ) {
5657 if ( type === 'tag' ) {
 58+ name = name.toLowerCase();
5759 if ( $.isArray(this.transformers.tag.name) ) {
5860 this.transformers.tag[name].unshift(listener);
5961 } else {
@@ -67,6 +69,7 @@
6870 var i = -1;
6971 var ts;
7072 if ( type === 'tag' ) {
 73+ name = name.toLowerCase();
7174 if ( $.isArray(this.transformers.tag.name) ) {
7275 ts = this.transformers.tag[name];
7376 i = ts.indexOf(listener);
@@ -102,7 +105,13 @@
103106 * @returns {TokenContext} Context with updated token and/or accum.
104107 */
105108 TokenTransformDispatcher.prototype._transformTagToken = function ( tokenCTX ) {
106 - var ts = this.transformers.tag[tokenCTX.token.name];
 109+ // prepend 'any' transformers
 110+ var ts = this.transformers.any;
 111+ var tagts = this.transformers.tag[tokenCTX.token.name.toLowerCase()];
 112+ if ( tagts ) {
 113+ ts = ts.concat(tagts);
 114+ }
 115+ //console.log(JSON.stringify(ts, null, 2));
107116 if ( ts ) {
108117 for (var i = 0, l = ts.length; i < l; i++ ) {
109118 // Transform token with side effects
@@ -123,8 +132,11 @@
124133 * @returns {TokenContext} Context with updated token and/or accum.
125134 */
126135 TokenTransformDispatcher.prototype._transformToken = function ( tokenCTX, ts ) {
 136+ // prepend 'any' transformers
 137+ ts = this.transformers.any.concat(ts);
127138 if ( ts ) {
128139 for (var i = 0, l = ts.length; i < l; i++ ) {
 140+ // Transform token with side effects
129141 tokenCTX = ts[i]( tokenCTX );
130142 if ( tokenCTX.token === null || $.isArray(tokenCTX.token) ) {
131143 break;
@@ -163,7 +175,6 @@
164176 tokenCTX.token = tokens[i];
165177 tokenCTX.pos = i;
166178 tokenCTX.accum = accum;
167 - var ts;
168179 switch(tokenCTX.token.type) {
169180 case 'TAG':
170181 case 'ENDTAG':
Index: trunk/extensions/VisualEditor/modules/parser/ext.core.QuoteTransformer.js
@@ -24,7 +24,7 @@
2525 }, 'newline' );
2626 dispatcher.appendListener( function (ctx) {
2727 return self.onQuote(ctx);
28 - }, 'tag', 'QUOTE' );
 28+ }, 'tag', 'mw-quote' );
2929 };
3030
3131 // Make a copy of the token context
Index: trunk/extensions/VisualEditor/modules/parser/pegTokenizer.pegjs.txt
@@ -755,7 +755,7 @@
756756 quote = "''" x:"'"* {
757757 return {
758758 type: 'TAG',
759 - name : 'QUOTE',
 759+ name : 'mw-quote', // Will be consumed in token transforms
760760 value: "''" + x.join('')
761761 }
762762 }

Status & tagging log