r92222 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r92221 | r92222 | r92223 >
Date:00:13, 15 July 2011
Author:brion
Status:deferred
Tags:
Comment:
ParserPlayground CLI batch test: use web workers to scale parsing over up to 8 cores. Not ideal but seems to work for now. :D
Modified paths:
  • /trunk/extensions/ParserPlayground/tests/README (modified) (history)
  • /trunk/extensions/ParserPlayground/tests/dumpReader.js (modified) (history)
  • /trunk/extensions/ParserPlayground/tests/roundtrip-test.js (added) (history)
  • /trunk/extensions/ParserPlayground/tests/roundtrip.js (modified) (history)
  • /trunk/extensions/ParserPlayground/tests/worker.js (added) (history)

Diff [purge]

Index: trunk/extensions/ParserPlayground/tests/roundtrip.js
@@ -1,39 +1,9 @@
22 var fs = require('fs'),
33 jsDiff = require('diff'),
 4+ Worker = require('webworker').Worker,
45 DumpReader = require('./dumpReader.js').DumpReader;
56
6 -// Fetch up some of our wacky parser bits...
7 -
8 -var basePath = '../modules/';
9 -function _require(filename) {
10 - return require(basePath + filename);
11 -}
12 -
13 -function _import(filename, symbols) {
14 - var module = _require(filename);
15 - symbols.forEach(function(symbol) {
16 - global[symbol] = module[symbol];
17 - })
18 -}
19 -
20 -// For now most modules only need this for $.extend and $.each :)
21 -global.$ = require('jquery');
22 -
23 -// Local CommonJS-friendly libs
24 -global.PEG = _require('lib.pegjs.js');
25 -
26 -// Our code...
27 -_import('ext.parserPlayground.serializer.js', ['MWTreeSerializer']);
28 -_import('ext.parserPlayground.pegParser.js', ['PegParser']);
29 -
30 -// Preload the grammar file...
31 -PegParser.src = fs.readFileSync(basePath + 'pegParser.pegjs.txt', 'utf8');
32 -
33 -
347 function runTests() {
35 - var parser = new PegParser(),
36 - serializer = new MWTreeSerializer();
37 -
388 function compareTest(a, b, msg) {
399 if (a === b) {
4010 console.log('OK: ', msg);
@@ -45,23 +15,84 @@
4616 return false;
4717 }
4818 }
 19+
 20+ var state = {
 21+ doneReading: false,
 22+ revsIn: 0,
 23+ revsOut: 0
 24+ };
 25+ function checkState() {
 26+ var remaining = state.revsIn - state.revsOut;
 27+ console.log(remaining + ' in queue... ' + (state.revsOut + '/' + state.revsIn));
 28+ if (remaining == 0) {
 29+ console.log('are we done?', state.doneReading);
 30+ }
 31+ if (state.doneReading) {
 32+ if (remaining <= 0) {
 33+ console.log('done!');
 34+ process.exit(0);
 35+ }
 36+ } else {
 37+ if (remaining < queueLength && process.stdin.readable) {
 38+ process.stdin.resume();
 39+ }
 40+ }
 41+ }
 42+
 43+ var nWorkers = 8;
 44+ var queueLength = nWorkers * 2;
 45+ var workerDir = __dirname;
 46+ var workers = [];
 47+ workerJs = require('path').join(workerDir, 'worker.js');
 48+ for (var i = 0; i < nWorkers; i++) {
 49+ var worker = new Worker(workerJs);
 50+ worker.onerror = function(err) {
 51+ console.log('worker error', err);
 52+ process.exit(1);
 53+ };
 54+ worker.onclose = function() {
 55+ console.log('worker closed');
 56+ };
 57+ worker.onmessage = function(msg) {
 58+ var data = msg.data;
 59+ compareTest(data.expected, data.received, data.msg);
 60+ state.revsOut++;
 61+ checkState();
 62+ };
 63+ workers[i] = worker;
 64+ }
 65+ //var worker = require(workerJs);
4966 function roundTripTest(text, msg) {
50 - parser.parseToTree(text, function(tree, err) {
51 - if (err) throw new Error(err);
52 - serializer.treeToSource(tree, function(newText, err) {
53 - if (err) throw new Error(err);
54 - compareTest(text, newText, msg)
55 - })
56 - })
 67+ var worker = workers[state.revsIn % nWorkers];
 68+ state.revsIn++;
 69+ var remaining = state.revsIn - state.revsOut;
 70+ if (remaining >= queueLength) {
 71+ // Throttle the input until we catch up!
 72+ process.stdin.pause();
 73+ }
 74+ worker.postMessage({
 75+ action: 'roundTrip',
 76+ text: text,
 77+ msg: msg
 78+ });
5779 }
5880
 81+ // We need to tell the child process where its working directory is.
 82+ workers.forEach(function(worker) {
 83+ worker.postMessage({
 84+ action: 'init',
 85+ dir: workerDir
 86+ });
 87+ });
 88+
5989 roundTripTest('A plain single line paragraph.', 'single-line paragraph');
6090 //roundTripTest('A plain single line paragraph.\n\nA second paragraph after a blank.', 'two single-line paragraphs');
6191
6292 var reader = new DumpReader();
6393 reader.on('end', function() {
64 - console.log('done!');
65 - process.exit();
 94+ console.log('done reading!');
 95+ state.doneReading = true;
 96+ checkState();
6697 });
6798 reader.on('error', function(err) {
6899 console.log('error!', err);
Index: trunk/extensions/ParserPlayground/tests/dumpReader.js
@@ -53,7 +53,9 @@
5454 cb.onEndElementNS(function(elem, prefix, uri) {
5555 // ping something!
5656 if (elem == 'mediawiki') {
57 - // yay
 57+ self.complete = true;
 58+ stream.pause();
 59+ self.emit('end', {});
5860 } else if (elem == 'page') {
5961 self.emit('page', workspace);
6062 workspace = stack.pop();
@@ -77,8 +79,9 @@
7880 buffer += cdata;
7981 });
8082 cb.onEndDocument(function() {
 83+ // This doesn't seem to run...?
8184 self.complete = true;
82 - stream.close();
 85+ stream.pause();
8386 self.emit('end', {});
8487 })
8588 cb.onError(function(err) {
@@ -93,7 +96,7 @@
9497 stream.on('end', function() {
9598 if (!complete) {
9699 // uh-oh!
97 - self.emit('error', 'End of file before end of XML stream.');
 100+ //self.emit('error', 'End of file before end of XML stream.');
98101 }
99102 });
100103 stream.on('error', function(err) {
Index: trunk/extensions/ParserPlayground/tests/worker.js
@@ -0,0 +1,53 @@
 2+var path = require('path');
 3+
 4+// Fake a worker-like interface so we can test it in-process
 5+// workers in node's webworker module don't seem to report errors very well.
 6+if (typeof module == "object") {
 7+ module.exports = {
 8+ postMessage: function(data) {
 9+ var msg = {data: JSON.parse(JSON.stringify(data))};
 10+ setTimeout(function() {
 11+ onmessage(msg);
 12+ }, 0);
 13+ },
 14+ onmessage: function(msg) {}
 15+ };
 16+
 17+ if (typeof postMessage === 'undefined') {
 18+ postMessage = function(data) {
 19+ var msg = {data: JSON.parse(JSON.stringify(data))};
 20+ setTimeout(function() {
 21+ module.exports.onmessage(msg)
 22+ }, 0);
 23+ }
 24+ }
 25+}
 26+
 27+// The worker context for some reason doesn't let us adjust globals,
 28+// so farming the good stuff out to a module which can.
 29+var didInit = false;
 30+var myWorker = {
 31+ postMessage: postMessage,
 32+ onmessage: function(msg) {
 33+ var data = msg.data;
 34+ if (data.action == 'init') {
 35+ if (didInit) {
 36+ throw new Error('second init request');
 37+ }
 38+ // Running as a worker in node.js we have to set the working directory
 39+ // or we won't be able to find our local files.
 40+ //
 41+ // It's a bit annoying. ;)
 42+ process.chdir(data.dir);
 43+ require(path.join(data.dir, 'roundtrip-test.js')).init(myWorker);
 44+ didInit = true;
 45+ } else {
 46+ throw new Error('Must init first!');
 47+ }
 48+ }
 49+};
 50+
 51+onmessage = function(msg) {
 52+ return myWorker.onmessage(msg);
 53+}
 54+
Property changes on: trunk/extensions/ParserPlayground/tests/worker.js
___________________________________________________________________
Added: svn:eol-style
155 + native
Index: trunk/extensions/ParserPlayground/tests/README
@@ -6,4 +6,4 @@
77 * jquery
88 * diff
99 * libxmljs (requires native compilation)
10 -
 10+* webworker
Index: trunk/extensions/ParserPlayground/tests/roundtrip-test.js
@@ -0,0 +1,62 @@
 2+module.exports.init = function(worker) {
 3+ var fs = require('fs'),
 4+ path = require('path');
 5+
 6+ // Fetch up some of our wacky parser bits...
 7+
 8+ //var basePath = '../modules/';
 9+ var basePath = path.join(path.dirname(process.cwd()), 'modules');
 10+ function _require(filename) {
 11+ return require(path.join(basePath, filename));
 12+ }
 13+
 14+ function _import(filename, symbols) {
 15+ var module = _require(filename);
 16+ symbols.forEach(function(symbol) {
 17+ global[symbol] = module[symbol];
 18+ })
 19+ }
 20+
 21+ // For now most modules only need this for $.extend and $.each :)
 22+ global.$ = require('jquery');
 23+
 24+ // Local CommonJS-friendly libs
 25+ global.PEG = _require('lib.pegjs.js');
 26+
 27+ // Our code...
 28+ _import('ext.parserPlayground.serializer.js', ['MWTreeSerializer']);
 29+ _import('ext.parserPlayground.pegParser.js', ['PegParser']);
 30+
 31+ // Preload the grammar file...
 32+ PegParser.src = fs.readFileSync(path.join(basePath, 'pegParser.pegjs.txt'), 'utf8');
 33+
 34+ var parser = new PegParser(),
 35+ serializer = new MWTreeSerializer();
 36+
 37+ function sendResult(expected, received, msg) {
 38+ worker.postMessage({
 39+ expected: expected,
 40+ received: received,
 41+ msg: msg
 42+ });
 43+ }
 44+
 45+ roundTripTest = function(text, msg) {
 46+ parser.parseToTree(text, function(tree, err) {
 47+ if (err) throw new Error(err);
 48+ serializer.treeToSource(tree, function(newText, err) {
 49+ if (err) throw new Error(err);
 50+ sendResult(text, newText, msg);
 51+ })
 52+ })
 53+ }
 54+
 55+ worker.onmessage = function(msg) {
 56+ var data = msg.data;
 57+ if (data.action == 'roundTrip') {
 58+ roundTripTest(data.text, data.msg);
 59+ } else {
 60+ throw new Error('unknown action ' + data.action);
 61+ }
 62+ }
 63+};
Property changes on: trunk/extensions/ParserPlayground/tests/roundtrip-test.js
___________________________________________________________________
Added: svn:eol-style
164 + native

Status & tagging log