Index: trunk/extensions/VisualEditor/tests/parser/dumpReader.js |
— | — | @@ -4,6 +4,7 @@ |
5 | 5 | |
6 | 6 | function DumpReader() { |
7 | 7 | events.EventEmitter.call(this); |
| 8 | + this.makeParser(); |
8 | 9 | } |
9 | 10 | |
10 | 11 | util.inherits(DumpReader, events.EventEmitter); |
— | — | @@ -11,7 +12,8 @@ |
12 | 13 | /** |
13 | 14 | * @param {Stream} stream input stream to read XML from |
14 | 15 | */ |
15 | | -DumpReader.prototype.read = function(stream) { |
| 16 | +DumpReader.prototype.makeParser = function() { |
| 17 | + |
16 | 18 | var self = this; |
17 | 19 | var complete = false; |
18 | 20 | |
— | — | @@ -30,82 +32,77 @@ |
31 | 33 | boolNodes = flip(['minor', 'redirect']), |
32 | 34 | ignoreNodes = flip(['mediawiki', 'siteinfo', 'upload', 'thread']); |
33 | 35 | |
34 | | - var parser = new libxml.SaxPushParser(function(cb) { |
35 | | - cb.onStartElementNS(function(elem, attrs, prefix, uri, namespaces) { |
36 | | - if (elem in ignoreNodes) { |
37 | | - // ... |
38 | | - } else if (elem == 'page') { |
39 | | - stack = []; |
40 | | - workspace = {}; |
41 | | - } else if (elem == 'revision') { |
42 | | - stack.push(workspace); |
43 | | - workspace = { |
44 | | - page: workspace |
45 | | - }; |
46 | | - } else if (elem in textNodes || elem in boolNodes) { |
47 | | - buffer = ''; |
48 | | - } else { |
49 | | - stack.push(workspace); |
50 | | - workspace = {}; |
51 | | - } |
52 | | - }); |
| 36 | + var parser = new libxml.SaxPushParser(); |
| 37 | + this.parser = parser; |
| 38 | + parser.on('startElementNS', function(elem, attrs, prefix, uri, namespaces) { |
| 39 | + //console.warn( 'elem: ' + elem ); |
| 40 | + if (elem in ignoreNodes) { |
| 41 | + // ... |
| 42 | + } else if (elem == 'page') { |
| 43 | + //console.warn( 'starting page' ); |
| 44 | + stack = []; |
| 45 | + workspace = {}; |
| 46 | + } else if (elem == 'revision') { |
| 47 | + stack.push(workspace); |
| 48 | + workspace = { |
| 49 | + page: workspace |
| 50 | + }; |
| 51 | + } else if (elem in textNodes || elem in boolNodes) { |
| 52 | + buffer = ''; |
| 53 | + } else { |
| 54 | + stack.push(workspace); |
| 55 | + workspace = {}; |
| 56 | + } |
| 57 | + }); |
53 | 58 | |
54 | | - cb.onEndElementNS(function(elem, prefix, uri) { |
55 | | - // ping something! |
56 | | - if (elem == 'mediawiki') { |
57 | | - self.complete = true; |
58 | | - stream.pause(); |
59 | | - self.emit('end', {}); |
60 | | - } else if (elem == 'page') { |
61 | | - self.emit('page', workspace); |
62 | | - workspace = stack.pop(); |
63 | | - } else if (elem == 'revision') { |
64 | | - self.emit('revision', workspace); |
65 | | - workspace = stack.pop(); |
66 | | - } else if (elem in textNodes) { |
67 | | - workspace[elem] = buffer; |
68 | | - } else if (elem in boolNodes) { |
69 | | - workspace[elem] = true; |
70 | | - } else { |
71 | | - var current = workspace; |
72 | | - workspace = stack.pop(); |
73 | | - workspace[elem] = current; |
74 | | - } |
75 | | - }); |
76 | | - cb.onCharacters(function(chars) { |
77 | | - buffer += chars; |
78 | | - }); |
79 | | - cb.onCdata(function(cdata) { |
80 | | - buffer += cdata; |
81 | | - }); |
82 | | - cb.onEndDocument(function() { |
83 | | - // This doesn't seem to run...? |
| 59 | + parser.on( 'endElementNS', function(elem, prefix, uri) { |
| 60 | + // ping something! |
| 61 | + if (elem == 'mediawiki') { |
84 | 62 | self.complete = true; |
85 | | - stream.pause(); |
| 63 | + //stream.pause(); |
86 | 64 | self.emit('end', {}); |
87 | | - }) |
88 | | - cb.onError(function(err) { |
89 | | - self.emit('error', err); |
90 | | - // Should we.... stop reading now or what? |
91 | | - }); |
| 65 | + } else if (elem == 'page') { |
| 66 | + self.emit('page', workspace); |
| 67 | + workspace = stack.pop(); |
| 68 | + } else if (elem == 'revision') { |
| 69 | + self.emit('revision', workspace); |
| 70 | + workspace = stack.pop(); |
| 71 | + } else if (elem in textNodes) { |
| 72 | + workspace[elem] = buffer; |
| 73 | + } else if (elem in boolNodes) { |
| 74 | + workspace[elem] = true; |
| 75 | + } else { |
| 76 | + var current = workspace; |
| 77 | + workspace = stack.pop(); |
| 78 | + workspace[elem] = current; |
| 79 | + } |
| 80 | + }); |
92 | 81 | |
93 | | - // Now, start reading the file in :D |
94 | | - stream.on('data', function(buffer) { |
95 | | - parser.push(buffer); // @fixme does this want bytes or chars? |
96 | | - }); |
97 | | - stream.on('end', function() { |
98 | | - if (!complete) { |
99 | | - // uh-oh! |
100 | | - //self.emit('error', 'End of file before end of XML stream.'); |
101 | | - } |
102 | | - }); |
103 | | - stream.on('error', function(err) { |
104 | | - self.emit('error', err); |
105 | | - }); |
| 82 | + parser.on( 'characters', function(chars) { |
| 83 | + buffer += chars; |
106 | 84 | }); |
| 85 | + parser.on( 'cdata', function(cdata) { |
| 86 | + buffer += cdata; |
| 87 | + }); |
| 88 | + parser.on( 'endDocument', function() { |
| 89 | + // This doesn't seem to run...? |
| 90 | + self.complete = true; |
| 91 | + //stream.pause(); |
| 92 | + self.emit('end', {}); |
| 93 | + }); |
| 94 | + parser.on( 'error', function(err) { |
| 95 | + self.emit('error', err); |
| 96 | + // Should we.... stop reading now or what? |
| 97 | + }); |
| 98 | + |
107 | 99 | }; |
108 | 100 | |
| 101 | +DumpReader.prototype.push = function( chunk ) { |
| 102 | + //console.log( 'dr read' + chunk ); |
| 103 | + this.parser.push( chunk ); |
| 104 | +}; |
109 | 105 | |
| 106 | + |
110 | 107 | module.exports.DumpReader = DumpReader; |
111 | 108 | |
112 | 109 | if (module === require.main) { |
— | — | @@ -127,6 +124,7 @@ |
128 | 125 | }); |
129 | 126 | console.log('Reading!'); |
130 | 127 | process.stdin.setEncoding('utf8'); |
| 128 | + |
| 129 | + process.stdin.on('data', reader.push.bind(reader) ); |
131 | 130 | process.stdin.resume(); |
132 | | - reader.read(process.stdin); |
133 | 131 | } |