r64459 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r64458 | r64459 | r64460 >
Date:22:25, 31 March 2010
Author:daniel
Status:deferred
Tags:
Comment:
StreamProcessorApp
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/WordSenseIndexer.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/WordSenseIndexer.java
@@ -1,6 +1,8 @@
22 package de.brightbyte.wikiword.extract;
33
 4+import java.io.FileNotFoundException;
45 import java.io.IOException;
 6+import java.io.UnsupportedEncodingException;
57 import java.text.ParseException;
68 import java.util.Collections;
79 import java.util.List;
@@ -11,6 +13,7 @@
1214 import de.brightbyte.data.measure.Measure;
1315 import de.brightbyte.io.ConsoleIO;
1416 import de.brightbyte.io.LineCursor;
 17+import de.brightbyte.io.LineSink;
1518 import de.brightbyte.io.OutputSink;
1619 import de.brightbyte.text.Chunker;
1720 import de.brightbyte.text.RegularExpressionChunker;
@@ -43,13 +46,21 @@
4447 }
4548
4649 @Override
47 - protected DataSink<String> openSink() {
48 - return new OutputSink(ConsoleIO.output); //FIXME: open stream as required
 50+ protected DataSink<String> openSink() throws PersistenceException {
 51+ try {
 52+ return new LineSink(getOutputWriter());
 53+ } catch (IOException e) {
 54+ throw new PersistenceException(e);
 55+ }
4956 }
5057
5158 @Override
52 - protected DataCursor<String> openCursor() {
53 - return new LineCursor(ConsoleIO.newReader()); //FIXME: open stream as required
 59+ protected DataCursor<String> openCursor() throws PersistenceException {
 60+ try {
 61+ return new LineCursor(getInputReader());
 62+ } catch (IOException e) {
 63+ throw new PersistenceException(e);
 64+ }
5465 }
5566
5667 @Override
@@ -98,7 +109,7 @@
99110 }
100111
101112 @Override
102 - protected String process(String line) throws PersistenceException, ParseException {
 113+ protected void process(String line) throws PersistenceException, ParseException {
103114 //TODO: logic for handling overlapping phrases in a PhraseOccuranceSequence
104115 /*
105116 PhraseOccuranceSequence sequence = analyzer.extractPhrases(line, phraseLength); //TODO: alternative tokenizer/splitter //TODO: split by sentence first.
@@ -113,7 +124,8 @@
114125 Disambiguator.Result<Term, LocalConcept> result = disambiguator.disambiguate(terms, null);
115126 if (flip) Collections.reverse(terms);
116127
117 - return assembleMeanings(terms, result);
 128+ String s = assembleMeanings(terms, result); //TODO: use proper TSV or something
 129+ commit(s);
118130 }
119131
120132 private String assembleMeanings(List<Term> terms, Result<Term, LocalConcept> result) {
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
@@ -1,20 +1,103 @@
22 package de.brightbyte.wikiword.extract;
33
 4+import java.io.BufferedOutputStream;
 5+import java.io.File;
 6+import java.io.FileNotFoundException;
 7+import java.io.FileOutputStream;
 8+import java.io.IOException;
 9+import java.io.InputStream;
 10+import java.io.InputStreamReader;
 11+import java.io.OutputStream;
 12+import java.io.OutputStreamWriter;
 13+import java.io.Reader;
 14+import java.io.UnsupportedEncodingException;
 15+import java.io.Writer;
 16+
417 import de.brightbyte.data.cursor.DataCursor;
518 import de.brightbyte.data.cursor.DataSink;
 19+import de.brightbyte.io.ConsoleIO;
620 import de.brightbyte.util.PersistenceException;
721 import de.brightbyte.wikiword.StoreBackedApp;
 22+import de.brightbyte.wikiword.builder.InputFileHelper;
823 import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
924
1025 public abstract class StreamProcessorApp<I, O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> {
1126
12 - protected DataCursor<I> cursor;
13 - protected DataSink<O> sink;
 27+ protected DataCursor<? extends I> cursor;
 28+ protected DataSink<? super O> sink;
1429
 30+ protected InputFileHelper inputHelper;
 31+
1532 public StreamProcessorApp(boolean allowGlobal, boolean allowLocal) {
1633 super(allowGlobal, allowLocal);
1734 }
1835
 36+
 37+ protected File getOutputFile() {
 38+ if (outputFile==null) {
 39+ if (args.getParameterCount()>2) {
 40+ outputFile = new File(args.getParameter(2));
 41+ }
 42+ }
 43+ return outputFile;
 44+ }
 45+
 46+ protected String getOutputFileEncoding() {
 47+ return args.getStringOption("outputencoding", "UTF-8");
 48+ }
 49+
 50+ protected File outputFile;
 51+ protected Writer outputWriter;
 52+ protected OutputStream outputStream;
 53+ private InputStream inputStream;
 54+ private Reader inputReader;
 55+
 56+ protected Writer getOutputWriter() throws FileNotFoundException, UnsupportedEncodingException {
 57+ if (outputWriter==null) {
 58+ File f = getOutputFile();
 59+ if (f==null) outputWriter = ConsoleIO.writer;
 60+ else outputWriter = new OutputStreamWriter(getOutputStream(), getOutputFileEncoding());
 61+ }
 62+
 63+ return outputWriter;
 64+ }
 65+
 66+ protected OutputStream getOutputStream() throws FileNotFoundException {
 67+ if (outputStream==null) {
 68+ File f = getOutputFile();
 69+ if (f==null) outputStream = System.out;
 70+ else {
 71+ outputStream = new BufferedOutputStream(new FileOutputStream(f, args.isSet("append")));
 72+ info("Writing output to "+f);
 73+ }
 74+ }
 75+
 76+ return outputStream;
 77+ }
 78+
 79+ protected Reader getInputReader() throws IOException {
 80+ if (inputReader==null) {
 81+ File f = getOutputFile();
 82+ if (f==null) inputReader = ConsoleIO.newReader();
 83+ else inputReader = new InputStreamReader(getInputStream(), getOutputFileEncoding());
 84+ }
 85+
 86+ return inputReader;
 87+ }
 88+
 89+ protected InputStream getInputStream() throws IOException {
 90+ if (inputStream==null) {
 91+ File f = getOutputFile();
 92+ if (f==null) inputStream = System.in;
 93+ else {
 94+ inputStream = inputHelper.openFile(f);
 95+ info("Reading input from "+f);
 96+ }
 97+ }
 98+
 99+ return inputStream;
 100+ }
 101+
19102 @Override
20103 public void run() throws Exception {
21104 init();
@@ -25,13 +108,13 @@
26109 close();
27110 }
28111
29 - protected void open() {
 112+ protected void open() throws PersistenceException {
30113 cursor = openCursor();
31114 sink = openSink();
32115 }
33116
34 - protected abstract DataCursor<I> openCursor();
35 - protected abstract DataSink<O> openSink();
 117+ protected abstract DataCursor<? extends I> openCursor() throws PersistenceException;
 118+ protected abstract DataSink<? super O> openSink() throws PersistenceException;
36119
37120 protected void init() throws Exception {
38121 // noop
@@ -40,15 +123,18 @@
41124 sink.close();
42125 }
43126
44 - public void runTransfer(DataCursor<I> cursor) throws Exception {
 127+ public void runTransfer(DataCursor<? extends I> cursor) throws Exception {
45128 I rec;
46129 while ((rec = cursor.next()) != null) {
47130 //TODO: progress tracker
48 - O res = process(rec);
49 - if (res!=null) sink.commit(res);
 131+ process(rec);
50132 }
51133 }
52134
53 - protected abstract O process(I rec) throws Exception;
 135+ protected void commit(O rec) throws PersistenceException {
 136+ sink.commit(rec);
 137+ }
54138
 139+ protected abstract void process(I rec) throws Exception;
 140+
55141 }

Status & tagging log