Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportApp.java |
— | — | @@ -9,10 +9,8 @@ |
10 | 10 | |
11 | 11 | import de.brightbyte.application.Agenda; |
12 | 12 | import de.brightbyte.application.Agenda.Monitor; |
13 | | -import de.brightbyte.io.ConsoleIO; |
14 | 13 | import de.brightbyte.io.Prompt; |
15 | 14 | import de.brightbyte.util.PersistenceException; |
16 | | -import de.brightbyte.wikiword.Corpus; |
17 | 15 | import de.brightbyte.wikiword.StoreBackedApp; |
18 | 16 | import de.brightbyte.wikiword.model.WikiWordConcept; |
19 | 17 | import de.brightbyte.wikiword.store.WikiWordStore; |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/WordSenseIndexer.java |
— | — | @@ -0,0 +1,74 @@ |
| 2 | +package de.brightbyte.wikiword.extract; |
| 3 | + |
| 4 | +import java.io.IOException; |
| 5 | + |
| 6 | +import sun.net.dns.ResolverConfiguration.Options; |
| 7 | + |
| 8 | +import de.brightbyte.data.cursor.DataCursor; |
| 9 | +import de.brightbyte.data.cursor.DataSink; |
| 10 | +import de.brightbyte.io.ConsoleIO; |
| 11 | +import de.brightbyte.io.LineCursor; |
| 12 | +import de.brightbyte.io.OutputSink; |
| 13 | +import de.brightbyte.util.PersistenceException; |
| 14 | +import de.brightbyte.wikiword.analyzer.PhraseOccuranceSequence; |
| 15 | +import de.brightbyte.wikiword.analyzer.PlainTextAnalyzer; |
| 16 | +import de.brightbyte.wikiword.disambig.Disambiguator; |
| 17 | +import de.brightbyte.wikiword.disambig.SlidingCoherenceDisambiguator; |
| 18 | +import de.brightbyte.wikiword.disambig.StoredFeatureFetcher; |
| 19 | +import de.brightbyte.wikiword.disambig.StoredMeaningFetcher; |
| 20 | +import de.brightbyte.wikiword.model.LocalConcept; |
| 21 | +import de.brightbyte.wikiword.store.DatabaseConceptStores; |
| 22 | +import de.brightbyte.wikiword.store.FeatureStore; |
| 23 | +import de.brightbyte.wikiword.store.LocalConceptStore; |
| 24 | +import de.brightbyte.wikiword.store.WikiWordConceptStore; |
| 25 | + |
| 26 | +public class WordSenseIndexer extends StreamProcessorApp<String, String, WikiWordConceptStore> { |
| 27 | + protected Disambiguator disambiguator; |
| 28 | + protected PlainTextAnalyzer analyzer; |
| 29 | + private int phraseLength; |
| 30 | + |
| 31 | + public WordSenseIndexer(boolean allowGlobal, boolean allowLocal) { |
| 32 | + super(allowGlobal, allowLocal); |
| 33 | + } |
| 34 | + |
| 35 | + @Override |
| 36 | + protected DataSink<String> openSink() { |
| 37 | + return new OutputSink(ConsoleIO.output); //FIXME: open stream as required |
| 38 | + } |
| 39 | + |
| 40 | + @Override |
| 41 | + protected DataCursor<String> openCursor() { |
| 42 | + return new LineCursor(ConsoleIO.newReader()); //FIXME: open stream as required |
| 43 | + } |
| 44 | + |
| 45 | + @Override |
| 46 | + protected void createStores() throws IOException, PersistenceException { |
| 47 | + conceptStore = DatabaseConceptStores.createConceptStore(getConfiguredDataSource(), getConfiguredDataset(), tweaks, true, true); |
| 48 | + registerStore(conceptStore); |
| 49 | + } |
| 50 | + |
| 51 | + protected FeatureStore<LocalConcept, Integer> getFeatureStore() throws PersistenceException { |
| 52 | + return conceptStore.getFeatureStore(); |
| 53 | + } |
| 54 | + |
| 55 | + protected LocalConceptStore getLocalConceptStore() { |
| 56 | + return (LocalConceptStore)(Object)conceptStore; //XXX: FUGLY! generic my ass. |
| 57 | + } |
| 58 | + |
| 59 | + protected void init() throws PersistenceException, InstantiationException { |
| 60 | + StoredMeaningFetcher meaningFetcher = new StoredMeaningFetcher(getLocalConceptStore()); |
| 61 | + StoredFeatureFetcher<LocalConcept, Integer> featureFetcher = new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore()); |
| 62 | + disambiguator = new SlidingCoherenceDisambiguator<Integer>( meaningFetcher, featureFetcher, true ); |
| 63 | + |
| 64 | + analyzer = PlainTextAnalyzer.getPlainTextAnalyzer(getCorpus(), tweaks); |
| 65 | + |
| 66 | + phraseLength = args.getIntOption("phrase-length", tweaks.getTweak("wikiSenseIndexer.phraseLength", 6)); |
| 67 | + } |
| 68 | + |
| 69 | + @Override |
| 70 | + protected String process(String line) { |
| 71 | + PhraseOccuranceSequence sequence = analyzer.extractPhrases(line, phraseLength); |
| 72 | + return null; |
| 73 | + } |
| 74 | + |
| 75 | +} |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java |
— | — | @@ -0,0 +1,54 @@ |
| 2 | +package de.brightbyte.wikiword.extract; |
| 3 | + |
| 4 | +import de.brightbyte.data.cursor.DataCursor; |
| 5 | +import de.brightbyte.data.cursor.DataSink; |
| 6 | +import de.brightbyte.util.PersistenceException; |
| 7 | +import de.brightbyte.wikiword.StoreBackedApp; |
| 8 | +import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
| 9 | + |
| 10 | +public abstract class StreamProcessorApp<I, O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> { |
| 11 | + |
| 12 | + protected DataCursor<I> cursor; |
| 13 | + protected DataSink<O> sink; |
| 14 | + |
| 15 | + public StreamProcessorApp(boolean allowGlobal, boolean allowLocal) { |
| 16 | + super(allowGlobal, allowLocal); |
| 17 | + } |
| 18 | + |
| 19 | + @Override |
| 20 | + public void run() throws Exception { |
| 21 | + init(); |
| 22 | + open(); |
| 23 | + |
| 24 | + runTransfer(cursor); |
| 25 | + |
| 26 | + close(); |
| 27 | + } |
| 28 | + |
| 29 | + protected void open() { |
| 30 | + cursor = openCursor(); |
| 31 | + sink = openSink(); |
| 32 | + } |
| 33 | + |
| 34 | + protected abstract DataCursor<I> openCursor(); |
| 35 | + protected abstract DataSink<O> openSink(); |
| 36 | + |
| 37 | + protected void init() throws Exception { |
| 38 | + // noop |
| 39 | + } |
| 40 | + protected void close() throws PersistenceException { |
| 41 | + sink.close(); |
| 42 | + } |
| 43 | + |
| 44 | + public void runTransfer(DataCursor<I> cursor) throws Exception { |
| 45 | + I rec; |
| 46 | + while ((rec = cursor.next()) != null) { |
| 47 | + //TODO: progress tracker |
| 48 | + O res = process(rec); |
| 49 | + if (res!=null) sink.commit(res); |
| 50 | + } |
| 51 | + } |
| 52 | + |
| 53 | + protected abstract O process(I rec); |
| 54 | + |
| 55 | +} |
Property changes on: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java |
___________________________________________________________________ |
Name: svn:mergeinfo |
1 | 56 | + |