r64366 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r64365‎ | r64366 | r64367 >
Date:22:27, 29 March 2010
Author:daniel
Status:deferred
Tags:
Comment:
preparing WordSenseIndexer (incomplete)
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportApp.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/WordSenseIndexer.java (added) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportApp.java
@@ -9,10 +9,8 @@
1010
1111 import de.brightbyte.application.Agenda;
1212 import de.brightbyte.application.Agenda.Monitor;
13 -import de.brightbyte.io.ConsoleIO;
1413 import de.brightbyte.io.Prompt;
1514 import de.brightbyte.util.PersistenceException;
16 -import de.brightbyte.wikiword.Corpus;
1715 import de.brightbyte.wikiword.StoreBackedApp;
1816 import de.brightbyte.wikiword.model.WikiWordConcept;
1917 import de.brightbyte.wikiword.store.WikiWordStore;
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/WordSenseIndexer.java
@@ -0,0 +1,74 @@
 2+package de.brightbyte.wikiword.extract;
 3+
 4+import java.io.IOException;
 5+
 6+import sun.net.dns.ResolverConfiguration.Options;
 7+
 8+import de.brightbyte.data.cursor.DataCursor;
 9+import de.brightbyte.data.cursor.DataSink;
 10+import de.brightbyte.io.ConsoleIO;
 11+import de.brightbyte.io.LineCursor;
 12+import de.brightbyte.io.OutputSink;
 13+import de.brightbyte.util.PersistenceException;
 14+import de.brightbyte.wikiword.analyzer.PhraseOccuranceSequence;
 15+import de.brightbyte.wikiword.analyzer.PlainTextAnalyzer;
 16+import de.brightbyte.wikiword.disambig.Disambiguator;
 17+import de.brightbyte.wikiword.disambig.SlidingCoherenceDisambiguator;
 18+import de.brightbyte.wikiword.disambig.StoredFeatureFetcher;
 19+import de.brightbyte.wikiword.disambig.StoredMeaningFetcher;
 20+import de.brightbyte.wikiword.model.LocalConcept;
 21+import de.brightbyte.wikiword.store.DatabaseConceptStores;
 22+import de.brightbyte.wikiword.store.FeatureStore;
 23+import de.brightbyte.wikiword.store.LocalConceptStore;
 24+import de.brightbyte.wikiword.store.WikiWordConceptStore;
 25+
 26+public class WordSenseIndexer extends StreamProcessorApp<String, String, WikiWordConceptStore> {
 27+ protected Disambiguator disambiguator;
 28+ protected PlainTextAnalyzer analyzer;
 29+ private int phraseLength;
 30+
 31+ public WordSenseIndexer(boolean allowGlobal, boolean allowLocal) {
 32+ super(allowGlobal, allowLocal);
 33+ }
 34+
 35+ @Override
 36+ protected DataSink<String> openSink() {
 37+ return new OutputSink(ConsoleIO.output); //FIXME: open stream as required
 38+ }
 39+
 40+ @Override
 41+ protected DataCursor<String> openCursor() {
 42+ return new LineCursor(ConsoleIO.newReader()); //FIXME: open stream as required
 43+ }
 44+
 45+ @Override
 46+ protected void createStores() throws IOException, PersistenceException {
 47+ conceptStore = DatabaseConceptStores.createConceptStore(getConfiguredDataSource(), getConfiguredDataset(), tweaks, true, true);
 48+ registerStore(conceptStore);
 49+ }
 50+
 51+ protected FeatureStore<LocalConcept, Integer> getFeatureStore() throws PersistenceException {
 52+ return conceptStore.getFeatureStore();
 53+ }
 54+
 55+ protected LocalConceptStore getLocalConceptStore() {
 56+ return (LocalConceptStore)(Object)conceptStore; //XXX: FUGLY! generic my ass.
 57+ }
 58+
 59+ protected void init() throws PersistenceException, InstantiationException {
 60+ StoredMeaningFetcher meaningFetcher = new StoredMeaningFetcher(getLocalConceptStore());
 61+ StoredFeatureFetcher<LocalConcept, Integer> featureFetcher = new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore());
 62+ disambiguator = new SlidingCoherenceDisambiguator<Integer>( meaningFetcher, featureFetcher, true );
 63+
 64+ analyzer = PlainTextAnalyzer.getPlainTextAnalyzer(getCorpus(), tweaks);
 65+
 66+ phraseLength = args.getIntOption("phrase-length", tweaks.getTweak("wikiSenseIndexer.phraseLength", 6));
 67+ }
 68+
 69+ @Override
 70+ protected String process(String line) {
 71+ PhraseOccuranceSequence sequence = analyzer.extractPhrases(line, phraseLength);
 72+ return null;
 73+ }
 74+
 75+}
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
@@ -0,0 +1,54 @@
 2+package de.brightbyte.wikiword.extract;
 3+
 4+import de.brightbyte.data.cursor.DataCursor;
 5+import de.brightbyte.data.cursor.DataSink;
 6+import de.brightbyte.util.PersistenceException;
 7+import de.brightbyte.wikiword.StoreBackedApp;
 8+import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
 9+
 10+public abstract class StreamProcessorApp<I, O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> {
 11+
 12+ protected DataCursor<I> cursor;
 13+ protected DataSink<O> sink;
 14+
 15+ public StreamProcessorApp(boolean allowGlobal, boolean allowLocal) {
 16+ super(allowGlobal, allowLocal);
 17+ }
 18+
 19+ @Override
 20+ public void run() throws Exception {
 21+ init();
 22+ open();
 23+
 24+ runTransfer(cursor);
 25+
 26+ close();
 27+ }
 28+
 29+ protected void open() {
 30+ cursor = openCursor();
 31+ sink = openSink();
 32+ }
 33+
 34+ protected abstract DataCursor<I> openCursor();
 35+ protected abstract DataSink<O> openSink();
 36+
 37+ protected void init() throws Exception {
 38+ // noop
 39+ }
 40+ protected void close() throws PersistenceException {
 41+ sink.close();
 42+ }
 43+
 44+ public void runTransfer(DataCursor<I> cursor) throws Exception {
 45+ I rec;
 46+ while ((rec = cursor.next()) != null) {
 47+ //TODO: progress tracker
 48+ O res = process(rec);
 49+ if (res!=null) sink.commit(res);
 50+ }
 51+ }
 52+
 53+ protected abstract O process(I rec);
 54+
 55+}
Property changes on: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
___________________________________________________________________
Name: svn:mergeinfo
156 +

Status & tagging log