r64257 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r64256‎ | r64257 | r64258 >
Date:15:28, 27 March 2010
Author:daniel
Status:deferred
Tags:
Comment:
SlidingCoherenceDisambiguator
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/StoredMeaningFetcher.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/WindowCoherenceDisambiguator.java (deleted) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordConcept.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordReference.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseFeatureStore.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseLocalConceptStore.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/LocalConceptStore.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/WikiWordConceptStore.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/WindowCoherenceDisambiguator.java
@@ -1,79 +0,0 @@
2 -package de.brightbyte.wikiword.disambig;
3 -
4 -import java.util.Collections;
5 -import java.util.HashMap;
6 -import java.util.List;
7 -import java.util.Map;
8 -
9 -import de.brightbyte.data.LabeledMatrix;
10 -import de.brightbyte.data.LabeledVector;
11 -import de.brightbyte.data.MapLabeledMatrix;
12 -import de.brightbyte.data.measure.Measure;
13 -import de.brightbyte.data.measure.Similarity;
14 -import de.brightbyte.util.PersistenceException;
15 -import de.brightbyte.wikiword.model.LocalConcept;
16 -
17 -public class WindowCoherenceDisambiguator<K> extends CoherenceDisambiguator<K> {
18 -
19 - protected int window = 2;
20 -
21 - public WindowCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, K> featureFetcher, Measure<LocalConcept> popularityMeasure, Similarity<LabeledVector<K>> sim, int window) {
22 - super(meaningFetcher, featureFetcher, popularityMeasure, sim);
23 -
24 - this.window = window;
25 - }
26 -
27 - /* (non-Javadoc)
28 - * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List)
29 - */
30 - public Result disambiguate(List<String> terms, Map<String, List<LocalConcept>> meanings) throws PersistenceException {
31 - Map<String, LocalConcept> disambig = new HashMap<String, LocalConcept>(meanings.size());
32 -
33 - LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true);
34 - FeatureCache<LocalConcept, K> features = new FeatureCache<LocalConcept, K>(featureFetcher); //TODO: keep a chain of n caches, resulting in LRU logic.
35 -
36 - for (int i=0; i<terms.size(); i++) {
37 - int from = i-window+1;
38 - int to = i+1;
39 -
40 - if (from<0) from = 0;
41 - if (to>terms.size()) to = terms.size();
42 -
43 - String t = terms.get(i);
44 - LocalConcept m;
45 -
46 - if (to-from < 2) {
47 - Result r = popularityDisambiguator.disambiguate(terms.subList(from, to), meanings);
48 - m = (LocalConcept)r.getMeanings().get(t); //UGLY cast
49 - } else {
50 - List<Map<String, LocalConcept>> interpretations = getInterpretations(from, to, terms, disambig, meanings);
51 - Result r = getBestInterpretation(terms, meanings, interpretations, similarities, features);
52 - m = (LocalConcept)r.getMeanings().get(t); //UGLY cast
53 - }
54 -
55 - disambig.put(t, m);
56 - }
57 -
58 - return getScore(disambig, similarities, features); //FIXME: this is unnecessarily expensive, we usually don't need the scores this calculates.
59 - }
60 -
61 - protected List<Map<String, LocalConcept>> getInterpretations(int from, int to, List<String> terms, Map<String, LocalConcept> known, Map<String, List<LocalConcept>> meanings) {
62 - Map<String, List<LocalConcept>> mset = new HashMap<String, List<LocalConcept>>();
63 -
64 - if (to>terms.size()) to = terms.size();
65 -
66 - for (int i=from; i<to; i++) {
67 - List<LocalConcept> m;
68 -
69 - String t = terms.get(i);
70 - LocalConcept c = known.get(t);
71 -
72 - if (c!=null) m = Collections.singletonList(c);
73 - else m = meanings.get(t);
74 -
75 - mset.put(t, m);
76 - }
77 -
78 - return getInterpretations(terms.subList(from, to), mset);
79 - }
80 -}
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
@@ -0,0 +1,109 @@
 2+package de.brightbyte.wikiword.disambig;
 3+
 4+import java.util.ArrayList;
 5+import java.util.Collections;
 6+import java.util.HashMap;
 7+import java.util.List;
 8+import java.util.Map;
 9+
 10+import de.brightbyte.data.LabeledMatrix;
 11+import de.brightbyte.data.LabeledVector;
 12+import de.brightbyte.data.MapLabeledMatrix;
 13+import de.brightbyte.data.measure.CosineVectorSimilarity;
 14+import de.brightbyte.data.measure.Measure;
 15+import de.brightbyte.data.measure.ScalarVectorSimilarity;
 16+import de.brightbyte.data.measure.Similarity;
 17+import de.brightbyte.util.PersistenceException;
 18+import de.brightbyte.wikiword.model.LocalConcept;
 19+import de.brightbyte.wikiword.model.WikiWordRanking;
 20+
 21+public class SlidingCoherenceDisambiguator<K> extends CoherenceDisambiguator<K> {
 22+
 23+ protected int window ;
 24+
 25+ public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, K> featureFetcher, boolean featuresAreNormalized) {
 26+ this(meaningFetcher, featureFetcher, WikiWordRanking.theCardinality,
 27+ featuresAreNormalized ? ScalarVectorSimilarity.<K>getInstance() : CosineVectorSimilarity.<K>getInstance(), //if pre-normalized, use the scalar product to calculate the cosine
 28+ 5);
 29+ }
 30+
 31+ public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, K> featureFetcher, Measure<WikiWordRanking> popularityMeasure, Similarity<LabeledVector<K>> sim, int window) {
 32+ super(meaningFetcher, featureFetcher, popularityMeasure, sim);
 33+
 34+ this.window = window;
 35+ }
 36+
 37+ /* (non-Javadoc)
 38+ * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List)
 39+ */
 40+ public Result disambiguate(List<String> terms, Map<String, List<LocalConcept>> meanings) throws PersistenceException {
 41+ if (window < 2 || terms.size()<2 || meanings.size()<2)
 42+ return popularityDisambiguator.disambiguate(terms, meanings);
 43+
 44+ //CAVEAT: because the map disambig can contain only one meaning per term, the same term can not occur with two meanings within the same term sequence.
 45+
 46+ Map<String, LocalConcept> disambig = new HashMap<String, LocalConcept>(meanings.size());
 47+
 48+ LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true);
 49+ FeatureCache<LocalConcept, K> features = new FeatureCache<LocalConcept, K>(featureFetcher); //TODO: keep a chain of n caches, resulting in LRU logic.
 50+
 51+ for (int i= window; ; i++) {
 52+ int from = i-window;
 53+ int to = i+1;
 54+
 55+ if (from<0) from = 0;
 56+ if (to>terms.size()) to = terms.size();
 57+
 58+ Result r ;
 59+
 60+ if (to-from < 2) {
 61+ r = popularityDisambiguator.disambiguate(terms.subList(from, to), meanings);
 62+ } else {
 63+ List<Map<String, LocalConcept>> interpretations = getInterpretations(from, to, terms, disambig, meanings);
 64+ r = getBestInterpretation(terms, meanings, interpretations, similarities, features);
 65+ }
 66+
 67+ for (int j=from; j<to; j++) {
 68+ String t = terms.get(j);
 69+ if (disambig.containsKey(t)) continue;
 70+
 71+ LocalConcept m;
 72+
 73+ m = (LocalConcept)r.getMeanings().get(t); //UGLY cast
 74+ if (m!=null) disambig.put(t, m);
 75+ }
 76+
 77+ if (to+1>terms.size()) break;
 78+ }
 79+
 80+ return getScore(disambig, similarities, features); //FIXME: this is unnecessarily expensive, we usually don't need the scores this calculates.
 81+ }
 82+
 83+ protected List<Map<String, LocalConcept>> getInterpretations(int from, int to, List<String> terms, Map<String, LocalConcept> known, Map<String, List<LocalConcept>> meanings) {
 84+ //strip out all terms with no known meaning
 85+ if (meanings.keySet().size() != terms.size()) {
 86+ List<String> t = new ArrayList<String>(terms.size());
 87+ t.addAll(terms);
 88+ t.retainAll(meanings.keySet());
 89+ terms = t;
 90+ }
 91+
 92+ Map<String, List<LocalConcept>> mset = new HashMap<String, List<LocalConcept>>();
 93+
 94+ if (to>terms.size()) to = terms.size();
 95+
 96+ for (int i=from; i<to; i++) {
 97+ List<LocalConcept> m;
 98+
 99+ String t = terms.get(i);
 100+ LocalConcept c = known.get(t);
 101+
 102+ if (c!=null) m = Collections.singletonList(c);
 103+ else m = meanings.get(t);
 104+
 105+ mset.put(t, m);
 106+ }
 107+
 108+ return getInterpretations(terms.subList(from, to), mset);
 109+ }
 110+}
Property changes on: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
___________________________________________________________________
Name: svn:mergeinfo
1111 +
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/StoredMeaningFetcher.java
@@ -4,20 +4,27 @@
55
66 import de.brightbyte.data.cursor.DataSet;
77 import de.brightbyte.util.PersistenceException;
 8+import de.brightbyte.wikiword.ConceptType;
89 import de.brightbyte.wikiword.model.LocalConcept;
910 import de.brightbyte.wikiword.store.LocalConceptStore;
1011
1112 public class StoredMeaningFetcher implements MeaningFetcher<LocalConcept> {
1213 protected LocalConceptStore store;
 14+ protected ConceptType type;
1315
1416 public StoredMeaningFetcher(LocalConceptStore store) {
 17+ this(store, null);
 18+ }
 19+
 20+ public StoredMeaningFetcher(LocalConceptStore store, ConceptType type) {
1521 if (store==null) throw new NullPointerException();
1622
1723 this.store = store;
 24+ this.type = type;
1825 }
1926
2027 public List<LocalConcept> getMeanings(String term) throws PersistenceException {
21 - DataSet<LocalConcept> m = store.getMeanings(term); //FIXME: filter/cut-off rules, sort order! //XXX: relevance value?
 28+ DataSet<LocalConcept> m = store.getMeanings(term, type); //FIXME: filter/cut-off rules, sort order! //XXX: relevance value?
2229 return m.load();
2330 }
2431
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
@@ -8,17 +8,22 @@
99 import de.brightbyte.data.measure.Measure;
1010 import de.brightbyte.data.measure.Measure.Comparator;
1111 import de.brightbyte.wikiword.model.LocalConcept;
 12+import de.brightbyte.wikiword.model.WikiWordRanking;
1213
1314 public class PopularityDisambiguator extends AbstractDisambiguator {
1415
15 - protected Measure<LocalConcept> popularityMeasure;
16 - protected Comparator<LocalConcept> popularityComparator;
 16+ protected Measure<WikiWordRanking> popularityMeasure;
 17+ protected Comparator<WikiWordRanking> popularityComparator;
1718
18 - public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, Measure<LocalConcept> popularityMeasure) {
 19+ public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher) {
 20+ this(meaningFetcher, WikiWordRanking.theCardinality);
 21+ }
 22+
 23+ public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, Measure<WikiWordRanking> popularityMeasure) {
1924 super(meaningFetcher);
2025
2126 this.popularityMeasure = popularityMeasure;
22 - this.popularityComparator = new Measure.Comparator<LocalConcept>(popularityMeasure, true);
 27+ this.popularityComparator = new Measure.Comparator<WikiWordRanking>(popularityMeasure, true);
2328 }
2429
2530 public Result disambiguate(List<String> terms, Map<String, List<LocalConcept>> meanings) {
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
@@ -9,23 +9,32 @@
1010 import de.brightbyte.data.LabeledMatrix;
1111 import de.brightbyte.data.LabeledVector;
1212 import de.brightbyte.data.MapLabeledMatrix;
 13+import de.brightbyte.data.measure.CosineVectorSimilarity;
1314 import de.brightbyte.data.measure.Measure;
 15+import de.brightbyte.data.measure.ScalarVectorSimilarity;
1416 import de.brightbyte.data.measure.Similarity;
1517 import de.brightbyte.util.PersistenceException;
1618 import de.brightbyte.wikiword.model.ConceptFeatures;
1719 import de.brightbyte.wikiword.model.LocalConcept;
 20+import de.brightbyte.wikiword.model.WikiWordRanking;
1821
1922 public class CoherenceDisambiguator<K> extends AbstractDisambiguator {
2023
2124 protected int minPopularity = 2; //FIXME: use complex cutoff specifier!
22 - protected double scoreThreshold = 0.002;
23 - protected double popularityBias = 0.01;
 25+ protected double scoreThreshold = 0.002; //FIXME: magic number
 26+ protected double popularityBias = 0.01; //FIXME: magic number
 27+
2428 protected Similarity<LabeledVector<K>> similarityMeasure;
2529 protected FeatureFetcher<LocalConcept, K> featureFetcher;
26 - protected Measure<LocalConcept> popularityMeasure;
 30+ protected Measure<WikiWordRanking> popularityMeasure;
2731 protected PopularityDisambiguator popularityDisambiguator;
2832
29 - public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, K> featureFetcher, Measure<LocalConcept> popularityMeasure, Similarity<LabeledVector<K>> sim) {
 33+ public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, K> featureFetcher, boolean featuresAreNormalized) {
 34+ this(meaningFetcher, featureFetcher, WikiWordRanking.theCardinality,
 35+ featuresAreNormalized ? ScalarVectorSimilarity.<K>getInstance() : CosineVectorSimilarity.<K>getInstance()); //if pre-normalized, use the scalar product to calculate the cosine
 36+ }
 37+
 38+ public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, K> featureFetcher, Measure<WikiWordRanking> popularityMeasure, Similarity<LabeledVector<K>> sim) {
3039 super(meaningFetcher);
3140
3241 if (popularityMeasure==null) throw new NullPointerException();
@@ -83,9 +92,8 @@
8493 * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List)
8594 */
8695 public Result disambiguate(List<String> terms, Map<String, List<LocalConcept>> meanings) throws PersistenceException {
87 - if (meanings.size()==1) {
88 - return popularityDisambiguator.disambiguate(terms, meanings);
89 - }
 96+ if (terms.size()<2 || meanings.size()<2)
 97+ return popularityDisambiguator.disambiguate(terms, meanings);
9098
9199 LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true);
92100 FeatureCache<LocalConcept, K> features = new FeatureCache<LocalConcept, K>(featureFetcher); //TODO: keep a chain of n caches, resulting in LRU logic.
@@ -136,10 +144,7 @@
137145
138146 protected List<Map<String, LocalConcept>> getInterpretations(List<String> terms, Map<String, List<LocalConcept>> meanings) {
139147 if (terms.size()==0) {
140 - List<Map<String, LocalConcept>> combinations = new ArrayList<Map<String, LocalConcept>>();
141 - Map<String, LocalConcept> e = new HashMap<String, LocalConcept>();
142 - combinations.add(e);
143 - return combinations;
 148+ return Collections.singletonList(Collections.<String, LocalConcept>emptyMap());
144149 }
145150
146151 String t = terms.get(0);
@@ -153,6 +158,9 @@
154159
155160 for (Map<String, LocalConcept> be: base) {
156161 for (LocalConcept c: m) {
 162+ double p = popularityMeasure.measure(c);
 163+ if (p<minPopularity) continue;
 164+
157165 Map<String, LocalConcept> e = new HashMap<String, LocalConcept>();
158166 e.putAll(be);
159167 e.put(t, c);
@@ -165,7 +173,7 @@
166174 return interpretations;
167175 }
168176
169 - protected Result getScore(Map<String, LocalConcept> interp, LabeledMatrix<LocalConcept, LocalConcept> similarities, FeatureCache features) throws PersistenceException {
 177+ protected Result getScore(Map<String, LocalConcept> interp, LabeledMatrix<LocalConcept, LocalConcept> similarities, FeatureCache<LocalConcept, K> features) throws PersistenceException {
170178 double sim = 0;
171179 double pop = 0;
172180
@@ -190,6 +198,13 @@
191199 ConceptFeatures<LocalConcept, K> fa = features.getFeatures(a);
192200 ConceptFeatures<LocalConcept, K> fb = features.getFeatures(b);
193201
 202+ //force relevance/cardinality to the figures from the meaning lookup
 203+ //not strictly necessary, but nice to keep it consistent.
 204+ fa.getConceptReference().setCardinality(a.getCardinality());
 205+ fa.getConceptReference().setRelevance(a.getRelevance());
 206+ fb.getConceptReference().setCardinality(b.getCardinality());
 207+ fb.getConceptReference().setRelevance(b.getRelevance());
 208+
194209 d = similarityMeasure.similarity(fa.getFeatureVector(), fb.getFeatureVector());
195210 similarities.set(a, b, d);
196211 }
@@ -197,12 +212,12 @@
198213
199214 if (d<0) throw new IllegalArgumentException("encountered negative similarity score ("+d+") for "+a+" / "+b);
200215 sim += d;
201 - n ++; //should add up to combo.size*(combo.size()-1)/2, according to Gauss
 216+ n ++; //should add up to interp.size()*(interp.size()-1)/2, according to Gauss
202217 }
203218
204 - int card = a.getCardinality(); //XXX: this may be local cardinality (indegree), we want the frequency of the meaning-assignment!
205 - if (card<=0) card= 1;
206 - pop += card;
 219+ double p = popularityMeasure.measure(a);
 220+ if (p<1) p= 1;
 221+ pop += p;
207222 c ++;
208223 }
209224
@@ -210,7 +225,7 @@
211226 sim = sim / n;
212227 pop = pop / c;
213228
214 - double popf = 1 - 1/(Math.sqrt(pop)+1); //converge against 1
 229+ double popf = 1 - 1/(Math.sqrt(pop)+1); //converge against 1 //XXX: black voodoo magic ad hoc formula with no deeper meaning.
215230
216231 double score = popf * popularityBias + sim * ( 1 - popularityBias );
217232 return new Result(interp, score, sim, pop);
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java
@@ -1,5 +1,6 @@
22 package de.brightbyte.wikiword.disambig;
33
 4+import java.util.ArrayList;
45 import java.util.HashMap;
56 import java.util.List;
67 import java.util.Map;
@@ -24,7 +25,7 @@
2526
2627 for (String t: terms) {
2728 List<LocalConcept> m = meaningFetcher.getMeanings(t);
28 - meanings.put(t, m);
 29+ if (m!=null && m.size()>0) meanings.put(t, m);
2930 }
3031
3132 return meanings;
@@ -32,7 +33,6 @@
3334
3435 public Result disambiguate(List<String> terms) throws PersistenceException {
3536 Map<String, List<LocalConcept>> meanings = fetchMeanings(terms);
36 -
3737 return disambiguate(terms, meanings);
3838 }
3939
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseLocalConceptStore.java
@@ -119,10 +119,12 @@
120120 */
121121 }
122122
123 - protected String meaningWhere(String term) {
124 - return " JOIN "+meaningTable.getSQLName()+" as M ON C.id = M.concept " +
125 - " WHERE M.term_text = "+database.quoteString(term)+" " +
126 - " ORDER BY freq DESC";
 123+ protected String meaningWhere(String term, ConceptType t) {
 124+ String sql = " JOIN "+meaningTable.getSQLName()+" as M ON C.id = M.concept ";
 125+ sql += " WHERE M.term_text = "+database.quoteString(term)+" ";
 126+ if (t!=null) sql += " AND C.type = "+t.getCode()+" ";
 127+ sql += " ORDER BY freq DESC";
 128+ return sql;
127129 }
128130
129131 @Override
@@ -143,10 +145,14 @@
144146 return corpus;
145147 }
146148
147 - public DataSet<LocalConceptReference> listMeanings(String term)
 149+ public DataSet<LocalConceptReference> listMeanings(String term) throws PersistenceException {
 150+ return this.listMeanings(term, null);
 151+ }
 152+
 153+ public DataSet<LocalConceptReference> listMeanings(String term, ConceptType t)
148154 throws PersistenceException {
149155
150 - String sql = referenceSelect("M.freq") + meaningWhere(term);
 156+ String sql = referenceSelect("M.freq") + meaningWhere(term, t);
151157
152158 return new QueryDataSet<LocalConceptReference>(database, getRowReferenceFactory(), "listMeanings", sql, false);
153159 }
@@ -159,6 +165,10 @@
160166 return ((DatabaseLocalConceptInfoStore)getConceptInfoStore()).getMeanings(term);
161167 }
162168
 169+ public DataSet<LocalConcept> getMeanings(String term, ConceptType t) throws PersistenceException {
 170+ return ((DatabaseLocalConceptInfoStore)getConceptInfoStore()).getMeanings(term, t);
 171+ }
 172+
163173 public TermReference pickRandomTerm(int top) throws PersistenceException {
164174 return ((LocalStatisticsStore<LocalConcept, LocalConceptReference>)getStatisticsStore()).pickRandomTerm(top);
165175 }
@@ -313,10 +323,16 @@
314324 }
315325
316326
317 - public DataSet<LocalConcept> getMeanings(String term)
 327+ public DataSet<LocalConcept> getMeanings(String term)
318328 throws PersistenceException {
 329+
 330+ return getMeanings(term, null);
 331+ }
319332
320 - String sql = conceptSelect("M.freq") + meaningWhere(term);
 333+ public DataSet<LocalConcept> getMeanings(String term, ConceptType t)
 334+ throws PersistenceException {
 335+
 336+ String sql = conceptSelect("M.freq") + meaningWhere(term, t);
321337
322338 return new QueryDataSet<LocalConcept>(database, new ConceptFactory(), "getMeanins", sql, false);
323339 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordConceptStore.java
@@ -279,6 +279,10 @@
280280 }
281281 }
282282
 283+ public FeatureStore<T, Integer> getFeatureStore() throws PersistenceException {
 284+ return getProximityStore();
 285+ }
 286+
283287 public ProximityStore<T, R, Integer> getProximityStore() throws PersistenceException {
284288 try {
285289 if (proximityStore==null) proximityStore = newProximityStore();
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/LocalConceptStore.java
@@ -2,6 +2,7 @@
33
44 import de.brightbyte.data.cursor.DataSet;
55 import de.brightbyte.util.PersistenceException;
 6+import de.brightbyte.wikiword.ConceptType;
67 import de.brightbyte.wikiword.model.LocalConcept;
78 import de.brightbyte.wikiword.model.LocalConceptReference;
89 import de.brightbyte.wikiword.model.TermReference;
@@ -17,9 +18,13 @@
1819 public abstract DataSet<ConceptReference> getNarrowerConcepts() throws PersistenceException;
1920 */
2021
21 - //TODO: relevance limit? order?
 22+ //TODO: relevance limit? order? filter?
 23+ public abstract DataSet<LocalConceptReference> listMeanings(String term, ConceptType t) throws PersistenceException;
 24+
2225 public abstract DataSet<LocalConceptReference> listMeanings(String term) throws PersistenceException;
2326
 27+ public abstract DataSet<LocalConcept> getMeanings(String term, ConceptType t) throws PersistenceException;
 28+
2429 public abstract DataSet<LocalConcept> getMeanings(String term) throws PersistenceException;
2530
2631 public int getNumberOfTerms() throws PersistenceException;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/WikiWordConceptStore.java
@@ -15,6 +15,7 @@
1616
1717 public StatisticsStore getStatisticsStore() throws PersistenceException;
1818 public ConceptInfoStore<T> getConceptInfoStore() throws PersistenceException;
 19+ public FeatureStore<T, Integer> getFeatureStore() throws PersistenceException;
1920 public ProximityStore<T, R, Integer> getProximityStore() throws PersistenceException;
2021
2122 public T getConcept(int id) throws PersistenceException;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseFeatureStore.java
@@ -148,13 +148,15 @@
149149 String conceptField, String nameField, String cardinalityField, String relevanceField,
150150 String keyField, String valueField) throws PersistenceException {
151151 try {
152 - LabeledVector<Integer> v = readVector(rs, conceptField, keyField, valueField, new MapLabeledVector<Integer>());
153 -
 152+ rs.next(); //TODO: what to return if this fails?
154153 int id = DatabaseUtil.asInt(rs.getObject(conceptField));
155154 String n = nameField == null ? null : DatabaseUtil.asString(rs.getObject(nameField));
156155 int c = cardinalityField == null ? 1 : DatabaseUtil.asInt(rs.getObject(cardinalityField));
157156 double r = relevanceField == null ? 1 : DatabaseUtil.asDouble(rs.getObject(relevanceField));
 157+ rs.previous();
158158
 159+ LabeledVector<Integer> v = readVector(rs, conceptField, keyField, valueField, new MapLabeledVector<Integer>());
 160+
159161 R ref = referenceFactory.newInstance(id, n, c, r);
160162 return new ConceptFeatures<T, Integer>(ref, v);
161163 } catch (SQLException e) {
@@ -174,7 +176,7 @@
175177 Object c = rs.getObject(conceptField);
176178 if (concept<0) concept = DatabaseUtil.asInt(c);
177179 else if (concept!=DatabaseUtil.asInt(c)) {
178 - rs.previous(); //push back
 180+ if (!rs.previous()) throw new RuntimeException ("push-back failed on result set! "+rs.getClass()); //push back
179181 break;
180182 }
181183 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordConcept.java
@@ -17,6 +17,7 @@
1818
1919 public WikiWordConcept(WikiWordConceptReference reference, DatasetIdentifier dataset, ConceptType type) {
2020 if (type==null) throw new NullPointerException();
 21+ if (reference==null) throw new NullPointerException();
2122
2223 this.dataset = dataset;
2324 this.type = type;
@@ -44,17 +45,25 @@
4546 }
4647
4748 public int getCardinality() {
48 - return reference==null ? 1 : reference.getCardinality();
 49+ return reference.getCardinality();
4950 }
5051
5152 public double getRelevance() {
52 - return reference==null ? 1 : reference.getRelevance();
 53+ return reference.getRelevance();
5354 }
5455
5556 public boolean hasRanking() {
56 - return reference != null && ( reference.getCardinality()>0 || reference.getRelevance()>0 );
 57+ return ( reference.getCardinality()>0 || reference.getRelevance()>0 );
5758 }
5859
 60+ public void setCardinality(int cardinality) {
 61+ reference.setCardinality(cardinality);
 62+ }
 63+
 64+ public void setRelevance(double relevance) {
 65+ reference.setRelevance(relevance);
 66+ }
 67+
5968 @Override
6069 public int hashCode() {
6170 return reference.hashCode();
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordReference.java
@@ -39,8 +39,8 @@
4040 protected final int id;
4141 protected final String name;
4242
43 - protected final int cardinality;
44 - protected final double relevance;
 43+ protected int cardinality;
 44+ protected double relevance;
4545
4646 public WikiWordReference(final int id, final String name, final int cardinality, final double relevance) {
4747 this.cardinality = cardinality;
@@ -57,7 +57,14 @@
5858 return relevance;
5959 }
6060
 61+ public void setRelevance(double relevance) {
 62+ this.relevance = relevance;
 63+ }
6164
 65+ public void setCardinality(int cardinality) {
 66+ this.cardinality = cardinality;
 67+ }
 68+
6269 public int getId() {
6370 return id;
6471 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java
@@ -17,16 +17,22 @@
1818 import de.brightbyte.util.PersistenceException;
1919 import de.brightbyte.wikiword.ConsoleApp;
2020 import de.brightbyte.wikiword.Corpus;
 21+import de.brightbyte.wikiword.disambig.Disambiguator;
 22+import de.brightbyte.wikiword.disambig.SlidingCoherenceDisambiguator;
 23+import de.brightbyte.wikiword.disambig.StoredFeatureFetcher;
 24+import de.brightbyte.wikiword.disambig.StoredMeaningFetcher;
2125 import de.brightbyte.wikiword.model.AbstractConceptOutput;
2226 import de.brightbyte.wikiword.model.ConceptFeatures;
2327 import de.brightbyte.wikiword.model.ConceptOutput;
2428 import de.brightbyte.wikiword.model.GlobalConcept;
2529 import de.brightbyte.wikiword.model.LocalConcept;
 30+import de.brightbyte.wikiword.model.LocalConceptReference;
2631 import de.brightbyte.wikiword.model.WikiWordConcept;
2732 import de.brightbyte.wikiword.model.WikiWordConceptReference;
2833 import de.brightbyte.wikiword.model.WikiWordReference;
2934 import de.brightbyte.wikiword.rdf.RdfOutput;
3035 import de.brightbyte.wikiword.store.DatabaseConceptStores;
 36+import de.brightbyte.wikiword.store.FeatureStore;
3137 import de.brightbyte.wikiword.store.GlobalConceptStore;
3238 import de.brightbyte.wikiword.store.LocalConceptStore;
3339 import de.brightbyte.wikiword.store.ProximityStore;
@@ -34,6 +40,8 @@
3541
3642 public class QueryConsole extends ConsoleApp<WikiWordConceptStore> {
3743
 44+ protected Disambiguator disambiguator;
 45+
3846 public QueryConsole() {
3947 super(true, true);
4048 }
@@ -104,6 +112,10 @@
105113 output.writeConcepts(meanings);
106114 }
107115
 116+ public void writeConceptReferences(DataSet<? extends WikiWordConceptReference<? extends WikiWordConcept>> meanings) throws PersistenceException {
 117+ output.writeConceptReferences(meanings);
 118+ }
 119+
108120 public void writeGlobalConcept(GlobalConcept concept) throws PersistenceException {
109121 output.writeGlobalConcept(concept);
110122 }
@@ -123,6 +135,17 @@
124136 }
125137 }
126138
 139+ public void writeInterpretation(Map<String, ? extends WikiWordConcept> interp) throws PersistenceException {
 140+ //XXX: hack!
 141+ try {
 142+ writer.write(interp.toString());
 143+ writer.write("\n");
 144+ writer.flush();
 145+ } catch (IOException e) {
 146+ throw new PersistenceException(e);
 147+ }
 148+ }
 149+
127150 }
128151
129152 protected class ConceptDumper extends AbstractConceptOutput {
@@ -344,6 +367,10 @@
345368 String id = params.get(1);
346369 showFeatureVector(Integer.parseInt(id), out);
347370 }
 371+ else if (cmd.equals("d") || cmd.equals("dis") || cmd.equals("disambig") || cmd.equals("disambiguate")) {
 372+ List<String> terms = params.subList(1,params.size());
 373+ showDisambiguation(terms, out);
 374+ }
348375 else if (cmd.equals("ls") || cmd.equals("list")) {
349376 listConcepts(out);
350377 }
@@ -369,6 +396,21 @@
370397 return conceptStore.getProximityStore();
371398 }
372399
 400+ protected FeatureStore<LocalConcept, Integer> getFeatureStore() throws PersistenceException {
 401+ return conceptStore.getFeatureStore();
 402+ }
 403+
 404+ protected Disambiguator getDisambiguator() throws PersistenceException {
 405+ if (disambiguator==null) disambiguator =
 406+ new SlidingCoherenceDisambiguator<Integer>(
 407+ new StoredMeaningFetcher(getLocalConceptStore()),
 408+ new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore()),
 409+ true
 410+ );
 411+
 412+ return disambiguator;
 413+ }
 414+
373415 public void dumpStats() throws PersistenceException {
374416 Map<String, ? extends Number> m = ((WikiWordConceptStore)conceptStore).getStatisticsStore().getStatistics();
375417
@@ -387,8 +429,8 @@
388430 }
389431
390432 public void listMeaningsLocal(String term, ConsoleOutput out) throws PersistenceException {
391 - DataSet<LocalConcept> meanings = getLocalConceptStore().getMeanings(term);
392 - out.writeConcepts(meanings);
 433+ DataSet<LocalConceptReference> meanings = getLocalConceptStore().listMeanings(term);
 434+ out.writeConceptReferences(meanings);
393435 }
394436
395437 public void listMeaningsGlobal(String lang, String term, ConsoleOutput out) throws PersistenceException {
@@ -442,6 +484,11 @@
443485 out.writeFeatureVector(conceptFeatures.getFeatureVector());
444486 }
445487
 488+ public void showDisambiguation(List<String> terms, ConsoleOutput out) throws PersistenceException {
 489+ Disambiguator.Result r = getDisambiguator().disambiguate(terms);
 490+ out.writeInterpretation(r.getMeanings());
 491+ }
 492+
446493 public static void main(String[] argv) throws Exception {
447494 QueryConsole q = new QueryConsole();
448495 q.launch(argv);

Status & tagging log