Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/PopularityDisambiguatorTest.java |
— | — | @@ -13,6 +13,7 @@ |
14 | 14 | import de.brightbyte.wikiword.model.PhraseOccurance; |
15 | 15 | import de.brightbyte.wikiword.model.PhraseOccuranceSet; |
16 | 16 | import de.brightbyte.wikiword.model.TermListNode; |
| 17 | +import de.brightbyte.wikiword.model.TermReference; |
17 | 18 | |
18 | 19 | public class PopularityDisambiguatorTest extends DisambiguatorTestBase { |
19 | 20 | |
— | — | @@ -21,7 +22,7 @@ |
22 | 23 | } |
23 | 24 | |
24 | 25 | public void testGetTermsForList() throws PersistenceException { |
25 | | - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10); |
| 26 | + PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
26 | 27 | |
27 | 28 | Term uk = new Term("UK"); |
28 | 29 | Term london = new Term("London"); |
— | — | @@ -47,7 +48,7 @@ |
48 | 49 | |
49 | 50 | //FIXME: Test case for getHorizon |
50 | 51 | |
51 | | - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10); |
| 52 | + PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
52 | 53 | |
53 | 54 | Collection<PhraseOccurance> terms = disambiguator.getTerms(set.getRootNode(), 0); |
54 | 55 | assertTrue("empty term set", sameElements( getBankAndMonumentTerms(0), terms) ); |
— | — | @@ -60,7 +61,7 @@ |
61 | 62 | } |
62 | 63 | |
63 | 64 | public void testGetMeaningsForList() throws PersistenceException { |
64 | | - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10); |
| 65 | + PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
65 | 66 | |
66 | 67 | Term uk = new Term("UK"); |
67 | 68 | Term london = new Term("London"); |
— | — | @@ -79,7 +80,7 @@ |
80 | 81 | } |
81 | 82 | |
82 | 83 | public void testGetMeaningsForNode() throws PersistenceException { |
83 | | - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10); |
| 84 | + PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
84 | 85 | |
85 | 86 | PhraseOccuranceSet set = getBankAndMonumentPhrases(); |
86 | 87 | Map<PhraseOccurance, List<? extends LocalConcept>> res = disambiguator.getMeanings(set.getRootNode()); |
— | — | @@ -94,7 +95,7 @@ |
95 | 96 | } |
96 | 97 | |
97 | 98 | public void testGetSequences() throws PersistenceException { |
98 | | - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10); |
| 99 | + PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
99 | 100 | PhraseOccuranceSet set = getBankAndMonumentPhrases(); |
100 | 101 | |
101 | 102 | Collection<List<PhraseOccurance>> res = disambiguator.getSequences(set.getRootNode(), 1); |
— | — | @@ -108,7 +109,7 @@ |
109 | 110 | } |
110 | 111 | |
111 | 112 | public void testDisambiguateTerms() throws PersistenceException { |
112 | | - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10); |
| 113 | + PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
113 | 114 | |
114 | 115 | Term uk = new Term("UK"); |
115 | 116 | Term london = new Term("London"); |
— | — | @@ -127,7 +128,7 @@ |
128 | 129 | public void testDisambiguateNode() throws PersistenceException { |
129 | 130 | PhraseOccuranceSet set = getBankAndMonumentPhrases(); |
130 | 131 | |
131 | | - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10); |
| 132 | + PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
132 | 133 | disambiguator.setTrace(traceOutput); |
133 | 134 | |
134 | 135 | Disambiguation<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null); |
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/DisambiguatorTestBase.java |
— | — | @@ -28,10 +28,11 @@ |
29 | 29 | import de.brightbyte.wikiword.model.PhraseOccurance; |
30 | 30 | import de.brightbyte.wikiword.model.PhraseOccuranceSet; |
31 | 31 | import de.brightbyte.wikiword.model.TermReference; |
| 32 | +import de.brightbyte.wikiword.model.WikiWordConcept; |
32 | 33 | |
33 | 34 | public class DisambiguatorTestBase extends TestCase { |
34 | 35 | |
35 | | - protected Map<String, List<? extends LocalConcept>> meanings = new HashMap<String, List<? extends LocalConcept>>(); |
| 36 | + protected Map<String, List<LocalConcept>> meanings = new HashMap<String, List<LocalConcept>>(); |
36 | 37 | protected Map<Integer, ConceptFeatures<LocalConcept, Integer>> features = new HashMap<Integer, ConceptFeatures<LocalConcept, Integer>>(); |
37 | 38 | protected Map<Integer, LocalConcept> conceptsById = new HashMap<Integer, LocalConcept>(); |
38 | 39 | protected Map<String, LocalConcept> conceptsByName = new HashMap<String, LocalConcept>(); |
— | — | @@ -48,7 +49,7 @@ |
49 | 50 | return new GroupingCursor<List<String>, String>(c, new ListAbstractor.Accessor<String>(groupBy)); |
50 | 51 | } |
51 | 52 | |
52 | | - protected static void readMeanings(Corpus corpus, InputStream in, Map<String, List<? extends LocalConcept>> meanings) throws IOException, PersistenceException { |
| 53 | + protected static void readMeanings(Corpus corpus, InputStream in, Map<String, List<LocalConcept>> meanings) throws IOException, PersistenceException { |
53 | 54 | DataCursor<List<List<String>>> cursor = openGroupedTableCursor(in, "UTF-8", 0, true); |
54 | 55 | |
55 | 56 | List<List<String>> group; |
— | — | @@ -112,19 +113,19 @@ |
113 | 114 | |
114 | 115 | protected MeaningFetcher<LocalConcept> meaningFetcher = new MeaningFetcher<LocalConcept>() { |
115 | 116 | |
116 | | - public <X extends TermReference> Map<X, List<? extends LocalConcept>> getMeanings( |
| 117 | + public <X extends TermReference> Map<X, List<LocalConcept>> getMeanings( |
117 | 118 | Collection<X> terms) throws PersistenceException { |
118 | | - Map<X, List<? extends LocalConcept>> m = new HashMap<X, List<? extends LocalConcept>>(); |
| 119 | + Map<X, List<LocalConcept>> m = new HashMap<X, List<LocalConcept>>(); |
119 | 120 | |
120 | 121 | for (X t: terms) { |
121 | | - List<? extends LocalConcept> n = getMeanings(t.getTerm()); |
| 122 | + List<LocalConcept> n = getMeanings(t.getTerm()); |
122 | 123 | if (n!=null) m.put(t, n); |
123 | 124 | } |
124 | 125 | |
125 | 126 | return m; |
126 | 127 | } |
127 | 128 | |
128 | | - public List<? extends LocalConcept> getMeanings(String term) |
| 129 | + public List<LocalConcept> getMeanings(String term) |
129 | 130 | throws PersistenceException { |
130 | 131 | return meanings.get(term); |
131 | 132 | } |
— | — | @@ -194,11 +195,11 @@ |
195 | 196 | return c; |
196 | 197 | } |
197 | 198 | |
198 | | - protected <X extends TermReference>Map<X, List<? extends LocalConcept>> getMeanings(Collection<List<X>> sequences) throws PersistenceException { |
199 | | - Map<X, List<? extends LocalConcept>> m = new HashMap<X, List<? extends LocalConcept>>(); |
| 199 | + protected <X extends TermReference>Map<X, List<LocalConcept>> getMeanings(Collection<List<X>> sequences) throws PersistenceException { |
| 200 | + Map<X, List<LocalConcept>> m = new HashMap<X, List<LocalConcept>>(); |
200 | 201 | |
201 | 202 | for (List<X> seq: sequences) { |
202 | | - Map<X, List<? extends LocalConcept>> meanings = meaningFetcher.getMeanings(seq); |
| 203 | + Map<X, List<LocalConcept>> meanings = meaningFetcher.getMeanings(seq); |
203 | 204 | m.putAll(meanings); |
204 | 205 | } |
205 | 206 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/StoredMeaningFetcher.java |
— | — | @@ -8,40 +8,40 @@ |
9 | 9 | import de.brightbyte.data.cursor.DataSet; |
10 | 10 | import de.brightbyte.io.Output; |
11 | 11 | import de.brightbyte.util.PersistenceException; |
12 | | -import de.brightbyte.wikiword.model.LocalConcept; |
| 12 | +import de.brightbyte.wikiword.model.WikiWordConcept; |
13 | 13 | import de.brightbyte.wikiword.model.TermReference; |
14 | | -import de.brightbyte.wikiword.store.LocalConceptStore; |
| 14 | +import de.brightbyte.wikiword.store.WikiWordConceptStore; |
15 | 15 | import de.brightbyte.wikiword.store.WikiWordConceptStore.ConceptQuerySpec; |
16 | 16 | |
17 | | -public class StoredMeaningFetcher implements MeaningFetcher<LocalConcept> { |
18 | | - protected LocalConceptStore store; |
| 17 | +public class StoredMeaningFetcher implements MeaningFetcher<WikiWordConcept> { |
| 18 | + protected WikiWordConceptStore store; |
19 | 19 | protected ConceptQuerySpec spec; |
20 | 20 | protected Output trace; |
21 | 21 | |
22 | | - public StoredMeaningFetcher(LocalConceptStore store) { |
| 22 | + public StoredMeaningFetcher(WikiWordConceptStore store) { |
23 | 23 | this(store, null); |
24 | 24 | } |
25 | 25 | |
26 | | - public StoredMeaningFetcher(LocalConceptStore store, ConceptQuerySpec type) { |
| 26 | + public StoredMeaningFetcher(WikiWordConceptStore store, ConceptQuerySpec type) { |
27 | 27 | if (store==null) throw new NullPointerException(); |
28 | 28 | |
29 | 29 | this.store = store; |
30 | 30 | this.spec = type; |
31 | 31 | } |
32 | 32 | |
33 | | - public List<LocalConcept> getMeanings(String term) throws PersistenceException { |
34 | | - DataSet<LocalConcept> m = store.getMeanings(term, spec); //FIXME: filter/cut-off rules, sort order! //XXX: relevance value? |
35 | | - List<LocalConcept> meanigns = m.load(); |
| 33 | + public List<WikiWordConcept> getMeanings(String term) throws PersistenceException { |
| 34 | + DataSet<WikiWordConcept> m = store.getMeanings(term, spec); //FIXME: filter/cut-off rules, sort order! //XXX: relevance value? |
| 35 | + List<WikiWordConcept> meanigns = m.load(); |
36 | 36 | |
37 | 37 | trace("fetched "+meanigns.size()+" meanings for \""+term+"\""); |
38 | 38 | return meanigns; |
39 | 39 | } |
40 | 40 | |
41 | | - public <X extends TermReference> Map<X, List<? extends LocalConcept>> getMeanings(Collection<X> terms) throws PersistenceException { |
42 | | - Map<X, List<? extends LocalConcept>> meanings = new HashMap<X, List<? extends LocalConcept>>(); |
| 41 | + public <X extends TermReference> Map<X, List<WikiWordConcept>> getMeanings(Collection<X> terms) throws PersistenceException { |
| 42 | + Map<X, List<WikiWordConcept>> meanings = new HashMap<X, List<WikiWordConcept>>(); |
43 | 43 | |
44 | 44 | for (X t: terms) { |
45 | | - List<LocalConcept> m = getMeanings(t.getTerm()); |
| 45 | + List<WikiWordConcept> m = getMeanings(t.getTerm()); |
46 | 46 | if (m!=null && m.size()>0) meanings.put(t, m); |
47 | 47 | } |
48 | 48 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java |
— | — | @@ -22,14 +22,13 @@ |
23 | 23 | import de.brightbyte.util.PersistenceException; |
24 | 24 | import de.brightbyte.util.SanityException; |
25 | 25 | import de.brightbyte.wikiword.model.ConceptFeatures; |
26 | | -import de.brightbyte.wikiword.model.LocalConcept; |
27 | 26 | import de.brightbyte.wikiword.model.PhraseNode; |
28 | 27 | import de.brightbyte.wikiword.model.TermReference; |
29 | 28 | import de.brightbyte.wikiword.model.WikiWordConcept; |
30 | 29 | |
31 | | -public class CoherenceDisambiguator extends AbstractDisambiguator<TermReference, LocalConcept> { |
| 30 | +public class CoherenceDisambiguator<T extends TermReference, C extends WikiWordConcept> extends AbstractDisambiguator<T, C> { |
32 | 31 | |
33 | | - public static class CoherenceDisambiguation<T extends TermReference, C extends LocalConcept> extends Disambiguator.Disambiguation<T, C> { |
| 32 | + public static class CoherenceDisambiguation<T extends TermReference, C extends WikiWordConcept> extends Disambiguator.Disambiguation<T, C> { |
34 | 33 | protected LabeledVector<Integer> centroid; |
35 | 34 | protected Map<Integer, ConceptFeatures<C, Integer>> features; |
36 | 35 | |
— | — | @@ -66,12 +65,12 @@ |
67 | 66 | //protected double popularityBias = 0.2; //FIXME: magic number. should "somehow" match popularityNormalizer and similarityNormalizer |
68 | 67 | //protected double weightBias = 0.5; //FIXME: magic number. should "somehow" match popularityNormalizer |
69 | 68 | |
70 | | - protected FeatureFetcher<LocalConcept, Integer> featureFetcher; |
| 69 | + protected FeatureFetcher<C, Integer> featureFetcher; |
71 | 70 | |
72 | 71 | protected Similarity<LabeledVector<Integer>> similarityMeasure; |
73 | | - protected Measure<WikiWordConcept> popularityMeasure; |
74 | | - protected PopularityDisambiguator popularityDisambiguator; |
75 | | - protected Comparator<LocalConcept> popularityComparator; |
| 72 | + protected Measure<? super C> popularityMeasure; |
| 73 | + protected PopularityDisambiguator<T, C> popularityDisambiguator; |
| 74 | + protected Comparator<? super C> popularityComparator; |
76 | 75 | |
77 | 76 | private Functor.Double popularityNormalizer = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonically |
78 | 77 | public double apply(double pop) { |
— | — | @@ -90,18 +89,18 @@ |
91 | 90 | protected Functor2.Double weightCombiner = ProductCombiner.instance; |
92 | 91 | protected Functor.Double weightBooster = SquareBooster.instance; |
93 | 92 | |
94 | | - public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheCapacity) { |
| 93 | + public CoherenceDisambiguator(MeaningFetcher<? extends C> meaningFetcher, FeatureFetcher<C, Integer> featureFetcher, int cacheCapacity) { |
95 | 94 | this(meaningFetcher, featureFetcher, cacheCapacity, null, null); |
96 | 95 | } |
97 | 96 | |
98 | | - public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheCapacity, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim) { |
| 97 | + public CoherenceDisambiguator(MeaningFetcher<? extends C> meaningFetcher, FeatureFetcher<C, Integer> featureFetcher, int cacheCapacity, Measure<? super C> popularityMeasure, Similarity<LabeledVector<Integer>> sim) { |
99 | 98 | super(meaningFetcher, cacheCapacity); |
100 | 99 | |
101 | 100 | if (popularityMeasure==null) popularityMeasure = WikiWordConcept.theCardinality; |
102 | 101 | if (sim==null) sim = featureFetcher.getFeaturesAreNormalized() ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance(); //if pre-normalized, use scalar to calculate cosine |
103 | 102 | if (featureFetcher==null) throw new NullPointerException(); |
104 | 103 | |
105 | | - if (cacheCapacity>0) featureFetcher = new CachingFeatureFetcher<LocalConcept, Integer>(featureFetcher, cacheCapacity); |
| 104 | + if (cacheCapacity>0) featureFetcher = new CachingFeatureFetcher<C, Integer>(featureFetcher, cacheCapacity); |
106 | 105 | |
107 | 106 | this.featureFetcher = featureFetcher; |
108 | 107 | this.popularityDisambiguator = new PopularityDisambiguator(getMeaningFetcher(), 0, popularityMeasure); |
— | — | @@ -110,7 +109,7 @@ |
111 | 110 | this.setSimilarityMeasure(sim); |
112 | 111 | } |
113 | 112 | |
114 | | - public FeatureFetcher<LocalConcept, Integer> getFeatureFetcher() { |
| 113 | + public FeatureFetcher<C, Integer> getFeatureFetcher() { |
115 | 114 | return featureFetcher; |
116 | 115 | } |
117 | 116 | |
— | — | @@ -123,7 +122,7 @@ |
124 | 123 | this.popularityNormalizer = popularityFactor; |
125 | 124 | } |
126 | 125 | |
127 | | - public Measure<WikiWordConcept> getPopularityMeasure() { |
| 126 | + public Measure<? super C> getPopularityMeasure() { |
128 | 127 | return popularityMeasure; |
129 | 128 | } |
130 | 129 | |
— | — | @@ -140,10 +139,10 @@ |
141 | 140 | return weightCombiner; |
142 | 141 | } |
143 | 142 | |
144 | | - public void setPopularityMeasure(Measure<WikiWordConcept> popularityMeasure) { |
| 143 | + public void setPopularityMeasure(Measure<? super C> popularityMeasure) { |
145 | 144 | this.popularityMeasure = popularityMeasure; |
146 | 145 | this.popularityDisambiguator.setPopularityMeasure(popularityMeasure); |
147 | | - this.popularityComparator = new Measure.Comparator<LocalConcept>(popularityMeasure, true); |
| 146 | + this.popularityComparator = new Measure.Comparator<C>(popularityMeasure, true); |
148 | 147 | } |
149 | 148 | |
150 | 149 | public void setWeightCombiner(Functor2.Double weightCombiner) { |
— | — | @@ -167,7 +166,7 @@ |
168 | 167 | this.similarityNormalizer = similarityFactor; |
169 | 168 | } |
170 | 169 | |
171 | | - public void setFeatureFetcher(FeatureFetcher<LocalConcept, Integer> featureFetcher) { |
| 170 | + public void setFeatureFetcher(FeatureFetcher<C, Integer> featureFetcher) { |
172 | 171 | this.featureFetcher = featureFetcher; |
173 | 172 | } |
174 | 173 | |
— | — | @@ -205,10 +204,10 @@ |
206 | 205 | this.maxMeanings = maxMeanings; |
207 | 206 | } |
208 | 207 | |
209 | | - protected FeatureFetcher<LocalConcept, Integer> getFeatureCache(Map<? extends TermReference, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) throws PersistenceException { |
| 208 | + protected FeatureFetcher<C, Integer> getFeatureCache(Map<? extends T, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
210 | 209 | //NOTE: pre-fetch all features in one go |
211 | | - List<LocalConcept> concepts = new ArrayList<LocalConcept>(meanings.size()*10); |
212 | | - for (List<? extends LocalConcept> m: meanings.values()) { |
| 210 | + List<C> concepts = new ArrayList<C>(meanings.size()*10); |
| 211 | + for (List<? extends C> m: meanings.values()) { |
213 | 212 | if (m!=null) concepts.addAll(m); |
214 | 213 | } |
215 | 214 | |
— | — | @@ -221,17 +220,17 @@ |
222 | 221 | /* (non-Javadoc) |
223 | 222 | * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List) |
224 | 223 | */ |
225 | | - public <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> disambiguate(PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) throws PersistenceException { |
226 | | - if (meanings.isEmpty()) return new CoherenceDisambiguation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<LocalConcept, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0.0, "no terms or meanings"); |
| 224 | + public <X extends T>CoherenceDisambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
| 225 | + if (meanings.isEmpty()) return new CoherenceDisambiguation<X, C>(Collections.<X, C>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<C, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0.0, "no terms or meanings"); |
227 | 226 | |
228 | | - LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true); |
229 | | - FeatureFetcher<LocalConcept, Integer> features = getFeatureCache(meanings, context); |
| 227 | + LabeledMatrix<C, C> similarities = new MapLabeledMatrix<C, C>(true); |
| 228 | + FeatureFetcher<C, Integer> features = getFeatureCache(meanings, context); |
230 | 229 | |
231 | 230 | int sz = meanings.size(); |
232 | 231 | if (context!=null) sz += context.size(); |
233 | 232 | |
234 | 233 | if (sz<2) { |
235 | | - Disambiguation<X, LocalConcept> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 234 | + Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
236 | 235 | return getScore(r.getInterpretation(), context, similarities, features); |
237 | 236 | } |
238 | 237 | |
— | — | @@ -240,7 +239,7 @@ |
241 | 240 | sz = meanings.size(); |
242 | 241 | if (context!=null) sz += context.size(); |
243 | 242 | if (sz <2) { |
244 | | - Disambiguation<X, LocalConcept> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 243 | + Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
245 | 244 | return getScore(r.getInterpretation(), context, similarities, features); |
246 | 245 | } |
247 | 246 | |
— | — | @@ -248,34 +247,34 @@ |
249 | 248 | return disambiguate(sequences, root, meanings, context); |
250 | 249 | } |
251 | 250 | |
252 | | - public <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) throws PersistenceException { |
253 | | - LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true); |
254 | | - FeatureFetcher<LocalConcept, Integer> features = getFeatureCache(meanings, context); |
| 251 | + public <X extends T>CoherenceDisambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
| 252 | + LabeledMatrix<C, C> similarities = new MapLabeledMatrix<C, C>(true); |
| 253 | + FeatureFetcher<C, Integer> features = getFeatureCache(meanings, context); |
255 | 254 | |
256 | 255 | return disambiguate(sequences, root, meanings, context, similarities, features); |
257 | 256 | } |
258 | 257 | |
259 | | - private <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context, LabeledMatrix<LocalConcept, LocalConcept> similarities, FeatureFetcher<LocalConcept, Integer> features) throws PersistenceException { |
| 258 | + private <X extends T>CoherenceDisambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context, LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
260 | 259 | |
261 | 260 | //CAVEAT: because the map disambig can contain only one meaning per term, the same term can not occur with two meanings within the same term sequence. |
262 | 261 | |
263 | | - Collection<Disambiguator.Interpretation<X, LocalConcept>> interpretations = getInterpretations(sequences, meanings); |
| 262 | + Collection<Disambiguator.Interpretation<X, C>> interpretations = getInterpretations(sequences, meanings); |
264 | 263 | |
265 | 264 | return getBestInterpretation(root, meanings, context, interpretations, similarities, features); |
266 | 265 | } |
267 | 266 | |
268 | | - protected void pruneMeanings(Map<? extends TermReference, List<? extends LocalConcept>> meanings) { |
| 267 | + protected void pruneMeanings(Map<? extends T, List<? extends C>> meanings) { |
269 | 268 | if (minPopularity<=1) return; //nothing to do |
270 | 269 | |
271 | 270 | Iterator<?> eit = meanings.entrySet().iterator(); |
272 | 271 | while (eit.hasNext()) { |
273 | | - Entry<TermReference, List<? extends LocalConcept>> e = (Entry<TermReference, List<? extends LocalConcept>>) eit.next(); //XXX: ugly cast. got confused about generics. ugh. |
274 | | - List<? extends LocalConcept> m = e.getValue(); |
| 272 | + Entry<T, List<? extends C>> e = (Entry<T, List<? extends C>>) eit.next(); //XXX: ugly cast. got confused about generics. ugh. |
| 273 | + List<? extends C> m = e.getValue(); |
275 | 274 | if (m==null) continue; |
276 | 275 | |
277 | | - Iterator<? extends LocalConcept> cit = m.iterator(); |
| 276 | + Iterator<? extends C> cit = m.iterator(); |
278 | 277 | while (cit.hasNext()) { |
279 | | - LocalConcept c = cit.next(); |
| 278 | + C c = cit.next(); |
280 | 279 | double p = popularityMeasure.measure(c); |
281 | 280 | |
282 | 281 | if (p<minPopularity) { |
— | — | @@ -297,17 +296,17 @@ |
298 | 297 | } |
299 | 298 | } |
300 | 299 | |
301 | | - protected <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> getBestInterpretation(PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, |
302 | | - Collection<? extends LocalConcept> context, Collection<Disambiguator.Interpretation<X, LocalConcept>> interpretations, |
303 | | - LabeledMatrix<LocalConcept, LocalConcept> similarities, FeatureFetcher<LocalConcept, Integer> features) throws PersistenceException { |
| 300 | + protected <X extends T>CoherenceDisambiguation<X, C> getBestInterpretation(PhraseNode<X> root, Map<X, List<? extends C>> meanings, |
| 301 | + Collection<? extends C> context, Collection<Disambiguator.Interpretation<X, C>> interpretations, |
| 302 | + LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
304 | 303 | |
305 | | - CoherenceDisambiguation<X, LocalConcept> best = null; |
| 304 | + CoherenceDisambiguation<X, C> best = null; |
306 | 305 | double bestScore = 0; |
307 | 306 | |
308 | 307 | if ( interpretations.size() == 0 ) return null; |
309 | 308 | else if ( interpretations.size() == 1 ) { |
310 | | - Disambiguator.Interpretation<X, LocalConcept> interp = interpretations.iterator().next(); |
311 | | - CoherenceDisambiguation<X, LocalConcept> r = getScore(interp, context, similarities, features); |
| 309 | + Disambiguator.Interpretation<X, C> interp = interpretations.iterator().next(); |
| 310 | + CoherenceDisambiguation<X, C> r = getScore(interp, context, similarities, features); |
312 | 311 | |
313 | 312 | trace("only one interpretation available: "+r); |
314 | 313 | return r; |
— | — | @@ -315,8 +314,8 @@ |
316 | 315 | |
317 | 316 | trace("finding best of "+interpretations.size()+" interpretations."); |
318 | 317 | |
319 | | - for (Disambiguator.Interpretation<X, LocalConcept> interp: interpretations) { |
320 | | - CoherenceDisambiguation<X, LocalConcept> r = getScore(interp, context, similarities, features); |
| 318 | + for (Disambiguator.Interpretation<X, C> interp: interpretations) { |
| 319 | + CoherenceDisambiguation<X, C> r = getScore(interp, context, similarities, features); |
321 | 320 | |
322 | 321 | if ( best == null || r.getScore() > bestScore) { |
323 | 322 | best = r; |
— | — | @@ -329,8 +328,8 @@ |
330 | 329 | if (best==null || bestScore<minScore) { |
331 | 330 | trace("best score is not good enough ("+bestScore+"<"+minScore+"), using popularity disambiguator."); |
332 | 331 | |
333 | | - Disambiguation<X, LocalConcept> p = popularityDisambiguator.disambiguate(root, meanings, context); |
334 | | - CoherenceDisambiguation<X, LocalConcept> r = getScore(p.getInterpretation(), context, similarities, features); |
| 332 | + Disambiguation<X, C> p = popularityDisambiguator.disambiguate(root, meanings, context); |
| 333 | + CoherenceDisambiguation<X, C> r = getScore(p.getInterpretation(), context, similarities, features); |
335 | 334 | |
336 | 335 | trace("best of "+interpretations.size()+" interpretations by popularity: "+r); |
337 | 336 | return r; |
— | — | @@ -342,40 +341,40 @@ |
343 | 342 | return best; |
344 | 343 | } |
345 | 344 | |
346 | | - public <X extends TermReference>Collection<Disambiguator.Interpretation<X, LocalConcept>> getInterpretations(Collection<List<X>> sequences, Map<X, List<? extends LocalConcept>> meanings) { |
347 | | - List<Disambiguator.Interpretation<X, LocalConcept>> interpretations = new ArrayList<Disambiguator.Interpretation<X, LocalConcept>>(); |
| 345 | + public <X extends T>Collection<Disambiguator.Interpretation<X, C>> getInterpretations(Collection<List<X>> sequences, Map<X, List<? extends C>> meanings) { |
| 346 | + List<Disambiguator.Interpretation<X, C>> interpretations = new ArrayList<Disambiguator.Interpretation<X, C>>(); |
348 | 347 | for (List<X> sq: sequences) { |
349 | 348 | if (sq.isEmpty()) continue; |
350 | | - Collection<Disambiguator.Interpretation<X, LocalConcept>> sqint = getSequenceInterpretations(sq, meanings); |
| 349 | + Collection<Disambiguator.Interpretation<X, C>> sqint = getSequenceInterpretations(sq, meanings); |
351 | 350 | interpretations.addAll(sqint); |
352 | 351 | } |
353 | 352 | |
354 | 353 | return interpretations; |
355 | 354 | } |
356 | 355 | |
357 | | - public <X extends TermReference>Collection<Disambiguator.Interpretation<X, LocalConcept>> getSequenceInterpretations(List<X> sequence, Map<X, List<? extends LocalConcept>> meanings) { |
| 356 | + public <X extends T>Collection<Disambiguator.Interpretation<X, C>> getSequenceInterpretations(List<X> sequence, Map<X, List<? extends C>> meanings) { |
358 | 357 | if (sequence.size()==0) { |
359 | | - return Collections.singletonList(new Disambiguator.Interpretation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), sequence)); |
| 358 | + return Collections.singletonList(new Disambiguator.Interpretation<X, C>(Collections.<X, C>emptyMap(), sequence)); |
360 | 359 | } |
361 | 360 | |
362 | 361 | X t = sequence.get(0); |
363 | | - List<? extends LocalConcept> m = meanings.get(t); |
| 362 | + List<? extends C> m = meanings.get(t); |
364 | 363 | |
365 | | - Collection<Disambiguator.Interpretation<X, LocalConcept>> base = getSequenceInterpretations(sequence.subList(1, sequence.size()), meanings); |
| 364 | + Collection<Disambiguator.Interpretation<X, C>> base = getSequenceInterpretations(sequence.subList(1, sequence.size()), meanings); |
366 | 365 | |
367 | | - List<Disambiguator.Interpretation<X, LocalConcept>> interpretations = new ArrayList<Disambiguator.Interpretation<X, LocalConcept>>(); |
| 366 | + List<Disambiguator.Interpretation<X, C>> interpretations = new ArrayList<Disambiguator.Interpretation<X, C>>(); |
368 | 367 | |
369 | | - for (Disambiguator.Interpretation<X, LocalConcept> be: base) { |
| 368 | + for (Disambiguator.Interpretation<X, C> be: base) { |
370 | 369 | if (m==null || m.isEmpty()) { |
371 | | - Disambiguator.Interpretation<X, LocalConcept>interp = new Disambiguator.Interpretation<X, LocalConcept>(be.getMeanings(), sequence); |
| 370 | + Disambiguator.Interpretation<X, C>interp = new Disambiguator.Interpretation<X, C>(be.getMeanings(), sequence); |
372 | 371 | interpretations.add(interp); |
373 | 372 | } else { |
374 | | - for (LocalConcept c: m) { |
375 | | - Map<X, LocalConcept> e = new HashMap<X, LocalConcept>(); |
| 373 | + for (C c: m) { |
| 374 | + Map<X, C> e = new HashMap<X, C>(); |
376 | 375 | e.putAll(be.getMeanings()); |
377 | 376 | e.put(t, c); |
378 | 377 | |
379 | | - Disambiguator.Interpretation<X, LocalConcept>interp = new Disambiguator.Interpretation<X, LocalConcept>(e, sequence); |
| 378 | + Disambiguator.Interpretation<X, C>interp = new Disambiguator.Interpretation<X, C>(e, sequence); |
380 | 379 | interpretations.add(interp); |
381 | 380 | } |
382 | 381 | } |
— | — | @@ -385,30 +384,30 @@ |
386 | 385 | return interpretations; |
387 | 386 | } |
388 | 387 | |
389 | | - protected <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> getScore(Disambiguator.Interpretation<X, LocalConcept> interp, Collection<? extends LocalConcept> context, LabeledMatrix<LocalConcept, LocalConcept> similarities, FeatureFetcher<LocalConcept, Integer> features) throws PersistenceException { |
390 | | - Map<? extends TermReference, LocalConcept> concepts; |
| 388 | + protected <X extends T>CoherenceDisambiguation<X, C> getScore(Disambiguator.Interpretation<X, C> interp, Collection<? extends C> context, LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
| 389 | + Map<TermReference, C> concepts; |
391 | 390 | |
392 | 391 | if (context!=null || interp.getMeanings().size()!=interp.getSequence().size()) { |
393 | | - concepts = new HashMap<TermReference, LocalConcept>(); |
| 392 | + concepts = new HashMap<TermReference, C>(); |
394 | 393 | |
395 | | - for (X t: interp.getSequence()) { |
396 | | - LocalConcept m = interp.getMeanings().get(t); |
397 | | - ((HashMap<TermReference, LocalConcept>)concepts).put(t, m); |
| 394 | + for (TermReference t: interp.getSequence()) { |
| 395 | + C m = interp.getMeanings().get(t); |
| 396 | + ((HashMap<TermReference, C>)concepts).put(t, m); |
398 | 397 | } |
399 | 398 | |
400 | 399 | if (context != null) { |
401 | | - for (LocalConcept con: context) { |
402 | | - if (con!=null)((HashMap<TermReference, LocalConcept>)concepts).put(new Term(con.getName(), 1), con); |
| 400 | + for (C con: context) { |
| 401 | + if (con!=null)((HashMap<TermReference, C>)concepts).put(new Term(con.getName(), 1), con); |
403 | 402 | } |
404 | 403 | } |
405 | 404 | } else { |
406 | | - concepts = interp.getMeanings(); |
| 405 | + concepts = (HashMap<TermReference, C>)interp.getMeanings(); |
407 | 406 | } |
408 | 407 | |
409 | 408 | int c = concepts.size(); |
410 | 409 | |
411 | 410 | if (c == 0) { |
412 | | - CoherenceDisambiguation<X, LocalConcept> r = new CoherenceDisambiguation<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), Collections.<Integer, ConceptFeatures<LocalConcept, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0, "empty"); |
| 411 | + CoherenceDisambiguation<X, C> r = new CoherenceDisambiguation<X, C>(interp.getMeanings(), interp.getSequence(), Collections.<Integer, ConceptFeatures<C, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0, "empty"); |
413 | 412 | return r; |
414 | 413 | } |
415 | 414 | |
— | — | @@ -420,23 +419,23 @@ |
421 | 420 | int simCount = 0; //simCount should be <= n in the end. count and check. |
422 | 421 | |
423 | 422 | LabeledVector<Integer> sum = ConceptFeatures.newIntFeaturVector( concepts.size() * 200 ); //XXX: magic number |
424 | | - Map<Integer, ConceptFeatures<LocalConcept, Integer>> disambigFeatures = new HashMap<Integer, ConceptFeatures<LocalConcept, Integer>>(); |
| 423 | + Map<Integer, ConceptFeatures<C, Integer>> disambigFeatures = new HashMap<Integer, ConceptFeatures<C, Integer>>(); |
425 | 424 | double sim = 0, pop = 0, weight = 0; |
426 | 425 | int i=0, j=0; |
427 | | - for (Map.Entry<? extends TermReference, LocalConcept> ea: concepts.entrySet()) { |
428 | | - LocalConcept a = ea.getValue(); |
| 426 | + for (Map.Entry<TermReference, C> ea: concepts.entrySet()) { |
| 427 | + C a = ea.getValue(); |
429 | 428 | TermReference term = ea.getKey(); |
430 | 429 | |
431 | 430 | i++; |
432 | 431 | if (a==null) continue; |
433 | 432 | |
434 | | - ConceptFeatures<LocalConcept, Integer> fa = features.getFeatures(a); |
| 433 | + ConceptFeatures<C, Integer> fa = features.getFeatures(a); |
435 | 434 | disambigFeatures.put(a.getId(), fa); |
436 | 435 | sum.add(fa.getFeatureVector()); |
437 | 436 | |
438 | 437 | j=0; |
439 | | - for (Map.Entry<? extends TermReference, LocalConcept> eb: concepts.entrySet()) { |
440 | | - LocalConcept b = eb.getValue(); |
| 438 | + for (Map.Entry<TermReference, C> eb: concepts.entrySet()) { |
| 439 | + C b = eb.getValue(); |
441 | 440 | |
442 | 441 | j++; |
443 | 442 | if (i==j) break; |
— | — | @@ -452,7 +451,7 @@ |
453 | 452 | d = similarities.get(a, b); |
454 | 453 | } |
455 | 454 | else { |
456 | | - ConceptFeatures<LocalConcept, Integer> fb = features.getFeatures(b); |
| 455 | + ConceptFeatures<C, Integer> fb = features.getFeatures(b); |
457 | 456 | |
458 | 457 | if (fa==null || fb==null) d = 0; |
459 | 458 | else { |
— | — | @@ -522,7 +521,7 @@ |
523 | 522 | double score = scoreCombiner.apply(simf, popf); |
524 | 523 | score = doubleSanity(score, "score", "check scoreCombiner!", 0, 0.1, 1, 0.1); |
525 | 524 | |
526 | | - CoherenceDisambiguation<X, LocalConcept> r = new CoherenceDisambiguation<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), disambigFeatures, centroid, score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight); |
| 525 | + CoherenceDisambiguation<X, C> r = new CoherenceDisambiguation<X, C>(interp.getMeanings(), interp.getSequence(), disambigFeatures, centroid, score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight); |
527 | 526 | return r; |
528 | 527 | } |
529 | 528 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CachingFeatureFetcher.java |
— | — | @@ -31,7 +31,7 @@ |
32 | 32 | ConceptFeatures<C, K> f = cache.get(c.getId()); |
33 | 33 | if (f!=null) return f; |
34 | 34 | |
35 | | - f = fetcher.getFeatures(c); |
| 35 | + f = (ConceptFeatures<C, K>)fetcher.getFeatures(c); |
36 | 36 | cache.put(c.getId(), f); |
37 | 37 | |
38 | 38 | return f; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java |
— | — | @@ -15,20 +15,20 @@ |
16 | 16 | |
17 | 17 | public abstract class AbstractDisambiguator<T extends TermReference, C extends WikiWordConcept> implements Disambiguator<T, C> { |
18 | 18 | |
19 | | - private MeaningFetcher<C> meaningFetcher; |
| 19 | + private MeaningFetcher<? extends C> meaningFetcher; |
20 | 20 | |
21 | 21 | private Output trace; |
22 | 22 | |
23 | 23 | private Map<String, C> meaningOverrides; |
24 | 24 | |
25 | | - public AbstractDisambiguator(MeaningFetcher<C> meaningFetcher, int cacheCapacity) { |
| 25 | + public AbstractDisambiguator(MeaningFetcher<? extends C> meaningFetcher, int cacheCapacity) { |
26 | 26 | if (meaningFetcher==null) throw new NullPointerException(); |
27 | 27 | |
28 | 28 | if (cacheCapacity>0) meaningFetcher = new CachingMeaningFetcher<C>(meaningFetcher, cacheCapacity); |
29 | 29 | this.meaningFetcher = meaningFetcher; |
30 | 30 | } |
31 | 31 | |
32 | | - public MeaningFetcher<C> getMeaningFetcher() { |
| 32 | + public MeaningFetcher<? extends C> getMeaningFetcher() { |
33 | 33 | return meaningFetcher; |
34 | 34 | } |
35 | 35 | |
— | — | @@ -100,7 +100,7 @@ |
101 | 101 | } |
102 | 102 | } |
103 | 103 | |
104 | | - Map<X, List<? extends C>> meanings = meaningFetcher.getMeanings(todo); |
| 104 | + Map<X, List<? extends C>> meanings = (Map<X, List<? extends C>>)(Object)meaningFetcher.getMeanings(todo); //FIXME: got confused by generics :( |
105 | 105 | |
106 | 106 | if (meaningOverrides!=null && todo.size()!=terms.size()) { |
107 | 107 | for (X t: terms) { |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/MeaningFetcher.java |
— | — | @@ -9,7 +9,7 @@ |
10 | 10 | import de.brightbyte.wikiword.model.WikiWordConcept; |
11 | 11 | |
12 | 12 | public interface MeaningFetcher<C extends WikiWordConcept> { |
13 | | - public List<? extends C> getMeanings(String term) throws PersistenceException; |
| 13 | + public List<C> getMeanings(String term) throws PersistenceException; |
14 | 14 | |
15 | | - public <X extends TermReference>Map<X, List<? extends C>> getMeanings(Collection<X> terms) throws PersistenceException; |
| 15 | + public <X extends TermReference>Map<X, List<C>> getMeanings(Collection<X> terms) throws PersistenceException; |
16 | 16 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java |
— | — | @@ -14,31 +14,30 @@ |
15 | 15 | import de.brightbyte.data.measure.Similarity; |
16 | 16 | import de.brightbyte.util.PersistenceException; |
17 | 17 | import de.brightbyte.wikiword.model.ConceptFeatures; |
18 | | -import de.brightbyte.wikiword.model.LocalConcept; |
19 | 18 | import de.brightbyte.wikiword.model.PhraseNode; |
20 | 19 | import de.brightbyte.wikiword.model.TermReference; |
21 | 20 | import de.brightbyte.wikiword.model.WikiWordConcept; |
22 | 21 | |
23 | | -public class SlidingCoherenceDisambiguator extends CoherenceDisambiguator { |
| 22 | +public class SlidingCoherenceDisambiguator<T extends TermReference, C extends WikiWordConcept> extends CoherenceDisambiguator<T, C> { |
24 | 23 | |
25 | 24 | protected int window; |
26 | 25 | protected int initialWindow; |
27 | 26 | |
28 | | - public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheCapacity) { |
| 27 | + public SlidingCoherenceDisambiguator(MeaningFetcher<? extends C> meaningFetcher, FeatureFetcher<C, Integer> featureFetcher, int cacheCapacity) { |
29 | 28 | this(meaningFetcher, featureFetcher, cacheCapacity, null, null, 5, 5); |
30 | 29 | } |
31 | 30 | |
32 | | - public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheCapacity, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) { |
| 31 | + public SlidingCoherenceDisambiguator(MeaningFetcher<? extends C> meaningFetcher, FeatureFetcher<C, Integer> featureFetcher, int cacheCapacity, Measure<C> popularityMeasure, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) { |
33 | 32 | super(meaningFetcher, featureFetcher, cacheCapacity, popularityMeasure, sim); |
34 | 33 | |
35 | 34 | this.window = window; |
36 | 35 | this.initialWindow = initialWindow; |
37 | 36 | } |
38 | 37 | |
39 | | - public <X extends TermReference>Disambiguation<X, LocalConcept> evalStep(List<X> baseSequence, Map<X, LocalConcept> interpretation, PhraseNode<X> node, |
40 | | - Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context, |
41 | | - LabeledMatrix<LocalConcept, LocalConcept> similarities, FeatureFetcher<LocalConcept, Integer> features) throws PersistenceException { |
42 | | - X term = node.getTermReference(); |
| 38 | + public <X extends T>Disambiguation<X, C> evalStep(List<X> baseSequence, Map<X, C> interpretation, PhraseNode<X> node, |
| 39 | + Map<X, List<? extends C>> meanings, Collection<? extends C> context, |
| 40 | + LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
| 41 | + X term = node.getTermReference(); |
43 | 42 | |
44 | 43 | List<X> sequence = new ArrayList<X>(baseSequence); |
45 | 44 | sequence.add(term); |
— | — | @@ -49,12 +48,12 @@ |
50 | 49 | |
51 | 50 | List<X> frame = sequence.subList(from, to); |
52 | 51 | |
53 | | - Disambiguation<X, LocalConcept> r ; |
| 52 | + Disambiguation<X, C> r ; |
54 | 53 | |
55 | 54 | if (to-from < 2) { |
56 | 55 | r = popularityDisambiguator.disambiguate(frame, meanings, context); |
57 | 56 | } else { |
58 | | - Collection<Disambiguator.Interpretation<X, LocalConcept>> interpretations = getInterpretations(frame, interpretation, meanings); |
| 57 | + Collection<Disambiguator.Interpretation<X, C>> interpretations = getInterpretations(frame, interpretation, meanings); |
59 | 58 | r = getBestInterpretation(node, meanings, context, interpretations, similarities, features); |
60 | 59 | } |
61 | 60 | |
— | — | @@ -64,19 +63,19 @@ |
65 | 64 | /* (non-Javadoc) |
66 | 65 | * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List) |
67 | 66 | */ |
68 | | - public <X extends TermReference>CoherenceDisambiguation<X, LocalConcept> disambiguate(PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) throws PersistenceException { |
69 | | - if (meanings.isEmpty()) return new CoherenceDisambiguation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<LocalConcept, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0.0, "no terms or meanings"); |
| 67 | + public <X extends T>CoherenceDisambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
| 68 | + if (meanings.isEmpty()) return new CoherenceDisambiguation<X, C>(Collections.<X, C>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<C, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0.0, "no terms or meanings"); |
70 | 69 | |
71 | 70 | int sz = meanings.size(); |
72 | 71 | if (context!=null) sz += context.size(); |
73 | 72 | |
74 | 73 | //CAVEAT: because the map disambig can contain only one meaning per term, the same term can not occur with two meanings within the same term sequence. |
75 | 74 | |
76 | | - LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true); |
77 | | - FeatureFetcher<LocalConcept, Integer> features = getFeatureCache(meanings, context); |
| 75 | + LabeledMatrix<C, C> similarities = new MapLabeledMatrix<C, C>(true); |
| 76 | + FeatureFetcher<C, Integer> features = getFeatureCache(meanings, context); |
78 | 77 | |
79 | 78 | if (window < 2 || sz<2) { |
80 | | - Disambiguation<X, LocalConcept> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 79 | + Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
81 | 80 | return getScore(r.getInterpretation(), context, similarities, features); |
82 | 81 | } |
83 | 82 | |
— | — | @@ -86,17 +85,17 @@ |
87 | 86 | if (context!=null) sz += context.size(); |
88 | 87 | |
89 | 88 | if (sz<2) { |
90 | | - Disambiguation<X, LocalConcept> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 89 | + Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
91 | 90 | return getScore(r.getInterpretation(), context, similarities, features); |
92 | 91 | } |
93 | 92 | |
94 | | - Map<X, LocalConcept> disambig = new HashMap<X, LocalConcept>(meanings.size()); |
| 93 | + Map<X, C> disambig = new HashMap<X, C>(meanings.size()); |
95 | 94 | PhraseNode<X> currentNode = root; |
96 | 95 | List<X> sequence = new ArrayList<X>(); |
97 | 96 | |
98 | 97 | if (initialWindow > 0) { //apply full coherence disambig to initial window size. initialWindow == 1 will trigger a popularity disambig. |
99 | 98 | Collection<List<X>> sequences = getSequences(root, initialWindow); |
100 | | - Disambiguation<X, LocalConcept> r; |
| 99 | + Disambiguation<X, C> r; |
101 | 100 | |
102 | 101 | if (initialWindow == 1) r = popularityDisambiguator.disambiguate(sequences, root, meanings, context); |
103 | 102 | else r = super.disambiguate(sequences, root, meanings, context); |
— | — | @@ -110,11 +109,11 @@ |
111 | 110 | Collection<? extends PhraseNode<X>> successors = currentNode.getSuccessors(); |
112 | 111 | if (successors==null || successors.isEmpty()) break; |
113 | 112 | |
114 | | - Disambiguation<X, LocalConcept> best = null; |
| 113 | + Disambiguation<X, C> best = null; |
115 | 114 | PhraseNode<X> bestNode = null; |
116 | 115 | |
117 | 116 | for (PhraseNode<X> n: successors) { |
118 | | - Disambiguation<X, LocalConcept> r = evalStep(sequence, disambig, n, meanings, context, similarities, features); //empty sequence will trigger popularity disambig |
| 117 | + Disambiguation<X, C> r = evalStep(sequence, disambig, n, meanings, context, similarities, features); //empty sequence will trigger popularity disambig |
119 | 118 | trace("evalStep("+n+"): " + r.toString()); |
120 | 119 | if (best == null || best.getScore() < r.getScore()) { |
121 | 120 | best = r; |
— | — | @@ -126,14 +125,14 @@ |
127 | 126 | currentNode = bestNode; |
128 | 127 | sequence.add(term); |
129 | 128 | |
130 | | - LocalConcept meaning = best.getMeanings().get(term); |
| 129 | + C meaning = best.getMeanings().get(term); |
131 | 130 | disambig.put(term, meaning); |
132 | 131 | } |
133 | 132 | |
134 | | - return getScore(new Disambiguator.Interpretation<X, LocalConcept>(disambig, sequence), context, similarities, features); //FIXME: this is unnecessarily expensive, we usually don't need the scores this calculates. |
| 133 | + return getScore(new Disambiguator.Interpretation<X, C>(disambig, sequence), context, similarities, features); //FIXME: this is unnecessarily expensive, we usually don't need the scores this calculates. |
135 | 134 | } |
136 | 135 | |
137 | | - protected <X extends TermReference>Collection<Disambiguator.Interpretation<X, LocalConcept>> getInterpretations(List<X> frame, Map<X, ? extends LocalConcept> known, Map<? extends TermReference, List<? extends LocalConcept>> meanings) { |
| 136 | + protected <X extends T>Collection<Disambiguator.Interpretation<X, C>> getInterpretations(List<X> frame, Map<X, ? extends C> known, Map<? extends T, List<? extends C>> meanings) { |
138 | 137 | //strip out all terms with no known meaning |
139 | 138 | if (meanings.keySet().size() != frame.size()) { |
140 | 139 | List<X> t = new ArrayList<X>(frame.size()); |
— | — | @@ -142,12 +141,12 @@ |
143 | 142 | frame = t; |
144 | 143 | } |
145 | 144 | |
146 | | - Map<X, List<? extends LocalConcept>> mset = new HashMap<X, List<? extends LocalConcept>>(); |
| 145 | + Map<X, List<? extends C>> mset = new HashMap<X, List<? extends C>>(); |
147 | 146 | |
148 | 147 | for (X t: frame) { |
149 | | - List<? extends LocalConcept> m; |
| 148 | + List<? extends C> m; |
150 | 149 | |
151 | | - LocalConcept c = known.get(t); |
| 150 | + C c = known.get(t); |
152 | 151 | |
153 | 152 | if (c!=null) m = Collections.singletonList(c); |
154 | 153 | else m = meanings.get(t); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java |
— | — | @@ -10,36 +10,35 @@ |
11 | 11 | import de.brightbyte.data.Functor2; |
12 | 12 | import de.brightbyte.data.measure.Measure; |
13 | 13 | import de.brightbyte.data.measure.Measure.Comparator; |
14 | | -import de.brightbyte.wikiword.model.LocalConcept; |
15 | 14 | import de.brightbyte.wikiword.model.PhraseNode; |
16 | 15 | import de.brightbyte.wikiword.model.TermReference; |
17 | 16 | import de.brightbyte.wikiword.model.WikiWordConcept; |
18 | 17 | |
19 | | -public class PopularityDisambiguator extends AbstractDisambiguator<TermReference, LocalConcept> { |
| 18 | +public class PopularityDisambiguator<T extends TermReference, C extends WikiWordConcept> extends AbstractDisambiguator<T, C> { |
20 | 19 | |
21 | | - protected Measure<WikiWordConcept> popularityMeasure; |
22 | | - protected Comparator<LocalConcept> popularityComparator; |
| 20 | + protected Measure<? super C> popularityMeasure; |
| 21 | + protected Comparator<? super C> popularityComparator; |
23 | 22 | |
24 | 23 | protected Functor.Double weightBooster = SquareBooster.instance; |
25 | 24 | protected Functor2.Double weigthCombiner = new ProductCombiner(); //NOTE: pop and weight are not in the same scale. |
26 | 25 | |
27 | | - public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, int cacheCapacity) { |
| 26 | + public PopularityDisambiguator(MeaningFetcher<C> meaningFetcher, int cacheCapacity) { |
28 | 27 | this(meaningFetcher, cacheCapacity, WikiWordConcept.theCardinality); |
29 | 28 | } |
30 | 29 | |
31 | | - public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, int cacheCapacity, Measure<WikiWordConcept> popularityMeasure) { |
| 30 | + public PopularityDisambiguator(MeaningFetcher<C> meaningFetcher, int cacheCapacity, Measure<? super C> popularityMeasure) { |
32 | 31 | super(meaningFetcher, cacheCapacity); |
33 | 32 | |
34 | 33 | this.setPopularityMeasure(popularityMeasure); |
35 | 34 | } |
36 | 35 | |
37 | | - public Measure<WikiWordConcept> getPopularityMeasure() { |
| 36 | + public Measure<? super C> getPopularityMeasure() { |
38 | 37 | return popularityMeasure; |
39 | 38 | } |
40 | 39 | |
41 | | - public void setPopularityMeasure(Measure<WikiWordConcept> popularityMeasure) { |
| 40 | + public void setPopularityMeasure(Measure<? super C> popularityMeasure) { |
42 | 41 | this.popularityMeasure = popularityMeasure; |
43 | | - this.popularityComparator = new Measure.Comparator<LocalConcept>(popularityMeasure, true); |
| 42 | + this.popularityComparator = new Measure.Comparator<C>(popularityMeasure, true); |
44 | 43 | } |
45 | 44 | |
46 | 45 | public void setWeightCombiner(Functor2.Double weightCombiner) { |
— | — | @@ -62,16 +61,16 @@ |
63 | 62 | this.weigthCombiner = weigthCombiner; |
64 | 63 | } |
65 | 64 | |
66 | | - public <X extends TermReference>Disambiguation<X, LocalConcept> disambiguate(PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) { |
| 65 | + public <X extends T>Disambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
67 | 66 | Collection<List<X>> sequences = getSequences(root, Integer.MAX_VALUE); |
68 | 67 | return disambiguate(sequences, root, meanings, context); |
69 | 68 | } |
70 | 69 | |
71 | | - public <X extends TermReference>Disambiguation<X, LocalConcept> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) { |
72 | | - Disambiguation<X, LocalConcept> best = null; |
| 70 | + public <X extends T>Disambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
| 71 | + Disambiguation<X, C> best = null; |
73 | 72 | |
74 | 73 | for (List<X> sequence: sequences) { |
75 | | - Disambiguation<X, LocalConcept> r = disambiguate(sequence, meanings, context); |
| 74 | + Disambiguation<X, C> r = disambiguate(sequence, meanings, context); |
76 | 75 | trace(r.toString()); |
77 | 76 | if (best == null || best.getScore() < r.getScore()) { |
78 | 77 | best = r; |
— | — | @@ -82,20 +81,20 @@ |
83 | 82 | return best; |
84 | 83 | } |
85 | 84 | |
86 | | - public <X extends TermReference>Disambiguation<X, LocalConcept> disambiguate(List<X> sequence, Map<X, List<? extends LocalConcept>> meanings, Collection<? extends LocalConcept> context) { |
87 | | - if (sequence.isEmpty() || meanings.isEmpty()) return new Disambiguator.Disambiguation<X, LocalConcept>(Collections.<X, LocalConcept>emptyMap(), Collections.<X>emptyList(), 0.0, "no terms or meanings"); |
| 85 | + public <X extends T>Disambiguation<X, C> disambiguate(List<X> sequence, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
| 86 | + if (sequence.isEmpty() || meanings.isEmpty()) return new Disambiguator.Disambiguation<X, C>(Collections.<X, C>emptyMap(), Collections.<X>emptyList(), 0.0, "no terms or meanings"); |
88 | 87 | |
89 | | - Map<X, LocalConcept> disambig = new HashMap<X, LocalConcept>(); |
| 88 | + Map<X, C> disambig = new HashMap<X, C>(); |
90 | 89 | double score = 0; |
91 | 90 | int totalPop = 0; |
92 | 91 | |
93 | 92 | for (X t: sequence) { |
94 | | - List<? extends LocalConcept> m = meanings.get(t); |
| 93 | + List<? extends C> m = meanings.get(t); |
95 | 94 | if (m==null || m.size()==0) continue; |
96 | 95 | |
97 | 96 | if (m.size()>1) Collections.sort(m, popularityComparator); |
98 | 97 | |
99 | | - LocalConcept c = m.get(0); |
| 98 | + C c = m.get(0); |
100 | 99 | disambig.put(t, c); |
101 | 100 | |
102 | 101 | double pop = popularityMeasure.measure(c); |
— | — | @@ -108,7 +107,7 @@ |
109 | 108 | |
110 | 109 | if (disambig.size()>0) score = score / sequence.size(); //NOTE: treat unknown terms as having pop = 0 |
111 | 110 | |
112 | | - Disambiguation<X, LocalConcept> r = new Disambiguation<X, LocalConcept>(disambig, sequence, score, "score="+score+"; pop="+totalPop); |
| 111 | + Disambiguation<X, C> r = new Disambiguation<X, C>(disambig, sequence, score, "score="+score+"; pop="+totalPop); |
113 | 112 | return r; |
114 | 113 | } |
115 | 114 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CachingMeaningFetcher.java |
— | — | @@ -15,18 +15,18 @@ |
16 | 16 | public class CachingMeaningFetcher<C extends WikiWordConcept> implements MeaningFetcher<C> { |
17 | 17 | |
18 | 18 | protected MeaningFetcher<? extends C> fetcher; |
19 | | - protected MRUHashMap<String, List<? extends C>> cache; |
| 19 | + protected MRUHashMap<String, List<C>> cache; |
20 | 20 | |
21 | 21 | public CachingMeaningFetcher(MeaningFetcher<? extends C> fetcher, int capacity) { |
22 | 22 | this.fetcher = fetcher; |
23 | | - this.cache = new MRUHashMap<String, List<? extends C>>(capacity); |
| 23 | + this.cache = new MRUHashMap<String, List<C>>(capacity); |
24 | 24 | } |
25 | 25 | |
26 | | - public List<? extends C> getMeanings(String term) throws PersistenceException { |
27 | | - List<? extends C> meanings = cache.get(term); |
| 26 | + public List<C> getMeanings(String term) throws PersistenceException { |
| 27 | + List<C> meanings = cache.get(term); |
28 | 28 | |
29 | 29 | if (meanings==null) { |
30 | | - meanings = fetcher.getMeanings(term); |
| 30 | + meanings = (List<C>)fetcher.getMeanings(term); //XXX: ugly cast :( |
31 | 31 | cache.put(term, meanings); |
32 | 32 | } |
33 | 33 | |
— | — | @@ -34,12 +34,12 @@ |
35 | 35 | } |
36 | 36 | |
37 | 37 | |
38 | | - public <X extends TermReference> Map<X, List<? extends C>> getMeanings(Collection<X> terms) throws PersistenceException { |
39 | | - Map<X, List<? extends C>> meanings= new HashMap<X, List<? extends C>>(); |
| 38 | + public <X extends TermReference> Map<X, List<C>> getMeanings(Collection<X> terms) throws PersistenceException { |
| 39 | + Map<X, List<C>> meanings= new HashMap<X, List<C>>(); |
40 | 40 | List<X> todo = new ArrayList<X>(terms.size()); |
41 | 41 | |
42 | 42 | for (X t: terms) { |
43 | | - List<? extends C> m = cache.get(t.getTerm()); |
| 43 | + List<C> m = cache.get(t.getTerm()); |
44 | 44 | if (m!=null) { |
45 | 45 | meanings.put(t, m); |
46 | 46 | cache.put(t.getTerm(), m); |
— | — | @@ -50,11 +50,11 @@ |
51 | 51 | } |
52 | 52 | |
53 | 53 | if (!todo.isEmpty()) { |
54 | | - Map<X, List<? extends C>> parentMeanings = (Map<X, List<? extends C>>)(Object)fetcher.getMeanings(todo); //XXX: ugly cast, generics are a pain |
| 54 | + Map<X, List<C>> parentMeanings = (Map<X, List<C>>)(Object)fetcher.getMeanings(todo); //XXX: ugly cast, generics are a pain |
55 | 55 | meanings.putAll(parentMeanings); |
56 | 56 | |
57 | 57 | for (X t: todo) { |
58 | | - List<? extends C> m = parentMeanings.get(t); |
| 58 | + List<C> m = parentMeanings.get(t); |
59 | 59 | if (m==null) m = Collections.emptyList(); |
60 | 60 | cache.put(t.getTerm(), m); |
61 | 61 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/LocalConceptStore.java |
— | — | @@ -12,8 +12,6 @@ |
13 | 13 | */ |
14 | 14 | public interface LocalConceptStore extends WikiWordConceptStore<LocalConcept>, WikiWordLocalStore { |
15 | 15 | |
16 | | - public abstract DataSet<LocalConcept> getMeanings(String term, ConceptQuerySpec spec) throws PersistenceException; |
17 | | - |
18 | 16 | public int getNumberOfTerms() throws PersistenceException; |
19 | 17 | |
20 | 18 | public abstract DataSet<TermMeaning> getAllTerms() throws PersistenceException; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/WikiWordConceptStore.java |
— | — | @@ -3,13 +3,14 @@ |
4 | 4 | import de.brightbyte.data.cursor.DataSet; |
5 | 5 | import de.brightbyte.util.PersistenceException; |
6 | 6 | import de.brightbyte.wikiword.ConceptType; |
7 | | -import de.brightbyte.wikiword.model.LocalConcept; |
8 | 7 | import de.brightbyte.wikiword.model.WikiWordConcept; |
9 | 8 | |
10 | 9 | |
11 | 10 | public interface WikiWordConceptStore<T extends WikiWordConcept> extends WikiWordConceptStoreBase { |
12 | 11 | |
13 | 12 | public static class ConceptQuerySpec { |
| 13 | + //TODO: relevance limit, order, proper name types & preferred language |
| 14 | + |
14 | 15 | private boolean includeRelations; |
15 | 16 | private boolean includeStatistics; |
16 | 17 | private boolean includeResource; |
— | — | @@ -76,6 +77,8 @@ |
77 | 78 | |
78 | 79 | public DataSet<? extends T> getAllConcepts(ConceptQuerySpec spec) throws PersistenceException; |
79 | 80 | |
| 81 | + public abstract DataSet<? extends WikiWordConcept> getMeanings(String term, ConceptQuerySpec spec) throws PersistenceException; |
| 82 | + |
80 | 83 | public ConceptType getConceptType(int type) throws PersistenceException; |
81 | 84 | |
82 | 85 | public StatisticsStore getStatisticsStore() throws PersistenceException; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/GlobalConceptStore.java |
— | — | @@ -7,7 +7,6 @@ |
8 | 8 | import de.brightbyte.wikiword.Corpus; |
9 | 9 | import de.brightbyte.wikiword.model.GlobalConcept; |
10 | 10 | import de.brightbyte.wikiword.model.LocalConcept; |
11 | | -import de.brightbyte.wikiword.store.WikiWordConceptStore.ConceptQuerySpec; |
12 | 11 | |
13 | 12 | |
14 | 13 | /** |
— | — | @@ -15,10 +14,7 @@ |
16 | 15 | */ |
17 | 16 | public interface GlobalConceptStore extends WikiWordConceptStore<GlobalConcept> { |
18 | 17 | |
19 | | - //TODO: relevance limit? order? |
20 | 18 | public DataSet<GlobalConcept> getMeanings(String lang, String term, ConceptQuerySpec spec) throws PersistenceException; |
21 | | - |
22 | | - public DataSet<GlobalConcept> getMeanings(String term, ConceptQuerySpec spec) throws PersistenceException; |
23 | 19 | |
24 | 20 | //public abstract ResultSet queryTermRefersTo() throws PersistenceException; |
25 | 21 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordConcept.java |
— | — | @@ -11,7 +11,7 @@ |
12 | 12 | import de.brightbyte.wikiword.ConceptType; |
13 | 13 | import de.brightbyte.wikiword.DatasetIdentifier; |
14 | 14 | |
15 | | -public abstract class WikiWordConcept { |
| 15 | +public class WikiWordConcept { |
16 | 16 | public static class ByName implements Comparator<WikiWordConcept> { |
17 | 17 | protected Collator collator; |
18 | 18 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java |
— | — | @@ -480,7 +480,7 @@ |
481 | 481 | } |
482 | 482 | |
483 | 483 | public void listMeaningsLocal(String term, ConsoleOutput out) throws PersistenceException { |
484 | | - DataSet<LocalConcept> meanings = getLocalConceptStore().getMeanings(term, resolvedConceptSpec); |
| 484 | + DataSet<? extends WikiWordConcept> meanings = getLocalConceptStore().getMeanings(term, resolvedConceptSpec); |
485 | 485 | out.writeConcepts(meanings); |
486 | 486 | } |
487 | 487 | |