Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest.java |
— | — | @@ -25,7 +25,7 @@ |
26 | 26 | |
27 | 27 | List<Term> sequence = terms("UK", "London", "Underground", "Bank"); |
28 | 28 | |
29 | | - Disambiguation<Term, LocalConcept> result = disambiguator.disambiguate(sequence, null); |
| 29 | + Disambiguation<Term, LocalConcept> result = disambiguator.disambiguate(sequence, null, null); |
30 | 30 | |
31 | 31 | Map<? extends Term, ? extends LocalConcept> meanings = result.getMeanings(); |
32 | 32 | |
— | — | @@ -48,7 +48,7 @@ |
49 | 49 | disambiguator.setInitialWindow(1); |
50 | 50 | disambiguator.setWindow(3); |
51 | 51 | |
52 | | - Disambiguation<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null); |
| 52 | + Disambiguation<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null, null); |
53 | 53 | |
54 | 54 | List<? extends PhraseOccurance> sequence = result.getSequence(); |
55 | 55 | Map<? extends PhraseOccurance, ? extends LocalConcept> meanings = result.getMeanings(); |
— | — | @@ -70,7 +70,7 @@ |
71 | 71 | disambiguator.setInitialWindow(2); |
72 | 72 | disambiguator.setWindow(3); |
73 | 73 | |
74 | | - result = disambiguator.disambiguate(set.getRootNode(), null); |
| 74 | + result = disambiguator.disambiguate(set.getRootNode(), null, null); |
75 | 75 | |
76 | 76 | sequence = result.getSequence(); |
77 | 77 | meanings = result.getMeanings(); |
— | — | @@ -92,7 +92,7 @@ |
93 | 93 | disambiguator.setInitialWindow(3); |
94 | 94 | disambiguator.setWindow(3); |
95 | 95 | |
96 | | - result = disambiguator.disambiguate(set.getRootNode(), null); |
| 96 | + result = disambiguator.disambiguate(set.getRootNode(), null, null); |
97 | 97 | |
98 | 98 | sequence = result.getSequence(); |
99 | 99 | meanings = result.getMeanings(); |
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/PopularityDisambiguatorTest.java |
— | — | @@ -13,7 +13,6 @@ |
14 | 14 | import de.brightbyte.wikiword.model.PhraseOccurance; |
15 | 15 | import de.brightbyte.wikiword.model.PhraseOccuranceSet; |
16 | 16 | import de.brightbyte.wikiword.model.TermListNode; |
17 | | -import de.brightbyte.wikiword.model.TermReference; |
18 | 17 | |
19 | 18 | public class PopularityDisambiguatorTest extends DisambiguatorTestBase { |
20 | 19 | |
— | — | @@ -22,7 +21,7 @@ |
23 | 22 | } |
24 | 23 | |
25 | 24 | public void testGetTermsForList() throws PersistenceException { |
26 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 25 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
27 | 26 | |
28 | 27 | Term uk = new Term("UK"); |
29 | 28 | Term london = new Term("London"); |
— | — | @@ -48,7 +47,7 @@ |
49 | 48 | |
50 | 49 | //FIXME: Test case for getHorizon |
51 | 50 | |
52 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 51 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
53 | 52 | |
54 | 53 | Collection<PhraseOccurance> terms = disambiguator.getTerms(set.getRootNode(), 0); |
55 | 54 | assertTrue("empty term set", sameElements( getBankAndMonumentTerms(0), terms) ); |
— | — | @@ -61,7 +60,7 @@ |
62 | 61 | } |
63 | 62 | |
64 | 63 | public void testGetMeaningsForList() throws PersistenceException { |
65 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 64 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
66 | 65 | |
67 | 66 | Term uk = new Term("UK"); |
68 | 67 | Term london = new Term("London"); |
— | — | @@ -72,7 +71,7 @@ |
73 | 72 | terms.add(london); |
74 | 73 | terms.add(underground); |
75 | 74 | |
76 | | - Map<Term, List<? extends LocalConcept>> res = disambiguator.getMeanings(terms); |
| 75 | + Map<Term, List<? extends LocalConcept>> res = disambiguator.getMeanings(terms, null); |
77 | 76 | |
78 | 77 | assertEquals(uk.getTerm(), meanings.get(uk.getTerm()), res.get(uk)); |
79 | 78 | assertEquals(london.getTerm(), meanings.get(london.getTerm()), res.get(london)); |
— | — | @@ -80,10 +79,10 @@ |
81 | 80 | } |
82 | 81 | |
83 | 82 | public void testGetMeaningsForNode() throws PersistenceException { |
84 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 83 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
85 | 84 | |
86 | 85 | PhraseOccuranceSet set = getBankAndMonumentPhrases(); |
87 | | - Map<PhraseOccurance, List<? extends LocalConcept>> res = disambiguator.getMeanings(set.getRootNode()); |
| 86 | + Map<PhraseOccurance, List<? extends LocalConcept>> res = disambiguator.getMeanings(set.getRootNode(), null); |
88 | 87 | List<PhraseOccurance> terms = getBankAndMonumentTerms(1000); |
89 | 88 | |
90 | 89 | for (PhraseOccurance t: terms) { |
— | — | @@ -95,7 +94,7 @@ |
96 | 95 | } |
97 | 96 | |
98 | 97 | public void testGetSequences() throws PersistenceException { |
99 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 98 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
100 | 99 | PhraseOccuranceSet set = getBankAndMonumentPhrases(); |
101 | 100 | |
102 | 101 | Collection<List<PhraseOccurance>> res = disambiguator.getSequences(set.getRootNode(), 1); |
— | — | @@ -109,7 +108,7 @@ |
110 | 109 | } |
111 | 110 | |
112 | 111 | public void testGetSequences2() throws PersistenceException { |
113 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 112 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
114 | 113 | PhraseOccuranceSet set = getMargaretOfYorkPhrases(); |
115 | 114 | |
116 | 115 | Collection<List<PhraseOccurance>> res = disambiguator.getSequences(set.getRootNode(), 3); |
— | — | @@ -143,14 +142,14 @@ |
144 | 143 | } |
145 | 144 | |
146 | 145 | public void testDisambiguateTerms() throws PersistenceException { |
147 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 146 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
148 | 147 | |
149 | 148 | Term uk = new Term("UK"); |
150 | 149 | Term london = new Term("London"); |
151 | 150 | Term underground = new Term("Underground"); |
152 | 151 | |
153 | 152 | List<Term> sequence = Arrays.asList(new Term[] {uk, london, underground}); |
154 | | - Disambiguator.Disambiguation<Term, LocalConcept> result = disambiguator.disambiguate(sequence, null); |
| 153 | + Disambiguator.Disambiguation<Term, LocalConcept> result = disambiguator.disambiguate(sequence, null, null); |
155 | 154 | |
156 | 155 | assertEquals("sequence", sequence, result.getSequence()); |
157 | 156 | |
— | — | @@ -162,10 +161,10 @@ |
163 | 162 | public void testDisambiguateNode() throws PersistenceException { |
164 | 163 | PhraseOccuranceSet set = getBankAndMonumentPhrases(); |
165 | 164 | |
166 | | - PopularityDisambiguator<TermReference, LocalConcept> disambiguator = new PopularityDisambiguator<TermReference, LocalConcept>(meaningFetcher, 10); |
| 165 | + PopularityDisambiguator<LocalConcept> disambiguator = new PopularityDisambiguator<LocalConcept>(meaningFetcher, 10); |
167 | 166 | disambiguator.setTrace(traceOutput); |
168 | 167 | |
169 | | - Disambiguation<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null); |
| 168 | + Disambiguation<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null, null); |
170 | 169 | |
171 | 170 | List<? extends PhraseOccurance> sequence = result.getSequence(); |
172 | 171 | Map<? extends PhraseOccurance, ? extends LocalConcept> meanings = result.getMeanings(); |
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguatorTest.java |
— | — | @@ -155,7 +155,7 @@ |
156 | 156 | |
157 | 157 | List<Term> sequence = terms("UK", "London", "Underground", "Bank"); |
158 | 158 | |
159 | | - Disambiguation<Term, LocalConcept> result = disambiguator.disambiguate(sequence, null); |
| 159 | + Disambiguation<Term, LocalConcept> result = disambiguator.disambiguate(sequence, null, null); |
160 | 160 | |
161 | 161 | Map<? extends Term, ? extends LocalConcept> meanings = result.getMeanings(); |
162 | 162 | |
— | — | @@ -176,7 +176,7 @@ |
177 | 177 | CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher, 10); |
178 | 178 | disambiguator.setTrace(traceOutput); |
179 | 179 | |
180 | | - Disambiguation<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null); |
| 180 | + Disambiguation<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null, null); |
181 | 181 | |
182 | 182 | List<? extends PhraseOccurance> sequence = result.getSequence(); |
183 | 183 | Map<? extends PhraseOccurance, ? extends LocalConcept> meanings = result.getMeanings(); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/TermRelatedness.java |
— | — | @@ -29,14 +29,14 @@ |
30 | 30 | } |
31 | 31 | |
32 | 32 | protected Similarity<WikiWordConcept> relatedness; |
33 | | - protected Disambiguator<TermReference, C> disambig; |
| 33 | + protected Disambiguator<C> disambig; |
34 | 34 | private Collection<C> context; |
35 | 35 | |
36 | | - public TermRelatedness(Disambiguator<TermReference,C> disambig) { |
| 36 | + public TermRelatedness(Disambiguator<C> disambig) { |
37 | 37 | this(disambig, null, null); |
38 | 38 | } |
39 | 39 | |
40 | | - public TermRelatedness(Disambiguator<TermReference, C> disambig, Similarity<WikiWordConcept> relatedness, Collection<C> context) { |
| 40 | + public TermRelatedness(Disambiguator<C> disambig, Similarity<WikiWordConcept> relatedness, Collection<C> context) { |
41 | 41 | this.relatedness = relatedness; |
42 | 42 | this.disambig = disambig; |
43 | 43 | this.context = context; |
— | — | @@ -51,7 +51,7 @@ |
52 | 52 | |
53 | 53 | public Relatedness relatedness(String a, String b) { |
54 | 54 | try { |
55 | | - Disambiguator.Disambiguation<Term, ? extends WikiWordConcept> r = disambig.<Term>disambiguate(Term.asTerms(a, b), context); |
| 55 | + Disambiguator.Disambiguation<Term, ? extends WikiWordConcept> r = disambig.<Term>disambiguate(Term.asTerms(a, b), null, context); |
56 | 56 | if (r==null || r.getMeanings().size()!=2) return null; |
57 | 57 | |
58 | 58 | double d; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java |
— | — | @@ -21,12 +21,13 @@ |
22 | 22 | import de.brightbyte.data.measure.Similarity; |
23 | 23 | import de.brightbyte.util.PersistenceException; |
24 | 24 | import de.brightbyte.util.SanityException; |
| 25 | +import de.brightbyte.wikiword.disambig.Disambiguator.Disambiguation; |
25 | 26 | import de.brightbyte.wikiword.model.ConceptFeatures; |
26 | 27 | import de.brightbyte.wikiword.model.PhraseNode; |
27 | 28 | import de.brightbyte.wikiword.model.TermReference; |
28 | 29 | import de.brightbyte.wikiword.model.WikiWordConcept; |
29 | 30 | |
30 | | -public class CoherenceDisambiguator<T extends TermReference, C extends WikiWordConcept> extends AbstractDisambiguator<T, C> { |
| 31 | +public class CoherenceDisambiguator<C extends WikiWordConcept> extends AbstractDisambiguator<C> { |
31 | 32 | |
32 | 33 | public static class CoherenceDisambiguation<T extends TermReference, C extends WikiWordConcept> extends Disambiguator.Disambiguation<T, C> { |
33 | 34 | protected LabeledVector<Integer> centroid; |
— | — | @@ -69,7 +70,7 @@ |
70 | 71 | |
71 | 72 | protected Similarity<LabeledVector<Integer>> similarityMeasure; |
72 | 73 | protected Measure<? super C> popularityMeasure; |
73 | | - protected PopularityDisambiguator<T, C> popularityDisambiguator; |
| 74 | + protected PopularityDisambiguator<C> popularityDisambiguator; |
74 | 75 | protected Comparator<? super C> popularityComparator; |
75 | 76 | |
76 | 77 | private Functor.Double popularityNormalizer = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously |
— | — | @@ -211,7 +212,7 @@ |
212 | 213 | this.maxMeanings = maxMeanings; |
213 | 214 | } |
214 | 215 | |
215 | | - protected FeatureFetcher<C, Integer> getFeatureCache(Map<? extends T, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
| 216 | + protected <X extends TermReference>FeatureFetcher<C, Integer> getFeatureCache(Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
216 | 217 | //NOTE: pre-fetch all features in one go |
217 | 218 | List<C> concepts = new ArrayList<C>(meanings.size()*10); |
218 | 219 | for (List<? extends C> m: meanings.values()) { |
— | — | @@ -227,7 +228,7 @@ |
228 | 229 | /* (non-Javadoc) |
229 | 230 | * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List) |
230 | 231 | */ |
231 | | - public <X extends T>CoherenceDisambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
| 232 | + public <X extends TermReference>CoherenceDisambiguation<X, C> doDisambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
232 | 233 | if (meanings.isEmpty()) return new CoherenceDisambiguation<X, C>(Collections.<X, C>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<C, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0.0, "no terms or meanings"); |
233 | 234 | |
234 | 235 | LabeledMatrix<C, C> similarities = new MapLabeledMatrix<C, C>(true); |
— | — | @@ -237,14 +238,14 @@ |
238 | 239 | if (context!=null) sz += context.size(); |
239 | 240 | |
240 | 241 | if (sz<2) { |
241 | | - Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 242 | + Disambiguation<X, C> r = popularityDisambiguator.doDisambiguate(root, meanings, context); |
242 | 243 | return getScore(r.getInterpretation(), context, similarities, features); |
243 | 244 | } |
244 | 245 | |
245 | 246 | sz = meanings.size(); |
246 | 247 | if (context!=null) sz += context.size(); |
247 | 248 | if (sz <2) { |
248 | | - Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 249 | + Disambiguation<X, C> r = popularityDisambiguator.doDisambiguate(root, meanings, context); |
249 | 250 | return getScore(r.getInterpretation(), context, similarities, features); |
250 | 251 | } |
251 | 252 | |
— | — | @@ -252,14 +253,14 @@ |
253 | 254 | return disambiguate(sequences, root, meanings, context); |
254 | 255 | } |
255 | 256 | |
256 | | - protected <X extends T>CoherenceDisambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
| 257 | + protected <X extends TermReference>CoherenceDisambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
257 | 258 | LabeledMatrix<C, C> similarities = new MapLabeledMatrix<C, C>(true); |
258 | 259 | FeatureFetcher<C, Integer> features = getFeatureCache(meanings, context); |
259 | 260 | |
260 | 261 | return disambiguate(sequences, root, meanings, context, similarities, features); |
261 | 262 | } |
262 | 263 | |
263 | | - private <X extends T>CoherenceDisambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context, LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
| 264 | + private <X extends TermReference>CoherenceDisambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context, LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
264 | 265 | |
265 | 266 | pruneMeaninglessSequences( sequences, meanings ); |
266 | 267 | |
— | — | @@ -270,16 +271,16 @@ |
271 | 272 | return getBestInterpretation(root, meanings, context, interpretations, similarities, features); |
272 | 273 | } |
273 | 274 | |
274 | | - protected <X extends T>Map<X, List<? extends C>> getMeanings(Collection<X> terms) throws PersistenceException { |
275 | | - Map<X, List<? extends C>> meanings = super.getMeanings(terms); |
| 275 | + protected <X extends TermReference>Map<X, List<? extends C>> getMeanings(Collection<X> terms, Map<X, C> known) throws PersistenceException { |
| 276 | + Map<X, List<? extends C>> meanings = super.getMeanings(terms, known); |
276 | 277 | pruneMeanings(meanings); |
277 | 278 | return meanings; |
278 | 279 | } |
279 | 280 | |
280 | | - protected void pruneMeanings(Map<? extends T, List<? extends C>> meanings) { |
| 281 | + protected <X extends TermReference>void pruneMeanings(Map<X, List<? extends C>> meanings) { |
281 | 282 | Iterator<?> eit = meanings.entrySet().iterator(); |
282 | 283 | while (eit.hasNext()) { |
283 | | - Entry<T, List<? extends C>> e = (Entry<T, List<? extends C>>) eit.next(); //XXX: ugly cast. got confused about generics. ugh. |
| 284 | + Entry<X, List<? extends C>> e = (Entry<X, List<? extends C>>) eit.next(); //XXX: ugly cast. got confused about generics. ugh. |
284 | 285 | List<? extends C> m = e.getValue(); |
285 | 286 | if (m==null) continue; |
286 | 287 | |
— | — | @@ -314,7 +315,7 @@ |
315 | 316 | } |
316 | 317 | } |
317 | 318 | |
318 | | - protected <X extends T>CoherenceDisambiguation<X, C> getBestInterpretation(PhraseNode<X> root, Map<X, List<? extends C>> meanings, |
| 319 | + protected <X extends TermReference>CoherenceDisambiguation<X, C> getBestInterpretation(PhraseNode<X> root, Map<X, List<? extends C>> meanings, |
319 | 320 | Collection<? extends C> context, Collection<Disambiguator.Interpretation<X, C>> interpretations, |
320 | 321 | LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
321 | 322 | |
— | — | @@ -349,7 +350,7 @@ |
350 | 351 | if (best==null || bestScore<minScore || Double.isNaN(bestScore)) { |
351 | 352 | trace("best score is not good enough ("+bestScore+"<"+minScore+"), using popularity disambiguator."); |
352 | 353 | |
353 | | - Disambiguation<X, C> p = popularityDisambiguator.disambiguate(root, meanings, context); |
| 354 | + Disambiguation<X, C> p = popularityDisambiguator.doDisambiguate(root, meanings, context); |
354 | 355 | CoherenceDisambiguation<X, C> r = getScore(p.getInterpretation(), context, similarities, features); |
355 | 356 | |
356 | 357 | trace("best of "+interpretations.size()+" interpretations by popularity: "+r); |
— | — | @@ -362,7 +363,7 @@ |
363 | 364 | return best; |
364 | 365 | } |
365 | 366 | |
366 | | - public <X extends T>Collection<Disambiguator.Interpretation<X, C>> getInterpretations(Collection<List<X>> sequences, Map<X, List<? extends C>> meanings) { |
| 367 | + public <X extends TermReference>Collection<Disambiguator.Interpretation<X, C>> getInterpretations(Collection<List<X>> sequences, Map<X, List<? extends C>> meanings) { |
367 | 368 | List<Disambiguator.Interpretation<X, C>> interpretations = new ArrayList<Disambiguator.Interpretation<X, C>>(); |
368 | 369 | for (List<X> sq: sequences) { |
369 | 370 | if (sq.isEmpty()) continue; |
— | — | @@ -373,7 +374,7 @@ |
374 | 375 | return interpretations; |
375 | 376 | } |
376 | 377 | |
377 | | - public <X extends T>Collection<Disambiguator.Interpretation<X, C>> getSequenceInterpretations(List<X> sequence, Map<X, List<? extends C>> meanings) { |
| 378 | + public <X extends TermReference>Collection<Disambiguator.Interpretation<X, C>> getSequenceInterpretations(List<X> sequence, Map<X, List<? extends C>> meanings) { |
378 | 379 | if (sequence.size()==0) { |
379 | 380 | return Collections.singletonList(new Disambiguator.Interpretation<X, C>(Collections.<X, C>emptyMap(), sequence)); |
380 | 381 | } |
— | — | @@ -573,5 +574,5 @@ |
574 | 575 | public boolean exploresAllSequences() { |
575 | 576 | return true; |
576 | 577 | } |
577 | | - |
| 578 | + |
578 | 579 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ForwardPopularityDisambiguator.java |
— | — | @@ -6,14 +6,13 @@ |
7 | 7 | import java.util.List; |
8 | 8 | import java.util.Map; |
9 | 9 | |
10 | | -import de.brightbyte.data.Functors; |
11 | 10 | import de.brightbyte.data.measure.Measure; |
12 | 11 | import de.brightbyte.wikiword.model.PhraseNode; |
13 | 12 | import de.brightbyte.wikiword.model.TermListNode; |
14 | 13 | import de.brightbyte.wikiword.model.TermReference; |
15 | 14 | import de.brightbyte.wikiword.model.WikiWordConcept; |
16 | 15 | |
17 | | -public class ForwardPopularityDisambiguator<T extends TermReference, C extends WikiWordConcept> extends PopularityDisambiguator<T, C> { |
| 16 | +public class ForwardPopularityDisambiguator<C extends WikiWordConcept> extends PopularityDisambiguator<C> { |
18 | 17 | |
19 | 18 | public ForwardPopularityDisambiguator(MeaningFetcher<? extends C> meaningFetcher, |
20 | 19 | int cacheCapacity) { |
— | — | @@ -26,13 +25,13 @@ |
27 | 26 | } |
28 | 27 | |
29 | 28 | @Override |
30 | | - public <X extends T> Disambiguator.Disambiguation<X, C> disambiguate(List<X> sequence, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
| 29 | + public <X extends TermReference> Disambiguator.Disambiguation<X, C> doDisambiguate(List<X> sequence, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
31 | 30 | PhraseNode<X> root = new TermListNode<X>( sequence, 0 ); |
32 | | - return disambiguate(root, meanings, context); |
| 31 | + return doDisambiguate(root, meanings, context); |
33 | 32 | } |
34 | 33 | |
35 | 34 | @Override |
36 | | - public <X extends T> Disambiguator.Disambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
| 35 | + public <X extends TermReference> Disambiguator.Disambiguation<X, C> doDisambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
37 | 36 | Map<X, C> disambig = new HashMap<X, C>(); |
38 | 37 | List<X> sequence = new ArrayList<X>(); |
39 | 38 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java |
— | — | @@ -5,7 +5,6 @@ |
6 | 6 | import java.util.Collections; |
7 | 7 | import java.util.Iterator; |
8 | 8 | import java.util.List; |
9 | | -import java.util.ListIterator; |
10 | 9 | import java.util.Map; |
11 | 10 | |
12 | 11 | import de.brightbyte.io.Output; |
— | — | @@ -15,7 +14,7 @@ |
16 | 15 | import de.brightbyte.wikiword.model.TermReference; |
17 | 16 | import de.brightbyte.wikiword.model.WikiWordConcept; |
18 | 17 | |
19 | | -public abstract class AbstractDisambiguator<T extends TermReference, C extends WikiWordConcept> implements Disambiguator<T, C> { |
| 18 | +public abstract class AbstractDisambiguator<C extends WikiWordConcept> implements Disambiguator<C> { |
20 | 19 | |
21 | 20 | private MeaningFetcher<? extends C> meaningFetcher; |
22 | 21 | |
— | — | @@ -38,13 +37,13 @@ |
39 | 38 | this.meaningOverrides = overrideMap; |
40 | 39 | } |
41 | 40 | |
42 | | - protected <X extends T>PhraseNode<X> getLastNode(PhraseNode<X> root, List<X> sequence) { |
| 41 | + protected <X extends TermReference>PhraseNode<X> getLastNode(PhraseNode<X> root, List<X> sequence) { |
43 | 42 | PhraseNode<X> n = findLastNode(root, sequence); |
44 | 43 | if (n==null) throw new IllegalArgumentException("sequence does not match node structure: "+sequence); |
45 | 44 | return n; |
46 | 45 | } |
47 | 46 | |
48 | | - private <X extends T>PhraseNode<X> findLastNode(PhraseNode<X> root, List<X> sequence) { |
| 47 | + private <X extends TermReference>PhraseNode<X> findLastNode(PhraseNode<X> root, List<X> sequence) { |
49 | 48 | if (root.getTermReference().getTerm().length()>0) { |
50 | 49 | X t = sequence.get(0); |
51 | 50 | if (!t.getTerm().equals(root.getTermReference().getTerm())) return null; |
— | — | @@ -74,19 +73,19 @@ |
75 | 74 | return root; |
76 | 75 | } |
77 | 76 | |
78 | | - protected <X extends T>Collection<X> getTerms(PhraseNode<X> root, int depth) { |
| 77 | + protected <X extends TermReference>Collection<X> getTerms(PhraseNode<X> root, int depth) { |
79 | 78 | PhraseNode.TermSetBuilder<X> builder = new PhraseNode.TermSetBuilder<X>(); |
80 | 79 | builder.walk(root, 0, null, depth, Double.MAX_VALUE); |
81 | 80 | return builder.getTerms(); |
82 | 81 | } |
83 | 82 | |
84 | | - protected <X extends T>Collection<List<X>> getSequences(PhraseNode<X> root, int depth) { |
| 83 | + protected <X extends TermReference>Collection<List<X>> getSequences(PhraseNode<X> root, int depth) { |
85 | 84 | PhraseNode.SequenceSetBuilder<X> builder = new PhraseNode.SequenceSetBuilder<X>(); |
86 | 85 | builder.walk(root, 0, null, depth, Double.MAX_VALUE); |
87 | 86 | return builder.getSequences(); |
88 | 87 | } |
89 | 88 | |
90 | | - protected <X extends T>void pruneMeaninglessSequences(Collection<List<X>> sequences, Map<X, List<? extends C>> meanings) { |
| 89 | + protected <X extends TermReference>void pruneMeaninglessSequences(Collection<List<X>> sequences, Map<X, List<? extends C>> meanings) { |
91 | 90 | Iterator<List<X>> it = sequences.iterator(); |
92 | 91 | outer: while ( it.hasNext() ) { |
93 | 92 | List<X> seq = it.next(); |
— | — | @@ -101,22 +100,27 @@ |
102 | 101 | } |
103 | 102 | } |
104 | 103 | |
105 | | - protected <X extends T>Map<X, List<? extends C>> getMeanings(PhraseNode<X> root) throws PersistenceException { |
| 104 | + protected <X extends TermReference>Map<X, List<? extends C>> getMeanings(PhraseNode<X> root, Map<X, C> known) throws PersistenceException { |
106 | 105 | Collection<X> terms = getTerms(root, Integer.MAX_VALUE); |
107 | | - return getMeanings(terms); |
| 106 | + return getMeanings(terms, known); |
108 | 107 | } |
109 | 108 | |
110 | | - protected <X extends T>Map<X, List<? extends C>> getMeanings(Collection<X> terms) throws PersistenceException { |
| 109 | + protected <X extends TermReference>Map<X, List<? extends C>> getMeanings(Collection<X> terms, Map<X, C> known) throws PersistenceException { |
111 | 110 | Collection<X> todo = terms; |
112 | 111 | |
113 | | - if (meaningOverrides!=null) { |
| 112 | + if (meaningOverrides!=null || known!=null) { |
114 | 113 | todo = new ArrayList<X>(); |
| 114 | + |
115 | 115 | for (X t: terms) { |
116 | | - if (!meaningOverrides.containsKey(t.getTerm())) todo.add(t); |
| 116 | + if ( ( meaningOverrides==null || !meaningOverrides.containsKey(t.getTerm()) ) |
| 117 | + && ( known == null || !known.containsKey(t) ) ) { //NOTE: known is a Map<X, C>, keyed by the term reference itself; a lookup by t.getTerm() (a String) would never match |
| 118 | + todo.add(t); |
| 119 | + } |
117 | 120 | } |
118 | 121 | } |
119 | 122 | |
120 | | - Map<X, List<? extends C>> meanings = (Map<X, List<? extends C>>)(Object)meaningFetcher.getMeanings(todo); //FIXME: got confused by generics :( |
| 123 | + //FIXME: got confused by generics :( |
| 124 | + Map<X, List<? extends C>> meanings = (Map<X, List<? extends C>>)(Object)meaningFetcher.getMeanings(todo); |
121 | 125 | |
122 | 126 | if (meaningOverrides!=null && todo.size()!=terms.size()) { |
123 | 127 | for (X t: terms) { |
— | — | @@ -125,22 +129,29 @@ |
126 | 130 | } |
127 | 131 | } |
128 | 132 | |
| 133 | + if (known!=null && todo.size()!=terms.size()) { |
| 134 | + for (X t: terms) { |
| 135 | + C c = known.get(t); //NOTE: same key type as the containsKey check above, so every term skipped there gets its known meaning here |
| 136 | + if (c!=null) meanings.put(t, Collections.singletonList(c)); |
| 137 | + } |
| 138 | + } |
| 139 | + |
129 | 140 | return meanings; |
130 | 141 | } |
131 | 142 | |
132 | | - public <X extends T>Disambiguation<X, C> disambiguate(List<X> terms, Collection<? extends C> context) throws PersistenceException { |
| 143 | + public <X extends TermReference>Disambiguation<X, C> disambiguate(List<X> terms, Map<X, C> known, Collection<? extends C> context) throws PersistenceException { |
133 | 144 | PhraseNode<X> root = new TermListNode<X>(terms, 0); |
134 | | - Map<X, List<? extends C>> meanings = getMeanings(terms); |
135 | | - return disambiguate(root, meanings, context); |
| 145 | + Map<X, List<? extends C>> meanings = getMeanings(terms, known); |
| 146 | + return doDisambiguate(root, meanings, context); |
136 | 147 | } |
137 | 148 | |
138 | | - public <X extends T>Disambiguation<X, C> disambiguate(PhraseNode<X> root, Collection<? extends C> context) throws PersistenceException { |
| 149 | + public <X extends TermReference>Disambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, C> known, Collection<? extends C> context) throws PersistenceException { |
139 | 150 | Collection<X> terms = getTerms(root, Integer.MAX_VALUE); |
140 | | - Map<X, List<? extends C>> meanings = getMeanings(terms); |
141 | | - return disambiguate(root, meanings, context); |
| 151 | + Map<X, List<? extends C>> meanings = getMeanings(terms, known); |
| 152 | + return doDisambiguate(root, meanings, context); |
142 | 153 | } |
143 | | - |
144 | | - public abstract <X extends T>Disambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException; |
| 154 | + |
| 155 | + public abstract <X extends TermReference>Disambiguation<X, C> doDisambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException; |
145 | 156 | |
146 | 157 | public Output getTrace() { |
147 | 158 | return trace; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java |
— | — | @@ -18,7 +18,7 @@ |
19 | 19 | import de.brightbyte.wikiword.model.TermReference; |
20 | 20 | import de.brightbyte.wikiword.model.WikiWordConcept; |
21 | 21 | |
22 | | -public class SlidingCoherenceDisambiguator<T extends TermReference, C extends WikiWordConcept> extends CoherenceDisambiguator<T, C> { |
| 22 | +public class SlidingCoherenceDisambiguator<C extends WikiWordConcept> extends CoherenceDisambiguator<C> { |
23 | 23 | |
24 | 24 | protected int window; |
25 | 25 | protected int initialWindow; |
— | — | @@ -34,7 +34,7 @@ |
35 | 35 | this.initialWindow = initialWindow; |
36 | 36 | } |
37 | 37 | |
38 | | - public <X extends T>Disambiguation<X, C> evalStep(List<X> baseSequence, Map<X, C> interpretation, PhraseNode<X> node, |
| 38 | + public <X extends TermReference>Disambiguation<X, C> evalStep(List<X> baseSequence, Map<X, C> interpretation, PhraseNode<X> node, |
39 | 39 | Map<X, List<? extends C>> meanings, Collection<? extends C> context, |
40 | 40 | LabeledMatrix<C, C> similarities, FeatureFetcher<C, Integer> features) throws PersistenceException { |
41 | 41 | X term = node.getTermReference(); |
— | — | @@ -51,7 +51,7 @@ |
52 | 52 | Disambiguation<X, C> r ; |
53 | 53 | |
54 | 54 | if (to-from < 2) { |
55 | | - r = popularityDisambiguator.disambiguate(frame, meanings, context); |
| 55 | + r = popularityDisambiguator.doDisambiguate(frame, meanings, context); |
56 | 56 | } else { |
57 | 57 | Collection<Disambiguator.Interpretation<X, C>> interpretations = getInterpretations(frame, interpretation, meanings); |
58 | 58 | r = getBestInterpretation(node, meanings, context, interpretations, similarities, features); |
— | — | @@ -63,7 +63,7 @@ |
64 | 64 | /* (non-Javadoc) |
65 | 65 | * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List) |
66 | 66 | */ |
67 | | - public <X extends T>CoherenceDisambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
| 67 | + public <X extends TermReference>CoherenceDisambiguation<X, C> doDisambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) throws PersistenceException { |
68 | 68 | if (meanings.isEmpty()) return new CoherenceDisambiguation<X, C>(Collections.<X, C>emptyMap(), Collections.<X>emptyList(), Collections.<Integer, ConceptFeatures<C, Integer>>emptyMap(), ConceptFeatures.newIntFeaturVector(1), 0.0, "no terms or meanings"); |
69 | 69 | |
70 | 70 | int sz = meanings.size(); |
— | — | @@ -75,7 +75,7 @@ |
76 | 76 | FeatureFetcher<C, Integer> features = getFeatureCache(meanings, context); |
77 | 77 | |
78 | 78 | if (window < 2 || sz<2) { |
79 | | - Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 79 | + Disambiguation<X, C> r = popularityDisambiguator.doDisambiguate(root, meanings, context); |
80 | 80 | return getScore(r.getInterpretation(), context, similarities, features); |
81 | 81 | } |
82 | 82 | |
— | — | @@ -83,7 +83,7 @@ |
84 | 84 | if (context!=null) sz += context.size(); |
85 | 85 | |
86 | 86 | if (sz<2) { |
87 | | - Disambiguation<X, C> r = popularityDisambiguator.disambiguate(root, meanings, context); |
| 87 | + Disambiguation<X, C> r = popularityDisambiguator.doDisambiguate(root, meanings, context); |
88 | 88 | return getScore(r.getInterpretation(), context, similarities, features); |
89 | 89 | } |
90 | 90 | |
— | — | @@ -134,7 +134,7 @@ |
135 | 135 | return getScore(new Disambiguator.Interpretation<X, C>(disambig, sequence), context, similarities, features); //FIXME: this is unnecessarily expensive, we usually don't need the scores this calculates. |
136 | 136 | } |
137 | 137 | |
138 | | - protected <X extends T>Collection<Disambiguator.Interpretation<X, C>> getInterpretations(List<X> frame, Map<X, ? extends C> known, Map<? extends T, List<? extends C>> meanings) { |
| 138 | + protected <X extends TermReference>Collection<Disambiguator.Interpretation<X, C>> getInterpretations(List<X> frame, Map<X, ? extends C> known, Map<X, List<? extends C>> meanings) { |
139 | 139 | //strip out all terms with no known meaning |
140 | 140 | if (meanings.keySet().size() != frame.size()) { |
141 | 141 | List<X> t = new ArrayList<X>(frame.size()); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java |
— | — | @@ -13,7 +13,7 @@ |
14 | 14 | import de.brightbyte.wikiword.model.TermReference; |
15 | 15 | import de.brightbyte.wikiword.model.WikiWordConcept; |
16 | 16 | |
17 | | -public class PopularityDisambiguator<T extends TermReference, C extends WikiWordConcept> extends AbstractDisambiguator<T, C> { |
| 17 | +public class PopularityDisambiguator<C extends WikiWordConcept> extends AbstractDisambiguator<C> { |
18 | 18 | |
19 | 19 | protected Measure<? super C> popularityMeasure; |
20 | 20 | |
— | — | @@ -58,18 +58,18 @@ |
59 | 59 | this.weigthCombiner = weigthCombiner; |
60 | 60 | } |
61 | 61 | |
62 | | - public <X extends T>Disambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
| 62 | + public <X extends TermReference>Disambiguation<X, C> doDisambiguate(PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
63 | 63 | Collection<List<X>> sequences = getSequences(root, Integer.MAX_VALUE); |
64 | 64 | return disambiguate(sequences, root, meanings, context); |
65 | 65 | } |
66 | 66 | |
67 | | - protected <X extends T>Disambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
| 67 | + protected <X extends TermReference>Disambiguation<X, C> disambiguate(Collection<List<X>> sequences, PhraseNode<X> root, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
68 | 68 | Disambiguation<X, C> best = null; |
69 | 69 | |
70 | 70 | pruneMeaninglessSequences( sequences, meanings ); |
71 | 71 | |
72 | 72 | for (List<X> sequence: sequences) { |
73 | | - Disambiguation<X, C> r = disambiguate(sequence, meanings, context); |
| 73 | + Disambiguation<X, C> r = doDisambiguate(sequence, meanings, context); |
74 | 74 | trace(r.toString()); |
75 | 75 | if (best == null || best.getScore() < r.getScore()) { |
76 | 76 | best = r; |
— | — | @@ -80,7 +80,7 @@ |
81 | 81 | return best; |
82 | 82 | } |
83 | 83 | |
84 | | - protected <X extends T> C getBestMeaning(X term, Map<X, List<? extends C>> meanings, Measure<? super C> measure) { |
| 84 | + protected <X extends TermReference> C getBestMeaning(X term, Map<X, List<? extends C>> meanings, Measure<? super C> measure) { |
85 | 85 | List<? extends C> m = meanings.get(term); |
86 | 86 | if (m==null || m.size()==0) return null; |
87 | 87 | |
— | — | @@ -99,7 +99,7 @@ |
100 | 100 | return c; |
101 | 101 | } |
102 | 102 | |
103 | | - public <X extends T>Disambiguation<X, C> disambiguate(List<X> sequence, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
| 103 | + public <X extends TermReference>Disambiguation<X, C> doDisambiguate(List<X> sequence, Map<X, List<? extends C>> meanings, Collection<? extends C> context) { |
104 | 104 | if (sequence.isEmpty() || meanings.isEmpty()) return new Disambiguator.Disambiguation<X, C>(Collections.<X, C>emptyMap(), Collections.<X>emptyList(), 0.0, "no terms or meanings"); |
105 | 105 | |
106 | 106 | Map<X, C> disambig = new HashMap<X, C>(); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java |
— | — | @@ -14,7 +14,7 @@ |
15 | 15 | import de.brightbyte.wikiword.model.TermReference; |
16 | 16 | import de.brightbyte.wikiword.model.WikiWordConcept; |
17 | 17 | |
18 | | -public interface Disambiguator<T extends TermReference, C extends WikiWordConcept> { |
| 18 | +public interface Disambiguator<C extends WikiWordConcept> { |
19 | 19 | |
20 | 20 | public static class Interpretation<T extends TermReference, C extends WikiWordConcept> { |
21 | 21 | private final Map<T, C> meanings; |
— | — | @@ -166,8 +166,8 @@ |
167 | 167 | |
168 | 168 | public void setTrace(Output trace); |
169 | 169 | |
170 | | - public <X extends T>Disambiguation<X, C> disambiguate(List<X> terms, Collection<? extends C> context) throws PersistenceException; |
171 | | - public <X extends T>Disambiguation<X, C> disambiguate(PhraseNode<X> root, Collection<? extends C> context) throws PersistenceException; |
| 170 | + public <X extends TermReference>Disambiguation<X, C> disambiguate(List<X> terms, Map<X, C> known, Collection<? extends C> context) throws PersistenceException; |
| 171 | + public <X extends TermReference>Disambiguation<X, C> disambiguate(PhraseNode<X> root, Map<X, C> known, Collection<? extends C> context) throws PersistenceException; |
172 | 172 | |
173 | 173 | public boolean exploresAllSequences(); |
174 | 174 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java |
— | — | @@ -560,7 +560,7 @@ |
561 | 561 | terms.add(t2); |
562 | 562 | |
563 | 563 | CoherenceDisambiguation<Term, WikiWordConcept> r = |
564 | | - (CoherenceDisambiguation<Term, WikiWordConcept>) getDisambiguator().disambiguate(terms, null); |
| 564 | + (CoherenceDisambiguation<Term, WikiWordConcept>) getDisambiguator().disambiguate(terms, null, null); |
565 | 565 | |
566 | 566 | WikiWordConcept concept1 = (WikiWordConcept)r.getMeanings().get(t1); |
567 | 567 | WikiWordConcept concept2 = (WikiWordConcept)r.getMeanings().get(t2); |
— | — | @@ -581,7 +581,7 @@ |
582 | 582 | } |
583 | 583 | |
584 | 584 | public void showDisambiguation(PhraseNode<? extends TermReference> root, ConsoleOutput out) throws PersistenceException { |
585 | | - Disambiguator.Disambiguation r = getDisambiguator().disambiguate(root, null); |
| 585 | + Disambiguator.Disambiguation r = getDisambiguator().disambiguate(root, null, null); |
586 | 586 | out.writeInterpretation(r.getMeanings()); |
587 | 587 | } |
588 | 588 | |