Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java |
— | — | @@ -12,7 +12,6 @@ |
13 | 13 | |
14 | 14 | import de.brightbyte.data.Functor; |
15 | 15 | import de.brightbyte.data.Functor2; |
16 | | -import de.brightbyte.data.Functors; |
17 | 16 | import de.brightbyte.data.LabeledMatrix; |
18 | 17 | import de.brightbyte.data.LabeledVector; |
19 | 18 | import de.brightbyte.data.MapLabeledMatrix; |
— | — | @@ -32,46 +31,44 @@ |
33 | 32 | protected int minPopularity = 2; //FIXME: use complex cutoff specifier! |
34 | 33 | protected int maxMeanings = 8; //FIXME: magic... |
35 | 34 | |
36 | | - protected double minScore = 0.1; //FIXME: magic number. should "somehow" match popularityFactor and similarityFactor |
37 | | - protected double popularityBias = 0.2; //FIXME: magic number. should "somehow" match popularityFactor and similarityFactor |
| 35 | + protected double minScore = 0.1; //FIXME: magic number. should "somehow" match popularityNormalizer and similarityNormalizer |
| 36 | + //protected double popularityBias = 0.2; //FIXME: magic number. should "somehow" match popularityNormalizer and similarityNormalizer |
| 37 | + //protected double weightBias = 0.5; //FIXME: magic number. should "somehow" match popularityNormalizer |
38 | 38 | |
39 | 39 | protected FeatureCache.Manager<LocalConcept, Integer> featureCacheManager; |
40 | 40 | |
41 | 41 | protected Similarity<LabeledVector<Integer>> similarityMeasure; |
42 | 42 | protected Measure<WikiWordConcept> popularityMeasure; |
43 | | - protected Functor2<? extends Number, Number, Number> weightCombiner; |
44 | 43 | protected PopularityDisambiguator popularityDisambiguator; |
45 | 44 | protected Comparator<LocalConcept> popularityComparator; |
46 | 45 | |
47 | | - private Functor.Double popularityFactor = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously |
48 | | - |
| 46 | + private Functor.Double popularityNormalizer = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously |
49 | 47 | public double apply(double pop) { |
50 | 48 | return 1 - 1/(Math.sqrt(Math.log(pop))+1); //XXX: black voodoo magic ad hoc formula with no deeper meaing. |
51 | 49 | } |
| 50 | + }; |
52 | 51 | |
| 52 | + private Functor.Double weightNormalizer = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously |
| 53 | + public double apply(double weight) { //inputs below 1 are clamped to 1: log() would turn negative there and sqrt() would yield NaN |
| 54 | + return 1 - 1/(Math.sqrt(Math.log(Math.max(weight, 1.0)))+1); //XXX: black voodoo magic ad hoc formula with no deeper meaning. |
| 55 | + } |
53 | 56 | }; |
54 | 57 | |
55 | | - private Functor.Double similarityFactor = new Functor.Double() { //NOTE: must map [0:1] to [0:1] and grow monotonously |
| 58 | + private Functor.Double similarityNormalizer = new Functor.Double() { //NOTE: must map [0:1] to [0:1] and grow monotonously |
56 | 59 | public double apply(double sim) { |
57 | 60 | return Math.sqrt(Math.sqrt(sim)); //XXX: black voodoo magic ad hoc formula with no deeper meaing. |
58 | 61 | } |
59 | 62 | }; |
60 | 63 | |
61 | | - private Functor2.Double scoreCombiner = new Functor2.Double() { //NOTE: must map ([0:1][0:1]) to [0:1] and grow monotonously over both params. |
62 | | - |
63 | | - public double apply(double popf, double simf) { |
64 | | - return popf * popularityBias + simf * ( 1 - popularityBias ); //linear combination |
65 | | - //return = Math.sqrt( popf * simf ); //normalized produkt |
66 | | - } |
| 64 | + protected Functor2.Double scoreCombiner = new LinearCombiner(0.8); //merges the combined weight/popularity score (1st arg) with the similarity factor (2nd arg) |
| 65 | + protected Functor2.Double weightCombiner = new LinearCombiner(0.5); //merges the weight factor (1st arg) with the popularity factor (2nd arg); also handed to popularityDisambiguator by setWeightCombiner() |
67 | 66 | |
68 | | - }; |
69 | | - |
70 | 67 | public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, boolean featuresAreNormalized) { |
71 | | - this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality, Functors.Double.product2, |
| 68 | + this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality, |
72 | 69 | featuresAreNormalized ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance()); //if pre-normalized, use scalar to calc cosin |
73 | 70 | } |
74 | 71 | |
75 | | - public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Functor2<? extends Number, Number, Number> weightCombiner, Similarity<LabeledVector<Integer>> sim) { |
| 72 | + public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim) { |
76 | 73 | super(meaningFetcher); |
77 | 74 | |
78 | 75 | if (popularityMeasure==null) throw new NullPointerException(); |
— | — | @@ -79,19 +76,18 @@ |
80 | 77 | if (featureFetcher==null) throw new NullPointerException(); |
81 | 78 | |
82 | 79 | this.featureCacheManager = new FeatureCache.Manager<LocalConcept, Integer>(featureFetcher, 10); //TODO: depth |
83 | | - this.popularityDisambiguator = new PopularityDisambiguator(meaningFetcher, popularityMeasure, weightCombiner); |
| 80 | + this.popularityDisambiguator = new PopularityDisambiguator(meaningFetcher, popularityMeasure); |
84 | 81 | |
85 | 82 | this.setPopularityMeasure(popularityMeasure); |
86 | | - this.setWeightCombiner(weightCombiner); |
87 | 83 | this.setSimilarityMeasure(sim); |
88 | 84 | } |
89 | 85 | |
90 | | - public Functor.Double getPopularityFactor() { |
91 | | - return popularityFactor; |
| 86 | + public Functor.Double getPopularityNormalizer() { |
| 87 | + return popularityNormalizer; |
92 | 88 | } |
93 | 89 | |
94 | | - public void setPopularityFactor(Functor.Double popularityFactor) { |
95 | | - this.popularityFactor = popularityFactor; |
| 90 | + public void setPopularityNormalizer(Functor.Double popularityFactor) { |
| 91 | + this.popularityNormalizer = popularityFactor; |
96 | 92 | } |
97 | 93 | |
98 | 94 | public Measure<WikiWordConcept> getPopularityMeasure() { |
— | — | @@ -104,7 +100,7 @@ |
105 | 101 | this.popularityComparator = new Measure.Comparator<LocalConcept>(popularityMeasure, true); |
106 | 102 | } |
107 | 103 | |
108 | | - public void setWeightCombiner(Functor2<? extends Number, Number, Number> weightCombiner) { |
| 104 | + public void setWeightCombiner(Functor2.Double weightCombiner) { |
109 | 105 | this.weightCombiner = weightCombiner; |
110 | 106 | this.popularityDisambiguator.setWeightCombiner(weightCombiner); |
111 | 107 | } |
— | — | @@ -117,12 +113,12 @@ |
118 | 114 | this.scoreCombiner = scoreCombiner; |
119 | 115 | } |
120 | 116 | |
121 | | - public Functor.Double getSimilarityFactor() { |
122 | | - return similarityFactor; |
| 117 | + public Functor.Double getSimilarityNormalizer() { |
| 118 | + return similarityNormalizer; |
123 | 119 | } |
124 | 120 | |
125 | | - public void setSimilarityFactor(Functor.Double similarityFactor) { |
126 | | - this.similarityFactor = similarityFactor; |
| 121 | + public void setSimilarityNormalizer(Functor.Double similarityFactor) { |
| 122 | + this.similarityNormalizer = similarityFactor; |
127 | 123 | } |
128 | 124 | |
129 | 125 | public void setFeatureFetcher(FeatureFetcher<LocalConcept, Integer> featureFetcher) { |
— | — | @@ -139,14 +135,6 @@ |
140 | 136 | this.similarityMeasure = similarityMeasure; |
141 | 137 | } |
142 | 138 | |
143 | | - public double getPopularityBias() { |
144 | | - return popularityBias; |
145 | | - } |
146 | | - |
147 | | - public void setPopularityBias(double popularityBias) { |
148 | | - this.popularityBias = popularityBias; |
149 | | - } |
150 | | - |
151 | 139 | public int getMinPopularity() { |
152 | 140 | return minPopularity; |
153 | 141 | } |
— | — | @@ -382,8 +370,7 @@ |
383 | 371 | if (p<1) p= 1; |
384 | 372 | if (w<1) w= 1; |
385 | 373 | |
386 | | - pop += weightCombiner.apply(p, w).doubleValue(); |
387 | | - |
| 374 | + pop += p; |
388 | 375 | weight += w; |
389 | 376 | c ++; |
390 | 377 | } |
— | — | @@ -393,10 +380,12 @@ |
394 | 381 | pop = pop / c; //normalize |
395 | 382 | weight = weight / c; //normalize |
396 | 383 | |
397 | | - double popf = popularityFactor.apply(pop); |
398 | | - double simf = similarityFactor.apply(sim); |
| 384 | + double popf = popularityNormalizer.apply(pop); |
| 385 | + double simf = similarityNormalizer.apply(sim); |
| 386 | + double weightf = weightNormalizer.apply(weight); |
399 | 387 | |
400 | | - double score = scoreCombiner.apply(popf, simf); |
| 388 | + double score = weightCombiner.apply(weightf, popf); |
| 389 | + score = scoreCombiner.apply(score, simf); |
401 | 390 | |
402 | 391 | return new Result<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight); |
403 | 392 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java |
— | — | @@ -86,10 +86,16 @@ |
87 | 87 | } |
88 | 88 | |
89 | 89 | protected <X extends T>PhraseNode<X> getLastNode(PhraseNode<X> root, List<X> sequence) { |
| 90 | + PhraseNode<X> last = findLastNode(root, sequence); //findLastNode reports "no matching path" as null |
| 91 | + if (last!=null) return last; |
| 92 | + throw new IllegalArgumentException("sequence does not match node structure: "+sequence); |
| 93 | + } |
| 94 | + |
| 95 | + private <X extends T>PhraseNode<X> findLastNode(PhraseNode<X> root, List<X> sequence) { |
90 | 96 | terms: for (X t: sequence) { |
91 | 97 | Collection<? extends PhraseNode<X>> successors = root.getSuccessors(); |
92 | 98 | if (successors==null || successors.isEmpty()) |
93 | | - throw new IllegalArgumentException("sequence too long, no nodes left along this path."); |
| 99 | + return null; |
94 | 100 | |
95 | 101 | for (PhraseNode<X> n: successors) { |
96 | 102 | if (n.getTermReference().equals(t)) { |
— | — | @@ -98,7 +104,12 @@ |
99 | 105 | } |
100 | 106 | } |
101 | 107 | |
102 | | - throw new IllegalArgumentException("sequence does not match node structure; no node found matching "+t); |
| 108 | + for (PhraseNode<X> n: successors) { |
| 109 | + PhraseNode<X> m = findLastNode(n, sequence); |
| 110 | + if (m != null) return m; |
| 111 | + } |
| 112 | + |
| 113 | + return null; |
103 | 114 | } |
104 | 115 | |
105 | 116 | return root; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java |
— | — | @@ -7,8 +7,6 @@ |
8 | 8 | import java.util.List; |
9 | 9 | import java.util.Map; |
10 | 10 | |
11 | | -import de.brightbyte.data.Functor2; |
12 | | -import de.brightbyte.data.Functors; |
13 | 11 | import de.brightbyte.data.LabeledMatrix; |
14 | 12 | import de.brightbyte.data.LabeledVector; |
15 | 13 | import de.brightbyte.data.MapLabeledMatrix; |
— | — | @@ -28,13 +26,13 @@ |
29 | 27 | protected int initialWindow; |
30 | 28 | |
31 | 29 | public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, boolean featuresAreNormalized) { |
32 | | - this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality, Functors.Double.product2, |
| 30 | + this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality, |
33 | 31 | featuresAreNormalized ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance(), //if pre-normalized, use scalar to calc cosin |
34 | 32 | 5, 5); |
35 | 33 | } |
36 | 34 | |
37 | | - public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Functor2<? extends Number, Number, Number> weightCombiner, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) { |
38 | | - super(meaningFetcher, featureFetcher, popularityMeasure, weightCombiner, sim); |
| 35 | + public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) { |
| 36 | + super(meaningFetcher, featureFetcher, popularityMeasure, sim); |
39 | 37 | |
40 | 38 | this.window = window; |
41 | 39 | this.initialWindow = initialWindow; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java |
— | — | @@ -7,7 +7,6 @@ |
8 | 8 | import java.util.Map; |
9 | 9 | |
10 | 10 | import de.brightbyte.data.Functor2; |
11 | | -import de.brightbyte.data.Functors; |
12 | 11 | import de.brightbyte.data.measure.Measure; |
13 | 12 | import de.brightbyte.data.measure.Measure.Comparator; |
14 | 13 | import de.brightbyte.wikiword.model.LocalConcept; |
— | — | @@ -18,18 +17,18 @@ |
19 | 18 | public class PopularityDisambiguator extends AbstractDisambiguator<TermReference, LocalConcept> { |
20 | 19 | |
21 | 20 | protected Measure<WikiWordConcept> popularityMeasure; |
22 | | - protected Functor2<? extends Number, Number, Number> weigthCombiner; |
23 | 21 | protected Comparator<LocalConcept> popularityComparator; |
24 | 22 | |
| 23 | + protected Functor2.Double weigthCombiner = new LinearCombiner(0.5); |
| 24 | + |
25 | 25 | public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher) { |
26 | | - this(meaningFetcher, WikiWordConcept.theCardinality, Functors.Double.product2); |
| 26 | + this(meaningFetcher, WikiWordConcept.theCardinality); |
27 | 27 | } |
28 | 28 | |
29 | | - public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, Measure<WikiWordConcept> popularityMeasure, Functor2<? extends Number, Number, Number> weightCombiner) { |
| 29 | + public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, Measure<WikiWordConcept> popularityMeasure) { |
30 | 30 | super(meaningFetcher); |
31 | 31 | |
32 | 32 | this.setPopularityMeasure(popularityMeasure); |
33 | | - this.setWeightCombiner(weightCombiner); |
34 | 33 | } |
35 | 34 | |
36 | 35 | public Measure<WikiWordConcept> getPopularityMeasure() { |
— | — | @@ -41,7 +40,7 @@ |
42 | 41 | this.popularityComparator = new Measure.Comparator<LocalConcept>(popularityMeasure, true); |
43 | 42 | } |
44 | 43 | |
45 | | - public void setWeightCombiner(Functor2<? extends Number, Number, Number> weightCombiner) { |
| 44 | + public void setWeightCombiner(Functor2.Double weightCombiner) { |
46 | 45 | this.weigthCombiner = weightCombiner; |
47 | 46 | } |
48 | 47 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinearCombiner.java |
— | — | @@ -0,0 +1,28 @@ |
| 2 | +/** |
| 3 | + * |
| 4 | + */ |
| 5 | +package de.brightbyte.wikiword.disambig; |
| 6 | + |
| 7 | +import de.brightbyte.data.Functor2; |
| 8 | + |
| 9 | +final class LinearCombiner implements Functor2.Double { //weighted mean of two values: a*bias + b*(1-bias) |
| 10 | + |
| 11 | + private final double bias; //weight of the first argument of apply(); the second argument gets 1-bias |
| 12 | + |
| 13 | + /** |
| 14 | + * @param bias weight of the first argument of apply(), must be in [0:1]; the second argument is weighted with 1-bias |
| 15 | + */ |
| 16 | + public LinearCombiner(double bias) { |
| 17 | + if (!(bias>=0 && bias<=1)) throw new IllegalArgumentException("bias must be >=0 and <=1, found "+bias); //negated form also rejects NaN |
| 18 | + this.bias = bias; |
| 19 | + } |
| 20 | + |
| 21 | + public LinearCombiner() { //unbiased: plain average of both arguments |
| 22 | + this(0.5); |
| 23 | + } |
| 24 | + |
| 25 | + public double apply(double a, double b) { //for a and b in [0:1] the result is in [0:1] and grows monotonously over both params |
| 26 | + return a * bias + b * ( 1 - bias ); //linear combination; was b*bias + b*(1-bias), which ignored a and always returned b |
| 27 | + //alternative: return Math.sqrt( a * b ); //normalized product |
| 28 | + } |
| 29 | +} |
\ No newline at end of file |