r65638 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r65637‎ | r65638 | r65639 >
Date:21:51, 28 April 2010
Author:daniel
Status:deferred
Tags:
Comment:
revamped weightCombiner/weightNormalizer
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinearCombiner.java (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
@@ -12,7 +12,6 @@
1313
1414 import de.brightbyte.data.Functor;
1515 import de.brightbyte.data.Functor2;
16 -import de.brightbyte.data.Functors;
1716 import de.brightbyte.data.LabeledMatrix;
1817 import de.brightbyte.data.LabeledVector;
1918 import de.brightbyte.data.MapLabeledMatrix;
@@ -32,46 +31,44 @@
3332 protected int minPopularity = 2; //FIXME: use complex cutoff specifier!
3433 protected int maxMeanings = 8; //FIXME: magic...
3534
36 - protected double minScore = 0.1; //FIXME: magic number. should "somehow" match popularityFactor and similarityFactor
37 - protected double popularityBias = 0.2; //FIXME: magic number. should "somehow" match popularityFactor and similarityFactor
 35+ protected double minScore = 0.1; //FIXME: magic number. should "somehow" match popularityNormalizer and similarityNormalizer
 36+ //protected double popularityBias = 0.2; //FIXME: magic number. should "somehow" match popularityNormalizer and similarityNormalizer
 37+ //protected double weightBias = 0.5; //FIXME: magic number. should "somehow" match popularityNormalizer
3838
3939 protected FeatureCache.Manager<LocalConcept, Integer> featureCacheManager;
4040
4141 protected Similarity<LabeledVector<Integer>> similarityMeasure;
4242 protected Measure<WikiWordConcept> popularityMeasure;
43 - protected Functor2<? extends Number, Number, Number> weightCombiner;
4443 protected PopularityDisambiguator popularityDisambiguator;
4544 protected Comparator<LocalConcept> popularityComparator;
4645
47 - private Functor.Double popularityFactor = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously
48 -
 46+ private Functor.Double popularityNormalizer = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously
4947 public double apply(double pop) {
5048 return 1 - 1/(Math.sqrt(Math.log(pop))+1); //XXX: black voodoo magic ad hoc formula with no deeper meaing.
5149 }
 50+ };
5251
 52+ private Functor.Double weightNormalizer = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously
 53+ public double apply(double pop) {
 54+ return 1 - 1/(Math.sqrt(Math.log(pop))+1); //XXX: black voodoo magic ad hoc formula with no deeper meaing.
 55+ }
5356 };
5457
55 - private Functor.Double similarityFactor = new Functor.Double() { //NOTE: must map [0:1] to [0:1] and grow monotonously
 58+ private Functor.Double similarityNormalizer = new Functor.Double() { //NOTE: must map [0:1] to [0:1] and grow monotonously
5659 public double apply(double sim) {
5760 return Math.sqrt(Math.sqrt(sim)); //XXX: black voodoo magic ad hoc formula with no deeper meaing.
5861 }
5962 };
6063
61 - private Functor2.Double scoreCombiner = new Functor2.Double() { //NOTE: must map ([0:1][0:1]) to [0:1] and grow monotonously over both params.
62 -
63 - public double apply(double popf, double simf) {
64 - return popf * popularityBias + simf * ( 1 - popularityBias ); //linear combination
65 - //return = Math.sqrt( popf * simf ); //normalized produkt
66 - }
 64+ protected Functor2.Double scoreCombiner = new LinearCombiner(0.8);
 65+ protected Functor2.Double weightCombiner = new LinearCombiner(0.5);
6766
68 - };
69 -
7067 public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, boolean featuresAreNormalized) {
71 - this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality, Functors.Double.product2,
 68+ this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality,
7269 featuresAreNormalized ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance()); //if pre-normalized, use scalar to calc cosin
7370 }
7471
75 - public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Functor2<? extends Number, Number, Number> weightCombiner, Similarity<LabeledVector<Integer>> sim) {
 72+ public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim) {
7673 super(meaningFetcher);
7774
7875 if (popularityMeasure==null) throw new NullPointerException();
@@ -79,19 +76,18 @@
8077 if (featureFetcher==null) throw new NullPointerException();
8178
8279 this.featureCacheManager = new FeatureCache.Manager<LocalConcept, Integer>(featureFetcher, 10); //TODO: depth
83 - this.popularityDisambiguator = new PopularityDisambiguator(meaningFetcher, popularityMeasure, weightCombiner);
 80+ this.popularityDisambiguator = new PopularityDisambiguator(meaningFetcher, popularityMeasure);
8481
8582 this.setPopularityMeasure(popularityMeasure);
86 - this.setWeightCombiner(weightCombiner);
8783 this.setSimilarityMeasure(sim);
8884 }
8985
90 - public Functor.Double getPopularityFactor() {
91 - return popularityFactor;
 86+ public Functor.Double getPopularityNormalizer() {
 87+ return popularityNormalizer;
9288 }
9389
94 - public void setPopularityFactor(Functor.Double popularityFactor) {
95 - this.popularityFactor = popularityFactor;
 90+ public void setPopularityNormalizer(Functor.Double popularityFactor) {
 91+ this.popularityNormalizer = popularityFactor;
9692 }
9793
9894 public Measure<WikiWordConcept> getPopularityMeasure() {
@@ -104,7 +100,7 @@
105101 this.popularityComparator = new Measure.Comparator<LocalConcept>(popularityMeasure, true);
106102 }
107103
108 - public void setWeightCombiner(Functor2<? extends Number, Number, Number> weightCombiner) {
 104+ public void setWeightCombiner(Functor2.Double weightCombiner) {
109105 this.weightCombiner = weightCombiner;
110106 this.popularityDisambiguator.setWeightCombiner(weightCombiner);
111107 }
@@ -117,12 +113,12 @@
118114 this.scoreCombiner = scoreCombiner;
119115 }
120116
121 - public Functor.Double getSimilarityFactor() {
122 - return similarityFactor;
 117+ public Functor.Double getSimilarityNormalizer() {
 118+ return similarityNormalizer;
123119 }
124120
125 - public void setSimilarityFactor(Functor.Double similarityFactor) {
126 - this.similarityFactor = similarityFactor;
 121+ public void setSimilarityNormalizer(Functor.Double similarityFactor) {
 122+ this.similarityNormalizer = similarityFactor;
127123 }
128124
129125 public void setFeatureFetcher(FeatureFetcher<LocalConcept, Integer> featureFetcher) {
@@ -139,14 +135,6 @@
140136 this.similarityMeasure = similarityMeasure;
141137 }
142138
143 - public double getPopularityBias() {
144 - return popularityBias;
145 - }
146 -
147 - public void setPopularityBias(double popularityBias) {
148 - this.popularityBias = popularityBias;
149 - }
150 -
151139 public int getMinPopularity() {
152140 return minPopularity;
153141 }
@@ -382,8 +370,7 @@
383371 if (p<1) p= 1;
384372 if (w<1) w= 1;
385373
386 - pop += weightCombiner.apply(p, w).doubleValue();
387 -
 374+ pop += p;
388375 weight += w;
389376 c ++;
390377 }
@@ -393,10 +380,12 @@
394381 pop = pop / c; //normalize
395382 weight = weight / c; //normalize
396383
397 - double popf = popularityFactor.apply(pop);
398 - double simf = similarityFactor.apply(sim);
 384+ double popf = popularityNormalizer.apply(pop);
 385+ double simf = similarityNormalizer.apply(sim);
 386+ double weightf = weightNormalizer.apply(weight);
399387
400 - double score = scoreCombiner.apply(popf, simf);
 388+ double score = weightCombiner.apply(weightf, popf);
 389+ score = scoreCombiner.apply(score, simf);
401390
402391 return new Result<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight);
403392 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java
@@ -86,10 +86,16 @@
8787 }
8888
8989 protected <X extends T>PhraseNode<X> getLastNode(PhraseNode<X> root, List<X> sequence) {
 90+ PhraseNode<X> n = findLastNode(root, sequence);
 91+ if (n==null) throw new IllegalArgumentException("sequence does not match node structure: "+sequence);
 92+ return n;
 93+ }
 94+
 95+ private <X extends T>PhraseNode<X> findLastNode(PhraseNode<X> root, List<X> sequence) {
9096 terms: for (X t: sequence) {
9197 Collection<? extends PhraseNode<X>> successors = root.getSuccessors();
9298 if (successors==null || successors.isEmpty())
93 - throw new IllegalArgumentException("sequence too long, no nodes left along this path.");
 99+ return null;
94100
95101 for (PhraseNode<X> n: successors) {
96102 if (n.getTermReference().equals(t)) {
@@ -98,7 +104,12 @@
99105 }
100106 }
101107
102 - throw new IllegalArgumentException("sequence does not match node structure; no node found matching "+t);
 108+ for (PhraseNode<X> n: successors) {
 109+ PhraseNode<X> m = findLastNode(n, sequence);
 110+ if (m != null) return m;
 111+ }
 112+
 113+ return null;
103114 }
104115
105116 return root;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
@@ -7,8 +7,6 @@
88 import java.util.List;
99 import java.util.Map;
1010
11 -import de.brightbyte.data.Functor2;
12 -import de.brightbyte.data.Functors;
1311 import de.brightbyte.data.LabeledMatrix;
1412 import de.brightbyte.data.LabeledVector;
1513 import de.brightbyte.data.MapLabeledMatrix;
@@ -28,13 +26,13 @@
2927 protected int initialWindow;
3028
3129 public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, boolean featuresAreNormalized) {
32 - this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality, Functors.Double.product2,
 30+ this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality,
3331 featuresAreNormalized ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance(), //if pre-normalized, use scalar to calc cosin
3432 5, 5);
3533 }
3634
37 - public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Functor2<? extends Number, Number, Number> weightCombiner, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) {
38 - super(meaningFetcher, featureFetcher, popularityMeasure, weightCombiner, sim);
 35+ public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) {
 36+ super(meaningFetcher, featureFetcher, popularityMeasure, sim);
3937
4038 this.window = window;
4139 this.initialWindow = initialWindow;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
@@ -7,7 +7,6 @@
88 import java.util.Map;
99
1010 import de.brightbyte.data.Functor2;
11 -import de.brightbyte.data.Functors;
1211 import de.brightbyte.data.measure.Measure;
1312 import de.brightbyte.data.measure.Measure.Comparator;
1413 import de.brightbyte.wikiword.model.LocalConcept;
@@ -18,18 +17,18 @@
1918 public class PopularityDisambiguator extends AbstractDisambiguator<TermReference, LocalConcept> {
2019
2120 protected Measure<WikiWordConcept> popularityMeasure;
22 - protected Functor2<? extends Number, Number, Number> weigthCombiner;
2321 protected Comparator<LocalConcept> popularityComparator;
2422
 23+ protected Functor2.Double weigthCombiner = new LinearCombiner(0.5);
 24+
2525 public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher) {
26 - this(meaningFetcher, WikiWordConcept.theCardinality, Functors.Double.product2);
 26+ this(meaningFetcher, WikiWordConcept.theCardinality);
2727 }
2828
29 - public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, Measure<WikiWordConcept> popularityMeasure, Functor2<? extends Number, Number, Number> weightCombiner) {
 29+ public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, Measure<WikiWordConcept> popularityMeasure) {
3030 super(meaningFetcher);
3131
3232 this.setPopularityMeasure(popularityMeasure);
33 - this.setWeightCombiner(weightCombiner);
3433 }
3534
3635 public Measure<WikiWordConcept> getPopularityMeasure() {
@@ -41,7 +40,7 @@
4241 this.popularityComparator = new Measure.Comparator<LocalConcept>(popularityMeasure, true);
4342 }
4443
45 - public void setWeightCombiner(Functor2<? extends Number, Number, Number> weightCombiner) {
 44+ public void setWeightCombiner(Functor2.Double weightCombiner) {
4645 this.weigthCombiner = weightCombiner;
4746 }
4847
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinearCombiner.java
@@ -0,0 +1,28 @@
 2+/**
 3+ *
 4+ */
 5+package de.brightbyte.wikiword.disambig;
 6+
 7+import de.brightbyte.data.Functor2;
 8+
 9+final class LinearCombiner implements Functor2.Double {
 10+
 11+ private double bias;
 12+
 13+ /**
 14+ * @param disambiguator
 15+ */
 16+ public LinearCombiner(double bias) {
 17+ if (bias<0 || bias>1) throw new IllegalArgumentException("bias must be >=0 and <=1, found "+bias);
 18+ this.bias = bias;
 19+ }
 20+
 21+ public LinearCombiner() {
 22+ this(0.5);
 23+ }
 24+
 25+ public double apply(double a, double b) {
 26+ return b * bias + b * ( 1 - bias );
 27+ //return = Math.sqrt( popf * simf ); //normalized produkt
 28+ }
 29+}
\ No newline at end of file

Status & tagging log