r65980 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r65979 | r65980 | r65981 >
Date:08:36, 6 May 2010
Author:daniel
Status:deferred
Tags:
Comment:
disambig testing and debugging (popularity disambig)
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinearBooster.java (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ProductCombiner.java (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ProductSqrtCombiner.java (added) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SquareBooster.java (added) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/PopularityDisambiguatorTest.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest-meanings.csv (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/PopularityDisambiguatorTest.java
@@ -7,6 +7,8 @@
88 import java.util.List;
99 import java.util.Map;
1010
 11+import de.brightbyte.io.ConsoleIO;
 12+import de.brightbyte.io.Output;
1113 import de.brightbyte.util.PersistenceException;
1214 import de.brightbyte.wikiword.disambig.Disambiguator.Result;
1315 import de.brightbyte.wikiword.model.LocalConcept;
@@ -16,6 +18,8 @@
1719
1820 public class PopularityDisambiguatorTest extends DisambiguatorTestBase {
1921
 22+ private Output traceOut = ConsoleIO.output;
 23+
2024 public PopularityDisambiguatorTest() throws IOException, PersistenceException {
2125 super();
2226 }
@@ -121,14 +125,15 @@
122126
123127 assertEquals(uk.getTerm(), getConcept("United_Kingdom"), result.getMeanings().get(uk));
124128 assertEquals(london.getTerm(), getConcept("City_of_London"), result.getMeanings().get(london));
125 - assertEquals(underground.getTerm(), getConcept("London_Underground"), result.getMeanings().get(underground));
 129+ assertEquals(underground.getTerm(), getConcept("Subway"), result.getMeanings().get(underground));
126130 }
127131
128132 public void testDisambiguateNode() throws PersistenceException {
129133 PhraseOccuranceSet set = getBankAndMonumentPhrases();
130134
131135 PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
132 -
 136+ disambiguator.setTrace(traceOut);
 137+
133138 Result<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null);
134139
135140 List<? extends PhraseOccurance> sequence = result.getSequence();
@@ -138,13 +143,13 @@
139144 assertEquals("Underground", sequence.get(1).getTerm());
140145 assertEquals("station", sequence.get(2).getTerm());
141146
142 - assertNotNull( meanings.get( sequence.get(0).getTerm() ) );
143 - assertNotNull( meanings.get( sequence.get(1).getTerm() ) );
144 - assertNotNull( meanings.get( sequence.get(2).getTerm() ) );
 147+ assertNotNull( meanings.get( sequence.get(0) ) );
 148+ assertNotNull( meanings.get( sequence.get(1) ) );
 149+ assertNotNull( meanings.get( sequence.get(2) ) );
145150
146 - assertEquals("Bank_and_Monument_Underground_station", meanings.get( sequence.get(0).getTerm() ).getName() );
147 - assertEquals("Subway", meanings.get( sequence.get(1).getTerm() ).getName() );
148 - assertEquals("Metro_station", meanings.get( sequence.get(2).getTerm() ).getName() );
 151+ assertEquals("Bank_and_Monument_Underground_stations", meanings.get( sequence.get(0) ).getName() );
 152+ assertEquals("Subway", meanings.get( sequence.get(1) ).getName() );
 153+ assertEquals("Bus_station", meanings.get( sequence.get(2) ).getName() );
149154 }
150155
151156 }
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest-meanings.csv
@@ -5,9 +5,9 @@
66 London 2001 City_of_London 1000 90
77 London 2002 Greater_London 888 80
88 London 2003 London_city_council 12 10
9 -Underground 3001 London_Underground 1000 90
10 -Underground 3002 Subway 888 60
11 -Bank 4001 Bank 1000 90
 9+Underground 3001 London_Underground 888 90
 10+Underground 3002 Subway 999 60
 11+Bank 4001 Bank 777 90
1212 Bank 4002 Bank_(sitting) 666 80
1313 Bank 4003 Bank_(geology) 230 60
1414 Bank 4004 Bank_of_England 220 60
@@ -15,7 +15,7 @@
1616 Bank and Monument Underground station 5001 Bank_and_Monument_Underground_stations 50 90
1717 Bank and Monument stations 5001 Bank_and_Monument_Underground_stations 66 60
1818 Bank and Monument 5001 Bank_and_Monument_Underground_stations 200 90
19 -Monument 6001 Monument 1000 90
 19+Monument 6001 Monument 888 90
2020 Monument 6002 Some_silly_monument 100 60
2121 Monument 5001 Bank_and_Monument_Underground_stations 100 10
2222 station 7001 Bus_station 1000 90
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ProductCombiner.java
@@ -0,0 +1,22 @@
 2+/**
 3+ *
 4+ */
 5+package de.brightbyte.wikiword.disambig;
 6+
 7+import de.brightbyte.data.Functor2;
 8+
 9+final class ProductCombiner implements Functor2.Double {
 10+
 11+ public static final ProductCombiner instance = new ProductCombiner();
 12+
 13+ /**
 14+ * @param disambiguator
 15+ */
 16+ public ProductCombiner() {
 17+ }
 18+
 19+ public double apply(double a, double b) {
 20+ return a * b;
 21+ //return = Math.sqrt( popf * simf ); //normalized produkt
 22+ }
 23+}
\ No newline at end of file
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
@@ -48,12 +48,6 @@
4949 }
5050 };
5151
52 - private Functor.Double weightNormalizer = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonously
53 - public double apply(double pop) {
54 - return 1 - 1/(Math.sqrt(Math.log(pop))+1); //XXX: black voodoo magic ad hoc formula with no deeper meaing.
55 - }
56 - };
57 -
5852 private Functor.Double similarityNormalizer = new Functor.Double() { //NOTE: must map [0:1] to [0:1] and grow monotonously
5953 public double apply(double sim) {
6054 return Math.sqrt(Math.sqrt(sim)); //XXX: black voodoo magic ad hoc formula with no deeper meaing.
@@ -61,7 +55,7 @@
6256 };
6357
6458 protected Functor2.Double scoreCombiner = new LinearCombiner(0.8);
65 - protected Functor2.Double weightCombiner = new LinearCombiner(0.5);
 59+ protected Functor2.Double weightCombiner = ProductCombiner.instance;
6660
6761 public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, boolean featuresAreNormalized) {
6862 this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality,
@@ -382,23 +376,23 @@
383377 if (p<1) p= 1;
384378 if (w<1) w= 1;
385379
386 - pop += p;
 380+ p = weightCombiner.apply(p, w);
 381+
 382+ pop += p; //XXX: keep raw and processed pop
387383 weight += w;
388384 c ++;
389385 }
390386
391387 //normalize
392 - sim = sim / n; //normalize
393 - pop = pop / c; //normalize
394 - weight = weight / c; //normalize
 388+ sim = sim / n; //scale
 389+ pop = pop / c; //scale
 390+ weight = weight / c; //scale
395391
396392 double popf = popularityNormalizer.apply(pop);
397393 double simf = similarityNormalizer.apply(sim);
398 - double weightf = weightNormalizer.apply(weight);
 394+
 395+ double score = scoreCombiner.apply(popf, simf);
399396
400 - double score = weightCombiner.apply(weightf, popf);
401 - score = scoreCombiner.apply(score, simf);
402 -
403397 return new Result<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight);
404398 }
405399
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/LinearBooster.java
@@ -0,0 +1,26 @@
 2+/**
 3+ *
 4+ */
 5+package de.brightbyte.wikiword.disambig;
 6+
 7+import de.brightbyte.data.Functor;
 8+
 9+final class LinearBooster implements Functor.Double {
 10+
 11+ private double scale;
 12+
 13+ /**
 14+ * @param disambiguator
 15+ */
 16+ public LinearBooster(double scale) {
 17+ this.scale = scale;
 18+ }
 19+
 20+ public LinearBooster() {
 21+ this(1.0);
 22+ }
 23+
 24+ public double apply(double a) {
 25+ return a * scale;
 26+ }
 27+}
\ No newline at end of file
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/ProductSqrtCombiner.java
@@ -0,0 +1,23 @@
 2+/**
 3+ *
 4+ */
 5+package de.brightbyte.wikiword.disambig;
 6+
 7+import de.brightbyte.data.Functor2;
 8+
 9+final class ProductSqrtCombiner implements Functor2.Double {
 10+
 11+ public static final ProductSqrtCombiner instance = new ProductSqrtCombiner();
 12+
 13+ /**
 14+ * @param disambiguator
 15+ */
 16+ public ProductSqrtCombiner() {
 17+ }
 18+
 19+ public double apply(double a, double b) {
 20+ if (a<0 || a>1) throw new IllegalArgumentException("ProductSqrt is only defined for values 0 <= x <= 1");
 21+ if (b<0 || b>1) throw new IllegalArgumentException("ProductSqrt is only defined for values 0 <= x <= 1");
 22+ return Math.sqrt( a * b ); //normalized produkt
 23+ }
 24+}
\ No newline at end of file
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
@@ -1,12 +1,12 @@
22 package de.brightbyte.wikiword.disambig;
33
4 -import java.util.ArrayList;
54 import java.util.Collection;
65 import java.util.Collections;
76 import java.util.HashMap;
87 import java.util.List;
98 import java.util.Map;
109
 10+import de.brightbyte.data.Functor;
1111 import de.brightbyte.data.Functor2;
1212 import de.brightbyte.data.measure.Measure;
1313 import de.brightbyte.data.measure.Measure.Comparator;
@@ -20,7 +20,8 @@
2121 protected Measure<WikiWordConcept> popularityMeasure;
2222 protected Comparator<LocalConcept> popularityComparator;
2323
24 - protected Functor2.Double weigthCombiner = new LinearCombiner(0.5);
 24+ protected Functor.Double weightBooster = SquareBooster.instance;
 25+ protected Functor2.Double weigthCombiner = new ProductCombiner(); //NOTE: pop and weight are not in the same scale.
2526
2627 public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher) {
2728 this(meaningFetcher, WikiWordConcept.theCardinality);
@@ -55,11 +56,13 @@
5657
5758 for (List<X> sequence: sequences) {
5859 Result<X, LocalConcept> r = disambiguate(sequence, meanings, context);
 60+ trace(r.toString());
5961 if (best == null || best.getScore() < r.getScore()) {
6062 best = r;
6163 }
6264 }
6365
 66+ trace("best:" + best.toString());
6467 return best;
6568 }
6669
@@ -70,14 +73,10 @@
7174 double score = 0;
7275 int totalPop = 0;
7376
74 - List<X> resultSequence = new ArrayList<X>(sequence.size());
75 -
7677 for (X t: sequence) {
7778 List<? extends LocalConcept> m = meanings.get(t);
7879 if (m==null || m.size()==0) continue;
7980
80 - resultSequence.add(t);
81 -
8281 if (m.size()>1) Collections.sort(m, popularityComparator);
8382
8483 LocalConcept c = m.get(0);
@@ -86,13 +85,14 @@
8786 double pop = popularityMeasure.measure(c);
8887 totalPop += pop;
8988
90 - double sc = weigthCombiner.apply(pop, t.getWeight()); //FIXME: pop and weight are not in the same scale.
 89+ double w = weightBooster.apply(t.getWeight());
 90+ double sc = weigthCombiner.apply(pop, w);
9191 score += sc;
9292 }
9393
94 - if (disambig.size()>0) score = score / disambig.size();
 94+ if (disambig.size()>0) score = score / sequence.size(); //NOTE: treat unknown terms as having pop = 0
9595
96 - Result<X, LocalConcept> r = new Result<X, LocalConcept>(disambig, resultSequence, score, "score="+score+"; pop="+totalPop);
 96+ Result<X, LocalConcept> r = new Result<X, LocalConcept>(disambig, sequence, score, "score="+score+"; pop="+totalPop);
9797 return r;
9898 }
9999
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SquareBooster.java
@@ -0,0 +1,21 @@
 2+/**
 3+ *
 4+ */
 5+package de.brightbyte.wikiword.disambig;
 6+
 7+import de.brightbyte.data.Functor;
 8+
 9+final class SquareBooster implements Functor.Double {
 10+
 11+ public static final SquareBooster instance = new SquareBooster();
 12+
 13+ /**
 14+ * @param disambiguator
 15+ */
 16+ public SquareBooster() {
 17+ }
 18+
 19+ public double apply(double a) {
 20+ return a * a;
 21+ }
 22+}
\ No newline at end of file

Status & tagging log