r65992 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r65991 | r65992 | r65993 >
Date:19:34, 6 May 2010
Author:daniel
Status:deferred
Tags:
Comment:
disambig testing and debugging (sliding coherence disambig - still not passing)
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest.java
@@ -7,6 +7,8 @@
88 import java.util.Map;
99
1010 import de.brightbyte.data.Pair;
 11+import de.brightbyte.io.ConsoleIO;
 12+import de.brightbyte.io.Output;
1113 import de.brightbyte.util.PersistenceException;
1214 import de.brightbyte.wikiword.disambig.Disambiguator.Interpretation;
1315 import de.brightbyte.wikiword.disambig.Disambiguator.Result;
@@ -16,6 +18,8 @@
1719
1820 public class SlidingCoherenceDisambiguatorTest extends DisambiguatorTestBase {
1921
 22+ private Output traceOutput = ConsoleIO.output;
 23+
2024 public SlidingCoherenceDisambiguatorTest() throws IOException, PersistenceException {
2125 super();
2226 }
@@ -42,7 +46,25 @@
4347 assertTrue("UK as United_Kingdom", interpretations.contains( new Disambiguator.Interpretation<Term, LocalConcept>( uk_as_United_Kingdom )) );
4448 assertTrue("UK as Great_Britain", interpretations.contains( new Disambiguator.Interpretation<Term, LocalConcept>( uk_as_Great_Britain )) );
4549 assertTrue("UK as England", interpretations.contains( new Disambiguator.Interpretation<Term, LocalConcept>( uk_as_England )) );
 50+
 51+ ///////////////////////////////////////////////////////////////////////////////////
 52+ Term freak = new Term("Freak");
 53+ Pair<Term, LocalConcept> freak_as_nothing = new Pair<Term, LocalConcept>(freak, null);
4654
 55+ sequence = new ArrayList<Term>();
 56+ sequence.add(freak);
 57+ sequence.add(london);
 58+
 59+ interpretations = disambiguator.getSequenceInterpretations(sequence, meaningFetcher.getMeanings(sequence));
 60+
 61+ assertEquals("number of interpretations", 3, interpretations.size());
 62+
 63+ Interpretation<Term, LocalConcept> first = interpretations.iterator().next();
 64+
 65+ assertEquals( first.getSequence(), sequence );
 66+ Interpretation<Term, LocalConcept> interp = new Disambiguator.Interpretation<Term, LocalConcept>( freak_as_nothing, london_as_City_of_London );
 67+ assertTrue("London as City_of_London", interpretations.contains( interp) );
 68+
4769 ///////////////////////////////////////////////////////////////////////////////////
4870
4971 sequence = new ArrayList<Term>();
@@ -103,6 +125,7 @@
104126 PhraseOccuranceSet set = getBankAndMonumentPhrases();
105127
106128 SlidingCoherenceDisambiguator disambiguator = new SlidingCoherenceDisambiguator(meaningFetcher, featureFetcher);
 129+ disambiguator.setTrace(traceOutput);
107130 disambiguator.setInitialWindow(1);
108131 disambiguator.setWindow(3);
109132
@@ -115,13 +138,59 @@
116139 assertEquals("Underground", sequence.get(1).getTerm());
117140 assertEquals("station", sequence.get(2).getTerm());
118141
119 - assertNotNull( meanings.get( sequence.get(0).getTerm() ) );
120 - assertNotNull( meanings.get( sequence.get(1).getTerm() ) );
121 - assertNotNull( meanings.get( sequence.get(2).getTerm() ) );
 142+ assertNotNull( meanings.get( sequence.get(0) ) );
 143+ assertNotNull( meanings.get( sequence.get(1) ) );
 144+ assertNotNull( meanings.get( sequence.get(2) ) );
122145
123 - assertEquals("Bank_and_Monument_Underground_station", meanings.get( sequence.get(0).getTerm() ).getName() );
124 - assertEquals("Subway", meanings.get( sequence.get(1).getTerm() ).getName() );
125 - assertEquals("Metro_station", meanings.get( sequence.get(2).getTerm() ).getName() );
 146+ assertEquals("Bank_and_Monument_Underground_stations", meanings.get( sequence.get(0) ).getName() );
 147+ assertEquals("London_Underground", meanings.get( sequence.get(1) ).getName() );
 148+ assertEquals("Metro_station", meanings.get( sequence.get(2) ).getName() );
 149+
 150+ ///////////////////////////////////////////////////////////////////////////
 151+ disambiguator.setTrace(traceOutput);
 152+ disambiguator.setInitialWindow(2);
 153+ disambiguator.setWindow(3);
 154+
 155+ result = disambiguator.disambiguate(set.getRootNode(), null);
 156+
 157+ sequence = result.getSequence();
 158+ meanings = result.getMeanings();
 159+
 160+ assertEquals("Bank and Monument", sequence.get(0).getTerm());
 161+ assertEquals("Underground", sequence.get(1).getTerm());
 162+ assertEquals("station", sequence.get(2).getTerm());
 163+
 164+ assertNotNull( meanings.get( sequence.get(0) ) );
 165+ assertNotNull( meanings.get( sequence.get(1) ) );
 166+ assertNotNull( meanings.get( sequence.get(2) ) );
 167+
 168+ assertEquals("Bank_and_Monument_Underground_stations", meanings.get( sequence.get(0) ).getName() );
 169+ assertEquals("London_Underground", meanings.get( sequence.get(1) ).getName() );
 170+ assertEquals("Metro_station", meanings.get( sequence.get(2) ).getName() );
 171+
 172+ ///////////////////////////////////////////////////////////////////////////
 173+ disambiguator.setTrace(traceOutput);
 174+ disambiguator.setInitialWindow(3);
 175+ disambiguator.setWindow(3);
 176+
 177+ result = disambiguator.disambiguate(set.getRootNode(), null);
 178+
 179+ sequence = result.getSequence();
 180+ meanings = result.getMeanings();
 181+
 182+ assertEquals("Bank and Monument", sequence.get(0).getTerm());
 183+ assertEquals("Underground", sequence.get(1).getTerm());
 184+ assertEquals("station", sequence.get(2).getTerm());
 185+
 186+ assertNotNull( meanings.get( sequence.get(0) ) );
 187+ assertNotNull( meanings.get( sequence.get(1) ) );
 188+ assertNotNull( meanings.get( sequence.get(2) ) );
 189+
 190+ assertEquals("Bank_and_Monument_Underground_stations", meanings.get( sequence.get(0) ).getName() );
 191+ assertEquals("London_Underground", meanings.get( sequence.get(1) ).getName() );
 192+ assertEquals("Metro_station", meanings.get( sequence.get(2) ).getName() );
 193+
 194+ throw new UnsupportedOperationException("todo: window 1, 2, ...");
126195 }
127196
128197 public void testDisambiguateTerms() throws PersistenceException {
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
@@ -284,24 +284,25 @@
285285
286286 Collection<Disambiguator.Interpretation<X, LocalConcept>> base = getSequenceInterpretations(sequence.subList(1, sequence.size()), meanings);
287287
288 - if (m==null || m.size()==0) return base;
289 -
290288 List<Disambiguator.Interpretation<X, LocalConcept>> interpretations = new ArrayList<Disambiguator.Interpretation<X, LocalConcept>>();
291289
292290 for (Disambiguator.Interpretation<X, LocalConcept> be: base) {
293 - for (LocalConcept c: m) {
294 - Map<X, LocalConcept> e = new HashMap<X, LocalConcept>();
295 - e.putAll(be.getMeanings());
296 - e.put(t, c);
297 -
298 - if (!sequence.isEmpty()) {
 291+ if (m==null || m.isEmpty()) {
 292+ Disambiguator.Interpretation<X, LocalConcept>interp = new Disambiguator.Interpretation<X, LocalConcept>(be.getMeanings(), sequence);
 293+ interpretations.add(interp);
 294+ } else {
 295+ for (LocalConcept c: m) {
 296+ Map<X, LocalConcept> e = new HashMap<X, LocalConcept>();
 297+ e.putAll(be.getMeanings());
 298+ e.put(t, c);
 299+
299300 Disambiguator.Interpretation<X, LocalConcept>interp = new Disambiguator.Interpretation<X, LocalConcept>(e, sequence);
300301 interpretations.add(interp);
301302 }
302303 }
303304 }
304305
305 - trace(" ~ "+t+": "+m.size()+" meanings; collected "+interpretations.size()+" combinations");
 306+ trace(" ~ "+t+": "+(m==null ? "no": m.size())+" meanings; collected "+interpretations.size()+" combinations");
306307 return interpretations;
307308 }
308309
@@ -367,6 +368,10 @@
368369 }
369370
370371 if (d<0) throw new IllegalArgumentException("encountered negative similarity score ("+d+") for "+a+" / "+b);
 372+
 373+ assert d>=0;
 374+ assert d<=1;
 375+
371376 sim += d;
372377 n ++; //should add up to interp.size*(combo.size()-1)/2, according to Gauss
373378 }
@@ -384,14 +389,25 @@
385390 }
386391
387392 //normalize
388 - sim = sim / n; //scale
389 - pop = pop / c; //scale
390 - weight = weight / c; //scale
 393+ sim = n == 0 ? 0 : sim / n; //scale
 394+ pop = c == 0 ? 0 : pop / c; //scale
 395+ weight = c == 0 ? 0 : weight / c; //scale
391396
 397+ assert pop >= 0;
 398+ assert sim >= 0;
 399+ assert sim <= 1;
 400+
392401 double popf = popularityNormalizer.apply(pop);
393402 double simf = similarityNormalizer.apply(sim);
 403+
 404+ assert popf>=0;
 405+ assert popf<=1;
 406+ assert simf>=0;
 407+ assert simf<=1;
394408
395409 double score = scoreCombiner.apply(popf, simf);
 410+ assert score>=0;
 411+ assert score<=1;
396412
397413 return new Result<X, LocalConcept>(interp.getMeanings(), interp.getSequence(), score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop+", weight="+weight);
398414 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
@@ -97,8 +97,11 @@
9898
9999 if (initialWindow > 0) { //apply full coherence disambig to initial window size. initialWindow == 1 will trigger a popularity disambig.
100100 Collection<List<X>> sequences = getSequences(root, initialWindow);
101 - Result<X, LocalConcept> r = super.disambiguate(sequences, root, meanings, context);
 101+ Result<X, LocalConcept> r;
102102
 103+ if (initialWindow == 1) r = popularityDisambiguator.disambiguate(sequences, root, meanings, context);
 104+ else r = super.disambiguate(sequences, root, meanings, context);
 105+
103106 sequence.addAll(r.getSequence());
104107 currentNode = getLastNode(root, sequence);
105108 disambig.putAll(r.getMeanings());
@@ -112,7 +115,8 @@
113116 PhraseNode<X> bestNode = null;
114117
115118 for (PhraseNode<X> n: successors) {
116 - Result<X, LocalConcept> r = evalStep(sequence, disambig, currentNode, meanings, context, similarities, features); //empty sequence will trigger popularity disambig
 119+ Result<X, LocalConcept> r = evalStep(sequence, disambig, n, meanings, context, similarities, features); //empty sequence will trigger popularity disambig
 120+ trace("evalStep("+n+"): " + r.toString());
117121 if (best == null || best.getScore() < r.getScore()) {
118122 best = r;
119123 bestNode = n;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java
@@ -23,7 +23,7 @@
2424 private static <T extends TermReference, C extends WikiWordConcept>Map<T, C> buildMeaningMap(List<Pair<T, C>> interpretation) {
2525 Map<T, C> sequence = new HashMap<T, C>(interpretation.size());
2626 for (Pair<T, C> p: interpretation) {
27 - sequence.put(p.getA(), p.getB());
 27+ if (p.getB()!=null) sequence.put(p.getA(), p.getB());
2828 }
2929 return sequence;
3030 }

Status & tagging log