r65908 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r65907‎ | r65908 | r65909 >
Date:17:02, 4 May 2010
Author:daniel
Status:deferred
Tags:
Comment:
SlidingCoherenceDisambiguatorTest (still fails)
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureCache.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/StoredFeatureFetcher.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/ConceptFeatures.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig (added) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest-features.csv (added) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest-meanings.csv (added) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest.java (added) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest.java
@@ -0,0 +1,236 @@
 2+package de.brightbyte.wikiword.disambig;
 3+
 4+import java.io.IOException;
 5+import java.io.InputStream;
 6+import java.net.URL;
 7+import java.util.ArrayList;
 8+import java.util.Collection;
 9+import java.util.HashMap;
 10+import java.util.List;
 11+import java.util.Map;
 12+
 13+import de.brightbyte.abstraction.ListAbstractor;
 14+import de.brightbyte.data.LabeledVector;
 15+import de.brightbyte.data.MapLabeledVector;
 16+import de.brightbyte.data.cursor.DataCursor;
 17+import de.brightbyte.io.ChunkingCursor;
 18+import de.brightbyte.io.GroupingCursor;
 19+import de.brightbyte.io.LineCursor;
 20+import de.brightbyte.text.CsvLineChunker;
 21+import de.brightbyte.util.PersistenceException;
 22+import de.brightbyte.wikiword.ConceptType;
 23+import de.brightbyte.wikiword.Corpus;
 24+import de.brightbyte.wikiword.TweakSet;
 25+import de.brightbyte.wikiword.disambig.Disambiguator.Result;
 26+import de.brightbyte.wikiword.model.ConceptFeatures;
 27+import de.brightbyte.wikiword.model.LocalConcept;
 28+import de.brightbyte.wikiword.model.PhraseOccurance;
 29+import de.brightbyte.wikiword.model.PhraseOccuranceSet;
 30+import de.brightbyte.wikiword.model.TermReference;
 31+import junit.framework.TestCase;
 32+
 33+public class SlidingCoherenceDisambiguatorTest extends TestCase {
 34+
 35+ protected Map<String, List<? extends LocalConcept>> meanings = new HashMap<String, List<? extends LocalConcept>>(); //candidate concepts per term text; filled by readMeanings() in the constructor
 36+ protected Map<Integer, ConceptFeatures<LocalConcept, Integer>> features = new HashMap<Integer, ConceptFeatures<LocalConcept, Integer>>(); //feature vectors per concept id; filled by readFeatures() in the constructor
 37+
 38+ protected static DataCursor<List<String>> openTableCursor(InputStream in, String enc) throws IOException {
 39+ ChunkingCursor cursor = new ChunkingCursor(new LineCursor(in, enc), CsvLineChunker.tsv);
 40+ return cursor;
 41+ }
 42+
 43+ protected static DataCursor<List<List<String>>> openGroupedTableCursor(InputStream in, String enc, int groupBy, boolean skipHeader) throws IOException, PersistenceException {
 44+ DataCursor<List<String>> c = openTableCursor(in, enc);
 45+ if (skipHeader) c.next(); //skip first line
 46+
 47+ return new GroupingCursor<List<String>, String>(c, new ListAbstractor.Accessor<String>(groupBy));
 48+ }
 49+
 50+ protected static void readMeanings(Corpus corpus, InputStream in, Map<String, List<? extends LocalConcept>> meanings) throws IOException, PersistenceException {
 51+ DataCursor<List<List<String>>> cursor = openGroupedTableCursor(in, "UTF-8", 0, true);
 52+
 53+ List<List<String>> group;
 54+ while ((group = cursor.next()) != null) {
 55+ List<LocalConcept> concepts = new ArrayList<LocalConcept>(group.size());
 56+ String term = null;
 57+
 58+ for (List<String> row: group) {
 59+ term = row.get(0);
 60+ int id = Integer.parseInt(row.get(1));
 61+ String name = row.get(2);
 62+ int freq = Integer.parseInt(row.get(3));
 63+ int rule = Integer.parseInt(row.get(4));
 64+
 65+ int score = ((rule==10 || rule==30) && freq<2) ? 0 : freq*rule;
 66+
 67+ LocalConcept c = new LocalConcept(corpus, id, ConceptType.UNKNOWN, name);
 68+ c.setCardinality(freq);
 69+ c.setRelevance(score);
 70+
 71+ concepts.add(c);
 72+ }
 73+
 74+ if (term!=null) meanings.put(term, concepts);
 75+ }
 76+
 77+ cursor.close();
 78+ }
 79+
 80+ protected static void readFeatures(Corpus corpus, InputStream in, Map<Integer, ConceptFeatures<LocalConcept, Integer>> features) throws IOException, PersistenceException {
 81+ DataCursor<List<List<String>>> cursor = openGroupedTableCursor(in, "UTF-8", 0, true);
 82+
 83+ List<List<String>> group;
 84+ while ((group = cursor.next()) != null) {
 85+ LabeledVector<Integer> v = new MapLabeledVector<Integer>();
 86+ Integer id = null;
 87+ String name = null;
 88+
 89+ for (List<String> row: group) {
 90+ id = new Integer(row.get(0));
 91+ name = row.get(1);
 92+
 93+ int feature = Integer.parseInt(row.get(2));
 94+ double value = Double.parseDouble(row.get(3));
 95+
 96+ v.set(feature, value);
 97+ }
 98+
 99+ if (id!=null) {
 100+ double len = v.getLength();
 101+ v = v.scaled(len); //normalize
 102+
 103+ LocalConcept c = new LocalConcept(corpus, id, ConceptType.UNKNOWN, name);
 104+ ConceptFeatures<LocalConcept, Integer> f = new ConceptFeatures<LocalConcept, Integer>(c, v);
 105+ features.put(id, f);
 106+ }
 107+ }
 108+
 109+ cursor.close();
 110+ }
 111+
 112+ private MeaningFetcher<LocalConcept> meaningFetcher = new MeaningFetcher<LocalConcept>() {
 113+
 114+ public <X extends TermReference> Map<X, List<? extends LocalConcept>> getMeanings(
 115+ Collection<X> terms) throws PersistenceException {
 116+ Map<X, List<? extends LocalConcept>> m = new HashMap<X, List<? extends LocalConcept>>();
 117+
 118+ for (X t: terms) {
 119+ List<? extends LocalConcept> n = getMeanings(t.getTerm());
 120+ m.put(t, n);
 121+ }
 122+
 123+ return m;
 124+ }
 125+
 126+ public List<? extends LocalConcept> getMeanings(String term)
 127+ throws PersistenceException {
 128+ return meanings.get(term);
 129+ }
 130+
 131+ };
 132+
 133+ private FeatureFetcher<LocalConcept, Integer> featureFetcher = new FeatureFetcher<LocalConcept, Integer>() {
 134+
 135+ public boolean getFeaturesAreNormalized() {
 136+ return true;
 137+ }
 138+
 139+ public Map<Integer, ConceptFeatures<LocalConcept, Integer>> getFeatures(
 140+ Collection<? extends LocalConcept> concepts) throws PersistenceException {
 141+ Map<Integer, ConceptFeatures<LocalConcept, Integer>> m = new HashMap<Integer, ConceptFeatures<LocalConcept, Integer>>();
 142+
 143+ for (LocalConcept c: concepts) {
 144+ ConceptFeatures<LocalConcept, Integer> f = getFeatures(c);
 145+ m.put(c.getId(), f);
 146+ }
 147+
 148+ return m;
 149+ }
 150+
 151+ public ConceptFeatures<LocalConcept, Integer> getFeatures(LocalConcept c)
 152+ throws PersistenceException {
 153+ return features.get(c.getId());
 154+ }
 155+
 156+ };
 157+
 158+ protected Corpus corpus; //set in the constructor via Corpus.forName("TEST", "en", tweaks)
 159+ protected TweakSet tweaks; //default-constructed TweakSet, created in the constructor
 160+
 161+ public SlidingCoherenceDisambiguatorTest() throws IOException, PersistenceException {
 162+ tweaks = new TweakSet();
 163+ corpus = Corpus.forName("TEST", "en", tweaks);
 164+
 165+ URL meaningFile = getClass().getResource("SlidingCoherenceDisambiguatorTest-meanings.csv");
 166+ URL featureFile = getClass().getResource("SlidingCoherenceDisambiguatorTest-features.csv");
 167+
 168+ readMeanings(corpus, meaningFile.openStream(), meanings);
 169+ readFeatures(corpus, featureFile.openStream(), features);
 170+ }
 171+
 172+ protected List<Term> terms(String... terms) {
 173+ List<Term> list = new ArrayList<Term>();
 174+ for (String t: terms) list.add(new Term(t));
 175+ return list;
 176+ }
 177+
 178+ public void testDisambiguatePhraseNode() throws PersistenceException {
 179+ String text = "The Bank and Monument Underground station";
 180+ // 012345678901234567890123456789012345678901234567890
 181+ List<PhraseOccurance> phrases = new ArrayList<PhraseOccurance>();
 182+
 183+ phrases.add( new PhraseOccurance( text.substring( 0, 8 ), 1, 0, 8 ) ); //The Bank
 184+ phrases.add( new PhraseOccurance( text.substring( 0, 21 ), 2, 0, 21 ) ); //The Bank and Monument
 185+ phrases.add( new PhraseOccurance( text.substring( 0, 33 ), 3, 0, 33 ) ); //The Bank and Monument Underground
 186+
 187+ phrases.add( new PhraseOccurance( text.substring( 4, 8 ), 1, 4, 8-4 ) ); //Bank
 188+ phrases.add( new PhraseOccurance( text.substring( 4, 21 ), 2, 4, 21-4 ) ); //Bank and Monument
 189+ phrases.add( new PhraseOccurance( text.substring( 4, 33 ), 3, 4, 33-4 ) ); //Bank and Monument Underground
 190+ //phrases.add( new PhraseOccurance( text.substring( 4, 41 ), 4, 4, 41-4 ) ); //Bank and Monument Underground station
 191+
 192+ phrases.add( new PhraseOccurance( text.substring( 13, 21 ), 1, 13, 21-13 ) ); //Monument
 193+ phrases.add( new PhraseOccurance( text.substring( 13, 33 ), 2, 13, 33-13 ) ); //Monument Underground
 194+ phrases.add( new PhraseOccurance( text.substring( 13, 41 ), 3, 13, 41-13 ) ); //Monument Underground station
 195+
 196+ phrases.add( new PhraseOccurance( text.substring( 22, 33 ), 1, 22, 33-22 ) ); //Underground
 197+ phrases.add( new PhraseOccurance( text.substring( 22, 41 ), 2, 22, 41-22 ) ); //Underground stations
 198+
 199+ phrases.add( new PhraseOccurance( text.substring( 34, 41 ), 1, 34, 41-34 ) ); //station
 200+
 201+ PhraseOccuranceSet set = new PhraseOccuranceSet(text, phrases);
 202+
 203+ SlidingCoherenceDisambiguator disambiguator = new SlidingCoherenceDisambiguator(meaningFetcher, featureFetcher);
 204+ disambiguator.setInitialWindow(1);
 205+ disambiguator.setWindow(3);
 206+
 207+ Result<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null);
 208+
 209+ List<? extends PhraseOccurance> sequence = result.getSequence();
 210+ Map<? extends PhraseOccurance, ? extends LocalConcept> meanings = result.getMeanings();
 211+
 212+ assertEquals("Bank and Monument", sequence.get(0).getTerm());
 213+ assertEquals("Underground", sequence.get(1).getTerm());
 214+ assertEquals("station", sequence.get(2).getTerm());
 215+
 216+ assertNotNull( meanings.get( sequence.get(0).getTerm() ) );
 217+ assertNotNull( meanings.get( sequence.get(1).getTerm() ) );
 218+ assertNotNull( meanings.get( sequence.get(2).getTerm() ) );
 219+
 220+ assertEquals("Bank_and_Monument_Underground_station", meanings.get( sequence.get(0).getTerm() ).getName() );
 221+ assertEquals("Subway", meanings.get( sequence.get(1).getTerm() ).getName() );
 222+ assertEquals("Metro_station", meanings.get( sequence.get(2).getTerm() ).getName() );
 223+ }
 224+ /** Runs the disambiguator over a plain term sequence; it makes no assertions yet, so it only fails if disambiguate() throws. */
 225+ public void testDisambiguateTerms() throws PersistenceException {
 226+ SlidingCoherenceDisambiguator disambiguator = new SlidingCoherenceDisambiguator(meaningFetcher, featureFetcher);
 227+ disambiguator.setInitialWindow(1);
 228+ disambiguator.setWindow(3);
 229+
 230+ String[] sequence = {"UK", "London", "Underground", "Bank"};
 231+
 232+ Result<Term, LocalConcept> result = disambiguator.disambiguate(terms(sequence), null);
 233+
 234+ //TODO: assert on result; the expected sequence and meanings are not checked yet
 235+ }
 236+
 237+}
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest-features.csv
@@ -0,0 +1,121 @@
 2+concept concept_name feature weight
 3+1001 United_Kingdom 1001 2
 4+1001 United_Kingdom 1004 0.66
 5+1001 United_Kingdom 1001001 0.222
 6+1001 United_Kingdom 2001001 0.222
 7+
 8+1002 Great_Britain 1002 2
 9+1002 Great_Britain 1004 0.65
 10+1002 Great_Britain 1001002 0.222
 11+
 12+1004 England 1004 2
 13+1004 England 2001 0.43
 14+1004 England 2002 0.45
 15+1004 England 4003 0.2
 16+1004 England 4004 0.45
 17+1004 England 1002 0.65
 18+1004 England 1001 0.66
 19+
 20+2001 City_of_London 2001 2
 21+2001 City_of_London 2002 0.77
 22+2001 City_of_London 1004 0.43
 23+2001 City_of_London 2003 0.55
 24+2001 City_of_London 3001 0.25
 25+2001 City_of_London 4003 0.15
 26+2001 City_of_London 4004 0.22
 27+2001 City_of_London 5001 0.21
 28+2001 City_of_London 6001 0.023
 29+
 30+2002 Greater_London 2002 2
 31+2002 Greater_London 2001 0.77
 32+2002 Greater_London 1004 0.45
 33+2002 Greater_London 2003 0.56
 34+2002 Greater_London 3001 0.22
 35+2002 Greater_London 5001 0.12
 36+2002 Greater_London 6001 0.022
 37+
 38+2003 London_city_council 2003 2
 39+2003 London_city_council 2001 0.55
 40+2003 London_city_council 2002 0.56
 41+2003 London_city_council 1002003 0.22
 42+
 43+3001 London_Underground 3001 2
 44+3001 London_Underground 3002 0.47
 45+3001 London_Underground 5001 0.66
 46+3001 London_Underground 2001 0.25
 47+3001 London_Underground 2002 0.22
 48+
 49+3002 Subway 3002 2
 50+3002 Subway 5001 0.47
 51+3002 Subway 3001 0.55
 52+3002 Subway 7001 0.178
 53+3002 Subway 7002 0.33
 54+3002 Subway 7003 0.25
 55+
 56+4001 Bank 4001 2
 57+4001 Bank 4004 0.55
 58+4001 Bank 1004001 0.33
 59+
 60+4002 Bank_(sitting) 4002 2
 61+4002 Bank_(sitting) 1004002 0.22
 62+4002 Bank_(sitting) 2004002 0.33
 63+4002 Bank_(sitting) 7002 0.02
 64+
 65+4003 Bank_(geology) 4003 2
 66+4003 Bank_(geology) 1004 0.2
 67+4003 Bank_(geology) 2001 0.15
 68+
 69+4004 Bank_of_England 4004 2
 70+4004 Bank_of_England 4001 0.55
 71+4004 Bank_of_England 1004 0.45
 72+4004 Bank_of_England 2001 0.22
 73+4004 Bank_of_England 5001 0.34
 74+
 75+5001 Bank_and_Monument_Underground_stations 5001 2
 76+5001 Bank_and_Monument_Underground_stations 3001 0.66
 77+5001 Bank_and_Monument_Underground_stations 3002 0.47
 78+5001 Bank_and_Monument_Underground_stations 4004 0.34
 79+5001 Bank_and_Monument_Underground_stations 2001 0.21
 80+5001 Bank_and_Monument_Underground_stations 2002 0.12
 81+
 82+6001 Monument 6001 2
 83+6001 Monument 5001 0.017
 84+6001 Monument 2001 0.023
 85+6001 Monument 2002 0.022
 86+6001 Monument 1006001 0.18
 87+6001 Monument 2006001 0.33
 88+
 89+6002 Some_silly_monument 6002 2
 90+6002 Some_silly_monument 6001 0.32
 91+6002 Some_silly_monument 7003 0.008
 92+6002 Some_silly_monument 1006002 0.08
 93+6002 Some_silly_monument 2006002 0.01
 94+
 95+7001 Bus_station 7001 2
 96+7001 Bus_station 1007001 0.1
 97+7001 Bus_station 2007001 0.2
 98+7001 Bus_station 7003 0.21
 99+7001 Bus_station 7002 0.32
 100+7001 Bus_station 5001 0.10
 101+7001 Bus_station 3002 0.178
 102+
 103+7002 Metro_station 7002 2
 104+7002 Metro_station 1007002 0.1
 105+7002 Metro_station 2007002 0.2
 106+7002 Metro_station 7003 0.22
 107+7002 Metro_station 7001 0.32
 108+7002 Metro_station 5001 0.17
 109+7002 Metro_station 3002 0.33
 110+7002 Metro_station 4002 0.02
 111+
 112+7003 Train_station 7003 2
 113+7003 Train_station 1007003 0.1
 114+7003 Train_station 2007003 0.2
 115+7003 Train_station 7002 0.22
 116+7003 Train_station 7001 0.21
 117+7003 Train_station 5001 0.11
 118+7003 Train_station 3002 0.25
 119+
 120+7004 Social_status 7004 2
 121+7004 Social_status 1007004 0.1
 122+7004 Social_status 2007004 0.2
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest-meanings.csv
@@ -0,0 +1,24 @@
 2+term_text concept concept_name freq rule
 3+UK 1001 United_Kingdom 1000 90
 4+UK 1002 Great_Britain 670 60
 5+UK 1004 England 16 10
 6+London 2001 City_of_London 1000 90
 7+London 2002 Greater_London 888 80
 8+London 2003 London_city_council 12 10
 9+Underground 3001 London_Underground 1000 90
 10+Underground 3002 Subway 888 60
 11+Bank 4001 Bank 1000 90
 12+Bank 4002 Bank_(sitting) 666 80
 13+Bank 4003 Bank_(geology) 230 60
 14+Bank 4004 Bank_of_England 220 60
 15+Bank and Monument 5001 Bank_and_Monument_Underground_stations 200 60
 16+Bank and Monument Underground station 5001 Bank_and_Monument_Underground_stations 50 90
 17+Bank and Monument stations 5001 Bank_and_Monument_Underground_stations 66 60
 18+Bank and Monument 5001 Bank_and_Monument_Underground_stations 200 90
 19+Monument 6001 Monument 1000 90
 20+Monument 6002 Some_silly_monument 100 60
 21+Monument 5001 Bank_and_Monument_Underground_stations 100 10
 22+station 7001 Bus_station 1000 90
 23+station 7002 Metro_station 888 80
 24+station 7003 Train_station 666 60
 25+station 7004 Social_status 300 10
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
@@ -165,7 +165,7 @@
166166 //NOTE: pre-fetch all features in one go
167167 List<LocalConcept> concepts = new ArrayList<LocalConcept>(meanings.size()*10);
168168 for (List<? extends LocalConcept> m: meanings.values()) {
169 - concepts.addAll(m);
 169+ if (m!=null) concepts.addAll(m);
170170 }
171171
172172 if (context!=null) concepts.addAll(context);
@@ -218,6 +218,7 @@
219219 while (eit.hasNext()) {
220220 Entry<TermReference, List<? extends LocalConcept>> e = (Entry<TermReference, List<? extends LocalConcept>>) eit.next(); //XXX: ugly cast. got confused about generics. ugh.
221221 List<? extends LocalConcept> m = e.getValue();
 222+ if (m==null) continue;
222223
223224 Iterator<? extends LocalConcept> cit = m.iterator();
224225 while (cit.hasNext()) {
@@ -327,13 +328,16 @@
328329 for (Map.Entry<? extends TermReference, LocalConcept> ea: concepts.entrySet()) {
329330 LocalConcept a = ea.getValue();
330331 TermReference term = ea.getKey();
 332+
 333+ i++;
 334+ if (a==null) continue;
331335
332 - i++;
333336 j=0;
334337 for (Map.Entry<? extends TermReference, LocalConcept> eb: concepts.entrySet()) {
335338 LocalConcept b = eb.getValue();
336339 j++;
337340 if (i==j) break;
 341+ if (b==null) continue;
338342
339343 double d;
340344
@@ -348,14 +352,18 @@
349353 ConceptFeatures<LocalConcept, Integer> fa = features.getFeatures(a);
350354 ConceptFeatures<LocalConcept, Integer> fb = features.getFeatures(b);
351355
352 - //force relevance/cardinality to the figures from the meaning lookup
353 - //not strictly necessary, but nice to keep it consistent.
354 - fa.getConcept().setCardinality(a.getCardinality());
355 - fa.getConcept().setRelevance(a.getRelevance());
356 - fb.getConcept().setCardinality(b.getCardinality());
357 - fb.getConcept().setRelevance(b.getRelevance());
 356+ if (fa==null || fb==null) d = 0;
 357+ else {
 358+ //force relevance/cardinality to the figures from the meaning lookup
 359+ //not strictly necessary, but nice to keep it consistent.
 360+ fa.getConcept().setCardinality(a.getCardinality());
 361+ fa.getConcept().setRelevance(a.getRelevance());
 362+ fb.getConcept().setCardinality(b.getCardinality());
 363+ fb.getConcept().setRelevance(b.getRelevance());
 364+
 365+ d = similarityMeasure.similarity(fa.getFeatureVector(), fb.getFeatureVector());
 366+ }
358367
359 - d = similarityMeasure.similarity(fa.getFeatureVector(), fb.getFeatureVector());
360368 similarities.set(a, b, d);
361369 }
362370 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/StoredFeatureFetcher.java
@@ -44,5 +44,9 @@
4545 protected void trace(String msg) { //prints msg to the trace output; does nothing when no trace output is set
4646 if (trace!=null) trace.println(msg);
4747 }
 48+
 49+ public boolean getFeaturesAreNormalized() { //always reports normalized features; NOTE(review): presumably the feature store only holds unit-length vectors -- confirm
 50+ return true;
 51+ }
4852
4953 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureCache.java
@@ -98,4 +98,8 @@
9999 cache.clear();
100100 }
101101
 102+ public boolean getFeaturesAreNormalized() { //forwards the question to the parent fetcher
 103+ return parent.getFeaturesAreNormalized();
 104+ }
 105+
102106 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
@@ -25,9 +25,9 @@
2626 protected int window;
2727 protected int initialWindow;
2828
29 - public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, boolean featuresAreNormalized) {
 29+ public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher) {
3030 this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality,
31 - featuresAreNormalized ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance(), //if pre-normalized, use scalar to calc cosin
 31+ featureFetcher.getFeaturesAreNormalized() ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance(), //if pre-normalized, use scalar to calc cosin
3232 5, 5);
3333 }
3434
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java
@@ -10,4 +10,5 @@
1111 public interface FeatureFetcher<C extends WikiWordConcept, K> { //source of ConceptFeatures for concepts of type C
1212 public ConceptFeatures<C, K> getFeatures(C c) throws PersistenceException; //features of a single concept
1313 public Map<Integer, ConceptFeatures<C, K>> getFeatures(Collection<? extends C> c) throws PersistenceException; //features of several concepts; NOTE(review): the Integer key looks like the concept id -- confirm
 14+ public boolean getFeaturesAreNormalized(); //SlidingCoherenceDisambiguator picks ScalarVectorSimilarity when this is true and CosineVectorSimilarity otherwise
1415 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/ConceptFeatures.java
@@ -7,6 +7,8 @@
88 protected WikiWordConcept concept;
99
1010 public ConceptFeatures(WikiWordConcept concept, LabeledVector<K> features) { //both arguments are required; a null is rejected with a NullPointerException naming the argument
 11+     if (features==null) throw new NullPointerException("features must not be null");
 12+     if (concept==null) throw new NullPointerException("concept must not be null");
1113     this.features = features;
1214     this.concept = concept;
1315 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java
@@ -451,7 +451,7 @@
452452 if (disambiguator==null) {
453453 StoredMeaningFetcher meaningFetcher = new StoredMeaningFetcher(getLocalConceptStore());
454454 StoredFeatureFetcher<LocalConcept, Integer> featureFetcher = new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore());
455 - disambiguator = new SlidingCoherenceDisambiguator( meaningFetcher, featureFetcher, true );
 455+ disambiguator = new SlidingCoherenceDisambiguator( meaningFetcher, featureFetcher );
456456
457457 LeveledOutput.Trace trace = new LeveledOutput.Trace(out);
458458 meaningFetcher.setTrace(trace);

Status & tagging log