r66564 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r66563 | r66564 | r66565 >
Date:15:13, 17 May 2010
Author:daniel
Status:deferred
Tags:
Comment:
fetcher caches: allow sharing, implement negative caching
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureCache.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/MeaningCache.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguatorTest.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/PopularityDisambiguatorTest.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/WordSenseIndexer.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguatorTest.java
@@ -19,7 +19,7 @@
2020 }
2121
2222 public void testDisambiguateTerms() throws PersistenceException {
23 - SlidingCoherenceDisambiguator disambiguator = new SlidingCoherenceDisambiguator(meaningFetcher, featureFetcher);
 23+ SlidingCoherenceDisambiguator disambiguator = new SlidingCoherenceDisambiguator(meaningFetcher, featureFetcher, 10);
2424 disambiguator.setInitialWindow(1);
2525 disambiguator.setWindow(3);
2626
@@ -43,7 +43,7 @@
4444 public void testDisambiguatePhraseNode() throws PersistenceException {
4545 PhraseOccuranceSet set = getBankAndMonumentPhrases();
4646
47 - SlidingCoherenceDisambiguator disambiguator = new SlidingCoherenceDisambiguator(meaningFetcher, featureFetcher);
 47+ SlidingCoherenceDisambiguator disambiguator = new SlidingCoherenceDisambiguator(meaningFetcher, featureFetcher, 10);
4848 disambiguator.setTrace(traceOutput);
4949 disambiguator.setInitialWindow(1);
5050 disambiguator.setWindow(3);
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/PopularityDisambiguatorTest.java
@@ -21,7 +21,7 @@
2222 }
2323
2424 public void testGetTermsForList() throws PersistenceException {
25 - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
 25+ PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10);
2626
2727 Term uk = new Term("UK");
2828 Term london = new Term("London");
@@ -47,7 +47,7 @@
4848
4949 //FIXME: Test case for getHorizon
5050
51 - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
 51+ PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10);
5252
5353 Collection<PhraseOccurance> terms = disambiguator.getTerms(set.getRootNode(), 0);
5454 assertTrue("empty term set", sameElements( getBankAndMonumentTerms(0), terms) );
@@ -60,7 +60,7 @@
6161 }
6262
6363 public void testGetMeaningsForList() throws PersistenceException {
64 - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
 64+ PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10);
6565
6666 Term uk = new Term("UK");
6767 Term london = new Term("London");
@@ -79,7 +79,7 @@
8080 }
8181
8282 public void testGetMeaningsForNode() throws PersistenceException {
83 - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
 83+ PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10);
8484
8585 PhraseOccuranceSet set = getBankAndMonumentPhrases();
8686 Map<PhraseOccurance, List<? extends LocalConcept>> res = disambiguator.getMeanings(set.getRootNode());
@@ -94,7 +94,7 @@
9595 }
9696
9797 public void testGetSequences() throws PersistenceException {
98 - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
 98+ PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10);
9999 PhraseOccuranceSet set = getBankAndMonumentPhrases();
100100
101101 Collection<List<PhraseOccurance>> res = disambiguator.getSequences(set.getRootNode(), 1);
@@ -108,7 +108,7 @@
109109 }
110110
111111 public void testDisambiguateTerms() throws PersistenceException {
112 - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
 112+ PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10);
113113
114114 Term uk = new Term("UK");
115115 Term london = new Term("London");
@@ -127,7 +127,7 @@
128128 public void testDisambiguateNode() throws PersistenceException {
129129 PhraseOccuranceSet set = getBankAndMonumentPhrases();
130130
131 - PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher);
 131+ PopularityDisambiguator disambiguator = new PopularityDisambiguator(meaningFetcher, 10);
132132 disambiguator.setTrace(traceOutput);
133133
134134 Result<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null);
Index: trunk/WikiWord/WikiWord/src/test/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguatorTest.java
@@ -25,7 +25,7 @@
2626 }
2727
2828 public void testGetSequenceInterpretations() throws PersistenceException {
29 - CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher);
 29+ CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher, 10);
3030
3131 Term uk = new Term("UK");
3232 Pair<Term, LocalConcept> uk_as_United_Kingdom = new Pair<Term, LocalConcept>(uk, getConcept("United_Kingdom"));
@@ -89,7 +89,7 @@
9090 }
9191
9292 public void testGetInterpretations() throws PersistenceException {
93 - CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher);
 93+ CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher, 10);
9494
9595 Term uk = new Term("UK");
9696 Pair<Term, LocalConcept> uk_as_United_Kingdom = new Pair<Term, LocalConcept>(uk, getConcept("United_Kingdom"));
@@ -120,7 +120,7 @@
121121 }
122122
123123 public void testDisambiguateTerms() throws PersistenceException {
124 - CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher);
 124+ CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher, 10);
125125 disambiguator.setTrace(traceOutput);
126126
127127 List<Term> sequence = terms("UK", "London", "Underground", "Bank");
@@ -143,7 +143,7 @@
144144 public void testDisambiguatePhraseNode() throws PersistenceException {
145145 PhraseOccuranceSet set = getBankAndMonumentPhrases();
146146
147 - CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher);
 147+ CoherenceDisambiguator disambiguator = new CoherenceDisambiguator(meaningFetcher, featureFetcher, 10);
148148 disambiguator.setTrace(traceOutput);
149149
150150 Result<PhraseOccurance, LocalConcept> result = disambiguator.disambiguate(set.getRootNode(), null);
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/MeaningCache.java
@@ -2,6 +2,7 @@
33
44 import java.util.ArrayList;
55 import java.util.Collection;
 6+import java.util.Collections;
67 import java.util.HashMap;
78 import java.util.List;
89 import java.util.Map;
@@ -19,6 +20,7 @@
2021 protected List<MeaningCache<C>> stack;
2122
2223 public Manager(MeaningFetcher<? extends C> root, int maxDepth) {
 24+ if (root==null) throw new NullPointerException();
2325 this.stack = new ArrayList<MeaningCache<C>>(maxDepth+1);
2426 this.maxDepth = maxDepth;
2527 this.root = root;
@@ -94,12 +96,15 @@
9597 }
9698 }
9799
98 - Map<X, List<? extends C>> parentMeanings = parent.getMeanings(todo); //XXX: ugly cast, generics are a pain
99 -
100 - meanings.putAll(parentMeanings);
101 -
102 - for (Map.Entry<X, List<? extends C>> e: parentMeanings.entrySet()) {
103 - cache.put(e.getKey().getTerm(), e.getValue());
 100+ if (!todo.isEmpty()) {
 101+ Map<X, List<? extends C>> parentMeanings = parent.getMeanings(todo); //XXX: ugly cast, generics are a pain
 102+ meanings.putAll(parentMeanings);
 103+
 104+ for (X t: todo) {
 105+ List<? extends C> m = parentMeanings.get(t);
 106+ if (m==null) m = Collections.emptyList();
 107+ cache.put(t.getTerm(), m);
 108+ }
104109 }
105110
106111 return meanings;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
@@ -60,25 +60,35 @@
6161 protected Functor2.Double weightCombiner = ProductCombiner.instance;
6262 protected Functor.Double weightBooster = SquareBooster.instance;
6363
64 - public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher) {
65 - this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality,
66 - featureFetcher.getFeaturesAreNormalized() ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance()); //if pre-normalized, use scalar to calc cosin
 64+ public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheDepth) {
 65+ this(meaningFetcher, featureFetcher, cacheDepth, null, null);
6766 }
6867
69 - public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim) {
70 - super(meaningFetcher);
 68+ public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheDepth, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim) {
 69+ this( new MeaningCache.Manager<LocalConcept>(meaningFetcher, cacheDepth),
 70+ new FeatureCache.Manager<LocalConcept, Integer>(featureFetcher, cacheDepth),
 71+ popularityMeasure, sim );
 72+ }
 73+
 74+ public CoherenceDisambiguator(MeaningCache.Manager<LocalConcept> meaningCacheManager, FeatureCache.Manager<LocalConcept, Integer> featureCacheManager, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim) {
 75+ super(meaningCacheManager);
7176
72 - if (popularityMeasure==null) throw new NullPointerException();
73 - if (sim==null) throw new NullPointerException();
74 - if (featureFetcher==null) throw new NullPointerException();
 77+ if (popularityMeasure==null) popularityMeasure = WikiWordConcept.theCardinality;
 78+ if (sim==null) sim = featureCacheManager.getFeaturesAreNormalized() ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance(); //if pre-normalized, use scalar to calc cosin
 79+ if (featureCacheManager==null) throw new NullPointerException();
7580
76 - this.featureCacheManager = new FeatureCache.Manager<LocalConcept, Integer>(featureFetcher, 10); //TODO: depth
77 - this.popularityDisambiguator = new PopularityDisambiguator(meaningFetcher, popularityMeasure);
 81+ this.featureCacheManager = featureCacheManager;
 82+ this.popularityDisambiguator = new PopularityDisambiguator(meaningCacheManager, popularityMeasure);
7883
7984 this.setPopularityMeasure(popularityMeasure);
8085 this.setSimilarityMeasure(sim);
8186 }
8287
 88+ public FeatureCache.Manager<LocalConcept, Integer> getFeatureCacheManager() {
 89+ return featureCacheManager;
 90+ }
 91+
 92+
8393 public Functor.Double getPopularityNormalizer() {
8494 return popularityNormalizer;
8595 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java
@@ -10,6 +10,7 @@
1111
1212 import de.brightbyte.io.Output;
1313 import de.brightbyte.util.PersistenceException;
 14+import de.brightbyte.wikiword.disambig.MeaningCache.Manager;
1415 import de.brightbyte.wikiword.model.PhraseNode;
1516 import de.brightbyte.wikiword.model.TermListNode;
1617 import de.brightbyte.wikiword.model.TermReference;
@@ -63,10 +64,18 @@
6465
6566 private Map<? extends T, C> meaningOverrides;
6667
67 - public AbstractDisambiguator(MeaningFetcher<? extends C> meaningFetcher) {
68 - if (meaningFetcher==null) throw new NullPointerException();
69 - this.meaningCacheManager = new MeaningCache.Manager<C>(meaningFetcher, 10);
 68+ public AbstractDisambiguator(MeaningFetcher<? extends C> meaningFetcher, int cacheDepth) {
 69+ this(new MeaningCache.Manager<C>(meaningFetcher, cacheDepth));
7070 }
 71+
 72+ public AbstractDisambiguator(MeaningCache.Manager<C> meaningCacheManager) {
 73+ if (meaningCacheManager==null) throw new NullPointerException();
 74+ this.meaningCacheManager = meaningCacheManager;
 75+ }
 76+
 77+ public MeaningCache.Manager<C> getMeaningCacheManager() {
 78+ return meaningCacheManager;
 79+ }
7180
7281 public void setMeaningOverrides(Map<? extends T, C> overrideMap) {
7382 this.meaningOverrides = overrideMap;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureCache.java
@@ -19,6 +19,7 @@
2020 protected List<FeatureCache<C, K>> stack;
2121
2222 public Manager(FeatureFetcher<C, K> root, int maxDepth) {
 23+ if (root==null) throw new NullPointerException();
2324 this.stack = new ArrayList<FeatureCache<C, K>>(maxDepth+1);
2425 this.maxDepth = maxDepth;
2526 this.root = root;
@@ -38,6 +39,10 @@
3940
4041 return cache;
4142 }
 43+
 44+ public boolean getFeaturesAreNormalized() {
 45+ return root.getFeaturesAreNormalized();
 46+ }
4247 }
4348
4449 protected FeatureFetcher<C, K> parent;
@@ -77,9 +82,11 @@
7883 }
7984 }
8085
81 - Map<Integer, ConceptFeatures<C, K>> parentFeatures = parent.getFeatures(todo);
82 - features.putAll(parentFeatures);
83 - cache.putAll(parentFeatures);
 86+ if (!todo.isEmpty()) {
 87+ Map<Integer, ConceptFeatures<C, K>> parentFeatures = parent.getFeatures(todo);
 88+ features.putAll(parentFeatures);
 89+ cache.putAll(parentFeatures);
 90+ }
8491
8592 return features;
8693 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
@@ -25,14 +25,18 @@
2626 protected int window;
2727 protected int initialWindow;
2828
29 - public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher) {
30 - this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality,
31 - featureFetcher.getFeaturesAreNormalized() ? ScalarVectorSimilarity.<Integer>getInstance() : CosineVectorSimilarity.<Integer>getInstance(), //if pre-normalized, use scalar to calc cosin
32 - 5, 5);
 29+ public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheDepth) {
 30+ this(meaningFetcher, featureFetcher, cacheDepth, null, null, 5, 5);
3331 }
3432
35 - public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) {
36 - super(meaningFetcher, featureFetcher, popularityMeasure, sim);
 33+ public SlidingCoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, Integer> featureFetcher, int cacheDepth, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) {
 34+ this(new MeaningCache.Manager<LocalConcept>(meaningFetcher, cacheDepth),
 35+ new FeatureCache.Manager<LocalConcept, Integer>(featureFetcher, cacheDepth),
 36+ popularityMeasure, sim, window, initialWindow);
 37+ }
 38+
 39+ public SlidingCoherenceDisambiguator(MeaningCache.Manager<LocalConcept> meaningCacheManager, FeatureCache.Manager<LocalConcept, Integer> featureCacheManager, Measure<WikiWordConcept> popularityMeasure, Similarity<LabeledVector<Integer>> sim, int window, int initialWindow) {
 40+ super(meaningCacheManager, featureCacheManager, popularityMeasure, sim);
3741
3842 this.window = window;
3943 this.initialWindow = initialWindow;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java
@@ -23,12 +23,16 @@
2424 protected Functor.Double weightBooster = SquareBooster.instance;
2525 protected Functor2.Double weigthCombiner = new ProductCombiner(); //NOTE: pop and weight are not in the same scale.
2626
27 - public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher) {
28 - this(meaningFetcher, WikiWordConcept.theCardinality);
 27+ public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, int cacheDepth) {
 28+ this(meaningFetcher, cacheDepth, WikiWordConcept.theCardinality);
2929 }
3030
31 - public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, Measure<WikiWordConcept> popularityMeasure) {
32 - super(meaningFetcher);
 31+ public PopularityDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, int cacheDepth, Measure<WikiWordConcept> popularityMeasure) {
 32+ this(new MeaningCache.Manager<LocalConcept>(meaningFetcher, cacheDepth), popularityMeasure);
 33+ }
 34+
 35+ public PopularityDisambiguator(MeaningCache.Manager<LocalConcept> meaningCacheManager, Measure<WikiWordConcept> popularityMeasure) {
 36+ super(meaningCacheManager);
3337
3438 this.setPopularityMeasure(popularityMeasure);
3539 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java
@@ -451,7 +451,7 @@
452452 if (disambiguator==null) {
453453 StoredMeaningFetcher meaningFetcher = new StoredMeaningFetcher(getLocalConceptStore());
454454 StoredFeatureFetcher<LocalConcept, Integer> featureFetcher = new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore());
455 - disambiguator = new SlidingCoherenceDisambiguator( meaningFetcher, featureFetcher );
 455+ disambiguator = new SlidingCoherenceDisambiguator( meaningFetcher, featureFetcher, 10 ); //FIXME: cache depth from config
456456
457457 LeveledOutput.Trace trace = new LeveledOutput.Trace(out);
458458 meaningFetcher.setTrace(trace);
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/WordSenseIndexer.java
@@ -83,7 +83,7 @@
8484
8585 StoredMeaningFetcher meaningFetcher = new StoredMeaningFetcher(getLocalConceptStore(), spec);
8686 StoredFeatureFetcher<LocalConcept, Integer> featureFetcher = new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore());
87 - disambiguator = new SlidingCoherenceDisambiguator( meaningFetcher, featureFetcher );
 87+ disambiguator = new SlidingCoherenceDisambiguator( meaningFetcher, featureFetcher, 10 ); //FIXME: cache depth from config
8888
8989 Measure<WikiWordConcept> popularityMeasure = new Measure<WikiWordConcept>(){ //boost locations //FIXME: configure!
9090 public double measure(WikiWordConcept concept) {

Status & tagging log