Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureCache.java |
— | — | @@ -1,6 +1,7 @@ |
2 | 2 | package de.brightbyte.wikiword.disambig; |
3 | 3 | |
4 | 4 | import java.util.ArrayList; |
| 5 | +import java.util.Collection; |
5 | 6 | import java.util.HashMap; |
6 | 7 | import java.util.List; |
7 | 8 | import java.util.Map; |
— | — | @@ -33,7 +34,7 @@ |
34 | 35 | return f; |
35 | 36 | } |
36 | 37 | |
37 | | - public Map<Integer, ConceptFeatures<C, K>> getFeatures(List<C> concepts) throws PersistenceException { |
| 38 | + public Map<Integer, ConceptFeatures<C, K>> getFeatures(Collection<C> concepts) throws PersistenceException { |
38 | 39 | Map<Integer, ConceptFeatures<C, K>> features = new HashMap<Integer, ConceptFeatures<C, K>> (); |
39 | 40 | List<C> todo = new ArrayList<C>(concepts.size()); |
40 | 41 | for (C c: concepts) { |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java |
— | — | @@ -40,12 +40,17 @@ |
41 | 41 | if (window < 2 || terms.size()<2 || meanings.size()<2) |
42 | 42 | return popularityDisambiguator.disambiguate(terms, meanings); |
43 | 43 | |
| 44 | + pruneMeanings(meanings); |
| 45 | + |
| 46 | + if (meanings.size()<2) |
| 47 | + return popularityDisambiguator.disambiguate(terms, meanings); |
| 48 | + |
44 | 49 | //CAVEAT: because the map disambig can contain only one meaning per term, the same term can not occur with two meanings within the same term sequence. |
45 | 50 | |
46 | 51 | Map<String, LocalConcept> disambig = new HashMap<String, LocalConcept>(meanings.size()); |
47 | 52 | |
48 | 53 | LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true); |
49 | | - FeatureCache<LocalConcept, K> features = new FeatureCache<LocalConcept, K>(featureFetcher); //TODO: keep a chain of n caches, resulting in LRU logic. |
| 54 | + FeatureCache<LocalConcept, K> features = getFeatureCache(meanings); |
50 | 55 | |
51 | 56 | for (int i= window; ; i++) { |
52 | 57 | int from = i-window; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/StoredMeaningFetcher.java |
— | — | @@ -3,6 +3,7 @@ |
4 | 4 | import java.util.List; |
5 | 5 | |
6 | 6 | import de.brightbyte.data.cursor.DataSet; |
| 7 | +import de.brightbyte.io.Output; |
7 | 8 | import de.brightbyte.util.PersistenceException; |
8 | 9 | import de.brightbyte.wikiword.model.LocalConcept; |
9 | 10 | import de.brightbyte.wikiword.store.LocalConceptStore; |
— | — | @@ -11,6 +12,7 @@ |
12 | 13 | public class StoredMeaningFetcher implements MeaningFetcher<LocalConcept> { |
13 | 14 | protected LocalConceptStore store; |
14 | 15 | protected ConceptQuerySpec spec; |
| 16 | + protected Output trace; |
15 | 17 | |
16 | 18 | public StoredMeaningFetcher(LocalConceptStore store) { |
17 | 19 | this(store, null); |
— | — | @@ -24,8 +26,21 @@ |
25 | 27 | } |
26 | 28 | |
27 | 29 | public List<LocalConcept> getMeanings(String term) throws PersistenceException {
| 30 | + trace("fetching meanings for \""+term+"\""); //does nothing unless a trace output has been set via setTrace()
28 | 31 | DataSet<LocalConcept> m = store.getMeanings(term, spec); //FIXME: filter/cut-off rules, sort order! //XXX: relevance value?
29 | 32 | return m.load();
30 | 33 | }
31 | 34 | |
| 35 | + public Output getTrace() { //current trace output; null means no tracing (see trace())
| 36 | + return trace;
| 37 | + }
| 38 | + |
| 39 | + public void setTrace(Output trace) { //sets the output used by trace(); pass null to switch tracing off
| 40 | + this.trace = trace;
| 41 | + }
| 42 | + |
| 43 | + protected void trace(String msg) { //prints msg as one line to the trace output, if one is set
| 44 | + if (trace!=null) trace.println(msg); //no trace output configured: the message is silently dropped
| 45 | + }
| 46 | + |
32 | 47 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/TermRelatedness.java |
— | — | @@ -60,8 +60,8 @@ |
61 | 61 | d = relatedness.similarity(ca, cb); |
62 | 62 | } |
63 | 63 | else { |
64 | | - d = r.getCoherence(); |
65 | | - if (d<0) throw new RuntimeException("disambiguator did not provide a coherence score, and no concept similarity measure was defined!"); |
| 64 | + d = r.getScore(); |
| 65 | + if (d<0) throw new RuntimeException("disambiguator did not provide a score, and no concept similarity measure was defined!"); |
66 | 66 | } |
67 | 67 | |
68 | 68 | return new Relatedness(d, ca, cb); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/PopularityDisambiguator.java |
— | — | @@ -31,7 +31,7 @@ |
32 | 32 | int pop = 0; |
33 | 33 | for (String t: terms) { |
34 | 34 | List<LocalConcept> m = meanings.get(t); |
35 | | - if (m.size()==0) continue; |
| 35 | + if (m==null || m.size()==0) continue; |
36 | 36 | |
37 | 37 | if (m.size()>0) Collections.sort(m, popularityComparator); |
38 | 38 | |
— | — | @@ -43,7 +43,7 @@ |
44 | 44 | |
45 | 45 | pop = pop / disambig.size(); |
46 | 46 | |
47 | | - Result r = new Result(disambig, pop, -1, pop); |
| 47 | + Result r = new Result(disambig, pop, "pop="+pop); |
48 | 48 | return r; |
49 | 49 | } |
50 | 50 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java |
— | — | @@ -3,9 +3,12 @@ |
4 | 4 | import java.util.ArrayList; |
5 | 5 | import java.util.Collections; |
6 | 6 | import java.util.HashMap; |
| 7 | +import java.util.Iterator; |
7 | 8 | import java.util.List; |
8 | 9 | import java.util.Map; |
| 10 | +import java.util.Map.Entry; |
9 | 11 | |
| 12 | +import de.brightbyte.data.Functor; |
10 | 13 | import de.brightbyte.data.LabeledMatrix; |
11 | 14 | import de.brightbyte.data.LabeledVector; |
12 | 15 | import de.brightbyte.data.MapLabeledMatrix; |
— | — | @@ -20,15 +23,31 @@ |
21 | 24 | |
22 | 25 | public class CoherenceDisambiguator<K> extends AbstractDisambiguator { |
23 | 26 | |
24 | | - protected int minPopularity = 2; //FIXME: use complex cutoff specifier! |
25 | | - protected double scoreThreshold = 0.002; //FIXME: magic number |
26 | | - protected double popularityBias = 0.01; //FIXME: magic number |
| 27 | + protected int minPopularity = 2; //FIXME: use complex cutoff specifier! |
| 28 | + protected int maxMeanings = 8; //FIXME: magic... |
27 | 29 | |
| 30 | + protected double minScore = 0.1; //FIXME: magic number. should "somehow" match popularityFactor and similarityFactor |
| 31 | + protected double popularityBias = 0.2; //FIXME: magic number. should "somehow" match popularityFactor and similarityFactor |
| 32 | + |
28 | 33 | protected Similarity<LabeledVector<K>> similarityMeasure; |
29 | 34 | protected FeatureFetcher<LocalConcept, K> featureFetcher; |
30 | 35 | protected Measure<WikiWordConcept> popularityMeasure; |
31 | 36 | protected PopularityDisambiguator popularityDisambiguator; |
32 | 37 | |
| 38 | + private Functor.Double popularityFactor = new Functor.Double() { //NOTE: must map [0:inf] to [0:1] and grow monotonically
| 39 | + //Math.log(pop) is negative for pop<1 (-inf for 0) and Math.sqrt() of that is NaN; clamp to 0, the formula's value at pop==1.
| 40 | + public double apply(double pop) {
| 41 | + if (pop<=1) return 0;
| 42 | + return 1 - 1/(Math.sqrt(Math.log(pop))+1); //XXX: black voodoo magic ad hoc formula with no deeper meaning.
| 43 | + }
| 44 | + };
| 45 | + |
| 46 | + private Functor.Double similarityFactor = new Functor.Double() { //NOTE: must map [0:1] to [0:1] and grow monotonically
| 47 | + public double apply(double sim) {
| 48 | + return Math.sqrt(Math.sqrt(sim)); //fourth root of sim; NaN for negative input. XXX: black voodoo magic ad hoc formula with no deeper meaning.
| 49 | + }
| 50 | + };
| 51 | + |
33 | 52 | public CoherenceDisambiguator(MeaningFetcher<LocalConcept> meaningFetcher, FeatureFetcher<LocalConcept, K> featureFetcher, boolean featuresAreNormalized) { |
34 | 53 | this(meaningFetcher, featureFetcher, WikiWordConcept.theCardinality, |
35 | 54 | featuresAreNormalized ? ScalarVectorSimilarity.<K>getInstance() : CosineVectorSimilarity.<K>getInstance()); //if pre-normalized, use scalar to calc cosin |
— | — | @@ -80,14 +99,37 @@ |
81 | 100 | this.minPopularity = min; |
82 | 101 | } |
83 | 102 | |
84 | | - public double getScoreThreshold() { |
85 | | - return scoreThreshold; |
| 103 | + public double getMinScore() { |
| 104 | + return minScore; |
86 | 105 | } |
87 | 106 | |
88 | | - public void setScoreThreshold(double threshold) { |
89 | | - this.scoreThreshold = threshold; |
| 107 | + public void setMinScore(double threshold) { |
| 108 | + this.minScore = threshold; |
90 | 109 | } |
91 | 110 | |
| 111 | + public int getMaxMeanings() { //cap on the number of meanings per term, as applied by pruneMeanings()
| 112 | + return maxMeanings;
| 113 | + }
| 114 | + |
| 115 | + public void setMaxMeanings(int maxMeanings) { //value is not validated; NOTE(review): a negative cap would make subList() in pruneMeanings() throw
| 116 | + this.maxMeanings = maxMeanings;
| 117 | + }
| 118 | + |
| 119 | + protected FeatureCache<LocalConcept, K> getFeatureCache(Map<String, List<LocalConcept>> meanings) throws PersistenceException {
| 120 | + //TODO: keep a chain of n caches, resulting in LRU logic.
| 121 | + FeatureCache<LocalConcept, K> cache = new FeatureCache<LocalConcept, K>(featureFetcher);
| 122 | +
| 123 | + //NOTE: gather the candidate concepts of all terms, so their features can be requested with a single call
| 124 | + List<LocalConcept> all = new ArrayList<LocalConcept>(meanings.size()*10);
| 125 | + for (List<LocalConcept> candidates: meanings.values()) {
| 126 | + all.addAll(candidates);
| 127 | + }
| 128 | +
| 129 | + cache.getFeatures(all); //result is ignored; presumably the call leaves the features in the cache -- confirm in FeatureCache
| 130 | +
| 131 | + return cache;
| 132 | + }
| 133 | + |
92 | 134 | /* (non-Javadoc) |
93 | 135 | * @see de.brightbyte.wikiword.disambig.Disambiguator#disambiguate(java.util.List) |
94 | 136 | */ |
— | — | @@ -95,25 +137,64 @@ |
96 | 138 | if (terms.size()<2 || meanings.size()<2) |
97 | 139 | return popularityDisambiguator.disambiguate(terms, meanings); |
98 | 140 | |
| 141 | + pruneMeanings(meanings); |
| 142 | + |
| 143 | + if (meanings.size()<2) |
| 144 | + return popularityDisambiguator.disambiguate(terms, meanings); |
| 145 | + |
| 146 | + //CAVEAT: because the map disambig can contain only one meaning per term, the same term can not occur with two meanings within the same term sequence. |
| 147 | + |
99 | 148 | LabeledMatrix<LocalConcept, LocalConcept> similarities = new MapLabeledMatrix<LocalConcept, LocalConcept>(true); |
100 | | - FeatureCache<LocalConcept, K> features = new FeatureCache<LocalConcept, K>(featureFetcher); //TODO: keep a chain of n caches, resulting in LRU logic. |
| 149 | + FeatureCache<LocalConcept, K> features = getFeatureCache(meanings); |
101 | 150 | |
102 | 151 | List<Map<String, LocalConcept>> interpretations = getInterpretations(terms, meanings); |
103 | 152 | |
104 | 153 | return getBestInterpretation(terms, meanings, interpretations, similarities, features); |
105 | 154 | } |
106 | 155 | |
| 156 | + protected void pruneMeanings(Map<String, List<LocalConcept>> meanings) { //prunes the given map (and its lists) in place
| 157 | + //NOTE: minPopularity<=1 merely disables the popularity filter; empty entries and the maxMeanings cap are still handled below.
| 158 | + boolean filter = minPopularity>1;
| 159 | + Iterator<Map.Entry<String, List<LocalConcept>>> eit = meanings.entrySet().iterator();
| 160 | + while (eit.hasNext()) {
| 161 | + Entry<String, List<LocalConcept>> e = eit.next();
| 162 | + List<LocalConcept> m = e.getValue();
| 163 | + //step 1: drop meanings whose popularity is below minPopularity
| 164 | + Iterator<LocalConcept> cit = m.iterator();
| 165 | + while (filter && cit.hasNext()) {
| 166 | + LocalConcept c = cit.next();
| 167 | + double p = popularityMeasure.measure(c);
| 168 | +
| 169 | + if (p<minPopularity) {
| 170 | + cit.remove();
| 171 | + }
| 172 | + }
| 173 | + //step 2: drop terms left without any meaning, and cap the remaining lists at maxMeanings
| 174 | + if (m.size()==0) eit.remove();
| 175 | + else if (m.size()>maxMeanings) {
| 176 | + Collections.sort(m, WikiWordConcept.byCardinality); //NOTE(review): assumes this order puts the preferred meanings first -- confirm
| 177 | + m = m.subList(0, maxMeanings);
| 178 | + e.setValue(m);
| 179 | + }
| 180 | + }
| 181 | + }
| 182 | + |
107 | 183 | protected Result getBestInterpretation(List<String> terms, Map<String, List<LocalConcept>> meanings, |
108 | 184 | List<Map<String, LocalConcept>> interpretations, |
109 | 185 | LabeledMatrix<LocalConcept, LocalConcept> similarities, FeatureCache<LocalConcept, K> features) throws PersistenceException { |
110 | 186 | |
111 | 187 | List<Result> rankings = new ArrayList<Result>(); |
112 | 188 | |
| 189 | + double traceLimit = -1; |
113 | 190 | for (Map<String, LocalConcept> interp: interpretations) { |
114 | 191 | Result r = getScore(interp, similarities, features); |
115 | | - if (r.getScore() <= scoreThreshold) continue; |
116 | 192 | |
117 | | - rankings.add(r); |
| 193 | + if (r.getScore() >= minScore) { |
| 194 | + rankings.add(r); |
| 195 | + |
| 196 | + if (traceLimit<0) traceLimit = r.getScore() / 2; |
| 197 | + if (r.getScore() >= traceLimit && rankings.size()<=10) trace(" = "+r); |
| 198 | + } |
118 | 199 | } |
119 | 200 | |
120 | 201 | if (rankings.size()==0) { |
— | — | @@ -123,20 +204,6 @@ |
124 | 205 | Collections.sort(rankings); |
125 | 206 | Collections.reverse(rankings); |
126 | 207 | |
127 | | - if (trace!=null) { |
128 | | - int c = 0; |
129 | | - double limit = -1; |
130 | | - for (Result r: rankings) { |
131 | | - if (limit<0) limit = r.getScore() / 2; |
132 | | - else if (r.getScore()<limit) break; |
133 | | - |
134 | | - trace(" = "+r); |
135 | | - c++; |
136 | | - |
137 | | - if (c>10) break; |
138 | | - } |
139 | | - } |
140 | | - |
141 | 208 | //TODO: if result is tight (less than 50% distance), use more popularity score! |
142 | 209 | Result r = rankings.get(0); |
143 | 210 | return r; |
— | — | @@ -158,9 +225,6 @@ |
159 | 226 | |
160 | 227 | for (Map<String, LocalConcept> be: base) { |
161 | 228 | for (LocalConcept c: m) { |
162 | | - double p = popularityMeasure.measure(c); |
163 | | - if (p<minPopularity) continue; |
164 | | - |
165 | 229 | Map<String, LocalConcept> e = new HashMap<String, LocalConcept>(); |
166 | 230 | e.putAll(be); |
167 | 231 | e.put(t, c); |
— | — | @@ -169,7 +233,7 @@ |
170 | 234 | } |
171 | 235 | } |
172 | 236 | |
173 | | - trace(" ~ "+t+": "+interpretations.size()+" combinations"); |
| 237 | + trace(" ~ "+t+": "+m.size()+" meanings; collected "+interpretations.size()+" combinations"); |
174 | 238 | return interpretations; |
175 | 239 | } |
176 | 240 | |
— | — | @@ -222,13 +286,17 @@ |
223 | 287 | } |
224 | 288 | |
225 | 289 | //normalize |
226 | | - sim = sim / n; |
227 | | - pop = pop / c; |
| 290 | + sim = sim / n; //normalize |
| 291 | + pop = pop / c; //normalize |
228 | 292 | |
229 | | - double popf = 1 - 1/(Math.sqrt(pop)+1); //converge against 1 //XXX: black voodoo magic ad hoc formula with no deeper meaing. |
| 293 | + double popf = popularityFactor.apply(pop); |
| 294 | + double simf = similarityFactor.apply(sim); |
230 | 295 | |
231 | | - double score = popf * popularityBias + sim * ( 1 - popularityBias ); |
232 | | - return new Result(interp, score, sim, pop); |
| 296 | + //FIXME: functor! |
| 297 | + double score = popf * popularityBias + simf * ( 1 - popularityBias ); |
| 298 | + //double score = Math.sqrt( popf * simf ); //FIXME: functor! |
| 299 | + |
| 300 | + return new Result(interp, score, "simf="+simf+", popf="+popf+", sim="+sim+", pop="+pop); |
233 | 301 | } |
234 | 302 | |
235 | 303 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/Disambiguator.java |
— | — | @@ -12,15 +12,13 @@ |
13 | 13 | public static class Result implements Comparable { |
14 | 14 | private Map<String, ? extends WikiWordConcept> meanings; |
15 | 15 | private double score; |
16 | | - private double coherence; |
17 | | - private double popularity; |
| 16 | + private String description; |
18 | 17 | |
19 | | - public Result(Map<String, ? extends WikiWordConcept> meanings, double score, double coherence, double popularity) { |
| 18 | + public Result(Map<String, ? extends WikiWordConcept> meanings, double score, String description) { |
20 | 19 | super(); |
21 | 20 | this.meanings = meanings; |
22 | 21 | this.score = score; |
23 | | - this.coherence = coherence; |
24 | | - this.popularity = popularity; |
| 22 | + this.description = description; |
25 | 23 | } |
26 | 24 | |
27 | 25 | public Map<String, ? extends WikiWordConcept> getMeanings() { |
— | — | @@ -31,17 +29,13 @@ |
32 | 30 | return score; |
33 | 31 | } |
34 | 32 | |
35 | | - public double getCoherence() { |
36 | | - return coherence; |
| 33 | + public String getDescription() { |
| 34 | + return description; |
37 | 35 | } |
38 | 36 | |
39 | | - public double getPopularity() { |
40 | | - return popularity; |
41 | | - } |
42 | | - |
43 | 37 | @Override |
44 | 38 | public String toString() { |
45 | | - return "("+score+"|"+coherence+"&"+popularity+") "+meanings; |
| 39 | + return "("+score+"|"+description+") "+meanings; |
46 | 40 | } |
47 | 41 | |
48 | 42 | public int compareTo(Object o) { |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/FeatureFetcher.java |
— | — | @@ -1,5 +1,6 @@ |
2 | 2 | package de.brightbyte.wikiword.disambig; |
3 | 3 | |
| 4 | +import java.util.Collection; |
4 | 5 | import java.util.List; |
5 | 6 | import java.util.Map; |
6 | 7 | |
— | — | @@ -9,5 +10,5 @@ |
10 | 11 | |
11 | 12 | public interface FeatureFetcher<C extends WikiWordConcept, K> { |
12 | 13 | public ConceptFeatures<C, K> getFeatures(C c) throws PersistenceException; |
13 | | - public Map<Integer, ConceptFeatures<C, K>> getFeatures(List<C> c) throws PersistenceException; |
| 14 | + public Map<Integer, ConceptFeatures<C, K>> getFeatures(Collection<C> c) throws PersistenceException; |
14 | 15 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/StoredFeatureFetcher.java |
— | — | @@ -1,8 +1,9 @@ |
2 | 2 | package de.brightbyte.wikiword.disambig; |
3 | 3 | |
4 | | -import java.util.List; |
| 4 | +import java.util.Collection; |
5 | 5 | import java.util.Map; |
6 | 6 | |
| 7 | +import de.brightbyte.io.Output; |
7 | 8 | import de.brightbyte.util.PersistenceException; |
8 | 9 | import de.brightbyte.wikiword.model.ConceptFeatures; |
9 | 10 | import de.brightbyte.wikiword.model.WikiWordConcept; |
— | — | @@ -10,6 +11,7 @@ |
11 | 12 | |
12 | 13 | public class StoredFeatureFetcher<C extends WikiWordConcept, K> implements FeatureFetcher<C, K> { |
13 | 14 | protected FeatureStore<C, K> store; |
| 15 | + protected Output trace; |
14 | 16 | |
15 | 17 | public StoredFeatureFetcher(FeatureStore<C, K> store) { |
16 | 18 | if (store==null) throw new NullPointerException(); |
— | — | @@ -18,14 +20,29 @@ |
19 | 21 | } |
20 | 22 | |
21 | 23 | public ConceptFeatures<C, K> getFeatures(C c) throws PersistenceException {
| 24 | + trace("fetching features for "+c); //does nothing unless a trace output has been set via setTrace()
22 | 25 | return store.getConceptFeatures(c.getId());
23 | 26 | }
24 | 27 | |
25 | | - public Map<Integer, ConceptFeatures<C, K>> getFeatures(List<C> concepts) throws PersistenceException { |
| 28 | + public Map<Integer, ConceptFeatures<C, K>> getFeatures(Collection<C> concepts) throws PersistenceException { |
| 29 | + trace("fetching features for "+concepts); |
| 30 | + |
26 | 31 | int[] ids = new int[concepts.size()]; |
27 | 32 | int i = 0; |
28 | 33 | for (C c: concepts) ids[i++] = c.getId(); |
29 | 34 | return store.getConceptsFeatures(ids); |
30 | 35 | } |
31 | 36 | |
| 37 | + public Output getTrace() { //current trace output; null means no tracing (see trace())
| 38 | + return trace;
| 39 | + }
| 40 | + |
| 41 | + public void setTrace(Output trace) { //sets the output used by trace(); pass null to switch tracing off
| 42 | + this.trace = trace;
| 43 | + }
| 44 | + |
| 45 | + protected void trace(String msg) { //prints msg as one line to the trace output, if one is set
| 46 | + if (trace!=null) trace.println(msg); //no trace output configured: the message is silently dropped
| 47 | + }
| 48 | + |
32 | 49 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/AbstractDisambiguator.java |
— | — | @@ -1,13 +1,11 @@ |
2 | 2 | package de.brightbyte.wikiword.disambig; |
3 | 3 | |
4 | | -import java.util.ArrayList; |
5 | 4 | import java.util.HashMap; |
6 | 5 | import java.util.List; |
7 | 6 | import java.util.Map; |
8 | 7 | |
9 | 8 | import de.brightbyte.io.Output; |
10 | 9 | import de.brightbyte.util.PersistenceException; |
11 | | -import de.brightbyte.wikiword.disambig.Disambiguator.Result; |
12 | 10 | import de.brightbyte.wikiword.model.LocalConcept; |
13 | 11 | |
14 | 12 | public abstract class AbstractDisambiguator implements Disambiguator { |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/ConsoleApp.java |
— | — | @@ -1,21 +1,24 @@ |
2 | 2 | package de.brightbyte.wikiword; |
3 | 3 | |
4 | 4 | import java.util.ArrayList; |
5 | | -import java.util.Arrays; |
6 | 5 | import java.util.Collections; |
7 | 6 | import java.util.List; |
8 | 7 | |
9 | 8 | import de.brightbyte.io.Prompt; |
10 | 9 | import de.brightbyte.util.PersistenceException; |
| 10 | +import de.brightbyte.util.StructuredDataCodec; |
11 | 11 | import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
12 | 12 | |
13 | 13 | public abstract class ConsoleApp<S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> { |
14 | 14 | |
15 | 15 | protected Prompt prompt; |
| 16 | + protected StructuredDataCodec commandCodec; |
16 | 17 | |
17 | 18 | public ConsoleApp(boolean allowGlobal, boolean allowLocal) { |
18 | 19 | super(allowGlobal, allowLocal); |
19 | 20 | prompt = new Prompt(); |
| 21 | + commandCodec = new StructuredDataCodec(); |
| 22 | + commandCodec.setLenient(true); |
20 | 23 | } |
21 | 24 | |
22 | 25 | @Override |
— | — | @@ -27,13 +30,13 @@ |
28 | 31 | echo("hello"); |
29 | 32 | |
30 | 33 | while (true) { |
31 | | - List<String> params= promptCommand(); |
| 34 | + List<Object> params= promptCommand(); |
32 | 35 | if (params==null) break; |
33 | 36 | if (params.size()==0) continue; |
34 | 37 | |
35 | | - params = new ArrayList<String>(params); //make modifiable |
| 38 | + params = new ArrayList<Object>(params); //modifyable |
36 | 39 | |
37 | | - String cmd = params.get(0); |
| 40 | + String cmd = params.get(0).toString(); |
38 | 41 | cmd = cmd.trim().toLowerCase(); |
39 | 42 | |
40 | 43 | if (cmd.equals("quit") || cmd.equals("exit") || cmd.equals("q")) break; |
— | — | @@ -58,15 +61,16 @@ |
59 | 62 | // noop |
60 | 63 | } |
61 | 64 | |
62 | | - public List<String> promptCommand() { |
| 65 | + public List<Object> promptCommand() { |
63 | 66 | String s = prompt.prompt(">", ""); |
64 | 67 | if (s==null) return null; |
65 | 68 | |
66 | 69 | s = s.replaceAll("^\\s*|\\s*[;]\\s*$", ""); |
67 | 70 | if (s.length()==0) return Collections.emptyList(); |
68 | 71 | |
69 | | - String[] ss = s.split("\\s+"); |
70 | | - return Arrays.asList(ss); |
| 72 | + if (s.startsWith("#") || s.startsWith(";") || s.startsWith("//")) return Collections.emptyList(); |
| 73 | + |
| 74 | + return commandCodec.decodeList(s); |
71 | 75 | } |
72 | 76 | |
73 | 77 | public String prompt(String m, List<String> options, String def) { |
— | — | @@ -75,15 +79,15 @@ |
76 | 80 | return s; |
77 | 81 | } |
78 | 82 | |
79 | | - protected void beforeCommand(List<String> params) throws Exception { |
| 83 | + protected void beforeCommand(List<Object> params) throws Exception { |
80 | 84 | //noop |
81 | 85 | } |
82 | 86 | |
83 | | - protected void afterCommand(List<String> params) throws Exception { |
| 87 | + protected void afterCommand(List<Object> params) throws Exception { |
84 | 88 | //noop |
85 | 89 | } |
86 | 90 | |
87 | | - public abstract void runCommand(List<String> params) throws Exception; |
| 91 | + public abstract void runCommand(List<Object> params) throws Exception; |
88 | 92 | |
89 | 93 | protected void startEcho(String msg) { |
90 | 94 | prompt.print(msg); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseLocalConceptStore.java |
— | — | @@ -307,6 +307,8 @@ |
308 | 308 | tables += " LEFT JOIN "+conceptDescriptionTable.getSQLName()+" as D ON D.concept = C.id "; |
309 | 309 | } |
310 | 310 | |
| 311 | + //TODO: include features! |
| 312 | + |
311 | 313 | String sql = "SELECT " + fields + " FROM " + tables; |
312 | 314 | return sql; |
313 | 315 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/query/QueryConsole.java |
— | — | @@ -13,6 +13,8 @@ |
14 | 14 | |
15 | 15 | import de.brightbyte.data.LabeledVector; |
16 | 16 | import de.brightbyte.data.cursor.DataSet; |
| 17 | +import de.brightbyte.db.DatabaseUtil; |
| 18 | +import de.brightbyte.io.LeveledOutput; |
17 | 19 | import de.brightbyte.rdf.RdfException; |
18 | 20 | import de.brightbyte.util.PersistenceException; |
19 | 21 | import de.brightbyte.wikiword.ConsoleApp; |
— | — | @@ -312,20 +314,20 @@ |
313 | 315 | } |
314 | 316 | |
315 | 317 | @Override |
316 | | - public void runCommand(List<String> params) throws Exception { |
317 | | - String cmd = params.get(0); |
| 318 | + public void runCommand(List<Object> params) throws Exception { |
| 319 | + String cmd = params.get(0).toString(); |
318 | 320 | cmd = cmd.trim().toLowerCase(); |
319 | 321 | |
320 | 322 | String format = null; |
321 | 323 | File target = null; |
322 | 324 | |
323 | | - if (params.size()>1 && params.get(params.size()-1).startsWith(">")) { |
324 | | - target = new File( params.get(params.size()-1).substring(1).trim() ); |
| 325 | + if (params.size()>1 && params.get(params.size()-1).toString().startsWith(">")) { |
| 326 | + target = new File( params.get(params.size()-1).toString().substring(1).trim() ); |
325 | 327 | params = params.subList(0, params.size()-1); |
326 | 328 | } |
327 | 329 | |
328 | | - if (params.size()>1 && params.get(params.size()-1).startsWith("|")) { |
329 | | - format = params.get(params.size()-1).substring(1).trim(); |
| 330 | + if (params.size()>1 && params.get(params.size()-1).toString().startsWith("|")) { |
| 331 | + format = params.get(params.size()-1).toString().substring(1).trim(); |
330 | 332 | params = params.subList(0, params.size()-1); |
331 | 333 | } |
332 | 334 | |
— | — | @@ -337,43 +339,47 @@ |
338 | 340 | } |
339 | 341 | else if (cmd.equals("m") || cmd.equals("mng") || cmd.equals("meanings")) { |
340 | 342 | if (isGlobalThesaurus()) { |
341 | | - String lang = params.get(1); |
342 | | - String term = params.get(2); |
| 343 | + String lang = params.get(1).toString(); |
| 344 | + String term = params.get(2).toString(); |
343 | 345 | listMeaningsGlobal(lang, term, out); |
344 | 346 | } |
345 | 347 | else { |
346 | | - String term = params.get(1); |
| 348 | + String term = params.get(1).toString(); |
347 | 349 | listMeaningsLocal(term, out); |
348 | 350 | } |
349 | 351 | } |
350 | 352 | else if (cmd.equals("s") || cmd.equals("cat") || cmd.equals("show")) { |
351 | 353 | if (params.size()>2 && isGlobalThesaurus()) { |
352 | | - String id = params.get(1); |
353 | | - String lang = params.get(2); |
354 | | - showConcept(Integer.parseInt(id), lang, out); |
| 354 | + int id = DatabaseUtil.asInt(params.get(1)); |
| 355 | + String lang = params.get(2).toString(); |
| 356 | + showConcept(id, lang, out); |
355 | 357 | } |
356 | 358 | else { |
357 | | - String id = params.get(1); |
358 | | - showConcept(Integer.parseInt(id), out); |
| 359 | + int id = DatabaseUtil.asInt(params.get(1)); |
| 360 | + showConcept(id, out); |
359 | 361 | } |
360 | 362 | } |
361 | 363 | else if (cmd.equals("e") || cmd.equals("env") || cmd.equals("environment")) { |
362 | 364 | if (params.size()>2 ) { |
363 | | - String id = params.get(1); |
364 | | - String min = params.get(2); |
365 | | - showEnvironment(Integer.parseInt(id), Double.parseDouble(min), out); |
| 365 | + int id = DatabaseUtil.asInt(params.get(1)); |
| 366 | + String min = params.get(2).toString(); |
| 367 | + showEnvironment(id, Double.parseDouble(min), out); |
366 | 368 | } |
367 | 369 | else { |
368 | | - String id = params.get(1); |
369 | | - showEnvironment(Integer.parseInt(id), 0, out); |
| 370 | + int id = DatabaseUtil.asInt(params.get(1)); |
| 371 | + showEnvironment(id, 0, out); |
370 | 372 | } |
371 | 373 | } |
372 | 374 | else if (cmd.equals("f") || cmd.equals("feat") || cmd.equals("features")) { |
373 | | - String id = params.get(1); |
374 | | - showFeatureVector(Integer.parseInt(id), out); |
| 375 | + int id = DatabaseUtil.asInt(params.get(1)); |
| 376 | + showFeatureVector(id, out); |
375 | 377 | } |
376 | 378 | else if (cmd.equals("d") || cmd.equals("dis") || cmd.equals("disambig") || cmd.equals("disambiguate")) { |
377 | | - List<String> terms = params.subList(1,params.size()); |
| 379 | + List<String> terms = new ArrayList<String>(params.size()-1); |
| 380 | + for (Object t: params.subList(1,params.size())) { |
| 381 | + terms.add(t.toString()); |
| 382 | + } |
| 383 | + |
378 | 384 | showDisambiguation(terms, out); |
379 | 385 | } |
380 | 386 | else if (cmd.equals("ls") || cmd.equals("list")) { |
— | — | @@ -406,12 +412,16 @@ |
407 | 413 | } |
408 | 414 | |
409 | 415 | protected Disambiguator getDisambiguator() throws PersistenceException { |
410 | | - if (disambiguator==null) disambiguator = |
411 | | - new SlidingCoherenceDisambiguator<Integer>( |
412 | | - new StoredMeaningFetcher(getLocalConceptStore()), |
413 | | - new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore()), |
414 | | - true |
415 | | - ); |
| 416 | + if (disambiguator==null) { |
| 417 | + StoredMeaningFetcher meaningFetcher = new StoredMeaningFetcher(getLocalConceptStore()); |
| 418 | + StoredFeatureFetcher<LocalConcept, Integer> featureFetcher = new StoredFeatureFetcher<LocalConcept, Integer>(getFeatureStore()); |
| 419 | + disambiguator = new SlidingCoherenceDisambiguator<Integer>( meaningFetcher, featureFetcher, true ); |
| 420 | + |
| 421 | + LeveledOutput.Trace trace = new LeveledOutput.Trace(out); |
| 422 | + meaningFetcher.setTrace(trace); |
| 423 | + featureFetcher.setTrace(trace); |
| 424 | + disambiguator.setTrace(trace); |
| 425 | + } |
416 | 426 | |
417 | 427 | return disambiguator; |
418 | 428 | } |