Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FilteredMappingCandidateCursor.java |
— | — | @@ -19,18 +19,18 @@ |
20 | 20 | protected MappingCandidateFilter filter; |
21 | 21 | |
22 | 22 | public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, String field) { |
23 | | - this(cursor, FeatureSets.fieldAccessor(field, Integer.class)); |
| 23 | + this(cursor, new Record.Accessor<Integer>(field, Integer.class)); |
24 | 24 | } |
25 | 25 | |
26 | 26 | public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, String field, int threshold) { |
27 | | - this(cursor, FeatureSets.fieldAccessor(field, Integer.class), threshold); |
| 27 | + this(cursor, new Record.Accessor<Integer>(field, Integer.class), threshold); |
28 | 28 | } |
29 | 29 | |
30 | | - public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<FeatureSet, ? extends Number> accessor) { |
| 30 | + public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<Record, ? extends Number> accessor) { |
31 | 31 | this(cursor, new MappingCandidateFeatureScorer(accessor)); |
32 | 32 | } |
33 | 33 | |
34 | | - public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<FeatureSet, ? extends Number> accessor, int threshold) { |
| 34 | + public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<Record, ? extends Number> accessor, int threshold) { |
35 | 35 | this(cursor, new MappingCandidateFeatureScorer(accessor), threshold); |
36 | 36 | } |
37 | 37 | |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSet.java |
— | — | @@ -2,6 +2,9 @@ |
3 | 3 | |
4 | 4 | import java.util.Collection; |
5 | 5 | |
| 6 | +import de.brightbyte.abstraction.AbstractAccessor; |
| 7 | +import de.brightbyte.abstraction.PropertyAccessor; |
| 8 | +import de.brightbyte.data.Aggregator; |
6 | 9 | import de.brightbyte.data.LabeledVector; |
7 | 10 | |
8 | 11 | public interface FeatureSet { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateSelectorFilter.java |
— | — | @@ -3,7 +3,7 @@ |
4 | 4 | import java.util.Collection; |
5 | 5 | import java.util.Collections; |
6 | 6 | |
7 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 7 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
8 | 8 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
9 | 9 | |
10 | 10 | /** |
— | — | @@ -21,8 +21,8 @@ |
22 | 22 | this.selector = selector; |
23 | 23 | } |
24 | 24 | |
25 | | - public Collection<FeatureSet> filterCandidates(MappingCandidates candidates) { |
26 | | - FeatureSet selected = selector.selectCandidate(candidates); |
| 25 | + public Collection<ConceptEntityRecord> filterCandidates(MappingCandidates candidates) { |
| 26 | + ConceptEntityRecord selected = selector.selectCandidate(candidates); |
27 | 27 | if (selected==null) return Collections.emptyList(); |
28 | 28 | else return Collections.singleton(selected); |
29 | 29 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/UniqueMappingCandidateSelector.java |
— | — | @@ -2,7 +2,7 @@ |
3 | 3 | |
4 | 4 | import java.util.Collection; |
5 | 5 | |
6 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 6 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
7 | 7 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
8 | 8 | |
9 | 9 | /** |
— | — | @@ -13,8 +13,8 @@ |
14 | 14 | */ |
15 | 15 | public class UniqueMappingCandidateSelector implements MappingCandidateSelector { |
16 | 16 | |
17 | | - public FeatureSet selectCandidate(MappingCandidates candidates) { |
18 | | - Collection<FeatureSet> cand = candidates.getCandidates(); |
| 17 | + public ConceptEntityRecord selectCandidate(MappingCandidates candidates) { |
| 18 | + Collection<ConceptEntityRecord> cand = candidates.getCandidates(); |
19 | 19 | |
20 | 20 | if (cand.size()==1) return cand.iterator().next(); |
21 | 21 | else return null; |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateMultiScorer.java |
— | — | @@ -4,22 +4,23 @@ |
5 | 5 | import java.util.Collection; |
6 | 6 | |
7 | 7 | import de.brightbyte.data.Functor2; |
8 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 8 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
| 9 | +import de.brightbyte.wikiword.integrator.data.ForeignEntityRecord; |
9 | 10 | |
10 | 11 | /** |
11 | 12 | * MappingCandidateScorer that accumulates the scores from multiple MappingCandidateScorers into a single score, |
12 | | - * using a aggregator functor (usually the sum or maximum, as implemented by Functors.Integer.sum resp. Functors.Integer.max). |
| 13 | + * using an accumulator functor (usually the sum or maximum, as implemented by Functors.Integer.sum resp. Functors.Integer.max). |
13 | 14 | * |
14 | 15 | * @author daniel |
15 | 16 | */ |
16 | 17 | public class MappingCandidateMultiScorer implements MappingCandidateScorer { |
17 | 18 | |
18 | 19 | protected Collection<MappingCandidateScorer> scorers = new ArrayList<MappingCandidateScorer>(); |
19 | | - protected Functor2<? extends Number, Number, Number> aggregator; |
| 20 | + protected Functor2<? extends Number, Number, Number> accumulator; |
20 | 21 | |
21 | | - public MappingCandidateMultiScorer(Functor2<? extends Number, Number, Number> aggregator, MappingCandidateScorer... scorers) { |
22 | | - if (aggregator==null) throw new NullPointerException(); |
23 | | - this.aggregator = aggregator; |
| 22 | + public MappingCandidateMultiScorer(Functor2<? extends Number, Number, Number> accumulator, MappingCandidateScorer... scorers) { |
| 23 | + if (accumulator==null) throw new NullPointerException(); |
| 24 | + this.accumulator = accumulator; |
24 | 25 | |
25 | 26 | for (MappingCandidateScorer scorer: scorers) { |
26 | 27 | addScorer(scorer); |
— | — | @@ -31,12 +32,12 @@ |
32 | 33 | scorers.add(scorer); |
33 | 34 | } |
34 | 35 | |
35 | | - public int getCandidateScore(FeatureSet subject, FeatureSet candidate) { |
| 36 | + public int getCandidateScore(ForeignEntityRecord subject, ConceptEntityRecord candidate) { |
36 | 37 | Number acc = null; |
37 | 38 | for (MappingCandidateScorer scorer: scorers) { |
38 | 39 | Number score = scorer.getCandidateScore(subject, candidate); |
39 | 40 | if (acc==null) acc = score; |
40 | | - else acc = aggregator.apply(acc, score); |
| 41 | + else acc = accumulator.apply(acc, score); |
41 | 42 | } |
42 | 43 | |
43 | 44 | return acc==null ? 0 : acc.intValue(); |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateFeatureScorer.java |
— | — | @@ -1,8 +1,9 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data.filter; |
3 | 3 | |
4 | 4 | import de.brightbyte.abstraction.PropertyAccessor; |
5 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
6 | | -import de.brightbyte.wikiword.integrator.data.FeatureSets; |
| 5 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
| 6 | +import de.brightbyte.wikiword.integrator.data.ForeignEntityRecord; |
| 7 | +import de.brightbyte.wikiword.integrator.data.Record; |
7 | 8 | |
8 | 9 | /** |
9 | 10 | * MappingCandidateScorer that determines the score directly from each FeatureSet's features, |
— | — | @@ -12,7 +13,7 @@ |
13 | 14 | */ |
14 | 15 | public class MappingCandidateFeatureScorer implements MappingCandidateScorer { |
15 | 16 | |
16 | | - protected PropertyAccessor<FeatureSet, ? extends Number> accessor; |
| 17 | + protected PropertyAccessor<Record, ? extends Number> accessor; |
17 | 18 | |
18 | 19 | /** |
19 | 20 | * Creates a MappingCandidateFeatureScorer that uses the given feature's value as the |
— | — | @@ -21,19 +22,19 @@ |
22 | 23 | * @param feature the feature to use |
23 | 24 | */ |
24 | 25 | public MappingCandidateFeatureScorer(String feature) { |
25 | | - this(FeatureSets.fieldAccessor(feature, Number.class)); |
| 26 | + this(new Record.Accessor<Number>(feature, Number.class)); |
26 | 27 | } |
27 | 28 | |
28 | 29 | /** |
29 | 30 | * Creates a MappingCandidateFeatureScorer that determines the score using the given accessor. |
30 | 31 | * @param accessor the accessor used to determine the score |
31 | 32 | */ |
32 | | - public MappingCandidateFeatureScorer(PropertyAccessor<FeatureSet, ? extends Number> accessor) { |
| 33 | + public MappingCandidateFeatureScorer(PropertyAccessor<Record, ? extends Number> accessor) { |
33 | 34 | if (accessor==null) throw new NullPointerException(); |
34 | 35 | this.accessor = accessor; |
35 | 36 | } |
36 | 37 | |
37 | | - public int getCandidateScore(FeatureSet subject, FeatureSet candidate) { |
| 38 | + public int getCandidateScore(ForeignEntityRecord subject, ConceptEntityRecord candidate) { |
38 | 39 | Number score = accessor.getValue(candidate); |
39 | 40 | return score.intValue(); |
40 | 41 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/FeatureSetHasFeatureFilter.java |
— | — | @@ -20,7 +20,7 @@ |
21 | 21 | } |
22 | 22 | |
23 | 23 | public boolean matches(FeatureSet fs) { |
24 | | - return fs.get(feature) != null; |
| 24 | + return fs.getFeatures(feature) != null; |
25 | 25 | } |
26 | 26 | |
27 | 27 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/RecordFieldValueFilter.java |
— | — | @@ -0,0 +1,88 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data.filter; |
| 3 | + |
| 4 | +import java.util.regex.Pattern; |
| 5 | + |
| 6 | +import de.brightbyte.data.filter.Filter; |
| 7 | +import de.brightbyte.wikiword.integrator.data.Record; |
| 8 | + |
| 9 | +/** |
| 10 | + * Filter that matches any Record with a given field, where the field's value (or, if the value |
| 11 | + * is iterable, one of its elements) matches a given value filter. Some default filters for field |
| 12 | + * values are given as static inner classes of RecordFieldValueFilter. |
| 13 | + * |
| 14 | + * @author daniel |
| 15 | + * |
| 16 | + * @param <T> The type of the field values |
| 17 | + */ |
| 18 | +public class RecordFieldValueFilter<T> implements Filter<Record> { |
| 19 | + |
| 20 | + /** |
| 21 | + * Filter matching one specific value. |
| 22 | + * |
| 23 | + * @author daniel |
| 24 | + * |
| 25 | + * @param <V> The type of the values to filter |
| 26 | + */ |
| 27 | + public static class ValueFilter<V> implements Filter<V> { |
| 28 | + protected V value; |
| 29 | + |
| 30 | + public ValueFilter(V value) { |
| 31 | + this.value = value; |
| 32 | + } |
| 33 | + |
| 34 | + public boolean matches(V obj) { |
| 35 | + if (obj==value) return true; |
| 36 | + if (obj==null) return false; |
| 37 | + return obj.equals(value); |
| 38 | + } |
| 39 | + } |
| 40 | + |
| 41 | + /** |
| 42 | + * Filter matching any String or other CharSequence that matches a given regular expression. |
| 43 | + * |
| 44 | + * @author daniel |
| 45 | + */ |
| 46 | + public static class PatternFilter implements Filter<CharSequence> { |
| 47 | + protected Pattern pattern; |
| 48 | + |
| 49 | + public PatternFilter(String pattern, int flags) { |
| 50 | + this(Pattern.compile(pattern, flags)); |
| 51 | + } |
| 52 | + |
| 53 | + public PatternFilter(Pattern pattern) { |
| 54 | + if (pattern==null) throw new NullPointerException(); |
| 55 | + this.pattern = pattern; |
| 56 | + } |
| 57 | + |
| 58 | + public boolean matches(CharSequence obj) { |
| 59 | + if (obj==null) return false; |
| 60 | + return pattern.matcher(obj).matches(); |
| 61 | + } |
| 62 | + } |
| 63 | + |
| 64 | + protected String field; |
| 65 | + protected Filter<T> filter; |
| 66 | + |
| 67 | + public RecordFieldValueFilter(String field, Filter<T> filter) { |
| 68 | + if (field==null) throw new NullPointerException(); |
| 69 | + if (filter==null) throw new NullPointerException(); |
| 70 | + this.field = field; |
| 71 | + this.filter = filter; |
| 72 | + } |
| 73 | + |
| 74 | + public boolean matches(Record r) { |
| 75 | + Object value = r.get(field); |
| 76 | + if (value==null) return false; |
| 77 | + |
| 78 | + if (value instanceof Iterable) { |
| 79 | + for (Object v: (Iterable)value) { |
| 80 | + if (filter.matches((T)v)) return true; |
| 81 | + } |
| 82 | + |
| 83 | + return false; |
| 84 | + } else { |
| 85 | + return filter.matches((T)value); |
| 86 | + } |
| 87 | + } |
| 88 | + |
| 89 | +} |
Property changes on: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/RecordFieldValueFilter.java |
___________________________________________________________________ |
Name: svn:mergeinfo |
1 | 90 | + |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/FeatureSetFeatureValueFilter.java |
— | — | @@ -5,6 +5,8 @@ |
6 | 6 | |
7 | 7 | import de.brightbyte.data.filter.Filter; |
8 | 8 | import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 9 | +import de.brightbyte.wikiword.integrator.data.Record; |
| 10 | +import de.brightbyte.wikiword.integrator.data.FeatureSet.Feature; |
9 | 11 | |
10 | 12 | /** |
11 | 13 | * Filter that matches any FeatureSet with a given feature, where one of the feature's values |
— | — | @@ -72,11 +74,11 @@ |
73 | 75 | } |
74 | 76 | |
75 | 77 | public boolean matches(FeatureSet fs) { |
76 | | - Collection<Object> values = fs.get(feature); |
| 78 | + Collection<? extends Feature<? extends Object>> values = fs.getFeatures(feature); |
77 | 79 | if (values==null) return false; |
78 | 80 | |
79 | | - for (Object v: values) { |
80 | | - if (filter.matches((T)v)) return true; |
| 81 | + for (Feature<? extends Object> v: values) { |
| 82 | + if (filter.matches((T)v.getValue())) return true; |
81 | 83 | } |
82 | 84 | |
83 | 85 | return false; |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateScorer.java |
— | — | @@ -1,6 +1,7 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data.filter; |
3 | 3 | |
4 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 4 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
| 5 | +import de.brightbyte.wikiword.integrator.data.ForeignEntityRecord; |
5 | 6 | |
6 | 7 | /** |
7 | 8 | * Determines a score value for a mapping between two given records, the |
— | — | @@ -9,5 +10,5 @@ |
10 | 11 | * @author daniel |
11 | 12 | */ |
12 | 13 | public interface MappingCandidateScorer { |
13 | | - public int getCandidateScore(FeatureSet subject, FeatureSet candidate); |
| 14 | + public int getCandidateScore(ForeignEntityRecord subject, ConceptEntityRecord candidate); |
14 | 15 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/BestMappingCandidateSelector.java |
— | — | @@ -1,5 +1,6 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data.filter; |
3 | 3 | |
| 4 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
4 | 5 | import de.brightbyte.wikiword.integrator.data.FeatureSet; |
5 | 6 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
6 | 7 | |
— | — | @@ -18,11 +19,11 @@ |
19 | 20 | this.scorer = scorer; |
20 | 21 | } |
21 | 22 | |
22 | | - public FeatureSet selectCandidate(MappingCandidates candidates) { |
| 23 | + public ConceptEntityRecord selectCandidate(MappingCandidates candidates) { |
23 | 24 | int bestScore = 0; |
24 | | - FeatureSet best =null; |
| 25 | + ConceptEntityRecord best =null; |
25 | 26 | |
26 | | - for (FeatureSet candidate: candidates.getCandidates()) { |
| 27 | + for (ConceptEntityRecord candidate: candidates.getCandidates()) { |
27 | 28 | int score = scorer.getCandidateScore(candidates.getSubject(), candidate); |
28 | 29 | |
29 | 30 | if (best==null) { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateSelector.java |
— | — | @@ -1,6 +1,6 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data.filter; |
3 | 3 | |
4 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 4 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
5 | 5 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
6 | 6 | |
7 | 7 | /** |
— | — | @@ -18,5 +18,5 @@ |
19 | 19 | * @param candidates |
20 | 20 | * @return the best candidate, or null to indicate that no sufficiently good candidate was found. |
21 | 21 | */ |
22 | | - public FeatureSet selectCandidate(MappingCandidates candidates); |
| 22 | + public ConceptEntityRecord selectCandidate(MappingCandidates candidates); |
23 | 23 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateThresholdFilter.java |
— | — | @@ -3,7 +3,7 @@ |
4 | 4 | import java.util.ArrayList; |
5 | 5 | import java.util.Collection; |
6 | 6 | |
7 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 7 | +import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord; |
8 | 8 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
9 | 9 | |
10 | 10 | /** |
— | — | @@ -23,10 +23,10 @@ |
24 | 24 | this.threshold = threshold; |
25 | 25 | } |
26 | 26 | |
27 | | - public Collection<FeatureSet> filterCandidates(MappingCandidates candidates) { |
28 | | - ArrayList<FeatureSet> res = new ArrayList<FeatureSet>(); |
| 27 | + public Collection<ConceptEntityRecord> filterCandidates(MappingCandidates candidates) { |
| 28 | + ArrayList<ConceptEntityRecord> res = new ArrayList<ConceptEntityRecord>(); |
29 | 29 | |
30 | | - for (FeatureSet candidate: candidates.getCandidates()) { |
| 30 | + for (ConceptEntityRecord candidate: candidates.getCandidates()) { |
31 | 31 | int score = scorer.getCandidateScore(candidates.getSubject(), candidate); |
32 | 32 | if (score>=threshold) res.add(candidate); |
33 | 33 | } |