r53547 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r53546‎ | r53547 | r53548 >
Date:11:44, 20 July 2009
Author:daniel
Status:deferred
Tags:
Comment:
rewriting integrator data model: fixing filters. intermediate commit, DOES NOT COMPILE
Modified paths:
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSet.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FilteredMappingCandidateCursor.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/BestMappingCandidateSelector.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/FeatureSetFeatureValueFilter.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/FeatureSetHasFeatureFilter.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateFeatureScorer.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateMultiScorer.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateScorer.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateSelector.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateSelectorFilter.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateThresholdFilter.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/RecordFieldValueFilter.java (from /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/FeatureSetFeatureValueFilter.java) (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/UniqueMappingCandidateSelector.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FilteredMappingCandidateCursor.java
@@ -19,18 +19,18 @@
2020 protected MappingCandidateFilter filter;
2121
2222 public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, String field) {
23 - this(cursor, FeatureSets.fieldAccessor(field, Integer.class));
 23+ this(cursor, new Record.Accessor<Integer>(field, Integer.class));
2424 }
2525
2626 public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, String field, int threshold) {
27 - this(cursor, FeatureSets.fieldAccessor(field, Integer.class), threshold);
 27+ this(cursor, new Record.Accessor<Integer>(field, Integer.class), threshold);
2828 }
2929
30 - public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<FeatureSet, ? extends Number> accessor) {
 30+ public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<Record, ? extends Number> accessor) {
3131 this(cursor, new MappingCandidateFeatureScorer(accessor));
3232 }
3333
34 - public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<FeatureSet, ? extends Number> accessor, int threshold) {
 34+ public FilteredMappingCandidateCursor(DataCursor<MappingCandidates> cursor, PropertyAccessor<Record, ? extends Number> accessor, int threshold) {
3535 this(cursor, new MappingCandidateFeatureScorer(accessor), threshold);
3636 }
3737
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSet.java
@@ -2,6 +2,9 @@
33
44 import java.util.Collection;
55
 6+import de.brightbyte.abstraction.AbstractAccessor;
 7+import de.brightbyte.abstraction.PropertyAccessor;
 8+import de.brightbyte.data.Aggregator;
69 import de.brightbyte.data.LabeledVector;
710
811 public interface FeatureSet {
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateSelectorFilter.java
@@ -3,7 +3,7 @@
44 import java.util.Collection;
55 import java.util.Collections;
66
7 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
 7+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
88 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
99
1010 /**
@@ -21,8 +21,8 @@
2222 this.selector = selector;
2323 }
2424
25 - public Collection<FeatureSet> filterCandidates(MappingCandidates candidates) {
26 - FeatureSet selected = selector.selectCandidate(candidates);
 25+ public Collection<ConceptEntityRecord> filterCandidates(MappingCandidates candidates) {
 26+ ConceptEntityRecord selected = selector.selectCandidate(candidates);
2727 if (selected==null) return Collections.emptyList();
2828 else return Collections.singleton(selected);
2929 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/UniqueMappingCandidateSelector.java
@@ -2,7 +2,7 @@
33
44 import java.util.Collection;
55
6 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
 6+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
77 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
88
99 /**
@@ -13,8 +13,8 @@
1414 */
1515 public class UniqueMappingCandidateSelector implements MappingCandidateSelector {
1616
17 - public FeatureSet selectCandidate(MappingCandidates candidates) {
18 - Collection<FeatureSet> cand = candidates.getCandidates();
 17+ public ConceptEntityRecord selectCandidate(MappingCandidates candidates) {
 18+ Collection<ConceptEntityRecord> cand = candidates.getCandidates();
1919
2020 if (cand.size()==1) return cand.iterator().next();
2121 else return null;
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateMultiScorer.java
@@ -4,22 +4,23 @@
55 import java.util.Collection;
66
77 import de.brightbyte.data.Functor2;
8 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
 8+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
 9+import de.brightbyte.wikiword.integrator.data.ForeignEntityRecord;
910
1011 /**
1112 * MappingCandidateScorer that accumulates the scores from multiple MappingCandidateScorers into a single score,
12 - * using a aggregator functor (usually the sum or maximum, as implemented by Functors.Integer.sum resp. Functors.Integer.max).
 13+ * using an accumulator functor (usually the sum or maximum, as implemented by Functors.Integer.sum resp. Functors.Integer.max).
1314 *
1415 * @author daniel
1516 */
1617 public class MappingCandidateMultiScorer implements MappingCandidateScorer {
1718
1819 protected Collection<MappingCandidateScorer> scorers = new ArrayList<MappingCandidateScorer>();
19 - protected Functor2<? extends Number, Number, Number> aggregator;
 20+ protected Functor2<? extends Number, Number, Number> accumulator;
2021
21 - public MappingCandidateMultiScorer(Functor2<? extends Number, Number, Number> aggregator, MappingCandidateScorer... scorers) {
22 - if (aggregator==null) throw new NullPointerException();
23 - this.aggregator = aggregator;
 22+ public MappingCandidateMultiScorer(Functor2<? extends Number, Number, Number> accumulator, MappingCandidateScorer... scorers) {
 23+ if (accumulator==null) throw new NullPointerException();
 24+ this.accumulator = accumulator;
2425
2526 for (MappingCandidateScorer scorer: scorers) {
2627 addScorer(scorer);
@@ -31,12 +32,12 @@
3233 scorers.add(scorer);
3334 }
3435
35 - public int getCandidateScore(FeatureSet subject, FeatureSet candidate) {
 36+ public int getCandidateScore(ForeignEntityRecord subject, ConceptEntityRecord candidate) {
3637 Number acc = null;
3738 for (MappingCandidateScorer scorer: scorers) {
3839 Number score = scorer.getCandidateScore(subject, candidate);
3940 if (acc==null) acc = score;
40 - else acc = aggregator.apply(acc, score);
 41+ else acc = accumulator.apply(acc, score);
4142 }
4243
4344 return acc==null ? 0 : acc.intValue();
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateFeatureScorer.java
@@ -1,8 +1,9 @@
22 package de.brightbyte.wikiword.integrator.data.filter;
33
44 import de.brightbyte.abstraction.PropertyAccessor;
5 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
6 -import de.brightbyte.wikiword.integrator.data.FeatureSets;
 5+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
 6+import de.brightbyte.wikiword.integrator.data.ForeignEntityRecord;
 7+import de.brightbyte.wikiword.integrator.data.Record;
78
89 /**
910 * MappingCandidateScorer that determines the score directly from each FeatureSet's features,
@@ -12,7 +13,7 @@
1314 */
1415 public class MappingCandidateFeatureScorer implements MappingCandidateScorer {
1516
16 - protected PropertyAccessor<FeatureSet, ? extends Number> accessor;
 17+ protected PropertyAccessor<Record, ? extends Number> accessor;
1718
1819 /**
1920 * Creates a MappingCandidateFeatureScorer that uses the given feature's value as the
@@ -21,19 +22,19 @@
2223 * @param field feature
2324 */
2425 public MappingCandidateFeatureScorer(String feature) {
25 - this(FeatureSets.fieldAccessor(feature, Number.class));
 26+ this(new Record.Accessor<Number>(feature, Number.class));
2627 }
2728
2829 /**
2930 * Creates a MappingCandidateFeatureScorer that determines the score using the given accessor.
3031 * @param field accessor
3132 */
32 - public MappingCandidateFeatureScorer(PropertyAccessor<FeatureSet, ? extends Number> accessor) {
 33+ public MappingCandidateFeatureScorer(PropertyAccessor<Record, ? extends Number> accessor) {
3334 if (accessor==null) throw new NullPointerException();
3435 this.accessor = accessor;
3536 }
3637
37 - public int getCandidateScore(FeatureSet subject, FeatureSet candidate) {
 38+ public int getCandidateScore(ForeignEntityRecord subject, ConceptEntityRecord candidate) {
3839 Number score = accessor.getValue(candidate);
3940 return score.intValue();
4041 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/FeatureSetHasFeatureFilter.java
@@ -20,7 +20,7 @@
2121 }
2222
2323 public boolean matches(FeatureSet fs) {
24 - return fs.get(feature) != null;
 24+ return fs.getFeatures(feature) != null;
2525 }
2626
2727 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/RecordFieldValueFilter.java
@@ -0,0 +1,88 @@
 2+package de.brightbyte.wikiword.integrator.data.filter;
 3+
 4+import java.util.regex.Pattern;
 5+
 6+import de.brightbyte.data.filter.Filter;
 7+import de.brightbyte.wikiword.integrator.data.Record;
 8+
 9+/**
 10+ * Filter that matches any Record with a given field, where the field's value (or, if the
 11+ * value is an Iterable, one of its elements) matches a given value filter. Some default
 12+ * filters for field values are given as static inner classes of RecordFieldValueFilter.
 13+ *
 14+ * @author daniel
 15+ *
 16+ * @param <T> The type of the field values
 17+ */
 18+public class RecordFieldValueFilter<T> implements Filter<Record> {
 19+
 20+ /**
 21+ * Filter matching one specific value.
 22+ *
 23+ * @author daniel
 24+ *
 25+ * @param <V> The type of the values to filter
 26+ */
 27+ public static class ValueFilter<V> implements Filter<V> {
 28+ protected V value;
 29+
 30+ public ValueFilter(V value) {
 31+ this.value = value;
 32+ }
 33+
 34+ public boolean matches(V obj) {
 35+ if (obj==value) return true;
 36+ if (obj==null) return false;
 37+ return obj.equals(value);
 38+ }
 39+ }
 40+
 41+ /**
 42+ * Filter matching any String or other CharSequence that matches a given regular expression.
 43+ *
 44+ * @author daniel
 45+ */
 46+ public static class PatternFilter implements Filter<CharSequence> {
 47+ protected Pattern pattern;
 48+
 49+ public PatternFilter(String pattern, int flags) {
 50+ this(Pattern.compile(pattern, flags));
 51+ }
 52+
 53+ public PatternFilter(Pattern pattern) {
 54+ if (pattern==null) throw new NullPointerException();
 55+ this.pattern = pattern;
 56+ }
 57+
 58+ public boolean matches(CharSequence obj) {
 59+ if (obj==null) return false;
 60+ return pattern.matcher(obj).matches();
 61+ }
 62+ }
 63+
 64+ protected String field;
 65+ protected Filter<T> filter;
 66+
 67+ public RecordFieldValueFilter(String field, Filter<T> filter) {
 68+ if (field==null) throw new NullPointerException();
 69+ if (filter==null) throw new NullPointerException();
 70+ this.field = field;
 71+ this.filter = filter;
 72+ }
 73+
 74+ public boolean matches(Record r) {
 75+ Object value = r.get(field);
 76+ if (value==null) return false;
 77+
 78+ if (value instanceof Iterable) {
 79+ for (Object v: (Iterable)value) {
 80+ if (filter.matches((T)v)) return true;
 81+ }
 82+
 83+ return false;
 84+ } else {
 85+ return filter.matches((T)value);
 86+ }
 87+ }
 88+
 89+}
Property changes on: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/RecordFieldValueFilter.java
___________________________________________________________________
Name: svn:mergeinfo
190 +
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/FeatureSetFeatureValueFilter.java
@@ -5,6 +5,8 @@
66
77 import de.brightbyte.data.filter.Filter;
88 import de.brightbyte.wikiword.integrator.data.FeatureSet;
 9+import de.brightbyte.wikiword.integrator.data.Record;
 10+import de.brightbyte.wikiword.integrator.data.FeatureSet.Feature;
911
1012 /**
1113 * Filter that matches any FeatureSet with a given feature, where one of the feature's values
@@ -72,11 +74,11 @@
7375 }
7476
7577 public boolean matches(FeatureSet fs) {
76 - Collection<Object> values = fs.get(feature);
 78+ Collection<? extends Feature<? extends Object>> values = fs.getFeatures(feature);
7779 if (values==null) return false;
7880
79 - for (Object v: values) {
80 - if (filter.matches((T)v)) return true;
 81+ for (Feature<? extends Object> v: values) {
 82+ if (filter.matches((T)v.getValue())) return true;
8183 }
8284
8385 return false;
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateScorer.java
@@ -1,6 +1,7 @@
22 package de.brightbyte.wikiword.integrator.data.filter;
33
4 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
 4+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
 5+import de.brightbyte.wikiword.integrator.data.ForeignEntityRecord;
56
67 /**
78 * Determines a score value for a mapping between two given FeatureSets, the
@@ -9,5 +10,5 @@
1011 * @author daniel
1112 */
1213 public interface MappingCandidateScorer {
13 - public int getCandidateScore(FeatureSet subject, FeatureSet candidate);
 14+ public int getCandidateScore(ForeignEntityRecord subject, ConceptEntityRecord candidate);
1415 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/BestMappingCandidateSelector.java
@@ -1,5 +1,6 @@
22 package de.brightbyte.wikiword.integrator.data.filter;
33
 4+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
45 import de.brightbyte.wikiword.integrator.data.FeatureSet;
56 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
67
@@ -18,11 +19,11 @@
1920 this.scorer = scorer;
2021 }
2122
22 - public FeatureSet selectCandidate(MappingCandidates candidates) {
 23+ public ConceptEntityRecord selectCandidate(MappingCandidates candidates) {
2324 int bestScore = 0;
24 - FeatureSet best =null;
 25+ ConceptEntityRecord best =null;
2526
26 - for (FeatureSet candidate: candidates.getCandidates()) {
 27+ for (ConceptEntityRecord candidate: candidates.getCandidates()) {
2728 int score = scorer.getCandidateScore(candidates.getSubject(), candidate);
2829
2930 if (best==null) {
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateSelector.java
@@ -1,6 +1,6 @@
22 package de.brightbyte.wikiword.integrator.data.filter;
33
4 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
 4+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
55 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
66
77 /**
@@ -18,5 +18,5 @@
1919 * @param candidates
2020 * @return the best candidate, or null to indicate that no sufficiently good candidate was found.
2121 */
22 - public FeatureSet selectCandidate(MappingCandidates candidates);
 22+ public ConceptEntityRecord selectCandidate(MappingCandidates candidates);
2323 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/filter/MappingCandidateThresholdFilter.java
@@ -3,7 +3,7 @@
44 import java.util.ArrayList;
55 import java.util.Collection;
66
7 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
 7+import de.brightbyte.wikiword.integrator.data.ConceptEntityRecord;
88 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
99
1010 /**
@@ -23,10 +23,10 @@
2424 this.threshold = threshold;
2525 }
2626
27 - public Collection<FeatureSet> filterCandidates(MappingCandidates candidates) {
28 - ArrayList<FeatureSet> res = new ArrayList<FeatureSet>();
 27+ public Collection<ConceptEntityRecord> filterCandidates(MappingCandidates candidates) {
 28+ ArrayList<ConceptEntityRecord> res = new ArrayList<ConceptEntityRecord>();
2929
30 - for (FeatureSet candidate: candidates.getCandidates()) {
 30+ for (ConceptEntityRecord candidate: candidates.getCandidates()) {
3131 int score = scorer.getCandidateScore(candidates.getSubject(), candidate);
3232 if (score>=threshold) res.add(candidate);
3333 }

Status & tagging log