Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptMappingsTest.java |
— | — | @@ -0,0 +1,22 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +public class BuildConceptMappingsTest extends IntegratorAppTestBase<BuildConceptMappings> { //test harness that runs BuildConceptMappings via IntegratorAppTestBase |
| 5 | + |
| 6 | + public BuildConceptMappingsTest() { |
| 7 | + super("BuildConceptMappingsTest"); //name passed to the base class; matches the prefix of the BuildConceptMappingsTest-*.properties/.sql/.data fixtures |
| 8 | + |
| 9 | + dumpActual = true; //NOTE(review): dump flags are switched on unconditionally; presumably debugging output - confirm they should stay enabled |
| 10 | + dumpExpected = true; |
| 11 | + } |
| 12 | + |
| 13 | + //----------------------------------------------------------------------------------------------------- |
| 14 | + public void testMatchTerms() throws Exception { //NOTE(review): despite the name, this runs the "selectOptimum" case, not "matchTerms" |
| 15 | + runApp("selectOptimum"); |
| 16 | + } |
| 17 | + |
| 18 | + @Override |
| 19 | + protected BuildConceptMappings createApp() { //supplies the application instance under test |
| 20 | + return new BuildConceptMappings(); |
| 21 | + } |
| 22 | + |
| 23 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptAssociationsTest-matchTerms.properties |
— | — | @@ -1,7 +1,7 @@ |
2 | 2 | file="BuildConceptAssociationsTest-matchTerms.sql" |
3 | 3 | authority="acme" |
4 | 4 | foreign-id-field="foreign_id" |
5 | | -concept-id-field="concept_id" |
| 5 | +concept-id-field="concept" |
6 | 6 | concept-name-field="concept_name" |
7 | 7 | foreign-property-field="foreign_property" |
8 | 8 | concept-property-field="concept_property" |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptMappingsTest-selectOptimum.properties |
— | — | @@ -0,0 +1,7 @@ |
| 2 | +file="BuildConceptMappingsTest-selectOptimum.sql" |
| 3 | +authority="acme" |
| 4 | +foreign-authority-field="foreign_authority" |
| 5 | +foreign-id-field="foreign_id" |
| 6 | +concept-id-field="concept" |
| 7 | +concept-name-field="concept_name" |
| 8 | +optimization-field="weight" |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptAssociationsTest-matchTerms.sql |
— | — | @@ -1,5 +1,5 @@ |
2 | 2 | select "foreign_id", |
3 | | - M."concept" as "concept_id", M."concept_name" as "concept_name", |
| 3 | + M."concept" as "concept", M."concept_name" as "concept_name", |
4 | 4 | F."property" as "foreign_property", F."value" as "value", |
5 | 5 | 'term' as "concept_property", M."freq" as "concept_property_freq", M."rule" as "concept_property_source", |
6 | 6 | "freq" * "rule" as "weight" |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptMappingsTest-selectOptimum.sql |
— | — | @@ -0,0 +1 @@ |
| 2 | +select * from "TEST_xx_assoc"; |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptMappingsTest-selectOptimum.create.sql |
— | — | @@ -0,0 +1,13 @@ |
| 2 | +CREATE TABLE "TEST_xx_assoc" ( |
| 3 | + "foreign_authority" VARCHAR(64), |
| 4 | + "foreign_id" VARCHAR(255), |
| 5 | + "foreign_name" VARCHAR(255), |
| 6 | + "concept" INT, |
| 7 | + "concept_name" VARCHAR(255), |
| 8 | + "foreign_property" VARCHAR(64), |
| 9 | + "concept_property" VARCHAR(64), |
| 10 | + "concept_property_source" VARCHAR(64), |
| 11 | + "concept_property_freq" INT, |
| 12 | + "value" VARCHAR(255), |
| 13 | + "weight" DECIMAL(8,2) |
| 14 | +); |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptMappingsTest-selectOptimum.initial.data |
— | — | @@ -0,0 +1,12 @@ |
| 2 | +<?xml version='1.0' encoding='UTF-8'?> |
| 3 | +<dataset> |
| 4 | + <TEST_xx_assoc foreign_authority="acme" foreign_id="FooBar" foreign_name="FooBar" concept="10" concept_name="Bar Foo" value="B.F." foreign_property="alias" weight="10.0" concept_property="term" concept_property_source="10" concept_property_freq="1"/> |
| 5 | + <TEST_xx_assoc foreign_authority="acme" foreign_id="FooBar" foreign_name="FooBar" concept="10" concept_name="Bar Foo" value="Bar Foo" foreign_property="fullName" weight="360.0" concept_property="term" concept_property_source="60" concept_property_freq="6"/> |
| 6 | + <TEST_xx_assoc foreign_authority="acme" foreign_id="FooBar" foreign_name="FooBar" concept="10" concept_name="Bar Foo" value="Foo, Bar" foreign_property="sortName" weight="50.0" concept_property="term" concept_property_source="50" concept_property_freq="1"/> |
| 7 | + <TEST_xx_assoc foreign_authority="acme" foreign_id="FooBar" foreign_name="FooBar" concept="12" concept_name="Bar Foo (soap)" value="Bar Foo" foreign_property="fullName" weight="60.0" concept_property="term" concept_property_source="60" concept_property_freq="1"/> |
| 8 | + |
| 9 | + <TEST_xx_assoc foreign_authority="acme" foreign_id="XyzzyQuux" foreign_name="XyzzyQuux" concept="20" concept_name="Quux Xyzzy" value="QX" foreign_property="alias" weight="80.0" concept_property="term" concept_property_source="40" concept_property_freq="2"/> |
| 10 | + <TEST_xx_assoc foreign_authority="acme" foreign_id="XyzzyQuux" foreign_name="XyzzyQuux" concept="20" concept_name="Quux Xyzzy" value="Quux Xyzzy" foreign_property="fullName" weight="360.0" concept_property="term" concept_property_source="60" concept_property_freq="6"/> |
| 11 | + <TEST_xx_assoc foreign_authority="acme" foreign_id="XyzzyQuux" foreign_name="XyzzyQuux" concept="21" concept_name="Quality Xchange" value="QX" foreign_property="alias" weight="10.0" concept_property="term" concept_property_source="10" concept_property_freq="1"/> |
| 12 | +</dataset> |
| 13 | + |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/BuildConceptMappingsTest-selectOptimum.expected.data |
— | — | @@ -0,0 +1,6 @@ |
| 2 | +<?xml version='1.0' encoding='UTF-8'?> |
| 3 | +<dataset> |
| 4 | + <TEST_xx_selectOptimum foreign_authority="acme" foreign_id="FooBar" foreign_name="FooBar" concept="10" concept_name="Bar Foo" weight="420.0" /> |
| 5 | + <TEST_xx_selectOptimum foreign_authority="acme" foreign_id="XyzzyQuux" foreign_name="XyzzyQuux" concept="20" concept_name="Quux Xyzzy" weight="440.0"/> |
| 6 | +</dataset> |
| 7 | + |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/OptimalMappingSelector.java |
— | — | @@ -5,7 +5,6 @@ |
6 | 6 | |
7 | 7 | import de.brightbyte.abstraction.PropertyAccessor; |
8 | 8 | import de.brightbyte.data.Functor; |
9 | | -import de.brightbyte.data.NaturalComparator; |
10 | 9 | import de.brightbyte.data.Optimum; |
11 | 10 | import de.brightbyte.data.PropertyComparator; |
12 | 11 | import de.brightbyte.util.PersistenceException; |
— | — | @@ -17,8 +16,8 @@ |
18 | 17 | |
19 | 18 | protected Optimum<FeatureSet> optimum; |
20 | 19 | |
21 | | - public OptimalMappingSelector(AssociationFeatureStoreBuilder store, String property, Functor<Number, ? extends Collection<Number>> aggregator) { |
22 | | - this(store, (Comparator<FeatureSet>)(Object)PropertyComparator.newMultiMapEntryComparator(property, (Comparator<Number>)(Object)NaturalComparator.instance, aggregator, Number.class)); |
| 20 | + public <T>OptimalMappingSelector(AssociationFeatureStoreBuilder store, String property, Functor<T, ? extends Collection<T>> aggregator, Comparator<T> comp, Class<T> type) { |
| 21 | + this(store, (Comparator<FeatureSet>)(Object)PropertyComparator.newMultiMapEntryComparator(property, comp, aggregator, type)); |
23 | 22 | } |
24 | 23 | |
25 | 24 | public OptimalMappingSelector(AssociationFeatureStoreBuilder store, PropertyAccessor<FeatureSet, Number> accessor) { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java |
— | — | @@ -9,6 +9,8 @@ |
10 | 10 | import java.sql.SQLException; |
11 | 11 | import java.util.Arrays; |
12 | 12 | import java.util.Collection; |
| 13 | +import java.util.Collections; |
| 14 | +import java.util.List; |
13 | 15 | import java.util.Map; |
14 | 16 | import java.util.regex.Pattern; |
15 | 17 | |
— | — | @@ -138,29 +140,15 @@ |
139 | 141 | Iterable<String> propertyFields = sourceDescriptor.getTweak("property-fields", (Iterable<String>)null); |
140 | 142 | |
141 | 143 | if (foreignFields==null) { |
142 | | - foreignFields = Arrays.asList(new String[] { |
143 | | - sourceDescriptor.getTweak("foreign-id-field", (String)null), |
144 | | - sourceDescriptor.getTweak("foreign-name-field", (String)null) |
145 | | - }); |
| 144 | + foreignFields = getDefaultForeignFields(); |
146 | 145 | } |
147 | 146 | |
148 | 147 | if (conceptFields==null) { |
149 | | - conceptFields = Arrays.asList(new String[] { |
150 | | - sourceDescriptor.getTweak("concept-id-field", (String)null), |
151 | | - sourceDescriptor.getTweak("concept-name-field", (String)null) |
152 | | - }); |
| 148 | + conceptFields = getDefaultConceptFields(); |
153 | 149 | } |
154 | 150 | |
155 | 151 | if (propertyFields==null) { |
156 | | - propertyFields = Arrays.asList(new String[] { |
157 | | - sourceDescriptor.getTweak("foreign-property-field", (String)null), |
158 | | - sourceDescriptor.getTweak("concept-property-field", (String)null), |
159 | | - sourceDescriptor.getTweak("concept-property-source-field", (String)null), |
160 | | - sourceDescriptor.getTweak("concept-property-freq-field", (String)null), |
161 | | - sourceDescriptor.getTweak("association-annotation-field", (String)null), |
162 | | - sourceDescriptor.getTweak("association-weight-field", (String)null), |
163 | | - sourceDescriptor.getTweak("association-value-field", (String)null) |
164 | | - }); |
| 152 | + propertyFields = getDefaultPropertyFields(); |
165 | 153 | } |
166 | 154 | |
167 | 155 | DataCursor<FeatureSet> fsc = openFeatureSetCursor(); |
— | — | @@ -174,6 +162,34 @@ |
175 | 163 | return cursor; |
176 | 164 | } |
177 | 165 | |
| 166 | + protected List<String> getDefaultForeignFields() { //field names for the foreign side of an association, read from the source descriptor's tweaks |
| 167 | + return Arrays.asList(new String[] { |
| 168 | + sourceDescriptor.getTweak("foreign-authority-field", "=" + sourceDescriptor.getAuthorityName()), //default is "=" + authority name; FeatureSets.fieldAccessor treats a leading "=" as a constant - NOTE(review): confirm the consumer of this list honors that convention too |
| 169 | + sourceDescriptor.getTweak("foreign-id-field", (String)null), //presumably null when the tweak is not configured, so the list may contain null entries |
| 170 | + sourceDescriptor.getTweak("foreign-name-field", (String)null) |
| 171 | + }); |
| 172 | + } |
| 173 | + |
| 174 | + protected List<String> getDefaultConceptFields() { |
| 175 | + return Arrays.asList(new String[] { |
| 176 | + sourceDescriptor.getTweak("concept-id-field", (String)null), |
| 177 | + sourceDescriptor.getTweak("concept-name-field", (String)null) |
| 178 | + }); |
| 179 | + } |
| 180 | + |
| 181 | + protected List<String> getDefaultPropertyFields() { |
| 182 | + return Arrays.asList(new String[] { |
| 183 | + sourceDescriptor.getTweak("foreign-property-field", (String)null), |
| 184 | + sourceDescriptor.getTweak("concept-property-field", (String)null), |
| 185 | + sourceDescriptor.getTweak("concept-property-source-field", (String)null), |
| 186 | + sourceDescriptor.getTweak("concept-property-freq-field", (String)null), |
| 187 | + sourceDescriptor.getTweak("association-weight-field", (String)null), |
| 188 | + sourceDescriptor.getTweak("association-value-field", (String)null), |
| 189 | + sourceDescriptor.getTweak("association-annotation-field", (String)null), |
| 190 | + sourceDescriptor.getTweak("optimization-field", (String)null) |
| 191 | + }); |
| 192 | + } |
| 193 | + |
178 | 194 | protected DataCursor<FeatureSet> openFeatureSetCursor() throws IOException, SQLException, PersistenceException { |
179 | 195 | FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor(); |
180 | 196 | |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureMapping.java |
— | — | @@ -1,9 +1,12 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data; |
3 | 3 | |
| 4 | +import java.util.Collection; |
4 | 5 | import java.util.HashMap; |
5 | 6 | import java.util.Map; |
6 | 7 | |
| 8 | +import de.brightbyte.abstraction.MultiMapAbstractor; |
7 | 9 | import de.brightbyte.abstraction.PropertyAccessor; |
| 10 | +import de.brightbyte.data.Functor; |
8 | 11 | import de.brightbyte.db.DatabaseUtil; |
9 | 12 | import de.brightbyte.wikiword.integrator.FeatureSetSourceDescriptor; |
10 | 13 | |
— | — | @@ -14,18 +17,27 @@ |
15 | 18 | |
16 | 19 | } |
17 | 20 | |
| 21 | + public String toString() { |
| 22 | + return accessors.toString(); |
| 23 | + } |
| 24 | + |
18 | 25 | public void addMapping(String field, PropertyAccessor<FeatureSet, ?> accessor) { |
19 | 26 | accessors.put(field, accessor); |
20 | 27 | } |
21 | 28 | |
22 | | - public <T>void addMapping(String field, String feature, Class<T> type) { |
23 | | - PropertyAccessor<FeatureSet, T> accessor = FeatureSets.fieldAccessor(feature, type); |
| 29 | + public <T>void addMapping(String field, String feature, Class<T> type, Functor<?, ? extends Collection<?>> aggregator) { |
| 30 | + PropertyAccessor<FeatureSet, T> accessor; |
| 31 | + |
| 32 | + if (aggregator==null) accessor = FeatureSets.fieldAccessor(feature, type); |
| 33 | + else accessor = (PropertyAccessor<FeatureSet, T>)(Object)MultiMapAbstractor.accessor(feature, (Functor<T, ? extends Collection<T>>)aggregator, type); |
| 34 | + |
24 | 35 | addMapping(field, accessor); |
25 | 36 | } |
26 | 37 | |
27 | | - public <T>void addMapping(String field, FeatureSetSourceDescriptor source, String option, String defaultFeature, Class<T> type) { |
28 | | - String feature = source.getTweak(option, defaultFeature); |
29 | | - if (feature!=null) addMapping(field, feature, type); |
| 38 | + //FIXME: using Functor<T, ? extends Collection<T>> aggregator would be nice, but doesn't work with Functors.Double.sum, etc |
| 39 | + public <T>void addMapping(String field, FeatureSetSourceDescriptor source, String option, Class<T> type, Functor<?, ? extends Collection<?>> aggregator) { |
| 40 | + String feature = source.getTweak(option, null); |
| 41 | + if (feature!=null) addMapping(field, feature, type, aggregator); |
30 | 42 | } |
31 | 43 | |
32 | 44 | public void assertAccessor(String field) { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/AssociationCursor.java |
— | — | @@ -10,8 +10,8 @@ |
11 | 11 | |
12 | 12 | private DataCursor<FeatureSet> source; |
13 | 13 | |
14 | | - protected Iterable<String> sourceFields; |
15 | | - protected Iterable<String> targetFields; |
| 14 | + protected Iterable<String> foreignFields; |
| 15 | + protected Iterable<String> conceptFields; |
16 | 16 | protected Iterable<String> propertyFields; |
17 | 17 | |
18 | 18 | public AssociationCursor(DataCursor<FeatureSet> source, String[] sourceFields, String[] targetFields, String[] propertyFields) { |
— | — | @@ -21,8 +21,8 @@ |
22 | 22 | public AssociationCursor(DataCursor<FeatureSet> source, Iterable<String> sourceFields, Iterable<String> targetFields, Iterable<String> propertyFields) { |
23 | 23 | if (source==null) throw new NullPointerException(); |
24 | 24 | this.source = source; |
25 | | - this.sourceFields = sourceFields; |
26 | | - this.targetFields = targetFields; |
| 25 | + this.foreignFields = sourceFields; |
| 26 | + this.conceptFields = targetFields; |
27 | 27 | this.propertyFields = propertyFields; |
28 | 28 | } |
29 | 29 | |
— | — | @@ -34,8 +34,8 @@ |
35 | 35 | } |
36 | 36 | |
37 | 37 | public Association newAssociation(FeatureSet row) throws PersistenceException { |
38 | | - FeatureSet source = sourceFields==null ? row : newFeatureSet(row, sourceFields); |
39 | | - FeatureSet target = targetFields==null ? row : newFeatureSet(row, targetFields); |
| 38 | + FeatureSet source = foreignFields==null ? row : newFeatureSet(row, foreignFields); |
| 39 | + FeatureSet target = conceptFields==null ? row : newFeatureSet(row, conceptFields); |
40 | 40 | FeatureSet props = propertyFields==null ? row : newFeatureSet(row, propertyFields); |
41 | 41 | |
42 | 42 | return new Association(source, target, props); |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSets.java |
— | — | @@ -57,7 +57,7 @@ |
58 | 58 | |
59 | 59 | public static <V>PropertyAccessor<FeatureSet, V> fieldAccessor(String field, Class<V> type) { |
60 | 60 | if (field.startsWith("=")) { //HACK: force constant! //DOC |
61 | | - return (PropertyAccessor<FeatureSet, V>)(Object)new PropertyAccessor.Constant<String>(field.substring(1)); //X: if V is not String, this sucks! |
| 61 | + return (PropertyAccessor<FeatureSet, V>)(Object)constantAccessor(field.substring(1)); //X: if V is not String, this sucks! |
62 | 62 | } |
63 | 63 | |
64 | 64 | AbstractedAccessor<FeatureSet, List<Object>> accessor = |
— | — | @@ -65,5 +65,8 @@ |
66 | 66 | |
67 | 67 | return new ConvertingAccessor<FeatureSet, List<Object>, V>(accessor, new FirstValue<V>(), type); |
68 | 68 | } |
69 | | - |
| 69 | + |
| 70 | + public static <T>PropertyAccessor<FeatureSet, T> constantAccessor(T value) { //builds an accessor backed by PropertyAccessor.Constant holding the given value |
| 71 | + return (PropertyAccessor<FeatureSet, T>)(Object)new PropertyAccessor.Constant<T>(value); //double cast via Object forces the Constant<T> into the FeatureSet-typed accessor interface |
| 72 | + } |
70 | 73 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptMappings.java |
— | — | @@ -1,7 +1,10 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator; |
3 | 3 | |
4 | 4 | import java.io.IOException; |
| 5 | +import java.math.BigDecimal; |
| 6 | +import java.math.BigInteger; |
5 | 7 | import java.util.Collection; |
| 8 | +import java.util.Comparator; |
6 | 9 | |
7 | 10 | import de.brightbyte.data.Functor; |
8 | 11 | import de.brightbyte.data.Functors; |
— | — | @@ -10,9 +13,11 @@ |
11 | 14 | import de.brightbyte.wikiword.integrator.data.Association; |
12 | 15 | import de.brightbyte.wikiword.integrator.data.CollapsingMatchesCursor; |
13 | 16 | import de.brightbyte.wikiword.integrator.data.FeatureMapping; |
| 17 | +import de.brightbyte.wikiword.integrator.data.FeatureSets; |
14 | 18 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
15 | 19 | import de.brightbyte.wikiword.integrator.processor.ConceptMappingProcessor; |
16 | 20 | import de.brightbyte.wikiword.integrator.processor.OptimalMappingSelector; |
| 21 | +import de.brightbyte.wikiword.integrator.store.AssociationFeature2ConceptAssociationStoreBuilder; |
17 | 22 | import de.brightbyte.wikiword.integrator.store.AssociationFeature2ConceptMappingStoreBuilder; |
18 | 23 | import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder; |
19 | 24 | import de.brightbyte.wikiword.integrator.store.DatabaseConceptMappingStoreBuilder; |
— | — | @@ -35,17 +40,20 @@ |
36 | 41 | FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor(); |
37 | 42 | |
38 | 43 | FeatureMapping fm = new FeatureMapping(); |
39 | | - fm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.FOREIGN_AUTHORITY, sourceDescriptor, "authority-name-field", "=" + sourceDescriptor.getAuthorityName(), String.class); |
40 | | - fm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.FOREIGN_ID, sourceDescriptor, "foreign-id-field", null, String.class); |
41 | | - fm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.FOREIGN_NAME, sourceDescriptor, "foreign-name-field", null, String.class); |
| 44 | + fm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.FOREIGN_AUTHORITY, sourceDescriptor, "foreign-authority-field", String.class, Functors.<String>firstElement()); |
| 45 | + if (!fm.hasAccessor(AssociationFeature2ConceptMappingStoreBuilder.FOREIGN_AUTHORITY)) fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_AUTHORITY, FeatureSets.constantAccessor(sourceDescriptor.getAuthorityName()) ); |
42 | 46 | |
| 47 | + fm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.FOREIGN_ID, sourceDescriptor, "foreign-id-field", String.class, Functors.<String>firstElement()); |
| 48 | + fm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.FOREIGN_NAME, sourceDescriptor, "foreign-name-field", String.class, Functors.<String>firstElement()); |
| 49 | + |
43 | 50 | FeatureMapping cm = new FeatureMapping(); |
44 | | - cm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.CONCEPT_ID, sourceDescriptor, "concept-id-field", null, Integer.class); |
45 | | - cm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.CONCEPT_NAME, sourceDescriptor, "concept-name-field", null, String.class); |
| 51 | + cm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.CONCEPT_ID, sourceDescriptor, "concept-id-field", Integer.class, Functors.<Integer>firstElement()); |
| 52 | + cm.addMapping(AssociationFeature2ConceptMappingStoreBuilder.CONCEPT_NAME, sourceDescriptor, "concept-name-field", String.class, Functors.<String>firstElement()); |
46 | 53 | |
47 | 54 | FeatureMapping am = new FeatureMapping(); |
48 | | - am.addMapping(AssociationFeature2ConceptMappingStoreBuilder.ASSOCIATION_ANNOTATION, sourceDescriptor, "association-annotation-field", null, String.class); |
49 | | - am.addMapping(AssociationFeature2ConceptMappingStoreBuilder.ASSOCIATION_WEIGHT, sourceDescriptor, "association-weight-field", null, Double.class); |
| 55 | + am.addMapping(AssociationFeature2ConceptMappingStoreBuilder.ASSOCIATION_ANNOTATION, sourceDescriptor, "association-annotation-field", String.class, Functors.concat("|")); |
| 56 | + am.addMapping(AssociationFeature2ConceptMappingStoreBuilder.ASSOCIATION_WEIGHT, sourceDescriptor, "association-weight-field", Double.class, Functors.Double.sum); |
| 57 | + if (!am.hasAccessor(AssociationFeature2ConceptMappingStoreBuilder.ASSOCIATION_WEIGHT)) am.addMapping(AssociationFeature2ConceptMappingStoreBuilder.ASSOCIATION_WEIGHT, sourceDescriptor, "optimization-field", Double.class, Functors.Double.sum); |
50 | 58 | |
51 | 59 | return new AssociationFeature2ConceptMappingStoreBuilder.Factory<DatabaseConceptMappingStoreBuilder>( |
52 | 60 | mappingStoreFactory, |
— | — | @@ -55,17 +63,40 @@ |
56 | 64 | |
57 | 65 | @Override |
58 | 66 | protected void run() throws Exception { |
59 | | - Functor<Number, ? extends Collection<? extends Number>> aggregator = sourceDescriptor.getTweak("optiomization-aggregator", null); |
| 67 | + Functor<Number, ? extends Collection<? extends Number>> aggregator = sourceDescriptor.getTweak("optimization-aggregator", null); |
60 | 68 | |
61 | 69 | if (aggregator==null) { |
62 | | - String f = sourceDescriptor.getTweak("optiomization-function", "sum"); |
| 70 | + String f = sourceDescriptor.getTweak("optimization-aggregator-function", "sum"); |
63 | 71 | if (f.equals("sum")) aggregator = Functors.Double.sum; |
64 | 72 | else if (f.equals("max")) aggregator = Functors.Double.max; |
65 | 73 | else throw new IllegalArgumentException("unknwon aggregator function: "+f); |
66 | 74 | } |
67 | 75 | |
| 76 | + Class<? extends Number> type = sourceDescriptor.getTweak("optimization-class", null); |
| 77 | + |
| 78 | + if (type==null) { |
| 79 | + String c = sourceDescriptor.getTweak("optimization-type", "double"); |
| 80 | + if (c.equals("double")) type = Double.class; |
| 81 | + else if (c.equals("int")) type = Integer.class; |
| 82 | + else if (c.equals("long")) type = Long.class; |
| 83 | + else if (c.equals("bigint")) type = BigInteger.class; |
| 84 | + else if (c.equals("decimal") || c.equals("bigdecimal")) type = BigDecimal.class; |
| 85 | + else throw new IllegalArgumentException("unknwon comparator type: "+c); |
| 86 | + } |
| 87 | + |
| 88 | + Comparator<? extends Number> comp = sourceDescriptor.getTweak("optimization-comparator", null); |
| 89 | + |
| 90 | + if (comp==null) { |
| 91 | + if (type==Double.class) comp = Functors.Double.comparator; |
| 92 | + else if (type==Integer.class) comp = Functors.Integer.comparator; |
| 93 | + else if (type==Long.class) comp = Functors.Long.comparator; |
| 94 | + else if (type==BigInteger.class) comp = Functors.BigInteger.comparator; |
| 95 | + else if (type==BigDecimal.class) comp = Functors.BigDecimal.comparator; |
| 96 | + else throw new IllegalArgumentException("unknwon comparator function: "+type); |
| 97 | + } |
| 98 | + |
68 | 99 | AssociationFeatureStoreBuilder store = getStoreBuilder(); |
69 | | - this.propertyProcessor = createProcessor(store, sourceDescriptor.getTweak("optiomization-field", "freq"), aggregator); |
| 100 | + this.propertyProcessor = createProcessor(store, sourceDescriptor.getTweak("optimization-field", "freq"), aggregator, comp, type); |
70 | 101 | |
71 | 102 | section("-- fetching properties --------------------------------------------------"); |
72 | 103 | DataCursor<Association> asc = openAssociationCursor(); |
— | — | @@ -90,9 +121,9 @@ |
91 | 122 | throw new UnsupportedOperationException("not supported"); |
92 | 123 | } |
93 | 124 | |
94 | | - protected ConceptMappingProcessor createProcessor(AssociationFeatureStoreBuilder conceptStore, String property, Functor<Number, ? extends Collection<? extends Number>> aggregator) throws InstantiationException { |
| 125 | + protected ConceptMappingProcessor createProcessor(AssociationFeatureStoreBuilder conceptStore, String property, Functor<? extends Number, ? extends Collection<? extends Number>> aggregator, Comparator<? extends Number> comp, Class<? extends Number> type) throws InstantiationException { |
95 | 126 | //FIXME: parameter list is specific to OptimalMappingSelector |
96 | | - return instantiate(sourceDescriptor, "conceptMappingProcessorClass", OptimalMappingSelector.class, conceptStore, property, aggregator); |
| 127 | + return instantiate(sourceDescriptor, "conceptMappingProcessorClass", OptimalMappingSelector.class, conceptStore, property, aggregator, comp, type); |
97 | 128 | } |
98 | 129 | |
99 | 130 | public static void main(String[] argv) throws Exception { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptAssociations.java |
— | — | @@ -1,11 +1,17 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator; |
3 | 3 | |
4 | 4 | import java.io.IOException; |
| 5 | +import java.util.Arrays; |
| 6 | +import java.util.List; |
5 | 7 | |
| 8 | +import de.brightbyte.abstraction.PropertyAccessor; |
| 9 | +import de.brightbyte.data.Functors; |
6 | 10 | import de.brightbyte.data.cursor.DataCursor; |
7 | 11 | import de.brightbyte.util.PersistenceException; |
8 | 12 | import de.brightbyte.wikiword.integrator.data.Association; |
9 | 13 | import de.brightbyte.wikiword.integrator.data.FeatureMapping; |
| 14 | +import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 15 | +import de.brightbyte.wikiword.integrator.data.FeatureSets; |
10 | 16 | import de.brightbyte.wikiword.integrator.processor.ConceptAssociationPassThrough; |
11 | 17 | import de.brightbyte.wikiword.integrator.processor.ConceptAssociationProcessor; |
12 | 18 | import de.brightbyte.wikiword.integrator.store.AssociationFeature2ConceptAssociationStoreBuilder; |
— | — | @@ -31,29 +37,31 @@ |
32 | 38 | FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor(); |
33 | 39 | |
34 | 40 | FeatureMapping fm = new FeatureMapping(); |
35 | | - fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_AUTHORITY, sourceDescriptor, "authority-name-field", "=" + sourceDescriptor.getAuthorityName(), String.class); |
36 | | - fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_ID, sourceDescriptor, "foreign-id-field", null, String.class); |
37 | | - fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_NAME, sourceDescriptor, "foreign-name-field", null, String.class); |
| 41 | + fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_AUTHORITY, sourceDescriptor, "foreign-authority-field", String.class, Functors.<String>firstElement()); |
| 42 | + if (!fm.hasAccessor(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_AUTHORITY)) fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_AUTHORITY, FeatureSets.constantAccessor(sourceDescriptor.getAuthorityName()) ); |
| 43 | + |
| 44 | + fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_ID, sourceDescriptor, "foreign-id-field", String.class, Functors.<String>firstElement()); |
| 45 | + fm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_NAME, sourceDescriptor, "foreign-name-field", String.class, Functors.<String>firstElement()); |
38 | 46 | |
39 | 47 | FeatureMapping cm = new FeatureMapping(); |
40 | | - cm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_ID, sourceDescriptor, "concept-id-field", null, Integer.class); |
41 | | - cm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_NAME, sourceDescriptor, "concept-name-field", null, String.class); |
| 48 | + cm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_ID, sourceDescriptor, "concept-id-field", Integer.class, Functors.<Integer>firstElement()); |
| 49 | + cm.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_NAME, sourceDescriptor, "concept-name-field", String.class, Functors.<String>firstElement()); |
42 | 50 | |
43 | 51 | FeatureMapping am = new FeatureMapping(); |
44 | | - am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_PROPERTY, sourceDescriptor, "foreign-property-field", null, String.class); |
45 | | - am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_PROPERTY, sourceDescriptor, "concept-property-field", null, String.class); |
46 | | - am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_PROPERTY_SOURCE, sourceDescriptor, "concept-property-source-field", null, String.class); |
47 | | - am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_PROPERTY_FREQ, sourceDescriptor, "concept-property-freq-field", null, Integer.class); |
| 52 | + am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.FOREIGN_PROPERTY, sourceDescriptor, "foreign-property-field", String.class, Functors.concat("|")); |
| 53 | + am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_PROPERTY, sourceDescriptor, "concept-property-field", String.class, Functors.concat("|")); |
| 54 | + am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_PROPERTY_SOURCE, sourceDescriptor, "concept-property-source-field", String.class, Functors.concat("|")); |
| 55 | + am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.CONCEPT_PROPERTY_FREQ, sourceDescriptor, "concept-property-freq-field", Integer.class, Functors.Integer.sum); |
48 | 56 | |
49 | | - am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.ASSOCIATION_VALUE, sourceDescriptor, "association-value-field", null, String.class); |
50 | | - am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.ASSOCIATION_WEIGHT, sourceDescriptor, "association-weight-field", null, Double.class); |
| 57 | + am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.ASSOCIATION_VALUE, sourceDescriptor, "association-value-field", String.class, Functors.concat("|")); |
| 58 | + am.addMapping(AssociationFeature2ConceptAssociationStoreBuilder.ASSOCIATION_WEIGHT, sourceDescriptor, "association-weight-field", Double.class, Functors.Double.sum); //weights are Double, so aggregate with the Double sum (as BuildConceptMappings does), not the Integer one |
51 | 59 | |
52 | 60 | return new AssociationFeature2ConceptAssociationStoreBuilder.Factory<DatabaseConceptAssociationStoreBuilder>( |
53 | 61 | mappingStoreFactory, |
54 | 62 | fm, cm, am |
55 | 63 | ); |
56 | 64 | } |
57 | | - |
| 65 | + |
58 | 66 | @Override |
59 | 67 | protected void run() throws Exception { |
60 | 68 | AssociationFeatureStoreBuilder store = getStoreBuilder(); |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/DatabaseConceptMappingStoreBuilder.java |
— | — | @@ -70,8 +70,9 @@ |
71 | 71 | |
72 | 72 | public void storeMapping(String authority, String extId, String extName, int conceptId, String conceptName, double weight, String annotation) throws PersistenceException { |
73 | 73 | try { |
74 | | - mappingInserter.updateString("external_authority", authority); |
75 | | - mappingInserter.updateString("external_id", extId); |
| 74 | + mappingInserter.updateString("foreign_authority", authority); |
| 75 | + mappingInserter.updateString("foreign_id", extId); |
| 76 | + mappingInserter.updateString("foreign_name", extName); |
76 | 77 | mappingInserter.updateInt("concept", conceptId); |
77 | 78 | mappingInserter.updateString("concept_name", conceptName); |
78 | 79 | mappingInserter.updateString("annotation", annotation); |