Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java |
— | — | @@ -38,6 +38,7 @@ |
39 | 39 | import de.brightbyte.wikiword.integrator.data.Association; |
40 | 40 | import de.brightbyte.wikiword.integrator.data.AssociationCursor; |
41 | 41 | import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 42 | +import de.brightbyte.wikiword.integrator.data.FeatureSetMangler; |
42 | 43 | import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter; |
43 | 44 | import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor; |
44 | 45 | import de.brightbyte.wikiword.integrator.data.ResultSetFeatureSetCursor; |
— | — | @@ -301,11 +302,17 @@ |
302 | 303 | fsc = new AssemblingFeatureSetCursor(fsc, subjectField, propField, valueField); |
303 | 304 | } |
304 | 305 | |
305 | | - Map<String, Chunker> splitters = sourceDescriptor.getDataFieldChunkers(); |
306 | | - if (splitters!=null) { |
307 | | - fsc = new MangelingFeatureSetCursor(fsc, FeatureSetValueSplitter.multiFromChunkerMap(splitters)); |
| 306 | + FeatureSetMangler mangler = sourceDescriptor.getRowMangler(); |
| 307 | + |
| 308 | + if (mangler==null) { |
| 309 | + Map<String, Chunker> splitters = sourceDescriptor.getDataFieldChunkers(); |
| 310 | + if (splitters!=null) mangler = FeatureSetValueSplitter.multiFromChunkerMap(splitters); |
308 | 311 | } |
309 | 312 | |
| 313 | + if (mangler!=null) { |
| 314 | + fsc = new MangelingFeatureSetCursor(fsc, mangler); |
| 315 | + } |
| 316 | + |
310 | 317 | return fsc; |
311 | 318 | } |
312 | 319 | |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/PropertyMapping.java |
— | — | @@ -0,0 +1,76 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data; |
| 3 | + |
| 4 | +import java.sql.Blob; |
| 5 | +import java.sql.Clob; |
| 6 | +import java.util.Collections; |
| 7 | +import java.util.HashMap; |
| 8 | +import java.util.Map; |
| 9 | + |
| 10 | +import de.brightbyte.abstraction.PropertyAccessor; |
| 11 | +import de.brightbyte.db.DatabaseUtil; |
| 12 | +/** Maps field names to the PropertyAccessor used to read that field's value from a row of type R. */ |
| 13 | +public class PropertyMapping<R> { |
| 14 | + protected Map<String, PropertyAccessor<R, ?>> accessors = new HashMap<String, PropertyAccessor<R, ?>>(); |
| 15 | + |
| 16 | + public PropertyMapping() { |
| 17 | + |
| 18 | + } |
| 19 | + /** Returns the string form of the underlying field-to-accessor map. */ |
| 20 | + public String toString() { |
| 21 | + return accessors.toString(); |
| 22 | + } |
| 23 | + /** Registers the accessor for the given field; a previous accessor stored under the same name is replaced. */ |
| 24 | + public void addMapping(String field, PropertyAccessor<R, ?> accessor) { |
| 25 | + accessors.put(field, accessor); |
| 26 | + } |
| 27 | + /** Throws IllegalArgumentException if no accessor is registered for the given field. */ |
| 28 | + public void assertAccessor(String field) { |
| 29 | + if (!hasAccessor(field)) throw new IllegalArgumentException("Mapping must provide a feature name for "+field); |
| 30 | + } |
| 31 | + /** Tells whether an accessor is registered for the given field. */ |
| 32 | + public boolean hasAccessor(String field) { |
| 33 | + return accessors.containsKey(field); |
| 34 | + } |
| 35 | + /** Returns the accessor registered for the given field, or null if there is none. */ |
| 36 | + public PropertyAccessor<R, ?> getAccessor(String field) { |
| 37 | + return accessors.get(field); |
| 38 | + } |
| 39 | + /** Like getValue(row, field, type), but throws IllegalArgumentException instead of returning null. */ |
| 40 | + public <T> T requireValue(R row, String field, Class<T> type) { |
| 41 | + T v = getValue(row, field, type); |
| 42 | + |
| 43 | + if (v==null) { |
| 44 | + if (!hasAccessor(field)) throw new IllegalArgumentException("no accessor for "+field); //NOTE(review): getValue() above already throws for a missing accessor, so this branch looks unreachable unless getValue is overridden |
| 45 | + else throw new IllegalArgumentException("no value for "+field+" using "+getAccessor(field)); |
| 46 | + } |
| 47 | + |
| 48 | + return v; |
| 49 | + } |
| 50 | + /** Shorthand for getValue(row, field, type, null). */ |
| 51 | + public <T> T getValue(R row, String field, Class<T> type) { |
| 52 | + return getValue(row, field, type, null); |
| 53 | + } |
| 54 | + /** Reads the field from the row through its accessor and hands the result to DatabaseUtil.as for conversion to type; returns def if the accessor yields null, throws IllegalArgumentException if no accessor is registered. */ |
| 55 | + public <T> T getValue(R row, String field, Class<T> type, T def) { |
| 56 | + |
| 57 | + PropertyAccessor<R, ?> accessor = getAccessor(field); |
| 58 | + if (accessor==null) throw new IllegalArgumentException("no accessor defined for field "+field); |
| 59 | + |
| 60 | + Object v = accessor.getValue(row); |
| 61 | + if (v==null) return def; |
| 62 | + |
| 63 | + if (type==null) { //no type requested by the caller: pick one based on the value / the accessor |
| 64 | + if (v instanceof byte[] || v instanceof char[] || v instanceof Clob || v instanceof Blob) { //XXX: UGLY HACK! |
| 65 | + type = (Class<T>)String.class; |
| 66 | + } else { |
| 67 | + type = ((PropertyAccessor<R, T>)accessor).getType(); |
| 68 | + } |
| 69 | + } |
| 70 | + |
| 71 | + return DatabaseUtil.as(v, type); //NOTE: convert if necessary //XXX: charset... |
| 72 | + } |
| 73 | + /** Returns an unmodifiable view of the registered field names. */ |
| 74 | + public Iterable<String> fields() { |
| 75 | + return Collections.unmodifiableSet(accessors.keySet()); |
| 76 | + } |
| 77 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java |
— | — | @@ -8,12 +8,11 @@ |
9 | 9 | import java.util.List; |
10 | 10 | |
11 | 11 | import de.brightbyte.data.cursor.DataCursor; |
12 | | -import de.brightbyte.io.LineCursor; |
13 | 12 | import de.brightbyte.io.ChunkingCursor; |
| 13 | +import de.brightbyte.io.LineCursor; |
14 | 14 | import de.brightbyte.text.Chunker; |
15 | 15 | import de.brightbyte.text.CsvLineChunker; |
16 | 16 | import de.brightbyte.util.ErrorHandler; |
17 | | -import de.brightbyte.util.LoggingErrorHandler; |
18 | 17 | import de.brightbyte.util.PersistenceException; |
19 | 18 | |
20 | 19 | public class TsvFeatureSetCursor implements DataCursor<FeatureSet> { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetCursor.java |
— | — | @@ -0,0 +1,49 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data; |
| 3 | + |
| 4 | +import de.brightbyte.data.cursor.DataCursor; |
| 5 | +import de.brightbyte.util.PersistenceException; |
| 6 | +/** Cursor that turns each row of type R read from a source cursor into a FeatureSet, using a PropertyMapping. */ |
| 7 | +public class FeatureSetCursor<R> implements DataCursor<FeatureSet> { |
| 8 | + protected DataCursor<R> source; |
| 9 | + protected PropertyMapping<R> mapping; |
| 10 | + /** Sets only the source and leaves mapping unset; record() throws IllegalStateException until mapping is assigned. */ |
| 11 | + protected FeatureSetCursor(DataCursor<R> source) { |
| 12 | + if (source==null) throw new NullPointerException(); |
| 13 | + this.source = source; |
| 14 | + } |
| 15 | + /** Creates a cursor over the given source using the given mapping; both must be non-null. */ |
| 16 | + public FeatureSetCursor(DataCursor<R> source, PropertyMapping<R> mapping) { |
| 17 | + this(source); |
| 18 | + if (mapping==null) throw new NullPointerException(); |
| 19 | + this.mapping = mapping; |
| 20 | + } |
| 21 | + /** Closes the underlying source cursor. */ |
| 22 | + public void close() { |
| 23 | + source.close(); |
| 24 | + } |
| 25 | + /** Returns the FeatureSet built from the next source row, or null once the source returns null. */ |
| 26 | + public FeatureSet next() throws PersistenceException { |
| 27 | + R r = source.next(); |
| 28 | + if (r==null) return null; |
| 29 | + return record(r); |
| 30 | + } |
| 31 | + /** Builds a FeatureSet holding, for every field of the mapping, the value the mapping extracts from the row. */ |
| 32 | + protected FeatureSet record(R row) { |
| 33 | + if (mapping==null) throw new IllegalStateException("no property mapping defined yet!"); |
| 34 | + |
| 35 | + FeatureSet ft = new DefaultFeatureSet(); |
| 36 | + |
| 37 | + for (String f : mapping.fields()) { |
| 38 | + Object v = mapping.getValue(row, f, null); //XXX: extra type conversion?! |
| 39 | + |
| 40 | + ft.put(f, v); |
| 41 | + } |
| 42 | + |
| 43 | + return ft; |
| 44 | + } |
| 45 | + /** Safety net: closes the cursor when the object is finalized. */ |
| 46 | + protected void finalize() { |
| 47 | + close(); |
| 48 | + } |
| 49 | + |
| 50 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureMapping.java |
— | — | @@ -1,30 +1,13 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data; |
3 | 3 | |
4 | 4 | import java.util.Collection; |
5 | | -import java.util.HashMap; |
6 | | -import java.util.Map; |
7 | 5 | |
8 | 6 | import de.brightbyte.abstraction.MultiMapAbstractor; |
9 | 7 | import de.brightbyte.abstraction.PropertyAccessor; |
10 | 8 | import de.brightbyte.data.Functor; |
11 | | -import de.brightbyte.db.DatabaseUtil; |
12 | 9 | import de.brightbyte.wikiword.integrator.FeatureSetSourceDescriptor; |
13 | 10 | |
14 | | -public class FeatureMapping { |
15 | | - protected Map<String, PropertyAccessor<FeatureSet, ?>> accessors = new HashMap<String, PropertyAccessor<FeatureSet, ?>>(); |
16 | | - |
17 | | - public FeatureMapping() { |
18 | | - |
19 | | - } |
20 | | - |
21 | | - public String toString() { |
22 | | - return accessors.toString(); |
23 | | - } |
24 | | - |
25 | | - public void addMapping(String field, PropertyAccessor<FeatureSet, ?> accessor) { |
26 | | - accessors.put(field, accessor); |
27 | | - } |
28 | | - |
| 11 | +public class FeatureMapping extends PropertyMapping<FeatureSet> { |
29 | 12 | public <T>void addMapping(String field, String feature, Class<T> type, Functor<?, ? extends Collection<?>> aggregator) { |
30 | 13 | PropertyAccessor<FeatureSet, T> accessor; |
31 | 14 | |
— | — | @@ -34,53 +17,8 @@ |
35 | 18 | addMapping(field, accessor); |
36 | 19 | } |
37 | 20 | |
38 | | - //FIXME: using Functor<T, ? extends Collection<T>> aggregator would be nice, but doesn't work with Functors.Double.sum, etc |
39 | 21 | public <T>void addMapping(String field, FeatureSetSourceDescriptor source, String option, Class<T> type, Functor<?, ? extends Collection<?>> aggregator) { |
40 | 22 | String feature = source.getTweak(option, null); |
41 | 23 | if (feature!=null) addMapping(field, feature, type, aggregator); |
42 | 24 | } |
43 | | - |
44 | | - public void assertAccessor(String field) { |
45 | | - if (!hasAccessor(field)) throw new IllegalArgumentException("Mapping must provide a feature name for "+field); |
46 | | - } |
47 | | - |
48 | | - public boolean hasAccessor(String field) { |
49 | | - return accessors.containsKey(field); |
50 | | - } |
51 | | - |
52 | | - public PropertyAccessor<FeatureSet, ?> getAccessor(String field) { |
53 | | - return accessors.get(field); |
54 | | - } |
55 | | - |
56 | | - public <T> T requireValue(FeatureSet features, String field, Class<T> type) { |
57 | | - T v = getValue(features, field, type); |
58 | | - |
59 | | - if (v==null) { |
60 | | - if (!hasAccessor(field)) throw new IllegalArgumentException("no accessor for "+field); |
61 | | - else throw new IllegalArgumentException("no value for "+field+" using "+getAccessor(field)); |
62 | | - } |
63 | | - |
64 | | - return v; |
65 | | - } |
66 | | - |
67 | | - public <T> T getValue(FeatureSet features, String field, Class<T> type) { |
68 | | - return getValue(features, field, type, null); |
69 | | - } |
70 | | - |
71 | | - public <T> T getValue(FeatureSet features, String field, Class<T> type, T def) { |
72 | | - PropertyAccessor<FeatureSet, ?> accessor = getAccessor(field); |
73 | | - if (accessor==null) return def; |
74 | | - |
75 | | - if (!type.isAssignableFrom(accessor.getType())) throw new IllegalArgumentException("incompatible value type: accessor provides "+accessor.getType()+", caller requested "+type); |
76 | | - |
77 | | - T v = (T)accessor.getValue(features); //NOTE: this is actually safe, provided accessor.getType() isn't lying |
78 | | - if (v==null) return def; |
79 | | - |
80 | | - //XXX: type conversion hack |
81 | | - if (type==String.class && v.getClass()!=String.class) v= (T)(Object)DatabaseUtil.asString(v); |
82 | | - if (type==Integer.class && v.getClass()!=Integer.class) v= (T)(Object)DatabaseUtil.asInt(v); |
83 | | - if (type==Double.class && v.getClass()!=Double.class) v= (T)(Object)DatabaseUtil.asDouble(v); |
84 | | - |
85 | | - return v; |
86 | | - } |
87 | 25 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetValueMapper.java |
— | — | @@ -0,0 +1,20 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data; |
| 3 | +/** FeatureSetMangler that builds a new FeatureSet from the values a FeatureMapping extracts from the input. */ |
| 4 | +public class FeatureSetValueMapper implements FeatureSetMangler { |
| 5 | + |
| 6 | + protected FeatureMapping mapping; |
| 7 | + //NOTE(review): nothing in this class assigns mapping; it has to be set by a subclass or same-package code before apply() is used |
| 8 | + /** Returns a new FeatureSet with one entry per mapped field; throws IllegalStateException while no mapping is set. */ |
| 9 | + public FeatureSet apply(FeatureSet features) { |
| 10 | + if (mapping==null) throw new IllegalStateException("no property mapping defined yet!"); //same guard as FeatureSetCursor.record, instead of a bare NullPointerException |
| 11 | + FeatureSet ft = new DefaultFeatureSet(); |
| 12 | + for (String f : mapping.fields()) { |
| 13 | + Object v = mapping.getValue(features, f, null); //XXX: extra type conversion?! |
| 14 | + |
| 15 | + ft.put(f, v); |
| 16 | + } |
| 17 | + |
| 18 | + return ft; |
| 19 | + } |
| 20 | + |
| 21 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/DefaultFeatureSet.java |
— | — | @@ -37,5 +37,24 @@ |
38 | 38 | List<Object> list = get(key); |
39 | 39 | return FeatureSets.histogram(list); |
40 | 40 | } |
| 41 | + /** Adds value under key, flattening arrays and Iterables by adding each element separately (recursively); returns the OR of the results of the underlying super.put calls. */ |
| 42 | + @Override |
| 43 | + public boolean put(String key, Object value) { |
| 44 | + boolean changed = false; |
| 45 | + if (value instanceof Object[]) { |
| 46 | + for(Object w: (Object[])value) { |
| 47 | + changed = put(key, w) | changed; |
| 48 | + } |
| 49 | + } else if (value instanceof Iterable) { //must be else-if: otherwise an array falls through to the final else and is also stored as a whole |
| 50 | + for(Object w: (Iterable<?>)value) { |
| 51 | + changed = put(key, w) | changed; |
| 52 | + } |
| 53 | + } else { |
| 54 | + changed = super.put(key, value); |
| 55 | + } |
| 56 | + |
| 57 | + return changed; |
| 58 | + } |
41 | 59 | |
| 60 | + |
42 | 61 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java |
— | — | @@ -8,15 +8,14 @@ |
9 | 9 | import java.util.Collections; |
10 | 10 | import java.util.List; |
11 | 11 | import java.util.Map; |
12 | | -import java.util.regex.Pattern; |
13 | 12 | |
14 | 13 | import de.brightbyte.data.Functor; |
15 | 14 | import de.brightbyte.db.SqlScriptRunner; |
16 | | -import de.brightbyte.db.SqlScriptRunner.RegularExpressionMangler; |
17 | 15 | import de.brightbyte.text.Chunker; |
18 | 16 | import de.brightbyte.text.CsvLineChunker; |
19 | 17 | import de.brightbyte.wikiword.TweakSet; |
20 | 18 | import de.brightbyte.wikiword.builder.InputFileHelper; |
| 19 | +import de.brightbyte.wikiword.integrator.data.FeatureSetMangler; |
21 | 20 | |
22 | 21 | public class FeatureSetSourceDescriptor extends TweakSet { |
23 | 22 | |
— | — | @@ -89,6 +88,10 @@ |
90 | 89 | return getTweak("field-chunkers", (Map<String, Chunker>)null); |
91 | 90 | } |
92 | 91 | |
| 92 | + public FeatureSetMangler getRowMangler() { //reads the "row-mangler" tweak, passing null as the default (presumably returned when the tweak is not set) |
| 93 | + return getTweak("row-mangler", (FeatureSetMangler)null); |
| 94 | + } |
| 95 | + |
93 | 96 | public String getPropertyValueField() { |
94 | 97 | return requireTweak("property-value-field"); |
95 | 98 | } |
Property changes on: trunk/WikiWord/WikiWordIntegrator |
___________________________________________________________________ |
Name: svn:ignore |
96 | 99 | - target |
97 | 100 | + target |
db.properties |