r53400 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r53399‎ | r53400 | r53401 >
Date:15:28, 17 July 2009
Author:daniel
Status:deferred
Tags:
Comment:
more flexible manglers and cursors
Modified paths:
  • /trunk/WikiWord/WikiWordIntegrator (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/DefaultFeatureSet.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureMapping.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetCursor.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetValueMapper.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/PropertyMapping.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java
@@ -38,6 +38,7 @@
3939 import de.brightbyte.wikiword.integrator.data.Association;
4040 import de.brightbyte.wikiword.integrator.data.AssociationCursor;
4141 import de.brightbyte.wikiword.integrator.data.FeatureSet;
 42+import de.brightbyte.wikiword.integrator.data.FeatureSetMangler;
4243 import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter;
4344 import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor;
4445 import de.brightbyte.wikiword.integrator.data.ResultSetFeatureSetCursor;
@@ -301,11 +302,17 @@
302303 fsc = new AssemblingFeatureSetCursor(fsc, subjectField, propField, valueField);
303304 }
304305
305 - Map<String, Chunker> splitters = sourceDescriptor.getDataFieldChunkers();
306 - if (splitters!=null) {
307 - fsc = new MangelingFeatureSetCursor(fsc, FeatureSetValueSplitter.multiFromChunkerMap(splitters));
 306+ FeatureSetMangler mangler = sourceDescriptor.getRowMangler();
 307+
 308+ if (mangler==null) {
 309+ Map<String, Chunker> splitters = sourceDescriptor.getDataFieldChunkers();
 310+ if (splitters!=null) mangler = FeatureSetValueSplitter.multiFromChunkerMap(splitters);
308311 }
309312
 313+ if (mangler!=null) {
 314+ fsc = new MangelingFeatureSetCursor(fsc, mangler);
 315+ }
 316+
310317 return fsc;
311318 }
312319
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/PropertyMapping.java
@@ -0,0 +1,76 @@
 2+package de.brightbyte.wikiword.integrator.data;
 3+
 4+import java.sql.Blob;
 5+import java.sql.Clob;
 6+import java.util.Collections;
 7+import java.util.HashMap;
 8+import java.util.Map;
 9+
 10+import de.brightbyte.abstraction.PropertyAccessor;
 11+import de.brightbyte.db.DatabaseUtil;
 12+
 13+public class PropertyMapping<R> {
 14+ protected Map<String, PropertyAccessor<R, ?>> accessors = new HashMap<String, PropertyAccessor<R, ?>>();
 15+
 16+ public PropertyMapping() {
 17+
 18+ }
 19+
 20+ public String toString() {
 21+ return accessors.toString();
 22+ }
 23+
 24+ public void addMapping(String field, PropertyAccessor<R, ?> accessor) {
 25+ accessors.put(field, accessor);
 26+ }
 27+
 28+ public void assertAccessor(String field) {
 29+ if (!hasAccessor(field)) throw new IllegalArgumentException("Mapping must provide a feature name for "+field);
 30+ }
 31+
 32+ public boolean hasAccessor(String field) {
 33+ return accessors.containsKey(field);
 34+ }
 35+
 36+ public PropertyAccessor<R, ?> getAccessor(String field) {
 37+ return accessors.get(field);
 38+ }
 39+
 40+ public <T> T requireValue(R row, String field, Class<T> type) {
 41+ T v = getValue(row, field, type);
 42+
 43+ if (v==null) {
 44+ if (!hasAccessor(field)) throw new IllegalArgumentException("no accessor for "+field);
 45+ else throw new IllegalArgumentException("no value for "+field+" using "+getAccessor(field));
 46+ }
 47+
 48+ return v;
 49+ }
 50+
 51+ public <T> T getValue(R row, String field, Class<T> type) {
 52+ return getValue(row, field, type, null);
 53+ }
 54+
 55+ public <T> T getValue(R row, String field, Class<T> type, T def) {
 56+
 57+ PropertyAccessor<R, ?> accessor = getAccessor(field);
 58+ if (accessor==null) throw new IllegalArgumentException("no accessor defined for field "+field);
 59+
 60+ Object v = accessor.getValue(row);
 61+ if (v==null) return def;
 62+
 63+ if (type==null) {
 64+ if (v instanceof byte[] || v instanceof char[] || v instanceof Clob || v instanceof Blob) { //XXX: UGLY HACK!
 65+ type = (Class<T>)String.class;
 66+ } else {
 67+ type = ((PropertyAccessor<R, T>)accessor).getType();
 68+ }
 69+ }
 70+
 71+ return DatabaseUtil.as(v, type); //NOTE: convert if necessary //XXX: charset...
 72+ }
 73+
 74+ public Iterable<String> fields() {
 75+ return Collections.unmodifiableSet(accessors.keySet());
 76+ }
 77+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java
@@ -8,12 +8,11 @@
99 import java.util.List;
1010
1111 import de.brightbyte.data.cursor.DataCursor;
12 -import de.brightbyte.io.LineCursor;
1312 import de.brightbyte.io.ChunkingCursor;
 13+import de.brightbyte.io.LineCursor;
1414 import de.brightbyte.text.Chunker;
1515 import de.brightbyte.text.CsvLineChunker;
1616 import de.brightbyte.util.ErrorHandler;
17 -import de.brightbyte.util.LoggingErrorHandler;
1817 import de.brightbyte.util.PersistenceException;
1918
2019 public class TsvFeatureSetCursor implements DataCursor<FeatureSet> {
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetCursor.java
@@ -0,0 +1,49 @@
 2+package de.brightbyte.wikiword.integrator.data;
 3+
 4+import de.brightbyte.data.cursor.DataCursor;
 5+import de.brightbyte.util.PersistenceException;
 6+
 7+public class FeatureSetCursor<R> implements DataCursor<FeatureSet> {
 8+ protected DataCursor<R> source;
 9+ protected PropertyMapping<R> mapping;
 10+
 11+ protected FeatureSetCursor(DataCursor<R> source) {
 12+ if (source==null) throw new NullPointerException();
 13+ this.source = source;
 14+ }
 15+
 16+ public FeatureSetCursor(DataCursor<R> source, PropertyMapping<R> mapping) {
 17+ this(source);
 18+ if (mapping==null) throw new NullPointerException();
 19+ this.mapping = mapping;
 20+ }
 21+
 22+ public void close() {
 23+ source.close();
 24+ }
 25+
 26+ public FeatureSet next() throws PersistenceException {
 27+ R r = source.next();
 28+ if (r==null) return null;
 29+ return record(r);
 30+ }
 31+
 32+ protected FeatureSet record(R row) {
 33+ if (mapping==null) throw new IllegalStateException("no peoperty mapping defined yet!");
 34+
 35+ FeatureSet ft = new DefaultFeatureSet();
 36+
 37+ for (String f : mapping.fields()) {
 38+ Object v = mapping.getValue(row, f, null); //XXX: extra type conversion?!
 39+
 40+ ft.put(f, v);
 41+ }
 42+
 43+ return ft;
 44+ }
 45+
 46+ protected void finalize() {
 47+ close();
 48+ }
 49+
 50+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureMapping.java
@@ -1,30 +1,13 @@
22 package de.brightbyte.wikiword.integrator.data;
33
44 import java.util.Collection;
5 -import java.util.HashMap;
6 -import java.util.Map;
75
86 import de.brightbyte.abstraction.MultiMapAbstractor;
97 import de.brightbyte.abstraction.PropertyAccessor;
108 import de.brightbyte.data.Functor;
11 -import de.brightbyte.db.DatabaseUtil;
129 import de.brightbyte.wikiword.integrator.FeatureSetSourceDescriptor;
1310
14 -public class FeatureMapping {
15 - protected Map<String, PropertyAccessor<FeatureSet, ?>> accessors = new HashMap<String, PropertyAccessor<FeatureSet, ?>>();
16 -
17 - public FeatureMapping() {
18 -
19 - }
20 -
21 - public String toString() {
22 - return accessors.toString();
23 - }
24 -
25 - public void addMapping(String field, PropertyAccessor<FeatureSet, ?> accessor) {
26 - accessors.put(field, accessor);
27 - }
28 -
 11+public class FeatureMapping extends PropertyMapping<FeatureSet> {
2912 public <T>void addMapping(String field, String feature, Class<T> type, Functor<?, ? extends Collection<?>> aggregator) {
3013 PropertyAccessor<FeatureSet, T> accessor;
3114
@@ -34,53 +17,8 @@
3518 addMapping(field, accessor);
3619 }
3720
38 - //FIXME: using Functor<T, ? extends Collection<T>> aggregator would be nice, but doesn't work with Functors.Double.sum, etc
3921 public <T>void addMapping(String field, FeatureSetSourceDescriptor source, String option, Class<T> type, Functor<?, ? extends Collection<?>> aggregator) {
4022 String feature = source.getTweak(option, null);
4123 if (feature!=null) addMapping(field, feature, type, aggregator);
4224 }
43 -
44 - public void assertAccessor(String field) {
45 - if (!hasAccessor(field)) throw new IllegalArgumentException("Mapping must provide a feature name for "+field);
46 - }
47 -
48 - public boolean hasAccessor(String field) {
49 - return accessors.containsKey(field);
50 - }
51 -
52 - public PropertyAccessor<FeatureSet, ?> getAccessor(String field) {
53 - return accessors.get(field);
54 - }
55 -
56 - public <T> T requireValue(FeatureSet features, String field, Class<T> type) {
57 - T v = getValue(features, field, type);
58 -
59 - if (v==null) {
60 - if (!hasAccessor(field)) throw new IllegalArgumentException("no accessor for "+field);
61 - else throw new IllegalArgumentException("no value for "+field+" using "+getAccessor(field));
62 - }
63 -
64 - return v;
65 - }
66 -
67 - public <T> T getValue(FeatureSet features, String field, Class<T> type) {
68 - return getValue(features, field, type, null);
69 - }
70 -
71 - public <T> T getValue(FeatureSet features, String field, Class<T> type, T def) {
72 - PropertyAccessor<FeatureSet, ?> accessor = getAccessor(field);
73 - if (accessor==null) return def;
74 -
75 - if (!type.isAssignableFrom(accessor.getType())) throw new IllegalArgumentException("incompatible value type: accessor provides "+accessor.getType()+", caller requested "+type);
76 -
77 - T v = (T)accessor.getValue(features); //NOTE: this is actually safe, provided accessor.getType() isn't lying
78 - if (v==null) return def;
79 -
80 - //XXX: type conversion hack
81 - if (type==String.class && v.getClass()!=String.class) v= (T)(Object)DatabaseUtil.asString(v);
82 - if (type==Integer.class && v.getClass()!=Integer.class) v= (T)(Object)DatabaseUtil.asInt(v);
83 - if (type==Double.class && v.getClass()!=Double.class) v= (T)(Object)DatabaseUtil.asDouble(v);
84 -
85 - return v;
86 - }
8725 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetValueMapper.java
@@ -0,0 +1,20 @@
 2+package de.brightbyte.wikiword.integrator.data;
 3+
 4+public class FeatureSetValueMapper implements FeatureSetMangler {
 5+
 6+ protected FeatureMapping mapping;
 7+
 8+
 9+ public FeatureSet apply(FeatureSet features) {
 10+ FeatureSet ft = new DefaultFeatureSet();
 11+
 12+ for (String f : mapping.fields()) {
 13+ Object v = mapping.getValue(features, f, null); //XXX: extra type conversion?!
 14+
 15+ ft.put(f, v);
 16+ }
 17+
 18+ return ft;
 19+ }
 20+
 21+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/DefaultFeatureSet.java
@@ -37,5 +37,24 @@
3838 List<Object> list = get(key);
3939 return FeatureSets.histogram(list);
4040 }
 41+
 42+ @Override
 43+ public boolean put(String key, Object value) {
 44+ boolean changed = false;
 45+ if (value instanceof Object[]) {
 46+ for(Object w: (Object[])value) {
 47+ changed = put(key, w) | changed;
 48+ }
 49+ } if (value instanceof Iterable) {
 50+ for(Object w: (Iterable)value) {
 51+ changed = put(key, w) | changed;
 52+ }
 53+ } else {
 54+ changed = super.put(key, value);
 55+ }
 56+
 57+ return changed;
 58+ }
4159
 60+
4261 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java
@@ -8,15 +8,14 @@
99 import java.util.Collections;
1010 import java.util.List;
1111 import java.util.Map;
12 -import java.util.regex.Pattern;
1312
1413 import de.brightbyte.data.Functor;
1514 import de.brightbyte.db.SqlScriptRunner;
16 -import de.brightbyte.db.SqlScriptRunner.RegularExpressionMangler;
1715 import de.brightbyte.text.Chunker;
1816 import de.brightbyte.text.CsvLineChunker;
1917 import de.brightbyte.wikiword.TweakSet;
2018 import de.brightbyte.wikiword.builder.InputFileHelper;
 19+import de.brightbyte.wikiword.integrator.data.FeatureSetMangler;
2120
2221 public class FeatureSetSourceDescriptor extends TweakSet {
2322
@@ -89,6 +88,10 @@
9089 return getTweak("field-chunkers", (Map<String, Chunker>)null);
9190 }
9291
 92+ public FeatureSetMangler getRowMangler() {
 93+ return getTweak("row-mangler", (FeatureSetMangler)null);
 94+ }
 95+
9396 public String getPropertyValueField() {
9497 return requireTweak("property-value-field");
9598 }
Property changes on: trunk/WikiWord/WikiWordIntegrator
___________________________________________________________________
Name: svn:ignore
9699 - target
97100 + target
db.properties

Status & tagging log