r51548 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r51547 | r51548 | r51549 >
Date:17:17, 6 June 2009
Author:daniel
Status:deferred
Tags:
Comment:
BuildAssociations
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/TweakSet.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/InputFileHelper.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptAssociations.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/ForeignEntityStoreDescriptor.java (deleted) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/LoadForeignProperties.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/AssociationCursor.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetValueSplitter.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSets.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/AbstractConceptAssociationProcessor.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptAssociationPassThrough.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptAssociationProcessor.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptMappingPassThrough.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptMappingProcessor.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ForeignPropertyProcessor.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/OptimalMappingSelector.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/WikiWordProcessor.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationAsMappingStoreBuilder.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationFeatureStoreBuilder.java (from /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/MappingFeatureStoreBuilder.java:512) (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/DatabaseConceptMappingStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/MappingFeatureStoreBuilder.java (deleted) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/InputFileHelper.java
@@ -32,7 +32,7 @@
3333 externalBunzip = bz2;
3434 }
3535
36 - protected static final Pattern extensionPattern = Pattern.compile("\\.([^./\\]+)(\\.gz|\\.bz2)$", Pattern.CASE_INSENSITIVE);
 36+ protected static final Pattern extensionPattern = Pattern.compile("\\.([^./\\\\]+)(\\.gz|\\.bz2)$", Pattern.CASE_INSENSITIVE);
3737
3838 public String getFormat(String n) {
3939 Matcher m = extensionPattern.matcher(n);
@@ -41,17 +41,48 @@
4242 else return m.group(1).toLowerCase();
4343 }
4444
 45+ public URL getBaseURL(String n) {
 46+ if (n.equals("-")) n = new File(".").getAbsolutePath();
 47+
 48+ try {
 49+ URL u = new URL(n);
 50+ return u;
 51+ } catch (MalformedURLException e) {
 52+ //ignore and continue
 53+ }
 54+
 55+ try {
 56+ File f = new File(n);
 57+ return f.toURI().toURL();
 58+ } catch (MalformedURLException e) {
 59+ throw new IllegalArgumentException("failed to convert file name to URL: "+n);
 60+ }
 61+ }
 62+
4563 public InputStream open(String n) throws IOException {
 64+ return open(null, n);
 65+ }
 66+
 67+ public InputStream open(URL base, String n) throws IOException {
4668 if (n.equals("-")) return new BufferedInputStream(System.in);
4769
4870 try {
49 - URL u = new URL(n);
 71+ URL u = base == null || base.getProtocol().equals("file") ? new URL(n) : new URL(base, n);
5072 return openURL(u);
5173 } catch (MalformedURLException e) {
5274 //ignore and continue
5375 }
5476
55 - File f = new File(n);
 77+ File f;
 78+
 79+ if (base!=null && base.getProtocol().equals("file")) {
 80+ File b = new File(base.getPath());
 81+ if (b.isFile()) b = b.getParentFile();
 82+ f = new File(b, n);
 83+ } else {
 84+ f = new File(n);
 85+ }
 86+
5687 return openFile(f);
5788 }
5889
@@ -155,6 +186,6 @@
156187 slurper.start();
157188
158189 return new BufferedInputStream(proc.getInputStream());
159 - }
 190+ }
160191
161192 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/ForeignEntityStoreDescriptor.java
@@ -1,79 +0,0 @@
2 -package de.brightbyte.wikiword.integrator;
3 -
4 -import java.io.File;
5 -import java.net.MalformedURLException;
6 -import java.net.URL;
7 -import java.util.List;
8 -import java.util.Map;
9 -
10 -import de.brightbyte.wikiword.TweakSet;
11 -
12 -public class ForeignEntityStoreDescriptor extends TweakSet {
13 -
14 - public ForeignEntityStoreDescriptor() {
15 - super();
16 - }
17 -
18 - public ForeignEntityStoreDescriptor(TweakSet parent) {
19 - super(parent);
20 - }
21 -
22 - public String getDataEncoding() {
23 - return getTweak("foreign.encoding", "UTF-8");
24 - }
25 -
26 - public String getSqlQuery() {
27 - return getTweak("foreign.query", null);
28 - }
29 -
30 - public String getSourceFileName() { //FIXME
31 - return getTweak("foreign.file", null);
32 - }
33 -
34 - public String[] getDataFields() {
35 - List<String> v = getTweak("foreign.fields", (List<String>)null);
36 - if (v==null) return null;
37 - return (String[]) v.toArray(new String[v.size()]);
38 - }
39 -
40 - public Map<String, String> getSplitExpressions() { //FIXME:!
41 - return getTweak("split", (Map<String, String>)null);
42 - }
43 -
44 - public String getPropertyValueField() {
45 - return getTweak("foreign.property-value-field", null);
46 - }
47 -
48 - public String getPropertyNameField() {
49 - return getTweak("foreign.property-name-field", null);
50 - }
51 -
52 - public String getConceptIdField() {
53 - return getTweak("foreign.concept-id-field", null);
54 - }
55 -
56 - public String getConceptNameField() {
57 - return getTweak("foreign.concept-name-field", "name");
58 - }
59 -
60 - public String getAuthorityName() {
61 - String name = getTweak("foreign.authority", null);
62 - if (name==null) throw new RuntimeException("authority name not specified!");
63 - return name;
64 - }
65 -
66 - public void setBaseURL(URL baseURL) {
67 - parameters.put(".baseURL", baseURL);
68 - }
69 -
70 - public URL getBaseURL() {
71 - try {
72 - URL u = getTweak(".baseURL", (URL)null);
73 - if (u==null) u = new File(".").toURI().toURL();
74 - return u;
75 - } catch (MalformedURLException e) {
76 - return null;
77 - }
78 - }
79 -
80 -}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/LoadForeignProperties.java
@@ -1,30 +1,12 @@
22 package de.brightbyte.wikiword.integrator;
33
44 import java.io.IOException;
5 -import java.io.InputStream;
6 -import java.sql.Connection;
7 -import java.sql.ResultSet;
8 -import java.sql.SQLException;
9 -import java.util.Arrays;
10 -import java.util.Collection;
11 -import java.util.Map;
12 -import java.util.regex.Pattern;
135
14 -import de.brightbyte.data.Functor;
156 import de.brightbyte.data.cursor.DataCursor;
16 -import de.brightbyte.db.SqlScriptRunner;
17 -import de.brightbyte.io.IOUtil;
187 import de.brightbyte.util.PersistenceException;
19 -import de.brightbyte.wikiword.StoreBackedApp;
20 -import de.brightbyte.wikiword.builder.InputFileHelper;
21 -import de.brightbyte.wikiword.integrator.data.AssemblingFeatureSetCursor;
228 import de.brightbyte.wikiword.integrator.data.FeatureSet;
23 -import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter;
249 import de.brightbyte.wikiword.integrator.data.ForeignEntity;
2510 import de.brightbyte.wikiword.integrator.data.ForeignEntityCursor;
26 -import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor;
27 -import de.brightbyte.wikiword.integrator.data.ResultSetFeatureSetCursor;
28 -import de.brightbyte.wikiword.integrator.data.TsvFeatureSetCursor;
2911 import de.brightbyte.wikiword.integrator.processor.ForeignPropertyPassThrough;
3012 import de.brightbyte.wikiword.integrator.processor.ForeignPropertyProcessor;
3113 import de.brightbyte.wikiword.integrator.store.DatabaseForeignPropertyStoreBuilder;
@@ -36,56 +18,18 @@
3719 * ImportDump can be invoked as a standalone program, use --help as a
3820 * command line parameter for usage information.
3921 */
40 -public class LoadForeignProperties extends StoreBackedApp<ForeignPropertyStoreBuilder> {
41 -
42 - //protected ForeignPropertyStoreBuilder propertyStore;
43 - protected ForeignPropertyProcessor propertyProcessor;
44 - protected InputFileHelper inputHelper;
45 - private ForeignEntityStoreDescriptor sourceDescriptor;
 22+public class LoadForeignProperties extends AbstractIntegratorApp<ForeignPropertyStoreBuilder, ForeignPropertyProcessor, ForeignEntity> {
4623
47 - public LoadForeignProperties() {
48 - super(true, true);
49 - }
50 -
51 - protected InputFileHelper getInputHelper() {
52 - if (inputHelper==null) {
53 - inputHelper = new InputFileHelper(tweaks);
54 - }
55 - return inputHelper;
56 - }
57 -
5824 @Override
5925 protected WikiWordStoreFactory<? extends ForeignPropertyStoreBuilder> createConceptStoreFactory() throws IOException, PersistenceException {
6026 return new DatabaseForeignPropertyStoreBuilder.Factory(getTargetTableName(), getConfiguredDataset(), getConfiguredDataSource(), tweaks);
6127 }
6228
63 - protected String getTargetTableName() throws IOException {
64 - if (args.getParameterCount() > 2) return args.getParameter(2);
65 -
66 - String authority = getSourceDescriptor().getAuthorityName();
67 - authority = authority.replaceAll("[^\\w\\d]", "_").toLowerCase();
68 -
69 - return authority+"_property";
70 - }
71 -
72 - protected String getSourceDescriptionFileName() {
73 - if (args.getParameterCount() < 2) throw new IllegalArgumentException("missing second parameter (descripion file name)");
74 - return args.getParameter(1);
75 - }
76 -
7729 @Override
78 - protected void declareOptions() {
79 - super.declareOptions();
80 -
81 - args.declareHelp("<wiki>", null);
82 - args.declareHelp("<dataset>", "name of the wiki/thesaurus to process");
83 - args.declare("dataset", null, true, String.class, "sets the wiki name (overrides the <wiki-or-dump> parameter)");
84 - }
85 -
86 - @Override
8730 protected void run() throws Exception {
8831 section("-- fetching properties --------------------------------------------------");
89 - DataCursor<ForeignEntity> cursor = openPropertySource();
 32+ DataCursor<FeatureSet> fsc = openFeatureSetCursor();
 33+ DataCursor<ForeignEntity> cursor = new ForeignEntityCursor(fsc, sourceDescriptor.getAuthorityName(), sourceDescriptor.getPropertySubjectField(), sourceDescriptor.getPropertySubjectNameField());
9034
9135 section("-- process properties --------------------------------------------------");
9236 this.conceptStore.prepareImport();
@@ -96,83 +40,7 @@
9741
9842 this.conceptStore.finalizeImport();
9943 }
100 -
101 - protected DataCursor<ForeignEntity> openPropertySource() throws IOException, SQLException, PersistenceException {
102 - ForeignEntityStoreDescriptor sourceDescriptor = getSourceDescriptor();
103 -
104 - String enc = sourceDescriptor.getDataEncoding();
105 - String sql = sourceDescriptor.getSqlQuery();
106 - InputStream in = null;
107 -
108 - if (sql==null) {
109 - String n = sourceDescriptor.getSourceFileName();
110 - String format = getInputHelper().getFormat(n); //FIXME: explicit format!
111 - in = getInputHelper().open(sourceDescriptor.getBaseURL(), n);
112 -
113 - if (format!=null && format.equals("sql")) {
114 - sql = IOUtil.slurp(in, enc);
115 -
116 - in.close();
117 - in = null;
118 - }
119 - }
120 -
121 - DataCursor<FeatureSet> fsc;
122 - String[] fields = sourceDescriptor.getDataFields();
123 -
124 - if (sql!=null) {
125 - Collection<Functor<String, String>> manglers = Arrays.asList(getSqlScriptManglers());
126 - Connection con = getConfiguredDataSource().getConnection();
127 - ResultSet rs = SqlScriptRunner.runQuery(con, sql, manglers);
128 -
129 - fsc = new ResultSetFeatureSetCursor(rs, fields);
130 - } else {
131 - fsc = new TsvFeatureSetCursor(in, enc);
132 -
133 - if (fields!=null) ((TsvFeatureSetCursor)fsc).setFields(fields);
134 - else ((TsvFeatureSetCursor)fsc).readFields();
135 - }
136 -
137 - String propField = sourceDescriptor.getPropertyNameField();
138 - if (propField!=null) {
139 - String valueField = sourceDescriptor.getPropertyValueField();
140 - String idField = sourceDescriptor.getConceptIdField();
141 - fsc = new AssemblingFeatureSetCursor(fsc, idField, propField, valueField);
142 - }
143 -
144 - Map<String, String> splitExp = sourceDescriptor.getSplitExpressions();
145 - if (splitExp!=null) {
146 - fsc = new MangelingFeatureSetCursor(fsc, FeatureSetValueSplitter.multiFromStringMap(splitExp, 0));
147 - }
148 -
149 - return new ForeignEntityCursor(fsc, sourceDescriptor.getAuthorityName(), sourceDescriptor.getConceptIdField(), sourceDescriptor.getConceptNameField());
150 - }
15144
152 - protected ForeignEntityStoreDescriptor getSourceDescriptor() throws IOException {
153 - if (sourceDescriptor!=null) return sourceDescriptor;
154 -
155 - sourceDescriptor = new ForeignEntityStoreDescriptor(tweaks);
156 -
157 - String n = getSourceDescriptionFileName();
158 - InputStream in = getInputHelper().open(n);
159 - sourceDescriptor.setBaseURL(getInputHelper().getBaseURL(n));
160 - sourceDescriptor.loadTweaks(in);
161 - in.close();
162 -
163 - sourceDescriptor.setTweaks(System.getProperties(), "wikiword.source."); //XXX: doc
164 - sourceDescriptor.setTweaks(args, "source."); //XXX: doc
165 -
166 - return sourceDescriptor;
167 - }
168 -
169 - @SuppressWarnings("unchecked")
170 - protected Functor<String, String>[] getSqlScriptManglers() {
171 - return new Functor[] {
172 - new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_prefix* \\*/"), getConfiguredDataset().getDbPrefix()),
173 - new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_db* \\*/"), getConfiguredDatasetName()),
174 - };
175 - }
176 -
17745 public static void main(String[] argv) throws Exception {
17846 LoadForeignProperties app = new LoadForeignProperties();
17947 app.launch(argv);
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java
@@ -0,0 +1,85 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.io.File;
 5+import java.net.MalformedURLException;
 6+import java.net.URL;
 7+import java.util.List;
 8+import java.util.Map;
 9+
 10+import de.brightbyte.text.Chunker;
 11+import de.brightbyte.wikiword.TweakSet;
 12+
 13+public class FeatureSetSourceDescriptor extends TweakSet {
 14+
 15+ public FeatureSetSourceDescriptor() {
 16+ this(null, null);
 17+ }
 18+
 19+ public FeatureSetSourceDescriptor(String prefix, TweakSet parent) {
 20+ super(prefix, parent);
 21+ }
 22+
 23+
 24+ public String getAuthorityName() {
 25+ String name = getTweak("authority", null);
 26+ if (name==null) throw new RuntimeException("authority name not specified!");
 27+ return name;
 28+ }
 29+
 30+ public String getDataEncoding() {
 31+ return getTweak("encoding", "UTF-8");
 32+ }
 33+
 34+ public String getSqlQuery() {
 35+ return getTweak("query", null);
 36+ }
 37+
 38+ public String getSourceFileName() { //FIXME
 39+ return getTweak("file", null);
 40+ }
 41+
 42+
 43+ public void setBaseURL(URL baseURL) {
 44+ parameters.put(".baseURL", baseURL);
 45+ }
 46+
 47+ public URL getBaseURL() {
 48+ try {
 49+ URL u = getTweak(".baseURL", (URL)null);
 50+ if (u==null) u = new File(".").toURI().toURL();
 51+ return u;
 52+ } catch (MalformedURLException e) {
 53+ return null;
 54+ }
 55+ }
 56+
 57+
 58+ public String[] getDataFields() {
 59+ List<String> v = getTweak("fields", (List<String>)null);
 60+ if (v==null) return null;
 61+ return (String[]) v.toArray(new String[v.size()]);
 62+ }
 63+
 64+
 65+ public Map<String, Chunker> getDataFieldChunkers() { //FIXME: factory/parser!
 66+ return getTweak("foreign.chunkers", (Map<String, Chunker>)null);
 67+ }
 68+
 69+ public String getPropertyValueField() {
 70+ return getTweak("property-value-field", null);
 71+ }
 72+
 73+ public String getPropertyNameField() {
 74+ return getTweak("property-name-field", null);
 75+ }
 76+
 77+ public String getPropertySubjectField() {
 78+ return getTweak("property-subject-field", null);
 79+ }
 80+
 81+ public String getPropertySubjectNameField() {
 82+ return getTweak("property-subject-name-field", null);
 83+ }
 84+
 85+
 86+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSets.java
@@ -1,7 +1,16 @@
22 package de.brightbyte.wikiword.integrator.data;
33
 4+import java.util.List;
 5+
 6+import de.brightbyte.abstraction.AbstractedAccessor;
 7+import de.brightbyte.abstraction.Abstractor;
 8+import de.brightbyte.abstraction.ConvertingAccessor;
 9+import de.brightbyte.abstraction.MultiMapAbstractor;
 10+import de.brightbyte.abstraction.PropertyAccessor;
 11+import de.brightbyte.data.Functor;
412 import de.brightbyte.data.LabeledVector;
513 import de.brightbyte.data.MapLabeledVector;
 14+import de.brightbyte.data.MultiMap;
615
716 public class FeatureSets {
817 public static FeatureSet merge(FeatureSet... sets) {
@@ -35,4 +44,26 @@
3645
3746 return c;
3847 }
 48+
 49+ public static final Abstractor<MultiMap<String, Object, List<Object>>> abstractor = new MultiMapAbstractor<Object, List<Object>>();
 50+
 51+ public static class FirstValue<V> implements Functor<V, List<Object>> {
 52+ public V apply(List<Object> obj) {
 53+ if (obj==null || obj.isEmpty()) return null;
 54+ Object v = obj.get(0);
 55+ return (V)v;
 56+ }
 57+ }
 58+
 59+ public static <V>PropertyAccessor<FeatureSet, V> fieldAccessor(String field, Class<V> type) {
 60+ if (field.startsWith("=")) { //HACK: force constant! //DOC
 61+ return (PropertyAccessor<FeatureSet, V>)(Object)new PropertyAccessor.Constant<String>(field.substring(1));
 62+ }
 63+
 64+ AbstractedAccessor<MultiMap<String, Object, List<Object>>, List<Object>> accessor =
 65+ new AbstractedAccessor<MultiMap<String, Object, List<Object>>, List<Object>>(field, abstractor);
 66+
 67+ return new ConvertingAccessor<FeatureSet, List<Object>, V>(accessor, new FirstValue<V>(), type);
 68+ }
 69+
3970 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetValueSplitter.java
@@ -36,6 +36,16 @@
3737 return m;
3838 }
3939
 40+ public static FeatureSetMultiMangler multiFromChunkerMap(Map<String, Chunker> splitters) {
 41+ FeatureSetMultiMangler m = new FeatureSetMultiMangler();
 42+
 43+ for (Map.Entry<String, Chunker>e: splitters.entrySet()) {
 44+ m.addMangler(new FeatureSetValueSplitter(e.getKey(), e.getValue()));
 45+ }
 46+
 47+ return m;
 48+ }
 49+
4050 public static FeatureSetMultiMangler multiFromStringMap(Map<String, String> splitters, int flags) {
4151 FeatureSetMultiMangler m = new FeatureSetMultiMangler();
4252
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/AssociationCursor.java
@@ -1,5 +1,7 @@
22 package de.brightbyte.wikiword.integrator.data;
33
 4+import java.util.Arrays;
 5+
46 import de.brightbyte.data.cursor.DataCursor;
57 import de.brightbyte.util.PersistenceException;
68
@@ -7,11 +9,15 @@
810
911 private DataCursor<FeatureSet> source;
1012
11 - protected String[] sourceFields;
12 - protected String[] targetFields;
13 - protected String[] propertyFields;
 13+ protected Iterable<String> sourceFields;
 14+ protected Iterable<String> targetFields;
 15+ protected Iterable<String> propertyFields;
1416
1517 public AssociationCursor(DataCursor<FeatureSet> source, String[] sourceFields, String[] targetFields, String[] propertyFields) {
 18+ this(source, Arrays.asList(sourceFields), Arrays.asList(targetFields), Arrays.asList(propertyFields));
 19+ }
 20+
 21+ public AssociationCursor(DataCursor<FeatureSet> source, Iterable<String> sourceFields, Iterable<String> targetFields, Iterable<String> propertyFields) {
1622 this.sourceFields = sourceFields;
1723 this.targetFields = targetFields;
1824 this.propertyFields = propertyFields;
@@ -32,11 +38,12 @@
3339 return new Association(source, target, props);
3440 }
3541
36 - protected FeatureSet newFeatureSet(FeatureSet row, String[] fields) {
 42+ protected FeatureSet newFeatureSet(FeatureSet row, Iterable<String> fields) {
3743 FeatureSet m = new DefaultFeatureSet();
3844
39 - for (int i=0; i<fields.length; i++) {
40 - m.putAll(fields[i], row.get(i));
 45+ int i = 0;
 46+ for (String f: fields) {
 47+ m.putAll(f, row.get(i++));
4148 }
4249
4350 return m;
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptAssociations.java
@@ -0,0 +1,98 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.io.IOException;
 5+import java.util.Arrays;
 6+
 7+import de.brightbyte.data.cursor.DataCursor;
 8+import de.brightbyte.util.PersistenceException;
 9+import de.brightbyte.wikiword.integrator.data.Association;
 10+import de.brightbyte.wikiword.integrator.data.AssociationCursor;
 11+import de.brightbyte.wikiword.integrator.data.FeatureSet;
 12+import de.brightbyte.wikiword.integrator.processor.ConceptAssociationPassThrough;
 13+import de.brightbyte.wikiword.integrator.processor.ConceptAssociationProcessor;
 14+import de.brightbyte.wikiword.integrator.store.AssociationAsMappingStoreBuilder;
 15+import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder;
 16+import de.brightbyte.wikiword.integrator.store.DatabaseConceptMappingStoreBuilder;
 17+import de.brightbyte.wikiword.integrator.store.DatabaseConceptMappingStoreBuilder.Factory;
 18+import de.brightbyte.wikiword.store.WikiWordStoreFactory;
 19+
 20+/**
 21+ * This is the primary entry point to the first phase of a WikiWord analysis.
 22+ * ImportDump can be invoked as a standalone program, use --help as a
 23+ * command line parameter for usage information.
 24+ */
 25+public class BuildConceptAssociations extends AbstractIntegratorApp<AssociationFeatureStoreBuilder, ConceptAssociationProcessor, Association> {
 26+
 27+ @Override
 28+ protected WikiWordStoreFactory<? extends AssociationFeatureStoreBuilder> createConceptStoreFactory() throws IOException, PersistenceException {
 29+ Factory mappingStoreFactory= new DatabaseConceptMappingStoreBuilder.Factory(
 30+ getTargetTableName(),
 31+ getConfiguredDataset(),
 32+ getConfiguredDataSource(),
 33+ tweaks);
 34+
 35+ FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor();
 36+
 37+ return new AssociationAsMappingStoreBuilder.Factory<DatabaseConceptMappingStoreBuilder>(
 38+ mappingStoreFactory,
 39+ sourceDescriptor.getTweak("authority-name", "=" + sourceDescriptor.getAuthorityName()),
 40+ sourceDescriptor.getTweak("foreign-id-field", (String)null),
 41+ sourceDescriptor.getTweak("foreign-name-field", (String)null),
 42+ sourceDescriptor.getTweak("concept-id-field", (String)null),
 43+ sourceDescriptor.getTweak("concept-name-field", (String)null),
 44+ sourceDescriptor.getTweak("association-via-field", (String)null),
 45+ sourceDescriptor.getTweak("association-weight-field", (String)null)
 46+ );
 47+ }
 48+
 49+ @Override
 50+ protected void run() throws Exception {
 51+ section("-- fetching properties --------------------------------------------------");
 52+ DataCursor<FeatureSet> fsc = openFeatureSetCursor();
 53+
 54+ Iterable<String> foreignFields = sourceDescriptor.getTweak("foreign-fields", (Iterable<String>)null);
 55+ Iterable<String> conceptFields = sourceDescriptor.getTweak("concept-fields", (Iterable<String>)null);
 56+ Iterable<String> propertyFields = sourceDescriptor.getTweak("property-fields", (Iterable<String>)null);
 57+
 58+ if (foreignFields==null) {
 59+ foreignFields = Arrays.asList(new String[] {
 60+ sourceDescriptor.getTweak("foreign-id-field", (String)null),
 61+ sourceDescriptor.getTweak("foreign-name-field", (String)null)
 62+ });
 63+ }
 64+
 65+ if (conceptFields==null) {
 66+ conceptFields = Arrays.asList(new String[] {
 67+ sourceDescriptor.getTweak("concept-id-field", (String)null),
 68+ sourceDescriptor.getTweak("concept-name-field", (String)null)
 69+ });
 70+ }
 71+
 72+ if (propertyFields==null) {
 73+ propertyFields = Arrays.asList(new String[] {
 74+ sourceDescriptor.getTweak("association-via-field", (String)null),
 75+ sourceDescriptor.getTweak("association-weight-field", (String)null)
 76+ });
 77+ }
 78+
 79+ DataCursor<Association> cursor =
 80+ new AssociationCursor(fsc,
 81+ sourceDescriptor.getTweak("foreign-fields", (Iterable<String>)null),
 82+ sourceDescriptor.getTweak("concept-fields", (Iterable<String>)null),
 83+ sourceDescriptor.getTweak("property-fields", (Iterable<String>)null) );
 84+
 85+ section("-- process properties --------------------------------------------------");
 86+ this.conceptStore.prepareImport();
 87+
 88+ this.propertyProcessor = new ConceptAssociationPassThrough(conceptStore); //FIXME
 89+ this.propertyProcessor.processAssociations(cursor);
 90+ cursor.close();
 91+
 92+ this.conceptStore.finalizeImport();
 93+ }
 94+
 95+ public static void main(String[] argv) throws Exception {
 96+ LoadForeignProperties app = new LoadForeignProperties();
 97+ app.launch(argv);
 98+ }
 99+}
\ No newline at end of file
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/MappingFeatureStoreBuilder.java
@@ -1,10 +0,0 @@
2 -package de.brightbyte.wikiword.integrator.store;
3 -
4 -import de.brightbyte.util.PersistenceException;
5 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
6 -import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
7 -import de.brightbyte.wikiword.store.builder.WikiWordStoreBuilder;
8 -
9 -public interface MappingFeatureStoreBuilder extends WikiWordStoreBuilder, WikiWordConceptStoreBase {
10 - public void storeMapping(FeatureSet source, FeatureSet target, FeatureSet props) throws PersistenceException;
11 -}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationAsMappingStoreBuilder.java
@@ -0,0 +1,222 @@
 2+package de.brightbyte.wikiword.integrator.store;
 3+
 4+import java.util.Map;
 5+
 6+import de.brightbyte.abstraction.PropertyAccessor;
 7+import de.brightbyte.application.Agenda;
 8+import de.brightbyte.io.Output;
 9+import de.brightbyte.util.PersistenceException;
 10+import de.brightbyte.wikiword.DatasetIdentifier;
 11+import de.brightbyte.wikiword.integrator.data.FeatureSet;
 12+import de.brightbyte.wikiword.integrator.data.FeatureSets;
 13+import de.brightbyte.wikiword.store.WikiWordStoreFactory;
 14+
 15+public class AssociationAsMappingStoreBuilder implements
 16+ AssociationFeatureStoreBuilder {
 17+
 18+ public static class Factory<F extends ConceptMappingStoreBuilder> implements WikiWordStoreFactory<AssociationAsMappingStoreBuilder> {
 19+ protected WikiWordStoreFactory<F> mappingStoreFactory;
 20+ protected PropertyAccessor<FeatureSet, String> authorityAccessor;
 21+ protected PropertyAccessor<FeatureSet, String> externalIdAccessor;
 22+ protected PropertyAccessor<FeatureSet, String> externalNameAccessor;
 23+ protected PropertyAccessor<FeatureSet, Integer> conceptIdAccessor;
 24+ protected PropertyAccessor<FeatureSet, String> conceptNameAccessor;
 25+ protected PropertyAccessor<FeatureSet, String> associationViaAccessor;
 26+ protected PropertyAccessor<FeatureSet, Double> associationWeightAccessor;
 27+
 28+ public Factory(
 29+ WikiWordStoreFactory<F> mappingStoreFactory,
 30+ String authorityField,
 31+ String externalIdField,
 32+ String externalNameField,
 33+ String conceptIdField,
 34+ String conceptNameField,
 35+ String associationViaField,
 36+ String associationWeightField) {
 37+
 38+ this(mappingStoreFactory,
 39+ FeatureSets.fieldAccessor(authorityField, String.class),
 40+ FeatureSets.fieldAccessor(externalIdField, String.class),
 41+ externalNameField==null ? null : FeatureSets.fieldAccessor(externalNameField, String.class),
 42+ FeatureSets.fieldAccessor(conceptIdField, Integer.class),
 43+ conceptNameField==null ? null : FeatureSets.fieldAccessor(conceptNameField, String.class),
 44+ associationViaField==null ? null : FeatureSets.fieldAccessor(associationViaField, String.class),
 45+ associationWeightField==null ? null : FeatureSets.fieldAccessor(associationWeightField, Double.class)
 46+ );
 47+ }
 48+
 49+ public Factory(
 50+ WikiWordStoreFactory<F> mappingStoreFactory,
 51+ PropertyAccessor<FeatureSet, String> authorityAccessor,
 52+ PropertyAccessor<FeatureSet, String> externalIdAccessor,
 53+ PropertyAccessor<FeatureSet, String> externalNameAccessor,
 54+ PropertyAccessor<FeatureSet, Integer> conceptIdAccessor,
 55+ PropertyAccessor<FeatureSet, String> conceptNameAccessor,
 56+ PropertyAccessor<FeatureSet, String> associationViaAccessor,
 57+ PropertyAccessor<FeatureSet, Double> associationWeightAccessor) {
 58+
 59+ this.mappingStoreFactory = mappingStoreFactory;
 60+ this.authorityAccessor = authorityAccessor;
 61+ this.externalIdAccessor = externalIdAccessor;
 62+ this.externalNameAccessor = externalNameAccessor;
 63+ this.conceptIdAccessor = conceptIdAccessor;
 64+ this.conceptNameAccessor = conceptNameAccessor;
 65+ this.associationViaAccessor = associationViaAccessor;
 66+ this.associationWeightAccessor = associationWeightAccessor;
 67+ }
 68+
 69+ @SuppressWarnings("unchecked")
 70+ public AssociationAsMappingStoreBuilder newStore() throws PersistenceException {
 71+ ConceptMappingStoreBuilder store = mappingStoreFactory.newStore();
 72+
 73+ return new AssociationAsMappingStoreBuilder(
 74+ store,
 75+ authorityAccessor,
 76+ externalIdAccessor,
 77+ externalNameAccessor,
 78+ conceptIdAccessor,
 79+ conceptNameAccessor,
 80+ associationViaAccessor,
 81+ associationWeightAccessor);
 82+ }
 83+ }
 84+
 85+
 86+ protected ConceptMappingStoreBuilder store;
 87+ protected PropertyAccessor<FeatureSet, String> authorityAccessor;
 88+ protected PropertyAccessor<FeatureSet, String> externalIdAccessor;
 89+ protected PropertyAccessor<FeatureSet, String> externalNameAccessor;
 90+ protected PropertyAccessor<FeatureSet, Integer> conceptIdAccessor;
 91+ protected PropertyAccessor<FeatureSet, String> conceptNameAccessor;
 92+ protected PropertyAccessor<FeatureSet, String> associationViaAccessor;
 93+ protected PropertyAccessor<FeatureSet, Double> associationWeightAccessor;
 94+
 95+ public AssociationAsMappingStoreBuilder(
 96+ ConceptMappingStoreBuilder store,
 97+ String authorityField,
 98+ String externalIdField,
 99+ String externalNameField,
 100+ String conceptIdField,
 101+ String conceptNameField,
 102+ String associationViaField,
 103+ String associationWeightField) {
 104+
 105+ this(store,
 106+ FeatureSets.fieldAccessor(authorityField, String.class),
 107+ FeatureSets.fieldAccessor(externalIdField, String.class),
 108+ externalNameField==null ? null : FeatureSets.fieldAccessor(externalNameField, String.class),
 109+ FeatureSets.fieldAccessor(conceptIdField, Integer.class),
 110+ conceptNameField==null ? null : FeatureSets.fieldAccessor(conceptNameField, String.class),
 111+ associationViaField==null ? null : FeatureSets.fieldAccessor(associationViaField, String.class),
 112+ associationWeightField==null ? null : FeatureSets.fieldAccessor(associationWeightField, Double.class)
 113+ );
 114+ }
 115+
 116+ public AssociationAsMappingStoreBuilder(
 117+ ConceptMappingStoreBuilder store,
 118+ PropertyAccessor<FeatureSet, String> authorityAccessor,
 119+ PropertyAccessor<FeatureSet, String> externalIdAccessor,
 120+ PropertyAccessor<FeatureSet, String> externalNameAccessor,
 121+ PropertyAccessor<FeatureSet, Integer> conceptIdAccessor,
 122+ PropertyAccessor<FeatureSet, String> conceptNameAccessor,
 123+ PropertyAccessor<FeatureSet, String> associationViaAccessor,
 124+ PropertyAccessor<FeatureSet, Double> associationWeightAccessor) {
 125+
 126+ super();
 127+ this.store = store;
 128+ this.authorityAccessor = authorityAccessor;
 129+ this.externalIdAccessor = externalIdAccessor;
 130+ this.externalNameAccessor = externalNameAccessor;
 131+ this.conceptIdAccessor = conceptIdAccessor;
 132+ this.conceptNameAccessor = conceptNameAccessor;
 133+ this.associationViaAccessor = associationViaAccessor;
 134+ this.associationWeightAccessor = associationWeightAccessor;
 135+ }
 136+
 137+ public void storeMapping(FeatureSet foreign, FeatureSet concept, FeatureSet props) throws PersistenceException {
 138+ String authority = authorityAccessor.getValue(foreign);
 139+ String extId = externalIdAccessor.getValue(foreign);
 140+ String extName = externalNameAccessor.getValue(foreign);
 141+ int conceptId = conceptIdAccessor.getValue(concept);
 142+ String name = conceptNameAccessor.getValue(concept);
 143+ String via = associationViaAccessor.getValue(concept);
 144+ double weight = associationWeightAccessor.getValue(concept);
 145+
 146+ authorityAccessor.getValue(foreign);
 147+ store.storeMapping( authority, extId, extName, conceptId, name, via, weight);
 148+ }
 149+
 150+ public void checkConsistency() throws PersistenceException {
 151+ store.checkConsistency();
 152+ }
 153+
 154+ public void close(boolean flush) throws PersistenceException {
 155+ store.close(flush);
 156+ }
 157+
 158+ public Agenda createAgenda() throws PersistenceException {
 159+ return store.createAgenda();
 160+ }
 161+
 162+ public void dumpTableStats(Output out) throws PersistenceException {
 163+ store.dumpTableStats(out);
 164+ }
 165+
 166+ public void finalizeImport() throws PersistenceException {
 167+ store.finalizeImport();
 168+ }
 169+
 170+ public void flush() throws PersistenceException {
 171+ store.flush();
 172+ }
 173+
 174+ public Agenda getAgenda() throws PersistenceException {
 175+ return store.getAgenda();
 176+ }
 177+
 178+ public DatasetIdentifier getDatasetIdentifier() {
 179+ return store.getDatasetIdentifier();
 180+ }
 181+
 182+ public int getNumberOfWarnings() throws PersistenceException {
 183+ return store.getNumberOfWarnings();
 184+ }
 185+
 186+ public Map<String, ? extends Number> getTableStats() throws PersistenceException {
 187+ return store.getTableStats();
 188+ }
 189+
 190+ public void initialize(boolean purge, boolean dropAll) throws PersistenceException {
 191+ store.initialize(purge, dropAll);
 192+ }
 193+
 194+ public boolean isComplete() throws PersistenceException {
 195+ return store.isComplete();
 196+ }
 197+
 198+ public void open() throws PersistenceException {
 199+ store.open();
 200+ }
 201+
 202+ public void optimize() throws PersistenceException {
 203+ store.optimize();
 204+ }
 205+
 206+ public void prepareImport() throws PersistenceException {
 207+ store.prepareImport();
 208+ }
 209+
 210+ public void setLogLevel(int loglevel) {
 211+ store.setLogLevel(loglevel);
 212+ }
 213+
 214+ public void storeMapping(String authority, String extId, String extName, int concept, String name, String via, double weight) throws PersistenceException {
 215+ store.storeMapping(authority, extId, extName, concept, name, via, weight);
 216+ }
 217+
 218+ public void storeWarning(int rcId, String problem, String details) throws PersistenceException {
 219+ store.storeWarning(rcId, problem, details);
 220+ }
 221+
 222+
 223+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/DatabaseConceptMappingStoreBuilder.java
@@ -3,21 +3,49 @@
44 import java.sql.Connection;
55 import java.sql.SQLException;
66
 7+import javax.sql.DataSource;
 8+
79 import de.brightbyte.application.Agenda;
810 import de.brightbyte.db.Inserter;
911 import de.brightbyte.db.RelationTable;
1012 import de.brightbyte.util.PersistenceException;
1113 import de.brightbyte.wikiword.Corpus;
 14+import de.brightbyte.wikiword.DatasetIdentifier;
1215 import de.brightbyte.wikiword.TweakSet;
 16+import de.brightbyte.wikiword.store.WikiWordStoreFactory;
1317 import de.brightbyte.wikiword.store.builder.DatabaseWikiWordStoreBuilder;
1418
1519 public class DatabaseConceptMappingStoreBuilder extends DatabaseWikiWordStoreBuilder implements ConceptMappingStoreBuilder {
1620
 21+ public static class Factory implements WikiWordStoreFactory<DatabaseConceptMappingStoreBuilder> {
 22+ private String table;
 23+ private DataSource db;
 24+ private DatasetIdentifier dataset;
 25+ private TweakSet tweaks;
 26+
 27+ public Factory(String table, DatasetIdentifier dataset, DataSource db, TweakSet tweaks) {
 28+ super();
 29+ this.table = table;
 30+ this.db = db;
 31+ this.dataset = dataset;
 32+ this.tweaks = tweaks;
 33+ }
 34+
 35+ @SuppressWarnings("unchecked")
 36+ public DatabaseConceptMappingStoreBuilder newStore() throws PersistenceException {
 37+ try {
 38+ return new DatabaseConceptMappingStoreBuilder(table, dataset, db.getConnection(), tweaks);
 39+ } catch (SQLException e) {
 40+ throw new PersistenceException(e);
 41+ }
 42+ }
 43+ }
 44+
1745 protected RelationTable mappingTable;
1846 protected Inserter mappingInserter;
1947 protected IntegratorSchema integratorSchema;
2048
21 - public DatabaseConceptMappingStoreBuilder(String table, Corpus corpus, Connection connection, TweakSet tweaks) throws SQLException, PersistenceException {
 49+ public DatabaseConceptMappingStoreBuilder(String table, DatasetIdentifier corpus, Connection connection, TweakSet tweaks) throws SQLException, PersistenceException {
2250 this(table, new IntegratorSchema(corpus, connection, tweaks, true), tweaks, null);
2351 }
2452
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationFeatureStoreBuilder.java
@@ -0,0 +1,10 @@
 2+package de.brightbyte.wikiword.integrator.store;
 3+
 4+import de.brightbyte.util.PersistenceException;
 5+import de.brightbyte.wikiword.integrator.data.FeatureSet;
 6+import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
 7+import de.brightbyte.wikiword.store.builder.WikiWordStoreBuilder;
 8+
 9+public interface AssociationFeatureStoreBuilder extends WikiWordStoreBuilder, WikiWordConceptStoreBase {
 10+ public void storeMapping(FeatureSet foreign, FeatureSet concept, FeatureSet props) throws PersistenceException;
 11+}
Property changes on: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationFeatureStoreBuilder.java
___________________________________________________________________
Added: svn:mergeinfo
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/OptimalMappingSelector.java
@@ -11,25 +11,25 @@
1212 import de.brightbyte.util.PersistenceException;
1313 import de.brightbyte.wikiword.integrator.data.FeatureSet;
1414 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
15 -import de.brightbyte.wikiword.integrator.store.MappingFeatureStoreBuilder;
 15+import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder;
1616
1717 public class OptimalMappingSelector extends ConceptMappingPassThrough {
1818
1919 protected Optimum<FeatureSet> optimum;
2020
21 - public OptimalMappingSelector(MappingFeatureStoreBuilder store, String property, Functor<Number, ? extends Collection<Number>> aggregator) {
 21+ public OptimalMappingSelector(AssociationFeatureStoreBuilder store, String property, Functor<Number, ? extends Collection<Number>> aggregator) {
2222 this(store, (Comparator<FeatureSet>)(Object)PropertyComparator.newMultiMapEntryComparator(property, (Comparator<Number>)(Object)NaturalComparator.instance, aggregator, Number.class));
2323 }
2424
25 - public OptimalMappingSelector(MappingFeatureStoreBuilder store, PropertyAccessor<FeatureSet, Number> accessor) {
 25+ public OptimalMappingSelector(AssociationFeatureStoreBuilder store, PropertyAccessor<FeatureSet, Number> accessor) {
2626 this(store, new Optimum<FeatureSet>(accessor));
2727 }
2828
29 - public OptimalMappingSelector(MappingFeatureStoreBuilder store, Comparator<FeatureSet> comp) {
 29+ public OptimalMappingSelector(AssociationFeatureStoreBuilder store, Comparator<FeatureSet> comp) {
3030 this(store, new Optimum<FeatureSet>(comp));
3131 }
3232
33 - public OptimalMappingSelector(MappingFeatureStoreBuilder store, Optimum<FeatureSet> optimum) {
 33+ public OptimalMappingSelector(AssociationFeatureStoreBuilder store, Optimum<FeatureSet> optimum) {
3434 super(store);
3535
3636 if (optimum==null) throw new NullPointerException();
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptMappingProcessor.java
@@ -4,6 +4,6 @@
55 import de.brightbyte.util.PersistenceException;
66 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
77
8 -public interface ConceptMappingProcessor {
 8+public interface ConceptMappingProcessor extends WikiWordProcessor {
99 public void processMappings(DataCursor<MappingCandidates> cursor) throws PersistenceException;
1010 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptAssociationProcessor.java
@@ -0,0 +1,9 @@
 2+package de.brightbyte.wikiword.integrator.processor;
 3+
 4+import de.brightbyte.data.cursor.DataCursor;
 5+import de.brightbyte.util.PersistenceException;
 6+import de.brightbyte.wikiword.integrator.data.Association;
 7+
 8+public interface ConceptAssociationProcessor extends WikiWordProcessor {
 9+ public void processAssociations(DataCursor<Association> cursor) throws PersistenceException;
 10+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/AbstractConceptAssociationProcessor.java
@@ -0,0 +1,19 @@
 2+package de.brightbyte.wikiword.integrator.processor;
 3+
 4+import de.brightbyte.data.cursor.DataCursor;
 5+import de.brightbyte.util.PersistenceException;
 6+import de.brightbyte.wikiword.integrator.data.Association;
 7+
 8+public abstract class AbstractConceptAssociationProcessor extends AbstractProcessor<Association> implements ConceptAssociationProcessor {
 9+
 10+ public void processAssociations(DataCursor<Association> cursor) throws PersistenceException {
 11+ process(cursor);
 12+ }
 13+
 14+ protected void processEntry(Association e) throws PersistenceException {
 15+ processAssociation(e);
 16+ }
 17+
 18+ protected abstract void processAssociation(Association m) throws PersistenceException;
 19+
 20+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ForeignPropertyProcessor.java
@@ -4,6 +4,6 @@
55 import de.brightbyte.util.PersistenceException;
66 import de.brightbyte.wikiword.integrator.data.ForeignEntity;
77
8 -public interface ForeignPropertyProcessor {
 8+public interface ForeignPropertyProcessor extends WikiWordProcessor {
99 public void processProperties(DataCursor<ForeignEntity> cursor) throws PersistenceException;
1010 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptMappingPassThrough.java
@@ -3,12 +3,12 @@
44 import de.brightbyte.util.PersistenceException;
55 import de.brightbyte.wikiword.integrator.data.FeatureSet;
66 import de.brightbyte.wikiword.integrator.data.MappingCandidates;
7 -import de.brightbyte.wikiword.integrator.store.MappingFeatureStoreBuilder;
 7+import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder;
88
99 public class ConceptMappingPassThrough extends AbstractConceptMappingProcessor {
10 - protected MappingFeatureStoreBuilder store;
 10+ protected AssociationFeatureStoreBuilder store;
1111
12 - public ConceptMappingPassThrough(MappingFeatureStoreBuilder store) {
 12+ public ConceptMappingPassThrough(AssociationFeatureStoreBuilder store) {
1313 if (store==null) throw new NullPointerException();
1414 this.store = store;
1515 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/WikiWordProcessor.java
@@ -0,0 +1,5 @@
 2+package de.brightbyte.wikiword.integrator.processor;
 3+
 4+public interface WikiWordProcessor {
 5+
 6+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptAssociationPassThrough.java
@@ -0,0 +1,19 @@
 2+package de.brightbyte.wikiword.integrator.processor;
 3+
 4+import de.brightbyte.util.PersistenceException;
 5+import de.brightbyte.wikiword.integrator.data.Association;
 6+import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder;
 7+
 8+public class ConceptAssociationPassThrough extends AbstractConceptAssociationProcessor {
 9+ protected AssociationFeatureStoreBuilder store;
 10+
 11+ public ConceptAssociationPassThrough(AssociationFeatureStoreBuilder store) {
 12+ if (store==null) throw new NullPointerException();
 13+ this.store = store;
 14+ }
 15+
 16+ protected void processAssociation(Association m) throws PersistenceException {
 17+ store.storeMapping(m.getSourceItem(), m.getTargetItem(), m.getProperties());
 18+ }
 19+
 20+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java
@@ -0,0 +1,152 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.io.IOException;
 5+import java.io.InputStream;
 6+import java.sql.Connection;
 7+import java.sql.ResultSet;
 8+import java.sql.SQLException;
 9+import java.util.Arrays;
 10+import java.util.Collection;
 11+import java.util.Map;
 12+import java.util.regex.Pattern;
 13+
 14+import de.brightbyte.data.Functor;
 15+import de.brightbyte.data.cursor.DataCursor;
 16+import de.brightbyte.db.SqlScriptRunner;
 17+import de.brightbyte.io.IOUtil;
 18+import de.brightbyte.text.Chunker;
 19+import de.brightbyte.util.PersistenceException;
 20+import de.brightbyte.wikiword.StoreBackedApp;
 21+import de.brightbyte.wikiword.builder.InputFileHelper;
 22+import de.brightbyte.wikiword.integrator.data.AssemblingFeatureSetCursor;
 23+import de.brightbyte.wikiword.integrator.data.FeatureSet;
 24+import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter;
 25+import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor;
 26+import de.brightbyte.wikiword.integrator.data.ResultSetFeatureSetCursor;
 27+import de.brightbyte.wikiword.integrator.data.TsvFeatureSetCursor;
 28+import de.brightbyte.wikiword.integrator.processor.WikiWordProcessor;
 29+import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
 30+
 31+/**
 32+ * This is the primary entry point to the first phase of a WikiWord analysis.
 33+ * ImportDump can be invoked as a standalone program, use --help as a
 34+ * command line parameter for usage information.
 35+ */
 36+public abstract class AbstractIntegratorApp<S extends WikiWordConceptStoreBase, P extends WikiWordProcessor, E> extends StoreBackedApp<S> {
 37+
 38+ //protected ForeignPropertyStoreBuilder propertyStore;
 39+ protected InputFileHelper inputHelper;
 40+ protected P propertyProcessor;
 41+ protected FeatureSetSourceDescriptor sourceDescriptor;
 42+
 43+ public AbstractIntegratorApp() {
 44+ super(true, true);
 45+ }
 46+
 47+ protected InputFileHelper getInputHelper() {
 48+ if (inputHelper==null) {
 49+ inputHelper = new InputFileHelper(tweaks);
 50+ }
 51+ return inputHelper;
 52+ }
 53+
 54+ protected String getTargetTableName() throws IOException {
 55+ if (args.getParameterCount() > 2) return args.getParameter(2);
 56+
 57+ String authority = getSourceDescriptor().getAuthorityName();
 58+ authority = authority.replaceAll("[^\\w\\d]", "_").toLowerCase();
 59+
 60+ return authority+"_property";
 61+ }
 62+
 63+ protected String getSourceDescriptionFileName() {
 64+ if (args.getParameterCount() < 2) throw new IllegalArgumentException("missing second parameter (descripion file name)");
 65+ return args.getParameter(1);
 66+ }
 67+
 68+ @Override
 69+ protected void declareOptions() {
 70+ super.declareOptions();
 71+
 72+ args.declareHelp("<wiki>", null);
 73+ args.declareHelp("<dataset>", "name of the wiki/thesaurus to process");
 74+ args.declare("dataset", null, true, String.class, "sets the wiki name (overrides the <wiki-or-dump> parameter)");
 75+ }
 76+
 77+ protected DataCursor<FeatureSet> openFeatureSetCursor() throws IOException, SQLException, PersistenceException {
 78+ FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor();
 79+
 80+ String enc = sourceDescriptor.getDataEncoding();
 81+ String sql = sourceDescriptor.getSqlQuery();
 82+ InputStream in = null;
 83+
 84+ if (sql==null) {
 85+ String n = sourceDescriptor.getSourceFileName();
 86+ String format = getInputHelper().getFormat(n); //FIXME: explicit format!
 87+ in = getInputHelper().open(sourceDescriptor.getBaseURL(), n);
 88+
 89+ if (format!=null && format.equals("sql")) {
 90+ sql = IOUtil.slurp(in, enc);
 91+
 92+ in.close();
 93+ in = null;
 94+ }
 95+ }
 96+
 97+ DataCursor<FeatureSet> fsc;
 98+ String[] fields = sourceDescriptor.getDataFields();
 99+
 100+ if (sql!=null) {
 101+ Collection<Functor<String, String>> manglers = Arrays.asList(getSqlScriptManglers());
 102+ Connection con = getConfiguredDataSource().getConnection();
 103+ ResultSet rs = SqlScriptRunner.runQuery(con, sql, manglers);
 104+
 105+ fsc = new ResultSetFeatureSetCursor(rs, fields);
 106+ } else {
 107+ fsc = new TsvFeatureSetCursor(in, enc);
 108+
 109+ if (fields!=null) ((TsvFeatureSetCursor)fsc).setFields(fields);
 110+ else ((TsvFeatureSetCursor)fsc).readFields();
 111+ }
 112+
 113+ String propField = sourceDescriptor.getPropertyNameField();
 114+ if (propField!=null) {
 115+ String valueField = sourceDescriptor.getPropertyValueField();
 116+ String subjectField = sourceDescriptor.getPropertySubjectField();
 117+ fsc = new AssemblingFeatureSetCursor(fsc, subjectField, propField, valueField);
 118+ }
 119+
 120+ Map<String, Chunker> splitters = sourceDescriptor.getDataFieldChunkers();
 121+ if (splitters!=null) {
 122+ fsc = new MangelingFeatureSetCursor(fsc, FeatureSetValueSplitter.multiFromChunkerMap(splitters));
 123+ }
 124+
 125+ return fsc;
 126+ }
 127+
 128+ protected FeatureSetSourceDescriptor getSourceDescriptor() throws IOException {
 129+ if (sourceDescriptor!=null) return sourceDescriptor;
 130+
 131+ sourceDescriptor = new FeatureSetSourceDescriptor("source", tweaks);
 132+
 133+ String n = getSourceDescriptionFileName();
 134+ InputStream in = getInputHelper().open(n);
 135+ sourceDescriptor.setBaseURL(getInputHelper().getBaseURL(n));
 136+ sourceDescriptor.loadTweaks(in);
 137+ in.close();
 138+
 139+ sourceDescriptor.setTweaks(System.getProperties(), "wikiword.source."); //XXX: doc
 140+ sourceDescriptor.setTweaks(args, "source."); //XXX: doc
 141+
 142+ return sourceDescriptor;
 143+ }
 144+
 145+ @SuppressWarnings("unchecked")
 146+ protected Functor<String, String>[] getSqlScriptManglers() {
 147+ return new Functor[] {
 148+ new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_prefix* \\*/"), getConfiguredDataset().getDbPrefix()),
 149+ new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_db* \\*/"), getConfiguredDatasetName()),
 150+ };
 151+ }
 152+
 153+}
\ No newline at end of file
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/TweakSet.java
@@ -21,14 +21,17 @@
2222 */
2323 public class TweakSet {
2424 protected Map<String, Object> parameters = new HashMap<String, Object>();
25 - protected TweakSet parent;
2625
 26+ private TweakSet parent;
 27+ private String prefix;
 28+
2729 public TweakSet() {
28 - this(null);
 30+ this(null, null);
2931 }
3032
31 - public TweakSet(TweakSet parent) {
 33+ public TweakSet(String prefix, TweakSet parent) {
3234 this.parent = parent;
 35+ this.prefix = prefix;
3336 }
3437
3538 public void loadTweaks(File f) throws IOException {
@@ -97,9 +100,10 @@
98101 public <T>T getTweak(String key, T def) {
99102 if (!parameters.containsKey(key)) {
100103 if (parent==null) return def;
101 - else return parent.getTweak(key, def);
 104+ else return parent.getTweak(prefix==null ? key : prefix + key, def);
102105 } else {
103106 return (T)parameters.get(key);
104107 }
105108 }
 109+
106110 }

Status & tagging log