Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/InputFileHelper.java |
— | — | @@ -32,7 +32,7 @@ |
33 | 33 | externalBunzip = bz2; |
34 | 34 | } |
35 | 35 | |
36 | | - protected static final Pattern extensionPattern = Pattern.compile("\\.([^./\\]+)(\\.gz|\\.bz2)$", Pattern.CASE_INSENSITIVE); |
| 36 | + protected static final Pattern extensionPattern = Pattern.compile("\\.([^./\\\\]+)(\\.gz|\\.bz2)$", Pattern.CASE_INSENSITIVE); |
37 | 37 | |
38 | 38 | public String getFormat(String n) { |
39 | 39 | Matcher m = extensionPattern.matcher(n); |
— | — | @@ -41,17 +41,48 @@ |
42 | 42 | else return m.group(1).toLowerCase(); |
43 | 43 | } |
44 | 44 | |
| 45 | + public URL getBaseURL(String n) { /* Converts a URL string or file name into a URL; throws IllegalArgumentException if neither conversion works. */ |
| 46 | + if (n.equals("-")) n = new File(".").getAbsolutePath(); /* "-" is replaced by the absolute path of the current directory */ |
| 47 | + |
| 48 | + try { |
| 49 | + URL u = new URL(n); /* first attempt: n is already a well-formed URL */ |
| 50 | + return u; |
| 51 | + } catch (MalformedURLException e) { |
| 52 | + //ignore and continue |
| 53 | + } |
| 54 | + |
| 55 | + try { |
| 56 | + File f = new File(n); /* second attempt: treat n as a local file name */ |
| 57 | + return f.toURI().toURL(); |
| 58 | + } catch (MalformedURLException e) { |
| 59 | + throw new IllegalArgumentException("failed to convert file name to URL: "+n, e); /* chain the cause so the original failure is not lost */ |
| 60 | + } |
| 61 | + } |
| 62 | + |
45 | 63 | public InputStream open(String n) throws IOException { /* Opens n without a base URL by delegating to open(URL, String) with a null base. */ |
| 64 | + return open(null, n); |
| 65 | + } |
| 66 | + |
| 67 | + public InputStream open(URL base, String n) throws IOException { /* Opens n as standard input ("-"), as a URL, or as a file; base (may be null) is used to resolve n. */ |
46 | 68 | if (n.equals("-")) return new BufferedInputStream(System.in); |
47 | 69 | |
48 | 70 | try { |
49 | | - URL u = new URL(n); |
| 71 | + URL u = base == null || base.getProtocol().equals("file") ? new URL(n) : new URL(base, n); /* a non-file base resolves n as a URL; with a null or file base, n must be a full URL by itself */ |
50 | 72 | return openURL(u); |
51 | 73 | } catch (MalformedURLException e) { |
52 | 74 | //ignore and continue |
53 | 75 | } |
54 | 76 | |
55 | | - File f = new File(n); |
| 77 | + File f; |
| 78 | + |
| 79 | + if (base!=null && base.getProtocol().equals("file")) { /* file base: resolve n against the base location on disk */ |
| 80 | + File b = new File(base.getPath()); /* NOTE(review): URL.getPath() is used verbatim as a file path; percent-escaped characters (e.g. %20) are presumably not decoded - confirm */ |
| 81 | + if (b.isFile()) b = b.getParentFile(); /* base names an existing file: use its parent directory */ |
| 82 | + f = new File(b, n); |
| 83 | + } else { |
| 84 | + f = new File(n); /* no usable file base: take n as given */ |
| 85 | + } |
| 86 | + |
56 | 87 | return openFile(f); |
57 | 88 | } |
58 | 89 | |
— | — | @@ -155,6 +186,6 @@ |
156 | 187 | slurper.start(); |
157 | 188 | |
158 | 189 | return new BufferedInputStream(proc.getInputStream()); |
159 | | - } |
| 190 | + } |
160 | 191 | |
161 | 192 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/ForeignEntityStoreDescriptor.java |
— | — | @@ -1,79 +0,0 @@ |
2 | | -package de.brightbyte.wikiword.integrator; |
3 | | - |
4 | | -import java.io.File; |
5 | | -import java.net.MalformedURLException; |
6 | | -import java.net.URL; |
7 | | -import java.util.List; |
8 | | -import java.util.Map; |
9 | | - |
10 | | -import de.brightbyte.wikiword.TweakSet; |
11 | | - |
12 | | -public class ForeignEntityStoreDescriptor extends TweakSet { |
13 | | - |
14 | | - public ForeignEntityStoreDescriptor() { |
15 | | - super(); |
16 | | - } |
17 | | - |
18 | | - public ForeignEntityStoreDescriptor(TweakSet parent) { |
19 | | - super(parent); |
20 | | - } |
21 | | - |
22 | | - public String getDataEncoding() { |
23 | | - return getTweak("foreign.encoding", "UTF-8"); |
24 | | - } |
25 | | - |
26 | | - public String getSqlQuery() { |
27 | | - return getTweak("foreign.query", null); |
28 | | - } |
29 | | - |
30 | | - public String getSourceFileName() { //FIXME |
31 | | - return getTweak("foreign.file", null); |
32 | | - } |
33 | | - |
34 | | - public String[] getDataFields() { |
35 | | - List<String> v = getTweak("foreign.fields", (List<String>)null); |
36 | | - if (v==null) return null; |
37 | | - return (String[]) v.toArray(new String[v.size()]); |
38 | | - } |
39 | | - |
40 | | - public Map<String, String> getSplitExpressions() { //FIXME:! |
41 | | - return getTweak("split", (Map<String, String>)null); |
42 | | - } |
43 | | - |
44 | | - public String getPropertyValueField() { |
45 | | - return getTweak("foreign.property-value-field", null); |
46 | | - } |
47 | | - |
48 | | - public String getPropertyNameField() { |
49 | | - return getTweak("foreign.property-name-field", null); |
50 | | - } |
51 | | - |
52 | | - public String getConceptIdField() { |
53 | | - return getTweak("foreign.concept-id-field", null); |
54 | | - } |
55 | | - |
56 | | - public String getConceptNameField() { |
57 | | - return getTweak("foreign.concept-name-field", "name"); |
58 | | - } |
59 | | - |
60 | | - public String getAuthorityName() { |
61 | | - String name = getTweak("foreign.authority", null); |
62 | | - if (name==null) throw new RuntimeException("authority name not specified!"); |
63 | | - return name; |
64 | | - } |
65 | | - |
66 | | - public void setBaseURL(URL baseURL) { |
67 | | - parameters.put(".baseURL", baseURL); |
68 | | - } |
69 | | - |
70 | | - public URL getBaseURL() { |
71 | | - try { |
72 | | - URL u = getTweak(".baseURL", (URL)null); |
73 | | - if (u==null) u = new File(".").toURI().toURL(); |
74 | | - return u; |
75 | | - } catch (MalformedURLException e) { |
76 | | - return null; |
77 | | - } |
78 | | - } |
79 | | - |
80 | | -} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/LoadForeignProperties.java |
— | — | @@ -1,30 +1,12 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator; |
3 | 3 | |
4 | 4 | import java.io.IOException; |
5 | | -import java.io.InputStream; |
6 | | -import java.sql.Connection; |
7 | | -import java.sql.ResultSet; |
8 | | -import java.sql.SQLException; |
9 | | -import java.util.Arrays; |
10 | | -import java.util.Collection; |
11 | | -import java.util.Map; |
12 | | -import java.util.regex.Pattern; |
13 | 5 | |
14 | | -import de.brightbyte.data.Functor; |
15 | 6 | import de.brightbyte.data.cursor.DataCursor; |
16 | | -import de.brightbyte.db.SqlScriptRunner; |
17 | | -import de.brightbyte.io.IOUtil; |
18 | 7 | import de.brightbyte.util.PersistenceException; |
19 | | -import de.brightbyte.wikiword.StoreBackedApp; |
20 | | -import de.brightbyte.wikiword.builder.InputFileHelper; |
21 | | -import de.brightbyte.wikiword.integrator.data.AssemblingFeatureSetCursor; |
22 | 8 | import de.brightbyte.wikiword.integrator.data.FeatureSet; |
23 | | -import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter; |
24 | 9 | import de.brightbyte.wikiword.integrator.data.ForeignEntity; |
25 | 10 | import de.brightbyte.wikiword.integrator.data.ForeignEntityCursor; |
26 | | -import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor; |
27 | | -import de.brightbyte.wikiword.integrator.data.ResultSetFeatureSetCursor; |
28 | | -import de.brightbyte.wikiword.integrator.data.TsvFeatureSetCursor; |
29 | 11 | import de.brightbyte.wikiword.integrator.processor.ForeignPropertyPassThrough; |
30 | 12 | import de.brightbyte.wikiword.integrator.processor.ForeignPropertyProcessor; |
31 | 13 | import de.brightbyte.wikiword.integrator.store.DatabaseForeignPropertyStoreBuilder; |
— | — | @@ -36,56 +18,18 @@ |
37 | 19 | * ImportDump can be invoked as a standalone program, use --help as a |
38 | 20 | * command line parameter for usage information. |
39 | 21 | */ |
40 | | -public class LoadForeignProperties extends StoreBackedApp<ForeignPropertyStoreBuilder> { |
41 | | - |
42 | | - //protected ForeignPropertyStoreBuilder propertyStore; |
43 | | - protected ForeignPropertyProcessor propertyProcessor; |
44 | | - protected InputFileHelper inputHelper; |
45 | | - private ForeignEntityStoreDescriptor sourceDescriptor; |
| 22 | +public class LoadForeignProperties extends AbstractIntegratorApp<ForeignPropertyStoreBuilder, ForeignPropertyProcessor, ForeignEntity> { |
46 | 23 | |
47 | | - public LoadForeignProperties() { |
48 | | - super(true, true); |
49 | | - } |
50 | | - |
51 | | - protected InputFileHelper getInputHelper() { |
52 | | - if (inputHelper==null) { |
53 | | - inputHelper = new InputFileHelper(tweaks); |
54 | | - } |
55 | | - return inputHelper; |
56 | | - } |
57 | | - |
58 | 25 | @Override |
59 | 26 | protected WikiWordStoreFactory<? extends ForeignPropertyStoreBuilder> createConceptStoreFactory() throws IOException, PersistenceException { /* Builds the store factory from the target table name, the configured dataset and data source, and the tweaks. */ |
60 | 27 | return new DatabaseForeignPropertyStoreBuilder.Factory(getTargetTableName(), getConfiguredDataset(), getConfiguredDataSource(), tweaks); |
61 | 28 | } |
62 | 28 | |
63 | | - protected String getTargetTableName() throws IOException { |
64 | | - if (args.getParameterCount() > 2) return args.getParameter(2); |
65 | | - |
66 | | - String authority = getSourceDescriptor().getAuthorityName(); |
67 | | - authority = authority.replaceAll("[^\\w\\d]", "_").toLowerCase(); |
68 | | - |
69 | | - return authority+"_property"; |
70 | | - } |
71 | | - |
72 | | - protected String getSourceDescriptionFileName() { |
73 | | - if (args.getParameterCount() < 2) throw new IllegalArgumentException("missing second parameter (descripion file name)"); |
74 | | - return args.getParameter(1); |
75 | | - } |
76 | | - |
77 | 29 | @Override |
78 | | - protected void declareOptions() { |
79 | | - super.declareOptions(); |
80 | | - |
81 | | - args.declareHelp("<wiki>", null); |
82 | | - args.declareHelp("<dataset>", "name of the wiki/thesaurus to process"); |
83 | | - args.declare("dataset", null, true, String.class, "sets the wiki name (overrides the <wiki-or-dump> parameter)"); |
84 | | - } |
85 | | - |
86 | | - @Override |
87 | 30 | protected void run() throws Exception { |
88 | 31 | section("-- fetching properties --------------------------------------------------"); |
89 | | - DataCursor<ForeignEntity> cursor = openPropertySource(); |
| 32 | + DataCursor<FeatureSet> fsc = openFeatureSetCursor(); |
| 33 | + DataCursor<ForeignEntity> cursor = new ForeignEntityCursor(fsc, sourceDescriptor.getAuthorityName(), sourceDescriptor.getPropertySubjectField(), sourceDescriptor.getPropertySubjectNameField()); |
90 | 34 | |
91 | 35 | section("-- process properties --------------------------------------------------"); |
92 | 36 | this.conceptStore.prepareImport(); |
— | — | @@ -96,83 +40,7 @@ |
97 | 41 | |
98 | 42 | this.conceptStore.finalizeImport(); |
99 | 43 | } |
100 | | - |
101 | | - protected DataCursor<ForeignEntity> openPropertySource() throws IOException, SQLException, PersistenceException { |
102 | | - ForeignEntityStoreDescriptor sourceDescriptor = getSourceDescriptor(); |
103 | | - |
104 | | - String enc = sourceDescriptor.getDataEncoding(); |
105 | | - String sql = sourceDescriptor.getSqlQuery(); |
106 | | - InputStream in = null; |
107 | | - |
108 | | - if (sql==null) { |
109 | | - String n = sourceDescriptor.getSourceFileName(); |
110 | | - String format = getInputHelper().getFormat(n); //FIXME: explicit format! |
111 | | - in = getInputHelper().open(sourceDescriptor.getBaseURL(), n); |
112 | | - |
113 | | - if (format!=null && format.equals("sql")) { |
114 | | - sql = IOUtil.slurp(in, enc); |
115 | | - |
116 | | - in.close(); |
117 | | - in = null; |
118 | | - } |
119 | | - } |
120 | | - |
121 | | - DataCursor<FeatureSet> fsc; |
122 | | - String[] fields = sourceDescriptor.getDataFields(); |
123 | | - |
124 | | - if (sql!=null) { |
125 | | - Collection<Functor<String, String>> manglers = Arrays.asList(getSqlScriptManglers()); |
126 | | - Connection con = getConfiguredDataSource().getConnection(); |
127 | | - ResultSet rs = SqlScriptRunner.runQuery(con, sql, manglers); |
128 | | - |
129 | | - fsc = new ResultSetFeatureSetCursor(rs, fields); |
130 | | - } else { |
131 | | - fsc = new TsvFeatureSetCursor(in, enc); |
132 | | - |
133 | | - if (fields!=null) ((TsvFeatureSetCursor)fsc).setFields(fields); |
134 | | - else ((TsvFeatureSetCursor)fsc).readFields(); |
135 | | - } |
136 | | - |
137 | | - String propField = sourceDescriptor.getPropertyNameField(); |
138 | | - if (propField!=null) { |
139 | | - String valueField = sourceDescriptor.getPropertyValueField(); |
140 | | - String idField = sourceDescriptor.getConceptIdField(); |
141 | | - fsc = new AssemblingFeatureSetCursor(fsc, idField, propField, valueField); |
142 | | - } |
143 | | - |
144 | | - Map<String, String> splitExp = sourceDescriptor.getSplitExpressions(); |
145 | | - if (splitExp!=null) { |
146 | | - fsc = new MangelingFeatureSetCursor(fsc, FeatureSetValueSplitter.multiFromStringMap(splitExp, 0)); |
147 | | - } |
148 | | - |
149 | | - return new ForeignEntityCursor(fsc, sourceDescriptor.getAuthorityName(), sourceDescriptor.getConceptIdField(), sourceDescriptor.getConceptNameField()); |
150 | | - } |
151 | 44 | |
152 | | - protected ForeignEntityStoreDescriptor getSourceDescriptor() throws IOException { |
153 | | - if (sourceDescriptor!=null) return sourceDescriptor; |
154 | | - |
155 | | - sourceDescriptor = new ForeignEntityStoreDescriptor(tweaks); |
156 | | - |
157 | | - String n = getSourceDescriptionFileName(); |
158 | | - InputStream in = getInputHelper().open(n); |
159 | | - sourceDescriptor.setBaseURL(getInputHelper().getBaseURL(n)); |
160 | | - sourceDescriptor.loadTweaks(in); |
161 | | - in.close(); |
162 | | - |
163 | | - sourceDescriptor.setTweaks(System.getProperties(), "wikiword.source."); //XXX: doc |
164 | | - sourceDescriptor.setTweaks(args, "source."); //XXX: doc |
165 | | - |
166 | | - return sourceDescriptor; |
167 | | - } |
168 | | - |
169 | | - @SuppressWarnings("unchecked") |
170 | | - protected Functor<String, String>[] getSqlScriptManglers() { |
171 | | - return new Functor[] { |
172 | | - new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_prefix* \\*/"), getConfiguredDataset().getDbPrefix()), |
173 | | - new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_db* \\*/"), getConfiguredDatasetName()), |
174 | | - }; |
175 | | - } |
176 | | - |
177 | 45 | public static void main(String[] argv) throws Exception { |
178 | 46 | LoadForeignProperties app = new LoadForeignProperties(); |
179 | 47 | app.launch(argv); |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java |
— | — | @@ -0,0 +1,85 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import java.io.File; |
| 5 | +import java.net.MalformedURLException; |
| 6 | +import java.net.URL; |
| 7 | +import java.util.List; |
| 8 | +import java.util.Map; |
| 9 | + |
| 10 | +import de.brightbyte.text.Chunker; |
| 11 | +import de.brightbyte.wikiword.TweakSet; |
| 12 | + |
| 13 | +public class FeatureSetSourceDescriptor extends TweakSet { /* TweakSet with named getters for the settings that describe a feature set source. */ |
| 14 | + |
| 15 | + public FeatureSetSourceDescriptor() { /* Creates a descriptor with no prefix and no parent. */ |
| 16 | + this(null, null); |
| 17 | + } |
| 18 | + |
| 19 | + public FeatureSetSourceDescriptor(String prefix, TweakSet parent) { /* Passes prefix and parent straight to the TweakSet constructor. */ |
| 20 | + super(prefix, parent); |
| 21 | + } |
| 22 | + |
| 23 | + |
| 24 | + public String getAuthorityName() { /* Returns the "authority" tweak; throws RuntimeException if it is not set. */ |
| 25 | + String name = getTweak("authority", null); |
| 26 | + if (name==null) throw new RuntimeException("authority name not specified!"); |
| 27 | + return name; |
| 28 | + } |
| 29 | + |
| 30 | + public String getDataEncoding() { /* Returns the "encoding" tweak, defaulting to UTF-8. */ |
| 31 | + return getTweak("encoding", "UTF-8"); |
| 32 | + } |
| 33 | + |
| 34 | + public String getSqlQuery() { /* Returns the "query" tweak, or null if it is not set. */ |
| 35 | + return getTweak("query", null); |
| 36 | + } |
| 37 | + |
| 38 | + public String getSourceFileName() { //FIXME |
| 39 | + return getTweak("file", null); /* null if the "file" tweak is not set */ |
| 40 | + } |
| 41 | + |
| 42 | + |
| 43 | + public void setBaseURL(URL baseURL) { /* Stores baseURL directly in the parameters under the key ".baseURL". */ |
| 44 | + parameters.put(".baseURL", baseURL); |
| 45 | + } |
| 46 | + |
| 47 | + public URL getBaseURL() { /* Returns the ".baseURL" tweak, falling back to the current directory as a URL. */ |
| 48 | + try { |
| 49 | + URL u = getTweak(".baseURL", (URL)null); |
| 50 | + if (u==null) u = new File(".").toURI().toURL(); /* nothing stored: use the current directory */ |
| 51 | + return u; |
| 52 | + } catch (MalformedURLException e) { |
| 53 | + return null; /* NOTE(review): the conversion failure is swallowed and callers receive null - confirm callers handle that */ |
| 54 | + } |
| 55 | + } |
| 56 | + |
| 57 | + |
| 58 | + public String[] getDataFields() { /* Returns the "fields" tweak as an array, or null if it is not set. */ |
| 59 | + List<String> v = getTweak("fields", (List<String>)null); |
| 60 | + if (v==null) return null; |
| 61 | + return (String[]) v.toArray(new String[v.size()]); |
| 62 | + } |
| 63 | + |
| 64 | + |
| 65 | + public Map<String, Chunker> getDataFieldChunkers() { //FIXME: factory/parser! |
| 66 | + return getTweak("foreign.chunkers", (Map<String, Chunker>)null); /* NOTE(review): this is the only key in this class that carries a "foreign." prefix - confirm the key name is intended */ |
| 67 | + } |
| 68 | + |
| 69 | + public String getPropertyValueField() { /* Returns the "property-value-field" tweak, or null if it is not set. */ |
| 70 | + return getTweak("property-value-field", null); |
| 71 | + } |
| 72 | + |
| 73 | + public String getPropertyNameField() { /* Returns the "property-name-field" tweak, or null if it is not set. */ |
| 74 | + return getTweak("property-name-field", null); |
| 75 | + } |
| 76 | + |
| 77 | + public String getPropertySubjectField() { /* Returns the "property-subject-field" tweak, or null if it is not set. */ |
| 78 | + return getTweak("property-subject-field", null); |
| 79 | + } |
| 80 | + |
| 81 | + public String getPropertySubjectNameField() { /* Returns the "property-subject-name-field" tweak, or null if it is not set. */ |
| 82 | + return getTweak("property-subject-name-field", null); |
| 83 | + } |
| 84 | + |
| 85 | + |
| 86 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSets.java |
— | — | @@ -1,7 +1,16 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data; |
3 | 3 | |
| 4 | +import java.util.List; |
| 5 | + |
| 6 | +import de.brightbyte.abstraction.AbstractedAccessor; |
| 7 | +import de.brightbyte.abstraction.Abstractor; |
| 8 | +import de.brightbyte.abstraction.ConvertingAccessor; |
| 9 | +import de.brightbyte.abstraction.MultiMapAbstractor; |
| 10 | +import de.brightbyte.abstraction.PropertyAccessor; |
| 11 | +import de.brightbyte.data.Functor; |
4 | 12 | import de.brightbyte.data.LabeledVector; |
5 | 13 | import de.brightbyte.data.MapLabeledVector; |
| 14 | +import de.brightbyte.data.MultiMap; |
6 | 15 | |
7 | 16 | public class FeatureSets { |
8 | 17 | public static FeatureSet merge(FeatureSet... sets) { |
— | — | @@ -35,4 +44,26 @@ |
36 | 45 | |
37 | 46 | return c; |
38 | 47 | } |
| 48 | + |
| 49 | + public static final Abstractor<MultiMap<String, Object, List<Object>>> abstractor = new MultiMapAbstractor<Object, List<Object>>(); |
| 50 | + |
| 51 | + public static class FirstValue<V> implements Functor<V, List<Object>> { /* Functor that reduces a list of values to its first element. */ |
| 52 | + public V apply(List<Object> obj) { |
| 53 | + if (obj==null || obj.isEmpty()) return null; /* null or empty list: the result is null */ |
| 54 | + Object v = obj.get(0); |
| 55 | + return (V)v; /* unchecked cast: the element's runtime type is not verified here */ |
| 56 | + } |
| 57 | + } |
| 58 | + |
| 59 | + public static <V>PropertyAccessor<FeatureSet, V> fieldAccessor(String field, Class<V> type) { /* Builds an accessor for the first value of field; a leading "=" turns the rest of the string into a constant. NOTE(review): a null field fails at startsWith below. */ |
| 60 | + if (field.startsWith("=")) { //HACK: force constant! //DOC |
| 61 | + return (PropertyAccessor<FeatureSet, V>)(Object)new PropertyAccessor.Constant<String>(field.substring(1)); /* NOTE(review): the constant is always a String, whatever type is requested - confirm this is only used with String.class */ |
| 62 | + } |
| 63 | + |
| 64 | + AbstractedAccessor<MultiMap<String, Object, List<Object>>, List<Object>> accessor = |
| 65 | + new AbstractedAccessor<MultiMap<String, Object, List<Object>>, List<Object>>(field, abstractor); /* accessor for the field, built on the shared abstractor */ |
| 66 | + |
| 67 | + return new ConvertingAccessor<FeatureSet, List<Object>, V>(accessor, new FirstValue<V>(), type); /* FirstValue picks the first list element */ |
| 68 | + } |
| 69 | + |
39 | 70 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetValueSplitter.java |
— | — | @@ -36,6 +36,16 @@ |
37 | 37 | return m; |
38 | 38 | } |
39 | 39 | |
| 40 | + public static FeatureSetMultiMangler multiFromChunkerMap(Map<String, Chunker> splitters) { /* Builds a multi-mangler holding one FeatureSetValueSplitter per map entry. */ |
| 41 | + FeatureSetMultiMangler m = new FeatureSetMultiMangler(); |
| 42 | + |
| 43 | + for (Map.Entry<String, Chunker>e: splitters.entrySet()) { /* the map key and the Chunker are both handed to the splitter's constructor */ |
| 44 | + m.addMangler(new FeatureSetValueSplitter(e.getKey(), e.getValue())); |
| 45 | + } |
| 46 | + |
| 47 | + return m; |
| 48 | + } |
| 49 | + |
40 | 50 | public static FeatureSetMultiMangler multiFromStringMap(Map<String, String> splitters, int flags) { |
41 | 51 | FeatureSetMultiMangler m = new FeatureSetMultiMangler(); |
42 | 52 | |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/AssociationCursor.java |
— | — | @@ -1,5 +1,7 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data; |
3 | 3 | |
| 4 | +import java.util.Arrays; |
| 5 | + |
4 | 6 | import de.brightbyte.data.cursor.DataCursor; |
5 | 7 | import de.brightbyte.util.PersistenceException; |
6 | 8 | |
— | — | @@ -7,11 +9,15 @@ |
8 | 10 | |
9 | 11 | private DataCursor<FeatureSet> source; |
10 | 12 | |
11 | | - protected String[] sourceFields; |
12 | | - protected String[] targetFields; |
13 | | - protected String[] propertyFields; |
| 13 | + protected Iterable<String> sourceFields; |
| 14 | + protected Iterable<String> targetFields; |
| 15 | + protected Iterable<String> propertyFields; |
14 | 16 | |
15 | 17 | public AssociationCursor(DataCursor<FeatureSet> source, String[] sourceFields, String[] targetFields, String[] propertyFields) { |
| 18 | + this(source, Arrays.asList(sourceFields), Arrays.asList(targetFields), Arrays.asList(propertyFields)); |
| 19 | + } |
| 20 | + |
| 21 | + public AssociationCursor(DataCursor<FeatureSet> source, Iterable<String> sourceFields, Iterable<String> targetFields, Iterable<String> propertyFields) { |
16 | 22 | this.sourceFields = sourceFields; |
17 | 23 | this.targetFields = targetFields; |
18 | 24 | this.propertyFields = propertyFields; |
— | — | @@ -32,11 +38,12 @@ |
33 | 39 | return new Association(source, target, props); |
34 | 40 | } |
35 | 41 | |
36 | | - protected FeatureSet newFeatureSet(FeatureSet row, String[] fields) { |
| 42 | + protected FeatureSet newFeatureSet(FeatureSet row, Iterable<String> fields) { |
37 | 43 | FeatureSet m = new DefaultFeatureSet(); |
38 | 44 | |
39 | | - for (int i=0; i<fields.length; i++) { |
40 | | - m.putAll(fields[i], row.get(i)); |
| 45 | + int i = 0; |
| 46 | + for (String f: fields) { |
| 47 | + m.putAll(f, row.get(i++)); |
41 | 48 | } |
42 | 49 | |
43 | 50 | return m; |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptAssociations.java |
— | — | @@ -0,0 +1,98 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import java.io.IOException; |
| 5 | +import java.util.Arrays; |
| 6 | + |
| 7 | +import de.brightbyte.data.cursor.DataCursor; |
| 8 | +import de.brightbyte.util.PersistenceException; |
| 9 | +import de.brightbyte.wikiword.integrator.data.Association; |
| 10 | +import de.brightbyte.wikiword.integrator.data.AssociationCursor; |
| 11 | +import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 12 | +import de.brightbyte.wikiword.integrator.processor.ConceptAssociationPassThrough; |
| 13 | +import de.brightbyte.wikiword.integrator.processor.ConceptAssociationProcessor; |
| 14 | +import de.brightbyte.wikiword.integrator.store.AssociationAsMappingStoreBuilder; |
| 15 | +import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder; |
| 16 | +import de.brightbyte.wikiword.integrator.store.DatabaseConceptMappingStoreBuilder; |
| 17 | +import de.brightbyte.wikiword.integrator.store.DatabaseConceptMappingStoreBuilder.Factory; |
| 18 | +import de.brightbyte.wikiword.store.WikiWordStoreFactory; |
| 19 | + |
| 20 | +/** |
| 21 | + * Command line application that reads associations from a feature set source and passes them to the association store. |
| 22 | + * BuildConceptAssociations can be invoked as a standalone program, use --help as a |
| 23 | + * command line parameter for usage information. |
| 24 | + */ |
| 25 | +public class BuildConceptAssociations extends AbstractIntegratorApp<AssociationFeatureStoreBuilder, ConceptAssociationProcessor, Association> { |
| 26 | + |
| 27 | + @Override |
| 28 | + protected WikiWordStoreFactory<? extends AssociationFeatureStoreBuilder> createConceptStoreFactory() throws IOException, PersistenceException { |
| 29 | + Factory mappingStoreFactory= new DatabaseConceptMappingStoreBuilder.Factory( |
| 30 | + getTargetTableName(), |
| 31 | + getConfiguredDataset(), |
| 32 | + getConfiguredDataSource(), |
| 33 | + tweaks); |
| 34 | + |
| 35 | + FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor(); |
| 36 | + |
| 37 | + return new AssociationAsMappingStoreBuilder.Factory<DatabaseConceptMappingStoreBuilder>( |
| 38 | + mappingStoreFactory, |
| 39 | + sourceDescriptor.getTweak("authority-name", "=" + sourceDescriptor.getAuthorityName()), |
| 40 | + sourceDescriptor.getTweak("foreign-id-field", (String)null), |
| 41 | + sourceDescriptor.getTweak("foreign-name-field", (String)null), |
| 42 | + sourceDescriptor.getTweak("concept-id-field", (String)null), |
| 43 | + sourceDescriptor.getTweak("concept-name-field", (String)null), |
| 44 | + sourceDescriptor.getTweak("association-via-field", (String)null), |
| 45 | + sourceDescriptor.getTweak("association-weight-field", (String)null) |
| 46 | + ); |
| 47 | + } |
| 48 | + |
| 49 | + @Override |
| 50 | + protected void run() throws Exception { |
| 51 | + section("-- fetching properties --------------------------------------------------"); |
| 52 | + DataCursor<FeatureSet> fsc = openFeatureSetCursor(); |
| 53 | + |
| 54 | + Iterable<String> foreignFields = sourceDescriptor.getTweak("foreign-fields", (Iterable<String>)null); |
| 55 | + Iterable<String> conceptFields = sourceDescriptor.getTweak("concept-fields", (Iterable<String>)null); |
| 56 | + Iterable<String> propertyFields = sourceDescriptor.getTweak("property-fields", (Iterable<String>)null); |
| 57 | + |
| 58 | + if (foreignFields==null) { |
| 59 | + foreignFields = Arrays.asList(new String[] { |
| 60 | + sourceDescriptor.getTweak("foreign-id-field", (String)null), |
| 61 | + sourceDescriptor.getTweak("foreign-name-field", (String)null) |
| 62 | + }); |
| 63 | + } |
| 64 | + |
| 65 | + if (conceptFields==null) { |
| 66 | + conceptFields = Arrays.asList(new String[] { |
| 67 | + sourceDescriptor.getTweak("concept-id-field", (String)null), |
| 68 | + sourceDescriptor.getTweak("concept-name-field", (String)null) |
| 69 | + }); |
| 70 | + } |
| 71 | + |
| 72 | + if (propertyFields==null) { |
| 73 | + propertyFields = Arrays.asList(new String[] { |
| 74 | + sourceDescriptor.getTweak("association-via-field", (String)null), |
| 75 | + sourceDescriptor.getTweak("association-weight-field", (String)null) |
| 76 | + }); |
| 77 | + } |
| 78 | + |
| 79 | + DataCursor<Association> cursor = |
| 80 | + new AssociationCursor(fsc, |
| 81 | + sourceDescriptor.getTweak("foreign-fields", (Iterable<String>)null), |
| 82 | + sourceDescriptor.getTweak("concept-fields", (Iterable<String>)null), |
| 83 | + sourceDescriptor.getTweak("property-fields", (Iterable<String>)null) ); |
| 84 | + |
| 85 | + section("-- process properties --------------------------------------------------"); |
| 86 | + this.conceptStore.prepareImport(); |
| 87 | + |
| 88 | + this.propertyProcessor = new ConceptAssociationPassThrough(conceptStore); //FIXME |
| 89 | + this.propertyProcessor.processAssociations(cursor); |
| 90 | + cursor.close(); |
| 91 | + |
| 92 | + this.conceptStore.finalizeImport(); |
| 93 | + } |
| 94 | + |
| 95 | + public static void main(String[] argv) throws Exception { |
| 96 | + LoadForeignProperties app = new LoadForeignProperties(); |
| 97 | + app.launch(argv); |
| 98 | + } |
| 99 | +} |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/MappingFeatureStoreBuilder.java |
— | — | @@ -1,10 +0,0 @@ |
2 | | -package de.brightbyte.wikiword.integrator.store; |
3 | | - |
4 | | -import de.brightbyte.util.PersistenceException; |
5 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
6 | | -import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
7 | | -import de.brightbyte.wikiword.store.builder.WikiWordStoreBuilder; |
8 | | - |
9 | | -public interface MappingFeatureStoreBuilder extends WikiWordStoreBuilder, WikiWordConceptStoreBase { |
10 | | - public void storeMapping(FeatureSet source, FeatureSet target, FeatureSet props) throws PersistenceException; |
11 | | -} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationAsMappingStoreBuilder.java |
— | — | @@ -0,0 +1,222 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.store; |
| 3 | + |
| 4 | +import java.util.Map; |
| 5 | + |
| 6 | +import de.brightbyte.abstraction.PropertyAccessor; |
| 7 | +import de.brightbyte.application.Agenda; |
| 8 | +import de.brightbyte.io.Output; |
| 9 | +import de.brightbyte.util.PersistenceException; |
| 10 | +import de.brightbyte.wikiword.DatasetIdentifier; |
| 11 | +import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 12 | +import de.brightbyte.wikiword.integrator.data.FeatureSets; |
| 13 | +import de.brightbyte.wikiword.store.WikiWordStoreFactory; |
| 14 | + |
| 15 | +public class AssociationAsMappingStoreBuilder implements |
| 16 | + AssociationFeatureStoreBuilder { |
| 17 | + |
| 18 | + public static class Factory<F extends ConceptMappingStoreBuilder> implements WikiWordStoreFactory<AssociationAsMappingStoreBuilder> { /* Factory that wraps a mapping store factory and hands its stores, plus the accessors below, to AssociationAsMappingStoreBuilder. */ |
| 19 | + protected WikiWordStoreFactory<F> mappingStoreFactory; |
| 20 | + protected PropertyAccessor<FeatureSet, String> authorityAccessor; |
| 21 | + protected PropertyAccessor<FeatureSet, String> externalIdAccessor; |
| 22 | + protected PropertyAccessor<FeatureSet, String> externalNameAccessor; |
| 23 | + protected PropertyAccessor<FeatureSet, Integer> conceptIdAccessor; |
| 24 | + protected PropertyAccessor<FeatureSet, String> conceptNameAccessor; |
| 25 | + protected PropertyAccessor<FeatureSet, String> associationViaAccessor; |
| 26 | + protected PropertyAccessor<FeatureSet, Double> associationWeightAccessor; |
| 27 | + |
| 28 | + public Factory( /* Convenience constructor: builds the accessors from field names via FeatureSets.fieldAccessor. */ |
| 29 | + WikiWordStoreFactory<F> mappingStoreFactory, |
| 30 | + String authorityField, |
| 31 | + String externalIdField, |
| 32 | + String externalNameField, |
| 33 | + String conceptIdField, |
| 34 | + String conceptNameField, |
| 35 | + String associationViaField, |
| 36 | + String associationWeightField) { |
| 37 | + |
| 38 | + this(mappingStoreFactory, |
| 39 | + FeatureSets.fieldAccessor(authorityField, String.class), /* no null guard: authority, external id and concept id field names are passed on unconditionally */ |
| 40 | + FeatureSets.fieldAccessor(externalIdField, String.class), |
| 41 | + externalNameField==null ? null : FeatureSets.fieldAccessor(externalNameField, String.class), /* a null field name yields a null accessor */ |
| 42 | + FeatureSets.fieldAccessor(conceptIdField, Integer.class), |
| 43 | + conceptNameField==null ? null : FeatureSets.fieldAccessor(conceptNameField, String.class), |
| 44 | + associationViaField==null ? null : FeatureSets.fieldAccessor(associationViaField, String.class), |
| 45 | + associationWeightField==null ? null : FeatureSets.fieldAccessor(associationWeightField, Double.class) |
| 46 | + ); |
| 47 | + } |
| 48 | + |
| 49 | + public Factory( /* Stores the wrapped factory and the given accessors as-is. */ |
| 50 | + WikiWordStoreFactory<F> mappingStoreFactory, |
| 51 | + PropertyAccessor<FeatureSet, String> authorityAccessor, |
| 52 | + PropertyAccessor<FeatureSet, String> externalIdAccessor, |
| 53 | + PropertyAccessor<FeatureSet, String> externalNameAccessor, |
| 54 | + PropertyAccessor<FeatureSet, Integer> conceptIdAccessor, |
| 55 | + PropertyAccessor<FeatureSet, String> conceptNameAccessor, |
| 56 | + PropertyAccessor<FeatureSet, String> associationViaAccessor, |
| 57 | + PropertyAccessor<FeatureSet, Double> associationWeightAccessor) { |
| 58 | + |
| 59 | + this.mappingStoreFactory = mappingStoreFactory; |
| 60 | + this.authorityAccessor = authorityAccessor; |
| 61 | + this.externalIdAccessor = externalIdAccessor; |
| 62 | + this.externalNameAccessor = externalNameAccessor; |
| 63 | + this.conceptIdAccessor = conceptIdAccessor; |
| 64 | + this.conceptNameAccessor = conceptNameAccessor; |
| 65 | + this.associationViaAccessor = associationViaAccessor; |
| 66 | + this.associationWeightAccessor = associationWeightAccessor; |
| 67 | + } |
| 68 | + |
| 69 | + @SuppressWarnings("unchecked") |
| 70 | + public AssociationAsMappingStoreBuilder newStore() throws PersistenceException { /* Creates a store from the wrapped factory and wraps it with this factory's accessors. */ |
| 71 | + ConceptMappingStoreBuilder store = mappingStoreFactory.newStore(); |
| 72 | + |
| 73 | + return new AssociationAsMappingStoreBuilder( |
| 74 | + store, |
| 75 | + authorityAccessor, |
| 76 | + externalIdAccessor, |
| 77 | + externalNameAccessor, |
| 78 | + conceptIdAccessor, |
| 79 | + conceptNameAccessor, |
| 80 | + associationViaAccessor, |
| 81 | + associationWeightAccessor); |
| 82 | + } |
| 83 | + } |
| 84 | + |
| 85 | + |
| 86 | + protected ConceptMappingStoreBuilder store; |
| 87 | + protected PropertyAccessor<FeatureSet, String> authorityAccessor; |
| 88 | + protected PropertyAccessor<FeatureSet, String> externalIdAccessor; |
| 89 | + protected PropertyAccessor<FeatureSet, String> externalNameAccessor; |
| 90 | + protected PropertyAccessor<FeatureSet, Integer> conceptIdAccessor; |
| 91 | + protected PropertyAccessor<FeatureSet, String> conceptNameAccessor; |
| 92 | + protected PropertyAccessor<FeatureSet, String> associationViaAccessor; |
| 93 | + protected PropertyAccessor<FeatureSet, Double> associationWeightAccessor; |
| 94 | + |
| 95 | + /** Convenience constructor taking field names: each name is turned into an accessor via FeatureSets.fieldAccessor and passed to the accessor-based constructor. externalNameField, conceptNameField, associationViaField and associationWeightField may be null, in which case the corresponding accessor is null. */ public AssociationAsMappingStoreBuilder( |
| 96 | + ConceptMappingStoreBuilder store, |
| 97 | + String authorityField, |
| 98 | + String externalIdField, |
| 99 | + String externalNameField, |
| 100 | + String conceptIdField, |
| 101 | + String conceptNameField, |
| 102 | + String associationViaField, |
| 103 | + String associationWeightField) { |
| 104 | + |
| 105 | + this(store, |
| 106 | + FeatureSets.fieldAccessor(authorityField, String.class), |
| 107 | + FeatureSets.fieldAccessor(externalIdField, String.class), |
| 108 | + externalNameField==null ? null : FeatureSets.fieldAccessor(externalNameField, String.class), |
| 109 | + FeatureSets.fieldAccessor(conceptIdField, Integer.class), |
| 110 | + conceptNameField==null ? null : FeatureSets.fieldAccessor(conceptNameField, String.class), |
| 111 | + associationViaField==null ? null : FeatureSets.fieldAccessor(associationViaField, String.class), |
| 112 | + associationWeightField==null ? null : FeatureSets.fieldAccessor(associationWeightField, Double.class) |
| 113 | + ); |
| 114 | + } |
| 115 | + |
| 116 | + /** Creates a builder that writes to the given store; the store and all accessors are kept unchanged in the fields of the same name. */ public AssociationAsMappingStoreBuilder( |
| 117 | + ConceptMappingStoreBuilder store, |
| 118 | + PropertyAccessor<FeatureSet, String> authorityAccessor, |
| 119 | + PropertyAccessor<FeatureSet, String> externalIdAccessor, |
| 120 | + PropertyAccessor<FeatureSet, String> externalNameAccessor, |
| 121 | + PropertyAccessor<FeatureSet, Integer> conceptIdAccessor, |
| 122 | + PropertyAccessor<FeatureSet, String> conceptNameAccessor, |
| 123 | + PropertyAccessor<FeatureSet, String> associationViaAccessor, |
| 124 | + PropertyAccessor<FeatureSet, Double> associationWeightAccessor) { |
| 125 | + |
| 126 | + super(); |
| 127 | + this.store = store; |
| 128 | + this.authorityAccessor = authorityAccessor; |
| 129 | + this.externalIdAccessor = externalIdAccessor; |
| 130 | + this.externalNameAccessor = externalNameAccessor; |
| 131 | + this.conceptIdAccessor = conceptIdAccessor; |
| 132 | + this.conceptNameAccessor = conceptNameAccessor; |
| 133 | + this.associationViaAccessor = associationViaAccessor; |
| 134 | + this.associationWeightAccessor = associationWeightAccessor; |
| 135 | + } |
| 136 | + |
| | + /** |
| | + * Stores one mapping in the wrapped store. Authority, external id and external name are read |
| | + * from {@code foreign}; concept id, concept name, "via" and weight are read from {@code concept}. |
| | + * The accessors for external name, concept name, "via" and weight are optional (the field-name |
| | + * constructor passes null for them), so a missing accessor yields null or the default weight. |
| | + * |
| | + * @param foreign feature set describing the foreign (external) item |
| | + * @param concept feature set describing the concept |
| | + * @param props not used by this implementation |
| | + * @throws PersistenceException if the wrapped store fails |
| | + */ |
| 137 | + public void storeMapping(FeatureSet foreign, FeatureSet concept, FeatureSet props) throws PersistenceException { |
| 138 | + String authority = authorityAccessor.getValue(foreign); |
| 139 | + String extId = externalIdAccessor.getValue(foreign); |
| 140 | + String extName = externalNameAccessor==null ? null : externalNameAccessor.getValue(foreign); |
| 141 | + int conceptId = conceptIdAccessor.getValue(concept); |
| 142 | + String name = conceptNameAccessor==null ? null : conceptNameAccessor.getValue(concept); |
| 143 | + String via = associationViaAccessor==null ? null : associationViaAccessor.getValue(concept); |
| 144 | + //NOTE(review): 1.0 is an assumed neutral default for a missing weight - confirm against ConceptMappingStoreBuilder |
| 145 | + Double w = associationWeightAccessor==null ? null : associationWeightAccessor.getValue(concept); |
| 146 | + double weight = w==null ? 1.0 : w.doubleValue(); |
| 147 | + store.storeMapping( authority, extId, extName, conceptId, name, via, weight); |
| 148 | + } |
| 149 | + |
| 150 | + /** Delegates to the wrapped store. */ public void checkConsistency() throws PersistenceException { |
| 151 | + store.checkConsistency(); |
| 152 | + } |
| 153 | + |
| 154 | + /** Delegates to the wrapped store, passing the flush flag through. */ public void close(boolean flush) throws PersistenceException { |
| 155 | + store.close(flush); |
| 156 | + } |
| 157 | + |
| 158 | + /** Delegates to the wrapped store. */ public Agenda createAgenda() throws PersistenceException { |
| 159 | + return store.createAgenda(); |
| 160 | + } |
| 161 | + |
| 162 | + /** Delegates to the wrapped store. */ public void dumpTableStats(Output out) throws PersistenceException { |
| 163 | + store.dumpTableStats(out); |
| 164 | + } |
| 165 | + |
| 166 | + /** Delegates to the wrapped store. */ public void finalizeImport() throws PersistenceException { |
| 167 | + store.finalizeImport(); |
| 168 | + } |
| 169 | + |
| 170 | + /** Delegates to the wrapped store. */ public void flush() throws PersistenceException { |
| 171 | + store.flush(); |
| 172 | + } |
| 173 | + |
| 174 | + /** Delegates to the wrapped store. */ public Agenda getAgenda() throws PersistenceException { |
| 175 | + return store.getAgenda(); |
| 176 | + } |
| 177 | + |
| 178 | + /** Delegates to the wrapped store. */ public DatasetIdentifier getDatasetIdentifier() { |
| 179 | + return store.getDatasetIdentifier(); |
| 180 | + } |
| 181 | + |
| 182 | + /** Delegates to the wrapped store. */ public int getNumberOfWarnings() throws PersistenceException { |
| 183 | + return store.getNumberOfWarnings(); |
| 184 | + } |
| 185 | + |
| 186 | + /** Delegates to the wrapped store. */ public Map<String, ? extends Number> getTableStats() throws PersistenceException { |
| 187 | + return store.getTableStats(); |
| 188 | + } |
| 189 | + |
| 190 | + /** Delegates to the wrapped store, passing both flags through. */ public void initialize(boolean purge, boolean dropAll) throws PersistenceException { |
| 191 | + store.initialize(purge, dropAll); |
| 192 | + } |
| 193 | + |
| 194 | + /** Delegates to the wrapped store. */ public boolean isComplete() throws PersistenceException { |
| 195 | + return store.isComplete(); |
| 196 | + } |
| 197 | + |
| 198 | + /** Delegates to the wrapped store. */ public void open() throws PersistenceException { |
| 199 | + store.open(); |
| 200 | + } |
| 201 | + |
| 202 | + /** Delegates to the wrapped store. */ public void optimize() throws PersistenceException { |
| 203 | + store.optimize(); |
| 204 | + } |
| 205 | + |
| 206 | + /** Delegates to the wrapped store. */ public void prepareImport() throws PersistenceException { |
| 207 | + store.prepareImport(); |
| 208 | + } |
| 209 | + |
| 210 | + /** Delegates to the wrapped store. */ public void setLogLevel(int loglevel) { |
| 211 | + store.setLogLevel(loglevel); |
| 212 | + } |
| 213 | + |
| 214 | + /** Passes an already-extracted mapping straight to the wrapped store, arguments unchanged. */ public void storeMapping(String authority, String extId, String extName, int concept, String name, String via, double weight) throws PersistenceException { |
| 215 | + store.storeMapping(authority, extId, extName, concept, name, via, weight); |
| 216 | + } |
| 217 | + |
| 218 | + /** Delegates to the wrapped store. */ public void storeWarning(int rcId, String problem, String details) throws PersistenceException { |
| 219 | + store.storeWarning(rcId, problem, details); |
| 220 | + } |
| 221 | + |
| 222 | + |
| 223 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/DatabaseConceptMappingStoreBuilder.java |
— | — | @@ -3,21 +3,49 @@ |
4 | 4 | import java.sql.Connection; |
5 | 5 | import java.sql.SQLException; |
6 | 6 | |
| 7 | +import javax.sql.DataSource; |
| 8 | + |
7 | 9 | import de.brightbyte.application.Agenda; |
8 | 10 | import de.brightbyte.db.Inserter; |
9 | 11 | import de.brightbyte.db.RelationTable; |
10 | 12 | import de.brightbyte.util.PersistenceException; |
11 | 13 | import de.brightbyte.wikiword.Corpus; |
| 14 | +import de.brightbyte.wikiword.DatasetIdentifier; |
12 | 15 | import de.brightbyte.wikiword.TweakSet; |
| 16 | +import de.brightbyte.wikiword.store.WikiWordStoreFactory; |
13 | 17 | import de.brightbyte.wikiword.store.builder.DatabaseWikiWordStoreBuilder; |
14 | 18 | |
15 | 19 | public class DatabaseConceptMappingStoreBuilder extends DatabaseWikiWordStoreBuilder implements ConceptMappingStoreBuilder { |
16 | 20 | |
| | + /** |
| | + * Factory producing DatabaseConceptMappingStoreBuilder instances for a fixed table name, |
| | + * dataset, data source and tweak set. Each call to newStore() uses a fresh connection |
| | + * obtained from the data source. |
| | + */ |
| 21 | + public static class Factory implements WikiWordStoreFactory<DatabaseConceptMappingStoreBuilder> { |
| 22 | + private final String table; |
| 23 | + private final DataSource db; |
| 24 | + private final DatasetIdentifier dataset; |
| 25 | + private final TweakSet tweaks; |
| 26 | + |
| 27 | + public Factory(String table, DatasetIdentifier dataset, DataSource db, TweakSet tweaks) { |
| 28 | + super(); |
| 29 | + this.table = table; |
| 30 | + this.db = db; |
| 31 | + this.dataset = dataset; |
| 32 | + this.tweaks = tweaks; |
| 33 | + } |
| 34 | + |
| | + /** |
| | + * Creates a new store builder on a fresh connection. |
| | + * |
| | + * @throws PersistenceException if the builder fails, or wrapping any SQLException |
| | + */ |
| 35 | + @SuppressWarnings("unchecked") |
| 36 | + public DatabaseConceptMappingStoreBuilder newStore() throws PersistenceException { |
| | + Connection con = null; |
| 37 | + try { |
| | + con = db.getConnection(); |
| 38 | + DatabaseConceptMappingStoreBuilder builder = new DatabaseConceptMappingStoreBuilder(table, dataset, con, tweaks); |
| | + con = null; //the builder now holds the connection, so it must not be closed here |
| | + return builder; |
| 39 | + } catch (SQLException e) { |
| 40 | + throw new PersistenceException(e); |
| | + } finally { |
| | + //construction failed after the connection was obtained: release it instead of leaking it |
| | + if (con!=null) { |
| | + try { |
| | + con.close(); |
| | + } catch (SQLException ignored) { |
| | + //keep the original exception |
| | + } |
| | + } |
| 41 | + } |
| 42 | + } |
| 43 | + } |
| 44 | + |
17 | 45 | protected RelationTable mappingTable; |
18 | 46 | protected Inserter mappingInserter; |
19 | 47 | protected IntegratorSchema integratorSchema; |
20 | 48 | |
21 | | - public DatabaseConceptMappingStoreBuilder(String table, Corpus corpus, Connection connection, TweakSet tweaks) throws SQLException, PersistenceException { |
| 49 | + public DatabaseConceptMappingStoreBuilder(String table, DatasetIdentifier corpus, Connection connection, TweakSet tweaks) throws SQLException, PersistenceException { |
22 | 50 | this(table, new IntegratorSchema(corpus, connection, tweaks, true), tweaks, null); |
23 | 51 | } |
24 | 52 | |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationFeatureStoreBuilder.java |
— | — | @@ -0,0 +1,10 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.store; |
| 3 | + |
| 4 | +import de.brightbyte.util.PersistenceException; |
| 5 | +import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 6 | +import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
| 7 | +import de.brightbyte.wikiword.store.builder.WikiWordStoreBuilder; |
| 8 | + |
| 9 | +/** Store builder that accepts mappings given as feature sets rather than as individual values. */ public interface AssociationFeatureStoreBuilder extends WikiWordStoreBuilder, WikiWordConceptStoreBase { |
| 10 | + /** Stores one mapping; judging by the parameter names, the feature sets describe the foreign item, the concept and additional properties. */ public void storeMapping(FeatureSet foreign, FeatureSet concept, FeatureSet props) throws PersistenceException; |
| 11 | +} |
Property changes on: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/AssociationFeatureStoreBuilder.java |
___________________________________________________________________ |
Added: svn:mergeinfo |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/OptimalMappingSelector.java |
— | — | @@ -11,25 +11,25 @@ |
12 | 12 | import de.brightbyte.util.PersistenceException; |
13 | 13 | import de.brightbyte.wikiword.integrator.data.FeatureSet; |
14 | 14 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
15 | | -import de.brightbyte.wikiword.integrator.store.MappingFeatureStoreBuilder; |
| 15 | +import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder; |
16 | 16 | |
17 | 17 | public class OptimalMappingSelector extends ConceptMappingPassThrough { |
18 | 18 | |
19 | 19 | protected Optimum<FeatureSet> optimum; |
20 | 20 | |
21 | | - public OptimalMappingSelector(MappingFeatureStoreBuilder store, String property, Functor<Number, ? extends Collection<Number>> aggregator) { |
| 21 | + public OptimalMappingSelector(AssociationFeatureStoreBuilder store, String property, Functor<Number, ? extends Collection<Number>> aggregator) { |
22 | 22 | this(store, (Comparator<FeatureSet>)(Object)PropertyComparator.newMultiMapEntryComparator(property, (Comparator<Number>)(Object)NaturalComparator.instance, aggregator, Number.class)); |
23 | 23 | } |
24 | 24 | |
25 | | - public OptimalMappingSelector(MappingFeatureStoreBuilder store, PropertyAccessor<FeatureSet, Number> accessor) { |
| 25 | + public OptimalMappingSelector(AssociationFeatureStoreBuilder store, PropertyAccessor<FeatureSet, Number> accessor) { |
26 | 26 | this(store, new Optimum<FeatureSet>(accessor)); |
27 | 27 | } |
28 | 28 | |
29 | | - public OptimalMappingSelector(MappingFeatureStoreBuilder store, Comparator<FeatureSet> comp) { |
| 29 | + public OptimalMappingSelector(AssociationFeatureStoreBuilder store, Comparator<FeatureSet> comp) { |
30 | 30 | this(store, new Optimum<FeatureSet>(comp)); |
31 | 31 | } |
32 | 32 | |
33 | | - public OptimalMappingSelector(MappingFeatureStoreBuilder store, Optimum<FeatureSet> optimum) { |
| 33 | + public OptimalMappingSelector(AssociationFeatureStoreBuilder store, Optimum<FeatureSet> optimum) { |
34 | 34 | super(store); |
35 | 35 | |
36 | 36 | if (optimum==null) throw new NullPointerException(); |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptMappingProcessor.java |
— | — | @@ -4,6 +4,6 @@ |
5 | 5 | import de.brightbyte.util.PersistenceException; |
6 | 6 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
7 | 7 | |
8 | | -public interface ConceptMappingProcessor { |
| 8 | +public interface ConceptMappingProcessor extends WikiWordProcessor { |
9 | 9 | public void processMappings(DataCursor<MappingCandidates> cursor) throws PersistenceException; |
10 | 10 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptAssociationProcessor.java |
— | — | @@ -0,0 +1,9 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.processor; |
| 3 | + |
| 4 | +import de.brightbyte.data.cursor.DataCursor; |
| 5 | +import de.brightbyte.util.PersistenceException; |
| 6 | +import de.brightbyte.wikiword.integrator.data.Association; |
| 7 | + |
| 8 | +/** Processor that consumes a cursor of Association objects. */ public interface ConceptAssociationProcessor extends WikiWordProcessor { |
| 9 | + /** Processes the associations supplied by the given cursor. */ public void processAssociations(DataCursor<Association> cursor) throws PersistenceException; |
| 10 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/AbstractConceptAssociationProcessor.java |
— | — | @@ -0,0 +1,19 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.processor; |
| 3 | + |
| 4 | +import de.brightbyte.data.cursor.DataCursor; |
| 5 | +import de.brightbyte.util.PersistenceException; |
| 6 | +import de.brightbyte.wikiword.integrator.data.Association; |
| 7 | + |
| 8 | +/** Base class for association processors: adapts the entry-based AbstractProcessor hooks to the association-specific method names. */ public abstract class AbstractConceptAssociationProcessor extends AbstractProcessor<Association> implements ConceptAssociationProcessor { |
| 9 | + |
| 10 | + /** Hands the cursor to process(cursor), which is not defined in this class (presumably inherited from AbstractProcessor). */ public void processAssociations(DataCursor<Association> cursor) throws PersistenceException { |
| 11 | + process(cursor); |
| 12 | + } |
| 13 | + |
| 14 | + /** Forwards a single entry to processAssociation. */ protected void processEntry(Association e) throws PersistenceException { |
| 15 | + processAssociation(e); |
| 16 | + } |
| 17 | + |
| 18 | + /** Handles one association; implemented by subclasses. */ protected abstract void processAssociation(Association m) throws PersistenceException; |
| 19 | + |
| 20 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ForeignPropertyProcessor.java |
— | — | @@ -4,6 +4,6 @@ |
5 | 5 | import de.brightbyte.util.PersistenceException; |
6 | 6 | import de.brightbyte.wikiword.integrator.data.ForeignEntity; |
7 | 7 | |
8 | | -public interface ForeignPropertyProcessor { |
| 8 | +public interface ForeignPropertyProcessor extends WikiWordProcessor { |
9 | 9 | public void processProperties(DataCursor<ForeignEntity> cursor) throws PersistenceException; |
10 | 10 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptMappingPassThrough.java |
— | — | @@ -3,12 +3,12 @@ |
4 | 4 | import de.brightbyte.util.PersistenceException; |
5 | 5 | import de.brightbyte.wikiword.integrator.data.FeatureSet; |
6 | 6 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
7 | | -import de.brightbyte.wikiword.integrator.store.MappingFeatureStoreBuilder; |
| 7 | +import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder; |
8 | 8 | |
9 | 9 | public class ConceptMappingPassThrough extends AbstractConceptMappingProcessor { |
10 | | - protected MappingFeatureStoreBuilder store; |
| 10 | + protected AssociationFeatureStoreBuilder store; |
11 | 11 | |
12 | | - public ConceptMappingPassThrough(MappingFeatureStoreBuilder store) { |
| 12 | + public ConceptMappingPassThrough(AssociationFeatureStoreBuilder store) { |
13 | 13 | if (store==null) throw new NullPointerException(); |
14 | 14 | this.store = store; |
15 | 15 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/WikiWordProcessor.java |
— | — | @@ -0,0 +1,5 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.processor; |
| 3 | + |
| 4 | +/** Marker interface without methods; common supertype of the processor interfaces and the bound of AbstractIntegratorApp's processor type parameter. */ public interface WikiWordProcessor { |
| 5 | + |
| 6 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptAssociationPassThrough.java |
— | — | @@ -0,0 +1,19 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.processor; |
| 3 | + |
| 4 | +import de.brightbyte.util.PersistenceException; |
| 5 | +import de.brightbyte.wikiword.integrator.data.Association; |
| 6 | +import de.brightbyte.wikiword.integrator.store.AssociationFeatureStoreBuilder; |
| 7 | + |
| 8 | +/** Association processor that writes every association it receives to a store without filtering or modification. */ public class ConceptAssociationPassThrough extends AbstractConceptAssociationProcessor { |
| 9 | + protected AssociationFeatureStoreBuilder store; |
| 10 | + |
| 11 | + /** Creates a pass-through writing to the given store; throws NullPointerException if store is null. */ public ConceptAssociationPassThrough(AssociationFeatureStoreBuilder store) { |
| 12 | + if (store==null) throw new NullPointerException(); |
| 13 | + this.store = store; |
| 14 | + } |
| 15 | + |
| 16 | + /** Stores the association's source item, target item and properties as one mapping. */ protected void processAssociation(Association m) throws PersistenceException { |
| 17 | + store.storeMapping(m.getSourceItem(), m.getTargetItem(), m.getProperties()); |
| 18 | + } |
| 19 | + |
| 20 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java |
— | — | @@ -0,0 +1,152 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import java.io.IOException; |
| 5 | +import java.io.InputStream; |
| 6 | +import java.sql.Connection; |
| 7 | +import java.sql.ResultSet; |
| 8 | +import java.sql.SQLException; |
| 9 | +import java.util.Arrays; |
| 10 | +import java.util.Collection; |
| 11 | +import java.util.Map; |
| 12 | +import java.util.regex.Pattern; |
| 13 | + |
| 14 | +import de.brightbyte.data.Functor; |
| 15 | +import de.brightbyte.data.cursor.DataCursor; |
| 16 | +import de.brightbyte.db.SqlScriptRunner; |
| 17 | +import de.brightbyte.io.IOUtil; |
| 18 | +import de.brightbyte.text.Chunker; |
| 19 | +import de.brightbyte.util.PersistenceException; |
| 20 | +import de.brightbyte.wikiword.StoreBackedApp; |
| 21 | +import de.brightbyte.wikiword.builder.InputFileHelper; |
| 22 | +import de.brightbyte.wikiword.integrator.data.AssemblingFeatureSetCursor; |
| 23 | +import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 24 | +import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter; |
| 25 | +import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor; |
| 26 | +import de.brightbyte.wikiword.integrator.data.ResultSetFeatureSetCursor; |
| 27 | +import de.brightbyte.wikiword.integrator.data.TsvFeatureSetCursor; |
| 28 | +import de.brightbyte.wikiword.integrator.processor.WikiWordProcessor; |
| 29 | +import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
| 30 | + |
| 31 | +/** |
| 32 | + * Base class for integrator applications that read feature sets from an external source. |
| 33 | + * The source (a TSV file or an SQL query) is described by a FeatureSetSourceDescriptor that is |
| 34 | + * loaded from the description file named by the second command line parameter. |
| 35 | + */ |
| 36 | +public abstract class AbstractIntegratorApp<S extends WikiWordConceptStoreBase, P extends WikiWordProcessor, E> extends StoreBackedApp<S> { |
| 37 | + |
| 38 | + //protected ForeignPropertyStoreBuilder propertyStore; |
| 39 | + protected InputFileHelper inputHelper; |
| 40 | + protected P propertyProcessor; |
| 41 | + protected FeatureSetSourceDescriptor sourceDescriptor; |
| 42 | + |
| 43 | + /** Calls the StoreBackedApp constructor with (true, true). NOTE(review): the meaning of the two flags is defined in StoreBackedApp and is not visible here. */ public AbstractIntegratorApp() { |
| 44 | + super(true, true); |
| 45 | + } |
| 46 | + |
| 47 | + /** Returns the InputFileHelper, creating it from this app's tweaks on first use and caching it in the inputHelper field. */ protected InputFileHelper getInputHelper() { |
| 48 | + if (inputHelper==null) { |
| 49 | + inputHelper = new InputFileHelper(tweaks); |
| 50 | + } |
| 51 | + return inputHelper; |
| 52 | + } |
| 53 | + |
| 54 | + /** Returns the third command line parameter if present; otherwise derives the name from the source descriptor's authority name by replacing every non-word character with '_', lower-casing the result and appending "_property". */ protected String getTargetTableName() throws IOException { |
| 55 | + if (args.getParameterCount() > 2) return args.getParameter(2); |
| 56 | + |
| 57 | + String authority = getSourceDescriptor().getAuthorityName(); |
| 58 | + authority = authority.replaceAll("[^\\w\\d]", "_").toLowerCase(); |
| 59 | + |
| 60 | + return authority+"_property"; |
| 61 | + } |
| 62 | + |
| | + /** |
| | + * Returns the name of the source description file, taken from the second command line parameter. |
| | + * |
| | + * @throws IllegalArgumentException if fewer than two parameters were given |
| | + */ |
| 63 | + protected String getSourceDescriptionFileName() { |
| 64 | + if (args.getParameterCount() < 2) throw new IllegalArgumentException("missing second parameter (description file name)"); |
| 65 | + return args.getParameter(1); |
| 66 | + } |
| 67 | + |
| 68 | + /** Declares the inherited options, then adds help entries for the &lt;wiki&gt; and &lt;dataset&gt; parameters and declares the "dataset" option. */ @Override |
| 69 | + protected void declareOptions() { |
| 70 | + super.declareOptions(); |
| 71 | + |
| 72 | + args.declareHelp("<wiki>", null); |
| 73 | + args.declareHelp("<dataset>", "name of the wiki/thesaurus to process"); |
| 74 | + args.declare("dataset", null, true, String.class, "sets the wiki name (overrides the <wiki-or-dump> parameter)"); |
| 75 | + } |
| 76 | + |
| | + /** |
| | + * Opens a cursor over the feature sets of the source described by the source descriptor. |
| | + * If the descriptor carries no SQL query, the source file is opened; a file whose format is |
| | + * "sql" is read completely and used as the query. A query is run on a connection from the |
| | + * configured data source; any other file is read as TSV. The cursor is then optionally wrapped |
| | + * to assemble property rows and to split field values, as configured in the descriptor. |
| | + * If anything fails before the cursor is returned, the input stream and the connection opened |
| | + * here are closed again. |
| | + * |
| | + * @return the cursor; it remains backed by the stream or connection opened here |
| | + * @throws IOException if the source file cannot be opened or read |
| | + * @throws SQLException if connecting or running the query fails |
| | + * @throws PersistenceException if a cursor cannot be set up |
| | + */ |
| 77 | + protected DataCursor<FeatureSet> openFeatureSetCursor() throws IOException, SQLException, PersistenceException { |
| 78 | + FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor(); |
| 79 | + |
| 80 | + String enc = sourceDescriptor.getDataEncoding(); |
| 81 | + String sql = sourceDescriptor.getSqlQuery(); |
| 82 | + InputStream in = null; |
| | + Connection con = null; |
| | + boolean ok = false; |
| 83 | + |
| | + try { |
| 84 | + if (sql==null) { |
| 85 | + String n = sourceDescriptor.getSourceFileName(); |
| 86 | + String format = getInputHelper().getFormat(n); //FIXME: explicit format! |
| 87 | + in = getInputHelper().open(sourceDescriptor.getBaseURL(), n); |
| 88 | + |
| 89 | + if (format!=null && format.equals("sql")) { |
| 90 | + sql = IOUtil.slurp(in, enc); |
| 91 | + |
| 92 | + in.close(); |
| 93 | + in = null; |
| 94 | + } |
| 95 | + } |
| 96 | + |
| 97 | + DataCursor<FeatureSet> fsc; |
| 98 | + String[] fields = sourceDescriptor.getDataFields(); |
| 99 | + |
| 100 | + if (sql!=null) { |
| 101 | + Collection<Functor<String, String>> manglers = Arrays.asList(getSqlScriptManglers()); |
| 102 | + con = getConfiguredDataSource().getConnection(); |
| 103 | + ResultSet rs = SqlScriptRunner.runQuery(con, sql, manglers); |
| 104 | + |
| 105 | + fsc = new ResultSetFeatureSetCursor(rs, fields); |
| 106 | + } else { |
| 107 | + fsc = new TsvFeatureSetCursor(in, enc); |
| 108 | + |
| 109 | + if (fields!=null) ((TsvFeatureSetCursor)fsc).setFields(fields); |
| 110 | + else ((TsvFeatureSetCursor)fsc).readFields(); |
| 111 | + } |
| 112 | + |
| 113 | + String propField = sourceDescriptor.getPropertyNameField(); |
| 114 | + if (propField!=null) { |
| 115 | + String valueField = sourceDescriptor.getPropertyValueField(); |
| 116 | + String subjectField = sourceDescriptor.getPropertySubjectField(); |
| 117 | + fsc = new AssemblingFeatureSetCursor(fsc, subjectField, propField, valueField); |
| 118 | + } |
| 119 | + |
| 120 | + Map<String, Chunker> splitters = sourceDescriptor.getDataFieldChunkers(); |
| 121 | + if (splitters!=null) { |
| 122 | + fsc = new MangelingFeatureSetCursor(fsc, FeatureSetValueSplitter.multiFromChunkerMap(splitters)); |
| 123 | + } |
| 124 | + |
| | + ok = true; |
| 125 | + return fsc; |
| | + } finally { |
| | + //the caller never receives a cursor on failure, so release what was opened here; |
| | + //secondary close errors are ignored to keep the original exception |
| | + if (!ok) { |
| | + if (in!=null) { |
| | + try { in.close(); } catch (IOException ignored) { } |
| | + } |
| | + if (con!=null) { |
| | + try { con.close(); } catch (SQLException ignored) { } |
| | + } |
| | + } |
| | + } |
| 126 | + } |
| 127 | + |
| | + /** |
| | + * Returns the source descriptor, loading it on first use from the source description file and |
| | + * then applying overrides from system properties (prefix "wikiword.source.") and from the |
| | + * command line arguments (prefix "source."). The descriptor is cached only after it has been |
| | + * loaded completely, so a failed load is retried on the next call. |
| | + * |
| | + * @throws IOException if the description file cannot be opened or read |
| | + */ |
| 128 | + protected FeatureSetSourceDescriptor getSourceDescriptor() throws IOException { |
| 129 | + if (sourceDescriptor!=null) return sourceDescriptor; |
| 130 | + |
| | + //build in a local so a failure below does not leave a half-initialized descriptor in the field |
| 131 | + FeatureSetSourceDescriptor descriptor = new FeatureSetSourceDescriptor("source", tweaks); |
| 132 | + |
| 133 | + String n = getSourceDescriptionFileName(); |
| 134 | + InputStream in = getInputHelper().open(n); |
| | + try { |
| 135 | + descriptor.setBaseURL(getInputHelper().getBaseURL(n)); |
| 136 | + descriptor.loadTweaks(in); |
| | + } finally { |
| 137 | + in.close(); |
| | + } |
| 138 | + |
| 139 | + descriptor.setTweaks(System.getProperties(), "wikiword.source."); //XXX: doc |
| 140 | + descriptor.setTweaks(args, "source."); //XXX: doc |
| 141 | + |
| | + sourceDescriptor = descriptor; |
| 142 | + return sourceDescriptor; |
| 143 | + } |
| 144 | + |
| | + /** |
| | + * Returns the manglers applied to SQL scripts before they are run. They match the comment |
| | + * markers "wikiword_prefix" and "wikiword_db" (written as SQL block comments, with optional |
| | + * spaces inside the comment delimiters) and are given the configured dataset's DB prefix and |
| | + * the configured dataset name, presumably as the replacement text. |
| | + */ |
| 145 | + @SuppressWarnings("unchecked") |
| 146 | + protected Functor<String, String>[] getSqlScriptManglers() { |
| 147 | + return new Functor[] { |
| | + //" *" must stand on its own on both sides of the marker; "prefix* " would quantify the last letter and demand exactly one space |
| 148 | + new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_prefix *\\*/"), getConfiguredDataset().getDbPrefix()), |
| 149 | + new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_db *\\*/"), getConfiguredDatasetName()), |
| 150 | + }; |
| 151 | + } |
| 152 | + |
| 153 | +} |
\ No newline at end of file |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/TweakSet.java |
— | — | @@ -21,14 +21,17 @@ |
22 | 22 | */ |
23 | 23 | public class TweakSet { |
24 | 24 | protected Map<String, Object> parameters = new HashMap<String, Object>(); |
25 | | - protected TweakSet parent; |
26 | 25 | |
| 26 | + private TweakSet parent; |
| 27 | + private String prefix; |
| 28 | + |
27 | 29 | public TweakSet() { |
28 | | - this(null); |
| 30 | + this(null, null); |
29 | 31 | } |
30 | 32 | |
31 | | - public TweakSet(TweakSet parent) { |
| 33 | + public TweakSet(String prefix, TweakSet parent) { |
32 | 34 | this.parent = parent; |
| 35 | + this.prefix = prefix; |
33 | 36 | } |
34 | 37 | |
35 | 38 | public void loadTweaks(File f) throws IOException { |
— | — | @@ -97,9 +100,10 @@ |
98 | 101 | public <T>T getTweak(String key, T def) { |
99 | 102 | if (!parameters.containsKey(key)) { |
100 | 103 | if (parent==null) return def; |
101 | | - else return parent.getTweak(key, def); |
| 104 | + else return parent.getTweak(prefix==null ? key : prefix + key, def); |
102 | 105 | } else { |
103 | 106 | return (T)parameters.get(key); |
104 | 107 | } |
105 | 108 | } |
| 109 | + |
106 | 110 | } |