Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ForeignPropertyPassThrough.java |
— | — | @@ -10,6 +10,7 @@ |
11 | 11 | |
12 | 12 | public class ForeignPropertyPassThrough extends AbstractForeignPropertyProcessor { |
13 | 13 | protected ForeignPropertyStoreBuilder store; |
| 14 | + protected String qualifier; |
14 | 15 | |
15 | 16 | public ForeignPropertyPassThrough(ForeignPropertyStoreBuilder store) { |
16 | 17 | if (store==null) throw new NullPointerException(); |
— | — | @@ -24,9 +25,17 @@ |
25 | 26 | List<Object> vv = p.getValue(); |
26 | 27 | |
27 | 28 | for (Object v: vv) { |
28 | | - store.storeProperty(e.getAuthority(), e.getID(), prop, String.valueOf(v), null); |
| 29 | + store.storeProperty(e.getAuthority(), e.getID(), prop, String.valueOf(v), qualifier); |
29 | 30 | } |
30 | 31 | } |
31 | 32 | } |
32 | 33 | |
| 34 | + public String getQualifier() { // Returns the qualifier that this processor passes as the last argument of store.storeProperty(...).
| 35 | + return qualifier; // may be null: the field is declared without an initializer and is only assigned in setQualifier in the visible code
| 36 | + }
| 37 | + |
| 38 | + public void setQualifier(String qualifier) { // Sets the qualifier handed to store.storeProperty(...) for every stored value; no validation is done, so null is accepted.
| 39 | + this.qualifier = qualifier;
| 40 | + }
| 41 | + |
33 | 42 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java |
— | — | @@ -18,6 +18,7 @@ |
19 | 19 | import de.brightbyte.data.cursor.DataCursor; |
20 | 20 | import de.brightbyte.db.SqlScriptRunner; |
21 | 21 | import de.brightbyte.io.IOUtil; |
| 22 | +import de.brightbyte.io.LineCursor; |
22 | 23 | import de.brightbyte.text.Chunker; |
23 | 24 | import de.brightbyte.util.BeanUtils; |
24 | 25 | import de.brightbyte.util.PersistenceException; |
— | — | @@ -185,10 +186,19 @@ |
186 | 187 | |
187 | 188 | fsc = new ResultSetFeatureSetCursor(rs, fields); |
188 | 189 | } else { |
189 | | - fsc = new TsvFeatureSetCursor(in, enc); |
| 190 | + LineCursor lines = new LineCursor(in, enc); |
190 | 191 | |
191 | | - if (fields!=null) ((TsvFeatureSetCursor)fsc).setFields(fields); |
192 | | - else ((TsvFeatureSetCursor)fsc).readFields(); |
| 192 | + Chunker chunker = sourceDescriptor.getCsvLineChunker(); |
| 193 | + |
| 194 | + fsc = new TsvFeatureSetCursor(lines, chunker); |
| 195 | + |
| 196 | + if (fields!=null) { |
| 197 | + if (sourceDescriptor.getSkipHeader()) ((TsvFeatureSetCursor)fsc).readFields(); |
| 198 | + ((TsvFeatureSetCursor)fsc).setFields(fields); |
| 199 | + } else { |
| 200 | + ((TsvFeatureSetCursor)fsc).readFields(); |
| 201 | + fields = ((TsvFeatureSetCursor)fsc).getFields(); |
| 202 | + } |
193 | 203 | } |
194 | 204 | |
195 | 205 | String propField = sourceDescriptor.getPropertyNameField(); |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/LoadForeignProperties.java |
— | — | @@ -45,7 +45,14 @@ |
46 | 46 | @Override
47 | 47 | protected ForeignPropertyProcessor createProcessor(ForeignPropertyStoreBuilder conceptStore) throws InstantiationException { // Creates the property processor via instantiate(...) and, for pass-through processors, applies the optional "property-qualifier" tweak.
48 | 48 | // FIXME: parameter list is restrictive, pass descriptor
49 | | - return instantiate(sourceDescriptor, "foreignPropertyProcessorClass", ForeignPropertyPassThrough.class, conceptStore);
| 49 | + ForeignPropertyProcessor processor = instantiate(sourceDescriptor, "foreignPropertyProcessorClass", ForeignPropertyPassThrough.class, conceptStore); // presumably ForeignPropertyPassThrough.class is the fallback when the descriptor names no class - TODO confirm in instantiate(...)
| 50 | + 
| 51 | + if (processor instanceof ForeignPropertyPassThrough) { // the qualifier is only applied to this type; any other configured processor is returned untouched
| 52 | + String qualifier = sourceDescriptor.getTweak("property-qualifier", null); // null is passed as the default value
| 53 | + if (qualifier!=null) ((ForeignPropertyPassThrough)processor).setQualifier(qualifier); // setter is skipped when the tweak yields null, leaving the processor's own qualifier as is
| 54 | + }
| 55 | + 
| 56 | + return processor;
50 | 57 | }
51 | 58 | |
52 | 59 | public static void main(String[] argv) throws Exception { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java |
— | — | @@ -8,6 +8,7 @@ |
9 | 9 | import java.util.Map; |
10 | 10 | |
11 | 11 | import de.brightbyte.text.Chunker; |
| 12 | +import de.brightbyte.text.CsvLineChunker; |
12 | 13 | import de.brightbyte.wikiword.TweakSet; |
13 | 14 | |
14 | 15 | public class FeatureSetSourceDescriptor extends TweakSet { |
— | — | @@ -77,8 +78,8 @@ |
78 | 79 | } |
79 | 80 | |
80 | 81 | |
81 | | - public Map<String, Chunker> getDataFieldChunkers() { //FIXME: factory/parser!
82 | | - return getTweak("foreign.chunkers", (Map<String, Chunker>)null);
| 82 | + public Map<String, Chunker> getDataFieldChunkers() { // Returns the "field-chunkers" tweak; presumably keyed by data field name - TODO confirm against callers.
| 83 | + return getTweak("field-chunkers", (Map<String, Chunker>)null); // null is passed as the default, so callers should expect a null result when the tweak is not set (verify getTweak semantics in TweakSet)
83 | 84 | }
84 | 85 | |
85 | 86 | public String getPropertyValueField() { |
— | — | @@ -97,5 +98,26 @@ |
98 | 99 | return getTweak("property-subject-name-field", null); |
99 | 100 | } |
100 | 101 | |
| 102 | + public boolean getSkipHeader() { // Returns the boolean "skip-header" tweak.
| 103 | + return getTweak("skip-header", false); // false is passed as the default value
| 104 | + }
| 105 | + |
| 106 | + public Chunker getCsvLineChunker() { // Chooses the line chunker in this order: "csv-chunker" tweak, then a chunker built from "csv-separator", then the "file-format" tweak, then CsvLineChunker.tsv.
| 107 | + Chunker chunker = getTweak("csv-chunker", null); // an explicitly configured chunker takes precedence over everything below
| 108 | + 
| 109 | + if (chunker==null) {
| 110 | + char ch = getTweak("csv-separator", '\u008F'); // the default character acts as a sentinel: the next line treats it as "no separator configured"
| 111 | + if (ch!='\u008F') chunker = new CsvLineChunker(ch, getTweak("csv-backslash-escape", false)); // second constructor argument comes from the "csv-backslash-escape" tweak, with false passed as its default
| 112 | + }
| 113 | + 
| 114 | + if (chunker==null) {
| 115 | + if (getTweak("file-format", "tsv").equals("csv")) // "tsv" is passed as the default; only the exact value "csv" selects the csv chunker
| 116 | + chunker = CsvLineChunker.csv;
| 117 | + }
| 118 | + 
| 119 | + if (chunker==null) chunker = CsvLineChunker.tsv; // final fallback when none of the tweaks above produced a chunker
| 120 | + return chunker;
| 121 | + }
| 122 | + |
101 | 123 | |
102 | 124 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/TsvFeatureSetCursor.java |
— | — | @@ -8,7 +8,8 @@ |
9 | 9 | |
10 | 10 | import de.brightbyte.data.cursor.DataCursor; |
11 | 11 | import de.brightbyte.io.LineCursor; |
12 | | -import de.brightbyte.io.TsvCursor; |
| 12 | +import de.brightbyte.io.ChunkingCursor; |
| 13 | +import de.brightbyte.text.Chunker; |
13 | 14 | import de.brightbyte.text.CsvLineChunker; |
14 | 15 | import de.brightbyte.util.PersistenceException; |
15 | 16 | |
— | — | @@ -17,25 +18,29 @@ |
18 | 19 | private String[] fields; |
19 | 20 | |
20 | 21 | public TsvFeatureSetCursor(InputStream in, String enc) throws UnsupportedEncodingException { // Convenience constructor: wraps the stream and encoding name in a cursor and delegates to another constructor of this class.
21 | | - this( new TsvCursor(in, enc) );
| 22 | + this( new ChunkingCursor(in, enc) ); // no chunker is passed here; whatever default ChunkingCursor applies is used - TODO confirm it is tab-separated
22 | 23 | }
23 | 24 | |
24 | 25 | public TsvFeatureSetCursor(Reader rd) { // Convenience constructor: wraps the reader in a cursor and delegates to another constructor of this class.
25 | | - this(new TsvCursor(rd));
| 26 | + this(new ChunkingCursor(rd)); // no chunker is passed here; ChunkingCursor's own default applies
26 | 27 | }
27 | 28 | |
28 | 29 | public TsvFeatureSetCursor(BufferedReader reader) { // Convenience constructor: wraps the buffered reader in a cursor and delegates to another constructor of this class.
29 | | - this(new TsvCursor(reader));
| 30 | + this(new ChunkingCursor(reader)); // no chunker is passed here; ChunkingCursor's own default applies
30 | 31 | }
31 | 32 | |
32 | 33 | public TsvFeatureSetCursor(LineCursor lines) { // Convenience constructor: wraps an existing line cursor and delegates to another constructor of this class.
33 | | - this(new TsvCursor(lines));
| 34 | + this(new ChunkingCursor(lines)); // no chunker is passed here; ChunkingCursor's own default applies
34 | 35 | }
35 | 36 | |
36 | | - public TsvFeatureSetCursor(LineCursor lines, CsvLineChunker chunker) {
37 | | - this(new TsvCursor(lines, chunker));
| 37 | + public TsvFeatureSetCursor(LineCursor lines, char separator, boolean esc) { // Builds a CsvLineChunker from the given separator and escape flag and uses it to split the lines.
| 38 | + this(new ChunkingCursor(lines, new CsvLineChunker(separator, esc))); // esc is handed to CsvLineChunker unchanged; presumably it enables backslash escaping - TODO confirm in CsvLineChunker
38 | 39 | }
39 | 40 | |
| 41 | + public TsvFeatureSetCursor(LineCursor lines, Chunker chunker) { // Splits each line with the caller-supplied chunker; accepts any Chunker rather than only a CsvLineChunker.
| 42 | + this(new ChunkingCursor(lines, chunker)); // chunker is not null-checked here - NOTE(review): confirm how ChunkingCursor handles null
| 43 | + }
| 44 | + |
40 | 45 | public TsvFeatureSetCursor(DataCursor<List<String>> source) { |
41 | 46 | if (source==null) throw new NullPointerException(); |
42 | 47 | this.source = source; |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/store/IntegratorSchema.java |
— | — | @@ -42,7 +42,7 @@ |
43 | 43 | return table; |
44 | 44 | } |
45 | 45 | |
46 | | - public RelationTable newConceptMappingTable(String name, boolean unique) { |
| 46 | + public RelationTable newConceptAssociationTable(String name, boolean unique) { |
47 | 47 | RelationTable table = new RelationTable(this, name, getDefaultTableAttributes()); |
48 | 48 | |
49 | 49 | table.addField( new DatabaseField(this, "external_authority", getTextType(64), null, true, null) ); |