Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/TweakSet.java |
— | — | @@ -21,7 +21,16 @@ |
22 | 22 | */ |
23 | 23 | public class TweakSet { |
24 | 24 | protected Map<String, Object> parameters = new HashMap<String, Object>(); |
| 25 | + protected TweakSet parent; |
25 | 26 | |
| | + /** Creates a tweak set without a parent; delegates to TweakSet(TweakSet) with null. */ |
| 27 | + public TweakSet() { |
| 28 | + this(null); |
| 29 | + } |
| 30 | + |
| | + /** |
| | + * Creates a tweak set backed by the given parent. |
| | + * |
| | + * @param parent the set getTweak falls back to for keys missing here; may be null |
| | + */ |
| 31 | + public TweakSet(TweakSet parent) { |
| 32 | + this.parent = parent; |
| 33 | + } |
| 34 | + |
| | + /** |
| | + * Reads properties from the given file using SystemUtils.loadProperties |
| | + * and passes them on to setTweaks. |
| | + * |
| | + * @param f the properties file to read |
| | + * @throws IOException declared for failures while loading the file |
| | + */ |
26 | 35 | public void loadTweaks(File f) throws IOException { |
27 | 36 | setTweaks( SystemUtils.loadProperties(f, null), null ); |
28 | 37 | } |
— | — | @@ -86,7 +95,11 @@ |
87 | 96 | |
| | + /** |
| | + * Looks up the value stored for a key. If this set has no entry for the |
| | + * key, the lookup is passed on to the parent set; without a parent, the |
| | + * default is returned. An entry that exists but maps to null yields null, |
| | + * because presence is checked with containsKey. |
| | + * |
| | + * @param key the name of the tweak |
| | + * @param def the value to return if neither this set nor any parent has the key |
| | + * @return the stored value, cast unchecked to the type of the default |
| | + */ |
88 | 97 | @SuppressWarnings("unchecked") |
89 | 98 | public <T>T getTweak(String key, T def) { |
90 | | - if (!parameters.containsKey(key)) return def; |
91 | | - return (T)parameters.get(key); |
| 99 | + if (!parameters.containsKey(key)) { |
| 100 | + if (parent==null) return def; |
| 101 | + else return parent.getTweak(key, def); |
| 102 | + } else { |
| 103 | + return (T)parameters.get(key); |
| 104 | + } |
92 | 105 | } |
93 | 106 | } |
Index: trunk/WikiWord/WikiWordBuilder/debug-biography-tweaks.properties |
— | — | @@ -28,10 +28,10 @@ |
29 | 29 | # unzip will be appended to the command given here. Spaces |
30 | 30 | # before the last / are taken to be part of the path, spaces |
31 | 31 | # after the last / separate parameters. |
32 | | -dumpdriver.externalBunzip = null |
33 | | -dumpdriver.externalGunzip = null |
34 | | -#dumpdriver.externalBunzip = "/bin/bunzip2 -c" |
35 | | -#dumpdriver.externalGunzip = "/bin/gunzip -c" |
| 32 | +input.externalBunzip = null |
| 33 | +input.externalGunzip = null |
| 34 | +#input.externalBunzip = "/bin/bunzip2 -c" |
| 35 | +#input.externalGunzip = "/bin/gunzip -c" |
36 | 36 | |
37 | 37 | ### Importer Output and Persistence ############ |
38 | 38 | importer.progressInterval = 1000 |
Index: trunk/WikiWord/WikiWordBuilder/debug-tweaks.properties |
— | — | @@ -28,10 +28,10 @@ |
29 | 29 | # unzip will be appended to the command given here. Spaces |
30 | 30 | # before the last / are taken to be part of the path, spaces |
31 | 31 | # after the last / separate parameters. |
32 | | -dumpdriver.externalBunzip = null |
33 | | -dumpdriver.externalGunzip = null |
34 | | -#dumpdriver.externalBunzip = "/bin/bunzip2 -c" |
35 | | -#dumpdriver.externalGunzip = "/bin/gunzip -c" |
| 32 | +input.externalBunzip = null |
| 33 | +input.externalGunzip = null |
| 34 | +#input.externalBunzip = "/bin/bunzip2 -c" |
| 35 | +#input.externalGunzip = "/bin/gunzip -c" |
36 | 36 | |
37 | 37 | ### Importer Output and Persistence ############ |
38 | 38 | importer.progressInterval = 1000 |
Index: trunk/WikiWord/WikiWordBuilder/debug-lifescience-tweaks.properties |
— | — | @@ -28,10 +28,10 @@ |
29 | 29 | # unzip will be appended to the command given here. Spaces |
30 | 30 | # before the last / are taken to be part of the path, spaces |
31 | 31 | # after the last / separate parameters. |
32 | | -dumpdriver.externalBunzip = null |
33 | | -dumpdriver.externalGunzip = null |
34 | | -#dumpdriver.externalBunzip = "/bin/bunzip2 -c" |
35 | | -#dumpdriver.externalGunzip = "/bin/gunzip -c" |
| 32 | +input.externalBunzip = null |
| 33 | +input.externalGunzip = null |
| 34 | +#input.externalBunzip = "/bin/bunzip2 -c" |
| 35 | +#input.externalGunzip = "/bin/gunzip -c" |
36 | 36 | |
37 | 37 | ### Importer Output and Persistence ############ |
38 | 38 | importer.progressInterval = 1000 |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/XmlDumpDriver.java |
— | — | @@ -1,35 +1,27 @@ |
2 | 2 | package de.brightbyte.wikiword.processor; |
3 | 3 | |
4 | | -import java.io.BufferedInputStream; |
5 | | -import java.io.File; |
6 | | -import java.io.FileInputStream; |
7 | 4 | import java.io.IOException; |
8 | 5 | import java.io.InputStream; |
9 | 6 | import java.io.InterruptedIOException; |
10 | 7 | import java.net.URL; |
11 | | -import java.net.URLConnection; |
12 | 8 | import java.sql.SQLException; |
13 | 9 | import java.util.Iterator; |
14 | 10 | import java.util.Map; |
15 | 11 | import java.util.concurrent.TimeUnit; |
16 | | -import java.util.regex.Matcher; |
17 | | -import java.util.regex.Pattern; |
18 | | -import java.util.zip.GZIPInputStream; |
19 | 12 | |
20 | | -import org.apache.commons.compress.bzip2.CBZip2InputStream; |
21 | 13 | import org.mediawiki.importer.DumpWriter; |
22 | 14 | import org.mediawiki.importer.Page; |
23 | 15 | import org.mediawiki.importer.Revision; |
24 | 16 | import org.mediawiki.importer.Siteinfo; |
25 | 17 | import org.mediawiki.importer.XmlDumpReader; |
26 | 18 | |
27 | | -import de.brightbyte.io.IOUtil; |
28 | 19 | import de.brightbyte.io.LeveledOutput; |
29 | 20 | import de.brightbyte.job.BlockingJobQueue; |
30 | 21 | import de.brightbyte.util.PersistenceException; |
31 | 22 | import de.brightbyte.wikiword.Namespace; |
32 | 23 | import de.brightbyte.wikiword.NamespaceSet; |
33 | 24 | import de.brightbyte.wikiword.TweakSet; |
| 25 | +import de.brightbyte.wikiword.builder.InputFileHelper; |
34 | 26 | |
35 | 27 | /** |
36 | 28 | * DumpImportDriver implements ImportDriver for reading content from |
— | — | @@ -220,8 +212,7 @@ |
221 | 213 | } |
222 | 214 | |
223 | 215 | private int importQueueCapacity = 0; |
224 | | - private String externalBunzip = null; |
225 | | - private String externalGunzip = null; |
| 216 | + private InputFileHelper inputHelper; |
226 | 217 | |
227 | 218 | private void init(LeveledOutput log, TweakSet tweaks) { |
228 | 219 | if (log==null) throw new NullPointerException(); |
— | — | @@ -231,15 +222,17 @@ |
232 | 223 | this.log = log; |
233 | 224 | |
234 | 225 | importQueueCapacity = tweaks.getTweak("dumpdriver.pageImportQueue", 8); |
235 | | - externalBunzip = tweaks.getTweak("dumpdriver.externalBunzip", null); |
236 | | - externalGunzip = tweaks.getTweak("dumpdriver.externalGunzip", null); |
| 226 | + |
| 227 | + inputHelper = new InputFileHelper( |
| 228 | + tweaks.getTweak("dumpdriver.externalGunzip", tweaks.getTweak("input.externalGunzip", (String)null)), |
| 229 | + tweaks.getTweak("dumpdriver.externalBunzip", tweaks.getTweak("input.externalBunzip", (String)null))); |
237 | 230 | } |
238 | 231 | |
239 | 232 | public void run(WikiWordPageProcessor importer) throws IOException, SQLException, InterruptedException, PersistenceException { |
240 | 233 | DumpWriter sink = new Sink(importer, importQueueCapacity); |
241 | 234 | |
242 | 235 | try { |
243 | | - if (in==null) in = openURL(dump); |
| 236 | + if (in==null) in = inputHelper.openURL(dump); |
244 | 237 | XmlDumpReader reader = new XmlDumpReader(in, sink); |
245 | 238 | |
246 | 239 | reader.readDump(); |
— | — | @@ -252,106 +245,4 @@ |
253 | 246 | sink.close(); //NOTE: make sure the executor queue is terminated |
254 | 247 | } |
255 | 248 | } |
256 | | - |
257 | | - protected InputStream openURL(URL u) throws IOException { |
258 | | - String p = u.getProtocol(); |
259 | | - |
260 | | - if (p.equals("file")) { |
261 | | - File f = new File(u.getPath()); |
262 | | - return openFile(f); |
263 | | - } |
264 | | - else { |
265 | | - URLConnection con = u.openConnection(); |
266 | | - String mime = con.getContentType(); |
267 | | - mime = mime.replaceAll(";.*$", ""); |
268 | | - InputStream in = con.getInputStream(); |
269 | | - |
270 | | - if (mime.equals("application/x-gzip")) { |
271 | | - return new GZIPInputStream(in); //FIXME: somehow, this doesn't seem to work. or was the external gunzipper the problem? check this! |
272 | | - } |
273 | | - else if (mime.equals("application/x-bzip2")) { |
274 | | - validateBZ2(in); |
275 | | - return new CBZip2InputStream(in); |
276 | | - } |
277 | | - else if (mime.equals("application/xml")) { |
278 | | - return in; |
279 | | - } |
280 | | - |
281 | | - in.close(); |
282 | | - throw new IOException("MIME type not suitable for a wiki dump: "+mime); |
283 | | - } |
284 | | - } |
285 | | - |
286 | | - protected InputStream openFile(File file) throws IOException { |
287 | | - String f = file.getAbsolutePath(); |
288 | | - |
289 | | - if (f.equals("-")) |
290 | | - return new BufferedInputStream(System.in); |
291 | | - |
292 | | - InputStream in = new BufferedInputStream(new FileInputStream(file)); |
293 | | - if (f.endsWith(".gz")) { |
294 | | - if (externalGunzip!=null) return openProc(externalGunzip, file); |
295 | | - else return new GZIPInputStream(in); |
296 | | - } |
297 | | - else if (f.endsWith(".bz2")) { |
298 | | - if (externalBunzip!=null) { |
299 | | - return openProc(externalBunzip, file); |
300 | | - } |
301 | | - else { |
302 | | - validateBZ2(in); |
303 | | - return new CBZip2InputStream(in); |
304 | | - } |
305 | | - } |
306 | | - else |
307 | | - return in; |
308 | | - } |
309 | | - |
310 | | - protected static void validateBZ2(InputStream in) throws IOException { |
311 | | - int first = in.read(); |
312 | | - int second = in.read(); |
313 | | - if (first != 'B' || second != 'Z') |
314 | | - throw new IOException("Didn't find BZ file signature"); |
315 | | - } |
316 | | - |
317 | | - protected static final Pattern commandParamPattern = Pattern.compile("^(.*) +([^/\\\\]+)$"); |
318 | | - |
319 | | - public static InputStream openProc(String command, File f) throws IOException { |
320 | | - String[] cmd; |
321 | | - |
322 | | - Matcher m = commandParamPattern.matcher(command); |
323 | | - if (m.matches()) { |
324 | | - String[] p = m.group(2).trim().split("\\s+"); |
325 | | - |
326 | | - cmd = new String[p.length+2]; |
327 | | - cmd[0] = m.group(1).trim(); |
328 | | - System.arraycopy(p, 0, cmd, 1, p.length); |
329 | | - |
330 | | - cmd[cmd.length-1] = f.getAbsolutePath(); |
331 | | - } |
332 | | - else { |
333 | | - cmd = new String[] { |
334 | | - command, |
335 | | - f.getAbsolutePath() |
336 | | - }; |
337 | | - } |
338 | | - |
339 | | - Process proc = Runtime.getRuntime().exec(cmd); |
340 | | - final InputStream err = proc.getErrorStream(); |
341 | | - |
342 | | - //HACK! |
343 | | - Thread slurper = new Thread("stderr slurper for "+proc) { |
344 | | - @Override |
345 | | - public void run() { |
346 | | - try { |
347 | | - IOUtil.pump(err, System.err); |
348 | | - } catch (IOException e) { |
349 | | - e.printStackTrace(System.err); |
350 | | - } |
351 | | - } |
352 | | - }; |
353 | | - |
354 | | - slurper.start(); |
355 | | - |
356 | | - return new BufferedInputStream(proc.getInputStream()); |
357 | | - } |
358 | 249 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/InputFileHelper.java |
— | — | @@ -0,0 +1,160 @@ |
| 2 | +package de.brightbyte.wikiword.builder; |
| 3 | + |
| 4 | +import java.io.BufferedInputStream; |
| 5 | +import java.io.File; |
| 6 | +import java.io.FileInputStream; |
| 7 | +import java.io.IOException; |
| 8 | +import java.io.InputStream; |
| 9 | +import java.net.MalformedURLException; |
| 10 | +import java.net.URL; |
| 11 | +import java.net.URLConnection; |
| 12 | +import java.util.regex.Matcher; |
| 13 | +import java.util.regex.Pattern; |
| 14 | +import java.util.zip.GZIPInputStream; |
| 15 | + |
| 16 | +import org.apache.commons.compress.bzip2.CBZip2InputStream; |
| 17 | + |
| 18 | +import de.brightbyte.io.IOUtil; |
| 19 | +import de.brightbyte.wikiword.TweakSet; |
| 20 | + |
| 21 | +/** |
| 22 | + * Opens input streams for file names, files and URLs, transparently |
| 23 | + * unpacking gzip and bzip2 compressed data. Local .gz and .bz2 files are |
| 24 | + * unpacked by an external command if one was configured, and in-process |
| 25 | + * otherwise. |
| 26 | + */ |
| 27 | +public class InputFileHelper { |
| 28 | + |
| 29 | + private String externalBunzip = null; |
| 30 | + private String externalGunzip = null; |
| 31 | + |
| 32 | + /** |
| 33 | + * Creates a helper that takes its external unpack commands from the tweaks |
| 34 | + * "input.externalGunzip" and "input.externalBunzip"; both default to null. |
| 35 | + */ |
| 36 | + public InputFileHelper(TweakSet tweaks) { |
| 37 | + this( tweaks.getTweak("input.externalGunzip", (String)null), |
| 38 | + tweaks.getTweak("input.externalBunzip", (String)null) ); |
| 39 | + } |
| 40 | + |
| 41 | + /** |
| 42 | + * @param gz command used to unpack .gz files, or null to unpack in-process |
| 43 | + * @param bz2 command used to unpack .bz2 files, or null to unpack in-process |
| 44 | + */ |
| 45 | + public InputFileHelper(String gz, String bz2) { |
| 46 | + externalGunzip = gz; |
| 47 | + externalBunzip = bz2; |
| 48 | + } |
| 49 | + |
| 50 | + //NOTE: the backslash inside the character class needs four backslashes in Java |
| 51 | + //source. With only two, the regex engine sees an escaped closing bracket and |
| 52 | + //compiling the pattern fails with an "unclosed character class" error. |
| 53 | + protected static final Pattern extensionPattern = Pattern.compile("\\.([^./\\\\]+)(\\.gz|\\.bz2)$", Pattern.CASE_INSENSITIVE); |
| 54 | + |
| 55 | + /** |
| 56 | + * Returns the lower-cased extension that precedes a trailing .gz or .bz2 |
| 57 | + * suffix, e.g. "sql" for "dump.sql.gz". |
| 58 | + * |
| 59 | + * @param n the file name to inspect |
| 60 | + * @return the format extension, or null if the name does not end in an |
| 61 | + * extension followed by .gz or .bz2 (this includes uncompressed names) |
| 62 | + */ |
| 63 | + public String getFormat(String n) { |
| 64 | + Matcher m = extensionPattern.matcher(n); |
| 65 | + |
| 66 | + if (!m.find()) return null; |
| 67 | + else return m.group(1).toLowerCase(); |
| 68 | + } |
| 69 | + |
| 70 | + /** |
| 71 | + * Opens the named input: "-" means standard input, anything that parses |
| 72 | + * as a URL is opened via openURL, everything else is treated as a file name. |
| 73 | + */ |
| 74 | + public InputStream open(String n) throws IOException { |
| 75 | + if (n.equals("-")) return new BufferedInputStream(System.in); |
| 76 | + |
| 77 | + try { |
| 78 | + URL u = new URL(n); |
| 79 | + return openURL(u); |
| 80 | + } catch (MalformedURLException e) { |
| 81 | + //not a URL: fall through and treat the name as a local file |
| 82 | + } |
| 83 | + |
| 84 | + File f = new File(n); |
| 85 | + return openFile(f); |
| 86 | + } |
| 87 | + |
| 88 | + /** |
| 89 | + * Opens the given URL. URLs with the "file" protocol are handled by openFile; |
| 90 | + * for all others, the reported content type decides how the data is unpacked. |
| 91 | + * |
| 92 | + * @throws IOException if the content type is missing or not one of |
| 93 | + * application/x-gzip, application/x-bzip2 or application/xml, |
| 94 | + * or if opening or unpacking the stream fails |
| 95 | + */ |
| 96 | + public InputStream openURL(URL u) throws IOException { |
| 97 | + String p = u.getProtocol(); |
| 98 | + |
| 99 | + if (p.equals("file")) { |
| 100 | + File f = new File(u.getPath()); |
| 101 | + return openFile(f); |
| 102 | + } |
| 103 | + |
| 104 | + URLConnection con = u.openConnection(); |
| 105 | + String mime = con.getContentType(); |
| 106 | + //the content type may be unknown (null); otherwise strip parameters after the ";" |
| 107 | + if (mime!=null) mime = mime.replaceAll(";.*$", ""); |
| 108 | + InputStream in = con.getInputStream(); |
| 109 | + |
| 110 | + try { |
| 111 | + if ("application/x-gzip".equals(mime)) { |
| 112 | + return new GZIPInputStream(in); //FIXME: somehow, this doesn't seem to work. or was the external gunzipper the problem? check this! |
| 113 | + } |
| 114 | + else if ("application/x-bzip2".equals(mime)) { |
| 115 | + validateBZ2(in); |
| 116 | + return new CBZip2InputStream(in); |
| 117 | + } |
| 118 | + else if ("application/xml".equals(mime)) { |
| 119 | + return in; |
| 120 | + } |
| 121 | + |
| 122 | + throw new IOException("MIME type not suitable for a wiki dump: "+mime); |
| 123 | + } catch (IOException e) { |
| 124 | + //don't leave the connection's stream open if it can't be used |
| 125 | + closeQuietly(in); |
| 126 | + throw e; |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + /** |
| 131 | + * Opens the given file, unpacking it if its name ends in .gz or .bz2. |
| 132 | + * A file whose path is just "-" stands for standard input. |
| 133 | + */ |
| 134 | + public InputStream openFile(File file) throws IOException { |
| 135 | + //NOTE: compare the path as given; an absolute path can never be just "-" |
| 136 | + if (file.getPath().equals("-")) |
| 137 | + return new BufferedInputStream(System.in); |
| 138 | + |
| 139 | + String f = file.getAbsolutePath(); |
| 140 | + InputStream in = new BufferedInputStream(new FileInputStream(file)); |
| 141 | + |
| 142 | + try { |
| 143 | + if (f.endsWith(".gz")) { |
| 144 | + if (externalGunzip!=null) { |
| 145 | + in.close(); //the external command reads the file itself |
| 146 | + return openProc(externalGunzip, file); |
| 147 | + } |
| 148 | + else return new GZIPInputStream(in); |
| 149 | + } |
| 150 | + else if (f.endsWith(".bz2")) { |
| 151 | + if (externalBunzip!=null) { |
| 152 | + in.close(); //the external command reads the file itself |
| 153 | + return openProc(externalBunzip, file); |
| 154 | + } |
| 155 | + else { |
| 156 | + validateBZ2(in); |
| 157 | + return new CBZip2InputStream(in); |
| 158 | + } |
| 159 | + } |
| 160 | + else |
| 161 | + return in; |
| 162 | + } catch (IOException e) { |
| 163 | + //release the file handle if unpacking could not be set up |
| 164 | + closeQuietly(in); |
| 165 | + throw e; |
| 166 | + } |
| 167 | + } |
| 168 | + |
| 169 | + /** Closes the stream, ignoring failures, so the original error is reported. */ |
| 170 | + private static void closeQuietly(InputStream in) { |
| 171 | + try { |
| 172 | + in.close(); |
| 173 | + } catch (IOException e) { |
| 174 | + //ignored on purpose: the exception that led here is more relevant |
| 175 | + } |
| 176 | + } |
| 177 | + |
| 178 | + /** |
| 179 | + * Reads the first two bytes from the stream and checks that they are 'B' and 'Z'. |
| 180 | + * |
| 181 | + * @throws IOException if the signature is not found |
| 182 | + */ |
| 183 | + protected static void validateBZ2(InputStream in) throws IOException { |
| 184 | + int first = in.read(); |
| 185 | + int second = in.read(); |
| 186 | + if (first != 'B' || second != 'Z') |
| 187 | + throw new IOException("Didn't find BZ file signature"); |
| 188 | + } |
| 189 | + |
| 190 | + //splits a command line into the executable (group 1, may contain spaces) |
| 191 | + //and the parameters (group 2, the part after the last space run that |
| 192 | + //contains no slash or backslash) |
| 193 | + protected static final Pattern commandParamPattern = Pattern.compile("^(.*) +([^/\\\\]+)$"); |
| 194 | + |
| 195 | + /** |
| 196 | + * Runs the given command with the file's absolute path appended as the last |
| 197 | + * argument and returns the command's standard output. The command's error |
| 198 | + * output is copied to System.err by a separate thread. |
| 199 | + * |
| 200 | + * @param command the executable, optionally followed by space separated parameters |
| 201 | + * @param f the file to pass to the command |
| 202 | + */ |
| 203 | + public static InputStream openProc(String command, File f) throws IOException { |
| 204 | + String[] cmd; |
| 205 | + |
| 206 | + Matcher m = commandParamPattern.matcher(command); |
| 207 | + if (m.matches()) { |
| 208 | + String[] p = m.group(2).trim().split("\\s+"); |
| 209 | + |
| 210 | + //executable + parameters + file name |
| 211 | + cmd = new String[p.length+2]; |
| 212 | + cmd[0] = m.group(1).trim(); |
| 213 | + System.arraycopy(p, 0, cmd, 1, p.length); |
| 214 | + |
| 215 | + cmd[cmd.length-1] = f.getAbsolutePath(); |
| 216 | + } |
| 217 | + else { |
| 218 | + cmd = new String[] { |
| 219 | + command, |
| 220 | + f.getAbsolutePath() |
| 221 | + }; |
| 222 | + } |
| 223 | + |
| 224 | + Process proc = Runtime.getRuntime().exec(cmd); |
| 225 | + final InputStream err = proc.getErrorStream(); |
| 226 | + |
| 227 | + //HACK! |
| 228 | + Thread slurper = new Thread("stderr slurper for "+proc) { |
| 229 | + @Override |
| 230 | + public void run() { |
| 231 | + try { |
| 232 | + IOUtil.pump(err, System.err); |
| 233 | + } catch (IOException e) { |
| 234 | + e.printStackTrace(System.err); |
| 235 | + } |
| 236 | + } |
| 237 | + }; |
| 238 | + |
| 239 | + slurper.start(); |
| 240 | + |
| 241 | + return new BufferedInputStream(proc.getInputStream()); |
| 242 | + } |
| 243 | + |
| 244 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/data/AssemblingFeatureSetCursorTest.java |
— | — | @@ -0,0 +1,65 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data; |
| 3 | + |
| 4 | +import java.util.ArrayList; |
| 5 | +import java.util.Arrays; |
| 6 | +import java.util.Collection; |
| 7 | +import java.util.List; |
| 8 | + |
| 9 | +import junit.framework.TestCase; |
| 10 | +import de.brightbyte.data.cursor.DataCursor; |
| 11 | +import de.brightbyte.data.cursor.IteratorCursor; |
| 12 | +import de.brightbyte.util.PersistenceException; |
| 13 | + |
| 14 | +/** |
| 15 | + * Checks that AssemblingFeatureSetCursor turns a sequence of |
| 16 | + * (id, property, value) feature sets into the expected feature sets per id. |
| 17 | + */ |
| 18 | +public class AssemblingFeatureSetCursorTest extends TestCase { |
| 19 | + |
| 20 | + /** Collects everything the cursor returns until next() yields null. */ |
| 21 | + private static <T> Collection<T> slurp(DataCursor<T> cursor) throws PersistenceException { |
| 22 | + ArrayList<T> items = new ArrayList<T>(); |
| 23 | + for (T item = cursor.next(); item != null; item = cursor.next()) { |
| 24 | + items.add(item); |
| 25 | + } |
| 26 | + return items; |
| 27 | + } |
| 28 | + |
| 29 | + public void testNext() throws PersistenceException { |
| 30 | + //input rows: two rows for id 1, one row for id 2 |
| 31 | + FeatureSet rowA = new DefaultFeatureSet("name"); |
| 32 | + rowA.put("id", 1); |
| 33 | + rowA.put("property", "name"); |
| 34 | + rowA.put("value", "A"); |
| 35 | + rowA.put("value", "a"); |
| 36 | + rowA.put("xyzzy", "bla"); |
| 37 | + |
| 38 | + FeatureSet rowB = new DefaultFeatureSet("name"); |
| 39 | + rowB.put("id", 1); |
| 40 | + rowB.put("property", "foo"); |
| 41 | + rowB.put("value", "X"); |
| 42 | + rowB.put("value", "Y"); |
| 43 | + |
| 44 | + FeatureSet rowX = new DefaultFeatureSet("name"); |
| 45 | + rowX.put("id", 2); |
| 46 | + rowX.put("property", "name"); |
| 47 | + rowX.put("property", "alias"); |
| 48 | + rowX.put("value", "Foo"); |
| 49 | + |
| 50 | + //expected output: one feature set per id |
| 51 | + FeatureSet expectedOne = new DefaultFeatureSet(); |
| 52 | + expectedOne.put("id", 1); |
| 53 | + expectedOne.put("name", "A"); |
| 54 | + expectedOne.put("name", "a"); |
| 55 | + expectedOne.put("foo", "X"); |
| 56 | + expectedOne.put("foo", "Y"); |
| 57 | + |
| 58 | + FeatureSet expectedTwo = new DefaultFeatureSet(); |
| 59 | + expectedTwo.put("id", 2); |
| 60 | + expectedTwo.put("name", "Foo"); |
| 61 | + expectedTwo.put("alias", "Foo"); |
| 62 | + |
| 63 | + List<FeatureSet> source = Arrays.asList(rowA, rowB, rowX); |
| 64 | + List<FeatureSet> expected = Arrays.asList(expectedOne, expectedTwo); |
| 65 | + |
| 66 | + DataCursor<FeatureSet> rows = new IteratorCursor<FeatureSet>(source.iterator()); |
| 67 | + DataCursor<FeatureSet> assembled = new AssemblingFeatureSetCursor(rows, "id", "property", "value"); |
| 68 | + |
| 69 | + assertEquals(expected, slurp(assembled)); |
| 70 | + } |
| 71 | + |
| 72 | +} |
Property changes on: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/data/AssemblingFeatureSetCursorTest.java |
___________________________________________________________________ |
Added: svn:mergeinfo |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/data/FeatureSetsTest.java |
— | — | @@ -2,14 +2,10 @@ |
3 | 3 | |
4 | 4 | import java.util.ArrayList; |
5 | 5 | |
| 6 | +import junit.framework.TestCase; |
6 | 7 | import de.brightbyte.data.LabeledVector; |
7 | 8 | import de.brightbyte.data.MapLabeledVector; |
8 | | -import de.brightbyte.wikiword.integrator.data.DefaultFeatureSet; |
9 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
10 | | -import de.brightbyte.wikiword.integrator.data.FeatureSets; |
11 | 9 | |
12 | | -import junit.framework.TestCase; |
13 | | - |
14 | 10 | public class FeatureSetsTest extends TestCase { |
15 | 11 | |
16 | 12 | public void testMerge() { |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/data/AssociationTest.java |
— | — | @@ -1,8 +1,5 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data; |
3 | 3 | |
4 | | -import de.brightbyte.wikiword.integrator.data.Association; |
5 | | -import de.brightbyte.wikiword.integrator.data.DefaultFeatureSet; |
6 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
7 | 4 | import junit.framework.TestCase; |
8 | 5 | |
9 | 6 | public class AssociationTest extends TestCase { |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/data/CollapsingMatchesCursorTest.java |
— | — | @@ -3,15 +3,10 @@ |
4 | 4 | import java.util.ArrayList; |
5 | 5 | import java.util.Collection; |
6 | 6 | |
| 7 | +import junit.framework.TestCase; |
7 | 8 | import de.brightbyte.data.cursor.DataCursor; |
8 | 9 | import de.brightbyte.data.cursor.IteratorCursor; |
9 | 10 | import de.brightbyte.util.PersistenceException; |
10 | | -import de.brightbyte.wikiword.integrator.data.Association; |
11 | | -import de.brightbyte.wikiword.integrator.data.CollapsingMatchesCursor; |
12 | | -import de.brightbyte.wikiword.integrator.data.DefaultFeatureSet; |
13 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
14 | | -import de.brightbyte.wikiword.integrator.data.FeatureSets; |
15 | | -import junit.framework.TestCase; |
16 | 11 | |
17 | 12 | public class CollapsingMatchesCursorTest extends TestCase { |
18 | 13 | |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/data/CollapsingFeatureSetCursorTest.java |
— | — | @@ -0,0 +1,84 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data; |
| 3 | + |
| 4 | +import java.util.ArrayList; |
| 5 | +import java.util.Collection; |
| 6 | + |
| 7 | +import junit.framework.TestCase; |
| 8 | +import de.brightbyte.data.cursor.DataCursor; |
| 9 | +import de.brightbyte.data.cursor.IteratorCursor; |
| 10 | +import de.brightbyte.util.PersistenceException; |
| 11 | + |
| 12 | +/** |
| 13 | + * Checks that CollapsingFeatureSetCursor merges consecutive feature sets |
| 14 | + * that share the same "id" into a single feature set. |
| 15 | + */ |
| 16 | +public class CollapsingFeatureSetCursorTest extends TestCase { |
| 17 | + |
| 18 | + /** Collects everything the cursor returns until next() yields null. */ |
| 19 | + private static <T> Collection<T> slurp(DataCursor<T> cursor) throws PersistenceException { |
| 20 | + ArrayList<T> items = new ArrayList<T>(); |
| 21 | + for (T item = cursor.next(); item != null; item = cursor.next()) { |
| 22 | + items.add(item); |
| 23 | + } |
| 24 | + return items; |
| 25 | + } |
| 26 | + |
| 27 | + public void testNext() throws PersistenceException { |
| 28 | + //input: three pairs of feature sets, each pair sharing an id |
| 29 | + FeatureSet first1 = new DefaultFeatureSet("id"); |
| 30 | + first1.put("id", 1); |
| 31 | + first1.put("foo", "A"); |
| 32 | + |
| 33 | + FeatureSet second1 = new DefaultFeatureSet("id"); |
| 34 | + second1.put("id", 1); |
| 35 | + second1.put("foo", "B"); |
| 36 | + |
| 37 | + FeatureSet first2 = new DefaultFeatureSet("id"); |
| 38 | + first2.put("id", 2); |
| 39 | + first2.put("foo", "X"); |
| 40 | + |
| 41 | + FeatureSet second2 = new DefaultFeatureSet("id"); |
| 42 | + second2.put("id", 2); |
| 43 | + second2.put("foo", "Y"); |
| 44 | + |
| 45 | + FeatureSet first3 = new DefaultFeatureSet("id"); |
| 46 | + first3.put("id", 3); |
| 47 | + first3.put("foo", "P"); |
| 48 | + |
| 49 | + FeatureSet second3 = new DefaultFeatureSet("id"); |
| 50 | + second3.put("id", 3); |
| 51 | + second3.put("foo", "Q"); |
| 52 | + |
| 53 | + ArrayList<FeatureSet> source = new ArrayList<FeatureSet>(); |
| 54 | + source.add(first1); |
| 55 | + source.add(second1); |
| 56 | + source.add(first2); |
| 57 | + source.add(second2); |
| 58 | + source.add(first3); |
| 59 | + source.add(second3); |
| 60 | + |
| 61 | + //expected: one merged feature set per id, holding the values of both inputs |
| 62 | + FeatureSet merged1 = new DefaultFeatureSet("id"); |
| 63 | + merged1.put("id", 1); |
| 64 | + merged1.put("id", 1); |
| 65 | + merged1.put("foo", "A"); |
| 66 | + merged1.put("foo", "B"); |
| 67 | + |
| 68 | + FeatureSet merged2 = new DefaultFeatureSet("id"); |
| 69 | + merged2.put("id", 2); |
| 70 | + merged2.put("id", 2); |
| 71 | + merged2.put("foo", "X"); |
| 72 | + merged2.put("foo", "Y"); |
| 73 | + |
| 74 | + FeatureSet merged3 = new DefaultFeatureSet("id"); |
| 75 | + merged3.put("id", 3); |
| 76 | + merged3.put("id", 3); |
| 77 | + merged3.put("foo", "P"); |
| 78 | + merged3.put("foo", "Q"); |
| 79 | + |
| 80 | + ArrayList<FeatureSet> expected = new ArrayList<FeatureSet>(); |
| 81 | + expected.add(merged1); |
| 82 | + expected.add(merged2); |
| 83 | + expected.add(merged3); |
| 84 | + |
| 85 | + DataCursor<FeatureSet> rows = new IteratorCursor<FeatureSet>(source.iterator()); |
| 86 | + DataCursor<FeatureSet> collapsed = new CollapsingFeatureSetCursor(rows, "id"); |
| 87 | + |
| 88 | + assertEquals(expected, slurp(collapsed)); |
| 89 | + } |
| 90 | + |
| 91 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/data/CollapsingAssociationCursorTest.java |
— | — | @@ -3,16 +3,11 @@ |
4 | 4 | import java.util.ArrayList; |
5 | 5 | import java.util.Collection; |
6 | 6 | |
| 7 | +import junit.framework.TestCase; |
7 | 8 | import de.brightbyte.data.cursor.DataCursor; |
8 | 9 | import de.brightbyte.data.cursor.IteratorCursor; |
9 | 10 | import de.brightbyte.util.PersistenceException; |
10 | | -import de.brightbyte.wikiword.integrator.data.Association; |
11 | | -import de.brightbyte.wikiword.integrator.data.CollapsingAssociationCursor; |
12 | | -import de.brightbyte.wikiword.integrator.data.DefaultFeatureSet; |
13 | | -import de.brightbyte.wikiword.integrator.data.FeatureSet; |
14 | 11 | |
15 | | -import junit.framework.TestCase; |
16 | | - |
17 | 12 | public class CollapsingAssociationCursorTest extends TestCase { |
18 | 13 | |
19 | 14 | private static <T> Collection<T> slurp(DataCursor<T> cursor) throws PersistenceException { |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/processor/ConceptMappingProcessor.java |
— | — | @@ -3,7 +3,6 @@ |
4 | 4 | import de.brightbyte.data.cursor.DataCursor; |
5 | 5 | import de.brightbyte.util.PersistenceException; |
6 | 6 | import de.brightbyte.wikiword.integrator.data.MappingCandidates; |
7 | | -import de.brightbyte.wikiword.integrator.store.MappingFeatureStoreBuilder; |
8 | 7 | |
9 | 8 | public interface ConceptMappingProcessor { |
10 | 9 | public void processMappings(DataCursor<MappingCandidates> cursor) throws PersistenceException; |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/ForeignEntityStoreDescriptor.java |
— | — | @@ -0,0 +1,62 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import java.util.List; |
| 5 | +import java.util.Map; |
| 6 | + |
| 7 | +import de.brightbyte.wikiword.TweakSet; |
| 8 | + |
| 9 | +/** |
| 10 | + * A TweakSet that describes a source of foreign entities. Each accessor |
| 11 | + * reads one tweak and supplies a default where the tweak is not set. |
| 12 | + */ |
| 13 | +public class ForeignEntityStoreDescriptor extends TweakSet { |
| 14 | + |
| 15 | + /** Creates a descriptor without a parent tweak set. */ |
| 16 | + public ForeignEntityStoreDescriptor() { |
| 17 | + super(); |
| 18 | + } |
| 19 | + |
| 20 | + /** Creates a descriptor that uses the given tweak set as its parent. */ |
| 21 | + public ForeignEntityStoreDescriptor(TweakSet parent) { |
| 22 | + super(parent); |
| 23 | + } |
| 24 | + |
| 25 | + /** Returns the tweak "foreign.encoding", "UTF-8" by default. */ |
| 26 | + public String getDataEncoding() { |
| 27 | + return getTweak("foreign.encoding", "UTF-8"); |
| 28 | + } |
| 29 | + |
| 30 | + /** Returns the tweak "foreign.query", or null if it is not set. */ |
| 31 | + public String getSqlQuery() { |
| 32 | + return getTweak("foreign.query", (String)null); |
| 33 | + } |
| 34 | + |
| 35 | + /** Returns the tweak "foreign.file", or null if it is not set. */ |
| 36 | + public String getSourceFileName() { |
| 37 | + return getTweak("foreign.file", (String)null); |
| 38 | + } |
| 39 | + |
| 40 | + /** Returns the list tweak "foreign.field" as an array, or null if it is not set. */ |
| 41 | + public String[] getDataFields() { |
| 42 | + List<String> fields = getTweak("foreign.field", (List<String>)null); |
| 43 | + return fields==null ? null : fields.toArray(new String[fields.size()]); |
| 44 | + } |
| 45 | + |
| 46 | + /** |
| 47 | + * Returns the map tweak "split", or null if it is not set. |
| 48 | + * NOTE(review): unlike all other keys in this class, this one has no |
| 49 | + * "foreign." prefix; confirm that this is intended. |
| 50 | + */ |
| 51 | + public Map<String, String> getSplitExpressions() { |
| 52 | + return getTweak("split", (Map<String, String>)null); |
| 53 | + } |
| 54 | + |
| 55 | + /** Returns the tweak "foreign.property-value-field", or null if it is not set. */ |
| 56 | + public String getPropertyValueField() { |
| 57 | + return getTweak("foreign.property-value-field", (String)null); |
| 58 | + } |
| 59 | + |
| 60 | + /** |
| 61 | + * Returns the tweak "foreign.property-name-field", "value" by default. |
| 62 | + * NOTE(review): the defaults of this method and getPropertyValueField look |
| 63 | + * swapped (the name field defaults to "value", the value field to null); |
| 64 | + * confirm against the code that consumes them. |
| 65 | + */ |
| 66 | + public String getPropertyNameField() { |
| 67 | + return getTweak("foreign.property-name-field", "value"); |
| 68 | + } |
| 69 | + |
| 70 | + /** Returns the tweak "foreign.concept-id-field", "id" by default. */ |
| 71 | + public String getConceptIdField() { |
| 72 | + return getTweak("foreign.concept-id-field", "id"); |
| 73 | + } |
| 74 | + |
| 75 | + /** Returns the tweak "foreign.concept-name-field", "name" by default. */ |
| 76 | + public String getConceptNameField() { |
| 77 | + return getTweak("foreign.concept-name-field", "name"); |
| 78 | + } |
| 79 | + |
| 80 | + /** |
| 81 | + * Returns the tweak "foreign.authority-name". |
| 82 | + * |
| 83 | + * @throws RuntimeException if the tweak is not set |
| 84 | + */ |
| 85 | + public String getAuthorityName() { |
| 86 | + String authority = getTweak("foreign.authority-name", (String)null); |
| 87 | + if (authority!=null) return authority; |
| 88 | + |
| 89 | + throw new RuntimeException("authority name not specified!"); |
| 90 | + } |
| 91 | + |
| 92 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/LoadForeignProperties.java |
— | — | @@ -1,20 +1,34 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator; |
3 | 3 | |
4 | 4 | import java.io.IOException; |
| 5 | +import java.io.InputStream; |
| 6 | +import java.sql.Connection; |
| 7 | +import java.sql.ResultSet; |
| 8 | +import java.sql.SQLException; |
| 9 | +import java.util.Arrays; |
| 10 | +import java.util.Collection; |
| 11 | +import java.util.Map; |
| 12 | +import java.util.regex.Pattern; |
5 | 13 | |
| 14 | +import de.brightbyte.data.Functor; |
6 | 15 | import de.brightbyte.data.cursor.DataCursor; |
| 16 | +import de.brightbyte.db.SqlScriptRunner; |
| 17 | +import de.brightbyte.io.IOUtil; |
7 | 18 | import de.brightbyte.util.PersistenceException; |
8 | 19 | import de.brightbyte.wikiword.StoreBackedApp; |
9 | | -import de.brightbyte.wikiword.builder.ImportApp; |
| 20 | +import de.brightbyte.wikiword.builder.InputFileHelper; |
| 21 | +import de.brightbyte.wikiword.integrator.data.AssemblingFeatureSetCursor; |
| 22 | +import de.brightbyte.wikiword.integrator.data.FeatureSet; |
| 23 | +import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter; |
10 | 24 | import de.brightbyte.wikiword.integrator.data.ForeignEntity; |
| 25 | +import de.brightbyte.wikiword.integrator.data.ForeignEntityCursor; |
| 26 | +import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor; |
| 27 | +import de.brightbyte.wikiword.integrator.data.ResultSetFeatureSetCursor; |
| 28 | +import de.brightbyte.wikiword.integrator.data.TsvFeatureSetCursor; |
11 | 29 | import de.brightbyte.wikiword.integrator.processor.ForeignPropertyProcessor; |
12 | 30 | import de.brightbyte.wikiword.integrator.store.DatabaseForeignPropertyStoreBuilder; |
13 | 31 | import de.brightbyte.wikiword.integrator.store.ForeignPropertyStoreBuilder; |
14 | | -import de.brightbyte.wikiword.model.WikiWordConcept; |
15 | 32 | import de.brightbyte.wikiword.store.WikiWordStoreFactory; |
16 | | -import de.brightbyte.wikiword.store.builder.ConceptInfoStoreBuilder; |
17 | | -import de.brightbyte.wikiword.store.builder.DatabaseConceptStoreBuilders; |
18 | | -import de.brightbyte.wikiword.store.builder.WikiWordConceptStoreBuilder; |
19 | 33 | |
20 | 34 | /** |
21 | 35 | * This is the primary entry point to the first phase of a WikiWord analysis. |
— | — | @@ -25,6 +39,7 @@ |
26 | 40 | |
27 | 41 | protected ForeignPropertyStoreBuilder propertyStore; |
28 | 42 | protected ForeignPropertyProcessor propertyProcessor; |
| 43 | + protected InputFileHelper inputHelper; |
29 | 44 | |
30 | 45 | public LoadForeignProperties() { |
31 | 46 | super(true, true); |
— | — | @@ -35,10 +50,14 @@ |
36 | 51 | return new DatabaseForeignPropertyStoreBuilder.Factory(getTargetTableName(), getConfiguredDataset(), getConfiguredDataSource(), tweaks); |
37 | 52 | } |
38 | 53 | |
39 | | - private String getTargetTableName() { |
40 | | - return args.getParameterCount() > 3 ? args.getParameter(3) : "foreign_property"; |
| | + /** |
| | + * Returns the name of the target table: the command line parameter at |
| | + * index 2 if more than two parameters were given, "foreign_property" otherwise. |
| | + */ |
| 54 | + protected String getTargetTableName() { |
| 55 | + return args.getParameterCount() > 2 ? args.getParameter(2) : "foreign_property"; |
40 | 56 | } |
42 | 57 | |
| | + /** Returns the command line parameter at index 1, used as the name of the source description file. */ |
| 58 | + protected String getSourceDescriptionFileName() { |
| 59 | + return args.getParameter(1); |
| 60 | + } |
| 61 | + |
43 | 62 | @Override |
44 | 63 | protected void declareOptions() { |
45 | 64 | super.declareOptions(); |
— | — | @@ -59,11 +78,76 @@ |
60 | 79 | cursor.close(); |
61 | 80 | } |
62 | 81 | |
63 | | - protected DataCursor<ForeignEntity> openPropertySource() { |
64 | | - // TODO Auto-generated method stub |
65 | | - return null; |
| | + /** |
| | + * Opens the data described by the source descriptor as a cursor over |
| | + * ForeignEntity objects. If the descriptor provides an SQL query, that query |
| | + * is run. Otherwise the descriptor's source file is opened: a file whose |
| | + * format is "sql" is read completely and run as a query, any other file is |
| | + * read as TSV. The resulting feature sets are assembled and split as |
| | + * configured in the descriptor before being wrapped. |
| | + * |
| | + * @throws IllegalArgumentException if the descriptor provides neither a query nor a file name |
| | + */ |
| 82 | + protected DataCursor<ForeignEntity> openPropertySource() throws IOException, SQLException, PersistenceException { |
| 83 | + ForeignEntityStoreDescriptor sourceDescriptor = loadSourceDescriptor(); |
| 84 | + |
| 85 | + String enc = sourceDescriptor.getDataEncoding(); |
| 86 | + String sql = sourceDescriptor.getSqlQuery(); |
| 87 | + InputStream in = null; |
| 88 | + |
| 89 | + if (sql==null) { |
| 90 | + String n = sourceDescriptor.getSourceFileName(); |
| | + //fail with a clear message instead of a NullPointerException further down |
| | + if (n==null) throw new IllegalArgumentException("source descriptor specifies neither a query nor a file"); |
| | + |
| 91 | + String format = inputHelper.getFormat(n); |
| 92 | + in = inputHelper.open(n); |
| 93 | + |
| 94 | + if (format!=null && format.equals("sql")) { |
| | + //the file holds the query itself; release the stream even if reading fails |
| | + try { |
| 95 | + sql = IOUtil.slurp(in, enc); |
| | + } finally { |
| 97 | + in.close(); |
| 98 | + in = null; |
| | + } |
| 99 | + } |
| 100 | + } |
| 101 | + |
| 102 | + DataCursor<FeatureSet> fsc; |
| 103 | + String[] fields = sourceDescriptor.getDataFields(); |
| 104 | + |
| 105 | + if (sql!=null) { |
| 106 | + Collection<Functor<String, String>> manglers = Arrays.asList(getSqlScriptManglers()); |
| 107 | + Connection con = getConfiguredDataSource().getConnection(); |
| 108 | + ResultSet rs = SqlScriptRunner.runQuery(con, sql, manglers); |
| 109 | + |
| 110 | + fsc = new ResultSetFeatureSetCursor(rs, fields); |
| 111 | + } else { |
| 112 | + fsc = new TsvFeatureSetCursor(in, enc); |
| 113 | + |
| | + //use the configured field names, or let the cursor read them from the data |
| 114 | + if (fields!=null) ((TsvFeatureSetCursor)fsc).setFields(fields); |
| 115 | + else ((TsvFeatureSetCursor)fsc).readFields(); |
| 116 | + } |
| 117 | + |
| 118 | + String propField = sourceDescriptor.getPropertyNameField(); |
| 119 | + if (propField!=null) { |
| 120 | + String valueField = sourceDescriptor.getPropertyValueField(); |
| 121 | + String idField = sourceDescriptor.getConceptIdField(); |
| 122 | + fsc = new AssemblingFeatureSetCursor(fsc, idField, propField, valueField); |
| 123 | + } |
| 124 | + |
| 125 | + Map<String, String> splitExp = sourceDescriptor.getSplitExpressions(); |
| 126 | + if (splitExp!=null) { |
| 127 | + fsc = new MangelingFeatureSetCursor(fsc, FeatureSetValueSplitter.multiFromStringMap(splitExp, 0)); |
| 128 | + } |
| 129 | + |
| 130 | + return new ForeignEntityCursor(fsc, sourceDescriptor.getAuthorityName(), sourceDescriptor.getConceptIdField(), sourceDescriptor.getConceptNameField()); |
66 | 131 | } |
67 | 132 | |
| | +    /** |
| | +     * Loads the source descriptor from the file named by {@link #getSourceDescriptionFileName()}. |
| | +     * |
| | +     * @return a descriptor populated from that file |
| | +     * @throws IOException if the file cannot be opened or read |
| | +     */ |
| | +    protected ForeignEntityStoreDescriptor loadSourceDescriptor() throws IOException { |
| | +        ForeignEntityStoreDescriptor descriptor = new ForeignEntityStoreDescriptor(); |
| | + |
| | +        String n = getSourceDescriptionFileName(); |
| | +        InputStream in = inputHelper.open(n); |
| | +        try { |
| | +            descriptor.loadTweaks(in); |
| | +        } finally { |
| | +            // release the stream even if loading the descriptor fails |
| | +            in.close(); |
| | +        } |
| | + |
| | +        return descriptor; |
| | +    } |
| 143 | + |
| | +    /** |
| | +     * Returns the manglers applied to SQL scripts before they are run. They replace the |
| | +     * placeholder comments wikiword_prefix and wikiword_db (written as SQL block comments, |
| | +     * with optional spaces inside the comment markers) by the configured dataset's |
| | +     * database prefix and the configured dataset name, respectively. |
| | +     */ |
| 144 | + @SuppressWarnings("unchecked") |
| 145 | + protected Functor<String, String>[] getSqlScriptManglers() { |
| 146 | + return new Functor[] { |
| | +            // " *" on both sides of the keyword: any number of spaces (including none) is allowed |
| | +            new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_prefix *\\*/"), getConfiguredDataset().getDbPrefix()), |
| | +            new SqlScriptRunner.RegularExpressionMangler(Pattern.compile("/\\* *wikiword_db *\\*/"), getConfiguredDatasetName()), |
| 149 | + }; |
| 150 | + } |
| 151 | + |
68 | 152 | public static void main(String[] argv) throws Exception { |
69 | 153 | LoadForeignProperties app = new LoadForeignProperties(); |
70 | 154 | app.launch(argv); |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/FeatureSetValueSplitter.java |
— | — | @@ -2,12 +2,47 @@ |
3 | 3 | |
4 | 4 | import java.util.ArrayList; |
5 | 5 | import java.util.List; |
| 6 | +import java.util.Map; |
6 | 7 | import java.util.regex.Matcher; |
7 | 8 | import java.util.regex.Pattern; |
8 | 9 | |
9 | 10 | |
10 | 11 | public class FeatureSetValueSplitter implements FeatureSetMangler { |
| 12 | + |
| | +    /** |
| | +     * Bundles the given splitters into a single multi-mangler. |
| | +     */ |
| | +    public static FeatureSetMultiMangler multi(FeatureSetValueSplitter... splitters) { |
| | +        FeatureSetMangler[] manglers = (FeatureSetMangler[])splitters; |
| | +        return new FeatureSetMultiMangler(manglers); |
| | +    } |
11 | 16 | |
| | +    /** |
| | +     * Builds a multi-mangler and adds every splitter from the given iterable, in iteration order. |
| | +     */ |
| | +    public static FeatureSetMultiMangler multiFromSplitters(Iterable<FeatureSetValueSplitter> splitters) { |
| | +        final FeatureSetMultiMangler combined = new FeatureSetMultiMangler(); |
| | + |
| | +        for (FeatureSetValueSplitter splitter : splitters) { |
| | +            combined.addMangler(splitter); |
| | +        } |
| | + |
| | +        return combined; |
| | +    } |
| 26 | + |
| | +    /** |
| | +     * Builds a multi-mangler with one splitter per map entry; the entry's key and |
| | +     * pattern are passed to the FeatureSetValueSplitter constructor. |
| | +     */ |
| | +    public static FeatureSetMultiMangler multiFromPatternMap(Map<String, Pattern> splitters) { |
| | +        final FeatureSetMultiMangler combined = new FeatureSetMultiMangler(); |
| | + |
| | +        for (Map.Entry<String, Pattern> entry : splitters.entrySet()) { |
| | +            FeatureSetValueSplitter splitter = new FeatureSetValueSplitter(entry.getKey(), entry.getValue()); |
| | +            combined.addMangler(splitter); |
| | +        } |
| | + |
| | +        return combined; |
| | +    } |
| 36 | + |
| | +    /** |
| | +     * Builds a multi-mangler with one splitter per map entry; the entry's key and |
| | +     * string value are passed, together with the given flags, to the |
| | +     * FeatureSetValueSplitter constructor. |
| | +     */ |
| | +    public static FeatureSetMultiMangler multiFromStringMap(Map<String, String> splitters, int flags) { |
| | +        final FeatureSetMultiMangler combined = new FeatureSetMultiMangler(); |
| | + |
| | +        for (Map.Entry<String, String> entry : splitters.entrySet()) { |
| | +            FeatureSetValueSplitter splitter = new FeatureSetValueSplitter(entry.getKey(), entry.getValue(), flags); |
| | +            combined.addMangler(splitter); |
| | +        } |
| | + |
| | +        return combined; |
| | +    } |
| 46 | + |
12 | 47 | protected String field; |
13 | 48 | protected Matcher splitter; |
14 | 49 | |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/AssemblingFeatureSetCursor.java |
— | — | @@ -0,0 +1,55 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data; |
| 3 | + |
| 4 | +import java.util.List; |
| 5 | + |
| 6 | +import de.brightbyte.data.cursor.DataCursor; |
| 7 | +import de.brightbyte.util.PersistenceException; |
| 8 | + |
| | +/** |
| | + * Cursor that folds runs of consecutive rows from an underlying cursor into one |
| | + * feature set per record. For every row of a run, each value of the property-name |
| | + * field becomes a key of the result, receiving the row's property-value entries. |
| | + * A run continues while the next row overlaps the result on the record id field. |
| | + */ |
| | +public class AssemblingFeatureSetCursor implements DataCursor<FeatureSet> { |
| | + |
| | +    /** The underlying cursor delivering one row per property. */ |
| | +    protected DataCursor<FeatureSet> cursor; |
| | + |
| | +    /** Row read ahead from the underlying cursor but not yet consumed, or null. */ |
| | +    protected FeatureSet prev; |
| | + |
| | +    protected String recordIdField; |
| | +    protected String propertyNameField; |
| | +    protected String propertyValueField; |
| | + |
| | +    /** |
| | +     * @throws NullPointerException if any argument is null |
| | +     */ |
| | +    public AssemblingFeatureSetCursor(DataCursor<FeatureSet> cursor, String recordIdField, String propertyNameField, String propertyValueField) { |
| | +        if (cursor==null) throw new NullPointerException(); |
| | +        if (recordIdField==null) throw new NullPointerException(); |
| | +        if (propertyNameField==null) throw new NullPointerException(); |
| | +        if (propertyValueField==null) throw new NullPointerException(); |
| | + |
| | +        this.cursor = cursor; |
| | +        this.recordIdField = recordIdField; |
| | +        this.propertyNameField = propertyNameField; |
| | +        this.propertyValueField = propertyValueField; |
| | +    } |
| | + |
| | +    /** Closes the underlying cursor. */ |
| | +    public void close() { |
| | +        cursor.close(); |
| | +    } |
| | + |
| | +    /** |
| | +     * Returns the next assembled feature set, or null when the underlying cursor is exhausted. |
| | +     */ |
| | +    public FeatureSet next() throws PersistenceException { |
| | +        if (prev==null) { |
| | +            prev = cursor.next(); |
| | +            if (prev==null) return null; |
| | +        } |
| | + |
| | +        FeatureSet assembled = new DefaultFeatureSet(); |
| | +        assembled.putAll(recordIdField, prev.get(recordIdField)); |
| | + |
| | +        // prev is known to be non-null here, so the first row is always consumed |
| | +        do { |
| | +            List<Object> names = prev.get(propertyNameField); |
| | +            List<Object> values = prev.get(propertyValueField); |
| | + |
| | +            for (Object name: names) { |
| | +                assembled.putAll(name.toString(), values); |
| | +            } |
| | + |
| | +            // read ahead; a row that does not belong to this record stays in prev for the next call |
| | +            prev = cursor.next(); |
| | +        } while (prev!=null && prev.overlaps(assembled, recordIdField)); |
| | + |
| | +        return assembled; |
| | +    } |
| | + |
| | +} |
Property changes on: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/AssemblingFeatureSetCursor.java |
___________________________________________________________________ |
Added: svn:mergeinfo |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/ResultSetFeatureSetCursor.java |
— | — | @@ -1,13 +1,19 @@ |
2 | 2 | package de.brightbyte.wikiword.integrator.data; |
3 | 3 | |
4 | 4 | import java.sql.ResultSet; |
| 5 | +import java.sql.SQLException; |
5 | 6 | |
6 | 7 | import de.brightbyte.db.DatabaseDataSet; |
| 8 | +import de.brightbyte.db.DatabaseUtil; |
7 | 9 | |
| | +/** |
| | + * Cursor that exposes the rows of a ResultSet as FeatureSet objects, built by a |
| | + * ResultSetFeatureSetFactory. |
| | + */ |
8 | 10 | public class ResultSetFeatureSetCursor extends DatabaseDataSet.Cursor<FeatureSet> { |
9 | 11 | |
10 | | - public ResultSetFeatureSetCursor(ResultSet resultSet, String[] fields) { |
11 | | - super(resultSet, new ResultSetFeatureSetFactory(fields)); |
| | + /** |
| | + * Creates a cursor without an explicit field list; delegates to the two-argument |
| | + * constructor with null, so the field names are taken from the result set. |
| | + */ |
| 12 | + public ResultSetFeatureSetCursor(ResultSet resultSet) throws SQLException { |
| 13 | + this(resultSet, null); |
12 | 14 | } |
| 15 | + |
| | + /** |
| | + * Creates a cursor over the given fields. If fields is null, the names returned by |
| | + * DatabaseUtil.getFieldNames(resultSet) are used instead. |
| | + */ |
| 16 | + public ResultSetFeatureSetCursor(ResultSet resultSet, String[] fields) throws SQLException { |
| 17 | + super(resultSet, new ResultSetFeatureSetFactory(fields == null ? DatabaseUtil.getFieldNames(resultSet): fields)); |
| 18 | + } |
13 | 19 | |
14 | 20 | } |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/CollapsingFeatureSetCursor.java |
— | — | @@ -0,0 +1,42 @@ |
| 2 | +package de.brightbyte.wikiword.integrator.data; |
| 3 | + |
| 4 | +import de.brightbyte.data.cursor.DataCursor; |
| 5 | +import de.brightbyte.util.PersistenceException; |
| 6 | + |
| | +/** |
| | + * Cursor that merges runs of consecutive feature sets from an underlying cursor |
| | + * into one. A run continues while the next row overlaps the merged result on the |
| | + * record id field; merging is done with FeatureSets.merge. |
| | + */ |
| | +public class CollapsingFeatureSetCursor implements DataCursor<FeatureSet> { |
| | + |
| | +    /** The underlying cursor. */ |
| | +    protected DataCursor<FeatureSet> cursor; |
| | + |
| | +    /** Row read ahead from the underlying cursor but not yet consumed, or null. */ |
| | +    protected FeatureSet prev; |
| | + |
| | +    protected String recordIdField; |
| | + |
| | +    /** |
| | +     * @throws NullPointerException if any argument is null |
| | +     */ |
| | +    public CollapsingFeatureSetCursor(DataCursor<FeatureSet> cursor, String sourceKeyField) { |
| | +        if (cursor==null) throw new NullPointerException(); |
| | +        if (sourceKeyField==null) throw new NullPointerException(); |
| | + |
| | +        this.cursor = cursor; |
| | +        this.recordIdField = sourceKeyField; |
| | +    } |
| | + |
| | +    /** Closes the underlying cursor. */ |
| | +    public void close() { |
| | +        cursor.close(); |
| | +    } |
| | + |
| | +    /** |
| | +     * Returns the next merged feature set, or null when the underlying cursor is exhausted. |
| | +     */ |
| | +    public FeatureSet next() throws PersistenceException { |
| | +        if (prev==null) { |
| | +            prev = cursor.next(); |
| | +            if (prev==null) return null; |
| | +        } |
| | + |
| | +        FeatureSet merged = prev; |
| | + |
| | +        // read ahead; a row that does not belong to this record stays in prev for the next call |
| | +        prev = cursor.next(); |
| | +        while (prev!=null && prev.overlaps(merged, recordIdField)) { |
| | +            merged = FeatureSets.merge(merged, prev); |
| | +            prev = cursor.next(); |
| | +        } |
| | + |
| | +        return merged; |
| | +    } |
| | + |
| | +} |