Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java |
— | — | @@ -0,0 +1,40 @@ |
| 2 | +package de.brightbyte.wikiword.dump; |
| 3 | + |
| 4 | +import de.brightbyte.util.PersistenceException; |
| 5 | + |
| 6 | +/** |
| 7 | + * Command line application that dumps a single database table, or selected |
| 8 | + * fields of it, using the query and output handling of {@link SQLDumperApp}. |
| 9 | + * <p> |
| 10 | + * The positional parameter at the offset passed to {@link #open(int)} names the |
| 11 | + * table; the parameter after it is handed to {@code openSink} to select the output. |
| 12 | + */ |
| 13 | +public class DumpTable extends SQLDumperApp { |
| 14 | + |
| 15 | +    /** Name of the table to dump, taken from the command line in {@link #open(int)}. */ |
| 16 | +    protected String table; |
| 17 | + |
| 18 | +    /** Creates the application with both global and local datasets allowed. */ |
| 19 | +    public DumpTable() { |
| 20 | +        super(true, true); |
| 21 | +    } |
| 22 | + |
| 23 | +    /** Declares the {@code fields} option on top of the inherited options. */ |
| 24 | +    @Override |
| 25 | +    protected void declareOptions() { |
| 26 | +        super.declareOptions(); |
| 27 | + |
| 28 | +        // "fields" carries a value (the field list read as a string in getQuerySQL()), |
| 29 | +        // so it is declared as a value-taking String option, the same way the |
| 30 | +        // inherited "output-format" option is, instead of as a Boolean flag. |
| 31 | +        args.declare("fields", null, true, String.class, "Database fields to dump, as a comma-separated list. Supports SQL syntax, like \"AS\"."); |
| 32 | +    } |
| 33 | + |
| 34 | +    /** |
| 35 | +     * Builds the query that selects the requested fields from the table. |
| 36 | +     * |
| 37 | +     * @return the SELECT statement; all fields ("*") are selected if the |
| 38 | +     *         {@code fields} option is not given |
| 39 | +     */ |
| 40 | +    @Override |
| 41 | +    protected String getQuerySQL() { |
| 42 | +        // NOTE(review): the field list is inserted into the statement verbatim; until the TODO below is resolved it must only come from a trusted operator. |
| 43 | +        String fields = args.getOption("fields", "*"); //TODO: split, sanitize and quote to avoid injection! |
| 44 | + |
| 45 | +        String t = conceptStoreDB.getSQLTableName(table, true); |
| 46 | +        return "SELECT "+fields+" FROM " + t; |
| 47 | +    } |
| 48 | + |
| 49 | +    /** |
| 50 | +     * Reads the table name from the parameter at {@code paramOffset} and opens the |
| 51 | +     * sink using the parameter following it. |
| 52 | +     * |
| 53 | +     * @param paramOffset index of the positional parameter holding the table name |
| 54 | +     * @throws PersistenceException if the sink cannot be opened |
| 55 | +     */ |
| 56 | +    @Override |
| 57 | +    protected void open(int paramOffset) throws PersistenceException { |
| 58 | +        this.table = args.getParameter(paramOffset); |
| 59 | + |
| 60 | +        sink = openSink(paramOffset+1); |
| 61 | +    } |
| 62 | + |
| 63 | +    /** Command line entry point: creates the application and launches it with {@code argv}. */ |
| 64 | +    public static void main(String[] argv) throws Exception { |
| 65 | +        DumpTable app = new DumpTable(); |
| 66 | +        app.launch(argv); |
| 67 | +    } |
| 68 | +} |
Property changes on: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java |
___________________________________________________________________ |
Added: svn:mergeinfo |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java |
— | — | @@ -0,0 +1,112 @@ |
| 2 | +package de.brightbyte.wikiword.dump; |
| 3 | + |
| 4 | +import java.io.IOException; |
| 5 | +import java.sql.ResultSet; |
| 6 | +import java.sql.SQLException; |
| 7 | +import java.util.List; |
| 8 | + |
| 9 | +import de.brightbyte.data.cursor.DataSink; |
| 10 | +import de.brightbyte.data.cursor.JoiningSink; |
| 11 | +import de.brightbyte.db.QueryDumper; |
| 12 | +import de.brightbyte.io.LineSink; |
| 13 | +import de.brightbyte.job.ChunkedProgressRateTracker; |
| 14 | +import de.brightbyte.text.CsvLineJoiner; |
| 15 | +import de.brightbyte.text.Joiner; |
| 16 | +import de.brightbyte.util.PersistenceException; |
| 17 | +import de.brightbyte.wikiword.extract.StreamOutputApp; |
| 18 | +import de.brightbyte.wikiword.schema.GlobalConceptStoreSchema; |
| 19 | +import de.brightbyte.wikiword.schema.LocalConceptStoreSchema; |
| 20 | +import de.brightbyte.wikiword.schema.WikiWordConceptStoreSchema; |
| 21 | +import de.brightbyte.wikiword.store.DatabaseConceptStores; |
| 22 | +import de.brightbyte.wikiword.store.DatabaseWikiWordStore; |
| 23 | +import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
| 24 | + |
| 25 | +/** |
| 26 | + * Base class for applications that run one SQL query against the concept store |
| 27 | + * database and write the result rows to a CSV or TSV sink. |
| 28 | + * <p> |
| 29 | + * Subclasses provide the query via {@link #getQuerySQL()} and may adjust the |
| 30 | + * dumper in {@link #configureDumper(QueryDumper)}. |
| 31 | + */ |
| 32 | +public abstract class SQLDumperApp extends StreamOutputApp<List<String>, WikiWordConceptStoreBase> { |
| 33 | + |
| 34 | +    /** Progress listener that is attached to the dumper in {@link #run()}. */ |
| 35 | +    protected ChunkedProgressRateTracker dumpTracker; |
| 36 | + |
| 37 | +    /** Schema object the query is executed on; assigned in {@link #createStores()}. */ |
| 38 | +    protected WikiWordConceptStoreSchema conceptStoreDB; |
| 39 | + |
| 40 | +    /** Joiner for the selected output format; created on the first call to {@link #openSink(int)}. */ |
| 41 | +    private Joiner joiner; |
| 42 | + |
| 43 | +    /** |
| 44 | +     * @param allowGlobal passed through to the superclass constructor |
| 45 | +     * @param allowLocal passed through to the superclass constructor |
| 46 | +     */ |
| 47 | +    public SQLDumperApp(boolean allowGlobal, boolean allowLocal) { |
| 48 | +        super(allowGlobal, allowLocal); |
| 49 | + |
| 50 | +        dumpTracker = new ChunkedProgressRateTracker("dumping", 10000, 10); //TODO: init later, get values from tweaks |
| 51 | +        dumpTracker.setLogOutput(out); |
| 52 | +    } |
| 53 | + |
| 54 | +    /** Declares the {@code no-output-header} and {@code output-format} options in addition to the inherited ones. */ |
| 55 | +    @Override |
| 56 | +    protected void declareOptions() { |
| 57 | +        super.declareOptions(); |
| 58 | + |
| 59 | +        args.declare("no-output-header", null, false, Boolean.class, "The first line of the output file will not be a column header"); |
| 60 | +        args.declare("output-format", null, true, String.class, "Format of the output file. May be csv or tsv, default is csv."); |
| 61 | +    } |
| 62 | + |
| 63 | +    /** |
| 64 | +     * Creates and registers the concept store and determines the schema object |
| 65 | +     * used for querying. |
| 66 | +     * |
| 67 | +     * @throws PersistenceException if a schema object has to be created and that fails with an SQLException |
| 68 | +     */ |
| 69 | +    @Override |
| 70 | +    protected void createStores() throws IOException, PersistenceException { |
| 71 | +        conceptStore = DatabaseConceptStores.createConceptStore(getConfiguredDataSource(), getConfiguredDataset(), tweaks, true, true); |
| 72 | + |
| 73 | +        registerStore(conceptStore); |
| 74 | + |
| 75 | +        if (conceptStore instanceof DatabaseWikiWordStore) { |
| 76 | +            // a database backed store already has a database access object; use that |
| 77 | +            conceptStoreDB = (WikiWordConceptStoreSchema)((DatabaseWikiWordStore)conceptStore).getDatabaseAccess(); |
| 78 | +        } else { |
| 79 | +            // otherwise create a schema object matching the kind of dataset |
| 80 | +            try { |
| 81 | +                if ( isDatasetLocal() ) { |
| 82 | +                    conceptStoreDB = new LocalConceptStoreSchema(getCorpus(), getConfiguredDataSource(), this.tweaks, false); |
| 83 | +                } else { |
| 84 | +                    conceptStoreDB = new GlobalConceptStoreSchema(getConfiguredDataset(), getConfiguredDataSource(), this.tweaks, false); |
| 85 | +                } |
| 86 | +            } catch (SQLException e) { |
| 87 | +                throw new PersistenceException(e); |
| 88 | +            } |
| 89 | +        } |
| 90 | +    } |
| 91 | + |
| 92 | +    /** |
| 93 | +     * Executes the query from {@link #getQuerySQL()} and dumps the result to the |
| 94 | +     * sink, preceded by a header line unless {@code no-output-header} is set. |
| 95 | +     */ |
| 96 | +    @Override |
| 97 | +    public void run() throws Exception { |
| 98 | +        boolean outputHasHeader = !args.isSet("no-output-header"); |
| 99 | + |
| 100 | +        String sql = getQuerySQL(); |
| 101 | + |
| 102 | +        info("Running query..."); |
| 103 | +        ResultSet rs = conceptStoreDB.executeBigQuery("dumpList", sql); |
| 104 | + |
| 105 | +        int c; |
| 106 | +        try { |
| 107 | +            QueryDumper dumper = new QueryDumper(sink, (String[])null); |
| 108 | +            dumper.addProgressListener(dumpTracker); |
| 109 | +            configureDumper(dumper); |
| 110 | + |
| 111 | +            info("dumping rows..."); |
| 112 | + |
| 113 | +            if (outputHasHeader) dumper.dumpHeader(rs); |
| 114 | +            c = dumper.dumpRows(rs); |
| 115 | +        } finally { |
| 116 | +            // release the result set even if dumping fails half way through |
| 117 | +            rs.close(); |
| 118 | +        } |
| 119 | + |
| 120 | +        info("complete, dumped "+c+" rows."); |
| 121 | +    } |
| 122 | + |
| 123 | +    /** @return the SQL statement whose result is dumped by {@link #run()} */ |
| 124 | +    protected abstract String getQuerySQL(); |
| 125 | + |
| 126 | +    /** |
| 127 | +     * Hook called by {@link #run()} before any output is written; does nothing by default. |
| 128 | +     * |
| 129 | +     * @param dumper the dumper about to be used |
| 130 | +     */ |
| 131 | +    protected void configureDumper(QueryDumper dumper) { |
| 132 | +        // NOOP |
| 133 | +    } |
| 134 | + |
| 135 | +    /** |
| 136 | +     * Opens a sink that joins each row into one line in the format selected by |
| 137 | +     * the {@code output-format} option (csv by default). |
| 138 | +     * |
| 139 | +     * @param paramOffset index of the positional parameter naming the output |
| 140 | +     * @throws IllegalArgumentException if the format is neither csv nor tsv |
| 141 | +     * @throws PersistenceException if the output writer cannot be opened |
| 142 | +     */ |
| 143 | +    @Override |
| 144 | +    protected DataSink<? super List<String>> openSink(int paramOffset) throws PersistenceException { |
| 145 | +        if (joiner==null) { |
| 146 | +            String format = args.getOption("output-format", "csv").toLowerCase(); |
| 147 | + |
| 148 | +            if (format.equals("csv")) joiner = new CsvLineJoiner(",", null, '"', false); |
| 149 | +            else if (format.equals("tsv")) joiner = new CsvLineJoiner("\t", null, '\0', true); |
| 150 | +            else throw new IllegalArgumentException("bad output format: "+format); |
| 151 | +        } |
| 152 | + |
| 153 | +        try { |
| 154 | +            // local deliberately not called "sink", to avoid shadowing the inherited field |
| 155 | +            JoiningSink joiningSink = new JoiningSink(new LineSink(getOutputWriter(paramOffset)), joiner); |
| 156 | +            return joiningSink; |
| 157 | +        } catch (IOException e) { |
| 158 | +            // keep the I/O failure as the cause instead of discarding it |
| 159 | +            throw new PersistenceException(e); |
| 160 | +        } |
| 161 | +    } |
| 162 | + |
| 163 | +} |
Property changes on: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java |
___________________________________________________________________ |
Added: svn:mergeinfo |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java |
— | — | @@ -1,51 +1,27 @@ |
2 | 2 | package de.brightbyte.wikiword.extract; |
3 | 3 | |
4 | | -import java.io.BufferedOutputStream; |
5 | | -import java.io.File; |
6 | | -import java.io.FileNotFoundException; |
7 | | -import java.io.FileOutputStream; |
8 | 4 | import java.io.IOException; |
9 | 5 | import java.io.InputStream; |
10 | 6 | import java.io.InputStreamReader; |
11 | | -import java.io.OutputStream; |
12 | | -import java.io.OutputStreamWriter; |
13 | 7 | import java.io.Reader; |
14 | | -import java.io.UnsupportedEncodingException; |
15 | | -import java.io.Writer; |
16 | 8 | |
17 | 9 | import de.brightbyte.data.cursor.DataCursor; |
18 | | -import de.brightbyte.data.cursor.DataSink; |
19 | 10 | import de.brightbyte.io.ConsoleIO; |
20 | 11 | import de.brightbyte.util.PersistenceException; |
21 | | -import de.brightbyte.wikiword.StoreBackedApp; |
22 | 12 | import de.brightbyte.wikiword.builder.InputFileHelper; |
23 | 13 | import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
24 | 14 | |
25 | | -public abstract class StreamProcessorApp<I, O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> { |
| 15 | +public abstract class StreamProcessorApp<I, O, S extends WikiWordConceptStoreBase> extends StreamOutputApp<O, S> { |
26 | 16 | |
27 | 17 | protected DataCursor<? extends I> cursor; |
28 | | - protected DataSink<? super O> sink; |
29 | 18 | |
30 | 19 | protected boolean usingStdin; |
31 | | - protected boolean usingStdout; |
32 | | - |
33 | 20 | protected InputFileHelper inputHelper; |
34 | 21 | |
35 | 22 | public StreamProcessorApp(boolean allowGlobal, boolean allowLocal) { |
36 | 23 | super(allowGlobal, allowLocal); |
37 | 24 | } |
38 | | - |
39 | 25 | |
40 | | - protected File getOutputFile(int paramIndex) { |
41 | | - if (outputFile==null) { |
42 | | - if (args.getParameterCount()>paramIndex) { |
43 | | - String f = args.getParameter(paramIndex); |
44 | | - if (!f.equals("-")) outputFile = new File(f); |
45 | | - } |
46 | | - } |
47 | | - return outputFile; |
48 | | - } |
49 | | - |
50 | 26 | protected String getInputPath(int paramIndex) { |
51 | 27 | if (inputPath==null) { |
52 | 28 | if (args.getParameterCount()>paramIndex) { |
— | — | @@ -55,59 +31,11 @@ |
56 | 32 | return inputPath; |
57 | 33 | } |
58 | 34 | |
59 | | - protected String getOutputFileEncoding() { |
60 | | - return args.getStringOption("output-encoding", "UTF-8"); |
61 | | - } |
62 | | - |
63 | | - protected void declareOptions() { |
64 | | - super.declareOptions(); |
65 | | - |
66 | | - args.declare("output-encoding", null, true, String.class, "Encoding to use for the poutput file"); |
67 | | - } |
68 | | - |
69 | 35 | protected String inputPath; |
70 | | - protected File outputFile; |
71 | | - protected Writer outputWriter; |
72 | | - protected OutputStream outputStream; |
73 | 36 | private InputStream inputStream; |
74 | 37 | private Reader inputReader; |
75 | 38 | |
76 | | - protected Writer getOutputWriter(int paramIndex) throws FileNotFoundException, UnsupportedEncodingException { |
77 | | - if (outputWriter==null) { |
78 | | - File f = getOutputFile(paramIndex); |
79 | | - if (f==null) { |
80 | | - outputWriter = ConsoleIO.writer; |
81 | | - usingStdout = true; |
82 | | - } else { |
83 | | - OutputStream out = getOutputStream(paramIndex); |
84 | | - outputWriter = new OutputStreamWriter(out, getOutputFileEncoding()); |
85 | | - usingStdout = out == System.out; |
86 | | - } |
87 | | - } |
88 | | - |
89 | | - if (usingStdout && out.getOutput() == ConsoleIO.output) { |
90 | | - out.setOutput(ConsoleIO.errorOutput); |
91 | | - } |
92 | | - |
93 | | - return outputWriter; |
94 | | - } |
95 | 39 | |
96 | | - protected OutputStream getOutputStream(int paramIndex) throws FileNotFoundException { |
97 | | - if (outputStream==null) { |
98 | | - File f = getOutputFile(paramIndex); |
99 | | - if (f==null) { |
100 | | - outputStream = System.out; |
101 | | - usingStdout = true; |
102 | | - } else { |
103 | | - outputStream = new BufferedOutputStream(new FileOutputStream(f, args.isSet("append"))); |
104 | | - usingStdout = false; |
105 | | - info("Writing output to "+f); |
106 | | - } |
107 | | - } |
108 | | - |
109 | | - return outputStream; |
110 | | - } |
111 | | - |
112 | 40 | protected Reader getInputReader(int paramIndex) throws IOException { |
113 | 41 | if (inputReader==null) { |
114 | 42 | String path = getInputPath(paramIndex); |
— | — | @@ -157,15 +85,7 @@ |
158 | 86 | } |
159 | 87 | |
160 | 88 | protected abstract DataCursor<? extends I> openCursor(int paramIndex) throws PersistenceException; |
161 | | - protected abstract DataSink<? super O> openSink(int paramIndex) throws PersistenceException; |
162 | 89 | |
163 | | - protected void init() throws Exception { |
164 | | - // noop |
165 | | - } |
166 | | - protected void close() throws PersistenceException { |
167 | | - sink.close(); |
168 | | - } |
169 | | - |
170 | 90 | public void runTransfer(DataCursor<? extends I> cursor) throws Exception { |
171 | 91 | I rec; |
172 | 92 | while ((rec = cursor.next()) != null) { |
— | — | @@ -174,10 +94,6 @@ |
175 | 95 | } |
176 | 96 | } |
177 | 97 | |
178 | | - protected void commit(O rec) throws PersistenceException { |
179 | | - sink.commit(rec); |
180 | | - } |
181 | | - |
182 | 98 | protected abstract void process(I rec) throws Exception; |
183 | 99 | |
184 | 100 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamOutputApp.java |
— | — | @@ -0,0 +1,108 @@ |
| 2 | +package de.brightbyte.wikiword.extract; |
| 3 | + |
| 4 | +import java.io.BufferedOutputStream; |
| 5 | +import java.io.File; |
| 6 | +import java.io.FileNotFoundException; |
| 7 | +import java.io.FileOutputStream; |
| 8 | +import java.io.OutputStream; |
| 9 | +import java.io.OutputStreamWriter; |
| 10 | +import java.io.UnsupportedEncodingException; |
| 11 | +import java.io.Writer; |
| 12 | + |
| 13 | +import de.brightbyte.data.cursor.DataSink; |
| 14 | +import de.brightbyte.io.ConsoleIO; |
| 15 | +import de.brightbyte.util.PersistenceException; |
| 16 | +import de.brightbyte.wikiword.StoreBackedApp; |
| 17 | +import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
| 18 | + |
| 19 | +/** |
| 20 | + * Base class for store backed applications that write records of type {@code O} |
| 21 | + * to a {@link DataSink}, backed either by a file named on the command line or by |
| 22 | + * standard output. |
| 23 | + * |
| 24 | + * @param <O> type of the records committed to the sink |
| 25 | + * @param <S> type of the concept store the application works on |
| 26 | + */ |
| 27 | +public abstract class StreamOutputApp<O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> { |
| 28 | + |
| 29 | +    /** Sink the output records are committed to; assigned in {@link #open(int)}. */ |
| 30 | +    protected DataSink<? super O> sink; |
| 31 | + |
| 32 | +    /** Set when output goes to standard output instead of a file. */ |
| 33 | +    protected boolean usingStdout; |
| 34 | + |
| 35 | +    /** Output file, or null as long as none was determined (see {@link #getOutputFile(int)}). */ |
| 36 | +    protected File outputFile; |
| 37 | +    /** Cached writer, created on the first call to {@link #getOutputWriter(int)}. */ |
| 38 | +    protected Writer outputWriter; |
| 39 | +    /** Cached stream, created on the first call to {@link #getOutputStream(int)}. */ |
| 40 | +    protected OutputStream outputStream; |
| 41 | + |
| 42 | +    public StreamOutputApp(boolean allowGlobal, boolean allowLocal) { |
| 43 | +        super(allowGlobal, allowLocal); |
| 44 | +    } |
| 45 | + |
| 46 | +    /** |
| 47 | +     * Determines the output file from the positional parameter at {@code paramIndex}. |
| 48 | +     * |
| 49 | +     * @return the output file, or null if the parameter is missing or is "-" |
| 50 | +     */ |
| 51 | +    protected File getOutputFile(int paramIndex) { |
| 52 | +        if (outputFile==null) { |
| 53 | +            if (args.getParameterCount()>paramIndex) { |
| 54 | +                String f = args.getParameter(paramIndex); |
| 55 | +                if (!f.equals("-")) outputFile = new File(f); |
| 56 | +            } |
| 57 | +        } |
| 58 | +        return outputFile; |
| 59 | +    } |
| 60 | + |
| 61 | +    /** @return the value of the {@code output-encoding} option, UTF-8 if it is not given */ |
| 62 | +    protected String getOutputFileEncoding() { |
| 63 | +        return args.getStringOption("output-encoding", "UTF-8"); |
| 64 | +    } |
| 65 | + |
| 66 | +    /** Declares the {@code output-encoding} option in addition to the inherited options. */ |
| 67 | +    @Override |
| 68 | +    protected void declareOptions() { |
| 69 | +        super.declareOptions(); |
| 70 | + |
| 71 | +        args.declare("output-encoding", null, true, String.class, "Encoding to use for the output file"); |
| 72 | +    } |
| 73 | + |
| 74 | +    /** |
| 75 | +     * Returns the writer for the output, creating it on first use: the console |
| 76 | +     * writer if no output file is given, otherwise a writer on the output stream |
| 77 | +     * using the configured encoding. |
| 78 | +     */ |
| 79 | +    protected Writer getOutputWriter(int paramIndex) throws FileNotFoundException, UnsupportedEncodingException { |
| 80 | +        if (outputWriter==null) { |
| 81 | +            File f = getOutputFile(paramIndex); |
| 82 | +            if (f==null) { |
| 83 | +                outputWriter = ConsoleIO.writer; |
| 84 | +                usingStdout = true; |
| 85 | +            } else { |
| 86 | +                // named "stream" so it does not shadow the "out" field used below |
| 87 | +                OutputStream stream = getOutputStream(paramIndex); |
| 88 | +                outputWriter = new OutputStreamWriter(stream, getOutputFileEncoding()); |
| 89 | +                usingStdout = stream == System.out; |
| 90 | +            } |
| 91 | +        } |
| 92 | + |
| 93 | +        // data goes to stdout: move the application's own output to the error |
| 94 | +        // output (presumably to keep log messages out of the data) |
| 95 | +        if (usingStdout && out.getOutput() == ConsoleIO.output) { |
| 96 | +            out.setOutput(ConsoleIO.errorOutput); |
| 97 | +        } |
| 98 | + |
| 99 | +        return outputWriter; |
| 100 | +    } |
| 101 | + |
| 102 | +    /** |
| 103 | +     * Returns the stream for the output, creating it on first use: System.out if |
| 104 | +     * no output file is given, otherwise a buffered file stream that appends if |
| 105 | +     * the {@code append} option is set. |
| 106 | +     */ |
| 107 | +    protected OutputStream getOutputStream(int paramIndex) throws FileNotFoundException { |
| 108 | +        if (outputStream==null) { |
| 109 | +            File f = getOutputFile(paramIndex); |
| 110 | +            if (f==null) { |
| 111 | +                outputStream = System.out; |
| 112 | +                usingStdout = true; |
| 113 | +            } else { |
| 114 | +                outputStream = new BufferedOutputStream(new FileOutputStream(f, args.isSet("append"))); |
| 115 | +                usingStdout = false; |
| 116 | +                info("Writing output to "+f); |
| 117 | +            } |
| 118 | +        } |
| 119 | + |
| 120 | +        return outputStream; |
| 121 | +    } |
| 122 | + |
| 123 | +    /** Opens the sink for the positional parameter at {@code paramOffset}. */ |
| 124 | +    protected void open(int paramOffset) throws PersistenceException { |
| 125 | +        sink = openSink(paramOffset); |
| 126 | +    } |
| 127 | + |
| 128 | +    /** |
| 129 | +     * Creates the sink the application writes to. |
| 130 | +     * |
| 131 | +     * @param paramIndex index of the positional parameter naming the output |
| 132 | +     */ |
| 133 | +    protected abstract DataSink<? super O> openSink(int paramIndex) throws PersistenceException; |
| 134 | + |
| 135 | +    /** Initialization hook; does nothing by default. */ |
| 136 | +    protected void init() throws Exception { |
| 137 | +        // noop |
| 138 | +    } |
| 139 | + |
| 140 | +    /** Closes the sink, if one was opened. */ |
| 141 | +    protected void close() throws PersistenceException { |
| 142 | +        // the sink is still null if open() was never reached or failed |
| 143 | +        if (sink != null) sink.close(); |
| 144 | +    } |
| 145 | + |
| 146 | +    /** Commits one record to the sink. */ |
| 147 | +    protected void commit(O rec) throws PersistenceException { |
| 148 | +        sink.commit(rec); |
| 149 | +    } |
| 150 | + |
| 151 | +} |