r73651 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r73650‎ | r73651 | r73652 >
Date:10:18, 24 September 2010
Author:daniel
Status:deferred
Tags:
Comment:
DumpTable app
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamOutputApp.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java
@@ -0,0 +1,40 @@
 2+package de.brightbyte.wikiword.dump;
 3+
 4+import de.brightbyte.util.PersistenceException;
 5+
 6+public class DumpTable extends SQLDumperApp {
 7+
 8+ protected String table;
 9+
 10+ public DumpTable() {
 11+ super(true, true);
 12+ }
 13+
 14+ @Override
 15+ protected void declareOptions() {
 16+ super.declareOptions();
 17+
 18+ args.declare("fields", null, false, Boolean.class, "Database fields to dump, as a comma-separated list. Supports SQL syntax, like \"AS\".");
 19+ }
 20+
 21+ @Override
 22+ protected String getQuerySQL() {
 23+ String fields = args.getOption("fields", "*"); //TODO: split, sanitize and quote to avoid injection!
 24+
 25+ String t = conceptStoreDB.getSQLTableName(table, true);
 26+ String sql = "SELECT "+fields+" FROM " + t;
 27+ return sql;
 28+ }
 29+
 30+ protected void open(int paramOffset) throws PersistenceException {
 31+ this.table = args.getParameter(paramOffset);
 32+
 33+ sink = openSink(paramOffset+1);
 34+ }
 35+
 36+
 37+ public static void main(String[] argv) throws Exception {
 38+ DumpTable app = new DumpTable();
 39+ app.launch(argv);
 40+ }
 41+}
Property changes on: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/DumpTable.java
___________________________________________________________________
Added: svn:mergeinfo
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java
@@ -0,0 +1,112 @@
 2+package de.brightbyte.wikiword.dump;
 3+
 4+import java.io.IOException;
 5+import java.sql.ResultSet;
 6+import java.sql.SQLException;
 7+import java.util.List;
 8+
 9+import de.brightbyte.data.cursor.DataSink;
 10+import de.brightbyte.data.cursor.JoiningSink;
 11+import de.brightbyte.db.QueryDumper;
 12+import de.brightbyte.io.LineSink;
 13+import de.brightbyte.job.ChunkedProgressRateTracker;
 14+import de.brightbyte.text.CsvLineJoiner;
 15+import de.brightbyte.text.Joiner;
 16+import de.brightbyte.util.PersistenceException;
 17+import de.brightbyte.wikiword.extract.StreamOutputApp;
 18+import de.brightbyte.wikiword.schema.GlobalConceptStoreSchema;
 19+import de.brightbyte.wikiword.schema.LocalConceptStoreSchema;
 20+import de.brightbyte.wikiword.schema.WikiWordConceptStoreSchema;
 21+import de.brightbyte.wikiword.store.DatabaseConceptStores;
 22+import de.brightbyte.wikiword.store.DatabaseWikiWordStore;
 23+import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
 24+
 25+public abstract class SQLDumperApp extends StreamOutputApp<List<String>, WikiWordConceptStoreBase> {
 26+
 27+ protected ChunkedProgressRateTracker dumpTracker;
 28+
 29+ public SQLDumperApp(boolean allowGlobal, boolean allowLocal) {
 30+ super(allowGlobal, allowLocal);
 31+
 32+ dumpTracker = new ChunkedProgressRateTracker("dumping", 10000, 10); //TODO: init later, get values from tweaks
 33+ dumpTracker.setLogOutput(out);
 34+ }
 35+
 36+ @Override
 37+ protected void declareOptions() {
 38+ super.declareOptions();
 39+
 40+ args.declare("no-output-header", null, false, Boolean.class, "The first line of the output file will not be a column header");
 41+ args.declare("output-format", null, true, String.class, "Format of the output file. May be csv or tsv, default is csv.");
 42+ }
 43+
 44+ protected WikiWordConceptStoreSchema conceptStoreDB;
 45+
 46+ @Override
 47+ protected void createStores() throws IOException, PersistenceException {
 48+ conceptStore = DatabaseConceptStores.createConceptStore(getConfiguredDataSource(), getConfiguredDataset(), tweaks, true, true);
 49+
 50+ registerStore(conceptStore);
 51+
 52+ if (conceptStore instanceof DatabaseWikiWordStore) {
 53+ conceptStoreDB = (WikiWordConceptStoreSchema)((DatabaseWikiWordStore)conceptStore).getDatabaseAccess();
 54+ } else {
 55+ try {
 56+ if ( isDatasetLocal() ) conceptStoreDB = new LocalConceptStoreSchema(getCorpus(), getConfiguredDataSource(), this.tweaks, false);
 57+ else conceptStoreDB = new GlobalConceptStoreSchema(getConfiguredDataset(), getConfiguredDataSource(), this.tweaks, false);
 58+ } catch (SQLException e) {
 59+ throw new PersistenceException(e);
 60+ }
 61+ }
 62+ }
 63+
 64+ @Override
 65+ public void run() throws Exception {
 66+ boolean outputHasHeader = !args.isSet("no-output-header");
 67+
 68+ String sql = getQuerySQL();
 69+
 70+ info("Running query...");
 71+ ResultSet rs = conceptStoreDB.executeBigQuery("dumpList", sql);
 72+
 73+ QueryDumper dumper = new QueryDumper(sink, (String[])null);
 74+ dumper.addProgressListener(dumpTracker);
 75+ configureDumper(dumper);
 76+
 77+ info("dumping rows...");
 78+
 79+ if (outputHasHeader) dumper.dumpHeader(rs);
 80+ int c = dumper.dumpRows(rs);
 81+
 82+ rs.close();
 83+
 84+ info("complete, dumped "+c+" rows.");
 85+ }
 86+
 87+ protected abstract String getQuerySQL();
 88+
 89+ protected void configureDumper(QueryDumper dumper) {
 90+ // NOOP
 91+ }
 92+
 93+ private Joiner joiner;
 94+
 95+ @Override
 96+ protected DataSink<? super List<String>> openSink(int paramOffset) throws PersistenceException {
 97+ if (joiner==null) {
 98+ String format = args.getOption("output-format", "csv").toLowerCase();
 99+
 100+ if (format.equals("csv")) joiner = new CsvLineJoiner(",", null, '"', false);
 101+ else if (format.equals("tsv")) joiner = new CsvLineJoiner("\t", null, '\0', true);
 102+ else throw new IllegalArgumentException("bad output format: "+format);
 103+ }
 104+
 105+ try {
 106+ JoiningSink sink = new JoiningSink(new LineSink(getOutputWriter(paramOffset)), joiner);
 107+ return sink;
 108+ } catch (IOException e) {
 109+ throw new PersistenceException();
 110+ }
 111+ }
 112+
 113+}
Property changes on: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/dump/SQLDumperApp.java
___________________________________________________________________
Added: svn:mergeinfo
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamProcessorApp.java
@@ -1,51 +1,27 @@
22 package de.brightbyte.wikiword.extract;
33
4 -import java.io.BufferedOutputStream;
5 -import java.io.File;
6 -import java.io.FileNotFoundException;
7 -import java.io.FileOutputStream;
84 import java.io.IOException;
95 import java.io.InputStream;
106 import java.io.InputStreamReader;
11 -import java.io.OutputStream;
12 -import java.io.OutputStreamWriter;
137 import java.io.Reader;
14 -import java.io.UnsupportedEncodingException;
15 -import java.io.Writer;
168
179 import de.brightbyte.data.cursor.DataCursor;
18 -import de.brightbyte.data.cursor.DataSink;
1910 import de.brightbyte.io.ConsoleIO;
2011 import de.brightbyte.util.PersistenceException;
21 -import de.brightbyte.wikiword.StoreBackedApp;
2212 import de.brightbyte.wikiword.builder.InputFileHelper;
2313 import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
2414
25 -public abstract class StreamProcessorApp<I, O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> {
 15+public abstract class StreamProcessorApp<I, O, S extends WikiWordConceptStoreBase> extends StreamOutputApp<O, S> {
2616
2717 protected DataCursor<? extends I> cursor;
28 - protected DataSink<? super O> sink;
2918
3019 protected boolean usingStdin;
31 - protected boolean usingStdout;
32 -
3320 protected InputFileHelper inputHelper;
3421
3522 public StreamProcessorApp(boolean allowGlobal, boolean allowLocal) {
3623 super(allowGlobal, allowLocal);
3724 }
38 -
3925
40 - protected File getOutputFile(int paramIndex) {
41 - if (outputFile==null) {
42 - if (args.getParameterCount()>paramIndex) {
43 - String f = args.getParameter(paramIndex);
44 - if (!f.equals("-")) outputFile = new File(f);
45 - }
46 - }
47 - return outputFile;
48 - }
49 -
5026 protected String getInputPath(int paramIndex) {
5127 if (inputPath==null) {
5228 if (args.getParameterCount()>paramIndex) {
@@ -55,59 +31,11 @@
5632 return inputPath;
5733 }
5834
59 - protected String getOutputFileEncoding() {
60 - return args.getStringOption("output-encoding", "UTF-8");
61 - }
62 -
63 - protected void declareOptions() {
64 - super.declareOptions();
65 -
66 - args.declare("output-encoding", null, true, String.class, "Encoding to use for the poutput file");
67 - }
68 -
6935 protected String inputPath;
70 - protected File outputFile;
71 - protected Writer outputWriter;
72 - protected OutputStream outputStream;
7336 private InputStream inputStream;
7437 private Reader inputReader;
7538
76 - protected Writer getOutputWriter(int paramIndex) throws FileNotFoundException, UnsupportedEncodingException {
77 - if (outputWriter==null) {
78 - File f = getOutputFile(paramIndex);
79 - if (f==null) {
80 - outputWriter = ConsoleIO.writer;
81 - usingStdout = true;
82 - } else {
83 - OutputStream out = getOutputStream(paramIndex);
84 - outputWriter = new OutputStreamWriter(out, getOutputFileEncoding());
85 - usingStdout = out == System.out;
86 - }
87 - }
88 -
89 - if (usingStdout && out.getOutput() == ConsoleIO.output) {
90 - out.setOutput(ConsoleIO.errorOutput);
91 - }
92 -
93 - return outputWriter;
94 - }
9539
96 - protected OutputStream getOutputStream(int paramIndex) throws FileNotFoundException {
97 - if (outputStream==null) {
98 - File f = getOutputFile(paramIndex);
99 - if (f==null) {
100 - outputStream = System.out;
101 - usingStdout = true;
102 - } else {
103 - outputStream = new BufferedOutputStream(new FileOutputStream(f, args.isSet("append")));
104 - usingStdout = false;
105 - info("Writing output to "+f);
106 - }
107 - }
108 -
109 - return outputStream;
110 - }
111 -
11240 protected Reader getInputReader(int paramIndex) throws IOException {
11341 if (inputReader==null) {
11442 String path = getInputPath(paramIndex);
@@ -157,15 +85,7 @@
15886 }
15987
16088 protected abstract DataCursor<? extends I> openCursor(int paramIndex) throws PersistenceException;
161 - protected abstract DataSink<? super O> openSink(int paramIndex) throws PersistenceException;
16289
163 - protected void init() throws Exception {
164 - // noop
165 - }
166 - protected void close() throws PersistenceException {
167 - sink.close();
168 - }
169 -
17090 public void runTransfer(DataCursor<? extends I> cursor) throws Exception {
17191 I rec;
17292 while ((rec = cursor.next()) != null) {
@@ -174,10 +94,6 @@
17595 }
17696 }
17797
178 - protected void commit(O rec) throws PersistenceException {
179 - sink.commit(rec);
180 - }
181 -
18298 protected abstract void process(I rec) throws Exception;
18399
184100 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/extract/StreamOutputApp.java
@@ -0,0 +1,108 @@
 2+package de.brightbyte.wikiword.extract;
 3+
 4+import java.io.BufferedOutputStream;
 5+import java.io.File;
 6+import java.io.FileNotFoundException;
 7+import java.io.FileOutputStream;
 8+import java.io.OutputStream;
 9+import java.io.OutputStreamWriter;
 10+import java.io.UnsupportedEncodingException;
 11+import java.io.Writer;
 12+
 13+import de.brightbyte.data.cursor.DataSink;
 14+import de.brightbyte.io.ConsoleIO;
 15+import de.brightbyte.util.PersistenceException;
 16+import de.brightbyte.wikiword.StoreBackedApp;
 17+import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
 18+
 19+public abstract class StreamOutputApp<O, S extends WikiWordConceptStoreBase> extends StoreBackedApp<S> {
 20+
 21+ protected DataSink<? super O> sink;
 22+
 23+ protected boolean usingStdout;
 24+
 25+ public StreamOutputApp(boolean allowGlobal, boolean allowLocal) {
 26+ super(allowGlobal, allowLocal);
 27+ }
 28+
 29+
 30+ protected File getOutputFile(int paramIndex) {
 31+ if (outputFile==null) {
 32+ if (args.getParameterCount()>paramIndex) {
 33+ String f = args.getParameter(paramIndex);
 34+ if (!f.equals("-")) outputFile = new File(f);
 35+ }
 36+ }
 37+ return outputFile;
 38+ }
 39+
 40+ protected String getOutputFileEncoding() {
 41+ return args.getStringOption("output-encoding", "UTF-8");
 42+ }
 43+
 44+ protected void declareOptions() {
 45+ super.declareOptions();
 46+
 47+ args.declare("output-encoding", null, true, String.class, "Encoding to use for the poutput file");
 48+ }
 49+
 50+ protected File outputFile;
 51+ protected Writer outputWriter;
 52+ protected OutputStream outputStream;
 53+
 54+ protected Writer getOutputWriter(int paramIndex) throws FileNotFoundException, UnsupportedEncodingException {
 55+ if (outputWriter==null) {
 56+ File f = getOutputFile(paramIndex);
 57+ if (f==null) {
 58+ outputWriter = ConsoleIO.writer;
 59+ usingStdout = true;
 60+ } else {
 61+ OutputStream out = getOutputStream(paramIndex);
 62+ outputWriter = new OutputStreamWriter(out, getOutputFileEncoding());
 63+ usingStdout = out == System.out;
 64+ }
 65+ }
 66+
 67+ if (usingStdout && out.getOutput() == ConsoleIO.output) {
 68+ out.setOutput(ConsoleIO.errorOutput);
 69+ }
 70+
 71+ return outputWriter;
 72+ }
 73+
 74+ protected OutputStream getOutputStream(int paramIndex) throws FileNotFoundException {
 75+ if (outputStream==null) {
 76+ File f = getOutputFile(paramIndex);
 77+ if (f==null) {
 78+ outputStream = System.out;
 79+ usingStdout = true;
 80+ } else {
 81+ outputStream = new BufferedOutputStream(new FileOutputStream(f, args.isSet("append")));
 82+ usingStdout = false;
 83+ info("Writing output to "+f);
 84+ }
 85+ }
 86+
 87+ return outputStream;
 88+ }
 89+
 90+
 91+ protected void open(int paramOffset) throws PersistenceException {
 92+ sink = openSink(paramOffset);
 93+ }
 94+
 95+ protected abstract DataSink<? super O> openSink(int paramIndex) throws PersistenceException;
 96+
 97+ protected void init() throws Exception {
 98+ // noop
 99+ }
 100+
 101+ protected void close() throws PersistenceException {
 102+ sink.close();
 103+ }
 104+
 105+ protected void commit(O rec) throws PersistenceException {
 106+ sink.commit(rec);
 107+ }
 108+
 109+}

Status & tagging log