Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/output/TextStreamOutput.java |
— | — | @@ -51,7 +51,7 @@ |
52 | 52 | s.append("Aspect:"); s.append(aspect); s.append(sep); |
53 | 53 | s.append("Page-Type:"); s.append(ptype.name()); s.append(sep); |
54 | 54 | s.append("Content-Type: "); s.append(format); s.append(sep); |
55 | | - s.append("Content-Length: "); s.append(data.length); s.append(sep); |
| 55 | + s.append("Content-Length: "); s.append(data.length); |
56 | 56 | s.append("; chars="); s.append(text.length()); |
57 | 57 | s.append("; codepoints="); s.append(Character.codePointCount(text, 0, text.length())); |
58 | 58 | s.append(sep); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/output/TextFileOutput.java |
— | — | @@ -0,0 +1,98 @@ |
| 2 | +package de.brightbyte.wikiword.output; |
| 3 | + |
| 4 | +import java.io.File; |
| 5 | +import java.io.IOException; |
| 6 | +import java.io.UnsupportedEncodingException; |
| 7 | +import java.net.URLEncoder; |
| 8 | + |
| 9 | +import de.brightbyte.io.IOUtil; |
| 10 | +import de.brightbyte.util.PersistenceException; |
| 11 | +import de.brightbyte.util.StringUtils; |
| 12 | +import de.brightbyte.wikiword.DatasetIdentifier; |
| 13 | +import de.brightbyte.wikiword.ResourceType; |
| 14 | + |
| 15 | +/** |
| 16 | + * {@link TextOutput} implementation that writes every stored text to a separate |
| 17 | + * file below a fixed output directory. |
| 18 | + * <p> |
| 19 | + * The file name is the sanitized resource name followed by an extension that |
| 20 | + * depends on the kind of text: <code>def.txt</code>, <code>syn.txt</code>, |
| 21 | + * <code>txt</code> or <code>wiki</code>. If hashing is enabled, each file is placed |
| 22 | + * in a two-level subdirectory named after the first one and the first two |
| 23 | + * characters of StringUtils.hex(StringUtils.md5(name)). |
| 24 | + */ |
| 25 | +public class TextFileOutput extends AbstractOutput implements TextOutput { |
| 26 | + |
| 27 | +    /** Name of the character encoding passed to IOUtil.spit when writing files. */ |
| 28 | +    protected String encoding; |
| 29 | + |
| 30 | +    /** Directory below which all files are created. */ |
| 31 | +    protected File outputDir; |
| 32 | + |
| 33 | +    /** Whether files go into subdirectories derived from the hash of the resource name. */ |
| 34 | +    private final boolean doHash; |
| 35 | + |
| 36 | +    /** |
| 37 | +     * Creates an output that writes below the given directory. |
| 38 | +     * |
| 39 | +     * @param dataset passed on to the superclass constructor |
| 40 | +     * @param outputDir directory to write to; must exist, be a directory and be writable |
| 41 | +     * @param enc name of the character encoding used for the written files |
| 42 | +     * @param doHash if true, files are placed in hash-derived subdirectories of outputDir |
| 43 | +     * @throws NullPointerException if outputDir or enc is null |
| 44 | +     * @throws IllegalArgumentException if outputDir does not exist, is not a directory or is not writable |
| 45 | +     */ |
| 46 | +    public TextFileOutput(DatasetIdentifier dataset, File outputDir, String enc, boolean doHash) { |
| 47 | +        super(dataset); |
| 48 | + |
| 49 | +        if (outputDir==null) throw new NullPointerException("outputDir must not be null"); |
| 50 | +        if (enc==null) throw new NullPointerException("enc must not be null"); |
| 51 | + |
| 52 | +        if (!outputDir.exists()) throw new IllegalArgumentException("output directory "+outputDir+" does not exist"); |
| 53 | +        if (!outputDir.isDirectory()) throw new IllegalArgumentException(outputDir+" is not a directory"); |
| 54 | +        if (!outputDir.canWrite()) throw new IllegalArgumentException("can't write to "+outputDir); |
| 55 | + |
| 56 | +        this.encoding = enc; |
| 57 | +        this.outputDir = outputDir; |
| 58 | +        this.doHash = doHash; |
| 59 | +    } |
| 60 | + |
| 61 | +    /** Writes the definition text to the file "name.def.txt"; rcId and ptype are not used. */ |
| 62 | +    public void storeDefinitionText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException { |
| 63 | +        writeFile(name, "def.txt", text); |
| 64 | +    } |
| 65 | + |
| 66 | +    /** Writes the synopsis text to the file "name.syn.txt"; rcId and ptype are not used. */ |
| 67 | +    public void storeSynopsisText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException { |
| 68 | +        writeFile(name, "syn.txt", text); |
| 69 | +    } |
| 70 | + |
| 71 | +    /** Writes the plain text to the file "name.txt"; rcId and ptype are not used. */ |
| 72 | +    public void storePlainText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException { |
| 73 | +        writeFile(name, "txt", text); |
| 74 | +    } |
| 75 | + |
| 76 | +    /** Writes the raw text to the file "name.wiki"; rcId and ptype are not used. */ |
| 77 | +    public void storeRawText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException { |
| 78 | +        writeFile(name, "wiki", text); |
| 79 | +    } |
| 80 | + |
| 81 | +    /** |
| 82 | +     * Writes text to the file determined by name and ext, creating the |
| 83 | +     * hash-derived parent directories first if hashing is enabled. |
| 84 | +     * |
| 85 | +     * @param name resource name the file name is derived from |
| 86 | +     * @param ext file extension, without the leading dot |
| 87 | +     * @param text content to write; handed to IOUtil.spit together with the configured encoding |
| 88 | +     * @throws PersistenceException if the parent directory cannot be created or IOUtil.spit fails |
| 89 | +     */ |
| 90 | +    protected void writeFile(String name, String ext, String text) throws PersistenceException { |
| 91 | +        File f = getFilePath(name, ext); |
| 92 | + |
| 93 | +        if (doHash) { |
| 94 | +            File d = f.getParentFile(); |
| 95 | + |
| 96 | +            // mkdirs() also returns false when the directory was created concurrently by |
| 97 | +            // someone else, so only fail if it is still missing afterwards. |
| 98 | +            if (!d.exists() && !d.mkdirs() && !d.isDirectory()) { |
| 99 | +                throw new PersistenceException(new IOException("failed to create directory "+d)); |
| 100 | +            } |
| 101 | +        } |
| 102 | + |
| 103 | +        try { |
| 104 | +            // NOTE(review): the trailing false is presumably an "append" flag - confirm against IOUtil.spit |
| 105 | +            IOUtil.spit(f, text, this.encoding, false); |
| 106 | +        } catch (IOException e) { |
| 107 | +            throw new PersistenceException(e); |
| 108 | +        } |
| 109 | +    } |
| 110 | + |
| 111 | +    /** |
| 112 | +     * Builds the target file for a resource name and extension. With hashing enabled, |
| 113 | +     * the file is placed below outputDir in "h/hh", where h and hh are the first one |
| 114 | +     * and the first two characters of StringUtils.hex(StringUtils.md5(name)) |
| 115 | +     * (presumably the hex-encoded MD5 digest of the name). |
| 116 | +     */ |
| 117 | +    private File getFilePath(String name, String ext) { |
| 118 | +        File d = this.outputDir; |
| 119 | + |
| 120 | +        if (this.doHash) { |
| 121 | +            String md5 = StringUtils.hex( StringUtils.md5(name) ); |
| 122 | +            d = new File(d, md5.substring(0, 1) + "/" + md5.substring(0, 2)); |
| 123 | +        } |
| 124 | + |
| 125 | +        return new File(d, sanatizeFileName(name) + "." +ext); |
| 126 | +    } |
| 127 | + |
| 128 | +    /** |
| 129 | +     * Turns a resource name into a string that is safe to use as a file name. |
| 130 | +     * The name is URL-encoded as UTF-8 and every '%' is replaced by '^'. The '*' |
| 131 | +     * character, which URLEncoder leaves unescaped but which is not allowed in |
| 132 | +     * Windows file names, is written as "^2A", i.e. the same form it would have |
| 133 | +     * if URLEncoder had escaped it. |
| 134 | +     * |
| 135 | +     * @param name the resource name |
| 136 | +     * @return the sanitized name |
| 137 | +     */ |
| 138 | +    protected String sanatizeFileName(String name) { |
| 139 | +        try { |
| 140 | +            name = URLEncoder.encode(name, "UTF-8"); |
| 141 | +            name = name.replace('%', '^'); |
| 142 | +            name = name.replace("*", "^2A"); |
| 143 | + |
| 144 | +            return name; |
| 145 | +        } catch (UnsupportedEncodingException e) { |
| 146 | +            // UTF-8 support is mandatory on every Java platform, so this is not expected to happen |
| 147 | +            throw new RuntimeException(e); |
| 148 | +        } |
| 149 | +    } |
| 150 | + |
| 151 | +    /** Does nothing; this class has no fields holding open streams. */ |
| 152 | +    @Override |
| 153 | +    public void close() throws PersistenceException { |
| 154 | +        //noop |
| 155 | +    } |
| 156 | + |
| 157 | +    /** Does nothing; each text is handed to IOUtil.spit directly and nothing is buffered in this class. */ |
| 158 | +    @Override |
| 159 | +    public void flush() throws PersistenceException { |
| 160 | +        //noop |
| 161 | +    } |
| 162 | +} |