r59757 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r59756‎ | r59757 | r59758 >
Date:14:37, 5 December 2009
Author:daniel
Status:deferred
Tags:
Comment:
plain text extraction: addendum
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/output/TextFileOutput.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/output/TextStreamOutput.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/output/TextStreamOutput.java
@@ -51,7 +51,7 @@
5252 s.append("Aspect:"); s.append(aspect); s.append(sep);
5353 s.append("Page-Type:"); s.append(ptype.name()); s.append(sep);
5454 s.append("Content-Type: "); s.append(format); s.append(sep);
55 - s.append("Content-Length: "); s.append(data.length); s.append(sep);
 55+ s.append("Content-Length: "); s.append(data.length);
5656 s.append("; chars="); s.append(text.length());
5757 s.append("; codepoints="); s.append(Character.codePointCount(text, 0, text.length()));
5858 s.append(sep);
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/output/TextFileOutput.java
@@ -0,0 +1,98 @@
 2+package de.brightbyte.wikiword.output;
 3+
 4+import java.io.File;
 5+import java.io.IOException;
 6+import java.io.UnsupportedEncodingException;
 7+import java.net.URLEncoder;
 8+
 9+import de.brightbyte.io.IOUtil;
 10+import de.brightbyte.util.PersistenceException;
 11+import de.brightbyte.util.StringUtils;
 12+import de.brightbyte.wikiword.DatasetIdentifier;
 13+import de.brightbyte.wikiword.ResourceType;
 14+
 15+public class TextFileOutput extends AbstractOutput implements TextOutput {
 16+
 17+ protected String encoding;
 18+ protected File outputDir;
 19+ private boolean doHash;
 20+
 21+ public TextFileOutput(DatasetIdentifier dataset, File outputDir, String enc, boolean doHash) {
 22+ super(dataset);
 23+
 24+ if (outputDir==null) throw new NullPointerException();
 25+ if (enc==null) throw new NullPointerException();
 26+
 27+ if (!outputDir.exists()) throw new IllegalArgumentException("output directory "+outputDir+" does not exist");
 28+ if (!outputDir.isDirectory()) throw new IllegalArgumentException(outputDir+" is not a directory");
 29+ if (!outputDir.canWrite()) throw new IllegalArgumentException("can't write to "+outputDir);
 30+
 31+ this.encoding = enc;
 32+ this.outputDir = outputDir;
 33+ this.doHash = doHash;
 34+ }
 35+
 36+ public void storeDefinitionText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException {
 37+ writeFile(name, "def.txt", text);
 38+ }
 39+
 40+ public void storeSynopsisText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException {
 41+ writeFile(name, "syn.txt", text);
 42+ }
 43+
 44+ public void storePlainText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException {
 45+ writeFile(name, "txt", text);
 46+ }
 47+
 48+ public void storeRawText(int rcId, String name, ResourceType ptype, String text) throws PersistenceException {
 49+ writeFile(name, "wiki", text);
 50+ }
 51+
 52+ protected void writeFile(String name, String ext, String text) throws PersistenceException {
 53+ File f = getFilePath(name, ext);
 54+
 55+ if (doHash) {
 56+ File d = f.getParentFile();
 57+
 58+ if (!d.exists()) d.mkdirs();
 59+ }
 60+
 61+ try {
 62+ IOUtil.spit(f, text, this.encoding, false);
 63+ } catch (IOException e) {
 64+ throw new PersistenceException(e);
 65+ }
 66+ }
 67+
 68+ private File getFilePath(String name, String ext) {
 69+ File d = this.outputDir;
 70+
 71+ if (this.doHash) {
 72+ String md5 = StringUtils.hex( StringUtils.md5(name) );
 73+ d = new File(d, md5.substring(0, 1) + "/" + md5.substring(0, 2));
 74+ }
 75+
 76+ return new File(d, sanatizeFileName(name) + "." +ext);
 77+ }
 78+
 79+ protected String sanatizeFileName(String name) {
 80+ try {
 81+ name = URLEncoder.encode(name, "UTF-8");
 82+ name = name.replace('%', '^');
 83+
 84+ return name;
 85+ } catch (UnsupportedEncodingException e) {
 86+ throw new RuntimeException(e);
 87+ }
 88+ }
 89+
 90+ @Override
 91+ public void close() throws PersistenceException {
 92+ //noop
 93+ }
 94+
 95+ @Override
 96+ public void flush() throws PersistenceException {
 97+ //noop
 98+ }
 99+}

Status & tagging log