r46384 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r46383 | r46384 | r46385 >
Date: 20:52, 27 January 2009
Author: daniel
Status: deferred
Tags:
Comment:
commons as a pseudo-language
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/Languages.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/AbstractAnalyzer.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/PlainTextAnalyzer.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportDump.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/LanguageConfiguration_commons.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_commonswiki.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/PlainTextAnalyzerTest.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/PlainTextAnalyzerTestBase.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerBenchmark.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerTest.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerTestBase.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder4LifeScience/src/test/java/de/brightbyte/wikiword/wikipro/PropertyDump.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/Languages.java
@@ -16,9 +16,8 @@
1717 * It is loaded from the Languages.properties located in the de.brightbyte.wikiword package.
1818 */
1919 public class Languages {
20 - public static final Map<String, String> names;
2120
22 - static {
 21+ public static Map<String, String> load(TweakSet tweaks){
2322 try {
2423 InputStream in = Languages.class.getResourceAsStream("Languages.properties");
2524 if (in == null) throw new ExceptionInInitializerError("missing resource Languages.properties");
@@ -34,10 +33,22 @@
3534 ln.put(k, v);
3635 }
3736
38 - names = Collections.unmodifiableMap(ln);
 37+ if (tweaks.getTweak("languages.commonsAsLanguage", false)) {
 38+ ln.put("commons", "Commons");
 39+ } else {
 40+ ln.remove("commons");
 41+ }
 42+
 43+ if (tweaks.getTweak("languages.simpleAsLanguage", true)) {
 44+ ln.put("simple", "Simple English");
 45+ } else {
 46+ ln.remove("simple");
 47+ }
 48+
 49+ return Collections.unmodifiableMap(ln);
3950 }
4051 catch (IOException ex) {
41 - throw new ExceptionInInitializerError(ex);
 52+ throw new RuntimeException("failed to load Languages.properties via ClassLoader", ex);
4253 }
4354 }
4455
Index: trunk/WikiWord/WikiWordBuilder4LifeScience/src/test/java/de/brightbyte/wikiword/wikipro/PropertyDump.java
@@ -11,6 +11,7 @@
1212 import de.brightbyte.wikiword.Corpus;
1313 import de.brightbyte.wikiword.Namespace;
1414 import de.brightbyte.wikiword.NamespaceSet;
 15+import de.brightbyte.wikiword.TweakSet;
1516 import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer;
1617 import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer.WikiLink;
1718 import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer.WikiPage;
@@ -34,7 +35,8 @@
3536
3637 String p = n;
3738
38 - WikiTextAnalyzer analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus);
 39+ TweakSet tweaks = new TweakSet();
 40+ WikiTextAnalyzer analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus, tweaks);
3941
4042 System.out.println("loading "+u+"...");
4143 String text = IOUtil.slurp(u, "UTF-8");
Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/PlainTextAnalyzerTestBase.java
@@ -7,6 +7,7 @@
88
99 import junit.framework.TestCase;
1010 import de.brightbyte.wikiword.Corpus;
 11+import de.brightbyte.wikiword.TweakSet;
1112 import de.brightbyte.wikiword.analyzer.PlainTextAnalyzer;
1213
1314 /**
@@ -15,15 +16,17 @@
1617 public abstract class PlainTextAnalyzerTestBase extends TestCase {
1718
1819 protected Corpus corpus;
 20+ protected TweakSet tweaks;
1921 protected PlainTextAnalyzer analyzer;
2022
2123 public PlainTextAnalyzerTestBase(String wikiName) {
2224 corpus = Corpus.forName("TEST", wikiName, (String[])null);
 25+ tweaks = new TweakSet();
2326 }
2427
2528 @Override
2629 public void setUp() throws Exception {
27 - analyzer = PlainTextAnalyzer.getPlainTextAnalyzer(corpus);
 30+ analyzer = PlainTextAnalyzer.getPlainTextAnalyzer(corpus, tweaks);
2831 analyzer.initialize();
2932 }
3033
Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerBenchmark.java
@@ -4,6 +4,7 @@
55
66 import de.brightbyte.wikiword.Corpus;
77 import de.brightbyte.wikiword.NamespaceSet;
 8+import de.brightbyte.wikiword.TweakSet;
89 import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer;
910
1011 public class WikiTextAnalyzerBenchmark {
@@ -11,6 +12,7 @@
1213 protected NamespaceSet namespaces;
1314 protected Corpus corpus;
1415 protected WikiTextAnalyzer analyzer;
 16+ protected TweakSet tweaks;
1517
1618 public WikiTextAnalyzerBenchmark(String wikiName) throws InstantiationException {
1719 corpus = Corpus.forName("TEST", wikiName, (String[])null);
@@ -19,9 +21,10 @@
2022 //site.Sitename = corpus.getFamily();
2123
2224 titleCase = true;
23 - namespaces = corpus.getNamespaces();
 25+ namespaces = corpus.getNamespaces();
 26+ tweaks = new TweakSet();
2427
25 - analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus);
 28+ analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus, tweaks);
2629 analyzer.initialize(namespaces, titleCase);
2730 }
2831
Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerTestBase.java
@@ -19,6 +19,7 @@
2020 import junit.framework.TestCase;
2121 import de.brightbyte.wikiword.Corpus;
2222 import de.brightbyte.wikiword.NamespaceSet;
 23+import de.brightbyte.wikiword.TweakSet;
2324 import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer;
2425
2526 /**
@@ -30,9 +31,11 @@
3132 protected NamespaceSet namespaces;
3233 protected Corpus corpus;
3334 protected WikiTextAnalyzer analyzer;
 35+ protected TweakSet tweaks;
3436
3537 public WikiTextAnalyzerTestBase(String wikiName) {
3638 corpus = Corpus.forName("TEST", wikiName, (String[])null);
 39+ tweaks = new TweakSet();
3740
3841 //site.Base = "http://"+corpus.getDomain()+"/wiki/";
3942 //site.Sitename = corpus.getFamily();
@@ -43,7 +46,7 @@
4447
4548 @Override
4649 public void setUp() throws Exception {
47 - analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus);
 50+ analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus, tweaks);
4851 analyzer.initialize(namespaces, titleCase);
4952 }
5053
Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/PlainTextAnalyzerTest.java
@@ -84,7 +84,7 @@
8585
8686 corpus = new Corpus("TEST", "generic", "generic", "generic", "generic", "xx", "generic", null);
8787 testAnalyzer = new TestPlainTextAnalyzer(corpus);
88 - testAnalyzer.configure(config);
 88+ testAnalyzer.configure(config, tweaks);
8989 testAnalyzer.initialize();
9090
9191 analyzer = testAnalyzer;
Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzerTest.java
@@ -643,7 +643,7 @@
644644
645645 corpus = new Corpus("TEST", "generic", "generic", "generic", "generic", "xx", "generic", null);
646646 PlainTextAnalyzer language = new PlainTextAnalyzer(corpus);
647 - language.configure(lconfig);
 647+ language.configure(lconfig, tweaks);
648648 language.initialize();
649649
650650 WikiConfiguration config = new WikiConfiguration();
@@ -662,7 +662,7 @@
663663
664664 testAnalyzer = new TestWikiTextAnalyzer(language);
665665 testAnalyzer.addExtraTemplateUser(Pattern.compile(".*"), true);
666 - testAnalyzer.configure(config);
 666+ testAnalyzer.configure(config, tweaks);
667667 testAnalyzer.initialize(namespaces, titleCase);
668668
669669 analyzer = testAnalyzer;
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/AbstractAnalyzer.java
@@ -12,6 +12,7 @@
1313
1414 import de.brightbyte.util.StringUtils;
1515 import de.brightbyte.wikiword.Corpus;
 16+import de.brightbyte.wikiword.TweakSet;
1617 import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer.ArmorEntry;
1718
1819 /**
@@ -22,6 +23,8 @@
2324 */
2425 public class AbstractAnalyzer {
2526
 27+ protected TweakSet tweaks;
 28+
2629 /**
2730 * A Mangler changes text in some way.
2831 */
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/PlainTextAnalyzer.java
@@ -7,6 +7,7 @@
88 import java.util.regex.Matcher;
99
1010 import de.brightbyte.wikiword.Corpus;
 11+import de.brightbyte.wikiword.TweakSet;
1112
1213 public class PlainTextAnalyzer extends AbstractAnalyzer {
1314 private LanguageConfiguration config;
@@ -25,7 +26,7 @@
2627 config.defaults();
2728 }
2829
29 - public static PlainTextAnalyzer getPlainTextAnalyzer(Corpus corpus) throws InstantiationException {
 30+ public static PlainTextAnalyzer getPlainTextAnalyzer(Corpus corpus, TweakSet tweaks) throws InstantiationException {
3031 Class[] acc = getSpecializedClasses(corpus, PlainTextAnalyzer.class, "PlainTextAnalyzer");
3132 Class[] ccc = getSpecializedClasses(corpus, LanguageConfiguration.class, "LanguageConfiguration", corpus.getConfigPackages());
3233
@@ -36,7 +37,7 @@
3738 for (int i = ccc.length-1; i >= 0; i--) { //NOTE: most specific last, because last write wins.
3839 ctor = ccc[i].getConstructor(new Class[] { });
3940 LanguageConfiguration conf = (LanguageConfiguration)ctor.newInstance(new Object[] { } );
40 - analyzer.configure(conf);
 41+ analyzer.configure(conf, tweaks);
4142 }
4243
4344 return analyzer;
@@ -53,7 +54,10 @@
5455 }
5556 }
5657
57 - public void configure(LanguageConfiguration config) {
 58+ public void configure(LanguageConfiguration config, TweakSet tweaks) {
 59+ if (tweaks==null) throw new NullPointerException();
 60+
 61+ this.tweaks = tweaks;
5862 this.config.merge(config);
5963 }
6064
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/analyzer/WikiTextAnalyzer.java
@@ -38,6 +38,7 @@
3939 import de.brightbyte.wikiword.Namespace;
4040 import de.brightbyte.wikiword.NamespaceSet;
4141 import de.brightbyte.wikiword.ResourceType;
 42+import de.brightbyte.wikiword.TweakSet;
4243 import de.brightbyte.wikiword.analyzer.TemplateExtractor.TemplateData;
4344 import de.brightbyte.xml.HtmlEntities;
4445
@@ -1980,7 +1981,8 @@
19811982 private Matcher relevantTemplateMatcher;
19821983 private List<TemplateUser> extraTemplateUsers = new ArrayList<TemplateUser>();
19831984
1984 - private WikiTextSniffer sniffer = new WikiTextSniffer();
 1985+ private WikiTextSniffer sniffer = new WikiTextSniffer();
 1986+ private Map<String, String> languageNames;
19851987
19861988 public WikiTextAnalyzer(PlainTextAnalyzer language) {
19871989 this.language = language;
@@ -2008,8 +2010,11 @@
20092011 return initialized;
20102012 }
20112013
2012 - public void configure(WikiConfiguration config) {
 2014+ public void configure(WikiConfiguration config, TweakSet tweaks) {
20132015 if (isInitialized()) throw new IllegalStateException("already initialized");
 2016+ if (tweaks==null) throw new NullPointerException();
 2017+
 2018+ this.tweaks = tweaks;
20142019 this.config.merge(config);
20152020 }
20162021
@@ -2504,8 +2509,16 @@
25052510
25062511 public boolean isInterlanguagePrefix(CharSequence pre) {
25072512 pre = trimAndLower(pre);
2508 - return Languages.names.containsKey(pre);
 2513+ return getLanguageNames().containsKey(pre);
25092514 }
 2515+
 2516+ protected Map<String, String> getLanguageNames() {
 2517+ if (this.languageNames==null) {
 2518+ this.languageNames = Languages.load(this.tweaks);
 2519+ }
 2520+
 2521+ return this.languageNames;
 2522+ }
25102523
25112524 public boolean isInterwikiPrefix(CharSequence pre) {
25122525 interwikiMatcher.reset(pre);
@@ -2925,8 +2938,8 @@
29262939 return new WikiLink(interwiki, namespace, page, section, text, impliedText, magic);
29272940 }
29282941
2929 - public static WikiTextAnalyzer getWikiTextAnalyzer(Corpus corpus) throws InstantiationException {
2930 - PlainTextAnalyzer language = PlainTextAnalyzer.getPlainTextAnalyzer(corpus);
 2942+ public static WikiTextAnalyzer getWikiTextAnalyzer(Corpus corpus, TweakSet tweaks) throws InstantiationException {
 2943+ PlainTextAnalyzer language = PlainTextAnalyzer.getPlainTextAnalyzer(corpus, tweaks);
29312944 language.initialize();
29322945
29332946 return getWikiTextAnalyzer(language);
@@ -2951,7 +2964,7 @@
29522965
29532966 ctor = ccc[i].getConstructor(new Class[] { });
29542967 WikiConfiguration conf = (WikiConfiguration)ctor.newInstance(new Object[] { } );
2955 - analyzer.configure(conf);
 2968+ analyzer.configure(conf, language.tweaks);
29562969 }
29572970
29582971 return analyzer;
@@ -2991,8 +3004,10 @@
29923005
29933006 String text = IOUtil.slurp(new File(file), "UTF-8");
29943007
 3008+ TweakSet tweaks = new TweakSet();
 3009+
29953010 Corpus corpus = Corpus.forName("TEST", lang, (String[])null);
2996 - WikiTextAnalyzer analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus);
 3011+ WikiTextAnalyzer analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(corpus, tweaks);
29973012
29983013 NamespaceSet namespaces = Namespace.getNamespaces(null);
29993014 analyzer.initialize(namespaces, true);
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportDump.java
@@ -61,7 +61,7 @@
6262 }
6363 */
6464
65 - WikiTextAnalyzer analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(getCorpus());
 65+ WikiTextAnalyzer analyzer = WikiTextAnalyzer.getWikiTextAnalyzer(getCorpus(), tweaks);
6666 WikiWordImporter importer = newImporter(analyzer, store, tweaks);
6767 importer.setLogOutput(getLogOutput());
6868 importer.configure(args);
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/LanguageConfiguration_commons.java
@@ -0,0 +1,6 @@
 2+package de.brightbyte.wikiword.wikis;
 3+
 4+
 5+public class LanguageConfiguration_commons extends LanguageConfiguration_en {
 6+ //noop
 7+}
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/wikis/WikiConfiguration_commonswiki.java
@@ -0,0 +1,9 @@
 2+package de.brightbyte.wikiword.wikis;
 3+
 4+public class WikiConfiguration_commonswiki extends WikiConfiguration_enwiki {
 5+
 6+ public WikiConfiguration_commonswiki() {
 7+ //noop
 8+ }
 9+
 10+}

Status & tagging log