r52121 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r52120‎ | r52121 | r52122 >
Date:16:15, 18 June 2009
Author:daniel
Status:deferred
Tags:
Comment:
BuildConceptMappings plus test framework
Modified paths:
  • /trunk/WikiWord/WikiWordIntegrator/.classpath (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptAssociations.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptMappings.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/LoadForeignProperties.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/CollapsingMatchesCursor.java (modified) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/IntegratorAppTestBase.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/LoadForeignPropertiesTest.java (added) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordIntegrator/.classpath
@@ -15,5 +15,10 @@
1616 <attribute name="javadoc_location" value="http://www.beanshell.org/javadoc/"/>
1717 </attributes>
1818 </classpathentry>
 19+ <classpathentry kind="var" path="M2_REPO/org/dbunit/dbunit/2.4.4/dbunit-2.4.4.jar">
 20+ <attributes>
 21+ <attribute name="javadoc_location" value="http://www.dbunit.org/apidocs/"/>
 22+ </attributes>
 23+ </classpathentry>
1924 <classpathentry kind="output" path="bin"/>
2025 </classpath>
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/LoadForeignPropertiesTest.java
@@ -0,0 +1,38 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.net.URL;
 5+
 6+import de.brightbyte.db.testing.DatabaseTestBase;
 7+
 8+public class LoadForeignPropertiesTest extends IntegratorAppTestBase<LoadForeignProperties> {
 9+
 10+ public LoadForeignPropertiesTest() {
 11+ super("LoadForeignPropertiesTest");
 12+ }
 13+
 14+ /*
 15+ protected String[] getSetUpStatements() {
 16+ return new String[] { "CREATE TABLE QUUXBASE ( foo INT NOT NULL, bar VARCHAR(32) )",
 17+ "CREATE TABLE QUUX ( foo INT NOT NULL, bar VARCHAR(32) )" };
 18+ }
 19+
 20+ protected String[] getTearDownStatements() {
 21+ return new String[] { "DROP TABLE QUUXBASE", "DROP TABLE QUUX" };
 22+ }
 23+ */
 24+
 25+ //-----------------------------------------------------------------------------------------------------
 26+ public void testTableImport() throws Exception {
 27+ runApp("tableImport");
 28+ }
 29+
 30+ public void testTripleImport() throws Exception {
 31+ runApp("tripleImport");
 32+ }
 33+
 34+ @Override
 35+ protected LoadForeignProperties createApp() {
 36+ return new LoadForeignProperties();
 37+ }
 38+
 39+}
Index: trunk/WikiWord/WikiWordIntegrator/src/test/java/de/brightbyte/wikiword/integrator/IntegratorAppTestBase.java
@@ -0,0 +1,50 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.io.IOException;
 5+import java.net.URL;
 6+
 7+import de.brightbyte.db.testing.DatabaseTestBase;
 8+import de.brightbyte.wikiword.DatasetIdentifier;
 9+import de.brightbyte.wikiword.TweakSet;
 10+
 11+public abstract class IntegratorAppTestBase<T extends AbstractIntegratorApp> extends DatabaseTestBase {
 12+
 13+ public IntegratorAppTestBase(String name) {
 14+ super(name);
 15+ }
 16+
 17+ public TweakSet loadTweakSet() throws IOException {
 18+ URL url = requireAuxilliaryFileURL(getBaseName()+".tweaks.properties");
 19+ TweakSet tweaks = new TweakSet();
 20+ tweaks.loadTweaks(url);
 21+ return tweaks;
 22+ }
 23+
 24+ public FeatureSetSourceDescriptor loadSourceDescriptor(String testName) throws IOException {
 25+ URL url = requireAuxilliaryFileURL(getBaseName()+"."+testName+".properties");
 26+ FeatureSetSourceDescriptor descriptor = new FeatureSetSourceDescriptor();
 27+ descriptor.loadTweaks(url);
 28+ return descriptor;
 29+ }
 30+
 31+ protected abstract T createApp();
 32+
 33+ protected T prepareApp(FeatureSetSourceDescriptor sourceDescriptor) throws IOException {
 34+ TweakSet tweaks = loadTweakSet();
 35+ T app = createApp();
 36+
 37+ app.testInit(testDataSource, DatasetIdentifier.forName("TEST", "xx"), tweaks, sourceDescriptor);
 38+ return app;
 39+ }
 40+
 41+ protected void runApp(String testName) throws Exception {
 42+ FeatureSetSourceDescriptor source = loadSourceDescriptor(testName);
 43+ runApp(source);
 44+ }
 45+
 46+ protected void runApp(FeatureSetSourceDescriptor sourceDescriptor) throws Exception {
 47+ T app = prepareApp(sourceDescriptor);
 48+ app.testLaunch();
 49+ }
 50+
 51+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/AbstractIntegratorApp.java
@@ -1,6 +1,7 @@
22 package de.brightbyte.wikiword.integrator;
33
44 import java.beans.IntrospectionException;
 5+import java.io.File;
56 import java.io.IOException;
67 import java.io.InputStream;
78 import java.lang.reflect.InvocationTargetException;
@@ -12,17 +13,23 @@
1314 import java.util.Map;
1415 import java.util.regex.Pattern;
1516
 17+import javax.sql.DataSource;
 18+
1619 import de.brightbyte.data.Functor;
1720 import de.brightbyte.data.cursor.DataCursor;
 21+import de.brightbyte.db.DatabaseConnectionInfo;
1822 import de.brightbyte.db.SqlScriptRunner;
1923 import de.brightbyte.io.IOUtil;
2024 import de.brightbyte.text.Chunker;
2125 import de.brightbyte.util.BeanUtils;
2226 import de.brightbyte.util.PersistenceException;
 27+import de.brightbyte.wikiword.DatasetIdentifier;
2328 import de.brightbyte.wikiword.StoreBackedApp;
2429 import de.brightbyte.wikiword.TweakSet;
2530 import de.brightbyte.wikiword.builder.InputFileHelper;
2631 import de.brightbyte.wikiword.integrator.data.AssemblingFeatureSetCursor;
 32+import de.brightbyte.wikiword.integrator.data.Association;
 33+import de.brightbyte.wikiword.integrator.data.AssociationCursor;
2734 import de.brightbyte.wikiword.integrator.data.FeatureSet;
2835 import de.brightbyte.wikiword.integrator.data.FeatureSetValueSplitter;
2936 import de.brightbyte.wikiword.integrator.data.MangelingFeatureSetCursor;
@@ -42,11 +49,40 @@
4350 protected InputFileHelper inputHelper;
4451 protected P propertyProcessor;
4552 protected FeatureSetSourceDescriptor sourceDescriptor;
 53+ private DataSource configuredDataSource;
 54+ private DatasetIdentifier configuredDataset;
4655
4756 public AbstractIntegratorApp() {
4857 super(true, true);
4958 }
5059
 60+ @Override
 61+ protected DataSource getConfiguredDataSource() throws IOException, PersistenceException {
 62+ if (configuredDataSource!=null) return configuredDataSource;
 63+ configuredDataSource = super.getConfiguredDataSource();
 64+ return configuredDataSource;
 65+ }
 66+
 67+ @Override
 68+ public DatasetIdentifier getConfiguredDataset() {
 69+ if (configuredDataset!=null) return configuredDataset;
 70+ configuredDataset = super.getConfiguredDataset();
 71+ return configuredDataset;
 72+ }
 73+
 74+ public void testInit(DataSource dataSource, DatasetIdentifier dataset, TweakSet tweaks, FeatureSetSourceDescriptor sourceDescriptor) {
 75+ if (this.tweaks!=null || this.sourceDescriptor!=null) throw new IllegalStateException("application already initialized");
 76+
 77+ this.configuredDataSource = dataSource;
 78+ this.configuredDataset = dataset;
 79+ this.tweaks = tweaks;
 80+ this.sourceDescriptor = sourceDescriptor;
 81+ }
 82+
 83+ public void testLaunch() throws Exception {
 84+ launchExecute();
 85+ }
 86+
5187 protected InputFileHelper getInputHelper() {
5288 if (inputHelper==null) {
5389 inputHelper = new InputFileHelper(tweaks);
@@ -77,6 +113,43 @@
78114 args.declare("dataset", null, true, String.class, "sets the wiki name (overrides the <wiki-or-dump> parameter)");
79115 }
80116
 117+ protected DataCursor<Association> openAssociationCursor() throws IOException, SQLException, PersistenceException {
 118+ Iterable<String> foreignFields = sourceDescriptor.getTweak("foreign-fields", (Iterable<String>)null);
 119+ Iterable<String> conceptFields = sourceDescriptor.getTweak("concept-fields", (Iterable<String>)null);
 120+ Iterable<String> propertyFields = sourceDescriptor.getTweak("property-fields", (Iterable<String>)null);
 121+
 122+ if (foreignFields==null) {
 123+ foreignFields = Arrays.asList(new String[] {
 124+ sourceDescriptor.getTweak("foreign-id-field", (String)null),
 125+ sourceDescriptor.getTweak("foreign-name-field", (String)null)
 126+ });
 127+ }
 128+
 129+ if (conceptFields==null) {
 130+ conceptFields = Arrays.asList(new String[] {
 131+ sourceDescriptor.getTweak("concept-id-field", (String)null),
 132+ sourceDescriptor.getTweak("concept-name-field", (String)null)
 133+ });
 134+ }
 135+
 136+ if (propertyFields==null) {
 137+ propertyFields = Arrays.asList(new String[] {
 138+ sourceDescriptor.getTweak("association-via-field", (String)null),
 139+ sourceDescriptor.getTweak("association-weight-field", (String)null)
 140+ });
 141+ }
 142+
 143+ DataCursor<FeatureSet> fsc = openFeatureSetCursor();
 144+
 145+ DataCursor<Association> cursor =
 146+ new AssociationCursor(fsc,
 147+ foreignFields,
 148+ conceptFields,
 149+ propertyFields );
 150+
 151+ return cursor;
 152+ }
 153+
81154 protected DataCursor<FeatureSet> openFeatureSetCursor() throws IOException, SQLException, PersistenceException {
82155 FeatureSetSourceDescriptor sourceDescriptor = getSourceDescriptor();
83156
@@ -173,6 +246,7 @@
174247 }
175248 }
176249
177 -
 250+ protected abstract P createProcessor(S conceptStore) throws InstantiationException;
178251
 252+
179253 }
\ No newline at end of file
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/LoadForeignProperties.java
@@ -1,7 +1,6 @@
22 package de.brightbyte.wikiword.integrator;
33
44 import java.io.IOException;
5 -import java.util.Arrays;
65
76 import de.brightbyte.data.cursor.DataCursor;
87 import de.brightbyte.util.PersistenceException;
@@ -43,7 +42,9 @@
4443 this.conceptStore.finalizeImport();
4544 }
4645
 46+ @Override
4747 protected ForeignPropertyProcessor createProcessor(ForeignPropertyStoreBuilder conceptStore) throws InstantiationException {
 48+ // FIXME: parameter list is restrictive, pass descriptor
4849 return instantiate(sourceDescriptor, "foreignPropertyProcessorClass", ForeignPropertyPassThrough.class, conceptStore);
4950 }
5051
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSetSourceDescriptor.java
@@ -1,6 +1,7 @@
22 package de.brightbyte.wikiword.integrator;
33
44 import java.io.File;
 5+import java.io.IOException;
56 import java.net.MalformedURLException;
67 import java.net.URL;
78 import java.util.List;
@@ -23,6 +24,10 @@
2425 super(prefix, parent);
2526 }
2627
 28+ public void loadTweaks(URL u) throws IOException {
 29+ super.loadTweaks(u);
 30+ if (getBaseURL()==null) setBaseURL(u);
 31+ }
2732
2833 public String getAuthorityName() {
2934 String name = getTweak("authority", null);
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/data/CollapsingMatchesCursor.java
@@ -11,17 +11,17 @@
1212 protected DataCursor<Association> cursor;
1313 protected Association prev;
1414
15 - protected String sourceKeyField;
16 - protected String targetKeyField;
 15+ protected String foreignKeyField;
 16+ protected String conceptKeyField;
1717
18 - public CollapsingMatchesCursor(DataCursor<Association> cursor, String sourceKeyField, String targetKeyField) {
 18+ public CollapsingMatchesCursor(DataCursor<Association> cursor, String foreignKeyField, String conceptKeyField) {
1919 if (cursor==null) throw new NullPointerException();
20 - if (sourceKeyField==null) throw new NullPointerException();
21 - if (targetKeyField==null) throw new NullPointerException();
 20+ if (foreignKeyField==null) throw new NullPointerException();
 21+ if (conceptKeyField==null) throw new NullPointerException();
2222
2323 this.cursor = cursor;
24 - this.sourceKeyField = sourceKeyField;
25 - this.targetKeyField = targetKeyField;
 24+ this.foreignKeyField = foreignKeyField;
 25+ this.conceptKeyField = conceptKeyField;
2626 }
2727
2828 public void close() {
@@ -42,15 +42,15 @@
4343 prev = cursor.next();
4444 if (prev==null) break;
4545
46 - if (!prev.getSourceItem().overlaps(s, sourceKeyField)) break;
47 - if (!prev.getTargetItem().overlaps(t, targetKeyField)) break;
 46+ if (!prev.getSourceItem().overlaps(s, foreignKeyField)) break;
 47+ if (!prev.getTargetItem().overlaps(t, conceptKeyField)) break;
4848
4949 t = FeatureSets.merge(t, prev.getTargetItem(), prev.getProperties());
5050 }
5151
5252 candidates.add(t);
5353
54 - if (prev==null || !prev.getSourceItem().overlaps(s, sourceKeyField)) break;
 54+ if (prev==null || !prev.getSourceItem().overlaps(s, foreignKeyField)) break;
5555
5656 s = FeatureSets.merge(s, prev.getSourceItem());
5757 }
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptMappings.java
@@ -0,0 +1,80 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.io.IOException;
 5+import java.util.Collection;
 6+
 7+import de.brightbyte.data.Functor;
 8+import de.brightbyte.data.Functors;
 9+import de.brightbyte.data.cursor.DataCursor;
 10+import de.brightbyte.util.PersistenceException;
 11+import de.brightbyte.wikiword.integrator.data.Association;
 12+import de.brightbyte.wikiword.integrator.data.CollapsingMatchesCursor;
 13+import de.brightbyte.wikiword.integrator.data.MappingCandidates;
 14+import de.brightbyte.wikiword.integrator.processor.ConceptMappingProcessor;
 15+import de.brightbyte.wikiword.integrator.processor.OptimalMappingSelector;
 16+import de.brightbyte.wikiword.integrator.store.ConceptMappingStoreBuilder;
 17+import de.brightbyte.wikiword.integrator.store.DatabaseConceptMappingStoreBuilder;
 18+import de.brightbyte.wikiword.store.WikiWordStoreFactory;
 19+
 20+/**
 21+ * This is the entry point for building concept mappings from the associations
 22+ * read via the configured source descriptor. BuildConceptMappings can be invoked
 23+ * as a standalone program; use --help as a command line parameter for usage information.
 24+ */
 25+public class BuildConceptMappings extends AbstractIntegratorApp<ConceptMappingStoreBuilder, ConceptMappingProcessor, MappingCandidates> {
 26+
 27+ @Override
 28+ protected WikiWordStoreFactory<? extends ConceptMappingStoreBuilder> createConceptStoreFactory() throws IOException, PersistenceException {
 29+ return new DatabaseConceptMappingStoreBuilder.Factory(
 30+ getTargetTableName(),
 31+ getConfiguredDataset(),
 32+ getConfiguredDataSource(),
 33+ tweaks);
 34+ }
 35+
 36+ @Override
 37+ protected void run() throws Exception {
 38+ Functor<Number, ? extends Collection<? extends Number>> aggregator = sourceDescriptor.getTweak("optiomization-aggregator", null);
 39+
 40+ if (aggregator==null) {
 41+ String f = sourceDescriptor.getTweak("optiomization-function", "sum");
 42+ if (f.equals("sum")) aggregator = Functors.Double.sum;
 43+ else if (f.equals("max")) aggregator = Functors.Double.max;
 44+ else throw new IllegalArgumentException("unknwon aggregator function: "+f);
 45+ }
 46+
 47+ this.propertyProcessor = createProcessor(conceptStore, sourceDescriptor.getTweak("optiomization-field", "freq"), aggregator);
 48+
 49+ section("-- fetching properties --------------------------------------------------");
 50+ DataCursor<Association> asc = openAssociationCursor();
 51+
 52+ DataCursor<MappingCandidates> cursor =
 53+ new CollapsingMatchesCursor(asc,
 54+ sourceDescriptor.getTweak("foreign-id-field", (String)null),
 55+ sourceDescriptor.getTweak("concept-id-field", (String)null) );
 56+
 57+ section("-- process properties --------------------------------------------------");
 58+ this.conceptStore.prepareImport();
 59+
 60+ this.propertyProcessor.processMappings(cursor);
 61+ cursor.close();
 62+
 63+ this.conceptStore.finalizeImport();
 64+ }
 65+
 66+ @Override
 67+ protected ConceptMappingProcessor createProcessor(ConceptMappingStoreBuilder conceptStore) throws InstantiationException {
 68+ //FIXME: parameter list is specific to OptimalMappingSelector
 69+ throw new UnsupportedOperationException("not supported");
 70+ }
 71+
 72+ protected ConceptMappingProcessor createProcessor(ConceptMappingStoreBuilder conceptStore, String property, Functor<Number, ? extends Collection<? extends Number>> aggregator) throws InstantiationException {
 73+ //FIXME: parameter list is specific to OptimalMappingSelector
 74+ return instantiate(sourceDescriptor, "conceptMappingProcessorClass", OptimalMappingSelector.class, conceptStore, property, aggregator);
 75+ }
 76+
 77+ public static void main(String[] argv) throws Exception {
 78+ BuildConceptMappings app = new BuildConceptMappings();
 79+ app.launch(argv);
 80+ }
 81+}
\ No newline at end of file
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/BuildConceptAssociations.java
@@ -1,13 +1,10 @@
22 package de.brightbyte.wikiword.integrator;
33
44 import java.io.IOException;
5 -import java.util.Arrays;
65
76 import de.brightbyte.data.cursor.DataCursor;
87 import de.brightbyte.util.PersistenceException;
98 import de.brightbyte.wikiword.integrator.data.Association;
10 -import de.brightbyte.wikiword.integrator.data.AssociationCursor;
11 -import de.brightbyte.wikiword.integrator.data.FeatureSet;
129 import de.brightbyte.wikiword.integrator.processor.ConceptAssociationPassThrough;
1310 import de.brightbyte.wikiword.integrator.processor.ConceptAssociationProcessor;
1411 import de.brightbyte.wikiword.integrator.store.AssociationAsMappingStoreBuilder;
@@ -50,39 +47,8 @@
5148 this.propertyProcessor = createProcessor(conceptStore);
5249
5350 section("-- fetching properties --------------------------------------------------");
54 - DataCursor<FeatureSet> fsc = openFeatureSetCursor();
 51+ DataCursor<Association> cursor = openAssociationCursor();
5552
56 - Iterable<String> foreignFields = sourceDescriptor.getTweak("foreign-fields", (Iterable<String>)null);
57 - Iterable<String> conceptFields = sourceDescriptor.getTweak("concept-fields", (Iterable<String>)null);
58 - Iterable<String> propertyFields = sourceDescriptor.getTweak("property-fields", (Iterable<String>)null);
59 -
60 - if (foreignFields==null) {
61 - foreignFields = Arrays.asList(new String[] {
62 - sourceDescriptor.getTweak("foreign-id-field", (String)null),
63 - sourceDescriptor.getTweak("foreign-name-field", (String)null)
64 - });
65 - }
66 -
67 - if (conceptFields==null) {
68 - conceptFields = Arrays.asList(new String[] {
69 - sourceDescriptor.getTweak("concept-id-field", (String)null),
70 - sourceDescriptor.getTweak("concept-name-field", (String)null)
71 - });
72 - }
73 -
74 - if (propertyFields==null) {
75 - propertyFields = Arrays.asList(new String[] {
76 - sourceDescriptor.getTweak("association-via-field", (String)null),
77 - sourceDescriptor.getTweak("association-weight-field", (String)null)
78 - });
79 - }
80 -
81 - DataCursor<Association> cursor =
82 - new AssociationCursor(fsc,
83 - sourceDescriptor.getTweak("foreign-fields", (Iterable<String>)null),
84 - sourceDescriptor.getTweak("concept-fields", (Iterable<String>)null),
85 - sourceDescriptor.getTweak("property-fields", (Iterable<String>)null) );
86 -
8753 section("-- process properties --------------------------------------------------");
8854 this.conceptStore.prepareImport();
8955
@@ -92,7 +58,9 @@
9359 this.conceptStore.finalizeImport();
9460 }
9561
 62+ @Override
9663 protected ConceptAssociationProcessor createProcessor(AssociationFeatureStoreBuilder conceptStore) throws InstantiationException {
 64+ // FIXME: parameter list is restrictive, pass descriptor
9765 return instantiate(sourceDescriptor, "conceptAssociationProcessorClass", ConceptAssociationPassThrough.class, conceptStore);
9866 }
9967

Status & tagging log