Index: trunk/WikiWord/pom.xml |
— | — | @@ -11,6 +11,7 @@ |
12 | 12 | <modules>
|
13 | 13 | <module>WikiWord</module>
|
14 | 14 | <module>WikiWordBuilder</module>
|
| 15 | + <module>WikiWordIntegrator</module>
|
15 | 16 | <module>WikiWordBuilder4LifeScience</module>
|
16 | 17 | <module>WikiWordBuilder4Biography</module>
|
17 | 18 | </modules>
|
Index: trunk/WikiWord/WikiWordIntegrator/.classpath |
— | — | @@ -0,0 +1,13 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8"?> |
| 3 | +<classpath> |
| 4 | + <classpathentry kind="src" path="src/test/java"/> |
| 5 | + <classpathentry kind="src" path="src/main/java"/> |
| 6 | + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
| 7 | + <classpathentry combineaccessrules="false" kind="src" path="/BrightByteUtil"/> |
| 8 | + <classpathentry combineaccessrules="false" kind="src" path="/WikiWord"/> |
| 9 | + <classpathentry combineaccessrules="false" kind="src" path="/mwdumper"/> |
| 10 | + <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3.8.1"/> |
| 11 | + <classpathentry combineaccessrules="false" kind="src" path="/BrightByteDB"/> |
| 12 | + <classpathentry kind="var" path="M2_REPO/mysql/mysql-connector-java/3.1.11/mysql-connector-java-3.1.11.jar"/> |
| 13 | + <classpathentry kind="output" path="bin"/> |
| 14 | +</classpath> |
Property changes on: trunk/WikiWord/WikiWordIntegrator/.classpath |
___________________________________________________________________ |
Name: svn:executable |
1 | 15 | + * |
Index: trunk/WikiWord/WikiWordIntegrator/.project |
— | — | @@ -0,0 +1,17 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8"?> |
| 3 | +<projectDescription> |
| 4 | + <name>WikiWordIntegrator</name> |
| 5 | + <comment></comment> |
| 6 | + <projects> |
| 7 | + </projects> |
| 8 | + <buildSpec> |
| 9 | + <buildCommand> |
| 10 | + <name>org.eclipse.jdt.core.javabuilder</name> |
| 11 | + <arguments> |
| 12 | + </arguments> |
| 13 | + </buildCommand> |
| 14 | + </buildSpec> |
| 15 | + <natures> |
| 16 | + <nature>org.eclipse.jdt.core.javanature</nature> |
| 17 | + </natures> |
| 18 | +</projectDescription> |
Property changes on: trunk/WikiWord/WikiWordIntegrator/.project |
___________________________________________________________________ |
Name: svn:executable |
1 | 19 | + * |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/CollapsingAssociationCursor.java |
— | — | @@ -0,0 +1,45 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import de.brightbyte.data.cursor.DataCursor; |
| 5 | +import de.brightbyte.util.PersistenceException; |
| 6 | + |
| 7 | +/** |
| 8 | + * Cursor that collapses runs of consecutive associations into single, merged |
| 9 | + * associations. |
| 10 | + * <p> |
| 11 | + * Neighbouring associations read from the underlying cursor belong to the same |
| 12 | + * run when their source items overlap on <code>sourceKeyField</code> and their |
| 13 | + * target items overlap on <code>targetKeyField</code>. Each run is folded with |
| 14 | + * {@link Association#merge(Association, Association)}. |
| 15 | + */ |
| 16 | +public class CollapsingAssociationCursor implements DataCursor<Association> { |
| 17 | + |
| 18 | +    /** Underlying cursor the raw associations are read from. */ |
| 19 | +    protected DataCursor<Association> cursor; |
| 20 | + |
| 21 | +    /** Look-ahead: first association of the next run, or null if nothing is buffered. */ |
| 22 | +    protected Association prev; |
| 23 | + |
| 24 | +    protected String sourceKeyField; |
| 25 | +    protected String targetKeyField; |
| 26 | + |
| 27 | +    /** |
| 28 | +     * Creates a collapsing cursor that has no underlying cursor yet; the protected |
| 29 | +     * cursor field has to be assigned before next() is called. |
| 30 | +     * |
| 31 | +     * @throws NullPointerException if either key field name is null |
| 32 | +     */ |
| 33 | +    public CollapsingAssociationCursor(String sourceKeyField, String targetKeyField) { |
| 34 | +        this((DataCursor<Association>) null, sourceKeyField, targetKeyField); |
| 35 | +    } |
| 36 | + |
| 37 | +    /** |
| 38 | +     * Creates a collapsing cursor on top of the given cursor. |
| 39 | +     * |
| 40 | +     * @param cursor the cursor to read associations from |
| 41 | +     * @param sourceKeyField name of the feature used to compare source items |
| 42 | +     * @param targetKeyField name of the feature used to compare target items |
| 43 | +     * @throws NullPointerException if either key field name is null |
| 44 | +     */ |
| 45 | +    public CollapsingAssociationCursor(DataCursor<Association> cursor, String sourceKeyField, String targetKeyField) { |
| 46 | +        if (sourceKeyField==null) throw new NullPointerException(); |
| 47 | +        if (targetKeyField==null) throw new NullPointerException(); |
| 48 | + |
| 49 | +        this.cursor = cursor; |
| 50 | +        this.sourceKeyField = sourceKeyField; |
| 51 | +        this.targetKeyField = targetKeyField; |
| 52 | +    } |
| 53 | + |
| 54 | +    /** Closes the underlying cursor, if one has been set. */ |
| 55 | +    public void close() { |
| 56 | +        if (cursor!=null) cursor.close(); |
| 57 | +    } |
| 58 | + |
| 59 | +    /** |
| 60 | +     * Returns the next collapsed association, or null when the underlying cursor |
| 61 | +     * returns null. |
| 62 | +     * |
| 63 | +     * @throws IllegalStateException if no underlying cursor has been set |
| 64 | +     */ |
| 65 | +    public Association next() throws PersistenceException { |
| 66 | +        if (cursor==null) throw new IllegalStateException("no underlying cursor set"); |
| 67 | + |
| 68 | +        if (prev==null) prev = cursor.next(); |
| 69 | +        if (prev==null) return null; |
| 70 | + |
| 71 | +        Association a = prev; |
| 72 | + |
| 73 | +        while (true) { |
| 74 | +            // read ahead; an entry that does not match stays in prev and starts the next run |
| 75 | +            prev = cursor.next(); |
| 76 | +            if (prev==null) break; |
| 77 | + |
| 78 | +            if (!prev.getSourceItem().overlaps(a.getSourceItem(), sourceKeyField)) break; |
| 79 | +            if (!prev.getTargetItem().overlaps(a.getTargetItem(), targetKeyField)) break; |
| 80 | + |
| 81 | +            a = Association.merge(a, prev); |
| 82 | +        } |
| 83 | + |
| 84 | +        return a; |
| 85 | +    } |
| 86 | + |
| 87 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSet.java |
— | — | @@ -0,0 +1,11 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import java.util.List; |
| 5 | + |
| 6 | +import de.brightbyte.data.MultiMap; |
| 7 | + |
| 8 | +/** |
| 9 | + * A set of named features, each mapped to a list of values. |
| 10 | + */ |
| 11 | +public interface FeatureSet extends MultiMap<String, Object, List<Object>> { |
| 12 | + |
| 13 | +    /** |
| 14 | +     * Tells whether this set and the given set overlap with respect to one feature. |
| 15 | +     * (DefaultFeatureSet treats two sets as overlapping when their value lists for |
| 16 | +     * that feature share at least one value.) |
| 17 | +     * |
| 18 | +     * @param item the feature set to compare this one with |
| 19 | +     * @param feature name of the feature whose values are compared |
| 20 | +     * @return true if the two sets overlap on the given feature |
| 21 | +     */ |
| 22 | +    public boolean overlaps(FeatureSet item, String feature); |
| 23 | + |
| 24 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSets.java |
— | — | @@ -0,0 +1,38 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import de.brightbyte.data.LabeledVector; |
| 5 | +import de.brightbyte.data.MapLabeledVector; |
| 6 | + |
| 7 | +/** |
| 8 | + * Static helpers for combining feature sets and counting values. |
| 9 | + */ |
| 10 | +public class FeatureSets { |
| 11 | + |
| 12 | +    /** |
| 13 | +     * Combines the given feature sets into one. |
| 14 | +     * <p> |
| 15 | +     * Without arguments a new, empty set is returned; a single argument is |
| 16 | +     * returned as is, without copying; otherwise the entries of all sets are put, |
| 17 | +     * in argument order, into a new DefaultFeatureSet. |
| 18 | +     */ |
| 19 | +    public static FeatureSet merge(FeatureSet... sets) { |
| 20 | +        if (sets.length==0) return new DefaultFeatureSet(); |
| 21 | +        if (sets.length==1) return sets[0]; |
| 22 | + |
| 23 | +        FeatureSet f = new DefaultFeatureSet(); |
| 24 | + |
| 25 | +        for (FeatureSet set: sets) { |
| 26 | +            f.putAll(set); |
| 27 | +        } |
| 28 | + |
| 29 | +        return f; |
| 30 | +    } |
| 31 | + |
| 32 | +    /** |
| 33 | +     * Builds a histogram: every element of the list adds 1 to its entry in the |
| 34 | +     * returned vector. |
| 35 | +     */ |
| 36 | +    public static <T>LabeledVector<T> histogram(Iterable<T> list) { |
| 37 | +        LabeledVector<T> v = new MapLabeledVector<T>(); |
| 38 | + |
| 39 | +        for (T obj: list) { |
| 40 | +            v.add(obj, 1); |
| 41 | +        } |
| 42 | + |
| 43 | +        return v; |
| 44 | +    } |
| 45 | + |
| 46 | +    /** |
| 47 | +     * Counts the elements of the list that are equal to the given item. |
| 48 | +     * A null item matches null elements instead of causing a NullPointerException. |
| 49 | +     */ |
| 50 | +    public static <T>int count(Iterable<T> list, T item) { |
| 51 | +        int c = 0; |
| 52 | +        for (T obj: list) { |
| 53 | +            // null-safe comparison; java.util.Objects is not available at source level 1.5 |
| 54 | +            if (item==null ? obj==null : item.equals(obj)) c++; |
| 55 | +        } |
| 56 | + |
| 57 | +        return c; |
| 58 | +    } |
| 59 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/Association.java |
— | — | @@ -0,0 +1,78 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +public class Association { |
| 5 | + protected FeatureSet sourceItem; |
| 6 | + protected FeatureSet targetItem; |
| 7 | + protected FeatureSet properties; |
| 8 | + |
| 9 | + public Association(FeatureSet sourceItem, FeatureSet targetItem, FeatureSet... properties) { |
| 10 | + if (sourceItem==null) throw new NullPointerException(); |
| 11 | + if (targetItem==null) throw new NullPointerException(); |
| 12 | + |
| 13 | + this.sourceItem = sourceItem; |
| 14 | + this.targetItem = targetItem; |
| 15 | + this.properties = properties==null ? new DefaultFeatureSet() : FeatureSets.merge(properties); |
| 16 | + } |
| 17 | + |
| 18 | + public FeatureSet getProperties() { |
| 19 | + return properties; |
| 20 | + } |
| 21 | + |
| 22 | + public FeatureSet getSourceItem() { |
| 23 | + return sourceItem; |
| 24 | + } |
| 25 | + |
| 26 | + public FeatureSet getTargetItem() { |
| 27 | + return targetItem; |
| 28 | + } |
| 29 | + |
| 30 | + public String toString() { |
| 31 | + return "[" + sourceItem + " <" + properties + "> " + targetItem + "]"; |
| 32 | + } |
| 33 | + |
| 34 | + @Override |
| 35 | + public int hashCode() { |
| 36 | + final int PRIME = 31; |
| 37 | + int result = 1; |
| 38 | + result = PRIME * result + ((properties == null) ? 0 : properties.hashCode()); |
| 39 | + result = PRIME * result + ((sourceItem == null) ? 0 : sourceItem.hashCode()); |
| 40 | + result = PRIME * result + ((targetItem == null) ? 0 : targetItem.hashCode()); |
| 41 | + return result; |
| 42 | + } |
| 43 | + |
| 44 | + @Override |
| 45 | + public boolean equals(Object obj) { |
| 46 | + if (this == obj) |
| 47 | + return true; |
| 48 | + if (obj == null) |
| 49 | + return false; |
| 50 | + if (getClass() != obj.getClass()) |
| 51 | + return false; |
| 52 | + final Association other = (Association) obj; |
| 53 | + if (properties == null) { |
| 54 | + if (other.properties != null) |
| 55 | + return false; |
| 56 | + } else if (!properties.equals(other.properties)) |
| 57 | + return false; |
| 58 | + if (sourceItem == null) { |
| 59 | + if (other.sourceItem != null) |
| 60 | + return false; |
| 61 | + } else if (!sourceItem.equals(other.sourceItem)) |
| 62 | + return false; |
| 63 | + if (targetItem == null) { |
| 64 | + if (other.targetItem != null) |
| 65 | + return false; |
| 66 | + } else if (!targetItem.equals(other.targetItem)) |
| 67 | + return false; |
| 68 | + return true; |
| 69 | + } |
| 70 | + |
| 71 | + public static Association merge(Association a, Association b) { |
| 72 | + FeatureSet src = FeatureSets.merge(a.getSourceItem(), b.getSourceItem()); |
| 73 | + FeatureSet tgt = FeatureSets.merge(a.getTargetItem(), b.getTargetItem()); |
| 74 | + FeatureSet props = FeatureSets.merge(a.getProperties(), b.getProperties()); |
| 75 | + return new Association(src, tgt, props); |
| 76 | + } |
| 77 | + |
| 78 | + |
| 79 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/DefaultFeatureSet.java |
— | — | @@ -0,0 +1,34 @@ |
| 2 | +package de.brightbyte.wikiword.integrator; |
| 3 | + |
| 4 | +import java.util.List; |
| 5 | + |
| 6 | +import de.brightbyte.data.ValueListMultiMap; |
| 7 | + |
| 8 | +/** |
| 9 | + * FeatureSet backed by a ValueListMultiMap, with an optional "name" feature |
| 10 | + * that is used for the string representation. |
| 11 | + */ |
| 12 | +public class DefaultFeatureSet extends ValueListMultiMap<String, Object> implements FeatureSet { |
| 13 | + |
| 14 | +    /** Feature whose values are shown by toString(), or null for the default representation. */ |
| 15 | +    protected String nameField; |
| 16 | + |
| 17 | +    public DefaultFeatureSet() { |
| 18 | +        this(null); |
| 19 | +    } |
| 20 | + |
| 21 | +    public DefaultFeatureSet(String nameField) { |
| 22 | +        this.nameField = nameField; |
| 23 | +    } |
| 24 | + |
| 25 | +    @Override |
| 26 | +    public String toString() { |
| 27 | +        if (nameField != null) return String.valueOf(get(nameField)); |
| 28 | +        else return super.toString(); |
| 29 | +    } |
| 30 | + |
| 31 | +    /** |
| 32 | +     * Tells whether this set and the given one share at least one value for the |
| 33 | +     * given feature. |
| 34 | +     * |
| 35 | +     * @return true if some value of the feature occurs in both sets; false |
| 36 | +     *         otherwise, including when either set has no values for the feature |
| 37 | +     */ |
| 38 | +    public boolean overlaps(FeatureSet item, String feature) { |
| 39 | +        if (item == null) return false; |
| 40 | + |
| 41 | +        List<Object> a = get(feature); |
| 42 | +        List<Object> b = item.get(feature); |
| 43 | + |
| 44 | +        // a missing feature may be reported as null (TODO confirm against MultiMap.get); |
| 45 | +        // there is nothing to compare in that case, so the sets do not overlap |
| 46 | +        if (a == null || b == null) return false; |
| 47 | + |
| 48 | +        for (Object obj: a) { |
| 49 | +            if (b.contains(obj)) return true; |
| 50 | +        } |
| 51 | + |
| 52 | +        return false; |
| 53 | +    } |
| 54 | +} |
Index: trunk/WikiWord/WikiWordIntegrator/.svnignore |
— | — | @@ -0,0 +1,20 @@ |
| 2 | +*.log |
| 3 | +*.tmp |
| 4 | +*~ |
| 5 | +#* |
| 6 | +*.tgz |
| 7 | +*.tar.gz |
| 8 | +*.tar.bz2 |
| 9 | +*.war |
| 10 | +*.ear |
| 11 | +bin |
| 12 | +target |
| 13 | +classes |
| 14 | +build |
| 15 | +dist |
| 16 | +distri |
| 17 | +distrib |
| 18 | +*.hprof.txt |
| 19 | +debug |
| 20 | +local.* |
| 21 | + |
Property changes on: trunk/WikiWord/WikiWordIntegrator/.svnignore |
___________________________________________________________________ |
Name: svn:executable |
1 | 22 | + * |
Index: trunk/WikiWord/WikiWordIntegrator/pom.xml |
— | — | @@ -0,0 +1,112 @@ |
| 2 | +<project xmlns="http://maven.apache.org/POM/4.0.0"
|
| 3 | + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
| 4 | + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
| 5 | +
|
| 6 | + <modelVersion>4.0.0</modelVersion>
|
| 7 | + <groupId>de.wikimedia</groupId>
|
| 8 | + <artifactId>WikiWordIntegrator</artifactId>
|
| 9 | + <version>0.3</version>
|
| 10 | +
|
| 11 | + <dependencies>
|
| 12 | + <dependency>
|
| 13 | + <groupId>org.wikimedia</groupId>
|
| 14 | + <artifactId>mwdumper</artifactId>
|
| 15 | + <version>1.11</version>
|
| 16 | + <scope>compile</scope>
|
| 17 | + </dependency>
|
| 18 | + <dependency>
|
| 19 | + <groupId>de.brightbyte</groupId>
|
| 20 | + <artifactId>BrightByteUtil</artifactId>
|
| 21 | + <version>0.2</version>
|
| 22 | + <scope>compile</scope>
|
| 23 | + </dependency>
|
| 24 | + <dependency>
|
| 25 | + <groupId>de.brightbyte</groupId>
|
| 26 | + <artifactId>BrightByteDB</artifactId>
|
| 27 | + <version>0.2</version>
|
| 28 | + <scope>compile</scope>
|
| 29 | + </dependency>
|
| 30 | + <dependency>
|
| 31 | + <groupId>de.wikimedia</groupId>
|
| 32 | + <artifactId>WikiWord</artifactId>
|
| 33 | + <version>0.3</version>
|
| 34 | + <scope>compile</scope>
|
| 35 | + </dependency>
|
| 36 | + <dependency>
|
| 37 | + <groupId>junit</groupId>
|
| 38 | + <artifactId>junit</artifactId>
|
| 39 | + <version>3.8</version>
|
| 40 | + <scope>test</scope>
|
| 41 | + </dependency>
|
| 42 | + <dependency>
|
| 43 | + <groupId>org.dbunit</groupId>
|
| 44 | + <artifactId>dbunit</artifactId>
|
| 45 | + <version>2.4.4</version>
|
| 46 | + <scope>test</scope>
|
| 47 | + </dependency>
|
| 48 | + <dependency>
|
| 49 | + <groupId>ardverk</groupId>
|
| 50 | + <artifactId>patricia-trie</artifactId>
|
| 51 | + <version>0.1</version>
|
| 52 | + <scope>system</scope>
|
| 53 | + <systemPath>${basedir}/lib/patricia-trie-0.1.jar</systemPath>
|
| 54 | + </dependency>
|
| 55 | + </dependencies>
|
| 56 | +
|
| 57 | + <build>
|
| 58 | + <plugins>
|
| 59 | + <plugin>
|
| 60 | + <groupId>org.apache.maven.plugins</groupId>
|
| 61 | + <artifactId>maven-compiler-plugin</artifactId>
|
| 62 | + <configuration>
|
| 63 | + <source>1.5</source>
|
| 64 | + <target>1.5</target>
|
| 65 | + </configuration>
|
| 66 | + </plugin>
|
| 67 | + <plugin>
|
| 68 | + <groupId>org.apache.maven.plugins</groupId>
|
| 69 | + <artifactId>maven-javadoc-plugin</artifactId>
|
| 70 | + <executions>
|
| 71 | + <execution>
|
| 72 | + <phase>package</phase>
|
| 73 | + </execution>
|
| 74 | + </executions>
|
| 75 | + </plugin>
|
| 76 | + <plugin>
|
| 77 | + <artifactId>maven-assembly-plugin</artifactId>
|
| 78 | + <configuration>
|
| 79 | + <descriptors>
|
| 80 | + <descriptor>src/main/assembly/bin-dep.xml</descriptor>
|
| 81 | + <descriptor>src/main/assembly/src.xml</descriptor>
|
| 82 | + </descriptors>
|
| 83 | + </configuration>
|
| 84 | + </plugin>
|
| 85 | + </plugins>
|
| 86 | +
|
| 87 | + <resources>
|
| 88 | + <resource>
|
| 89 | + <filtering>false</filtering>
|
| 90 | + <directory>src/main/java</directory>
|
| 91 | + <includes>
|
| 92 | + <include>**</include>
|
| 93 | + </includes>
|
| 94 | + <excludes>
|
| 95 | + <exclude>**/*.java</exclude>
|
| 96 | + </excludes>
|
| 97 | + </resource>
|
| 98 | + </resources>
|
| 99 | + <testResources>
|
| 100 | + <testResource>
|
| 101 | + <filtering>false</filtering>
|
| 102 | + <directory>src/test/java</directory>
|
| 103 | + <includes>
|
| 104 | + <include>**</include>
|
| 105 | + </includes>
|
| 106 | + <excludes>
|
| 107 | + <exclude>**/*.java</exclude>
|
| 108 | + </excludes>
|
| 109 | + </testResource>
|
| 110 | + </testResources>
|
| 111 | + </build>
|
| 112 | +
|
| 113 | +</project>
|