r50981 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r50980‎ | r50981 | r50982 >
Date:15:49, 25 May 2009
Author:daniel
Status:deferred
Tags:
Comment:
project for semantic integration of wikiword content
Modified paths:
  • /trunk/WikiWord/WikiWordIntegrator (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/.classpath (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/.project (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/.svnignore (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/pom.xml (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/Association.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/CollapsingAssociationCursor.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/DefaultFeatureSet.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSet.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSets.java (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/test (added) (history)
  • /trunk/WikiWord/WikiWordIntegrator/src/test/java (added) (history)
  • /trunk/WikiWord/pom.xml (modified) (history)

Diff [purge]

Index: trunk/WikiWord/pom.xml
@@ -11,6 +11,7 @@
1212 <modules>
1313 <module>WikiWord</module>
1414 <module>WikiWordBuilder</module>
 15+ <module>WikiWordIntegrator</module>
1516 <module>WikiWordBuilder4LifeScience</module>
1617 <module>WikiWordBuilder4Biography</module>
1718 </modules>
Index: trunk/WikiWord/WikiWordIntegrator/.classpath
@@ -0,0 +1,13 @@
 2+<?xml version="1.0" encoding="UTF-8"?>
 3+<classpath>
 4+ <classpathentry kind="src" path="src/test/java"/>
 5+ <classpathentry kind="src" path="src/main/java"/>
 6+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
 7+ <classpathentry combineaccessrules="false" kind="src" path="/BrightByteUtil"/>
 8+ <classpathentry combineaccessrules="false" kind="src" path="/WikiWord"/>
 9+ <classpathentry combineaccessrules="false" kind="src" path="/mwdumper"/>
 10+ <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3.8.1"/>
 11+ <classpathentry combineaccessrules="false" kind="src" path="/BrightByteDB"/>
 12+ <classpathentry kind="var" path="M2_REPO/mysql/mysql-connector-java/3.1.11/mysql-connector-java-3.1.11.jar"/>
 13+ <classpathentry kind="output" path="bin"/>
 14+</classpath>
Property changes on: trunk/WikiWord/WikiWordIntegrator/.classpath
___________________________________________________________________
Name: svn:executable
115 + *
Index: trunk/WikiWord/WikiWordIntegrator/.project
@@ -0,0 +1,17 @@
 2+<?xml version="1.0" encoding="UTF-8"?>
 3+<projectDescription>
 4+ <name>WikiWordIntegrator</name>
 5+ <comment></comment>
 6+ <projects>
 7+ </projects>
 8+ <buildSpec>
 9+ <buildCommand>
 10+ <name>org.eclipse.jdt.core.javabuilder</name>
 11+ <arguments>
 12+ </arguments>
 13+ </buildCommand>
 14+ </buildSpec>
 15+ <natures>
 16+ <nature>org.eclipse.jdt.core.javanature</nature>
 17+ </natures>
 18+</projectDescription>
Property changes on: trunk/WikiWord/WikiWordIntegrator/.project
___________________________________________________________________
Name: svn:executable
119 + *
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/CollapsingAssociationCursor.java
@@ -0,0 +1,45 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import de.brightbyte.data.cursor.DataCursor;
 5+import de.brightbyte.util.PersistenceException;
 6+
 7+public class CollapsingAssociationCursor implements DataCursor<Association> {
 8+
 9+ protected DataCursor<Association> cursor;
 10+ protected Association prev;
 11+
 12+ protected String sourceKeyField;
 13+ protected String targetKeyField;
 14+
 15+ public CollapsingAssociationCursor(String sourceKeyField, String targetKeyField) {
 16+ if (sourceKeyField==null) throw new NullPointerException();
 17+ if (targetKeyField==null) throw new NullPointerException();
 18+
 19+ this.sourceKeyField = sourceKeyField;
 20+ this.targetKeyField = targetKeyField;
 21+ }
 22+
 23+ public void close() {
 24+ cursor.close();
 25+ }
 26+
 27+ public Association next() throws PersistenceException {
 28+ if (prev==null) prev = cursor.next();
 29+ if (prev==null) return null;
 30+
 31+ Association a = prev;
 32+
 33+ while (true) {
 34+ prev = cursor.next();
 35+ if (prev==null) break;
 36+
 37+ if (!prev.getSourceItem().overlaps(a.getSourceItem(), sourceKeyField)) break;
 38+ if (!prev.getTargetItem().overlaps(a.getTargetItem(), targetKeyField)) break;
 39+
 40+ a = Association.merge(a, prev);
 41+ }
 42+
 43+ return a;
 44+ }
 45+
 46+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSet.java
@@ -0,0 +1,11 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.util.List;
 5+
 6+import de.brightbyte.data.MultiMap;
 7+
 8+public interface FeatureSet extends MultiMap<String, Object, List<Object>> {
 9+
 10+ public boolean overlaps(FeatureSet sourceItem, String sourceKeyField);
 11+
 12+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/FeatureSets.java
@@ -0,0 +1,38 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import de.brightbyte.data.LabeledVector;
 5+import de.brightbyte.data.MapLabeledVector;
 6+
 7+public class FeatureSets {
 8+ public static FeatureSet merge(FeatureSet... sets) {
 9+ if (sets.length==0) return new DefaultFeatureSet();
 10+ if (sets.length==1) return sets[0];
 11+
 12+ FeatureSet f = new DefaultFeatureSet();
 13+
 14+ for (int i = 0; i<sets.length; i++) {
 15+ f.putAll(sets[i]);
 16+ }
 17+
 18+ return f;
 19+ }
 20+
 21+ public static <T>LabeledVector<T> histogram(Iterable<T> list) {
 22+ LabeledVector<T> v = new MapLabeledVector<T>();
 23+
 24+ for (T obj: list) {
 25+ v.add(obj, 1);
 26+ }
 27+
 28+ return v;
 29+ }
 30+
 31+ public static <T>int count(Iterable<T> list, T item) {
 32+ int c = 0;
 33+ for (T obj: list) {
 34+ if (item.equals(obj)) c++;
 35+ }
 36+
 37+ return c;
 38+ }
 39+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/Association.java
@@ -0,0 +1,78 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+public class Association {
 5+ protected FeatureSet sourceItem;
 6+ protected FeatureSet targetItem;
 7+ protected FeatureSet properties;
 8+
 9+ public Association(FeatureSet sourceItem, FeatureSet targetItem, FeatureSet... properties) {
 10+ if (sourceItem==null) throw new NullPointerException();
 11+ if (targetItem==null) throw new NullPointerException();
 12+
 13+ this.sourceItem = sourceItem;
 14+ this.targetItem = targetItem;
 15+ this.properties = properties==null ? new DefaultFeatureSet() : FeatureSets.merge(properties);
 16+ }
 17+
 18+ public FeatureSet getProperties() {
 19+ return properties;
 20+ }
 21+
 22+ public FeatureSet getSourceItem() {
 23+ return sourceItem;
 24+ }
 25+
 26+ public FeatureSet getTargetItem() {
 27+ return targetItem;
 28+ }
 29+
 30+ public String toString() {
 31+ return "[" + sourceItem + " <" + properties + "> " + targetItem + "]";
 32+ }
 33+
 34+ @Override
 35+ public int hashCode() {
 36+ final int PRIME = 31;
 37+ int result = 1;
 38+ result = PRIME * result + ((properties == null) ? 0 : properties.hashCode());
 39+ result = PRIME * result + ((sourceItem == null) ? 0 : sourceItem.hashCode());
 40+ result = PRIME * result + ((targetItem == null) ? 0 : targetItem.hashCode());
 41+ return result;
 42+ }
 43+
 44+ @Override
 45+ public boolean equals(Object obj) {
 46+ if (this == obj)
 47+ return true;
 48+ if (obj == null)
 49+ return false;
 50+ if (getClass() != obj.getClass())
 51+ return false;
 52+ final Association other = (Association) obj;
 53+ if (properties == null) {
 54+ if (other.properties != null)
 55+ return false;
 56+ } else if (!properties.equals(other.properties))
 57+ return false;
 58+ if (sourceItem == null) {
 59+ if (other.sourceItem != null)
 60+ return false;
 61+ } else if (!sourceItem.equals(other.sourceItem))
 62+ return false;
 63+ if (targetItem == null) {
 64+ if (other.targetItem != null)
 65+ return false;
 66+ } else if (!targetItem.equals(other.targetItem))
 67+ return false;
 68+ return true;
 69+ }
 70+
 71+ public static Association merge(Association a, Association b) {
 72+ FeatureSet src = FeatureSets.merge(a.getSourceItem(), b.getSourceItem());
 73+ FeatureSet tgt = FeatureSets.merge(a.getTargetItem(), b.getTargetItem());
 74+ FeatureSet props = FeatureSets.merge(a.getProperties(), b.getProperties());
 75+ return new Association(src, tgt, props);
 76+ }
 77+
 78+
 79+}
Index: trunk/WikiWord/WikiWordIntegrator/src/main/java/de/brightbyte/wikiword/integrator/DefaultFeatureSet.java
@@ -0,0 +1,34 @@
 2+package de.brightbyte.wikiword.integrator;
 3+
 4+import java.util.List;
 5+
 6+import de.brightbyte.data.ValueListMultiMap;
 7+
 8+public class DefaultFeatureSet extends ValueListMultiMap<String, Object> implements FeatureSet {
 9+
 10+ protected String nameField;
 11+
 12+ public DefaultFeatureSet() {
 13+ this(null);
 14+ }
 15+
 16+ public DefaultFeatureSet(String nameField) {
 17+ this.nameField = nameField;
 18+ }
 19+
 20+ public String toString() {
 21+ if (nameField != null) return String.valueOf(get(nameField));
 22+ else return super.toString();
 23+ }
 24+
 25+ public boolean overlaps(FeatureSet item, String feature) {
 26+ List<Object> a = get(feature);
 27+ List<Object> b = item.get(feature);
 28+
 29+ for (Object obj: a) {
 30+ if (b.contains(obj)) return true;
 31+ }
 32+
 33+ return false;
 34+ }
 35+}
Index: trunk/WikiWord/WikiWordIntegrator/.svnignore
@@ -0,0 +1,20 @@
 2+*.log
 3+*.tmp
 4+*~
 5+#*
 6+*.tgz
 7+*.tar.gz
 8+*.tar.bz2
 9+*.war
 10+*.ear
 11+bin
 12+target
 13+classes
 14+build
 15+dist
 16+distri
 17+distrib
 18+*.hprof.txt
 19+debug
 20+local.*
 21+
Property changes on: trunk/WikiWord/WikiWordIntegrator/.svnignore
___________________________________________________________________
Name: svn:executable
122 + *
Index: trunk/WikiWord/WikiWordIntegrator/pom.xml
@@ -0,0 +1,112 @@
 2+<project xmlns="http://maven.apache.org/POM/4.0.0"
 3+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 5+
 6+ <modelVersion>4.0.0</modelVersion>
 7+ <groupId>de.wikimedia</groupId>
 8+ <artifactId>WikiWordIntegrator</artifactId>
 9+ <version>0.3</version>
 10+
 11+ <dependencies>
 12+ <dependency>
 13+ <groupId>org.wikimedia</groupId>
 14+ <artifactId>mwdumper</artifactId>
 15+ <version>1.11</version>
 16+ <scope>compile</scope>
 17+ </dependency>
 18+ <dependency>
 19+ <groupId>de.brightbyte</groupId>
 20+ <artifactId>BrightByteUtil</artifactId>
 21+ <version>0.2</version>
 22+ <scope>compile</scope>
 23+ </dependency>
 24+ <dependency>
 25+ <groupId>de.brightbyte</groupId>
 26+ <artifactId>BrightByteDB</artifactId>
 27+ <version>0.2</version>
 28+ <scope>compile</scope>
 29+ </dependency>
 30+ <dependency>
 31+ <groupId>de.wikimedia</groupId>
 32+ <artifactId>WikiWord</artifactId>
 33+ <version>0.3</version>
 34+ <scope>compile</scope>
 35+ </dependency>
 36+ <dependency>
 37+ <groupId>junit</groupId>
 38+ <artifactId>junit</artifactId>
 39+ <version>3.8</version>
 40+ <scope>test</scope>
 41+ </dependency>
 42+ <dependency>
 43+ <groupId>org.dbunit</groupId>
 44+ <artifactId>dbunit</artifactId>
 45+ <version>2.4.4</version>
 46+ <scope>test</scope>
 47+ </dependency>
 48+ <dependency>
 49+ <groupId>ardverk</groupId>
 50+ <artifactId>patricia-trie</artifactId>
 51+ <version>0.1</version>
 52+ <scope>system</scope>
 53+ <systemPath>${basedir}/lib/patricia-trie-0.1.jar</systemPath>
 54+ </dependency>
 55+ </dependencies>
 56+
 57+ <build>
 58+ <plugins>
 59+ <plugin>
 60+ <groupId>org.apache.maven.plugins</groupId>
 61+ <artifactId>maven-compiler-plugin</artifactId>
 62+ <configuration>
 63+ <source>1.5</source>
 64+ <target>1.5</target>
 65+ </configuration>
 66+ </plugin>
 67+ <plugin>
 68+ <groupId>org.apache.maven.plugins</groupId>
 69+ <artifactId>maven-javadoc-plugin</artifactId>
 70+ <executions>
 71+ <execution>
 72+ <phase>package</phase>
 73+ </execution>
 74+ </executions>
 75+ </plugin>
 76+ <plugin>
 77+ <artifactId>maven-assembly-plugin</artifactId>
 78+ <configuration>
 79+ <descriptors>
 80+ <descriptor>src/main/assembly/bin-dep.xml</descriptor>
 81+ <descriptor>src/main/assembly/src.xml</descriptor>
 82+ </descriptors>
 83+ </configuration>
 84+ </plugin>
 85+ </plugins>
 86+
 87+ <resources>
 88+ <resource>
 89+ <filtering>false</filtering>
 90+ <directory>src/main/java</directory>
 91+ <includes>
 92+ <include>**</include>
 93+ </includes>
 94+ <excludes>
 95+ <exclude>**/*.java</exclude>
 96+ </excludes>
 97+ </resource>
 98+ </resources>
 99+ <testResources>
 100+ <testResource>
 101+ <filtering>false</filtering>
 102+ <directory>src/test/java</directory>
 103+ <includes>
 104+ <include>**</include>
 105+ </includes>
 106+ <excludes>
 107+ <exclude>**/*.java</exclude>
 108+ </excludes>
 109+ </testResource>
 110+ </testResources>
 111+ </build>
 112+
 113+</project>

Status & tagging log