r69731 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r69730‎ | r69731 | r69732 >
Date:15:23, 22 July 2010
Author:daniel
Status:deferred
Tags:
Comment:
poking at neo4j
Modified paths:
  • /trunk/WikiWord/CatGraph (added) (history)
  • /trunk/WikiWord/CatGraph/.classpath (added) (history)
  • /trunk/WikiWord/CatGraph/.project (added) (history)
  • /trunk/WikiWord/CatGraph/COPYING (added) (history)
  • /trunk/WikiWord/CatGraph/LGPL (added) (history)
  • /trunk/WikiWord/CatGraph/pom.xml (added) (history)
  • /trunk/WikiWord/CatGraph/src (added) (history)
  • /trunk/WikiWord/CatGraph/src/main (added) (history)
  • /trunk/WikiWord/CatGraph/src/main/assembly (added) (history)
  • /trunk/WikiWord/CatGraph/src/main/java (added) (history)
  • /trunk/WikiWord/CatGraph/src/main/java/de (added) (history)
  • /trunk/WikiWord/CatGraph/src/main/java/de/wikimedia (added) (history)
  • /trunk/WikiWord/CatGraph/src/main/java/de/wikimedia/catgraph (added) (history)
  • /trunk/WikiWord/CatGraph/src/main/java/de/wikimedia/catgraph/CatGraph.java (added) (history)
  • /trunk/WikiWord/CatGraph/src/test (added) (history)
  • /trunk/WikiWord/CatGraph/src/test/java (added) (history)
  • /trunk/WikiWord/CatGraph/target (added) (history)

Diff [purge]

Index: trunk/WikiWord/CatGraph/.classpath
@@ -0,0 +1,16 @@
 2+<?xml version="1.0" encoding="UTF-8"?>
 3+<classpath>
 4+ <classpathentry kind="src" path="src/test/java"/>
 5+ <classpathentry kind="src" path="src/main/java"/>
 6+ <classpathentry kind="var" path="JRE_LIB" sourcepath="JRE_SRC"/>
 7+ <classpathentry combineaccessrules="false" kind="src" path="/BrightByteDB"/>
 8+ <classpathentry combineaccessrules="false" kind="src" path="/BrightByteUtil"/>
 9+ <classpathentry kind="var" path="M2_REPO/org/neo4j/neo4j-kernel/1.0/neo4j-kernel-1.0.jar">
 10+ <attributes>
 11+ <attribute name="javadoc_location" value="http://api.neo4j.org/current/"/>
 12+ </attributes>
 13+ </classpathentry>
 14+ <classpathentry kind="var" path="M2_REPO/org/apache/geronimo/specs/geronimo-jta_1.1_spec/1.1.1/geronimo-jta_1.1_spec-1.1.1.jar"/>
 15+ <classpathentry kind="var" path="M2_REPO/org/neo4j/neo4j-index/1.0/neo4j-index-1.0.jar"/>
 16+ <classpathentry kind="output" path="bin"/>
 17+</classpath>
Index: trunk/WikiWord/CatGraph/.project
@@ -0,0 +1,17 @@
 2+<?xml version="1.0" encoding="UTF-8"?>
 3+<projectDescription>
 4+ <name>CatGraph</name>
 5+ <comment></comment>
 6+ <projects>
 7+ </projects>
 8+ <buildSpec>
 9+ <buildCommand>
 10+ <name>org.eclipse.jdt.core.javabuilder</name>
 11+ <arguments>
 12+ </arguments>
 13+ </buildCommand>
 14+ </buildSpec>
 15+ <natures>
 16+ <nature>org.eclipse.jdt.core.javanature</nature>
 17+ </natures>
 18+</projectDescription>
Index: trunk/WikiWord/CatGraph/src/main/java/de/wikimedia/catgraph/CatGraph.java
@@ -0,0 +1,111 @@
 2+package de.wikimedia.catgraph;
 3+
 4+import java.io.File;
 5+import java.io.IOException;
 6+import java.sql.ResultSet;
 7+import java.sql.SQLException;
 8+
 9+import org.neo4j.graphdb.Direction;
 10+import org.neo4j.graphdb.GraphDatabaseService;
 11+import org.neo4j.graphdb.Node;
 12+import org.neo4j.graphdb.Relationship;
 13+import org.neo4j.graphdb.RelationshipType;
 14+import org.neo4j.graphdb.ReturnableEvaluator;
 15+import org.neo4j.graphdb.StopEvaluator;
 16+import org.neo4j.graphdb.Transaction;
 17+import org.neo4j.graphdb.Traverser;
 18+import org.neo4j.index.IndexService;
 19+import org.neo4j.index.lucene.LuceneIndexService;
 20+import org.neo4j.kernel.EmbeddedGraphDatabase;
 21+
 22+import de.brightbyte.db.DatabaseAccess;
 23+import de.brightbyte.db.DatabaseConnectionInfo;
 24+import de.brightbyte.db.DatabaseSchema;
 25+import de.brightbyte.io.IOUtil;
 26+
 27+public class CatGraph {
 28+ public enum CategoryRelationships implements RelationshipType
 29+ {
 30+ CONTAINS
 31+ }
 32+
 33+ private GraphDatabaseService graphDb;
 34+ private IndexService indexer;
 35+
 36+ public CatGraph(GraphDatabaseService graphDb, IndexService indexer) {
 37+ this.graphDb = graphDb;
 38+ this.indexer = indexer;
 39+ }
 40+
 41+ public void load(DatabaseAccess db, String sql) throws SQLException {
 42+ ResultSet rs = db.executeQuery("load graph", sql);
 43+ while (rs.next()) {
 44+ int from = rs.getInt(1);
 45+ int to = rs.getInt(2);
 46+
 47+ putArc(from ,to);
 48+ }
 49+ }
 50+
 51+ public Node getNodeByPageId(int pageId) {
 52+ return indexer.getSingleNode("page_id", pageId);
 53+ }
 54+
 55+ public Node aquireNodeByPageId(int pageId) {
 56+ Node n = getNodeByPageId(pageId);
 57+
 58+ if (n==null) {
 59+ n = graphDb.createNode();
 60+ n.setProperty("page_id", pageId);
 61+ indexer.index(n, "page_id", pageId);
 62+ }
 63+
 64+ return n;
 65+ }
 66+
 67+ public Relationship putArc(int from, int cat) {
 68+ return putArc( aquireNodeByPageId(from), aquireNodeByPageId(cat) );
 69+ }
 70+
 71+ public Relationship putArc(Node from, Node cat) {
 72+ Relationship relationship = cat.createRelationshipTo( from, CategoryRelationships.CONTAINS );
 73+ return relationship;
 74+ }
 75+
 76+ public void traverseAndDump(Node startNode) {
 77+ Traverser traverser = startNode.traverse( Traverser.Order.BREADTH_FIRST , StopEvaluator.END_OF_GRAPH, ReturnableEvaluator.ALL, CategoryRelationships.CONTAINS, Direction.OUTGOING );
 78+ for ( Node node : traverser )
 79+ {
 80+ System.out.println( node );
 81+ }
 82+ }
 83+
 84+ public static void main(String[] args) throws IOException, SQLException {
 85+ GraphDatabaseService graphDb = new EmbeddedGraphDatabase( args[0] );
 86+ DatabaseConnectionInfo dbInfo = new DatabaseConnectionInfo( new File(args[1]) );
 87+ String sql = IOUtil.slurp(new File(args[2]), "UTF-8");
 88+
 89+ IndexService indexer = new LuceneIndexService(graphDb);
 90+
 91+ DatabaseAccess db = new DatabaseSchema(null, dbInfo, null);
 92+
 93+ CatGraph graph = new CatGraph(graphDb, indexer);
 94+
 95+ Transaction tx = graphDb.beginTx();
 96+ try
 97+ {
 98+
 99+ graph.load(db, sql);
 100+
 101+ graph.traverseAndDump(graphDb.getReferenceNode());
 102+
 103+ tx.success();
 104+ }
 105+ finally
 106+ {
 107+ tx.finish();
 108+ }
 109+
 110+ System.out.println( "done" );
 111+ }
 112+}
Index: trunk/WikiWord/CatGraph/COPYING
@@ -0,0 +1,33 @@
 2+WikiWord is a system for automatically extracting a thesaurus from
 3+Wikipedia. It was developed by Daniel Kinzler in 2007-2009.
 4+
 5+Development started in 2007 as part of a master's thesis at the
 6+University of Leipzig, see <http://brightbyte.de/page/WikiWord>.
 7+
 8+Development was supported since 2009 by Wikimedia Deutschland e.V.
 9+
 10+ NOTE: This software is not released as a product. It was designed
 11+ for Wikimedia Deutschland's own use, and is made public as is, in
 12+ the hope it may be useful. Wikimedia Deutschland may at any time
 13+ discontinue developing or supporting this software. There is no
 14+ guarantee any new versions or even fixes for security issues will
 15+ be released.
 16+
 17+WikiWord was originally licensed under the GPL, with support of the
 18+University of Leipzig, and was released under the LGPL by its author in
 19+2009, in coordination with Wikimedia Deutschland. If you want to use it
 20+under some other license or condition, please contact the author at
 21+<http://brightbyte.de>.
 22+
 23+ This program is free software: you can redistribute it and/or modify
 24+ it under the terms of the GNU General Public License as published by
 25+ the Free Software Foundation, either version 3 of the License, or
 26+ (at your option) any later version.
 27+
 28+ This program is distributed in the hope that it will be useful,
 29+ but WITHOUT ANY WARRANTY; without even the implied warranty of
 30+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 31+ GNU General Public License for more details.
 32+
 33+ You should have received a copy of the GNU General Public License
 34+ along with this program. If not, see <http://www.gnu.org/licenses/>.
Index: trunk/WikiWord/CatGraph/pom.xml
@@ -0,0 +1,172 @@
 2+<project xmlns="http://maven.apache.org/POM/4.0.0"
 3+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 5+
 6+ <modelVersion>4.0.0</modelVersion>
 7+ <groupId>de.wikimedia</groupId>
 8+ <artifactId>CatGraph</artifactId>
 9+ <version>0.1</version>
 10+
 11+ <repositories>
 12+ <repository>
 13+ <id>neo4j-public-repository</id>
 14+ <url>http://m2.neo4j.org </url>
 15+ </repository>
 16+ </repositories>
 17+
 18+ <dependencies>
 19+ <dependency>
 20+ <groupId>de.brightbyte</groupId>
 21+ <artifactId>BrightByteUtil</artifactId>
 22+ <version>0.2</version>
 23+ <scope>compile</scope>
 24+ </dependency>
 25+ <dependency>
 26+ <groupId>de.brightbyte</groupId>
 27+ <artifactId>BrightByteDB</artifactId>
 28+ <version>0.2</version>
 29+ <scope>compile</scope>
 30+ </dependency>
 31+ <dependency>
 32+ <groupId>mysql</groupId>
 33+ <artifactId>mysql-connector-java</artifactId>
 34+ <version>3.1.11</version>
 35+ <scope>runtime</scope>
 36+ </dependency>
 37+ <dependency>
 38+ <groupId>junit</groupId>
 39+ <artifactId>junit</artifactId>
 40+ <version>3.8</version>
 41+ <scope>test</scope>
 42+ </dependency>
 43+ <dependency>
 44+ <groupId>org.neo4j</groupId>
 45+ <artifactId>neo4j-kernel</artifactId>
 46+ <version>1.0</version>
 47+ </dependency>
 48+ <dependency>
 49+ <groupId>org.neo4j</groupId>
 50+ <artifactId>neo4j-index</artifactId>
 51+ <version>1.0</version>
 52+ </dependency>
 53+ <dependency>
 54+ <groupId>org.neo4j</groupId>
 55+ <artifactId>neo4j-lucene-index</artifactId>
 56+ <version>1.0</version>
 57+ </dependency>
 58+ <dependency>
 59+ <groupId>org.neo4j</groupId>
 60+ <artifactId>neo4j-shell</artifactId>
 61+ <version>1.0</version>
 62+ </dependency>
 63+ <dependency>
 64+ <groupId>org.neo4j</groupId>
 65+ <artifactId>neo4j-utils</artifactId>
 66+ <version>1.0</version>
 67+ </dependency>
 68+ <dependency>
 69+ <groupId>org.neo4j</groupId>
 70+ <artifactId>neo4j-rest</artifactId>
 71+ <version>1.0</version>
 72+ </dependency>
 73+ <dependency>
 74+ <groupId>org.neo4j</groupId>
 75+ <artifactId>neo4j-traversal</artifactId>
 76+ <version>1.0</version>
 77+ </dependency>
 78+ <dependency>
 79+ <groupId>org.neo4j</groupId>
 80+ <artifactId>neo4j-graph-algo</artifactId>
 81+ <version>1.0</version>
 82+ </dependency>
 83+ </dependencies>
 84+
 85+ <build>
 86+ <plugins>
 87+ <plugin>
 88+ <groupId>org.apache.maven.plugins</groupId>
 89+ <artifactId>maven-compiler-plugin</artifactId>
 90+ <configuration>
 91+ <source>1.5</source>
 92+ <target>1.5</target>
 93+ </configuration>
 94+ </plugin>
 95+ <plugin>
 96+ <groupId>org.apache.maven.plugins</groupId>
 97+ <artifactId>maven-javadoc-plugin</artifactId>
 98+ <executions>
 99+ <execution>
 100+ <phase>package</phase>
 101+ </execution>
 102+ </executions>
 103+ </plugin>
 104+ <plugin>
 105+ <artifactId>maven-assembly-plugin</artifactId>
 106+ <configuration>
 107+ <descriptors>
 108+ <descriptor>src/main/assembly/bin-dep.xml</descriptor>
 109+ <descriptor>src/main/assembly/src.xml</descriptor>
 110+ </descriptors>
 111+ </configuration>
 112+ </plugin>
 113+ </plugins>
 114+
 115+ <resources>
 116+ <resource>
 117+ <filtering>false</filtering>
 118+ <directory>src/main/java</directory>
 119+ <includes>
 120+ <include>**</include>
 121+ </includes>
 122+ <excludes>
 123+ <exclude>**/*.java</exclude>
 124+ </excludes>
 125+ </resource>
 126+ </resources>
 127+ <testResources>
 128+ <testResource>
 129+ <filtering>false</filtering>
 130+ <directory>src/test/java</directory>
 131+ <includes>
 132+ <include>**</include>
 133+ </includes>
 134+ <excludes>
 135+ <exclude>**/*.java</exclude>
 136+ </excludes>
 137+ </testResource>
 138+ </testResources>
 139+ </build>
 140+
 141+ <reporting>
 142+ <plugins>
 143+ <!--<plugin>
 144+ <groupId>org.apache.maven.plugins</groupId>
 145+ <artifactId>maven-checkstyle-plugin</artifactId>
 146+ <configuration>
 147+ <configLocation>config/sun_checks.xml</configLocation>
 148+ </configuration>
 149+ </plugin>-->
 150+ <plugin>
 151+ <groupId>org.codehaus.mojo</groupId>
 152+ <artifactId>surefire-report-maven-plugin</artifactId>
 153+ </plugin>
 154+ <plugin>
 155+ <groupId>org.apache.maven.plugins</groupId>
 156+ <artifactId>maven-project-info-reports-plugin</artifactId>
 157+ <reportSets>
 158+ <reportSet>
 159+ <reports>
 160+ <report>index</report>
 161+ <report>summary</report>
 162+ <report>dependencies</report>
 163+ </reports>
 164+ </reportSet>
 165+ </reportSets>
 166+ </plugin>
 167+ <plugin>
 168+ <groupId>org.apache.maven.plugins</groupId>
 169+ <artifactId>maven-javadoc-plugin</artifactId>
 170+ </plugin>
 171+ </plugins>
 172+ </reporting>
 173+</project>
Index: trunk/WikiWord/CatGraph/LGPL
@@ -0,0 +1,165 @@
 2+ GNU LESSER GENERAL PUBLIC LICENSE
 3+ Version 3, 29 June 2007
 4+
 5+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 6+ Everyone is permitted to copy and distribute verbatim copies
 7+ of this license document, but changing it is not allowed.
 8+
 9+
 10+ This version of the GNU Lesser General Public License incorporates
 11+the terms and conditions of version 3 of the GNU General Public
 12+License, supplemented by the additional permissions listed below.
 13+
 14+ 0. Additional Definitions.
 15+
 16+ As used herein, "this License" refers to version 3 of the GNU Lesser
 17+General Public License, and the "GNU GPL" refers to version 3 of the GNU
 18+General Public License.
 19+
 20+ "The Library" refers to a covered work governed by this License,
 21+other than an Application or a Combined Work as defined below.
 22+
 23+ An "Application" is any work that makes use of an interface provided
 24+by the Library, but which is not otherwise based on the Library.
 25+Defining a subclass of a class defined by the Library is deemed a mode
 26+of using an interface provided by the Library.
 27+
 28+ A "Combined Work" is a work produced by combining or linking an
 29+Application with the Library. The particular version of the Library
 30+with which the Combined Work was made is also called the "Linked
 31+Version".
 32+
 33+ The "Minimal Corresponding Source" for a Combined Work means the
 34+Corresponding Source for the Combined Work, excluding any source code
 35+for portions of the Combined Work that, considered in isolation, are
 36+based on the Application, and not on the Linked Version.
 37+
 38+ The "Corresponding Application Code" for a Combined Work means the
 39+object code and/or source code for the Application, including any data
 40+and utility programs needed for reproducing the Combined Work from the
 41+Application, but excluding the System Libraries of the Combined Work.
 42+
 43+ 1. Exception to Section 3 of the GNU GPL.
 44+
 45+ You may convey a covered work under sections 3 and 4 of this License
 46+without being bound by section 3 of the GNU GPL.
 47+
 48+ 2. Conveying Modified Versions.
 49+
 50+ If you modify a copy of the Library, and, in your modifications, a
 51+facility refers to a function or data to be supplied by an Application
 52+that uses the facility (other than as an argument passed when the
 53+facility is invoked), then you may convey a copy of the modified
 54+version:
 55+
 56+ a) under this License, provided that you make a good faith effort to
 57+ ensure that, in the event an Application does not supply the
 58+ function or data, the facility still operates, and performs
 59+ whatever part of its purpose remains meaningful, or
 60+
 61+ b) under the GNU GPL, with none of the additional permissions of
 62+ this License applicable to that copy.
 63+
 64+ 3. Object Code Incorporating Material from Library Header Files.
 65+
 66+ The object code form of an Application may incorporate material from
 67+a header file that is part of the Library. You may convey such object
 68+code under terms of your choice, provided that, if the incorporated
 69+material is not limited to numerical parameters, data structure
 70+layouts and accessors, or small macros, inline functions and templates
 71+(ten or fewer lines in length), you do both of the following:
 72+
 73+ a) Give prominent notice with each copy of the object code that the
 74+ Library is used in it and that the Library and its use are
 75+ covered by this License.
 76+
 77+ b) Accompany the object code with a copy of the GNU GPL and this license
 78+ document.
 79+
 80+ 4. Combined Works.
 81+
 82+ You may convey a Combined Work under terms of your choice that,
 83+taken together, effectively do not restrict modification of the
 84+portions of the Library contained in the Combined Work and reverse
 85+engineering for debugging such modifications, if you also do each of
 86+the following:
 87+
 88+ a) Give prominent notice with each copy of the Combined Work that
 89+ the Library is used in it and that the Library and its use are
 90+ covered by this License.
 91+
 92+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
 93+ document.
 94+
 95+ c) For a Combined Work that displays copyright notices during
 96+ execution, include the copyright notice for the Library among
 97+ these notices, as well as a reference directing the user to the
 98+ copies of the GNU GPL and this license document.
 99+
 100+ d) Do one of the following:
 101+
 102+ 0) Convey the Minimal Corresponding Source under the terms of this
 103+ License, and the Corresponding Application Code in a form
 104+ suitable for, and under terms that permit, the user to
 105+ recombine or relink the Application with a modified version of
 106+ the Linked Version to produce a modified Combined Work, in the
 107+ manner specified by section 6 of the GNU GPL for conveying
 108+ Corresponding Source.
 109+
 110+ 1) Use a suitable shared library mechanism for linking with the
 111+ Library. A suitable mechanism is one that (a) uses at run time
 112+ a copy of the Library already present on the user's computer
 113+ system, and (b) will operate properly with a modified version
 114+ of the Library that is interface-compatible with the Linked
 115+ Version.
 116+
 117+ e) Provide Installation Information, but only if you would otherwise
 118+ be required to provide such information under section 6 of the
 119+ GNU GPL, and only to the extent that such information is
 120+ necessary to install and execute a modified version of the
 121+ Combined Work produced by recombining or relinking the
 122+ Application with a modified version of the Linked Version. (If
 123+ you use option 4d0, the Installation Information must accompany
 124+ the Minimal Corresponding Source and Corresponding Application
 125+ Code. If you use option 4d1, you must provide the Installation
 126+ Information in the manner specified by section 6 of the GNU GPL
 127+ for conveying Corresponding Source.)
 128+
 129+ 5. Combined Libraries.
 130+
 131+ You may place library facilities that are a work based on the
 132+Library side by side in a single library together with other library
 133+facilities that are not Applications and are not covered by this
 134+License, and convey such a combined library under terms of your
 135+choice, if you do both of the following:
 136+
 137+ a) Accompany the combined library with a copy of the same work based
 138+ on the Library, uncombined with any other library facilities,
 139+ conveyed under the terms of this License.
 140+
 141+ b) Give prominent notice with the combined library that part of it
 142+ is a work based on the Library, and explaining where to find the
 143+ accompanying uncombined form of the same work.
 144+
 145+ 6. Revised Versions of the GNU Lesser General Public License.
 146+
 147+ The Free Software Foundation may publish revised and/or new versions
 148+of the GNU Lesser General Public License from time to time. Such new
 149+versions will be similar in spirit to the present version, but may
 150+differ in detail to address new problems or concerns.
 151+
 152+ Each version is given a distinguishing version number. If the
 153+Library as you received it specifies that a certain numbered version
 154+of the GNU Lesser General Public License "or any later version"
 155+applies to it, you have the option of following the terms and
 156+conditions either of that published version or of any later version
 157+published by the Free Software Foundation. If the Library as you
 158+received it does not specify a version number of the GNU Lesser
 159+General Public License, you may choose any version of the GNU Lesser
 160+General Public License ever published by the Free Software Foundation.
 161+
 162+ If the Library as you received it specifies that a proxy can decide
 163+whether future versions of the GNU Lesser General Public License shall
 164+apply, that proxy's public statement of acceptance of any version is
 165+permanent authorization for you to choose that version for the
 166+Library.

Status & tagging log