r19629 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r19628‎ | r19629 | r19630 >
Date:14:40, 24 January 2007
Author:river
Status:old
Tags:
Comment:
import from SCCS
Modified paths:
  • /trunk/sixdeg (added) (history)
  • /trunk/sixdeg/Makefile (added) (history)
  • /trunk/sixdeg/liblinksc.cc (added) (history)
  • /trunk/sixdeg/linksc.cc (added) (history)
  • /trunk/sixdeg/linksc.h (added) (history)
  • /trunk/sixdeg/linksc_jni.cc (added) (history)
  • /trunk/sixdeg/linksd.cc (added) (history)
  • /trunk/sixdeg/mkcache.cc (added) (history)
  • /trunk/sixdeg/org (added) (history)
  • /trunk/sixdeg/org/wikimedia (added) (history)
  • /trunk/sixdeg/org/wikimedia/links (added) (history)
  • /trunk/sixdeg/org/wikimedia/links/ConnectException.java (added) (history)
  • /trunk/sixdeg/org/wikimedia/links/ErrorException.java (added) (history)
  • /trunk/sixdeg/org/wikimedia/links/NoFromArticleException.java (added) (history)
  • /trunk/sixdeg/org/wikimedia/links/NoToArticleException.java (added) (history)
  • /trunk/sixdeg/org/wikimedia/links/linksc.java (added) (history)
  • /trunk/sixdeg/python (added) (history)
  • /trunk/sixdeg/python/six_degrees (added) (history)
  • /trunk/sixdeg/webapp (added) (history)
  • /trunk/sixdeg/webapp/src (added) (history)
  • /trunk/sixdeg/webapp/src/6deg.png (added) (history)
  • /trunk/sixdeg/webapp/src/WEB-INF (added) (history)
  • /trunk/sixdeg/webapp/src/WEB-INF/lib (added) (history)
  • /trunk/sixdeg/webapp/src/WEB-INF/lib/jstl.jar (added) (history)
  • /trunk/sixdeg/webapp/src/WEB-INF/lib/linksc.jar (added) (history)
  • /trunk/sixdeg/webapp/src/WEB-INF/lib/standard.jar (added) (history)
  • /trunk/sixdeg/webapp/src/WEB-INF/web.xml (added) (history)
  • /trunk/sixdeg/webapp/src/index.jsp (added) (history)
  • /trunk/sixdeg/webapp/src/main.css (added) (history)
  • /trunk/sixdeg/webapp/src/sun.gif (added) (history)
  • /trunk/sixdeg/webapp/src/wikimedia-toolserver-button.png (added) (history)

Diff [purge]

Index: trunk/sixdeg/linksc.cc
@@ -0,0 +1,38 @@
 2+/*
 3+ * Six degrees of Wikipedia: Front-end client (command-line)
 4+ *
 5+ * Linux version: modified to use AF_LOCAL sockets.
 6+ */
 7+
 8+#pragma ident "@(#)linksc.cc 1.3 06/09/20 00:12:50"
 9+
 10+#include <iostream>
 11+#include <string>
 12+#include <vector>
 13+
 14+#include "linksc.h"
 15+
 16+int
 17+main(int argc, char *argv[])
 18+{
 19+ std::string src, dst;
 20+ std::getline(std::cin, src);
 21+ std::getline(std::cin, dst);
 22+ std::vector<std::string> result;
 23+ int status = linksc_findpath(result, src, dst);
 24+ switch (status) {
 25+ case 0:
 26+ std::cout << "ERROR\nNO_FROM\n";
 27+ return 0;
 28+ case 1:
 29+ std::cout << "ERROR\nNO_TO\n";
 30+ return 0;
 31+ case 3:
 32+ std::cout << "ERROR\nNO_CONNECT\n";
 33+ return 0;
 34+ }
 35+ std::cout << "OK\n";
 36+ for (int i = 0; i < result.size(); ++i)
 37+ std::cout << result[i] << '\n';
 38+}
 39+
Index: trunk/sixdeg/linksc_jni.cc
@@ -0,0 +1,50 @@
 2+/*
 3+ * Six degrees of Wikipedia: JNI client interface.
 4+ * This source code is released into the public domain.
 5+ */
 6+
 7+#pragma ident "@(#)linksc_jni.cc 1.1 05/11/21 21:00:23"
 8+
 9+#include <string>
 10+#include <vector>
 11+
 12+#include <jni.h>
 13+
 14+#include "linksc.h"
 15+
 16+#include "org_wikimedia_links_linksc.h"
 17+
 18+extern "C" JNIEXPORT jobjectArray JNICALL
 19+Java_org_wikimedia_links_linksc_findPath (JNIEnv *env, jobject o, jstring jfrom, jstring jto)
 20+{
 21+ std::string from, to;
 22+ from = env->GetStringUTFChars(jfrom, 0);
 23+ env->ReleaseStringUTFChars(jfrom, 0);
 24+ to = env->GetStringUTFChars(jto, 0);
 25+ env->ReleaseStringUTFChars(jto, 0);
 26+
 27+ std::vector<std::string> result;
 28+ jclass error;
 29+ jobjectArray resultarr;
 30+ int status = linksc_findpath(result, from, to);
 31+ const char *errorstr = "An unknown error occured";
 32+ switch (status) {
 33+ case 0: errorstr = "Source article does not exist."; break;
 34+ case 1: errorstr = "Target article does not exist."; break;
 35+ case 3: errorstr = "Could not connect to links server."; break;
 36+ }
 37+ if (status < 2 || status == 3) {
 38+ error = env->FindClass("org/wikimedia/links/ErrorException");
 39+ env->ThrowNew(error, errorstr);
 40+ return resultarr;
 41+ }
 42+
 43+ resultarr = (jobjectArray) env->NewObjectArray(result.size(),
 44+ env->FindClass("java/lang/String"), env->NewStringUTF(""));
 45+
 46+ for (int i = 0; i < result.size(); ++i) {
 47+ env->SetObjectArrayElement(resultarr, i, env->NewStringUTF(result[i].c_str()));
 48+ }
 49+ return resultarr;
 50+}
 51+
Index: trunk/sixdeg/org/wikimedia/links/NoFromArticleException.java
@@ -0,0 +1,13 @@
 2+/*
 3+ * Six degrees of Wikipedia: Java client.
 4+ * This source code is released into the public domain.
 5+ *
 6+ * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/NoFromArticleException.java $ %E% %U%
 7+ */
 8+package org.wikimedia.links;
 9+
 10+public class NoFromArticleException extends ErrorException {
 11+ public NoFromArticleException() {
 12+ super("Source article does not exist.");
 13+ }
 14+}
Index: trunk/sixdeg/org/wikimedia/links/linksc.java
@@ -0,0 +1,27 @@
 2+/*
 3+ * Six degrees of Wikipedia: Java client.
 4+ * This source code is released into the public domain.
 5+ *
 6+ * @(#)linksc.java 1.1 05/11/21 21:02:32
 7+ */
 8+package org.wikimedia.links;
 9+
 10+public class linksc {
 11+ public native String[] findPath(String from, String to) throws ErrorException;
 12+ static {
 13+ System.loadLibrary("linksc_jni");
 14+ }
 15+
 16+ public static void main(String[] args) {
 17+ linksc c = new linksc();
 18+ String[] result = null;
 19+ try {
 20+ result = c.findPath(args[0], args[1]);
 21+ } catch (ErrorException e) {
 22+ System.out.printf("Error: %s\n", e.geterror());
 23+ return;
 24+ }
 25+ for (String s: result)
 26+ System.out.println(s);
 27+ }
 28+}
Index: trunk/sixdeg/org/wikimedia/links/ConnectException.java
@@ -0,0 +1,13 @@
 2+/*
 3+ * Six degrees of Wikipedia: Java client.
 4+ * This source code is released into the public domain.
 5+ *
 6+ * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/ConnectException.java $ %E% %U%
 7+ */
 8+package org.wikimedia.links;
 9+
 10+public class ConnectException extends ErrorException {
 11+ public ConnectException() {
 12+ super("Could not connect to links server.");
 13+ }
 14+}
Index: trunk/sixdeg/org/wikimedia/links/ErrorException.java
@@ -0,0 +1,19 @@
 2+/*
 3+ * Six degrees of Wikipedia: Java client.
 4+ * This source code is released into the public domain.
 5+ *
 6+ * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/ErrorException.java $ %E% %U%
 7+ */
 8+package org.wikimedia.links;
 9+
 10+public class ErrorException extends Exception {
 11+ String err;
 12+
 13+ public ErrorException(String what) {
 14+ err = what;
 15+ }
 16+
 17+ public String geterror() {
 18+ return err;
 19+ }
 20+}
Index: trunk/sixdeg/org/wikimedia/links/NoToArticleException.java
@@ -0,0 +1,13 @@
 2+/*
 3+ * Six degrees of Wikipedia: Java client.
 4+ * This source code is released into the public domain.
 5+ *
 6+ * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/NoToArticleException.java $ %E% %U%
 7+ */
 8+package org.wikimedia.links;
 9+
 10+public class NoToArticleException extends ErrorException {
 11+ public NoToArticleException() {
 12+ super("Target article does not exist.");
 13+ }
 14+}
Index: trunk/sixdeg/linksd.cc
@@ -0,0 +1,354 @@
 2+/*
 3+ * Six degrees of Wikipedia: Server.
 4+ * This source code is released into the public domain.
 5+ *
 6+ * Linux version, modified to use AF_UNIX socket instead of doors 2006-09-20.
 7+ */
 8+
 9+#pragma ident "@(#)linksd.cc 1.3 07/01/24 14:14:48"
 10+
 11+#include <sys/types.h>
 12+#include <sys/socket.h>
 13+#include <sys/un.h>
 14+
 15+#include <iostream>
 16+#include <map>
 17+#include <list>
 18+#include <set>
 19+#include <vector>
 20+#include <cstdio>
 21+#include <cstdlib>
 22+#include <cstring>
 23+#include <cassert>
 24+#include <queue>
 25+#include <string>
 26+#include <algorithm>
 27+#include <utility>
 28+#include <fstream>
 29+#include <sstream>
 30+#include <exception>
 31+
 32+#include <unistd.h>
 33+#include <mysql.h>
 34+#include <fcntl.h>
 35+
 36+#include "linksc.h"
 37+
 38+static void handle_request(int s, char *argp, size_t argz);
 39+
 40+std::vector<std::string> names;
 41+std::map<std::string, int> ids;
 42+std::vector<int> isdate;
 43+std::vector<std::vector<int> > adjacency;
 44+
 45+/*
 46+ * Is this article a date?
 47+ */
 48+bool
 49+is_date(std::string name) {
 50+struct std::tm res;
 51+std::string::size_type t;
 52+bool a, b;
 53+ while ((t = name.find_first_of("_")) != std::string::npos)
 54+ name[t] = ' ';
 55+ std::memset(&res, 0, sizeof(res));
 56+ a = strptime(name.c_str(), "%b %d", &res) != NULL;
 57+ std::memset(&res, 0, sizeof(res));
 58+ b = strptime(name.c_str(), "%Y", &res) != NULL;
 59+
 60+ if (a || (b && name.length() <= 4 &&
 61+ /* wtf... strptime("%Y") on Solaris will return a "valid" year for
 62+ * *any* string of four or less characters */
 63+ name.find_first_not_of("0123456789") == std::string::npos))
 64+ return true;
 65+ else return false;
 66+}
 67+
 68+/*
 69+ * I didn't write this function. I don't even know if it works correctly :-). However,
 70+ * it seems to return the right results. (Credit: ZorbaTHut @ EFnet #c++)
 71+ */
 72+std::vector<int>
 73+findPath(int src, int dst, bool ign_date) {
 74+std::vector<int> back;
 75+std::deque<int> next;
 76+
 77+ back.clear();
 78+ back.resize(adjacency.size(), -1);
 79+ next.clear();
 80+ back.at(src) = -2;
 81+ next.push_back(src);
 82+
 83+ while (next.size()) {
 84+ int ts = next.at(0);
 85+ next.pop_front();
 86+
 87+ if (ts == dst) {
 88+ std::vector<int> path;
 89+ int lastlink = back[dst];
 90+ path.push_back(dst);
 91+
 92+ while (lastlink != -2) {
 93+ assert(lastlink != -1);
 94+ path.push_back(lastlink);
 95+ lastlink = back.at(lastlink);
 96+ }
 97+ std::reverse(path.begin(), path.end());
 98+ return path;
 99+ }
 100+
 101+ for (int i = 0; i < adjacency.at(ts).size(); i++) {
 102+ if (ign_date && isdate[adjacency.at(ts).at(i)])
 103+ continue;
 104+ if (back.at(adjacency.at(ts).at(i)) == -1) {
 105+ back.at(adjacency.at(ts).at(i)) = ts;
 106+ next.push_back(adjacency.at(ts).at(i));
 107+ }
 108+ }
 109+ }
 110+ return std::vector<int>();
 111+}
 112+
 113+static void *
 114+start_request(void *arg)
 115+{
 116+int s = (int)(uintptr_t)arg;
 117+uint32_t sz;
 118+int i;
 119+std::vector<char> buf;
 120+ if ((i = read(s, &sz, sizeof(sz))) < sizeof(sz)) {
 121+ if (i == -1)
 122+ std::perror("read");
 123+ close(s);
 124+ return 0;
 125+ }
 126+
 127+ try {
 128+ buf.resize(sz);
 129+ } catch (std::bad_alloc& e) {
 130+ std::cerr << "out of memory for client request!\n";
 131+ close(s);
 132+ return 0;
 133+ }
 134+
 135+ read(s, &buf[0], sz);
 136+ handle_request(s, &buf[0], buf.size());
 137+ close(s);
 138+ return 0;
 139+}
 140+
 141+static void
 142+handle_request(int s, char *argp, size_t argz)
 143+{
 144+ /*
 145+ * Data format:
 146+ * "<uint32>From<uint32>To"
 147+ * The ints contain the size of from and to, respectively.
 148+ *
 149+ * Result format:
 150+ * "<char><text...>"
 151+ * <char> might be:
 152+ * 0: From article did not exist
 153+ * 1: To article did not exist
 154+ * 2: No search result was found.
 155+ * 3: Some other error occured.
 156+ * 4: Result okay, data follows.
 157+ *
 158+ * In case of 3, the rest of the buffer contains a series of items:
 159+ * "<uint32><text>"
 160+ * where uint32 is the length of the next item, and text is the article name.
 161+ */
 162+ std::string from, to;
 163+ std::vector<char> result;
 164+ bool ign_date = false;
 165+ char *p = argp;
 166+ uint32_t l;
 167+ if (argz < 8) {
 168+ char err[4];
 169+ *(uint32_t *)err = 3;
 170+ result.resize(4);
 171+ result.assign(err, err + 4);
 172+ l = result.size();
 173+ write(s, &l, sizeof(l));
 174+ write(s, &result[0], result.size());
 175+ return;
 176+ }
 177+ l = *(uint32_t *)argp;
 178+ argz -= 4;
 179+ argp += 4;
 180+ if (l > argz) {
 181+ char err[4];
 182+ *(uint32_t *)err = 3;
 183+ result.resize(4);
 184+ result.assign(err, err + 4);
 185+ l = result.size();
 186+ write(s, &l, sizeof(l));
 187+ write(s, &result[0], result.size());
 188+ return;
 189+ }
 190+ /* This means ignore dates - it's ugly, but avoids an API change */
 191+ if (*argp == '#') {
 192+ from.assign(argp + 1, argp + l);
 193+ ign_date = true;
 194+ } else
 195+ from.assign(argp, argp + l);
 196+ argp += l;
 197+ argz -= l;
 198+ if (argz < 4) {
 199+ char err[4];
 200+ *(uint32_t *)err = 3;
 201+ result.resize(4);
 202+ result.assign(err, err + 4);
 203+ l = result.size();
 204+ write(s, &l, sizeof(l));
 205+ write(s, &result[0], result.size());
 206+ return;
 207+ }
 208+ l = *(uint32_t *)argp;
 209+ argp += 4;
 210+ if (l > argz) {
 211+ char err[4];
 212+ *(uint32_t *)err = 3;
 213+ result.resize(4);
 214+ result.assign(err, err + 4);
 215+ l = result.size();
 216+ write(s, &l, sizeof(l));
 217+ write(s, &result[0], result.size());
 218+ return;
 219+ }
 220+ to.assign(argp, argp + l);
 221+ int fromid, toid;
 222+ if (ids.find(from) == ids.end()) {
 223+ char err[4];
 224+ *(uint32_t *)err = 0;
 225+ result.resize(4);
 226+ result.assign(err, err + 4);
 227+ l = result.size();
 228+ write(s, &l, sizeof(l));
 229+ write(s, &result[0], result.size());
 230+ return;
 231+ }
 232+ fromid = ids[from];
 233+ if (ids.find(to) == ids.end()) {
 234+ char err[4];
 235+ *(uint32_t *)err = 1;
 236+ result.resize(4);
 237+ result.assign(err, err + 4);
 238+ l = result.size();
 239+ write(s, &l, sizeof(l));
 240+ write(s, &result[0], result.size());
 241+ return;
 242+ }
 243+ toid = ids[to];
 244+ std::vector<int> links = findPath(fromid, toid, ign_date);
 245+ result.resize(4);
 246+ *(uint32_t *)&result[0] = 3;
 247+ for (std::vector<int>::const_iterator it = links.begin(), end = links.end(); it != end; ++it)
 248+ {
 249+ std::string s = names.at(*it);
 250+ char len[4];
 251+ *(uint32_t*)len = s.size();
 252+ result.insert(result.end(), len, len + 4);
 253+ result.insert(result.end(), s.begin(), s.end());
 254+ }
 255+ l = result.size();
 256+ write(s, &l, sizeof(l));
 257+ write(s, &result[0], result.size());
 258+}
 259+
 260+int
 261+main(int argc, char *argv[])
 262+{
 263+ std::ifstream in(CACHE);
 264+ std::string l;
 265+ std::printf("retrieving links table...\n");
 266+ while (std::getline(in, l)) {
 267+ if (l.empty())
 268+ break;
 269+ int l_from, l_to;
 270+ std::istringstream str(l);
 271+ str >> l_from >> l_to;
 272+ if (l_from >= adjacency.size())
 273+ adjacency.resize(l_from + 1);
 274+ std::vector<int>& l = adjacency.at(l_from);
 275+ l.insert(l.end(), l_to);
 276+ }
 277+
 278+ std::printf("ok\n");
 279+ std::printf("retrieving titles...\n");
 280+ while (std::getline(in, l)) {
 281+ int l_id;
 282+ std::string l_ttl;
 283+ std::istringstream str(l);
 284+ str >> l_id;
 285+ std::getline(str, l_ttl);
 286+ while (!l_ttl.empty() && l_ttl[0] == ' ')
 287+ l_ttl.erase(l_ttl.begin());
 288+ if (l_id >= names.size()) {
 289+ names.resize(l_id + 1);
 290+ isdate.resize(l_id + 1);
 291+ }
 292+ if (is_date(l_ttl))
 293+ isdate[l_id] = 1;
 294+ else isdate[l_id] = 0;
 295+ names.at(l_id) = l_ttl;
 296+ ids[l_ttl] = l_id;
 297+ }
 298+ std::printf("ok, %d links, %d titles\n", adjacency.size(), names.size());
 299+ std::printf("filtering links...\n");
 300+ for (int i = 1; i < adjacency.size(); ++i) {
 301+ if (i >= names.size() || names[i].empty()) {
 302+ adjacency.at(i).clear();
 303+ continue;
 304+ }
 305+ for (std::vector<int>::iterator it = adjacency[i].begin(); it != adjacency[i].end();)
 306+ if (*it >= names.size() || names[*it].empty())
 307+ it = adjacency.at(i).erase(it);
 308+ else ++it;
 309+ }
 310+ std::printf("ok\n");
 311+
 312+ int did;
 313+ struct sockaddr_un addr;
 314+
 315+ if ((did = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
 316+ std::perror("socket");
 317+ std::exit(1);
 318+ }
 319+ unlink(DOOR);
 320+ std::memset(&addr, 0, sizeof(addr));
 321+ addr.sun_family = AF_LOCAL;
 322+ strncpy(addr.sun_path, DOOR, sizeof(addr.sun_path));
 323+ if (bind(did, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
 324+ std::perror("bind");
 325+ std::exit(1);
 326+ }
 327+
 328+ if (listen(did, 5) == -1) {
 329+ std::perror("listen");
 330+ std::exit(1);
 331+ }
 332+
 333+ for (;;) {
 334+ int cli;
 335+ struct sockaddr_un cliaddr;
 336+ size_t clilen;
 337+ clilen = sizeof(addr);
 338+ std::memset(&cliaddr, 0, clilen);
 339+ if ((cli = accept(did, (struct sockaddr *)&cliaddr, (socklen_t *)&clilen)) == -1) {
 340+ std::perror("accept");
 341+ std::exit(1);
 342+ }
 343+
 344+ pthread_attr_t attr;
 345+ pthread_attr_init(&attr);
 346+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
 347+ pthread_t tid;
 348+
 349+ if (pthread_create(&tid, &attr, start_request, (void *)cli) == -1) {
 350+ std::perror("pthread_create");
 351+ std::exit(1);
 352+ }
 353+ }
 354+ close(did);
 355+}
Index: trunk/sixdeg/linksc.h
@@ -0,0 +1,19 @@
 2+/*
 3+ * Six degrees of Wikipedia: Client interface definitions.
 4+ * This source code is released into the public domain.
 5+ */
 6+
 7+#ifndef LINKSC_H
 8+#define LINKSC_H
 9+
 10+#pragma ident "$URL: file:///home/river/s2s/linksd/linksc.h $ %E% %U%"
 11+
 12+#define DOOR "/home/river/.links.door"
 13+#define CACHE "/home/river/.links.cache"
 14+
 15+#include <vector>
 16+#include <string>
 17+
 18+int linksc_findpath(std::vector<std::string>& res, std::string from, std::string to);
 19+
 20+#endif
Index: trunk/sixdeg/Makefile
@@ -0,0 +1,55 @@
 2+# Six degrees of Wikipedia: Makefile
 3+# This source code is released into the public domain.
 4+#
 5+# $URL: file:///home/river/s2s/linksd/Makefile $ %E% %U%
 6+
 7+all: linksd linksc liblinksc_jni.so linksc.jar sixdeg.war mkcache
 8+
 9+# do not use -library=stlport4 for linksd! it's broken because of a sun studio 11 bug
 10+linksd: linksd.cc linksc.h
 11+ g++ -D_REENTRANT -g -O2 -I/usr/include/mysql -lmysqlclient -lz linksd.cc -o linksd
 12+linksc: linksc.o liblinksc.o linksc.h
 13+ g++ -O2 linksc.o liblinksc.o -o linksc
 14+linksc.o: linksc.cc linksc.h
 15+ g++ -O2 -c linksc.cc
 16+mkcache: mkcache.cc linksc.h
 17+ g++ -O2 mkcache.cc -o mkcache -I/usr/include/mysql -L/opt/mysql50/lib/mysql -lmysqlclient -lz
 18+liblinksc.o: liblinksc.cc linksc.h
 19+ g++ -fPIC -O2 -c liblinksc.cc
 20+
 21+liblinksc_jni.so: org/wikimedia/links/linksc.class liblinksc.o linksc_jni.cc
 22+ javah -classpath . org.wikimedia.links.linksc
 23+ g++ -fPIC -O2 -shared -I/usr/lib/j2sdk1.5-sun/include -I/usr/lib/j2sdk1.5-sun/include/linux \
 24+ linksc_jni.cc liblinksc.o -o liblinksc_jni.so
 25+
 26+org/wikimedia/links/linksc.class: org/wikimedia/links/linksc.java org/wikimedia/links/ErrorException.java
 27+ javac org/wikimedia/links/linksc.java
 28+org/wikimedia/links/NoFromArticleException.class: org/wikimedia/links/NoFromArticleException.java
 29+ javac org/wikimedia/links/NoFromArticleException.java
 30+org/wikimedia/links/NoToArticleException.class: org/wikimedia/links/NoToArticleException.java
 31+ javac org/wikimedia/links/NoToArticleException.java
 32+org/wikimedia/links/ConnectException.class: org/wikimedia/links/ConnectException.java
 33+ javac org/wikimedia/links/ConnectException.java
 34+org/wikimedia/links/ErrorException.class: org/wikimedia/links/ErrorException.java
 35+ javac org/wikimedia/links/ErrorException.java
 36+
 37+linksc.jar: org/wikimedia/links/linksc.class org/wikimedia/links/NoFromArticleException.class org/wikimedia/links/NoToArticleException.class org/wikimedia/links/ConnectException.class org/wikimedia/links/ErrorException.class
 38+ rm -f linksc.jar
 39+ zip -r linksc.jar org/wikimedia/links/linksc.class org/wikimedia/links/NoFromArticleException.class org/wikimedia/links/NoToArticleException.class org/wikimedia/links/ConnectException.class org/wikimedia/links/ErrorException.class
 40+
 41+clean:
 42+ rm -f linksc linksd org/wikimedia/links/*.class linksc_jni.so linkc.jar *.so *.o mkcache org_wikimedia_links_linksc.h sixdeg.war linksc.jar core
 43+
 44+sixdeg.war: linksc.jar webapp/src/index.jsp liblinksc_jni.so webapp/src/main.css webapp/src/6deg.png webapp/src/WEB-INF/web.xml
 45+ rm -f sixdeg.war
 46+ (cd webapp/src; zip -Dr ../../sixdeg.war . -x `find . | grep '\.svn'`)
 47+testdeploy: sixdeg.war
 48+ /opt/SUNWappserver/appserver/bin/asadmin deploy --user kate --port 7070 --force --precompilejsp --contextroot sdtest --name sdtest sixdeg.war
 49+deploy: sixdeg.war
 50+ /opt/SUNWappserver/appserver/bin/asadmin deploy --user kate --port 7070 --force --precompilejsp sixdeg.war
 51+dist:
 52+ $(MAKE) clean
 53+ (cd .. && find linksd -name '.*') | tail +1l >../exclude
 54+ cd .. && tar cvfX linksd.tar exclude linksd
 55+ cd .. && gzip -f linksd.tar
 56+.KEEP_STATE:
Index: trunk/sixdeg/liblinksc.cc
@@ -0,0 +1,124 @@
 2+/*
 3+ * Six degrees of Wikipedia: Client library.
 4+ */
 5+
 6+#pragma ident "@(#)liblinksc.cc 1.3 06/09/20 00:47:26"
 7+
 8+#include <sys/types.h>
 9+#include <sys/socket.h>
 10+#include <sys/un.h>
 11+#include <sys/stat.h>
 12+#include <sys/mman.h>
 13+
 14+#include <iostream>
 15+#include <cstdio>
 16+#include <cstdlib>
 17+#include <cstring>
 18+#include <string>
 19+#include <vector>
 20+
 21+#include <unistd.h>
 22+#include <fcntl.h>
 23+
 24+#include "linksc.h"
 25+
 26+int
 27+linksc_findpath(std::vector<std::string>& result, std::string src, std::string dst)
 28+{
 29+ std::vector<char> data;
 30+ int s, l;
 31+ struct sockaddr_un addr;
 32+
 33+ std::memset(&addr, 0, sizeof(addr));
 34+ addr.sun_family = AF_LOCAL;
 35+ std::strcpy(addr.sun_path, DOOR);
 36+ if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
 37+perror("socket");
 38+ return 3;
 39+ }
 40+ if (connect(s, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
 41+perror("connect");
 42+ close(s);
 43+ return 3;
 44+ }
 45+
 46+ uint32_t i;
 47+ i = src.size();
 48+ data.resize(4);
 49+ *(uint32_t*)&data[0] = i;
 50+ data.insert(data.end(), src.begin(), src.end());
 51+
 52+ i = dst.size();
 53+ data.resize(data.size() + 4);
 54+ *(uint32_t*)&data[data.size() - 4] = i;
 55+ data.insert(data.end(), dst.begin(), dst.end());
 56+
 57+ i = data.size();
 58+ if (write(s, &i, sizeof(i)) < sizeof(i)) {
 59+ close(s);
 60+ return 3;
 61+ }
 62+
 63+ if ((l = write(s, &data[0], data.size())) < data.size()) {
 64+ close(s);
 65+ return 3;
 66+ }
 67+
 68+ if (read(s, &i, sizeof(i)) < sizeof(i)) {
 69+ close(s);
 70+ return 3;
 71+ }
 72+
 73+ try {
 74+ data.resize(i);
 75+ } catch (std::bad_alloc&) {
 76+ close(s);
 77+ return 3;
 78+ }
 79+
 80+ if (read(s, &data[0], i) < i) {
 81+ close(s);
 82+ return 3;
 83+ }
 84+
 85+ if (i < 4) {
 86+ close(s);
 87+ return 3;
 88+ }
 89+ char *buf = &data[0];
 90+ void *oaddr = &data[0];
 91+ size_t osize = i;
 92+ uint32_t status = *(uint32_t*)&data[0];
 93+ result.resize(0);
 94+ switch (status) {
 95+ case 0:
 96+ close(s);
 97+ return 0;
 98+ case 1:
 99+ close(s);
 100+ return 1;
 101+ case 2:
 102+ close(s);
 103+ return 3;
 104+ case 3:
 105+ break;
 106+ default:
 107+ close(s);
 108+ return 2;
 109+ }
 110+ i -= 4;
 111+ buf += 4;
 112+ while (i) {
 113+ std::string h;
 114+ status = *(uint32_t*)buf;
 115+ i -= 4;
 116+ buf += 4;
 117+ h.assign(buf, status);
 118+ i -= status;
 119+ buf += status;
 120+ result.insert(result.end(), h);
 121+ }
 122+ close(s);
 123+ return 4;
 124+}
 125+
Index: trunk/sixdeg/webapp/src/wikimedia-toolserver-button.png
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/sixdeg/webapp/src/wikimedia-toolserver-button.png
___________________________________________________________________
Added: svn:mime-type
1126 + application/octet-stream
Index: trunk/sixdeg/webapp/src/index.jsp
@@ -0,0 +1,135 @@
 2+<%--
 3+ Six degrees of Wikipedia: JSP front-end.
 4+ This source code is released into the public domain.
 5+
 6+ @(#)index.jsp 1.19 06/10/16 01:17:11
 7+--%>
 8+<?xml version="1.0" encoding="UTF-8"?>
 9+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 10+<%@ page language="java" contentType="text/html; charset=UTF-8"%>
 11+<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %>
 12+<%@ taglib prefix="fn" uri="http://java.sun.com/jsp/jstl/functions" %>
 13+<%
 14+String[] path = null;
 15+String error = null;
 16+String from = request.getParameter("from"), to = request.getParameter("to");
 17+org.wikimedia.links.linksc lc = new org.wikimedia.links.linksc();
 18+
 19+if (from != null)
 20+ from = from.trim();
 21+if (to != null)
 22+ to = to.trim();
 23+
 24+String enc = request.getCharacterEncoding();
 25+if (enc == null) enc = "<undefined>";
 26+pageContext.setAttribute("encoding", enc);
 27+
 28+if (request.getCharacterEncoding() == null) {
 29+ if (from != null) from = new String(from.getBytes("ISO-8859-1"), "UTF-8");
 30+ if (to != null) to = new String(to.getBytes("ISO-8859-1"), "UTF-8");
 31+}
 32+
 33+if (from != null && from.length() > 0 && to != null && to.length() > 0) {
 34+ String idp = request.getParameter("ign_dates");
 35+ boolean ign_date = idp != null && idp.equals("1");
 36+ String rfrom = from.substring(0, 1).toUpperCase() + from.substring(1, from.length());
 37+ String rto = to.substring(0, 1).toUpperCase() + to.substring(1, to.length());
 38+ if (ign_date)
 39+ rfrom = "#" + rfrom;
 40+ try {
 41+ path = lc.findPath(rfrom.replaceAll(" ", "_"), rto.replaceAll(" ", "_"));
 42+ } catch (org.wikimedia.links.ErrorException e) {
 43+ error = e.geterror();
 44+ }
 45+}
 46+if (path != null && path.length == 0)
 47+ error = "No route found after 10 degrees.";
 48+
 49+pageContext.setAttribute("error", error);
 50+pageContext.setAttribute("path", path);
 51+pageContext.setAttribute("from", from);
 52+pageContext.setAttribute("to", to);
 53+if (path != null) {
 54+ pageContext.setAttribute("len", Integer.valueOf(path.length - 1));
 55+} else {
 56+ pageContext.setAttribute("len", Integer.valueOf(0));
 57+}
 58+%>
 59+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 60+<head>
 61+<title>Six degrees of Wikipedia</title>
 62+<meta name="robots" content="index" />
 63+<meta name="robots" content="follow" />
 64+<link rel="stylesheet" href="main.css" />
 65+</head>
 66+<body>
 67+<div style="text-align: right; padding: 0px; margin: 0px"><img src="6deg.png" alt="" /><br/></div>
 68+<div style="text-align: center">
 69+<i>
 70+a <a href="http://en.wikipedia.org/wiki/Shortest_path">shortest path</a>
 71+query solver for the English
 72+<a href="http://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>...</i><br/>
 73+<i>six degrees</i> finds the shortest path between any two Wikipedia articles in the
 74+main namespace using wiki links
 75+</div>
 76+
 77+<div style="padding-top: 35px;">
 78+<form method="get" action="index.jsp" accept-charset="UTF-8">
 79+<center>
 80+<strong>find path...</strong>
 81+from: <input type="text" name="from" value="<c:out value="${from}"/>"/>
 82+to: <input type="text" name="to" value="<c:out value="${to}"/>" />
 83+<input type="submit" value="go" />
 84+<br />
 85+<input type="checkbox" name="ign_dates" value="1" /> ignore date and year articles
 86+</center>
 87+</form>
 88+
 89+<% if (error != null) { %>
 90+<center><div class='error'><span class='error'>error:</span><span class='errtext'><c:out value="${error}" /></span></div></center>
 91+<% if (error.equals("No route found after 10 degrees.")) { %>
 92+<p class='return'><a
 93+href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&amp;to=<c:out value='${fn:replace(from, " ", "_")}'/>"
 94+>Try in the other direction?</a></p>
 95+<% } %>
 96+<% } else if (path != null) { %>
 97+<div class='result'>
 98+<div class='answer'><c:out value="${len}"/> degrees of separation</div>
 99+<c:forEach items="${path}" var="hop">
 100+ <span class="art"><a href="http://en.wikipedia.org/wiki/<c:out value="${hop}"/>"><c:out value='${fn:replace(hop, "_", " ")}'/></a></span><br/>
 101+</c:forEach>
 102+</div>
 103+<p class='return'><a href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&amp;to=<c:out value='${fn:replace(from, " ", "_")}'/>">View the return path?</a></p>
 104+<% } %>
 105+<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em">
 106+<div style="text-align: center">
 107+<strong>hints:</strong>
 108+</div>
 109+<ul>
 110+<li><em>it says my article doesn't exist?</em> - this is usually caused by the article being created later the last database update (often several months ago). alternatively, check your capitalisation.</li>
 111+<li>please <em>do</em> report any other problems with six degrees to me
 112+[<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</li>
 113+<li>redirects are searched as well as articles</li>
 114+<li>using a redirect as the target will generally produce an inferior result (be careful: "United Kingdom" is not a redirect, but "United kingdom" is)</li>
 115+<li>article names are case sensitive except for the first letter, which is always capital</li>
 116+<li>six degrees was recently <a href="http://tools.wikimedia.de/~river/pages/six-degrees-ct">mentioned</a> in the
 117+ German computer magazine <i>c't</i>. fame! who'd've thought it ;-)</li>
 118+</ul>
 119+</div>
 120+</div>
 121+
 122+</a><a href="https://www.mediawiki.org/"><img
 123+ src="wikimedia-toolserver-button.png" style="float: right"
 124+ alt="Hosted by Wikimedia Toolserver" /></a>
 125+<a href="http://www.sun.com/"><img
 126+ style="float: right" src="sun.gif"
 127+ alt = "Powered by Sun Microsystems" /></a>
 128+<p>
 129+<a href="http://tools.wikimedia.de/~river/pages/projects/six-degrees">source code</a> |
 130+<a href="mailto:river@attenuate.org">send feedback...</a><br />
 131+i'm poor. if you like <i>six degrees</i>, feel free to <a href="http://www.paypal.com/"
 132+>PayPal</a> some money to [<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</p>
 133+<span class='version'>Front-end version: @(#)index.jsp 1.19 06/10/16 01:17:11 [request encoding: <c:out value="${encoding}" />]
 134+</div>
 135+</body>
 136+</html>
Index: trunk/sixdeg/webapp/src/WEB-INF/lib/standard.jar
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/sixdeg/webapp/src/WEB-INF/lib/standard.jar
___________________________________________________________________
Added: svn:mime-type
1137 + application/octet-stream
Index: trunk/sixdeg/webapp/src/WEB-INF/lib/linksc.jar
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/sixdeg/webapp/src/WEB-INF/lib/linksc.jar
___________________________________________________________________
Added: svn:mime-type
2138 + application/octet-stream
Index: trunk/sixdeg/webapp/src/WEB-INF/lib/jstl.jar
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/sixdeg/webapp/src/WEB-INF/lib/jstl.jar
___________________________________________________________________
Added: svn:mime-type
3139 + application/octet-stream
Index: trunk/sixdeg/webapp/src/WEB-INF/web.xml
@@ -0,0 +1,10 @@
 2+<?xml version="1.0" encoding="UTF-8"?>
 3+<web-app xmlns="http://java.sun.com/xml/ns/j2ee"
 4+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 5+ xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd"
 6+ version="2.4">
 7+
 8+ <description>Six degrees of Wikipedia</description>
 9+ <display-name>Six degrees of Wikipedia</display-name>
 10+
 11+</web-app>
Index: trunk/sixdeg/webapp/src/6deg.png
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/sixdeg/webapp/src/6deg.png
___________________________________________________________________
Added: svn:mime-type
112 + application/octet-stream
Index: trunk/sixdeg/webapp/src/sun.gif
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Property changes on: trunk/sixdeg/webapp/src/sun.gif
___________________________________________________________________
Added: svn:mime-type
213 + application/octet-stream
Index: trunk/sixdeg/webapp/src/main.css
@@ -0,0 +1,61 @@
 2+/*
 3+ * Six degrees of Wikipedia: JSP front-end styling.
 4+ * This source code is released into the public domain.
 5+ *
 6+ * @(#)main.css 1.4 05/11/21 23:40:14
 7+ */
 8+span.link {
 9+ color: #999999;
 10+}
 11+body {
 12+ background-color: white;
 13+ padding-right: 0px;
 14+ margin-right: 0px;
 15+}
 16+input { border: solid 1px blue; background: white; color: black; }
 17+div.error {
 18+ display: inline;
 19+ border: solid 1px #ff8888;
 20+}
 21+span.error {
 22+ padding-left: 1em;
 23+ padding-right: 1em;
 24+ background-color: #ffdcdc;
 25+ font-weight: bold;
 26+ border-right: solid 1px #ff8888;
 27+}
 28+span.errtext {
 29+ padding-left: 1em;
 30+ padding-right: 1em;
 31+}
 32+div.result {
 33+ border: solid 1px #0000ff;
 34+ width: 75%;
 35+ margin-left: auto; margin-right: auto;
 36+ text-align: center;
 37+ padding: 0em 0 0.5em 0;
 38+}
 39+div.answer {
 40+ width: 100%;
 41+ text-align: center;
 42+ border-bottom: solid 1px #0000ff;
 43+ background-color: #ddddff;
 44+ margin-bottom: 0.5em;
 45+}
 46+span.art {
 47+}
 48+a {
 49+ color: #0000C0;
 50+}
 51+a:hover {
 52+ text-decoration: underline;
 53+}
 54+img {
 55+ border: none;
 56+}
 57+.return {
 58+ text-align: center;
 59+}
 60+span.version {
 61+ color: #999999;
 62+}
Index: trunk/sixdeg/mkcache.cc
@@ -0,0 +1,77 @@
 2+/*
 3+ * Six degrees of Wikipedia: Database cacher.
 4+ * This source code is released into the public domain.
 5+ */
 6+
 7+#pragma ident "@(#)mkcache.cc 1.1 05/11/21 21:00:29"
 8+
 9+#include <iostream>
 10+#include <cstdio>
 11+#include <cstdlib>
 12+#include <cstring>
 13+#include <string>
 14+#include <fstream>
 15+#include <cerrno>
 16+
 17+#include <unistd.h>
 18+#include <mysql.h>
 19+
 20+#include "linksc.h"
 21+
 22+void
 23+mysql_query_ordie(MYSQL* mysql, char const *query)
 24+{
 25+ int i = mysql_query(mysql, query);
 26+ if (i) {
 27+ std::printf("mysql query failed: %s\n", mysql_error(mysql));
 28+ std::exit(8);
 29+ }
 30+}
 31+int
 32+main(int argc, char *argv[])
 33+{
 34+ MYSQL mysql;
 35+ mysql_init(&mysql);
 36+ mysql_options(&mysql, MYSQL_READ_DEFAULT_GROUP, "linksd");
 37+
 38+ if (!mysql_real_connect(&mysql, NULL, NULL, NULL, argv[1], 0, NULL, 0)) {
 39+ std::printf("mysql connect error: %s\n", mysql_error(&mysql));
 40+ return 1;
 41+ }
 42+
 43+ unlink(CACHE);
 44+ std::ofstream out(CACHE);
 45+ if (!out.good()) {
 46+ std::printf("Cannot open cache file: %s\n", std::strerror(errno));
 47+ return 1;
 48+ }
 49+
 50+ std::printf("retrieving links table...\n");
 51+ mysql_query_ordie(&mysql, "SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED");
 52+ mysql_query_ordie(&mysql,
 53+ "SELECT page_id, pl_from FROM pagelinks,page "
 54+ "WHERE pl_title=page_title and pl_namespace=page_namespace and page_namespace=0");
 55+ MYSQL_RES *res = mysql_use_result(&mysql);
 56+
 57+ MYSQL_ROW arow;
 58+ int i = 0;
 59+ while (arow = mysql_fetch_row(res)) {
 60+ if ((i++ % 10000) == 0)
 61+ std::printf("%d...\n", i - 1);
 62+ out << arow[1] << ' ' << arow[0] << '\n';
 63+ }
 64+ mysql_free_result(res);
 65+ out << '\n';
 66+
 67+ std::printf("ok\n");
 68+ std::printf("retrieving titles...\n");
 69+ mysql_query_ordie(&mysql, "SELECT page_title,page_id FROM page WHERE page_namespace=0");
 70+ res = mysql_use_result(&mysql);
 71+ while (arow = mysql_fetch_row(res)) {
 72+ out << arow[1] << ' ' << arow[0] << '\n';
 73+ }
 74+ mysql_free_result(res);
 75+ mysql_close(&mysql);
 76+ out.close();
 77+ std::printf("all done\n");
 78+}
Index: trunk/sixdeg/python/six_degrees
@@ -0,0 +1,286 @@
 2+#! /usr/sfw/bin/python
 3+#
 4+# Six degrees of Wikipedia: Python front-end.
 5+# This source code is released into the public domain.
 6+#
 7+# @(#)six_degrees 1.1 05/11/21 21:00:39
 8+
 9+
 10+
 11+#import cgitb; cgitb.enable()
 12+
 13+execfile("/u01/u/kate/.pydb.cf", globals())
 14+
 15+import MySQLdb
 16+import sys
 17+import cgi
 18+import os
 19+import zlib
 20+import re
 21+
 22+print "Content-Type: text/html; charset=UTF-8"
 23+print
 24+
 25+db = MySQLdb.connect(db="enwiki", host=dbserver, user=dbuser, passwd=dbpassword)
 26+
 27+def getlink(fro, to):
 28+ fro = fro.strip()
 29+ to = to.strip()
 30+ result = "<couldn't find link [%s] -> [%s]...>" % (fro, to)
 31+ c = db.cursor()
 32+ c.execute("SELECT old_text, old_flags FROM page, revision, text WHERE page_namespace=0 AND page_title=%s AND page_latest=rev_id AND old_id=rev_text_id", fro.strip().replace(' ', '_'))
 33+ t = c.fetchone()
 34+ try:
 35+ if t != None:
 36+ text = "".join(t[0])
 37+ flags = "".join(t[1])
 38+ if flags.find('gzip') != -1:
 39+ text = zlib.decompress(text, -15)
 40+ reg = r'\[\['+re.escape(to).replace('\_', '[_ ]') #+r'\]\]'
 41+ r = re.compile(reg, re.I)
 42+ res = r.search(text)
 43+ if res != None:
 44+ start = res.start() - 40
 45+ end = res.end() + 40
 46+ if start < 0:
 47+ start = 0
 48+ if end >= len(text):
 49+ end = len(text) - 1
 50+ result = "..." + text[start:end] + "..."
 51+ else:
 52+ result = "<no text found [%s]...>" % reg
 53+ except zlib.error, err:
 54+ result = "<couldn't decompress text: %s...>" % err
 55+ return cgi.escape(result)
 56+
 57+form = cgi.FieldStorage()
 58+
 59+fromval = ''
 60+toval = ''
 61+
 62+def safe(s):
 63+ return cgi.escape(s).replace('"', "&quot;").replace("'", "&apos;")
 64+
 65+if form.has_key('from'):
 66+ fromval = safe(form['from'].value)
 67+if form.has_key('to'):
 68+ toval = safe(form['to'].value)
 69+
 70+print """
 71+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 72+<html>
 73+<head>
 74+<title>six degrees of wikipedia</title>
 75+<style type="text/css">
 76+span.link {
 77+ color: #999999;
 78+}
 79+body {
 80+ background-color: white;
 81+ padding-right: 0px;
 82+ margin-right: 0px;
 83+}
 84+input { border: solid 1px blue; background: white; color: black; }
 85+div.error {
 86+ display: inline;
 87+ border: solid 1px #ff8888;
 88+}
 89+span.error {
 90+ padding-left: 1em;
 91+ padding-right: 1em;
 92+ background-color: #ffdcdc;
 93+ font-weight: bold;
 94+ border-right: solid 1px #ff8888;
 95+}
 96+span.errtext {
 97+ padding-left: 1em;
 98+ padding-right: 1em;
 99+}
 100+div.result {
 101+ border: solid 1px #0000ff;
 102+ width: 75%;
 103+ margin-left: auto; margin-right: auto;
 104+ text-align: center;
 105+}
 106+div.answer {
 107+ width: 100%;
 108+ text-align: center;
 109+ border-bottom: solid 1px #0000ff;
 110+ background-color: #ddddff;
 111+}
 112+span.art {
 113+}
 114+a {
 115+ color: #0000C0;
 116+}
 117+a:hover {
 118+ text-decoration: underline;
 119+}
 120+</style>
 121+</head>
 122+<body>
 123+<div style="text-align: right; padding: 0px; margin: 0px"><img src="/~kate/6deg.png" alt="" /><br/></div>
 124+<div style="text-align: center">
 125+<i>
 126+<a href="http://en.wikipedia.org/wiki/Iterative_deepening_depth-first_search">iterative deepening</a>
 127+<a href="http://en.wikipedia.org/wiki/Depth-first_search">depth first search</a>
 128+<a href="http://en.wikipedia.org/wiki/Shortest_path_problem">shortest path</a>
 129+query tool for <a href="http://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>...</i><br/>
 130+<i>six degrees</i> finds the shortest path between any two Wikipedia articles in the
 131+main namespace using wiki links
 132+</div>
 133+<div style="padding-top: 35px;">
 134+<form method="get" action="six_degrees">
 135+<center>
 136+<strong>find path...</strong>
 137+from: <input type="text" name="from" value=\"""" + fromval + """\"/>
 138+to: <input type="text" name="to" value=\"""" + toval + """\" />
 139+<input type="submit" value="go" />
 140+</center>
 141+</form>
 142+"""
 143+
 144+def mklink(art):
 145+ a2 = art.replace('_', ' ')
 146+ return "<a href=\"http://en.wikipedia.org/wiki/" + safe(art) + "\">" + safe(a2) + "</a>"
 147+
 148+def chop(s):
 149+ if s[-1:] == '\n':
 150+ return s[:-1]
 151+ else:
 152+ return s
 153+
 154+def getrecent():
 155+ try:
 156+ f = file("/u01/u/kate/rcq", "r")
 157+ except IOError:
 158+ return ''
 159+ i = []
 160+ while True:
 161+ s,t,d = f.readline(), f.readline(), f.readline()
 162+ s = chop(s)
 163+ t = chop(t)
 164+
 165+ if '' in [s,t,d]:
 166+ break
 167+ i = i + [[s,t,d]]
 168+ i.reverse()
 169+ s = ''
 170+ for t in i[:10]:
 171+ s = s + '<li>' + mklink(t[0]) + ' -> ' + mklink(t[1]) + (" (%d hops)" % (int(t[2]) - 1))
 172+ return s
 173+
 174+footer = """
 175+<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em">
 176+<div style="text-align: center">
 177+<strong>notes:</strong>
 178+</div>
 179+<ul>
 180+<li>redirects are searched as well as articles</li>
 181+<li>using a redirect as the target will generally produce an inferior result</li>
 182+<li>article names are case sensitive except for the first letter, which is always capital</li>
 183+<li>removing year articles is on the TODO list</li>
 184+<li>it may take a while to process your query at times of high load. i am working on making the links server more
 185+efficient...</li>
 186+<li>six degrees was recently <a href="http://tools.wikimedia.de/~kate/pages/six-degrees-ct">mentioned</a> in the
 187+German computer magazine <i>c't</i>. fame! who'd've thought it ;-)</li>
 188+</ul>
 189+</div>
 190+</div>
 191+"""
 192+
 193+rc = getrecent()
 194+if rc != '':
 195+ footer = footer + """
 196+<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em">
 197+<div style="text-align: center">
 198+<strong>recent queries:</strong>
 199+<ul>"""
 200+ footer = footer + rc
 201+ footer = footer + "</ul></div></div>"
 202+
 203+footer = footer + """
 204+<hr/>
 205+<a href="https://www.mediawiki.org/"
 206+><img style="float: right; margin-right: 5px"
 207+ src="/images/wikimedia-toolserver-button.png" width="88" heigh="31" id="toolbutton" border="0" /></a><a
 208+href="http://www.sun.com/"
 209+><img style="float: right" src="/~kate/sun.gif" border="0" /></a>
 210+
 211+kate's tools:
 212+ <a href="count_edits">user edit counter</a>
 213+| <a href="archive_contribs">deleted contribs browser</a>
 214+| <strong>six degrees of wikipedia</strong>
 215+| <a href="mailto:keturner@livejournal.com">send feedback...</a>
 216+</div></body></html>"""
 217+
 218+def error(text):
 219+ print "<center><div class='error'><span class='error'>error:</span><span class='errtext'>%s</span></div></center>" % cgi.escape(text)
 220+ print footer
 221+ sys.exit()
 222+
 223+if not (form.has_key('from') and form.has_key('to')):
 224+ print footer
 225+ sys.exit()
 226+
 227+def cfirst(s):
 228+ return s[0].upper() + s[1:]
 229+srcname,targname = cfirst(form['from'].value.replace(' ', '_')), cfirst(form['to'].value.replace(' ', '_'))
 230+
 231+if len(srcname) > 255:
 232+ error("source article %s does not exist" % srcname)
 233+if len(targname) > 255:
 234+ error("target article %s does not exist" % targname)
 235+def findlink(src, dst):
 236+ (sin, sout) = os.popen2("/u01/u/kate/linksc")
 237+ sin.write(src + '\n')
 238+ sin.write(dst + '\n')
 239+ sin.flush()
 240+ i = sout.readline()
 241+ if i == "ERROR\n":
 242+ err = sout.readline()
 243+ if err == "NO_FROM\n":
 244+ error("source article %s does not exist" % src)
 245+ elif err == "NO_TO\n":
 246+ error("target article %s does not exist" % dst)
 247+ elif err == "NO_CONNECT\n":
 248+ error("could not contact links server")
 249+ else:
 250+ error("unknown error")
 251+ i = []
 252+ while True:
 253+ nid = sout.readline()
 254+ if nid == '':
 255+ return i
 256+ i = i + [nid]
 257+
 258+def mklink(art):
 259+ a2 = art.replace('_', ' ')
 260+ return "<a href=\"http://en.wikipedia.org/wiki/" + safe(art) + "\">" + cgi.escape(a2) + "</a>"
 261+
 262+def writelog(src, targ, degs):
 263+ try:
 264+ f = file("/u01/u/kate/rcq", "a")
 265+ f.write(src + '\n' + targ + '\n' + str(degs) + '\n')
 266+ except IOError:
 267+ return
 268+ f.close()
 269+
 270+answer = findlink(srcname, targname)
 271+if answer != []:
 272+ writelog(srcname, targname, len(answer))
 273+ print "<div class='result'>"
 274+ print "<div class='answer'>%d degrees of separation</div>" % (len(answer) - 1)
 275+ print "<span class='art'>%s</span><br/>" % mklink(answer[0]),
 276+ print "<span class='link'>%s</span><br/>" % getlink(answer[0], answer[1])
 277+ j = 1
 278+ for i in answer[1:]:
 279+ print "<span class='art'>%s</span><br/>" % mklink(i)
 280+ if j + 1 <= len(answer)-1:
 281+ print "<span class='link'>%s</span><br/>" % getlink(answer[j], answer[j + 1])
 282+ j = j + 1
 283+ #print "<span class='art'>%s</span>" % mklink(targname)
 284+ print "</div>"
 285+ print footer
 286+ sys.exit()
 287+error("no route found after %d degrees..." % 10)