Index: trunk/sixdeg/linksc.cc |
— | — | @@ -0,0 +1,38 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Front-end client (command-line) |
| 4 | + * |
| 5 | + * Linux version: modified to use AF_LOCAL sockets. |
| 6 | + */ |
| 7 | + |
| 8 | +#pragma ident "@(#)linksc.cc 1.3 06/09/20 00:12:50" |
| 9 | + |
| 10 | +#include <iostream> |
| 11 | +#include <string> |
| 12 | +#include <vector> |
| 13 | + |
| 14 | +#include "linksc.h" |
| 15 | + |
| 16 | +int |
| 17 | +main(int argc, char *argv[]) |
| 18 | +{ |
| 19 | + std::string src, dst; |
| 20 | + std::getline(std::cin, src); |
| 21 | + std::getline(std::cin, dst); |
| 22 | + std::vector<std::string> result; |
| 23 | + int status = linksc_findpath(result, src, dst); |
| 24 | + switch (status) { |
| 25 | + case 0: |
| 26 | + std::cout << "ERROR\nNO_FROM\n"; |
| 27 | + return 0; |
| 28 | + case 1: |
| 29 | + std::cout << "ERROR\nNO_TO\n"; |
| 30 | + return 0; |
| 31 | + case 3: |
| 32 | + std::cout << "ERROR\nNO_CONNECT\n"; |
| 33 | + return 0; |
| 34 | + } |
| 35 | + std::cout << "OK\n"; |
| 36 | + for (int i = 0; i < result.size(); ++i) |
| 37 | + std::cout << result[i] << '\n'; |
| 38 | +} |
| 39 | + |
Index: trunk/sixdeg/linksc_jni.cc |
— | — | @@ -0,0 +1,50 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: JNI client interface. |
| 4 | + * This source code is released into the public domain. |
| 5 | + */ |
| 6 | + |
| 7 | +#pragma ident "@(#)linksc_jni.cc 1.1 05/11/21 21:00:23" |
| 8 | + |
| 9 | +#include <string> |
| 10 | +#include <vector> |
| 11 | + |
| 12 | +#include <jni.h> |
| 13 | + |
| 14 | +#include "linksc.h" |
| 15 | + |
| 16 | +#include "org_wikimedia_links_linksc.h" |
| 17 | + |
| 18 | +extern "C" JNIEXPORT jobjectArray JNICALL |
| 19 | +Java_org_wikimedia_links_linksc_findPath (JNIEnv *env, jobject o, jstring jfrom, jstring jto) |
| 20 | +{ |
| 21 | + std::string from, to; |
| 22 | + from = env->GetStringUTFChars(jfrom, 0); |
| 23 | + env->ReleaseStringUTFChars(jfrom, 0); |
| 24 | + to = env->GetStringUTFChars(jto, 0); |
| 25 | + env->ReleaseStringUTFChars(jto, 0); |
| 26 | + |
| 27 | + std::vector<std::string> result; |
| 28 | + jclass error; |
| 29 | + jobjectArray resultarr; |
| 30 | + int status = linksc_findpath(result, from, to); |
| 31 | + const char *errorstr = "An unknown error occured"; |
| 32 | + switch (status) { |
| 33 | + case 0: errorstr = "Source article does not exist."; break; |
| 34 | + case 1: errorstr = "Target article does not exist."; break; |
| 35 | + case 3: errorstr = "Could not connect to links server."; break; |
| 36 | + } |
| 37 | + if (status < 2 || status == 3) { |
| 38 | + error = env->FindClass("org/wikimedia/links/ErrorException"); |
| 39 | + env->ThrowNew(error, errorstr); |
| 40 | + return resultarr; |
| 41 | + } |
| 42 | + |
| 43 | + resultarr = (jobjectArray) env->NewObjectArray(result.size(), |
| 44 | + env->FindClass("java/lang/String"), env->NewStringUTF("")); |
| 45 | + |
| 46 | + for (int i = 0; i < result.size(); ++i) { |
| 47 | + env->SetObjectArrayElement(resultarr, i, env->NewStringUTF(result[i].c_str())); |
| 48 | + } |
| 49 | + return resultarr; |
| 50 | +} |
| 51 | + |
Index: trunk/sixdeg/org/wikimedia/links/NoFromArticleException.java |
— | — | @@ -0,0 +1,13 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Java client. |
| 4 | + * This source code is released into the public domain. |
| 5 | + * |
| 6 | + * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/NoFromArticleException.java $ %E% %U% |
| 7 | + */ |
| 8 | +package org.wikimedia.links; |
| 9 | + |
| 10 | +public class NoFromArticleException extends ErrorException { |
| 11 | + public NoFromArticleException() { |
| 12 | + super("Source article does not exist."); |
| 13 | + } |
| 14 | +} |
Index: trunk/sixdeg/org/wikimedia/links/linksc.java |
— | — | @@ -0,0 +1,27 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Java client. |
| 4 | + * This source code is released into the public domain. |
| 5 | + * |
| 6 | + * @(#)linksc.java 1.1 05/11/21 21:02:32 |
| 7 | + */ |
| 8 | +package org.wikimedia.links; |
| 9 | + |
| 10 | +public class linksc { |
| 11 | + public native String[] findPath(String from, String to) throws ErrorException; |
| 12 | + static { |
| 13 | + System.loadLibrary("linksc_jni"); |
| 14 | + } |
| 15 | + |
| 16 | + public static void main(String[] args) { |
| 17 | + linksc c = new linksc(); |
| 18 | + String[] result = null; |
| 19 | + try { |
| 20 | + result = c.findPath(args[0], args[1]); |
| 21 | + } catch (ErrorException e) { |
| 22 | + System.out.printf("Error: %s\n", e.geterror()); |
| 23 | + return; |
| 24 | + } |
| 25 | + for (String s: result) |
| 26 | + System.out.println(s); |
| 27 | + } |
| 28 | +} |
Index: trunk/sixdeg/org/wikimedia/links/ConnectException.java |
— | — | @@ -0,0 +1,13 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Java client. |
| 4 | + * This source code is released into the public domain. |
| 5 | + * |
| 6 | + * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/ConnectException.java $ %E% %U% |
| 7 | + */ |
| 8 | +package org.wikimedia.links; |
| 9 | + |
| 10 | +public class ConnectException extends ErrorException { |
| 11 | + public ConnectException() { |
| 12 | + super("Could not connect to links server."); |
| 13 | + } |
| 14 | +} |
Index: trunk/sixdeg/org/wikimedia/links/ErrorException.java |
— | — | @@ -0,0 +1,19 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Java client. |
| 4 | + * This source code is released into the public domain. |
| 5 | + * |
| 6 | + * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/ErrorException.java $ %E% %U% |
| 7 | + */ |
| 8 | +package org.wikimedia.links; |
| 9 | + |
| 10 | +public class ErrorException extends Exception { |
| 11 | + String err; |
| 12 | + |
| 13 | + public ErrorException(String what) { |
| 14 | + err = what; |
| 15 | + } |
| 16 | + |
| 17 | + public String geterror() { |
| 18 | + return err; |
| 19 | + } |
| 20 | +} |
Index: trunk/sixdeg/org/wikimedia/links/NoToArticleException.java |
— | — | @@ -0,0 +1,13 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Java client. |
| 4 | + * This source code is released into the public domain. |
| 5 | + * |
| 6 | + * $URL: file:///home/river/s2s/linksd/org/wikimedia/links/NoToArticleException.java $ %E% %U% |
| 7 | + */ |
| 8 | +package org.wikimedia.links; |
| 9 | + |
| 10 | +public class NoToArticleException extends ErrorException { |
| 11 | + public NoToArticleException() { |
| 12 | + super("Target article does not exist."); |
| 13 | + } |
| 14 | +} |
Index: trunk/sixdeg/linksd.cc |
— | — | @@ -0,0 +1,354 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Server. |
| 4 | + * This source code is released into the public domain. |
| 5 | + * |
| 6 | + * Linux version, modified to use AF_UNIX socket instead of doors 2006-09-20. |
| 7 | + */ |
| 8 | + |
| 9 | +#pragma ident "@(#)linksd.cc 1.3 07/01/24 14:14:48" |
| 10 | + |
| 11 | +#include <sys/types.h> |
| 12 | +#include <sys/socket.h> |
| 13 | +#include <sys/un.h> |
| 14 | + |
| 15 | +#include <iostream> |
| 16 | +#include <map> |
| 17 | +#include <list> |
| 18 | +#include <set> |
| 19 | +#include <vector> |
| 20 | +#include <cstdio> |
| 21 | +#include <cstdlib> |
| 22 | +#include <cstring> |
| 23 | +#include <cassert> |
| 24 | +#include <queue> |
| 25 | +#include <string> |
| 26 | +#include <algorithm> |
| 27 | +#include <utility> |
| 28 | +#include <fstream> |
| 29 | +#include <sstream> |
| 30 | +#include <exception> |
| 31 | + |
| 32 | +#include <unistd.h> |
| 33 | +#include <mysql.h> |
| 34 | +#include <fcntl.h> |
| 35 | + |
| 36 | +#include "linksc.h" |
| 37 | + |
| 38 | +static void handle_request(int s, char *argp, size_t argz); |
| 39 | + |
| 40 | +std::vector<std::string> names; |
| 41 | +std::map<std::string, int> ids; |
| 42 | +std::vector<int> isdate; |
| 43 | +std::vector<std::vector<int> > adjacency; |
| 44 | + |
| 45 | +/* |
| 46 | + * Is this article a date? |
| 47 | + */ |
| 48 | +bool |
| 49 | +is_date(std::string name) { |
| 50 | +struct std::tm res; |
| 51 | +std::string::size_type t; |
| 52 | +bool a, b; |
| 53 | + while ((t = name.find_first_of("_")) != std::string::npos) |
| 54 | + name[t] = ' '; |
| 55 | + std::memset(&res, 0, sizeof(res)); |
| 56 | + a = strptime(name.c_str(), "%b %d", &res) != NULL; |
| 57 | + std::memset(&res, 0, sizeof(res)); |
| 58 | + b = strptime(name.c_str(), "%Y", &res) != NULL; |
| 59 | + |
| 60 | + if (a || (b && name.length() <= 4 && |
| 61 | + /* wtf... strptime("%Y") on Solaris will return a "valid" year for |
| 62 | + * *any* string of four or less characters */ |
| 63 | + name.find_first_not_of("0123456789") == std::string::npos)) |
| 64 | + return true; |
| 65 | + else return false; |
| 66 | +} |
| 67 | + |
| 68 | +/* |
| 69 | + * I didn't write this function. I don't even know if it works correctly :-). However, |
| 70 | + * it seems to return the right results. (Credit: ZorbaTHut @ EFnet #c++) |
| 71 | + */ |
| 72 | +std::vector<int> |
| 73 | +findPath(int src, int dst, bool ign_date) { |
| 74 | +std::vector<int> back; |
| 75 | +std::deque<int> next; |
| 76 | + |
| 77 | + back.clear(); |
| 78 | + back.resize(adjacency.size(), -1); |
| 79 | + next.clear(); |
| 80 | + back.at(src) = -2; |
| 81 | + next.push_back(src); |
| 82 | + |
| 83 | + while (next.size()) { |
| 84 | + int ts = next.at(0); |
| 85 | + next.pop_front(); |
| 86 | + |
| 87 | + if (ts == dst) { |
| 88 | + std::vector<int> path; |
| 89 | + int lastlink = back[dst]; |
| 90 | + path.push_back(dst); |
| 91 | + |
| 92 | + while (lastlink != -2) { |
| 93 | + assert(lastlink != -1); |
| 94 | + path.push_back(lastlink); |
| 95 | + lastlink = back.at(lastlink); |
| 96 | + } |
| 97 | + std::reverse(path.begin(), path.end()); |
| 98 | + return path; |
| 99 | + } |
| 100 | + |
| 101 | + for (int i = 0; i < adjacency.at(ts).size(); i++) { |
| 102 | + if (ign_date && isdate[adjacency.at(ts).at(i)]) |
| 103 | + continue; |
| 104 | + if (back.at(adjacency.at(ts).at(i)) == -1) { |
| 105 | + back.at(adjacency.at(ts).at(i)) = ts; |
| 106 | + next.push_back(adjacency.at(ts).at(i)); |
| 107 | + } |
| 108 | + } |
| 109 | + } |
| 110 | + return std::vector<int>(); |
| 111 | +} |
| 112 | + |
| 113 | +static void * |
| 114 | +start_request(void *arg) |
| 115 | +{ |
| 116 | +int s = (int)(uintptr_t)arg; |
| 117 | +uint32_t sz; |
| 118 | +int i; |
| 119 | +std::vector<char> buf; |
| 120 | + if ((i = read(s, &sz, sizeof(sz))) < sizeof(sz)) { |
| 121 | + if (i == -1) |
| 122 | + std::perror("read"); |
| 123 | + close(s); |
| 124 | + return 0; |
| 125 | + } |
| 126 | + |
| 127 | + try { |
| 128 | + buf.resize(sz); |
| 129 | + } catch (std::bad_alloc& e) { |
| 130 | + std::cerr << "out of memory for client request!\n"; |
| 131 | + close(s); |
| 132 | + return 0; |
| 133 | + } |
| 134 | + |
| 135 | + read(s, &buf[0], sz); |
| 136 | + handle_request(s, &buf[0], buf.size()); |
| 137 | + close(s); |
| 138 | + return 0; |
| 139 | +} |
| 140 | + |
| 141 | +static void |
| 142 | +handle_request(int s, char *argp, size_t argz) |
| 143 | +{ |
| 144 | + /* |
| 145 | + * Data format: |
| 146 | + * "<uint32>From<uint32>To" |
| 147 | + * The ints contain the size of from and to, respectively. |
| 148 | + * |
| 149 | + * Result format: |
| 150 | + * "<char><text...>" |
| 151 | + * <char> might be: |
| 152 | + * 0: From article did not exist |
| 153 | + * 1: To article did not exist |
| 154 | + * 2: No search result was found. |
| 155 | + * 3: Some other error occured. |
| 156 | + * 4: Result okay, data follows. |
| 157 | + * |
| 158 | + * In case of 3, the rest of the buffer contains a series of items: |
| 159 | + * "<uint32><text>" |
| 160 | + * where uint32 is the length of the next item, and text is the article name. |
| 161 | + */ |
| 162 | + std::string from, to; |
| 163 | + std::vector<char> result; |
| 164 | + bool ign_date = false; |
| 165 | + char *p = argp; |
| 166 | + uint32_t l; |
| 167 | + if (argz < 8) { |
| 168 | + char err[4]; |
| 169 | + *(uint32_t *)err = 3; |
| 170 | + result.resize(4); |
| 171 | + result.assign(err, err + 4); |
| 172 | + l = result.size(); |
| 173 | + write(s, &l, sizeof(l)); |
| 174 | + write(s, &result[0], result.size()); |
| 175 | + return; |
| 176 | + } |
| 177 | + l = *(uint32_t *)argp; |
| 178 | + argz -= 4; |
| 179 | + argp += 4; |
| 180 | + if (l > argz) { |
| 181 | + char err[4]; |
| 182 | + *(uint32_t *)err = 3; |
| 183 | + result.resize(4); |
| 184 | + result.assign(err, err + 4); |
| 185 | + l = result.size(); |
| 186 | + write(s, &l, sizeof(l)); |
| 187 | + write(s, &result[0], result.size()); |
| 188 | + return; |
| 189 | + } |
| 190 | + /* This means ignore dates - it's ugly, but avoids an API change */ |
| 191 | + if (*argp == '#') { |
| 192 | + from.assign(argp + 1, argp + l); |
| 193 | + ign_date = true; |
| 194 | + } else |
| 195 | + from.assign(argp, argp + l); |
| 196 | + argp += l; |
| 197 | + argz -= l; |
| 198 | + if (argz < 4) { |
| 199 | + char err[4]; |
| 200 | + *(uint32_t *)err = 3; |
| 201 | + result.resize(4); |
| 202 | + result.assign(err, err + 4); |
| 203 | + l = result.size(); |
| 204 | + write(s, &l, sizeof(l)); |
| 205 | + write(s, &result[0], result.size()); |
| 206 | + return; |
| 207 | + } |
| 208 | + l = *(uint32_t *)argp; |
| 209 | + argp += 4; |
| 210 | + if (l > argz) { |
| 211 | + char err[4]; |
| 212 | + *(uint32_t *)err = 3; |
| 213 | + result.resize(4); |
| 214 | + result.assign(err, err + 4); |
| 215 | + l = result.size(); |
| 216 | + write(s, &l, sizeof(l)); |
| 217 | + write(s, &result[0], result.size()); |
| 218 | + return; |
| 219 | + } |
| 220 | + to.assign(argp, argp + l); |
| 221 | + int fromid, toid; |
| 222 | + if (ids.find(from) == ids.end()) { |
| 223 | + char err[4]; |
| 224 | + *(uint32_t *)err = 0; |
| 225 | + result.resize(4); |
| 226 | + result.assign(err, err + 4); |
| 227 | + l = result.size(); |
| 228 | + write(s, &l, sizeof(l)); |
| 229 | + write(s, &result[0], result.size()); |
| 230 | + return; |
| 231 | + } |
| 232 | + fromid = ids[from]; |
| 233 | + if (ids.find(to) == ids.end()) { |
| 234 | + char err[4]; |
| 235 | + *(uint32_t *)err = 1; |
| 236 | + result.resize(4); |
| 237 | + result.assign(err, err + 4); |
| 238 | + l = result.size(); |
| 239 | + write(s, &l, sizeof(l)); |
| 240 | + write(s, &result[0], result.size()); |
| 241 | + return; |
| 242 | + } |
| 243 | + toid = ids[to]; |
| 244 | + std::vector<int> links = findPath(fromid, toid, ign_date); |
| 245 | + result.resize(4); |
| 246 | + *(uint32_t *)&result[0] = 3; |
| 247 | + for (std::vector<int>::const_iterator it = links.begin(), end = links.end(); it != end; ++it) |
| 248 | + { |
| 249 | + std::string s = names.at(*it); |
| 250 | + char len[4]; |
| 251 | + *(uint32_t*)len = s.size(); |
| 252 | + result.insert(result.end(), len, len + 4); |
| 253 | + result.insert(result.end(), s.begin(), s.end()); |
| 254 | + } |
| 255 | + l = result.size(); |
| 256 | + write(s, &l, sizeof(l)); |
| 257 | + write(s, &result[0], result.size()); |
| 258 | +} |
| 259 | + |
| 260 | +int |
| 261 | +main(int argc, char *argv[]) |
| 262 | +{ |
| 263 | + std::ifstream in(CACHE); |
| 264 | + std::string l; |
| 265 | + std::printf("retrieving links table...\n"); |
| 266 | + while (std::getline(in, l)) { |
| 267 | + if (l.empty()) |
| 268 | + break; |
| 269 | + int l_from, l_to; |
| 270 | + std::istringstream str(l); |
| 271 | + str >> l_from >> l_to; |
| 272 | + if (l_from >= adjacency.size()) |
| 273 | + adjacency.resize(l_from + 1); |
| 274 | + std::vector<int>& l = adjacency.at(l_from); |
| 275 | + l.insert(l.end(), l_to); |
| 276 | + } |
| 277 | + |
| 278 | + std::printf("ok\n"); |
| 279 | + std::printf("retrieving titles...\n"); |
| 280 | + while (std::getline(in, l)) { |
| 281 | + int l_id; |
| 282 | + std::string l_ttl; |
| 283 | + std::istringstream str(l); |
| 284 | + str >> l_id; |
| 285 | + std::getline(str, l_ttl); |
| 286 | + while (!l_ttl.empty() && l_ttl[0] == ' ') |
| 287 | + l_ttl.erase(l_ttl.begin()); |
| 288 | + if (l_id >= names.size()) { |
| 289 | + names.resize(l_id + 1); |
| 290 | + isdate.resize(l_id + 1); |
| 291 | + } |
| 292 | + if (is_date(l_ttl)) |
| 293 | + isdate[l_id] = 1; |
| 294 | + else isdate[l_id] = 0; |
| 295 | + names.at(l_id) = l_ttl; |
| 296 | + ids[l_ttl] = l_id; |
| 297 | + } |
| 298 | + std::printf("ok, %d links, %d titles\n", adjacency.size(), names.size()); |
| 299 | + std::printf("filtering links...\n"); |
| 300 | + for (int i = 1; i < adjacency.size(); ++i) { |
| 301 | + if (i >= names.size() || names[i].empty()) { |
| 302 | + adjacency.at(i).clear(); |
| 303 | + continue; |
| 304 | + } |
| 305 | + for (std::vector<int>::iterator it = adjacency[i].begin(); it != adjacency[i].end();) |
| 306 | + if (*it >= names.size() || names[*it].empty()) |
| 307 | + it = adjacency.at(i).erase(it); |
| 308 | + else ++it; |
| 309 | + } |
| 310 | + std::printf("ok\n"); |
| 311 | + |
| 312 | + int did; |
| 313 | + struct sockaddr_un addr; |
| 314 | + |
| 315 | + if ((did = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) { |
| 316 | + std::perror("socket"); |
| 317 | + std::exit(1); |
| 318 | + } |
| 319 | + unlink(DOOR); |
| 320 | + std::memset(&addr, 0, sizeof(addr)); |
| 321 | + addr.sun_family = AF_LOCAL; |
| 322 | + strncpy(addr.sun_path, DOOR, sizeof(addr.sun_path)); |
| 323 | + if (bind(did, (struct sockaddr *)&addr, sizeof(addr)) == -1) { |
| 324 | + std::perror("bind"); |
| 325 | + std::exit(1); |
| 326 | + } |
| 327 | + |
| 328 | + if (listen(did, 5) == -1) { |
| 329 | + std::perror("listen"); |
| 330 | + std::exit(1); |
| 331 | + } |
| 332 | + |
| 333 | + for (;;) { |
| 334 | + int cli; |
| 335 | + struct sockaddr_un cliaddr; |
| 336 | + size_t clilen; |
| 337 | + clilen = sizeof(addr); |
| 338 | + std::memset(&cliaddr, 0, clilen); |
| 339 | + if ((cli = accept(did, (struct sockaddr *)&cliaddr, (socklen_t *)&clilen)) == -1) { |
| 340 | + std::perror("accept"); |
| 341 | + std::exit(1); |
| 342 | + } |
| 343 | + |
| 344 | + pthread_attr_t attr; |
| 345 | + pthread_attr_init(&attr); |
| 346 | + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); |
| 347 | + pthread_t tid; |
| 348 | + |
| 349 | + if (pthread_create(&tid, &attr, start_request, (void *)cli) == -1) { |
| 350 | + std::perror("pthread_create"); |
| 351 | + std::exit(1); |
| 352 | + } |
| 353 | + } |
| 354 | + close(did); |
| 355 | +} |
Index: trunk/sixdeg/linksc.h |
— | — | @@ -0,0 +1,19 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Client interface definitions. |
| 4 | + * This source code is released into the public domain. |
| 5 | + */ |
| 6 | + |
| 7 | +#ifndef LINKSC_H |
| 8 | +#define LINKSC_H |
| 9 | + |
| 10 | +#pragma ident "$URL: file:///home/river/s2s/linksd/linksc.h $ %E% %U%" |
| 11 | + |
| 12 | +#define DOOR "/home/river/.links.door" |
| 13 | +#define CACHE "/home/river/.links.cache" |
| 14 | + |
| 15 | +#include <vector> |
| 16 | +#include <string> |
| 17 | + |
| 18 | +int linksc_findpath(std::vector<std::string>& res, std::string from, std::string to); |
| 19 | + |
| 20 | +#endif |
Index: trunk/sixdeg/Makefile |
— | — | @@ -0,0 +1,55 @@ |
| 2 | +# Six degrees of Wikipedia: Makefile |
| 3 | +# This source code is released into the public domain. |
| 4 | +# |
| 5 | +# $URL: file:///home/river/s2s/linksd/Makefile $ %E% %U% |
| 6 | + |
| 7 | +all: linksd linksc liblinksc_jni.so linksc.jar sixdeg.war mkcache |
| 8 | + |
| 9 | +# do not use -library=stlport4 for linksd! it's broken because of a sun studio 11 bug |
| 10 | +linksd: linksd.cc linksc.h |
| 11 | + g++ -D_REENTRANT -g -O2 -I/usr/include/mysql -lmysqlclient -lz linksd.cc -o linksd |
| 12 | +linksc: linksc.o liblinksc.o linksc.h |
| 13 | + g++ -O2 linksc.o liblinksc.o -o linksc |
| 14 | +linksc.o: linksc.cc linksc.h |
| 15 | + g++ -O2 -c linksc.cc |
| 16 | +mkcache: mkcache.cc linksc.h |
| 17 | + g++ -O2 mkcache.cc -o mkcache -I/usr/include/mysql -L/opt/mysql50/lib/mysql -lmysqlclient -lz |
| 18 | +liblinksc.o: liblinksc.cc linksc.h |
| 19 | + g++ -fPIC -O2 -c liblinksc.cc |
| 20 | + |
| 21 | +liblinksc_jni.so: org/wikimedia/links/linksc.class liblinksc.o linksc_jni.cc |
| 22 | + javah -classpath . org.wikimedia.links.linksc |
| 23 | + g++ -fPIC -O2 -shared -I/usr/lib/j2sdk1.5-sun/include -I/usr/lib/j2sdk1.5-sun/include/linux \ |
| 24 | + linksc_jni.cc liblinksc.o -o liblinksc_jni.so |
| 25 | + |
| 26 | +org/wikimedia/links/linksc.class: org/wikimedia/links/linksc.java org/wikimedia/links/ErrorException.java |
| 27 | + javac org/wikimedia/links/linksc.java |
| 28 | +org/wikimedia/links/NoFromArticleException.class: org/wikimedia/links/NoFromArticleException.java |
| 29 | + javac org/wikimedia/links/NoFromArticleException.java |
| 30 | +org/wikimedia/links/NoToArticleException.class: org/wikimedia/links/NoToArticleException.java |
| 31 | + javac org/wikimedia/links/NoToArticleException.java |
| 32 | +org/wikimedia/links/ConnectException.class: org/wikimedia/links/ConnectException.java |
| 33 | + javac org/wikimedia/links/ConnectException.java |
| 34 | +org/wikimedia/links/ErrorException.class: org/wikimedia/links/ErrorException.java |
| 35 | + javac org/wikimedia/links/ErrorException.java |
| 36 | + |
| 37 | +linksc.jar: org/wikimedia/links/linksc.class org/wikimedia/links/NoFromArticleException.class org/wikimedia/links/NoToArticleException.class org/wikimedia/links/ConnectException.class org/wikimedia/links/ErrorException.class |
| 38 | + rm -f linksc.jar |
| 39 | + zip -r linksc.jar org/wikimedia/links/linksc.class org/wikimedia/links/NoFromArticleException.class org/wikimedia/links/NoToArticleException.class org/wikimedia/links/ConnectException.class org/wikimedia/links/ErrorException.class |
| 40 | + |
| 41 | +clean: |
| 42 | + rm -f linksc linksd org/wikimedia/links/*.class linksc_jni.so linkc.jar *.so *.o mkcache org_wikimedia_links_linksc.h sixdeg.war linksc.jar core |
| 43 | + |
| 44 | +sixdeg.war: linksc.jar webapp/src/index.jsp liblinksc_jni.so webapp/src/main.css webapp/src/6deg.png webapp/src/WEB-INF/web.xml |
| 45 | + rm -f sixdeg.war |
| 46 | + (cd webapp/src; zip -Dr ../../sixdeg.war . -x `find . | grep '\.svn'`) |
| 47 | +testdeploy: sixdeg.war |
| 48 | + /opt/SUNWappserver/appserver/bin/asadmin deploy --user kate --port 7070 --force --precompilejsp --contextroot sdtest --name sdtest sixdeg.war |
| 49 | +deploy: sixdeg.war |
| 50 | + /opt/SUNWappserver/appserver/bin/asadmin deploy --user kate --port 7070 --force --precompilejsp sixdeg.war |
| 51 | +dist: |
| 52 | + $(MAKE) clean |
| 53 | + (cd .. && find linksd -name '.*') | tail +1l >../exclude |
| 54 | + cd .. && tar cvfX linksd.tar exclude linksd |
| 55 | + cd .. && gzip -f linksd.tar |
| 56 | +.KEEP_STATE: |
Index: trunk/sixdeg/liblinksc.cc |
— | — | @@ -0,0 +1,124 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Client library. |
| 4 | + */ |
| 5 | + |
| 6 | +#pragma ident "@(#)liblinksc.cc 1.3 06/09/20 00:47:26" |
| 7 | + |
| 8 | +#include <sys/types.h> |
| 9 | +#include <sys/socket.h> |
| 10 | +#include <sys/un.h> |
| 11 | +#include <sys/stat.h> |
| 12 | +#include <sys/mman.h> |
| 13 | + |
| 14 | +#include <iostream> |
| 15 | +#include <cstdio> |
| 16 | +#include <cstdlib> |
| 17 | +#include <cstring> |
| 18 | +#include <string> |
| 19 | +#include <vector> |
| 20 | + |
| 21 | +#include <unistd.h> |
| 22 | +#include <fcntl.h> |
| 23 | + |
| 24 | +#include "linksc.h" |
| 25 | + |
| 26 | +int |
| 27 | +linksc_findpath(std::vector<std::string>& result, std::string src, std::string dst) |
| 28 | +{ |
| 29 | + std::vector<char> data; |
| 30 | + int s, l; |
| 31 | + struct sockaddr_un addr; |
| 32 | + |
| 33 | + std::memset(&addr, 0, sizeof(addr)); |
| 34 | + addr.sun_family = AF_LOCAL; |
| 35 | + std::strcpy(addr.sun_path, DOOR); |
| 36 | + if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) { |
| 37 | +perror("socket"); |
| 38 | + return 3; |
| 39 | + } |
| 40 | + if (connect(s, (struct sockaddr *)&addr, sizeof(addr)) == -1) { |
| 41 | +perror("connect"); |
| 42 | + close(s); |
| 43 | + return 3; |
| 44 | + } |
| 45 | + |
| 46 | + uint32_t i; |
| 47 | + i = src.size(); |
| 48 | + data.resize(4); |
| 49 | + *(uint32_t*)&data[0] = i; |
| 50 | + data.insert(data.end(), src.begin(), src.end()); |
| 51 | + |
| 52 | + i = dst.size(); |
| 53 | + data.resize(data.size() + 4); |
| 54 | + *(uint32_t*)&data[data.size() - 4] = i; |
| 55 | + data.insert(data.end(), dst.begin(), dst.end()); |
| 56 | + |
| 57 | + i = data.size(); |
| 58 | + if (write(s, &i, sizeof(i)) < sizeof(i)) { |
| 59 | + close(s); |
| 60 | + return 3; |
| 61 | + } |
| 62 | + |
| 63 | + if ((l = write(s, &data[0], data.size())) < data.size()) { |
| 64 | + close(s); |
| 65 | + return 3; |
| 66 | + } |
| 67 | + |
| 68 | + if (read(s, &i, sizeof(i)) < sizeof(i)) { |
| 69 | + close(s); |
| 70 | + return 3; |
| 71 | + } |
| 72 | + |
| 73 | + try { |
| 74 | + data.resize(i); |
| 75 | + } catch (std::bad_alloc&) { |
| 76 | + close(s); |
| 77 | + return 3; |
| 78 | + } |
| 79 | + |
| 80 | + if (read(s, &data[0], i) < i) { |
| 81 | + close(s); |
| 82 | + return 3; |
| 83 | + } |
| 84 | + |
| 85 | + if (i < 4) { |
| 86 | + close(s); |
| 87 | + return 3; |
| 88 | + } |
| 89 | + char *buf = &data[0]; |
| 90 | + void *oaddr = &data[0]; |
| 91 | + size_t osize = i; |
| 92 | + uint32_t status = *(uint32_t*)&data[0]; |
| 93 | + result.resize(0); |
| 94 | + switch (status) { |
| 95 | + case 0: |
| 96 | + close(s); |
| 97 | + return 0; |
| 98 | + case 1: |
| 99 | + close(s); |
| 100 | + return 1; |
| 101 | + case 2: |
| 102 | + close(s); |
| 103 | + return 3; |
| 104 | + case 3: |
| 105 | + break; |
| 106 | + default: |
| 107 | + close(s); |
| 108 | + return 2; |
| 109 | + } |
| 110 | + i -= 4; |
| 111 | + buf += 4; |
| 112 | + while (i) { |
| 113 | + std::string h; |
| 114 | + status = *(uint32_t*)buf; |
| 115 | + i -= 4; |
| 116 | + buf += 4; |
| 117 | + h.assign(buf, status); |
| 118 | + i -= status; |
| 119 | + buf += status; |
| 120 | + result.insert(result.end(), h); |
| 121 | + } |
| 122 | + close(s); |
| 123 | + return 4; |
| 124 | +} |
| 125 | + |
Index: trunk/sixdeg/webapp/src/wikimedia-toolserver-button.png |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/sixdeg/webapp/src/wikimedia-toolserver-button.png |
___________________________________________________________________ |
Added: svn:mime-type |
1 | 126 | + application/octet-stream |
Index: trunk/sixdeg/webapp/src/index.jsp |
— | — | @@ -0,0 +1,135 @@ |
| 2 | +<%-- |
| 3 | + Six degrees of Wikipedia: JSP front-end. |
| 4 | + This source code is released into the public domain. |
| 5 | + |
| 6 | + @(#)index.jsp 1.19 06/10/16 01:17:11 |
| 7 | +--%> |
| 8 | +<?xml version="1.0" encoding="UTF-8"?> |
| 9 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 10 | +<%@ page language="java" contentType="text/html; charset=UTF-8"%> |
| 11 | +<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %> |
| 12 | +<%@ taglib prefix="fn" uri="http://java.sun.com/jsp/jstl/functions" %> |
| 13 | +<% |
| 14 | +String[] path = null; |
| 15 | +String error = null; |
| 16 | +String from = request.getParameter("from"), to = request.getParameter("to"); |
| 17 | +org.wikimedia.links.linksc lc = new org.wikimedia.links.linksc(); |
| 18 | + |
| 19 | +if (from != null) |
| 20 | + from = from.trim(); |
| 21 | +if (to != null) |
| 22 | + to = to.trim(); |
| 23 | + |
| 24 | +String enc = request.getCharacterEncoding(); |
| 25 | +if (enc == null) enc = "<undefined>"; |
| 26 | +pageContext.setAttribute("encoding", enc); |
| 27 | + |
| 28 | +if (request.getCharacterEncoding() == null) { |
| 29 | + if (from != null) from = new String(from.getBytes("ISO-8859-1"), "UTF-8"); |
| 30 | + if (to != null) to = new String(to.getBytes("ISO-8859-1"), "UTF-8"); |
| 31 | +} |
| 32 | + |
| 33 | +if (from != null && from.length() > 0 && to != null && to.length() > 0) { |
| 34 | + String idp = request.getParameter("ign_dates"); |
| 35 | + boolean ign_date = idp != null && idp.equals("1"); |
| 36 | + String rfrom = from.substring(0, 1).toUpperCase() + from.substring(1, from.length()); |
| 37 | + String rto = to.substring(0, 1).toUpperCase() + to.substring(1, to.length()); |
| 38 | + if (ign_date) |
| 39 | + rfrom = "#" + rfrom; |
| 40 | + try { |
| 41 | + path = lc.findPath(rfrom.replaceAll(" ", "_"), rto.replaceAll(" ", "_")); |
| 42 | + } catch (org.wikimedia.links.ErrorException e) { |
| 43 | + error = e.geterror(); |
| 44 | + } |
| 45 | +} |
| 46 | +if (path != null && path.length == 0) |
| 47 | + error = "No route found after 10 degrees."; |
| 48 | + |
| 49 | +pageContext.setAttribute("error", error); |
| 50 | +pageContext.setAttribute("path", path); |
| 51 | +pageContext.setAttribute("from", from); |
| 52 | +pageContext.setAttribute("to", to); |
| 53 | +if (path != null) { |
| 54 | + pageContext.setAttribute("len", Integer.valueOf(path.length - 1)); |
| 55 | +} else { |
| 56 | + pageContext.setAttribute("len", Integer.valueOf(0)); |
| 57 | +} |
| 58 | +%> |
| 59 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 60 | +<head> |
| 61 | +<title>Six degrees of Wikipedia</title> |
| 62 | +<meta name="robots" content="index" /> |
| 63 | +<meta name="robots" content="follow" /> |
| 64 | +<link rel="stylesheet" href="main.css" /> |
| 65 | +</head> |
| 66 | +<body> |
| 67 | +<div style="text-align: right; padding: 0px; margin: 0px"><img src="6deg.png" alt="" /><br/></div> |
| 68 | +<div style="text-align: center"> |
| 69 | +<i> |
| 70 | +a <a href="http://en.wikipedia.org/wiki/Shortest_path">shortest path</a> |
| 71 | +query solver for the English |
| 72 | +<a href="http://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>...</i><br/> |
| 73 | +<i>six degrees</i> finds the shortest path between any two Wikipedia articles in the |
| 74 | +main namespace using wiki links |
| 75 | +</div> |
| 76 | + |
| 77 | +<div style="padding-top: 35px;"> |
| 78 | +<form method="get" action="index.jsp" accept-charset="UTF-8"> |
| 79 | +<center> |
| 80 | +<strong>find path...</strong> |
| 81 | +from: <input type="text" name="from" value="<c:out value="${from}"/>"/> |
| 82 | +to: <input type="text" name="to" value="<c:out value="${to}"/>" /> |
| 83 | +<input type="submit" value="go" /> |
| 84 | +<br /> |
| 85 | +<input type="checkbox" name="ign_dates" value="1" /> ignore date and year articles |
| 86 | +</center> |
| 87 | +</form> |
| 88 | + |
| 89 | +<% if (error != null) { %> |
| 90 | +<center><div class='error'><span class='error'>error:</span><span class='errtext'><c:out value="${error}" /></span></div></center> |
| 91 | +<% if (error.equals("No route found after 10 degrees.")) { %> |
| 92 | +<p class='return'><a |
| 93 | +href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&to=<c:out value='${fn:replace(from, " ", "_")}'/>" |
| 94 | +>Try in the other direction?</a></p> |
| 95 | +<% } %> |
| 96 | +<% } else if (path != null) { %> |
| 97 | +<div class='result'> |
| 98 | +<div class='answer'><c:out value="${len}"/> degrees of separation</div> |
| 99 | +<c:forEach items="${path}" var="hop"> |
| 100 | + <span class="art"><a href="http://en.wikipedia.org/wiki/<c:out value="${hop}"/>"><c:out value='${fn:replace(hop, "_", " ")}'/></a></span><br/> |
| 101 | +</c:forEach> |
| 102 | +</div> |
| 103 | +<p class='return'><a href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&to=<c:out value='${fn:replace(from, " ", "_")}'/>">View the return path?</a></p> |
| 104 | +<% } %> |
| 105 | +<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em"> |
| 106 | +<div style="text-align: center"> |
| 107 | +<strong>hints:</strong> |
| 108 | +</div> |
| 109 | +<ul> |
| 110 | +<li><em>it says my article doesn't exist?</em> - this is usually caused by the article being created later the last database update (often several months ago). alternatively, check your capitalisation.</li> |
| 111 | +<li>please <em>do</em> report any other problems with six degrees to me |
| 112 | +[<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</li> |
| 113 | +<li>redirects are searched as well as articles</li> |
| 114 | +<li>using a redirect as the target will generally produce an inferior result (be careful: "United Kingdom" is not a redirect, but "United kingdom" is)</li> |
| 115 | +<li>article names are case sensitive except for the first letter, which is always capital</li> |
| 116 | +<li>six degrees was recently <a href="http://tools.wikimedia.de/~river/pages/six-degrees-ct">mentioned</a> in the |
| 117 | + German computer magazine <i>c't</i>. fame! who'd've thought it ;-)</li> |
| 118 | +</ul> |
| 119 | +</div> |
| 120 | +</div> |
| 121 | + |
| 122 | +</a><a href="https://www.mediawiki.org/"><img |
| 123 | + src="wikimedia-toolserver-button.png" style="float: right" |
| 124 | + alt="Hosted by Wikimedia Toolserver" /></a> |
| 125 | +<a href="http://www.sun.com/"><img |
| 126 | + style="float: right" src="sun.gif" |
| 127 | + alt = "Powered by Sun Microsystems" /></a> |
| 128 | +<p> |
| 129 | +<a href="http://tools.wikimedia.de/~river/pages/projects/six-degrees">source code</a> | |
| 130 | +<a href="mailto:river@attenuate.org">send feedback...</a><br /> |
| 131 | +i'm poor. if you like <i>six degrees</i>, feel free to <a href="http://www.paypal.com/" |
| 132 | +>PayPal</a> some money to [<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</p> |
| 133 | +<span class='version'>Front-end version: @(#)index.jsp 1.19 06/10/16 01:17:11 [request encoding: <c:out value="${encoding}" />] |
| 134 | +</div> |
| 135 | +</body> |
| 136 | +</html> |
Index: trunk/sixdeg/webapp/src/WEB-INF/lib/standard.jar |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/sixdeg/webapp/src/WEB-INF/lib/standard.jar |
___________________________________________________________________ |
Added: svn:mime-type |
1 | 137 | + application/octet-stream |
Index: trunk/sixdeg/webapp/src/WEB-INF/lib/linksc.jar |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/sixdeg/webapp/src/WEB-INF/lib/linksc.jar |
___________________________________________________________________ |
Added: svn:mime-type |
2 | 138 | + application/octet-stream |
Index: trunk/sixdeg/webapp/src/WEB-INF/lib/jstl.jar |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/sixdeg/webapp/src/WEB-INF/lib/jstl.jar |
___________________________________________________________________ |
Added: svn:mime-type |
3 | 139 | + application/octet-stream |
Index: trunk/sixdeg/webapp/src/WEB-INF/web.xml |
— | — | @@ -0,0 +1,10 @@ |
| 2 | +<?xml version="1.0" encoding="UTF-8"?> |
| 3 | +<web-app xmlns="http://java.sun.com/xml/ns/j2ee" |
| 4 | + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| 5 | + xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd" |
| 6 | + version="2.4"> |
| 7 | + |
| 8 | + <description>Six degrees of Wikipedia</description> |
| 9 | + <display-name>Six degrees of Wikipedia</display-name> |
| 10 | + |
| 11 | +</web-app> |
Index: trunk/sixdeg/webapp/src/6deg.png |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/sixdeg/webapp/src/6deg.png |
___________________________________________________________________ |
Added: svn:mime-type |
1 | 12 | + application/octet-stream |
Index: trunk/sixdeg/webapp/src/sun.gif |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/sixdeg/webapp/src/sun.gif |
___________________________________________________________________ |
Added: svn:mime-type |
2 | 13 | + application/octet-stream |
Index: trunk/sixdeg/webapp/src/main.css |
— | — | @@ -0,0 +1,61 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: JSP front-end styling. |
| 4 | + * This source code is released into the public domain. |
| 5 | + * |
| 6 | + * @(#)main.css 1.4 05/11/21 23:40:14 |
| 7 | + */ |
| 8 | +span.link { |
| 9 | + color: #999999; |
| 10 | +} |
| 11 | +body { |
| 12 | + background-color: white; |
| 13 | + padding-right: 0px; |
| 14 | + margin-right: 0px; |
| 15 | +} |
| 16 | +input { border: solid 1px blue; background: white; color: black; } |
| 17 | +div.error { |
| 18 | + display: inline; |
| 19 | + border: solid 1px #ff8888; |
| 20 | +} |
| 21 | +span.error { |
| 22 | + padding-left: 1em; |
| 23 | + padding-right: 1em; |
| 24 | + background-color: #ffdcdc; |
| 25 | + font-weight: bold; |
| 26 | + border-right: solid 1px #ff8888; |
| 27 | +} |
| 28 | +span.errtext { |
| 29 | + padding-left: 1em; |
| 30 | + padding-right: 1em; |
| 31 | +} |
| 32 | +div.result { |
| 33 | + border: solid 1px #0000ff; |
| 34 | + width: 75%; |
| 35 | + margin-left: auto; margin-right: auto; |
| 36 | + text-align: center; |
| 37 | + padding: 0em 0 0.5em 0; |
| 38 | +} |
| 39 | +div.answer { |
| 40 | + width: 100%; |
| 41 | + text-align: center; |
| 42 | + border-bottom: solid 1px #0000ff; |
| 43 | + background-color: #ddddff; |
| 44 | + margin-bottom: 0.5em; |
| 45 | +} |
| 46 | +span.art { |
| 47 | +} |
| 48 | +a { |
| 49 | + color: #0000C0; |
| 50 | +} |
| 51 | +a:hover { |
| 52 | + text-decoration: underline; |
| 53 | +} |
| 54 | +img { |
| 55 | + border: none; |
| 56 | +} |
| 57 | +.return { |
| 58 | + text-align: center; |
| 59 | +} |
| 60 | +span.version { |
| 61 | + color: #999999; |
| 62 | +} |
Index: trunk/sixdeg/mkcache.cc |
— | — | @@ -0,0 +1,77 @@ |
| 2 | +/* |
| 3 | + * Six degrees of Wikipedia: Database cacher. |
| 4 | + * This source code is released into the public domain. |
| 5 | + */ |
| 6 | + |
| 7 | +#pragma ident "@(#)mkcache.cc 1.1 05/11/21 21:00:29" |
| 8 | + |
| 9 | +#include <iostream> |
| 10 | +#include <cstdio> |
| 11 | +#include <cstdlib> |
| 12 | +#include <cstring> |
| 13 | +#include <string> |
| 14 | +#include <fstream> |
| 15 | +#include <cerrno> |
| 16 | + |
| 17 | +#include <unistd.h> |
| 18 | +#include <mysql.h> |
| 19 | + |
| 20 | +#include "linksc.h" |
| 21 | + |
| 22 | +void |
| 23 | +mysql_query_ordie(MYSQL* mysql, char const *query) |
| 24 | +{ |
| 25 | + int i = mysql_query(mysql, query); |
| 26 | + if (i) { |
| 27 | + std::printf("mysql query failed: %s\n", mysql_error(mysql)); |
| 28 | + std::exit(8); |
| 29 | + } |
| 30 | +} |
| 31 | +int |
| 32 | +main(int argc, char *argv[]) |
| 33 | +{ |
| 34 | + MYSQL mysql; |
| 35 | + mysql_init(&mysql); |
| 36 | + mysql_options(&mysql, MYSQL_READ_DEFAULT_GROUP, "linksd"); |
| 37 | + |
| 38 | + if (!mysql_real_connect(&mysql, NULL, NULL, NULL, argv[1], 0, NULL, 0)) { |
| 39 | + std::printf("mysql connect error: %s\n", mysql_error(&mysql)); |
| 40 | + return 1; |
| 41 | + } |
| 42 | + |
| 43 | + unlink(CACHE); |
| 44 | + std::ofstream out(CACHE); |
| 45 | + if (!out.good()) { |
| 46 | + std::printf("Cannot open cache file: %s\n", std::strerror(errno)); |
| 47 | + return 1; |
| 48 | + } |
| 49 | + |
| 50 | + std::printf("retrieving links table...\n"); |
| 51 | + mysql_query_ordie(&mysql, "SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"); |
| 52 | + mysql_query_ordie(&mysql, |
| 53 | + "SELECT page_id, pl_from FROM pagelinks,page " |
| 54 | + "WHERE pl_title=page_title and pl_namespace=page_namespace and page_namespace=0"); |
| 55 | + MYSQL_RES *res = mysql_use_result(&mysql); |
| 56 | + |
| 57 | + MYSQL_ROW arow; |
| 58 | + int i = 0; |
| 59 | + while (arow = mysql_fetch_row(res)) { |
| 60 | + if ((i++ % 10000) == 0) |
| 61 | + std::printf("%d...\n", i - 1); |
| 62 | + out << arow[1] << ' ' << arow[0] << '\n'; |
| 63 | + } |
| 64 | + mysql_free_result(res); |
| 65 | + out << '\n'; |
| 66 | + |
| 67 | + std::printf("ok\n"); |
| 68 | + std::printf("retrieving titles...\n"); |
| 69 | + mysql_query_ordie(&mysql, "SELECT page_title,page_id FROM page WHERE page_namespace=0"); |
| 70 | + res = mysql_use_result(&mysql); |
| 71 | + while (arow = mysql_fetch_row(res)) { |
| 72 | + out << arow[1] << ' ' << arow[0] << '\n'; |
| 73 | + } |
| 74 | + mysql_free_result(res); |
| 75 | + mysql_close(&mysql); |
| 76 | + out.close(); |
| 77 | + std::printf("all done\n"); |
| 78 | +} |
Index: trunk/sixdeg/python/six_degrees |
— | — | @@ -0,0 +1,286 @@ |
| 2 | +#! /usr/sfw/bin/python |
| 3 | +# |
| 4 | +# Six degrees of Wikipedia: Python front-end. |
| 5 | +# This source code is released into the public domain. |
| 6 | +# |
| 7 | +# @(#)six_degrees 1.1 05/11/21 21:00:39 |
| 8 | + |
| 9 | + |
| 10 | + |
| 11 | +#import cgitb; cgitb.enable() |
| 12 | + |
| 13 | +execfile("/u01/u/kate/.pydb.cf", globals()) |
| 14 | + |
| 15 | +import MySQLdb |
| 16 | +import sys |
| 17 | +import cgi |
| 18 | +import os |
| 19 | +import zlib |
| 20 | +import re |
| 21 | + |
| 22 | +print "Content-Type: text/html; charset=UTF-8" |
| 23 | +print |
| 24 | + |
| 25 | +db = MySQLdb.connect(db="enwiki", host=dbserver, user=dbuser, passwd=dbpassword) |
| 26 | + |
| 27 | +def getlink(fro, to): |
| 28 | + fro = fro.strip() |
| 29 | + to = to.strip() |
| 30 | + result = "<couldn't find link [%s] -> [%s]...>" % (fro, to) |
| 31 | + c = db.cursor() |
| 32 | + c.execute("SELECT old_text, old_flags FROM page, revision, text WHERE page_namespace=0 AND page_title=%s AND page_latest=rev_id AND old_id=rev_text_id", fro.strip().replace(' ', '_')) |
| 33 | + t = c.fetchone() |
| 34 | + try: |
| 35 | + if t != None: |
| 36 | + text = "".join(t[0]) |
| 37 | + flags = "".join(t[1]) |
| 38 | + if flags.find('gzip') != -1: |
| 39 | + text = zlib.decompress(text, -15) |
| 40 | + reg = r'\[\['+re.escape(to).replace('\_', '[_ ]') #+r'\]\]' |
| 41 | + r = re.compile(reg, re.I) |
| 42 | + res = r.search(text) |
| 43 | + if res != None: |
| 44 | + start = res.start() - 40 |
| 45 | + end = res.end() + 40 |
| 46 | + if start < 0: |
| 47 | + start = 0 |
| 48 | + if end >= len(text): |
| 49 | + end = len(text) - 1 |
| 50 | + result = "..." + text[start:end] + "..." |
| 51 | + else: |
| 52 | + result = "<no text found [%s]...>" % reg |
| 53 | + except zlib.error, err: |
| 54 | + result = "<couldn't decompress text: %s...>" % err |
| 55 | + return cgi.escape(result) |
| 56 | + |
| 57 | +form = cgi.FieldStorage() |
| 58 | + |
| 59 | +fromval = '' |
| 60 | +toval = '' |
| 61 | + |
| 62 | +def safe(s): |
| 63 | + return cgi.escape(s).replace('"', """).replace("'", "'") |
| 64 | + |
| 65 | +if form.has_key('from'): |
| 66 | + fromval = safe(form['from'].value) |
| 67 | +if form.has_key('to'): |
| 68 | + toval = safe(form['to'].value) |
| 69 | + |
| 70 | +print """ |
| 71 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| 72 | +<html> |
| 73 | +<head> |
| 74 | +<title>six degrees of wikipedia</title> |
| 75 | +<style type="text/css"> |
| 76 | +span.link { |
| 77 | + color: #999999; |
| 78 | +} |
| 79 | +body { |
| 80 | + background-color: white; |
| 81 | + padding-right: 0px; |
| 82 | + margin-right: 0px; |
| 83 | +} |
| 84 | +input { border: solid 1px blue; background: white; color: black; } |
| 85 | +div.error { |
| 86 | + display: inline; |
| 87 | + border: solid 1px #ff8888; |
| 88 | +} |
| 89 | +span.error { |
| 90 | + padding-left: 1em; |
| 91 | + padding-right: 1em; |
| 92 | + background-color: #ffdcdc; |
| 93 | + font-weight: bold; |
| 94 | + border-right: solid 1px #ff8888; |
| 95 | +} |
| 96 | +span.errtext { |
| 97 | + padding-left: 1em; |
| 98 | + padding-right: 1em; |
| 99 | +} |
| 100 | +div.result { |
| 101 | + border: solid 1px #0000ff; |
| 102 | + width: 75%; |
| 103 | + margin-left: auto; margin-right: auto; |
| 104 | + text-align: center; |
| 105 | +} |
| 106 | +div.answer { |
| 107 | + width: 100%; |
| 108 | + text-align: center; |
| 109 | + border-bottom: solid 1px #0000ff; |
| 110 | + background-color: #ddddff; |
| 111 | +} |
| 112 | +span.art { |
| 113 | +} |
| 114 | +a { |
| 115 | + color: #0000C0; |
| 116 | +} |
| 117 | +a:hover { |
| 118 | + text-decoration: underline; |
| 119 | +} |
| 120 | +</style> |
| 121 | +</head> |
| 122 | +<body> |
| 123 | +<div style="text-align: right; padding: 0px; margin: 0px"><img src="/~kate/6deg.png" alt="" /><br/></div> |
| 124 | +<div style="text-align: center"> |
| 125 | +<i> |
| 126 | +<a href="http://en.wikipedia.org/wiki/Iterative_deepening_depth-first_search">iterative deepening</a> |
| 127 | +<a href="http://en.wikipedia.org/wiki/Depth-first_search">depth first search</a> |
| 128 | +<a href="http://en.wikipedia.org/wiki/Shortest_path_problem">shortest path</a> |
| 129 | +query tool for <a href="http://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>...</i><br/> |
| 130 | +<i>six degrees</i> finds the shortest path between any two Wikipedia articles in the |
| 131 | +main namespace using wiki links |
| 132 | +</div> |
| 133 | +<div style="padding-top: 35px;"> |
| 134 | +<form method="get" action="six_degrees"> |
| 135 | +<center> |
| 136 | +<strong>find path...</strong> |
| 137 | +from: <input type="text" name="from" value=\"""" + fromval + """\"/> |
| 138 | +to: <input type="text" name="to" value=\"""" + toval + """\" /> |
| 139 | +<input type="submit" value="go" /> |
| 140 | +</center> |
| 141 | +</form> |
| 142 | +""" |
| 143 | + |
| 144 | +def mklink(art): |
| 145 | + a2 = art.replace('_', ' ') |
| 146 | + return "<a href=\"http://en.wikipedia.org/wiki/" + safe(art) + "\">" + safe(a2) + "</a>" |
| 147 | + |
| 148 | +def chop(s): |
| 149 | + if s[-1:] == '\n': |
| 150 | + return s[:-1] |
| 151 | + else: |
| 152 | + return s |
| 153 | + |
| 154 | +def getrecent(): |
| 155 | + try: |
| 156 | + f = file("/u01/u/kate/rcq", "r") |
| 157 | + except IOError: |
| 158 | + return '' |
| 159 | + i = [] |
| 160 | + while True: |
| 161 | + s,t,d = f.readline(), f.readline(), f.readline() |
| 162 | + s = chop(s) |
| 163 | + t = chop(t) |
| 164 | + |
| 165 | + if '' in [s,t,d]: |
| 166 | + break |
| 167 | + i = i + [[s,t,d]] |
| 168 | + i.reverse() |
| 169 | + s = '' |
| 170 | + for t in i[:10]: |
| 171 | + s = s + '<li>' + mklink(t[0]) + ' -> ' + mklink(t[1]) + (" (%d hops)" % (int(t[2]) - 1)) |
| 172 | + return s |
| 173 | + |
| 174 | +footer = """ |
| 175 | +<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em"> |
| 176 | +<div style="text-align: center"> |
| 177 | +<strong>notes:</strong> |
| 178 | +</div> |
| 179 | +<ul> |
| 180 | +<li>redirects are searched as well as articles</li> |
| 181 | +<li>using a redirect as the target will generally produce an inferior result</li> |
| 182 | +<li>article names are case sensitive except for the first letter, which is always capital</li> |
| 183 | +<li>removing year articles is on the TODO list</li> |
| 184 | +<li>it may take a while to process your query at times of high load. i am working on making the links server more |
| 185 | +efficient...</li> |
| 186 | +<li>six degrees was recently <a href="http://tools.wikimedia.de/~kate/pages/six-degrees-ct">mentioned</a> in the |
| 187 | +German computer magazine <i>c't</i>. fame! who'd've thought it ;-)</li> |
| 188 | +</ul> |
| 189 | +</div> |
| 190 | +</div> |
| 191 | +""" |
| 192 | + |
| 193 | +rc = getrecent() |
| 194 | +if rc != '': |
| 195 | + footer = footer + """ |
| 196 | +<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em"> |
| 197 | +<div style="text-align: center"> |
| 198 | +<strong>recent queries:</strong> |
| 199 | +<ul>""" |
| 200 | + footer = footer + rc |
| 201 | + footer = footer + "</ul></div></div>" |
| 202 | + |
| 203 | +footer = footer + """ |
| 204 | +<hr/> |
| 205 | +<a href="https://www.mediawiki.org/" |
| 206 | +><img style="float: right; margin-right: 5px" |
| 207 | + src="/images/wikimedia-toolserver-button.png" width="88" heigh="31" id="toolbutton" border="0" /></a><a |
| 208 | +href="http://www.sun.com/" |
| 209 | +><img style="float: right" src="/~kate/sun.gif" border="0" /></a> |
| 210 | + |
| 211 | +kate's tools: |
| 212 | + <a href="count_edits">user edit counter</a> |
| 213 | +| <a href="archive_contribs">deleted contribs browser</a> |
| 214 | +| <strong>six degrees of wikipedia</strong> |
| 215 | +| <a href="mailto:keturner@livejournal.com">send feedback...</a> |
| 216 | +</div></body></html>""" |
| 217 | + |
| 218 | +def error(text): |
| 219 | + print "<center><div class='error'><span class='error'>error:</span><span class='errtext'>%s</span></div></center>" % cgi.escape(text) |
| 220 | + print footer |
| 221 | + sys.exit() |
| 222 | + |
| 223 | +if not (form.has_key('from') and form.has_key('to')): |
| 224 | + print footer |
| 225 | + sys.exit() |
| 226 | + |
| 227 | +def cfirst(s): |
| 228 | + return s[0].upper() + s[1:] |
| 229 | +srcname,targname = cfirst(form['from'].value.replace(' ', '_')), cfirst(form['to'].value.replace(' ', '_')) |
| 230 | + |
| 231 | +if len(srcname) > 255: |
| 232 | + error("source article %s does not exist" % srcname) |
| 233 | +if len(targname) > 255: |
| 234 | + error("target article %s does not exist" % targname) |
| 235 | +def findlink(src, dst): |
| 236 | + (sin, sout) = os.popen2("/u01/u/kate/linksc") |
| 237 | + sin.write(src + '\n') |
| 238 | + sin.write(dst + '\n') |
| 239 | + sin.flush() |
| 240 | + i = sout.readline() |
| 241 | + if i == "ERROR\n": |
| 242 | + err = sout.readline() |
| 243 | + if err == "NO_FROM\n": |
| 244 | + error("source article %s does not exist" % src) |
| 245 | + elif err == "NO_TO\n": |
| 246 | + error("target article %s does not exist" % dst) |
| 247 | + elif err == "NO_CONNECT\n": |
| 248 | + error("could not contact links server") |
| 249 | + else: |
| 250 | + error("unknown error") |
| 251 | + i = [] |
| 252 | + while True: |
| 253 | + nid = sout.readline() |
| 254 | + if nid == '': |
| 255 | + return i |
| 256 | + i = i + [nid] |
| 257 | + |
| 258 | +def mklink(art): |
| 259 | + a2 = art.replace('_', ' ') |
| 260 | + return "<a href=\"http://en.wikipedia.org/wiki/" + safe(art) + "\">" + cgi.escape(a2) + "</a>" |
| 261 | + |
| 262 | +def writelog(src, targ, degs): |
| 263 | + try: |
| 264 | + f = file("/u01/u/kate/rcq", "a") |
| 265 | + f.write(src + '\n' + targ + '\n' + str(degs) + '\n') |
| 266 | + except IOError: |
| 267 | + return |
| 268 | + f.close() |
| 269 | + |
| 270 | +answer = findlink(srcname, targname) |
| 271 | +if answer != []: |
| 272 | + writelog(srcname, targname, len(answer)) |
| 273 | + print "<div class='result'>" |
| 274 | + print "<div class='answer'>%d degrees of separation</div>" % (len(answer) - 1) |
| 275 | + print "<span class='art'>%s</span><br/>" % mklink(answer[0]), |
| 276 | + print "<span class='link'>%s</span><br/>" % getlink(answer[0], answer[1]) |
| 277 | + j = 1 |
| 278 | + for i in answer[1:]: |
| 279 | + print "<span class='art'>%s</span><br/>" % mklink(i) |
| 280 | + if j + 1 <= len(answer)-1: |
| 281 | + print "<span class='link'>%s</span><br/>" % getlink(answer[j], answer[j + 1]) |
| 282 | + j = j + 1 |
| 283 | + #print "<span class='art'>%s</span>" % mklink(targname) |
| 284 | + print "</div>" |
| 285 | + print footer |
| 286 | + sys.exit() |
| 287 | +error("no route found after %d degrees..." % 10) |