Index: trunk/sixdeg/webapp/Makefile |
— | — | @@ -16,6 +16,7 @@ |
17 | 17 | src/path.jsp \ |
18 | 18 | src/input.jsp \ |
19 | 19 | src/error.jsp \ |
| 20 | + src/index.jsp \ |
20 | 21 | src/main.css \ |
21 | 22 | src/6deg.png \ |
22 | 23 | src/WEB-INF/web.xml \ |
Index: trunk/sixdeg/webapp/sixdeg/PathFinder.java |
— | — | @@ -10,6 +10,7 @@ |
11 | 11 | String url; |
12 | 12 | String code; |
13 | 13 | String database; |
| 14 | + String basename; |
14 | 15 | |
15 | 16 | public String getDatabase() { |
16 | 17 | return database; |
— | — | @@ -23,10 +24,15 @@ |
24 | 25 | return code; |
25 | 26 | } |
26 | 27 | |
| 28 | + public String getBasename() { /* Returns the basename field, which the constructor sets to code + ".wikipedia.org" (host name only: no scheme, no trailing slash). */ |
| 29 | + return basename; |
| 30 | + } |
| 31 | + |
27 | 32 | public Wiki(String db) { /* Derives code, url and basename from the database name db, which is stored unchanged in the database field. */ |
28 | 33 | this.database = db; |
29 | 34 | this.code = db.substring(0, db.length() - 6); /* Drops the last six characters of db; presumably a fixed-length database-name suffix - TODO confirm. substring throws StringIndexOutOfBoundsException when db is shorter than six characters. */ |
30 | 35 | this.url = "http://" + code + ".wikipedia.org/"; |
| 36 | + this.basename = code + ".wikipedia.org"; /* Same host as url, but without the scheme prefix and the trailing slash. */ |
31 | 37 | } |
32 | 38 | } |
33 | 39 | |
— | — | @@ -54,7 +60,7 @@ |
55 | 61 | public Map<String, String> getWikimap() { /* Builds a fresh map from each wiki's database name to its basename (host name); a new map is created on every call. */ |
56 | 62 | Map<String, String> m = new TreeMap<String, String>(); /* TreeMap, so iteration is ordered by database name. */ |
57 | 63 | for (Wiki w : wikis) |
58 | | - m.put(w.getDatabase(), w.getUrl()); |
| 64 | + m.put(w.getDatabase(), w.getBasename()); /* The value is the bare host name; the removed row above stored the full URL instead. */ |
59 | 65 | |
60 | 66 | return m; |
61 | 67 | } |
Index: trunk/sixdeg/webapp/src/index.jsp |
— | — | @@ -1,194 +1,23 @@ |
2 | | -<%-- |
| 2 | +<%-- vim:et sw=2 ts=2: |
3 | 3 | Six degrees of Wikipedia: JSP front-end. |
4 | 4 | This source code is released into the public domain. |
5 | 5 | |
6 | 6 | From: @(#)index.jsp 1.19 06/10/16 01:17:11 |
7 | 7 | $Id$ |
8 | 8 | --%> |
9 | | -<?xml version="1.0" encoding="UTF-8"?> |
10 | | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
11 | 9 | <%@ page language="java" contentType="text/html; charset=UTF-8"%> |
12 | 10 | <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %> |
13 | | -<%@ taglib prefix="fn" uri="http://java.sun.com/jsp/jstl/functions" %> |
14 | | -<%@ taglib prefix="s" uri="/struts-tags" %> |
15 | 11 | <% |
16 | | -org.wikimedia.links.linksc.PathEntry[] path = null; |
17 | | -String error = null; |
18 | | -String from = request.getParameter("from"), to = request.getParameter("to"); |
19 | | -org.wikimedia.links.linksc lc = new org.wikimedia.links.linksc(); |
| 12 | +String newURL = "pathfinder.action"; |
| 13 | +String from = request.getParameter("from"); |
| 14 | +String to = request.getParameter("to"); |
| 15 | +String ign = request.getParameter("ign_dates"); |
20 | 16 | |
21 | | -if (from != null) |
22 | | - from = from.trim(); |
23 | | -if (to != null) |
24 | | - to = to.trim(); |
| 17 | +if (from != null && to != null) /* Forward the search terms only when both are present; each value is URL-encoded as UTF-8. */ |
| 18 | + newURL = newURL + "?from=" + java.net.URLEncoder.encode(from, "UTF-8") |
| 19 | + + "&to=" + java.net.URLEncoder.encode(to, "UTF-8"); |
| 20 | +if (ign != null) /* ign_dates can arrive without from/to, in which case no query string has been started yet, so pick '?' or '&' accordingly. */ |
| 21 | + newURL = newURL + (newURL.indexOf('?') < 0 ? "?" : "&") + "ign_dates=" + java.net.URLEncoder.encode(ign, "UTF-8"); |
25 | 22 | |
26 | | -String enc = request.getCharacterEncoding(); |
27 | | -if (enc == null) enc = "<undefined>"; |
28 | | -pageContext.setAttribute("encoding", enc); |
29 | | - |
30 | | -if (request.getCharacterEncoding() == null) { |
31 | | - if (from != null) from = new String(from.getBytes("ISO-8859-1"), "UTF-8"); |
32 | | - if (to != null) to = new String(to.getBytes("ISO-8859-1"), "UTF-8"); |
33 | | -} |
34 | | - |
35 | | -boolean ign_date = false; |
36 | | -if (from != null && from.length() > 0 && to != null && to.length() > 0) { |
37 | | - String idp = request.getParameter("ign_dates"); |
38 | | - ign_date = idp != null && idp.equals("1"); |
39 | | - String rfrom = from.substring(0, 1).toUpperCase() + from.substring(1, from.length()); |
40 | | - String rto = to.substring(0, 1).toUpperCase() + to.substring(1, to.length()); |
41 | | - try { |
42 | | - path = lc.findPath(rfrom.replaceAll(" ", "_"), rto.replaceAll(" ", "_"), ign_date); |
43 | | - } catch (org.wikimedia.links.ErrorException e) { |
44 | | - error = e.geterror(); |
45 | | - } |
46 | | -} |
47 | | -if (path != null && path.length == 0) |
48 | | - error = "No route found after 10 degrees."; |
49 | | - |
50 | | -pageContext.setAttribute("error", error); |
51 | | -pageContext.setAttribute("path", path); |
52 | | -pageContext.setAttribute("from", from); |
53 | | -pageContext.setAttribute("to", to); |
54 | | - |
55 | | -if (path != null) { |
56 | | - pageContext.setAttribute("len", Integer.valueOf(path.length - 1)); |
57 | | -} else { |
58 | | - pageContext.setAttribute("len", Integer.valueOf(0)); |
59 | | -} |
| 23 | +response.sendRedirect(newURL); |
60 | 24 | %> |
61 | | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
62 | | -<head> |
63 | | -<title>Six degrees of Wikipedia</title> |
64 | | -<meta name="robots" content="index" /> |
65 | | -<meta name="robots" content="follow" /> |
66 | | -<link rel="stylesheet" href="main.css" /> |
67 | | -</head> |
68 | | -<body> |
69 | | -<div style="text-align: right; padding: 0px; margin: 0px"><img src="6deg.png" alt="" /><br/></div> |
70 | | -<div style="text-align: center"> |
71 | | -<i> |
72 | | -a <a href="http://en.wikipedia.org/wiki/Shortest_path">shortest path</a> |
73 | | -query solver for the English |
74 | | -<a href="http://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>...</i><br/> |
75 | | -<i>six degrees</i> finds the shortest path between any two Wikipedia articles in the |
76 | | -main namespace using wiki links |
77 | | -</div> |
78 | | - |
79 | | -<div style="padding-top: 35px;"> |
80 | | -<form method="get" action="index.jsp" accept-charset="UTF-8"> |
81 | | -<center> |
82 | | -<strong>find path...</strong> |
83 | | -from: <input type="text" name="from" value="<c:out value='${fn:replace(from, "_", " ")}'/>"/> |
84 | | -to: <input type="text" name="to" value="<c:out value='${fn:replace(to, "_", " ")}'/>" /> |
85 | | -<input type="submit" value="go" /> |
86 | | -<br /> |
87 | | -<input type="checkbox" name="ign_dates" value="1" |
88 | | -<% if (ign_date) { %> |
89 | | -checked="checked" |
90 | | -<% } %> |
91 | | -/> ignore date and year articles |
92 | | -</center> |
93 | | -</form> |
94 | | - |
95 | | -<% if (error != null) { %> |
96 | | -<center><div class='error'><span class='error'>error:</span><span class='errtext'><c:out value="${error}" /></span></div></center> |
97 | | -<% if (error.equals("No route found after 10 degrees.")) { %> |
98 | | -<p class='return'><a |
99 | | -href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&to=<c:out value='${fn:replace(from, " ", "_")}'/>" |
100 | | ->Try in the other direction?</a></p> |
101 | | -<% } %> |
102 | | - |
103 | | -<%-- |
104 | | - Print path... |
105 | | -<% } else if (path != null) { %> |
106 | | -<div class='result'> |
107 | | -<div class='answer'><c:out value="${len}"/> degrees of separation</div> |
108 | | - |
109 | | -<c:forEach items="${path}" var="hop"> |
110 | | - <span class="art"><a |
111 | | - href="http://en.wikipedia.org/wiki/<c:out value="${hop.article}"/>?oldid=<c:out value="${hop.id}" />" |
112 | | - ><c:out value='${fn:replace(hop.article, "_", " ")}'/></a></span> |
113 | | - <br/> |
114 | | - <span class="context"> |
115 | | - <c:out value='${hop.context}' /> |
116 | | - </span> |
117 | | - </br> |
118 | | -</c:forEach> |
119 | | - |
120 | | -</div> |
121 | | -<p class='return'><a href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&to=<c:out value='${fn:replace(from, " ", "_")}'/>">View the return path?</a></p> |
122 | | -<% } %> |
123 | | -<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em"> |
124 | | -<div style="text-align: center"> |
125 | | -<strong>hints:</strong> |
126 | | -</div> |
127 | | -<ul> |
128 | | -<li><strong>it says my article doesn't exist?</strong> - this is usually caused by the article being created later than the last database update (often several months ago). alternatively, check your capitalisation.</li> |
129 | | -<li>redirects ("aliases" from one name to another article) are searched as well as articles</li> |
130 | | -<li>using a <strong>redirect as the target</strong> will generally produce an inferior result, because articles are not meant to link to redirects. (be careful: "United Kingdom" is not a redirect, but "United kingdom"—with a lowercase "k"—is)</li> |
131 | | -<li>article names are <strong>case sensitive</strong> except for the first letter, which is always capital</li> |
132 | | -<li>please <strong>do</strong> report any problems with six degrees to me |
133 | | -[<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</li> |
134 | | -<li>six degrees was recently <a href="http://tools.wikimedia.de/~river/pages/six-degrees-ct">mentioned</a> in the |
135 | | - German computer magazine <i>c't</i>. fame! who'd've thought it ;-)</li> |
136 | | -<li><strong>ignore date and year articles</strong>: at the moment, this only ignores articles like "March 25" and "1995". |
137 | | -in the future, i might add other articles which summarise years, such as "2005 in music".</li> |
138 | | -</ul> |
139 | | - |
140 | | -<div style="border-top: solid 1px black; margin-top: 3em"> |
141 | | -<p><strong>six degrees</strong> finds the shortest path from one article to another, using wiki links. |
142 | | -the <em>shortest path</em> is a problem in computer science: given a list of nodes (articles) and links between them, |
143 | | -find a route from one node to another such that no shorter route exists. it's important to realise that there's no |
144 | | -<em>single</em> shortest path; there may be many routes from one article to another which all traverse four articles, |
145 | | -for example. six degrees will find one such route.</p> |
146 | | - |
147 | | -<p>six degrees works on a copy of the English Wikipedia database, and is not updated in real time. this means |
148 | | -that if someone adds or removes a link on Wikipedia, the change will not be reflected in six degrees until the next |
149 | | -database update. for various reasons, these updates are currently very infrequent, so the path you see here |
150 | | -may be several months out of date. the links in the result will take you to the text of the article as |
151 | | -six degrees saw it, not the current version.</p> |
152 | | - |
153 | | -<p>six degrees will try to show an excerpt of about 100 characters from the article, where the link to the next |
154 | | -article occurred, so you can see the context of the link. in some cases this may not be possible: the link |
155 | | -might be part of a template, or it might be using a form of wiki markup that six degrees doesn't understand.</p> |
156 | | - |
157 | | -<p>six degrees takes its name from <a href="http://en.wikipedia.org/wiki/Six_degrees_of_separation">six degrees |
158 | | -of separation</a>, the theory that everyone in the world is connected by no more than six degrees. however, |
159 | | -this only goes as far as the name. paths between articles have been found which are nine, ten or more degrees.</p> |
160 | | - |
161 | | -<p>six degrees was written by River Tarnell, [<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>], and |
162 | | -incorporates suggestions and bug fixes from numerous users. please feel free to contact me if you have any |
163 | | -suggestions for six degrees, or if you've found something that doesn't work right (i try to reply to all my |
164 | | -mail, but i may take a long time to get back to you).</p> |
165 | | - |
166 | | -<p><strong>technical details</strong>: the core of six degrees is a breadth-first search implemented in 45 |
167 | | -lines of C++ code. including all of the support infrastructure, six degrees is about 2,000 lines of C++ |
168 | | -and 500 lines of Java. because the graph being searched is so large, the backend runs as a server process |
169 | | -called <em>linksd</em>. linksd accepts network connections from clients, calculates the path, and returns |
170 | | -it. the web-based frontend is such a client, implemented as a Java servlet. (i'm not all that fond of the |
171 | | -Java language, but for web applications, servlets are a nice environment). linksd is multi-threaded so |
172 | | -it can make the best use of its host system, a 2-CPU Opteron system. originally, the graph was held entirely |
173 | | -in memory by linksd; however, not only did reading the graph from disk make startup very slow, it was so |
174 | | -large that linksd used nearly 1.5GB of memory. the current version stores the graph on disk, using Oracle |
175 | | -Berkeley DB.</p> |
176 | | - |
177 | | -</div> |
178 | | -</div> |
179 | | - |
180 | | -</a><a href="https://www.mediawiki.org/"><img |
181 | | - src="wikimedia-toolserver-button.png" style="float: right" |
182 | | - alt="Hosted by Wikimedia Toolserver" /></a> |
183 | | -<a href="http://www.sun.com/"><img |
184 | | - style="float: right" src="sun.gif" |
185 | | - alt = "Powered by Sun Microsystems" /></a> |
186 | | -<p> |
187 | | -<a href="http://tools.wikimedia.de/~river/pages/projects/six-degrees">source code</a> | |
188 | | -<a href="mailto:river@attenuate.org">send feedback...</a><br /> |
189 | | -i'm poor. if you like <i>six degrees</i>, feel free to <a href="http://www.paypal.com/" |
190 | | ->PayPal</a> some money to [<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</p> |
191 | | -<span class='version'>Front-end version: $Revision$] |
192 | | -</div> |
193 | | -</body> |
194 | | -</html> |