r20949 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r20948‎ | r20949 | r20950 >
Date:18:48, 2 April 2007
Author:river
Status:old
Tags:
Comment:
need a redirecter from index.jsp
Modified paths:
  • /trunk/sixdeg/webapp/Makefile (modified) (history)
  • /trunk/sixdeg/webapp/sixdeg/PathFinder.java (modified) (history)
  • /trunk/sixdeg/webapp/src/index.jsp (modified) (history)

Diff [purge]

Index: trunk/sixdeg/webapp/Makefile
@@ -16,6 +16,7 @@
1717 src/path.jsp \
1818 src/input.jsp \
1919 src/error.jsp \
 20+ src/index.jsp \
2021 src/main.css \
2122 src/6deg.png \
2223 src/WEB-INF/web.xml \
Index: trunk/sixdeg/webapp/sixdeg/PathFinder.java
@@ -10,6 +10,7 @@
1111 String url;
1212 String code;
1313 String database;
 14+ String basename;
1415
1516 public String getDatabase() {
1617 return database;
@@ -23,10 +24,15 @@
2425 return code;
2526 }
2627
 28+ public String getBasename() {
 29+ return basename;
 30+ }
 31+
2732 public Wiki(String db) {
2833 this.database = db;
2934 this.code = db.substring(0, db.length() - 6);
3035 this.url = "http://" + code + ".wikipedia.org/";
 36+ this.basename = code + ".wikipedia.org";
3137 }
3238 }
3339
@@ -54,7 +60,7 @@
5561 public Map<String, String> getWikimap() {
5662 Map<String, String> m = new TreeMap<String, String>();
5763 for (Wiki w : wikis)
58 - m.put(w.getDatabase(), w.getUrl());
 64+ m.put(w.getDatabase(), w.getBasename());
5965
6066 return m;
6167 }
Index: trunk/sixdeg/webapp/src/index.jsp
@@ -1,194 +1,23 @@
2 -<%--
 2+<%-- vim:et sw=2 ts=2:
33 Six degrees of Wikipedia: JSP front-end.
44 This source code is released into the public domain.
55
66 From: @(#)index.jsp 1.19 06/10/16 01:17:11
77 $Id$
88 --%>
9 -<?xml version="1.0" encoding="UTF-8"?>
10 -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
119 <%@ page language="java" contentType="text/html; charset=UTF-8"%>
1210 <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %>
13 -<%@ taglib prefix="fn" uri="http://java.sun.com/jsp/jstl/functions" %>
14 -<%@ taglib prefix="s" uri="/struts-tags" %>
1511 <%
16 -org.wikimedia.links.linksc.PathEntry[] path = null;
17 -String error = null;
18 -String from = request.getParameter("from"), to = request.getParameter("to");
19 -org.wikimedia.links.linksc lc = new org.wikimedia.links.linksc();
 12+String newURL = "pathfinder.action";
 13+String from = request.getParameter("from");
 14+String to = request.getParameter("to");
 15+String ign = request.getParameter("ign_dates");
2016
21 -if (from != null)
22 - from = from.trim();
23 -if (to != null)
24 - to = to.trim();
 17+if (from != null && to != null)
 18+ newURL = newURL + "?from=" + java.net.URLEncoder.encode(from, "UTF-8")
 19+ + "&to=" + java.net.URLEncoder.encode(to, "UTF-8");
 20+if (ign != null)
 21+ newURL = newURL + "&ign_dates=" + java.net.URLEncoder.encode(ign, "UTF-8");
2522
26 -String enc = request.getCharacterEncoding();
27 -if (enc == null) enc = "<undefined>";
28 -pageContext.setAttribute("encoding", enc);
29 -
30 -if (request.getCharacterEncoding() == null) {
31 - if (from != null) from = new String(from.getBytes("ISO-8859-1"), "UTF-8");
32 - if (to != null) to = new String(to.getBytes("ISO-8859-1"), "UTF-8");
33 -}
34 -
35 -boolean ign_date = false;
36 -if (from != null && from.length() > 0 && to != null && to.length() > 0) {
37 - String idp = request.getParameter("ign_dates");
38 - ign_date = idp != null && idp.equals("1");
39 - String rfrom = from.substring(0, 1).toUpperCase() + from.substring(1, from.length());
40 - String rto = to.substring(0, 1).toUpperCase() + to.substring(1, to.length());
41 - try {
42 - path = lc.findPath(rfrom.replaceAll(" ", "_"), rto.replaceAll(" ", "_"), ign_date);
43 - } catch (org.wikimedia.links.ErrorException e) {
44 - error = e.geterror();
45 - }
46 -}
47 -if (path != null && path.length == 0)
48 - error = "No route found after 10 degrees.";
49 -
50 -pageContext.setAttribute("error", error);
51 -pageContext.setAttribute("path", path);
52 -pageContext.setAttribute("from", from);
53 -pageContext.setAttribute("to", to);
54 -
55 -if (path != null) {
56 - pageContext.setAttribute("len", Integer.valueOf(path.length - 1));
57 -} else {
58 - pageContext.setAttribute("len", Integer.valueOf(0));
59 -}
 23+response.sendRedirect(newURL);
6024 %>
61 -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
62 -<head>
63 -<title>Six degrees of Wikipedia</title>
64 -<meta name="robots" content="index" />
65 -<meta name="robots" content="follow" />
66 -<link rel="stylesheet" href="main.css" />
67 -</head>
68 -<body>
69 -<div style="text-align: right; padding: 0px; margin: 0px"><img src="6deg.png" alt="" /><br/></div>
70 -<div style="text-align: center">
71 -<i>
72 -a <a href="http://en.wikipedia.org/wiki/Shortest_path">shortest path</a>
73 -query solver for the English
74 -<a href="http://en.wikipedia.org/wiki/Main_Page">Wikipedia</a>...</i><br/>
75 -<i>six degrees</i> finds the shortest path between any two Wikipedia articles in the
76 -main namespace using wiki links
77 -</div>
78 -
79 -<div style="padding-top: 35px;">
80 -<form method="get" action="index.jsp" accept-charset="UTF-8">
81 -<center>
82 -<strong>find path...</strong>
83 -from: <input type="text" name="from" value="<c:out value='${fn:replace(from, "_", " ")}'/>"/>
84 -to: <input type="text" name="to" value="<c:out value='${fn:replace(to, "_", " ")}'/>" />
85 -<input type="submit" value="go" />
86 -<br />
87 -<input type="checkbox" name="ign_dates" value="1"
88 -<% if (ign_date) { %>
89 -checked="checked"
90 -<% } %>
91 -/> ignore date and year articles
92 -</center>
93 -</form>
94 -
95 -<% if (error != null) { %>
96 -<center><div class='error'><span class='error'>error:</span><span class='errtext'><c:out value="${error}" /></span></div></center>
97 -<% if (error.equals("No route found after 10 degrees.")) { %>
98 -<p class='return'><a
99 -href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&amp;to=<c:out value='${fn:replace(from, " ", "_")}'/>"
100 ->Try in the other direction?</a></p>
101 -<% } %>
102 -
103 -<%--
104 - Print path...
105 -<% } else if (path != null) { %>
106 -<div class='result'>
107 -<div class='answer'><c:out value="${len}"/> degrees of separation</div>
108 -
109 -<c:forEach items="${path}" var="hop">
110 - <span class="art"><a
111 - href="http://en.wikipedia.org/wiki/<c:out value="${hop.article}"/>?oldid=<c:out value="${hop.id}" />"
112 - ><c:out value='${fn:replace(hop.article, "_", " ")}'/></a></span>
113 - <br/>
114 - <span class="context">
115 - <c:out value='${hop.context}' />
116 - </span>
117 - </br>
118 -</c:forEach>
119 -
120 -</div>
121 -<p class='return'><a href="index.jsp?from=<c:out value='${fn:replace(to, " ", "_")}'/>&amp;to=<c:out value='${fn:replace(from, " ", "_")}'/>">View the return path?</a></p>
122 -<% } %>
123 -<div style="width: 50%; margin-left: auto; margin-right: auto; border-top: solid 1px black; margin-top: 3em">
124 -<div style="text-align: center">
125 -<strong>hints:</strong>
126 -</div>
127 -<ul>
128 -<li><strong>it says my article doesn't exist?</strong> - this is usually caused by the article being created later than the last database update (often several months ago). alternatively, check your capitalisation.</li>
129 -<li>redirects ("aliases" from one name to another article) are searched as well as articles</li>
130 -<li>using a <strong>redirect as the target</strong> will generally produce an inferior result, because articles are not meant to link to redirects. (be careful: "United Kingdom" is not a redirect, but "United kingdom"&mdash;with a lowercase "k"&mdash;is)</li>
131 -<li>article names are <strong>case sensitive</strong> except for the first letter, which is always capital</li>
132 -<li>please <strong>do</strong> report any problems with six degrees to me
133 -[<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</li>
134 -<li>six degrees was recently <a href="http://tools.wikimedia.de/~river/pages/six-degrees-ct">mentioned</a> in the
135 - German computer magazine <i>c't</i>. fame! who'd've thought it ;-)</li>
136 -<li><strong>ignore date and year articles</strong>: at the moment, this only ignores articles like "March 25" and "1995".
137 -in the future, i might add other articles which summarise years, such as "2005 in music".</li>
138 -</ul>
139 -
140 -<div style="border-top: solid 1px black; margin-top: 3em">
141 -<p><strong>six degrees</strong> finds the shortest path from one article to another, using wiki links.
142 -the <em>shortest path</em> is a problem in computer science: given a list of nodes (articles) and links between them,
143 -find a route from one node to another such that no shorter route exists. it's important to realise that there's no
144 -<em>single</em> shortest path; there may be many routes from one article to another which all traverse four articles,
145 -for example. six degrees will find one such route.</p>
146 -
147 -<p>six degrees works on a copy of the English Wikipedia database, and is not updated in real time. this means
148 -that if someone adds or removes a link on Wikipedia, the change will not be reflected in six degrees until the next
149 -database update. for various reasons, these updates are currently very infrequent, so the path you see here
150 -may be several months out of date. the links in the result will take you to the text of the article as
151 -six degrees saw it, not the current version.</p>
152 -
153 -<p>six degrees will try to show an excerpt of about 100 characters from the article, where the link to the next
154 -article occurred, so you can see the context of the link. in some cases this may not be possible: the link
155 -might be part of a template, or it might be using a form of wiki markup that six degrees doesn't understand.</p>
156 -
157 -<p>six degrees takes its name from <a href="http://en.wikipedia.org/wiki/Six_degrees_of_separation">six degrees
158 -of separation</a>, the theory that everyone in the world is connected by no more than six degrees. however,
159 -this only goes as far as the name. paths between articles have been found which are nine, ten or more degrees.</p>
160 -
161 -<p>six degrees was written by River Tarnell, [<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>], and
162 -incorporates suggestions and bug fixes from numerous users. please feel free to contact me if you have any
163 -suggestions for six degrees, or if you've found something that doesn't work right (i try to reply to all my
164 -mail, but i may take a long time to get back to you).</p>
165 -
166 -<p><strong>technical details</strong>: the core of six degrees is a breadth-first search implemented in 45
167 -lines of C++ code. including all of the support infrastructure, six degrees is about 2,000 lines of C++
168 -and 500 lines of Java. because the graph being searched is so large, the backend runs as a server process
169 -called <em>linksd</em>. linksd accepts network connections from clients, calculates the path, and returns
170 -it. the web-based frontend is such a client, implemented as a Java servlet. (i'm not all that fond of the
171 -Java language, but for web applications, servlets are a nice environment). linksd is multi-threaded so
172 -it can make the best use of its host system, a 2-CPU Opteron system. originally, the graph was held entirely
173 -in memory by linksd; however, not only did reading the graph from disk make startup very slow, it was so
174 -large that linksd used nearly 1.5GB of memory. the current version stores the graph on disk, using Oracle
175 -Berkeley DB.</p>
176 -
177 -</div>
178 -</div>
179 -
180 -</a><a href="https://www.mediawiki.org/"><img
181 - src="wikimedia-toolserver-button.png" style="float: right"
182 - alt="Hosted by Wikimedia Toolserver" /></a>
183 -<a href="http://www.sun.com/"><img
184 - style="float: right" src="sun.gif"
185 - alt = "Powered by Sun Microsystems" /></a>
186 -<p>
187 -<a href="http://tools.wikimedia.de/~river/pages/projects/six-degrees">source code</a> |
188 -<a href="mailto:river@attenuate.org">send feedback...</a><br />
189 -i'm poor. if you like <i>six degrees</i>, feel free to <a href="http://www.paypal.com/"
190 ->PayPal</a> some money to [<tt>river</tt> (at) <tt>attenuate</tt> (dot) <tt>org</tt>].</p>
191 -<span class='version'>Front-end version: $Revision$]
192 -</div>
193 -</body>
194 -</html>