Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/spell/Suggest.java |
— | — | @@ -3,52 +3,49 @@ |
4 | 4 | import java.io.IOException; |
5 | 5 | import java.io.Serializable; |
6 | 6 | import java.util.ArrayList; |
7 | | -import java.util.Collection; |
8 | 7 | import java.util.Collections; |
9 | 8 | import java.util.Comparator; |
10 | 9 | import java.util.HashMap; |
11 | 10 | import java.util.HashSet; |
12 | | -import java.util.Iterator; |
13 | 11 | import java.util.Map; |
| 12 | +import java.util.Map.Entry; |
14 | 13 | import java.util.Set; |
15 | 14 | import java.util.WeakHashMap; |
16 | | -import java.util.Map.Entry; |
17 | 15 | |
18 | 16 | import org.apache.log4j.Logger; |
19 | | -import org.apache.lucene.analysis.Analyzer; |
20 | 17 | import org.apache.lucene.analysis.Token; |
21 | | -import org.apache.lucene.analysis.TokenStream; |
22 | 18 | import org.apache.lucene.document.Document; |
23 | 19 | import org.apache.lucene.index.IndexReader; |
24 | 20 | import org.apache.lucene.index.Term; |
25 | 21 | import org.apache.lucene.index.TermDocs; |
26 | 22 | import org.apache.lucene.search.BooleanClause; |
27 | 23 | import org.apache.lucene.search.BooleanQuery; |
28 | | -import org.apache.lucene.search.Hits; |
29 | 24 | import org.apache.lucene.search.IndexSearcher; |
30 | 25 | import org.apache.lucene.search.Query; |
31 | 26 | import org.apache.lucene.search.ScoreDoc; |
32 | 27 | import org.apache.lucene.search.TermQuery; |
33 | 28 | import org.apache.lucene.search.TopDocs; |
34 | | -import org.wikimedia.lsearch.analyzers.Analyzers; |
35 | 29 | import org.wikimedia.lsearch.analyzers.FastWikiTokenizerEngine; |
36 | | -import org.wikimedia.lsearch.analyzers.FieldNameFactory; |
37 | 30 | import org.wikimedia.lsearch.analyzers.FilterFactory; |
38 | | -import org.wikimedia.lsearch.beans.ResultSet; |
39 | | -import org.wikimedia.lsearch.beans.SearchResults; |
40 | 31 | import org.wikimedia.lsearch.config.GlobalConfiguration; |
41 | 32 | import org.wikimedia.lsearch.config.IndexId; |
42 | | -import org.wikimedia.lsearch.ranks.ObjectCache; |
43 | 33 | import org.wikimedia.lsearch.ranks.StringList; |
44 | 34 | import org.wikimedia.lsearch.ranks.StringList.LookupSet; |
45 | 35 | import org.wikimedia.lsearch.search.NamespaceFilter; |
46 | | -import org.wikimedia.lsearch.search.FilterWrapper; |
47 | 36 | import org.wikimedia.lsearch.search.SearcherCache; |
48 | 37 | import org.wikimedia.lsearch.spell.api.NgramIndexer; |
49 | 38 | import org.wikimedia.lsearch.spell.dist.DoubleMetaphone; |
50 | 39 | import org.wikimedia.lsearch.spell.dist.EditDistance; |
51 | 40 | |
52 | | -public class Suggest { |
| 41 | + |
| 42 | +/** |
| 43 | + * The Suggest component of spell checking |
| 44 | + * |
| 45 | + * @author rainman |
| 46 | + * |
| 47 | + */ |
| 48 | +public class Suggest |
| 49 | +{ |
53 | 50 | static Logger log = Logger.getLogger(Suggest.class); |
54 | 51 | protected static GlobalConfiguration global=null; |
55 | 52 | protected IndexId iid; |
— | — | @@ -74,6 +71,7 @@ |
75 | 72 | public Metric(String word){ |
76 | 73 | this(word,true); |
77 | 74 | } |
| 75 | + |
78 | 76 | public Metric(String word, boolean useMetaphones){ |
79 | 77 | this.word = word; |
80 | 78 | this.decomposed = FastWikiTokenizerEngine.decompose(word); |
— | — | @@ -86,17 +84,21 @@ |
87 | 85 | sdmeta2 = new EditDistance(meta2,false); |
88 | 86 | } |
89 | 87 | } |
| 88 | + |
90 | 89 | public boolean hasDecomposed(){ |
91 | 90 | return decomposed != word; // equals() not necessary since decompose() returns same object |
92 | 91 | } |
| 92 | + |
93 | 93 | /** Edit distance */ |
94 | 94 | public int distanceWithDecomposition(String w){ |
95 | 95 | return sd.getDistance(FastWikiTokenizerEngine.decompose(w)); |
96 | 96 | } |
| 97 | + |
97 | 98 | /** Get distance when the input word is already decomposed */ |
98 | 99 | public int distance(String w){ |
99 | 100 | return sd.getDistance(w); |
100 | 101 | } |
| 102 | + |
101 | 103 | /* Edit distance to decomposed word (input word is also decomposed) */ |
102 | 104 | /*public int decomposedDistance(String w){ |
103 | 105 | return sdd.getDistance(FastWikiTokenizerEngine.decompose(w)); |
— | — | @@ -187,6 +189,14 @@ |
188 | 190 | this(iid,null,true); |
189 | 191 | } |
190 | 192 | |
| 193 | + /** |
| 194 | + * constructor |
| 195 | + * |
| 196 | + * @param iid |
| 197 | + * @param searcher |
| 198 | + * @param useLogging |
| 199 | + * @throws IOException |
| 200 | + */ |
191 | 201 | public Suggest(IndexId iid, IndexSearcher searcher, boolean useLogging) throws IOException{ |
192 | 202 | SearcherCache cache = SearcherCache.getInstance(); |
193 | 203 | this.iid = iid; |
— | — | @@ -250,6 +260,8 @@ |
251 | 261 | * |
252 | 262 | */ |
253 | 263 | public static class ExtraInfo implements Serializable { |
| 264 | + |
| 265 | + private static final long serialVersionUID = 1L; |
254 | 266 | protected HashSet<String> phrases; |
255 | 267 | protected HashSet<String> foundInContext; |
256 | 268 | protected HashSet<String> foundInTitles; |
— | — | @@ -790,7 +802,6 @@ |
791 | 803 | return true; |
792 | 804 | } |
793 | 805 | |
794 | | - @SuppressWarnings("unchecked") |
795 | 806 | private HashSet<String> getContext(String w, LookupSet allWords, Namespaces ns) throws IOException{ |
796 | 807 | if(ns == null || ns.additional){ // no context for nondefault namespaces |
797 | 808 | TermDocs td = reader.termDocs(new Term("context_key",w)); |