Index: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/search/MultiSearcherMul.java
===================================================================
@@ -15,15 +15,16 @@
 import org.apache.lucene.search.Weight;

 /** MultiSearcher that can return multiple documents in one method call */
-public class MultiSearcherMul extends MultiSearcherBase implements SearchableMul {
+public class MultiSearcherMul extends MultiSearcherBase implements
+        SearchableMul {
     protected CachedDfSource cacheSim;

     public MultiSearcherMul(SearchableMul[] searchables) throws IOException {
         super(searchables);
     }
-
+
     public Document[] docs(int[] n, FieldSelector sel) throws IOException {
-        // searchable -> doc ids
+        // searchable -> doc ids
         int[][] map = new int[searchables.length][n.length];
         // searchable -> number of doc ids
         int[] count = new int[searchables.length];
@@ -31,94 +32,93 @@
         int[] orderSearcher = new int[n.length];
         // original index (in n) -> document within searchable
         int[] orderDoc = new int[n.length];
-        int j=0;
-        for(int i : n){
+        int j = 0;
+        for (int i : n) {
             int si = subSearcher(i);
             int docid = i - starts[si]; // doc id on subsearcher
             orderSearcher[j] = si;
             orderDoc[j++] = count[si];
-            map[si][count[si]++] = docid;
+            map[si][count[si]++] = docid;
         }
-
-        // batch-get
+
+        // batch-get
         Document[][] docs = new Document[searchables.length][n.length];
-        for(j=0;j<searchables.length;j++){
-            if(count[j]==0)
+        for (j = 0; j < searchables.length; j++) {
+            if (count[j] == 0)
                 continue;
             int[] val = new int[count[j]];
-            System.arraycopy( map[j], 0, val, 0, count[j] );
-            if(sel == null)
+            System.arraycopy(map[j], 0, val, 0, count[j]);
+            if (sel == null)
                 docs[j] = searchables[j].docs(val);
             else
-                docs[j] = searchables[j].docs(val,sel);
+                docs[j] = searchables[j].docs(val, sel);
         }
         // arrange in original order
         Document[] ret = new Document[n.length];
-        for(j=0;j<n.length;j++){
+        for (j = 0; j < n.length; j++) {
             ret[j] = docs[orderSearcher[j]][orderDoc[j]];
         }
-
+
         return ret;

-    }
+    }
+
     // inherit javadoc
     public Document[] docs(int[] n) throws IOException {
-        return docs(n,null);
+        return docs(n, null);
     }
-
-    /**
-     * Create weight in multiple index scenario.
-     *
-     * Distributed query processing is done in the following steps:
-     * 1. rewrite query
-     * 2. extract necessary terms
-     * 3. collect dfs for these terms from the Searchables
-     * 4. create query weight using aggregate dfs.
-     * 5. distribute that weight to Searchables
-     * 6. merge results
-     *
-     * Steps 1-4 are done here, 5+6 in the search() methods
-     *
-     * @return rewritten queries
-     */
-    public Weight createWeight(Query original) throws IOException {
-        // step 1
-        Query rewrittenQuery = rewrite(original);

-        // step 2
-        Set terms = new HashSet();
-        rewrittenQuery.extractTerms(terms);
+    /**
+     * Create weight in multiple index scenario.
+     * Distributed query processing is done in the following steps:
+     * 1. rewrite query
+     * 2. extract necessary terms
+     * 3. collect dfs for these terms from the Searchables
+     * 4. create query weight using aggregate dfs
+     * 5. distribute that weight to Searchables
+     * 6. merge results
+     * Steps 1-4 are done here, 5 and 6 in the search() methods.
+     *
+     * @return the Weight for the rewritten query, built from the aggregate dfs
+     */
+    public Weight createWeight(Query original) throws IOException {
+        // step 1
+        Query rewrittenQuery = rewrite(original);

-        // step3
-        Term[] allTermsArray = new Term[terms.size()];
-        terms.toArray(allTermsArray);
-        int[] aggregatedDfs = new int[terms.size()];
-        for (int i = 0; i < searchables.length; i++) {
-            int[] dfs = searchables[i].docFreqs(allTermsArray);
-            for(int j=0; j<aggregatedDfs.length; j++){
-                aggregatedDfs[j] += dfs[j];
-            }
-        }
+        // step 2
+        Set terms = new HashSet();
+        rewrittenQuery.extractTerms(terms);

-        HashMap dfMap = new HashMap();
-        for(int i=0; i<allTermsArray.length; i++) {
-            dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
-        }
-
-        // step4
-        int numDocs = maxDoc();
-        cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
+        // step3
+        Term[] allTermsArray = new Term[terms.size()];
+        terms.toArray(allTermsArray);
+        int[] aggregatedDfs = new int[terms.size()];
+        for (int i = 0; i < searchables.length; i++) {
+            int[] dfs = searchables[i].docFreqs(allTermsArray);
+            for (int j = 0; j < aggregatedDfs.length; j++) {
+                aggregatedDfs[j] += dfs[j];
+            }
+        }

-        return rewrittenQuery.weight(cacheSim);
-    }
-
-    /**
-     * Get cached document frequencies from last query. Never use this method
-     * if single instance of multisearcher is shared between threads.
-     *
-     * @return
-     */
-    public Searcher getLastCachedDfSource(){
-        return cacheSim;
-    }
+        HashMap<Term, Integer> dfMap = new HashMap<Term, Integer>();
+        for (int i = 0; i < allTermsArray.length; i++) {
+            dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
+        }
+
+        // step4
+        int numDocs = maxDoc();
+        cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
+
+        return rewrittenQuery.weight(cacheSim);
+    }
+
+    /**
+     * Get the cached document frequencies from the last query. Never use this
+     * method if a single instance of the multisearcher is shared between threads.
+     *
+     * @return the CachedDfSource set by the last createWeight() call
+     */
+    public Searcher getLastCachedDfSource() {
+        return cacheSim;
+    }
 }
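
For reference, here is a minimal usage sketch of the class this diff reformats. It is illustrative only: openShards() is a hypothetical helper, the "contents" and "title" field names are made up, and it assumes MultiSearcherBase exposes Lucene's standard Searcher.search(Query, Filter, int) method, as Lucene's own MultiSearcher does. The point is the batching shown in docs(int[], FieldSelector): hit doc ids are collected once and fetched with a single docs() call per sub-searcher, while search() goes through the createWeight() above, which sums docFreqs() across sub-searchers so ranking behaves as if all shards were one index.

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.wikimedia.lsearch.search.MultiSearcherMul;
import org.wikimedia.lsearch.search.SearchableMul;

public class BatchFetchSketch {
    public static void main(String[] args) throws Exception {
        // openShards() stands in for however the caller obtains its per-index
        // searchers; it is a hypothetical placeholder, not part of lsearch.
        SearchableMul[] shards = openShards();
        MultiSearcherMul searcher = new MultiSearcherMul(shards);

        // Plain Lucene 2.x search; internally this rewrites the query and calls
        // the createWeight() in the diff, which aggregates per-shard docFreqs().
        Query q = new TermQuery(new Term("contents", "wiki"));
        TopDocs hits = searcher.search(q, null, 20);

        // Gather the global doc ids of the hits ...
        int[] ids = new int[hits.scoreDocs.length];
        for (int i = 0; i < ids.length; i++) {
            ids[i] = hits.scoreDocs[i].doc;
        }
        // ... and fetch them in one docs() round trip per sub-searcher
        // instead of one doc(i) call per hit.
        Document[] docs = searcher.docs(ids);
        for (Document d : docs) {
            System.out.println(d.get("title"));
        }
    }

    private static SearchableMul[] openShards() {
        // Placeholder: supply real SearchableMul instances here.
        throw new UnsupportedOperationException("supply real searchers");
    }
}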