r109790 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r109790 (< r109789 | r109791 >)
Date: 01:40, 23 January 2012
Author: oren
Status: deferred
Tags:
Comment:
Added generic type arguments to make hash usage type-safe.
Modified paths:
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/search/MultiSearcherMul.java (modified)
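
The change is easiest to see in isolation. Below is a minimal sketch, not code from this revision: RawVsGenericDemo is a hypothetical class, and String keys stand in for Lucene's Term. It shows what the added type arguments buy in the diff below: a raw HashMap defers type errors to runtime casts, while the parameterized form lets the compiler reject them.

    import java.util.HashMap;

    public class RawVsGenericDemo {
        public static void main(String[] args) {
            // Raw map (the style the commit replaces): any object goes in,
            // and every read needs an unchecked cast.
            HashMap raw = new HashMap();
            raw.put("wiki", "not a number"); // compiles fine
            // int bad = (Integer) raw.get("wiki"); // ClassCastException at runtime

            // Parameterized map (the style the commit introduces): wrong
            // key/value types are rejected by the compiler instead.
            HashMap<String, Integer> typed = new HashMap<String, Integer>();
            typed.put("wiki", Integer.valueOf(42));
            // typed.put("wiki", "not a number"); // does not compile
            int df = typed.get("wiki"); // unboxes without a cast
            System.out.println(df);     // 42
        }
    }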

Diff

Index: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/search/MultiSearcherMul.java
@@ -15,15 +15,16 @@
 import org.apache.lucene.search.Weight;
 
 /** MultiSearcher that can return multiple documents in one method call */
-public class MultiSearcherMul extends MultiSearcherBase implements SearchableMul {
+public class MultiSearcherMul extends MultiSearcherBase implements
+		SearchableMul {
 	protected CachedDfSource cacheSim;
 
 	public MultiSearcherMul(SearchableMul[] searchables) throws IOException {
 		super(searchables);
 	}
-	
+
 	public Document[] docs(int[] n, FieldSelector sel) throws IOException {
-		// searchable -> doc ids 
+		// searchable -> doc ids
 		int[][] map = new int[searchables.length][n.length];
 		// searchable -> number of doc ids
 		int[] count = new int[searchables.length];
@@ -31,94 +32,93 @@
 		int[] orderSearcher = new int[n.length];
 		// original index (in n) -> document within searchable
 		int[] orderDoc = new int[n.length];
-		int j=0;
-		for(int i : n){
+		int j = 0;
+		for (int i : n) {
 			int si = subSearcher(i);
 			int docid = i - starts[si]; // doc id on subsearcher
 			orderSearcher[j] = si;
 			orderDoc[j++] = count[si];
-			map[si][count[si]++] = docid; 
+			map[si][count[si]++] = docid;
 		}
-		
-		// batch-get 
+
+		// batch-get
 		Document[][] docs = new Document[searchables.length][n.length];
-		for(j=0;j<searchables.length;j++){
-			if(count[j]==0)
+		for (j = 0; j < searchables.length; j++) {
+			if (count[j] == 0)
 				continue;
 			int[] val = new int[count[j]];
-			System.arraycopy( map[j], 0, val, 0, count[j] );
-			if(sel == null)
+			System.arraycopy(map[j], 0, val, 0, count[j]);
+			if (sel == null)
 				docs[j] = searchables[j].docs(val);
 			else
-				docs[j] = searchables[j].docs(val,sel);
+				docs[j] = searchables[j].docs(val, sel);
 		}
 		// arrange in original order
 		Document[] ret = new Document[n.length];
-		for(j=0;j<n.length;j++){
+		for (j = 0; j < n.length; j++) {
 			ret[j] = docs[orderSearcher[j]][orderDoc[j]];
 		}
-		
+
 		return ret;
 
-	} 
+	}
+
 	// inherit javadoc
 	public Document[] docs(int[] n) throws IOException {
-		return docs(n,null);
+		return docs(n, null);
 	}
-	
-	/**
-	 * Create weight in multiple index scenario.
-	 * 
-	 * Distributed query processing is done in the following steps:
-	 * 1. rewrite query
-	 * 2. extract necessary terms
-	 * 3. collect dfs for these terms from the Searchables
-	 * 4. create query weight using aggregate dfs.
-	 * 5. distribute that weight to Searchables
-	 * 6. merge results
-	 * 
-	 * Steps 1-4 are done here, 5+6 in the search() methods
-	 * 
-	 * @return rewritten queries
-	 */
-	public Weight createWeight(Query original) throws IOException {
-		// step 1
-		Query rewrittenQuery = rewrite(original);
 
-		// step 2
-		Set terms = new HashSet();
-		rewrittenQuery.extractTerms(terms);
+	/**
+	 * Create weight in multiple index scenario.
+	 * Distributed query processing is done in the following steps:
+	 * 1. rewrite query
+	 * 2. extract necessary terms
+	 * 3. collect dfs for these terms from the Searchables
+	 * 4. create query weight using aggregate dfs.
+	 * 5. distribute that weight to Searchables
+	 * 6. merge results
+	 * Steps 1-4 are done here, 5+6 in the search() methods
+	 *
	 * @return rewritten queries
+	 */
+	public Weight createWeight(Query original) throws IOException {
+		// step 1
+		Query rewrittenQuery = rewrite(original);
 
-		// step3
-		Term[] allTermsArray = new Term[terms.size()];
-		terms.toArray(allTermsArray);
-		int[] aggregatedDfs = new int[terms.size()];
-		for (int i = 0; i < searchables.length; i++) {
-			int[] dfs = searchables[i].docFreqs(allTermsArray);
-			for(int j=0; j<aggregatedDfs.length; j++){
-				aggregatedDfs[j] += dfs[j];
-			}
-		}
+		// step 2
+		Set terms = new HashSet();
+		rewrittenQuery.extractTerms(terms);
 
-		HashMap dfMap = new HashMap();
-		for(int i=0; i<allTermsArray.length; i++) {
-			dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
-		}
-		
-		// step4
-		int numDocs = maxDoc();
-		cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
+		// step3
+		Term[] allTermsArray = new Term[terms.size()];
+		terms.toArray(allTermsArray);
+		int[] aggregatedDfs = new int[terms.size()];
+		for (int i = 0; i < searchables.length; i++) {
+			int[] dfs = searchables[i].docFreqs(allTermsArray);
+			for (int j = 0; j < aggregatedDfs.length; j++) {
+				aggregatedDfs[j] += dfs[j];
+			}
+		}
 
-		return rewrittenQuery.weight(cacheSim);
-	}
-	
-	/**
-	 * Get cached document frequencies from last query. Never use this method
-	 * if single instance of multisearcher is shared between threads.
-	 * 
-	 * @return
-	 */
-	public Searcher getLastCachedDfSource(){
-		return cacheSim;
-	}
+		HashMap<Term, Integer> dfMap = new HashMap<Term, Integer>();
+		for (int i = 0; i < allTermsArray.length; i++) {
+			dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
+		}
+
+		// step4
+		int numDocs = maxDoc();
+		cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
+
+		return rewrittenQuery.weight(cacheSim);
+	}
+
+	/**
+	 * Get cached document frequencies from last query. Never use this method
+	 * if single instance of multisearcher is shared between threads.
+	 *
+	 * @return
+	 */
+	public Searcher getLastCachedDfSource() {
+		return cacheSim;
+	}
 }
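
For readers skimming the first hunk: docs(int[], FieldSelector) groups the requested global doc ids by owning sub-searcher, fetches each group in one batched call, and then restores the caller's original order via the orderSearcher/orderDoc bookkeeping. Below is a self-contained sketch of that scatter/gather pattern under stated assumptions: BatchOrderDemo and its names are hypothetical, the while loop stands in for MultiSearcher's subSearcher(i), and strings stand in for fetched Document objects.

    public class BatchOrderDemo {
        public static void main(String[] args) {
            int[] starts = { 0, 100, 200 }; // first global id of each shard
            int[] n = { 205, 3, 101, 7 };   // global doc ids, caller's order

            int shards = starts.length;
            int[][] map = new int[shards][n.length]; // shard -> local doc ids
            int[] count = new int[shards];           // ids queued per shard
            int[] orderShard = new int[n.length];    // position -> shard
            int[] orderDoc = new int[n.length];      // position -> slot in batch

            int j = 0;
            for (int i : n) {
                int si = shards - 1;
                while (starts[si] > i) si--;           // find owning shard
                orderShard[j] = si;
                orderDoc[j++] = count[si];
                map[si][count[si]++] = i - starts[si]; // local doc id
            }

            // "batch fetch": one call per shard; here we just echo "shard:local"
            String[][] fetched = new String[shards][n.length];
            for (int s = 0; s < shards; s++)
                for (int k = 0; k < count[s]; k++)
                    fetched[s][k] = s + ":" + map[s][k];

            // restore the caller's original order: 2:5, 0:3, 1:1, 0:7
            for (j = 0; j < n.length; j++)
                System.out.println(fetched[orderShard[j]][orderDoc[j]]);
        }
    }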
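
Likewise, the second hunk's createWeight() realizes steps 2-4 of its javadoc: extract terms, sum each term's document frequency across all sub-searchers, and hand the aggregate map to CachedDfSource so every shard scores with the same global statistics. Here is a minimal sketch of that aggregation step, again with hypothetical names, String in place of Term, and a plain int[][] standing in for the per-shard searchables[i].docFreqs(allTermsArray) results:

    import java.util.HashMap;
    import java.util.Map;

    public class DfAggregationDemo {
        public static void main(String[] args) {
            String[] terms = { "wiki", "search" };
            // document frequency of each term on each shard (shard -> term -> df)
            int[][] shardDfs = { { 10, 3 }, { 7, 0 }, { 1, 5 } };

            // step 3: sum dfs across shards, mirroring the loop in the diff
            int[] aggregated = new int[terms.length];
            for (int[] dfs : shardDfs) {
                for (int j = 0; j < aggregated.length; j++) {
                    aggregated[j] += dfs[j];
                }
            }

            // step 4 input: the type-safe term -> df map this commit introduces
            Map<String, Integer> dfMap = new HashMap<String, Integer>();
            for (int i = 0; i < terms.length; i++) {
                dfMap.put(terms[i], Integer.valueOf(aggregated[i]));
            }
            System.out.println(dfMap); // e.g. {wiki=18, search=8} (order not guaranteed)
        }
    }

Note that only the df map gained type arguments in this revision; the raw Set terms = new HashSet() in step 2 is unchanged.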

Status & tagging log