Index: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/search/MultiSearcherMul.java
===================================================================
@@ -15,15 +15,16 @@
 import org.apache.lucene.search.Weight;

 /** MultiSearcher that can return multiple documents in one method call */
-public class MultiSearcherMul extends MultiSearcherBase implements SearchableMul {
+public class MultiSearcherMul extends MultiSearcherBase implements
+        SearchableMul {
     protected CachedDfSource cacheSim;

     public MultiSearcherMul(SearchableMul[] searchables) throws IOException {
         super(searchables);
     }
-
+
     public Document[] docs(int[] n, FieldSelector sel) throws IOException {
-        // searchable -> doc ids
+        // searchable -> doc ids
         int[][] map = new int[searchables.length][n.length];
         // searchable -> number of doc ids
         int[] count = new int[searchables.length];
@@ -31,94 +32,93 @@
         int[] orderSearcher = new int[n.length];
         // original index (in n) -> document within searchable
         int[] orderDoc = new int[n.length];
-        int j=0;
-        for(int i : n){
+        int j = 0;
+        for (int i : n) {
             int si = subSearcher(i);
             int docid = i - starts[si]; // doc id on subsearcher
             orderSearcher[j] = si;
             orderDoc[j++] = count[si];
-            map[si][count[si]++] = docid;
+            map[si][count[si]++] = docid;
         }
-
-        // batch-get
+
+        // batch-get
         Document[][] docs = new Document[searchables.length][n.length];
-        for(j=0;j<searchables.length;j++){
-            if(count[j]==0)
+        for (j = 0; j < searchables.length; j++) {
+            if (count[j] == 0)
                 continue;
             int[] val = new int[count[j]];
-            System.arraycopy( map[j], 0, val, 0, count[j] );
-            if(sel == null)
+            System.arraycopy(map[j], 0, val, 0, count[j]);
+            if (sel == null)
                 docs[j] = searchables[j].docs(val);
             else
-                docs[j] = searchables[j].docs(val,sel);
+                docs[j] = searchables[j].docs(val, sel);
         }
         // arrange in original order
         Document[] ret = new Document[n.length];
-        for(j=0;j<n.length;j++){
+        for (j = 0; j < n.length; j++) {
             ret[j] = docs[orderSearcher[j]][orderDoc[j]];
         }
-
+
         return ret;

-    }
+    }
+
     // inherit javadoc
     public Document[] docs(int[] n) throws IOException {
-        return docs(n,null);
+        return docs(n, null);
     }
-
-    /**
-     * Create weight in multiple index scenario.
-     *
-     * Distributed query processing is done in the following steps:
-     * 1. rewrite query
-     * 2. extract necessary terms
-     * 3. collect dfs for these terms from the Searchables
-     * 4. create query weight using aggregate dfs.
-     * 5. distribute that weight to Searchables
-     * 6. merge results
-     *
-     * Steps 1-4 are done here, 5+6 in the search() methods
-     *
-     * @return rewritten queries
-     */
-    public Weight createWeight(Query original) throws IOException {
-        // step 1
-        Query rewrittenQuery = rewrite(original);

-        // step 2
-        Set terms = new HashSet();
-        rewrittenQuery.extractTerms(terms);
+    /**
+     * Create weight in multiple index scenario.
+     * Distributed query processing is done in the following steps:
+     * 1. rewrite query
+     * 2. extract necessary terms
+     * 3. collect dfs for these terms from the Searchables
+     * 4. create query weight using aggregate dfs
+     * 5. distribute that weight to Searchables
+     * 6. merge results
+     * Steps 1-4 are done here, 5 and 6 in the search() methods.
+     *
+     * @return the Weight for the rewritten query, built from the aggregate dfs
+     */
+    public Weight createWeight(Query original) throws IOException {
+        // step 1
+        Query rewrittenQuery = rewrite(original);

-        // step3
-        Term[] allTermsArray = new Term[terms.size()];
-        terms.toArray(allTermsArray);
-        int[] aggregatedDfs = new int[terms.size()];
-        for (int i = 0; i < searchables.length; i++) {
-            int[] dfs = searchables[i].docFreqs(allTermsArray);
-            for(int j=0; j<aggregatedDfs.length; j++){
-                aggregatedDfs[j] += dfs[j];
-            }
-        }
+        // step 2
+        Set terms = new HashSet();
+        rewrittenQuery.extractTerms(terms);

-        HashMap dfMap = new HashMap();
-        for(int i=0; i<allTermsArray.length; i++) {
-            dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
-        }
-
-        // step4
-        int numDocs = maxDoc();
-        cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
+        // step3
+        Term[] allTermsArray = new Term[terms.size()];
+        terms.toArray(allTermsArray);
+        int[] aggregatedDfs = new int[terms.size()];
+        for (int i = 0; i < searchables.length; i++) {
+            int[] dfs = searchables[i].docFreqs(allTermsArray);
+            for (int j = 0; j < aggregatedDfs.length; j++) {
+                aggregatedDfs[j] += dfs[j];
+            }
+        }

-        return rewrittenQuery.weight(cacheSim);
-    }
-
-    /**
-     * Get cached document frequencies from last query. Never use this method
-     * if single instance of multisearcher is shared between threads.
-     *
-     * @return
-     */
-    public Searcher getLastCachedDfSource(){
-        return cacheSim;
-    }
+        HashMap<Term, Integer> dfMap = new HashMap<Term, Integer>();
+        for (int i = 0; i < allTermsArray.length; i++) {
+            dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
+        }
+
+        // step4
+        int numDocs = maxDoc();
+        cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
+
+        return rewrittenQuery.weight(cacheSim);
+    }
+
+    /**
+     * Get the cached document frequencies from the last query. Never use this
+     * method if a single instance of the multisearcher is shared between threads.
+     *
+     * @return the CachedDfSource set by the last createWeight() call
+     */
+    public Searcher getLastCachedDfSource() {
+        return cacheSim;
+    }
 }
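
For reference, here is a minimal usage sketch of the class this diff reformats. It is illustrative only: openShards() is a hypothetical helper, the "contents" and "title" field names are made up, and it assumes MultiSearcherBase exposes Lucene's standard Searcher.search(Query, Filter, int) method, as Lucene's own MultiSearcher does. The point is the batching shown in docs(int[], FieldSelector): hit doc ids are collected once and fetched with a single docs() call per sub-searcher, while search() goes through the createWeight() above, which sums docFreqs() across sub-searchers so ranking behaves as if all shards were one index.

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.wikimedia.lsearch.search.MultiSearcherMul;
import org.wikimedia.lsearch.search.SearchableMul;

public class BatchFetchSketch {
    public static void main(String[] args) throws Exception {
        // openShards() stands in for however the caller obtains its per-index
        // searchers; it is a hypothetical placeholder, not part of lsearch.
        SearchableMul[] shards = openShards();
        MultiSearcherMul searcher = new MultiSearcherMul(shards);

        // Plain Lucene 2.x search; internally this rewrites the query and calls
        // the createWeight() in the diff, which aggregates per-shard docFreqs().
        Query q = new TermQuery(new Term("contents", "wiki"));
        TopDocs hits = searcher.search(q, null, 20);

        // Gather the global doc ids of the hits ...
        int[] ids = new int[hits.scoreDocs.length];
        for (int i = 0; i < ids.length; i++) {
            ids[i] = hits.scoreDocs[i].doc;
        }
        // ... and fetch them in one docs() round trip per sub-searcher
        // instead of one doc(i) call per hit.
        Document[] docs = searcher.docs(ids);
        for (Document d : docs) {
            System.out.println(d.get("title"));
        }
    }

    private static SearchableMul[] openShards() {
        // Placeholder: supply real SearchableMul instances here.
        throw new UnsupportedOperationException("supply real searchers");
    }
}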