r22920 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r22919‎ \| r22920 \| r22921 >
Date:	00:00, 12 June 2007
Author:	rainman
Status:	old
Tags:
Comment:	New: split index by any combination of namespaces. Added sql that I forgot to commit last time.
Modified paths:	/trunk/lucene-search-2.0/lsearch-global.conf (modified) (history) /trunk/lucene-search-2.0/sql (added) (history) /trunk/lucene-search-2.0/sql/references_table.sql (added) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/analyzers/FastWikiTokenizerEngine.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/config/GlobalConfiguration.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/config/IndexId.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/importer/Importer.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/importer/SimpleIndexWriter.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/index/IndexThread.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/index/WikiSimilarity.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/SearchEngine.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/WikiSearcher.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/test/FastWikiTokenizerTest.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/test/GlobalConfigurationTest.java (modified) (history) /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/util/Localization.java (modified) (history) /trunk/lucene-search-2.0/test-data/mwsearch-global.test (modified) (history)

Diff [purge]

Index: trunk/lucene-search-2.0/test-data/mwsearch-global.test
—	—	@@ -13,6 +13,7 @@
14	14	detest,rutest : (single,true,2,10)
15	15	frtest : (split,3) (part1) (part2) (part3)
16	16	srwiki : (single)
	17	+njawiki : (nssplit,3) (nspart1,[0,1],false,5) (nspart2,[12,13,14,15]) (nspart3,[])
17	18
18	19	# Search nodes
19	20	# host : db1.role, db2.role
—	—	@@ -35,7 +36,7 @@
36	37	192.168.0.5 : detest, rutest, frtest
37	38	192.168.0.2 : entest.ngram
38	39	192.168.0.2 : frtest.part1, frtest.part2, frtest.part3
39		~~-192.168.0.10 : srwiki~~
	40	+192.168.0.10 : srwiki njawiki
40	41
41	42	# Path where indexes are on hosts, after default value put hosts where
42	43	# the location differs
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/test/GlobalConfigurationTest.java
—	—	@@ -106,6 +106,10 @@
107	107	assertNotNull(splitroles.get("part2"));
108	108	assertNotNull(splitroles.get("part3"));
109	109
	110	+ Hashtable nspart1 = (Hashtable) ((Hashtable) database.get("njawiki")).get("nspart1");
	111	+ assertEquals("false",nspart1.get("optimize"));
	112	+ assertEquals("5",nspart1.get("mergeFactor"));
	113	+
110	114	// search
111	115	Hashtable search = testgc.getSearch();
112	116	ArrayList sr = (ArrayList) search.get("192.168.0.2");
—	—	@@ -170,6 +174,8 @@
171	175	assertTrue(testgc.useKeywordScoring("rutest"));
172	176
173	177
	178	+
	179	+
174	180	} catch (MalformedURLException e) {
175	181	e.printStackTrace();
176	182	} catch (IOException e) {
—	—	@@ -223,6 +229,22 @@
224	230	IndexId detest = IndexId.get("detest");
225	231	assertFalse(detest.isLogical());
226	232
	233	+ // check nssplit
	234	+ IndexId njawiki = IndexId.get("njawiki");
	235	+ assertTrue(njawiki.isLogical());
	236	+ assertFalse(njawiki.isSplit());
	237	+ assertTrue(njawiki.isNssplit());
	238	+ assertEquals(3,njawiki.getSplitFactor());
	239	+ assertEquals("njawiki.nspart3",njawiki.getPartByNamespace("4").toString());
	240	+ assertEquals("njawiki.nspart1",njawiki.getPartByNamespace("0").toString());
	241	+ assertEquals("njawiki.nspart2",njawiki.getPartByNamespace("12").toString());
227	242
	243	+ IndexId njawiki2 = IndexId.get("njawiki.nspart2");
	244	+ assertFalse(njawiki2.isLogical());
	245	+ assertFalse(njawiki2.isSplit());
	246	+ assertTrue(njawiki2.isNssplit());
	247	+ assertEquals(3,njawiki2.getSplitFactor());
	248	+ assertEquals(2,njawiki2.getPartNum());
	249	+
228	250	}
229	251	}
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/test/FastWikiTokenizerTest.java
—	—	@@ -80,6 +80,8 @@
81	81	showTokens(text);
82	82	text = "This is <!-- Unclosed";
83	83	showTokens(text);
	84	+ text = "This are [[bean]]s and more [[bla]]njah also Großmann";
	85	+ showTokens(text);
84	86	text = "[[Category:Blah Blah?!]], and [[:Category:Link to something]]";
85	87	showTokens(text);
86	88	text = "[[sr:Glavna stranica]], and [[:Category:Link to category]]";
—	—	@@ -92,7 +94,7 @@
93	95	showTokens(text);
94	96	text = "[[First]] second third fourth and so on goes the ... [[last link]]";
95	97	showTokens(text);
96		~~- text = "{{Something\| param = {{another}}[[First]] } }} }} }} {{name\| [[many]] many many tokens }} second third fourth and so on goes the ... [[good keyword]]";~~
	98	+ text = "{{Something\| param = {{another}}[[First]] } }} }} }} [[first good]]s {{name\| [[many]] many many tokens }} second third fourth and so on goes the ... [[good keyword]]";
97	99	showTokens(text);
98	100
99	101	if(true)
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/importer/Importer.java
—	—	@@ -141,7 +141,7 @@
142	142	if(iid.isMainsplit()){
143	143	IndexThread.makeIndexSnapshot(iid.getMainPart(),iid.getMainPart().getImportPath());
144	144	IndexThread.makeIndexSnapshot(iid.getRestPart(),iid.getRestPart().getImportPath());
145		~~- } else if(iid.isSplit()){~~
	145	+ } else if(iid.isSplit() \|\| iid.isNssplit()){
146	146	for(String part : iid.getSplitParts()){
147	147	IndexId iidp = IndexId.get(part);
148	148	IndexThread.makeIndexSnapshot(iidp,iidp.getImportPath());
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/importer/SimpleIndexWriter.java
—	—	@@ -48,7 +48,7 @@
49	49	else if(iid.isMainsplit()){
50	50	indexes.put(iid.getMainPart().toString(),openIndex(iid.getMainPart()));
51	51	indexes.put(iid.getRestPart().toString(),openIndex(iid.getRestPart()));
52		~~- } else if(iid.isSplit()){~~
	52	+ } else if(iid.isSplit() \|\| iid.isNssplit()){
53	53	for(String dbpart : iid.getSplitParts()){
54	54	indexes.put(IndexId.get(dbpart).toString(),openIndex(IndexId.get(dbpart)));
55	55	}
—	—	@@ -98,8 +98,8 @@
99	99	IndexId target;
100	100	if(iid.isSingle())
101	101	target = iid;
102		~~- else if(iid.isMainsplit()) // assign according to namespace~~
103		~~- target = (a.getNamespace().equals("0"))? iid.getMainPart() : iid.getRestPart();~~
	102	+ else if(iid.isMainsplit() \|\| iid.isNssplit()) // assign according to namespace
	103	+ target = iid.getPartByNamespace(a.getNamespace());
104	104	else // split index, randomly assign to some index part
105	105	target = iid.getPart(1+(int)(Math.random()*iid.getSplitFactor()));
106	106
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/config/IndexId.java
—	—	@@ -58,13 +58,19 @@
59	59	/** If true, this machine is an indexer for this index */
60	60	protected boolean myIndex;
61	61
62		~~- protected enum IndexType { SINGLE, MAINSPLIT, SPLIT };~~
	62	+ protected enum IndexType { SINGLE, MAINSPLIT, SPLIT, NSSPLIT };
63	63
64	64	/** Type of index, enumeration */
65	65	protected IndexType type;
66	66	/** Part number in split representation, e.g. 1..N */
67	67	protected int partNum;
68	68
	69	+ /** Namespace -> part (for nssplit indexes) */
	70	+ protected Hashtable<String,String> nssplitMap;
	71	+
	72	+ /** Set of namespaces for this nssplit part */
	73	+ protected HashSet<String> namespaceSet;
	74	+
69	75	/** All parameters as they appear in the global conf, e.g. merge factor, optimize, etc.. */
70	76	protected Hashtable<String,String> params;
71	77
—	—	@@ -146,6 +152,8 @@
147	153	this.type = IndexType.MAINSPLIT;
148	154	else if(type.equals("split"))
149	155	this.type = IndexType.SPLIT;
	156	+ else if(type.equals("nssplit"))
	157	+ this.type = IndexType.NSSPLIT;
150	158
151	159	// parts
152	160	String[] parts = dbrole.split("\\.");
—	—	@@ -177,9 +185,22 @@
178	186	partNum = Integer.parseInt(part.substring(4));
179	187	else
180	188	partNum = 0;
	189	+ } else if(this.type == IndexType.NSSPLIT){
	190	+ splitFactor = Integer.parseInt(typeParams.get("number"));
	191	+ splitParts = new String[splitFactor];
	192	+ for(int i=0;i<splitFactor;i++)
	193	+ splitParts[i] = dbname+".nspart"+(i+1);
	194	+ if(part!=null){
	195	+ partNum = Integer.parseInt(part.substring(6));
	196	+ namespaceSet = new HashSet<String>();
	197	+ String[] nss = params.get("namespaces").split(",");
	198	+ for(String ns : nss)
	199	+ namespaceSet.add(ns.trim());
	200	+ } else
	201	+ partNum = 0;
181	202	}
182	203	// for split/mainsplit the main iid is logical, it doesn't have local path
183		~~- if(myIndex && !(part == null && (this.type==IndexType.SPLIT \|\| this.type==IndexType.MAINSPLIT))){~~
	204	+ if(myIndex && !(part == null && (this.type==IndexType.SPLIT \|\| this.type==IndexType.MAINSPLIT \|\| this.type==IndexType.NSSPLIT))){
184	205	indexPath = localIndexPath + "index" + sep + dbrole;
185	206	importPath = localIndexPath + "import" + sep + dbrole;
186	207	snapshotPath = localIndexPath + "snapshot" + sep + dbrole;
—	—	@@ -219,9 +240,13 @@
220	241	public boolean isSplit(){
221	242	return type == IndexType.SPLIT;
222	243	}
	244	+ /** If type of this index is nssplit */
	245	+ public boolean isNssplit(){
	246	+ return type == IndexType.NSSPLIT;
	247	+ }
223	248	/** If this is a split index, returns the current part number, e.g. for entest.part4 will return 4 */
224	249	public int getPartNum() {
225		~~- if(type == IndexType.SPLIT)~~
	250	+ if(type == IndexType.SPLIT \|\| type == IndexType.NSSPLIT \|\| type == IndexType.MAINSPLIT)
226	251	return partNum;
227	252	else{
228	253	log.error("Called getPartNum() on non-split object! Probably a bug in the code.");
—	—	@@ -414,14 +439,50 @@
415	440	HashSet<String> ret = new HashSet<String>();
416	441	if(isSingle())
417	442	ret.add(dbrole);
418		~~- else if(isMainsplit() \|\| isSplit()){~~
	443	+ else if(isMainsplit() \|\| isSplit() \|\| isNssplit()){
419	444	for(String p : splitParts)
420	445	ret.add(p);
421	446	}
422	447
423	448	return ret;
424	449	}
	450	+
	451	+ /** Rebuild namespace map from information, call only when sure that iid's for all parts are constructed.
	452	+ * Note: always call on main iid, not parts */
	453	+ public void rebuildNsMap(Hashtable<String,IndexId> pool) {
	454	+ if(isNssplit() && part==null){
	455	+ // rebuild
	456	+ nssplitMap = new Hashtable<String,String>();
	457	+ for(String part : splitParts){
	458	+ for(String ns : pool.get(part).namespaceSet){
	459	+ nssplitMap.put(ns,part);
	460	+ }
	461	+ }
	462	+ // set on all parts as well
	463	+ for(String part : splitParts){
	464	+ pool.get(part).nssplitMap = nssplitMap;
	465	+ }
	466	+ }
	467	+ }
425	468
	469	+ public IndexId getPartByNamespace(int ns){
	470	+ return getPartByNamespace(Integer.toString(ns));
	471	+ }
426	472
	473	+ /** If this is nssplit/mainsplit index, get part with certain namespace */
	474	+ public IndexId getPartByNamespace(String ns){
	475	+ if(isNssplit()){
	476	+ String dbrole = nssplitMap.get(ns);
	477	+ if(dbrole == null)
	478	+ dbrole = nssplitMap.get("<default>");
	479	+ return get(dbrole);
	480	+ } else if(isMainsplit()){
	481	+ if(ns.equals("0"))
	482	+ return getMainPart();
	483	+ else
	484	+ return getRestPart();
	485	+ } else
	486	+ return null;
	487	+ }
427	488
428	489	}
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/config/GlobalConfiguration.java
—	—	@@ -160,7 +160,7 @@
161	161	for(String typeid : database.get(dbname).keySet()){
162	162	String type = "";
163	163	String dbrole = "";
164		~~- if(typeid.equals("single") \|\| typeid.equals("mainsplit") \|\| typeid.equals("split")){~~
	164	+ if(typeid.equals("single") \|\| typeid.equals("mainsplit") \|\| typeid.equals("split") \|\| typeid.equals("nssplit")){
165	165	type = typeid;
166	166	dbrole = dbname;
167	167	} else if(typeid.equals("mainpart") \|\| typeid.equals("restpart")){
—	—	@@ -169,6 +169,9 @@
170	170	} else if(typeid.matches("part[1-9][0-9]*")){
171	171	type = "split";
172	172	dbrole = dbname + "." + typeid;
	173	+ } else if(typeid.matches("nspart[1-9][0-9]*")){
	174	+ type = "nssplit";
	175	+ dbrole = dbname + "." + typeid;
173	176	} else
174	177	continue; // unknown type, skip
175	178
—	—	@@ -404,7 +407,7 @@
405	408	for(String typeid : database.get(dbname).keySet()){
406	409	String type = "";
407	410	String dbrole = "";
408		~~- if(typeid.equals("single") \|\| typeid.equals("mainsplit") \|\| typeid.equals("split")){~~
	411	+ if(typeid.equals("single") \|\| typeid.equals("mainsplit") \|\| typeid.equals("split") \|\| typeid.equals("nssplit")){
409	412	type = typeid;
410	413	dbrole = dbname;
411	414	} else if(typeid.equals("mainpart") \|\| typeid.equals("restpart")){
—	—	@@ -413,6 +416,9 @@
414	417	} else if(typeid.matches("part[1-9][0-9]*")){
415	418	type = "split";
416	419	dbrole = dbname + "." + typeid;
	420	+ } else if(typeid.matches("nspart[1-9][0-9]*")){
	421	+ type = "nssplit";
	422	+ dbrole = dbname + "." + typeid;
417	423	} else
418	424	continue; // unknown type, skip
419	425
—	—	@@ -452,6 +458,8 @@
453	459	oairepo);
454	460	indexIdPool.put(dbrole,iid);
455	461	}
	462	+ if(indexIdPool.get(dbname).isNssplit())
	463	+ indexIdPool.get(dbname).rebuildNsMap(indexIdPool);
456	464	}
457	465
458	466	}
—	—	@@ -628,7 +636,7 @@
629	637	} else if(type.equals("mainsplit")){
630	638	// currently no params
631	639	dbroles.put(type,params);
632		~~- } else if(type.equals("split")){~~
	640	+ } else if(type.equals("split") \|\| type.equals("nssplit")){
633	641	if(tokens.length>1) // number of segments
634	642	params.put("number",tokens[1]);
635	643	else{
—	—	@@ -656,6 +664,27 @@
657	665
658	666	dbroles.put(type,params);
659	667
	668	+ } else if(type.matches("nspart[1-9][0-9]*")){
	669	+ // [0,1,2] syntax gets split up in first split, retokenize
	670	+ String ns = role.substring(role.indexOf(",")+1,role.lastIndexOf("]")+1).trim();
	671	+ tokens = role.substring(role.lastIndexOf("]")+1).split(",");
	672	+ // definition of namespaces, e.g. [0,1,2]
	673	+ if(ns.length() > 2 && ns.startsWith("[") && ns.endsWith("]"))
	674	+ ns = ns.substring(1,ns.length()-1);
	675	+ else
	676	+ ns = "<default>";
	677	+ params.put("namespaces",ns);
	678	+
	679	+ // all params are optional, if absent default will be used
	680	+ if(tokens.length>1)
	681	+ params.put("optimize",tokens[1].trim().toLowerCase());
	682	+ if(tokens.length>2)
	683	+ params.put("mergeFactor",tokens[2]);
	684	+ if(tokens.length>3)
	685	+ params.put("maxBufDocs", tokens[3]);
	686	+
	687	+ dbroles.put(type,params);
	688	+
660	689	} else{
661	690	System.out.println("Warning: Unrecognized role \""+role+"\".Ignoring.");
662	691	}
—	—	@@ -670,10 +699,10 @@
671	700	dbr = new Hashtable<String, Hashtable<String, String>>();
672	701	database.put(db,dbr);
673	702	}
674		~~- if(type.equals("split") \|\| type.equals("mainsplit") \|\| type.equals("single")){~~
675		~~- if(dbr.get("split")!=null \|\| dbr.get("mainsplit")!=null \|\| dbr.get("single")!=null){~~
	703	+ if(type.equals("split") \|\| type.equals("mainsplit") \|\| type.equals("single") \|\| type.equals("nssplit")){
	704	+ if(dbr.get("split")!=null \|\| dbr.get("mainsplit")!=null \|\| dbr.get("single")!=null \|\| dbr.get("nssplit")!=null){
676	705	System.out.println("WARNING: in Global Configuration: defined new architecture "+type+" for "+db);
677		~~- dbr.remove("split"); dbr.remove("mainsplit"); dbr.remove("single");~~
	706	+ dbr.remove("split"); dbr.remove("mainsplit"); dbr.remove("single"); dbr.remove("nssplit");
678	707	}
679	708	}
680	709	if(dbr.get(type)!=null)
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/WikiSearcher.java
—	—	@@ -3,6 +3,7 @@
4	4	import java.io.IOException;
5	5	import java.util.ArrayList;
6	6	import java.util.Arrays;
	7	+import java.util.Hashtable;
7	8
8	9	import org.apache.log4j.Logger;
9	10	import org.apache.lucene.document.Document;
—	—	@@ -40,7 +41,8 @@
41	42	static org.apache.log4j.Logger log = Logger.getLogger(WikiSearcher.class);
42	43	protected SearchableMul searcher;
43	44	protected SearcherCache cache;
44		~~- protected Searchable mainpart,restpart;~~
	45	+ /** parts of the multisearcher, dbrole -> searchable */
	46	+ protected Hashtable<String,Searchable> searcherParts = new Hashtable<String,Searchable>();
45	47	protected MultiSearcherMul ms = null;
46	48
47	49	public static final boolean INVALIDATE_CACHE = true;
—	—	@@ -62,12 +64,8 @@
63	65
64	66	if(s != null){
65	67	ss.add(s);
66		~~- if(iid.isMainPart())~~
67		~~- mainpart = s;~~
68		~~- else if(iid.isRestPart())~~
69		~~- restpart = s;~~
70		~~- }~~
71		~~- else~~
	68	+ searcherParts.put(iid.toString(),s);
	69	+ } else
72	70	log.warn("Cannot get a search index (nor local or remote) for "+iid);
73	71	}
74	72	if(ss.size() == 0)
—	—	@@ -79,7 +77,6 @@
80	78	/** New object from cache */
81	79	public WikiSearcher(IndexId iid) throws Exception {
82	80	cache = SearcherCache.getInstance();
83		~~- mainpart = null; restpart = null;~~
84	81
85	82	if(iid.isSingle()){ // is always local
86	83	searcher = cache.getLocalSearcher(iid);
—	—	@@ -91,7 +88,7 @@
92	89
93	90	ms = makeMultiSearcher(parts);
94	91	searcher = ms;
95		~~- } else if(iid.isSplit()){~~
	92	+ } else if(iid.isSplit() \|\| iid.isNssplit()){
96	93	ArrayList<IndexId> parts = new ArrayList<IndexId>();
97	94	for(int i=1; i<=iid.getSplitFactor(); i++){
98	95	parts.add(iid.getPart(i));
—	—	@@ -105,21 +102,16 @@
106	103
107	104	cache.checkout(searcher);
108	105	}
109		-
110		~~- public String getMainPartHost(){~~
111		~~- if(mainpart == null)~~
	106	+
	107	+ /** Get host for the iid within this multi searcher */
	108	+ public String getHost(IndexId iid){
	109	+ Searchable s = searcherParts.get(iid.toString());
	110	+ if(s == null)
112	111	return null;
113	112	else
114		~~- return cache.getSearchableHost(mainpart);~~
	113	+ return cache.getSearchableHost(s);
115	114	}
116		-
117		~~- public String getRestPartHost(){~~
118		~~- if(restpart == null)~~
119		~~- return null;~~
120		~~- else~~
121		~~- return cache.getSearchableHost(restpart);~~
122		~~- }~~
123		-
	115	+
124	116	@Override
125	117	public void close() throws IOException {
126	118	cache.release(searcher);
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/SearchEngine.java
—	—	@@ -57,7 +57,7 @@
58	58	SearchResults res = search(iid, searchterm, offset, limit, namespaces, what.equals("explain"));
59	59	if(res!=null && res.isRetry()){
60	60	int retries = 0;
61		~~- if(iid.isSplit()){~~
	61	+ if(iid.isSplit() \|\| iid.isNssplit()){
62	62	retries = iid.getSplitFactor()-2;
63	63	} else if(iid.isMainsplit())
64	64	retries = 1;
—	—	@@ -84,7 +84,7 @@
85	85
86	86	/** Search mainpart or restpart of the split index */
87	87	public SearchResults searchPart(IndexId iid, Query q, NamespaceFilterWrapper filter, int offset, int limit, boolean explain){
88		~~- if( ! iid.isMainsplit())~~
	88	+ if( ! (iid.isMainsplit() \|\| iid.isNssplit()))
89	89	return null;
90	90	try {
91	91	SearcherCache cache = SearcherCache.getInstance();
—	—	@@ -150,40 +150,43 @@
151	151
152	152	WikiSearcher searcher = new WikiSearcher(iid);
153	153	TopDocs hits=null;
154		~~- // mainpart special case~~
155		~~- if(nsfw!=null && iid.isMainsplit() && nsfw.getFilter().cardinality()==1 && nsfw.getFilter().contains(0)){~~
156		~~- String host = searcher.getMainPartHost();~~
157		~~- if(host == null){~~
158		~~- res = new SearchResults();~~
159		~~- res.setErrorMsg("Error contacting searcher for mainpart of the index.");~~
160		~~- log.error("Error contacting searcher for mainpart of the index.");~~
161		~~- return res;~~
	154	+ // see if we can search only part of the index
	155	+ if(nsfw!=null && (iid.isMainPart() \|\| iid.isNssplit())){
	156	+ String part = null;
	157	+ for(NamespaceFilter f : nsfw.getFilter().decompose()){
	158	+ if(part == null)
	159	+ part = iid.getPartByNamespace(f.getNamespace()).toString();
	160	+ else{
	161	+ if(!part.equals(iid.getPartByNamespace(f.getNamespace()).toString())){
	162	+ part = null; // namespace filter wants to search more than one index parts
	163	+ break;
	164	+ }
	165	+ }
	166	+ }
	167	+ if(part!=null){
	168	+ IndexId piid = IndexId.get(part);
	169	+ String host = searcher.getHost(piid);
	170	+ if(host == null){
	171	+ res = new SearchResults();
	172	+ res.setErrorMsg("Error contacting searcher for "+part);
	173	+ log.error("Error contacting searcher for "+part);
	174	+ return res;
	175	+ }
	176	+ RMIMessengerClient messenger = new RMIMessengerClient();
	177	+ return messenger.searchPart(piid,q,nsfw,offset,limit,explain,host);
162	178	}
163		~~- RMIMessengerClient messenger = new RMIMessengerClient();~~
164		~~- return messenger.searchPart(iid.getMainPart(),q,null,offset,limit,explain,host);~~
165		~~- // restpart special case~~
166		~~- } else if(nsfw!=null && iid.isMainsplit() && !nsfw.getFilter().contains(0)){~~
167		~~- String host = searcher.getRestPartHost();~~
168		~~- if(host == null){~~
169		~~- res = new SearchResults();~~
170		~~- res.setErrorMsg("Error contacting searcher for restpart of the index.");~~
171		~~- log.error("Error contacting searcher for restpart of the index.");~~
172		~~- return res;~~
173		~~- }~~
174		~~- RMIMessengerClient messenger = new RMIMessengerClient();~~
175		~~- return messenger.searchPart(iid.getRestPart(),q,nsfw,offset,limit,explain,host);~~
176		~~- } else{ // normal search~~
177		~~- try{~~
178		~~- hits = searcher.search(q,nsfw,offset+limit);~~
179		~~- res = makeSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain);~~
180		~~- return res;~~
181		~~- } catch(Exception e){~~
182		~~- e.printStackTrace();~~
183		~~- res = new SearchResults();~~
184		~~- res.retry();~~
185		~~- log.warn("Retry, temportal error for query: ["+q+"] on "+iid);~~
186		~~- return res;~~
187		~~- }~~
	179	+ }
	180	+ // normal search
	181	+ try{
	182	+ hits = searcher.search(q,nsfw,offset+limit);
	183	+ res = makeSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain);
	184	+ return res;
	185	+ } catch(Exception e){
	186	+ e.printStackTrace();
	187	+ res = new SearchResults();
	188	+ res.retry();
	189	+ log.warn("Retry, temportal error for query: ["+q+"] on "+iid);
	190	+ return res;
188	191	}
189	192	} catch(ParseException e){
190	193	res = new SearchResults();
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/index/IndexThread.java
—	—	@@ -363,23 +363,19 @@
364	364
365	365	if( iid.isSingle() ){
366	366	enqueueLocally(record);
367		~~- } else if( iid.isMainsplit() ){~~
	367	+ } else if( iid.isMainsplit() \|\| iid.isNssplit()){
368	368	IndexId piid;
369	369	Article ar = record.getArticle();
370		~~- // deletion when we have only page_id needs to be sent to both parts,~~
	370	+ // deletion when we have only page_id needs to be sent to all parts,
371	371	// because we don't have namespace info
372	372	if(record.isDelete() && ar.getTitle().equals("")){
373		~~- IndexUpdateRecord rec1 = (IndexUpdateRecord) record.clone();~~
374		~~- IndexUpdateRecord rec2 = (IndexUpdateRecord) record.clone();~~
375		~~- rec1.setIndexId(iid.getMainPart());~~
376		~~- rec2.setIndexId(iid.getRestPart());~~
377		~~- enqueueRemotely(rec1.getIndexId().getIndexHost(),rec1);~~
378		~~- enqueueRemotely(rec2.getIndexId().getIndexHost(),rec2);~~
	373	+ for(String dbrole : iid.getSplitParts()){
	374	+ IndexUpdateRecord recp = (IndexUpdateRecord) record.clone();
	375	+ recp.setIndexId(IndexId.get(dbrole));
	376	+ enqueueRemotely(recp.getIndexId().getIndexHost(),recp);
	377	+ }
379	378	} else{
380		~~- if( ar.getNamespace().equals("0") )~~
381		~~- piid = iid.getMainPart();~~
382		~~- else~~
383		~~- piid = iid.getRestPart();~~
	379	+ piid = iid.getPartByNamespace(ar.getNamespace());
384	380	// set recipient to new host
385	381	record.setIndexId(piid);
386	382	enqueueRemotely(piid.getIndexHost(),record);
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/index/WikiSimilarity.java
—	—	@@ -36,7 +36,8 @@
37	37	return f;
38	38	}
39	39	} else if(fieldName.equals("title") \|\| fieldName.equals("stemtitle") \|\| fieldName.startsWith("alttitle")){
40		~~- float f = (float) (1.0 / (Math.sqrt(numTokens) * numTokens));~~
	40	+ //float f = (float) (1.0 / (Math.sqrt(numTokens) * numTokens));
	41	+ float f = (float) (1.0 / numTokens);
41	42	//log.debug("Length-norm: "+f+", numtokens: "+numTokens);
42	43	return f;
43	44	} else if(fieldName.startsWith("redirect") \|\| fieldName.startsWith("keyword")){
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/analyzers/FastWikiTokenizerEngine.java
—	—	@@ -333,8 +333,8 @@
334	334	return tokens; // already parsed
335	335
336	336	// before starting, make sure this is not a redirect
337		~~- if(isRedirect())~~
338		~~- return tokens;~~
	337	+ //if(isRedirect())
	338	+ // return tokens;
339	339
340	340	for(cur = 0; cur < textLength; cur++ ){
341	341	c = text[cur];
—	—	@@ -514,7 +514,7 @@
515	515
516	516	switch(fetch){
517	517	case WORD:
518		~~- addToken();~~
	518	+ // don't add token to get syntax like [[bean]]s
519	519	continue;
520	520	case CATEGORY:
521	521	categories.add(new String(buffer,0,length));
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/util/Localization.java
—	—	@@ -203,10 +203,13 @@
204	204	return null;
205	205	}
206	206
	207	+ /** If text redirects to some page, get that page's title object */
207	208	public static Title getRedirectTitle(String text, String lang){
208	209	String full = getRedirectTarget(text,lang);
209	210	if(full == null)
210	211	return null;
	212	+ if(full.startsWith(":"))
	213	+ full = full.substring(1);
211	214	String[] parts = full.split(":",2);
212	215	if(parts.length == 2){
213	216	String ns = parts[0].toLowerCase();
Index: trunk/lucene-search-2.0/sql/references_table.sql
—	—	@@ -0,0 +1,15 @@
	2	+--
	3	+-- Table with cached information about references to a page
	4	+--
	5	+CREATE TABLE /DBprefix/references (
	6	+ -- key in form <ns>:<title>
	7	+ rf_key varchar(255) binary NOT NULL,
	8	+
	9	+ -- number of page links to this page
	10	+ rf_references int(10) unsigned NOT NULL,
	11	+
	12	+ --
	13	+ PRIMARY KEY rf_key(rf_key)
	14	+
	15	+) TYPE=InnoDB;
	16	+
Index: trunk/lucene-search-2.0/lsearch-global.conf
—	—	@@ -9,15 +9,16 @@
10	10	# warmup <numberOfQueries>
11	11	# databases can be written as {url}, where url contains list of dbs
12	12	[Database]
13		~~-wikilucene : (single) (language,en) (warmup,0)~~
	13	+#wikilucene : (single) (language,en) (warmup,0)
14	14	wikidev : (single) (language,sr)
	15	+wikilucene : (nssplit,3) (nspart1,[0]) (nspart2,[4,5,12,13]), (nspart3,[])
15	16
16	17	# Search groups
17	18	# Index parts of a split index are always taken from the node's group
18	19	# host : db1.part db2.part
19	20	# Multiple hosts can search multiple dbs (N-N mapping)
20	21	[Search-Group]
21		~~-oblak : wikilucene wikidev~~
	22	+oblak : wikilucene wikidev wikilucene.nspart1 wikilucene.nspart2 wikilucene.nspart3
22	23
23	24	# Index nodes
24	25	# host: db1.part db2.part

Status & tagging log

15:16, 12 September 2011 Meno25 (talk | contribs) changed the status of r22920 [removed: ok added: old]