Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/spell/api |
___________________________________________________________________ |
Name: svn:ignore |
1 | 1 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/spell |
___________________________________________________________________ |
Name: svn:ignore |
2 | 2 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/test |
___________________________________________________________________ |
Name: svn:ignore |
3 | 3 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/storage |
___________________________________________________________________ |
Name: svn:ignore |
4 | 4 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/config |
___________________________________________________________________ |
Name: svn:ignore |
5 | 5 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/search |
___________________________________________________________________ |
Name: svn:ignore |
6 | 6 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/highlight |
___________________________________________________________________ |
Name: svn:ignore |
7 | 7 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/index |
___________________________________________________________________ |
Name: svn:ignore |
8 | 8 | + *.class |
Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/AnalysisTest.java |
— | — | @@ -41,7 +41,7 @@ |
42 | 42 | public class AnalysisTest extends WikiTestCase { |
43 | 43 | Analyzer a = null; |
44 | 44 | Configuration config = null; |
45 | | - |
| 45 | + |
46 | 46 | protected void setUp() throws Exception { |
47 | 47 | super.setUp(); |
48 | 48 | if(config == null){ |
— | — | @@ -49,7 +49,7 @@ |
50 | 50 | GlobalConfiguration.getInstance(); |
51 | 51 | } |
52 | 52 | } |
53 | | - |
| 53 | + |
54 | 54 | public void testCJKAnalyzer(){ |
55 | 55 | a = new CJKAnalyzer(); |
56 | 56 | assertEquals("[(いわ,0,2,type=double), (わさ,1,3,type=double), (さき,2,4,type=double), (ic,4,6,type=single), (カー,6,8,type=double), (ード,7,9,type=double)]",tokens("いわさきicカード")); |
— | — | @@ -69,35 +69,35 @@ |
70 | 70 | assertEquals("[(pokémons,0,8), (pokemons,0,8,posIncr=0), (pokemon,0,8,type=stemmed,posIncr=0)]",tokens("Pokémons")); |
71 | 71 | assertEquals("[(1990,0,4), (s,4,5), (iv,6,8)]",tokens("1990s IV")); |
72 | 72 | } |
73 | | - |
| 73 | + |
74 | 74 | public void testEnglishSearch(){ |
75 | 75 | a = Analyzers.getSearcherAnalyzer(IndexId.get("enwiki")); |
76 | 76 | commonEnglish(); |
77 | 77 | // acronyms don't get split |
78 | 78 | assertEquals("[(a.k.a,0,5), (aka,0,5,posIncr=0), (www,6,9), (google,10,16), (com,17,20)]",tokens("a.k.a www.google.com")); |
79 | 79 | } |
80 | | - |
| 80 | + |
81 | 81 | public void testEnglishIndex(){ |
82 | 82 | a = Analyzers.getIndexerAnalyzer(new FieldBuilder(IndexId.get("enwiki"))); |
83 | 83 | commonEnglish(); |
84 | 84 | // acronyms are always split |
85 | 85 | assertEquals("[(a.k.a,0,5), (aka,0,5,posIncr=0), (a,0,5,posIncr=0), (k,2,7,posIncr=0), (a,4,9,posIncr=0), (www,6,9), (google,10,16), (com,17,20)]",tokens("a.k.a www.google.com")); |
86 | 86 | } |
87 | | - |
| 87 | + |
88 | 88 | public void commonSerbian(){ |
89 | 89 | assertEquals("[(нешто,0,5), (nesto,0,5,type=alias,posIncr=0), (на,6,8), (na,6,8,type=alias,posIncr=0), (ћирилици,9,17), (cirilici,9,17,type=alias,posIncr=0)]",tokens("Нешто на ћирилици")); |
90 | 90 | } |
91 | | - |
| 91 | + |
92 | 92 | public void testSerbianSearch(){ |
93 | 93 | a = Analyzers.getSearcherAnalyzer(IndexId.get("srwiki")); |
94 | 94 | commonSerbian(); |
95 | 95 | } |
96 | | - |
| 96 | + |
97 | 97 | public void testSerbianIndex(){ |
98 | 98 | a = Analyzers.getIndexerAnalyzer(new FieldBuilder(IndexId.get("srwiki"))); |
99 | 99 | commonSerbian(); |
100 | 100 | } |
101 | | - |
| 101 | + |
102 | 102 | public String tokens(String text){ |
103 | 103 | try{ |
104 | 104 | return Arrays.toString(tokensFromAnalysis(a,text,"contents")); |
— | — | @@ -106,7 +106,7 @@ |
107 | 107 | return null; |
108 | 108 | } |
109 | 109 | } |
110 | | - |
| 110 | + |
111 | 111 | public static Token[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException { |
112 | 112 | TokenStream stream = analyzer.tokenStream(field, text); |
113 | 113 | ArrayList tokenList = new ArrayList(); |
— | — | @@ -117,15 +117,15 @@ |
118 | 118 | } |
119 | 119 | return (Token[]) tokenList.toArray(new Token[0]); |
120 | 120 | } |
121 | | - |
122 | | - public static void displayTokens(Analyzer analyzer, String text) throws IOException { |
| 121 | + |
| 122 | + public static void displayTokens(Analyzer analyzer, String text) throws IOException { |
123 | 123 | Token[] tokens = tokensFromAnalysis(analyzer, text, "contents"); |
124 | 124 | System.out.println(text); |
125 | 125 | System.out.print(">> "); |
126 | 126 | print(tokens); |
127 | 127 | System.out.println(); |
128 | 128 | } |
129 | | - |
| 129 | + |
130 | 130 | protected static void print(Token[] tokens){ |
131 | 131 | for (int i = 0, j =0; i < tokens.length; i++, j++) { |
132 | 132 | Token token = tokens[i]; |
— | — | @@ -138,14 +138,14 @@ |
139 | 139 | System.out.println(); |
140 | 140 | j=0; |
141 | 141 | } |
142 | | - |
143 | | - } |
| 142 | + |
| 143 | + } |
144 | 144 | } |
145 | | - |
146 | | - public static void displayTokens2(Analyzer analyzer, String text) throws IOException { |
| 145 | + |
| 146 | + public static void displayTokens2(Analyzer analyzer, String text) throws IOException { |
147 | 147 | Token[] tokens = tokensFromAnalysis(analyzer, text, "contents"); |
148 | 148 | System.out.println(text); |
149 | | - System.out.print("contents >> "); |
| 149 | + System.out.print("contents >> "); |
150 | 150 | print(tokens); |
151 | 151 | System.out.println(); |
152 | 152 | tokens = tokensFromAnalysis(analyzer, text, "stemmed"); |
— | — | @@ -165,16 +165,16 @@ |
166 | 166 | System.out.println(); |
167 | 167 | } |
168 | 168 | } |
169 | | - |
| 169 | + |
170 | 170 | public static void main(String args[]) throws IOException, ParseException{ |
171 | 171 | Configuration.open(); |
172 | | - |
| 172 | + |
173 | 173 | //serializeTest(Analyzers.getHighlightAnalyzer(IndexId.get("enwiki"))); |
174 | 174 | //testAnalyzer(Analyzers.getHighlightAnalyzer(IndexId.get("enwiki")),"Aaliyah"); |
175 | | - |
| 175 | + |
176 | 176 | Analyzer aa = Analyzers.getSearcherAnalyzer(IndexId.get("wikilucene")); |
177 | 177 | displayTokens(aa,"boxes france"); |
178 | | - |
| 178 | + |
179 | 179 | HashSet<String> stopWords = new HashSet<String>(); |
180 | 180 | stopWords.add("the"); stopWords.add("of"); stopWords.add("is"); stopWords.add("in"); stopWords.add("and"); stopWords.add("he") ; |
181 | 181 | //Analyzer analyzer = Analyzers.getSpellCheckAnalyzer(IndexId.get("enwiki"),stopWords); |
— | — | @@ -186,7 +186,7 @@ |
187 | 187 | text = "a.k.a www.google.com Google's Pokémons links abacus something aries douglas adams boxes bands working s and Frame semantics (linguistics)"; |
188 | 188 | displayTokens(analyzer,text); |
189 | 189 | text = "a8n sli compatible compatibly Thomas c# c++ good-thomas Good-Thomas rats RATS Frame semantics (linguistics) 16th century sixteenth .fr web.fr other"; |
190 | | - displayTokens(analyzer,text); |
| 190 | + displayTokens(analyzer,text); |
191 | 191 | displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("zhwiki")),"末朝以來藩鎮割據and some plain english 和宦官亂政的現象 as well"); |
192 | 192 | displayTokens(analyzer,"Thomas Goode school"); |
193 | 193 | displayTokens(analyzer,"Agreement reply readily Gödel;"); |
— | — | @@ -200,7 +200,7 @@ |
201 | 201 | displayTokens(analyzer,"[[Image:Lawrence_Brainerd.jpg]], [[Image:Lawrence_Brainerd.jpg|thumb|300px|Lawrence Brainerd]]"); |
202 | 202 | displayTokens(analyzer,"{{Otheruses4|the Irish rock band|other uses|U2 (disambiguation)}}"); |
203 | 203 | displayTokens(analyzer,"{{Otheruses4|the Irish rock band|other uses|U2<ref>U2-ref</ref> (disambiguation)}} Let's see<ref>Seeing is...</ref> if template extraction works.\n==Some heading==\n And after that some text..\n\nAnd now? Not now. Then when? "); |
204 | | - |
| 204 | + |
205 | 205 | ArrayList<String> l = new ArrayList<String>(); |
206 | 206 | l.add("0:Douglas Adams|0:Someone"); |
207 | 207 | l.add("0:Someone"); |
— | — | @@ -208,27 +208,27 @@ |
209 | 209 | l.add(""); |
210 | 210 | l.add("0:Heu"); |
211 | 211 | displayTokens(new SplitAnalyzer(10,true),new StringList(l).toString()); |
212 | | - |
| 212 | + |
213 | 213 | analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("viwiki")); |
214 | 214 | displayTokens(analyzer,"ä, ö, ü; Đ đViệt Nam Đ/đ ↔ D/d lastone"); |
215 | | - |
| 215 | + |
216 | 216 | analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("dewiki")); |
217 | 217 | displayTokens(analyzer,"Gunzen ä, ö, ü; for instance, Ø ÓóÒò Goedel for Gödel; čakšire"); |
218 | | - |
| 218 | + |
219 | 219 | analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("enwiki")); |
220 | 220 | displayTokens(analyzer," ä, ö, ü; for instance, Ø ÓóÒò Goedel for Gödel; čakšire"); |
221 | | - |
| 221 | + |
222 | 222 | analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("srwiki")); |
223 | 223 | displayTokens(analyzer," ä, ö, ü; for instance, Ø ÓóÒò Goedel for Gödel; čakšire"); |
224 | | - |
| 224 | + |
225 | 225 | analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("eswiki")); |
226 | 226 | displayTokens(analyzer,"lógico y matemático"); |
227 | | - |
| 227 | + |
228 | 228 | analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("mlwiki")); |
229 | 229 | displayTokens(analyzer,"കൊറിയ,“കൊറിയ”"); |
230 | | - |
| 230 | + |
231 | 231 | printCodePoints("“കൊറിയ”"); |
232 | | - |
| 232 | + |
233 | 233 | QueryParser parser = new QueryParser("contents",new CJKAnalyzer()); |
234 | 234 | Query q = parser.parse("いわさきicカード プロサッカークラブをつくろう"); |
235 | 235 | System.out.println("Japanese in standard analyzer: "+q); |
— | — | @@ -236,7 +236,7 @@ |
237 | 237 | displayTokens(Analyzers.getHighlightAnalyzer(IndexId.get("jawiki"),false),"鈴木 孝治(すずき こうじ、1954年 - )『パンツぱんくろう』というタイトルは、阪本牙城の漫画『タンクタンクロー』が元ネタになっているといわれる。ただし、このアニメと『タンクタンクロー』に内容的な直接の関係は全く無い。"); |
238 | 238 | displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("jawiki")),"『パンツぱんくろう』というタjavaイトルはbalaton"); |
239 | 239 | displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("jawiki")),"パ ン"); |
240 | | - |
| 240 | + |
241 | 241 | ArrayList<Aggregate> items = new ArrayList<Aggregate>(); |
242 | 242 | analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("enwiki")); |
243 | 243 | items.add(new Aggregate("douglas adams",10,IndexId.get("enwiki"),analyzer,"related",stopWords,Flags.ALTTITLE)); |
— | — | @@ -244,7 +244,7 @@ |
245 | 245 | items.add(new Aggregate("hurricane",3.22f,IndexId.get("enwiki"),analyzer,"related",stopWords,Flags.ANCHOR)); |
246 | 246 | items.add(new Aggregate("and some other stuff",3.2f,IndexId.get("enwiki"),analyzer,"related",stopWords,Flags.NONE)); |
247 | 247 | displayTokens(new AggregateAnalyzer(items),"AGGREGATE TEST"); |
248 | | - |
| 248 | + |
249 | 249 | // redirects? |
250 | 250 | FieldBuilder builder = new FieldBuilder(IndexId.get("enwiki")); |
251 | 251 | ArrayList<String> list = new ArrayList<String>(); |
— | — | @@ -264,20 +264,20 @@ |
265 | 265 | int p[] = MathFunc.partitionList(new double[] {0.52,0.12},5); |
266 | 266 | analyzer = (Analyzer) Analyzers.getIndexerAnalyzer("Agreement reply readily",builder,null,null,related,p,null,null,null)[0]; |
267 | 267 | displayTokens2(analyzer,""); |
268 | | - |
| 268 | + |
269 | 269 | analyzer = (Analyzer) Analyzers.getIndexerAnalyzer("Pascal's earliest work was in the natural and applied sciences where he made important contributions to the construction of mechanical calculators, the study of fluids, and clarified the concepts of pressure and vacuum by generalizing the work of Evangelista Torricelli. Pascal also wrote powerfully in defense of the scientific method.",builder,null,null,null,null,null,null,null)[0]; |
270 | 270 | displayTokens2(analyzer,""); |
271 | 271 | analyzer = (Analyzer) Analyzers.getIndexerAnalyzer("1,039/Smoothed Out Slappy Hours",new FieldBuilder(IndexId.get("itwiki")),null,null,null,null,null,null,null)[0]; |
272 | 272 | displayTokens2(analyzer,""); |
273 | 273 | displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("itwiki")),"1,039/Smoothed Out Slappy Hours"); |
274 | | - |
| 274 | + |
275 | 275 | ArrayList<Aggregate> items = new ArrayList<Aggregate>(); |
276 | 276 | items.add(new Aggregate("douglas adams",10,IndexId.get("enwiki"),false)); |
277 | 277 | items.add(new Aggregate("the selected works...",2.1f,IndexId.get("enwiki"),false)); |
278 | 278 | items.add(new Aggregate("hurricane",3.22f,IndexId.get("enwiki"),false)); |
279 | 279 | items.add(new Aggregate("and some other stuff",3.2f,IndexId.get("enwiki"),false)); |
280 | 280 | displayTokens(new AggregateAnalyzer(items),"AGGREGATE TEST"); */ |
281 | | - |
| 281 | + |
282 | 282 | IndexId wl = IndexId.get("wikilucene"); |
283 | 283 | Analyzer an = Analyzers.getSearcherAnalyzer(wl); |
284 | 284 | Aggregate a1 = new Aggregate("Redheugh Bridges",1,wl,an,"alttitle",Flags.ALTTITLE); |
— | — | @@ -285,24 +285,24 @@ |
286 | 286 | ArrayList<Aggregate> al = new ArrayList<Aggregate>(); |
287 | 287 | al.add(a1); al.add(a2); |
288 | 288 | displayTokens(new AggregateAnalyzer(al),"AGGREGATE TEST"); |
289 | | - |
| 289 | + |
290 | 290 | displayTokens(Analyzers.getSpellCheckAnalyzer(IndexId.get("enwiki"),new HashSet<String>()), |
291 | 291 | "Agreement boxes reply readily Gödel, Gödel; a/b"); |
292 | | - |
293 | | - |
| 292 | + |
| 293 | + |
294 | 294 | if(true) |
295 | 295 | return; |
296 | | - |
| 296 | + |
297 | 297 | //testAnalyzer(new EnglishAnalyzer()); |
298 | 298 | testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("enwiki"))); |
299 | 299 | testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("dewiki"))); |
300 | 300 | testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("frwiki"))); |
301 | 301 | testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("srwiki"))); |
302 | 302 | testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("eswiki"))); |
303 | | - |
304 | | - |
| 303 | + |
| 304 | + |
305 | 305 | } |
306 | | - |
| 306 | + |
307 | 307 | private static void printCodePoints(String string) { |
308 | 308 | char[] str = string.toCharArray(); |
309 | 309 | for(int i=0;i<str.length;i++){ |
— | — | @@ -323,7 +323,7 @@ |
324 | 324 | byte[] b = ExtToken.serialize(analyzer.tokenStream("",article.content)); |
325 | 325 | if(i == 0) |
326 | 326 | size += b.length; |
327 | | - else |
| 327 | + else |
328 | 328 | size2 += b.length; |
329 | 329 | tokensFromAnalysis(analyzer, article.content,"contents"); |
330 | 330 | } |
— | — | @@ -331,7 +331,7 @@ |
332 | 332 | long delta = System.currentTimeMillis() - start; |
333 | 333 | System.out.println(delta+"ms ["+delta/count+"ms/ar] elapsed for analyzer "+analyzer+", size="+size+", size2="+size2); |
334 | 334 | } |
335 | | - |
| 335 | + |
336 | 336 | public static void testAnalyzer(Analyzer analyzer) throws IOException{ |
337 | 337 | ArticlesParser ap = new ArticlesParser("./test-data/indexing-articles.test"); |
338 | 338 | ArrayList<TestArticle> articles = ap.getArticles(); |
— | — | @@ -347,7 +347,7 @@ |
348 | 348 | long delta = System.currentTimeMillis() - start; |
349 | 349 | System.out.println(delta+"ms ["+delta/count+"ms/ar] elapsed for analyzer "+analyzer); |
350 | 350 | } |
351 | | - |
| 351 | + |
352 | 352 | public static void testAnalyzer(Analyzer analyzer, String name) throws IOException{ |
353 | 353 | ArticlesParser ap = new ArticlesParser("./test-data/indexing-articles.test"); |
354 | 354 | ArrayList<TestArticle> articles = ap.getArticles(); |
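The tokensFromAnalysis() helper quoted in this file is the backbone of these tests: it drains an analyzer's TokenStream token by token, and expected strings such as [(1990,0,4), (s,4,5), (iv,6,8)] are just Arrays.toString() over the result, since Lucene 2.x Token.toString() prints the term, its offsets, and any non-default type or position increment. A minimal self-contained sketch of that pattern against the stock Lucene 2.3 API (the loop body is elided in the hunk above; stock Analyzer.tokenStream() takes a Reader, so a StringReader wraps the text here):

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.ArrayList;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;

    public class TokenDumpSketch {
        /** Drain a TokenStream into an array, as AnalysisTest.tokensFromAnalysis() does. */
        public static Token[] tokensFromAnalysis(Analyzer analyzer, String text, String field)
                throws IOException {
            TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
            ArrayList<Token> tokenList = new ArrayList<Token>();
            Token t;
            while ((t = stream.next()) != null) // pre-2.9 token-by-token API
                tokenList.add(t);
            return tokenList.toArray(new Token[0]);
        }
    }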
Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/FastWikiTokenizerTest.java |
— | — | @@ -25,116 +25,116 @@ |
26 | 26 | |
27 | 27 | public class FastWikiTokenizerTest extends WikiTestCase { |
28 | 28 | IndexId iid; |
29 | | - TokenizerOptions options; |
30 | | - |
| 29 | + TokenizerOptions options; |
| 30 | + |
31 | 31 | public void testIndex(){ |
32 | 32 | this.iid = IndexId.get("enwiki"); |
33 | 33 | this.options = new TokenizerOptions.ContentOptions(false); |
34 | | - |
35 | | - assertEquals("1 [link] 1 [text]", |
| 34 | + |
| 35 | + assertEquals("1 [link] 1 [text]", |
36 | 36 | tokens("[[link text]]")); |
37 | | - |
38 | | - assertEquals("1 [anchor] 1 [text]", |
| 37 | + |
| 38 | + assertEquals("1 [anchor] 1 [text]", |
39 | 39 | tokens("[[some link|anchor text]]")); |
40 | | - |
41 | | - assertEquals("1 [italic] 2 [see]", |
| 40 | + |
| 41 | + assertEquals("1 [italic] 2 [see]", |
42 | 42 | tokens("''italic''<nowiki><!-- see --></nowiki><!-- nosee -->")); |
43 | | - |
44 | | - assertEquals("1 [http] 2 [en] 1 [wikipedia] 1 [org/] 0 [org] 1 [english] 1 [wikipedia]", |
| 43 | + |
| 44 | + assertEquals("1 [http] 2 [en] 1 [wikipedia] 1 [org/] 0 [org] 1 [english] 1 [wikipedia]", |
45 | 45 | tokens("[http://en.wikipedia.org/ english wikipedia]")); |
46 | | - |
47 | | - assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]", |
| 46 | + |
| 47 | + assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]", |
48 | 48 | tokens("[[Image:Argishti monument.JPG|thumb|King Argishti of Urartu]]")); |
49 | | - |
50 | | - assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]", |
| 49 | + |
| 50 | + assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]", |
51 | 51 | tokens("[[Image:Argishti monument.JPG|thumb|King [[link target|Argishti]] of Urartu]]")); |
52 | | - |
| 52 | + |
53 | 53 | assertEquals("500 [image] 1 [frizbi] 1 [jpg] 1 [frizbi] 1 [za] 1 [ultimate] 1 [28] 1 [cm] 1 [175] 1 [g]", |
54 | 54 | tokens("[[Image:frizbi.jpg|десно|мини|240п|Frizbi za ultimate, 28cm, 175g]]")); |
55 | | - |
56 | | - assertEquals("1 [image] 3 [argishti] 1 [monument] 1 [jpg] 1 [thumb] 1 [king] 1 [argishti] 1 [of] 1 [urartu]", |
| 55 | + |
| 56 | + assertEquals("1 [image] 3 [argishti] 1 [monument] 1 [jpg] 1 [thumb] 1 [king] 1 [argishti] 1 [of] 1 [urartu]", |
57 | 57 | tokens("[[Image:Argishti monument.JPG|thumb|King Argishti of Urartu")); |
58 | | - |
59 | | - assertEquals("1 [clinton] 1 [comets]", |
| 58 | + |
| 59 | + assertEquals("1 [clinton] 1 [comets]", |
60 | 60 | tokens("{| style=\"margin:0px 5px 10px 10px; border:1px solid #8888AA;\" align=right cellpadding=3 cellspacing=3 width=360\n|- align=\"center\" bgcolor=\"#dddddd\"\n|colspan=\"3\"| '''Clinton Comets'''")); |
61 | | - |
62 | | - assertEquals("2 [or] 1 [ا] 500 [lɒs] 1 [ˈændʒəˌlɪs] 0 [ˈaendʒəˌlɪs]", |
| 61 | + |
| 62 | + assertEquals("2 [or] 1 [ا] 500 [lɒs] 1 [ˈændʒəˌlɪs] 0 [ˈaendʒəˌlɪs]", |
63 | 63 | tokens("{{IPA|[lɒs ˈændʒəˌlɪs]}} < or < © ©ا")); |
64 | | - |
65 | | - assertEquals("500 [text1] 1 [text2] 1 [text3]", |
| 64 | + |
| 65 | + assertEquals("500 [text1] 1 [text2] 1 [text3]", |
66 | 66 | tokens("{{template|text1}} {{template|text2|text3}}")); |
67 | | - |
| 67 | + |
68 | 68 | assertEquals("", |
69 | 69 | tokens("[[sr:Naslov]]")); |
70 | | - |
71 | | - assertEquals("500 [some] 1 [category] 1 [name]", |
| 70 | + |
| 71 | + assertEquals("500 [some] 1 [category] 1 [name]", |
72 | 72 | tokens("[[Category:Some category name]]")); |
73 | | - |
74 | | - assertEquals("[Some category name]", |
| 73 | + |
| 74 | + assertEquals("[Some category name]", |
75 | 75 | categories("[[Category:Some category name]]")); |
76 | | - |
77 | | - assertEquals("500 [param1] 1 [param2] 1 [value2]", |
| 76 | + |
| 77 | + assertEquals("500 [param1] 1 [param2] 1 [value2]", |
78 | 78 | tokens("{{template|param1 = {{value1}}|param2 = value2}}")); |
79 | | - |
80 | | - assertEquals("500 [param1] 1 [value1] 1 [param2] 1 [value2]", |
| 79 | + |
| 80 | + assertEquals("500 [param1] 1 [value1] 1 [param2] 1 [value2]", |
81 | 81 | tokens("{{template|param1 = [[target|value1]]|param2 = value2}}")); |
82 | | - |
| 82 | + |
83 | 83 | assertEquals("1 [wikipedia] 1 [is] 1 [accurate] 2 [and] 1 [it's] 0 [its] 1 [not] 1 [a] 1 [lie] 20 [see] 1 [kurir]", |
84 | 84 | tokens("Wikipedia is accurate<ref>see Kurir</ref>, and it's not a lie.")); |
85 | | - |
| 85 | + |
86 | 86 | assertEquals("1 [this] 1 [column] 1 [is] 1 [100] 1 [points] 1 [wide] 1 [this] 1 [column] 1 [is] 1 [200] 1 [points] 1 [wide] 1 [this] 1 [column] 1 [is] 1 [300] 1 [points] 1 [wide] 1 [blah] 1 [blah] 1 [blah]", |
87 | 87 | tokens("{| border=\"1\" cellpadding=\"2\"\n|-\n|width=\"100pt\"|This column is 100 points wide\n|width=\"200pt\"|This column is 200 points wide\n|width=\"300pt\"|This column is 300 points wide\n|-\n|blah || blah || blah\n|}")); |
88 | | - |
| 88 | + |
89 | 89 | assertEquals("1 [first] 10 [second]", |
90 | 90 | tokens("first\n\nsecond")); |
91 | | - |
| 91 | + |
92 | 92 | assertEquals("1 [u2] 1 [heading1]", |
93 | 93 | tokens("u2 heading1")); |
94 | | - |
| 94 | + |
95 | 95 | assertEquals("1 [test] 1 [apostrophe's] 0 [apostrophes] 1 [and] 1 [other’s] 0 [others]", |
96 | 96 | tokens("Test apostrophe's and other\u2019s.")); |
97 | 97 | |
98 | | - |
| 98 | + |
99 | 99 | } |
100 | | - |
| 100 | + |
101 | 101 | public void testHighlight(){ |
102 | 102 | this.iid = IndexId.get("enwiki"); |
103 | 103 | this.options = new TokenizerOptions.Highlight(false); |
104 | | - |
105 | | - assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['link' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]", |
| 104 | + |
| 105 | + assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['link' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]", |
106 | 106 | tokens("[[link text]]")); |
107 | | - |
108 | | - assertEquals("1 [' ' GLUE BULLETINS] 10 ['bullet1' TEXT BULLETINS] 1 [' ' SENTENCE_BREAK BULLETINS] 1 ['bullet2' TEXT BULLETINS]", |
| 107 | + |
| 108 | + assertEquals("1 [' ' GLUE BULLETINS] 10 ['bullet1' TEXT BULLETINS] 1 [' ' SENTENCE_BREAK BULLETINS] 1 ['bullet2' TEXT BULLETINS]", |
109 | 109 | tokens("* bullet1\n* bullet2")); |
110 | | - |
111 | | - assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['http' TEXT FIRST_SECTION] 1 ['://' MINOR_BREAK FIRST_SECTION] 1 ['en' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['wikipedia' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['org/' TEXT FIRST_SECTION] 0 ['org' TEXT FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['english' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]", |
| 110 | + |
| 111 | + assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['http' TEXT FIRST_SECTION] 1 ['://' MINOR_BREAK FIRST_SECTION] 1 ['en' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['wikipedia' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['org/' TEXT FIRST_SECTION] 0 ['org' TEXT FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['english' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]", |
112 | 112 | tokens("[http://en.wikipedia.org/wiki english wiki]")); |
113 | | - |
114 | | - assertEquals("1 [' ' GLUE IMAGE_CAT_IW] 1 ['image' TEXT IMAGE_CAT_IW] 1 [':' MINOR_BREAK IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['monument' TEXT IMAGE_CAT_IW] 1 ['.' SENTENCE_BREAK IMAGE_CAT_IW] 1 ['jpg' TEXT IMAGE_CAT_IW] 1 [' | ' GLUE IMAGE_CAT_IW] 1 ['king' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['of' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['urartu' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 [' ' SENTENCE_BREAK FIRST_SECTION] 1 ['main' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION]", |
| 113 | + |
| 114 | + assertEquals("1 [' ' GLUE IMAGE_CAT_IW] 1 ['image' TEXT IMAGE_CAT_IW] 1 [':' MINOR_BREAK IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['monument' TEXT IMAGE_CAT_IW] 1 ['.' SENTENCE_BREAK IMAGE_CAT_IW] 1 ['jpg' TEXT IMAGE_CAT_IW] 1 [' | ' GLUE IMAGE_CAT_IW] 1 ['king' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['of' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['urartu' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 [' ' SENTENCE_BREAK FIRST_SECTION] 1 ['main' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION]", |
115 | 115 | tokens("[[Image:Argishti monument.JPG|thumb|King Argishti of Urartu]]\n\nMain text")); |
116 | | - |
| 116 | + |
117 | 117 | assertEquals("1 [' ' GLUE IMAGE_CAT_IW] 1 ['category' TEXT IMAGE_CAT_IW] 1 [':' MINOR_BREAK IMAGE_CAT_IW] 1 ['name' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW]", |
118 | 118 | tokens("[[Category:Name|sort key]]")); |
119 | | - |
120 | | - assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]", |
| 119 | + |
| 120 | + assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]", |
121 | 121 | tokens("{{template|param1 = [[value1]]|param2 = value2}}")); |
122 | | - |
123 | | - assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]", |
| 122 | + |
| 123 | + assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]", |
124 | 124 | tokens("{{template|param1 = {{value1}}|param2 = value2}}")); |
125 | 125 | |
126 | 126 | assertEquals("1 [' ' GLUE HEADING] 1 ['heading' TEXT HEADING] 1 [' ' GLUE HEADING] 1 ['1' TEXT HEADING] 1 [' ' GLUE HEADING] 1 [' ' GLUE NORMAL]", |
127 | 127 | tokens("== Heading 1 ==\n")); |
128 | | - |
| 128 | + |
129 | 129 | assertEquals("1 ['wikipedia' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['is' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['accurate' TEXT FIRST_SECTION] 1 [', ' MINOR_BREAK FIRST_SECTION] 1 ['and' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['it's' TEXT FIRST_SECTION] 0 ['its' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['not' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['a' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['lie' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 20 [' ' GLUE REFERENCE] 1 ['see' TEXT REFERENCE] 1 [' ' GLUE REFERENCE] 1 ['kurir' TEXT REFERENCE] 1 [' ' GLUE REFERENCE]", |
130 | 130 | tokens("Wikipedia is accurate<ref>see Kurir</ref>, and it's not a lie.")); |
131 | | - |
| 131 | + |
132 | 132 | assertEquals("1 [' | ' GLUE TABLE] 1 ['this' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['column' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['is' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['100' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['points' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['wide' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['this' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['column' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['is' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['200' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['points' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['wide' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['this' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['column' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['is' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['300' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['points' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['wide' TEXT TABLE] 1 [' | ' SENTENCE_BREAK TABLE] 1 ['blah' TEXT TABLE] 1 [' | ' GLUE TABLE] 1 ['blah' TEXT TABLE] 1 [' | ' GLUE TABLE] 1 ['blah' TEXT TABLE] 1 [' | ' GLUE FIRST_SECTION]", |
133 | 133 | tokens("{| border=\"1\" cellpadding=\"2\"\n|-\n|width=\"100pt\"|This column is 100 points wide\n|width=\"200pt\"|This column is 200 points wide\n|width=\"300pt\"|This column is 300 points wide\n|-\n|blah || blah || blah\n|}")); |
134 | | - |
| 134 | + |
135 | 135 | } |
136 | | - |
137 | | - |
138 | | - |
| 136 | + |
| 137 | + |
| 138 | + |
139 | 139 | public String tokens(String text){ |
140 | 140 | StringBuilder sb = new StringBuilder(); |
141 | 141 | FastWikiTokenizerEngine parser = new FastWikiTokenizerEngine(text,iid,options); |
— | — | @@ -149,7 +149,7 @@ |
150 | 150 | } |
151 | 151 | return sb.toString().trim(); |
152 | 152 | } |
153 | | - |
| 153 | + |
154 | 154 | public String categories(String text){ |
155 | 155 | FastWikiTokenizerEngine parser = new FastWikiTokenizerEngine(text,iid,options); |
156 | 156 | parser.parse(); |
— | — | @@ -179,7 +179,7 @@ |
180 | 180 | System.out.print("INTERWIKI: "); |
181 | 181 | } |
182 | 182 | for(Entry<String,String> t : iw.entrySet()){ |
183 | | - System.out.print("["+t.getKey()+"] => ["+t.getValue()+"] "); |
| 183 | + System.out.print("["+t.getKey()+"] => ["+t.getValue()+"] "); |
184 | 184 | } |
185 | 185 | if(iw.size()!=0) System.out.println(); |
186 | 186 | |
— | — | @@ -188,7 +188,7 @@ |
189 | 189 | System.out.print("KEYWORDS: "); |
190 | 190 | } |
191 | 191 | for(String t : keywords){ |
192 | | - System.out.print("["+t+"] "); |
| 192 | + System.out.print("["+t+"] "); |
193 | 193 | } |
194 | 194 | if(keywords.size()!=0) System.out.println(); |
195 | 195 | |
— | — | @@ -198,7 +198,7 @@ |
199 | 199 | static void showTokens(String text){ |
200 | 200 | System.out.println("TEXT: "+text); |
201 | 201 | System.out.flush(); |
202 | | - displayTokensForParser(text); |
| 202 | + displayTokensForParser(text); |
203 | 203 | System.out.flush(); |
204 | 204 | } |
205 | 205 | |
— | — | @@ -231,7 +231,7 @@ |
232 | 232 | text = "|something|else| is| there| to | see"; |
233 | 233 | showTokens(text); |
234 | 234 | text = "-test 3.14 and U.S.A and more, .test more"; |
235 | | - showTokens(text); |
| 235 | + showTokens(text); |
236 | 236 | text = "{{IPA|[lɒs ˈændʒəˌlɪs]}} < or < © ©ا or ا "; |
237 | 237 | showTokens(text); |
238 | 238 | text = "| Unseen\n|-\n| \"Junior\"\n|\n| Goa'uld larva\n|} something"; |
— | — | @@ -318,14 +318,14 @@ |
319 | 319 | text = "[[First]] second third fourth and so on goes the ... [[last link]]"; |
320 | 320 | showTokens(text); |
321 | 321 | text = "{{Something| param = {{another}}[[First]] } }} }} }} [[first good]]s {{name| [[many]] many many tokens }} second third fourth and so on goes the ... [[good keyword]]"; |
322 | | - showTokens(text); |
323 | | - text = "{| style=\"float: right; clear: right; background-color: transparent\"\n|-\n|{{Infobox Military Conflict|\n|conflict=1982 Lebanon War <br>([[Israel-Lebanon conflict]])\n|image=[[Image:Map of Lebanon.png|300px]]\n|caption=Map of modern Lebanon\n|date=June - September 1982\n|place=Southern [[Lebanon]]\n|casus=Two main causes:\n*Terrorist raids on northern Israel by [[PLO]] [[guerrilla]] based in Lebanon\n*the [[Shlomo Argov|shooting of Israel's ambassador]] by the [[Abu Nidal Organization]]<ref>[http://www.usatoday.com/graphics/news/gra/gisrael2/flash.htm The Middle East conflict], ''[[USA Today]]'' (sourced guardian.co.uk, Facts on File, AP) \"Israel invades Lebanon in response to terrorist attacks by PLO guerrillas based there.\"</ref><ref>{{cite book\n|author = Mark C. Carnes, John A. Garraty\n|title = The American Nation\n|publisher = Pearson Education, Inc.\n|date = 2006\n|location = USA\n|pages = 903\n|id = ISBN 0-321-42606-1\n}}</ref><ref>{{cite book\n|author= ''[[Time (magazine)|Time]]''\n|title = The Year in Review\n|publisher = Time Books\n|date = 2006\n|location = 1271 Avenue of the Americs, New York, NY 10020\n|id = ISSN: 1097-5721\n}} \"For decades now, Arab terrorists operating out of southern Lebanon have staged raids and fired mortar shells into northern Israel, denying the Israelis peace of mind. In the early 1980s, the terrorists operating out of Lebanon were controlled by Yasser Arafat's Palestine Liberation Organization (P.L.O.). After Israel's ambassador to Britain, Shlomo Argov, was shot in cold blood and seriously wounded by the Palestinian terror group Abu Nidal in London in 1982, fed-up Israelis sent tanks and troops rolling into Lebanon to disperse the guerrillas.\" (pg. 44-45)</ref><ref>\"The Palestine Liberation Organization (PLO) had been launching guerrilla attacks against Israel since the 1960s (see Palestine Liberation Organization). After the PLO was driven from Jordan in 1971, the organization established bases in southern Lebanon, from which it continued to attack Israel. In 1981 heavy PLO rocket fire on Israeli settlements led Israel to conduct air strikes in Lebanon. The Israelis also destroyed Iraq's nuclear reactor at Daura near Baghdad."; |
324 | | - showTokens(text); |
| 322 | + showTokens(text); |
| 323 | + text = "{| style=\"float: right; clear: right; background-color: transparent\"\n|-\n|{{Infobox Military Conflict|\n|conflict=1982 Lebanon War <br>([[Israel-Lebanon conflict]])\n|image=[[Image:Map of Lebanon.png|300px]]\n|caption=Map of modern Lebanon\n|date=June - September 1982\n|place=Southern [[Lebanon]]\n|casus=Two main causes:\n*Terrorist raids on northern Israel by [[PLO]] [[guerrilla]] based in Lebanon\n*the [[Shlomo Argov|shooting of Israel's ambassador]] by the [[Abu Nidal Organization]]<ref>[http://www.usatoday.com/graphics/news/gra/gisrael2/flash.htm The Middle East conflict], ''[[USA Today]]'' (sourced guardian.co.uk, Facts on File, AP) \"Israel invades Lebanon in response to terrorist attacks by PLO guerrillas based there.\"</ref><ref>{{cite book\n|author = Mark C. Carnes, John A. Garraty\n|title = The American Nation\n|publisher = Pearson Education, Inc.\n|date = 2006\n|location = USA\n|pages = 903\n|id = ISBN 0-321-42606-1\n}}</ref><ref>{{cite book\n|author= ''[[Time (magazine)|Time]]''\n|title = The Year in Review\n|publisher = Time Books\n|date = 2006\n|location = 1271 Avenue of the Americs, New York, NY 10020\n|id = ISSN: 1097-5721\n}} \"For decades now, Arab terrorists operating out of southern Lebanon have staged raids and fired mortar shells into northern Israel, denying the Israelis peace of mind. In the early 1980s, the terrorists operating out of Lebanon were controlled by Yasser Arafat's Palestine Liberation Organization (P.L.O.). After Israel's ambassador to Britain, Shlomo Argov, was shot in cold blood and seriously wounded by the Palestinian terror group Abu Nidal in London in 1982, fed-up Israelis sent tanks and troops rolling into Lebanon to disperse the guerrillas.\" (pg. 44-45)</ref><ref>\"The Palestine Liberation Organization (PLO) had been launching guerrilla attacks against Israel since the 1960s (see Palestine Liberation Organization). After the PLO was driven from Jordan in 1971, the organization established bases in southern Lebanon, from which it continued to attack Israel. In 1981 heavy PLO rocket fire on Israeli settlements led Israel to conduct air strikes in Lebanon. The Israelis also destroyed Iraq's nuclear reactor at Daura near Baghdad."; |
| 324 | + showTokens(text); |
325 | 325 | |
326 | 326 | |
327 | 327 | |
328 | 328 | ArticlesParser ap1 = new ArticlesParser("./test-data/indexing-articles.test"); |
329 | | - ArrayList<TestArticle> articles1 = ap1.getArticles(); |
| 329 | + ArrayList<TestArticle> articles1 = ap1.getArticles(); |
330 | 330 | showTokens(articles1.get(articles1.size()-1).content); |
331 | 331 | |
332 | 332 | //if(true) |
— | — | @@ -378,7 +378,7 @@ |
379 | 379 | ObjectOutputStream out = new ObjectOutputStream(ba); |
380 | 380 | out.writeObject(tokens); |
381 | 381 | size += ba.size(); */ |
382 | | - //byte[] b = ExtToken.serialize(tokens); |
| 382 | + //byte[] b = ExtToken.serialize(tokens); |
383 | 383 | //size += b.length; |
384 | 384 | //ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(ba.toByteArray())); |
385 | 385 | //ArrayList<ExtToken> some = (ArrayList<ExtToken>) in.readObject(); |
— | — | @@ -388,7 +388,7 @@ |
389 | 389 | System.out.println("Parser elapsed: "+delta+"ms, per serialization: "+((double)delta/total)+"ms, size:"+size/total); |
390 | 390 | |
391 | 391 | } |
392 | | - |
| 392 | + |
393 | 393 | public void testVowels(){ |
394 | 394 | assertEquals("zdrv", FastWikiTokenizerEngine.deleteVowels("zdravo")); |
395 | 395 | assertEquals("v g mlrd", FastWikiTokenizerEngine.deleteVowels("eve ga milorad")); |
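In the expected strings above, each token is rendered as its position increment followed by the term in brackets, so "500 [image]" means the tokenizer left a 500-position gap before emitting "image"; those large gaps keep image, category, and template tokens away from adjacent body text in phrase matching. The loop inside the tokens() helper falls outside the quoted hunks; a hedged reconstruction, assuming parse() returns a list of Lucene Tokens (which categories() above suggests, since it calls parse() before reading its side effects):

    // Sketch of the tokens() helper's elided loop; parse() returning a
    // collection of Lucene Tokens is an inference, not quoted in this diff.
    public String tokens(String text) {
        StringBuilder sb = new StringBuilder();
        FastWikiTokenizerEngine parser = new FastWikiTokenizerEngine(text, iid, options);
        for (Token t : parser.parse()) {
            sb.append(t.getPositionIncrement());       // the leading number, e.g. 500
            sb.append(" [").append(t.termText()).append("] ");
        }
        return sb.toString().trim();
    }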
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers |
___________________________________________________________________ |
Name: svn:ignore |
396 | 396 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/beans |
___________________________________________________________________ |
Name: svn:ignore |
397 | 397 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/util |
___________________________________________________________________ |
Name: svn:ignore |
398 | 398 | + *.class |
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/ranks |
___________________________________________________________________ |
Name: svn:ignore |
399 | 399 | + *.class |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java |
— | — | @@ -21,7 +21,7 @@ |
22 | 22 | |
23 | 23 | /** |
24 | 24 | * Local cache of aggregate field meta information |
25 | | - * |
| 25 | + * |
26 | 26 | * @author rainman |
27 | 27 | * |
28 | 28 | */ |
— | — | @@ -32,20 +32,20 @@ |
33 | 33 | protected static Object lock = new Object(); |
34 | 34 | /** directory -> fields */ |
35 | 35 | protected static WeakHashMap<Directory,Set<String>> cachingInProgress = new WeakHashMap<Directory,Set<String>>(); |
36 | | - |
| 36 | + |
37 | 37 | /** Check if there is a current background caching on a reader */ |
38 | 38 | public static boolean isBeingCached(IndexReader reader){ |
39 | 39 | synchronized(cachingInProgress){ |
40 | 40 | return cachingInProgress.containsKey(reader.directory()); |
41 | 41 | } |
42 | 42 | } |
43 | | - |
| 43 | + |
44 | 44 | public static void invalidateCache(IndexReader reader){ |
45 | 45 | synchronized (lock) { |
46 | 46 | cache.remove(reader.directory()); |
47 | 47 | } |
48 | 48 | } |
49 | | - |
| 49 | + |
50 | 50 | /** Get a meta cacher, return null if field is already cached or being cached */ |
51 | 51 | public static CacheBuilder getCacherBuilder(IndexReader reader, String field) throws IOException { |
52 | 52 | synchronized(lock){ |
— | — | @@ -61,10 +61,10 @@ |
62 | 62 | s = new AggregateMetaFieldSource(reader,field); |
63 | 63 | fields.put(field,s); |
64 | 64 | return s; |
65 | | - } |
| 65 | + } |
66 | 66 | } |
67 | 67 | } |
68 | | - |
| 68 | + |
69 | 69 | /** Get a cached meta source to use in queries */ |
70 | 70 | public static AggregateMetaFieldSource getCachedSource(IndexReader reader, String field) { |
71 | 71 | synchronized(lock){ |
— | — | @@ -75,10 +75,10 @@ |
76 | 76 | } |
77 | 77 | } |
78 | 78 | |
79 | | - |
| 79 | + |
80 | 80 | /** |
81 | | - * Cached meta aggregate info |
82 | | - * |
| 81 | + * Cached meta aggregate info |
| 82 | + * |
83 | 83 | * @author rainman |
84 | 84 | * |
85 | 85 | */ |
— | — | @@ -94,10 +94,10 @@ |
95 | 95 | protected String field; |
96 | 96 | protected boolean cachingFinished = false; |
97 | 97 | protected boolean isOptimized; |
98 | | - // temporary: |
| 98 | + // temporary: |
99 | 99 | protected int count = 0; |
100 | 100 | protected int maxdoc = 0; |
101 | | - |
| 101 | + |
102 | 102 | public void init() { |
103 | 103 | synchronized(cachingInProgress){ |
104 | 104 | Set<String> set = cachingInProgress.get(reader.directory()); |
— | — | @@ -113,7 +113,7 @@ |
114 | 114 | index = new int[maxdoc]; |
115 | 115 | length = new byte[maxdoc]; // estimate maxdoc values |
116 | 116 | lengthNoStopWords = new byte[maxdoc]; |
117 | | - lengthComplete = new byte[maxdoc]; |
| 117 | + lengthComplete = new byte[maxdoc]; |
118 | 118 | boost = new float[maxdoc]; |
119 | 119 | flags = new byte[maxdoc]; |
120 | 120 | namespaces = new byte[maxdoc]; |
— | — | @@ -133,10 +133,10 @@ |
134 | 134 | if(count >= length.length){ |
135 | 135 | length = extendBytes(length); |
136 | 136 | lengthNoStopWords = extendBytes(lengthNoStopWords); |
137 | | - lengthComplete = extendBytes(lengthComplete); |
| 137 | + lengthComplete = extendBytes(lengthComplete); |
138 | 138 | boost = extendFloats(boost); |
139 | 139 | flags = extendBytes(flags); |
140 | | - } |
| 140 | + } |
141 | 141 | length[count] = stored[j*8]; |
142 | 142 | if(length[count] == 0){ |
143 | 143 | log.debug("Broken length=0 for docid="+i+", at position "+j); |
— | — | @@ -147,14 +147,14 @@ |
148 | 148 | lengthComplete[count] = stored[j*8+6]; |
149 | 149 | flags[count] = stored[j*8+7]; |
150 | 150 | count++; |
151 | | - } |
| 151 | + } |
152 | 152 | } catch(Exception e){ |
153 | 153 | log.error("Exception during processing stored_field="+field+" on docid="+i+", with stored="+stored+" : "+e.getMessage(),e); |
154 | 154 | e.printStackTrace(); |
155 | 155 | throw new IOException(e.getMessage()); |
156 | 156 | } |
157 | 157 | } |
158 | | - |
| 158 | + |
159 | 159 | public void end(){ |
160 | 160 | if(count < length.length - 1){ |
161 | 161 | length = resizeBytes(length,count); |
— | — | @@ -164,7 +164,7 @@ |
165 | 165 | flags = resizeBytes(flags,count); |
166 | 166 | } |
167 | 167 | cachingFinished = true; |
168 | | - |
| 168 | + |
169 | 169 | synchronized(cachingInProgress){ |
170 | 170 | Set<String> set = cachingInProgress.get(reader.directory()); |
171 | 171 | set.remove(field); |
— | — | @@ -172,7 +172,7 @@ |
173 | 173 | cachingInProgress.remove(reader.directory()); |
174 | 174 | } |
175 | 175 | } |
176 | | - |
| 176 | + |
177 | 177 | protected byte[] extendBytes(byte[] array){ |
178 | 178 | return resizeBytes(array,array.length*2); |
179 | 179 | } |
— | — | @@ -183,13 +183,13 @@ |
184 | 184 | } |
185 | 185 | protected float[] extendFloats(float[] array){ |
186 | 186 | return resizeFloats(array,array.length*2); |
187 | | - } |
| 187 | + } |
188 | 188 | protected float[] resizeFloats(float[] array, int size){ |
189 | 189 | float[] t = new float[size]; |
190 | 190 | System.arraycopy(array,0,t,0,Math.min(array.length,size)); |
191 | 191 | return t; |
192 | 192 | } |
193 | | - |
| 193 | + |
194 | 194 | protected AggregateMetaFieldSource(IndexReader reader, String fieldBase) throws IOException{ |
195 | 195 | this.reader = reader; |
196 | 196 | this.field = fieldBase+"_meta"; |
— | — | @@ -203,13 +203,13 @@ |
204 | 204 | int end = (docid == index.length-1)? length.length : index[docid+1]; |
205 | 205 | if(position >= end-start){ |
206 | 206 | if(checkExists) // if true this is not an error |
207 | | - return -1; |
| 207 | + return -1; |
208 | 208 | else |
209 | 209 | throwException(docid,position,end-start-1); |
210 | 210 | } |
211 | 211 | return start+position; |
212 | 212 | } |
213 | | - |
| 213 | + |
214 | 214 | private void throwException(int docid, int position, int lastValid){ |
215 | 215 | try { |
216 | 216 | // first try to give more detailed error |
— | — | @@ -217,22 +217,22 @@ |
218 | 218 | } catch (IOException e) { |
219 | 219 | e.printStackTrace(); |
220 | 220 | throw new ArrayIndexOutOfBoundsException("Requested position "+position+" on field "+field+" unavailable"+" on "+reader.directory()); |
221 | | - } |
| 221 | + } |
222 | 222 | } |
223 | | - |
| 223 | + |
224 | 224 | protected byte[] getStored(int docid) throws CorruptIndexException, IOException{ |
225 | 225 | return reader.document(docid).getBinaryValue(field); |
226 | 226 | } |
227 | | - |
228 | | - /** Get length of nonalias tokens */ |
| 227 | + |
| 228 | + /** Get length of nonalias tokens */ |
229 | 229 | public int getLength(int docid, int position) throws CorruptIndexException, IOException{ |
230 | 230 | if(!cachingFinished) // still caching in background |
231 | 231 | return getStored(docid)[position*8]; |
232 | 232 | return length[getValueIndex(docid,position)]; |
233 | | - } |
234 | | - /** Get length without stop words */ |
| 233 | + } |
| 234 | + /** Get length without stop words */ |
235 | 235 | public int getLengthNoStopWords(int docid, int position) throws CorruptIndexException, IOException{ |
236 | | - if(!cachingFinished) |
| 236 | + if(!cachingFinished) |
237 | 237 | return getStored(docid)[position*8+1]; |
238 | 238 | return lengthNoStopWords[getValueIndex(docid,position)]; |
239 | 239 | } |
— | — | @@ -242,7 +242,7 @@ |
243 | 243 | return getStored(docid)[position*8+6]; |
244 | 244 | return lengthComplete[getValueIndex(docid,position)]; |
245 | 245 | } |
246 | | - |
| 246 | + |
247 | 247 | /** generic function to get boost value at some position, if checkExists=true won't die on error */ |
248 | 248 | private float getBoost(int docid, int position, boolean checkExists) throws CorruptIndexException, IOException{ |
249 | 249 | if(!cachingFinished){ |
— | — | @@ -261,25 +261,25 @@ |
262 | 262 | return 1; |
263 | 263 | return boost[inx]; |
264 | 264 | } |
265 | | - |
266 | | - /** Get boost for position */ |
| 265 | + |
| 266 | + /** Get boost for position */ |
267 | 267 | public float getBoost(int docid, int position) throws CorruptIndexException, IOException{ |
268 | 268 | return getBoost(docid,position,false); |
269 | 269 | } |
270 | | - |
| 270 | + |
271 | 271 | /** Get rank (boost at position 0) */ |
272 | 272 | public float getRank(int docid) throws CorruptIndexException, IOException{ |
273 | 273 | return getBoost(docid,0,true); |
274 | 274 | } |
275 | | - |
| 275 | + |
276 | 276 | /** Get namespace of the document */ |
277 | 277 | public int getNamespace(int docid) throws CorruptIndexException, IOException{ |
278 | 278 | if(!cachingFinished){ |
279 | 279 | return Integer.parseInt(reader.document(docid).get("namespace")); |
280 | | - } |
| 280 | + } |
281 | 281 | return namespaces[docid]; |
282 | 282 | } |
283 | | - |
| 283 | + |
284 | 284 | /** Get flag values for docid at position */ |
285 | 285 | public Flags getFlags(int docid, int position) throws CorruptIndexException, IOException{ |
286 | 286 | int ord = 0; |
— | — | @@ -290,8 +290,8 @@ |
291 | 291 | |
292 | 292 | return Flags.values()[ord]; |
293 | 293 | } |
294 | | - |
295 | | - |
| 294 | + |
| 295 | + |
296 | 296 | } |
297 | 297 | |
298 | 298 | } |
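Pieced together from the offsets visible in the hunks above (stored[j*8], j*8+1, j*8+6, j*8+7, and the per-slot arrays they feed), each aggregate position in the stored "<field>_meta" binary value occupies eight bytes: plain length, length without stop words, then what is presumably a four-byte boost, the complete length, and a flags byte. A hedged decoder for one such record; treating bytes 2-5 as a big-endian float is an assumption, since their decoding is not part of the quoted diff:

    /** Decode one 8-byte aggregate meta record. Offsets 0, 1, 6 and 7 are taken
     *  from AggregateMetaFieldSource above; the boost layout (bytes 2-5) is an
     *  assumption. */
    static void decodeMetaRecord(byte[] stored, int j) {
        int length            = stored[j * 8];       // non-alias token count
        int lengthNoStopWords = stored[j * 8 + 1];
        float boost = Float.intBitsToFloat(           // assumed big-endian float
              ((stored[j * 8 + 2] & 0xff) << 24)
            | ((stored[j * 8 + 3] & 0xff) << 16)
            | ((stored[j * 8 + 4] & 0xff) << 8)
            |  (stored[j * 8 + 5] & 0xff));
        int lengthComplete    = stored[j * 8 + 6];
        byte flags            = stored[j * 8 + 7];
        System.out.println(length + "," + lengthNoStopWords + "," + lengthComplete
            + " boost=" + boost + " flags=" + flags);
    }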
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateInfoImpl.java |
— | — | @@ -11,11 +11,11 @@ |
12 | 12 | import org.wikimedia.lsearch.analyzers.Aggregate.Flags; |
13 | 13 | import org.wikimedia.lsearch.search.AggregateMetaField.AggregateMetaFieldSource; |
14 | 14 | |
15 | | -/** |
| 15 | +/** |
16 | 16 | * Wrapper for aggregate fields info in the index. Include an instance |
17 | 17 | * of this class into CustomPhraseQuery to use the additional meta |
18 | | - * info (which is locally cached in AggregateMetaField). |
19 | | - * |
| 18 | + * info (which is locally cached in AggregateMetaField). |
| 19 | + * |
20 | 20 | * @author rainman |
21 | 21 | * |
22 | 22 | */ |
— | — | @@ -24,7 +24,7 @@ |
25 | 25 | protected transient AggregateMetaFieldSource src = null; |
26 | 26 | protected boolean hasRankingData = false; |
27 | 27 | protected String field = null; |
28 | | - |
| 28 | + |
29 | 29 | /** Call this while (local) scorer is constructed to init cached meta info */ |
30 | 30 | public void init(IndexReader reader, String field) throws IOException { |
31 | 31 | this.field = field; |
— | — | @@ -36,7 +36,7 @@ |
37 | 37 | protected int getSlot(int pos){ |
38 | 38 | return pos / AggregateAnalyzer.TOKEN_GAP; |
39 | 39 | } |
40 | | - |
| 40 | + |
41 | 41 | public int length(int docid, int pos) throws IOException { |
42 | 42 | try{ |
43 | 43 | return src.getLength(docid,getSlot(pos)); |
— | — | @@ -45,7 +45,7 @@ |
46 | 46 | throw e; |
47 | 47 | } |
48 | 48 | } |
49 | | - |
| 49 | + |
50 | 50 | public float boost(int docid, int pos) throws IOException { |
51 | 51 | try{ |
52 | 52 | return src.getBoost(docid,getSlot(pos)); |
— | — | @@ -63,7 +63,7 @@ |
64 | 64 | throw e; |
65 | 65 | } |
66 | 66 | } |
67 | | - |
| 67 | + |
68 | 68 | public int lengthComplete(int docid, int pos) throws IOException { |
69 | 69 | try{ |
70 | 70 | return src.getLengthComplete(docid,getSlot(pos)); |
— | — | @@ -72,14 +72,14 @@ |
73 | 73 | throw e; |
74 | 74 | } |
75 | 75 | } |
76 | | - |
| 76 | + |
77 | 77 | public float rank(int docid) throws IOException { |
78 | 78 | if(hasRankingData) |
79 | 79 | return src.getRank(docid); |
80 | | - else |
| 80 | + else |
81 | 81 | throw new RuntimeException("Trying to fetch ranking data on field "+field+" where it's not available."); |
82 | 82 | } |
83 | | - |
| 83 | + |
84 | 84 | public int namespace(int docid) throws IOException{ |
85 | 85 | return src.getNamespace(docid); |
86 | 86 | } |
— | — | @@ -87,7 +87,7 @@ |
88 | 88 | public boolean hasRankingData() { |
89 | 89 | return hasRankingData; |
90 | 90 | } |
91 | | - |
| 91 | + |
92 | 92 | public Flags flags(int docid, int pos) throws IOException { |
93 | 93 | try{ |
94 | 94 | return src.getFlags(docid,getSlot(pos)); |
— | — | @@ -96,14 +96,14 @@ |
97 | 97 | throw e; |
98 | 98 | } |
99 | 99 | } |
100 | | - |
| 100 | + |
101 | 101 | /** Provides ranking information */ |
102 | 102 | public static class RankInfo extends AggregateInfoImpl { |
103 | 103 | @Override |
104 | 104 | public void init(IndexReader reader, String field) throws IOException { |
105 | 105 | super.init(reader, "alttitle"); |
106 | 106 | } |
107 | | - |
| 107 | + |
108 | 108 | } |
109 | 109 | |
110 | 110 | } |
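All the accessors above funnel through getSlot(), which maps a raw term position back to the aggregate item it belongs to: items are indexed AggregateAnalyzer.TOKEN_GAP positions apart, so integer division recovers the slot used to index the cached length, boost, and flags arrays. A small illustration of the arithmetic; TOKEN_GAP's real value is defined in AggregateAnalyzer and does not appear in this diff, so 100 below is a placeholder:

    // Placeholder value; the real constant lives in AggregateAnalyzer.
    static final int TOKEN_GAP = 100;

    static int getSlot(int pos) {         // mirrors AggregateInfoImpl.getSlot()
        return pos / TOKEN_GAP;           // each aggregate item owns one gap
    }
    // With TOKEN_GAP = 100, a match at position 230 lands in slot 2, i.e. the
    // third aggregate item, whose meta record the cached source then supplies.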
Index: branches/lucene-search-2.1/build.xml |
— | — | @@ -13,9 +13,9 @@ |
14 | 14 | <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/* VERSION configure build update test/** udplogger/**"/> |
15 | 15 | <property name="include.bin" value="*.log4j *.txt config.inc template/** udplogger/**"/> |
16 | 16 | <property name="include.sh" value="configure build update lsearchd"/> |
17 | | - |
| 17 | + |
18 | 18 | <property file="${basedir}/hostname"/> |
19 | | - |
| 19 | + |
20 | 20 | <path id="classpath"> |
21 | 21 | <fileset dir="${lib}" includes="*.jar"/> |
22 | 22 | </path> |
— | — | @@ -28,10 +28,10 @@ |
29 | 29 | <fileset dir="${lib}"> |
30 | 30 | <include name="*.jar"/> |
31 | 31 | </fileset> |
32 | | - </classpath> |
| 32 | + </classpath> |
33 | 33 | </java> |
34 | | - </target> |
35 | | - |
| 34 | + </target> |
| 35 | + |
36 | 36 | <target name="makejar" depends="build"> |
37 | 37 | <jar destfile="${basedir}/${jar.name}"> |
38 | 38 | <manifest> |
— | — | @@ -40,42 +40,42 @@ |
41 | 41 | </manifest> |
42 | 42 | <zipfileset dir="${bin}" prefix=""> |
43 | 43 | <include name="org/**"/> |
44 | | - </zipfileset> |
| 44 | + </zipfileset> |
45 | 45 | </jar> |
46 | 46 | </target> |
47 | | - |
| 47 | + |
48 | 48 | <target name="alljar" depends="build" description="All-in-one jar"> |
49 | 49 | <jar jarfile="${jar.name}" compress="true"> |
50 | 50 | <manifest> |
51 | 51 | <attribute name="Main-Class" value="org.wikimedia.lsearch.config.StartupManager" /> |
52 | 52 | </manifest> |
53 | 53 | <fileset dir="bin" includes="org/**" /> |
54 | | - |
| 54 | + |
55 | 55 | <!-- pack libraries as well --> |
56 | 56 | <zipfileset src="lib/xmlrpc-common-3.0.jar" /> |
57 | 57 | <zipfileset src="lib/xmlrpc-client-3.0.jar" /> |
58 | 58 | <zipfileset src="lib/xmlrpc-server-3.0.jar" /> |
59 | 59 | <zipfileset src="lib/commons-logging-1.1.jar" /> |
60 | 60 | <zipfileset src="lib/ws-commons-util-1.0.1.jar" /> |
61 | | - <zipfileset src="lib/log4j-1.2.14.jar" /> |
| 61 | + <zipfileset src="lib/log4j-1.2.14.jar" /> |
62 | 62 | <zipfileset src="lib/lucene-core-2.3.jar" /> |
63 | | - <zipfileset src="lib/lucene-analyzers.jar" /> |
| 63 | + <zipfileset src="lib/lucene-analyzers.jar" /> |
64 | 64 | <zipfileset src="lib/snowball.jar" /> |
65 | 65 | <zipfileset src="lib/mwdumper.jar" /> |
66 | 66 | <zipfileset src="lib/mysql-connector-java-3.0.17-ga-bin.jar" /> |
67 | | - |
| 67 | + |
68 | 68 | <fileset dir="resources" includes="*/**" /> |
69 | 69 | </jar> |
70 | 70 | </target> |
71 | 71 | |
72 | | - |
| 72 | + |
73 | 73 | <target name="build" description="Compile classes"> |
74 | 74 | <mkdir dir="${bin}"/> |
75 | 75 | <javac srcdir="${src}/org/" debug="on" encoding="UTF-8" includes="**/*.java" destdir="${bin}/"> |
76 | 76 | <classpath refid="classpath"/> |
77 | 77 | </javac> |
78 | 78 | </target> |
79 | | - |
| 79 | + |
80 | 80 | <target name="pack" description="Make tar.gz distribution"> |
81 | 81 | <mkdir dir="${dist}"/> |
82 | 82 | <delete file="${dist}/${pack.name}.tar"/> |
— | — | @@ -87,7 +87,7 @@ |
88 | 88 | <gzip zipfile="${dist}/${pack.name}.tar.gz" src="${dist}/${pack.name}.tar"/> |
89 | 89 | <delete file="${dist}/${pack.name}.tar"/> |
90 | 90 | </target> |
91 | | - |
| 91 | + |
92 | 92 | <target name="pack-src" depends="alljar" description="Make tar.gz distribution of only core source files"> |
93 | 93 | <mkdir dir="${dist}"/> |
94 | 94 | <delete file="${dist}/${src.name}.tar"/> |
— | — | @@ -100,7 +100,7 @@ |
101 | 101 | <delete file="${dist}/${src.name}.tar"/> |
102 | 102 | </target> |
103 | 103 | |
104 | | - |
| 104 | + |
105 | 105 | <target name="binary" depends="alljar" description="Make binary tar.gz distribution"> |
106 | 106 | <mkdir dir="${bin}"/> |
107 | 107 | <delete file="${dist}/${binary.name}.tar"/> |
— | — | @@ -113,5 +113,5 @@ |
114 | 114 | <gzip zipfile="${dist}/${binary.name}.tar.gz" src="${dist}/${binary.name}.tar"/> |
115 | 115 | <delete file="${dist}/${binary.name}.tar"/> |
116 | 116 | </target> |
117 | | - |
| 117 | + |
118 | 118 | </project> |
Property changes on: branches/lucene-search-2.1 |
___________________________________________________________________ |
Name: svn:ignore |
119 | 119 | - lsearch.conf |
lsearch-global.conf |
lsearch.log4j |
rsyncd.conf |
*~ |
indexes |
120 | 120 | + bin |
dumps |
lsearch.log4j |
indexes |
lsearch-global.conf |
lsearch.conf |
LuceneSearch.jar |
config.inc |