r63821 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r63821
Date: 18:33, 16 March 2010
Author: mah
Status: deferred
Tags:
Comment: Whitespace cleanups, set ignores.
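The "set ignores" half of this commit adds svn:ignore properties (the property-change hunks in the diff below) so that compiled *.class files no longer clutter svn status output. Since svn:ignore is not recursive, the property has to be set on each test package directory separately, which is why so many paths are touched. As a rough sketch, run from the branch root, the commands would look something like this (the paths come from this commit; the file name root-ignores.txt holding the multi-line branch-root value is a made-up example):

   # ignore compiled classes in one test package directory
   svn propset svn:ignore '*.class' test/org/wikimedia/lsearch/spell/api
   # multi-line ignore lists, like the one on the branch root, are easier to load from a file
   svn propset svn:ignore -F root-ignores.txt .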
Modified paths:
  • /branches/lucene-search-2.1 (modified) (history)
  • /branches/lucene-search-2.1/build.xml (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateInfoImpl.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/AnalysisTest.java (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/FastWikiTokenizerTest.java (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/beans (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/config (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/highlight (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/index (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/ranks (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/search (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/spell (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/spell/api (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/storage (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/test (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/util (modified) (history)

Diff

Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/spell/api
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/spell
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/test
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/storage
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/config
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/search
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/highlight
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/index
___________________________________________________________________
Name: svn:ignore
   + *.class
Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/AnalysisTest.java
@@ -41,7 +41,7 @@
4242 public class AnalysisTest extends WikiTestCase {
4343 Analyzer a = null;
4444 Configuration config = null;
45 -
 45+
4646 protected void setUp() throws Exception {
4747 super.setUp();
4848 if(config == null){
@@ -49,7 +49,7 @@
5050 GlobalConfiguration.getInstance();
5151 }
5252 }
53 -
 53+
5454 public void testCJKAnalyzer(){
5555 a = new CJKAnalyzer();
5656 assertEquals("[(いわ,0,2,type=double), (わさ,1,3,type=double), (さき,2,4,type=double), (ic,4,6,type=single), (カー,6,8,type=double), (ード,7,9,type=double)]",tokens("いわさきicカード"));
@@ -69,35 +69,35 @@
7070 assertEquals("[(pokémons,0,8), (pokemons,0,8,posIncr=0), (pokemon,0,8,type=stemmed,posIncr=0)]",tokens("Pokémons"));
7171 assertEquals("[(1990,0,4), (s,4,5), (iv,6,8)]",tokens("1990s IV"));
7272 }
73 -
 73+
7474 public void testEnglishSearch(){
7575 a = Analyzers.getSearcherAnalyzer(IndexId.get("enwiki"));
7676 commonEnglish();
7777 // acronyms don't get split
7878 assertEquals("[(a.k.a,0,5), (aka,0,5,posIncr=0), (www,6,9), (google,10,16), (com,17,20)]",tokens("a.k.a www.google.com"));
7979 }
80 -
 80+
8181 public void testEnglishIndex(){
8282 a = Analyzers.getIndexerAnalyzer(new FieldBuilder(IndexId.get("enwiki")));
8383 commonEnglish();
8484 // acronyms are always split
8585 assertEquals("[(a.k.a,0,5), (aka,0,5,posIncr=0), (a,0,5,posIncr=0), (k,2,7,posIncr=0), (a,4,9,posIncr=0), (www,6,9), (google,10,16), (com,17,20)]",tokens("a.k.a www.google.com"));
8686 }
87 -
 87+
8888 public void commonSerbian(){
8989 assertEquals("[(нешто,0,5), (nesto,0,5,type=alias,posIncr=0), (на,6,8), (na,6,8,type=alias,posIncr=0), (ћирилици,9,17), (cirilici,9,17,type=alias,posIncr=0)]",tokens("Нешто на ћирилици"));
9090 }
91 -
 91+
9292 public void testSerbianSearch(){
9393 a = Analyzers.getSearcherAnalyzer(IndexId.get("srwiki"));
9494 commonSerbian();
9595 }
96 -
 96+
9797 public void testSerbianIndex(){
9898 a = Analyzers.getIndexerAnalyzer(new FieldBuilder(IndexId.get("srwiki")));
9999 commonSerbian();
100100 }
101 -
 101+
102102 public String tokens(String text){
103103 try{
104104 return Arrays.toString(tokensFromAnalysis(a,text,"contents"));
@@ -106,7 +106,7 @@
107107 return null;
108108 }
109109 }
110 -
 110+
111111 public static Token[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException {
112112 TokenStream stream = analyzer.tokenStream(field, text);
113113 ArrayList tokenList = new ArrayList();
@@ -117,15 +117,15 @@
118118 }
119119 return (Token[]) tokenList.toArray(new Token[0]);
120120 }
121 -
122 - public static void displayTokens(Analyzer analyzer, String text) throws IOException {
 121+
 122+ public static void displayTokens(Analyzer analyzer, String text) throws IOException {
123123 Token[] tokens = tokensFromAnalysis(analyzer, text, "contents");
124124 System.out.println(text);
125125 System.out.print(">> ");
126126 print(tokens);
127127 System.out.println();
128128 }
129 -
 129+
130130 protected static void print(Token[] tokens){
131131 for (int i = 0, j =0; i < tokens.length; i++, j++) {
132132 Token token = tokens[i];
@@ -138,14 +138,14 @@
139139 System.out.println();
140140 j=0;
141141 }
142 -
143 - }
 142+
 143+ }
144144 }
145 -
146 - public static void displayTokens2(Analyzer analyzer, String text) throws IOException {
 145+
 146+ public static void displayTokens2(Analyzer analyzer, String text) throws IOException {
147147 Token[] tokens = tokensFromAnalysis(analyzer, text, "contents");
148148 System.out.println(text);
149 - System.out.print("contents >> ");
 149+ System.out.print("contents >> ");
150150 print(tokens);
151151 System.out.println();
152152 tokens = tokensFromAnalysis(analyzer, text, "stemmed");
@@ -165,16 +165,16 @@
166166 System.out.println();
167167 }
168168 }
169 -
 169+
170170 public static void main(String args[]) throws IOException, ParseException{
171171 Configuration.open();
172 -
 172+
173173 //serializeTest(Analyzers.getHighlightAnalyzer(IndexId.get("enwiki")));
174174 //testAnalyzer(Analyzers.getHighlightAnalyzer(IndexId.get("enwiki")),"Aaliyah");
175 -
 175+
176176 Analyzer aa = Analyzers.getSearcherAnalyzer(IndexId.get("wikilucene"));
177177 displayTokens(aa,"boxes france");
178 -
 178+
179179 HashSet<String> stopWords = new HashSet<String>();
180180 stopWords.add("the"); stopWords.add("of"); stopWords.add("is"); stopWords.add("in"); stopWords.add("and"); stopWords.add("he") ;
181181 //Analyzer analyzer = Analyzers.getSpellCheckAnalyzer(IndexId.get("enwiki"),stopWords);
@@ -186,7 +186,7 @@
187187 text = "a.k.a www.google.com Google's Pokémons links abacus something aries douglas adams boxes bands working s and Frame semantics (linguistics)";
188188 displayTokens(analyzer,text);
189189 text = "a8n sli compatible compatibly Thomas c# c++ good-thomas Good-Thomas rats RATS Frame semantics (linguistics) 16th century sixteenth .fr web.fr other";
190 - displayTokens(analyzer,text);
 190+ displayTokens(analyzer,text);
191191 displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("zhwiki")),"末朝以來藩鎮割據and some plain english 和宦官亂政的現象 as well");
192192 displayTokens(analyzer,"Thomas Goode school");
193193 displayTokens(analyzer,"Agreement reply readily Gödel;");
@@ -200,7 +200,7 @@
201201 displayTokens(analyzer,"[[Image:Lawrence_Brainerd.jpg]], [[Image:Lawrence_Brainerd.jpg|thumb|300px|Lawrence Brainerd]]");
202202 displayTokens(analyzer,"{{Otheruses4|the Irish rock band|other uses|U2 (disambiguation)}}");
203203 displayTokens(analyzer,"{{Otheruses4|the Irish rock band|other uses|U2<ref>U2-ref</ref> (disambiguation)}} Let's see<ref>Seeing is...</ref> if template extraction works.\n==Some heading==\n And after that some text..\n\nAnd now? Not now. Then when? ");
204 -
 204+
205205 ArrayList<String> l = new ArrayList<String>();
206206 l.add("0:Douglas Adams|0:Someone");
207207 l.add("0:Someone");
@@ -208,27 +208,27 @@
209209 l.add("");
210210 l.add("0:Heu");
211211 displayTokens(new SplitAnalyzer(10,true),new StringList(l).toString());
212 -
 212+
213213 analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("viwiki"));
214214 displayTokens(analyzer,"ä, ö, ü; Đ đViệt Nam Đ/đ ↔ D/d lastone");
215 -
 215+
216216 analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("dewiki"));
217217 displayTokens(analyzer,"Gunzen ä, ö, ü; for instance, Ø ÓóÒò Goedel for Gödel; čakšire");
218 -
 218+
219219 analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("enwiki"));
220220 displayTokens(analyzer," ä, ö, ü; for instance, Ø ÓóÒò Goedel for Gödel; čakšire");
221 -
 221+
222222 analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("srwiki"));
223223 displayTokens(analyzer," ä, ö, ü; for instance, Ø ÓóÒò Goedel for Gödel; čakšire");
224 -
 224+
225225 analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("eswiki"));
226226 displayTokens(analyzer,"lógico y matemático");
227 -
 227+
228228 analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("mlwiki"));
229229 displayTokens(analyzer,"കൊറിയ,“കൊറിയ”");
230 -
 230+
231231 printCodePoints("“കൊറിയ”");
232 -
 232+
233233 QueryParser parser = new QueryParser("contents",new CJKAnalyzer());
234234 Query q = parser.parse("いわさきicカード プロサッカークラブをつくろう");
235235 System.out.println("Japanese in standard analyzer: "+q);
@@ -236,7 +236,7 @@
237237 displayTokens(Analyzers.getHighlightAnalyzer(IndexId.get("jawiki"),false),"鈴木 孝治(すずき こうじ、1954年 - )『パンツぱんくろう』というタイトルは、阪本牙城の漫画『タンクタンクロー』が元ネタになっているといわれる。ただし、このアニメと『タンクタンクロー』に内容的な直接の関係は全く無い。");
238238 displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("jawiki")),"『パンツぱんくろう』というタjavaイトルはbalaton");
239239 displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("jawiki")),"パ ン");
240 -
 240+
241241 ArrayList<Aggregate> items = new ArrayList<Aggregate>();
242242 analyzer = Analyzers.getSearcherAnalyzer(IndexId.get("enwiki"));
243243 items.add(new Aggregate("douglas adams",10,IndexId.get("enwiki"),analyzer,"related",stopWords,Flags.ALTTITLE));
@@ -244,7 +244,7 @@
245245 items.add(new Aggregate("hurricane",3.22f,IndexId.get("enwiki"),analyzer,"related",stopWords,Flags.ANCHOR));
246246 items.add(new Aggregate("and some other stuff",3.2f,IndexId.get("enwiki"),analyzer,"related",stopWords,Flags.NONE));
247247 displayTokens(new AggregateAnalyzer(items),"AGGREGATE TEST");
248 -
 248+
249249 // redirects?
250250 FieldBuilder builder = new FieldBuilder(IndexId.get("enwiki"));
251251 ArrayList<String> list = new ArrayList<String>();
@@ -264,20 +264,20 @@
265265 int p[] = MathFunc.partitionList(new double[] {0.52,0.12},5);
266266 analyzer = (Analyzer) Analyzers.getIndexerAnalyzer("Agreement reply readily",builder,null,null,related,p,null,null,null)[0];
267267 displayTokens2(analyzer,"");
268 -
 268+
269269 analyzer = (Analyzer) Analyzers.getIndexerAnalyzer("Pascal's earliest work was in the natural and applied sciences where he made important contributions to the construction of mechanical calculators, the study of fluids, and clarified the concepts of pressure and vacuum by generalizing the work of Evangelista Torricelli. Pascal also wrote powerfully in defense of the scientific method.",builder,null,null,null,null,null,null,null)[0];
270270 displayTokens2(analyzer,"");
271271 analyzer = (Analyzer) Analyzers.getIndexerAnalyzer("1,039/Smoothed Out Slappy Hours",new FieldBuilder(IndexId.get("itwiki")),null,null,null,null,null,null,null)[0];
272272 displayTokens2(analyzer,"");
273273 displayTokens(Analyzers.getSearcherAnalyzer(IndexId.get("itwiki")),"1,039/Smoothed Out Slappy Hours");
274 -
 274+
275275 ArrayList<Aggregate> items = new ArrayList<Aggregate>();
276276 items.add(new Aggregate("douglas adams",10,IndexId.get("enwiki"),false));
277277 items.add(new Aggregate("the selected works...",2.1f,IndexId.get("enwiki"),false));
278278 items.add(new Aggregate("hurricane",3.22f,IndexId.get("enwiki"),false));
279279 items.add(new Aggregate("and some other stuff",3.2f,IndexId.get("enwiki"),false));
280280 displayTokens(new AggregateAnalyzer(items),"AGGREGATE TEST"); */
281 -
 281+
282282 IndexId wl = IndexId.get("wikilucene");
283283 Analyzer an = Analyzers.getSearcherAnalyzer(wl);
284284 Aggregate a1 = new Aggregate("Redheugh Bridges",1,wl,an,"alttitle",Flags.ALTTITLE);
@@ -285,24 +285,24 @@
286286 ArrayList<Aggregate> al = new ArrayList<Aggregate>();
287287 al.add(a1); al.add(a2);
288288 displayTokens(new AggregateAnalyzer(al),"AGGREGATE TEST");
289 -
 289+
290290 displayTokens(Analyzers.getSpellCheckAnalyzer(IndexId.get("enwiki"),new HashSet<String>()),
291291 "Agreement boxes reply readily Gödel, Gödel; a/b");
292 -
293 -
 292+
 293+
294294 if(true)
295295 return;
296 -
 296+
297297 //testAnalyzer(new EnglishAnalyzer());
298298 testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("enwiki")));
299299 testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("dewiki")));
300300 testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("frwiki")));
301301 testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("srwiki")));
302302 testAnalyzer(Analyzers.getSearcherAnalyzer(IndexId.get("eswiki")));
303 -
304 -
 303+
 304+
305305 }
306 -
 306+
307307 private static void printCodePoints(String string) {
308308 char[] str = string.toCharArray();
309309 for(int i=0;i<str.length;i++){
@@ -323,7 +323,7 @@
324324 byte[] b = ExtToken.serialize(analyzer.tokenStream("",article.content));
325325 if(i == 0)
326326 size += b.length;
327 - else
 327+ else
328328 size2 += b.length;
329329 tokensFromAnalysis(analyzer, article.content,"contents");
330330 }
@@ -331,7 +331,7 @@
332332 long delta = System.currentTimeMillis() - start;
333333 System.out.println(delta+"ms ["+delta/count+"ms/ar] elapsed for analyzer "+analyzer+", size="+size+", size2="+size2);
334334 }
335 -
 335+
336336 public static void testAnalyzer(Analyzer analyzer) throws IOException{
337337 ArticlesParser ap = new ArticlesParser("./test-data/indexing-articles.test");
338338 ArrayList<TestArticle> articles = ap.getArticles();
@@ -347,7 +347,7 @@
348348 long delta = System.currentTimeMillis() - start;
349349 System.out.println(delta+"ms ["+delta/count+"ms/ar] elapsed for analyzer "+analyzer);
350350 }
351 -
 351+
352352 public static void testAnalyzer(Analyzer analyzer, String name) throws IOException{
353353 ArticlesParser ap = new ArticlesParser("./test-data/indexing-articles.test");
354354 ArrayList<TestArticle> articles = ap.getArticles();
Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/FastWikiTokenizerTest.java
@@ -25,116 +25,116 @@
2626
2727 public class FastWikiTokenizerTest extends WikiTestCase {
2828 IndexId iid;
29 - TokenizerOptions options;
30 -
 29+ TokenizerOptions options;
 30+
3131 public void testIndex(){
3232 this.iid = IndexId.get("enwiki");
3333 this.options = new TokenizerOptions.ContentOptions(false);
34 -
35 - assertEquals("1 [link] 1 [text]",
 34+
 35+ assertEquals("1 [link] 1 [text]",
3636 tokens("[[link text]]"));
37 -
38 - assertEquals("1 [anchor] 1 [text]",
 37+
 38+ assertEquals("1 [anchor] 1 [text]",
3939 tokens("[[some link|anchor text]]"));
40 -
41 - assertEquals("1 [italic] 2 [see]",
 40+
 41+ assertEquals("1 [italic] 2 [see]",
4242 tokens("''italic''<nowiki><!-- see --></nowiki><!-- nosee -->"));
43 -
44 - assertEquals("1 [http] 2 [en] 1 [wikipedia] 1 [org/] 0 [org] 1 [english] 1 [wikipedia]",
 43+
 44+ assertEquals("1 [http] 2 [en] 1 [wikipedia] 1 [org/] 0 [org] 1 [english] 1 [wikipedia]",
4545 tokens("[http://en.wikipedia.org/ english wikipedia]"));
46 -
47 - assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]",
 46+
 47+ assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]",
4848 tokens("[[Image:Argishti monument.JPG|thumb|King Argishti of Urartu]]"));
49 -
50 - assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]",
 49+
 50+ assertEquals("500 [image] 1 [argishti] 1 [monument] 1 [jpg] 1 [king] 1 [argishti] 1 [of] 1 [urartu]",
5151 tokens("[[Image:Argishti monument.JPG|thumb|King [[link target|Argishti]] of Urartu]]"));
52 -
 52+
5353 assertEquals("500 [image] 1 [frizbi] 1 [jpg] 1 [frizbi] 1 [za] 1 [ultimate] 1 [28] 1 [cm] 1 [175] 1 [g]",
5454 tokens("[[Image:frizbi.jpg|десно|мини|240п|Frizbi za ultimate, 28cm, 175g]]"));
55 -
56 - assertEquals("1 [image] 3 [argishti] 1 [monument] 1 [jpg] 1 [thumb] 1 [king] 1 [argishti] 1 [of] 1 [urartu]",
 55+
 56+ assertEquals("1 [image] 3 [argishti] 1 [monument] 1 [jpg] 1 [thumb] 1 [king] 1 [argishti] 1 [of] 1 [urartu]",
5757 tokens("[[Image:Argishti monument.JPG|thumb|King Argishti of Urartu"));
58 -
59 - assertEquals("1 [clinton] 1 [comets]",
 58+
 59+ assertEquals("1 [clinton] 1 [comets]",
6060 tokens("{| style=\"margin:0px 5px 10px 10px; border:1px solid #8888AA;\" align=right cellpadding=3 cellspacing=3 width=360\n|- align=\"center\" bgcolor=\"#dddddd\"\n|colspan=\"3\"| '''Clinton Comets'''"));
61 -
62 - assertEquals("2 [or] 1 [ا] 500 [lɒs] 1 [ˈændʒəˌlɪs] 0 [ˈaendʒəˌlɪs]",
 61+
 62+ assertEquals("2 [or] 1 [ا] 500 [lɒs] 1 [ˈændʒəˌlɪs] 0 [ˈaendʒəˌlɪs]",
6363 tokens("{{IPA|[l&#594;s &#712;&aelig;nd&#658;&#601;&#716;l&#618;s]}} &lt; or &#60; &copy; &#169;&#1575;"));
64 -
65 - assertEquals("500 [text1] 1 [text2] 1 [text3]",
 64+
 65+ assertEquals("500 [text1] 1 [text2] 1 [text3]",
6666 tokens("{{template|text1}} {{template|text2|text3}}"));
67 -
 67+
6868 assertEquals("",
6969 tokens("[[sr:Naslov]]"));
70 -
71 - assertEquals("500 [some] 1 [category] 1 [name]",
 70+
 71+ assertEquals("500 [some] 1 [category] 1 [name]",
7272 tokens("[[Category:Some category name]]"));
73 -
74 - assertEquals("[Some category name]",
 73+
 74+ assertEquals("[Some category name]",
7575 categories("[[Category:Some category name]]"));
76 -
77 - assertEquals("500 [param1] 1 [param2] 1 [value2]",
 76+
 77+ assertEquals("500 [param1] 1 [param2] 1 [value2]",
7878 tokens("{{template|param1 = {{value1}}|param2 = value2}}"));
79 -
80 - assertEquals("500 [param1] 1 [value1] 1 [param2] 1 [value2]",
 79+
 80+ assertEquals("500 [param1] 1 [value1] 1 [param2] 1 [value2]",
8181 tokens("{{template|param1 = [[target|value1]]|param2 = value2}}"));
82 -
 82+
8383 assertEquals("1 [wikipedia] 1 [is] 1 [accurate] 2 [and] 1 [it's] 0 [its] 1 [not] 1 [a] 1 [lie] 20 [see] 1 [kurir]",
8484 tokens("Wikipedia is accurate<ref>see Kurir</ref>, and it's not a lie."));
85 -
 85+
8686 assertEquals("1 [this] 1 [column] 1 [is] 1 [100] 1 [points] 1 [wide] 1 [this] 1 [column] 1 [is] 1 [200] 1 [points] 1 [wide] 1 [this] 1 [column] 1 [is] 1 [300] 1 [points] 1 [wide] 1 [blah] 1 [blah] 1 [blah]",
8787 tokens("{| border=\"1\" cellpadding=\"2\"\n|-\n|width=\"100pt\"|This column is 100 points wide\n|width=\"200pt\"|This column is 200 points wide\n|width=\"300pt\"|This column is 300 points wide\n|-\n|blah || blah || blah\n|}"));
88 -
 88+
8989 assertEquals("1 [first] 10 [second]",
9090 tokens("first\n\nsecond"));
91 -
 91+
9292 assertEquals("1 [u2] 1 [heading1]",
9393 tokens("u2 heading1"));
94 -
 94+
9595 assertEquals("1 [test] 1 [apostrophe's] 0 [apostrophes] 1 [and] 1 [other’s] 0 [others]",
9696 tokens("Test apostrophe's and other\u2019s."));
9797
98 -
 98+
9999 }
100 -
 100+
101101 public void testHighlight(){
102102 this.iid = IndexId.get("enwiki");
103103 this.options = new TokenizerOptions.Highlight(false);
104 -
105 - assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['link' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]",
 104+
 105+ assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['link' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]",
106106 tokens("[[link text]]"));
107 -
108 - assertEquals("1 [' ' GLUE BULLETINS] 10 ['bullet1' TEXT BULLETINS] 1 [' ' SENTENCE_BREAK BULLETINS] 1 ['bullet2' TEXT BULLETINS]",
 107+
 108+ assertEquals("1 [' ' GLUE BULLETINS] 10 ['bullet1' TEXT BULLETINS] 1 [' ' SENTENCE_BREAK BULLETINS] 1 ['bullet2' TEXT BULLETINS]",
109109 tokens("* bullet1\n* bullet2"));
110 -
111 - assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['http' TEXT FIRST_SECTION] 1 ['://' MINOR_BREAK FIRST_SECTION] 1 ['en' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['wikipedia' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['org/' TEXT FIRST_SECTION] 0 ['org' TEXT FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['english' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]",
 110+
 111+ assertEquals("1 [' ' GLUE FIRST_SECTION] 1 ['http' TEXT FIRST_SECTION] 1 ['://' MINOR_BREAK FIRST_SECTION] 1 ['en' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['wikipedia' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 1 ['org/' TEXT FIRST_SECTION] 0 ['org' TEXT FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['english' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['wiki' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION]",
112112 tokens("[http://en.wikipedia.org/wiki english wiki]"));
113 -
114 - assertEquals("1 [' ' GLUE IMAGE_CAT_IW] 1 ['image' TEXT IMAGE_CAT_IW] 1 [':' MINOR_BREAK IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['monument' TEXT IMAGE_CAT_IW] 1 ['.' SENTENCE_BREAK IMAGE_CAT_IW] 1 ['jpg' TEXT IMAGE_CAT_IW] 1 [' | ' GLUE IMAGE_CAT_IW] 1 ['king' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['of' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['urartu' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 [' ' SENTENCE_BREAK FIRST_SECTION] 1 ['main' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION]",
 113+
 114+ assertEquals("1 [' ' GLUE IMAGE_CAT_IW] 1 ['image' TEXT IMAGE_CAT_IW] 1 [':' MINOR_BREAK IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['monument' TEXT IMAGE_CAT_IW] 1 ['.' SENTENCE_BREAK IMAGE_CAT_IW] 1 ['jpg' TEXT IMAGE_CAT_IW] 1 [' | ' GLUE IMAGE_CAT_IW] 1 ['king' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['argishti' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['of' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 ['urartu' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW] 1 [' ' SENTENCE_BREAK FIRST_SECTION] 1 ['main' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['text' TEXT FIRST_SECTION]",
115115 tokens("[[Image:Argishti monument.JPG|thumb|King Argishti of Urartu]]\n\nMain text"));
116 -
 116+
117117 assertEquals("1 [' ' GLUE IMAGE_CAT_IW] 1 ['category' TEXT IMAGE_CAT_IW] 1 [':' MINOR_BREAK IMAGE_CAT_IW] 1 ['name' TEXT IMAGE_CAT_IW] 1 [' ' GLUE IMAGE_CAT_IW]",
118118 tokens("[[Category:Name|sort key]]"));
119 -
120 - assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]",
 119+
 120+ assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value1' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]",
121121 tokens("{{template|param1 = [[value1]]|param2 = value2}}"));
122 -
123 - assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]",
 122+
 123+ assertEquals("1 [' ' GLUE TEMPLATE] 1 ['param1' TEXT TEMPLATE] 1 [' | ' GLUE TEMPLATE] 1 ['param2' TEXT TEMPLATE] 1 [' ' GLUE TEMPLATE] 1 ['value2' TEXT TEMPLATE] 1 [' ' GLUE FIRST_SECTION]",
124124 tokens("{{template|param1 = {{value1}}|param2 = value2}}"));
125125
126126 assertEquals("1 [' ' GLUE HEADING] 1 ['heading' TEXT HEADING] 1 [' ' GLUE HEADING] 1 ['1' TEXT HEADING] 1 [' ' GLUE HEADING] 1 [' ' GLUE NORMAL]",
127127 tokens("== Heading 1 ==\n"));
128 -
 128+
129129 assertEquals("1 ['wikipedia' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['is' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['accurate' TEXT FIRST_SECTION] 1 [', ' MINOR_BREAK FIRST_SECTION] 1 ['and' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['it's' TEXT FIRST_SECTION] 0 ['its' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['not' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['a' TEXT FIRST_SECTION] 1 [' ' GLUE FIRST_SECTION] 1 ['lie' TEXT FIRST_SECTION] 1 ['.' SENTENCE_BREAK FIRST_SECTION] 20 [' ' GLUE REFERENCE] 1 ['see' TEXT REFERENCE] 1 [' ' GLUE REFERENCE] 1 ['kurir' TEXT REFERENCE] 1 [' ' GLUE REFERENCE]",
130130 tokens("Wikipedia is accurate<ref>see Kurir</ref>, and it's not a lie."));
131 -
 131+
132132 assertEquals("1 [' | ' GLUE TABLE] 1 ['this' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['column' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['is' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['100' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['points' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['wide' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['this' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['column' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['is' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['200' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['points' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['wide' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['this' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['column' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['is' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['300' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['points' TEXT TABLE] 1 [' ' GLUE TABLE] 1 ['wide' TEXT TABLE] 1 [' | ' SENTENCE_BREAK TABLE] 1 ['blah' TEXT TABLE] 1 [' | ' GLUE TABLE] 1 ['blah' TEXT TABLE] 1 [' | ' GLUE TABLE] 1 ['blah' TEXT TABLE] 1 [' | ' GLUE FIRST_SECTION]",
133133 tokens("{| border=\"1\" cellpadding=\"2\"\n|-\n|width=\"100pt\"|This column is 100 points wide\n|width=\"200pt\"|This column is 200 points wide\n|width=\"300pt\"|This column is 300 points wide\n|-\n|blah || blah || blah\n|}"));
134 -
 134+
135135 }
136 -
137 -
138 -
 136+
 137+
 138+
139139 public String tokens(String text){
140140 StringBuilder sb = new StringBuilder();
141141 FastWikiTokenizerEngine parser = new FastWikiTokenizerEngine(text,iid,options);
@@ -149,7 +149,7 @@
150150 }
151151 return sb.toString().trim();
152152 }
153 -
 153+
154154 public String categories(String text){
155155 FastWikiTokenizerEngine parser = new FastWikiTokenizerEngine(text,iid,options);
156156 parser.parse();
@@ -179,7 +179,7 @@
180180 System.out.print("INTERWIKI: ");
181181 }
182182 for(Entry<String,String> t : iw.entrySet()){
183 - System.out.print("["+t.getKey()+"] => ["+t.getValue()+"] ");
 183+ System.out.print("["+t.getKey()+"] => ["+t.getValue()+"] ");
184184 }
185185 if(iw.size()!=0) System.out.println();
186186
@@ -188,7 +188,7 @@
189189 System.out.print("KEYWORDS: ");
190190 }
191191 for(String t : keywords){
192 - System.out.print("["+t+"] ");
 192+ System.out.print("["+t+"] ");
193193 }
194194 if(keywords.size()!=0) System.out.println();
195195
@@ -198,7 +198,7 @@
199199 static void showTokens(String text){
200200 System.out.println("TEXT: "+text);
201201 System.out.flush();
202 - displayTokensForParser(text);
 202+ displayTokensForParser(text);
203203 System.out.flush();
204204 }
205205
@@ -231,7 +231,7 @@
232232 text = "|something|else| is| there| to | see";
233233 showTokens(text);
234234 text = "-test 3.14 and U.S.A and more, .test more";
235 - showTokens(text);
 235+ showTokens(text);
236236 text = "{{IPA|[l&#594;s &#712;&aelig;nd&#658;&#601;&#716;l&#618;s]}} &lt; or &#60; &copy; &#169;&#1575; or &#x627; ";
237237 showTokens(text);
238238 text = "| Unseen\n|-\n| \"Junior\"\n|\n| Goa'uld larva\n|} something";
@@ -318,14 +318,14 @@
319319 text = "[[First]] second third fourth and so on goes the ... [[last link]]";
320320 showTokens(text);
321321 text = "{{Something| param = {{another}}[[First]] } }} }} }} [[first good]]s {{name| [[many]] many many tokens }} second third fourth and so on goes the ... [[good keyword]]";
322 - showTokens(text);
323 - text = "{| style=\"float: right; clear: right; background-color: transparent\"\n|-\n|{{Infobox Military Conflict|\n|conflict=1982 Lebanon War <br>([[Israel-Lebanon conflict]])\n|image=[[Image:Map of Lebanon.png|300px]]\n|caption=Map of modern Lebanon\n|date=June - September 1982\n|place=Southern [[Lebanon]]\n|casus=Two main causes:\n*Terrorist raids on northern Israel by [[PLO]] [[guerrilla]] based in Lebanon\n*the [[Shlomo Argov|shooting of Israel's ambassador]] by the [[Abu Nidal Organization]]<ref>[http://www.usatoday.com/graphics/news/gra/gisrael2/flash.htm The Middle East conflict], ''[[USA Today]]'' (sourced guardian.co.uk, Facts on File, AP) \"Israel invades Lebanon in response to terrorist attacks by PLO guerrillas based there.\"</ref><ref>{{cite book\n|author = Mark C. Carnes, John A. Garraty\n|title = The American Nation\n|publisher = Pearson Education, Inc.\n|date = 2006\n|location = USA\n|pages = 903\n|id = ISBN 0-321-42606-1\n}}</ref><ref>{{cite book\n|author= ''[[Time (magazine)|Time]]''\n|title = The Year in Review\n|publisher = Time Books\n|date = 2006\n|location = 1271 Avenue of the Americs, New York, NY 10020\n|id = ISSN: 1097-5721\n}} \"For decades now, Arab terrorists operating out of southern Lebanon have staged raids and fired mortar shells into northern Israel, denying the Israelis peace of mind. In the early 1980s, the terrorists operating out of Lebanon were controlled by Yasser Arafat's Palestine Liberation Organization (P.L.O.). After Israel's ambassador to Britain, Shlomo Argov, was shot in cold blood and seriously wounded by the Palestinian terror group Abu Nidal in London in 1982, fed-up Israelis sent tanks and troops rolling into Lebanon to disperse the guerrillas.\" (pg. 44-45)</ref><ref>\"The Palestine Liberation Organization (PLO) had been launching guerrilla attacks against Israel since the 1960s (see Palestine Liberation Organization). After the PLO was driven from Jordan in 1971, the organization established bases in southern Lebanon, from which it continued to attack Israel. In 1981 heavy PLO rocket fire on Israeli settlements led Israel to conduct air strikes in Lebanon. The Israelis also destroyed Iraq's nuclear reactor at Daura near Baghdad.";
324 - showTokens(text);
 322+ showTokens(text);
 323+ text = "{| style=\"float: right; clear: right; background-color: transparent\"\n|-\n|{{Infobox Military Conflict|\n|conflict=1982 Lebanon War <br>([[Israel-Lebanon conflict]])\n|image=[[Image:Map of Lebanon.png|300px]]\n|caption=Map of modern Lebanon\n|date=June - September 1982\n|place=Southern [[Lebanon]]\n|casus=Two main causes:\n*Terrorist raids on northern Israel by [[PLO]] [[guerrilla]] based in Lebanon\n*the [[Shlomo Argov|shooting of Israel's ambassador]] by the [[Abu Nidal Organization]]<ref>[http://www.usatoday.com/graphics/news/gra/gisrael2/flash.htm The Middle East conflict], ''[[USA Today]]'' (sourced guardian.co.uk, Facts on File, AP) \"Israel invades Lebanon in response to terrorist attacks by PLO guerrillas based there.\"</ref><ref>{{cite book\n|author = Mark C. Carnes, John A. Garraty\n|title = The American Nation\n|publisher = Pearson Education, Inc.\n|date = 2006\n|location = USA\n|pages = 903\n|id = ISBN 0-321-42606-1\n}}</ref><ref>{{cite book\n|author= ''[[Time (magazine)|Time]]''\n|title = The Year in Review\n|publisher = Time Books\n|date = 2006\n|location = 1271 Avenue of the Americs, New York, NY 10020\n|id = ISSN: 1097-5721\n}} \"For decades now, Arab terrorists operating out of southern Lebanon have staged raids and fired mortar shells into northern Israel, denying the Israelis peace of mind. In the early 1980s, the terrorists operating out of Lebanon were controlled by Yasser Arafat's Palestine Liberation Organization (P.L.O.). After Israel's ambassador to Britain, Shlomo Argov, was shot in cold blood and seriously wounded by the Palestinian terror group Abu Nidal in London in 1982, fed-up Israelis sent tanks and troops rolling into Lebanon to disperse the guerrillas.\" (pg. 44-45)</ref><ref>\"The Palestine Liberation Organization (PLO) had been launching guerrilla attacks against Israel since the 1960s (see Palestine Liberation Organization). After the PLO was driven from Jordan in 1971, the organization established bases in southern Lebanon, from which it continued to attack Israel. In 1981 heavy PLO rocket fire on Israeli settlements led Israel to conduct air strikes in Lebanon. The Israelis also destroyed Iraq's nuclear reactor at Daura near Baghdad.";
 324+ showTokens(text);
325325
326326
327327
328328 ArticlesParser ap1 = new ArticlesParser("./test-data/indexing-articles.test");
329 - ArrayList<TestArticle> articles1 = ap1.getArticles();
 329+ ArrayList<TestArticle> articles1 = ap1.getArticles();
330330 showTokens(articles1.get(articles1.size()-1).content);
331331
332332 //if(true)
@@ -378,7 +378,7 @@
379379 ObjectOutputStream out = new ObjectOutputStream(ba);
380380 out.writeObject(tokens);
381381 size += ba.size(); */
382 - //byte[] b = ExtToken.serializetokens);
 382+ //byte[] b = ExtToken.serializetokens);
383383 //size += b.length;
384384 //ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(ba.toByteArray()));
385385 //ArrayList<ExtToken> some = (ArrayList<ExtToken>) in.readObject();
@@ -388,7 +388,7 @@
389389 System.out.println("Parser elapsed: "+delta+"ms, per serialization: "+((double)delta/total)+"ms, size:"+size/total);
390390
391391 }
392 -
 392+
393393 public void testVowels(){
394394 assertEquals("zdrv", FastWikiTokenizerEngine.deleteVowels("zdravo"));
395395 assertEquals("v g mlrd", FastWikiTokenizerEngine.deleteVowels("eve ga milorad"));
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/beans
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/util
___________________________________________________________________
Name: svn:ignore
   + *.class
Property changes on: branches/lucene-search-2.1/test/org/wikimedia/lsearch/ranks
___________________________________________________________________
Name: svn:ignore
   + *.class
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java
@@ -21,7 +21,7 @@
2222
2323 /**
2424 * Local cache of aggregate field meta informations
25 - *
 25+ *
2626 * @author rainman
2727 *
2828 */
@@ -32,20 +32,20 @@
3333 protected static Object lock = new Object();
3434 /** directory -> fields */
3535 protected static WeakHashMap<Directory,Set<String>> cachingInProgress = new WeakHashMap<Directory,Set<String>>();
36 -
 36+
3737 /** Check if there is a current background caching on a reader */
3838 public static boolean isBeingCached(IndexReader reader){
3939 synchronized(cachingInProgress){
4040 return cachingInProgress.containsKey(reader.directory());
4141 }
4242 }
43 -
 43+
4444 public static void invalidateCache(IndexReader reader){
4545 synchronized (lock) {
4646 cache.remove(reader.directory());
4747 }
4848 }
49 -
 49+
5050 /** Get a meta cacher, return null if field is already cached or being cached */
5151 public static CacheBuilder getCacherBuilder(IndexReader reader, String field) throws IOException {
5252 synchronized(lock){
@@ -61,10 +61,10 @@
6262 s = new AggregateMetaFieldSource(reader,field);
6363 fields.put(field,s);
6464 return s;
65 - }
 65+ }
6666 }
6767 }
68 -
 68+
6969 /** Get a cached meta source to use in queries */
7070 public static AggregateMetaFieldSource getCachedSource(IndexReader reader, String field) {
7171 synchronized(lock){
@@ -75,10 +75,10 @@
7676 }
7777 }
7878
79 -
 79+
8080 /**
81 - * Cached meta aggregate info
82 - *
 81+ * Cached meta aggregate info
 82+ *
8383 * @author rainman
8484 *
8585 */
@@ -94,10 +94,10 @@
9595 protected String field;
9696 protected boolean cachingFinished = false;
9797 protected boolean isOptimized;
98 - // temporary:
 98+ // temporary:
9999 protected int count = 0;
100100 protected int maxdoc = 0;
101 -
 101+
102102 public void init() {
103103 synchronized(cachingInProgress){
104104 Set<String> set = cachingInProgress.get(reader.directory());
@@ -113,7 +113,7 @@
114114 index = new int[maxdoc];
115115 length = new byte[maxdoc]; // estimate maxdoc values
116116 lengthNoStopWords = new byte[maxdoc];
117 - lengthComplete = new byte[maxdoc];
 117+ lengthComplete = new byte[maxdoc];
118118 boost = new float[maxdoc];
119119 flags = new byte[maxdoc];
120120 namespaces = new byte[maxdoc];
@@ -133,10 +133,10 @@
134134 if(count >= length.length){
135135 length = extendBytes(length);
136136 lengthNoStopWords = extendBytes(lengthNoStopWords);
137 - lengthComplete = extendBytes(lengthComplete);
 137+ lengthComplete = extendBytes(lengthComplete);
138138 boost = extendFloats(boost);
139139 flags = extendBytes(flags);
140 - }
 140+ }
141141 length[count] = stored[j*8];
142142 if(length[count] == 0){
143143 log.debug("Broken length=0 for docid="+i+", at position "+j);
@@ -147,14 +147,14 @@
148148 lengthComplete[count] = stored[j*8+6];
149149 flags[count] = stored[j*8+7];
150150 count++;
151 - }
 151+ }
152152 } catch(Exception e){
153153 log.error("Exception during processing stored_field="+field+" on docid="+i+", with stored="+stored+" : "+e.getMessage(),e);
154154 e.printStackTrace();
155155 throw new IOException(e.getMessage());
156156 }
157157 }
158 -
 158+
159159 public void end(){
160160 if(count < length.length - 1){
161161 length = resizeBytes(length,count);
@@ -164,7 +164,7 @@
165165 flags = resizeBytes(flags,count);
166166 }
167167 cachingFinished = true;
168 -
 168+
169169 synchronized(cachingInProgress){
170170 Set<String> set = cachingInProgress.get(reader.directory());
171171 set.remove(field);
@@ -172,7 +172,7 @@
173173 cachingInProgress.remove(reader.directory());
174174 }
175175 }
176 -
 176+
177177 protected byte[] extendBytes(byte[] array){
178178 return resizeBytes(array,array.length*2);
179179 }
@@ -183,13 +183,13 @@
184184 }
185185 protected float[] extendFloats(float[] array){
186186 return resizeFloats(array,array.length*2);
187 - }
 187+ }
188188 protected float[] resizeFloats(float[] array, int size){
189189 float[] t = new float[size];
190190 System.arraycopy(array,0,t,0,Math.min(array.length,size));
191191 return t;
192192 }
193 -
 193+
194194 protected AggregateMetaFieldSource(IndexReader reader, String fieldBase) throws IOException{
195195 this.reader = reader;
196196 this.field = fieldBase+"_meta";
@@ -203,13 +203,13 @@
204204 int end = (docid == index.length-1)? length.length : index[docid+1];
205205 if(position >= end-start){
206206 if(checkExists) // if true this is not an error
207 - return -1;
 207+ return -1;
208208 else
209209 throwException(docid,position,end-start-1);
210210 }
211211 return start+position;
212212 }
213 -
 213+
214214 private void throwException(int docid, int position, int lastValid){
215215 try {
216216 // first try to give more detailed error
@@ -217,22 +217,22 @@
218218 } catch (IOException e) {
219219 e.printStackTrace();
220220 throw new ArrayIndexOutOfBoundsException("Requested position "+position+" on field "+field+" unavailable"+" on "+reader.directory());
221 - }
 221+ }
222222 }
223 -
 223+
224224 protected byte[] getStored(int docid) throws CorruptIndexException, IOException{
225225 return reader.document(docid).getBinaryValue(field);
226226 }
227 -
228 - /** Get length of nonalias tokens */
 227+
 228+ /** Get length of nonalias tokens */
229229 public int getLength(int docid, int position) throws CorruptIndexException, IOException{
230230 if(!cachingFinished) // still caching in background
231231 return getStored(docid)[position*8];
232232 return length[getValueIndex(docid,position)];
233 - }
234 - /** Get length without stop words */
 233+ }
 234+ /** Get length without stop words */
235235 public int getLengthNoStopWords(int docid, int position) throws CorruptIndexException, IOException{
236 - if(!cachingFinished)
 236+ if(!cachingFinished)
237237 return getStored(docid)[position*8+1];
238238 return lengthNoStopWords[getValueIndex(docid,position)];
239239 }
@@ -242,7 +242,7 @@
243243 return getStored(docid)[position*8+6];
244244 return lengthComplete[getValueIndex(docid,position)];
245245 }
246 -
 246+
247247 /** generic function to get boost value at some position, if checkExists=true won't die on error */
248248 private float getBoost(int docid, int position, boolean checkExists) throws CorruptIndexException, IOException{
249249 if(!cachingFinished){
@@ -261,25 +261,25 @@
262262 return 1;
263263 return boost[inx];
264264 }
265 -
266 - /** Get boost for position */
 265+
 266+ /** Get boost for position */
267267 public float getBoost(int docid, int position) throws CorruptIndexException, IOException{
268268 return getBoost(docid,position,false);
269269 }
270 -
 270+
271271 /** Get rank (boost at position 0) */
272272 public float getRank(int docid) throws CorruptIndexException, IOException{
273273 return getBoost(docid,0,true);
274274 }
275 -
 275+
276276 /** Get namespace of the document */
277277 public int getNamespace(int docid) throws CorruptIndexException, IOException{
278278 if(!cachingFinished){
279279 return Integer.parseInt(reader.document(docid).get("namespace"));
280 - }
 280+ }
281281 return namespaces[docid];
282282 }
283 -
 283+
284284 /** Get flag values for docid at position */
285285 public Flags getFlags(int docid, int position) throws CorruptIndexException, IOException{
286286 int ord = 0;
@@ -290,8 +290,8 @@
291291
292292 return Flags.values()[ord];
293293 }
294 -
295 -
 294+
 295+
296296 }
297297
298298 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateInfoImpl.java
@@ -11,11 +11,11 @@
1212 import org.wikimedia.lsearch.analyzers.Aggregate.Flags;
1313 import org.wikimedia.lsearch.search.AggregateMetaField.AggregateMetaFieldSource;
1414
15 -/**
 15+/**
1616 * Wrapper for aggregate fields info in the index. Include an instance
1717 * of this class into CustomPhraseQuery to use the additional meta
18 - * info (which is locally cached in AggregateMetaField).
19 - *
 18+ * info (which is locally cached in AggregateMetaField).
 19+ *
2020 * @author rainman
2121 *
2222 */
@@ -24,7 +24,7 @@
2525 protected transient AggregateMetaFieldSource src = null;
2626 protected boolean hasRankingData = false;
2727 protected String field = null;
28 -
 28+
2929 /** Call this while (local) scorer is constructed to init cached meta info */
3030 public void init(IndexReader reader, String field) throws IOException {
3131 this.field = field;
@@ -36,7 +36,7 @@
3737 protected int getSlot(int pos){
3838 return pos / AggregateAnalyzer.TOKEN_GAP;
3939 }
40 -
 40+
4141 public int length(int docid, int pos) throws IOException {
4242 try{
4343 return src.getLength(docid,getSlot(pos));
@@ -45,7 +45,7 @@
4646 throw e;
4747 }
4848 }
49 -
 49+
5050 public float boost(int docid, int pos) throws IOException {
5151 try{
5252 return src.getBoost(docid,getSlot(pos));
@@ -63,7 +63,7 @@
6464 throw e;
6565 }
6666 }
67 -
 67+
6868 public int lengthComplete(int docid, int pos) throws IOException {
6969 try{
7070 return src.getLengthComplete(docid,getSlot(pos));
@@ -72,14 +72,14 @@
7373 throw e;
7474 }
7575 }
76 -
 76+
7777 public float rank(int docid) throws IOException {
7878 if(hasRankingData)
7979 return src.getRank(docid);
80 - else
 80+ else
8181 throw new RuntimeException("Trying to fetch ranking data on field "+field+" where its not available.");
8282 }
83 -
 83+
8484 public int namespace(int docid) throws IOException{
8585 return src.getNamespace(docid);
8686 }
@@ -87,7 +87,7 @@
8888 public boolean hasRankingData() {
8989 return hasRankingData;
9090 }
91 -
 91+
9292 public Flags flags(int docid, int pos) throws IOException {
9393 try{
9494 return src.getFlags(docid,getSlot(pos));
@@ -96,14 +96,14 @@
9797 throw e;
9898 }
9999 }
100 -
 100+
101101 /** Provides ranking information */
102102 public static class RankInfo extends AggregateInfoImpl {
103103 @Override
104104 public void init(IndexReader reader, String field) throws IOException {
105105 super.init(reader, "alttitle");
106106 }
107 -
 107+
108108 }
109109
110110 }
Index: branches/lucene-search-2.1/build.xml
@@ -13,9 +13,9 @@
1414 <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/* VERSION configure build update test/** udplogger/**"/>
1515 <property name="include.bin" value="*.log4j *.txt config.inc template/** udplogger/**"/>
1616 <property name="include.sh" value="configure build update lsearchd"/>
17 -
 17+
1818 <property file="${basedir}/hostname"/>
19 -
 19+
2020 <path id="classpath">
2121 <fileset dir="${lib}" includes="*.jar"/>
2222 </path>
@@ -28,10 +28,10 @@
2929 <fileset dir="${lib}">
3030 <include name="*.jar"/>
3131 </fileset>
32 - </classpath>
 32+ </classpath>
3333 </java>
34 - </target>
35 -
 34+ </target>
 35+
3636 <target name="makejar" depends="build">
3737 <jar destfile="${basedir}/${jar.name}">
3838 <manifest>
@@ -40,42 +40,42 @@
4141 </manifest>
4242 <zipfileset dir="${bin}" prefix="">
4343 <include name="org/**"/>
44 - </zipfileset>
 44+ </zipfileset>
4545 </jar>
4646 </target>
47 -
 47+
4848 <target name="alljar" depends="build" description="All-in-one jar">
4949 <jar jarfile="${jar.name}" compress="true">
5050 <manifest>
5151 <attribute name="Main-Class" value="org.wikimedia.lsearch.config.StartupManager" />
5252 </manifest>
5353 <fileset dir="bin" includes="org/**" />
54 -
 54+
5555 <!-- pack libraries as well -->
5656 <zipfileset src="lib/xmlrpc-common-3.0.jar" />
5757 <zipfileset src="lib/xmlrpc-client-3.0.jar" />
5858 <zipfileset src="lib/xmlrpc-server-3.0.jar" />
5959 <zipfileset src="lib/commons-logging-1.1.jar" />
6060 <zipfileset src="lib/ws-commons-util-1.0.1.jar" />
61 - <zipfileset src="lib/log4j-1.2.14.jar" />
 61+ <zipfileset src="lib/log4j-1.2.14.jar" />
6262 <zipfileset src="lib/lucene-core-2.3.jar" />
63 - <zipfileset src="lib/lucene-analyzers.jar" />
 63+ <zipfileset src="lib/lucene-analyzers.jar" />
6464 <zipfileset src="lib/snowball.jar" />
6565 <zipfileset src="lib/mwdumper.jar" />
6666 <zipfileset src="lib/mysql-connector-java-3.0.17-ga-bin.jar" />
67 -
 67+
6868 <fileset dir="resources" includes="*/**" />
6969 </jar>
7070 </target>
7171
72 -
 72+
7373 <target name="build" description="Compile classes">
7474 <mkdir dir="${bin}"/>
7575 <javac srcdir="${src}/org/" debug="on" encoding="UTF-8" includes="**/*.java" destdir="${bin}/">
7676 <classpath refid="classpath"/>
7777 </javac>
7878 </target>
79 -
 79+
8080 <target name="pack" description="Make tar.gz distribution">
8181 <mkdir dir="${dist}"/>
8282 <delete file="${dist}/${pack.name}.tar"/>
@@ -87,7 +87,7 @@
8888 <gzip zipfile="${dist}/${pack.name}.tar.gz" src="${dist}/${pack.name}.tar"/>
8989 <delete file="${dist}/${pack.name}.tar"/>
9090 </target>
91 -
 91+
9292 <target name="pack-src" depends="alljar" description="Make tar.gz distribution of only core source files">
9393 <mkdir dir="${dist}"/>
9494 <delete file="${dist}/${src.name}.tar"/>
@@ -100,7 +100,7 @@
101101 <delete file="${dist}/${src.name}.tar"/>
102102 </target>
103103
104 -
 104+
105105 <target name="binary" depends="alljar" description="Make binary tar.gz distribution">
106106 <mkdir dir="${bin}"/>
107107 <delete file="${dist}/${binary.name}.tar"/>
@@ -113,5 +113,5 @@
114114 <gzip zipfile="${dist}/${binary.name}.tar.gz" src="${dist}/${binary.name}.tar"/>
115115 <delete file="${dist}/${binary.name}.tar"/>
116116 </target>
117 -
 117+
118118 </project>
Property changes on: branches/lucene-search-2.1
___________________________________________________________________
Name: svn:ignore
   - lsearch.conf
lsearch-global.conf
lsearch.log4j
rsyncd.conf
*~
indexes
   + bin
dumps
lsearch.log4j
indexes
lsearch-global.conf
lsearch.conf
LuceneSearch.jar
config.inc
