Index: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/test/AbstractWikiTestCase.java |
— | — | @@ -4,6 +4,7 @@ |
5 | 5 | |
6 | 6 | import junit.framework.TestCase; |
7 | 7 | |
| 8 | +import org.junit.Before; |
8 | 9 | import org.wikimedia.lsearch.analyzers.WikiQueryParser; |
9 | 10 | import org.wikimedia.lsearch.config.Configuration; |
10 | 11 | import org.wikimedia.lsearch.config.GlobalConfiguration; |
— | — | @@ -43,6 +44,7 @@ |
44 | 45 | protected GlobalConfiguration global = null; |
45 | 46 | |
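 | | +	// Note: @Before is a no-op under the JUnit 3 runner used for TestCase
 | | +	// subclasses; presumably added ahead of a full JUnit 4 migration.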
46 | 47 | @Override |
| 48 | + @Before |
47 | 49 | protected void setUp() { |
48 | 50 | if(config == null) |
49 | 51 | { |
Index: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java |
— | — | @@ -2,13 +2,13 @@ |
3 | 3 | |
4 | 4 | import java.util.ArrayList; |
5 | 5 | import java.util.Arrays; |
| 6 | + |
6 | 7 | import org.apache.lucene.analysis.Analyzer; |
7 | 8 | import org.apache.lucene.analysis.Token; |
8 | 9 | import org.apache.lucene.search.Query; |
9 | | -import org.wikimedia.lsearch.analyzers.Analyzers; |
10 | | -import org.wikimedia.lsearch.analyzers.FieldBuilder; |
11 | | -import org.wikimedia.lsearch.analyzers.FieldNameFactory; |
12 | | -import org.wikimedia.lsearch.analyzers.WikiQueryParser; |
| 10 | +import org.junit.Before; |
| 11 | +import org.junit.Ignore; |
| 12 | +import org.junit.Test; |
13 | 13 | import org.wikimedia.lsearch.analyzers.WikiQueryParser.NamespacePolicy; |
14 | 14 | import org.wikimedia.lsearch.config.IndexId; |
15 | 15 | import org.wikimedia.lsearch.test.AbstractWikiTestCase; |
— | — | @@ -16,179 +16,447 @@ |
17 | 17 | public class WikiQueryParserTest extends AbstractWikiTestCase { |
18 | 18 | |
19 | 19 | |
 | 20 | +	/** Shared fixture: parser wired against the enwiki index configuration. */
20 | 21 | |
21 | | - public void testEnglish() { |
22 | | - IndexId enwiki = IndexId.get("enwiki"); |
23 | | - FieldBuilder.BuilderSet bs = new FieldBuilder(IndexId.get("enwiki")).getBuilder(); |
24 | | - Analyzer analyzer = Analyzers.getSearcherAnalyzer(enwiki); |
25 | | - FieldNameFactory ff = new FieldNameFactory(); |
26 | | - try{ |
27 | | - WikiQueryParser parser = new WikiQueryParser("contents","0",analyzer,bs,NamespacePolicy.IGNORE); |
28 | | - Query q; |
29 | | - |
30 | | - /* =================== BASICS ================= */ |
31 | | - q = parser.parseRaw("1991"); |
32 | | - assertEquals("contents:1991",q.toString()); |
33 | | - |
34 | | - q = parser.parseRaw("\"eggs and bacon\" OR milk"); |
35 | | - assertEquals("contents:\"eggs and bacon\" contents:milk",q.toString()); |
| 22 | + IndexId enwiki; |
| 23 | + FieldBuilder.BuilderSet bs; |
| 24 | + Analyzer analyzer; |
| 26 | + WikiQueryParser parser; |
36 | 27 | |
37 | | - q = parser.parseRaw("+eggs milk -something"); |
38 | | - assertEquals("+(contents:eggs contents:egg^0.5) +contents:milk -contents:something",q.toString()); |
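 | | +	/** (Re)builds the fixture fields; invoked from setUp() before each test. */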
| 28 | + private void createFixture1() { |
| 29 | + enwiki = IndexId.get("enwiki"); |
| 30 | + bs = new FieldBuilder(IndexId.get("enwiki")).getBuilder(); |
| 31 | + analyzer = Analyzers.getSearcherAnalyzer(enwiki); |
| 32 | + parser = new WikiQueryParser("contents", "0", analyzer, bs, |
| 33 | + NamespacePolicy.IGNORE); |
| 34 | + } |
39 | 35 | |
40 | | - q = parser.parseRaw("eggs AND milk"); |
41 | | - assertEquals("+(contents:eggs contents:egg^0.5) +contents:milk",q.toString()); |
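 | | +	// NOTE: @Before, @Test and @Ignore only take effect under a JUnit 4
 | | +	// runner; while this class extends TestCase, the JUnit 3.8 runner
 | | +	// calls setUp() directly and runs every public test* method,
 | | +	// including the @Ignore'd ones.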
| 36 | + @Before |
| 37 | + @Override |
| 38 | + protected void setUp() { |
| 39 | + super.setUp(); |
| 40 | + createFixture1(); |
| 41 | + } |
42 | 42 | |
43 | | - q = parser.parseRaw("+egg incategory:breakfast"); |
44 | | - assertEquals("+contents:egg +category:breakfast",q.toString()); |
45 | | - |
46 | | - q = parser.parseRaw("+egg incategory:\"two_words\""); |
47 | | - assertEquals("+contents:egg +category:two words",q.toString()); |
48 | | - |
49 | | - q = parser.parseRaw("+egg incategory:\"two words\""); |
50 | | - assertEquals("+contents:egg +category:two words",q.toString()); |
| 43 | + /** |
| 44 | + * basic queries |
| 45 | + */ |
| 46 | + @Test |
| 47 | + public void testBasicEnglish01() { |
| 48 | + Query q = parser.parseRaw("1991"); |
| 49 | + assertEquals("contents:1991", q.toString()); |
| 50 | + } |
51 | 51 | |
52 | | - q = parser.parseRaw("incategory:(help AND pleh)"); |
53 | | - assertEquals("+category:help +category:pleh",q.toString()); |
| 52 | + @Test |
| 53 | + public void testBasicEnglish02() { |
| 54 | + Query q = parser.parseRaw("\"eggs and bacon\" OR milk"); |
| 55 | + assertEquals("contents:\"eggs and bacon\" contents:milk", q.toString()); |
| 56 | + } |
54 | 57 | |
55 | | - q = parser.parseRaw("incategory:(help AND (pleh -ping))"); |
56 | | - assertEquals("+category:help +(+category:pleh -category:ping)",q.toString()); |
| 58 | + @Test |
| 59 | + public void testBasicEnglish03() { |
57 | 60 | |
58 | | - q = parser.parseRaw("(\"something is\" OR \"something else\") AND \"very important\""); |
59 | | - assertEquals("+(contents:\"something is\" contents:\"something else\") +contents:\"very important\"",q.toString()); |
| 61 | + Query q = parser.parseRaw("+eggs milk -something"); |
| 62 | + assertEquals( |
| 63 | + "+(contents:eggs contents:egg^0.5) +contents:milk -contents:something", |
| 64 | + q.toString()); |
| 65 | + } |
60 | 66 | |
61 | | - q = parser.parseRaw("šđčćždzñ"); |
62 | | - assertEquals("contents:šđčćždzñ contents:sđcczdzn^0.5 contents:sđcczdznh^0.5",q.toString()); |
63 | | - |
64 | | - q = parser.parseRaw(".test 3.14 and.. so"); |
65 | | - assertEquals("+(contents:.test contents:test^0.5) +contents:3.14 +contents:and +contents:so",q.toString()); |
66 | | - |
67 | | - q = parser.parseRaw("i'll get"); |
68 | | - assertEquals("+(contents:i'll contents:ill^0.5) +contents:get",q.toString()); |
69 | | - |
70 | | - q = parser.parseRaw("c# good-thomas"); |
71 | | - assertEquals("+(contents:c# contents:c^0.5) +(+(contents:good- contents:good^0.5 contents:goodthomas^0.5) +contents:thomas)",q.toString()); |
72 | | - |
73 | | - q = parser.parseRaw("a8n sli"); |
74 | | - assertEquals("+contents:a8n +contents:sli",q.toString()); |
75 | | - |
76 | | - q = parser.parseRaw("en.wikipedia.org"); |
77 | | - assertEquals("+contents:en +contents:wikipedia +contents:org",q.toString()); |
78 | | - assertEquals("[[contents:en, contents:wikipedia, contents:org]]",parser.getUrls().toString()); |
79 | | - |
80 | | - q = parser.parseRaw("something prefix:[2]:Rainman/Archive"); |
81 | | - assertEquals("contents:something",q.toString()); |
82 | | - assertEquals("[2:rainman/archive]",Arrays.toString(parser.getPrefixFilters())); |
83 | | - |
84 | | - q = parser.parseRaw("something prefix:[2]:Rainman/Archive|Names|[4]:Help"); |
85 | | - assertEquals("contents:something",q.toString()); |
86 | | - assertEquals("[2:rainman/archive, 0:names, 4:help]",Arrays.toString(parser.getPrefixFilters())); |
87 | | - |
88 | | - q = parser.parseRaw("query incategory:Some_category_name"); |
89 | | - assertEquals("+contents:query +category:some category name",q.toString()); |
90 | | - |
91 | | - q = parser.parseRaw("list of countries in Africa by population"); |
92 | | - assertEquals("+contents:list +contents:of +(contents:countries contents:country^0.5) +contents:in +contents:africa +contents:by +contents:population", q.toString()); |
93 | | - |
94 | | - q = parser.parseRaw("list_of_countries_in_Africa_by_population"); |
95 | | - assertEquals("+contents:list +contents:of +(contents:countries contents:country^0.5) +contents:in +contents:africa +contents:by +contents:population", q.toString()); |
96 | | - |
97 | | - // FIXME, some differences in alttitle |
98 | | - //assertEquals(parser.parse("list of countries in Africa by population").toString(), parser.parse("list_of_countries_in_Africa_by_population").toString()); |
| 67 | + @Test |
| 68 | + public void testBasicEnglish04() { |
99 | 69 | |
100 | | - |
101 | | - /* =================== MISC ================= */ |
102 | | - |
103 | | - q = parser.parse("douglas adams OR qian zhongshu OR (ibanez guitars)"); |
104 | | - assertEquals("[douglas, adams, qian, zhongshu, ibanez, guitars]",parser.getWordsClean().toString()); |
105 | | - |
106 | | - assertEquals("[(douglas,0,7), (adam,8,12,type=wildcard)]",parser.tokenizeForSpellCheck("douglas adam*").toString()); |
107 | | - |
108 | | - assertEquals("[(douglas,0,7)]",parser.tokenizeForSpellCheck("douglas -adams").toString()); |
109 | | - assertEquals("[(douglas,4,11)]",parser.tokenizeForSpellCheck("[2]:douglas -adams").toString()); |
110 | | - |
111 | | - assertEquals("[(box,0,3), (ven,4,7,type=fuzzy), (i'll,9,13)]",parser.tokenizeForSpellCheck("box ven~ i'll").toString()); |
112 | | - |
113 | | - q = parser.parse("douglas -adams guides"); |
114 | | - assertEquals("[contents:guides, contents:douglas, contents:guide]", Arrays.toString(parser.getHighlightTerms())); |
115 | | - |
116 | | - /* ================== PREFIXES ============ */ |
117 | | - q = parser.parseRaw("intitle:tests"); |
118 | | - // FIXME: stemming for titles? |
119 | | - assertEquals("title:tests title:test^0.5",q.toString()); |
120 | | - |
121 | | - q = parser.parseRaw("intitle:multiple words in title"); |
122 | | - assertEquals("+title:multiple +title:words +title:in +title:title",q.toString()); |
123 | | - |
124 | | - q = parser.parseRaw("intitle:[2]:tests"); |
125 | | - assertEquals("title:tests title:test^0.5",q.toString()); |
126 | | - |
127 | | - q = parser.parseRaw("something (intitle:[2]:tests) out"); |
128 | | - assertEquals("+contents:something +(title:tests title:test^0.5) +contents:out",q.toString()); |
129 | | - |
130 | | - ArrayList<Token> tokens = parser.tokenizeForSpellCheck("+incategory:\"zero\" a:b incategory:c +incategory:d [1]:20"); |
131 | | - assertEquals("[(a,19,20), (b,21,22), (c,34,35), (d,48,49), (20,54,56)]", tokens.toString()); |
132 | | - |
133 | | - tokens = parser.tokenizeForSpellCheck("+incategory:\"Suspension bridges in the United States\""); |
134 | | - assertEquals("[]", tokens.toString()); |
135 | | - |
136 | | - /* ================== unicode decomposition stuffs ============ */ |
137 | | - q = parser.parseRaw("šta"); |
138 | | - assertEquals("contents:šta contents:sta^0.5",q.toString()); |
139 | | - |
140 | | - q = parser.parseRaw("װאנט"); |
141 | | - assertEquals("contents:װאנט contents:וואנט^0.5",q.toString()); |
142 | | - |
143 | | - q = parser.parseRaw("פּאריז"); |
144 | | - assertEquals("contents:פּאריז contents:פאריז^0.5",q.toString()); |
145 | | - |
146 | | - |
147 | | - } catch(Exception e){ |
148 | | - } |
| 70 | + Query q = parser.parseRaw("eggs AND milk"); |
| 71 | + assertEquals("+(contents:eggs contents:egg^0.5) +contents:milk",q.toString()); |
149 | 72 | } |
150 | 73 | |
| 74 | + @Test |
| 75 | + public void testBasicEnglish05() { |
| 76 | + |
| 77 | + Query q = parser.parseRaw("+egg incategory:breakfast"); |
| 78 | + assertEquals("+contents:egg +category:breakfast", q.toString()); |
| 79 | + } |
| 80 | + |
| 81 | + @Test |
| 82 | + public void testBasicEnglish06() { |
| 83 | + |
| 84 | + Query q = parser.parseRaw("+egg incategory:\"two_words\""); |
| 85 | + assertEquals("+contents:egg +category:two words", q.toString()); |
| 86 | + } |
| 87 | + |
| 88 | + @Test |
| 89 | + public void testBasicEnglish07() { |
| 90 | + |
| 91 | + Query q = parser.parseRaw("incategory:(help AND pleh)"); |
| 92 | + assertEquals("+category:help +category:pleh", q.toString()); |
| 93 | + } |
| 94 | + |
| 95 | + @Test |
| 96 | + public void testBasicEnglish08() { |
| 97 | + |
| 98 | + Query q = parser.parseRaw("incategory:(help AND (pleh -ping))"); |
| 99 | + assertEquals("+category:help +(+category:pleh -category:ping)", |
| 100 | + q.toString()); |
| 101 | + } |
| 102 | + |
| 103 | + @Test |
| 104 | + public void testBasicEnglish09() { |
| 105 | + |
| 106 | + Query q = parser |
| 107 | + .parseRaw("(\"something is\" OR \"something else\") AND \"very important\""); |
| 108 | + assertEquals( |
| 109 | + "+(contents:\"something is\" contents:\"something else\") +contents:\"very important\"", |
| 110 | + q.toString()); |
| 111 | + } |
| 112 | + |
| 113 | + @Test |
| 114 | + public void testBasicEnglish10() { |
| 115 | + |
| 116 | + Query q = parser.parseRaw("šđčćždzñ"); |
| 117 | + assertEquals( |
| 118 | + "contents:šđčćždzñ contents:sđcczdzn^0.5 contents:sđcczdznh^0.5", |
| 119 | + q.toString()); |
| 122 | + } |
| 123 | + |
| 124 | + @Test |
| 125 | + public void testBasicEnglish11() { |
| 126 | + |
| 127 | + Query q = parser.parseRaw(".test 3.14 and.. so"); |
| 128 | + assertEquals( |
| 129 | + "+(contents:.test contents:test^0.5) +contents:3.14 +contents:and +contents:so", |
| 130 | + q.toString()); |
| 131 | + } |
| 132 | + |
| 133 | + @Test |
| 134 | + public void testBasicEnglish12() { |
| 135 | + |
| 136 | + Query q = parser.parseRaw("i'll get"); |
| 137 | + assertEquals("+(contents:i'll contents:ill^0.5) +contents:get", |
| 138 | + q.toString()); |
| 139 | + } |
| 140 | + |
| 141 | + @Test |
| 142 | + public void testBasicEnglish13() { |
| 143 | + |
| 144 | + Query q = parser.parseRaw("c# good-thomas"); |
| 145 | + assertEquals( |
| 146 | + "+(contents:c# contents:c^0.5) +(+(contents:good- contents:good^0.5 contents:goodthomas^0.5) +contents:thomas)", |
| 147 | + q.toString()); |
| 150 | + } |
151 | 151 | |
152 | | - public void XtestEnglishFull() { |
153 | | - IndexId enwiki = IndexId.get("enwiki"); |
154 | | - FieldBuilder.BuilderSet bs = new FieldBuilder(IndexId.get("enwiki")).getBuilder(); |
155 | | - Analyzer analyzer = Analyzers.getSearcherAnalyzer(enwiki); |
156 | | - FieldNameFactory ff = new FieldNameFactory(); |
157 | | - try{ |
158 | | - WikiQueryParser parser = new WikiQueryParser("contents","0",analyzer,bs,NamespacePolicy.IGNORE); |
159 | | - Query q; |
160 | | - /* =================== FULL QUERIES ================= */ |
161 | | - |
162 | | - q = parser.parse("simple query",new WikiQueryParser.ParsingOptions(true)); |
163 | | - assertEquals("(+(contents:simple contents:simpl^0.5) +(contents:query contents:queri^0.5)) ((+title:simple^2.0 +title:query^2.0) (+(stemtitle:simple^0.8 stemtitle:simpl^0.32000002) +(stemtitle:query^0.8 stemtitle:queri^0.32000002)))",q.toString()); |
164 | | - |
165 | | - q = parser.parse("guitars",new WikiQueryParser.ParsingOptions(true)); |
166 | | - assertEquals("(contents:guitars contents:guitar^0.5) ((title:guitars^2.0 title:guitar^0.4) (stemtitle:guitars^0.8 stemtitle:guitar^0.32000002))",q.toString()); |
167 | | - assertEquals("[guitars]",parser.getWordsClean().toString()); |
168 | | - |
169 | | - q = parser.parse("simple -query",new WikiQueryParser.ParsingOptions(true)); |
170 | | - assertEquals("(+(contents:simple contents:simpl^0.5) -contents:query) ((+title:simple^2.0 -title:query^2.0) (+(stemtitle:simple^0.8 stemtitle:simpl^0.32000002) -stemtitle:query^0.8)) -(contents:query title:query^2.0 stemtitle:query^0.8)",q.toString()); |
171 | | - assertEquals("[simple]",parser.getWordsClean().toString()); |
172 | | - |
173 | | - q = parser.parse("the who",new WikiQueryParser.ParsingOptions(true)); |
174 | | - assertEquals("(+contents:the +contents:who) ((+title:the^2.0 +title:who^2.0) (+stemtitle:the^0.8 +stemtitle:who^0.8))",q.toString()); |
175 | | - |
176 | | - q = parser.parse("the_who",new WikiQueryParser.ParsingOptions(true)); |
177 | | - assertEquals("(+contents:the +contents:who) ((+title:the^2.0 +title:who^2.0) (+stemtitle:the^0.8 +stemtitle:who^0.8))",q.toString()); |
178 | | - |
179 | | - q = parser.parse("\"Ole von Beust\"",new WikiQueryParser.ParsingOptions(true)); |
180 | | - assertEquals("contents:\"ole von beust\" (title:\"ole von beust\"^2.0 stemtitle:\"ole von beust\"^0.8)",q.toString()); |
181 | | - |
182 | | - q = parser.parse("who is president of u.s.",new WikiQueryParser.ParsingOptions(true)); |
183 | | - assertEquals("(+contents:who +contents:is +(contents:president contents:presid^0.5) +contents:of +(contents:u.s contents:us^0.5)) ((+title:who^2.0 +title:is^2.0 +title:president^2.0 +title:of^2.0 +(title:u.s^2.0 title:us^0.4)) (+stemtitle:who^0.8 +stemtitle:is^0.8 +(stemtitle:president^0.8 stemtitle:presid^0.32000002) +stemtitle:of^0.8 +(stemtitle:u.s^0.8 stemtitle:us^0.32000002)))",q.toString()); |
184 | | - assertEquals("[who, is, president, of, u.s]",parser.getWordsClean().toString()); |
| 152 | + @Test |
| 153 | + public void testBasicEnglish14() { |
| 154 | + |
 | 155 | +		Query q = parser.parseRaw("+egg incategory:\"two words\"");
 | 156 | +		assertEquals("+contents:egg +category:two words",
 | 157 | +				q.toString());
| 158 | + } |
| 159 | + |
| 160 | + |
| 161 | + @Test |
| 162 | + public void testBasicEnglish15() { |
| 163 | + |
| 164 | + Query q = parser.parseRaw("a8n sli"); |
| 165 | + assertEquals("+contents:a8n +contents:sli", q.toString()); |
| 166 | + |
185 | 167 | |
186 | | - } catch(Exception e){ |
187 | | - e.printStackTrace(); |
188 | | - } |
189 | 168 | } |
| 169 | + |
| 170 | + @Test |
| 171 | + public void testBasicEnglish16() { |
| 172 | + |
| 173 | + Query q = parser.parseRaw("en.wikipedia.org"); |
| 174 | + assertEquals("+contents:en +contents:wikipedia +contents:org", |
| 175 | + q.toString()); |
| 176 | + assertEquals("[[contents:en, contents:wikipedia, contents:org]]", |
| 177 | + parser.getUrls().toString()); |
| 178 | + |
| 179 | + } |
| 180 | + |
| 181 | + @Test |
| 182 | + public void testBasicEnglish17() { |
 | 183 | +
| 185 | + Query q = parser.parseRaw("something prefix:[2]:Rainman/Archive"); |
| 186 | + assertEquals("contents:something", q.toString()); |
| 187 | + assertEquals("[2:rainman/archive]", |
| 188 | + Arrays.toString(parser.getPrefixFilters())); |
| 191 | + } |
| 192 | + |
| 193 | + @Test |
| 194 | + public void testBasicEnglish18() { |
| 195 | + |
| 196 | + Query q = parser |
| 197 | + .parseRaw("something prefix:[2]:Rainman/Archive|Names|[4]:Help"); |
| 198 | + assertEquals("contents:something", q.toString()); |
| 199 | + assertEquals("[2:rainman/archive, 0:names, 4:help]", |
| 200 | + Arrays.toString(parser.getPrefixFilters())); |
| 201 | + } |
| 202 | + |
| 203 | + @Test |
| 204 | + public void testBasicEnglish19() { |
| 205 | + |
| 206 | + Query q = parser.parseRaw("query incategory:Some_category_name"); |
| 207 | + assertEquals("+contents:query +category:some category name", |
| 208 | + q.toString()); |
| 209 | + |
| 210 | + } |
| 211 | + |
| 212 | + @Test |
| 213 | + public void testBasicEnglish20() { |
| 214 | + |
| 215 | + Query q = parser.parseRaw("list of countries in Africa by population"); |
| 216 | + assertEquals( |
| 217 | + "+contents:list +contents:of +(contents:countries contents:country^0.5) +contents:in +contents:africa +contents:by +contents:population", |
| 218 | + q.toString()); |
| 219 | + } |
| 220 | + |
| 221 | + @Test |
| 222 | + public void testBasicEnglish21() { |
| 223 | + |
| 224 | + Query q = parser.parseRaw("list_of_countries_in_Africa_by_population"); |
| 225 | + assertEquals( |
| 226 | + "+contents:list +contents:of +(contents:countries contents:country^0.5) +contents:in +contents:africa +contents:by +contents:population", |
| 227 | + q.toString()); |
| 228 | + } |
190 | 253 | |
191 | | - public void testExtractRawFields(){ |
| 254 | + // FIXME: some differences in alttitle |
| 255 | + @Ignore("some differences in alttitle") |
| 256 | + @Test |
| 257 | + public void testBasicEnglish25() { |
| 258 | + |
| 259 | + |
| 260 | + assertEquals(parser.parse("list of countries in Africa by population") |
| 261 | + .toString(), |
| 262 | + parser.parse("list_of_countries_in_Africa_by_population") |
| 263 | + .toString()); |
| 264 | + } |
| 265 | + |
| 266 | + |
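 | | +	/** Miscellaneous: clean-word extraction, spell-check tokenization, highlight terms. */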
| 267 | + @Test |
| 268 | + public void testMisc01() { |
 | 269 | +		parser.parse("douglas adams OR qian zhongshu OR (ibanez guitars)");
| 270 | + assertEquals("[douglas, adams, qian, zhongshu, ibanez, guitars]",parser.getWordsClean().toString()); |
| 271 | + } |
| 272 | + |
| 273 | + @Test |
| 274 | + public void testMisc02() { |
| 275 | + assertEquals("[(douglas,0,7), (adam,8,12,type=wildcard)]",parser.tokenizeForSpellCheck("douglas adam*").toString()); |
| 276 | + } |
| 277 | + |
| 278 | + @Test |
| 279 | + public void testMisc03() { |
| 280 | + assertEquals("[(douglas,0,7)]", |
| 281 | + parser.tokenizeForSpellCheck("douglas -adams").toString()); |
| 282 | + } |
| 283 | + |
| 284 | + @Test |
| 285 | + public void testMisc04() { |
| 286 | + assertEquals("[(douglas,4,11)]", |
| 287 | + parser.tokenizeForSpellCheck("[2]:douglas -adams").toString()); |
| 288 | + } |
| 289 | + |
| 290 | + @Test |
| 291 | + public void testMisc05() { |
| 292 | + assertEquals("[(box,0,3), (ven,4,7,type=fuzzy), (i'll,9,13)]", parser |
| 293 | + .tokenizeForSpellCheck("box ven~ i'll").toString()); |
| 294 | + } |
| 295 | + |
| 296 | + @Test |
| 297 | + public void testMisc06() { |
 | 298 | +		parser.parse("douglas -adams guides");
| 299 | + assertEquals("[contents:guides, contents:douglas, contents:guide]", |
| 300 | + Arrays.toString(parser.getHighlightTerms())); |
| 301 | + |
| 302 | + } |
| 303 | + |
| 304 | + |
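 | | +	/** Prefixed fields: intitle:, incategory:, namespace selectors. */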
| 305 | + @Test |
| 306 | + public void testPrefix01() { |
| 307 | + Query q = parser.parseRaw("intitle:tests"); |
| 308 | + assertEquals("title:tests title:test^0.5", q.toString()); |
| 309 | + |
| 310 | + } |
| 311 | + |
| 312 | + @Test |
| 313 | + public void testPrefix02() { |
| 314 | + Query q = parser.parseRaw("intitle:multiple words in title"); |
| 315 | + assertEquals("+title:multiple +title:words +title:in +title:title", |
| 316 | + q.toString()); |
| 317 | + |
| 318 | + } |
| 319 | + |
| 320 | + @Test |
| 321 | + public void testPrefix03() { |
| 322 | + Query q = parser.parseRaw("intitle:[2]:tests"); |
| 323 | + assertEquals("title:tests title:test^0.5", q.toString()); |
| 324 | + |
| 325 | + } |
| 326 | + |
| 327 | + @Test |
| 328 | + public void testPrefix04() { |
| 329 | + Query q = parser.parseRaw("something (intitle:[2]:tests) out"); |
| 330 | + assertEquals( |
| 331 | + "+contents:something +(title:tests title:test^0.5) +contents:out", |
| 332 | + q.toString()); |
| 333 | + |
| 334 | + } |
| 335 | + |
| 336 | + @Test |
| 337 | + public void testPrefix05() { |
| 338 | + ArrayList<Token> tokens = parser.tokenizeForSpellCheck("+incategory:\"zero\" a:b incategory:c +incategory:d [1]:20"); |
| 339 | + assertEquals("[(a,19,20), (b,21,22), (c,34,35), (d,48,49), (20,54,56)]", tokens.toString()); |
| 340 | + |
| 341 | + } |
| 342 | + |
| 343 | + @Test |
| 344 | + public void testPrefix06() { |
| 345 | + ArrayList<Token> tokens = parser |
| 346 | + .tokenizeForSpellCheck("+incategory:\"Suspension bridges in the United States\""); |
| 347 | + assertEquals("[]", tokens.toString()); |
| 348 | + } |
| 349 | + |
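 | | +	/** Unicode decomposition. */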
| 350 | + @Test |
| 351 | + public void testUnicodeDecomposition01() { |
| 352 | + |
| 353 | + Query q = parser.parseRaw("šta"); |
| 354 | + assertEquals("contents:šta contents:sta^0.5", q.toString()); |
| 355 | + |
| 356 | + } |
| 357 | + |
| 358 | + @Test |
| 359 | + public void testUnicodeDecomposition02() { |
| 360 | + Query q = parser.parseRaw("װאנט"); |
| 361 | + assertEquals("contents:װאנט contents:וואנט^0.5", q.toString()); |
| 362 | + |
| 363 | + } |
| 364 | + |
| 365 | + @Test |
| 366 | + public void testUnicodeDecomposition03() { |
| 367 | + Query q = parser.parseRaw("פּאריז"); |
| 368 | + assertEquals("contents:פּאריז contents:פאריז^0.5", q.toString()); |
| 369 | + |
| 370 | + } |
| 371 | + |
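 | | +	/** Full queries, carried over from the original XtestEnglishFull. */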
 | 372 | +	@Ignore("disabled in the original as XtestEnglishFull")
| 373 | + @Test |
| 374 | + public void testEnglishFull01() { |
| 375 | + Query q = parser.parse("simple query", |
| 376 | + new WikiQueryParser.ParsingOptions(true)); |
| 377 | + assertEquals( |
| 378 | + "(+(contents:simple contents:simpl^0.5) +(contents:query contents:queri^0.5)) ((+title:simple^2.0 +title:query^2.0) (+(stemtitle:simple^0.8 stemtitle:simpl^0.32000002) +(stemtitle:query^0.8 stemtitle:queri^0.32000002)))", |
| 379 | + q.toString()); |
| 380 | + } |
| 381 | + |
 | 382 | +	@Ignore("disabled in the original as XtestEnglishFull")
| 383 | + @Test |
| 384 | + public void testEnglishFull02() { |
| 385 | + Query q = parser.parse("guitars", new WikiQueryParser.ParsingOptions( |
| 386 | + true)); |
| 387 | + assertEquals( |
| 388 | + "(contents:guitars contents:guitar^0.5) ((title:guitars^2.0 title:guitar^0.4) (stemtitle:guitars^0.8 stemtitle:guitar^0.32000002))", |
| 389 | + q.toString()); |
| 390 | + assertEquals("[guitars]", parser.getWordsClean().toString()); |
| 391 | + |
| 392 | + } |
| 393 | + |
 | 394 | +	@Ignore("disabled in the original as XtestEnglishFull")
| 395 | + @Test |
| 396 | + public void testEnglishFull03() { |
| 397 | + Query q = parser.parse("simple -query", |
| 398 | + new WikiQueryParser.ParsingOptions(true)); |
| 399 | + assertEquals( |
| 400 | + "(+(contents:simple contents:simpl^0.5) -contents:query) ((+title:simple^2.0 -title:query^2.0) (+(stemtitle:simple^0.8 stemtitle:simpl^0.32000002) -stemtitle:query^0.8)) -(contents:query title:query^2.0 stemtitle:query^0.8)", |
| 401 | + q.toString()); |
| 402 | + assertEquals("[simple]", parser.getWordsClean().toString()); |
| 403 | + |
| 404 | + } |
| 405 | + |
 | 406 | +	@Ignore("disabled in the original as XtestEnglishFull")
| 407 | + @Test |
| 408 | + public void testEnglishFull04() { |
| 409 | + Query q = parser.parse("the who", new WikiQueryParser.ParsingOptions( |
| 410 | + true)); |
| 411 | + assertEquals( |
| 412 | + "(+contents:the +contents:who) ((+title:the^2.0 +title:who^2.0) (+stemtitle:the^0.8 +stemtitle:who^0.8))", |
| 413 | + q.toString()); |
| 414 | + |
| 415 | + } |
| 416 | + |
 | 417 | +	@Ignore("disabled in the original as XtestEnglishFull")
| 418 | + @Test |
| 419 | + public void testEnglishFull05() { |
| 420 | + Query q = parser.parse("the_who", new WikiQueryParser.ParsingOptions( |
| 421 | + true)); |
| 422 | + assertEquals( |
| 423 | + "(+contents:the +contents:who) ((+title:the^2.0 +title:who^2.0) (+stemtitle:the^0.8 +stemtitle:who^0.8))", |
| 424 | + q.toString()); |
| 425 | + |
| 426 | + } |
| 427 | + |
 | 428 | +	@Ignore("disabled in the original as XtestEnglishFull")
| 429 | + @Test |
| 430 | + public void testEnglishFull06() { |
| 431 | + Query q = parser.parse("\"Ole von Beust\"", |
| 432 | + new WikiQueryParser.ParsingOptions(true)); |
| 433 | + assertEquals( |
| 434 | + "contents:\"ole von beust\" (title:\"ole von beust\"^2.0 stemtitle:\"ole von beust\"^0.8)", |
| 435 | + q.toString()); |
| 436 | + |
| 437 | + } |
| 438 | + |
 | 439 | +	@Ignore("disabled in the original as XtestEnglishFull")
| 440 | + @Test |
| 441 | + public void testEnglishFull07() { |
| 442 | + Query q = parser.parse("who is president of u.s.", |
| 443 | + new WikiQueryParser.ParsingOptions(true)); |
| 444 | + assertEquals( |
| 445 | + "(+contents:who +contents:is +(contents:president contents:presid^0.5) +contents:of +(contents:u.s contents:us^0.5)) ((+title:who^2.0 +title:is^2.0 +title:president^2.0 +title:of^2.0 +(title:u.s^2.0 title:us^0.4)) (+stemtitle:who^0.8 +stemtitle:is^0.8 +(stemtitle:president^0.8 stemtitle:presid^0.32000002) +stemtitle:of^0.8 +(stemtitle:u.s^0.8 stemtitle:us^0.32000002)))", |
| 446 | + q.toString()); |
| 447 | + assertEquals("[who, is, president, of, u.s]", parser.getWordsClean() |
| 448 | + .toString()); |
| 449 | + } |
| 450 | + |
| 451 | + |
| 452 | + @Test |
| 453 | + public void testExtractRawFields01() { |
192 | 454 | assertEquals("[something , 0:eh heh]", Arrays.toString(WikiQueryParser.extractRawField("something ondiscussionpage:eh heh", "ondiscussionpage:"))); |
| 455 | + |
| 456 | + } |
| 457 | + |
| 458 | + @Test |
| 459 | + public void testExtractRawFields02() { |
| 460 | + |
193 | 461 | assertEquals("[something , 0:eh \"heh\"]", Arrays.toString(WikiQueryParser.extractRawField("something ondiscussionpage:eh \"heh\"", "ondiscussionpage:"))); |
194 | 462 | } |
195 | 463 | } |