Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java |
— | — | @@ -1,9 +1,11 @@ |
2 | 2 | package org.wikimedia.lsearch.analyzers; |
3 | 3 | |
| 4 | +import java.util.ArrayList; |
4 | 5 | import java.util.Arrays; |
5 | 6 | import java.util.HashSet; |
6 | 7 | |
7 | 8 | import org.apache.lucene.analysis.Analyzer; |
| 9 | +import org.apache.lucene.analysis.Token; |
8 | 10 | import org.apache.lucene.search.Query; |
9 | 11 | import org.wikimedia.lsearch.analyzers.Analyzers; |
10 | 12 | import org.wikimedia.lsearch.analyzers.FieldBuilder; |
— | — | @@ -128,6 +130,12 @@ |
129 | 131 | |
130 | 132 | q = parser.parseRaw("something (intitle:[2]:tests) out"); |
131 | 133 | assertEquals("+contents:something +(title:tests title:test^0.5) +contents:out",q.toString()); |
| 134 | + |
| 135 | + ArrayList<Token> tokens = parser.tokenizeForSpellCheck("+incategory:\"zero\" a:b incategory:c +incategory:d [1]:20"); |
| 136 | + assertEquals("[(a,19,20), (b,21,22), (c,34,35), (d,48,49), (20,54,56)]", tokens.toString()); |
| 137 | + |
| 138 | + tokens = parser.tokenizeForSpellCheck("+incategory:\"Suspension bridges in the United States\""); |
| 139 | + assertEquals("[]", tokens.toString()); |
132 | 140 | |
133 | 141 | |
134 | 142 | } catch(Exception e){ |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -540,9 +540,11 @@ |
541 | 541 | } else if(fieldLevel != -1 && level>fieldLevel) |
542 | 542 | continue; |
543 | 543 | |
544 | | - if(isTermChar(c) && text[cur]!='-'){ |
 | 544 | + // skip the inclusion mark '+' (like the exclusion mark '-') so it does not start a token
| 545 | + if(isTermChar(c) && text[cur]!='-' && text[cur]!='+'){ |
545 | 546 | int start = cur; |
546 | 547 | tokenType = fetchToken(inPhrase); |
| 548 | + // ignore excluded words |
547 | 549 | if(tokenType == TokenType.WORD && (start==0 || text[start-1]!='-')){ |
548 | 550 | String type = "word"; |
549 | 551 | if(bufferIsWildCard()) |