Index: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/AcronymFilter.java |
— | — | @@ -12,90 +12,69 @@ |
13 | 13 | */ |
14 | 14 | public class AcronymFilter extends TokenFilter { |
15 | 15 | |
16 | | - Token buffered = null; //TODO: document buffer behavior. |
17 | | - |
18 | | - public AcronymFilter(TokenStream input) { |
| 16 | + protected transient Token buffered = null; // TODO: document buffer behavior. |
| 17 | + |
| 18 | + public AcronymFilter(final TokenStream input) { |
19 | 19 | super(input); |
20 | 20 | } |
21 | | - |
22 | | - |
23 | | - @Override |
24 | | - public Token next(Token reusableToken) throws IOException { |
25 | | - |
26 | | - if(buffered != null){ |
| 21 | + |
| 22 | + @Override |
| 23 | + public Token next(Token reusableToken) throws IOException { // NOPMD by oren on 2/13/12 12:41 AM |
| 24 | + |
| 25 | + if (buffered == null) { |
| 26 | + reusableToken = input.next(reusableToken); |
| 27 | + if (reusableToken != null && isAcronym(reusableToken.termBuffer())) { |
| 28 | + buffered = new Token(filteredBuffer.toString(), |
| 29 | + reusableToken.startOffset(), reusableToken.endOffset(), |
| 30 | + reusableToken.type()); |
| 31 | + buffered.setPositionIncrement(0); |
| 32 | + } |
| 33 | + }else{ |
27 | 34 | reusableToken = buffered; |
28 | | - buffered = null; |
29 | | - return reusableToken; |
| 35 | + buffered = null; // NOPMD by oren on 2/13/12 1:00 AM |
| 36 | + |
30 | 37 | } |
31 | | - reusableToken = input.next(reusableToken); |
32 | | - if(reusableToken == null) |
33 | | - return null; |
34 | | - |
35 | | - if(isAcronym(reusableToken.termBuffer())){ |
36 | | - buffered = new Token(filteredBuffer.toString(),reusableToken.startOffset(),reusableToken.endOffset(),reusableToken.type()); |
37 | | - buffered.setPositionIncrement(0); |
38 | | - } |
39 | 38 | return reusableToken; |
40 | 39 | } |
41 | | - |
42 | | - StringBuffer filteredBuffer = new StringBuffer(); |
43 | 40 | |
| 41 | + protected transient StringBuffer filteredBuffer = new StringBuffer(); // NOPMD by oren on 2/13/12 1:00 AM |
| 42 | + |
44 | 43 | /** |
45 | | - * check is a token is an acronym and gen filtered version |
| 44 | + * check is a token is an acronym and gen filtered version |
46 | 45 | * |
47 | 46 | * @param buffer |
48 | 47 | * @param start |
49 | 48 | * @param end |
50 | 49 | * @return |
51 | 50 | */ |
52 | | - protected boolean isAcronym(char[] buffer){ |
53 | | - |
54 | | - boolean isAlpha=false; |
55 | | - boolean hasDot=false; |
56 | | - //boolean isNumeric=false; |
57 | | - |
| 51 | + protected boolean isAcronym(final char[] buffer) { |
| 52 | + |
| 53 | + boolean isAlpha = false; |
| 54 | + boolean hasDot = false; // NOPMD by oren on 2/13/12 12:53 AM |
| 55 | + // boolean isNumeric=false; |
| 56 | + |
58 | 57 | filteredBuffer.setLength(0); |
59 | | - |
60 | | - char c=' '; |
61 | | - |
| 58 | + |
62 | 59 | for (int offset = 0; offset < buffer.length; offset++) { |
63 | | - c = buffer[offset]; |
| 60 | + final char character = buffer[offset]; |
64 | 61 | |
65 | | - if (c == '.') { |
66 | | - hasDot = true; |
| 62 | + if (character == '.') { |
| 63 | + hasDot = true; // NOPMD by oren on 2/13/12 12:53 AM |
67 | 64 | } else { |
68 | 65 | |
69 | 66 | // side effect - filter the dot |
70 | | - filteredBuffer.append(c); |
| 67 | + filteredBuffer.append(character); |
71 | 68 | |
72 | | - if (!isAlpha && c >= '0' && c <= '9') { |
73 | | - //isNumeric = true; |
74 | | - } else { |
| 69 | + if (isAlpha || character < '0' || character > '9') { |
75 | 70 | isAlpha = true; |
76 | 71 | } |
77 | | - |
78 | | - //process full string |
| 72 | + |
| 73 | + // process full string |
79 | 74 | } |
80 | 75 | } |
81 | 76 | |
82 | | - return hasDot && isAlpha ; |
| 77 | + return hasDot && isAlpha; |
83 | 78 | } |
84 | | - |
85 | | - protected boolean hasDot(char[] buffer){ |
86 | | - for(char c: buffer){ |
87 | | - if (c=='.') return true; |
88 | | - } |
89 | | - return false; |
90 | | - } |
91 | | - |
92 | | - protected boolean isNumber(char[] buffer){ |
93 | | - for(char c: buffer){ |
94 | | - if(! ((c >= '0' && c <='9') || (c=='.') )) |
95 | | - return false; |
96 | | - } |
97 | | - return true; |
98 | | - } |
99 | | - |
100 | | - |
101 | | - |
| 79 | + |
| 80 | + |
102 | 81 | } |