Index: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/test/AbstractWikiTestCase.java |
— | — | @@ -1,9 +1,16 @@ |
2 | 2 | package org.wikimedia.lsearch.test; |
3 | 3 | |
4 | 4 | import java.io.File; |
| 5 | +import java.io.IOException; |
| 6 | +import java.io.StringReader; |
| 7 | +import java.util.ArrayList; |
| 8 | +import java.util.Arrays; |
5 | 9 | |
6 | 10 | import junit.framework.TestCase; |
7 | 11 | |
| 12 | +import org.apache.lucene.analysis.Analyzer; |
| 13 | +import org.apache.lucene.analysis.Token; |
| 14 | +import org.apache.lucene.analysis.TokenStream; |
8 | 15 | import org.junit.Before; |
9 | 16 | import org.wikimedia.lsearch.analyzers.WikiQueryParser; |
10 | 17 | import org.wikimedia.lsearch.config.Configuration; |
— | — | @@ -80,4 +87,40 @@ |
81 | 88 | } |
82 | 89 | } |
83 | 90 | |
| 91 | + |
| 92 | + /** Analyzer that {@link #tokens(String)} runs its input through; null until a test assigns one. */ |
| 93 | + protected Analyzer a = null; |
| 94 | + |
| 95 | + /** |
| 96 | + * Feeds {@code text} to {@code analyzer} for {@code field} and returns every token in stream order. |
| 97 | + * The token stream is closed before returning, even when reading it fails. |
| 98 | + * @throws IOException if the token stream cannot be read or closed |
| 99 | + */ |
| 100 | + public static Token[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException { |
| 101 | + TokenStream stream = analyzer.tokenStream(field, new StringReader(text)); |
| 102 | + ArrayList<Token> tokenList = new ArrayList<Token>(); |
| 103 | + try { |
| 104 | + for (Token token = stream.next(); token != null; token = stream.next()) { |
| 105 | + tokenList.add(token); |
| 106 | + } |
| 107 | + } finally { |
| 108 | + stream.close(); |
| 109 | + } |
| 110 | + return tokenList.toArray(new Token[tokenList.size()]); |
| 111 | + } |
| 112 | + |
| 113 | + /** |
| 114 | + * Renders, via {@code Arrays.toString}, the tokens that analyzer {@code a} produces for {@code text} in the "contents" field. |
| 115 | + * Fails the running test if {@code a} has not been assigned or if tokenizing raises an IOException. |
| 116 | + */ |
| 117 | + public String tokens(String text){ |
| 118 | + assertNotNull("analyzer 'a' must be assigned before calling tokens()", a); |
| 119 | + try{ |
| 120 | + return Arrays.toString(tokensFromAnalysis(a,text,"contents")); |
| 121 | + } catch(IOException e){ |
| 122 | + fail("tokenizing \"" + text + "\" failed: " + e); |
| 123 | + return null; // not reached: fail() always throws |
| 124 | + } |
| 125 | + } |
| 126 | + |
84 | 127 | } |
Index: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/AnalysisTest.java |
— | — | @@ -24,7 +24,7 @@ |
25 | 25 | |
26 | 26 | public class AnalysisTest extends AbstractWikiTestCase { |
27 | 27 | |
28 | | - Analyzer a = null; |
| 28 | + |
29 | 29 | Configuration config = null; |
30 | 30 | |
31 | 31 | @Before |
— | — | @@ -35,14 +35,8 @@ |
36 | 36 | GlobalConfiguration.getInstance(); |
37 | 37 | } |
38 | 38 | } |
| 39 | + |
39 | 40 | |
40 | | - @Test |
41 | | - public void testCJKAnalyzer(){ |
42 | | - a = new CJKAnalyzer(); |
43 | | - assertEquals( |
44 | | - "[(いわ,0,2,type=double), (わさ,1,3,type=double), (さき,2,4,type=double), (ic,4,6,type=single), (カー,6,8,type=double), (ード,7,9,type=double)]", |
45 | | - tokens("いわさきicカード")); |
46 | | - } |
47 | 41 | |
48 | 42 | /** Common test for indexer and searcher analyzers */ |
49 | 43 | @Test |
— | — | @@ -97,28 +91,12 @@ |
98 | 92 | commonSerbian(); |
99 | 93 | } |
100 | 94 | |
101 | | - @Test |
102 | | - public String tokens(String text){ |
103 | | - try{ |
104 | | - return Arrays.toString(tokensFromAnalysis(a,text,"contents")); |
105 | | - } catch(IOException e){ |
106 | | - fail(e.getMessage()); |
107 | | - return null; |
108 | | - } |
109 | | - } |
| 95 | + |
110 | 96 | |
111 | 97 | |
112 | | - public static Token[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException { |
113 | | - TokenStream stream = analyzer.tokenStream(field, new StringReader(text)); |
114 | | - ArrayList<Token> tokenList = new ArrayList<Token>(); |
115 | | - while (true) { |
116 | | - Token token = stream.next(); |
117 | | - if (token == null) break; |
118 | | - tokenList.add(token); |
119 | | - } |
120 | | - return (Token[]) tokenList.toArray(new Token[0]); |
121 | | - } |
122 | 98 | |
| 99 | + |
| 100 | + |
123 | 101 | public static void displayTokens(Analyzer analyzer, String text) throws IOException { |
124 | 102 | Token[] tokens = tokensFromAnalysis(analyzer, text, "contents"); |
125 | 103 | System.out.println(text); |
Index: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/CJKAnalyzerTest.java |
— | — | @@ -0,0 +1,56 @@ |
| 2 | +package org.wikimedia.lsearch.analyzers;
|
| 3 | +
|
| 4 | +import java.io.IOException;
|
| 5 | +import java.io.StringReader;
|
| 6 | +import java.util.ArrayList;
|
| 7 | +import java.util.Arrays;
|
| 8 | +import java.util.HashSet;
|
| 9 | +
|
| 10 | +import org.apache.lucene.analysis.Analyzer;
|
| 11 | +import org.apache.lucene.analysis.Token;
|
| 12 | +import org.apache.lucene.analysis.TokenStream;
|
| 13 | +import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
| 14 | +import org.apache.lucene.queryParser.ParseException;
|
| 15 | +import org.apache.lucene.queryParser.QueryParser;
|
| 16 | +import org.apache.lucene.search.Query;
|
| 17 | +import org.junit.Before;
|
| 18 | +import org.junit.Test;
|
| 19 | +import org.wikimedia.lsearch.analyzers.Aggregate.Flags;
|
| 20 | +import org.wikimedia.lsearch.config.Configuration;
|
| 21 | +import org.wikimedia.lsearch.config.GlobalConfiguration;
|
| 22 | +import org.wikimedia.lsearch.config.IndexId;
|
| 23 | +import org.wikimedia.lsearch.ranks.StringList;
|
| 24 | +import org.wikimedia.lsearch.test.AbstractWikiTestCase;
|
| 25 | +
|
| 26 | +/**
|
| 27 | + * Checks the tokens that Lucene's {@link CJKAnalyzer} produces. The analyzer field
|
| 28 | + * {@code a} and the {@code tokens(String)} helper are inherited from
|
| 29 | + * {@link AbstractWikiTestCase}; redeclaring them here would hide the shared fixture.
|
| 30 | + */
|
| 31 | +public class CJKAnalyzerTest extends AbstractWikiTestCase {
|
| 32 | +
|
| 33 | + /** Opened by {@link #setUp()} the first time it runs on this instance. */
|
| 34 | + Configuration config = null;
|
| 35 | +
|
| 36 | + /** Runs the base-class setup; if config is still null, opens it and calls GlobalConfiguration.getInstance(). */
|
| 37 | + @Before
|
| 38 | + protected void setUp() {
|
| 39 | + super.setUp();
|
| 40 | + if(config == null){
|
| 41 | + config = Configuration.open();
|
| 42 | + GlobalConfiguration.getInstance();
|
| 43 | + }
|
| 44 | + }
|
| 45 | +
|
| 46 | + /**
|
| 47 | + * Mixed kana and Latin input: kana runs are expected as overlapping two-character "double" tokens, "ic" as one "single" token.
|
| 48 | + */
|
| 49 | + @Test
|
| 50 | + public void testCJKAnalyzer(){
|
| 51 | + a = new CJKAnalyzer();
|
| 52 | + assertEquals(
|
| 53 | + "[(いわ,0,2,type=double), (わさ,1,3,type=double), (さき,2,4,type=double), (ic,4,6,type=single), (カー,6,8,type=double), (ード,7,9,type=double)]",
|
| 54 | + tokens("いわさきicカード"));
|
| 55 | + }
|
| 56 | +
|
| 57 | +} |
\ No newline at end of file |
Property changes on: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/CJKAnalyzerTest.java |
___________________________________________________________________ |
Added: svn:keywords |
1 | 58 | + LastChangedDate LastChangedRevision LastChangedBy Id |