Index: trunk/lucene-search-2/test/org/wikimedia/lsearch/analyzers/ArticlesParser.java |
— | — | @@ -16,21 +16,20 @@ |
17 | 17 | * |
18 | 18 | */ |
19 | 19 | public class ArticlesParser { |
20 | | - |
| 20 | + |
21 | 21 | protected ArrayList<TestArticle> articles; |
22 | | - |
23 | | - |
| 22 | + |
24 | 23 | /** |
25 | | - * Initialize from a file path, open file and read into memory |
26 | | - * |
| 24 | + * Initialize from a file path, open file and read into memory |
| 25 | + * |
27 | 26 | * @param filename |
28 | 27 | */ |
29 | 28 | public ArticlesParser(String filename){ |
30 | 29 | BufferedReader in; |
31 | 30 | try { |
32 | 31 | articles = new ArrayList<TestArticle>(); |
33 | | - in = new BufferedReader(new FileReader(filename)); |
34 | | - readFromFile(in); |
| 32 | + in = new BufferedReader(new FileReader(filename)); |
| 33 | + readFromFile(in); |
35 | 34 | in.close(); |
36 | 35 | } catch (FileNotFoundException e) { |
37 | 36 | // TODO Auto-generated catch block |
— | — | @@ -40,10 +39,10 @@ |
41 | 40 | e.printStackTrace(); |
42 | 41 | } |
43 | 42 | } |
44 | | - |
| 43 | + |
45 | 44 | /** |
46 | 45 | * Read some articles from BufferedReader |
47 | | - * |
| 46 | + * |
48 | 47 | * @param in |
49 | 48 | */ |
50 | 49 | protected void readFromFile(BufferedReader in){ |
— | — | @@ -52,7 +51,7 @@ |
53 | 52 | boolean readingContent = false; |
54 | 53 | try { |
55 | 54 | while ((str = in.readLine()) != null){ |
56 | | - if(readingContent){ |
| 55 | + if(readingContent){ |
57 | 56 | if(str.startsWith("###")){ |
58 | 57 | // done |
59 | 58 | articles.add(a); |
— | — | @@ -84,12 +83,12 @@ |
85 | 84 | e.printStackTrace(); |
86 | 85 | } |
87 | 86 | } |
88 | | - |
| 87 | + |
89 | 88 | /** |
90 | 89 | * @return Returns the articles. |
91 | 90 | */ |
92 | 91 | public ArrayList<TestArticle> getArticles() { |
93 | 92 | return articles; |
94 | | - } |
95 | | - |
| 93 | + } |
| 94 | + |
96 | 95 | } |
Index: trunk/lucene-search-2/test/org/wikimedia/lsearch/analyzers/FastWikiTokenizerTest.java |
— | — | @@ -94,7 +94,13 @@ |
95 | 95 | assertEquals("1 [test] 1 [apostrophe's] 0 [apostrophes] 1 [and] 1 [other’s] 0 [others]", |
96 | 96 | tokens("Test apostrophe's and other\u2019s.")); |
97 | 97 | |
| 98 | + assertEquals("1 [abcdef] 0 [abcdef]", |
| 99 | + tokens("ABCDEF")); |
98 | 100 | |
| 101 | + assertEquals("1 [123456789] 0 [123456789]", |
| 102 | + tokens("123456789")); |
| 103 | + |
| 104 | + |
99 | 105 | } |
100 | 106 | |
101 | 107 | public void testHighlight(){ |
Index: trunk/lucene-search-2/configure |
— | — | @@ -1,2 +1,4 @@ |
2 | 2 | #!/bin/bash |
3 | | -java -cp LuceneSearch.jar org.wikimedia.lsearch.util.Configure $@ |
| 3 | +dir=`cd $1; pwd` |
| 4 | + |
| 5 | +java -cp LuceneSearch.jar org.wikimedia.lsearch.util.Configure $dir |
Index: trunk/lucene-search-2/RELEASE-NOTES.txt |
— | — | @@ -1,3 +1,8 @@ |
| 2 | +Lucene Search 2.1.3 |
| 3 | +=================== |
| 4 | + |
| 5 | +* Fix normalization of fullwidth numbers. |
| 6 | + |
2 | 7 | Lucene Search 2.0.2 |
3 | 8 | ==================== |
4 | 9 | |
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/util/PHPParser.java |
— | — | @@ -432,7 +432,7 @@ |
433 | 433 | } |
434 | 434 | try{ |
435 | 435 | PHPParser p = new PHPParser(); |
436 | | - String initset = p.readURL(new URL("file:///home/wikipedia/common/php-1.5/InitialiseSettings.php")); |
| 436 | + String initset = p.readURL(new URL("InitialiseSettings.php")); |
437 | 437 | System.out.println(p.getLanguages(initset)); |
438 | 438 | System.out.println("wgServer: " + p.getServer(initset)); |
439 | 439 | System.out.println(p.getDefaultSearch(initset)); |
Index: trunk/lucene-search-2/build.properties |
— | — | @@ -1,4 +1,4 @@ |
2 | | -version=2.1 |
| 2 | +version=2.1.3 |
3 | 3 | pack.name=lucene-search |
4 | 4 | pack.src.name=lucene-search-src |
5 | 5 | binary.name=lucene-search-bin |
Index: trunk/lucene-search-2/build.xml |
— | — | @@ -71,52 +71,49 @@ |
72 | 72 | |
73 | 73 | <target name="build" description="Compile classes"> |
74 | 74 | <mkdir dir="${dest.dir}"/> |
75 | | - <javac srcdir="${src.dir}/org/" debug="on" encoding="UTF-8" includes="**/*.java" destdir="${dest.dir}/"> |
| 75 | + <javac srcdir="${src.dir}/org/" debug="on" encoding="UTF-8" includes="**/*.java" destdir="${dest.dir}/"> |
76 | 76 | <classpath refid="classpath"/> |
77 | 77 | </javac> |
78 | 78 | </target> |
79 | 79 | |
80 | | - <target name="pack" description="Make tar.gz distribution"> |
| 80 | + <target name="clean-dist" description="Clean up the dist dir"> |
81 | 81 | <mkdir dir="${dist.dir}"/> |
82 | | - <delete file="${dist.dir}/${pack.name}.tar"/> |
83 | | - <delete file="${dist.dir}/${pack.name}.tar.gz"/> |
84 | | - <tar tarfile="${dist.dir}/${pack.name}.tar"> |
85 | | - <tarfileset prefix="${pack.name}" dir="." includes="${include}"/> |
| 82 | + <delete file="${dist.dir}/*.tar"/> |
| 83 | + <delete file="${dist.dir}/*.tar.gz"/> |
| 84 | + </target> |
| 85 | + |
| 86 | + <target name="pack" depends="clean-dist" description="Make tar.gz distribution"> |
| 87 | + <tar tarfile="${dist.dir}/${pack.name}-${version}.tar"> |
| 88 | + <tarfileset prefix="${pack.name}-${version}" dir="." includes="${include}"/> |
86 | 89 | </tar> |
87 | | - |
88 | | - <gzip zipfile="${dist.dir}/${pack.name}.tar.gz" src="${dist.dir}/${pack.name}.tar"/> |
89 | | - <delete file="${dist.dir}/${pack.name}.tar"/> |
| 90 | + <gzip zipfile="${dist.dir}/${pack.name}-${version}.tar.gz" src="${dist.dir}/${pack.name}-${version}.tar"/> |
| 91 | + <delete file="${dist.dir}/${pack.name}-${version}.tar"/> |
90 | 92 | </target> |
91 | 93 | |
92 | | - <target name="pack-src" depends="alljar" description="Make tar.gz distribution of only core source files"> |
93 | | - <mkdir dir="${dist.dir}"/> |
94 | | - <delete file="${dist.dir}/${src.name}.tar"/> |
95 | | - <delete file="${dist.dir}/${src.name}.tar.gz"/> |
96 | | - <tar tarfile="${dist.dir}/${src.name}.tar"> |
97 | | - <tarfileset prefix="${pack.name}" dir="." includes="${include.src}"/> |
| 94 | + <target name="pack-src" depends="alljar, clean-dist" description="Make tar.gz distribution of only core source files"> |
| 95 | + <tar tarfile="${dist.dir}/${src.name}-${version}.tar"> |
| 96 | + <tarfileset prefix="${pack.name}-${version}" dir="." includes="${include.src}"/> |
98 | 97 | </tar> |
99 | 98 | |
100 | | - <gzip zipfile="${dist.dir}/${src.name}.tar.gz" src="${dist.dir}/${src.name}.tar"/> |
101 | | - <delete file="${dist.dir}/${src.name}.tar"/> |
| 99 | + <gzip zipfile="${dist.dir}/${src.name}-${version}.tar.gz" src="${dist.dir}/${src.name}-${version}.tar"/> |
| 100 | + <delete file="${dist.dir}/${src.name}-${version}.tar"/> |
102 | 101 | </target> |
103 | 102 | |
104 | 103 | |
105 | | - <target name="binary" depends="alljar" description="Make binary tar.gz distribution"> |
106 | | - <mkdir dir="${dest.dir}"/> |
107 | | - <delete file="${dist.dir}/${binary.name}.tar"/> |
108 | | - <delete file="${dist.dir}/${binary.name}.tar.gz"/> |
109 | | - <tar tarfile="${dist.dir}/${binary.name}.tar"> |
110 | | - <tarfileset prefix="${pack.name}" dir="." includes="${jar.name} ${include.bin}" |
| 104 | + <target name="binary" depends="alljar, clean-dist" description="Make binary tar.gz distribution"> |
| 105 | + <tar tarfile="${dist.dir}/${binary.name}-${version}.tar"> |
| 106 | + <tarfileset prefix="${pack.name}-${version}" dir="." includes="${jar.name} ${include.bin}" |
111 | 107 | excludes="template/backup/**"/> |
112 | | - <tarfileset prefix="${pack.name}" mode="755" dir="." includes="${include.sh}"/> |
| 108 | + <tarfileset prefix="${pack.name}-${version}" mode="755" dir="." includes="${include.sh}"/> |
113 | 109 | </tar> |
114 | 110 | |
115 | | - <gzip zipfile="${dist.dir}/${binary.name}.tar.gz" src="${dist.dir}/${binary.name}.tar"/> |
116 | | - <delete file="${dist.dir}/${binary.name}.tar"/> |
| 111 | + <gzip zipfile="${dist.dir}/${binary.name}-${version}.tar.gz" src="${dist.dir}/${binary.name}-${version}.tar"/> |
| 112 | + <delete file="${dist.dir}/${binary.name}-${version}.tar"/> |
117 | 113 | </target> |
118 | 114 | |
119 | | - <target name="test-build" description="Compile the junit tests"> |
120 | | - <javac srcdir="${test.dir}" destdir="${test.dir}"> |
| 115 | + <target name="test-build" depends="alljar" description="Compile the junit tests"> |
| 116 | + <mkdir dir="${junit.class.dir}"/> |
| 117 | + <javac srcdir="${test.dir}" destdir="${junit.class.dir}"> |
121 | 118 | <classpath> |
122 | 119 | <pathelement path="${java.class.path}"/> |
123 | 120 | <pathelement location="${collector.dir}"/> |
— | — | @@ -127,12 +124,12 @@ |
128 | 125 | |
129 | 126 | <target name="test" depends="test-build"> |
130 | 127 | <property name="collector.dir" value="${test.dir}"/> |
131 | | - <mkdir dir="${junit.class.dir}"/> |
132 | 128 | |
133 | 129 | <junit haltonerror="false" haltonfailure="false" printsummary="yes" |
134 | 130 | errorProperty="tests.failed" failureproperty="tests.failed"> |
135 | 131 | <classpath> |
136 | 132 | <pathelement path="${java.class.path}"/> |
| 133 | + <pathelement location="${junit.class.dir}"/> |
137 | 134 | <pathelement location="${collector.dir}"/> |
138 | 135 | <pathelement path="${jar.name}"/> |
139 | 136 | </classpath> |
— | — | @@ -142,14 +139,20 @@ |
143 | 140 | <formatter type="plain" usefile="false"/> |
144 | 141 | </batchtest> |
145 | 142 | </junit> |
| 143 | + <fail if="tests.failed" message="Test(s) failed."/> |
146 | 144 | </target> |
147 | | - <fail if="tests.failed" message="Test(s) failed."/> |
148 | 145 | |
149 | 146 | <target name="clean" |
150 | 147 | description="Destroys all generated files and dirs."> |
151 | 148 | <delete dir="${dest.dir}"/> |
152 | 149 | <delete dir="${dist.dir}"/> |
153 | 150 | <delete dir="${junit.class.dir}"/> |
| 151 | + <delete dir="dumps"/> |
| 152 | + <delete dir="indexes"/> |
154 | 153 | <delete file="${basedir}/${jar.name}"/> |
| 154 | + <delete file="lsearch.log4j"/> |
| 155 | + <delete file="lsearch-global.conf"/> |
| 156 | + <delete file="lsearch.conf"/> |
| 157 | + <delete file="config.inc"/> |
155 | 158 | </target> |
156 | 159 | </project> |
Index: trunk/lucene-search-2/README.txt |
— | — | @@ -4,14 +4,14 @@ |
5 | 5 | == Requirements == |
6 | 6 | |
7 | 7 | - Java 5 + |
8 | | - - MediaWiki 1.13 with MWSearch extension |
| 8 | + - MediaWiki 1.13 with MWSearch extension |
9 | 9 | - Apache Ant 1.6 (for building from source) |
10 | 10 | |
11 | 11 | == Installation == |
12 | 12 | |
13 | 13 | A single-host, single-wiki configuration can be generated as follows. |
14 | 14 | |
15 | | -First make sure LuceneSearch.jar is present. If building from sources, |
| 15 | +First make sure LuceneSearch.jar is present. If building from sources, |
16 | 16 | run ant to make it: |
17 | 17 | |
18 | 18 | ant |
— | — | @@ -28,16 +28,16 @@ |
29 | 29 | |
30 | 30 | This will build search, highlight and spellcheck indexes from xml |
31 | 31 | database dump. For small wikis, just put this script into daily |
32 | | -cron and installation is done. |
| 32 | +cron and installation is done. |
33 | 33 | |
34 | | -For larger wikis, install OAIRepository MediaWiki extension and |
| 34 | +For larger wikis, install OAIRepository MediaWiki extension and |
35 | 35 | after building the initial index use incremental updater: |
36 | 36 | |
37 | 37 | ./update |
38 | 38 | |
39 | 39 | This will fetch latest updates from your wiki, and update various |
40 | | -indexes with search, page links and spell check data. Put this into |
41 | | -daily cron to keep the indexes up-to-date. |
| 40 | +indexes with search, page links and spell check data. Put this into |
| 41 | +daily cron to keep the indexes up-to-date. |
42 | 42 | |
43 | 43 | == Running == |
44 | 44 | |
— | — | @@ -45,9 +45,9 @@ |
46 | 46 | |
47 | 47 | ./lsearchd |
48 | 48 | |
49 | | -The deamon will listen on port 8123 for incoming search requests |
| 49 | +The daemon will listen on port 8123 for incoming search requests |
50 | 50 | from MediaWiki, and on port 8321 for incoming incremental updates |
51 | | -for the index. |
| 51 | +for the index. |
52 | 52 | |
53 | 53 | == Further notes == |
54 | 54 | |