Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/test/WikiQueryParserTest.java |
— | — | @@ -94,11 +94,11 @@ |
95 | 95 | assertTrue(fields.contains("contents")); |
96 | 96 | |
97 | 97 | // namespace policies |
98 | | - parser = new WikiQueryParser("contents","main",new SimpleAnalyzer(), WikiQueryParser.NamespacePolicy.IGNORE); |
| 98 | + parser = new WikiQueryParser("contents","0",new SimpleAnalyzer(), WikiQueryParser.NamespacePolicy.IGNORE); |
99 | 99 | q = parser.parseRaw("help:making breakfast category:food"); |
100 | 100 | assertEquals("+contents:making +contents:breakfast +category:food",q.toString()); |
101 | 101 | |
102 | | - parser = new WikiQueryParser("contents","main",new SimpleAnalyzer(), WikiQueryParser.NamespacePolicy.REWRITE); |
| 102 | + parser = new WikiQueryParser("contents","0",new SimpleAnalyzer(), WikiQueryParser.NamespacePolicy.REWRITE); |
103 | 103 | q = parser.parseRaw("help:making breakfast category:food"); |
104 | 104 | assertEquals("+namespace:12 +(+contents:making +contents:breakfast +category:food)",q.toString()); |
105 | 105 | |
— | — | @@ -120,7 +120,7 @@ |
121 | 121 | |
122 | 122 | // ====== English Analyzer ======== |
123 | 123 | |
124 | | - parser = new WikiQueryParser("contents","main",new EnglishAnalyzer(), WikiQueryParser.NamespacePolicy.REWRITE); |
| 124 | + parser = new WikiQueryParser("contents","0",new EnglishAnalyzer(), WikiQueryParser.NamespacePolicy.REWRITE); |
125 | 125 | q = parser.parseRaw("main_talk:laziness"); |
126 | 126 | assertEquals("+namespace:1 +(contents:laziness contents:lazi^0.5)",q.toString()); |
127 | 127 | |
— | — | @@ -184,7 +184,7 @@ |
185 | 185 | // Tests with actual params :) |
186 | 186 | // ================================== |
187 | 187 | Analyzer analyzer = Analyzers.getSearcherAnalyzer("en"); |
188 | | - parser = new WikiQueryParser("contents","main",analyzer,NamespacePolicy.LEAVE); |
| 188 | + parser = new WikiQueryParser("contents","0",analyzer,NamespacePolicy.LEAVE); |
189 | 189 | q = parser.parseTwoPass("beans everyone",null); |
190 | 190 | assertEquals("(+(contents:beans contents:bean^0.5) +(contents:everyone contents:everyon^0.5)) (+title:beans^2.0 +title:everyone^2.0)",q.toString()); |
191 | 191 | |
— | — | @@ -233,9 +233,21 @@ |
234 | 234 | q = parser.parseTwoPass("main:1991 category:\"olympic cities\" -all:1990",NamespacePolicy.REWRITE); |
235 | 235 | assertEquals("(+(+namespace:0 +(+contents:1991 +category:\"olympic cities\")) -contents:1990) (+(+namespace:0 +(+title:1991^2.0 +category:\"olympic cities\")) -title:1990^2.0)",q.toString()); |
236 | 236 | |
| 237 | + q = parser.parseTwoPass("main:ba*",NamespacePolicy.IGNORE); |
| 238 | + assertEquals("contents:ba* title:ba*^2.0",q.toString()); |
| 239 | + |
| 240 | + q = parser.parseTwoPass("main:ba* all:lele",NamespacePolicy.REWRITE); |
| 241 | + assertEquals("(+(+namespace:0 +contents:ba*) +contents:lele) (+(+namespace:0 +title:ba*^2.0) +title:lele^2.0)",q.toString()); |
| 242 | + |
| 243 | + q = parser.parseTwoPass("main:ba*beans",NamespacePolicy.IGNORE); |
| 244 | + assertEquals("(+contents:ba +(contents:beans contents:bean^0.5)) (+title:ba^2.0 +title:beans^2.0)",q.toString()); |
| 245 | + |
| 246 | + q = parser.parseTwoPass("*kuta",NamespacePolicy.IGNORE); |
| 247 | + assertEquals("contents:kuta title:kuta^2.0",q.toString()); |
| 248 | + |
237 | 249 | // Localization tests |
238 | 250 | analyzer = Analyzers.getSearcherAnalyzer("sr"); |
239 | | - parser = new WikiQueryParser("contents","main",analyzer,NamespacePolicy.LEAVE); |
| 251 | + parser = new WikiQueryParser("contents","0",analyzer,NamespacePolicy.LEAVE); |
240 | 252 | |
241 | 253 | q = parser.parseTwoPass("all:добродошли на википедију",NamespacePolicy.IGNORE); |
242 | 254 | assertEquals("(+(contents:добродошли contents:dobrodosli^0.5) +(contents:на contents:na^0.5) +(contents:википедију contents:vikipediju^0.5)) (+(title:добродошли^2.0 title:dobrodosli) +(title:на^2.0 title:na) +(title:википедију^2.0 title:vikipediju))",q.toString()); |
— | — | @@ -244,7 +256,7 @@ |
245 | 257 | assertEquals("(+contents:dobrodosli +contents:na +contents:sdjccz) (+title:dobrodosli^2.0 +title:na^2.0 +title:sdjccz^2.0)",q.toString()); |
246 | 258 | |
247 | 259 | analyzer = Analyzers.getSearcherAnalyzer("th"); |
248 | | - parser = new WikiQueryParser("contents","main",analyzer,NamespacePolicy.LEAVE); |
| 260 | + parser = new WikiQueryParser("contents","0",analyzer,NamespacePolicy.LEAVE); |
249 | 261 | |
250 | 262 | q = parser.parseTwoPass("ภาษาไทย",NamespacePolicy.IGNORE); |
251 | 263 | assertEquals("(+contents:ภาษา +contents:ไทย) (+title:ภาษา^2.0 +title:ไทย^2.0)",q.toString()); |
— | — | @@ -252,6 +264,19 @@ |
253 | 265 | q = parser.parseTwoPass("help:ภาษาไทย",NamespacePolicy.REWRITE); |
254 | 266 | assertEquals("(+namespace:12 +(+contents:ภาษา +contents:ไทย)) (+namespace:12 +(+title:ภาษา^2.0 +title:ไทย^2.0))",q.toString()); |
255 | 267 | |
| 268 | + // Backward compatibility for complex filters |
| 269 | + analyzer = Analyzers.getSearcherAnalyzer("en"); |
| 270 | + parser = new WikiQueryParser("contents","0,1,4,12",analyzer,NamespacePolicy.IGNORE); |
| 271 | + |
| 272 | + q = parser.parseTwoPass("beans everyone",NamespacePolicy.REWRITE); |
| 273 | + assertEquals("(+(namespace:0 namespace:1 namespace:4 namespace:12) +(+(contents:beans contents:bean^0.5) +(contents:everyone contents:everyon^0.5))) (+(namespace:0 namespace:1 namespace:4 namespace:12) +(+title:beans^2.0 +title:everyone^2.0))",q.toString()); |
| 274 | + |
| 275 | + q = parser.parseTwoPass("beans main:everyone",NamespacePolicy.REWRITE); |
| 276 | + assertEquals("((+(namespace:0 namespace:1 namespace:4 namespace:12) +(contents:beans contents:bean^0.5)) (+namespace:0 +(contents:everyone contents:everyon^0.5))) ((+(namespace:0 namespace:1 namespace:4 namespace:12) +title:beans^2.0) (+namespace:0 +title:everyone^2.0))",q.toString()); |
| 277 | + |
| 278 | + q = parser.parseTwoPass("beans everyone category:cheeses",NamespacePolicy.REWRITE); |
| 279 | + assertEquals("(+(namespace:0 namespace:1 namespace:4 namespace:12) +(+(contents:beans contents:bean^0.5) +(contents:everyone contents:everyon^0.5) +category:cheeses)) (+(namespace:0 namespace:1 namespace:4 namespace:12) +(+title:beans^2.0 +title:everyone^2.0 +category:cheeses))",q.toString()); |
| 280 | + |
256 | 281 | } catch(Exception e){ |
257 | 282 | e.printStackTrace(); |
258 | 283 | } |
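For reference, a minimal sketch of how the changed constructor is used: the default namespace argument is now a numeric id list such as "0" rather than a name such as "main", and trailing-wildcard terms parse into wildcard queries. The expected output is taken from the assertions above; the import path for Analyzers is an assumption based on this changeset.

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.wikimedia.lsearch.analyzers.Analyzers; // package assumed
import org.wikimedia.lsearch.analyzers.WikiQueryParser;

public class WildcardParseSketch {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = Analyzers.getSearcherAnalyzer("en");
        // the default namespace is now a numeric id string ("0"),
        // not a namespace name ("main")
        WikiQueryParser parser = new WikiQueryParser("contents", "0", analyzer,
                WikiQueryParser.NamespacePolicy.LEAVE);
        Query q = parser.parseTwoPass("main:ba*", WikiQueryParser.NamespacePolicy.IGNORE);
        // per the assertion above: contents:ba* title:ba*^2.0
        System.out.println(q);
    }
}
```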
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/importer/Importer.java |
— | — | @@ -9,6 +9,8 @@ |
10 | 10 | import org.mediawiki.importer.XmlDumpReader; |
11 | 11 | import org.wikimedia.lsearch.config.Configuration; |
12 | 12 | import org.wikimedia.lsearch.config.GlobalConfiguration; |
| 13 | +import org.wikimedia.lsearch.config.IndexId; |
| 14 | +import org.wikimedia.lsearch.index.IndexThread; |
13 | 15 | import org.wikimedia.lsearch.util.Localization; |
14 | 16 | import org.wikimedia.lsearch.util.UnicodeDecomposer; |
15 | 17 | |
— | — | @@ -30,7 +32,8 @@ |
31 | 33 | String dbname = null; |
32 | 34 | Boolean optimize = null; |
33 | 35 | Integer mergeFactor = null, maxBufDocs = null; |
34 | | - boolean newIndex = false; |
| 36 | + boolean newIndex = false, makeSnapshot = false; |
| 37 | + boolean snapshotDb = false; |
35 | 38 | |
36 | 39 | System.out.println("MediaWiki Lucene search indexer - index builder from xml database dumps.\n"); |
37 | 40 | |
— | — | @@ -38,13 +41,15 @@ |
39 | 42 | Logger log = Logger.getLogger(Importer.class); |
40 | 43 | |
41 | 44 | if(args.length < 2){ |
42 | | - System.out.println("Syntax: java Importer [-n] [-l limit] [-o optimize] [-m mergeFactor] [-b maxBufDocs] <inputfile> <dbname>"); |
| 45 | + System.out.println("Syntax: java Importer [-n] [-s] [-l limit] [-o optimize] [-m mergeFactor] [-b maxBufDocs] <inputfile> <dbname>"); |
43 | 46 | System.out.println("Options: "); |
44 | 47 | System.out.println(" -n - create a new index (erase the old one if exists)"); |
| 48 | + System.out.println(" -s - make index snapshot when finished"); |
45 | 49 | System.out.println(" -l limit_num - add at most limit_num articles"); |
46 | 50 | System.out.println(" -o optimize - true/false overrides optimization param from global settings"); |
47 | 51 | System.out.println(" -m mergeFactor - overrides param from global settings"); |
48 | 52 | System.out.println(" -b maxBufDocs - overrides param from global settings"); |
| 53 | + System.out.println(" --snapshot <db> - make snapshot only for dbname"); |
49 | 54 | return; |
50 | 55 | } |
51 | 56 | for(int i=0;i<args.length;i++){ |
— | — | @@ -58,52 +63,78 @@ |
59 | 64 | maxBufDocs = Integer.parseInt(args[++i]); |
60 | 65 | else if(args[i].equals("-n")) |
61 | 66 | newIndex = true; |
62 | | - else if(inputfile == null) |
| 67 | + else if(args[i].equals("-s")) |
| 68 | + makeSnapshot = true; |
| 69 | + else if(args[i].equals("--snapshot")){ |
| 70 | + dbname = args[++i]; |
| 71 | + snapshotDb = true; |
| 72 | + break; |
| 73 | + } else if(inputfile == null) |
63 | 74 | inputfile = args[i]; |
64 | 75 | else if(dbname == null) |
65 | 76 | dbname = args[i]; |
66 | 77 | else |
67 | 78 | System.out.println("Unrecognized option: "+args[i]); |
68 | 79 | } |
69 | | - |
70 | | - if(inputfile == null || dbname == null){ |
71 | | - System.out.println("Please specify both input xml file and database name"); |
72 | | - return; |
73 | | - } |
| 80 | + if(!snapshotDb){ |
| 81 | + if(inputfile == null || dbname == null){ |
| 82 | + System.out.println("Please specify both input xml file and database name"); |
| 83 | + return; |
| 84 | + } |
74 | 85 | |
75 | | - // preload |
76 | | - UnicodeDecomposer.getInstance(); |
77 | | - Localization.readLocalization(GlobalConfiguration.getInstance().getLanguage(dbname)); |
78 | | - Localization.loadInterwiki(); |
79 | | - |
80 | | - long start = System.currentTimeMillis(); |
81 | | - |
82 | | - // open |
83 | | - InputStream input = null; |
84 | | - try { |
85 | | - input = Tools.openInputFile(inputfile); |
86 | | - } catch (IOException e) { |
87 | | - log.fatal("I/O error opening "+inputfile); |
| 86 | + // preload |
| 87 | + UnicodeDecomposer.getInstance(); |
| 88 | + Localization.readLocalization(GlobalConfiguration.getInstance().getLanguage(dbname)); |
| 89 | + Localization.loadInterwiki(); |
| 90 | + |
| 91 | + long start = System.currentTimeMillis(); |
| 92 | + |
| 93 | + // open |
| 94 | + InputStream input = null; |
| 95 | + try { |
| 96 | + input = Tools.openInputFile(inputfile); |
| 97 | + } catch (IOException e) { |
| 98 | + log.fatal("I/O error opening "+inputfile); |
| 99 | + return; |
| 100 | + } |
| 101 | + |
| 102 | + // read |
| 103 | + DumpImporter dp = new DumpImporter(dbname,limit,optimize,mergeFactor,maxBufDocs,newIndex); |
| 104 | + XmlDumpReader reader = new XmlDumpReader(input,new ProgressFilter(dp, 100)); |
| 105 | + try { |
| 106 | + reader.readDump(); |
| 107 | + } catch (IOException e) { |
| 108 | + if(!e.getMessage().equals("stopped")){ |
| 109 | + log.fatal("I/O error reading dump for "+dbname+" from "+inputfile); |
| 110 | + return; |
| 111 | + } |
| 112 | + } |
| 113 | + |
| 114 | + long end = System.currentTimeMillis(); |
| 115 | + |
| 116 | + log.info("Closing/optimizing index..."); |
| 117 | + dp.closeIndex(); |
| 118 | + |
| 119 | + long finalEnd = System.currentTimeMillis(); |
| 120 | + |
| 121 | + System.out.println("Finished indexing in "+formatTime(end-start)+", with final index optimization in "+formatTime(finalEnd-end)); |
| 122 | + System.out.println("Total time: "+formatTime(finalEnd-start)); |
88 | 123 | } |
89 | 124 | |
90 | | - // read |
91 | | - DumpImporter dp = new DumpImporter(dbname,limit,optimize,mergeFactor,maxBufDocs,newIndex); |
92 | | - XmlDumpReader reader = new XmlDumpReader(input,new ProgressFilter(dp, 100)); |
93 | | - try { |
94 | | - reader.readDump(); |
95 | | - } catch (IOException e) { |
96 | | - log.warn("I/O error reading dump for "+dbname+" from "+inputfile); |
97 | | - } |
98 | | - |
99 | | - long end = System.currentTimeMillis(); |
100 | | - |
101 | | - log.info("Closing/optimizing index..."); |
102 | | - dp.closeIndex(); |
103 | | - |
104 | | - long finalEnd = System.currentTimeMillis(); |
105 | | - |
106 | | - System.out.println("Finished indexing in "+formatTime(end-start)+", with final index optimization in "+formatTime(finalEnd-end)); |
107 | | - System.out.println("Total time: "+formatTime(finalEnd-start)); |
| 125 | + // make snapshot if needed |
| 126 | + if(makeSnapshot || snapshotDb){ |
| 127 | + IndexId iid = IndexId.get(dbname); |
| 128 | + if(iid.isMainsplit()){ |
| 129 | + IndexThread.makeIndexSnapshot(iid.getMainPart(),iid.getMainPart().getImportPath()); |
| 130 | + IndexThread.makeIndexSnapshot(iid.getRestPart(),iid.getRestPart().getImportPath()); |
| 131 | + } else if(iid.isSplit()){ |
| 132 | + for(String part : iid.getSplitParts()){ |
| 133 | + IndexId iidp = IndexId.get(part); |
| 134 | + IndexThread.makeIndexSnapshot(iidp,iidp.getImportPath()); |
| 135 | + } |
| 136 | + } else |
| 137 | + IndexThread.makeIndexSnapshot(iid,iid.getImportPath()); |
| 138 | + } |
108 | 139 | } |
109 | 140 | |
110 | 141 | private static String formatTime(long l) { |
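The new flags wire the importer into the snapshot machinery: `-s` snapshots the freshly built index once indexing finishes, and `--snapshot <db>` (hypothetical invocation: `java Importer --snapshot wikilucene`) makes a snapshot without importing anything. Both paths reduce to the per-part logic at the end of main(), extracted here as a sketch with names taken from this diff:

```java
import org.wikimedia.lsearch.config.IndexId;
import org.wikimedia.lsearch.index.IndexThread;

public class SnapshotSketch {
    static void snapshot(String dbname) {
        IndexId iid = IndexId.get(dbname);
        if (iid.isMainsplit()) {
            // mainsplit: the main-namespace part and the "rest" part
            // are separate physical indexes, snapshot each
            IndexThread.makeIndexSnapshot(iid.getMainPart(), iid.getMainPart().getImportPath());
            IndexThread.makeIndexSnapshot(iid.getRestPart(), iid.getRestPart().getImportPath());
        } else if (iid.isSplit()) {
            for (String part : iid.getSplitParts()) {
                IndexId iidp = IndexId.get(part);
                IndexThread.makeIndexSnapshot(iidp, iidp.getImportPath());
            }
        } else {
            IndexThread.makeIndexSnapshot(iid, iid.getImportPath());
        }
    }
}
```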
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/SearcherCache.java |
— | — | @@ -165,7 +165,7 @@ |
166 | 166 | for(IndexId iid : mys){ |
167 | 167 | try { |
168 | 168 | IndexSearcherMul is = getLocalSearcher(iid); |
169 | | - Warmup.warmupIndexSearcher(is,iid); |
| 169 | + Warmup.warmupIndexSearcher(is,iid,false); |
170 | 170 | } catch (IOException e) { |
171 | 171 | log.warn("I/O error warming index for "+iid); |
172 | 172 | } |
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/NamespaceFilter.java |
— | — | @@ -76,16 +76,16 @@ |
77 | 77 | return included.get(namespace); |
78 | 78 | } |
79 | 79 | |
| 80 | + public BitSet getIncluded() { |
| 81 | + return included; |
| 82 | + } |
| 83 | + |
80 | 84 | public int cardinality(){ |
81 | 85 | return included.cardinality(); |
82 | 86 | } |
83 | 87 | |
84 | 88 | public int getNamespace(){ |
85 | | - for(int i=0;i<included.size();i++){ |
86 | | - if(included.get(i)) |
87 | | - return i; |
88 | | - } |
89 | | - return Integer.MIN_VALUE; |
| 89 | + return included.nextSetBit(0); |
90 | 90 | } |
91 | 91 | |
92 | 92 | @Override |
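getNamespace() now delegates to BitSet.nextSetBit, which also changes the "no namespace" sentinel from Integer.MIN_VALUE to -1. A minimal demonstration of the new behavior, plain JDK only:

```java
import java.util.BitSet;

public class NextSetBitDemo {
    public static void main(String[] args) {
        BitSet included = new BitSet();
        included.set(12);
        // first set bit, i.e. the lowest included namespace id
        System.out.println(included.nextSetBit(0));     // 12
        // empty filter: nextSetBit returns -1, where the old loop
        // returned Integer.MIN_VALUE; callers comparing against
        // MIN_VALUE would need adjusting
        System.out.println(new BitSet().nextSetBit(0)); // -1
    }
}
```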
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/UpdateThread.java |
— | — | @@ -58,6 +58,7 @@ |
59 | 59 | for(LocalIndex li : forUpdate){ |
60 | 60 | log.debug("Syncing "+li.iid); |
61 | 61 | rebuild(li); // rsync, update registry, cache |
| 62 | + pending.remove(li.iid.toString()); |
62 | 63 | } |
63 | 64 | } |
64 | 65 | } |
— | — | @@ -69,6 +70,8 @@ |
70 | 71 | protected long queryInterval; |
71 | 72 | protected SearcherCache cache; |
72 | 73 | protected long delayInterval; |
| 74 | + /** Pending updates, dbrole -> timestamp */ |
| 75 | + protected Hashtable<String,Long> pending = new Hashtable<String,Long>(); |
73 | 76 | |
74 | 77 | protected static UpdateThread instance = null; |
75 | 78 | |
— | — | @@ -115,6 +118,8 @@ |
116 | 119 | |
117 | 120 | for(int i = 0; i < hiids.size(); i++){ |
118 | 121 | IndexId iid = hiids.get(i); |
| 122 | + if(pending.containsKey(iid.toString())) |
| 123 | + continue; // pending update, ignore |
119 | 124 | LocalIndex myli = registry.getCurrentSearch(iid); |
120 | 125 | if(timestamps[i]!= 0 && (myli == null || myli.timestamp < timestamps[i])){ |
121 | 126 | LocalIndex li = new LocalIndex( |
— | — | @@ -122,10 +127,12 @@ |
123 | 128 | iid.getUpdatePath(), |
124 | 129 | timestamps[i]); |
125 | 130 | forUpdate.add(li); // newer snapshot available |
| 131 | + pending.put(iid.toString(),new Long(timestamps[i])); |
126 | 132 | } |
127 | 133 | } |
128 | 134 | } |
129 | | - new DeferredUpdate(forUpdate,delayInterval); |
| 135 | + if(forUpdate.size()>0) |
| 136 | + new DeferredUpdate(forUpdate,delayInterval).start(); |
130 | 137 | } |
131 | 138 | |
132 | 139 | /** Rsync a remote snapshot to a local one, updates registry, cache */ |
— | — | @@ -165,19 +172,23 @@ |
166 | 173 | File ind = new File(iid.getCanonicalSearchPath()); |
167 | 174 | |
168 | 175 | if(ind.exists()){ // prepare a local hard-linked copy of index |
169 | | - try { |
170 | | - // cp -lr update/dbname/timestamp/* update/dbname/timestamp2/ |
171 | | - command = "/bin/cp -lr "+ind.getCanonicalPath()+sep+"*"+" "+updatepath+sep; |
172 | | - log.debug("Running shell command: "+command); |
173 | | - Runtime.getRuntime().exec(command).waitFor(); |
174 | | - } catch (Exception e) { |
175 | | - log.error("Error making update hardlinked copy "+updatepath+": "+e.getMessage()); |
| 176 | + ind = ind.getCanonicalFile(); |
| 177 | + for(File f: ind.listFiles()){ |
| 178 | + // a cp -lr command for each file in the index |
| 179 | + command = "/bin/cp -lr "+ind.getCanonicalPath()+sep+f.getName()+" "+updatepath+sep+f.getName(); |
| 180 | + try { |
| 181 | + log.debug("Running shell command: "+command); |
| 182 | + Runtime.getRuntime().exec(command).waitFor(); |
| 183 | + } catch (Exception e) { |
| 184 | + log.error("Error making update hardlinked copy "+updatepath+": "+e.getMessage()); |
| 185 | + continue; |
| 186 | + } |
176 | 187 | } |
177 | 188 | } |
178 | 189 | |
179 | 190 | // rsync |
180 | 191 | String snapshotpath = iid.getRsyncSnapshotPath()+"/"+li.timestamp; |
181 | | - command = "/usr/bin/rsync --delete -r rsync://"+iid.getIndexHost()+":"+snapshotpath+" "+iid.getUpdatePath(); |
| 192 | + command = "/usr/bin/rsync -W --delete -r rsync://"+iid.getIndexHost()+":"+snapshotpath+" "+iid.getUpdatePath(); |
182 | 193 | log.debug("Running shell command: "+command); |
183 | 194 | Runtime.getRuntime().exec(command).waitFor(); |
184 | 195 | |
— | — | @@ -218,7 +229,7 @@ |
219 | 230 | /** Update search cache after successful rsync of update version of index */ |
220 | 231 | protected void updateCache(IndexSearcherMul is, LocalIndex li){ |
221 | 232 | // do some typical queries to preload some lucene caches, pages into memory, etc.. |
222 | | - Warmup.warmupIndexSearcher(is,li.iid); |
| 233 | + Warmup.warmupIndexSearcher(is,li.iid,true); |
223 | 234 | // add to cache |
224 | 235 | cache.invalidateLocalSearcher(li.iid,is); |
225 | 236 | } |
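Three related fixes here: a `pending` table keeps the periodic update check from re-queuing an index whose deferred update is still running (entries are added when an update is scheduled and removed after rebuild() completes); the hard-linked copy is issued as one `cp -lr` per index file, presumably because Runtime.exec does not go through a shell, so the `*` glob in the old single command was never expanded; and `rsync -W` transfers whole files instead of running the delta algorithm, which is usually faster on a fast local network. A minimal sketch of the pending-update guard, with hypothetical method names wrapping what UpdateThread does inline:

```java
import java.util.Hashtable;

public class PendingGuard {
    /** pending updates, dbrole -> timestamp (Hashtable: synchronized, as in the diff) */
    private final Hashtable<String, Long> pending = new Hashtable<String, Long>();

    /** Returns false if an update for this index is already in flight. */
    public boolean tryQueue(String dbrole, long timestamp) {
        if (pending.containsKey(dbrole))
            return false; // pending update, ignore
        pending.put(dbrole, new Long(timestamp));
        return true;
    }

    /** Called once the deferred update has rebuilt the index. */
    public void done(String dbrole) {
        pending.remove(dbrole);
    }
}
```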
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/SearchEngine.java |
— | — | @@ -107,7 +107,9 @@ |
108 | 108 | */ |
109 | 109 | public SearchResults search(IndexId iid, String searchterm, int offset, int limit, NamespaceFilter nsDefault){ |
110 | 110 | Analyzer analyzer = Analyzers.getSearcherAnalyzer(iid); |
111 | | - WikiQueryParser parser = new WikiQueryParser("contents","main",analyzer,WikiQueryParser.NamespacePolicy.IGNORE); |
| 111 | + if(nsDefault == null || nsDefault.cardinality() == 0) |
| 112 | + nsDefault = new NamespaceFilter("0"); // default to main namespace |
| 113 | + WikiQueryParser parser = new WikiQueryParser("contents",nsDefault,analyzer,WikiQueryParser.NamespacePolicy.IGNORE); |
112 | 114 | HashSet<Integer> fields = parser.getFieldNamespaces(searchterm); |
113 | 115 | NamespaceFilterWrapper nsfw = null; |
114 | 116 | Query q = null; |
— | — | @@ -122,8 +124,6 @@ |
123 | 125 | } |
124 | 126 | else if(fields.size()==0 && nsDefault!=null && nsDefault.cardinality()==1) |
125 | 127 | nsfw = new NamespaceFilterWrapper(nsDefault); |
126 | | - else if(fields.size()==0) // default: search main namespace |
127 | | - nsfw = new NamespaceFilterWrapper(new NamespaceFilter("0")); |
128 | 128 | |
129 | 129 | try { |
130 | 130 | if(nsfw == null){ |
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/search/Warmup.java |
— | — | @@ -28,7 +28,7 @@ |
29 | 29 | protected static Hashtable<String,Terms> langTerms = new Hashtable<String,Terms>(); |
30 | 30 | |
31 | 31 | /** Runs some typical queries on a local index searcher to preload caches, pages into memory, etc .. */ |
32 | | - public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid){ |
| 32 | + public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid, boolean useDelay){ |
33 | 33 | log.info("Warming up index "+iid+" ..."); |
34 | 34 | long start = System.currentTimeMillis(); |
35 | 35 | |
— | — | @@ -50,15 +50,15 @@ |
51 | 51 | return; |
52 | 52 | } |
53 | 53 | makeNamespaceFilters(is,iid); |
54 | | - warmupSearchTerms(is,iid,count); |
| 54 | + warmupSearchTerms(is,iid,count,useDelay); |
55 | 55 | long delta = System.currentTimeMillis() - start; |
56 | 56 | log.info("Warmed up "+iid+" in "+delta+" ms"); |
57 | 57 | } |
58 | 58 | } |
59 | 59 | |
60 | 60 | /** Warmup index using some number of simple searches */ |
61 | | - protected static void warmupSearchTerms(IndexSearcherMul is, IndexId iid, int count) { |
62 | | - WikiQueryParser parser = new WikiQueryParser("contents","main",Analyzers.getSearcherAnalyzer(iid),WikiQueryParser.NamespacePolicy.IGNORE); |
| 61 | + protected static void warmupSearchTerms(IndexSearcherMul is, IndexId iid, int count, boolean useDelay) { |
| 62 | + WikiQueryParser parser = new WikiQueryParser("contents","0",Analyzers.getSearcherAnalyzer(iid),WikiQueryParser.NamespacePolicy.IGNORE); |
63 | 63 | Terms terms = getTermsForLang(global.getLanguage(iid.getDBname())); |
64 | 64 | |
65 | 65 | try{ |
— | — | @@ -67,11 +67,18 @@ |
68 | 68 | Hits hits = is.search(q); |
69 | 69 | for(int j =0; j<20 && j<hits.length(); j++) |
70 | 70 | hits.doc(j); // retrieve some documents |
| 71 | + if(useDelay){ |
| 72 | + if(i<1000) |
| 73 | + Thread.sleep(100); |
| 74 | + else |
| 75 | + Thread.sleep(50); |
| 76 | + } |
71 | 77 | } |
72 | 78 | } catch (IOException e) { |
73 | 79 | log.error("Error warming up local IndexSearcherMul for "+iid); |
74 | 80 | } catch (ParseException e) { |
75 | 81 | log.error("Error parsing query in warmup of IndexSearcherMul for "+iid); |
| 82 | + } catch (InterruptedException e) { |
76 | 83 | } |
77 | 84 | } |
78 | 85 | |
— | — | @@ -101,7 +108,7 @@ |
102 | 109 | /** Just run one complex query and rebuild the main namespace filter */ |
103 | 110 | public static void simpleWarmup(IndexSearcherMul is, IndexId iid){ |
104 | 111 | try{ |
105 | | - WikiQueryParser parser = new WikiQueryParser("contents","main",Analyzers.getSearcherAnalyzer(iid),WikiQueryParser.NamespacePolicy.IGNORE); |
| 112 | + WikiQueryParser parser = new WikiQueryParser("contents","0",Analyzers.getSearcherAnalyzer(iid),WikiQueryParser.NamespacePolicy.IGNORE); |
106 | 113 | Query q = parser.parseTwoPass("a OR very OR long OR title OR involving OR both OR wikipedia OR and OR pokemons",WikiQueryParser.NamespacePolicy.IGNORE); |
107 | 114 | is.search(q,new NamespaceFilterWrapper(new NamespaceFilter("0"))); |
108 | 115 | } catch (IOException e) { |
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/index/IndexThread.java |
— | — | @@ -222,7 +222,7 @@ |
223 | 223 | } |
224 | 224 | } |
225 | 225 | |
226 | | - protected void deleteDirRecursive(File file){ |
| 226 | + protected static void deleteDirRecursive(File file){ |
227 | 227 | if(!file.exists()) |
228 | 228 | return; |
229 | 229 | else if(file.isDirectory()){ |
— | — | @@ -241,12 +241,9 @@ |
242 | 242 | * |
243 | 243 | */ |
244 | 244 | protected void makeSnapshot() { |
245 | | - final String sep = Configuration.PATH_SEP; |
246 | 245 | HashSet<IndexId> indexes = WikiIndexModifier.closeAllModifiers(); |
247 | 246 | IndexRegistry registry = IndexRegistry.getInstance(); |
248 | 247 | |
249 | | - DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss"); |
250 | | - String timestamp = df.format(new Date(System.currentTimeMillis())); |
251 | 248 | log.debug("Making snapshots..."); |
252 | 249 | // check filesystem timestamps (for those for which we are unsure if they are updated) |
253 | 250 | for( IndexId iid : global.getMyIndex()){ |
— | — | @@ -264,37 +261,44 @@ |
265 | 262 | } |
266 | 263 | } |
267 | 264 | for( IndexId iid : indexes ){ |
268 | | - log.info("Making snapshot for "+iid); |
269 | | - String index = iid.getIndexPath(); |
270 | | - String snapshotdir = iid.getSnapshotPath(); |
271 | | - String snapshot = snapshotdir+sep+timestamp; |
272 | | - // cleanup the snapshot dir for this iid |
273 | | - File spd = new File(snapshotdir); |
274 | | - if(spd.exists() && spd.isDirectory()){ |
275 | | - File[] files = spd.listFiles(); |
276 | | - for(File f: files) |
277 | | - deleteDirRecursive(f); |
278 | | - } |
279 | | - new File(snapshot).mkdirs(); |
280 | | - File ind =new File(index); |
281 | | - for(File f: ind.listFiles()){ |
282 | | - // use a cp -lr command for each file in the index |
283 | | - String command = "/bin/cp -lr "+index+sep+f.getName()+" "+snapshot+sep+f.getName(); |
284 | | - Process copy; |
285 | | - try { |
286 | | - log.debug("Running shell command: "+command); |
287 | | - copy = Runtime.getRuntime().exec(command); |
288 | | - copy.waitFor(); |
289 | | - } catch (Exception e) { |
290 | | - log.error("Error making snapshot "+snapshot+": "+e.getMessage()); |
291 | | - continue; |
292 | | - } |
293 | | - } |
| 265 | + makeIndexSnapshot(iid,iid.getIndexPath()); |
294 | 266 | registry.refreshSnapshots(iid); |
295 | | - log.info("Made snapshot "+snapshot); |
296 | 267 | } |
297 | 268 | } |
298 | 269 | |
| 270 | + public static void makeIndexSnapshot(IndexId iid, String indexPath){ |
| 271 | + final String sep = Configuration.PATH_SEP; |
| 272 | + DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss"); |
| 273 | + String timestamp = df.format(new Date(System.currentTimeMillis())); |
| 274 | + |
| 275 | + log.info("Making snapshot for "+iid); |
| 276 | + String snapshotdir = iid.getSnapshotPath(); |
| 277 | + String snapshot = snapshotdir+sep+timestamp; |
| 278 | + // cleanup the snapshot dir for this iid |
| 279 | + File spd = new File(snapshotdir); |
| 280 | + if(spd.exists() && spd.isDirectory()){ |
| 281 | + File[] files = spd.listFiles(); |
| 282 | + for(File f: files) |
| 283 | + deleteDirRecursive(f); |
| 284 | + } |
| 285 | + new File(snapshot).mkdirs(); |
| 286 | + File ind = new File(indexPath); |
| 287 | + for(File f: ind.listFiles()){ |
| 288 | + // use a cp -lr command for each file in the index |
| 289 | + String command = "/bin/cp -lr "+indexPath+sep+f.getName()+" "+snapshot+sep+f.getName(); |
| 290 | + Process copy; |
| 291 | + try { |
| 292 | + log.debug("Running shell command: "+command); |
| 293 | + copy = Runtime.getRuntime().exec(command); |
| 294 | + copy.waitFor(); |
| 295 | + } catch (Exception e) { |
| 296 | + log.error("Error making snapshot "+snapshot+": "+e.getMessage()); |
| 297 | + continue; |
| 298 | + } |
| 299 | + } |
| 300 | + log.info("Made snapshot "+snapshot); |
| 301 | + } |
| 302 | + |
299 | 303 | /** |
300 | 304 | * @return if there are queued updates |
301 | 305 | */ |
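makeIndexSnapshot is now static so the Importer can call it directly. A sketch of the hard-link snapshot idea it implements, with placeholder paths: `cp -l` makes hard links, so the snapshot shares disk blocks with the live index and completes almost instantly, as long as index files are not modified in place afterwards.

```java
import java.io.File;

public class HardlinkSnapshotSketch {
    public static void snapshot(String indexPath, String snapshotPath) throws Exception {
        new File(snapshotPath).mkdirs();
        for (File f : new File(indexPath).listFiles()) {
            // one cp -lr command per file, because Runtime.exec does not
            // expand shell globs like "*"
            String command = "/bin/cp -lr " + indexPath + "/" + f.getName()
                    + " " + snapshotPath + "/" + f.getName();
            Runtime.getRuntime().exec(command).waitFor();
        }
    }
}
```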
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -2,8 +2,10 @@ |
3 | 3 | |
4 | 4 | import java.io.IOException; |
5 | 5 | import java.util.ArrayList; |
| 6 | +import java.util.BitSet; |
6 | 7 | import java.util.HashMap; |
7 | 8 | import java.util.HashSet; |
| 9 | +import java.util.Map.Entry; |
8 | 10 | |
9 | 11 | import org.apache.lucene.analysis.Analyzer; |
10 | 12 | import org.apache.lucene.analysis.Token; |
— | — | @@ -15,6 +17,8 @@ |
16 | 18 | import org.apache.lucene.search.PhraseQuery; |
17 | 19 | import org.apache.lucene.search.Query; |
18 | 20 | import org.apache.lucene.search.TermQuery; |
| 21 | +import org.apache.lucene.search.WildcardQuery; |
| 22 | +import org.wikimedia.lsearch.search.NamespaceFilter; |
19 | 23 | import org.wikimedia.lsearch.util.UnicodeDecomposer; |
20 | 24 | |
21 | 25 | /** |
— | — | @@ -74,7 +78,8 @@ |
75 | 79 | */ |
76 | 80 | public enum NamespacePolicy { LEAVE, IGNORE, REWRITE }; |
77 | 81 | protected HashMap<String,Integer> namespaceMapping; |
78 | | - private String defaultNamespace; |
| 82 | + private String defaultNamespaceName; |
| 83 | + private Query namespaceRewriteQuery; |
79 | 84 | private NamespacePolicy namespacePolicy; |
80 | 85 | |
81 | 86 | /** default value for boolean queries */ |
— | — | @@ -110,7 +115,7 @@ |
111 | 116 | * @param analyzer |
112 | 117 | */ |
113 | 118 | public WikiQueryParser(String field, Analyzer analyzer){ |
114 | | - this(field,null,analyzer,NamespacePolicy.LEAVE); |
| 119 | + this(field,(NamespaceFilter)null,analyzer,NamespacePolicy.LEAVE); |
115 | 120 | } |
116 | 121 | |
117 | 122 | /** |
— | — | @@ -122,16 +127,53 @@ |
123 | 128 | * @param nsPolicy |
124 | 129 | */ |
125 | 130 | public WikiQueryParser(String field, String namespace, Analyzer analyzer, NamespacePolicy nsPolicy){ |
126 | | - defaultField = field; |
127 | | - defaultNamespace = namespace; |
| 131 | + this(field,new NamespaceFilter(namespace),analyzer,nsPolicy); |
| 132 | + } |
| 133 | + |
| 134 | + public WikiQueryParser(String field, NamespaceFilter nsfilter, Analyzer analyzer, NamespacePolicy nsPolicy){ |
| 135 | + defaultField = field; |
128 | 136 | this.analyzer = analyzer; |
129 | 137 | decomposer = UnicodeDecomposer.getInstance(); |
130 | 138 | tokens = new ArrayList<Token>(); |
131 | 139 | this.namespacePolicy = nsPolicy; |
132 | 140 | disableTitleAliases = true; |
133 | 141 | initNamespaces(); |
| 142 | + if(nsfilter != null){ |
| 143 | + namespaceRewriteQuery = generateRewrite(nsfilter); |
| 144 | + defaultNamespaceName = null; |
| 145 | + if(nsfilter.cardinality()==1){ |
| 146 | + Integer in = new Integer(nsfilter.getNamespace()); |
| 147 | + // if it has only one namespace, try to get the name of the default namespace |
| 148 | + for(Entry<String,Integer> e : namespaceMapping.entrySet()){ |
| 149 | + if(in.equals(e.getValue())){ |
| 150 | + defaultNamespaceName = e.getKey(); |
| 151 | + } |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + else{ |
| 156 | + namespaceRewriteQuery = null; |
| 157 | + defaultNamespaceName = null; |
| 158 | + } |
134 | 159 | } |
135 | 160 | |
| 161 | + /** Generate a rewrite query for a collection of namespaces */ |
| 162 | + protected Query generateRewrite(NamespaceFilter nsfilter){ |
| 163 | + if(nsfilter.cardinality() == 0) |
| 164 | + return null; |
| 165 | + else if(nsfilter.cardinality() == 1) |
| 166 | + return new TermQuery(new Term("namespace",Integer.toString(nsfilter.getNamespace()))); |
| 167 | + |
| 168 | + BooleanQuery bq = new BooleanQuery(); |
| 169 | + BitSet bs = nsfilter.getIncluded(); |
| 170 | + // iterate over set bits |
| 171 | + for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)){ |
| 172 | + bq.add(new TermQuery(new Term("namespace",Integer.toString(i))), |
| 173 | + BooleanClause.Occur.SHOULD); |
| 174 | + } |
| 175 | + return bq; |
| 176 | + } |
| 177 | + |
136 | 178 | /** |
137 | 179 | * Get a hashset of namespace numbers for fields that are |
138 | 180 | * valid namespace keys. |
— | — | @@ -228,8 +270,8 @@ |
229 | 271 | if(length == 0 && ch == ' ') |
230 | 272 | continue; // ignore whitespaces |
231 | 273 | |
232 | | - // pluses and minuses, underscores can be within words |
233 | | - if(Character.isLetterOrDigit(ch) || ch=='-' || ch=='+' || ch=='_'){ |
| 274 | + // pluses and minuses, underscores can be within words, *,? are for wildcard queries |
| 275 | + if(Character.isLetterOrDigit(ch) || ch=='-' || ch=='+' || ch=='_' || ch=='*' || ch=='?'){ |
234 | 276 | // unicode normalization -> delete accents |
235 | 277 | decomp = decomposer.decompose(ch); |
236 | 278 | if(decomp == null) |
— | — | @@ -353,7 +395,7 @@ |
354 | 396 | } |
355 | 397 | |
356 | 398 | private final boolean needsRewrite(){ |
357 | | - return defaultNamespace != null && namespacePolicy == NamespacePolicy.REWRITE; |
| 399 | + return namespaceRewriteQuery != null && namespacePolicy == NamespacePolicy.REWRITE; |
358 | 400 | } |
359 | 401 | |
360 | 402 | /** Parses a clause: (in regexp notation) |
— | — | @@ -382,7 +424,7 @@ |
383 | 425 | |
384 | 426 | // assume default namespace value on rewrite |
385 | 427 | if(!returnOnFieldDef && field == null && needsRewrite()){ |
386 | | - fieldQuery = getNamespaceQuery(defaultNamespace); |
| 428 | + fieldQuery = namespaceRewriteQuery; |
387 | 429 | } |
388 | 430 | |
389 | 431 | mainloop: for( ; cur < queryLength; cur++ ){ |
— | — | @@ -409,7 +451,7 @@ |
410 | 452 | if(field == null || definedExplicitField){ |
411 | 453 | // set field name |
412 | 454 | field = new String(buffer,0,length); |
413 | | - if((defaultNamespace!=null && field.equals(defaultNamespace)) || field.equals(defaultField)){ |
| 455 | + if((defaultNamespaceName!=null && field.equals(defaultNamespaceName)) || field.equals(defaultField)){ |
414 | 456 | field = null; |
415 | 457 | break; // repeated definition of field, ignore |
416 | 458 | } |
— | — | @@ -433,7 +475,7 @@ |
434 | 476 | case WORD: |
435 | 477 | if(fieldQuery != null){ |
436 | 478 | backToken(); |
437 | | - String myfield = (topFieldName != null)? topFieldName : (field !=null)? field : (defaultNamespace!=null)? defaultNamespace : defaultField; |
| 479 | + String myfield = (topFieldName != null)? topFieldName : (field !=null)? field : (defaultNamespaceName!=null)? defaultNamespaceName : defaultField; |
438 | 480 | fieldsubquery = parseClause(level+1,true,myfield); |
439 | 481 | } else{ |
440 | 482 | analyzeBuffer(); |
— | — | @@ -561,6 +603,14 @@ |
562 | 604 | return new TermQuery(makeTerm()); |
563 | 605 | } |
564 | 606 | |
| 607 | + // check for wildcard searches, they are also not analyzed/stemmed |
| 608 | + // wildcard signs are allowed only at the end of a word that starts with a letter |
| 609 | + if(length>1 && Character.isLetter(buffer[0]) && (buffer[length-1]=='*' || buffer[length-1]=='?')){ |
| 610 | + Query ret = new WildcardQuery(makeTerm()); |
| 611 | + ret.setBoost(defaultBoost); |
| 612 | + return ret; |
| 613 | + } |
| 614 | + |
565 | 615 | if(toplevelOccur == BooleanClause.Occur.MUST_NOT) |
566 | 616 | aliasOccur = null; // do not add aliases |
567 | 617 | else |
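The parser now keeps a prebuilt namespace rewrite query instead of a single default-namespace name, so a default such as "0,1,4,12" rewrites to a disjunction of namespace terms, and words ending in * or ? bypass stemming and become WildcardQuerys. A self-contained sketch of the rewrite construction, mirroring generateRewrite above against the Lucene 2.x API used by this codebase:

```java
import java.util.BitSet;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class RewriteSketch {
    static Query rewrite(BitSet included) {
        if (included.cardinality() == 0)
            return null; // no filter, no rewrite
        if (included.cardinality() == 1)
            return new TermQuery(new Term("namespace",
                    Integer.toString(included.nextSetBit(0))));
        // several namespaces: OR together one term per set bit
        BooleanQuery bq = new BooleanQuery();
        for (int i = included.nextSetBit(0); i >= 0; i = included.nextSetBit(i + 1))
            bq.add(new TermQuery(new Term("namespace", Integer.toString(i))),
                    BooleanClause.Occur.SHOULD);
        return bq;
    }

    public static void main(String[] args) {
        BitSet ns = new BitSet();
        ns.set(0); ns.set(1); ns.set(4); ns.set(12);
        // prints: namespace:0 namespace:1 namespace:4 namespace:12
        System.out.println(rewrite(ns));
    }
}
```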
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java |
— | — | @@ -44,6 +44,7 @@ |
45 | 45 | log.error("OAI authentication error. Username/password pair not specified in configuration file."); |
46 | 46 | return null; |
47 | 47 | } |
| 48 | + log.info("Authenticating ... "); |
48 | 49 | return new PasswordAuthentication(username,password.toCharArray()); |
49 | 50 | } |
50 | 51 | } |
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/oai/OAIHarvester.java |
— | — | @@ -22,7 +22,6 @@ |
23 | 23 | public class OAIHarvester { |
24 | 24 | static Logger log = Logger.getLogger(OAIHarvester.class); |
25 | 25 | protected String urlbase; |
26 | | - protected Authenticator auth; |
27 | 26 | protected OAIParser parser; |
28 | 27 | protected IndexUpdatesCollector collector; |
29 | 28 | protected IndexId iid; |
— | — | @@ -30,8 +29,8 @@ |
31 | 30 | |
32 | 31 | public OAIHarvester(IndexId iid, String url, Authenticator auth){ |
33 | 32 | this.urlbase = url; |
34 | | - this.auth = auth; |
35 | 33 | this.iid = iid; |
| 34 | + Authenticator.setDefault(auth); |
36 | 35 | } |
37 | 36 | |
38 | 37 | /** Invoke ListRecords from a certain timestamp */ |
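The harvester no longer keeps the Authenticator in a field: java.net.Authenticator.setDefault registers it JVM-wide, and every later HttpURLConnection that receives an authentication challenge calls back into it. A minimal sketch; the credentials here are placeholders, the real ones come from the configuration file as shown in IncrementalUpdater above:

```java
import java.net.Authenticator;
import java.net.PasswordAuthentication;

public class AuthSketch {
    public static void main(String[] args) {
        // JVM-global: applies to all subsequent URL connections,
        // so no reference needs to be stored by the caller
        Authenticator.setDefault(new Authenticator() {
            protected PasswordAuthentication getPasswordAuthentication() {
                // placeholder credentials
                return new PasswordAuthentication("user", "secret".toCharArray());
            }
        });
    }
}
```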
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/benchmark/Collector.java |
— | — | @@ -16,16 +16,18 @@ |
17 | 17 | } |
18 | 18 | |
19 | 19 | protected ArrayList<ReportSet> reports = new ArrayList<ReportSet>(); |
20 | | - protected long startTime; |
| 20 | + protected long startTime, lastTime; |
21 | 21 | protected int reportInc; // after how many reports to print out results |
22 | 22 | protected int curInc; // current increment |
23 | 23 | protected int total; |
| 24 | + protected int threads; |
24 | 25 | |
25 | | - Collector(int reportInc, int total){ |
26 | | - startTime = System.currentTimeMillis(); |
| 26 | + Collector(int reportInc, int total, int threads){ |
| 27 | + lastTime = startTime = System.currentTimeMillis(); |
27 | 28 | this.reportInc = reportInc; |
28 | 29 | curInc = 0; |
29 | 30 | this.total = total; |
| 31 | + this.threads = threads; |
30 | 32 | } |
31 | 33 | |
32 | 34 | synchronized public void add(int results, long time){ |
— | — | @@ -44,6 +46,11 @@ |
45 | 47 | results += rs.results; |
46 | 48 | time += rs.time; |
47 | 49 | } |
| 50 | + long time1k = 0; |
| 51 | + if(reports.size()>=1000){ |
| 52 | + for(int i=reports.size()-1000;i<reports.size();i++) |
| 53 | + time1k += reports.get(i).time; |
| 54 | + } |
48 | 55 | long now = System.currentTimeMillis(); |
49 | 56 | int sec = (int) ((now-startTime)/1000); |
50 | 57 | int min = 0; |
— | — | @@ -52,8 +59,9 @@ |
53 | 60 | sec = sec%60; |
54 | 61 | } |
55 | 62 | double pers = (double)(now-startTime)/reports.size(); |
56 | | - //double avgtime = (double)time/reports.size(); |
57 | | - System.out.format("[%d:%02d %d/%d] %2.1fms : %d results / search\n", min, sec, reports.size(), total, pers, results/reports.size()); |
| 63 | + double nowpers = (double)(now-lastTime)/reportInc; |
| 64 | + lastTime = now; |
| 65 | + System.out.format("[%d:%02d %d/%d] %2.1fms : %d results / search (now: %2.1fms, last 1k: %2.1fms)\n", min, sec, reports.size(), total, pers, results/reports.size(), nowpers, time1k/1000.0/threads); |
58 | 66 | System.out.flush(); |
59 | 67 | } |
60 | 68 | } |
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/benchmark/Benchmark.java |
— | — | @@ -18,6 +18,7 @@ |
19 | 19 | protected Terms terms; |
20 | 20 | protected int words; |
21 | 21 | protected String namespace; |
| 22 | + protected String namespaceFilter; |
22 | 23 | |
23 | 24 | protected int thread; // current thread |
24 | 25 | |
— | — | @@ -35,12 +36,12 @@ |
36 | 37 | protected static Object sharedLock = new Object(); |
37 | 38 | |
38 | 39 | /** Use this to construct the main thread */ |
39 | | - public Benchmark(String host, int port, String database, String verb, Terms terms, int words, String namespace) { |
40 | | - this(host,port,database,verb,terms,words,namespace,0,0); |
| 40 | + public Benchmark(String host, int port, String database, String verb, Terms terms, int words, String namespace, String namespaceFilter) { |
| 41 | + this(host,port,database,verb,terms,words,namespace,namespaceFilter,0,0); |
41 | 42 | } |
42 | 43 | |
43 | 44 | /** Use this to construct a benchmark thread */ |
44 | | - public Benchmark(String host, int port, String database, String verb, Terms terms, int words, String namespace, int runs, int thread) { |
| 45 | + public Benchmark(String host, int port, String database, String verb, Terms terms, int words, String namespace, String namespaceFilter, int runs, int thread) { |
45 | 46 | this.host = host; |
46 | 47 | this.port = port; |
47 | 48 | this.database = database; |
— | — | @@ -50,6 +51,7 @@ |
51 | 52 | this.thread = thread; |
52 | 53 | this.words = words; |
53 | 54 | this.namespace = namespace; |
| 55 | + this.namespaceFilter = namespaceFilter; |
54 | 56 | } |
55 | 57 | |
56 | 58 | /** Start benchmarking on main thread */ |
— | — | @@ -61,10 +63,10 @@ |
62 | 64 | activeThreads = threads; |
63 | 65 | startTime = System.currentTimeMillis(); |
64 | 66 | |
65 | | - collector = new Collector(100,threads*runs); |
| 67 | + collector = new Collector(100,threads*runs,threads); |
66 | 68 | |
67 | 69 | for(int i=0;i<threads;i++) |
68 | | - new Benchmark(host,port,database,verb,terms,words,namespace,runs,i).start(); |
| 70 | + new Benchmark(host,port,database,verb,terms,words,namespace,namespaceFilter,runs,i).start(); |
69 | 71 | |
70 | 72 | // wait until all thread finish |
71 | 73 | while(activeThreads != 0){ |
— | — | @@ -106,11 +108,17 @@ |
107 | 109 | String query = ""; |
108 | 110 | for(int i=0;i<words;i++){ |
109 | 111 | if(!query.equals("")) |
110 | | - query += " "; |
| 112 | + query += " OR "; |
111 | 113 | query += terms.next(); |
112 | 114 | } |
113 | | - query = namespace+":"+URLEncoder.encode(query).replaceAll("\\+","%20"); |
114 | | - String urlString = "http://"+host+":"+port+"/"+verb+"/"+database+"/"+query+"?limit=20"; |
| 115 | + String urlString; |
| 116 | + if(namespace.equals("")){ |
| 117 | + query = URLEncoder.encode(query).replaceAll("\\+","%20"); |
| 118 | + urlString = "http://"+host+":"+port+"/"+verb+"/"+database+"/"+query+"?limit=20&namespaces="+namespaceFilter; |
| 119 | + } else{ |
| 120 | + query = namespace+":"+URLEncoder.encode(query).replaceAll("\\+","%20"); |
| 121 | + urlString = "http://"+host+":"+port+"/"+verb+"/"+database+"/"+query+"?limit=20"; |
| 122 | + } |
115 | 123 | try { |
116 | 124 | URL url; |
117 | 125 | url = new URL(urlString); |
— | — | @@ -160,7 +168,8 @@ |
161 | 169 | int port = 8123; |
162 | 170 | String database = "wikilucene"; |
163 | 171 | String verb = "search"; |
164 | | - String namespace = "all"; |
| 172 | + String namespace = "main"; |
| 173 | + String namespaceFilter= "0"; |
165 | 174 | int runs = 5000; |
166 | 175 | int threads = 10; |
167 | 176 | int words = 2; |
— | — | @@ -180,8 +189,11 @@ |
181 | 190 | runs = Integer.parseInt(args[++i]); |
182 | 191 | } else if (args[i].equals("-v")) { |
183 | 192 | database = args[++i]; |
184 | | - } else if (args[i].equals("-ns")) { |
| 193 | + } else if (args[i].equals("-n") || args[i].equals("-ns")) { |
185 | 194 | namespace = args[++i]; |
| 195 | + } else if (args[i].equals("-f") ) { |
| 196 | + namespaceFilter = args[++i]; |
| 197 | + namespace = ""; |
186 | 198 | } else if (args[i].equals("-w")) { |
187 | 199 | words = Integer.parseInt(args[++i]); |
188 | 200 | } else if (args[i].equals("--help")) { |
— | — | @@ -190,15 +202,19 @@ |
191 | 203 | " -p port (default: "+port+")\n"+ |
192 | 204 | " -d database (default: "+database+")\n"+ |
193 | 205 | " -t threads (defaut: "+threads+")\n"+ |
194 | | - " -n count (default: "+runs+")\n"+ |
| 206 | + " -c count (default: "+runs+")\n"+ |
195 | 207 | " -w number of words in query (default: "+words+")\n"+ |
196 | 208 | " -v verb (default: "+verb+")\n"+ |
197 | | - " -ns namespace (default: "+namespace+")\n"); |
| 209 | + " -n namespace (default: "+namespace+")\n"+ |
| 210 | + " -f namespace filter (default: "+namespaceFilter+")\n"); |
198 | 211 | return; |
| 212 | + } else{ |
| 213 | + System.out.println("Unrecognized switch: "+args[i]); |
| 214 | + return; |
199 | 215 | } |
200 | 216 | } |
201 | 217 | System.out.println("Running benchmark on "+host+":"+port+" with "+threads+" theads each "+runs+" runs"); |
202 | | - Benchmark bench = new Benchmark(host, port, database, verb, terms, words, namespace); |
| 218 | + Benchmark bench = new Benchmark(host, port, database, verb, terms, words, namespace, namespaceFilter); |
203 | 219 | bench.startBenchmark(threads,runs); |
204 | 220 | bench.printReport(); |
205 | 221 | } |
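The new `-f` switch exercises the server-side namespace filter: instead of prefixing the query with a namespace name, the namespace ids are passed as a `namespaces=` request parameter. A sketch of the two URL shapes built above, with placeholder host, port, and terms:

```java
import java.net.URLEncoder;

public class QueryUrlSketch {
    public static void main(String[] args) {
        String host = "localhost", database = "wikilucene", verb = "search";
        int port = 8123;
        String query = URLEncoder.encode("beans OR cheese").replaceAll("\\+", "%20");
        // -n main: the namespace is a prefix inside the query path
        System.out.println("http://" + host + ":" + port + "/" + verb + "/" + database
                + "/main:" + query + "?limit=20");
        // -f 0,1: the namespaces go into a filter parameter instead
        System.out.println("http://" + host + ":" + port + "/" + verb + "/" + database
                + "/" + query + "?limit=20&namespaces=0,1");
    }
}
```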