Index: branches/lucene-search-2.1/lsearch.conf |
— | — | @@ -18,7 +18,7 @@ |
19 | 19 | Rsync.path=/usr/bin/rsync |
20 | 20 | |
21 | 21 | # Extra params for rsync |
22 | | -# Rsync.params=--bwlimit=4096 |
| 22 | +# Rsync.params=--bwlimit=8192 |
23 | 23 | |
24 | 24 | ################################################ |
25 | 25 | # Search node related configuration |
— | — | @@ -43,6 +43,9 @@ |
44 | 44 | # whether to wait for aggregates to warm up before deploying the searcher |
45 | 45 | Search.warmupaggregate=false |
46 | 46 | |
| 47 | +# cache *whole* index in RAM |
| 48 | +Search.ramdirectory=false |
| 49 | + |
47 | 50 | ################################################ |
48 | 51 | # Indexer related configuration |
49 | 52 | ################################################ |
— | — | @@ -62,41 +65,16 @@ |
63 | 66 | # Maximal time an update can remain in queue before being processed (in seconds) |
64 | 67 | Index.maxqueuetimeout=12 |
65 | 68 | |
66 | | -################################################ |
67 | | -# Storage backend (currently mysql) |
68 | | -################################################ |
| 69 | +# Whether to always delete all old snapshots (defaults to false - leaves the last good snapshot) |
| 70 | +# Index.delsnapshots=true |
69 | 71 | |
70 | | -# host of database master |
71 | | -Storage.master=localhost |
72 | | - |
73 | | -# array of host->load |
74 | | -#Storage.slaves=host1->10 host2->50 host3->100 |
75 | | - |
76 | | -# Storage.username=root |
77 | | -# Storage.password= |
78 | | - |
79 | | -# Storage.adminuser=root |
80 | | -# Storage.adminpass= |
81 | | - |
82 | | -# Values: |
83 | | -# true - each dbname has a separate db of that name |
84 | | -# false - each dbname is a prefix for tables in a default db (set default db below) |
85 | | -Storage.useSeparateDBs=false |
86 | | - |
87 | | -# Default db where all the stuff will be stored (if useSeparateDB=false) |
88 | | -Storage.defaultDB=lsearch |
89 | | - |
90 | | -# Where table definitions are |
91 | | -Storage.lib=/var/www/html/lucene-search-2.0/sql |
92 | | - |
93 | | - |
94 | 72 | ################################################ |
95 | 73 | # Log, ganglia, localization |
96 | 74 | ################################################ |
97 | 75 | |
98 | 76 | # If this host runs on multiple CPUs maintain a pool of index searchers |
99 | 77 | # It's a good idea to make it the number of CPUs+1, or some larger odd number |
100 | | -SearcherPool.size=1 |
| 78 | +SearcherPool.size=3 |
101 | 79 | |
102 | 80 | # URL to MediaWiki message files |
103 | 81 | Localization.url=file:///var/www/html/wiki-lucene/phase3/languages/messages |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java |
— | — | @@ -371,6 +371,8 @@ |
372 | 372 | public void close() throws IOException { |
373 | 373 | if(writer != null) |
374 | 374 | writer.close(); |
| 375 | + if(links != null) |
| 376 | + links.close(); |
375 | 377 | } |
376 | 378 | |
377 | 379 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/Importer.java |
— | — | @@ -186,7 +186,8 @@ |
187 | 187 | reader.readDump(); |
188 | 188 | end = System.currentTimeMillis(); |
189 | 189 | log.info("Closing/optimizing index..."); |
190 | | - dp.closeIndex(); |
| 190 | + dp.closeIndex(); |
| 191 | + links.close(); |
191 | 192 | } catch (IOException e) { |
192 | 193 | if(!e.getMessage().equals("stopped")){ |
193 | 194 | log.fatal("I/O error processing dump for "+dbname+" from "+inputfile+" : "+e.getMessage()); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/GlobalConfiguration.java |
— | — | @@ -18,6 +18,7 @@ |
19 | 19 | import java.text.MessageFormat; |
20 | 20 | import java.util.ArrayList; |
21 | 21 | import java.util.Collection; |
| 22 | +import java.util.Collections; |
22 | 23 | import java.util.Enumeration; |
23 | 24 | import java.util.HashMap; |
24 | 25 | import java.util.HashSet; |
— | — | @@ -1294,6 +1295,19 @@ |
1295 | 1296 | |
1296 | 1297 | return ret; |
1297 | 1298 | } |
| 1299 | + /** Get all dbnames that are locally indexed */ |
| 1300 | + public ArrayList<String> getMyIndexDBnames(){ |
| 1301 | + HashSet<String> dbnames = new HashSet<String>(); |
| 1302 | + ArrayList<String> dbnamesSorted = new ArrayList<String>(); |
| 1303 | + |
| 1304 | + for(IndexId iid : indexIdPool.values()){ |
| 1305 | + if(iid.isMyIndex() && !iid.isTitlesBySuffix() && !iid.isSpell()) |
| 1306 | + dbnames.add(iid.getDBname().toString()); |
| 1307 | + } |
| 1308 | + dbnamesSorted.addAll(dbnames); |
| 1309 | + Collections.sort(dbnamesSorted); |
| 1310 | + return dbnamesSorted; |
| 1311 | + } |
1298 | 1312 | |
1299 | 1313 | /** Get the name of the localhost as it appears in global configuration */ |
1300 | 1314 | public String getLocalhost(){ |
— | — | @@ -1431,7 +1445,7 @@ |
1432 | 1446 | |
1433 | 1447 | // process $lang |
1434 | 1448 | String lang = getLanguage(dbname); |
1435 | | - repo = repo.replace("$lang",lang); |
| 1449 | + repo = repo.replace("$lang",lang.replace('_','-')); |
1436 | 1450 | repo = repo += "?title=Special:OAIRepository"; |
1437 | 1451 | |
1438 | 1452 | return repo; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/IndexId.java |
— | — | @@ -5,6 +5,8 @@ |
6 | 6 | import java.util.ArrayList; |
7 | 7 | import java.util.HashSet; |
8 | 8 | import java.util.Hashtable; |
| 9 | +import java.util.concurrent.locks.Lock; |
| 10 | +import java.util.concurrent.locks.ReentrantLock; |
9 | 11 | |
10 | 12 | import org.apache.log4j.Logger; |
11 | 13 | import org.wikimedia.lsearch.analyzers.FilterFactory; |
— | — | @@ -150,6 +152,9 @@ |
151 | 153 | /** lock used in {@link SearcherCache} class */ |
152 | 154 | protected Object searcherCacheLock = new Object(); |
153 | 155 | |
| 156 | + /** locks used to serialize transactions on different transaction paths */ |
| 157 | + protected Hashtable<Transaction,Lock> transactionLocks = new Hashtable<Transaction,Lock>(); |
| 158 | + |
154 | 159 | /** |
155 | 160 | * Get index Id object given its string representation, the actual object |
156 | 161 | * is pulled out of the GlobalConfigurations prepopulated pool of all possible |
— | — | @@ -344,6 +349,9 @@ |
345 | 350 | transactionPath.put(Transaction.INDEX,transRoot+"index"); |
346 | 351 | transactionPath.put(Transaction.IMPORT,transRoot+"import"); |
347 | 352 | transactionPath.put(Transaction.TEMP,transRoot+"temp"); |
| 353 | + transactionLocks.put(Transaction.INDEX,new ReentrantLock()); |
| 354 | + transactionLocks.put(Transaction.IMPORT,new ReentrantLock()); |
| 355 | + transactionLocks.put(Transaction.TEMP,new ReentrantLock()); |
348 | 356 | tempPath = localIndexPath + "temp" + sep + this.dbrole; |
349 | 357 | |
350 | 358 | //if(mySearch){ |
— | — | @@ -924,5 +932,10 @@ |
925 | 933 | public Object getSearcherCacheLock() { |
926 | 934 | return searcherCacheLock; |
927 | 935 | } |
928 | | - |
| 936 | + |
| 937 | + /** Get transaction lock for a transaction type */ |
| 938 | + public Lock getTransactionLock(Transaction trans) { |
| 939 | + return transactionLocks.get(trans); |
| 940 | + } |
| 941 | + |
929 | 942 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java |
— | — | @@ -91,6 +91,8 @@ |
92 | 92 | set = new HashSet<String>(); |
93 | 93 | cachingInProgress.put(reader.directory(),set); |
94 | 94 | } |
| 95 | + if(set.contains(field)) |
| 96 | + return; |
95 | 97 | set.add(field); |
96 | 98 | } |
97 | 99 | try{ |
— | — | @@ -150,14 +152,14 @@ |
151 | 153 | } catch(Exception e){ |
152 | 154 | e.printStackTrace(); |
153 | 155 | log.error("Whole caching failed on field="+field+", reader="+reader.directory()); |
154 | | - } finally{ |
155 | | - synchronized(cachingInProgress){ |
156 | | - Set<String> set = cachingInProgress.get(reader.directory()); |
157 | | - set.remove(field); |
158 | | - if(set.size() == 0) |
159 | | - cachingInProgress.remove(reader.directory()); |
160 | | - } |
161 | 156 | } |
| 157 | + |
| 158 | + synchronized(cachingInProgress){ |
| 159 | + Set<String> set = cachingInProgress.get(reader.directory()); |
| 160 | + set.remove(field); |
| 161 | + if(set.size() == 0) |
| 162 | + cachingInProgress.remove(reader.directory()); |
| 163 | + } |
162 | 164 | } |
163 | 165 | protected byte[] extendBytes(byte[] array){ |
164 | 166 | return resizeBytes(array,array.length*2); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Warmup.java |
— | — | @@ -74,9 +74,9 @@ |
75 | 75 | /** If set in local config file waits for aggregate fields to finish caching */ |
76 | 76 | public static void waitForAggregate(IndexSearcherMul[] pool){ |
77 | 77 | try{ |
78 | | - boolean waitForAggregate = Configuration.open().getString("Search","warmupaggregate","false").equalsIgnoreCase("true"); |
| 78 | + boolean waitForAggregate = true; //Configuration.open().getString("Search","warmupaggregate","false").equalsIgnoreCase("true"); |
79 | 79 | if(waitForAggregate){ // wait for aggregate fields to be cached |
80 | | - log.info("Wait for aggregate caches..."); |
| 80 | + log.info("Waiting for aggregate caches on "+pool[0].getIndexReader().directory()); |
81 | 81 | boolean wait; |
82 | 82 | do{ |
83 | 83 | wait = false; |
— | — | @@ -95,8 +95,13 @@ |
96 | 96 | } |
97 | 97 | } |
98 | 98 | |
| 99 | + public static void warmupPool(IndexSearcherMul[] pool, IndexId iid, boolean useDelay, Integer useCount) throws IOException { |
| 100 | + for(IndexSearcherMul is : pool) |
| 101 | + warmupIndexSearcher(is,iid,useDelay,useCount); |
| 102 | + } |
| 103 | + |
99 | 104 | /** Runs some typical queries on a local index searcher to preload caches, pages into memory, etc .. */ |
100 | | - public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid, boolean useDelay) throws IOException { |
| 105 | + public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid, boolean useDelay, Integer useCount) throws IOException { |
101 | 106 | if(iid.isLinks() || iid.isPrecursor()) |
102 | 107 | return; // no warmup for these |
103 | 108 | try{ |
— | — | @@ -108,7 +113,7 @@ |
109 | 114 | if(global == null) |
110 | 115 | global = GlobalConfiguration.getInstance(); |
111 | 116 | |
112 | | - int count = getWarmupCount(iid); |
| 117 | + int count = useCount == null? getWarmupCount(iid) : useCount; |
113 | 118 | |
114 | 119 | if(iid.isSpell()){ |
115 | 120 | if(count > 0){ |
— | — | @@ -199,9 +204,12 @@ |
200 | 205 | /** Get database of example search terms for language */ |
201 | 206 | protected static Terms getTermsForLang(String lang) { |
202 | 207 | String lib = Configuration.open().getLibraryPath(); |
203 | | - if("en".equals(lang) || "de".equals(lang) || "es".equals(lang) || "fr".equals(lang) || "it".equals(lang) || "pt".equals(lang)) |
204 | | - return new WordTerms(lib+Configuration.PATH_SEP+"dict"+Configuration.PATH_SEP+"terms-"+lang+".txt.gz"); |
205 | | - else |
| 208 | + if("en".equals(lang) || "de".equals(lang) || "es".equals(lang) || "fr".equals(lang) || "it".equals(lang) || "pt".equals(lang)){ |
| 209 | + if( !langTerms.containsKey(lang) ) |
| 210 | + langTerms.put(lang,new WordTerms(lib+Configuration.PATH_SEP+"dict"+Configuration.PATH_SEP+"terms-"+lang+".txt.gz")); |
| 211 | + |
| 212 | + return langTerms.get(lang); |
| 213 | + } else |
206 | 214 | return new SampleTerms(); |
207 | 215 | } |
208 | 216 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java |
— | — | @@ -16,6 +16,7 @@ |
17 | 17 | import org.apache.lucene.search.IndexSearcher; |
18 | 18 | import org.apache.lucene.search.Searchable; |
19 | 19 | import org.apache.lucene.search.SearchableMul; |
| 20 | +import org.apache.lucene.store.RAMDirectory; |
20 | 21 | import org.wikimedia.lsearch.beans.SearchHost; |
21 | 22 | import org.wikimedia.lsearch.config.Configuration; |
22 | 23 | import org.wikimedia.lsearch.config.GlobalConfiguration; |
— | — | @@ -60,16 +61,22 @@ |
61 | 62 | IndexSearcherMul searchers[]; |
62 | 63 | IndexId iid; |
63 | 64 | int index = 0; |
| 65 | + static Configuration config = null; |
64 | 66 | |
65 | 67 | SearcherPool(IndexId iid, String path, int poolsize) throws IOException { |
66 | 68 | this.iid = iid; |
67 | 69 | searchers = new IndexSearcherMul[poolsize]; |
| 70 | + if(config == null) |
| 71 | + config = Configuration.open(); |
| 72 | + RAMDirectory dir = null; |
| 73 | + if(config.getBoolean("Search","ramdirectory")) |
| 74 | + dir = new RAMDirectory(path); |
68 | 75 | for(int i=0;i<poolsize;i++){ |
69 | | - searchers[i] = open(iid, path); |
| 76 | + searchers[i] = open(iid, path, dir); |
70 | 77 | } |
71 | 78 | } |
72 | 79 | |
73 | | - private IndexSearcherMul open(IndexId iid, String path) throws IOException { |
| 80 | + private IndexSearcherMul open(IndexId iid, String path, RAMDirectory directory) throws IOException { |
74 | 81 | IndexSearcherMul searcher = null; |
75 | 82 | log.debug("Opening local index for "+iid); |
76 | 83 | if(!iid.isMySearch()) |
— | — | @@ -77,7 +84,10 @@ |
78 | 85 | if(iid.isLogical()) |
79 | 86 | throw new IOException(iid+": will not open logical index."); |
80 | 87 | try { |
81 | | - searcher = new IndexSearcherMul(path); |
| 88 | + if(directory != null) |
| 89 | + searcher = new IndexSearcherMul(directory); |
| 90 | + else |
| 91 | + searcher = new IndexSearcherMul(path); |
82 | 92 | searcher.setSimilarity(new WikiSimilarity()); |
83 | 93 | } catch (IOException e) { |
84 | 94 | e.printStackTrace(); |
— | — | @@ -174,7 +184,7 @@ |
175 | 185 | * @return |
176 | 186 | */ |
177 | 187 | public String getRandomHost(IndexId iid){ |
178 | | - if(iid.isMySearch() && !UpdateThread.isBeingDeployed(iid)) |
| 188 | + if(iid.isMySearch() && !UpdateThread.isBeingDeployed(iid) && hasLocalSearcher(iid)) |
179 | 189 | return "localhost"; |
180 | 190 | if(!initialized.contains(iid.toString())) |
181 | 191 | initializeRemote(iid); |
— | — | @@ -296,19 +306,25 @@ |
297 | 307 | /** |
298 | 308 | * Initialize all local searcher pools |
299 | 309 | */ |
300 | | - protected void initializeLocal(){ |
301 | | - IndexRegistry registry = IndexRegistry.getInstance(); |
302 | | - HashSet<IndexId> mys = GlobalConfiguration.getInstance().getMySearch(); |
303 | | - for(IndexId iid : mys){ |
304 | | - try { |
305 | | - // when searcher is linked into "search" path it's good, initialize it |
306 | | - if(!iid.isLogical() && registry.getCurrentSearch(iid) != null){ |
307 | | - log.debug("Initializing local for "+iid); |
308 | | - IndexSearcherMul[] pool = getLocalSearcherPool(iid); |
309 | | - RMIServer.bind(iid,pool); |
| 310 | + protected class InitialDeploymentThread extends Thread { |
| 311 | + public void run(){ |
| 312 | + IndexRegistry registry = IndexRegistry.getInstance(); |
| 313 | + HashSet<IndexId> mys = GlobalConfiguration.getInstance().getMySearch(); |
| 314 | + for(IndexId iid : mys){ |
| 315 | + try { |
| 316 | + // when searcher is linked into "search" path it's good, initialize it |
| 317 | + if(!iid.isLogical() && registry.getCurrentSearch(iid) != null){ |
| 318 | + log.debug("Initializing local for "+iid); |
| 319 | + SearcherPool pool = initLocalPool(iid); |
| 320 | + Warmup.warmupPool(pool.searchers,iid,false,1); |
| 321 | + Warmup.waitForAggregate(pool.searchers); |
| 322 | + localCache.put(iid.toString(),pool); |
| 323 | + |
| 324 | + RMIServer.bind(iid,pool.searchers); |
| 325 | + } |
| 326 | + } catch (IOException e) { |
| 327 | + log.warn("I/O error warming index for "+iid+" : "+e.getMessage()); |
310 | 328 | } |
311 | | - } catch (IOException e) { |
312 | | - log.warn("I/O error warming index for "+iid+" : "+e.getMessage()); |
313 | 329 | } |
314 | 330 | } |
315 | 331 | } |
— | — | @@ -332,8 +348,8 @@ |
333 | 349 | SearcherPool pool = localCache.get(iid.toString()); |
334 | 350 | if(pool == null){ |
335 | 351 | // try to init |
336 | | - initLocalPool(iid); |
337 | | - pool = localCache.get(iid.toString()); |
| 352 | + pool = initLocalPool(iid); |
| 353 | + localCache.put(iid.toString(),pool); |
338 | 354 | } |
339 | 355 | |
340 | 356 | if(pool == null) |
— | — | @@ -343,7 +359,7 @@ |
344 | 360 | } |
345 | 361 | |
346 | 362 | /** Make local searcher pool */ |
347 | | - protected void initLocalPool(IndexId iid) throws IOException{ |
| 363 | + protected SearcherPool initLocalPool(IndexId iid) throws IOException{ |
348 | 364 | synchronized(iid.getSearcherCacheLock()){ |
349 | 365 | // make sure some other thread has not opened the searcher |
350 | 366 | if(localCache.get(iid.toString()) == null){ |
— | — | @@ -351,9 +367,9 @@ |
352 | 368 | throw new IOException(iid+" is not searched by this host."); |
353 | 369 | if(iid.isLogical()) |
354 | 370 | throw new IOException(iid+": will not open logical index."); |
355 | | - SearcherPool pool = new SearcherPool(iid,iid.getCanonicalSearchPath(),searchPoolSize); |
356 | | - localCache.put(iid.toString(),pool); |
357 | | - } |
| 371 | + return new SearcherPool(iid,iid.getCanonicalSearchPath(),searchPoolSize); |
| 372 | + } else |
| 373 | + return localCache.get(iid.toString()); |
358 | 374 | } |
359 | 375 | } |
360 | 376 | |
— | — | @@ -389,7 +405,7 @@ |
390 | 406 | protected SearcherCache(boolean initialize){ |
391 | 407 | searchPoolSize = Configuration.open().getInt("SearcherPool","size",1); |
392 | 408 | if(initialize) |
393 | | - initializeLocal(); |
| 409 | + new InitialDeploymentThread().start(); |
394 | 410 | } |
395 | 411 | |
396 | 412 | public int getSearchPoolSize() { |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java |
— | — | @@ -111,12 +111,8 @@ |
112 | 112 | searchOnly = true; |
113 | 113 | NamespaceFilter namespaces = new NamespaceFilter((String)query.get("namespaces")); |
114 | 114 | SearchResults res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly); |
115 | | - if(res!=null && res.isRetry()){ |
116 | | - int retries = 0; |
117 | | - if(iid.isSplit() || iid.isNssplit()){ |
118 | | - retries = iid.getSplitFactor()-2; |
119 | | - } else if(iid.isMainsplit()) |
120 | | - retries = 1; |
| 115 | + /*if(res!=null && res.isRetry()){ |
| 116 | + int retries = 1; |
121 | 117 | |
122 | 118 | while(retries > 0 && res.isRetry()){ |
123 | 119 | res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly); |
— | — | @@ -124,7 +120,7 @@ |
125 | 121 | } |
126 | 122 | if(res.isRetry()) |
127 | 123 | res.setErrorMsg("Internal error, too many internal retries."); |
128 | | - } |
| 124 | + } */ |
129 | 125 | return res; |
130 | 126 | } else if (what.equals("raw") || what.equals("rawexplain")) { |
131 | 127 | int offset = 0, limit = 100; boolean exactCase = false; |
— | — | @@ -427,8 +423,6 @@ |
428 | 424 | TermDocs td1 = reader.termDocs(new Term("key",r)); |
429 | 425 | if(td1.next()){ |
430 | 426 | PrefixMatch m = new PrefixMatch(reader.document(td1.doc()).get("article")); |
431 | | - if(r.equals(key)) |
432 | | - m.score *= PrefixIndexBuilder.EXACT_BOOST; // exact boost |
433 | 427 | results.add(m); |
434 | 428 | |
435 | 429 | } |
— | — | @@ -912,6 +906,8 @@ |
913 | 907 | |
914 | 908 | /** Highlight search results, and set the property in ResultSet */ |
915 | 909 | protected void highlight(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, Term[] terms, SearchResults res, boolean exactCase, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
| 910 | + if(terms == null) |
| 911 | + return; |
916 | 912 | int[] df = searcher.docFreqs(terms); |
917 | 913 | int maxDoc = searcher.maxDoc(); |
918 | 914 | highlight(iid,q,words,terms,df,maxDoc,res,exactCase,null,sortByPhrases,alwaysIncludeFirst); |
— | — | @@ -920,6 +916,8 @@ |
921 | 917 | /** Highlight search results, and set the property in ResultSet */ |
922 | 918 | protected void highlight(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
923 | 919 | Term[] terms = getTerms(q,"contents"); |
| 920 | + if(terms == null) |
| 921 | + return; |
924 | 922 | int[] df = searcher.docFreqs(terms); |
925 | 923 | int maxDoc = searcher.maxDoc(); |
926 | 924 | highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst); |
— | — | @@ -928,6 +926,8 @@ |
929 | 927 | /** Highlight search results from titles index */ |
930 | 928 | protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
931 | 929 | Term[] terms = getTerms(q,"alttitle"); |
| 930 | + if(terms == null) |
| 931 | + return; |
932 | 932 | int[] df = searcher.docFreqs(terms); |
933 | 933 | int maxDoc = searcher.maxDoc(); |
934 | 934 | highlight(iid,q,words,terms,df,maxDoc,res,false,searcher.getIndexReader(),sortByPhrases,alwaysIncludeFirst); |
— | — | @@ -937,6 +937,8 @@ |
938 | 938 | /** Highlight search results from titles index using a wikisearcher */ |
939 | 939 | protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
940 | 940 | Term[] terms = getTerms(q,"alttitle"); |
| 941 | + if(terms == null) |
| 942 | + return; |
941 | 943 | int[] df = searcher.docFreqs(terms); |
942 | 944 | int maxDoc = searcher.maxDoc(); |
943 | 945 | highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java |
— | — | @@ -319,7 +319,7 @@ |
320 | 320 | beingDeployed.add(iid.toString()); |
321 | 321 | try{ |
322 | 322 | RMIServer.unbind(iid,cache.getLocalSearcherPool(iid)); |
323 | | - } catch(IOException e) { |
| 323 | + } catch(Exception e) { |
324 | 324 | // we gave it a shot... |
325 | 325 | } |
326 | 326 | cache.updateLocalSearcherPool(iid,null); |
— | — | @@ -330,14 +330,18 @@ |
331 | 331 | // do some typical queries to preload some lucene caches, pages into memory, etc.. |
332 | 332 | for(IndexSearcherMul is : pool.searchers){ |
333 | 333 | try{ |
334 | | - Warmup.warmupIndexSearcher(is,li.iid,true); |
| 334 | + // do one to trigger caching |
| 335 | + Warmup.warmupIndexSearcher(is,li.iid,true,1); |
| 336 | + Warmup.waitForAggregate(pool.searchers); |
| 337 | + // do proper warmup |
| 338 | + Warmup.warmupIndexSearcher(is,li.iid,true,null); |
335 | 339 | } catch(IOException e){ |
336 | 340 | e.printStackTrace(); |
337 | 341 | log.warn("Error warming up "+li+" : "+e.getMessage()); |
338 | 342 | } |
339 | 343 | } |
340 | | - Warmup.waitForAggregate(pool.searchers); |
341 | 344 | |
| 345 | + |
342 | 346 | // add to cache |
343 | 347 | cache.updateLocalSearcherPool(li.iid,pool); |
344 | 348 | if( reroute ){ |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java |
— | — | @@ -15,6 +15,7 @@ |
16 | 16 | import java.util.ArrayList; |
17 | 17 | import java.util.Collection; |
18 | 18 | import java.util.Collections; |
| 19 | +import java.util.Comparator; |
19 | 20 | import java.util.Date; |
20 | 21 | import java.util.Enumeration; |
21 | 22 | import java.util.HashSet; |
— | — | @@ -22,6 +23,7 @@ |
23 | 24 | import java.util.List; |
24 | 25 | import java.util.Set; |
25 | 26 | import java.util.Map.Entry; |
| 27 | +import java.util.concurrent.locks.Lock; |
26 | 28 | |
27 | 29 | import org.apache.log4j.Logger; |
28 | 30 | import org.apache.lucene.analysis.SimpleAnalyzer; |
— | — | @@ -41,6 +43,7 @@ |
42 | 44 | import org.wikimedia.lsearch.ranks.Links; |
43 | 45 | import org.wikimedia.lsearch.util.Command; |
44 | 46 | import org.wikimedia.lsearch.util.FSUtils; |
| 47 | +import org.wikimedia.lsearch.util.ProgressReport; |
45 | 48 | import org.wikimedia.lsearch.util.StringUtils; |
46 | 49 | |
47 | 50 | /** |
— | — | @@ -160,7 +163,7 @@ |
161 | 164 | * |
162 | 165 | */ |
163 | 166 | protected void makeSnapshots() { |
164 | | - HashSet<IndexId> indexes = WikiIndexModifier.getModifiedIndexes(); |
| 167 | + ArrayList<IndexId> indexes = new ArrayList<IndexId>(); |
165 | 168 | IndexRegistry registry = IndexRegistry.getInstance(); |
166 | 169 | |
167 | 170 | ArrayList<Pattern> pat = new ArrayList<Pattern>(); |
— | — | @@ -181,19 +184,35 @@ |
182 | 185 | if(indexdir.exists()) |
183 | 186 | indexes.add(iid); |
184 | 187 | } |
| 188 | + // sort alphabetically for a predictable processing order |
| 189 | + Collections.sort(indexes, new Comparator<IndexId>() { |
| 190 | + public int compare(IndexId o1, IndexId o2) { |
| 191 | + return o1.toString().compareTo(o2.toString()); |
| 192 | + } |
| 193 | + }); |
185 | 194 | HashSet<IndexId> badOptimization = new HashSet<IndexId>(); |
186 | 195 | // optimize all |
187 | 196 | for( IndexId iid : indexes ){ |
| 197 | + Lock lock = null; |
188 | 198 | try{ |
189 | 199 | if(iid.isLogical()) |
190 | 200 | continue; |
191 | | - if(matchesPattern(pat,iid)) |
| 201 | + if(matchesPattern(pat,iid)){ |
| 202 | + // enforce outer transaction lock to connect optimization & snapshot |
| 203 | + lock = iid.getTransactionLock(IndexId.Transaction.INDEX); |
| 204 | + lock.lock(); |
192 | 205 | optimizeIndex(iid); |
193 | | - |
| 206 | + makeIndexSnapshot(iid,iid.getIndexPath()); |
| 207 | + lock.unlock(); |
| 208 | + lock = null; |
| 209 | + } |
194 | 210 | } catch(IOException e){ |
195 | 211 | e.printStackTrace(); |
196 | 212 | log.error("Error optimizing index "+iid); |
197 | 213 | badOptimization.add(iid); |
| 214 | + } finally { |
| 215 | + if(lock != null) |
| 216 | + lock.unlock(); |
198 | 217 | } |
199 | 218 | } |
200 | 219 | // snapshot all |
— | — | @@ -201,11 +220,10 @@ |
202 | 221 | if(iid.isLogical() || badOptimization.contains(iid)) |
203 | 222 | continue; |
204 | 223 | if(matchesPattern(pat,iid)){ |
205 | | - makeIndexSnapshot(iid,iid.getIndexPath()); |
| 224 | + |
206 | 225 | registry.refreshSnapshots(iid); |
207 | 226 | } |
208 | 227 | } |
209 | | - |
210 | 228 | } |
211 | 229 | |
212 | 230 | private boolean matchesPattern(ArrayList<Pattern> pat, IndexId iid) { |
— | — | @@ -226,7 +244,7 @@ |
227 | 245 | String timestamp = df.format(new Date(System.currentTimeMillis())); |
228 | 246 | if(iid.isLogical()) |
229 | 247 | return; |
230 | | - |
| 248 | + boolean delSnapshots = Configuration.open().getBoolean("Index","delsnapshots") && !iid.isRelated(); |
231 | 249 | log.info("Making snapshot for "+iid); |
232 | 250 | String snapshotdir = iid.getSnapshotPath(); |
233 | 251 | String snapshot = snapshotdir+sep+timestamp; |
— | — | @@ -236,17 +254,22 @@ |
237 | 255 | if(spd.exists() && spd.isDirectory()){ |
238 | 256 | File[] files = spd.listFiles(); |
239 | 257 | for(File f: files){ |
240 | | - if(!f.getAbsolutePath().equals(li.path)) // leave the last snapshot |
241 | | - FSUtils.deleteRecursive(f); |
| 258 | + if(f.getAbsolutePath().equals(li.path) && !delSnapshots) |
| 259 | + continue; // leave last snapshot |
| 260 | + FSUtils.deleteRecursive(f); |
242 | 261 | } |
243 | 262 | } |
244 | 263 | new File(snapshot).mkdirs(); |
245 | | - try { |
246 | | - FSUtils.createHardLinkRecursive(indexPath,snapshot); |
247 | | - } catch (IOException e) { |
248 | | - e.printStackTrace(); |
249 | | - log.error("Error making snapshot "+snapshot+": "+e.getMessage()); |
250 | | - return; |
| 264 | + File ind = new File(indexPath); |
| 265 | + for(File f: ind.listFiles()){ |
| 266 | + // use a cp -lr command for each file in the index |
| 267 | + try { |
| 268 | + FSUtils.createHardLinkRecursive(indexPath+sep+f.getName(),snapshot+sep+f.getName(),true); |
| 269 | + } catch (IOException e) { |
| 270 | + e.printStackTrace(); |
| 271 | + log.error("Error making snapshot "+snapshot+": "+e.getMessage()); |
| 272 | + return; |
| 273 | + } |
251 | 274 | } |
252 | 275 | IndexRegistry.getInstance().refreshSnapshots(iid); |
253 | 276 | log.info("Made snapshot "+snapshot); |
— | — | @@ -263,21 +286,21 @@ |
264 | 287 | return; |
265 | 288 | if(iid.getBooleanParam("optimize",true)){ |
266 | 289 | try { |
| 290 | + Transaction trans = new Transaction(iid,transType); |
| 291 | + trans.begin(); |
267 | 292 | IndexReader reader = IndexReader.open(path); |
268 | 293 | if(!reader.isOptimized()){ |
269 | 294 | reader.close(); |
270 | 295 | log.info("Optimizing "+iid); |
271 | 296 | long start = System.currentTimeMillis(); |
272 | | - Transaction trans = new Transaction(iid,transType); |
273 | | - trans.begin(); |
274 | 297 | IndexWriter writer = new IndexWriter(path,new SimpleAnalyzer(),false); |
275 | 298 | writer.optimize(); |
276 | | - writer.close(); |
277 | | - trans.commit(); |
| 299 | + writer.close(); |
278 | 300 | long delta = System.currentTimeMillis() - start; |
279 | | - log.info("Optimized "+iid+" in "+delta+" ms"); |
| 301 | + log.info("Optimized "+iid+" in "+ProgressReport.formatTime(delta)); |
280 | 302 | } else |
281 | 303 | reader.close(); |
| 304 | + trans.commit(); |
282 | 305 | } catch (IOException e) { |
283 | 306 | log.error("Could not optimize index at "+path+" : "+e.getMessage()); |
284 | 307 | throw e; |
— | — | @@ -299,17 +322,26 @@ |
300 | 323 | HashSet<String> add = new HashSet<String>(); |
301 | 324 | if(records.length > 0){ |
302 | 325 | IndexId iid = records[0].getIndexId(); // we assume all are on same iid |
303 | | - Links links = Links.openForBatchModifiation(iid); |
304 | | - // update links |
305 | | - links.batchUpdate(records); |
306 | | - WikiIndexModifier.fetchLinksInfo(iid,records,links); |
307 | | - // get additional |
308 | | - add.addAll(WikiIndexModifier.fetchAdditional(iid,records,links)); |
309 | | - links.close(); |
310 | | - |
311 | | - for(IndexUpdateRecord r : records){ |
312 | | - enqueue(r); |
313 | | - } |
| 326 | + // get exclusive lock to make sure nothing funny is going on with the index |
| 327 | + Lock lock = iid.getLinks().getTransactionLock(IndexId.Transaction.INDEX); |
| 328 | + lock.lock(); |
| 329 | + try{ |
| 330 | + // FIXME: there should be some kind of failed previous transaction check here |
| 331 | + // works for now because we first do updates, but could easily break in future |
| 332 | + Links links = Links.openForBatchModifiation(iid); |
| 333 | + // update links |
| 334 | + links.batchUpdate(records); |
| 335 | + WikiIndexModifier.fetchLinksInfo(iid,records,links); |
| 336 | + // get additional |
| 337 | + add.addAll(WikiIndexModifier.fetchAdditional(iid,records,links)); |
| 338 | + links.close(); |
| 339 | + |
| 340 | + for(IndexUpdateRecord r : records){ |
| 341 | + enqueue(r); |
| 342 | + } |
| 343 | + } finally{ |
| 344 | + lock.unlock(); |
| 345 | + } |
314 | 346 | } |
315 | 347 | |
316 | 348 | return add; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java |
— | — | @@ -5,6 +5,7 @@ |
6 | 6 | import java.io.FileOutputStream; |
7 | 7 | import java.io.IOException; |
8 | 8 | import java.util.Properties; |
| 9 | +import java.util.concurrent.locks.Lock; |
9 | 10 | |
10 | 11 | import org.apache.log4j.Logger; |
11 | 12 | import org.wikimedia.lsearch.config.Configuration; |
— | — | @@ -28,10 +29,12 @@ |
29 | 30 | protected IndexId iid; |
30 | 31 | protected boolean inTransaction; |
31 | 32 | protected IndexId.Transaction type; |
| 33 | + protected Lock lock; |
32 | 34 | |
33 | 35 | public Transaction(IndexId iid, IndexId.Transaction type){ |
34 | 36 | this.iid = iid; |
35 | 37 | this.type = type; |
| 38 | + this.lock = iid.getTransactionLock(type); |
36 | 39 | inTransaction = false; |
37 | 40 | } |
38 | 41 | |
— | — | @@ -40,6 +43,8 @@ |
41 | 44 | * if not, will return index to consistent state. |
42 | 45 | */ |
43 | 46 | public void begin(){ |
| 47 | + // acquire the lock; this serializes transactions on the index |
| 48 | + lock.lock(); |
44 | 49 | File backup = new File(getBackupDir()); |
45 | 50 | File info = new File(getInfoFile()); |
46 | 51 | if(backup.exists() && info.exists()){ |
— | — | @@ -62,7 +67,7 @@ |
63 | 68 | backup.getParentFile().mkdirs(); |
64 | 69 | try{ |
65 | 70 | // make a copy |
66 | | - FSUtils.createHardLinkRecursive(iid.getPath(type),backup.getAbsolutePath()); |
| 71 | + FSUtils.createHardLinkRecursive(iid.getPath(type),backup.getAbsolutePath(),true); |
67 | 72 | Properties prop = new Properties(); |
68 | 73 | // write out the status file |
69 | 74 | prop.setProperty("status","started at "+System.currentTimeMillis()); |
— | — | @@ -74,6 +79,7 @@ |
75 | 80 | log.info("Transaction on index "+iid+" started"); |
76 | 81 | } catch(Exception e){ |
77 | 82 | log.error("Error while initializing transaction: "+e.getMessage()); |
| 83 | + lock.unlock(); |
78 | 84 | } |
79 | 85 | } |
80 | 86 | |
— | — | @@ -141,19 +147,27 @@ |
142 | 148 | * Commit changes to index. |
143 | 149 | */ |
144 | 150 | public void commit(){ |
145 | | - cleanup(); |
146 | | - inTransaction = false; |
147 | | - log.info("Successfully commited changes on "+iid); |
| 151 | + try{ |
| 152 | + cleanup(); |
| 153 | + inTransaction = false; |
| 154 | + log.info("Successfully committed changes on "+iid); |
| 155 | + } finally{ |
| 156 | + lock.unlock(); |
| 157 | + } |
148 | 158 | } |
149 | 159 | |
150 | 160 | /** |
151 | 161 | * Rollback changes to index. Returns to previous consistent state. |
152 | 162 | */ |
153 | 163 | public void rollback(){ |
154 | | - if(inTransaction){ |
155 | | - recover(); |
156 | | - inTransaction = false; |
157 | | - log.info("Succesfully rollbacked changes on "+iid); |
| 164 | + try{ |
| 165 | + if(inTransaction){ |
| 166 | + recover(); |
| 167 | + inTransaction = false; |
| 168 | + log.info("Successfully rolled back changes on "+iid); |
| 169 | + } |
| 170 | + } finally{ |
| 171 | + lock.unlock(); |
158 | 172 | } |
159 | 173 | } |
160 | 174 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -1134,7 +1134,7 @@ |
1135 | 1135 | hterms.removeAll(forbiddenTerms); |
1136 | 1136 | highlightTerms = hterms.toArray(new Term[] {}); |
1137 | 1137 | |
1138 | | - if(options.coreQueryOnly || words == null) |
| 1138 | + if(options.coreQueryOnly || words == null || (expandedWordsContents.size()==0 && expandedWordsTitle.size()==0)) |
1139 | 1139 | return bq; |
1140 | 1140 | |
1141 | 1141 | // filter out stop words to SHOULD (this enables queries in form of question) |
— | — | @@ -1338,7 +1338,7 @@ |
1339 | 1339 | defaultAliasBoost = ALIAS_BOOST; |
1340 | 1340 | |
1341 | 1341 | |
1342 | | - if(qt == qs) // either null, or category query |
| 1342 | + if(qt==qs || (qt!=null && qt.equals(qs))) // either null, or category query |
1343 | 1343 | return qt; |
1344 | 1344 | if(qt == null) |
1345 | 1345 | return qs; |
— | — | @@ -1797,29 +1797,15 @@ |
1798 | 1798 | |
1799 | 1799 | BooleanQuery full = new BooleanQuery(true); |
1800 | 1800 | full.add(q,Occur.MUST); |
1801 | | - |
1802 | | - /*if(words != null || words.size() > 0){ |
1803 | | - // main relevance |
1804 | | - Query redirects = makeAlttitleForRedirects(words,20,1); |
1805 | | - if(redirects != null) |
1806 | | - full.add(redirects,Occur.SHOULD); |
1807 | 1801 | |
1808 | | - // singular words |
1809 | | - ArrayList<String> singularWords = makeSingularWords(words); |
1810 | | - if(singularWords != null){ |
1811 | | - Query redirectsSing = makeAlttitleForRedirects(singularWords,20,0.8f); |
1812 | | - if(redirectsSing != null) |
1813 | | - full.add(redirectsSing,Occur.SHOULD); |
1814 | | - } |
1815 | | - } */ |
| 1802 | + if(expandedWordsTitle.size() == 0) |
| 1803 | + return full; |
1816 | 1804 | |
1817 | 1805 | // fuzzy & wildcards |
1818 | 1806 | // NOTE: for these to work parseForTitles needs to called after parse() |
1819 | | - //if(hasWildcards() || hasFuzzy()){ |
1820 | 1807 | Query redirectsMulti = makeAlttitleForRedirectsMulti(expandedWordsTitle,expandedBoostTitle,expandedTypes,20,1f); |
1821 | 1808 | if(redirectsMulti != null) |
1822 | 1809 | full.add(redirectsMulti,Occur.SHOULD); |
1823 | | - //} |
1824 | 1810 | |
1825 | 1811 | // add another for complete matches |
1826 | 1812 | BooleanQuery wrap = new BooleanQuery(true); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java |
— | — | @@ -85,10 +85,16 @@ |
86 | 86 | * @param args |
87 | 87 | */ |
88 | 88 | public static void main(String[] args){ |
| 89 | + // config |
| 90 | + Configuration config = Configuration.open(); |
| 91 | + GlobalConfiguration global = GlobalConfiguration.getInstance(); |
| 92 | + |
89 | 93 | ArrayList<String> dbnames = new ArrayList<String>(); |
90 | 94 | boolean daemon = false; |
91 | 95 | long sleepTime = 30000; // 30s |
92 | 96 | String timestamp = null; |
| 97 | + String excludeFile = null; |
| 98 | + boolean useLocal = false; |
93 | 99 | |
94 | 100 | String dblist = null; |
95 | 101 | boolean notification = true; |
— | — | @@ -108,8 +114,12 @@ |
109 | 115 | defaultTimestamp = args[++i]; |
110 | 116 | else if(args[i].equals("-f")) |
111 | 117 | dblist = args[++i]; |
| 118 | + else if(args[i].equals("-l")) |
| 119 | + useLocal = true; |
112 | 120 | else if(args[i].equals("-e")) |
113 | 121 | excludeList.add(args[++i]); |
| 122 | + else if(args[i].equals("-ef")) |
| 123 | + excludeFile = args[++i]; |
114 | 124 | else if(args[i].equals("-n")) |
115 | 125 | notification = true; |
116 | 126 | else if(args[i].equals("--help")) |
— | — | @@ -120,21 +130,10 @@ |
121 | 131 | } else |
122 | 132 | dbnames.add(args[i]); |
123 | 133 | } |
124 | | - if(dblist != null){ |
125 | | - try { |
126 | | - BufferedReader file = new BufferedReader(new FileReader(dblist)); |
127 | | - String line; |
128 | | - while((line = file.readLine()) != null) |
129 | | - dbnames.add(line.trim()); |
130 | | - file.close(); |
131 | | - } catch (FileNotFoundException e) { |
132 | | - System.out.println("Error: File "+dblist+" does not exist"); |
133 | | - return; |
134 | | - } catch (IOException e) { |
135 | | - System.out.println("Error: I/O error reading dblist file "+dblist); |
136 | | - return; |
137 | | - } |
138 | | - } |
| 134 | + if(useLocal) |
| 135 | + dbnames.addAll(global.getMyIndexDBnames()); |
| 136 | + dbnames.addAll(readDBList(dblist)); |
| 137 | + excludeList.addAll(readDBList(excludeFile)); |
139 | 138 | if(dbnames.size() == 0){ |
140 | 139 | System.out.println("Syntax: java IncrementalUpdater [-d] [-s sleep] [-t timestamp] [-e dbname] [-f dblist] [-n] [--no-ranks] dbname1 dbname2 ..."); |
141 | 140 | System.out.println("Options:"); |
— | — | @@ -143,13 +142,13 @@ |
144 | 143 | System.out.println(" -t - timestamp to start from"); |
145 | 144 | System.out.println(" -dt - default timestamp (default: "+defaultTimestamp+")"); |
146 | 145 | System.out.println(" -f - dblist file, one dbname per line"); |
| 146 | + System.out.println(" -l - use all local dbnames"); |
147 | 147 | System.out.println(" -n - wait for notification of flush after done updating one db (default: "+notification+")"); |
148 | 148 | System.out.println(" -e - exclude dbname from incremental updates (overrides -f)"); |
| 149 | + System.out.println(" -ef - exclude db names listed in dblist file"); |
| 150 | + |
149 | 151 | return; |
150 | 152 | } |
151 | | - // config |
152 | | - Configuration config = Configuration.open(); |
153 | | - GlobalConfiguration global = GlobalConfiguration.getInstance(); |
154 | 153 | // preload |
155 | 154 | UnicodeDecomposer.getInstance(); |
156 | 155 | for(String dbname: dbnames){ |
— | — | @@ -279,6 +278,26 @@ |
280 | 279 | } while(daemon); |
281 | 280 | } |
282 | 281 | |
| 282 | + private static Collection<String> readDBList(String dblist) { |
| 283 | + ArrayList<String> dbnames = new ArrayList<String>(); |
| 284 | + if(dblist != null){ |
| 285 | + try { |
| 286 | + BufferedReader file = new BufferedReader(new FileReader(dblist)); |
| 287 | + String line; |
| 288 | + while((line = file.readLine()) != null) |
| 289 | + dbnames.add(line.trim()); |
| 290 | + file.close(); |
| 291 | + } catch (FileNotFoundException e) { |
| 292 | + System.out.println("Error: File "+dblist+" does not exist"); |
| 293 | + System.exit(1); |
| 294 | + } catch (IOException e) { |
| 295 | + System.out.println("Error: I/O error reading dblist file "+dblist); |
| 296 | + System.exit(1); |
| 297 | + } |
| 298 | + } |
| 299 | + return dbnames; |
| 300 | + } |
| 301 | + |
283 | 302 | private static void printRecords(ArrayList<IndexUpdateRecord> records) { |
284 | 303 | for(IndexUpdateRecord rec : records){ |
285 | 304 | Article ar = rec.getArticle(); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/FSUtils.java |
— | — | @@ -14,15 +14,18 @@ |
15 | 15 | public class FSUtils { |
16 | 16 | public static final String PATH_SEP = System.getProperty("file.separator"); |
17 | 17 | |
18 | | - enum OSType { OS_TYPE_UNIX, OS_TYPE_WINXP }; |
| 18 | + enum OSType { OS_TYPE_UNIX, OS_TYPE_WINXP, OS_TYPE_LINUX }; |
19 | 19 | |
20 | 20 | protected static String[] hardLinkCommand; |
| 21 | + protected static String[] hardLinkRecursive = null; |
21 | 22 | |
22 | 23 | static { |
23 | 24 | switch(getOSType()) { |
24 | 25 | case OS_TYPE_WINXP: |
25 | 26 | hardLinkCommand = new String[] {"fsutil","hardlink","create", null, null}; |
26 | 27 | break; |
| 28 | + case OS_TYPE_LINUX: |
| 29 | + hardLinkRecursive = new String[] {"cp", "-lr", null, null}; // no break: fall through so ln -f is also set for single files |
27 | 30 | case OS_TYPE_UNIX: |
28 | 31 | default: |
29 | 32 | hardLinkCommand = new String[] {"ln", "-f", null, null}; |
— | — | @@ -34,6 +37,8 @@ |
35 | 38 | if (osName.indexOf("Windows") >= 0 && |
36 | 39 | (osName.indexOf("XP") >= 0 || osName.indexOf("2003") >= 0)) |
37 | 40 | return OSType.OS_TYPE_WINXP; |
| 41 | + else if(osName.indexOf("Linux")>=0) |
| 42 | + return OSType.OS_TYPE_LINUX; |
38 | 43 | else |
39 | 44 | return OSType.OS_TYPE_UNIX; |
40 | 45 | } |
— | — | @@ -49,12 +54,21 @@ |
50 | 55 | * @param to |
51 | 56 | * @throws IOException |
52 | 57 | */ |
53 | | - public static synchronized void createHardLink(File from, File to) throws IOException { |
54 | | - int len = hardLinkCommand.length; |
55 | | - hardLinkCommand[len-2] = from.getCanonicalPath(); |
56 | | - hardLinkCommand[len-1] = to.getCanonicalPath(); |
57 | | - Command.exec(hardLinkCommand); |
| 58 | + public static void createHardLink(File from, File to) throws IOException { |
| 59 | + String[] command = hardLinkCommand.clone(); |
| 60 | + int len = command.length; |
| 61 | + command[len-2] = from.getCanonicalPath(); |
| 62 | + command[len-1] = to.getCanonicalPath(); |
| 63 | + Command.exec(command); |
58 | 64 | } |
| 65 | + |
| 66 | + protected static void createHardLinkRecursive(File from, File to) throws IOException { |
| 67 | + String[] command = hardLinkRecursive.clone(); |
| 68 | + int len = command.length; |
| 69 | + command[len-2] = from.getCanonicalPath(); |
| 70 | + command[len-1] = to.getCanonicalPath(); |
| 71 | + Command.exec(command); |
| 72 | + } |
59 | 73 | |
60 | 74 | /** |
61 | 75 | * Create hard links recursively if the target is a directory |
— | — | @@ -64,18 +78,36 @@ |
65 | 79 | * @throws IOException |
66 | 80 | */ |
67 | 81 | public static void createHardLinkRecursive(String from, String to) throws IOException { |
| 82 | + createHardLinkRecursive(from,to,false); |
| 83 | + } |
| 84 | + |
| 85 | + /** |
| 86 | + * Creates a hard link, with an additional option whether to use cp -lr, since its default |
| 87 | + * behavior differs from that of ln -f when the destination is a directory. |
| 88 | + * |
| 89 | + * In most non-critical applications you might want the slower but predictable version |
| 90 | + * |
| 91 | + * @param fast |
| 92 | + * @throws IOException |
| 93 | + */ |
| 94 | + public static void createHardLinkRecursive(String from, String to, boolean fast) throws IOException { |
68 | 95 | //System.out.println("Hard-linking "+from+" -> "+to); |
69 | 96 | File file = new File(from); |
70 | 97 | if(!file.exists()) |
71 | 98 | throw new IOException("Trying to hardlink nonexisting file "+from); |
72 | 99 | // ensure we can make the target |
73 | 100 | new File(to).getParentFile().mkdirs(); |
74 | | - if(file.isDirectory()){ |
75 | | - File[] files = file.listFiles(); |
76 | | - for(File f: files) |
77 | | - createHardLinkRecursive(format(new String[]{from,f.getName()}),format(new String[] {to,f.getName()})); |
78 | | - } else |
79 | | - createHardLink(new File(from),new File(to)); |
| 101 | + if(fast && hardLinkRecursive != null){ |
| 102 | + // do a quick cp -lr if it's supported |
| 103 | + createHardLinkRecursive(new File(from),new File(to)); |
| 104 | + } else{ |
| 105 | + if(file.isDirectory()){ |
| 106 | + File[] files = file.listFiles(); |
| 107 | + for(File f: files) |
| 108 | + createHardLinkRecursive(format(new String[]{from,f.getName()}),format(new String[] {to,f.getName()})); |
| 109 | + } else |
| 110 | + createHardLink(new File(from),new File(to)); |
| 111 | + } |
80 | 112 | } |
81 | 113 | |
82 | 114 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/LinkReader.java |
— | — | @@ -58,7 +58,8 @@ |
59 | 59 | public void writeEndPage() throws IOException { |
60 | 60 | Title t = new Title(page.Title.Namespace,page.Title.Text); |
61 | 61 | try{ |
62 | | - links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id)); |
| 62 | + if( page.Title.Namespace >= 0) |
| 63 | + links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id)); |
63 | 64 | } catch(Exception e){ |
64 | 65 | log.error("Error adding article "+t+" : "+e.getMessage()); |
65 | 66 | e.printStackTrace(); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java |
— | — | @@ -143,5 +143,6 @@ |
144 | 144 | log.info("Optimizing..."); |
145 | 145 | indexer.closeAndOptimize(); |
146 | 146 | indexer.snapshot(); |
| 147 | + links.close(); |
147 | 148 | } |
148 | 149 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexImporter.java |
— | — | @@ -105,6 +105,7 @@ |
106 | 106 | |
107 | 107 | public void closeIndex() throws IOException { |
108 | 108 | writer.closeAndOptimize(); |
| 109 | + links.close(); |
109 | 110 | } |
110 | 111 | |
111 | 112 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/related/RelatedBuilder.java |
— | — | @@ -46,33 +46,40 @@ |
47 | 47 | static Logger log = Logger.getLogger(RelatedBuilder.class); |
48 | 48 | |
49 | 49 | public static void main(String[] args) { |
50 | | - String dbname = null; |
| 50 | + ArrayList<String> dbnames = new ArrayList<String>(); |
51 | 51 | System.out.println("MediaWiki lucene-search indexer - build a map of related articles.\n"); |
52 | 52 | |
53 | 53 | Configuration.open(); |
54 | | - GlobalConfiguration.getInstance(); |
| 54 | + GlobalConfiguration global = GlobalConfiguration.getInstance(); |
55 | 55 | if(args.length != 1){ |
56 | | - System.out.println("Syntax: java RelatedBuilder <dbname>"); |
| 56 | + System.out.println("Syntax: java RelatedBuilder [-l] <dbname>"); |
| 57 | + System.out.println("Options:"); |
| 58 | + System.out.println(" -l - rebuild all local wikis"); |
57 | 59 | return; |
58 | 60 | } |
59 | | - dbname = args[0]; |
60 | | - IndexId iid = IndexId.get(dbname); |
61 | | - if(iid == null){ |
62 | | - System.out.println("Invalid dbname "+iid); |
63 | | - return; |
| 61 | + |
| 62 | + for(int i=0;i<args.length;i++){ |
| 63 | + if(args[i].equals("-l")) |
| 64 | + dbnames.addAll(global.getMyIndexDBnames()); |
| 65 | + else dbnames.add(args[i]); |
64 | 66 | } |
65 | | - long start = System.currentTimeMillis(); |
66 | | - try { |
67 | | - rebuildFromLinks(iid); |
68 | | - } catch (IOException e) { |
69 | | - log.fatal("Rebuild I/O error: "+e.getMessage()); |
70 | | - e.printStackTrace(); |
71 | | - return; |
72 | | - } |
73 | | - |
74 | | - long end = System.currentTimeMillis(); |
| 67 | + Collections.sort(dbnames); |
| 68 | + for(String dbname : dbnames){ |
| 69 | + IndexId iid = IndexId.get(dbname); |
75 | 70 | |
76 | | - System.out.println("Finished generating related in "+formatTime(end-start)); |
| 71 | + long start = System.currentTimeMillis(); |
| 72 | + try { |
| 73 | + rebuildFromLinks(iid); |
| 74 | + } catch (IOException e) { |
| 75 | + log.fatal("Rebuild I/O error: "+e.getMessage()); |
| 76 | + e.printStackTrace(); |
| 77 | + continue; |
| 78 | + } |
| 79 | + |
| 80 | + long end = System.currentTimeMillis(); |
| 81 | + |
| 82 | + System.out.println("Finished generating related in "+formatTime(end-start)); |
| 83 | + } |
77 | 84 | } |
78 | 85 | |
79 | 86 | /** Calculate from links index */ |
— | — | @@ -116,6 +123,7 @@ |
117 | 124 | store.addRelated(key,related); |
118 | 125 | } |
119 | 126 | store.snapshot(); |
| 127 | + links.close(); |
120 | 128 | } |
121 | 129 | |
122 | 130 | |
Index: branches/lucene-search-2.1/build.xml |
— | — | @@ -7,10 +7,10 @@ |
8 | 8 | <property name="dist" location="dist"/> |
9 | 9 | <property name="pack.name" value="lucene-search-2.1"/> |
10 | 10 | <property name="src.name" value="lucene-search-src-2.1"/> |
11 | | - <property name="binary.name" value="ls2-bin"/> |
| 11 | + <property name="binary.name" value="ls2.1-bin"/> |
12 | 12 | <property name="jar.name" value="LuceneSearch.jar"/> |
13 | | - <property name="include" value="src/** lib/** sql/** test-data/** webinterface/** *-example *.txt lsearch* build.xml scripts/*"/> |
14 | | - <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/*"/> |
| 13 | + <property name="include" value="src/** lib/** sql/** test-data/** webinterface/** *-example *.txt lsearch* build.xml scripts/* VERSION"/> |
| 14 | + <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/* VERSION"/> |
15 | 15 | |
16 | 16 | <property file="${basedir}/hostname"/> |
17 | 17 | |
Index: branches/lucene-search-2.1/lsearch-global.conf |
— | — | @@ -34,7 +34,7 @@ |
35 | 35 | # Mulitple hosts can search multiple dbs (N-N mapping) |
36 | 36 | [Search-Group] |
37 | 37 | oblak : wikilucene* wikidev* ja* wiki-* |
38 | | -#oblak : wikilucene*1 wikilucene*2 wikilucene*3 |
| 38 | +oblak : wikilucene*1 wikilucene*2 wikilucene*3 |
39 | 39 | |
40 | 40 | # Index nodes |
41 | 41 | # host: db1.part db2.part |
— | — | @@ -72,7 +72,7 @@ |
73 | 73 | # Below are suffixes (or whole names) with various scaling strength |
74 | 74 | AgeScaling.strong=wikinews |
75 | 75 | AgeScaling.medium=mediawikiwiki metawiki |
76 | | -AgeScaling.weak=wiki wikilucene |
| 76 | +#AgeScaling.weak=wiki wikilucene |
77 | 77 | |
78 | 78 | # Use additional per-article ranking data, more suitable for non-encyclopedias |
79 | 79 | AdditionalRank.suffix=mediawikiwiki metawiki |