r35955 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r35954‎ | r35955 | r35956 >
Date:22:24, 5 June 2008
Author:rainman
Status:old
Tags:
Comment:
Minor stuff:
* nicer locking mechanism for transactions, corner cases
* option to use ramdirectory, option to delete all snapshots
* optimize hardlinking on indexer
* -l option to magically figure out indexes to incrementally update
* explicitly close indexes on builds to avoid too many files open
Modified paths:
  • /branches/lucene-search-2.1/build.xml (modified) (history)
  • /branches/lucene-search-2.1/lsearch-global.conf (modified) (history)
  • /branches/lucene-search-2.1/lsearch.conf (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/GlobalConfiguration.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/IndexId.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/Importer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/LinkReader.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/related/RelatedBuilder.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Warmup.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexImporter.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/FSUtils.java (modified) (history)

Diff [purge]

Index: branches/lucene-search-2.1/lsearch.conf
@@ -18,7 +18,7 @@
1919 Rsync.path=/usr/bin/rsync
2020
2121 # Extra params for rsync
22 -# Rsync.params=--bwlimit=4096
 22+# Rsync.params=--bwlimit=8192
2323
2424 ################################################
2525 # Search node related configuration
@@ -43,6 +43,9 @@
4444 # if to wait for aggregates to warm up before deploying the searcher
4545 Search.warmupaggregate=false
4646
 47+# cache *whole* index in RAM
 48+Search.ramdirectory=false
 49+
4750 ################################################
4851 # Indexer related configuration
4952 ################################################
@@ -62,41 +65,16 @@
6366 # Maximal time an update can remain in queue before being processed (in seconds)
6467 Index.maxqueuetimeout=12
6568
66 -################################################
67 -# Storage backend (currently mysql)
68 -################################################
 69+# If to delete all old snapshots always (default to false - leaves the last good snapshot)
 70+# Index.delsnapshots=true
6971
70 -# host of database master
71 -Storage.master=localhost
72 -
73 -# array of host->load
74 -#Storage.slaves=host1->10 host2->50 host3->100
75 -
76 -# Storage.username=root
77 -# Storage.password=
78 -
79 -# Storage.adminuser=root
80 -# Storage.adminpass=
81 -
82 -# Values:
83 -# true - each dbname has a separate db of that name
84 -# false - each dbname is a prefix for tables in a default db (set default db below)
85 -Storage.useSeparateDBs=false
86 -
87 -# Default db where all the stuff will be stored (if useSeparateDB=false)
88 -Storage.defaultDB=lsearch
89 -
90 -# Where table definitions are
91 -Storage.lib=/var/www/html/lucene-search-2.0/sql
92 -
93 -
9472 ################################################
9573 # Log, ganglia, localization
9674 ################################################
9775
9876 # If this host runs on multiple CPUs maintain a pool of index searchers
9977 # It's good idea to make it number of CPUs+1, or some larger odd number
100 -SearcherPool.size=1
 78+SearcherPool.size=3
10179
10280 # URL to MediaWiki message files
10381 Localization.url=file:///var/www/html/wiki-lucene/phase3/languages/messages
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java
@@ -371,6 +371,8 @@
372372 public void close() throws IOException {
373373 if(writer != null)
374374 writer.close();
 375+ if(links != null)
 376+ links.close();
375377 }
376378
377379 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/Importer.java
@@ -186,7 +186,8 @@
187187 reader.readDump();
188188 end = System.currentTimeMillis();
189189 log.info("Closing/optimizing index...");
190 - dp.closeIndex();
 190+ dp.closeIndex();
 191+ links.close();
191192 } catch (IOException e) {
192193 if(!e.getMessage().equals("stopped")){
193194 log.fatal("I/O error processing dump for "+dbname+" from "+inputfile+" : "+e.getMessage());
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/GlobalConfiguration.java
@@ -18,6 +18,7 @@
1919 import java.text.MessageFormat;
2020 import java.util.ArrayList;
2121 import java.util.Collection;
 22+import java.util.Collections;
2223 import java.util.Enumeration;
2324 import java.util.HashMap;
2425 import java.util.HashSet;
@@ -1294,6 +1295,19 @@
12951296
12961297 return ret;
12971298 }
 1299+ /** Get all dbnames that are locally indexed */
 1300+ public ArrayList<String> getMyIndexDBnames(){
 1301+ HashSet<String> dbnames = new HashSet<String>();
 1302+ ArrayList<String> dbnamesSorted = new ArrayList<String>();
 1303+
 1304+ for(IndexId iid : indexIdPool.values()){
 1305+ if(iid.isMyIndex() && !iid.isTitlesBySuffix() && !iid.isSpell())
 1306+ dbnames.add(iid.getDBname().toString());
 1307+ }
 1308+ dbnamesSorted.addAll(dbnames);
 1309+ Collections.sort(dbnamesSorted);
 1310+ return dbnamesSorted;
 1311+ }
12981312
12991313 /** Get the name of the localhost as it appears in global configuration */
13001314 public String getLocalhost(){
@@ -1431,7 +1445,7 @@
14321446
14331447 // process $lang
14341448 String lang = getLanguage(dbname);
1435 - repo = repo.replace("$lang",lang);
 1449+ repo = repo.replace("$lang",lang.replace('_','-'));
14361450 repo = repo += "?title=Special:OAIRepository";
14371451
14381452 return repo;
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/IndexId.java
@@ -5,6 +5,8 @@
66 import java.util.ArrayList;
77 import java.util.HashSet;
88 import java.util.Hashtable;
 9+import java.util.concurrent.locks.Lock;
 10+import java.util.concurrent.locks.ReentrantLock;
911
1012 import org.apache.log4j.Logger;
1113 import org.wikimedia.lsearch.analyzers.FilterFactory;
@@ -150,6 +152,9 @@
151153 /** lock used in {@link SearcherCache} class */
152154 protected Object searcherCacheLock = new Object();
153155
 156+ /** locks used to serialize transactions on different transaction paths */
 157+ protected Hashtable<Transaction,Lock> transactionLocks = new Hashtable<Transaction,Lock>();
 158+
154159 /**
155160 * Get index Id object given it's string representation, the actual object
156161 * is pulled out of the GlobalConfigurations prepopulated pool of all possible
@@ -344,6 +349,9 @@
345350 transactionPath.put(Transaction.INDEX,transRoot+"index");
346351 transactionPath.put(Transaction.IMPORT,transRoot+"import");
347352 transactionPath.put(Transaction.TEMP,transRoot+"temp");
 353+ transactionLocks.put(Transaction.INDEX,new ReentrantLock());
 354+ transactionLocks.put(Transaction.IMPORT,new ReentrantLock());
 355+ transactionLocks.put(Transaction.TEMP,new ReentrantLock());
348356 tempPath = localIndexPath + "temp" + sep + this.dbrole;
349357
350358 //if(mySearch){
@@ -924,5 +932,10 @@
925933 public Object getSearcherCacheLock() {
926934 return searcherCacheLock;
927935 }
928 -
 936+
 937+ /** Get transaction lock for a transaction type */
 938+ public Lock getTransactionLock(Transaction trans) {
 939+ return transactionLocks.get(trans);
 940+ }
 941+
929942 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java
@@ -91,6 +91,8 @@
9292 set = new HashSet<String>();
9393 cachingInProgress.put(reader.directory(),set);
9494 }
 95+ if(set.contains(field))
 96+ return;
9597 set.add(field);
9698 }
9799 try{
@@ -150,14 +152,14 @@
151153 } catch(Exception e){
152154 e.printStackTrace();
153155 log.error("Whole caching failed on field="+field+", reader="+reader.directory());
154 - } finally{
155 - synchronized(cachingInProgress){
156 - Set<String> set = cachingInProgress.get(reader.directory());
157 - set.remove(field);
158 - if(set.size() == 0)
159 - cachingInProgress.remove(reader.directory());
160 - }
161156 }
 157+
 158+ synchronized(cachingInProgress){
 159+ Set<String> set = cachingInProgress.get(reader.directory());
 160+ set.remove(field);
 161+ if(set.size() == 0)
 162+ cachingInProgress.remove(reader.directory());
 163+ }
162164 }
163165 protected byte[] extendBytes(byte[] array){
164166 return resizeBytes(array,array.length*2);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Warmup.java
@@ -74,9 +74,9 @@
7575 /** If set in local config file waits for aggregate fields to finish caching */
7676 public static void waitForAggregate(IndexSearcherMul[] pool){
7777 try{
78 - boolean waitForAggregate = Configuration.open().getString("Search","warmupaggregate","false").equalsIgnoreCase("true");
 78+ boolean waitForAggregate = true; //Configuration.open().getString("Search","warmupaggregate","false").equalsIgnoreCase("true");
7979 if(waitForAggregate){ // wait for aggregate fields to be cached
80 - log.info("Wait for aggregate caches...");
 80+ log.info("Waiting for aggregate caches on "+pool[0].getIndexReader().directory());
8181 boolean wait;
8282 do{
8383 wait = false;
@@ -95,8 +95,13 @@
9696 }
9797 }
9898
 99+ public static void warmupPool(IndexSearcherMul[] pool, IndexId iid, boolean useDelay, Integer useCount) throws IOException {
 100+ for(IndexSearcherMul is : pool)
 101+ warmupIndexSearcher(is,iid,useDelay,useCount);
 102+ }
 103+
99104 /** Runs some typical queries on a local index searcher to preload caches, pages into memory, etc .. */
100 - public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid, boolean useDelay) throws IOException {
 105+ public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid, boolean useDelay, Integer useCount) throws IOException {
101106 if(iid.isLinks() || iid.isPrecursor())
102107 return; // no warmaup for these
103108 try{
@@ -108,7 +113,7 @@
109114 if(global == null)
110115 global = GlobalConfiguration.getInstance();
111116
112 - int count = getWarmupCount(iid);
 117+ int count = useCount == null? getWarmupCount(iid) : useCount;
113118
114119 if(iid.isSpell()){
115120 if(count > 0){
@@ -199,9 +204,12 @@
200205 /** Get database of example search terms for language */
201206 protected static Terms getTermsForLang(String lang) {
202207 String lib = Configuration.open().getLibraryPath();
203 - if("en".equals(lang) || "de".equals(lang) || "es".equals(lang) || "fr".equals(lang) || "it".equals(lang) || "pt".equals(lang))
204 - return new WordTerms(lib+Configuration.PATH_SEP+"dict"+Configuration.PATH_SEP+"terms-"+lang+".txt.gz");
205 - else
 208+ if("en".equals(lang) || "de".equals(lang) || "es".equals(lang) || "fr".equals(lang) || "it".equals(lang) || "pt".equals(lang)){
 209+ if( !langTerms.contains(lang) )
 210+ langTerms.put(lang,new WordTerms(lib+Configuration.PATH_SEP+"dict"+Configuration.PATH_SEP+"terms-"+lang+".txt.gz"));
 211+
 212+ return langTerms.get(lang);
 213+ } else
206214 return new SampleTerms();
207215 }
208216
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java
@@ -16,6 +16,7 @@
1717 import org.apache.lucene.search.IndexSearcher;
1818 import org.apache.lucene.search.Searchable;
1919 import org.apache.lucene.search.SearchableMul;
 20+import org.apache.lucene.store.RAMDirectory;
2021 import org.wikimedia.lsearch.beans.SearchHost;
2122 import org.wikimedia.lsearch.config.Configuration;
2223 import org.wikimedia.lsearch.config.GlobalConfiguration;
@@ -60,16 +61,22 @@
6162 IndexSearcherMul searchers[];
6263 IndexId iid;
6364 int index = 0;
 65+ static Configuration config = null;
6466
6567 SearcherPool(IndexId iid, String path, int poolsize) throws IOException {
6668 this.iid = iid;
6769 searchers = new IndexSearcherMul[poolsize];
 70+ if(config == null)
 71+ config = Configuration.open();
 72+ RAMDirectory dir = null;
 73+ if(config.getBoolean("Search","ramdirectory"))
 74+ dir = new RAMDirectory(path);
6875 for(int i=0;i<poolsize;i++){
69 - searchers[i] = open(iid, path);
 76+ searchers[i] = open(iid, path, dir);
7077 }
7178 }
7279
73 - private IndexSearcherMul open(IndexId iid, String path) throws IOException {
 80+ private IndexSearcherMul open(IndexId iid, String path, RAMDirectory directory) throws IOException {
7481 IndexSearcherMul searcher = null;
7582 log.debug("Opening local index for "+iid);
7683 if(!iid.isMySearch())
@@ -77,7 +84,10 @@
7885 if(iid.isLogical())
7986 throw new IOException(iid+": will not open logical index.");
8087 try {
81 - searcher = new IndexSearcherMul(path);
 88+ if(directory != null)
 89+ searcher = new IndexSearcherMul(directory);
 90+ else
 91+ searcher = new IndexSearcherMul(path);
8292 searcher.setSimilarity(new WikiSimilarity());
8393 } catch (IOException e) {
8494 e.printStackTrace();
@@ -174,7 +184,7 @@
175185 * @return
176186 */
177187 public String getRandomHost(IndexId iid){
178 - if(iid.isMySearch() && !UpdateThread.isBeingDeployed(iid))
 188+ if(iid.isMySearch() && !UpdateThread.isBeingDeployed(iid) && hasLocalSearcher(iid))
179189 return "localhost";
180190 if(!initialized.contains(iid.toString()))
181191 initializeRemote(iid);
@@ -296,19 +306,25 @@
297307 /**
298308 * Initialize all local searcher pools
299309 */
300 - protected void initializeLocal(){
301 - IndexRegistry registry = IndexRegistry.getInstance();
302 - HashSet<IndexId> mys = GlobalConfiguration.getInstance().getMySearch();
303 - for(IndexId iid : mys){
304 - try {
305 - // when searcher is linked into "search" path it's good, initialize it
306 - if(!iid.isLogical() && registry.getCurrentSearch(iid) != null){
307 - log.debug("Initializing local for "+iid);
308 - IndexSearcherMul[] pool = getLocalSearcherPool(iid);
309 - RMIServer.bind(iid,pool);
 310+ protected class InitialDeploymentThread extends Thread {
 311+ public void run(){
 312+ IndexRegistry registry = IndexRegistry.getInstance();
 313+ HashSet<IndexId> mys = GlobalConfiguration.getInstance().getMySearch();
 314+ for(IndexId iid : mys){
 315+ try {
 316+ // when searcher is linked into "search" path it's good, initialize it
 317+ if(!iid.isLogical() && registry.getCurrentSearch(iid) != null){
 318+ log.debug("Initializing local for "+iid);
 319+ SearcherPool pool = initLocalPool(iid);
 320+ Warmup.warmupPool(pool.searchers,iid,false,1);
 321+ Warmup.waitForAggregate(pool.searchers);
 322+ localCache.put(iid.toString(),pool);
 323+
 324+ RMIServer.bind(iid,pool.searchers);
 325+ }
 326+ } catch (IOException e) {
 327+ log.warn("I/O error warming index for "+iid+" : "+e.getMessage());
310328 }
311 - } catch (IOException e) {
312 - log.warn("I/O error warming index for "+iid+" : "+e.getMessage());
313329 }
314330 }
315331 }
@@ -332,8 +348,8 @@
333349 SearcherPool pool = localCache.get(iid.toString());
334350 if(pool == null){
335351 // try to init
336 - initLocalPool(iid);
337 - pool = localCache.get(iid.toString());
 352+ pool = initLocalPool(iid);
 353+ localCache.put(iid.toString(),pool);
338354 }
339355
340356 if(pool == null)
@@ -343,7 +359,7 @@
344360 }
345361
346362 /** Make local searcher pool */
347 - protected void initLocalPool(IndexId iid) throws IOException{
 363+ protected SearcherPool initLocalPool(IndexId iid) throws IOException{
348364 synchronized(iid.getSearcherCacheLock()){
349365 // make sure some other thread has not opened the searcher
350366 if(localCache.get(iid.toString()) == null){
@@ -351,9 +367,9 @@
352368 throw new IOException(iid+" is not searched by this host.");
353369 if(iid.isLogical())
354370 throw new IOException(iid+": will not open logical index.");
355 - SearcherPool pool = new SearcherPool(iid,iid.getCanonicalSearchPath(),searchPoolSize);
356 - localCache.put(iid.toString(),pool);
357 - }
 371+ return new SearcherPool(iid,iid.getCanonicalSearchPath(),searchPoolSize);
 372+ } else
 373+ return localCache.get(iid.toString());
358374 }
359375 }
360376
@@ -389,7 +405,7 @@
390406 protected SearcherCache(boolean initialize){
391407 searchPoolSize = Configuration.open().getInt("SearcherPool","size",1);
392408 if(initialize)
393 - initializeLocal();
 409+ new InitialDeploymentThread().start();
394410 }
395411
396412 public int getSearchPoolSize() {
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java
@@ -111,12 +111,8 @@
112112 searchOnly = true;
113113 NamespaceFilter namespaces = new NamespaceFilter((String)query.get("namespaces"));
114114 SearchResults res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly);
115 - if(res!=null && res.isRetry()){
116 - int retries = 0;
117 - if(iid.isSplit() || iid.isNssplit()){
118 - retries = iid.getSplitFactor()-2;
119 - } else if(iid.isMainsplit())
120 - retries = 1;
 115+ /*if(res!=null && res.isRetry()){
 116+ int retries = 1;
121117
122118 while(retries > 0 && res.isRetry()){
123119 res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly);
@@ -124,7 +120,7 @@
125121 }
126122 if(res.isRetry())
127123 res.setErrorMsg("Internal error, too many internal retries.");
128 - }
 124+ } */
129125 return res;
130126 } else if (what.equals("raw") || what.equals("rawexplain")) {
131127 int offset = 0, limit = 100; boolean exactCase = false;
@@ -427,8 +423,6 @@
428424 TermDocs td1 = reader.termDocs(new Term("key",r));
429425 if(td1.next()){
430426 PrefixMatch m = new PrefixMatch(reader.document(td1.doc()).get("article"));
431 - if(r.equals(key))
432 - m.score *= PrefixIndexBuilder.EXACT_BOOST; // exact boost
433427 results.add(m);
434428
435429 }
@@ -912,6 +906,8 @@
913907
914908 /** Highlight search results, and set the property in ResultSet */
915909 protected void highlight(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, Term[] terms, SearchResults res, boolean exactCase, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
 910+ if(terms == null)
 911+ return;
916912 int[] df = searcher.docFreqs(terms);
917913 int maxDoc = searcher.maxDoc();
918914 highlight(iid,q,words,terms,df,maxDoc,res,exactCase,null,sortByPhrases,alwaysIncludeFirst);
@@ -920,6 +916,8 @@
921917 /** Highlight search results, and set the property in ResultSet */
922918 protected void highlight(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
923919 Term[] terms = getTerms(q,"contents");
 920+ if(terms == null)
 921+ return;
924922 int[] df = searcher.docFreqs(terms);
925923 int maxDoc = searcher.maxDoc();
926924 highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst);
@@ -928,6 +926,8 @@
929927 /** Highlight search results from titles index */
930928 protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
931929 Term[] terms = getTerms(q,"alttitle");
 930+ if(terms == null)
 931+ return;
932932 int[] df = searcher.docFreqs(terms);
933933 int maxDoc = searcher.maxDoc();
934934 highlight(iid,q,words,terms,df,maxDoc,res,false,searcher.getIndexReader(),sortByPhrases,alwaysIncludeFirst);
@@ -937,6 +937,8 @@
938938 /** Highlight search results from titles index using a wikisearcher */
939939 protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
940940 Term[] terms = getTerms(q,"alttitle");
 941+ if(terms == null)
 942+ return;
941943 int[] df = searcher.docFreqs(terms);
942944 int maxDoc = searcher.maxDoc();
943945 highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java
@@ -319,7 +319,7 @@
320320 beingDeployed.add(iid.toString());
321321 try{
322322 RMIServer.unbind(iid,cache.getLocalSearcherPool(iid));
323 - } catch(IOException e) {
 323+ } catch(Exception e) {
324324 // we gave it a shot...
325325 }
326326 cache.updateLocalSearcherPool(iid,null);
@@ -330,14 +330,18 @@
331331 // do some typical queries to preload some lucene caches, pages into memory, etc..
332332 for(IndexSearcherMul is : pool.searchers){
333333 try{
334 - Warmup.warmupIndexSearcher(is,li.iid,true);
 334+ // do one to trigger caching
 335+ Warmup.warmupIndexSearcher(is,li.iid,true,1);
 336+ Warmup.waitForAggregate(pool.searchers);
 337+ // do proper warmup
 338+ Warmup.warmupIndexSearcher(is,li.iid,true,null);
335339 } catch(IOException e){
336340 e.printStackTrace();
337341 log.warn("Error warmup up "+li+" : "+e.getMessage());
338342 }
339343 }
340 - Warmup.waitForAggregate(pool.searchers);
341344
 345+
342346 // add to cache
343347 cache.updateLocalSearcherPool(li.iid,pool);
344348 if( reroute ){
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java
@@ -15,6 +15,7 @@
1616 import java.util.ArrayList;
1717 import java.util.Collection;
1818 import java.util.Collections;
 19+import java.util.Comparator;
1920 import java.util.Date;
2021 import java.util.Enumeration;
2122 import java.util.HashSet;
@@ -22,6 +23,7 @@
2324 import java.util.List;
2425 import java.util.Set;
2526 import java.util.Map.Entry;
 27+import java.util.concurrent.locks.Lock;
2628
2729 import org.apache.log4j.Logger;
2830 import org.apache.lucene.analysis.SimpleAnalyzer;
@@ -41,6 +43,7 @@
4244 import org.wikimedia.lsearch.ranks.Links;
4345 import org.wikimedia.lsearch.util.Command;
4446 import org.wikimedia.lsearch.util.FSUtils;
 47+import org.wikimedia.lsearch.util.ProgressReport;
4548 import org.wikimedia.lsearch.util.StringUtils;
4649
4750 /**
@@ -160,7 +163,7 @@
161164 *
162165 */
163166 protected void makeSnapshots() {
164 - HashSet<IndexId> indexes = WikiIndexModifier.getModifiedIndexes();
 167+ ArrayList<IndexId> indexes = new ArrayList<IndexId>();
165168 IndexRegistry registry = IndexRegistry.getInstance();
166169
167170 ArrayList<Pattern> pat = new ArrayList<Pattern>();
@@ -181,19 +184,35 @@
182185 if(indexdir.exists())
183186 indexes.add(iid);
184187 }
 188+ // nicely alphabetically sort
 189+ Collections.sort(indexes, new Comparator<IndexId>() {
 190+ public int compare(IndexId o1, IndexId o2) {
 191+ return o1.toString().compareTo(o2.toString());
 192+ }
 193+ });
185194 HashSet<IndexId> badOptimization = new HashSet<IndexId>();
186195 // optimize all
187196 for( IndexId iid : indexes ){
 197+ Lock lock = null;
188198 try{
189199 if(iid.isLogical())
190200 continue;
191 - if(matchesPattern(pat,iid))
 201+ if(matchesPattern(pat,iid)){
 202+ // enforce outer transaction lock to connect optimization & snapshot
 203+ lock = iid.getTransactionLock(IndexId.Transaction.INDEX);
 204+ lock.lock();
192205 optimizeIndex(iid);
193 -
 206+ makeIndexSnapshot(iid,iid.getIndexPath());
 207+ lock.unlock();
 208+ lock = null;
 209+ }
194210 } catch(IOException e){
195211 e.printStackTrace();
196212 log.error("Error optimizing index "+iid);
197213 badOptimization.add(iid);
 214+ } finally {
 215+ if(lock != null)
 216+ lock.unlock();
198217 }
199218 }
200219 // snapshot all
@@ -201,11 +220,10 @@
202221 if(iid.isLogical() || badOptimization.contains(iid))
203222 continue;
204223 if(matchesPattern(pat,iid)){
205 - makeIndexSnapshot(iid,iid.getIndexPath());
 224+
206225 registry.refreshSnapshots(iid);
207226 }
208227 }
209 -
210228 }
211229
212230 private boolean matchesPattern(ArrayList<Pattern> pat, IndexId iid) {
@@ -226,7 +244,7 @@
227245 String timestamp = df.format(new Date(System.currentTimeMillis()));
228246 if(iid.isLogical())
229247 return;
230 -
 248+ boolean delSnapshots = Configuration.open().getBoolean("Index","delsnapshots") && !iid.isRelated();
231249 log.info("Making snapshot for "+iid);
232250 String snapshotdir = iid.getSnapshotPath();
233251 String snapshot = snapshotdir+sep+timestamp;
@@ -236,17 +254,22 @@
237255 if(spd.exists() && spd.isDirectory()){
238256 File[] files = spd.listFiles();
239257 for(File f: files){
240 - if(!f.getAbsolutePath().equals(li.path)) // leave the last snapshot
241 - FSUtils.deleteRecursive(f);
 258+ if(f.getAbsolutePath().equals(li.path) && !delSnapshots)
 259+ continue; // leave last snapshot
 260+ FSUtils.deleteRecursive(f);
242261 }
243262 }
244263 new File(snapshot).mkdirs();
245 - try {
246 - FSUtils.createHardLinkRecursive(indexPath,snapshot);
247 - } catch (IOException e) {
248 - e.printStackTrace();
249 - log.error("Error making snapshot "+snapshot+": "+e.getMessage());
250 - return;
 264+ File ind =new File(indexPath);
 265+ for(File f: ind.listFiles()){
 266+ // use a cp -lr command for each file in the index
 267+ try {
 268+ FSUtils.createHardLinkRecursive(indexPath+sep+f.getName(),snapshot+sep+f.getName(),true);
 269+ } catch (IOException e) {
 270+ e.printStackTrace();
 271+ log.error("Error making snapshot "+snapshot+": "+e.getMessage());
 272+ return;
 273+ }
251274 }
252275 IndexRegistry.getInstance().refreshSnapshots(iid);
253276 log.info("Made snapshot "+snapshot);
@@ -263,21 +286,21 @@
264287 return;
265288 if(iid.getBooleanParam("optimize",true)){
266289 try {
 290+ Transaction trans = new Transaction(iid,transType);
 291+ trans.begin();
267292 IndexReader reader = IndexReader.open(path);
268293 if(!reader.isOptimized()){
269294 reader.close();
270295 log.info("Optimizing "+iid);
271296 long start = System.currentTimeMillis();
272 - Transaction trans = new Transaction(iid,transType);
273 - trans.begin();
274297 IndexWriter writer = new IndexWriter(path,new SimpleAnalyzer(),false);
275298 writer.optimize();
276 - writer.close();
277 - trans.commit();
 299+ writer.close();
278300 long delta = System.currentTimeMillis() - start;
279 - log.info("Optimized "+iid+" in "+delta+" ms");
 301+ log.info("Optimized "+iid+" in "+ProgressReport.formatTime(delta));
280302 } else
281303 reader.close();
 304+ trans.commit();
282305 } catch (IOException e) {
283306 log.error("Could not optimize index at "+path+" : "+e.getMessage());
284307 throw e;
@@ -299,17 +322,26 @@
300323 HashSet<String> add = new HashSet<String>();
301324 if(records.length > 0){
302325 IndexId iid = records[0].getIndexId(); // we asume all are on same iid
303 - Links links = Links.openForBatchModifiation(iid);
304 - // update links
305 - links.batchUpdate(records);
306 - WikiIndexModifier.fetchLinksInfo(iid,records,links);
307 - // get additional
308 - add.addAll(WikiIndexModifier.fetchAdditional(iid,records,links));
309 - links.close();
310 -
311 - for(IndexUpdateRecord r : records){
312 - enqueue(r);
313 - }
 326+ // get exclusive lock to make sure nothing funny is going on with the index
 327+ Lock lock = iid.getLinks().getTransactionLock(IndexId.Transaction.INDEX);
 328+ lock.lock();
 329+ try{
 330+ // FIXME: there should be some kind of failed previous transaction check here
 331+ // works for now because we first do updates, but could easily break in future
 332+ Links links = Links.openForBatchModifiation(iid);
 333+ // update links
 334+ links.batchUpdate(records);
 335+ WikiIndexModifier.fetchLinksInfo(iid,records,links);
 336+ // get additional
 337+ add.addAll(WikiIndexModifier.fetchAdditional(iid,records,links));
 338+ links.close();
 339+
 340+ for(IndexUpdateRecord r : records){
 341+ enqueue(r);
 342+ }
 343+ } finally{
 344+ lock.unlock();
 345+ }
314346 }
315347
316348 return add;
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java
@@ -5,6 +5,7 @@
66 import java.io.FileOutputStream;
77 import java.io.IOException;
88 import java.util.Properties;
 9+import java.util.concurrent.locks.Lock;
910
1011 import org.apache.log4j.Logger;
1112 import org.wikimedia.lsearch.config.Configuration;
@@ -28,10 +29,12 @@
2930 protected IndexId iid;
3031 protected boolean inTransaction;
3132 protected IndexId.Transaction type;
 33+ protected Lock lock;
3234
3335 public Transaction(IndexId iid, IndexId.Transaction type){
3436 this.iid = iid;
3537 this.type = type;
 38+ this.lock = iid.getTransactionLock(type);
3639 inTransaction = false;
3740 }
3841
@@ -40,6 +43,8 @@
4144 * if not, will return index to consistent state.
4245 */
4346 public void begin(){
 47+ // acquire lock, this will serialize transactions on indexes
 48+ lock.lock();
4449 File backup = new File(getBackupDir());
4550 File info = new File(getInfoFile());
4651 if(backup.exists() && info.exists()){
@@ -62,7 +67,7 @@
6368 backup.getParentFile().mkdirs();
6469 try{
6570 // make a copy
66 - FSUtils.createHardLinkRecursive(iid.getPath(type),backup.getAbsolutePath());
 71+ FSUtils.createHardLinkRecursive(iid.getPath(type),backup.getAbsolutePath(),true);
6772 Properties prop = new Properties();
6873 // write out the status file
6974 prop.setProperty("status","started at "+System.currentTimeMillis());
@@ -74,6 +79,7 @@
7580 log.info("Transaction on index "+iid+" started");
7681 } catch(Exception e){
7782 log.error("Error while intializing transaction: "+e.getMessage());
 83+ lock.unlock();
7884 }
7985 }
8086
@@ -141,19 +147,27 @@
142148 * Commit changes to index.
143149 */
144150 public void commit(){
145 - cleanup();
146 - inTransaction = false;
147 - log.info("Successfully commited changes on "+iid);
 151+ try{
 152+ cleanup();
 153+ inTransaction = false;
 154+ log.info("Successfully commited changes on "+iid);
 155+ } finally{
 156+ lock.unlock();
 157+ }
148158 }
149159
150160 /**
151161 * Rollback changes to index. Returns to previous consistent state.
152162 */
153163 public void rollback(){
154 - if(inTransaction){
155 - recover();
156 - inTransaction = false;
157 - log.info("Succesfully rollbacked changes on "+iid);
 164+ try{
 165+ if(inTransaction){
 166+ recover();
 167+ inTransaction = false;
 168+ log.info("Succesfully rollbacked changes on "+iid);
 169+ }
 170+ } finally{
 171+ lock.unlock();
158172 }
159173 }
160174
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java
@@ -1134,7 +1134,7 @@
11351135 hterms.removeAll(forbiddenTerms);
11361136 highlightTerms = hterms.toArray(new Term[] {});
11371137
1138 - if(options.coreQueryOnly || words == null)
 1138+ if(options.coreQueryOnly || words == null || (expandedWordsContents.size()==0 && expandedWordsTitle.size()==0))
11391139 return bq;
11401140
11411141 // filter out stop words to SHOULD (this enables queries in form of question)
@@ -1338,7 +1338,7 @@
13391339 defaultAliasBoost = ALIAS_BOOST;
13401340
13411341
1342 - if(qt == qs) // either null, or category query
 1342+ if(qt==qs || qt.equals(qs)) // either null, or category query
13431343 return qt;
13441344 if(qt == null)
13451345 return qs;
@@ -1797,29 +1797,15 @@
17981798
17991799 BooleanQuery full = new BooleanQuery(true);
18001800 full.add(q,Occur.MUST);
1801 -
1802 - /*if(words != null || words.size() > 0){
1803 - // main relevance
1804 - Query redirects = makeAlttitleForRedirects(words,20,1);
1805 - if(redirects != null)
1806 - full.add(redirects,Occur.SHOULD);
18071801
1808 - // singular words
1809 - ArrayList<String> singularWords = makeSingularWords(words);
1810 - if(singularWords != null){
1811 - Query redirectsSing = makeAlttitleForRedirects(singularWords,20,0.8f);
1812 - if(redirectsSing != null)
1813 - full.add(redirectsSing,Occur.SHOULD);
1814 - }
1815 - } */
 1802+ if(expandedWordsTitle.size() == 0)
 1803+ return full;
18161804
18171805 // fuzzy & wildcards
18181806 // NOTE: for these to work parseForTitles needs to called after parse()
1819 - //if(hasWildcards() || hasFuzzy()){
18201807 Query redirectsMulti = makeAlttitleForRedirectsMulti(expandedWordsTitle,expandedBoostTitle,expandedTypes,20,1f);
18211808 if(redirectsMulti != null)
18221809 full.add(redirectsMulti,Occur.SHOULD);
1823 - //}
18241810
18251811 // add another for complete matches
18261812 BooleanQuery wrap = new BooleanQuery(true);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java
@@ -85,10 +85,16 @@
8686 * @param args
8787 */
8888 public static void main(String[] args){
 89+ // config
 90+ Configuration config = Configuration.open();
 91+ GlobalConfiguration global = GlobalConfiguration.getInstance();
 92+
8993 ArrayList<String> dbnames = new ArrayList<String>();
9094 boolean daemon = false;
9195 long sleepTime = 30000; // 30s
9296 String timestamp = null;
 97+ String excludeFile = null;
 98+ boolean useLocal = false;
9399
94100 String dblist = null;
95101 boolean notification = true;
@@ -108,8 +114,12 @@
109115 defaultTimestamp = args[++i];
110116 else if(args[i].equals("-f"))
111117 dblist = args[++i];
 118+ else if(args[i].equals("-l"))
 119+ useLocal = true;
112120 else if(args[i].equals("-e"))
113121 excludeList.add(args[++i]);
 122+ else if(args[i].equals("-ef"))
 123+ excludeFile = args[++i];
114124 else if(args[i].equals("-n"))
115125 notification = true;
116126 else if(args[i].equals("--help"))
@@ -120,21 +130,10 @@
121131 } else
122132 dbnames.add(args[i]);
123133 }
124 - if(dblist != null){
125 - try {
126 - BufferedReader file = new BufferedReader(new FileReader(dblist));
127 - String line;
128 - while((line = file.readLine()) != null)
129 - dbnames.add(line.trim());
130 - file.close();
131 - } catch (FileNotFoundException e) {
132 - System.out.println("Error: File "+dblist+" does not exist");
133 - return;
134 - } catch (IOException e) {
135 - System.out.println("Error: I/O error reading dblist file "+dblist);
136 - return;
137 - }
138 - }
 134+ if(useLocal)
 135+ dbnames.addAll(global.getMyIndexDBnames());
 136+ dbnames.addAll(readDBList(dblist));
 137+ excludeList.addAll(readDBList(excludeFile));
139138 if(dbnames.size() == 0){
140139 System.out.println("Syntax: java IncrementalUpdater [-d] [-s sleep] [-t timestamp] [-e dbname] [-f dblist] [-n] [--no-ranks] dbname1 dbname2 ...");
141140 System.out.println("Options:");
@@ -143,13 +142,13 @@
144143 System.out.println(" -t - timestamp to start from");
145144 System.out.println(" -dt - default timestamp (default: "+defaultTimestamp+")");
146145 System.out.println(" -f - dblist file, one dbname per line");
 146+ System.out.println(" -l - use all local dbnames");
147147 System.out.println(" -n - wait for notification of flush after done updating one db (default: "+notification+")");
148148 System.out.println(" -e - exclude dbname from incremental updates (overrides -f)");
 149+ System.out.println(" -ef - exclude db names listed in dblist file");
 150+
149151 return;
150152 }
151 - // config
152 - Configuration config = Configuration.open();
153 - GlobalConfiguration global = GlobalConfiguration.getInstance();
154153 // preload
155154 UnicodeDecomposer.getInstance();
156155 for(String dbname: dbnames){
@@ -279,6 +278,26 @@
280279 } while(daemon);
281280 }
282281
 282+ private static Collection<String> readDBList(String dblist) {
 283+ ArrayList<String> dbnames = new ArrayList<String>();
 284+ if(dblist != null){
 285+ try {
 286+ BufferedReader file = new BufferedReader(new FileReader(dblist));
 287+ String line;
 288+ while((line = file.readLine()) != null)
 289+ dbnames.add(line.trim());
 290+ file.close();
 291+ } catch (FileNotFoundException e) {
 292+ System.out.println("Error: File "+dblist+" does not exist");
 293+ System.exit(1);
 294+ } catch (IOException e) {
 295+ System.out.println("Error: I/O error reading dblist file "+dblist);
 296+ System.exit(1);
 297+ }
 298+ }
 299+ return dbnames;
 300+ }
 301+
283302 private static void printRecords(ArrayList<IndexUpdateRecord> records) {
284303 for(IndexUpdateRecord rec : records){
285304 Article ar = rec.getArticle();
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/FSUtils.java
@@ -14,15 +14,18 @@
1515 public class FSUtils {
1616 public static final String PATH_SEP = System.getProperty("file.separator");
1717
18 - enum OSType { OS_TYPE_UNIX, OS_TYPE_WINXP };
 18+ enum OSType { OS_TYPE_UNIX, OS_TYPE_WINXP, OS_TYPE_LINUX };
1919
2020 protected static String[] hardLinkCommand;
 21+ protected static String[] hardLinkRecursive = null;
2122
2223 static {
2324 switch(getOSType()) {
2425 case OS_TYPE_WINXP:
2526 hardLinkCommand = new String[] {"fsutil","hardlink","create", null, null};
2627 break;
 28+ case OS_TYPE_LINUX:
 29+ hardLinkRecursive = new String[] {"cp", "-lr", null, null};
2730 case OS_TYPE_UNIX:
2831 default:
2932 hardLinkCommand = new String[] {"ln", "-f", null, null};
@@ -34,6 +37,8 @@
3538 if (osName.indexOf("Windows") >= 0 &&
3639 (osName.indexOf("XP") >= 0 || osName.indexOf("2003") >= 0))
3740 return OSType.OS_TYPE_WINXP;
 41+ else if(osName.indexOf("Linux")>=0)
 42+ return OSType.OS_TYPE_LINUX;
3843 else
3944 return OSType.OS_TYPE_UNIX;
4045 }
@@ -49,12 +54,21 @@
5055 * @param to
5156 * @throws IOException
5257 */
53 - public static synchronized void createHardLink(File from, File to) throws IOException {
54 - int len = hardLinkCommand.length;
55 - hardLinkCommand[len-2] = from.getCanonicalPath();
56 - hardLinkCommand[len-1] = to.getCanonicalPath();
57 - Command.exec(hardLinkCommand);
 58+ public static void createHardLink(File from, File to) throws IOException {
 59+ String[] command = hardLinkCommand.clone();
 60+ int len = command.length;
 61+ command[len-2] = from.getCanonicalPath();
 62+ command[len-1] = to.getCanonicalPath();
 63+ Command.exec(command);
5864 }
 65+
 66+ protected static void createHardLinkRecursive(File from, File to) throws IOException {
 67+ String[] command = hardLinkRecursive.clone();
 68+ int len = command.length;
 69+ command[len-2] = from.getCanonicalPath();
 70+ command[len-1] = to.getCanonicalPath();
 71+ Command.exec(command);
 72+ }
5973
6074 /**
6175 * Create hard links recursively if the target is a directory
@@ -64,18 +78,36 @@
6579 * @throws IOException
6680 */
6781 public static void createHardLinkRecursive(String from, String to) throws IOException {
 82+ createHardLinkRecursive(from,to,false);
 83+ }
 84+
 85+ /**
 86+ * Creates a hard link, with an additional option to use cp -lr, since its default
 87+ * behavior differs from that of ln -f when the destination is a directory.
 88+ *
 89+ * In most non-critical applications, you might want the slower but predictable version
 90+ *
 91+ * @param fast
 92+ * @throws IOException
 93+ */
 94+ public static void createHardLinkRecursive(String from, String to, boolean fast) throws IOException {
6895 //System.out.println("Hard-linking "+from+" -> "+to);
6996 File file = new File(from);
7097 if(!file.exists())
7198 throw new IOException("Trying to hardlink nonexisting file "+from);
72100 // ensure we can make the target
73100 new File(to).getParentFile().mkdirs();
74 - if(file.isDirectory()){
75 - File[] files = file.listFiles();
76 - for(File f: files)
77 - createHardLinkRecursive(format(new String[]{from,f.getName()}),format(new String[] {to,f.getName()}));
78 - } else
79 - createHardLink(new File(from),new File(to));
 101+ if(fast && hardLinkRecursive != null){
 102+ // do a quick cp -lr if it's supported
 103+ createHardLinkRecursive(new File(from),new File(to));
 104+ } else{
 105+ if(file.isDirectory()){
 106+ File[] files = file.listFiles();
 107+ for(File f: files)
 108+ createHardLinkRecursive(format(new String[]{from,f.getName()}),format(new String[] {to,f.getName()}));
 109+ } else
 110+ createHardLink(new File(from),new File(to));
 111+ }
80112 }
81113
82114
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/LinkReader.java
@@ -58,7 +58,8 @@
5959 public void writeEndPage() throws IOException {
6060 Title t = new Title(page.Title.Namespace,page.Title.Text);
6161 try{
62 - links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id));
 62+ if( page.Title.Namespace >= 0)
 63+ links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id));
6364 } catch(Exception e){
6465 log.error("Error adding article "+t+" : "+e.getMessage());
6566 e.printStackTrace();
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java
@@ -143,5 +143,6 @@
144144 log.info("Optimizing...");
145145 indexer.closeAndOptimize();
146146 indexer.snapshot();
 147+ links.close();
147148 }
148149 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexImporter.java
@@ -105,6 +105,7 @@
106106
107107 public void closeIndex() throws IOException {
108108 writer.closeAndOptimize();
 109+ links.close();
109110 }
110111
111112
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/related/RelatedBuilder.java
@@ -46,33 +46,40 @@
4747 static Logger log = Logger.getLogger(RelatedBuilder.class);
4848
4949 public static void main(String[] args) {
50 - String dbname = null;
 50+ ArrayList<String> dbnames = new ArrayList<String>();
5151 System.out.println("MediaWiki lucene-search indexer - build a map of related articles.\n");
5252
5353 Configuration.open();
54 - GlobalConfiguration.getInstance();
 54+ GlobalConfiguration global = GlobalConfiguration.getInstance();
5555 if(args.length != 1){
56 - System.out.println("Syntax: java RelatedBuilder <dbname>");
 56+ System.out.println("Syntax: java RelatedBuilder [-l] <dbname>");
 57+ System.out.println("Options:");
 58+ System.out.println(" -l - rebuild all local wikis");
5759 return;
5860 }
59 - dbname = args[0];
60 - IndexId iid = IndexId.get(dbname);
61 - if(iid == null){
62 - System.out.println("Invalid dbname "+iid);
63 - return;
 61+
 62+ for(int i=0;i<args.length;i++){
 63+ if(args[i].equals("-l"))
 64+ dbnames.addAll(global.getMyIndexDBnames());
 65+ else dbnames.add(args[i]);
6466 }
65 - long start = System.currentTimeMillis();
66 - try {
67 - rebuildFromLinks(iid);
68 - } catch (IOException e) {
69 - log.fatal("Rebuild I/O error: "+e.getMessage());
70 - e.printStackTrace();
71 - return;
72 - }
73 -
74 - long end = System.currentTimeMillis();
 67+ Collections.sort(dbnames);
 68+ for(String dbname : dbnames){
 69+ IndexId iid = IndexId.get(dbname);
7570
76 - System.out.println("Finished generating related in "+formatTime(end-start));
 71+ long start = System.currentTimeMillis();
 72+ try {
 73+ rebuildFromLinks(iid);
 74+ } catch (IOException e) {
 75+ log.fatal("Rebuild I/O error: "+e.getMessage());
 76+ e.printStackTrace();
 77+ continue;
 78+ }
 79+
 80+ long end = System.currentTimeMillis();
 81+
 82+ System.out.println("Finished generating related in "+formatTime(end-start));
 83+ }
7784 }
7885
7986 /** Calculate from links index */
@@ -116,6 +123,7 @@
117124 store.addRelated(key,related);
118125 }
119126 store.snapshot();
 127+ links.close();
120128 }
121129
122130
Index: branches/lucene-search-2.1/build.xml
@@ -7,10 +7,10 @@
88 <property name="dist" location="dist"/>
99 <property name="pack.name" value="lucene-search-2.1"/>
1010 <property name="src.name" value="lucene-search-src-2.1"/>
11 - <property name="binary.name" value="ls2-bin"/>
 11+ <property name="binary.name" value="ls2.1-bin"/>
1212 <property name="jar.name" value="LuceneSearch.jar"/>
13 - <property name="include" value="src/** lib/** sql/** test-data/** webinterface/** *-example *.txt lsearch* build.xml scripts/*"/>
14 - <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/*"/>
 13+ <property name="include" value="src/** lib/** sql/** test-data/** webinterface/** *-example *.txt lsearch* build.xml scripts/* VERSION"/>
 14+ <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/* VERSION"/>
1515
1616 <property file="${basedir}/hostname"/>
1717
Index: branches/lucene-search-2.1/lsearch-global.conf
@@ -34,7 +34,7 @@
3535 # Multiple hosts can search multiple dbs (N-N mapping)
3636 [Search-Group]
3737 oblak : wikilucene* wikidev* ja* wiki-*
38 -#oblak : wikilucene*1 wikilucene*2 wikilucene*3
 38+oblak : wikilucene*1 wikilucene*2 wikilucene*3
3939
4040 # Index nodes
4141 # host: db1.part db2.part
@@ -72,7 +72,7 @@
7373 # Below are suffixes (or whole names) with various scaling strength
7474 AgeScaling.strong=wikinews
7575 AgeScaling.medium=mediawikiwiki metawiki
76 -AgeScaling.weak=wiki wikilucene
 76+#AgeScaling.weak=wiki wikilucene
7777
7878 # Use additional per-article ranking data, more suitable for non-encyclopedias
7979 AdditionalRank.suffix=mediawikiwiki metawiki

Status & tagging log