Index: branches/lucene-search-2.1/lsearch.conf |
— | — | @@ -18,7 +18,7 @@ |
19 | 19 | Rsync.path=/usr/bin/rsync |
20 | 20 | |
21 | 21 | # Extra params for rsync |
22 | | -# Rsync.params=--bwlimit=4096 |
| 22 | +# Rsync.params=--bwlimit=8192 |
23 | 23 | |
24 | 24 | ################################################ |
25 | 25 | # Search node related configuration |
— | — | @@ -43,6 +43,9 @@ |
44 | 44 | # whether to wait for aggregates to warm up before deploying the searcher |
45 | 45 | Search.warmupaggregate=false |
46 | 46 | |
| 47 | +# cache *whole* index in RAM |
| 48 | +Search.ramdirectory=false |
| 49 | + |
47 | 50 | ################################################ |
48 | 51 | # Indexer related configuration |
49 | 52 | ################################################ |
— | — | @@ -62,41 +65,16 @@ |
63 | 66 | # Maximal time an update can remain in queue before being processed (in seconds) |
64 | 67 | Index.maxqueuetimeout=12 |
65 | 68 | |
66 | | -################################################ |
67 | | -# Storage backend (currently mysql) |
68 | | -################################################ |
| 69 | +# Whether to always delete all old snapshots (defaults to false - leaves the last good snapshot) |
| 70 | +# Index.delsnapshots=true |
69 | 71 | |
70 | | -# host of database master |
71 | | -Storage.master=localhost |
72 | | - |
73 | | -# array of host->load |
74 | | -#Storage.slaves=host1->10 host2->50 host3->100 |
75 | | - |
76 | | -# Storage.username=root |
77 | | -# Storage.password= |
78 | | - |
79 | | -# Storage.adminuser=root |
80 | | -# Storage.adminpass= |
81 | | - |
82 | | -# Values: |
83 | | -# true - each dbname has a separate db of that name |
84 | | -# false - each dbname is a prefix for tables in a default db (set default db below) |
85 | | -Storage.useSeparateDBs=false |
86 | | - |
87 | | -# Default db where all the stuff will be stored (if useSeparateDB=false) |
88 | | -Storage.defaultDB=lsearch |
89 | | - |
90 | | -# Where table definitions are |
91 | | -Storage.lib=/var/www/html/lucene-search-2.0/sql |
92 | | - |
93 | | - |
94 | 72 | ################################################ |
95 | 73 | # Log, ganglia, localization |
96 | 74 | ################################################ |
97 | 75 | |
98 | 76 | # If this host runs on multiple CPUs maintain a pool of index searchers |
99 | 77 | # It's a good idea to make it the number of CPUs+1, or some larger odd number |
100 | | -SearcherPool.size=1 |
| 78 | +SearcherPool.size=3 |
101 | 79 | |
102 | 80 | # URL to MediaWiki message files |
103 | 81 | Localization.url=file:///var/www/html/wiki-lucene/phase3/languages/messages |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java |
— | — | @@ -371,6 +371,8 @@ |
372 | 372 | public void close() throws IOException { |
373 | 373 | if(writer != null) |
374 | 374 | writer.close(); |
| 375 | + if(links != null) |
| 376 | + links.close(); |
375 | 377 | } |
376 | 378 | |
377 | 379 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/Importer.java |
— | — | @@ -186,7 +186,8 @@ |
187 | 187 | reader.readDump(); |
188 | 188 | end = System.currentTimeMillis(); |
189 | 189 | log.info("Closing/optimizing index..."); |
190 | | - dp.closeIndex(); |
| 190 | + dp.closeIndex(); |
| 191 | + links.close(); |
191 | 192 | } catch (IOException e) { |
192 | 193 | if(!e.getMessage().equals("stopped")){ |
193 | 194 | log.fatal("I/O error processing dump for "+dbname+" from "+inputfile+" : "+e.getMessage()); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/GlobalConfiguration.java |
— | — | @@ -18,6 +18,7 @@ |
19 | 19 | import java.text.MessageFormat; |
20 | 20 | import java.util.ArrayList; |
21 | 21 | import java.util.Collection; |
| 22 | +import java.util.Collections; |
22 | 23 | import java.util.Enumeration; |
23 | 24 | import java.util.HashMap; |
24 | 25 | import java.util.HashSet; |
— | — | @@ -1294,6 +1295,19 @@ |
1295 | 1296 | |
1296 | 1297 | return ret; |
1297 | 1298 | } |
| 1299 | + /** Get all dbnames that are locally indexed */ |
| 1300 | + public ArrayList<String> getMyIndexDBnames(){ |
| 1301 | + HashSet<String> dbnames = new HashSet<String>(); |
| 1302 | + ArrayList<String> dbnamesSorted = new ArrayList<String>(); |
| 1303 | + |
| 1304 | + for(IndexId iid : indexIdPool.values()){ |
| 1305 | + if(iid.isMyIndex() && !iid.isTitlesBySuffix() && !iid.isSpell()) |
| 1306 | + dbnames.add(iid.getDBname().toString()); |
| 1307 | + } |
| 1308 | + dbnamesSorted.addAll(dbnames); |
| 1309 | + Collections.sort(dbnamesSorted); |
| 1310 | + return dbnamesSorted; |
| 1311 | + } |
1298 | 1312 | |
1299 | 1313 | /** Get the name of the localhost as it appears in global configuration */ |
1300 | 1314 | public String getLocalhost(){ |
— | — | @@ -1431,7 +1445,7 @@ |
1432 | 1446 | |
1433 | 1447 | // process $lang |
1434 | 1448 | String lang = getLanguage(dbname); |
1435 | | - repo = repo.replace("$lang",lang); |
| 1449 | + repo = repo.replace("$lang",lang.replace('_','-')); |
1436 | 1450 | repo = repo += "?title=Special:OAIRepository"; |
1437 | 1451 | |
1438 | 1452 | return repo; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/IndexId.java |
— | — | @@ -5,6 +5,8 @@ |
6 | 6 | import java.util.ArrayList; |
7 | 7 | import java.util.HashSet; |
8 | 8 | import java.util.Hashtable; |
| 9 | +import java.util.concurrent.locks.Lock; |
| 10 | +import java.util.concurrent.locks.ReentrantLock; |
9 | 11 | |
10 | 12 | import org.apache.log4j.Logger; |
11 | 13 | import org.wikimedia.lsearch.analyzers.FilterFactory; |
— | — | @@ -150,6 +152,9 @@ |
151 | 153 | /** lock used in {@link SearcherCache} class */ |
152 | 154 | protected Object searcherCacheLock = new Object(); |
153 | 155 | |
| 156 | + /** locks used to serialize transactions on different transaction paths */ |
| 157 | + protected Hashtable<Transaction,Lock> transactionLocks = new Hashtable<Transaction,Lock>(); |
| 158 | + |
154 | 159 | /** |
155 | 160 | * Get index Id object given its string representation, the actual object |
156 | 161 | * is pulled out of the GlobalConfigurations prepopulated pool of all possible |
— | — | @@ -344,6 +349,9 @@ |
345 | 350 | transactionPath.put(Transaction.INDEX,transRoot+"index"); |
346 | 351 | transactionPath.put(Transaction.IMPORT,transRoot+"import"); |
347 | 352 | transactionPath.put(Transaction.TEMP,transRoot+"temp"); |
| 353 | + transactionLocks.put(Transaction.INDEX,new ReentrantLock()); |
| 354 | + transactionLocks.put(Transaction.IMPORT,new ReentrantLock()); |
| 355 | + transactionLocks.put(Transaction.TEMP,new ReentrantLock()); |
348 | 356 | tempPath = localIndexPath + "temp" + sep + this.dbrole; |
349 | 357 | |
350 | 358 | //if(mySearch){ |
— | — | @@ -924,5 +932,10 @@ |
925 | 933 | public Object getSearcherCacheLock() { |
926 | 934 | return searcherCacheLock; |
927 | 935 | } |
928 | | - |
| 936 | + |
| 937 | + /** Get transaction lock for a transaction type */ |
| 938 | + public Lock getTransactionLock(Transaction trans) { |
| 939 | + return transactionLocks.get(trans); |
| 940 | + } |
| 941 | + |
929 | 942 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java |
— | — | @@ -91,6 +91,8 @@ |
92 | 92 | set = new HashSet<String>(); |
93 | 93 | cachingInProgress.put(reader.directory(),set); |
94 | 94 | } |
| 95 | + if(set.contains(field)) |
| 96 | + return; |
95 | 97 | set.add(field); |
96 | 98 | } |
97 | 99 | try{ |
— | — | @@ -150,14 +152,14 @@ |
151 | 153 | } catch(Exception e){ |
152 | 154 | e.printStackTrace(); |
153 | 155 | log.error("Whole caching failed on field="+field+", reader="+reader.directory()); |
154 | | - } finally{ |
155 | | - synchronized(cachingInProgress){ |
156 | | - Set<String> set = cachingInProgress.get(reader.directory()); |
157 | | - set.remove(field); |
158 | | - if(set.size() == 0) |
159 | | - cachingInProgress.remove(reader.directory()); |
160 | | - } |
161 | 156 | } |
| 157 | + |
| 158 | + synchronized(cachingInProgress){ |
| 159 | + Set<String> set = cachingInProgress.get(reader.directory()); |
| 160 | + set.remove(field); |
| 161 | + if(set.size() == 0) |
| 162 | + cachingInProgress.remove(reader.directory()); |
| 163 | + } |
162 | 164 | } |
163 | 165 | protected byte[] extendBytes(byte[] array){ |
164 | 166 | return resizeBytes(array,array.length*2); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Warmup.java |
— | — | @@ -74,9 +74,9 @@ |
75 | 75 | /** If set in local config file waits for aggregate fields to finish caching */ |
76 | 76 | public static void waitForAggregate(IndexSearcherMul[] pool){ |
77 | 77 | try{ |
78 | | - boolean waitForAggregate = Configuration.open().getString("Search","warmupaggregate","false").equalsIgnoreCase("true"); |
| 78 | + boolean waitForAggregate = true; //Configuration.open().getString("Search","warmupaggregate","false").equalsIgnoreCase("true"); |
79 | 79 | if(waitForAggregate){ // wait for aggregate fields to be cached |
80 | | - log.info("Wait for aggregate caches..."); |
| 80 | + log.info("Waiting for aggregate caches on "+pool[0].getIndexReader().directory()); |
81 | 81 | boolean wait; |
82 | 82 | do{ |
83 | 83 | wait = false; |
— | — | @@ -95,8 +95,13 @@ |
96 | 96 | } |
97 | 97 | } |
98 | 98 | |
| 99 | + public static void warmupPool(IndexSearcherMul[] pool, IndexId iid, boolean useDelay, Integer useCount) throws IOException { |
| 100 | + for(IndexSearcherMul is : pool) |
| 101 | + warmupIndexSearcher(is,iid,useDelay,useCount); |
| 102 | + } |
| 103 | + |
99 | 104 | /** Runs some typical queries on a local index searcher to preload caches, pages into memory, etc .. */ |
100 | | - public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid, boolean useDelay) throws IOException { |
| 105 | + public static void warmupIndexSearcher(IndexSearcherMul is, IndexId iid, boolean useDelay, Integer useCount) throws IOException { |
101 | 106 | if(iid.isLinks() || iid.isPrecursor()) |
102 | 107 | return; // no warmup for these |
103 | 108 | try{ |
— | — | @@ -108,7 +113,7 @@ |
109 | 114 | if(global == null) |
110 | 115 | global = GlobalConfiguration.getInstance(); |
111 | 116 | |
112 | | - int count = getWarmupCount(iid); |
| 117 | + int count = useCount == null? getWarmupCount(iid) : useCount; |
113 | 118 | |
114 | 119 | if(iid.isSpell()){ |
115 | 120 | if(count > 0){ |
— | — | @@ -199,9 +204,12 @@ |
200 | 205 | /** Get database of example search terms for language */ |
201 | 206 | protected static Terms getTermsForLang(String lang) { |
202 | 207 | String lib = Configuration.open().getLibraryPath(); |
203 | | - if("en".equals(lang) || "de".equals(lang) || "es".equals(lang) || "fr".equals(lang) || "it".equals(lang) || "pt".equals(lang)) |
204 | | - return new WordTerms(lib+Configuration.PATH_SEP+"dict"+Configuration.PATH_SEP+"terms-"+lang+".txt.gz"); |
205 | | - else |
| 208 | + if("en".equals(lang) || "de".equals(lang) || "es".equals(lang) || "fr".equals(lang) || "it".equals(lang) || "pt".equals(lang)){ |
| 209 | + if( !langTerms.containsKey(lang) ) |
| 210 | + langTerms.put(lang,new WordTerms(lib+Configuration.PATH_SEP+"dict"+Configuration.PATH_SEP+"terms-"+lang+".txt.gz")); |
| 211 | + |
| 212 | + return langTerms.get(lang); |
| 213 | + } else |
206 | 214 | return new SampleTerms(); |
207 | 215 | } |
208 | 216 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java |
— | — | @@ -16,6 +16,7 @@ |
17 | 17 | import org.apache.lucene.search.IndexSearcher; |
18 | 18 | import org.apache.lucene.search.Searchable; |
19 | 19 | import org.apache.lucene.search.SearchableMul; |
| 20 | +import org.apache.lucene.store.RAMDirectory; |
20 | 21 | import org.wikimedia.lsearch.beans.SearchHost; |
21 | 22 | import org.wikimedia.lsearch.config.Configuration; |
22 | 23 | import org.wikimedia.lsearch.config.GlobalConfiguration; |
— | — | @@ -60,16 +61,22 @@ |
61 | 62 | IndexSearcherMul searchers[]; |
62 | 63 | IndexId iid; |
63 | 64 | int index = 0; |
| 65 | + static Configuration config = null; |
64 | 66 | |
65 | 67 | SearcherPool(IndexId iid, String path, int poolsize) throws IOException { |
66 | 68 | this.iid = iid; |
67 | 69 | searchers = new IndexSearcherMul[poolsize]; |
| 70 | + if(config == null) |
| 71 | + config = Configuration.open(); |
| 72 | + RAMDirectory dir = null; |
| 73 | + if(config.getBoolean("Search","ramdirectory")) |
| 74 | + dir = new RAMDirectory(path); |
68 | 75 | for(int i=0;i<poolsize;i++){ |
69 | | - searchers[i] = open(iid, path); |
| 76 | + searchers[i] = open(iid, path, dir); |
70 | 77 | } |
71 | 78 | } |
72 | 79 | |
73 | | - private IndexSearcherMul open(IndexId iid, String path) throws IOException { |
| 80 | + private IndexSearcherMul open(IndexId iid, String path, RAMDirectory directory) throws IOException { |
74 | 81 | IndexSearcherMul searcher = null; |
75 | 82 | log.debug("Opening local index for "+iid); |
76 | 83 | if(!iid.isMySearch()) |
— | — | @@ -77,7 +84,10 @@ |
78 | 85 | if(iid.isLogical()) |
79 | 86 | throw new IOException(iid+": will not open logical index."); |
80 | 87 | try { |
81 | | - searcher = new IndexSearcherMul(path); |
| 88 | + if(directory != null) |
| 89 | + searcher = new IndexSearcherMul(directory); |
| 90 | + else |
| 91 | + searcher = new IndexSearcherMul(path); |
82 | 92 | searcher.setSimilarity(new WikiSimilarity()); |
83 | 93 | } catch (IOException e) { |
84 | 94 | e.printStackTrace(); |
— | — | @@ -174,7 +184,7 @@ |
175 | 185 | * @return |
176 | 186 | */ |
177 | 187 | public String getRandomHost(IndexId iid){ |
178 | | - if(iid.isMySearch() && !UpdateThread.isBeingDeployed(iid)) |
| 188 | + if(iid.isMySearch() && !UpdateThread.isBeingDeployed(iid) && hasLocalSearcher(iid)) |
179 | 189 | return "localhost"; |
180 | 190 | if(!initialized.contains(iid.toString())) |
181 | 191 | initializeRemote(iid); |
— | — | @@ -296,19 +306,25 @@ |
297 | 307 | /** |
298 | 308 | * Initialize all local searcher pools |
299 | 309 | */ |
300 | | - protected void initializeLocal(){ |
301 | | - IndexRegistry registry = IndexRegistry.getInstance(); |
302 | | - HashSet<IndexId> mys = GlobalConfiguration.getInstance().getMySearch(); |
303 | | - for(IndexId iid : mys){ |
304 | | - try { |
305 | | - // when searcher is linked into "search" path it's good, initialize it |
306 | | - if(!iid.isLogical() && registry.getCurrentSearch(iid) != null){ |
307 | | - log.debug("Initializing local for "+iid); |
308 | | - IndexSearcherMul[] pool = getLocalSearcherPool(iid); |
309 | | - RMIServer.bind(iid,pool); |
| 310 | + protected class InitialDeploymentThread extends Thread { |
| 311 | + public void run(){ |
| 312 | + IndexRegistry registry = IndexRegistry.getInstance(); |
| 313 | + HashSet<IndexId> mys = GlobalConfiguration.getInstance().getMySearch(); |
| 314 | + for(IndexId iid : mys){ |
| 315 | + try { |
| 316 | + // when searcher is linked into "search" path it's good, initialize it |
| 317 | + if(!iid.isLogical() && registry.getCurrentSearch(iid) != null){ |
| 318 | + log.debug("Initializing local for "+iid); |
| 319 | + SearcherPool pool = initLocalPool(iid); |
| 320 | + Warmup.warmupPool(pool.searchers,iid,false,1); |
| 321 | + Warmup.waitForAggregate(pool.searchers); |
| 322 | + localCache.put(iid.toString(),pool); |
| 323 | + |
| 324 | + RMIServer.bind(iid,pool.searchers); |
| 325 | + } |
| 326 | + } catch (IOException e) { |
| 327 | + log.warn("I/O error warming index for "+iid+" : "+e.getMessage()); |
310 | 328 | } |
311 | | - } catch (IOException e) { |
312 | | - log.warn("I/O error warming index for "+iid+" : "+e.getMessage()); |
313 | 329 | } |
314 | 330 | } |
315 | 331 | } |
— | — | @@ -332,8 +348,8 @@ |
333 | 349 | SearcherPool pool = localCache.get(iid.toString()); |
334 | 350 | if(pool == null){ |
335 | 351 | // try to init |
336 | | - initLocalPool(iid); |
337 | | - pool = localCache.get(iid.toString()); |
| 352 | + pool = initLocalPool(iid); |
| 353 | + localCache.put(iid.toString(),pool); |
338 | 354 | } |
339 | 355 | |
340 | 356 | if(pool == null) |
— | — | @@ -343,7 +359,7 @@ |
344 | 360 | } |
345 | 361 | |
346 | 362 | /** Make local searcher pool */ |
347 | | - protected void initLocalPool(IndexId iid) throws IOException{ |
| 363 | + protected SearcherPool initLocalPool(IndexId iid) throws IOException{ |
348 | 364 | synchronized(iid.getSearcherCacheLock()){ |
349 | 365 | // make sure some other thread has not opened the searcher |
350 | 366 | if(localCache.get(iid.toString()) == null){ |
— | — | @@ -351,9 +367,9 @@ |
352 | 368 | throw new IOException(iid+" is not searched by this host."); |
353 | 369 | if(iid.isLogical()) |
354 | 370 | throw new IOException(iid+": will not open logical index."); |
355 | | - SearcherPool pool = new SearcherPool(iid,iid.getCanonicalSearchPath(),searchPoolSize); |
356 | | - localCache.put(iid.toString(),pool); |
357 | | - } |
| 371 | + return new SearcherPool(iid,iid.getCanonicalSearchPath(),searchPoolSize); |
| 372 | + } else |
| 373 | + return localCache.get(iid.toString()); |
358 | 374 | } |
359 | 375 | } |
360 | 376 | |
— | — | @@ -389,7 +405,7 @@ |
390 | 406 | protected SearcherCache(boolean initialize){ |
391 | 407 | searchPoolSize = Configuration.open().getInt("SearcherPool","size",1); |
392 | 408 | if(initialize) |
393 | | - initializeLocal(); |
| 409 | + new InitialDeploymentThread().start(); |
394 | 410 | } |
395 | 411 | |
396 | 412 | public int getSearchPoolSize() { |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java |
— | — | @@ -111,12 +111,8 @@ |
112 | 112 | searchOnly = true; |
113 | 113 | NamespaceFilter namespaces = new NamespaceFilter((String)query.get("namespaces")); |
114 | 114 | SearchResults res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly); |
115 | | - if(res!=null && res.isRetry()){ |
116 | | - int retries = 0; |
117 | | - if(iid.isSplit() || iid.isNssplit()){ |
118 | | - retries = iid.getSplitFactor()-2; |
119 | | - } else if(iid.isMainsplit()) |
120 | | - retries = 1; |
| 115 | + /*if(res!=null && res.isRetry()){ |
| 116 | + int retries = 1; |
121 | 117 | |
122 | 118 | while(retries > 0 && res.isRetry()){ |
123 | 119 | res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly); |
— | — | @@ -124,7 +120,7 @@ |
125 | 121 | } |
126 | 122 | if(res.isRetry()) |
127 | 123 | res.setErrorMsg("Internal error, too many internal retries."); |
128 | | - } |
| 124 | + } */ |
129 | 125 | return res; |
130 | 126 | } else if (what.equals("raw") || what.equals("rawexplain")) { |
131 | 127 | int offset = 0, limit = 100; boolean exactCase = false; |
— | — | @@ -427,8 +423,6 @@ |
428 | 424 | TermDocs td1 = reader.termDocs(new Term("key",r)); |
429 | 425 | if(td1.next()){ |
430 | 426 | PrefixMatch m = new PrefixMatch(reader.document(td1.doc()).get("article")); |
431 | | - if(r.equals(key)) |
432 | | - m.score *= PrefixIndexBuilder.EXACT_BOOST; // exact boost |
433 | 427 | results.add(m); |
434 | 428 | |
435 | 429 | } |
— | — | @@ -912,6 +906,8 @@ |
913 | 907 | |
914 | 908 | /** Highlight search results, and set the property in ResultSet */ |
915 | 909 | protected void highlight(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, Term[] terms, SearchResults res, boolean exactCase, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
| 910 | + if(terms == null) |
| 911 | + return; |
916 | 912 | int[] df = searcher.docFreqs(terms); |
917 | 913 | int maxDoc = searcher.maxDoc(); |
918 | 914 | highlight(iid,q,words,terms,df,maxDoc,res,exactCase,null,sortByPhrases,alwaysIncludeFirst); |
— | — | @@ -920,6 +916,8 @@ |
921 | 917 | /** Highlight search results, and set the property in ResultSet */ |
922 | 918 | protected void highlight(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
923 | 919 | Term[] terms = getTerms(q,"contents"); |
| 920 | + if(terms == null) |
| 921 | + return; |
924 | 922 | int[] df = searcher.docFreqs(terms); |
925 | 923 | int maxDoc = searcher.maxDoc(); |
926 | 924 | highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst); |
— | — | @@ -928,6 +926,8 @@ |
929 | 927 | /** Highlight search results from titles index */ |
930 | 928 | protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
931 | 929 | Term[] terms = getTerms(q,"alttitle"); |
| 930 | + if(terms == null) |
| 931 | + return; |
932 | 932 | int[] df = searcher.docFreqs(terms); |
933 | 933 | int maxDoc = searcher.maxDoc(); |
934 | 934 | highlight(iid,q,words,terms,df,maxDoc,res,false,searcher.getIndexReader(),sortByPhrases,alwaysIncludeFirst); |
— | — | @@ -937,6 +937,8 @@ |
938 | 938 | /** Highlight search results from titles index using a wikisearcher */ |
939 | 939 | protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
940 | 940 | Term[] terms = getTerms(q,"alttitle"); |
| 941 | + if(terms == null) |
| 942 | + return; |
941 | 943 | int[] df = searcher.docFreqs(terms); |
942 | 944 | int maxDoc = searcher.maxDoc(); |
943 | 945 | highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java |
— | — | @@ -319,7 +319,7 @@ |
320 | 320 | beingDeployed.add(iid.toString()); |
321 | 321 | try{ |
322 | 322 | RMIServer.unbind(iid,cache.getLocalSearcherPool(iid)); |
323 | | - } catch(IOException e) { |
| 323 | + } catch(Exception e) { |
324 | 324 | // we gave it a shot... |
325 | 325 | } |
326 | 326 | cache.updateLocalSearcherPool(iid,null); |
— | — | @@ -330,14 +330,18 @@ |
331 | 331 | // do some typical queries to preload some lucene caches, pages into memory, etc.. |
332 | 332 | for(IndexSearcherMul is : pool.searchers){ |
333 | 333 | try{ |
334 | | - Warmup.warmupIndexSearcher(is,li.iid,true); |
| 334 | + // do one to trigger caching |
| 335 | + Warmup.warmupIndexSearcher(is,li.iid,true,1); |
| 336 | + Warmup.waitForAggregate(pool.searchers); |
| 337 | + // do proper warmup |
| 338 | + Warmup.warmupIndexSearcher(is,li.iid,true,null); |
335 | 339 | } catch(IOException e){ |
336 | 340 | e.printStackTrace(); |
337 | 341 | log.warn("Error warming up "+li+" : "+e.getMessage()); |
338 | 342 | } |
339 | 343 | } |
340 | | - Warmup.waitForAggregate(pool.searchers); |
341 | 344 | |
| 345 | + |
342 | 346 | // add to cache |
343 | 347 | cache.updateLocalSearcherPool(li.iid,pool); |
344 | 348 | if( reroute ){ |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java |
— | — | @@ -15,6 +15,7 @@ |
16 | 16 | import java.util.ArrayList; |
17 | 17 | import java.util.Collection; |
18 | 18 | import java.util.Collections; |
| 19 | +import java.util.Comparator; |
19 | 20 | import java.util.Date; |
20 | 21 | import java.util.Enumeration; |
21 | 22 | import java.util.HashSet; |
— | — | @@ -22,6 +23,7 @@ |
23 | 24 | import java.util.List; |
24 | 25 | import java.util.Set; |
25 | 26 | import java.util.Map.Entry; |
| 27 | +import java.util.concurrent.locks.Lock; |
26 | 28 | |
27 | 29 | import org.apache.log4j.Logger; |
28 | 30 | import org.apache.lucene.analysis.SimpleAnalyzer; |
— | — | @@ -41,6 +43,7 @@ |
42 | 44 | import org.wikimedia.lsearch.ranks.Links; |
43 | 45 | import org.wikimedia.lsearch.util.Command; |
44 | 46 | import org.wikimedia.lsearch.util.FSUtils; |
| 47 | +import org.wikimedia.lsearch.util.ProgressReport; |
45 | 48 | import org.wikimedia.lsearch.util.StringUtils; |
46 | 49 | |
47 | 50 | /** |
— | — | @@ -160,7 +163,7 @@ |
161 | 164 | * |
162 | 165 | */ |
163 | 166 | protected void makeSnapshots() { |
164 | | - HashSet<IndexId> indexes = WikiIndexModifier.getModifiedIndexes(); |
| 167 | + ArrayList<IndexId> indexes = new ArrayList<IndexId>(); |
165 | 168 | IndexRegistry registry = IndexRegistry.getInstance(); |
166 | 169 | |
167 | 170 | ArrayList<Pattern> pat = new ArrayList<Pattern>(); |
— | — | @@ -181,19 +184,35 @@ |
182 | 185 | if(indexdir.exists()) |
183 | 186 | indexes.add(iid); |
184 | 187 | } |
| 188 | + // sort alphabetically for a predictable processing order |
| 189 | + Collections.sort(indexes, new Comparator<IndexId>() { |
| 190 | + public int compare(IndexId o1, IndexId o2) { |
| 191 | + return o1.toString().compareTo(o2.toString()); |
| 192 | + } |
| 193 | + }); |
185 | 194 | HashSet<IndexId> badOptimization = new HashSet<IndexId>(); |
186 | 195 | // optimize all |
187 | 196 | for( IndexId iid : indexes ){ |
| 197 | + Lock lock = null; |
188 | 198 | try{ |
189 | 199 | if(iid.isLogical()) |
190 | 200 | continue; |
191 | | - if(matchesPattern(pat,iid)) |
| 201 | + if(matchesPattern(pat,iid)){ |
| 202 | + // enforce outer transaction lock to connect optimization & snapshot |
| 203 | + lock = iid.getTransactionLock(IndexId.Transaction.INDEX); |
| 204 | + lock.lock(); |
192 | 205 | optimizeIndex(iid); |
193 | | - |
| 206 | + makeIndexSnapshot(iid,iid.getIndexPath()); |
| 207 | + lock.unlock(); |
| 208 | + lock = null; |
| 209 | + } |
194 | 210 | } catch(IOException e){ |
195 | 211 | e.printStackTrace(); |
196 | 212 | log.error("Error optimizing index "+iid); |
197 | 213 | badOptimization.add(iid); |
| 214 | + } finally { |
| 215 | + if(lock != null) |
| 216 | + lock.unlock(); |
198 | 217 | } |
199 | 218 | } |
200 | 219 | // snapshot all |
— | — | @@ -201,11 +220,10 @@ |
202 | 221 | if(iid.isLogical() || badOptimization.contains(iid)) |
203 | 222 | continue; |
204 | 223 | if(matchesPattern(pat,iid)){ |
205 | | - makeIndexSnapshot(iid,iid.getIndexPath()); |
| 224 | + |
206 | 225 | registry.refreshSnapshots(iid); |
207 | 226 | } |
208 | 227 | } |
209 | | - |
210 | 228 | } |
211 | 229 | |
212 | 230 | private boolean matchesPattern(ArrayList<Pattern> pat, IndexId iid) { |
— | — | @@ -226,7 +244,7 @@ |
227 | 245 | String timestamp = df.format(new Date(System.currentTimeMillis())); |
228 | 246 | if(iid.isLogical()) |
229 | 247 | return; |
230 | | - |
| 248 | + boolean delSnapshots = Configuration.open().getBoolean("Index","delsnapshots") && !iid.isRelated(); |
231 | 249 | log.info("Making snapshot for "+iid); |
232 | 250 | String snapshotdir = iid.getSnapshotPath(); |
233 | 251 | String snapshot = snapshotdir+sep+timestamp; |
— | — | @@ -236,17 +254,22 @@ |
237 | 255 | if(spd.exists() && spd.isDirectory()){ |
238 | 256 | File[] files = spd.listFiles(); |
239 | 257 | for(File f: files){ |
240 | | - if(!f.getAbsolutePath().equals(li.path)) // leave the last snapshot |
241 | | - FSUtils.deleteRecursive(f); |
| 258 | + if(f.getAbsolutePath().equals(li.path) && !delSnapshots) |
| 259 | + continue; // leave last snapshot |
| 260 | + FSUtils.deleteRecursive(f); |
242 | 261 | } |
243 | 262 | } |
244 | 263 | new File(snapshot).mkdirs(); |
245 | | - try { |
246 | | - FSUtils.createHardLinkRecursive(indexPath,snapshot); |
247 | | - } catch (IOException e) { |
248 | | - e.printStackTrace(); |
249 | | - log.error("Error making snapshot "+snapshot+": "+e.getMessage()); |
250 | | - return; |
| 264 | + File ind = new File(indexPath); |
| 265 | + for(File f: ind.listFiles()){ |
| 266 | + // use a cp -lr command for each file in the index |
| 267 | + try { |
| 268 | + FSUtils.createHardLinkRecursive(indexPath+sep+f.getName(),snapshot+sep+f.getName(),true); |
| 269 | + } catch (IOException e) { |
| 270 | + e.printStackTrace(); |
| 271 | + log.error("Error making snapshot "+snapshot+": "+e.getMessage()); |
| 272 | + return; |
| 273 | + } |
251 | 274 | } |
252 | 275 | IndexRegistry.getInstance().refreshSnapshots(iid); |
253 | 276 | log.info("Made snapshot "+snapshot); |
— | — | @@ -263,21 +286,21 @@ |
264 | 287 | return; |
265 | 288 | if(iid.getBooleanParam("optimize",true)){ |
266 | 289 | try { |
| 290 | + Transaction trans = new Transaction(iid,transType); |
| 291 | + trans.begin(); |
267 | 292 | IndexReader reader = IndexReader.open(path); |
268 | 293 | if(!reader.isOptimized()){ |
269 | 294 | reader.close(); |
270 | 295 | log.info("Optimizing "+iid); |
271 | 296 | long start = System.currentTimeMillis(); |
272 | | - Transaction trans = new Transaction(iid,transType); |
273 | | - trans.begin(); |
274 | 297 | IndexWriter writer = new IndexWriter(path,new SimpleAnalyzer(),false); |
275 | 298 | writer.optimize(); |
276 | | - writer.close(); |
277 | | - trans.commit(); |
| 299 | + writer.close(); |
278 | 300 | long delta = System.currentTimeMillis() - start; |
279 | | - log.info("Optimized "+iid+" in "+delta+" ms"); |
| 301 | + log.info("Optimized "+iid+" in "+ProgressReport.formatTime(delta)); |
280 | 302 | } else |
281 | 303 | reader.close(); |
| 304 | + trans.commit(); |
282 | 305 | } catch (IOException e) { |
283 | 306 | log.error("Could not optimize index at "+path+" : "+e.getMessage()); |
284 | 307 | throw e; |
— | — | @@ -299,17 +322,26 @@ |
300 | 323 | HashSet<String> add = new HashSet<String>(); |
301 | 324 | if(records.length > 0){ |
302 | 325 | IndexId iid = records[0].getIndexId(); // we assume all are on same iid |
303 | | - Links links = Links.openForBatchModifiation(iid); |
304 | | - // update links |
305 | | - links.batchUpdate(records); |
306 | | - WikiIndexModifier.fetchLinksInfo(iid,records,links); |
307 | | - // get additional |
308 | | - add.addAll(WikiIndexModifier.fetchAdditional(iid,records,links)); |
309 | | - links.close(); |
310 | | - |
311 | | - for(IndexUpdateRecord r : records){ |
312 | | - enqueue(r); |
313 | | - } |
| 326 | + // get exclusive lock to make sure nothing funny is going on with the index |
| 327 | + Lock lock = iid.getLinks().getTransactionLock(IndexId.Transaction.INDEX); |
| 328 | + lock.lock(); |
| 329 | + try{ |
| 330 | + // FIXME: there should be some kind of failed previous transaction check here |
| 331 | + // works for now because we first do updates, but could easily break in future |
| 332 | + Links links = Links.openForBatchModifiation(iid); |
| 333 | + // update links |
| 334 | + links.batchUpdate(records); |
| 335 | + WikiIndexModifier.fetchLinksInfo(iid,records,links); |
| 336 | + // get additional |
| 337 | + add.addAll(WikiIndexModifier.fetchAdditional(iid,records,links)); |
| 338 | + links.close(); |
| 339 | + |
| 340 | + for(IndexUpdateRecord r : records){ |
| 341 | + enqueue(r); |
| 342 | + } |
| 343 | + } finally{ |
| 344 | + lock.unlock(); |
| 345 | + } |
314 | 346 | } |
315 | 347 | |
316 | 348 | return add; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java |
— | — | @@ -5,6 +5,7 @@ |
6 | 6 | import java.io.FileOutputStream; |
7 | 7 | import java.io.IOException; |
8 | 8 | import java.util.Properties; |
| 9 | +import java.util.concurrent.locks.Lock; |
9 | 10 | |
10 | 11 | import org.apache.log4j.Logger; |
11 | 12 | import org.wikimedia.lsearch.config.Configuration; |
— | — | @@ -28,10 +29,12 @@ |
29 | 30 | protected IndexId iid; |
30 | 31 | protected boolean inTransaction; |
31 | 32 | protected IndexId.Transaction type; |
| 33 | + protected Lock lock; |
32 | 34 | |
33 | 35 | public Transaction(IndexId iid, IndexId.Transaction type){ |
34 | 36 | this.iid = iid; |
35 | 37 | this.type = type; |
| 38 | + this.lock = iid.getTransactionLock(type); |
36 | 39 | inTransaction = false; |
37 | 40 | } |
38 | 41 | |
— | — | @@ -40,6 +43,8 @@ |
41 | 44 | * if not, will return index to consistent state. |
42 | 45 | */ |
43 | 46 | public void begin(){ |
| 47 | + // acquire the lock; this serializes transactions on the index |
| 48 | + lock.lock(); |
44 | 49 | File backup = new File(getBackupDir()); |
45 | 50 | File info = new File(getInfoFile()); |
46 | 51 | if(backup.exists() && info.exists()){ |
— | — | @@ -62,7 +67,7 @@ |
63 | 68 | backup.getParentFile().mkdirs(); |
64 | 69 | try{ |
65 | 70 | // make a copy |
66 | | - FSUtils.createHardLinkRecursive(iid.getPath(type),backup.getAbsolutePath()); |
| 71 | + FSUtils.createHardLinkRecursive(iid.getPath(type),backup.getAbsolutePath(),true); |
67 | 72 | Properties prop = new Properties(); |
68 | 73 | // write out the status file |
69 | 74 | prop.setProperty("status","started at "+System.currentTimeMillis()); |
— | — | @@ -74,6 +79,7 @@ |
75 | 80 | log.info("Transaction on index "+iid+" started"); |
76 | 81 | } catch(Exception e){ |
77 | 82 | log.error("Error while initializing transaction: "+e.getMessage()); |
| 83 | + lock.unlock(); |
78 | 84 | } |
79 | 85 | } |
80 | 86 | |
— | — | @@ -141,19 +147,27 @@ |
142 | 148 | * Commit changes to index. |
143 | 149 | */ |
144 | 150 | public void commit(){ |
145 | | - cleanup(); |
146 | | - inTransaction = false; |
147 | | - log.info("Successfully commited changes on "+iid); |
| 151 | + try{ |
| 152 | + cleanup(); |
| 153 | + inTransaction = false; |
| 154 | + log.info("Successfully committed changes on "+iid); |
| 155 | + } finally{ |
| 156 | + lock.unlock(); |
| 157 | + } |
148 | 158 | } |
149 | 159 | |
150 | 160 | /** |
151 | 161 | * Rollback changes to index. Returns to previous consistent state. |
152 | 162 | */ |
153 | 163 | public void rollback(){ |
154 | | - if(inTransaction){ |
155 | | - recover(); |
156 | | - inTransaction = false; |
157 | | - log.info("Succesfully rollbacked changes on "+iid); |
| 164 | + try{ |
| 165 | + if(inTransaction){ |
| 166 | + recover(); |
| 167 | + inTransaction = false; |
| 168 | + log.info("Successfully rolled back changes on "+iid); |
| 169 | + } |
| 170 | + } finally{ |
| 171 | + lock.unlock(); |
158 | 172 | } |
159 | 173 | } |
160 | 174 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -1134,7 +1134,7 @@ |
1135 | 1135 | hterms.removeAll(forbiddenTerms); |
1136 | 1136 | highlightTerms = hterms.toArray(new Term[] {}); |
1137 | 1137 | |
1138 | | - if(options.coreQueryOnly || words == null) |
| 1138 | + if(options.coreQueryOnly || words == null || (expandedWordsContents.size()==0 && expandedWordsTitle.size()==0)) |
1139 | 1139 | return bq; |
1140 | 1140 | |
1141 | 1141 | // filter out stop words to SHOULD (this enables queries in form of question) |
— | — | @@ -1338,7 +1338,7 @@ |
1339 | 1339 | defaultAliasBoost = ALIAS_BOOST; |
1340 | 1340 | |
1341 | 1341 | |
1342 | | - if(qt == qs) // either null, or category query |
| 1342 | + if(qt==qs || (qt!=null && qt.equals(qs))) // either null, or category query |
1343 | 1343 | return qt; |
1344 | 1344 | if(qt == null) |
1345 | 1345 | return qs; |
— | — | @@ -1797,29 +1797,15 @@ |
1798 | 1798 | |
1799 | 1799 | BooleanQuery full = new BooleanQuery(true); |
1800 | 1800 | full.add(q,Occur.MUST); |
1801 | | - |
1802 | | - /*if(words != null || words.size() > 0){ |
1803 | | - // main relevance |
1804 | | - Query redirects = makeAlttitleForRedirects(words,20,1); |
1805 | | - if(redirects != null) |
1806 | | - full.add(redirects,Occur.SHOULD); |
1807 | 1801 | |
1808 | | - // singular words |
1809 | | - ArrayList<String> singularWords = makeSingularWords(words); |
1810 | | - if(singularWords != null){ |
1811 | | - Query redirectsSing = makeAlttitleForRedirects(singularWords,20,0.8f); |
1812 | | - if(redirectsSing != null) |
1813 | | - full.add(redirectsSing,Occur.SHOULD); |
1814 | | - } |
1815 | | - } */ |
| 1802 | + if(expandedWordsTitle.size() == 0) |
| 1803 | + return full; |
1816 | 1804 | |
1817 | 1805 | // fuzzy & wildcards |
1818 | 1806 | // NOTE: for these to work parseForTitles needs to called after parse() |
1819 | | - //if(hasWildcards() || hasFuzzy()){ |
1820 | 1807 | Query redirectsMulti = makeAlttitleForRedirectsMulti(expandedWordsTitle,expandedBoostTitle,expandedTypes,20,1f); |
1821 | 1808 | if(redirectsMulti != null) |
1822 | 1809 | full.add(redirectsMulti,Occur.SHOULD); |
1823 | | - //} |
1824 | 1810 | |
1825 | 1811 | // add another for complete matches |
1826 | 1812 | BooleanQuery wrap = new BooleanQuery(true); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java |
— | — | @@ -85,10 +85,16 @@ |
86 | 86 | * @param args |
87 | 87 | */ |
88 | 88 | public static void main(String[] args){ |
| 89 | + // config |
| 90 | + Configuration config = Configuration.open(); |
| 91 | + GlobalConfiguration global = GlobalConfiguration.getInstance(); |
| 92 | + |
89 | 93 | ArrayList<String> dbnames = new ArrayList<String>(); |
90 | 94 | boolean daemon = false; |
91 | 95 | long sleepTime = 30000; // 30s |
92 | 96 | String timestamp = null; |
| 97 | + String excludeFile = null; |
| 98 | + boolean useLocal = false; |
93 | 99 | |
94 | 100 | String dblist = null; |
95 | 101 | boolean notification = true; |
— | — | @@ -108,8 +114,12 @@ |
109 | 115 | defaultTimestamp = args[++i]; |
110 | 116 | else if(args[i].equals("-f")) |
111 | 117 | dblist = args[++i]; |
| 118 | + else if(args[i].equals("-l")) |
| 119 | + useLocal = true; |
112 | 120 | else if(args[i].equals("-e")) |
113 | 121 | excludeList.add(args[++i]); |
| 122 | + else if(args[i].equals("-ef")) |
| 123 | + excludeFile = args[++i]; |
114 | 124 | else if(args[i].equals("-n")) |
115 | 125 | notification = true; |
116 | 126 | else if(args[i].equals("--help")) |
— | — | @@ -120,21 +130,10 @@ |
121 | 131 | } else |
122 | 132 | dbnames.add(args[i]); |
123 | 133 | } |
124 | | - if(dblist != null){ |
125 | | - try { |
126 | | - BufferedReader file = new BufferedReader(new FileReader(dblist)); |
127 | | - String line; |
128 | | - while((line = file.readLine()) != null) |
129 | | - dbnames.add(line.trim()); |
130 | | - file.close(); |
131 | | - } catch (FileNotFoundException e) { |
132 | | - System.out.println("Error: File "+dblist+" does not exist"); |
133 | | - return; |
134 | | - } catch (IOException e) { |
135 | | - System.out.println("Error: I/O error reading dblist file "+dblist); |
136 | | - return; |
137 | | - } |
138 | | - } |
| 134 | + if(useLocal) |
| 135 | + dbnames.addAll(global.getMyIndexDBnames()); |
| 136 | + dbnames.addAll(readDBList(dblist)); |
| 137 | + excludeList.addAll(readDBList(excludeFile)); |
139 | 138 | if(dbnames.size() == 0){ |
140 | 139 | System.out.println("Syntax: java IncrementalUpdater [-d] [-s sleep] [-t timestamp] [-e dbname] [-f dblist] [-n] [--no-ranks] dbname1 dbname2 ..."); |
141 | 140 | System.out.println("Options:"); |
— | — | @@ -143,13 +142,13 @@ |
144 | 143 | System.out.println(" -t - timestamp to start from"); |
145 | 144 | System.out.println(" -dt - default timestamp (default: "+defaultTimestamp+")"); |
146 | 145 | System.out.println(" -f - dblist file, one dbname per line"); |
| 146 | + System.out.println(" -l - use all local dbnames"); |
147 | 147 | System.out.println(" -n - wait for notification of flush after done updating one db (default: "+notification+")"); |
148 | 148 | System.out.println(" -e - exclude dbname from incremental updates (overrides -f)"); |
| 149 | + System.out.println(" -ef - exclude db names listed in dblist file"); |
| 150 | + |
149 | 151 | return; |
150 | 152 | } |
151 | | - // config |
152 | | - Configuration config = Configuration.open(); |
153 | | - GlobalConfiguration global = GlobalConfiguration.getInstance(); |
154 | 153 | // preload |
155 | 154 | UnicodeDecomposer.getInstance(); |
156 | 155 | for(String dbname: dbnames){ |
— | — | @@ -279,6 +278,26 @@ |
280 | 279 | } while(daemon); |
281 | 280 | } |
282 | 281 | |
| 282 | + private static Collection<String> readDBList(String dblist) { |
| 283 | + ArrayList<String> dbnames = new ArrayList<String>(); |
| 284 | + if(dblist != null){ |
| 285 | + try { |
| 286 | + BufferedReader file = new BufferedReader(new FileReader(dblist)); |
| 287 | + String line; |
| 288 | + while((line = file.readLine()) != null) |
| 289 | + dbnames.add(line.trim()); |
| 290 | + file.close(); |
| 291 | + } catch (FileNotFoundException e) { |
| 292 | + System.out.println("Error: File "+dblist+" does not exist"); |
| 293 | + System.exit(1); |
| 294 | + } catch (IOException e) { |
| 295 | + System.out.println("Error: I/O error reading dblist file "+dblist); |
| 296 | + System.exit(1); |
| 297 | + } |
| 298 | + } |
| 299 | + return dbnames; |
| 300 | + } |
| 301 | + |
283 | 302 | private static void printRecords(ArrayList<IndexUpdateRecord> records) { |
284 | 303 | for(IndexUpdateRecord rec : records){ |
285 | 304 | Article ar = rec.getArticle(); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/FSUtils.java |
— | — | @@ -14,15 +14,18 @@ |
15 | 15 | public class FSUtils { |
16 | 16 | public static final String PATH_SEP = System.getProperty("file.separator"); |
17 | 17 | |
18 | | - enum OSType { OS_TYPE_UNIX, OS_TYPE_WINXP }; |
| 18 | + enum OSType { OS_TYPE_UNIX, OS_TYPE_WINXP, OS_TYPE_LINUX }; |
19 | 19 | |
20 | 20 | protected static String[] hardLinkCommand; |
| 21 | + protected static String[] hardLinkRecursive = null; |
21 | 22 | |
22 | 23 | static { |
23 | 24 | switch(getOSType()) { |
24 | 25 | case OS_TYPE_WINXP: |
25 | 26 | hardLinkCommand = new String[] {"fsutil","hardlink","create", null, null}; |
26 | 27 | break; |
| 28 | + case OS_TYPE_LINUX: |
| 29 | + hardLinkRecursive = new String[] {"cp", "-lr", null, null}; // no break: fall through so ln -f is also set for single files |
27 | 30 | case OS_TYPE_UNIX: |
28 | 31 | default: |
29 | 32 | hardLinkCommand = new String[] {"ln", "-f", null, null}; |
— | — | @@ -34,6 +37,8 @@ |
35 | 38 | if (osName.indexOf("Windows") >= 0 && |
36 | 39 | (osName.indexOf("XP") >= 0 || osName.indexOf("2003") >= 0)) |
37 | 40 | return OSType.OS_TYPE_WINXP; |
| 41 | + else if(osName.indexOf("Linux")>=0) |
| 42 | + return OSType.OS_TYPE_LINUX; |
38 | 43 | else |
39 | 44 | return OSType.OS_TYPE_UNIX; |
40 | 45 | } |
— | — | @@ -49,12 +54,21 @@ |
50 | 55 | * @param to |
51 | 56 | * @throws IOException |
52 | 57 | */ |
53 | | - public static synchronized void createHardLink(File from, File to) throws IOException { |
54 | | - int len = hardLinkCommand.length; |
55 | | - hardLinkCommand[len-2] = from.getCanonicalPath(); |
56 | | - hardLinkCommand[len-1] = to.getCanonicalPath(); |
57 | | - Command.exec(hardLinkCommand); |
| 58 | + public static void createHardLink(File from, File to) throws IOException { |
| 59 | + String[] command = hardLinkCommand.clone(); |
| 60 | + int len = command.length; |
| 61 | + command[len-2] = from.getCanonicalPath(); |
| 62 | + command[len-1] = to.getCanonicalPath(); |
| 63 | + Command.exec(command); |
58 | 64 | } |
| 65 | + |
| 66 | + protected static void createHardLinkRecursive(File from, File to) throws IOException { |
| 67 | + String[] command = hardLinkRecursive.clone(); |
| 68 | + int len = command.length; |
| 69 | + command[len-2] = from.getCanonicalPath(); |
| 70 | + command[len-1] = to.getCanonicalPath(); |
| 71 | + Command.exec(command); |
| 72 | + } |
59 | 73 | |
60 | 74 | /** |
61 | 75 | * Create hard links recursively if the target is a directory |
— | — | @@ -64,18 +78,36 @@ |
65 | 79 | * @throws IOException |
66 | 80 | */ |
67 | 81 | public static void createHardLinkRecursive(String from, String to) throws IOException { |
| 82 | + createHardLinkRecursive(from,to,false); |
| 83 | + } |
| 84 | + |
| 85 | + /** |
| 86 | + * Creates a hard link, with an additional option whether to use cp -lr, since its default |
| 87 | + * behavior differs from that of ln -f when the destination is a directory. |
| 88 | + * |
| 89 | + * In most non-critical applications you might want the slower but predictable version |
| 90 | + * |
| 91 | + * @param fast |
| 92 | + * @throws IOException |
| 93 | + */ |
| 94 | + public static void createHardLinkRecursive(String from, String to, boolean fast) throws IOException { |
68 | 95 | //System.out.println("Hard-linking "+from+" -> "+to); |
69 | 96 | File file = new File(from); |
70 | 97 | if(!file.exists()) |
71 | 98 | throw new IOException("Trying to hardlink nonexisting file "+from); |
72 | 99 | // ensure we can make the target |
73 | 100 | new File(to).getParentFile().mkdirs(); |
74 | | - if(file.isDirectory()){ |
75 | | - File[] files = file.listFiles(); |
76 | | - for(File f: files) |
77 | | - createHardLinkRecursive(format(new String[]{from,f.getName()}),format(new String[] {to,f.getName()})); |
78 | | - } else |
79 | | - createHardLink(new File(from),new File(to)); |
| 101 | + if(fast && hardLinkRecursive != null){ |
| 102 | + // do a quick cp -lr if it's supported |
| 103 | + createHardLinkRecursive(new File(from),new File(to)); |
| 104 | + } else{ |
| 105 | + if(file.isDirectory()){ |
| 106 | + File[] files = file.listFiles(); |
| 107 | + for(File f: files) |
| 108 | + createHardLinkRecursive(format(new String[]{from,f.getName()}),format(new String[] {to,f.getName()})); |
| 109 | + } else |
| 110 | + createHardLink(new File(from),new File(to)); |
| 111 | + } |
80 | 112 | } |
81 | 113 | |
82 | 114 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/LinkReader.java |
— | — | @@ -58,7 +58,8 @@ |
59 | 59 | public void writeEndPage() throws IOException { |
60 | 60 | Title t = new Title(page.Title.Namespace,page.Title.Text); |
61 | 61 | try{ |
62 | | - links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id)); |
| 62 | + if( page.Title.Namespace >= 0) |
| 63 | + links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id)); |
63 | 64 | } catch(Exception e){ |
64 | 65 | log.error("Error adding article "+t+" : "+e.getMessage()); |
65 | 66 | e.printStackTrace(); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java |
— | — | @@ -143,5 +143,6 @@ |
144 | 144 | log.info("Optimizing..."); |
145 | 145 | indexer.closeAndOptimize(); |
146 | 146 | indexer.snapshot(); |
| 147 | + links.close(); |
147 | 148 | } |
148 | 149 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexImporter.java |
— | — | @@ -105,6 +105,7 @@ |
106 | 106 | |
107 | 107 | public void closeIndex() throws IOException { |
108 | 108 | writer.closeAndOptimize(); |
| 109 | + links.close(); |
109 | 110 | } |
110 | 111 | |
111 | 112 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/related/RelatedBuilder.java |
— | — | @@ -46,33 +46,40 @@ |
47 | 47 | static Logger log = Logger.getLogger(RelatedBuilder.class); |
48 | 48 | |
49 | 49 | public static void main(String[] args) { |
50 | | - String dbname = null; |
| 50 | + ArrayList<String> dbnames = new ArrayList<String>(); |
51 | 51 | System.out.println("MediaWiki lucene-search indexer - build a map of related articles.\n"); |
52 | 52 | |
53 | 53 | Configuration.open(); |
54 | | - GlobalConfiguration.getInstance(); |
| 54 | + GlobalConfiguration global = GlobalConfiguration.getInstance(); |
55 | 55 | if(args.length != 1){ |
56 | | - System.out.println("Syntax: java RelatedBuilder <dbname>"); |
| 56 | + System.out.println("Syntax: java RelatedBuilder [-l] <dbname>"); |
| 57 | + System.out.println("Options:"); |
| 58 | + System.out.println(" -l - rebuild all local wikis"); |
57 | 59 | return; |
58 | 60 | } |
59 | | - dbname = args[0]; |
60 | | - IndexId iid = IndexId.get(dbname); |
61 | | - if(iid == null){ |
62 | | - System.out.println("Invalid dbname "+iid); |
63 | | - return; |
| 61 | + |
| 62 | + for(int i=0;i<args.length;i++){ |
| 63 | + if(args[i].equals("-l")) |
| 64 | + dbnames.addAll(global.getMyIndexDBnames()); |
| 65 | + else dbnames.add(args[i]); |
64 | 66 | } |
65 | | - long start = System.currentTimeMillis(); |
66 | | - try { |
67 | | - rebuildFromLinks(iid); |
68 | | - } catch (IOException e) { |
69 | | - log.fatal("Rebuild I/O error: "+e.getMessage()); |
70 | | - e.printStackTrace(); |
71 | | - return; |
72 | | - } |
73 | | - |
74 | | - long end = System.currentTimeMillis(); |
| 67 | + Collections.sort(dbnames); |
| 68 | + for(String dbname : dbnames){ |
| 69 | + IndexId iid = IndexId.get(dbname); |
75 | 70 | |
76 | | - System.out.println("Finished generating related in "+formatTime(end-start)); |
| 71 | + long start = System.currentTimeMillis(); |
| 72 | + try { |
| 73 | + rebuildFromLinks(iid); |
| 74 | + } catch (IOException e) { |
| 75 | + log.fatal("Rebuild I/O error: "+e.getMessage()); |
| 76 | + e.printStackTrace(); |
| 77 | + continue; |
| 78 | + } |
| 79 | + |
| 80 | + long end = System.currentTimeMillis(); |
| 81 | + |
| 82 | + System.out.println("Finished generating related in "+formatTime(end-start)); |
| 83 | + } |
77 | 84 | } |
78 | 85 | |
79 | 86 | /** Calculate from links index */ |
— | — | @@ -116,6 +123,7 @@ |
117 | 124 | store.addRelated(key,related); |
118 | 125 | } |
119 | 126 | store.snapshot(); |
| 127 | + links.close(); |
120 | 128 | } |
121 | 129 | |
122 | 130 | |
Index: branches/lucene-search-2.1/build.xml |
— | — | @@ -7,10 +7,10 @@ |
8 | 8 | <property name="dist" location="dist"/> |
9 | 9 | <property name="pack.name" value="lucene-search-2.1"/> |
10 | 10 | <property name="src.name" value="lucene-search-src-2.1"/> |
11 | | - <property name="binary.name" value="ls2-bin"/> |
| 11 | + <property name="binary.name" value="ls2.1-bin"/> |
12 | 12 | <property name="jar.name" value="LuceneSearch.jar"/> |
13 | | - <property name="include" value="src/** lib/** sql/** test-data/** webinterface/** *-example *.txt lsearch* build.xml scripts/*"/> |
14 | | - <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/*"/> |
| 13 | + <property name="include" value="src/** lib/** sql/** test-data/** webinterface/** *-example *.txt lsearch* build.xml scripts/* VERSION"/> |
| 14 | + <property name="include.src" value="src/** sql/** build.xml scripts/* webinterface/* VERSION"/> |
15 | 15 | |
16 | 16 | <property file="${basedir}/hostname"/> |
17 | 17 | |
Index: branches/lucene-search-2.1/lsearch-global.conf |
— | — | @@ -34,7 +34,7 @@ |
35 | 35 | # Mulitple hosts can search multiple dbs (N-N mapping) |
36 | 36 | [Search-Group] |
37 | 37 | oblak : wikilucene* wikidev* ja* wiki-* |
38 | | -#oblak : wikilucene*1 wikilucene*2 wikilucene*3 |
| 38 | +oblak : wikilucene*1 wikilucene*2 wikilucene*3 |
39 | 39 | |
40 | 40 | # Index nodes |
41 | 41 | # host: db1.part db2.part |
— | — | @@ -72,7 +72,7 @@ |
73 | 73 | # Below are suffixes (or whole names) with various scaling strength |
74 | 74 | AgeScaling.strong=wikinews |
75 | 75 | AgeScaling.medium=mediawikiwiki metawiki |
76 | | -AgeScaling.weak=wiki wikilucene |
| 76 | +#AgeScaling.weak=wiki wikilucene |
77 | 77 | |
78 | 78 | # Use additional per-article ranking data, more suitable for non-encyclopedias |
79 | 79 | AdditionalRank.suffix=mediawikiwiki metawiki |