r41855 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r41854 | r41855 | r41856 >
Date:21:05, 8 October 2008
Author:rainman
Status:old
Tags:
Comment:
Minor changes:
* add namespace scaling for interwiki hits as well
* fix minor bugs with highlighting
* skip the complicated index deployment, seems to hang under high load
* put stack traces for warn,error levels into log
Modified paths:
  • /branches/lucene-search-2.1/lsearchd (modified) (history)
  • /branches/lucene-search-2.1/src/org/apache/lucene/search/ArticleNamespaceScaling.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/apache/lucene/search/ArticleQueryWrap.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/ContextAnalyzer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/ExtToken.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/LanguageAnalyzer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/SerbianFilter.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/StopWords.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WordNet.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/benchmark/WordTerms.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/GlobalConfiguration.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/IndexRegistry.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HTTPIndexDaemon.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HTTPIndexServer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HttpHandler.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchDaemon.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchServer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/highlight/Highlight.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/BuildAll.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/SimpleIndexWriter.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/WikiIndexModifier.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerClient.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIServer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/OAIHarvester.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/LinkReader.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/Links.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateInfoImpl.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/ArticleMeta.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/FilterWrapper.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/RankField.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Warmup.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Wildcards.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexWriter.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/Suggest.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/api/NgramIndexer.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/storage/LinkAnalysisStorage.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/storage/MySQLStorage.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/Localization.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/PHPParser.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java (modified) (history)
  • /branches/lucene-search-2.1/test/org/wikimedia/lsearch/index/WikiIndexModifierTest.java (modified) (history)

Diff [purge]

Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/index/WikiIndexModifierTest.java
@@ -71,6 +71,34 @@
7272 }
7373 }
7474
 75+ public void testMakeHighlightDocuments(){
 76+ IndexId iid = IndexId.get("enwiki");
 77+ String text = "Some very [[simple]] text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
 78+ int references = 100;
 79+ int redirectTargetNamespace = -1;
 80+ ArrayList<Redirect> redirects = new ArrayList<Redirect>();
 81+ redirects.add(new Redirect(0,"Redirect",2));
 82+ ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>();
 83+ rel.add(new RelatedTitle(new Title(0,"Related test"),50));
 84+ Hashtable<String,Integer> anchors = new Hashtable<String,Integer>();
 85+ anchors.put("Anchor",20);
 86+ Date date = new Date();
 87+
 88+ Article article = new Article(10,0,"Test page",text,null,
 89+ references,redirectTargetNamespace,0,redirects,rel,anchors,date);
 90+
 91+ analyzer = Analyzers.getHighlightAnalyzer(iid, false);
 92+ try{
 93+ doc = WikiIndexModifier.makeHighlightDocument(article,new FieldBuilder(iid),iid);
 94+ assertEquals("1 [10]",
 95+ tokens("pageid"));
 96+ assertEquals("1 [0:Test page]",
 97+ tokens("key"));
 98+ } catch(IOException e){
 99+ fail();
 100+ }
 101+ }
 102+
75103 public void testMakeTitleDocument(){
76104 IndexId iid = IndexId.get("en-titles");
77105 String text = "Some very simple text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]";
Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java
@@ -110,6 +110,20 @@
111111
112112 q = parser.parse("douglas -adams guides");
113113 assertEquals("[contents:guides, contents:douglas, contents:guide]", Arrays.toString(parser.getHighlightTerms()));
 114+
 115+ /* ================== PREFIXES ============ */
 116+ q = parser.parseRaw("intitle:tests");
 117+ // FIXME: stemming for titles?
 118+ assertEquals("title:tests title:test^0.5",q.toString());
 119+
 120+ q = parser.parseRaw("intitle:multiple words in title");
 121+ assertEquals("+title:multiple +title:words +title:in +title:title",q.toString());
 122+
 123+ q = parser.parseRaw("intitle:[2]:tests");
 124+ assertEquals("title:tests title:test^0.5",q.toString());
 125+
 126+ q = parser.parseRaw("something (intitle:[2]:tests) out");
 127+ assertEquals("+contents:something +(title:tests title:test^0.5) +contents:out",q.toString());
114128
115129
116130 } catch(Exception e){
Index: branches/lucene-search-2.1/lsearchd
@@ -1,3 +1,3 @@
2 -#!/bin/sh
 2+#!/bin/bash
33 jardir=`dirname $0` # put your jar dir here!
44 java -Djava.rmi.server.codebase=file://$jardir/LuceneSearch.jar -Djava.rmi.server.hostname=$HOSTNAME -jar $jardir/LuceneSearch.jar $*
Index: branches/lucene-search-2.1/src/org/apache/lucene/search/ArticleNamespaceScaling.java
@@ -1,10 +1,11 @@
22 package org.apache.lucene.search;
33
 4+import java.io.Serializable;
45 import java.util.Collections;
56 import java.util.Map;
67 import java.util.Map.Entry;
78
8 -public class ArticleNamespaceScaling {
 9+public class ArticleNamespaceScaling implements Serializable {
910 protected float[] nsBoost = null;
1011 public static float talkPageScale = 0.25f;
1112
Index: branches/lucene-search-2.1/src/org/apache/lucene/search/ArticleQueryWrap.java
@@ -34,7 +34,7 @@
3535 @Override
3636 public float customScore(int doc, float subQueryScore, float boostScore) throws IOException {
3737 float sub = 1;
38 - if(article.isSubpage(doc))
 38+ if(article!=null && article.isSubpage(doc))
3939 sub = SUBPAGE;
4040
4141 float r = 1;
@@ -42,10 +42,14 @@
4343 r = rank.rank(doc);
4444
4545 float ns = 1;
46 - if(nsScaling != null)
 46+ if(nsScaling != null && article != null)
4747 ns = nsScaling.scaleNamespace(article.namespace(doc));
4848
49 - return sub * r * ns * scale.score(subQueryScore,article.daysOld(doc));
 49+ float ageScaled = subQueryScore;
 50+ if(scale !=null)
 51+ ageScaled = scale.score(subQueryScore,article.daysOld(doc));
 52+
 53+ return sub * r * ns * ageScaled;
5054 }
5155
5256 @Override
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/Localization.java
@@ -340,7 +340,7 @@
341341 }
342342 }
343343 } catch (Exception e) {
344 - log.warn("Error processing message file at "+MessageFormat.format(loc+"Messages{0}.php",langCode));
 344+ log.warn("Error processing message file at "+MessageFormat.format(loc+"Messages{0}.php",langCode),e);
345345 }
346346 log.warn("Could not load localization for "+langCode);
347347 badLocalizations.add(langCode.toLowerCase());
@@ -449,7 +449,7 @@
450450 log.debug("Read interwiki map from jar file");
451451 r.close();
452452 } catch(Exception e){
453 - log.warn("Cannot read interwiki map from jar file");
 453+ log.warn("Cannot read interwiki map from jar file",e);
454454 }
455455 }
456456
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/PHPParser.java
@@ -416,25 +416,33 @@
417417 "NS_CATEGORY_TALK => 'Razgovor_o_kategoriji');";
418418 String text2 = "$fallback='sr-ec';";
419419
420 - String file = "/var/www/html/wiki-lucene/phase3/languages/messages/MessagesEn.php";
421 -
422 - PHPParser p = new PHPParser();
423 - String php = p.readFile(file);
424 - Hashtable<String,Integer> map = p.getNamespaces(php);
425 - System.out.println(map);
426 - System.out.println(p.getFallBack(text2));
427 - System.out.println(p.getRedirectMagic(php));
428 -
429 - System.out.println(p.getLanguages("'wgLanguageCode' => array('default' => '$lang')"));
430 - String initset = p.readURL(new URL("file:///home/rainman/Desktop/InitialiseSettings.php"));
431 - System.out.println(p.getLanguages(initset));
432 - System.out.println(p.getServer(initset));
433 - System.out.println(p.getDefaultSearch(initset));
434 - System.out.println(p.getMetaNamespace(initset));
435 - System.out.println(p.getMetaNamespaceTalk(initset));
436 - System.out.println(p.getExtraNamespaces(initset));
437 - System.out.println(p.getNamespacesWithSubpages(initset));
438 - System.out.println(p.getContentNamespaces(initset));
439 -
 420+ try{
 421+ String file = "/var/www/wiki/phase3/languages/messages/MessagesEn.php";
 422+
 423+ PHPParser p = new PHPParser();
 424+ String php = p.readFile(file);
 425+ Hashtable<String,Integer> map = p.getNamespaces(php);
 426+ System.out.println(map);
 427+ System.out.println(p.getFallBack(text2));
 428+ System.out.println(p.getRedirectMagic(php));
 429+
 430+ System.out.println(p.getLanguages("'wgLanguageCode' => array('default' => '$lang')"));
 431+ } catch(Exception e){
 432+ e.printStackTrace();
 433+ }
 434+ try{
 435+ PHPParser p = new PHPParser();
 436+ String initset = p.readURL(new URL("file:///home/wikipedia/common/php-1.5/InitialiseSettings.php"));
 437+ System.out.println(p.getLanguages(initset));
 438+ System.out.println("wgServer: " + p.getServer(initset));
 439+ System.out.println(p.getDefaultSearch(initset));
 440+ System.out.println(p.getMetaNamespace(initset));
 441+ System.out.println(p.getMetaNamespaceTalk(initset));
 442+ System.out.println(p.getExtraNamespaces(initset));
 443+ System.out.println(p.getNamespacesWithSubpages(initset));
 444+ System.out.println(p.getContentNamespaces(initset));
 445+ } catch(Exception e){
 446+ e.printStackTrace();
 447+ }
440448 }
441449 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java
@@ -149,10 +149,10 @@
150150 in.close();
151151 } catch (IOException e) {
152152 e.printStackTrace();
153 - log.error("Error reading unicode data file from resource : "+e.getMessage());
 153+ log.error("Error reading unicode data file from resource : "+e.getMessage(),e);
154154 } catch (Exception e){
155155 e.printStackTrace();
156 - log.error("Error in unicode data file : "+e.getMessage());
 156+ log.error("Error in unicode data file : "+e.getMessage(),e);
157157 }
158158 }
159159
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/LinkReader.java
@@ -61,7 +61,7 @@
6262 if( page.Title.Namespace >= 0)
6363 links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id));
6464 } catch(Exception e){
65 - log.error("Error adding article "+t+" : "+e.getMessage());
 65+ log.error("Error adding article "+t+" : "+e.getMessage(),e);
6666 e.printStackTrace();
6767 }
6868 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/Links.java
@@ -268,7 +268,7 @@
269269 flush();
270270 } catch(Exception e){
271271 // report but continue
272 - log.warn("Error opening links index "+ iid +": "+e.getMessage());
 272+ log.warn("Error opening links index "+ iid +": "+e.getMessage(),e);
273273 }
274274 // batch add
275275 writer = WikiIndexModifier.openForWrite(iid.getIndexPath(),false,new SimpleAnalyzer());
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexWriter.java
@@ -103,7 +103,7 @@
104104 }
105105 reader.close();
106106 } catch(Exception e){
107 - log.warn("Error opening for batch update read "+iid+" : "+e.getMessage());
 107+ log.warn("Error opening for batch update read "+iid+" : "+e.getMessage(),e);
108108 }
109109 // batch add
110110 openWriter(iid.getIndexPath(),false);
@@ -149,10 +149,10 @@
150150 log.debug(iid+": Adding document "+a);
151151 } catch (IOException e) {
152152 e.printStackTrace();
153 - log.error("I/O Error writing articlet "+a+" to index "+writer);
 153+ log.error("I/O Error writing articlet "+a+" to index "+writer,e);
154154 } catch(Exception e){
155155 e.printStackTrace();
156 - log.error("Error adding document "+a+" with message: "+e.getMessage());
 156+ log.error("Error adding document "+a+" with message: "+e.getMessage(),e);
157157 }
158158 }
159159
@@ -170,7 +170,7 @@
171171 writer.addDocument(doc,analyzer);
172172 } catch (IOException e) {
173173 e.printStackTrace();
174 - log.error("Error adding title info for article "+article+" with message: "+e.getMessage());
 174+ log.error("Error adding title info for article "+article+" with message: "+e.getMessage(),e);
175175 }
176176 }
177177
@@ -181,7 +181,7 @@
182182 writer.optimize();
183183 writer.close();
184184 } catch(IOException e){
185 - log.error("I/O error optimizing/closing index at "+iid.getImportPath()+" : "+e.getMessage());
 185+ log.error("I/O error optimizing/closing index at "+iid.getImportPath()+" : "+e.getMessage(),e);
186186 throw e;
187187 }
188188 }
@@ -209,7 +209,7 @@
210210 try {
211211 writer.addDocument(doc);
212212 } catch (IOException e) {
213 - log.warn("Cannot write metadata : "+e.getMessage());
 213+ log.warn("Cannot write metadata : "+e.getMessage(),e);
214214 }
215215 }
216216
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/api/NgramIndexer.java
@@ -55,7 +55,7 @@
5656 WikiIndexModifier.makeDBPath(path); // ensure all directories are made
5757 writer = new IndexWriter(path,analyzer,newIndex);
5858 } catch (IOException e1) {
59 - log.error("I/O error openning index for addition of documents at "+path+" : "+e.getMessage());
 59+ log.error("I/O error openning index for addition of documents at "+path+" : "+e.getMessage(),e);
6060 throw e1;
6161 }
6262 }
@@ -81,7 +81,7 @@
8282 writer.close();
8383 writer = null;
8484 } catch(IOException e){
85 - log.warn("I/O error closing index at "+path);
 85+ log.warn("I/O error closing index at "+path,e);
8686 throw e;
8787 }
8888 }
@@ -95,7 +95,7 @@
9696 writer.close();
9797 writer = null;
9898 } catch(IOException e){
99 - log.warn("I/O error optimizing/closing index at "+path);
 99+ log.warn("I/O error optimizing/closing index at "+path,e);
100100 throw e;
101101 }
102102 }
@@ -259,7 +259,7 @@
260260 log.debug("Deleting document matching term "+t);
261261 writer.deleteDocuments(t);
262262 } catch (Exception e) {
263 - log.error("Cannot delete document : "+e.getMessage());
 263+ log.error("Cannot delete document : "+e.getMessage(),e);
264264 e.printStackTrace();
265265 }
266266 }
@@ -269,7 +269,7 @@
270270 log.debug("Adding document "+doc);
271271 writer.addDocument(doc);
272272 } catch (Exception e) {
273 - log.error("Cannot add document "+doc+" : "+e.getMessage());
 273+ log.error("Cannot add document "+doc+" : "+e.getMessage(),e);
274274 e.printStackTrace();
275275 }
276276 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/Suggest.java
@@ -1301,7 +1301,7 @@
13021302 ret.add(res.get(i));
13031303 return ret;
13041304 } catch (IOException e) {
1305 - log.error("Cannot get suggestions for "+word+" at "+iid+" : "+e.getMessage());
 1305+ log.error("Cannot get suggestions for "+word+" at "+iid+" : "+e.getMessage(),e);
13061306 e.printStackTrace();
13071307 return new ArrayList<SuggestResult>();
13081308 }
@@ -1412,7 +1412,7 @@
14131413 ret.add(res.get(i));
14141414 return ret;
14151415 } catch (IOException e) {
1416 - log.error("Cannot get title suggestions for "+title+" at "+iid+" : "+e.getMessage());
 1416+ log.error("Cannot get title suggestions for "+title+" at "+iid+" : "+e.getMessage(),e);
14171417 e.printStackTrace();
14181418 return new ArrayList<SuggestResult>();
14191419 }
@@ -1516,7 +1516,7 @@
15171517 return res.get(0);
15181518 }
15191519 } catch (IOException e) {
1520 - log.warn("I/O error while suggesting split on "+iid+" : "+e.getMessage());
 1520+ log.warn("I/O error while suggesting split on "+iid+" : "+e.getMessage(),e);
15211521 e.printStackTrace();
15221522 }
15231523 return null;
@@ -1531,7 +1531,7 @@
15321532 if(freqJoin > 0 && freqJoin > freqPhrase)
15331533 return new SuggestResult(word1+word2,freqJoin,1);
15341534 } catch (IOException e) {
1535 - log.warn("I/O error while suggesting join on "+iid+" : "+e.getMessage());
 1535+ log.warn("I/O error while suggesting join on "+iid+" : "+e.getMessage(),e);
15361536 e.printStackTrace();
15371537 }
15381538 return null;
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchDaemon.java
@@ -375,7 +375,7 @@
376376 try{
377377 sendOutputLine((float)score + " " + namespace + " " + encodeTitle(title));
378378 } catch(Exception e){
379 - log.error("Error sending result line ("+score + " " + namespace + " " + title +"): "+e.getMessage());
 379+ log.error("Error sending result line ("+score + " " + namespace + " " + title +"): "+e.getMessage(),e);
380380 }
381381 }
382382
@@ -383,7 +383,7 @@
384384 try{
385385 sendOutputLine(namespace + " " + encodeTitle(title));
386386 } catch(Exception e){
387 - log.error("Error sending prefix result line (" + namespace + " " + title +"): "+e.getMessage());
 387+ log.error("Error sending prefix result line (" + namespace + " " + title +"): "+e.getMessage(),e);
388388 }
389389 }
390390
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HTTPIndexDaemon.java
@@ -87,20 +87,20 @@
8888 }
8989
9090 } catch (SecurityException e) {
91 - log.error("Called method "+methodName+" which is not visible");
 91+ log.error("Called method "+methodName+" which is not visible",e);
9292 sendHeaders(400,"Bad Request");
9393 } catch (NoSuchMethodException e) {
94 - log.error("Called unrecognized method "+methodName+". Uri was: "+uri);
 94+ log.error("Called unrecognized method "+methodName+". Uri was: "+uri,e);
9595 sendHeaders(404,"Not Found");
9696 } catch (IllegalArgumentException e) {
97 - log.error("Called method "+methodName+" with illegel arguments");
 97+ log.error("Called method "+methodName+" with illegel arguments",e);
9898 sendHeaders(400,"Bad Request");
9999 } catch (IllegalAccessException e) {
100 - log.error("Cannot call method "+methodName+", illegal access.");
 100+ log.error("Cannot call method "+methodName+", illegal access.",e);
101101 sendHeaders(400,"Bad Request");
102102 } catch (InvocationTargetException e) {
103103 e.printStackTrace();
104 - log.error("Error while calling method "+methodName+": invocation target exception");
 104+ log.error("Error while calling method "+methodName+": invocation target exception",e);
105105 sendHeaders(400,"Bad Request");
106106 }
107107 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchServer.java
@@ -41,7 +41,7 @@
4242 */
4343 public class SearchServer extends Thread {
4444 private static int port = 8123;
45 - private static int maxThreads = 25;
 45+ private static int maxThreads = 80;
4646 private static ServerSocket sock;
4747 public static String indexPath;
4848 public static String[] dbnames;
@@ -90,34 +90,41 @@
9191
9292 for (;;) {
9393 Socket client = null;
94 - try {
95 - log.debug("Listening...");
96 - client = sock.accept();
97 - } catch (Exception e) {
98 - log.error("accept() error: " + e.getMessage());
99 - // be sure to close all sockets
100 - if(client != null){
101 - try{ client.getInputStream().close(); } catch(Exception e1) {}
102 - try{ client.getOutputStream().close(); } catch(Exception e1) {}
103 - try{ client.close(); } catch(Exception e1) {}
 94+ try{
 95+ try {
 96+ log.debug("Listening...");
 97+ client = sock.accept();
 98+ } catch (Exception e) {
 99+ log.error("accept() error: " + e.getMessage(),e);
 100+ // be sure to close all sockets
 101+ if(client != null){
 102+ try{ client.getInputStream().close(); } catch(Exception e1) {}
 103+ try{ client.getOutputStream().close(); } catch(Exception e1) {}
 104+ try{ client.close(); } catch(Exception e1) {}
 105+ }
 106+ continue;
104107 }
105 - continue;
106 - }
107 -
108 - int threadCount = SearchDaemon.getOpenCount();
109 - if (threadCount > maxThreads) {
110 - stats.add(false, 0, threadCount);
111 - log.error("too many connections, skipping a request");
112 - // be sure to close all sockets
113 - if(client != null){
114 - try{ client.getInputStream().close(); } catch(Exception e1) {}
115 - try{ client.getOutputStream().close(); } catch(Exception e1) {}
116 - try{ client.close(); } catch(Exception e1) {}
 108+
 109+ int threadCount = SearchDaemon.getOpenCount();
 110+ if (threadCount > maxThreads) {
 111+ stats.add(false, 0, threadCount);
 112+ log.error("too many connections, skipping a request");
 113+ // be sure to close all sockets
 114+ if(client != null){
 115+ try{ client.getInputStream().close(); } catch(Exception e1) {}
 116+ try{ client.getOutputStream().close(); } catch(Exception e1) {}
 117+ try{ client.close(); } catch(Exception e1) {}
 118+ }
 119+ continue;
 120+ } else {
 121+ SearchDaemon worker = new SearchDaemon(client);
 122+ pool.execute(worker);
117123 }
118 - continue;
119 - } else {
120 - SearchDaemon worker = new SearchDaemon(client);
121 - pool.execute(worker);
 124+ } catch(Exception e){
 125+ log.error("Search server exception: "+e.getMessage(),e);
 126+ try{ client.getInputStream().close(); } catch(Exception e1) {}
 127+ try{ client.getOutputStream().close(); } catch(Exception e1) {}
 128+ try{ client.close(); } catch(Exception e1) {}
122129 }
123130 }
124131 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HttpHandler.java
@@ -59,7 +59,7 @@
6060 istrm = new DataInputStream(new BufferedInputStream(s.getInputStream()));
6161 ostrm = new PrintWriter(new BufferedWriter(new OutputStreamWriter(s.getOutputStream(),"utf-8")));
6262 } catch (IOException e) {
63 - log.error("I/O in opening http socket.");
 63+ log.error("I/O in opening http socket.",e);
6464 }
6565 }
6666
@@ -113,7 +113,7 @@
114114 log.debug("No keep-alive, closing connection ... ");
115115 } catch (Exception e) {
116116 e.printStackTrace();
117 - log.error(e.getMessage());
 117+ log.error(e.getMessage(),e);
118118 } finally {
119119 if (!headersSent) {
120120 sendError(500, "Internal server error", "An internal error occurred: no header sent.");
@@ -174,7 +174,7 @@
175175 } catch (URISyntaxException e) {
176176 sendError(400, "Bad Request",
177177 "Couldn't make sense of the given URI.");
178 - log.warn("Bad URI in request: " + rawUri);
 178+ log.warn("Bad URI in request: " + rawUri,e);
179179 return;
180180 }
181181
@@ -261,7 +261,7 @@
262262 //log.error("Internal error, read "+read+" bytes istead of "+contentLength+" from POST request");
263263 return data;
264264 } catch (IOException e) {
265 - log.warn("Could not send raw data in bytes to output stream.");
 265+ log.warn("Could not send raw data in bytes to output stream.",e);
266266 }
267267 return null;
268268 }
@@ -273,7 +273,7 @@
274274 try {
275275 sin = istrm.readLine();
276276 } catch (IOException e) {
277 - log.warn("I/O problem in reading from stream");
 277+ log.warn("I/O problem in reading from stream",e);
278278 }
279279 log.debug("<<<"+ sin);
280280 return sin;
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HTTPIndexServer.java
@@ -58,7 +58,7 @@
5959 serviceReady = true;
6060 client = sock.accept();
6161 } catch (Exception e) {
62 - log.error("accept() error: " + e.getMessage());
 62+ log.error("accept() error: " + e.getMessage(),e);
6363 // be sure to close all sockets
6464 if(client != null){
6565 try{ client.getInputStream().close(); } catch(Exception e1) {}
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIServer.java
@@ -29,7 +29,7 @@
3030 log.info(name+" bound");
3131 } catch (Exception e) {
3232 e.printStackTrace();
33 - log.warn("Cannot bind "+name+" exception:"+e.getMessage());
 33+ log.warn("Cannot bind "+name+" exception:"+e.getMessage(),e);
3434 }
3535 }
3636
@@ -40,7 +40,7 @@
4141 log.info(name+" bound");
4242 } catch (Exception e) {
4343 e.printStackTrace();
44 - log.warn("Cannot bind "+name+" exception:"+e.getMessage());
 44+ log.warn("Cannot bind "+name+" exception:"+e.getMessage(),e);
4545 }
4646 }
4747
@@ -79,7 +79,7 @@
8080 }
8181 return true;
8282 } catch(IOException e){
83 - log.warn("Error rebinding searchers for "+iid+" : "+e.getMessage());
 83+ log.warn("Error rebinding searchers for "+iid+" : "+e.getMessage(),e);
8484 e.printStackTrace();
8585 }
8686 return false;
@@ -96,7 +96,7 @@
9797 return true;
9898 } catch (RemoteException e) {
9999 e.printStackTrace();
100 - log.warn("Error binding searchable with basename "+name+" : "+e.getMessage());
 100+ log.warn("Error binding searchable with basename "+name+" : "+e.getMessage(),e);
101101 } catch(Exception e){
102102 e.printStackTrace();
103103 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerClient.java
@@ -107,7 +107,7 @@
108108 log.debug("Calling remotely indexUpdate("+myhost+","+iid+") on "+host);
109109 r.indexUpdated(myhost,iid.toString());
110110 } catch (Exception e) {
111 - log.warn("Error invoking remote method notifyIndexUpdated() on host "+host+" : "+e.getMessage());
 111+ log.warn("Error invoking remote method notifyIndexUpdated() on host "+host+" : "+e.getMessage(),e);
112112 continue;
113113 }
114114 }
@@ -136,10 +136,10 @@
137137 log.debug("Got new RMI messenger for host "+host);
138138 return r;
139139 } catch (RemoteException e) {
140 - log.warn("Cannot contact RMI registry for host "+host+" : "+e.getMessage());
 140+ log.warn("Cannot contact RMI registry for host "+host+" : "+e.getMessage(),e);
141141 throw e;
142142 } catch (NotBoundException e) {
143 - log.warn("No RMIMessenger instance at host "+host+" : "+e.getMessage());
 143+ log.warn("No RMIMessenger instance at host "+host+" : "+e.getMessage(),e);
144144 throw e;
145145 }
146146 }
@@ -160,7 +160,7 @@
161161 return res;
162162 } catch (Exception e) {
163163 //e.printStackTrace();
164 - log.warn("Error invoking remote method getIndexTimestamp() on host "+host+" : "+e.getMessage());
 164+ log.warn("Error invoking remote method getIndexTimestamp() on host "+host+" : "+e.getMessage(),e);
165165 }
166166 return null;
167167 }
@@ -171,7 +171,7 @@
172172 log.debug("Calling enqueueUpdateRecords("+records.length+" records) on "+host);
173173 r.enqueueUpdateRecords(records);
174174 } catch (Exception e) {
175 - log.warn("Error invoking remote method enqueueUpdateRecords() on host "+host+" : "+e.getMessage());
 175+ log.warn("Error invoking remote method enqueueUpdateRecords() on host "+host+" : "+e.getMessage(),e);
176176 throw e;
177177 }
178178 }
@@ -182,7 +182,7 @@
183183 log.debug("Calling enqueueFrontend("+records.length+" records) on "+host);
184184 return r.enqueueFrontend(records);
185185 } catch (Exception e) {
186 - log.warn("Error invoking remote method enqueueFrontend() on host "+host+" : "+e.getMessage());
 186+ log.warn("Error invoking remote method enqueueFrontend() on host "+host+" : "+e.getMessage(),e);
187187 throw e;
188188 }
189189 }
@@ -198,7 +198,7 @@
199199 recheckRemote(iid,host);
200200 HighlightPack pack = new HighlightPack(new SearchResults());
201201 pack.res.retry();
202 - log.warn("Error invoking remote method searchPart on host "+host+" : "+e.getMessage());
 202+ log.warn("Error invoking remote method searchPart on host "+host+" : "+e.getMessage(),e);
203203 e.printStackTrace();
204204 return pack;
205205 }
@@ -211,7 +211,7 @@
212212 log.debug("Calling requestFlushAndNotify("+dbname+" records) on "+host);
213213 return r.requestFlushAndNotify(dbname);
214214 } catch (Exception e) {
215 - log.warn("Error invoking remote method requestFlushAndNotify on host "+host+" : "+e.getMessage());
 215+ log.warn("Error invoking remote method requestFlushAndNotify on host "+host+" : "+e.getMessage(),e);
216216 return false;
217217 }
218218 }
@@ -222,7 +222,7 @@
223223 log.debug("Calling isSuccessfulFlush("+dbname+" records) on "+host);
224224 return r.isSuccessfulFlush(dbname);
225225 } catch (Exception e) {
226 - log.warn("Error invoking remote method isSuccessfulFlush on host "+host+" : "+e.getMessage());
 226+ log.warn("Error invoking remote method isSuccessfulFlush on host "+host+" : "+e.getMessage(),e);
227227 throw new IOException("Remote error");
228228 }
229229 }
@@ -235,7 +235,7 @@
236236 log.debug(" \\-> got: "+size);
237237 return size;
238238 } catch (Exception e) {
239 - log.warn("Error invoking remote method getIndexerQueueSize on host "+host+" : "+e.getMessage());
 239+ log.warn("Error invoking remote method getIndexerQueueSize on host "+host+" : "+e.getMessage(),e);
240240 return -1;
241241 }
242242 }
@@ -268,14 +268,14 @@
269269 return r.searchTitles(dbrole,searchterm,words,query,filter,offset,limit,explain,sortByPhrases);
270270 } catch(Exception e){
271271 if(host == null){
272 - log.warn("Cannot find title host for "+dbrole);
 272+ log.warn("Cannot find title host for "+dbrole,e);
273273 return new SearchResults();
274274 }
275275 e.printStackTrace();
276276 recheckRemote(dbrole,host);
277277 SearchResults res = new SearchResults();
278278 res.setErrorMsg("Error searching titles: "+e.getMessage());
279 - log.warn("Error invoking remote method searchTitles on host "+host+" : "+e.getMessage());
 279+ log.warn("Error invoking remote method searchTitles on host "+host+" : "+e.getMessage(),e);
280280 return res;
281281 }
282282 }
@@ -286,12 +286,12 @@
287287 return r.suggest(dbrole,searchterm,tokens,info,nsf);
288288 } catch(Exception e){
289289 if(host == null){
290 - log.warn("Cannot find spell-check host for "+dbrole);
 290+ log.warn("Cannot find spell-check host for "+dbrole,e);
291291 return null;
292292 }
293293 e.printStackTrace();
294294 recheckRemote(dbrole,host);
295 - log.warn("Error invoking suggest() on "+host+" : "+e.getMessage());
 295+ log.warn("Error invoking suggest() on "+host+" : "+e.getMessage(),e);
296296 return null;
297297 }
298298 }
@@ -302,7 +302,7 @@
303303 } catch(Exception e){
304304 recheckRemote(dbrole,host);
305305 e.printStackTrace();
306 - log.warn("Error invoking getFuzzy() on "+host+" : "+e.getMessage());
 306+ log.warn("Error invoking getFuzzy() on "+host+" : "+e.getMessage(),e);
307307 return new ArrayList<SuggestResult>();
308308 }
309309 }
@@ -314,7 +314,7 @@
315315 return r.searchRelated(dbrole,searchterm,offset,limit);
316316 } catch(Exception e){
317317 e.printStackTrace();
318 - log.warn("Error invoking searchRelated() on "+host+" : "+e.getMessage());
 318+ log.warn("Error invoking searchRelated() on "+host+" : "+e.getMessage(),e);
319319 recheckRemote(dbrole,host);
320320 SearchResults res = new SearchResults();
321321 res.setErrorMsg("Error searching related index: "+e.getMessage());
@@ -357,7 +357,7 @@
358358 } catch (Exception e) {
359359 recheckRemote(dbrole,host);
360360 e.printStackTrace();
361 - log.error("Messenger not bound: "+e.getMessage());
 361+ log.error("Messenger not bound: "+e.getMessage(),e);
362362 return new ArrayList<String>();
363363 }
364364 }
@@ -369,7 +369,7 @@
370370 return r.getSearcherPoolStatus(dbrole);
371371 } catch(NotBoundException e){
372372 e.printStackTrace();
373 - log.error("Messenger not bound: "+e.getMessage());
 373+ log.error("Messenger not bound: "+e.getMessage(),e);
374374 return new SearcherPoolStatus(false);
375375 }
376376 }
@@ -381,7 +381,7 @@
382382 r.requestSnapshotAndNotify(optimize,pattern,forPrecursor);
383383 } catch(NotBoundException e){
384384 e.printStackTrace();
385 - log.error("Messenger not bound: "+e.getMessage());
 385+ log.error("Messenger not bound: "+e.getMessage(),e);
386386 }
387387 }
388388
@@ -392,7 +392,7 @@
393393 return r.snapshotFinished(optimize,pattern,forPrecursor);
394394 } catch(NotBoundException e){
395395 e.printStackTrace();
396 - log.error("Messenger not bound: "+e.getMessage());
 396+ log.error("Messenger not bound: "+e.getMessage(),e);
397397 }
398398 return false;
399399 }
@@ -404,7 +404,7 @@
405405 r.addLocalizationCustomMapping(namespaceIndexToName, dbname);
406406 } catch(NotBoundException e){
407407 e.printStackTrace();
408 - log.error("Messenger not bound: "+e.getMessage());
 408+ log.error("Messenger not bound: "+e.getMessage(),e);
409409 }
410410 }
411411 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/benchmark/WordTerms.java
@@ -50,7 +50,7 @@
5151 try {
5252 words = loadWordFreq(path);
5353 } catch (IOException e) {
54 - log.error("Cannot open dictionary of search terms in "+path);
 54+ log.error("Cannot open dictionary of search terms in "+path,e);
5555 e.printStackTrace();
5656 }
5757 }
@@ -59,7 +59,7 @@
6060 try {
6161 words = loadWordFreq(stream);
6262 } catch (IOException e) {
63 - log.error("Cannot open dictionary of search terms from stream");
 63+ log.error("Cannot open dictionary of search terms from stream",e);
6464 e.printStackTrace();
6565 }
6666 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java
@@ -327,7 +327,7 @@
328328 }
329329 reader.close();
330330 } catch(Exception e){
331 - log.warn("Error while opening prefix precursor "+pre+" : "+e.getMessage());
 331+ log.warn("Error while opening prefix precursor "+pre+" : "+e.getMessage(),e);
332332 }
333333 // batch add
334334 writer = WikiIndexModifier.openForWrite(pre.getIndexPath(),false,new PrefixAnalyzer());
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/SimpleIndexWriter.java
@@ -90,7 +90,7 @@
9191 log.info("Making new index at path "+path);
9292 writer = new IndexWriter(path,null,true);
9393 } catch (IOException e1) {
94 - log.error("I/O error openning index for addition of documents at "+path+" : "+e.getMessage());
 94+ log.error("I/O error openning index for addition of documents at "+path+" : "+e1.getMessage(),e1);
9595 return null;
9696 }
9797 }
@@ -149,10 +149,10 @@
150150 writer.addDocument(doc,indexAnalyzer);
151151 log.debug(target+": Adding document "+a);
152152 } catch (IOException e) {
153 - log.error("I/O Error writing article "+a+" to index "+target.getImportPath()+" : "+e.getMessage());
 153+ log.error("I/O Error writing article "+a+" to index "+target.getImportPath()+" : "+e.getMessage(),e);
154154 } catch(Exception e){
155155 e.printStackTrace();
156 - log.error("Error adding document "+a+" with message: "+e.getMessage());
 156+ log.error("Error adding document "+a+" with message: "+e.getMessage(),e);
157157 }
158158 }
159159
@@ -169,7 +169,7 @@
170170 addDocument(writer,doc,a,target);
171171 } catch (IOException e) {
172172 e.printStackTrace();
173 - log.error("Error adding highlight document for key="+a.getTitleObject().getKey()+" : "+e.getMessage());
 173+ log.error("Error adding highlight document for key="+a.getTitleObject().getKey()+" : "+e.getMessage(),e);
174174 }
175175 }
176176 /** Add to title to the titles index */
@@ -185,7 +185,7 @@
186186 addDocument(writer,doc,a,target);
187187 } catch (IOException e) {
188188 e.printStackTrace();
189 - log.error("Error adding title document for key="+a.getTitleObject().getKey()+" : "+e.getMessage());
 189+ log.error("Error adding title document for key="+a.getTitleObject().getKey()+" : "+e.getMessage(),e);
190190 }
191191 }
192192
@@ -205,7 +205,7 @@
206206 writer.optimize();
207207 writer.close();
208208 } catch(IOException e){
209 - log.warn("I/O error optimizing/closing index at "+iid.getImportPath());
 209+ log.warn("I/O error optimizing/closing index at "+iid.getImportPath(),e);
210210 throw e;
211211 }
212212 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/BuildAll.java
@@ -132,7 +132,7 @@
133133 }
134134 } catch(IOException e){
135135 e.printStackTrace();
136 - log.error("Error during rebuild of "+iid+" : "+e.getMessage());
 136+ log.error("Error during rebuild of "+iid+" : "+e.getMessage(),e);
137137 }
138138 }
139139 // link titles
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/storage/LinkAnalysisStorage.java
@@ -169,7 +169,7 @@
170170 return getAnalitics(d.get("key"),d);
171171 } catch(IOException e){
172172 //TODO: Java is not letting us throw exception here
173 - log.error("I/O exception in LinkAnalysisIterator:next() : "+e.getMessage());
 173+ log.error("I/O exception in LinkAnalysisIterator:next() : "+e.getMessage(),e);
174174 return null;
175175 }
176176 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/storage/MySQLStorage.java
@@ -61,7 +61,7 @@
6262 try {
6363 Class.forName("com.mysql.jdbc.Driver");
6464 } catch (ClassNotFoundException e) {
65 - log.error("Cannot load mysql jdbc driver, class not found: "+e.getMessage());
 65+ log.error("Cannot load mysql jdbc driver, class not found: "+e.getMessage(),e);
6666 }
6767
6868 lib = config.getString("Storage","lib","./sql");
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/IndexRegistry.java
@@ -145,7 +145,7 @@
146146 }
147147 }
148148 } catch (IOException e) {
149 - log.warn("Cannot follow symlink for file "+iid.getSearchPath());
 149+ log.warn("Cannot follow symlink for file "+iid.getSearchPath(),e);
150150 }
151151
152152 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/GlobalConfiguration.java
@@ -1463,7 +1463,7 @@
14641464 if(repo == null && wgServer != null){
14651465 String key = findSuffix(wgServer.keySet(),dbname);
14661466 if(key == null)
1467 - key = "<default>";
 1467+ key = "default";
14681468 repo = wgServer.get(key);
14691469 if(repo != null){
14701470 if(!repo.endsWith("/"))
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java
@@ -39,7 +39,7 @@
4040 *
4141 */
4242 public class UpdateThread extends Thread {
43 -
 43+ public static long MAX_DEPLOYMENT_DELAY = 15*60; // 15 minutes
4444 enum RebuildType { STANDALONE, FULL };
4545
4646 /** iids currently being deployed and out of rotation */
@@ -79,7 +79,7 @@
8080 pending.remove(li.iid.toString());
8181 } catch(Exception e){
8282 e.printStackTrace();
83 - log.error("Error syncing "+li+" : "+e.getMessage());
 83+ log.error("Error syncing "+li+" : "+e.getMessage(),e);
8484 }
8585 }
8686 }
@@ -263,7 +263,7 @@
264264
265265 } catch(IOException ioe){
266266 ioe.printStackTrace();
267 - log.error("I/O error updating index "+iid+" at "+li.path+" : "+ioe.getMessage());
 267+ log.error("I/O error updating index "+iid+" at "+li.path+" : "+ioe.getMessage(),ioe);
268268 badIndexes.put(li.iid.toString(),li.timestamp);
269269 }
270270 }
@@ -276,6 +276,7 @@
277277 HashSet<String> group = iid.getSearchHosts();
278278 int succ = 0, fail = 0;
279279 boolean reroute = false;
 280+ long waitedSoFar = 0;
280281 if(type == RebuildType.FULL){
281282 // never deploy more than one searcher of iid in a search group
282283 // wait for other peers to finish deploying before proceeding
@@ -292,37 +293,39 @@
293294 fail ++;
294295 } catch(RemoteException e){
295296 e.printStackTrace();
296 - log.warn("Error response from "+host+" : "+e.getMessage());
 297+ log.warn("Error response from "+host+" : "+e.getMessage(),e);
297298 }
298299 }
299300 }
300301 if(fail == 0 && succ >= 1){
301302 wait = false; // proceed to deployment
302303 reroute = true;
303 - } else if(fail == 0 && succ == 0){
 304+ } else if(succ == 0){
304305 wait = false; // we're the only one alive, just deploy..
305306 } else
306307 wait = true;
307308 }
308309 if(wait){ // wait random time (5 -> 15 seconds)
309310 try {
310 - Thread.sleep((long)(10000 * (Math.random()+0.5)));
 311+ long interval = (long)(10000 * (Math.random()+0.5));
 312+ waitedSoFar += interval/1000;
 313+ Thread.sleep(interval);
311314 } catch (InterruptedException e) {
312315 e.printStackTrace();
313316 }
314317 }
315 - } while(wait);
 318+ } while(wait && waitedSoFar < MAX_DEPLOYMENT_DELAY);
316319
317320 // reoute queries to other servers
318321 if( reroute ){
319322 log.info("Deploying "+iid);
320323 beingDeployed.add(iid.toString());
321324 try{
322 - RMIServer.unbind(iid,cache.getLocalSearcherPool(iid));
 325+ //RMIServer.unbind(iid,cache.getLocalSearcherPool(iid));
323326 } catch(Exception e) {
324327 // we gave it a shot...
325328 }
326 - cache.updateLocalSearcherPool(iid,null);
 329+ //cache.updateLocalSearcherPool(iid,null);
327330 }
328331
329332 }
@@ -337,7 +340,7 @@
338341 Warmup.warmupIndexSearcher(is,li.iid,true,null);
339342 } catch(IOException e){
340343 e.printStackTrace();
341 - log.warn("Error warmup up "+li+" : "+e.getMessage());
 344+ log.warn("Error warmup up "+li+" : "+e.getMessage(),e);
342345 }
343346 }
344347
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Wildcards.java
@@ -103,7 +103,7 @@
104104 terms.addAll(client.getTerms(e.getValue(),e.getKey(),wildcard,exactCase));
105105 } catch (RemoteException e1) {
106106 e1.printStackTrace();
107 - log.warn("Cannot get terms for "+wildcard+" on host "+e.getValue()+" for "+e.getKey());
 107+ log.warn("Cannot get terms for "+wildcard+" on host "+e.getValue()+" for "+e.getKey(),e1);
108108 }
109109 }
110110 wildcardCache.put(wildcard,terms);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateInfoImpl.java
@@ -41,7 +41,7 @@
4242 try{
4343 return src.getLength(docid,getSlot(pos));
4444 } catch(ArrayIndexOutOfBoundsException e){
45 - log.warn("Exception occured on pos="+pos);
 45+ log.warn("Exception occured on pos="+pos,e);
4646 throw e;
4747 }
4848 }
@@ -50,7 +50,7 @@
5151 try{
5252 return src.getBoost(docid,getSlot(pos));
5353 } catch(ArrayIndexOutOfBoundsException e){
54 - log.warn("Exception occured on pos="+pos);
 54+ log.warn("Exception occured on pos="+pos,e);
5555 throw e;
5656 }
5757 }
@@ -59,7 +59,7 @@
6060 try{
6161 return src.getLengthNoStopWords(docid,getSlot(pos));
6262 } catch(ArrayIndexOutOfBoundsException e){
63 - log.warn("Exception occured on pos="+pos);
 63+ log.warn("Exception occured on pos="+pos,e);
6464 throw e;
6565 }
6666 }
@@ -68,7 +68,7 @@
6969 try{
7070 return src.getLengthComplete(docid,getSlot(pos));
7171 } catch(ArrayIndexOutOfBoundsException e){
72 - log.warn("Exception occured on pos="+pos);
 72+ log.warn("Exception occured on pos="+pos,e);
7373 throw e;
7474 }
7575 }
@@ -92,7 +92,7 @@
9393 try{
9494 return src.getFlags(docid,getSlot(pos));
9595 } catch(ArrayIndexOutOfBoundsException e){
96 - log.warn("Exception occured on pos="+pos);
 96+ log.warn("Exception occured on pos="+pos,e);
9797 throw e;
9898 }
9999 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/RankField.java
@@ -50,7 +50,7 @@
5151 try{
5252 ranks[i] = Integer.parseInt(reader.document(i).get("rank"));
5353 } catch(NumberFormatException e){
54 - log.error("Error for docid = "+i);
 54+ log.error("Error for docid = "+i,e);
5555 e.printStackTrace();
5656 }
5757 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java
@@ -149,7 +149,7 @@
150150 count++;
151151 }
152152 } catch(Exception e){
153 - log.error("Exception during processing stored_field="+field+" on docid="+i+", with stored="+stored+" : "+e.getMessage());
 153+ log.error("Exception during processing stored_field="+field+" on docid="+i+", with stored="+stored+" : "+e.getMessage(),e);
154154 e.printStackTrace();
155155 throw new IOException(e.getMessage());
156156 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Warmup.java
@@ -195,10 +195,10 @@
196196 }
197197 } catch (IOException e) {
198198 e.printStackTrace();
199 - log.error("Error warming up local IndexSearcherMul for "+iid);
 199+ log.error("Error warming up local IndexSearcherMul for "+iid,e);
200200 } catch (Exception e) {
201201 e.printStackTrace();
202 - log.error("Exception during warmup of "+iid+" : "+e.getMessage());
 202+ log.error("Exception during warmup of "+iid+" : "+e.getMessage(),e);
203203 }
204204 }
205205
@@ -223,7 +223,7 @@
224224 is.search(new TermQuery(new Term("contents","wikipedia")),
225225 new FilterWrapper(filter));
226226 } catch (IOException e) {
227 - log.warn("I/O error while preloading filter for "+iid+" for filter "+filter+" : "+e.getMessage());
 227+ log.warn("I/O error while preloading filter for "+iid+" for filter "+filter+" : "+e.getMessage(),e);
228228 }
229229 }
230230 }
@@ -236,7 +236,7 @@
237237 Query q = parser.parse("wikimedia foundation");
238238 is.search(q,new FilterWrapper(new NamespaceFilter("0")));
239239 } catch (IOException e) {
240 - log.error("Error warming up local IndexSearcherMul for "+iid);
 240+ log.error("Error warming up local IndexSearcherMul for "+iid,e);
241241 }
242242 }
243243
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java
@@ -56,7 +56,7 @@
5757 s.close();
5858 } catch (IOException e) {
5959 e.printStackTrace();
60 - log.warn("I/O error closing searchables "+s+" : "+e.getMessage());
 60+ log.warn("I/O error closing searchables "+s+" : "+e.getMessage(),e);
6161 }
6262 }
6363 }
@@ -82,6 +82,7 @@
8383 }
8484
8585 private IndexSearcherMul open(IndexId iid, String path, RAMDirectory directory) throws IOException {
 86+ initialWarmup.add(iid.toString());
8687 IndexSearcherMul searcher = null;
8788 log.debug("Opening local index for "+iid);
8889 if(!iid.isMySearch())
@@ -96,7 +97,7 @@
9798 searcher.setSimilarity(new WikiSimilarity());
9899
99100 // preload meta caches
100 - if(iid.isArticleIndex()){
 101+ if(iid.isArticleIndex() || iid.isTitlesBySuffix()){
101102 IndexReader reader = searcher.getIndexReader();
102103 ArrayList<CacheBuilder> builders = new ArrayList<CacheBuilder>();
103104 Collection fields = reader.getFieldNames(FieldOption.ALL);
@@ -131,7 +132,7 @@
132133 e.printStackTrace();
133134 // tell registry this is not a good index
134135 IndexRegistry.getInstance().invalidateCurrent(iid);
135 - log.error("I/O Error opening index at path "+iid.getCanonicalSearchPath()+" : "+e.getMessage());
 136+ log.error("I/O Error opening index at path "+iid.getCanonicalSearchPath()+" : "+e.getMessage(),e);
136137 throw e;
137138 }
138139 return searcher;
@@ -203,7 +204,9 @@
204205 protected Set<SearchHost> deadPools = Collections.synchronizedSet(new HashSet<SearchHost>());
205206
206207 protected static SearcherCache instance = null;
207 -
 208+
 209+ /** deployment has been tried at least once for these */
 210+ protected static Set<String> initialWarmup = Collections.synchronizedSet(new HashSet<String>());
208211 /**
209212 * If there is a cached local searcher of iid
210213 *
@@ -260,8 +263,8 @@
261264 public IndexSearcherMul getLocalSearcher(IndexId iid) throws IOException{
262265 if(iid == null)
263266 throw new RuntimeException("No such index");
264 - if(UpdateThread.isBeingDeployed(iid))
265 - throw new IOException(iid+" is being deployed");
 267+ if(!initialWarmup.contains(iid.toString()))
 268+ throw new RuntimeException(iid+" is being deployed");
266269 return fromLocalCache(iid.toString());
267270 }
268271
@@ -329,13 +332,13 @@
330333 }
331334 } catch(RemoteException e){
332335 e.printStackTrace();
333 - log.warn("Cannot get searcher status for "+iid+" on "+host+" : "+e.getMessage());
 336+ log.warn("Cannot get searcher status for "+iid+" on "+host+" : "+e.getMessage(),e);
334337 } catch (IOException e) {
335338 e.printStackTrace();
336 - log.warn("I/O error trying to construct remote searcher pool for "+iid+" on "+host+" : "+e.getMessage());
 339+ log.warn("I/O error trying to construct remote searcher pool for "+iid+" on "+host+" : "+e.getMessage(),e);
337340 } catch (NotBoundException e) {
338341 e.printStackTrace();
339 - log.warn("Remote searcher for "+iid+" on "+host+" not bound : "+e.getMessage());
 342+ log.warn("Remote searcher for "+iid+" on "+host+" not bound : "+e.getMessage(),e);
340343 }
341344 // if we reach this point something went wrong
342345 deadPools.add(new SearchHost(iid,host));
@@ -368,7 +371,7 @@
369372 RMIServer.bind(iid,pool.searchers);
370373 }
371374 } catch (IOException e) {
372 - log.warn("I/O error warming index for "+iid+" : "+e.getMessage());
 375+ log.warn("I/O error warming index for "+iid+" : "+e.getMessage(),e);
373376 }
374377 }
375378 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/FilterWrapper.java
@@ -79,5 +79,13 @@
8080 public boolean hasAnyFilters(){
8181 return hasNamespaceFilter() || hasCustomFilters();
8282 }
 83+
 84+ /** If filter is not empty, get this filter, otherwise just get null */
 85+ public Filter getFilterOrNull(){
 86+ if(hasAnyFilters())
 87+ return this;
 88+ else
 89+ return null;
 90+ }
8391
8492 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/ArticleMeta.java
@@ -94,7 +94,7 @@
9595 String ext = "";
9696 if(doc != null)
9797 ext = ", ns="+doc.get("namespace")+", title="+doc.get("title");
98 - log.error("Exception during caching of article info for docid="+i+ext);
 98+ log.error("Exception during caching of article info for docid="+i+ext,e);
9999 e.printStackTrace();
100100 throw new IOException(e.getMessage());
101101 }
@@ -156,7 +156,7 @@
157157 return diff;
158158 } catch (ParseException e) {
159159 e.printStackTrace();
160 - log.error("Error parsing date "+dateStr+" : "+e.getMessage());
 160+ log.error("Error parsing date "+dateStr+" : "+e.getMessage(),e);
161161 }
162162 return 0;
163163 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java
@@ -225,7 +225,7 @@
226226 } catch(IOException e){
227227 e.printStackTrace();
228228 res.setErrorMsg("I/O processing the request : "+e.getMessage());
229 - log.error("I/O error in searchSimilar() : "+e.getMessage());
 229+ log.error("I/O error in searchSimilar() : "+e.getMessage(),e);
230230 }
231231 return res;
232232 }
@@ -314,7 +314,7 @@
315315 HashSet<String> stopWords = StopWords.getPredefinedSet(iid);
316316 WikiQueryParser parser = new WikiQueryParser(bs.getFields().contents(),nsDefault,analyzer,bs,NamespacePolicy.IGNORE,stopWords);
317317 Query q = parser.parse(key.substring(key.indexOf(':')+1),new WikiQueryParser.ParsingOptions(true));
318 - highlight(iid,q,parser.getWordsClean(),searcher,res,true,true);
 318+ highlight(iid,q,parser.getWordsClean(),searcher,res,true,true,null);
319319 } else{
320320 res.addInfo("related",global.getLocalhost());
321321 res.setSuccess(true);
@@ -355,7 +355,7 @@
356356 return messenger.searchPrefix(host,pre.toString(),searchterm,limit,nsf);
357357 } catch(IOException e){
358358 e.printStackTrace();
359 - log.error("Error opening searcher in prefixSearch on "+pre+" : "+e.getMessage());
 359+ log.error("Error opening searcher in prefixSearch on "+pre+" : "+e.getMessage(),e);
360360 SearchResults res = new SearchResults();
361361 res.setErrorMsg("I/O error on index "+pre);
362362 return res;
@@ -489,7 +489,7 @@
490490 sendStats(start-System.currentTimeMillis());
491491 } catch (IOException e) {
492492 e.printStackTrace();
493 - log.error("Internal error in prefixSearch on "+pre+" : "+e.getMessage());
 493+ log.error("Internal error in prefixSearch on "+pre+" : "+e.getMessage(),e);
494494 res.setErrorMsg("I/O error on index "+pre);
495495 }
496496 return res;
@@ -522,13 +522,13 @@
523523 // search
524524 SearchResults res = makeTitlesSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain);
525525 // highlight
526 - highlightTitles(iid,q,words,searcher,res,sortByPhrases,false);
 526+ highlightTitles(iid,q,words,searcher,res,sortByPhrases,false,null);
527527 return res;
528528 } catch (IOException e) {
529529 e.printStackTrace();
530530 SearchResults res = new SearchResults();
531531 res.setErrorMsg("Internal error in SearchEngine: "+e.getMessage());
532 - log.error("I/O error in searchTitles(): "+e.getMessage());
 532+ log.error("I/O error in searchTitles(): "+e.getMessage(),e);
533533 return res;
534534 }
535535 }
@@ -542,14 +542,14 @@
543543 IndexSearcherMul searcher;
544544 long searchStart = System.currentTimeMillis();
545545 searcher = cache.getLocalSearcher(iid);
546 - FilterWrapper localfilter = filter;
 546+ /*FilterWrapper localfilter = filter;
547547 if(iid.isMainsplit() && iid.isMainPart())
548548 localfilter.setNamespaceFilter(null);
549549 else if(iid.isNssplit() && !iid.isLogical() && iid.getNamespaceSet().size()==1 && !iid.getNamespaceSet().contains("<default>"))
550550 localfilter.setNamespaceFilter(null);
551551 if(localfilter.getNamespaceFilter() != null)
552 - log.info("Using namespace filter: "+localfilter);
553 - TopDocs hits = searcher.search(q,localfilter,offset+limit);
 552+ log.info("Using namespace filter: "+localfilter); */
 553+ TopDocs hits = searcher.search(q,filter.getFilterOrNull(),offset+limit);
554554 SearchResults res = makeSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain);
555555 HighlightPack pack = new HighlightPack(res);
556556 // pack extra info needed for highlighting
@@ -561,7 +561,7 @@
562562 e.printStackTrace();
563563 HighlightPack pack = new HighlightPack(new SearchResults());
564564 pack.res.setErrorMsg("Internal error in SearchEngine: "+e.getMessage());
565 - log.error("Internal error in SearchEngine while trying to search main part: "+e.getMessage());
 565+ log.error("Internal error in SearchEngine while trying to search main part: "+e.getMessage(),e);
566566 return pack;
567567 }
568568
@@ -600,6 +600,8 @@
601601 // use default filter if it's cached or composable of cached entries
602602 } else if(cachedFilters.containsValue(nsDefault) || NamespaceCache.isComposable(nsDefault))
603603 nsfw.setNamespaceFilter(nsDefault);
 604+ } else{
 605+ nsfw.setNamespaceFilter(nsDefault);
604606 }
605607
606608 parser.extractPrefixFilter(searchterm);
@@ -647,7 +649,7 @@
648650 res = pack.res;
649651 res.addInfo("search",formatHost(host));
650652 if(!searchOnly){
651 - highlight(iid,q,parser.getWordsClean(),pack.terms,pack.dfs,pack.maxDoc,res,exactCase,null,parser.hasPhrases(),false);
 653+ highlight(iid,q,parser.getWordsClean(),pack.terms,pack.dfs,pack.maxDoc,res,exactCase,null,parser.hasPhrases(),false,commonsWiki);
652654 fetchTitles(res,searchterm,nsfw,iid,parser,offset,iwoffset,iwlimit,explain);
653655 suggest(iid,searchterm,parser,res,offset,nsfw);
654656 }
@@ -676,11 +678,11 @@
677679 Wildcards wildcards = new Wildcards(searcher.getAllHosts(),exactCase);
678680 q = parseQuery(searchterm,parser,iid,raw,nsfw,searchAll,wildcards);
679681
680 - hits = searcher.search(q,nsfw,offset+limit);
 682+ hits = searcher.search(q,nsfw.getFilterOrNull(),offset+limit);
681683 res = makeSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain);
682684 res.addInfo("search",formatHosts(searcher.getAllHosts().values()));
683685 if(!searchOnly){
684 - highlight(iid,q,parser.getWordsClean(),searcher,parser.getHighlightTerms(),res,exactCase,parser.hasPhrases(),false);
 686+ highlight(iid,q,parser.getWordsClean(),searcher,parser.getHighlightTerms(),res,exactCase,parser.hasPhrases(),false,commonsWiki);
685687 fetchTitles(res,searchterm,nsfw,iid,parser,offset,iwoffset,iwlimit,explain);
686688 suggest(iid,searchterm,parser,res,offset,nsfw);
687689 }
@@ -695,19 +697,19 @@
696698 e.printStackTrace();
697699 res = new SearchResults();
698700 res.retry();
699 - log.warn("Retry, temportal error for query: ["+q+"] on "+iid+" : "+e.getMessage());
 701+ log.warn("Retry, temportal error for query: ["+q+"] on "+iid+" : "+e.getMessage(),e);
700702 return res;
701703 }
702704 } catch(ParseException e){
703705 res = new SearchResults();
704706 res.setErrorMsg("Error parsing query: "+searchterm);
705 - log.error("Cannot parse query: "+searchterm+", error: "+e.getMessage());
 707+ log.error("Cannot parse query: "+searchterm+", error: "+e.getMessage(),e);
706708 return res;
707709 } catch (Exception e) {
708710 res = new SearchResults();
709711 e.printStackTrace();
710712 res.setErrorMsg("Internal error in SearchEngine: "+e.getMessage());
711 - log.error("Internal error in SearchEngine trying to make WikiSearcher: "+e.getMessage());
 713+ log.error("Internal error in SearchEngine trying to make WikiSearcher: "+e.getMessage(),e);
712714 return res;
713715 }
714716 }
@@ -791,6 +793,8 @@
792794 return;
793795 if(offset != 0)
794796 return; // do titles search only for first page of normal-search results
 797+ if(parser.hasPrefixFilter())
 798+ return; // TODO: implement, currently we don't do interwiki prefix queries
795799 try{
796800 IndexId titles = iid.getTitlesIndex();
797801 IndexId main = titles.getDB();
@@ -842,7 +846,7 @@
843847
844848 TopDocs hits = searcher.search(q,wrap,iwoffset+iwlimit);
845849 SearchResults r = makeTitlesSearchResults(searcher,hits,iwoffset,iwlimit,main,searchterm,q,searchStart,explain);
846 - highlightTitles(main,q,words,searcher,r,parser.hasWildcards(),false);
 850+ highlightTitles(main,q,words,searcher,r,parser.hasWildcards(),false,null);
847851
848852 if(r.isSuccess()){
849853 res.setTitles(r.getResults());
@@ -855,7 +859,7 @@
856860
857861 } catch(Exception e){
858862 e.printStackTrace();
859 - log.error("Error fetching grouped titles: "+e.getMessage());
 863+ log.error("Error fetching grouped titles: "+e.getMessage(),e);
860864 }
861865 }
862866
@@ -949,48 +953,50 @@
950954 }
951955
952956 /** Highlight search results, and set the property in ResultSet */
953 - protected void highlight(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, Term[] terms, SearchResults res, boolean exactCase, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
 957+ protected void highlight(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, Term[] terms, SearchResults res, boolean exactCase, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{
954958 if(terms == null)
955959 return;
956960 int[] df = searcher.docFreqs(terms);
957961 int maxDoc = searcher.maxDoc();
958 - highlight(iid,q,words,terms,df,maxDoc,res,exactCase,null,sortByPhrases,alwaysIncludeFirst);
 962+ highlight(iid,q,words,terms,df,maxDoc,res,exactCase,null,sortByPhrases,alwaysIncludeFirst,commonsWiki);
959963 }
960964
961965 /** Highlight search results, and set the property in ResultSet */
962 - protected void highlight(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
 966+ protected void highlight(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{
963967 Term[] terms = getTerms(q,"contents");
964968 if(terms == null)
965969 return;
966970 int[] df = searcher.docFreqs(terms);
967971 int maxDoc = searcher.maxDoc();
968 - highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst);
 972+ highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst,commonsWiki);
969973 }
970974
971975 /** Highlight search results from titles index */
972 - protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
 976+ protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{
973977 Term[] terms = getTerms(q,"alttitle");
974978 if(terms == null)
975979 return;
976980 int[] df = searcher.docFreqs(terms);
977981 int maxDoc = searcher.maxDoc();
978 - highlight(iid,q,words,terms,df,maxDoc,res,false,searcher.getIndexReader(),sortByPhrases,alwaysIncludeFirst);
 982+ highlight(iid,q,words,terms,df,maxDoc,res,false,searcher.getIndexReader(),sortByPhrases,alwaysIncludeFirst,commonsWiki);
979983 resolveInterwikiNamespaces(res,iid);
980984 }
981985
982986 /** Highlight search results from titles index using a wikisearcher */
983 - protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
 987+ protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{
984988 Term[] terms = getTerms(q,"alttitle");
985989 if(terms == null)
986990 return;
987991 int[] df = searcher.docFreqs(terms);
988992 int maxDoc = searcher.maxDoc();
989 - highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst);
 993+ highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst,commonsWiki);
990994 resolveInterwikiNamespaces(res,iid);
991995 }
992996
993997 /** Highlight article (don't call directly, use one of the interfaces above instead) */
994 - protected void highlight(IndexId iid, Query q, ArrayList<String> words, Term[] terms, int[] df, int maxDoc, SearchResults res, boolean exactCase, IndexReader reader, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{
 998+ protected void highlight(IndexId iid, Query q, ArrayList<String> words, Term[] terms, int[] df,
 999+ int maxDoc, SearchResults res, boolean exactCase, IndexReader reader,
 1000+ boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{
9951001 // iid -> array of keys
9961002 HashMap<IndexId,ArrayList<String>> map = new HashMap<IndexId,ArrayList<String>>();
9971003 iid = iid.getHighlight();
@@ -999,12 +1005,17 @@
10001006 for(ResultSet r : res.getResults()){
10011007 IndexId piid = iid.getPartByNamespace(r.namespace);
10021008 ArrayList<String> hits = map.get(piid);
1003 - if(hits == null){
1004 - hits = new ArrayList<String>();
1005 - map.put(piid,hits);
 1009+ if(hits == null)
 1010+ map.put(piid,hits = new ArrayList<String>());
 1011+ hits.add(r.getKey());
 1012+ keys.put(r.getKey(),r);
 1013+ // check for commons wiki images
 1014+ if(commonsWiki!=null && r.namespace.equals("6")){
 1015+ hits = map.get(commonsWiki);
 1016+ if(hits == null)
 1017+ map.put(commonsWiki,hits=new ArrayList<String>());
 1018+ hits.add(r.getKey());
10061019 }
1007 - hits.add(r.getKey());
1008 - keys.put(r.getKey(),r);
10091020 }
10101021 // highlight!
10111022 HashSet<String> stopWords = StopWords.getPredefinedSet(iid);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/highlight/Highlight.java
@@ -143,7 +143,7 @@
144144 try{
145145 ret = getTokens(reader,key,allTerms,fields);
146146 } catch(Exception e){
147 - log.error("Error geting tokens: "+e.getMessage());
 147+ log.error("Error geting tokens: "+e.getMessage(),e);
148148 e.printStackTrace();
149149 }
150150 if(ret == null)
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java
@@ -78,7 +78,7 @@
7979 inTransaction = true;
8080 log.info("Transaction on index "+iid+" started");
8181 } catch(Exception e){
82 - log.error("Error while intializing transaction: "+e.getMessage());
 82+ log.error("Error while intializing transaction: "+e.getMessage(),e);
8383 lock.unlock();
8484 }
8585 }
@@ -94,7 +94,7 @@
9595 if(info.exists())
9696 FSUtils.deleteRecursive(info.getAbsoluteFile());
9797 } catch(Exception e){
98 - log.error("Error removing old transaction data from "+iid.getTransactionPath(type)+" : "+e.getMessage());
 98+ log.error("Error removing old transaction data from "+iid.getTransactionPath(type)+" : "+e.getMessage(),e);
9999 }
100100
101101 }
@@ -139,7 +139,7 @@
140140 FSUtils.createHardLinkRecursive(backup.getAbsolutePath(),path);
141141 FSUtils.deleteRecursive(backup.getAbsoluteFile()); // cleanup
142142 } catch(Exception e){
143 - log.error("Recovery of index "+iid+" failed with error "+e.getMessage());
 143+ log.error("Recovery of index "+iid+" failed with error "+e.getMessage(),e);
144144 }
145145 }
146146
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/WikiIndexModifier.java
@@ -161,7 +161,7 @@
162162 }
163163 reader.close();
164164 } catch (IOException e) {
165 - log.warn("I/O Error: could not open/read "+iid.getIndexPath()+" while deleting document.");
 165+ log.warn("I/O Error: could not open/read "+iid.getIndexPath()+" while deleting document.",e);
166166 return false;
167167 }
168168 return true;
@@ -212,11 +212,11 @@
213213
214214 log.debug(iid+": Adding document "+rec.getArticle().toStringFull());
215215 } catch (IOException e) {
216 - log.error("Error writing document "+rec+" to index "+path);
 216+ log.error("Error writing document "+rec+" to index "+path,e);
217217 succ = false; // report unsucc, but still continue, to process all cards
218218 } catch(Exception e){
219219 e.printStackTrace();
220 - log.error("Error adding document "+rec.getIndexKey()+" with message: "+e.getMessage());
 220+ log.error("Error adding document "+rec.getIndexKey()+" with message: "+e.getMessage(),e);
221221 succ = false; // report unsucc, but still continue, to process all cards
222222 }
223223 }
@@ -224,7 +224,7 @@
225225 try {
226226 writer.close();
227227 } catch (IOException e) {
228 - log.error("Error closing index "+path);
 228+ log.error("Error closing index "+path,e);
229229 return false;
230230 }
231231 return succ;
@@ -258,7 +258,7 @@
259259 throw e;
260260 } catch (IOException e1) {
261261 e1.printStackTrace();
262 - log.error("I/O error openning index at "+path+" : "+e.getMessage());
 262+ log.error("I/O error openning index at "+path+" : "+e.getMessage(),e);
263263 throw e1;
264264 }
265265 }
@@ -356,7 +356,7 @@
357357 log.info("Unlocked index at "+path);
358358 }
359359 } catch(IOException e){
360 - log.warn("I/O error unlock index at "+path+" : "+e.getMessage());
 360+ log.warn("I/O error unlock index at "+path+" : "+e.getMessage(),e);
361361 }
362362 }
363363
@@ -401,7 +401,7 @@
402402 && updateTitles(iid,updateRecords);
403403 } catch(Exception e){
404404 e.printStackTrace();
405 - log.error("Error updating "+iid+" : "+e.getMessage());
 405+ log.error("Error updating "+iid+" : "+e.getMessage(),e);
406406 return false;
407407 }
408408 }
@@ -514,7 +514,7 @@
515515 return true;
516516 } catch(IOException e){
517517 e.printStackTrace();
518 - log.error("Cannot fetch links info: "+e.getMessage());
 518+ log.error("Cannot fetch links info: "+e.getMessage(),e);
519519 throw e;
520520 }
521521 }
@@ -550,7 +550,7 @@
551551 } catch(IOException e){
552552 trans.rollback();
553553 e.printStackTrace();
554 - log.error("Cannot update links index: "+e.getMessage());
 554+ log.error("Cannot update links index: "+e.getMessage(),e);
555555 return false;
556556 }
557557 }
@@ -565,7 +565,7 @@
566566 return true;
567567 } catch(IOException e){
568568 e.printStackTrace();
569 - log.error("Cannot update prefix index: "+e.getMessage());
 569+ log.error("Cannot update prefix index: "+e.getMessage(),e);
570570 return false;
571571 }
572572 }
@@ -579,7 +579,7 @@
580580 return true;
581581 } catch(IOException e){
582582 e.printStackTrace();
583 - log.error("Cannot update spellcheck index: "+e.getMessage());
 583+ log.error("Cannot update spellcheck index: "+e.getMessage(),e);
584584 return false;
585585 }
586586 }
@@ -593,7 +593,7 @@
594594 return true;
595595 } catch(IOException e){
596596 e.printStackTrace();
597 - log.error("Cannot update spellcheck index: "+e.getMessage());
 597+ log.error("Cannot update spellcheck index: "+e.getMessage(),e);
598598 return false;
599599 }
600600 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java
@@ -198,7 +198,7 @@
199199 try {
200200 Thread.sleep(1000);
201201 } catch (InterruptedException e) {
202 - log.warn("IndexThread sleep interrupted with message: "+e.getMessage());
 202+ log.warn("IndexThread sleep interrupted with message: "+e.getMessage(),e);
203203 }
204204 }
205205 if(queuedUpdatesExist())
@@ -258,7 +258,7 @@
259259 }
260260 } catch(IOException e){
261261 e.printStackTrace();
262 - log.error("Error optimizing index "+iid);
 262+ log.error("Error optimizing index "+iid,e);
263263 badOptimization.add(iid);
264264 } finally {
265265 if(lock != null)
@@ -322,7 +322,7 @@
323323 FSUtils.createHardLinkRecursive(indexPath+sep+f.getName(),snapshot+sep+f.getName(),true);
324324 } catch (IOException e) {
325325 e.printStackTrace();
326 - log.error("Error making snapshot "+snapshot+": "+e.getMessage());
 326+ log.error("Error making snapshot "+snapshot+": "+e.getMessage(),e);
327327 return;
328328 }
329329 }
@@ -357,7 +357,7 @@
358358 reader.close();
359359 trans.commit();
360360 } catch (IOException e) {
361 - log.error("Could not optimize index at "+path+" : "+e.getMessage());
 361+ log.error("Could not optimize index at "+path+" : "+e.getMessage(),e);
362362 throw e;
363363 }
364364 }
@@ -643,7 +643,7 @@
644644 lastFlush = System.currentTimeMillis();
645645 } catch (Exception e) {
646646 e.printStackTrace();
647 - log.error("Unexpected error in Index thread while applying updates: "+e.getMessage());
 647+ log.error("Unexpected error in Index thread while applying updates: "+e.getMessage(),e);
648648 return;
649649 }
650650 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/SerbianFilter.java
@@ -160,18 +160,19 @@
161161 String cv;
162162 boolean diff = false;
163163 aliasDiff = false;
164 - for(char c : text.toCharArray()){
165 - cv = conv[c];
166 - if(cv == null){
167 - buffer[length++] = c;
168 - } else{
169 - for(char ch : cv.toCharArray()){
170 - buffer[length++] = ch;
171 - diff = true;
172 - if( c != 'đ' && c != 'Đ')
173 - aliasDiff = true;
174 - }
175 - }
 164+ for(int i=0;i<text.length() && i<buffer.length;i++){
 165+ char c = text.charAt(i);
 166+ cv = conv[c];
 167+ if(cv == null){
 168+ buffer[length++] = c;
 169+ } else{
 170+ for(char ch : cv.toCharArray()){
 171+ buffer[length++] = ch;
 172+ diff = true;
 173+ if( c != 'đ' && c != 'Đ')
 174+ aliasDiff = true;
 175+ }
 176+ }
176177 }
177178 if(diff)
178179 return new String(buffer,0,length);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/ExtToken.java
@@ -427,9 +427,11 @@
428428 tt.unstub();
429429 }
430430 tt.setPositionIncrement(0);
 431+ /* FIXME: this should happen, but if it does... oh well...
 432+ if(t.type != Type.TEXT)
 433+ raiseException(serialized,cur,t,"Bad serialized data: trying to assign alias to nontext token");
 434+ */
431435 tokens.add(tt);
432 - if(t.type != Type.TEXT)
433 - raiseException(serialized,cur,t,"Bad serialized data: trying to assign alias to nontext token");
434436 cur += len;
435437 break; }
436438 case 3: // change pos
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WordNet.java
@@ -166,7 +166,7 @@
167167 log.info("Loaded WordNet synonyms in "+(System.currentTimeMillis()-start)+" ms");
168168 } catch(Exception e){
169169 e.printStackTrace();
170 - log.warn("Cannot load WordNet synonym file : "+e.getMessage());
 170+ log.warn("Cannot load WordNet synonym file : "+e.getMessage(),e);
171171 state = State.FAILED;
172172 }
173173 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/LanguageAnalyzer.java
@@ -83,7 +83,7 @@
8484
8585 return filtered;
8686 } catch (Exception e){
87 - log.error("Error applying custom filter for "+filters.getLanguage());
 87+ log.error("Error applying custom filter for "+filters.getLanguage(),e);
8888 }
8989 }
9090 return tokens;
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/ContextAnalyzer.java
@@ -45,7 +45,7 @@
4646 if(contexts != null)
4747 part.addAll(contexts);
4848 } catch (IOException e) {
49 - log.warn("Cannot fetch context for "+key+" from "+t.getKey()+" : "+e.getMessage());
 49+ log.warn("Cannot fetch context for "+key+" from "+t.getKey()+" : "+e.getMessage(),e);
5050 e.printStackTrace();
5151 }
5252
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/StopWords.java
@@ -42,7 +42,7 @@
4343 try{
4444 return HighFreqTerms.getHighFreqTerms(iid.getDB(),"contents",50).toArray(new String[] {});
4545 } catch(Exception e){
46 - log.warn("Failed to fetch stop words for "+iid);
 46+ log.warn("Failed to fetch stop words for "+iid,e);
4747 return new String[] {};
4848 }
4949 }
@@ -85,7 +85,7 @@
8686 try {
8787 ret.addAll(getCached(iid));
8888 } catch (IOException e) {
89 - log.warn("Cannot get cached stop words for "+iid);
 89+ log.warn("Cannot get cached stop words for "+iid,e);
9090 }
9191 return ret;
9292 }
@@ -130,7 +130,7 @@
131131 log.info("Successfully loaded stop words for: "+cachePredefined.keySet()+" in "+(System.currentTimeMillis()-start)+" ms");
132132 } catch(IOException e){
133133 e.printStackTrace();
134 - log.error("Cannot load stop words definitions: "+e.getMessage());
 134+ log.error("Cannot load stop words definitions: "+e.getMessage(),e);
135135 }
136136 loadedPredefined = true;
137137 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java
@@ -143,6 +143,8 @@
144144 protected Wildcards wildcards = null;
145145 protected Fuzzy fuzzy = null;
146146 protected IndexId iid;
 147+ protected boolean isInTitle = false;
 148+ protected int isInTitleLevel = 0;
147149
148150 protected Pattern urlPattern = Pattern.compile("(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-\\/]))?");
149151
@@ -682,7 +684,7 @@
683685 /** analyzer buffer into tokens using default analyzer */
684686 private void analyzeBuffer(){
685687 String analysisField = defaultField;
686 - if(defaultField.equals("contents") && "intitle".equals(currentField))
 688+ if(defaultField.equals("contents") && isInTitle)
687689 analysisField = "title";
688690 tokenStream = analyzer.tokenStream(analysisField,
689691 new String(buffer,0,length));
@@ -729,7 +731,7 @@
730732 private Term makeTerm(String t){
731733 if(currentField == null)
732734 return new Term(defaultField,builder.isExactCase()? t : t.toLowerCase());
733 - else if(defaultField.equals("contents") && "intitle".equals(currentField))
 735+ else if(defaultField.equals("contents") && isInTitle)
734736 return new Term("title",builder.isExactCase()? t : t.toLowerCase());
735737 else if(!"incategory".equals(currentField) &&
736738 (namespacePolicy == NamespacePolicy.IGNORE ||
@@ -866,7 +868,7 @@
867869 continue;
868870
869871 // terms, fields
870 - if(Character.isLetterOrDigit(c) || c=='.' || c == '[' || c=='*' || c=='?'){
 872+ if(Character.isLetterOrDigit(c) || c=='.' || c == '[' || c=='*'){
871873 // check for generic namespace prefixes, e.g. [0,1]:
872874 if(c == '['){
873875 if(fetchGenericPrefix())
@@ -890,6 +892,10 @@
891893 if(currentField == null || definedExplicitField){
892894 // set field name
893895 currentField = new String(buffer,0,length);
 896+ if("intitle".equals(currentField)){
 897+ isInTitle = true;
 898+ isInTitleLevel = level;
 899+ }
894900 if((defaultNamespaceName!=null && currentField.equals(defaultNamespaceName)) || currentField.equals(defaultField)){
895901 currentField = null;
896902 break; // repeated definition of field, ignore
@@ -990,6 +996,9 @@
991997 break;
992998 case ')':
993999 if(level > 0){
 1000+ // get out of titles on appropriate level of parenthesis
 1001+ if(isInTitle && level <= isInTitleLevel)
 1002+ isInTitle = false;
9941003 break mainloop;
9951004 }
9961005 continue;
@@ -1033,7 +1042,7 @@
10341043 boolean wild = false;
10351044 int index = -1;
10361045 for(int i=0;i<length;i++){
1037 - if(buffer[i] == '*' || buffer[i] == '?'){
 1046+ if(buffer[i] == '*'){
10381047 wild = true;
10391048 index = i;
10401049 break;
@@ -1041,10 +1050,10 @@
10421051 }
10431052 // check if it's a valid wildcard
10441053 if(wild){
1045 - if((buffer[0] == '*' || buffer[0] == '?') && (buffer[length-1]=='*' || buffer[length-1]=='?'))
 1054+ if((buffer[0] == '*') && (buffer[length-1]=='*'))
10461055 return false; // don't support patterns like *a*
1047 - if(index == length-1 && buffer[index]=='?')
1048 - return false; // probably just an ordinary question mark
 1056+ //if(index == length-1 && buffer[index]=='?')
 1057+ // return false; // probably just an ordinary question mark
10491058 for(int i=0;i<length;i++){
10501059 if(Character.isLetterOrDigit(buffer[i]))
10511060 return true; // +card :P
@@ -1215,7 +1224,8 @@
12161225 if(prefixFilter.startsWith("[") && prefixFilter.contains("]:")){
12171226 // convert from [2]:query to 2:query form
12181227 prefixFilter = prefixFilter.replace("[","").replace("]:",":");
1219 - }
 1228+ } else // default to main namespace
 1229+ prefixFilter = "0:"+prefixFilter;
12201230 // return the actual query without prefix
12211231 return queryText.substring(0,inx);
12221232 }
@@ -1254,6 +1264,7 @@
12551265 explicitOccur = null;
12561266 parsedWords = new ParsedWords();
12571267 urls = new ArrayList<ArrayList<Term>>();
 1268+ isInTitle = false;
12581269 }
12591270
12601271 /** Init parsing, call this function to parse text */
@@ -1941,7 +1952,8 @@
19421953 if(redirectsMulti != null)
19431954 full.add(redirectsMulti,Occur.SHOULD);
19441955
1945 - return full;
 1956+ ArticleNamespaceScaling nsScale = iid.getNamespaceScaling();
 1957+ return new ArticleQueryWrap(full,new ArticleInfoImpl(),null,null,nsScale);
19461958
19471959 }
19481960
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java
@@ -180,7 +180,7 @@
181181 fileis.close();
182182 }
183183 } catch (IOException e) {
184 - log.warn("I/O error reading status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage());
 184+ log.warn("I/O error reading status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage(),e);
185185 }
186186 String from;
187187 if(firstPass.contains(dbname) && timestamp!=null)
@@ -215,7 +215,7 @@
216216 }
217217
218218 } catch (Exception e) {
219 - log.warn("Error sending index update records of "+iid+" to indexer at "+iid.getIndexHost());
 219+ log.warn("Error sending index update records of "+iid+" to indexer at "+iid.getIndexHost(),e);
220220 continue main_loop;
221221 }
222222 // more results?
@@ -261,13 +261,13 @@
262262 status.store(fileos,"Last incremental update timestamp");
263263 fileos.close();
264264 } catch (IOException e) {
265 - log.warn("I/O error writing status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage());
 265+ log.warn("I/O error writing status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage(),e);
266266 }
267267 firstPass.remove(dbname);
268268 log.info("Finished update of "+iid);
269269 } catch(Exception e){
270270 e.printStackTrace();
271 - log.warn("Retry later: error while processing update for "+dbname+" : "+e.getMessage());
 271+ log.warn("Retry later: error while processing update for "+dbname+" : "+e.getMessage(),e);
272272 errors = true;
273273 }
274274 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/OAIHarvester.java
@@ -76,7 +76,7 @@
7777 ret.addAll(collector.getRecords());
7878 } while(hasMore() && ret.size() < atLeast);
7979 } catch(IOException e){
80 - log.warn("I/O exception listing records: "+e.getMessage());
 80+ log.warn("I/O exception listing records: "+e.getMessage(),e);
8181 return null;
8282 }
8383 return ret;

Status & tagging log