Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/index/WikiIndexModifierTest.java |
— | — | @@ -71,6 +71,34 @@ |
72 | 72 | } |
73 | 73 | } |
74 | 74 | |
| 75 | + public void testMakeHighlightDocuments(){ |
| 76 | + IndexId iid = IndexId.get("enwiki"); |
| 77 | + String text = "Some very [[simple]] text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]"; |
| 78 | + int references = 100; |
| 79 | + int redirectTargetNamespace = -1; |
| 80 | + ArrayList<Redirect> redirects = new ArrayList<Redirect>(); |
| 81 | + redirects.add(new Redirect(0,"Redirect",2)); |
| 82 | + ArrayList<RelatedTitle> rel = new ArrayList<RelatedTitle>(); |
| 83 | + rel.add(new RelatedTitle(new Title(0,"Related test"),50)); |
| 84 | + Hashtable<String,Integer> anchors = new Hashtable<String,Integer>(); |
| 85 | + anchors.put("Anchor",20); |
| 86 | + Date date = new Date(); |
| 87 | + |
| 88 | + Article article = new Article(10,0,"Test page",text,null, |
| 89 | + references,redirectTargetNamespace,0,redirects,rel,anchors,date); |
| 90 | + |
| 91 | + analyzer = Analyzers.getHighlightAnalyzer(iid, false); |
| 92 | + try{ |
| 93 | + doc = WikiIndexModifier.makeHighlightDocument(article,new FieldBuilder(iid),iid); |
| 94 | + assertEquals("1 [10]", |
| 95 | + tokens("pageid")); |
| 96 | + assertEquals("1 [0:Test page]", |
| 97 | + tokens("key")); |
| 98 | + } catch(IOException e){ |
| 99 | + fail(); |
| 100 | + } |
| 101 | + } |
| 102 | + |
75 | 103 | public void testMakeTitleDocument(){ |
76 | 104 | IndexId iid = IndexId.get("en-titles"); |
77 | 105 | String text = "Some very simple text used for testing\n== Heading 1 ==\nParagraph\n[[Category:Category1]]"; |
Index: branches/lucene-search-2.1/test/org/wikimedia/lsearch/analyzers/WikiQueryParserTest.java |
— | — | @@ -110,6 +110,20 @@ |
111 | 111 | |
112 | 112 | q = parser.parse("douglas -adams guides"); |
113 | 113 | assertEquals("[contents:guides, contents:douglas, contents:guide]", Arrays.toString(parser.getHighlightTerms())); |
| 114 | + |
| 115 | + /* ================== PREFIXES ============ */ |
| 116 | + q = parser.parseRaw("intitle:tests"); |
| 117 | + // FIXME: stemming for titles? |
| 118 | + assertEquals("title:tests title:test^0.5",q.toString()); |
| 119 | + |
| 120 | + q = parser.parseRaw("intitle:multiple words in title"); |
| 121 | + assertEquals("+title:multiple +title:words +title:in +title:title",q.toString()); |
| 122 | + |
| 123 | + q = parser.parseRaw("intitle:[2]:tests"); |
| 124 | + assertEquals("title:tests title:test^0.5",q.toString()); |
| 125 | + |
| 126 | + q = parser.parseRaw("something (intitle:[2]:tests) out"); |
| 127 | + assertEquals("+contents:something +(title:tests title:test^0.5) +contents:out",q.toString()); |
114 | 128 | |
115 | 129 | |
116 | 130 | } catch(Exception e){ |
Index: branches/lucene-search-2.1/lsearchd |
— | — | @@ -1,3 +1,3 @@ |
2 | | -#!/bin/sh |
| 2 | +#!/bin/bash |
3 | 3 | jardir=`dirname $0` # put your jar dir here! |
4 | 4 | java -Djava.rmi.server.codebase=file://$jardir/LuceneSearch.jar -Djava.rmi.server.hostname=$HOSTNAME -jar $jardir/LuceneSearch.jar $* |
Index: branches/lucene-search-2.1/src/org/apache/lucene/search/ArticleNamespaceScaling.java |
— | — | @@ -1,10 +1,11 @@ |
2 | 2 | package org.apache.lucene.search; |
3 | 3 | |
| 4 | +import java.io.Serializable; |
4 | 5 | import java.util.Collections; |
5 | 6 | import java.util.Map; |
6 | 7 | import java.util.Map.Entry; |
7 | 8 | |
8 | | -public class ArticleNamespaceScaling { |
| 9 | +public class ArticleNamespaceScaling implements Serializable { |
9 | 10 | protected float[] nsBoost = null; |
10 | 11 | public static float talkPageScale = 0.25f; |
11 | 12 | |
Index: branches/lucene-search-2.1/src/org/apache/lucene/search/ArticleQueryWrap.java |
— | — | @@ -34,7 +34,7 @@ |
35 | 35 | @Override |
36 | 36 | public float customScore(int doc, float subQueryScore, float boostScore) throws IOException { |
37 | 37 | float sub = 1; |
38 | | - if(article.isSubpage(doc)) |
| 38 | + if(article!=null && article.isSubpage(doc)) |
39 | 39 | sub = SUBPAGE; |
40 | 40 | |
41 | 41 | float r = 1; |
— | — | @@ -42,10 +42,14 @@ |
43 | 43 | r = rank.rank(doc); |
44 | 44 | |
45 | 45 | float ns = 1; |
46 | | - if(nsScaling != null) |
| 46 | + if(nsScaling != null && article != null) |
47 | 47 | ns = nsScaling.scaleNamespace(article.namespace(doc)); |
48 | 48 | |
49 | | - return sub * r * ns * scale.score(subQueryScore,article.daysOld(doc)); |
| 49 | + float ageScaled = subQueryScore; |
| 50 | + if(scale !=null) |
| 51 | + ageScaled = scale.score(subQueryScore,article.daysOld(doc)); |
| 52 | + |
| 53 | + return sub * r * ns * ageScaled; |
50 | 54 | } |
51 | 55 | |
52 | 56 | @Override |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/Localization.java |
— | — | @@ -340,7 +340,7 @@ |
341 | 341 | } |
342 | 342 | } |
343 | 343 | } catch (Exception e) { |
344 | | - log.warn("Error processing message file at "+MessageFormat.format(loc+"Messages{0}.php",langCode)); |
| 344 | + log.warn("Error processing message file at "+MessageFormat.format(loc+"Messages{0}.php",langCode),e); |
345 | 345 | } |
346 | 346 | log.warn("Could not load localization for "+langCode); |
347 | 347 | badLocalizations.add(langCode.toLowerCase()); |
— | — | @@ -449,7 +449,7 @@ |
450 | 450 | log.debug("Read interwiki map from jar file"); |
451 | 451 | r.close(); |
452 | 452 | } catch(Exception e){ |
453 | | - log.warn("Cannot read interwiki map from jar file"); |
| 453 | + log.warn("Cannot read interwiki map from jar file",e); |
454 | 454 | } |
455 | 455 | } |
456 | 456 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/PHPParser.java |
— | — | @@ -416,25 +416,33 @@ |
417 | 417 | "NS_CATEGORY_TALK => 'Razgovor_o_kategoriji');"; |
418 | 418 | String text2 = "$fallback='sr-ec';"; |
419 | 419 | |
420 | | - String file = "/var/www/html/wiki-lucene/phase3/languages/messages/MessagesEn.php"; |
421 | | - |
422 | | - PHPParser p = new PHPParser(); |
423 | | - String php = p.readFile(file); |
424 | | - Hashtable<String,Integer> map = p.getNamespaces(php); |
425 | | - System.out.println(map); |
426 | | - System.out.println(p.getFallBack(text2)); |
427 | | - System.out.println(p.getRedirectMagic(php)); |
428 | | - |
429 | | - System.out.println(p.getLanguages("'wgLanguageCode' => array('default' => '$lang')")); |
430 | | - String initset = p.readURL(new URL("file:///home/rainman/Desktop/InitialiseSettings.php")); |
431 | | - System.out.println(p.getLanguages(initset)); |
432 | | - System.out.println(p.getServer(initset)); |
433 | | - System.out.println(p.getDefaultSearch(initset)); |
434 | | - System.out.println(p.getMetaNamespace(initset)); |
435 | | - System.out.println(p.getMetaNamespaceTalk(initset)); |
436 | | - System.out.println(p.getExtraNamespaces(initset)); |
437 | | - System.out.println(p.getNamespacesWithSubpages(initset)); |
438 | | - System.out.println(p.getContentNamespaces(initset)); |
439 | | - |
| 420 | + try{ |
| 421 | + String file = "/var/www/wiki/phase3/languages/messages/MessagesEn.php"; |
| 422 | + |
| 423 | + PHPParser p = new PHPParser(); |
| 424 | + String php = p.readFile(file); |
| 425 | + Hashtable<String,Integer> map = p.getNamespaces(php); |
| 426 | + System.out.println(map); |
| 427 | + System.out.println(p.getFallBack(text2)); |
| 428 | + System.out.println(p.getRedirectMagic(php)); |
| 429 | + |
| 430 | + System.out.println(p.getLanguages("'wgLanguageCode' => array('default' => '$lang')")); |
| 431 | + } catch(Exception e){ |
| 432 | + e.printStackTrace(); |
| 433 | + } |
| 434 | + try{ |
| 435 | + PHPParser p = new PHPParser(); |
| 436 | + String initset = p.readURL(new URL("file:///home/wikipedia/common/php-1.5/InitialiseSettings.php")); |
| 437 | + System.out.println(p.getLanguages(initset)); |
| 438 | + System.out.println("wgServer: " + p.getServer(initset)); |
| 439 | + System.out.println(p.getDefaultSearch(initset)); |
| 440 | + System.out.println(p.getMetaNamespace(initset)); |
| 441 | + System.out.println(p.getMetaNamespaceTalk(initset)); |
| 442 | + System.out.println(p.getExtraNamespaces(initset)); |
| 443 | + System.out.println(p.getNamespacesWithSubpages(initset)); |
| 444 | + System.out.println(p.getContentNamespaces(initset)); |
| 445 | + } catch(Exception e){ |
| 446 | + e.printStackTrace(); |
| 447 | + } |
440 | 448 | } |
441 | 449 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java |
— | — | @@ -149,10 +149,10 @@ |
150 | 150 | in.close(); |
151 | 151 | } catch (IOException e) { |
152 | 152 | e.printStackTrace(); |
153 | | - log.error("Error reading unicode data file from resource : "+e.getMessage()); |
| 153 | + log.error("Error reading unicode data file from resource : "+e.getMessage(),e); |
154 | 154 | } catch (Exception e){ |
155 | 155 | e.printStackTrace(); |
156 | | - log.error("Error in unicode data file : "+e.getMessage()); |
| 156 | + log.error("Error in unicode data file : "+e.getMessage(),e); |
157 | 157 | } |
158 | 158 | } |
159 | 159 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/LinkReader.java |
— | — | @@ -61,7 +61,7 @@ |
62 | 62 | if( page.Title.Namespace >= 0) |
63 | 63 | links.addArticleInfo(revision.Text,t,exactCase,Integer.toString(page.Id)); |
64 | 64 | } catch(Exception e){ |
65 | | - log.error("Error adding article "+t+" : "+e.getMessage()); |
| 65 | + log.error("Error adding article "+t+" : "+e.getMessage(),e); |
66 | 66 | e.printStackTrace(); |
67 | 67 | } |
68 | 68 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/Links.java |
— | — | @@ -268,7 +268,7 @@ |
269 | 269 | flush(); |
270 | 270 | } catch(Exception e){ |
271 | 271 | // report but continue |
272 | | - log.warn("Error opening links index "+ iid +": "+e.getMessage()); |
| 272 | + log.warn("Error opening links index "+ iid +": "+e.getMessage(),e); |
273 | 273 | } |
274 | 274 | // batch add |
275 | 275 | writer = WikiIndexModifier.openForWrite(iid.getIndexPath(),false,new SimpleAnalyzer()); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexWriter.java |
— | — | @@ -103,7 +103,7 @@ |
104 | 104 | } |
105 | 105 | reader.close(); |
106 | 106 | } catch(Exception e){ |
107 | | - log.warn("Error opening for batch update read "+iid+" : "+e.getMessage()); |
| 107 | + log.warn("Error opening for batch update read "+iid+" : "+e.getMessage(),e); |
108 | 108 | } |
109 | 109 | // batch add |
110 | 110 | openWriter(iid.getIndexPath(),false); |
— | — | @@ -149,10 +149,10 @@ |
150 | 150 | log.debug(iid+": Adding document "+a); |
151 | 151 | } catch (IOException e) { |
152 | 152 | e.printStackTrace(); |
153 | | - log.error("I/O Error writing articlet "+a+" to index "+writer); |
| 153 | + log.error("I/O Error writing articlet "+a+" to index "+writer,e); |
154 | 154 | } catch(Exception e){ |
155 | 155 | e.printStackTrace(); |
156 | | - log.error("Error adding document "+a+" with message: "+e.getMessage()); |
| 156 | + log.error("Error adding document "+a+" with message: "+e.getMessage(),e); |
157 | 157 | } |
158 | 158 | } |
159 | 159 | |
— | — | @@ -170,7 +170,7 @@ |
171 | 171 | writer.addDocument(doc,analyzer); |
172 | 172 | } catch (IOException e) { |
173 | 173 | e.printStackTrace(); |
174 | | - log.error("Error adding title info for article "+article+" with message: "+e.getMessage()); |
| 174 | + log.error("Error adding title info for article "+article+" with message: "+e.getMessage(),e); |
175 | 175 | } |
176 | 176 | } |
177 | 177 | |
— | — | @@ -181,7 +181,7 @@ |
182 | 182 | writer.optimize(); |
183 | 183 | writer.close(); |
184 | 184 | } catch(IOException e){ |
185 | | - log.error("I/O error optimizing/closing index at "+iid.getImportPath()+" : "+e.getMessage()); |
| 185 | + log.error("I/O error optimizing/closing index at "+iid.getImportPath()+" : "+e.getMessage(),e); |
186 | 186 | throw e; |
187 | 187 | } |
188 | 188 | } |
— | — | @@ -209,7 +209,7 @@ |
210 | 210 | try { |
211 | 211 | writer.addDocument(doc); |
212 | 212 | } catch (IOException e) { |
213 | | - log.warn("Cannot write metadata : "+e.getMessage()); |
| 213 | + log.warn("Cannot write metadata : "+e.getMessage(),e); |
214 | 214 | } |
215 | 215 | } |
216 | 216 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/api/NgramIndexer.java |
— | — | @@ -55,7 +55,7 @@ |
56 | 56 | WikiIndexModifier.makeDBPath(path); // ensure all directories are made |
57 | 57 | writer = new IndexWriter(path,analyzer,newIndex); |
58 | 58 | } catch (IOException e1) { |
59 | | - log.error("I/O error openning index for addition of documents at "+path+" : "+e.getMessage()); |
| 59 | + log.error("I/O error openning index for addition of documents at "+path+" : "+e.getMessage(),e); |
60 | 60 | throw e1; |
61 | 61 | } |
62 | 62 | } |
— | — | @@ -81,7 +81,7 @@ |
82 | 82 | writer.close(); |
83 | 83 | writer = null; |
84 | 84 | } catch(IOException e){ |
85 | | - log.warn("I/O error closing index at "+path); |
| 85 | + log.warn("I/O error closing index at "+path,e); |
86 | 86 | throw e; |
87 | 87 | } |
88 | 88 | } |
— | — | @@ -95,7 +95,7 @@ |
96 | 96 | writer.close(); |
97 | 97 | writer = null; |
98 | 98 | } catch(IOException e){ |
99 | | - log.warn("I/O error optimizing/closing index at "+path); |
| 99 | + log.warn("I/O error optimizing/closing index at "+path,e); |
100 | 100 | throw e; |
101 | 101 | } |
102 | 102 | } |
— | — | @@ -259,7 +259,7 @@ |
260 | 260 | log.debug("Deleting document matching term "+t); |
261 | 261 | writer.deleteDocuments(t); |
262 | 262 | } catch (Exception e) { |
263 | | - log.error("Cannot delete document : "+e.getMessage()); |
| 263 | + log.error("Cannot delete document : "+e.getMessage(),e); |
264 | 264 | e.printStackTrace(); |
265 | 265 | } |
266 | 266 | } |
— | — | @@ -269,7 +269,7 @@ |
270 | 270 | log.debug("Adding document "+doc); |
271 | 271 | writer.addDocument(doc); |
272 | 272 | } catch (Exception e) { |
273 | | - log.error("Cannot add document "+doc+" : "+e.getMessage()); |
| 273 | + log.error("Cannot add document "+doc+" : "+e.getMessage(),e); |
274 | 274 | e.printStackTrace(); |
275 | 275 | } |
276 | 276 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/Suggest.java |
— | — | @@ -1301,7 +1301,7 @@ |
1302 | 1302 | ret.add(res.get(i)); |
1303 | 1303 | return ret; |
1304 | 1304 | } catch (IOException e) { |
1305 | | - log.error("Cannot get suggestions for "+word+" at "+iid+" : "+e.getMessage()); |
| 1305 | + log.error("Cannot get suggestions for "+word+" at "+iid+" : "+e.getMessage(),e); |
1306 | 1306 | e.printStackTrace(); |
1307 | 1307 | return new ArrayList<SuggestResult>(); |
1308 | 1308 | } |
— | — | @@ -1412,7 +1412,7 @@ |
1413 | 1413 | ret.add(res.get(i)); |
1414 | 1414 | return ret; |
1415 | 1415 | } catch (IOException e) { |
1416 | | - log.error("Cannot get title suggestions for "+title+" at "+iid+" : "+e.getMessage()); |
| 1416 | + log.error("Cannot get title suggestions for "+title+" at "+iid+" : "+e.getMessage(),e); |
1417 | 1417 | e.printStackTrace(); |
1418 | 1418 | return new ArrayList<SuggestResult>(); |
1419 | 1419 | } |
— | — | @@ -1516,7 +1516,7 @@ |
1517 | 1517 | return res.get(0); |
1518 | 1518 | } |
1519 | 1519 | } catch (IOException e) { |
1520 | | - log.warn("I/O error while suggesting split on "+iid+" : "+e.getMessage()); |
| 1520 | + log.warn("I/O error while suggesting split on "+iid+" : "+e.getMessage(),e); |
1521 | 1521 | e.printStackTrace(); |
1522 | 1522 | } |
1523 | 1523 | return null; |
— | — | @@ -1531,7 +1531,7 @@ |
1532 | 1532 | if(freqJoin > 0 && freqJoin > freqPhrase) |
1533 | 1533 | return new SuggestResult(word1+word2,freqJoin,1); |
1534 | 1534 | } catch (IOException e) { |
1535 | | - log.warn("I/O error while suggesting join on "+iid+" : "+e.getMessage()); |
| 1535 | + log.warn("I/O error while suggesting join on "+iid+" : "+e.getMessage(),e); |
1536 | 1536 | e.printStackTrace(); |
1537 | 1537 | } |
1538 | 1538 | return null; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchDaemon.java |
— | — | @@ -375,7 +375,7 @@ |
376 | 376 | try{ |
377 | 377 | sendOutputLine((float)score + " " + namespace + " " + encodeTitle(title)); |
378 | 378 | } catch(Exception e){ |
379 | | - log.error("Error sending result line ("+score + " " + namespace + " " + title +"): "+e.getMessage()); |
| 379 | + log.error("Error sending result line ("+score + " " + namespace + " " + title +"): "+e.getMessage(),e); |
380 | 380 | } |
381 | 381 | } |
382 | 382 | |
— | — | @@ -383,7 +383,7 @@ |
384 | 384 | try{ |
385 | 385 | sendOutputLine(namespace + " " + encodeTitle(title)); |
386 | 386 | } catch(Exception e){ |
387 | | - log.error("Error sending prefix result line (" + namespace + " " + title +"): "+e.getMessage()); |
| 387 | + log.error("Error sending prefix result line (" + namespace + " " + title +"): "+e.getMessage(),e); |
388 | 388 | } |
389 | 389 | } |
390 | 390 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HTTPIndexDaemon.java |
— | — | @@ -87,20 +87,20 @@ |
88 | 88 | } |
89 | 89 | |
90 | 90 | } catch (SecurityException e) { |
91 | | - log.error("Called method "+methodName+" which is not visible"); |
| 91 | + log.error("Called method "+methodName+" which is not visible",e); |
92 | 92 | sendHeaders(400,"Bad Request"); |
93 | 93 | } catch (NoSuchMethodException e) { |
94 | | - log.error("Called unrecognized method "+methodName+". Uri was: "+uri); |
| 94 | + log.error("Called unrecognized method "+methodName+". Uri was: "+uri,e); |
95 | 95 | sendHeaders(404,"Not Found"); |
96 | 96 | } catch (IllegalArgumentException e) { |
97 | | - log.error("Called method "+methodName+" with illegel arguments"); |
| 97 | + log.error("Called method "+methodName+" with illegel arguments",e); |
98 | 98 | sendHeaders(400,"Bad Request"); |
99 | 99 | } catch (IllegalAccessException e) { |
100 | | - log.error("Cannot call method "+methodName+", illegal access."); |
| 100 | + log.error("Cannot call method "+methodName+", illegal access.",e); |
101 | 101 | sendHeaders(400,"Bad Request"); |
102 | 102 | } catch (InvocationTargetException e) { |
103 | 103 | e.printStackTrace(); |
104 | | - log.error("Error while calling method "+methodName+": invocation target exception"); |
| 104 | + log.error("Error while calling method "+methodName+": invocation target exception",e); |
105 | 105 | sendHeaders(400,"Bad Request"); |
106 | 106 | } |
107 | 107 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchServer.java |
— | — | @@ -41,7 +41,7 @@ |
42 | 42 | */ |
43 | 43 | public class SearchServer extends Thread { |
44 | 44 | private static int port = 8123; |
45 | | - private static int maxThreads = 25; |
| 45 | + private static int maxThreads = 80; |
46 | 46 | private static ServerSocket sock; |
47 | 47 | public static String indexPath; |
48 | 48 | public static String[] dbnames; |
— | — | @@ -90,34 +90,41 @@ |
91 | 91 | |
92 | 92 | for (;;) { |
93 | 93 | Socket client = null; |
94 | | - try { |
95 | | - log.debug("Listening..."); |
96 | | - client = sock.accept(); |
97 | | - } catch (Exception e) { |
98 | | - log.error("accept() error: " + e.getMessage()); |
99 | | - // be sure to close all sockets |
100 | | - if(client != null){ |
101 | | - try{ client.getInputStream().close(); } catch(Exception e1) {} |
102 | | - try{ client.getOutputStream().close(); } catch(Exception e1) {} |
103 | | - try{ client.close(); } catch(Exception e1) {} |
| 94 | + try{ |
| 95 | + try { |
| 96 | + log.debug("Listening..."); |
| 97 | + client = sock.accept(); |
| 98 | + } catch (Exception e) { |
| 99 | + log.error("accept() error: " + e.getMessage(),e); |
| 100 | + // be sure to close all sockets |
| 101 | + if(client != null){ |
| 102 | + try{ client.getInputStream().close(); } catch(Exception e1) {} |
| 103 | + try{ client.getOutputStream().close(); } catch(Exception e1) {} |
| 104 | + try{ client.close(); } catch(Exception e1) {} |
| 105 | + } |
| 106 | + continue; |
104 | 107 | } |
105 | | - continue; |
106 | | - } |
107 | | - |
108 | | - int threadCount = SearchDaemon.getOpenCount(); |
109 | | - if (threadCount > maxThreads) { |
110 | | - stats.add(false, 0, threadCount); |
111 | | - log.error("too many connections, skipping a request"); |
112 | | - // be sure to close all sockets |
113 | | - if(client != null){ |
114 | | - try{ client.getInputStream().close(); } catch(Exception e1) {} |
115 | | - try{ client.getOutputStream().close(); } catch(Exception e1) {} |
116 | | - try{ client.close(); } catch(Exception e1) {} |
| 108 | + |
| 109 | + int threadCount = SearchDaemon.getOpenCount(); |
| 110 | + if (threadCount > maxThreads) { |
| 111 | + stats.add(false, 0, threadCount); |
| 112 | + log.error("too many connections, skipping a request"); |
| 113 | + // be sure to close all sockets |
| 114 | + if(client != null){ |
| 115 | + try{ client.getInputStream().close(); } catch(Exception e1) {} |
| 116 | + try{ client.getOutputStream().close(); } catch(Exception e1) {} |
| 117 | + try{ client.close(); } catch(Exception e1) {} |
| 118 | + } |
| 119 | + continue; |
| 120 | + } else { |
| 121 | + SearchDaemon worker = new SearchDaemon(client); |
| 122 | + pool.execute(worker); |
117 | 123 | } |
118 | | - continue; |
119 | | - } else { |
120 | | - SearchDaemon worker = new SearchDaemon(client); |
121 | | - pool.execute(worker); |
| 124 | + } catch(Exception e){ |
| 125 | + log.error("Search server exception: "+e.getMessage(),e); |
| 126 | + try{ client.getInputStream().close(); } catch(Exception e1) {} |
| 127 | + try{ client.getOutputStream().close(); } catch(Exception e1) {} |
| 128 | + try{ client.close(); } catch(Exception e1) {} |
122 | 129 | } |
123 | 130 | } |
124 | 131 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HttpHandler.java |
— | — | @@ -59,7 +59,7 @@ |
60 | 60 | istrm = new DataInputStream(new BufferedInputStream(s.getInputStream())); |
61 | 61 | ostrm = new PrintWriter(new BufferedWriter(new OutputStreamWriter(s.getOutputStream(),"utf-8"))); |
62 | 62 | } catch (IOException e) { |
63 | | - log.error("I/O in opening http socket."); |
| 63 | + log.error("I/O in opening http socket.",e); |
64 | 64 | } |
65 | 65 | } |
66 | 66 | |
— | — | @@ -113,7 +113,7 @@ |
114 | 114 | log.debug("No keep-alive, closing connection ... "); |
115 | 115 | } catch (Exception e) { |
116 | 116 | e.printStackTrace(); |
117 | | - log.error(e.getMessage()); |
| 117 | + log.error(e.getMessage(),e); |
118 | 118 | } finally { |
119 | 119 | if (!headersSent) { |
120 | 120 | sendError(500, "Internal server error", "An internal error occurred: no header sent."); |
— | — | @@ -174,7 +174,7 @@ |
175 | 175 | } catch (URISyntaxException e) { |
176 | 176 | sendError(400, "Bad Request", |
177 | 177 | "Couldn't make sense of the given URI."); |
178 | | - log.warn("Bad URI in request: " + rawUri); |
| 178 | + log.warn("Bad URI in request: " + rawUri,e); |
179 | 179 | return; |
180 | 180 | } |
181 | 181 | |
— | — | @@ -261,7 +261,7 @@ |
262 | 262 | //log.error("Internal error, read "+read+" bytes istead of "+contentLength+" from POST request"); |
263 | 263 | return data; |
264 | 264 | } catch (IOException e) { |
265 | | - log.warn("Could not send raw data in bytes to output stream."); |
| 265 | + log.warn("Could not send raw data in bytes to output stream.",e); |
266 | 266 | } |
267 | 267 | return null; |
268 | 268 | } |
— | — | @@ -273,7 +273,7 @@ |
274 | 274 | try { |
275 | 275 | sin = istrm.readLine(); |
276 | 276 | } catch (IOException e) { |
277 | | - log.warn("I/O problem in reading from stream"); |
| 277 | + log.warn("I/O problem in reading from stream",e); |
278 | 278 | } |
279 | 279 | log.debug("<<<"+ sin); |
280 | 280 | return sin; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/HTTPIndexServer.java |
— | — | @@ -58,7 +58,7 @@ |
59 | 59 | serviceReady = true; |
60 | 60 | client = sock.accept(); |
61 | 61 | } catch (Exception e) { |
62 | | - log.error("accept() error: " + e.getMessage()); |
| 62 | + log.error("accept() error: " + e.getMessage(),e); |
63 | 63 | // be sure to close all sockets |
64 | 64 | if(client != null){ |
65 | 65 | try{ client.getInputStream().close(); } catch(Exception e1) {} |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIServer.java |
— | — | @@ -29,7 +29,7 @@ |
30 | 30 | log.info(name+" bound"); |
31 | 31 | } catch (Exception e) { |
32 | 32 | e.printStackTrace(); |
33 | | - log.warn("Cannot bind "+name+" exception:"+e.getMessage()); |
| 33 | + log.warn("Cannot bind "+name+" exception:"+e.getMessage(),e); |
34 | 34 | } |
35 | 35 | } |
36 | 36 | |
— | — | @@ -40,7 +40,7 @@ |
41 | 41 | log.info(name+" bound"); |
42 | 42 | } catch (Exception e) { |
43 | 43 | e.printStackTrace(); |
44 | | - log.warn("Cannot bind "+name+" exception:"+e.getMessage()); |
| 44 | + log.warn("Cannot bind "+name+" exception:"+e.getMessage(),e); |
45 | 45 | } |
46 | 46 | } |
47 | 47 | |
— | — | @@ -79,7 +79,7 @@ |
80 | 80 | } |
81 | 81 | return true; |
82 | 82 | } catch(IOException e){ |
83 | | - log.warn("Error rebinding searchers for "+iid+" : "+e.getMessage()); |
| 83 | + log.warn("Error rebinding searchers for "+iid+" : "+e.getMessage(),e); |
84 | 84 | e.printStackTrace(); |
85 | 85 | } |
86 | 86 | return false; |
— | — | @@ -96,7 +96,7 @@ |
97 | 97 | return true; |
98 | 98 | } catch (RemoteException e) { |
99 | 99 | e.printStackTrace(); |
100 | | - log.warn("Error binding searchable with basename "+name+" : "+e.getMessage()); |
| 100 | + log.warn("Error binding searchable with basename "+name+" : "+e.getMessage(),e); |
101 | 101 | } catch(Exception e){ |
102 | 102 | e.printStackTrace(); |
103 | 103 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerClient.java |
— | — | @@ -107,7 +107,7 @@ |
108 | 108 | log.debug("Calling remotely indexUpdate("+myhost+","+iid+") on "+host); |
109 | 109 | r.indexUpdated(myhost,iid.toString()); |
110 | 110 | } catch (Exception e) { |
111 | | - log.warn("Error invoking remote method notifyIndexUpdated() on host "+host+" : "+e.getMessage()); |
| 111 | + log.warn("Error invoking remote method notifyIndexUpdated() on host "+host+" : "+e.getMessage(),e); |
112 | 112 | continue; |
113 | 113 | } |
114 | 114 | } |
— | — | @@ -136,10 +136,10 @@ |
137 | 137 | log.debug("Got new RMI messenger for host "+host); |
138 | 138 | return r; |
139 | 139 | } catch (RemoteException e) { |
140 | | - log.warn("Cannot contact RMI registry for host "+host+" : "+e.getMessage()); |
| 140 | + log.warn("Cannot contact RMI registry for host "+host+" : "+e.getMessage(),e); |
141 | 141 | throw e; |
142 | 142 | } catch (NotBoundException e) { |
143 | | - log.warn("No RMIMessenger instance at host "+host+" : "+e.getMessage()); |
| 143 | + log.warn("No RMIMessenger instance at host "+host+" : "+e.getMessage(),e); |
144 | 144 | throw e; |
145 | 145 | } |
146 | 146 | } |
— | — | @@ -160,7 +160,7 @@ |
161 | 161 | return res; |
162 | 162 | } catch (Exception e) { |
163 | 163 | //e.printStackTrace(); |
164 | | - log.warn("Error invoking remote method getIndexTimestamp() on host "+host+" : "+e.getMessage()); |
| 164 | + log.warn("Error invoking remote method getIndexTimestamp() on host "+host+" : "+e.getMessage(),e); |
165 | 165 | } |
166 | 166 | return null; |
167 | 167 | } |
— | — | @@ -171,7 +171,7 @@ |
172 | 172 | log.debug("Calling enqueueUpdateRecords("+records.length+" records) on "+host); |
173 | 173 | r.enqueueUpdateRecords(records); |
174 | 174 | } catch (Exception e) { |
175 | | - log.warn("Error invoking remote method enqueueUpdateRecords() on host "+host+" : "+e.getMessage()); |
| 175 | + log.warn("Error invoking remote method enqueueUpdateRecords() on host "+host+" : "+e.getMessage(),e); |
176 | 176 | throw e; |
177 | 177 | } |
178 | 178 | } |
— | — | @@ -182,7 +182,7 @@ |
183 | 183 | log.debug("Calling enqueueFrontend("+records.length+" records) on "+host); |
184 | 184 | return r.enqueueFrontend(records); |
185 | 185 | } catch (Exception e) { |
186 | | - log.warn("Error invoking remote method enqueueFrontend() on host "+host+" : "+e.getMessage()); |
| 186 | + log.warn("Error invoking remote method enqueueFrontend() on host "+host+" : "+e.getMessage(),e); |
187 | 187 | throw e; |
188 | 188 | } |
189 | 189 | } |
— | — | @@ -198,7 +198,7 @@ |
199 | 199 | recheckRemote(iid,host); |
200 | 200 | HighlightPack pack = new HighlightPack(new SearchResults()); |
201 | 201 | pack.res.retry(); |
202 | | - log.warn("Error invoking remote method searchPart on host "+host+" : "+e.getMessage()); |
| 202 | + log.warn("Error invoking remote method searchPart on host "+host+" : "+e.getMessage(),e); |
203 | 203 | e.printStackTrace(); |
204 | 204 | return pack; |
205 | 205 | } |
— | — | @@ -211,7 +211,7 @@ |
212 | 212 | log.debug("Calling requestFlushAndNotify("+dbname+" records) on "+host); |
213 | 213 | return r.requestFlushAndNotify(dbname); |
214 | 214 | } catch (Exception e) { |
215 | | - log.warn("Error invoking remote method requestFlushAndNotify on host "+host+" : "+e.getMessage()); |
| 215 | + log.warn("Error invoking remote method requestFlushAndNotify on host "+host+" : "+e.getMessage(),e); |
216 | 216 | return false; |
217 | 217 | } |
218 | 218 | } |
— | — | @@ -222,7 +222,7 @@ |
223 | 223 | log.debug("Calling isSuccessfulFlush("+dbname+" records) on "+host); |
224 | 224 | return r.isSuccessfulFlush(dbname); |
225 | 225 | } catch (Exception e) { |
226 | | - log.warn("Error invoking remote method isSuccessfulFlush on host "+host+" : "+e.getMessage()); |
| 226 | + log.warn("Error invoking remote method isSuccessfulFlush on host "+host+" : "+e.getMessage(),e); |
227 | 227 | throw new IOException("Remote error"); |
228 | 228 | } |
229 | 229 | } |
— | — | @@ -235,7 +235,7 @@ |
236 | 236 | log.debug(" \\-> got: "+size); |
237 | 237 | return size; |
238 | 238 | } catch (Exception e) { |
239 | | - log.warn("Error invoking remote method getIndexerQueueSize on host "+host+" : "+e.getMessage()); |
| 239 | + log.warn("Error invoking remote method getIndexerQueueSize on host "+host+" : "+e.getMessage(),e); |
240 | 240 | return -1; |
241 | 241 | } |
242 | 242 | } |
— | — | @@ -268,14 +268,14 @@ |
269 | 269 | return r.searchTitles(dbrole,searchterm,words,query,filter,offset,limit,explain,sortByPhrases); |
270 | 270 | } catch(Exception e){ |
271 | 271 | if(host == null){ |
272 | | - log.warn("Cannot find title host for "+dbrole); |
| 272 | + log.warn("Cannot find title host for "+dbrole,e); |
273 | 273 | return new SearchResults(); |
274 | 274 | } |
275 | 275 | e.printStackTrace(); |
276 | 276 | recheckRemote(dbrole,host); |
277 | 277 | SearchResults res = new SearchResults(); |
278 | 278 | res.setErrorMsg("Error searching titles: "+e.getMessage()); |
279 | | - log.warn("Error invoking remote method searchTitles on host "+host+" : "+e.getMessage()); |
| 279 | + log.warn("Error invoking remote method searchTitles on host "+host+" : "+e.getMessage(),e); |
280 | 280 | return res; |
281 | 281 | } |
282 | 282 | } |
— | — | @@ -286,12 +286,12 @@ |
287 | 287 | return r.suggest(dbrole,searchterm,tokens,info,nsf); |
288 | 288 | } catch(Exception e){ |
289 | 289 | if(host == null){ |
290 | | - log.warn("Cannot find spell-check host for "+dbrole); |
| 290 | + log.warn("Cannot find spell-check host for "+dbrole,e); |
291 | 291 | return null; |
292 | 292 | } |
293 | 293 | e.printStackTrace(); |
294 | 294 | recheckRemote(dbrole,host); |
295 | | - log.warn("Error invoking suggest() on "+host+" : "+e.getMessage()); |
| 295 | + log.warn("Error invoking suggest() on "+host+" : "+e.getMessage(),e); |
296 | 296 | return null; |
297 | 297 | } |
298 | 298 | } |
— | — | @@ -302,7 +302,7 @@ |
303 | 303 | } catch(Exception e){ |
304 | 304 | recheckRemote(dbrole,host); |
305 | 305 | e.printStackTrace(); |
306 | | - log.warn("Error invoking getFuzzy() on "+host+" : "+e.getMessage()); |
| 306 | + log.warn("Error invoking getFuzzy() on "+host+" : "+e.getMessage(),e); |
307 | 307 | return new ArrayList<SuggestResult>(); |
308 | 308 | } |
309 | 309 | } |
— | — | @@ -314,7 +314,7 @@ |
315 | 315 | return r.searchRelated(dbrole,searchterm,offset,limit); |
316 | 316 | } catch(Exception e){ |
317 | 317 | e.printStackTrace(); |
318 | | - log.warn("Error invoking searchRelated() on "+host+" : "+e.getMessage()); |
| 318 | + log.warn("Error invoking searchRelated() on "+host+" : "+e.getMessage(),e); |
319 | 319 | recheckRemote(dbrole,host); |
320 | 320 | SearchResults res = new SearchResults(); |
321 | 321 | res.setErrorMsg("Error searching related index: "+e.getMessage()); |
— | — | @@ -357,7 +357,7 @@ |
358 | 358 | } catch (Exception e) { |
359 | 359 | recheckRemote(dbrole,host); |
360 | 360 | e.printStackTrace(); |
361 | | - log.error("Messenger not bound: "+e.getMessage()); |
| 361 | + log.error("Messenger not bound: "+e.getMessage(),e); |
362 | 362 | return new ArrayList<String>(); |
363 | 363 | } |
364 | 364 | } |
— | — | @@ -369,7 +369,7 @@ |
370 | 370 | return r.getSearcherPoolStatus(dbrole); |
371 | 371 | } catch(NotBoundException e){ |
372 | 372 | e.printStackTrace(); |
373 | | - log.error("Messenger not bound: "+e.getMessage()); |
| 373 | + log.error("Messenger not bound: "+e.getMessage(),e); |
374 | 374 | return new SearcherPoolStatus(false); |
375 | 375 | } |
376 | 376 | } |
— | — | @@ -381,7 +381,7 @@ |
382 | 382 | r.requestSnapshotAndNotify(optimize,pattern,forPrecursor); |
383 | 383 | } catch(NotBoundException e){ |
384 | 384 | e.printStackTrace(); |
385 | | - log.error("Messenger not bound: "+e.getMessage()); |
| 385 | + log.error("Messenger not bound: "+e.getMessage(),e); |
386 | 386 | } |
387 | 387 | } |
388 | 388 | |
— | — | @@ -392,7 +392,7 @@ |
393 | 393 | return r.snapshotFinished(optimize,pattern,forPrecursor); |
394 | 394 | } catch(NotBoundException e){ |
395 | 395 | e.printStackTrace(); |
396 | | - log.error("Messenger not bound: "+e.getMessage()); |
| 396 | + log.error("Messenger not bound: "+e.getMessage(),e); |
397 | 397 | } |
398 | 398 | return false; |
399 | 399 | } |
— | — | @@ -404,7 +404,7 @@ |
405 | 405 | r.addLocalizationCustomMapping(namespaceIndexToName, dbname); |
406 | 406 | } catch(NotBoundException e){ |
407 | 407 | e.printStackTrace(); |
408 | | - log.error("Messenger not bound: "+e.getMessage()); |
| 408 | + log.error("Messenger not bound: "+e.getMessage(),e); |
409 | 409 | } |
410 | 410 | } |
411 | 411 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/benchmark/WordTerms.java |
— | — | @@ -50,7 +50,7 @@ |
51 | 51 | try { |
52 | 52 | words = loadWordFreq(path); |
53 | 53 | } catch (IOException e) { |
54 | | - log.error("Cannot open dictionary of search terms in "+path); |
| 54 | + log.error("Cannot open dictionary of search terms in "+path,e); |
55 | 55 | e.printStackTrace(); |
56 | 56 | } |
57 | 57 | } |
— | — | @@ -59,7 +59,7 @@ |
60 | 60 | try { |
61 | 61 | words = loadWordFreq(stream); |
62 | 62 | } catch (IOException e) { |
63 | | - log.error("Cannot open dictionary of search terms from stream"); |
| 63 | + log.error("Cannot open dictionary of search terms from stream",e); |
64 | 64 | e.printStackTrace(); |
65 | 65 | } |
66 | 66 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java |
— | — | @@ -327,7 +327,7 @@ |
328 | 328 | } |
329 | 329 | reader.close(); |
330 | 330 | } catch(Exception e){ |
331 | | - log.warn("Error while opening prefix precursor "+pre+" : "+e.getMessage()); |
| 331 | + log.warn("Error while opening prefix precursor "+pre+" : "+e.getMessage(),e); |
332 | 332 | } |
333 | 333 | // batch add |
334 | 334 | writer = WikiIndexModifier.openForWrite(pre.getIndexPath(),false,new PrefixAnalyzer()); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/SimpleIndexWriter.java |
— | — | @@ -90,7 +90,7 @@ |
91 | 91 | log.info("Making new index at path "+path); |
92 | 92 | writer = new IndexWriter(path,null,true); |
93 | 93 | } catch (IOException e1) { |
94 | | - log.error("I/O error openning index for addition of documents at "+path+" : "+e.getMessage()); |
| 94 | + log.error("I/O error openning index for addition of documents at "+path+" : "+e1.getMessage(),e1); |
95 | 95 | return null; |
96 | 96 | } |
97 | 97 | } |
— | — | @@ -149,10 +149,10 @@ |
150 | 150 | writer.addDocument(doc,indexAnalyzer); |
151 | 151 | log.debug(target+": Adding document "+a); |
152 | 152 | } catch (IOException e) { |
153 | | - log.error("I/O Error writing article "+a+" to index "+target.getImportPath()+" : "+e.getMessage()); |
| 153 | + log.error("I/O Error writing article "+a+" to index "+target.getImportPath()+" : "+e.getMessage(),e); |
154 | 154 | } catch(Exception e){ |
155 | 155 | e.printStackTrace(); |
156 | | - log.error("Error adding document "+a+" with message: "+e.getMessage()); |
| 156 | + log.error("Error adding document "+a+" with message: "+e.getMessage(),e); |
157 | 157 | } |
158 | 158 | } |
159 | 159 | |
— | — | @@ -169,7 +169,7 @@ |
170 | 170 | addDocument(writer,doc,a,target); |
171 | 171 | } catch (IOException e) { |
172 | 172 | e.printStackTrace(); |
173 | | - log.error("Error adding highlight document for key="+a.getTitleObject().getKey()+" : "+e.getMessage()); |
| 173 | + log.error("Error adding highlight document for key="+a.getTitleObject().getKey()+" : "+e.getMessage(),e); |
174 | 174 | } |
175 | 175 | } |
176 | 176 | /** Add title to the titles index */ |
— | — | @@ -185,7 +185,7 @@ |
186 | 186 | addDocument(writer,doc,a,target); |
187 | 187 | } catch (IOException e) { |
188 | 188 | e.printStackTrace(); |
189 | | - log.error("Error adding title document for key="+a.getTitleObject().getKey()+" : "+e.getMessage()); |
| 189 | + log.error("Error adding title document for key="+a.getTitleObject().getKey()+" : "+e.getMessage(),e); |
190 | 190 | } |
191 | 191 | } |
192 | 192 | |
— | — | @@ -205,7 +205,7 @@ |
206 | 206 | writer.optimize(); |
207 | 207 | writer.close(); |
208 | 208 | } catch(IOException e){ |
209 | | - log.warn("I/O error optimizing/closing index at "+iid.getImportPath()); |
| 209 | + log.warn("I/O error optimizing/closing index at "+iid.getImportPath(),e); |
210 | 210 | throw e; |
211 | 211 | } |
212 | 212 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/BuildAll.java |
— | — | @@ -132,7 +132,7 @@ |
133 | 133 | } |
134 | 134 | } catch(IOException e){ |
135 | 135 | e.printStackTrace(); |
136 | | - log.error("Error during rebuild of "+iid+" : "+e.getMessage()); |
| 136 | + log.error("Error during rebuild of "+iid+" : "+e.getMessage(),e); |
137 | 137 | } |
138 | 138 | } |
139 | 139 | // link titles |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/storage/LinkAnalysisStorage.java |
— | — | @@ -169,7 +169,7 @@ |
170 | 170 | return getAnalitics(d.get("key"),d); |
171 | 171 | } catch(IOException e){ |
172 | 172 | //TODO: Java is not letting us throw exception here |
173 | | - log.error("I/O exception in LinkAnalysisIterator:next() : "+e.getMessage()); |
| 173 | + log.error("I/O exception in LinkAnalysisIterator:next() : "+e.getMessage(),e); |
174 | 174 | return null; |
175 | 175 | } |
176 | 176 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/storage/MySQLStorage.java |
— | — | @@ -61,7 +61,7 @@ |
62 | 62 | try { |
63 | 63 | Class.forName("com.mysql.jdbc.Driver"); |
64 | 64 | } catch (ClassNotFoundException e) { |
65 | | - log.error("Cannot load mysql jdbc driver, class not found: "+e.getMessage()); |
| 65 | + log.error("Cannot load mysql jdbc driver, class not found: "+e.getMessage(),e); |
66 | 66 | } |
67 | 67 | |
68 | 68 | lib = config.getString("Storage","lib","./sql"); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/IndexRegistry.java |
— | — | @@ -145,7 +145,7 @@ |
146 | 146 | } |
147 | 147 | } |
148 | 148 | } catch (IOException e) { |
149 | | - log.warn("Cannot follow symlink for file "+iid.getSearchPath()); |
| 149 | + log.warn("Cannot follow symlink for file "+iid.getSearchPath(),e); |
150 | 150 | } |
151 | 151 | |
152 | 152 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/config/GlobalConfiguration.java |
— | — | @@ -1463,7 +1463,7 @@ |
1464 | 1464 | if(repo == null && wgServer != null){ |
1465 | 1465 | String key = findSuffix(wgServer.keySet(),dbname); |
1466 | 1466 | if(key == null) |
1467 | | - key = "<default>"; |
| 1467 | + key = "default"; |
1468 | 1468 | repo = wgServer.get(key); |
1469 | 1469 | if(repo != null){ |
1470 | 1470 | if(!repo.endsWith("/")) |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java |
— | — | @@ -39,7 +39,7 @@ |
40 | 40 | * |
41 | 41 | */ |
42 | 42 | public class UpdateThread extends Thread { |
43 | | - |
| 43 | + public static long MAX_DEPLOYMENT_DELAY = 15*60; // 15 minutes |
44 | 44 | enum RebuildType { STANDALONE, FULL }; |
45 | 45 | |
46 | 46 | /** iids currently being deployed and out of rotation */ |
— | — | @@ -79,7 +79,7 @@ |
80 | 80 | pending.remove(li.iid.toString()); |
81 | 81 | } catch(Exception e){ |
82 | 82 | e.printStackTrace(); |
83 | | - log.error("Error syncing "+li+" : "+e.getMessage()); |
| 83 | + log.error("Error syncing "+li+" : "+e.getMessage(),e); |
84 | 84 | } |
85 | 85 | } |
86 | 86 | } |
— | — | @@ -263,7 +263,7 @@ |
264 | 264 | |
265 | 265 | } catch(IOException ioe){ |
266 | 266 | ioe.printStackTrace(); |
267 | | - log.error("I/O error updating index "+iid+" at "+li.path+" : "+ioe.getMessage()); |
| 267 | + log.error("I/O error updating index "+iid+" at "+li.path+" : "+ioe.getMessage(),ioe); |
268 | 268 | badIndexes.put(li.iid.toString(),li.timestamp); |
269 | 269 | } |
270 | 270 | } |
— | — | @@ -276,6 +276,7 @@ |
277 | 277 | HashSet<String> group = iid.getSearchHosts(); |
278 | 278 | int succ = 0, fail = 0; |
279 | 279 | boolean reroute = false; |
| 280 | + long waitedSoFar = 0; |
280 | 281 | if(type == RebuildType.FULL){ |
281 | 282 | // never deploy more than one searcher of iid in a search group |
282 | 283 | // wait for other peers to finish deploying before proceeding |
— | — | @@ -292,37 +293,39 @@ |
293 | 294 | fail ++; |
294 | 295 | } catch(RemoteException e){ |
295 | 296 | e.printStackTrace(); |
296 | | - log.warn("Error response from "+host+" : "+e.getMessage()); |
| 297 | + log.warn("Error response from "+host+" : "+e.getMessage(),e); |
297 | 298 | } |
298 | 299 | } |
299 | 300 | } |
300 | 301 | if(fail == 0 && succ >= 1){ |
301 | 302 | wait = false; // proceed to deployment |
302 | 303 | reroute = true; |
303 | | - } else if(fail == 0 && succ == 0){ |
| 304 | + } else if(succ == 0){ |
304 | 305 | wait = false; // we're the only one alive, just deploy.. |
305 | 306 | } else |
306 | 307 | wait = true; |
307 | 308 | } |
308 | 309 | if(wait){ // wait random time (5 -> 15 seconds) |
309 | 310 | try { |
310 | | - Thread.sleep((long)(10000 * (Math.random()+0.5))); |
| 311 | + long interval = (long)(10000 * (Math.random()+0.5)); |
| 312 | + waitedSoFar += interval/1000; |
| 313 | + Thread.sleep(interval); |
311 | 314 | } catch (InterruptedException e) { |
312 | 315 | e.printStackTrace(); |
313 | 316 | } |
314 | 317 | } |
315 | | - } while(wait); |
| 318 | + } while(wait && waitedSoFar < MAX_DEPLOYMENT_DELAY); |
316 | 319 | |
317 | 320 | // reroute queries to other servers |
318 | 321 | if( reroute ){ |
319 | 322 | log.info("Deploying "+iid); |
320 | 323 | beingDeployed.add(iid.toString()); |
321 | 324 | try{ |
322 | | - RMIServer.unbind(iid,cache.getLocalSearcherPool(iid)); |
| 325 | + //RMIServer.unbind(iid,cache.getLocalSearcherPool(iid)); |
323 | 326 | } catch(Exception e) { |
324 | 327 | // we gave it a shot... |
325 | 328 | } |
326 | | - cache.updateLocalSearcherPool(iid,null); |
| 329 | + //cache.updateLocalSearcherPool(iid,null); |
327 | 330 | } |
328 | 331 | |
329 | 332 | } |
— | — | @@ -337,7 +340,7 @@ |
338 | 341 | Warmup.warmupIndexSearcher(is,li.iid,true,null); |
339 | 342 | } catch(IOException e){ |
340 | 343 | e.printStackTrace(); |
341 | | - log.warn("Error warmup up "+li+" : "+e.getMessage()); |
| 344 | + log.warn("Error warmup up "+li+" : "+e.getMessage(),e); |
342 | 345 | } |
343 | 346 | } |
344 | 347 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Wildcards.java |
— | — | @@ -103,7 +103,7 @@ |
104 | 104 | terms.addAll(client.getTerms(e.getValue(),e.getKey(),wildcard,exactCase)); |
105 | 105 | } catch (RemoteException e1) { |
106 | 106 | e1.printStackTrace(); |
107 | | - log.warn("Cannot get terms for "+wildcard+" on host "+e.getValue()+" for "+e.getKey()); |
| 107 | + log.warn("Cannot get terms for "+wildcard+" on host "+e.getValue()+" for "+e.getKey(),e1); |
108 | 108 | } |
109 | 109 | } |
110 | 110 | wildcardCache.put(wildcard,terms); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateInfoImpl.java |
— | — | @@ -41,7 +41,7 @@ |
42 | 42 | try{ |
43 | 43 | return src.getLength(docid,getSlot(pos)); |
44 | 44 | } catch(ArrayIndexOutOfBoundsException e){ |
45 | | - log.warn("Exception occured on pos="+pos); |
| 45 | + log.warn("Exception occured on pos="+pos,e); |
46 | 46 | throw e; |
47 | 47 | } |
48 | 48 | } |
— | — | @@ -50,7 +50,7 @@ |
51 | 51 | try{ |
52 | 52 | return src.getBoost(docid,getSlot(pos)); |
53 | 53 | } catch(ArrayIndexOutOfBoundsException e){ |
54 | | - log.warn("Exception occured on pos="+pos); |
| 54 | + log.warn("Exception occured on pos="+pos,e); |
55 | 55 | throw e; |
56 | 56 | } |
57 | 57 | } |
— | — | @@ -59,7 +59,7 @@ |
60 | 60 | try{ |
61 | 61 | return src.getLengthNoStopWords(docid,getSlot(pos)); |
62 | 62 | } catch(ArrayIndexOutOfBoundsException e){ |
63 | | - log.warn("Exception occured on pos="+pos); |
| 63 | + log.warn("Exception occured on pos="+pos,e); |
64 | 64 | throw e; |
65 | 65 | } |
66 | 66 | } |
— | — | @@ -68,7 +68,7 @@ |
69 | 69 | try{ |
70 | 70 | return src.getLengthComplete(docid,getSlot(pos)); |
71 | 71 | } catch(ArrayIndexOutOfBoundsException e){ |
72 | | - log.warn("Exception occured on pos="+pos); |
| 72 | + log.warn("Exception occured on pos="+pos,e); |
73 | 73 | throw e; |
74 | 74 | } |
75 | 75 | } |
— | — | @@ -92,7 +92,7 @@ |
93 | 93 | try{ |
94 | 94 | return src.getFlags(docid,getSlot(pos)); |
95 | 95 | } catch(ArrayIndexOutOfBoundsException e){ |
96 | | - log.warn("Exception occured on pos="+pos); |
| 96 | + log.warn("Exception occured on pos="+pos,e); |
97 | 97 | throw e; |
98 | 98 | } |
99 | 99 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/RankField.java |
— | — | @@ -50,7 +50,7 @@ |
51 | 51 | try{ |
52 | 52 | ranks[i] = Integer.parseInt(reader.document(i).get("rank")); |
53 | 53 | } catch(NumberFormatException e){ |
54 | | - log.error("Error for docid = "+i); |
| 54 | + log.error("Error for docid = "+i,e); |
55 | 55 | e.printStackTrace(); |
56 | 56 | } |
57 | 57 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/AggregateMetaField.java |
— | — | @@ -149,7 +149,7 @@ |
150 | 150 | count++; |
151 | 151 | } |
152 | 152 | } catch(Exception e){ |
153 | | - log.error("Exception during processing stored_field="+field+" on docid="+i+", with stored="+stored+" : "+e.getMessage()); |
| 153 | + log.error("Exception during processing stored_field="+field+" on docid="+i+", with stored="+stored+" : "+e.getMessage(),e); |
154 | 154 | e.printStackTrace(); |
155 | 155 | throw new IOException(e.getMessage()); |
156 | 156 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/Warmup.java |
— | — | @@ -195,10 +195,10 @@ |
196 | 196 | } |
197 | 197 | } catch (IOException e) { |
198 | 198 | e.printStackTrace(); |
199 | | - log.error("Error warming up local IndexSearcherMul for "+iid); |
| 199 | + log.error("Error warming up local IndexSearcherMul for "+iid,e); |
200 | 200 | } catch (Exception e) { |
201 | 201 | e.printStackTrace(); |
202 | | - log.error("Exception during warmup of "+iid+" : "+e.getMessage()); |
| 202 | + log.error("Exception during warmup of "+iid+" : "+e.getMessage(),e); |
203 | 203 | } |
204 | 204 | } |
205 | 205 | |
— | — | @@ -223,7 +223,7 @@ |
224 | 224 | is.search(new TermQuery(new Term("contents","wikipedia")), |
225 | 225 | new FilterWrapper(filter)); |
226 | 226 | } catch (IOException e) { |
227 | | - log.warn("I/O error while preloading filter for "+iid+" for filter "+filter+" : "+e.getMessage()); |
| 227 | + log.warn("I/O error while preloading filter for "+iid+" for filter "+filter+" : "+e.getMessage(),e); |
228 | 228 | } |
229 | 229 | } |
230 | 230 | } |
— | — | @@ -236,7 +236,7 @@ |
237 | 237 | Query q = parser.parse("wikimedia foundation"); |
238 | 238 | is.search(q,new FilterWrapper(new NamespaceFilter("0"))); |
239 | 239 | } catch (IOException e) { |
240 | | - log.error("Error warming up local IndexSearcherMul for "+iid); |
| 240 | + log.error("Error warming up local IndexSearcherMul for "+iid,e); |
241 | 241 | } |
242 | 242 | } |
243 | 243 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java |
— | — | @@ -56,7 +56,7 @@ |
57 | 57 | s.close(); |
58 | 58 | } catch (IOException e) { |
59 | 59 | e.printStackTrace(); |
60 | | - log.warn("I/O error closing searchables "+s+" : "+e.getMessage()); |
| 60 | + log.warn("I/O error closing searchables "+s+" : "+e.getMessage(),e); |
61 | 61 | } |
62 | 62 | } |
63 | 63 | } |
— | — | @@ -82,6 +82,7 @@ |
83 | 83 | } |
84 | 84 | |
85 | 85 | private IndexSearcherMul open(IndexId iid, String path, RAMDirectory directory) throws IOException { |
| 86 | + initialWarmup.add(iid.toString()); |
86 | 87 | IndexSearcherMul searcher = null; |
87 | 88 | log.debug("Opening local index for "+iid); |
88 | 89 | if(!iid.isMySearch()) |
— | — | @@ -96,7 +97,7 @@ |
97 | 98 | searcher.setSimilarity(new WikiSimilarity()); |
98 | 99 | |
99 | 100 | // preload meta caches |
100 | | - if(iid.isArticleIndex()){ |
| 101 | + if(iid.isArticleIndex() || iid.isTitlesBySuffix()){ |
101 | 102 | IndexReader reader = searcher.getIndexReader(); |
102 | 103 | ArrayList<CacheBuilder> builders = new ArrayList<CacheBuilder>(); |
103 | 104 | Collection fields = reader.getFieldNames(FieldOption.ALL); |
— | — | @@ -131,7 +132,7 @@ |
132 | 133 | e.printStackTrace(); |
133 | 134 | // tell registry this is not a good index |
134 | 135 | IndexRegistry.getInstance().invalidateCurrent(iid); |
135 | | - log.error("I/O Error opening index at path "+iid.getCanonicalSearchPath()+" : "+e.getMessage()); |
| 136 | + log.error("I/O Error opening index at path "+iid.getCanonicalSearchPath()+" : "+e.getMessage(),e); |
136 | 137 | throw e; |
137 | 138 | } |
138 | 139 | return searcher; |
— | — | @@ -203,7 +204,9 @@ |
204 | 205 | protected Set<SearchHost> deadPools = Collections.synchronizedSet(new HashSet<SearchHost>()); |
205 | 206 | |
206 | 207 | protected static SearcherCache instance = null; |
207 | | - |
| 208 | + |
| 209 | + /** deployment has been tried at least once for these */ |
| 210 | + protected static Set<String> initialWarmup = Collections.synchronizedSet(new HashSet<String>()); |
208 | 211 | /** |
209 | 212 | * If there is a cached local searcher of iid |
210 | 213 | * |
— | — | @@ -260,8 +263,8 @@ |
261 | 264 | public IndexSearcherMul getLocalSearcher(IndexId iid) throws IOException{ |
262 | 265 | if(iid == null) |
263 | 266 | throw new RuntimeException("No such index"); |
264 | | - if(UpdateThread.isBeingDeployed(iid)) |
265 | | - throw new IOException(iid+" is being deployed"); |
| 267 | + if(!initialWarmup.contains(iid.toString())) |
| 268 | + throw new RuntimeException(iid+" is being deployed"); |
266 | 269 | return fromLocalCache(iid.toString()); |
267 | 270 | } |
268 | 271 | |
— | — | @@ -329,13 +332,13 @@ |
330 | 333 | } |
331 | 334 | } catch(RemoteException e){ |
332 | 335 | e.printStackTrace(); |
333 | | - log.warn("Cannot get searcher status for "+iid+" on "+host+" : "+e.getMessage()); |
| 336 | + log.warn("Cannot get searcher status for "+iid+" on "+host+" : "+e.getMessage(),e); |
334 | 337 | } catch (IOException e) { |
335 | 338 | e.printStackTrace(); |
336 | | - log.warn("I/O error trying to construct remote searcher pool for "+iid+" on "+host+" : "+e.getMessage()); |
| 339 | + log.warn("I/O error trying to construct remote searcher pool for "+iid+" on "+host+" : "+e.getMessage(),e); |
337 | 340 | } catch (NotBoundException e) { |
338 | 341 | e.printStackTrace(); |
339 | | - log.warn("Remote searcher for "+iid+" on "+host+" not bound : "+e.getMessage()); |
| 342 | + log.warn("Remote searcher for "+iid+" on "+host+" not bound : "+e.getMessage(),e); |
340 | 343 | } |
341 | 344 | // if we reach this point something went wrong |
342 | 345 | deadPools.add(new SearchHost(iid,host)); |
— | — | @@ -368,7 +371,7 @@ |
369 | 372 | RMIServer.bind(iid,pool.searchers); |
370 | 373 | } |
371 | 374 | } catch (IOException e) { |
372 | | - log.warn("I/O error warming index for "+iid+" : "+e.getMessage()); |
| 375 | + log.warn("I/O error warming index for "+iid+" : "+e.getMessage(),e); |
373 | 376 | } |
374 | 377 | } |
375 | 378 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/FilterWrapper.java |
— | — | @@ -79,5 +79,13 @@ |
80 | 80 | public boolean hasAnyFilters(){ |
81 | 81 | return hasNamespaceFilter() || hasCustomFilters(); |
82 | 82 | } |
| 83 | + |
| 84 | + /** If filter is not empty, get this filter, otherwise just get null */ |
| 85 | + public Filter getFilterOrNull(){ |
| 86 | + if(hasAnyFilters()) |
| 87 | + return this; |
| 88 | + else |
| 89 | + return null; |
| 90 | + } |
83 | 91 | |
84 | 92 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/ArticleMeta.java |
— | — | @@ -94,7 +94,7 @@ |
95 | 95 | String ext = ""; |
96 | 96 | if(doc != null) |
97 | 97 | ext = ", ns="+doc.get("namespace")+", title="+doc.get("title"); |
98 | | - log.error("Exception during caching of article info for docid="+i+ext); |
| 98 | + log.error("Exception during caching of article info for docid="+i+ext,e); |
99 | 99 | e.printStackTrace(); |
100 | 100 | throw new IOException(e.getMessage()); |
101 | 101 | } |
— | — | @@ -156,7 +156,7 @@ |
157 | 157 | return diff; |
158 | 158 | } catch (ParseException e) { |
159 | 159 | e.printStackTrace(); |
160 | | - log.error("Error parsing date "+dateStr+" : "+e.getMessage()); |
| 160 | + log.error("Error parsing date "+dateStr+" : "+e.getMessage(),e); |
161 | 161 | } |
162 | 162 | return 0; |
163 | 163 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java |
— | — | @@ -225,7 +225,7 @@ |
226 | 226 | } catch(IOException e){ |
227 | 227 | e.printStackTrace(); |
228 | 228 | res.setErrorMsg("I/O processing the request : "+e.getMessage()); |
229 | | - log.error("I/O error in searchSimilar() : "+e.getMessage()); |
| 229 | + log.error("I/O error in searchSimilar() : "+e.getMessage(),e); |
230 | 230 | } |
231 | 231 | return res; |
232 | 232 | } |
— | — | @@ -314,7 +314,7 @@ |
315 | 315 | HashSet<String> stopWords = StopWords.getPredefinedSet(iid); |
316 | 316 | WikiQueryParser parser = new WikiQueryParser(bs.getFields().contents(),nsDefault,analyzer,bs,NamespacePolicy.IGNORE,stopWords); |
317 | 317 | Query q = parser.parse(key.substring(key.indexOf(':')+1),new WikiQueryParser.ParsingOptions(true)); |
318 | | - highlight(iid,q,parser.getWordsClean(),searcher,res,true,true); |
| 318 | + highlight(iid,q,parser.getWordsClean(),searcher,res,true,true,null); |
319 | 319 | } else{ |
320 | 320 | res.addInfo("related",global.getLocalhost()); |
321 | 321 | res.setSuccess(true); |
— | — | @@ -355,7 +355,7 @@ |
356 | 356 | return messenger.searchPrefix(host,pre.toString(),searchterm,limit,nsf); |
357 | 357 | } catch(IOException e){ |
358 | 358 | e.printStackTrace(); |
359 | | - log.error("Error opening searcher in prefixSearch on "+pre+" : "+e.getMessage()); |
| 359 | + log.error("Error opening searcher in prefixSearch on "+pre+" : "+e.getMessage(),e); |
360 | 360 | SearchResults res = new SearchResults(); |
361 | 361 | res.setErrorMsg("I/O error on index "+pre); |
362 | 362 | return res; |
— | — | @@ -489,7 +489,7 @@ |
490 | 490 | sendStats(start-System.currentTimeMillis()); |
491 | 491 | } catch (IOException e) { |
492 | 492 | e.printStackTrace(); |
493 | | - log.error("Internal error in prefixSearch on "+pre+" : "+e.getMessage()); |
| 493 | + log.error("Internal error in prefixSearch on "+pre+" : "+e.getMessage(),e); |
494 | 494 | res.setErrorMsg("I/O error on index "+pre); |
495 | 495 | } |
496 | 496 | return res; |
— | — | @@ -522,13 +522,13 @@ |
523 | 523 | // search |
524 | 524 | SearchResults res = makeTitlesSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain); |
525 | 525 | // highlight |
526 | | - highlightTitles(iid,q,words,searcher,res,sortByPhrases,false); |
| 526 | + highlightTitles(iid,q,words,searcher,res,sortByPhrases,false,null); |
527 | 527 | return res; |
528 | 528 | } catch (IOException e) { |
529 | 529 | e.printStackTrace(); |
530 | 530 | SearchResults res = new SearchResults(); |
531 | 531 | res.setErrorMsg("Internal error in SearchEngine: "+e.getMessage()); |
532 | | - log.error("I/O error in searchTitles(): "+e.getMessage()); |
| 532 | + log.error("I/O error in searchTitles(): "+e.getMessage(),e); |
533 | 533 | return res; |
534 | 534 | } |
535 | 535 | } |
— | — | @@ -542,14 +542,14 @@ |
543 | 543 | IndexSearcherMul searcher; |
544 | 544 | long searchStart = System.currentTimeMillis(); |
545 | 545 | searcher = cache.getLocalSearcher(iid); |
546 | | - FilterWrapper localfilter = filter; |
| 546 | + /*FilterWrapper localfilter = filter; |
547 | 547 | if(iid.isMainsplit() && iid.isMainPart()) |
548 | 548 | localfilter.setNamespaceFilter(null); |
549 | 549 | else if(iid.isNssplit() && !iid.isLogical() && iid.getNamespaceSet().size()==1 && !iid.getNamespaceSet().contains("<default>")) |
550 | 550 | localfilter.setNamespaceFilter(null); |
551 | 551 | if(localfilter.getNamespaceFilter() != null) |
552 | | - log.info("Using namespace filter: "+localfilter); |
553 | | - TopDocs hits = searcher.search(q,localfilter,offset+limit); |
| 552 | + log.info("Using namespace filter: "+localfilter); */ |
| 553 | + TopDocs hits = searcher.search(q,filter.getFilterOrNull(),offset+limit); |
554 | 554 | SearchResults res = makeSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain); |
555 | 555 | HighlightPack pack = new HighlightPack(res); |
556 | 556 | // pack extra info needed for highlighting |
— | — | @@ -561,7 +561,7 @@ |
562 | 562 | e.printStackTrace(); |
563 | 563 | HighlightPack pack = new HighlightPack(new SearchResults()); |
564 | 564 | pack.res.setErrorMsg("Internal error in SearchEngine: "+e.getMessage()); |
565 | | - log.error("Internal error in SearchEngine while trying to search main part: "+e.getMessage()); |
| 565 | + log.error("Internal error in SearchEngine while trying to search main part: "+e.getMessage(),e); |
566 | 566 | return pack; |
567 | 567 | } |
568 | 568 | |
— | — | @@ -600,6 +600,8 @@ |
601 | 601 | // use default filter if it's cached or composable of cached entries |
602 | 602 | } else if(cachedFilters.containsValue(nsDefault) || NamespaceCache.isComposable(nsDefault)) |
603 | 603 | nsfw.setNamespaceFilter(nsDefault); |
| 604 | + } else{ |
| 605 | + nsfw.setNamespaceFilter(nsDefault); |
604 | 606 | } |
605 | 607 | |
606 | 608 | parser.extractPrefixFilter(searchterm); |
— | — | @@ -647,7 +649,7 @@ |
648 | 650 | res = pack.res; |
649 | 651 | res.addInfo("search",formatHost(host)); |
650 | 652 | if(!searchOnly){ |
651 | | - highlight(iid,q,parser.getWordsClean(),pack.terms,pack.dfs,pack.maxDoc,res,exactCase,null,parser.hasPhrases(),false); |
| 653 | + highlight(iid,q,parser.getWordsClean(),pack.terms,pack.dfs,pack.maxDoc,res,exactCase,null,parser.hasPhrases(),false,commonsWiki); |
652 | 654 | fetchTitles(res,searchterm,nsfw,iid,parser,offset,iwoffset,iwlimit,explain); |
653 | 655 | suggest(iid,searchterm,parser,res,offset,nsfw); |
654 | 656 | } |
— | — | @@ -676,11 +678,11 @@ |
677 | 679 | Wildcards wildcards = new Wildcards(searcher.getAllHosts(),exactCase); |
678 | 680 | q = parseQuery(searchterm,parser,iid,raw,nsfw,searchAll,wildcards); |
679 | 681 | |
680 | | - hits = searcher.search(q,nsfw,offset+limit); |
| 682 | + hits = searcher.search(q,nsfw.getFilterOrNull(),offset+limit); |
681 | 683 | res = makeSearchResults(searcher,hits,offset,limit,iid,searchterm,q,searchStart,explain); |
682 | 684 | res.addInfo("search",formatHosts(searcher.getAllHosts().values())); |
683 | 685 | if(!searchOnly){ |
684 | | - highlight(iid,q,parser.getWordsClean(),searcher,parser.getHighlightTerms(),res,exactCase,parser.hasPhrases(),false); |
| 686 | + highlight(iid,q,parser.getWordsClean(),searcher,parser.getHighlightTerms(),res,exactCase,parser.hasPhrases(),false,commonsWiki); |
685 | 687 | fetchTitles(res,searchterm,nsfw,iid,parser,offset,iwoffset,iwlimit,explain); |
686 | 688 | suggest(iid,searchterm,parser,res,offset,nsfw); |
687 | 689 | } |
— | — | @@ -695,19 +697,19 @@ |
696 | 698 | e.printStackTrace(); |
697 | 699 | res = new SearchResults(); |
698 | 700 | res.retry(); |
699 | | - log.warn("Retry, temportal error for query: ["+q+"] on "+iid+" : "+e.getMessage()); |
| 701 | + log.warn("Retry, temportal error for query: ["+q+"] on "+iid+" : "+e.getMessage(),e); |
700 | 702 | return res; |
701 | 703 | } |
702 | 704 | } catch(ParseException e){ |
703 | 705 | res = new SearchResults(); |
704 | 706 | res.setErrorMsg("Error parsing query: "+searchterm); |
705 | | - log.error("Cannot parse query: "+searchterm+", error: "+e.getMessage()); |
| 707 | + log.error("Cannot parse query: "+searchterm+", error: "+e.getMessage(),e); |
706 | 708 | return res; |
707 | 709 | } catch (Exception e) { |
708 | 710 | res = new SearchResults(); |
709 | 711 | e.printStackTrace(); |
710 | 712 | res.setErrorMsg("Internal error in SearchEngine: "+e.getMessage()); |
711 | | - log.error("Internal error in SearchEngine trying to make WikiSearcher: "+e.getMessage()); |
| 713 | + log.error("Internal error in SearchEngine trying to make WikiSearcher: "+e.getMessage(),e); |
712 | 714 | return res; |
713 | 715 | } |
714 | 716 | } |
— | — | @@ -791,6 +793,8 @@ |
792 | 794 | return; |
793 | 795 | if(offset != 0) |
794 | 796 | return; // do titles search only for first page of normal-search results |
| 797 | + if(parser.hasPrefixFilter()) |
| 798 | + return; // TODO: implement, currently we don't do interwiki prefix queries |
795 | 799 | try{ |
796 | 800 | IndexId titles = iid.getTitlesIndex(); |
797 | 801 | IndexId main = titles.getDB(); |
— | — | @@ -842,7 +846,7 @@ |
843 | 847 | |
844 | 848 | TopDocs hits = searcher.search(q,wrap,iwoffset+iwlimit); |
845 | 849 | SearchResults r = makeTitlesSearchResults(searcher,hits,iwoffset,iwlimit,main,searchterm,q,searchStart,explain); |
846 | | - highlightTitles(main,q,words,searcher,r,parser.hasWildcards(),false); |
| 850 | + highlightTitles(main,q,words,searcher,r,parser.hasWildcards(),false,null); |
847 | 851 | |
848 | 852 | if(r.isSuccess()){ |
849 | 853 | res.setTitles(r.getResults()); |
— | — | @@ -855,7 +859,7 @@ |
856 | 860 | |
857 | 861 | } catch(Exception e){ |
858 | 862 | e.printStackTrace(); |
859 | | - log.error("Error fetching grouped titles: "+e.getMessage()); |
| 863 | + log.error("Error fetching grouped titles: "+e.getMessage(),e); |
860 | 864 | } |
861 | 865 | } |
862 | 866 | |
— | — | @@ -949,48 +953,50 @@ |
950 | 954 | } |
951 | 955 | |
952 | 956 | /** Highlight search results, and set the property in ResultSet */ |
953 | | - protected void highlight(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, Term[] terms, SearchResults res, boolean exactCase, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
| 957 | + protected void highlight(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, Term[] terms, SearchResults res, boolean exactCase, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{ |
954 | 958 | if(terms == null) |
955 | 959 | return; |
956 | 960 | int[] df = searcher.docFreqs(terms); |
957 | 961 | int maxDoc = searcher.maxDoc(); |
958 | | - highlight(iid,q,words,terms,df,maxDoc,res,exactCase,null,sortByPhrases,alwaysIncludeFirst); |
| 962 | + highlight(iid,q,words,terms,df,maxDoc,res,exactCase,null,sortByPhrases,alwaysIncludeFirst,commonsWiki); |
959 | 963 | } |
960 | 964 | |
961 | 965 | /** Highlight search results, and set the property in ResultSet */ |
962 | | - protected void highlight(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
| 966 | + protected void highlight(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{ |
963 | 967 | Term[] terms = getTerms(q,"contents"); |
964 | 968 | if(terms == null) |
965 | 969 | return; |
966 | 970 | int[] df = searcher.docFreqs(terms); |
967 | 971 | int maxDoc = searcher.maxDoc(); |
968 | | - highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst); |
| 972 | + highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst,commonsWiki); |
969 | 973 | } |
970 | 974 | |
971 | 975 | /** Highlight search results from titles index */ |
972 | | - protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
| 976 | + protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, IndexSearcherMul searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{ |
973 | 977 | Term[] terms = getTerms(q,"alttitle"); |
974 | 978 | if(terms == null) |
975 | 979 | return; |
976 | 980 | int[] df = searcher.docFreqs(terms); |
977 | 981 | int maxDoc = searcher.maxDoc(); |
978 | | - highlight(iid,q,words,terms,df,maxDoc,res,false,searcher.getIndexReader(),sortByPhrases,alwaysIncludeFirst); |
| 982 | + highlight(iid,q,words,terms,df,maxDoc,res,false,searcher.getIndexReader(),sortByPhrases,alwaysIncludeFirst,commonsWiki); |
979 | 983 | resolveInterwikiNamespaces(res,iid); |
980 | 984 | } |
981 | 985 | |
982 | 986 | /** Highlight search results from titles index using a wikisearcher */ |
983 | | - protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
| 987 | + protected void highlightTitles(IndexId iid, Query q, ArrayList<String> words, WikiSearcher searcher, SearchResults res, boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{ |
984 | 988 | Term[] terms = getTerms(q,"alttitle"); |
985 | 989 | if(terms == null) |
986 | 990 | return; |
987 | 991 | int[] df = searcher.docFreqs(terms); |
988 | 992 | int maxDoc = searcher.maxDoc(); |
989 | | - highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst); |
| 993 | + highlight(iid,q,words,terms,df,maxDoc,res,false,null,sortByPhrases,alwaysIncludeFirst,commonsWiki); |
990 | 994 | resolveInterwikiNamespaces(res,iid); |
991 | 995 | } |
992 | 996 | |
993 | 997 | /** Highlight article (don't call directly, use one of the interfaces above instead) */ |
994 | | - protected void highlight(IndexId iid, Query q, ArrayList<String> words, Term[] terms, int[] df, int maxDoc, SearchResults res, boolean exactCase, IndexReader reader, boolean sortByPhrases, boolean alwaysIncludeFirst) throws IOException{ |
| 998 | + protected void highlight(IndexId iid, Query q, ArrayList<String> words, Term[] terms, int[] df, |
| 999 | + int maxDoc, SearchResults res, boolean exactCase, IndexReader reader, |
| 1000 | + boolean sortByPhrases, boolean alwaysIncludeFirst, IndexId commonsWiki) throws IOException{ |
995 | 1001 | // iid -> array of keys |
996 | 1002 | HashMap<IndexId,ArrayList<String>> map = new HashMap<IndexId,ArrayList<String>>(); |
997 | 1003 | iid = iid.getHighlight(); |
— | — | @@ -999,12 +1005,17 @@ |
1000 | 1006 | for(ResultSet r : res.getResults()){ |
1001 | 1007 | IndexId piid = iid.getPartByNamespace(r.namespace); |
1002 | 1008 | ArrayList<String> hits = map.get(piid); |
1003 | | - if(hits == null){ |
1004 | | - hits = new ArrayList<String>(); |
1005 | | - map.put(piid,hits); |
| 1009 | + if(hits == null) |
| 1010 | + map.put(piid,hits = new ArrayList<String>()); |
| 1011 | + hits.add(r.getKey()); |
| 1012 | + keys.put(r.getKey(),r); |
| 1013 | + // check for commons wiki images |
| 1014 | + if(commonsWiki!=null && r.namespace.equals("6")){ |
| 1015 | + hits = map.get(commonsWiki); |
| 1016 | + if(hits == null) |
| 1017 | + map.put(commonsWiki,hits=new ArrayList<String>()); |
| 1018 | + hits.add(r.getKey()); |
1006 | 1019 | } |
1007 | | - hits.add(r.getKey()); |
1008 | | - keys.put(r.getKey(),r); |
1009 | 1020 | } |
1010 | 1021 | // highlight! |
1011 | 1022 | HashSet<String> stopWords = StopWords.getPredefinedSet(iid); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/highlight/Highlight.java |
— | — | @@ -143,7 +143,7 @@ |
144 | 144 | try{ |
145 | 145 | ret = getTokens(reader,key,allTerms,fields); |
146 | 146 | } catch(Exception e){ |
147 | | - log.error("Error geting tokens: "+e.getMessage()); |
| 147 | + log.error("Error geting tokens: "+e.getMessage(),e); |
148 | 148 | e.printStackTrace(); |
149 | 149 | } |
150 | 150 | if(ret == null) |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java |
— | — | @@ -78,7 +78,7 @@ |
79 | 79 | inTransaction = true; |
80 | 80 | log.info("Transaction on index "+iid+" started"); |
81 | 81 | } catch(Exception e){ |
82 | | - log.error("Error while intializing transaction: "+e.getMessage()); |
| 82 | + log.error("Error while intializing transaction: "+e.getMessage(),e); |
83 | 83 | lock.unlock(); |
84 | 84 | } |
85 | 85 | } |
— | — | @@ -94,7 +94,7 @@ |
95 | 95 | if(info.exists()) |
96 | 96 | FSUtils.deleteRecursive(info.getAbsoluteFile()); |
97 | 97 | } catch(Exception e){ |
98 | | - log.error("Error removing old transaction data from "+iid.getTransactionPath(type)+" : "+e.getMessage()); |
| 98 | + log.error("Error removing old transaction data from "+iid.getTransactionPath(type)+" : "+e.getMessage(),e); |
99 | 99 | } |
100 | 100 | |
101 | 101 | } |
— | — | @@ -139,7 +139,7 @@ |
140 | 140 | FSUtils.createHardLinkRecursive(backup.getAbsolutePath(),path); |
141 | 141 | FSUtils.deleteRecursive(backup.getAbsoluteFile()); // cleanup |
142 | 142 | } catch(Exception e){ |
143 | | - log.error("Recovery of index "+iid+" failed with error "+e.getMessage()); |
| 143 | + log.error("Recovery of index "+iid+" failed with error "+e.getMessage(),e); |
144 | 144 | } |
145 | 145 | } |
146 | 146 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/WikiIndexModifier.java |
— | — | @@ -161,7 +161,7 @@ |
162 | 162 | } |
163 | 163 | reader.close(); |
164 | 164 | } catch (IOException e) { |
165 | | - log.warn("I/O Error: could not open/read "+iid.getIndexPath()+" while deleting document."); |
| 165 | + log.warn("I/O Error: could not open/read "+iid.getIndexPath()+" while deleting document.",e); |
166 | 166 | return false; |
167 | 167 | } |
168 | 168 | return true; |
— | — | @@ -212,11 +212,11 @@ |
213 | 213 | |
214 | 214 | log.debug(iid+": Adding document "+rec.getArticle().toStringFull()); |
215 | 215 | } catch (IOException e) { |
216 | | - log.error("Error writing document "+rec+" to index "+path); |
| 216 | + log.error("Error writing document "+rec+" to index "+path,e); |
217 | 217 | succ = false; // report unsucc, but still continue, to process all cards |
218 | 218 | } catch(Exception e){ |
219 | 219 | e.printStackTrace(); |
220 | | - log.error("Error adding document "+rec.getIndexKey()+" with message: "+e.getMessage()); |
| 220 | + log.error("Error adding document "+rec.getIndexKey()+" with message: "+e.getMessage(),e); |
221 | 221 | succ = false; // report unsucc, but still continue, to process all cards |
222 | 222 | } |
223 | 223 | } |
— | — | @@ -224,7 +224,7 @@ |
225 | 225 | try { |
226 | 226 | writer.close(); |
227 | 227 | } catch (IOException e) { |
228 | | - log.error("Error closing index "+path); |
| 228 | + log.error("Error closing index "+path,e); |
229 | 229 | return false; |
230 | 230 | } |
231 | 231 | return succ; |
— | — | @@ -258,7 +258,7 @@ |
259 | 259 | throw e; |
260 | 260 | } catch (IOException e1) { |
261 | 261 | e1.printStackTrace(); |
262 | | - log.error("I/O error openning index at "+path+" : "+e.getMessage()); |
| 262 | + log.error("I/O error openning index at "+path+" : "+e.getMessage(),e); |
263 | 263 | throw e1; |
264 | 264 | } |
265 | 265 | } |
— | — | @@ -356,7 +356,7 @@ |
357 | 357 | log.info("Unlocked index at "+path); |
358 | 358 | } |
359 | 359 | } catch(IOException e){ |
360 | | - log.warn("I/O error unlock index at "+path+" : "+e.getMessage()); |
| 360 | + log.warn("I/O error unlock index at "+path+" : "+e.getMessage(),e); |
361 | 361 | } |
362 | 362 | } |
363 | 363 | |
— | — | @@ -401,7 +401,7 @@ |
402 | 402 | && updateTitles(iid,updateRecords); |
403 | 403 | } catch(Exception e){ |
404 | 404 | e.printStackTrace(); |
405 | | - log.error("Error updating "+iid+" : "+e.getMessage()); |
| 405 | + log.error("Error updating "+iid+" : "+e.getMessage(),e); |
406 | 406 | return false; |
407 | 407 | } |
408 | 408 | } |
— | — | @@ -514,7 +514,7 @@ |
515 | 515 | return true; |
516 | 516 | } catch(IOException e){ |
517 | 517 | e.printStackTrace(); |
518 | | - log.error("Cannot fetch links info: "+e.getMessage()); |
| 518 | + log.error("Cannot fetch links info: "+e.getMessage(),e); |
519 | 519 | throw e; |
520 | 520 | } |
521 | 521 | } |
— | — | @@ -550,7 +550,7 @@ |
551 | 551 | } catch(IOException e){ |
552 | 552 | trans.rollback(); |
553 | 553 | e.printStackTrace(); |
554 | | - log.error("Cannot update links index: "+e.getMessage()); |
| 554 | + log.error("Cannot update links index: "+e.getMessage(),e); |
555 | 555 | return false; |
556 | 556 | } |
557 | 557 | } |
— | — | @@ -565,7 +565,7 @@ |
566 | 566 | return true; |
567 | 567 | } catch(IOException e){ |
568 | 568 | e.printStackTrace(); |
569 | | - log.error("Cannot update prefix index: "+e.getMessage()); |
| 569 | + log.error("Cannot update prefix index: "+e.getMessage(),e); |
570 | 570 | return false; |
571 | 571 | } |
572 | 572 | } |
— | — | @@ -579,7 +579,7 @@ |
580 | 580 | return true; |
581 | 581 | } catch(IOException e){ |
582 | 582 | e.printStackTrace(); |
583 | | - log.error("Cannot update spellcheck index: "+e.getMessage()); |
| 583 | + log.error("Cannot update spellcheck index: "+e.getMessage(),e); |
584 | 584 | return false; |
585 | 585 | } |
586 | 586 | } |
— | — | @@ -593,7 +593,7 @@ |
594 | 594 | return true; |
595 | 595 | } catch(IOException e){ |
596 | 596 | e.printStackTrace(); |
597 | | - log.error("Cannot update spellcheck index: "+e.getMessage()); |
| 597 | + log.error("Cannot update spellcheck index: "+e.getMessage(),e); |
598 | 598 | return false; |
599 | 599 | } |
600 | 600 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java |
— | — | @@ -198,7 +198,7 @@ |
199 | 199 | try { |
200 | 200 | Thread.sleep(1000); |
201 | 201 | } catch (InterruptedException e) { |
202 | | - log.warn("IndexThread sleep interrupted with message: "+e.getMessage()); |
| 202 | + log.warn("IndexThread sleep interrupted with message: "+e.getMessage(),e); |
203 | 203 | } |
204 | 204 | } |
205 | 205 | if(queuedUpdatesExist()) |
— | — | @@ -258,7 +258,7 @@ |
259 | 259 | } |
260 | 260 | } catch(IOException e){ |
261 | 261 | e.printStackTrace(); |
262 | | - log.error("Error optimizing index "+iid); |
| 262 | + log.error("Error optimizing index "+iid,e); |
263 | 263 | badOptimization.add(iid); |
264 | 264 | } finally { |
265 | 265 | if(lock != null) |
— | — | @@ -322,7 +322,7 @@ |
323 | 323 | FSUtils.createHardLinkRecursive(indexPath+sep+f.getName(),snapshot+sep+f.getName(),true); |
324 | 324 | } catch (IOException e) { |
325 | 325 | e.printStackTrace(); |
326 | | - log.error("Error making snapshot "+snapshot+": "+e.getMessage()); |
| 326 | + log.error("Error making snapshot "+snapshot+": "+e.getMessage(),e); |
327 | 327 | return; |
328 | 328 | } |
329 | 329 | } |
— | — | @@ -357,7 +357,7 @@ |
358 | 358 | reader.close(); |
359 | 359 | trans.commit(); |
360 | 360 | } catch (IOException e) { |
361 | | - log.error("Could not optimize index at "+path+" : "+e.getMessage()); |
| 361 | + log.error("Could not optimize index at "+path+" : "+e.getMessage(),e); |
362 | 362 | throw e; |
363 | 363 | } |
364 | 364 | } |
— | — | @@ -643,7 +643,7 @@ |
644 | 644 | lastFlush = System.currentTimeMillis(); |
645 | 645 | } catch (Exception e) { |
646 | 646 | e.printStackTrace(); |
647 | | - log.error("Unexpected error in Index thread while applying updates: "+e.getMessage()); |
| 647 | + log.error("Unexpected error in Index thread while applying updates: "+e.getMessage(),e); |
648 | 648 | return; |
649 | 649 | } |
650 | 650 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/SerbianFilter.java |
— | — | @@ -160,18 +160,19 @@ |
161 | 161 | String cv; |
162 | 162 | boolean diff = false; |
163 | 163 | aliasDiff = false; |
164 | | - for(char c : text.toCharArray()){ |
165 | | - cv = conv[c]; |
166 | | - if(cv == null){ |
167 | | - buffer[length++] = c; |
168 | | - } else{ |
169 | | - for(char ch : cv.toCharArray()){ |
170 | | - buffer[length++] = ch; |
171 | | - diff = true; |
172 | | - if( c != 'đ' && c != 'Đ') |
173 | | - aliasDiff = true; |
174 | | - } |
175 | | - } |
| 164 | + for(int i=0;i<text.length() && i<buffer.length;i++){ |
| 165 | + char c = text.charAt(i); |
| 166 | + cv = conv[c]; |
| 167 | + if(cv == null){ |
| 168 | + buffer[length++] = c; |
| 169 | + } else{ |
| 170 | + for(char ch : cv.toCharArray()){ |
| 171 | + buffer[length++] = ch; |
| 172 | + diff = true; |
| 173 | + if( c != 'đ' && c != 'Đ') |
| 174 | + aliasDiff = true; |
| 175 | + } |
| 176 | + } |
176 | 177 | } |
177 | 178 | if(diff) |
178 | 179 | return new String(buffer,0,length); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/ExtToken.java |
— | — | @@ -427,9 +427,11 @@ |
428 | 428 | tt.unstub(); |
429 | 429 | } |
430 | 430 | tt.setPositionIncrement(0); |
| 431 | + /* FIXME: this shouldn't happen, but if it does... oh well... |
| 432 | + if(t.type != Type.TEXT) |
| 433 | + raiseException(serialized,cur,t,"Bad serialized data: trying to assign alias to nontext token"); |
| 434 | + */ |
431 | 435 | tokens.add(tt); |
432 | | - if(t.type != Type.TEXT) |
433 | | - raiseException(serialized,cur,t,"Bad serialized data: trying to assign alias to nontext token"); |
434 | 436 | cur += len; |
435 | 437 | break; } |
436 | 438 | case 3: // change pos |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WordNet.java |
— | — | @@ -166,7 +166,7 @@ |
167 | 167 | log.info("Loaded WordNet synonyms in "+(System.currentTimeMillis()-start)+" ms"); |
168 | 168 | } catch(Exception e){ |
169 | 169 | e.printStackTrace(); |
170 | | - log.warn("Cannot load WordNet synonym file : "+e.getMessage()); |
| 170 | + log.warn("Cannot load WordNet synonym file : "+e.getMessage(),e); |
171 | 171 | state = State.FAILED; |
172 | 172 | } |
173 | 173 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/LanguageAnalyzer.java |
— | — | @@ -83,7 +83,7 @@ |
84 | 84 | |
85 | 85 | return filtered; |
86 | 86 | } catch (Exception e){ |
87 | | - log.error("Error applying custom filter for "+filters.getLanguage()); |
| 87 | + log.error("Error applying custom filter for "+filters.getLanguage(),e); |
88 | 88 | } |
89 | 89 | } |
90 | 90 | return tokens; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/ContextAnalyzer.java |
— | — | @@ -45,7 +45,7 @@ |
46 | 46 | if(contexts != null) |
47 | 47 | part.addAll(contexts); |
48 | 48 | } catch (IOException e) { |
49 | | - log.warn("Cannot fetch context for "+key+" from "+t.getKey()+" : "+e.getMessage()); |
| 49 | + log.warn("Cannot fetch context for "+key+" from "+t.getKey()+" : "+e.getMessage(),e); |
50 | 50 | e.printStackTrace(); |
51 | 51 | } |
52 | 52 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/StopWords.java |
— | — | @@ -42,7 +42,7 @@ |
43 | 43 | try{ |
44 | 44 | return HighFreqTerms.getHighFreqTerms(iid.getDB(),"contents",50).toArray(new String[] {}); |
45 | 45 | } catch(Exception e){ |
46 | | - log.warn("Failed to fetch stop words for "+iid); |
| 46 | + log.warn("Failed to fetch stop words for "+iid,e); |
47 | 47 | return new String[] {}; |
48 | 48 | } |
49 | 49 | } |
— | — | @@ -85,7 +85,7 @@ |
86 | 86 | try { |
87 | 87 | ret.addAll(getCached(iid)); |
88 | 88 | } catch (IOException e) { |
89 | | - log.warn("Cannot get cached stop words for "+iid); |
| 89 | + log.warn("Cannot get cached stop words for "+iid,e); |
90 | 90 | } |
91 | 91 | return ret; |
92 | 92 | } |
— | — | @@ -130,7 +130,7 @@ |
131 | 131 | log.info("Successfully loaded stop words for: "+cachePredefined.keySet()+" in "+(System.currentTimeMillis()-start)+" ms"); |
132 | 132 | } catch(IOException e){ |
133 | 133 | e.printStackTrace(); |
134 | | - log.error("Cannot load stop words definitions: "+e.getMessage()); |
| 134 | + log.error("Cannot load stop words definitions: "+e.getMessage(),e); |
135 | 135 | } |
136 | 136 | loadedPredefined = true; |
137 | 137 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -143,6 +143,8 @@ |
144 | 144 | protected Wildcards wildcards = null; |
145 | 145 | protected Fuzzy fuzzy = null; |
146 | 146 | protected IndexId iid; |
| 147 | + protected boolean isInTitle = false; |
| 148 | + protected int isInTitleLevel = 0; |
147 | 149 | |
148 | 150 | protected Pattern urlPattern = Pattern.compile("(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-\\/]))?"); |
149 | 151 | |
— | — | @@ -682,7 +684,7 @@ |
683 | 685 | /** analyze buffer into tokens using default analyzer */ |
684 | 686 | private void analyzeBuffer(){ |
685 | 687 | String analysisField = defaultField; |
686 | | - if(defaultField.equals("contents") && "intitle".equals(currentField)) |
| 688 | + if(defaultField.equals("contents") && isInTitle) |
687 | 689 | analysisField = "title"; |
688 | 690 | tokenStream = analyzer.tokenStream(analysisField, |
689 | 691 | new String(buffer,0,length)); |
— | — | @@ -729,7 +731,7 @@ |
730 | 732 | private Term makeTerm(String t){ |
731 | 733 | if(currentField == null) |
732 | 734 | return new Term(defaultField,builder.isExactCase()? t : t.toLowerCase()); |
733 | | - else if(defaultField.equals("contents") && "intitle".equals(currentField)) |
| 735 | + else if(defaultField.equals("contents") && isInTitle) |
734 | 736 | return new Term("title",builder.isExactCase()? t : t.toLowerCase()); |
735 | 737 | else if(!"incategory".equals(currentField) && |
736 | 738 | (namespacePolicy == NamespacePolicy.IGNORE || |
— | — | @@ -866,7 +868,7 @@ |
867 | 869 | continue; |
868 | 870 | |
869 | 871 | // terms, fields |
870 | | - if(Character.isLetterOrDigit(c) || c=='.' || c == '[' || c=='*' || c=='?'){ |
| 872 | + if(Character.isLetterOrDigit(c) || c=='.' || c == '[' || c=='*'){ |
871 | 873 | // check for generic namespace prefixes, e.g. [0,1]: |
872 | 874 | if(c == '['){ |
873 | 875 | if(fetchGenericPrefix()) |
— | — | @@ -890,6 +892,10 @@ |
891 | 893 | if(currentField == null || definedExplicitField){ |
892 | 894 | // set field name |
893 | 895 | currentField = new String(buffer,0,length); |
| 896 | + if("intitle".equals(currentField)){ |
| 897 | + isInTitle = true; |
| 898 | + isInTitleLevel = level; |
| 899 | + } |
894 | 900 | if((defaultNamespaceName!=null && currentField.equals(defaultNamespaceName)) || currentField.equals(defaultField)){ |
895 | 901 | currentField = null; |
896 | 902 | break; // repeated definition of field, ignore |
— | — | @@ -990,6 +996,9 @@ |
991 | 997 | break; |
992 | 998 | case ')': |
993 | 999 | if(level > 0){ |
| 1000 | + // get out of titles on appropriate level of parenthesis |
| 1001 | + if(isInTitle && level <= isInTitleLevel) |
| 1002 | + isInTitle = false; |
994 | 1003 | break mainloop; |
995 | 1004 | } |
996 | 1005 | continue; |
— | — | @@ -1033,7 +1042,7 @@ |
1034 | 1043 | boolean wild = false; |
1035 | 1044 | int index = -1; |
1036 | 1045 | for(int i=0;i<length;i++){ |
1037 | | - if(buffer[i] == '*' || buffer[i] == '?'){ |
| 1046 | + if(buffer[i] == '*'){ |
1038 | 1047 | wild = true; |
1039 | 1048 | index = i; |
1040 | 1049 | break; |
— | — | @@ -1041,10 +1050,10 @@ |
1042 | 1051 | } |
1043 | 1052 | // check if it's a valid wildcard |
1044 | 1053 | if(wild){ |
1045 | | - if((buffer[0] == '*' || buffer[0] == '?') && (buffer[length-1]=='*' || buffer[length-1]=='?')) |
| 1054 | + if((buffer[0] == '*') && (buffer[length-1]=='*')) |
1046 | 1055 | return false; // don't support patterns like *a* |
1047 | | - if(index == length-1 && buffer[index]=='?') |
1048 | | - return false; // probably just an ordinary question mark |
| 1056 | + //if(index == length-1 && buffer[index]=='?') |
| 1057 | + // return false; // probably just an ordinary question mark |
1049 | 1058 | for(int i=0;i<length;i++){ |
1050 | 1059 | if(Character.isLetterOrDigit(buffer[i])) |
1051 | 1060 | return true; // +card :P |
— | — | @@ -1215,7 +1224,8 @@ |
1216 | 1225 | if(prefixFilter.startsWith("[") && prefixFilter.contains("]:")){ |
1217 | 1226 | // convert from [2]:query to 2:query form |
1218 | 1227 | prefixFilter = prefixFilter.replace("[","").replace("]:",":"); |
1219 | | - } |
| 1228 | + } else // default to main namespace |
| 1229 | + prefixFilter = "0:"+prefixFilter; |
1220 | 1230 | // return the actual query without prefix |
1221 | 1231 | return queryText.substring(0,inx); |
1222 | 1232 | } |
— | — | @@ -1254,6 +1264,7 @@ |
1255 | 1265 | explicitOccur = null; |
1256 | 1266 | parsedWords = new ParsedWords(); |
1257 | 1267 | urls = new ArrayList<ArrayList<Term>>(); |
| 1268 | + isInTitle = false; |
1258 | 1269 | } |
1259 | 1270 | |
1260 | 1271 | /** Init parsing, call this function to parse text */ |
— | — | @@ -1941,7 +1952,8 @@ |
1942 | 1953 | if(redirectsMulti != null) |
1943 | 1954 | full.add(redirectsMulti,Occur.SHOULD); |
1944 | 1955 | |
1945 | | - return full; |
| 1956 | + ArticleNamespaceScaling nsScale = iid.getNamespaceScaling(); |
| 1957 | + return new ArticleQueryWrap(full,new ArticleInfoImpl(),null,null,nsScale); |
1946 | 1958 | |
1947 | 1959 | } |
1948 | 1960 | |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java |
— | — | @@ -180,7 +180,7 @@ |
181 | 181 | fileis.close(); |
182 | 182 | } |
183 | 183 | } catch (IOException e) { |
184 | | - log.warn("I/O error reading status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage()); |
| 184 | + log.warn("I/O error reading status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage(),e); |
185 | 185 | } |
186 | 186 | String from; |
187 | 187 | if(firstPass.contains(dbname) && timestamp!=null) |
— | — | @@ -215,7 +215,7 @@ |
216 | 216 | } |
217 | 217 | |
218 | 218 | } catch (Exception e) { |
219 | | - log.warn("Error sending index update records of "+iid+" to indexer at "+iid.getIndexHost()); |
| 219 | + log.warn("Error sending index update records of "+iid+" to indexer at "+iid.getIndexHost(),e); |
220 | 220 | continue main_loop; |
221 | 221 | } |
222 | 222 | // more results? |
— | — | @@ -261,13 +261,13 @@ |
262 | 262 | status.store(fileos,"Last incremental update timestamp"); |
263 | 263 | fileos.close(); |
264 | 264 | } catch (IOException e) { |
265 | | - log.warn("I/O error writing status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage()); |
| 265 | + log.warn("I/O error writing status file for "+iid+" at "+iid.getStatusPath()+" : "+e.getMessage(),e); |
266 | 266 | } |
267 | 267 | firstPass.remove(dbname); |
268 | 268 | log.info("Finished update of "+iid); |
269 | 269 | } catch(Exception e){ |
270 | 270 | e.printStackTrace(); |
271 | | - log.warn("Retry later: error while processing update for "+dbname+" : "+e.getMessage()); |
| 271 | + log.warn("Retry later: error while processing update for "+dbname+" : "+e.getMessage(),e); |
272 | 272 | errors = true; |
273 | 273 | } |
274 | 274 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/OAIHarvester.java |
— | — | @@ -76,7 +76,7 @@ |
77 | 77 | ret.addAll(collector.getRecords()); |
78 | 78 | } while(hasMore() && ret.size() < atLeast); |
79 | 79 | } catch(IOException e){ |
80 | | - log.warn("I/O exception listing records: "+e.getMessage()); |
| 80 | + log.warn("I/O exception listing records: "+e.getMessage(),e); |
81 | 81 | return null; |
82 | 82 | } |
83 | 83 | return ret; |