Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/beans/Article.java |
— | — | @@ -66,6 +66,9 @@ |
67 | 67 | /** Rank of the redirect target */ |
68 | 68 | private int redirectRank = 0; |
69 | 69 | |
| 70 | + /** Threading information */ |
| 71 | + public Hashtable<String,String> DiscussionThreadingInfo; |
| 72 | + |
70 | 73 | public Article(){ |
71 | 74 | namespace=""; |
72 | 75 | title=""; |
— | — | @@ -75,6 +78,7 @@ |
76 | 79 | redirects=new ArrayList<Redirect>(); |
77 | 80 | related = new ArrayList<RelatedTitle>(); |
78 | 81 | anchors = new Hashtable<String,Integer>(); |
| 82 | + this.DiscussionThreadingInfo = new Hashtable<String,String>(); |
79 | 83 | } |
80 | 84 | |
81 | 85 | public Article(long pageId, Title title, String text, String redirectTo, int references, int redirectTargetNamespace, int redirectRank) { |
— | — | @@ -87,6 +91,7 @@ |
88 | 92 | |
89 | 93 | public Article(long pageId, int namespace, String titleText, String text, String redirectTo, int references, int redirectTargetNamespace, int redirectRank, |
90 | 94 | ArrayList<Redirect> redirects, ArrayList<RelatedTitle> related, Hashtable<String,Integer> anchorRank, Date date) { |
| 95 | + this(); |
91 | 96 | this.namespace = Integer.toString(namespace); |
92 | 97 | this.title = titleText; |
93 | 98 | this.contents = text; |
— | — | @@ -101,6 +106,20 @@ |
102 | 107 | this.redirectRank = redirectRank; |
103 | 108 | } |
104 | 109 | |
| 110 | + public Article(long pageId, int namespace, String titleText, String text, |
| 111 | + String redirectTo, int references, int redirectTargetNamespace, |
| 112 | + int redirectRank, ArrayList<Redirect> redirects, |
| 113 | + ArrayList<RelatedTitle> related, |
| 114 | + Hashtable<String,Integer> anchorRank, Date date, |
| 115 | + Hashtable<String,String> DiscussionThreadingInfo) { |
| 116 | + // Delegate to the 12-argument constructor; it calls this(), which installs an empty threading table. |
| 117 | + this(pageId, namespace, titleText, text, redirectTo, references, |
| 118 | + redirectTargetNamespace, redirectRank, redirects, related, |
| 119 | + anchorRank, date); |
| 120 | + // A null argument keeps that empty table, so code that enumerates the keys needs no null check. |
| 121 | + if(DiscussionThreadingInfo != null) this.DiscussionThreadingInfo = DiscussionThreadingInfo; |
| 122 | + } |
| 123 | + |
105 | 124 | public boolean isRedirect() { |
106 | 125 | return redirectTo != null; |
107 | 126 | } |
— | — | @@ -288,8 +307,7 @@ |
289 | 308 | |
290 | 309 | public void setRedirectRank(int redirectRank) { |
291 | 310 | this.redirectRank = redirectRank; |
292 | | - } |
293 | | - |
| 311 | + } |
294 | 312 | |
295 | 313 | |
296 | 314 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/importer/DumpImporter.java |
— | — | @@ -108,8 +108,10 @@ |
109 | 109 | if(makeIndex && related != null) |
110 | 110 | rel = related.getRelated(key); |
111 | 111 | // make article |
112 | | - Article article = new Article(page.Id,page.Title.Namespace,page.Title.Text,revision.Text,redirectTo, |
113 | | - references,redirectTargetNamespace,0,redirects,rel,anchors,date); |
| 112 | + Article article = new Article(page.Id,page.Title.Namespace, |
| 113 | + page.Title.Text,revision.Text,redirectTo,references, |
| 114 | + redirectTargetNamespace,0,redirects,rel,anchors,date, |
| 115 | + page.DiscussionThreadingInfo); |
114 | 116 | // index |
115 | 117 | if(indexWriter != null) |
116 | 118 | indexWriter.addArticle(article); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/WikiIndexModifier.java |
— | — | @@ -781,6 +781,14 @@ |
782 | 782 | rtitle.setBoost(rankBoost); |
783 | 783 | doc.add(rtitle); |
784 | 784 | |
| 785 | + // Threading information |
| 786 | + java.util.Enumeration e = article.DiscussionThreadingInfo.keys(); |
| 787 | + while (e.hasMoreElements()) { |
| 788 | + String key = (String)e.nextElement(); |
| 789 | + String value = article.DiscussionThreadingInfo.get(key); |
| 790 | + doc.add( new Field( key, value, Store.YES, Index.UN_TOKENIZED) ); |
| 791 | + } |
| 792 | + |
785 | 793 | // extra info (for spellcheck indexes) |
786 | 794 | if(extraInfo){ |
787 | 795 | addSpellCheckInfo(doc,article.getTitle(),tokenizer.getKeywords(),tokenizer.getHeadingText(),article.getRedirectKeywords(),iid,fields); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -424,6 +424,10 @@ |
425 | 425 | public HashSet<NamespaceFilter> getFieldNamespaces(String queryText){ |
426 | 426 | HashSet<String> fields = getFields(queryText); |
427 | 427 | HashSet<NamespaceFilter> ret = new HashSet<NamespaceFilter>(); |
| 428 | + List ThreadingKeywords = new ArrayList(); |
| 429 | + ThreadingKeywords.add("inthread"); |
| 430 | + ThreadingKeywords.add("ondiscussionpage"); |
| 431 | + |
428 | 432 | for(String field : fields){ |
429 | 433 | field = field.toLowerCase(); |
430 | 434 | if(namespaceFilters.containsKey(field)) |
— | — | @@ -434,6 +438,8 @@ |
435 | 439 | ret.add(defaultNamespaceFilter); |
436 | 440 | else if(field.startsWith("[")){ |
437 | 441 | ret.add(new NamespaceFilter(field.substring(1,field.length()-1))); |
| 442 | + } else if (ThreadingKeywords.contains(field)) { |
| 443 | + ret.add( new NamespaceFilter(90) ); |
438 | 444 | } |
439 | 445 | } |
440 | 446 | |
— | — | @@ -637,7 +643,13 @@ |
638 | 644 | else if(ch == ':'){ |
639 | 645 | // check if it's a valid field |
640 | 646 | String f = new String(buffer,0,length); |
641 | | - if(f.equals(namespaceAllKeyword) || f.equals("incategory") || f.equals("intitle") || namespaceFilters.containsKey(f) || namespacePolicy == NamespacePolicy.LEAVE){ |
| 647 | + |
| 648 | + List fieldOperators = getFieldOperators(); |
| 649 | + |
| 650 | + if( f.equals(namespaceAllKeyword) |
| 651 | + || fieldOperators.contains(f) |
| 652 | + || namespaceFilters.containsKey(f) |
| 653 | + || namespacePolicy == NamespacePolicy.LEAVE){ |
642 | 654 | cur = lookup; |
643 | 655 | return TokenType.FIELD; |
644 | 656 | } else |
— | — | @@ -649,6 +661,16 @@ |
650 | 662 | return TokenType.WORD; |
651 | 663 | } |
652 | 664 | |
| 665 | + private List<String> getFieldOperators() { |
| 666 | + List<String> fieldOperators = new ArrayList<String>(); |
| 667 | + fieldOperators.add("intitle"); |
| 668 | + fieldOperators.add("incategory"); |
| 669 | + fieldOperators.add("inthread"); |
| 670 | + fieldOperators.add("ondiscussionpage"); |
| 671 | + // Keywords the tokenizer accepts before ':' as a field prefix, besides namespace filters. |
| 672 | + return fieldOperators; |
| 673 | + } |
| 674 | + |
| 674 | + |
653 | 675 | /** |
654 | 676 | * Fetches prefixes like [0,1,2] (in [0,1,2]:query) |
655 | 677 | * |
— | — | @@ -722,25 +744,33 @@ |
723 | 745 | return makeTerm(token.termText()); |
724 | 746 | } |
725 | 747 | |
726 | | - /** Make term form <code>buffer</code> */ |
| 748 | + /** Make term from <code>buffer</code> */ |
727 | 749 | private Term makeTerm(){ |
728 | 750 | return makeTerm(new String(buffer,0,length)); |
729 | 751 | } |
730 | 752 | |
731 | 753 | /** Make a lucene term from string */ |
732 | 754 | private Term makeTerm(String t){ |
| 755 | + Hashtable<String,String> keywordFieldMapping = new Hashtable<String,String>(); |
| 756 | + keywordFieldMapping.put("inthread", "ThreadAncestor"); |
| 757 | + keywordFieldMapping.put("ondiscussionpage", "ThreadPage"); |
| 758 | + /* Query keyword -> name of the index field it is rewritten to; the table is rebuilt on every call. */ |
733 | 759 | if(currentField == null) |
734 | 760 | return new Term(defaultField,builder.isExactCase()? t : t.toLowerCase()); |
735 | 761 | else if(defaultField.equals("contents") && isInTitle) |
736 | 762 | return new Term("title",builder.isExactCase()? t : t.toLowerCase()); |
737 | | - else if(!"incategory".equals(currentField) && |
| 763 | + else if(currentField.equals("incategory")){ |
| 764 | + String norm = t.replace("_"," "); // bug 10822 |
| 765 | + return new Term("category",builder.isExactCase()? norm : norm.toLowerCase()); |
| 766 | + } else if( keywordFieldMapping.containsKey(currentField) ) { |
| 767 | + String field = keywordFieldMapping.get(currentField); |
| 768 | + /* Term text is used as typed: no lower-casing and no '_' replacement, unlike the category branch. */ |
| 769 | + return new Term(field, t); |
| 770 | + } else if(!"incategory".equals(currentField) && |
738 | 771 | (namespacePolicy == NamespacePolicy.IGNORE || |
739 | 772 | namespacePolicy == NamespacePolicy.REWRITE)) |
740 | 773 | return new Term(defaultField,t); |
741 | | - else if(currentField.equals("incategory")){ |
742 | | - String norm = t.replace("_"," "); // bug 10822 |
743 | | - return new Term("category",builder.isExactCase()? norm : norm.toLowerCase()); |
744 | | - } else |
| 774 | + else |
745 | 775 | return new Term(currentField,t); |
746 | 776 | } |
747 | 777 | |
Index: branches/lucene-search-2.1/lib/mwdumper.jar |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |