Index: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/ranks/Links.java |
— | — | @@ -43,11 +43,12 @@ |
44 | 44 | import org.wikimedia.lsearch.search.NamespaceFilter; |
45 | 45 | import org.wikimedia.lsearch.search.UpdateThread; |
46 | 46 | import org.wikimedia.lsearch.spell.api.Dictionary; |
| 47 | +import org.wikimedia.lsearch.spell.api.Dictionary.Word; |
47 | 48 | import org.wikimedia.lsearch.spell.api.LuceneDictionary; |
48 | | -import org.wikimedia.lsearch.spell.api.Dictionary.Word; |
49 | 49 | import org.wikimedia.lsearch.util.Localization; |
50 | 50 | |
51 | 51 | public class Links { |
| 52 | + |
52 | 53 | static Logger log = Logger.getLogger(Links.class); |
53 | 54 | protected IndexId iid; |
54 | 55 | protected String langCode; |
— | — | @@ -69,6 +70,15 @@ |
70 | 71 | protected boolean autoOptimize = false; |
71 | 72 | protected FilterFactory filters = null; |
72 | 73 | |
| 74 | + /** |
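| 75 | + * Private constructor; instances are created by the static factory methods such as openForModification and openForRead. |
| 76 | + * @param iid index identifier |
| 77 | + * @param path index path, stored in this.path |
| 78 | + * @param writer index writer, stored in this.writer (openForRead passes null) |
| 79 | + * @param autoOptimize auto-optimize flag (openForModification passes false, openForRead passes true) |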
| 80 | + * @throws CorruptIndexException |
| 81 | + * @throws IOException |
| 82 | + */ |
73 | 83 | private Links(IndexId iid, String path, IndexWriter writer, boolean autoOptimize) throws CorruptIndexException, IOException{ |
74 | 84 | this.writer = writer; |
75 | 85 | this.path = path; |
— | — | @@ -106,7 +116,12 @@ |
107 | 117 | } |
108 | 118 | } |
109 | 119 | |
110 | | - /** Open the index path for updates */ |
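| 120 | + /** Open the index path for updates. |
| 121 | + * |
| 122 | + * @param iid index identifier; replaced by its links index via iid.getLinks(), whose getIndexPath() is the path used |
| 123 | + * @return a new Links instance for that path, constructed with autoOptimize set to false |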
| 124 | + * @throws IOException |
| 125 | + */ |
111 | 126 | public static Links openForModification(IndexId iid) throws IOException{ |
112 | 127 | iid = iid.getLinks(); |
113 | 128 | String path = iid.getIndexPath(); |
— | — | @@ -115,7 +130,12 @@ |
116 | 131 | return new Links(iid,path,writer,false); |
117 | 132 | } |
118 | 133 | |
119 | | - /** Open index for old-style batch-delete, batch-add modification */ |
| 134 | + /** Open index for old-style batch-delete, batch-add modification |
| 135 | + * |
| 136 | + * @param iid |
| 137 | + * @return |
| 138 | + * @throws IOException |
| 139 | + */ |
120 | 140 | public static Links openForBatchModifiation(IndexId iid) throws IOException{ |
121 | 141 | iid = iid.getLinks(); |
122 | 142 | String path = iid.getIndexPath(); |
— | — | @@ -132,14 +152,25 @@ |
133 | 153 | return openForRead(iid,iid.getSearchPath()); |
134 | 154 | } |
135 | 155 | |
136 | | - /** Open index at path for reading */ |
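| 156 | + /** Open index at path for reading. |
| 157 | + * |
| 158 | + * @param iid index identifier; replaced by its links index via iid.getLinks() |
| 159 | + * @param path path of the index to open (also written to the log) |
| 160 | + * @return a new Links instance constructed with a null writer and autoOptimize set to true |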
| 161 | + * @throws IOException |
| 162 | + */ |
137 | 163 | public static Links openForRead(IndexId iid, String path) throws IOException { |
138 | 164 | iid = iid.getLinks(); |
139 | 165 | log.info("Opening for read "+path); |
140 | 166 | return new Links(iid,path,null,true); |
141 | 167 | } |
142 | 168 | |
143 | | - /** Create new in the import path */ |
| 169 | + /** Create new in the import path |
| 170 | + * |
| 171 | + * @param iid |
| 172 | + * @return |
| 173 | + * @throws IOException |
| 174 | + */ |
144 | 175 | public static Links createNew(IndexId iid) throws IOException{ |
145 | 176 | iid = iid.getLinks(); |
146 | 177 | String path = iid.getImportPath(); |
— | — | @@ -149,7 +180,12 @@ |
150 | 181 | return links; |
151 | 182 | } |
152 | 183 | |
153 | | - /** Create new index in memory (RAMDirectory) */ |
| 184 | + /** Create new index in memory (RAMDirectory) |
| 185 | + * |
| 186 | + * @param iid |
| 187 | + * @return |
| 188 | + * @throws IOException |
| 189 | + */ |
154 | 190 | public static Links createNewInMemory(IndexId iid) throws IOException{ |
155 | 191 | iid = iid.getLinks(); |
156 | 192 | log.info("Making index in memory"); |
— | — | @@ -158,14 +194,21 @@ |
159 | 195 | return links; |
160 | 196 | } |
161 | 197 | |
162 | | - /** Add more entries to namespace mapping (ns_name -> ns_index) */ |
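| 198 | + /** Add more entries to namespace mapping (ns_name -> ns_index). |
| 199 | + * |
| 200 | + * @param map entries to add; each key is lower-cased with toLowerCase() before being put into the namespace map |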
| 201 | + */ |
163 | 202 | public void addToNamespaceMap(HashMap<String,Integer> map){ |
164 | 203 | for(Entry<String,Integer> e : map.entrySet()){ |
165 | 204 | nsmap.put(e.getKey().toLowerCase(),e.getValue()); |
166 | 205 | } |
167 | 206 | } |
168 | 207 | |
169 | | - /** Add a custom namespace mapping */ |
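| 208 | + /** Add a custom namespace mapping. |
| 209 | + * |
| 210 | + * @param namespace namespace name; lower-cased with toLowerCase() before being used as the map key |
| 211 | + * @param index namespace index stored for that name |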
| 212 | + */ |
170 | 213 | public void addToNamespaceMap(String namespace, int index){ |
171 | 214 | nsmap.put(namespace.toLowerCase(),index); |
172 | 215 | } |
— | — | @@ -493,7 +536,7 @@ |
494 | 537 | return ret; |
495 | 538 | } |
496 | 539 | |
497 | | - /** Get mapping text -> occurance count (including actual article title) */ |
| 540 | + /** Get mapping text -> occurrence count (including actual article title) */ |
498 | 541 | public HashMap<String,Integer> getAnchorMap(String key, Integer numInLinks) throws IOException { |
499 | 542 | ensureRead(); |
500 | 543 | HashMap<String,Integer> map = getAnchors(key); |
— | — | @@ -509,7 +552,11 @@ |
510 | 553 | return map; |
511 | 554 | } |
512 | 555 | |
513 | | - /** Merge the second anchor map into the first */ |
| 556 | + /** Merge the second anchor map into the first |
| 557 | + * |
| 558 | + * @param dest |
| 559 | + * @param src |
| 560 | + */ |
514 | 561 | public static void mergeAnchorMaps(Map<String,Integer> dest, Map<String,Integer> src){ |
515 | 562 | for(Entry<String,Integer> e : src.entrySet()){ |
516 | 563 | String key = e.getKey(); |
— | — | @@ -520,7 +567,11 @@ |
521 | 568 | } |
522 | 569 | } |
523 | 570 | |
524 | | - /** Lowercase all anchor keys & merge */ |
| 571 | + /** Lowercase all anchor keys and merge. |
| 572 | + * |
| 573 | + * @param src source anchor map whose entries are read |
| 574 | + * @return a newly created HashMap holding the result |
| 575 | + */ |
525 | 576 | public static HashMap<String,Integer> lowercaseAnchorMap(Map<String,Integer> src){ |
526 | 577 | HashMap<String,Integer> dest = new HashMap<String,Integer>(); |
527 | 578 | for(Entry<String,Integer> e : src.entrySet()){ |
— | — | @@ -533,7 +584,11 @@ |
534 | 585 | return dest; |
535 | 586 | } |
536 | 587 | |
537 | | - /** Sort anchors desc according to rank */ |
| 588 | + /** Sort anchors desc according to rank |
| 589 | + * |
| 590 | + * @param anchors |
| 591 | + * @return |
| 592 | + */ |
538 | 593 | public static ArrayList<Entry<String,Integer>> sortAnchors(Map<String,Integer> anchors){ |
539 | 594 | // sort by rank |
540 | 595 | ArrayList<Entry<String,Integer>> sorted = new ArrayList<Entry<String,Integer>>(); |
— | — | @@ -547,7 +602,11 @@ |
548 | 603 | } |
549 | 604 | |
550 | 605 | /** If an article is a redirect |
551 | | - * @throws IOException */ |
| 606 | + * |
| 607 | + * @param key |
| 608 | + * @return |
| 609 | + * @throws IOException |
| 610 | + */ |
552 | 611 | public boolean isRedirect(String key) throws IOException{ |
553 | 612 | ensureRead(); |
554 | 613 | TermDocs td = reader.termDocs(new Term("article_key",key)); |
— | — | @@ -558,7 +617,12 @@ |
559 | 618 | return false; |
560 | 619 | } |
561 | 620 | |
562 | | - /** Get page_id for ns:title */ |
| 621 | + /** Get page_id for ns:title |
| 622 | + * |
| 623 | + * @param key |
| 624 | + * @return |
| 625 | + * @throws IOException |
| 626 | + */ |
563 | 627 | public String getPageId(String key) throws IOException { |
564 | 628 | ensureRead(); |
565 | 629 | TermDocs td = reader.termDocs(new Term("article_key",key)); |
— | — | @@ -568,7 +632,12 @@ |
569 | 633 | return null; |
570 | 634 | } |
571 | 635 | |
572 | | - /** Get ns:title for page_id */ |
| 636 | + /** Get ns:title for page_id |
| 637 | + * |
| 638 | + * @param pageid |
| 639 | + * @return |
| 640 | + * @throws IOException |
| 641 | + */ |
573 | 642 | public String getKeyFromPageId(String pageid) throws IOException { |
574 | 643 | ensureRead(); |
575 | 644 | TermDocs td = reader.termDocs(new Term("article_pageid",pageid)); |
— | — | @@ -578,7 +647,12 @@ |
579 | 648 | return null; |
580 | 649 | } |
581 | 650 | |
582 | | - /** If article is redirect, get target key, else null */ |
| 651 | + /** If article is redirect, get target key, else null |
| 652 | + * |
| 653 | + * @param key |
| 654 | + * @return |
| 655 | + * @throws IOException |
| 656 | + */ |
583 | 657 | public String getRedirectTarget(String key) throws IOException{ |
584 | 658 | ensureRead(); |
585 | 659 | TermDocs td = reader.termDocs(new Term("article_key",key)); |
— | — | @@ -592,7 +666,12 @@ |
593 | 667 | } |
594 | 668 | |
595 | 669 | |
596 | | - /** Return the namespace of the redirect taget (if any) */ |
| 670 | + /** Return the namespace of the redirect target (if any) |
| 671 | + * |
| 672 | + * @param key |
| 673 | + * @return |
| 674 | + * @throws IOException |
| 675 | + */ |
597 | 676 | public int getRedirectTargetNamespace(String key) throws IOException{ |
598 | 677 | ensureRead(); |
599 | 678 | String t = getRedirectTarget(key); |
— | — | @@ -603,7 +682,12 @@ |
604 | 683 | } |
605 | 684 | |
606 | 685 | /** Get all article titles linking to given title |
607 | | - * @throws IOException */ |
| 686 | + * |
| 687 | + * @param key |
| 688 | + * @param keyCache |
| 689 | + * @return |
| 690 | + * @throws IOException |
| 691 | + */ |
608 | 692 | public ArrayList<CompactArticleLinks> getInLinks(CompactArticleLinks key, HashMap<Integer,CompactArticleLinks> keyCache) throws IOException{ |
609 | 693 | ensureRead(); |
610 | 694 | ArrayList<CompactArticleLinks> ret = new ArrayList<CompactArticleLinks>(); |
— | — | @@ -617,7 +701,11 @@ |
618 | 702 | } |
619 | 703 | |
620 | 704 | /** Get all article titles linking to given title |
621 | | - * @throws IOException */ |
| 705 | + * |
| 706 | + * @param key |
| 707 | + * @return |
| 708 | + * @throws IOException |
| 709 | + */ |
622 | 710 | public ArrayList<String> getInLinks(String key) throws IOException{ |
623 | 711 | ensureRead(); |
624 | 712 | ArrayList<String> ret = new ArrayList<String>(); |
— | — | @@ -628,7 +716,12 @@ |
629 | 717 | return ret; |
630 | 718 | } |
631 | 719 | |
632 | | - /** Get links from this article to other articles */ |
| 720 | + /** Get links from this article to other articles |
| 721 | + * |
| 722 | + * @param key |
| 723 | + * @return |
| 724 | + * @throws IOException |
| 725 | + */ |
633 | 726 | public StringList getOutLinks(String key) throws IOException{ |
634 | 727 | ensureRead(); |
635 | 728 | TermDocs td = reader.termDocs(new Term("article_key",key)); |
— | — | @@ -639,9 +732,12 @@ |
640 | 733 | } |
641 | 734 | |
642 | 735 | /** Get all contexts in which article <i>to<i/> is linked from <i>from</i>. |
643 | | - * Will return null if there is no context, or link is invalid. |
644 | | - * @throws ClassNotFoundException */ |
645 | | - |
| 736 | + * Will return null if there is no context, or link is invalid. |
| 737 | + * @param from |
| 738 | + * @param to |
| 739 | + * @return |
| 740 | + * @throws IOException |
| 741 | + */ |
646 | 742 | public ArrayList<String> getContext(String from, String to) throws IOException { |
647 | 743 | ensureRead(); |
648 | 744 | String cacheKey = "getContext:"+from; |
— | — | @@ -679,7 +775,13 @@ |
680 | 776 | return null; |
681 | 777 | } |
682 | 778 | |
683 | | - /** return how many times article key1 and key2 cooccur in same context */ |
| 779 | + /** return how many times article key1 and key2 co-occur in same context |
| 780 | + * |
| 781 | + * @param key1 |
| 782 | + * @param key2 |
| 783 | + * @return |
| 784 | + * @throws IOException |
| 785 | + */ |
684 | 786 | public int getRelatedCountInContext(String key1, String key2) throws IOException { |
685 | 787 | ensureRead(); |
686 | 788 | PhraseQuery pq = new PhraseQuery(); |
— | — | @@ -689,7 +791,13 @@ |
690 | 792 | return searcher.search(pq).length(); |
691 | 793 | } |
692 | 794 | |
693 | | - /** return how many times article key1 and key2 cooccur in any article */ |
| 795 | + /** return how many times article key1 and key2 co-occur in any article |
| 796 | + * |
| 797 | + * @param key1 |
| 798 | + * @param key2 |
| 799 | + * @return |
| 800 | + * @throws IOException |
| 801 | + */ |
694 | 802 | public int getRelatedCountAll(String key1, String key2) throws IOException { |
695 | 803 | ensureRead(); |
696 | 804 | // works as an optimized boolean query on key1, key2 |
— | — | @@ -717,7 +825,14 @@ |
718 | 826 | } |
719 | 827 | } |
720 | 828 | |
721 | | - /** return how many times article key1 and key2 cooccur in same context */ |
| 829 | + /** return how many times article key1 and key2 co-occur in same context |
| 830 | + * |
| 831 | + * @param key1 |
| 832 | + * @param key2 |
| 833 | + * @param inLinkCache |
| 834 | + * @return |
| 835 | + * @throws IOException |
| 836 | + */ |
722 | 837 | public double getRelatedScore(String key1, String key2, int[] inLinkCache) throws IOException { |
723 | 838 | ensureRead(); |
724 | 839 | PhraseQuery pq = new PhraseQuery(); |
— | — | @@ -738,10 +853,13 @@ |
739 | 854 | } |
740 | 855 | |
741 | 856 | |
742 | | - /** Get all contexts in which article <i>to<i/> is linked from <i>from</i>. |
743 | | - * Will return null if there is no context, or link is invalid. |
744 | | - * @throws ClassNotFoundException */ |
745 | | - |
| 857 | + /** Get all contexts in which article <i>to</i> is linked from <i>from</i>. |
| 858 | + * Will return null if there is no context, or link is invalid. |
| 859 | + * @param from |
| 860 | + * @param to |
| 861 | + * @return |
| 862 | + * @throws IOException |
| 863 | + */ |
746 | 864 | public Collection<String> getContextOld(String from, String to) throws IOException { |
747 | 865 | ensureRead(); |
748 | 866 | |
— | — | @@ -753,7 +871,11 @@ |
754 | 872 | return null; |
755 | 873 | } |
756 | 874 | |
757 | | - /** Get a dictionary of all article keys (ns:title) in this index */ |
| 875 | + /** Get a dictionary of all article keys (ns:title) in this index |
| 876 | + * |
| 877 | + * @return |
| 878 | + * @throws IOException |
| 879 | + */ |
758 | 880 | public LuceneDictionary getKeys() throws IOException{ |
759 | 881 | ensureRead(); |
760 | 882 | return new LuceneDictionary(reader,"article_key"); |
— | — | @@ -767,7 +889,10 @@ |
768 | 890 | return null; |
769 | 891 | } |
770 | 892 | |
771 | | - /** Close everything */ |
| 893 | + /** Close everything |
| 894 | + * |
| 895 | + * @throws IOException |
| 896 | + */ |
772 | 897 | public void close() throws IOException { |
773 | 898 | if(writer != null) |
774 | 899 | writer.close(); |