Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/ListAnchors.java |
— | — | @@ -0,0 +1,42 @@ |
| 2 | +package org.wikimedia.lsearch.util; |
| 3 | + |
| 4 | +import java.util.HashMap; |
| 5 | + |
| 6 | +import org.apache.lucene.document.Document; |
| 7 | +import org.apache.lucene.index.IndexReader; |
| 8 | +import org.apache.lucene.index.Term; |
| 9 | +import org.apache.lucene.index.TermDocs; |
| 10 | +import org.apache.lucene.index.TermEnum; |
| 11 | + |
| 12 | +public class ListAnchors { |
| 13 | + public static void main(String[] args) throws Exception { |
| 14 | + if(args.length != 2){ |
| 15 | + System.out.println("Usage: ListAnchors <path to links index> <target article (ns:key)> "); |
| 16 | + return; |
| 17 | + } |
| 18 | + |
| 19 | + String key = args[1]; |
| 20 | + String path = args[0]; |
| 21 | + |
| 22 | + IndexReader reader = IndexReader.open(path); |
| 23 | + |
| 24 | + System.out.println("Links to article "+key); |
| 25 | + |
| 26 | + String prefix = key+"|"; |
| 27 | + TermEnum te = reader.terms(new Term("anchors",prefix)); |
| 28 | + for(;te.term()!=null;te.next()){ |
| 29 | + String t = te.term().text(); |
| 30 | + if(t.startsWith(prefix)){ |
| 31 | + String anchor = t.substring(t.indexOf('|')+1); |
| 32 | + TermDocs td = reader.termDocs(new Term("anchors",t)); |
| 33 | + while(td.next()){ // this will skip deleted docs, while docFreq won't |
| 34 | + Document d = reader.document(td.doc()); |
| 35 | + System.out.println("["+d.get("article_key") + "] with [" + anchor+"]"); |
| 36 | + } |
| 37 | + } else |
| 38 | + break; |
| 39 | + } |
| 40 | + |
| 41 | + } |
| 42 | +} |
| 43 | + |