r51322 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r51321‎ | r51322 | r51323 >
Date:23:33, 1 June 2009
Author:rainman
Status:deferred
Tags:
Comment:
A quickly hacked-up tool to show anchors leading to certain article.
Modified paths:
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/ListAnchors.java (added) (history)

Diff [purge]

Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/ListAnchors.java
@@ -0,0 +1,42 @@
 2+package org.wikimedia.lsearch.util;
 3+
 4+import java.util.HashMap;
 5+
 6+import org.apache.lucene.document.Document;
 7+import org.apache.lucene.index.IndexReader;
 8+import org.apache.lucene.index.Term;
 9+import org.apache.lucene.index.TermDocs;
 10+import org.apache.lucene.index.TermEnum;
 11+
 12+public class ListAnchors {
 13+ public static void main(String[] args) throws Exception {
 14+ if(args.length != 2){
 15+ System.out.println("Usage: ListAnchors <path to links index> <target article (ns:key)> ");
 16+ return;
 17+ }
 18+
 19+ String key = args[1];
 20+ String path = args[0];
 21+
 22+ IndexReader reader = IndexReader.open(path);
 23+
 24+ System.out.println("Links to article "+key);
 25+
 26+ String prefix = key+"|";
 27+ TermEnum te = reader.terms(new Term("anchors",prefix));
 28+ for(;te.term()!=null;te.next()){
 29+ String t = te.term().text();
 30+ if(t.startsWith(prefix)){
 31+ String anchor = t.substring(t.indexOf('|')+1);
 32+ TermDocs td = reader.termDocs(new Term("anchors",t));
 33+ while(td.next()){ // this will skip deleted docs, while docFreq won't
 34+ Document d = reader.document(td.doc());
 35+ System.out.println("["+d.get("article_key") + "] with [" + anchor+"]");
 36+ }
 37+ } else
 38+ break;
 39+ }
 40+
 41+ }
 42+}
 43+

Status & tagging log