r51322 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r51321 | r51322 | r51323 >
Date:	23:33, 1 June 2009
Author:	rainman
Status:	deferred
Tags:
Comment:	A quickly hacked-up tool to show anchors leading to certain article.
Modified paths:	/branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/ListAnchors.java (added) (history)

Diff [purge]

Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/ListAnchors.java
—	—	@@ -0,0 +1,42 @@
	2	+package org.wikimedia.lsearch.util;
	3	+
	4	+import java.util.HashMap;
	5	+
	6	+import org.apache.lucene.document.Document;
	7	+import org.apache.lucene.index.IndexReader;
	8	+import org.apache.lucene.index.Term;
	9	+import org.apache.lucene.index.TermDocs;
	10	+import org.apache.lucene.index.TermEnum;
	11	+
	12	+public class ListAnchors {
	13	+ public static void main(String[] args) throws Exception {
	14	+ if(args.length != 2){
	15	+ System.out.println("Usage: ListAnchors <path to links index> <target article (ns:key)> ");
	16	+ return;
	17	+ }
	18	+
	19	+ String key = args[1];
	20	+ String path = args[0];
	21	+
	22	+ IndexReader reader = IndexReader.open(path);
	23	+
	24	+ System.out.println("Links to article "+key);
	25	+
	26	+ String prefix = key+"\|";
	27	+ TermEnum te = reader.terms(new Term("anchors",prefix));
	28	+ for(;te.term()!=null;te.next()){
	29	+ String t = te.term().text();
	30	+ if(t.startsWith(prefix)){
	31	+ String anchor = t.substring(t.indexOf('\|')+1);
	32	+ TermDocs td = reader.termDocs(new Term("anchors",t));
	33	+ while(td.next()){ // this will skip deleted docs, while docFreq won't
	34	+ Document d = reader.document(td.doc());
	35	+ System.out.println("["+d.get("article_key") + "] with [" + anchor+"]");
	36	+ }
	37	+ } else
	38	+ break;
	39	+ }
	40	+
	41	+ }
	42	+}
	43	+

04:13, 6 June 2009 Tim Starling (talk | contribs) changed the status of r51322 [removed: new added: deferred]