r109150 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r109149 | r109150 | r109151 >
Date:	14:56, 17 January 2012
Author:	oren
Status:	reverted (Comments)
Tags:
Comment:	this lucene api has switched to streams
Modified paths:	/trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java (deleted) (history)

Diff [purge]

Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java
—	—	@@ -1,134 +0,0 @@
2		~~-package org.wikimedia.lsearch.analyzers;~~
3		-
4		~~-import java.io.IOException;~~
5		~~-import java.util.ArrayList;~~
6		~~-import java.util.HashSet;~~
7		-
8		~~-import org.apache.lucene.analysis.Analyzer;~~
9		~~-import org.apache.lucene.analysis.Token;~~
10		~~-import org.apache.lucene.analysis.TokenStream;~~
11		~~-import org.wikimedia.lsearch.config.IndexId;~~
12		-
13		-/**
14		~~- * Aggregate bean that captures information about one~~
15		~~- * item going into the some index aggregate field.~~
16		- *
17		~~- * @author rainman~~
18		- *
19		~~- */~~
20		~~-public class Aggregate {~~
21		~~- protected ArrayList<Token> tokens;~~
22		~~- protected float boost;~~
23		~~- protected int noStopWordsLength;~~
24		~~- protected Flags flags;~~
25		-
26		~~- public enum Flags { NONE, ALTTITLE, ANCHOR, RELATED, SECTION };~~
27		-
28		~~- /** Construct from arbitrary text that will be tokenized~~
29		~~- * @throws IOException */~~
30		~~- public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,~~
31		~~- String field, HashSet<String> stopWords, Flags flags) throws IOException{~~
32		~~- setTokens(toTokenArray(analyzer.tokenStream(field,text)),stopWords);~~
33		~~- this.boost = boost;~~
34		~~- this.flags = flags;~~
35		-
36		~~- }~~
37		~~- /** Set new token array, calc length, etc.. */~~
38		~~- public void setTokens(ArrayList<Token> tokens, HashSet<String> stopWords){~~
39		~~- this.tokens = tokens;~~
40		~~- if(stopWords != null){~~
41		~~- noStopWordsLength = 0;~~
42		~~- for(Token t : tokens){~~
43		~~- if(!stopWords.contains(t.termText()) && t.getPositionIncrement()!=0)~~
44		~~- noStopWordsLength++;~~
45		~~- }~~
46		~~- } else{~~
47		~~- noStopWordsLength = noAliasLength();~~
48		~~- }~~
49		~~- }~~
50		~~- /** Number of tokens without aliases */~~
51		~~- public int noAliasLength(){~~
52		~~- int len = 0;~~
53		~~- for(Token t : tokens){~~
54		~~- if(t.getPositionIncrement() != 0)~~
55		~~- len++;~~
56		~~- }~~
57		~~- return len;~~
58		~~- }~~
59		-
60		~~- /** Construct with specific analyzer~~
61		~~- * @throws IOException */~~
62		~~- public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,~~
63		~~- String field, Flags flags) throws IOException{~~
64		~~- this.tokens = toTokenArray(analyzer.tokenStream(field,text));~~
65		~~- this.boost = boost;~~
66		~~- this.noStopWordsLength = noAliasLength();~~
67		~~- this.flags = flags;~~
68		~~- }~~
69		-
70		~~- private ArrayList<Token> toTokenArray(TokenStream stream) throws IOException {~~
71		~~- ArrayList<Token> tt = new ArrayList<Token>();~~
72		~~- Token t = null;~~
73		~~- while( (t = stream.next()) != null && tt.size() < 0xff-1){~~
74		~~- tt.add(t);~~
75		~~- }~~
76		~~- return tt;~~
77		~~- }~~
78		-
79		~~- /** Number of tokens */~~
80		~~- public int length(){~~
81		~~- if(tokens != null)~~
82		~~- return tokens.size();~~
83		~~- else~~
84		~~- return 0;~~
85		~~- }~~
86		-
87		~~- /** Number of tokens when stop words are excluded */~~
88		~~- public int getNoStopWordsLength(){~~
89		~~- return noStopWordsLength;~~
90		~~- }~~
91		-
92		~~- /** boost factor */~~
93		~~- public float boost(){~~
94		~~- return boost;~~
95		~~- }~~
96		-
97		~~- public Token getToken(int index){~~
98		~~- return tokens.get(index);~~
99		~~- }~~
100		-
101		~~- public ArrayList<Token> getTokens() {~~
102		~~- return tokens;~~
103		~~- }~~
104		-
105		~~- public Flags getFlags() {~~
106		~~- return flags;~~
107		~~- }~~
108		- /**
109		~~- * Generate the meta field stored contents~~
110		~~- * format: [length] [length without stop words] [boost] [complete length] [flags] (1+1+4+1+1 bytes)~~
111		~~- */~~
112		~~- public static byte[] serializeAggregate(ArrayList<Aggregate> items){~~
113		~~- byte[] buf = new byte[items.size() * 8];~~
114		-
115		~~- for(int i=0;i<items.size();i++){~~
116		~~- Aggregate ag = items.get(i);~~
117		~~- assert ag.length() < 0xff;~~
118		~~- assert ag.noAliasLength() < 0xff;~~
119		~~- assert ag.getNoStopWordsLength() < 0xff;~~
120		~~- buf[i*8] = (byte)(ag.noAliasLength() & 0xff);~~
121		~~- buf[i*8+1] = (byte)(ag.getNoStopWordsLength() & 0xff);~~
122		~~- int boost = Float.floatToIntBits(ag.boost());~~
123		~~- buf[i*8+2] = (byte)((boost >>> 24) & 0xff);~~
124		~~- buf[i*8+3] = (byte)((boost >>> 16) & 0xff);~~
125		~~- buf[i*8+4] = (byte)((boost >>> 8) & 0xff);~~
126		~~- buf[i*8+5] = (byte)((boost >>> 0) & 0xff);~~
127		~~- buf[i*8+6] = (byte)(ag.length() & 0xff);~~
128		~~- buf[i*8+7] = (byte)(ag.getFlags().ordinal() & 0xff);~~
129		~~- }~~
130		-
131		~~- return buf;~~
132		~~- }~~
133		-
134		-
135		-}

Comments

#Comment by OrenBochman (talk | contribs) 12:13, 19 January 2012

restored it from history

Status & tagging log

12:13, 19 January 2012 OrenBochman (talk | contribs) changed the status of r109150 [removed: fixme added: reverted]
15:09, 17 January 2012 Nikerabbit (talk | contribs) changed the status of r109150 [removed: ok added: fixme]
15:08, 17 January 2012 Nikerabbit (talk | contribs) changed the status of r109150 [removed: new added: ok]