r109150 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r109149‎ | r109150 | r109151 >
Date:14:56, 17 January 2012
Author:oren
Status:reverted (Comments)
Tags:
Comment:
this lucene api has switched to streams
Modified paths:
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java (deleted) (history)

Diff [purge]

Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java
@@ -1,134 +0,0 @@
2 -package org.wikimedia.lsearch.analyzers;
3 -
4 -import java.io.IOException;
5 -import java.util.ArrayList;
6 -import java.util.HashSet;
7 -
8 -import org.apache.lucene.analysis.Analyzer;
9 -import org.apache.lucene.analysis.Token;
10 -import org.apache.lucene.analysis.TokenStream;
11 -import org.wikimedia.lsearch.config.IndexId;
12 -
13 -/**
14 - * Aggregate bean that captures information about one
15 - * item going into some index aggregate field.
16 - *
17 - * @author rainman
18 - *
19 - */
20 -public class Aggregate {
21 - protected ArrayList<Token> tokens;
22 - protected float boost;
23 - protected int noStopWordsLength;
24 - protected Flags flags;
25 -
26 - public enum Flags { NONE, ALTTITLE, ANCHOR, RELATED, SECTION };
27 -
28 - /** Construct from arbitrary text that will be tokenized
29 - * @throws IOException */
30 - public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,
31 - String field, HashSet<String> stopWords, Flags flags) throws IOException{
32 - setTokens(toTokenArray(analyzer.tokenStream(field,text)),stopWords);
33 - this.boost = boost;
34 - this.flags = flags;
35 -
36 - }
37 - /** Set new token array and recompute the length without stop words. */
38 - public void setTokens(ArrayList<Token> tokens, HashSet<String> stopWords){
39 - this.tokens = tokens;
40 - if(stopWords != null){
41 - noStopWordsLength = 0;
42 - for(Token t : tokens){
43 - if(!stopWords.contains(t.termText()) && t.getPositionIncrement()!=0)
44 - noStopWordsLength++;
45 - }
46 - } else{
47 - noStopWordsLength = noAliasLength();
48 - }
49 - }
50 - /** Number of tokens without aliases */
51 - public int noAliasLength(){
52 - int len = 0;
53 - for(Token t : tokens){
54 - if(t.getPositionIncrement() != 0)
55 - len++;
56 - }
57 - return len;
58 - }
59 -
60 - /** Construct with specific analyzer
61 - * @throws IOException */
62 - public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,
63 - String field, Flags flags) throws IOException{
64 - this.tokens = toTokenArray(analyzer.tokenStream(field,text));
65 - this.boost = boost;
66 - this.noStopWordsLength = noAliasLength();
67 - this.flags = flags;
68 - }
69 -
70 - private ArrayList<Token> toTokenArray(TokenStream stream) throws IOException {
71 - ArrayList<Token> tt = new ArrayList<Token>();
72 - Token t = null;
73 - while( (t = stream.next()) != null && tt.size() < 0xff-1){
74 - tt.add(t);
75 - }
76 - return tt;
77 - }
78 -
79 - /** Number of tokens */
80 - public int length(){
81 - if(tokens != null)
82 - return tokens.size();
83 - else
84 - return 0;
85 - }
86 -
87 - /** Number of tokens when stop words are excluded */
88 - public int getNoStopWordsLength(){
89 - return noStopWordsLength;
90 - }
91 -
92 - /** boost factor */
93 - public float boost(){
94 - return boost;
95 - }
96 -
97 - public Token getToken(int index){
98 - return tokens.get(index);
99 - }
100 -
101 - public ArrayList<Token> getTokens() {
102 - return tokens;
103 - }
104 -
105 - public Flags getFlags() {
106 - return flags;
107 - }
108 - /**
109 - * Generate the meta field stored contents
110 - * format: [length without aliases] [length without stop words] [boost] [complete length] [flags] (1+1+4+1+1 bytes)
111 - */
112 - public static byte[] serializeAggregate(ArrayList<Aggregate> items){
113 - byte[] buf = new byte[items.size() * 8];
114 -
115 - for(int i=0;i<items.size();i++){
116 - Aggregate ag = items.get(i);
117 - assert ag.length() < 0xff;
118 - assert ag.noAliasLength() < 0xff;
119 - assert ag.getNoStopWordsLength() < 0xff;
120 - buf[i*8] = (byte)(ag.noAliasLength() & 0xff);
121 - buf[i*8+1] = (byte)(ag.getNoStopWordsLength() & 0xff);
122 - int boost = Float.floatToIntBits(ag.boost());
123 - buf[i*8+2] = (byte)((boost >>> 24) & 0xff);
124 - buf[i*8+3] = (byte)((boost >>> 16) & 0xff);
125 - buf[i*8+4] = (byte)((boost >>> 8) & 0xff);
126 - buf[i*8+5] = (byte)((boost >>> 0) & 0xff);
127 - buf[i*8+6] = (byte)(ag.length() & 0xff);
128 - buf[i*8+7] = (byte)(ag.getFlags().ordinal() & 0xff);
129 - }
130 -
131 - return buf;
132 - }
133 -
134 -
135 -}

Comments

#Comment by OrenBochman (talk | contribs)   12:13, 19 January 2012

restored it from history

Status & tagging log