r109152 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r109151‎ | r109152 | r109153 >
Date:15:27, 17 January 2012
Author:oren
Status:deferred
Tags:
Comment:
reverting prior delete to recover file's history
Modified paths:
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java (deleted) (history)

Diff [purge]

Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java
@@ -1,135 +0,0 @@
2 -package org.wikimedia.lsearch.analyzers;
3 -
4 -import java.io.IOException;
5 -import java.io.StringReader;
6 -import java.util.ArrayList;
7 -import java.util.HashSet;
8 -
9 -import org.apache.lucene.analysis.Analyzer;
10 -import org.apache.lucene.analysis.Token;
11 -import org.apache.lucene.analysis.TokenStream;
12 -import org.wikimedia.lsearch.config.IndexId;
13 -
14 -/**
15 - * Aggregate bean that captures information about one
16 - * item going into some aggregate field of the index: its tokens, boost, token counts and flags.
17 - *
18 - * @author rainman
19 - *
20 - */
21 -public class Aggregate {
22 - protected ArrayList<Token> tokens; // tokens of this item
23 - protected float boost; // boost factor; written as 4 bytes by serializeAggregate
24 - protected int noStopWordsLength; // token count excluding aliases and, when a stop-word set was given, stop words
25 - protected Flags flags; // item type marker; its ordinal is written by serializeAggregate
26 -
27 - public enum Flags { NONE, ALTTITLE, ANCHOR, RELATED, SECTION }; // serialized by ordinal, so the constant order presumably must stay stable - confirm against readers
28 -
29 - /** Construct from arbitrary text that will be tokenized by the given analyzer for the given field; stopWords (may be null) are excluded from the no-stop-words length. The iid argument is not used by this constructor.
30 - * @throws IOException propagated from tokenizing the text */
31 - public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,
32 - String field, HashSet<String> stopWords, Flags flags) throws IOException{
33 - setTokens(toTokenArray(analyzer.tokenStream(field,new StringReader(text))),stopWords); // NOTE(review): setTokens is public and overridable, and runs before boost/flags are assigned
34 - this.boost = boost;
35 - this.flags = flags;
36 -
37 - }
38 - /** Set new token array and recompute noStopWordsLength: with a stop-word set, count tokens that are not in the set and have a non-zero position increment; with null, fall back to noAliasLength() */
39 - public void setTokens(ArrayList<Token> tokens, HashSet<String> stopWords){
40 - this.tokens = tokens;
41 - if(stopWords != null){
42 - noStopWordsLength = 0;
43 - for(Token t : tokens){
44 - if(!stopWords.contains(t.termText()) && t.getPositionIncrement()!=0)
45 - noStopWordsLength++;
46 - }
47 - } else{
48 - noStopWordsLength = noAliasLength();
49 - }
50 - }
51 - /** Number of tokens without aliases, i.e. tokens whose position increment is not 0 */
52 - public int noAliasLength(){
53 - int len = 0;
54 - for(Token t : tokens){
55 - if(t.getPositionIncrement() != 0)
56 - len++;
57 - }
58 - return len;
59 - }
60 -
61 - /** Construct with specific analyzer and no stop-word set, so the no-stop-words length equals noAliasLength(). The iid argument is not used by this constructor.
62 - * @throws IOException propagated from tokenizing the text */
63 - public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,
64 - String field, Flags flags) throws IOException{
65 - this.tokens = toTokenArray(analyzer.tokenStream(field,new StringReader(text)));
66 - this.boost = boost;
67 - this.noStopWordsLength = noAliasLength();
68 - this.flags = flags;
69 - }
70 -
71 - private ArrayList<Token> toTokenArray(TokenStream stream) throws IOException { // drains the stream into a list, keeping at most 0xff-1 (254) tokens
72 - ArrayList<Token> tt = new ArrayList<Token>();
73 - Token t = null;
74 - while( (t = stream.next()) != null && tt.size() < 0xff-1){ // cap presumably keeps lengths within the single bytes written by serializeAggregate; NOTE(review): the stream is not closed here
75 - tt.add(t);
76 - }
77 - return tt;
78 - }
79 -
80 - /** Number of tokens, aliases included; 0 when the token list is null */
81 - public int length(){
82 - if(tokens != null)
83 - return tokens.size();
84 - else
85 - return 0;
86 - }
87 -
88 - /** Number of tokens when stop words are excluded (value computed by setTokens or the constructor) */
89 - public int getNoStopWordsLength(){
90 - return noStopWordsLength;
91 - }
92 -
93 - /** boost factor */
94 - public float boost(){
95 - return boost;
96 - }
97 -
98 - public Token getToken(int index){ // token at the given index of the token list; tokens is not null-checked here
99 - return tokens.get(index);
100 - }
101 -
102 - public ArrayList<Token> getTokens() { // returns the internal list itself, not a copy
103 - return tokens;
104 - }
105 -
106 - public Flags getFlags() { // flags passed to the constructor
107 - return flags;
108 - }
109 - /**
110 - * Generate the meta field stored contents: 8 bytes per item, items in list order.
111 - * format: [length] [length without stop words] [boost] [complete length] [flags] (1+1+4+1+1 bytes), where [length] is noAliasLength(), [boost] is the float's int bits with the most significant byte first, [complete length] is length() and [flags] is the enum ordinal
112 - */
113 - public static byte[] serializeAggregate(ArrayList<Aggregate> items){
114 - byte[] buf = new byte[items.size() * 8];
115 -
116 - for(int i=0;i<items.size();i++){
117 - Aggregate ag = items.get(i);
118 - assert ag.length() < 0xff; // each length is stored in a single byte; these checks only run when assertions are enabled
119 - assert ag.noAliasLength() < 0xff;
120 - assert ag.getNoStopWordsLength() < 0xff;
121 - buf[i*8] = (byte)(ag.noAliasLength() & 0xff);
122 - buf[i*8+1] = (byte)(ag.getNoStopWordsLength() & 0xff);
123 - int boost = Float.floatToIntBits(ag.boost());
124 - buf[i*8+2] = (byte)((boost >>> 24) & 0xff);
125 - buf[i*8+3] = (byte)((boost >>> 16) & 0xff);
126 - buf[i*8+4] = (byte)((boost >>> 8) & 0xff);
127 - buf[i*8+5] = (byte)((boost >>> 0) & 0xff);
128 - buf[i*8+6] = (byte)(ag.length() & 0xff);
129 - buf[i*8+7] = (byte)(ag.getFlags().ordinal() & 0xff); // throws NullPointerException if the item's flags are null
130 - }
131 -
132 - return buf;
133 - }
134 -
135 -
136 -}

Status & tagging log