r109152 MediaWiki - Code Review archive

Repository:	MediaWiki
Revision:	< r109151 | r109152 | r109153 >
Date:	15:27, 17 January 2012
Author:	oren
Status:	deferred
Tags:
Comment:	reverting prior delete to recover file's history
Modified paths:	/trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java (deleted) (history)

Diff [purge]

Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java
—	—	@@ -1,135 +0,0 @@
2		~~-package org.wikimedia.lsearch.analyzers;~~
3		-
4		~~-import java.io.IOException;~~
5		~~-import java.io.StringReader;~~
6		~~-import java.util.ArrayList;~~
7		~~-import java.util.HashSet;~~
8		-
9		~~-import org.apache.lucene.analysis.Analyzer;~~
10		~~-import org.apache.lucene.analysis.Token;~~
11		~~-import org.apache.lucene.analysis.TokenStream;~~
12		~~-import org.wikimedia.lsearch.config.IndexId;~~
13		-
14		-/**
15		~~- * Aggregate bean that captures information about one~~
16		~~- * item going into the some index aggregate field.~~
17		- *
18		~~- * @author rainman~~
19		- *
20		~~- */~~
21		~~-public class Aggregate {~~
22		~~- protected ArrayList<Token> tokens;~~
23		~~- protected float boost;~~
24		~~- protected int noStopWordsLength;~~
25		~~- protected Flags flags;~~
26		-
27		~~- public enum Flags { NONE, ALTTITLE, ANCHOR, RELATED, SECTION };~~
28		-
29		~~- /** Construct from arbitrary text that will be tokenized~~
30		~~- * @throws IOException */~~
31		~~- public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,~~
32		~~- String field, HashSet<String> stopWords, Flags flags) throws IOException{~~
33		~~- setTokens(toTokenArray(analyzer.tokenStream(field,new StringReader(text))),stopWords);~~
34		~~- this.boost = boost;~~
35		~~- this.flags = flags;~~
36		-
37		~~- }~~
38		~~- /** Set new token array, calc length, etc.. */~~
39		~~- public void setTokens(ArrayList<Token> tokens, HashSet<String> stopWords){~~
40		~~- this.tokens = tokens;~~
41		~~- if(stopWords != null){~~
42		~~- noStopWordsLength = 0;~~
43		~~- for(Token t : tokens){~~
44		~~- if(!stopWords.contains(t.termText()) && t.getPositionIncrement()!=0)~~
45		~~- noStopWordsLength++;~~
46		~~- }~~
47		~~- } else{~~
48		~~- noStopWordsLength = noAliasLength();~~
49		~~- }~~
50		~~- }~~
51		~~- /** Number of tokens without aliases */~~
52		~~- public int noAliasLength(){~~
53		~~- int len = 0;~~
54		~~- for(Token t : tokens){~~
55		~~- if(t.getPositionIncrement() != 0)~~
56		~~- len++;~~
57		~~- }~~
58		~~- return len;~~
59		~~- }~~
60		-
61		~~- /** Construct with specific analyzer~~
62		~~- * @throws IOException */~~
63		~~- public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,~~
64		~~- String field, Flags flags) throws IOException{~~
65		~~- this.tokens = toTokenArray(analyzer.tokenStream(field,new StringReader(text)));~~
66		~~- this.boost = boost;~~
67		~~- this.noStopWordsLength = noAliasLength();~~
68		~~- this.flags = flags;~~
69		~~- }~~
70		-
71		~~- private ArrayList<Token> toTokenArray(TokenStream stream) throws IOException {~~
72		~~- ArrayList<Token> tt = new ArrayList<Token>();~~
73		~~- Token t = null;~~
74		~~- while( (t = stream.next()) != null && tt.size() < 0xff-1){~~
75		~~- tt.add(t);~~
76		~~- }~~
77		~~- return tt;~~
78		~~- }~~
79		-
80		~~- /** Number of tokens */~~
81		~~- public int length(){~~
82		~~- if(tokens != null)~~
83		~~- return tokens.size();~~
84		~~- else~~
85		~~- return 0;~~
86		~~- }~~
87		-
88		~~- /** Number of tokens when stop words are excluded */~~
89		~~- public int getNoStopWordsLength(){~~
90		~~- return noStopWordsLength;~~
91		~~- }~~
92		-
93		~~- /** boost factor */~~
94		~~- public float boost(){~~
95		~~- return boost;~~
96		~~- }~~
97		-
98		~~- public Token getToken(int index){~~
99		~~- return tokens.get(index);~~
100		~~- }~~
101		-
102		~~- public ArrayList<Token> getTokens() {~~
103		~~- return tokens;~~
104		~~- }~~
105		-
106		~~- public Flags getFlags() {~~
107		~~- return flags;~~
108		~~- }~~
109		- /**
110		~~- * Generate the meta field stored contents~~
111		~~- * format: [length] [length without stop words] [boost] [complete length] [flags] (1+1+4+1+1 bytes)~~
112		~~- */~~
113		~~- public static byte[] serializeAggregate(ArrayList<Aggregate> items){~~
114		~~- byte[] buf = new byte[items.size() * 8];~~
115		-
116		~~- for(int i=0;i<items.size();i++){~~
117		~~- Aggregate ag = items.get(i);~~
118		~~- assert ag.length() < 0xff;~~
119		~~- assert ag.noAliasLength() < 0xff;~~
120		~~- assert ag.getNoStopWordsLength() < 0xff;~~
121		~~- buf[i*8] = (byte)(ag.noAliasLength() & 0xff);~~
122		~~- buf[i*8+1] = (byte)(ag.getNoStopWordsLength() & 0xff);~~
123		~~- int boost = Float.floatToIntBits(ag.boost());~~
124		~~- buf[i*8+2] = (byte)((boost >>> 24) & 0xff);~~
125		~~- buf[i*8+3] = (byte)((boost >>> 16) & 0xff);~~
126		~~- buf[i*8+4] = (byte)((boost >>> 8) & 0xff);~~
127		~~- buf[i*8+5] = (byte)((boost >>> 0) & 0xff);~~
128		~~- buf[i*8+6] = (byte)(ag.length() & 0xff);~~
129		~~- buf[i*8+7] = (byte)(ag.getFlags().ordinal() & 0xff);~~
130		~~- }~~
131		-
132		~~- return buf;~~
133		~~- }~~
134		-
135		-
136		-}

Status & tagging log

01:16, 18 January 2012 Siebrand (talk | contribs) changed the status of r109152 [removed: new; added: deferred]