r109151 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r109150 | r109151 | r109152 >
Date: 15:00, 17 January 2012
Author: oren
Status: reverted (Comments)
Tags:
Comment: this lucene api has switched to streams
Modified paths:
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java (added) (history)

Diff

Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java
@@ -0,0 +1,135 @@
+package org.wikimedia.lsearch.analyzers;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.wikimedia.lsearch.config.IndexId;
+
+/**
+ * Aggregate bean that captures information about one
+ * item going into some index aggregate field.
+ *
+ * @author rainman
+ *
+ */
+public class Aggregate {
+    protected ArrayList<Token> tokens;
+    protected float boost;
+    protected int noStopWordsLength;
+    protected Flags flags;
+
+    public enum Flags { NONE, ALTTITLE, ANCHOR, RELATED, SECTION };
+
+    /** Construct from arbitrary text that will be tokenized
+     * @throws IOException */
+    public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,
+            String field, HashSet<String> stopWords, Flags flags) throws IOException{
+        setTokens(toTokenArray(analyzer.tokenStream(field,new StringReader(text))),stopWords);
+        this.boost = boost;
+        this.flags = flags;
+
+    }
+    /** Set new token array, calc length, etc.. */
+    public void setTokens(ArrayList<Token> tokens, HashSet<String> stopWords){
+        this.tokens = tokens;
+        if(stopWords != null){
+            noStopWordsLength = 0;
+            for(Token t : tokens){
+                if(!stopWords.contains(t.termText()) && t.getPositionIncrement()!=0)
+                    noStopWordsLength++;
+            }
+        } else{
+            noStopWordsLength = noAliasLength();
+        }
+    }
+    /** Number of tokens without aliases */
+    public int noAliasLength(){
+        int len = 0;
+        for(Token t : tokens){
+            if(t.getPositionIncrement() != 0)
+                len++;
+        }
+        return len;
+    }
+
+    /** Construct with specific analyzer
+     * @throws IOException */
+    public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer,
+            String field, Flags flags) throws IOException{
+        this.tokens = toTokenArray(analyzer.tokenStream(field,new StringReader(text)));
+        this.boost = boost;
+        this.noStopWordsLength = noAliasLength();
+        this.flags = flags;
+    }
+
+    private ArrayList<Token> toTokenArray(TokenStream stream) throws IOException {
+        ArrayList<Token> tt = new ArrayList<Token>();
+        Token t = null;
+        while( (t = stream.next()) != null && tt.size() < 0xff-1){
+            tt.add(t);
+        }
+        return tt;
+    }
+
+    /** Number of tokens */
+    public int length(){
+        if(tokens != null)
+            return tokens.size();
+        else
+            return 0;
+    }
+
+    /** Number of tokens when stop words are excluded */
+    public int getNoStopWordsLength(){
+        return noStopWordsLength;
+    }
+
+    /** boost factor */
+    public float boost(){
+        return boost;
+    }
+
+    public Token getToken(int index){
+        return tokens.get(index);
+    }
+
+    public ArrayList<Token> getTokens() {
+        return tokens;
+    }
+
+    public Flags getFlags() {
+        return flags;
+    }
+    /**
+     * Generate the meta field stored contents
+     * format: [length] [length without stop words] [boost] [complete length] [flags] (1+1+4+1+1 bytes)
+     */
+    public static byte[] serializeAggregate(ArrayList<Aggregate> items){
+        byte[] buf = new byte[items.size() * 8];
+
+        for(int i=0;i<items.size();i++){
+            Aggregate ag = items.get(i);
+            assert ag.length() < 0xff;
+            assert ag.noAliasLength() < 0xff;
+            assert ag.getNoStopWordsLength() < 0xff;
+            buf[i*8] = (byte)(ag.noAliasLength() & 0xff);
+            buf[i*8+1] = (byte)(ag.getNoStopWordsLength() & 0xff);
+            int boost = Float.floatToIntBits(ag.boost());
+            buf[i*8+2] = (byte)((boost >>> 24) & 0xff);
+            buf[i*8+3] = (byte)((boost >>> 16) & 0xff);
+            buf[i*8+4] = (byte)((boost >>> 8) & 0xff);
+            buf[i*8+5] = (byte)((boost >>> 0) & 0xff);
+            buf[i*8+6] = (byte)(ag.length() & 0xff);
+            buf[i*8+7] = (byte)(ag.getFlags().ordinal() & 0xff);
+        }
+
+        return buf;
+    }
+
+
+}
Property changes on: trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/Aggregate.java
___________________________________________________________________
Added: svn:eol-style
   + native
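
The commit comment ("this lucene api has switched to streams") refers to the analyzer handing back a TokenStream, which toTokenArray() drains with the old Token-returning next() call. Purely as an illustration and not part of this revision, here is a minimal sketch of the same stop-word/alias counting done in setTokens() written against the attribute-based stream API that Lucene 2.9 and later expose; the class and method names outside the Lucene imports are invented for the example.

  import java.io.IOException;
  import java.io.StringReader;
  import java.util.HashSet;

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  import org.apache.lucene.analysis.tokenattributes.TermAttribute;

  public class StreamCountSketch {
      /** Count tokens that are neither stop words nor aliases (position increment 0). */
      public static int countNoStopWords(Analyzer analyzer, String field, String text,
              HashSet<String> stopWords) throws IOException {
          TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
          TermAttribute term = stream.addAttribute(TermAttribute.class);
          PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset(); // a no-op on older streams, required before iteration on newer ones
          int count = 0;
          while (stream.incrementToken()) { // replaces the old "while ((t = stream.next()) != null)" loop
              if (!stopWords.contains(term.term()) && posIncr.getPositionIncrement() != 0)
                  count++;
          }
          stream.end();
          stream.close();
          return count;
      }
  }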

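serializeAggregate() packs each Aggregate into a fixed 8-byte record, which is why toTokenArray() caps the token list at 0xff-1 entries and the asserts keep every count below 0xff. As a reading aid only (it is not part of the commit), a sketch of the inverse decoding of one record; the AggregateRecord holder class is hypothetical.

  /** Illustrative decoder for one 8-byte record produced by Aggregate.serializeAggregate(). */
  public final class AggregateRecord {
      public final int noAliasLength;     // byte 0: token count excluding aliases
      public final int noStopWordsLength; // byte 1: token count excluding stop words
      public final float boost;           // bytes 2-5: big-endian IEEE 754 float
      public final int length;            // byte 6: complete token count
      public final int flagsOrdinal;      // byte 7: ordinal of Aggregate.Flags

      public AggregateRecord(byte[] buf, int i) {
          int off = i * 8;                // each record occupies 8 bytes
          noAliasLength = buf[off] & 0xff;
          noStopWordsLength = buf[off + 1] & 0xff;
          int bits = ((buf[off + 2] & 0xff) << 24)
                   | ((buf[off + 3] & 0xff) << 16)
                   | ((buf[off + 4] & 0xff) << 8)
                   |  (buf[off + 5] & 0xff);
          boost = Float.intBitsToFloat(bits);   // inverse of Float.floatToIntBits()
          length = buf[off + 6] & 0xff;
          flagsOrdinal = buf[off + 7] & 0xff;
      }
  }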
Comments

#Comment by Nikerabbit (talk | contribs)   15:09, 17 January 2012

Why did you delete and re-add the file? Now all the history is gone.

#Comment by OrenBochman (talk | contribs)   12:11, 19 January 2012

I restored it from history

Status & tagging log